294 files changed, 7481 insertions, 2305 deletions
diff --git a/.bazelrc b/.bazelrc
index 4a0671f4a..3c31282ce 100644
--- a/.bazelrc
+++ b/.bazelrc
@@ -12,6 +12,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+# RBE requires a strong hash function, such as SHA256.
+startup --host_jvm_args=-Dbazel.DigestFunction=SHA256
+
 # Build with C++17.
 build --cxxopt=-std=c++17
 
@@ -22,11 +25,17 @@ build --stamp --workspace_status_command tools/workspace_status.sh
 build:remote --remote_executor=grpcs://remotebuildexecution.googleapis.com
 build:remote --project_id=gvisor-rbe
 build:remote --remote_instance_name=projects/gvisor-rbe/instances/default_instance
+build:remote3 --remote_executor=grpcs://remotebuildexecution.googleapis.com
+build:remote3 --project_id=gvisor-rbe
+build:remote3 --remote_instance_name=projects/gvisor-rbe/instances/default_instance
+
 # Enable authentication. This will pick up application default credentials by
 # default. You can use --google_credentials=some_file.json to use a service
 # account credential instead.
 build:remote --google_default_credentials=true
 build:remote --auth_scope="https://www.googleapis.com/auth/cloud-source-tools"
+build:remote3 --google_default_credentials=true
+build:remote3 --auth_scope="https://www.googleapis.com/auth/cloud-source-tools"
 
 # Add a custom platform and toolchain that builds in a privileged docker
 # container, which is required by our syscall tests.
@@ -37,8 +46,13 @@ build:remote --platforms=//tools/bazeldefs:rbe_ubuntu1604
 build:remote --crosstool_top=@rbe_default//cc:toolchain
 build:remote --jobs=50
 build:remote --remote_timeout=3600
-# RBE requires a strong hash function, such as SHA256.
-startup --host_jvm_args=-Dbazel.DigestFunction=SHA256
+build:remote3 --host_platform=//tools/bazeldefs:rbe_ubuntu1604_bazel3
+build:remote3 --extra_toolchains=//tools/bazeldefs:cc-toolchain-clang-x86_64-default_bazel3
+build:remote3 --extra_execution_platforms=//tools/bazeldefs:rbe_ubuntu1604_bazel3
+build:remote3 --platforms=//tools/bazeldefs:rbe_ubuntu1604_bazel3
+build:remote3 --crosstool_top=@rbe_default//cc:toolchain
+build:remote3 --jobs=50
+build:remote3 --remote_timeout=3600
 
 # Set flags for uploading to BES in order to view results in the Bazel Build
 # Results UI.
diff --git a/.travis.yml b/.travis.yml
index 9d3141f38..1d955b05d 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -30,8 +30,10 @@ services:
   - docker
 jobs:
   include:
-   - os: linux
-     arch: amd64
+   # AMD64 builds are tested on kokoro, so don't run them in travis to save
+   # capacity for arm64 builds.
+   # - os: linux
+   #   arch: amd64
    - os: linux
      arch: arm64
 script:
diff --git a/Makefile b/Makefile
index 599cb350f..8e97fc978 100644
--- a/Makefile
+++ b/Makefile
@@ -14,6 +14,10 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+# Helpful pretty-printer.
+MAKEBANNER := \033[1;34mmake\033[0m
+submake = echo -e '$(MAKEBANNER) $1' >&2; $(MAKE) $1
+
 # Described below.
 OPTIONS :=
 STARTUP_OPTIONS :=
@@ -85,7 +89,7 @@ endif
 ##
 define images
 $(1)-%: ## Image tool: $(1) a given image (also may use 'all-images').
-	@$(MAKE) -C images $$@
+	@$(call submake,-C images $$@)
 endef
 rebuild-...: ## Rebuild the given image. Also may use 'rebuild-all-images'.
 $(eval $(call images,rebuild))
@@ -96,7 +100,7 @@ $(eval $(call images,push))
 load-...: ## Load (pull or rebuild) the given image. Also may use 'load-all-images'.
 $(eval $(call images,load))
 list-images: ## List all available images.
-	@$(MAKE) -C images $$@
+	@$(call submake, -C images $$@)
 
 ##
 ## Canonical build and test targets.
@@ -106,21 +110,116 @@ list-images: ## List all available images.
 ##   new subsystem or workflow, consider adding a new target here.
 ##
 runsc: ## Builds the runsc binary.
-	@$(MAKE) build TARGETS="//runsc"
+	@$(call submake,build OPTIONS="-c opt" TARGETS="//runsc")
 .PHONY: runsc
 
+debian: ## Builds the debian packages.
+	@$(call submake,build OPTIONS="-c opt" TARGETS="//runsc:runsc-debian")
+.PHONY: debian
+
 smoke-test: ## Runs a simple smoke test after build runsc.
-	@$(MAKE) run DOCKER_PRIVILEGED="" ARGS="--alsologtostderr --network none --debug --TESTONLY-unsafe-nonroot=true --rootless do true"
+	@$(call submake,run DOCKER_PRIVILEGED="" ARGS="--alsologtostderr --network none --debug --TESTONLY-unsafe-nonroot=true --rootless do true")
 .PHONY: smoke-tests
 
-unit-tests: ## Runs all unit tests in pkg runsc and tools.
-	@$(MAKE) test OPTIONS="pkg/... runsc/... tools/..."
-.PHONY: unit-tests
+unit-tests: ## Local package unit tests in pkg/..., runsc/, tools/.., etc.
+	@$(call submake,test TARGETS="pkg/... runsc/... tools/... benchmarks/... benchmarks/runner:runner_test")
 
-tests: ## Runs all local ptrace system call tests.
-	@$(MAKE) test OPTIONS="--test_tag_filters runsc_ptrace test/syscalls/..."
+tests: ## Runs all unit tests and syscall tests.
+tests: unit-tests
+	@$(call submake,test TARGETS="test/syscalls/...")
 .PHONY: tests
 
+
+integration-tests: ## Run all standard integration tests.
+integration-tests: docker-tests overlay-tests hostnet-tests swgso-tests
+integration-tests: do-tests kvm-tests root-tests containerd-tests
+.PHONY: integration-tests
+
+network-tests: ## Run all networking integration tests.
+network-tests: iptables-tests packetdrill-tests packetimpact-tests
+.PHONY: network-tests
+
+# Standard integration targets.
+INTEGRATION_TARGETS := //test/image:image_test //test/e2e:integration_test
+
+syscall-%-tests:
+	@$(call submake,test OPTIONS="--test_tag_filters runsc_$* test/syscalls/...")
+
+syscall-native-tests:
+	@$(call submake,test OPTIONS="--test_tag_filters native test/syscalls/...")
+.PHONY: syscall-native-tests
+
+syscall-tests: ## Run all system call tests.
+syscall-tests: syscall-ptrace-tests syscall-kvm-tests syscall-native-tests
+.PHONY: syscall-tests
+
+%-runtime-tests: load-runtimes_%
+	@$(call submake,install-test-runtime)
+	@$(call submake,test-runtime TARGETS="//test/runtimes:$*")
+
+do-tests: runsc
+	@$(call submake,run TARGETS="//runsc" ARGS="--rootless do true")
+	@$(call submake,run TARGETS="//runsc" ARGS="--rootless -network=none do true")
+	@$(call submake,sudo TARGETS="//runsc" ARGS="do true")
+.PHONY: do-tests
+
+simple-tests: unit-tests # Compatibility target.
+.PHONY: simple-tests
+
+IMAGE_FILTER := HelloWorld\|Httpd\|Ruby\|Stdio
+INTEGRATION_FILTER := Life\|Pause\|Connect\|JobControl\|Overlay\|Exec\|DirCreation/root
+
+docker-tests: load-basic-images
+	@$(call submake,install-test-runtime RUNTIME="vfs1")
+	@$(call submake,test-runtime RUNTIME="vfs1" TARGETS="$(INTEGRATION_TARGETS)")
+	@$(call submake,install-test-runtime RUNTIME="vfs2" ARGS="--vfs2")
+	@$(call submake,test-runtime RUNTIME="vfs2" OPTIONS="--test_filter=$(IMAGE_FILTER)\|$(INTEGRATION_FILTER)" TARGETS="$(INTEGRATION_TARGETS)")
+.PHONY: docker-tests
+
+overlay-tests: load-basic-images
+	@$(call submake,install-test-runtime RUNTIME="overlay" ARGS="--overlay")
+	@$(call submake,test-runtime RUNTIME="overlay" TARGETS="$(INTEGRATION_TARGETS)")
+.PHONY: overlay-tests
+
+swgso-tests: load-basic-images
+	@$(call submake,install-test-runtime RUNTIME="swgso" ARGS="--software-gso=true --gso=false")
+	@$(call submake,test-runtime RUNTIME="swgso" TARGETS="$(INTEGRATION_TARGETS)")
+.PHONY: swgso-tests
+
+hostnet-tests: load-basic-images
+	@$(call submake,install-test-runtime RUNTIME="hostnet" ARGS="--network=host")
+	@$(call submake,test-runtime RUNTIME="hostnet" OPTIONS="--test_arg=-checkpoint=false" TARGETS="$(INTEGRATION_TARGETS)")
+.PHONY: hostnet-tests
+
+kvm-tests: load-basic-images
+	@(lsmod | grep -E '^(kvm_intel|kvm_amd)') || sudo modprobe kvm
+	@if ! [[ -w /dev/kvm ]]; then sudo chmod a+rw /dev/kvm; fi
+	@$(call submake,test TARGETS="//pkg/sentry/platform/kvm:kvm_test")
+	@$(call submake,install-test-runtime RUNTIME="kvm" ARGS="--platform=kvm")
+	@$(call submake,test-runtime RUNTIME="kvm" TARGETS="$(INTEGRATION_TARGETS)")
+.PHONY: kvm-tests
+
+iptables-tests: load-iptables
+	@$(call submake,test-runtime RUNTIME="runc" TARGETS="//test/iptables:iptables_test")
+	@$(call submake,install-test-runtime RUNTIME="iptables" ARGS="--net-raw")
+	@$(call submake,test-runtime RUNTIME="iptables" TARGETS="//test/iptables:iptables_test")
+.PHONY: iptables-tests
+
+packetdrill-tests: load-packetdrill
+	@$(call submake,install-test-runtime RUNTIME="packetdrill")
+	@$(call submake,test-runtime RUNTIME="packetdrill" TARGETS="$(shell $(MAKE) query TARGETS='attr(tags, packetdrill, tests(//...))')")
+.PHONY: packetdrill-tests
+
+packetimpact-tests: load-packetimpact
+	@$(call submake,install-test-runtime RUNTIME="packetimpact")
+	@$(call submake,test-runtime RUNTIME="packetimpact" TARGETS="$(shell $(MAKE) query TARGETS='attr(tags, packetimpact, tests(//...))')")
+.PHONY: packetimpact-tests
+
+root-tests: load-basic-images
+	@$(call submake,install-test-runtime)
+	@$(call submake,sudo TARGETS="//test/root:root_test" ARGS="-test.v")
+.PHONY: test-root
+
 # Specific containerd version tests.
 containerd-test-%: load-basic_alpine load-basic_python load-basic_busybox load-basic_resolv load-basic_httpd install-test-runtime
 	@CONTAINERD_VERSION=$* $(MAKE) sudo TARGETS="tools/installers:containerd"
@@ -154,7 +253,7 @@ WEBSITE_PROJECT := gvisordev
 WEBSITE_REGION  := us-central1
 
 website-build: load-jekyll ## Build the site image locally.
-	@$(MAKE) run TARGETS="//website:website"
+	@$(call submake,run TARGETS="//website:website")
 .PHONY: website-build
 
 website-server: website-build ## Run a local server for development.
@@ -205,8 +304,8 @@ $(RELEASE_KEY):
 release: $(RELEASE_KEY) ## Builds a release.
 	@mkdir -p $(RELEASE_ROOT)
 	@T=$$(mktemp -d /tmp/release.XXXXXX); \
-	  $(MAKE) copy TARGETS="runsc" DESTINATION=$$T && \
-	  $(MAKE) copy TARGETS="runsc:runsc-debian" DESTINATION=$$T && \
+	  $(call submake,copy TARGETS="runsc" DESTINATION=$$T) && \
+	  $(call submake,copy TARGETS="runsc:runsc-debian" DESTINATION=$$T) && \
 	  NIGHTLY=$(RELEASE_NIGHTLY) tools/make_release.sh $(RELEASE_KEY) $(RELEASE_ROOT) $$T/*; \
 	rc=$$?; rm -rf $$T; exit $$rc
 .PHONY: release
@@ -229,43 +328,47 @@ tag: ## Creates and pushes a release tag.
 ##
 ifeq (,$(BRANCH_NAME))
 RUNTIME     := runsc
-RUNTIME_DIR := $(shell dirname $(shell mktemp -u))/runsc
+RUNTIME_DIR := $(shell dirname $(shell mktemp -u))/$(RUNTIME)
 else
 RUNTIME     := $(BRANCH_NAME)
-RUNTIME_DIR := $(shell dirname $(shell mktemp -u))/$(BRANCH_NAME)
+RUNTIME_DIR := $(shell dirname $(shell mktemp -u))/$(RUNTIME)
 endif
 RUNTIME_BIN     := $(RUNTIME_DIR)/runsc
 RUNTIME_LOG_DIR := $(RUNTIME_DIR)/logs
 RUNTIME_LOGS    := $(RUNTIME_LOG_DIR)/runsc.log.%TEST%.%TIMESTAMP%.%COMMAND%
 
 dev: ## Installs a set of local runtimes. Requires sudo.
-	@$(MAKE) refresh ARGS="--net-raw"
-	@$(MAKE) configure RUNTIME="$(RUNTIME)" ARGS="--net-raw"
-	@$(MAKE) configure RUNTIME="$(RUNTIME)-d" ARGS="--net-raw --debug --strace --log-packets"
-	@$(MAKE) configure RUNTIME="$(RUNTIME)-p" ARGS="--net-raw --profile"
-	@$(MAKE) configure RUNTIME="$(RUNTIME)-vfs2-d" ARGS="--net-raw --debug --strace --log-packets --vfs2"
+	@$(call submake,refresh ARGS="--net-raw")
+	@$(call submake,configure RUNTIME_NAME="$(RUNTIME)" ARGS="--net-raw")
+	@$(call submake,configure RUNTIME_NAME="$(RUNTIME)-d" ARGS="--net-raw --debug --strace --log-packets")
+	@$(call submake,configure RUNTIME_NAME="$(RUNTIME)-p" ARGS="--net-raw --profile")
+	@$(call submake,configure RUNTIME_NAME="$(RUNTIME)-vfs2-d" ARGS="--net-raw --debug --strace --log-packets --vfs2")
 	@sudo systemctl restart docker
 .PHONY: dev
 
-refresh: ## Refreshes the runtime binary (for development only). Must have called 'dev' or 'test-install' first.
+refresh: ## Refreshes the runtime binary (for development only). Must have called 'dev' or 'install-test-runtime' first.
 	@mkdir -p "$(RUNTIME_DIR)"
-	@$(MAKE) copy TARGETS=runsc DESTINATION="$(RUNTIME_BIN)"
+	@$(call submake,copy TARGETS=runsc DESTINATION="$(RUNTIME_BIN)")
 .PHONY: install
 
 install-test-runtime: ## Installs the runtime for testing. Requires sudo.
-	@$(MAKE) refresh ARGS="--net-raw --TESTONLY-test-name-env=RUNSC_TEST_NAME --debug --strace --log-packets $(ARGS)"
-	@$(MAKE) configure RUNTIME=runsc
-	@$(MAKE) configure
+	@$(call submake,refresh ARGS="--net-raw --TESTONLY-test-name-env=RUNSC_TEST_NAME --debug --strace --log-packets $(ARGS)")
+	@$(call submake,configure RUNTIME_NAME=runsc)
+	@$(call submake,configure RUNTIME_NAME="$(RUNTIME)")
 	@sudo systemctl restart docker
+	@if [[ -f /etc/docker/daemon.json ]]; then \
+		sudo chmod 0755 /etc/docker && \
+		sudo chmod 0644 /etc/docker/daemon.json; \
+	fi
 .PHONY: install-test-runtime
 
-configure: ## Configures a single runtime. Requires sudo. Typically called from dev or test-install.
-	@sudo sudo "$(RUNTIME_BIN)" install --experimental=true --runtime="$(RUNTIME)" -- --debug-log "$(RUNTIME_LOGS)" $(ARGS)
-	@echo "Installed runtime \"$(RUNTIME)\" @ $(RUNTIME_BIN)"
-	@echo "Logs are in: $(RUNTIME_LOG_DIR)"
+configure: ## Configures a single runtime. Requires sudo. Typically called from dev or install-test-runtime.
+	@sudo sudo "$(RUNTIME_BIN)" install --experimental=true --runtime="$(RUNTIME_NAME)" -- --debug-log "$(RUNTIME_LOGS)" $(ARGS)
+	@echo -e "$(INFO) Installed runtime \"$(RUNTIME)\" @ $(RUNTIME_BIN)"
+	@echo -e "$(INFO) Logs are in: $(RUNTIME_LOG_DIR)"
 	@sudo rm -rf "$(RUNTIME_LOG_DIR)" && mkdir -p "$(RUNTIME_LOG_DIR)"
 .PHONY: configure
 
 test-runtime: ## A convenient wrapper around test that provides the runtime argument. Target must still be provided.
-	@$(MAKE) test OPTIONS="$(OPTIONS) --test_arg=--runtime=$(RUNTIME)"
-.PHONY: runtime-test
+	@$(call submake,test OPTIONS="$(OPTIONS) --test_output=streamed --test_arg=--runtime=$(RUNTIME)")
+.PHONY: test-runtime
diff --git a/WORKSPACE b/WORKSPACE
index 2ab750a9d..49f231755 100644
--- a/WORKSPACE
+++ b/WORKSPACE
@@ -42,6 +42,28 @@ http_archive(
     ],
 )
 
+http_archive(
+    name = "io_bazel_rules_go_bazel3", # To replace the above.
+    patch_args = ["-p1"],
+    patches = [
+        "//tools/nogo:io_bazel_rules_go-visibility.patch",
+    ],
+    sha256 = "87f0fb9747854cb76a0a82430adccb6269f7d394237104a4523b51061c469171",
+    urls = [
+        "https://mirror.bazel.build/github.com/bazelbuild/rules_go/releases/download/v0.23.1/rules_go-v0.23.1.tar.gz",
+        "https://github.com/bazelbuild/rules_go/releases/download/v0.23.1/rules_go-v0.23.1.tar.gz",
+    ],
+)
+
+http_archive(
+    name = "bazel_gazelle_bazel3", # To replace the above.
+    sha256 = "bfd86b3cbe855d6c16c6fce60d76bd51f5c8dbc9cfcaef7a2bb5c1aafd0710e8",
+    urls = [
+        "https://mirror.bazel.build/github.com/bazelbuild/bazel-gazelle/releases/download/v0.21.0/bazel-gazelle-v0.21.0.tar.gz",
+        "https://github.com/bazelbuild/bazel-gazelle/releases/download/v0.21.0/bazel-gazelle-v0.21.0.tar.gz",
+    ],
+)
+
 load("@io_bazel_rules_go//go:deps.bzl", "go_register_toolchains", "go_rules_dependencies")
 
 go_rules_dependencies()
@@ -123,6 +145,16 @@ http_archive(
     ],
 )
 
+http_archive(
+    name = "bazel_toolchains_bazel3", # To replace the above.
+    sha256 = "144290c4166bd67e76a54f96cd504ed86416ca3ca82030282760f0823c10be48",
+    strip_prefix = "bazel-toolchains-3.1.1",
+    urls = [
+        "https://github.com/bazelbuild/bazel-toolchains/releases/download/3.1.1/bazel-toolchains-3.1.1.tar.gz",
+        "https://mirror.bazel.build/github.com/bazelbuild/bazel-toolchains/releases/download/3.1.1/bazel-toolchains-3.1.1.tar.gz",
+    ],
+)
+
 # Creates a default toolchain config for RBE.
 load("@bazel_toolchains//rules:rbe_repo.bzl", "rbe_autoconfig")
 
diff --git a/images/Makefile b/images/Makefile
index 1485607bd..9de359a28 100644
--- a/images/Makefile
+++ b/images/Makefile
@@ -34,8 +34,15 @@ list-all-images:
 	@for image in $(ALL_IMAGES); do echo $${image}; done
 .PHONY: list-build-images
 
+# Handy wrapper to allow load-all-images, push-all-images, etc.
 %-all-images:
 	@$(MAKE) $(patsubst %,$*-%,$(ALL_IMAGES))
+load-all-images:
+	@$(MAKE) $(patsubst %,load-%,$(ALL_IMAGES))
+
+# Handy wrapper to load specified "groups", e.g. load-basic-images, etc.
+load-%-images:
+	@$(MAKE) $(patsubst %,load-%,$(subst /,_,$(subst ./,,$(shell find ./$* -name Dockerfile -exec dirname {} \;))))
 
 # tag is a function that returns the tag name, given an image.
 #
diff --git a/images/hostoverlaytest/Dockerfile b/images/basic/hostoverlaytest/Dockerfile
index d83439e9c..d83439e9c 100644
--- a/images/hostoverlaytest/Dockerfile
+++ b/images/basic/hostoverlaytest/Dockerfile
diff --git a/images/hostoverlaytest/test.c b/images/basic/hostoverlaytest/test.c
index 088f90746..088f90746 100644
--- a/images/hostoverlaytest/test.c
+++ b/images/basic/hostoverlaytest/test.c
diff --git a/images/hostoverlaytest/testfile.txt b/images/basic/hostoverlaytest/testfile.txt
index e4188c841..e4188c841 100644
--- a/images/hostoverlaytest/testfile.txt
+++ b/images/basic/hostoverlaytest/testfile.txt
diff --git a/images/tmpfile/Dockerfile b/images/basic/tmpfile/Dockerfile
index e3816c8cb..e3816c8cb 100644
--- a/images/tmpfile/Dockerfile
+++ b/images/basic/tmpfile/Dockerfile
diff --git a/images/benchmarks/ffmpeg/Dockerfile b/images/benchmarks/ffmpeg/Dockerfile
new file mode 100644
index 000000000..7108df64f
--- /dev/null
+++ b/images/benchmarks/ffmpeg/Dockerfile
@@ -0,0 +1,9 @@
+FROM ubuntu:18.04
+
+RUN set -x \
+        && apt-get update \
+        && apt-get install -y \
+            ffmpeg \
+        && rm -rf /var/lib/apt/lists/*
+WORKDIR /media
+ADD https://samples.ffmpeg.org/MPEG-4/video.mp4 video.mp4
diff --git a/images/benchmarks/redis/Dockerfile b/images/benchmarks/redis/Dockerfile
new file mode 100644
index 000000000..0f17249af
--- /dev/null
+++ b/images/benchmarks/redis/Dockerfile
@@ -0,0 +1 @@
+FROM redis:5.0.4
diff --git a/images/default/Dockerfile b/images/default/Dockerfile
index 397082b02..2b38e6c58 100644
--- a/images/default/Dockerfile
+++ b/images/default/Dockerfile
@@ -1,7 +1,7 @@
 FROM fedora:31
 # Install bazel.
 RUN dnf install -y dnf-plugins-core && dnf copr enable -y vbatts/bazel
-RUN dnf install -y git gcc make golang gcc-c++ glibc-devel python3 which python3-pip python3-devel libffi-devel openssl-devel pkg-config glibc-static libstdc++-static patch
+RUN dnf install -y git gcc make golang gcc-c++ glibc-devel python3 which python3-pip python3-devel libffi-devel openssl-devel pkg-config glibc-static libstdc++-static patch diffutils
 RUN pip install pycparser
 RUN dnf install -y bazel3
 # Install gcloud.
diff --git a/pkg/abi/linux/BUILD b/pkg/abi/linux/BUILD
index 2b789c4ec..05ca5342f 100644
--- a/pkg/abi/linux/BUILD
+++ b/pkg/abi/linux/BUILD
@@ -29,6 +29,7 @@ go_library(
         "file_amd64.go",
         "file_arm64.go",
         "fs.go",
+        "fuse.go",
         "futex.go",
         "inotify.go",
         "ioctl.go",
@@ -72,6 +73,9 @@ go_library(
         "//pkg/abi",
         "//pkg/binary",
         "//pkg/bits",
+        "//pkg/usermem",
+        "//tools/go_marshal/marshal",
+        "//tools/go_marshal/primitive",
     ],
 )
 
diff --git a/pkg/abi/linux/fuse.go b/pkg/abi/linux/fuse.go
new file mode 100644
index 000000000..d3ebbccc4
--- /dev/null
+++ b/pkg/abi/linux/fuse.go
@@ -0,0 +1,143 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package linux
+
+// +marshal
+type FUSEOpcode uint32
+
+// +marshal
+type FUSEOpID uint64
+
+// Opcodes for FUSE operations. Analogous to the opcodes in include/linux/fuse.h.
+const (
+	FUSE_LOOKUP   FUSEOpcode = 1
+	FUSE_FORGET              = 2 /* no reply */
+	FUSE_GETATTR             = 3
+	FUSE_SETATTR             = 4
+	FUSE_READLINK            = 5
+	FUSE_SYMLINK             = 6
+	_
+	FUSE_MKNOD   = 8
+	FUSE_MKDIR   = 9
+	FUSE_UNLINK  = 10
+	FUSE_RMDIR   = 11
+	FUSE_RENAME  = 12
+	FUSE_LINK    = 13
+	FUSE_OPEN    = 14
+	FUSE_READ    = 15
+	FUSE_WRITE   = 16
+	FUSE_STATFS  = 17
+	FUSE_RELEASE = 18
+	_
+	FUSE_FSYNC        = 20
+	FUSE_SETXATTR     = 21
+	FUSE_GETXATTR     = 22
+	FUSE_LISTXATTR    = 23
+	FUSE_REMOVEXATTR  = 24
+	FUSE_FLUSH        = 25
+	FUSE_INIT         = 26
+	FUSE_OPENDIR      = 27
+	FUSE_READDIR      = 28
+	FUSE_RELEASEDIR   = 29
+	FUSE_FSYNCDIR     = 30
+	FUSE_GETLK        = 31
+	FUSE_SETLK        = 32
+	FUSE_SETLKW       = 33
+	FUSE_ACCESS       = 34
+	FUSE_CREATE       = 35
+	FUSE_INTERRUPT    = 36
+	FUSE_BMAP         = 37
+	FUSE_DESTROY      = 38
+	FUSE_IOCTL        = 39
+	FUSE_POLL         = 40
+	FUSE_NOTIFY_REPLY = 41
+	FUSE_BATCH_FORGET = 42
+)
+
+const (
+	// FUSE_MIN_READ_BUFFER is the minimum size the read can be for any FUSE filesystem.
+	// This is the minimum size Linux supports. See linux.fuse.h.
+	FUSE_MIN_READ_BUFFER uint32 = 8192
+)
+
+// FUSEHeaderIn is the header read by the daemon with each request.
+//
+// +marshal
+type FUSEHeaderIn struct {
+	// Len specifies the total length of the data, including this header.
+	Len uint32
+
+	// Opcode specifies the kind of operation of the request.
+	Opcode FUSEOpcode
+
+	// Unique specifies the unique identifier for this request.
+	Unique FUSEOpID
+
+	// NodeID is the ID of the filesystem object being operated on.
+	NodeID uint64
+
+	// UID is the UID of the requesting process.
+	UID uint32
+
+	// GID is the GID of the requesting process.
+	GID uint32
+
+	// PID is the PID of the requesting process.
+	PID uint32
+
+	_ uint32
+}
+
+// FUSEHeaderOut is the header written by the daemon when it processes
+// a request and wants to send a reply (almost all operations require a
+// reply; if they do not, this will be explicitly documented).
+//
+// +marshal
+type FUSEHeaderOut struct {
+	// Len specifies the total length of the data, including this header.
+	Len uint32
+
+	// Error specifies the error that occurred (0 if none).
+	Error int32
+
+	// Unique specifies the unique identifier of the corresponding request.
+	Unique FUSEOpID
+}
+
+// FUSEWriteIn is the header written by a daemon when it makes a
+// write request to the FUSE filesystem.
+//
+// +marshal
+type FUSEWriteIn struct {
+	// Fh specifies the file handle that is being written to.
+	Fh uint64
+
+	// Offset is the offset of the write.
+	Offset uint64
+
+	// Size is the size of data being written.
+	Size uint32
+
+	// WriteFlags is the flags used during the write.
+	WriteFlags uint32
+
+	// LockOwner is the ID of the lock owner.
+	LockOwner uint64
+
+	// Flags is the flags for the request.
+	Flags uint32
+
+	_ uint32
+}
diff --git a/pkg/abi/linux/futex.go b/pkg/abi/linux/futex.go
index 08bfde3b5..8138088a6 100644
--- a/pkg/abi/linux/futex.go
+++ b/pkg/abi/linux/futex.go
@@ -60,3 +60,21 @@ const (
 	FUTEX_WAITERS    = 0x80000000
 	FUTEX_OWNER_DIED = 0x40000000
 )
+
+// FUTEX_BITSET_MATCH_ANY has all bits set.
+const FUTEX_BITSET_MATCH_ANY = 0xffffffff
+
+// ROBUST_LIST_LIMIT protects against a deliberately circular list.
+const ROBUST_LIST_LIMIT = 2048
+
+// RobustListHead corresponds to Linux's struct robust_list_head.
+//
+// +marshal
+type RobustListHead struct {
+	List          uint64
+	FutexOffset   uint64
+	ListOpPending uint64
+}
+
+// SizeOfRobustListHead is the size of a RobustListHead struct.
+var SizeOfRobustListHead = (*RobustListHead)(nil).SizeBytes()
diff --git a/pkg/abi/linux/netdevice.go b/pkg/abi/linux/netdevice.go
index 7866352b4..0faf015c7 100644
--- a/pkg/abi/linux/netdevice.go
+++ b/pkg/abi/linux/netdevice.go
@@ -22,6 +22,8 @@ const (
 )
 
 // IFReq is an interface request.
+//
+// +marshal
 type IFReq struct {
 	// IFName is an encoded name, normally null-terminated. This should be
 	// accessed via the Name and SetName functions.
@@ -79,6 +81,8 @@ type IFMap struct {
 
 // IFConf is used to return a list of interfaces and their addresses. See
 // netdevice(7) and struct ifconf for more detail on its use.
+//
+// +marshal
 type IFConf struct {
 	Len int32
 	_   [4]byte // Pad to sizeof(struct ifconf).
diff --git a/pkg/abi/linux/netfilter.go b/pkg/abi/linux/netfilter.go
index 46d8b0b42..a91f9f018 100644
--- a/pkg/abi/linux/netfilter.go
+++ b/pkg/abi/linux/netfilter.go
@@ -14,6 +14,14 @@
 
 package linux
 
+import (
+	"io"
+
+	"gvisor.dev/gvisor/pkg/usermem"
+	"gvisor.dev/gvisor/tools/go_marshal/marshal"
+	"gvisor.dev/gvisor/tools/go_marshal/primitive"
+)
+
 // This file contains structures required to support netfilter, specifically
 // the iptables tool.
 
@@ -76,6 +84,8 @@ const (
 
 // IPTEntry is an iptable rule. It corresponds to struct ipt_entry in
 // include/uapi/linux/netfilter_ipv4/ip_tables.h.
+//
+// +marshal
 type IPTEntry struct {
 	// IP is used to filter packets based on the IP header.
 	IP IPTIP
@@ -112,21 +122,41 @@ type IPTEntry struct {
 // SizeOfIPTEntry is the size of an IPTEntry.
 const SizeOfIPTEntry = 112
 
-// KernelIPTEntry is identical to IPTEntry, but includes the Elems field. This
-// struct marshaled via the binary package to write an IPTEntry to userspace.
+// KernelIPTEntry is identical to IPTEntry, but includes the Elems field.
+// KernelIPTEntry itself is not Marshallable but it implements some methods of
+// marshal.Marshallable that help in other implementations of Marshallable.
 type KernelIPTEntry struct {
-	IPTEntry
+	Entry IPTEntry
 
 	// Elems holds the data for all this rule's matches followed by the
 	// target. It is variable length -- users have to iterate over any
 	// matches and use TargetOffset and NextOffset to make sense of the
 	// data.
-	Elems []byte
+	Elems primitive.ByteSlice
+}
+
+// SizeBytes implements marshal.Marshallable.SizeBytes.
+func (ke *KernelIPTEntry) SizeBytes() int {
+	return ke.Entry.SizeBytes() + ke.Elems.SizeBytes()
+}
+
+// MarshalBytes implements marshal.Marshallable.MarshalBytes.
+func (ke *KernelIPTEntry) MarshalBytes(dst []byte) {
+	ke.Entry.MarshalBytes(dst)
+	ke.Elems.MarshalBytes(dst[ke.Entry.SizeBytes():])
+}
+
+// UnmarshalBytes implements marshal.Marshallable.UnmarshalBytes.
+func (ke *KernelIPTEntry) UnmarshalBytes(src []byte) {
+	ke.Entry.UnmarshalBytes(src)
+	ke.Elems.UnmarshalBytes(src[ke.Entry.SizeBytes():])
 }
 
 // IPTIP contains information for matching a packet's IP header.
 // It corresponds to struct ipt_ip in
 // include/uapi/linux/netfilter_ipv4/ip_tables.h.
+//
+// +marshal
 type IPTIP struct {
 	// Src is the source IP address.
 	Src InetAddr
@@ -189,6 +219,8 @@ const SizeOfIPTIP = 84
 
 // XTCounters holds packet and byte counts for a rule. It corresponds to struct
 // xt_counters in include/uapi/linux/netfilter/x_tables.h.
+//
+// +marshal
 type XTCounters struct {
 	// Pcnt is the packet count.
 	Pcnt uint64
@@ -321,6 +353,8 @@ const SizeOfXTRedirectTarget = 56
 
 // IPTGetinfo is the argument for the IPT_SO_GET_INFO sockopt. It corresponds
 // to struct ipt_getinfo in include/uapi/linux/netfilter_ipv4/ip_tables.h.
+//
+// +marshal
 type IPTGetinfo struct {
 	Name       TableName
 	ValidHooks uint32
@@ -336,6 +370,8 @@ const SizeOfIPTGetinfo = 84
 // IPTGetEntries is the argument for the IPT_SO_GET_ENTRIES sockopt. It
 // corresponds to struct ipt_get_entries in
 // include/uapi/linux/netfilter_ipv4/ip_tables.h.
+//
+// +marshal
 type IPTGetEntries struct {
 	Name TableName
 	Size uint32
@@ -350,13 +386,103 @@ type IPTGetEntries struct {
 const SizeOfIPTGetEntries = 40
 
 // KernelIPTGetEntries is identical to IPTGetEntries, but includes the
-// Entrytable field. This struct marshaled via the binary package to write an
-// KernelIPTGetEntries to userspace.
+// Entrytable field. This has been manually made marshal.Marshallable since it
+// is dynamically sized.
 type KernelIPTGetEntries struct {
 	IPTGetEntries
 	Entrytable []KernelIPTEntry
 }
 
+// SizeBytes implements marshal.Marshallable.SizeBytes.
+func (ke *KernelIPTGetEntries) SizeBytes() int {
+	res := ke.IPTGetEntries.SizeBytes()
+	for _, entry := range ke.Entrytable {
+		res += entry.SizeBytes()
+	}
+	return res
+}
+
+// MarshalBytes implements marshal.Marshallable.MarshalBytes.
+func (ke *KernelIPTGetEntries) MarshalBytes(dst []byte) {
+	ke.IPTGetEntries.MarshalBytes(dst)
+	marshalledUntil := ke.IPTGetEntries.SizeBytes()
+	for i := 0; i < len(ke.Entrytable); i++ {
+		ke.Entrytable[i].MarshalBytes(dst[marshalledUntil:])
+		marshalledUntil += ke.Entrytable[i].SizeBytes()
+	}
+}
+
+// UnmarshalBytes implements marshal.Marshallable.UnmarshalBytes.
+func (ke *KernelIPTGetEntries) UnmarshalBytes(src []byte) {
+	ke.IPTGetEntries.UnmarshalBytes(src)
+	unmarshalledUntil := ke.IPTGetEntries.SizeBytes()
+	for i := 0; i < len(ke.Entrytable); i++ {
+		ke.Entrytable[i].UnmarshalBytes(src[unmarshalledUntil:])
+		unmarshalledUntil += ke.Entrytable[i].SizeBytes()
+	}
+}
+
+// Packed implements marshal.Marshallable.Packed.
+func (ke *KernelIPTGetEntries) Packed() bool {
+	// KernelIPTGetEntries isn't packed because the ke.Entrytable contains an
+	// indirection to the actual data we want to marshal (the slice data
+	// pointer), and the memory for KernelIPTGetEntries contains the slice
+	// header which we don't want to marshal.
+	return false
+}
+
+// MarshalUnsafe implements marshal.Marshallable.MarshalUnsafe.
+func (ke *KernelIPTGetEntries) MarshalUnsafe(dst []byte) {
+	// Fall back to safe Marshal because the type in not packed.
+	ke.MarshalBytes(dst)
+}
+
+// UnmarshalUnsafe implements marshal.Marshallable.UnmarshalUnsafe.
+func (ke *KernelIPTGetEntries) UnmarshalUnsafe(src []byte) {
+	// Fall back to safe Unmarshal because the type in not packed.
+	ke.UnmarshalBytes(src)
+}
+
+// CopyIn implements marshal.Marshallable.CopyIn.
+func (ke *KernelIPTGetEntries) CopyIn(task marshal.Task, addr usermem.Addr) (int, error) {
+	buf := task.CopyScratchBuffer(ke.SizeBytes()) // escapes: okay.
+	length, err := task.CopyInBytes(addr, buf)    // escapes: okay.
+	// Unmarshal unconditionally. If we had a short copy-in, this results in a
+	// partially unmarshalled struct.
+	ke.UnmarshalBytes(buf) // escapes: fallback.
+	return length, err
+}
+
+// CopyOut implements marshal.Marshallable.CopyOut.
+func (ke *KernelIPTGetEntries) CopyOut(task marshal.Task, addr usermem.Addr) (int, error) {
+	// Type KernelIPTGetEntries doesn't have a packed layout in memory, fall
+	// back to MarshalBytes.
+	return task.CopyOutBytes(addr, ke.marshalAll(task))
+}
+
+// CopyOutN implements marshal.Marshallable.CopyOutN.
+func (ke *KernelIPTGetEntries) CopyOutN(task marshal.Task, addr usermem.Addr, limit int) (int, error) {
+	// Type KernelIPTGetEntries doesn't have a packed layout in memory, fall
+	// back to MarshalBytes.
+	return task.CopyOutBytes(addr, ke.marshalAll(task)[:limit])
+}
+
+func (ke *KernelIPTGetEntries) marshalAll(task marshal.Task) []byte {
+	buf := task.CopyScratchBuffer(ke.SizeBytes())
+	ke.MarshalBytes(buf)
+	return buf
+}
+
+// WriteTo implements io.WriterTo.WriteTo.
+func (ke *KernelIPTGetEntries) WriteTo(w io.Writer) (int64, error) {
+	buf := make([]byte, ke.SizeBytes())
+	ke.MarshalBytes(buf)
+	length, err := w.Write(buf)
+	return int64(length), err
+}
+
+var _ marshal.Marshallable = (*KernelIPTGetEntries)(nil)
+
 // IPTReplace is the argument for the IPT_SO_SET_REPLACE sockopt. It
 // corresponds to struct ipt_replace in
 // include/uapi/linux/netfilter_ipv4/ip_tables.h.
@@ -374,12 +500,6 @@ type IPTReplace struct {
 	// Entries [0]IPTEntry
 }
 
-// KernelIPTReplace is identical to IPTReplace, but includes the Entries field.
-type KernelIPTReplace struct {
-	IPTReplace
-	Entries [0]IPTEntry
-}
-
 // SizeOfIPTReplace is the size of an IPTReplace.
 const SizeOfIPTReplace = 96
 
@@ -392,6 +512,8 @@ func (en ExtensionName) String() string {
 }
 
 // TableName holds the name of a netfilter table.
+//
+// +marshal
 type TableName [XT_TABLE_MAXNAMELEN]byte
 
 // String implements fmt.Stringer.
diff --git a/pkg/abi/linux/socket.go b/pkg/abi/linux/socket.go
index 4a14ef691..c24a8216e 100644
--- a/pkg/abi/linux/socket.go
+++ b/pkg/abi/linux/socket.go
@@ -134,6 +134,15 @@ const (
 	SHUT_RDWR = 2
 )
 
+// Packet types from <linux/if_packet.h>
+const (
+	PACKET_HOST      = 0 // To us
+	PACKET_BROADCAST = 1 // To all
+	PACKET_MULTICAST = 2 // To group
+	PACKET_OTHERHOST = 3 // To someone else
+	PACKET_OUTGOING  = 4 // Outgoing of any type
+)
+
 // Socket options from socket.h.
 const (
 	SO_DEBUG                 = 1
@@ -225,6 +234,8 @@ const (
 const SockAddrMax = 128
 
 // InetAddr is struct in_addr, from uapi/linux/in.h.
+//
+// +marshal
 type InetAddr [4]byte
 
 // SockAddrInet is struct sockaddr_in, from uapi/linux/in.h.
@@ -294,6 +305,8 @@ func (s *SockAddrUnix) implementsSockAddr()    {}
 func (s *SockAddrNetlink) implementsSockAddr() {}
 
 // Linger is struct linger, from include/linux/socket.h.
+//
+// +marshal
 type Linger struct {
 	OnOff  int32
 	Linger int32
@@ -308,6 +321,8 @@ const SizeOfLinger = 8
 // the end of this struct or within existing unusued space, so its size grows
 // over time. The current iteration is based on linux v4.17. New versions are
 // always backwards compatible.
+//
+// +marshal
 type TCPInfo struct {
 	State       uint8
 	CaState     uint8
@@ -405,6 +420,8 @@ var SizeOfControlMessageHeader = int(binary.Size(ControlMessageHeader{}))
 // A ControlMessageCredentials is an SCM_CREDENTIALS socket control message.
 //
 // ControlMessageCredentials represents struct ucred from linux/socket.h.
+//
+// +marshal
 type ControlMessageCredentials struct {
 	PID int32
 	UID uint32
diff --git a/pkg/p9/messages.go b/pkg/p9/messages.go
index 57b89ad7d..2cb59f934 100644
--- a/pkg/p9/messages.go
+++ b/pkg/p9/messages.go
@@ -2506,7 +2506,7 @@ type msgFactory struct {
 var msgRegistry registry
 
 type registry struct {
-	factories [math.MaxUint8]msgFactory
+	factories [math.MaxUint8 + 1]msgFactory
 
 	// largestFixedSize is computed so that given some message size M, you can
 	// compute the maximum payload size (e.g. for Twrite, Rread) with
diff --git a/pkg/sentry/arch/arch_aarch64.go b/pkg/sentry/arch/arch_aarch64.go
index daba8b172..fd95eb2d2 100644
--- a/pkg/sentry/arch/arch_aarch64.go
+++ b/pkg/sentry/arch/arch_aarch64.go
@@ -28,7 +28,14 @@ import (
 )
 
 // Registers represents the CPU registers for this architecture.
-type Registers = linux.PtraceRegs
+//
+// +stateify savable
+type Registers struct {
+	linux.PtraceRegs
+
+	// TPIDR_EL0 is the EL0 Read/Write Software Thread ID Register.
+	TPIDR_EL0 uint64
+}
 
 const (
 	// SyscallWidth is the width of insturctions.
@@ -101,9 +108,6 @@ type State struct {
 	// Our floating point state.
 	aarch64FPState `state:"wait"`
 
-	// TLS pointer
-	TPValue uint64
-
 	// FeatureSet is a pointer to the currently active feature set.
 	FeatureSet *cpuid.FeatureSet
 
@@ -157,7 +161,6 @@ func (s *State) Fork() State {
 	return State{
 		Regs:           s.Regs,
 		aarch64FPState: s.aarch64FPState.fork(),
-		TPValue:        s.TPValue,
 		FeatureSet:     s.FeatureSet,
 		OrigR0:         s.OrigR0,
 	}
@@ -241,18 +244,18 @@ func (s *State) ptraceGetRegs() Registers {
 	return s.Regs
 }
 
-var registersSize = (*Registers)(nil).SizeBytes()
+var ptraceRegistersSize = (*linux.PtraceRegs)(nil).SizeBytes()
 
 // PtraceSetRegs implements Context.PtraceSetRegs.
 func (s *State) PtraceSetRegs(src io.Reader) (int, error) {
 	var regs Registers
-	buf := make([]byte, registersSize)
+	buf := make([]byte, ptraceRegistersSize)
 	if _, err := io.ReadFull(src, buf); err != nil {
 		return 0, err
 	}
 	regs.UnmarshalUnsafe(buf)
 	s.Regs = regs
-	return registersSize, nil
+	return ptraceRegistersSize, nil
 }
 
 // PtraceGetFPRegs implements Context.PtraceGetFPRegs.
@@ -278,7 +281,7 @@ const (
 func (s *State) PtraceGetRegSet(regset uintptr, dst io.Writer, maxlen int) (int, error) {
 	switch regset {
 	case _NT_PRSTATUS:
-		if maxlen < registersSize {
+		if maxlen < ptraceRegistersSize {
 			return 0, syserror.EFAULT
 		}
 		return s.PtraceGetRegs(dst)
@@ -291,7 +294,7 @@ func (s *State) PtraceGetRegSet(regset uintptr, dst io.Writer, maxlen int) (int,
 func (s *State) PtraceSetRegSet(regset uintptr, src io.Reader, maxlen int) (int, error) {
 	switch regset {
 	case _NT_PRSTATUS:
-		if maxlen < registersSize {
+		if maxlen < ptraceRegistersSize {
 			return 0, syserror.EFAULT
 		}
 		return s.PtraceSetRegs(src)
diff --git a/pkg/sentry/arch/arch_amd64.go b/pkg/sentry/arch/arch_amd64.go
index 3b3a0a272..1c3e3c14c 100644
--- a/pkg/sentry/arch/arch_amd64.go
+++ b/pkg/sentry/arch/arch_amd64.go
@@ -300,7 +300,7 @@ func (c *context64) PtracePeekUser(addr uintptr) (interface{}, error) {
 	// PTRACE_PEEKUSER and PTRACE_POKEUSER are only effective on regs and
 	// u_debugreg, returning 0 or silently no-oping for other fields
 	// respectively.
-	if addr < uintptr(registersSize) {
+	if addr < uintptr(ptraceRegistersSize) {
 		regs := c.ptraceGetRegs()
 		buf := make([]byte, regs.SizeBytes())
 		regs.MarshalUnsafe(buf)
@@ -315,7 +315,7 @@ func (c *context64) PtracePokeUser(addr, data uintptr) error {
 	if addr&7 != 0 || addr >= userStructSize {
 		return syscall.EIO
 	}
-	if addr < uintptr(registersSize) {
+	if addr < uintptr(ptraceRegistersSize) {
 		regs := c.ptraceGetRegs()
 		buf := make([]byte, regs.SizeBytes())
 		regs.MarshalUnsafe(buf)
diff --git a/pkg/sentry/arch/arch_arm64.go b/pkg/sentry/arch/arch_arm64.go
index ada7ac7b8..cabbf60e0 100644
--- a/pkg/sentry/arch/arch_arm64.go
+++ b/pkg/sentry/arch/arch_arm64.go
@@ -142,7 +142,7 @@ func (c *context64) SetStack(value uintptr) {
 
 // TLS returns the current TLS pointer.
 func (c *context64) TLS() uintptr {
-	return uintptr(c.TPValue)
+	return uintptr(c.Regs.TPIDR_EL0)
 }
 
 // SetTLS sets the current TLS pointer. Returns false if value is invalid.
@@ -151,7 +151,7 @@ func (c *context64) SetTLS(value uintptr) bool {
 		return false
 	}
 
-	c.TPValue = uint64(value)
+	c.Regs.TPIDR_EL0 = uint64(value)
 	return true
 }
 
diff --git a/pkg/sentry/arch/arch_x86.go b/pkg/sentry/arch/arch_x86.go
index dc458b37f..b9405b320 100644
--- a/pkg/sentry/arch/arch_x86.go
+++ b/pkg/sentry/arch/arch_x86.go
@@ -31,7 +31,11 @@ import (
 )
 
 // Registers represents the CPU registers for this architecture.
-type Registers = linux.PtraceRegs
+//
+// +stateify savable
+type Registers struct {
+	linux.PtraceRegs
+}
 
 // System-related constants for x86.
 const (
@@ -311,12 +315,12 @@ func (s *State) ptraceGetRegs() Registers {
 	return regs
 }
 
-var registersSize = (*Registers)(nil).SizeBytes()
+var ptraceRegistersSize = (*linux.PtraceRegs)(nil).SizeBytes()
 
 // PtraceSetRegs implements Context.PtraceSetRegs.
 func (s *State) PtraceSetRegs(src io.Reader) (int, error) {
 	var regs Registers
-	buf := make([]byte, registersSize)
+	buf := make([]byte, ptraceRegistersSize)
 	if _, err := io.ReadFull(src, buf); err != nil {
 		return 0, err
 	}
@@ -374,7 +378,7 @@ func (s *State) PtraceSetRegs(src io.Reader) (int, error) {
 	}
 	regs.Eflags = (s.Regs.Eflags &^ eflagsPtraceMutable) | (regs.Eflags & eflagsPtraceMutable)
 	s.Regs = regs
-	return registersSize, nil
+	return ptraceRegistersSize, nil
 }
 
 // isUserSegmentSelector returns true if the given segment selector specifies a
@@ -543,7 +547,7 @@ const (
 func (s *State) PtraceGetRegSet(regset uintptr, dst io.Writer, maxlen int) (int, error) {
 	switch regset {
 	case _NT_PRSTATUS:
-		if maxlen < registersSize {
+		if maxlen < ptraceRegistersSize {
 			return 0, syserror.EFAULT
 		}
 		return s.PtraceGetRegs(dst)
@@ -563,7 +567,7 @@ func (s *State) PtraceGetRegSet(regset uintptr, dst io.Writer, maxlen int) (int,
 func (s *State) PtraceSetRegSet(regset uintptr, src io.Reader, maxlen int) (int, error) {
 	switch regset {
 	case _NT_PRSTATUS:
-		if maxlen < registersSize {
+		if maxlen < ptraceRegistersSize {
 			return 0, syserror.EFAULT
 		}
 		return s.PtraceSetRegs(src)
diff --git a/pkg/sentry/fs/file_operations.go b/pkg/sentry/fs/file_operations.go
index beba0f771..f5537411e 100644
--- a/pkg/sentry/fs/file_operations.go
+++ b/pkg/sentry/fs/file_operations.go
@@ -160,6 +160,7 @@ type FileOperations interface {
 	// refer.
 	//
 	// Preconditions: The AddressSpace (if any) that io refers to is activated.
+	// Must only be called from a task goroutine.
 	Ioctl(ctx context.Context, file *File, io usermem.IO, args arch.SyscallArguments) (uintptr, error)
 }
 
diff --git a/pkg/sentry/fs/fsutil/BUILD b/pkg/sentry/fs/fsutil/BUILD
index 789369220..5fb419bcd 100644
--- a/pkg/sentry/fs/fsutil/BUILD
+++ b/pkg/sentry/fs/fsutil/BUILD
@@ -8,7 +8,6 @@ go_template_instance(
     out = "dirty_set_impl.go",
     imports = {
         "memmap": "gvisor.dev/gvisor/pkg/sentry/memmap",
-        "platform": "gvisor.dev/gvisor/pkg/sentry/platform",
     },
     package = "fsutil",
     prefix = "Dirty",
@@ -25,14 +24,14 @@ go_template_instance(
     name = "frame_ref_set_impl",
     out = "frame_ref_set_impl.go",
     imports = {
-        "platform": "gvisor.dev/gvisor/pkg/sentry/platform",
+        "memmap": "gvisor.dev/gvisor/pkg/sentry/memmap",
     },
     package = "fsutil",
     prefix = "FrameRef",
     template = "//pkg/segment:generic_set",
     types = {
         "Key": "uint64",
-        "Range": "platform.FileRange",
+        "Range": "memmap.FileRange",
         "Value": "uint64",
         "Functions": "FrameRefSetFunctions",
     },
@@ -43,7 +42,6 @@ go_template_instance(
     out = "file_range_set_impl.go",
     imports = {
         "memmap": "gvisor.dev/gvisor/pkg/sentry/memmap",
-        "platform": "gvisor.dev/gvisor/pkg/sentry/platform",
     },
     package = "fsutil",
     prefix = "FileRange",
@@ -86,7 +84,6 @@ go_library(
         "//pkg/sentry/kernel/time",
         "//pkg/sentry/memmap",
         "//pkg/sentry/pgalloc",
-        "//pkg/sentry/platform",
         "//pkg/sentry/socket/unix/transport",
         "//pkg/sentry/usage",
         "//pkg/state",
diff --git a/pkg/sentry/fs/fsutil/dirty_set.go b/pkg/sentry/fs/fsutil/dirty_set.go
index c6cd45087..2c9446c1d 100644
--- a/pkg/sentry/fs/fsutil/dirty_set.go
+++ b/pkg/sentry/fs/fsutil/dirty_set.go
@@ -20,7 +20,6 @@ import (
 	"gvisor.dev/gvisor/pkg/context"
 	"gvisor.dev/gvisor/pkg/safemem"
 	"gvisor.dev/gvisor/pkg/sentry/memmap"
-	"gvisor.dev/gvisor/pkg/sentry/platform"
 	"gvisor.dev/gvisor/pkg/usermem"
 )
 
@@ -159,7 +158,7 @@ func (ds *DirtySet) AllowClean(mr memmap.MappableRange) {
 // repeatedly until all bytes have been written. max is the true size of the
 // cached object; offsets beyond max will not be passed to writeAt, even if
 // they are marked dirty.
-func SyncDirty(ctx context.Context, mr memmap.MappableRange, cache *FileRangeSet, dirty *DirtySet, max uint64, mem platform.File, writeAt func(ctx context.Context, srcs safemem.BlockSeq, offset uint64) (uint64, error)) error {
+func SyncDirty(ctx context.Context, mr memmap.MappableRange, cache *FileRangeSet, dirty *DirtySet, max uint64, mem memmap.File, writeAt func(ctx context.Context, srcs safemem.BlockSeq, offset uint64) (uint64, error)) error {
 	var changedDirty bool
 	defer func() {
 		if changedDirty {
@@ -194,7 +193,7 @@ func SyncDirty(ctx context.Context, mr memmap.MappableRange, cache *FileRangeSet
 // successful partial write, SyncDirtyAll will call it repeatedly until all
 // bytes have been written. max is the true size of the cached object; offsets
 // beyond max will not be passed to writeAt, even if they are marked dirty.
-func SyncDirtyAll(ctx context.Context, cache *FileRangeSet, dirty *DirtySet, max uint64, mem platform.File, writeAt func(ctx context.Context, srcs safemem.BlockSeq, offset uint64) (uint64, error)) error {
+func SyncDirtyAll(ctx context.Context, cache *FileRangeSet, dirty *DirtySet, max uint64, mem memmap.File, writeAt func(ctx context.Context, srcs safemem.BlockSeq, offset uint64) (uint64, error)) error {
 	dseg := dirty.FirstSegment()
 	for dseg.Ok() {
 		if err := syncDirtyRange(ctx, dseg.Range(), cache, max, mem, writeAt); err != nil {
@@ -210,7 +209,7 @@ func SyncDirtyAll(ctx context.Context, cache *FileRangeSet, dirty *DirtySet, max
 }
 
 // Preconditions: mr must be page-aligned.
-func syncDirtyRange(ctx context.Context, mr memmap.MappableRange, cache *FileRangeSet, max uint64, mem platform.File, writeAt func(ctx context.Context, srcs safemem.BlockSeq, offset uint64) (uint64, error)) error {
+func syncDirtyRange(ctx context.Context, mr memmap.MappableRange, cache *FileRangeSet, max uint64, mem memmap.File, writeAt func(ctx context.Context, srcs safemem.BlockSeq, offset uint64) (uint64, error)) error {
 	for cseg := cache.LowerBoundSegment(mr.Start); cseg.Ok() && cseg.Start() < mr.End; cseg = cseg.NextSegment() {
 		wbr := cseg.Range().Intersect(mr)
 		if max < wbr.Start {
diff --git a/pkg/sentry/fs/fsutil/file_range_set.go b/pkg/sentry/fs/fsutil/file_range_set.go
index 5643cdac9..bbafebf03 100644
--- a/pkg/sentry/fs/fsutil/file_range_set.go
+++ b/pkg/sentry/fs/fsutil/file_range_set.go
@@ -23,13 +23,12 @@ import (
 	"gvisor.dev/gvisor/pkg/safemem"
 	"gvisor.dev/gvisor/pkg/sentry/memmap"
 	"gvisor.dev/gvisor/pkg/sentry/pgalloc"
-	"gvisor.dev/gvisor/pkg/sentry/platform"
 	"gvisor.dev/gvisor/pkg/sentry/usage"
 	"gvisor.dev/gvisor/pkg/usermem"
 )
 
 // FileRangeSet maps offsets into a memmap.Mappable to offsets into a
-// platform.File. It is used to implement Mappables that store data in
+// memmap.File. It is used to implement Mappables that store data in
 // sparsely-allocated memory.
 //
 // type FileRangeSet <generated by go_generics>
@@ -65,20 +64,20 @@ func (FileRangeSetFunctions) Split(mr memmap.MappableRange, frstart uint64, spli
 }
 
 // FileRange returns the FileRange mapped by seg.
-func (seg FileRangeIterator) FileRange() platform.FileRange {
+func (seg FileRangeIterator) FileRange() memmap.FileRange {
 	return seg.FileRangeOf(seg.Range())
 }
 
 // FileRangeOf returns the FileRange mapped by mr.
 //
 // Preconditions: seg.Range().IsSupersetOf(mr). mr.Length() != 0.
-func (seg FileRangeIterator) FileRangeOf(mr memmap.MappableRange) platform.FileRange {
+func (seg FileRangeIterator) FileRangeOf(mr memmap.MappableRange) memmap.FileRange {
 	frstart := seg.Value() + (mr.Start - seg.Start())
-	return platform.FileRange{frstart, frstart + mr.Length()}
+	return memmap.FileRange{frstart, frstart + mr.Length()}
 }
 
 // Fill attempts to ensure that all memmap.Mappable offsets in required are
-// mapped to a platform.File offset, by allocating from mf with the given
+// mapped to a memmap.File offset, by allocating from mf with the given
 // memory usage kind and invoking readAt to store data into memory. (If readAt
 // returns a successful partial read, Fill will call it repeatedly until all
 // bytes have been read.) EOF is handled consistently with the requirements of
@@ -141,7 +140,7 @@ func (frs *FileRangeSet) Fill(ctx context.Context, required, optional memmap.Map
 }
 
 // Drop removes segments for memmap.Mappable offsets in mr, freeing the
-// corresponding platform.FileRanges.
+// corresponding memmap.FileRanges.
 //
 // Preconditions: mr must be page-aligned.
 func (frs *FileRangeSet) Drop(mr memmap.MappableRange, mf *pgalloc.MemoryFile) {
@@ -154,7 +153,7 @@ func (frs *FileRangeSet) Drop(mr memmap.MappableRange, mf *pgalloc.MemoryFile) {
 }
 
 // DropAll removes all segments in mr, freeing the corresponding
-// platform.FileRanges.
+// memmap.FileRanges.
 func (frs *FileRangeSet) DropAll(mf *pgalloc.MemoryFile) {
 	for seg := frs.FirstSegment(); seg.Ok(); seg = seg.NextSegment() {
 		mf.DecRef(seg.FileRange())
diff --git a/pkg/sentry/fs/fsutil/frame_ref_set.go b/pkg/sentry/fs/fsutil/frame_ref_set.go
index dd6f5aba6..a808894df 100644
--- a/pkg/sentry/fs/fsutil/frame_ref_set.go
+++ b/pkg/sentry/fs/fsutil/frame_ref_set.go
@@ -17,7 +17,7 @@ package fsutil
 import (
 	"math"
 
-	"gvisor.dev/gvisor/pkg/sentry/platform"
+	"gvisor.dev/gvisor/pkg/sentry/memmap"
 	"gvisor.dev/gvisor/pkg/sentry/usage"
 )
 
@@ -39,7 +39,7 @@ func (FrameRefSetFunctions) ClearValue(val *uint64) {
 }
 
 // Merge implements segment.Functions.Merge.
-func (FrameRefSetFunctions) Merge(_ platform.FileRange, val1 uint64, _ platform.FileRange, val2 uint64) (uint64, bool) {
+func (FrameRefSetFunctions) Merge(_ memmap.FileRange, val1 uint64, _ memmap.FileRange, val2 uint64) (uint64, bool) {
 	if val1 != val2 {
 		return 0, false
 	}
@@ -47,13 +47,13 @@ func (FrameRefSetFunctions) Merge(_ platform.FileRange, val1 uint64, _ platform.
 }
 
 // Split implements segment.Functions.Split.
-func (FrameRefSetFunctions) Split(_ platform.FileRange, val uint64, _ uint64) (uint64, uint64) {
+func (FrameRefSetFunctions) Split(_ memmap.FileRange, val uint64, _ uint64) (uint64, uint64) {
 	return val, val
 }
 
 // IncRefAndAccount adds a reference on the range fr. All newly inserted segments
 // are accounted as host page cache memory mappings.
-func (refs *FrameRefSet) IncRefAndAccount(fr platform.FileRange) {
+func (refs *FrameRefSet) IncRefAndAccount(fr memmap.FileRange) {
 	seg, gap := refs.Find(fr.Start)
 	for {
 		switch {
@@ -74,7 +74,7 @@ func (refs *FrameRefSet) IncRefAndAccount(fr platform.FileRange) {
 
 // DecRefAndAccount removes a reference on the range fr and untracks segments
 // that are removed from memory accounting.
-func (refs *FrameRefSet) DecRefAndAccount(fr platform.FileRange) {
+func (refs *FrameRefSet) DecRefAndAccount(fr memmap.FileRange) {
 	seg := refs.FindSegment(fr.Start)
 
 	for seg.Ok() && seg.Start() < fr.End {
diff --git a/pkg/sentry/fs/fsutil/host_file_mapper.go b/pkg/sentry/fs/fsutil/host_file_mapper.go
index e82afd112..ef0113b52 100644
--- a/pkg/sentry/fs/fsutil/host_file_mapper.go
+++ b/pkg/sentry/fs/fsutil/host_file_mapper.go
@@ -21,7 +21,6 @@ import (
 	"gvisor.dev/gvisor/pkg/log"
 	"gvisor.dev/gvisor/pkg/safemem"
 	"gvisor.dev/gvisor/pkg/sentry/memmap"
-	"gvisor.dev/gvisor/pkg/sentry/platform"
 	"gvisor.dev/gvisor/pkg/sync"
 	"gvisor.dev/gvisor/pkg/usermem"
 )
@@ -126,7 +125,7 @@ func (f *HostFileMapper) DecRefOn(mr memmap.MappableRange) {
 // offsets in fr or until the next call to UnmapAll.
 //
 // Preconditions: The caller must hold a reference on all offsets in fr.
-func (f *HostFileMapper) MapInternal(fr platform.FileRange, fd int, write bool) (safemem.BlockSeq, error) {
+func (f *HostFileMapper) MapInternal(fr memmap.FileRange, fd int, write bool) (safemem.BlockSeq, error) {
 	chunks := ((fr.End + chunkMask) >> chunkShift) - (fr.Start >> chunkShift)
 	f.mapsMu.Lock()
 	defer f.mapsMu.Unlock()
@@ -146,7 +145,7 @@ func (f *HostFileMapper) MapInternal(fr platform.FileRange, fd int, write bool)
 }
 
 // Preconditions: f.mapsMu must be locked.
-func (f *HostFileMapper) forEachMappingBlockLocked(fr platform.FileRange, fd int, write bool, fn func(safemem.Block)) error {
+func (f *HostFileMapper) forEachMappingBlockLocked(fr memmap.FileRange, fd int, write bool, fn func(safemem.Block)) error {
 	prot := syscall.PROT_READ
 	if write {
 		prot |= syscall.PROT_WRITE
diff --git a/pkg/sentry/fs/fsutil/host_mappable.go b/pkg/sentry/fs/fsutil/host_mappable.go
index 78fec553e..c15d8a946 100644
--- a/pkg/sentry/fs/fsutil/host_mappable.go
+++ b/pkg/sentry/fs/fsutil/host_mappable.go
@@ -21,18 +21,17 @@ import (
 	"gvisor.dev/gvisor/pkg/safemem"
 	"gvisor.dev/gvisor/pkg/sentry/fs"
 	"gvisor.dev/gvisor/pkg/sentry/memmap"
-	"gvisor.dev/gvisor/pkg/sentry/platform"
 	"gvisor.dev/gvisor/pkg/sync"
 	"gvisor.dev/gvisor/pkg/usermem"
 )
 
-// HostMappable implements memmap.Mappable and platform.File over a
+// HostMappable implements memmap.Mappable and memmap.File over a
 // CachedFileObject.
 //
 // Lock order (compare the lock order model in mm/mm.go):
 //   truncateMu ("fs locks")
 //     mu ("memmap.Mappable locks not taken by Translate")
-//       ("platform.File locks")
+//       ("memmap.File locks")
 //   	     backingFile ("CachedFileObject locks")
 //
 // +stateify savable
@@ -124,24 +123,24 @@ func (h *HostMappable) NotifyChangeFD() error {
 	return nil
 }
 
-// MapInternal implements platform.File.MapInternal.
-func (h *HostMappable) MapInternal(fr platform.FileRange, at usermem.AccessType) (safemem.BlockSeq, error) {
+// MapInternal implements memmap.File.MapInternal.
+func (h *HostMappable) MapInternal(fr memmap.FileRange, at usermem.AccessType) (safemem.BlockSeq, error) {
 	return h.hostFileMapper.MapInternal(fr, h.backingFile.FD(), at.Write)
 }
 
-// FD implements platform.File.FD.
+// FD implements memmap.File.FD.
 func (h *HostMappable) FD() int {
 	return h.backingFile.FD()
 }
 
-// IncRef implements platform.File.IncRef.
-func (h *HostMappable) IncRef(fr platform.FileRange) {
+// IncRef implements memmap.File.IncRef.
+func (h *HostMappable) IncRef(fr memmap.FileRange) {
 	mr := memmap.MappableRange{Start: fr.Start, End: fr.End}
 	h.hostFileMapper.IncRefOn(mr)
 }
 
-// DecRef implements platform.File.DecRef.
-func (h *HostMappable) DecRef(fr platform.FileRange) {
+// DecRef implements memmap.File.DecRef.
+func (h *HostMappable) DecRef(fr memmap.FileRange) {
 	mr := memmap.MappableRange{Start: fr.Start, End: fr.End}
 	h.hostFileMapper.DecRefOn(mr)
 }
diff --git a/pkg/sentry/fs/fsutil/inode_cached.go b/pkg/sentry/fs/fsutil/inode_cached.go
index 800c8b4e1..fe8b0b6ac 100644
--- a/pkg/sentry/fs/fsutil/inode_cached.go
+++ b/pkg/sentry/fs/fsutil/inode_cached.go
@@ -26,7 +26,6 @@ import (
 	ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time"
 	"gvisor.dev/gvisor/pkg/sentry/memmap"
 	"gvisor.dev/gvisor/pkg/sentry/pgalloc"
-	"gvisor.dev/gvisor/pkg/sentry/platform"
 	"gvisor.dev/gvisor/pkg/sentry/usage"
 	"gvisor.dev/gvisor/pkg/sync"
 	"gvisor.dev/gvisor/pkg/usermem"
@@ -934,7 +933,7 @@ func maxFillRange(required, optional memmap.MappableRange) memmap.MappableRange
 
 // InvalidateUnsavable implements memmap.Mappable.InvalidateUnsavable.
 func (c *CachingInodeOperations) InvalidateUnsavable(ctx context.Context) error {
-	// Whether we have a host fd (and consequently what platform.File is
+	// Whether we have a host fd (and consequently what memmap.File is
 	// mapped) can change across save/restore, so invalidate all translations
 	// unconditionally.
 	c.mapsMu.Lock()
@@ -999,10 +998,10 @@ func (c *CachingInodeOperations) Evict(ctx context.Context, er pgalloc.Evictable
 	}
 }
 
-// IncRef implements platform.File.IncRef. This is used when we directly map an
-// underlying host fd and CachingInodeOperations is used as the platform.File
+// IncRef implements memmap.File.IncRef. This is used when we directly map an
+// underlying host fd and CachingInodeOperations is used as the memmap.File
 // during translation.
-func (c *CachingInodeOperations) IncRef(fr platform.FileRange) {
+func (c *CachingInodeOperations) IncRef(fr memmap.FileRange) {
 	// Hot path. Avoid defers.
 	c.dataMu.Lock()
 	seg, gap := c.refs.Find(fr.Start)
@@ -1024,10 +1023,10 @@ func (c *CachingInodeOperations) IncRef(fr platform.FileRange) {
 	}
 }
 
-// DecRef implements platform.File.DecRef. This is used when we directly map an
-// underlying host fd and CachingInodeOperations is used as the platform.File
+// DecRef implements memmap.File.DecRef. This is used when we directly map an
+// underlying host fd and CachingInodeOperations is used as the memmap.File
 // during translation.
-func (c *CachingInodeOperations) DecRef(fr platform.FileRange) {
+func (c *CachingInodeOperations) DecRef(fr memmap.FileRange) {
 	// Hot path. Avoid defers.
 	c.dataMu.Lock()
 	seg := c.refs.FindSegment(fr.Start)
@@ -1046,15 +1045,15 @@ func (c *CachingInodeOperations) DecRef(fr platform.FileRange) {
 	c.dataMu.Unlock()
 }
 
-// MapInternal implements platform.File.MapInternal. This is used when we
+// MapInternal implements memmap.File.MapInternal. This is used when we
 // directly map an underlying host fd and CachingInodeOperations is used as the
-// platform.File during translation.
-func (c *CachingInodeOperations) MapInternal(fr platform.FileRange, at usermem.AccessType) (safemem.BlockSeq, error) {
+// memmap.File during translation.
+func (c *CachingInodeOperations) MapInternal(fr memmap.FileRange, at usermem.AccessType) (safemem.BlockSeq, error) {
 	return c.hostFileMapper.MapInternal(fr, c.backingFile.FD(), at.Write)
 }
 
-// FD implements platform.File.FD. This is used when we directly map an
-// underlying host fd and CachingInodeOperations is used as the platform.File
+// FD implements memmap.File.FD. This is used when we directly map an
+// underlying host fd and CachingInodeOperations is used as the memmap.File
 // during translation.
 func (c *CachingInodeOperations) FD() int {
 	return c.backingFile.FD()
diff --git a/pkg/sentry/fsimpl/devpts/slave.go b/pkg/sentry/fsimpl/devpts/slave.go
index 2018b978a..a91cae3ef 100644
--- a/pkg/sentry/fsimpl/devpts/slave.go
+++ b/pkg/sentry/fsimpl/devpts/slave.go
@@ -132,7 +132,7 @@ func (sfd *slaveFileDescription) Write(ctx context.Context, src usermem.IOSequen
 	return sfd.inode.t.ld.outputQueueWrite(ctx, src)
 }
 
-// Ioctl implements vfs.FileDescripionImpl.Ioctl.
+// Ioctl implements vfs.FileDescriptionImpl.Ioctl.
 func (sfd *slaveFileDescription) Ioctl(ctx context.Context, io usermem.IO, args arch.SyscallArguments) (uintptr, error) {
 	switch cmd := args[1].Uint(); cmd {
 	case linux.FIONREAD: // linux.FIONREAD == linux.TIOCINQ
diff --git a/pkg/sentry/fsimpl/fuse/BUILD b/pkg/sentry/fsimpl/fuse/BUILD
index 3e00c2abb..67649e811 100644
--- a/pkg/sentry/fsimpl/fuse/BUILD
+++ b/pkg/sentry/fsimpl/fuse/BUILD
@@ -1,20 +1,63 @@
-load("//tools:defs.bzl", "go_library")
+load("//tools:defs.bzl", "go_library", "go_test")
+load("//tools/go_generics:defs.bzl", "go_template_instance")
 
 licenses(["notice"])
 
+go_template_instance(
+    name = "request_list",
+    out = "request_list.go",
+    package = "fuse",
+    prefix = "request",
+    template = "//pkg/ilist:generic_list",
+    types = {
+        "Element": "*Request",
+        "Linker": "*Request",
+    },
+)
+
 go_library(
     name = "fuse",
     srcs = [
+        "connection.go",
         "dev.go",
+        "fusefs.go",
+        "register.go",
+        "request_list.go",
     ],
     visibility = ["//pkg/sentry:internal"],
     deps = [
         "//pkg/abi/linux",
         "//pkg/context",
+        "//pkg/log",
         "//pkg/sentry/fsimpl/devtmpfs",
+        "//pkg/sentry/fsimpl/kernfs",
+        "//pkg/sentry/kernel",
+        "//pkg/sentry/kernel/auth",
+        "//pkg/sentry/vfs",
+        "//pkg/sync",
+        "//pkg/syserror",
+        "//pkg/usermem",
+        "//pkg/waiter",
+        "//tools/go_marshal/marshal",
+        "@org_golang_x_sys//unix:go_default_library",
+    ],
+)
+
+go_test(
+    name = "dev_test",
+    size = "small",
+    srcs = ["dev_test.go"],
+    library = ":fuse",
+    deps = [
+        "//pkg/abi/linux",
+        "//pkg/sentry/fsimpl/testutil",
+        "//pkg/sentry/fsimpl/tmpfs",
         "//pkg/sentry/kernel",
+        "//pkg/sentry/kernel/auth",
         "//pkg/sentry/vfs",
         "//pkg/syserror",
         "//pkg/usermem",
+        "//pkg/waiter",
+        "//tools/go_marshal/marshal",
     ],
 )
diff --git a/pkg/sentry/fsimpl/fuse/connection.go b/pkg/sentry/fsimpl/fuse/connection.go
new file mode 100644
index 000000000..f330da0bd
--- /dev/null
+++ b/pkg/sentry/fsimpl/fuse/connection.go
@@ -0,0 +1,255 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package fuse
+
+import (
+	"errors"
+	"fmt"
+	"syscall"
+
+	"gvisor.dev/gvisor/pkg/abi/linux"
+	"gvisor.dev/gvisor/pkg/context"
+	"gvisor.dev/gvisor/pkg/log"
+	"gvisor.dev/gvisor/pkg/sentry/kernel"
+	"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+	"gvisor.dev/gvisor/pkg/sentry/vfs"
+	"gvisor.dev/gvisor/pkg/waiter"
+	"gvisor.dev/gvisor/tools/go_marshal/marshal"
+)
+
+// MaxActiveRequestsDefault is the default setting controlling the upper bound
+// on the number of active requests at any given time.
+const MaxActiveRequestsDefault = 10000
+
+var (
+	// Ordinary requests have even IDs, while interrupts IDs are odd.
+	InitReqBit uint64 = 1
+	ReqIDStep  uint64 = 2
+)
+
+// Request represents a FUSE operation request that hasn't been sent to the
+// server yet.
+//
+// +stateify savable
+type Request struct {
+	requestEntry
+
+	id   linux.FUSEOpID
+	hdr  *linux.FUSEHeaderIn
+	data []byte
+}
+
+// Response represents an actual response from the server, including the
+// response payload.
+//
+// +stateify savable
+type Response struct {
+	opcode linux.FUSEOpcode
+	hdr    linux.FUSEHeaderOut
+	data   []byte
+}
+
+// Connection is the struct by which the sentry communicates with the FUSE server daemon.
+type Connection struct {
+	fd *DeviceFD
+
+	// MaxWrite is the daemon's maximum size of a write buffer.
+	// This is negotiated during FUSE_INIT.
+	MaxWrite uint32
+}
+
+// NewFUSEConnection creates a FUSE connection to fd
+func NewFUSEConnection(_ context.Context, fd *vfs.FileDescription, maxInFlightRequests uint64) (*Connection, error) {
+	// Mark the device as ready so it can be used. /dev/fuse can only be used if the FD was used to
+	// mount a FUSE filesystem.
+	fuseFD := fd.Impl().(*DeviceFD)
+	fuseFD.mounted = true
+
+	// Create the writeBuf for the header to be stored in.
+	hdrLen := uint32((*linux.FUSEHeaderOut)(nil).SizeBytes())
+	fuseFD.writeBuf = make([]byte, hdrLen)
+	fuseFD.completions = make(map[linux.FUSEOpID]*futureResponse)
+	fuseFD.fullQueueCh = make(chan struct{}, maxInFlightRequests)
+	fuseFD.writeCursor = 0
+
+	return &Connection{
+		fd: fuseFD,
+	}, nil
+}
+
+// NewRequest creates a new request that can be sent to the FUSE server.
+func (conn *Connection) NewRequest(creds *auth.Credentials, pid uint32, ino uint64, opcode linux.FUSEOpcode, payload marshal.Marshallable) (*Request, error) {
+	conn.fd.mu.Lock()
+	defer conn.fd.mu.Unlock()
+	conn.fd.nextOpID += linux.FUSEOpID(ReqIDStep)
+
+	hdrLen := (*linux.FUSEHeaderIn)(nil).SizeBytes()
+	hdr := linux.FUSEHeaderIn{
+		Len:    uint32(hdrLen + payload.SizeBytes()),
+		Opcode: opcode,
+		Unique: conn.fd.nextOpID,
+		NodeID: ino,
+		UID:    uint32(creds.EffectiveKUID),
+		GID:    uint32(creds.EffectiveKGID),
+		PID:    pid,
+	}
+
+	buf := make([]byte, hdr.Len)
+	hdr.MarshalUnsafe(buf[:hdrLen])
+	payload.MarshalUnsafe(buf[hdrLen:])
+
+	return &Request{
+		id:   hdr.Unique,
+		hdr:  &hdr,
+		data: buf,
+	}, nil
+}
+
+// Call makes a request to the server and blocks the invoking task until a
+// server responds with a response.
+// NOTE: If no task is provided then the Call will simply enqueue the request
+// and return a nil response. No blocking will happen in this case. Instead,
+// this is used to signify that the processing of this request will happen by
+// the kernel.Task that writes the response. See FUSE_INIT for such an
+// invocation.
+func (conn *Connection) Call(t *kernel.Task, r *Request) (*Response, error) {
+	fut, err := conn.callFuture(t, r)
+	if err != nil {
+		return nil, err
+	}
+
+	return fut.resolve(t)
+}
+
+// Error returns the error of the FUSE call.
+func (r *Response) Error() error {
+	errno := r.hdr.Error
+	if errno >= 0 {
+		return nil
+	}
+
+	sysErrNo := syscall.Errno(-errno)
+	return error(sysErrNo)
+}
+
+// UnmarshalPayload unmarshals the response data into m.
+func (r *Response) UnmarshalPayload(m marshal.Marshallable) error {
+	hdrLen := r.hdr.SizeBytes()
+	haveDataLen := r.hdr.Len - uint32(hdrLen)
+	wantDataLen := uint32(m.SizeBytes())
+
+	if haveDataLen < wantDataLen {
+		return fmt.Errorf("payload too small. Minimum data lenth required: %d,  but got data length %d", wantDataLen, haveDataLen)
+	}
+
+	m.UnmarshalUnsafe(r.data[hdrLen:])
+	return nil
+}
+
+// callFuture makes a request to the server and returns a future response.
+// Call resolve() when the response needs to be fulfilled.
+func (conn *Connection) callFuture(t *kernel.Task, r *Request) (*futureResponse, error) {
+	conn.fd.mu.Lock()
+	defer conn.fd.mu.Unlock()
+
+	// Is the queue full?
+	//
+	// We must busy wait here until the request can be queued. We don't
+	// block on the fd.fullQueueCh with a lock - so after being signalled,
+	// before we acquire the lock, it is possible that a barging task enters
+	// and queues a request. As a result, upon acquiring the lock we must
+	// again check if the room is available.
+	//
+	// This can potentially starve a request forever but this can only happen
+	// if there are always too many ongoing requests all the time. The
+	// supported maxActiveRequests setting should be really high to avoid this.
+	for conn.fd.numActiveRequests == conn.fd.fs.opts.maxActiveRequests {
+		if t == nil {
+			// Since there is no task that is waiting. We must error out.
+			return nil, errors.New("FUSE request queue full")
+		}
+
+		log.Infof("Blocking request %v from being queued. Too many active requests: %v",
+			r.id, conn.fd.numActiveRequests)
+		conn.fd.mu.Unlock()
+		err := t.Block(conn.fd.fullQueueCh)
+		conn.fd.mu.Lock()
+		if err != nil {
+			return nil, err
+		}
+	}
+
+	return conn.callFutureLocked(t, r)
+}
+
+// callFutureLocked makes a request to the server and returns a future response.
+func (conn *Connection) callFutureLocked(t *kernel.Task, r *Request) (*futureResponse, error) {
+	conn.fd.queue.PushBack(r)
+	conn.fd.numActiveRequests += 1
+	fut := newFutureResponse(r.hdr.Opcode)
+	conn.fd.completions[r.id] = fut
+
+	// Signal the readers that there is something to read.
+	conn.fd.waitQueue.Notify(waiter.EventIn)
+
+	return fut, nil
+}
+
+// futureResponse represents an in-flight request, that may or may not have
+// completed yet. Convert it to a resolved Response by calling Resolve, but note
+// that this may block.
+//
+// +stateify savable
+type futureResponse struct {
+	opcode linux.FUSEOpcode
+	ch     chan struct{}
+	hdr    *linux.FUSEHeaderOut
+	data   []byte
+}
+
+// newFutureResponse creates a future response to a FUSE request.
+func newFutureResponse(opcode linux.FUSEOpcode) *futureResponse {
+	return &futureResponse{
+		opcode: opcode,
+		ch:     make(chan struct{}),
+	}
+}
+
+// resolve blocks the task until the server responds to its corresponding request,
+// then returns a resolved response.
+func (f *futureResponse) resolve(t *kernel.Task) (*Response, error) {
+	// If there is no Task associated with this request  - then we don't try to resolve
+	// the response.  Instead, the task writing the response (proxy to the server) will
+	// process the response on our behalf.
+	if t == nil {
+		log.Infof("fuse.Response.resolve: Not waiting on a response from server.")
+		return nil, nil
+	}
+
+	if err := t.Block(f.ch); err != nil {
+		return nil, err
+	}
+
+	return f.getResponse(), nil
+}
+
+// getResponse creates a Response from the data the futureResponse has.
+func (f *futureResponse) getResponse() *Response {
+	return &Response{
+		opcode: f.opcode,
+		hdr:    *f.hdr,
+		data:   f.data,
+	}
+}
diff --git a/pkg/sentry/fsimpl/fuse/dev.go b/pkg/sentry/fsimpl/fuse/dev.go
index dc33268af..f3443ac71 100644
--- a/pkg/sentry/fsimpl/fuse/dev.go
+++ b/pkg/sentry/fsimpl/fuse/dev.go
@@ -15,13 +15,17 @@
 package fuse
 
 import (
+	"syscall"
+
 	"gvisor.dev/gvisor/pkg/abi/linux"
 	"gvisor.dev/gvisor/pkg/context"
-	"gvisor.dev/gvisor/pkg/sentry/fsimpl/devtmpfs"
+	"gvisor.dev/gvisor/pkg/log"
 	"gvisor.dev/gvisor/pkg/sentry/kernel"
 	"gvisor.dev/gvisor/pkg/sentry/vfs"
+	"gvisor.dev/gvisor/pkg/sync"
 	"gvisor.dev/gvisor/pkg/syserror"
 	"gvisor.dev/gvisor/pkg/usermem"
+	"gvisor.dev/gvisor/pkg/waiter"
 )
 
 const fuseDevMinor = 229
@@ -51,9 +55,46 @@ type DeviceFD struct {
 	vfs.DentryMetadataFileDescriptionImpl
 	vfs.NoLockFD
 
-	// TODO(gvisor.dev/issue/2987): Add all the data structures needed to enqueue
-	// and deque requests, control synchronization and establish communication
-	// between the FUSE kernel module and the /dev/fuse character device.
+	// mounted specifies whether a FUSE filesystem was mounted using the DeviceFD.
+	mounted bool
+
+	// nextOpID is used to create new requests.
+	nextOpID linux.FUSEOpID
+
+	// queue is the list of requests that need to be processed by the FUSE server.
+	queue requestList
+
+	// numActiveRequests is the number of requests made by the Sentry that has
+	// yet to be responded to.
+	numActiveRequests uint64
+
+	// completions is used to map a request to its response. A Writer will use this
+	// to notify the caller of a completed response.
+	completions map[linux.FUSEOpID]*futureResponse
+
+	writeCursor uint32
+
+	// writeBuf is the memory buffer used to copy in the FUSE out header from
+	// userspace.
+	writeBuf []byte
+
+	// writeCursorFR current FR being copied from server.
+	writeCursorFR *futureResponse
+
+	// mu protects all the queues, maps, buffers and cursors and nextOpID.
+	mu sync.Mutex
+
+	// waitQueue is used to notify interested parties when the device becomes
+	// readable or writable.
+	waitQueue waiter.Queue
+
+	// fullQueueCh is a channel used to synchronize the readers with the writers.
+	// Writers (inbound requests to the filesystem) block if there are too many
+	// unprocessed in-flight requests.
+	fullQueueCh chan struct{}
+
+	// fs is the FUSE filesystem that this FD is being used for.
+	fs *filesystem
 }
 
 // Release implements vfs.FileDescriptionImpl.Release.
@@ -61,45 +102,293 @@ func (fd *DeviceFD) Release() {}
 
 // PRead implements vfs.FileDescriptionImpl.PRead.
 func (fd *DeviceFD) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts vfs.ReadOptions) (int64, error) {
+	// Operations on /dev/fuse don't make sense until a FUSE filesystem is mounted.
+	if !fd.mounted {
+		return 0, syserror.EPERM
+	}
+
 	return 0, syserror.ENOSYS
 }
 
 // Read implements vfs.FileDescriptionImpl.Read.
 func (fd *DeviceFD) Read(ctx context.Context, dst usermem.IOSequence, opts vfs.ReadOptions) (int64, error) {
-	return 0, syserror.ENOSYS
+	// Operations on /dev/fuse don't make sense until a FUSE filesystem is mounted.
+	if !fd.mounted {
+		return 0, syserror.EPERM
+	}
+
+	// We require that any Read done on this filesystem have a sane minimum
+	// read buffer. It must have the capacity for the fixed parts of any request
+	// header (Linux uses the request header and the FUSEWriteIn header for this
+	// calculation) + the negotiated MaxWrite room for the data.
+	minBuffSize := linux.FUSE_MIN_READ_BUFFER
+	inHdrLen := uint32((*linux.FUSEHeaderIn)(nil).SizeBytes())
+	writeHdrLen := uint32((*linux.FUSEWriteIn)(nil).SizeBytes())
+	negotiatedMinBuffSize := inHdrLen + writeHdrLen + fd.fs.conn.MaxWrite
+	if minBuffSize < negotiatedMinBuffSize {
+		minBuffSize = negotiatedMinBuffSize
+	}
+
+	// If the read buffer is too small, error out.
+	if dst.NumBytes() < int64(minBuffSize) {
+		return 0, syserror.EINVAL
+	}
+
+	fd.mu.Lock()
+	defer fd.mu.Unlock()
+	return fd.readLocked(ctx, dst, opts)
+}
+
+// readLocked implements the reading of the fuse device while locked with DeviceFD.mu.
+func (fd *DeviceFD) readLocked(ctx context.Context, dst usermem.IOSequence, opts vfs.ReadOptions) (int64, error) {
+	if fd.queue.Empty() {
+		return 0, syserror.ErrWouldBlock
+	}
+
+	var readCursor uint32
+	var bytesRead int64
+	for {
+		req := fd.queue.Front()
+		if dst.NumBytes() < int64(req.hdr.Len) {
+			// The request is too large. Cannot process it. All requests must be smaller than the
+			// negotiated size as specified by Connection.MaxWrite set as part of the FUSE_INIT
+			// handshake.
+			errno := -int32(syscall.EIO)
+			if req.hdr.Opcode == linux.FUSE_SETXATTR {
+				errno = -int32(syscall.E2BIG)
+			}
+
+			// Return the error to the calling task.
+			if err := fd.sendError(ctx, errno, req); err != nil {
+				return 0, err
+			}
+
+			// We're done with this request.
+			fd.queue.Remove(req)
+
+			// Restart the read as this request was invalid.
+			log.Warningf("fuse.DeviceFD.Read: request found was too large. Restarting read.")
+			return fd.readLocked(ctx, dst, opts)
+		}
+
+		n, err := dst.CopyOut(ctx, req.data[readCursor:])
+		if err != nil {
+			return 0, err
+		}
+		readCursor += uint32(n)
+		bytesRead += int64(n)
+
+		if readCursor >= req.hdr.Len {
+			// Fully done with this req, remove it from the queue.
+			fd.queue.Remove(req)
+			break
+		}
+	}
+
+	return bytesRead, nil
 }
 
 // PWrite implements vfs.FileDescriptionImpl.PWrite.
 func (fd *DeviceFD) PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (int64, error) {
+	// Operations on /dev/fuse don't make sense until a FUSE filesystem is mounted.
+	if !fd.mounted {
+		return 0, syserror.EPERM
+	}
+
 	return 0, syserror.ENOSYS
 }
 
 // Write implements vfs.FileDescriptionImpl.Write.
 func (fd *DeviceFD) Write(ctx context.Context, src usermem.IOSequence, opts vfs.WriteOptions) (int64, error) {
-	return 0, syserror.ENOSYS
+	fd.mu.Lock()
+	defer fd.mu.Unlock()
+	return fd.writeLocked(ctx, src, opts)
+}
+
+// writeLocked implements writing to the fuse device while locked with DeviceFD.mu.
+func (fd *DeviceFD) writeLocked(ctx context.Context, src usermem.IOSequence, opts vfs.WriteOptions) (int64, error) {
+	// Operations on /dev/fuse don't make sense until a FUSE filesystem is mounted.
+	if !fd.mounted {
+		return 0, syserror.EPERM
+	}
+
+	var cn, n int64
+	hdrLen := uint32((*linux.FUSEHeaderOut)(nil).SizeBytes())
+
+	for src.NumBytes() > 0 {
+		if fd.writeCursorFR != nil {
+			// Already have common header, and we're now copying the payload.
+			wantBytes := fd.writeCursorFR.hdr.Len
+
+			// Note that the FR data doesn't have the header. Copy it over if its necessary.
+			if fd.writeCursorFR.data == nil {
+				fd.writeCursorFR.data = make([]byte, wantBytes)
+			}
+
+			bytesCopied, err := src.CopyIn(ctx, fd.writeCursorFR.data[fd.writeCursor:wantBytes])
+			if err != nil {
+				return 0, err
+			}
+			src = src.DropFirst(bytesCopied)
+
+			cn = int64(bytesCopied)
+			n += cn
+			fd.writeCursor += uint32(cn)
+			if fd.writeCursor == wantBytes {
+				// Done reading this full response. Clean up and unblock the
+				// initiator.
+				break
+			}
+
+			// Check if we have more data in src.
+			continue
+		}
+
+		// Assert that the header isn't read into the writeBuf yet.
+		if fd.writeCursor >= hdrLen {
+			return 0, syserror.EINVAL
+		}
+
+		// We don't have the full common response header yet.
+		wantBytes := hdrLen - fd.writeCursor
+		bytesCopied, err := src.CopyIn(ctx, fd.writeBuf[fd.writeCursor:wantBytes])
+		if err != nil {
+			return 0, err
+		}
+		src = src.DropFirst(bytesCopied)
+
+		cn = int64(bytesCopied)
+		n += cn
+		fd.writeCursor += uint32(cn)
+		if fd.writeCursor == hdrLen {
+			// Have full header in the writeBuf. Use it to fetch the actual futureResponse
+			// from the device's completions map.
+			var hdr linux.FUSEHeaderOut
+			hdr.UnmarshalBytes(fd.writeBuf)
+
+			// We have the header now and so the writeBuf has served its purpose.
+			// We could reset it manually here but instead of doing that, at the
+			// end of the write, the writeCursor will be set to 0 thereby allowing
+			// the next request to overwrite whats in the buffer,
+
+			fut, ok := fd.completions[hdr.Unique]
+			if !ok {
+				// Server sent us a response for a request we never sent?
+				return 0, syserror.EINVAL
+			}
+
+			delete(fd.completions, hdr.Unique)
+
+			// Copy over the header into the future response. The rest of the payload
+			// will be copied over to the FR's data in the next iteration.
+			fut.hdr = &hdr
+			fd.writeCursorFR = fut
+
+			// Next iteration will now try read the complete request, if src has
+			// any data remaining. Otherwise we're done.
+		}
+	}
+
+	if fd.writeCursorFR != nil {
+		if err := fd.sendResponse(ctx, fd.writeCursorFR); err != nil {
+			return 0, err
+		}
+
+		// Ready the device for the next request.
+		fd.writeCursorFR = nil
+		fd.writeCursor = 0
+	}
+
+	return n, nil
+}
+
+// Readiness implements vfs.FileDescriptionImpl.Readiness.
+func (fd *DeviceFD) Readiness(mask waiter.EventMask) waiter.EventMask {
+	var ready waiter.EventMask
+	ready |= waiter.EventOut // FD is always writable
+	if !fd.queue.Empty() {
+		// Have reqs available, FD is readable.
+		ready |= waiter.EventIn
+	}
+
+	return ready & mask
+}
+
+// EventRegister implements waiter.Waitable.EventRegister.
+func (fd *DeviceFD) EventRegister(e *waiter.Entry, mask waiter.EventMask) {
+	fd.waitQueue.EventRegister(e, mask)
+}
+
+// EventUnregister implements waiter.Waitable.EventUnregister.
+func (fd *DeviceFD) EventUnregister(e *waiter.Entry) {
+	fd.waitQueue.EventUnregister(e)
 }
 
 // Seek implements vfs.FileDescriptionImpl.Seek.
 func (fd *DeviceFD) Seek(ctx context.Context, offset int64, whence int32) (int64, error) {
+	// Operations on /dev/fuse don't make sense until a FUSE filesystem is mounted.
+	if !fd.mounted {
+		return 0, syserror.EPERM
+	}
+
 	return 0, syserror.ENOSYS
 }
 
-// Register registers the FUSE device with vfsObj.
-func Register(vfsObj *vfs.VirtualFilesystem) error {
-	if err := vfsObj.RegisterDevice(vfs.CharDevice, linux.MISC_MAJOR, fuseDevMinor, fuseDevice{}, &vfs.RegisterDeviceOptions{
-		GroupName: "misc",
-	}); err != nil {
+// sendResponse sends a response to the waiting task (if any).
+func (fd *DeviceFD) sendResponse(ctx context.Context, fut *futureResponse) error {
+	// See if the running task need to perform some action before returning.
+	// Since we just finished writing the future, we can be sure that
+	// getResponse generates a populated response.
+	if err := fd.noReceiverAction(ctx, fut.getResponse()); err != nil {
 		return err
 	}
 
+	// Signal that the queue is no longer full.
+	select {
+	case fd.fullQueueCh <- struct{}{}:
+	default:
+	}
+	fd.numActiveRequests -= 1
+
+	// Signal the task waiting on a response.
+	close(fut.ch)
 	return nil
 }
 
-// CreateDevtmpfsFile creates a device special file in devtmpfs.
-func CreateDevtmpfsFile(ctx context.Context, dev *devtmpfs.Accessor) error {
-	if err := dev.CreateDeviceFile(ctx, "fuse", vfs.CharDevice, linux.MISC_MAJOR, fuseDevMinor, 0666 /* mode */); err != nil {
+// sendError sends an error response to the waiting task (if any).
+func (fd *DeviceFD) sendError(ctx context.Context, errno int32, req *Request) error {
+	// Return the error to the calling task.
+	outHdrLen := uint32((*linux.FUSEHeaderOut)(nil).SizeBytes())
+	respHdr := linux.FUSEHeaderOut{
+		Len:    outHdrLen,
+		Error:  errno,
+		Unique: req.hdr.Unique,
+	}
+
+	fut, ok := fd.completions[respHdr.Unique]
+	if !ok {
+		// Server sent us a response for a request we never sent?
+		return syserror.EINVAL
+	}
+	delete(fd.completions, respHdr.Unique)
+
+	fut.hdr = &respHdr
+	if err := fd.sendResponse(ctx, fut); err != nil {
 		return err
 	}
 
 	return nil
 }
+
+// noReceiverAction has the calling kernel.Task do some action if its known that no
+// receiver is going to be waiting on the future channel. This is to be used by:
+// FUSE_INIT.
+func (fd *DeviceFD) noReceiverAction(ctx context.Context, r *Response) error {
+	if r.opcode == linux.FUSE_INIT {
+		// TODO: process init response here.
+		// Maybe get the creds from the context?
+		// creds := auth.CredentialsFromContext(ctx)
+	}
+
+	return nil
+}
diff --git a/pkg/sentry/fsimpl/fuse/dev_test.go b/pkg/sentry/fsimpl/fuse/dev_test.go
new file mode 100644
index 000000000..fcd77832a
--- /dev/null
+++ b/pkg/sentry/fsimpl/fuse/dev_test.go
@@ -0,0 +1,429 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package fuse
+
+import (
+	"fmt"
+	"io"
+	"math/rand"
+	"testing"
+
+	"gvisor.dev/gvisor/pkg/abi/linux"
+	"gvisor.dev/gvisor/pkg/sentry/fsimpl/testutil"
+	"gvisor.dev/gvisor/pkg/sentry/fsimpl/tmpfs"
+	"gvisor.dev/gvisor/pkg/sentry/kernel"
+	"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+	"gvisor.dev/gvisor/pkg/sentry/vfs"
+	"gvisor.dev/gvisor/pkg/syserror"
+	"gvisor.dev/gvisor/pkg/usermem"
+	"gvisor.dev/gvisor/pkg/waiter"
+	"gvisor.dev/gvisor/tools/go_marshal/marshal"
+)
+
+// echoTestOpcode is the Opcode used during testing. The server used in tests
+// will simply echo the payload back with the appropriate headers.
+const echoTestOpcode linux.FUSEOpcode = 1000
+
+type testPayload struct {
+	data uint32
+}
+
+// TestFUSECommunication tests that the communication layer between the Sentry and the
+// FUSE server daemon works as expected.
+func TestFUSECommunication(t *testing.T) {
+	s := setup(t)
+	defer s.Destroy()
+
+	k := kernel.KernelFromContext(s.Ctx)
+	creds := auth.CredentialsFromContext(s.Ctx)
+
+	// Create test cases with different number of concurrent clients and servers.
+	testCases := []struct {
+		Name              string
+		NumClients        int
+		NumServers        int
+		MaxActiveRequests uint64
+	}{
+		{
+			Name:              "SingleClientSingleServer",
+			NumClients:        1,
+			NumServers:        1,
+			MaxActiveRequests: MaxActiveRequestsDefault,
+		},
+		{
+			Name:              "SingleClientMultipleServers",
+			NumClients:        1,
+			NumServers:        10,
+			MaxActiveRequests: MaxActiveRequestsDefault,
+		},
+		{
+			Name:              "MultipleClientsSingleServer",
+			NumClients:        10,
+			NumServers:        1,
+			MaxActiveRequests: MaxActiveRequestsDefault,
+		},
+		{
+			Name:              "MultipleClientsMultipleServers",
+			NumClients:        10,
+			NumServers:        10,
+			MaxActiveRequests: MaxActiveRequestsDefault,
+		},
+		{
+			Name:              "RequestCapacityFull",
+			NumClients:        10,
+			NumServers:        1,
+			MaxActiveRequests: 1,
+		},
+		{
+			Name:              "RequestCapacityContinuouslyFull",
+			NumClients:        100,
+			NumServers:        2,
+			MaxActiveRequests: 2,
+		},
+	}
+
+	for _, testCase := range testCases {
+		t.Run(testCase.Name, func(t *testing.T) {
+			conn, fd, err := newTestConnection(s, k, testCase.MaxActiveRequests)
+			if err != nil {
+				t.Fatalf("newTestConnection: %v", err)
+			}
+
+			clientsDone := make([]chan struct{}, testCase.NumClients)
+			serversDone := make([]chan struct{}, testCase.NumServers)
+			serversKill := make([]chan struct{}, testCase.NumServers)
+
+			// FUSE clients.
+			for i := 0; i < testCase.NumClients; i++ {
+				clientsDone[i] = make(chan struct{})
+				go func(i int) {
+					fuseClientRun(t, s, k, conn, creds, uint32(i), uint64(i), clientsDone[i])
+				}(i)
+			}
+
+			// FUSE servers.
+			for j := 0; j < testCase.NumServers; j++ {
+				serversDone[j] = make(chan struct{})
+				serversKill[j] = make(chan struct{}, 1) // The kill command shouldn't block.
+				go func(j int) {
+					fuseServerRun(t, s, k, fd, serversDone[j], serversKill[j])
+				}(j)
+			}
+
+			// Tear down.
+			//
+			// Make sure all the clients are done.
+			for i := 0; i < testCase.NumClients; i++ {
+				<-clientsDone[i]
+			}
+
+			// Kill any server that is potentially waiting.
+			for j := 0; j < testCase.NumServers; j++ {
+				serversKill[j] <- struct{}{}
+			}
+
+			// Make sure all the servers are done.
+			for j := 0; j < testCase.NumServers; j++ {
+				<-serversDone[j]
+			}
+		})
+	}
+}
+
+// CallTest makes a request to the server and blocks the invoking
+// goroutine until a server responds with a response. Doesn't block
+// a kernel.Task. Analogous to Connection.Call but used for testing.
+func CallTest(conn *Connection, t *kernel.Task, r *Request, i uint32) (*Response, error) {
+	conn.fd.mu.Lock()
+
+	// Wait until we're certain that a new request can be processed.
+	for conn.fd.numActiveRequests == conn.fd.fs.opts.maxActiveRequests {
+		conn.fd.mu.Unlock()
+		select {
+		case <-conn.fd.fullQueueCh:
+		}
+		conn.fd.mu.Lock()
+	}
+
+	fut, err := conn.callFutureLocked(t, r) // No task given.
+	conn.fd.mu.Unlock()
+
+	if err != nil {
+		return nil, err
+	}
+
+	// Resolve the response.
+	//
+	// Block without a task.
+	select {
+	case <-fut.ch:
+	}
+
+	// A response is ready. Resolve and return it.
+	return fut.getResponse(), nil
+}
+
+// ReadTest is analogous to vfs.FileDescription.Read and reads from the FUSE
+// device. However, it does so by - not blocking the task that is calling - and
+// instead just waits on a channel. The behaviour is essentially the same as
+// DeviceFD.Read except it guarantees that the task is not blocked.
+func ReadTest(serverTask *kernel.Task, fd *vfs.FileDescription, inIOseq usermem.IOSequence, killServer chan struct{}) (int64, bool, error) {
+	var err error
+	var n, total int64
+
+	dev := fd.Impl().(*DeviceFD)
+
+	// Register for notifications.
+	w, ch := waiter.NewChannelEntry(nil)
+	dev.EventRegister(&w, waiter.EventIn)
+	for {
+		// Issue the request and break out if it completes with anything other than
+		// "would block".
+		n, err = dev.Read(serverTask, inIOseq, vfs.ReadOptions{})
+		total += n
+		if err != syserror.ErrWouldBlock {
+			break
+		}
+
+		// Wait for a notification that we should retry.
+		// Emulate the blocking for when no requests are available
+		select {
+		case <-ch:
+		case <-killServer:
+			// Server killed by the main program.
+			return 0, true, nil
+		}
+	}
+
+	dev.EventUnregister(&w)
+	return total, false, err
+}
+
+// fuseClientRun emulates all the actions of a normal FUSE request. It creates
+// a header, a payload, calls the server, waits for the response, and processes
+// the response.
+func fuseClientRun(t *testing.T, s *testutil.System, k *kernel.Kernel, conn *Connection, creds *auth.Credentials, pid uint32, inode uint64, clientDone chan struct{}) {
+	defer func() { clientDone <- struct{}{} }()
+
+	tc := k.NewThreadGroup(nil, k.RootPIDNamespace(), kernel.NewSignalHandlers(), linux.SIGCHLD, k.GlobalInit().Limits())
+	clientTask, err := testutil.CreateTask(s.Ctx, fmt.Sprintf("fuse-client-%v", pid), tc, s.MntNs, s.Root, s.Root)
+	if err != nil {
+		t.Fatal(err)
+	}
+	testObj := &testPayload{
+		data: rand.Uint32(),
+	}
+
+	req, err := conn.NewRequest(creds, pid, inode, echoTestOpcode, testObj)
+	if err != nil {
+		t.Fatalf("NewRequest creation failed: %v", err)
+	}
+
+	// Queue up a request.
+	// Analogous to Call except it doesn't block on the task.
+	resp, err := CallTest(conn, clientTask, req, pid)
+	if err != nil {
+		t.Fatalf("CallTaskNonBlock failed: %v", err)
+	}
+
+	if err = resp.Error(); err != nil {
+		t.Fatalf("Server responded with an error: %v", err)
+	}
+
+	var respTestPayload testPayload
+	if err := resp.UnmarshalPayload(&respTestPayload); err != nil {
+		t.Fatalf("Unmarshalling payload error: %v", err)
+	}
+
+	if resp.hdr.Unique != req.hdr.Unique {
+		t.Fatalf("got response for another request. Expected response for req %v but got response for req %v",
+			req.hdr.Unique, resp.hdr.Unique)
+	}
+
+	if respTestPayload.data != testObj.data {
+		t.Fatalf("read incorrect data. Data expected: %v, but got %v", testObj.data, respTestPayload.data)
+	}
+
+}
+
+// fuseServerRun creates a task and emulates all the actions of a simple FUSE server
+// that simply reads a request and echos the same struct back as a response using the
+// appropriate headers.
+func fuseServerRun(t *testing.T, s *testutil.System, k *kernel.Kernel, fd *vfs.FileDescription, serverDone, killServer chan struct{}) {
+	defer func() { serverDone <- struct{}{} }()
+
+	// Create the tasks that the server will be using.
+	tc := k.NewThreadGroup(nil, k.RootPIDNamespace(), kernel.NewSignalHandlers(), linux.SIGCHLD, k.GlobalInit().Limits())
+	var readPayload testPayload
+
+	serverTask, err := testutil.CreateTask(s.Ctx, "fuse-server", tc, s.MntNs, s.Root, s.Root)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	// Read the request.
+	for {
+		inHdrLen := uint32((*linux.FUSEHeaderIn)(nil).SizeBytes())
+		payloadLen := uint32(readPayload.SizeBytes())
+
+		// The raed buffer must meet some certain size criteria.
+		buffSize := inHdrLen + payloadLen
+		if buffSize < linux.FUSE_MIN_READ_BUFFER {
+			buffSize = linux.FUSE_MIN_READ_BUFFER
+		}
+		inBuf := make([]byte, buffSize)
+		inIOseq := usermem.BytesIOSequence(inBuf)
+
+		n, serverKilled, err := ReadTest(serverTask, fd, inIOseq, killServer)
+		if err != nil {
+			t.Fatalf("Read failed :%v", err)
+		}
+
+		// Server should shut down. No new requests are going to be made.
+		if serverKilled {
+			break
+		}
+
+		if n <= 0 {
+			t.Fatalf("Read read no bytes")
+		}
+
+		var readFUSEHeaderIn linux.FUSEHeaderIn
+		readFUSEHeaderIn.UnmarshalUnsafe(inBuf[:inHdrLen])
+		readPayload.UnmarshalUnsafe(inBuf[inHdrLen : inHdrLen+payloadLen])
+
+		if readFUSEHeaderIn.Opcode != echoTestOpcode {
+			t.Fatalf("read incorrect data. Header: %v, Payload: %v", readFUSEHeaderIn, readPayload)
+		}
+
+		// Write the response.
+		outHdrLen := uint32((*linux.FUSEHeaderOut)(nil).SizeBytes())
+		outBuf := make([]byte, outHdrLen+payloadLen)
+		outHeader := linux.FUSEHeaderOut{
+			Len:    outHdrLen + payloadLen,
+			Error:  0,
+			Unique: readFUSEHeaderIn.Unique,
+		}
+
+		// Echo the payload back.
+		outHeader.MarshalUnsafe(outBuf[:outHdrLen])
+		readPayload.MarshalUnsafe(outBuf[outHdrLen:])
+		outIOseq := usermem.BytesIOSequence(outBuf)
+
+		n, err = fd.Write(s.Ctx, outIOseq, vfs.WriteOptions{})
+		if err != nil {
+			t.Fatalf("Write failed :%v", err)
+		}
+	}
+}
+
+func setup(t *testing.T) *testutil.System {
+	k, err := testutil.Boot()
+	if err != nil {
+		t.Fatalf("Error creating kernel: %v", err)
+	}
+
+	ctx := k.SupervisorContext()
+	creds := auth.CredentialsFromContext(ctx)
+
+	k.VFS().MustRegisterFilesystemType(Name, &FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{
+		AllowUserList:  true,
+		AllowUserMount: true,
+	})
+
+	mntns, err := k.VFS().NewMountNamespace(ctx, creds, "", tmpfs.Name, &vfs.GetFilesystemOptions{})
+	if err != nil {
+		t.Fatalf("NewMountNamespace(): %v", err)
+	}
+
+	return testutil.NewSystem(ctx, t, k.VFS(), mntns)
+}
+
+// newTestConnection creates a fuse connection that the sentry can communicate with
+// and the FD for the server to communicate with.
+func newTestConnection(system *testutil.System, k *kernel.Kernel, maxActiveRequests uint64) (*Connection, *vfs.FileDescription, error) {
+	vfsObj := &vfs.VirtualFilesystem{}
+	fuseDev := &DeviceFD{}
+
+	if err := vfsObj.Init(); err != nil {
+		return nil, nil, err
+	}
+
+	vd := vfsObj.NewAnonVirtualDentry("genCountFD")
+	defer vd.DecRef()
+	if err := fuseDev.vfsfd.Init(fuseDev, linux.O_RDWR|linux.O_CREAT, vd.Mount(), vd.Dentry(), &vfs.FileDescriptionOptions{}); err != nil {
+		return nil, nil, err
+	}
+
+	fsopts := filesystemOptions{
+		maxActiveRequests: maxActiveRequests,
+	}
+	fs, err := NewFUSEFilesystem(system.Ctx, 0, &fsopts, &fuseDev.vfsfd)
+	if err != nil {
+		return nil, nil, err
+	}
+
+	return fs.conn, &fuseDev.vfsfd, nil
+}
+
+// SizeBytes implements marshal.Marshallable.SizeBytes.
+func (t *testPayload) SizeBytes() int {
+	return 4
+}
+
+// MarshalBytes implements marshal.Marshallable.MarshalBytes.
+func (t *testPayload) MarshalBytes(dst []byte) {
+	usermem.ByteOrder.PutUint32(dst[:4], t.data)
+}
+
+// UnmarshalBytes implements marshal.Marshallable.UnmarshalBytes.
+func (t *testPayload) UnmarshalBytes(src []byte) {
+	*t = testPayload{data: usermem.ByteOrder.Uint32(src[:4])}
+}
+
+// Packed implements marshal.Marshallable.Packed.
+func (t *testPayload) Packed() bool {
+	return true
+}
+
+// MarshalUnsafe implements marshal.Marshallable.MarshalUnsafe.
+func (t *testPayload) MarshalUnsafe(dst []byte) {
+	t.MarshalBytes(dst)
+}
+
+// UnmarshalUnsafe implements marshal.Marshallable.UnmarshalUnsafe.
+func (t *testPayload) UnmarshalUnsafe(src []byte) {
+	t.UnmarshalBytes(src)
+}
+
+// CopyOutN implements marshal.Marshallable.CopyOutN.
+func (t *testPayload) CopyOutN(task marshal.Task, addr usermem.Addr, limit int) (int, error) {
+	panic("not implemented")
+}
+
+// CopyOut implements marshal.Marshallable.CopyOut.
+func (t *testPayload) CopyOut(task marshal.Task, addr usermem.Addr) (int, error) {
+	panic("not implemented")
+}
+
+// CopyIn implements marshal.Marshallable.CopyIn.
+func (t *testPayload) CopyIn(task marshal.Task, addr usermem.Addr) (int, error) {
+	panic("not implemented")
+}
+
+// WriteTo implements io.WriterTo.WriteTo.
+func (t *testPayload) WriteTo(w io.Writer) (int64, error) {
+	panic("not implemented")
+}
diff --git a/pkg/sentry/fsimpl/fuse/fusefs.go b/pkg/sentry/fsimpl/fuse/fusefs.go
new file mode 100644
index 000000000..911b6f7cb
--- /dev/null
+++ b/pkg/sentry/fsimpl/fuse/fusefs.go
@@ -0,0 +1,224 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package fuse implements fusefs.
+package fuse
+
+import (
+	"strconv"
+
+	"gvisor.dev/gvisor/pkg/abi/linux"
+	"gvisor.dev/gvisor/pkg/context"
+	"gvisor.dev/gvisor/pkg/log"
+	"gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs"
+	"gvisor.dev/gvisor/pkg/sentry/kernel"
+	"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+	"gvisor.dev/gvisor/pkg/sentry/vfs"
+	"gvisor.dev/gvisor/pkg/syserror"
+)
+
+// Name is the default filesystem name.
+const Name = "fuse"
+
+// FilesystemType implements vfs.FilesystemType.
+type FilesystemType struct{}
+
+type filesystemOptions struct {
+	// userID specifies the numeric uid of the mount owner.
+	// This option should not be specified by the filesystem owner.
+	// It is set by libfuse (or, if libfuse is not used, must be set
+	// by the filesystem itself). For more information, see man page
+	// for fuse(8)
+	userID uint32
+
+	// groupID specifies the numeric gid of the mount owner.
+	// This option should not be specified by the filesystem owner.
+	// It is set by libfuse (or, if libfuse is not used, must be set
+	// by the filesystem itself). For more information, see man page
+	// for fuse(8)
+	groupID uint32
+
+	// rootMode specifies the the file mode of the filesystem's root.
+	rootMode linux.FileMode
+
+	// maxActiveRequests specifies the maximum number of active requests that can
+	// exist at any time. Any further requests will block when trying to
+	// Call the server.
+	maxActiveRequests uint64
+}
+
+// filesystem implements vfs.FilesystemImpl.
+type filesystem struct {
+	kernfs.Filesystem
+	devMinor uint32
+
+	// conn is used for communication between the FUSE server
+	// daemon and the sentry fusefs.
+	conn *Connection
+
+	// opts is the options the fusefs is initialized with.
+	opts *filesystemOptions
+}
+
+// Name implements vfs.FilesystemType.Name.
+func (FilesystemType) Name() string {
+	return Name
+}
+
+// GetFilesystem implements vfs.FilesystemType.GetFilesystem.
+func (fsType FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.VirtualFilesystem, creds *auth.Credentials, source string, opts vfs.GetFilesystemOptions) (*vfs.Filesystem, *vfs.Dentry, error) {
+	devMinor, err := vfsObj.GetAnonBlockDevMinor()
+	if err != nil {
+		return nil, nil, err
+	}
+
+	var fsopts filesystemOptions
+	mopts := vfs.GenericParseMountOptions(opts.Data)
+	deviceDescriptorStr, ok := mopts["fd"]
+	if !ok {
+		log.Warningf("%s.GetFilesystem: communication file descriptor N (obtained by opening /dev/fuse) must be specified as 'fd=N'", fsType.Name())
+		return nil, nil, syserror.EINVAL
+	}
+	delete(mopts, "fd")
+
+	deviceDescriptor, err := strconv.ParseInt(deviceDescriptorStr, 10 /* base */, 32 /* bitSize */)
+	if err != nil {
+		return nil, nil, err
+	}
+
+	kernelTask := kernel.TaskFromContext(ctx)
+	if kernelTask == nil {
+		log.Warningf("%s.GetFilesystem: couldn't get kernel task from context", fsType.Name())
+		return nil, nil, syserror.EINVAL
+	}
+	fuseFd := kernelTask.GetFileVFS2(int32(deviceDescriptor))
+
+	// Parse and set all the other supported FUSE mount options.
+	// TODO(gVisor.dev/issue/3229): Expand the supported mount options.
+	if userIDStr, ok := mopts["user_id"]; ok {
+		delete(mopts, "user_id")
+		userID, err := strconv.ParseUint(userIDStr, 10, 32)
+		if err != nil {
+			log.Warningf("%s.GetFilesystem: invalid user_id: user_id=%s", fsType.Name(), userIDStr)
+			return nil, nil, syserror.EINVAL
+		}
+		fsopts.userID = uint32(userID)
+	}
+
+	if groupIDStr, ok := mopts["group_id"]; ok {
+		delete(mopts, "group_id")
+		groupID, err := strconv.ParseUint(groupIDStr, 10, 32)
+		if err != nil {
+			log.Warningf("%s.GetFilesystem: invalid group_id: group_id=%s", fsType.Name(), groupIDStr)
+			return nil, nil, syserror.EINVAL
+		}
+		fsopts.groupID = uint32(groupID)
+	}
+
+	rootMode := linux.FileMode(0777)
+	modeStr, ok := mopts["rootmode"]
+	if ok {
+		delete(mopts, "rootmode")
+		mode, err := strconv.ParseUint(modeStr, 8, 32)
+		if err != nil {
+			log.Warningf("%s.GetFilesystem: invalid mode: %q", fsType.Name(), modeStr)
+			return nil, nil, syserror.EINVAL
+		}
+		rootMode = linux.FileMode(mode)
+	}
+	fsopts.rootMode = rootMode
+
+	// Set the maxInFlightRequests option.
+	fsopts.maxActiveRequests = MaxActiveRequestsDefault
+
+	// Check for unparsed options.
+	if len(mopts) != 0 {
+		log.Warningf("%s.GetFilesystem: unknown options: %v", fsType.Name(), mopts)
+		return nil, nil, syserror.EINVAL
+	}
+
+	// Create a new FUSE filesystem.
+	fs, err := NewFUSEFilesystem(ctx, devMinor, &fsopts, fuseFd)
+	if err != nil {
+		log.Warningf("%s.NewFUSEFilesystem: failed with error: %v", fsType.Name(), err)
+		return nil, nil, err
+	}
+
+	fs.VFSFilesystem().Init(vfsObj, &fsType, fs)
+
+	// TODO: dispatch a FUSE_INIT request to the FUSE daemon server before
+	//  returning. Mount will not block on this dispatched request.
+
+	// root is the fusefs root directory.
+	root := fs.newInode(creds, fsopts.rootMode)
+
+	return fs.VFSFilesystem(), root.VFSDentry(), nil
+}
+
+// NewFUSEFilesystem creates a new FUSE filesystem.
+func NewFUSEFilesystem(ctx context.Context, devMinor uint32, opts *filesystemOptions, device *vfs.FileDescription) (*filesystem, error) {
+	fs := &filesystem{
+		devMinor: devMinor,
+		opts:     opts,
+	}
+
+	conn, err := NewFUSEConnection(ctx, device, opts.maxActiveRequests)
+	if err != nil {
+		log.Warningf("fuse.NewFUSEFilesystem: NewFUSEConnection failed with error: %v", err)
+		return nil, syserror.EINVAL
+	}
+
+	fs.conn = conn
+	fuseFD := device.Impl().(*DeviceFD)
+	fuseFD.fs = fs
+
+	return fs, nil
+}
+
+// Release implements vfs.FilesystemImpl.Release.
+func (fs *filesystem) Release() {
+	fs.Filesystem.VFSFilesystem().VirtualFilesystem().PutAnonBlockDevMinor(fs.devMinor)
+	fs.Filesystem.Release()
+}
+
+// Inode implements kernfs.Inode.
+type Inode struct {
+	kernfs.InodeAttrs
+	kernfs.InodeNoDynamicLookup
+	kernfs.InodeNotSymlink
+	kernfs.InodeDirectoryNoNewChildren
+	kernfs.OrderedChildren
+
+	locks vfs.FileLocks
+
+	dentry kernfs.Dentry
+}
+
+func (fs *filesystem) newInode(creds *auth.Credentials, mode linux.FileMode) *kernfs.Dentry {
+	i := &Inode{}
+	i.InodeAttrs.Init(creds, linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), linux.ModeDirectory|0755)
+	i.OrderedChildren.Init(kernfs.OrderedChildrenOptions{})
+	i.dentry.Init(i)
+
+	return &i.dentry
+}
+
+// Open implements kernfs.Inode.Open.
+func (i *Inode) Open(ctx context.Context, rp *vfs.ResolvingPath, vfsd *vfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) {
+	fd, err := kernfs.NewGenericDirectoryFD(rp.Mount(), vfsd, &i.OrderedChildren, &i.locks, &opts)
+	if err != nil {
+		return nil, err
+	}
+	return fd.VFSFileDescription(), nil
+}
diff --git a/pkg/sentry/fsimpl/fuse/register.go b/pkg/sentry/fsimpl/fuse/register.go
new file mode 100644
index 000000000..b5b581152
--- /dev/null
+++ b/pkg/sentry/fsimpl/fuse/register.go
@@ -0,0 +1,42 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package fuse
+
+import (
+	"gvisor.dev/gvisor/pkg/abi/linux"
+	"gvisor.dev/gvisor/pkg/context"
+	"gvisor.dev/gvisor/pkg/sentry/fsimpl/devtmpfs"
+	"gvisor.dev/gvisor/pkg/sentry/vfs"
+)
+
+// Register registers the FUSE device with vfsObj.
+func Register(vfsObj *vfs.VirtualFilesystem) error {
+	if err := vfsObj.RegisterDevice(vfs.CharDevice, linux.MISC_MAJOR, fuseDevMinor, fuseDevice{}, &vfs.RegisterDeviceOptions{
+		GroupName: "misc",
+	}); err != nil {
+		return err
+	}
+
+	return nil
+}
+
+// CreateDevtmpfsFile creates a device special file in devtmpfs.
+func CreateDevtmpfsFile(ctx context.Context, dev *devtmpfs.Accessor) error {
+	if err := dev.CreateDeviceFile(ctx, "fuse", vfs.CharDevice, linux.MISC_MAJOR, fuseDevMinor, 0666 /* mode */); err != nil {
+		return err
+	}
+
+	return nil
+}
diff --git a/pkg/sentry/fsimpl/gofer/filesystem.go b/pkg/sentry/fsimpl/gofer/filesystem.go
index cd5f5049e..00e3c99cd 100644
--- a/pkg/sentry/fsimpl/gofer/filesystem.go
+++ b/pkg/sentry/fsimpl/gofer/filesystem.go
@@ -150,11 +150,9 @@ afterSymlink:
 			return nil, err
 		}
 		if d != d.parent && !d.cachedMetadataAuthoritative() {
-			_, attrMask, attr, err := d.parent.file.getAttr(ctx, dentryAttrMask())
-			if err != nil {
+			if err := d.parent.updateFromGetattr(ctx); err != nil {
 				return nil, err
 			}
-			d.parent.updateFromP9Attrs(attrMask, &attr)
 		}
 		rp.Advance()
 		return d.parent, nil
@@ -209,17 +207,28 @@ func (fs *filesystem) getChildLocked(ctx context.Context, vfsObj *vfs.VirtualFil
 
 // Preconditions: As for getChildLocked. !parent.isSynthetic().
 func (fs *filesystem) revalidateChildLocked(ctx context.Context, vfsObj *vfs.VirtualFilesystem, parent *dentry, name string, child *dentry, ds **[]*dentry) (*dentry, error) {
+	if child != nil {
+		// Need to lock child.metadataMu because we might be updating child
+		// metadata. We need to hold the lock *before* getting metadata from the
+		// server and release it after updating local metadata.
+		child.metadataMu.Lock()
+	}
 	qid, file, attrMask, attr, err := parent.file.walkGetAttrOne(ctx, name)
 	if err != nil && err != syserror.ENOENT {
+		if child != nil {
+			child.metadataMu.Unlock()
+		}
 		return nil, err
 	}
 	if child != nil {
 		if !file.isNil() && inoFromPath(qid.Path) == child.ino {
 			// The file at this path hasn't changed. Just update cached metadata.
 			file.close(ctx)
-			child.updateFromP9Attrs(attrMask, &attr)
+			child.updateFromP9AttrsLocked(attrMask, &attr)
+			child.metadataMu.Unlock()
 			return child, nil
 		}
+		child.metadataMu.Unlock()
 		if file.isNil() && child.isSynthetic() {
 			// We have a synthetic file, and no remote file has arisen to
 			// replace it.
@@ -1325,7 +1334,7 @@ func (fs *filesystem) SetStatAt(ctx context.Context, rp *vfs.ResolvingPath, opts
 		fs.renameMuRUnlockAndCheckCaching(&ds)
 		return err
 	}
-	if err := d.setStat(ctx, rp.Credentials(), &opts.Stat, rp.Mount()); err != nil {
+	if err := d.setStat(ctx, rp.Credentials(), &opts, rp.Mount()); err != nil {
 		fs.renameMuRUnlockAndCheckCaching(&ds)
 		return err
 	}
diff --git a/pkg/sentry/fsimpl/gofer/gofer.go b/pkg/sentry/fsimpl/gofer/gofer.go
index b74d489a0..e20de84b5 100644
--- a/pkg/sentry/fsimpl/gofer/gofer.go
+++ b/pkg/sentry/fsimpl/gofer/gofer.go
@@ -785,8 +785,8 @@ func (d *dentry) cachedMetadataAuthoritative() bool {
 
 // updateFromP9Attrs is called to update d's metadata after an update from the
 // remote filesystem.
-func (d *dentry) updateFromP9Attrs(mask p9.AttrMask, attr *p9.Attr) {
-	d.metadataMu.Lock()
+// Precondition: d.metadataMu must be locked.
+func (d *dentry) updateFromP9AttrsLocked(mask p9.AttrMask, attr *p9.Attr) {
 	if mask.Mode {
 		if got, want := uint32(attr.Mode.FileType()), d.fileType(); got != want {
 			d.metadataMu.Unlock()
@@ -822,7 +822,6 @@ func (d *dentry) updateFromP9Attrs(mask p9.AttrMask, attr *p9.Attr) {
 	if mask.Size {
 		d.updateFileSizeLocked(attr.Size)
 	}
-	d.metadataMu.Unlock()
 }
 
 // Preconditions: !d.isSynthetic()
@@ -834,6 +833,10 @@ func (d *dentry) updateFromGetattr(ctx context.Context) error {
 		file            p9file
 		handleMuRLocked bool
 	)
+	// d.metadataMu must be locked *before* we getAttr so that we do not end up
+	// updating stale attributes in d.updateFromP9AttrsLocked().
+	d.metadataMu.Lock()
+	defer d.metadataMu.Unlock()
 	d.handleMu.RLock()
 	if !d.handle.file.isNil() {
 		file = d.handle.file
@@ -849,7 +852,7 @@ func (d *dentry) updateFromGetattr(ctx context.Context) error {
 	if err != nil {
 		return err
 	}
-	d.updateFromP9Attrs(attrMask, &attr)
+	d.updateFromP9AttrsLocked(attrMask, &attr)
 	return nil
 }
 
@@ -885,7 +888,8 @@ func (d *dentry) statTo(stat *linux.Statx) {
 	stat.DevMinor = d.fs.devMinor
 }
 
-func (d *dentry) setStat(ctx context.Context, creds *auth.Credentials, stat *linux.Statx, mnt *vfs.Mount) error {
+func (d *dentry) setStat(ctx context.Context, creds *auth.Credentials, opts *vfs.SetStatOptions, mnt *vfs.Mount) error {
+	stat := &opts.Stat
 	if stat.Mask == 0 {
 		return nil
 	}
@@ -893,7 +897,7 @@ func (d *dentry) setStat(ctx context.Context, creds *auth.Credentials, stat *lin
 		return syserror.EPERM
 	}
 	mode := linux.FileMode(atomic.LoadUint32(&d.mode))
-	if err := vfs.CheckSetStat(ctx, creds, stat, mode, auth.KUID(atomic.LoadUint32(&d.uid)), auth.KGID(atomic.LoadUint32(&d.gid))); err != nil {
+	if err := vfs.CheckSetStat(ctx, creds, opts, mode, auth.KUID(atomic.LoadUint32(&d.uid)), auth.KGID(atomic.LoadUint32(&d.gid))); err != nil {
 		return err
 	}
 	if err := mnt.CheckBeginWrite(); err != nil {
@@ -934,6 +938,17 @@ func (d *dentry) setStat(ctx context.Context, creds *auth.Credentials, stat *lin
 	}
 	if !d.isSynthetic() {
 		if stat.Mask != 0 {
+			if stat.Mask&linux.STATX_SIZE != 0 {
+				// Check whether to allow a truncate request to be made.
+				switch d.mode & linux.S_IFMT {
+				case linux.S_IFREG:
+					// Allow.
+				case linux.S_IFDIR:
+					return syserror.EISDIR
+				default:
+					return syserror.EINVAL
+				}
+			}
 			if err := d.file.setAttr(ctx, p9.SetAttrMask{
 				Permissions:        stat.Mask&linux.STATX_MODE != 0,
 				UID:                stat.Mask&linux.STATX_UID != 0,
@@ -1495,7 +1510,7 @@ func (fd *fileDescription) Stat(ctx context.Context, opts vfs.StatOptions) (linu
 
 // SetStat implements vfs.FileDescriptionImpl.SetStat.
 func (fd *fileDescription) SetStat(ctx context.Context, opts vfs.SetStatOptions) error {
-	if err := fd.dentry().setStat(ctx, auth.CredentialsFromContext(ctx), &opts.Stat, fd.vfsfd.Mount()); err != nil {
+	if err := fd.dentry().setStat(ctx, auth.CredentialsFromContext(ctx), &opts, fd.vfsfd.Mount()); err != nil {
 		return err
 	}
 	if ev := vfs.InotifyEventFromStatMask(opts.Stat.Mask); ev != 0 {
diff --git a/pkg/sentry/fsimpl/gofer/regular_file.go b/pkg/sentry/fsimpl/gofer/regular_file.go
index f10350c97..09f142cfc 100644
--- a/pkg/sentry/fsimpl/gofer/regular_file.go
+++ b/pkg/sentry/fsimpl/gofer/regular_file.go
@@ -29,7 +29,6 @@ import (
 	"gvisor.dev/gvisor/pkg/sentry/fs/fsutil"
 	"gvisor.dev/gvisor/pkg/sentry/memmap"
 	"gvisor.dev/gvisor/pkg/sentry/pgalloc"
-	"gvisor.dev/gvisor/pkg/sentry/platform"
 	"gvisor.dev/gvisor/pkg/sentry/usage"
 	"gvisor.dev/gvisor/pkg/sentry/vfs"
 	"gvisor.dev/gvisor/pkg/syserror"
@@ -155,26 +154,53 @@ func (fd *regularFileFD) Read(ctx context.Context, dst usermem.IOSequence, opts
 
 // PWrite implements vfs.FileDescriptionImpl.PWrite.
 func (fd *regularFileFD) PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (int64, error) {
+	n, _, err := fd.pwrite(ctx, src, offset, opts)
+	return n, err
+}
+
+// pwrite returns the number of bytes written, final offset, error. The final
+// offset should be ignored by PWrite.
+func (fd *regularFileFD) pwrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (written, finalOff int64, err error) {
 	if offset < 0 {
-		return 0, syserror.EINVAL
+		return 0, offset, syserror.EINVAL
 	}
 
 	// Check that flags are supported.
 	//
 	// TODO(gvisor.dev/issue/2601): Support select pwritev2 flags.
 	if opts.Flags&^linux.RWF_HIPRI != 0 {
-		return 0, syserror.EOPNOTSUPP
+		return 0, offset, syserror.EOPNOTSUPP
+	}
+
+	d := fd.dentry()
+	// If the fd was opened with O_APPEND, make sure the file size is updated.
+	// There is a possible race here if size is modified externally after
+	// metadata cache is updated.
+	if fd.vfsfd.StatusFlags()&linux.O_APPEND != 0 && !d.cachedMetadataAuthoritative() {
+		if err := d.updateFromGetattr(ctx); err != nil {
+			return 0, offset, err
+		}
 	}
 
+	d.metadataMu.Lock()
+	defer d.metadataMu.Unlock()
+	// Set offset to file size if the fd was opened with O_APPEND.
+	if fd.vfsfd.StatusFlags()&linux.O_APPEND != 0 {
+		// Holding d.metadataMu is sufficient for reading d.size.
+		offset = int64(d.size)
+	}
 	limit, err := vfs.CheckLimit(ctx, offset, src.NumBytes())
 	if err != nil {
-		return 0, err
+		return 0, offset, err
 	}
 	src = src.TakeFirst64(limit)
+	n, err := fd.pwriteLocked(ctx, src, offset, opts)
+	return n, offset + n, err
+}
 
+// Preconditions: fd.dentry().metatdataMu must be locked.
+func (fd *regularFileFD) pwriteLocked(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (int64, error) {
 	d := fd.dentry()
-	d.metadataMu.Lock()
-	defer d.metadataMu.Unlock()
 	if d.fs.opts.interop != InteropModeShared {
 		// Compare Linux's mm/filemap.c:__generic_file_write_iter() =>
 		// file_update_time(). This is d.touchCMtime(), but without locking
@@ -194,12 +220,12 @@ func (fd *regularFileFD) PWrite(ctx context.Context, src usermem.IOSequence, off
 			return 0, syserror.EINVAL
 		}
 		mr := memmap.MappableRange{pgstart, pgend}
-		var freed []platform.FileRange
+		var freed []memmap.FileRange
 		d.dataMu.Lock()
 		cseg := d.cache.LowerBoundSegment(mr.Start)
 		for cseg.Ok() && cseg.Start() < mr.End {
 			cseg = d.cache.Isolate(cseg, mr)
-			freed = append(freed, platform.FileRange{cseg.Value(), cseg.Value() + cseg.Range().Length()})
+			freed = append(freed, memmap.FileRange{cseg.Value(), cseg.Value() + cseg.Range().Length()})
 			cseg = d.cache.Remove(cseg).NextSegment()
 		}
 		d.dataMu.Unlock()
@@ -237,8 +263,8 @@ func (fd *regularFileFD) PWrite(ctx context.Context, src usermem.IOSequence, off
 // Write implements vfs.FileDescriptionImpl.Write.
 func (fd *regularFileFD) Write(ctx context.Context, src usermem.IOSequence, opts vfs.WriteOptions) (int64, error) {
 	fd.mu.Lock()
-	n, err := fd.PWrite(ctx, src, fd.off, opts)
-	fd.off += n
+	n, off, err := fd.pwrite(ctx, src, fd.off, opts)
+	fd.off = off
 	fd.mu.Unlock()
 	return n, err
 }
@@ -794,7 +820,7 @@ func maxFillRange(required, optional memmap.MappableRange) memmap.MappableRange
 
 // InvalidateUnsavable implements memmap.Mappable.InvalidateUnsavable.
 func (d *dentry) InvalidateUnsavable(ctx context.Context) error {
-	// Whether we have a host fd (and consequently what platform.File is
+	// Whether we have a host fd (and consequently what memmap.File is
 	// mapped) can change across save/restore, so invalidate all translations
 	// unconditionally.
 	d.mapsMu.Lock()
@@ -842,8 +868,8 @@ func (d *dentry) Evict(ctx context.Context, er pgalloc.EvictableRange) {
 	}
 }
 
-// dentryPlatformFile implements platform.File. It exists solely because dentry
-// cannot implement both vfs.DentryImpl.IncRef and platform.File.IncRef.
+// dentryPlatformFile implements memmap.File. It exists solely because dentry
+// cannot implement both vfs.DentryImpl.IncRef and memmap.File.IncRef.
 //
 // dentryPlatformFile is only used when a host FD representing the remote file
 // is available (i.e. dentry.handle.fd >= 0), and that FD is used for
@@ -851,7 +877,7 @@ func (d *dentry) Evict(ctx context.Context, er pgalloc.EvictableRange) {
 type dentryPlatformFile struct {
 	*dentry
 
-	// fdRefs counts references on platform.File offsets. fdRefs is protected
+	// fdRefs counts references on memmap.File offsets. fdRefs is protected
 	// by dentry.dataMu.
 	fdRefs fsutil.FrameRefSet
 
@@ -863,29 +889,29 @@ type dentryPlatformFile struct {
 	hostFileMapperInitOnce sync.Once
 }
 
-// IncRef implements platform.File.IncRef.
-func (d *dentryPlatformFile) IncRef(fr platform.FileRange) {
+// IncRef implements memmap.File.IncRef.
+func (d *dentryPlatformFile) IncRef(fr memmap.FileRange) {
 	d.dataMu.Lock()
 	d.fdRefs.IncRefAndAccount(fr)
 	d.dataMu.Unlock()
 }
 
-// DecRef implements platform.File.DecRef.
-func (d *dentryPlatformFile) DecRef(fr platform.FileRange) {
+// DecRef implements memmap.File.DecRef.
+func (d *dentryPlatformFile) DecRef(fr memmap.FileRange) {
 	d.dataMu.Lock()
 	d.fdRefs.DecRefAndAccount(fr)
 	d.dataMu.Unlock()
 }
 
-// MapInternal implements platform.File.MapInternal.
-func (d *dentryPlatformFile) MapInternal(fr platform.FileRange, at usermem.AccessType) (safemem.BlockSeq, error) {
+// MapInternal implements memmap.File.MapInternal.
+func (d *dentryPlatformFile) MapInternal(fr memmap.FileRange, at usermem.AccessType) (safemem.BlockSeq, error) {
 	d.handleMu.RLock()
 	bs, err := d.hostFileMapper.MapInternal(fr, int(d.handle.fd), at.Write)
 	d.handleMu.RUnlock()
 	return bs, err
 }
 
-// FD implements platform.File.FD.
+// FD implements memmap.File.FD.
 func (d *dentryPlatformFile) FD() int {
 	d.handleMu.RLock()
 	fd := d.handle.fd
diff --git a/pkg/sentry/fsimpl/gofer/special_file.go b/pkg/sentry/fsimpl/gofer/special_file.go
index a7b53b2d2..811528982 100644
--- a/pkg/sentry/fsimpl/gofer/special_file.go
+++ b/pkg/sentry/fsimpl/gofer/special_file.go
@@ -16,6 +16,7 @@ package gofer
 
 import (
 	"sync"
+	"sync/atomic"
 
 	"gvisor.dev/gvisor/pkg/abi/linux"
 	"gvisor.dev/gvisor/pkg/context"
@@ -144,7 +145,7 @@ func (fd *specialFileFD) PRead(ctx context.Context, dst usermem.IOSequence, offs
 	// mmap due to lock ordering; MM locks precede dentry.dataMu. That doesn't
 	// hold here since specialFileFD doesn't client-cache data. Just buffer the
 	// read instead.
-	if d := fd.dentry(); d.fs.opts.interop != InteropModeShared {
+	if d := fd.dentry(); d.cachedMetadataAuthoritative() {
 		d.touchAtime(fd.vfsfd.Mount())
 	}
 	buf := make([]byte, dst.NumBytes())
@@ -176,39 +177,76 @@ func (fd *specialFileFD) Read(ctx context.Context, dst usermem.IOSequence, opts
 
 // PWrite implements vfs.FileDescriptionImpl.PWrite.
 func (fd *specialFileFD) PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (int64, error) {
+	n, _, err := fd.pwrite(ctx, src, offset, opts)
+	return n, err
+}
+
+// pwrite returns the number of bytes written, final offset, error. The final
+// offset should be ignored by PWrite.
+func (fd *specialFileFD) pwrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (written, finalOff int64, err error) {
 	if fd.seekable && offset < 0 {
-		return 0, syserror.EINVAL
+		return 0, offset, syserror.EINVAL
 	}
 
 	// Check that flags are supported.
 	//
 	// TODO(gvisor.dev/issue/2601): Support select pwritev2 flags.
 	if opts.Flags&^linux.RWF_HIPRI != 0 {
-		return 0, syserror.EOPNOTSUPP
+		return 0, offset, syserror.EOPNOTSUPP
+	}
+
+	d := fd.dentry()
+	// If the regular file fd was opened with O_APPEND, make sure the file size
+	// is updated. There is a possible race here if size is modified externally
+	// after metadata cache is updated.
+	if fd.seekable && fd.vfsfd.StatusFlags()&linux.O_APPEND != 0 && !d.cachedMetadataAuthoritative() {
+		if err := d.updateFromGetattr(ctx); err != nil {
+			return 0, offset, err
+		}
 	}
 
 	if fd.seekable {
+		// We need to hold the metadataMu *while* writing to a regular file.
+		d.metadataMu.Lock()
+		defer d.metadataMu.Unlock()
+
+		// Set offset to file size if the regular file was opened with O_APPEND.
+		if fd.vfsfd.StatusFlags()&linux.O_APPEND != 0 {
+			// Holding d.metadataMu is sufficient for reading d.size.
+			offset = int64(d.size)
+		}
 		limit, err := vfs.CheckLimit(ctx, offset, src.NumBytes())
 		if err != nil {
-			return 0, err
+			return 0, offset, err
 		}
 		src = src.TakeFirst64(limit)
 	}
 
 	// Do a buffered write. See rationale in PRead.
-	if d := fd.dentry(); d.fs.opts.interop != InteropModeShared {
+	if d.cachedMetadataAuthoritative() {
 		d.touchCMtime()
 	}
 	buf := make([]byte, src.NumBytes())
 	// Don't do partial writes if we get a partial read from src.
 	if _, err := src.CopyIn(ctx, buf); err != nil {
-		return 0, err
+		return 0, offset, err
 	}
 	n, err := fd.handle.writeFromBlocksAt(ctx, safemem.BlockSeqOf(safemem.BlockFromSafeSlice(buf)), uint64(offset))
 	if err == syserror.EAGAIN {
 		err = syserror.ErrWouldBlock
 	}
-	return int64(n), err
+	finalOff = offset
+	// Update file size for regular files.
+	if fd.seekable {
+		finalOff += int64(n)
+		// d.metadataMu is already locked at this point.
+		if uint64(finalOff) > d.size {
+			d.dataMu.Lock()
+			defer d.dataMu.Unlock()
+			atomic.StoreUint64(&d.size, uint64(finalOff))
+		}
+	}
+	return int64(n), finalOff, err
 }
 
 // Write implements vfs.FileDescriptionImpl.Write.
@@ -218,8 +256,8 @@ func (fd *specialFileFD) Write(ctx context.Context, src usermem.IOSequence, opts
 	}
 
 	fd.mu.Lock()
-	n, err := fd.PWrite(ctx, src, fd.off, opts)
-	fd.off += n
+	n, off, err := fd.pwrite(ctx, src, fd.off, opts)
+	fd.off = off
 	fd.mu.Unlock()
 	return n, err
 }
diff --git a/pkg/sentry/fsimpl/host/BUILD b/pkg/sentry/fsimpl/host/BUILD
index e86fbe2d5..bd701bbc7 100644
--- a/pkg/sentry/fsimpl/host/BUILD
+++ b/pkg/sentry/fsimpl/host/BUILD
@@ -34,7 +34,6 @@ go_library(
         "//pkg/sentry/kernel",
         "//pkg/sentry/kernel/auth",
         "//pkg/sentry/memmap",
-        "//pkg/sentry/platform",
         "//pkg/sentry/socket/control",
         "//pkg/sentry/socket/unix",
         "//pkg/sentry/socket/unix/transport",
diff --git a/pkg/sentry/fsimpl/host/host.go b/pkg/sentry/fsimpl/host/host.go
index 1a88cb657..c894f2ca0 100644
--- a/pkg/sentry/fsimpl/host/host.go
+++ b/pkg/sentry/fsimpl/host/host.go
@@ -373,7 +373,7 @@ func (i *inode) fstat(fs *filesystem) (linux.Statx, error) {
 
 // SetStat implements kernfs.Inode.
 func (i *inode) SetStat(ctx context.Context, fs *vfs.Filesystem, creds *auth.Credentials, opts vfs.SetStatOptions) error {
-	s := opts.Stat
+	s := &opts.Stat
 
 	m := s.Mask
 	if m == 0 {
@@ -386,7 +386,7 @@ func (i *inode) SetStat(ctx context.Context, fs *vfs.Filesystem, creds *auth.Cre
 	if err := syscall.Fstat(i.hostFD, &hostStat); err != nil {
 		return err
 	}
-	if err := vfs.CheckSetStat(ctx, creds, &s, linux.FileMode(hostStat.Mode&linux.PermissionsMask), auth.KUID(hostStat.Uid), auth.KGID(hostStat.Gid)); err != nil {
+	if err := vfs.CheckSetStat(ctx, creds, &opts, linux.FileMode(hostStat.Mode), auth.KUID(hostStat.Uid), auth.KGID(hostStat.Gid)); err != nil {
 		return err
 	}
 
@@ -396,6 +396,9 @@ func (i *inode) SetStat(ctx context.Context, fs *vfs.Filesystem, creds *auth.Cre
 		}
 	}
 	if m&linux.STATX_SIZE != 0 {
+		if hostStat.Mode&linux.S_IFMT != linux.S_IFREG {
+			return syserror.EINVAL
+		}
 		if err := syscall.Ftruncate(i.hostFD, int64(s.Size)); err != nil {
 			return err
 		}
diff --git a/pkg/sentry/fsimpl/host/mmap.go b/pkg/sentry/fsimpl/host/mmap.go
index 8545a82f0..65d3af38c 100644
--- a/pkg/sentry/fsimpl/host/mmap.go
+++ b/pkg/sentry/fsimpl/host/mmap.go
@@ -19,13 +19,12 @@ import (
 	"gvisor.dev/gvisor/pkg/safemem"
 	"gvisor.dev/gvisor/pkg/sentry/fs/fsutil"
 	"gvisor.dev/gvisor/pkg/sentry/memmap"
-	"gvisor.dev/gvisor/pkg/sentry/platform"
 	"gvisor.dev/gvisor/pkg/sync"
 	"gvisor.dev/gvisor/pkg/usermem"
 )
 
-// inodePlatformFile implements platform.File. It exists solely because inode
-// cannot implement both kernfs.Inode.IncRef and platform.File.IncRef.
+// inodePlatformFile implements memmap.File. It exists solely because inode
+// cannot implement both kernfs.Inode.IncRef and memmap.File.IncRef.
 //
 // inodePlatformFile should only be used if inode.canMap is true.
 type inodePlatformFile struct {
@@ -34,7 +33,7 @@ type inodePlatformFile struct {
 	// fdRefsMu protects fdRefs.
 	fdRefsMu sync.Mutex
 
-	// fdRefs counts references on platform.File offsets. It is used solely for
+	// fdRefs counts references on memmap.File offsets. It is used solely for
 	// memory accounting.
 	fdRefs fsutil.FrameRefSet
 
@@ -45,32 +44,32 @@ type inodePlatformFile struct {
 	fileMapperInitOnce sync.Once
 }
 
-// IncRef implements platform.File.IncRef.
+// IncRef implements memmap.File.IncRef.
 //
 // Precondition: i.inode.canMap must be true.
-func (i *inodePlatformFile) IncRef(fr platform.FileRange) {
+func (i *inodePlatformFile) IncRef(fr memmap.FileRange) {
 	i.fdRefsMu.Lock()
 	i.fdRefs.IncRefAndAccount(fr)
 	i.fdRefsMu.Unlock()
 }
 
-// DecRef implements platform.File.DecRef.
+// DecRef implements memmap.File.DecRef.
 //
 // Precondition: i.inode.canMap must be true.
-func (i *inodePlatformFile) DecRef(fr platform.FileRange) {
+func (i *inodePlatformFile) DecRef(fr memmap.FileRange) {
 	i.fdRefsMu.Lock()
 	i.fdRefs.DecRefAndAccount(fr)
 	i.fdRefsMu.Unlock()
 }
 
-// MapInternal implements platform.File.MapInternal.
+// MapInternal implements memmap.File.MapInternal.
 //
 // Precondition: i.inode.canMap must be true.
-func (i *inodePlatformFile) MapInternal(fr platform.FileRange, at usermem.AccessType) (safemem.BlockSeq, error) {
+func (i *inodePlatformFile) MapInternal(fr memmap.FileRange, at usermem.AccessType) (safemem.BlockSeq, error) {
 	return i.fileMapper.MapInternal(fr, i.hostFD, at.Write)
 }
 
-// FD implements platform.File.FD.
+// FD implements memmap.File.FD.
 func (i *inodePlatformFile) FD() int {
 	return i.hostFD
 }
diff --git a/pkg/sentry/fsimpl/kernfs/fd_impl_util.go b/pkg/sentry/fsimpl/kernfs/fd_impl_util.go
index 3f0aea73a..1d37ccb98 100644
--- a/pkg/sentry/fsimpl/kernfs/fd_impl_util.go
+++ b/pkg/sentry/fsimpl/kernfs/fd_impl_util.go
@@ -112,7 +112,7 @@ func (fd *GenericDirectoryFD) PWrite(ctx context.Context, src usermem.IOSequence
 	return fd.DirectoryFileDescriptionDefaultImpl.PWrite(ctx, src, offset, opts)
 }
 
-// Release implements vfs.FileDecriptionImpl.Release.
+// Release implements vfs.FileDescriptionImpl.Release.
 func (fd *GenericDirectoryFD) Release() {}
 
 func (fd *GenericDirectoryFD) filesystem() *vfs.Filesystem {
@@ -123,7 +123,7 @@ func (fd *GenericDirectoryFD) inode() Inode {
 	return fd.vfsfd.VirtualDentry().Dentry().Impl().(*Dentry).inode
 }
 
-// IterDirents implements vfs.FileDecriptionImpl.IterDirents. IterDirents holds
+// IterDirents implements vfs.FileDescriptionImpl.IterDirents. IterDirents holds
 // o.mu when calling cb.
 func (fd *GenericDirectoryFD) IterDirents(ctx context.Context, cb vfs.IterDirentsCallback) error {
 	fd.mu.Lock()
@@ -198,7 +198,7 @@ func (fd *GenericDirectoryFD) IterDirents(ctx context.Context, cb vfs.IterDirent
 	return err
 }
 
-// Seek implements vfs.FileDecriptionImpl.Seek.
+// Seek implements vfs.FileDescriptionImpl.Seek.
 func (fd *GenericDirectoryFD) Seek(ctx context.Context, offset int64, whence int32) (int64, error) {
 	fd.mu.Lock()
 	defer fd.mu.Unlock()
diff --git a/pkg/sentry/fsimpl/kernfs/inode_impl_util.go b/pkg/sentry/fsimpl/kernfs/inode_impl_util.go
index 2ab3f1761..579e627f0 100644
--- a/pkg/sentry/fsimpl/kernfs/inode_impl_util.go
+++ b/pkg/sentry/fsimpl/kernfs/inode_impl_util.go
@@ -267,7 +267,7 @@ func (a *InodeAttrs) SetStat(ctx context.Context, fs *vfs.Filesystem, creds *aut
 	if opts.Stat.Mask&^(linux.STATX_MODE|linux.STATX_UID|linux.STATX_GID) != 0 {
 		return syserror.EPERM
 	}
-	if err := vfs.CheckSetStat(ctx, creds, &opts.Stat, a.Mode(), auth.KUID(atomic.LoadUint32(&a.uid)), auth.KGID(atomic.LoadUint32(&a.gid))); err != nil {
+	if err := vfs.CheckSetStat(ctx, creds, &opts, a.Mode(), auth.KUID(atomic.LoadUint32(&a.uid)), auth.KGID(atomic.LoadUint32(&a.gid))); err != nil {
 		return err
 	}
 
diff --git a/pkg/sentry/fsimpl/overlay/filesystem.go b/pkg/sentry/fsimpl/overlay/filesystem.go
index ff82e1f20..6b705e955 100644
--- a/pkg/sentry/fsimpl/overlay/filesystem.go
+++ b/pkg/sentry/fsimpl/overlay/filesystem.go
@@ -1104,7 +1104,7 @@ func (fs *filesystem) SetStatAt(ctx context.Context, rp *vfs.ResolvingPath, opts
 	}
 
 	mode := linux.FileMode(atomic.LoadUint32(&d.mode))
-	if err := vfs.CheckSetStat(ctx, rp.Credentials(), &opts.Stat, mode, auth.KUID(atomic.LoadUint32(&d.uid)), auth.KGID(atomic.LoadUint32(&d.gid))); err != nil {
+	if err := vfs.CheckSetStat(ctx, rp.Credentials(), &opts, mode, auth.KUID(atomic.LoadUint32(&d.uid)), auth.KGID(atomic.LoadUint32(&d.gid))); err != nil {
 		return err
 	}
 	mnt := rp.Mount()
diff --git a/pkg/sentry/fsimpl/overlay/non_directory.go b/pkg/sentry/fsimpl/overlay/non_directory.go
index a3c1f7a8d..c0749e711 100644
--- a/pkg/sentry/fsimpl/overlay/non_directory.go
+++ b/pkg/sentry/fsimpl/overlay/non_directory.go
@@ -151,7 +151,7 @@ func (fd *nonDirectoryFD) Stat(ctx context.Context, opts vfs.StatOptions) (linux
 func (fd *nonDirectoryFD) SetStat(ctx context.Context, opts vfs.SetStatOptions) error {
 	d := fd.dentry()
 	mode := linux.FileMode(atomic.LoadUint32(&d.mode))
-	if err := vfs.CheckSetStat(ctx, auth.CredentialsFromContext(ctx), &opts.Stat, mode, auth.KUID(atomic.LoadUint32(&d.uid)), auth.KGID(atomic.LoadUint32(&d.gid))); err != nil {
+	if err := vfs.CheckSetStat(ctx, auth.CredentialsFromContext(ctx), &opts, mode, auth.KUID(atomic.LoadUint32(&d.uid)), auth.KGID(atomic.LoadUint32(&d.gid))); err != nil {
 		return err
 	}
 	mnt := fd.vfsfd.Mount()
@@ -176,7 +176,7 @@ func (fd *nonDirectoryFD) SetStat(ctx context.Context, opts vfs.SetStatOptions)
 	return nil
 }
 
-// StatFS implements vfs.FileDesciptionImpl.StatFS.
+// StatFS implements vfs.FileDescriptionImpl.StatFS.
 func (fd *nonDirectoryFD) StatFS(ctx context.Context) (linux.Statfs, error) {
 	return fd.filesystem().statFS(ctx)
 }
diff --git a/pkg/sentry/fsimpl/proc/subtasks.go b/pkg/sentry/fsimpl/proc/subtasks.go
index dad4db1a7..79c2725f3 100644
--- a/pkg/sentry/fsimpl/proc/subtasks.go
+++ b/pkg/sentry/fsimpl/proc/subtasks.go
@@ -128,7 +128,7 @@ func (fd *subtasksFD) IterDirents(ctx context.Context, cb vfs.IterDirentsCallbac
 	return fd.GenericDirectoryFD.IterDirents(ctx, cb)
 }
 
-// Seek implements vfs.FileDecriptionImpl.Seek.
+// Seek implements vfs.FileDescriptionImpl.Seek.
 func (fd *subtasksFD) Seek(ctx context.Context, offset int64, whence int32) (int64, error) {
 	if fd.task.ExitState() >= kernel.TaskExitZombie {
 		return 0, syserror.ENOENT
diff --git a/pkg/sentry/fsimpl/tmpfs/filesystem.go b/pkg/sentry/fsimpl/tmpfs/filesystem.go
index a0f20c2d4..ef210a69b 100644
--- a/pkg/sentry/fsimpl/tmpfs/filesystem.go
+++ b/pkg/sentry/fsimpl/tmpfs/filesystem.go
@@ -649,7 +649,7 @@ func (fs *filesystem) SetStatAt(ctx context.Context, rp *vfs.ResolvingPath, opts
 		fs.mu.RUnlock()
 		return err
 	}
-	if err := d.inode.setStat(ctx, rp.Credentials(), &opts.Stat); err != nil {
+	if err := d.inode.setStat(ctx, rp.Credentials(), &opts); err != nil {
 		fs.mu.RUnlock()
 		return err
 	}
diff --git a/pkg/sentry/fsimpl/tmpfs/regular_file.go b/pkg/sentry/fsimpl/tmpfs/regular_file.go
index 1cdb46e6f..abbaa5d60 100644
--- a/pkg/sentry/fsimpl/tmpfs/regular_file.go
+++ b/pkg/sentry/fsimpl/tmpfs/regular_file.go
@@ -325,8 +325,15 @@ func (fd *regularFileFD) Read(ctx context.Context, dst usermem.IOSequence, opts
 
 // PWrite implements vfs.FileDescriptionImpl.PWrite.
 func (fd *regularFileFD) PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (int64, error) {
+	n, _, err := fd.pwrite(ctx, src, offset, opts)
+	return n, err
+}
+
+// pwrite returns the number of bytes written, final offset and error. The
+// final offset should be ignored by PWrite.
+func (fd *regularFileFD) pwrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (written, finalOff int64, err error) {
 	if offset < 0 {
-		return 0, syserror.EINVAL
+		return 0, offset, syserror.EINVAL
 	}
 
 	// Check that flags are supported. RWF_DSYNC/RWF_SYNC can be ignored since
@@ -334,40 +341,44 @@ func (fd *regularFileFD) PWrite(ctx context.Context, src usermem.IOSequence, off
 	//
 	// TODO(gvisor.dev/issue/2601): Support select preadv2 flags.
 	if opts.Flags&^(linux.RWF_HIPRI|linux.RWF_DSYNC|linux.RWF_SYNC) != 0 {
-		return 0, syserror.EOPNOTSUPP
+		return 0, offset, syserror.EOPNOTSUPP
 	}
 
 	srclen := src.NumBytes()
 	if srclen == 0 {
-		return 0, nil
+		return 0, offset, nil
 	}
 	f := fd.inode().impl.(*regularFile)
+	f.inode.mu.Lock()
+	defer f.inode.mu.Unlock()
+	// If the file is opened with O_APPEND, update offset to file size.
+	if fd.vfsfd.StatusFlags()&linux.O_APPEND != 0 {
+		// Locking f.inode.mu is sufficient for reading f.size.
+		offset = int64(f.size)
+	}
 	if end := offset + srclen; end < offset {
 		// Overflow.
-		return 0, syserror.EINVAL
+		return 0, offset, syserror.EINVAL
 	}
 
-	var err error
 	srclen, err = vfs.CheckLimit(ctx, offset, srclen)
 	if err != nil {
-		return 0, err
+		return 0, offset, err
 	}
 	src = src.TakeFirst64(srclen)
 
-	f.inode.mu.Lock()
 	rw := getRegularFileReadWriter(f, offset)
 	n, err := src.CopyInTo(ctx, rw)
-	fd.inode().touchCMtimeLocked()
-	f.inode.mu.Unlock()
+	f.inode.touchCMtimeLocked()
 	putRegularFileReadWriter(rw)
-	return n, err
+	return n, n + offset, err
 }
 
 // Write implements vfs.FileDescriptionImpl.Write.
 func (fd *regularFileFD) Write(ctx context.Context, src usermem.IOSequence, opts vfs.WriteOptions) (int64, error) {
 	fd.offMu.Lock()
-	n, err := fd.PWrite(ctx, src, fd.off, opts)
-	fd.off += n
+	n, off, err := fd.pwrite(ctx, src, fd.off, opts)
+	fd.off = off
 	fd.offMu.Unlock()
 	return n, err
 }
diff --git a/pkg/sentry/fsimpl/tmpfs/tmpfs.go b/pkg/sentry/fsimpl/tmpfs/tmpfs.go
index d7f4f0779..2545d88e9 100644
--- a/pkg/sentry/fsimpl/tmpfs/tmpfs.go
+++ b/pkg/sentry/fsimpl/tmpfs/tmpfs.go
@@ -452,7 +452,8 @@ func (i *inode) statTo(stat *linux.Statx) {
 	}
 }
 
-func (i *inode) setStat(ctx context.Context, creds *auth.Credentials, stat *linux.Statx) error {
+func (i *inode) setStat(ctx context.Context, creds *auth.Credentials, opts *vfs.SetStatOptions) error {
+	stat := &opts.Stat
 	if stat.Mask == 0 {
 		return nil
 	}
@@ -460,7 +461,7 @@ func (i *inode) setStat(ctx context.Context, creds *auth.Credentials, stat *linu
 		return syserror.EPERM
 	}
 	mode := linux.FileMode(atomic.LoadUint32(&i.mode))
-	if err := vfs.CheckSetStat(ctx, creds, stat, mode, auth.KUID(atomic.LoadUint32(&i.uid)), auth.KGID(atomic.LoadUint32(&i.gid))); err != nil {
+	if err := vfs.CheckSetStat(ctx, creds, opts, mode, auth.KUID(atomic.LoadUint32(&i.uid)), auth.KGID(atomic.LoadUint32(&i.gid))); err != nil {
 		return err
 	}
 	i.mu.Lock()
@@ -695,7 +696,7 @@ func (fd *fileDescription) Stat(ctx context.Context, opts vfs.StatOptions) (linu
 func (fd *fileDescription) SetStat(ctx context.Context, opts vfs.SetStatOptions) error {
 	creds := auth.CredentialsFromContext(ctx)
 	d := fd.dentry()
-	if err := d.inode.setStat(ctx, creds, &opts.Stat); err != nil {
+	if err := d.inode.setStat(ctx, creds, &opts); err != nil {
 		return err
 	}
 
diff --git a/pkg/sentry/kernel/BUILD b/pkg/sentry/kernel/BUILD
index 25fe1921b..f6886a758 100644
--- a/pkg/sentry/kernel/BUILD
+++ b/pkg/sentry/kernel/BUILD
@@ -132,6 +132,7 @@ go_library(
         "task_stop.go",
         "task_syscall.go",
         "task_usermem.go",
+        "task_work.go",
         "thread_group.go",
         "threads.go",
         "timekeeper.go",
diff --git a/pkg/sentry/kernel/futex/futex.go b/pkg/sentry/kernel/futex/futex.go
index 732e66da4..bcc1b29a8 100644
--- a/pkg/sentry/kernel/futex/futex.go
+++ b/pkg/sentry/kernel/futex/futex.go
@@ -717,10 +717,10 @@ func (m *Manager) lockPILocked(w *Waiter, t Target, addr usermem.Addr, tid uint3
 	}
 }
 
-// UnlockPI unlock the futex following the Priority-inheritance futex
-// rules. The address provided must contain the caller's TID. If there are
-// waiters, TID of the next waiter (FIFO) is set to the given address, and the
-// waiter woken up. If there are no waiters, 0 is set to the address.
+// UnlockPI unlocks the futex following the Priority-inheritance futex rules.
+// The address provided must contain the caller's TID. If there are waiters,
+// TID of the next waiter (FIFO) is set to the given address, and the waiter
+// woken up. If there are no waiters, 0 is set to the address.
 func (m *Manager) UnlockPI(t Target, addr usermem.Addr, tid uint32, private bool) error {
 	k, err := getKey(t, addr, private)
 	if err != nil {
diff --git a/pkg/sentry/kernel/kernel.go b/pkg/sentry/kernel/kernel.go
index 240cd6fe0..15dae0f5b 100644
--- a/pkg/sentry/kernel/kernel.go
+++ b/pkg/sentry/kernel/kernel.go
@@ -1469,6 +1469,11 @@ func (k *Kernel) NowMonotonic() int64 {
 	return now
 }
 
+// AfterFunc implements tcpip.Clock.AfterFunc.
+func (k *Kernel) AfterFunc(d time.Duration, f func()) tcpip.Timer {
+	return ktime.TcpipAfterFunc(k.realtimeClock, d, f)
+}
+
 // SetMemoryFile sets Kernel.mf. SetMemoryFile must be called before Init or
 // LoadFrom.
 func (k *Kernel) SetMemoryFile(mf *pgalloc.MemoryFile) {
diff --git a/pkg/sentry/kernel/shm/BUILD b/pkg/sentry/kernel/shm/BUILD
index bfd779837..c211fc8d0 100644
--- a/pkg/sentry/kernel/shm/BUILD
+++ b/pkg/sentry/kernel/shm/BUILD
@@ -20,7 +20,6 @@ go_library(
         "//pkg/sentry/kernel/time",
         "//pkg/sentry/memmap",
         "//pkg/sentry/pgalloc",
-        "//pkg/sentry/platform",
         "//pkg/sentry/usage",
         "//pkg/sync",
         "//pkg/syserror",
diff --git a/pkg/sentry/kernel/shm/shm.go b/pkg/sentry/kernel/shm/shm.go
index f66cfcc7f..55b4c2cdb 100644
--- a/pkg/sentry/kernel/shm/shm.go
+++ b/pkg/sentry/kernel/shm/shm.go
@@ -45,7 +45,6 @@ import (
 	ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time"
 	"gvisor.dev/gvisor/pkg/sentry/memmap"
 	"gvisor.dev/gvisor/pkg/sentry/pgalloc"
-	"gvisor.dev/gvisor/pkg/sentry/platform"
 	"gvisor.dev/gvisor/pkg/sentry/usage"
 	"gvisor.dev/gvisor/pkg/sync"
 	"gvisor.dev/gvisor/pkg/syserror"
@@ -370,7 +369,7 @@ type Shm struct {
 
 	// fr is the offset into mfp.MemoryFile() that backs this contents of this
 	// segment. Immutable.
-	fr platform.FileRange
+	fr memmap.FileRange
 
 	// mu protects all fields below.
 	mu sync.Mutex `state:"nosave"`
diff --git a/pkg/sentry/kernel/task.go b/pkg/sentry/kernel/task.go
index f48247c94..c4db05bd8 100644
--- a/pkg/sentry/kernel/task.go
+++ b/pkg/sentry/kernel/task.go
@@ -68,6 +68,21 @@ type Task struct {
 	// runState is exclusive to the task goroutine.
 	runState taskRunState
 
+	// taskWorkCount represents the current size of the task work queue. It is
+	// used to avoid acquiring taskWorkMu when the queue is empty.
+	//
+	// Must accessed with atomic memory operations.
+	taskWorkCount int32
+
+	// taskWorkMu protects taskWork.
+	taskWorkMu sync.Mutex `state:"nosave"`
+
+	// taskWork is a queue of work to be executed before resuming user execution.
+	// It is similar to the task_work mechanism in Linux.
+	//
+	// taskWork is exclusive to the task goroutine.
+	taskWork []TaskWorker
+
 	// haveSyscallReturn is true if tc.Arch().Return() represents a value
 	// returned by a syscall (or set by ptrace after a syscall).
 	//
@@ -550,6 +565,10 @@ type Task struct {
 	// futexWaiter is exclusive to the task goroutine.
 	futexWaiter *futex.Waiter `state:"nosave"`
 
+	// robustList is a pointer to the head of the tasks's robust futex
+	// list.
+	robustList usermem.Addr
+
 	// startTime is the real time at which the task started. It is set when
 	// a Task is created or invokes execve(2).
 	//
diff --git a/pkg/sentry/kernel/task_exec.go b/pkg/sentry/kernel/task_exec.go
index 9b69f3cbe..7803b98d0 100644
--- a/pkg/sentry/kernel/task_exec.go
+++ b/pkg/sentry/kernel/task_exec.go
@@ -207,6 +207,9 @@ func (r *runSyscallAfterExecStop) execute(t *Task) taskRunState {
 		return flags.CloseOnExec
 	})
 
+	// Handle the robust futex list.
+	t.exitRobustList()
+
 	// NOTE(b/30815691): We currently do not implement privileged
 	// executables (set-user/group-ID bits and file capabilities). This
 	// allows us to unconditionally enable user dumpability on the new mm.
diff --git a/pkg/sentry/kernel/task_exit.go b/pkg/sentry/kernel/task_exit.go
index c4ade6e8e..231ac548a 100644
--- a/pkg/sentry/kernel/task_exit.go
+++ b/pkg/sentry/kernel/task_exit.go
@@ -253,6 +253,9 @@ func (*runExitMain) execute(t *Task) taskRunState {
 		}
 	}
 
+	// Handle the robust futex list.
+	t.exitRobustList()
+
 	// Deactivate the address space and update max RSS before releasing the
 	// task's MM.
 	t.Deactivate()
diff --git a/pkg/sentry/kernel/task_futex.go b/pkg/sentry/kernel/task_futex.go
index a53e77c9f..4b535c949 100644
--- a/pkg/sentry/kernel/task_futex.go
+++ b/pkg/sentry/kernel/task_futex.go
@@ -15,6 +15,7 @@
 package kernel
 
 import (
+	"gvisor.dev/gvisor/pkg/abi/linux"
 	"gvisor.dev/gvisor/pkg/sentry/kernel/futex"
 	"gvisor.dev/gvisor/pkg/usermem"
 )
@@ -52,3 +53,127 @@ func (t *Task) LoadUint32(addr usermem.Addr) (uint32, error) {
 func (t *Task) GetSharedKey(addr usermem.Addr) (futex.Key, error) {
 	return t.MemoryManager().GetSharedFutexKey(t, addr)
 }
+
+// GetRobustList sets the robust futex list for the task.
+func (t *Task) GetRobustList() usermem.Addr {
+	t.mu.Lock()
+	addr := t.robustList
+	t.mu.Unlock()
+	return addr
+}
+
+// SetRobustList sets the robust futex list for the task.
+func (t *Task) SetRobustList(addr usermem.Addr) {
+	t.mu.Lock()
+	t.robustList = addr
+	t.mu.Unlock()
+}
+
+// exitRobustList walks the robust futex list, marking locks dead and notifying
+// wakers. It corresponds to Linux's exit_robust_list(). Following Linux,
+// errors are silently ignored.
+func (t *Task) exitRobustList() {
+	t.mu.Lock()
+	addr := t.robustList
+	t.robustList = 0
+	t.mu.Unlock()
+
+	if addr == 0 {
+		return
+	}
+
+	var rl linux.RobustListHead
+	if _, err := rl.CopyIn(t, usermem.Addr(addr)); err != nil {
+		return
+	}
+
+	next := rl.List
+	done := 0
+	var pendingLockAddr usermem.Addr
+	if rl.ListOpPending != 0 {
+		pendingLockAddr = usermem.Addr(rl.ListOpPending + rl.FutexOffset)
+	}
+
+	// Wake up normal elements.
+	for usermem.Addr(next) != addr {
+		// We traverse to the next element of the list before we
+		// actually wake anything. This prevents the race where waking
+		// this futex causes a modification of the list.
+		thisLockAddr := usermem.Addr(next + rl.FutexOffset)
+
+		// Try to decode the next element in the list before waking the
+		// current futex. But don't check the error until after we've
+		// woken the current futex. Linux does it in this order too
+		_, nextErr := t.CopyIn(usermem.Addr(next), &next)
+
+		// Wakeup the current futex if it's not pending.
+		if thisLockAddr != pendingLockAddr {
+			t.wakeRobustListOne(thisLockAddr)
+		}
+
+		// If there was an error copying the next futex, we must bail.
+		if nextErr != nil {
+			break
+		}
+
+		// This is a user structure, so it could be a massive list, or
+		// even contain a loop if they are trying to mess with us. We
+		// cap traversal to prevent that.
+		done++
+		if done >= linux.ROBUST_LIST_LIMIT {
+			break
+		}
+	}
+
+	// Is there a pending entry to wake?
+	if pendingLockAddr != 0 {
+		t.wakeRobustListOne(pendingLockAddr)
+	}
+}
+
+// wakeRobustListOne wakes a single futex from the robust list.
+func (t *Task) wakeRobustListOne(addr usermem.Addr) {
+	// Bit 0 in address signals PI futex.
+	pi := addr&1 == 1
+	addr = addr &^ 1
+
+	// Load the futex.
+	f, err := t.LoadUint32(addr)
+	if err != nil {
+		// Can't read this single value? Ignore the problem.
+		// We can wake the other futexes in the list.
+		return
+	}
+
+	tid := uint32(t.ThreadID())
+	for {
+		// Is this held by someone else?
+		if f&linux.FUTEX_TID_MASK != tid {
+			return
+		}
+
+		// This thread is dying and it's holding this futex. We need to
+		// set the owner died bit and wake up any waiters.
+		newF := (f & linux.FUTEX_WAITERS) | linux.FUTEX_OWNER_DIED
+		if curF, err := t.CompareAndSwapUint32(addr, f, newF); err != nil {
+			return
+		} else if curF != f {
+			// Futex changed out from under us. Try again...
+			f = curF
+			continue
+		}
+
+		// Wake waiters if there are any.
+		if f&linux.FUTEX_WAITERS != 0 {
+			private := f&linux.FUTEX_PRIVATE_FLAG != 0
+			if pi {
+				t.Futex().UnlockPI(t, addr, tid, private)
+				return
+			}
+			t.Futex().Wake(t, addr, private, linux.FUTEX_BITSET_MATCH_ANY, 1)
+		}
+
+		// Done.
+		return
+	}
+}
diff --git a/pkg/sentry/kernel/task_run.go b/pkg/sentry/kernel/task_run.go
index d654dd997..7d4f44caf 100644
--- a/pkg/sentry/kernel/task_run.go
+++ b/pkg/sentry/kernel/task_run.go
@@ -167,7 +167,22 @@ func (app *runApp) execute(t *Task) taskRunState {
 		return (*runInterrupt)(nil)
 	}
 
-	// We're about to switch to the application again. If there's still a
+	// Execute any task work callbacks before returning to user space.
+	if atomic.LoadInt32(&t.taskWorkCount) > 0 {
+		t.taskWorkMu.Lock()
+		queue := t.taskWork
+		t.taskWork = nil
+		atomic.StoreInt32(&t.taskWorkCount, 0)
+		t.taskWorkMu.Unlock()
+
+		// Do not hold taskWorkMu while executing task work, which may register
+		// more work.
+		for _, work := range queue {
+			work.TaskWork(t)
+		}
+	}
+
+	// We're about to switch to the application again. If there's still an
 	// unhandled SyscallRestartErrno that wasn't translated to an EINTR,
 	// restart the syscall that was interrupted. If there's a saved signal
 	// mask, restore it. (Note that restoring the saved signal mask may unblock
diff --git a/pkg/sentry/kernel/task_work.go b/pkg/sentry/kernel/task_work.go
new file mode 100644
index 000000000..dda5a433a
--- /dev/null
+++ b/pkg/sentry/kernel/task_work.go
@@ -0,0 +1,38 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package kernel
+
+import "sync/atomic"
+
+// TaskWorker is a deferred task.
+//
+// This must be savable.
+type TaskWorker interface {
+	// TaskWork will be executed prior to returning to user space. Note that
+	// TaskWork may call RegisterWork again, but this will not be executed until
+	// the next return to user space, unlike in Linux. This effectively allows
+	// registration of indefinite user return hooks, but not by default.
+	TaskWork(t *Task)
+}
+
+// RegisterWork can be used to register additional task work that will be
+// performed prior to returning to user space. See TaskWorker.TaskWork for
+// semantics regarding registration.
+func (t *Task) RegisterWork(work TaskWorker) {
+	t.taskWorkMu.Lock()
+	defer t.taskWorkMu.Unlock()
+	atomic.AddInt32(&t.taskWorkCount, 1)
+	t.taskWork = append(t.taskWork, work)
+}
diff --git a/pkg/sentry/kernel/time/BUILD b/pkg/sentry/kernel/time/BUILD
index 7ba7dc50c..2817aa3ba 100644
--- a/pkg/sentry/kernel/time/BUILD
+++ b/pkg/sentry/kernel/time/BUILD
@@ -6,6 +6,7 @@ go_library(
     name = "time",
     srcs = [
         "context.go",
+        "tcpip.go",
         "time.go",
     ],
     visibility = ["//pkg/sentry:internal"],
diff --git a/pkg/sentry/kernel/time/tcpip.go b/pkg/sentry/kernel/time/tcpip.go
new file mode 100644
index 000000000..c4474c0cf
--- /dev/null
+++ b/pkg/sentry/kernel/time/tcpip.go
@@ -0,0 +1,131 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package time
+
+import (
+	"sync"
+	"time"
+)
+
+// TcpipAfterFunc waits for duration to elapse according to clock then runs fn.
+// The timer is started immediately and will fire exactly once.
+func TcpipAfterFunc(clock Clock, duration time.Duration, fn func()) *TcpipTimer {
+	timer := &TcpipTimer{
+		clock: clock,
+	}
+	timer.notifier = functionNotifier{
+		fn: func() {
+			// tcpip.Timer.Stop() explicitly states that the function is called in a
+			// separate goroutine that Stop() does not synchronize with.
+			// Timer.Destroy() synchronizes with calls to TimerListener.Notify().
+			// This is semantically meaningful because, in the former case, it's
+			// legal to call tcpip.Timer.Stop() while holding locks that may also be
+			// taken by the function, but this isn't so in the latter case. Most
+			// immediately, Timer calls TimerListener.Notify() while holding
+			// Timer.mu. A deadlock occurs without spawning a goroutine:
+			//   T1: (Timer expires)
+			//     => Timer.Tick()           <- Timer.mu.Lock() called
+			//     => TimerListener.Notify()
+			//     => Timer.Stop()
+			//     => Timer.Destroy()        <- Timer.mu.Lock() called, deadlock!
+			//
+			// Spawning a goroutine avoids the deadlock:
+			//   T1: (Timer expires)
+			//     => Timer.Tick()           <- Timer.mu.Lock() called
+			//     => TimerListener.Notify() <- Launches T2
+			//   T2:
+			//     => Timer.Stop()
+			//     => Timer.Destroy()        <- Timer.mu.Lock() called, blocks
+			//   T1:
+			//     => (returns)              <- Timer.mu.Unlock() called
+			//   T2:
+			//     => (continues)            <- No deadlock!
+			go func() {
+				timer.Stop()
+				fn()
+			}()
+		},
+	}
+	timer.Reset(duration)
+	return timer
+}
+
+// TcpipTimer is a resettable timer with variable duration expirations.
+// Implements tcpip.Timer, which does not define a Destroy method; instead, all
+// resources are released after timer expiration and calls to Timer.Stop.
+//
+// Must be created by AfterFunc.
+type TcpipTimer struct {
+	// clock is the time source. clock is immutable.
+	clock Clock
+
+	// notifier is called when the Timer expires. notifier is immutable.
+	notifier functionNotifier
+
+	// mu protects t.
+	mu sync.Mutex
+
+	// t stores the latest running Timer. This is replaced whenever Reset is
+	// called since Timer cannot be restarted once it has been Destroyed by Stop.
+	//
+	// This field is nil iff Stop has been called.
+	t *Timer
+}
+
+// Stop implements tcpip.Timer.Stop.
+func (r *TcpipTimer) Stop() bool {
+	r.mu.Lock()
+	defer r.mu.Unlock()
+
+	if r.t == nil {
+		return false
+	}
+	_, lastSetting := r.t.Swap(Setting{})
+	r.t.Destroy()
+	r.t = nil
+	return lastSetting.Enabled
+}
+
+// Reset implements tcpip.Timer.Reset.
+func (r *TcpipTimer) Reset(d time.Duration) {
+	r.mu.Lock()
+	defer r.mu.Unlock()
+
+	if r.t == nil {
+		r.t = NewTimer(r.clock, &r.notifier)
+	}
+
+	r.t.Swap(Setting{
+		Enabled: true,
+		Period:  0,
+		Next:    r.clock.Now().Add(d),
+	})
+}
+
+// functionNotifier is a TimerListener that runs a function.
+//
+// functionNotifier cannot be saved or loaded.
+type functionNotifier struct {
+	fn func()
+}
+
+// Notify implements ktime.TimerListener.Notify.
+func (f *functionNotifier) Notify(uint64, Setting) (Setting, bool) {
+	f.fn()
+	return Setting{}, false
+}
+
+// Destroy implements ktime.TimerListener.Destroy.
+func (f *functionNotifier) Destroy() {}
diff --git a/pkg/sentry/kernel/timekeeper.go b/pkg/sentry/kernel/timekeeper.go
index 5f3908d8b..7c4fefb16 100644
--- a/pkg/sentry/kernel/timekeeper.go
+++ b/pkg/sentry/kernel/timekeeper.go
@@ -21,8 +21,8 @@ import (
 
 	"gvisor.dev/gvisor/pkg/log"
 	ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time"
+	"gvisor.dev/gvisor/pkg/sentry/memmap"
 	"gvisor.dev/gvisor/pkg/sentry/pgalloc"
-	"gvisor.dev/gvisor/pkg/sentry/platform"
 	sentrytime "gvisor.dev/gvisor/pkg/sentry/time"
 	"gvisor.dev/gvisor/pkg/sync"
 )
@@ -90,7 +90,7 @@ type Timekeeper struct {
 // NewTimekeeper does not take ownership of paramPage.
 //
 // SetClocks must be called on the returned Timekeeper before it is usable.
-func NewTimekeeper(mfp pgalloc.MemoryFileProvider, paramPage platform.FileRange) (*Timekeeper, error) {
+func NewTimekeeper(mfp pgalloc.MemoryFileProvider, paramPage memmap.FileRange) (*Timekeeper, error) {
 	return &Timekeeper{
 		params: NewVDSOParamPage(mfp, paramPage),
 	}, nil
diff --git a/pkg/sentry/kernel/vdso.go b/pkg/sentry/kernel/vdso.go
index f1b3c212c..290c32466 100644
--- a/pkg/sentry/kernel/vdso.go
+++ b/pkg/sentry/kernel/vdso.go
@@ -19,8 +19,8 @@ import (
 
 	"gvisor.dev/gvisor/pkg/binary"
 	"gvisor.dev/gvisor/pkg/safemem"
+	"gvisor.dev/gvisor/pkg/sentry/memmap"
 	"gvisor.dev/gvisor/pkg/sentry/pgalloc"
-	"gvisor.dev/gvisor/pkg/sentry/platform"
 	"gvisor.dev/gvisor/pkg/usermem"
 )
 
@@ -58,7 +58,7 @@ type vdsoParams struct {
 type VDSOParamPage struct {
 	// The parameter page is fr, allocated from mfp.MemoryFile().
 	mfp pgalloc.MemoryFileProvider
-	fr  platform.FileRange
+	fr  memmap.FileRange
 
 	// seq is the current sequence count written to the page.
 	//
@@ -81,7 +81,7 @@ type VDSOParamPage struct {
 // * VDSOParamPage must be the only writer to fr.
 //
 // * mfp.MemoryFile().MapInternal(fr) must return a single safemem.Block.
-func NewVDSOParamPage(mfp pgalloc.MemoryFileProvider, fr platform.FileRange) *VDSOParamPage {
+func NewVDSOParamPage(mfp pgalloc.MemoryFileProvider, fr memmap.FileRange) *VDSOParamPage {
 	return &VDSOParamPage{mfp: mfp, fr: fr}
 }
 
diff --git a/pkg/sentry/memmap/BUILD b/pkg/sentry/memmap/BUILD
index a98b66de1..2c95669cd 100644
--- a/pkg/sentry/memmap/BUILD
+++ b/pkg/sentry/memmap/BUILD
@@ -28,9 +28,21 @@ go_template_instance(
     },
 )
 
+go_template_instance(
+    name = "file_range",
+    out = "file_range.go",
+    package = "memmap",
+    prefix = "File",
+    template = "//pkg/segment:generic_range",
+    types = {
+        "T": "uint64",
+    },
+)
+
 go_library(
     name = "memmap",
     srcs = [
+        "file_range.go",
         "mappable_range.go",
         "mapping_set.go",
         "mapping_set_impl.go",
@@ -40,7 +52,7 @@ go_library(
     deps = [
         "//pkg/context",
         "//pkg/log",
-        "//pkg/sentry/platform",
+        "//pkg/safemem",
         "//pkg/syserror",
         "//pkg/usermem",
     ],
diff --git a/pkg/sentry/memmap/memmap.go b/pkg/sentry/memmap/memmap.go
index c6db9fc8f..c188f6c29 100644
--- a/pkg/sentry/memmap/memmap.go
+++ b/pkg/sentry/memmap/memmap.go
@@ -19,12 +19,12 @@ import (
 	"fmt"
 
 	"gvisor.dev/gvisor/pkg/context"
-	"gvisor.dev/gvisor/pkg/sentry/platform"
+	"gvisor.dev/gvisor/pkg/safemem"
 	"gvisor.dev/gvisor/pkg/usermem"
 )
 
 // Mappable represents a memory-mappable object, a mutable mapping from uint64
-// offsets to (platform.File, uint64 File offset) pairs.
+// offsets to (File, uint64 File offset) pairs.
 //
 // See mm/mm.go for Mappable's place in the lock order.
 //
@@ -74,7 +74,7 @@ type Mappable interface {
 	// Translations are valid until invalidated by a callback to
 	// MappingSpace.Invalidate or until the caller removes its mapping of the
 	// translated range. Mappable implementations must ensure that at least one
-	// reference is held on all pages in a platform.File that may be the result
+	// reference is held on all pages in a File that may be the result
 	// of a valid Translation.
 	//
 	// Preconditions: required.Length() > 0. optional.IsSupersetOf(required).
@@ -100,7 +100,7 @@ type Translation struct {
 	Source MappableRange
 
 	// File is the mapped file.
-	File platform.File
+	File File
 
 	// Offset is the offset into File at which this Translation begins.
 	Offset uint64
@@ -110,9 +110,9 @@ type Translation struct {
 	Perms usermem.AccessType
 }
 
-// FileRange returns the platform.FileRange represented by t.
-func (t Translation) FileRange() platform.FileRange {
-	return platform.FileRange{t.Offset, t.Offset + t.Source.Length()}
+// FileRange returns the FileRange represented by t.
+func (t Translation) FileRange() FileRange {
+	return FileRange{t.Offset, t.Offset + t.Source.Length()}
 }
 
 // CheckTranslateResult returns an error if (ts, terr) does not satisfy all
@@ -361,3 +361,49 @@ type MMapOpts struct {
 	// TODO(jamieliu): Replace entirely with MappingIdentity?
 	Hint string
 }
+
+// File represents a host file that may be mapped into an platform.AddressSpace.
+type File interface {
+	// All pages in a File are reference-counted.
+
+	// IncRef increments the reference count on all pages in fr.
+	//
+	// Preconditions: fr.Start and fr.End must be page-aligned. fr.Length() >
+	// 0. At least one reference must be held on all pages in fr. (The File
+	// interface does not provide a way to acquire an initial reference;
+	// implementors may define mechanisms for doing so.)
+	IncRef(fr FileRange)
+
+	// DecRef decrements the reference count on all pages in fr.
+	//
+	// Preconditions: fr.Start and fr.End must be page-aligned. fr.Length() >
+	// 0. At least one reference must be held on all pages in fr.
+	DecRef(fr FileRange)
+
+	// MapInternal returns a mapping of the given file offsets in the invoking
+	// process' address space for reading and writing.
+	//
+	// Note that fr.Start and fr.End need not be page-aligned.
+	//
+	// Preconditions: fr.Length() > 0. At least one reference must be held on
+	// all pages in fr.
+	//
+	// Postconditions: The returned mapping is valid as long as at least one
+	// reference is held on the mapped pages.
+	MapInternal(fr FileRange, at usermem.AccessType) (safemem.BlockSeq, error)
+
+	// FD returns the file descriptor represented by the File.
+	//
+	// The only permitted operation on the returned file descriptor is to map
+	// pages from it consistent with the requirements of AddressSpace.MapFile.
+	FD() int
+}
+
+// FileRange represents a range of uint64 offsets into a File.
+//
+// type FileRange <generated using go_generics>
+
+// String implements fmt.Stringer.String.
+func (fr FileRange) String() string {
+	return fmt.Sprintf("[%#x, %#x)", fr.Start, fr.End)
+}
diff --git a/pkg/sentry/mm/BUILD b/pkg/sentry/mm/BUILD
index a036ce53c..f9d0837a1 100644
--- a/pkg/sentry/mm/BUILD
+++ b/pkg/sentry/mm/BUILD
@@ -7,14 +7,14 @@ go_template_instance(
     name = "file_refcount_set",
     out = "file_refcount_set.go",
     imports = {
-        "platform": "gvisor.dev/gvisor/pkg/sentry/platform",
+        "memmap": "gvisor.dev/gvisor/pkg/sentry/memmap",
     },
     package = "mm",
     prefix = "fileRefcount",
     template = "//pkg/segment:generic_set",
     types = {
         "Key": "uint64",
-        "Range": "platform.FileRange",
+        "Range": "memmap.FileRange",
         "Value": "int32",
         "Functions": "fileRefcountSetFunctions",
     },
diff --git a/pkg/sentry/mm/aio_context.go b/pkg/sentry/mm/aio_context.go
index 379148903..1999ec706 100644
--- a/pkg/sentry/mm/aio_context.go
+++ b/pkg/sentry/mm/aio_context.go
@@ -20,7 +20,6 @@ import (
 	"gvisor.dev/gvisor/pkg/refs"
 	"gvisor.dev/gvisor/pkg/sentry/memmap"
 	"gvisor.dev/gvisor/pkg/sentry/pgalloc"
-	"gvisor.dev/gvisor/pkg/sentry/platform"
 	"gvisor.dev/gvisor/pkg/sentry/usage"
 	"gvisor.dev/gvisor/pkg/sync"
 	"gvisor.dev/gvisor/pkg/syserror"
@@ -243,7 +242,7 @@ type aioMappable struct {
 	refs.AtomicRefCount
 
 	mfp pgalloc.MemoryFileProvider
-	fr  platform.FileRange
+	fr  memmap.FileRange
 }
 
 var aioRingBufferSize = uint64(usermem.Addr(linux.AIORingSize).MustRoundUp())
diff --git a/pkg/sentry/mm/mm.go b/pkg/sentry/mm/mm.go
index 6db7c3d40..3e85964e4 100644
--- a/pkg/sentry/mm/mm.go
+++ b/pkg/sentry/mm/mm.go
@@ -25,7 +25,7 @@
 //           Locks taken by memmap.Mappable.Translate
 //             mm.privateRefs.mu
 //               platform.AddressSpace locks
-//                 platform.File locks
+//                 memmap.File locks
 //         mm.aioManager.mu
 //           mm.AIOContext.mu
 //
@@ -396,7 +396,7 @@ type pma struct {
 	// file is the file mapped by this pma. Only pmas for which file ==
 	// MemoryManager.mfp.MemoryFile() may be saved. pmas hold a reference to
 	// the corresponding file range while they exist.
-	file platform.File `state:"nosave"`
+	file memmap.File `state:"nosave"`
 
 	// off is the offset into file at which this pma begins.
 	//
@@ -436,7 +436,7 @@ type pma struct {
 	private bool
 
 	// If internalMappings is not empty, it is the cached return value of
-	// file.MapInternal for the platform.FileRange mapped by this pma.
+	// file.MapInternal for the memmap.FileRange mapped by this pma.
 	internalMappings safemem.BlockSeq `state:"nosave"`
 }
 
@@ -469,10 +469,10 @@ func (fileRefcountSetFunctions) MaxKey() uint64 {
 func (fileRefcountSetFunctions) ClearValue(_ *int32) {
 }
 
-func (fileRefcountSetFunctions) Merge(_ platform.FileRange, rc1 int32, _ platform.FileRange, rc2 int32) (int32, bool) {
+func (fileRefcountSetFunctions) Merge(_ memmap.FileRange, rc1 int32, _ memmap.FileRange, rc2 int32) (int32, bool) {
 	return rc1, rc1 == rc2
 }
 
-func (fileRefcountSetFunctions) Split(_ platform.FileRange, rc int32, _ uint64) (int32, int32) {
+func (fileRefcountSetFunctions) Split(_ memmap.FileRange, rc int32, _ uint64) (int32, int32) {
 	return rc, rc
 }
diff --git a/pkg/sentry/mm/pma.go b/pkg/sentry/mm/pma.go
index 62e4c20af..930ec895f 100644
--- a/pkg/sentry/mm/pma.go
+++ b/pkg/sentry/mm/pma.go
@@ -21,7 +21,6 @@ import (
 	"gvisor.dev/gvisor/pkg/safecopy"
 	"gvisor.dev/gvisor/pkg/safemem"
 	"gvisor.dev/gvisor/pkg/sentry/memmap"
-	"gvisor.dev/gvisor/pkg/sentry/platform"
 	"gvisor.dev/gvisor/pkg/sentry/usage"
 	"gvisor.dev/gvisor/pkg/syserror"
 	"gvisor.dev/gvisor/pkg/usermem"
@@ -604,7 +603,7 @@ func (mm *MemoryManager) invalidateLocked(ar usermem.AddrRange, invalidatePrivat
 	}
 }
 
-// Pin returns the platform.File ranges currently mapped by addresses in ar in
+// Pin returns the memmap.File ranges currently mapped by addresses in ar in
 // mm, acquiring a reference on the returned ranges which the caller must
 // release by calling Unpin. If not all addresses are mapped, Pin returns a
 // non-nil error. Note that Pin may return both a non-empty slice of
@@ -674,15 +673,15 @@ type PinnedRange struct {
 	Source usermem.AddrRange
 
 	// File is the mapped file.
-	File platform.File
+	File memmap.File
 
 	// Offset is the offset into File at which this PinnedRange begins.
 	Offset uint64
 }
 
-// FileRange returns the platform.File offsets mapped by pr.
-func (pr PinnedRange) FileRange() platform.FileRange {
-	return platform.FileRange{pr.Offset, pr.Offset + uint64(pr.Source.Length())}
+// FileRange returns the memmap.File offsets mapped by pr.
+func (pr PinnedRange) FileRange() memmap.FileRange {
+	return memmap.FileRange{pr.Offset, pr.Offset + uint64(pr.Source.Length())}
 }
 
 // Unpin releases the reference held by prs.
@@ -857,7 +856,7 @@ func (mm *MemoryManager) vecInternalMappingsLocked(ars usermem.AddrRangeSeq) saf
 }
 
 // incPrivateRef acquires a reference on private pages in fr.
-func (mm *MemoryManager) incPrivateRef(fr platform.FileRange) {
+func (mm *MemoryManager) incPrivateRef(fr memmap.FileRange) {
 	mm.privateRefs.mu.Lock()
 	defer mm.privateRefs.mu.Unlock()
 	refSet := &mm.privateRefs.refs
@@ -878,8 +877,8 @@ func (mm *MemoryManager) incPrivateRef(fr platform.FileRange) {
 }
 
 // decPrivateRef releases a reference on private pages in fr.
-func (mm *MemoryManager) decPrivateRef(fr platform.FileRange) {
-	var freed []platform.FileRange
+func (mm *MemoryManager) decPrivateRef(fr memmap.FileRange) {
+	var freed []memmap.FileRange
 
 	mm.privateRefs.mu.Lock()
 	refSet := &mm.privateRefs.refs
@@ -951,7 +950,7 @@ func (pmaSetFunctions) Merge(ar1 usermem.AddrRange, pma1 pma, ar2 usermem.AddrRa
 
 	// Discard internal mappings instead of trying to merge them, since merging
 	// them requires an allocation and getting them again from the
-	// platform.File might not.
+	// memmap.File might not.
 	pma1.internalMappings = safemem.BlockSeq{}
 	return pma1, true
 }
@@ -1012,12 +1011,12 @@ func (pseg pmaIterator) getInternalMappingsLocked() error {
 	return nil
 }
 
-func (pseg pmaIterator) fileRange() platform.FileRange {
+func (pseg pmaIterator) fileRange() memmap.FileRange {
 	return pseg.fileRangeOf(pseg.Range())
 }
 
 // Preconditions: pseg.Range().IsSupersetOf(ar). ar.Length != 0.
-func (pseg pmaIterator) fileRangeOf(ar usermem.AddrRange) platform.FileRange {
+func (pseg pmaIterator) fileRangeOf(ar usermem.AddrRange) memmap.FileRange {
 	if checkInvariants {
 		if !pseg.Ok() {
 			panic("terminal pma iterator")
@@ -1032,5 +1031,5 @@ func (pseg pmaIterator) fileRangeOf(ar usermem.AddrRange) platform.FileRange {
 
 	pma := pseg.ValuePtr()
 	pstart := pseg.Start()
-	return platform.FileRange{pma.off + uint64(ar.Start-pstart), pma.off + uint64(ar.End-pstart)}
+	return memmap.FileRange{pma.off + uint64(ar.Start-pstart), pma.off + uint64(ar.End-pstart)}
 }
diff --git a/pkg/sentry/mm/special_mappable.go b/pkg/sentry/mm/special_mappable.go
index 9ad52082d..0e142fb11 100644
--- a/pkg/sentry/mm/special_mappable.go
+++ b/pkg/sentry/mm/special_mappable.go
@@ -19,7 +19,6 @@ import (
 	"gvisor.dev/gvisor/pkg/refs"
 	"gvisor.dev/gvisor/pkg/sentry/memmap"
 	"gvisor.dev/gvisor/pkg/sentry/pgalloc"
-	"gvisor.dev/gvisor/pkg/sentry/platform"
 	"gvisor.dev/gvisor/pkg/sentry/usage"
 	"gvisor.dev/gvisor/pkg/syserror"
 	"gvisor.dev/gvisor/pkg/usermem"
@@ -35,7 +34,7 @@ type SpecialMappable struct {
 	refs.AtomicRefCount
 
 	mfp  pgalloc.MemoryFileProvider
-	fr   platform.FileRange
+	fr   memmap.FileRange
 	name string
 }
 
@@ -44,7 +43,7 @@ type SpecialMappable struct {
 // SpecialMappable will use the given name in /proc/[pid]/maps.
 //
 // Preconditions: fr.Length() != 0.
-func NewSpecialMappable(name string, mfp pgalloc.MemoryFileProvider, fr platform.FileRange) *SpecialMappable {
+func NewSpecialMappable(name string, mfp pgalloc.MemoryFileProvider, fr memmap.FileRange) *SpecialMappable {
 	m := SpecialMappable{mfp: mfp, fr: fr, name: name}
 	m.EnableLeakCheck("mm.SpecialMappable")
 	return &m
@@ -126,7 +125,7 @@ func (m *SpecialMappable) MemoryFileProvider() pgalloc.MemoryFileProvider {
 
 // FileRange returns the offsets into MemoryFileProvider().MemoryFile() that
 // store the SpecialMappable's contents.
-func (m *SpecialMappable) FileRange() platform.FileRange {
+func (m *SpecialMappable) FileRange() memmap.FileRange {
 	return m.fr
 }
 
diff --git a/pkg/sentry/pgalloc/BUILD b/pkg/sentry/pgalloc/BUILD
index e1fcb175f..7a3311a70 100644
--- a/pkg/sentry/pgalloc/BUILD
+++ b/pkg/sentry/pgalloc/BUILD
@@ -36,14 +36,14 @@ go_template_instance(
         "trackGaps": "1",
     },
     imports = {
-        "platform": "gvisor.dev/gvisor/pkg/sentry/platform",
+        "memmap": "gvisor.dev/gvisor/pkg/sentry/memmap",
     },
     package = "pgalloc",
     prefix = "usage",
     template = "//pkg/segment:generic_set",
     types = {
         "Key": "uint64",
-        "Range": "platform.FileRange",
+        "Range": "memmap.FileRange",
         "Value": "usageInfo",
         "Functions": "usageSetFunctions",
     },
@@ -56,14 +56,14 @@ go_template_instance(
         "minDegree": "10",
     },
     imports = {
-        "platform": "gvisor.dev/gvisor/pkg/sentry/platform",
+        "memmap": "gvisor.dev/gvisor/pkg/sentry/memmap",
     },
     package = "pgalloc",
     prefix = "reclaim",
     template = "//pkg/segment:generic_set",
     types = {
         "Key": "uint64",
-        "Range": "platform.FileRange",
+        "Range": "memmap.FileRange",
         "Value": "reclaimSetValue",
         "Functions": "reclaimSetFunctions",
     },
@@ -89,7 +89,7 @@ go_library(
         "//pkg/safemem",
         "//pkg/sentry/arch",
         "//pkg/sentry/hostmm",
-        "//pkg/sentry/platform",
+        "//pkg/sentry/memmap",
         "//pkg/sentry/usage",
         "//pkg/state",
         "//pkg/state/wire",
diff --git a/pkg/sentry/pgalloc/pgalloc.go b/pkg/sentry/pgalloc/pgalloc.go
index afab97c0a..3243d7214 100644
--- a/pkg/sentry/pgalloc/pgalloc.go
+++ b/pkg/sentry/pgalloc/pgalloc.go
@@ -33,14 +33,14 @@ import (
 	"gvisor.dev/gvisor/pkg/log"
 	"gvisor.dev/gvisor/pkg/safemem"
 	"gvisor.dev/gvisor/pkg/sentry/hostmm"
-	"gvisor.dev/gvisor/pkg/sentry/platform"
+	"gvisor.dev/gvisor/pkg/sentry/memmap"
 	"gvisor.dev/gvisor/pkg/sentry/usage"
 	"gvisor.dev/gvisor/pkg/sync"
 	"gvisor.dev/gvisor/pkg/syserror"
 	"gvisor.dev/gvisor/pkg/usermem"
 )
 
-// MemoryFile is a platform.File whose pages may be allocated to arbitrary
+// MemoryFile is a memmap.File whose pages may be allocated to arbitrary
 // users.
 type MemoryFile struct {
 	// opts holds options passed to NewMemoryFile. opts is immutable.
@@ -372,7 +372,7 @@ func (f *MemoryFile) Destroy() {
 // to Allocate.
 //
 // Preconditions: length must be page-aligned and non-zero.
-func (f *MemoryFile) Allocate(length uint64, kind usage.MemoryKind) (platform.FileRange, error) {
+func (f *MemoryFile) Allocate(length uint64, kind usage.MemoryKind) (memmap.FileRange, error) {
 	if length == 0 || length%usermem.PageSize != 0 {
 		panic(fmt.Sprintf("invalid allocation length: %#x", length))
 	}
@@ -390,7 +390,7 @@ func (f *MemoryFile) Allocate(length uint64, kind usage.MemoryKind) (platform.Fi
 	// Find a range in the underlying file.
 	fr, ok := findAvailableRange(&f.usage, f.fileSize, length, alignment)
 	if !ok {
-		return platform.FileRange{}, syserror.ENOMEM
+		return memmap.FileRange{}, syserror.ENOMEM
 	}
 
 	// Expand the file if needed.
@@ -398,7 +398,7 @@ func (f *MemoryFile) Allocate(length uint64, kind usage.MemoryKind) (platform.Fi
 		// Round the new file size up to be chunk-aligned.
 		newFileSize := (int64(fr.End) + chunkMask) &^ chunkMask
 		if err := f.file.Truncate(newFileSize); err != nil {
-			return platform.FileRange{}, err
+			return memmap.FileRange{}, err
 		}
 		f.fileSize = newFileSize
 		f.mappingsMu.Lock()
@@ -416,7 +416,7 @@ func (f *MemoryFile) Allocate(length uint64, kind usage.MemoryKind) (platform.Fi
 				bs[i] = 0
 			}
 		}); err != nil {
-			return platform.FileRange{}, err
+			return memmap.FileRange{}, err
 		}
 	}
 	if !f.usage.Add(fr, usageInfo{
@@ -439,7 +439,7 @@ func (f *MemoryFile) Allocate(length uint64, kind usage.MemoryKind) (platform.Fi
 // space for mappings to be allocated downwards.
 //
 // Precondition: alignment must be a power of 2.
-func findAvailableRange(usage *usageSet, fileSize int64, length, alignment uint64) (platform.FileRange, bool) {
+func findAvailableRange(usage *usageSet, fileSize int64, length, alignment uint64) (memmap.FileRange, bool) {
 	alignmentMask := alignment - 1
 
 	// Search for space in existing gaps, starting at the current end of the
@@ -461,7 +461,7 @@ func findAvailableRange(usage *usageSet, fileSize int64, length, alignment uint6
 			break
 		}
 		if start := unalignedStart &^ alignmentMask; start >= gap.Start() {
-			return platform.FileRange{start, start + length}, true
+			return memmap.FileRange{start, start + length}, true
 		}
 
 		gap = gap.PrevLargeEnoughGap(length)
@@ -475,7 +475,7 @@ func findAvailableRange(usage *usageSet, fileSize int64, length, alignment uint6
 	min = (min + alignmentMask) &^ alignmentMask
 	if min+length < min {
 		// Overflow: allocation would exceed the range of uint64.
-		return platform.FileRange{}, false
+		return memmap.FileRange{}, false
 	}
 
 	// Determine the minimum file size required to fit this allocation at its end.
@@ -484,7 +484,7 @@ func findAvailableRange(usage *usageSet, fileSize int64, length, alignment uint6
 		if newFileSize <= fileSize {
 			if fileSize != 0 {
 				// Overflow: allocation would exceed the range of int64.
-				return platform.FileRange{}, false
+				return memmap.FileRange{}, false
 			}
 			newFileSize = chunkSize
 		}
@@ -496,7 +496,7 @@ func findAvailableRange(usage *usageSet, fileSize int64, length, alignment uint6
 			continue
 		}
 		if start := unalignedStart &^ alignmentMask; start >= min {
-			return platform.FileRange{start, start + length}, true
+			return memmap.FileRange{start, start + length}, true
 		}
 	}
 }
@@ -508,22 +508,22 @@ func findAvailableRange(usage *usageSet, fileSize int64, length, alignment uint6
 // by r.ReadToBlocks(), it returns that error.
 //
 // Preconditions: length > 0. length must be page-aligned.
-func (f *MemoryFile) AllocateAndFill(length uint64, kind usage.MemoryKind, r safemem.Reader) (platform.FileRange, error) {
+func (f *MemoryFile) AllocateAndFill(length uint64, kind usage.MemoryKind, r safemem.Reader) (memmap.FileRange, error) {
 	fr, err := f.Allocate(length, kind)
 	if err != nil {
-		return platform.FileRange{}, err
+		return memmap.FileRange{}, err
 	}
 	dsts, err := f.MapInternal(fr, usermem.Write)
 	if err != nil {
 		f.DecRef(fr)
-		return platform.FileRange{}, err
+		return memmap.FileRange{}, err
 	}
 	n, err := safemem.ReadFullToBlocks(r, dsts)
 	un := uint64(usermem.Addr(n).RoundDown())
 	if un < length {
 		// Free unused memory and update fr to contain only the memory that is
 		// still allocated.
-		f.DecRef(platform.FileRange{fr.Start + un, fr.End})
+		f.DecRef(memmap.FileRange{fr.Start + un, fr.End})
 		fr.End = fr.Start + un
 	}
 	return fr, err
@@ -540,7 +540,7 @@ const (
 // will read zeroes.
 //
 // Preconditions: fr.Length() > 0.
-func (f *MemoryFile) Decommit(fr platform.FileRange) error {
+func (f *MemoryFile) Decommit(fr memmap.FileRange) error {
 	if !fr.WellFormed() || fr.Length() == 0 || fr.Start%usermem.PageSize != 0 || fr.End%usermem.PageSize != 0 {
 		panic(fmt.Sprintf("invalid range: %v", fr))
 	}
@@ -560,7 +560,7 @@ func (f *MemoryFile) Decommit(fr platform.FileRange) error {
 	return nil
 }
 
-func (f *MemoryFile) markDecommitted(fr platform.FileRange) {
+func (f *MemoryFile) markDecommitted(fr memmap.FileRange) {
 	f.mu.Lock()
 	defer f.mu.Unlock()
 	// Since we're changing the knownCommitted attribute, we need to merge
@@ -581,8 +581,8 @@ func (f *MemoryFile) markDecommitted(fr platform.FileRange) {
 	f.usage.MergeRange(fr)
 }
 
-// IncRef implements platform.File.IncRef.
-func (f *MemoryFile) IncRef(fr platform.FileRange) {
+// IncRef implements memmap.File.IncRef.
+func (f *MemoryFile) IncRef(fr memmap.FileRange) {
 	if !fr.WellFormed() || fr.Length() == 0 || fr.Start%usermem.PageSize != 0 || fr.End%usermem.PageSize != 0 {
 		panic(fmt.Sprintf("invalid range: %v", fr))
 	}
@@ -600,8 +600,8 @@ func (f *MemoryFile) IncRef(fr platform.FileRange) {
 	f.usage.MergeAdjacent(fr)
 }
 
-// DecRef implements platform.File.DecRef.
-func (f *MemoryFile) DecRef(fr platform.FileRange) {
+// DecRef implements memmap.File.DecRef.
+func (f *MemoryFile) DecRef(fr memmap.FileRange) {
 	if !fr.WellFormed() || fr.Length() == 0 || fr.Start%usermem.PageSize != 0 || fr.End%usermem.PageSize != 0 {
 		panic(fmt.Sprintf("invalid range: %v", fr))
 	}
@@ -637,8 +637,8 @@ func (f *MemoryFile) DecRef(fr platform.FileRange) {
 	}
 }
 
-// MapInternal implements platform.File.MapInternal.
-func (f *MemoryFile) MapInternal(fr platform.FileRange, at usermem.AccessType) (safemem.BlockSeq, error) {
+// MapInternal implements memmap.File.MapInternal.
+func (f *MemoryFile) MapInternal(fr memmap.FileRange, at usermem.AccessType) (safemem.BlockSeq, error) {
 	if !fr.WellFormed() || fr.Length() == 0 {
 		panic(fmt.Sprintf("invalid range: %v", fr))
 	}
@@ -664,7 +664,7 @@ func (f *MemoryFile) MapInternal(fr platform.FileRange, at usermem.AccessType) (
 
 // forEachMappingSlice invokes fn on a sequence of byte slices that
 // collectively map all bytes in fr.
-func (f *MemoryFile) forEachMappingSlice(fr platform.FileRange, fn func([]byte)) error {
+func (f *MemoryFile) forEachMappingSlice(fr memmap.FileRange, fn func([]byte)) error {
 	mappings := f.mappings.Load().([]uintptr)
 	for chunkStart := fr.Start &^ chunkMask; chunkStart < fr.End; chunkStart += chunkSize {
 		chunk := int(chunkStart >> chunkShift)
@@ -944,7 +944,7 @@ func (f *MemoryFile) updateUsageLocked(currentUsage uint64, checkCommitted func(
 					continue
 				case !populated && populatedRun:
 					// Finish the run by changing this segment.
-					runRange := platform.FileRange{
+					runRange := memmap.FileRange{
 						Start: r.Start + uint64(populatedRunStart*usermem.PageSize),
 						End:   r.Start + uint64(i*usermem.PageSize),
 					}
@@ -1009,7 +1009,7 @@ func (f *MemoryFile) File() *os.File {
 	return f.file
 }
 
-// FD implements platform.File.FD.
+// FD implements memmap.File.FD.
 func (f *MemoryFile) FD() int {
 	return int(f.file.Fd())
 }
@@ -1090,13 +1090,13 @@ func (f *MemoryFile) runReclaim() {
 //
 // Note that there returned range will be removed from tracking. It
 // must be reclaimed (removed from f.usage) at this point.
-func (f *MemoryFile) findReclaimable() (platform.FileRange, bool) {
+func (f *MemoryFile) findReclaimable() (memmap.FileRange, bool) {
 	f.mu.Lock()
 	defer f.mu.Unlock()
 	for {
 		for {
 			if f.destroyed {
-				return platform.FileRange{}, false
+				return memmap.FileRange{}, false
 			}
 			if f.reclaimable {
 				break
@@ -1120,7 +1120,7 @@ func (f *MemoryFile) findReclaimable() (platform.FileRange, bool) {
 	}
 }
 
-func (f *MemoryFile) markReclaimed(fr platform.FileRange) {
+func (f *MemoryFile) markReclaimed(fr memmap.FileRange) {
 	f.mu.Lock()
 	defer f.mu.Unlock()
 	seg := f.usage.FindSegment(fr.Start)
@@ -1222,11 +1222,11 @@ func (usageSetFunctions) MaxKey() uint64 {
 func (usageSetFunctions) ClearValue(val *usageInfo) {
 }
 
-func (usageSetFunctions) Merge(_ platform.FileRange, val1 usageInfo, _ platform.FileRange, val2 usageInfo) (usageInfo, bool) {
+func (usageSetFunctions) Merge(_ memmap.FileRange, val1 usageInfo, _ memmap.FileRange, val2 usageInfo) (usageInfo, bool) {
 	return val1, val1 == val2
 }
 
-func (usageSetFunctions) Split(_ platform.FileRange, val usageInfo, _ uint64) (usageInfo, usageInfo) {
+func (usageSetFunctions) Split(_ memmap.FileRange, val usageInfo, _ uint64) (usageInfo, usageInfo) {
 	return val, val
 }
 
@@ -1270,10 +1270,10 @@ func (reclaimSetFunctions) MaxKey() uint64 {
 func (reclaimSetFunctions) ClearValue(val *reclaimSetValue) {
 }
 
-func (reclaimSetFunctions) Merge(_ platform.FileRange, _ reclaimSetValue, _ platform.FileRange, _ reclaimSetValue) (reclaimSetValue, bool) {
+func (reclaimSetFunctions) Merge(_ memmap.FileRange, _ reclaimSetValue, _ memmap.FileRange, _ reclaimSetValue) (reclaimSetValue, bool) {
 	return reclaimSetValue{}, true
 }
 
-func (reclaimSetFunctions) Split(_ platform.FileRange, _ reclaimSetValue, _ uint64) (reclaimSetValue, reclaimSetValue) {
+func (reclaimSetFunctions) Split(_ memmap.FileRange, _ reclaimSetValue, _ uint64) (reclaimSetValue, reclaimSetValue) {
 	return reclaimSetValue{}, reclaimSetValue{}
 }
diff --git a/pkg/sentry/platform/BUILD b/pkg/sentry/platform/BUILD
index 453241eca..209b28053 100644
--- a/pkg/sentry/platform/BUILD
+++ b/pkg/sentry/platform/BUILD
@@ -1,39 +1,21 @@
 load("//tools:defs.bzl", "go_library")
-load("//tools/go_generics:defs.bzl", "go_template_instance")
 
 package(licenses = ["notice"])
 
-go_template_instance(
-    name = "file_range",
-    out = "file_range.go",
-    package = "platform",
-    prefix = "File",
-    template = "//pkg/segment:generic_range",
-    types = {
-        "T": "uint64",
-    },
-)
-
 go_library(
     name = "platform",
     srcs = [
         "context.go",
-        "file_range.go",
         "mmap_min_addr.go",
         "platform.go",
     ],
     visibility = ["//pkg/sentry:internal"],
     deps = [
         "//pkg/abi/linux",
-        "//pkg/atomicbitops",
         "//pkg/context",
-        "//pkg/log",
-        "//pkg/safecopy",
-        "//pkg/safemem",
         "//pkg/seccomp",
         "//pkg/sentry/arch",
-        "//pkg/sentry/usage",
-        "//pkg/syserror",
+        "//pkg/sentry/memmap",
         "//pkg/usermem",
     ],
 )
diff --git a/pkg/sentry/platform/kvm/BUILD b/pkg/sentry/platform/kvm/BUILD
index 10a10bfe2..b5d27a72a 100644
--- a/pkg/sentry/platform/kvm/BUILD
+++ b/pkg/sentry/platform/kvm/BUILD
@@ -47,6 +47,7 @@ go_library(
         "//pkg/safecopy",
         "//pkg/seccomp",
         "//pkg/sentry/arch",
+        "//pkg/sentry/memmap",
         "//pkg/sentry/platform",
         "//pkg/sentry/platform/interrupt",
         "//pkg/sentry/platform/ring0",
diff --git a/pkg/sentry/platform/kvm/address_space.go b/pkg/sentry/platform/kvm/address_space.go
index faf1d5e1c..98a3e539d 100644
--- a/pkg/sentry/platform/kvm/address_space.go
+++ b/pkg/sentry/platform/kvm/address_space.go
@@ -18,6 +18,7 @@ import (
 	"sync/atomic"
 
 	"gvisor.dev/gvisor/pkg/atomicbitops"
+	"gvisor.dev/gvisor/pkg/sentry/memmap"
 	"gvisor.dev/gvisor/pkg/sentry/platform"
 	"gvisor.dev/gvisor/pkg/sentry/platform/ring0/pagetables"
 	"gvisor.dev/gvisor/pkg/sync"
@@ -150,7 +151,7 @@ func (as *addressSpace) mapLocked(addr usermem.Addr, m hostMapEntry, at usermem.
 }
 
 // MapFile implements platform.AddressSpace.MapFile.
-func (as *addressSpace) MapFile(addr usermem.Addr, f platform.File, fr platform.FileRange, at usermem.AccessType, precommit bool) error {
+func (as *addressSpace) MapFile(addr usermem.Addr, f memmap.File, fr memmap.FileRange, at usermem.AccessType, precommit bool) error {
 	as.mu.Lock()
 	defer as.mu.Unlock()
 
diff --git a/pkg/sentry/platform/platform.go b/pkg/sentry/platform/platform.go
index 171513f3f..4b13eec30 100644
--- a/pkg/sentry/platform/platform.go
+++ b/pkg/sentry/platform/platform.go
@@ -22,9 +22,9 @@ import (
 	"os"
 
 	"gvisor.dev/gvisor/pkg/abi/linux"
-	"gvisor.dev/gvisor/pkg/safemem"
 	"gvisor.dev/gvisor/pkg/seccomp"
 	"gvisor.dev/gvisor/pkg/sentry/arch"
+	"gvisor.dev/gvisor/pkg/sentry/memmap"
 	"gvisor.dev/gvisor/pkg/usermem"
 )
 
@@ -207,7 +207,7 @@ type AddressSpace interface {
 	// Preconditions: addr and fr must be page-aligned. fr.Length() > 0.
 	// at.Any() == true. At least one reference must be held on all pages in
 	// fr, and must continue to be held as long as pages are mapped.
-	MapFile(addr usermem.Addr, f File, fr FileRange, at usermem.AccessType, precommit bool) error
+	MapFile(addr usermem.Addr, f memmap.File, fr memmap.FileRange, at usermem.AccessType, precommit bool) error
 
 	// Unmap unmaps the given range.
 	//
@@ -310,52 +310,6 @@ func (f SegmentationFault) Error() string {
 	return fmt.Sprintf("segmentation fault at %#x", f.Addr)
 }
 
-// File represents a host file that may be mapped into an AddressSpace.
-type File interface {
-	// All pages in a File are reference-counted.
-
-	// IncRef increments the reference count on all pages in fr.
-	//
-	// Preconditions: fr.Start and fr.End must be page-aligned. fr.Length() >
-	// 0. At least one reference must be held on all pages in fr. (The File
-	// interface does not provide a way to acquire an initial reference;
-	// implementors may define mechanisms for doing so.)
-	IncRef(fr FileRange)
-
-	// DecRef decrements the reference count on all pages in fr.
-	//
-	// Preconditions: fr.Start and fr.End must be page-aligned. fr.Length() >
-	// 0. At least one reference must be held on all pages in fr.
-	DecRef(fr FileRange)
-
-	// MapInternal returns a mapping of the given file offsets in the invoking
-	// process' address space for reading and writing.
-	//
-	// Note that fr.Start and fr.End need not be page-aligned.
-	//
-	// Preconditions: fr.Length() > 0. At least one reference must be held on
-	// all pages in fr.
-	//
-	// Postconditions: The returned mapping is valid as long as at least one
-	// reference is held on the mapped pages.
-	MapInternal(fr FileRange, at usermem.AccessType) (safemem.BlockSeq, error)
-
-	// FD returns the file descriptor represented by the File.
-	//
-	// The only permitted operation on the returned file descriptor is to map
-	// pages from it consistent with the requirements of AddressSpace.MapFile.
-	FD() int
-}
-
-// FileRange represents a range of uint64 offsets into a File.
-//
-// type FileRange <generated using go_generics>
-
-// String implements fmt.Stringer.String.
-func (fr FileRange) String() string {
-	return fmt.Sprintf("[%#x, %#x)", fr.Start, fr.End)
-}
-
 // Requirements is used to specify platform specific requirements.
 type Requirements struct {
 	// RequiresCurrentPIDNS indicates that the sandbox has to be started in the
diff --git a/pkg/sentry/platform/ptrace/BUILD b/pkg/sentry/platform/ptrace/BUILD
index 30402c2df..29fd23cc3 100644
--- a/pkg/sentry/platform/ptrace/BUILD
+++ b/pkg/sentry/platform/ptrace/BUILD
@@ -30,6 +30,7 @@ go_library(
         "//pkg/seccomp",
         "//pkg/sentry/arch",
         "//pkg/sentry/hostcpu",
+        "//pkg/sentry/memmap",
         "//pkg/sentry/platform",
         "//pkg/sentry/platform/interrupt",
         "//pkg/sync",
diff --git a/pkg/sentry/platform/ptrace/subprocess.go b/pkg/sentry/platform/ptrace/subprocess.go
index 2389423b0..c990f3454 100644
--- a/pkg/sentry/platform/ptrace/subprocess.go
+++ b/pkg/sentry/platform/ptrace/subprocess.go
@@ -24,6 +24,7 @@ import (
 	"gvisor.dev/gvisor/pkg/log"
 	"gvisor.dev/gvisor/pkg/procid"
 	"gvisor.dev/gvisor/pkg/sentry/arch"
+	"gvisor.dev/gvisor/pkg/sentry/memmap"
 	"gvisor.dev/gvisor/pkg/sentry/platform"
 	"gvisor.dev/gvisor/pkg/sync"
 	"gvisor.dev/gvisor/pkg/usermem"
@@ -616,7 +617,7 @@ func (s *subprocess) syscall(sysno uintptr, args ...arch.SyscallArgument) (uintp
 }
 
 // MapFile implements platform.AddressSpace.MapFile.
-func (s *subprocess) MapFile(addr usermem.Addr, f platform.File, fr platform.FileRange, at usermem.AccessType, precommit bool) error {
+func (s *subprocess) MapFile(addr usermem.Addr, f memmap.File, fr memmap.FileRange, at usermem.AccessType, precommit bool) error {
 	var flags int
 	if precommit {
 		flags |= syscall.MAP_POPULATE
diff --git a/pkg/sentry/platform/ring0/kernel_arm64.go b/pkg/sentry/platform/ring0/kernel_arm64.go
index ccacaea6b..fca3a5478 100644
--- a/pkg/sentry/platform/ring0/kernel_arm64.go
+++ b/pkg/sentry/platform/ring0/kernel_arm64.go
@@ -58,7 +58,13 @@ func (c *CPU) SwitchToUser(switchOpts SwitchOpts) (vector Vector) {
 
 	regs.Pstate &= ^uint64(UserFlagsClear)
 	regs.Pstate |= UserFlagsSet
+
+	SetTLS(regs.TPIDR_EL0)
+
 	kernelExitToEl0()
+
+	regs.TPIDR_EL0 = GetTLS()
+
 	vector = c.vecCode
 
 	// Perform the switch.
diff --git a/pkg/sentry/socket/BUILD b/pkg/sentry/socket/BUILD
index c40c6d673..c0fd3425b 100644
--- a/pkg/sentry/socket/BUILD
+++ b/pkg/sentry/socket/BUILD
@@ -20,5 +20,6 @@ go_library(
         "//pkg/syserr",
         "//pkg/tcpip",
         "//pkg/usermem",
+        "//tools/go_marshal/marshal",
     ],
 )
diff --git a/pkg/sentry/socket/hostinet/BUILD b/pkg/sentry/socket/hostinet/BUILD
index ff81ea6e6..e76e498de 100644
--- a/pkg/sentry/socket/hostinet/BUILD
+++ b/pkg/sentry/socket/hostinet/BUILD
@@ -40,6 +40,8 @@ go_library(
         "//pkg/tcpip/stack",
         "//pkg/usermem",
         "//pkg/waiter",
+        "//tools/go_marshal/marshal",
+        "//tools/go_marshal/primitive",
         "@org_golang_x_sys//unix:go_default_library",
     ],
 )
diff --git a/pkg/sentry/socket/hostinet/socket.go b/pkg/sentry/socket/hostinet/socket.go
index a92aed2c9..532a1ea5d 100644
--- a/pkg/sentry/socket/hostinet/socket.go
+++ b/pkg/sentry/socket/hostinet/socket.go
@@ -36,6 +36,8 @@ import (
 	"gvisor.dev/gvisor/pkg/syserror"
 	"gvisor.dev/gvisor/pkg/usermem"
 	"gvisor.dev/gvisor/pkg/waiter"
+	"gvisor.dev/gvisor/tools/go_marshal/marshal"
+	"gvisor.dev/gvisor/tools/go_marshal/primitive"
 )
 
 const (
@@ -319,7 +321,7 @@ func (s *socketOpsCommon) Shutdown(t *kernel.Task, how int) *syserr.Error {
 }
 
 // GetSockOpt implements socket.Socket.GetSockOpt.
-func (s *socketOpsCommon) GetSockOpt(t *kernel.Task, level int, name int, outPtr usermem.Addr, outLen int) (interface{}, *syserr.Error) {
+func (s *socketOpsCommon) GetSockOpt(t *kernel.Task, level int, name int, outPtr usermem.Addr, outLen int) (marshal.Marshallable, *syserr.Error) {
 	if outLen < 0 {
 		return nil, syserr.ErrInvalidArgument
 	}
@@ -364,7 +366,8 @@ func (s *socketOpsCommon) GetSockOpt(t *kernel.Task, level int, name int, outPtr
 	if err != nil {
 		return nil, syserr.FromError(err)
 	}
-	return opt, nil
+	optP := primitive.ByteSlice(opt)
+	return &optP, nil
 }
 
 // SetSockOpt implements socket.Socket.SetSockOpt.
@@ -708,6 +711,6 @@ func (p *socketProvider) Pair(t *kernel.Task, stype linux.SockType, protocol int
 func init() {
 	for _, family := range []int{syscall.AF_INET, syscall.AF_INET6} {
 		socket.RegisterProvider(family, &socketProvider{family})
-		socket.RegisterProviderVFS2(family, &socketProviderVFS2{})
+		socket.RegisterProviderVFS2(family, &socketProviderVFS2{family})
 	}
 }
diff --git a/pkg/sentry/socket/netfilter/netfilter.go b/pkg/sentry/socket/netfilter/netfilter.go
index f7abe77d3..a9f0604ae 100644
--- a/pkg/sentry/socket/netfilter/netfilter.go
+++ b/pkg/sentry/socket/netfilter/netfilter.go
@@ -66,7 +66,7 @@ func nflog(format string, args ...interface{}) {
 func GetInfo(t *kernel.Task, stack *stack.Stack, outPtr usermem.Addr) (linux.IPTGetinfo, *syserr.Error) {
 	// Read in the struct and table name.
 	var info linux.IPTGetinfo
-	if _, err := t.CopyIn(outPtr, &info); err != nil {
+	if _, err := info.CopyIn(t, outPtr); err != nil {
 		return linux.IPTGetinfo{}, syserr.FromError(err)
 	}
 
@@ -84,7 +84,7 @@ func GetInfo(t *kernel.Task, stack *stack.Stack, outPtr usermem.Addr) (linux.IPT
 func GetEntries(t *kernel.Task, stack *stack.Stack, outPtr usermem.Addr, outLen int) (linux.KernelIPTGetEntries, *syserr.Error) {
 	// Read in the struct and table name.
 	var userEntries linux.IPTGetEntries
-	if _, err := t.CopyIn(outPtr, &userEntries); err != nil {
+	if _, err := userEntries.CopyIn(t, outPtr); err != nil {
 		nflog("couldn't copy in entries %q", userEntries.Name)
 		return linux.KernelIPTGetEntries{}, syserr.FromError(err)
 	}
@@ -145,7 +145,7 @@ func convertNetstackToBinary(stack *stack.Stack, tablename linux.TableName) (lin
 
 		// Each rule corresponds to an entry.
 		entry := linux.KernelIPTEntry{
-			IPTEntry: linux.IPTEntry{
+			Entry: linux.IPTEntry{
 				IP: linux.IPTIP{
 					Protocol: uint16(rule.Filter.Protocol),
 				},
@@ -153,20 +153,20 @@ func convertNetstackToBinary(stack *stack.Stack, tablename linux.TableName) (lin
 				TargetOffset: linux.SizeOfIPTEntry,
 			},
 		}
-		copy(entry.IPTEntry.IP.Dst[:], rule.Filter.Dst)
-		copy(entry.IPTEntry.IP.DstMask[:], rule.Filter.DstMask)
-		copy(entry.IPTEntry.IP.Src[:], rule.Filter.Src)
-		copy(entry.IPTEntry.IP.SrcMask[:], rule.Filter.SrcMask)
-		copy(entry.IPTEntry.IP.OutputInterface[:], rule.Filter.OutputInterface)
-		copy(entry.IPTEntry.IP.OutputInterfaceMask[:], rule.Filter.OutputInterfaceMask)
+		copy(entry.Entry.IP.Dst[:], rule.Filter.Dst)
+		copy(entry.Entry.IP.DstMask[:], rule.Filter.DstMask)
+		copy(entry.Entry.IP.Src[:], rule.Filter.Src)
+		copy(entry.Entry.IP.SrcMask[:], rule.Filter.SrcMask)
+		copy(entry.Entry.IP.OutputInterface[:], rule.Filter.OutputInterface)
+		copy(entry.Entry.IP.OutputInterfaceMask[:], rule.Filter.OutputInterfaceMask)
 		if rule.Filter.DstInvert {
-			entry.IPTEntry.IP.InverseFlags |= linux.IPT_INV_DSTIP
+			entry.Entry.IP.InverseFlags |= linux.IPT_INV_DSTIP
 		}
 		if rule.Filter.SrcInvert {
-			entry.IPTEntry.IP.InverseFlags |= linux.IPT_INV_SRCIP
+			entry.Entry.IP.InverseFlags |= linux.IPT_INV_SRCIP
 		}
 		if rule.Filter.OutputInterfaceInvert {
-			entry.IPTEntry.IP.InverseFlags |= linux.IPT_INV_VIA_OUT
+			entry.Entry.IP.InverseFlags |= linux.IPT_INV_VIA_OUT
 		}
 
 		for _, matcher := range rule.Matchers {
@@ -178,8 +178,8 @@ func convertNetstackToBinary(stack *stack.Stack, tablename linux.TableName) (lin
 				panic(fmt.Sprintf("matcher %T is not 64-bit aligned", matcher))
 			}
 			entry.Elems = append(entry.Elems, serialized...)
-			entry.NextOffset += uint16(len(serialized))
-			entry.TargetOffset += uint16(len(serialized))
+			entry.Entry.NextOffset += uint16(len(serialized))
+			entry.Entry.TargetOffset += uint16(len(serialized))
 		}
 
 		// Serialize and append the target.
@@ -188,11 +188,11 @@ func convertNetstackToBinary(stack *stack.Stack, tablename linux.TableName) (lin
 			panic(fmt.Sprintf("target %T is not 64-bit aligned", rule.Target))
 		}
 		entry.Elems = append(entry.Elems, serialized...)
-		entry.NextOffset += uint16(len(serialized))
+		entry.Entry.NextOffset += uint16(len(serialized))
 
 		nflog("convert to binary: adding entry: %+v", entry)
 
-		entries.Size += uint32(entry.NextOffset)
+		entries.Size += uint32(entry.Entry.NextOffset)
 		entries.Entrytable = append(entries.Entrytable, entry)
 		info.NumEntries++
 	}
@@ -342,10 +342,10 @@ func SetEntries(stk *stack.Stack, optVal []byte) *syserr.Error {
 	// TODO(gvisor.dev/issue/170): Support other tables.
 	var table stack.Table
 	switch replace.Name.String() {
-	case stack.TablenameFilter:
+	case stack.FilterTable:
 		table = stack.EmptyFilterTable()
-	case stack.TablenameNat:
-		table = stack.EmptyNatTable()
+	case stack.NATTable:
+		table = stack.EmptyNATTable()
 	default:
 		nflog("we don't yet support writing to the %q table (gvisor.dev/issue/170)", replace.Name.String())
 		return syserr.ErrInvalidArgument
@@ -431,6 +431,8 @@ func SetEntries(stk *stack.Stack, optVal []byte) *syserr.Error {
 	for hook, _ := range replace.HookEntry {
 		if table.ValidHooks()&(1<<hook) != 0 {
 			hk := hookFromLinux(hook)
+			table.BuiltinChains[hk] = stack.HookUnset
+			table.Underflows[hk] = stack.HookUnset
 			for offset, ruleIdx := range offsets {
 				if offset == replace.HookEntry[hook] {
 					table.BuiltinChains[hk] = ruleIdx
@@ -456,8 +458,7 @@ func SetEntries(stk *stack.Stack, optVal []byte) *syserr.Error {
 
 	// Add the user chains.
 	for ruleIdx, rule := range table.Rules {
-		target, ok := rule.Target.(stack.UserChainTarget)
-		if !ok {
+		if _, ok := rule.Target.(stack.UserChainTarget); !ok {
 			continue
 		}
 
@@ -473,7 +474,6 @@ func SetEntries(stk *stack.Stack, optVal []byte) *syserr.Error {
 			nflog("user chain's first node must have no matchers")
 			return syserr.ErrInvalidArgument
 		}
-		table.UserChains[target.Name] = ruleIdx + 1
 	}
 
 	// Set each jump to point to the appropriate rule. Right now they hold byte
@@ -499,7 +499,10 @@ func SetEntries(stk *stack.Stack, optVal []byte) *syserr.Error {
 	// Since we only support modifying the INPUT, PREROUTING and OUTPUT chain right now,
 	// make sure all other chains point to ACCEPT rules.
 	for hook, ruleIdx := range table.BuiltinChains {
-		if hook == stack.Forward || hook == stack.Postrouting {
+		if hook := stack.Hook(hook); hook == stack.Forward || hook == stack.Postrouting {
+			if ruleIdx == stack.HookUnset {
+				continue
+			}
 			if !isUnconditionalAccept(table.Rules[ruleIdx]) {
 				nflog("hook %d is unsupported.", hook)
 				return syserr.ErrInvalidArgument
@@ -512,9 +515,7 @@ func SetEntries(stk *stack.Stack, optVal []byte) *syserr.Error {
 	// - There are no chains without an unconditional final rule.
 	// - There are no chains without an unconditional underflow rule.
 
-	stk.IPTables().ReplaceTable(replace.Name.String(), table)
-
-	return nil
+	return syserr.TranslateNetstackError(stk.IPTables().ReplaceTable(replace.Name.String(), table))
 }
 
 // parseMatchers parses 0 or more matchers from optVal. optVal should contain
diff --git a/pkg/sentry/socket/netlink/BUILD b/pkg/sentry/socket/netlink/BUILD
index d5ca3ac56..0546801bf 100644
--- a/pkg/sentry/socket/netlink/BUILD
+++ b/pkg/sentry/socket/netlink/BUILD
@@ -36,6 +36,8 @@ go_library(
         "//pkg/tcpip",
         "//pkg/usermem",
         "//pkg/waiter",
+        "//tools/go_marshal/marshal",
+        "//tools/go_marshal/primitive",
     ],
 )
 
diff --git a/pkg/sentry/socket/netlink/socket.go b/pkg/sentry/socket/netlink/socket.go
index 81f34c5a2..98ca7add0 100644
--- a/pkg/sentry/socket/netlink/socket.go
+++ b/pkg/sentry/socket/netlink/socket.go
@@ -38,6 +38,8 @@ import (
 	"gvisor.dev/gvisor/pkg/tcpip"
 	"gvisor.dev/gvisor/pkg/usermem"
 	"gvisor.dev/gvisor/pkg/waiter"
+	"gvisor.dev/gvisor/tools/go_marshal/marshal"
+	"gvisor.dev/gvisor/tools/go_marshal/primitive"
 )
 
 const sizeOfInt32 int = 4
@@ -330,7 +332,7 @@ func (s *socketOpsCommon) Shutdown(t *kernel.Task, how int) *syserr.Error {
 }
 
 // GetSockOpt implements socket.Socket.GetSockOpt.
-func (s *socketOpsCommon) GetSockOpt(t *kernel.Task, level int, name int, outPtr usermem.Addr, outLen int) (interface{}, *syserr.Error) {
+func (s *socketOpsCommon) GetSockOpt(t *kernel.Task, level int, name int, outPtr usermem.Addr, outLen int) (marshal.Marshallable, *syserr.Error) {
 	switch level {
 	case linux.SOL_SOCKET:
 		switch name {
@@ -340,24 +342,26 @@ func (s *socketOpsCommon) GetSockOpt(t *kernel.Task, level int, name int, outPtr
 			}
 			s.mu.Lock()
 			defer s.mu.Unlock()
-			return int32(s.sendBufferSize), nil
+			sendBufferSizeP := primitive.Int32(s.sendBufferSize)
+			return &sendBufferSizeP, nil
 
 		case linux.SO_RCVBUF:
 			if outLen < sizeOfInt32 {
 				return nil, syserr.ErrInvalidArgument
 			}
 			// We don't have limit on receiving size.
-			return int32(math.MaxInt32), nil
+			recvBufferSizeP := primitive.Int32(math.MaxInt32)
+			return &recvBufferSizeP, nil
 
 		case linux.SO_PASSCRED:
 			if outLen < sizeOfInt32 {
 				return nil, syserr.ErrInvalidArgument
 			}
-			var passcred int32
+			var passcred primitive.Int32
 			if s.Passcred() {
 				passcred = 1
 			}
-			return passcred, nil
+			return &passcred, nil
 
 		default:
 			socket.GetSockOptEmitUnimplementedEvent(t, name)
diff --git a/pkg/sentry/socket/netstack/BUILD b/pkg/sentry/socket/netstack/BUILD
index ea6ebd0e2..1fb777a6c 100644
--- a/pkg/sentry/socket/netstack/BUILD
+++ b/pkg/sentry/socket/netstack/BUILD
@@ -51,6 +51,8 @@ go_library(
         "//pkg/tcpip/transport/udp",
         "//pkg/usermem",
         "//pkg/waiter",
+        "//tools/go_marshal/marshal",
+        "//tools/go_marshal/primitive",
         "@org_golang_x_sys//unix:go_default_library",
     ],
 )
diff --git a/pkg/sentry/socket/netstack/netstack.go b/pkg/sentry/socket/netstack/netstack.go
index 49a04e613..44b3fff46 100644
--- a/pkg/sentry/socket/netstack/netstack.go
+++ b/pkg/sentry/socket/netstack/netstack.go
@@ -26,6 +26,7 @@ package netstack
 
 import (
 	"bytes"
+	"fmt"
 	"io"
 	"math"
 	"reflect"
@@ -61,6 +62,8 @@ import (
 	"gvisor.dev/gvisor/pkg/tcpip/transport/udp"
 	"gvisor.dev/gvisor/pkg/usermem"
 	"gvisor.dev/gvisor/pkg/waiter"
+	"gvisor.dev/gvisor/tools/go_marshal/marshal"
+	"gvisor.dev/gvisor/tools/go_marshal/primitive"
 )
 
 func mustCreateMetric(name, description string) *tcpip.StatCounter {
@@ -909,7 +912,7 @@ func (s *socketOpsCommon) Shutdown(t *kernel.Task, how int) *syserr.Error {
 
 // GetSockOpt implements the linux syscall getsockopt(2) for sockets backed by
 // tcpip.Endpoint.
-func (s *SocketOperations) GetSockOpt(t *kernel.Task, level, name int, outPtr usermem.Addr, outLen int) (interface{}, *syserr.Error) {
+func (s *SocketOperations) GetSockOpt(t *kernel.Task, level, name int, outPtr usermem.Addr, outLen int) (marshal.Marshallable, *syserr.Error) {
 	// TODO(b/78348848): Unlike other socket options, SO_TIMESTAMP is
 	// implemented specifically for netstack.SocketOperations rather than
 	// commonEndpoint. commonEndpoint should be extended to support socket
@@ -919,25 +922,25 @@ func (s *SocketOperations) GetSockOpt(t *kernel.Task, level, name int, outPtr us
 		if outLen < sizeOfInt32 {
 			return nil, syserr.ErrInvalidArgument
 		}
-		val := int32(0)
+		val := primitive.Int32(0)
 		s.readMu.Lock()
 		defer s.readMu.Unlock()
 		if s.sockOptTimestamp {
 			val = 1
 		}
-		return val, nil
+		return &val, nil
 	}
 	if level == linux.SOL_TCP && name == linux.TCP_INQ {
 		if outLen < sizeOfInt32 {
 			return nil, syserr.ErrInvalidArgument
 		}
-		val := int32(0)
+		val := primitive.Int32(0)
 		s.readMu.Lock()
 		defer s.readMu.Unlock()
 		if s.sockOptInq {
 			val = 1
 		}
-		return val, nil
+		return &val, nil
 	}
 
 	if s.skType == linux.SOCK_RAW && level == linux.IPPROTO_IP {
@@ -955,7 +958,7 @@ func (s *SocketOperations) GetSockOpt(t *kernel.Task, level, name int, outPtr us
 			if err != nil {
 				return nil, err
 			}
-			return info, nil
+			return &info, nil
 
 		case linux.IPT_SO_GET_ENTRIES:
 			if outLen < linux.SizeOfIPTGetEntries {
@@ -970,7 +973,7 @@ func (s *SocketOperations) GetSockOpt(t *kernel.Task, level, name int, outPtr us
 			if err != nil {
 				return nil, err
 			}
-			return entries, nil
+			return &entries, nil
 
 		}
 	}
@@ -980,7 +983,7 @@ func (s *SocketOperations) GetSockOpt(t *kernel.Task, level, name int, outPtr us
 
 // GetSockOpt can be used to implement the linux syscall getsockopt(2) for
 // sockets backed by a commonEndpoint.
-func GetSockOpt(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, family int, skType linux.SockType, level, name, outLen int) (interface{}, *syserr.Error) {
+func GetSockOpt(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, family int, skType linux.SockType, level, name, outLen int) (marshal.Marshallable, *syserr.Error) {
 	switch level {
 	case linux.SOL_SOCKET:
 		return getSockOptSocket(t, s, ep, family, skType, name, outLen)
@@ -1013,7 +1016,7 @@ func boolToInt32(v bool) int32 {
 }
 
 // getSockOptSocket implements GetSockOpt when level is SOL_SOCKET.
-func getSockOptSocket(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, family int, skType linux.SockType, name, outLen int) (interface{}, *syserr.Error) {
+func getSockOptSocket(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, family int, skType linux.SockType, name, outLen int) (marshal.Marshallable, *syserr.Error) {
 	// TODO(b/124056281): Stop rejecting short optLen values in getsockopt.
 	switch name {
 	case linux.SO_ERROR:
@@ -1024,9 +1027,12 @@ func getSockOptSocket(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, fam
 		// Get the last error and convert it.
 		err := ep.GetSockOpt(tcpip.ErrorOption{})
 		if err == nil {
-			return int32(0), nil
+			optP := primitive.Int32(0)
+			return &optP, nil
 		}
-		return int32(syserr.TranslateNetstackError(err).ToLinux().Number()), nil
+
+		optP := primitive.Int32(syserr.TranslateNetstackError(err).ToLinux().Number())
+		return &optP, nil
 
 	case linux.SO_PEERCRED:
 		if family != linux.AF_UNIX || outLen < syscall.SizeofUcred {
@@ -1034,11 +1040,12 @@ func getSockOptSocket(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, fam
 		}
 
 		tcred := t.Credentials()
-		return syscall.Ucred{
-			Pid: int32(t.ThreadGroup().ID()),
-			Uid: uint32(tcred.EffectiveKUID.In(tcred.UserNamespace).OrOverflow()),
-			Gid: uint32(tcred.EffectiveKGID.In(tcred.UserNamespace).OrOverflow()),
-		}, nil
+		creds := linux.ControlMessageCredentials{
+			PID: int32(t.ThreadGroup().ID()),
+			UID: uint32(tcred.EffectiveKUID.In(tcred.UserNamespace).OrOverflow()),
+			GID: uint32(tcred.EffectiveKGID.In(tcred.UserNamespace).OrOverflow()),
+		}
+		return &creds, nil
 
 	case linux.SO_PASSCRED:
 		if outLen < sizeOfInt32 {
@@ -1049,7 +1056,9 @@ func getSockOptSocket(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, fam
 		if err != nil {
 			return nil, syserr.TranslateNetstackError(err)
 		}
-		return boolToInt32(v), nil
+
+		vP := primitive.Int32(boolToInt32(v))
+		return &vP, nil
 
 	case linux.SO_SNDBUF:
 		if outLen < sizeOfInt32 {
@@ -1065,7 +1074,8 @@ func getSockOptSocket(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, fam
 			size = math.MaxInt32
 		}
 
-		return int32(size), nil
+		sizeP := primitive.Int32(size)
+		return &sizeP, nil
 
 	case linux.SO_RCVBUF:
 		if outLen < sizeOfInt32 {
@@ -1081,7 +1091,8 @@ func getSockOptSocket(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, fam
 			size = math.MaxInt32
 		}
 
-		return int32(size), nil
+		sizeP := primitive.Int32(size)
+		return &sizeP, nil
 
 	case linux.SO_REUSEADDR:
 		if outLen < sizeOfInt32 {
@@ -1092,7 +1103,8 @@ func getSockOptSocket(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, fam
 		if err != nil {
 			return nil, syserr.TranslateNetstackError(err)
 		}
-		return boolToInt32(v), nil
+		vP := primitive.Int32(boolToInt32(v))
+		return &vP, nil
 
 	case linux.SO_REUSEPORT:
 		if outLen < sizeOfInt32 {
@@ -1103,7 +1115,9 @@ func getSockOptSocket(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, fam
 		if err != nil {
 			return nil, syserr.TranslateNetstackError(err)
 		}
-		return boolToInt32(v), nil
+
+		vP := primitive.Int32(boolToInt32(v))
+		return &vP, nil
 
 	case linux.SO_BINDTODEVICE:
 		var v tcpip.BindToDeviceOption
@@ -1111,7 +1125,8 @@ func getSockOptSocket(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, fam
 			return nil, syserr.TranslateNetstackError(err)
 		}
 		if v == 0 {
-			return []byte{}, nil
+			var b primitive.ByteSlice
+			return &b, nil
 		}
 		if outLen < linux.IFNAMSIZ {
 			return nil, syserr.ErrInvalidArgument
@@ -1126,7 +1141,9 @@ func getSockOptSocket(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, fam
 			// interface was removed.
 			return nil, syserr.ErrUnknownDevice
 		}
-		return append([]byte(nic.Name), 0), nil
+
+		name := primitive.ByteSlice(append([]byte(nic.Name), 0))
+		return &name, nil
 
 	case linux.SO_BROADCAST:
 		if outLen < sizeOfInt32 {
@@ -1137,7 +1154,9 @@ func getSockOptSocket(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, fam
 		if err != nil {
 			return nil, syserr.TranslateNetstackError(err)
 		}
-		return boolToInt32(v), nil
+
+		vP := primitive.Int32(boolToInt32(v))
+		return &vP, nil
 
 	case linux.SO_KEEPALIVE:
 		if outLen < sizeOfInt32 {
@@ -1148,13 +1167,17 @@ func getSockOptSocket(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, fam
 		if err != nil {
 			return nil, syserr.TranslateNetstackError(err)
 		}
-		return boolToInt32(v), nil
+
+		vP := primitive.Int32(boolToInt32(v))
+		return &vP, nil
 
 	case linux.SO_LINGER:
 		if outLen < linux.SizeOfLinger {
 			return nil, syserr.ErrInvalidArgument
 		}
-		return linux.Linger{}, nil
+
+		linger := linux.Linger{}
+		return &linger, nil
 
 	case linux.SO_SNDTIMEO:
 		// TODO(igudger): Linux allows shorter lengths for partial results.
@@ -1162,7 +1185,8 @@ func getSockOptSocket(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, fam
 			return nil, syserr.ErrInvalidArgument
 		}
 
-		return linux.NsecToTimeval(s.SendTimeout()), nil
+		sendTimeout := linux.NsecToTimeval(s.SendTimeout())
+		return &sendTimeout, nil
 
 	case linux.SO_RCVTIMEO:
 		// TODO(igudger): Linux allows shorter lengths for partial results.
@@ -1170,7 +1194,8 @@ func getSockOptSocket(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, fam
 			return nil, syserr.ErrInvalidArgument
 		}
 
-		return linux.NsecToTimeval(s.RecvTimeout()), nil
+		recvTimeout := linux.NsecToTimeval(s.RecvTimeout())
+		return &recvTimeout, nil
 
 	case linux.SO_OOBINLINE:
 		if outLen < sizeOfInt32 {
@@ -1182,7 +1207,8 @@ func getSockOptSocket(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, fam
 			return nil, syserr.TranslateNetstackError(err)
 		}
 
-		return int32(v), nil
+		vP := primitive.Int32(v)
+		return &vP, nil
 
 	case linux.SO_NO_CHECK:
 		if outLen < sizeOfInt32 {
@@ -1193,7 +1219,8 @@ func getSockOptSocket(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, fam
 		if err != nil {
 			return nil, syserr.TranslateNetstackError(err)
 		}
-		return boolToInt32(v), nil
+		vP := primitive.Int32(boolToInt32(v))
+		return &vP, nil
 
 	default:
 		socket.GetSockOptEmitUnimplementedEvent(t, name)
@@ -1202,7 +1229,7 @@ func getSockOptSocket(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, fam
 }
 
 // getSockOptTCP implements GetSockOpt when level is SOL_TCP.
-func getSockOptTCP(t *kernel.Task, ep commonEndpoint, name, outLen int) (interface{}, *syserr.Error) {
+func getSockOptTCP(t *kernel.Task, ep commonEndpoint, name, outLen int) (marshal.Marshallable, *syserr.Error) {
 	switch name {
 	case linux.TCP_NODELAY:
 		if outLen < sizeOfInt32 {
@@ -1213,7 +1240,9 @@ func getSockOptTCP(t *kernel.Task, ep commonEndpoint, name, outLen int) (interfa
 		if err != nil {
 			return nil, syserr.TranslateNetstackError(err)
 		}
-		return boolToInt32(!v), nil
+
+		vP := primitive.Int32(boolToInt32(!v))
+		return &vP, nil
 
 	case linux.TCP_CORK:
 		if outLen < sizeOfInt32 {
@@ -1224,7 +1253,9 @@ func getSockOptTCP(t *kernel.Task, ep commonEndpoint, name, outLen int) (interfa
 		if err != nil {
 			return nil, syserr.TranslateNetstackError(err)
 		}
-		return boolToInt32(v), nil
+
+		vP := primitive.Int32(boolToInt32(v))
+		return &vP, nil
 
 	case linux.TCP_QUICKACK:
 		if outLen < sizeOfInt32 {
@@ -1235,7 +1266,9 @@ func getSockOptTCP(t *kernel.Task, ep commonEndpoint, name, outLen int) (interfa
 		if err != nil {
 			return nil, syserr.TranslateNetstackError(err)
 		}
-		return boolToInt32(v), nil
+
+		vP := primitive.Int32(boolToInt32(v))
+		return &vP, nil
 
 	case linux.TCP_MAXSEG:
 		if outLen < sizeOfInt32 {
@@ -1246,8 +1279,8 @@ func getSockOptTCP(t *kernel.Task, ep commonEndpoint, name, outLen int) (interfa
 		if err != nil {
 			return nil, syserr.TranslateNetstackError(err)
 		}
-
-		return int32(v), nil
+		vP := primitive.Int32(v)
+		return &vP, nil
 
 	case linux.TCP_KEEPIDLE:
 		if outLen < sizeOfInt32 {
@@ -1258,8 +1291,8 @@ func getSockOptTCP(t *kernel.Task, ep commonEndpoint, name, outLen int) (interfa
 		if err := ep.GetSockOpt(&v); err != nil {
 			return nil, syserr.TranslateNetstackError(err)
 		}
-
-		return int32(time.Duration(v) / time.Second), nil
+		keepAliveIdle := primitive.Int32(time.Duration(v) / time.Second)
+		return &keepAliveIdle, nil
 
 	case linux.TCP_KEEPINTVL:
 		if outLen < sizeOfInt32 {
@@ -1270,8 +1303,8 @@ func getSockOptTCP(t *kernel.Task, ep commonEndpoint, name, outLen int) (interfa
 		if err := ep.GetSockOpt(&v); err != nil {
 			return nil, syserr.TranslateNetstackError(err)
 		}
-
-		return int32(time.Duration(v) / time.Second), nil
+		keepAliveInterval := primitive.Int32(time.Duration(v) / time.Second)
+		return &keepAliveInterval, nil
 
 	case linux.TCP_KEEPCNT:
 		if outLen < sizeOfInt32 {
@@ -1282,8 +1315,8 @@ func getSockOptTCP(t *kernel.Task, ep commonEndpoint, name, outLen int) (interfa
 		if err != nil {
 			return nil, syserr.TranslateNetstackError(err)
 		}
-
-		return int32(v), nil
+		vP := primitive.Int32(v)
+		return &vP, nil
 
 	case linux.TCP_USER_TIMEOUT:
 		if outLen < sizeOfInt32 {
@@ -1294,8 +1327,8 @@ func getSockOptTCP(t *kernel.Task, ep commonEndpoint, name, outLen int) (interfa
 		if err := ep.GetSockOpt(&v); err != nil {
 			return nil, syserr.TranslateNetstackError(err)
 		}
-
-		return int32(time.Duration(v) / time.Millisecond), nil
+		tcpUserTimeout := primitive.Int32(time.Duration(v) / time.Millisecond)
+		return &tcpUserTimeout, nil
 
 	case linux.TCP_INFO:
 		var v tcpip.TCPInfoOption
@@ -1308,12 +1341,13 @@ func getSockOptTCP(t *kernel.Task, ep commonEndpoint, name, outLen int) (interfa
 		info := linux.TCPInfo{}
 
 		// Linux truncates the output binary to outLen.
-		ib := binary.Marshal(nil, usermem.ByteOrder, &info)
-		if len(ib) > outLen {
-			ib = ib[:outLen]
+		buf := t.CopyScratchBuffer(info.SizeBytes())
+		info.MarshalUnsafe(buf)
+		if len(buf) > outLen {
+			buf = buf[:outLen]
 		}
-
-		return ib, nil
+		bufP := primitive.ByteSlice(buf)
+		return &bufP, nil
 
 	case linux.TCP_CC_INFO,
 		linux.TCP_NOTSENT_LOWAT,
@@ -1343,7 +1377,9 @@ func getSockOptTCP(t *kernel.Task, ep commonEndpoint, name, outLen int) (interfa
 		}
 		b := make([]byte, toCopy)
 		copy(b, v)
-		return b, nil
+
+		bP := primitive.ByteSlice(b)
+		return &bP, nil
 
 	case linux.TCP_LINGER2:
 		if outLen < sizeOfInt32 {
@@ -1355,7 +1391,8 @@ func getSockOptTCP(t *kernel.Task, ep commonEndpoint, name, outLen int) (interfa
 			return nil, syserr.TranslateNetstackError(err)
 		}
 
-		return int32(time.Duration(v) / time.Second), nil
+		lingerTimeout := primitive.Int32(time.Duration(v) / time.Second)
+		return &lingerTimeout, nil
 
 	case linux.TCP_DEFER_ACCEPT:
 		if outLen < sizeOfInt32 {
@@ -1367,7 +1404,8 @@ func getSockOptTCP(t *kernel.Task, ep commonEndpoint, name, outLen int) (interfa
 			return nil, syserr.TranslateNetstackError(err)
 		}
 
-		return int32(time.Duration(v) / time.Second), nil
+		tcpDeferAccept := primitive.Int32(time.Duration(v) / time.Second)
+		return &tcpDeferAccept, nil
 
 	case linux.TCP_SYNCNT:
 		if outLen < sizeOfInt32 {
@@ -1378,8 +1416,8 @@ func getSockOptTCP(t *kernel.Task, ep commonEndpoint, name, outLen int) (interfa
 		if err != nil {
 			return nil, syserr.TranslateNetstackError(err)
 		}
-
-		return int32(v), nil
+		vP := primitive.Int32(v)
+		return &vP, nil
 
 	case linux.TCP_WINDOW_CLAMP:
 		if outLen < sizeOfInt32 {
@@ -1390,8 +1428,8 @@ func getSockOptTCP(t *kernel.Task, ep commonEndpoint, name, outLen int) (interfa
 		if err != nil {
 			return nil, syserr.TranslateNetstackError(err)
 		}
-
-		return int32(v), nil
+		vP := primitive.Int32(v)
+		return &vP, nil
 	default:
 		emitUnimplementedEventTCP(t, name)
 	}
@@ -1399,7 +1437,7 @@ func getSockOptTCP(t *kernel.Task, ep commonEndpoint, name, outLen int) (interfa
 }
 
 // getSockOptIPv6 implements GetSockOpt when level is SOL_IPV6.
-func getSockOptIPv6(t *kernel.Task, ep commonEndpoint, name, outLen int) (interface{}, *syserr.Error) {
+func getSockOptIPv6(t *kernel.Task, ep commonEndpoint, name, outLen int) (marshal.Marshallable, *syserr.Error) {
 	switch name {
 	case linux.IPV6_V6ONLY:
 		if outLen < sizeOfInt32 {
@@ -1410,7 +1448,9 @@ func getSockOptIPv6(t *kernel.Task, ep commonEndpoint, name, outLen int) (interf
 		if err != nil {
 			return nil, syserr.TranslateNetstackError(err)
 		}
-		return boolToInt32(v), nil
+
+		vP := primitive.Int32(boolToInt32(v))
+		return &vP, nil
 
 	case linux.IPV6_PATHMTU:
 		t.Kernel().EmitUnimplementedEvent(t)
@@ -1418,21 +1458,24 @@ func getSockOptIPv6(t *kernel.Task, ep commonEndpoint, name, outLen int) (interf
 	case linux.IPV6_TCLASS:
 		// Length handling for parity with Linux.
 		if outLen == 0 {
-			return make([]byte, 0), nil
+			var b primitive.ByteSlice
+			return &b, nil
 		}
 		v, err := ep.GetSockOptInt(tcpip.IPv6TrafficClassOption)
 		if err != nil {
 			return nil, syserr.TranslateNetstackError(err)
 		}
 
-		uintv := uint32(v)
+		uintv := primitive.Uint32(v)
 		// Linux truncates the output binary to outLen.
-		ib := binary.Marshal(nil, usermem.ByteOrder, &uintv)
+		ib := t.CopyScratchBuffer(uintv.SizeBytes())
+		uintv.MarshalUnsafe(ib)
 		// Handle cases where outLen is lesser than sizeOfInt32.
 		if len(ib) > outLen {
 			ib = ib[:outLen]
 		}
-		return ib, nil
+		ibP := primitive.ByteSlice(ib)
+		return &ibP, nil
 
 	case linux.IPV6_RECVTCLASS:
 		if outLen < sizeOfInt32 {
@@ -1443,7 +1486,9 @@ func getSockOptIPv6(t *kernel.Task, ep commonEndpoint, name, outLen int) (interf
 		if err != nil {
 			return nil, syserr.TranslateNetstackError(err)
 		}
-		return boolToInt32(v), nil
+
+		vP := primitive.Int32(boolToInt32(v))
+		return &vP, nil
 
 	default:
 		emitUnimplementedEventIPv6(t, name)
@@ -1452,7 +1497,7 @@ func getSockOptIPv6(t *kernel.Task, ep commonEndpoint, name, outLen int) (interf
 }
 
 // getSockOptIP implements GetSockOpt when level is SOL_IP.
-func getSockOptIP(t *kernel.Task, ep commonEndpoint, name, outLen int, family int) (interface{}, *syserr.Error) {
+func getSockOptIP(t *kernel.Task, ep commonEndpoint, name, outLen int, family int) (marshal.Marshallable, *syserr.Error) {
 	switch name {
 	case linux.IP_TTL:
 		if outLen < sizeOfInt32 {
@@ -1465,11 +1510,12 @@ func getSockOptIP(t *kernel.Task, ep commonEndpoint, name, outLen int, family in
 		}
 
 		// Fill in the default value, if needed.
-		if v == 0 {
-			v = DefaultTTL
+		vP := primitive.Int32(v)
+		if vP == 0 {
+			vP = DefaultTTL
 		}
 
-		return int32(v), nil
+		return &vP, nil
 
 	case linux.IP_MULTICAST_TTL:
 		if outLen < sizeOfInt32 {
@@ -1481,7 +1527,8 @@ func getSockOptIP(t *kernel.Task, ep commonEndpoint, name, outLen int, family in
 			return nil, syserr.TranslateNetstackError(err)
 		}
 
-		return int32(v), nil
+		vP := primitive.Int32(v)
+		return &vP, nil
 
 	case linux.IP_MULTICAST_IF:
 		if outLen < len(linux.InetAddr{}) {
@@ -1495,7 +1542,7 @@ func getSockOptIP(t *kernel.Task, ep commonEndpoint, name, outLen int, family in
 
 		a, _ := ConvertAddress(linux.AF_INET, tcpip.FullAddress{Addr: v.InterfaceAddr})
 
-		return a.(*linux.SockAddrInet).Addr, nil
+		return &a.(*linux.SockAddrInet).Addr, nil
 
 	case linux.IP_MULTICAST_LOOP:
 		if outLen < sizeOfInt32 {
@@ -1506,21 +1553,26 @@ func getSockOptIP(t *kernel.Task, ep commonEndpoint, name, outLen int, family in
 		if err != nil {
 			return nil, syserr.TranslateNetstackError(err)
 		}
-		return boolToInt32(v), nil
+
+		vP := primitive.Int32(boolToInt32(v))
+		return &vP, nil
 
 	case linux.IP_TOS:
 		// Length handling for parity with Linux.
 		if outLen == 0 {
-			return []byte(nil), nil
+			var b primitive.ByteSlice
+			return &b, nil
 		}
 		v, err := ep.GetSockOptInt(tcpip.IPv4TOSOption)
 		if err != nil {
 			return nil, syserr.TranslateNetstackError(err)
 		}
 		if outLen < sizeOfInt32 {
-			return uint8(v), nil
+			vP := primitive.Uint8(v)
+			return &vP, nil
 		}
-		return int32(v), nil
+		vP := primitive.Int32(v)
+		return &vP, nil
 
 	case linux.IP_RECVTOS:
 		if outLen < sizeOfInt32 {
@@ -1531,7 +1583,9 @@ func getSockOptIP(t *kernel.Task, ep commonEndpoint, name, outLen int, family in
 		if err != nil {
 			return nil, syserr.TranslateNetstackError(err)
 		}
-		return boolToInt32(v), nil
+
+		vP := primitive.Int32(boolToInt32(v))
+		return &vP, nil
 
 	case linux.IP_PKTINFO:
 		if outLen < sizeOfInt32 {
@@ -1542,7 +1596,9 @@ func getSockOptIP(t *kernel.Task, ep commonEndpoint, name, outLen int, family in
 		if err != nil {
 			return nil, syserr.TranslateNetstackError(err)
 		}
-		return boolToInt32(v), nil
+
+		vP := primitive.Int32(boolToInt32(v))
+		return &vP, nil
 
 	default:
 		emitUnimplementedEventIP(t, name)
@@ -2468,6 +2524,23 @@ func (s *socketOpsCommon) fillCmsgInq(cmsg *socket.ControlMessages) {
 	cmsg.IP.Inq = int32(len(s.readView) + rcvBufUsed)
 }
 
+func toLinuxPacketType(pktType tcpip.PacketType) uint8 {
+	switch pktType {
+	case tcpip.PacketHost:
+		return linux.PACKET_HOST
+	case tcpip.PacketOtherHost:
+		return linux.PACKET_OTHERHOST
+	case tcpip.PacketOutgoing:
+		return linux.PACKET_OUTGOING
+	case tcpip.PacketBroadcast:
+		return linux.PACKET_BROADCAST
+	case tcpip.PacketMulticast:
+		return linux.PACKET_MULTICAST
+	default:
+		panic(fmt.Sprintf("unknown packet type: %d", pktType))
+	}
+}
+
 // nonBlockingRead issues a non-blocking read.
 //
 // TODO(b/78348848): Support timestamps for stream sockets.
@@ -2526,6 +2599,7 @@ func (s *socketOpsCommon) nonBlockingRead(ctx context.Context, dst usermem.IOSeq
 		switch v := addr.(type) {
 		case *linux.SockAddrLink:
 			v.Protocol = htons(uint16(s.linkPacketInfo.Protocol))
+			v.PacketType = toLinuxPacketType(s.linkPacketInfo.PktType)
 		}
 	}
 
@@ -2761,6 +2835,11 @@ func (s *SocketOperations) Ioctl(ctx context.Context, _ *fs.File, io usermem.IO,
 }
 
 func (s *socketOpsCommon) ioctl(ctx context.Context, io usermem.IO, args arch.SyscallArguments) (uintptr, error) {
+	t := kernel.TaskFromContext(ctx)
+	if t == nil {
+		panic("ioctl(2) may only be called from a task goroutine")
+	}
+
 	// SIOCGSTAMP is implemented by netstack rather than all commonEndpoint
 	// sockets.
 	// TODO(b/78348848): Add a commonEndpoint method to support SIOCGSTAMP.
@@ -2773,9 +2852,7 @@ func (s *socketOpsCommon) ioctl(ctx context.Context, io usermem.IO, args arch.Sy
 		}
 
 		tv := linux.NsecToTimeval(s.timestampNS)
-		_, err := usermem.CopyObjectOut(ctx, io, args[2].Pointer(), &tv, usermem.IOOpts{
-			AddressSpaceActive: true,
-		})
+		_, err := tv.CopyOut(t, args[2].Pointer())
 		return 0, err
 
 	case linux.TIOCINQ:
@@ -2794,9 +2871,8 @@ func (s *socketOpsCommon) ioctl(ctx context.Context, io usermem.IO, args arch.Sy
 		}
 
 		// Copy result to userspace.
-		_, err := usermem.CopyObjectOut(ctx, io, args[2].Pointer(), int32(v), usermem.IOOpts{
-			AddressSpaceActive: true,
-		})
+		vP := primitive.Int32(v)
+		_, err := vP.CopyOut(t, args[2].Pointer())
 		return 0, err
 	}
 
@@ -2805,6 +2881,11 @@ func (s *socketOpsCommon) ioctl(ctx context.Context, io usermem.IO, args arch.Sy
 
 // Ioctl performs a socket ioctl.
 func Ioctl(ctx context.Context, ep commonEndpoint, io usermem.IO, args arch.SyscallArguments) (uintptr, error) {
+	t := kernel.TaskFromContext(ctx)
+	if t == nil {
+		panic("ioctl(2) may only be called from a task goroutine")
+	}
+
 	switch arg := int(args[1].Int()); arg {
 	case linux.SIOCGIFFLAGS,
 		linux.SIOCGIFADDR,
@@ -2821,37 +2902,28 @@ func Ioctl(ctx context.Context, ep commonEndpoint, io usermem.IO, args arch.Sysc
 		linux.SIOCETHTOOL:
 
 		var ifr linux.IFReq
-		if _, err := usermem.CopyObjectIn(ctx, io, args[2].Pointer(), &ifr, usermem.IOOpts{
-			AddressSpaceActive: true,
-		}); err != nil {
+		if _, err := ifr.CopyIn(t, args[2].Pointer()); err != nil {
 			return 0, err
 		}
 		if err := interfaceIoctl(ctx, io, arg, &ifr); err != nil {
 			return 0, err.ToError()
 		}
-		_, err := usermem.CopyObjectOut(ctx, io, args[2].Pointer(), &ifr, usermem.IOOpts{
-			AddressSpaceActive: true,
-		})
+		_, err := ifr.CopyOut(t, args[2].Pointer())
 		return 0, err
 
 	case linux.SIOCGIFCONF:
 		// Return a list of interface addresses or the buffer size
 		// necessary to hold the list.
 		var ifc linux.IFConf
-		if _, err := usermem.CopyObjectIn(ctx, io, args[2].Pointer(), &ifc, usermem.IOOpts{
-			AddressSpaceActive: true,
-		}); err != nil {
+		if _, err := ifc.CopyIn(t, args[2].Pointer()); err != nil {
 			return 0, err
 		}
 
-		if err := ifconfIoctl(ctx, io, &ifc); err != nil {
+		if err := ifconfIoctl(ctx, t, io, &ifc); err != nil {
 			return 0, err
 		}
 
-		_, err := usermem.CopyObjectOut(ctx, io, args[2].Pointer(), ifc, usermem.IOOpts{
-			AddressSpaceActive: true,
-		})
-
+		_, err := ifc.CopyOut(t, args[2].Pointer())
 		return 0, err
 
 	case linux.TIOCINQ:
@@ -2864,9 +2936,8 @@ func Ioctl(ctx context.Context, ep commonEndpoint, io usermem.IO, args arch.Sysc
 			v = math.MaxInt32
 		}
 		// Copy result to userspace.
-		_, err := usermem.CopyObjectOut(ctx, io, args[2].Pointer(), int32(v), usermem.IOOpts{
-			AddressSpaceActive: true,
-		})
+		vP := primitive.Int32(v)
+		_, err := vP.CopyOut(t, args[2].Pointer())
 		return 0, err
 
 	case linux.TIOCOUTQ:
@@ -2880,9 +2951,8 @@ func Ioctl(ctx context.Context, ep commonEndpoint, io usermem.IO, args arch.Sysc
 		}
 
 		// Copy result to userspace.
-		_, err := usermem.CopyObjectOut(ctx, io, args[2].Pointer(), int32(v), usermem.IOOpts{
-			AddressSpaceActive: true,
-		})
+		vP := primitive.Int32(v)
+		_, err := vP.CopyOut(t, args[2].Pointer())
 		return 0, err
 
 	case linux.SIOCGIFMEM, linux.SIOCGIFPFLAGS, linux.SIOCGMIIPHY, linux.SIOCGMIIREG:
@@ -3031,7 +3101,7 @@ func interfaceIoctl(ctx context.Context, io usermem.IO, arg int, ifr *linux.IFRe
 }
 
 // ifconfIoctl populates a struct ifconf for the SIOCGIFCONF ioctl.
-func ifconfIoctl(ctx context.Context, io usermem.IO, ifc *linux.IFConf) error {
+func ifconfIoctl(ctx context.Context, t *kernel.Task, io usermem.IO, ifc *linux.IFConf) error {
 	// If Ptr is NULL, return the necessary buffer size via Len.
 	// Otherwise, write up to Len bytes starting at Ptr containing ifreq
 	// structs.
@@ -3068,9 +3138,7 @@ func ifconfIoctl(ctx context.Context, io usermem.IO, ifc *linux.IFConf) error {
 			// Copy the ifr to userspace.
 			dst := uintptr(ifc.Ptr) + uintptr(ifc.Len)
 			ifc.Len += int32(linux.SizeOfIFReq)
-			if _, err := usermem.CopyObjectOut(ctx, io, usermem.Addr(dst), ifr, usermem.IOOpts{
-				AddressSpaceActive: true,
-			}); err != nil {
+			if _, err := ifr.CopyOut(t, usermem.Addr(dst)); err != nil {
 				return err
 			}
 		}
diff --git a/pkg/sentry/socket/netstack/netstack_vfs2.go b/pkg/sentry/socket/netstack/netstack_vfs2.go
index d65a89316..a9025b0ec 100644
--- a/pkg/sentry/socket/netstack/netstack_vfs2.go
+++ b/pkg/sentry/socket/netstack/netstack_vfs2.go
@@ -31,6 +31,8 @@ import (
 	"gvisor.dev/gvisor/pkg/tcpip"
 	"gvisor.dev/gvisor/pkg/usermem"
 	"gvisor.dev/gvisor/pkg/waiter"
+	"gvisor.dev/gvisor/tools/go_marshal/marshal"
+	"gvisor.dev/gvisor/tools/go_marshal/primitive"
 )
 
 // SocketVFS2 encapsulates all the state needed to represent a network stack
@@ -200,7 +202,7 @@ func (s *SocketVFS2) Ioctl(ctx context.Context, uio usermem.IO, args arch.Syscal
 
 // GetSockOpt implements the linux syscall getsockopt(2) for sockets backed by
 // tcpip.Endpoint.
-func (s *SocketVFS2) GetSockOpt(t *kernel.Task, level, name int, outPtr usermem.Addr, outLen int) (interface{}, *syserr.Error) {
+func (s *SocketVFS2) GetSockOpt(t *kernel.Task, level, name int, outPtr usermem.Addr, outLen int) (marshal.Marshallable, *syserr.Error) {
 	// TODO(b/78348848): Unlike other socket options, SO_TIMESTAMP is
 	// implemented specifically for netstack.SocketVFS2 rather than
 	// commonEndpoint. commonEndpoint should be extended to support socket
@@ -210,25 +212,25 @@ func (s *SocketVFS2) GetSockOpt(t *kernel.Task, level, name int, outPtr usermem.
 		if outLen < sizeOfInt32 {
 			return nil, syserr.ErrInvalidArgument
 		}
-		val := int32(0)
+		val := primitive.Int32(0)
 		s.readMu.Lock()
 		defer s.readMu.Unlock()
 		if s.sockOptTimestamp {
 			val = 1
 		}
-		return val, nil
+		return &val, nil
 	}
 	if level == linux.SOL_TCP && name == linux.TCP_INQ {
 		if outLen < sizeOfInt32 {
 			return nil, syserr.ErrInvalidArgument
 		}
-		val := int32(0)
+		val := primitive.Int32(0)
 		s.readMu.Lock()
 		defer s.readMu.Unlock()
 		if s.sockOptInq {
 			val = 1
 		}
-		return val, nil
+		return &val, nil
 	}
 
 	if s.skType == linux.SOCK_RAW && level == linux.IPPROTO_IP {
@@ -246,7 +248,7 @@ func (s *SocketVFS2) GetSockOpt(t *kernel.Task, level, name int, outPtr usermem.
 			if err != nil {
 				return nil, err
 			}
-			return info, nil
+			return &info, nil
 
 		case linux.IPT_SO_GET_ENTRIES:
 			if outLen < linux.SizeOfIPTGetEntries {
@@ -261,7 +263,7 @@ func (s *SocketVFS2) GetSockOpt(t *kernel.Task, level, name int, outPtr usermem.
 			if err != nil {
 				return nil, err
 			}
-			return entries, nil
+			return &entries, nil
 
 		}
 	}
diff --git a/pkg/sentry/socket/socket.go b/pkg/sentry/socket/socket.go
index fcd7f9d7f..d112757fb 100644
--- a/pkg/sentry/socket/socket.go
+++ b/pkg/sentry/socket/socket.go
@@ -35,6 +35,7 @@ import (
 	"gvisor.dev/gvisor/pkg/syserr"
 	"gvisor.dev/gvisor/pkg/tcpip"
 	"gvisor.dev/gvisor/pkg/usermem"
+	"gvisor.dev/gvisor/tools/go_marshal/marshal"
 )
 
 // ControlMessages represents the union of unix control messages and tcpip
@@ -86,7 +87,7 @@ type SocketOps interface {
 	Shutdown(t *kernel.Task, how int) *syserr.Error
 
 	// GetSockOpt implements the getsockopt(2) linux syscall.
-	GetSockOpt(t *kernel.Task, level int, name int, outPtr usermem.Addr, outLen int) (interface{}, *syserr.Error)
+	GetSockOpt(t *kernel.Task, level int, name int, outPtr usermem.Addr, outLen int) (marshal.Marshallable, *syserr.Error)
 
 	// SetSockOpt implements the setsockopt(2) linux syscall.
 	SetSockOpt(t *kernel.Task, level int, name int, opt []byte) *syserr.Error
diff --git a/pkg/sentry/socket/unix/BUILD b/pkg/sentry/socket/unix/BUILD
index cca5e70f1..061a689a9 100644
--- a/pkg/sentry/socket/unix/BUILD
+++ b/pkg/sentry/socket/unix/BUILD
@@ -35,5 +35,6 @@ go_library(
         "//pkg/tcpip",
         "//pkg/usermem",
         "//pkg/waiter",
+        "//tools/go_marshal/marshal",
     ],
 )
diff --git a/pkg/sentry/socket/unix/unix.go b/pkg/sentry/socket/unix/unix.go
index 4bb2b6ff4..0482d33cf 100644
--- a/pkg/sentry/socket/unix/unix.go
+++ b/pkg/sentry/socket/unix/unix.go
@@ -40,6 +40,7 @@ import (
 	"gvisor.dev/gvisor/pkg/tcpip"
 	"gvisor.dev/gvisor/pkg/usermem"
 	"gvisor.dev/gvisor/pkg/waiter"
+	"gvisor.dev/gvisor/tools/go_marshal/marshal"
 )
 
 // SocketOperations is a Unix socket. It is similar to a netstack socket,
@@ -184,7 +185,7 @@ func (s *SocketOperations) Ioctl(ctx context.Context, _ *fs.File, io usermem.IO,
 
 // GetSockOpt implements the linux syscall getsockopt(2) for sockets backed by
 // a transport.Endpoint.
-func (s *SocketOperations) GetSockOpt(t *kernel.Task, level, name int, outPtr usermem.Addr, outLen int) (interface{}, *syserr.Error) {
+func (s *SocketOperations) GetSockOpt(t *kernel.Task, level, name int, outPtr usermem.Addr, outLen int) (marshal.Marshallable, *syserr.Error) {
 	return netstack.GetSockOpt(t, s, s.ep, linux.AF_UNIX, s.ep.Type(), level, name, outLen)
 }
 
diff --git a/pkg/sentry/socket/unix/unix_vfs2.go b/pkg/sentry/socket/unix/unix_vfs2.go
index ff2149250..05c16fcfe 100644
--- a/pkg/sentry/socket/unix/unix_vfs2.go
+++ b/pkg/sentry/socket/unix/unix_vfs2.go
@@ -32,6 +32,7 @@ import (
 	"gvisor.dev/gvisor/pkg/tcpip"
 	"gvisor.dev/gvisor/pkg/usermem"
 	"gvisor.dev/gvisor/pkg/waiter"
+	"gvisor.dev/gvisor/tools/go_marshal/marshal"
 )
 
 // SocketVFS2 implements socket.SocketVFS2 (and by extension,
@@ -89,7 +90,7 @@ func NewFileDescription(ep transport.Endpoint, stype linux.SockType, flags uint3
 
 // GetSockOpt implements the linux syscall getsockopt(2) for sockets backed by
 // a transport.Endpoint.
-func (s *SocketVFS2) GetSockOpt(t *kernel.Task, level int, name int, outPtr usermem.Addr, outLen int) (interface{}, *syserr.Error) {
+func (s *SocketVFS2) GetSockOpt(t *kernel.Task, level, name int, outPtr usermem.Addr, outLen int) (marshal.Marshallable, *syserr.Error) {
 	return netstack.GetSockOpt(t, s, s.ep, linux.AF_UNIX, s.ep.Type(), level, name, outLen)
 }
 
diff --git a/pkg/sentry/syscalls/linux/BUILD b/pkg/sentry/syscalls/linux/BUILD
index 217fcfef2..4a9b04fd0 100644
--- a/pkg/sentry/syscalls/linux/BUILD
+++ b/pkg/sentry/syscalls/linux/BUILD
@@ -99,5 +99,7 @@ go_library(
         "//pkg/syserror",
         "//pkg/usermem",
         "//pkg/waiter",
+        "//tools/go_marshal/marshal",
+        "//tools/go_marshal/primitive",
     ],
 )
diff --git a/pkg/sentry/syscalls/linux/linux64.go b/pkg/sentry/syscalls/linux/linux64.go
index ea4f9b1a7..80c65164a 100644
--- a/pkg/sentry/syscalls/linux/linux64.go
+++ b/pkg/sentry/syscalls/linux/linux64.go
@@ -325,8 +325,8 @@ var AMD64 = &kernel.SyscallTable{
 		270: syscalls.Supported("pselect", Pselect),
 		271: syscalls.Supported("ppoll", Ppoll),
 		272: syscalls.PartiallySupported("unshare", Unshare, "Mount, cgroup namespaces not supported. Network namespaces supported but must be empty.", nil),
-		273: syscalls.Error("set_robust_list", syserror.ENOSYS, "Obsolete.", nil),
-		274: syscalls.Error("get_robust_list", syserror.ENOSYS, "Obsolete.", nil),
+		273: syscalls.Supported("set_robust_list", SetRobustList),
+		274: syscalls.Supported("get_robust_list", GetRobustList),
 		275: syscalls.Supported("splice", Splice),
 		276: syscalls.Supported("tee", Tee),
 		277: syscalls.PartiallySupported("sync_file_range", SyncFileRange, "Full data flush is not guaranteed at this time.", nil),
diff --git a/pkg/sentry/syscalls/linux/sys_futex.go b/pkg/sentry/syscalls/linux/sys_futex.go
index b68261f72..f04d78856 100644
--- a/pkg/sentry/syscalls/linux/sys_futex.go
+++ b/pkg/sentry/syscalls/linux/sys_futex.go
@@ -198,7 +198,7 @@ func Futex(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall
 		switch cmd {
 		case linux.FUTEX_WAIT:
 			// WAIT uses a relative timeout.
-			mask = ^uint32(0)
+			mask = linux.FUTEX_BITSET_MATCH_ANY
 			var timeoutDur time.Duration
 			if !forever {
 				timeoutDur = time.Duration(timespec.ToNsecCapped()) * time.Nanosecond
@@ -286,3 +286,49 @@ func Futex(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall
 		return 0, nil, syserror.ENOSYS
 	}
 }
+
+// SetRobustList implements linux syscall set_robust_list(2).
+func SetRobustList(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
+	// Despite the syscall using the name 'pid' for this variable, it is
+	// very much a tid.
+	head := args[0].Pointer()
+	length := args[1].SizeT()
+
+	if length != uint(linux.SizeOfRobustListHead) {
+		return 0, nil, syserror.EINVAL
+	}
+	t.SetRobustList(head)
+	return 0, nil, nil
+}
+
+// GetRobustList implements linux syscall get_robust_list(2).
+func GetRobustList(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
+	// Despite the syscall using the name 'pid' for this variable, it is
+	// very much a tid.
+	tid := args[0].Int()
+	head := args[1].Pointer()
+	size := args[2].Pointer()
+
+	if tid < 0 {
+		return 0, nil, syserror.EINVAL
+	}
+
+	ot := t
+	if tid != 0 {
+		if ot = t.PIDNamespace().TaskWithID(kernel.ThreadID(tid)); ot == nil {
+			return 0, nil, syserror.ESRCH
+		}
+	}
+
+	// Copy out head pointer.
+	if _, err := t.CopyOut(head, uint64(ot.GetRobustList())); err != nil {
+		return 0, nil, err
+	}
+
+	// Copy out size, which is a constant.
+	if _, err := t.CopyOut(size, uint64(linux.SizeOfRobustListHead)); err != nil {
+		return 0, nil, err
+	}
+
+	return 0, nil, nil
+}
diff --git a/pkg/sentry/syscalls/linux/sys_socket.go b/pkg/sentry/syscalls/linux/sys_socket.go
index 0760af77b..414fce8e3 100644
--- a/pkg/sentry/syscalls/linux/sys_socket.go
+++ b/pkg/sentry/syscalls/linux/sys_socket.go
@@ -29,6 +29,8 @@ import (
 	"gvisor.dev/gvisor/pkg/syserr"
 	"gvisor.dev/gvisor/pkg/syserror"
 	"gvisor.dev/gvisor/pkg/usermem"
+	"gvisor.dev/gvisor/tools/go_marshal/marshal"
+	"gvisor.dev/gvisor/tools/go_marshal/primitive"
 )
 
 // LINT.IfChange
@@ -474,7 +476,7 @@ func GetSockOpt(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sy
 	}
 
 	if v != nil {
-		if _, err := t.CopyOut(optValAddr, v); err != nil {
+		if _, err := v.CopyOut(t, optValAddr); err != nil {
 			return 0, nil, err
 		}
 	}
@@ -484,7 +486,7 @@ func GetSockOpt(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sy
 
 // getSockOpt tries to handle common socket options, or dispatches to a specific
 // socket implementation.
-func getSockOpt(t *kernel.Task, s socket.Socket, level, name int, optValAddr usermem.Addr, len int) (interface{}, *syserr.Error) {
+func getSockOpt(t *kernel.Task, s socket.Socket, level, name int, optValAddr usermem.Addr, len int) (marshal.Marshallable, *syserr.Error) {
 	if level == linux.SOL_SOCKET {
 		switch name {
 		case linux.SO_TYPE, linux.SO_DOMAIN, linux.SO_PROTOCOL:
@@ -496,13 +498,16 @@ func getSockOpt(t *kernel.Task, s socket.Socket, level, name int, optValAddr use
 		switch name {
 		case linux.SO_TYPE:
 			_, skType, _ := s.Type()
-			return int32(skType), nil
+			v := primitive.Int32(skType)
+			return &v, nil
 		case linux.SO_DOMAIN:
 			family, _, _ := s.Type()
-			return int32(family), nil
+			v := primitive.Int32(family)
+			return &v, nil
 		case linux.SO_PROTOCOL:
 			_, _, protocol := s.Type()
-			return int32(protocol), nil
+			v := primitive.Int32(protocol)
+			return &v, nil
 		}
 	}
 
@@ -539,7 +544,7 @@ func SetSockOpt(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sy
 		return 0, nil, syserror.EINVAL
 	}
 	buf := t.CopyScratchBuffer(int(optLen))
-	if _, err := t.CopyIn(optValAddr, &buf); err != nil {
+	if _, err := t.CopyInBytes(optValAddr, buf); err != nil {
 		return 0, nil, err
 	}
 
diff --git a/pkg/sentry/syscalls/linux/vfs2/BUILD b/pkg/sentry/syscalls/linux/vfs2/BUILD
index 0c740335b..64696b438 100644
--- a/pkg/sentry/syscalls/linux/vfs2/BUILD
+++ b/pkg/sentry/syscalls/linux/vfs2/BUILD
@@ -72,5 +72,7 @@ go_library(
         "//pkg/syserror",
         "//pkg/usermem",
         "//pkg/waiter",
+        "//tools/go_marshal/marshal",
+        "//tools/go_marshal/primitive",
     ],
 )
diff --git a/pkg/sentry/syscalls/linux/vfs2/mount.go b/pkg/sentry/syscalls/linux/vfs2/mount.go
index adeaa39cc..ea337de7c 100644
--- a/pkg/sentry/syscalls/linux/vfs2/mount.go
+++ b/pkg/sentry/syscalls/linux/vfs2/mount.go
@@ -77,8 +77,7 @@ func Mount(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall
 
 	// Silently allow MS_NOSUID, since we don't implement set-id bits
 	// anyway.
-	const unsupportedFlags = linux.MS_NODEV |
-		linux.MS_NODIRATIME | linux.MS_STRICTATIME
+	const unsupportedFlags = linux.MS_NODIRATIME | linux.MS_STRICTATIME
 
 	// Linux just allows passing any flags to mount(2) - it won't fail when
 	// unknown or unsupported flags are passed. Since we don't implement
@@ -94,6 +93,12 @@ func Mount(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall
 	if flags&linux.MS_NOEXEC == linux.MS_NOEXEC {
 		opts.Flags.NoExec = true
 	}
+	if flags&linux.MS_NODEV == linux.MS_NODEV {
+		opts.Flags.NoDev = true
+	}
+	if flags&linux.MS_NOSUID == linux.MS_NOSUID {
+		opts.Flags.NoSUID = true
+	}
 	if flags&linux.MS_RDONLY == linux.MS_RDONLY {
 		opts.ReadOnly = true
 	}
diff --git a/pkg/sentry/syscalls/linux/vfs2/setstat.go b/pkg/sentry/syscalls/linux/vfs2/setstat.go
index 09ecfed26..6daedd173 100644
--- a/pkg/sentry/syscalls/linux/vfs2/setstat.go
+++ b/pkg/sentry/syscalls/linux/vfs2/setstat.go
@@ -178,6 +178,7 @@ func Truncate(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc
 			Mask: linux.STATX_SIZE,
 			Size: uint64(length),
 		},
+		NeedWritePerm: true,
 	})
 	return 0, nil, handleSetSizeError(t, err)
 }
@@ -197,6 +198,10 @@ func Ftruncate(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys
 	}
 	defer file.DecRef()
 
+	if !file.IsWritable() {
+		return 0, nil, syserror.EINVAL
+	}
+
 	err := file.SetStat(t, vfs.SetStatOptions{
 		Stat: linux.Statx{
 			Mask: linux.STATX_SIZE,
diff --git a/pkg/sentry/syscalls/linux/vfs2/socket.go b/pkg/sentry/syscalls/linux/vfs2/socket.go
index 10b668477..8096a8f9c 100644
--- a/pkg/sentry/syscalls/linux/vfs2/socket.go
+++ b/pkg/sentry/syscalls/linux/vfs2/socket.go
@@ -30,6 +30,8 @@ import (
 	"gvisor.dev/gvisor/pkg/syserr"
 	"gvisor.dev/gvisor/pkg/syserror"
 	"gvisor.dev/gvisor/pkg/usermem"
+	"gvisor.dev/gvisor/tools/go_marshal/marshal"
+	"gvisor.dev/gvisor/tools/go_marshal/primitive"
 )
 
 // minListenBacklog is the minimum reasonable backlog for listening sockets.
@@ -477,7 +479,7 @@ func GetSockOpt(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sy
 	}
 
 	if v != nil {
-		if _, err := t.CopyOut(optValAddr, v); err != nil {
+		if _, err := v.CopyOut(t, optValAddr); err != nil {
 			return 0, nil, err
 		}
 	}
@@ -487,7 +489,7 @@ func GetSockOpt(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sy
 
 // getSockOpt tries to handle common socket options, or dispatches to a specific
 // socket implementation.
-func getSockOpt(t *kernel.Task, s socket.SocketVFS2, level, name int, optValAddr usermem.Addr, len int) (interface{}, *syserr.Error) {
+func getSockOpt(t *kernel.Task, s socket.SocketVFS2, level, name int, optValAddr usermem.Addr, len int) (marshal.Marshallable, *syserr.Error) {
 	if level == linux.SOL_SOCKET {
 		switch name {
 		case linux.SO_TYPE, linux.SO_DOMAIN, linux.SO_PROTOCOL:
@@ -499,13 +501,16 @@ func getSockOpt(t *kernel.Task, s socket.SocketVFS2, level, name int, optValAddr
 		switch name {
 		case linux.SO_TYPE:
 			_, skType, _ := s.Type()
-			return int32(skType), nil
+			v := primitive.Int32(skType)
+			return &v, nil
 		case linux.SO_DOMAIN:
 			family, _, _ := s.Type()
-			return int32(family), nil
+			v := primitive.Int32(family)
+			return &v, nil
 		case linux.SO_PROTOCOL:
 			_, _, protocol := s.Type()
-			return int32(protocol), nil
+			v := primitive.Int32(protocol)
+			return &v, nil
 		}
 	}
 
@@ -542,7 +547,7 @@ func SetSockOpt(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sy
 		return 0, nil, syserror.EINVAL
 	}
 	buf := t.CopyScratchBuffer(int(optLen))
-	if _, err := t.CopyIn(optValAddr, &buf); err != nil {
+	if _, err := t.CopyInBytes(optValAddr, buf); err != nil {
 		return 0, nil, err
 	}
 
diff --git a/pkg/sentry/syscalls/linux/vfs2/splice.go b/pkg/sentry/syscalls/linux/vfs2/splice.go
index 945a364a7..63ab11f8c 100644
--- a/pkg/sentry/syscalls/linux/vfs2/splice.go
+++ b/pkg/sentry/syscalls/linux/vfs2/splice.go
@@ -15,12 +15,15 @@
 package vfs2
 
 import (
+	"io"
+
 	"gvisor.dev/gvisor/pkg/abi/linux"
 	"gvisor.dev/gvisor/pkg/sentry/arch"
 	"gvisor.dev/gvisor/pkg/sentry/kernel"
 	"gvisor.dev/gvisor/pkg/sentry/kernel/pipe"
 	"gvisor.dev/gvisor/pkg/sentry/vfs"
 	"gvisor.dev/gvisor/pkg/syserror"
+	"gvisor.dev/gvisor/pkg/usermem"
 	"gvisor.dev/gvisor/pkg/waiter"
 )
 
@@ -110,16 +113,20 @@ func Splice(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal
 
 	// Move data.
 	var (
-		n     int64
-		err   error
-		inCh  chan struct{}
-		outCh chan struct{}
+		n   int64
+		err error
 	)
+	dw := dualWaiter{
+		inFile:  inFile,
+		outFile: outFile,
+	}
+	defer dw.destroy()
 	for {
 		// If both input and output are pipes, delegate to the pipe
-		// implementation. Otherwise, exactly one end is a pipe, which we
-		// ensure is consistently ordered after the non-pipe FD's locks by
-		// passing the pipe FD as usermem.IO to the non-pipe end.
+		// implementation. Otherwise, exactly one end is a pipe, which
+		// we ensure is consistently ordered after the non-pipe FD's
+		// locks by passing the pipe FD as usermem.IO to the non-pipe
+		// end.
 		switch {
 		case inIsPipe && outIsPipe:
 			n, err = pipe.Splice(t, outPipeFD, inPipeFD, count)
@@ -137,38 +144,15 @@ func Splice(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal
 			} else {
 				n, err = inFile.Read(t, outPipeFD.IOSequence(count), vfs.ReadOptions{})
 			}
+		default:
+			panic("not possible")
 		}
+
 		if n != 0 || err != syserror.ErrWouldBlock || nonBlock {
 			break
 		}
-
-		// Note that the blocking behavior here is a bit different than the
-		// normal pattern. Because we need to have both data to read and data
-		// to write simultaneously, we actually explicitly block on both of
-		// these cases in turn before returning to the splice operation.
-		if inFile.Readiness(eventMaskRead)&eventMaskRead == 0 {
-			if inCh == nil {
-				inCh = make(chan struct{}, 1)
-				inW, _ := waiter.NewChannelEntry(inCh)
-				inFile.EventRegister(&inW, eventMaskRead)
-				defer inFile.EventUnregister(&inW)
-				continue // Need to refresh readiness.
-			}
-			if err = t.Block(inCh); err != nil {
-				break
-			}
-		}
-		if outFile.Readiness(eventMaskWrite)&eventMaskWrite == 0 {
-			if outCh == nil {
-				outCh = make(chan struct{}, 1)
-				outW, _ := waiter.NewChannelEntry(outCh)
-				outFile.EventRegister(&outW, eventMaskWrite)
-				defer outFile.EventUnregister(&outW)
-				continue // Need to refresh readiness.
-			}
-			if err = t.Block(outCh); err != nil {
-				break
-			}
+		if err = dw.waitForBoth(t); err != nil {
+			break
 		}
 	}
 
@@ -247,45 +231,256 @@ func Tee(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallCo
 
 	// Copy data.
 	var (
-		inCh  chan struct{}
-		outCh chan struct{}
+		n   int64
+		err error
 	)
+	dw := dualWaiter{
+		inFile:  inFile,
+		outFile: outFile,
+	}
+	defer dw.destroy()
 	for {
-		n, err := pipe.Tee(t, outPipeFD, inPipeFD, count)
-		if n != 0 {
-			return uintptr(n), nil, nil
+		n, err = pipe.Tee(t, outPipeFD, inPipeFD, count)
+		if n != 0 || err != syserror.ErrWouldBlock || nonBlock {
+			break
+		}
+		if err = dw.waitForBoth(t); err != nil {
+			break
+		}
+	}
+	if n == 0 {
+		return 0, nil, err
+	}
+	outFile.Dentry().InotifyWithParent(linux.IN_MODIFY, 0, vfs.PathEvent)
+	return uintptr(n), nil, nil
+}
+
+// Sendfile implements linux system call sendfile(2).
+func Sendfile(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
+	outFD := args[0].Int()
+	inFD := args[1].Int()
+	offsetAddr := args[2].Pointer()
+	count := int64(args[3].SizeT())
+
+	inFile := t.GetFileVFS2(inFD)
+	if inFile == nil {
+		return 0, nil, syserror.EBADF
+	}
+	defer inFile.DecRef()
+	if !inFile.IsReadable() {
+		return 0, nil, syserror.EBADF
+	}
+
+	outFile := t.GetFileVFS2(outFD)
+	if outFile == nil {
+		return 0, nil, syserror.EBADF
+	}
+	defer outFile.DecRef()
+	if !outFile.IsWritable() {
+		return 0, nil, syserror.EBADF
+	}
+
+	// Verify that the outFile Append flag is not set.
+	if outFile.StatusFlags()&linux.O_APPEND != 0 {
+		return 0, nil, syserror.EINVAL
+	}
+
+	// Verify that inFile is a regular file or block device. This is a
+	// requirement; the same check appears in Linux
+	// (fs/splice.c:splice_direct_to_actor).
+	if stat, err := inFile.Stat(t, vfs.StatOptions{Mask: linux.STATX_TYPE}); err != nil {
+		return 0, nil, err
+	} else if stat.Mask&linux.STATX_TYPE == 0 ||
+		(stat.Mode&linux.S_IFMT != linux.S_IFREG && stat.Mode&linux.S_IFMT != linux.S_IFBLK) {
+		return 0, nil, syserror.EINVAL
+	}
+
+	// Copy offset if it exists.
+	offset := int64(-1)
+	if offsetAddr != 0 {
+		if inFile.Options().DenyPRead {
+			return 0, nil, syserror.ESPIPE
 		}
-		if err != syserror.ErrWouldBlock || nonBlock {
+		if _, err := t.CopyIn(offsetAddr, &offset); err != nil {
 			return 0, nil, err
 		}
+		if offset < 0 {
+			return 0, nil, syserror.EINVAL
+		}
+		if offset+count < 0 {
+			return 0, nil, syserror.EINVAL
+		}
+	}
+
+	// Validate count. This must come after offset checks.
+	if count < 0 {
+		return 0, nil, syserror.EINVAL
+	}
+	if count == 0 {
+		return 0, nil, nil
+	}
+	if count > int64(kernel.MAX_RW_COUNT) {
+		count = int64(kernel.MAX_RW_COUNT)
+	}
 
-		// Note that the blocking behavior here is a bit different than the
-		// normal pattern. Because we need to have both data to read and data
-		// to write simultaneously, we actually explicitly block on both of
-		// these cases in turn before returning to the tee operation.
-		if inFile.Readiness(eventMaskRead)&eventMaskRead == 0 {
-			if inCh == nil {
-				inCh = make(chan struct{}, 1)
-				inW, _ := waiter.NewChannelEntry(inCh)
-				inFile.EventRegister(&inW, eventMaskRead)
-				defer inFile.EventUnregister(&inW)
-				continue // Need to refresh readiness.
+	// Copy data.
+	var (
+		n   int64
+		err error
+	)
+	dw := dualWaiter{
+		inFile:  inFile,
+		outFile: outFile,
+	}
+	defer dw.destroy()
+	outPipeFD, outIsPipe := outFile.Impl().(*pipe.VFSPipeFD)
+	// Reading from input file should never block, since it is regular or
+	// block device. We only need to check if writing to the output file
+	// can block.
+	nonBlock := outFile.StatusFlags()&linux.O_NONBLOCK != 0
+	if outIsPipe {
+		for n < count {
+			var spliceN int64
+			if offset != -1 {
+				spliceN, err = inFile.PRead(t, outPipeFD.IOSequence(count), offset, vfs.ReadOptions{})
+				offset += spliceN
+			} else {
+				spliceN, err = inFile.Read(t, outPipeFD.IOSequence(count), vfs.ReadOptions{})
 			}
-			if err := t.Block(inCh); err != nil {
-				return 0, nil, err
+			n += spliceN
+			if err == syserror.ErrWouldBlock && !nonBlock {
+				err = dw.waitForBoth(t)
+			}
+			if err != nil {
+				break
 			}
 		}
-		if outFile.Readiness(eventMaskWrite)&eventMaskWrite == 0 {
-			if outCh == nil {
-				outCh = make(chan struct{}, 1)
-				outW, _ := waiter.NewChannelEntry(outCh)
-				outFile.EventRegister(&outW, eventMaskWrite)
-				defer outFile.EventUnregister(&outW)
-				continue // Need to refresh readiness.
+	} else {
+		// Read inFile to buffer, then write the contents to outFile.
+		buf := make([]byte, count)
+		for n < count {
+			var readN int64
+			if offset != -1 {
+				readN, err = inFile.PRead(t, usermem.BytesIOSequence(buf), offset, vfs.ReadOptions{})
+				offset += readN
+			} else {
+				readN, err = inFile.Read(t, usermem.BytesIOSequence(buf), vfs.ReadOptions{})
+			}
+			if readN == 0 && err == io.EOF {
+				// We reached the end of the file. Eat the
+				// error and exit the loop.
+				err = nil
+				break
 			}
-			if err := t.Block(outCh); err != nil {
-				return 0, nil, err
+			n += readN
+			if err != nil {
+				break
+			}
+
+			// Write all of the bytes that we read. This may need
+			// multiple write calls to complete.
+			wbuf := buf[:n]
+			for len(wbuf) > 0 {
+				var writeN int64
+				writeN, err = outFile.Write(t, usermem.BytesIOSequence(wbuf), vfs.WriteOptions{})
+				wbuf = wbuf[writeN:]
+				if err == syserror.ErrWouldBlock && !nonBlock {
+					err = dw.waitForOut(t)
+				}
+				if err != nil {
+					// We didn't complete the write. Only
+					// report the bytes that were actually
+					// written, and rewind the offset.
+					notWritten := int64(len(wbuf))
+					n -= notWritten
+					if offset != -1 {
+						offset -= notWritten
+					}
+					break
+				}
+			}
+			if err == syserror.ErrWouldBlock && !nonBlock {
+				err = dw.waitForBoth(t)
 			}
+			if err != nil {
+				break
+			}
+		}
+	}
+
+	if offsetAddr != 0 {
+		// Copy out the new offset.
+		if _, err := t.CopyOut(offsetAddr, offset); err != nil {
+			return 0, nil, err
+		}
+	}
+
+	if n == 0 {
+		return 0, nil, err
+	}
+
+	inFile.Dentry().InotifyWithParent(linux.IN_ACCESS, 0, vfs.PathEvent)
+	outFile.Dentry().InotifyWithParent(linux.IN_MODIFY, 0, vfs.PathEvent)
+	return uintptr(n), nil, nil
+}
+
+// dualWaiter is used to wait on one or both vfs.FileDescriptions. It is not
+// thread-safe, and does not take a reference on the vfs.FileDescriptions.
+//
+// Users must call destroy() when finished.
+type dualWaiter struct {
+	inFile  *vfs.FileDescription
+	outFile *vfs.FileDescription
+
+	inW   waiter.Entry
+	inCh  chan struct{}
+	outW  waiter.Entry
+	outCh chan struct{}
+}
+
+// waitForBoth waits for both dw.inFile and dw.outFile to be ready.
+func (dw *dualWaiter) waitForBoth(t *kernel.Task) error {
+	if dw.inFile.Readiness(eventMaskRead)&eventMaskRead == 0 {
+		if dw.inCh == nil {
+			dw.inW, dw.inCh = waiter.NewChannelEntry(nil)
+			dw.inFile.EventRegister(&dw.inW, eventMaskRead)
+			// We might be ready now. Try again before blocking.
+			return nil
+		}
+		if err := t.Block(dw.inCh); err != nil {
+			return err
+		}
+	}
+	return dw.waitForOut(t)
+}
+
+// waitForOut waits for dw.outfile to be read.
+func (dw *dualWaiter) waitForOut(t *kernel.Task) error {
+	if dw.outFile.Readiness(eventMaskWrite)&eventMaskWrite == 0 {
+		if dw.outCh == nil {
+			dw.outW, dw.outCh = waiter.NewChannelEntry(nil)
+			dw.outFile.EventRegister(&dw.outW, eventMaskWrite)
+			// We might be ready now. Try again before blocking.
+			return nil
 		}
+		if err := t.Block(dw.outCh); err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+// destroy cleans up resources help by dw. No more calls to wait* can occur
+// after destroy is called.
+func (dw *dualWaiter) destroy() {
+	if dw.inCh != nil {
+		dw.inFile.EventUnregister(&dw.inW)
+		dw.inCh = nil
+	}
+	if dw.outCh != nil {
+		dw.outFile.EventUnregister(&dw.outW)
+		dw.outCh = nil
 	}
+	dw.inFile = nil
+	dw.outFile = nil
 }
diff --git a/pkg/sentry/syscalls/linux/vfs2/vfs2.go b/pkg/sentry/syscalls/linux/vfs2/vfs2.go
index 8f497ecc7..c576d9475 100644
--- a/pkg/sentry/syscalls/linux/vfs2/vfs2.go
+++ b/pkg/sentry/syscalls/linux/vfs2/vfs2.go
@@ -44,7 +44,7 @@ func Override() {
 	s.Table[23] = syscalls.Supported("select", Select)
 	s.Table[32] = syscalls.Supported("dup", Dup)
 	s.Table[33] = syscalls.Supported("dup2", Dup2)
-	delete(s.Table, 40) // sendfile
+	s.Table[40] = syscalls.Supported("sendfile", Sendfile)
 	s.Table[41] = syscalls.Supported("socket", Socket)
 	s.Table[42] = syscalls.Supported("connect", Connect)
 	s.Table[43] = syscalls.Supported("accept", Accept)
@@ -62,7 +62,7 @@ func Override() {
 	s.Table[55] = syscalls.Supported("getsockopt", GetSockOpt)
 	s.Table[59] = syscalls.Supported("execve", Execve)
 	s.Table[72] = syscalls.Supported("fcntl", Fcntl)
-	s.Table[73] = syscalls.Supported("fcntl", Flock)
+	s.Table[73] = syscalls.Supported("flock", Flock)
 	s.Table[74] = syscalls.Supported("fsync", Fsync)
 	s.Table[75] = syscalls.Supported("fdatasync", Fdatasync)
 	s.Table[76] = syscalls.Supported("truncate", Truncate)
@@ -163,6 +163,106 @@ func Override() {
 
 	// Override ARM64.
 	s = linux.ARM64
+	s.Table[5] = syscalls.Supported("setxattr", Setxattr)
+	s.Table[6] = syscalls.Supported("lsetxattr", Lsetxattr)
+	s.Table[7] = syscalls.Supported("fsetxattr", Fsetxattr)
+	s.Table[8] = syscalls.Supported("getxattr", Getxattr)
+	s.Table[9] = syscalls.Supported("lgetxattr", Lgetxattr)
+	s.Table[10] = syscalls.Supported("fgetxattr", Fgetxattr)
+	s.Table[11] = syscalls.Supported("listxattr", Listxattr)
+	s.Table[12] = syscalls.Supported("llistxattr", Llistxattr)
+	s.Table[13] = syscalls.Supported("flistxattr", Flistxattr)
+	s.Table[14] = syscalls.Supported("removexattr", Removexattr)
+	s.Table[15] = syscalls.Supported("lremovexattr", Lremovexattr)
+	s.Table[16] = syscalls.Supported("fremovexattr", Fremovexattr)
+	s.Table[17] = syscalls.Supported("getcwd", Getcwd)
+	s.Table[19] = syscalls.Supported("eventfd2", Eventfd2)
+	s.Table[20] = syscalls.Supported("epoll_create1", EpollCreate1)
+	s.Table[21] = syscalls.Supported("epoll_ctl", EpollCtl)
+	s.Table[22] = syscalls.Supported("epoll_pwait", EpollPwait)
+	s.Table[23] = syscalls.Supported("dup", Dup)
+	s.Table[24] = syscalls.Supported("dup3", Dup3)
+	s.Table[25] = syscalls.Supported("fcntl", Fcntl)
+	s.Table[26] = syscalls.PartiallySupported("inotify_init1", InotifyInit1, "inotify events are only available inside the sandbox.", nil)
+	s.Table[27] = syscalls.PartiallySupported("inotify_add_watch", InotifyAddWatch, "inotify events are only available inside the sandbox.", nil)
+	s.Table[28] = syscalls.PartiallySupported("inotify_rm_watch", InotifyRmWatch, "inotify events are only available inside the sandbox.", nil)
+	s.Table[29] = syscalls.Supported("ioctl", Ioctl)
+	s.Table[32] = syscalls.Supported("flock", Flock)
+	s.Table[33] = syscalls.Supported("mknodat", Mknodat)
+	s.Table[34] = syscalls.Supported("mkdirat", Mkdirat)
+	s.Table[35] = syscalls.Supported("unlinkat", Unlinkat)
+	s.Table[36] = syscalls.Supported("symlinkat", Symlinkat)
+	s.Table[37] = syscalls.Supported("linkat", Linkat)
+	s.Table[38] = syscalls.Supported("renameat", Renameat)
+	s.Table[39] = syscalls.Supported("umount2", Umount2)
+	s.Table[40] = syscalls.Supported("mount", Mount)
+	s.Table[43] = syscalls.Supported("statfs", Statfs)
+	s.Table[44] = syscalls.Supported("fstatfs", Fstatfs)
+	s.Table[45] = syscalls.Supported("truncate", Truncate)
+	s.Table[46] = syscalls.Supported("ftruncate", Ftruncate)
+	s.Table[48] = syscalls.Supported("faccessat", Faccessat)
+	s.Table[49] = syscalls.Supported("chdir", Chdir)
+	s.Table[50] = syscalls.Supported("fchdir", Fchdir)
+	s.Table[51] = syscalls.Supported("chroot", Chroot)
+	s.Table[52] = syscalls.Supported("fchmod", Fchmod)
+	s.Table[53] = syscalls.Supported("fchmodat", Fchmodat)
+	s.Table[54] = syscalls.Supported("fchownat", Fchownat)
+	s.Table[55] = syscalls.Supported("fchown", Fchown)
+	s.Table[56] = syscalls.Supported("openat", Openat)
+	s.Table[57] = syscalls.Supported("close", Close)
+	s.Table[59] = syscalls.Supported("pipe2", Pipe2)
+	s.Table[61] = syscalls.Supported("getdents64", Getdents64)
+	s.Table[62] = syscalls.Supported("lseek", Lseek)
 	s.Table[63] = syscalls.Supported("read", Read)
+	s.Table[64] = syscalls.Supported("write", Write)
+	s.Table[65] = syscalls.Supported("readv", Readv)
+	s.Table[66] = syscalls.Supported("writev", Writev)
+	s.Table[67] = syscalls.Supported("pread64", Pread64)
+	s.Table[68] = syscalls.Supported("pwrite64", Pwrite64)
+	s.Table[69] = syscalls.Supported("preadv", Preadv)
+	s.Table[70] = syscalls.Supported("pwritev", Pwritev)
+	s.Table[72] = syscalls.Supported("pselect", Pselect)
+	s.Table[73] = syscalls.Supported("ppoll", Ppoll)
+	s.Table[74] = syscalls.Supported("signalfd4", Signalfd4)
+	s.Table[76] = syscalls.Supported("splice", Splice)
+	s.Table[77] = syscalls.Supported("tee", Tee)
+	s.Table[78] = syscalls.Supported("readlinkat", Readlinkat)
+	s.Table[80] = syscalls.Supported("fstat", Fstat)
+	s.Table[81] = syscalls.Supported("sync", Sync)
+	s.Table[82] = syscalls.Supported("fsync", Fsync)
+	s.Table[83] = syscalls.Supported("fdatasync", Fdatasync)
+	s.Table[84] = syscalls.Supported("sync_file_range", SyncFileRange)
+	s.Table[85] = syscalls.Supported("timerfd_create", TimerfdCreate)
+	s.Table[86] = syscalls.Supported("timerfd_settime", TimerfdSettime)
+	s.Table[87] = syscalls.Supported("timerfd_gettime", TimerfdGettime)
+	s.Table[88] = syscalls.Supported("utimensat", Utimensat)
+	s.Table[198] = syscalls.Supported("socket", Socket)
+	s.Table[199] = syscalls.Supported("socketpair", SocketPair)
+	s.Table[200] = syscalls.Supported("bind", Bind)
+	s.Table[201] = syscalls.Supported("listen", Listen)
+	s.Table[202] = syscalls.Supported("accept", Accept)
+	s.Table[203] = syscalls.Supported("connect", Connect)
+	s.Table[204] = syscalls.Supported("getsockname", GetSockName)
+	s.Table[205] = syscalls.Supported("getpeername", GetPeerName)
+	s.Table[206] = syscalls.Supported("sendto", SendTo)
+	s.Table[207] = syscalls.Supported("recvfrom", RecvFrom)
+	s.Table[208] = syscalls.Supported("setsockopt", SetSockOpt)
+	s.Table[209] = syscalls.Supported("getsockopt", GetSockOpt)
+	s.Table[210] = syscalls.Supported("shutdown", Shutdown)
+	s.Table[211] = syscalls.Supported("sendmsg", SendMsg)
+	s.Table[212] = syscalls.Supported("recvmsg", RecvMsg)
+	s.Table[221] = syscalls.Supported("execve", Execve)
+	s.Table[222] = syscalls.Supported("mmap", Mmap)
+	s.Table[242] = syscalls.Supported("accept4", Accept4)
+	s.Table[243] = syscalls.Supported("recvmmsg", RecvMMsg)
+	s.Table[267] = syscalls.Supported("syncfs", Syncfs)
+	s.Table[269] = syscalls.Supported("sendmmsg", SendMMsg)
+	s.Table[276] = syscalls.Supported("renameat2", Renameat2)
+	s.Table[279] = syscalls.Supported("memfd_create", MemfdCreate)
+	s.Table[281] = syscalls.Supported("execveat", Execveat)
+	s.Table[286] = syscalls.Supported("preadv2", Preadv2)
+	s.Table[287] = syscalls.Supported("pwritev2", Pwritev2)
+	s.Table[291] = syscalls.Supported("statx", Statx)
+
 	s.Init()
 }
diff --git a/pkg/sentry/vfs/options.go b/pkg/sentry/vfs/options.go
index f223aeda8..dfc8573fd 100644
--- a/pkg/sentry/vfs/options.go
+++ b/pkg/sentry/vfs/options.go
@@ -79,6 +79,17 @@ type MountFlags struct {
 	// NoATime is equivalent to MS_NOATIME and indicates that the
 	// filesystem should not update access time in-place.
 	NoATime bool
+
+	// NoDev is equivalent to MS_NODEV and indicates that the
+	// filesystem should not allow access to devices (special files).
+	// TODO(gVisor.dev/issue/3186): respect this flag in non FUSE
+	// filesystems.
+	NoDev bool
+
+	// NoSUID is equivalent to MS_NOSUID and indicates that the
+	// filesystem should not honor set-user-ID and set-group-ID bits or
+	// file capabilities when executing programs.
+	NoSUID bool
 }
 
 // MountOptions contains options to VirtualFilesystem.MountAt().
@@ -153,6 +164,12 @@ type SetStatOptions struct {
 	// == UTIME_OMIT (VFS users must unset the corresponding bit in Stat.Mask
 	// instead).
 	Stat linux.Statx
+
+	// NeedWritePerm indicates that write permission on the file is needed for
+	// this operation. This is needed for truncate(2) (note that ftruncate(2)
+	// does not require the same check--instead, it checks that the fd is
+	// writable).
+	NeedWritePerm bool
 }
 
 // BoundEndpointOptions contains options to VirtualFilesystem.BoundEndpointAt()
diff --git a/pkg/sentry/vfs/permissions.go b/pkg/sentry/vfs/permissions.go
index 9cb050597..33389c1df 100644
--- a/pkg/sentry/vfs/permissions.go
+++ b/pkg/sentry/vfs/permissions.go
@@ -183,7 +183,8 @@ func MayWriteFileWithOpenFlags(flags uint32) bool {
 // CheckSetStat checks that creds has permission to change the metadata of a
 // file with the given permissions, UID, and GID as specified by stat, subject
 // to the rules of Linux's fs/attr.c:setattr_prepare().
-func CheckSetStat(ctx context.Context, creds *auth.Credentials, stat *linux.Statx, mode linux.FileMode, kuid auth.KUID, kgid auth.KGID) error {
+func CheckSetStat(ctx context.Context, creds *auth.Credentials, opts *SetStatOptions, mode linux.FileMode, kuid auth.KUID, kgid auth.KGID) error {
+	stat := &opts.Stat
 	if stat.Mask&linux.STATX_SIZE != 0 {
 		limit, err := CheckLimit(ctx, 0, int64(stat.Size))
 		if err != nil {
@@ -215,6 +216,11 @@ func CheckSetStat(ctx context.Context, creds *auth.Credentials, stat *linux.Stat
 			return syserror.EPERM
 		}
 	}
+	if opts.NeedWritePerm && !creds.HasCapability(linux.CAP_DAC_OVERRIDE) {
+		if err := GenericCheckPermissions(creds, MayWrite, mode, kuid, kgid); err != nil {
+			return err
+		}
+	}
 	if stat.Mask&(linux.STATX_ATIME|linux.STATX_MTIME|linux.STATX_CTIME) != 0 {
 		if !CanActAsOwner(creds, kuid) {
 			if (stat.Mask&linux.STATX_ATIME != 0 && stat.Atime.Nsec != linux.UTIME_NOW) ||
diff --git a/pkg/tcpip/header/icmpv4.go b/pkg/tcpip/header/icmpv4.go
index 7908c5744..1a631b31a 100644
--- a/pkg/tcpip/header/icmpv4.go
+++ b/pkg/tcpip/header/icmpv4.go
@@ -72,6 +72,7 @@ const (
 // Values for ICMP code as defined in RFC 792.
 const (
 	ICMPv4TTLExceeded         = 0
+	ICMPv4HostUnreachable     = 1
 	ICMPv4PortUnreachable     = 3
 	ICMPv4FragmentationNeeded = 4
 )
diff --git a/pkg/tcpip/header/icmpv6.go b/pkg/tcpip/header/icmpv6.go
index c7ee2de57..a13b4b809 100644
--- a/pkg/tcpip/header/icmpv6.go
+++ b/pkg/tcpip/header/icmpv6.go
@@ -110,9 +110,16 @@ const (
 	ICMPv6RedirectMsg     ICMPv6Type = 137
 )
 
-// Values for ICMP code as defined in RFC 4443.
+// Values for ICMP destination unreachable code as defined in RFC 4443 section
+// 3.1.
 const (
-	ICMPv6PortUnreachable = 4
+	ICMPv6NetworkUnreachable = 0
+	ICMPv6Prohibited         = 1
+	ICMPv6BeyondScope        = 2
+	ICMPv6AddressUnreachable = 3
+	ICMPv6PortUnreachable    = 4
+	ICMPv6Policy             = 5
+	ICMPv6RejectRoute        = 6
 )
 
 // Type is the ICMP type field.
diff --git a/pkg/tcpip/link/channel/channel.go b/pkg/tcpip/link/channel/channel.go
index a2bb773d4..e12a5929b 100644
--- a/pkg/tcpip/link/channel/channel.go
+++ b/pkg/tcpip/link/channel/channel.go
@@ -302,3 +302,7 @@ func (e *Endpoint) RemoveNotify(handle *NotificationHandle) {
 func (*Endpoint) ARPHardwareType() header.ARPHardwareType {
 	return header.ARPHardwareNone
 }
+
+// AddHeader implements stack.LinkEndpoint.AddHeader.
+func (e *Endpoint) AddHeader(local, remote tcpip.LinkAddress, protocol tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) {
+}
diff --git a/pkg/tcpip/link/fdbased/endpoint.go b/pkg/tcpip/link/fdbased/endpoint.go
index 6aa1badc7..c18bb91fb 100644
--- a/pkg/tcpip/link/fdbased/endpoint.go
+++ b/pkg/tcpip/link/fdbased/endpoint.go
@@ -386,26 +386,33 @@ const (
 	_VIRTIO_NET_HDR_GSO_TCPV6 = 4
 )
 
-// WritePacket writes outbound packets to the file descriptor. If it is not
-// currently writable, the packet is dropped.
-func (e *endpoint) WritePacket(r *stack.Route, gso *stack.GSO, protocol tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) *tcpip.Error {
+// AddHeader implements stack.LinkEndpoint.AddHeader.
+func (e *endpoint) AddHeader(local, remote tcpip.LinkAddress, protocol tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) {
 	if e.hdrSize > 0 {
 		// Add ethernet header if needed.
 		eth := header.Ethernet(pkt.Header.Prepend(header.EthernetMinimumSize))
 		pkt.LinkHeader = buffer.View(eth)
 		ethHdr := &header.EthernetFields{
-			DstAddr: r.RemoteLinkAddress,
+			DstAddr: remote,
 			Type:    protocol,
 		}
 
 		// Preserve the src address if it's set in the route.
-		if r.LocalLinkAddress != "" {
-			ethHdr.SrcAddr = r.LocalLinkAddress
+		if local != "" {
+			ethHdr.SrcAddr = local
 		} else {
 			ethHdr.SrcAddr = e.addr
 		}
 		eth.Encode(ethHdr)
 	}
+}
+
+// WritePacket writes outbound packets to the file descriptor. If it is not
+// currently writable, the packet is dropped.
+func (e *endpoint) WritePacket(r *stack.Route, gso *stack.GSO, protocol tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) *tcpip.Error {
+	if e.hdrSize > 0 {
+		e.AddHeader(r.LocalLinkAddress, r.RemoteLinkAddress, protocol, pkt)
+	}
 
 	var builder iovec.Builder
 
@@ -448,22 +455,8 @@ func (e *endpoint) sendBatch(batchFD int, batch []*stack.PacketBuffer) (int, *tc
 	// Send a batch of packets through batchFD.
 	mmsgHdrs := make([]rawfile.MMsgHdr, 0, len(batch))
 	for _, pkt := range batch {
-		var eth header.Ethernet
 		if e.hdrSize > 0 {
-			// Add ethernet header if needed.
-			eth = make(header.Ethernet, header.EthernetMinimumSize)
-			ethHdr := &header.EthernetFields{
-				DstAddr: pkt.EgressRoute.RemoteLinkAddress,
-				Type:    pkt.NetworkProtocolNumber,
-			}
-
-			// Preserve the src address if it's set in the route.
-			if pkt.EgressRoute.LocalLinkAddress != "" {
-				ethHdr.SrcAddr = pkt.EgressRoute.LocalLinkAddress
-			} else {
-				ethHdr.SrcAddr = e.addr
-			}
-			eth.Encode(ethHdr)
+			e.AddHeader(pkt.EgressRoute.LocalLinkAddress, pkt.EgressRoute.RemoteLinkAddress, pkt.NetworkProtocolNumber, pkt)
 		}
 
 		var vnetHdrBuf []byte
@@ -493,7 +486,6 @@ func (e *endpoint) sendBatch(batchFD int, batch []*stack.PacketBuffer) (int, *tc
 
 		var builder iovec.Builder
 		builder.Add(vnetHdrBuf)
-		builder.Add(eth)
 		builder.Add(pkt.Header.View())
 		for _, v := range pkt.Data.Views() {
 			builder.Add(v)
diff --git a/pkg/tcpip/link/fdbased/endpoint_test.go b/pkg/tcpip/link/fdbased/endpoint_test.go
index 4bad930c7..7b995b85a 100644
--- a/pkg/tcpip/link/fdbased/endpoint_test.go
+++ b/pkg/tcpip/link/fdbased/endpoint_test.go
@@ -107,6 +107,10 @@ func (c *context) DeliverNetworkPacket(remote tcpip.LinkAddress, local tcpip.Lin
 	c.ch <- packetInfo{remote, protocol, pkt}
 }
 
+func (c *context) DeliverOutboundPacket(remote tcpip.LinkAddress, local tcpip.LinkAddress, protocol tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) {
+	panic("unimplemented")
+}
+
 func TestNoEthernetProperties(t *testing.T) {
 	c := newContext(t, &Options{MTU: mtu})
 	defer c.cleanup()
@@ -510,6 +514,10 @@ func (d *fakeNetworkDispatcher) DeliverNetworkPacket(remote, local tcpip.LinkAdd
 	d.pkts = append(d.pkts, pkt)
 }
 
+func (d *fakeNetworkDispatcher) DeliverOutboundPacket(remote, local tcpip.LinkAddress, protocol tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) {
+	panic("unimplemented")
+}
+
 func TestDispatchPacketFormat(t *testing.T) {
 	for _, test := range []struct {
 		name          string
diff --git a/pkg/tcpip/link/loopback/loopback.go b/pkg/tcpip/link/loopback/loopback.go
index 3b17d8c28..781cdd317 100644
--- a/pkg/tcpip/link/loopback/loopback.go
+++ b/pkg/tcpip/link/loopback/loopback.go
@@ -118,3 +118,6 @@ func (e *endpoint) WriteRawPacket(vv buffer.VectorisedView) *tcpip.Error {
 func (*endpoint) ARPHardwareType() header.ARPHardwareType {
 	return header.ARPHardwareLoopback
 }
+
+func (e *endpoint) AddHeader(local, remote tcpip.LinkAddress, protocol tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) {
+}
diff --git a/pkg/tcpip/link/muxed/injectable.go b/pkg/tcpip/link/muxed/injectable.go
index c305d9e86..56a611825 100644
--- a/pkg/tcpip/link/muxed/injectable.go
+++ b/pkg/tcpip/link/muxed/injectable.go
@@ -135,6 +135,10 @@ func (*InjectableEndpoint) ARPHardwareType() header.ARPHardwareType {
 	panic("unsupported operation")
 }
 
+// AddHeader implements stack.LinkEndpoint.AddHeader.
+func (*InjectableEndpoint) AddHeader(local, remote tcpip.LinkAddress, protocol tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) {
+}
+
 // NewInjectableEndpoint creates a new multi-endpoint injectable endpoint.
 func NewInjectableEndpoint(routes map[tcpip.Address]stack.InjectableLinkEndpoint) *InjectableEndpoint {
 	return &InjectableEndpoint{
diff --git a/pkg/tcpip/link/nested/nested.go b/pkg/tcpip/link/nested/nested.go
index 328bd048e..d40de54df 100644
--- a/pkg/tcpip/link/nested/nested.go
+++ b/pkg/tcpip/link/nested/nested.go
@@ -61,6 +61,16 @@ func (e *Endpoint) DeliverNetworkPacket(remote, local tcpip.LinkAddress, protoco
 	}
 }
 
+// DeliverOutboundPacket implements stack.NetworkDispatcher.DeliverOutboundPacket.
+func (e *Endpoint) DeliverOutboundPacket(remote, local tcpip.LinkAddress, protocol tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) {
+	e.mu.RLock()
+	d := e.dispatcher
+	e.mu.RUnlock()
+	if d != nil {
+		d.DeliverOutboundPacket(remote, local, protocol, pkt)
+	}
+}
+
 // Attach implements stack.LinkEndpoint.
 func (e *Endpoint) Attach(dispatcher stack.NetworkDispatcher) {
 	e.mu.Lock()
@@ -135,3 +145,8 @@ func (e *Endpoint) GSOMaxSize() uint32 {
 func (e *Endpoint) ARPHardwareType() header.ARPHardwareType {
 	return e.child.ARPHardwareType()
 }
+
+// AddHeader implements stack.LinkEndpoint.AddHeader.
+func (e *Endpoint) AddHeader(local, remote tcpip.LinkAddress, protocol tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) {
+	e.child.AddHeader(local, remote, protocol, pkt)
+}
diff --git a/pkg/tcpip/link/nested/nested_test.go b/pkg/tcpip/link/nested/nested_test.go
index c1a219f02..7d9249c1c 100644
--- a/pkg/tcpip/link/nested/nested_test.go
+++ b/pkg/tcpip/link/nested/nested_test.go
@@ -55,6 +55,10 @@ func (d *counterDispatcher) DeliverNetworkPacket(tcpip.LinkAddress, tcpip.LinkAd
 	d.count++
 }
 
+func (d *counterDispatcher) DeliverOutboundPacket(tcpip.LinkAddress, tcpip.LinkAddress, tcpip.NetworkProtocolNumber, *stack.PacketBuffer) {
+	panic("unimplemented")
+}
+
 func TestNestedLinkEndpoint(t *testing.T) {
 	const emptyAddress = tcpip.LinkAddress("")
 
diff --git a/pkg/tcpip/link/packetsocket/BUILD b/pkg/tcpip/link/packetsocket/BUILD
new file mode 100644
index 000000000..6fff160ce
--- /dev/null
+++ b/pkg/tcpip/link/packetsocket/BUILD
@@ -0,0 +1,14 @@
+load("//tools:defs.bzl", "go_library")
+
+package(licenses = ["notice"])
+
+go_library(
+    name = "packetsocket",
+    srcs = ["endpoint.go"],
+    visibility = ["//visibility:public"],
+    deps = [
+        "//pkg/tcpip",
+        "//pkg/tcpip/link/nested",
+        "//pkg/tcpip/stack",
+    ],
+)
diff --git a/pkg/tcpip/link/packetsocket/endpoint.go b/pkg/tcpip/link/packetsocket/endpoint.go
new file mode 100644
index 000000000..3922c2a04
--- /dev/null
+++ b/pkg/tcpip/link/packetsocket/endpoint.go
@@ -0,0 +1,50 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package packetsocket provides a link layer endpoint that provides the ability
+// to loop outbound packets to any AF_PACKET sockets that may be interested in
+// the outgoing packet.
+package packetsocket
+
+import (
+	"gvisor.dev/gvisor/pkg/tcpip"
+	"gvisor.dev/gvisor/pkg/tcpip/link/nested"
+	"gvisor.dev/gvisor/pkg/tcpip/stack"
+)
+
+type endpoint struct {
+	nested.Endpoint
+}
+
+// New creates a new packetsocket LinkEndpoint.
+func New(lower stack.LinkEndpoint) stack.LinkEndpoint {
+	e := &endpoint{}
+	e.Endpoint.Init(lower, e)
+	return e
+}
+
+// WritePacket implements stack.LinkEndpoint.WritePacket.
+func (e *endpoint) WritePacket(r *stack.Route, gso *stack.GSO, protocol tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) *tcpip.Error {
+	e.Endpoint.DeliverOutboundPacket(r.RemoteLinkAddress, r.LocalLinkAddress, protocol, pkt)
+	return e.Endpoint.WritePacket(r, gso, protocol, pkt)
+}
+
+// WritePackets implements stack.LinkEndpoint.WritePackets.
+func (e *endpoint) WritePackets(r *stack.Route, gso *stack.GSO, pkts stack.PacketBufferList, proto tcpip.NetworkProtocolNumber) (int, *tcpip.Error) {
+	for pkt := pkts.Front(); pkt != nil; pkt = pkt.Next() {
+		e.Endpoint.DeliverOutboundPacket(pkt.EgressRoute.RemoteLinkAddress, pkt.EgressRoute.LocalLinkAddress, pkt.NetworkProtocolNumber, pkt)
+	}
+
+	return e.Endpoint.WritePackets(r, gso, pkts, proto)
+}
diff --git a/pkg/tcpip/link/qdisc/fifo/endpoint.go b/pkg/tcpip/link/qdisc/fifo/endpoint.go
index c84fe1bb9..467083239 100644
--- a/pkg/tcpip/link/qdisc/fifo/endpoint.go
+++ b/pkg/tcpip/link/qdisc/fifo/endpoint.go
@@ -107,6 +107,11 @@ func (e *endpoint) DeliverNetworkPacket(remote, local tcpip.LinkAddress, protoco
 	e.dispatcher.DeliverNetworkPacket(remote, local, protocol, pkt)
 }
 
+// DeliverOutboundPacket implements stack.NetworkDispatcher.DeliverOutboundPacket.
+func (e *endpoint) DeliverOutboundPacket(remote, local tcpip.LinkAddress, protocol tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) {
+	e.dispatcher.DeliverOutboundPacket(remote, local, protocol, pkt)
+}
+
 // Attach implements stack.LinkEndpoint.Attach.
 func (e *endpoint) Attach(dispatcher stack.NetworkDispatcher) {
 	e.dispatcher = dispatcher
@@ -194,6 +199,8 @@ func (e *endpoint) WritePackets(_ *stack.Route, _ *stack.GSO, pkts stack.PacketB
 
 // WriteRawPacket implements stack.LinkEndpoint.WriteRawPacket.
 func (e *endpoint) WriteRawPacket(vv buffer.VectorisedView) *tcpip.Error {
+	// TODO(gvisor.dev/issue/3267/): Queue these packets as well once
+	// WriteRawPacket takes PacketBuffer instead of VectorisedView.
 	return e.lower.WriteRawPacket(vv)
 }
 
@@ -213,3 +220,8 @@ func (e *endpoint) Wait() {
 func (e *endpoint) ARPHardwareType() header.ARPHardwareType {
 	return e.lower.ARPHardwareType()
 }
+
+// AddHeader implements stack.LinkEndpoint.AddHeader.
+func (e *endpoint) AddHeader(local, remote tcpip.LinkAddress, protocol tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) {
+	e.lower.AddHeader(local, remote, protocol, pkt)
+}
diff --git a/pkg/tcpip/link/sharedmem/sharedmem.go b/pkg/tcpip/link/sharedmem/sharedmem.go
index a36862c67..507c76b76 100644
--- a/pkg/tcpip/link/sharedmem/sharedmem.go
+++ b/pkg/tcpip/link/sharedmem/sharedmem.go
@@ -183,22 +183,29 @@ func (e *endpoint) LinkAddress() tcpip.LinkAddress {
 	return e.addr
 }
 
-// WritePacket writes outbound packets to the file descriptor. If it is not
-// currently writable, the packet is dropped.
-func (e *endpoint) WritePacket(r *stack.Route, _ *stack.GSO, protocol tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) *tcpip.Error {
-	// Add the ethernet header here.
+// AddHeader implements stack.LinkEndpoint.AddHeader.
+func (e *endpoint) AddHeader(local, remote tcpip.LinkAddress, protocol tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) {
+	// Add ethernet header if needed.
 	eth := header.Ethernet(pkt.Header.Prepend(header.EthernetMinimumSize))
 	pkt.LinkHeader = buffer.View(eth)
 	ethHdr := &header.EthernetFields{
-		DstAddr: r.RemoteLinkAddress,
+		DstAddr: remote,
 		Type:    protocol,
 	}
-	if r.LocalLinkAddress != "" {
-		ethHdr.SrcAddr = r.LocalLinkAddress
+
+	// Preserve the src address if it's set in the route.
+	if local != "" {
+		ethHdr.SrcAddr = local
 	} else {
 		ethHdr.SrcAddr = e.addr
 	}
 	eth.Encode(ethHdr)
+}
+
+// WritePacket writes outbound packets to the file descriptor. If it is not
+// currently writable, the packet is dropped.
+func (e *endpoint) WritePacket(r *stack.Route, _ *stack.GSO, protocol tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) *tcpip.Error {
+	e.AddHeader(r.LocalLinkAddress, r.RemoteLinkAddress, protocol, pkt)
 
 	v := pkt.Data.ToView()
 	// Transmit the packet.
diff --git a/pkg/tcpip/link/sharedmem/sharedmem_test.go b/pkg/tcpip/link/sharedmem/sharedmem_test.go
index 28a2e88ba..8f3cd9449 100644
--- a/pkg/tcpip/link/sharedmem/sharedmem_test.go
+++ b/pkg/tcpip/link/sharedmem/sharedmem_test.go
@@ -143,6 +143,10 @@ func (c *testContext) DeliverNetworkPacket(remoteLinkAddr, localLinkAddr tcpip.L
 	c.packetCh <- struct{}{}
 }
 
+func (c *testContext) DeliverOutboundPacket(remoteLinkAddr, localLinkAddr tcpip.LinkAddress, proto tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) {
+	panic("unimplemented")
+}
+
 func (c *testContext) cleanup() {
 	c.ep.Close()
 	closeFDs(&c.txCfg)
diff --git a/pkg/tcpip/link/sniffer/sniffer.go b/pkg/tcpip/link/sniffer/sniffer.go
index d9cd4e83a..509076643 100644
--- a/pkg/tcpip/link/sniffer/sniffer.go
+++ b/pkg/tcpip/link/sniffer/sniffer.go
@@ -123,6 +123,11 @@ func (e *endpoint) DeliverNetworkPacket(remote, local tcpip.LinkAddress, protoco
 	e.Endpoint.DeliverNetworkPacket(remote, local, protocol, pkt)
 }
 
+// DeliverOutboundPacket implements stack.NetworkDispatcher.DeliverOutboundPacket.
+func (e *endpoint) DeliverOutboundPacket(remote, local tcpip.LinkAddress, protocol tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) {
+	e.Endpoint.DeliverOutboundPacket(remote, local, protocol, pkt)
+}
+
 func (e *endpoint) dumpPacket(prefix string, gso *stack.GSO, protocol tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) {
 	writer := e.writer
 	if writer == nil && atomic.LoadUint32(&LogPackets) == 1 {
diff --git a/pkg/tcpip/link/tun/device.go b/pkg/tcpip/link/tun/device.go
index 47446efec..04ae58e59 100644
--- a/pkg/tcpip/link/tun/device.go
+++ b/pkg/tcpip/link/tun/device.go
@@ -272,21 +272,9 @@ func (d *Device) encodePkt(info *channel.PacketInfo) (buffer.View, bool) {
 	if d.hasFlags(linux.IFF_TAP) {
 		// Add ethernet header if not provided.
 		if info.Pkt.LinkHeader == nil {
-			hdr := &header.EthernetFields{
-				SrcAddr: info.Route.LocalLinkAddress,
-				DstAddr: info.Route.RemoteLinkAddress,
-				Type:    info.Proto,
-			}
-			if hdr.SrcAddr == "" {
-				hdr.SrcAddr = d.endpoint.LinkAddress()
-			}
-
-			eth := make(header.Ethernet, header.EthernetMinimumSize)
-			eth.Encode(hdr)
-			vv.AppendView(buffer.View(eth))
-		} else {
-			vv.AppendView(info.Pkt.LinkHeader)
+			d.endpoint.AddHeader(info.Route.LocalLinkAddress, info.Route.RemoteLinkAddress, info.Proto, info.Pkt)
 		}
+		vv.AppendView(info.Pkt.LinkHeader)
 	}
 
 	// Append upper headers.
@@ -366,3 +354,30 @@ func (e *tunEndpoint) ARPHardwareType() header.ARPHardwareType {
 	}
 	return header.ARPHardwareNone
 }
+
+// AddHeader implements stack.LinkEndpoint.AddHeader.
+func (e *tunEndpoint) AddHeader(local, remote tcpip.LinkAddress, protocol tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) {
+	if !e.isTap {
+		return
+	}
+	eth := header.Ethernet(pkt.Header.Prepend(header.EthernetMinimumSize))
+	pkt.LinkHeader = buffer.View(eth)
+	hdr := &header.EthernetFields{
+		SrcAddr: local,
+		DstAddr: remote,
+		Type:    protocol,
+	}
+	if hdr.SrcAddr == "" {
+		hdr.SrcAddr = e.LinkAddress()
+	}
+
+	eth.Encode(hdr)
+}
+
+// MaxHeaderLength returns the maximum size of the link layer header.
+func (e *tunEndpoint) MaxHeaderLength() uint16 {
+	if e.isTap {
+		return header.EthernetMinimumSize
+	}
+	return 0
+}
diff --git a/pkg/tcpip/link/waitable/waitable.go b/pkg/tcpip/link/waitable/waitable.go
index 24a8dc2eb..b152a0f26 100644
--- a/pkg/tcpip/link/waitable/waitable.go
+++ b/pkg/tcpip/link/waitable/waitable.go
@@ -60,6 +60,15 @@ func (e *Endpoint) DeliverNetworkPacket(remote, local tcpip.LinkAddress, protoco
 	e.dispatchGate.Leave()
 }
 
+// DeliverOutboundPacket implements stack.NetworkDispatcher.DeliverOutboundPacket.
+func (e *Endpoint) DeliverOutboundPacket(remote, local tcpip.LinkAddress, protocol tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) {
+	if !e.dispatchGate.Enter() {
+		return
+	}
+	e.dispatcher.DeliverOutboundPacket(remote, local, protocol, pkt)
+	e.dispatchGate.Leave()
+}
+
 // Attach implements stack.LinkEndpoint.Attach. It saves the dispatcher and
 // registers with the lower endpoint as its dispatcher so that "e" is called
 // for inbound packets.
@@ -153,3 +162,8 @@ func (e *Endpoint) Wait() {}
 func (e *Endpoint) ARPHardwareType() header.ARPHardwareType {
 	return e.lower.ARPHardwareType()
 }
+
+// AddHeader implements stack.LinkEndpoint.AddHeader.
+func (e *Endpoint) AddHeader(local, remote tcpip.LinkAddress, protocol tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) {
+	e.lower.AddHeader(local, remote, protocol, pkt)
+}
diff --git a/pkg/tcpip/link/waitable/waitable_test.go b/pkg/tcpip/link/waitable/waitable_test.go
index ffb2354be..c448a888f 100644
--- a/pkg/tcpip/link/waitable/waitable_test.go
+++ b/pkg/tcpip/link/waitable/waitable_test.go
@@ -40,6 +40,10 @@ func (e *countedEndpoint) DeliverNetworkPacket(remote, local tcpip.LinkAddress,
 	e.dispatchCount++
 }
 
+func (e *countedEndpoint) DeliverOutboundPacket(remote, local tcpip.LinkAddress, protocol tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) {
+	panic("unimplemented")
+}
+
 func (e *countedEndpoint) Attach(dispatcher stack.NetworkDispatcher) {
 	e.attachCount++
 	e.dispatcher = dispatcher
@@ -90,6 +94,11 @@ func (*countedEndpoint) ARPHardwareType() header.ARPHardwareType {
 // Wait implements stack.LinkEndpoint.Wait.
 func (*countedEndpoint) Wait() {}
 
+// AddHeader implements stack.LinkEndpoint.AddHeader.
+func (e *countedEndpoint) AddHeader(local, remote tcpip.LinkAddress, protocol tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) {
+	panic("unimplemented")
+}
+
 func TestWaitWrite(t *testing.T) {
 	ep := &countedEndpoint{}
 	wep := New(ep)
diff --git a/pkg/tcpip/network/arp/arp.go b/pkg/tcpip/network/arp/arp.go
index b0f57040c..31a242482 100644
--- a/pkg/tcpip/network/arp/arp.go
+++ b/pkg/tcpip/network/arp/arp.go
@@ -160,9 +160,12 @@ func (*protocol) LinkAddressProtocol() tcpip.NetworkProtocolNumber {
 }
 
 // LinkAddressRequest implements stack.LinkAddressResolver.LinkAddressRequest.
-func (*protocol) LinkAddressRequest(addr, localAddr tcpip.Address, linkEP stack.LinkEndpoint) *tcpip.Error {
+func (*protocol) LinkAddressRequest(addr, localAddr tcpip.Address, remoteLinkAddr tcpip.LinkAddress, linkEP stack.LinkEndpoint) *tcpip.Error {
 	r := &stack.Route{
-		RemoteLinkAddress: header.EthernetBroadcastAddress,
+		RemoteLinkAddress: remoteLinkAddr,
+	}
+	if len(r.RemoteLinkAddress) == 0 {
+		r.RemoteLinkAddress = header.EthernetBroadcastAddress
 	}
 
 	hdr := buffer.NewPrependable(int(linkEP.MaxHeaderLength()) + header.ARPSize)
diff --git a/pkg/tcpip/network/arp/arp_test.go b/pkg/tcpip/network/arp/arp_test.go
index 66e67429c..a35a64a0f 100644
--- a/pkg/tcpip/network/arp/arp_test.go
+++ b/pkg/tcpip/network/arp/arp_test.go
@@ -32,10 +32,14 @@ import (
 )
 
 const (
-	stackLinkAddr = tcpip.LinkAddress("\x0a\x0a\x0b\x0b\x0c\x0c")
-	stackAddr1    = tcpip.Address("\x0a\x00\x00\x01")
-	stackAddr2    = tcpip.Address("\x0a\x00\x00\x02")
-	stackAddrBad  = tcpip.Address("\x0a\x00\x00\x03")
+	stackLinkAddr1 = tcpip.LinkAddress("\x0a\x0a\x0b\x0b\x0c\x0c")
+	stackLinkAddr2 = tcpip.LinkAddress("\x0b\x0b\x0c\x0c\x0d\x0d")
+	stackAddr1     = tcpip.Address("\x0a\x00\x00\x01")
+	stackAddr2     = tcpip.Address("\x0a\x00\x00\x02")
+	stackAddrBad   = tcpip.Address("\x0a\x00\x00\x03")
+
+	defaultChannelSize = 1
+	defaultMTU         = 65536
 )
 
 type testContext struct {
@@ -50,8 +54,7 @@ func newTestContext(t *testing.T) *testContext {
 		TransportProtocols: []stack.TransportProtocol{icmp.NewProtocol4()},
 	})
 
-	const defaultMTU = 65536
-	ep := channel.New(256, defaultMTU, stackLinkAddr)
+	ep := channel.New(defaultChannelSize, defaultMTU, stackLinkAddr1)
 	wep := stack.LinkEndpoint(ep)
 
 	if testing.Verbose() {
@@ -119,7 +122,7 @@ func TestDirectRequest(t *testing.T) {
 			if !rep.IsValid() {
 				t.Fatalf("invalid ARP response pi.Pkt.Header.UsedLength()=%d", pi.Pkt.Header.UsedLength())
 			}
-			if got, want := tcpip.LinkAddress(rep.HardwareAddressSender()), stackLinkAddr; got != want {
+			if got, want := tcpip.LinkAddress(rep.HardwareAddressSender()), stackLinkAddr1; got != want {
 				t.Errorf("got HardwareAddressSender = %s, want = %s", got, want)
 			}
 			if got, want := tcpip.Address(rep.ProtocolAddressSender()), tcpip.Address(h.ProtocolAddressTarget()); got != want {
@@ -144,3 +147,44 @@ func TestDirectRequest(t *testing.T) {
 		t.Errorf("stackAddrBad: unexpected packet sent, Proto=%v", pkt.Proto)
 	}
 }
+
+func TestLinkAddressRequest(t *testing.T) {
+	tests := []struct {
+		name           string
+		remoteLinkAddr tcpip.LinkAddress
+		expectLinkAddr tcpip.LinkAddress
+	}{
+		{
+			name:           "Unicast",
+			remoteLinkAddr: stackLinkAddr2,
+			expectLinkAddr: stackLinkAddr2,
+		},
+		{
+			name:           "Multicast",
+			remoteLinkAddr: "",
+			expectLinkAddr: header.EthernetBroadcastAddress,
+		},
+	}
+
+	for _, test := range tests {
+		p := arp.NewProtocol()
+		linkRes, ok := p.(stack.LinkAddressResolver)
+		if !ok {
+			t.Fatal("expected ARP protocol to implement stack.LinkAddressResolver")
+		}
+
+		linkEP := channel.New(defaultChannelSize, defaultMTU, stackLinkAddr1)
+		if err := linkRes.LinkAddressRequest(stackAddr1, stackAddr2, test.remoteLinkAddr, linkEP); err != nil {
+			t.Errorf("got p.LinkAddressRequest(%s, %s, %s, _) = %s", stackAddr1, stackAddr2, test.remoteLinkAddr, err)
+		}
+
+		pkt, ok := linkEP.Read()
+		if !ok {
+			t.Fatal("expected to send a link address request")
+		}
+
+		if got, want := pkt.Route.RemoteLinkAddress, test.expectLinkAddr; got != want {
+			t.Errorf("got pkt.Route.RemoteLinkAddress = %s, want = %s", got, want)
+		}
+	}
+}
diff --git a/pkg/tcpip/network/ip_test.go b/pkg/tcpip/network/ip_test.go
index a5b780ca2..615bae648 100644
--- a/pkg/tcpip/network/ip_test.go
+++ b/pkg/tcpip/network/ip_test.go
@@ -185,6 +185,11 @@ func (*testObject) ARPHardwareType() header.ARPHardwareType {
 	panic("not implemented")
 }
 
+// AddHeader implements stack.LinkEndpoint.AddHeader.
+func (*testObject) AddHeader(local, remote tcpip.LinkAddress, protocol tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) {
+	panic("not implemented")
+}
+
 func buildIPv4Route(local, remote tcpip.Address) (stack.Route, *tcpip.Error) {
 	s := stack.New(stack.Options{
 		NetworkProtocols:   []stack.NetworkProtocol{ipv4.NewProtocol()},
diff --git a/pkg/tcpip/network/ipv4/icmp.go b/pkg/tcpip/network/ipv4/icmp.go
index 1b67aa066..83e71cb8c 100644
--- a/pkg/tcpip/network/ipv4/icmp.go
+++ b/pkg/tcpip/network/ipv4/icmp.go
@@ -129,6 +129,9 @@ func (e *endpoint) handleICMP(r *stack.Route, pkt *stack.PacketBuffer) {
 
 		pkt.Data.TrimFront(header.ICMPv4MinimumSize)
 		switch h.Code() {
+		case header.ICMPv4HostUnreachable:
+			e.handleControl(stack.ControlNoRoute, 0, pkt)
+
 		case header.ICMPv4PortUnreachable:
 			e.handleControl(stack.ControlPortUnreachable, 0, pkt)
 
diff --git a/pkg/tcpip/network/ipv6/icmp.go b/pkg/tcpip/network/ipv6/icmp.go
index 3b79749b5..24600d877 100644
--- a/pkg/tcpip/network/ipv6/icmp.go
+++ b/pkg/tcpip/network/ipv6/icmp.go
@@ -128,6 +128,8 @@ func (e *endpoint) handleICMP(r *stack.Route, pkt *stack.PacketBuffer, hasFragme
 		}
 		pkt.Data.TrimFront(header.ICMPv6DstUnreachableMinimumSize)
 		switch header.ICMPv6(hdr).Code() {
+		case header.ICMPv6NetworkUnreachable:
+			e.handleControl(stack.ControlNetworkUnreachable, 0, pkt)
 		case header.ICMPv6PortUnreachable:
 			e.handleControl(stack.ControlPortUnreachable, 0, pkt)
 		}
@@ -502,7 +504,7 @@ func (*protocol) LinkAddressProtocol() tcpip.NetworkProtocolNumber {
 }
 
 // LinkAddressRequest implements stack.LinkAddressResolver.
-func (*protocol) LinkAddressRequest(addr, localAddr tcpip.Address, linkEP stack.LinkEndpoint) *tcpip.Error {
+func (*protocol) LinkAddressRequest(addr, localAddr tcpip.Address, remoteLinkAddr tcpip.LinkAddress, linkEP stack.LinkEndpoint) *tcpip.Error {
 	snaddr := header.SolicitedNodeAddr(addr)
 
 	// TODO(b/148672031): Use stack.FindRoute instead of manually creating the
@@ -511,8 +513,12 @@ func (*protocol) LinkAddressRequest(addr, localAddr tcpip.Address, linkEP stack.
 	r := &stack.Route{
 		LocalAddress:      localAddr,
 		RemoteAddress:     snaddr,
-		RemoteLinkAddress: header.EthernetAddressFromMulticastIPv6Address(snaddr),
+		RemoteLinkAddress: remoteLinkAddr,
 	}
+	if len(r.RemoteLinkAddress) == 0 {
+		r.RemoteLinkAddress = header.EthernetAddressFromMulticastIPv6Address(snaddr)
+	}
+
 	hdr := buffer.NewPrependable(int(linkEP.MaxHeaderLength()) + header.IPv6MinimumSize + header.ICMPv6NeighborAdvertSize)
 	pkt := header.ICMPv6(hdr.Prepend(header.ICMPv6NeighborAdvertSize))
 	pkt.SetType(header.ICMPv6NeighborSolicit)
diff --git a/pkg/tcpip/network/ipv6/icmp_test.go b/pkg/tcpip/network/ipv6/icmp_test.go
index 52a01b44e..f86aaed1d 100644
--- a/pkg/tcpip/network/ipv6/icmp_test.go
+++ b/pkg/tcpip/network/ipv6/icmp_test.go
@@ -34,6 +34,9 @@ const (
 	linkAddr0 = tcpip.LinkAddress("\x02\x02\x03\x04\x05\x06")
 	linkAddr1 = tcpip.LinkAddress("\x0a\x0b\x0c\x0d\x0e\x0e")
 	linkAddr2 = tcpip.LinkAddress("\x0a\x0b\x0c\x0d\x0e\x0f")
+
+	defaultChannelSize = 1
+	defaultMTU         = 65536
 )
 
 var (
@@ -257,8 +260,7 @@ func newTestContext(t *testing.T) *testContext {
 		}),
 	}
 
-	const defaultMTU = 65536
-	c.linkEP0 = channel.New(256, defaultMTU, linkAddr0)
+	c.linkEP0 = channel.New(defaultChannelSize, defaultMTU, linkAddr0)
 
 	wrappedEP0 := stack.LinkEndpoint(endpointWithResolutionCapability{LinkEndpoint: c.linkEP0})
 	if testing.Verbose() {
@@ -271,7 +273,7 @@ func newTestContext(t *testing.T) *testContext {
 		t.Fatalf("AddAddress lladdr0: %v", err)
 	}
 
-	c.linkEP1 = channel.New(256, defaultMTU, linkAddr1)
+	c.linkEP1 = channel.New(defaultChannelSize, defaultMTU, linkAddr1)
 	wrappedEP1 := stack.LinkEndpoint(endpointWithResolutionCapability{LinkEndpoint: c.linkEP1})
 	if err := c.s1.CreateNIC(1, wrappedEP1); err != nil {
 		t.Fatalf("CreateNIC failed: %v", err)
@@ -951,3 +953,47 @@ func TestICMPChecksumValidationWithPayloadMultipleViews(t *testing.T) {
 		})
 	}
 }
+
+func TestLinkAddressRequest(t *testing.T) {
+	snaddr := header.SolicitedNodeAddr(lladdr0)
+	mcaddr := header.EthernetAddressFromMulticastIPv6Address(snaddr)
+
+	tests := []struct {
+		name           string
+		remoteLinkAddr tcpip.LinkAddress
+		expectLinkAddr tcpip.LinkAddress
+	}{
+		{
+			name:           "Unicast",
+			remoteLinkAddr: linkAddr1,
+			expectLinkAddr: linkAddr1,
+		},
+		{
+			name:           "Multicast",
+			remoteLinkAddr: "",
+			expectLinkAddr: mcaddr,
+		},
+	}
+
+	for _, test := range tests {
+		p := NewProtocol()
+		linkRes, ok := p.(stack.LinkAddressResolver)
+		if !ok {
+			t.Fatalf("expected IPv6 protocol to implement stack.LinkAddressResolver")
+		}
+
+		linkEP := channel.New(defaultChannelSize, defaultMTU, linkAddr0)
+		if err := linkRes.LinkAddressRequest(lladdr0, lladdr1, test.remoteLinkAddr, linkEP); err != nil {
+			t.Errorf("got p.LinkAddressRequest(%s, %s, %s, _) = %s", lladdr0, lladdr1, test.remoteLinkAddr, err)
+		}
+
+		pkt, ok := linkEP.Read()
+		if !ok {
+			t.Fatal("expected to send a link address request")
+		}
+
+		if got, want := pkt.Route.RemoteLinkAddress, test.expectLinkAddr; got != want {
+			t.Errorf("got pkt.Route.RemoteLinkAddress = %s, want = %s", got, want)
+		}
+	}
+}
diff --git a/pkg/tcpip/stack/conntrack.go b/pkg/tcpip/stack/conntrack.go
index d39baf620..559a1c4dd 100644
--- a/pkg/tcpip/stack/conntrack.go
+++ b/pkg/tcpip/stack/conntrack.go
@@ -49,7 +49,8 @@ const (
 type manipType int
 
 const (
-	manipDstPrerouting manipType = iota
+	manipNone manipType = iota
+	manipDstPrerouting
 	manipDstOutput
 )
 
@@ -113,13 +114,11 @@ type conn struct {
 	// update the state of tcb. It is immutable.
 	tcbHook Hook
 
-	// mu protects tcb.
+	// mu protects all mutable state.
 	mu sync.Mutex `state:"nosave"`
-
 	// tcb is TCB control block. It is used to keep track of states
 	// of tcp connection and is protected by mu.
 	tcb tcpconntrack.TCB
-
 	// lastUsed is the last time the connection saw a relevant packet, and
 	// is updated by each packet on the connection. It is protected by mu.
 	lastUsed time.Time `state:".(unixTime)"`
@@ -141,8 +140,26 @@ func (cn *conn) timedOut(now time.Time) bool {
 	return now.Sub(cn.lastUsed) > defaultTimeout
 }
 
+// update the connection tracking state.
+//
+// Precondition: ct.mu must be held.
+func (ct *conn) updateLocked(tcpHeader header.TCP, hook Hook) {
+	// Update the state of tcb. tcb assumes it's always initialized on the
+	// client. However, we only need to know whether the connection is
+	// established or not, so the client/server distinction isn't important.
+	// TODO(gvisor.dev/issue/170): Add support in tcpconntrack to handle
+	// other tcp states.
+	if ct.tcb.IsEmpty() {
+		ct.tcb.Init(tcpHeader)
+	} else if hook == ct.tcbHook {
+		ct.tcb.UpdateStateOutbound(tcpHeader)
+	} else {
+		ct.tcb.UpdateStateInbound(tcpHeader)
+	}
+}
+
 // ConnTrack tracks all connections created for NAT rules. Most users are
-// expected to only call handlePacket and createConnFor.
+// expected to only call handlePacket, insertRedirectConn, and maybeInsertNoop.
 //
 // ConnTrack keeps all connections in a slice of buckets, each of which holds a
 // linked list of tuples. This gives us some desirable properties:
@@ -248,8 +265,7 @@ func (ct *ConnTrack) connFor(pkt *PacketBuffer) (*conn, direction) {
 	return nil, dirOriginal
 }
 
-// createConnFor creates a new conn for pkt.
-func (ct *ConnTrack) createConnFor(pkt *PacketBuffer, hook Hook, rt RedirectTarget) *conn {
+func (ct *ConnTrack) insertRedirectConn(pkt *PacketBuffer, hook Hook, rt RedirectTarget) *conn {
 	tid, err := packetToTupleID(pkt)
 	if err != nil {
 		return nil
@@ -272,10 +288,15 @@ func (ct *ConnTrack) createConnFor(pkt *PacketBuffer, hook Hook, rt RedirectTarg
 		manip = manipDstOutput
 	}
 	conn := newConn(tid, replyTID, manip, hook)
+	ct.insertConn(conn)
+	return conn
+}
 
+// insertConn inserts conn into the appropriate table bucket.
+func (ct *ConnTrack) insertConn(conn *conn) {
 	// Lock the buckets in the correct order.
-	tupleBucket := ct.bucket(tid)
-	replyBucket := ct.bucket(replyTID)
+	tupleBucket := ct.bucket(conn.original.tupleID)
+	replyBucket := ct.bucket(conn.reply.tupleID)
 	ct.mu.RLock()
 	defer ct.mu.RUnlock()
 	if tupleBucket < replyBucket {
@@ -289,22 +310,37 @@ func (ct *ConnTrack) createConnFor(pkt *PacketBuffer, hook Hook, rt RedirectTarg
 		ct.buckets[tupleBucket].mu.Lock()
 	}
 
-	// Add the tuple to the map.
-	ct.buckets[tupleBucket].tuples.PushFront(&conn.original)
-	ct.buckets[replyBucket].tuples.PushFront(&conn.reply)
+	// Now that we hold the locks, ensure the tuple hasn't been inserted by
+	// another thread.
+	alreadyInserted := false
+	for other := ct.buckets[tupleBucket].tuples.Front(); other != nil; other = other.Next() {
+		if other.tupleID == conn.original.tupleID {
+			alreadyInserted = true
+			break
+		}
+	}
+
+	if !alreadyInserted {
+		// Add the tuple to the map.
+		ct.buckets[tupleBucket].tuples.PushFront(&conn.original)
+		ct.buckets[replyBucket].tuples.PushFront(&conn.reply)
+	}
 
 	// Unlocking can happen in any order.
 	ct.buckets[tupleBucket].mu.Unlock()
 	if tupleBucket != replyBucket {
 		ct.buckets[replyBucket].mu.Unlock()
 	}
-
-	return conn
 }
 
 // handlePacketPrerouting manipulates ports for packets in Prerouting hook.
 // TODO(gvisor.dev/issue/170): Change address for Prerouting hook.
 func handlePacketPrerouting(pkt *PacketBuffer, conn *conn, dir direction) {
+	// If this is a noop entry, don't do anything.
+	if conn.manip == manipNone {
+		return
+	}
+
 	netHeader := header.IPv4(pkt.NetworkHeader)
 	tcpHeader := header.TCP(pkt.TransportHeader)
 
@@ -322,12 +358,22 @@ func handlePacketPrerouting(pkt *PacketBuffer, conn *conn, dir direction) {
 		netHeader.SetSourceAddress(conn.original.dstAddr)
 	}
 
+	// TODO(gvisor.dev/issue/170): TCP checksums aren't usually validated
+	// on inbound packets, so we don't recalculate them. However, we should
+	// support cases when they are validated, e.g. when we can't offload
+	// receive checksumming.
+
 	netHeader.SetChecksum(0)
 	netHeader.SetChecksum(^netHeader.CalculateChecksum())
 }
 
 // handlePacketOutput manipulates ports for packets in Output hook.
 func handlePacketOutput(pkt *PacketBuffer, conn *conn, gso *GSO, r *Route, dir direction) {
+	// If this is a noop entry, don't do anything.
+	if conn.manip == manipNone {
+		return
+	}
+
 	netHeader := header.IPv4(pkt.NetworkHeader)
 	tcpHeader := header.TCP(pkt.TransportHeader)
 
@@ -362,20 +408,31 @@ func handlePacketOutput(pkt *PacketBuffer, conn *conn, gso *GSO, r *Route, dir d
 }
 
 // handlePacket will manipulate the port and address of the packet if the
-// connection exists.
-func (ct *ConnTrack) handlePacket(pkt *PacketBuffer, hook Hook, gso *GSO, r *Route) {
+// connection exists. Returns whether, after the packet traverses the tables,
+// it should create a new entry in the table.
+func (ct *ConnTrack) handlePacket(pkt *PacketBuffer, hook Hook, gso *GSO, r *Route) bool {
 	if pkt.NatDone {
-		return
+		return false
 	}
 
 	if hook != Prerouting && hook != Output {
-		return
+		return false
+	}
+
+	// TODO(gvisor.dev/issue/170): Support other transport protocols.
+	if pkt.NetworkHeader == nil || header.IPv4(pkt.NetworkHeader).TransportProtocol() != header.TCPProtocolNumber {
+		return false
 	}
 
 	conn, dir := ct.connFor(pkt)
+	// Connection or Rule not found for the packet.
 	if conn == nil {
-		// Connection not found for the packet or the packet is invalid.
-		return
+		return true
+	}
+
+	tcpHeader := header.TCP(pkt.TransportHeader)
+	if tcpHeader == nil {
+		return false
 	}
 
 	switch hook {
@@ -395,14 +452,39 @@ func (ct *ConnTrack) handlePacket(pkt *PacketBuffer, hook Hook, gso *GSO, r *Rou
 	// Mark the connection as having been used recently so it isn't reaped.
 	conn.lastUsed = time.Now()
 	// Update connection state.
-	if tcpHeader := header.TCP(pkt.TransportHeader); conn.tcb.IsEmpty() {
-		conn.tcb.Init(tcpHeader)
-		conn.tcbHook = hook
-	} else if hook == conn.tcbHook {
-		conn.tcb.UpdateStateOutbound(tcpHeader)
-	} else {
-		conn.tcb.UpdateStateInbound(tcpHeader)
+	conn.updateLocked(header.TCP(pkt.TransportHeader), hook)
+
+	return false
+}
+
+// maybeInsertNoop tries to insert a no-op connection entry to keep connections
+// from getting clobbered when replies arrive. It only inserts if there isn't
+// already a connection for pkt.
+//
+// This should be called after traversing iptables rules only, to ensure that
+// pkt.NatDone is set correctly.
+func (ct *ConnTrack) maybeInsertNoop(pkt *PacketBuffer, hook Hook) {
+	// If there were a rule applying to this packet, it would be marked
+	// with NatDone.
+	if pkt.NatDone {
+		return
+	}
+
+	// We only track TCP connections.
+	if pkt.NetworkHeader == nil || header.IPv4(pkt.NetworkHeader).TransportProtocol() != header.TCPProtocolNumber {
+		return
+	}
+
+	// This is the first packet we're seeing for the TCP connection. Insert
+	// the noop entry (an identity mapping) so that the response doesn't
+	// get NATed, breaking the connection.
+	tid, err := packetToTupleID(pkt)
+	if err != nil {
+		return
 	}
+	conn := newConn(tid, tid.reply(), manipNone, hook)
+	conn.updateLocked(header.TCP(pkt.TransportHeader), hook)
+	ct.insertConn(conn)
 }
 
 // bucket gets the conntrack bucket for a tupleID.
diff --git a/pkg/tcpip/stack/forwarder_test.go b/pkg/tcpip/stack/forwarder_test.go
index eefb4b07f..c962693f5 100644
--- a/pkg/tcpip/stack/forwarder_test.go
+++ b/pkg/tcpip/stack/forwarder_test.go
@@ -121,10 +121,12 @@ func (*fwdTestNetworkEndpoint) Close() {}
 type fwdTestNetworkProtocol struct {
 	addrCache              *linkAddrCache
 	addrResolveDelay       time.Duration
-	onLinkAddressResolved  func(cache *linkAddrCache, addr tcpip.Address)
+	onLinkAddressResolved  func(cache *linkAddrCache, addr tcpip.Address, _ tcpip.LinkAddress)
 	onResolveStaticAddress func(tcpip.Address) (tcpip.LinkAddress, bool)
 }
 
+var _ LinkAddressResolver = (*fwdTestNetworkProtocol)(nil)
+
 func (f *fwdTestNetworkProtocol) Number() tcpip.NetworkProtocolNumber {
 	return fwdTestNetNumber
 }
@@ -174,10 +176,10 @@ func (f *fwdTestNetworkProtocol) Close() {}
 
 func (f *fwdTestNetworkProtocol) Wait() {}
 
-func (f *fwdTestNetworkProtocol) LinkAddressRequest(addr, localAddr tcpip.Address, linkEP LinkEndpoint) *tcpip.Error {
+func (f *fwdTestNetworkProtocol) LinkAddressRequest(addr, localAddr tcpip.Address, remoteLinkAddr tcpip.LinkAddress, linkEP LinkEndpoint) *tcpip.Error {
 	if f.addrCache != nil && f.onLinkAddressResolved != nil {
 		time.AfterFunc(f.addrResolveDelay, func() {
-			f.onLinkAddressResolved(f.addrCache, addr)
+			f.onLinkAddressResolved(f.addrCache, addr, remoteLinkAddr)
 		})
 	}
 	return nil
@@ -307,6 +309,11 @@ func (*fwdTestLinkEndpoint) ARPHardwareType() header.ARPHardwareType {
 	panic("not implemented")
 }
 
+// AddHeader implements stack.LinkEndpoint.AddHeader.
+func (e *fwdTestLinkEndpoint) AddHeader(local, remote tcpip.LinkAddress, protocol tcpip.NetworkProtocolNumber, pkt *PacketBuffer) {
+	panic("not implemented")
+}
+
 func fwdTestNetFactory(t *testing.T, proto *fwdTestNetworkProtocol) (ep1, ep2 *fwdTestLinkEndpoint) {
 	// Create a stack with the network protocol and two NICs.
 	s := New(Options{
@@ -400,7 +407,7 @@ func TestForwardingWithFakeResolver(t *testing.T) {
 	// Create a network protocol with a fake resolver.
 	proto := &fwdTestNetworkProtocol{
 		addrResolveDelay: 500 * time.Millisecond,
-		onLinkAddressResolved: func(cache *linkAddrCache, addr tcpip.Address) {
+		onLinkAddressResolved: func(cache *linkAddrCache, addr tcpip.Address, _ tcpip.LinkAddress) {
 			// Any address will be resolved to the link address "c".
 			cache.add(tcpip.FullAddress{NIC: 2, Addr: addr}, "c")
 		},
@@ -458,7 +465,7 @@ func TestForwardingWithFakeResolverPartialTimeout(t *testing.T) {
 	// Create a network protocol with a fake resolver.
 	proto := &fwdTestNetworkProtocol{
 		addrResolveDelay: 500 * time.Millisecond,
-		onLinkAddressResolved: func(cache *linkAddrCache, addr tcpip.Address) {
+		onLinkAddressResolved: func(cache *linkAddrCache, addr tcpip.Address, _ tcpip.LinkAddress) {
 			// Only packets to address 3 will be resolved to the
 			// link address "c".
 			if addr == "\x03" {
@@ -510,7 +517,7 @@ func TestForwardingWithFakeResolverTwoPackets(t *testing.T) {
 	// Create a network protocol with a fake resolver.
 	proto := &fwdTestNetworkProtocol{
 		addrResolveDelay: 500 * time.Millisecond,
-		onLinkAddressResolved: func(cache *linkAddrCache, addr tcpip.Address) {
+		onLinkAddressResolved: func(cache *linkAddrCache, addr tcpip.Address, _ tcpip.LinkAddress) {
 			// Any packets will be resolved to the link address "c".
 			cache.add(tcpip.FullAddress{NIC: 2, Addr: addr}, "c")
 		},
@@ -554,7 +561,7 @@ func TestForwardingWithFakeResolverManyPackets(t *testing.T) {
 	// Create a network protocol with a fake resolver.
 	proto := &fwdTestNetworkProtocol{
 		addrResolveDelay: 500 * time.Millisecond,
-		onLinkAddressResolved: func(cache *linkAddrCache, addr tcpip.Address) {
+		onLinkAddressResolved: func(cache *linkAddrCache, addr tcpip.Address, _ tcpip.LinkAddress) {
 			// Any packets will be resolved to the link address "c".
 			cache.add(tcpip.FullAddress{NIC: 2, Addr: addr}, "c")
 		},
@@ -611,7 +618,7 @@ func TestForwardingWithFakeResolverManyResolutions(t *testing.T) {
 	// Create a network protocol with a fake resolver.
 	proto := &fwdTestNetworkProtocol{
 		addrResolveDelay: 500 * time.Millisecond,
-		onLinkAddressResolved: func(cache *linkAddrCache, addr tcpip.Address) {
+		onLinkAddressResolved: func(cache *linkAddrCache, addr tcpip.Address, _ tcpip.LinkAddress) {
 			// Any packets will be resolved to the link address "c".
 			cache.add(tcpip.FullAddress{NIC: 2, Addr: addr}, "c")
 		},
diff --git a/pkg/tcpip/stack/iptables.go b/pkg/tcpip/stack/iptables.go
index bbf3b60e8..cbbae4224 100644
--- a/pkg/tcpip/stack/iptables.go
+++ b/pkg/tcpip/stack/iptables.go
@@ -22,22 +22,30 @@ import (
 	"gvisor.dev/gvisor/pkg/tcpip/header"
 )
 
-// Table names.
+// tableID is an index into IPTables.tables.
+type tableID int
+
 const (
-	TablenameNat    = "nat"
-	TablenameMangle = "mangle"
-	TablenameFilter = "filter"
+	natID tableID = iota
+	mangleID
+	filterID
+	numTables
 )
 
-// Chain names as defined by net/ipv4/netfilter/ip_tables.c.
+// Table names.
 const (
-	ChainNamePrerouting  = "PREROUTING"
-	ChainNameInput       = "INPUT"
-	ChainNameForward     = "FORWARD"
-	ChainNameOutput      = "OUTPUT"
-	ChainNamePostrouting = "POSTROUTING"
+	NATTable    = "nat"
+	MangleTable = "mangle"
+	FilterTable = "filter"
 )
 
+// nameToID is immutable.
+var nameToID = map[string]tableID{
+	NATTable:    natID,
+	MangleTable: mangleID,
+	FilterTable: filterID,
+}
+
 // HookUnset indicates that there is no hook set for an entrypoint or
 // underflow.
 const HookUnset = -1
@@ -48,11 +56,9 @@ const reaperDelay = 5 * time.Second
 // DefaultTables returns a default set of tables. Each chain is set to accept
 // all packets.
 func DefaultTables() *IPTables {
-	// TODO(gvisor.dev/issue/170): We may be able to swap out some strings for
-	// iotas.
 	return &IPTables{
-		tables: map[string]Table{
-			TablenameNat: Table{
+		tables: [numTables]Table{
+			natID: Table{
 				Rules: []Rule{
 					Rule{Target: AcceptTarget{}},
 					Rule{Target: AcceptTarget{}},
@@ -60,60 +66,66 @@ func DefaultTables() *IPTables {
 					Rule{Target: AcceptTarget{}},
 					Rule{Target: ErrorTarget{}},
 				},
-				BuiltinChains: map[Hook]int{
+				BuiltinChains: [NumHooks]int{
 					Prerouting:  0,
 					Input:       1,
+					Forward:     HookUnset,
 					Output:      2,
 					Postrouting: 3,
 				},
-				Underflows: map[Hook]int{
+				Underflows: [NumHooks]int{
 					Prerouting:  0,
 					Input:       1,
+					Forward:     HookUnset,
 					Output:      2,
 					Postrouting: 3,
 				},
-				UserChains: map[string]int{},
 			},
-			TablenameMangle: Table{
+			mangleID: Table{
 				Rules: []Rule{
 					Rule{Target: AcceptTarget{}},
 					Rule{Target: AcceptTarget{}},
 					Rule{Target: ErrorTarget{}},
 				},
-				BuiltinChains: map[Hook]int{
+				BuiltinChains: [NumHooks]int{
 					Prerouting: 0,
 					Output:     1,
 				},
-				Underflows: map[Hook]int{
-					Prerouting: 0,
-					Output:     1,
+				Underflows: [NumHooks]int{
+					Prerouting:  0,
+					Input:       HookUnset,
+					Forward:     HookUnset,
+					Output:      1,
+					Postrouting: HookUnset,
 				},
-				UserChains: map[string]int{},
 			},
-			TablenameFilter: Table{
+			filterID: Table{
 				Rules: []Rule{
 					Rule{Target: AcceptTarget{}},
 					Rule{Target: AcceptTarget{}},
 					Rule{Target: AcceptTarget{}},
 					Rule{Target: ErrorTarget{}},
 				},
-				BuiltinChains: map[Hook]int{
-					Input:   0,
-					Forward: 1,
-					Output:  2,
+				BuiltinChains: [NumHooks]int{
+					Prerouting:  HookUnset,
+					Input:       0,
+					Forward:     1,
+					Output:      2,
+					Postrouting: HookUnset,
 				},
-				Underflows: map[Hook]int{
-					Input:   0,
-					Forward: 1,
-					Output:  2,
+				Underflows: [NumHooks]int{
+					Prerouting:  HookUnset,
+					Input:       0,
+					Forward:     1,
+					Output:      2,
+					Postrouting: HookUnset,
 				},
-				UserChains: map[string]int{},
 			},
 		},
-		priorities: map[Hook][]string{
-			Input:      []string{TablenameNat, TablenameFilter},
-			Prerouting: []string{TablenameMangle, TablenameNat},
-			Output:     []string{TablenameMangle, TablenameNat, TablenameFilter},
+		priorities: [NumHooks][]tableID{
+			Prerouting: []tableID{mangleID, natID},
+			Input:      []tableID{natID, filterID},
+			Output:     []tableID{mangleID, natID, filterID},
 		},
 		connections: ConnTrack{
 			seed: generateRandUint32(),
@@ -127,51 +139,48 @@ func DefaultTables() *IPTables {
 func EmptyFilterTable() Table {
 	return Table{
 		Rules: []Rule{},
-		BuiltinChains: map[Hook]int{
-			Input:   HookUnset,
-			Forward: HookUnset,
-			Output:  HookUnset,
+		BuiltinChains: [NumHooks]int{
+			Prerouting:  HookUnset,
+			Postrouting: HookUnset,
 		},
-		Underflows: map[Hook]int{
-			Input:   HookUnset,
-			Forward: HookUnset,
-			Output:  HookUnset,
+		Underflows: [NumHooks]int{
+			Prerouting:  HookUnset,
+			Postrouting: HookUnset,
 		},
-		UserChains: map[string]int{},
 	}
 }
 
-// EmptyNatTable returns a Table with no rules and the filter table chains
+// EmptyNATTable returns a Table with no rules and the filter table chains
 // mapped to HookUnset.
-func EmptyNatTable() Table {
+func EmptyNATTable() Table {
 	return Table{
 		Rules: []Rule{},
-		BuiltinChains: map[Hook]int{
-			Prerouting:  HookUnset,
-			Input:       HookUnset,
-			Output:      HookUnset,
-			Postrouting: HookUnset,
+		BuiltinChains: [NumHooks]int{
+			Forward: HookUnset,
 		},
-		Underflows: map[Hook]int{
-			Prerouting:  HookUnset,
-			Input:       HookUnset,
-			Output:      HookUnset,
-			Postrouting: HookUnset,
+		Underflows: [NumHooks]int{
+			Forward: HookUnset,
 		},
-		UserChains: map[string]int{},
 	}
 }
 
-// GetTable returns table by name.
+// GetTable returns a table by name.
 func (it *IPTables) GetTable(name string) (Table, bool) {
+	id, ok := nameToID[name]
+	if !ok {
+		return Table{}, false
+	}
 	it.mu.RLock()
 	defer it.mu.RUnlock()
-	t, ok := it.tables[name]
-	return t, ok
+	return it.tables[id], true
 }
 
 // ReplaceTable replaces or inserts table by name.
-func (it *IPTables) ReplaceTable(name string, table Table) {
+func (it *IPTables) ReplaceTable(name string, table Table) *tcpip.Error {
+	id, ok := nameToID[name]
+	if !ok {
+		return tcpip.ErrInvalidOptionValue
+	}
 	it.mu.Lock()
 	defer it.mu.Unlock()
 	// If iptables is being enabled, initialize the conntrack table and
@@ -181,14 +190,8 @@ func (it *IPTables) ReplaceTable(name string, table Table) {
 		it.startReaper(reaperDelay)
 	}
 	it.modified = true
-	it.tables[name] = table
-}
-
-// GetPriorities returns slice of priorities associated with hook.
-func (it *IPTables) GetPriorities(hook Hook) []string {
-	it.mu.RLock()
-	defer it.mu.RUnlock()
-	return it.priorities[hook]
+	it.tables[id] = table
+	return nil
 }
 
 // A chainVerdict is what a table decides should be done with a packet.
@@ -223,11 +226,19 @@ func (it *IPTables) Check(hook Hook, pkt *PacketBuffer, gso *GSO, r *Route, addr
 
 	// Packets are manipulated only if connection and matching
 	// NAT rule exists.
-	it.connections.handlePacket(pkt, hook, gso, r)
+	shouldTrack := it.connections.handlePacket(pkt, hook, gso, r)
 
 	// Go through each table containing the hook.
-	for _, tablename := range it.GetPriorities(hook) {
-		table, _ := it.GetTable(tablename)
+	it.mu.RLock()
+	defer it.mu.RUnlock()
+	priorities := it.priorities[hook]
+	for _, tableID := range priorities {
+		// If handlePacket already NATed the packet, we don't need to
+		// check the NAT table.
+		if tableID == natID && pkt.NatDone {
+			continue
+		}
+		table := it.tables[tableID]
 		ruleIdx := table.BuiltinChains[hook]
 		switch verdict := it.checkChain(hook, pkt, table, ruleIdx, gso, r, address, nicName); verdict {
 		// If the table returns Accept, move on to the next table.
@@ -256,6 +267,20 @@ func (it *IPTables) Check(hook Hook, pkt *PacketBuffer, gso *GSO, r *Route, addr
 		}
 	}
 
+	// If this connection should be tracked, try to add an entry for it. If
+	// traversing the nat table didn't end in adding an entry,
+	// maybeInsertNoop will add a no-op entry for the connection. This is
+	// needeed when establishing connections so that the SYN/ACK reply to an
+	// outgoing SYN is delivered to the correct endpoint rather than being
+	// redirected by a prerouting rule.
+	//
+	// From the iptables documentation: "If there is no rule, a `null'
+	// binding is created: this usually does not map the packet, but exists
+	// to ensure we don't map another stream over an existing one."
+	if shouldTrack {
+		it.connections.maybeInsertNoop(pkt, hook)
+	}
+
 	// Every table returned Accept.
 	return true
 }
diff --git a/pkg/tcpip/stack/iptables_targets.go b/pkg/tcpip/stack/iptables_targets.go
index d43f60c67..dc88033c7 100644
--- a/pkg/tcpip/stack/iptables_targets.go
+++ b/pkg/tcpip/stack/iptables_targets.go
@@ -153,7 +153,7 @@ func (rt RedirectTarget) Action(pkt *PacketBuffer, ct *ConnTrack, hook Hook, gso
 		// Set up conection for matching NAT rule. Only the first
 		// packet of the connection comes here. Other packets will be
 		// manipulated in connection tracking.
-		if conn := ct.createConnFor(pkt, hook, rt); conn != nil {
+		if conn := ct.insertRedirectConn(pkt, hook, rt); conn != nil {
 			ct.handlePacket(pkt, hook, gso, r)
 		}
 	default:
diff --git a/pkg/tcpip/stack/iptables_types.go b/pkg/tcpip/stack/iptables_types.go
index eb70e3104..73274ada9 100644
--- a/pkg/tcpip/stack/iptables_types.go
+++ b/pkg/tcpip/stack/iptables_types.go
@@ -84,14 +84,14 @@ type IPTables struct {
 	// mu protects tables, priorities, and modified.
 	mu sync.RWMutex
 
-	// tables maps table names to tables. User tables have arbitrary names.
-	// mu needs to be locked for accessing.
-	tables map[string]Table
+	// tables maps tableIDs to tables. Holds builtin tables only, not user
+	// tables. mu must be locked for accessing.
+	tables [numTables]Table
 
 	// priorities maps each hook to a list of table names. The order of the
 	// list is the order in which each table should be visited for that
 	// hook. mu needs to be locked for accessing.
-	priorities map[Hook][]string
+	priorities [NumHooks][]tableID
 
 	// modified is whether tables have been modified at least once. It is
 	// used to elide the iptables performance overhead for workloads that
@@ -113,22 +113,20 @@ type Table struct {
 	Rules []Rule
 
 	// BuiltinChains maps builtin chains to their entrypoint rule in Rules.
-	BuiltinChains map[Hook]int
+	BuiltinChains [NumHooks]int
 
 	// Underflows maps builtin chains to their underflow rule in Rules
 	// (i.e. the rule to execute if the chain returns without a verdict).
-	Underflows map[Hook]int
-
-	// UserChains holds user-defined chains for the keyed by name. Users
-	// can give their chains arbitrary names.
-	UserChains map[string]int
+	Underflows [NumHooks]int
 }
 
 // ValidHooks returns a bitmap of the builtin hooks for the given table.
 func (table *Table) ValidHooks() uint32 {
 	hooks := uint32(0)
-	for hook := range table.BuiltinChains {
-		hooks |= 1 << hook
+	for hook, ruleIdx := range table.BuiltinChains {
+		if ruleIdx != HookUnset {
+			hooks |= 1 << hook
+		}
 	}
 	return hooks
 }
diff --git a/pkg/tcpip/stack/linkaddrcache.go b/pkg/tcpip/stack/linkaddrcache.go
index 403557fd7..6f73a0ce4 100644
--- a/pkg/tcpip/stack/linkaddrcache.go
+++ b/pkg/tcpip/stack/linkaddrcache.go
@@ -244,7 +244,7 @@ func (c *linkAddrCache) startAddressResolution(k tcpip.FullAddress, linkRes Link
 	for i := 0; ; i++ {
 		// Send link request, then wait for the timeout limit and check
 		// whether the request succeeded.
-		linkRes.LinkAddressRequest(k.Addr, localAddr, linkEP)
+		linkRes.LinkAddressRequest(k.Addr, localAddr, "" /* linkAddr */, linkEP)
 
 		select {
 		case now := <-time.After(c.resolutionTimeout):
diff --git a/pkg/tcpip/stack/linkaddrcache_test.go b/pkg/tcpip/stack/linkaddrcache_test.go
index 1baa498d0..b15b8d1cb 100644
--- a/pkg/tcpip/stack/linkaddrcache_test.go
+++ b/pkg/tcpip/stack/linkaddrcache_test.go
@@ -48,7 +48,7 @@ type testLinkAddressResolver struct {
 	onLinkAddressRequest func()
 }
 
-func (r *testLinkAddressResolver) LinkAddressRequest(addr, _ tcpip.Address, _ LinkEndpoint) *tcpip.Error {
+func (r *testLinkAddressResolver) LinkAddressRequest(addr, _ tcpip.Address, _ tcpip.LinkAddress, _ LinkEndpoint) *tcpip.Error {
 	time.AfterFunc(r.delay, func() { r.fakeRequest(addr) })
 	if f := r.onLinkAddressRequest; f != nil {
 		f()
diff --git a/pkg/tcpip/stack/ndp.go b/pkg/tcpip/stack/ndp.go
index e28c23d66..9dce11a97 100644
--- a/pkg/tcpip/stack/ndp.go
+++ b/pkg/tcpip/stack/ndp.go
@@ -469,7 +469,7 @@ type ndpState struct {
 
 	rtrSolicit struct {
 		// The timer used to send the next router solicitation message.
-		timer *time.Timer
+		timer tcpip.Timer
 
 		// Used to let the Router Solicitation timer know that it has been stopped.
 		//
@@ -503,7 +503,7 @@ type ndpState struct {
 // to the DAD goroutine that DAD should stop.
 type dadState struct {
 	// The DAD timer to send the next NS message, or resolve the address.
-	timer *time.Timer
+	timer tcpip.Timer
 
 	// Used to let the DAD timer know that it has been stopped.
 	//
@@ -515,38 +515,38 @@ type dadState struct {
 // defaultRouterState holds data associated with a default router discovered by
 // a Router Advertisement (RA).
 type defaultRouterState struct {
-	// Timer to invalidate the default router.
+	// Job to invalidate the default router.
 	//
 	// Must not be nil.
-	invalidationTimer *tcpip.CancellableTimer
+	invalidationJob *tcpip.Job
 }
 
 // onLinkPrefixState holds data associated with an on-link prefix discovered by
 // a Router Advertisement's Prefix Information option (PI) when the NDP
 // configurations was configured to do so.
 type onLinkPrefixState struct {
-	// Timer to invalidate the on-link prefix.
+	// Job to invalidate the on-link prefix.
 	//
 	// Must not be nil.
-	invalidationTimer *tcpip.CancellableTimer
+	invalidationJob *tcpip.Job
 }
 
 // tempSLAACAddrState holds state associated with a temporary SLAAC address.
 type tempSLAACAddrState struct {
-	// Timer to deprecate the temporary SLAAC address.
+	// Job to deprecate the temporary SLAAC address.
 	//
 	// Must not be nil.
-	deprecationTimer *tcpip.CancellableTimer
+	deprecationJob *tcpip.Job
 
-	// Timer to invalidate the temporary SLAAC address.
+	// Job to invalidate the temporary SLAAC address.
 	//
 	// Must not be nil.
-	invalidationTimer *tcpip.CancellableTimer
+	invalidationJob *tcpip.Job
 
-	// Timer to regenerate the temporary SLAAC address.
+	// Job to regenerate the temporary SLAAC address.
 	//
 	// Must not be nil.
-	regenTimer *tcpip.CancellableTimer
+	regenJob *tcpip.Job
 
 	createdAt time.Time
 
@@ -561,15 +561,15 @@ type tempSLAACAddrState struct {
 
 // slaacPrefixState holds state associated with a SLAAC prefix.
 type slaacPrefixState struct {
-	// Timer to deprecate the prefix.
+	// Job to deprecate the prefix.
 	//
 	// Must not be nil.
-	deprecationTimer *tcpip.CancellableTimer
+	deprecationJob *tcpip.Job
 
-	// Timer to invalidate the prefix.
+	// Job to invalidate the prefix.
 	//
 	// Must not be nil.
-	invalidationTimer *tcpip.CancellableTimer
+	invalidationJob *tcpip.Job
 
 	// Nonzero only when the address is not valid forever.
 	validUntil time.Time
@@ -651,12 +651,12 @@ func (ndp *ndpState) startDuplicateAddressDetection(addr tcpip.Address, ref *ref
 	}
 
 	var done bool
-	var timer *time.Timer
+	var timer tcpip.Timer
 	// We initially start a timer to fire immediately because some of the DAD work
 	// cannot be done while holding the NIC's lock. This is effectively the same
 	// as starting a goroutine but we use a timer that fires immediately so we can
 	// reset it for the next DAD iteration.
-	timer = time.AfterFunc(0, func() {
+	timer = ndp.nic.stack.Clock().AfterFunc(0, func() {
 		ndp.nic.mu.Lock()
 		defer ndp.nic.mu.Unlock()
 
@@ -871,9 +871,9 @@ func (ndp *ndpState) handleRA(ip tcpip.Address, ra header.NDPRouterAdvert) {
 
 		case ok && rl != 0:
 			// This is an already discovered default router. Update
-			// the invalidation timer.
-			rtr.invalidationTimer.StopLocked()
-			rtr.invalidationTimer.Reset(rl)
+			// the invalidation job.
+			rtr.invalidationJob.Cancel()
+			rtr.invalidationJob.Schedule(rl)
 			ndp.defaultRouters[ip] = rtr
 
 		case ok && rl == 0:
@@ -950,7 +950,7 @@ func (ndp *ndpState) invalidateDefaultRouter(ip tcpip.Address) {
 		return
 	}
 
-	rtr.invalidationTimer.StopLocked()
+	rtr.invalidationJob.Cancel()
 	delete(ndp.defaultRouters, ip)
 
 	// Let the integrator know a discovered default router is invalidated.
@@ -979,12 +979,12 @@ func (ndp *ndpState) rememberDefaultRouter(ip tcpip.Address, rl time.Duration) {
 	}
 
 	state := defaultRouterState{
-		invalidationTimer: tcpip.NewCancellableTimer(&ndp.nic.mu, func() {
+		invalidationJob: ndp.nic.stack.newJob(&ndp.nic.mu, func() {
 			ndp.invalidateDefaultRouter(ip)
 		}),
 	}
 
-	state.invalidationTimer.Reset(rl)
+	state.invalidationJob.Schedule(rl)
 
 	ndp.defaultRouters[ip] = state
 }
@@ -1009,13 +1009,13 @@ func (ndp *ndpState) rememberOnLinkPrefix(prefix tcpip.Subnet, l time.Duration)
 	}
 
 	state := onLinkPrefixState{
-		invalidationTimer: tcpip.NewCancellableTimer(&ndp.nic.mu, func() {
+		invalidationJob: ndp.nic.stack.newJob(&ndp.nic.mu, func() {
 			ndp.invalidateOnLinkPrefix(prefix)
 		}),
 	}
 
 	if l < header.NDPInfiniteLifetime {
-		state.invalidationTimer.Reset(l)
+		state.invalidationJob.Schedule(l)
 	}
 
 	ndp.onLinkPrefixes[prefix] = state
@@ -1033,7 +1033,7 @@ func (ndp *ndpState) invalidateOnLinkPrefix(prefix tcpip.Subnet) {
 		return
 	}
 
-	s.invalidationTimer.StopLocked()
+	s.invalidationJob.Cancel()
 	delete(ndp.onLinkPrefixes, prefix)
 
 	// Let the integrator know a discovered on-link prefix is invalidated.
@@ -1082,14 +1082,14 @@ func (ndp *ndpState) handleOnLinkPrefixInformation(pi header.NDPPrefixInformatio
 	// This is an already discovered on-link prefix with a
 	// new non-zero valid lifetime.
 	//
-	// Update the invalidation timer.
+	// Update the invalidation job.
 
-	prefixState.invalidationTimer.StopLocked()
+	prefixState.invalidationJob.Cancel()
 
 	if vl < header.NDPInfiniteLifetime {
-		// Prefix is valid for a finite lifetime, reset the timer to expire after
+		// Prefix is valid for a finite lifetime, schedule the job to execute after
 		// the new valid lifetime.
-		prefixState.invalidationTimer.Reset(vl)
+		prefixState.invalidationJob.Schedule(vl)
 	}
 
 	ndp.onLinkPrefixes[prefix] = prefixState
@@ -1154,7 +1154,7 @@ func (ndp *ndpState) doSLAAC(prefix tcpip.Subnet, pl, vl time.Duration) {
 	}
 
 	state := slaacPrefixState{
-		deprecationTimer: tcpip.NewCancellableTimer(&ndp.nic.mu, func() {
+		deprecationJob: ndp.nic.stack.newJob(&ndp.nic.mu, func() {
 			state, ok := ndp.slaacPrefixes[prefix]
 			if !ok {
 				panic(fmt.Sprintf("ndp: must have a slaacPrefixes entry for the deprecated SLAAC prefix %s", prefix))
@@ -1162,7 +1162,7 @@ func (ndp *ndpState) doSLAAC(prefix tcpip.Subnet, pl, vl time.Duration) {
 
 			ndp.deprecateSLAACAddress(state.stableAddr.ref)
 		}),
-		invalidationTimer: tcpip.NewCancellableTimer(&ndp.nic.mu, func() {
+		invalidationJob: ndp.nic.stack.newJob(&ndp.nic.mu, func() {
 			state, ok := ndp.slaacPrefixes[prefix]
 			if !ok {
 				panic(fmt.Sprintf("ndp: must have a slaacPrefixes entry for the invalidated SLAAC prefix %s", prefix))
@@ -1184,19 +1184,19 @@ func (ndp *ndpState) doSLAAC(prefix tcpip.Subnet, pl, vl time.Duration) {
 
 	if !ndp.generateSLAACAddr(prefix, &state) {
 		// We were unable to generate an address for the prefix, we do not nothing
-		// further as there is no reason to maintain state or timers for a prefix we
+		// further as there is no reason to maintain state or jobs for a prefix we
 		// do not have an address for.
 		return
 	}
 
-	// Setup the initial timers to deprecate and invalidate prefix.
+	// Setup the initial jobs to deprecate and invalidate prefix.
 
 	if pl < header.NDPInfiniteLifetime && pl != 0 {
-		state.deprecationTimer.Reset(pl)
+		state.deprecationJob.Schedule(pl)
 	}
 
 	if vl < header.NDPInfiniteLifetime {
-		state.invalidationTimer.Reset(vl)
+		state.invalidationJob.Schedule(vl)
 		state.validUntil = now.Add(vl)
 	}
 
@@ -1428,7 +1428,7 @@ func (ndp *ndpState) generateTempSLAACAddr(prefix tcpip.Subnet, prefixState *sla
 	}
 
 	state := tempSLAACAddrState{
-		deprecationTimer: tcpip.NewCancellableTimer(&ndp.nic.mu, func() {
+		deprecationJob: ndp.nic.stack.newJob(&ndp.nic.mu, func() {
 			prefixState, ok := ndp.slaacPrefixes[prefix]
 			if !ok {
 				panic(fmt.Sprintf("ndp: must have a slaacPrefixes entry for %s to deprecate temporary address %s", prefix, generatedAddr))
@@ -1441,7 +1441,7 @@ func (ndp *ndpState) generateTempSLAACAddr(prefix tcpip.Subnet, prefixState *sla
 
 			ndp.deprecateSLAACAddress(tempAddrState.ref)
 		}),
-		invalidationTimer: tcpip.NewCancellableTimer(&ndp.nic.mu, func() {
+		invalidationJob: ndp.nic.stack.newJob(&ndp.nic.mu, func() {
 			prefixState, ok := ndp.slaacPrefixes[prefix]
 			if !ok {
 				panic(fmt.Sprintf("ndp: must have a slaacPrefixes entry for %s to invalidate temporary address %s", prefix, generatedAddr))
@@ -1454,7 +1454,7 @@ func (ndp *ndpState) generateTempSLAACAddr(prefix tcpip.Subnet, prefixState *sla
 
 			ndp.invalidateTempSLAACAddr(prefixState.tempAddrs, generatedAddr.Address, tempAddrState)
 		}),
-		regenTimer: tcpip.NewCancellableTimer(&ndp.nic.mu, func() {
+		regenJob: ndp.nic.stack.newJob(&ndp.nic.mu, func() {
 			prefixState, ok := ndp.slaacPrefixes[prefix]
 			if !ok {
 				panic(fmt.Sprintf("ndp: must have a slaacPrefixes entry for %s to regenerate temporary address after %s", prefix, generatedAddr))
@@ -1481,9 +1481,9 @@ func (ndp *ndpState) generateTempSLAACAddr(prefix tcpip.Subnet, prefixState *sla
 		ref:       ref,
 	}
 
-	state.deprecationTimer.Reset(pl)
-	state.invalidationTimer.Reset(vl)
-	state.regenTimer.Reset(pl - ndp.configs.RegenAdvanceDuration)
+	state.deprecationJob.Schedule(pl)
+	state.invalidationJob.Schedule(vl)
+	state.regenJob.Schedule(pl - ndp.configs.RegenAdvanceDuration)
 
 	prefixState.generationAttempts++
 	prefixState.tempAddrs[generatedAddr.Address] = state
@@ -1518,16 +1518,16 @@ func (ndp *ndpState) refreshSLAACPrefixLifetimes(prefix tcpip.Subnet, prefixStat
 		prefixState.stableAddr.ref.deprecated = false
 	}
 
-	// If prefix was preferred for some finite lifetime before, stop the
-	// deprecation timer so it can be reset.
-	prefixState.deprecationTimer.StopLocked()
+	// If prefix was preferred for some finite lifetime before, cancel the
+	// deprecation job so it can be reset.
+	prefixState.deprecationJob.Cancel()
 
 	now := time.Now()
 
-	// Reset the deprecation timer if prefix has a finite preferred lifetime.
+	// Schedule the deprecation job if prefix has a finite preferred lifetime.
 	if pl < header.NDPInfiniteLifetime {
 		if !deprecated {
-			prefixState.deprecationTimer.Reset(pl)
+			prefixState.deprecationJob.Schedule(pl)
 		}
 		prefixState.preferredUntil = now.Add(pl)
 	} else {
@@ -1546,9 +1546,9 @@ func (ndp *ndpState) refreshSLAACPrefixLifetimes(prefix tcpip.Subnet, prefixStat
 	// 3) Otherwise, reset the valid lifetime of the prefix to 2 hours.
 
 	if vl >= header.NDPInfiniteLifetime {
-		// Handle the infinite valid lifetime separately as we do not keep a timer
-		// in this case.
-		prefixState.invalidationTimer.StopLocked()
+		// Handle the infinite valid lifetime separately as we do not schedule a
+		// job in this case.
+		prefixState.invalidationJob.Cancel()
 		prefixState.validUntil = time.Time{}
 	} else {
 		var effectiveVl time.Duration
@@ -1569,8 +1569,8 @@ func (ndp *ndpState) refreshSLAACPrefixLifetimes(prefix tcpip.Subnet, prefixStat
 		}
 
 		if effectiveVl != 0 {
-			prefixState.invalidationTimer.StopLocked()
-			prefixState.invalidationTimer.Reset(effectiveVl)
+			prefixState.invalidationJob.Cancel()
+			prefixState.invalidationJob.Schedule(effectiveVl)
 			prefixState.validUntil = now.Add(effectiveVl)
 		}
 	}
@@ -1582,7 +1582,7 @@ func (ndp *ndpState) refreshSLAACPrefixLifetimes(prefix tcpip.Subnet, prefixStat
 	}
 
 	// Note, we do not need to update the entries in the temporary address map
-	// after updating the timers because the timers are held as pointers.
+	// after updating the jobs because the jobs are held as pointers.
 	var regenForAddr tcpip.Address
 	allAddressesRegenerated := true
 	for tempAddr, tempAddrState := range prefixState.tempAddrs {
@@ -1596,14 +1596,14 @@ func (ndp *ndpState) refreshSLAACPrefixLifetimes(prefix tcpip.Subnet, prefixStat
 		}
 
 		// If the address is no longer valid, invalidate it immediately. Otherwise,
-		// reset the invalidation timer.
+		// reset the invalidation job.
 		newValidLifetime := validUntil.Sub(now)
 		if newValidLifetime <= 0 {
 			ndp.invalidateTempSLAACAddr(prefixState.tempAddrs, tempAddr, tempAddrState)
 			continue
 		}
-		tempAddrState.invalidationTimer.StopLocked()
-		tempAddrState.invalidationTimer.Reset(newValidLifetime)
+		tempAddrState.invalidationJob.Cancel()
+		tempAddrState.invalidationJob.Schedule(newValidLifetime)
 
 		// As per RFC 4941 section 3.3 step 4, the preferred lifetime of a temporary
 		// address is the lower of the preferred lifetime of the stable address or
@@ -1616,17 +1616,17 @@ func (ndp *ndpState) refreshSLAACPrefixLifetimes(prefix tcpip.Subnet, prefixStat
 		}
 
 		// If the address is no longer preferred, deprecate it immediately.
-		// Otherwise, reset the deprecation timer.
+		// Otherwise, schedule the deprecation job again.
 		newPreferredLifetime := preferredUntil.Sub(now)
-		tempAddrState.deprecationTimer.StopLocked()
+		tempAddrState.deprecationJob.Cancel()
 		if newPreferredLifetime <= 0 {
 			ndp.deprecateSLAACAddress(tempAddrState.ref)
 		} else {
 			tempAddrState.ref.deprecated = false
-			tempAddrState.deprecationTimer.Reset(newPreferredLifetime)
+			tempAddrState.deprecationJob.Schedule(newPreferredLifetime)
 		}
 
-		tempAddrState.regenTimer.StopLocked()
+		tempAddrState.regenJob.Cancel()
 		if tempAddrState.regenerated {
 		} else {
 			allAddressesRegenerated = false
@@ -1637,7 +1637,7 @@ func (ndp *ndpState) refreshSLAACPrefixLifetimes(prefix tcpip.Subnet, prefixStat
 				// immediately after we finish iterating over the temporary addresses.
 				regenForAddr = tempAddr
 			} else {
-				tempAddrState.regenTimer.Reset(newPreferredLifetime - ndp.configs.RegenAdvanceDuration)
+				tempAddrState.regenJob.Schedule(newPreferredLifetime - ndp.configs.RegenAdvanceDuration)
 			}
 		}
 	}
@@ -1717,7 +1717,7 @@ func (ndp *ndpState) cleanupSLAACAddrResourcesAndNotify(addr tcpip.AddressWithPr
 	ndp.cleanupSLAACPrefixResources(prefix, state)
 }
 
-// cleanupSLAACPrefixResources cleansup a SLAAC prefix's timers and entry.
+// cleanupSLAACPrefixResources cleans up a SLAAC prefix's jobs and entry.
 //
 // Panics if the SLAAC prefix is not known.
 //
@@ -1729,8 +1729,8 @@ func (ndp *ndpState) cleanupSLAACPrefixResources(prefix tcpip.Subnet, state slaa
 	}
 
 	state.stableAddr.ref = nil
-	state.deprecationTimer.StopLocked()
-	state.invalidationTimer.StopLocked()
+	state.deprecationJob.Cancel()
+	state.invalidationJob.Cancel()
 	delete(ndp.slaacPrefixes, prefix)
 }
 
@@ -1775,13 +1775,13 @@ func (ndp *ndpState) cleanupTempSLAACAddrResourcesAndNotify(addr tcpip.AddressWi
 }
 
 // cleanupTempSLAACAddrResourcesAndNotify cleans up a temporary SLAAC address's
-// timers and entry.
+// jobs and entry.
 //
 // The NIC that ndp belongs to MUST be locked.
 func (ndp *ndpState) cleanupTempSLAACAddrResources(tempAddrs map[tcpip.Address]tempSLAACAddrState, tempAddr tcpip.Address, tempAddrState tempSLAACAddrState) {
-	tempAddrState.deprecationTimer.StopLocked()
-	tempAddrState.invalidationTimer.StopLocked()
-	tempAddrState.regenTimer.StopLocked()
+	tempAddrState.deprecationJob.Cancel()
+	tempAddrState.invalidationJob.Cancel()
+	tempAddrState.regenJob.Cancel()
 	delete(tempAddrs, tempAddr)
 }
 
@@ -1860,7 +1860,7 @@ func (ndp *ndpState) startSolicitingRouters() {
 
 	var done bool
 	ndp.rtrSolicit.done = &done
-	ndp.rtrSolicit.timer = time.AfterFunc(delay, func() {
+	ndp.rtrSolicit.timer = ndp.nic.stack.Clock().AfterFunc(delay, func() {
 		ndp.nic.mu.Lock()
 		if done {
 			// If we reach this point, it means that the RS timer fired after another
diff --git a/pkg/tcpip/stack/ndp_test.go b/pkg/tcpip/stack/ndp_test.go
index 6f86abc98..644ba7c33 100644
--- a/pkg/tcpip/stack/ndp_test.go
+++ b/pkg/tcpip/stack/ndp_test.go
@@ -1254,7 +1254,7 @@ func TestRouterDiscovery(t *testing.T) {
 	default:
 	}
 
-	// Wait for lladdr2's router invalidation timer to fire. The lifetime
+	// Wait for lladdr2's router invalidation job to execute. The lifetime
 	// of the router should have been updated to the most recent (smaller)
 	// lifetime.
 	//
@@ -1271,7 +1271,7 @@ func TestRouterDiscovery(t *testing.T) {
 	e.InjectInbound(header.IPv6ProtocolNumber, raBuf(llAddr2, 0))
 	expectRouterEvent(llAddr2, false)
 
-	// Wait for lladdr3's router invalidation timer to fire. The lifetime
+	// Wait for lladdr3's router invalidation job to execute. The lifetime
 	// of the router should have been updated to the most recent (smaller)
 	// lifetime.
 	//
@@ -1502,7 +1502,7 @@ func TestPrefixDiscovery(t *testing.T) {
 	default:
 	}
 
-	// Wait for prefix2's most recent invalidation timer plus some buffer to
+	// Wait for prefix2's most recent invalidation job plus some buffer to
 	// expire.
 	select {
 	case e := <-ndpDisp.prefixC:
@@ -2395,7 +2395,7 @@ func TestAutoGenTempAddrRegen(t *testing.T) {
 	for _, addr := range tempAddrs {
 		// Wait for a deprecation then invalidation event, or just an invalidation
 		// event. We need to cover both cases but cannot deterministically hit both
-		// cases because the deprecation and invalidation timers could fire in any
+		// cases because the deprecation and invalidation jobs could execute in any
 		// order.
 		select {
 		case e := <-ndpDisp.autoGenAddrC:
@@ -2432,9 +2432,9 @@ func TestAutoGenTempAddrRegen(t *testing.T) {
 	}
 }
 
-// TestAutoGenTempAddrRegenTimerUpdates tests that a temporary address's
-// regeneration timer gets updated when refreshing the address's lifetimes.
-func TestAutoGenTempAddrRegenTimerUpdates(t *testing.T) {
+// TestAutoGenTempAddrRegenJobUpdates tests that a temporary address's
+// regeneration job gets updated when refreshing the address's lifetimes.
+func TestAutoGenTempAddrRegenJobUpdates(t *testing.T) {
 	const (
 		nicID            = 1
 		regenAfter       = 2 * time.Second
@@ -2533,7 +2533,7 @@ func TestAutoGenTempAddrRegenTimerUpdates(t *testing.T) {
 	//
 	// A new temporary address should immediately be generated since the
 	// regeneration time has already passed since the last address was generated
-	// - this regeneration does not depend on a timer.
+	// - this regeneration does not depend on a job.
 	e.InjectInbound(header.IPv6ProtocolNumber, raBufWithPI(llAddr2, 0, prefix, true, true, 100, 100))
 	expectAutoGenAddrEvent(tempAddr2, newAddr)
 
@@ -2559,11 +2559,11 @@ func TestAutoGenTempAddrRegenTimerUpdates(t *testing.T) {
 	}
 
 	// Set the maximum lifetimes for temporary addresses such that on the next
-	// RA, the regeneration timer gets reset.
+	// RA, the regeneration job gets scheduled again.
 	//
 	// The maximum lifetime is the sum of the minimum lifetimes for temporary
 	// addresses + the time that has already passed since the last address was
-	// generated so that the regeneration timer is needed to generate the next
+	// generated so that the regeneration job is needed to generate the next
 	// address.
 	newLifetimes := newMinVLDuration + regenAfter + defaultAsyncNegativeEventTimeout
 	ndpConfigs.MaxTempAddrValidLifetime = newLifetimes
@@ -2993,9 +2993,9 @@ func TestAutoGenAddrDeprecateFromPI(t *testing.T) {
 	expectPrimaryAddr(addr2)
 }
 
-// TestAutoGenAddrTimerDeprecation tests that an address is properly deprecated
+// TestAutoGenAddrJobDeprecation tests that an address is properly deprecated
 // when its preferred lifetime expires.
-func TestAutoGenAddrTimerDeprecation(t *testing.T) {
+func TestAutoGenAddrJobDeprecation(t *testing.T) {
 	const nicID = 1
 	const newMinVL = 2
 	newMinVLDuration := newMinVL * time.Second
@@ -3513,8 +3513,8 @@ func TestAutoGenAddrRemoval(t *testing.T) {
 	}
 	expectAutoGenAddrEvent(addr, invalidatedAddr)
 
-	// Wait for the original valid lifetime to make sure the original timer
-	// got stopped/cleaned up.
+	// Wait for the original valid lifetime to make sure the original job got
+	// cancelled/cleaned up.
 	select {
 	case <-ndpDisp.autoGenAddrC:
 		t.Fatal("unexpectedly received an auto gen addr event")
diff --git a/pkg/tcpip/stack/nic.go b/pkg/tcpip/stack/nic.go
index 7b80534e6..fea0ce7e8 100644
--- a/pkg/tcpip/stack/nic.go
+++ b/pkg/tcpip/stack/nic.go
@@ -1200,15 +1200,13 @@ func (n *NIC) DeliverNetworkPacket(remote, local tcpip.LinkAddress, protocol tcp
 
 	// Are any packet sockets listening for this network protocol?
 	packetEPs := n.mu.packetEPs[protocol]
-	// Check whether there are packet sockets listening for every protocol.
-	// If we received a packet with protocol EthernetProtocolAll, then the
-	// previous for loop will have handled it.
-	if protocol != header.EthernetProtocolAll {
-		packetEPs = append(packetEPs, n.mu.packetEPs[header.EthernetProtocolAll]...)
-	}
+	// Add any other packet sockets that maybe listening for all protocols.
+	packetEPs = append(packetEPs, n.mu.packetEPs[header.EthernetProtocolAll]...)
 	n.mu.RUnlock()
 	for _, ep := range packetEPs {
-		ep.HandlePacket(n.id, local, protocol, pkt.Clone())
+		p := pkt.Clone()
+		p.PktType = tcpip.PacketHost
+		ep.HandlePacket(n.id, local, protocol, p)
 	}
 
 	if netProto.Number() == header.IPv4ProtocolNumber || netProto.Number() == header.IPv6ProtocolNumber {
@@ -1311,6 +1309,24 @@ func (n *NIC) DeliverNetworkPacket(remote, local tcpip.LinkAddress, protocol tcp
 	}
 }
 
+// DeliverOutboundPacket implements NetworkDispatcher.DeliverOutboundPacket.
+func (n *NIC) DeliverOutboundPacket(remote, local tcpip.LinkAddress, protocol tcpip.NetworkProtocolNumber, pkt *PacketBuffer) {
+	n.mu.RLock()
+	// We do not deliver to protocol specific packet endpoints as on Linux
+	// only ETH_P_ALL endpoints get outbound packets.
+	// Add any other packet sockets that maybe listening for all protocols.
+	packetEPs := n.mu.packetEPs[header.EthernetProtocolAll]
+	n.mu.RUnlock()
+	for _, ep := range packetEPs {
+		p := pkt.Clone()
+		p.PktType = tcpip.PacketOutgoing
+		// Add the link layer header as outgoing packets are intercepted
+		// before the link layer header is created.
+		n.linkEP.AddHeader(local, remote, protocol, p)
+		ep.HandlePacket(n.id, local, protocol, p)
+	}
+}
+
 func (n *NIC) forwardPacket(r *Route, protocol tcpip.NetworkProtocolNumber, pkt *PacketBuffer) {
 	// TODO(b/143425874) Decrease the TTL field in forwarded packets.
 	// TODO(b/151227689): Avoid copying the packet when forwarding. We can do this
diff --git a/pkg/tcpip/stack/nic_test.go b/pkg/tcpip/stack/nic_test.go
index 3bc9fd831..a70792b50 100644
--- a/pkg/tcpip/stack/nic_test.go
+++ b/pkg/tcpip/stack/nic_test.go
@@ -89,6 +89,11 @@ func (*testLinkEndpoint) ARPHardwareType() header.ARPHardwareType {
 	panic("not implemented")
 }
 
+// AddHeader implements stack.LinkEndpoint.AddHeader.
+func (e *testLinkEndpoint) AddHeader(local, remote tcpip.LinkAddress, protocol tcpip.NetworkProtocolNumber, pkt *PacketBuffer) {
+	panic("not implemented")
+}
+
 var _ NetworkEndpoint = (*testIPv6Endpoint)(nil)
 
 // An IPv6 NetworkEndpoint that throws away outgoing packets.
@@ -238,7 +243,7 @@ func (*testIPv6Protocol) LinkAddressProtocol() tcpip.NetworkProtocolNumber {
 }
 
 // LinkAddressRequest implements LinkAddressResolver.
-func (*testIPv6Protocol) LinkAddressRequest(_, _ tcpip.Address, _ LinkEndpoint) *tcpip.Error {
+func (*testIPv6Protocol) LinkAddressRequest(_, _ tcpip.Address, _ tcpip.LinkAddress, _ LinkEndpoint) *tcpip.Error {
 	return nil
 }
 
diff --git a/pkg/tcpip/stack/packet_buffer.go b/pkg/tcpip/stack/packet_buffer.go
index e3556d5d2..5d6865e35 100644
--- a/pkg/tcpip/stack/packet_buffer.go
+++ b/pkg/tcpip/stack/packet_buffer.go
@@ -79,6 +79,10 @@ type PacketBuffer struct {
 	// NatDone indicates if the packet has been manipulated as per NAT
 	// iptables rule.
 	NatDone bool
+
+	// PktType indicates the SockAddrLink.PacketType of the packet as defined in
+	// https://www.man7.org/linux/man-pages/man7/packet.7.html.
+	PktType tcpip.PacketType
 }
 
 // Clone makes a copy of pk. It clones the Data field, which creates a new
diff --git a/pkg/tcpip/stack/registration.go b/pkg/tcpip/stack/registration.go
index f260eeb7f..8604c4259 100644
--- a/pkg/tcpip/stack/registration.go
+++ b/pkg/tcpip/stack/registration.go
@@ -52,8 +52,11 @@ type TransportEndpointID struct {
 type ControlType int
 
 // The following are the allowed values for ControlType values.
+// TODO(http://gvisor.dev/issue/3210): Support time exceeded messages.
 const (
-	ControlPacketTooBig ControlType = iota
+	ControlNetworkUnreachable ControlType = iota
+	ControlNoRoute
+	ControlPacketTooBig
 	ControlPortUnreachable
 	ControlUnknown
 )
@@ -330,8 +333,7 @@ type NetworkProtocol interface {
 }
 
 // NetworkDispatcher contains the methods used by the network stack to deliver
-// packets to the appropriate network endpoint after it has been handled by
-// the data link layer.
+// inbound/outbound packets to the appropriate network/packet(if any) endpoints.
 type NetworkDispatcher interface {
 	// DeliverNetworkPacket finds the appropriate network protocol endpoint
 	// and hands the packet over for further processing.
@@ -342,6 +344,16 @@ type NetworkDispatcher interface {
 	//
 	// DeliverNetworkPacket takes ownership of pkt.
 	DeliverNetworkPacket(remote, local tcpip.LinkAddress, protocol tcpip.NetworkProtocolNumber, pkt *PacketBuffer)
+
+	// DeliverOutboundPacket is called by link layer when a packet is being
+	// sent out.
+	//
+	// pkt.LinkHeader may or may not be set before calling
+	// DeliverOutboundPacket. Some packets do not have link headers (e.g.
+	// packets sent via loopback), and won't have the field set.
+	//
+	// DeliverOutboundPacket takes ownership of pkt.
+	DeliverOutboundPacket(remote, local tcpip.LinkAddress, protocol tcpip.NetworkProtocolNumber, pkt *PacketBuffer)
 }
 
 // LinkEndpointCapabilities is the type associated with the capabilities
@@ -443,6 +455,9 @@ type LinkEndpoint interface {
 	// See:
 	// https://github.com/torvalds/linux/blob/aa0c9086b40c17a7ad94425b3b70dd1fdd7497bf/include/uapi/linux/if_arp.h#L30
 	ARPHardwareType() header.ARPHardwareType
+
+	// AddHeader adds a link layer header to pkt if required.
+	AddHeader(local, remote tcpip.LinkAddress, protocol tcpip.NetworkProtocolNumber, pkt *PacketBuffer)
 }
 
 // InjectableLinkEndpoint is a LinkEndpoint where inbound packets are
@@ -463,12 +478,13 @@ type InjectableLinkEndpoint interface {
 // A LinkAddressResolver is an extension to a NetworkProtocol that
 // can resolve link addresses.
 type LinkAddressResolver interface {
-	// LinkAddressRequest sends a request for the LinkAddress of addr.
-	// The request is sent on linkEP with localAddr as the source.
+	// LinkAddressRequest sends a request for the LinkAddress of addr. Broadcasts
+	// the request on the local network if remoteLinkAddr is the zero value. The
+	// request is sent on linkEP with localAddr as the source.
 	//
 	// A valid response will cause the discovery protocol's network
 	// endpoint to call AddLinkAddress.
-	LinkAddressRequest(addr, localAddr tcpip.Address, linkEP LinkEndpoint) *tcpip.Error
+	LinkAddressRequest(addr, localAddr tcpip.Address, remoteLinkAddr tcpip.LinkAddress, linkEP LinkEndpoint) *tcpip.Error
 
 	// ResolveStaticAddress attempts to resolve address without sending
 	// requests. It either resolves the name immediately or returns the
diff --git a/pkg/tcpip/stack/stack.go b/pkg/tcpip/stack/stack.go
index 2b7ece851..a6faa22c2 100644
--- a/pkg/tcpip/stack/stack.go
+++ b/pkg/tcpip/stack/stack.go
@@ -728,6 +728,11 @@ func New(opts Options) *Stack {
 	return s
 }
 
+// newJob returns a tcpip.Job using the Stack clock.
+func (s *Stack) newJob(l sync.Locker, f func()) *tcpip.Job {
+	return tcpip.NewJob(s.clock, l, f)
+}
+
 // UniqueID returns a unique identifier.
 func (s *Stack) UniqueID() uint64 {
 	return s.uniqueIDGenerator.UniqueID()
@@ -801,9 +806,10 @@ func (s *Stack) SetTransportProtocolHandler(p tcpip.TransportProtocolNumber, h f
 	}
 }
 
-// NowNanoseconds implements tcpip.Clock.NowNanoseconds.
-func (s *Stack) NowNanoseconds() int64 {
-	return s.clock.NowNanoseconds()
+// Clock returns the Stack's clock for retrieving the current time and
+// scheduling work.
+func (s *Stack) Clock() tcpip.Clock {
+	return s.clock
 }
 
 // Stats returns a mutable copy of the current stats.
diff --git a/pkg/tcpip/tcpip.go b/pkg/tcpip/tcpip.go
index 48ad56d4d..21aafb0a2 100644
--- a/pkg/tcpip/tcpip.go
+++ b/pkg/tcpip/tcpip.go
@@ -192,7 +192,7 @@ func (e ErrSaveRejection) Error() string {
 	return "save rejected due to unsupported networking state: " + e.Err.Error()
 }
 
-// A Clock provides the current time.
+// A Clock provides the current time and schedules work for execution.
 //
 // Times returned by a Clock should always be used for application-visible
 // time. Only monotonic times should be used for netstack internal timekeeping.
@@ -203,6 +203,31 @@ type Clock interface {
 
 	// NowMonotonic returns a monotonic time value.
 	NowMonotonic() int64
+
+	// AfterFunc waits for the duration to elapse and then calls f in its own
+	// goroutine. It returns a Timer that can be used to cancel the call using
+	// its Stop method.
+	AfterFunc(d time.Duration, f func()) Timer
+}
+
+// Timer represents a single event. A Timer must be created with
+// Clock.AfterFunc.
+type Timer interface {
+	// Stop prevents the Timer from firing. It returns true if the call stops the
+	// timer, false if the timer has already expired or been stopped.
+	//
+	// If Stop returns false, then the timer has already expired and the function
+	// f of Clock.AfterFunc(d, f) has been started in its own goroutine; Stop
+	// does not wait for f to complete before returning. If the caller needs to
+	// know whether f is completed, it must coordinate with f explicitly.
+	Stop() bool
+
+	// Reset changes the timer to expire after duration d.
+	//
+	// Reset should be invoked only on stopped or expired timers. If the timer is
+	// known to have expired, Reset can be used directly. Otherwise, the caller
+	// must coordinate with the function f of Clock.AfterFunc(d, f).
+	Reset(d time.Duration)
 }
 
 // Address is a byte slice cast as a string that represents the address of a
@@ -316,6 +341,28 @@ const (
 	ShutdownWrite
 )
 
+// PacketType is used to indicate the destination of the packet.
+type PacketType uint8
+
+const (
+	// PacketHost indicates a packet addressed to the local host.
+	PacketHost PacketType = iota
+
+	// PacketOtherHost indicates an outgoing packet addressed to
+	// another host caught by a NIC in promiscuous mode.
+	PacketOtherHost
+
+	// PacketOutgoing for a packet originating from the local host
+	// that is looped back to a packet socket.
+	PacketOutgoing
+
+	// PacketBroadcast indicates a link layer broadcast packet.
+	PacketBroadcast
+
+	// PacketMulticast indicates a link layer multicast packet.
+	PacketMulticast
+)
+
 // FullAddress represents a full transport node address, as required by the
 // Connect() and Bind() methods.
 //
@@ -555,6 +602,9 @@ type Endpoint interface {
 type LinkPacketInfo struct {
 	// Protocol is the NetworkProtocolNumber for the packet.
 	Protocol NetworkProtocolNumber
+
+	// PktType is used to indicate the destination of the packet.
+	PktType PacketType
 }
 
 // PacketEndpoint are additional methods that are only implemented by Packet
diff --git a/pkg/tcpip/time_unsafe.go b/pkg/tcpip/time_unsafe.go
index 7f172f978..f32d58091 100644
--- a/pkg/tcpip/time_unsafe.go
+++ b/pkg/tcpip/time_unsafe.go
@@ -20,7 +20,7 @@
 package tcpip
 
 import (
-	_ "time"   // Used with go:linkname.
+	"time"     // Used with go:linkname.
 	_ "unsafe" // Required for go:linkname.
 )
 
@@ -45,3 +45,31 @@ func (*StdClock) NowMonotonic() int64 {
 	_, _, mono := now()
 	return mono
 }
+
+// AfterFunc implements Clock.AfterFunc.
+func (*StdClock) AfterFunc(d time.Duration, f func()) Timer {
+	return &stdTimer{
+		t: time.AfterFunc(d, f),
+	}
+}
+
+type stdTimer struct {
+	t *time.Timer
+}
+
+var _ Timer = (*stdTimer)(nil)
+
+// Stop implements Timer.Stop.
+func (st *stdTimer) Stop() bool {
+	return st.t.Stop()
+}
+
+// Reset implements Timer.Reset.
+func (st *stdTimer) Reset(d time.Duration) {
+	st.t.Reset(d)
+}
+
+// NewStdTimer returns a Timer implemented with the time package.
+func NewStdTimer(t *time.Timer) Timer {
+	return &stdTimer{t: t}
+}
diff --git a/pkg/tcpip/timer.go b/pkg/tcpip/timer.go
index 5554c573f..f1dd7c310 100644
--- a/pkg/tcpip/timer.go
+++ b/pkg/tcpip/timer.go
@@ -20,50 +20,49 @@ import (
 	"gvisor.dev/gvisor/pkg/sync"
 )
 
-// cancellableTimerInstance is a specific instance of CancellableTimer.
+// jobInstance is a specific instance of Job.
 //
-// Different instances are created each time CancellableTimer is Reset so each
-// timer has its own earlyReturn signal. This is to address a bug when a
-// CancellableTimer is stopped and reset in quick succession resulting in a
-// timer instance's earlyReturn signal being affected or seen by another timer
-// instance.
+// Different instances are created each time Job is scheduled so each timer has
+// its own earlyReturn signal. This is to address a bug when a Job is stopped
+// and reset in quick succession resulting in a timer instance's earlyReturn
+// signal being affected or seen by another timer instance.
 //
 // Consider the following sceneario where timer instances share a common
 // earlyReturn signal (T1 creates, stops and resets a Cancellable timer under a
 // lock L; T2, T3, T4 and T5 are goroutines that handle the first (A), second
 // (B), third (C), and fourth (D) instance of the timer firing, respectively):
 //   T1: Obtain L
-//   T1: Create a new CancellableTimer w/ lock L (create instance A)
+//   T1: Create a new Job w/ lock L (create instance A)
 //   T2: instance A fires, blocked trying to obtain L.
 //   T1: Attempt to stop instance A (set earlyReturn = true)
-//   T1: Reset timer (create instance B)
+//   T1: Schedule timer (create instance B)
 //   T3: instance B fires, blocked trying to obtain L.
 //   T1: Attempt to stop instance B (set earlyReturn = true)
-//   T1: Reset timer (create instance C)
+//   T1: Schedule timer (create instance C)
 //   T4: instance C fires, blocked trying to obtain L.
 //   T1: Attempt to stop instance C (set earlyReturn = true)
-//   T1: Reset timer (create instance D)
+//   T1: Schedule timer (create instance D)
 //   T5: instance D fires, blocked trying to obtain L.
 //   T1: Release L
 //
-// Now that T1 has released L, any of the 4 timer instances can take L and check
-// earlyReturn. If the timers simply check earlyReturn and then do nothing
-// further, then instance D will never early return even though it was not
-// requested to stop. If the timers reset earlyReturn before early returning,
-// then all but one of the timers will do work when only one was expected to.
-// If CancellableTimer resets earlyReturn when resetting, then all the timers
+// Now that T1 has released L, any of the 4 timer instances can take L and
+// check earlyReturn. If the timers simply check earlyReturn and then do
+// nothing further, then instance D will never early return even though it was
+// not requested to stop. If the timers reset earlyReturn before early
+// returning, then all but one of the timers will do work when only one was
+// expected to. If Job resets earlyReturn when resetting, then all the timers
 // will fire (again, when only one was expected to).
 //
 // To address the above concerns the simplest solution was to give each timer
 // its own earlyReturn signal.
-type cancellableTimerInstance struct {
-	timer *time.Timer
+type jobInstance struct {
+	timer Timer
 
 	// Used to inform the timer to early return when it gets stopped while the
 	// lock the timer tries to obtain when fired is held (T1 is a goroutine that
 	// tries to cancel the timer and T2 is the goroutine that handles the timer
 	// firing):
-	//   T1: Obtain the lock, then call StopLocked()
+	//   T1: Obtain the lock, then call Cancel()
 	//   T2: timer fires, and gets blocked on obtaining the lock
 	//   T1: Releases lock
 	//   T2: Obtains lock does unintended work
@@ -74,29 +73,33 @@ type cancellableTimerInstance struct {
 	earlyReturn *bool
 }
 
-// stop stops the timer instance t from firing if it hasn't fired already. If it
+// stop stops the job instance j from firing if it hasn't fired already. If it
 // has fired and is blocked at obtaining the lock, earlyReturn will be set to
 // true so that it will early return when it obtains the lock.
-func (t *cancellableTimerInstance) stop() {
-	if t.timer != nil {
-		t.timer.Stop()
-		*t.earlyReturn = true
+func (j *jobInstance) stop() {
+	if j.timer != nil {
+		j.timer.Stop()
+		*j.earlyReturn = true
 	}
 }
 
-// CancellableTimer is a timer that does some work and can be safely cancelled
-// when it fires at the same time some "related work" is being done.
+// Job represents some work that can be scheduled for execution. The work can
+// be safely cancelled when it fires at the same time some "related work" is
+// being done.
 //
 // The term "related work" is defined as some work that needs to be done while
 // holding some lock that the timer must also hold while doing some work.
 //
-// Note, it is not safe to copy a CancellableTimer as its timer instance creates
-// a closure over the address of the CancellableTimer.
-type CancellableTimer struct {
+// Note, it is not safe to copy a Job as its timer instance creates
+// a closure over the address of the Job.
+type Job struct {
 	_ sync.NoCopy
 
+	// The clock used to schedule the backing timer
+	clock Clock
+
 	// The active instance of a cancellable timer.
-	instance cancellableTimerInstance
+	instance jobInstance
 
 	// locker is the lock taken by the timer immediately after it fires and must
 	// be held when attempting to stop the timer.
@@ -113,59 +116,91 @@ type CancellableTimer struct {
 	fn func()
 }
 
-// StopLocked prevents the Timer from firing if it has not fired already.
+// Cancel prevents the Job from executing if it has not executed already.
 //
-// If the timer is blocked on obtaining the t.locker lock when StopLocked is
-// called, it will early return instead of calling t.fn.
+// Cancel requires appropriate locking to be in place for any resources managed
+// by the Job. If the Job is blocked on obtaining the lock when Cancel is
+// called, it will early return.
 //
 // Note, t will be modified.
 //
-// t.locker MUST be locked.
-func (t *CancellableTimer) StopLocked() {
-	t.instance.stop()
+// j.locker MUST be locked.
+func (j *Job) Cancel() {
+	j.instance.stop()
 
 	// Nothing to do with the stopped instance anymore.
-	t.instance = cancellableTimerInstance{}
+	j.instance = jobInstance{}
 }
 
-// Reset changes the timer to expire after duration d.
+// Schedule schedules the Job for execution after duration d. This can be
+// called on cancelled or completed Jobs to schedule them again.
 //
-// Note, t will be modified.
+// Schedule should be invoked only on unscheduled, cancelled, or completed
+// Jobs. To be safe, callers should always call Cancel before calling Schedule.
 //
-// Reset should only be called on stopped or expired timers. To be safe, callers
-// should always call StopLocked before calling Reset.
-func (t *CancellableTimer) Reset(d time.Duration) {
+// Note, j will be modified.
+func (j *Job) Schedule(d time.Duration) {
 	// Create a new instance.
 	earlyReturn := false
 
 	// Capture the locker so that updating the timer does not cause a data race
 	// when a timer fires and tries to obtain the lock (read the timer's locker).
-	locker := t.locker
-	t.instance = cancellableTimerInstance{
-		timer: time.AfterFunc(d, func() {
+	locker := j.locker
+	j.instance = jobInstance{
+		timer: j.clock.AfterFunc(d, func() {
 			locker.Lock()
 			defer locker.Unlock()
 
 			if earlyReturn {
 				// If we reach this point, it means that the timer fired while another
-				// goroutine called StopLocked while it had the lock. Simply return
-				// here and do nothing further.
+				// goroutine called Cancel while it had the lock. Simply return here
+				// and do nothing further.
 				earlyReturn = false
 				return
 			}
 
-			t.fn()
+			j.fn()
 		}),
 		earlyReturn: &earlyReturn,
 	}
 }
 
-// NewCancellableTimer returns an unscheduled CancellableTimer with the given
-// locker and fn.
-//
-// fn MUST NOT attempt to lock locker.
-//
-// Callers must call Reset to schedule the timer to fire.
-func NewCancellableTimer(locker sync.Locker, fn func()) *CancellableTimer {
-	return &CancellableTimer{locker: locker, fn: fn}
+// NewJob returns a new Job that can be used to schedule f to run in its own
+// gorountine. l will be locked before calling f then unlocked after f returns.
+//
+//  var clock tcpip.StdClock
+//  var mu sync.Mutex
+//  message := "foo"
+//  job := tcpip.NewJob(&clock, &mu, func() {
+//    fmt.Println(message)
+//  })
+//  job.Schedule(time.Second)
+//
+//  mu.Lock()
+//  message = "bar"
+//  mu.Unlock()
+//
+//  // Output: bar
+//
+// f MUST NOT attempt to lock l.
+//
+// l MUST be locked prior to calling the returned job's Cancel().
+//
+//  var clock tcpip.StdClock
+//  var mu sync.Mutex
+//  message := "foo"
+//  job := tcpip.NewJob(&clock, &mu, func() {
+//    fmt.Println(message)
+//  })
+//  job.Schedule(time.Second)
+//
+//  mu.Lock()
+//  job.Cancel()
+//  mu.Unlock()
+func NewJob(c Clock, l sync.Locker, f func()) *Job {
+	return &Job{
+		clock:  c,
+		locker: l,
+		fn:     f,
+	}
 }
diff --git a/pkg/tcpip/timer_test.go b/pkg/tcpip/timer_test.go
index b4940e397..a82384c49 100644
--- a/pkg/tcpip/timer_test.go
+++ b/pkg/tcpip/timer_test.go
@@ -28,8 +28,8 @@ const (
 	longDuration   = 1 * time.Second
 )
 
-func TestCancellableTimerReassignment(t *testing.T) {
-	var timer tcpip.CancellableTimer
+func TestJobReschedule(t *testing.T) {
+	var clock tcpip.StdClock
 	var wg sync.WaitGroup
 	var lock sync.Mutex
 
@@ -43,26 +43,27 @@ func TestCancellableTimerReassignment(t *testing.T) {
 			// that has an active timer (even if it has been stopped as a stopped
 			// timer may be blocked on a lock before it can check if it has been
 			// stopped while another goroutine holds the same lock).
-			timer = *tcpip.NewCancellableTimer(&lock, func() {
+			job := tcpip.NewJob(&clock, &lock, func() {
 				wg.Done()
 			})
-			timer.Reset(shortDuration)
+			job.Schedule(shortDuration)
 			lock.Unlock()
 		}()
 	}
 	wg.Wait()
 }
 
-func TestCancellableTimerFire(t *testing.T) {
+func TestJobExecution(t *testing.T) {
 	t.Parallel()
 
-	ch := make(chan struct{})
+	var clock tcpip.StdClock
 	var lock sync.Mutex
+	ch := make(chan struct{})
 
-	timer := tcpip.NewCancellableTimer(&lock, func() {
+	job := tcpip.NewJob(&clock, &lock, func() {
 		ch <- struct{}{}
 	})
-	timer.Reset(shortDuration)
+	job.Schedule(shortDuration)
 
 	// Wait for timer to fire.
 	select {
@@ -82,17 +83,18 @@ func TestCancellableTimerFire(t *testing.T) {
 func TestCancellableTimerResetFromLongDuration(t *testing.T) {
 	t.Parallel()
 
-	ch := make(chan struct{})
+	var clock tcpip.StdClock
 	var lock sync.Mutex
+	ch := make(chan struct{})
 
-	timer := tcpip.NewCancellableTimer(&lock, func() { ch <- struct{}{} })
-	timer.Reset(middleDuration)
+	job := tcpip.NewJob(&clock, &lock, func() { ch <- struct{}{} })
+	job.Schedule(middleDuration)
 
 	lock.Lock()
-	timer.StopLocked()
+	job.Cancel()
 	lock.Unlock()
 
-	timer.Reset(shortDuration)
+	job.Schedule(shortDuration)
 
 	// Wait for timer to fire.
 	select {
@@ -109,16 +111,17 @@ func TestCancellableTimerResetFromLongDuration(t *testing.T) {
 	}
 }
 
-func TestCancellableTimerResetFromShortDuration(t *testing.T) {
+func TestJobRescheduleFromShortDuration(t *testing.T) {
 	t.Parallel()
 
-	ch := make(chan struct{})
+	var clock tcpip.StdClock
 	var lock sync.Mutex
+	ch := make(chan struct{})
 
 	lock.Lock()
-	timer := tcpip.NewCancellableTimer(&lock, func() { ch <- struct{}{} })
-	timer.Reset(shortDuration)
-	timer.StopLocked()
+	job := tcpip.NewJob(&clock, &lock, func() { ch <- struct{}{} })
+	job.Schedule(shortDuration)
+	job.Cancel()
 	lock.Unlock()
 
 	// Wait for timer to fire if it wasn't correctly stopped.
@@ -128,7 +131,7 @@ func TestCancellableTimerResetFromShortDuration(t *testing.T) {
 	case <-time.After(middleDuration):
 	}
 
-	timer.Reset(shortDuration)
+	job.Schedule(shortDuration)
 
 	// Wait for timer to fire.
 	select {
@@ -145,17 +148,18 @@ func TestCancellableTimerResetFromShortDuration(t *testing.T) {
 	}
 }
 
-func TestCancellableTimerImmediatelyStop(t *testing.T) {
+func TestJobImmediatelyCancel(t *testing.T) {
 	t.Parallel()
 
-	ch := make(chan struct{})
+	var clock tcpip.StdClock
 	var lock sync.Mutex
+	ch := make(chan struct{})
 
 	for i := 0; i < 1000; i++ {
 		lock.Lock()
-		timer := tcpip.NewCancellableTimer(&lock, func() { ch <- struct{}{} })
-		timer.Reset(shortDuration)
-		timer.StopLocked()
+		job := tcpip.NewJob(&clock, &lock, func() { ch <- struct{}{} })
+		job.Schedule(shortDuration)
+		job.Cancel()
 		lock.Unlock()
 	}
 
@@ -167,25 +171,26 @@ func TestCancellableTimerImmediatelyStop(t *testing.T) {
 	}
 }
 
-func TestCancellableTimerStoppedResetWithoutLock(t *testing.T) {
+func TestJobCancelledRescheduleWithoutLock(t *testing.T) {
 	t.Parallel()
 
-	ch := make(chan struct{})
+	var clock tcpip.StdClock
 	var lock sync.Mutex
+	ch := make(chan struct{})
 
 	lock.Lock()
-	timer := tcpip.NewCancellableTimer(&lock, func() { ch <- struct{}{} })
-	timer.Reset(shortDuration)
-	timer.StopLocked()
+	job := tcpip.NewJob(&clock, &lock, func() { ch <- struct{}{} })
+	job.Schedule(shortDuration)
+	job.Cancel()
 	lock.Unlock()
 
 	for i := 0; i < 10; i++ {
-		timer.Reset(middleDuration)
+		job.Schedule(middleDuration)
 
 		lock.Lock()
 		// Sleep until the timer fires and gets blocked trying to take the lock.
 		time.Sleep(middleDuration * 2)
-		timer.StopLocked()
+		job.Cancel()
 		lock.Unlock()
 	}
 
@@ -201,17 +206,18 @@ func TestCancellableTimerStoppedResetWithoutLock(t *testing.T) {
 func TestManyCancellableTimerResetAfterBlockedOnLock(t *testing.T) {
 	t.Parallel()
 
-	ch := make(chan struct{})
+	var clock tcpip.StdClock
 	var lock sync.Mutex
+	ch := make(chan struct{})
 
 	lock.Lock()
-	timer := tcpip.NewCancellableTimer(&lock, func() { ch <- struct{}{} })
-	timer.Reset(shortDuration)
+	job := tcpip.NewJob(&clock, &lock, func() { ch <- struct{}{} })
+	job.Schedule(shortDuration)
 	for i := 0; i < 10; i++ {
 		// Sleep until the timer fires and gets blocked trying to take the lock.
 		time.Sleep(middleDuration)
-		timer.StopLocked()
-		timer.Reset(shortDuration)
+		job.Cancel()
+		job.Schedule(shortDuration)
 	}
 	lock.Unlock()
 
@@ -230,18 +236,19 @@ func TestManyCancellableTimerResetAfterBlockedOnLock(t *testing.T) {
 	}
 }
 
-func TestManyCancellableTimerResetUnderLock(t *testing.T) {
+func TestManyJobReschedulesUnderLock(t *testing.T) {
 	t.Parallel()
 
-	ch := make(chan struct{})
+	var clock tcpip.StdClock
 	var lock sync.Mutex
+	ch := make(chan struct{})
 
 	lock.Lock()
-	timer := tcpip.NewCancellableTimer(&lock, func() { ch <- struct{}{} })
-	timer.Reset(shortDuration)
+	job := tcpip.NewJob(&clock, &lock, func() { ch <- struct{}{} })
+	job.Schedule(shortDuration)
 	for i := 0; i < 10; i++ {
-		timer.StopLocked()
-		timer.Reset(shortDuration)
+		job.Cancel()
+		job.Schedule(shortDuration)
 	}
 	lock.Unlock()
 
diff --git a/pkg/tcpip/transport/icmp/endpoint.go b/pkg/tcpip/transport/icmp/endpoint.go
index 678f4e016..4612be4e7 100644
--- a/pkg/tcpip/transport/icmp/endpoint.go
+++ b/pkg/tcpip/transport/icmp/endpoint.go
@@ -797,7 +797,7 @@ func (e *endpoint) HandlePacket(r *stack.Route, id stack.TransportEndpointID, pk
 	e.rcvList.PushBack(packet)
 	e.rcvBufSize += packet.data.Size()
 
-	packet.timestamp = e.stack.NowNanoseconds()
+	packet.timestamp = e.stack.Clock().NowNanoseconds()
 
 	e.rcvMu.Unlock()
 	e.stats.PacketsReceived.Increment()
diff --git a/pkg/tcpip/transport/packet/endpoint.go b/pkg/tcpip/transport/packet/endpoint.go
index 7b2083a09..0e46e6355 100644
--- a/pkg/tcpip/transport/packet/endpoint.go
+++ b/pkg/tcpip/transport/packet/endpoint.go
@@ -441,6 +441,7 @@ func (ep *endpoint) HandlePacket(nicID tcpip.NICID, localAddr tcpip.LinkAddress,
 			Addr: tcpip.Address(hdr.SourceAddress()),
 		}
 		packet.packetInfo.Protocol = netProto
+		packet.packetInfo.PktType = pkt.PktType
 	} else {
 		// Guess the would-be ethernet header.
 		packet.senderAddr = tcpip.FullAddress{
@@ -448,34 +449,57 @@ func (ep *endpoint) HandlePacket(nicID tcpip.NICID, localAddr tcpip.LinkAddress,
 			Addr: tcpip.Address(localAddr),
 		}
 		packet.packetInfo.Protocol = netProto
+		packet.packetInfo.PktType = pkt.PktType
 	}
 
 	if ep.cooked {
 		// Cooked packets can simply be queued.
-		packet.data = pkt.Data
+		switch pkt.PktType {
+		case tcpip.PacketHost:
+			packet.data = pkt.Data
+		case tcpip.PacketOutgoing:
+			// Strip Link Header from the Header.
+			pkt.Header = buffer.NewPrependableFromView(pkt.Header.View()[len(pkt.LinkHeader):])
+			combinedVV := pkt.Header.View().ToVectorisedView()
+			combinedVV.Append(pkt.Data)
+			packet.data = combinedVV
+		default:
+			panic(fmt.Sprintf("unexpected PktType in pkt: %+v", pkt))
+		}
+
 	} else {
 		// Raw packets need their ethernet headers prepended before
 		// queueing.
 		var linkHeader buffer.View
-		if len(pkt.LinkHeader) == 0 {
-			// We weren't provided with an actual ethernet header,
-			// so fake one.
-			ethFields := header.EthernetFields{
-				SrcAddr: tcpip.LinkAddress([]byte{0x00, 0x00, 0x00, 0x00, 0x00, 0x00}),
-				DstAddr: localAddr,
-				Type:    netProto,
+		var combinedVV buffer.VectorisedView
+		if pkt.PktType != tcpip.PacketOutgoing {
+			if len(pkt.LinkHeader) == 0 {
+				// We weren't provided with an actual ethernet header,
+				// so fake one.
+				ethFields := header.EthernetFields{
+					SrcAddr: tcpip.LinkAddress([]byte{0x00, 0x00, 0x00, 0x00, 0x00, 0x00}),
+					DstAddr: localAddr,
+					Type:    netProto,
+				}
+				fakeHeader := make(header.Ethernet, header.EthernetMinimumSize)
+				fakeHeader.Encode(&ethFields)
+				linkHeader = buffer.View(fakeHeader)
+			} else {
+				linkHeader = append(buffer.View(nil), pkt.LinkHeader...)
 			}
-			fakeHeader := make(header.Ethernet, header.EthernetMinimumSize)
-			fakeHeader.Encode(&ethFields)
-			linkHeader = buffer.View(fakeHeader)
-		} else {
-			linkHeader = append(buffer.View(nil), pkt.LinkHeader...)
+			combinedVV = linkHeader.ToVectorisedView()
+		}
+		if pkt.PktType == tcpip.PacketOutgoing {
+			// For outgoing packets the Link, Network and Transport
+			// headers are in the pkt.Header fields normally unless
+			// a Raw socket is in use in which case pkt.Header could
+			// be nil.
+			combinedVV.AppendView(pkt.Header.View())
 		}
-		combinedVV := linkHeader.ToVectorisedView()
 		combinedVV.Append(pkt.Data)
 		packet.data = combinedVV
 	}
-	packet.timestampNS = ep.stack.NowNanoseconds()
+	packet.timestampNS = ep.stack.Clock().NowNanoseconds()
 
 	ep.rcvList.PushBack(&packet)
 	ep.rcvBufSize += packet.data.Size()
diff --git a/pkg/tcpip/transport/raw/endpoint.go b/pkg/tcpip/transport/raw/endpoint.go
index c2e9fd29f..f85a68554 100644
--- a/pkg/tcpip/transport/raw/endpoint.go
+++ b/pkg/tcpip/transport/raw/endpoint.go
@@ -456,7 +456,7 @@ func (e *endpoint) Bind(addr tcpip.FullAddress) *tcpip.Error {
 	defer e.mu.Unlock()
 
 	// If a local address was specified, verify that it's valid.
-	if e.stack.CheckLocalAddress(addr.NIC, e.NetProto, addr.Addr) == 0 {
+	if len(addr.Addr) != 0 && e.stack.CheckLocalAddress(addr.NIC, e.NetProto, addr.Addr) == 0 {
 		return tcpip.ErrBadLocalAddress
 	}
 
@@ -700,7 +700,7 @@ func (e *endpoint) HandlePacket(route *stack.Route, pkt *stack.PacketBuffer) {
 	}
 	combinedVV.Append(pkt.Data)
 	packet.data = combinedVV
-	packet.timestampNS = e.stack.NowNanoseconds()
+	packet.timestampNS = e.stack.Clock().NowNanoseconds()
 
 	e.rcvList.PushBack(packet)
 	e.rcvBufSize += packet.data.Size()
diff --git a/pkg/tcpip/transport/tcp/BUILD b/pkg/tcpip/transport/tcp/BUILD
index 18ff89ffc..e860ee484 100644
--- a/pkg/tcpip/transport/tcp/BUILD
+++ b/pkg/tcpip/transport/tcp/BUILD
@@ -49,6 +49,7 @@ go_library(
         "segment_heap.go",
         "segment_queue.go",
         "segment_state.go",
+        "segment_unsafe.go",
         "snd.go",
         "snd_state.go",
         "tcp_endpoint_list.go",
diff --git a/pkg/tcpip/transport/tcp/connect.go b/pkg/tcpip/transport/tcp/connect.go
index 81b740115..1798510bc 100644
--- a/pkg/tcpip/transport/tcp/connect.go
+++ b/pkg/tcpip/transport/tcp/connect.go
@@ -490,6 +490,9 @@ func (h *handshake) resolveRoute() *tcpip.Error {
 				<-h.ep.undrain
 				h.ep.mu.Lock()
 			}
+			if n&notifyError != 0 {
+				return h.ep.takeLastError()
+			}
 		}
 
 		// Wait for notification.
@@ -616,6 +619,9 @@ func (h *handshake) execute() *tcpip.Error {
 				<-h.ep.undrain
 				h.ep.mu.Lock()
 			}
+			if n&notifyError != 0 {
+				return h.ep.takeLastError()
+			}
 
 		case wakerForNewSegment:
 			if err := h.processSegments(); err != nil {
diff --git a/pkg/tcpip/transport/tcp/endpoint.go b/pkg/tcpip/transport/tcp/endpoint.go
index 83dc10ed0..0f7487963 100644
--- a/pkg/tcpip/transport/tcp/endpoint.go
+++ b/pkg/tcpip/transport/tcp/endpoint.go
@@ -1209,6 +1209,14 @@ func (e *endpoint) SetOwner(owner tcpip.PacketOwner) {
 	e.owner = owner
 }
 
+func (e *endpoint) takeLastError() *tcpip.Error {
+	e.lastErrorMu.Lock()
+	defer e.lastErrorMu.Unlock()
+	err := e.lastError
+	e.lastError = nil
+	return err
+}
+
 // Read reads data from the endpoint.
 func (e *endpoint) Read(*tcpip.FullAddress) (buffer.View, tcpip.ControlMessages, *tcpip.Error) {
 	e.LockUser()
@@ -1956,11 +1964,7 @@ func (e *endpoint) GetSockOptInt(opt tcpip.SockOptInt) (int, *tcpip.Error) {
 func (e *endpoint) GetSockOpt(opt interface{}) *tcpip.Error {
 	switch o := opt.(type) {
 	case tcpip.ErrorOption:
-		e.lastErrorMu.Lock()
-		err := e.lastError
-		e.lastError = nil
-		e.lastErrorMu.Unlock()
-		return err
+		return e.takeLastError()
 
 	case *tcpip.BindToDeviceOption:
 		e.LockUser()
@@ -2546,6 +2550,18 @@ func (e *endpoint) HandleControlPacket(id stack.TransportEndpointID, typ stack.C
 		e.sndBufMu.Unlock()
 
 		e.notifyProtocolGoroutine(notifyMTUChanged)
+
+	case stack.ControlNoRoute:
+		e.lastErrorMu.Lock()
+		e.lastError = tcpip.ErrNoRoute
+		e.lastErrorMu.Unlock()
+		e.notifyProtocolGoroutine(notifyError)
+
+	case stack.ControlNetworkUnreachable:
+		e.lastErrorMu.Lock()
+		e.lastError = tcpip.ErrNetworkUnreachable
+		e.lastErrorMu.Unlock()
+		e.notifyProtocolGoroutine(notifyError)
 	}
 }
 
diff --git a/pkg/tcpip/transport/tcp/protocol.go b/pkg/tcpip/transport/tcp/protocol.go
index b34e47bbd..5d6174a59 100644
--- a/pkg/tcpip/transport/tcp/protocol.go
+++ b/pkg/tcpip/transport/tcp/protocol.go
@@ -49,7 +49,7 @@ const (
 
 	// DefaultReceiveBufferSize is the default size of the receive buffer
 	// for an endpoint.
-	DefaultReceiveBufferSize = 1 << 20 // 1MB
+	DefaultReceiveBufferSize = 32 << 10 // 32KB
 
 	// MaxBufferSize is the largest size a receive/send buffer can grow to.
 	MaxBufferSize = 4 << 20 // 4MB
diff --git a/pkg/tcpip/transport/tcp/rcv.go b/pkg/tcpip/transport/tcp/rcv.go
index dd89a292a..5e0bfe585 100644
--- a/pkg/tcpip/transport/tcp/rcv.go
+++ b/pkg/tcpip/transport/tcp/rcv.go
@@ -372,7 +372,7 @@ func (r *receiver) handleRcvdSegment(s *segment) (drop bool, err *tcpip.Error) {
 			// We only store the segment if it's within our buffer
 			// size limit.
 			if r.pendingBufUsed < r.pendingBufSize {
-				r.pendingBufUsed += s.logicalLen()
+				r.pendingBufUsed += seqnum.Size(s.segMemSize())
 				s.incRef()
 				heap.Push(&r.pendingRcvdSegments, s)
 				UpdateSACKBlocks(&r.ep.sack, segSeq, segSeq.Add(segLen), r.rcvNxt)
@@ -406,7 +406,7 @@ func (r *receiver) handleRcvdSegment(s *segment) (drop bool, err *tcpip.Error) {
 		}
 
 		heap.Pop(&r.pendingRcvdSegments)
-		r.pendingBufUsed -= s.logicalLen()
+		r.pendingBufUsed -= seqnum.Size(s.segMemSize())
 		s.decRef()
 	}
 	return false, nil
diff --git a/pkg/tcpip/transport/tcp/segment.go b/pkg/tcpip/transport/tcp/segment.go
index 0280892a8..bb60dc29d 100644
--- a/pkg/tcpip/transport/tcp/segment.go
+++ b/pkg/tcpip/transport/tcp/segment.go
@@ -138,6 +138,12 @@ func (s *segment) logicalLen() seqnum.Size {
 	return l
 }
 
+// segMemSize is the amount of memory used to hold the segment data and
+// the associated metadata.
+func (s *segment) segMemSize() int {
+	return segSize + s.data.Size()
+}
+
 // parse populates the sequence & ack numbers, flags, and window fields of the
 // segment from the TCP header stored in the data. It then updates the view to
 // skip the header.
diff --git a/pkg/tcpip/transport/tcp/segment_unsafe.go b/pkg/tcpip/transport/tcp/segment_unsafe.go
new file mode 100644
index 000000000..0ab7b8f56
--- /dev/null
+++ b/pkg/tcpip/transport/tcp/segment_unsafe.go
@@ -0,0 +1,23 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package tcp
+
+import (
+	"unsafe"
+)
+
+const (
+	segSize = int(unsafe.Sizeof(segment{}))
+)
diff --git a/pkg/tcpip/transport/tcp/testing/context/context.go b/pkg/tcpip/transport/tcp/testing/context/context.go
index 06fde2a79..37e7767d6 100644
--- a/pkg/tcpip/transport/tcp/testing/context/context.go
+++ b/pkg/tcpip/transport/tcp/testing/context/context.go
@@ -143,12 +143,14 @@ func New(t *testing.T, mtu uint32) *Context {
 		TransportProtocols: []stack.TransportProtocol{tcp.NewProtocol()},
 	})
 
+	const sendBufferSize = 1 << 20 // 1 MiB
+	const recvBufferSize = 1 << 20 // 1 MiB
 	// Allow minimum send/receive buffer sizes to be 1 during tests.
-	if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, tcp.SendBufferSizeOption{Min: 1, Default: tcp.DefaultSendBufferSize, Max: 10 * tcp.DefaultSendBufferSize}); err != nil {
+	if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, tcp.SendBufferSizeOption{Min: 1, Default: sendBufferSize, Max: 10 * sendBufferSize}); err != nil {
 		t.Fatalf("SetTransportProtocolOption failed: %s", err)
 	}
 
-	if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, tcp.ReceiveBufferSizeOption{Min: 1, Default: tcp.DefaultReceiveBufferSize, Max: 10 * tcp.DefaultReceiveBufferSize}); err != nil {
+	if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, tcp.ReceiveBufferSizeOption{Min: 1, Default: recvBufferSize, Max: 10 * recvBufferSize}); err != nil {
 		t.Fatalf("SetTransportProtocolOption failed: %s", err)
 	}
 
@@ -202,7 +204,7 @@ func New(t *testing.T, mtu uint32) *Context {
 		t:           t,
 		s:           s,
 		linkEP:      ep,
-		WindowScale: uint8(tcp.FindWndScale(tcp.DefaultReceiveBufferSize)),
+		WindowScale: uint8(tcp.FindWndScale(recvBufferSize)),
 	}
 }
 
diff --git a/pkg/tcpip/transport/udp/endpoint.go b/pkg/tcpip/transport/udp/endpoint.go
index a14643ae8..6e692da07 100644
--- a/pkg/tcpip/transport/udp/endpoint.go
+++ b/pkg/tcpip/transport/udp/endpoint.go
@@ -1451,7 +1451,7 @@ func (e *endpoint) HandlePacket(r *stack.Route, id stack.TransportEndpointID, pk
 		packet.tos, _ = header.IPv6(pkt.NetworkHeader).TOS()
 	}
 
-	packet.timestamp = e.stack.NowNanoseconds()
+	packet.timestamp = e.stack.Clock().NowNanoseconds()
 
 	e.rcvMu.Unlock()
 
diff --git a/pkg/test/criutil/criutil.go b/pkg/test/criutil/criutil.go
index 66f10c016..70945f234 100644
--- a/pkg/test/criutil/criutil.go
+++ b/pkg/test/criutil/criutil.go
@@ -40,9 +40,9 @@ type Crictl struct {
 	cleanup  []func()
 }
 
-// resolvePath attempts to find binary paths. It may set the path to invalid,
+// ResolvePath attempts to find binary paths. It may set the path to invalid,
 // which will cause the execution to fail with a sensible error.
-func resolvePath(executable string) string {
+func ResolvePath(executable string) string {
 	runtime, err := dockerutil.RuntimePath()
 	if err == nil {
 		// Check first the directory of the runtime itself.
@@ -230,7 +230,7 @@ func (cc *Crictl) Import(image string) error {
 	// be pushing a lot of bytes in order to import the image. The connect
 	// timeout stays the same and is inherited from the Crictl instance.
 	cmd := testutil.Command(cc.logger,
-		resolvePath("ctr"),
+		ResolvePath("ctr"),
 		fmt.Sprintf("--connect-timeout=%s", 30*time.Second),
 		fmt.Sprintf("--address=%s", cc.endpoint),
 		"-n", "k8s.io", "images", "import", "-")
@@ -358,7 +358,7 @@ func (cc *Crictl) StopPodAndContainer(podID, contID string) error {
 // run runs crictl with the given args.
 func (cc *Crictl) run(args ...string) (string, error) {
 	defaultArgs := []string{
-		resolvePath("crictl"),
+		ResolvePath("crictl"),
 		"--image-endpoint", fmt.Sprintf("unix://%s", cc.endpoint),
 		"--runtime-endpoint", fmt.Sprintf("unix://%s", cc.endpoint),
 	}
diff --git a/pkg/test/dockerutil/BUILD b/pkg/test/dockerutil/BUILD
index 83b80c8bc..a5e84658a 100644
--- a/pkg/test/dockerutil/BUILD
+++ b/pkg/test/dockerutil/BUILD
@@ -1,4 +1,4 @@
-load("//tools:defs.bzl", "go_library")
+load("//tools:defs.bzl", "go_library", "go_test")
 
 package(licenses = ["notice"])
 
@@ -10,6 +10,7 @@ go_library(
         "dockerutil.go",
         "exec.go",
         "network.go",
+        "profile.go",
     ],
     visibility = ["//:sandbox"],
     deps = [
@@ -23,3 +24,19 @@ go_library(
         "@com_github_docker_go_connections//nat:go_default_library",
     ],
 )
+
+go_test(
+    name = "profile_test",
+    size = "large",
+    srcs = [
+        "profile_test.go",
+    ],
+    library = ":dockerutil",
+    tags = [
+        # Requires docker and runsc to be configured before test runs.
+        # Also requires the test to be run as root.
+        "manual",
+        "local",
+    ],
+    visibility = ["//:sandbox"],
+)
diff --git a/pkg/test/dockerutil/README.md b/pkg/test/dockerutil/README.md
new file mode 100644
index 000000000..870292096
--- /dev/null
+++ b/pkg/test/dockerutil/README.md
@@ -0,0 +1,86 @@
+# dockerutil
+
+This package is for creating and controlling docker containers for testing
+runsc, gVisor's docker/kubernetes binary. A simple test may look like:
+
+```
+ func TestSuperCool(t *testing.T) {
+   ctx := context.Background()
+   c := dockerutil.MakeContainer(ctx, t)
+   got, err := c.Run(ctx, dockerutil.RunOpts{
+     Image: "basic/alpine"
+   }, "echo", "super cool")
+   if err != nil {
+      t.Fatalf("err was not nil: %v", err)
+   }
+   want := "super cool"
+   if !strings.Contains(got, want){
+     t.Fatalf("want: %s, got: %s", want, got)
+   }
+ }
+```
+
+For further examples, see many of our end to end tests elsewhere in the repo,
+such as those in //test/e2e or benchmarks at //test/benchmarks.
+
+dockerutil uses the "official" docker golang api, which is
+[very powerful](https://godoc.org/github.com/docker/docker/client). dockerutil
+is a thin wrapper around this API, allowing desired new use cases to be easily
+implemented.
+
+## Profiling
+
+dockerutil is capable of generating profiles. Currently, the only option is to
+use pprof profiles generated by `runsc debug`. The profiler will generate Block,
+CPU, Heap, Goroutine, and Mutex profiles. To generate profiles:
+
+*   Install runsc with the `--profile` flag: `make configure RUNTIME=myrunsc
+    ARGS="--profile"` Also add other flags with ARGS like `--platform=kvm` or
+    `--vfs2`.
+*   Restart docker: `sudo service docker restart`
+
+To run and generate CPU profiles run:
+
+```
+make sudo TARGETS=//path/to:target \
+  ARGS="--runtime=myrunsc -test.v -test.bench=. --pprof-cpu" OPTIONS="-c opt"
+```
+
+Profiles would be at: `/tmp/profile/myrunsc/CONTAINERNAME/cpu.pprof`
+
+Container name in most tests and benchmarks in gVisor is usually the test name
+and some random characters like so:
+`BenchmarkABSL-CleanCache-JF2J2ZYF3U7SL47QAA727CSJI3C4ZAW2`
+
+Profiling requires root as runsc debug inspects running containers in /var/run
+among other things.
+
+### Writing for Profiling
+
+The below shows an example of using profiles with dockerutil.
+
+```
+func TestSuperCool(t *testing.T){
+  ctx := context.Background()
+  // profiled and using runtime from dockerutil.runtime flag
+  profiled := MakeContainer()
+
+  // not profiled and using runtime runc
+  native := MakeNativeContainer()
+
+  err := profiled.Spawn(ctx, RunOpts{
+    Image: "some/image",
+  }, "sleep", "100000")
+  // profiling has begun here
+  ...
+  expensive setup that I don't want to profile.
+  ...
+  profiled.RestartProfiles()
+  // profiled activity
+}
+```
+
+In the above example, `profiled` would be profiled and `native` would not. The
+call to `RestartProfiles()` restarts the clock on profiling. This is useful if
+the main activity being tested is done with `docker exec` or `container.Spawn()`
+followed by one or more `container.Exec()` calls.
diff --git a/pkg/test/dockerutil/container.go b/pkg/test/dockerutil/container.go
index 17acdaf6f..b59503188 100644
--- a/pkg/test/dockerutil/container.go
+++ b/pkg/test/dockerutil/container.go
@@ -43,15 +43,21 @@ import (
 // See: https://pkg.go.dev/github.com/docker/docker.
 type Container struct {
 	Name    string
-	Runtime string
+	runtime string
 
 	logger   testutil.Logger
 	client   *client.Client
 	id       string
 	mounts   []mount.Mount
 	links    []string
-	cleanups []func()
 	copyErr  error
+	cleanups []func()
+
+	// Profiles are profiles added to this container. They contain methods
+	// that are run after Creation, Start, and Cleanup of this Container, along
+	// a handle to restart the profile. Generally, tests/benchmarks using
+	// profiles need to run as root.
+	profiles []Profile
 
 	// Stores streams attached to the container. Used by WaitForOutputSubmatch.
 	streams types.HijackedResponse
@@ -106,7 +112,19 @@ type RunOpts struct {
 // MakeContainer sets up the struct for a Docker container.
 //
 // Names of containers will be unique.
+// Containers will check flags for profiling requests.
 func MakeContainer(ctx context.Context, logger testutil.Logger) *Container {
+	c := MakeNativeContainer(ctx, logger)
+	c.runtime = *runtime
+	if p := MakePprofFromFlags(c); p != nil {
+		c.AddProfile(p)
+	}
+	return c
+}
+
+// MakeNativeContainer sets up the struct for a DockerContainer using runc. Native
+// containers aren't profiled.
+func MakeNativeContainer(ctx context.Context, logger testutil.Logger) *Container {
 	// Slashes are not allowed in container names.
 	name := testutil.RandomID(logger.Name())
 	name = strings.ReplaceAll(name, "/", "-")
@@ -114,20 +132,33 @@ func MakeContainer(ctx context.Context, logger testutil.Logger) *Container {
 	if err != nil {
 		return nil
 	}
-
 	client.NegotiateAPIVersion(ctx)
-
 	return &Container{
 		logger:  logger,
 		Name:    name,
-		Runtime: *runtime,
+		runtime: "",
 		client:  client,
 	}
 }
 
+// AddProfile adds a profile to this container.
+func (c *Container) AddProfile(p Profile) {
+	c.profiles = append(c.profiles, p)
+}
+
+// RestartProfiles calls Restart on all profiles for this container.
+func (c *Container) RestartProfiles() error {
+	for _, profile := range c.profiles {
+		if err := profile.Restart(c); err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
 // Spawn is analogous to 'docker run -d'.
 func (c *Container) Spawn(ctx context.Context, r RunOpts, args ...string) error {
-	if err := c.create(ctx, r, args); err != nil {
+	if err := c.create(ctx, c.config(r, args), c.hostConfig(r), nil); err != nil {
 		return err
 	}
 	return c.Start(ctx)
@@ -153,7 +184,7 @@ func (c *Container) SpawnProcess(ctx context.Context, r RunOpts, args ...string)
 
 // Run is analogous to 'docker run'.
 func (c *Container) Run(ctx context.Context, r RunOpts, args ...string) (string, error) {
-	if err := c.create(ctx, r, args); err != nil {
+	if err := c.create(ctx, c.config(r, args), c.hostConfig(r), nil); err != nil {
 		return "", err
 	}
 
@@ -181,27 +212,25 @@ func (c *Container) MakeLink(target string) string {
 
 // CreateFrom creates a container from the given configs.
 func (c *Container) CreateFrom(ctx context.Context, conf *container.Config, hostconf *container.HostConfig, netconf *network.NetworkingConfig) error {
-	cont, err := c.client.ContainerCreate(ctx, conf, hostconf, netconf, c.Name)
-	if err != nil {
-		return err
-	}
-	c.id = cont.ID
-	return nil
+	return c.create(ctx, conf, hostconf, netconf)
 }
 
 // Create is analogous to 'docker create'.
 func (c *Container) Create(ctx context.Context, r RunOpts, args ...string) error {
-	return c.create(ctx, r, args)
+	return c.create(ctx, c.config(r, args), c.hostConfig(r), nil)
 }
 
-func (c *Container) create(ctx context.Context, r RunOpts, args []string) error {
-	conf := c.config(r, args)
-	hostconf := c.hostConfig(r)
+func (c *Container) create(ctx context.Context, conf *container.Config, hostconf *container.HostConfig, netconf *network.NetworkingConfig) error {
 	cont, err := c.client.ContainerCreate(ctx, conf, hostconf, nil, c.Name)
 	if err != nil {
 		return err
 	}
 	c.id = cont.ID
+	for _, profile := range c.profiles {
+		if err := profile.OnCreate(c); err != nil {
+			return fmt.Errorf("OnCreate method failed with: %v", err)
+		}
+	}
 	return nil
 }
 
@@ -227,7 +256,7 @@ func (c *Container) hostConfig(r RunOpts) *container.HostConfig {
 	c.mounts = append(c.mounts, r.Mounts...)
 
 	return &container.HostConfig{
-		Runtime:         c.Runtime,
+		Runtime:         c.runtime,
 		Mounts:          c.mounts,
 		PublishAllPorts: true,
 		Links:           r.Links,
@@ -261,8 +290,15 @@ func (c *Container) Start(ctx context.Context) error {
 	c.cleanups = append(c.cleanups, func() {
 		c.streams.Close()
 	})
-
-	return c.client.ContainerStart(ctx, c.id, types.ContainerStartOptions{})
+	if err := c.client.ContainerStart(ctx, c.id, types.ContainerStartOptions{}); err != nil {
+		return fmt.Errorf("ContainerStart failed: %v", err)
+	}
+	for _, profile := range c.profiles {
+		if err := profile.OnStart(c); err != nil {
+			return fmt.Errorf("OnStart method failed: %v", err)
+		}
+	}
+	return nil
 }
 
 // Stop is analogous to 'docker stop'.
@@ -482,6 +518,12 @@ func (c *Container) Remove(ctx context.Context) error {
 
 // CleanUp kills and deletes the container (best effort).
 func (c *Container) CleanUp(ctx context.Context) {
+	// Execute profile cleanups before the container goes down.
+	for _, profile := range c.profiles {
+		profile.OnCleanUp(c)
+	}
+	// Forget profiles.
+	c.profiles = nil
 	// Kill the container.
 	if err := c.Kill(ctx); err != nil && !strings.Contains(err.Error(), "is not running") {
 		// Just log; can't do anything here.
diff --git a/pkg/test/dockerutil/dockerutil.go b/pkg/test/dockerutil/dockerutil.go
index df09babf3..5a9dd8bd8 100644
--- a/pkg/test/dockerutil/dockerutil.go
+++ b/pkg/test/dockerutil/dockerutil.go
@@ -25,6 +25,7 @@ import (
 	"os/exec"
 	"regexp"
 	"strconv"
+	"time"
 
 	"gvisor.dev/gvisor/pkg/test/testutil"
 )
@@ -42,6 +43,26 @@ var (
 
 	// config is the default Docker daemon configuration path.
 	config = flag.String("config_path", "/etc/docker/daemon.json", "configuration file for reading paths")
+
+	// The following flags are for the "pprof" profiler tool.
+
+	// pprofBaseDir allows the user to change the directory to which profiles are
+	// written. By default, profiles will appear under:
+	// /tmp/profile/RUNTIME/CONTAINER_NAME/*.pprof.
+	pprofBaseDir = flag.String("pprof-dir", "/tmp/profile", "base directory in: BASEDIR/RUNTIME/CONTINER_NAME/FILENAME (e.g. /tmp/profile/runtime/mycontainer/cpu.pprof)")
+
+	// duration is the max duration `runsc debug` will run and capture profiles.
+	// If the container's clean up method is called prior to duration, the
+	// profiling process will be killed.
+	duration = flag.Duration("pprof-duration", 10*time.Second, "duration to run the profile in seconds")
+
+	// The below flags enable each type of profile. Multiple profiles can be
+	// enabled for each run.
+	pprofBlock = flag.Bool("pprof-block", false, "enables block profiling with runsc debug")
+	pprofCPU   = flag.Bool("pprof-cpu", false, "enables CPU profiling with runsc debug")
+	pprofGo    = flag.Bool("pprof-go", false, "enables goroutine profiling with runsc debug")
+	pprofHeap  = flag.Bool("pprof-heap", false, "enables heap profiling with runsc debug")
+	pprofMutex = flag.Bool("pprof-mutex", false, "enables mutex profiling with runsc debug")
 )
 
 // EnsureSupportedDockerVersion checks if correct docker is installed.
diff --git a/pkg/test/dockerutil/exec.go b/pkg/test/dockerutil/exec.go
index 921d1da9e..4c739c9e9 100644
--- a/pkg/test/dockerutil/exec.go
+++ b/pkg/test/dockerutil/exec.go
@@ -87,7 +87,6 @@ func (c *Container) doExec(ctx context.Context, r ExecOpts, args []string) (Proc
 		execid:    resp.ID,
 		conn:      hijack,
 	}, nil
-
 }
 
 func (c *Container) execConfig(r ExecOpts, cmd []string) types.ExecConfig {
diff --git a/pkg/test/dockerutil/profile.go b/pkg/test/dockerutil/profile.go
new file mode 100644
index 000000000..1fab33083
--- /dev/null
+++ b/pkg/test/dockerutil/profile.go
@@ -0,0 +1,152 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package dockerutil
+
+import (
+	"context"
+	"fmt"
+	"io"
+	"os"
+	"os/exec"
+	"path/filepath"
+	"time"
+)
+
+// Profile represents profile-like operations on a container,
+// such as running perf or pprof. It is meant to be added to containers
+// such that the container type calls the Profile during its lifecycle.
+type Profile interface {
+	// OnCreate is called just after the container is created when the container
+	// has a valid ID (e.g. c.ID()).
+	OnCreate(c *Container) error
+
+	// OnStart is called just after the container is started when the container
+	// has a valid Pid (e.g. c.SandboxPid()).
+	OnStart(c *Container) error
+
+	// Restart restarts the Profile on request.
+	Restart(c *Container) error
+
+	// OnCleanUp is called during the container's cleanup method.
+	// Cleanups should just log errors if they have them.
+	OnCleanUp(c *Container) error
+}
+
+// Pprof is for running profiles with 'runsc debug'. Pprof workloads
+// should be run as root and ONLY against runsc sandboxes. The runtime
+// should have --profile set as an option in /etc/docker/daemon.json in
+// order for profiling to work with Pprof.
+type Pprof struct {
+	BasePath         string // path to put profiles
+	BlockProfile     bool
+	CPUProfile       bool
+	GoRoutineProfile bool
+	HeapProfile      bool
+	MutexProfile     bool
+	Duration         time.Duration // duration to run profiler e.g. '10s' or '1m'.
+	shouldRun        bool
+	cmd              *exec.Cmd
+	stdout           io.ReadCloser
+	stderr           io.ReadCloser
+}
+
+// MakePprofFromFlags makes a Pprof profile from flags.
+func MakePprofFromFlags(c *Container) *Pprof {
+	if !(*pprofBlock || *pprofCPU || *pprofGo || *pprofHeap || *pprofMutex) {
+		return nil
+	}
+	return &Pprof{
+		BasePath:         filepath.Join(*pprofBaseDir, c.runtime, c.Name),
+		BlockProfile:     *pprofBlock,
+		CPUProfile:       *pprofCPU,
+		GoRoutineProfile: *pprofGo,
+		HeapProfile:      *pprofHeap,
+		MutexProfile:     *pprofMutex,
+		Duration:         *duration,
+	}
+}
+
+// OnCreate implements Profile.OnCreate.
+func (p *Pprof) OnCreate(c *Container) error {
+	return os.MkdirAll(p.BasePath, 0755)
+}
+
+// OnStart implements Profile.OnStart.
+func (p *Pprof) OnStart(c *Container) error {
+	path, err := RuntimePath()
+	if err != nil {
+		return fmt.Errorf("failed to get runtime path: %v", err)
+	}
+
+	// The root directory of this container's runtime.
+	root := fmt.Sprintf("--root=/var/run/docker/runtime-%s/moby", c.runtime)
+	// Format is `runsc --root=rootdir debug --profile-*=file --duration=* containerID`.
+	args := []string{root, "debug"}
+	args = append(args, p.makeProfileArgs(c)...)
+	args = append(args, c.ID())
+
+	// Best effort wait until container is running.
+	for now := time.Now(); time.Since(now) < 5*time.Second; {
+		if status, err := c.Status(context.Background()); err != nil {
+			return fmt.Errorf("failed to get status with: %v", err)
+
+		} else if status.Running {
+			break
+		}
+		time.Sleep(500 * time.Millisecond)
+	}
+	p.cmd = exec.Command(path, args...)
+	if err := p.cmd.Start(); err != nil {
+		return fmt.Errorf("process failed: %v", err)
+	}
+	return nil
+}
+
+// Restart implements Profile.Restart.
+func (p *Pprof) Restart(c *Container) error {
+	p.OnCleanUp(c)
+	return p.OnStart(c)
+}
+
+// OnCleanUp implements Profile.OnCleanup
+func (p *Pprof) OnCleanUp(c *Container) error {
+	defer func() { p.cmd = nil }()
+	if p.cmd != nil && p.cmd.Process != nil && p.cmd.ProcessState != nil && !p.cmd.ProcessState.Exited() {
+		return p.cmd.Process.Kill()
+	}
+	return nil
+}
+
+// makeProfileArgs turns Pprof fields into runsc debug flags.
+func (p *Pprof) makeProfileArgs(c *Container) []string {
+	var ret []string
+	if p.BlockProfile {
+		ret = append(ret, fmt.Sprintf("--profile-block=%s", filepath.Join(p.BasePath, "block.pprof")))
+	}
+	if p.CPUProfile {
+		ret = append(ret, fmt.Sprintf("--profile-cpu=%s", filepath.Join(p.BasePath, "cpu.pprof")))
+	}
+	if p.GoRoutineProfile {
+		ret = append(ret, fmt.Sprintf("--profile-goroutine=%s", filepath.Join(p.BasePath, "go.pprof")))
+	}
+	if p.HeapProfile {
+		ret = append(ret, fmt.Sprintf("--profile-heap=%s", filepath.Join(p.BasePath, "heap.pprof")))
+	}
+	if p.MutexProfile {
+		ret = append(ret, fmt.Sprintf("--profile-mutex=%s", filepath.Join(p.BasePath, "mutex.pprof")))
+	}
+	ret = append(ret, fmt.Sprintf("--duration=%s", p.Duration))
+	return ret
+}
diff --git a/pkg/test/dockerutil/profile_test.go b/pkg/test/dockerutil/profile_test.go
new file mode 100644
index 000000000..b7b4d7618
--- /dev/null
+++ b/pkg/test/dockerutil/profile_test.go
@@ -0,0 +1,117 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package dockerutil
+
+import (
+	"context"
+	"fmt"
+	"os"
+	"path/filepath"
+	"testing"
+	"time"
+)
+
+type testCase struct {
+	name          string
+	pprof         Pprof
+	expectedFiles []string
+}
+
+func TestPprof(t *testing.T) {
+	// Basepath and expected file names for each type of profile.
+	basePath := "/tmp/test/profile"
+	block := "block.pprof"
+	cpu := "cpu.pprof"
+	goprofle := "go.pprof"
+	heap := "heap.pprof"
+	mutex := "mutex.pprof"
+
+	testCases := []testCase{
+		{
+			name: "Cpu",
+			pprof: Pprof{
+				BasePath:   basePath,
+				CPUProfile: true,
+				Duration:   2 * time.Second,
+			},
+			expectedFiles: []string{cpu},
+		},
+		{
+			name: "All",
+			pprof: Pprof{
+				BasePath:         basePath,
+				BlockProfile:     true,
+				CPUProfile:       true,
+				GoRoutineProfile: true,
+				HeapProfile:      true,
+				MutexProfile:     true,
+				Duration:         2 * time.Second,
+			},
+			expectedFiles: []string{block, cpu, goprofle, heap, mutex},
+		},
+	}
+	for _, tc := range testCases {
+		t.Run(tc.name, func(t *testing.T) {
+			ctx := context.Background()
+			c := MakeContainer(ctx, t)
+			// Set basepath to include the container name so there are no conflicts.
+			tc.pprof.BasePath = filepath.Join(tc.pprof.BasePath, c.Name)
+			c.AddProfile(&tc.pprof)
+
+			func() {
+				defer c.CleanUp(ctx)
+				// Start a container.
+				if err := c.Spawn(ctx, RunOpts{
+					Image: "basic/alpine",
+				}, "sleep", "1000"); err != nil {
+					t.Fatalf("run failed with: %v", err)
+				}
+
+				if status, err := c.Status(context.Background()); !status.Running {
+					t.Fatalf("container is not yet running: %+v err: %v", status, err)
+				}
+
+				// End early if the expected files exist and have data.
+				for start := time.Now(); time.Since(start) < tc.pprof.Duration; time.Sleep(500 * time.Millisecond) {
+					if err := checkFiles(tc); err == nil {
+						break
+					}
+				}
+			}()
+
+			// Check all expected files exist and have data.
+			if err := checkFiles(tc); err != nil {
+				t.Fatalf(err.Error())
+			}
+		})
+	}
+}
+
+func checkFiles(tc testCase) error {
+	for _, file := range tc.expectedFiles {
+		stat, err := os.Stat(filepath.Join(tc.pprof.BasePath, file))
+		if err != nil {
+			return fmt.Errorf("stat failed with: %v", err)
+		} else if stat.Size() < 1 {
+			return fmt.Errorf("file not written to: %+v", stat)
+		}
+	}
+	return nil
+}
+
+func TestMain(m *testing.M) {
+	EnsureSupportedDockerVersion()
+	os.Exit(m.Run())
+}
diff --git a/runsc/boot/BUILD b/runsc/boot/BUILD
index 55d45aaa6..9f52438c2 100644
--- a/runsc/boot/BUILD
+++ b/runsc/boot/BUILD
@@ -90,6 +90,7 @@ go_library(
         "//pkg/tcpip",
         "//pkg/tcpip/link/fdbased",
         "//pkg/tcpip/link/loopback",
+        "//pkg/tcpip/link/packetsocket",
         "//pkg/tcpip/link/qdisc/fifo",
         "//pkg/tcpip/link/sniffer",
         "//pkg/tcpip/network/arp",
diff --git a/runsc/boot/network.go b/runsc/boot/network.go
index 14d2f56a5..4e1fa7665 100644
--- a/runsc/boot/network.go
+++ b/runsc/boot/network.go
@@ -25,6 +25,7 @@ import (
 	"gvisor.dev/gvisor/pkg/tcpip"
 	"gvisor.dev/gvisor/pkg/tcpip/link/fdbased"
 	"gvisor.dev/gvisor/pkg/tcpip/link/loopback"
+	"gvisor.dev/gvisor/pkg/tcpip/link/packetsocket"
 	"gvisor.dev/gvisor/pkg/tcpip/link/qdisc/fifo"
 	"gvisor.dev/gvisor/pkg/tcpip/link/sniffer"
 	"gvisor.dev/gvisor/pkg/tcpip/network/arp"
@@ -252,6 +253,9 @@ func (n *Network) CreateLinksAndRoutes(args *CreateLinksAndRoutesArgs, _ *struct
 			linkEP = fifo.New(linkEP, runtime.GOMAXPROCS(0), 1000)
 		}
 
+		// Enable support for AF_PACKET sockets to receive outgoing packets.
+		linkEP = packetsocket.New(linkEP)
+
 		log.Infof("Enabling interface %q with id %d on addresses %+v (%v) w/ %d channels", link.Name, nicID, link.Addresses, mac, link.NumChannels)
 		if err := n.createNICWithAddrs(nicID, link.Name, linkEP, link.Addresses); err != nil {
 			return err
diff --git a/runsc/boot/vfs.go b/runsc/boot/vfs.go
index 56f4ba15d..9a1ed8e9e 100644
--- a/runsc/boot/vfs.go
+++ b/runsc/boot/vfs.go
@@ -77,6 +77,10 @@ func registerFilesystems(k *kernel.Kernel) error {
 		AllowUserMount: true,
 		AllowUserList:  true,
 	})
+	vfsObj.MustRegisterFilesystemType(fuse.Name, &fuse.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{
+		AllowUserMount: true,
+		AllowUserList:  true,
+	})
 
 	// Setup files in devtmpfs.
 	if err := memdev.Register(vfsObj); err != nil {
@@ -119,6 +123,7 @@ func registerFilesystems(k *kernel.Kernel) error {
 			return fmt.Errorf("creating fusedev devtmpfs files: %w", err)
 		}
 	}
+
 	return nil
 }
 
diff --git a/runsc/container/container_test.go b/runsc/container/container_test.go
index cd76645bd..5e8247bc8 100644
--- a/runsc/container/container_test.go
+++ b/runsc/container/container_test.go
@@ -643,7 +643,9 @@ func TestExec(t *testing.T) {
 			if err != nil {
 				t.Fatalf("error creating temporary directory: %v", err)
 			}
-			cmd := fmt.Sprintf("ln -s /bin/true %q/symlink && sleep 100", dir)
+			// Note that some shells may exec the final command in a sequence as
+			// an optimization. We avoid this here by adding the exit 0.
+			cmd := fmt.Sprintf("ln -s /bin/true %q/symlink && sleep 100 && exit 0", dir)
 			spec := testutil.NewSpecWithArgs("sh", "-c", cmd)
 
 			_, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf)
diff --git a/runsc/fsgofer/BUILD b/runsc/fsgofer/BUILD
index 1036b0630..05e3637f7 100644
--- a/runsc/fsgofer/BUILD
+++ b/runsc/fsgofer/BUILD
@@ -31,5 +31,6 @@ go_test(
     deps = [
         "//pkg/log",
         "//pkg/p9",
+        "//pkg/test/testutil",
     ],
 )
diff --git a/runsc/fsgofer/fsgofer.go b/runsc/fsgofer/fsgofer.go
index b7521bda7..ebefeacf2 100644
--- a/runsc/fsgofer/fsgofer.go
+++ b/runsc/fsgofer/fsgofer.go
@@ -48,36 +48,6 @@ const (
 	openFlags = syscall.O_NOFOLLOW | syscall.O_CLOEXEC
 )
 
-type fileType int
-
-const (
-	regular fileType = iota
-	directory
-	symlink
-	socket
-	unknown
-)
-
-// String implements fmt.Stringer.
-func (f fileType) String() string {
-	switch f {
-	case regular:
-		return "regular"
-	case directory:
-		return "directory"
-	case symlink:
-		return "symlink"
-	case socket:
-		return "socket"
-	}
-	return "unknown"
-}
-
-// ControlSocketAddr generates an abstract unix socket name for the given id.
-func ControlSocketAddr(id string) string {
-	return fmt.Sprintf("\x00runsc-gofer.%s", id)
-}
-
 // Config sets configuration options for each attach point.
 type Config struct {
 	// ROMount is set to true if this is a readonly mount.
@@ -132,7 +102,7 @@ func (a *attachPoint) Attach() (p9.File, error) {
 		return nil, fmt.Errorf("attach point already attached, prefix: %s", a.prefix)
 	}
 
-	f, err := openAnyFile(a.prefix, func(mode int) (*fd.FD, error) {
+	f, readable, err := openAnyFile(a.prefix, func(mode int) (*fd.FD, error) {
 		return fd.Open(a.prefix, openFlags|mode, 0)
 	})
 	if err != nil {
@@ -144,7 +114,7 @@ func (a *attachPoint) Attach() (p9.File, error) {
 		return nil, fmt.Errorf("unable to stat %q: %v", a.prefix, err)
 	}
 
-	lf, err := newLocalFile(a, f, a.prefix, stat)
+	lf, err := newLocalFile(a, f, a.prefix, readable, stat)
 	if err != nil {
 		return nil, fmt.Errorf("unable to create localFile %q: %v", a.prefix, err)
 	}
@@ -199,8 +169,6 @@ func (a *attachPoint) makeQID(stat syscall.Stat_t) p9.QID {
 // entire file up when it's opened in write mode, and would perform badly when
 // multiple files are only being opened for read (esp. startup).
 type localFile struct {
-	p9.DefaultWalkGetAttr
-
 	// attachPoint is the attachPoint that serves this localFile.
 	attachPoint *attachPoint
 
@@ -212,12 +180,19 @@ type localFile struct {
 	// opened with.
 	file *fd.FD
 
+	// controlReadable tells whether 'file' was opened with read permissions
+	// during a walk.
+	controlReadable bool
+
 	// mode is the mode in which the file was opened. Set to invalidMode
 	// if localFile isn't opened.
 	mode p9.OpenFlags
 
-	// ft is the fileType for this file.
-	ft fileType
+	// fileType for this file. It is equivalent to:
+	// syscall.Stat_t.Mode & syscall.S_IFMT
+	fileType uint32
+
+	qid p9.QID
 
 	// readDirMu protects against concurrent Readdir calls.
 	readDirMu sync.Mutex
@@ -251,83 +226,88 @@ func reopenProcFd(f *fd.FD, mode int) (*fd.FD, error) {
 	return fd.New(d), nil
 }
 
-func openAnyFileFromParent(parent *localFile, name string) (*fd.FD, string, error) {
+func openAnyFileFromParent(parent *localFile, name string) (*fd.FD, string, bool, error) {
 	path := path.Join(parent.hostPath, name)
-	f, err := openAnyFile(path, func(mode int) (*fd.FD, error) {
+	f, readable, err := openAnyFile(path, func(mode int) (*fd.FD, error) {
 		return fd.OpenAt(parent.file, name, openFlags|mode, 0)
 	})
-	return f, path, err
+	return f, path, readable, err
 }
 
 // openAnyFile attempts to open the file in O_RDONLY and if it fails fallsback
 // to O_PATH. 'path' is used for logging messages only. 'fn' is what does the
 // actual file open and is customizable by the caller.
-func openAnyFile(path string, fn func(mode int) (*fd.FD, error)) (*fd.FD, error) {
+func openAnyFile(path string, fn func(mode int) (*fd.FD, error)) (*fd.FD, bool, error) {
 	// Attempt to open file in the following mode in order:
 	//   1. RDONLY | NONBLOCK: for all files, directories, ro mounts, FIFOs.
 	//      Use non-blocking to prevent getting stuck inside open(2) for
 	//      FIFOs. This option has no effect on regular files.
 	//   2. PATH: for symlinks, sockets.
-	modes := []int{syscall.O_RDONLY | syscall.O_NONBLOCK, unix.O_PATH}
+	options := []struct {
+		mode     int
+		readable bool
+	}{
+		{
+			mode:     syscall.O_RDONLY | syscall.O_NONBLOCK,
+			readable: true,
+		},
+		{
+			mode:     unix.O_PATH,
+			readable: false,
+		},
+	}
 
 	var err error
-	var file *fd.FD
-	for i, mode := range modes {
-		file, err = fn(mode)
+	for i, option := range options {
+		var file *fd.FD
+		file, err = fn(option.mode)
 		if err == nil {
-			// openat succeeded, we're done.
-			break
+			// Succeeded opening the file, we're done.
+			return file, option.readable, nil
 		}
 		switch e := extractErrno(err); e {
 		case syscall.ENOENT:
 			// File doesn't exist, no point in retrying.
-			return nil, e
+			return nil, false, e
 		}
-		// openat failed. Try again with next mode, preserving 'err' in case this
-		// was the last attempt.
-		log.Debugf("Attempt %d to open file failed, mode: %#x, path: %q, err: %v", i, openFlags|mode, path, err)
-	}
-	if err != nil {
-		// All attempts to open file have failed, return the last error.
-		log.Debugf("Failed to open file, path: %q, err: %v", path, err)
-		return nil, extractErrno(err)
+		// File failed to open. Try again with next mode, preserving 'err' in case
+		// this was the last attempt.
+		log.Debugf("Attempt %d to open file failed, mode: %#x, path: %q, err: %v", i, openFlags|option.mode, path, err)
 	}
-
-	return file, nil
+	// All attempts to open file have failed, return the last error.
+	log.Debugf("Failed to open file, path: %q, err: %v", path, err)
+	return nil, false, extractErrno(err)
 }
 
-func getSupportedFileType(stat syscall.Stat_t, permitSocket bool) (fileType, error) {
-	var ft fileType
+func checkSupportedFileType(stat syscall.Stat_t, permitSocket bool) error {
 	switch stat.Mode & syscall.S_IFMT {
-	case syscall.S_IFREG:
-		ft = regular
-	case syscall.S_IFDIR:
-		ft = directory
-	case syscall.S_IFLNK:
-		ft = symlink
+	case syscall.S_IFREG, syscall.S_IFDIR, syscall.S_IFLNK:
+		return nil
+
 	case syscall.S_IFSOCK:
 		if !permitSocket {
-			return unknown, syscall.EPERM
+			return syscall.EPERM
 		}
-		ft = socket
+		return nil
+
 	default:
-		return unknown, syscall.EPERM
+		return syscall.EPERM
 	}
-	return ft, nil
 }
 
-func newLocalFile(a *attachPoint, file *fd.FD, path string, stat syscall.Stat_t) (*localFile, error) {
-	ft, err := getSupportedFileType(stat, a.conf.HostUDS)
-	if err != nil {
+func newLocalFile(a *attachPoint, file *fd.FD, path string, readable bool, stat syscall.Stat_t) (*localFile, error) {
+	if err := checkSupportedFileType(stat, a.conf.HostUDS); err != nil {
 		return nil, err
 	}
 
 	return &localFile{
-		attachPoint: a,
-		hostPath:    path,
-		file:        file,
-		mode:        invalidMode,
-		ft:          ft,
+		attachPoint:     a,
+		hostPath:        path,
+		file:            file,
+		mode:            invalidMode,
+		fileType:        stat.Mode & syscall.S_IFMT,
+		qid:             a.makeQID(stat),
+		controlReadable: readable,
 	}, nil
 }
 
@@ -346,7 +326,7 @@ func newFDMaybe(file *fd.FD) *fd.FD {
 
 	// fd is blocking; non-blocking is required.
 	if err := syscall.SetNonblock(dup.FD(), true); err != nil {
-		dup.Close()
+		_ = dup.Close()
 		return nil
 	}
 	return dup
@@ -380,7 +360,7 @@ func (l *localFile) Open(flags p9.OpenFlags) (*fd.FD, p9.QID, uint32, error) {
 
 	// Check if control file can be used or if a new open must be created.
 	var newFile *fd.FD
-	if flags == p9.ReadOnly {
+	if flags == p9.ReadOnly && l.controlReadable {
 		log.Debugf("Open reusing control file, flags: %v, %q", flags, l.hostPath)
 		newFile = l.file
 	} else {
@@ -396,16 +376,8 @@ func (l *localFile) Open(flags p9.OpenFlags) (*fd.FD, p9.QID, uint32, error) {
 		}
 	}
 
-	stat, err := fstat(newFile.FD())
-	if err != nil {
-		if newFile != l.file {
-			newFile.Close()
-		}
-		return nil, p9.QID{}, 0, extractErrno(err)
-	}
-
 	var fd *fd.FD
-	if stat.Mode&syscall.S_IFMT == syscall.S_IFREG {
+	if l.fileType == syscall.S_IFREG {
 		// Donate FD for regular files only.
 		fd = newFDMaybe(newFile)
 	}
@@ -418,7 +390,7 @@ func (l *localFile) Open(flags p9.OpenFlags) (*fd.FD, p9.QID, uint32, error) {
 		l.file = newFile
 	}
 	l.mode = flags & p9.OpenFlagsModeMask
-	return fd, l.attachPoint.makeQID(stat), 0, nil
+	return fd, l.qid, 0, nil
 }
 
 // Create implements p9.File.
@@ -446,7 +418,7 @@ func (l *localFile) Create(name string, mode p9.OpenFlags, perm p9.FileMode, uid
 		return nil, nil, p9.QID{}, 0, extractErrno(err)
 	}
 	cu := cleanup.Make(func() {
-		child.Close()
+		_ = child.Close()
 		// Best effort attempt to remove the file in case of failure.
 		if err := syscall.Unlinkat(l.file.FD(), name); err != nil {
 			log.Warningf("error unlinking file %q after failure: %v", path.Join(l.hostPath, name), err)
@@ -467,10 +439,12 @@ func (l *localFile) Create(name string, mode p9.OpenFlags, perm p9.FileMode, uid
 		hostPath:    path.Join(l.hostPath, name),
 		file:        child,
 		mode:        mode,
+		fileType:    syscall.S_IFREG,
+		qid:         l.attachPoint.makeQID(stat),
 	}
 
 	cu.Release()
-	return newFDMaybe(c.file), c, l.attachPoint.makeQID(stat), 0, nil
+	return newFDMaybe(c.file), c, c.qid, 0, nil
 }
 
 // Mkdir implements p9.File.
@@ -516,55 +490,74 @@ func (l *localFile) Mkdir(name string, perm p9.FileMode, uid p9.UID, gid p9.GID)
 
 // Walk implements p9.File.
 func (l *localFile) Walk(names []string) ([]p9.QID, p9.File, error) {
+	qids, file, _, err := l.walk(names)
+	return qids, file, err
+}
+
+// WalkGetAttr implements p9.File.
+func (l *localFile) WalkGetAttr(names []string) ([]p9.QID, p9.File, p9.AttrMask, p9.Attr, error) {
+	qids, file, stat, err := l.walk(names)
+	if err != nil {
+		return nil, nil, p9.AttrMask{}, p9.Attr{}, err
+	}
+	mask, attr := l.fillAttr(stat)
+	return qids, file, mask, attr, nil
+}
+
+func (l *localFile) walk(names []string) ([]p9.QID, p9.File, syscall.Stat_t, error) {
 	// Duplicate current file if 'names' is empty.
 	if len(names) == 0 {
-		newFile, err := openAnyFile(l.hostPath, func(mode int) (*fd.FD, error) {
+		newFile, readable, err := openAnyFile(l.hostPath, func(mode int) (*fd.FD, error) {
 			return reopenProcFd(l.file, openFlags|mode)
 		})
 		if err != nil {
-			return nil, nil, extractErrno(err)
+			return nil, nil, syscall.Stat_t{}, extractErrno(err)
 		}
 
 		stat, err := fstat(newFile.FD())
 		if err != nil {
-			newFile.Close()
-			return nil, nil, extractErrno(err)
+			_ = newFile.Close()
+			return nil, nil, syscall.Stat_t{}, extractErrno(err)
 		}
 
 		c := &localFile{
-			attachPoint: l.attachPoint,
-			hostPath:    l.hostPath,
-			file:        newFile,
-			mode:        invalidMode,
+			attachPoint:     l.attachPoint,
+			hostPath:        l.hostPath,
+			file:            newFile,
+			mode:            invalidMode,
+			fileType:        l.fileType,
+			qid:             l.attachPoint.makeQID(stat),
+			controlReadable: readable,
 		}
-		return []p9.QID{l.attachPoint.makeQID(stat)}, c, nil
+		return []p9.QID{c.qid}, c, stat, nil
 	}
 
 	var qids []p9.QID
+	var lastStat syscall.Stat_t
 	last := l
 	for _, name := range names {
-		f, path, err := openAnyFileFromParent(last, name)
+		f, path, readable, err := openAnyFileFromParent(last, name)
 		if last != l {
-			last.Close()
+			_ = last.Close()
 		}
 		if err != nil {
-			return nil, nil, extractErrno(err)
+			return nil, nil, syscall.Stat_t{}, extractErrno(err)
 		}
-		stat, err := fstat(f.FD())
+		lastStat, err = fstat(f.FD())
 		if err != nil {
-			f.Close()
-			return nil, nil, extractErrno(err)
+			_ = f.Close()
+			return nil, nil, syscall.Stat_t{}, extractErrno(err)
 		}
-		c, err := newLocalFile(last.attachPoint, f, path, stat)
+		c, err := newLocalFile(last.attachPoint, f, path, readable, lastStat)
 		if err != nil {
-			f.Close()
-			return nil, nil, extractErrno(err)
+			_ = f.Close()
+			return nil, nil, syscall.Stat_t{}, extractErrno(err)
 		}
 
-		qids = append(qids, l.attachPoint.makeQID(stat))
+		qids = append(qids, c.qid)
 		last = c
 	}
-	return qids, last, nil
+	return qids, last, lastStat, nil
 }
 
 // StatFS implements p9.File.
@@ -604,7 +597,11 @@ func (l *localFile) GetAttr(_ p9.AttrMask) (p9.QID, p9.AttrMask, p9.Attr, error)
 	if err != nil {
 		return p9.QID{}, p9.AttrMask{}, p9.Attr{}, extractErrno(err)
 	}
+	mask, attr := l.fillAttr(stat)
+	return l.qid, mask, attr, nil
+}
 
+func (l *localFile) fillAttr(stat syscall.Stat_t) (p9.AttrMask, p9.Attr) {
 	attr := p9.Attr{
 		Mode:             p9.FileMode(stat.Mode),
 		UID:              p9.UID(stat.Uid),
@@ -633,8 +630,7 @@ func (l *localFile) GetAttr(_ p9.AttrMask) (p9.QID, p9.AttrMask, p9.Attr, error)
 		MTime:  true,
 		CTime:  true,
 	}
-
-	return l.attachPoint.makeQID(stat), valid, attr, nil
+	return valid, attr
 }
 
 // SetAttr implements p9.File. Due to mismatch in file API, options
@@ -675,7 +671,7 @@ func (l *localFile) SetAttr(valid p9.SetAttrMask, attr p9.SetAttr) error {
 	// Check if it's possible to use cached file, or if another one needs to be
 	// opened for write.
 	f := l.file
-	if l.ft == regular && l.mode != p9.WriteOnly && l.mode != p9.ReadWrite {
+	if l.fileType == syscall.S_IFREG && l.mode != p9.WriteOnly && l.mode != p9.ReadWrite {
 		var err error
 		f, err = reopenProcFd(l.file, openFlags|os.O_WRONLY)
 		if err != nil {
@@ -731,7 +727,7 @@ func (l *localFile) SetAttr(valid p9.SetAttrMask, attr p9.SetAttr) error {
 			}
 		}
 
-		if l.ft == symlink {
+		if l.fileType == syscall.S_IFLNK {
 			// utimensat operates different that other syscalls. To operate on a
 			// symlink it *requires* AT_SYMLINK_NOFOLLOW with dirFD and a non-empty
 			// name.
@@ -915,7 +911,7 @@ func (l *localFile) Link(target p9.File, newName string) error {
 }
 
 // Mknod implements p9.File.
-func (l *localFile) Mknod(name string, mode p9.FileMode, _ uint32, _ uint32, uid p9.UID, gid p9.GID) (p9.QID, error) {
+func (l *localFile) Mknod(name string, mode p9.FileMode, _ uint32, _ uint32, _ p9.UID, _ p9.GID) (p9.QID, error) {
 	conf := l.attachPoint.conf
 	if conf.ROMount {
 		if conf.PanicOnWrite {
@@ -1113,13 +1109,13 @@ func (l *localFile) Connect(flags p9.ConnectFlags) (*fd.FD, error) {
 	}
 
 	if err := syscall.SetNonblock(f, true); err != nil {
-		syscall.Close(f)
+		_ = syscall.Close(f)
 		return nil, err
 	}
 
 	sa := syscall.SockaddrUnix{Name: l.hostPath}
 	if err := syscall.Connect(f, &sa); err != nil {
-		syscall.Close(f)
+		_ = syscall.Close(f)
 		return nil, err
 	}
 
diff --git a/runsc/fsgofer/fsgofer_test.go b/runsc/fsgofer/fsgofer_test.go
index 05af7e397..94f167417 100644
--- a/runsc/fsgofer/fsgofer_test.go
+++ b/runsc/fsgofer/fsgofer_test.go
@@ -26,6 +26,19 @@ import (
 
 	"gvisor.dev/gvisor/pkg/log"
 	"gvisor.dev/gvisor/pkg/p9"
+	"gvisor.dev/gvisor/pkg/test/testutil"
+)
+
+var allOpenFlags = []p9.OpenFlags{p9.ReadOnly, p9.WriteOnly, p9.ReadWrite}
+
+var (
+	allTypes = []uint32{syscall.S_IFREG, syscall.S_IFDIR, syscall.S_IFLNK}
+
+	// allConfs is set in init().
+	allConfs []Config
+
+	rwConfs = []Config{{ROMount: false}}
+	roConfs = []Config{{ROMount: true}}
 )
 
 func init() {
@@ -39,6 +52,13 @@ func init() {
 	}
 }
 
+func configTestName(config *Config) string {
+	if config.ROMount {
+		return "ROMount"
+	}
+	return "RWMount"
+}
+
 func assertPanic(t *testing.T, f func()) {
 	defer func() {
 		if r := recover(); r == nil {
@@ -88,71 +108,76 @@ func testReadWrite(f p9.File, flags p9.OpenFlags, content []byte) error {
 	return nil
 }
 
-var allOpenFlags = []p9.OpenFlags{p9.ReadOnly, p9.WriteOnly, p9.ReadWrite}
-
-var (
-	allTypes = []fileType{regular, directory, symlink}
-
-	// allConfs is set in init() above.
-	allConfs []Config
-
-	rwConfs = []Config{{ROMount: false}}
-	roConfs = []Config{{ROMount: true}}
-)
-
 type state struct {
-	root *localFile
-	file *localFile
-	conf Config
-	ft   fileType
+	root     *localFile
+	file     *localFile
+	conf     Config
+	fileType uint32
 }
 
 func (s state) String() string {
-	return fmt.Sprintf("type(%v)", s.ft)
+	return fmt.Sprintf("type(%v)", s.fileType)
+}
+
+func typeName(fileType uint32) string {
+	switch fileType {
+	case syscall.S_IFREG:
+		return "file"
+	case syscall.S_IFDIR:
+		return "directory"
+	case syscall.S_IFLNK:
+		return "symlink"
+	default:
+		panic(fmt.Sprintf("invalid file type for test: %d", fileType))
+	}
 }
 
 func runAll(t *testing.T, test func(*testing.T, state)) {
 	runCustom(t, allTypes, allConfs, test)
 }
 
-func runCustom(t *testing.T, types []fileType, confs []Config, test func(*testing.T, state)) {
+func runCustom(t *testing.T, types []uint32, confs []Config, test func(*testing.T, state)) {
 	for _, c := range confs {
-		t.Logf("Config: %+v", c)
-
 		for _, ft := range types {
-			t.Logf("File type: %v", ft)
+			name := fmt.Sprintf("%s/%s", configTestName(&c), typeName(ft))
+			t.Run(name, func(t *testing.T) {
+				path, name, err := setup(ft)
+				if err != nil {
+					t.Fatalf("%v", err)
+				}
+				defer os.RemoveAll(path)
 
-			path, name, err := setup(ft)
-			if err != nil {
-				t.Fatalf("%v", err)
-			}
-			defer os.RemoveAll(path)
+				a, err := NewAttachPoint(path, c)
+				if err != nil {
+					t.Fatalf("NewAttachPoint failed: %v", err)
+				}
+				root, err := a.Attach()
+				if err != nil {
+					t.Fatalf("Attach failed, err: %v", err)
+				}
 
-			a, err := NewAttachPoint(path, c)
-			if err != nil {
-				t.Fatalf("NewAttachPoint failed: %v", err)
-			}
-			root, err := a.Attach()
-			if err != nil {
-				t.Fatalf("Attach failed, err: %v", err)
-			}
+				_, file, err := root.Walk([]string{name})
+				if err != nil {
+					root.Close()
+					t.Fatalf("root.Walk({%q}) failed, err: %v", "symlink", err)
+				}
 
-			_, file, err := root.Walk([]string{name})
-			if err != nil {
+				st := state{
+					root:     root.(*localFile),
+					file:     file.(*localFile),
+					conf:     c,
+					fileType: ft,
+				}
+				test(t, st)
+				file.Close()
 				root.Close()
-				t.Fatalf("root.Walk({%q}) failed, err: %v", "symlink", err)
-			}
-
-			st := state{root: root.(*localFile), file: file.(*localFile), conf: c, ft: ft}
-			test(t, st)
-			file.Close()
-			root.Close()
+			})
 		}
 	}
 }
 
-func setup(ft fileType) (string, string, error) {
-	path, err := ioutil.TempDir("", "root-")
+func setup(fileType uint32) (string, string, error) {
+	path, err := ioutil.TempDir(testutil.TmpDir(), "root-")
 	if err != nil {
 		return "", "", fmt.Errorf("ioutil.TempDir() failed, err: %v", err)
 	}
@@ -169,26 +194,26 @@ func setup(ft fileType) (string, string, error) {
 	defer root.Close()
 
 	var name string
-	switch ft {
-	case regular:
+	switch fileType {
+	case syscall.S_IFREG:
 		name = "file"
 		_, f, _, _, err := root.Create(name, p9.ReadWrite, 0777, p9.UID(os.Getuid()), p9.GID(os.Getgid()))
 		if err != nil {
 			return "", "", fmt.Errorf("createFile(root, %q) failed, err: %v", "test", err)
 		}
 		defer f.Close()
-	case directory:
+	case syscall.S_IFDIR:
 		name = "dir"
 		if _, err := root.Mkdir(name, 0777, p9.UID(os.Getuid()), p9.GID(os.Getgid())); err != nil {
 			return "", "", fmt.Errorf("root.MkDir(%q) failed, err: %v", name, err)
 		}
-	case symlink:
+	case syscall.S_IFLNK:
 		name = "symlink"
 		if _, err := root.Symlink("/some/target", name, p9.UID(os.Getuid()), p9.GID(os.Getgid())); err != nil {
 			return "", "", fmt.Errorf("root.Symlink(%q) failed, err: %v", name, err)
 		}
 	default:
-		panic(fmt.Sprintf("unknown file type %v", ft))
+		panic(fmt.Sprintf("unknown file type %v", fileType))
 	}
 	return path, name, nil
 }
@@ -202,7 +227,7 @@ func createFile(dir *localFile, name string) (*localFile, error) {
 }
 
 func TestReadWrite(t *testing.T) {
-	runCustom(t, []fileType{directory}, rwConfs, func(t *testing.T, s state) {
+	runCustom(t, []uint32{syscall.S_IFDIR}, rwConfs, func(t *testing.T, s state) {
 		child, err := createFile(s.file, "test")
 		if err != nil {
 			t.Fatalf("%v: createFile() failed, err: %v", s, err)
@@ -232,7 +257,7 @@ func TestReadWrite(t *testing.T) {
 }
 
 func TestCreate(t *testing.T) {
-	runCustom(t, []fileType{directory}, rwConfs, func(t *testing.T, s state) {
+	runCustom(t, []uint32{syscall.S_IFDIR}, rwConfs, func(t *testing.T, s state) {
 		for i, flags := range allOpenFlags {
 			_, l, _, _, err := s.file.Create(fmt.Sprintf("test-%d", i), flags, 0777, p9.UID(os.Getuid()), p9.GID(os.Getgid()))
 			if err != nil {
@@ -249,7 +274,7 @@ func TestCreate(t *testing.T) {
 // TestReadWriteDup tests that a file opened in any mode can be dup'ed and
 // reopened in any other mode.
 func TestReadWriteDup(t *testing.T) {
-	runCustom(t, []fileType{directory}, rwConfs, func(t *testing.T, s state) {
+	runCustom(t, []uint32{syscall.S_IFDIR}, rwConfs, func(t *testing.T, s state) {
 		child, err := createFile(s.file, "test")
 		if err != nil {
 			t.Fatalf("%v: createFile() failed, err: %v", s, err)
@@ -291,7 +316,7 @@ func TestReadWriteDup(t *testing.T) {
 }
 
 func TestUnopened(t *testing.T) {
-	runCustom(t, []fileType{regular}, allConfs, func(t *testing.T, s state) {
+	runCustom(t, []uint32{syscall.S_IFREG}, allConfs, func(t *testing.T, s state) {
 		b := []byte("foobar")
 		if _, err := s.file.WriteAt(b, 0); err != syscall.EBADF {
 			t.Errorf("%v: WriteAt() should have failed, got: %v, expected: syscall.EBADF", s, err)
@@ -308,6 +333,32 @@ func TestUnopened(t *testing.T) {
 	})
 }
 
+// TestOpenOPath is a regression test to ensure that a file that cannot be open
+// for read is allowed to be open. This was happening because the control file
+// was open with O_PATH, but Open() was not checking for it and allowing the
+// control file to be reused.
+func TestOpenOPath(t *testing.T) {
+	runCustom(t, []uint32{syscall.S_IFREG}, rwConfs, func(t *testing.T, s state) {
+		// Fist remove all permissions on the file.
+		if err := s.file.SetAttr(p9.SetAttrMask{Permissions: true}, p9.SetAttr{Permissions: p9.FileMode(0)}); err != nil {
+			t.Fatalf("SetAttr(): %v", err)
+		}
+		// Then walk to the file again to open a new control file.
+		filename := filepath.Base(s.file.hostPath)
+		_, newFile, err := s.root.Walk([]string{filename})
+		if err != nil {
+			t.Fatalf("root.Walk(%q): %v", filename, err)
+		}
+
+		if newFile.(*localFile).controlReadable {
+			t.Fatalf("control file didn't open with O_PATH: %+v", newFile)
+		}
+		if _, _, _, err := newFile.Open(p9.ReadOnly); err != syscall.EACCES {
+			t.Fatalf("Open() should have failed, got: %v, wanted: EACCES", err)
+		}
+	})
+}
+
 func SetGetAttr(l *localFile, valid p9.SetAttrMask, attr p9.SetAttr) (p9.Attr, error) {
 	if err := l.SetAttr(valid, attr); err != nil {
 		return p9.Attr{}, err
@@ -324,7 +375,7 @@ func TestSetAttrPerm(t *testing.T) {
 		valid := p9.SetAttrMask{Permissions: true}
 		attr := p9.SetAttr{Permissions: 0777}
 		got, err := SetGetAttr(s.file, valid, attr)
-		if s.ft == symlink {
+		if s.fileType == syscall.S_IFLNK {
 			if err == nil {
 				t.Fatalf("%v: SetGetAttr(valid, %v) should have failed", s, attr.Permissions)
 			}
@@ -345,7 +396,7 @@ func TestSetAttrSize(t *testing.T) {
 			valid := p9.SetAttrMask{Size: true}
 			attr := p9.SetAttr{Size: size}
 			got, err := SetGetAttr(s.file, valid, attr)
-			if s.ft == symlink || s.ft == directory {
+			if s.fileType == syscall.S_IFLNK || s.fileType == syscall.S_IFDIR {
 				if err == nil {
 					t.Fatalf("%v: SetGetAttr(valid, %v) should have failed", s, attr.Permissions)
 				}
@@ -427,7 +478,7 @@ func TestLink(t *testing.T) {
 		}
 
 		err = dir.Link(s.file, linkFile)
-		if s.ft == directory {
+		if s.fileType == syscall.S_IFDIR {
 			if err != syscall.EPERM {
 				t.Errorf("%v: Link(target, %s) should have failed, got: %v, expected: syscall.EPERM", s, linkFile, err)
 			}
@@ -485,7 +536,7 @@ func TestROMountPanics(t *testing.T) {
 }
 
 func TestWalkNotFound(t *testing.T) {
-	runCustom(t, []fileType{directory}, allConfs, func(t *testing.T, s state) {
+	runCustom(t, []uint32{syscall.S_IFDIR}, allConfs, func(t *testing.T, s state) {
 		if _, _, err := s.file.Walk([]string{"nobody-here"}); err != syscall.ENOENT {
 			t.Errorf("%v: Walk(%q) should have failed, got: %v, expected: syscall.ENOENT", s, "nobody-here", err)
 		}
@@ -506,7 +557,7 @@ func TestWalkDup(t *testing.T) {
 }
 
 func TestReaddir(t *testing.T) {
-	runCustom(t, []fileType{directory}, rwConfs, func(t *testing.T, s state) {
+	runCustom(t, []uint32{syscall.S_IFDIR}, rwConfs, func(t *testing.T, s state) {
 		name := "dir"
 		if _, err := s.file.Mkdir(name, 0777, p9.UID(os.Getuid()), p9.GID(os.Getgid())); err != nil {
 			t.Fatalf("%v: MkDir(%s) failed, err: %v", s, name, err)
diff --git a/scripts/benchmark.sh b/scripts/benchmark.sh
deleted file mode 100755
index c49f988b8..000000000
--- a/scripts/benchmark.sh
+++ /dev/null
@@ -1,30 +0,0 @@
-#!/bin/bash
-
-# Copyright 2020 The gVisor Authors.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-source $(dirname $0)/common.sh
-
-make load-all-images
-
-if [[ -z "${1:-}" ]]; then
-  target=$(query "attr(tags, manual, tests(//test/benchmarks/...))")
-else
-  target="$1"
-fi
-
-install_runsc_for_benchmarks benchmark
-
-echo $target
-benchmark_runsc $target "${@:2}"
diff --git a/scripts/common.sh b/scripts/common.sh
index 36158654f..3ca699e4a 100755
--- a/scripts/common.sh
+++ b/scripts/common.sh
@@ -42,15 +42,6 @@ function test_runsc() {
   test --test_arg=--runtime=${RUNTIME} "$@"
 }
 
-function benchmark_runsc() {
-  test_runsc -c opt \
-    --nocache_test_results \
-    --test_arg=-test.bench=. \
-    --test_arg=-test.benchmem \
-    --jobs=1 \
-    "$@"
-}
-
 function install_runsc_for_test() {
   local -r test_name=$1
   shift
@@ -72,24 +63,6 @@ function install_runsc_for_test() {
       "$@"
 }
 
-function install_runsc_for_benchmarks() {
-  local -r test_name=$1
-  shift
-  if [[ -z "${test_name}" ]]; then
-    echo "Missing mandatory test name"
-    exit 1
-  fi
-
-  # Add test to the name, so it doesn't conflict with other runtimes.
-  set_runtime $(find_branch_name)_"${test_name}"
-
-  # ${RUNSC_TEST_NAME} is set by tests (see dockerutil) to pass the test name
-  # down to the runtime.
-  install_runsc "${RUNTIME}" \
-      --TESTONLY-test-name-env=RUNSC_TEST_NAME \
-      "$@"
-}
-
 # Installs the runsc with given runtime name. set_runtime must have been called
 # to set runtime and logs location.
 function install_runsc() {
diff --git a/scripts/docker_tests.sh b/scripts/docker_tests.sh
index dce0a4085..07e9f3109 100755
--- a/scripts/docker_tests.sh
+++ b/scripts/docker_tests.sh
@@ -22,4 +22,6 @@ install_runsc_for_test docker
 test_runsc //test/image:image_test //test/e2e:integration_test
 
 install_runsc_for_test docker --vfs2
-test_runsc //test/image:image_test --test_filter=.*TestHelloWorld
+IMAGE_FILTER="Hello|Httpd|Ruby|Stdio"
+INTEGRATION_FILTER="LifeCycle|Pause|Connect|JobControl|Overlay|Exec|DirCreation/root"
+test_runsc //test/e2e:integration_test //test/image:image_test --test_filter="${IMAGE_FILTER}|${INTEGRATION_FILTER}"
diff --git a/test/benchmarks/README.md b/test/benchmarks/README.md
index 9ff602cf1..d1bbabf6f 100644
--- a/test/benchmarks/README.md
+++ b/test/benchmarks/README.md
@@ -13,33 +13,51 @@ To run benchmarks you will need:
 
 *   Docker installed (17.09.0 or greater).
 
-The easiest way to run benchmarks is to use the script at
-//scripts/benchmark.sh.
+The easiest way to setup runsc for running benchmarks is to use the make file.
+From the root directory:
 
-If not using the script, you will need:
+*   Download images: `make load-all-images`
+*   Install runsc suitable for benchmarking, which should probably not have
+    strace or debug logs enabled. For example:`make configure RUNTIME=myrunsc
+    ARGS=--platform=kvm`.
+*   Restart docker: `sudo service docker restart`
 
-*   `runsc` configured with docker
+You should now have a runtime with the following options configured in
+`/etc/docker/daemon.json`
 
-Note: benchmarks call the runtime by name. If docker can run it with
-`--runtime=` flag, these tools should work.
+```
+"myrunsc": {
+            "path": "/tmp/myrunsc/runsc",
+            "runtimeArgs": [
+                "--debug-log",
+                "/tmp/bench/logs/runsc.log.%TEST%.%TIMESTAMP%.%COMMAND%",
+                "--platform=kvm"
+            ]
+        },
+
+```
+
+This runtime has been configured with a debugging off and strace logs off and is
+using kvm for demonstration.
 
 ## Running benchmarks
 
-The easiest way to run is with the script at //scripts/benchmarks.sh. The script
-will run all benchmarks under //test/benchmarks if a target is not provided.
+Given the runtime above runtime `myrunsc`, run benchmarks with the following:
 
-```bash
-./script/benchmarks.sh //path/to/target
+```
+make sudo TARGETS=//path/to:target ARGS="--runtime=myrunsc -test.v \
+  -test.bench=." OPTIONS="-c opt
 ```
 
-If you want to run benchmarks manually:
-
-*   Run `make load-all-images` from `//`
-*   Run with:
+For example, to run only the Iperf tests:
 
-```bash
-bazel test --test_arg=--runtime=RUNTIME -c opt --test_output=streamed --test_timeout=600 --test_arg=-test.bench=. --nocache_test_results //path/to/target
 ```
+make sudo TARGETS=//test/benchmarks/network:network_test \
+  ARGS="--runtime=myrunsc -test.v -test.bench=Iperf" OPTIONS="-c opt"
+```
+
+Benchmarks are run with root as some benchmarks require root privileges to do
+things like drop caches.
 
 ## Writing benchmarks
 
@@ -69,6 +87,7 @@ var h harness.Harness
 func BenchmarkMyCoolOne(b *testing.B) {
   machine, err := h.GetMachine()
   // check err
+  defer machine.CleanUp()
 
   ctx := context.Background()
   container := machine.GetContainer(ctx, b)
@@ -82,7 +101,7 @@ func BenchmarkMyCoolOne(b *testing.B) {
       Image: "benchmarks/my-cool-image",
       Env: []string{"MY_VAR=awesome"},
       other options...see dockerutil
-    }, "sh", "-c", "echo MY_VAR" ...)
+    }, "sh", "-c", "echo MY_VAR")
     //check err
     b.StopTimer()
 
@@ -107,12 +126,32 @@ Some notes on the above:
     flags, remote virtual machines (eventually), and other services.
 *   Respect `b.N` in that users of the benchmark may want to "run for an hour"
     or something of the sort.
-*   Use the `b.ReportMetric` method to report custom metrics.
+*   Use the `b.ReportMetric()` method to report custom metrics.
 *   Set the timer if time is useful for reporting. There isn't a way to turn off
     default metrics in testing.B (B/op, allocs/op, ns/op).
 *   Take a look at dockerutil at //pkg/test/dockerutil to see all methods
     available from containers. The API is based on the "official"
     [docker API for golang](https://pkg.go.dev/mod/github.com/docker/docker).
-*   `harness.GetMachine` marks how many machines this tests needs. If you have a
-    client and server and to mark them as multiple machines, call it
-    `GetMachine` twice.
+*   `harness.GetMachine()` marks how many machines this tests needs. If you have
+    a client and server and to mark them as multiple machines, call
+    `harness.GetMachine()` twice.
+
+## Profiling
+
+For profiling, the runtime is required to have the `--profile` flag enabled.
+This flag loosens seccomp filters so that the runtime can write profile data to
+disk. This configuration is not recommended for production.
+
+*   Install runsc with the `--profile` flag: `make configure RUNTIME=myrunsc
+    ARGS="--profile --platform=kvm --vfs2"`. The kvm and vfs2 flags are not
+    required, but are included for demonstration.
+*   Restart docker: `sudo service docker restart`
+
+To run and generate CPU profiles fs_test test run:
+
+```
+make sudo TARGETS=//test/benchmarks/fs:fs_test \
+  ARGS="--runtime=myrunsc -test.v -test.bench=. --pprof-cpu" OPTIONS="-c opt"
+```
+
+Profiles would be at: `/tmp/profile/myrunsc/CONTAINERNAME/cpu.pprof`
diff --git a/test/benchmarks/database/BUILD b/test/benchmarks/database/BUILD
new file mode 100644
index 000000000..5e33465cd
--- /dev/null
+++ b/test/benchmarks/database/BUILD
@@ -0,0 +1,28 @@
+load("//tools:defs.bzl", "go_library", "go_test")
+
+package(licenses = ["notice"])
+
+go_library(
+    name = "database",
+    testonly = 1,
+    srcs = ["database.go"],
+    deps = ["//test/benchmarks/harness"],
+)
+
+go_test(
+    name = "database_test",
+    size = "enormous",
+    srcs = [
+        "redis_test.go",
+    ],
+    library = ":database",
+    tags = [
+        # Requires docker and runsc to be configured before test runs.
+        "manual",
+        "local",
+    ],
+    deps = [
+        "//pkg/test/dockerutil",
+        "//test/benchmarks/harness",
+    ],
+)
diff --git a/test/benchmarks/database/database.go b/test/benchmarks/database/database.go
new file mode 100644
index 000000000..9eeb59f9a
--- /dev/null
+++ b/test/benchmarks/database/database.go
@@ -0,0 +1,31 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package database holds benchmarks around database applications.
+package database
+
+import (
+	"os"
+	"testing"
+
+	"gvisor.dev/gvisor/test/benchmarks/harness"
+)
+
+var h harness.Harness
+
+// TestMain is the main method for package database.
+func TestMain(m *testing.M) {
+	h.Init()
+	os.Exit(m.Run())
+}
diff --git a/test/benchmarks/database/redis_test.go b/test/benchmarks/database/redis_test.go
new file mode 100644
index 000000000..6d39f4d66
--- /dev/null
+++ b/test/benchmarks/database/redis_test.go
@@ -0,0 +1,197 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package database
+
+import (
+	"context"
+	"fmt"
+	"regexp"
+	"strconv"
+	"strings"
+	"testing"
+	"time"
+
+	"gvisor.dev/gvisor/pkg/test/dockerutil"
+	"gvisor.dev/gvisor/test/benchmarks/harness"
+)
+
+// All possible operations from redis. Note: "ping" will
+// run both PING_INLINE and PING_BUILD.
+var operations []string = []string{
+	"PING_INLINE",
+	"PING_BULK",
+	"SET",
+	"GET",
+	"INCR",
+	"LPUSH",
+	"RPUSH",
+	"LPOP",
+	"RPOP",
+	"SADD",
+	"HSET",
+	"SPOP",
+	"LRANGE_100",
+	"LRANGE_300",
+	"LRANGE_500",
+	"LRANGE_600",
+	"MSET",
+}
+
+// BenchmarkRedis runs redis-benchmark against a redis instance and reports
+// data in queries per second. Each is reported by named operation (e.g. LPUSH).
+func BenchmarkRedis(b *testing.B) {
+	clientMachine, err := h.GetMachine()
+	if err != nil {
+		b.Fatalf("failed to get machine: %v", err)
+	}
+	defer clientMachine.CleanUp()
+
+	serverMachine, err := h.GetMachine()
+	if err != nil {
+		b.Fatalf("failed to get machine: %v", err)
+	}
+	defer serverMachine.CleanUp()
+
+	// Redis runs on port 6379 by default.
+	port := 6379
+	ctx := context.Background()
+
+	for _, operation := range operations {
+		b.Run(operation, func(b *testing.B) {
+			server := serverMachine.GetContainer(ctx, b)
+			defer server.CleanUp(ctx)
+
+			// The redis docker container takes no arguments to run a redis server.
+			if err := server.Spawn(ctx, dockerutil.RunOpts{
+				Image: "benchmarks/redis",
+				Ports: []int{port},
+			}); err != nil {
+				b.Fatalf("failed to start redis server with: %v", err)
+			}
+
+			if out, err := server.WaitForOutput(ctx, "Ready to accept connections", 3*time.Second); err != nil {
+				b.Fatalf("failed to start redis server: %v %s", err, out)
+			}
+
+			ip, err := serverMachine.IPAddress()
+			if err != nil {
+				b.Fatal("failed to get IP from server: %v", err)
+			}
+
+			serverPort, err := server.FindPort(ctx, port)
+			if err != nil {
+				b.Fatal("failed to get IP from server: %v", err)
+			}
+
+			if err = harness.WaitUntilServing(ctx, clientMachine, ip, serverPort); err != nil {
+				b.Fatalf("failed to start redis with: %v", err)
+			}
+
+			// runs redis benchmark -t operation for 100K requests against server.
+			cmd := strings.Split(
+				fmt.Sprintf("redis-benchmark --csv -t %s -h %s -p %d", operation, ip, serverPort), " ")
+
+			// There is no -t PING_BULK for redis-benchmark, so adjust the command in that case.
+			// Note that "ping" will run both PING_INLINE and PING_BULK.
+			if operation == "PING_BULK" {
+				cmd = strings.Split(
+					fmt.Sprintf("redis-benchmark --csv -t ping -h %s -p %d", ip, serverPort), " ")
+			}
+			// Reset profiles and timer to begin the measurement.
+			server.RestartProfiles()
+			b.ResetTimer()
+			for i := 0; i < b.N; i++ {
+				client := clientMachine.GetNativeContainer(ctx, b)
+				defer client.CleanUp(ctx)
+				out, err := client.Run(ctx, dockerutil.RunOpts{
+					Image: "benchmarks/redis",
+				}, cmd...)
+				if err != nil {
+					b.Fatalf("redis-benchmark failed with: %v", err)
+				}
+
+				// Stop time while we parse results.
+				b.StopTimer()
+				result, err := parseOperation(operation, out)
+				if err != nil {
+					b.Fatalf("parsing result %s failed with err: %v", out, err)
+				}
+				b.ReportMetric(result, operation) // operations per second
+				b.StartTimer()
+			}
+		})
+	}
+}
+
+// parseOperation grabs the metric operations per second from redis-benchmark output.
+func parseOperation(operation, data string) (float64, error) {
+	re := regexp.MustCompile(fmt.Sprintf(`"%s( .*)?","(\d*\.\d*)"`, operation))
+	match := re.FindStringSubmatch(data)
+	// If no match, simply don't add it to the result map.
+	if len(match) < 3 {
+		return 0.0, fmt.Errorf("could not find %s in %s", operation, data)
+	}
+	return strconv.ParseFloat(match[2], 64)
+}
+
+// TestParser tests the parser on sample data.
+func TestParser(t *testing.T) {
+	sampleData := `
+	"PING_INLINE","48661.80"
+	"PING_BULK","50301.81"
+	"SET","48923.68"
+	"GET","49382.71"
+	"INCR","49975.02"
+	"LPUSH","49875.31"
+	"RPUSH","50276.52"
+	"LPOP","50327.12"
+	"RPOP","50556.12"
+	"SADD","49504.95"
+	"HSET","49504.95"
+	"SPOP","50025.02"
+	"LPUSH (needed to benchmark LRANGE)","48875.86"
+	"LRANGE_100 (first 100 elements)","33955.86"
+	"LRANGE_300 (first 300 elements)","16550.81"
+	"LRANGE_500 (first 450 elements)","13653.74"
+	"LRANGE_600 (first 600 elements)","11219.57"
+	"MSET (10 keys)","44682.75"
+	`
+	wants := map[string]float64{
+		"PING_INLINE": 48661.80,
+		"PING_BULK":   50301.81,
+		"SET":         48923.68,
+		"GET":         49382.71,
+		"INCR":        49975.02,
+		"LPUSH":       49875.31,
+		"RPUSH":       50276.52,
+		"LPOP":        50327.12,
+		"RPOP":        50556.12,
+		"SADD":        49504.95,
+		"HSET":        49504.95,
+		"SPOP":        50025.02,
+		"LRANGE_100":  33955.86,
+		"LRANGE_300":  16550.81,
+		"LRANGE_500":  13653.74,
+		"LRANGE_600":  11219.57,
+		"MSET":        44682.75,
+	}
+	for op, want := range wants {
+		if got, err := parseOperation(op, sampleData); err != nil {
+			t.Fatalf("failed to parse %s: %v", op, err)
+		} else if want != got {
+			t.Fatalf("wanted %f for op %s, got %f", want, op, got)
+		}
+	}
+}
diff --git a/test/benchmarks/fs/bazel_test.go b/test/benchmarks/fs/bazel_test.go
index fdcac1a7a..9b652fd43 100644
--- a/test/benchmarks/fs/bazel_test.go
+++ b/test/benchmarks/fs/bazel_test.go
@@ -15,6 +15,7 @@ package fs
 
 import (
 	"context"
+	"fmt"
 	"strings"
 	"testing"
 
@@ -51,10 +52,10 @@ func BenchmarkABSL(b *testing.B) {
 
 			workdir := "/abseil-cpp"
 
-			// Start a container.
+			// Start a container and sleep by an order of b.N.
 			if err := container.Spawn(ctx, dockerutil.RunOpts{
 				Image: "benchmarks/absl",
-			}, "sleep", "1000"); err != nil {
+			}, "sleep", fmt.Sprintf("%d", 1000000)); err != nil {
 				b.Fatalf("run failed with: %v", err)
 			}
 
@@ -67,15 +68,21 @@ func BenchmarkABSL(b *testing.B) {
 				workdir = "/tmp" + workdir
 			}
 
-			// Drop Caches.
-			if bm.clearCache {
-				if out, err := machine.RunCommand("/bin/sh -c sync; echo 3 > /proc/sys/vm/drop_caches"); err != nil {
-					b.Fatalf("failed to drop caches: %v %s", err, out)
-				}
-			}
-
+			// Restart profiles after the copy.
+			container.RestartProfiles()
 			b.ResetTimer()
+			// Drop Caches and bazel clean should happen inside the loop as we may use
+			// time options with b.N. (e.g. Run for an hour.)
 			for i := 0; i < b.N; i++ {
+				b.StopTimer()
+				// Drop Caches for clear cache runs.
+				if bm.clearCache {
+					if out, err := machine.RunCommand("/bin/sh", "-c", "sync && sysctl vm.drop_caches=3"); err != nil {
+						b.Skipf("failed to drop caches: %v %s. You probably need root.", err, out)
+					}
+				}
+				b.StartTimer()
+
 				got, err := container.Exec(ctx, dockerutil.ExecOpts{
 					WorkDir: workdir,
 				}, "bazel", "build", "-c", "opt", "absl/base/...")
@@ -88,6 +95,13 @@ func BenchmarkABSL(b *testing.B) {
 				if !strings.Contains(got, want) {
 					b.Fatalf("string %s not in: %s", want, got)
 				}
+				// Clean bazel in case we use b.N.
+				_, err = container.Exec(ctx, dockerutil.ExecOpts{
+					WorkDir: workdir,
+				}, "bazel", "clean")
+				if err != nil {
+					b.Fatalf("build failed with: %v", err)
+				}
 				b.StartTimer()
 			}
 		})
diff --git a/test/benchmarks/harness/machine.go b/test/benchmarks/harness/machine.go
index 93c0db9ce..88e5e841b 100644
--- a/test/benchmarks/harness/machine.go
+++ b/test/benchmarks/harness/machine.go
@@ -25,9 +25,14 @@ import (
 
 // Machine describes a real machine for use in benchmarks.
 type Machine interface {
-	// GetContainer gets a container from the machine,
+	// GetContainer gets a container from the machine. The container uses the
+	// runtime under test and is profiled if requested by flags.
 	GetContainer(ctx context.Context, log testutil.Logger) *dockerutil.Container
 
+	// GetNativeContainer gets a native container from the machine. Native containers
+	// use runc by default and are not profiled.
+	GetNativeContainer(ctx context.Context, log testutil.Logger) *dockerutil.Container
+
 	// RunCommand runs cmd on this machine.
 	RunCommand(cmd string, args ...string) (string, error)
 
@@ -47,6 +52,11 @@ func (l *localMachine) GetContainer(ctx context.Context, logger testutil.Logger)
 	return dockerutil.MakeContainer(ctx, logger)
 }
 
+// GetContainer implements Machine.GetContainer for localMachine.
+func (l *localMachine) GetNativeContainer(ctx context.Context, logger testutil.Logger) *dockerutil.Container {
+	return dockerutil.MakeNativeContainer(ctx, logger)
+}
+
 // RunCommand implements Machine.RunCommand for localMachine.
 func (l *localMachine) RunCommand(cmd string, args ...string) (string, error) {
 	c := exec.Command(cmd, args...)
diff --git a/test/benchmarks/harness/util.go b/test/benchmarks/harness/util.go
index cc7de6426..bc551c582 100644
--- a/test/benchmarks/harness/util.go
+++ b/test/benchmarks/harness/util.go
@@ -27,12 +27,20 @@ import (
 // IP:port.
 func WaitUntilServing(ctx context.Context, machine Machine, server net.IP, port int) error {
 	var logger testutil.DefaultLogger = "netcat"
-	netcat := machine.GetContainer(ctx, logger)
+	netcat := machine.GetNativeContainer(ctx, logger)
 	defer netcat.CleanUp(ctx)
 
-	cmd := fmt.Sprintf("while ! nc -zv %s %d; do true; done", server.String(), port)
+	cmd := fmt.Sprintf("while ! nc -zv %s %d; do true; done", server, port)
 	_, err := netcat.Run(ctx, dockerutil.RunOpts{
 		Image: "packetdrill",
 	}, "sh", "-c", cmd)
 	return err
 }
+
+// DropCaches drops caches on the provided machine. Requires root.
+func DropCaches(machine Machine) error {
+	if out, err := machine.RunCommand("/bin/sh", "-c", "sync | sysctl vm.drop_caches=3"); err != nil {
+		return fmt.Errorf("failed to drop caches: %v logs: %s", err, out)
+	}
+	return nil
+}
diff --git a/test/benchmarks/media/BUILD b/test/benchmarks/media/BUILD
new file mode 100644
index 000000000..6c41fc4f6
--- /dev/null
+++ b/test/benchmarks/media/BUILD
@@ -0,0 +1,21 @@
+load("//tools:defs.bzl", "go_library", "go_test")
+
+package(licenses = ["notice"])
+
+go_library(
+    name = "media",
+    testonly = 1,
+    srcs = ["media.go"],
+    deps = ["//test/benchmarks/harness"],
+)
+
+go_test(
+    name = "media_test",
+    size = "large",
+    srcs = ["ffmpeg_test.go"],
+    library = ":media",
+    deps = [
+        "//pkg/test/dockerutil",
+        "//test/benchmarks/harness",
+    ],
+)
diff --git a/test/benchmarks/media/ffmpeg_test.go b/test/benchmarks/media/ffmpeg_test.go
new file mode 100644
index 000000000..bfcfbab80
--- /dev/null
+++ b/test/benchmarks/media/ffmpeg_test.go
@@ -0,0 +1,52 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+package media
+
+import (
+	"context"
+	"strings"
+	"testing"
+
+	"gvisor.dev/gvisor/pkg/test/dockerutil"
+	"gvisor.dev/gvisor/test/benchmarks/harness"
+)
+
+// BenchmarkFfmpeg runs ffmpeg in a container and records runtime.
+// BenchmarkFfmpeg should run as root to drop caches.
+func BenchmarkFfmpeg(b *testing.B) {
+	machine, err := h.GetMachine()
+	if err != nil {
+		b.Fatalf("failed to get machine: %v", err)
+	}
+	defer machine.CleanUp()
+
+	ctx := context.Background()
+	container := machine.GetContainer(ctx, b)
+	cmd := strings.Split("ffmpeg -i video.mp4 -c:v libx264 -preset veryslow output.mp4", " ")
+
+	b.ResetTimer()
+	for i := 0; i < b.N; i++ {
+		b.StopTimer()
+		if err := harness.DropCaches(machine); err != nil {
+			b.Skipf("failed to drop caches: %v. You probably need root.", err)
+		}
+		b.StartTimer()
+
+		if _, err := container.Run(ctx, dockerutil.RunOpts{
+			Image: "benchmarks/ffmpeg",
+		}, cmd...); err != nil {
+			b.Fatalf("failed to run container: %v", err)
+		}
+	}
+}
diff --git a/test/benchmarks/media/media.go b/test/benchmarks/media/media.go
new file mode 100644
index 000000000..c7b35b758
--- /dev/null
+++ b/test/benchmarks/media/media.go
@@ -0,0 +1,31 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package media holds benchmarks around media processing applications.
+package media
+
+import (
+	"os"
+	"testing"
+
+	"gvisor.dev/gvisor/test/benchmarks/harness"
+)
+
+var h harness.Harness
+
+// TestMain is the main method for package media.
+func TestMain(m *testing.M) {
+	h.Init()
+	os.Exit(m.Run())
+}
diff --git a/test/benchmarks/network/BUILD b/test/benchmarks/network/BUILD
index 16d267bc8..363041fb7 100644
--- a/test/benchmarks/network/BUILD
+++ b/test/benchmarks/network/BUILD
@@ -24,6 +24,7 @@ go_test(
     ],
     deps = [
         "//pkg/test/dockerutil",
+        "//pkg/test/testutil",
         "//test/benchmarks/harness",
     ],
 )
diff --git a/test/benchmarks/network/httpd_test.go b/test/benchmarks/network/httpd_test.go
index f9afdf15f..fe23ca949 100644
--- a/test/benchmarks/network/httpd_test.go
+++ b/test/benchmarks/network/httpd_test.go
@@ -52,12 +52,12 @@ func BenchmarkHttpdConcurrency(b *testing.B) {
 	defer serverMachine.CleanUp()
 
 	// The test iterates over client concurrency, so set other parameters.
-	requests := 1000
+	requests := 10000
 	concurrency := []int{1, 5, 10, 25}
 	doc := docs["10Kb"]
 
 	for _, c := range concurrency {
-		b.Run(fmt.Sprintf("%dConcurrency", c), func(b *testing.B) {
+		b.Run(fmt.Sprintf("%d", c), func(b *testing.B) {
 			runHttpd(b, clientMachine, serverMachine, doc, requests, c)
 		})
 	}
@@ -78,7 +78,7 @@ func BenchmarkHttpdDocSize(b *testing.B) {
 	}
 	defer serverMachine.CleanUp()
 
-	requests := 1000
+	requests := 10000
 	concurrency := 1
 
 	for name, filename := range docs {
@@ -129,7 +129,7 @@ func runHttpd(b *testing.B, clientMachine, serverMachine harness.Machine, doc st
 	harness.WaitUntilServing(ctx, clientMachine, ip, servingPort)
 
 	// Grab a client.
-	client := clientMachine.GetContainer(ctx, b)
+	client := clientMachine.GetNativeContainer(ctx, b)
 	defer client.CleanUp(ctx)
 
 	path := fmt.Sprintf("http://%s:%d/%s", ip, servingPort, doc)
@@ -137,6 +137,7 @@ func runHttpd(b *testing.B, clientMachine, serverMachine harness.Machine, doc st
 	cmd = fmt.Sprintf("ab -n %d -c %d %s", requests, concurrency, path)
 
 	b.ResetTimer()
+	server.RestartProfiles()
 	for i := 0; i < b.N; i++ {
 		out, err := client.Run(ctx, dockerutil.RunOpts{
 			Image: "benchmarks/ab",
diff --git a/test/benchmarks/network/iperf_test.go b/test/benchmarks/network/iperf_test.go
index 664e0797e..a5e198e14 100644
--- a/test/benchmarks/network/iperf_test.go
+++ b/test/benchmarks/network/iperf_test.go
@@ -22,12 +22,13 @@ import (
 	"testing"
 
 	"gvisor.dev/gvisor/pkg/test/dockerutil"
+	"gvisor.dev/gvisor/pkg/test/testutil"
 	"gvisor.dev/gvisor/test/benchmarks/harness"
 )
 
 func BenchmarkIperf(b *testing.B) {
+	const time = 10 // time in seconds to run the client.
 
-	// Get two machines
 	clientMachine, err := h.GetMachine()
 	if err != nil {
 		b.Fatalf("failed to get machine: %v", err)
@@ -39,30 +40,32 @@ func BenchmarkIperf(b *testing.B) {
 		b.Fatalf("failed to get machine: %v", err)
 	}
 	defer serverMachine.CleanUp()
-
+	ctx := context.Background()
 	for _, bm := range []struct {
-		name          string
-		clientRuntime string
-		serverRuntime string
+		name       string
+		clientFunc func(context.Context, testutil.Logger) *dockerutil.Container
+		serverFunc func(context.Context, testutil.Logger) *dockerutil.Container
 	}{
 		// We are either measuring the server or the client. The other should be
 		// runc. e.g. Upload sees how fast the runtime under test uploads to a native
 		// server.
-		{name: "Upload", clientRuntime: dockerutil.Runtime(), serverRuntime: "runc"},
-		{name: "Download", clientRuntime: "runc", serverRuntime: dockerutil.Runtime()},
+		{
+			name:       "Upload",
+			clientFunc: clientMachine.GetContainer,
+			serverFunc: serverMachine.GetNativeContainer,
+		},
+		{
+			name:       "Download",
+			clientFunc: clientMachine.GetNativeContainer,
+			serverFunc: serverMachine.GetContainer,
+		},
 	} {
 		b.Run(bm.name, func(b *testing.B) {
-
-			// Get a container from the server and set its runtime.
-			ctx := context.Background()
-			server := serverMachine.GetContainer(ctx, b)
+			// Set up the containers.
+			server := bm.serverFunc(ctx, b)
 			defer server.CleanUp(ctx)
-			server.Runtime = bm.serverRuntime
-
-			// Get a container from the client and set its runtime.
-			client := clientMachine.GetContainer(ctx, b)
+			client := bm.clientFunc(ctx, b)
 			defer client.CleanUp(ctx)
-			client.Runtime = bm.clientRuntime
 
 			// iperf serves on port 5001 by default.
 			port := 5001
@@ -91,11 +94,14 @@ func BenchmarkIperf(b *testing.B) {
 			}
 
 			// iperf report in Kb realtime
-			cmd := fmt.Sprintf("iperf -f K --realtime -c %s -p %d", ip.String(), servingPort)
+			cmd := fmt.Sprintf("iperf -f K --realtime --time %d -c %s -p %d", time, ip.String(), servingPort)
 
 			// Run the client.
 			b.ResetTimer()
 
+			// Restart the server profiles. If the server isn't being profiled
+			// this does nothing.
+			server.RestartProfiles()
 			for i := 0; i < b.N; i++ {
 				out, err := client.Run(ctx, dockerutil.RunOpts{
 					Image: "benchmarks/iperf",
diff --git a/test/e2e/integration_test.go b/test/e2e/integration_test.go
index 5a9455b33..ef42b689a 100644
--- a/test/e2e/integration_test.go
+++ b/test/e2e/integration_test.go
@@ -40,6 +40,9 @@ import (
 	"gvisor.dev/gvisor/pkg/test/testutil"
 )
 
+// defaultWait is the default wait time used for tests.
+const defaultWait = time.Minute
+
 // httpRequestSucceeds sends a request to a given url and checks that the status is OK.
 func httpRequestSucceeds(client http.Client, server string, port int) error {
 	url := fmt.Sprintf("http://%s:%d", server, port)
@@ -76,10 +79,10 @@ func TestLifeCycle(t *testing.T) {
 	if err != nil {
 		t.Fatalf("docker.FindPort(80) failed: %v", err)
 	}
-	if err := testutil.WaitForHTTP(port, 30*time.Second); err != nil {
+	if err := testutil.WaitForHTTP(port, defaultWait); err != nil {
 		t.Fatalf("WaitForHTTP() timeout: %v", err)
 	}
-	client := http.Client{Timeout: time.Duration(2 * time.Second)}
+	client := http.Client{Timeout: defaultWait}
 	if err := httpRequestSucceeds(client, "localhost", port); err != nil {
 		t.Errorf("http request failed: %v", err)
 	}
@@ -116,12 +119,12 @@ func TestPauseResume(t *testing.T) {
 	}
 
 	// Wait until it's up and running.
-	if err := testutil.WaitForHTTP(port, 30*time.Second); err != nil {
+	if err := testutil.WaitForHTTP(port, defaultWait); err != nil {
 		t.Fatalf("WaitForHTTP() timeout: %v", err)
 	}
 
 	// Check that container is working.
-	client := http.Client{Timeout: time.Duration(2 * time.Second)}
+	client := http.Client{Timeout: defaultWait}
 	if err := httpRequestSucceeds(client, "localhost", port); err != nil {
 		t.Error("http request failed:", err)
 	}
@@ -131,6 +134,7 @@ func TestPauseResume(t *testing.T) {
 	}
 
 	// Check if container is paused.
+	client = http.Client{Timeout: 10 * time.Millisecond} // Don't wait a minute.
 	switch _, err := client.Get(fmt.Sprintf("http://localhost:%d", port)); v := err.(type) {
 	case nil:
 		t.Errorf("http req expected to fail but it succeeded")
@@ -147,11 +151,12 @@ func TestPauseResume(t *testing.T) {
 	}
 
 	// Wait until it's up and running.
-	if err := testutil.WaitForHTTP(port, 30*time.Second); err != nil {
+	if err := testutil.WaitForHTTP(port, defaultWait); err != nil {
 		t.Fatalf("WaitForHTTP() timeout: %v", err)
 	}
 
 	// Check if container is working again.
+	client = http.Client{Timeout: defaultWait}
 	if err := httpRequestSucceeds(client, "localhost", port); err != nil {
 		t.Error("http request failed:", err)
 	}
@@ -178,12 +183,12 @@ func TestCheckpointRestore(t *testing.T) {
 	if err := d.Checkpoint(ctx, "test"); err != nil {
 		t.Fatalf("docker checkpoint failed: %v", err)
 	}
-	if err := d.WaitTimeout(ctx, 30*time.Second); err != nil {
+	if err := d.WaitTimeout(ctx, defaultWait); err != nil {
 		t.Fatalf("wait failed: %v", err)
 	}
 
 	// TODO(b/143498576): Remove Poll after github.com/moby/moby/issues/38963 is fixed.
-	if err := testutil.Poll(func() error { return d.Restore(ctx, "test") }, 15*time.Second); err != nil {
+	if err := testutil.Poll(func() error { return d.Restore(ctx, "test") }, defaultWait); err != nil {
 		t.Fatalf("docker restore failed: %v", err)
 	}
 
@@ -194,12 +199,12 @@ func TestCheckpointRestore(t *testing.T) {
 	}
 
 	// Wait until it's up and running.
-	if err := testutil.WaitForHTTP(port, 30*time.Second); err != nil {
+	if err := testutil.WaitForHTTP(port, defaultWait); err != nil {
 		t.Fatalf("WaitForHTTP() timeout: %v", err)
 	}
 
 	// Check if container is working again.
-	client := http.Client{Timeout: time.Duration(2 * time.Second)}
+	client := http.Client{Timeout: defaultWait}
 	if err := httpRequestSucceeds(client, "localhost", port); err != nil {
 		t.Error("http request failed:", err)
 	}
@@ -236,7 +241,7 @@ func TestConnectToSelf(t *testing.T) {
 	if want := "server\n"; reply != want {
 		t.Errorf("Error on server, want: %q, got: %q", want, reply)
 	}
-	if _, err := d.WaitForOutput(ctx, "^client\n$", 1*time.Second); err != nil {
+	if _, err := d.WaitForOutput(ctx, "^client\n$", defaultWait); err != nil {
 		t.Fatalf("docker.WaitForOutput(client) timeout: %v", err)
 	}
 }
@@ -375,7 +380,7 @@ func TestTmpFile(t *testing.T) {
 	d := dockerutil.MakeContainer(ctx, t)
 	defer d.CleanUp(ctx)
 
-	opts := dockerutil.RunOpts{Image: "tmpfile"}
+	opts := dockerutil.RunOpts{Image: "basic/tmpfile"}
 	got, err := d.Run(ctx, opts, "cat", "/tmp/foo/file.txt")
 	if err != nil {
 		t.Fatalf("docker run failed: %v", err)
@@ -427,7 +432,7 @@ func TestHostOverlayfsCopyUp(t *testing.T) {
 	defer d.CleanUp(ctx)
 
 	if _, err := d.Run(ctx, dockerutil.RunOpts{
-		Image:   "hostoverlaytest",
+		Image:   "basic/hostoverlaytest",
 		WorkDir: "/root",
 	}, "./test"); err != nil {
 		t.Fatalf("docker run failed: %v", err)
diff --git a/test/image/image_test.go b/test/image/image_test.go
index 8aa78035f..ac6186688 100644
--- a/test/image/image_test.go
+++ b/test/image/image_test.go
@@ -37,6 +37,13 @@ import (
 	"gvisor.dev/gvisor/pkg/test/testutil"
 )
 
+// defaultWait defines how long to wait for progress.
+//
+// See BUILD: This is at least a "large" test, so allow up to 1 minute for any
+// given "wait" step. Note that all tests are run in parallel, which may cause
+// individual slow-downs (but a huge speed-up in aggregate).
+const defaultWait = time.Minute
+
 func TestHelloWorld(t *testing.T) {
 	ctx := context.Background()
 	d := dockerutil.MakeContainer(ctx, t)
@@ -130,7 +137,7 @@ func TestHttpd(t *testing.T) {
 	}
 
 	// Wait until it's up and running.
-	if err := testutil.WaitForHTTP(port, 30*time.Second); err != nil {
+	if err := testutil.WaitForHTTP(port, defaultWait); err != nil {
 		t.Errorf("WaitForHTTP() timeout: %v", err)
 	}
 
@@ -159,7 +166,7 @@ func TestNginx(t *testing.T) {
 	}
 
 	// Wait until it's up and running.
-	if err := testutil.WaitForHTTP(port, 30*time.Second); err != nil {
+	if err := testutil.WaitForHTTP(port, defaultWait); err != nil {
 		t.Errorf("WaitForHTTP() timeout: %v", err)
 	}
 
@@ -180,7 +187,7 @@ func TestMysql(t *testing.T) {
 	}
 
 	// Wait until it's up and running.
-	if _, err := server.WaitForOutput(ctx, "port: 3306  MySQL Community Server", 3*time.Minute); err != nil {
+	if _, err := server.WaitForOutput(ctx, "port: 3306  MySQL Community Server", defaultWait); err != nil {
 		t.Fatalf("WaitForOutput() timeout: %v", err)
 	}
 
@@ -200,7 +207,7 @@ func TestMysql(t *testing.T) {
 	}
 
 	// Ensure file executed to the end and shutdown mysql.
-	if _, err := server.WaitForOutput(ctx, "mysqld: Shutdown complete", 30*time.Second); err != nil {
+	if _, err := server.WaitForOutput(ctx, "mysqld: Shutdown complete", defaultWait); err != nil {
 		t.Fatalf("WaitForOutput() timeout: %v", err)
 	}
 }
@@ -225,7 +232,7 @@ func TestTomcat(t *testing.T) {
 	}
 
 	// Wait until it's up and running.
-	if err := testutil.WaitForHTTP(port, 30*time.Second); err != nil {
+	if err := testutil.WaitForHTTP(port, defaultWait); err != nil {
 		t.Fatalf("WaitForHTTP() timeout: %v", err)
 	}
 
@@ -262,7 +269,7 @@ func TestRuby(t *testing.T) {
 	}
 
 	// Wait until it's up and running, 'gem install' can take some time.
-	if err := testutil.WaitForHTTP(port, 1*time.Minute); err != nil {
+	if err := testutil.WaitForHTTP(port, time.Minute); err != nil {
 		t.Fatalf("WaitForHTTP() timeout: %v", err)
 	}
 
@@ -299,7 +306,7 @@ func TestStdio(t *testing.T) {
 	}
 
 	for _, want := range []string{wantStdout, wantStderr} {
-		if _, err := d.WaitForOutput(ctx, want, 5*time.Second); err != nil {
+		if _, err := d.WaitForOutput(ctx, want, defaultWait); err != nil {
 			t.Fatalf("docker didn't get output %q : %v", want, err)
 		}
 	}
diff --git a/test/iptables/filter_input.go b/test/iptables/filter_input.go
index 068f228bd..af4355ba8 100644
--- a/test/iptables/filter_input.go
+++ b/test/iptables/filter_input.go
@@ -81,7 +81,7 @@ func (FilterInputDropUDP) ContainerAction(ip net.IP) error {
 
 // LocalAction implements TestCase.LocalAction.
 func (FilterInputDropUDP) LocalAction(ip net.IP) error {
-	return sendUDPLoop(ip, dropPort, sendloopDuration)
+	return spawnUDPLoop(ip, dropPort, sendloopDuration)
 }
 
 // FilterInputDropOnlyUDP tests that "-p udp -j DROP" only affects UDP traffic.
@@ -141,7 +141,7 @@ func (FilterInputDropUDPPort) ContainerAction(ip net.IP) error {
 
 // LocalAction implements TestCase.LocalAction.
 func (FilterInputDropUDPPort) LocalAction(ip net.IP) error {
-	return sendUDPLoop(ip, dropPort, sendloopDuration)
+	return spawnUDPLoop(ip, dropPort, sendloopDuration)
 }
 
 // FilterInputDropDifferentUDPPort tests that dropping traffic for a single UDP port
@@ -169,7 +169,7 @@ func (FilterInputDropDifferentUDPPort) ContainerAction(ip net.IP) error {
 
 // LocalAction implements TestCase.LocalAction.
 func (FilterInputDropDifferentUDPPort) LocalAction(ip net.IP) error {
-	return sendUDPLoop(ip, acceptPort, sendloopDuration)
+	return spawnUDPLoop(ip, acceptPort, sendloopDuration)
 }
 
 // FilterInputDropTCPDestPort tests that connections are not accepted on specified source ports.
@@ -269,7 +269,7 @@ func (FilterInputDropAll) ContainerAction(ip net.IP) error {
 
 // LocalAction implements TestCase.LocalAction.
 func (FilterInputDropAll) LocalAction(ip net.IP) error {
-	return sendUDPLoop(ip, dropPort, sendloopDuration)
+	return spawnUDPLoop(ip, dropPort, sendloopDuration)
 }
 
 // FilterInputMultiUDPRules verifies that multiple UDP rules are applied
@@ -365,7 +365,7 @@ func (FilterInputDefaultPolicyAccept) ContainerAction(ip net.IP) error {
 
 // LocalAction implements TestCase.LocalAction.
 func (FilterInputDefaultPolicyAccept) LocalAction(ip net.IP) error {
-	return sendUDPLoop(ip, acceptPort, sendloopDuration)
+	return spawnUDPLoop(ip, acceptPort, sendloopDuration)
 }
 
 // FilterInputDefaultPolicyDrop tests the default DROP policy.
@@ -396,7 +396,7 @@ func (FilterInputDefaultPolicyDrop) ContainerAction(ip net.IP) error {
 
 // LocalAction implements TestCase.LocalAction.
 func (FilterInputDefaultPolicyDrop) LocalAction(ip net.IP) error {
-	return sendUDPLoop(ip, acceptPort, sendloopDuration)
+	return spawnUDPLoop(ip, acceptPort, sendloopDuration)
 }
 
 // FilterInputReturnUnderflow tests that -j RETURN in a built-in chain causes
@@ -428,7 +428,7 @@ func (FilterInputReturnUnderflow) ContainerAction(ip net.IP) error {
 
 // LocalAction implements TestCase.LocalAction.
 func (FilterInputReturnUnderflow) LocalAction(ip net.IP) error {
-	return sendUDPLoop(ip, acceptPort, sendloopDuration)
+	return spawnUDPLoop(ip, acceptPort, sendloopDuration)
 }
 
 // FilterInputSerializeJump verifies that we can serialize jumps.
@@ -482,7 +482,7 @@ func (FilterInputJumpBasic) ContainerAction(ip net.IP) error {
 
 // LocalAction implements TestCase.LocalAction.
 func (FilterInputJumpBasic) LocalAction(ip net.IP) error {
-	return sendUDPLoop(ip, acceptPort, sendloopDuration)
+	return spawnUDPLoop(ip, acceptPort, sendloopDuration)
 }
 
 // FilterInputJumpReturn jumps, returns, and executes a rule.
@@ -512,7 +512,7 @@ func (FilterInputJumpReturn) ContainerAction(ip net.IP) error {
 
 // LocalAction implements TestCase.LocalAction.
 func (FilterInputJumpReturn) LocalAction(ip net.IP) error {
-	return sendUDPLoop(ip, acceptPort, sendloopDuration)
+	return spawnUDPLoop(ip, acceptPort, sendloopDuration)
 }
 
 // FilterInputJumpReturnDrop jumps to a chain, returns, and DROPs packets.
@@ -549,7 +549,7 @@ func (FilterInputJumpReturnDrop) ContainerAction(ip net.IP) error {
 
 // LocalAction implements TestCase.LocalAction.
 func (FilterInputJumpReturnDrop) LocalAction(ip net.IP) error {
-	return sendUDPLoop(ip, dropPort, sendloopDuration)
+	return spawnUDPLoop(ip, dropPort, sendloopDuration)
 }
 
 // FilterInputJumpBuiltin verifies that jumping to a top-levl chain is illegal.
@@ -604,7 +604,7 @@ func (FilterInputJumpTwice) ContainerAction(ip net.IP) error {
 
 // LocalAction implements TestCase.LocalAction.
 func (FilterInputJumpTwice) LocalAction(ip net.IP) error {
-	return sendUDPLoop(ip, acceptPort, sendloopDuration)
+	return spawnUDPLoop(ip, acceptPort, sendloopDuration)
 }
 
 // FilterInputDestination verifies that we can filter packets via `-d
@@ -638,7 +638,7 @@ func (FilterInputDestination) ContainerAction(ip net.IP) error {
 
 // LocalAction implements TestCase.LocalAction.
 func (FilterInputDestination) LocalAction(ip net.IP) error {
-	return sendUDPLoop(ip, acceptPort, sendloopDuration)
+	return spawnUDPLoop(ip, acceptPort, sendloopDuration)
 }
 
 // FilterInputInvertDestination verifies that we can filter packets via `! -d
@@ -667,7 +667,7 @@ func (FilterInputInvertDestination) ContainerAction(ip net.IP) error {
 
 // LocalAction implements TestCase.LocalAction.
 func (FilterInputInvertDestination) LocalAction(ip net.IP) error {
-	return sendUDPLoop(ip, acceptPort, sendloopDuration)
+	return spawnUDPLoop(ip, acceptPort, sendloopDuration)
 }
 
 // FilterInputSource verifies that we can filter packets via `-s
@@ -696,7 +696,7 @@ func (FilterInputSource) ContainerAction(ip net.IP) error {
 
 // LocalAction implements TestCase.LocalAction.
 func (FilterInputSource) LocalAction(ip net.IP) error {
-	return sendUDPLoop(ip, acceptPort, sendloopDuration)
+	return spawnUDPLoop(ip, acceptPort, sendloopDuration)
 }
 
 // FilterInputInvertSource verifies that we can filter packets via `! -s
@@ -725,5 +725,5 @@ func (FilterInputInvertSource) ContainerAction(ip net.IP) error {
 
 // LocalAction implements TestCase.LocalAction.
 func (FilterInputInvertSource) LocalAction(ip net.IP) error {
-	return sendUDPLoop(ip, acceptPort, sendloopDuration)
+	return spawnUDPLoop(ip, acceptPort, sendloopDuration)
 }
diff --git a/test/iptables/iptables_test.go b/test/iptables/iptables_test.go
index f5ac79370..f303030aa 100644
--- a/test/iptables/iptables_test.go
+++ b/test/iptables/iptables_test.go
@@ -263,6 +263,13 @@ func TestNATPreRedirectTCPPort(t *testing.T) {
 	singleTest(t, NATPreRedirectTCPPort{})
 }
 
+func TestNATPreRedirectTCPOutgoing(t *testing.T) {
+	singleTest(t, NATPreRedirectTCPOutgoing{})
+}
+
+func TestNATOutRedirectTCPIncoming(t *testing.T) {
+	singleTest(t, NATOutRedirectTCPIncoming{})
+}
 func TestNATOutRedirectUDPPort(t *testing.T) {
 	singleTest(t, NATOutRedirectUDPPort{})
 }
diff --git a/test/iptables/iptables_util.go b/test/iptables/iptables_util.go
index d4bc55b24..174694002 100644
--- a/test/iptables/iptables_util.go
+++ b/test/iptables/iptables_util.go
@@ -84,17 +84,42 @@ func listenUDP(port int, timeout time.Duration) error {
 // sendUDPLoop sends 1 byte UDP packets repeatedly to the IP and port specified
 // over a duration.
 func sendUDPLoop(ip net.IP, port int, duration time.Duration) error {
-	// Send packets for a few seconds.
+	conn, err := connectUDP(ip, port)
+	if err != nil {
+		return err
+	}
+	defer conn.Close()
+	loopUDP(conn, duration)
+	return nil
+}
+
+// spawnUDPLoop works like sendUDPLoop, but returns immediately and sends
+// packets in another goroutine.
+func spawnUDPLoop(ip net.IP, port int, duration time.Duration) error {
+	conn, err := connectUDP(ip, port)
+	if err != nil {
+		return err
+	}
+	go func() {
+		defer conn.Close()
+		loopUDP(conn, duration)
+	}()
+	return nil
+}
+
+func connectUDP(ip net.IP, port int) (net.Conn, error) {
 	remote := net.UDPAddr{
 		IP:   ip,
 		Port: port,
 	}
 	conn, err := net.DialUDP(network, nil, &remote)
 	if err != nil {
-		return err
+		return nil, err
 	}
-	defer conn.Close()
+	return conn, nil
+}
 
+func loopUDP(conn net.Conn, duration time.Duration) {
 	to := time.After(duration)
 	for timedOut := false; !timedOut; {
 		// This may return an error (connection refused) if the remote
@@ -109,8 +134,6 @@ func sendUDPLoop(ip net.IP, port int, duration time.Duration) error {
 			time.Sleep(200 * time.Millisecond)
 		}
 	}
-
-	return nil
 }
 
 // listenTCP listens for connections on a TCP port.
diff --git a/test/iptables/nat.go b/test/iptables/nat.go
index 8562b0820..23288577d 100644
--- a/test/iptables/nat.go
+++ b/test/iptables/nat.go
@@ -28,6 +28,8 @@ const (
 func init() {
 	RegisterTestCase(NATPreRedirectUDPPort{})
 	RegisterTestCase(NATPreRedirectTCPPort{})
+	RegisterTestCase(NATPreRedirectTCPOutgoing{})
+	RegisterTestCase(NATOutRedirectTCPIncoming{})
 	RegisterTestCase(NATOutRedirectUDPPort{})
 	RegisterTestCase(NATOutRedirectTCPPort{})
 	RegisterTestCase(NATDropUDP{})
@@ -65,7 +67,7 @@ func (NATPreRedirectUDPPort) ContainerAction(ip net.IP) error {
 
 // LocalAction implements TestCase.LocalAction.
 func (NATPreRedirectUDPPort) LocalAction(ip net.IP) error {
-	return sendUDPLoop(ip, acceptPort, sendloopDuration)
+	return spawnUDPLoop(ip, acceptPort, sendloopDuration)
 }
 
 // NATPreRedirectTCPPort tests that connections are redirected on specified ports.
@@ -91,6 +93,56 @@ func (NATPreRedirectTCPPort) LocalAction(ip net.IP) error {
 	return connectTCP(ip, dropPort, sendloopDuration)
 }
 
+// NATPreRedirectTCPOutgoing verifies that outgoing TCP connections aren't
+// affected by PREROUTING connection tracking.
+type NATPreRedirectTCPOutgoing struct{}
+
+// Name implements TestCase.Name.
+func (NATPreRedirectTCPOutgoing) Name() string {
+	return "NATPreRedirectTCPOutgoing"
+}
+
+// ContainerAction implements TestCase.ContainerAction.
+func (NATPreRedirectTCPOutgoing) ContainerAction(ip net.IP) error {
+	// Redirect all incoming TCP traffic to a closed port.
+	if err := natTable("-A", "PREROUTING", "-p", "tcp", "-j", "REDIRECT", "--to-ports", fmt.Sprintf("%d", dropPort)); err != nil {
+		return err
+	}
+
+	// Establish a connection to the host process.
+	return connectTCP(ip, acceptPort, sendloopDuration)
+}
+
+// LocalAction implements TestCase.LocalAction.
+func (NATPreRedirectTCPOutgoing) LocalAction(ip net.IP) error {
+	return listenTCP(acceptPort, sendloopDuration)
+}
+
+// NATOutRedirectTCPIncoming verifies that incoming TCP connections aren't
+// affected by OUTPUT connection tracking.
+type NATOutRedirectTCPIncoming struct{}
+
+// Name implements TestCase.Name.
+func (NATOutRedirectTCPIncoming) Name() string {
+	return "NATOutRedirectTCPIncoming"
+}
+
+// ContainerAction implements TestCase.ContainerAction.
+func (NATOutRedirectTCPIncoming) ContainerAction(ip net.IP) error {
+	// Redirect all outgoing TCP traffic to a closed port.
+	if err := natTable("-A", "OUTPUT", "-p", "tcp", "-j", "REDIRECT", "--to-ports", fmt.Sprintf("%d", dropPort)); err != nil {
+		return err
+	}
+
+	// Establish a connection to the host process.
+	return listenTCP(acceptPort, sendloopDuration)
+}
+
+// LocalAction implements TestCase.LocalAction.
+func (NATOutRedirectTCPIncoming) LocalAction(ip net.IP) error {
+	return connectTCP(ip, acceptPort, sendloopDuration)
+}
+
 // NATOutRedirectUDPPort tests that packets are redirected to different port.
 type NATOutRedirectUDPPort struct{}
 
@@ -135,7 +187,7 @@ func (NATDropUDP) ContainerAction(ip net.IP) error {
 
 // LocalAction implements TestCase.LocalAction.
 func (NATDropUDP) LocalAction(ip net.IP) error {
-	return sendUDPLoop(ip, acceptPort, sendloopDuration)
+	return spawnUDPLoop(ip, acceptPort, sendloopDuration)
 }
 
 // NATAcceptAll tests that all UDP packets are accepted.
@@ -161,7 +213,7 @@ func (NATAcceptAll) ContainerAction(ip net.IP) error {
 
 // LocalAction implements TestCase.LocalAction.
 func (NATAcceptAll) LocalAction(ip net.IP) error {
-	return sendUDPLoop(ip, acceptPort, sendloopDuration)
+	return spawnUDPLoop(ip, acceptPort, sendloopDuration)
 }
 
 // NATOutRedirectIP uses iptables to select packets based on destination IP and
@@ -258,7 +310,7 @@ func (NATPreRedirectIP) ContainerAction(ip net.IP) error {
 
 // LocalAction implements TestCase.LocalAction.
 func (NATPreRedirectIP) LocalAction(ip net.IP) error {
-	return sendUDPLoop(ip, dropPort, sendloopDuration)
+	return spawnUDPLoop(ip, dropPort, sendloopDuration)
 }
 
 // NATPreDontRedirectIP tests that iptables matching with "-d" does not match
@@ -280,7 +332,7 @@ func (NATPreDontRedirectIP) ContainerAction(ip net.IP) error {
 
 // LocalAction implements TestCase.LocalAction.
 func (NATPreDontRedirectIP) LocalAction(ip net.IP) error {
-	return sendUDPLoop(ip, acceptPort, sendloopDuration)
+	return spawnUDPLoop(ip, acceptPort, sendloopDuration)
 }
 
 // NATPreRedirectInvert tests that iptables can match with "! -d".
@@ -301,7 +353,7 @@ func (NATPreRedirectInvert) ContainerAction(ip net.IP) error {
 
 // LocalAction implements TestCase.LocalAction.
 func (NATPreRedirectInvert) LocalAction(ip net.IP) error {
-	return sendUDPLoop(ip, dropPort, sendloopDuration)
+	return spawnUDPLoop(ip, dropPort, sendloopDuration)
 }
 
 // NATRedirectRequiresProtocol tests that use of the --to-ports flag requires a
diff --git a/test/packetdrill/defs.bzl b/test/packetdrill/defs.bzl
index f499c177b..fc28ce9ba 100644
--- a/test/packetdrill/defs.bzl
+++ b/test/packetdrill/defs.bzl
@@ -26,7 +26,7 @@ def _packetdrill_test_impl(ctx):
 
     transitive_files = depset()
     if hasattr(ctx.attr._test_runner, "data_runfiles"):
-        transitive_files = depset(ctx.attr._test_runner.data_runfiles.files)
+        transitive_files = ctx.attr._test_runner.data_runfiles.files
     runfiles = ctx.runfiles(
         files = [test_runner] + ctx.files._init_script + ctx.files.scripts,
         transitive_files = transitive_files,
@@ -60,11 +60,15 @@ _packetdrill_test = rule(
     implementation = _packetdrill_test_impl,
 )
 
-_PACKETDRILL_TAGS = ["local", "manual"]
+PACKETDRILL_TAGS = [
+    "local",
+    "manual",
+    "packetdrill",
+]
 
 def packetdrill_linux_test(name, **kwargs):
     if "tags" not in kwargs:
-        kwargs["tags"] = _PACKETDRILL_TAGS
+        kwargs["tags"] = PACKETDRILL_TAGS
     _packetdrill_test(
         name = name,
         flags = ["--dut_platform", "linux"],
@@ -73,7 +77,7 @@ def packetdrill_linux_test(name, **kwargs):
 
 def packetdrill_netstack_test(name, **kwargs):
     if "tags" not in kwargs:
-        kwargs["tags"] = _PACKETDRILL_TAGS
+        kwargs["tags"] = PACKETDRILL_TAGS
     _packetdrill_test(
         name = name,
         # This is the default runtime unless
diff --git a/test/packetimpact/runner/defs.bzl b/test/packetimpact/runner/defs.bzl
index 77cdfea12..79b3c9162 100644
--- a/test/packetimpact/runner/defs.bzl
+++ b/test/packetimpact/runner/defs.bzl
@@ -55,7 +55,11 @@ _packetimpact_test = rule(
     implementation = _packetimpact_test_impl,
 )
 
-PACKETIMPACT_TAGS = ["local", "manual"]
+PACKETIMPACT_TAGS = [
+    "local",
+    "manual",
+    "packetimpact",
+]
 
 def packetimpact_linux_test(
         name,
@@ -75,7 +79,7 @@ def packetimpact_linux_test(
         name = name + "_linux_test",
         testbench_binary = testbench_binary,
         flags = ["--dut_platform", "linux"] + expect_failure_flag,
-        tags = PACKETIMPACT_TAGS + ["packetimpact"],
+        tags = PACKETIMPACT_TAGS,
         **kwargs
     )
 
@@ -101,7 +105,7 @@ def packetimpact_netstack_test(
         # This is the default runtime unless
         # "--test_arg=--runtime=OTHER_RUNTIME" is used to override the value.
         flags = ["--dut_platform", "netstack", "--runtime=runsc-d"] + expect_failure_flag,
-        tags = PACKETIMPACT_TAGS + ["packetimpact"],
+        tags = PACKETIMPACT_TAGS,
         **kwargs
     )
 
@@ -121,7 +125,10 @@ def packetimpact_go_test(name, size = "small", pure = True, expect_linux_failure
         name = testbench_binary,
         size = size,
         pure = pure,
-        tags = PACKETIMPACT_TAGS,
+        tags = [
+            "local",
+            "manual",
+        ],
         **kwargs
     )
     packetimpact_linux_test(
diff --git a/test/packetimpact/runner/packetimpact_test.go b/test/packetimpact/runner/packetimpact_test.go
index ff5f5c7f1..74e1e6def 100644
--- a/test/packetimpact/runner/packetimpact_test.go
+++ b/test/packetimpact/runner/packetimpact_test.go
@@ -142,7 +142,7 @@ func TestOne(t *testing.T) {
 	// Create the Docker container for the DUT.
 	dut := dockerutil.MakeContainer(ctx, logger("dut"))
 	if *dutPlatform == "linux" {
-		dut.Runtime = ""
+		dut = dockerutil.MakeNativeContainer(ctx, logger("dut"))
 	}
 
 	runOpts := dockerutil.RunOpts{
@@ -208,8 +208,7 @@ func TestOne(t *testing.T) {
 	}
 
 	// Create the Docker container for the testbench.
-	testbench := dockerutil.MakeContainer(ctx, logger("testbench"))
-	testbench.Runtime = "" // The testbench always runs on Linux.
+	testbench := dockerutil.MakeNativeContainer(ctx, logger("testbench"))
 
 	tbb := path.Base(*testbenchBinary)
 	containerTestbenchBinary := "/packetimpact/" + tbb
@@ -280,11 +279,13 @@ func TestOne(t *testing.T) {
 	}
 
 	// Because the Linux kernel receives the SYN-ACK but didn't send the SYN it
-	// will issue a RST. To prevent this IPtables can be used to filter out all
+	// will issue an RST. To prevent this IPtables can be used to filter out all
 	// incoming packets. The raw socket that packetimpact tests use will still see
 	// everything.
-	if logs, err := testbench.Exec(ctx, dockerutil.ExecOpts{}, "iptables", "-A", "INPUT", "-i", testNetDev, "-j", "DROP"); err != nil {
-		t.Fatalf("unable to Exec iptables on container %s: %s, logs from testbench:\n%s", testbench.Name, err, logs)
+	for _, bin := range []string{"iptables", "ip6tables"} {
+		if logs, err := testbench.Exec(ctx, dockerutil.ExecOpts{}, bin, "-A", "INPUT", "-i", testNetDev, "-p", "tcp", "-j", "DROP"); err != nil {
+			t.Fatalf("unable to Exec %s on container %s: %s, logs from testbench:\n%s", bin, testbench.Name, err, logs)
+		}
 	}
 
 	// FIXME(b/156449515): Some piece of the system has a race. The old
diff --git a/test/packetimpact/testbench/connections.go b/test/packetimpact/testbench/connections.go
index 5d9cec73e..3af5f83fd 100644
--- a/test/packetimpact/testbench/connections.go
+++ b/test/packetimpact/testbench/connections.go
@@ -41,7 +41,8 @@ func portFromSockaddr(sa unix.Sockaddr) (uint16, error) {
 	return 0, fmt.Errorf("sockaddr type %T does not contain port", sa)
 }
 
-// pickPort makes a new socket and returns the socket FD and port. The domain should be AF_INET or AF_INET6. The caller must close the FD when done with
+// pickPort makes a new socket and returns the socket FD and port. The domain
+// should be AF_INET or AF_INET6. The caller must close the FD when done with
 // the port if there is no error.
 func pickPort(domain, typ int) (fd int, port uint16, err error) {
 	fd, err = unix.Socket(domain, typ, 0)
@@ -428,7 +429,6 @@ type Connection struct {
 	layerStates []layerState
 	injector    Injector
 	sniffer     Sniffer
-	t           *testing.T
 }
 
 // Returns the default incoming frame against which to match. If received is
@@ -461,7 +461,9 @@ func (conn *Connection) match(override, received Layers) bool {
 }
 
 // Close frees associated resources held by the Connection.
-func (conn *Connection) Close() {
+func (conn *Connection) Close(t *testing.T) {
+	t.Helper()
+
 	errs := multierr.Combine(conn.sniffer.close(), conn.injector.close())
 	for _, s := range conn.layerStates {
 		if err := s.close(); err != nil {
@@ -469,7 +471,7 @@ func (conn *Connection) Close() {
 		}
 	}
 	if errs != nil {
-		conn.t.Fatalf("unable to close %+v: %s", conn, errs)
+		t.Fatalf("unable to close %+v: %s", conn, errs)
 	}
 }
 
@@ -481,7 +483,9 @@ func (conn *Connection) Close() {
 // overriden first. As an example, valid values of overrideLayers for a TCP-
 // over-IPv4-over-Ethernet connection are: nil, [TCP], [IPv4, TCP], and
 // [Ethernet, IPv4, TCP].
-func (conn *Connection) CreateFrame(overrideLayers Layers, additionalLayers ...Layer) Layers {
+func (conn *Connection) CreateFrame(t *testing.T, overrideLayers Layers, additionalLayers ...Layer) Layers {
+	t.Helper()
+
 	var layersToSend Layers
 	for i, s := range conn.layerStates {
 		layer := s.outgoing()
@@ -490,7 +494,7 @@ func (conn *Connection) CreateFrame(overrideLayers Layers, additionalLayers ...L
 		// end.
 		if j := len(overrideLayers) - (len(conn.layerStates) - i); j >= 0 {
 			if err := layer.merge(overrideLayers[j]); err != nil {
-				conn.t.Fatalf("can't merge %+v into %+v: %s", layer, overrideLayers[j], err)
+				t.Fatalf("can't merge %+v into %+v: %s", layer, overrideLayers[j], err)
 			}
 		}
 		layersToSend = append(layersToSend, layer)
@@ -504,21 +508,25 @@ func (conn *Connection) CreateFrame(overrideLayers Layers, additionalLayers ...L
 // This method is useful for sending out-of-band control messages such as
 // ICMP packets, where it would not make sense to update the transport layer's
 // state using the ICMP header.
-func (conn *Connection) SendFrameStateless(frame Layers) {
+func (conn *Connection) SendFrameStateless(t *testing.T, frame Layers) {
+	t.Helper()
+
 	outBytes, err := frame.ToBytes()
 	if err != nil {
-		conn.t.Fatalf("can't build outgoing packet: %s", err)
+		t.Fatalf("can't build outgoing packet: %s", err)
 	}
-	conn.injector.Send(outBytes)
+	conn.injector.Send(t, outBytes)
 }
 
 // SendFrame sends a frame on the wire and updates the state of all layers.
-func (conn *Connection) SendFrame(frame Layers) {
+func (conn *Connection) SendFrame(t *testing.T, frame Layers) {
+	t.Helper()
+
 	outBytes, err := frame.ToBytes()
 	if err != nil {
-		conn.t.Fatalf("can't build outgoing packet: %s", err)
+		t.Fatalf("can't build outgoing packet: %s", err)
 	}
-	conn.injector.Send(outBytes)
+	conn.injector.Send(t, outBytes)
 
 	// frame might have nil values where the caller wanted to use default values.
 	// sentFrame will have no nil values in it because it comes from parsing the
@@ -527,7 +535,7 @@ func (conn *Connection) SendFrame(frame Layers) {
 	// Update the state of each layer based on what was sent.
 	for i, s := range conn.layerStates {
 		if err := s.sent(sentFrame[i]); err != nil {
-			conn.t.Fatalf("Unable to update the state of %+v with %s: %s", s, sentFrame[i], err)
+			t.Fatalf("Unable to update the state of %+v with %s: %s", s, sentFrame[i], err)
 		}
 	}
 }
@@ -537,18 +545,22 @@ func (conn *Connection) SendFrame(frame Layers) {
 //
 // Types defined with Connection as the underlying type should expose
 // type-safe versions of this method.
-func (conn *Connection) send(overrideLayers Layers, additionalLayers ...Layer) {
-	conn.SendFrame(conn.CreateFrame(overrideLayers, additionalLayers...))
+func (conn *Connection) send(t *testing.T, overrideLayers Layers, additionalLayers ...Layer) {
+	t.Helper()
+
+	conn.SendFrame(t, conn.CreateFrame(t, overrideLayers, additionalLayers...))
 }
 
 // recvFrame gets the next successfully parsed frame (of type Layers) within the
 // timeout provided. If no parsable frame arrives before the timeout, it returns
 // nil.
-func (conn *Connection) recvFrame(timeout time.Duration) Layers {
+func (conn *Connection) recvFrame(t *testing.T, timeout time.Duration) Layers {
+	t.Helper()
+
 	if timeout <= 0 {
 		return nil
 	}
-	b := conn.sniffer.Recv(timeout)
+	b := conn.sniffer.Recv(t, timeout)
 	if b == nil {
 		return nil
 	}
@@ -568,32 +580,36 @@ func (e *layersError) Error() string {
 // Expect expects a frame with the final layerStates layer matching the
 // provided Layer within the timeout specified. If it doesn't arrive in time,
 // an error is returned.
-func (conn *Connection) Expect(layer Layer, timeout time.Duration) (Layer, error) {
+func (conn *Connection) Expect(t *testing.T, layer Layer, timeout time.Duration) (Layer, error) {
+	t.Helper()
+
 	// Make a frame that will ignore all but the final layer.
 	layers := make([]Layer, len(conn.layerStates))
 	layers[len(layers)-1] = layer
 
-	gotFrame, err := conn.ExpectFrame(layers, timeout)
+	gotFrame, err := conn.ExpectFrame(t, layers, timeout)
 	if err != nil {
 		return nil, err
 	}
 	if len(conn.layerStates)-1 < len(gotFrame) {
 		return gotFrame[len(conn.layerStates)-1], nil
 	}
-	conn.t.Fatal("the received frame should be at least as long as the expected layers")
+	t.Fatalf("the received frame should be at least as long as the expected layers, got %d layers, want at least %d layers, got frame: %#v", len(gotFrame), len(conn.layerStates), gotFrame)
 	panic("unreachable")
 }
 
 // ExpectFrame expects a frame that matches the provided Layers within the
 // timeout specified. If one arrives in time, the Layers is returned without an
 // error. If it doesn't arrive in time, it returns nil and error is non-nil.
-func (conn *Connection) ExpectFrame(layers Layers, timeout time.Duration) (Layers, error) {
+func (conn *Connection) ExpectFrame(t *testing.T, layers Layers, timeout time.Duration) (Layers, error) {
+	t.Helper()
+
 	deadline := time.Now().Add(timeout)
 	var errs error
 	for {
 		var gotLayers Layers
 		if timeout = time.Until(deadline); timeout > 0 {
-			gotLayers = conn.recvFrame(timeout)
+			gotLayers = conn.recvFrame(t, timeout)
 		}
 		if gotLayers == nil {
 			if errs == nil {
@@ -604,7 +620,7 @@ func (conn *Connection) ExpectFrame(layers Layers, timeout time.Duration) (Layer
 		if conn.match(layers, gotLayers) {
 			for i, s := range conn.layerStates {
 				if err := s.received(gotLayers[i]); err != nil {
-					conn.t.Fatal(err)
+					t.Fatalf("failed to update test connection's layer states based on received frame: %s", err)
 				}
 			}
 			return gotLayers, nil
@@ -615,8 +631,10 @@ func (conn *Connection) ExpectFrame(layers Layers, timeout time.Duration) (Layer
 
 // Drain drains the sniffer's receive buffer by receiving packets until there's
 // nothing else to receive.
-func (conn *Connection) Drain() {
-	conn.sniffer.Drain()
+func (conn *Connection) Drain(t *testing.T) {
+	t.Helper()
+
+	conn.sniffer.Drain(t)
 }
 
 // TCPIPv4 maintains the state for all the layers in a TCP/IPv4 connection.
@@ -624,6 +642,8 @@ type TCPIPv4 Connection
 
 // NewTCPIPv4 creates a new TCPIPv4 connection with reasonable defaults.
 func NewTCPIPv4(t *testing.T, outgoingTCP, incomingTCP TCP) TCPIPv4 {
+	t.Helper()
+
 	etherState, err := newEtherState(Ether{}, Ether{})
 	if err != nil {
 		t.Fatalf("can't make etherState: %s", err)
@@ -649,57 +669,58 @@ func NewTCPIPv4(t *testing.T, outgoingTCP, incomingTCP TCP) TCPIPv4 {
 		layerStates: []layerState{etherState, ipv4State, tcpState},
 		injector:    injector,
 		sniffer:     sniffer,
-		t:           t,
 	}
 }
 
 // Connect performs a TCP 3-way handshake. The input Connection should have a
 // final TCP Layer.
-func (conn *TCPIPv4) Connect() {
-	conn.t.Helper()
+func (conn *TCPIPv4) Connect(t *testing.T) {
+	t.Helper()
 
 	// Send the SYN.
-	conn.Send(TCP{Flags: Uint8(header.TCPFlagSyn)})
+	conn.Send(t, TCP{Flags: Uint8(header.TCPFlagSyn)})
 
 	// Wait for the SYN-ACK.
-	synAck, err := conn.Expect(TCP{Flags: Uint8(header.TCPFlagSyn | header.TCPFlagAck)}, time.Second)
+	synAck, err := conn.Expect(t, TCP{Flags: Uint8(header.TCPFlagSyn | header.TCPFlagAck)}, time.Second)
 	if err != nil {
-		conn.t.Fatalf("didn't get synack during handshake: %s", err)
+		t.Fatalf("didn't get synack during handshake: %s", err)
 	}
 	conn.layerStates[len(conn.layerStates)-1].(*tcpState).synAck = synAck
 
 	// Send an ACK.
-	conn.Send(TCP{Flags: Uint8(header.TCPFlagAck)})
+	conn.Send(t, TCP{Flags: Uint8(header.TCPFlagAck)})
 }
 
 // ConnectWithOptions performs a TCP 3-way handshake with given TCP options.
 // The input Connection should have a final TCP Layer.
-func (conn *TCPIPv4) ConnectWithOptions(options []byte) {
-	conn.t.Helper()
+func (conn *TCPIPv4) ConnectWithOptions(t *testing.T, options []byte) {
+	t.Helper()
 
 	// Send the SYN.
-	conn.Send(TCP{Flags: Uint8(header.TCPFlagSyn), Options: options})
+	conn.Send(t, TCP{Flags: Uint8(header.TCPFlagSyn), Options: options})
 
 	// Wait for the SYN-ACK.
-	synAck, err := conn.Expect(TCP{Flags: Uint8(header.TCPFlagSyn | header.TCPFlagAck)}, time.Second)
+	synAck, err := conn.Expect(t, TCP{Flags: Uint8(header.TCPFlagSyn | header.TCPFlagAck)}, time.Second)
 	if err != nil {
-		conn.t.Fatalf("didn't get synack during handshake: %s", err)
+		t.Fatalf("didn't get synack during handshake: %s", err)
 	}
 	conn.layerStates[len(conn.layerStates)-1].(*tcpState).synAck = synAck
 
 	// Send an ACK.
-	conn.Send(TCP{Flags: Uint8(header.TCPFlagAck)})
+	conn.Send(t, TCP{Flags: Uint8(header.TCPFlagAck)})
 }
 
 // ExpectData is a convenient method that expects a Layer and the Layer after
 // it. If it doens't arrive in time, it returns nil.
-func (conn *TCPIPv4) ExpectData(tcp *TCP, payload *Payload, timeout time.Duration) (Layers, error) {
+func (conn *TCPIPv4) ExpectData(t *testing.T, tcp *TCP, payload *Payload, timeout time.Duration) (Layers, error) {
+	t.Helper()
+
 	expected := make([]Layer, len(conn.layerStates))
 	expected[len(expected)-1] = tcp
 	if payload != nil {
 		expected = append(expected, payload)
 	}
-	return (*Connection)(conn).ExpectFrame(expected, timeout)
+	return (*Connection)(conn).ExpectFrame(t, expected, timeout)
 }
 
 // ExpectNextData attempts to receive the next incoming segment for the
@@ -708,9 +729,11 @@ func (conn *TCPIPv4) ExpectData(tcp *TCP, payload *Payload, timeout time.Duratio
 // It differs from ExpectData() in that here we are only interested in the next
 // received segment, while ExpectData() can receive multiple segments for the
 // connection until there is a match with given layers or a timeout.
-func (conn *TCPIPv4) ExpectNextData(tcp *TCP, payload *Payload, timeout time.Duration) (Layers, error) {
+func (conn *TCPIPv4) ExpectNextData(t *testing.T, tcp *TCP, payload *Payload, timeout time.Duration) (Layers, error) {
+	t.Helper()
+
 	// Receive the first incoming TCP segment for this connection.
-	got, err := conn.ExpectData(&TCP{}, nil, timeout)
+	got, err := conn.ExpectData(t, &TCP{}, nil, timeout)
 	if err != nil {
 		return nil, err
 	}
@@ -719,7 +742,7 @@ func (conn *TCPIPv4) ExpectNextData(tcp *TCP, payload *Payload, timeout time.Dur
 	expected[len(expected)-1] = tcp
 	if payload != nil {
 		expected = append(expected, payload)
-		tcp.SeqNum = Uint32(uint32(*conn.RemoteSeqNum()) - uint32(payload.Length()))
+		tcp.SeqNum = Uint32(uint32(*conn.RemoteSeqNum(t)) - uint32(payload.Length()))
 	}
 	if !(*Connection)(conn).match(expected, got) {
 		return nil, fmt.Errorf("next frame is not matching %s during %s: got %s", expected, timeout, got)
@@ -729,71 +752,91 @@ func (conn *TCPIPv4) ExpectNextData(tcp *TCP, payload *Payload, timeout time.Dur
 
 // Send a packet with reasonable defaults. Potentially override the TCP layer in
 // the connection with the provided layer and add additionLayers.
-func (conn *TCPIPv4) Send(tcp TCP, additionalLayers ...Layer) {
-	(*Connection)(conn).send(Layers{&tcp}, additionalLayers...)
+func (conn *TCPIPv4) Send(t *testing.T, tcp TCP, additionalLayers ...Layer) {
+	t.Helper()
+
+	(*Connection)(conn).send(t, Layers{&tcp}, additionalLayers...)
 }
 
 // Close frees associated resources held by the TCPIPv4 connection.
-func (conn *TCPIPv4) Close() {
-	(*Connection)(conn).Close()
+func (conn *TCPIPv4) Close(t *testing.T) {
+	t.Helper()
+
+	(*Connection)(conn).Close(t)
 }
 
 // Expect expects a frame with the TCP layer matching the provided TCP within
 // the timeout specified. If it doesn't arrive in time, an error is returned.
-func (conn *TCPIPv4) Expect(tcp TCP, timeout time.Duration) (*TCP, error) {
-	layer, err := (*Connection)(conn).Expect(&tcp, timeout)
+func (conn *TCPIPv4) Expect(t *testing.T, tcp TCP, timeout time.Duration) (*TCP, error) {
+	t.Helper()
+
+	layer, err := (*Connection)(conn).Expect(t, &tcp, timeout)
 	if layer == nil {
 		return nil, err
 	}
 	gotTCP, ok := layer.(*TCP)
 	if !ok {
-		conn.t.Fatalf("expected %s to be TCP", layer)
+		t.Fatalf("expected %s to be TCP", layer)
 	}
 	return gotTCP, err
 }
 
-func (conn *TCPIPv4) tcpState() *tcpState {
+func (conn *TCPIPv4) tcpState(t *testing.T) *tcpState {
+	t.Helper()
+
 	state, ok := conn.layerStates[2].(*tcpState)
 	if !ok {
-		conn.t.Fatalf("got transport-layer state type=%T, expected tcpState", conn.layerStates[2])
+		t.Fatalf("got transport-layer state type=%T, expected tcpState", conn.layerStates[2])
 	}
 	return state
 }
 
-func (conn *TCPIPv4) ipv4State() *ipv4State {
+func (conn *TCPIPv4) ipv4State(t *testing.T) *ipv4State {
+	t.Helper()
+
 	state, ok := conn.layerStates[1].(*ipv4State)
 	if !ok {
-		conn.t.Fatalf("expected network-layer state type=%T, expected ipv4State", conn.layerStates[1])
+		t.Fatalf("expected network-layer state type=%T, expected ipv4State", conn.layerStates[1])
 	}
 	return state
 }
 
 // RemoteSeqNum returns the next expected sequence number from the DUT.
-func (conn *TCPIPv4) RemoteSeqNum() *seqnum.Value {
-	return conn.tcpState().remoteSeqNum
+func (conn *TCPIPv4) RemoteSeqNum(t *testing.T) *seqnum.Value {
+	t.Helper()
+
+	return conn.tcpState(t).remoteSeqNum
 }
 
 // LocalSeqNum returns the next sequence number to send from the testbench.
-func (conn *TCPIPv4) LocalSeqNum() *seqnum.Value {
-	return conn.tcpState().localSeqNum
+func (conn *TCPIPv4) LocalSeqNum(t *testing.T) *seqnum.Value {
+	t.Helper()
+
+	return conn.tcpState(t).localSeqNum
 }
 
 // SynAck returns the SynAck that was part of the handshake.
-func (conn *TCPIPv4) SynAck() *TCP {
-	return conn.tcpState().synAck
+func (conn *TCPIPv4) SynAck(t *testing.T) *TCP {
+	t.Helper()
+
+	return conn.tcpState(t).synAck
 }
 
 // LocalAddr gets the local socket address of this connection.
-func (conn *TCPIPv4) LocalAddr() *unix.SockaddrInet4 {
-	sa := &unix.SockaddrInet4{Port: int(*conn.tcpState().out.SrcPort)}
-	copy(sa.Addr[:], *conn.ipv4State().out.SrcAddr)
+func (conn *TCPIPv4) LocalAddr(t *testing.T) *unix.SockaddrInet4 {
+	t.Helper()
+
+	sa := &unix.SockaddrInet4{Port: int(*conn.tcpState(t).out.SrcPort)}
+	copy(sa.Addr[:], *conn.ipv4State(t).out.SrcAddr)
 	return sa
 }
 
 // Drain drains the sniffer's receive buffer by receiving packets until there's
 // nothing else to receive.
-func (conn *TCPIPv4) Drain() {
-	conn.sniffer.Drain()
+func (conn *TCPIPv4) Drain(t *testing.T) {
+	t.Helper()
+
+	conn.sniffer.Drain(t)
 }
 
 // IPv6Conn maintains the state for all the layers in a IPv6 connection.
@@ -801,6 +844,8 @@ type IPv6Conn Connection
 
 // NewIPv6Conn creates a new IPv6Conn connection with reasonable defaults.
 func NewIPv6Conn(t *testing.T, outgoingIPv6, incomingIPv6 IPv6) IPv6Conn {
+	t.Helper()
+
 	etherState, err := newEtherState(Ether{}, Ether{})
 	if err != nil {
 		t.Fatalf("can't make EtherState: %s", err)
@@ -823,25 +868,30 @@ func NewIPv6Conn(t *testing.T, outgoingIPv6, incomingIPv6 IPv6) IPv6Conn {
 		layerStates: []layerState{etherState, ipv6State},
 		injector:    injector,
 		sniffer:     sniffer,
-		t:           t,
 	}
 }
 
 // Send sends a frame with ipv6 overriding the IPv6 layer defaults and
 // additionalLayers added after it.
-func (conn *IPv6Conn) Send(ipv6 IPv6, additionalLayers ...Layer) {
-	(*Connection)(conn).send(Layers{&ipv6}, additionalLayers...)
+func (conn *IPv6Conn) Send(t *testing.T, ipv6 IPv6, additionalLayers ...Layer) {
+	t.Helper()
+
+	(*Connection)(conn).send(t, Layers{&ipv6}, additionalLayers...)
 }
 
 // Close to clean up any resources held.
-func (conn *IPv6Conn) Close() {
-	(*Connection)(conn).Close()
+func (conn *IPv6Conn) Close(t *testing.T) {
+	t.Helper()
+
+	(*Connection)(conn).Close(t)
 }
 
 // ExpectFrame expects a frame that matches the provided Layers within the
 // timeout specified. If it doesn't arrive in time, an error is returned.
-func (conn *IPv6Conn) ExpectFrame(frame Layers, timeout time.Duration) (Layers, error) {
-	return (*Connection)(conn).ExpectFrame(frame, timeout)
+func (conn *IPv6Conn) ExpectFrame(t *testing.T, frame Layers, timeout time.Duration) (Layers, error) {
+	t.Helper()
+
+	return (*Connection)(conn).ExpectFrame(t, frame, timeout)
 }
 
 // UDPIPv4 maintains the state for all the layers in a UDP/IPv4 connection.
@@ -849,6 +899,8 @@ type UDPIPv4 Connection
 
 // NewUDPIPv4 creates a new UDPIPv4 connection with reasonable defaults.
 func NewUDPIPv4(t *testing.T, outgoingUDP, incomingUDP UDP) UDPIPv4 {
+	t.Helper()
+
 	etherState, err := newEtherState(Ether{}, Ether{})
 	if err != nil {
 		t.Fatalf("can't make etherState: %s", err)
@@ -874,81 +926,96 @@ func NewUDPIPv4(t *testing.T, outgoingUDP, incomingUDP UDP) UDPIPv4 {
 		layerStates: []layerState{etherState, ipv4State, udpState},
 		injector:    injector,
 		sniffer:     sniffer,
-		t:           t,
 	}
 }
 
-func (conn *UDPIPv4) udpState() *udpState {
+func (conn *UDPIPv4) udpState(t *testing.T) *udpState {
+	t.Helper()
+
 	state, ok := conn.layerStates[2].(*udpState)
 	if !ok {
-		conn.t.Fatalf("got transport-layer state type=%T, expected udpState", conn.layerStates[2])
+		t.Fatalf("got transport-layer state type=%T, expected udpState", conn.layerStates[2])
 	}
 	return state
 }
 
-func (conn *UDPIPv4) ipv4State() *ipv4State {
+func (conn *UDPIPv4) ipv4State(t *testing.T) *ipv4State {
+	t.Helper()
+
 	state, ok := conn.layerStates[1].(*ipv4State)
 	if !ok {
-		conn.t.Fatalf("got network-layer state type=%T, expected ipv4State", conn.layerStates[1])
+		t.Fatalf("got network-layer state type=%T, expected ipv4State", conn.layerStates[1])
 	}
 	return state
 }
 
 // LocalAddr gets the local socket address of this connection.
-func (conn *UDPIPv4) LocalAddr() *unix.SockaddrInet4 {
-	sa := &unix.SockaddrInet4{Port: int(*conn.udpState().out.SrcPort)}
-	copy(sa.Addr[:], *conn.ipv4State().out.SrcAddr)
+func (conn *UDPIPv4) LocalAddr(t *testing.T) *unix.SockaddrInet4 {
+	t.Helper()
+
+	sa := &unix.SockaddrInet4{Port: int(*conn.udpState(t).out.SrcPort)}
+	copy(sa.Addr[:], *conn.ipv4State(t).out.SrcAddr)
 	return sa
 }
 
 // Send sends a packet with reasonable defaults, potentially overriding the UDP
 // layer and adding additionLayers.
-func (conn *UDPIPv4) Send(udp UDP, additionalLayers ...Layer) {
-	(*Connection)(conn).send(Layers{&udp}, additionalLayers...)
+func (conn *UDPIPv4) Send(t *testing.T, udp UDP, additionalLayers ...Layer) {
+	t.Helper()
+
+	(*Connection)(conn).send(t, Layers{&udp}, additionalLayers...)
 }
 
 // SendIP sends a packet with reasonable defaults, potentially overriding the
 // UDP and IPv4 headers and adding additionLayers.
-func (conn *UDPIPv4) SendIP(ip IPv4, udp UDP, additionalLayers ...Layer) {
-	(*Connection)(conn).send(Layers{&ip, &udp}, additionalLayers...)
+func (conn *UDPIPv4) SendIP(t *testing.T, ip IPv4, udp UDP, additionalLayers ...Layer) {
+	t.Helper()
+
+	(*Connection)(conn).send(t, Layers{&ip, &udp}, additionalLayers...)
 }
 
 // Expect expects a frame with the UDP layer matching the provided UDP within
 // the timeout specified. If it doesn't arrive in time, an error is returned.
-func (conn *UDPIPv4) Expect(udp UDP, timeout time.Duration) (*UDP, error) {
-	conn.t.Helper()
-	layer, err := (*Connection)(conn).Expect(&udp, timeout)
+func (conn *UDPIPv4) Expect(t *testing.T, udp UDP, timeout time.Duration) (*UDP, error) {
+	t.Helper()
+
+	layer, err := (*Connection)(conn).Expect(t, &udp, timeout)
 	if err != nil {
 		return nil, err
 	}
 	gotUDP, ok := layer.(*UDP)
 	if !ok {
-		conn.t.Fatalf("expected %s to be UDP", layer)
+		t.Fatalf("expected %s to be UDP", layer)
 	}
 	return gotUDP, nil
 }
 
 // ExpectData is a convenient method that expects a Layer and the Layer after
 // it. If it doens't arrive in time, it returns nil.
-func (conn *UDPIPv4) ExpectData(udp UDP, payload Payload, timeout time.Duration) (Layers, error) {
-	conn.t.Helper()
+func (conn *UDPIPv4) ExpectData(t *testing.T, udp UDP, payload Payload, timeout time.Duration) (Layers, error) {
+	t.Helper()
+
 	expected := make([]Layer, len(conn.layerStates))
 	expected[len(expected)-1] = &udp
 	if payload.length() != 0 {
 		expected = append(expected, &payload)
 	}
-	return (*Connection)(conn).ExpectFrame(expected, timeout)
+	return (*Connection)(conn).ExpectFrame(t, expected, timeout)
 }
 
 // Close frees associated resources held by the UDPIPv4 connection.
-func (conn *UDPIPv4) Close() {
-	(*Connection)(conn).Close()
+func (conn *UDPIPv4) Close(t *testing.T) {
+	t.Helper()
+
+	(*Connection)(conn).Close(t)
 }
 
 // Drain drains the sniffer's receive buffer by receiving packets until there's
 // nothing else to receive.
-func (conn *UDPIPv4) Drain() {
-	conn.sniffer.Drain()
+func (conn *UDPIPv4) Drain(t *testing.T) {
+	t.Helper()
+
+	conn.sniffer.Drain(t)
 }
 
 // UDPIPv6 maintains the state for all the layers in a UDP/IPv6 connection.
@@ -956,6 +1023,8 @@ type UDPIPv6 Connection
 
 // NewUDPIPv6 creates a new UDPIPv6 connection with reasonable defaults.
 func NewUDPIPv6(t *testing.T, outgoingUDP, incomingUDP UDP) UDPIPv6 {
+	t.Helper()
+
 	etherState, err := newEtherState(Ether{}, Ether{})
 	if err != nil {
 		t.Fatalf("can't make etherState: %s", err)
@@ -980,84 +1049,157 @@ func NewUDPIPv6(t *testing.T, outgoingUDP, incomingUDP UDP) UDPIPv6 {
 		layerStates: []layerState{etherState, ipv6State, udpState},
 		injector:    injector,
 		sniffer:     sniffer,
-		t:           t,
 	}
 }
 
-func (conn *UDPIPv6) udpState() *udpState {
+func (conn *UDPIPv6) udpState(t *testing.T) *udpState {
+	t.Helper()
+
 	state, ok := conn.layerStates[2].(*udpState)
 	if !ok {
-		conn.t.Fatalf("got transport-layer state type=%T, expected udpState", conn.layerStates[2])
+		t.Fatalf("got transport-layer state type=%T, expected udpState", conn.layerStates[2])
 	}
 	return state
 }
 
-func (conn *UDPIPv6) ipv6State() *ipv6State {
+func (conn *UDPIPv6) ipv6State(t *testing.T) *ipv6State {
+	t.Helper()
+
 	state, ok := conn.layerStates[1].(*ipv6State)
 	if !ok {
-		conn.t.Fatalf("got network-layer state type=%T, expected ipv6State", conn.layerStates[1])
+		t.Fatalf("got network-layer state type=%T, expected ipv6State", conn.layerStates[1])
 	}
 	return state
 }
 
 // LocalAddr gets the local socket address of this connection.
-func (conn *UDPIPv6) LocalAddr() *unix.SockaddrInet6 {
+func (conn *UDPIPv6) LocalAddr(t *testing.T) *unix.SockaddrInet6 {
+	t.Helper()
+
 	sa := &unix.SockaddrInet6{
-		Port: int(*conn.udpState().out.SrcPort),
+		Port: int(*conn.udpState(t).out.SrcPort),
 		// Local address is in perspective to the remote host, so it's scoped to the
 		// ID of the remote interface.
 		ZoneId: uint32(RemoteInterfaceID),
 	}
-	copy(sa.Addr[:], *conn.ipv6State().out.SrcAddr)
+	copy(sa.Addr[:], *conn.ipv6State(t).out.SrcAddr)
 	return sa
 }
 
 // Send sends a packet with reasonable defaults, potentially overriding the UDP
 // layer and adding additionLayers.
-func (conn *UDPIPv6) Send(udp UDP, additionalLayers ...Layer) {
-	(*Connection)(conn).send(Layers{&udp}, additionalLayers...)
+func (conn *UDPIPv6) Send(t *testing.T, udp UDP, additionalLayers ...Layer) {
+	t.Helper()
+
+	(*Connection)(conn).send(t, Layers{&udp}, additionalLayers...)
 }
 
 // SendIPv6 sends a packet with reasonable defaults, potentially overriding the
 // UDP and IPv6 headers and adding additionLayers.
-func (conn *UDPIPv6) SendIPv6(ip IPv6, udp UDP, additionalLayers ...Layer) {
-	(*Connection)(conn).send(Layers{&ip, &udp}, additionalLayers...)
+func (conn *UDPIPv6) SendIPv6(t *testing.T, ip IPv6, udp UDP, additionalLayers ...Layer) {
+	t.Helper()
+
+	(*Connection)(conn).send(t, Layers{&ip, &udp}, additionalLayers...)
 }
 
 // Expect expects a frame with the UDP layer matching the provided UDP within
 // the timeout specified. If it doesn't arrive in time, an error is returned.
-func (conn *UDPIPv6) Expect(udp UDP, timeout time.Duration) (*UDP, error) {
-	conn.t.Helper()
-	layer, err := (*Connection)(conn).Expect(&udp, timeout)
+func (conn *UDPIPv6) Expect(t *testing.T, udp UDP, timeout time.Duration) (*UDP, error) {
+	t.Helper()
+
+	layer, err := (*Connection)(conn).Expect(t, &udp, timeout)
 	if err != nil {
 		return nil, err
 	}
 	gotUDP, ok := layer.(*UDP)
 	if !ok {
-		conn.t.Fatalf("expected %s to be UDP", layer)
+		t.Fatalf("expected %s to be UDP", layer)
 	}
 	return gotUDP, nil
 }
 
 // ExpectData is a convenient method that expects a Layer and the Layer after
 // it. If it doens't arrive in time, it returns nil.
-func (conn *UDPIPv6) ExpectData(udp UDP, payload Payload, timeout time.Duration) (Layers, error) {
-	conn.t.Helper()
+func (conn *UDPIPv6) ExpectData(t *testing.T, udp UDP, payload Payload, timeout time.Duration) (Layers, error) {
+	t.Helper()
+
 	expected := make([]Layer, len(conn.layerStates))
 	expected[len(expected)-1] = &udp
 	if payload.length() != 0 {
 		expected = append(expected, &payload)
 	}
-	return (*Connection)(conn).ExpectFrame(expected, timeout)
+	return (*Connection)(conn).ExpectFrame(t, expected, timeout)
 }
 
 // Close frees associated resources held by the UDPIPv6 connection.
-func (conn *UDPIPv6) Close() {
-	(*Connection)(conn).Close()
+func (conn *UDPIPv6) Close(t *testing.T) {
+	t.Helper()
+
+	(*Connection)(conn).Close(t)
 }
 
 // Drain drains the sniffer's receive buffer by receiving packets until there's
 // nothing else to receive.
-func (conn *UDPIPv6) Drain() {
-	conn.sniffer.Drain()
+func (conn *UDPIPv6) Drain(t *testing.T) {
+	t.Helper()
+
+	conn.sniffer.Drain(t)
+}
+
+// TCPIPv6 maintains the state for all the layers in a TCP/IPv6 connection.
+type TCPIPv6 Connection
+
+// NewTCPIPv6 creates a new TCPIPv6 connection with reasonable defaults.
+func NewTCPIPv6(t *testing.T, outgoingTCP, incomingTCP TCP) TCPIPv6 {
+	etherState, err := newEtherState(Ether{}, Ether{})
+	if err != nil {
+		t.Fatalf("can't make etherState: %s", err)
+	}
+	ipv6State, err := newIPv6State(IPv6{}, IPv6{})
+	if err != nil {
+		t.Fatalf("can't make ipv6State: %s", err)
+	}
+	tcpState, err := newTCPState(unix.AF_INET6, outgoingTCP, incomingTCP)
+	if err != nil {
+		t.Fatalf("can't make tcpState: %s", err)
+	}
+	injector, err := NewInjector(t)
+	if err != nil {
+		t.Fatalf("can't make injector: %s", err)
+	}
+	sniffer, err := NewSniffer(t)
+	if err != nil {
+		t.Fatalf("can't make sniffer: %s", err)
+	}
+
+	return TCPIPv6{
+		layerStates: []layerState{etherState, ipv6State, tcpState},
+		injector:    injector,
+		sniffer:     sniffer,
+	}
+}
+
+func (conn *TCPIPv6) SrcPort() uint16 {
+	state := conn.layerStates[2].(*tcpState)
+	return *state.out.SrcPort
+}
+
+// ExpectData is a convenient method that expects a Layer and the Layer after
+// it. If it doens't arrive in time, it returns nil.
+func (conn *TCPIPv6) ExpectData(t *testing.T, tcp *TCP, payload *Payload, timeout time.Duration) (Layers, error) {
+	t.Helper()
+
+	expected := make([]Layer, len(conn.layerStates))
+	expected[len(expected)-1] = tcp
+	if payload != nil {
+		expected = append(expected, payload)
+	}
+	return (*Connection)(conn).ExpectFrame(t, expected, timeout)
+}
+
+// Close frees associated resources held by the TCPIPv6 connection.
+func (conn *TCPIPv6) Close(t *testing.T) {
+	t.Helper()
+
+	(*Connection)(conn).Close(t)
 }
diff --git a/test/packetimpact/testbench/dut.go b/test/packetimpact/testbench/dut.go
index 51be13759..73c532e75 100644
--- a/test/packetimpact/testbench/dut.go
+++ b/test/packetimpact/testbench/dut.go
@@ -31,13 +31,14 @@ import (
 
 // DUT communicates with the DUT to force it to make POSIX calls.
 type DUT struct {
-	t           *testing.T
 	conn        *grpc.ClientConn
 	posixServer POSIXClient
 }
 
 // NewDUT creates a new connection with the DUT over gRPC.
 func NewDUT(t *testing.T) DUT {
+	t.Helper()
+
 	flag.Parse()
 	if err := genPseudoFlags(); err != nil {
 		t.Fatal("generating psuedo flags:", err)
@@ -50,7 +51,6 @@ func NewDUT(t *testing.T) DUT {
 	}
 	posixServer := NewPOSIXClient(conn)
 	return DUT{
-		t:           t,
 		conn:        conn,
 		posixServer: posixServer,
 	}
@@ -61,8 +61,9 @@ func (dut *DUT) TearDown() {
 	dut.conn.Close()
 }
 
-func (dut *DUT) sockaddrToProto(sa unix.Sockaddr) *pb.Sockaddr {
-	dut.t.Helper()
+func (dut *DUT) sockaddrToProto(t *testing.T, sa unix.Sockaddr) *pb.Sockaddr {
+	t.Helper()
+
 	switch s := sa.(type) {
 	case *unix.SockaddrInet4:
 		return &pb.Sockaddr{
@@ -87,12 +88,13 @@ func (dut *DUT) sockaddrToProto(sa unix.Sockaddr) *pb.Sockaddr {
 			},
 		}
 	}
-	dut.t.Fatalf("can't parse Sockaddr struct: %+v", sa)
+	t.Fatalf("can't parse Sockaddr struct: %+v", sa)
 	return nil
 }
 
-func (dut *DUT) protoToSockaddr(sa *pb.Sockaddr) unix.Sockaddr {
-	dut.t.Helper()
+func (dut *DUT) protoToSockaddr(t *testing.T, sa *pb.Sockaddr) unix.Sockaddr {
+	t.Helper()
+
 	switch s := sa.Sockaddr.(type) {
 	case *pb.Sockaddr_In:
 		ret := unix.SockaddrInet4{
@@ -108,31 +110,32 @@ func (dut *DUT) protoToSockaddr(sa *pb.Sockaddr) unix.Sockaddr {
 		copy(ret.Addr[:], s.In6.GetAddr())
 		return &ret
 	}
-	dut.t.Fatalf("can't parse Sockaddr proto: %+v", sa)
+	t.Fatalf("can't parse Sockaddr proto: %#v", sa)
 	return nil
 }
 
 // CreateBoundSocket makes a new socket on the DUT, with type typ and protocol
 // proto, and bound to the IP address addr. Returns the new file descriptor and
 // the port that was selected on the DUT.
-func (dut *DUT) CreateBoundSocket(typ, proto int32, addr net.IP) (int32, uint16) {
-	dut.t.Helper()
+func (dut *DUT) CreateBoundSocket(t *testing.T, typ, proto int32, addr net.IP) (int32, uint16) {
+	t.Helper()
+
 	var fd int32
 	if addr.To4() != nil {
-		fd = dut.Socket(unix.AF_INET, typ, proto)
+		fd = dut.Socket(t, unix.AF_INET, typ, proto)
 		sa := unix.SockaddrInet4{}
 		copy(sa.Addr[:], addr.To4())
-		dut.Bind(fd, &sa)
+		dut.Bind(t, fd, &sa)
 	} else if addr.To16() != nil {
-		fd = dut.Socket(unix.AF_INET6, typ, proto)
+		fd = dut.Socket(t, unix.AF_INET6, typ, proto)
 		sa := unix.SockaddrInet6{}
 		copy(sa.Addr[:], addr.To16())
 		sa.ZoneId = uint32(RemoteInterfaceID)
-		dut.Bind(fd, &sa)
+		dut.Bind(t, fd, &sa)
 	} else {
-		dut.t.Fatalf("unknown ip addr type for remoteIP")
+		t.Fatalf("invalid IP address: %s", addr)
 	}
-	sa := dut.GetSockName(fd)
+	sa := dut.GetSockName(t, fd)
 	var port int
 	switch s := sa.(type) {
 	case *unix.SockaddrInet4:
@@ -140,15 +143,17 @@ func (dut *DUT) CreateBoundSocket(typ, proto int32, addr net.IP) (int32, uint16)
 	case *unix.SockaddrInet6:
 		port = s.Port
 	default:
-		dut.t.Fatalf("unknown sockaddr type from getsockname: %t", sa)
+		t.Fatalf("unknown sockaddr type from getsockname: %T", sa)
 	}
 	return fd, uint16(port)
 }
 
 // CreateListener makes a new TCP connection. If it fails, the test ends.
-func (dut *DUT) CreateListener(typ, proto, backlog int32) (int32, uint16) {
-	fd, remotePort := dut.CreateBoundSocket(typ, proto, net.ParseIP(RemoteIPv4))
-	dut.Listen(fd, backlog)
+func (dut *DUT) CreateListener(t *testing.T, typ, proto, backlog int32) (int32, uint16) {
+	t.Helper()
+
+	fd, remotePort := dut.CreateBoundSocket(t, typ, proto, net.ParseIP(RemoteIPv4))
+	dut.Listen(t, fd, backlog)
 	return fd, remotePort
 }
 
@@ -158,53 +163,57 @@ func (dut *DUT) CreateListener(typ, proto, backlog int32) (int32, uint16) {
 // Accept calls accept on the DUT and causes a fatal test failure if it doesn't
 // succeed. If more control over the timeout or error handling is needed, use
 // AcceptWithErrno.
-func (dut *DUT) Accept(sockfd int32) (int32, unix.Sockaddr) {
-	dut.t.Helper()
+func (dut *DUT) Accept(t *testing.T, sockfd int32) (int32, unix.Sockaddr) {
+	t.Helper()
+
 	ctx, cancel := context.WithTimeout(context.Background(), RPCTimeout)
 	defer cancel()
-	fd, sa, err := dut.AcceptWithErrno(ctx, sockfd)
+	fd, sa, err := dut.AcceptWithErrno(ctx, t, sockfd)
 	if fd < 0 {
-		dut.t.Fatalf("failed to accept: %s", err)
+		t.Fatalf("failed to accept: %s", err)
 	}
 	return fd, sa
 }
 
 // AcceptWithErrno calls accept on the DUT.
-func (dut *DUT) AcceptWithErrno(ctx context.Context, sockfd int32) (int32, unix.Sockaddr, error) {
-	dut.t.Helper()
+func (dut *DUT) AcceptWithErrno(ctx context.Context, t *testing.T, sockfd int32) (int32, unix.Sockaddr, error) {
+	t.Helper()
+
 	req := pb.AcceptRequest{
 		Sockfd: sockfd,
 	}
 	resp, err := dut.posixServer.Accept(ctx, &req)
 	if err != nil {
-		dut.t.Fatalf("failed to call Accept: %s", err)
+		t.Fatalf("failed to call Accept: %s", err)
 	}
-	return resp.GetFd(), dut.protoToSockaddr(resp.GetAddr()), syscall.Errno(resp.GetErrno_())
+	return resp.GetFd(), dut.protoToSockaddr(t, resp.GetAddr()), syscall.Errno(resp.GetErrno_())
 }
 
 // Bind calls bind on the DUT and causes a fatal test failure if it doesn't
 // succeed. If more control over the timeout or error handling is
 // needed, use BindWithErrno.
-func (dut *DUT) Bind(fd int32, sa unix.Sockaddr) {
-	dut.t.Helper()
+func (dut *DUT) Bind(t *testing.T, fd int32, sa unix.Sockaddr) {
+	t.Helper()
+
 	ctx, cancel := context.WithTimeout(context.Background(), RPCTimeout)
 	defer cancel()
-	ret, err := dut.BindWithErrno(ctx, fd, sa)
+	ret, err := dut.BindWithErrno(ctx, t, fd, sa)
 	if ret != 0 {
-		dut.t.Fatalf("failed to bind socket: %s", err)
+		t.Fatalf("failed to bind socket: %s", err)
 	}
 }
 
 // BindWithErrno calls bind on the DUT.
-func (dut *DUT) BindWithErrno(ctx context.Context, fd int32, sa unix.Sockaddr) (int32, error) {
-	dut.t.Helper()
+func (dut *DUT) BindWithErrno(ctx context.Context, t *testing.T, fd int32, sa unix.Sockaddr) (int32, error) {
+	t.Helper()
+
 	req := pb.BindRequest{
 		Sockfd: fd,
-		Addr:   dut.sockaddrToProto(sa),
+		Addr:   dut.sockaddrToProto(t, sa),
 	}
 	resp, err := dut.posixServer.Bind(ctx, &req)
 	if err != nil {
-		dut.t.Fatalf("failed to call Bind: %s", err)
+		t.Fatalf("failed to call Bind: %s", err)
 	}
 	return resp.GetRet(), syscall.Errno(resp.GetErrno_())
 }
@@ -212,25 +221,27 @@ func (dut *DUT) BindWithErrno(ctx context.Context, fd int32, sa unix.Sockaddr) (
 // Close calls close on the DUT and causes a fatal test failure if it doesn't
 // succeed. If more control over the timeout or error handling is needed, use
 // CloseWithErrno.
-func (dut *DUT) Close(fd int32) {
-	dut.t.Helper()
+func (dut *DUT) Close(t *testing.T, fd int32) {
+	t.Helper()
+
 	ctx, cancel := context.WithTimeout(context.Background(), RPCTimeout)
 	defer cancel()
-	ret, err := dut.CloseWithErrno(ctx, fd)
+	ret, err := dut.CloseWithErrno(ctx, t, fd)
 	if ret != 0 {
-		dut.t.Fatalf("failed to close: %s", err)
+		t.Fatalf("failed to close: %s", err)
 	}
 }
 
 // CloseWithErrno calls close on the DUT.
-func (dut *DUT) CloseWithErrno(ctx context.Context, fd int32) (int32, error) {
-	dut.t.Helper()
+func (dut *DUT) CloseWithErrno(ctx context.Context, t *testing.T, fd int32) (int32, error) {
+	t.Helper()
+
 	req := pb.CloseRequest{
 		Fd: fd,
 	}
 	resp, err := dut.posixServer.Close(ctx, &req)
 	if err != nil {
-		dut.t.Fatalf("failed to call Close: %s", err)
+		t.Fatalf("failed to call Close: %s", err)
 	}
 	return resp.GetRet(), syscall.Errno(resp.GetErrno_())
 }
@@ -238,28 +249,30 @@ func (dut *DUT) CloseWithErrno(ctx context.Context, fd int32) (int32, error) {
 // Connect calls connect on the DUT and causes a fatal test failure if it
 // doesn't succeed. If more control over the timeout or error handling is
 // needed, use ConnectWithErrno.
-func (dut *DUT) Connect(fd int32, sa unix.Sockaddr) {
-	dut.t.Helper()
+func (dut *DUT) Connect(t *testing.T, fd int32, sa unix.Sockaddr) {
+	t.Helper()
+
 	ctx, cancel := context.WithTimeout(context.Background(), RPCTimeout)
 	defer cancel()
-	ret, err := dut.ConnectWithErrno(ctx, fd, sa)
+	ret, err := dut.ConnectWithErrno(ctx, t, fd, sa)
 	// Ignore 'operation in progress' error that can be returned when the socket
 	// is non-blocking.
 	if err != syscall.Errno(unix.EINPROGRESS) && ret != 0 {
-		dut.t.Fatalf("failed to connect socket: %s", err)
+		t.Fatalf("failed to connect socket: %s", err)
 	}
 }
 
 // ConnectWithErrno calls bind on the DUT.
-func (dut *DUT) ConnectWithErrno(ctx context.Context, fd int32, sa unix.Sockaddr) (int32, error) {
-	dut.t.Helper()
+func (dut *DUT) ConnectWithErrno(ctx context.Context, t *testing.T, fd int32, sa unix.Sockaddr) (int32, error) {
+	t.Helper()
+
 	req := pb.ConnectRequest{
 		Sockfd: fd,
-		Addr:   dut.sockaddrToProto(sa),
+		Addr:   dut.sockaddrToProto(t, sa),
 	}
 	resp, err := dut.posixServer.Connect(ctx, &req)
 	if err != nil {
-		dut.t.Fatalf("failed to call Connect: %s", err)
+		t.Fatalf("failed to call Connect: %s", err)
 	}
 	return resp.GetRet(), syscall.Errno(resp.GetErrno_())
 }
@@ -267,20 +280,22 @@ func (dut *DUT) ConnectWithErrno(ctx context.Context, fd int32, sa unix.Sockaddr
 // Fcntl calls fcntl on the DUT and causes a fatal test failure if it
 // doesn't succeed. If more control over the timeout or error handling is
 // needed, use FcntlWithErrno.
-func (dut *DUT) Fcntl(fd, cmd, arg int32) int32 {
-	dut.t.Helper()
+func (dut *DUT) Fcntl(t *testing.T, fd, cmd, arg int32) int32 {
+	t.Helper()
+
 	ctx, cancel := context.WithTimeout(context.Background(), RPCTimeout)
 	defer cancel()
-	ret, err := dut.FcntlWithErrno(ctx, fd, cmd, arg)
+	ret, err := dut.FcntlWithErrno(ctx, t, fd, cmd, arg)
 	if ret == -1 {
-		dut.t.Fatalf("failed to Fcntl: ret=%d, errno=%s", ret, err)
+		t.Fatalf("failed to Fcntl: ret=%d, errno=%s", ret, err)
 	}
 	return ret
 }
 
 // FcntlWithErrno calls fcntl on the DUT.
-func (dut *DUT) FcntlWithErrno(ctx context.Context, fd, cmd, arg int32) (int32, error) {
-	dut.t.Helper()
+func (dut *DUT) FcntlWithErrno(ctx context.Context, t *testing.T, fd, cmd, arg int32) (int32, error) {
+	t.Helper()
+
 	req := pb.FcntlRequest{
 		Fd:  fd,
 		Cmd: cmd,
@@ -288,7 +303,7 @@ func (dut *DUT) FcntlWithErrno(ctx context.Context, fd, cmd, arg int32) (int32,
 	}
 	resp, err := dut.posixServer.Fcntl(ctx, &req)
 	if err != nil {
-		dut.t.Fatalf("failed to call Fcntl: %s", err)
+		t.Fatalf("failed to call Fcntl: %s", err)
 	}
 	return resp.GetRet(), syscall.Errno(resp.GetErrno_())
 }
@@ -296,32 +311,35 @@ func (dut *DUT) FcntlWithErrno(ctx context.Context, fd, cmd, arg int32) (int32,
 // GetSockName calls getsockname on the DUT and causes a fatal test failure if
 // it doesn't succeed. If more control over the timeout or error handling is
 // needed, use GetSockNameWithErrno.
-func (dut *DUT) GetSockName(sockfd int32) unix.Sockaddr {
-	dut.t.Helper()
+func (dut *DUT) GetSockName(t *testing.T, sockfd int32) unix.Sockaddr {
+	t.Helper()
+
 	ctx, cancel := context.WithTimeout(context.Background(), RPCTimeout)
 	defer cancel()
-	ret, sa, err := dut.GetSockNameWithErrno(ctx, sockfd)
+	ret, sa, err := dut.GetSockNameWithErrno(ctx, t, sockfd)
 	if ret != 0 {
-		dut.t.Fatalf("failed to getsockname: %s", err)
+		t.Fatalf("failed to getsockname: %s", err)
 	}
 	return sa
 }
 
 // GetSockNameWithErrno calls getsockname on the DUT.
-func (dut *DUT) GetSockNameWithErrno(ctx context.Context, sockfd int32) (int32, unix.Sockaddr, error) {
-	dut.t.Helper()
+func (dut *DUT) GetSockNameWithErrno(ctx context.Context, t *testing.T, sockfd int32) (int32, unix.Sockaddr, error) {
+	t.Helper()
+
 	req := pb.GetSockNameRequest{
 		Sockfd: sockfd,
 	}
 	resp, err := dut.posixServer.GetSockName(ctx, &req)
 	if err != nil {
-		dut.t.Fatalf("failed to call Bind: %s", err)
+		t.Fatalf("failed to call Bind: %s", err)
 	}
-	return resp.GetRet(), dut.protoToSockaddr(resp.GetAddr()), syscall.Errno(resp.GetErrno_())
+	return resp.GetRet(), dut.protoToSockaddr(t, resp.GetAddr()), syscall.Errno(resp.GetErrno_())
 }
 
-func (dut *DUT) getSockOpt(ctx context.Context, sockfd, level, optname, optlen int32, typ pb.GetSockOptRequest_SockOptType) (int32, *pb.SockOptVal, error) {
-	dut.t.Helper()
+func (dut *DUT) getSockOpt(ctx context.Context, t *testing.T, sockfd, level, optname, optlen int32, typ pb.GetSockOptRequest_SockOptType) (int32, *pb.SockOptVal, error) {
+	t.Helper()
+
 	req := pb.GetSockOptRequest{
 		Sockfd:  sockfd,
 		Level:   level,
@@ -331,11 +349,11 @@ func (dut *DUT) getSockOpt(ctx context.Context, sockfd, level, optname, optlen i
 	}
 	resp, err := dut.posixServer.GetSockOpt(ctx, &req)
 	if err != nil {
-		dut.t.Fatalf("failed to call GetSockOpt: %s", err)
+		t.Fatalf("failed to call GetSockOpt: %s", err)
 	}
 	optval := resp.GetOptval()
 	if optval == nil {
-		dut.t.Fatalf("GetSockOpt response does not contain a value")
+		t.Fatalf("GetSockOpt response does not contain a value")
 	}
 	return resp.GetRet(), optval, syscall.Errno(resp.GetErrno_())
 }
@@ -345,13 +363,14 @@ func (dut *DUT) getSockOpt(ctx context.Context, sockfd, level, optname, optlen i
 // needed, use GetSockOptWithErrno. Because endianess and the width of values
 // might differ between the testbench and DUT architectures, prefer to use a
 // more specific GetSockOptXxx function.
-func (dut *DUT) GetSockOpt(sockfd, level, optname, optlen int32) []byte {
-	dut.t.Helper()
+func (dut *DUT) GetSockOpt(t *testing.T, sockfd, level, optname, optlen int32) []byte {
+	t.Helper()
+
 	ctx, cancel := context.WithTimeout(context.Background(), RPCTimeout)
 	defer cancel()
-	ret, optval, err := dut.GetSockOptWithErrno(ctx, sockfd, level, optname, optlen)
+	ret, optval, err := dut.GetSockOptWithErrno(ctx, t, sockfd, level, optname, optlen)
 	if ret != 0 {
-		dut.t.Fatalf("failed to GetSockOpt: %s", err)
+		t.Fatalf("failed to GetSockOpt: %s", err)
 	}
 	return optval
 }
@@ -359,12 +378,13 @@ func (dut *DUT) GetSockOpt(sockfd, level, optname, optlen int32) []byte {
 // GetSockOptWithErrno calls getsockopt on the DUT. Because endianess and the
 // width of values might differ between the testbench and DUT architectures,
 // prefer to use a more specific GetSockOptXxxWithErrno function.
-func (dut *DUT) GetSockOptWithErrno(ctx context.Context, sockfd, level, optname, optlen int32) (int32, []byte, error) {
-	dut.t.Helper()
-	ret, optval, errno := dut.getSockOpt(ctx, sockfd, level, optname, optlen, pb.GetSockOptRequest_BYTES)
+func (dut *DUT) GetSockOptWithErrno(ctx context.Context, t *testing.T, sockfd, level, optname, optlen int32) (int32, []byte, error) {
+	t.Helper()
+
+	ret, optval, errno := dut.getSockOpt(ctx, t, sockfd, level, optname, optlen, pb.GetSockOptRequest_BYTES)
 	bytesval, ok := optval.Val.(*pb.SockOptVal_Bytesval)
 	if !ok {
-		dut.t.Fatalf("GetSockOpt got value type: %T, want bytes", optval)
+		t.Fatalf("GetSockOpt got value type: %T, want bytes", optval.Val)
 	}
 	return ret, bytesval.Bytesval, errno
 }
@@ -372,24 +392,26 @@ func (dut *DUT) GetSockOptWithErrno(ctx context.Context, sockfd, level, optname,
 // GetSockOptInt calls getsockopt on the DUT and causes a fatal test failure
 // if it doesn't succeed. If more control over the int optval or error handling
 // is needed, use GetSockOptIntWithErrno.
-func (dut *DUT) GetSockOptInt(sockfd, level, optname int32) int32 {
-	dut.t.Helper()
+func (dut *DUT) GetSockOptInt(t *testing.T, sockfd, level, optname int32) int32 {
+	t.Helper()
+
 	ctx, cancel := context.WithTimeout(context.Background(), RPCTimeout)
 	defer cancel()
-	ret, intval, err := dut.GetSockOptIntWithErrno(ctx, sockfd, level, optname)
+	ret, intval, err := dut.GetSockOptIntWithErrno(ctx, t, sockfd, level, optname)
 	if ret != 0 {
-		dut.t.Fatalf("failed to GetSockOptInt: %s", err)
+		t.Fatalf("failed to GetSockOptInt: %s", err)
 	}
 	return intval
 }
 
 // GetSockOptIntWithErrno calls getsockopt with an integer optval.
-func (dut *DUT) GetSockOptIntWithErrno(ctx context.Context, sockfd, level, optname int32) (int32, int32, error) {
-	dut.t.Helper()
-	ret, optval, errno := dut.getSockOpt(ctx, sockfd, level, optname, 0, pb.GetSockOptRequest_INT)
+func (dut *DUT) GetSockOptIntWithErrno(ctx context.Context, t *testing.T, sockfd, level, optname int32) (int32, int32, error) {
+	t.Helper()
+
+	ret, optval, errno := dut.getSockOpt(ctx, t, sockfd, level, optname, 0, pb.GetSockOptRequest_INT)
 	intval, ok := optval.Val.(*pb.SockOptVal_Intval)
 	if !ok {
-		dut.t.Fatalf("GetSockOpt got value type: %T, want int", optval)
+		t.Fatalf("GetSockOpt got value type: %T, want int", optval.Val)
 	}
 	return ret, intval.Intval, errno
 }
@@ -397,24 +419,26 @@ func (dut *DUT) GetSockOptIntWithErrno(ctx context.Context, sockfd, level, optna
 // GetSockOptTimeval calls getsockopt on the DUT and causes a fatal test failure
 // if it doesn't succeed. If more control over the timeout or error handling is
 // needed, use GetSockOptTimevalWithErrno.
-func (dut *DUT) GetSockOptTimeval(sockfd, level, optname int32) unix.Timeval {
-	dut.t.Helper()
+func (dut *DUT) GetSockOptTimeval(t *testing.T, sockfd, level, optname int32) unix.Timeval {
+	t.Helper()
+
 	ctx, cancel := context.WithTimeout(context.Background(), RPCTimeout)
 	defer cancel()
-	ret, timeval, err := dut.GetSockOptTimevalWithErrno(ctx, sockfd, level, optname)
+	ret, timeval, err := dut.GetSockOptTimevalWithErrno(ctx, t, sockfd, level, optname)
 	if ret != 0 {
-		dut.t.Fatalf("failed to GetSockOptTimeval: %s", err)
+		t.Fatalf("failed to GetSockOptTimeval: %s", err)
 	}
 	return timeval
 }
 
 // GetSockOptTimevalWithErrno calls getsockopt and returns a timeval.
-func (dut *DUT) GetSockOptTimevalWithErrno(ctx context.Context, sockfd, level, optname int32) (int32, unix.Timeval, error) {
-	dut.t.Helper()
-	ret, optval, errno := dut.getSockOpt(ctx, sockfd, level, optname, 0, pb.GetSockOptRequest_TIME)
+func (dut *DUT) GetSockOptTimevalWithErrno(ctx context.Context, t *testing.T, sockfd, level, optname int32) (int32, unix.Timeval, error) {
+	t.Helper()
+
+	ret, optval, errno := dut.getSockOpt(ctx, t, sockfd, level, optname, 0, pb.GetSockOptRequest_TIME)
 	tv, ok := optval.Val.(*pb.SockOptVal_Timeval)
 	if !ok {
-		dut.t.Fatalf("GetSockOpt got value type: %T, want timeval", optval)
+		t.Fatalf("GetSockOpt got value type: %T, want timeval", optval.Val)
 	}
 	timeval := unix.Timeval{
 		Sec:  tv.Timeval.Seconds,
@@ -426,26 +450,28 @@ func (dut *DUT) GetSockOptTimevalWithErrno(ctx context.Context, sockfd, level, o
 // Listen calls listen on the DUT and causes a fatal test failure if it doesn't
 // succeed. If more control over the timeout or error handling is needed, use
 // ListenWithErrno.
-func (dut *DUT) Listen(sockfd, backlog int32) {
-	dut.t.Helper()
+func (dut *DUT) Listen(t *testing.T, sockfd, backlog int32) {
+	t.Helper()
+
 	ctx, cancel := context.WithTimeout(context.Background(), RPCTimeout)
 	defer cancel()
-	ret, err := dut.ListenWithErrno(ctx, sockfd, backlog)
+	ret, err := dut.ListenWithErrno(ctx, t, sockfd, backlog)
 	if ret != 0 {
-		dut.t.Fatalf("failed to listen: %s", err)
+		t.Fatalf("failed to listen: %s", err)
 	}
 }
 
 // ListenWithErrno calls listen on the DUT.
-func (dut *DUT) ListenWithErrno(ctx context.Context, sockfd, backlog int32) (int32, error) {
-	dut.t.Helper()
+func (dut *DUT) ListenWithErrno(ctx context.Context, t *testing.T, sockfd, backlog int32) (int32, error) {
+	t.Helper()
+
 	req := pb.ListenRequest{
 		Sockfd:  sockfd,
 		Backlog: backlog,
 	}
 	resp, err := dut.posixServer.Listen(ctx, &req)
 	if err != nil {
-		dut.t.Fatalf("failed to call Listen: %s", err)
+		t.Fatalf("failed to call Listen: %s", err)
 	}
 	return resp.GetRet(), syscall.Errno(resp.GetErrno_())
 }
@@ -453,20 +479,22 @@ func (dut *DUT) ListenWithErrno(ctx context.Context, sockfd, backlog int32) (int
 // Send calls send on the DUT and causes a fatal test failure if it doesn't
 // succeed. If more control over the timeout or error handling is needed, use
 // SendWithErrno.
-func (dut *DUT) Send(sockfd int32, buf []byte, flags int32) int32 {
-	dut.t.Helper()
+func (dut *DUT) Send(t *testing.T, sockfd int32, buf []byte, flags int32) int32 {
+	t.Helper()
+
 	ctx, cancel := context.WithTimeout(context.Background(), RPCTimeout)
 	defer cancel()
-	ret, err := dut.SendWithErrno(ctx, sockfd, buf, flags)
+	ret, err := dut.SendWithErrno(ctx, t, sockfd, buf, flags)
 	if ret == -1 {
-		dut.t.Fatalf("failed to send: %s", err)
+		t.Fatalf("failed to send: %s", err)
 	}
 	return ret
 }
 
 // SendWithErrno calls send on the DUT.
-func (dut *DUT) SendWithErrno(ctx context.Context, sockfd int32, buf []byte, flags int32) (int32, error) {
-	dut.t.Helper()
+func (dut *DUT) SendWithErrno(ctx context.Context, t *testing.T, sockfd int32, buf []byte, flags int32) (int32, error) {
+	t.Helper()
+
 	req := pb.SendRequest{
 		Sockfd: sockfd,
 		Buf:    buf,
@@ -474,7 +502,7 @@ func (dut *DUT) SendWithErrno(ctx context.Context, sockfd int32, buf []byte, fla
 	}
 	resp, err := dut.posixServer.Send(ctx, &req)
 	if err != nil {
-		dut.t.Fatalf("failed to call Send: %s", err)
+		t.Fatalf("failed to call Send: %s", err)
 	}
 	return resp.GetRet(), syscall.Errno(resp.GetErrno_())
 }
@@ -482,48 +510,52 @@ func (dut *DUT) SendWithErrno(ctx context.Context, sockfd int32, buf []byte, fla
 // SendTo calls sendto on the DUT and causes a fatal test failure if it doesn't
 // succeed. If more control over the timeout or error handling is needed, use
 // SendToWithErrno.
-func (dut *DUT) SendTo(sockfd int32, buf []byte, flags int32, destAddr unix.Sockaddr) int32 {
-	dut.t.Helper()
+func (dut *DUT) SendTo(t *testing.T, sockfd int32, buf []byte, flags int32, destAddr unix.Sockaddr) int32 {
+	t.Helper()
+
 	ctx, cancel := context.WithTimeout(context.Background(), RPCTimeout)
 	defer cancel()
-	ret, err := dut.SendToWithErrno(ctx, sockfd, buf, flags, destAddr)
+	ret, err := dut.SendToWithErrno(ctx, t, sockfd, buf, flags, destAddr)
 	if ret == -1 {
-		dut.t.Fatalf("failed to sendto: %s", err)
+		t.Fatalf("failed to sendto: %s", err)
 	}
 	return ret
 }
 
 // SendToWithErrno calls sendto on the DUT.
-func (dut *DUT) SendToWithErrno(ctx context.Context, sockfd int32, buf []byte, flags int32, destAddr unix.Sockaddr) (int32, error) {
-	dut.t.Helper()
+func (dut *DUT) SendToWithErrno(ctx context.Context, t *testing.T, sockfd int32, buf []byte, flags int32, destAddr unix.Sockaddr) (int32, error) {
+	t.Helper()
+
 	req := pb.SendToRequest{
 		Sockfd:   sockfd,
 		Buf:      buf,
 		Flags:    flags,
-		DestAddr: dut.sockaddrToProto(destAddr),
+		DestAddr: dut.sockaddrToProto(t, destAddr),
 	}
 	resp, err := dut.posixServer.SendTo(ctx, &req)
 	if err != nil {
-		dut.t.Fatalf("faled to call SendTo: %s", err)
+		t.Fatalf("faled to call SendTo: %s", err)
 	}
 	return resp.GetRet(), syscall.Errno(resp.GetErrno_())
 }
 
 // SetNonBlocking will set O_NONBLOCK flag for fd if nonblocking
 // is true, otherwise it will clear the flag.
-func (dut *DUT) SetNonBlocking(fd int32, nonblocking bool) {
-	dut.t.Helper()
-	flags := dut.Fcntl(fd, unix.F_GETFL, 0)
+func (dut *DUT) SetNonBlocking(t *testing.T, fd int32, nonblocking bool) {
+	t.Helper()
+
+	flags := dut.Fcntl(t, fd, unix.F_GETFL, 0)
 	if nonblocking {
 		flags |= unix.O_NONBLOCK
 	} else {
 		flags &= ^unix.O_NONBLOCK
 	}
-	dut.Fcntl(fd, unix.F_SETFL, flags)
+	dut.Fcntl(t, fd, unix.F_SETFL, flags)
 }
 
-func (dut *DUT) setSockOpt(ctx context.Context, sockfd, level, optname int32, optval *pb.SockOptVal) (int32, error) {
-	dut.t.Helper()
+func (dut *DUT) setSockOpt(ctx context.Context, t *testing.T, sockfd, level, optname int32, optval *pb.SockOptVal) (int32, error) {
+	t.Helper()
+
 	req := pb.SetSockOptRequest{
 		Sockfd:  sockfd,
 		Level:   level,
@@ -532,7 +564,7 @@ func (dut *DUT) setSockOpt(ctx context.Context, sockfd, level, optname int32, op
 	}
 	resp, err := dut.posixServer.SetSockOpt(ctx, &req)
 	if err != nil {
-		dut.t.Fatalf("failed to call SetSockOpt: %s", err)
+		t.Fatalf("failed to call SetSockOpt: %s", err)
 	}
 	return resp.GetRet(), syscall.Errno(resp.GetErrno_())
 }
@@ -542,81 +574,89 @@ func (dut *DUT) setSockOpt(ctx context.Context, sockfd, level, optname int32, op
 // needed, use SetSockOptWithErrno. Because endianess and the width of values
 // might differ between the testbench and DUT architectures, prefer to use a
 // more specific SetSockOptXxx function.
-func (dut *DUT) SetSockOpt(sockfd, level, optname int32, optval []byte) {
-	dut.t.Helper()
+func (dut *DUT) SetSockOpt(t *testing.T, sockfd, level, optname int32, optval []byte) {
+	t.Helper()
+
 	ctx, cancel := context.WithTimeout(context.Background(), RPCTimeout)
 	defer cancel()
-	ret, err := dut.SetSockOptWithErrno(ctx, sockfd, level, optname, optval)
+	ret, err := dut.SetSockOptWithErrno(ctx, t, sockfd, level, optname, optval)
 	if ret != 0 {
-		dut.t.Fatalf("failed to SetSockOpt: %s", err)
+		t.Fatalf("failed to SetSockOpt: %s", err)
 	}
 }
 
 // SetSockOptWithErrno calls setsockopt on the DUT. Because endianess and the
 // width of values might differ between the testbench and DUT architectures,
 // prefer to use a more specific SetSockOptXxxWithErrno function.
-func (dut *DUT) SetSockOptWithErrno(ctx context.Context, sockfd, level, optname int32, optval []byte) (int32, error) {
-	dut.t.Helper()
-	return dut.setSockOpt(ctx, sockfd, level, optname, &pb.SockOptVal{Val: &pb.SockOptVal_Bytesval{optval}})
+func (dut *DUT) SetSockOptWithErrno(ctx context.Context, t *testing.T, sockfd, level, optname int32, optval []byte) (int32, error) {
+	t.Helper()
+
+	return dut.setSockOpt(ctx, t, sockfd, level, optname, &pb.SockOptVal{Val: &pb.SockOptVal_Bytesval{optval}})
 }
 
 // SetSockOptInt calls setsockopt on the DUT and causes a fatal test failure
 // if it doesn't succeed. If more control over the int optval or error handling
 // is needed, use SetSockOptIntWithErrno.
-func (dut *DUT) SetSockOptInt(sockfd, level, optname, optval int32) {
-	dut.t.Helper()
+func (dut *DUT) SetSockOptInt(t *testing.T, sockfd, level, optname, optval int32) {
+	t.Helper()
+
 	ctx, cancel := context.WithTimeout(context.Background(), RPCTimeout)
 	defer cancel()
-	ret, err := dut.SetSockOptIntWithErrno(ctx, sockfd, level, optname, optval)
+	ret, err := dut.SetSockOptIntWithErrno(ctx, t, sockfd, level, optname, optval)
 	if ret != 0 {
-		dut.t.Fatalf("failed to SetSockOptInt: %s", err)
+		t.Fatalf("failed to SetSockOptInt: %s", err)
 	}
 }
 
 // SetSockOptIntWithErrno calls setsockopt with an integer optval.
-func (dut *DUT) SetSockOptIntWithErrno(ctx context.Context, sockfd, level, optname, optval int32) (int32, error) {
-	dut.t.Helper()
-	return dut.setSockOpt(ctx, sockfd, level, optname, &pb.SockOptVal{Val: &pb.SockOptVal_Intval{optval}})
+func (dut *DUT) SetSockOptIntWithErrno(ctx context.Context, t *testing.T, sockfd, level, optname, optval int32) (int32, error) {
+	t.Helper()
+
+	return dut.setSockOpt(ctx, t, sockfd, level, optname, &pb.SockOptVal{Val: &pb.SockOptVal_Intval{optval}})
 }
 
 // SetSockOptTimeval calls setsockopt on the DUT and causes a fatal test failure
 // if it doesn't succeed. If more control over the timeout or error handling is
 // needed, use SetSockOptTimevalWithErrno.
-func (dut *DUT) SetSockOptTimeval(sockfd, level, optname int32, tv *unix.Timeval) {
-	dut.t.Helper()
+func (dut *DUT) SetSockOptTimeval(t *testing.T, sockfd, level, optname int32, tv *unix.Timeval) {
+	t.Helper()
+
 	ctx, cancel := context.WithTimeout(context.Background(), RPCTimeout)
 	defer cancel()
-	ret, err := dut.SetSockOptTimevalWithErrno(ctx, sockfd, level, optname, tv)
+	ret, err := dut.SetSockOptTimevalWithErrno(ctx, t, sockfd, level, optname, tv)
 	if ret != 0 {
-		dut.t.Fatalf("failed to SetSockOptTimeval: %s", err)
+		t.Fatalf("failed to SetSockOptTimeval: %s", err)
 	}
 }
 
 // SetSockOptTimevalWithErrno calls setsockopt with the timeval converted to
 // bytes.
-func (dut *DUT) SetSockOptTimevalWithErrno(ctx context.Context, sockfd, level, optname int32, tv *unix.Timeval) (int32, error) {
-	dut.t.Helper()
+func (dut *DUT) SetSockOptTimevalWithErrno(ctx context.Context, t *testing.T, sockfd, level, optname int32, tv *unix.Timeval) (int32, error) {
+	t.Helper()
+
 	timeval := pb.Timeval{
 		Seconds:      int64(tv.Sec),
 		Microseconds: int64(tv.Usec),
 	}
-	return dut.setSockOpt(ctx, sockfd, level, optname, &pb.SockOptVal{Val: &pb.SockOptVal_Timeval{&timeval}})
+	return dut.setSockOpt(ctx, t, sockfd, level, optname, &pb.SockOptVal{Val: &pb.SockOptVal_Timeval{&timeval}})
 }
 
 // Socket calls socket on the DUT and returns the file descriptor. If socket
 // fails on the DUT, the test ends.
-func (dut *DUT) Socket(domain, typ, proto int32) int32 {
-	dut.t.Helper()
-	fd, err := dut.SocketWithErrno(domain, typ, proto)
+func (dut *DUT) Socket(t *testing.T, domain, typ, proto int32) int32 {
+	t.Helper()
+
+	fd, err := dut.SocketWithErrno(t, domain, typ, proto)
 	if fd < 0 {
-		dut.t.Fatalf("failed to create socket: %s", err)
+		t.Fatalf("failed to create socket: %s", err)
 	}
 	return fd
 }
 
 // SocketWithErrno calls socket on the DUT and returns the fd and errno.
-func (dut *DUT) SocketWithErrno(domain, typ, proto int32) (int32, error) {
-	dut.t.Helper()
+func (dut *DUT) SocketWithErrno(t *testing.T, domain, typ, proto int32) (int32, error) {
+	t.Helper()
+
 	req := pb.SocketRequest{
 		Domain:   domain,
 		Type:     typ,
@@ -625,7 +665,7 @@ func (dut *DUT) SocketWithErrno(domain, typ, proto int32) (int32, error) {
 	ctx := context.Background()
 	resp, err := dut.posixServer.Socket(ctx, &req)
 	if err != nil {
-		dut.t.Fatalf("failed to call Socket: %s", err)
+		t.Fatalf("failed to call Socket: %s", err)
 	}
 	return resp.GetFd(), syscall.Errno(resp.GetErrno_())
 }
@@ -633,20 +673,22 @@ func (dut *DUT) SocketWithErrno(domain, typ, proto int32) (int32, error) {
 // Recv calls recv on the DUT and causes a fatal test failure if it doesn't
 // succeed. If more control over the timeout or error handling is needed, use
 // RecvWithErrno.
-func (dut *DUT) Recv(sockfd, len, flags int32) []byte {
-	dut.t.Helper()
+func (dut *DUT) Recv(t *testing.T, sockfd, len, flags int32) []byte {
+	t.Helper()
+
 	ctx, cancel := context.WithTimeout(context.Background(), RPCTimeout)
 	defer cancel()
-	ret, buf, err := dut.RecvWithErrno(ctx, sockfd, len, flags)
+	ret, buf, err := dut.RecvWithErrno(ctx, t, sockfd, len, flags)
 	if ret == -1 {
-		dut.t.Fatalf("failed to recv: %s", err)
+		t.Fatalf("failed to recv: %s", err)
 	}
 	return buf
 }
 
 // RecvWithErrno calls recv on the DUT.
-func (dut *DUT) RecvWithErrno(ctx context.Context, sockfd, len, flags int32) (int32, []byte, error) {
-	dut.t.Helper()
+func (dut *DUT) RecvWithErrno(ctx context.Context, t *testing.T, sockfd, len, flags int32) (int32, []byte, error) {
+	t.Helper()
+
 	req := pb.RecvRequest{
 		Sockfd: sockfd,
 		Len:    len,
@@ -654,7 +696,7 @@ func (dut *DUT) RecvWithErrno(ctx context.Context, sockfd, len, flags int32) (in
 	}
 	resp, err := dut.posixServer.Recv(ctx, &req)
 	if err != nil {
-		dut.t.Fatalf("failed to call Recv: %s", err)
+		t.Fatalf("failed to call Recv: %s", err)
 	}
 	return resp.GetRet(), resp.GetBuf(), syscall.Errno(resp.GetErrno_())
 }
diff --git a/test/packetimpact/testbench/layers.go b/test/packetimpact/testbench/layers.go
index 645f6c1a9..24aa46cce 100644
--- a/test/packetimpact/testbench/layers.go
+++ b/test/packetimpact/testbench/layers.go
@@ -805,7 +805,11 @@ func (l *ICMPv6) ToBytes() ([]byte, error) {
 		// We need to search forward to find the IPv6 header.
 		for prev := l.Prev(); prev != nil; prev = prev.Prev() {
 			if ipv6, ok := prev.(*IPv6); ok {
-				h.SetChecksum(header.ICMPv6Checksum(h, *ipv6.SrcAddr, *ipv6.DstAddr, buffer.VectorisedView{}))
+				payload, err := payload(l)
+				if err != nil {
+					return nil, err
+				}
+				h.SetChecksum(header.ICMPv6Checksum(h, *ipv6.SrcAddr, *ipv6.DstAddr, payload))
 				break
 			}
 		}
diff --git a/test/packetimpact/testbench/rawsockets.go b/test/packetimpact/testbench/rawsockets.go
index 278229b7e..57e822725 100644
--- a/test/packetimpact/testbench/rawsockets.go
+++ b/test/packetimpact/testbench/rawsockets.go
@@ -28,7 +28,6 @@ import (
 
 // Sniffer can sniff raw packets on the wire.
 type Sniffer struct {
-	t  *testing.T
 	fd int
 }
 
@@ -40,6 +39,8 @@ func htons(x uint16) uint16 {
 
 // NewSniffer creates a Sniffer connected to *device.
 func NewSniffer(t *testing.T) (Sniffer, error) {
+	t.Helper()
+
 	snifferFd, err := unix.Socket(unix.AF_PACKET, unix.SOCK_RAW, int(htons(unix.ETH_P_ALL)))
 	if err != nil {
 		return Sniffer{}, err
@@ -51,7 +52,6 @@ func NewSniffer(t *testing.T) (Sniffer, error) {
 		t.Fatalf("can't setsockopt SO_RCVBUF to 10M: %s", err)
 	}
 	return Sniffer{
-		t:  t,
 		fd: snifferFd,
 	}, nil
 }
@@ -61,7 +61,9 @@ func NewSniffer(t *testing.T) (Sniffer, error) {
 const maxReadSize int = 65536
 
 // Recv tries to read one frame until the timeout is up.
-func (s *Sniffer) Recv(timeout time.Duration) []byte {
+func (s *Sniffer) Recv(t *testing.T, timeout time.Duration) []byte {
+	t.Helper()
+
 	deadline := time.Now().Add(timeout)
 	for {
 		timeout = deadline.Sub(time.Now())
@@ -75,7 +77,7 @@ func (s *Sniffer) Recv(timeout time.Duration) []byte {
 		}
 
 		if err := unix.SetsockoptTimeval(s.fd, unix.SOL_SOCKET, unix.SO_RCVTIMEO, &tv); err != nil {
-			s.t.Fatalf("can't setsockopt SO_RCVTIMEO: %s", err)
+			t.Fatalf("can't setsockopt SO_RCVTIMEO: %s", err)
 		}
 
 		buf := make([]byte, maxReadSize)
@@ -85,10 +87,10 @@ func (s *Sniffer) Recv(timeout time.Duration) []byte {
 			continue
 		}
 		if err != nil {
-			s.t.Fatalf("can't read: %s", err)
+			t.Fatalf("can't read: %s", err)
 		}
 		if nread > maxReadSize {
-			s.t.Fatalf("received a truncated frame of %d bytes", nread)
+			t.Fatalf("received a truncated frame of %d bytes, want at most %d bytes", nread, maxReadSize)
 		}
 		return buf[:nread]
 	}
@@ -96,14 +98,16 @@ func (s *Sniffer) Recv(timeout time.Duration) []byte {
 
 // Drain drains the Sniffer's socket receive buffer by receiving until there's
 // nothing else to receive.
-func (s *Sniffer) Drain() {
-	s.t.Helper()
+func (s *Sniffer) Drain(t *testing.T) {
+	t.Helper()
+
 	flags, err := unix.FcntlInt(uintptr(s.fd), unix.F_GETFL, 0)
 	if err != nil {
-		s.t.Fatalf("failed to get sniffer socket fd flags: %s", err)
+		t.Fatalf("failed to get sniffer socket fd flags: %s", err)
 	}
-	if _, err := unix.FcntlInt(uintptr(s.fd), unix.F_SETFL, flags|unix.O_NONBLOCK); err != nil {
-		s.t.Fatalf("failed to make sniffer socket non-blocking: %s", err)
+	nonBlockingFlags := flags | unix.O_NONBLOCK
+	if _, err := unix.FcntlInt(uintptr(s.fd), unix.F_SETFL, nonBlockingFlags); err != nil {
+		t.Fatalf("failed to make sniffer socket non-blocking with flags %b: %s", nonBlockingFlags, err)
 	}
 	for {
 		buf := make([]byte, maxReadSize)
@@ -113,7 +117,7 @@ func (s *Sniffer) Drain() {
 		}
 	}
 	if _, err := unix.FcntlInt(uintptr(s.fd), unix.F_SETFL, flags); err != nil {
-		s.t.Fatalf("failed to restore sniffer socket fd flags: %s", err)
+		t.Fatalf("failed to restore sniffer socket fd flags to %b: %s", flags, err)
 	}
 }
 
@@ -128,12 +132,13 @@ func (s *Sniffer) close() error {
 
 // Injector can inject raw frames.
 type Injector struct {
-	t  *testing.T
 	fd int
 }
 
 // NewInjector creates a new injector on *device.
 func NewInjector(t *testing.T) (Injector, error) {
+	t.Helper()
+
 	ifInfo, err := net.InterfaceByName(Device)
 	if err != nil {
 		return Injector{}, err
@@ -156,15 +161,20 @@ func NewInjector(t *testing.T) (Injector, error) {
 		return Injector{}, err
 	}
 	return Injector{
-		t:  t,
 		fd: injectFd,
 	}, nil
 }
 
 // Send a raw frame.
-func (i *Injector) Send(b []byte) {
-	if _, err := unix.Write(i.fd, b); err != nil {
-		i.t.Fatalf("can't write: %s of len %d", err, len(b))
+func (i *Injector) Send(t *testing.T, b []byte) {
+	t.Helper()
+
+	n, err := unix.Write(i.fd, b)
+	if err != nil {
+		t.Fatalf("can't write bytes of len %d: %s", len(b), err)
+	}
+	if n != len(b) {
+		t.Fatalf("got %d bytes written, want %d", n, len(b))
 	}
 }
 
diff --git a/test/packetimpact/tests/BUILD b/test/packetimpact/tests/BUILD
index 6a07889be..27905dcff 100644
--- a/test/packetimpact/tests/BUILD
+++ b/test/packetimpact/tests/BUILD
@@ -220,6 +220,16 @@ packetimpact_go_test(
 )
 
 packetimpact_go_test(
+    name = "tcp_network_unreachable",
+    srcs = ["tcp_network_unreachable_test.go"],
+    deps = [
+        "//pkg/tcpip/header",
+        "//test/packetimpact/testbench",
+        "@org_golang_x_sys//unix:go_default_library",
+    ],
+)
+
+packetimpact_go_test(
     name = "tcp_cork_mss",
     srcs = ["tcp_cork_mss_test.go"],
     deps = [
diff --git a/test/packetimpact/tests/fin_wait2_timeout_test.go b/test/packetimpact/tests/fin_wait2_timeout_test.go
index 407565078..a61054c2c 100644
--- a/test/packetimpact/tests/fin_wait2_timeout_test.go
+++ b/test/packetimpact/tests/fin_wait2_timeout_test.go
@@ -39,34 +39,34 @@ func TestFinWait2Timeout(t *testing.T) {
 		t.Run(tt.description, func(t *testing.T) {
 			dut := testbench.NewDUT(t)
 			defer dut.TearDown()
-			listenFd, remotePort := dut.CreateListener(unix.SOCK_STREAM, unix.IPPROTO_TCP, 1)
-			defer dut.Close(listenFd)
+			listenFd, remotePort := dut.CreateListener(t, unix.SOCK_STREAM, unix.IPPROTO_TCP, 1)
+			defer dut.Close(t, listenFd)
 			conn := testbench.NewTCPIPv4(t, testbench.TCP{DstPort: &remotePort}, testbench.TCP{SrcPort: &remotePort})
-			defer conn.Close()
-			conn.Connect()
+			defer conn.Close(t)
+			conn.Connect(t)
 
-			acceptFd, _ := dut.Accept(listenFd)
+			acceptFd, _ := dut.Accept(t, listenFd)
 			if tt.linger2 {
 				tv := unix.Timeval{Sec: 1, Usec: 0}
-				dut.SetSockOptTimeval(acceptFd, unix.SOL_TCP, unix.TCP_LINGER2, &tv)
+				dut.SetSockOptTimeval(t, acceptFd, unix.SOL_TCP, unix.TCP_LINGER2, &tv)
 			}
-			dut.Close(acceptFd)
+			dut.Close(t, acceptFd)
 
-			if _, err := conn.Expect(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagFin | header.TCPFlagAck)}, time.Second); err != nil {
+			if _, err := conn.Expect(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagFin | header.TCPFlagAck)}, time.Second); err != nil {
 				t.Fatalf("expected a FIN-ACK within 1 second but got none: %s", err)
 			}
-			conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)})
+			conn.Send(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)})
 
 			time.Sleep(5 * time.Second)
-			conn.Drain()
+			conn.Drain(t)
 
-			conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)})
+			conn.Send(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)})
 			if tt.linger2 {
-				if _, err := conn.Expect(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagRst)}, time.Second); err != nil {
+				if _, err := conn.Expect(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagRst)}, time.Second); err != nil {
 					t.Fatalf("expected a RST packet within a second but got none: %s", err)
 				}
 			} else {
-				if got, err := conn.Expect(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagRst)}, 10*time.Second); got != nil || err == nil {
+				if got, err := conn.Expect(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagRst)}, 10*time.Second); got != nil || err == nil {
 					t.Fatalf("expected no RST packets within ten seconds but got one: %s", got)
 				}
 			}
diff --git a/test/packetimpact/tests/icmpv6_param_problem_test.go b/test/packetimpact/tests/icmpv6_param_problem_test.go
index 8dfd26ee8..2d59d552d 100644
--- a/test/packetimpact/tests/icmpv6_param_problem_test.go
+++ b/test/packetimpact/tests/icmpv6_param_problem_test.go
@@ -34,7 +34,7 @@ func TestICMPv6ParamProblemTest(t *testing.T) {
 	dut := testbench.NewDUT(t)
 	defer dut.TearDown()
 	conn := testbench.NewIPv6Conn(t, testbench.IPv6{}, testbench.IPv6{})
-	defer conn.Close()
+	defer conn.Close(t)
 	ipv6 := testbench.IPv6{
 		// 254 is reserved and used for experimentation and testing. This should
 		// cause an error.
@@ -45,8 +45,8 @@ func TestICMPv6ParamProblemTest(t *testing.T) {
 		Payload: []byte("hello world"),
 	}
 
-	toSend := (*testbench.Connection)(&conn).CreateFrame(testbench.Layers{&ipv6}, &icmpv6)
-	(*testbench.Connection)(&conn).SendFrame(toSend)
+	toSend := (*testbench.Connection)(&conn).CreateFrame(t, testbench.Layers{&ipv6}, &icmpv6)
+	(*testbench.Connection)(&conn).SendFrame(t, toSend)
 
 	// Build the expected ICMPv6 payload, which includes an index to the
 	// problematic byte and also the problematic packet as described in
@@ -72,7 +72,7 @@ func TestICMPv6ParamProblemTest(t *testing.T) {
 		&expectedICMPv6,
 	}
 	timeout := time.Second
-	if _, err := conn.ExpectFrame(paramProblem, timeout); err != nil {
+	if _, err := conn.ExpectFrame(t, paramProblem, timeout); err != nil {
 		t.Errorf("expected %s within %s but got none: %s", paramProblem, timeout, err)
 	}
 }
diff --git a/test/packetimpact/tests/ipv4_id_uniqueness_test.go b/test/packetimpact/tests/ipv4_id_uniqueness_test.go
index 70f6df5e0..cf881418c 100644
--- a/test/packetimpact/tests/ipv4_id_uniqueness_test.go
+++ b/test/packetimpact/tests/ipv4_id_uniqueness_test.go
@@ -31,8 +31,8 @@ func init() {
 	testbench.RegisterFlags(flag.CommandLine)
 }
 
-func recvTCPSegment(conn *testbench.TCPIPv4, expect *testbench.TCP, expectPayload *testbench.Payload) (uint16, error) {
-	layers, err := conn.ExpectData(expect, expectPayload, time.Second)
+func recvTCPSegment(t *testing.T, conn *testbench.TCPIPv4, expect *testbench.TCP, expectPayload *testbench.Payload) (uint16, error) {
+	layers, err := conn.ExpectData(t, expect, expectPayload, time.Second)
 	if err != nil {
 		return 0, fmt.Errorf("failed to receive TCP segment: %s", err)
 	}
@@ -69,17 +69,17 @@ func TestIPv4RetransmitIdentificationUniqueness(t *testing.T) {
 			dut := testbench.NewDUT(t)
 			defer dut.TearDown()
 
-			listenFD, remotePort := dut.CreateListener(unix.SOCK_STREAM, unix.IPPROTO_TCP, 1)
-			defer dut.Close(listenFD)
+			listenFD, remotePort := dut.CreateListener(t, unix.SOCK_STREAM, unix.IPPROTO_TCP, 1)
+			defer dut.Close(t, listenFD)
 
 			conn := testbench.NewTCPIPv4(t, testbench.TCP{DstPort: &remotePort}, testbench.TCP{SrcPort: &remotePort})
-			defer conn.Close()
+			defer conn.Close(t)
 
-			conn.Connect()
-			remoteFD, _ := dut.Accept(listenFD)
-			defer dut.Close(remoteFD)
+			conn.Connect(t)
+			remoteFD, _ := dut.Accept(t, listenFD)
+			defer dut.Close(t, remoteFD)
 
-			dut.SetSockOptInt(remoteFD, unix.IPPROTO_TCP, unix.TCP_NODELAY, 1)
+			dut.SetSockOptInt(t, remoteFD, unix.IPPROTO_TCP, unix.TCP_NODELAY, 1)
 
 			// TODO(b/129291778) The following socket option clears the DF bit on
 			// IP packets sent over the socket, and is currently not supported by
@@ -87,30 +87,30 @@ func TestIPv4RetransmitIdentificationUniqueness(t *testing.T) {
 			// socket option being not supported does not affect the operation of
 			// this test. Once the socket option is supported, the following call
 			// can be changed to simply assert success.
-			ret, errno := dut.SetSockOptIntWithErrno(context.Background(), remoteFD, unix.IPPROTO_IP, linux.IP_MTU_DISCOVER, linux.IP_PMTUDISC_DONT)
+			ret, errno := dut.SetSockOptIntWithErrno(context.Background(), t, remoteFD, unix.IPPROTO_IP, linux.IP_MTU_DISCOVER, linux.IP_PMTUDISC_DONT)
 			if ret == -1 && errno != unix.ENOTSUP {
 				t.Fatalf("failed to set IP_MTU_DISCOVER socket option to IP_PMTUDISC_DONT: %s", errno)
 			}
 
 			samplePayload := &testbench.Payload{Bytes: tc.payload}
 
-			dut.Send(remoteFD, tc.payload, 0)
-			if _, err := conn.ExpectData(&testbench.TCP{}, samplePayload, time.Second); err != nil {
+			dut.Send(t, remoteFD, tc.payload, 0)
+			if _, err := conn.ExpectData(t, &testbench.TCP{}, samplePayload, time.Second); err != nil {
 				t.Fatalf("failed to receive TCP segment sent for RTT calculation: %s", err)
 			}
 			// Let the DUT estimate RTO with RTT from the DATA-ACK.
 			// TODO(gvisor.dev/issue/2685) Estimate RTO during handshake, after which
 			// we can skip sending this ACK.
-			conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)})
+			conn.Send(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)})
 
-			dut.Send(remoteFD, tc.payload, 0)
-			expectTCP := &testbench.TCP{SeqNum: testbench.Uint32(uint32(*conn.RemoteSeqNum()))}
-			originalID, err := recvTCPSegment(&conn, expectTCP, samplePayload)
+			dut.Send(t, remoteFD, tc.payload, 0)
+			expectTCP := &testbench.TCP{SeqNum: testbench.Uint32(uint32(*conn.RemoteSeqNum(t)))}
+			originalID, err := recvTCPSegment(t, &conn, expectTCP, samplePayload)
 			if err != nil {
 				t.Fatalf("failed to receive TCP segment: %s", err)
 			}
 
-			retransmitID, err := recvTCPSegment(&conn, expectTCP, samplePayload)
+			retransmitID, err := recvTCPSegment(t, &conn, expectTCP, samplePayload)
 			if err != nil {
 				t.Fatalf("failed to receive retransmitted TCP segment: %s", err)
 			}
diff --git a/test/packetimpact/tests/ipv6_fragment_reassembly_test.go b/test/packetimpact/tests/ipv6_fragment_reassembly_test.go
index 7b462c8e2..b5f94ad4b 100644
--- a/test/packetimpact/tests/ipv6_fragment_reassembly_test.go
+++ b/test/packetimpact/tests/ipv6_fragment_reassembly_test.go
@@ -48,7 +48,7 @@ func TestIPv6FragmentReassembly(t *testing.T) {
 	dut := testbench.NewDUT(t)
 	defer dut.TearDown()
 	conn := testbench.NewIPv6Conn(t, testbench.IPv6{}, testbench.IPv6{})
-	defer conn.Close()
+	defer conn.Close(t)
 
 	firstPayloadToSend := make([]byte, firstPayloadLength)
 	for i := range firstPayloadToSend {
@@ -81,7 +81,7 @@ func TestIPv6FragmentReassembly(t *testing.T) {
 		buffer.NewVectorisedView(len(secondPayloadToSend), []buffer.View{secondPayloadToSend}),
 	)
 
-	conn.Send(testbench.IPv6{},
+	conn.Send(t, testbench.IPv6{},
 		&testbench.IPv6FragmentExtHdr{
 			FragmentOffset: testbench.Uint16(0),
 			MoreFragments:  testbench.Bool(true),
@@ -96,7 +96,7 @@ func TestIPv6FragmentReassembly(t *testing.T) {
 
 	icmpv6ProtoNum := header.IPv6ExtensionHeaderIdentifier(header.ICMPv6ProtocolNumber)
 
-	conn.Send(testbench.IPv6{},
+	conn.Send(t, testbench.IPv6{},
 		&testbench.IPv6FragmentExtHdr{
 			NextHeader:     &icmpv6ProtoNum,
 			FragmentOffset: testbench.Uint16((firstPayloadLength + header.ICMPv6EchoMinimumSize) / 8),
@@ -107,7 +107,7 @@ func TestIPv6FragmentReassembly(t *testing.T) {
 			Bytes: secondPayloadToSend,
 		})
 
-	gotEchoReplyFirstPart, err := conn.ExpectFrame(testbench.Layers{
+	gotEchoReplyFirstPart, err := conn.ExpectFrame(t, testbench.Layers{
 		&testbench.Ether{},
 		&testbench.IPv6{},
 		&testbench.IPv6FragmentExtHdr{
@@ -142,7 +142,7 @@ func TestIPv6FragmentReassembly(t *testing.T) {
 			hex.Dump(wantFirstPayload))
 	}
 
-	gotEchoReplySecondPart, err := conn.ExpectFrame(testbench.Layers{
+	gotEchoReplySecondPart, err := conn.ExpectFrame(t, testbench.Layers{
 		&testbench.Ether{},
 		&testbench.IPv6{},
 		&testbench.IPv6FragmentExtHdr{
diff --git a/test/packetimpact/tests/ipv6_unknown_options_action_test.go b/test/packetimpact/tests/ipv6_unknown_options_action_test.go
index 100b30ad7..d7d63cbd2 100644
--- a/test/packetimpact/tests/ipv6_unknown_options_action_test.go
+++ b/test/packetimpact/tests/ipv6_unknown_options_action_test.go
@@ -23,21 +23,21 @@ import (
 
 	"gvisor.dev/gvisor/pkg/tcpip"
 	"gvisor.dev/gvisor/pkg/tcpip/header"
-	tb "gvisor.dev/gvisor/test/packetimpact/testbench"
+	"gvisor.dev/gvisor/test/packetimpact/testbench"
 )
 
 func init() {
-	tb.RegisterFlags(flag.CommandLine)
+	testbench.RegisterFlags(flag.CommandLine)
 }
 
-func mkHopByHopOptionsExtHdr(optType byte) tb.Layer {
-	return &tb.IPv6HopByHopOptionsExtHdr{
+func mkHopByHopOptionsExtHdr(optType byte) testbench.Layer {
+	return &testbench.IPv6HopByHopOptionsExtHdr{
 		Options: []byte{optType, 0x04, 0x00, 0x00, 0x00, 0x00},
 	}
 }
 
-func mkDestinationOptionsExtHdr(optType byte) tb.Layer {
-	return &tb.IPv6DestinationOptionsExtHdr{
+func mkDestinationOptionsExtHdr(optType byte) testbench.Layer {
+	return &testbench.IPv6DestinationOptionsExtHdr{
 		Options: []byte{optType, 0x04, 0x00, 0x00, 0x00, 0x00},
 	}
 }
@@ -49,7 +49,7 @@ func optionTypeFromAction(action header.IPv6OptionUnknownAction) byte {
 func TestIPv6UnknownOptionAction(t *testing.T) {
 	for _, tt := range []struct {
 		description  string
-		mkExtHdr     func(optType byte) tb.Layer
+		mkExtHdr     func(optType byte) testbench.Layer
 		action       header.IPv6OptionUnknownAction
 		multicastDst bool
 		wantICMPv6   bool
@@ -140,21 +140,21 @@ func TestIPv6UnknownOptionAction(t *testing.T) {
 		},
 	} {
 		t.Run(tt.description, func(t *testing.T) {
-			dut := tb.NewDUT(t)
+			dut := testbench.NewDUT(t)
 			defer dut.TearDown()
-			ipv6Conn := tb.NewIPv6Conn(t, tb.IPv6{}, tb.IPv6{})
-			conn := (*tb.Connection)(&ipv6Conn)
-			defer ipv6Conn.Close()
+			ipv6Conn := testbench.NewIPv6Conn(t, testbench.IPv6{}, testbench.IPv6{})
+			conn := (*testbench.Connection)(&ipv6Conn)
+			defer ipv6Conn.Close(t)
 
-			outgoingOverride := tb.Layers{}
+			outgoingOverride := testbench.Layers{}
 			if tt.multicastDst {
-				outgoingOverride = tb.Layers{&tb.IPv6{
-					DstAddr: tb.Address(tcpip.Address(net.ParseIP("ff02::1"))),
+				outgoingOverride = testbench.Layers{&testbench.IPv6{
+					DstAddr: testbench.Address(tcpip.Address(net.ParseIP("ff02::1"))),
 				}}
 			}
 
-			outgoing := conn.CreateFrame(outgoingOverride, tt.mkExtHdr(optionTypeFromAction(tt.action)))
-			conn.SendFrame(outgoing)
+			outgoing := conn.CreateFrame(t, outgoingOverride, tt.mkExtHdr(optionTypeFromAction(tt.action)))
+			conn.SendFrame(t, outgoing)
 			ipv6Sent := outgoing[1:]
 			invokingPacket, err := ipv6Sent.ToBytes()
 			if err != nil {
@@ -167,12 +167,12 @@ func TestIPv6UnknownOptionAction(t *testing.T) {
 			// after the IPv6 header (after NextHeader and ExtHdrLen).
 			binary.BigEndian.PutUint32(icmpv6Payload, header.IPv6MinimumSize+2)
 			icmpv6Payload = append(icmpv6Payload, invokingPacket...)
-			gotICMPv6, err := ipv6Conn.ExpectFrame(tb.Layers{
-				&tb.Ether{},
-				&tb.IPv6{},
-				&tb.ICMPv6{
-					Type:    tb.ICMPv6Type(header.ICMPv6ParamProblem),
-					Code:    tb.Byte(2),
+			gotICMPv6, err := ipv6Conn.ExpectFrame(t, testbench.Layers{
+				&testbench.Ether{},
+				&testbench.IPv6{},
+				&testbench.ICMPv6{
+					Type:    testbench.ICMPv6Type(header.ICMPv6ParamProblem),
+					Code:    testbench.Byte(2),
 					Payload: icmpv6Payload,
 				},
 			}, time.Second)
diff --git a/test/packetimpact/tests/tcp_close_wait_ack_test.go b/test/packetimpact/tests/tcp_close_wait_ack_test.go
index 6e7ff41d7..e6a96f214 100644
--- a/test/packetimpact/tests/tcp_close_wait_ack_test.go
+++ b/test/packetimpact/tests/tcp_close_wait_ack_test.go
@@ -33,39 +33,39 @@ func init() {
 func TestCloseWaitAck(t *testing.T) {
 	for _, tt := range []struct {
 		description    string
-		makeTestingTCP func(conn *testbench.TCPIPv4, seqNumOffset seqnum.Size, windowSize seqnum.Size) testbench.TCP
+		makeTestingTCP func(t *testing.T, conn *testbench.TCPIPv4, seqNumOffset, windowSize seqnum.Size) testbench.TCP
 		seqNumOffset   seqnum.Size
 		expectAck      bool
 	}{
-		{"OTW", GenerateOTWSeqSegment, 0, false},
-		{"OTW", GenerateOTWSeqSegment, 1, true},
-		{"OTW", GenerateOTWSeqSegment, 2, true},
-		{"ACK", GenerateUnaccACKSegment, 0, false},
-		{"ACK", GenerateUnaccACKSegment, 1, true},
-		{"ACK", GenerateUnaccACKSegment, 2, true},
+		{"OTW", generateOTWSeqSegment, 0, false},
+		{"OTW", generateOTWSeqSegment, 1, true},
+		{"OTW", generateOTWSeqSegment, 2, true},
+		{"ACK", generateUnaccACKSegment, 0, false},
+		{"ACK", generateUnaccACKSegment, 1, true},
+		{"ACK", generateUnaccACKSegment, 2, true},
 	} {
 		t.Run(fmt.Sprintf("%s%d", tt.description, tt.seqNumOffset), func(t *testing.T) {
 			dut := testbench.NewDUT(t)
 			defer dut.TearDown()
-			listenFd, remotePort := dut.CreateListener(unix.SOCK_STREAM, unix.IPPROTO_TCP, 1)
-			defer dut.Close(listenFd)
+			listenFd, remotePort := dut.CreateListener(t, unix.SOCK_STREAM, unix.IPPROTO_TCP, 1)
+			defer dut.Close(t, listenFd)
 			conn := testbench.NewTCPIPv4(t, testbench.TCP{DstPort: &remotePort}, testbench.TCP{SrcPort: &remotePort})
-			defer conn.Close()
+			defer conn.Close(t)
 
-			conn.Connect()
-			acceptFd, _ := dut.Accept(listenFd)
+			conn.Connect(t)
+			acceptFd, _ := dut.Accept(t, listenFd)
 
 			// Send a FIN to DUT to intiate the active close
-			conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck | header.TCPFlagFin)})
-			gotTCP, err := conn.Expect(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}, time.Second)
+			conn.Send(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck | header.TCPFlagFin)})
+			gotTCP, err := conn.Expect(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}, time.Second)
 			if err != nil {
 				t.Fatalf("expected an ACK for our fin and DUT should enter CLOSE_WAIT: %s", err)
 			}
 			windowSize := seqnum.Size(*gotTCP.WindowSize)
 
 			// Send a segment with OTW Seq / unacc ACK and expect an ACK back
-			conn.Send(tt.makeTestingTCP(&conn, tt.seqNumOffset, windowSize), &testbench.Payload{Bytes: []byte("Sample Data")})
-			gotAck, err := conn.Expect(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}, time.Second)
+			conn.Send(t, tt.makeTestingTCP(t, &conn, tt.seqNumOffset, windowSize), &testbench.Payload{Bytes: []byte("Sample Data")})
+			gotAck, err := conn.Expect(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}, time.Second)
 			if tt.expectAck && err != nil {
 				t.Fatalf("expected an ack but got none: %s", err)
 			}
@@ -74,35 +74,36 @@ func TestCloseWaitAck(t *testing.T) {
 			}
 
 			// Now let's verify DUT is indeed in CLOSE_WAIT
-			dut.Close(acceptFd)
-			if _, err := conn.Expect(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck | header.TCPFlagFin)}, time.Second); err != nil {
+			dut.Close(t, acceptFd)
+			if _, err := conn.Expect(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck | header.TCPFlagFin)}, time.Second); err != nil {
 				t.Fatalf("expected DUT to send a FIN: %s", err)
 			}
 			// Ack the FIN from DUT
-			conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)})
+			conn.Send(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)})
 			// Send some extra data to DUT
-			conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}, &testbench.Payload{Bytes: []byte("Sample Data")})
-			if _, err := conn.Expect(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagRst)}, time.Second); err != nil {
+			conn.Send(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}, &testbench.Payload{Bytes: []byte("Sample Data")})
+			if _, err := conn.Expect(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagRst)}, time.Second); err != nil {
 				t.Fatalf("expected DUT to send an RST: %s", err)
 			}
 		})
 	}
 }
 
-// This generates an segment with seqnum = RCV.NXT + RCV.WND + seqNumOffset, the
-// generated segment is only acceptable when seqNumOffset is 0, otherwise an ACK
-// is expected from the receiver.
-func GenerateOTWSeqSegment(conn *testbench.TCPIPv4, seqNumOffset seqnum.Size, windowSize seqnum.Size) testbench.TCP {
-	lastAcceptable := conn.LocalSeqNum().Add(windowSize)
+// generateOTWSeqSegment generates an segment with
+// seqnum = RCV.NXT + RCV.WND + seqNumOffset, the generated segment is only
+// acceptable when seqNumOffset is 0, otherwise an ACK is expected from the
+// receiver.
+func generateOTWSeqSegment(t *testing.T, conn *testbench.TCPIPv4, seqNumOffset seqnum.Size, windowSize seqnum.Size) testbench.TCP {
+	lastAcceptable := conn.LocalSeqNum(t).Add(windowSize)
 	otwSeq := uint32(lastAcceptable.Add(seqNumOffset))
 	return testbench.TCP{SeqNum: testbench.Uint32(otwSeq), Flags: testbench.Uint8(header.TCPFlagAck)}
 }
 
-// This generates an segment with acknum = SND.NXT + seqNumOffset, the generated
-// segment is only acceptable when seqNumOffset is 0, otherwise an ACK is
-// expected from the receiver.
-func GenerateUnaccACKSegment(conn *testbench.TCPIPv4, seqNumOffset seqnum.Size, windowSize seqnum.Size) testbench.TCP {
-	lastAcceptable := conn.RemoteSeqNum()
+// generateUnaccACKSegment generates an segment with
+// acknum = SND.NXT + seqNumOffset, the generated segment is only acceptable
+// when seqNumOffset is 0, otherwise an ACK is expected from the receiver.
+func generateUnaccACKSegment(t *testing.T, conn *testbench.TCPIPv4, seqNumOffset seqnum.Size, windowSize seqnum.Size) testbench.TCP {
+	lastAcceptable := conn.RemoteSeqNum(t)
 	unaccAck := uint32(lastAcceptable.Add(seqNumOffset))
 	return testbench.TCP{AckNum: testbench.Uint32(unaccAck), Flags: testbench.Uint8(header.TCPFlagAck)}
 }
diff --git a/test/packetimpact/tests/tcp_cork_mss_test.go b/test/packetimpact/tests/tcp_cork_mss_test.go
index fb8f48629..8feea4a82 100644
--- a/test/packetimpact/tests/tcp_cork_mss_test.go
+++ b/test/packetimpact/tests/tcp_cork_mss_test.go
@@ -32,53 +32,53 @@ func init() {
 func TestTCPCorkMSS(t *testing.T) {
 	dut := testbench.NewDUT(t)
 	defer dut.TearDown()
-	listenFD, remotePort := dut.CreateListener(unix.SOCK_STREAM, unix.IPPROTO_TCP, 1)
-	defer dut.Close(listenFD)
+	listenFD, remotePort := dut.CreateListener(t, unix.SOCK_STREAM, unix.IPPROTO_TCP, 1)
+	defer dut.Close(t, listenFD)
 	conn := testbench.NewTCPIPv4(t, testbench.TCP{DstPort: &remotePort}, testbench.TCP{SrcPort: &remotePort})
-	defer conn.Close()
+	defer conn.Close(t)
 
 	const mss = uint32(header.TCPDefaultMSS)
 	options := make([]byte, header.TCPOptionMSSLength)
 	header.EncodeMSSOption(mss, options)
-	conn.ConnectWithOptions(options)
+	conn.ConnectWithOptions(t, options)
 
-	acceptFD, _ := dut.Accept(listenFD)
-	defer dut.Close(acceptFD)
+	acceptFD, _ := dut.Accept(t, listenFD)
+	defer dut.Close(t, acceptFD)
 
-	dut.SetSockOptInt(acceptFD, unix.IPPROTO_TCP, unix.TCP_CORK, 1)
+	dut.SetSockOptInt(t, acceptFD, unix.IPPROTO_TCP, unix.TCP_CORK, 1)
 
 	// Let the dut application send 2 small segments to be held up and coalesced
 	// until the application sends a larger segment to fill up to > MSS.
 	sampleData := []byte("Sample Data")
-	dut.Send(acceptFD, sampleData, 0)
-	dut.Send(acceptFD, sampleData, 0)
+	dut.Send(t, acceptFD, sampleData, 0)
+	dut.Send(t, acceptFD, sampleData, 0)
 
 	expectedData := sampleData
 	expectedData = append(expectedData, sampleData...)
 	largeData := make([]byte, mss+1)
 	expectedData = append(expectedData, largeData...)
-	dut.Send(acceptFD, largeData, 0)
+	dut.Send(t, acceptFD, largeData, 0)
 
 	// Expect the segments to be coalesced and sent and capped to MSS.
 	expectedPayload := testbench.Payload{Bytes: expectedData[:mss]}
-	if _, err := conn.ExpectData(&testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}, &expectedPayload, time.Second); err != nil {
+	if _, err := conn.ExpectData(t, &testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}, &expectedPayload, time.Second); err != nil {
 		t.Fatalf("expected payload was not received: %s", err)
 	}
-	conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)})
+	conn.Send(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)})
 	// Expect the coalesced segment to be split and transmitted.
 	expectedPayload = testbench.Payload{Bytes: expectedData[mss:]}
-	if _, err := conn.ExpectData(&testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck | header.TCPFlagPsh)}, &expectedPayload, time.Second); err != nil {
+	if _, err := conn.ExpectData(t, &testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck | header.TCPFlagPsh)}, &expectedPayload, time.Second); err != nil {
 		t.Fatalf("expected payload was not received: %s", err)
 	}
 
 	// Check for segments to *not* be held up because of TCP_CORK when
 	// the current send window is less than MSS.
-	conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck), WindowSize: testbench.Uint16(uint16(2 * len(sampleData)))})
-	dut.Send(acceptFD, sampleData, 0)
-	dut.Send(acceptFD, sampleData, 0)
+	conn.Send(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck), WindowSize: testbench.Uint16(uint16(2 * len(sampleData)))})
+	dut.Send(t, acceptFD, sampleData, 0)
+	dut.Send(t, acceptFD, sampleData, 0)
 	expectedPayload = testbench.Payload{Bytes: append(sampleData, sampleData...)}
-	if _, err := conn.ExpectData(&testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck | header.TCPFlagPsh)}, &expectedPayload, time.Second); err != nil {
+	if _, err := conn.ExpectData(t, &testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck | header.TCPFlagPsh)}, &expectedPayload, time.Second); err != nil {
 		t.Fatalf("expected payload was not received: %s", err)
 	}
-	conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)})
+	conn.Send(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)})
 }
diff --git a/test/packetimpact/tests/tcp_handshake_window_size_test.go b/test/packetimpact/tests/tcp_handshake_window_size_test.go
index 652b530d0..22937d92f 100644
--- a/test/packetimpact/tests/tcp_handshake_window_size_test.go
+++ b/test/packetimpact/tests/tcp_handshake_window_size_test.go
@@ -33,14 +33,14 @@ func init() {
 func TestTCPHandshakeWindowSize(t *testing.T) {
 	dut := testbench.NewDUT(t)
 	defer dut.TearDown()
-	listenFD, remotePort := dut.CreateListener(unix.SOCK_STREAM, unix.IPPROTO_TCP, 1)
-	defer dut.Close(listenFD)
+	listenFD, remotePort := dut.CreateListener(t, unix.SOCK_STREAM, unix.IPPROTO_TCP, 1)
+	defer dut.Close(t, listenFD)
 	conn := testbench.NewTCPIPv4(t, testbench.TCP{DstPort: &remotePort}, testbench.TCP{SrcPort: &remotePort})
-	defer conn.Close()
+	defer conn.Close(t)
 
 	// Start handshake with zero window size.
-	conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagSyn), WindowSize: testbench.Uint16(uint16(0))})
-	if _, err := conn.ExpectData(&testbench.TCP{Flags: testbench.Uint8(header.TCPFlagSyn | header.TCPFlagAck)}, nil, time.Second); err != nil {
+	conn.Send(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagSyn), WindowSize: testbench.Uint16(uint16(0))})
+	if _, err := conn.ExpectData(t, &testbench.TCP{Flags: testbench.Uint8(header.TCPFlagSyn | header.TCPFlagAck)}, nil, time.Second); err != nil {
 		t.Fatalf("expected SYN-ACK: %s", err)
 	}
 	// Update the advertised window size to a non-zero value with the ACK that
@@ -48,10 +48,10 @@ func TestTCPHandshakeWindowSize(t *testing.T) {
 	//
 	// Set the window size with MSB set and expect the dut to treat it as
 	// an unsigned value.
-	conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck), WindowSize: testbench.Uint16(uint16(1 << 15))})
+	conn.Send(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck), WindowSize: testbench.Uint16(uint16(1 << 15))})
 
-	acceptFd, _ := dut.Accept(listenFD)
-	defer dut.Close(acceptFd)
+	acceptFd, _ := dut.Accept(t, listenFD)
+	defer dut.Close(t, acceptFd)
 
 	sampleData := []byte("Sample Data")
 	samplePayload := &testbench.Payload{Bytes: sampleData}
@@ -59,8 +59,8 @@ func TestTCPHandshakeWindowSize(t *testing.T) {
 	// Since we advertised a zero window followed by a non-zero window,
 	// expect the dut to honor the recently advertised non-zero window
 	// and actually send out the data instead of probing for zero window.
-	dut.Send(acceptFd, sampleData, 0)
-	if _, err := conn.ExpectNextData(&testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck | header.TCPFlagPsh)}, samplePayload, time.Second); err != nil {
+	dut.Send(t, acceptFd, sampleData, 0)
+	if _, err := conn.ExpectNextData(t, &testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck | header.TCPFlagPsh)}, samplePayload, time.Second); err != nil {
 		t.Fatalf("expected payload was not received: %s", err)
 	}
 }
diff --git a/test/packetimpact/tests/tcp_network_unreachable_test.go b/test/packetimpact/tests/tcp_network_unreachable_test.go
new file mode 100644
index 000000000..900352fa1
--- /dev/null
+++ b/test/packetimpact/tests/tcp_network_unreachable_test.go
@@ -0,0 +1,139 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package tcp_synsent_reset_test
+
+import (
+	"context"
+	"flag"
+	"net"
+	"syscall"
+	"testing"
+	"time"
+
+	"golang.org/x/sys/unix"
+	"gvisor.dev/gvisor/pkg/tcpip/header"
+	"gvisor.dev/gvisor/test/packetimpact/testbench"
+)
+
+func init() {
+	testbench.RegisterFlags(flag.CommandLine)
+}
+
+// TestTCPSynSentUnreachable verifies that TCP connections fail immediately when
+// an ICMP destination unreachable message is sent in response to the inital
+// SYN.
+func TestTCPSynSentUnreachable(t *testing.T) {
+	// Create the DUT and connection.
+	dut := testbench.NewDUT(t)
+	defer dut.TearDown()
+	clientFD, clientPort := dut.CreateBoundSocket(t, unix.SOCK_STREAM|unix.SOCK_NONBLOCK, unix.IPPROTO_TCP, net.ParseIP(testbench.RemoteIPv4))
+	port := uint16(9001)
+	conn := testbench.NewTCPIPv4(t, testbench.TCP{SrcPort: &port, DstPort: &clientPort}, testbench.TCP{SrcPort: &clientPort, DstPort: &port})
+	defer conn.Close(t)
+
+	// Bring the DUT to SYN-SENT state with a non-blocking connect.
+	ctx, cancel := context.WithTimeout(context.Background(), testbench.RPCTimeout)
+	defer cancel()
+	sa := unix.SockaddrInet4{Port: int(port)}
+	copy(sa.Addr[:], net.IP(net.ParseIP(testbench.LocalIPv4)).To4())
+	if _, err := dut.ConnectWithErrno(ctx, t, clientFD, &sa); err != syscall.Errno(unix.EINPROGRESS) {
+		t.Errorf("expected connect to fail with EINPROGRESS, but got %v", err)
+	}
+
+	// Get the SYN.
+	tcpLayers, err := conn.ExpectData(t, &testbench.TCP{Flags: testbench.Uint8(header.TCPFlagSyn)}, nil, time.Second)
+	if err != nil {
+		t.Fatalf("expected SYN: %s", err)
+	}
+
+	// Send a host unreachable message.
+	rawConn := (*testbench.Connection)(&conn)
+	layers := rawConn.CreateFrame(t, nil)
+	layers = layers[:len(layers)-1]
+	const ipLayer = 1
+	const tcpLayer = ipLayer + 1
+	ip, ok := tcpLayers[ipLayer].(*testbench.IPv4)
+	if !ok {
+		t.Fatalf("expected %s to be IPv4", tcpLayers[ipLayer])
+	}
+	tcp, ok := tcpLayers[tcpLayer].(*testbench.TCP)
+	if !ok {
+		t.Fatalf("expected %s to be TCP", tcpLayers[tcpLayer])
+	}
+	var icmpv4 testbench.ICMPv4 = testbench.ICMPv4{Type: testbench.ICMPv4Type(header.ICMPv4DstUnreachable), Code: testbench.Uint8(header.ICMPv4HostUnreachable)}
+	layers = append(layers, &icmpv4, ip, tcp)
+	rawConn.SendFrameStateless(t, layers)
+
+	if _, err = dut.ConnectWithErrno(ctx, t, clientFD, &sa); err != syscall.Errno(unix.EHOSTUNREACH) {
+		t.Errorf("expected connect to fail with EHOSTUNREACH, but got %v", err)
+	}
+}
+
+// TestTCPSynSentUnreachable6 verifies that TCP connections fail immediately when
+// an ICMP destination unreachable message is sent in response to the inital
+// SYN.
+func TestTCPSynSentUnreachable6(t *testing.T) {
+	// Create the DUT and connection.
+	dut := testbench.NewDUT(t)
+	defer dut.TearDown()
+	clientFD, clientPort := dut.CreateBoundSocket(t, unix.SOCK_STREAM|unix.SOCK_NONBLOCK, unix.IPPROTO_TCP, net.ParseIP(testbench.RemoteIPv6))
+	conn := testbench.NewTCPIPv6(t, testbench.TCP{DstPort: &clientPort}, testbench.TCP{SrcPort: &clientPort})
+	defer conn.Close(t)
+
+	// Bring the DUT to SYN-SENT state with a non-blocking connect.
+	ctx, cancel := context.WithTimeout(context.Background(), testbench.RPCTimeout)
+	defer cancel()
+	sa := unix.SockaddrInet6{
+		Port:   int(conn.SrcPort()),
+		ZoneId: uint32(testbench.RemoteInterfaceID),
+	}
+	copy(sa.Addr[:], net.IP(net.ParseIP(testbench.LocalIPv6)).To16())
+	if _, err := dut.ConnectWithErrno(ctx, t, clientFD, &sa); err != syscall.Errno(unix.EINPROGRESS) {
+		t.Errorf("expected connect to fail with EINPROGRESS, but got %v", err)
+	}
+
+	// Get the SYN.
+	tcpLayers, err := conn.ExpectData(t, &testbench.TCP{Flags: testbench.Uint8(header.TCPFlagSyn)}, nil, time.Second)
+	if err != nil {
+		t.Fatalf("expected SYN: %s", err)
+	}
+
+	// Send a host unreachable message.
+	rawConn := (*testbench.Connection)(&conn)
+	layers := rawConn.CreateFrame(t, nil)
+	layers = layers[:len(layers)-1]
+	const ipLayer = 1
+	const tcpLayer = ipLayer + 1
+	ip, ok := tcpLayers[ipLayer].(*testbench.IPv6)
+	if !ok {
+		t.Fatalf("expected %s to be IPv6", tcpLayers[ipLayer])
+	}
+	tcp, ok := tcpLayers[tcpLayer].(*testbench.TCP)
+	if !ok {
+		t.Fatalf("expected %s to be TCP", tcpLayers[tcpLayer])
+	}
+	var icmpv6 testbench.ICMPv6 = testbench.ICMPv6{
+		Type: testbench.ICMPv6Type(header.ICMPv6DstUnreachable),
+		Code: testbench.Uint8(header.ICMPv6NetworkUnreachable),
+		// Per RFC 4443 3.1, the payload contains 4 zeroed bytes.
+		Payload: []byte{0, 0, 0, 0},
+	}
+	layers = append(layers, &icmpv6, ip, tcp)
+	rawConn.SendFrameStateless(t, layers)
+
+	if _, err = dut.ConnectWithErrno(ctx, t, clientFD, &sa); err != syscall.Errno(unix.ENETUNREACH) {
+		t.Errorf("expected connect to fail with ENETUNREACH, but got %v", err)
+	}
+}
diff --git a/test/packetimpact/tests/tcp_noaccept_close_rst_test.go b/test/packetimpact/tests/tcp_noaccept_close_rst_test.go
index b9b3e91d3..82b7a85ff 100644
--- a/test/packetimpact/tests/tcp_noaccept_close_rst_test.go
+++ b/test/packetimpact/tests/tcp_noaccept_close_rst_test.go
@@ -31,12 +31,12 @@ func init() {
 func TestTcpNoAcceptCloseReset(t *testing.T) {
 	dut := testbench.NewDUT(t)
 	defer dut.TearDown()
-	listenFd, remotePort := dut.CreateListener(unix.SOCK_STREAM, unix.IPPROTO_TCP, 1)
+	listenFd, remotePort := dut.CreateListener(t, unix.SOCK_STREAM, unix.IPPROTO_TCP, 1)
 	conn := testbench.NewTCPIPv4(t, testbench.TCP{DstPort: &remotePort}, testbench.TCP{SrcPort: &remotePort})
-	conn.Connect()
-	defer conn.Close()
-	dut.Close(listenFd)
-	if _, err := conn.Expect(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagRst | header.TCPFlagAck)}, 1*time.Second); err != nil {
+	conn.Connect(t)
+	defer conn.Close(t)
+	dut.Close(t, listenFd)
+	if _, err := conn.Expect(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagRst | header.TCPFlagAck)}, 1*time.Second); err != nil {
 		t.Fatalf("expected a RST-ACK packet but got none: %s", err)
 	}
 }
diff --git a/test/packetimpact/tests/tcp_outside_the_window_test.go b/test/packetimpact/tests/tcp_outside_the_window_test.go
index ad8c74234..08f759f7c 100644
--- a/test/packetimpact/tests/tcp_outside_the_window_test.go
+++ b/test/packetimpact/tests/tcp_outside_the_window_test.go
@@ -63,25 +63,25 @@ func TestTCPOutsideTheWindow(t *testing.T) {
 		t.Run(fmt.Sprintf("%s%d", tt.description, tt.seqNumOffset), func(t *testing.T) {
 			dut := testbench.NewDUT(t)
 			defer dut.TearDown()
-			listenFD, remotePort := dut.CreateListener(unix.SOCK_STREAM, unix.IPPROTO_TCP, 1)
-			defer dut.Close(listenFD)
+			listenFD, remotePort := dut.CreateListener(t, unix.SOCK_STREAM, unix.IPPROTO_TCP, 1)
+			defer dut.Close(t, listenFD)
 			conn := testbench.NewTCPIPv4(t, testbench.TCP{DstPort: &remotePort}, testbench.TCP{SrcPort: &remotePort})
-			defer conn.Close()
-			conn.Connect()
-			acceptFD, _ := dut.Accept(listenFD)
-			defer dut.Close(acceptFD)
+			defer conn.Close(t)
+			conn.Connect(t)
+			acceptFD, _ := dut.Accept(t, listenFD)
+			defer dut.Close(t, acceptFD)
 
-			windowSize := seqnum.Size(*conn.SynAck().WindowSize) + tt.seqNumOffset
-			conn.Drain()
+			windowSize := seqnum.Size(*conn.SynAck(t).WindowSize) + tt.seqNumOffset
+			conn.Drain(t)
 			// Ignore whatever incrementing that this out-of-order packet might cause
 			// to the AckNum.
-			localSeqNum := testbench.Uint32(uint32(*conn.LocalSeqNum()))
-			conn.Send(testbench.TCP{
+			localSeqNum := testbench.Uint32(uint32(*conn.LocalSeqNum(t)))
+			conn.Send(t, testbench.TCP{
 				Flags:  testbench.Uint8(tt.tcpFlags),
-				SeqNum: testbench.Uint32(uint32(conn.LocalSeqNum().Add(windowSize))),
+				SeqNum: testbench.Uint32(uint32(conn.LocalSeqNum(t).Add(windowSize))),
 			}, tt.payload...)
 			timeout := 3 * time.Second
-			gotACK, err := conn.Expect(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck), AckNum: localSeqNum}, timeout)
+			gotACK, err := conn.Expect(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck), AckNum: localSeqNum}, timeout)
 			if tt.expectACK && err != nil {
 				t.Fatalf("expected an ACK packet within %s but got none: %s", timeout, err)
 			}
diff --git a/test/packetimpact/tests/tcp_paws_mechanism_test.go b/test/packetimpact/tests/tcp_paws_mechanism_test.go
index 55db4ece6..37f3b56dd 100644
--- a/test/packetimpact/tests/tcp_paws_mechanism_test.go
+++ b/test/packetimpact/tests/tcp_paws_mechanism_test.go
@@ -32,15 +32,15 @@ func init() {
 func TestPAWSMechanism(t *testing.T) {
 	dut := testbench.NewDUT(t)
 	defer dut.TearDown()
-	listenFD, remotePort := dut.CreateListener(unix.SOCK_STREAM, unix.IPPROTO_TCP, 1)
-	defer dut.Close(listenFD)
+	listenFD, remotePort := dut.CreateListener(t, unix.SOCK_STREAM, unix.IPPROTO_TCP, 1)
+	defer dut.Close(t, listenFD)
 	conn := testbench.NewTCPIPv4(t, testbench.TCP{DstPort: &remotePort}, testbench.TCP{SrcPort: &remotePort})
-	defer conn.Close()
+	defer conn.Close(t)
 
 	options := make([]byte, header.TCPOptionTSLength)
 	header.EncodeTSOption(currentTS(), 0, options)
-	conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagSyn), Options: options})
-	synAck, err := conn.Expect(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagSyn | header.TCPFlagAck)}, time.Second)
+	conn.Send(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagSyn), Options: options})
+	synAck, err := conn.Expect(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagSyn | header.TCPFlagAck)}, time.Second)
 	if err != nil {
 		t.Fatalf("didn't get synack during handshake: %s", err)
 	}
@@ -50,9 +50,9 @@ func TestPAWSMechanism(t *testing.T) {
 	}
 	tsecr := parsedSynOpts.TSVal
 	header.EncodeTSOption(currentTS(), tsecr, options)
-	conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck), Options: options})
-	acceptFD, _ := dut.Accept(listenFD)
-	defer dut.Close(acceptFD)
+	conn.Send(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck), Options: options})
+	acceptFD, _ := dut.Accept(t, listenFD)
+	defer dut.Close(t, acceptFD)
 
 	sampleData := []byte("Sample Data")
 	sentTSVal := currentTS()
@@ -61,9 +61,9 @@ func TestPAWSMechanism(t *testing.T) {
 	// every time we send one, it should not cause any flakiness because timestamps
 	// only need to be non-decreasing.
 	time.Sleep(3 * time.Millisecond)
-	conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck), Options: options}, &testbench.Payload{Bytes: sampleData})
+	conn.Send(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck), Options: options}, &testbench.Payload{Bytes: sampleData})
 
-	gotTCP, err := conn.Expect(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}, time.Second)
+	gotTCP, err := conn.Expect(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}, time.Second)
 	if err != nil {
 		t.Fatalf("expected an ACK but got none: %s", err)
 	}
@@ -86,9 +86,9 @@ func TestPAWSMechanism(t *testing.T) {
 	// 3ms here is chosen arbitrarily and this time.Sleep() should not cause flakiness
 	// due to the exact same reasoning discussed above.
 	time.Sleep(3 * time.Millisecond)
-	conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck), Options: options}, &testbench.Payload{Bytes: sampleData})
+	conn.Send(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck), Options: options}, &testbench.Payload{Bytes: sampleData})
 
-	gotTCP, err = conn.Expect(testbench.TCP{AckNum: lastAckNum, Flags: testbench.Uint8(header.TCPFlagAck)}, time.Second)
+	gotTCP, err = conn.Expect(t, testbench.TCP{AckNum: lastAckNum, Flags: testbench.Uint8(header.TCPFlagAck)}, time.Second)
 	if err != nil {
 		t.Fatalf("expected segment with AckNum %d but got none: %s", lastAckNum, err)
 	}
diff --git a/test/packetimpact/tests/tcp_queue_receive_in_syn_sent_test.go b/test/packetimpact/tests/tcp_queue_receive_in_syn_sent_test.go
index 8fbec893b..d9f3ea0f2 100644
--- a/test/packetimpact/tests/tcp_queue_receive_in_syn_sent_test.go
+++ b/test/packetimpact/tests/tcp_queue_receive_in_syn_sent_test.go
@@ -52,26 +52,26 @@ func TestQueueReceiveInSynSent(t *testing.T) {
 			dut := testbench.NewDUT(t)
 			defer dut.TearDown()
 
-			socket, remotePort := dut.CreateBoundSocket(unix.SOCK_STREAM, unix.IPPROTO_TCP, net.ParseIP(testbench.RemoteIPv4))
+			socket, remotePort := dut.CreateBoundSocket(t, unix.SOCK_STREAM, unix.IPPROTO_TCP, net.ParseIP(testbench.RemoteIPv4))
 			conn := testbench.NewTCPIPv4(t, testbench.TCP{DstPort: &remotePort}, testbench.TCP{SrcPort: &remotePort})
-			defer conn.Close()
+			defer conn.Close(t)
 
 			sampleData := []byte("Sample Data")
 
-			dut.SetNonBlocking(socket, true)
-			if _, err := dut.ConnectWithErrno(context.Background(), socket, conn.LocalAddr()); !errors.Is(err, syscall.EINPROGRESS) {
+			dut.SetNonBlocking(t, socket, true)
+			if _, err := dut.ConnectWithErrno(context.Background(), t, socket, conn.LocalAddr(t)); !errors.Is(err, syscall.EINPROGRESS) {
 				t.Fatalf("failed to bring DUT to SYN-SENT, got: %s, want EINPROGRESS", err)
 			}
-			if _, err := conn.Expect(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagSyn)}, time.Second); err != nil {
+			if _, err := conn.Expect(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagSyn)}, time.Second); err != nil {
 				t.Fatalf("expected a SYN from DUT, but got none: %s", err)
 			}
 
-			if _, _, err := dut.RecvWithErrno(context.Background(), socket, int32(len(sampleData)), 0); err != syscall.Errno(unix.EWOULDBLOCK) {
+			if _, _, err := dut.RecvWithErrno(context.Background(), t, socket, int32(len(sampleData)), 0); err != syscall.Errno(unix.EWOULDBLOCK) {
 				t.Fatalf("expected error %s, got %s", syscall.Errno(unix.EWOULDBLOCK), err)
 			}
 
 			// Test blocking read.
-			dut.SetNonBlocking(socket, false)
+			dut.SetNonBlocking(t, socket, false)
 
 			var wg sync.WaitGroup
 			defer wg.Wait()
@@ -86,7 +86,7 @@ func TestQueueReceiveInSynSent(t *testing.T) {
 				block.Done()
 				// Issue RECEIVE call in SYN-SENT, this should be queued for
 				// process until the connection is established.
-				n, buff, err := dut.RecvWithErrno(ctx, socket, int32(len(sampleData)), 0)
+				n, buff, err := dut.RecvWithErrno(ctx, t, socket, int32(len(sampleData)), 0)
 				if tt.reset {
 					if err != syscall.Errno(unix.ECONNREFUSED) {
 						t.Errorf("expected error %s, got %s", syscall.Errno(unix.ECONNREFUSED), err)
@@ -112,19 +112,19 @@ func TestQueueReceiveInSynSent(t *testing.T) {
 			time.Sleep(100 * time.Millisecond)
 
 			if tt.reset {
-				conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagRst | header.TCPFlagAck)})
+				conn.Send(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagRst | header.TCPFlagAck)})
 				return
 			}
 
 			// Bring the connection to Established.
-			conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagSyn | header.TCPFlagAck)})
-			if _, err := conn.Expect(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}, time.Second); err != nil {
+			conn.Send(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagSyn | header.TCPFlagAck)})
+			if _, err := conn.Expect(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}, time.Second); err != nil {
 				t.Fatalf("expected an ACK from DUT, but got none: %s", err)
 			}
 
 			// Send sample payload and expect an ACK.
-			conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}, &testbench.Payload{Bytes: sampleData})
-			if _, err := conn.Expect(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}, time.Second); err != nil {
+			conn.Send(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}, &testbench.Payload{Bytes: sampleData})
+			if _, err := conn.Expect(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}, time.Second); err != nil {
 				t.Fatalf("expected an ACK from DUT, but got none: %s", err)
 			}
 		})
diff --git a/test/packetimpact/tests/tcp_reordering_test.go b/test/packetimpact/tests/tcp_reordering_test.go
index a5378a9dd..8742819ca 100644
--- a/test/packetimpact/tests/tcp_reordering_test.go
+++ b/test/packetimpact/tests/tcp_reordering_test.go
@@ -32,10 +32,10 @@ func init() {
 func TestReorderingWindow(t *testing.T) {
 	dut := tb.NewDUT(t)
 	defer dut.TearDown()
-	listenFd, remotePort := dut.CreateListener(unix.SOCK_STREAM, unix.IPPROTO_TCP, 1)
-	defer dut.Close(listenFd)
+	listenFd, remotePort := dut.CreateListener(t, unix.SOCK_STREAM, unix.IPPROTO_TCP, 1)
+	defer dut.Close(t, listenFd)
 	conn := tb.NewTCPIPv4(t, tb.TCP{DstPort: &remotePort}, tb.TCP{SrcPort: &remotePort})
-	defer conn.Close()
+	defer conn.Close(t)
 
 	// Enable SACK.
 	opts := make([]byte, 40)
@@ -49,17 +49,17 @@ func TestReorderingWindow(t *testing.T) {
 	const mss = minMTU - header.IPv4MinimumSize - header.TCPMinimumSize
 	optsOff += header.EncodeMSSOption(mss, opts[optsOff:])
 
-	conn.ConnectWithOptions(opts[:optsOff])
+	conn.ConnectWithOptions(t, opts[:optsOff])
 
-	acceptFd, _ := dut.Accept(listenFd)
-	defer dut.Close(acceptFd)
+	acceptFd, _ := dut.Accept(t, listenFd)
+	defer dut.Close(t, acceptFd)
 
 	if tb.DUTType == "linux" {
 		// Linux has changed its handling of reordering, force the old behavior.
-		dut.SetSockOpt(acceptFd, unix.IPPROTO_TCP, unix.TCP_CONGESTION, []byte("reno"))
+		dut.SetSockOpt(t, acceptFd, unix.IPPROTO_TCP, unix.TCP_CONGESTION, []byte("reno"))
 	}
 
-	pls := dut.GetSockOptInt(acceptFd, unix.IPPROTO_TCP, unix.TCP_MAXSEG)
+	pls := dut.GetSockOptInt(t, acceptFd, unix.IPPROTO_TCP, unix.TCP_MAXSEG)
 	if tb.DUTType == "netstack" {
 		// netstack does not impliment TCP_MAXSEG correctly. Fake it
 		// here. Netstack uses the max SACK size which is 32. The MSS
@@ -69,13 +69,13 @@ func TestReorderingWindow(t *testing.T) {
 
 	payload := make([]byte, pls)
 
-	seqNum1 := *conn.RemoteSeqNum()
+	seqNum1 := *conn.RemoteSeqNum(t)
 	const numPkts = 10
 	// Send some packets, checking that we receive each.
 	for i, sn := 0, seqNum1; i < numPkts; i++ {
-		dut.Send(acceptFd, payload, 0)
+		dut.Send(t, acceptFd, payload, 0)
 
-		gotOne, err := conn.Expect(tb.TCP{SeqNum: tb.Uint32(uint32(sn))}, time.Second)
+		gotOne, err := conn.Expect(t, tb.TCP{SeqNum: tb.Uint32(uint32(sn))}, time.Second)
 		sn.UpdateForward(seqnum.Size(len(payload)))
 		if err != nil {
 			t.Errorf("Expect #%d: %s", i+1, err)
@@ -86,7 +86,7 @@ func TestReorderingWindow(t *testing.T) {
 		}
 	}
 
-	seqNum2 := *conn.RemoteSeqNum()
+	seqNum2 := *conn.RemoteSeqNum(t)
 
 	// SACK packets #2-4.
 	sackBlock := make([]byte, 40)
@@ -97,13 +97,13 @@ func TestReorderingWindow(t *testing.T) {
 		seqNum1.Add(seqnum.Size(len(payload))),
 		seqNum1.Add(seqnum.Size(4 * len(payload))),
 	}}, sackBlock[sbOff:])
-	conn.Send(tb.TCP{Flags: tb.Uint8(header.TCPFlagAck), AckNum: tb.Uint32(uint32(seqNum1)), Options: sackBlock[:sbOff]})
+	conn.Send(t, tb.TCP{Flags: tb.Uint8(header.TCPFlagAck), AckNum: tb.Uint32(uint32(seqNum1)), Options: sackBlock[:sbOff]})
 
 	// ACK first packet.
-	conn.Send(tb.TCP{Flags: tb.Uint8(header.TCPFlagAck), AckNum: tb.Uint32(uint32(seqNum1) + uint32(len(payload)))})
+	conn.Send(t, tb.TCP{Flags: tb.Uint8(header.TCPFlagAck), AckNum: tb.Uint32(uint32(seqNum1) + uint32(len(payload)))})
 
 	// Check for retransmit.
-	gotOne, err := conn.Expect(tb.TCP{SeqNum: tb.Uint32(uint32(seqNum1))}, time.Second)
+	gotOne, err := conn.Expect(t, tb.TCP{SeqNum: tb.Uint32(uint32(seqNum1))}, time.Second)
 	if err != nil {
 		t.Error("Expect for retransmit:", err)
 	}
@@ -123,14 +123,14 @@ func TestReorderingWindow(t *testing.T) {
 		seqNum1.Add(seqnum.Size(4 * len(payload))),
 	}}, dsackBlock[dsbOff:])
 
-	conn.Send(tb.TCP{Flags: tb.Uint8(header.TCPFlagAck), AckNum: tb.Uint32(uint32(seqNum2)), Options: dsackBlock[:dsbOff]})
+	conn.Send(t, tb.TCP{Flags: tb.Uint8(header.TCPFlagAck), AckNum: tb.Uint32(uint32(seqNum2)), Options: dsackBlock[:dsbOff]})
 
 	// Send half of the original window of packets, checking that we
 	// received each.
 	for i, sn := 0, seqNum2; i < numPkts/2; i++ {
-		dut.Send(acceptFd, payload, 0)
+		dut.Send(t, acceptFd, payload, 0)
 
-		gotOne, err := conn.Expect(tb.TCP{SeqNum: tb.Uint32(uint32(sn))}, time.Second)
+		gotOne, err := conn.Expect(t, tb.TCP{SeqNum: tb.Uint32(uint32(sn))}, time.Second)
 		sn.UpdateForward(seqnum.Size(len(payload)))
 		if err != nil {
 			t.Errorf("Expect #%d: %s", i+1, err)
@@ -144,8 +144,8 @@ func TestReorderingWindow(t *testing.T) {
 	if tb.DUTType == "netstack" {
 		// The window should now be halved, so we should receive any
 		// more, even if we send them.
-		dut.Send(acceptFd, payload, 0)
-		if got, err := conn.Expect(tb.TCP{}, 100*time.Millisecond); got != nil || err == nil {
+		dut.Send(t, acceptFd, payload, 0)
+		if got, err := conn.Expect(t, tb.TCP{}, 100*time.Millisecond); got != nil || err == nil {
 			t.Fatalf("expected no packets within 100 millisecond, but got one: %s", got)
 		}
 		return
@@ -153,9 +153,9 @@ func TestReorderingWindow(t *testing.T) {
 
 	// Linux reduces the window by three. Check that we can receive the rest.
 	for i, sn := 0, seqNum2.Add(seqnum.Size(numPkts/2*len(payload))); i < 2; i++ {
-		dut.Send(acceptFd, payload, 0)
+		dut.Send(t, acceptFd, payload, 0)
 
-		gotOne, err := conn.Expect(tb.TCP{SeqNum: tb.Uint32(uint32(sn))}, time.Second)
+		gotOne, err := conn.Expect(t, tb.TCP{SeqNum: tb.Uint32(uint32(sn))}, time.Second)
 		sn.UpdateForward(seqnum.Size(len(payload)))
 		if err != nil {
 			t.Errorf("Expect #%d: %s", i+1, err)
@@ -167,8 +167,8 @@ func TestReorderingWindow(t *testing.T) {
 	}
 
 	// The window should now be full.
-	dut.Send(acceptFd, payload, 0)
-	if got, err := conn.Expect(tb.TCP{}, 100*time.Millisecond); got != nil || err == nil {
+	dut.Send(t, acceptFd, payload, 0)
+	if got, err := conn.Expect(t, tb.TCP{}, 100*time.Millisecond); got != nil || err == nil {
 		t.Fatalf("expected no packets within 100 millisecond, but got one: %s", got)
 	}
 }
diff --git a/test/packetimpact/tests/tcp_retransmits_test.go b/test/packetimpact/tests/tcp_retransmits_test.go
index 6940eb7fb..072014ff8 100644
--- a/test/packetimpact/tests/tcp_retransmits_test.go
+++ b/test/packetimpact/tests/tcp_retransmits_test.go
@@ -33,41 +33,41 @@ func init() {
 func TestRetransmits(t *testing.T) {
 	dut := testbench.NewDUT(t)
 	defer dut.TearDown()
-	listenFd, remotePort := dut.CreateListener(unix.SOCK_STREAM, unix.IPPROTO_TCP, 1)
-	defer dut.Close(listenFd)
+	listenFd, remotePort := dut.CreateListener(t, unix.SOCK_STREAM, unix.IPPROTO_TCP, 1)
+	defer dut.Close(t, listenFd)
 	conn := testbench.NewTCPIPv4(t, testbench.TCP{DstPort: &remotePort}, testbench.TCP{SrcPort: &remotePort})
-	defer conn.Close()
+	defer conn.Close(t)
 
-	conn.Connect()
-	acceptFd, _ := dut.Accept(listenFd)
-	defer dut.Close(acceptFd)
+	conn.Connect(t)
+	acceptFd, _ := dut.Accept(t, listenFd)
+	defer dut.Close(t, acceptFd)
 
-	dut.SetSockOptInt(acceptFd, unix.IPPROTO_TCP, unix.TCP_NODELAY, 1)
+	dut.SetSockOptInt(t, acceptFd, unix.IPPROTO_TCP, unix.TCP_NODELAY, 1)
 
 	sampleData := []byte("Sample Data")
 	samplePayload := &testbench.Payload{Bytes: sampleData}
 
-	dut.Send(acceptFd, sampleData, 0)
-	if _, err := conn.ExpectData(&testbench.TCP{}, samplePayload, time.Second); err != nil {
+	dut.Send(t, acceptFd, sampleData, 0)
+	if _, err := conn.ExpectData(t, &testbench.TCP{}, samplePayload, time.Second); err != nil {
 		t.Fatalf("expected payload was not received: %s", err)
 	}
 	// Give a chance for the dut to estimate RTO with RTT from the DATA-ACK.
 	// TODO(gvisor.dev/issue/2685) Estimate RTO during handshake, after which
 	// we can skip sending this ACK.
-	conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)})
+	conn.Send(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)})
 
 	startRTO := time.Second
 	current := startRTO
 	first := time.Now()
-	dut.Send(acceptFd, sampleData, 0)
-	seq := testbench.Uint32(uint32(*conn.RemoteSeqNum()))
-	if _, err := conn.ExpectData(&testbench.TCP{SeqNum: seq}, samplePayload, startRTO); err != nil {
+	dut.Send(t, acceptFd, sampleData, 0)
+	seq := testbench.Uint32(uint32(*conn.RemoteSeqNum(t)))
+	if _, err := conn.ExpectData(t, &testbench.TCP{SeqNum: seq}, samplePayload, startRTO); err != nil {
 		t.Fatalf("expected payload was not received: %s", err)
 	}
 	// Expect retransmits of the same segment.
 	for i := 0; i < 5; i++ {
 		start := time.Now()
-		if _, err := conn.ExpectData(&testbench.TCP{SeqNum: seq}, samplePayload, 2*current); err != nil {
+		if _, err := conn.ExpectData(t, &testbench.TCP{SeqNum: seq}, samplePayload, 2*current); err != nil {
 			t.Fatalf("expected payload was not received: %s loop %d", err, i)
 		}
 		if i == 0 {
diff --git a/test/packetimpact/tests/tcp_send_window_sizes_piggyback_test.go b/test/packetimpact/tests/tcp_send_window_sizes_piggyback_test.go
index 90ab85419..f91b06ba1 100644
--- a/test/packetimpact/tests/tcp_send_window_sizes_piggyback_test.go
+++ b/test/packetimpact/tests/tcp_send_window_sizes_piggyback_test.go
@@ -61,23 +61,23 @@ func TestSendWindowSizesPiggyback(t *testing.T) {
 		t.Run(fmt.Sprintf("%s%d", tt.description, tt.windowSize), func(t *testing.T) {
 			dut := testbench.NewDUT(t)
 			defer dut.TearDown()
-			listenFd, remotePort := dut.CreateListener(unix.SOCK_STREAM, unix.IPPROTO_TCP, 1)
-			defer dut.Close(listenFd)
+			listenFd, remotePort := dut.CreateListener(t, unix.SOCK_STREAM, unix.IPPROTO_TCP, 1)
+			defer dut.Close(t, listenFd)
 
 			conn := testbench.NewTCPIPv4(t, testbench.TCP{DstPort: &remotePort, WindowSize: testbench.Uint16(tt.windowSize)}, testbench.TCP{SrcPort: &remotePort})
-			defer conn.Close()
+			defer conn.Close(t)
 
-			conn.Connect()
-			acceptFd, _ := dut.Accept(listenFd)
-			defer dut.Close(acceptFd)
+			conn.Connect(t)
+			acceptFd, _ := dut.Accept(t, listenFd)
+			defer dut.Close(t, acceptFd)
 
-			dut.SetSockOptInt(acceptFd, unix.IPPROTO_TCP, unix.TCP_NODELAY, 1)
+			dut.SetSockOptInt(t, acceptFd, unix.IPPROTO_TCP, unix.TCP_NODELAY, 1)
 
 			expectedTCP := testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck | header.TCPFlagPsh)}
 
-			dut.Send(acceptFd, sampleData, 0)
+			dut.Send(t, acceptFd, sampleData, 0)
 			expectedPayload := testbench.Payload{Bytes: tt.expectedPayload1}
-			if _, err := conn.ExpectData(&expectedTCP, &expectedPayload, time.Second); err != nil {
+			if _, err := conn.ExpectData(t, &expectedTCP, &expectedPayload, time.Second); err != nil {
 				t.Fatalf("expected payload was not received: %s", err)
 			}
 
@@ -86,18 +86,18 @@ func TestSendWindowSizesPiggyback(t *testing.T) {
 
 			if tt.enqueue {
 				// Enqueue a segment for the dut to transmit.
-				dut.Send(acceptFd, sampleData, 0)
+				dut.Send(t, acceptFd, sampleData, 0)
 			}
 
 			// Send ACK for the previous segment along with data for the dut to
 			// receive and ACK back. Sending this ACK would make room for the dut
 			// to transmit any enqueued segment.
-			conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck | header.TCPFlagPsh), WindowSize: testbench.Uint16(tt.windowSize)}, &testbench.Payload{Bytes: sampleData})
+			conn.Send(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck | header.TCPFlagPsh), WindowSize: testbench.Uint16(tt.windowSize)}, &testbench.Payload{Bytes: sampleData})
 
 			// Expect the dut to piggyback the ACK for received data along with
 			// the segment enqueued for transmit.
 			expectedPayload = testbench.Payload{Bytes: tt.expectedPayload2}
-			if _, err := conn.ExpectData(&expectedTCP, &expectedPayload, time.Second); err != nil {
+			if _, err := conn.ExpectData(t, &expectedTCP, &expectedPayload, time.Second); err != nil {
 				t.Fatalf("expected payload was not received: %s", err)
 			}
 		})
diff --git a/test/packetimpact/tests/tcp_synrcvd_reset_test.go b/test/packetimpact/tests/tcp_synrcvd_reset_test.go
index 7d5deab01..57d034dd1 100644
--- a/test/packetimpact/tests/tcp_synrcvd_reset_test.go
+++ b/test/packetimpact/tests/tcp_synrcvd_reset_test.go
@@ -32,21 +32,21 @@ func init() {
 func TestTCPSynRcvdReset(t *testing.T) {
 	dut := testbench.NewDUT(t)
 	defer dut.TearDown()
-	listenFD, remotePort := dut.CreateListener(unix.SOCK_STREAM, unix.IPPROTO_TCP, 1)
-	defer dut.Close(listenFD)
+	listenFD, remotePort := dut.CreateListener(t, unix.SOCK_STREAM, unix.IPPROTO_TCP, 1)
+	defer dut.Close(t, listenFD)
 	conn := testbench.NewTCPIPv4(t, testbench.TCP{DstPort: &remotePort}, testbench.TCP{SrcPort: &remotePort})
-	defer conn.Close()
+	defer conn.Close(t)
 
 	// Expect dut connection to have transitioned to SYN-RCVD state.
-	conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagSyn)})
-	if _, err := conn.ExpectData(&testbench.TCP{Flags: testbench.Uint8(header.TCPFlagSyn | header.TCPFlagAck)}, nil, time.Second); err != nil {
+	conn.Send(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagSyn)})
+	if _, err := conn.ExpectData(t, &testbench.TCP{Flags: testbench.Uint8(header.TCPFlagSyn | header.TCPFlagAck)}, nil, time.Second); err != nil {
 		t.Fatalf("expected SYN-ACK %s", err)
 	}
-	conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagRst)})
+	conn.Send(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagRst)})
 	// Expect the connection to have transitioned SYN-RCVD to CLOSED.
 	// TODO(gvisor.dev/issue/478): Check for TCP_INFO on the dut side.
-	conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)})
-	if _, err := conn.ExpectData(&testbench.TCP{Flags: testbench.Uint8(header.TCPFlagRst)}, nil, time.Second); err != nil {
+	conn.Send(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)})
+	if _, err := conn.ExpectData(t, &testbench.TCP{Flags: testbench.Uint8(header.TCPFlagRst)}, nil, time.Second); err != nil {
 		t.Fatalf("expected a TCP RST %s", err)
 	}
 }
diff --git a/test/packetimpact/tests/tcp_synsent_reset_test.go b/test/packetimpact/tests/tcp_synsent_reset_test.go
index 6898a2239..eac8eb19d 100644
--- a/test/packetimpact/tests/tcp_synsent_reset_test.go
+++ b/test/packetimpact/tests/tcp_synsent_reset_test.go
@@ -31,17 +31,19 @@ func init() {
 
 // dutSynSentState sets up the dut connection in SYN-SENT state.
 func dutSynSentState(t *testing.T) (*tb.DUT, *tb.TCPIPv4, uint16, uint16) {
+	t.Helper()
+
 	dut := tb.NewDUT(t)
 
-	clientFD, clientPort := dut.CreateBoundSocket(unix.SOCK_STREAM|unix.SOCK_NONBLOCK, unix.IPPROTO_TCP, net.ParseIP(tb.RemoteIPv4))
+	clientFD, clientPort := dut.CreateBoundSocket(t, unix.SOCK_STREAM|unix.SOCK_NONBLOCK, unix.IPPROTO_TCP, net.ParseIP(tb.RemoteIPv4))
 	port := uint16(9001)
 	conn := tb.NewTCPIPv4(t, tb.TCP{SrcPort: &port, DstPort: &clientPort}, tb.TCP{SrcPort: &clientPort, DstPort: &port})
 
 	sa := unix.SockaddrInet4{Port: int(port)}
 	copy(sa.Addr[:], net.IP(net.ParseIP(tb.LocalIPv4)).To4())
 	// Bring the dut to SYN-SENT state with a non-blocking connect.
-	dut.Connect(clientFD, &sa)
-	if _, err := conn.ExpectData(&tb.TCP{Flags: tb.Uint8(header.TCPFlagSyn)}, nil, time.Second); err != nil {
+	dut.Connect(t, clientFD, &sa)
+	if _, err := conn.ExpectData(t, &tb.TCP{Flags: tb.Uint8(header.TCPFlagSyn)}, nil, time.Second); err != nil {
 		t.Fatalf("expected SYN\n")
 	}
 
@@ -51,13 +53,13 @@ func dutSynSentState(t *testing.T) (*tb.DUT, *tb.TCPIPv4, uint16, uint16) {
 // TestTCPSynSentReset tests RFC793, p67: SYN-SENT to CLOSED transition.
 func TestTCPSynSentReset(t *testing.T) {
 	dut, conn, _, _ := dutSynSentState(t)
-	defer conn.Close()
+	defer conn.Close(t)
 	defer dut.TearDown()
-	conn.Send(tb.TCP{Flags: tb.Uint8(header.TCPFlagRst | header.TCPFlagAck)})
+	conn.Send(t, tb.TCP{Flags: tb.Uint8(header.TCPFlagRst | header.TCPFlagAck)})
 	// Expect the connection to have closed.
 	// TODO(gvisor.dev/issue/478): Check for TCP_INFO on the dut side.
-	conn.Send(tb.TCP{Flags: tb.Uint8(header.TCPFlagAck)})
-	if _, err := conn.ExpectData(&tb.TCP{Flags: tb.Uint8(header.TCPFlagRst)}, nil, time.Second); err != nil {
+	conn.Send(t, tb.TCP{Flags: tb.Uint8(header.TCPFlagAck)})
+	if _, err := conn.ExpectData(t, &tb.TCP{Flags: tb.Uint8(header.TCPFlagRst)}, nil, time.Second); err != nil {
 		t.Fatalf("expected a TCP RST")
 	}
 }
@@ -67,22 +69,22 @@ func TestTCPSynSentReset(t *testing.T) {
 func TestTCPSynSentRcvdReset(t *testing.T) {
 	dut, c, remotePort, clientPort := dutSynSentState(t)
 	defer dut.TearDown()
-	defer c.Close()
+	defer c.Close(t)
 
 	conn := tb.NewTCPIPv4(t, tb.TCP{SrcPort: &remotePort, DstPort: &clientPort}, tb.TCP{SrcPort: &clientPort, DstPort: &remotePort})
-	defer conn.Close()
+	defer conn.Close(t)
 	// Initiate new SYN connection with the same port pair
 	// (simultaneous open case), expect the dut connection to move to
 	// SYN-RCVD state
-	conn.Send(tb.TCP{Flags: tb.Uint8(header.TCPFlagSyn)})
-	if _, err := conn.ExpectData(&tb.TCP{Flags: tb.Uint8(header.TCPFlagSyn | header.TCPFlagAck)}, nil, time.Second); err != nil {
+	conn.Send(t, tb.TCP{Flags: tb.Uint8(header.TCPFlagSyn)})
+	if _, err := conn.ExpectData(t, &tb.TCP{Flags: tb.Uint8(header.TCPFlagSyn | header.TCPFlagAck)}, nil, time.Second); err != nil {
 		t.Fatalf("expected SYN-ACK %s\n", err)
 	}
-	conn.Send(tb.TCP{Flags: tb.Uint8(header.TCPFlagRst)})
+	conn.Send(t, tb.TCP{Flags: tb.Uint8(header.TCPFlagRst)})
 	// Expect the connection to have transitioned SYN-RCVD to CLOSED.
 	// TODO(gvisor.dev/issue/478): Check for TCP_INFO on the dut side.
-	conn.Send(tb.TCP{Flags: tb.Uint8(header.TCPFlagAck)})
-	if _, err := conn.ExpectData(&tb.TCP{Flags: tb.Uint8(header.TCPFlagRst)}, nil, time.Second); err != nil {
+	conn.Send(t, tb.TCP{Flags: tb.Uint8(header.TCPFlagAck)})
+	if _, err := conn.ExpectData(t, &tb.TCP{Flags: tb.Uint8(header.TCPFlagRst)}, nil, time.Second); err != nil {
 		t.Fatalf("expected a TCP RST")
 	}
 }
diff --git a/test/packetimpact/tests/tcp_user_timeout_test.go b/test/packetimpact/tests/tcp_user_timeout_test.go
index 87e45d765..551dc78e7 100644
--- a/test/packetimpact/tests/tcp_user_timeout_test.go
+++ b/test/packetimpact/tests/tcp_user_timeout_test.go
@@ -16,7 +16,6 @@ package tcp_user_timeout_test
 
 import (
 	"flag"
-	"fmt"
 	"testing"
 	"time"
 
@@ -29,22 +28,20 @@ func init() {
 	testbench.RegisterFlags(flag.CommandLine)
 }
 
-func sendPayload(conn *testbench.TCPIPv4, dut *testbench.DUT, fd int32) error {
+func sendPayload(t *testing.T, conn *testbench.TCPIPv4, dut *testbench.DUT, fd int32) {
 	sampleData := make([]byte, 100)
 	for i := range sampleData {
 		sampleData[i] = uint8(i)
 	}
-	conn.Drain()
-	dut.Send(fd, sampleData, 0)
-	if _, err := conn.ExpectData(&testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck | header.TCPFlagPsh)}, &testbench.Payload{Bytes: sampleData}, time.Second); err != nil {
-		return fmt.Errorf("expected data but got none: %w", err)
+	conn.Drain(t)
+	dut.Send(t, fd, sampleData, 0)
+	if _, err := conn.ExpectData(t, &testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck | header.TCPFlagPsh)}, &testbench.Payload{Bytes: sampleData}, time.Second); err != nil {
+		t.Fatalf("expected data but got none: %w", err)
 	}
-	return nil
 }
 
-func sendFIN(conn *testbench.TCPIPv4, dut *testbench.DUT, fd int32) error {
-	dut.Close(fd)
-	return nil
+func sendFIN(t *testing.T, conn *testbench.TCPIPv4, dut *testbench.DUT, fd int32) {
+	dut.Close(t, fd)
 }
 
 func TestTCPUserTimeout(t *testing.T) {
@@ -59,7 +56,7 @@ func TestTCPUserTimeout(t *testing.T) {
 	} {
 		for _, ttf := range []struct {
 			description string
-			f           func(conn *testbench.TCPIPv4, dut *testbench.DUT, fd int32) error
+			f           func(_ *testing.T, _ *testbench.TCPIPv4, _ *testbench.DUT, fd int32)
 		}{
 			{"AfterPayload", sendPayload},
 			{"AfterFIN", sendFIN},
@@ -68,31 +65,29 @@ func TestTCPUserTimeout(t *testing.T) {
 				// Create a socket, listen, TCP handshake, and accept.
 				dut := testbench.NewDUT(t)
 				defer dut.TearDown()
-				listenFD, remotePort := dut.CreateListener(unix.SOCK_STREAM, unix.IPPROTO_TCP, 1)
-				defer dut.Close(listenFD)
+				listenFD, remotePort := dut.CreateListener(t, unix.SOCK_STREAM, unix.IPPROTO_TCP, 1)
+				defer dut.Close(t, listenFD)
 				conn := testbench.NewTCPIPv4(t, testbench.TCP{DstPort: &remotePort}, testbench.TCP{SrcPort: &remotePort})
-				defer conn.Close()
-				conn.Connect()
-				acceptFD, _ := dut.Accept(listenFD)
+				defer conn.Close(t)
+				conn.Connect(t)
+				acceptFD, _ := dut.Accept(t, listenFD)
 
 				if tt.userTimeout != 0 {
-					dut.SetSockOptInt(acceptFD, unix.SOL_TCP, unix.TCP_USER_TIMEOUT, int32(tt.userTimeout.Milliseconds()))
+					dut.SetSockOptInt(t, acceptFD, unix.SOL_TCP, unix.TCP_USER_TIMEOUT, int32(tt.userTimeout.Milliseconds()))
 				}
 
-				if err := ttf.f(&conn, &dut, acceptFD); err != nil {
-					t.Fatal(err)
-				}
+				ttf.f(t, &conn, &dut, acceptFD)
 
 				time.Sleep(tt.sendDelay)
-				conn.Drain()
-				conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)})
+				conn.Drain(t)
+				conn.Send(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)})
 
 				// If TCP_USER_TIMEOUT was set and the above delay was longer than the
 				// TCP_USER_TIMEOUT then the DUT should send a RST in response to the
 				// testbench's packet.
 				expectRST := tt.userTimeout != 0 && tt.sendDelay > tt.userTimeout
 				expectTimeout := 5 * time.Second
-				got, err := conn.Expect(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagRst)}, expectTimeout)
+				got, err := conn.Expect(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagRst)}, expectTimeout)
 				if expectRST && err != nil {
 					t.Errorf("expected RST packet within %s but got none: %s", expectTimeout, err)
 				}
diff --git a/test/packetimpact/tests/tcp_window_shrink_test.go b/test/packetimpact/tests/tcp_window_shrink_test.go
index e78d04756..5b001fbec 100644
--- a/test/packetimpact/tests/tcp_window_shrink_test.go
+++ b/test/packetimpact/tests/tcp_window_shrink_test.go
@@ -31,43 +31,43 @@ func init() {
 func TestWindowShrink(t *testing.T) {
 	dut := testbench.NewDUT(t)
 	defer dut.TearDown()
-	listenFd, remotePort := dut.CreateListener(unix.SOCK_STREAM, unix.IPPROTO_TCP, 1)
-	defer dut.Close(listenFd)
+	listenFd, remotePort := dut.CreateListener(t, unix.SOCK_STREAM, unix.IPPROTO_TCP, 1)
+	defer dut.Close(t, listenFd)
 	conn := testbench.NewTCPIPv4(t, testbench.TCP{DstPort: &remotePort}, testbench.TCP{SrcPort: &remotePort})
-	defer conn.Close()
+	defer conn.Close(t)
 
-	conn.Connect()
-	acceptFd, _ := dut.Accept(listenFd)
-	defer dut.Close(acceptFd)
+	conn.Connect(t)
+	acceptFd, _ := dut.Accept(t, listenFd)
+	defer dut.Close(t, acceptFd)
 
-	dut.SetSockOptInt(acceptFd, unix.IPPROTO_TCP, unix.TCP_NODELAY, 1)
+	dut.SetSockOptInt(t, acceptFd, unix.IPPROTO_TCP, unix.TCP_NODELAY, 1)
 
 	sampleData := []byte("Sample Data")
 	samplePayload := &testbench.Payload{Bytes: sampleData}
 
-	dut.Send(acceptFd, sampleData, 0)
-	if _, err := conn.ExpectData(&testbench.TCP{}, samplePayload, time.Second); err != nil {
+	dut.Send(t, acceptFd, sampleData, 0)
+	if _, err := conn.ExpectData(t, &testbench.TCP{}, samplePayload, time.Second); err != nil {
 		t.Fatalf("expected payload was not received: %s", err)
 	}
-	conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)})
+	conn.Send(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)})
 
-	dut.Send(acceptFd, sampleData, 0)
-	dut.Send(acceptFd, sampleData, 0)
-	if _, err := conn.ExpectData(&testbench.TCP{}, samplePayload, time.Second); err != nil {
+	dut.Send(t, acceptFd, sampleData, 0)
+	dut.Send(t, acceptFd, sampleData, 0)
+	if _, err := conn.ExpectData(t, &testbench.TCP{}, samplePayload, time.Second); err != nil {
 		t.Fatalf("expected payload was not received: %s", err)
 	}
-	if _, err := conn.ExpectData(&testbench.TCP{}, samplePayload, time.Second); err != nil {
+	if _, err := conn.ExpectData(t, &testbench.TCP{}, samplePayload, time.Second); err != nil {
 		t.Fatalf("expected payload was not received: %s", err)
 	}
 	// We close our receiving window here
-	conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck), WindowSize: testbench.Uint16(0)})
+	conn.Send(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck), WindowSize: testbench.Uint16(0)})
 
-	dut.Send(acceptFd, []byte("Sample Data"), 0)
+	dut.Send(t, acceptFd, []byte("Sample Data"), 0)
 	// Note: There is another kind of zero-window probing which Windows uses (by sending one
 	// new byte at `RemoteSeqNum`), if netstack wants to go that way, we may want to change
 	// the following lines.
-	expectedRemoteSeqNum := *conn.RemoteSeqNum() - 1
-	if _, err := conn.ExpectData(&testbench.TCP{SeqNum: testbench.Uint32(uint32(expectedRemoteSeqNum))}, nil, time.Second); err != nil {
+	expectedRemoteSeqNum := *conn.RemoteSeqNum(t) - 1
+	if _, err := conn.ExpectData(t, &testbench.TCP{SeqNum: testbench.Uint32(uint32(expectedRemoteSeqNum))}, nil, time.Second); err != nil {
 		t.Fatalf("expected a packet with sequence number %d: %s", expectedRemoteSeqNum, err)
 	}
 }
diff --git a/test/packetimpact/tests/tcp_zero_window_probe_retransmit_test.go b/test/packetimpact/tests/tcp_zero_window_probe_retransmit_test.go
index 8c89d57c9..da93267d6 100644
--- a/test/packetimpact/tests/tcp_zero_window_probe_retransmit_test.go
+++ b/test/packetimpact/tests/tcp_zero_window_probe_retransmit_test.go
@@ -33,27 +33,27 @@ func init() {
 func TestZeroWindowProbeRetransmit(t *testing.T) {
 	dut := testbench.NewDUT(t)
 	defer dut.TearDown()
-	listenFd, remotePort := dut.CreateListener(unix.SOCK_STREAM, unix.IPPROTO_TCP, 1)
-	defer dut.Close(listenFd)
+	listenFd, remotePort := dut.CreateListener(t, unix.SOCK_STREAM, unix.IPPROTO_TCP, 1)
+	defer dut.Close(t, listenFd)
 	conn := testbench.NewTCPIPv4(t, testbench.TCP{DstPort: &remotePort}, testbench.TCP{SrcPort: &remotePort})
-	defer conn.Close()
+	defer conn.Close(t)
 
-	conn.Connect()
-	acceptFd, _ := dut.Accept(listenFd)
-	defer dut.Close(acceptFd)
+	conn.Connect(t)
+	acceptFd, _ := dut.Accept(t, listenFd)
+	defer dut.Close(t, acceptFd)
 
-	dut.SetSockOptInt(acceptFd, unix.IPPROTO_TCP, unix.TCP_NODELAY, 1)
+	dut.SetSockOptInt(t, acceptFd, unix.IPPROTO_TCP, unix.TCP_NODELAY, 1)
 
 	sampleData := []byte("Sample Data")
 	samplePayload := &testbench.Payload{Bytes: sampleData}
 
 	// Send and receive sample data to the dut.
-	dut.Send(acceptFd, sampleData, 0)
-	if _, err := conn.ExpectData(&testbench.TCP{}, samplePayload, time.Second); err != nil {
+	dut.Send(t, acceptFd, sampleData, 0)
+	if _, err := conn.ExpectData(t, &testbench.TCP{}, samplePayload, time.Second); err != nil {
 		t.Fatalf("expected payload was not received: %s", err)
 	}
-	conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck | header.TCPFlagPsh)}, samplePayload)
-	if _, err := conn.ExpectData(&testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}, nil, time.Second); err != nil {
+	conn.Send(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck | header.TCPFlagPsh)}, samplePayload)
+	if _, err := conn.ExpectData(t, &testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}, nil, time.Second); err != nil {
 		t.Fatalf("expected packet was not received: %s", err)
 	}
 
@@ -63,15 +63,15 @@ func TestZeroWindowProbeRetransmit(t *testing.T) {
 	// of the recorded first zero probe transmission duration.
 	//
 	// Advertize zero receive window again.
-	conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck), WindowSize: testbench.Uint16(0)})
-	probeSeq := testbench.Uint32(uint32(*conn.RemoteSeqNum() - 1))
-	ackProbe := testbench.Uint32(uint32(*conn.RemoteSeqNum()))
+	conn.Send(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck), WindowSize: testbench.Uint16(0)})
+	probeSeq := testbench.Uint32(uint32(*conn.RemoteSeqNum(t) - 1))
+	ackProbe := testbench.Uint32(uint32(*conn.RemoteSeqNum(t)))
 
 	startProbeDuration := time.Second
 	current := startProbeDuration
 	first := time.Now()
 	// Ask the dut to send out data.
-	dut.Send(acceptFd, sampleData, 0)
+	dut.Send(t, acceptFd, sampleData, 0)
 	// Expect the dut to keep the connection alive as long as the remote is
 	// acknowledging the zero-window probes.
 	for i := 0; i < 5; i++ {
@@ -79,7 +79,7 @@ func TestZeroWindowProbeRetransmit(t *testing.T) {
 		// Expect zero-window probe with a timeout which is a function of the typical
 		// first retransmission time. The retransmission times is supposed to
 		// exponentially increase.
-		if _, err := conn.ExpectData(&testbench.TCP{SeqNum: probeSeq}, nil, 2*current); err != nil {
+		if _, err := conn.ExpectData(t, &testbench.TCP{SeqNum: probeSeq}, nil, 2*current); err != nil {
 			t.Fatalf("expected a probe with sequence number %d: loop %d", probeSeq, i)
 		}
 		if i == 0 {
@@ -92,14 +92,13 @@ func TestZeroWindowProbeRetransmit(t *testing.T) {
 			t.Errorf("got zero probe %d after %s, want >= %s", i, got, want)
 		}
 		// Acknowledge the zero-window probes from the dut.
-		conn.Send(testbench.TCP{AckNum: ackProbe, Flags: testbench.Uint8(header.TCPFlagAck), WindowSize: testbench.Uint16(0)})
+		conn.Send(t, testbench.TCP{AckNum: ackProbe, Flags: testbench.Uint8(header.TCPFlagAck), WindowSize: testbench.Uint16(0)})
 		current *= 2
 	}
 	// Advertize non-zero window.
-	conn.Send(testbench.TCP{AckNum: ackProbe, Flags: testbench.Uint8(header.TCPFlagAck)})
+	conn.Send(t, testbench.TCP{AckNum: ackProbe, Flags: testbench.Uint8(header.TCPFlagAck)})
 	// Expect the dut to recover and transmit data.
-	if _, err := conn.ExpectData(&testbench.
-		TCP{SeqNum: ackProbe}, samplePayload, time.Second); err != nil {
+	if _, err := conn.ExpectData(t, &testbench.TCP{SeqNum: ackProbe}, samplePayload, time.Second); err != nil {
 		t.Fatalf("expected payload was not received: %s", err)
 	}
 }
diff --git a/test/packetimpact/tests/tcp_zero_window_probe_test.go b/test/packetimpact/tests/tcp_zero_window_probe_test.go
index 649fd5699..44cac42f8 100644
--- a/test/packetimpact/tests/tcp_zero_window_probe_test.go
+++ b/test/packetimpact/tests/tcp_zero_window_probe_test.go
@@ -33,29 +33,29 @@ func init() {
 func TestZeroWindowProbe(t *testing.T) {
 	dut := testbench.NewDUT(t)
 	defer dut.TearDown()
-	listenFd, remotePort := dut.CreateListener(unix.SOCK_STREAM, unix.IPPROTO_TCP, 1)
-	defer dut.Close(listenFd)
+	listenFd, remotePort := dut.CreateListener(t, unix.SOCK_STREAM, unix.IPPROTO_TCP, 1)
+	defer dut.Close(t, listenFd)
 	conn := testbench.NewTCPIPv4(t, testbench.TCP{DstPort: &remotePort}, testbench.TCP{SrcPort: &remotePort})
-	defer conn.Close()
+	defer conn.Close(t)
 
-	conn.Connect()
-	acceptFd, _ := dut.Accept(listenFd)
-	defer dut.Close(acceptFd)
+	conn.Connect(t)
+	acceptFd, _ := dut.Accept(t, listenFd)
+	defer dut.Close(t, acceptFd)
 
-	dut.SetSockOptInt(acceptFd, unix.IPPROTO_TCP, unix.TCP_NODELAY, 1)
+	dut.SetSockOptInt(t, acceptFd, unix.IPPROTO_TCP, unix.TCP_NODELAY, 1)
 
 	sampleData := []byte("Sample Data")
 	samplePayload := &testbench.Payload{Bytes: sampleData}
 
 	start := time.Now()
 	// Send and receive sample data to the dut.
-	dut.Send(acceptFd, sampleData, 0)
-	if _, err := conn.ExpectData(&testbench.TCP{}, samplePayload, time.Second); err != nil {
+	dut.Send(t, acceptFd, sampleData, 0)
+	if _, err := conn.ExpectData(t, &testbench.TCP{}, samplePayload, time.Second); err != nil {
 		t.Fatalf("expected payload was not received: %s", err)
 	}
 	sendTime := time.Now().Sub(start)
-	conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck | header.TCPFlagPsh)}, samplePayload)
-	if _, err := conn.ExpectData(&testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}, nil, time.Second); err != nil {
+	conn.Send(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck | header.TCPFlagPsh)}, samplePayload)
+	if _, err := conn.ExpectData(t, &testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}, nil, time.Second); err != nil {
 		t.Fatalf("expected packet was not received: %s", err)
 	}
 
@@ -63,24 +63,24 @@ func TestZeroWindowProbe(t *testing.T) {
 	//         probe to be sent.
 	//
 	// Advertize zero window to the dut.
-	conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck), WindowSize: testbench.Uint16(0)})
+	conn.Send(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck), WindowSize: testbench.Uint16(0)})
 
 	// Expected sequence number of the zero window probe.
-	probeSeq := testbench.Uint32(uint32(*conn.RemoteSeqNum() - 1))
+	probeSeq := testbench.Uint32(uint32(*conn.RemoteSeqNum(t) - 1))
 	// Expected ack number of the ACK for the probe.
-	ackProbe := testbench.Uint32(uint32(*conn.RemoteSeqNum()))
+	ackProbe := testbench.Uint32(uint32(*conn.RemoteSeqNum(t)))
 
 	// Expect there are no zero-window probes sent until there is data to be sent out
 	// from the dut.
-	if _, err := conn.ExpectData(&testbench.TCP{SeqNum: probeSeq}, nil, 2*time.Second); err == nil {
+	if _, err := conn.ExpectData(t, &testbench.TCP{SeqNum: probeSeq}, nil, 2*time.Second); err == nil {
 		t.Fatalf("unexpected packet with sequence number %d: %s", probeSeq, err)
 	}
 
 	start = time.Now()
 	// Ask the dut to send out data.
-	dut.Send(acceptFd, sampleData, 0)
+	dut.Send(t, acceptFd, sampleData, 0)
 	// Expect zero-window probe from the dut.
-	if _, err := conn.ExpectData(&testbench.TCP{SeqNum: probeSeq}, nil, time.Second); err != nil {
+	if _, err := conn.ExpectData(t, &testbench.TCP{SeqNum: probeSeq}, nil, time.Second); err != nil {
 		t.Fatalf("expected a packet with sequence number %d: %s", probeSeq, err)
 	}
 	// Expect the probe to be sent after some time. Compare against the previous
@@ -94,9 +94,9 @@ func TestZeroWindowProbe(t *testing.T) {
 	//         and sends out the sample payload after the send window opens.
 	//
 	// Advertize non-zero window to the dut and ack the zero window probe.
-	conn.Send(testbench.TCP{AckNum: ackProbe, Flags: testbench.Uint8(header.TCPFlagAck)})
+	conn.Send(t, testbench.TCP{AckNum: ackProbe, Flags: testbench.Uint8(header.TCPFlagAck)})
 	// Expect the dut to recover and transmit data.
-	if _, err := conn.ExpectData(&testbench.TCP{SeqNum: ackProbe}, samplePayload, time.Second); err != nil {
+	if _, err := conn.ExpectData(t, &testbench.TCP{SeqNum: ackProbe}, samplePayload, time.Second); err != nil {
 		t.Fatalf("expected payload was not received: %s", err)
 	}
 
@@ -104,9 +104,9 @@ func TestZeroWindowProbe(t *testing.T) {
 	//         Check if the dut responds as we do for a similar probe sent to it.
 	//         Basically with sequence number to one byte behind the unacknowledged
 	//         sequence number.
-	p := testbench.Uint32(uint32(*conn.LocalSeqNum()))
-	conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck), SeqNum: testbench.Uint32(uint32(*conn.LocalSeqNum() - 1))})
-	if _, err := conn.ExpectData(&testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck), AckNum: p}, nil, time.Second); err != nil {
+	p := testbench.Uint32(uint32(*conn.LocalSeqNum(t)))
+	conn.Send(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck), SeqNum: testbench.Uint32(uint32(*conn.LocalSeqNum(t) - 1))})
+	if _, err := conn.ExpectData(t, &testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck), AckNum: p}, nil, time.Second); err != nil {
 		t.Fatalf("expected a packet with ack number: %d: %s", p, err)
 	}
 }
diff --git a/test/packetimpact/tests/tcp_zero_window_probe_usertimeout_test.go b/test/packetimpact/tests/tcp_zero_window_probe_usertimeout_test.go
index 3c467b14f..09a1c653f 100644
--- a/test/packetimpact/tests/tcp_zero_window_probe_usertimeout_test.go
+++ b/test/packetimpact/tests/tcp_zero_window_probe_usertimeout_test.go
@@ -33,27 +33,27 @@ func init() {
 func TestZeroWindowProbeUserTimeout(t *testing.T) {
 	dut := testbench.NewDUT(t)
 	defer dut.TearDown()
-	listenFd, remotePort := dut.CreateListener(unix.SOCK_STREAM, unix.IPPROTO_TCP, 1)
-	defer dut.Close(listenFd)
+	listenFd, remotePort := dut.CreateListener(t, unix.SOCK_STREAM, unix.IPPROTO_TCP, 1)
+	defer dut.Close(t, listenFd)
 	conn := testbench.NewTCPIPv4(t, testbench.TCP{DstPort: &remotePort}, testbench.TCP{SrcPort: &remotePort})
-	defer conn.Close()
+	defer conn.Close(t)
 
-	conn.Connect()
-	acceptFd, _ := dut.Accept(listenFd)
-	defer dut.Close(acceptFd)
+	conn.Connect(t)
+	acceptFd, _ := dut.Accept(t, listenFd)
+	defer dut.Close(t, acceptFd)
 
-	dut.SetSockOptInt(acceptFd, unix.IPPROTO_TCP, unix.TCP_NODELAY, 1)
+	dut.SetSockOptInt(t, acceptFd, unix.IPPROTO_TCP, unix.TCP_NODELAY, 1)
 
 	sampleData := []byte("Sample Data")
 	samplePayload := &testbench.Payload{Bytes: sampleData}
 
 	// Send and receive sample data to the dut.
-	dut.Send(acceptFd, sampleData, 0)
-	if _, err := conn.ExpectData(&testbench.TCP{}, samplePayload, time.Second); err != nil {
+	dut.Send(t, acceptFd, sampleData, 0)
+	if _, err := conn.ExpectData(t, &testbench.TCP{}, samplePayload, time.Second); err != nil {
 		t.Fatalf("expected payload was not received: %s", err)
 	}
-	conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck | header.TCPFlagPsh)}, samplePayload)
-	if _, err := conn.ExpectData(&testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}, nil, time.Second); err != nil {
+	conn.Send(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck | header.TCPFlagPsh)}, samplePayload)
+	if _, err := conn.ExpectData(t, &testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}, nil, time.Second); err != nil {
 		t.Fatalf("expected packet was not received: %s", err)
 	}
 
@@ -61,15 +61,15 @@ func TestZeroWindowProbeUserTimeout(t *testing.T) {
 	//         probe to be sent.
 	//
 	// Advertize zero window to the dut.
-	conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck), WindowSize: testbench.Uint16(0)})
+	conn.Send(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck), WindowSize: testbench.Uint16(0)})
 
 	// Expected sequence number of the zero window probe.
-	probeSeq := testbench.Uint32(uint32(*conn.RemoteSeqNum() - 1))
+	probeSeq := testbench.Uint32(uint32(*conn.RemoteSeqNum(t) - 1))
 	start := time.Now()
 	// Ask the dut to send out data.
-	dut.Send(acceptFd, sampleData, 0)
+	dut.Send(t, acceptFd, sampleData, 0)
 	// Expect zero-window probe from the dut.
-	if _, err := conn.ExpectData(&testbench.TCP{SeqNum: probeSeq}, nil, time.Second); err != nil {
+	if _, err := conn.ExpectData(t, &testbench.TCP{SeqNum: probeSeq}, nil, time.Second); err != nil {
 		t.Fatalf("expected a packet with sequence number %d: %s", probeSeq, err)
 	}
 	// Record the duration for first probe, the dut sends the zero window probe after
@@ -80,19 +80,19 @@ func TestZeroWindowProbeUserTimeout(t *testing.T) {
 	//         when the dut is sending zero-window probes.
 	//
 	// Reduce the retransmit timeout.
-	dut.SetSockOptInt(acceptFd, unix.IPPROTO_TCP, unix.TCP_USER_TIMEOUT, int32(startProbeDuration.Milliseconds()))
+	dut.SetSockOptInt(t, acceptFd, unix.IPPROTO_TCP, unix.TCP_USER_TIMEOUT, int32(startProbeDuration.Milliseconds()))
 	// Advertize zero window again.
-	conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck), WindowSize: testbench.Uint16(0)})
+	conn.Send(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck), WindowSize: testbench.Uint16(0)})
 	// Ask the dut to send out data that would trigger zero window probe retransmissions.
-	dut.Send(acceptFd, sampleData, 0)
+	dut.Send(t, acceptFd, sampleData, 0)
 
 	// Wait for the connection to timeout after multiple zero-window probe retransmissions.
 	time.Sleep(8 * startProbeDuration)
 
 	// Expect the connection to have timed out and closed which would cause the dut
 	// to reply with a RST to the ACK we send.
-	conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)})
-	if _, err := conn.ExpectData(&testbench.TCP{Flags: testbench.Uint8(header.TCPFlagRst)}, nil, time.Second); err != nil {
+	conn.Send(t, testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)})
+	if _, err := conn.ExpectData(t, &testbench.TCP{Flags: testbench.Uint8(header.TCPFlagRst)}, nil, time.Second); err != nil {
 		t.Fatalf("expected a TCP RST")
 	}
 }
diff --git a/test/packetimpact/tests/udp_discard_mcast_source_addr_test.go b/test/packetimpact/tests/udp_discard_mcast_source_addr_test.go
index b0315e67c..d30177e64 100644
--- a/test/packetimpact/tests/udp_discard_mcast_source_addr_test.go
+++ b/test/packetimpact/tests/udp_discard_mcast_source_addr_test.go
@@ -36,11 +36,11 @@ func init() {
 func TestDiscardsUDPPacketsWithMcastSourceAddressV4(t *testing.T) {
 	dut := testbench.NewDUT(t)
 	defer dut.TearDown()
-	remoteFD, remotePort := dut.CreateBoundSocket(unix.SOCK_DGRAM, unix.IPPROTO_UDP, net.ParseIP(testbench.RemoteIPv4))
-	defer dut.Close(remoteFD)
-	dut.SetSockOptTimeval(remoteFD, unix.SOL_SOCKET, unix.SO_RCVTIMEO, &oneSecond)
+	remoteFD, remotePort := dut.CreateBoundSocket(t, unix.SOCK_DGRAM, unix.IPPROTO_UDP, net.ParseIP(testbench.RemoteIPv4))
+	defer dut.Close(t, remoteFD)
+	dut.SetSockOptTimeval(t, remoteFD, unix.SOL_SOCKET, unix.SO_RCVTIMEO, &oneSecond)
 	conn := testbench.NewUDPIPv4(t, testbench.UDP{DstPort: &remotePort}, testbench.UDP{SrcPort: &remotePort})
-	defer conn.Close()
+	defer conn.Close(t)
 
 	for _, mcastAddr := range []net.IP{
 		net.IPv4allsys,
@@ -50,11 +50,12 @@ func TestDiscardsUDPPacketsWithMcastSourceAddressV4(t *testing.T) {
 	} {
 		t.Run(fmt.Sprintf("srcaddr=%s", mcastAddr), func(t *testing.T) {
 			conn.SendIP(
+				t,
 				testbench.IPv4{SrcAddr: testbench.Address(tcpip.Address(mcastAddr.To4()))},
 				testbench.UDP{},
 			)
 
-			ret, payload, errno := dut.RecvWithErrno(context.Background(), remoteFD, 100, 0)
+			ret, payload, errno := dut.RecvWithErrno(context.Background(), t, remoteFD, 100, 0)
 			if errno != syscall.EAGAIN || errno != syscall.EWOULDBLOCK {
 				t.Errorf("Recv got unexpected result, ret=%d, payload=%q, errno=%s", ret, payload, errno)
 			}
@@ -65,11 +66,11 @@ func TestDiscardsUDPPacketsWithMcastSourceAddressV4(t *testing.T) {
 func TestDiscardsUDPPacketsWithMcastSourceAddressV6(t *testing.T) {
 	dut := testbench.NewDUT(t)
 	defer dut.TearDown()
-	remoteFD, remotePort := dut.CreateBoundSocket(unix.SOCK_DGRAM, unix.IPPROTO_UDP, net.ParseIP(testbench.RemoteIPv6))
-	defer dut.Close(remoteFD)
-	dut.SetSockOptTimeval(remoteFD, unix.SOL_SOCKET, unix.SO_RCVTIMEO, &oneSecond)
+	remoteFD, remotePort := dut.CreateBoundSocket(t, unix.SOCK_DGRAM, unix.IPPROTO_UDP, net.ParseIP(testbench.RemoteIPv6))
+	defer dut.Close(t, remoteFD)
+	dut.SetSockOptTimeval(t, remoteFD, unix.SOL_SOCKET, unix.SO_RCVTIMEO, &oneSecond)
 	conn := testbench.NewUDPIPv6(t, testbench.UDP{DstPort: &remotePort}, testbench.UDP{SrcPort: &remotePort})
-	defer conn.Close()
+	defer conn.Close(t)
 
 	for _, mcastAddr := range []net.IP{
 		net.IPv6interfacelocalallnodes,
@@ -80,10 +81,11 @@ func TestDiscardsUDPPacketsWithMcastSourceAddressV6(t *testing.T) {
 	} {
 		t.Run(fmt.Sprintf("srcaddr=%s", mcastAddr), func(t *testing.T) {
 			conn.SendIPv6(
+				t,
 				testbench.IPv6{SrcAddr: testbench.Address(tcpip.Address(mcastAddr.To16()))},
 				testbench.UDP{},
 			)
-			ret, payload, errno := dut.RecvWithErrno(context.Background(), remoteFD, 100, 0)
+			ret, payload, errno := dut.RecvWithErrno(context.Background(), t, remoteFD, 100, 0)
 			if errno != syscall.EAGAIN || errno != syscall.EWOULDBLOCK {
 				t.Errorf("Recv got unexpected result, ret=%d, payload=%q, errno=%s", ret, payload, errno)
 			}
diff --git a/test/packetimpact/tests/udp_icmp_error_propagation_test.go b/test/packetimpact/tests/udp_icmp_error_propagation_test.go
index b754918f6..715e8f5b5 100644
--- a/test/packetimpact/tests/udp_icmp_error_propagation_test.go
+++ b/test/packetimpact/tests/udp_icmp_error_propagation_test.go
@@ -72,7 +72,7 @@ func (e icmpError) ToICMPv4() *testbench.ICMPv4 {
 type errorDetection struct {
 	name         string
 	useValidConn bool
-	f            func(context.Context, testData) error
+	f            func(context.Context, *testing.T, testData)
 }
 
 type testData struct {
@@ -95,12 +95,14 @@ func wantErrno(c connectionMode, icmpErr icmpError) syscall.Errno {
 }
 
 // sendICMPError sends an ICMP error message in response to a UDP datagram.
-func sendICMPError(conn *testbench.UDPIPv4, icmpErr icmpError, udp *testbench.UDP) error {
-	layers := (*testbench.Connection)(conn).CreateFrame(nil)
+func sendICMPError(t *testing.T, conn *testbench.UDPIPv4, icmpErr icmpError, udp *testbench.UDP) {
+	t.Helper()
+
+	layers := (*testbench.Connection)(conn).CreateFrame(t, nil)
 	layers = layers[:len(layers)-1]
 	ip, ok := udp.Prev().(*testbench.IPv4)
 	if !ok {
-		return fmt.Errorf("expected %s to be IPv4", udp.Prev())
+		t.Fatalf("expected %s to be IPv4", udp.Prev())
 	}
 	if icmpErr == timeToLiveExceeded {
 		*ip.TTL = 1
@@ -114,84 +116,82 @@ func sendICMPError(conn *testbench.UDPIPv4, icmpErr icmpError, udp *testbench.UD
 	// resulting in a mal-formed packet.
 	layers = append(layers, icmpErr.ToICMPv4(), ip, udp)
 
-	(*testbench.Connection)(conn).SendFrameStateless(layers)
-	return nil
+	(*testbench.Connection)(conn).SendFrameStateless(t, layers)
 }
 
 // testRecv tests observing the ICMP error through the recv syscall. A packet
 // is sent to the DUT, and if wantErrno is non-zero, then the first recv should
 // fail and the second should succeed. Otherwise if wantErrno is zero then the
 // first recv should succeed immediately.
-func testRecv(ctx context.Context, d testData) error {
+func testRecv(ctx context.Context, t *testing.T, d testData) {
+	t.Helper()
+
 	// Check that receiving on the clean socket works.
-	d.conn.Send(testbench.UDP{DstPort: &d.cleanPort})
-	d.dut.Recv(d.cleanFD, 100, 0)
+	d.conn.Send(t, testbench.UDP{DstPort: &d.cleanPort})
+	d.dut.Recv(t, d.cleanFD, 100, 0)
 
-	d.conn.Send(testbench.UDP{})
+	d.conn.Send(t, testbench.UDP{})
 
 	if d.wantErrno != syscall.Errno(0) {
 		ctx, cancel := context.WithTimeout(ctx, time.Second)
 		defer cancel()
-		ret, _, err := d.dut.RecvWithErrno(ctx, d.remoteFD, 100, 0)
+		ret, _, err := d.dut.RecvWithErrno(ctx, t, d.remoteFD, 100, 0)
 		if ret != -1 {
-			return fmt.Errorf("recv after ICMP error succeeded unexpectedly, expected (%[1]d) %[1]v", d.wantErrno)
+			t.Fatalf("recv after ICMP error succeeded unexpectedly, expected (%[1]d) %[1]v", d.wantErrno)
 		}
 		if err != d.wantErrno {
-			return fmt.Errorf("recv after ICMP error resulted in error (%[1]d) %[1]v, expected (%[2]d) %[2]v", err, d.wantErrno)
+			t.Fatalf("recv after ICMP error resulted in error (%[1]d) %[1]v, expected (%[2]d) %[2]v", err, d.wantErrno)
 		}
 	}
 
-	d.dut.Recv(d.remoteFD, 100, 0)
-	return nil
+	d.dut.Recv(t, d.remoteFD, 100, 0)
 }
 
 // testSendTo tests observing the ICMP error through the send syscall. If
 // wantErrno is non-zero, the first send should fail and a subsequent send
 // should suceed; while if wantErrno is zero then the first send should just
 // succeed.
-func testSendTo(ctx context.Context, d testData) error {
+func testSendTo(ctx context.Context, t *testing.T, d testData) {
 	// Check that sending on the clean socket works.
-	d.dut.SendTo(d.cleanFD, nil, 0, d.conn.LocalAddr())
-	if _, err := d.conn.Expect(testbench.UDP{SrcPort: &d.cleanPort}, time.Second); err != nil {
-		return fmt.Errorf("did not receive UDP packet from clean socket on DUT: %s", err)
+	d.dut.SendTo(t, d.cleanFD, nil, 0, d.conn.LocalAddr(t))
+	if _, err := d.conn.Expect(t, testbench.UDP{SrcPort: &d.cleanPort}, time.Second); err != nil {
+		t.Fatalf("did not receive UDP packet from clean socket on DUT: %s", err)
 	}
 
 	if d.wantErrno != syscall.Errno(0) {
 		ctx, cancel := context.WithTimeout(ctx, time.Second)
 		defer cancel()
-		ret, err := d.dut.SendToWithErrno(ctx, d.remoteFD, nil, 0, d.conn.LocalAddr())
+		ret, err := d.dut.SendToWithErrno(ctx, t, d.remoteFD, nil, 0, d.conn.LocalAddr(t))
 
 		if ret != -1 {
-			return fmt.Errorf("sendto after ICMP error succeeded unexpectedly, expected (%[1]d) %[1]v", d.wantErrno)
+			t.Fatalf("sendto after ICMP error succeeded unexpectedly, expected (%[1]d) %[1]v", d.wantErrno)
 		}
 		if err != d.wantErrno {
-			return fmt.Errorf("sendto after ICMP error resulted in error (%[1]d) %[1]v, expected (%[2]d) %[2]v", err, d.wantErrno)
+			t.Fatalf("sendto after ICMP error resulted in error (%[1]d) %[1]v, expected (%[2]d) %[2]v", err, d.wantErrno)
 		}
 	}
 
-	d.dut.SendTo(d.remoteFD, nil, 0, d.conn.LocalAddr())
-	if _, err := d.conn.Expect(testbench.UDP{}, time.Second); err != nil {
-		return fmt.Errorf("did not receive UDP packet as expected: %s", err)
+	d.dut.SendTo(t, d.remoteFD, nil, 0, d.conn.LocalAddr(t))
+	if _, err := d.conn.Expect(t, testbench.UDP{}, time.Second); err != nil {
+		t.Fatalf("did not receive UDP packet as expected: %s", err)
 	}
-	return nil
 }
 
-func testSockOpt(_ context.Context, d testData) error {
+func testSockOpt(_ context.Context, t *testing.T, d testData) {
 	// Check that there's no pending error on the clean socket.
-	if errno := syscall.Errno(d.dut.GetSockOptInt(d.cleanFD, unix.SOL_SOCKET, unix.SO_ERROR)); errno != syscall.Errno(0) {
-		return fmt.Errorf("unexpected error (%[1]d) %[1]v on clean socket", errno)
+	if errno := syscall.Errno(d.dut.GetSockOptInt(t, d.cleanFD, unix.SOL_SOCKET, unix.SO_ERROR)); errno != syscall.Errno(0) {
+		t.Fatalf("unexpected error (%[1]d) %[1]v on clean socket", errno)
 	}
 
-	if errno := syscall.Errno(d.dut.GetSockOptInt(d.remoteFD, unix.SOL_SOCKET, unix.SO_ERROR)); errno != d.wantErrno {
-		return fmt.Errorf("SO_ERROR sockopt after ICMP error is (%[1]d) %[1]v, expected (%[2]d) %[2]v", errno, d.wantErrno)
+	if errno := syscall.Errno(d.dut.GetSockOptInt(t, d.remoteFD, unix.SOL_SOCKET, unix.SO_ERROR)); errno != d.wantErrno {
+		t.Fatalf("SO_ERROR sockopt after ICMP error is (%[1]d) %[1]v, expected (%[2]d) %[2]v", errno, d.wantErrno)
 	}
 
 	// Check that after clearing socket error, sending doesn't fail.
-	d.dut.SendTo(d.remoteFD, nil, 0, d.conn.LocalAddr())
-	if _, err := d.conn.Expect(testbench.UDP{}, time.Second); err != nil {
-		return fmt.Errorf("did not receive UDP packet as expected: %s", err)
+	d.dut.SendTo(t, d.remoteFD, nil, 0, d.conn.LocalAddr(t))
+	if _, err := d.conn.Expect(t, testbench.UDP{}, time.Second); err != nil {
+		t.Fatalf("did not receive UDP packet as expected: %s", err)
 	}
-	return nil
 }
 
 // TestUDPICMPErrorPropagation tests that ICMP error messages in response to
@@ -227,31 +227,29 @@ func TestUDPICMPErrorPropagation(t *testing.T) {
 					dut := testbench.NewDUT(t)
 					defer dut.TearDown()
 
-					remoteFD, remotePort := dut.CreateBoundSocket(unix.SOCK_DGRAM, unix.IPPROTO_UDP, net.ParseIP("0.0.0.0"))
-					defer dut.Close(remoteFD)
+					remoteFD, remotePort := dut.CreateBoundSocket(t, unix.SOCK_DGRAM, unix.IPPROTO_UDP, net.ParseIP("0.0.0.0"))
+					defer dut.Close(t, remoteFD)
 
 					// Create a second, clean socket on the DUT to ensure that the ICMP
 					// error messages only affect the sockets they are intended for.
-					cleanFD, cleanPort := dut.CreateBoundSocket(unix.SOCK_DGRAM, unix.IPPROTO_UDP, net.ParseIP("0.0.0.0"))
-					defer dut.Close(cleanFD)
+					cleanFD, cleanPort := dut.CreateBoundSocket(t, unix.SOCK_DGRAM, unix.IPPROTO_UDP, net.ParseIP("0.0.0.0"))
+					defer dut.Close(t, cleanFD)
 
 					conn := testbench.NewUDPIPv4(t, testbench.UDP{DstPort: &remotePort}, testbench.UDP{SrcPort: &remotePort})
-					defer conn.Close()
+					defer conn.Close(t)
 
 					if connect {
-						dut.Connect(remoteFD, conn.LocalAddr())
-						dut.Connect(cleanFD, conn.LocalAddr())
+						dut.Connect(t, remoteFD, conn.LocalAddr(t))
+						dut.Connect(t, cleanFD, conn.LocalAddr(t))
 					}
 
-					dut.SendTo(remoteFD, nil, 0, conn.LocalAddr())
-					udp, err := conn.Expect(testbench.UDP{}, time.Second)
+					dut.SendTo(t, remoteFD, nil, 0, conn.LocalAddr(t))
+					udp, err := conn.Expect(t, testbench.UDP{}, time.Second)
 					if err != nil {
 						t.Fatalf("did not receive message from DUT: %s", err)
 					}
 
-					if err := sendICMPError(&conn, icmpErr, udp); err != nil {
-						t.Fatal(err)
-					}
+					sendICMPError(t, &conn, icmpErr, udp)
 
 					errDetectConn := &conn
 					if errDetect.useValidConn {
@@ -260,14 +258,12 @@ func TestUDPICMPErrorPropagation(t *testing.T) {
 						// interactions between it and the the DUT should be independent of
 						// the ICMP error at least at the port level.
 						connClean := testbench.NewUDPIPv4(t, testbench.UDP{DstPort: &remotePort}, testbench.UDP{SrcPort: &remotePort})
-						defer connClean.Close()
+						defer connClean.Close(t)
 
 						errDetectConn = &connClean
 					}
 
-					if err := errDetect.f(context.Background(), testData{&dut, errDetectConn, remoteFD, remotePort, cleanFD, cleanPort, wantErrno}); err != nil {
-						t.Fatal(err)
-					}
+					errDetect.f(context.Background(), t, testData{&dut, errDetectConn, remoteFD, remotePort, cleanFD, cleanPort, wantErrno})
 				})
 			}
 		}
@@ -285,24 +281,24 @@ func TestICMPErrorDuringUDPRecv(t *testing.T) {
 				dut := testbench.NewDUT(t)
 				defer dut.TearDown()
 
-				remoteFD, remotePort := dut.CreateBoundSocket(unix.SOCK_DGRAM, unix.IPPROTO_UDP, net.ParseIP("0.0.0.0"))
-				defer dut.Close(remoteFD)
+				remoteFD, remotePort := dut.CreateBoundSocket(t, unix.SOCK_DGRAM, unix.IPPROTO_UDP, net.ParseIP("0.0.0.0"))
+				defer dut.Close(t, remoteFD)
 
 				// Create a second, clean socket on the DUT to ensure that the ICMP
 				// error messages only affect the sockets they are intended for.
-				cleanFD, cleanPort := dut.CreateBoundSocket(unix.SOCK_DGRAM, unix.IPPROTO_UDP, net.ParseIP("0.0.0.0"))
-				defer dut.Close(cleanFD)
+				cleanFD, cleanPort := dut.CreateBoundSocket(t, unix.SOCK_DGRAM, unix.IPPROTO_UDP, net.ParseIP("0.0.0.0"))
+				defer dut.Close(t, cleanFD)
 
 				conn := testbench.NewUDPIPv4(t, testbench.UDP{DstPort: &remotePort}, testbench.UDP{SrcPort: &remotePort})
-				defer conn.Close()
+				defer conn.Close(t)
 
 				if connect {
-					dut.Connect(remoteFD, conn.LocalAddr())
-					dut.Connect(cleanFD, conn.LocalAddr())
+					dut.Connect(t, remoteFD, conn.LocalAddr(t))
+					dut.Connect(t, cleanFD, conn.LocalAddr(t))
 				}
 
-				dut.SendTo(remoteFD, nil, 0, conn.LocalAddr())
-				udp, err := conn.Expect(testbench.UDP{}, time.Second)
+				dut.SendTo(t, remoteFD, nil, 0, conn.LocalAddr(t))
+				udp, err := conn.Expect(t, testbench.UDP{}, time.Second)
 				if err != nil {
 					t.Fatalf("did not receive message from DUT: %s", err)
 				}
@@ -316,7 +312,7 @@ func TestICMPErrorDuringUDPRecv(t *testing.T) {
 						ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
 						defer cancel()
 
-						ret, _, err := dut.RecvWithErrno(ctx, remoteFD, 100, 0)
+						ret, _, err := dut.RecvWithErrno(ctx, t, remoteFD, 100, 0)
 						if ret != -1 {
 							t.Errorf("recv during ICMP error succeeded unexpectedly, expected (%[1]d) %[1]v", wantErrno)
 							return
@@ -330,7 +326,7 @@ func TestICMPErrorDuringUDPRecv(t *testing.T) {
 					ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
 					defer cancel()
 
-					if ret, _, err := dut.RecvWithErrno(ctx, remoteFD, 100, 0); ret == -1 {
+					if ret, _, err := dut.RecvWithErrno(ctx, t, remoteFD, 100, 0); ret == -1 {
 						t.Errorf("recv after ICMP error failed with (%[1]d) %[1]", err)
 					}
 				}()
@@ -341,7 +337,7 @@ func TestICMPErrorDuringUDPRecv(t *testing.T) {
 					ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
 					defer cancel()
 
-					if ret, _, err := dut.RecvWithErrno(ctx, cleanFD, 100, 0); ret == -1 {
+					if ret, _, err := dut.RecvWithErrno(ctx, t, cleanFD, 100, 0); ret == -1 {
 						t.Errorf("recv on clean socket failed with (%[1]d) %[1]", err)
 					}
 				}()
@@ -352,12 +348,10 @@ func TestICMPErrorDuringUDPRecv(t *testing.T) {
 				// alternative is available.
 				time.Sleep(2 * time.Second)
 
-				if err := sendICMPError(&conn, icmpErr, udp); err != nil {
-					t.Fatal(err)
-				}
+				sendICMPError(t, &conn, icmpErr, udp)
 
-				conn.Send(testbench.UDP{DstPort: &cleanPort})
-				conn.Send(testbench.UDP{})
+				conn.Send(t, testbench.UDP{DstPort: &cleanPort})
+				conn.Send(t, testbench.UDP{})
 				wg.Wait()
 			})
 		}
diff --git a/test/packetimpact/tests/udp_recv_mcast_bcast_test.go b/test/packetimpact/tests/udp_recv_mcast_bcast_test.go
index 263a54291..fcd202643 100644
--- a/test/packetimpact/tests/udp_recv_mcast_bcast_test.go
+++ b/test/packetimpact/tests/udp_recv_mcast_bcast_test.go
@@ -31,10 +31,10 @@ func init() {
 func TestUDPRecvMulticastBroadcast(t *testing.T) {
 	dut := testbench.NewDUT(t)
 	defer dut.TearDown()
-	boundFD, remotePort := dut.CreateBoundSocket(unix.SOCK_DGRAM, unix.IPPROTO_UDP, net.IPv4(0, 0, 0, 0))
-	defer dut.Close(boundFD)
+	boundFD, remotePort := dut.CreateBoundSocket(t, unix.SOCK_DGRAM, unix.IPPROTO_UDP, net.IPv4(0, 0, 0, 0))
+	defer dut.Close(t, boundFD)
 	conn := testbench.NewUDPIPv4(t, testbench.UDP{DstPort: &remotePort}, testbench.UDP{SrcPort: &remotePort})
-	defer conn.Close()
+	defer conn.Close(t)
 
 	for _, bcastAddr := range []net.IP{
 		broadcastAddr(net.ParseIP(testbench.RemoteIPv4), net.CIDRMask(testbench.IPv4PrefixLength, 32)),
@@ -43,12 +43,13 @@ func TestUDPRecvMulticastBroadcast(t *testing.T) {
 	} {
 		payload := testbench.GenerateRandomPayload(t, 1<<10)
 		conn.SendIP(
+			t,
 			testbench.IPv4{DstAddr: testbench.Address(tcpip.Address(bcastAddr.To4()))},
 			testbench.UDP{},
 			&testbench.Payload{Bytes: payload},
 		)
 		t.Logf("Receiving packet sent to address: %s", bcastAddr)
-		if got, want := string(dut.Recv(boundFD, int32(len(payload)), 0)), string(payload); got != want {
+		if got, want := string(dut.Recv(t, boundFD, int32(len(payload)), 0)), string(payload); got != want {
 			t.Errorf("received payload does not match sent payload got: %s, want: %s", got, want)
 		}
 	}
diff --git a/test/packetimpact/tests/udp_send_recv_dgram_test.go b/test/packetimpact/tests/udp_send_recv_dgram_test.go
index bd53ad90b..dc20275d6 100644
--- a/test/packetimpact/tests/udp_send_recv_dgram_test.go
+++ b/test/packetimpact/tests/udp_send_recv_dgram_test.go
@@ -29,10 +29,10 @@ func init() {
 }
 
 type udpConn interface {
-	Send(testbench.UDP, ...testbench.Layer)
-	ExpectData(testbench.UDP, testbench.Payload, time.Duration) (testbench.Layers, error)
-	Drain()
-	Close()
+	Send(*testing.T, testbench.UDP, ...testbench.Layer)
+	ExpectData(*testing.T, testbench.UDP, testbench.Payload, time.Duration) (testbench.Layers, error)
+	Drain(*testing.T)
+	Close(*testing.T)
 }
 
 func TestUDP(t *testing.T) {
@@ -51,21 +51,21 @@ func TestUDP(t *testing.T) {
 			} else {
 				addr = testbench.RemoteIPv6
 			}
-			boundFD, remotePort := dut.CreateBoundSocket(unix.SOCK_DGRAM, unix.IPPROTO_UDP, net.ParseIP(addr))
-			defer dut.Close(boundFD)
+			boundFD, remotePort := dut.CreateBoundSocket(t, unix.SOCK_DGRAM, unix.IPPROTO_UDP, net.ParseIP(addr))
+			defer dut.Close(t, boundFD)
 
 			var conn udpConn
 			var localAddr unix.Sockaddr
 			if isIPv4 {
 				v4Conn := testbench.NewUDPIPv4(t, testbench.UDP{DstPort: &remotePort}, testbench.UDP{SrcPort: &remotePort})
-				localAddr = v4Conn.LocalAddr()
+				localAddr = v4Conn.LocalAddr(t)
 				conn = &v4Conn
 			} else {
 				v6Conn := testbench.NewUDPIPv6(t, testbench.UDP{DstPort: &remotePort}, testbench.UDP{SrcPort: &remotePort})
-				localAddr = v6Conn.LocalAddr()
+				localAddr = v6Conn.LocalAddr(t)
 				conn = &v6Conn
 			}
-			defer conn.Close()
+			defer conn.Close(t)
 
 			testCases := []struct {
 				name    string
@@ -81,17 +81,17 @@ func TestUDP(t *testing.T) {
 			for _, tc := range testCases {
 				t.Run(tc.name, func(t *testing.T) {
 					t.Run("Send", func(t *testing.T) {
-						conn.Send(testbench.UDP{}, &testbench.Payload{Bytes: tc.payload})
-						if got, want := string(dut.Recv(boundFD, int32(len(tc.payload)), 0)), string(tc.payload); got != want {
+						conn.Send(t, testbench.UDP{}, &testbench.Payload{Bytes: tc.payload})
+						if got, want := string(dut.Recv(t, boundFD, int32(len(tc.payload)), 0)), string(tc.payload); got != want {
 							t.Fatalf("received payload does not match sent payload got: %s, want: %s", got, want)
 						}
 					})
 					t.Run("Recv", func(t *testing.T) {
-						conn.Drain()
-						if got, want := int(dut.SendTo(boundFD, tc.payload, 0, localAddr)), len(tc.payload); got != want {
+						conn.Drain(t)
+						if got, want := int(dut.SendTo(t, boundFD, tc.payload, 0, localAddr)), len(tc.payload); got != want {
 							t.Fatalf("short write got: %d, want: %d", got, want)
 						}
-						if _, err := conn.ExpectData(testbench.UDP{SrcPort: &remotePort}, testbench.Payload{Bytes: tc.payload}, time.Second); err != nil {
+						if _, err := conn.ExpectData(t, testbench.UDP{SrcPort: &remotePort}, testbench.Payload{Bytes: tc.payload}, time.Second); err != nil {
 							t.Fatal(err)
 						}
 					})
diff --git a/test/root/crictl_test.go b/test/root/crictl_test.go
index 193705ab8..df91fa0fe 100644
--- a/test/root/crictl_test.go
+++ b/test/root/crictl_test.go
@@ -405,11 +405,8 @@ func setup(t *testing.T, version string) (*criutil.Crictl, func(), error) {
 		}
 
 		// We provide the shim, followed by the runtime, and then a
-		// temporary root directory.  Note that we can safely assume
-		// that the shim has been installed in the same directory as
-		// the runtime (for test installs and for normal installs).
-		// Since this is v1, the binary name will be fixed.
-		config = fmt.Sprintf(v1Template, path.Join(runtimeDir, "gvisor-containerd-shim"), runtime, runtimeDir)
+		// temporary root directory.
+		config = fmt.Sprintf(v1Template, criutil.ResolvePath("gvisor-containerd-shim"), runtime, containerdRoot)
 	case v2:
 		// This is only supported past 1.2.
 		if major < 1 || (major == 1 && minor <= 1) {
diff --git a/test/runner/BUILD b/test/runner/BUILD
index 1f45a6922..63c7ec83a 100644
--- a/test/runner/BUILD
+++ b/test/runner/BUILD
@@ -17,6 +17,7 @@ go_binary(
         "//test/runner/gtest",
         "//test/uds",
         "@com_github_opencontainers_runtime_spec//specs-go:go_default_library",
+        "@com_github_syndtr_gocapability//capability:go_default_library",
         "@org_golang_x_sys//unix:go_default_library",
     ],
 )
diff --git a/test/runner/defs.bzl b/test/runner/defs.bzl
index 600cb5192..c92392b35 100644
--- a/test/runner/defs.bzl
+++ b/test/runner/defs.bzl
@@ -157,7 +157,7 @@ def syscall_test(
         platform = "native",
         use_tmpfs = False,
         add_uds_tree = add_uds_tree,
-        tags = tags,
+        tags = list(tags),
     )
 
     for (platform, platform_tags) in platforms.items():
diff --git a/test/runner/runner.go b/test/runner/runner.go
index 2296f3a46..bc4b39cbb 100644
--- a/test/runner/runner.go
+++ b/test/runner/runner.go
@@ -30,6 +30,7 @@ import (
 	"time"
 
 	specs "github.com/opencontainers/runtime-spec/specs-go"
+	"github.com/syndtr/gocapability/capability"
 	"golang.org/x/sys/unix"
 	"gvisor.dev/gvisor/pkg/log"
 	"gvisor.dev/gvisor/pkg/test/testutil"
@@ -105,6 +106,13 @@ func runTestCaseNative(testBin string, tc gtest.TestCase, t *testing.T) {
 	cmd.Env = env
 	cmd.Stdout = os.Stdout
 	cmd.Stderr = os.Stderr
+
+	if specutils.HasCapabilities(capability.CAP_NET_ADMIN) {
+		cmd.SysProcAttr = &syscall.SysProcAttr{
+			Cloneflags: syscall.CLONE_NEWNET,
+		}
+	}
+
 	if err := cmd.Run(); err != nil {
 		ws := err.(*exec.ExitError).Sys().(syscall.WaitStatus)
 		t.Errorf("test %q exited with status %d, want 0", tc.FullName(), ws.ExitStatus())
diff --git a/test/syscalls/BUILD b/test/syscalls/BUILD
index c06a75ada..c19b30b4a 100644
--- a/test/syscalls/BUILD
+++ b/test/syscalls/BUILD
@@ -400,6 +400,7 @@ syscall_test(
 syscall_test(
     add_overlay = True,
     test = "//test/syscalls/linux:open_test",
+    vfs2 = "True",
 )
 
 syscall_test(
@@ -640,11 +641,13 @@ syscall_test(
 syscall_test(
     add_overlay = True,
     test = "//test/syscalls/linux:sendfile_socket_test",
+    vfs2 = "True",
 )
 
 syscall_test(
     add_overlay = True,
     test = "//test/syscalls/linux:sendfile_test",
+    vfs2 = "True",
 )
 
 syscall_test(
diff --git a/test/syscalls/linux/BUILD b/test/syscalls/linux/BUILD
index 662d780d8..66a31cd28 100644
--- a/test/syscalls/linux/BUILD
+++ b/test/syscalls/linux/BUILD
@@ -943,6 +943,7 @@ cc_binary(
         "//test/util:eventfd_util",
         "//test/util:file_descriptor",
         "//test/util:fs_util",
+        "@com_google_absl//absl/container:node_hash_set",
         "@com_google_absl//absl/strings",
         gtest,
         "//test/util:posix_error",
diff --git a/test/syscalls/linux/dev.cc b/test/syscalls/linux/dev.cc
index 6fa16208e..1d0d584cd 100644
--- a/test/syscalls/linux/dev.cc
+++ b/test/syscalls/linux/dev.cc
@@ -161,6 +161,19 @@ TEST(DevTest, OpenDevFuse) {
   ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/fuse", O_RDONLY));
 }
 
+TEST(DevTest, ReadDevFuseWithoutMount) {
+  // Note(gvisor.dev/issue/3076) This won't work in the sentry until the new
+  // device registration is complete.
+  SKIP_IF(IsRunningWithVFS1() || IsRunningOnGvisor());
+
+  const FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/fuse", O_RDONLY));
+
+  std::vector<char> buf(1);
+  EXPECT_THAT(ReadFd(fd.get(), buf.data(), sizeof(buf)),
+              SyscallFailsWithErrno(EPERM));
+}
+
 }  // namespace
 }  // namespace testing
 
diff --git a/test/syscalls/linux/futex.cc b/test/syscalls/linux/futex.cc
index 40c80a6e1..90b1f0508 100644
--- a/test/syscalls/linux/futex.cc
+++ b/test/syscalls/linux/futex.cc
@@ -18,6 +18,7 @@
 #include <sys/syscall.h>
 #include <sys/time.h>
 #include <sys/types.h>
+#include <syscall.h>
 #include <unistd.h>
 
 #include <algorithm>
@@ -737,6 +738,97 @@ TEST_P(PrivateAndSharedFutexTest, PITryLockConcurrency_NoRandomSave) {
   }
 }
 
+int get_robust_list(int pid, struct robust_list_head** head_ptr,
+                    size_t* len_ptr) {
+  return syscall(__NR_get_robust_list, pid, head_ptr, len_ptr);
+}
+
+int set_robust_list(struct robust_list_head* head, size_t len) {
+  return syscall(__NR_set_robust_list, head, len);
+}
+
+TEST(RobustFutexTest, BasicSetGet) {
+  struct robust_list_head hd = {};
+  struct robust_list_head* hd_ptr = &hd;
+
+  // Set!
+  EXPECT_THAT(set_robust_list(hd_ptr, sizeof(hd)), SyscallSucceedsWithValue(0));
+
+  // Get!
+  struct robust_list_head* new_hd_ptr = hd_ptr;
+  size_t len;
+  EXPECT_THAT(get_robust_list(0, &new_hd_ptr, &len),
+              SyscallSucceedsWithValue(0));
+  EXPECT_EQ(new_hd_ptr, hd_ptr);
+  EXPECT_EQ(len, sizeof(hd));
+}
+
+TEST(RobustFutexTest, GetFromOtherTid) {
+  // Get the current tid and list head.
+  pid_t tid = gettid();
+  struct robust_list_head* hd_ptr = {};
+  size_t len;
+  EXPECT_THAT(get_robust_list(0, &hd_ptr, &len), SyscallSucceedsWithValue(0));
+
+  // Create a new thread.
+  ScopedThread t([&] {
+    // Current tid list head should be different from parent tid.
+    struct robust_list_head* got_hd_ptr = {};
+    EXPECT_THAT(get_robust_list(0, &got_hd_ptr, &len),
+                SyscallSucceedsWithValue(0));
+    EXPECT_NE(hd_ptr, got_hd_ptr);
+
+    // Get the parent list head by passing its tid.
+    EXPECT_THAT(get_robust_list(tid, &got_hd_ptr, &len),
+                SyscallSucceedsWithValue(0));
+    EXPECT_EQ(hd_ptr, got_hd_ptr);
+  });
+
+  // Wait for thread.
+  t.Join();
+}
+
+TEST(RobustFutexTest, InvalidSize) {
+  struct robust_list_head* hd = {};
+  EXPECT_THAT(set_robust_list(hd, sizeof(*hd) + 1),
+              SyscallFailsWithErrno(EINVAL));
+}
+
+TEST(RobustFutexTest, PthreadMutexAttr) {
+  constexpr int kNumMutexes = 3;
+
+  // Create a bunch of robust mutexes.
+  pthread_mutexattr_t attrs[kNumMutexes];
+  pthread_mutex_t mtxs[kNumMutexes];
+  for (int i = 0; i < kNumMutexes; i++) {
+    TEST_PCHECK(pthread_mutexattr_init(&attrs[i]) == 0);
+    TEST_PCHECK(pthread_mutexattr_setrobust(&attrs[i], PTHREAD_MUTEX_ROBUST) ==
+                0);
+    TEST_PCHECK(pthread_mutex_init(&mtxs[i], &attrs[i]) == 0);
+  }
+
+  // Start thread to lock the mutexes and then exit.
+  ScopedThread t([&] {
+    for (int i = 0; i < kNumMutexes; i++) {
+      TEST_PCHECK(pthread_mutex_lock(&mtxs[i]) == 0);
+    }
+    pthread_exit(NULL);
+  });
+
+  // Wait for thread.
+  t.Join();
+
+  // Now try to take the mutexes.
+  for (int i = 0; i < kNumMutexes; i++) {
+    // Should get EOWNERDEAD.
+    EXPECT_EQ(pthread_mutex_lock(&mtxs[i]), EOWNERDEAD);
+    // Make the mutex consistent.
+    EXPECT_EQ(pthread_mutex_consistent(&mtxs[i]), 0);
+    // Unlock.
+    EXPECT_EQ(pthread_mutex_unlock(&mtxs[i]), 0);
+  }
+}
+
 }  // namespace
 }  // namespace testing
 }  // namespace gvisor
diff --git a/test/syscalls/linux/getdents.cc b/test/syscalls/linux/getdents.cc
index b147d6181..b040cdcf7 100644
--- a/test/syscalls/linux/getdents.cc
+++ b/test/syscalls/linux/getdents.cc
@@ -32,6 +32,7 @@
 
 #include "gmock/gmock.h"
 #include "gtest/gtest.h"
+#include "absl/container/node_hash_set.h"
 #include "absl/strings/numbers.h"
 #include "absl/strings/str_cat.h"
 #include "test/util/eventfd_util.h"
@@ -393,7 +394,7 @@ TYPED_TEST(GetdentsTest, ProcSelfFd) {
   // Make the buffer very small since we want to iterate.
   typename TestFixture::DirentBufferType dirents(
       2 * sizeof(typename TestFixture::LinuxDirentType));
-  std::unordered_set<int> prev_fds;
+  absl::node_hash_set<int> prev_fds;
   while (true) {
     dirents.Reset();
     int rv;
diff --git a/test/syscalls/linux/mount.cc b/test/syscalls/linux/mount.cc
index a3e9745cf..97e8d0f7e 100644
--- a/test/syscalls/linux/mount.cc
+++ b/test/syscalls/linux/mount.cc
@@ -321,6 +321,28 @@ TEST(MountTest, RenameRemoveMountPoint) {
   ASSERT_THAT(rmdir(dir.path().c_str()), SyscallFailsWithErrno(EBUSY));
 }
 
+TEST(MountTest, MountFuseFilesystemNoDevice) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_ADMIN)));
+  SKIP_IF(IsRunningOnGvisor() && !IsFUSEEnabled());
+
+  auto const dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  EXPECT_THAT(mount("", dir.path().c_str(), "fuse", 0, ""),
+              SyscallFailsWithErrno(EINVAL));
+}
+
+TEST(MountTest, MountFuseFilesystem) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_ADMIN)));
+  SKIP_IF(IsRunningOnGvisor() && !IsFUSEEnabled());
+
+  const FileDescriptor fd =
+      ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/fuse", O_WRONLY));
+  std::string mopts = "fd=" + std::to_string(fd.get());
+
+  auto const dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+  auto const mount =
+      ASSERT_NO_ERRNO_AND_VALUE(Mount("", dir.path(), "fuse", 0, mopts, 0));
+}
+
 }  // namespace
 
 }  // namespace testing
diff --git a/test/syscalls/linux/open.cc b/test/syscalls/linux/open.cc
index bb7d108e8..bf350946b 100644
--- a/test/syscalls/linux/open.cc
+++ b/test/syscalls/linux/open.cc
@@ -235,7 +235,7 @@ TEST_F(OpenTest, AppendOnly) {
       ASSERT_NO_ERRNO_AND_VALUE(Open(test_file_name_, O_RDWR | O_APPEND));
   EXPECT_THAT(lseek(fd2.get(), 0, SEEK_CUR), SyscallSucceedsWithValue(0));
 
-  // Then try to write to the first file and make sure the bytes are appended.
+  // Then try to write to the first fd and make sure the bytes are appended.
   EXPECT_THAT(WriteFd(fd1.get(), buf.data(), buf.size()),
               SyscallSucceedsWithValue(buf.size()));
 
@@ -247,7 +247,7 @@ TEST_F(OpenTest, AppendOnly) {
   EXPECT_THAT(lseek(fd1.get(), 0, SEEK_CUR),
               SyscallSucceedsWithValue(kBufSize * 2));
 
-  // Then try to write to the second file and make sure the bytes are appended.
+  // Then try to write to the second fd and make sure the bytes are appended.
   EXPECT_THAT(WriteFd(fd2.get(), buf.data(), buf.size()),
               SyscallSucceedsWithValue(buf.size()));
 
diff --git a/test/syscalls/linux/packet_socket.cc b/test/syscalls/linux/packet_socket.cc
index e94ddcb77..40aa9326d 100644
--- a/test/syscalls/linux/packet_socket.cc
+++ b/test/syscalls/linux/packet_socket.cc
@@ -417,6 +417,122 @@ TEST_P(CookedPacketTest, BindDrop) {
   EXPECT_THAT(RetryEINTR(poll)(&pfd, 1, 1000), SyscallSucceedsWithValue(0));
 }
 
+// Verify that we receive outbound packets. This test requires at least one
+// non loopback interface so that we can actually capture an outgoing packet.
+TEST_P(CookedPacketTest, ReceiveOutbound) {
+  // Only ETH_P_ALL sockets can receive outbound packets on linux.
+  SKIP_IF(GetParam() != ETH_P_ALL);
+
+  // Let's use a simple IP payload: a UDP datagram.
+  FileDescriptor udp_sock =
+      ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET, SOCK_DGRAM, 0));
+
+  struct ifaddrs* if_addr_list = nullptr;
+  auto cleanup = Cleanup([&if_addr_list]() { freeifaddrs(if_addr_list); });
+
+  ASSERT_THAT(getifaddrs(&if_addr_list), SyscallSucceeds());
+
+  // Get interface other than loopback.
+  struct ifreq ifr = {};
+  for (struct ifaddrs* i = if_addr_list; i; i = i->ifa_next) {
+    if (strcmp(i->ifa_name, "lo") != 0) {
+      strncpy(ifr.ifr_name, i->ifa_name, sizeof(ifr.ifr_name));
+      break;
+    }
+  }
+
+  // Skip if no interface is available other than loopback.
+  if (strlen(ifr.ifr_name) == 0) {
+    GTEST_SKIP();
+  }
+
+  // Get interface index and name.
+  EXPECT_THAT(ioctl(socket_, SIOCGIFINDEX, &ifr), SyscallSucceeds());
+  EXPECT_NE(ifr.ifr_ifindex, 0);
+  int ifindex = ifr.ifr_ifindex;
+
+  constexpr int kMACSize = 6;
+  char hwaddr[kMACSize];
+  // Get interface address.
+  ASSERT_THAT(ioctl(socket_, SIOCGIFHWADDR, &ifr), SyscallSucceeds());
+  ASSERT_THAT(ifr.ifr_hwaddr.sa_family,
+              AnyOf(Eq(ARPHRD_NONE), Eq(ARPHRD_ETHER)));
+  memcpy(hwaddr, ifr.ifr_hwaddr.sa_data, kMACSize);
+
+  // Just send it to the google dns server 8.8.8.8. It's UDP we don't care
+  // if it actually gets to the DNS Server we just want to see that we receive
+  // it on our AF_PACKET socket.
+  //
+  // NOTE: We just want to pick an IP that is non-local to avoid having to
+  // handle ARP as this should cause the UDP packet to be sent to the default
+  // gateway configured for the system under test. Otherwise the only packet we
+  // will see is the ARP query unless we picked an IP which will actually
+  // resolve. The test is a bit brittle but this was the best compromise for
+  // now.
+  struct sockaddr_in dest = {};
+  ASSERT_EQ(inet_pton(AF_INET, "8.8.8.8", &dest.sin_addr.s_addr), 1);
+  dest.sin_family = AF_INET;
+  dest.sin_port = kPort;
+  EXPECT_THAT(sendto(udp_sock.get(), kMessage, sizeof(kMessage), 0,
+                     reinterpret_cast<struct sockaddr*>(&dest), sizeof(dest)),
+              SyscallSucceedsWithValue(sizeof(kMessage)));
+
+  // Wait and make sure the socket receives the data.
+  struct pollfd pfd = {};
+  pfd.fd = socket_;
+  pfd.events = POLLIN;
+  EXPECT_THAT(RetryEINTR(poll)(&pfd, 1, 1000), SyscallSucceedsWithValue(1));
+
+  // Now read and check that the packet is the one we just sent.
+  // Read and verify the data.
+  constexpr size_t packet_size =
+      sizeof(struct iphdr) + sizeof(struct udphdr) + sizeof(kMessage);
+  char buf[64];
+  struct sockaddr_ll src = {};
+  socklen_t src_len = sizeof(src);
+  ASSERT_THAT(recvfrom(socket_, buf, sizeof(buf), 0,
+                       reinterpret_cast<struct sockaddr*>(&src), &src_len),
+              SyscallSucceedsWithValue(packet_size));
+
+  // sockaddr_ll ends with an 8 byte physical address field, but ethernet
+  // addresses only use 6 bytes.  Linux used to return sizeof(sockaddr_ll)-2
+  // here, but since commit b2cf86e1563e33a14a1c69b3e508d15dc12f804c returns
+  // sizeof(sockaddr_ll).
+  ASSERT_THAT(src_len, AnyOf(Eq(sizeof(src)), Eq(sizeof(src) - 2)));
+
+  // Verify the source address.
+  EXPECT_EQ(src.sll_family, AF_PACKET);
+  EXPECT_EQ(src.sll_ifindex, ifindex);
+  EXPECT_EQ(src.sll_halen, ETH_ALEN);
+  EXPECT_EQ(ntohs(src.sll_protocol), ETH_P_IP);
+  EXPECT_EQ(src.sll_pkttype, PACKET_OUTGOING);
+  // Verify the link address of the interface matches that of the non
+  // non loopback interface address we stored above.
+  for (int i = 0; i < src.sll_halen; i++) {
+    EXPECT_EQ(src.sll_addr[i], hwaddr[i]);
+  }
+
+  // Verify the IP header.
+  struct iphdr ip = {};
+  memcpy(&ip, buf, sizeof(ip));
+  EXPECT_EQ(ip.ihl, 5);
+  EXPECT_EQ(ip.version, 4);
+  EXPECT_EQ(ip.tot_len, htons(packet_size));
+  EXPECT_EQ(ip.protocol, IPPROTO_UDP);
+  EXPECT_EQ(ip.daddr, dest.sin_addr.s_addr);
+  EXPECT_NE(ip.saddr, htonl(INADDR_LOOPBACK));
+
+  // Verify the UDP header.
+  struct udphdr udp = {};
+  memcpy(&udp, buf + sizeof(iphdr), sizeof(udp));
+  EXPECT_EQ(udp.dest, kPort);
+  EXPECT_EQ(udp.len, htons(sizeof(udphdr) + sizeof(kMessage)));
+
+  // Verify the payload.
+  char* payload = reinterpret_cast<char*>(buf + sizeof(iphdr) + sizeof(udphdr));
+  EXPECT_EQ(strncmp(payload, kMessage, sizeof(kMessage)), 0);
+}
+
 // Bind with invalid address.
 TEST_P(CookedPacketTest, BindFail) {
   // Null address.
diff --git a/test/syscalls/linux/raw_socket.cc b/test/syscalls/linux/raw_socket.cc
index ce54dc064..8d6e5c913 100644
--- a/test/syscalls/linux/raw_socket.cc
+++ b/test/syscalls/linux/raw_socket.cc
@@ -262,6 +262,27 @@ TEST_P(RawSocketTest, SendWithoutConnectFails) {
               SyscallFailsWithErrno(EDESTADDRREQ));
 }
 
+// Wildcard Bind.
+TEST_P(RawSocketTest, BindToWildcard) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
+  struct sockaddr_storage addr;
+  addr = {};
+
+  // We don't set ports because raw sockets don't have a notion of ports.
+  if (Family() == AF_INET) {
+    struct sockaddr_in* sin = reinterpret_cast<struct sockaddr_in*>(&addr);
+    sin->sin_family = AF_INET;
+    sin->sin_addr.s_addr = htonl(INADDR_ANY);
+  } else {
+    struct sockaddr_in6* sin6 = reinterpret_cast<struct sockaddr_in6*>(&addr);
+    sin6->sin6_family = AF_INET6;
+    sin6->sin6_addr = in6addr_any;
+  }
+
+  ASSERT_THAT(bind(s_, reinterpret_cast<struct sockaddr*>(&addr_), AddrLen()),
+              SyscallSucceeds());
+}
+
 // Bind to localhost.
 TEST_P(RawSocketTest, BindToLocalhost) {
   SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
diff --git a/test/syscalls/linux/raw_socket_hdrincl.cc b/test/syscalls/linux/raw_socket_hdrincl.cc
index 5bb14d57c..97f0467aa 100644
--- a/test/syscalls/linux/raw_socket_hdrincl.cc
+++ b/test/syscalls/linux/raw_socket_hdrincl.cc
@@ -178,6 +178,9 @@ TEST_F(RawHDRINCL, ConnectToLoopback) {
 }
 
 TEST_F(RawHDRINCL, SendWithoutConnectSucceeds) {
+  // FIXME(github.dev/issue/3159): Test currently flaky.
+  SKIP_IF(true);
+
   struct iphdr hdr = LoopbackHeader();
   ASSERT_THAT(send(socket_, &hdr, sizeof(hdr), 0),
               SyscallSucceedsWithValue(sizeof(hdr)));
@@ -281,6 +284,9 @@ TEST_F(RawHDRINCL, SendAndReceive) {
 // Send and receive a packet where the sendto address is not the same as the
 // provided destination.
 TEST_F(RawHDRINCL, SendAndReceiveDifferentAddress) {
+  // FIXME(github.dev/issue/3160): Test currently flaky.
+  SKIP_IF(true);
+
   int port = 40000;
   if (!IsRunningOnGvisor()) {
     port = static_cast<short>(ASSERT_NO_ERRNO_AND_VALUE(
diff --git a/tools/bazel.mk b/tools/bazel.mk
index 9e02af8dc..45d6007cf 100644
--- a/tools/bazel.mk
+++ b/tools/bazel.mk
@@ -15,6 +15,7 @@
 # limitations under the License.
 
 # See base Makefile.
+SHELL=/bin/bash -o pipefail
 BRANCH_NAME := $(shell (git branch --show-current 2>/dev/null || \
 			git rev-parse --abbrev-ref HEAD 2>/dev/null) | \
 			xargs -n 1 basename 2>/dev/null)
@@ -22,22 +23,38 @@ BRANCH_NAME := $(shell (git branch --show-current 2>/dev/null || \
 # Bazel container configuration (see below).
 USER ?= gvisor
 HASH ?= $(shell readlink -m $(CURDIR) | md5sum | cut -c1-8)
+BUILDER_BASE := gvisor.dev/images/default
+BUILDER_IMAGE := gvisor.dev/images/builder
+BUILDER_NAME ?= gvisor-builder-$(HASH)
 DOCKER_NAME ?= gvisor-bazel-$(HASH)
 DOCKER_PRIVILEGED ?= --privileged
 BAZEL_CACHE := $(shell readlink -m ~/.cache/bazel/)
 GCLOUD_CONFIG := $(shell readlink -m ~/.config/gcloud/)
 DOCKER_SOCKET := /var/run/docker.sock
 
-# Non-configurable.
+# Bazel flags.
+OPTIONS += --test_output=errors --keep_going --verbose_failures=true
+BAZEL := bazel $(STARTUP_OPTIONS)
+
+# Basic options.
 UID := $(shell id -u ${USER})
 GID := $(shell id -g ${USER})
 USERADD_OPTIONS :=
 FULL_DOCKER_RUN_OPTIONS := $(DOCKER_RUN_OPTIONS)
+FULL_DOCKER_RUN_OPTIONS += --user $(UID):$(GID)
+FULL_DOCKER_RUN_OPTIONS += --entrypoint ""
+FULL_DOCKER_RUN_OPTIONS += --init
 FULL_DOCKER_RUN_OPTIONS += -v "$(BAZEL_CACHE):$(BAZEL_CACHE)"
 FULL_DOCKER_RUN_OPTIONS += -v "$(GCLOUD_CONFIG):$(GCLOUD_CONFIG)"
 FULL_DOCKER_RUN_OPTIONS += -v "/tmp:/tmp"
+FULL_DOCKER_EXEC_OPTIONS := --user $(UID):$(GID)
+FULL_DOCKER_EXEC_OPTIONS += -i
+
+# Add docker passthrough options.
 ifneq ($(DOCKER_PRIVILEGED),)
 FULL_DOCKER_RUN_OPTIONS += -v "$(DOCKER_SOCKET):$(DOCKER_SOCKET)"
+FULL_DOCKER_RUN_OPTIONS += $(DOCKER_PRIVILEGED)
+FULL_DOCKER_EXEC_OPTIONS += $(DOCKER_PRIVILEGED)
 DOCKER_GROUP := $(shell stat -c '%g' $(DOCKER_SOCKET))
 ifneq ($(GID),$(DOCKER_GROUP))
 USERADD_OPTIONS += --groups $(DOCKER_GROUP)
@@ -45,7 +62,35 @@ GROUPADD_DOCKER += groupadd --gid $(DOCKER_GROUP) --non-unique docker-$(HASH) &&
 FULL_DOCKER_RUN_OPTIONS += --group-add $(DOCKER_GROUP)
 endif
 endif
-SHELL=/bin/bash -o pipefail
+
+# Add KVM passthrough options.
+ifneq (,$(wildcard /dev/kvm))
+FULL_DOCKER_RUN_OPTIONS += --device=/dev/kvm
+KVM_GROUP := $(shell stat -c '%g' /dev/kvm)
+ifneq ($(GID),$(KVM_GROUP))
+USERADD_OPTIONS += --groups $(KVM_GROUP)
+GROUPADD_DOCKER += groupadd --gid $(KVM_GROUP) --non-unique kvm-$(HASH) &&
+FULL_DOCKER_RUN_OPTIONS += --group-add $(KVM_GROUP)
+endif
+endif
+
+# Load the appropriate config.
+ifneq (,$(BAZEL_CONFIG))
+OPTIONS += --config=$(BAZEL_CONFIG)
+endif
+
+bazel-image: load-default
+	@if docker ps --all | grep $(BUILDER_NAME); then docker rm -f $(BUILDER_NAME); fi
+	docker run --user 0:0 --entrypoint "" --name $(BUILDER_NAME) \
+		$(BUILDER_BASE) \
+		sh -c "groupadd --gid $(GID) --non-unique $(USER) && \
+		       $(GROUPADD_DOCKER) \
+		       useradd --uid $(UID) --non-unique --no-create-home \
+		               --gid $(GID) $(USERADD_OPTIONS) -d $(HOME) $(USER) && \
+		       if [[ -e /dev/kvm ]]; then chmod a+rw /dev/kvm; fi"
+	docker commit $(BUILDER_NAME) $(BUILDER_IMAGE)
+	@docker rm -f $(BUILDER_NAME)
+.PHONY: bazel-image
 
 ##
 ## Bazel helpers.
@@ -60,40 +105,37 @@ SHELL=/bin/bash -o pipefail
 ##     GCLOUD_CONFIG      - The gcloud config directory (detect: detected).
 ##     DOCKER_SOCKET      - The Docker socket (default: detected).
 ##
-bazel-server-start: load-default ## Starts the bazel server.
+bazel-server-start: bazel-image ## Starts the bazel server.
 	@mkdir -p $(BAZEL_CACHE)
 	@mkdir -p $(GCLOUD_CONFIG)
-	docker run -d --rm \
-		--init \
-	        --name $(DOCKER_NAME) \
-		--user 0:0 $(DOCKER_GROUP_OPTIONS) \
+	@if docker ps --all | grep $(DOCKER_NAME); then docker rm -f $(DOCKER_NAME); fi
+	# This command runs a bazel server, and the container sticks around
+	# until the bazel server exits. This should ensure that it does not
+	# exit in the middle of running a build, but also it won't stick around
+	# forever. The build commands wrap around an appropriate exec into the
+	# container in order to perform work via the bazel client.
+	docker run -d --rm --name $(DOCKER_NAME) \
 		-v "$(CURDIR):$(CURDIR)" \
 		--workdir "$(CURDIR)" \
-		--entrypoint "" \
 		$(FULL_DOCKER_RUN_OPTIONS) \
-		gvisor.dev/images/default \
-		sh -c "groupadd --gid $(GID) --non-unique $(USER) && \
-		       $(GROUPADD_DOCKER) \
-		       useradd --uid $(UID) --non-unique --no-create-home --gid $(GID) $(USERADD_OPTIONS) -d $(HOME) $(USER) && \
-	               bazel version && \
-		       exec tail --pid=\$$(bazel info server_pid) -f /dev/null"
-	@while :; do if docker logs $(DOCKER_NAME) 2>/dev/null | grep "Build label:" >/dev/null; then break; fi; \
-		if ! docker ps | grep $(DOCKER_NAME); then exit 1; else sleep 1; fi; done
+		$(BUILDER_IMAGE) \
+		sh -c "tail -f --pid=\$$($(BAZEL) info server_pid)"
 .PHONY: bazel-server-start
 
 bazel-shutdown: ## Shuts down a running bazel server.
-	@docker exec --user $(UID):$(GID) $(DOCKER_NAME) bazel shutdown; rc=$$?; docker kill $(DOCKER_NAME) || [[ $$rc -ne 0 ]]
+	@docker exec $(FULL_DOCKER_EXEC_OPTIONS) $(DOCKER_NAME) $(BAZEL) shutdown; \
+	       rc=$$?; docker kill $(DOCKER_NAME) || [[ $$rc -ne 0 ]]
 .PHONY: bazel-shutdown
 
 bazel-alias: ## Emits an alias that can be used within the shell.
-	@echo "alias bazel='docker exec --user $(UID):$(GID) -i $(DOCKER_NAME) bazel'"
+	@echo "alias bazel='docker exec $(FULL_DOCKER_EXEC_OPTIONS) $(DOCKER_NAME) bazel'"
 .PHONY: bazel-alias
 
 bazel-server: ## Ensures that the server exists. Used as an internal target.
-	@docker exec $(DOCKER_NAME) true || $(MAKE) bazel-server-start
+	@docker exec $(FULL_DOCKER_EXEC_OPTIONS) $(DOCKER_NAME) true || $(MAKE) bazel-server-start
 .PHONY: bazel-server
 
-build_cmd = docker exec --user $(UID):$(GID) -i $(DOCKER_NAME) sh -o pipefail -c 'bazel $(STARTUP_OPTIONS) build $(OPTIONS) $(TARGETS)'
+build_cmd = docker exec $(FULL_DOCKER_EXEC_OPTIONS) $(DOCKER_NAME) sh -o pipefail -c '$(BAZEL) build $(OPTIONS) $(TARGETS)'
 
 build_paths = $(build_cmd) 2>&1 \
 		| tee /proc/self/fd/2 \
@@ -120,5 +162,9 @@ sudo: bazel-server
 .PHONY: sudo
 
 test: bazel-server
-	@docker exec --user $(UID):$(GID) -i $(DOCKER_NAME) bazel $(STARTUP_OPTIONS) test $(OPTIONS) $(TARGETS)
+	@docker exec $(FULL_DOCKER_EXEC_OPTIONS) $(DOCKER_NAME) $(BAZEL) test $(OPTIONS) $(TARGETS)
 .PHONY: test
+
+query: bazel-server
+	@docker exec $(FULL_DOCKER_EXEC_OPTIONS) $(DOCKER_NAME) $(BAZEL) query $(OPTIONS) '$(TARGETS)'
+.PHONY: query
diff --git a/tools/bazeldefs/BUILD b/tools/bazeldefs/BUILD
index f2f80bae1..3f809065d 100644
--- a/tools/bazeldefs/BUILD
+++ b/tools/bazeldefs/BUILD
@@ -49,3 +49,40 @@ rbe_toolchain(
     toolchain = "@bazel_toolchains//configs/ubuntu16_04_clang/10.0.0/bazel_2.0.0/cc:cc-compiler-k8",
     toolchain_type = "@bazel_tools//tools/cpp:toolchain_type",
 )
+
+# Updated versions of the above, compatible with bazel3.
+rbe_platform(
+    name = "rbe_ubuntu1604_bazel3",
+    constraint_values = [
+        "@bazel_tools//platforms:x86_64",
+        "@bazel_tools//platforms:linux",
+        "@bazel_tools//tools/cpp:clang",
+        "@bazel_toolchains_bazel3//constraints:xenial",
+        "@bazel_toolchains_bazel3//constraints/sanitizers:support_msan",
+    ],
+    remote_execution_properties = """
+        properties: {
+          name: "container-image"
+          value:"docker://gcr.io/cloud-marketplace/google/rbe-ubuntu16-04@sha256:b516a2d69537cb40a7c6a7d92d0008abb29fba8725243772bdaf2c83f1be2272"
+        }
+        properties: {
+          name: "dockerAddCapabilities"
+          value: "SYS_ADMIN"
+        }
+        properties: {
+          name: "dockerPrivileged"
+          value: "true"
+        }
+    """,
+)
+
+rbe_toolchain(
+    name = "cc-toolchain-clang-x86_64-default_bazel3",
+    exec_compatible_with = [],
+    tags = [
+        "manual",
+    ],
+    target_compatible_with = [],
+    toolchain = "@bazel_toolchains_bazel3//configs/ubuntu16_04_clang/11.0.0/bazel_3.1.0/cc:cc-compiler-k8",
+    toolchain_type = "@bazel_tools//tools/cpp:toolchain_type",
+)
diff --git a/tools/go_generics/BUILD b/tools/go_generics/BUILD
index 32a949c93..558826bf1 100644
--- a/tools/go_generics/BUILD
+++ b/tools/go_generics/BUILD
@@ -12,27 +12,3 @@ go_binary(
     visibility = ["//:sandbox"],
     deps = ["//tools/go_generics/globals"],
 )
-
-genrule(
-    name = "go_generics_tests",
-    srcs = glob(["generics_tests/**"]) + [":go_generics"],
-    outs = ["go_generics_tests.tgz"],
-    cmd = "tar -czvhf $@ $(SRCS)",
-)
-
-genrule(
-    name = "go_generics_test_bundle",
-    srcs = [
-        ":go_generics_tests.tgz",
-        ":go_generics_unittest.sh",
-    ],
-    outs = ["go_generics_test.sh"],
-    cmd = "cat $(location :go_generics_unittest.sh) $(location :go_generics_tests.tgz) > $@",
-    executable = True,
-)
-
-sh_test(
-    name = "go_generics_test",
-    size = "small",
-    srcs = ["go_generics_test.sh"],
-)
diff --git a/tools/go_generics/defs.bzl b/tools/go_generics/defs.bzl
index ec047a644..33329cf28 100644
--- a/tools/go_generics/defs.bzl
+++ b/tools/go_generics/defs.bzl
@@ -100,20 +100,21 @@ def _go_template_instance_impl(ctx):
 
     # Build the argument list.
     args = ["-i=%s" % template.file.path, "-o=%s" % output.path]
-    args += ["-p=%s" % ctx.attr.package]
+    if ctx.attr.package:
+        args.append("-p=%s" % ctx.attr.package)
 
     if len(ctx.attr.prefix) > 0:
-        args += ["-prefix=%s" % ctx.attr.prefix]
+        args.append("-prefix=%s" % ctx.attr.prefix)
 
     if len(ctx.attr.suffix) > 0:
-        args += ["-suffix=%s" % ctx.attr.suffix]
+        args.append("-suffix=%s" % ctx.attr.suffix)
 
     args += [("-t=%s=%s" % (p[0], p[1])) for p in ctx.attr.types.items()]
     args += [("-c=%s=%s" % (p[0], p[1])) for p in ctx.attr.consts.items()]
     args += [("-import=%s=%s" % (p[0], p[1])) for p in ctx.attr.imports.items()]
 
     if ctx.attr.anon:
-        args += ["-anon"]
+        args.append("-anon")
 
     ctx.actions.run(
         inputs = [template.file],
@@ -151,7 +152,7 @@ go_template_instance = rule(
         "consts": attr.string_dict(),
         "imports": attr.string_dict(),
         "anon": attr.bool(mandatory = False, default = False),
-        "package": attr.string(mandatory = True),
+        "package": attr.string(mandatory = False),
         "out": attr.output(mandatory = True),
         "_tool": attr.label(executable = True, cfg = "host", default = Label("//tools/go_generics")),
     },
diff --git a/tools/go_generics/generics_tests/all_stmts/opts.txt b/tools/go_generics/generics_tests/all_stmts/opts.txt
deleted file mode 100644
index c9d0e09bf..000000000
--- a/tools/go_generics/generics_tests/all_stmts/opts.txt
+++ /dev/null
@@ -1 +0,0 @@
--t=T=Q
diff --git a/tools/go_generics/generics_tests/all_types/opts.txt b/tools/go_generics/generics_tests/all_types/opts.txt
deleted file mode 100644
index c9d0e09bf..000000000
--- a/tools/go_generics/generics_tests/all_types/opts.txt
+++ /dev/null
@@ -1 +0,0 @@
--t=T=Q
diff --git a/tools/go_generics/generics_tests/anon/opts.txt b/tools/go_generics/generics_tests/anon/opts.txt
deleted file mode 100644
index a5e9d26de..000000000
--- a/tools/go_generics/generics_tests/anon/opts.txt
+++ /dev/null
@@ -1 +0,0 @@
--t=T=Q -suffix=New -anon
diff --git a/tools/go_generics/generics_tests/consts/opts.txt b/tools/go_generics/generics_tests/consts/opts.txt
deleted file mode 100644
index 4fb59dce8..000000000
--- a/tools/go_generics/generics_tests/consts/opts.txt
+++ /dev/null
@@ -1 +0,0 @@
--c=c1=20 -c=z=600 -c=v=3.3 -c=s="def" -c=A=20 -c=C=100 -c=S="def" -c=T="ABC"
diff --git a/tools/go_generics/generics_tests/imports/opts.txt b/tools/go_generics/generics_tests/imports/opts.txt
deleted file mode 100644
index 87324be79..000000000
--- a/tools/go_generics/generics_tests/imports/opts.txt
+++ /dev/null
@@ -1 +0,0 @@
--t=T=sync.Mutex -c=n=math.Uint32 -c=m=math.Uint64 -import=sync=sync -import=math=mymathpath
diff --git a/tools/go_generics/generics_tests/remove_typedef/opts.txt b/tools/go_generics/generics_tests/remove_typedef/opts.txt
deleted file mode 100644
index 9c8ecaada..000000000
--- a/tools/go_generics/generics_tests/remove_typedef/opts.txt
+++ /dev/null
@@ -1 +0,0 @@
--t=T=U
diff --git a/tools/go_generics/generics_tests/simple/opts.txt b/tools/go_generics/generics_tests/simple/opts.txt
deleted file mode 100644
index 7832ef66f..000000000
--- a/tools/go_generics/generics_tests/simple/opts.txt
+++ /dev/null
@@ -1 +0,0 @@
--t=T=Q -suffix=New
diff --git a/tools/go_generics/go_generics_unittest.sh b/tools/go_generics/go_generics_unittest.sh
deleted file mode 100755
index 44b22db91..000000000
--- a/tools/go_generics/go_generics_unittest.sh
+++ /dev/null
@@ -1,70 +0,0 @@
-#!/bin/bash
-
-# Copyright 2018 The gVisor Authors.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# Bash "safe-mode":  Treat command failures as fatal (even those that occur in
-# pipes), and treat unset variables as errors.
-set -eu -o pipefail
-
-# This file will be generated as a self-extracting shell script in order to
-# eliminate the need for any runtime dependencies. The tarball at the end will
-# include the go_generics binary, as well as a subdirectory named
-# generics_tests. See the BUILD file for more information.
-declare -r temp=$(mktemp -d)
-function cleanup() {
-  rm -rf "${temp}"
-}
-# trap cleanup EXIT
-
-# Print message in "$1" then exit with status 1.
-function die () {
-  echo "$1" 1>&2
-  exit 1
-}
-
-# This prints the line number of __BUNDLE__ below, that should be the last line
-# of this script. After that point, the concatenated archive will be the
-# contents.
-declare -r tgz=`awk '/^__BUNDLE__/ {print NR + 1; exit 0; }' $0`
-tail -n+"${tgz}" $0 | tar -xzv -C "${temp}"
-
-# The target for the test.
-declare -r binary="$(find ${temp} -type f -a -name go_generics)"
-declare -r input_dirs="$(find ${temp} -type d -a -name generics_tests)/*"
-
-# Go through all test cases.
-for f in ${input_dirs}; do
-  base=$(basename "${f}")
-
-  # Run go_generics on the input file.
-  opts=$(head -n 1 ${f}/opts.txt)
-  out="${f}/output/generated.go"
-  expected="${f}/output/output.go"
-  ${binary} ${opts} "-i=${f}/input.go" "-o=${out}" || die "go_generics failed for test case \"${base}\""
-
-  # Compare the outputs.
-  diff ${expected} ${out}
-  if [ $? -ne 0 ]; then
-    echo "Expected:"
-    cat ${expected}
-    echo "Actual:"
-    cat ${out}
-    die "Actual output is different from expected for test \"${base}\""
-  fi
-done
-
-echo "PASS"
-exit 0
-__BUNDLE__
diff --git a/tools/go_generics/tests/BUILD b/tools/go_generics/tests/BUILD
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/tools/go_generics/tests/BUILD
diff --git a/tools/go_generics/tests/all_stmts/BUILD b/tools/go_generics/tests/all_stmts/BUILD
new file mode 100644
index 000000000..a4a7c775a
--- /dev/null
+++ b/tools/go_generics/tests/all_stmts/BUILD
@@ -0,0 +1,16 @@
+load("//tools/go_generics/tests:defs.bzl", "go_generics_test")
+
+go_generics_test(
+    name = "all_stmts",
+    inputs = ["input.go"],
+    output = "output.go",
+    types = {
+        "T": "Q",
+    },
+)
+
+# @unused
+glaze_ignore = [
+    "input.go",
+    "output.go",
+]
diff --git a/tools/go_generics/generics_tests/all_stmts/input.go b/tools/go_generics/tests/all_stmts/input.go
index 4791d1ff1..4791d1ff1 100644
--- a/tools/go_generics/generics_tests/all_stmts/input.go
+++ b/tools/go_generics/tests/all_stmts/input.go
diff --git a/tools/go_generics/generics_tests/all_stmts/output/output.go b/tools/go_generics/tests/all_stmts/output.go
index a53d84535..a53d84535 100644
--- a/tools/go_generics/generics_tests/all_stmts/output/output.go
+++ b/tools/go_generics/tests/all_stmts/output.go
diff --git a/tools/go_generics/tests/all_types/BUILD b/tools/go_generics/tests/all_types/BUILD
new file mode 100644
index 000000000..60b1fd314
--- /dev/null
+++ b/tools/go_generics/tests/all_types/BUILD
@@ -0,0 +1,16 @@
+load("//tools/go_generics/tests:defs.bzl", "go_generics_test")
+
+go_generics_test(
+    name = "all_types",
+    inputs = ["input.go"],
+    output = "output.go",
+    types = {
+        "T": "Q",
+    },
+)
+
+# @unused
+glaze_ignore = [
+    "input.go",
+    "output.go",
+]
diff --git a/tools/go_generics/generics_tests/all_types/input.go b/tools/go_generics/tests/all_types/input.go
index 3575d02ec..6f85bbb69 100644
--- a/tools/go_generics/generics_tests/all_types/input.go
+++ b/tools/go_generics/tests/all_types/input.go
@@ -14,7 +14,9 @@
 
 package tests
 
-import "./lib"
+import (
+	"./lib"
+)
 
 type T int
 
diff --git a/tools/go_generics/generics_tests/all_types/lib/lib.go b/tools/go_generics/tests/all_types/lib/lib.go
index 988786496..988786496 100644
--- a/tools/go_generics/generics_tests/all_types/lib/lib.go
+++ b/tools/go_generics/tests/all_types/lib/lib.go
diff --git a/tools/go_generics/generics_tests/all_types/output/output.go b/tools/go_generics/tests/all_types/output.go
index 41fd147a1..c0bbebfe7 100644
--- a/tools/go_generics/generics_tests/all_types/output/output.go
+++ b/tools/go_generics/tests/all_types/output.go
@@ -14,7 +14,9 @@
 
 package main
 
-import "./lib"
+import (
+	"./lib"
+)
 
 type newType struct {
 	a Q
diff --git a/tools/go_generics/tests/anon/BUILD b/tools/go_generics/tests/anon/BUILD
new file mode 100644
index 000000000..ef24f4b25
--- /dev/null
+++ b/tools/go_generics/tests/anon/BUILD
@@ -0,0 +1,18 @@
+load("//tools/go_generics/tests:defs.bzl", "go_generics_test")
+
+go_generics_test(
+    name = "anon",
+    anon = True,
+    inputs = ["input.go"],
+    output = "output.go",
+    suffix = "New",
+    types = {
+        "T": "Q",
+    },
+)
+
+# @unused
+glaze_ignore = [
+    "input.go",
+    "output.go",
+]
diff --git a/tools/go_generics/generics_tests/anon/input.go b/tools/go_generics/tests/anon/input.go
index 44086d522..44086d522 100644
--- a/tools/go_generics/generics_tests/anon/input.go
+++ b/tools/go_generics/tests/anon/input.go
diff --git a/tools/go_generics/generics_tests/anon/output/output.go b/tools/go_generics/tests/anon/output.go
index 160cddf79..7fa791853 100644
--- a/tools/go_generics/generics_tests/anon/output/output.go
+++ b/tools/go_generics/tests/anon/output.go
@@ -35,8 +35,8 @@ func (f FooNew) GetBar(name string) Q {
 
 func foobarNew() {
 	a := BazNew{}
-	a.Q = 0 // should not be renamed, this is a limitation
+	a.Q = 0
 
 	b := otherpkg.UnrelatedType{}
-	b.Q = 0 // should not be renamed, this is a limitation
+	b.Q = 0
 }
diff --git a/tools/go_generics/tests/consts/BUILD b/tools/go_generics/tests/consts/BUILD
new file mode 100644
index 000000000..fd7caccad
--- /dev/null
+++ b/tools/go_generics/tests/consts/BUILD
@@ -0,0 +1,23 @@
+load("//tools/go_generics/tests:defs.bzl", "go_generics_test")
+
+go_generics_test(
+    name = "consts",
+    consts = {
+        "c1": "20",
+        "z": "600",
+        "v": "3.3",
+        "s": "\"def\"",
+        "A": "20",
+        "C": "100",
+        "S": "\"def\"",
+        "T": "\"ABC\"",
+    },
+    inputs = ["input.go"],
+    output = "output.go",
+)
+
+# @unused
+glaze_ignore = [
+    "input.go",
+    "output.go",
+]
diff --git a/tools/go_generics/generics_tests/consts/input.go b/tools/go_generics/tests/consts/input.go
index 04b95fcc6..04b95fcc6 100644
--- a/tools/go_generics/generics_tests/consts/input.go
+++ b/tools/go_generics/tests/consts/input.go
diff --git a/tools/go_generics/generics_tests/consts/output/output.go b/tools/go_generics/tests/consts/output.go
index 18d316cc9..18d316cc9 100644
--- a/tools/go_generics/generics_tests/consts/output/output.go
+++ b/tools/go_generics/tests/consts/output.go
diff --git a/tools/go_generics/tests/defs.bzl b/tools/go_generics/tests/defs.bzl
new file mode 100644
index 000000000..6277c3947
--- /dev/null
+++ b/tools/go_generics/tests/defs.bzl
@@ -0,0 +1,67 @@
+"""Generics tests."""
+
+load("//tools/go_generics:defs.bzl", "go_template", "go_template_instance")
+
+def _go_generics_test_impl(ctx):
+    runner = ctx.actions.declare_file(ctx.label.name)
+    runner_content = "\n".join([
+        "#!/bin/bash",
+        "exec diff --ignore-blank-lines --ignore-matching-lines=^[[:space:]]*// %s %s" % (
+            ctx.files.template_output[0].short_path,
+            ctx.files.expected_output[0].short_path,
+        ),
+        "",
+    ])
+    ctx.actions.write(runner, runner_content, is_executable = True)
+    return [DefaultInfo(
+        executable = runner,
+        runfiles = ctx.runfiles(
+            files = ctx.files.template_output + ctx.files.expected_output,
+            collect_default = True,
+            collect_data = True,
+        ),
+    )]
+
+_go_generics_test = rule(
+    implementation = _go_generics_test_impl,
+    attrs = {
+        "template_output": attr.label(mandatory = True, allow_single_file = True),
+        "expected_output": attr.label(mandatory = True, allow_single_file = True),
+    },
+    test = True,
+)
+
+def go_generics_test(name, inputs, output, types = None, consts = None, **kwargs):
+    """Instantiates a generics test.
+
+    Args:
+        name: the name of the test.
+        inputs: all the input files.
+        output: the output files.
+        types: the template types (dictionary).
+        consts: the template consts (dictionary).
+        **kwargs: additional arguments for the template_instance.
+    """
+    if types == None:
+        types = dict()
+    if consts == None:
+        consts = dict()
+    go_template(
+        name = name + "_template",
+        srcs = inputs,
+        types = types.keys(),
+        consts = consts.keys(),
+    )
+    go_template_instance(
+        name = name + "_output",
+        template = ":" + name + "_template",
+        out = name + "_output.go",
+        types = types,
+        consts = consts,
+        **kwargs
+    )
+    _go_generics_test(
+        name = name + "_test",
+        template_output = name + "_output.go",
+        expected_output = output,
+    )
diff --git a/tools/go_generics/tests/imports/BUILD b/tools/go_generics/tests/imports/BUILD
new file mode 100644
index 000000000..a86223d41
--- /dev/null
+++ b/tools/go_generics/tests/imports/BUILD
@@ -0,0 +1,24 @@
+load("//tools/go_generics/tests:defs.bzl", "go_generics_test")
+
+go_generics_test(
+    name = "imports",
+    consts = {
+        "n": "math.Uint32",
+        "m": "math.Uint64",
+    },
+    imports = {
+        "sync": "sync",
+        "math": "mymathpath",
+    },
+    inputs = ["input.go"],
+    output = "output.go",
+    types = {
+        "T": "sync.Mutex",
+    },
+)
+
+# @unused
+glaze_ignore = [
+    "input.go",
+    "output.go",
+]
diff --git a/tools/go_generics/generics_tests/imports/input.go b/tools/go_generics/tests/imports/input.go
index 0f032c2a1..0f032c2a1 100644
--- a/tools/go_generics/generics_tests/imports/input.go
+++ b/tools/go_generics/tests/imports/input.go
diff --git a/tools/go_generics/generics_tests/imports/output/output.go b/tools/go_generics/tests/imports/output.go
index 2488ca58c..2488ca58c 100644
--- a/tools/go_generics/generics_tests/imports/output/output.go
+++ b/tools/go_generics/tests/imports/output.go
diff --git a/tools/go_generics/tests/remove_typedef/BUILD b/tools/go_generics/tests/remove_typedef/BUILD
new file mode 100644
index 000000000..46457cec6
--- /dev/null
+++ b/tools/go_generics/tests/remove_typedef/BUILD
@@ -0,0 +1,16 @@
+load("//tools/go_generics/tests:defs.bzl", "go_generics_test")
+
+go_generics_test(
+    name = "remove_typedef",
+    inputs = ["input.go"],
+    output = "output.go",
+    types = {
+        "T": "U",
+    },
+)
+
+# @unused
+glaze_ignore = [
+    "input.go",
+    "output.go",
+]
diff --git a/tools/go_generics/generics_tests/remove_typedef/input.go b/tools/go_generics/tests/remove_typedef/input.go
index cf632bae7..cf632bae7 100644
--- a/tools/go_generics/generics_tests/remove_typedef/input.go
+++ b/tools/go_generics/tests/remove_typedef/input.go
diff --git a/tools/go_generics/generics_tests/remove_typedef/output/output.go b/tools/go_generics/tests/remove_typedef/output.go
index d44fd8e1c..d44fd8e1c 100644
--- a/tools/go_generics/generics_tests/remove_typedef/output/output.go
+++ b/tools/go_generics/tests/remove_typedef/output.go
diff --git a/tools/go_generics/tests/simple/BUILD b/tools/go_generics/tests/simple/BUILD
new file mode 100644
index 000000000..4b9265ea4
--- /dev/null
+++ b/tools/go_generics/tests/simple/BUILD
@@ -0,0 +1,17 @@
+load("//tools/go_generics/tests:defs.bzl", "go_generics_test")
+
+go_generics_test(
+    name = "simple",
+    inputs = ["input.go"],
+    output = "output.go",
+    suffix = "New",
+    types = {
+        "T": "Q",
+    },
+)
+
+# @unused
+glaze_ignore = [
+    "input.go",
+    "output.go",
+]
diff --git a/tools/go_generics/generics_tests/simple/input.go b/tools/go_generics/tests/simple/input.go
index 2a917f16c..2a917f16c 100644
--- a/tools/go_generics/generics_tests/simple/input.go
+++ b/tools/go_generics/tests/simple/input.go
diff --git a/tools/go_generics/generics_tests/simple/output/output.go b/tools/go_generics/tests/simple/output.go
index 6bfa0b25b..6bfa0b25b 100644
--- a/tools/go_generics/generics_tests/simple/output/output.go
+++ b/tools/go_generics/tests/simple/output.go
diff --git a/tools/go_marshal/README.md b/tools/go_marshal/README.md
index 4886efddf..68d759083 100644
--- a/tools/go_marshal/README.md
+++ b/tools/go_marshal/README.md
@@ -9,11 +9,9 @@ automatically generating code to marshal go data structures to memory.
 `binary.Marshal` by moving the go runtime reflection necessary to marshal a
 struct to compile-time.
 
-`go_marshal` automatically generates implementations for `abi.Marshallable` and
-`safemem.{Reader,Writer}`. Call-sites for serialization (typically syscall
-implementations) can directly invoke `safemem.Reader.ReadToBlocks` and
-`safemem.Writer.WriteFromBlocks`. Data structures that require custom
-serialization will have manual implementations for these interfaces.
+`go_marshal` automatically generates implementations for `marshal.Marshallable`
+and `safemem.{Reader,Writer}`. Data structures that require custom serialization
+will have manual implementations for these interfaces.
 
 Data structures can be flagged for code generation by adding a struct-level
 comment `// +marshal`.
diff --git a/tools/go_marshal/gomarshal/generator.go b/tools/go_marshal/gomarshal/generator.go
index 177013dbb..19bcd4e6a 100644
--- a/tools/go_marshal/gomarshal/generator.go
+++ b/tools/go_marshal/gomarshal/generator.go
@@ -413,13 +413,13 @@ func (g *Generator) Run() error {
 		for _, t := range g.collectMarshallableTypes(a, fsets[i]) {
 			impl := g.generateOne(t, fsets[i])
 			// Collect Marshallable types referenced by the generated code.
-			for ref, _ := range impl.ms {
+			for ref := range impl.ms {
 				ms[ref] = struct{}{}
 			}
 			impls = append(impls, impl)
 			// Collect imports referenced by the generated code and add them to
 			// the list of imports we need to copy to the generated code.
-			for name, _ := range impl.is {
+			for name := range impl.is {
 				if !g.imports.markUsed(name) {
 					panic(fmt.Sprintf("Generated code for '%s' referenced a non-existent import with local name '%s'. Either go-marshal needs to add an import to the generated file, or a package in an input source file has a package name differ from the final component of its path, which go-marshal doesn't know how to detect; use an import alias to work around this limitation.", impl.typeName(), name))
 				}
diff --git a/tools/go_marshal/gomarshal/generator_interfaces_struct.go b/tools/go_marshal/gomarshal/generator_interfaces_struct.go
index 9cd3c9579..4b9cea08a 100644
--- a/tools/go_marshal/gomarshal/generator_interfaces_struct.go
+++ b/tools/go_marshal/gomarshal/generator_interfaces_struct.go
@@ -268,6 +268,10 @@ func (g *interfaceGenerator) emitMarshallableForStruct(st *ast.StructType) {
 	g.emit("// MarshalUnsafe implements marshal.Marshallable.MarshalUnsafe.\n")
 	g.emit("func (%s *%s) MarshalUnsafe(dst []byte) {\n", g.r, g.typeName())
 	g.inIndent(func() {
+		fallback := func() {
+			g.emit("// Type %s doesn't have a packed layout in memory, fallback to MarshalBytes.\n", g.typeName())
+			g.emit("%s.MarshalBytes(dst)\n", g.r)
+		}
 		if thisPacked {
 			g.recordUsedImport("safecopy")
 			g.recordUsedImport("unsafe")
@@ -277,16 +281,13 @@ func (g *interfaceGenerator) emitMarshallableForStruct(st *ast.StructType) {
 					g.emit("safecopy.CopyIn(dst, unsafe.Pointer(%s))\n", g.r)
 				})
 				g.emit("} else {\n")
-				g.inIndent(func() {
-					g.emit("%s.MarshalBytes(dst)\n", g.r)
-				})
+				g.inIndent(fallback)
 				g.emit("}\n")
 			} else {
 				g.emit("safecopy.CopyIn(dst, unsafe.Pointer(%s))\n", g.r)
 			}
 		} else {
-			g.emit("// Type %s doesn't have a packed layout in memory, fallback to MarshalBytes.\n", g.typeName())
-			g.emit("%s.MarshalBytes(dst)\n", g.r)
+			fallback()
 		}
 	})
 	g.emit("}\n\n")
@@ -294,6 +295,10 @@ func (g *interfaceGenerator) emitMarshallableForStruct(st *ast.StructType) {
 	g.emit("// UnmarshalUnsafe implements marshal.Marshallable.UnmarshalUnsafe.\n")
 	g.emit("func (%s *%s) UnmarshalUnsafe(src []byte) {\n", g.r, g.typeName())
 	g.inIndent(func() {
+		fallback := func() {
+			g.emit("// Type %s doesn't have a packed layout in memory, fallback to UnmarshalBytes.\n", g.typeName())
+			g.emit("%s.UnmarshalBytes(src)\n", g.r)
+		}
 		if thisPacked {
 			g.recordUsedImport("safecopy")
 			g.recordUsedImport("unsafe")
@@ -303,16 +308,13 @@ func (g *interfaceGenerator) emitMarshallableForStruct(st *ast.StructType) {
 					g.emit("safecopy.CopyOut(unsafe.Pointer(%s), src)\n", g.r)
 				})
 				g.emit("} else {\n")
-				g.inIndent(func() {
-					g.emit("%s.UnmarshalBytes(src)\n", g.r)
-				})
+				g.inIndent(fallback)
 				g.emit("}\n")
 			} else {
 				g.emit("safecopy.CopyOut(unsafe.Pointer(%s), src)\n", g.r)
 			}
 		} else {
-			g.emit("// Type %s doesn't have a packed layout in memory, fall back to UnmarshalBytes.\n", g.typeName())
-			g.emit("%s.UnmarshalBytes(src)\n", g.r)
+			fallback()
 		}
 	})
 	g.emit("}\n\n")
@@ -463,8 +465,10 @@ func (g *interfaceGenerator) emitMarshallableSliceForStruct(st *ast.StructType,
 			})
 			g.emit("}\n\n")
 
-			g.emit("// Handle any final partial object.\n")
-			g.emit("if length < size*count && length%size != 0 {\n")
+			g.emit("// Handle any final partial object. buf is guaranteed to be long enough for the\n")
+			g.emit("// final element, but may not contain valid data for the entire range. This may\n")
+			g.emit("// result in unmarshalling zero values for some parts of the object.\n")
+			g.emit("if length%size != 0 {\n")
 			g.inIndent(func() {
 				g.emit("idx := limit\n")
 				g.emit("dst[idx].UnmarshalBytes(buf[size*idx:size*(idx+1)])\n")
diff --git a/tools/go_marshal/marshal/marshal.go b/tools/go_marshal/marshal/marshal.go
index cb2166252..85b196f08 100644
--- a/tools/go_marshal/marshal/marshal.go
+++ b/tools/go_marshal/marshal/marshal.go
@@ -58,18 +58,12 @@ type Marshallable interface {
 	// likely make use of the type of these fields).
 	SizeBytes() int
 
-	// MarshalBytes serializes a copy of a type to dst. dst may be smaller than
-	// SizeBytes(), which results in a part of the struct being marshalled. Note
-	// that this may have unexpected results for non-packed types, as implicit
-	// padding needs to be taken into account when reasoning about how much of
-	// the type is serialized.
+	// MarshalBytes serializes a copy of a type to dst.
+	// Precondition: dst must be at least SizeBytes() in length.
 	MarshalBytes(dst []byte)
 
-	// UnmarshalBytes deserializes a type from src. src may be smaller than
-	// SizeBytes(), which results in a partially deserialized struct. Note that
-	// this may have unexpected results for non-packed types, as implicit
-	// padding needs to be taken into account when reasoning about how much of
-	// the type is deserialized.
+	// UnmarshalBytes deserializes a type from src.
+	// Precondition: src must be at least SizeBytes() in length.
 	UnmarshalBytes(src []byte)
 
 	// Packed returns true if the marshalled size of the type is the same as the
@@ -89,8 +83,8 @@ type Marshallable interface {
 	// representation to the dst buffer. This is only safe to do when the type
 	// has no implicit padding, see Marshallable.Packed. When Packed would
 	// return false, MarshalUnsafe should fall back to the safer but slower
-	// MarshalBytes. dst may be smaller than SizeBytes(), see comment for
-	// MarshalBytes for implications.
+	// MarshalBytes.
+	// Precondition: dst must be at least SizeBytes() in length.
 	MarshalUnsafe(dst []byte)
 
 	// UnmarshalUnsafe deserializes a type by directly copying to the underlying
@@ -99,8 +93,8 @@ type Marshallable interface {
 	// This allows much faster unmarshalling of types which have no implicit
 	// padding, see Marshallable.Packed. When Packed would return false,
 	// UnmarshalUnsafe should fall back to the safer but slower unmarshal
-	// mechanism implemented in UnmarshalBytes. src may be smaller than
-	// SizeBytes(), see comment for UnmarshalBytes for implications.
+	// mechanism implemented in UnmarshalBytes.
+	// Precondition: src must be at least SizeBytes() in length.
 	UnmarshalUnsafe(src []byte)
 
 	// CopyIn deserializes a Marshallable type from a task's memory. This may
@@ -149,14 +143,16 @@ type Marshallable interface {
 //
 // Generates four additional functions for marshalling slices of Foos like this:
 //
-// // MarshalUnsafeFooSlice is like Foo.MarshalUnsafe, buf for a []Foo. It's
-// // more efficient that repeatedly calling calling Foo.MarshalUnsafe over a
-// // []Foo in a loop.
+// // MarshalUnsafeFooSlice is like Foo.MarshalUnsafe, buf for a []Foo. It
+// // might be more efficient that repeatedly calling Foo.MarshalUnsafe
+// // over a []Foo in a loop if the type is Packed.
+// // Preconditions: dst must be at least len(src)*Foo.SizeBytes() in length.
 // func MarshalUnsafeFooSlice(src []Foo, dst []byte) (int, error) { ... }
 //
-// // UnmarshalUnsafeFooSlice is like Foo.UnmarshalUnsafe, buf for a []Foo. It's
-// // more efficient that repeatedly calling calling Foo.UnmarshalUnsafe over a
-// // []Foo in a loop.
+// // UnmarshalUnsafeFooSlice is like Foo.UnmarshalUnsafe, buf for a []Foo. It
+// // might be more efficient that repeatedly calling Foo.UnmarshalUnsafe
+// // over a []Foo in a loop if the type is Packed.
+// // Preconditions: src must be at least len(dst)*Foo.SizeBytes() in length.
 // func UnmarshalUnsafeFooSlice(dst []Foo, src []byte) (int, error) { ... }
 //
 // // CopyFooSliceIn copies in a slice of Foo objects from the task's memory.
diff --git a/tools/go_marshal/primitive/primitive.go b/tools/go_marshal/primitive/primitive.go
index ebcf130ae..d93edda8b 100644
--- a/tools/go_marshal/primitive/primitive.go
+++ b/tools/go_marshal/primitive/primitive.go
@@ -17,10 +17,22 @@
 package primitive
 
 import (
+	"io"
+
 	"gvisor.dev/gvisor/pkg/usermem"
 	"gvisor.dev/gvisor/tools/go_marshal/marshal"
 )
 
+// Int8 is a marshal.Marshallable implementation for int8.
+//
+// +marshal slice:Int8Slice:inner
+type Int8 int8
+
+// Uint8 is a marshal.Marshallable implementation for uint8.
+//
+// +marshal slice:Uint8Slice:inner
+type Uint8 uint8
+
 // Int16 is a marshal.Marshallable implementation for int16.
 //
 // +marshal slice:Int16Slice:inner
@@ -51,6 +63,66 @@ type Int64 int64
 // +marshal slice:Uint64Slice:inner
 type Uint64 uint64
 
+// ByteSlice is a marshal.Marshallable implementation for []byte.
+// This is a convenience wrapper around a dynamically sized type, and can't be
+// embedded in other marshallable types because it breaks assumptions made by
+// go-marshal internals. It violates the "no dynamically-sized types"
+// constraint of the go-marshal library.
+type ByteSlice []byte
+
+// SizeBytes implements marshal.Marshallable.SizeBytes.
+func (b *ByteSlice) SizeBytes() int {
+	return len(*b)
+}
+
+// MarshalBytes implements marshal.Marshallable.MarshalBytes.
+func (b *ByteSlice) MarshalBytes(dst []byte) {
+	copy(dst, *b)
+}
+
+// UnmarshalBytes implements marshal.Marshallable.UnmarshalBytes.
+func (b *ByteSlice) UnmarshalBytes(src []byte) {
+	copy(*b, src)
+}
+
+// Packed implements marshal.Marshallable.Packed.
+func (b *ByteSlice) Packed() bool {
+	return false
+}
+
+// MarshalUnsafe implements marshal.Marshallable.MarshalUnsafe.
+func (b *ByteSlice) MarshalUnsafe(dst []byte) {
+	b.MarshalBytes(dst)
+}
+
+// UnmarshalUnsafe implements marshal.Marshallable.UnmarshalUnsafe.
+func (b *ByteSlice) UnmarshalUnsafe(src []byte) {
+	b.UnmarshalBytes(src)
+}
+
+// CopyIn implements marshal.Marshallable.CopyIn.
+func (b *ByteSlice) CopyIn(task marshal.Task, addr usermem.Addr) (int, error) {
+	return task.CopyInBytes(addr, *b)
+}
+
+// CopyOut implements marshal.Marshallable.CopyOut.
+func (b *ByteSlice) CopyOut(task marshal.Task, addr usermem.Addr) (int, error) {
+	return task.CopyOutBytes(addr, *b)
+}
+
+// CopyOutN implements marshal.Marshallable.CopyOutN.
+func (b *ByteSlice) CopyOutN(task marshal.Task, addr usermem.Addr, limit int) (int, error) {
+	return task.CopyOutBytes(addr, (*b)[:limit])
+}
+
+// WriteTo implements io.WriterTo.WriteTo.
+func (b *ByteSlice) WriteTo(w io.Writer) (int64, error) {
+	n, err := w.Write(*b)
+	return int64(n), err
+}
+
+var _ marshal.Marshallable = (*ByteSlice)(nil)
+
 // Below, we define some convenience functions for marshalling primitive types
 // using the newtypes above, without requiring superfluous casts.
 
diff --git a/tools/nogo/build.go b/tools/nogo/build.go
index fb9f17b62..433d13738 100644
--- a/tools/nogo/build.go
+++ b/tools/nogo/build.go
@@ -31,10 +31,10 @@ var (
 )
 
 // findStdPkg needs to find the bundled standard library packages.
-func findStdPkg(path, GOOS, GOARCH string) (io.ReadCloser, error) {
+func (i *importer) findStdPkg(path string) (io.ReadCloser, error) {
 	if path == "C" {
 		// Cgo builds cannot be analyzed. Skip.
 		return nil, ErrSkip
 	}
-	return os.Open(fmt.Sprintf("external/go_sdk/pkg/%s_%s/%s.a", GOOS, GOARCH, path))
+	return os.Open(fmt.Sprintf("external/go_sdk/pkg/%s_%s/%s.a", i.GOOS, i.GOARCH, path))
 }
diff --git a/tools/nogo/defs.bzl b/tools/nogo/defs.bzl
index 6560b57c8..d399079c5 100644
--- a/tools/nogo/defs.bzl
+++ b/tools/nogo/defs.bzl
@@ -28,8 +28,10 @@ def _nogo_aspect_impl(target, ctx):
     else:
         return [NogoInfo()]
 
-    # Construct the Go environment from the go_context.env dictionary.
-    env_prefix = " ".join(["%s=%s" % (key, value) for (key, value) in go_context(ctx).env.items()])
+    go_ctx = go_context(ctx)
+
+    # Construct the Go environment from the go_ctx.env dictionary.
+    env_prefix = " ".join(["%s=%s" % (key, value) for (key, value) in go_ctx.env.items()])
 
     # Start with all target files and srcs as input.
     inputs = target.files.to_list() + srcs
@@ -45,7 +47,7 @@ def _nogo_aspect_impl(target, ctx):
         "#!/bin/bash",
         "%s %s tool objdump %s > %s\n" % (
             env_prefix,
-            go_context(ctx).go.path,
+            go_ctx.go.path,
             [f.path for f in binaries if f.path.endswith(".a")][0],
             disasm_file.path,
         ),
@@ -53,7 +55,7 @@ def _nogo_aspect_impl(target, ctx):
     ctx.actions.run(
         inputs = binaries,
         outputs = [disasm_file],
-        tools = go_context(ctx).runfiles,
+        tools = go_ctx.runfiles,
         mnemonic = "GoObjdump",
         progress_message = "Objdump %s" % target.label,
         executable = dumper,
@@ -70,9 +72,11 @@ def _nogo_aspect_impl(target, ctx):
         ImportPath = importpath,
         GoFiles = [src.path for src in srcs if src.path.endswith(".go")],
         NonGoFiles = [src.path for src in srcs if not src.path.endswith(".go")],
-        GOOS = go_context(ctx).goos,
-        GOARCH = go_context(ctx).goarch,
-        Tags = go_context(ctx).tags,
+        # Google's internal build system needs a bit more help to find std.
+        StdZip = go_ctx.std_zip.short_path if hasattr(go_ctx, "std_zip") else "",
+        GOOS = go_ctx.goos,
+        GOARCH = go_ctx.goarch,
+        Tags = go_ctx.tags,
         FactMap = {},  # Constructed below.
         ImportMap = {},  # Constructed below.
         FactOutput = facts.path,
@@ -110,7 +114,7 @@ def _nogo_aspect_impl(target, ctx):
     ctx.actions.run(
         inputs = inputs,
         outputs = [facts],
-        tools = go_context(ctx).runfiles,
+        tools = go_ctx.runfiles,
         executable = ctx.files._nogo[0],
         mnemonic = "GoStaticAnalysis",
         progress_message = "Analyzing %s" % target.label,
diff --git a/tools/nogo/nogo.go b/tools/nogo/nogo.go
index 5ee586c3e..ea1e97076 100644
--- a/tools/nogo/nogo.go
+++ b/tools/nogo/nogo.go
@@ -55,6 +55,7 @@ type pkgConfig struct {
 	FactMap    map[string]string
 	FactOutput string
 	Objdump    string
+	StdZip     string
 }
 
 // loadFacts finds and loads facts per FactMap.
@@ -111,7 +112,7 @@ func (i *importer) Import(path string) (*types.Package, error) {
 	if !ok {
 		// Not found in the import path. Attempt to find the package
 		// via the standard library.
-		rc, err = findStdPkg(path, i.GOOS, i.GOARCH)
+		rc, err = i.findStdPkg(path)
 	} else {
 		// Open the file.
 		rc, err = os.Open(realPath)
diff --git a/tools/vm/ubuntu1604/30_docker.sh b/tools/vm/ubuntu1604/30_docker.sh
index 332a03dfb..d393133e4 100755
--- a/tools/vm/ubuntu1604/30_docker.sh
+++ b/tools/vm/ubuntu1604/30_docker.sh
@@ -54,8 +54,11 @@ while true; do
 done
 
 # Enable experimental features, for cross-building aarch64 images.
+# Enable Docker IPv6.
 cat > /etc/docker/daemon.json <<EOF
 {
-    "experimental": true
+    "experimental": true,
+    "fixed-cidr-v6": "2001:db8:1::/64",
+    "ipv6": true
 }
 EOF