57 files changed, 3579 insertions, 4794 deletions
diff --git a/runsc/boot/BUILD b/runsc/boot/BUILD
index ae4dd102a..a907c103b 100644
--- a/runsc/boot/BUILD
+++ b/runsc/boot/BUILD
@@ -12,18 +12,15 @@ go_library(
         "controller.go",
         "debug.go",
         "events.go",
-        "fds.go",
         "fs.go",
         "limits.go",
         "loader.go",
-        "loader_amd64.go",
-        "loader_arm64.go",
         "network.go",
-        "pprof.go",
         "strace.go",
-        "user.go",
+        "vfs.go",
     ],
     visibility = [
+        "//pkg/test:__subpackages__",
         "//runsc:__subpackages__",
         "//test:__subpackages__",
     ],
@@ -34,6 +31,7 @@ go_library(
         "//pkg/control/server",
         "//pkg/cpuid",
         "//pkg/eventchannel",
+        "//pkg/fspath",
         "//pkg/log",
         "//pkg/memutil",
         "//pkg/rand",
@@ -41,6 +39,8 @@ go_library(
         "//pkg/sentry/arch",
         "//pkg/sentry/arch:registers_go_proto",
         "//pkg/sentry/control",
+        "//pkg/sentry/devices/memdev",
+        "//pkg/sentry/fdimport",
         "//pkg/sentry/fs",
         "//pkg/sentry/fs/dev",
         "//pkg/sentry/fs/gofer",
@@ -50,6 +50,14 @@ go_library(
         "//pkg/sentry/fs/sys",
         "//pkg/sentry/fs/tmpfs",
         "//pkg/sentry/fs/tty",
+        "//pkg/sentry/fs/user",
+        "//pkg/sentry/fsimpl/devpts",
+        "//pkg/sentry/fsimpl/devtmpfs",
+        "//pkg/sentry/fsimpl/gofer",
+        "//pkg/sentry/fsimpl/host",
+        "//pkg/sentry/fsimpl/proc",
+        "//pkg/sentry/fsimpl/sys",
+        "//pkg/sentry/fsimpl/tmpfs",
         "//pkg/sentry/inet",
         "//pkg/sentry/kernel",
         "//pkg/sentry/kernel:uncaught_signal_go_proto",
@@ -67,17 +75,18 @@ go_library(
         "//pkg/sentry/socket/unix",
         "//pkg/sentry/state",
         "//pkg/sentry/strace",
-        "//pkg/sentry/syscalls/linux",
         "//pkg/sentry/syscalls/linux/vfs2",
         "//pkg/sentry/time",
         "//pkg/sentry/unimpl:unimplemented_syscall_go_proto",
         "//pkg/sentry/usage",
+        "//pkg/sentry/vfs",
         "//pkg/sentry/watchdog",
         "//pkg/sync",
         "//pkg/syserror",
         "//pkg/tcpip",
         "//pkg/tcpip/link/fdbased",
         "//pkg/tcpip/link/loopback",
+        "//pkg/tcpip/link/qdisc/fifo",
         "//pkg/tcpip/link/sniffer",
         "//pkg/tcpip/network/arp",
         "//pkg/tcpip/network/ipv4",
@@ -88,9 +97,9 @@ go_library(
         "//pkg/tcpip/transport/tcp",
         "//pkg/tcpip/transport/udp",
         "//pkg/urpc",
-        "//pkg/usermem",
         "//runsc/boot/filter",
         "//runsc/boot/platforms",
+        "//runsc/boot/pprof",
         "//runsc/specutils",
         "@com_github_golang_protobuf//proto:go_default_library",
         "@com_github_opencontainers_runtime-spec//specs-go:go_default_library",
@@ -105,19 +114,20 @@ go_test(
         "compat_test.go",
         "fs_test.go",
         "loader_test.go",
-        "user_test.go",
     ],
     library = ":boot",
     deps = [
         "//pkg/control/server",
+        "//pkg/fspath",
         "//pkg/log",
         "//pkg/p9",
         "//pkg/sentry/contexttest",
         "//pkg/sentry/fs",
-        "//pkg/sentry/kernel/auth",
+        "//pkg/sentry/vfs",
         "//pkg/sync",
         "//pkg/unet",
         "//runsc/fsgofer",
         "@com_github_opencontainers_runtime-spec//specs-go:go_default_library",
+        "@org_golang_x_sys//unix:go_default_library",
     ],
 )
diff --git a/runsc/boot/compat.go b/runsc/boot/compat.go
index 8995d678e..84c67cbc2 100644
--- a/runsc/boot/compat.go
+++ b/runsc/boot/compat.go
@@ -65,7 +65,7 @@ func newCompatEmitter(logFD int) (*compatEmitter, error) {
 
 	if logFD > 0 {
 		f := os.NewFile(uintptr(logFD), "user log file")
-		target := &log.MultiEmitter{c.sink, &log.K8sJSONEmitter{log.Writer{Next: f}}}
+		target := &log.MultiEmitter{c.sink, log.K8sJSONEmitter{&log.Writer{Next: f}}}
 		c.sink = &log.BasicLogger{Level: log.Info, Emitter: target}
 	}
 	return c, nil
@@ -119,7 +119,13 @@ func (c *compatEmitter) emitUnimplementedSyscall(us *spb.UnimplementedSyscall) {
 	}
 
 	if tr.shouldReport(regs) {
-		c.sink.Infof("Unsupported syscall: %s, regs: %+v", c.nameMap.Name(uintptr(sysnr)), regs)
+		name := c.nameMap.Name(uintptr(sysnr))
+		c.sink.Infof("Unsupported syscall %s(%#x,%#x,%#x,%#x,%#x,%#x). It is "+
+			"likely that you can safely ignore this message and that this is not "+
+			"the cause of any error. Please, refer to %s/%s for more information.",
+			name, argVal(0, regs), argVal(1, regs), argVal(2, regs), argVal(3, regs),
+			argVal(4, regs), argVal(5, regs), syscallLink, name)
+
 		tr.onReported(regs)
 	}
 }
diff --git a/runsc/boot/compat_amd64.go b/runsc/boot/compat_amd64.go
index 42b0ca8b0..8eb76b2ba 100644
--- a/runsc/boot/compat_amd64.go
+++ b/runsc/boot/compat_amd64.go
@@ -24,8 +24,12 @@ import (
 	"gvisor.dev/gvisor/pkg/sentry/strace"
 )
 
-// reportLimit is the max number of events that should be reported per tracker.
-const reportLimit = 100
+const (
+	// reportLimit is the max number of events that should be reported per
+	// tracker.
+	reportLimit = 100
+	syscallLink = "https://gvisor.dev/c/linux/amd64"
+)
 
 // newRegs create a empty Registers instance.
 func newRegs() *rpb.Registers {
@@ -36,22 +40,22 @@ func newRegs() *rpb.Registers {
 	}
 }
 
-func argVal(argIdx int, regs *rpb.Registers) uint32 {
+func argVal(argIdx int, regs *rpb.Registers) uint64 {
 	amd64Regs := regs.GetArch().(*rpb.Registers_Amd64).Amd64
 
 	switch argIdx {
 	case 0:
-		return uint32(amd64Regs.Rdi)
+		return amd64Regs.Rdi
 	case 1:
-		return uint32(amd64Regs.Rsi)
+		return amd64Regs.Rsi
 	case 2:
-		return uint32(amd64Regs.Rdx)
+		return amd64Regs.Rdx
 	case 3:
-		return uint32(amd64Regs.R10)
+		return amd64Regs.R10
 	case 4:
-		return uint32(amd64Regs.R8)
+		return amd64Regs.R8
 	case 5:
-		return uint32(amd64Regs.R9)
+		return amd64Regs.R9
 	}
 	panic(fmt.Sprintf("invalid syscall argument index %d", argIdx))
 }
diff --git a/runsc/boot/compat_arm64.go b/runsc/boot/compat_arm64.go
index f784cd237..bce9d95b3 100644
--- a/runsc/boot/compat_arm64.go
+++ b/runsc/boot/compat_arm64.go
@@ -23,8 +23,12 @@ import (
 	"gvisor.dev/gvisor/pkg/sentry/strace"
 )
 
-// reportLimit is the max number of events that should be reported per tracker.
-const reportLimit = 100
+const (
+	// reportLimit is the max number of events that should be reported per
+	// tracker.
+	reportLimit = 100
+	syscallLink = "https://gvisor.dev/c/linux/arm64"
+)
 
 // newRegs create a empty Registers instance.
 func newRegs() *rpb.Registers {
@@ -35,22 +39,22 @@ func newRegs() *rpb.Registers {
 	}
 }
 
-func argVal(argIdx int, regs *rpb.Registers) uint32 {
+func argVal(argIdx int, regs *rpb.Registers) uint64 {
 	arm64Regs := regs.GetArch().(*rpb.Registers_Arm64).Arm64
 
 	switch argIdx {
 	case 0:
-		return uint32(arm64Regs.R0)
+		return arm64Regs.R0
 	case 1:
-		return uint32(arm64Regs.R1)
+		return arm64Regs.R1
 	case 2:
-		return uint32(arm64Regs.R2)
+		return arm64Regs.R2
 	case 3:
-		return uint32(arm64Regs.R3)
+		return arm64Regs.R3
 	case 4:
-		return uint32(arm64Regs.R4)
+		return arm64Regs.R4
 	case 5:
-		return uint32(arm64Regs.R5)
+		return arm64Regs.R5
 	}
 	panic(fmt.Sprintf("invalid syscall argument index %d", argIdx))
 }
diff --git a/runsc/boot/config.go b/runsc/boot/config.go
index 35391030f..bcec7e4db 100644
--- a/runsc/boot/config.go
+++ b/runsc/boot/config.go
@@ -158,6 +158,9 @@ type Config struct {
 	// DebugLog is the path to log debug information to, if not empty.
 	DebugLog string
 
+	// PanicLog is the path to log GO's runtime messages, if not empty.
+	PanicLog string
+
 	// DebugLogFormat is the log format for debug.
 	DebugLogFormat string
 
@@ -184,6 +187,10 @@ type Config struct {
 	// SoftwareGSO indicates that software segmentation offload is enabled.
 	SoftwareGSO bool
 
+	// QDisc indicates the type of queuening discipline to use by default
+	// for non-loopback interfaces.
+	QDisc QueueingDiscipline
+
 	// LogPackets indicates that all network packets should be logged.
 	LogPackets bool
 
@@ -234,8 +241,10 @@ type Config struct {
 	// ReferenceLeakMode sets reference leak check mode
 	ReferenceLeakMode refs.LeakMode
 
-	// OverlayfsStaleRead causes cached FDs to reopen after a file is opened for
-	// write to workaround overlayfs limitation on kernels before 4.19.
+	// OverlayfsStaleRead instructs the sandbox to assume that the root mount
+	// is on a Linux overlayfs mount, which does not necessarily preserve
+	// coherence between read-only and subsequent writable file descriptors
+	// representing the "same" file.
 	OverlayfsStaleRead bool
 
 	// TestOnlyAllowRunAsCurrentUserWithoutChroot should only be used in
@@ -269,6 +278,7 @@ func (c *Config) ToFlags() []string {
 		"--log=" + c.LogFilename,
 		"--log-format=" + c.LogFormat,
 		"--debug-log=" + c.DebugLog,
+		"--panic-log=" + c.PanicLog,
 		"--debug-log-format=" + c.DebugLogFormat,
 		"--file-access=" + c.FileAccess.String(),
 		"--overlay=" + strconv.FormatBool(c.Overlay),
@@ -290,6 +300,7 @@ func (c *Config) ToFlags() []string {
 		"--gso=" + strconv.FormatBool(c.HardwareGSO),
 		"--software-gso=" + strconv.FormatBool(c.SoftwareGSO),
 		"--overlayfs-stale-read=" + strconv.FormatBool(c.OverlayfsStaleRead),
+		"--qdisc=" + c.QDisc.String(),
 	}
 	if c.CPUNumFromQuota {
 		f = append(f, "--cpu-num-from-quota")
@@ -301,5 +312,10 @@ func (c *Config) ToFlags() []string {
 	if len(c.TestOnlyTestNameEnv) != 0 {
 		f = append(f, "--TESTONLY-test-name-env="+c.TestOnlyTestNameEnv)
 	}
+
+	if c.VFS2 {
+		f = append(f, "--vfs2=true")
+	}
+
 	return f
 }
diff --git a/runsc/boot/controller.go b/runsc/boot/controller.go
index 9c9e94864..8125d5061 100644
--- a/runsc/boot/controller.go
+++ b/runsc/boot/controller.go
@@ -32,6 +32,7 @@ import (
 	"gvisor.dev/gvisor/pkg/sentry/watchdog"
 	"gvisor.dev/gvisor/pkg/tcpip/stack"
 	"gvisor.dev/gvisor/pkg/urpc"
+	"gvisor.dev/gvisor/runsc/boot/pprof"
 	"gvisor.dev/gvisor/runsc/specutils"
 )
 
@@ -100,11 +101,14 @@ const (
 
 // Profiling related commands (see pprof.go for more details).
 const (
-	StartCPUProfile = "Profile.StartCPUProfile"
-	StopCPUProfile  = "Profile.StopCPUProfile"
-	HeapProfile     = "Profile.HeapProfile"
-	StartTrace      = "Profile.StartTrace"
-	StopTrace       = "Profile.StopTrace"
+	StartCPUProfile  = "Profile.StartCPUProfile"
+	StopCPUProfile   = "Profile.StopCPUProfile"
+	HeapProfile      = "Profile.HeapProfile"
+	GoroutineProfile = "Profile.GoroutineProfile"
+	BlockProfile     = "Profile.BlockProfile"
+	MutexProfile     = "Profile.MutexProfile"
+	StartTrace       = "Profile.StartTrace"
+	StopTrace        = "Profile.StopTrace"
 )
 
 // Logging related commands (see logging.go for more details).
@@ -142,7 +146,7 @@ func newController(fd int, l *Loader) (*controller, error) {
 	}
 	srv.Register(manager)
 
-	if eps, ok := l.k.NetworkStack().(*netstack.Stack); ok {
+	if eps, ok := l.k.RootNetworkNamespace().Stack().(*netstack.Stack); ok {
 		net := &Network{
 			Stack: eps.Stack,
 		}
@@ -341,7 +345,7 @@ func (cm *containerManager) Restore(o *RestoreOpts, _ *struct{}) error {
 		return fmt.Errorf("creating memory file: %v", err)
 	}
 	k.SetMemoryFile(mf)
-	networkStack := cm.l.k.NetworkStack()
+	networkStack := cm.l.k.RootNetworkNamespace().Stack()
 	cm.l.k = k
 
 	// Set up the restore environment.
@@ -365,9 +369,9 @@ func (cm *containerManager) Restore(o *RestoreOpts, _ *struct{}) error {
 	}
 
 	if cm.l.conf.ProfileEnable {
-		// initializePProf opens /proc/self/maps, so has to be
-		// called before installing seccomp filters.
-		initializePProf()
+		// pprof.Initialize opens /proc/self/maps, so has to be called before
+		// installing seccomp filters.
+		pprof.Initialize()
 	}
 
 	// Seccomp filters have to be applied before parsing the state file.
diff --git a/runsc/boot/fds.go b/runsc/boot/fds.go
deleted file mode 100644
index 417d2d5fb..000000000
--- a/runsc/boot/fds.go
+++ /dev/null
@@ -1,81 +0,0 @@
-// Copyright 2018 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package boot
-
-import (
-	"fmt"
-
-	"gvisor.dev/gvisor/pkg/context"
-	"gvisor.dev/gvisor/pkg/sentry/fs"
-	"gvisor.dev/gvisor/pkg/sentry/fs/host"
-	"gvisor.dev/gvisor/pkg/sentry/kernel"
-)
-
-// createFDTable creates an FD table that contains stdin, stdout, and stderr.
-// If console is true, then ioctl calls will be passed through to the host FD.
-// Upon success, createFDMap dups then closes stdioFDs.
-func createFDTable(ctx context.Context, console bool, stdioFDs []int) (*kernel.FDTable, error) {
-	if len(stdioFDs) != 3 {
-		return nil, fmt.Errorf("stdioFDs should contain exactly 3 FDs (stdin, stdout, and stderr), but %d FDs received", len(stdioFDs))
-	}
-
-	k := kernel.KernelFromContext(ctx)
-	fdTable := k.NewFDTable()
-	defer fdTable.DecRef()
-	mounter := fs.FileOwnerFromContext(ctx)
-
-	var ttyFile *fs.File
-	for appFD, hostFD := range stdioFDs {
-		var appFile *fs.File
-
-		if console && appFD < 3 {
-			// Import the file as a host TTY file.
-			if ttyFile == nil {
-				var err error
-				appFile, err = host.ImportFile(ctx, hostFD, mounter, true /* isTTY */)
-				if err != nil {
-					return nil, err
-				}
-				defer appFile.DecRef()
-
-				// Remember this in the TTY file, as we will
-				// use it for the other stdio FDs.
-				ttyFile = appFile
-			} else {
-				// Re-use the existing TTY file, as all three
-				// stdio FDs must point to the same fs.File in
-				// order to share TTY state, specifically the
-				// foreground process group id.
-				appFile = ttyFile
-			}
-		} else {
-			// Import the file as a regular host file.
-			var err error
-			appFile, err = host.ImportFile(ctx, hostFD, mounter, false /* isTTY */)
-			if err != nil {
-				return nil, err
-			}
-			defer appFile.DecRef()
-		}
-
-		// Add the file to the FD map.
-		if err := fdTable.NewFDAt(ctx, int32(appFD), appFile, kernel.FDFlags{}); err != nil {
-			return nil, err
-		}
-	}
-
-	fdTable.IncRef()
-	return fdTable, nil
-}
diff --git a/runsc/boot/filter/config.go b/runsc/boot/filter/config.go
index c69f4c602..60e33425f 100644
--- a/runsc/boot/filter/config.go
+++ b/runsc/boot/filter/config.go
@@ -44,7 +44,7 @@ var allowedSyscalls = seccomp.SyscallRules{
 		{
 			seccomp.AllowAny{},
 			seccomp.AllowAny{},
-			seccomp.AllowValue(0),
+			seccomp.AllowValue(syscall.O_CLOEXEC),
 		},
 	},
 	syscall.SYS_EPOLL_CREATE1: {},
@@ -229,7 +229,11 @@ var allowedSyscalls = seccomp.SyscallRules{
 	syscall.SYS_NANOSLEEP: {},
 	syscall.SYS_PPOLL:     {},
 	syscall.SYS_PREAD64:   {},
+	syscall.SYS_PREADV:    {},
+	unix.SYS_PREADV2:      {},
 	syscall.SYS_PWRITE64:  {},
+	syscall.SYS_PWRITEV:   {},
+	unix.SYS_PWRITEV2:     {},
 	syscall.SYS_READ:      {},
 	syscall.SYS_RECVMSG: []seccomp.Rule{
 		{
@@ -282,12 +286,29 @@ var allowedSyscalls = seccomp.SyscallRules{
 		{seccomp.AllowAny{}, seccomp.AllowValue(syscall.SHUT_RDWR)},
 	},
 	syscall.SYS_SIGALTSTACK:     {},
+	unix.SYS_STATX:              {},
 	syscall.SYS_SYNC_FILE_RANGE: {},
+	syscall.SYS_TEE: []seccomp.Rule{
+		{
+			seccomp.AllowAny{},
+			seccomp.AllowAny{},
+			seccomp.AllowValue(1),                      /* len */
+			seccomp.AllowValue(unix.SPLICE_F_NONBLOCK), /* flags */
+		},
+	},
 	syscall.SYS_TGKILL: []seccomp.Rule{
 		{
 			seccomp.AllowValue(uint64(os.Getpid())),
 		},
 	},
+	syscall.SYS_UTIMENSAT: []seccomp.Rule{
+		{
+			seccomp.AllowAny{},
+			seccomp.AllowValue(0), /* null pathname */
+			seccomp.AllowAny{},
+			seccomp.AllowValue(0), /* flags */
+		},
+	},
 	syscall.SYS_WRITE: {},
 	// The only user in rawfile.NonBlockingWrite3 always passes iovcnt with
 	// values 2 or 3. Three iovec-s are passed, when the PACKET_VNET_HDR
diff --git a/runsc/boot/filter/extra_filters_msan.go b/runsc/boot/filter/extra_filters_msan.go
index 5e5a3c998..209e646a7 100644
--- a/runsc/boot/filter/extra_filters_msan.go
+++ b/runsc/boot/filter/extra_filters_msan.go
@@ -26,6 +26,8 @@ import (
 func instrumentationFilters() seccomp.SyscallRules {
 	Report("MSAN is enabled: syscall filters less restrictive!")
 	return seccomp.SyscallRules{
+		syscall.SYS_CLONE:             {},
+		syscall.SYS_MMAP:              {},
 		syscall.SYS_SCHED_GETAFFINITY: {},
 		syscall.SYS_SET_ROBUST_LIST:   {},
 	}
diff --git a/runsc/boot/fs.go b/runsc/boot/fs.go
index 0f62842ea..b98a1eb50 100644
--- a/runsc/boot/fs.go
+++ b/runsc/boot/fs.go
@@ -37,6 +37,13 @@ import (
 	"gvisor.dev/gvisor/pkg/sentry/fs"
 	"gvisor.dev/gvisor/pkg/sentry/fs/gofer"
 	"gvisor.dev/gvisor/pkg/sentry/fs/ramfs"
+	"gvisor.dev/gvisor/pkg/sentry/fs/user"
+	"gvisor.dev/gvisor/pkg/sentry/fsimpl/devpts"
+	"gvisor.dev/gvisor/pkg/sentry/fsimpl/devtmpfs"
+	gofervfs2 "gvisor.dev/gvisor/pkg/sentry/fsimpl/gofer"
+	procvfs2 "gvisor.dev/gvisor/pkg/sentry/fsimpl/proc"
+	sysvfs2 "gvisor.dev/gvisor/pkg/sentry/fsimpl/sys"
+	tmpfsvfs2 "gvisor.dev/gvisor/pkg/sentry/fsimpl/tmpfs"
 	"gvisor.dev/gvisor/pkg/sentry/kernel"
 	"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
 	"gvisor.dev/gvisor/pkg/syserror"
@@ -44,27 +51,19 @@ import (
 )
 
 const (
-	// Filesystem name for 9p gofer mounts.
-	rootFsName = "9p"
-
 	// Device name for root mount.
 	rootDevice = "9pfs-/"
 
 	// MountPrefix is the annotation prefix for mount hints.
 	MountPrefix = "dev.gvisor.spec.mount."
 
-	// Filesystems that runsc supports.
-	bind     = "bind"
-	devpts   = "devpts"
-	devtmpfs = "devtmpfs"
-	proc     = "proc"
-	sysfs    = "sysfs"
-	tmpfs    = "tmpfs"
-	nonefs   = "none"
+	// Supported filesystems that map to different internal filesystem.
+	bind   = "bind"
+	nonefs = "none"
 )
 
 // tmpfs has some extra supported options that we must pass through.
-var tmpfsAllowedOptions = []string{"mode", "uid", "gid"}
+var tmpfsAllowedData = []string{"mode", "uid", "gid"}
 
 func addOverlay(ctx context.Context, conf *Config, lower *fs.Inode, name string, lowerFlags fs.MountSourceFlags) (*fs.Inode, error) {
 	// Upper layer uses the same flags as lower, but it must be read-write.
@@ -108,12 +107,12 @@ func compileMounts(spec *specs.Spec) []specs.Mount {
 
 	// Always mount /dev.
 	mounts = append(mounts, specs.Mount{
-		Type:        devtmpfs,
+		Type:        devtmpfs.Name,
 		Destination: "/dev",
 	})
 
 	mounts = append(mounts, specs.Mount{
-		Type:        devpts,
+		Type:        devpts.Name,
 		Destination: "/dev/pts",
 	})
 
@@ -137,13 +136,13 @@ func compileMounts(spec *specs.Spec) []specs.Mount {
 	var mandatoryMounts []specs.Mount
 	if !procMounted {
 		mandatoryMounts = append(mandatoryMounts, specs.Mount{
-			Type:        proc,
+			Type:        procvfs2.Name,
 			Destination: "/proc",
 		})
 	}
 	if !sysMounted {
 		mandatoryMounts = append(mandatoryMounts, specs.Mount{
-			Type:        sysfs,
+			Type:        sysvfs2.Name,
 			Destination: "/sys",
 		})
 	}
@@ -155,13 +154,17 @@ func compileMounts(spec *specs.Spec) []specs.Mount {
 	return mounts
 }
 
-// p9MountOptions creates a slice of options for a p9 mount.
-func p9MountOptions(fd int, fa FileAccessType) []string {
+// p9MountData creates a slice of p9 mount data.
+func p9MountData(fd int, fa FileAccessType, vfs2 bool) []string {
 	opts := []string{
 		"trans=fd",
 		"rfdno=" + strconv.Itoa(fd),
 		"wfdno=" + strconv.Itoa(fd),
-		"privateunixsocket=true",
+	}
+	if !vfs2 {
+		// privateunixsocket is always enabled in VFS2. VFS1 requires explicit
+		// enablement.
+		opts = append(opts, "privateunixsocket=true")
 	}
 	if fa == FileAccessShared {
 		opts = append(opts, "cache=remote_revalidating")
@@ -231,8 +234,8 @@ func isSupportedMountFlag(fstype, opt string) bool {
 	case "rw", "ro", "noatime", "noexec":
 		return true
 	}
-	if fstype == tmpfs {
-		ok, err := parseMountOption(opt, tmpfsAllowedOptions...)
+	if fstype == tmpfsvfs2.Name {
+		ok, err := parseMountOption(opt, tmpfsAllowedData...)
 		return ok && err == nil
 	}
 	return false
@@ -278,6 +281,9 @@ func subtargets(root string, mnts []specs.Mount) []string {
 }
 
 func setupContainerFS(ctx context.Context, conf *Config, mntr *containerMounter, procArgs *kernel.CreateProcessArgs) error {
+	if conf.VFS2 {
+		return setupContainerVFS2(ctx, conf, mntr, procArgs)
+	}
 	mns, err := mntr.setupFS(conf, procArgs)
 	if err != nil {
 		return err
@@ -286,17 +292,10 @@ func setupContainerFS(ctx context.Context, conf *Config, mntr *containerMounter,
 	// Set namespace here so that it can be found in ctx.
 	procArgs.MountNamespace = mns
 
-	return setExecutablePath(ctx, procArgs)
-}
-
-// setExecutablePath sets the procArgs.Filename by searching the PATH for an
-// executable matching the procArgs.Argv[0].
-func setExecutablePath(ctx context.Context, procArgs *kernel.CreateProcessArgs) error {
-	paths := fs.GetPath(procArgs.Envv)
-	exe := procArgs.Argv[0]
-	f, err := procArgs.MountNamespace.ResolveExecutablePath(ctx, procArgs.WorkingDirectory, exe, paths)
+	// Resolve the executable path from working dir and environment.
+	f, err := user.ResolveExecutablePath(ctx, procArgs.Credentials, procArgs.MountNamespace, procArgs.Envv, procArgs.WorkingDirectory, procArgs.Argv[0])
 	if err != nil {
-		return fmt.Errorf("searching for executable %q, cwd: %q, $PATH=%q: %v", exe, procArgs.WorkingDirectory, strings.Join(paths, ":"), err)
+		return fmt.Errorf("searching for executable %q, cwd: %q, envv: %q: %v", procArgs.Argv[0], procArgs.WorkingDirectory, procArgs.Envv, err)
 	}
 	procArgs.Filename = f
 	return nil
@@ -438,7 +437,7 @@ func (m *mountHint) setOptions(val string) error {
 }
 
 func (m *mountHint) isSupported() bool {
-	return m.mount.Type == tmpfs && m.share == pod
+	return m.mount.Type == tmpfsvfs2.Name && m.share == pod
 }
 
 // checkCompatible verifies that shared mount is compatible with master.
@@ -573,11 +572,14 @@ func newContainerMounter(spec *specs.Spec, goferFDs []int, k *kernel.Kernel, hin
 // should be mounted (e.g. a volume shared between containers). It must be
 // called for the root container only.
 func (c *containerMounter) processHints(conf *Config) error {
+	if conf.VFS2 {
+		return nil
+	}
 	ctx := c.k.SupervisorContext()
 	for _, hint := range c.hints.mounts {
 		// TODO(b/142076984): Only support tmpfs for now. Bind mounts require a
 		// common gofer to mount all shared volumes.
-		if hint.mount.Type != tmpfs {
+		if hint.mount.Type != tmpfsvfs2.Name {
 			continue
 		}
 		log.Infof("Mounting master of shared mount %q from %q type %q", hint.name, hint.mount.Source, hint.mount.Type)
@@ -714,7 +716,7 @@ func (c *containerMounter) createRootMount(ctx context.Context, conf *Config) (*
 	fd := c.fds.remove()
 	log.Infof("Mounting root over 9P, ioFD: %d", fd)
 	p9FS := mustFindFilesystem("9p")
-	opts := p9MountOptions(fd, conf.FileAccess)
+	opts := p9MountData(fd, conf.FileAccess, false /* vfs2 */)
 
 	if conf.OverlayfsStaleRead {
 		// We can't check for overlayfs here because sandbox is chroot'ed and gofer
@@ -760,30 +762,27 @@ func (c *containerMounter) getMountNameAndOptions(conf *Config, m specs.Mount) (
 	)
 
 	switch m.Type {
-	case devpts, devtmpfs, proc, sysfs:
+	case devpts.Name, devtmpfs.Name, procvfs2.Name, sysvfs2.Name:
 		fsName = m.Type
 	case nonefs:
-		fsName = sysfs
-	case tmpfs:
+		fsName = sysvfs2.Name
+	case tmpfsvfs2.Name:
 		fsName = m.Type
 
 		var err error
-		opts, err = parseAndFilterOptions(m.Options, tmpfsAllowedOptions...)
+		opts, err = parseAndFilterOptions(m.Options, tmpfsAllowedData...)
 		if err != nil {
 			return "", nil, false, err
 		}
 
 	case bind:
 		fd := c.fds.remove()
-		fsName = "9p"
-		opts = p9MountOptions(fd, c.getMountAccessType(m))
+		fsName = gofervfs2.Name
+		opts = p9MountData(fd, c.getMountAccessType(m), conf.VFS2)
 		// If configured, add overlay to all writable mounts.
 		useOverlay = conf.Overlay && !mountFlags(m.Options).ReadOnly
 
 	default:
-		// TODO(nlacasse): Support all the mount types and make this a fatal error.
-		// Most applications will "just work" without them, so this is a warning
-		// for now.
 		log.Warningf("ignoring unknown filesystem type %q", m.Type)
 	}
 	return fsName, opts, useOverlay, nil
@@ -824,7 +823,20 @@ func (c *containerMounter) mountSubmount(ctx context.Context, conf *Config, mns
 
 	inode, err := filesystem.Mount(ctx, mountDevice(m), mf, strings.Join(opts, ","), nil)
 	if err != nil {
-		return fmt.Errorf("creating mount with source %q: %v", m.Source, err)
+		err := fmt.Errorf("creating mount with source %q: %v", m.Source, err)
+		// Check to see if this is a common error due to a Linux bug.
+		// This error is generated here in order to cause it to be
+		// printed to the user using Docker via 'runsc create' etc. rather
+		// than simply printed to the logs for the 'runsc boot' command.
+		//
+		// We check the error message string rather than type because the
+		// actual error types (syscall.EIO, syscall.EPIPE) are lost by file system
+		// implementation (e.g. p9).
+		// TODO(gvisor.dev/issue/1765): Remove message when bug is resolved.
+		if strings.Contains(err.Error(), syscall.EIO.Error()) || strings.Contains(err.Error(), syscall.EPIPE.Error()) {
+			return fmt.Errorf("%v: %s", err, specutils.FaqErrorMsg("memlock", "you may be encountering a Linux kernel bug"))
+		}
+		return err
 	}
 
 	// If there are submounts, we need to overlay the mount on top of a ramfs
@@ -919,7 +931,7 @@ func (c *containerMounter) createRestoreEnvironment(conf *Config) (*fs.RestoreEn
 
 	// Add root mount.
 	fd := c.fds.remove()
-	opts := p9MountOptions(fd, conf.FileAccess)
+	opts := p9MountData(fd, conf.FileAccess, false /* vfs2 */)
 
 	mf := fs.MountSourceFlags{}
 	if c.root.Readonly || conf.Overlay {
@@ -931,7 +943,7 @@ func (c *containerMounter) createRestoreEnvironment(conf *Config) (*fs.RestoreEn
 		Flags:      mf,
 		DataString: strings.Join(opts, ","),
 	}
-	renv.MountSources[rootFsName] = append(renv.MountSources[rootFsName], rootMount)
+	renv.MountSources[gofervfs2.Name] = append(renv.MountSources[gofervfs2.Name], rootMount)
 
 	// Add submounts.
 	var tmpMounted bool
@@ -947,7 +959,7 @@ func (c *containerMounter) createRestoreEnvironment(conf *Config) (*fs.RestoreEn
 	// TODO(b/67958150): handle '/tmp' properly (see mountTmp()).
 	if !tmpMounted {
 		tmpMount := specs.Mount{
-			Type:        tmpfs,
+			Type:        tmpfsvfs2.Name,
 			Destination: "/tmp",
 		}
 		if err := c.addRestoreMount(conf, renv, tmpMount); err != nil {
@@ -1003,11 +1015,11 @@ func (c *containerMounter) mountTmp(ctx context.Context, conf *Config, mns *fs.M
 		// No '/tmp' found (or fallthrough from above). Safe to mount internal
 		// tmpfs.
 		tmpMount := specs.Mount{
-			Type:        tmpfs,
+			Type:        tmpfsvfs2.Name,
 			Destination: "/tmp",
 			// Sticky bit is added to prevent accidental deletion of files from
 			// another user. This is normally done for /tmp.
-			Options: []string{"mode=1777"},
+			Options: []string{"mode=01777"},
 		}
 		return c.mountSubmount(ctx, conf, mns, root, tmpMount)
 
diff --git a/runsc/boot/loader.go b/runsc/boot/loader.go
index 9f0d5d7af..002479612 100644
--- a/runsc/boot/loader.go
+++ b/runsc/boot/loader.go
@@ -26,16 +26,19 @@ import (
 
 	specs "github.com/opencontainers/runtime-spec/specs-go"
 	"golang.org/x/sys/unix"
-	"gvisor.dev/gvisor/pkg/abi"
 	"gvisor.dev/gvisor/pkg/abi/linux"
+	"gvisor.dev/gvisor/pkg/context"
 	"gvisor.dev/gvisor/pkg/cpuid"
 	"gvisor.dev/gvisor/pkg/log"
 	"gvisor.dev/gvisor/pkg/memutil"
 	"gvisor.dev/gvisor/pkg/rand"
 	"gvisor.dev/gvisor/pkg/sentry/arch"
 	"gvisor.dev/gvisor/pkg/sentry/control"
+	"gvisor.dev/gvisor/pkg/sentry/fdimport"
 	"gvisor.dev/gvisor/pkg/sentry/fs"
 	"gvisor.dev/gvisor/pkg/sentry/fs/host"
+	"gvisor.dev/gvisor/pkg/sentry/fs/user"
+	hostvfs2 "gvisor.dev/gvisor/pkg/sentry/fsimpl/host"
 	"gvisor.dev/gvisor/pkg/sentry/inet"
 	"gvisor.dev/gvisor/pkg/sentry/kernel"
 	"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
@@ -46,9 +49,11 @@ import (
 	"gvisor.dev/gvisor/pkg/sentry/syscalls/linux/vfs2"
 	"gvisor.dev/gvisor/pkg/sentry/time"
 	"gvisor.dev/gvisor/pkg/sentry/usage"
+	"gvisor.dev/gvisor/pkg/sentry/vfs"
 	"gvisor.dev/gvisor/pkg/sentry/watchdog"
 	"gvisor.dev/gvisor/pkg/sync"
 	"gvisor.dev/gvisor/pkg/tcpip"
+	"gvisor.dev/gvisor/pkg/tcpip/link/loopback"
 	"gvisor.dev/gvisor/pkg/tcpip/link/sniffer"
 	"gvisor.dev/gvisor/pkg/tcpip/network/arp"
 	"gvisor.dev/gvisor/pkg/tcpip/network/ipv4"
@@ -60,6 +65,7 @@ import (
 	"gvisor.dev/gvisor/pkg/tcpip/transport/udp"
 	"gvisor.dev/gvisor/runsc/boot/filter"
 	_ "gvisor.dev/gvisor/runsc/boot/platforms" // register all platforms.
+	"gvisor.dev/gvisor/runsc/boot/pprof"
 	"gvisor.dev/gvisor/runsc/specutils"
 
 	// Include supported socket providers.
@@ -137,6 +143,9 @@ type execProcess struct {
 	// tty will be nil if the process is not attached to a terminal.
 	tty *host.TTYFileOperations
 
+	// tty will be nil if the process is not attached to a terminal.
+	ttyVFS2 *hostvfs2.TTYFileDescription
+
 	// pidnsPath is the pid namespace path in spec
 	pidnsPath string
 }
@@ -154,13 +163,17 @@ type Args struct {
 	Spec *specs.Spec
 	// Conf is the system configuration.
 	Conf *Config
-	// ControllerFD is the FD to the URPC controller.
+	// ControllerFD is the FD to the URPC controller. The Loader takes ownership
+	// of this FD and may close it at any time.
 	ControllerFD int
-	// Device is an optional argument that is passed to the platform.
+	// Device is an optional argument that is passed to the platform. The Loader
+	// takes ownership of this file and may close it at any time.
 	Device *os.File
-	// GoferFDs is an array of FDs used to connect with the Gofer.
+	// GoferFDs is an array of FDs used to connect with the Gofer. The Loader
+	// takes ownership of these FDs and may close them at any time.
 	GoferFDs []int
-	// StdioFDs is the stdio for the application.
+	// StdioFDs is the stdio for the application. The Loader takes ownership of
+	// these FDs and may close them at any time.
 	StdioFDs []int
 	// Console is set to true if using TTY.
 	Console bool
@@ -173,6 +186,9 @@ type Args struct {
 	UserLogFD int
 }
 
+// make sure stdioFDs are always the same on initial start and on restore
+const startingStdioFD = 64
+
 // New initializes a new kernel loader configured by spec.
 // New also handles setting up a kernel for restoring a container.
 func New(args Args) (*Loader, error) {
@@ -186,11 +202,10 @@ func New(args Args) (*Loader, error) {
 		return nil, fmt.Errorf("setting up memory usage: %v", err)
 	}
 
+	// Is this a VFSv2 kernel?
 	if args.Conf.VFS2 {
-		st, ok := kernel.LookupSyscallTable(abi.Linux, arch.Host)
-		if ok {
-			vfs2.Override(st.Table)
-		}
+		kernel.VFS2Enabled = true
+		vfs2.Override()
 	}
 
 	// Create kernel and platform.
@@ -230,11 +245,8 @@ func New(args Args) (*Loader, error) {
 		return nil, fmt.Errorf("enabling strace: %v", err)
 	}
 
-	// Create an empty network stack because the network namespace may be empty at
-	// this point. Netns is configured before Run() is called. Netstack is
-	// configured using a control uRPC message. Host network is configured inside
-	// Run().
-	networkStack, err := newEmptyNetworkStack(args.Conf, k, k)
+	// Create root network namespace/stack.
+	netns, err := newRootNetworkNamespace(args.Conf, k, k)
 	if err != nil {
 		return nil, fmt.Errorf("creating network: %v", err)
 	}
@@ -277,7 +289,7 @@ func New(args Args) (*Loader, error) {
 		FeatureSet:                  cpuid.HostFeatureSet(),
 		Timekeeper:                  tk,
 		RootUserNamespace:           creds.UserNamespace,
-		NetworkStack:                networkStack,
+		RootNetworkNamespace:        netns,
 		ApplicationCores:            uint(args.NumCPU),
 		Vdso:                        vdso,
 		RootUTSNamespace:            kernel.NewUTSNamespace(args.Spec.Hostname, args.Spec.Hostname, creds.UserNamespace),
@@ -320,6 +332,38 @@ func New(args Args) (*Loader, error) {
 		return nil, fmt.Errorf("creating pod mount hints: %v", err)
 	}
 
+	if kernel.VFS2Enabled {
+		// Set up host mount that will be used for imported fds.
+		hostFilesystem, err := hostvfs2.NewFilesystem(k.VFS())
+		if err != nil {
+			return nil, fmt.Errorf("failed to create hostfs filesystem: %v", err)
+		}
+		defer hostFilesystem.DecRef()
+		hostMount, err := k.VFS().NewDisconnectedMount(hostFilesystem, nil, &vfs.MountOptions{})
+		if err != nil {
+			return nil, fmt.Errorf("failed to create hostfs mount: %v", err)
+		}
+		k.SetHostMount(hostMount)
+	}
+
+	// Make host FDs stable between invocations. Host FDs must map to the exact
+	// same number when the sandbox is restored. Otherwise the wrong FD will be
+	// used.
+	var stdioFDs []int
+	newfd := startingStdioFD
+	for _, fd := range args.StdioFDs {
+		err := syscall.Dup3(fd, newfd, syscall.O_CLOEXEC)
+		if err != nil {
+			return nil, fmt.Errorf("dup3 of stdioFDs failed: %v", err)
+		}
+		stdioFDs = append(stdioFDs, newfd)
+		err = syscall.Close(fd)
+		if err != nil {
+			return nil, fmt.Errorf("close original stdioFDs failed: %v", err)
+		}
+		newfd++
+	}
+
 	eid := execID{cid: args.ID}
 	l := &Loader{
 		k:            k,
@@ -328,7 +372,7 @@ func New(args Args) (*Loader, error) {
 		watchdog:     dog,
 		spec:         args.Spec,
 		goferFDs:     args.GoferFDs,
-		stdioFDs:     args.StdioFDs,
+		stdioFDs:     stdioFDs,
 		rootProcArgs: procArgs,
 		sandboxID:    args.ID,
 		processes:    map[execID]*execProcess{eid: {}},
@@ -368,11 +412,16 @@ func newProcess(id string, spec *specs.Spec, creds *auth.Credentials, k *kernel.
 		return kernel.CreateProcessArgs{}, fmt.Errorf("creating limits: %v", err)
 	}
 
+	wd := spec.Process.Cwd
+	if wd == "" {
+		wd = "/"
+	}
+
 	// Create the process arguments.
 	procArgs := kernel.CreateProcessArgs{
 		Argv:                    spec.Process.Args,
 		Envv:                    spec.Process.Env,
-		WorkingDirectory:        spec.Process.Cwd, // Defaults to '/' if empty.
+		WorkingDirectory:        wd,
 		Credentials:             creds,
 		Umask:                   0022,
 		Limits:                  ls,
@@ -466,7 +515,7 @@ func (l *Loader) run() error {
 		// Delay host network configuration to this point because network namespace
 		// is configured after the loader is created and before Run() is called.
 		log.Debugf("Configuring host network")
-		stack := l.k.NetworkStack().(*hostinet.Stack)
+		stack := l.k.RootNetworkNamespace().Stack().(*hostinet.Stack)
 		if err := stack.Configure(); err != nil {
 			return err
 		}
@@ -483,9 +532,11 @@ func (l *Loader) run() error {
 
 	// If we are restoring, we do not want to create a process.
 	// l.restore is set by the container manager when a restore call is made.
+	var ttyFile *host.TTYFileOperations
+	var ttyFileVFS2 *hostvfs2.TTYFileDescription
 	if !l.restore {
 		if l.conf.ProfileEnable {
-			initializePProf()
+			pprof.Initialize()
 		}
 
 		// Finally done with all configuration. Setup filters before user code
@@ -497,13 +548,14 @@ func (l *Loader) run() error {
 		// Create the FD map, which will set stdin, stdout, and stderr.  If console
 		// is true, then ioctl calls will be passed through to the host fd.
 		ctx := l.rootProcArgs.NewContext(l.k)
-		fdTable, err := createFDTable(ctx, l.console, l.stdioFDs)
+		var err error
+
+		// CreateProcess takes a reference on FDMap if successful. We won't need
+		// ours either way.
+		l.rootProcArgs.FDTable, ttyFile, ttyFileVFS2, err = createFDTable(ctx, l.console, l.stdioFDs)
 		if err != nil {
 			return fmt.Errorf("importing fds: %v", err)
 		}
-		// CreateProcess takes a reference on FDMap if successful. We won't need
-		// ours either way.
-		l.rootProcArgs.FDTable = fdTable
 
 		// Setup the root container file system.
 		l.startGoferMonitor(l.sandboxID, l.goferFDs)
@@ -517,7 +569,15 @@ func (l *Loader) run() error {
 		}
 
 		// Add the HOME enviroment variable if it is not already set.
-		envv, err := maybeAddExecUserHome(ctx, l.rootProcArgs.MountNamespace, l.rootProcArgs.Credentials.RealKUID, l.rootProcArgs.Envv)
+		var envv []string
+		if kernel.VFS2Enabled {
+			envv, err = user.MaybeAddExecUserHomeVFS2(ctx, l.rootProcArgs.MountNamespaceVFS2,
+				l.rootProcArgs.Credentials.RealKUID, l.rootProcArgs.Envv)
+
+		} else {
+			envv, err = user.MaybeAddExecUserHome(ctx, l.rootProcArgs.MountNamespace,
+				l.rootProcArgs.Credentials.RealKUID, l.rootProcArgs.Envv)
+		}
 		if err != nil {
 			return err
 		}
@@ -538,14 +598,16 @@ func (l *Loader) run() error {
 		ep.pidnsPath = ns.Path
 	}
 	if l.console {
-		ttyFile, _ := l.rootProcArgs.FDTable.Get(0)
-		defer ttyFile.DecRef()
-		ep.tty = ttyFile.FileOperations.(*host.TTYFileOperations)
-
-		// Set the foreground process group on the TTY to the global
-		// init process group, since that is what we are about to
-		// start running.
-		ep.tty.InitForegroundProcessGroup(ep.tg.ProcessGroup())
+		// Set the foreground process group on the TTY to the global init process
+		// group, since that is what we are about to start running.
+		switch {
+		case ttyFileVFS2 != nil:
+			ep.ttyVFS2 = ttyFileVFS2
+			ttyFileVFS2.InitForegroundProcessGroup(ep.tg.ProcessGroup())
+		case ttyFile != nil:
+			ep.tty = ttyFile
+			ttyFile.InitForegroundProcessGroup(ep.tg.ProcessGroup())
+		}
 	}
 
 	// Handle signals by forwarding them to the root container process
@@ -570,6 +632,19 @@ func (l *Loader) run() error {
 		}
 	})
 
+	// l.stdioFDs are derived from dup() in boot.New() and they are now dup()ed again
+	// either in createFDTable() during initial start or in descriptor.initAfterLoad()
+	// during restore, we can release l.stdioFDs now. VFS2 takes ownership of the
+	// passed FDs, so only close for VFS1.
+	if !kernel.VFS2Enabled {
+		for _, fd := range l.stdioFDs {
+			err := syscall.Close(fd)
+			if err != nil {
+				return fmt.Errorf("close dup()ed stdioFDs: %v", err)
+			}
+		}
+	}
+
 	log.Infof("Process should have started...")
 	l.watchdog.Start()
 	return l.k.Start()
@@ -653,7 +728,7 @@ func (l *Loader) startContainer(spec *specs.Spec, conf *Config, cid string, file
 
 	// Create the FD map, which will set stdin, stdout, and stderr.
 	ctx := procArgs.NewContext(l.k)
-	fdTable, err := createFDTable(ctx, false, stdioFDs)
+	fdTable, _, _, err := createFDTable(ctx, false, stdioFDs)
 	if err != nil {
 		return fmt.Errorf("importing fds: %v", err)
 	}
@@ -738,14 +813,14 @@ func (l *Loader) destroyContainer(cid string) error {
 	l.mu.Lock()
 	defer l.mu.Unlock()
 
-	_, _, started, err := l.threadGroupFromIDLocked(execID{cid: cid})
+	tg, err := l.tryThreadGroupFromIDLocked(execID{cid: cid})
 	if err != nil {
 		// Container doesn't exist.
 		return err
 	}
 
-	// The container exists, has it been started?
-	if started {
+	// The container exists, but has it been started?
+	if tg != nil {
 		if err := l.signalAllProcesses(cid, int32(linux.SIGKILL)); err != nil {
 			return fmt.Errorf("sending SIGKILL to all container processes: %v", err)
 		}
@@ -787,45 +862,65 @@ func (l *Loader) executeAsync(args *control.ExecArgs) (kernel.ThreadID, error) {
 	l.mu.Lock()
 	defer l.mu.Unlock()
 
-	tg, _, started, err := l.threadGroupFromIDLocked(execID{cid: args.ContainerID})
+	tg, err := l.tryThreadGroupFromIDLocked(execID{cid: args.ContainerID})
 	if err != nil {
 		return 0, err
 	}
-	if !started {
+	if tg == nil {
 		return 0, fmt.Errorf("container %q not started", args.ContainerID)
 	}
 
 	// Get the container MountNamespace from the Task.
-	tg.Leader().WithMuLocked(func(t *kernel.Task) {
-		// task.MountNamespace() does not take a ref, so we must do so
-		// ourselves.
-		args.MountNamespace = t.MountNamespace()
-		args.MountNamespace.IncRef()
-	})
-	defer args.MountNamespace.DecRef()
+	if kernel.VFS2Enabled {
+		// task.MountNamespace() does not take a ref, so we must do so ourselves.
+		args.MountNamespaceVFS2 = tg.Leader().MountNamespaceVFS2()
+		args.MountNamespaceVFS2.IncRef()
+	} else {
+		tg.Leader().WithMuLocked(func(t *kernel.Task) {
+			// task.MountNamespace() does not take a ref, so we must do so ourselves.
+			args.MountNamespace = t.MountNamespace()
+			args.MountNamespace.IncRef()
+		})
+	}
 
-	// Add the HOME enviroment varible if it is not already set.
-	root := args.MountNamespace.Root()
-	defer root.DecRef()
-	ctx := fs.WithRoot(l.k.SupervisorContext(), root)
-	envv, err := maybeAddExecUserHome(ctx, args.MountNamespace, args.KUID, args.Envv)
-	if err != nil {
-		return 0, err
+	// Add the HOME environment variable if it is not already set.
+	if kernel.VFS2Enabled {
+		defer args.MountNamespaceVFS2.DecRef()
+
+		root := args.MountNamespaceVFS2.Root()
+		defer root.DecRef()
+		ctx := vfs.WithRoot(l.k.SupervisorContext(), root)
+		envv, err := user.MaybeAddExecUserHomeVFS2(ctx, args.MountNamespaceVFS2, args.KUID, args.Envv)
+		if err != nil {
+			return 0, err
+		}
+		args.Envv = envv
+	} else {
+		defer args.MountNamespace.DecRef()
+
+		root := args.MountNamespace.Root()
+		defer root.DecRef()
+		ctx := fs.WithRoot(l.k.SupervisorContext(), root)
+		envv, err := user.MaybeAddExecUserHome(ctx, args.MountNamespace, args.KUID, args.Envv)
+		if err != nil {
+			return 0, err
+		}
+		args.Envv = envv
 	}
-	args.Envv = envv
 
 	// Start the process.
 	proc := control.Proc{Kernel: l.k}
 	args.PIDNamespace = tg.PIDNamespace()
-	newTG, tgid, ttyFile, err := control.ExecAsync(&proc, args)
+	newTG, tgid, ttyFile, ttyFileVFS2, err := control.ExecAsync(&proc, args)
 	if err != nil {
 		return 0, err
 	}
 
 	eid := execID{cid: args.ContainerID, pid: tgid}
 	l.processes[eid] = &execProcess{
-		tg:  newTG,
-		tty: ttyFile,
+		tg:      newTG,
+		tty:     ttyFile,
+		ttyVFS2: ttyFileVFS2,
 	}
 	log.Debugf("updated processes: %v", l.processes)
 
@@ -836,7 +931,7 @@ func (l *Loader) executeAsync(args *control.ExecArgs) (kernel.ThreadID, error) {
 func (l *Loader) waitContainer(cid string, waitStatus *uint32) error {
 	// Don't defer unlock, as doing so would make it impossible for
 	// multiple clients to wait on the same container.
-	tg, _, err := l.threadGroupFromID(execID{cid: cid})
+	tg, err := l.threadGroupFromID(execID{cid: cid})
 	if err != nil {
 		return fmt.Errorf("can't wait for container %q: %v", cid, err)
 	}
@@ -855,7 +950,7 @@ func (l *Loader) waitPID(tgid kernel.ThreadID, cid string, waitStatus *uint32) e
 
 	// Try to find a process that was exec'd
 	eid := execID{cid: cid, pid: tgid}
-	execTG, _, err := l.threadGroupFromID(eid)
+	execTG, err := l.threadGroupFromID(eid)
 	if err == nil {
 		ws := l.wait(execTG)
 		*waitStatus = ws
@@ -869,7 +964,7 @@ func (l *Loader) waitPID(tgid kernel.ThreadID, cid string, waitStatus *uint32) e
 
 	// The caller may be waiting on a process not started directly via exec.
 	// In this case, find the process in the container's PID namespace.
-	initTG, _, err := l.threadGroupFromID(execID{cid: cid})
+	initTG, err := l.threadGroupFromID(execID{cid: cid})
 	if err != nil {
 		return fmt.Errorf("waiting for PID %d: %v", tgid, err)
 	}
@@ -905,48 +1000,92 @@ func (l *Loader) WaitExit() kernel.ExitStatus {
 	return l.k.GlobalInit().ExitStatus()
 }
 
-func newEmptyNetworkStack(conf *Config, clock tcpip.Clock, uniqueID stack.UniqueID) (inet.Stack, error) {
+func newRootNetworkNamespace(conf *Config, clock tcpip.Clock, uniqueID stack.UniqueID) (*inet.Namespace, error) {
+	// Create an empty network stack because the network namespace may be empty at
+	// this point. Netns is configured before Run() is called. Netstack is
+	// configured using a control uRPC message. Host network is configured inside
+	// Run().
 	switch conf.Network {
 	case NetworkHost:
-		return hostinet.NewStack(), nil
+		// No network namespacing support for hostinet yet, hence creator is nil.
+		return inet.NewRootNamespace(hostinet.NewStack(), nil), nil
 
 	case NetworkNone, NetworkSandbox:
-		// NetworkNone sets up loopback using netstack.
-		netProtos := []stack.NetworkProtocol{ipv4.NewProtocol(), ipv6.NewProtocol(), arp.NewProtocol()}
-		transProtos := []stack.TransportProtocol{tcp.NewProtocol(), udp.NewProtocol(), icmp.NewProtocol4()}
-		s := netstack.Stack{stack.New(stack.Options{
-			NetworkProtocols:   netProtos,
-			TransportProtocols: transProtos,
-			Clock:              clock,
-			Stats:              netstack.Metrics,
-			HandleLocal:        true,
-			// Enable raw sockets for users with sufficient
-			// privileges.
-			RawFactory: raw.EndpointFactory{},
-			UniqueID:   uniqueID,
-		})}
-
-		// Enable SACK Recovery.
-		if err := s.Stack.SetTransportProtocolOption(tcp.ProtocolNumber, tcp.SACKEnabled(true)); err != nil {
-			return nil, fmt.Errorf("failed to enable SACK: %v", err)
+		s, err := newEmptySandboxNetworkStack(clock, uniqueID)
+		if err != nil {
+			return nil, err
+		}
+		creator := &sandboxNetstackCreator{
+			clock:    clock,
+			uniqueID: uniqueID,
 		}
+		return inet.NewRootNamespace(s, creator), nil
 
-		// Set default TTLs as required by socket/netstack.
-		s.Stack.SetNetworkProtocolOption(ipv4.ProtocolNumber, tcpip.DefaultTTLOption(netstack.DefaultTTL))
-		s.Stack.SetNetworkProtocolOption(ipv6.ProtocolNumber, tcpip.DefaultTTLOption(netstack.DefaultTTL))
+	default:
+		panic(fmt.Sprintf("invalid network configuration: %v", conf.Network))
+	}
 
-		// Enable Receive Buffer Auto-Tuning.
-		if err := s.Stack.SetTransportProtocolOption(tcp.ProtocolNumber, tcpip.ModerateReceiveBufferOption(true)); err != nil {
-			return nil, fmt.Errorf("SetTransportProtocolOption failed: %v", err)
-		}
+}
+
+func newEmptySandboxNetworkStack(clock tcpip.Clock, uniqueID stack.UniqueID) (inet.Stack, error) {
+	netProtos := []stack.NetworkProtocol{ipv4.NewProtocol(), ipv6.NewProtocol(), arp.NewProtocol()}
+	transProtos := []stack.TransportProtocol{tcp.NewProtocol(), udp.NewProtocol(), icmp.NewProtocol4()}
+	s := netstack.Stack{stack.New(stack.Options{
+		NetworkProtocols:   netProtos,
+		TransportProtocols: transProtos,
+		Clock:              clock,
+		Stats:              netstack.Metrics,
+		HandleLocal:        true,
+		// Enable raw sockets for users with sufficient
+		// privileges.
+		RawFactory: raw.EndpointFactory{},
+		UniqueID:   uniqueID,
+	})}
 
-		s.FillDefaultIPTables()
+	// Enable SACK Recovery.
+	if err := s.Stack.SetTransportProtocolOption(tcp.ProtocolNumber, tcp.SACKEnabled(true)); err != nil {
+		return nil, fmt.Errorf("failed to enable SACK: %v", err)
+	}
 
-		return &s, nil
+	// Set default TTLs as required by socket/netstack.
+	s.Stack.SetNetworkProtocolOption(ipv4.ProtocolNumber, tcpip.DefaultTTLOption(netstack.DefaultTTL))
+	s.Stack.SetNetworkProtocolOption(ipv6.ProtocolNumber, tcpip.DefaultTTLOption(netstack.DefaultTTL))
 
-	default:
-		panic(fmt.Sprintf("invalid network configuration: %v", conf.Network))
+	// Enable Receive Buffer Auto-Tuning.
+	if err := s.Stack.SetTransportProtocolOption(tcp.ProtocolNumber, tcpip.ModerateReceiveBufferOption(true)); err != nil {
+		return nil, fmt.Errorf("SetTransportProtocolOption failed: %v", err)
+	}
+
+	s.FillIPTablesMetadata()
+
+	return &s, nil
+}
+
+// sandboxNetstackCreator implements kernel.NetworkStackCreator.
+//
+// +stateify savable
+type sandboxNetstackCreator struct {
+	clock    tcpip.Clock
+	uniqueID stack.UniqueID
+}
+
+// CreateStack implements kernel.NetworkStackCreator.CreateStack.
+func (f *sandboxNetstackCreator) CreateStack() (inet.Stack, error) {
+	s, err := newEmptySandboxNetworkStack(f.clock, f.uniqueID)
+	if err != nil {
+		return nil, err
+	}
+
+	// Setup loopback.
+	n := &Network{Stack: s.(*netstack.Stack).Stack}
+	nicID := tcpip.NICID(f.uniqueID.UniqueID())
+	link := DefaultLoopbackLink
+	linkEP := loopback.New()
+	if err := n.createNICWithAddrs(nicID, link.Name, linkEP, link.Addresses); err != nil {
+		return nil, err
 	}
+
+	return s, nil
 }
 
 // signal sends a signal to one or more processes in a container. If PID is 0,
@@ -976,8 +1115,7 @@ func (l *Loader) signal(cid string, pid, signo int32, mode SignalDeliveryMode) e
 			return fmt.Errorf("PID (%d) cannot be set when signaling all processes", pid)
 		}
 		// Check that the container has actually started before signaling it.
-		_, _, err := l.threadGroupFromID(execID{cid: cid})
-		if err != nil {
+		if _, err := l.threadGroupFromID(execID{cid: cid}); err != nil {
 			return err
 		}
 		if err := l.signalAllProcesses(cid, signo); err != nil {
@@ -991,16 +1129,16 @@ func (l *Loader) signal(cid string, pid, signo int32, mode SignalDeliveryMode) e
 }
 
 func (l *Loader) signalProcess(cid string, tgid kernel.ThreadID, signo int32) error {
-	execTG, _, err := l.threadGroupFromID(execID{cid: cid, pid: tgid})
+	execTG, err := l.threadGroupFromID(execID{cid: cid, pid: tgid})
 	if err == nil {
 		// Send signal directly to the identified process.
-		return execTG.SendSignal(&arch.SignalInfo{Signo: signo})
+		return l.k.SendExternalSignalThreadGroup(execTG, &arch.SignalInfo{Signo: signo})
 	}
 
 	// The caller may be signaling a process not started directly via exec.
 	// In this case, find the process in the container's PID namespace and
 	// signal it.
-	initTG, _, err := l.threadGroupFromID(execID{cid: cid})
+	initTG, err := l.threadGroupFromID(execID{cid: cid})
 	if err != nil {
 		return fmt.Errorf("no thread group found: %v", err)
 	}
@@ -1011,25 +1149,43 @@ func (l *Loader) signalProcess(cid string, tgid kernel.ThreadID, signo int32) er
 	if tg.Leader().ContainerID() != cid {
 		return fmt.Errorf("process %d is part of a different container: %q", tgid, tg.Leader().ContainerID())
 	}
-	return tg.SendSignal(&arch.SignalInfo{Signo: signo})
+	return l.k.SendExternalSignalThreadGroup(tg, &arch.SignalInfo{Signo: signo})
 }
 
+// signalForegrondProcessGroup looks up foreground process group from the TTY
+// for the given "tgid" inside container "cid", and send the signal to it.
 func (l *Loader) signalForegrondProcessGroup(cid string, tgid kernel.ThreadID, signo int32) error {
-	// Lookup foreground process group from the TTY for the given process,
-	// and send the signal to it.
-	tg, tty, err := l.threadGroupFromID(execID{cid: cid, pid: tgid})
+	l.mu.Lock()
+	tg, err := l.tryThreadGroupFromIDLocked(execID{cid: cid, pid: tgid})
 	if err != nil {
+		l.mu.Unlock()
 		return fmt.Errorf("no thread group found: %v", err)
 	}
-	if tty == nil {
+	if tg == nil {
+		l.mu.Unlock()
+		return fmt.Errorf("container %q not started", cid)
+	}
+
+	tty, ttyVFS2, err := l.ttyFromIDLocked(execID{cid: cid, pid: tgid})
+	l.mu.Unlock()
+	if err != nil {
+		return fmt.Errorf("no thread group found: %v", err)
+	}
+
+	var pg *kernel.ProcessGroup
+	switch {
+	case ttyVFS2 != nil:
+		pg = ttyVFS2.ForegroundProcessGroup()
+	case tty != nil:
+		pg = tty.ForegroundProcessGroup()
+	default:
 		return fmt.Errorf("no TTY attached")
 	}
-	pg := tty.ForegroundProcessGroup()
 	if pg == nil {
 		// No foreground process group has been set. Signal the
 		// original thread group.
 		log.Warningf("No foreground process group for container %q and PID %d. Sending signal directly to PID %d.", cid, tgid, tgid)
-		return tg.SendSignal(&arch.SignalInfo{Signo: signo})
+		return l.k.SendExternalSignalThreadGroup(tg, &arch.SignalInfo{Signo: signo})
 	}
 	// Send the signal to all processes in the process group.
 	var lastErr error
@@ -1037,7 +1193,7 @@ func (l *Loader) signalForegrondProcessGroup(cid string, tgid kernel.ThreadID, s
 		if tg.ProcessGroup() != pg {
 			continue
 		}
-		if err := tg.SendSignal(&arch.SignalInfo{Signo: signo}); err != nil {
+		if err := l.k.SendExternalSignalThreadGroup(tg, &arch.SignalInfo{Signo: signo}); err != nil {
 			lastErr = err
 		}
 	}
@@ -1055,33 +1211,57 @@ func (l *Loader) signalAllProcesses(cid string, signo int32) error {
 	return l.k.SendContainerSignal(cid, &arch.SignalInfo{Signo: signo})
 }
 
-// threadGroupFromID same as threadGroupFromIDLocked except that it acquires
-// mutex before calling it.
-func (l *Loader) threadGroupFromID(key execID) (*kernel.ThreadGroup, *host.TTYFileOperations, error) {
+// threadGroupFromID is similar to tryThreadGroupFromIDLocked except that it
+// acquires mutex before calling it and fails in case container hasn't started
+// yet.
+func (l *Loader) threadGroupFromID(key execID) (*kernel.ThreadGroup, error) {
 	l.mu.Lock()
 	defer l.mu.Unlock()
-	tg, tty, ok, err := l.threadGroupFromIDLocked(key)
+	tg, err := l.tryThreadGroupFromIDLocked(key)
 	if err != nil {
-		return nil, nil, err
+		return nil, err
 	}
-	if !ok {
-		return nil, nil, fmt.Errorf("container %q not started", key.cid)
+	if tg == nil {
+		return nil, fmt.Errorf("container %q not started", key.cid)
 	}
-	return tg, tty, nil
+	return tg, nil
 }
 
-// threadGroupFromIDLocked returns the thread group and TTY for the given
-// execution ID. TTY may be nil if the process is not attached to a terminal.
-// Also returns a boolean indicating whether the container has already started.
-// Returns error if execution ID is invalid or if the container cannot be
-// found (maybe it has been deleted). Caller must hold 'mu'.
-func (l *Loader) threadGroupFromIDLocked(key execID) (*kernel.ThreadGroup, *host.TTYFileOperations, bool, error) {
+// tryThreadGroupFromIDLocked returns the thread group for the given execution
+// ID. It may return nil in case the container has not started yet. Returns
+// error if execution ID is invalid or if the container cannot be found (maybe
+// it has been deleted). Caller must hold 'mu'.
+func (l *Loader) tryThreadGroupFromIDLocked(key execID) (*kernel.ThreadGroup, error) {
 	ep := l.processes[key]
 	if ep == nil {
-		return nil, nil, false, fmt.Errorf("container %q not found", key.cid)
+		return nil, fmt.Errorf("container %q not found", key.cid)
 	}
-	if ep.tg == nil {
-		return nil, nil, false, nil
+	return ep.tg, nil
+}
+
+// ttyFromIDLocked returns the TTY files for the given execution ID. It may
+// return nil in case the container has not started yet. Returns error if
+// execution ID is invalid or if the container cannot be found (maybe it has
+// been deleted). Caller must hold 'mu'.
+func (l *Loader) ttyFromIDLocked(key execID) (*host.TTYFileOperations, *hostvfs2.TTYFileDescription, error) {
+	ep := l.processes[key]
+	if ep == nil {
+		return nil, nil, fmt.Errorf("container %q not found", key.cid)
+	}
+	return ep.tty, ep.ttyVFS2, nil
+}
+
+func createFDTable(ctx context.Context, console bool, stdioFDs []int) (*kernel.FDTable, *host.TTYFileOperations, *hostvfs2.TTYFileDescription, error) {
+	if len(stdioFDs) != 3 {
+		return nil, nil, nil, fmt.Errorf("stdioFDs should contain exactly 3 FDs (stdin, stdout, and stderr), but %d FDs received", len(stdioFDs))
+	}
+
+	k := kernel.KernelFromContext(ctx)
+	fdTable := k.NewFDTable()
+	ttyFile, ttyFileVFS2, err := fdimport.Import(ctx, fdTable, console, stdioFDs)
+	if err != nil {
+		fdTable.DecRef()
+		return nil, nil, nil, err
 	}
-	return ep.tg, ep.tty, true, nil
+	return fdTable, ttyFile, ttyFileVFS2, nil
 }
diff --git a/runsc/boot/loader_test.go b/runsc/boot/loader_test.go
index 44aa63196..e448fd773 100644
--- a/runsc/boot/loader_test.go
+++ b/runsc/boot/loader_test.go
@@ -24,11 +24,14 @@ import (
 	"time"
 
 	specs "github.com/opencontainers/runtime-spec/specs-go"
+	"golang.org/x/sys/unix"
 	"gvisor.dev/gvisor/pkg/control/server"
+	"gvisor.dev/gvisor/pkg/fspath"
 	"gvisor.dev/gvisor/pkg/log"
 	"gvisor.dev/gvisor/pkg/p9"
 	"gvisor.dev/gvisor/pkg/sentry/contexttest"
 	"gvisor.dev/gvisor/pkg/sentry/fs"
+	"gvisor.dev/gvisor/pkg/sentry/vfs"
 	"gvisor.dev/gvisor/pkg/sync"
 	"gvisor.dev/gvisor/pkg/unet"
 	"gvisor.dev/gvisor/runsc/fsgofer"
@@ -100,20 +103,29 @@ func startGofer(root string) (int, func(), error) {
 	return sandboxEnd, cleanup, nil
 }
 
-func createLoader() (*Loader, func(), error) {
+func createLoader(vfsEnabled bool, spec *specs.Spec) (*Loader, func(), error) {
 	fd, err := server.CreateSocket(ControlSocketAddr(fmt.Sprintf("%010d", rand.Int())[:10]))
 	if err != nil {
 		return nil, nil, err
 	}
 	conf := testConfig()
-	spec := testSpec()
+	conf.VFS2 = vfsEnabled
 
 	sandEnd, cleanup, err := startGofer(spec.Root.Path)
 	if err != nil {
 		return nil, nil, err
 	}
 
-	stdio := []int{int(os.Stdin.Fd()), int(os.Stdout.Fd()), int(os.Stderr.Fd())}
+	// Loader takes ownership of stdio.
+	var stdio []int
+	for _, f := range []*os.File{os.Stdin, os.Stdout, os.Stderr} {
+		newFd, err := unix.Dup(int(f.Fd()))
+		if err != nil {
+			return nil, nil, err
+		}
+		stdio = append(stdio, newFd)
+	}
+
 	args := Args{
 		ID:           "foo",
 		Spec:         spec,
@@ -132,10 +144,20 @@ func createLoader() (*Loader, func(), error) {
 
 // TestRun runs a simple application in a sandbox and checks that it succeeds.
 func TestRun(t *testing.T) {
-	l, cleanup, err := createLoader()
+	doRun(t, false)
+}
+
+// TestRunVFS2 runs TestRun in VFSv2.
+func TestRunVFS2(t *testing.T) {
+	doRun(t, true)
+}
+
+func doRun(t *testing.T, vfsEnabled bool) {
+	l, cleanup, err := createLoader(vfsEnabled, testSpec())
 	if err != nil {
 		t.Fatalf("error creating loader: %v", err)
 	}
+
 	defer l.Destroy()
 	defer cleanup()
 
@@ -169,7 +191,16 @@ func TestRun(t *testing.T) {
 // TestStartSignal tests that the controller Start message will cause
 // WaitForStartSignal to return.
 func TestStartSignal(t *testing.T) {
-	l, cleanup, err := createLoader()
+	doStartSignal(t, false)
+}
+
+// TestStartSignalVFS2 does TestStartSignal with VFS2.
+func TestStartSignalVFS2(t *testing.T) {
+	doStartSignal(t, true)
+}
+
+func doStartSignal(t *testing.T, vfsEnabled bool) {
+	l, cleanup, err := createLoader(vfsEnabled, testSpec())
 	if err != nil {
 		t.Fatalf("error creating loader: %v", err)
 	}
@@ -217,18 +248,19 @@ func TestStartSignal(t *testing.T) {
 
 }
 
-// Test that MountNamespace can be created with various specs.
-func TestCreateMountNamespace(t *testing.T) {
-	testCases := []struct {
-		name string
-		// Spec that will be used to create the mount manager.  Note
-		// that we can't mount procfs without a kernel, so each spec
-		// MUST contain something other than procfs mounted at /proc.
-		spec specs.Spec
-		// Paths that are expected to exist in the resulting fs.
-		expectedPaths []string
-	}{
-		{
+type CreateMountTestcase struct {
+	name string
+	// Spec that will be used to create the mount manager.  Note
+	// that we can't mount procfs without a kernel, so each spec
+	// MUST contain something other than procfs mounted at /proc.
+	spec specs.Spec
+	// Paths that are expected to exist in the resulting fs.
+	expectedPaths []string
+}
+
+func createMountTestcases(vfs2 bool) []*CreateMountTestcase {
+	testCases := []*CreateMountTestcase{
+		&CreateMountTestcase{
 			// Only proc.
 			name: "only proc mount",
 			spec: specs.Spec{
@@ -270,7 +302,7 @@ func TestCreateMountNamespace(t *testing.T) {
 			// /dev, and /sys.
 			expectedPaths: []string{"/some/very/very/deep/path", "/proc", "/dev", "/sys"},
 		},
-		{
+		&CreateMountTestcase{
 			// Mounts are nested inside each other.
 			name: "nested mounts",
 			spec: specs.Spec{
@@ -314,7 +346,7 @@ func TestCreateMountNamespace(t *testing.T) {
 			expectedPaths: []string{"/foo", "/foo/bar", "/foo/bar/baz", "/foo/qux",
 				"/foo/qux-quz", "/foo/some/very/very/deep/path", "/proc", "/dev", "/sys"},
 		},
-		{
+		&CreateMountTestcase{
 			name: "mount inside /dev",
 			spec: specs.Spec{
 				Root: &specs.Root{
@@ -357,40 +389,46 @@ func TestCreateMountNamespace(t *testing.T) {
 			},
 			expectedPaths: []string{"/proc", "/dev", "/dev/fd-foo", "/dev/foo", "/dev/bar", "/sys"},
 		},
-		{
-			name: "mounts inside mandatory mounts",
-			spec: specs.Spec{
-				Root: &specs.Root{
-					Path:     os.TempDir(),
-					Readonly: true,
+	}
+
+	vfsCase := &CreateMountTestcase{
+		name: "mounts inside mandatory mounts",
+		spec: specs.Spec{
+			Root: &specs.Root{
+				Path:     os.TempDir(),
+				Readonly: true,
+			},
+			Mounts: []specs.Mount{
+				{
+					Destination: "/proc",
+					Type:        "tmpfs",
 				},
-				Mounts: []specs.Mount{
-					{
-						Destination: "/proc",
-						Type:        "tmpfs",
-					},
-					// We don't include /sys, and /tmp in
-					// the spec, since they will be added
-					// automatically.
-					//
-					// Instead, add submounts inside these
-					// directories and make sure they are
-					// visible under the mandatory mounts.
-					{
-						Destination: "/sys/bar",
-						Type:        "tmpfs",
-					},
-					{
-						Destination: "/tmp/baz",
-						Type:        "tmpfs",
-					},
+				// TODO (gvisor.dev/issue/1487): Re-add this case when sysfs supports
+				//  MkDirAt in VFS2 (and remove the reduntant append).
+				// {
+				//		Destination: "/sys/bar",
+				//		Type:        "tmpfs",
+				//	},
+				//
+				{
+					Destination: "/tmp/baz",
+					Type:        "tmpfs",
 				},
 			},
-			expectedPaths: []string{"/proc", "/sys", "/sys/bar", "/tmp", "/tmp/baz"},
 		},
+		expectedPaths: []string{"/proc", "/sys" /* "/sys/bar" ,*/, "/tmp", "/tmp/baz"},
 	}
 
-	for _, tc := range testCases {
+	if !vfs2 {
+		vfsCase.spec.Mounts = append(vfsCase.spec.Mounts, specs.Mount{Destination: "/sys/bar", Type: "tmpfs"})
+		vfsCase.expectedPaths = append(vfsCase.expectedPaths, "/sys/bar")
+	}
+	return append(testCases, vfsCase)
+}
+
+// Test that MountNamespace can be created with various specs.
+func TestCreateMountNamespace(t *testing.T) {
+	for _, tc := range createMountTestcases(false /* vfs2 */) {
 		t.Run(tc.name, func(t *testing.T) {
 			conf := testConfig()
 			ctx := contexttest.Context(t)
@@ -425,6 +463,52 @@ func TestCreateMountNamespace(t *testing.T) {
 	}
 }
 
+// Test that MountNamespace can be created with various specs.
+func TestCreateMountNamespaceVFS2(t *testing.T) {
+	for _, tc := range createMountTestcases(true /* vfs2 */) {
+		t.Run(tc.name, func(t *testing.T) {
+			spec := testSpec()
+			spec.Mounts = tc.spec.Mounts
+			spec.Root = tc.spec.Root
+
+			t.Logf("Using root: %q", spec.Root.Path)
+			l, loaderCleanup, err := createLoader(true /* VFS2 Enabled */, spec)
+			if err != nil {
+				t.Fatalf("failed to create loader: %v", err)
+			}
+			defer l.Destroy()
+			defer loaderCleanup()
+
+			mntr := newContainerMounter(l.spec, l.goferFDs, l.k, l.mountHints)
+			if err := mntr.processHints(l.conf); err != nil {
+				t.Fatalf("failed process hints: %v", err)
+			}
+
+			ctx := l.k.SupervisorContext()
+			mns, err := mntr.setupVFS2(ctx, l.conf, &l.rootProcArgs)
+			if err != nil {
+				t.Fatalf("failed to setupVFS2: %v", err)
+			}
+
+			root := mns.Root()
+			defer root.DecRef()
+			for _, p := range tc.expectedPaths {
+				target := &vfs.PathOperation{
+					Root:  root,
+					Start: root,
+					Path:  fspath.Parse(p),
+				}
+
+				if d, err := l.k.VFS().GetDentryAt(ctx, l.rootProcArgs.Credentials, target, &vfs.GetDentryOptions{}); err != nil {
+					t.Errorf("expected path %v to exist with spec %v, but got error %v", p, tc.spec, err)
+				} else {
+					d.DecRef()
+				}
+			}
+		})
+	}
+}
+
 // TestRestoreEnvironment tests that the correct mounts are collected from the spec and config
 // in order to build the environment for restoring.
 func TestRestoreEnvironment(t *testing.T) {
diff --git a/runsc/boot/network.go b/runsc/boot/network.go
index 6a8765ec8..0af30456e 100644
--- a/runsc/boot/network.go
+++ b/runsc/boot/network.go
@@ -17,12 +17,15 @@ package boot
 import (
 	"fmt"
 	"net"
+	"runtime"
+	"strings"
 	"syscall"
 
 	"gvisor.dev/gvisor/pkg/log"
 	"gvisor.dev/gvisor/pkg/tcpip"
 	"gvisor.dev/gvisor/pkg/tcpip/link/fdbased"
 	"gvisor.dev/gvisor/pkg/tcpip/link/loopback"
+	"gvisor.dev/gvisor/pkg/tcpip/link/qdisc/fifo"
 	"gvisor.dev/gvisor/pkg/tcpip/link/sniffer"
 	"gvisor.dev/gvisor/pkg/tcpip/network/arp"
 	"gvisor.dev/gvisor/pkg/tcpip/network/ipv4"
@@ -31,6 +34,32 @@ import (
 	"gvisor.dev/gvisor/pkg/urpc"
 )
 
+var (
+	// DefaultLoopbackLink contains IP addresses and routes of "127.0.0.1/8" and
+	// "::1/8" on "lo" interface.
+	DefaultLoopbackLink = LoopbackLink{
+		Name: "lo",
+		Addresses: []net.IP{
+			net.IP("\x7f\x00\x00\x01"),
+			net.IPv6loopback,
+		},
+		Routes: []Route{
+			{
+				Destination: net.IPNet{
+					IP:   net.IPv4(0x7f, 0, 0, 0),
+					Mask: net.IPv4Mask(0xff, 0, 0, 0),
+				},
+			},
+			{
+				Destination: net.IPNet{
+					IP:   net.IPv6loopback,
+					Mask: net.IPMask(strings.Repeat("\xff", net.IPv6len)),
+				},
+			},
+		},
+	}
+)
+
 // Network exposes methods that can be used to configure a network stack.
 type Network struct {
 	Stack *stack.Stack
@@ -48,6 +77,44 @@ type DefaultRoute struct {
 	Name  string
 }
 
+// QueueingDiscipline is used to specify the kind of Queueing Discipline to
+// apply for a give FDBasedLink.
+type QueueingDiscipline int
+
+const (
+	// QDiscNone disables any queueing for the underlying FD.
+	QDiscNone QueueingDiscipline = iota
+
+	// QDiscFIFO applies a simple fifo based queue to the underlying
+	// FD.
+	QDiscFIFO
+)
+
+// MakeQueueingDiscipline if possible the equivalent QueuingDiscipline for s
+// else returns an error.
+func MakeQueueingDiscipline(s string) (QueueingDiscipline, error) {
+	switch s {
+	case "none":
+		return QDiscNone, nil
+	case "fifo":
+		return QDiscFIFO, nil
+	default:
+		return 0, fmt.Errorf("unsupported qdisc specified: %q", s)
+	}
+}
+
+// String implements fmt.Stringer.
+func (q QueueingDiscipline) String() string {
+	switch q {
+	case QDiscNone:
+		return "none"
+	case QDiscFIFO:
+		return "fifo"
+	default:
+		panic(fmt.Sprintf("Invalid queueing discipline: %d", q))
+	}
+}
+
 // FDBasedLink configures an fd-based link.
 type FDBasedLink struct {
 	Name               string
@@ -57,6 +124,7 @@ type FDBasedLink struct {
 	GSOMaxSize         uint32
 	SoftwareGSOEnabled bool
 	LinkAddress        net.HardwareAddr
+	QDisc              QueueingDiscipline
 
 	// NumChannels controls how many underlying FD's are to be used to
 	// create this endpoint.
@@ -158,6 +226,8 @@ func (n *Network) CreateLinksAndRoutes(args *CreateLinksAndRoutesArgs, _ *struct
 		}
 
 		mac := tcpip.LinkAddress(link.LinkAddress)
+		log.Infof("gso max size is: %d", link.GSOMaxSize)
+
 		linkEP, err := fdbased.New(&fdbased.Options{
 			FDs:                FDs,
 			MTU:                uint32(link.MTU),
@@ -172,6 +242,13 @@ func (n *Network) CreateLinksAndRoutes(args *CreateLinksAndRoutesArgs, _ *struct
 			return err
 		}
 
+		switch link.QDisc {
+		case QDiscNone:
+		case QDiscFIFO:
+			log.Infof("Enabling FIFO QDisc on %q", link.Name)
+			linkEP = fifo.New(linkEP, runtime.GOMAXPROCS(0), 1000)
+		}
+
 		log.Infof("Enabling interface %q with id %d on addresses %+v (%v) w/ %d channels", link.Name, nicID, link.Addresses, mac, link.NumChannels)
 		if err := n.createNICWithAddrs(nicID, link.Name, linkEP, link.Addresses); err != nil {
 			return err
diff --git a/runsc/boot/pprof/BUILD b/runsc/boot/pprof/BUILD
new file mode 100644
index 000000000..29cb42b2f
--- /dev/null
+++ b/runsc/boot/pprof/BUILD
@@ -0,0 +1,11 @@
+load("//tools:defs.bzl", "go_library")
+
+package(licenses = ["notice"])
+
+go_library(
+    name = "pprof",
+    srcs = ["pprof.go"],
+    visibility = [
+        "//runsc:__subpackages__",
+    ],
+)
diff --git a/runsc/boot/pprof.go b/runsc/boot/pprof/pprof.go
index 463362f02..1ded20dee 100644
--- a/runsc/boot/pprof.go
+++ b/runsc/boot/pprof/pprof.go
@@ -12,7 +12,9 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-package boot
+// Package pprof provides a stub to initialize custom profilers.
+package pprof
 
-func initializePProf() {
+// Initialize will be called at boot for initializing custom profilers.
+func Initialize() {
 }
diff --git a/runsc/boot/user.go b/runsc/boot/user.go
deleted file mode 100644
index f0aa52135..000000000
--- a/runsc/boot/user.go
+++ /dev/null
@@ -1,170 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package boot
-
-import (
-	"bufio"
-	"fmt"
-	"io"
-	"strconv"
-	"strings"
-
-	"gvisor.dev/gvisor/pkg/abi/linux"
-	"gvisor.dev/gvisor/pkg/context"
-	"gvisor.dev/gvisor/pkg/sentry/fs"
-	"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
-	"gvisor.dev/gvisor/pkg/usermem"
-)
-
-type fileReader struct {
-	// Ctx is the context for the file reader.
-	Ctx context.Context
-
-	// File is the file to read from.
-	File *fs.File
-}
-
-// Read implements io.Reader.Read.
-func (r *fileReader) Read(buf []byte) (int, error) {
-	n, err := r.File.Readv(r.Ctx, usermem.BytesIOSequence(buf))
-	return int(n), err
-}
-
-// getExecUserHome returns the home directory of the executing user read from
-// /etc/passwd as read from the container filesystem.
-func getExecUserHome(ctx context.Context, rootMns *fs.MountNamespace, uid auth.KUID) (string, error) {
-	// The default user home directory to return if no user matching the user
-	// if found in the /etc/passwd found in the image.
-	const defaultHome = "/"
-
-	// Open the /etc/passwd file from the dirent via the root mount namespace.
-	mnsRoot := rootMns.Root()
-	maxTraversals := uint(linux.MaxSymlinkTraversals)
-	dirent, err := rootMns.FindInode(ctx, mnsRoot, nil, "/etc/passwd", &maxTraversals)
-	if err != nil {
-		// NOTE: Ignore errors opening the passwd file. If the passwd file
-		// doesn't exist we will return the default home directory.
-		return defaultHome, nil
-	}
-	defer dirent.DecRef()
-
-	// Check read permissions on the file.
-	if err := dirent.Inode.CheckPermission(ctx, fs.PermMask{Read: true}); err != nil {
-		// NOTE: Ignore permissions errors here and return default root dir.
-		return defaultHome, nil
-	}
-
-	// Only open regular files. We don't open other files like named pipes as
-	// they may block and might present some attack surface to the container.
-	// Note that runc does not seem to do this kind of checking.
-	if !fs.IsRegular(dirent.Inode.StableAttr) {
-		return defaultHome, nil
-	}
-
-	f, err := dirent.Inode.GetFile(ctx, dirent, fs.FileFlags{Read: true, Directory: false})
-	if err != nil {
-		return "", err
-	}
-	defer f.DecRef()
-
-	r := &fileReader{
-		Ctx:  ctx,
-		File: f,
-	}
-
-	homeDir, err := findHomeInPasswd(uint32(uid), r, defaultHome)
-	if err != nil {
-		return "", err
-	}
-
-	return homeDir, nil
-}
-
-// maybeAddExecUserHome returns a new slice with the HOME enviroment variable
-// set if the slice does not already contain it, otherwise it returns the
-// original slice unmodified.
-func maybeAddExecUserHome(ctx context.Context, mns *fs.MountNamespace, uid auth.KUID, envv []string) ([]string, error) {
-	// Check if the envv already contains HOME.
-	for _, env := range envv {
-		if strings.HasPrefix(env, "HOME=") {
-			// We have it. Return the original slice unmodified.
-			return envv, nil
-		}
-	}
-
-	// Read /etc/passwd for the user's HOME directory and set the HOME
-	// environment variable as required by POSIX if it is not overridden by
-	// the user.
-	homeDir, err := getExecUserHome(ctx, mns, uid)
-	if err != nil {
-		return nil, fmt.Errorf("error reading exec user: %v", err)
-	}
-	return append(envv, "HOME="+homeDir), nil
-}
-
-// findHomeInPasswd parses a passwd file and returns the given user's home
-// directory. This function does it's best to replicate the runc's behavior.
-func findHomeInPasswd(uid uint32, passwd io.Reader, defaultHome string) (string, error) {
-	s := bufio.NewScanner(passwd)
-
-	for s.Scan() {
-		if err := s.Err(); err != nil {
-			return "", err
-		}
-
-		line := strings.TrimSpace(s.Text())
-		if line == "" {
-			continue
-		}
-
-		// Pull out part of passwd entry. Loosely parse the passwd entry as some
-		// passwd files could be poorly written and for compatibility with runc.
-		//
-		// Per 'man 5 passwd'
-		// /etc/passwd contains one line for each user account, with seven
-		// fields delimited by colons (“:”). These fields are:
-		//
-		// - login name
-		// - optional encrypted password
-		// - numerical user ID
-		// - numerical group ID
-		// - user name or comment field
-		// - user home directory
-		// - optional user command interpreter
-		parts := strings.Split(line, ":")
-
-		found := false
-		homeDir := ""
-		for i, p := range parts {
-			switch i {
-			case 2:
-				parsedUID, err := strconv.ParseUint(p, 10, 32)
-				if err == nil && parsedUID == uint64(uid) {
-					found = true
-				}
-			case 5:
-				homeDir = p
-			}
-		}
-		if found {
-			// NOTE: If the uid is present but the home directory is not
-			// present in the /etc/passwd entry we return an empty string. This
-			// is, for better or worse, what runc does.
-			return homeDir, nil
-		}
-	}
-
-	return defaultHome, nil
-}
diff --git a/runsc/boot/user_test.go b/runsc/boot/user_test.go
deleted file mode 100644
index fb4e13dfb..000000000
--- a/runsc/boot/user_test.go
+++ /dev/null
@@ -1,254 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package boot
-
-import (
-	"io/ioutil"
-	"os"
-	"path/filepath"
-	"strings"
-	"syscall"
-	"testing"
-
-	specs "github.com/opencontainers/runtime-spec/specs-go"
-	"gvisor.dev/gvisor/pkg/sentry/contexttest"
-	"gvisor.dev/gvisor/pkg/sentry/fs"
-	"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
-)
-
-func setupTempDir() (string, error) {
-	tmpDir, err := ioutil.TempDir(os.TempDir(), "exec-user-test")
-	if err != nil {
-		return "", err
-	}
-	return tmpDir, nil
-}
-
-func setupPasswd(contents string, perms os.FileMode) func() (string, error) {
-	return func() (string, error) {
-		tmpDir, err := setupTempDir()
-		if err != nil {
-			return "", err
-		}
-
-		if err := os.Mkdir(filepath.Join(tmpDir, "etc"), 0777); err != nil {
-			return "", err
-		}
-
-		f, err := os.Create(filepath.Join(tmpDir, "etc", "passwd"))
-		if err != nil {
-			return "", err
-		}
-		defer f.Close()
-
-		_, err = f.WriteString(contents)
-		if err != nil {
-			return "", err
-		}
-
-		err = f.Chmod(perms)
-		if err != nil {
-			return "", err
-		}
-		return tmpDir, nil
-	}
-}
-
-// TestGetExecUserHome tests the getExecUserHome function.
-func TestGetExecUserHome(t *testing.T) {
-	tests := map[string]struct {
-		uid        auth.KUID
-		createRoot func() (string, error)
-		expected   string
-	}{
-		"success": {
-			uid:        1000,
-			createRoot: setupPasswd("adin::1000:1111::/home/adin:/bin/sh", 0666),
-			expected:   "/home/adin",
-		},
-		"no_passwd": {
-			uid:        1000,
-			createRoot: setupTempDir,
-			expected:   "/",
-		},
-		"no_perms": {
-			uid:        1000,
-			createRoot: setupPasswd("adin::1000:1111::/home/adin:/bin/sh", 0000),
-			expected:   "/",
-		},
-		"directory": {
-			uid: 1000,
-			createRoot: func() (string, error) {
-				tmpDir, err := setupTempDir()
-				if err != nil {
-					return "", err
-				}
-
-				if err := os.Mkdir(filepath.Join(tmpDir, "etc"), 0777); err != nil {
-					return "", err
-				}
-
-				if err := syscall.Mkdir(filepath.Join(tmpDir, "etc", "passwd"), 0666); err != nil {
-					return "", err
-				}
-
-				return tmpDir, nil
-			},
-			expected: "/",
-		},
-		// Currently we don't allow named pipes.
-		"named_pipe": {
-			uid: 1000,
-			createRoot: func() (string, error) {
-				tmpDir, err := setupTempDir()
-				if err != nil {
-					return "", err
-				}
-
-				if err := os.Mkdir(filepath.Join(tmpDir, "etc"), 0777); err != nil {
-					return "", err
-				}
-
-				if err := syscall.Mkfifo(filepath.Join(tmpDir, "etc", "passwd"), 0666); err != nil {
-					return "", err
-				}
-
-				return tmpDir, nil
-			},
-			expected: "/",
-		},
-	}
-
-	for name, tc := range tests {
-		t.Run(name, func(t *testing.T) {
-			tmpDir, err := tc.createRoot()
-			if err != nil {
-				t.Fatalf("failed to create root dir: %v", err)
-			}
-
-			sandEnd, cleanup, err := startGofer(tmpDir)
-			if err != nil {
-				t.Fatalf("failed to create gofer: %v", err)
-			}
-			defer cleanup()
-
-			ctx := contexttest.Context(t)
-			conf := &Config{
-				RootDir:        "unused_root_dir",
-				Network:        NetworkNone,
-				DisableSeccomp: true,
-			}
-
-			spec := &specs.Spec{
-				Root: &specs.Root{
-					Path:     tmpDir,
-					Readonly: true,
-				},
-				// Add /proc mount as tmpfs to avoid needing a kernel.
-				Mounts: []specs.Mount{
-					{
-						Destination: "/proc",
-						Type:        "tmpfs",
-					},
-				},
-			}
-
-			mntr := newContainerMounter(spec, []int{sandEnd}, nil, &podMountHints{})
-			mns, err := mntr.createMountNamespace(ctx, conf)
-			if err != nil {
-				t.Fatalf("failed to create mount namespace: %v", err)
-			}
-			ctx = fs.WithRoot(ctx, mns.Root())
-			if err := mntr.mountSubmounts(ctx, conf, mns); err != nil {
-				t.Fatalf("failed to create mount namespace: %v", err)
-			}
-
-			got, err := getExecUserHome(ctx, mns, tc.uid)
-			if err != nil {
-				t.Fatalf("failed to get user home: %v", err)
-			}
-
-			if got != tc.expected {
-				t.Fatalf("expected %v, got: %v", tc.expected, got)
-			}
-		})
-	}
-}
-
-// TestFindHomeInPasswd tests the findHomeInPasswd function's passwd file parsing.
-func TestFindHomeInPasswd(t *testing.T) {
-	tests := map[string]struct {
-		uid      uint32
-		passwd   string
-		expected string
-		def      string
-	}{
-		"empty": {
-			uid:      1000,
-			passwd:   "",
-			expected: "/",
-			def:      "/",
-		},
-		"whitespace": {
-			uid:      1000,
-			passwd:   "       ",
-			expected: "/",
-			def:      "/",
-		},
-		"full": {
-			uid:      1000,
-			passwd:   "adin::1000:1111::/home/adin:/bin/sh",
-			expected: "/home/adin",
-			def:      "/",
-		},
-		// For better or worse, this is how runc works.
-		"partial": {
-			uid:      1000,
-			passwd:   "adin::1000:1111:",
-			expected: "",
-			def:      "/",
-		},
-		"multiple": {
-			uid:      1001,
-			passwd:   "adin::1000:1111::/home/adin:/bin/sh\nian::1001:1111::/home/ian:/bin/sh",
-			expected: "/home/ian",
-			def:      "/",
-		},
-		"duplicate": {
-			uid:      1000,
-			passwd:   "adin::1000:1111::/home/adin:/bin/sh\nian::1000:1111::/home/ian:/bin/sh",
-			expected: "/home/adin",
-			def:      "/",
-		},
-		"empty_lines": {
-			uid:      1001,
-			passwd:   "adin::1000:1111::/home/adin:/bin/sh\n\n\nian::1001:1111::/home/ian:/bin/sh",
-			expected: "/home/ian",
-			def:      "/",
-		},
-	}
-
-	for name, tc := range tests {
-		t.Run(name, func(t *testing.T) {
-			got, err := findHomeInPasswd(tc.uid, strings.NewReader(tc.passwd), tc.def)
-			if err != nil {
-				t.Fatalf("error parsing passwd: %v", err)
-			}
-			if tc.expected != got {
-				t.Fatalf("expected %v, got: %v", tc.expected, got)
-			}
-		})
-	}
-}
diff --git a/runsc/boot/vfs.go b/runsc/boot/vfs.go
new file mode 100644
index 000000000..7ed6801b4
--- /dev/null
+++ b/runsc/boot/vfs.go
@@ -0,0 +1,375 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package boot
+
+import (
+	"fmt"
+	"path"
+	"sort"
+	"strings"
+
+	specs "github.com/opencontainers/runtime-spec/specs-go"
+	"gvisor.dev/gvisor/pkg/abi/linux"
+	"gvisor.dev/gvisor/pkg/context"
+	"gvisor.dev/gvisor/pkg/fspath"
+	"gvisor.dev/gvisor/pkg/log"
+	"gvisor.dev/gvisor/pkg/sentry/devices/memdev"
+	"gvisor.dev/gvisor/pkg/sentry/fs/user"
+	"gvisor.dev/gvisor/pkg/sentry/fsimpl/devpts"
+	"gvisor.dev/gvisor/pkg/sentry/fsimpl/devtmpfs"
+	"gvisor.dev/gvisor/pkg/sentry/fsimpl/gofer"
+	"gvisor.dev/gvisor/pkg/sentry/fsimpl/proc"
+	"gvisor.dev/gvisor/pkg/sentry/fsimpl/sys"
+	"gvisor.dev/gvisor/pkg/sentry/fsimpl/tmpfs"
+	"gvisor.dev/gvisor/pkg/sentry/kernel"
+	"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+	"gvisor.dev/gvisor/pkg/sentry/vfs"
+	"gvisor.dev/gvisor/pkg/syserror"
+)
+
+func registerFilesystems(ctx context.Context, vfsObj *vfs.VirtualFilesystem, creds *auth.Credentials) error {
+	vfsObj.MustRegisterFilesystemType(devpts.Name, &devpts.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{
+		AllowUserList: true,
+		// TODO(b/29356795): Users may mount this once the terminals are in a
+		//  usable state.
+		AllowUserMount: false,
+	})
+	vfsObj.MustRegisterFilesystemType(devtmpfs.Name, &devtmpfs.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{
+		AllowUserMount: true,
+		AllowUserList:  true,
+	})
+	vfsObj.MustRegisterFilesystemType(gofer.Name, &gofer.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{
+		AllowUserList: true,
+	})
+	vfsObj.MustRegisterFilesystemType(proc.Name, &proc.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{
+		AllowUserMount: true,
+		AllowUserList:  true,
+	})
+	vfsObj.MustRegisterFilesystemType(sys.Name, &sys.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{
+		AllowUserMount: true,
+		AllowUserList:  true,
+	})
+	vfsObj.MustRegisterFilesystemType(tmpfs.Name, &tmpfs.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{
+		AllowUserMount: true,
+		AllowUserList:  true,
+	})
+
+	// Setup files in devtmpfs.
+	if err := memdev.Register(vfsObj); err != nil {
+		return fmt.Errorf("registering memdev: %w", err)
+	}
+	a, err := devtmpfs.NewAccessor(ctx, vfsObj, creds, devtmpfs.Name)
+	if err != nil {
+		return fmt.Errorf("creating devtmpfs accessor: %w", err)
+	}
+	defer a.Release()
+
+	if err := a.UserspaceInit(ctx); err != nil {
+		return fmt.Errorf("initializing userspace: %w", err)
+	}
+	if err := memdev.CreateDevtmpfsFiles(ctx, a); err != nil {
+		return fmt.Errorf("creating devtmpfs files: %w", err)
+	}
+	return nil
+}
+
+func setupContainerVFS2(ctx context.Context, conf *Config, mntr *containerMounter, procArgs *kernel.CreateProcessArgs) error {
+	if err := mntr.k.VFS().Init(); err != nil {
+		return fmt.Errorf("failed to initialize VFS: %w", err)
+	}
+	mns, err := mntr.setupVFS2(ctx, conf, procArgs)
+	if err != nil {
+		return fmt.Errorf("failed to setupFS: %w", err)
+	}
+	procArgs.MountNamespaceVFS2 = mns
+
+	// Resolve the executable path from working dir and environment.
+	f, err := user.ResolveExecutablePathVFS2(ctx, procArgs.Credentials, procArgs.MountNamespaceVFS2, procArgs.Envv, procArgs.WorkingDirectory, procArgs.Argv[0])
+	if err != nil {
+		return fmt.Errorf("searching for executable %q, cwd: %q, envv: %q: %v", procArgs.Argv[0], procArgs.WorkingDirectory, procArgs.Envv, err)
+	}
+	procArgs.Filename = f
+	return nil
+}
+
+func (c *containerMounter) setupVFS2(ctx context.Context, conf *Config, procArgs *kernel.CreateProcessArgs) (*vfs.MountNamespace, error) {
+	log.Infof("Configuring container's file system with VFS2")
+
+	// Create context with root credentials to mount the filesystem (the current
+	// user may not be privileged enough).
+	rootCreds := auth.NewRootCredentials(procArgs.Credentials.UserNamespace)
+	rootProcArgs := *procArgs
+	rootProcArgs.WorkingDirectory = "/"
+	rootProcArgs.Credentials = rootCreds
+	rootProcArgs.Umask = 0022
+	rootProcArgs.MaxSymlinkTraversals = linux.MaxSymlinkTraversals
+	rootCtx := procArgs.NewContext(c.k)
+
+	if err := registerFilesystems(rootCtx, c.k.VFS(), rootCreds); err != nil {
+		return nil, fmt.Errorf("register filesystems: %w", err)
+	}
+
+	mns, err := c.createMountNamespaceVFS2(rootCtx, conf, rootCreds)
+	if err != nil {
+		return nil, fmt.Errorf("creating mount namespace: %w", err)
+	}
+	rootProcArgs.MountNamespaceVFS2 = mns
+
+	// Mount submounts.
+	if err := c.mountSubmountsVFS2(rootCtx, conf, mns, rootCreds); err != nil {
+		return nil, fmt.Errorf("mounting submounts vfs2: %w", err)
+	}
+	return mns, nil
+}
+
+func (c *containerMounter) createMountNamespaceVFS2(ctx context.Context, conf *Config, creds *auth.Credentials) (*vfs.MountNamespace, error) {
+	fd := c.fds.remove()
+	opts := strings.Join(p9MountData(fd, conf.FileAccess, true /* vfs2 */), ",")
+
+	log.Infof("Mounting root over 9P, ioFD: %d", fd)
+	mns, err := c.k.VFS().NewMountNamespace(ctx, creds, "", gofer.Name, &vfs.GetFilesystemOptions{Data: opts})
+	if err != nil {
+		return nil, fmt.Errorf("setting up mount namespace: %w", err)
+	}
+	return mns, nil
+}
+
+func (c *containerMounter) mountSubmountsVFS2(ctx context.Context, conf *Config, mns *vfs.MountNamespace, creds *auth.Credentials) error {
+	mounts, err := c.prepareMountsVFS2()
+	if err != nil {
+		return err
+	}
+
+	for i := range mounts {
+		submount := &mounts[i]
+		log.Debugf("Mounting %q to %q, type: %s, options: %s", submount.Source, submount.Destination, submount.Type, submount.Options)
+		if err := c.mountSubmountVFS2(ctx, conf, mns, creds, submount); err != nil {
+			return err
+		}
+	}
+
+	if err := c.mountTmpVFS2(ctx, conf, creds, mns); err != nil {
+		return fmt.Errorf(`mount submount "\tmp": %w`, err)
+	}
+	return nil
+}
+
+type mountAndFD struct {
+	specs.Mount
+	fd int
+}
+
+func (c *containerMounter) prepareMountsVFS2() ([]mountAndFD, error) {
+	// Associate bind mounts with their FDs before sorting since there is an
+	// undocumented assumption that FDs are dispensed in the order in which
+	// they are required by mounts.
+	var mounts []mountAndFD
+	for _, m := range c.mounts {
+		fd := -1
+		// Only bind mounts use host FDs; see
+		// containerMounter.getMountNameAndOptionsVFS2.
+		if m.Type == bind {
+			fd = c.fds.remove()
+		}
+		mounts = append(mounts, mountAndFD{
+			Mount: m,
+			fd:    fd,
+		})
+	}
+	if err := c.checkDispenser(); err != nil {
+		return nil, err
+	}
+
+	// Sort the mounts so that we don't place children before parents.
+	sort.Slice(mounts, func(i, j int) bool {
+		return len(mounts[i].Destination) < len(mounts[j].Destination)
+	})
+
+	return mounts, nil
+}
+
+func (c *containerMounter) mountSubmountVFS2(ctx context.Context, conf *Config, mns *vfs.MountNamespace, creds *auth.Credentials, submount *mountAndFD) error {
+	root := mns.Root()
+	defer root.DecRef()
+	target := &vfs.PathOperation{
+		Root:  root,
+		Start: root,
+		Path:  fspath.Parse(submount.Destination),
+	}
+	fsName, opts, err := c.getMountNameAndOptionsVFS2(conf, submount)
+	if err != nil {
+		return fmt.Errorf("mountOptions failed: %w", err)
+	}
+	if len(fsName) == 0 {
+		// Filesystem is not supported (e.g. cgroup), just skip it.
+		return nil
+	}
+
+	if err := c.makeSyntheticMount(ctx, submount.Destination, root, creds); err != nil {
+		return err
+	}
+	if err := c.k.VFS().MountAt(ctx, creds, "", target, fsName, opts); err != nil {
+		return fmt.Errorf("failed to mount %q (type: %s): %w, opts: %v", submount.Destination, submount.Type, err, opts)
+	}
+	log.Infof("Mounted %q to %q type: %s, internal-options: %q", submount.Source, submount.Destination, submount.Type, opts.GetFilesystemOptions.Data)
+	return nil
+}
+
+// getMountNameAndOptionsVFS2 retrieves the fsName, opts, and useOverlay values
+// used for mounts.
+func (c *containerMounter) getMountNameAndOptionsVFS2(conf *Config, m *mountAndFD) (string, *vfs.MountOptions, error) {
+	var (
+		fsName string
+		data   []string
+	)
+
+	// Find filesystem name and FS specific data field.
+	switch m.Type {
+	case devpts.Name, devtmpfs.Name, proc.Name, sys.Name:
+		fsName = m.Type
+	case nonefs:
+		fsName = sys.Name
+	case tmpfs.Name:
+		fsName = m.Type
+
+		var err error
+		data, err = parseAndFilterOptions(m.Options, tmpfsAllowedData...)
+		if err != nil {
+			return "", nil, err
+		}
+
+	case bind:
+		fsName = gofer.Name
+		data = p9MountData(m.fd, c.getMountAccessType(m.Mount), true /* vfs2 */)
+
+	default:
+		log.Warningf("ignoring unknown filesystem type %q", m.Type)
+	}
+
+	opts := &vfs.MountOptions{
+		GetFilesystemOptions: vfs.GetFilesystemOptions{
+			Data: strings.Join(data, ","),
+		},
+		InternalMount: true,
+	}
+
+	for _, o := range m.Options {
+		switch o {
+		case "rw":
+			opts.ReadOnly = false
+		case "ro":
+			opts.ReadOnly = true
+		case "noatime":
+			opts.Flags.NoATime = true
+		case "noexec":
+			opts.Flags.NoExec = true
+		default:
+			log.Warningf("ignoring unknown mount option %q", o)
+		}
+	}
+
+	if conf.Overlay {
+		// All writes go to upper, be paranoid and make lower readonly.
+		opts.ReadOnly = true
+	}
+	return fsName, opts, nil
+}
+
+func (c *containerMounter) makeSyntheticMount(ctx context.Context, currentPath string, root vfs.VirtualDentry, creds *auth.Credentials) error {
+	target := &vfs.PathOperation{
+		Root:  root,
+		Start: root,
+		Path:  fspath.Parse(currentPath),
+	}
+	_, err := c.k.VFS().StatAt(ctx, creds, target, &vfs.StatOptions{})
+	if err == nil {
+		// Mount point exists, nothing else to do.
+		return nil
+	}
+	if err != syserror.ENOENT {
+		return fmt.Errorf("stat failed for %q during mount point creation: %w", currentPath, err)
+	}
+
+	// Recurse to ensure parent is created and then create the mount point.
+	if err := c.makeSyntheticMount(ctx, path.Dir(currentPath), root, creds); err != nil {
+		return err
+	}
+	log.Debugf("Creating dir %q for mount point", currentPath)
+	mkdirOpts := &vfs.MkdirOptions{Mode: 0777, ForSyntheticMountpoint: true}
+	if err := c.k.VFS().MkdirAt(ctx, creds, target, mkdirOpts); err != nil {
+		return fmt.Errorf("failed to create directory %q for mount: %w", currentPath, err)
+	}
+	return nil
+}
+
+// mountTmpVFS2 mounts an internal tmpfs at '/tmp' if it's safe to do so.
+// Technically we don't have to mount tmpfs at /tmp, as we could just rely on
+// the host /tmp, but this is a nice optimization, and fixes some apps that call
+// mknod in /tmp. It's unsafe to mount tmpfs if:
+//   1. /tmp is mounted explicitly: we should not override user's wish
+//   2. /tmp is not empty: mounting tmpfs would hide existing files in /tmp
+//
+// Note that when there are submounts inside of '/tmp', directories for the
+// mount points must be present, making '/tmp' not empty anymore.
+func (c *containerMounter) mountTmpVFS2(ctx context.Context, conf *Config, creds *auth.Credentials, mns *vfs.MountNamespace) error {
+	for _, m := range c.mounts {
+		// m.Destination has been cleaned, so it's to use equality here.
+		if m.Destination == "/tmp" {
+			log.Debugf(`Explict "/tmp" mount found, skipping internal tmpfs, mount: %+v`, m)
+			return nil
+		}
+	}
+
+	root := mns.Root()
+	defer root.DecRef()
+	pop := vfs.PathOperation{
+		Root:  root,
+		Start: root,
+		Path:  fspath.Parse("/tmp"),
+	}
+	// TODO(gvisor.dev/issue/2782): Use O_PATH when available.
+	statx, err := c.k.VFS().StatAt(ctx, creds, &pop, &vfs.StatOptions{})
+	switch err {
+	case nil:
+		// Found '/tmp' in filesystem, check if it's empty.
+		if linux.FileMode(statx.Mode).FileType() != linux.ModeDirectory {
+			// Not a dir?! Leave it be.
+			return nil
+		}
+		if statx.Nlink > 2 {
+			// If more than "." and ".." is found, skip internal tmpfs to prevent
+			// hiding existing files.
+			log.Infof(`Skipping internal tmpfs mount for "/tmp" because it's not empty`)
+			return nil
+		}
+		log.Infof(`Mounting internal tmpfs on top of empty "/tmp"`)
+		fallthrough
+
+	case syserror.ENOENT:
+		// No '/tmp' found (or fallthrough from above). It's safe to mount internal
+		// tmpfs.
+		tmpMount := specs.Mount{
+			Type:        tmpfs.Name,
+			Destination: "/tmp",
+			// Sticky bit is added to prevent accidental deletion of files from
+			// another user. This is normally done for /tmp.
+			Options: []string{"mode=01777"},
+		}
+		return c.mountSubmountVFS2(ctx, conf, mns, creds, &mountAndFD{Mount: tmpMount})
+
+	default:
+		return fmt.Errorf(`stating "/tmp" inside container: %w`, err)
+	}
+}
diff --git a/runsc/cgroup/BUILD b/runsc/cgroup/BUILD
index d4c7bdfbb..c087e1a3c 100644
--- a/runsc/cgroup/BUILD
+++ b/runsc/cgroup/BUILD
@@ -7,8 +7,8 @@ go_library(
     srcs = ["cgroup.go"],
     visibility = ["//:sandbox"],
     deps = [
+        "//pkg/cleanup",
         "//pkg/log",
-        "//runsc/specutils",
         "@com_github_cenkalti_backoff//:go_default_library",
         "@com_github_opencontainers_runtime-spec//specs-go:go_default_library",
     ],
diff --git a/runsc/cgroup/cgroup.go b/runsc/cgroup/cgroup.go
index 653ca5f52..ef01820ef 100644
--- a/runsc/cgroup/cgroup.go
+++ b/runsc/cgroup/cgroup.go
@@ -19,6 +19,7 @@ package cgroup
 import (
 	"bufio"
 	"context"
+	"errors"
 	"fmt"
 	"io/ioutil"
 	"os"
@@ -30,29 +31,31 @@ import (
 
 	"github.com/cenkalti/backoff"
 	specs "github.com/opencontainers/runtime-spec/specs-go"
+	"gvisor.dev/gvisor/pkg/cleanup"
 	"gvisor.dev/gvisor/pkg/log"
-	"gvisor.dev/gvisor/runsc/specutils"
 )
 
 const (
 	cgroupRoot = "/sys/fs/cgroup"
 )
 
-var controllers = map[string]controller{
-	"blkio":    &blockIO{},
-	"cpu":      &cpu{},
-	"cpuset":   &cpuSet{},
-	"memory":   &memory{},
-	"net_cls":  &networkClass{},
-	"net_prio": &networkPrio{},
+var controllers = map[string]config{
+	"blkio":    config{ctrlr: &blockIO{}},
+	"cpu":      config{ctrlr: &cpu{}},
+	"cpuset":   config{ctrlr: &cpuSet{}},
+	"memory":   config{ctrlr: &memory{}},
+	"net_cls":  config{ctrlr: &networkClass{}},
+	"net_prio": config{ctrlr: &networkPrio{}},
+	"pids":     config{ctrlr: &pids{}},
 
 	// These controllers either don't have anything in the OCI spec or is
-	// irrevalant for a sandbox, e.g. pids.
-	"devices":    &noop{},
-	"freezer":    &noop{},
-	"perf_event": &noop{},
-	"pids":       &noop{},
-	"systemd":    &noop{},
+	// irrelevant for a sandbox.
+	"devices":    config{ctrlr: &noop{}},
+	"freezer":    config{ctrlr: &noop{}},
+	"hugetlb":    config{ctrlr: &noop{}, optional: true},
+	"perf_event": config{ctrlr: &noop{}},
+	"rdma":       config{ctrlr: &noop{}, optional: true},
+	"systemd":    config{ctrlr: &noop{}},
 }
 
 func setOptionalValueInt(path, name string, val *int64) error {
@@ -196,8 +199,9 @@ func LoadPaths(pid string) (map[string]string, error) {
 	return paths, nil
 }
 
-// Cgroup represents a group inside all controllers. For example: Name='/foo/bar'
-// maps to /sys/fs/cgroup/<controller>/foo/bar on all controllers.
+// Cgroup represents a group inside all controllers. For example:
+//   Name='/foo/bar' maps to /sys/fs/cgroup/<controller>/foo/bar on
+//   all controllers.
 type Cgroup struct {
 	Name    string            `json:"name"`
 	Parents map[string]string `json:"parents"`
@@ -242,16 +246,20 @@ func (c *Cgroup) Install(res *specs.LinuxResources) error {
 
 	// The Cleanup object cleans up partially created cgroups when an error occurs.
 	// Errors occuring during cleanup itself are ignored.
-	clean := specutils.MakeCleanup(func() { _ = c.Uninstall() })
+	clean := cleanup.Make(func() { _ = c.Uninstall() })
 	defer clean.Clean()
 
-	for key, ctrl := range controllers {
+	for key, cfg := range controllers {
 		path := c.makePath(key)
 		if err := os.MkdirAll(path, 0755); err != nil {
+			if cfg.optional && errors.Is(err, syscall.EROFS) {
+				log.Infof("Skipping cgroup %q", key)
+				continue
+			}
 			return err
 		}
 		if res != nil {
-			if err := ctrl.set(res, path); err != nil {
+			if err := cfg.ctrlr.set(res, path); err != nil {
 				return err
 			}
 		}
@@ -321,10 +329,13 @@ func (c *Cgroup) Join() (func(), error) {
 	}
 
 	// Now join the cgroups.
-	for key := range controllers {
+	for key, cfg := range controllers {
 		path := c.makePath(key)
 		log.Debugf("Joining cgroup %q", path)
 		if err := setValue(path, "cgroup.procs", "0"); err != nil {
+			if cfg.optional && os.IsNotExist(err) {
+				continue
+			}
 			return undo, err
 		}
 	}
@@ -375,6 +386,11 @@ func (c *Cgroup) makePath(controllerName string) string {
 	return filepath.Join(cgroupRoot, controllerName, path)
 }
 
+type config struct {
+	ctrlr    controller
+	optional bool
+}
+
 type controller interface {
 	set(*specs.LinuxResources, string) error
 }
@@ -525,3 +541,13 @@ func (*networkPrio) set(spec *specs.LinuxResources, path string) error {
 	}
 	return nil
 }
+
+type pids struct{}
+
+func (*pids) set(spec *specs.LinuxResources, path string) error {
+	if spec.Pids == nil {
+		return nil
+	}
+	val := strconv.FormatInt(spec.Pids.Limit, 10)
+	return setValue(path, "pids.max", val)
+}
diff --git a/runsc/cmd/BUILD b/runsc/cmd/BUILD
index 2a88b85a9..af3538ef0 100644
--- a/runsc/cmd/BUILD
+++ b/runsc/cmd/BUILD
@@ -31,6 +31,7 @@ go_library(
         "spec.go",
         "start.go",
         "state.go",
+        "statefile.go",
         "syscalls.go",
         "wait.go",
     ],
@@ -43,11 +44,13 @@ go_library(
         "//pkg/sentry/control",
         "//pkg/sentry/kernel",
         "//pkg/sentry/kernel/auth",
+        "//pkg/sentry/platform",
+        "//pkg/state",
+        "//pkg/state/statefile",
         "//pkg/sync",
         "//pkg/unet",
         "//pkg/urpc",
         "//runsc/boot",
-        "//runsc/boot/platforms",
         "//runsc/console",
         "//runsc/container",
         "//runsc/flag",
@@ -79,11 +82,11 @@ go_test(
         "//pkg/log",
         "//pkg/sentry/control",
         "//pkg/sentry/kernel/auth",
+        "//pkg/test/testutil",
         "//pkg/urpc",
         "//runsc/boot",
         "//runsc/container",
         "//runsc/specutils",
-        "//runsc/testutil",
         "@com_github_google_go-cmp//cmp:go_default_library",
         "@com_github_google_go-cmp//cmp/cmpopts:go_default_library",
         "@com_github_opencontainers_runtime-spec//specs-go:go_default_library",
diff --git a/runsc/cmd/boot.go b/runsc/cmd/boot.go
index 0f3da69a0..01204ab4d 100644
--- a/runsc/cmd/boot.go
+++ b/runsc/cmd/boot.go
@@ -23,9 +23,10 @@ import (
 
 	"github.com/google/subcommands"
 	specs "github.com/opencontainers/runtime-spec/specs-go"
+	"golang.org/x/sys/unix"
 	"gvisor.dev/gvisor/pkg/log"
+	"gvisor.dev/gvisor/pkg/sentry/platform"
 	"gvisor.dev/gvisor/runsc/boot"
-	"gvisor.dev/gvisor/runsc/boot/platforms"
 	"gvisor.dev/gvisor/runsc/flag"
 	"gvisor.dev/gvisor/runsc/specutils"
 )
@@ -82,8 +83,13 @@ type Boot struct {
 	// sandbox (e.g. gofer) and sent through this FD.
 	mountsFD int
 
-	// pidns is set if the sanadbox is in its own pid namespace.
+	// pidns is set if the sandbox is in its own pid namespace.
 	pidns bool
+
+	// attached is set to true to kill the sandbox process when the parent process
+	// terminates. This flag is set when the command execve's itself because
+	// parent death signal doesn't propagate through execve when uid/gid changes.
+	attached bool
 }
 
 // Name implements subcommands.Command.Name.
@@ -118,6 +124,7 @@ func (b *Boot) SetFlags(f *flag.FlagSet) {
 	f.IntVar(&b.userLogFD, "user-log-fd", 0, "file descriptor to write user logs to. 0 means no logging.")
 	f.IntVar(&b.startSyncFD, "start-sync-fd", -1, "required FD to used to synchronize sandbox startup")
 	f.IntVar(&b.mountsFD, "mounts-fd", -1, "mountsFD is the file descriptor to read list of mounts after they have been resolved (direct paths, no symlinks).")
+	f.BoolVar(&b.attached, "attached", false, "if attached is true, kills the sandbox process when the parent process terminates")
 }
 
 // Execute implements subcommands.Command.Execute.  It starts a sandbox in a
@@ -129,33 +136,36 @@ func (b *Boot) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
 	}
 
 	// Ensure that if there is a panic, all goroutine stacks are printed.
-	debug.SetTraceback("all")
+	debug.SetTraceback("system")
 
 	conf := args[0].(*boot.Config)
 
+	if b.attached {
+		// Ensure this process is killed after parent process terminates when
+		// attached mode is enabled. In the unfortunate event that the parent
+		// terminates before this point, this process leaks.
+		if err := unix.Prctl(unix.PR_SET_PDEATHSIG, uintptr(unix.SIGKILL), 0, 0, 0); err != nil {
+			Fatalf("error setting parent death signal: %v", err)
+		}
+	}
+
 	if b.setUpRoot {
 		if err := setUpChroot(b.pidns); err != nil {
 			Fatalf("error setting up chroot: %v", err)
 		}
 
-		if !b.applyCaps {
-			// Remove --setup-root arg to call myself.
-			var args []string
-			for _, arg := range os.Args {
-				if !strings.Contains(arg, "setup-root") {
-					args = append(args, arg)
-				}
-			}
-			if !conf.Rootless {
-				// Note that we've already read the spec from the spec FD, and
-				// we will read it again after the exec call. This works
-				// because the ReadSpecFromFile function seeks to the beginning
-				// of the file before reading.
-				if err := callSelfAsNobody(args); err != nil {
-					Fatalf("%v", err)
-				}
-				panic("callSelfAsNobody must never return success")
+		if !b.applyCaps && !conf.Rootless {
+			// Remove --apply-caps arg to call myself. It has already been done.
+			args := prepareArgs(b.attached, "setup-root")
+
+			// Note that we've already read the spec from the spec FD, and
+			// we will read it again after the exec call. This works
+			// because the ReadSpecFromFile function seeks to the beginning
+			// of the file before reading.
+			if err := callSelfAsNobody(args); err != nil {
+				Fatalf("%v", err)
 			}
+			panic("callSelfAsNobody must never return success")
 		}
 	}
 
@@ -173,7 +183,12 @@ func (b *Boot) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
 		if caps == nil {
 			caps = &specs.LinuxCapabilities{}
 		}
-		if conf.Platform == platforms.Ptrace {
+
+		gPlatform, err := platform.Lookup(conf.Platform)
+		if err != nil {
+			Fatalf("loading platform: %v", err)
+		}
+		if gPlatform.Requirements().RequiresCapSysPtrace {
 			// Ptrace platform requires extra capabilities.
 			const c = "CAP_SYS_PTRACE"
 			caps.Bounding = append(caps.Bounding, c)
@@ -181,13 +196,9 @@ func (b *Boot) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
 			caps.Permitted = append(caps.Permitted, c)
 		}
 
-		// Remove --apply-caps arg to call myself.
-		var args []string
-		for _, arg := range os.Args {
-			if !strings.Contains(arg, "setup-root") && !strings.Contains(arg, "apply-caps") {
-				args = append(args, arg)
-			}
-		}
+		// Remove --apply-caps and --setup-root arg to call myself. Both have
+		// already been done.
+		args := prepareArgs(b.attached, "setup-root", "apply-caps")
 
 		// Note that we've already read the spec from the spec FD, and
 		// we will read it again after the exec call. This works
@@ -258,3 +269,22 @@ func (b *Boot) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
 	l.Destroy()
 	return subcommands.ExitSuccess
 }
+
+func prepareArgs(attached bool, exclude ...string) []string {
+	var args []string
+	for _, arg := range os.Args {
+		for _, excl := range exclude {
+			if strings.Contains(arg, excl) {
+				goto skip
+			}
+		}
+		args = append(args, arg)
+		if attached && arg == "boot" {
+			// Strategicaly place "--attached" after the command. This is needed
+			// to ensure the new process is killed when the parent process terminates.
+			args = append(args, "--attached")
+		}
+	skip:
+	}
+	return args
+}
diff --git a/runsc/cmd/capability_test.go b/runsc/cmd/capability_test.go
index 0c27f7313..a84067112 100644
--- a/runsc/cmd/capability_test.go
+++ b/runsc/cmd/capability_test.go
@@ -23,10 +23,10 @@ import (
 	specs "github.com/opencontainers/runtime-spec/specs-go"
 	"github.com/syndtr/gocapability/capability"
 	"gvisor.dev/gvisor/pkg/log"
+	"gvisor.dev/gvisor/pkg/test/testutil"
 	"gvisor.dev/gvisor/runsc/boot"
 	"gvisor.dev/gvisor/runsc/container"
 	"gvisor.dev/gvisor/runsc/specutils"
-	"gvisor.dev/gvisor/runsc/testutil"
 )
 
 func init() {
@@ -85,21 +85,20 @@ func TestCapabilities(t *testing.T) {
 		Inheritable: caps,
 	}
 
-	conf := testutil.TestConfig()
+	conf := testutil.TestConfig(t)
 
 	// Use --network=host to make sandbox use spec's capabilities.
 	conf.Network = boot.NetworkHost
 
-	rootDir, bundleDir, err := testutil.SetupContainer(spec, conf)
+	_, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf)
 	if err != nil {
 		t.Fatalf("error setting up container: %v", err)
 	}
-	defer os.RemoveAll(rootDir)
-	defer os.RemoveAll(bundleDir)
+	defer cleanup()
 
 	// Create and start the container.
 	args := container.Args{
-		ID:        testutil.UniqueContainerID(),
+		ID:        testutil.RandomContainerID(),
 		Spec:      spec,
 		BundleDir: bundleDir,
 	}
diff --git a/runsc/cmd/chroot.go b/runsc/cmd/chroot.go
index b5a0ce17d..189244765 100644
--- a/runsc/cmd/chroot.go
+++ b/runsc/cmd/chroot.go
@@ -50,7 +50,7 @@ func pivotRoot(root string) error {
 	// new_root, so after umounting the old_root, we will see only
 	// the new_root in "/".
 	if err := syscall.PivotRoot(".", "."); err != nil {
-		return fmt.Errorf("error changing root filesystem: %v", err)
+		return fmt.Errorf("pivot_root failed, make sure that the root mount has a parent: %v", err)
 	}
 
 	if err := syscall.Unmount(".", syscall.MNT_DETACH); err != nil {
diff --git a/runsc/cmd/debug.go b/runsc/cmd/debug.go
index 79965460e..b5de2588b 100644
--- a/runsc/cmd/debug.go
+++ b/runsc/cmd/debug.go
@@ -32,17 +32,20 @@ import (
 
 // Debug implements subcommands.Command for the "debug" command.
 type Debug struct {
-	pid         int
-	stacks      bool
-	signal      int
-	profileHeap string
-	profileCPU  string
-	trace       string
-	strace      string
-	logLevel    string
-	logPackets  string
-	duration    time.Duration
-	ps          bool
+	pid              int
+	stacks           bool
+	signal           int
+	profileHeap      string
+	profileCPU       string
+	profileGoroutine string
+	profileBlock     string
+	profileMutex     string
+	trace            string
+	strace           string
+	logLevel         string
+	logPackets       string
+	duration         time.Duration
+	ps               bool
 }
 
 // Name implements subcommands.Command.
@@ -66,6 +69,9 @@ func (d *Debug) SetFlags(f *flag.FlagSet) {
 	f.BoolVar(&d.stacks, "stacks", false, "if true, dumps all sandbox stacks to the log")
 	f.StringVar(&d.profileHeap, "profile-heap", "", "writes heap profile to the given file.")
 	f.StringVar(&d.profileCPU, "profile-cpu", "", "writes CPU profile to the given file.")
+	f.StringVar(&d.profileGoroutine, "profile-goroutine", "", "writes goroutine profile to the given file.")
+	f.StringVar(&d.profileBlock, "profile-block", "", "writes block profile to the given file.")
+	f.StringVar(&d.profileMutex, "profile-mutex", "", "writes mutex profile to the given file.")
 	f.DurationVar(&d.duration, "duration", time.Second, "amount of time to wait for CPU and trace profiles")
 	f.StringVar(&d.trace, "trace", "", "writes an execution trace to the given file.")
 	f.IntVar(&d.signal, "signal", -1, "sends signal to the sandbox")
@@ -147,6 +153,42 @@ func (d *Debug) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
 		}
 		log.Infof("Heap profile written to %q", d.profileHeap)
 	}
+	if d.profileGoroutine != "" {
+		f, err := os.Create(d.profileGoroutine)
+		if err != nil {
+			return Errorf(err.Error())
+		}
+		defer f.Close()
+
+		if err := c.Sandbox.GoroutineProfile(f); err != nil {
+			return Errorf(err.Error())
+		}
+		log.Infof("Goroutine profile written to %q", d.profileGoroutine)
+	}
+	if d.profileBlock != "" {
+		f, err := os.Create(d.profileBlock)
+		if err != nil {
+			return Errorf(err.Error())
+		}
+		defer f.Close()
+
+		if err := c.Sandbox.BlockProfile(f); err != nil {
+			return Errorf(err.Error())
+		}
+		log.Infof("Block profile written to %q", d.profileBlock)
+	}
+	if d.profileMutex != "" {
+		f, err := os.Create(d.profileMutex)
+		if err != nil {
+			return Errorf(err.Error())
+		}
+		defer f.Close()
+
+		if err := c.Sandbox.MutexProfile(f); err != nil {
+			return Errorf(err.Error())
+		}
+		log.Infof("Mutex profile written to %q", d.profileMutex)
+	}
 
 	delay := false
 	if d.profileCPU != "" {
diff --git a/runsc/cmd/do.go b/runsc/cmd/do.go
index b184bd402..7d1310c96 100644
--- a/runsc/cmd/do.go
+++ b/runsc/cmd/do.go
@@ -166,15 +166,33 @@ func (c *Do) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) su
 		return Errorf("Error write spec: %v", err)
 	}
 
-	runArgs := container.Args{
+	containerArgs := container.Args{
 		ID:        cid,
 		Spec:      spec,
 		BundleDir: tmpDir,
 		Attached:  true,
 	}
-	ws, err := container.Run(conf, runArgs)
+	ct, err := container.New(conf, containerArgs)
 	if err != nil {
-		return Errorf("running container: %v", err)
+		return Errorf("creating container: %v", err)
+	}
+	defer ct.Destroy()
+
+	if err := ct.Start(conf); err != nil {
+		return Errorf("starting container: %v", err)
+	}
+
+	// Forward signals to init in the container. Thus if we get SIGINT from
+	// ^C, the container gracefully exit, and we can clean up.
+	//
+	// N.B. There is a still a window before this where a signal may kill
+	// this process, skipping cleanup.
+	stopForwarding := ct.ForwardSignals(0 /* pid */, false /* fgProcess */)
+	defer stopForwarding()
+
+	ws, err := ct.Wait()
+	if err != nil {
+		return Errorf("waiting for container: %v", err)
 	}
 
 	*waitStatus = ws
@@ -237,20 +255,27 @@ func (c *Do) setupNet(cid string, spec *specs.Spec) (func(), error) {
 	for _, cmd := range cmds {
 		log.Debugf("Run %q", cmd)
 		args := strings.Split(cmd, " ")
-		c := exec.Command(args[0], args[1:]...)
-		if err := c.Run(); err != nil {
+		cmd := exec.Command(args[0], args[1:]...)
+		if err := cmd.Run(); err != nil {
+			c.cleanupNet(cid, dev, "", "", "")
 			return nil, fmt.Errorf("failed to run %q: %v", cmd, err)
 		}
 	}
 
-	if err := makeFile("/etc/resolv.conf", "nameserver 8.8.8.8\n", spec); err != nil {
+	resolvPath, err := makeFile("/etc/resolv.conf", "nameserver 8.8.8.8\n", spec)
+	if err != nil {
+		c.cleanupNet(cid, dev, "", "", "")
 		return nil, err
 	}
-	if err := makeFile("/etc/hostname", cid+"\n", spec); err != nil {
+	hostnamePath, err := makeFile("/etc/hostname", cid+"\n", spec)
+	if err != nil {
+		c.cleanupNet(cid, dev, resolvPath, "", "")
 		return nil, err
 	}
 	hosts := fmt.Sprintf("127.0.0.1\tlocalhost\n%s\t%s\n", c.ip, cid)
-	if err := makeFile("/etc/hosts", hosts, spec); err != nil {
+	hostsPath, err := makeFile("/etc/hosts", hosts, spec)
+	if err != nil {
+		c.cleanupNet(cid, dev, resolvPath, hostnamePath, "")
 		return nil, err
 	}
 
@@ -263,19 +288,22 @@ func (c *Do) setupNet(cid string, spec *specs.Spec) (func(), error) {
 	}
 	spec.Linux.Namespaces = append(spec.Linux.Namespaces, netns)
 
-	return func() { c.cleanNet(cid, dev) }, nil
+	return func() { c.cleanupNet(cid, dev, resolvPath, hostnamePath, hostsPath) }, nil
 }
 
-func (c *Do) cleanNet(cid, dev string) {
-	veth, peer := deviceNames(cid)
+// cleanupNet tries to cleanup the network setup in setupNet.
+//
+// It may be called when setupNet is only partially complete, in which case it
+// will cleanup as much as possible, logging warnings for the rest.
+//
+// Unfortunately none of this can be automatically cleaned up on process exit,
+// we must do so explicitly.
+func (c *Do) cleanupNet(cid, dev, resolvPath, hostnamePath, hostsPath string) {
+	_, peer := deviceNames(cid)
 
 	cmds := []string{
 		fmt.Sprintf("ip link delete %s", peer),
 		fmt.Sprintf("ip netns delete %s", cid),
-
-		fmt.Sprintf("iptables -t nat -D POSTROUTING -s %s/24 -o %s -j MASQUERADE", c.ip, dev),
-		fmt.Sprintf("iptables -D FORWARD -i %s -o %s -j ACCEPT", dev, veth),
-		fmt.Sprintf("iptables -D FORWARD -o %s -i %s -j ACCEPT", dev, veth),
 	}
 
 	for _, cmd := range cmds {
@@ -286,6 +314,10 @@ func (c *Do) cleanNet(cid, dev string) {
 			log.Warningf("Failed to run %q: %v", cmd, err)
 		}
 	}
+
+	tryRemove(resolvPath)
+	tryRemove(hostnamePath)
+	tryRemove(hostsPath)
 }
 
 func deviceNames(cid string) (string, string) {
@@ -306,13 +338,16 @@ func defaultDevice() (string, error) {
 	return parts[4], nil
 }
 
-func makeFile(dest, content string, spec *specs.Spec) error {
+func makeFile(dest, content string, spec *specs.Spec) (string, error) {
 	tmpFile, err := ioutil.TempFile("", filepath.Base(dest))
 	if err != nil {
-		return err
+		return "", err
 	}
 	if _, err := tmpFile.WriteString(content); err != nil {
-		return err
+		if err := os.Remove(tmpFile.Name()); err != nil {
+			log.Warningf("Failed to remove %q: %v", tmpFile, err)
+		}
+		return "", err
 	}
 	spec.Mounts = append(spec.Mounts, specs.Mount{
 		Source:      tmpFile.Name(),
@@ -320,7 +355,17 @@ func makeFile(dest, content string, spec *specs.Spec) error {
 		Type:        "bind",
 		Options:     []string{"ro"},
 	})
-	return nil
+	return tmpFile.Name(), nil
+}
+
+func tryRemove(path string) {
+	if path == "" {
+		return
+	}
+
+	if err := os.Remove(path); err != nil {
+		log.Warningf("Failed to remove %q: %v", path, err)
+	}
 }
 
 func calculatePeerIP(ip string) (string, error) {
diff --git a/runsc/cmd/gofer.go b/runsc/cmd/gofer.go
index 6e06f3c0f..10448a759 100644
--- a/runsc/cmd/gofer.go
+++ b/runsc/cmd/gofer.go
@@ -168,7 +168,7 @@ func (g *Gofer) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
 	// Start with root mount, then add any other additional mount as needed.
 	ats := make([]p9.Attacher, 0, len(spec.Mounts)+1)
 	ap, err := fsgofer.NewAttachPoint("/", fsgofer.Config{
-		ROMount:      spec.Root.Readonly,
+		ROMount:      spec.Root.Readonly || conf.Overlay,
 		PanicOnWrite: g.panicOnWrite,
 	})
 	if err != nil {
@@ -181,7 +181,7 @@ func (g *Gofer) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
 	for _, m := range spec.Mounts {
 		if specutils.Is9PMount(m) {
 			cfg := fsgofer.Config{
-				ROMount:      isReadonlyMount(m.Options),
+				ROMount:      isReadonlyMount(m.Options) || conf.Overlay,
 				PanicOnWrite: g.panicOnWrite,
 				HostUDS:      conf.FSGoferHostUDS,
 			}
@@ -272,9 +272,8 @@ func setupRootFS(spec *specs.Spec, conf *boot.Config) error {
 
 	root := spec.Root.Path
 	if !conf.TestOnlyAllowRunAsCurrentUserWithoutChroot {
-		// FIXME: runsc can't be re-executed without
-		// /proc, so we create a tmpfs mount, mount ./proc and ./root
-		// there, then move this mount to the root and after
+		// runsc can't be re-executed without /proc, so we create a tmpfs mount,
+		// mount ./proc and ./root there, then move this mount to the root and after
 		// setCapsAndCallSelf, runsc will chroot into /root.
 		//
 		// We need a directory to construct a new root and we know that
@@ -335,7 +334,7 @@ func setupRootFS(spec *specs.Spec, conf *boot.Config) error {
 
 	if !conf.TestOnlyAllowRunAsCurrentUserWithoutChroot {
 		if err := pivotRoot("/proc"); err != nil {
-			Fatalf("faild to change the root file system: %v", err)
+			Fatalf("failed to change the root file system: %v", err)
 		}
 		if err := os.Chdir("/"); err != nil {
 			Fatalf("failed to change working directory")
diff --git a/runsc/cmd/help.go b/runsc/cmd/help.go
index c7d210140..cd85dabbb 100644
--- a/runsc/cmd/help.go
+++ b/runsc/cmd/help.go
@@ -65,16 +65,10 @@ func (h *Help) Execute(ctx context.Context, f *flag.FlagSet, args ...interface{}
 	switch f.NArg() {
 	case 0:
 		fmt.Fprintf(h.cdr.Output, "Usage: %s <flags> <subcommand> <subcommand args>\n\n", h.cdr.Name())
-		fmt.Fprintf(h.cdr.Output, `runsc is a command line client for running applications packaged in the Open
-Container Initiative (OCI) format. Applications run by runsc are run in an
-isolated gVisor sandbox that emulates a Linux environment.
+		fmt.Fprintf(h.cdr.Output, `runsc is the gVisor container runtime.
 
-gVisor is a user-space kernel, written in Go, that implements a substantial
-portion of the Linux system call interface. It provides an additional layer
-of isolation between running applications and the host operating system.
-
-Functionality is provided by subcommands. For additonal help on individual
-subcommands use "%s %s <subcommand>".
+Functionality is provided by subcommands. For help with a specific subcommand,
+use "%s %s <subcommand>".
 
 `, h.cdr.Name(), h.Name())
 		h.cdr.VisitGroups(func(g *subcommands.CommandGroup) {
diff --git a/runsc/cmd/spec.go b/runsc/cmd/spec.go
index 8e2b36e85..a2b0a4b14 100644
--- a/runsc/cmd/spec.go
+++ b/runsc/cmd/spec.go
@@ -16,6 +16,7 @@ package cmd
 
 import (
 	"context"
+	"fmt"
 	"io/ioutil"
 	"os"
 	"path/filepath"
@@ -24,7 +25,8 @@ import (
 	"gvisor.dev/gvisor/runsc/flag"
 )
 
-var specTemplate = []byte(`{
+func genSpec(cwd string) []byte {
+	var template = fmt.Sprintf(`{
 	"ociVersion": "1.0.0",
 	"process": {
 		"terminal": true,
@@ -39,7 +41,7 @@ var specTemplate = []byte(`{
 			"PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin",
 			"TERM=xterm"
 		],
-		"cwd": "/",
+		"cwd": "%s",
 		"capabilities": {
 			"bounding": [
 				"CAP_AUDIT_WRITE",
@@ -123,11 +125,15 @@ var specTemplate = []byte(`{
 			}
 		]
 	}
-}`)
+}`, cwd)
+
+	return []byte(template)
+}
 
 // Spec implements subcommands.Command for the "spec" command.
 type Spec struct {
 	bundle string
+	cwd    string
 }
 
 // Name implements subcommands.Command.Name.
@@ -165,6 +171,8 @@ EXAMPLE:
 // SetFlags implements subcommands.Command.SetFlags.
 func (s *Spec) SetFlags(f *flag.FlagSet) {
 	f.StringVar(&s.bundle, "bundle", ".", "path to the root of the OCI bundle")
+	f.StringVar(&s.cwd, "cwd", "/", "working directory that will be set for the executable, "+
+		"this value MUST be an absolute path")
 }
 
 // Execute implements subcommands.Command.Execute.
@@ -174,7 +182,9 @@ func (s *Spec) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
 		Fatalf("file %q already exists", confPath)
 	}
 
-	if err := ioutil.WriteFile(confPath, specTemplate, 0664); err != nil {
+	var spec = genSpec(s.cwd)
+
+	if err := ioutil.WriteFile(confPath, spec, 0664); err != nil {
 		Fatalf("writing to %q: %v", confPath, err)
 	}
 
diff --git a/runsc/cmd/statefile.go b/runsc/cmd/statefile.go
new file mode 100644
index 000000000..e6f1907da
--- /dev/null
+++ b/runsc/cmd/statefile.go
@@ -0,0 +1,143 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package cmd
+
+import (
+	"context"
+	"fmt"
+	"os"
+
+	"github.com/google/subcommands"
+	"gvisor.dev/gvisor/pkg/state"
+	"gvisor.dev/gvisor/pkg/state/statefile"
+	"gvisor.dev/gvisor/runsc/flag"
+)
+
+// Statefile implements subcommands.Command for the "statefile" command.
+type Statefile struct {
+	list   bool
+	get    string
+	key    string
+	output string
+	html   bool
+}
+
+// Name implements subcommands.Command.
+func (*Statefile) Name() string {
+	return "state"
+}
+
+// Synopsis implements subcommands.Command.
+func (*Statefile) Synopsis() string {
+	return "shows information about a statefile"
+}
+
+// Usage implements subcommands.Command.
+func (*Statefile) Usage() string {
+	return `statefile [flags] <statefile>`
+}
+
+// SetFlags implements subcommands.Command.
+func (s *Statefile) SetFlags(f *flag.FlagSet) {
+	f.BoolVar(&s.list, "list", false, "lists the metdata in the statefile.")
+	f.StringVar(&s.get, "get", "", "extracts the given metadata key.")
+	f.StringVar(&s.key, "key", "", "the integrity key for the file.")
+	f.StringVar(&s.output, "output", "", "target to write the result.")
+	f.BoolVar(&s.html, "html", false, "outputs in HTML format.")
+}
+
+// Execute implements subcommands.Command.Execute.
+func (s *Statefile) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) subcommands.ExitStatus {
+	// Check arguments.
+	if s.list && s.get != "" {
+		Fatalf("error: can't specify -list and -get simultaneously.")
+	}
+
+	// Setup output.
+	var output = os.Stdout // Default.
+	if s.output != "" {
+		f, err := os.OpenFile(s.output, os.O_WRONLY|os.O_TRUNC|os.O_CREATE, 0644)
+		if err != nil {
+			Fatalf("error opening output: %v", err)
+		}
+		defer func() {
+			if err := f.Close(); err != nil {
+				Fatalf("error flushing output: %v", err)
+			}
+		}()
+		output = f
+	}
+
+	// Open the file.
+	if f.NArg() != 1 {
+		f.Usage()
+		return subcommands.ExitUsageError
+	}
+	input, err := os.Open(f.Arg(0))
+	if err != nil {
+		Fatalf("error opening input: %v\n", err)
+	}
+
+	if s.html {
+		fmt.Fprintf(output, "<html><body>\n")
+		defer fmt.Fprintf(output, "</body></html>\n")
+	}
+
+	// Dump the full file?
+	if !s.list && s.get == "" {
+		var key []byte
+		if s.key != "" {
+			key = []byte(s.key)
+		}
+		rc, _, err := statefile.NewReader(input, key)
+		if err != nil {
+			Fatalf("error parsing statefile: %v", err)
+		}
+		if err := state.PrettyPrint(output, rc, s.html); err != nil {
+			Fatalf("error printing state: %v", err)
+		}
+		return subcommands.ExitSuccess
+	}
+
+	// Load just the metadata.
+	metadata, err := statefile.MetadataUnsafe(input)
+	if err != nil {
+		Fatalf("error reading metadata: %v", err)
+	}
+
+	// Is it a single key?
+	if s.get != "" {
+		val, ok := metadata[s.get]
+		if !ok {
+			Fatalf("metadata key %s: not found", s.get)
+		}
+		fmt.Fprintf(output, "%s\n", val)
+		return subcommands.ExitSuccess
+	}
+
+	// List all keys.
+	if s.html {
+		fmt.Fprintf(output, " <ul>\n")
+		defer fmt.Fprintf(output, " </ul>\n")
+	}
+	for key := range metadata {
+		if s.html {
+			fmt.Fprintf(output, "  <li>%s</li>\n", key)
+		} else {
+			fmt.Fprintf(output, "%s\n", key)
+		}
+	}
+	return subcommands.ExitSuccess
+}
diff --git a/runsc/cmd/syscalls.go b/runsc/cmd/syscalls.go
index 7072547be..a37d66139 100644
--- a/runsc/cmd/syscalls.go
+++ b/runsc/cmd/syscalls.go
@@ -32,9 +32,10 @@ import (
 
 // Syscalls implements subcommands.Command for the "syscalls" command.
 type Syscalls struct {
-	output string
-	os     string
-	arch   string
+	format   string
+	os       string
+	arch     string
+	filename string
 }
 
 // CompatibilityInfo is a map of system and architecture to compatibility doc.
@@ -95,16 +96,17 @@ func (*Syscalls) Usage() string {
 
 // SetFlags implements subcommands.Command.SetFlags.
 func (s *Syscalls) SetFlags(f *flag.FlagSet) {
-	f.StringVar(&s.output, "o", "table", "Output format (table, csv, json).")
+	f.StringVar(&s.format, "format", "table", "Output format (table, csv, json).")
 	f.StringVar(&s.os, "os", osAll, "The OS (e.g. linux)")
 	f.StringVar(&s.arch, "arch", archAll, "The CPU architecture (e.g. amd64).")
+	f.StringVar(&s.filename, "filename", "", "Output filename (otherwise stdout).")
 }
 
 // Execute implements subcommands.Command.Execute.
 func (s *Syscalls) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) subcommands.ExitStatus {
-	out, ok := outputMap[s.output]
+	out, ok := outputMap[s.format]
 	if !ok {
-		Fatalf("Unsupported output format %q", s.output)
+		Fatalf("Unsupported output format %q", s.format)
 	}
 
 	// Build map of all supported architectures.
@@ -124,7 +126,14 @@ func (s *Syscalls) Execute(_ context.Context, f *flag.FlagSet, args ...interface
 		Fatalf("%v", err)
 	}
 
-	if err := out(os.Stdout, info); err != nil {
+	w := os.Stdout // Default.
+	if s.filename != "" {
+		w, err = os.OpenFile(s.filename, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0644)
+		if err != nil {
+			Fatalf("Error opening %q: %v", s.filename, err)
+		}
+	}
+	if err := out(w, info); err != nil {
 		Fatalf("Error writing output: %v", err)
 	}
 
diff --git a/runsc/container/BUILD b/runsc/container/BUILD
index 0aaeea3a8..49cfb0837 100644
--- a/runsc/container/BUILD
+++ b/runsc/container/BUILD
@@ -15,8 +15,11 @@ go_library(
         "//test:__subpackages__",
     ],
     deps = [
+        "//pkg/abi/linux",
+        "//pkg/cleanup",
         "//pkg/log",
         "//pkg/sentry/control",
+        "//pkg/sentry/sighandling",
         "//pkg/sync",
         "//runsc/boot",
         "//runsc/cgroup",
@@ -33,33 +36,36 @@ go_test(
     size = "large",
     srcs = [
         "console_test.go",
+        "container_norace_test.go",
+        "container_race_test.go",
         "container_test.go",
         "multi_container_test.go",
         "shared_volume_test.go",
     ],
     data = [
         "//runsc",
-        "//runsc/container/test_app",
+        "//test/cmd/test_app",
     ],
     library = ":container",
-    shard_count = 5,
+    shard_count = 10,
     tags = [
         "requires-kvm",
     ],
     deps = [
         "//pkg/abi/linux",
         "//pkg/bits",
+        "//pkg/cleanup",
         "//pkg/log",
         "//pkg/sentry/control",
         "//pkg/sentry/kernel",
         "//pkg/sentry/kernel/auth",
         "//pkg/sync",
+        "//pkg/test/testutil",
         "//pkg/unet",
         "//pkg/urpc",
         "//runsc/boot",
         "//runsc/boot/platforms",
         "//runsc/specutils",
-        "//runsc/testutil",
         "@com_github_cenkalti_backoff//:go_default_library",
         "@com_github_kr_pty//:go_default_library",
         "@com_github_opencontainers_runtime-spec//specs-go:go_default_library",
diff --git a/runsc/container/console_test.go b/runsc/container/console_test.go
index c2518d52b..3813c6b93 100644
--- a/runsc/container/console_test.go
+++ b/runsc/container/console_test.go
@@ -29,9 +29,9 @@ import (
 	"gvisor.dev/gvisor/pkg/sentry/control"
 	"gvisor.dev/gvisor/pkg/sentry/kernel"
 	"gvisor.dev/gvisor/pkg/sync"
+	"gvisor.dev/gvisor/pkg/test/testutil"
 	"gvisor.dev/gvisor/pkg/unet"
 	"gvisor.dev/gvisor/pkg/urpc"
-	"gvisor.dev/gvisor/runsc/testutil"
 )
 
 // socketPath creates a path inside bundleDir and ensures that the returned
@@ -58,25 +58,26 @@ func socketPath(bundleDir string) (string, error) {
 }
 
 // createConsoleSocket creates a socket at the given path that will receive a
-// console fd from the sandbox. If no error occurs, it returns the server
-// socket and a cleanup function.
-func createConsoleSocket(path string) (*unet.ServerSocket, func() error, error) {
+// console fd from the sandbox. If an error occurs, t.Fatalf will be called.
+// The function returning should be deferred as cleanup.
+func createConsoleSocket(t *testing.T, path string) (*unet.ServerSocket, func()) {
+	t.Helper()
 	srv, err := unet.BindAndListen(path, false)
 	if err != nil {
-		return nil, nil, fmt.Errorf("error binding and listening to socket %q: %v", path, err)
+		t.Fatalf("error binding and listening to socket %q: %v", path, err)
 	}
 
-	cleanup := func() error {
+	cleanup := func() {
+		// Log errors; nothing can be done.
 		if err := srv.Close(); err != nil {
-			return fmt.Errorf("error closing socket %q: %v", path, err)
+			t.Logf("error closing socket %q: %v", path, err)
 		}
 		if err := os.Remove(path); err != nil {
-			return fmt.Errorf("error removing socket %q: %v", path, err)
+			t.Logf("error removing socket %q: %v", path, err)
 		}
-		return nil
 	}
 
-	return srv, cleanup, nil
+	return srv, cleanup
 }
 
 // receiveConsolePTY accepts a connection on the server socket and reads fds.
@@ -118,63 +119,59 @@ func receiveConsolePTY(srv *unet.ServerSocket) (*os.File, error) {
 
 // Test that an pty FD is sent over the console socket if one is provided.
 func TestConsoleSocket(t *testing.T) {
-	for _, conf := range configs(all...) {
-		t.Logf("Running test with conf: %+v", conf)
-		spec := testutil.NewSpecWithArgs("true")
-		rootDir, bundleDir, err := testutil.SetupContainer(spec, conf)
-		if err != nil {
-			t.Fatalf("error setting up container: %v", err)
-		}
-		defer os.RemoveAll(rootDir)
-		defer os.RemoveAll(bundleDir)
+	for name, conf := range configsWithVFS2(t, all...) {
+		t.Run(name, func(t *testing.T) {
+			spec := testutil.NewSpecWithArgs("true")
+			_, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf)
+			if err != nil {
+				t.Fatalf("error setting up container: %v", err)
+			}
+			defer cleanup()
 
-		sock, err := socketPath(bundleDir)
-		if err != nil {
-			t.Fatalf("error getting socket path: %v", err)
-		}
-		srv, cleanup, err := createConsoleSocket(sock)
-		if err != nil {
-			t.Fatalf("error creating socket at %q: %v", sock, err)
-		}
-		defer cleanup()
-
-		// Create the container and pass the socket name.
-		args := Args{
-			ID:            testutil.UniqueContainerID(),
-			Spec:          spec,
-			BundleDir:     bundleDir,
-			ConsoleSocket: sock,
-		}
-		c, err := New(conf, args)
-		if err != nil {
-			t.Fatalf("error creating container: %v", err)
-		}
-		defer c.Destroy()
+			sock, err := socketPath(bundleDir)
+			if err != nil {
+				t.Fatalf("error getting socket path: %v", err)
+			}
+			srv, cleanup := createConsoleSocket(t, sock)
+			defer cleanup()
+
+			// Create the container and pass the socket name.
+			args := Args{
+				ID:            testutil.RandomContainerID(),
+				Spec:          spec,
+				BundleDir:     bundleDir,
+				ConsoleSocket: sock,
+			}
+			c, err := New(conf, args)
+			if err != nil {
+				t.Fatalf("error creating container: %v", err)
+			}
+			defer c.Destroy()
 
-		// Make sure we get a console PTY.
-		ptyMaster, err := receiveConsolePTY(srv)
-		if err != nil {
-			t.Fatalf("error receiving console FD: %v", err)
-		}
-		ptyMaster.Close()
+			// Make sure we get a console PTY.
+			ptyMaster, err := receiveConsolePTY(srv)
+			if err != nil {
+				t.Fatalf("error receiving console FD: %v", err)
+			}
+			ptyMaster.Close()
+		})
 	}
 }
 
 // Test that job control signals work on a console created with "exec -ti".
 func TestJobControlSignalExec(t *testing.T) {
 	spec := testutil.NewSpecWithArgs("/bin/sleep", "10000")
-	conf := testutil.TestConfig()
+	conf := testutil.TestConfig(t)
 
-	rootDir, bundleDir, err := testutil.SetupContainer(spec, conf)
+	_, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf)
 	if err != nil {
 		t.Fatalf("error setting up container: %v", err)
 	}
-	defer os.RemoveAll(rootDir)
-	defer os.RemoveAll(bundleDir)
+	defer cleanup()
 
 	// Create and start the container.
 	args := Args{
-		ID:        testutil.UniqueContainerID(),
+		ID:        testutil.RandomContainerID(),
 		Spec:      spec,
 		BundleDir: bundleDir,
 	}
@@ -286,32 +283,28 @@ func TestJobControlSignalExec(t *testing.T) {
 
 // Test that job control signals work on a console created with "run -ti".
 func TestJobControlSignalRootContainer(t *testing.T) {
-	conf := testutil.TestConfig()
+	conf := testutil.TestConfig(t)
 	// Don't let bash execute from profile or rc files, otherwise our PID
 	// counts get messed up.
 	spec := testutil.NewSpecWithArgs("/bin/bash", "--noprofile", "--norc")
 	spec.Process.Terminal = true
 
-	rootDir, bundleDir, err := testutil.SetupContainer(spec, conf)
+	_, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf)
 	if err != nil {
 		t.Fatalf("error setting up container: %v", err)
 	}
-	defer os.RemoveAll(rootDir)
-	defer os.RemoveAll(bundleDir)
+	defer cleanup()
 
 	sock, err := socketPath(bundleDir)
 	if err != nil {
 		t.Fatalf("error getting socket path: %v", err)
 	}
-	srv, cleanup, err := createConsoleSocket(sock)
-	if err != nil {
-		t.Fatalf("error creating socket at %q: %v", sock, err)
-	}
+	srv, cleanup := createConsoleSocket(t, sock)
 	defer cleanup()
 
 	// Create the container and pass the socket name.
 	args := Args{
-		ID:            testutil.UniqueContainerID(),
+		ID:            testutil.RandomContainerID(),
 		Spec:          spec,
 		BundleDir:     bundleDir,
 		ConsoleSocket: sock,
@@ -333,13 +326,13 @@ func TestJobControlSignalRootContainer(t *testing.T) {
 	// file. Writes after a certain point will block unless we drain the
 	// PTY, so we must continually copy from it.
 	//
-	// We log the output to stdout for debugabilitly, and also to a buffer,
+	// We log the output to stderr for debugabilitly, and also to a buffer,
 	// since we wait on particular output from bash below. We use a custom
 	// blockingBuffer which is thread-safe and also blocks on Read calls,
 	// which makes this a suitable Reader for WaitUntilRead.
 	ptyBuf := newBlockingBuffer()
 	tee := io.TeeReader(ptyMaster, ptyBuf)
-	go io.Copy(os.Stdout, tee)
+	go io.Copy(os.Stderr, tee)
 
 	// Start the container.
 	if err := c.Start(conf); err != nil {
@@ -368,7 +361,7 @@ func TestJobControlSignalRootContainer(t *testing.T) {
 		{PID: 1, Cmd: "bash", Threads: []kernel.ThreadID{1}},
 	}
 	if err := waitForProcessList(c, expectedPL); err != nil {
-		t.Fatal(err)
+		t.Fatalf("error waiting for processes: %v", err)
 	}
 
 	// Execute sleep via the terminal.
@@ -377,7 +370,7 @@ func TestJobControlSignalRootContainer(t *testing.T) {
 	// Wait for sleep to start.
 	expectedPL = append(expectedPL, &control.Process{PID: 2, PPID: 1, Cmd: "sleep", Threads: []kernel.ThreadID{2}})
 	if err := waitForProcessList(c, expectedPL); err != nil {
-		t.Fatal(err)
+		t.Fatalf("error waiting for processes: %v", err)
 	}
 
 	// Reset the pty buffer, so there is less output for us to scan later.
diff --git a/runsc/container/container.go b/runsc/container/container.go
index 68782c4be..6d297d0df 100644
--- a/runsc/container/container.go
+++ b/runsc/container/container.go
@@ -17,11 +17,11 @@ package container
 
 import (
 	"context"
+	"errors"
 	"fmt"
 	"io/ioutil"
 	"os"
 	"os/exec"
-	"os/signal"
 	"regexp"
 	"strconv"
 	"strings"
@@ -30,8 +30,11 @@ import (
 
 	"github.com/cenkalti/backoff"
 	specs "github.com/opencontainers/runtime-spec/specs-go"
+	"gvisor.dev/gvisor/pkg/abi/linux"
+	"gvisor.dev/gvisor/pkg/cleanup"
 	"gvisor.dev/gvisor/pkg/log"
 	"gvisor.dev/gvisor/pkg/sentry/control"
+	"gvisor.dev/gvisor/pkg/sentry/sighandling"
 	"gvisor.dev/gvisor/runsc/boot"
 	"gvisor.dev/gvisor/runsc/cgroup"
 	"gvisor.dev/gvisor/runsc/sandbox"
@@ -273,7 +276,7 @@ func New(conf *boot.Config, args Args) (*Container, error) {
 	}
 
 	if err := os.MkdirAll(conf.RootDir, 0711); err != nil {
-		return nil, fmt.Errorf("creating container root directory: %v", err)
+		return nil, fmt.Errorf("creating container root directory %q: %v", conf.RootDir, err)
 	}
 
 	c := &Container{
@@ -291,7 +294,7 @@ func New(conf *boot.Config, args Args) (*Container, error) {
 	}
 	// The Cleanup object cleans up partially created containers when an error
 	// occurs. Any errors occurring during cleanup itself are ignored.
-	cu := specutils.MakeCleanup(func() { _ = c.Destroy() })
+	cu := cleanup.Make(func() { _ = c.Destroy() })
 	defer cu.Clean()
 
 	// Lock the container metadata file to prevent concurrent creations of
@@ -400,7 +403,7 @@ func (c *Container) Start(conf *boot.Config) error {
 	if err := c.Saver.lock(); err != nil {
 		return err
 	}
-	unlock := specutils.MakeCleanup(func() { c.Saver.unlock() })
+	unlock := cleanup.Make(func() { c.Saver.unlock() })
 	defer unlock.Clean()
 
 	if err := c.requireStatus("start", Created); err != nil {
@@ -420,7 +423,7 @@ func (c *Container) Start(conf *boot.Config) error {
 			return err
 		}
 	} else {
-		// Join cgroup to strt gofer process to ensure it's part of the cgroup from
+		// Join cgroup to start gofer process to ensure it's part of the cgroup from
 		// the start (and all their children processes).
 		if err := runInCgroup(c.Sandbox.Cgroup, func() error {
 			// Create the gofer process.
@@ -504,7 +507,7 @@ func Run(conf *boot.Config, args Args) (syscall.WaitStatus, error) {
 	}
 	// Clean up partially created container if an error occurs.
 	// Any errors returned by Destroy() itself are ignored.
-	cu := specutils.MakeCleanup(func() {
+	cu := cleanup.Make(func() {
 		c.Destroy()
 	})
 	defer cu.Clean()
@@ -620,21 +623,15 @@ func (c *Container) SignalProcess(sig syscall.Signal, pid int32) error {
 // forwarding signals.
 func (c *Container) ForwardSignals(pid int32, fgProcess bool) func() {
 	log.Debugf("Forwarding all signals to container %q PID %d fgProcess=%t", c.ID, pid, fgProcess)
-	sigCh := make(chan os.Signal, 1)
-	signal.Notify(sigCh)
-	go func() {
-		for s := range sigCh {
-			log.Debugf("Forwarding signal %d to container %q PID %d fgProcess=%t", s, c.ID, pid, fgProcess)
-			if err := c.Sandbox.SignalProcess(c.ID, pid, s.(syscall.Signal), fgProcess); err != nil {
-				log.Warningf("error forwarding signal %d to container %q: %v", s, c.ID, err)
-			}
+	stop := sighandling.StartSignalForwarding(func(sig linux.Signal) {
+		log.Debugf("Forwarding signal %d to container %q PID %d fgProcess=%t", sig, c.ID, pid, fgProcess)
+		if err := c.Sandbox.SignalProcess(c.ID, pid, syscall.Signal(sig), fgProcess); err != nil {
+			log.Warningf("error forwarding signal %d to container %q: %v", sig, c.ID, err)
 		}
-		log.Debugf("Done forwarding signals to container %q PID %d fgProcess=%t", c.ID, pid, fgProcess)
-	}()
-
+	})
 	return func() {
-		signal.Stop(sigCh)
-		close(sigCh)
+		log.Debugf("Done forwarding signals to container %q PID %d fgProcess=%t", c.ID, pid, fgProcess)
+		stop()
 	}
 }
 
@@ -1066,27 +1063,19 @@ func runInCgroup(cg *cgroup.Cgroup, fn func() error) error {
 
 // adjustGoferOOMScoreAdj sets the oom_store_adj for the container's gofer.
 func (c *Container) adjustGoferOOMScoreAdj() error {
-	if c.GoferPid != 0 && c.Spec.Process.OOMScoreAdj != nil {
-		if err := setOOMScoreAdj(c.GoferPid, *c.Spec.Process.OOMScoreAdj); err != nil {
-			// Ignore NotExist error because it can be returned when the sandbox
-			// exited while OOM score was being adjusted.
-			if !os.IsNotExist(err) {
-				return fmt.Errorf("setting gofer oom_score_adj for container %q: %v", c.ID, err)
-			}
-			log.Warningf("Gofer process (%d) not found setting oom_score_adj", c.GoferPid)
-		}
+	if c.GoferPid == 0 || c.Spec.Process.OOMScoreAdj == nil {
+		return nil
 	}
-
-	return nil
+	return setOOMScoreAdj(c.GoferPid, *c.Spec.Process.OOMScoreAdj)
 }
 
 // adjustSandboxOOMScoreAdj sets the oom_score_adj for the sandbox.
 // oom_score_adj is set to the lowest oom_score_adj among the containers
 // running in the sandbox.
 //
-// TODO(gvisor.dev/issue/512): This call could race with other containers being
+// TODO(gvisor.dev/issue/238): This call could race with other containers being
 // created at the same time and end up setting the wrong oom_score_adj to the
-// sandbox.
+// sandbox. Use rpc client to synchronize.
 func adjustSandboxOOMScoreAdj(s *sandbox.Sandbox, rootDir string, destroy bool) error {
 	containers, err := loadSandbox(rootDir, s.ID)
 	if err != nil {
@@ -1154,29 +1143,29 @@ func adjustSandboxOOMScoreAdj(s *sandbox.Sandbox, rootDir string, destroy bool)
 	}
 
 	// Set the lowest of all containers oom_score_adj to the sandbox.
-	if err := setOOMScoreAdj(s.Pid, lowScore); err != nil {
-		// Ignore NotExist error because it can be returned when the sandbox
-		// exited while OOM score was being adjusted.
-		if !os.IsNotExist(err) {
-			return fmt.Errorf("setting oom_score_adj for sandbox %q: %v", s.ID, err)
-		}
-		log.Warningf("Sandbox process (%d) not found setting oom_score_adj", s.Pid)
-	}
-
-	return nil
+	return setOOMScoreAdj(s.Pid, lowScore)
 }
 
 // setOOMScoreAdj sets oom_score_adj to the given value for the given PID.
 // /proc must be available and mounted read-write. scoreAdj should be between
-// -1000 and 1000.
+// -1000 and 1000. It's a noop if the process has already exited.
 func setOOMScoreAdj(pid int, scoreAdj int) error {
 	f, err := os.OpenFile(fmt.Sprintf("/proc/%d/oom_score_adj", pid), os.O_WRONLY, 0644)
 	if err != nil {
+		// Ignore NotExist errors because it can race with process exit.
+		if os.IsNotExist(err) {
+			log.Warningf("Process (%d) not found setting oom_score_adj", pid)
+			return nil
+		}
 		return err
 	}
 	defer f.Close()
 	if _, err := f.WriteString(strconv.Itoa(scoreAdj)); err != nil {
-		return err
+		if errors.Is(err, syscall.ESRCH) {
+			log.Warningf("Process (%d) exited while setting oom_score_adj", pid)
+			return nil
+		}
+		return fmt.Errorf("setting oom_score_adj to %q: %v", scoreAdj, err)
 	}
 	return nil
 }
diff --git a/runsc/boot/loader_amd64.go b/runsc/container/container_norace_test.go
index b9669f2ac..838c1e20a 100644
--- a/runsc/boot/loader_amd64.go
+++ b/runsc/container/container_norace_test.go
@@ -1,4 +1,4 @@
-// Copyright 2019 The gVisor Authors.
+// Copyright 2018 The gVisor Authors.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -12,16 +12,9 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-// +build amd64
+// +build !race
 
-package boot
+package container
 
-import (
-	"gvisor.dev/gvisor/pkg/sentry/kernel"
-	"gvisor.dev/gvisor/pkg/sentry/syscalls/linux"
-)
-
-func init() {
-	// Register the global syscall table.
-	kernel.RegisterSyscallTable(linux.AMD64)
-}
+// Allow both kvm and ptrace for non-race builds.
+var platformOptions = []configOption{ptrace, kvm}
diff --git a/runsc/boot/loader_arm64.go b/runsc/container/container_race_test.go
index cf64d28c8..9fb4c4fc0 100644
--- a/runsc/boot/loader_arm64.go
+++ b/runsc/container/container_race_test.go
@@ -1,4 +1,4 @@
-// Copyright 2019 The gVisor Authors.
+// Copyright 2018 The gVisor Authors.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -12,16 +12,9 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-// +build arm64
+// +build race
 
-package boot
+package container
 
-import (
-	"gvisor.dev/gvisor/pkg/sentry/kernel"
-	"gvisor.dev/gvisor/pkg/sentry/syscalls/linux"
-)
-
-func init() {
-	// Register the global syscall table.
-	kernel.RegisterSyscallTable(linux.ARM64)
-}
+// Only enabled ptrace with race builds.
+var platformOptions = []configOption{ptrace}
diff --git a/runsc/container/container_test.go b/runsc/container/container_test.go
index 04a7dc237..e7715b6f7 100644
--- a/runsc/container/container_test.go
+++ b/runsc/container/container_test.go
@@ -39,10 +39,10 @@ import (
 	"gvisor.dev/gvisor/pkg/sentry/kernel"
 	"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
 	"gvisor.dev/gvisor/pkg/sync"
+	"gvisor.dev/gvisor/pkg/test/testutil"
 	"gvisor.dev/gvisor/runsc/boot"
 	"gvisor.dev/gvisor/runsc/boot/platforms"
 	"gvisor.dev/gvisor/runsc/specutils"
-	"gvisor.dev/gvisor/runsc/testutil"
 )
 
 // waitForProcessList waits for the given process list to show up in the container.
@@ -71,6 +71,7 @@ func waitForProcessCount(cont *Container, want int) error {
 			return &backoff.PermanentError{Err: err}
 		}
 		if got := len(pss); got != want {
+			log.Infof("Waiting for process count to reach %d. Current: %d", want, got)
 			return fmt.Errorf("wrong process count, got: %d, want: %d", got, want)
 		}
 		return nil
@@ -123,23 +124,6 @@ func procListsEqual(got, want []*control.Process) (bool, error) {
 	return true, nil
 }
 
-// getAndCheckProcLists is similar to waitForProcessList, but does not wait and retry the
-// test for equality. This is because we already confirmed that exec occurred.
-func getAndCheckProcLists(cont *Container, want []*control.Process) error {
-	got, err := cont.Processes()
-	if err != nil {
-		return fmt.Errorf("error getting process data from container: %v", err)
-	}
-	equal, err := procListsEqual(got, want)
-	if err != nil {
-		return err
-	}
-	if equal {
-		return nil
-	}
-	return fmt.Errorf("container got process list: %s, want: %s", procListToString(got), procListToString(want))
-}
-
 func procListToString(pl []*control.Process) string {
 	strs := make([]string, 0, len(pl))
 	for _, p := range pl {
@@ -231,16 +215,15 @@ func readOutputNum(file string, position int) (int, error) {
 // run starts the sandbox and waits for it to exit, checking that the
 // application succeeded.
 func run(spec *specs.Spec, conf *boot.Config) error {
-	rootDir, bundleDir, err := testutil.SetupContainer(spec, conf)
+	_, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf)
 	if err != nil {
 		return fmt.Errorf("error setting up container: %v", err)
 	}
-	defer os.RemoveAll(rootDir)
-	defer os.RemoveAll(bundleDir)
+	defer cleanup()
 
 	// Create, start and wait for the container.
 	args := Args{
-		ID:        testutil.UniqueContainerID(),
+		ID:        testutil.RandomContainerID(),
 		Spec:      spec,
 		BundleDir: bundleDir,
 		Attached:  true,
@@ -259,39 +242,64 @@ type configOption int
 
 const (
 	overlay configOption = iota
+	ptrace
 	kvm
 	nonExclusiveFS
 )
 
-var noOverlay = []configOption{kvm, nonExclusiveFS}
-var all = append(noOverlay, overlay)
+var (
+	noOverlay = append(platformOptions, nonExclusiveFS)
+	all       = append(noOverlay, overlay)
+)
 
 // configs generates different configurations to run tests.
-func configs(opts ...configOption) []*boot.Config {
+func configs(t *testing.T, opts ...configOption) map[string]*boot.Config {
 	// Always load the default config.
-	cs := []*boot.Config{testutil.TestConfig()}
-
+	cs := make(map[string]*boot.Config)
 	for _, o := range opts {
-		c := testutil.TestConfig()
 		switch o {
 		case overlay:
+			c := testutil.TestConfig(t)
 			c.Overlay = true
+			cs["overlay"] = c
+		case ptrace:
+			c := testutil.TestConfig(t)
+			c.Platform = platforms.Ptrace
+			cs["ptrace"] = c
 		case kvm:
-			// TODO(b/112165693): KVM tests are flaky. Disable until fixed.
-			continue
-
+			c := testutil.TestConfig(t)
 			c.Platform = platforms.KVM
+			cs["kvm"] = c
 		case nonExclusiveFS:
+			c := testutil.TestConfig(t)
 			c.FileAccess = boot.FileAccessShared
+			cs["non-exclusive"] = c
 		default:
 			panic(fmt.Sprintf("unknown config option %v", o))
-
 		}
-		cs = append(cs, c)
 	}
 	return cs
 }
 
+func configsWithVFS2(t *testing.T, opts ...configOption) map[string]*boot.Config {
+	vfs1 := configs(t, opts...)
+
+	var optsVFS2 []configOption
+	for _, opt := range opts {
+		// TODO(gvisor.dev/issue/1487): Enable overlay tests.
+		if opt != overlay {
+			optsVFS2 = append(optsVFS2, opt)
+		}
+	}
+
+	for key, value := range configs(t, optsVFS2...) {
+		value.VFS2 = true
+		vfs1[key+"VFS2"] = value
+	}
+
+	return vfs1
+}
+
 // TestLifecycle tests the basic Create/Start/Signal/Destroy container lifecycle.
 // It verifies after each step that the container can be loaded from disk, and
 // has the correct status.
@@ -301,133 +309,133 @@ func TestLifecycle(t *testing.T) {
 	childReaper.Start()
 	defer childReaper.Stop()
 
-	for _, conf := range configs(all...) {
-		t.Logf("Running test with conf: %+v", conf)
-		// The container will just sleep for a long time.  We will kill it before
-		// it finishes sleeping.
-		spec := testutil.NewSpecWithArgs("sleep", "100")
-
-		rootDir, bundleDir, err := testutil.SetupContainer(spec, conf)
-		if err != nil {
-			t.Fatalf("error setting up container: %v", err)
-		}
-		defer os.RemoveAll(rootDir)
-		defer os.RemoveAll(bundleDir)
-
-		// expectedPL lists the expected process state of the container.
-		expectedPL := []*control.Process{
-			{
-				UID:     0,
-				PID:     1,
-				PPID:    0,
-				C:       0,
-				Cmd:     "sleep",
-				Threads: []kernel.ThreadID{1},
-			},
-		}
-		// Create the container.
-		args := Args{
-			ID:        testutil.UniqueContainerID(),
-			Spec:      spec,
-			BundleDir: bundleDir,
-		}
-		c, err := New(conf, args)
-		if err != nil {
-			t.Fatalf("error creating container: %v", err)
-		}
-		defer c.Destroy()
+	for name, conf := range configsWithVFS2(t, all...) {
+		t.Run(name, func(t *testing.T) {
+			// The container will just sleep for a long time.  We will kill it before
+			// it finishes sleeping.
+			spec := testutil.NewSpecWithArgs("sleep", "100")
 
-		// Load the container from disk and check the status.
-		c, err = Load(rootDir, args.ID)
-		if err != nil {
-			t.Fatalf("error loading container: %v", err)
-		}
-		if got, want := c.Status, Created; got != want {
-			t.Errorf("container status got %v, want %v", got, want)
-		}
+			rootDir, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf)
+			if err != nil {
+				t.Fatalf("error setting up container: %v", err)
+			}
+			defer cleanup()
 
-		// List should return the container id.
-		ids, err := List(rootDir)
-		if err != nil {
-			t.Fatalf("error listing containers: %v", err)
-		}
-		if got, want := ids, []string{args.ID}; !reflect.DeepEqual(got, want) {
-			t.Errorf("container list got %v, want %v", got, want)
-		}
+			// expectedPL lists the expected process state of the container.
+			expectedPL := []*control.Process{
+				{
+					UID:     0,
+					PID:     1,
+					PPID:    0,
+					C:       0,
+					Cmd:     "sleep",
+					Threads: []kernel.ThreadID{1},
+				},
+			}
+			// Create the container.
+			args := Args{
+				ID:        testutil.RandomContainerID(),
+				Spec:      spec,
+				BundleDir: bundleDir,
+			}
+			c, err := New(conf, args)
+			if err != nil {
+				t.Fatalf("error creating container: %v", err)
+			}
+			defer c.Destroy()
 
-		// Start the container.
-		if err := c.Start(conf); err != nil {
-			t.Fatalf("error starting container: %v", err)
-		}
+			// Load the container from disk and check the status.
+			c, err = Load(rootDir, args.ID)
+			if err != nil {
+				t.Fatalf("error loading container: %v", err)
+			}
+			if got, want := c.Status, Created; got != want {
+				t.Errorf("container status got %v, want %v", got, want)
+			}
 
-		// Load the container from disk and check the status.
-		c, err = Load(rootDir, args.ID)
-		if err != nil {
-			t.Fatalf("error loading container: %v", err)
-		}
-		if got, want := c.Status, Running; got != want {
-			t.Errorf("container status got %v, want %v", got, want)
-		}
+			// List should return the container id.
+			ids, err := List(rootDir)
+			if err != nil {
+				t.Fatalf("error listing containers: %v", err)
+			}
+			if got, want := ids, []string{args.ID}; !reflect.DeepEqual(got, want) {
+				t.Errorf("container list got %v, want %v", got, want)
+			}
 
-		// Verify that "sleep 100" is running.
-		if err := waitForProcessList(c, expectedPL); err != nil {
-			t.Error(err)
-		}
+			// Start the container.
+			if err := c.Start(conf); err != nil {
+				t.Fatalf("error starting container: %v", err)
+			}
 
-		// Wait on the container.
-		var wg sync.WaitGroup
-		wg.Add(1)
-		ch := make(chan struct{})
-		go func() {
-			ch <- struct{}{}
-			ws, err := c.Wait()
+			// Load the container from disk and check the status.
+			c, err = Load(rootDir, args.ID)
 			if err != nil {
-				t.Fatalf("error waiting on container: %v", err)
+				t.Fatalf("error loading container: %v", err)
 			}
-			if got, want := ws.Signal(), syscall.SIGTERM; got != want {
-				t.Fatalf("got signal %v, want %v", got, want)
+			if got, want := c.Status, Running; got != want {
+				t.Errorf("container status got %v, want %v", got, want)
 			}
-			wg.Done()
-		}()
 
-		// Wait a bit to ensure that we've started waiting on the
-		// container before we signal.
-		<-ch
-		time.Sleep(100 * time.Millisecond)
-		// Send the container a SIGTERM which will cause it to stop.
-		if err := c.SignalContainer(syscall.SIGTERM, false); err != nil {
-			t.Fatalf("error sending signal %v to container: %v", syscall.SIGTERM, err)
-		}
-		// Wait for it to die.
-		wg.Wait()
+			// Verify that "sleep 100" is running.
+			if err := waitForProcessList(c, expectedPL); err != nil {
+				t.Error(err)
+			}
 
-		// Load the container from disk and check the status.
-		c, err = Load(rootDir, args.ID)
-		if err != nil {
-			t.Fatalf("error loading container: %v", err)
-		}
-		if got, want := c.Status, Stopped; got != want {
-			t.Errorf("container status got %v, want %v", got, want)
-		}
+			// Wait on the container.
+			ch := make(chan error)
+			go func() {
+				ws, err := c.Wait()
+				if err != nil {
+					ch <- err
+				}
+				if got, want := ws.Signal(), syscall.SIGTERM; got != want {
+					ch <- fmt.Errorf("got signal %v, want %v", got, want)
+				}
+				ch <- nil
+			}()
 
-		// Destroy the container.
-		if err := c.Destroy(); err != nil {
-			t.Fatalf("error destroying container: %v", err)
-		}
+			// Wait a bit to ensure that we've started waiting on
+			// the container before we signal.
+			time.Sleep(time.Second)
 
-		// List should not return the container id.
-		ids, err = List(rootDir)
-		if err != nil {
-			t.Fatalf("error listing containers: %v", err)
-		}
-		if len(ids) != 0 {
-			t.Errorf("expected container list to be empty, but got %v", ids)
-		}
+			// Send the container a SIGTERM which will cause it to stop.
+			if err := c.SignalContainer(syscall.SIGTERM, false); err != nil {
+				t.Fatalf("error sending signal %v to container: %v", syscall.SIGTERM, err)
+			}
 
-		// Loading the container by id should fail.
-		if _, err = Load(rootDir, args.ID); err == nil {
-			t.Errorf("expected loading destroyed container to fail, but it did not")
-		}
+			// Wait for it to die.
+			if err := <-ch; err != nil {
+				t.Fatalf("error waiting for container: %v", err)
+			}
+
+			// Load the container from disk and check the status.
+			c, err = Load(rootDir, args.ID)
+			if err != nil {
+				t.Fatalf("error loading container: %v", err)
+			}
+			if got, want := c.Status, Stopped; got != want {
+				t.Errorf("container status got %v, want %v", got, want)
+			}
+
+			// Destroy the container.
+			if err := c.Destroy(); err != nil {
+				t.Fatalf("error destroying container: %v", err)
+			}
+
+			// List should not return the container id.
+			ids, err = List(rootDir)
+			if err != nil {
+				t.Fatalf("error listing containers: %v", err)
+			}
+			if len(ids) != 0 {
+				t.Errorf("expected container list to be empty, but got %v", ids)
+			}
+
+			// Loading the container by id should fail.
+			if _, err = Load(rootDir, args.ID); err == nil {
+				t.Errorf("expected loading destroyed container to fail, but it did not")
+			}
+		})
 	}
 }
 
@@ -436,12 +444,14 @@ func TestExePath(t *testing.T) {
 	// Create two directories that will be prepended to PATH.
 	firstPath, err := ioutil.TempDir(testutil.TmpDir(), "first")
 	if err != nil {
-		t.Fatal(err)
+		t.Fatalf("error creating temporary directory: %v", err)
 	}
+	defer os.RemoveAll(firstPath)
 	secondPath, err := ioutil.TempDir(testutil.TmpDir(), "second")
 	if err != nil {
-		t.Fatal(err)
+		t.Fatalf("error creating temporary directory: %v", err)
 	}
+	defer os.RemoveAll(secondPath)
 
 	// Create two minimal executables in the second path, two of which
 	// will be masked by files in first path.
@@ -449,11 +459,11 @@ func TestExePath(t *testing.T) {
 		path := filepath.Join(secondPath, p)
 		f, err := os.OpenFile(path, os.O_CREATE|os.O_EXCL|os.O_RDWR, 0777)
 		if err != nil {
-			t.Fatal(err)
+			t.Fatalf("error opening path: %v", err)
 		}
 		defer f.Close()
 		if _, err := io.WriteString(f, "#!/bin/true\n"); err != nil {
-			t.Fatal(err)
+			t.Fatalf("error writing contents: %v", err)
 		}
 	}
 
@@ -462,7 +472,7 @@ func TestExePath(t *testing.T) {
 	nonExecutable := filepath.Join(firstPath, "masked1")
 	f2, err := os.OpenFile(nonExecutable, os.O_CREATE|os.O_EXCL, 0666)
 	if err != nil {
-		t.Fatal(err)
+		t.Fatalf("error opening file: %v", err)
 	}
 	f2.Close()
 
@@ -470,85 +480,95 @@ func TestExePath(t *testing.T) {
 	// executable in the second.
 	nonRegular := filepath.Join(firstPath, "masked2")
 	if err := os.Mkdir(nonRegular, 0777); err != nil {
-		t.Fatal(err)
-	}
-
-	for _, conf := range configs(overlay) {
-		t.Logf("Running test with conf: %+v", conf)
-		for _, test := range []struct {
-			path    string
-			success bool
-		}{
-			{path: "true", success: true},
-			{path: "bin/true", success: true},
-			{path: "/bin/true", success: true},
-			{path: "thisfiledoesntexit", success: false},
-			{path: "bin/thisfiledoesntexit", success: false},
-			{path: "/bin/thisfiledoesntexit", success: false},
-
-			{path: "unmasked", success: true},
-			{path: filepath.Join(firstPath, "unmasked"), success: false},
-			{path: filepath.Join(secondPath, "unmasked"), success: true},
-
-			{path: "masked1", success: true},
-			{path: filepath.Join(firstPath, "masked1"), success: false},
-			{path: filepath.Join(secondPath, "masked1"), success: true},
-
-			{path: "masked2", success: true},
-			{path: filepath.Join(firstPath, "masked2"), success: false},
-			{path: filepath.Join(secondPath, "masked2"), success: true},
-		} {
-			spec := testutil.NewSpecWithArgs(test.path)
-			spec.Process.Env = []string{
-				fmt.Sprintf("PATH=%s:%s:%s", firstPath, secondPath, os.Getenv("PATH")),
-			}
-
-			rootDir, bundleDir, err := testutil.SetupContainer(spec, conf)
-			if err != nil {
-				t.Fatalf("exec: %s, error setting up container: %v", test.path, err)
-			}
-
-			args := Args{
-				ID:        testutil.UniqueContainerID(),
-				Spec:      spec,
-				BundleDir: bundleDir,
-				Attached:  true,
-			}
-			ws, err := Run(conf, args)
+		t.Fatalf("error making directory: %v", err)
+	}
+
+	for name, conf := range configsWithVFS2(t, overlay) {
+		t.Run(name, func(t *testing.T) {
+			for _, test := range []struct {
+				path    string
+				success bool
+			}{
+				{path: "true", success: true},
+				{path: "bin/true", success: true},
+				{path: "/bin/true", success: true},
+				{path: "thisfiledoesntexit", success: false},
+				{path: "bin/thisfiledoesntexit", success: false},
+				{path: "/bin/thisfiledoesntexit", success: false},
+
+				{path: "unmasked", success: true},
+				{path: filepath.Join(firstPath, "unmasked"), success: false},
+				{path: filepath.Join(secondPath, "unmasked"), success: true},
+
+				{path: "masked1", success: true},
+				{path: filepath.Join(firstPath, "masked1"), success: false},
+				{path: filepath.Join(secondPath, "masked1"), success: true},
+
+				{path: "masked2", success: true},
+				{path: filepath.Join(firstPath, "masked2"), success: false},
+				{path: filepath.Join(secondPath, "masked2"), success: true},
+			} {
+				t.Run(fmt.Sprintf("path=%s,success=%t", test.path, test.success), func(t *testing.T) {
+					spec := testutil.NewSpecWithArgs(test.path)
+					spec.Process.Env = []string{
+						fmt.Sprintf("PATH=%s:%s:%s", firstPath, secondPath, os.Getenv("PATH")),
+					}
 
-			os.RemoveAll(rootDir)
-			os.RemoveAll(bundleDir)
+					_, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf)
+					if err != nil {
+						t.Fatalf("exec: error setting up container: %v", err)
+					}
+					defer cleanup()
 
-			if test.success {
-				if err != nil {
-					t.Errorf("exec: %s, error running container: %v", test.path, err)
-				}
-				if ws.ExitStatus() != 0 {
-					t.Errorf("exec: %s, got exit status %v want %v", test.path, ws.ExitStatus(), 0)
-				}
-			} else {
-				if err == nil {
-					t.Errorf("exec: %s, got: no error, want: error", test.path)
-				}
+					args := Args{
+						ID:        testutil.RandomContainerID(),
+						Spec:      spec,
+						BundleDir: bundleDir,
+						Attached:  true,
+					}
+					ws, err := Run(conf, args)
+
+					if test.success {
+						if err != nil {
+							t.Errorf("exec: error running container: %v", err)
+						}
+						if ws.ExitStatus() != 0 {
+							t.Errorf("exec: got exit status %v want %v", ws.ExitStatus(), 0)
+						}
+					} else {
+						if err == nil {
+							t.Errorf("exec: got: no error, want: error")
+						}
+					}
+				})
 			}
-		}
+		})
 	}
 }
 
 // Test the we can retrieve the application exit status from the container.
 func TestAppExitStatus(t *testing.T) {
+	doAppExitStatus(t, false)
+}
+
+// This is TestAppExitStatus for VFSv2.
+func TestAppExitStatusVFS2(t *testing.T) {
+	doAppExitStatus(t, true)
+}
+
+func doAppExitStatus(t *testing.T, vfs2 bool) {
 	// First container will succeed.
 	succSpec := testutil.NewSpecWithArgs("true")
-	conf := testutil.TestConfig()
-	rootDir, bundleDir, err := testutil.SetupContainer(succSpec, conf)
+	conf := testutil.TestConfig(t)
+	conf.VFS2 = vfs2
+	_, bundleDir, cleanup, err := testutil.SetupContainer(succSpec, conf)
 	if err != nil {
 		t.Fatalf("error setting up container: %v", err)
 	}
-	defer os.RemoveAll(rootDir)
-	defer os.RemoveAll(bundleDir)
+	defer cleanup()
 
 	args := Args{
-		ID:        testutil.UniqueContainerID(),
+		ID:        testutil.RandomContainerID(),
 		Spec:      succSpec,
 		BundleDir: bundleDir,
 		Attached:  true,
@@ -565,15 +585,14 @@ func TestAppExitStatus(t *testing.T) {
 	wantStatus := 123
 	errSpec := testutil.NewSpecWithArgs("bash", "-c", fmt.Sprintf("exit %d", wantStatus))
 
-	rootDir2, bundleDir2, err := testutil.SetupContainer(errSpec, conf)
+	_, bundleDir2, cleanup2, err := testutil.SetupContainer(errSpec, conf)
 	if err != nil {
 		t.Fatalf("error setting up container: %v", err)
 	}
-	defer os.RemoveAll(rootDir2)
-	defer os.RemoveAll(bundleDir2)
+	defer cleanup2()
 
 	args2 := Args{
-		ID:        testutil.UniqueContainerID(),
+		ID:        testutil.RandomContainerID(),
 		Spec:      errSpec,
 		BundleDir: bundleDir2,
 		Attached:  true,
@@ -589,166 +608,163 @@ func TestAppExitStatus(t *testing.T) {
 
 // TestExec verifies that a container can exec a new program.
 func TestExec(t *testing.T) {
-	for _, conf := range configs(overlay) {
-		t.Logf("Running test with conf: %+v", conf)
+	for name, conf := range configsWithVFS2(t, overlay) {
+		t.Run(name, func(t *testing.T) {
+			const uid = 343
+			spec := testutil.NewSpecWithArgs("sleep", "100")
 
-		const uid = 343
-		spec := testutil.NewSpecWithArgs("sleep", "100")
+			_, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf)
+			if err != nil {
+				t.Fatalf("error setting up container: %v", err)
+			}
+			defer cleanup()
 
-		rootDir, bundleDir, err := testutil.SetupContainer(spec, conf)
-		if err != nil {
-			t.Fatalf("error setting up container: %v", err)
-		}
-		defer os.RemoveAll(rootDir)
-		defer os.RemoveAll(bundleDir)
+			// Create and start the container.
+			args := Args{
+				ID:        testutil.RandomContainerID(),
+				Spec:      spec,
+				BundleDir: bundleDir,
+			}
+			cont, err := New(conf, args)
+			if err != nil {
+				t.Fatalf("error creating container: %v", err)
+			}
+			defer cont.Destroy()
+			if err := cont.Start(conf); err != nil {
+				t.Fatalf("error starting container: %v", err)
+			}
 
-		// Create and start the container.
-		args := Args{
-			ID:        testutil.UniqueContainerID(),
-			Spec:      spec,
-			BundleDir: bundleDir,
-		}
-		cont, err := New(conf, args)
-		if err != nil {
-			t.Fatalf("error creating container: %v", err)
-		}
-		defer cont.Destroy()
-		if err := cont.Start(conf); err != nil {
-			t.Fatalf("error starting container: %v", err)
-		}
+			// expectedPL lists the expected process state of the container.
+			expectedPL := []*control.Process{
+				{
+					UID:     0,
+					PID:     1,
+					PPID:    0,
+					C:       0,
+					Cmd:     "sleep",
+					Threads: []kernel.ThreadID{1},
+				},
+				{
+					UID:     uid,
+					PID:     2,
+					PPID:    0,
+					C:       0,
+					Cmd:     "sleep",
+					Threads: []kernel.ThreadID{2},
+				},
+			}
 
-		// expectedPL lists the expected process state of the container.
-		expectedPL := []*control.Process{
-			{
-				UID:     0,
-				PID:     1,
-				PPID:    0,
-				C:       0,
-				Cmd:     "sleep",
-				Threads: []kernel.ThreadID{1},
-			},
-			{
-				UID:     uid,
-				PID:     2,
-				PPID:    0,
-				C:       0,
-				Cmd:     "sleep",
-				Threads: []kernel.ThreadID{2},
-			},
-		}
+			// Verify that "sleep 100" is running.
+			if err := waitForProcessList(cont, expectedPL[:1]); err != nil {
+				t.Error(err)
+			}
 
-		// Verify that "sleep 100" is running.
-		if err := waitForProcessList(cont, expectedPL[:1]); err != nil {
-			t.Error(err)
-		}
+			execArgs := &control.ExecArgs{
+				Filename:         "/bin/sleep",
+				Argv:             []string{"/bin/sleep", "5"},
+				WorkingDirectory: "/",
+				KUID:             uid,
+			}
 
-		execArgs := &control.ExecArgs{
-			Filename:         "/bin/sleep",
-			Argv:             []string{"/bin/sleep", "5"},
-			WorkingDirectory: "/",
-			KUID:             uid,
-		}
+			// Verify that "sleep 100" and "sleep 5" are running
+			// after exec.  First, start running exec (whick
+			// blocks).
+			ch := make(chan error)
+			go func() {
+				exitStatus, err := cont.executeSync(execArgs)
+				if err != nil {
+					ch <- err
+				} else if exitStatus != 0 {
+					ch <- fmt.Errorf("failed with exit status: %v", exitStatus)
+				} else {
+					ch <- nil
+				}
+			}()
 
-		// Verify that "sleep 100" and "sleep 5" are running after exec.
-		// First, start running exec (whick blocks).
-		status := make(chan error, 1)
-		go func() {
-			exitStatus, err := cont.executeSync(execArgs)
-			if err != nil {
-				log.Debugf("error executing: %v", err)
-				status <- err
-			} else if exitStatus != 0 {
-				log.Debugf("bad status: %d", exitStatus)
-				status <- fmt.Errorf("failed with exit status: %v", exitStatus)
-			} else {
-				status <- nil
+			if err := waitForProcessList(cont, expectedPL); err != nil {
+				t.Fatalf("error waiting for processes: %v", err)
 			}
-		}()
 
-		if err := waitForProcessList(cont, expectedPL); err != nil {
-			t.Fatal(err)
-		}
-
-		// Ensure that exec finished without error.
-		select {
-		case <-time.After(10 * time.Second):
-			t.Fatalf("container timed out waiting for exec to finish.")
-		case st := <-status:
-			if st != nil {
-				t.Errorf("container failed to exec %v: %v", args, err)
+			// Ensure that exec finished without error.
+			select {
+			case <-time.After(10 * time.Second):
+				t.Fatalf("container timed out waiting for exec to finish.")
+			case err := <-ch:
+				if err != nil {
+					t.Errorf("container failed to exec %v: %v", args, err)
+				}
 			}
-		}
+		})
 	}
 }
 
 // TestKillPid verifies that we can signal individual exec'd processes.
 func TestKillPid(t *testing.T) {
-	for _, conf := range configs(overlay) {
-		t.Logf("Running test with conf: %+v", conf)
-
-		app, err := testutil.FindFile("runsc/container/test_app/test_app")
-		if err != nil {
-			t.Fatal("error finding test_app:", err)
-		}
+	for name, conf := range configsWithVFS2(t, overlay) {
+		t.Run(name, func(t *testing.T) {
+			app, err := testutil.FindFile("test/cmd/test_app/test_app")
+			if err != nil {
+				t.Fatal("error finding test_app:", err)
+			}
 
-		const nProcs = 4
-		spec := testutil.NewSpecWithArgs(app, "task-tree", "--depth", strconv.Itoa(nProcs-1), "--width=1", "--pause=true")
-		rootDir, bundleDir, err := testutil.SetupContainer(spec, conf)
-		if err != nil {
-			t.Fatalf("error setting up container: %v", err)
-		}
-		defer os.RemoveAll(rootDir)
-		defer os.RemoveAll(bundleDir)
+			const nProcs = 4
+			spec := testutil.NewSpecWithArgs(app, "task-tree", "--depth", strconv.Itoa(nProcs-1), "--width=1", "--pause=true")
+			_, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf)
+			if err != nil {
+				t.Fatalf("error setting up container: %v", err)
+			}
+			defer cleanup()
 
-		// Create and start the container.
-		args := Args{
-			ID:        testutil.UniqueContainerID(),
-			Spec:      spec,
-			BundleDir: bundleDir,
-		}
-		cont, err := New(conf, args)
-		if err != nil {
-			t.Fatalf("error creating container: %v", err)
-		}
-		defer cont.Destroy()
-		if err := cont.Start(conf); err != nil {
-			t.Fatalf("error starting container: %v", err)
-		}
+			// Create and start the container.
+			args := Args{
+				ID:        testutil.RandomContainerID(),
+				Spec:      spec,
+				BundleDir: bundleDir,
+			}
+			cont, err := New(conf, args)
+			if err != nil {
+				t.Fatalf("error creating container: %v", err)
+			}
+			defer cont.Destroy()
+			if err := cont.Start(conf); err != nil {
+				t.Fatalf("error starting container: %v", err)
+			}
 
-		// Verify that all processes are running.
-		if err := waitForProcessCount(cont, nProcs); err != nil {
-			t.Fatalf("timed out waiting for processes to start: %v", err)
-		}
+			// Verify that all processes are running.
+			if err := waitForProcessCount(cont, nProcs); err != nil {
+				t.Fatalf("timed out waiting for processes to start: %v", err)
+			}
 
-		// Kill the child process with the largest PID.
-		procs, err := cont.Processes()
-		if err != nil {
-			t.Fatalf("failed to get process list: %v", err)
-		}
-		var pid int32
-		for _, p := range procs {
-			if pid < int32(p.PID) {
-				pid = int32(p.PID)
+			// Kill the child process with the largest PID.
+			procs, err := cont.Processes()
+			if err != nil {
+				t.Fatalf("failed to get process list: %v", err)
+			}
+			var pid int32
+			for _, p := range procs {
+				if pid < int32(p.PID) {
+					pid = int32(p.PID)
+				}
+			}
+			if err := cont.SignalProcess(syscall.SIGKILL, pid); err != nil {
+				t.Fatalf("failed to signal process %d: %v", pid, err)
 			}
-		}
-		if err := cont.SignalProcess(syscall.SIGKILL, pid); err != nil {
-			t.Fatalf("failed to signal process %d: %v", pid, err)
-		}
 
-		// Verify that one process is gone.
-		if err := waitForProcessCount(cont, nProcs-1); err != nil {
-			t.Fatal(err)
-		}
+			// Verify that one process is gone.
+			if err := waitForProcessCount(cont, nProcs-1); err != nil {
+				t.Fatalf("error waiting for processes: %v", err)
+			}
 
-		procs, err = cont.Processes()
-		if err != nil {
-			t.Fatalf("failed to get process list: %v", err)
-		}
-		for _, p := range procs {
-			if pid == int32(p.PID) {
-				t.Fatalf("pid %d is still alive, which should be killed", pid)
+			procs, err = cont.Processes()
+			if err != nil {
+				t.Fatalf("failed to get process list: %v", err)
 			}
-		}
+			for _, p := range procs {
+				if pid == int32(p.PID) {
+					t.Fatalf("pid %d is still alive, which should be killed", pid)
+				}
+			}
+		})
 	}
 }
 
@@ -759,160 +775,160 @@ func TestKillPid(t *testing.T) {
 // be the next consecutive number after the last number from the checkpointed container.
 func TestCheckpointRestore(t *testing.T) {
 	// Skip overlay because test requires writing to host file.
-	for _, conf := range configs(noOverlay...) {
-		t.Logf("Running test with conf: %+v", conf)
-
-		dir, err := ioutil.TempDir(testutil.TmpDir(), "checkpoint-test")
-		if err != nil {
-			t.Fatalf("ioutil.TempDir failed: %v", err)
-		}
-		if err := os.Chmod(dir, 0777); err != nil {
-			t.Fatalf("error chmoding file: %q, %v", dir, err)
-		}
+	for name, conf := range configs(t, noOverlay...) {
+		t.Run(name, func(t *testing.T) {
+			dir, err := ioutil.TempDir(testutil.TmpDir(), "checkpoint-test")
+			if err != nil {
+				t.Fatalf("ioutil.TempDir failed: %v", err)
+			}
+			defer os.RemoveAll(dir)
+			if err := os.Chmod(dir, 0777); err != nil {
+				t.Fatalf("error chmoding file: %q, %v", dir, err)
+			}
 
-		outputPath := filepath.Join(dir, "output")
-		outputFile, err := createWriteableOutputFile(outputPath)
-		if err != nil {
-			t.Fatalf("error creating output file: %v", err)
-		}
-		defer outputFile.Close()
+			outputPath := filepath.Join(dir, "output")
+			outputFile, err := createWriteableOutputFile(outputPath)
+			if err != nil {
+				t.Fatalf("error creating output file: %v", err)
+			}
+			defer outputFile.Close()
 
-		script := fmt.Sprintf("for ((i=0; ;i++)); do echo $i >> %q; sleep 1; done", outputPath)
-		spec := testutil.NewSpecWithArgs("bash", "-c", script)
-		rootDir, bundleDir, err := testutil.SetupContainer(spec, conf)
-		if err != nil {
-			t.Fatalf("error setting up container: %v", err)
-		}
-		defer os.RemoveAll(rootDir)
-		defer os.RemoveAll(bundleDir)
+			script := fmt.Sprintf("for ((i=0; ;i++)); do echo $i >> %q; sleep 1; done", outputPath)
+			spec := testutil.NewSpecWithArgs("bash", "-c", script)
+			_, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf)
+			if err != nil {
+				t.Fatalf("error setting up container: %v", err)
+			}
+			defer cleanup()
 
-		// Create and start the container.
-		args := Args{
-			ID:        testutil.UniqueContainerID(),
-			Spec:      spec,
-			BundleDir: bundleDir,
-		}
-		cont, err := New(conf, args)
-		if err != nil {
-			t.Fatalf("error creating container: %v", err)
-		}
-		defer cont.Destroy()
-		if err := cont.Start(conf); err != nil {
-			t.Fatalf("error starting container: %v", err)
-		}
+			// Create and start the container.
+			args := Args{
+				ID:        testutil.RandomContainerID(),
+				Spec:      spec,
+				BundleDir: bundleDir,
+			}
+			cont, err := New(conf, args)
+			if err != nil {
+				t.Fatalf("error creating container: %v", err)
+			}
+			defer cont.Destroy()
+			if err := cont.Start(conf); err != nil {
+				t.Fatalf("error starting container: %v", err)
+			}
 
-		// Set the image path, which is where the checkpoint image will be saved.
-		imagePath := filepath.Join(dir, "test-image-file")
+			// Set the image path, which is where the checkpoint image will be saved.
+			imagePath := filepath.Join(dir, "test-image-file")
 
-		// Create the image file and open for writing.
-		file, err := os.OpenFile(imagePath, os.O_CREATE|os.O_EXCL|os.O_RDWR, 0644)
-		if err != nil {
-			t.Fatalf("error opening new file at imagePath: %v", err)
-		}
-		defer file.Close()
+			// Create the image file and open for writing.
+			file, err := os.OpenFile(imagePath, os.O_CREATE|os.O_EXCL|os.O_RDWR, 0644)
+			if err != nil {
+				t.Fatalf("error opening new file at imagePath: %v", err)
+			}
+			defer file.Close()
 
-		// Wait until application has ran.
-		if err := waitForFileNotEmpty(outputFile); err != nil {
-			t.Fatalf("Failed to wait for output file: %v", err)
-		}
+			// Wait until application has ran.
+			if err := waitForFileNotEmpty(outputFile); err != nil {
+				t.Fatalf("Failed to wait for output file: %v", err)
+			}
 
-		// Checkpoint running container; save state into new file.
-		if err := cont.Checkpoint(file); err != nil {
-			t.Fatalf("error checkpointing container to empty file: %v", err)
-		}
-		defer os.RemoveAll(imagePath)
+			// Checkpoint running container; save state into new file.
+			if err := cont.Checkpoint(file); err != nil {
+				t.Fatalf("error checkpointing container to empty file: %v", err)
+			}
+			defer os.RemoveAll(imagePath)
 
-		lastNum, err := readOutputNum(outputPath, -1)
-		if err != nil {
-			t.Fatalf("error with outputFile: %v", err)
-		}
+			lastNum, err := readOutputNum(outputPath, -1)
+			if err != nil {
+				t.Fatalf("error with outputFile: %v", err)
+			}
 
-		// Delete and recreate file before restoring.
-		if err := os.Remove(outputPath); err != nil {
-			t.Fatalf("error removing file")
-		}
-		outputFile2, err := createWriteableOutputFile(outputPath)
-		if err != nil {
-			t.Fatalf("error creating output file: %v", err)
-		}
-		defer outputFile2.Close()
+			// Delete and recreate file before restoring.
+			if err := os.Remove(outputPath); err != nil {
+				t.Fatalf("error removing file")
+			}
+			outputFile2, err := createWriteableOutputFile(outputPath)
+			if err != nil {
+				t.Fatalf("error creating output file: %v", err)
+			}
+			defer outputFile2.Close()
 
-		// Restore into a new container.
-		args2 := Args{
-			ID:        testutil.UniqueContainerID(),
-			Spec:      spec,
-			BundleDir: bundleDir,
-		}
-		cont2, err := New(conf, args2)
-		if err != nil {
-			t.Fatalf("error creating container: %v", err)
-		}
-		defer cont2.Destroy()
+			// Restore into a new container.
+			args2 := Args{
+				ID:        testutil.RandomContainerID(),
+				Spec:      spec,
+				BundleDir: bundleDir,
+			}
+			cont2, err := New(conf, args2)
+			if err != nil {
+				t.Fatalf("error creating container: %v", err)
+			}
+			defer cont2.Destroy()
 
-		if err := cont2.Restore(spec, conf, imagePath); err != nil {
-			t.Fatalf("error restoring container: %v", err)
-		}
+			if err := cont2.Restore(spec, conf, imagePath); err != nil {
+				t.Fatalf("error restoring container: %v", err)
+			}
 
-		// Wait until application has ran.
-		if err := waitForFileNotEmpty(outputFile2); err != nil {
-			t.Fatalf("Failed to wait for output file: %v", err)
-		}
+			// Wait until application has ran.
+			if err := waitForFileNotEmpty(outputFile2); err != nil {
+				t.Fatalf("Failed to wait for output file: %v", err)
+			}
 
-		firstNum, err := readOutputNum(outputPath, 0)
-		if err != nil {
-			t.Fatalf("error with outputFile: %v", err)
-		}
+			firstNum, err := readOutputNum(outputPath, 0)
+			if err != nil {
+				t.Fatalf("error with outputFile: %v", err)
+			}
 
-		// Check that lastNum is one less than firstNum and that the container picks
-		// up from where it left off.
-		if lastNum+1 != firstNum {
-			t.Errorf("error numbers not in order, previous: %d, next: %d", lastNum, firstNum)
-		}
-		cont2.Destroy()
+			// Check that lastNum is one less than firstNum and that the container picks
+			// up from where it left off.
+			if lastNum+1 != firstNum {
+				t.Errorf("error numbers not in order, previous: %d, next: %d", lastNum, firstNum)
+			}
+			cont2.Destroy()
 
-		// Restore into another container!
-		// Delete and recreate file before restoring.
-		if err := os.Remove(outputPath); err != nil {
-			t.Fatalf("error removing file")
-		}
-		outputFile3, err := createWriteableOutputFile(outputPath)
-		if err != nil {
-			t.Fatalf("error creating output file: %v", err)
-		}
-		defer outputFile3.Close()
+			// Restore into another container!
+			// Delete and recreate file before restoring.
+			if err := os.Remove(outputPath); err != nil {
+				t.Fatalf("error removing file")
+			}
+			outputFile3, err := createWriteableOutputFile(outputPath)
+			if err != nil {
+				t.Fatalf("error creating output file: %v", err)
+			}
+			defer outputFile3.Close()
 
-		// Restore into a new container.
-		args3 := Args{
-			ID:        testutil.UniqueContainerID(),
-			Spec:      spec,
-			BundleDir: bundleDir,
-		}
-		cont3, err := New(conf, args3)
-		if err != nil {
-			t.Fatalf("error creating container: %v", err)
-		}
-		defer cont3.Destroy()
+			// Restore into a new container.
+			args3 := Args{
+				ID:        testutil.RandomContainerID(),
+				Spec:      spec,
+				BundleDir: bundleDir,
+			}
+			cont3, err := New(conf, args3)
+			if err != nil {
+				t.Fatalf("error creating container: %v", err)
+			}
+			defer cont3.Destroy()
 
-		if err := cont3.Restore(spec, conf, imagePath); err != nil {
-			t.Fatalf("error restoring container: %v", err)
-		}
+			if err := cont3.Restore(spec, conf, imagePath); err != nil {
+				t.Fatalf("error restoring container: %v", err)
+			}
 
-		// Wait until application has ran.
-		if err := waitForFileNotEmpty(outputFile3); err != nil {
-			t.Fatalf("Failed to wait for output file: %v", err)
-		}
+			// Wait until application has ran.
+			if err := waitForFileNotEmpty(outputFile3); err != nil {
+				t.Fatalf("Failed to wait for output file: %v", err)
+			}
 
-		firstNum2, err := readOutputNum(outputPath, 0)
-		if err != nil {
-			t.Fatalf("error with outputFile: %v", err)
-		}
+			firstNum2, err := readOutputNum(outputPath, 0)
+			if err != nil {
+				t.Fatalf("error with outputFile: %v", err)
+			}
 
-		// Check that lastNum is one less than firstNum and that the container picks
-		// up from where it left off.
-		if lastNum+1 != firstNum2 {
-			t.Errorf("error numbers not in order, previous: %d, next: %d", lastNum, firstNum2)
-		}
-		cont3.Destroy()
+			// Check that lastNum is one less than firstNum and that the container picks
+			// up from where it left off.
+			if lastNum+1 != firstNum2 {
+				t.Errorf("error numbers not in order, previous: %d, next: %d", lastNum, firstNum2)
+			}
+			cont3.Destroy()
+		})
 	}
 }
 
@@ -920,135 +936,134 @@ func TestCheckpointRestore(t *testing.T) {
 // with filesystem Unix Domain Socket use.
 func TestUnixDomainSockets(t *testing.T) {
 	// Skip overlay because test requires writing to host file.
-	for _, conf := range configs(noOverlay...) {
-		t.Logf("Running test with conf: %+v", conf)
-
-		// UDS path is limited to 108 chars for compatibility with older systems.
-		// Use '/tmp' (instead of testutil.TmpDir) to ensure the size limit is
-		// not exceeded. Assumes '/tmp' exists in the system.
-		dir, err := ioutil.TempDir("/tmp", "uds-test")
-		if err != nil {
-			t.Fatalf("ioutil.TempDir failed: %v", err)
-		}
-		defer os.RemoveAll(dir)
+	for name, conf := range configs(t, noOverlay...) {
+		t.Run(name, func(t *testing.T) {
+			// UDS path is limited to 108 chars for compatibility with older systems.
+			// Use '/tmp' (instead of testutil.TmpDir) to ensure the size limit is
+			// not exceeded. Assumes '/tmp' exists in the system.
+			dir, err := ioutil.TempDir("/tmp", "uds-test")
+			if err != nil {
+				t.Fatalf("ioutil.TempDir failed: %v", err)
+			}
+			defer os.RemoveAll(dir)
 
-		outputPath := filepath.Join(dir, "uds_output")
-		outputFile, err := os.OpenFile(outputPath, os.O_CREATE|os.O_EXCL|os.O_RDWR, 0666)
-		if err != nil {
-			t.Fatalf("error creating output file: %v", err)
-		}
-		defer outputFile.Close()
+			outputPath := filepath.Join(dir, "uds_output")
+			outputFile, err := os.OpenFile(outputPath, os.O_CREATE|os.O_EXCL|os.O_RDWR, 0666)
+			if err != nil {
+				t.Fatalf("error creating output file: %v", err)
+			}
+			defer outputFile.Close()
 
-		app, err := testutil.FindFile("runsc/container/test_app/test_app")
-		if err != nil {
-			t.Fatal("error finding test_app:", err)
-		}
+			app, err := testutil.FindFile("test/cmd/test_app/test_app")
+			if err != nil {
+				t.Fatal("error finding test_app:", err)
+			}
 
-		socketPath := filepath.Join(dir, "uds_socket")
-		defer os.Remove(socketPath)
+			socketPath := filepath.Join(dir, "uds_socket")
+			defer os.Remove(socketPath)
 
-		spec := testutil.NewSpecWithArgs(app, "uds", "--file", outputPath, "--socket", socketPath)
-		spec.Process.User = specs.User{
-			UID: uint32(os.Getuid()),
-			GID: uint32(os.Getgid()),
-		}
-		spec.Mounts = []specs.Mount{{
-			Type:        "bind",
-			Destination: dir,
-			Source:      dir,
-		}}
+			spec := testutil.NewSpecWithArgs(app, "uds", "--file", outputPath, "--socket", socketPath)
+			spec.Process.User = specs.User{
+				UID: uint32(os.Getuid()),
+				GID: uint32(os.Getgid()),
+			}
+			spec.Mounts = []specs.Mount{{
+				Type:        "bind",
+				Destination: dir,
+				Source:      dir,
+			}}
 
-		rootDir, bundleDir, err := testutil.SetupContainer(spec, conf)
-		if err != nil {
-			t.Fatalf("error setting up container: %v", err)
-		}
-		defer os.RemoveAll(rootDir)
-		defer os.RemoveAll(bundleDir)
+			_, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf)
+			if err != nil {
+				t.Fatalf("error setting up container: %v", err)
+			}
+			defer cleanup()
 
-		// Create and start the container.
-		args := Args{
-			ID:        testutil.UniqueContainerID(),
-			Spec:      spec,
-			BundleDir: bundleDir,
-		}
-		cont, err := New(conf, args)
-		if err != nil {
-			t.Fatalf("error creating container: %v", err)
-		}
-		defer cont.Destroy()
-		if err := cont.Start(conf); err != nil {
-			t.Fatalf("error starting container: %v", err)
-		}
+			// Create and start the container.
+			args := Args{
+				ID:        testutil.RandomContainerID(),
+				Spec:      spec,
+				BundleDir: bundleDir,
+			}
+			cont, err := New(conf, args)
+			if err != nil {
+				t.Fatalf("error creating container: %v", err)
+			}
+			defer cont.Destroy()
+			if err := cont.Start(conf); err != nil {
+				t.Fatalf("error starting container: %v", err)
+			}
 
-		// Set the image path, the location where the checkpoint image will be saved.
-		imagePath := filepath.Join(dir, "test-image-file")
+			// Set the image path, the location where the checkpoint image will be saved.
+			imagePath := filepath.Join(dir, "test-image-file")
 
-		// Create the image file and open for writing.
-		file, err := os.OpenFile(imagePath, os.O_CREATE|os.O_EXCL|os.O_RDWR, 0644)
-		if err != nil {
-			t.Fatalf("error opening new file at imagePath: %v", err)
-		}
-		defer file.Close()
-		defer os.RemoveAll(imagePath)
+			// Create the image file and open for writing.
+			file, err := os.OpenFile(imagePath, os.O_CREATE|os.O_EXCL|os.O_RDWR, 0644)
+			if err != nil {
+				t.Fatalf("error opening new file at imagePath: %v", err)
+			}
+			defer file.Close()
+			defer os.RemoveAll(imagePath)
 
-		// Wait until application has ran.
-		if err := waitForFileNotEmpty(outputFile); err != nil {
-			t.Fatalf("Failed to wait for output file: %v", err)
-		}
+			// Wait until application has ran.
+			if err := waitForFileNotEmpty(outputFile); err != nil {
+				t.Fatalf("Failed to wait for output file: %v", err)
+			}
 
-		// Checkpoint running container; save state into new file.
-		if err := cont.Checkpoint(file); err != nil {
-			t.Fatalf("error checkpointing container to empty file: %v", err)
-		}
+			// Checkpoint running container; save state into new file.
+			if err := cont.Checkpoint(file); err != nil {
+				t.Fatalf("error checkpointing container to empty file: %v", err)
+			}
 
-		// Read last number outputted before checkpoint.
-		lastNum, err := readOutputNum(outputPath, -1)
-		if err != nil {
-			t.Fatalf("error with outputFile: %v", err)
-		}
+			// Read last number outputted before checkpoint.
+			lastNum, err := readOutputNum(outputPath, -1)
+			if err != nil {
+				t.Fatalf("error with outputFile: %v", err)
+			}
 
-		// Delete and recreate file before restoring.
-		if err := os.Remove(outputPath); err != nil {
-			t.Fatalf("error removing file")
-		}
-		outputFile2, err := os.OpenFile(outputPath, os.O_CREATE|os.O_EXCL|os.O_RDWR, 0666)
-		if err != nil {
-			t.Fatalf("error creating output file: %v", err)
-		}
-		defer outputFile2.Close()
+			// Delete and recreate file before restoring.
+			if err := os.Remove(outputPath); err != nil {
+				t.Fatalf("error removing file")
+			}
+			outputFile2, err := os.OpenFile(outputPath, os.O_CREATE|os.O_EXCL|os.O_RDWR, 0666)
+			if err != nil {
+				t.Fatalf("error creating output file: %v", err)
+			}
+			defer outputFile2.Close()
 
-		// Restore into a new container.
-		argsRestore := Args{
-			ID:        testutil.UniqueContainerID(),
-			Spec:      spec,
-			BundleDir: bundleDir,
-		}
-		contRestore, err := New(conf, argsRestore)
-		if err != nil {
-			t.Fatalf("error creating container: %v", err)
-		}
-		defer contRestore.Destroy()
+			// Restore into a new container.
+			argsRestore := Args{
+				ID:        testutil.RandomContainerID(),
+				Spec:      spec,
+				BundleDir: bundleDir,
+			}
+			contRestore, err := New(conf, argsRestore)
+			if err != nil {
+				t.Fatalf("error creating container: %v", err)
+			}
+			defer contRestore.Destroy()
 
-		if err := contRestore.Restore(spec, conf, imagePath); err != nil {
-			t.Fatalf("error restoring container: %v", err)
-		}
+			if err := contRestore.Restore(spec, conf, imagePath); err != nil {
+				t.Fatalf("error restoring container: %v", err)
+			}
 
-		// Wait until application has ran.
-		if err := waitForFileNotEmpty(outputFile2); err != nil {
-			t.Fatalf("Failed to wait for output file: %v", err)
-		}
+			// Wait until application has ran.
+			if err := waitForFileNotEmpty(outputFile2); err != nil {
+				t.Fatalf("Failed to wait for output file: %v", err)
+			}
 
-		// Read first number outputted after restore.
-		firstNum, err := readOutputNum(outputPath, 0)
-		if err != nil {
-			t.Fatalf("error with outputFile: %v", err)
-		}
+			// Read first number outputted after restore.
+			firstNum, err := readOutputNum(outputPath, 0)
+			if err != nil {
+				t.Fatalf("error with outputFile: %v", err)
+			}
 
-		// Check that lastNum is one less than firstNum.
-		if lastNum+1 != firstNum {
-			t.Errorf("error numbers not consecutive, previous: %d, next: %d", lastNum, firstNum)
-		}
-		contRestore.Destroy()
+			// Check that lastNum is one less than firstNum.
+			if lastNum+1 != firstNum {
+				t.Errorf("error numbers not consecutive, previous: %d, next: %d", lastNum, firstNum)
+			}
+			contRestore.Destroy()
+		})
 	}
 }
 
@@ -1058,10 +1073,8 @@ func TestUnixDomainSockets(t *testing.T) {
 // recreated. Then it resumes the container, verify that the file gets created
 // again.
 func TestPauseResume(t *testing.T) {
-	for _, conf := range configs(noOverlay...) {
-		t.Run(fmt.Sprintf("conf: %+v", conf), func(t *testing.T) {
-			t.Logf("Running test with conf: %+v", conf)
-
+	for name, conf := range configs(t, noOverlay...) {
+		t.Run(name, func(t *testing.T) {
 			tmpDir, err := ioutil.TempDir(testutil.TmpDir(), "lock")
 			if err != nil {
 				t.Fatalf("error creating temp dir: %v", err)
@@ -1072,16 +1085,15 @@ func TestPauseResume(t *testing.T) {
 			script := fmt.Sprintf("while [[ true ]]; do touch %q; sleep 0.1; done", running)
 			spec := testutil.NewSpecWithArgs("/bin/bash", "-c", script)
 
-			rootDir, bundleDir, err := testutil.SetupContainer(spec, conf)
+			_, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf)
 			if err != nil {
 				t.Fatalf("error setting up container: %v", err)
 			}
-			defer os.RemoveAll(rootDir)
-			defer os.RemoveAll(bundleDir)
+			defer cleanup()
 
 			// Create and start the container.
 			args := Args{
-				ID:        testutil.UniqueContainerID(),
+				ID:        testutil.RandomContainerID(),
 				Spec:      spec,
 				BundleDir: bundleDir,
 			}
@@ -1139,17 +1151,16 @@ func TestPauseResume(t *testing.T) {
 // occurs given the correct state.
 func TestPauseResumeStatus(t *testing.T) {
 	spec := testutil.NewSpecWithArgs("sleep", "20")
-	conf := testutil.TestConfig()
-	rootDir, bundleDir, err := testutil.SetupContainer(spec, conf)
+	conf := testutil.TestConfig(t)
+	_, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf)
 	if err != nil {
 		t.Fatalf("error setting up container: %v", err)
 	}
-	defer os.RemoveAll(rootDir)
-	defer os.RemoveAll(bundleDir)
+	defer cleanup()
 
 	// Create and start the container.
 	args := Args{
-		ID:        testutil.UniqueContainerID(),
+		ID:        testutil.RandomContainerID(),
 		Spec:      spec,
 		BundleDir: bundleDir,
 	}
@@ -1205,359 +1216,365 @@ func TestCapabilities(t *testing.T) {
 	uid := auth.KUID(os.Getuid() + 1)
 	gid := auth.KGID(os.Getgid() + 1)
 
-	for _, conf := range configs(all...) {
-		t.Logf("Running test with conf: %+v", conf)
-
-		spec := testutil.NewSpecWithArgs("sleep", "100")
-		rootDir, bundleDir, err := testutil.SetupContainer(spec, conf)
-		if err != nil {
-			t.Fatalf("error setting up container: %v", err)
-		}
-		defer os.RemoveAll(rootDir)
-		defer os.RemoveAll(bundleDir)
+	for name, conf := range configsWithVFS2(t, all...) {
+		t.Run(name, func(t *testing.T) {
+			spec := testutil.NewSpecWithArgs("sleep", "100")
+			rootDir, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf)
+			if err != nil {
+				t.Fatalf("error setting up container: %v", err)
+			}
+			defer cleanup()
 
-		// Create and start the container.
-		args := Args{
-			ID:        testutil.UniqueContainerID(),
-			Spec:      spec,
-			BundleDir: bundleDir,
-		}
-		cont, err := New(conf, args)
-		if err != nil {
-			t.Fatalf("error creating container: %v", err)
-		}
-		defer cont.Destroy()
-		if err := cont.Start(conf); err != nil {
-			t.Fatalf("error starting container: %v", err)
-		}
+			// Create and start the container.
+			args := Args{
+				ID:        testutil.RandomContainerID(),
+				Spec:      spec,
+				BundleDir: bundleDir,
+			}
+			cont, err := New(conf, args)
+			if err != nil {
+				t.Fatalf("error creating container: %v", err)
+			}
+			defer cont.Destroy()
+			if err := cont.Start(conf); err != nil {
+				t.Fatalf("error starting container: %v", err)
+			}
 
-		// expectedPL lists the expected process state of the container.
-		expectedPL := []*control.Process{
-			{
-				UID:     0,
-				PID:     1,
-				PPID:    0,
-				C:       0,
-				Cmd:     "sleep",
-				Threads: []kernel.ThreadID{1},
-			},
-			{
-				UID:     uid,
-				PID:     2,
-				PPID:    0,
-				C:       0,
-				Cmd:     "exe",
-				Threads: []kernel.ThreadID{2},
-			},
-		}
-		if err := waitForProcessList(cont, expectedPL[:1]); err != nil {
-			t.Fatalf("Failed to wait for sleep to start, err: %v", err)
-		}
+			// expectedPL lists the expected process state of the container.
+			expectedPL := []*control.Process{
+				{
+					UID:     0,
+					PID:     1,
+					PPID:    0,
+					C:       0,
+					Cmd:     "sleep",
+					Threads: []kernel.ThreadID{1},
+				},
+				{
+					UID:     uid,
+					PID:     2,
+					PPID:    0,
+					C:       0,
+					Cmd:     "exe",
+					Threads: []kernel.ThreadID{2},
+				},
+			}
+			if err := waitForProcessList(cont, expectedPL[:1]); err != nil {
+				t.Fatalf("Failed to wait for sleep to start, err: %v", err)
+			}
 
-		// Create an executable that can't be run with the specified UID:GID.
-		// This shouldn't be callable within the container until we add the
-		// CAP_DAC_OVERRIDE capability to skip the access check.
-		exePath := filepath.Join(rootDir, "exe")
-		if err := ioutil.WriteFile(exePath, []byte("#!/bin/sh\necho hello"), 0770); err != nil {
-			t.Fatalf("couldn't create executable: %v", err)
-		}
-		defer os.Remove(exePath)
-
-		// Need to traverse the intermediate directory.
-		os.Chmod(rootDir, 0755)
-
-		execArgs := &control.ExecArgs{
-			Filename:         exePath,
-			Argv:             []string{exePath},
-			WorkingDirectory: "/",
-			KUID:             uid,
-			KGID:             gid,
-			Capabilities:     &auth.TaskCapabilities{},
-		}
+			// Create an executable that can't be run with the specified UID:GID.
+			// This shouldn't be callable within the container until we add the
+			// CAP_DAC_OVERRIDE capability to skip the access check.
+			exePath := filepath.Join(rootDir, "exe")
+			if err := ioutil.WriteFile(exePath, []byte("#!/bin/sh\necho hello"), 0770); err != nil {
+				t.Fatalf("couldn't create executable: %v", err)
+			}
+			defer os.Remove(exePath)
+
+			// Need to traverse the intermediate directory.
+			os.Chmod(rootDir, 0755)
+
+			execArgs := &control.ExecArgs{
+				Filename:         exePath,
+				Argv:             []string{exePath},
+				WorkingDirectory: "/",
+				KUID:             uid,
+				KGID:             gid,
+				Capabilities:     &auth.TaskCapabilities{},
+			}
 
-		// "exe" should fail because we don't have the necessary permissions.
-		if _, err := cont.executeSync(execArgs); err == nil {
-			t.Fatalf("container executed without error, but an error was expected")
-		}
+			// "exe" should fail because we don't have the necessary permissions.
+			if _, err := cont.executeSync(execArgs); err == nil {
+				t.Fatalf("container executed without error, but an error was expected")
+			}
 
-		// Now we run with the capability enabled and should succeed.
-		execArgs.Capabilities = &auth.TaskCapabilities{
-			EffectiveCaps: auth.CapabilitySetOf(linux.CAP_DAC_OVERRIDE),
-		}
-		// "exe" should not fail this time.
-		if _, err := cont.executeSync(execArgs); err != nil {
-			t.Fatalf("container failed to exec %v: %v", args, err)
-		}
+			// Now we run with the capability enabled and should succeed.
+			execArgs.Capabilities = &auth.TaskCapabilities{
+				EffectiveCaps: auth.CapabilitySetOf(linux.CAP_DAC_OVERRIDE),
+			}
+			// "exe" should not fail this time.
+			if _, err := cont.executeSync(execArgs); err != nil {
+				t.Fatalf("container failed to exec %v: %v", args, err)
+			}
+		})
 	}
 }
 
 // TestRunNonRoot checks that sandbox can be configured when running as
 // non-privileged user.
 func TestRunNonRoot(t *testing.T) {
-	for _, conf := range configs(noOverlay...) {
-		t.Logf("Running test with conf: %+v", conf)
-
-		spec := testutil.NewSpecWithArgs("/bin/true")
-
-		// Set a random user/group with no access to "blocked" dir.
-		spec.Process.User.UID = 343
-		spec.Process.User.GID = 2401
-		spec.Process.Capabilities = nil
+	for name, conf := range configsWithVFS2(t, noOverlay...) {
+		t.Run(name, func(t *testing.T) {
+			spec := testutil.NewSpecWithArgs("/bin/true")
+
+			// Set a random user/group with no access to "blocked" dir.
+			spec.Process.User.UID = 343
+			spec.Process.User.GID = 2401
+			spec.Process.Capabilities = nil
+
+			// User running inside container can't list '$TMP/blocked' and would fail to
+			// mount it.
+			dir, err := ioutil.TempDir(testutil.TmpDir(), "blocked")
+			if err != nil {
+				t.Fatalf("ioutil.TempDir() failed: %v", err)
+			}
+			if err := os.Chmod(dir, 0700); err != nil {
+				t.Fatalf("os.MkDir(%q) failed: %v", dir, err)
+			}
+			dir = path.Join(dir, "test")
+			if err := os.Mkdir(dir, 0755); err != nil {
+				t.Fatalf("os.MkDir(%q) failed: %v", dir, err)
+			}
 
-		// User running inside container can't list '$TMP/blocked' and would fail to
-		// mount it.
-		dir, err := ioutil.TempDir(testutil.TmpDir(), "blocked")
-		if err != nil {
-			t.Fatalf("ioutil.TempDir() failed: %v", err)
-		}
-		if err := os.Chmod(dir, 0700); err != nil {
-			t.Fatalf("os.MkDir(%q) failed: %v", dir, err)
-		}
-		dir = path.Join(dir, "test")
-		if err := os.Mkdir(dir, 0755); err != nil {
-			t.Fatalf("os.MkDir(%q) failed: %v", dir, err)
-		}
+			src, err := ioutil.TempDir(testutil.TmpDir(), "src")
+			if err != nil {
+				t.Fatalf("ioutil.TempDir() failed: %v", err)
+			}
 
-		src, err := ioutil.TempDir(testutil.TmpDir(), "src")
-		if err != nil {
-			t.Fatalf("ioutil.TempDir() failed: %v", err)
-		}
+			spec.Mounts = append(spec.Mounts, specs.Mount{
+				Destination: dir,
+				Source:      src,
+				Type:        "bind",
+			})
 
-		spec.Mounts = append(spec.Mounts, specs.Mount{
-			Destination: dir,
-			Source:      src,
-			Type:        "bind",
+			if err := run(spec, conf); err != nil {
+				t.Fatalf("error running sandbox: %v", err)
+			}
 		})
-
-		if err := run(spec, conf); err != nil {
-			t.Fatalf("error running sandbox: %v", err)
-		}
 	}
 }
 
 // TestMountNewDir checks that runsc will create destination directory if it
 // doesn't exit.
 func TestMountNewDir(t *testing.T) {
-	for _, conf := range configs(overlay) {
-		t.Logf("Running test with conf: %+v", conf)
+	for name, conf := range configsWithVFS2(t, overlay) {
+		t.Run(name, func(t *testing.T) {
+			root, err := ioutil.TempDir(testutil.TmpDir(), "root")
+			if err != nil {
+				t.Fatal("ioutil.TempDir() failed:", err)
+			}
 
-		root, err := ioutil.TempDir(testutil.TmpDir(), "root")
-		if err != nil {
-			t.Fatal("ioutil.TempDir() failed:", err)
-		}
+			srcDir := path.Join(root, "src", "dir", "anotherdir")
+			if err := os.MkdirAll(srcDir, 0755); err != nil {
+				t.Fatalf("os.MkDir(%q) failed: %v", srcDir, err)
+			}
 
-		srcDir := path.Join(root, "src", "dir", "anotherdir")
-		if err := os.MkdirAll(srcDir, 0755); err != nil {
-			t.Fatalf("os.MkDir(%q) failed: %v", srcDir, err)
-		}
+			mountDir := path.Join(root, "dir", "anotherdir")
 
-		mountDir := path.Join(root, "dir", "anotherdir")
+			spec := testutil.NewSpecWithArgs("/bin/ls", mountDir)
+			spec.Mounts = append(spec.Mounts, specs.Mount{
+				Destination: mountDir,
+				Source:      srcDir,
+				Type:        "bind",
+			})
 
-		spec := testutil.NewSpecWithArgs("/bin/ls", mountDir)
-		spec.Mounts = append(spec.Mounts, specs.Mount{
-			Destination: mountDir,
-			Source:      srcDir,
-			Type:        "bind",
+			if err := run(spec, conf); err != nil {
+				t.Fatalf("error running sandbox: %v", err)
+			}
 		})
-
-		if err := run(spec, conf); err != nil {
-			t.Fatalf("error running sandbox: %v", err)
-		}
 	}
 }
 
 func TestReadonlyRoot(t *testing.T) {
-	for _, conf := range configs(overlay) {
-		t.Logf("Running test with conf: %+v", conf)
-
-		spec := testutil.NewSpecWithArgs("/bin/touch", "/foo")
-		spec.Root.Readonly = true
-		rootDir, bundleDir, err := testutil.SetupContainer(spec, conf)
-		if err != nil {
-			t.Fatalf("error setting up container: %v", err)
-		}
-		defer os.RemoveAll(rootDir)
-		defer os.RemoveAll(bundleDir)
+	for name, conf := range configsWithVFS2(t, overlay) {
+		t.Run(name, func(t *testing.T) {
+			spec := testutil.NewSpecWithArgs("/bin/touch", "/foo")
+			spec.Root.Readonly = true
+			_, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf)
+			if err != nil {
+				t.Fatalf("error setting up container: %v", err)
+			}
+			defer cleanup()
 
-		// Create, start and wait for the container.
-		args := Args{
-			ID:        testutil.UniqueContainerID(),
-			Spec:      spec,
-			BundleDir: bundleDir,
-		}
-		c, err := New(conf, args)
-		if err != nil {
-			t.Fatalf("error creating container: %v", err)
-		}
-		defer c.Destroy()
-		if err := c.Start(conf); err != nil {
-			t.Fatalf("error starting container: %v", err)
-		}
+			// Create, start and wait for the container.
+			args := Args{
+				ID:        testutil.RandomContainerID(),
+				Spec:      spec,
+				BundleDir: bundleDir,
+			}
+			c, err := New(conf, args)
+			if err != nil {
+				t.Fatalf("error creating container: %v", err)
+			}
+			defer c.Destroy()
+			if err := c.Start(conf); err != nil {
+				t.Fatalf("error starting container: %v", err)
+			}
 
-		ws, err := c.Wait()
-		if err != nil {
-			t.Fatalf("error waiting on container: %v", err)
-		}
-		if !ws.Exited() || syscall.Errno(ws.ExitStatus()) != syscall.EPERM {
-			t.Fatalf("container failed, waitStatus: %v", ws)
-		}
+			ws, err := c.Wait()
+			if err != nil {
+				t.Fatalf("error waiting on container: %v", err)
+			}
+			if !ws.Exited() || syscall.Errno(ws.ExitStatus()) != syscall.EPERM {
+				t.Fatalf("container failed, waitStatus: %v", ws)
+			}
+		})
 	}
 }
 
 func TestUIDMap(t *testing.T) {
-	for _, conf := range configs(noOverlay...) {
-		t.Logf("Running test with conf: %+v", conf)
-		testDir, err := ioutil.TempDir(testutil.TmpDir(), "test-mount")
-		if err != nil {
-			t.Fatal(err)
-		}
-		defer os.RemoveAll(testDir)
-		testFile := path.Join(testDir, "testfile")
-
-		spec := testutil.NewSpecWithArgs("touch", "/tmp/testfile")
-		uid := os.Getuid()
-		gid := os.Getgid()
-		spec.Linux = &specs.Linux{
-			Namespaces: []specs.LinuxNamespace{
-				{Type: specs.UserNamespace},
-				{Type: specs.PIDNamespace},
-				{Type: specs.MountNamespace},
-			},
-			UIDMappings: []specs.LinuxIDMapping{
-				{
-					ContainerID: 0,
-					HostID:      uint32(uid),
-					Size:        1,
+	for name, conf := range configsWithVFS2(t, noOverlay...) {
+		t.Run(name, func(t *testing.T) {
+			testDir, err := ioutil.TempDir(testutil.TmpDir(), "test-mount")
+			if err != nil {
+				t.Fatalf("ioutil.TempDir() failed: %v", err)
+			}
+			defer os.RemoveAll(testDir)
+			testFile := path.Join(testDir, "testfile")
+
+			spec := testutil.NewSpecWithArgs("touch", "/tmp/testfile")
+			uid := os.Getuid()
+			gid := os.Getgid()
+			spec.Linux = &specs.Linux{
+				Namespaces: []specs.LinuxNamespace{
+					{Type: specs.UserNamespace},
+					{Type: specs.PIDNamespace},
+					{Type: specs.MountNamespace},
 				},
-			},
-			GIDMappings: []specs.LinuxIDMapping{
-				{
-					ContainerID: 0,
-					HostID:      uint32(gid),
-					Size:        1,
+				UIDMappings: []specs.LinuxIDMapping{
+					{
+						ContainerID: 0,
+						HostID:      uint32(uid),
+						Size:        1,
+					},
 				},
-			},
-		}
+				GIDMappings: []specs.LinuxIDMapping{
+					{
+						ContainerID: 0,
+						HostID:      uint32(gid),
+						Size:        1,
+					},
+				},
+			}
 
-		spec.Mounts = append(spec.Mounts, specs.Mount{
-			Destination: "/tmp",
-			Source:      testDir,
-			Type:        "bind",
-		})
+			spec.Mounts = append(spec.Mounts, specs.Mount{
+				Destination: "/tmp",
+				Source:      testDir,
+				Type:        "bind",
+			})
 
-		rootDir, bundleDir, err := testutil.SetupContainer(spec, conf)
-		if err != nil {
-			t.Fatalf("error setting up container: %v", err)
-		}
-		defer os.RemoveAll(rootDir)
-		defer os.RemoveAll(bundleDir)
+			_, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf)
+			if err != nil {
+				t.Fatalf("error setting up container: %v", err)
+			}
+			defer cleanup()
 
-		// Create, start and wait for the container.
-		args := Args{
-			ID:        testutil.UniqueContainerID(),
-			Spec:      spec,
-			BundleDir: bundleDir,
-		}
-		c, err := New(conf, args)
-		if err != nil {
-			t.Fatalf("error creating container: %v", err)
-		}
-		defer c.Destroy()
-		if err := c.Start(conf); err != nil {
-			t.Fatalf("error starting container: %v", err)
-		}
+			// Create, start and wait for the container.
+			args := Args{
+				ID:        testutil.RandomContainerID(),
+				Spec:      spec,
+				BundleDir: bundleDir,
+			}
+			c, err := New(conf, args)
+			if err != nil {
+				t.Fatalf("error creating container: %v", err)
+			}
+			defer c.Destroy()
+			if err := c.Start(conf); err != nil {
+				t.Fatalf("error starting container: %v", err)
+			}
 
-		ws, err := c.Wait()
-		if err != nil {
-			t.Fatalf("error waiting on container: %v", err)
-		}
-		if !ws.Exited() || ws.ExitStatus() != 0 {
-			t.Fatalf("container failed, waitStatus: %v", ws)
-		}
-		st := syscall.Stat_t{}
-		if err := syscall.Stat(testFile, &st); err != nil {
-			t.Fatalf("error stat /testfile: %v", err)
-		}
+			ws, err := c.Wait()
+			if err != nil {
+				t.Fatalf("error waiting on container: %v", err)
+			}
+			if !ws.Exited() || ws.ExitStatus() != 0 {
+				t.Fatalf("container failed, waitStatus: %v", ws)
+			}
+			st := syscall.Stat_t{}
+			if err := syscall.Stat(testFile, &st); err != nil {
+				t.Fatalf("error stat /testfile: %v", err)
+			}
 
-		if st.Uid != uint32(uid) || st.Gid != uint32(gid) {
-			t.Fatalf("UID: %d (%d) GID: %d (%d)", st.Uid, uid, st.Gid, gid)
-		}
+			if st.Uid != uint32(uid) || st.Gid != uint32(gid) {
+				t.Fatalf("UID: %d (%d) GID: %d (%d)", st.Uid, uid, st.Gid, gid)
+			}
+		})
 	}
 }
 
 func TestReadonlyMount(t *testing.T) {
-	for _, conf := range configs(overlay) {
-		t.Logf("Running test with conf: %+v", conf)
-
-		dir, err := ioutil.TempDir(testutil.TmpDir(), "ro-mount")
-		spec := testutil.NewSpecWithArgs("/bin/touch", path.Join(dir, "file"))
-		if err != nil {
-			t.Fatalf("ioutil.TempDir() failed: %v", err)
-		}
-		spec.Mounts = append(spec.Mounts, specs.Mount{
-			Destination: dir,
-			Source:      dir,
-			Type:        "bind",
-			Options:     []string{"ro"},
-		})
-		spec.Root.Readonly = false
-
-		rootDir, bundleDir, err := testutil.SetupContainer(spec, conf)
-		if err != nil {
-			t.Fatalf("error setting up container: %v", err)
-		}
-		defer os.RemoveAll(rootDir)
-		defer os.RemoveAll(bundleDir)
+	for name, conf := range configsWithVFS2(t, overlay) {
+		t.Run(name, func(t *testing.T) {
+			dir, err := ioutil.TempDir(testutil.TmpDir(), "ro-mount")
+			spec := testutil.NewSpecWithArgs("/bin/touch", path.Join(dir, "file"))
+			if err != nil {
+				t.Fatalf("ioutil.TempDir() failed: %v", err)
+			}
+			spec.Mounts = append(spec.Mounts, specs.Mount{
+				Destination: dir,
+				Source:      dir,
+				Type:        "bind",
+				Options:     []string{"ro"},
+			})
+			spec.Root.Readonly = false
+
+			_, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf)
+			if err != nil {
+				t.Fatalf("error setting up container: %v", err)
+			}
+			defer cleanup()
 
-		// Create, start and wait for the container.
-		args := Args{
-			ID:        testutil.UniqueContainerID(),
-			Spec:      spec,
-			BundleDir: bundleDir,
-		}
-		c, err := New(conf, args)
-		if err != nil {
-			t.Fatalf("error creating container: %v", err)
-		}
-		defer c.Destroy()
-		if err := c.Start(conf); err != nil {
-			t.Fatalf("error starting container: %v", err)
-		}
+			// Create, start and wait for the container.
+			args := Args{
+				ID:        testutil.RandomContainerID(),
+				Spec:      spec,
+				BundleDir: bundleDir,
+			}
+			c, err := New(conf, args)
+			if err != nil {
+				t.Fatalf("error creating container: %v", err)
+			}
+			defer c.Destroy()
+			if err := c.Start(conf); err != nil {
+				t.Fatalf("error starting container: %v", err)
+			}
 
-		ws, err := c.Wait()
-		if err != nil {
-			t.Fatalf("error waiting on container: %v", err)
-		}
-		if !ws.Exited() || syscall.Errno(ws.ExitStatus()) != syscall.EPERM {
-			t.Fatalf("container failed, waitStatus: %v", ws)
-		}
+			ws, err := c.Wait()
+			if err != nil {
+				t.Fatalf("error waiting on container: %v", err)
+			}
+			if !ws.Exited() || syscall.Errno(ws.ExitStatus()) != syscall.EPERM {
+				t.Fatalf("container failed, waitStatus: %v", ws)
+			}
+		})
 	}
 }
 
 // TestAbbreviatedIDs checks that runsc supports using abbreviated container
 // IDs in place of full IDs.
 func TestAbbreviatedIDs(t *testing.T) {
-	rootDir, err := testutil.SetupRootDir()
+	doAbbreviatedIDsTest(t, false)
+}
+
+func TestAbbreviatedIDsVFS2(t *testing.T) {
+	doAbbreviatedIDsTest(t, true)
+}
+
+func doAbbreviatedIDsTest(t *testing.T, vfs2 bool) {
+	rootDir, cleanup, err := testutil.SetupRootDir()
 	if err != nil {
 		t.Fatalf("error creating root dir: %v", err)
 	}
-	defer os.RemoveAll(rootDir)
+	defer cleanup()
 
-	conf := testutil.TestConfig()
+	conf := testutil.TestConfig(t)
 	conf.RootDir = rootDir
+	conf.VFS2 = vfs2
 
 	cids := []string{
-		"foo-" + testutil.UniqueContainerID(),
-		"bar-" + testutil.UniqueContainerID(),
-		"baz-" + testutil.UniqueContainerID(),
+		"foo-" + testutil.RandomContainerID(),
+		"bar-" + testutil.RandomContainerID(),
+		"baz-" + testutil.RandomContainerID(),
 	}
 	for _, cid := range cids {
 		spec := testutil.NewSpecWithArgs("sleep", "100")
-		bundleDir, err := testutil.SetupBundleDir(spec)
+		bundleDir, cleanup, err := testutil.SetupBundleDir(spec)
 		if err != nil {
 			t.Fatalf("error setting up container: %v", err)
 		}
-		defer os.RemoveAll(bundleDir)
+		defer cleanup()
 
 		// Create and start the container.
 		args := Args{
@@ -1600,18 +1617,27 @@ func TestAbbreviatedIDs(t *testing.T) {
 }
 
 func TestGoferExits(t *testing.T) {
+	doGoferExitTest(t, false)
+}
+
+func TestGoferExitsVFS2(t *testing.T) {
+	doGoferExitTest(t, true)
+}
+
+func doGoferExitTest(t *testing.T, vfs2 bool) {
 	spec := testutil.NewSpecWithArgs("/bin/sleep", "10000")
-	conf := testutil.TestConfig()
-	rootDir, bundleDir, err := testutil.SetupContainer(spec, conf)
+	conf := testutil.TestConfig(t)
+	conf.VFS2 = vfs2
+	_, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf)
+
 	if err != nil {
 		t.Fatalf("error setting up container: %v", err)
 	}
-	defer os.RemoveAll(rootDir)
-	defer os.RemoveAll(bundleDir)
+	defer cleanup()
 
 	// Create and start the container.
 	args := Args{
-		ID:        testutil.UniqueContainerID(),
+		ID:        testutil.RandomContainerID(),
 		Spec:      spec,
 		BundleDir: bundleDir,
 	}
@@ -1640,7 +1666,7 @@ func TestGoferExits(t *testing.T) {
 }
 
 func TestRootNotMount(t *testing.T) {
-	appSym, err := testutil.FindFile("runsc/container/test_app/test_app")
+	appSym, err := testutil.FindFile("test/cmd/test_app/test_app")
 	if err != nil {
 		t.Fatal("error finding test_app:", err)
 	}
@@ -1670,27 +1696,26 @@ func TestRootNotMount(t *testing.T) {
 	spec.Root.Readonly = true
 	spec.Mounts = nil
 
-	conf := testutil.TestConfig()
+	conf := testutil.TestConfig(t)
 	if err := run(spec, conf); err != nil {
 		t.Fatalf("error running sandbox: %v", err)
 	}
 }
 
 func TestUserLog(t *testing.T) {
-	app, err := testutil.FindFile("runsc/container/test_app/test_app")
+	app, err := testutil.FindFile("test/cmd/test_app/test_app")
 	if err != nil {
 		t.Fatal("error finding test_app:", err)
 	}
 
 	// sched_rr_get_interval = 148 - not implemented in gvisor.
 	spec := testutil.NewSpecWithArgs(app, "syscall", "--syscall=148")
-	conf := testutil.TestConfig()
-	rootDir, bundleDir, err := testutil.SetupContainer(spec, conf)
+	conf := testutil.TestConfig(t)
+	_, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf)
 	if err != nil {
 		t.Fatalf("error setting up container: %v", err)
 	}
-	defer os.RemoveAll(rootDir)
-	defer os.RemoveAll(bundleDir)
+	defer cleanup()
 
 	dir, err := ioutil.TempDir(testutil.TmpDir(), "user_log_test")
 	if err != nil {
@@ -1700,7 +1725,7 @@ func TestUserLog(t *testing.T) {
 
 	// Create, start and wait for the container.
 	args := Args{
-		ID:        testutil.UniqueContainerID(),
+		ID:        testutil.RandomContainerID(),
 		Spec:      spec,
 		BundleDir: bundleDir,
 		UserLog:   userLog,
@@ -1718,78 +1743,85 @@ func TestUserLog(t *testing.T) {
 	if err != nil {
 		t.Fatalf("error opening user log file %q: %v", userLog, err)
 	}
-	if want := "Unsupported syscall: sched_rr_get_interval"; !strings.Contains(string(out), want) {
+	if want := "Unsupported syscall sched_rr_get_interval("; !strings.Contains(string(out), want) {
 		t.Errorf("user log file doesn't contain %q, out: %s", want, string(out))
 	}
 }
 
 func TestWaitOnExitedSandbox(t *testing.T) {
-	for _, conf := range configs(all...) {
-		t.Logf("Running test with conf: %+v", conf)
-
-		// Run a shell that sleeps for 1 second and then exits with a
-		// non-zero code.
-		const wantExit = 17
-		cmd := fmt.Sprintf("sleep 1; exit %d", wantExit)
-		spec := testutil.NewSpecWithArgs("/bin/sh", "-c", cmd)
-		rootDir, bundleDir, err := testutil.SetupContainer(spec, conf)
-		if err != nil {
-			t.Fatalf("error setting up container: %v", err)
-		}
-		defer os.RemoveAll(rootDir)
-		defer os.RemoveAll(bundleDir)
+	for name, conf := range configsWithVFS2(t, all...) {
+		t.Run(name, func(t *testing.T) {
+			// Run a shell that sleeps for 1 second and then exits with a
+			// non-zero code.
+			const wantExit = 17
+			cmd := fmt.Sprintf("sleep 1; exit %d", wantExit)
+			spec := testutil.NewSpecWithArgs("/bin/sh", "-c", cmd)
+			_, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf)
+			if err != nil {
+				t.Fatalf("error setting up container: %v", err)
+			}
+			defer cleanup()
 
-		// Create and Start the container.
-		args := Args{
-			ID:        testutil.UniqueContainerID(),
-			Spec:      spec,
-			BundleDir: bundleDir,
-		}
-		c, err := New(conf, args)
-		if err != nil {
-			t.Fatalf("error creating container: %v", err)
-		}
-		defer c.Destroy()
-		if err := c.Start(conf); err != nil {
-			t.Fatalf("error starting container: %v", err)
-		}
+			// Create and Start the container.
+			args := Args{
+				ID:        testutil.RandomContainerID(),
+				Spec:      spec,
+				BundleDir: bundleDir,
+			}
+			c, err := New(conf, args)
+			if err != nil {
+				t.Fatalf("error creating container: %v", err)
+			}
+			defer c.Destroy()
+			if err := c.Start(conf); err != nil {
+				t.Fatalf("error starting container: %v", err)
+			}
 
-		// Wait on the sandbox. This will make an RPC to the sandbox
-		// and get the actual exit status of the application.
-		ws, err := c.Wait()
-		if err != nil {
-			t.Fatalf("error waiting on container: %v", err)
-		}
-		if got := ws.ExitStatus(); got != wantExit {
-			t.Errorf("got exit status %d, want %d", got, wantExit)
-		}
+			// Wait on the sandbox. This will make an RPC to the sandbox
+			// and get the actual exit status of the application.
+			ws, err := c.Wait()
+			if err != nil {
+				t.Fatalf("error waiting on container: %v", err)
+			}
+			if got := ws.ExitStatus(); got != wantExit {
+				t.Errorf("got exit status %d, want %d", got, wantExit)
+			}
 
-		// Now the sandbox has exited, but the zombie sandbox process
-		// still exists. Calling Wait() now will return the sandbox
-		// exit status.
-		ws, err = c.Wait()
-		if err != nil {
-			t.Fatalf("error waiting on container: %v", err)
-		}
-		if got := ws.ExitStatus(); got != wantExit {
-			t.Errorf("got exit status %d, want %d", got, wantExit)
-		}
+			// Now the sandbox has exited, but the zombie sandbox process
+			// still exists. Calling Wait() now will return the sandbox
+			// exit status.
+			ws, err = c.Wait()
+			if err != nil {
+				t.Fatalf("error waiting on container: %v", err)
+			}
+			if got := ws.ExitStatus(); got != wantExit {
+				t.Errorf("got exit status %d, want %d", got, wantExit)
+			}
+		})
 	}
 }
 
 func TestDestroyNotStarted(t *testing.T) {
+	doDestroyNotStartedTest(t, false)
+}
+
+func TestDestroyNotStartedVFS2(t *testing.T) {
+	doDestroyNotStartedTest(t, true)
+}
+
+func doDestroyNotStartedTest(t *testing.T, vfs2 bool) {
 	spec := testutil.NewSpecWithArgs("/bin/sleep", "100")
-	conf := testutil.TestConfig()
-	rootDir, bundleDir, err := testutil.SetupContainer(spec, conf)
+	conf := testutil.TestConfig(t)
+	conf.VFS2 = vfs2
+	_, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf)
 	if err != nil {
 		t.Fatalf("error setting up container: %v", err)
 	}
-	defer os.RemoveAll(rootDir)
-	defer os.RemoveAll(bundleDir)
+	defer cleanup()
 
 	// Create the container and check that it can be destroyed.
 	args := Args{
-		ID:        testutil.UniqueContainerID(),
+		ID:        testutil.RandomContainerID(),
 		Spec:      spec,
 		BundleDir: bundleDir,
 	}
@@ -1804,19 +1836,27 @@ func TestDestroyNotStarted(t *testing.T) {
 
 // TestDestroyStarting attempts to force a race between start and destroy.
 func TestDestroyStarting(t *testing.T) {
+	doDestroyNotStartedTest(t, false)
+}
+
+func TestDestroyStartedVFS2(t *testing.T) {
+	doDestroyNotStartedTest(t, true)
+}
+
+func doDestroyStartingTest(t *testing.T, vfs2 bool) {
 	for i := 0; i < 10; i++ {
 		spec := testutil.NewSpecWithArgs("/bin/sleep", "100")
-		conf := testutil.TestConfig()
-		rootDir, bundleDir, err := testutil.SetupContainer(spec, conf)
+		conf := testutil.TestConfig(t)
+		conf.VFS2 = vfs2
+		rootDir, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf)
 		if err != nil {
 			t.Fatalf("error setting up container: %v", err)
 		}
-		defer os.RemoveAll(rootDir)
-		defer os.RemoveAll(bundleDir)
+		defer cleanup()
 
 		// Create the container and check that it can be destroyed.
 		args := Args{
-			ID:        testutil.UniqueContainerID(),
+			ID:        testutil.RandomContainerID(),
 			Spec:      spec,
 			BundleDir: bundleDir,
 		}
@@ -1851,23 +1891,23 @@ func TestDestroyStarting(t *testing.T) {
 }
 
 func TestCreateWorkingDir(t *testing.T) {
-	for _, conf := range configs(overlay) {
-		t.Logf("Running test with conf: %+v", conf)
-
-		tmpDir, err := ioutil.TempDir(testutil.TmpDir(), "cwd-create")
-		if err != nil {
-			t.Fatalf("ioutil.TempDir() failed: %v", err)
-		}
-		dir := path.Join(tmpDir, "new/working/dir")
+	for name, conf := range configsWithVFS2(t, overlay) {
+		t.Run(name, func(t *testing.T) {
+			tmpDir, err := ioutil.TempDir(testutil.TmpDir(), "cwd-create")
+			if err != nil {
+				t.Fatalf("ioutil.TempDir() failed: %v", err)
+			}
+			dir := path.Join(tmpDir, "new/working/dir")
 
-		// touch will fail if the directory doesn't exist.
-		spec := testutil.NewSpecWithArgs("/bin/touch", path.Join(dir, "file"))
-		spec.Process.Cwd = dir
-		spec.Root.Readonly = true
+			// touch will fail if the directory doesn't exist.
+			spec := testutil.NewSpecWithArgs("/bin/touch", path.Join(dir, "file"))
+			spec.Process.Cwd = dir
+			spec.Root.Readonly = true
 
-		if err := run(spec, conf); err != nil {
-			t.Fatalf("Error running container: %v", err)
-		}
+			if err := run(spec, conf); err != nil {
+				t.Fatalf("Error running container: %v", err)
+			}
+		})
 	}
 }
 
@@ -1924,16 +1964,15 @@ func TestMountPropagation(t *testing.T) {
 		},
 	}
 
-	conf := testutil.TestConfig()
-	rootDir, bundleDir, err := testutil.SetupContainer(spec, conf)
+	conf := testutil.TestConfig(t)
+	_, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf)
 	if err != nil {
 		t.Fatalf("error setting up container: %v", err)
 	}
-	defer os.RemoveAll(rootDir)
-	defer os.RemoveAll(bundleDir)
+	defer cleanup()
 
 	args := Args{
-		ID:        testutil.UniqueContainerID(),
+		ID:        testutil.RandomContainerID(),
 		Spec:      spec,
 		BundleDir: bundleDir,
 	}
@@ -1975,87 +2014,87 @@ func TestMountPropagation(t *testing.T) {
 }
 
 func TestMountSymlink(t *testing.T) {
-	for _, conf := range configs(overlay) {
-		t.Logf("Running test with conf: %+v", conf)
-
-		dir, err := ioutil.TempDir(testutil.TmpDir(), "mount-symlink")
-		if err != nil {
-			t.Fatalf("ioutil.TempDir() failed: %v", err)
-		}
+	for name, conf := range configsWithVFS2(t, overlay) {
+		t.Run(name, func(t *testing.T) {
+			dir, err := ioutil.TempDir(testutil.TmpDir(), "mount-symlink")
+			if err != nil {
+				t.Fatalf("ioutil.TempDir() failed: %v", err)
+			}
+			defer os.RemoveAll(dir)
 
-		source := path.Join(dir, "source")
-		target := path.Join(dir, "target")
-		for _, path := range []string{source, target} {
-			if err := os.MkdirAll(path, 0777); err != nil {
-				t.Fatalf("os.MkdirAll(): %v", err)
+			source := path.Join(dir, "source")
+			target := path.Join(dir, "target")
+			for _, path := range []string{source, target} {
+				if err := os.MkdirAll(path, 0777); err != nil {
+					t.Fatalf("os.MkdirAll(): %v", err)
+				}
 			}
-		}
-		f, err := os.Create(path.Join(source, "file"))
-		if err != nil {
-			t.Fatalf("os.Create(): %v", err)
-		}
-		f.Close()
+			f, err := os.Create(path.Join(source, "file"))
+			if err != nil {
+				t.Fatalf("os.Create(): %v", err)
+			}
+			f.Close()
 
-		link := path.Join(dir, "link")
-		if err := os.Symlink(target, link); err != nil {
-			t.Fatalf("os.Symlink(%q, %q): %v", target, link, err)
-		}
+			link := path.Join(dir, "link")
+			if err := os.Symlink(target, link); err != nil {
+				t.Fatalf("os.Symlink(%q, %q): %v", target, link, err)
+			}
 
-		spec := testutil.NewSpecWithArgs("/bin/sleep", "1000")
+			spec := testutil.NewSpecWithArgs("/bin/sleep", "1000")
 
-		// Mount to a symlink to ensure the mount code will follow it and mount
-		// at the symlink target.
-		spec.Mounts = append(spec.Mounts, specs.Mount{
-			Type:        "bind",
-			Destination: link,
-			Source:      source,
-		})
+			// Mount to a symlink to ensure the mount code will follow it and mount
+			// at the symlink target.
+			spec.Mounts = append(spec.Mounts, specs.Mount{
+				Type:        "bind",
+				Destination: link,
+				Source:      source,
+			})
 
-		rootDir, bundleDir, err := testutil.SetupContainer(spec, conf)
-		if err != nil {
-			t.Fatalf("error setting up container: %v", err)
-		}
-		defer os.RemoveAll(rootDir)
-		defer os.RemoveAll(bundleDir)
+			_, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf)
+			if err != nil {
+				t.Fatalf("error setting up container: %v", err)
+			}
+			defer cleanup()
 
-		args := Args{
-			ID:        testutil.UniqueContainerID(),
-			Spec:      spec,
-			BundleDir: bundleDir,
-		}
-		cont, err := New(conf, args)
-		if err != nil {
-			t.Fatalf("creating container: %v", err)
-		}
-		defer cont.Destroy()
+			args := Args{
+				ID:        testutil.RandomContainerID(),
+				Spec:      spec,
+				BundleDir: bundleDir,
+			}
+			cont, err := New(conf, args)
+			if err != nil {
+				t.Fatalf("creating container: %v", err)
+			}
+			defer cont.Destroy()
 
-		if err := cont.Start(conf); err != nil {
-			t.Fatalf("starting container: %v", err)
-		}
+			if err := cont.Start(conf); err != nil {
+				t.Fatalf("starting container: %v", err)
+			}
 
-		// Check that symlink was resolved and mount was created where the symlink
-		// is pointing to.
-		file := path.Join(target, "file")
-		execArgs := &control.ExecArgs{
-			Filename: "/usr/bin/test",
-			Argv:     []string{"test", "-f", file},
-		}
-		if ws, err := cont.executeSync(execArgs); err != nil || ws != 0 {
-			t.Fatalf("exec: test -f %q, ws: %v, err: %v", file, ws, err)
-		}
+			// Check that symlink was resolved and mount was created where the symlink
+			// is pointing to.
+			file := path.Join(target, "file")
+			execArgs := &control.ExecArgs{
+				Filename: "/usr/bin/test",
+				Argv:     []string{"test", "-f", file},
+			}
+			if ws, err := cont.executeSync(execArgs); err != nil || ws != 0 {
+				t.Fatalf("exec: test -f %q, ws: %v, err: %v", file, ws, err)
+			}
+		})
 	}
 }
 
 // Check that --net-raw disables the CAP_NET_RAW capability.
 func TestNetRaw(t *testing.T) {
 	capNetRaw := strconv.FormatUint(bits.MaskOf64(int(linux.CAP_NET_RAW)), 10)
-	app, err := testutil.FindFile("runsc/container/test_app/test_app")
+	app, err := testutil.FindFile("test/cmd/test_app/test_app")
 	if err != nil {
 		t.Fatal("error finding test_app:", err)
 	}
 
 	for _, enableRaw := range []bool{true, false} {
-		conf := testutil.TestConfig()
+		conf := testutil.TestConfig(t)
 		conf.EnableRaw = enableRaw
 
 		test := "--enabled"
@@ -2070,49 +2109,12 @@ func TestNetRaw(t *testing.T) {
 	}
 }
 
-// TestOverlayfsStaleRead most basic test that '--overlayfs-stale-read' works.
-func TestOverlayfsStaleRead(t *testing.T) {
-	conf := testutil.TestConfig()
-	conf.OverlayfsStaleRead = true
-
-	in, err := ioutil.TempFile(testutil.TmpDir(), "stale-read.in")
-	if err != nil {
-		t.Fatalf("ioutil.TempFile() failed: %v", err)
-	}
-	defer in.Close()
-	if _, err := in.WriteString("stale data"); err != nil {
-		t.Fatalf("in.Write() failed: %v", err)
-	}
-
-	out, err := ioutil.TempFile(testutil.TmpDir(), "stale-read.out")
-	if err != nil {
-		t.Fatalf("ioutil.TempFile() failed: %v", err)
-	}
-	defer out.Close()
-
-	const want = "foobar"
-	cmd := fmt.Sprintf("cat %q && echo %q> %q && cp %q %q", in.Name(), want, in.Name(), in.Name(), out.Name())
-	spec := testutil.NewSpecWithArgs("/bin/bash", "-c", cmd)
-	if err := run(spec, conf); err != nil {
-		t.Fatalf("Error running container: %v", err)
-	}
-
-	gotBytes, err := ioutil.ReadAll(out)
-	if err != nil {
-		t.Fatalf("out.Read() failed: %v", err)
-	}
-	got := strings.TrimSpace(string(gotBytes))
-	if want != got {
-		t.Errorf("Wrong content in out file, got: %q. want: %q", got, want)
-	}
-}
-
 // TestTTYField checks TTY field returned by container.Processes().
 func TestTTYField(t *testing.T) {
 	stop := testutil.StartReaper()
 	defer stop()
 
-	testApp, err := testutil.FindFile("runsc/container/test_app/test_app")
+	testApp, err := testutil.FindFile("test/cmd/test_app/test_app")
 	if err != nil {
 		t.Fatal("error finding test_app:", err)
 	}
@@ -2135,64 +2137,70 @@ func TestTTYField(t *testing.T) {
 	}
 
 	for _, test := range testCases {
-		t.Run(test.name, func(t *testing.T) {
-			conf := testutil.TestConfig()
-
-			// We will run /bin/sleep, possibly with an open TTY.
-			cmd := []string{"/bin/sleep", "10000"}
-			if test.useTTY {
-				// Run inside the "pty-runner".
-				cmd = append([]string{testApp, "pty-runner"}, cmd...)
+		for _, vfs2 := range []bool{false, true} {
+			name := test.name
+			if vfs2 {
+				name += "-vfs2"
 			}
+			t.Run(name, func(t *testing.T) {
+				conf := testutil.TestConfig(t)
+				conf.VFS2 = vfs2
+
+				// We will run /bin/sleep, possibly with an open TTY.
+				cmd := []string{"/bin/sleep", "10000"}
+				if test.useTTY {
+					// Run inside the "pty-runner".
+					cmd = append([]string{testApp, "pty-runner"}, cmd...)
+				}
 
-			spec := testutil.NewSpecWithArgs(cmd...)
-			rootDir, bundleDir, err := testutil.SetupContainer(spec, conf)
-			if err != nil {
-				t.Fatalf("error setting up container: %v", err)
-			}
-			defer os.RemoveAll(rootDir)
-			defer os.RemoveAll(bundleDir)
-
-			// Create and start the container.
-			args := Args{
-				ID:        testutil.UniqueContainerID(),
-				Spec:      spec,
-				BundleDir: bundleDir,
-			}
-			c, err := New(conf, args)
-			if err != nil {
-				t.Fatalf("error creating container: %v", err)
-			}
-			defer c.Destroy()
-			if err := c.Start(conf); err != nil {
-				t.Fatalf("error starting container: %v", err)
-			}
+				spec := testutil.NewSpecWithArgs(cmd...)
+				_, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf)
+				if err != nil {
+					t.Fatalf("error setting up container: %v", err)
+				}
+				defer cleanup()
 
-			// Wait for sleep to be running, and check the TTY
-			// field.
-			var gotTTYField string
-			cb := func() error {
-				ps, err := c.Processes()
+				// Create and start the container.
+				args := Args{
+					ID:        testutil.RandomContainerID(),
+					Spec:      spec,
+					BundleDir: bundleDir,
+				}
+				c, err := New(conf, args)
 				if err != nil {
-					err = fmt.Errorf("error getting process data from container: %v", err)
-					return &backoff.PermanentError{Err: err}
+					t.Fatalf("error creating container: %v", err)
+				}
+				defer c.Destroy()
+				if err := c.Start(conf); err != nil {
+					t.Fatalf("error starting container: %v", err)
 				}
-				for _, p := range ps {
-					if strings.Contains(p.Cmd, "sleep") {
-						gotTTYField = p.TTY
-						return nil
+
+				// Wait for sleep to be running, and check the TTY
+				// field.
+				var gotTTYField string
+				cb := func() error {
+					ps, err := c.Processes()
+					if err != nil {
+						err = fmt.Errorf("error getting process data from container: %v", err)
+						return &backoff.PermanentError{Err: err}
+					}
+					for _, p := range ps {
+						if strings.Contains(p.Cmd, "sleep") {
+							gotTTYField = p.TTY
+							return nil
+						}
 					}
+					return fmt.Errorf("sleep not running")
+				}
+				if err := testutil.Poll(cb, 30*time.Second); err != nil {
+					t.Fatalf("error waiting for sleep process: %v", err)
 				}
-				return fmt.Errorf("sleep not running")
-			}
-			if err := testutil.Poll(cb, 30*time.Second); err != nil {
-				t.Fatalf("error waiting for sleep process: %v", err)
-			}
 
-			if gotTTYField != test.wantTTYField {
-				t.Errorf("tty field got %q, want %q", gotTTYField, test.wantTTYField)
-			}
-		})
+				if gotTTYField != test.wantTTYField {
+					t.Errorf("tty field got %q, want %q", gotTTYField, test.wantTTYField)
+				}
+			})
+		}
 	}
 }
 
diff --git a/runsc/container/multi_container_test.go b/runsc/container/multi_container_test.go
index 2da93ec5b..207206dd2 100644
--- a/runsc/container/multi_container_test.go
+++ b/runsc/container/multi_container_test.go
@@ -27,18 +27,19 @@ import (
 	"time"
 
 	specs "github.com/opencontainers/runtime-spec/specs-go"
+	"gvisor.dev/gvisor/pkg/cleanup"
 	"gvisor.dev/gvisor/pkg/sentry/control"
 	"gvisor.dev/gvisor/pkg/sentry/kernel"
 	"gvisor.dev/gvisor/pkg/sync"
+	"gvisor.dev/gvisor/pkg/test/testutil"
 	"gvisor.dev/gvisor/runsc/boot"
 	"gvisor.dev/gvisor/runsc/specutils"
-	"gvisor.dev/gvisor/runsc/testutil"
 )
 
 func createSpecs(cmds ...[]string) ([]*specs.Spec, []string) {
 	var specs []*specs.Spec
 	var ids []string
-	rootID := testutil.UniqueContainerID()
+	rootID := testutil.RandomContainerID()
 
 	for i, cmd := range cmds {
 		spec := testutil.NewSpecWithArgs(cmd...)
@@ -52,7 +53,7 @@ func createSpecs(cmds ...[]string) ([]*specs.Spec, []string) {
 				specutils.ContainerdContainerTypeAnnotation: specutils.ContainerdContainerTypeContainer,
 				specutils.ContainerdSandboxIDAnnotation:     rootID,
 			}
-			ids = append(ids, testutil.UniqueContainerID())
+			ids = append(ids, testutil.RandomContainerID())
 		}
 		specs = append(specs, spec)
 	}
@@ -64,23 +65,16 @@ func startContainers(conf *boot.Config, specs []*specs.Spec, ids []string) ([]*C
 		panic("conf.RootDir not set. Call testutil.SetupRootDir() to set.")
 	}
 
+	cu := cleanup.Cleanup{}
+	defer cu.Clean()
+
 	var containers []*Container
-	var bundles []string
-	cleanup := func() {
-		for _, c := range containers {
-			c.Destroy()
-		}
-		for _, b := range bundles {
-			os.RemoveAll(b)
-		}
-	}
 	for i, spec := range specs {
-		bundleDir, err := testutil.SetupBundleDir(spec)
+		bundleDir, cleanup, err := testutil.SetupBundleDir(spec)
 		if err != nil {
-			cleanup()
 			return nil, nil, fmt.Errorf("error setting up container: %v", err)
 		}
-		bundles = append(bundles, bundleDir)
+		cu.Add(cleanup)
 
 		args := Args{
 			ID:        ids[i],
@@ -89,17 +83,17 @@ func startContainers(conf *boot.Config, specs []*specs.Spec, ids []string) ([]*C
 		}
 		cont, err := New(conf, args)
 		if err != nil {
-			cleanup()
 			return nil, nil, fmt.Errorf("error creating container: %v", err)
 		}
+		cu.Add(func() { cont.Destroy() })
 		containers = append(containers, cont)
 
 		if err := cont.Start(conf); err != nil {
-			cleanup()
 			return nil, nil, fmt.Errorf("error starting container: %v", err)
 		}
 	}
-	return containers, cleanup, nil
+
+	return containers, cu.Release(), nil
 }
 
 type execDesc struct {
@@ -135,161 +129,161 @@ func createSharedMount(mount specs.Mount, name string, pod ...*specs.Spec) {
 // TestMultiContainerSanity checks that it is possible to run 2 dead-simple
 // containers in the same sandbox.
 func TestMultiContainerSanity(t *testing.T) {
-	for _, conf := range configs(all...) {
-		t.Logf("Running test with conf: %+v", conf)
-
-		rootDir, err := testutil.SetupRootDir()
-		if err != nil {
-			t.Fatalf("error creating root dir: %v", err)
-		}
-		defer os.RemoveAll(rootDir)
-		conf.RootDir = rootDir
+	for name, conf := range configsWithVFS2(t, all...) {
+		t.Run(name, func(t *testing.T) {
+			rootDir, cleanup, err := testutil.SetupRootDir()
+			if err != nil {
+				t.Fatalf("error creating root dir: %v", err)
+			}
+			defer cleanup()
+			conf.RootDir = rootDir
 
-		// Setup the containers.
-		sleep := []string{"sleep", "100"}
-		specs, ids := createSpecs(sleep, sleep)
-		containers, cleanup, err := startContainers(conf, specs, ids)
-		if err != nil {
-			t.Fatalf("error starting containers: %v", err)
-		}
-		defer cleanup()
+			// Setup the containers.
+			sleep := []string{"sleep", "100"}
+			specs, ids := createSpecs(sleep, sleep)
+			containers, cleanup, err := startContainers(conf, specs, ids)
+			if err != nil {
+				t.Fatalf("error starting containers: %v", err)
+			}
+			defer cleanup()
 
-		// Check via ps that multiple processes are running.
-		expectedPL := []*control.Process{
-			{PID: 1, Cmd: "sleep", Threads: []kernel.ThreadID{1}},
-		}
-		if err := waitForProcessList(containers[0], expectedPL); err != nil {
-			t.Errorf("failed to wait for sleep to start: %v", err)
-		}
-		expectedPL = []*control.Process{
-			{PID: 2, Cmd: "sleep", Threads: []kernel.ThreadID{2}},
-		}
-		if err := waitForProcessList(containers[1], expectedPL); err != nil {
-			t.Errorf("failed to wait for sleep to start: %v", err)
-		}
+			// Check via ps that multiple processes are running.
+			expectedPL := []*control.Process{
+				{PID: 1, Cmd: "sleep", Threads: []kernel.ThreadID{1}},
+			}
+			if err := waitForProcessList(containers[0], expectedPL); err != nil {
+				t.Errorf("failed to wait for sleep to start: %v", err)
+			}
+			expectedPL = []*control.Process{
+				{PID: 2, Cmd: "sleep", Threads: []kernel.ThreadID{2}},
+			}
+			if err := waitForProcessList(containers[1], expectedPL); err != nil {
+				t.Errorf("failed to wait for sleep to start: %v", err)
+			}
+		})
 	}
 }
 
 // TestMultiPIDNS checks that it is possible to run 2 dead-simple
 // containers in the same sandbox with different pidns.
 func TestMultiPIDNS(t *testing.T) {
-	for _, conf := range configs(all...) {
-		t.Logf("Running test with conf: %+v", conf)
-
-		rootDir, err := testutil.SetupRootDir()
-		if err != nil {
-			t.Fatalf("error creating root dir: %v", err)
-		}
-		defer os.RemoveAll(rootDir)
-		conf.RootDir = rootDir
-
-		// Setup the containers.
-		sleep := []string{"sleep", "100"}
-		testSpecs, ids := createSpecs(sleep, sleep)
-		testSpecs[1].Linux = &specs.Linux{
-			Namespaces: []specs.LinuxNamespace{
-				{
-					Type: "pid",
+	for name, conf := range configs(t, all...) {
+		t.Run(name, func(t *testing.T) {
+			rootDir, cleanup, err := testutil.SetupRootDir()
+			if err != nil {
+				t.Fatalf("error creating root dir: %v", err)
+			}
+			defer cleanup()
+			conf.RootDir = rootDir
+
+			// Setup the containers.
+			sleep := []string{"sleep", "100"}
+			testSpecs, ids := createSpecs(sleep, sleep)
+			testSpecs[1].Linux = &specs.Linux{
+				Namespaces: []specs.LinuxNamespace{
+					{
+						Type: "pid",
+					},
 				},
-			},
-		}
+			}
 
-		containers, cleanup, err := startContainers(conf, testSpecs, ids)
-		if err != nil {
-			t.Fatalf("error starting containers: %v", err)
-		}
-		defer cleanup()
+			containers, cleanup, err := startContainers(conf, testSpecs, ids)
+			if err != nil {
+				t.Fatalf("error starting containers: %v", err)
+			}
+			defer cleanup()
 
-		// Check via ps that multiple processes are running.
-		expectedPL := []*control.Process{
-			{PID: 1, Cmd: "sleep", Threads: []kernel.ThreadID{1}},
-		}
-		if err := waitForProcessList(containers[0], expectedPL); err != nil {
-			t.Errorf("failed to wait for sleep to start: %v", err)
-		}
-		expectedPL = []*control.Process{
-			{PID: 1, Cmd: "sleep", Threads: []kernel.ThreadID{1}},
-		}
-		if err := waitForProcessList(containers[1], expectedPL); err != nil {
-			t.Errorf("failed to wait for sleep to start: %v", err)
-		}
+			// Check via ps that multiple processes are running.
+			expectedPL := []*control.Process{
+				{PID: 1, Cmd: "sleep", Threads: []kernel.ThreadID{1}},
+			}
+			if err := waitForProcessList(containers[0], expectedPL); err != nil {
+				t.Errorf("failed to wait for sleep to start: %v", err)
+			}
+			expectedPL = []*control.Process{
+				{PID: 1, Cmd: "sleep", Threads: []kernel.ThreadID{1}},
+			}
+			if err := waitForProcessList(containers[1], expectedPL); err != nil {
+				t.Errorf("failed to wait for sleep to start: %v", err)
+			}
+		})
 	}
 }
 
 // TestMultiPIDNSPath checks the pidns path.
 func TestMultiPIDNSPath(t *testing.T) {
-	for _, conf := range configs(all...) {
-		t.Logf("Running test with conf: %+v", conf)
-
-		rootDir, err := testutil.SetupRootDir()
-		if err != nil {
-			t.Fatalf("error creating root dir: %v", err)
-		}
-		defer os.RemoveAll(rootDir)
-		conf.RootDir = rootDir
-
-		// Setup the containers.
-		sleep := []string{"sleep", "100"}
-		testSpecs, ids := createSpecs(sleep, sleep, sleep)
-		testSpecs[0].Linux = &specs.Linux{
-			Namespaces: []specs.LinuxNamespace{
-				{
-					Type: "pid",
-					Path: "/proc/1/ns/pid",
+	for name, conf := range configs(t, all...) {
+		t.Run(name, func(t *testing.T) {
+			rootDir, cleanup, err := testutil.SetupRootDir()
+			if err != nil {
+				t.Fatalf("error creating root dir: %v", err)
+			}
+			defer cleanup()
+			conf.RootDir = rootDir
+
+			// Setup the containers.
+			sleep := []string{"sleep", "100"}
+			testSpecs, ids := createSpecs(sleep, sleep, sleep)
+			testSpecs[0].Linux = &specs.Linux{
+				Namespaces: []specs.LinuxNamespace{
+					{
+						Type: "pid",
+						Path: "/proc/1/ns/pid",
+					},
 				},
-			},
-		}
-		testSpecs[1].Linux = &specs.Linux{
-			Namespaces: []specs.LinuxNamespace{
-				{
-					Type: "pid",
-					Path: "/proc/1/ns/pid",
+			}
+			testSpecs[1].Linux = &specs.Linux{
+				Namespaces: []specs.LinuxNamespace{
+					{
+						Type: "pid",
+						Path: "/proc/1/ns/pid",
+					},
 				},
-			},
-		}
-		testSpecs[2].Linux = &specs.Linux{
-			Namespaces: []specs.LinuxNamespace{
-				{
-					Type: "pid",
-					Path: "/proc/2/ns/pid",
+			}
+			testSpecs[2].Linux = &specs.Linux{
+				Namespaces: []specs.LinuxNamespace{
+					{
+						Type: "pid",
+						Path: "/proc/2/ns/pid",
+					},
 				},
-			},
-		}
+			}
 
-		containers, cleanup, err := startContainers(conf, testSpecs, ids)
-		if err != nil {
-			t.Fatalf("error starting containers: %v", err)
-		}
-		defer cleanup()
+			containers, cleanup, err := startContainers(conf, testSpecs, ids)
+			if err != nil {
+				t.Fatalf("error starting containers: %v", err)
+			}
+			defer cleanup()
 
-		// Check via ps that multiple processes are running.
-		expectedPL := []*control.Process{
-			{PID: 1, Cmd: "sleep", Threads: []kernel.ThreadID{1}},
-		}
-		if err := waitForProcessList(containers[0], expectedPL); err != nil {
-			t.Errorf("failed to wait for sleep to start: %v", err)
-		}
-		if err := waitForProcessList(containers[2], expectedPL); err != nil {
-			t.Errorf("failed to wait for sleep to start: %v", err)
-		}
+			// Check via ps that multiple processes are running.
+			expectedPL := []*control.Process{
+				{PID: 1, Cmd: "sleep", Threads: []kernel.ThreadID{1}},
+			}
+			if err := waitForProcessList(containers[0], expectedPL); err != nil {
+				t.Errorf("failed to wait for sleep to start: %v", err)
+			}
+			if err := waitForProcessList(containers[2], expectedPL); err != nil {
+				t.Errorf("failed to wait for sleep to start: %v", err)
+			}
 
-		expectedPL = []*control.Process{
-			{PID: 2, Cmd: "sleep", Threads: []kernel.ThreadID{2}},
-		}
-		if err := waitForProcessList(containers[1], expectedPL); err != nil {
-			t.Errorf("failed to wait for sleep to start: %v", err)
-		}
+			expectedPL = []*control.Process{
+				{PID: 2, Cmd: "sleep", Threads: []kernel.ThreadID{2}},
+			}
+			if err := waitForProcessList(containers[1], expectedPL); err != nil {
+				t.Errorf("failed to wait for sleep to start: %v", err)
+			}
+		})
 	}
 }
 
 func TestMultiContainerWait(t *testing.T) {
-	rootDir, err := testutil.SetupRootDir()
+	rootDir, cleanup, err := testutil.SetupRootDir()
 	if err != nil {
 		t.Fatalf("error creating root dir: %v", err)
 	}
-	defer os.RemoveAll(rootDir)
+	defer cleanup()
 
-	conf := testutil.TestConfig()
+	conf := testutil.TestConfig(t)
 	conf.RootDir = rootDir
 
 	// The first container should run the entire duration of the test.
@@ -361,13 +355,13 @@ func TestMultiContainerWait(t *testing.T) {
 // TestExecWait ensures what we can wait containers and individual processes in the
 // sandbox that have already exited.
 func TestExecWait(t *testing.T) {
-	rootDir, err := testutil.SetupRootDir()
+	rootDir, cleanup, err := testutil.SetupRootDir()
 	if err != nil {
 		t.Fatalf("error creating root dir: %v", err)
 	}
-	defer os.RemoveAll(rootDir)
+	defer cleanup()
 
-	conf := testutil.TestConfig()
+	conf := testutil.TestConfig(t)
 	conf.RootDir = rootDir
 
 	// The first container should run the entire duration of the test.
@@ -457,13 +451,13 @@ func TestMultiContainerMount(t *testing.T) {
 	})
 
 	// Setup the containers.
-	rootDir, err := testutil.SetupRootDir()
+	rootDir, cleanup, err := testutil.SetupRootDir()
 	if err != nil {
 		t.Fatalf("error creating root dir: %v", err)
 	}
-	defer os.RemoveAll(rootDir)
+	defer cleanup()
 
-	conf := testutil.TestConfig()
+	conf := testutil.TestConfig(t)
 	conf.RootDir = rootDir
 
 	containers, cleanup, err := startContainers(conf, sps, ids)
@@ -484,176 +478,176 @@ func TestMultiContainerMount(t *testing.T) {
 // TestMultiContainerSignal checks that it is possible to signal individual
 // containers without killing the entire sandbox.
 func TestMultiContainerSignal(t *testing.T) {
-	for _, conf := range configs(all...) {
-		t.Logf("Running test with conf: %+v", conf)
-
-		rootDir, err := testutil.SetupRootDir()
-		if err != nil {
-			t.Fatalf("error creating root dir: %v", err)
-		}
-		defer os.RemoveAll(rootDir)
-		conf.RootDir = rootDir
+	for name, conf := range configs(t, all...) {
+		t.Run(name, func(t *testing.T) {
+			rootDir, cleanup, err := testutil.SetupRootDir()
+			if err != nil {
+				t.Fatalf("error creating root dir: %v", err)
+			}
+			defer cleanup()
+			conf.RootDir = rootDir
 
-		// Setup the containers.
-		sleep := []string{"sleep", "100"}
-		specs, ids := createSpecs(sleep, sleep)
-		containers, cleanup, err := startContainers(conf, specs, ids)
-		if err != nil {
-			t.Fatalf("error starting containers: %v", err)
-		}
-		defer cleanup()
+			// Setup the containers.
+			sleep := []string{"sleep", "100"}
+			specs, ids := createSpecs(sleep, sleep)
+			containers, cleanup, err := startContainers(conf, specs, ids)
+			if err != nil {
+				t.Fatalf("error starting containers: %v", err)
+			}
+			defer cleanup()
 
-		// Check via ps that container 1 process is running.
-		expectedPL := []*control.Process{
-			{PID: 2, Cmd: "sleep", Threads: []kernel.ThreadID{2}},
-		}
+			// Check via ps that container 1 process is running.
+			expectedPL := []*control.Process{
+				{PID: 2, Cmd: "sleep", Threads: []kernel.ThreadID{2}},
+			}
 
-		if err := waitForProcessList(containers[1], expectedPL); err != nil {
-			t.Errorf("failed to wait for sleep to start: %v", err)
-		}
+			if err := waitForProcessList(containers[1], expectedPL); err != nil {
+				t.Errorf("failed to wait for sleep to start: %v", err)
+			}
 
-		// Kill process 2.
-		if err := containers[1].SignalContainer(syscall.SIGKILL, false); err != nil {
-			t.Errorf("failed to kill process 2: %v", err)
-		}
+			// Kill process 2.
+			if err := containers[1].SignalContainer(syscall.SIGKILL, false); err != nil {
+				t.Errorf("failed to kill process 2: %v", err)
+			}
 
-		// Make sure process 1 is still running.
-		expectedPL = []*control.Process{
-			{PID: 1, Cmd: "sleep", Threads: []kernel.ThreadID{1}},
-		}
-		if err := waitForProcessList(containers[0], expectedPL); err != nil {
-			t.Errorf("failed to wait for sleep to start: %v", err)
-		}
+			// Make sure process 1 is still running.
+			expectedPL = []*control.Process{
+				{PID: 1, Cmd: "sleep", Threads: []kernel.ThreadID{1}},
+			}
+			if err := waitForProcessList(containers[0], expectedPL); err != nil {
+				t.Errorf("failed to wait for sleep to start: %v", err)
+			}
 
-		// goferPid is reset when container is destroyed.
-		goferPid := containers[1].GoferPid
+			// goferPid is reset when container is destroyed.
+			goferPid := containers[1].GoferPid
 
-		// Destroy container and ensure container's gofer process has exited.
-		if err := containers[1].Destroy(); err != nil {
-			t.Errorf("failed to destroy container: %v", err)
-		}
-		_, _, err = specutils.RetryEintr(func() (uintptr, uintptr, error) {
-			cpid, err := syscall.Wait4(goferPid, nil, 0, nil)
-			return uintptr(cpid), 0, err
-		})
-		if err != syscall.ECHILD {
-			t.Errorf("error waiting for gofer to exit: %v", err)
-		}
-		// Make sure process 1 is still running.
-		if err := waitForProcessList(containers[0], expectedPL); err != nil {
-			t.Errorf("failed to wait for sleep to start: %v", err)
-		}
+			// Destroy container and ensure container's gofer process has exited.
+			if err := containers[1].Destroy(); err != nil {
+				t.Errorf("failed to destroy container: %v", err)
+			}
+			_, _, err = specutils.RetryEintr(func() (uintptr, uintptr, error) {
+				cpid, err := syscall.Wait4(goferPid, nil, 0, nil)
+				return uintptr(cpid), 0, err
+			})
+			if err != syscall.ECHILD {
+				t.Errorf("error waiting for gofer to exit: %v", err)
+			}
+			// Make sure process 1 is still running.
+			if err := waitForProcessList(containers[0], expectedPL); err != nil {
+				t.Errorf("failed to wait for sleep to start: %v", err)
+			}
 
-		// Now that process 2 is gone, ensure we get an error trying to
-		// signal it again.
-		if err := containers[1].SignalContainer(syscall.SIGKILL, false); err == nil {
-			t.Errorf("container %q shouldn't exist, but we were able to signal it", containers[1].ID)
-		}
+			// Now that process 2 is gone, ensure we get an error trying to
+			// signal it again.
+			if err := containers[1].SignalContainer(syscall.SIGKILL, false); err == nil {
+				t.Errorf("container %q shouldn't exist, but we were able to signal it", containers[1].ID)
+			}
 
-		// Kill process 1.
-		if err := containers[0].SignalContainer(syscall.SIGKILL, false); err != nil {
-			t.Errorf("failed to kill process 1: %v", err)
-		}
+			// Kill process 1.
+			if err := containers[0].SignalContainer(syscall.SIGKILL, false); err != nil {
+				t.Errorf("failed to kill process 1: %v", err)
+			}
 
-		// Ensure that container's gofer and sandbox process are no more.
-		err = blockUntilWaitable(containers[0].GoferPid)
-		if err != nil && err != syscall.ECHILD {
-			t.Errorf("error waiting for gofer to exit: %v", err)
-		}
+			// Ensure that container's gofer and sandbox process are no more.
+			err = blockUntilWaitable(containers[0].GoferPid)
+			if err != nil && err != syscall.ECHILD {
+				t.Errorf("error waiting for gofer to exit: %v", err)
+			}
 
-		err = blockUntilWaitable(containers[0].Sandbox.Pid)
-		if err != nil && err != syscall.ECHILD {
-			t.Errorf("error waiting for sandbox to exit: %v", err)
-		}
+			err = blockUntilWaitable(containers[0].Sandbox.Pid)
+			if err != nil && err != syscall.ECHILD {
+				t.Errorf("error waiting for sandbox to exit: %v", err)
+			}
 
-		// The sentry should be gone, so signaling should yield an error.
-		if err := containers[0].SignalContainer(syscall.SIGKILL, false); err == nil {
-			t.Errorf("sandbox %q shouldn't exist, but we were able to signal it", containers[0].Sandbox.ID)
-		}
+			// The sentry should be gone, so signaling should yield an error.
+			if err := containers[0].SignalContainer(syscall.SIGKILL, false); err == nil {
+				t.Errorf("sandbox %q shouldn't exist, but we were able to signal it", containers[0].Sandbox.ID)
+			}
 
-		if err := containers[0].Destroy(); err != nil {
-			t.Errorf("failed to destroy container: %v", err)
-		}
+			if err := containers[0].Destroy(); err != nil {
+				t.Errorf("failed to destroy container: %v", err)
+			}
+		})
 	}
 }
 
 // TestMultiContainerDestroy checks that container are properly cleaned-up when
 // they are destroyed.
 func TestMultiContainerDestroy(t *testing.T) {
-	app, err := testutil.FindFile("runsc/container/test_app/test_app")
+	app, err := testutil.FindFile("test/cmd/test_app/test_app")
 	if err != nil {
 		t.Fatal("error finding test_app:", err)
 	}
 
-	for _, conf := range configs(all...) {
-		t.Logf("Running test with conf: %+v", conf)
-
-		rootDir, err := testutil.SetupRootDir()
-		if err != nil {
-			t.Fatalf("error creating root dir: %v", err)
-		}
-		defer os.RemoveAll(rootDir)
-		conf.RootDir = rootDir
-
-		// First container will remain intact while the second container is killed.
-		podSpecs, ids := createSpecs(
-			[]string{"sleep", "100"},
-			[]string{app, "fork-bomb"})
-
-		// Run the fork bomb in a PID namespace to prevent processes to be
-		// re-parented to PID=1 in the root container.
-		podSpecs[1].Linux = &specs.Linux{
-			Namespaces: []specs.LinuxNamespace{{Type: "pid"}},
-		}
-		containers, cleanup, err := startContainers(conf, podSpecs, ids)
-		if err != nil {
-			t.Fatalf("error starting containers: %v", err)
-		}
-		defer cleanup()
+	for name, conf := range configs(t, all...) {
+		t.Run(name, func(t *testing.T) {
+			rootDir, cleanup, err := testutil.SetupRootDir()
+			if err != nil {
+				t.Fatalf("error creating root dir: %v", err)
+			}
+			defer cleanup()
+			conf.RootDir = rootDir
+
+			// First container will remain intact while the second container is killed.
+			podSpecs, ids := createSpecs(
+				[]string{"sleep", "100"},
+				[]string{app, "fork-bomb"})
+
+			// Run the fork bomb in a PID namespace to prevent processes to be
+			// re-parented to PID=1 in the root container.
+			podSpecs[1].Linux = &specs.Linux{
+				Namespaces: []specs.LinuxNamespace{{Type: "pid"}},
+			}
+			containers, cleanup, err := startContainers(conf, podSpecs, ids)
+			if err != nil {
+				t.Fatalf("error starting containers: %v", err)
+			}
+			defer cleanup()
 
-		// Exec more processes to ensure signal all works for exec'd processes too.
-		args := &control.ExecArgs{
-			Filename: app,
-			Argv:     []string{app, "fork-bomb"},
-		}
-		if _, err := containers[1].Execute(args); err != nil {
-			t.Fatalf("error exec'ing: %v", err)
-		}
+			// Exec more processes to ensure signal all works for exec'd processes too.
+			args := &control.ExecArgs{
+				Filename: app,
+				Argv:     []string{app, "fork-bomb"},
+			}
+			if _, err := containers[1].Execute(args); err != nil {
+				t.Fatalf("error exec'ing: %v", err)
+			}
 
-		// Let it brew...
-		time.Sleep(500 * time.Millisecond)
+			// Let it brew...
+			time.Sleep(500 * time.Millisecond)
 
-		if err := containers[1].Destroy(); err != nil {
-			t.Fatalf("error destroying container: %v", err)
-		}
+			if err := containers[1].Destroy(); err != nil {
+				t.Fatalf("error destroying container: %v", err)
+			}
 
-		// Check that destroy killed all processes belonging to the container and
-		// waited for them to exit before returning.
-		pss, err := containers[0].Sandbox.Processes("")
-		if err != nil {
-			t.Fatalf("error getting process data from sandbox: %v", err)
-		}
-		expectedPL := []*control.Process{{PID: 1, Cmd: "sleep", Threads: []kernel.ThreadID{1}}}
-		if r, err := procListsEqual(pss, expectedPL); !r {
-			t.Errorf("container got process list: %s, want: %s: error: %v",
-				procListToString(pss), procListToString(expectedPL), err)
-		}
+			// Check that destroy killed all processes belonging to the container and
+			// waited for them to exit before returning.
+			pss, err := containers[0].Sandbox.Processes("")
+			if err != nil {
+				t.Fatalf("error getting process data from sandbox: %v", err)
+			}
+			expectedPL := []*control.Process{{PID: 1, Cmd: "sleep", Threads: []kernel.ThreadID{1}}}
+			if r, err := procListsEqual(pss, expectedPL); !r {
+				t.Errorf("container got process list: %s, want: %s: error: %v",
+					procListToString(pss), procListToString(expectedPL), err)
+			}
 
-		// Check that cont.Destroy is safe to call multiple times.
-		if err := containers[1].Destroy(); err != nil {
-			t.Errorf("error destroying container: %v", err)
-		}
+			// Check that cont.Destroy is safe to call multiple times.
+			if err := containers[1].Destroy(); err != nil {
+				t.Errorf("error destroying container: %v", err)
+			}
+		})
 	}
 }
 
 func TestMultiContainerProcesses(t *testing.T) {
-	rootDir, err := testutil.SetupRootDir()
+	rootDir, cleanup, err := testutil.SetupRootDir()
 	if err != nil {
 		t.Fatalf("error creating root dir: %v", err)
 	}
-	defer os.RemoveAll(rootDir)
+	defer cleanup()
 
-	conf := testutil.TestConfig()
+	conf := testutil.TestConfig(t)
 	conf.RootDir = rootDir
 
 	// Note: use curly braces to keep 'sh' process around. Otherwise, shell
@@ -706,13 +700,13 @@ func TestMultiContainerProcesses(t *testing.T) {
 // TestMultiContainerKillAll checks that all process that belong to a container
 // are killed when SIGKILL is sent to *all* processes in that container.
 func TestMultiContainerKillAll(t *testing.T) {
-	rootDir, err := testutil.SetupRootDir()
+	rootDir, cleanup, err := testutil.SetupRootDir()
 	if err != nil {
 		t.Fatalf("error creating root dir: %v", err)
 	}
-	defer os.RemoveAll(rootDir)
+	defer cleanup()
 
-	conf := testutil.TestConfig()
+	conf := testutil.TestConfig(t)
 	conf.RootDir = rootDir
 
 	for _, tc := range []struct {
@@ -721,7 +715,7 @@ func TestMultiContainerKillAll(t *testing.T) {
 		{killContainer: true},
 		{killContainer: false},
 	} {
-		app, err := testutil.FindFile("runsc/container/test_app/test_app")
+		app, err := testutil.FindFile("test/cmd/test_app/test_app")
 		if err != nil {
 			t.Fatal("error finding test_app:", err)
 		}
@@ -739,11 +733,11 @@ func TestMultiContainerKillAll(t *testing.T) {
 		// Wait until all processes are created.
 		rootProcCount := int(math.Pow(2, 3) - 1)
 		if err := waitForProcessCount(containers[0], rootProcCount); err != nil {
-			t.Fatal(err)
+			t.Fatalf("error waitting for processes: %v", err)
 		}
 		procCount := int(math.Pow(2, 5) - 1)
 		if err := waitForProcessCount(containers[1], procCount); err != nil {
-			t.Fatal(err)
+			t.Fatalf("error waiting for processes: %v", err)
 		}
 
 		// Exec more processes to ensure signal works for exec'd processes too.
@@ -757,7 +751,7 @@ func TestMultiContainerKillAll(t *testing.T) {
 		// Wait for these new processes to start.
 		procCount += int(math.Pow(2, 3) - 1)
 		if err := waitForProcessCount(containers[1], procCount); err != nil {
-			t.Fatal(err)
+			t.Fatalf("error waiting for processes: %v", err)
 		}
 
 		if tc.killContainer {
@@ -790,11 +784,11 @@ func TestMultiContainerKillAll(t *testing.T) {
 
 		// Check that all processes are gone.
 		if err := waitForProcessCount(containers[1], 0); err != nil {
-			t.Fatal(err)
+			t.Fatalf("error waiting for processes: %v", err)
 		}
 		// Check that root container was not affected.
 		if err := waitForProcessCount(containers[0], rootProcCount); err != nil {
-			t.Fatal(err)
+			t.Fatalf("error waiting for processes: %v", err)
 		}
 	}
 }
@@ -804,18 +798,17 @@ func TestMultiContainerDestroyNotStarted(t *testing.T) {
 		[]string{"/bin/sleep", "100"},
 		[]string{"/bin/sleep", "100"})
 
-	conf := testutil.TestConfig()
-	rootDir, rootBundleDir, err := testutil.SetupContainer(specs[0], conf)
+	conf := testutil.TestConfig(t)
+	_, bundleDir, cleanup, err := testutil.SetupContainer(specs[0], conf)
 	if err != nil {
 		t.Fatalf("error setting up container: %v", err)
 	}
-	defer os.RemoveAll(rootDir)
-	defer os.RemoveAll(rootBundleDir)
+	defer cleanup()
 
 	rootArgs := Args{
 		ID:        ids[0],
 		Spec:      specs[0],
-		BundleDir: rootBundleDir,
+		BundleDir: bundleDir,
 	}
 	root, err := New(conf, rootArgs)
 	if err != nil {
@@ -827,11 +820,11 @@ func TestMultiContainerDestroyNotStarted(t *testing.T) {
 	}
 
 	// Create and destroy sub-container.
-	bundleDir, err := testutil.SetupBundleDir(specs[1])
+	bundleDir, cleanupSub, err := testutil.SetupBundleDir(specs[1])
 	if err != nil {
 		t.Fatalf("error setting up container: %v", err)
 	}
-	defer os.RemoveAll(bundleDir)
+	defer cleanupSub()
 
 	args := Args{
 		ID:        ids[1],
@@ -858,18 +851,17 @@ func TestMultiContainerDestroyStarting(t *testing.T) {
 	}
 	specs, ids := createSpecs(cmds...)
 
-	conf := testutil.TestConfig()
-	rootDir, rootBundleDir, err := testutil.SetupContainer(specs[0], conf)
+	conf := testutil.TestConfig(t)
+	rootDir, bundleDir, cleanup, err := testutil.SetupContainer(specs[0], conf)
 	if err != nil {
 		t.Fatalf("error setting up container: %v", err)
 	}
-	defer os.RemoveAll(rootDir)
-	defer os.RemoveAll(rootBundleDir)
+	defer cleanup()
 
 	rootArgs := Args{
 		ID:        ids[0],
 		Spec:      specs[0],
-		BundleDir: rootBundleDir,
+		BundleDir: bundleDir,
 	}
 	root, err := New(conf, rootArgs)
 	if err != nil {
@@ -886,16 +878,16 @@ func TestMultiContainerDestroyStarting(t *testing.T) {
 			continue // skip root container
 		}
 
-		bundleDir, err := testutil.SetupBundleDir(specs[i])
+		bundleDir, cleanup, err := testutil.SetupBundleDir(specs[i])
 		if err != nil {
 			t.Fatalf("error setting up container: %v", err)
 		}
-		defer os.RemoveAll(bundleDir)
+		defer cleanup()
 
 		rootArgs := Args{
 			ID:        ids[i],
 			Spec:      specs[i],
-			BundleDir: rootBundleDir,
+			BundleDir: bundleDir,
 		}
 		cont, err := New(conf, rootArgs)
 		if err != nil {
@@ -937,13 +929,13 @@ func TestMultiContainerDifferentFilesystems(t *testing.T) {
 	script := fmt.Sprintf("if [ -f %q ]; then exit 1; else touch %q; fi", filename, filename)
 	cmd := []string{"sh", "-c", script}
 
-	rootDir, err := testutil.SetupRootDir()
+	rootDir, cleanup, err := testutil.SetupRootDir()
 	if err != nil {
 		t.Fatalf("error creating root dir: %v", err)
 	}
-	defer os.RemoveAll(rootDir)
+	defer cleanup()
 
-	conf := testutil.TestConfig()
+	conf := testutil.TestConfig(t)
 	conf.RootDir = rootDir
 
 	// Make sure overlay is enabled, and none of the root filesystems are
@@ -977,7 +969,7 @@ func TestMultiContainerDifferentFilesystems(t *testing.T) {
 // TestMultiContainerContainerDestroyStress tests that IO operations continue
 // to work after containers have been stopped and gofers killed.
 func TestMultiContainerContainerDestroyStress(t *testing.T) {
-	app, err := testutil.FindFile("runsc/container/test_app/test_app")
+	app, err := testutil.FindFile("test/cmd/test_app/test_app")
 	if err != nil {
 		t.Fatal("error finding test_app:", err)
 	}
@@ -1006,13 +998,12 @@ func TestMultiContainerContainerDestroyStress(t *testing.T) {
 	childrenSpecs := allSpecs[1:]
 	childrenIDs := allIDs[1:]
 
-	conf := testutil.TestConfig()
-	rootDir, bundleDir, err := testutil.SetupContainer(rootSpec, conf)
+	conf := testutil.TestConfig(t)
+	_, bundleDir, cleanup, err := testutil.SetupContainer(rootSpec, conf)
 	if err != nil {
 		t.Fatalf("error setting up container: %v", err)
 	}
-	defer os.RemoveAll(rootDir)
-	defer os.RemoveAll(bundleDir)
+	defer cleanup()
 
 	// Start root container.
 	rootArgs := Args{
@@ -1038,11 +1029,11 @@ func TestMultiContainerContainerDestroyStress(t *testing.T) {
 
 		var children []*Container
 		for j, spec := range specs {
-			bundleDir, err := testutil.SetupBundleDir(spec)
+			bundleDir, cleanup, err := testutil.SetupBundleDir(spec)
 			if err != nil {
 				t.Fatalf("error setting up container: %v", err)
 			}
-			defer os.RemoveAll(bundleDir)
+			defer cleanup()
 
 			args := Args{
 				ID:        ids[j],
@@ -1080,308 +1071,308 @@ func TestMultiContainerContainerDestroyStress(t *testing.T) {
 // Test that pod shared mounts are properly mounted in 2 containers and that
 // changes from one container is reflected in the other.
 func TestMultiContainerSharedMount(t *testing.T) {
-	for _, conf := range configs(all...) {
-		t.Logf("Running test with conf: %+v", conf)
-
-		rootDir, err := testutil.SetupRootDir()
-		if err != nil {
-			t.Fatalf("error creating root dir: %v", err)
-		}
-		defer os.RemoveAll(rootDir)
-		conf.RootDir = rootDir
-
-		// Setup the containers.
-		sleep := []string{"sleep", "100"}
-		podSpec, ids := createSpecs(sleep, sleep)
-		mnt0 := specs.Mount{
-			Destination: "/mydir/test",
-			Source:      "/some/dir",
-			Type:        "tmpfs",
-			Options:     nil,
-		}
-		podSpec[0].Mounts = append(podSpec[0].Mounts, mnt0)
+	for name, conf := range configs(t, all...) {
+		t.Run(name, func(t *testing.T) {
+			rootDir, cleanup, err := testutil.SetupRootDir()
+			if err != nil {
+				t.Fatalf("error creating root dir: %v", err)
+			}
+			defer cleanup()
+			conf.RootDir = rootDir
+
+			// Setup the containers.
+			sleep := []string{"sleep", "100"}
+			podSpec, ids := createSpecs(sleep, sleep)
+			mnt0 := specs.Mount{
+				Destination: "/mydir/test",
+				Source:      "/some/dir",
+				Type:        "tmpfs",
+				Options:     nil,
+			}
+			podSpec[0].Mounts = append(podSpec[0].Mounts, mnt0)
 
-		mnt1 := mnt0
-		mnt1.Destination = "/mydir2/test2"
-		podSpec[1].Mounts = append(podSpec[1].Mounts, mnt1)
+			mnt1 := mnt0
+			mnt1.Destination = "/mydir2/test2"
+			podSpec[1].Mounts = append(podSpec[1].Mounts, mnt1)
 
-		createSharedMount(mnt0, "test-mount", podSpec...)
+			createSharedMount(mnt0, "test-mount", podSpec...)
 
-		containers, cleanup, err := startContainers(conf, podSpec, ids)
-		if err != nil {
-			t.Fatalf("error starting containers: %v", err)
-		}
-		defer cleanup()
+			containers, cleanup, err := startContainers(conf, podSpec, ids)
+			if err != nil {
+				t.Fatalf("error starting containers: %v", err)
+			}
+			defer cleanup()
 
-		file0 := path.Join(mnt0.Destination, "abc")
-		file1 := path.Join(mnt1.Destination, "abc")
-		execs := []execDesc{
-			{
-				c:    containers[0],
-				cmd:  []string{"/usr/bin/test", "-d", mnt0.Destination},
-				desc: "directory is mounted in container0",
-			},
-			{
-				c:    containers[1],
-				cmd:  []string{"/usr/bin/test", "-d", mnt1.Destination},
-				desc: "directory is mounted in container1",
-			},
-			{
-				c:    containers[0],
-				cmd:  []string{"/usr/bin/touch", file0},
-				desc: "create file in container0",
-			},
-			{
-				c:    containers[0],
-				cmd:  []string{"/usr/bin/test", "-f", file0},
-				desc: "file appears in container0",
-			},
-			{
-				c:    containers[1],
-				cmd:  []string{"/usr/bin/test", "-f", file1},
-				desc: "file appears in container1",
-			},
-			{
-				c:    containers[1],
-				cmd:  []string{"/bin/rm", file1},
-				desc: "file removed from container1",
-			},
-			{
-				c:    containers[0],
-				cmd:  []string{"/usr/bin/test", "!", "-f", file0},
-				desc: "file removed from container0",
-			},
-			{
-				c:    containers[1],
-				cmd:  []string{"/usr/bin/test", "!", "-f", file1},
-				desc: "file removed from container1",
-			},
-			{
-				c:    containers[1],
-				cmd:  []string{"/bin/mkdir", file1},
-				desc: "create directory in container1",
-			},
-			{
-				c:    containers[0],
-				cmd:  []string{"/usr/bin/test", "-d", file0},
-				desc: "dir appears in container0",
-			},
-			{
-				c:    containers[1],
-				cmd:  []string{"/usr/bin/test", "-d", file1},
-				desc: "dir appears in container1",
-			},
-			{
-				c:    containers[0],
-				cmd:  []string{"/bin/rmdir", file0},
-				desc: "create directory in container0",
-			},
-			{
-				c:    containers[0],
-				cmd:  []string{"/usr/bin/test", "!", "-d", file0},
-				desc: "dir removed from container0",
-			},
-			{
-				c:    containers[1],
-				cmd:  []string{"/usr/bin/test", "!", "-d", file1},
-				desc: "dir removed from container1",
-			},
-		}
-		if err := execMany(execs); err != nil {
-			t.Fatal(err.Error())
-		}
+			file0 := path.Join(mnt0.Destination, "abc")
+			file1 := path.Join(mnt1.Destination, "abc")
+			execs := []execDesc{
+				{
+					c:    containers[0],
+					cmd:  []string{"/usr/bin/test", "-d", mnt0.Destination},
+					desc: "directory is mounted in container0",
+				},
+				{
+					c:    containers[1],
+					cmd:  []string{"/usr/bin/test", "-d", mnt1.Destination},
+					desc: "directory is mounted in container1",
+				},
+				{
+					c:    containers[0],
+					cmd:  []string{"/usr/bin/touch", file0},
+					desc: "create file in container0",
+				},
+				{
+					c:    containers[0],
+					cmd:  []string{"/usr/bin/test", "-f", file0},
+					desc: "file appears in container0",
+				},
+				{
+					c:    containers[1],
+					cmd:  []string{"/usr/bin/test", "-f", file1},
+					desc: "file appears in container1",
+				},
+				{
+					c:    containers[1],
+					cmd:  []string{"/bin/rm", file1},
+					desc: "file removed from container1",
+				},
+				{
+					c:    containers[0],
+					cmd:  []string{"/usr/bin/test", "!", "-f", file0},
+					desc: "file removed from container0",
+				},
+				{
+					c:    containers[1],
+					cmd:  []string{"/usr/bin/test", "!", "-f", file1},
+					desc: "file removed from container1",
+				},
+				{
+					c:    containers[1],
+					cmd:  []string{"/bin/mkdir", file1},
+					desc: "create directory in container1",
+				},
+				{
+					c:    containers[0],
+					cmd:  []string{"/usr/bin/test", "-d", file0},
+					desc: "dir appears in container0",
+				},
+				{
+					c:    containers[1],
+					cmd:  []string{"/usr/bin/test", "-d", file1},
+					desc: "dir appears in container1",
+				},
+				{
+					c:    containers[0],
+					cmd:  []string{"/bin/rmdir", file0},
+					desc: "create directory in container0",
+				},
+				{
+					c:    containers[0],
+					cmd:  []string{"/usr/bin/test", "!", "-d", file0},
+					desc: "dir removed from container0",
+				},
+				{
+					c:    containers[1],
+					cmd:  []string{"/usr/bin/test", "!", "-d", file1},
+					desc: "dir removed from container1",
+				},
+			}
+			if err := execMany(execs); err != nil {
+				t.Fatal(err.Error())
+			}
+		})
 	}
 }
 
 // Test that pod mounts are mounted as readonly when requested.
 func TestMultiContainerSharedMountReadonly(t *testing.T) {
-	for _, conf := range configs(all...) {
-		t.Logf("Running test with conf: %+v", conf)
-
-		rootDir, err := testutil.SetupRootDir()
-		if err != nil {
-			t.Fatalf("error creating root dir: %v", err)
-		}
-		defer os.RemoveAll(rootDir)
-		conf.RootDir = rootDir
-
-		// Setup the containers.
-		sleep := []string{"sleep", "100"}
-		podSpec, ids := createSpecs(sleep, sleep)
-		mnt0 := specs.Mount{
-			Destination: "/mydir/test",
-			Source:      "/some/dir",
-			Type:        "tmpfs",
-			Options:     []string{"ro"},
-		}
-		podSpec[0].Mounts = append(podSpec[0].Mounts, mnt0)
+	for name, conf := range configs(t, all...) {
+		t.Run(name, func(t *testing.T) {
+			rootDir, cleanup, err := testutil.SetupRootDir()
+			if err != nil {
+				t.Fatalf("error creating root dir: %v", err)
+			}
+			defer cleanup()
+			conf.RootDir = rootDir
+
+			// Setup the containers.
+			sleep := []string{"sleep", "100"}
+			podSpec, ids := createSpecs(sleep, sleep)
+			mnt0 := specs.Mount{
+				Destination: "/mydir/test",
+				Source:      "/some/dir",
+				Type:        "tmpfs",
+				Options:     []string{"ro"},
+			}
+			podSpec[0].Mounts = append(podSpec[0].Mounts, mnt0)
 
-		mnt1 := mnt0
-		mnt1.Destination = "/mydir2/test2"
-		podSpec[1].Mounts = append(podSpec[1].Mounts, mnt1)
+			mnt1 := mnt0
+			mnt1.Destination = "/mydir2/test2"
+			podSpec[1].Mounts = append(podSpec[1].Mounts, mnt1)
 
-		createSharedMount(mnt0, "test-mount", podSpec...)
+			createSharedMount(mnt0, "test-mount", podSpec...)
 
-		containers, cleanup, err := startContainers(conf, podSpec, ids)
-		if err != nil {
-			t.Fatalf("error starting containers: %v", err)
-		}
-		defer cleanup()
+			containers, cleanup, err := startContainers(conf, podSpec, ids)
+			if err != nil {
+				t.Fatalf("error starting containers: %v", err)
+			}
+			defer cleanup()
 
-		file0 := path.Join(mnt0.Destination, "abc")
-		file1 := path.Join(mnt1.Destination, "abc")
-		execs := []execDesc{
-			{
-				c:    containers[0],
-				cmd:  []string{"/usr/bin/test", "-d", mnt0.Destination},
-				desc: "directory is mounted in container0",
-			},
-			{
-				c:    containers[1],
-				cmd:  []string{"/usr/bin/test", "-d", mnt1.Destination},
-				desc: "directory is mounted in container1",
-			},
-			{
-				c:    containers[0],
-				cmd:  []string{"/usr/bin/touch", file0},
-				want: 1,
-				desc: "fails to write to container0",
-			},
-			{
-				c:    containers[1],
-				cmd:  []string{"/usr/bin/touch", file1},
-				want: 1,
-				desc: "fails to write to container1",
-			},
-		}
-		if err := execMany(execs); err != nil {
-			t.Fatal(err.Error())
-		}
+			file0 := path.Join(mnt0.Destination, "abc")
+			file1 := path.Join(mnt1.Destination, "abc")
+			execs := []execDesc{
+				{
+					c:    containers[0],
+					cmd:  []string{"/usr/bin/test", "-d", mnt0.Destination},
+					desc: "directory is mounted in container0",
+				},
+				{
+					c:    containers[1],
+					cmd:  []string{"/usr/bin/test", "-d", mnt1.Destination},
+					desc: "directory is mounted in container1",
+				},
+				{
+					c:    containers[0],
+					cmd:  []string{"/usr/bin/touch", file0},
+					want: 1,
+					desc: "fails to write to container0",
+				},
+				{
+					c:    containers[1],
+					cmd:  []string{"/usr/bin/touch", file1},
+					want: 1,
+					desc: "fails to write to container1",
+				},
+			}
+			if err := execMany(execs); err != nil {
+				t.Fatal(err.Error())
+			}
+		})
 	}
 }
 
 // Test that shared pod mounts continue to work after container is restarted.
 func TestMultiContainerSharedMountRestart(t *testing.T) {
-	for _, conf := range configs(all...) {
-		t.Logf("Running test with conf: %+v", conf)
-
-		rootDir, err := testutil.SetupRootDir()
-		if err != nil {
-			t.Fatalf("error creating root dir: %v", err)
-		}
-		defer os.RemoveAll(rootDir)
-		conf.RootDir = rootDir
-
-		// Setup the containers.
-		sleep := []string{"sleep", "100"}
-		podSpec, ids := createSpecs(sleep, sleep)
-		mnt0 := specs.Mount{
-			Destination: "/mydir/test",
-			Source:      "/some/dir",
-			Type:        "tmpfs",
-			Options:     nil,
-		}
-		podSpec[0].Mounts = append(podSpec[0].Mounts, mnt0)
+	for name, conf := range configs(t, all...) {
+		t.Run(name, func(t *testing.T) {
+			rootDir, cleanup, err := testutil.SetupRootDir()
+			if err != nil {
+				t.Fatalf("error creating root dir: %v", err)
+			}
+			defer cleanup()
+			conf.RootDir = rootDir
+
+			// Setup the containers.
+			sleep := []string{"sleep", "100"}
+			podSpec, ids := createSpecs(sleep, sleep)
+			mnt0 := specs.Mount{
+				Destination: "/mydir/test",
+				Source:      "/some/dir",
+				Type:        "tmpfs",
+				Options:     nil,
+			}
+			podSpec[0].Mounts = append(podSpec[0].Mounts, mnt0)
 
-		mnt1 := mnt0
-		mnt1.Destination = "/mydir2/test2"
-		podSpec[1].Mounts = append(podSpec[1].Mounts, mnt1)
+			mnt1 := mnt0
+			mnt1.Destination = "/mydir2/test2"
+			podSpec[1].Mounts = append(podSpec[1].Mounts, mnt1)
 
-		createSharedMount(mnt0, "test-mount", podSpec...)
+			createSharedMount(mnt0, "test-mount", podSpec...)
 
-		containers, cleanup, err := startContainers(conf, podSpec, ids)
-		if err != nil {
-			t.Fatalf("error starting containers: %v", err)
-		}
-		defer cleanup()
+			containers, cleanup, err := startContainers(conf, podSpec, ids)
+			if err != nil {
+				t.Fatalf("error starting containers: %v", err)
+			}
+			defer cleanup()
 
-		file0 := path.Join(mnt0.Destination, "abc")
-		file1 := path.Join(mnt1.Destination, "abc")
-		execs := []execDesc{
-			{
-				c:    containers[0],
-				cmd:  []string{"/usr/bin/touch", file0},
-				desc: "create file in container0",
-			},
-			{
-				c:    containers[0],
-				cmd:  []string{"/usr/bin/test", "-f", file0},
-				desc: "file appears in container0",
-			},
-			{
-				c:    containers[1],
-				cmd:  []string{"/usr/bin/test", "-f", file1},
-				desc: "file appears in container1",
-			},
-		}
-		if err := execMany(execs); err != nil {
-			t.Fatal(err.Error())
-		}
+			file0 := path.Join(mnt0.Destination, "abc")
+			file1 := path.Join(mnt1.Destination, "abc")
+			execs := []execDesc{
+				{
+					c:    containers[0],
+					cmd:  []string{"/usr/bin/touch", file0},
+					desc: "create file in container0",
+				},
+				{
+					c:    containers[0],
+					cmd:  []string{"/usr/bin/test", "-f", file0},
+					desc: "file appears in container0",
+				},
+				{
+					c:    containers[1],
+					cmd:  []string{"/usr/bin/test", "-f", file1},
+					desc: "file appears in container1",
+				},
+			}
+			if err := execMany(execs); err != nil {
+				t.Fatal(err.Error())
+			}
 
-		containers[1].Destroy()
+			containers[1].Destroy()
 
-		bundleDir, err := testutil.SetupBundleDir(podSpec[1])
-		if err != nil {
-			t.Fatalf("error restarting container: %v", err)
-		}
-		defer os.RemoveAll(bundleDir)
+			bundleDir, cleanup, err := testutil.SetupBundleDir(podSpec[1])
+			if err != nil {
+				t.Fatalf("error restarting container: %v", err)
+			}
+			defer cleanup()
 
-		args := Args{
-			ID:        ids[1],
-			Spec:      podSpec[1],
-			BundleDir: bundleDir,
-		}
-		containers[1], err = New(conf, args)
-		if err != nil {
-			t.Fatalf("error creating container: %v", err)
-		}
-		if err := containers[1].Start(conf); err != nil {
-			t.Fatalf("error starting container: %v", err)
-		}
+			args := Args{
+				ID:        ids[1],
+				Spec:      podSpec[1],
+				BundleDir: bundleDir,
+			}
+			containers[1], err = New(conf, args)
+			if err != nil {
+				t.Fatalf("error creating container: %v", err)
+			}
+			if err := containers[1].Start(conf); err != nil {
+				t.Fatalf("error starting container: %v", err)
+			}
 
-		execs = []execDesc{
-			{
-				c:    containers[0],
-				cmd:  []string{"/usr/bin/test", "-f", file0},
-				desc: "file is still in container0",
-			},
-			{
-				c:    containers[1],
-				cmd:  []string{"/usr/bin/test", "-f", file1},
-				desc: "file is still in container1",
-			},
-			{
-				c:    containers[1],
-				cmd:  []string{"/bin/rm", file1},
-				desc: "file removed from container1",
-			},
-			{
-				c:    containers[0],
-				cmd:  []string{"/usr/bin/test", "!", "-f", file0},
-				desc: "file removed from container0",
-			},
-			{
-				c:    containers[1],
-				cmd:  []string{"/usr/bin/test", "!", "-f", file1},
-				desc: "file removed from container1",
-			},
-		}
-		if err := execMany(execs); err != nil {
-			t.Fatal(err.Error())
-		}
+			execs = []execDesc{
+				{
+					c:    containers[0],
+					cmd:  []string{"/usr/bin/test", "-f", file0},
+					desc: "file is still in container0",
+				},
+				{
+					c:    containers[1],
+					cmd:  []string{"/usr/bin/test", "-f", file1},
+					desc: "file is still in container1",
+				},
+				{
+					c:    containers[1],
+					cmd:  []string{"/bin/rm", file1},
+					desc: "file removed from container1",
+				},
+				{
+					c:    containers[0],
+					cmd:  []string{"/usr/bin/test", "!", "-f", file0},
+					desc: "file removed from container0",
+				},
+				{
+					c:    containers[1],
+					cmd:  []string{"/usr/bin/test", "!", "-f", file1},
+					desc: "file removed from container1",
+				},
+			}
+			if err := execMany(execs); err != nil {
+				t.Fatal(err.Error())
+			}
+		})
 	}
 }
 
 // Test that unsupported pod mounts options are ignored when matching master and
 // slave mounts.
 func TestMultiContainerSharedMountUnsupportedOptions(t *testing.T) {
-	rootDir, err := testutil.SetupRootDir()
+	rootDir, cleanup, err := testutil.SetupRootDir()
 	if err != nil {
 		t.Fatalf("error creating root dir: %v", err)
 	}
-	defer os.RemoveAll(rootDir)
+	defer cleanup()
 
-	conf := testutil.TestConfig()
+	conf := testutil.TestConfig(t)
 	conf.RootDir = rootDir
 
 	// Setup the containers.
@@ -1428,7 +1419,7 @@ func TestMultiContainerSharedMountUnsupportedOptions(t *testing.T) {
 // Test that one container can send an FD to another container, even though
 // they have distinct MountNamespaces.
 func TestMultiContainerMultiRootCanHandleFDs(t *testing.T) {
-	app, err := testutil.FindFile("runsc/container/test_app/test_app")
+	app, err := testutil.FindFile("test/cmd/test_app/test_app")
 	if err != nil {
 		t.Fatal("error finding test_app:", err)
 	}
@@ -1457,13 +1448,13 @@ func TestMultiContainerMultiRootCanHandleFDs(t *testing.T) {
 		Type:        "tmpfs",
 	}
 
-	rootDir, err := testutil.SetupRootDir()
+	rootDir, cleanup, err := testutil.SetupRootDir()
 	if err != nil {
 		t.Fatalf("error creating root dir: %v", err)
 	}
-	defer os.RemoveAll(rootDir)
+	defer cleanup()
 
-	conf := testutil.TestConfig()
+	conf := testutil.TestConfig(t)
 	conf.RootDir = rootDir
 
 	// Create the specs.
@@ -1494,13 +1485,13 @@ func TestMultiContainerMultiRootCanHandleFDs(t *testing.T) {
 
 // Test that container is destroyed when Gofer is killed.
 func TestMultiContainerGoferKilled(t *testing.T) {
-	rootDir, err := testutil.SetupRootDir()
+	rootDir, cleanup, err := testutil.SetupRootDir()
 	if err != nil {
 		t.Fatalf("error creating root dir: %v", err)
 	}
-	defer os.RemoveAll(rootDir)
+	defer cleanup()
 
-	conf := testutil.TestConfig()
+	conf := testutil.TestConfig(t)
 	conf.RootDir = rootDir
 
 	sleep := []string{"sleep", "100"}
@@ -1581,13 +1572,13 @@ func TestMultiContainerLoadSandbox(t *testing.T) {
 	sleep := []string{"sleep", "100"}
 	specs, ids := createSpecs(sleep, sleep, sleep)
 
-	rootDir, err := testutil.SetupRootDir()
+	rootDir, cleanup, err := testutil.SetupRootDir()
 	if err != nil {
 		t.Fatalf("error creating root dir: %v", err)
 	}
-	defer os.RemoveAll(rootDir)
+	defer cleanup()
 
-	conf := testutil.TestConfig()
+	conf := testutil.TestConfig(t)
 	conf.RootDir = rootDir
 
 	// Create containers for the sandbox.
@@ -1614,7 +1605,7 @@ func TestMultiContainerLoadSandbox(t *testing.T) {
 	}
 
 	// Create a valid but empty container directory.
-	randomCID := testutil.UniqueContainerID()
+	randomCID := testutil.RandomContainerID()
 	dir = filepath.Join(conf.RootDir, randomCID)
 	if err := os.MkdirAll(dir, 0755); err != nil {
 		t.Fatalf("os.MkdirAll(%q)=%v", dir, err)
@@ -1681,13 +1672,13 @@ func TestMultiContainerRunNonRoot(t *testing.T) {
 		Type:        "bind",
 	})
 
-	rootDir, err := testutil.SetupRootDir()
+	rootDir, cleanup, err := testutil.SetupRootDir()
 	if err != nil {
 		t.Fatalf("error creating root dir: %v", err)
 	}
-	defer os.RemoveAll(rootDir)
+	defer cleanup()
 
-	conf := testutil.TestConfig()
+	conf := testutil.TestConfig(t)
 	conf.RootDir = rootDir
 
 	pod, cleanup, err := startContainers(conf, podSpecs, ids)
diff --git a/runsc/container/shared_volume_test.go b/runsc/container/shared_volume_test.go
index dc4194134..bac177a88 100644
--- a/runsc/container/shared_volume_test.go
+++ b/runsc/container/shared_volume_test.go
@@ -24,16 +24,15 @@ import (
 
 	"gvisor.dev/gvisor/pkg/sentry/control"
 	"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+	"gvisor.dev/gvisor/pkg/test/testutil"
 	"gvisor.dev/gvisor/runsc/boot"
-	"gvisor.dev/gvisor/runsc/testutil"
 )
 
 // TestSharedVolume checks that modifications to a volume mount are propagated
 // into and out of the sandbox.
 func TestSharedVolume(t *testing.T) {
-	conf := testutil.TestConfig()
+	conf := testutil.TestConfig(t)
 	conf.FileAccess = boot.FileAccessShared
-	t.Logf("Running test with conf: %+v", conf)
 
 	// Main process just sleeps. We will use "exec" to probe the state of
 	// the filesystem.
@@ -44,16 +43,15 @@ func TestSharedVolume(t *testing.T) {
 		t.Fatalf("TempDir failed: %v", err)
 	}
 
-	rootDir, bundleDir, err := testutil.SetupContainer(spec, conf)
+	_, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf)
 	if err != nil {
 		t.Fatalf("error setting up container: %v", err)
 	}
-	defer os.RemoveAll(rootDir)
-	defer os.RemoveAll(bundleDir)
+	defer cleanup()
 
 	// Create and start the container.
 	args := Args{
-		ID:        testutil.UniqueContainerID(),
+		ID:        testutil.RandomContainerID(),
 		Spec:      spec,
 		BundleDir: bundleDir,
 	}
@@ -190,9 +188,8 @@ func checkFile(c *Container, filename string, want []byte) error {
 // TestSharedVolumeFile tests that changes to file content outside the sandbox
 // is reflected inside.
 func TestSharedVolumeFile(t *testing.T) {
-	conf := testutil.TestConfig()
+	conf := testutil.TestConfig(t)
 	conf.FileAccess = boot.FileAccessShared
-	t.Logf("Running test with conf: %+v", conf)
 
 	// Main process just sleeps. We will use "exec" to probe the state of
 	// the filesystem.
@@ -203,16 +200,15 @@ func TestSharedVolumeFile(t *testing.T) {
 		t.Fatalf("TempDir failed: %v", err)
 	}
 
-	rootDir, bundleDir, err := testutil.SetupContainer(spec, conf)
+	_, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf)
 	if err != nil {
 		t.Fatalf("error setting up container: %v", err)
 	}
-	defer os.RemoveAll(rootDir)
-	defer os.RemoveAll(bundleDir)
+	defer cleanup()
 
 	// Create and start the container.
 	args := Args{
-		ID:        testutil.UniqueContainerID(),
+		ID:        testutil.RandomContainerID(),
 		Spec:      spec,
 		BundleDir: bundleDir,
 	}
diff --git a/runsc/container/test_app/BUILD b/runsc/container/test_app/BUILD
deleted file mode 100644
index 0defbd9fc..000000000
--- a/runsc/container/test_app/BUILD
+++ /dev/null
@@ -1,21 +0,0 @@
-load("//tools:defs.bzl", "go_binary")
-
-package(licenses = ["notice"])
-
-go_binary(
-    name = "test_app",
-    testonly = 1,
-    srcs = [
-        "fds.go",
-        "test_app.go",
-    ],
-    pure = True,
-    visibility = ["//runsc/container:__pkg__"],
-    deps = [
-        "//pkg/unet",
-        "//runsc/flag",
-        "//runsc/testutil",
-        "@com_github_google_subcommands//:go_default_library",
-        "@com_github_kr_pty//:go_default_library",
-    ],
-)
diff --git a/runsc/container/test_app/fds.go b/runsc/container/test_app/fds.go
deleted file mode 100644
index 2a146a2c3..000000000
--- a/runsc/container/test_app/fds.go
+++ /dev/null
@@ -1,185 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package main
-
-import (
-	"context"
-	"io/ioutil"
-	"log"
-	"os"
-	"time"
-
-	"github.com/google/subcommands"
-	"gvisor.dev/gvisor/pkg/unet"
-	"gvisor.dev/gvisor/runsc/flag"
-	"gvisor.dev/gvisor/runsc/testutil"
-)
-
-const fileContents = "foobarbaz"
-
-// fdSender will open a file and send the FD over a unix domain socket.
-type fdSender struct {
-	socketPath string
-}
-
-// Name implements subcommands.Command.Name.
-func (*fdSender) Name() string {
-	return "fd_sender"
-}
-
-// Synopsis implements subcommands.Command.Synopsys.
-func (*fdSender) Synopsis() string {
-	return "creates a file and sends the FD over the socket"
-}
-
-// Usage implements subcommands.Command.Usage.
-func (*fdSender) Usage() string {
-	return "fd_sender <flags>"
-}
-
-// SetFlags implements subcommands.Command.SetFlags.
-func (fds *fdSender) SetFlags(f *flag.FlagSet) {
-	f.StringVar(&fds.socketPath, "socket", "", "path to socket")
-}
-
-// Execute implements subcommands.Command.Execute.
-func (fds *fdSender) Execute(ctx context.Context, f *flag.FlagSet, args ...interface{}) subcommands.ExitStatus {
-	if fds.socketPath == "" {
-		log.Fatalf("socket flag must be set")
-	}
-
-	dir, err := ioutil.TempDir("", "")
-	if err != nil {
-		log.Fatalf("TempDir failed: %v", err)
-	}
-
-	fileToSend, err := ioutil.TempFile(dir, "")
-	if err != nil {
-		log.Fatalf("TempFile failed: %v", err)
-	}
-	defer fileToSend.Close()
-
-	if _, err := fileToSend.WriteString(fileContents); err != nil {
-		log.Fatalf("Write(%q) failed: %v", fileContents, err)
-	}
-
-	// Receiver may not be started yet, so try connecting in a poll loop.
-	var s *unet.Socket
-	if err := testutil.Poll(func() error {
-		var err error
-		s, err = unet.Connect(fds.socketPath, true /* SEQPACKET, so we can send empty message with FD */)
-		return err
-	}, 10*time.Second); err != nil {
-		log.Fatalf("Error connecting to socket %q: %v", fds.socketPath, err)
-	}
-	defer s.Close()
-
-	w := s.Writer(true)
-	w.ControlMessage.PackFDs(int(fileToSend.Fd()))
-	if _, err := w.WriteVec([][]byte{[]byte{'a'}}); err != nil {
-		log.Fatalf("Error sending FD %q over socket %q: %v", fileToSend.Fd(), fds.socketPath, err)
-	}
-
-	log.Print("FD SENDER exiting successfully")
-	return subcommands.ExitSuccess
-}
-
-// fdReceiver receives an FD from a unix domain socket and does things to it.
-type fdReceiver struct {
-	socketPath string
-}
-
-// Name implements subcommands.Command.Name.
-func (*fdReceiver) Name() string {
-	return "fd_receiver"
-}
-
-// Synopsis implements subcommands.Command.Synopsys.
-func (*fdReceiver) Synopsis() string {
-	return "reads an FD from a unix socket, and then does things to it"
-}
-
-// Usage implements subcommands.Command.Usage.
-func (*fdReceiver) Usage() string {
-	return "fd_receiver <flags>"
-}
-
-// SetFlags implements subcommands.Command.SetFlags.
-func (fdr *fdReceiver) SetFlags(f *flag.FlagSet) {
-	f.StringVar(&fdr.socketPath, "socket", "", "path to socket")
-}
-
-// Execute implements subcommands.Command.Execute.
-func (fdr *fdReceiver) Execute(ctx context.Context, f *flag.FlagSet, args ...interface{}) subcommands.ExitStatus {
-	if fdr.socketPath == "" {
-		log.Fatalf("Flags cannot be empty, given: socket: %q", fdr.socketPath)
-	}
-
-	ss, err := unet.BindAndListen(fdr.socketPath, true /* packet */)
-	if err != nil {
-		log.Fatalf("BindAndListen(%q) failed: %v", fdr.socketPath, err)
-	}
-	defer ss.Close()
-
-	var s *unet.Socket
-	c := make(chan error, 1)
-	go func() {
-		var err error
-		s, err = ss.Accept()
-		c <- err
-	}()
-
-	select {
-	case err := <-c:
-		if err != nil {
-			log.Fatalf("Accept() failed: %v", err)
-		}
-	case <-time.After(10 * time.Second):
-		log.Fatalf("Timeout waiting for accept")
-	}
-
-	r := s.Reader(true)
-	r.EnableFDs(1)
-	b := [][]byte{{'a'}}
-	if n, err := r.ReadVec(b); n != 1 || err != nil {
-		log.Fatalf("ReadVec got n=%d err %v (wanted 0, nil)", n, err)
-	}
-
-	fds, err := r.ExtractFDs()
-	if err != nil {
-		log.Fatalf("ExtractFD() got err %v", err)
-	}
-	if len(fds) != 1 {
-		log.Fatalf("ExtractFD() got %d FDs, wanted 1", len(fds))
-	}
-	fd := fds[0]
-
-	file := os.NewFile(uintptr(fd), "received file")
-	defer file.Close()
-	if _, err := file.Seek(0, os.SEEK_SET); err != nil {
-		log.Fatalf("Seek(0, 0) failed: %v", err)
-	}
-
-	got, err := ioutil.ReadAll(file)
-	if err != nil {
-		log.Fatalf("ReadAll failed: %v", err)
-	}
-	if string(got) != fileContents {
-		log.Fatalf("ReadAll got %q want %q", string(got), fileContents)
-	}
-
-	log.Print("FD RECEIVER exiting successfully")
-	return subcommands.ExitSuccess
-}
diff --git a/runsc/container/test_app/test_app.go b/runsc/container/test_app/test_app.go
deleted file mode 100644
index 01c47c79f..000000000
--- a/runsc/container/test_app/test_app.go
+++ /dev/null
@@ -1,394 +0,0 @@
-// Copyright 2018 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// Binary test_app is like a swiss knife for tests that need to run anything
-// inside the sandbox. New functionality can be added with new commands.
-package main
-
-import (
-	"context"
-	"fmt"
-	"io"
-	"io/ioutil"
-	"log"
-	"net"
-	"os"
-	"os/exec"
-	"regexp"
-	"strconv"
-	sys "syscall"
-	"time"
-
-	"github.com/google/subcommands"
-	"github.com/kr/pty"
-	"gvisor.dev/gvisor/runsc/flag"
-	"gvisor.dev/gvisor/runsc/testutil"
-)
-
-func main() {
-	subcommands.Register(subcommands.HelpCommand(), "")
-	subcommands.Register(subcommands.FlagsCommand(), "")
-	subcommands.Register(new(capability), "")
-	subcommands.Register(new(fdReceiver), "")
-	subcommands.Register(new(fdSender), "")
-	subcommands.Register(new(forkBomb), "")
-	subcommands.Register(new(ptyRunner), "")
-	subcommands.Register(new(reaper), "")
-	subcommands.Register(new(syscall), "")
-	subcommands.Register(new(taskTree), "")
-	subcommands.Register(new(uds), "")
-
-	flag.Parse()
-
-	exitCode := subcommands.Execute(context.Background())
-	os.Exit(int(exitCode))
-}
-
-type uds struct {
-	fileName   string
-	socketPath string
-}
-
-// Name implements subcommands.Command.Name.
-func (*uds) Name() string {
-	return "uds"
-}
-
-// Synopsis implements subcommands.Command.Synopsys.
-func (*uds) Synopsis() string {
-	return "creates unix domain socket client and server. Client sends a contant flow of sequential numbers. Server prints them to --file"
-}
-
-// Usage implements subcommands.Command.Usage.
-func (*uds) Usage() string {
-	return "uds <flags>"
-}
-
-// SetFlags implements subcommands.Command.SetFlags.
-func (c *uds) SetFlags(f *flag.FlagSet) {
-	f.StringVar(&c.fileName, "file", "", "name of output file")
-	f.StringVar(&c.socketPath, "socket", "", "path to socket")
-}
-
-// Execute implements subcommands.Command.Execute.
-func (c *uds) Execute(ctx context.Context, f *flag.FlagSet, args ...interface{}) subcommands.ExitStatus {
-	if c.fileName == "" || c.socketPath == "" {
-		log.Fatalf("Flags cannot be empty, given: fileName: %q, socketPath: %q", c.fileName, c.socketPath)
-		return subcommands.ExitFailure
-	}
-	outputFile, err := os.OpenFile(c.fileName, os.O_WRONLY|os.O_CREATE, 0666)
-	if err != nil {
-		log.Fatal("error opening output file:", err)
-	}
-
-	defer os.Remove(c.socketPath)
-
-	listener, err := net.Listen("unix", c.socketPath)
-	if err != nil {
-		log.Fatal("error listening on socket %q:", c.socketPath, err)
-	}
-
-	go server(listener, outputFile)
-	for i := 0; ; i++ {
-		conn, err := net.Dial("unix", c.socketPath)
-		if err != nil {
-			log.Fatal("error dialing:", err)
-		}
-		if _, err := conn.Write([]byte(strconv.Itoa(i))); err != nil {
-			log.Fatal("error writing:", err)
-		}
-		conn.Close()
-		time.Sleep(100 * time.Millisecond)
-	}
-}
-
-func server(listener net.Listener, out *os.File) {
-	buf := make([]byte, 16)
-
-	for {
-		c, err := listener.Accept()
-		if err != nil {
-			log.Fatal("error accepting connection:", err)
-		}
-		nr, err := c.Read(buf)
-		if err != nil {
-			log.Fatal("error reading from buf:", err)
-		}
-		data := buf[0:nr]
-		fmt.Fprint(out, string(data)+"\n")
-	}
-}
-
-type taskTree struct {
-	depth int
-	width int
-	pause bool
-}
-
-// Name implements subcommands.Command.
-func (*taskTree) Name() string {
-	return "task-tree"
-}
-
-// Synopsis implements subcommands.Command.
-func (*taskTree) Synopsis() string {
-	return "creates a tree of tasks"
-}
-
-// Usage implements subcommands.Command.
-func (*taskTree) Usage() string {
-	return "task-tree <flags>"
-}
-
-// SetFlags implements subcommands.Command.
-func (c *taskTree) SetFlags(f *flag.FlagSet) {
-	f.IntVar(&c.depth, "depth", 1, "number of levels to create")
-	f.IntVar(&c.width, "width", 1, "number of tasks at each level")
-	f.BoolVar(&c.pause, "pause", false, "whether the tasks should pause perpetually")
-}
-
-// Execute implements subcommands.Command.
-func (c *taskTree) Execute(ctx context.Context, f *flag.FlagSet, args ...interface{}) subcommands.ExitStatus {
-	stop := testutil.StartReaper()
-	defer stop()
-
-	if c.depth == 0 {
-		log.Printf("Child sleeping, PID: %d\n", os.Getpid())
-		select {}
-	}
-	log.Printf("Parent %d sleeping, PID: %d\n", c.depth, os.Getpid())
-
-	var cmds []*exec.Cmd
-	for i := 0; i < c.width; i++ {
-		cmd := exec.Command(
-			"/proc/self/exe", c.Name(),
-			"--depth", strconv.Itoa(c.depth-1),
-			"--width", strconv.Itoa(c.width),
-			"--pause", strconv.FormatBool(c.pause))
-		cmd.Stdout = os.Stdout
-		cmd.Stderr = os.Stderr
-
-		if err := cmd.Start(); err != nil {
-			log.Fatal("failed to call self:", err)
-		}
-		cmds = append(cmds, cmd)
-	}
-
-	for _, c := range cmds {
-		c.Wait()
-	}
-
-	if c.pause {
-		select {}
-	}
-
-	return subcommands.ExitSuccess
-}
-
-type forkBomb struct {
-	delay time.Duration
-}
-
-// Name implements subcommands.Command.
-func (*forkBomb) Name() string {
-	return "fork-bomb"
-}
-
-// Synopsis implements subcommands.Command.
-func (*forkBomb) Synopsis() string {
-	return "creates child process until the end of times"
-}
-
-// Usage implements subcommands.Command.
-func (*forkBomb) Usage() string {
-	return "fork-bomb <flags>"
-}
-
-// SetFlags implements subcommands.Command.
-func (c *forkBomb) SetFlags(f *flag.FlagSet) {
-	f.DurationVar(&c.delay, "delay", 100*time.Millisecond, "amount of time to delay creation of child")
-}
-
-// Execute implements subcommands.Command.
-func (c *forkBomb) Execute(ctx context.Context, f *flag.FlagSet, args ...interface{}) subcommands.ExitStatus {
-	time.Sleep(c.delay)
-
-	cmd := exec.Command("/proc/self/exe", c.Name())
-	cmd.Stdout = os.Stdout
-	cmd.Stderr = os.Stderr
-	if err := cmd.Run(); err != nil {
-		log.Fatal("failed to call self:", err)
-	}
-	return subcommands.ExitSuccess
-}
-
-type reaper struct{}
-
-// Name implements subcommands.Command.
-func (*reaper) Name() string {
-	return "reaper"
-}
-
-// Synopsis implements subcommands.Command.
-func (*reaper) Synopsis() string {
-	return "reaps all children in a loop"
-}
-
-// Usage implements subcommands.Command.
-func (*reaper) Usage() string {
-	return "reaper <flags>"
-}
-
-// SetFlags implements subcommands.Command.
-func (*reaper) SetFlags(*flag.FlagSet) {}
-
-// Execute implements subcommands.Command.
-func (c *reaper) Execute(ctx context.Context, f *flag.FlagSet, args ...interface{}) subcommands.ExitStatus {
-	stop := testutil.StartReaper()
-	defer stop()
-	select {}
-}
-
-type syscall struct {
-	sysno uint64
-}
-
-// Name implements subcommands.Command.
-func (*syscall) Name() string {
-	return "syscall"
-}
-
-// Synopsis implements subcommands.Command.
-func (*syscall) Synopsis() string {
-	return "syscall makes a syscall"
-}
-
-// Usage implements subcommands.Command.
-func (*syscall) Usage() string {
-	return "syscall <flags>"
-}
-
-// SetFlags implements subcommands.Command.
-func (s *syscall) SetFlags(f *flag.FlagSet) {
-	f.Uint64Var(&s.sysno, "syscall", 0, "syscall to call")
-}
-
-// Execute implements subcommands.Command.
-func (s *syscall) Execute(ctx context.Context, f *flag.FlagSet, args ...interface{}) subcommands.ExitStatus {
-	if _, _, errno := sys.Syscall(uintptr(s.sysno), 0, 0, 0); errno != 0 {
-		fmt.Printf("syscall(%d, 0, 0...) failed: %v\n", s.sysno, errno)
-	} else {
-		fmt.Printf("syscall(%d, 0, 0...) success\n", s.sysno)
-	}
-	return subcommands.ExitSuccess
-}
-
-type capability struct {
-	enabled  uint64
-	disabled uint64
-}
-
-// Name implements subcommands.Command.
-func (*capability) Name() string {
-	return "capability"
-}
-
-// Synopsis implements subcommands.Command.
-func (*capability) Synopsis() string {
-	return "checks if effective capabilities are set/unset"
-}
-
-// Usage implements subcommands.Command.
-func (*capability) Usage() string {
-	return "capability [--enabled=number] [--disabled=number]"
-}
-
-// SetFlags implements subcommands.Command.
-func (c *capability) SetFlags(f *flag.FlagSet) {
-	f.Uint64Var(&c.enabled, "enabled", 0, "")
-	f.Uint64Var(&c.disabled, "disabled", 0, "")
-}
-
-// Execute implements subcommands.Command.
-func (c *capability) Execute(ctx context.Context, f *flag.FlagSet, args ...interface{}) subcommands.ExitStatus {
-	if c.enabled == 0 && c.disabled == 0 {
-		fmt.Println("One of the flags must be set")
-		return subcommands.ExitUsageError
-	}
-
-	status, err := ioutil.ReadFile("/proc/self/status")
-	if err != nil {
-		fmt.Printf("Error reading %q: %v\n", "proc/self/status", err)
-		return subcommands.ExitFailure
-	}
-	re := regexp.MustCompile("CapEff:\t([0-9a-f]+)\n")
-	matches := re.FindStringSubmatch(string(status))
-	if matches == nil || len(matches) != 2 {
-		fmt.Printf("Effective capabilities not found in\n%s\n", status)
-		return subcommands.ExitFailure
-	}
-	caps, err := strconv.ParseUint(matches[1], 16, 64)
-	if err != nil {
-		fmt.Printf("failed to convert capabilities %q: %v\n", matches[1], err)
-		return subcommands.ExitFailure
-	}
-
-	if c.enabled != 0 && (caps&c.enabled) != c.enabled {
-		fmt.Printf("Missing capabilities, want: %#x: got: %#x\n", c.enabled, caps)
-		return subcommands.ExitFailure
-	}
-	if c.disabled != 0 && (caps&c.disabled) != 0 {
-		fmt.Printf("Extra capabilities found, dont_want: %#x: got: %#x\n", c.disabled, caps)
-		return subcommands.ExitFailure
-	}
-
-	return subcommands.ExitSuccess
-}
-
-type ptyRunner struct{}
-
-// Name implements subcommands.Command.
-func (*ptyRunner) Name() string {
-	return "pty-runner"
-}
-
-// Synopsis implements subcommands.Command.
-func (*ptyRunner) Synopsis() string {
-	return "runs the given command with an open pty terminal"
-}
-
-// Usage implements subcommands.Command.
-func (*ptyRunner) Usage() string {
-	return "pty-runner [command]"
-}
-
-// SetFlags implements subcommands.Command.SetFlags.
-func (*ptyRunner) SetFlags(f *flag.FlagSet) {}
-
-// Execute implements subcommands.Command.
-func (*ptyRunner) Execute(_ context.Context, fs *flag.FlagSet, _ ...interface{}) subcommands.ExitStatus {
-	c := exec.Command(fs.Args()[0], fs.Args()[1:]...)
-	f, err := pty.Start(c)
-	if err != nil {
-		fmt.Printf("pty.Start failed: %v", err)
-		return subcommands.ExitFailure
-	}
-	defer f.Close()
-
-	// Copy stdout from the command to keep this process alive until the
-	// subprocess exits.
-	io.Copy(os.Stdout, f)
-
-	return subcommands.ExitSuccess
-}
diff --git a/runsc/criutil/BUILD b/runsc/criutil/BUILD
deleted file mode 100644
index 8a571a000..000000000
--- a/runsc/criutil/BUILD
+++ /dev/null
@@ -1,11 +0,0 @@
-load("//tools:defs.bzl", "go_library")
-
-package(licenses = ["notice"])
-
-go_library(
-    name = "criutil",
-    testonly = 1,
-    srcs = ["criutil.go"],
-    visibility = ["//:sandbox"],
-    deps = ["//runsc/testutil"],
-)
diff --git a/runsc/criutil/criutil.go b/runsc/criutil/criutil.go
deleted file mode 100644
index 773f5a1c4..000000000
--- a/runsc/criutil/criutil.go
+++ /dev/null
@@ -1,277 +0,0 @@
-// Copyright 2018 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// Package criutil contains utility functions for interacting with the
-// Container Runtime Interface (CRI), principally via the crictl command line
-// tool. This requires critools to be installed on the local system.
-package criutil
-
-import (
-	"encoding/json"
-	"fmt"
-	"os"
-	"os/exec"
-	"strings"
-	"time"
-
-	"gvisor.dev/gvisor/runsc/testutil"
-)
-
-const endpointPrefix = "unix://"
-
-// Crictl contains information required to run the crictl utility.
-type Crictl struct {
-	executable      string
-	timeout         time.Duration
-	imageEndpoint   string
-	runtimeEndpoint string
-}
-
-// NewCrictl returns a Crictl configured with a timeout and an endpoint over
-// which it will talk to containerd.
-func NewCrictl(timeout time.Duration, endpoint string) *Crictl {
-	// Bazel doesn't pass PATH through, assume the location of crictl
-	// unless specified by environment variable.
-	executable := os.Getenv("CRICTL_PATH")
-	if executable == "" {
-		executable = "/usr/local/bin/crictl"
-	}
-	return &Crictl{
-		executable:      executable,
-		timeout:         timeout,
-		imageEndpoint:   endpointPrefix + endpoint,
-		runtimeEndpoint: endpointPrefix + endpoint,
-	}
-}
-
-// Pull pulls an container image. It corresponds to `crictl pull`.
-func (cc *Crictl) Pull(imageName string) error {
-	_, err := cc.run("pull", imageName)
-	return err
-}
-
-// RunPod creates a sandbox. It corresponds to `crictl runp`.
-func (cc *Crictl) RunPod(sbSpecFile string) (string, error) {
-	podID, err := cc.run("runp", sbSpecFile)
-	if err != nil {
-		return "", fmt.Errorf("runp failed: %v", err)
-	}
-	// Strip the trailing newline from crictl output.
-	return strings.TrimSpace(podID), nil
-}
-
-// Create creates a container within a sandbox. It corresponds to `crictl
-// create`.
-func (cc *Crictl) Create(podID, contSpecFile, sbSpecFile string) (string, error) {
-	podID, err := cc.run("create", podID, contSpecFile, sbSpecFile)
-	if err != nil {
-		return "", fmt.Errorf("create failed: %v", err)
-	}
-	// Strip the trailing newline from crictl output.
-	return strings.TrimSpace(podID), nil
-}
-
-// Start starts a container. It corresponds to `crictl start`.
-func (cc *Crictl) Start(contID string) (string, error) {
-	output, err := cc.run("start", contID)
-	if err != nil {
-		return "", fmt.Errorf("start failed: %v", err)
-	}
-	return output, nil
-}
-
-// Stop stops a container. It corresponds to `crictl stop`.
-func (cc *Crictl) Stop(contID string) error {
-	_, err := cc.run("stop", contID)
-	return err
-}
-
-// Exec execs a program inside a container. It corresponds to `crictl exec`.
-func (cc *Crictl) Exec(contID string, args ...string) (string, error) {
-	a := []string{"exec", contID}
-	a = append(a, args...)
-	output, err := cc.run(a...)
-	if err != nil {
-		return "", fmt.Errorf("exec failed: %v", err)
-	}
-	return output, nil
-}
-
-// Rm removes a container. It corresponds to `crictl rm`.
-func (cc *Crictl) Rm(contID string) error {
-	_, err := cc.run("rm", contID)
-	return err
-}
-
-// StopPod stops a pod. It corresponds to `crictl stopp`.
-func (cc *Crictl) StopPod(podID string) error {
-	_, err := cc.run("stopp", podID)
-	return err
-}
-
-// containsConfig is a minimal copy of
-// https://github.com/kubernetes/kubernetes/blob/master/pkg/kubelet/apis/cri/runtime/v1alpha2/api.proto
-// It only contains fields needed for testing.
-type containerConfig struct {
-	Status containerStatus
-}
-
-type containerStatus struct {
-	Network containerNetwork
-}
-
-type containerNetwork struct {
-	IP string
-}
-
-// PodIP returns a pod's IP address.
-func (cc *Crictl) PodIP(podID string) (string, error) {
-	output, err := cc.run("inspectp", podID)
-	if err != nil {
-		return "", err
-	}
-	conf := &containerConfig{}
-	if err := json.Unmarshal([]byte(output), conf); err != nil {
-		return "", fmt.Errorf("failed to unmarshal JSON: %v, %s", err, output)
-	}
-	if conf.Status.Network.IP == "" {
-		return "", fmt.Errorf("no IP found in config: %s", output)
-	}
-	return conf.Status.Network.IP, nil
-}
-
-// RmPod removes a container. It corresponds to `crictl rmp`.
-func (cc *Crictl) RmPod(podID string) error {
-	_, err := cc.run("rmp", podID)
-	return err
-}
-
-// StartContainer pulls the given image ands starts the container in the
-// sandbox with the given podID.
-func (cc *Crictl) StartContainer(podID, image, sbSpec, contSpec string) (string, error) {
-	// Write the specs to files that can be read by crictl.
-	sbSpecFile, err := testutil.WriteTmpFile("sbSpec", sbSpec)
-	if err != nil {
-		return "", fmt.Errorf("failed to write sandbox spec: %v", err)
-	}
-	contSpecFile, err := testutil.WriteTmpFile("contSpec", contSpec)
-	if err != nil {
-		return "", fmt.Errorf("failed to write container spec: %v", err)
-	}
-
-	return cc.startContainer(podID, image, sbSpecFile, contSpecFile)
-}
-
-func (cc *Crictl) startContainer(podID, image, sbSpecFile, contSpecFile string) (string, error) {
-	if err := cc.Pull(image); err != nil {
-		return "", fmt.Errorf("failed to pull %s: %v", image, err)
-	}
-
-	contID, err := cc.Create(podID, contSpecFile, sbSpecFile)
-	if err != nil {
-		return "", fmt.Errorf("failed to create container in pod %q: %v", podID, err)
-	}
-
-	if _, err := cc.Start(contID); err != nil {
-		return "", fmt.Errorf("failed to start container %q in pod %q: %v", contID, podID, err)
-	}
-
-	return contID, nil
-}
-
-// StopContainer stops and deletes the container with the given container ID.
-func (cc *Crictl) StopContainer(contID string) error {
-	if err := cc.Stop(contID); err != nil {
-		return fmt.Errorf("failed to stop container %q: %v", contID, err)
-	}
-
-	if err := cc.Rm(contID); err != nil {
-		return fmt.Errorf("failed to remove container %q: %v", contID, err)
-	}
-
-	return nil
-}
-
-// StartPodAndContainer pulls an image, then starts a sandbox and container in
-// that sandbox. It returns the pod ID and container ID.
-func (cc *Crictl) StartPodAndContainer(image, sbSpec, contSpec string) (string, string, error) {
-	// Write the specs to files that can be read by crictl.
-	sbSpecFile, err := testutil.WriteTmpFile("sbSpec", sbSpec)
-	if err != nil {
-		return "", "", fmt.Errorf("failed to write sandbox spec: %v", err)
-	}
-	contSpecFile, err := testutil.WriteTmpFile("contSpec", contSpec)
-	if err != nil {
-		return "", "", fmt.Errorf("failed to write container spec: %v", err)
-	}
-
-	podID, err := cc.RunPod(sbSpecFile)
-	if err != nil {
-		return "", "", err
-	}
-
-	contID, err := cc.startContainer(podID, image, sbSpecFile, contSpecFile)
-
-	return podID, contID, err
-}
-
-// StopPodAndContainer stops a container and pod.
-func (cc *Crictl) StopPodAndContainer(podID, contID string) error {
-	if err := cc.StopContainer(contID); err != nil {
-		return fmt.Errorf("failed to stop container %q in pod %q: %v", contID, podID, err)
-	}
-
-	if err := cc.StopPod(podID); err != nil {
-		return fmt.Errorf("failed to stop pod %q: %v", podID, err)
-	}
-
-	if err := cc.RmPod(podID); err != nil {
-		return fmt.Errorf("failed to remove pod %q: %v", podID, err)
-	}
-
-	return nil
-}
-
-// run runs crictl with the given args and returns an error if it takes longer
-// than cc.Timeout to run.
-func (cc *Crictl) run(args ...string) (string, error) {
-	defaultArgs := []string{
-		"--image-endpoint", cc.imageEndpoint,
-		"--runtime-endpoint", cc.runtimeEndpoint,
-	}
-	cmd := exec.Command(cc.executable, append(defaultArgs, args...)...)
-
-	// Run the command with a timeout.
-	done := make(chan string)
-	errCh := make(chan error)
-	go func() {
-		output, err := cmd.CombinedOutput()
-		if err != nil {
-			errCh <- fmt.Errorf("error: \"%v\", output: %s", err, string(output))
-			return
-		}
-		done <- string(output)
-	}()
-	select {
-	case output := <-done:
-		return output, nil
-	case err := <-errCh:
-		return "", err
-	case <-time.After(cc.timeout):
-		if err := testutil.KillCommand(cmd); err != nil {
-			return "", fmt.Errorf("timed out, then couldn't kill process %+v: %v", cmd, err)
-		}
-		return "", fmt.Errorf("timed out: %+v", cmd)
-	}
-}
diff --git a/runsc/dockerutil/BUILD b/runsc/dockerutil/BUILD
deleted file mode 100644
index 8621af901..000000000
--- a/runsc/dockerutil/BUILD
+++ /dev/null
@@ -1,14 +0,0 @@
-load("//tools:defs.bzl", "go_library")
-
-package(licenses = ["notice"])
-
-go_library(
-    name = "dockerutil",
-    testonly = 1,
-    srcs = ["dockerutil.go"],
-    visibility = ["//:sandbox"],
-    deps = [
-        "//runsc/testutil",
-        "@com_github_kr_pty//:go_default_library",
-    ],
-)
diff --git a/runsc/dockerutil/dockerutil.go b/runsc/dockerutil/dockerutil.go
deleted file mode 100644
index 1ff5e8cc3..000000000
--- a/runsc/dockerutil/dockerutil.go
+++ /dev/null
@@ -1,476 +0,0 @@
-// Copyright 2018 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// Package dockerutil is a collection of utility functions, primarily for
-// testing.
-package dockerutil
-
-import (
-	"encoding/json"
-	"flag"
-	"fmt"
-	"io/ioutil"
-	"log"
-	"os"
-	"os/exec"
-	"path"
-	"regexp"
-	"strconv"
-	"strings"
-	"syscall"
-	"time"
-
-	"github.com/kr/pty"
-	"gvisor.dev/gvisor/runsc/testutil"
-)
-
-var (
-	runtime = flag.String("runtime", "runsc", "specify which runtime to use")
-	config  = flag.String("config_path", "/etc/docker/daemon.json", "configuration file for reading paths")
-)
-
-// EnsureSupportedDockerVersion checks if correct docker is installed.
-func EnsureSupportedDockerVersion() {
-	cmd := exec.Command("docker", "version")
-	out, err := cmd.CombinedOutput()
-	if err != nil {
-		log.Fatalf("Error running %q: %v", "docker version", err)
-	}
-	re := regexp.MustCompile(`Version:\s+(\d+)\.(\d+)\.\d.*`)
-	matches := re.FindStringSubmatch(string(out))
-	if len(matches) != 3 {
-		log.Fatalf("Invalid docker output: %s", out)
-	}
-	major, _ := strconv.Atoi(matches[1])
-	minor, _ := strconv.Atoi(matches[2])
-	if major < 17 || (major == 17 && minor < 9) {
-		log.Fatalf("Docker version 17.09.0 or greater is required, found: %02d.%02d", major, minor)
-	}
-}
-
-// RuntimePath returns the binary path for the current runtime.
-func RuntimePath() (string, error) {
-	// Read the configuration data; the file must exist.
-	configBytes, err := ioutil.ReadFile(*config)
-	if err != nil {
-		return "", err
-	}
-
-	// Unmarshal the configuration.
-	c := make(map[string]interface{})
-	if err := json.Unmarshal(configBytes, &c); err != nil {
-		return "", err
-	}
-
-	// Decode the expected configuration.
-	r, ok := c["runtimes"]
-	if !ok {
-		return "", fmt.Errorf("no runtimes declared: %v", c)
-	}
-	rs, ok := r.(map[string]interface{})
-	if !ok {
-		// The runtimes are not a map.
-		return "", fmt.Errorf("unexpected format: %v", c)
-	}
-	r, ok = rs[*runtime]
-	if !ok {
-		// The expected runtime is not declared.
-		return "", fmt.Errorf("runtime %q not found: %v", *runtime, c)
-	}
-	rs, ok = r.(map[string]interface{})
-	if !ok {
-		// The runtime is not a map.
-		return "", fmt.Errorf("unexpected format: %v", c)
-	}
-	p, ok := rs["path"].(string)
-	if !ok {
-		// The runtime does not declare a path.
-		return "", fmt.Errorf("unexpected format: %v", c)
-	}
-	return p, nil
-}
-
-// MountMode describes if the mount should be ro or rw.
-type MountMode int
-
-const (
-	// ReadOnly is what the name says.
-	ReadOnly MountMode = iota
-	// ReadWrite is what the name says.
-	ReadWrite
-)
-
-// String returns the mount mode argument for this MountMode.
-func (m MountMode) String() string {
-	switch m {
-	case ReadOnly:
-		return "ro"
-	case ReadWrite:
-		return "rw"
-	}
-	panic(fmt.Sprintf("invalid mode: %d", m))
-}
-
-// MountArg formats the volume argument to mount in the container.
-func MountArg(source, target string, mode MountMode) string {
-	return fmt.Sprintf("-v=%s:%s:%v", source, target, mode)
-}
-
-// LinkArg formats the link argument.
-func LinkArg(source *Docker, target string) string {
-	return fmt.Sprintf("--link=%s:%s", source.Name, target)
-}
-
-// PrepareFiles creates temp directory to copy files there. The sandbox doesn't
-// have access to files in the test dir.
-func PrepareFiles(names ...string) (string, error) {
-	dir, err := ioutil.TempDir("", "image-test")
-	if err != nil {
-		return "", fmt.Errorf("ioutil.TempDir failed: %v", err)
-	}
-	if err := os.Chmod(dir, 0777); err != nil {
-		return "", fmt.Errorf("os.Chmod(%q, 0777) failed: %v", dir, err)
-	}
-	for _, name := range names {
-		src, err := testutil.FindFile(name)
-		if err != nil {
-			return "", fmt.Errorf("testutil.Preparefiles(%q) failed: %v", name, err)
-		}
-		dst := path.Join(dir, path.Base(name))
-		if err := testutil.Copy(src, dst); err != nil {
-			return "", fmt.Errorf("testutil.Copy(%q, %q) failed: %v", src, dst, err)
-		}
-	}
-	return dir, nil
-}
-
-// do executes docker command.
-func do(args ...string) (string, error) {
-	log.Printf("Running: docker %s\n", args)
-	cmd := exec.Command("docker", args...)
-	out, err := cmd.CombinedOutput()
-	if err != nil {
-		return "", fmt.Errorf("error executing docker %s: %v\nout: %s", args, err, out)
-	}
-	return string(out), nil
-}
-
-// doWithPty executes docker command with stdio attached to a pty.
-func doWithPty(args ...string) (*exec.Cmd, *os.File, error) {
-	log.Printf("Running with pty: docker %s\n", args)
-	cmd := exec.Command("docker", args...)
-	ptmx, err := pty.Start(cmd)
-	if err != nil {
-		return nil, nil, fmt.Errorf("error executing docker %s with a pty: %v", args, err)
-	}
-	return cmd, ptmx, nil
-}
-
-// Pull pulls a docker image. This is used in tests to isolate the
-// time to pull the image off the network from the time to actually
-// start the container, to avoid timeouts over slow networks.
-func Pull(image string) error {
-	_, err := do("pull", image)
-	return err
-}
-
-// Docker contains the name and the runtime of a docker container.
-type Docker struct {
-	Runtime string
-	Name    string
-}
-
-// MakeDocker sets up the struct for a Docker container.
-// Names of containers will be unique.
-func MakeDocker(namePrefix string) Docker {
-	return Docker{
-		Name:    testutil.RandomName(namePrefix),
-		Runtime: *runtime,
-	}
-}
-
-// logDockerID logs a container id, which is needed to find container runsc logs.
-func (d *Docker) logDockerID() {
-	id, err := d.ID()
-	if err != nil {
-		log.Printf("%v\n", err)
-	}
-	log.Printf("Name: %s ID: %v\n", d.Name, id)
-}
-
-// Create calls 'docker create' with the arguments provided.
-func (d *Docker) Create(args ...string) error {
-	a := []string{"create", "--runtime", d.Runtime, "--name", d.Name}
-	a = append(a, args...)
-	_, err := do(a...)
-	if err == nil {
-		d.logDockerID()
-	}
-	return err
-}
-
-// Start calls 'docker start'.
-func (d *Docker) Start() error {
-	if _, err := do("start", d.Name); err != nil {
-		return fmt.Errorf("error starting container %q: %v", d.Name, err)
-	}
-	return nil
-}
-
-// Stop calls 'docker stop'.
-func (d *Docker) Stop() error {
-	if _, err := do("stop", d.Name); err != nil {
-		return fmt.Errorf("error stopping container %q: %v", d.Name, err)
-	}
-	return nil
-}
-
-// Run calls 'docker run' with the arguments provided. The container starts
-// running in the background and the call returns immediately.
-func (d *Docker) Run(args ...string) error {
-	a := d.runArgs("-d")
-	a = append(a, args...)
-	_, err := do(a...)
-	if err == nil {
-		d.logDockerID()
-	}
-	return err
-}
-
-// RunWithPty is like Run but with an attached pty.
-func (d *Docker) RunWithPty(args ...string) (*exec.Cmd, *os.File, error) {
-	a := d.runArgs("-it")
-	a = append(a, args...)
-	return doWithPty(a...)
-}
-
-// RunFg calls 'docker run' with the arguments provided in the foreground. It
-// blocks until the container exits and returns the output.
-func (d *Docker) RunFg(args ...string) (string, error) {
-	a := d.runArgs(args...)
-	out, err := do(a...)
-	if err == nil {
-		d.logDockerID()
-	}
-	return string(out), err
-}
-
-func (d *Docker) runArgs(args ...string) []string {
-	// Environment variable RUNSC_TEST_NAME is picked up by the runtime and added
-	// to the log name, so one can easily identify the corresponding logs for
-	// this test.
-	rv := []string{"run", "--runtime", d.Runtime, "--name", d.Name, "-e", "RUNSC_TEST_NAME=" + d.Name}
-	return append(rv, args...)
-}
-
-// Logs calls 'docker logs'.
-func (d *Docker) Logs() (string, error) {
-	return do("logs", d.Name)
-}
-
-// Exec calls 'docker exec' with the arguments provided.
-func (d *Docker) Exec(args ...string) (string, error) {
-	return d.ExecWithFlags(nil, args...)
-}
-
-// ExecWithFlags calls 'docker exec <flags> name <args>'.
-func (d *Docker) ExecWithFlags(flags []string, args ...string) (string, error) {
-	a := []string{"exec"}
-	a = append(a, flags...)
-	a = append(a, d.Name)
-	a = append(a, args...)
-	return do(a...)
-}
-
-// ExecAsUser calls 'docker exec' as the given user with the arguments
-// provided.
-func (d *Docker) ExecAsUser(user string, args ...string) (string, error) {
-	a := []string{"exec", "--user", user, d.Name}
-	a = append(a, args...)
-	return do(a...)
-}
-
-// ExecWithTerminal calls 'docker exec -it' with the arguments provided and
-// attaches a pty to stdio.
-func (d *Docker) ExecWithTerminal(args ...string) (*exec.Cmd, *os.File, error) {
-	a := []string{"exec", "-it", d.Name}
-	a = append(a, args...)
-	return doWithPty(a...)
-}
-
-// Pause calls 'docker pause'.
-func (d *Docker) Pause() error {
-	if _, err := do("pause", d.Name); err != nil {
-		return fmt.Errorf("error pausing container %q: %v", d.Name, err)
-	}
-	return nil
-}
-
-// Unpause calls 'docker pause'.
-func (d *Docker) Unpause() error {
-	if _, err := do("unpause", d.Name); err != nil {
-		return fmt.Errorf("error unpausing container %q: %v", d.Name, err)
-	}
-	return nil
-}
-
-// Checkpoint calls 'docker checkpoint'.
-func (d *Docker) Checkpoint(name string) error {
-	if _, err := do("checkpoint", "create", d.Name, name); err != nil {
-		return fmt.Errorf("error pausing container %q: %v", d.Name, err)
-	}
-	return nil
-}
-
-// Restore calls 'docker start --checkname [name]'.
-func (d *Docker) Restore(name string) error {
-	if _, err := do("start", "--checkpoint", name, d.Name); err != nil {
-		return fmt.Errorf("error starting container %q: %v", d.Name, err)
-	}
-	return nil
-}
-
-// Remove calls 'docker rm'.
-func (d *Docker) Remove() error {
-	if _, err := do("rm", d.Name); err != nil {
-		return fmt.Errorf("error deleting container %q: %v", d.Name, err)
-	}
-	return nil
-}
-
-// CleanUp kills and deletes the container (best effort).
-func (d *Docker) CleanUp() {
-	d.logDockerID()
-	if _, err := do("kill", d.Name); err != nil {
-		if strings.Contains(err.Error(), "is not running") {
-			// Nothing to kill. Don't log the error in this case.
-		} else {
-			log.Printf("error killing container %q: %v", d.Name, err)
-		}
-	}
-	if err := d.Remove(); err != nil {
-		log.Print(err)
-	}
-}
-
-// FindPort returns the host port that is mapped to 'sandboxPort'. This calls
-// docker to allocate a free port in the host and prevent conflicts.
-func (d *Docker) FindPort(sandboxPort int) (int, error) {
-	format := fmt.Sprintf(`{{ (index (index .NetworkSettings.Ports "%d/tcp") 0).HostPort }}`, sandboxPort)
-	out, err := do("inspect", "-f", format, d.Name)
-	if err != nil {
-		return -1, fmt.Errorf("error retrieving port: %v", err)
-	}
-	port, err := strconv.Atoi(strings.TrimSuffix(string(out), "\n"))
-	if err != nil {
-		return -1, fmt.Errorf("error parsing port %q: %v", out, err)
-	}
-	return port, nil
-}
-
-// FindIP returns the IP address of the container as a string.
-func (d *Docker) FindIP() (string, error) {
-	const format = `{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}}`
-	out, err := do("inspect", "-f", format, d.Name)
-	if err != nil {
-		return "", fmt.Errorf("error retrieving IP: %v", err)
-	}
-	return strings.TrimSpace(out), nil
-}
-
-// SandboxPid returns the PID to the sandbox process.
-func (d *Docker) SandboxPid() (int, error) {
-	out, err := do("inspect", "-f={{.State.Pid}}", d.Name)
-	if err != nil {
-		return -1, fmt.Errorf("error retrieving pid: %v", err)
-	}
-	pid, err := strconv.Atoi(strings.TrimSuffix(string(out), "\n"))
-	if err != nil {
-		return -1, fmt.Errorf("error parsing pid %q: %v", out, err)
-	}
-	return pid, nil
-}
-
-// ID returns the container ID.
-func (d *Docker) ID() (string, error) {
-	out, err := do("inspect", "-f={{.Id}}", d.Name)
-	if err != nil {
-		return "", fmt.Errorf("error retrieving ID: %v", err)
-	}
-	return strings.TrimSpace(string(out)), nil
-}
-
-// Wait waits for container to exit, up to the given timeout. Returns error if
-// wait fails or timeout is hit. Returns the application return code otherwise.
-// Note that the application may have failed even if err == nil, always check
-// the exit code.
-func (d *Docker) Wait(timeout time.Duration) (syscall.WaitStatus, error) {
-	timeoutChan := time.After(timeout)
-	waitChan := make(chan (syscall.WaitStatus))
-	errChan := make(chan (error))
-
-	go func() {
-		out, err := do("wait", d.Name)
-		if err != nil {
-			errChan <- fmt.Errorf("error waiting for container %q: %v", d.Name, err)
-		}
-		exit, err := strconv.Atoi(strings.TrimSuffix(string(out), "\n"))
-		if err != nil {
-			errChan <- fmt.Errorf("error parsing exit code %q: %v", out, err)
-		}
-		waitChan <- syscall.WaitStatus(uint32(exit))
-	}()
-
-	select {
-	case ws := <-waitChan:
-		return ws, nil
-	case err := <-errChan:
-		return syscall.WaitStatus(1), err
-	case <-timeoutChan:
-		return syscall.WaitStatus(1), fmt.Errorf("timeout waiting for container %q", d.Name)
-	}
-}
-
-// WaitForOutput calls 'docker logs' to retrieve containers output and searches
-// for the given pattern.
-func (d *Docker) WaitForOutput(pattern string, timeout time.Duration) (string, error) {
-	matches, err := d.WaitForOutputSubmatch(pattern, timeout)
-	if err != nil {
-		return "", err
-	}
-	if len(matches) == 0 {
-		return "", nil
-	}
-	return matches[0], nil
-}
-
-// WaitForOutputSubmatch calls 'docker logs' to retrieve containers output and
-// searches for the given pattern. It returns any regexp submatches as well.
-func (d *Docker) WaitForOutputSubmatch(pattern string, timeout time.Duration) ([]string, error) {
-	re := regexp.MustCompile(pattern)
-	var out string
-	for exp := time.Now().Add(timeout); time.Now().Before(exp); {
-		var err error
-		out, err = d.Logs()
-		if err != nil {
-			return nil, err
-		}
-		if matches := re.FindStringSubmatch(out); matches != nil {
-			// Success!
-			return matches, nil
-		}
-		time.Sleep(100 * time.Millisecond)
-	}
-	return nil, fmt.Errorf("timeout waiting for output %q: %s", re.String(), out)
-}
diff --git a/runsc/fsgofer/BUILD b/runsc/fsgofer/BUILD
index 64a406ae2..1036b0630 100644
--- a/runsc/fsgofer/BUILD
+++ b/runsc/fsgofer/BUILD
@@ -13,12 +13,12 @@ go_library(
     visibility = ["//runsc:__subpackages__"],
     deps = [
         "//pkg/abi/linux",
+        "//pkg/cleanup",
         "//pkg/fd",
         "//pkg/log",
         "//pkg/p9",
         "//pkg/sync",
         "//pkg/syserr",
-        "//runsc/specutils",
         "@org_golang_x_sys//unix:go_default_library",
     ],
 )
diff --git a/runsc/fsgofer/fsgofer.go b/runsc/fsgofer/fsgofer.go
index cadd83273..edc239013 100644
--- a/runsc/fsgofer/fsgofer.go
+++ b/runsc/fsgofer/fsgofer.go
@@ -33,11 +33,11 @@ import (
 
 	"golang.org/x/sys/unix"
 	"gvisor.dev/gvisor/pkg/abi/linux"
+	"gvisor.dev/gvisor/pkg/cleanup"
 	"gvisor.dev/gvisor/pkg/fd"
 	"gvisor.dev/gvisor/pkg/log"
 	"gvisor.dev/gvisor/pkg/p9"
 	"gvisor.dev/gvisor/pkg/sync"
-	"gvisor.dev/gvisor/runsc/specutils"
 )
 
 const (
@@ -439,7 +439,7 @@ func (l *localFile) Create(name string, mode p9.OpenFlags, perm p9.FileMode, uid
 	if err != nil {
 		return nil, nil, p9.QID{}, 0, extractErrno(err)
 	}
-	cu := specutils.MakeCleanup(func() {
+	cu := cleanup.Make(func() {
 		child.Close()
 		// Best effort attempt to remove the file in case of failure.
 		if err := syscall.Unlinkat(l.file.FD(), name); err != nil {
@@ -480,7 +480,7 @@ func (l *localFile) Mkdir(name string, perm p9.FileMode, uid p9.UID, gid p9.GID)
 	if err := syscall.Mkdirat(l.file.FD(), name, uint32(perm.Permissions())); err != nil {
 		return p9.QID{}, extractErrno(err)
 	}
-	cu := specutils.MakeCleanup(func() {
+	cu := cleanup.Make(func() {
 		// Best effort attempt to remove the dir in case of failure.
 		if err := unix.Unlinkat(l.file.FD(), name, unix.AT_REMOVEDIR); err != nil {
 			log.Warningf("error unlinking dir %q after failure: %v", path.Join(l.hostPath, name), err)
@@ -767,22 +767,18 @@ func (l *localFile) SetAttr(valid p9.SetAttrMask, attr p9.SetAttr) error {
 	return err
 }
 
-// TODO(b/127675828): support getxattr.
 func (*localFile) GetXattr(string, uint64) (string, error) {
 	return "", syscall.EOPNOTSUPP
 }
 
-// TODO(b/127675828): support setxattr.
 func (*localFile) SetXattr(string, string, uint32) error {
 	return syscall.EOPNOTSUPP
 }
 
-// TODO(b/148303075): support listxattr.
 func (*localFile) ListXattr(uint64) (map[string]struct{}, error) {
 	return nil, syscall.EOPNOTSUPP
 }
 
-// TODO(b/148303075): support removexattr.
 func (*localFile) RemoveXattr(string) error {
 	return syscall.EOPNOTSUPP
 }
@@ -868,7 +864,7 @@ func (l *localFile) Symlink(target, newName string, uid p9.UID, gid p9.GID) (p9.
 	if err := unix.Symlinkat(target, l.file.FD(), newName); err != nil {
 		return p9.QID{}, extractErrno(err)
 	}
-	cu := specutils.MakeCleanup(func() {
+	cu := cleanup.Make(func() {
 		// Best effort attempt to remove the symlink in case of failure.
 		if err := syscall.Unlinkat(l.file.FD(), newName); err != nil {
 			log.Warningf("error unlinking file %q after failure: %v", path.Join(l.hostPath, newName), err)
diff --git a/runsc/main.go b/runsc/main.go
index 762b0f801..920ed84a5 100644
--- a/runsc/main.go
+++ b/runsc/main.go
@@ -54,9 +54,11 @@ var (
 
 	// Debugging flags.
 	debugLog        = flag.String("debug-log", "", "additional location for logs. If it ends with '/', log files are created inside the directory with default names. The following variables are available: %TIMESTAMP%, %COMMAND%.")
+	panicLog        = flag.String("panic-log", "", "file path were panic reports and other Go's runtime messages are written.")
 	logPackets      = flag.Bool("log-packets", false, "enable network packet logging.")
 	logFD           = flag.Int("log-fd", -1, "file descriptor to log to.  If set, the 'log' flag is ignored.")
 	debugLogFD      = flag.Int("debug-log-fd", -1, "file descriptor to write debug logs to.  If set, the 'debug-log-dir' flag is ignored.")
+	panicLogFD      = flag.Int("panic-log-fd", -1, "file descriptor to write Go's runtime messages.")
 	debugLogFormat  = flag.String("debug-log-format", "text", "log format: text (default), json, or json-k8s.")
 	alsoLogToStderr = flag.Bool("alsologtostderr", false, "send log messages to stderr.")
 
@@ -70,10 +72,11 @@ var (
 	network            = flag.String("network", "sandbox", "specifies which network to use: sandbox (default), host, none. Using network inside the sandbox is more secure because it's isolated from the host network.")
 	hardwareGSO        = flag.Bool("gso", true, "enable hardware segmentation offload if it is supported by a network device.")
 	softwareGSO        = flag.Bool("software-gso", true, "enable software segmentation offload when hardware ofload can't be enabled.")
+	qDisc              = flag.String("qdisc", "fifo", "specifies which queueing discipline to apply by default to the non loopback nics used by the sandbox.")
 	fileAccess         = flag.String("file-access", "exclusive", "specifies which filesystem to use for the root mount: exclusive (default), shared. Volume mounts are always shared.")
 	fsGoferHostUDS     = flag.Bool("fsgofer-host-uds", false, "allow the gofer to mount Unix Domain Sockets.")
 	overlay            = flag.Bool("overlay", false, "wrap filesystem mounts with writable overlay. All modifications are stored in memory inside the sandbox.")
-	overlayfsStaleRead = flag.Bool("overlayfs-stale-read", false, "reopen cached FDs after a file is opened for write to workaround overlayfs limitation on kernels before 4.19.")
+	overlayfsStaleRead = flag.Bool("overlayfs-stale-read", true, "assume root mount is an overlay filesystem")
 	watchdogAction     = flag.String("watchdog-action", "log", "sets what action the watchdog takes when triggered: log (default), panic.")
 	panicSignal        = flag.Int("panic-signal", -1, "register signal handling that panics. Usually set to SIGUSR2(12) to troubleshoot hangs. -1 disables it.")
 	profile            = flag.Bool("profile", false, "prepares the sandbox to use Golang profiler. Note that enabling profiler loosens the seccomp protection added to the sandbox (DO NOT USE IN PRODUCTION).")
@@ -82,6 +85,7 @@ var (
 	rootless           = flag.Bool("rootless", false, "it allows the sandbox to be started with a user that is not root. Sandbox and Gofer processes may run with same privileges as current user.")
 	referenceLeakMode  = flag.String("ref-leak-mode", "disabled", "sets reference leak check mode: disabled (default), log-names, log-traces.")
 	cpuNumFromQuota    = flag.Bool("cpu-num-from-quota", false, "set cpu number to cpu quota (least integer greater or equal to quota value, but not less than 2)")
+	vfs2Enabled        = flag.Bool("vfs2", false, "TEST ONLY; use while VFSv2 is landing. This uses the new experimental VFS layer.")
 
 	// Test flags, not to be used outside tests, ever.
 	testOnlyAllowRunAsCurrentUserWithoutChroot = flag.Bool("TESTONLY-unsafe-nonroot", false, "TEST ONLY; do not ever use! This skips many security measures that isolate the host from the sandbox.")
@@ -116,8 +120,8 @@ func main() {
 	subcommands.Register(new(cmd.Resume), "")
 	subcommands.Register(new(cmd.Run), "")
 	subcommands.Register(new(cmd.Spec), "")
-	subcommands.Register(new(cmd.Start), "")
 	subcommands.Register(new(cmd.State), "")
+	subcommands.Register(new(cmd.Start), "")
 	subcommands.Register(new(cmd.Wait), "")
 
 	// Register internal commands with the internal group name. This causes
@@ -127,6 +131,7 @@ func main() {
 	subcommands.Register(new(cmd.Boot), internalGroup)
 	subcommands.Register(new(cmd.Debug), internalGroup)
 	subcommands.Register(new(cmd.Gofer), internalGroup)
+	subcommands.Register(new(cmd.Statefile), internalGroup)
 
 	// All subcommands must be registered before flag parsing.
 	flag.Parse()
@@ -194,6 +199,11 @@ func main() {
 		cmd.Fatalf("%v", err)
 	}
 
+	queueingDiscipline, err := boot.MakeQueueingDiscipline(*qDisc)
+	if err != nil {
+		cmd.Fatalf("%s", err)
+	}
+
 	// Sets the reference leak check mode. Also set it in config below to
 	// propagate it to child processes.
 	refs.SetLeakMode(refsLeakMode)
@@ -205,6 +215,7 @@ func main() {
 		LogFilename:        *logFilename,
 		LogFormat:          *logFormat,
 		DebugLog:           *debugLog,
+		PanicLog:           *panicLog,
 		DebugLogFormat:     *debugLogFormat,
 		FileAccess:         fsAccess,
 		FSGoferHostUDS:     *fsGoferHostUDS,
@@ -226,7 +237,8 @@ func main() {
 		ReferenceLeakMode:  refsLeakMode,
 		OverlayfsStaleRead: *overlayfsStaleRead,
 		CPUNumFromQuota:    *cpuNumFromQuota,
-
+		VFS2:               *vfs2Enabled,
+		QDisc:              queueingDiscipline,
 		TestOnlyAllowRunAsCurrentUserWithoutChroot: *testOnlyAllowRunAsCurrentUserWithoutChroot,
 		TestOnlyTestNameEnv:                        *testOnlyTestNameEnv,
 	}
@@ -257,20 +269,6 @@ func main() {
 	if *debugLogFD > -1 {
 		f := os.NewFile(uintptr(*debugLogFD), "debug log file")
 
-		// Quick sanity check to make sure no other commands get passed
-		// a log fd (they should use log dir instead).
-		if subcommand != "boot" && subcommand != "gofer" {
-			cmd.Fatalf("flag --debug-log-fd should only be passed to 'boot' and 'gofer' command, but was passed to %q", subcommand)
-		}
-
-		// If we are the boot process, then we own our stdio FDs and can do what we
-		// want with them. Since Docker and Containerd both eat boot's stderr, we
-		// dup our stderr to the provided log FD so that panics will appear in the
-		// logs, rather than just disappear.
-		if err := syscall.Dup3(int(f.Fd()), int(os.Stderr.Fd()), 0); err != nil {
-			cmd.Fatalf("error dup'ing fd %d to stderr: %v", f.Fd(), err)
-		}
-
 		e = newEmitter(*debugLogFormat, f)
 
 	} else if *debugLog != "" {
@@ -286,7 +284,25 @@ func main() {
 		e = newEmitter("text", ioutil.Discard)
 	}
 
-	if *alsoLogToStderr {
+	if *panicLogFD > -1 || *debugLogFD > -1 {
+		fd := *panicLogFD
+		if fd < 0 {
+			fd = *debugLogFD
+		}
+		// Quick sanity check to make sure no other commands get passed
+		// a log fd (they should use log dir instead).
+		if subcommand != "boot" && subcommand != "gofer" {
+			cmd.Fatalf("flags --debug-log-fd and --panic-log-fd should only be passed to 'boot' and 'gofer' command, but was passed to %q", subcommand)
+		}
+
+		// If we are the boot process, then we own our stdio FDs and can do what we
+		// want with them. Since Docker and Containerd both eat boot's stderr, we
+		// dup our stderr to the provided log FD so that panics will appear in the
+		// logs, rather than just disappear.
+		if err := syscall.Dup3(fd, int(os.Stderr.Fd()), 0); err != nil {
+			cmd.Fatalf("error dup'ing fd %d to stderr: %v", fd, err)
+		}
+	} else if *alsoLogToStderr {
 		e = &log.MultiEmitter{e, newEmitter(*debugLogFormat, os.Stderr)}
 	}
 
@@ -303,6 +319,7 @@ func main() {
 	log.Infof("\t\tFileAccess: %v, overlay: %t", conf.FileAccess, conf.Overlay)
 	log.Infof("\t\tNetwork: %v, logging: %t", conf.Network, conf.LogPackets)
 	log.Infof("\t\tStrace: %t, max size: %d, syscalls: %s", conf.Strace, conf.StraceLogSize, conf.StraceSyscalls)
+	log.Infof("\t\tVFS2 enabled: %v", conf.VFS2)
 	log.Infof("***************************")
 
 	if *testOnlyAllowRunAsCurrentUserWithoutChroot {
@@ -319,7 +336,7 @@ func main() {
 		log.Infof("Exiting with status: %v", ws)
 		if ws.Signaled() {
 			// No good way to return it, emulate what the shell does. Maybe raise
-			// signall to self?
+			// signal to self?
 			os.Exit(128 + int(ws.Signal()))
 		}
 		os.Exit(ws.ExitStatus())
@@ -332,11 +349,11 @@ func main() {
 func newEmitter(format string, logFile io.Writer) log.Emitter {
 	switch format {
 	case "text":
-		return &log.GoogleEmitter{log.Writer{Next: logFile}}
+		return log.GoogleEmitter{&log.Writer{Next: logFile}}
 	case "json":
-		return &log.JSONEmitter{log.Writer{Next: logFile}}
+		return log.JSONEmitter{&log.Writer{Next: logFile}}
 	case "json-k8s":
-		return &log.K8sJSONEmitter{log.Writer{Next: logFile}}
+		return log.K8sJSONEmitter{&log.Writer{Next: logFile}}
 	}
 	cmd.Fatalf("invalid log format %q, must be 'text', 'json', or 'json-k8s'", format)
 	panic("unreachable")
diff --git a/runsc/sandbox/BUILD b/runsc/sandbox/BUILD
index c95d50294..035dcd3e3 100644
--- a/runsc/sandbox/BUILD
+++ b/runsc/sandbox/BUILD
@@ -13,6 +13,7 @@ go_library(
         "//runsc:__subpackages__",
     ],
     deps = [
+        "//pkg/cleanup",
         "//pkg/control/client",
         "//pkg/control/server",
         "//pkg/log",
diff --git a/runsc/sandbox/network.go b/runsc/sandbox/network.go
index 99e143696..209bfdb20 100644
--- a/runsc/sandbox/network.go
+++ b/runsc/sandbox/network.go
@@ -21,7 +21,6 @@ import (
 	"path/filepath"
 	"runtime"
 	"strconv"
-	"strings"
 	"syscall"
 
 	specs "github.com/opencontainers/runtime-spec/specs-go"
@@ -63,7 +62,7 @@ func setupNetwork(conn *urpc.Client, pid int, spec *specs.Spec, conf *boot.Confi
 		// Build the path to the net namespace of the sandbox process.
 		// This is what we will copy.
 		nsPath := filepath.Join("/proc", strconv.Itoa(pid), "ns/net")
-		if err := createInterfacesAndRoutesFromNS(conn, nsPath, conf.HardwareGSO, conf.SoftwareGSO, conf.NumNetworkChannels); err != nil {
+		if err := createInterfacesAndRoutesFromNS(conn, nsPath, conf.HardwareGSO, conf.SoftwareGSO, conf.NumNetworkChannels, conf.QDisc); err != nil {
 			return fmt.Errorf("creating interfaces from net namespace %q: %v", nsPath, err)
 		}
 	case boot.NetworkHost:
@@ -75,30 +74,8 @@ func setupNetwork(conn *urpc.Client, pid int, spec *specs.Spec, conf *boot.Confi
 }
 
 func createDefaultLoopbackInterface(conn *urpc.Client) error {
-	link := boot.LoopbackLink{
-		Name: "lo",
-		Addresses: []net.IP{
-			net.IP("\x7f\x00\x00\x01"),
-			net.IPv6loopback,
-		},
-		Routes: []boot.Route{
-			{
-				Destination: net.IPNet{
-
-					IP:   net.IPv4(0x7f, 0, 0, 0),
-					Mask: net.IPv4Mask(0xff, 0, 0, 0),
-				},
-			},
-			{
-				Destination: net.IPNet{
-					IP:   net.IPv6loopback,
-					Mask: net.IPMask(strings.Repeat("\xff", net.IPv6len)),
-				},
-			},
-		},
-	}
 	if err := conn.Call(boot.NetworkCreateLinksAndRoutes, &boot.CreateLinksAndRoutesArgs{
-		LoopbackLinks: []boot.LoopbackLink{link},
+		LoopbackLinks: []boot.LoopbackLink{boot.DefaultLoopbackLink},
 	}, nil); err != nil {
 		return fmt.Errorf("creating loopback link and routes: %v", err)
 	}
@@ -138,7 +115,7 @@ func isRootNS() (bool, error) {
 // createInterfacesAndRoutesFromNS scrapes the interface and routes from the
 // net namespace with the given path, creates them in the sandbox, and removes
 // them from the host.
-func createInterfacesAndRoutesFromNS(conn *urpc.Client, nsPath string, hardwareGSO bool, softwareGSO bool, numNetworkChannels int) error {
+func createInterfacesAndRoutesFromNS(conn *urpc.Client, nsPath string, hardwareGSO bool, softwareGSO bool, numNetworkChannels int, qDisc boot.QueueingDiscipline) error {
 	// Join the network namespace that we will be copying.
 	restore, err := joinNetNS(nsPath)
 	if err != nil {
@@ -224,6 +201,7 @@ func createInterfacesAndRoutesFromNS(conn *urpc.Client, nsPath string, hardwareG
 			MTU:         iface.MTU,
 			Routes:      routes,
 			NumChannels: numNetworkChannels,
+			QDisc:       qDisc,
 		}
 
 		// Get the link for the interface.
diff --git a/runsc/sandbox/sandbox.go b/runsc/sandbox/sandbox.go
index ec72bdbfd..6e1a2af25 100644
--- a/runsc/sandbox/sandbox.go
+++ b/runsc/sandbox/sandbox.go
@@ -18,16 +18,19 @@ package sandbox
 import (
 	"context"
 	"fmt"
+	"io"
 	"math"
 	"os"
 	"os/exec"
 	"strconv"
+	"strings"
 	"syscall"
 	"time"
 
 	"github.com/cenkalti/backoff"
 	specs "github.com/opencontainers/runtime-spec/specs-go"
 	"github.com/syndtr/gocapability/capability"
+	"gvisor.dev/gvisor/pkg/cleanup"
 	"gvisor.dev/gvisor/pkg/control/client"
 	"gvisor.dev/gvisor/pkg/control/server"
 	"gvisor.dev/gvisor/pkg/log"
@@ -117,7 +120,7 @@ func New(conf *boot.Config, args *Args) (*Sandbox, error) {
 	s := &Sandbox{ID: args.ID, Cgroup: args.Cgroup}
 	// The Cleanup object cleans up partially created sandboxes when an error
 	// occurs. Any errors occurring during cleanup itself are ignored.
-	c := specutils.MakeCleanup(func() {
+	c := cleanup.Make(func() {
 		err := s.destroy()
 		log.Warningf("error destroying sandbox: %v", err)
 	})
@@ -142,7 +145,19 @@ func New(conf *boot.Config, args *Args) (*Sandbox, error) {
 	// Wait until the sandbox has booted.
 	b := make([]byte, 1)
 	if l, err := clientSyncFile.Read(b); err != nil || l != 1 {
-		return nil, fmt.Errorf("waiting for sandbox to start: %v", err)
+		err := fmt.Errorf("waiting for sandbox to start: %v", err)
+		// If the sandbox failed to start, it may be because the binary
+		// permissions were incorrect. Check the bits and return a more helpful
+		// error message.
+		//
+		// NOTE: The error message is checked because error types are lost over
+		// rpc calls.
+		if strings.Contains(err.Error(), io.EOF.Error()) {
+			if permsErr := checkBinaryPermissions(conf); permsErr != nil {
+				return nil, fmt.Errorf("%v: %v", err, permsErr)
+			}
+		}
+		return nil, err
 	}
 
 	c.Release()
@@ -369,8 +384,24 @@ func (s *Sandbox) createSandboxProcess(conf *boot.Config, args *Args, startSyncF
 		cmd.Args = append(cmd.Args, "--debug-log-fd="+strconv.Itoa(nextFD))
 		nextFD++
 	}
+	if conf.PanicLog != "" {
+		test := ""
+		if len(conf.TestOnlyTestNameEnv) != 0 {
+			// Fetch test name if one is provided and the test only flag was set.
+			if t, ok := specutils.EnvVar(args.Spec.Process.Env, conf.TestOnlyTestNameEnv); ok {
+				test = t
+			}
+		}
 
-	cmd.Args = append(cmd.Args, "--panic-signal="+strconv.Itoa(int(syscall.SIGTERM)))
+		panicLogFile, err := specutils.DebugLogFile(conf.PanicLog, "panic", test)
+		if err != nil {
+			return fmt.Errorf("opening debug log file in %q: %v", conf.PanicLog, err)
+		}
+		defer panicLogFile.Close()
+		cmd.ExtraFiles = append(cmd.ExtraFiles, panicLogFile)
+		cmd.Args = append(cmd.Args, "--panic-log-fd="+strconv.Itoa(nextFD))
+		nextFD++
+	}
 
 	// Add the "boot" command to the args.
 	//
@@ -416,9 +447,13 @@ func (s *Sandbox) createSandboxProcess(conf *boot.Config, args *Args, startSyncF
 		nextFD++
 	}
 
-	// If the platform needs a device FD we must pass it in.
-	if deviceFile, err := deviceFileForPlatform(conf.Platform); err != nil {
+	gPlatform, err := platform.Lookup(conf.Platform)
+	if err != nil {
 		return err
+	}
+
+	if deviceFile, err := gPlatform.OpenDevice(); err != nil {
+		return fmt.Errorf("opening device file for platform %q: %v", gPlatform, err)
 	} else if deviceFile != nil {
 		defer deviceFile.Close()
 		cmd.ExtraFiles = append(cmd.ExtraFiles, deviceFile)
@@ -426,6 +461,12 @@ func (s *Sandbox) createSandboxProcess(conf *boot.Config, args *Args, startSyncF
 		nextFD++
 	}
 
+	// TODO(b/151157106): syscall tests fail by timeout if asyncpreemptoff
+	// isn't set.
+	if conf.Platform == "kvm" {
+		cmd.Env = append(cmd.Env, "GODEBUG=asyncpreemptoff=1")
+	}
+
 	// The current process' stdio must be passed to the application via the
 	// --stdio-fds flag. The stdio of the sandbox process itself must not
 	// be connected to the same FDs, otherwise we risk leaking sandbox
@@ -503,7 +544,7 @@ func (s *Sandbox) createSandboxProcess(conf *boot.Config, args *Args, startSyncF
 		{Type: specs.UTSNamespace},
 	}
 
-	if conf.Platform == platforms.Ptrace {
+	if gPlatform.Requirements().RequiresCurrentPIDNS {
 		// TODO(b/75837838): Also set a new PID namespace so that we limit
 		// access to other host processes.
 		log.Infof("Sandbox will be started in the current PID namespace")
@@ -564,45 +605,32 @@ func (s *Sandbox) createSandboxProcess(conf *boot.Config, args *Args, startSyncF
 			nss = append(nss, specs.LinuxNamespace{Type: specs.UserNamespace})
 			cmd.Args = append(cmd.Args, "--setup-root")
 
+			const nobody = 65534
 			if conf.Rootless {
-				log.Infof("Rootless mode: sandbox will run as root inside user namespace, mapped to the current user, uid: %d, gid: %d", os.Getuid(), os.Getgid())
+				log.Infof("Rootless mode: sandbox will run as nobody inside user namespace, mapped to the current user, uid: %d, gid: %d", os.Getuid(), os.Getgid())
 				cmd.SysProcAttr.UidMappings = []syscall.SysProcIDMap{
 					{
-						ContainerID: 0,
+						ContainerID: nobody,
 						HostID:      os.Getuid(),
 						Size:        1,
 					},
 				}
 				cmd.SysProcAttr.GidMappings = []syscall.SysProcIDMap{
 					{
-						ContainerID: 0,
+						ContainerID: nobody,
 						HostID:      os.Getgid(),
 						Size:        1,
 					},
 				}
-				cmd.SysProcAttr.Credential = &syscall.Credential{Uid: 0, Gid: 0}
 
 			} else {
 				// Map nobody in the new namespace to nobody in the parent namespace.
 				//
 				// A sandbox process will construct an empty
-				// root for itself, so it has to have the CAP_SYS_ADMIN
-				// capability.
-				//
-				// FIXME(b/122554829): The current implementations of
-				// os/exec doesn't allow to set ambient capabilities if
-				// a process is started in a new user namespace. As a
-				// workaround, we start the sandbox process with the 0
-				// UID and then it constructs a chroot and sets UID to
-				// nobody.  https://github.com/golang/go/issues/2315
-				const nobody = 65534
+				// root for itself, so it has to have
+				// CAP_SYS_ADMIN and CAP_SYS_CHROOT capabilities.
 				cmd.SysProcAttr.UidMappings = []syscall.SysProcIDMap{
 					{
-						ContainerID: 0,
-						HostID:      nobody - 1,
-						Size:        1,
-					},
-					{
 						ContainerID: nobody,
 						HostID:      nobody,
 						Size:        1,
@@ -615,11 +643,11 @@ func (s *Sandbox) createSandboxProcess(conf *boot.Config, args *Args, startSyncF
 						Size:        1,
 					},
 				}
-
-				// Set credentials to run as user and group nobody.
-				cmd.SysProcAttr.Credential = &syscall.Credential{Uid: 0, Gid: nobody}
 			}
 
+			// Set credentials to run as user and group nobody.
+			cmd.SysProcAttr.Credential = &syscall.Credential{Uid: nobody, Gid: nobody}
+			cmd.SysProcAttr.AmbientCaps = append(cmd.SysProcAttr.AmbientCaps, uintptr(capability.CAP_SYS_ADMIN), uintptr(capability.CAP_SYS_CHROOT))
 		} else {
 			return fmt.Errorf("can't run sandbox process as user nobody since we don't have CAP_SETUID or CAP_SETGID")
 		}
@@ -677,6 +705,13 @@ func (s *Sandbox) createSandboxProcess(conf *boot.Config, args *Args, startSyncF
 		nextFD++
 	}
 
+	if args.Attached {
+		// Kill sandbox if parent process exits in attached mode.
+		cmd.SysProcAttr.Pdeathsig = syscall.SIGKILL
+		// Tells boot that any process it creates must have pdeathsig set.
+		cmd.Args = append(cmd.Args, "--attached")
+	}
+
 	// Add container as the last argument.
 	cmd.Args = append(cmd.Args, s.ID)
 
@@ -685,15 +720,22 @@ func (s *Sandbox) createSandboxProcess(conf *boot.Config, args *Args, startSyncF
 		log.Debugf("Donating FD %d: %q", i+3, f.Name())
 	}
 
-	if args.Attached {
-		// Kill sandbox if parent process exits in attached mode.
-		cmd.SysProcAttr.Pdeathsig = syscall.SIGKILL
-	}
-
 	log.Debugf("Starting sandbox: %s %v", binPath, cmd.Args)
 	log.Debugf("SysProcAttr: %+v", cmd.SysProcAttr)
 	if err := specutils.StartInNS(cmd, nss); err != nil {
-		return fmt.Errorf("Sandbox: %v", err)
+		err := fmt.Errorf("starting sandbox: %v", err)
+		// If the sandbox failed to start, it may be because the binary
+		// permissions were incorrect. Check the bits and return a more helpful
+		// error message.
+		//
+		// NOTE: The error message is checked because error types are lost over
+		// rpc calls.
+		if strings.Contains(err.Error(), syscall.EACCES.Error()) {
+			if permsErr := checkBinaryPermissions(conf); permsErr != nil {
+				return fmt.Errorf("%v: %v", err, permsErr)
+			}
+		}
+		return err
 	}
 	s.child = true
 	s.Pid = cmd.Process.Pid
@@ -972,6 +1014,66 @@ func (s *Sandbox) StopCPUProfile() error {
 	return nil
 }
 
+// GoroutineProfile writes a goroutine profile to the given file.
+func (s *Sandbox) GoroutineProfile(f *os.File) error {
+	log.Debugf("Goroutine profile %q", s.ID)
+	conn, err := s.sandboxConnect()
+	if err != nil {
+		return err
+	}
+	defer conn.Close()
+
+	opts := control.ProfileOpts{
+		FilePayload: urpc.FilePayload{
+			Files: []*os.File{f},
+		},
+	}
+	if err := conn.Call(boot.GoroutineProfile, &opts, nil); err != nil {
+		return fmt.Errorf("getting sandbox %q goroutine profile: %v", s.ID, err)
+	}
+	return nil
+}
+
+// BlockProfile writes a block profile to the given file.
+func (s *Sandbox) BlockProfile(f *os.File) error {
+	log.Debugf("Block profile %q", s.ID)
+	conn, err := s.sandboxConnect()
+	if err != nil {
+		return err
+	}
+	defer conn.Close()
+
+	opts := control.ProfileOpts{
+		FilePayload: urpc.FilePayload{
+			Files: []*os.File{f},
+		},
+	}
+	if err := conn.Call(boot.BlockProfile, &opts, nil); err != nil {
+		return fmt.Errorf("getting sandbox %q block profile: %v", s.ID, err)
+	}
+	return nil
+}
+
+// MutexProfile writes a mutex profile to the given file.
+func (s *Sandbox) MutexProfile(f *os.File) error {
+	log.Debugf("Mutex profile %q", s.ID)
+	conn, err := s.sandboxConnect()
+	if err != nil {
+		return err
+	}
+	defer conn.Close()
+
+	opts := control.ProfileOpts{
+		FilePayload: urpc.FilePayload{
+			Files: []*os.File{f},
+		},
+	}
+	if err := conn.Call(boot.MutexProfile, &opts, nil); err != nil {
+		return fmt.Errorf("getting sandbox %q mutex profile: %v", s.ID, err)
+	}
+	return nil
+}
+
 // StartTrace start trace  writing to the given file.
 func (s *Sandbox) StartTrace(f *os.File) error {
 	log.Debugf("Trace start %q", s.ID)
@@ -1096,3 +1198,31 @@ func deviceFileForPlatform(name string) (*os.File, error) {
 	}
 	return f, nil
 }
+
+// checkBinaryPermissions verifies that the required binary bits are set on
+// the runsc executable.
+func checkBinaryPermissions(conf *boot.Config) error {
+	// All platforms need the other exe bit
+	neededBits := os.FileMode(0001)
+	if conf.Platform == platforms.Ptrace {
+		// Ptrace needs the other read bit
+		neededBits |= os.FileMode(0004)
+	}
+
+	exePath, err := os.Executable()
+	if err != nil {
+		return fmt.Errorf("getting exe path: %v", err)
+	}
+
+	// Check the permissions of the runsc binary and print an error if it
+	// doesn't match expectations.
+	info, err := os.Stat(exePath)
+	if err != nil {
+		return fmt.Errorf("stat file: %v", err)
+	}
+
+	if info.Mode().Perm()&neededBits != neededBits {
+		return fmt.Errorf(specutils.FaqErrorMsg("runsc-perms", fmt.Sprintf("%s does not have the correct permissions", exePath)))
+	}
+	return nil
+}
diff --git a/runsc/specutils/namespace.go b/runsc/specutils/namespace.go
index c7dd3051c..23001d67c 100644
--- a/runsc/specutils/namespace.go
+++ b/runsc/specutils/namespace.go
@@ -18,6 +18,7 @@ import (
 	"fmt"
 	"os"
 	"os/exec"
+	"os/signal"
 	"path/filepath"
 	"runtime"
 	"syscall"
@@ -252,13 +253,27 @@ func MaybeRunAsRoot() error {
 		},
 		Credential:                 &syscall.Credential{Uid: 0, Gid: 0},
 		GidMappingsEnableSetgroups: false,
+
+		// Make sure child is killed when the parent terminates.
+		Pdeathsig: syscall.SIGKILL,
 	}
 
 	cmd.Env = os.Environ()
 	cmd.Stdin = os.Stdin
 	cmd.Stdout = os.Stdout
 	cmd.Stderr = os.Stderr
-	if err := cmd.Run(); err != nil {
+	if err := cmd.Start(); err != nil {
+		return fmt.Errorf("re-executing self: %w", err)
+	}
+	ch := make(chan os.Signal, 1)
+	signal.Notify(ch)
+	go func() {
+		for {
+			// Forward all signals to child process.
+			cmd.Process.Signal(<-ch)
+		}
+	}()
+	if err := cmd.Wait(); err != nil {
 		if exit, ok := err.(*exec.ExitError); ok {
 			if ws, ok := exit.Sys().(syscall.WaitStatus); ok {
 				os.Exit(ws.ExitStatus())
@@ -266,7 +281,7 @@ func MaybeRunAsRoot() error {
 			log.Warningf("No wait status provided, exiting with -1: %v", err)
 			os.Exit(-1)
 		}
-		return fmt.Errorf("re-executing self: %v", err)
+		return err
 	}
 	// Child completed with success.
 	os.Exit(0)
diff --git a/runsc/specutils/specutils.go b/runsc/specutils/specutils.go
index d3c2e4e78..f1fa573c5 100644
--- a/runsc/specutils/specutils.go
+++ b/runsc/specutils/specutils.go
@@ -92,6 +92,12 @@ func ValidateSpec(spec *specs.Spec) error {
 		log.Warningf("AppArmor profile %q is being ignored", spec.Process.ApparmorProfile)
 	}
 
+	// PR_SET_NO_NEW_PRIVS is assumed to always be set.
+	// See kernel.Task.updateCredsForExecLocked.
+	if !spec.Process.NoNewPrivileges {
+		log.Warningf("noNewPrivileges ignored. PR_SET_NO_NEW_PRIVS is assumed to always be set.")
+	}
+
 	// TODO(gvisor.dev/issue/510): Apply seccomp to application inside sandbox.
 	if spec.Linux != nil && spec.Linux.Seccomp != nil {
 		log.Warningf("Seccomp spec is being ignored")
@@ -438,36 +444,6 @@ func ContainsStr(strs []string, str string) bool {
 	return false
 }
 
-// Cleanup allows defers to be aborted when cleanup needs to happen
-// conditionally. Usage:
-// c := MakeCleanup(func() { f.Close() })
-// defer c.Clean() // any failure before release is called will close the file.
-// ...
-// c.Release() // on success, aborts closing the file and return it.
-// return f
-type Cleanup struct {
-	clean func()
-}
-
-// MakeCleanup creates a new Cleanup object.
-func MakeCleanup(f func()) Cleanup {
-	return Cleanup{clean: f}
-}
-
-// Clean calls the cleanup function.
-func (c *Cleanup) Clean() {
-	if c.clean != nil {
-		c.clean()
-		c.clean = nil
-	}
-}
-
-// Release releases the cleanup from its duties, i.e. cleanup function is not
-// called after this point.
-func (c *Cleanup) Release() {
-	c.clean = nil
-}
-
 // RetryEintr retries the function until an error different than EINTR is
 // returned.
 func RetryEintr(f func() (uintptr, uintptr, error)) (uintptr, uintptr, error) {
@@ -528,3 +504,8 @@ func EnvVar(env []string, name string) (string, bool) {
 	}
 	return "", false
 }
+
+// FaqErrorMsg returns an error message pointing to the FAQ.
+func FaqErrorMsg(anchor, msg string) string {
+	return fmt.Sprintf("%s; see https://gvisor.dev/faq#%s for more details", msg, anchor)
+}
diff --git a/runsc/testutil/BUILD b/runsc/testutil/BUILD
deleted file mode 100644
index 945405303..000000000
--- a/runsc/testutil/BUILD
+++ /dev/null
@@ -1,21 +0,0 @@
-load("//tools:defs.bzl", "go_library")
-
-package(licenses = ["notice"])
-
-go_library(
-    name = "testutil",
-    testonly = 1,
-    srcs = [
-        "testutil.go",
-        "testutil_runfiles.go",
-    ],
-    visibility = ["//:sandbox"],
-    deps = [
-        "//pkg/log",
-        "//pkg/sync",
-        "//runsc/boot",
-        "//runsc/specutils",
-        "@com_github_cenkalti_backoff//:go_default_library",
-        "@com_github_opencontainers_runtime-spec//specs-go:go_default_library",
-    ],
-)
diff --git a/runsc/testutil/testutil.go b/runsc/testutil/testutil.go
deleted file mode 100644
index 92d677e71..000000000
--- a/runsc/testutil/testutil.go
+++ /dev/null
@@ -1,431 +0,0 @@
-// Copyright 2018 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// Package testutil contains utility functions for runsc tests.
-package testutil
-
-import (
-	"bufio"
-	"context"
-	"debug/elf"
-	"encoding/base32"
-	"encoding/json"
-	"flag"
-	"fmt"
-	"io"
-	"io/ioutil"
-	"math"
-	"math/rand"
-	"net/http"
-	"os"
-	"os/exec"
-	"os/signal"
-	"path/filepath"
-	"strconv"
-	"strings"
-	"sync/atomic"
-	"syscall"
-	"time"
-
-	"github.com/cenkalti/backoff"
-	specs "github.com/opencontainers/runtime-spec/specs-go"
-	"gvisor.dev/gvisor/pkg/log"
-	"gvisor.dev/gvisor/pkg/sync"
-	"gvisor.dev/gvisor/runsc/boot"
-	"gvisor.dev/gvisor/runsc/specutils"
-)
-
-var (
-	checkpoint = flag.Bool("checkpoint", true, "control checkpoint/restore support")
-)
-
-func init() {
-	rand.Seed(time.Now().UnixNano())
-}
-
-// IsCheckpointSupported returns the relevant command line flag.
-func IsCheckpointSupported() bool {
-	return *checkpoint
-}
-
-// TmpDir returns the absolute path to a writable directory that can be used as
-// scratch by the test.
-func TmpDir() string {
-	dir := os.Getenv("TEST_TMPDIR")
-	if dir == "" {
-		dir = "/tmp"
-	}
-	return dir
-}
-
-// ConfigureExePath configures the executable for runsc in the test environment.
-func ConfigureExePath() error {
-	path, err := FindFile("runsc/runsc")
-	if err != nil {
-		return err
-	}
-	specutils.ExePath = path
-	return nil
-}
-
-// TestConfig returns the default configuration to use in tests. Note that
-// 'RootDir' must be set by caller if required.
-func TestConfig() *boot.Config {
-	logDir := ""
-	if dir, ok := os.LookupEnv("TEST_UNDECLARED_OUTPUTS_DIR"); ok {
-		logDir = dir + "/"
-	}
-	return &boot.Config{
-		Debug:           true,
-		DebugLog:        logDir,
-		LogFormat:       "text",
-		DebugLogFormat:  "text",
-		AlsoLogToStderr: true,
-		LogPackets:      true,
-		Network:         boot.NetworkNone,
-		Strace:          true,
-		Platform:        "ptrace",
-		FileAccess:      boot.FileAccessExclusive,
-		TestOnlyAllowRunAsCurrentUserWithoutChroot: true,
-		NumNetworkChannels:                         1,
-	}
-}
-
-// NewSpecWithArgs creates a simple spec with the given args suitable for use
-// in tests.
-func NewSpecWithArgs(args ...string) *specs.Spec {
-	return &specs.Spec{
-		// The host filesystem root is the container root.
-		Root: &specs.Root{
-			Path:     "/",
-			Readonly: true,
-		},
-		Process: &specs.Process{
-			Args: args,
-			Env: []string{
-				"PATH=" + os.Getenv("PATH"),
-			},
-			Capabilities: specutils.AllCapabilities(),
-		},
-		Mounts: []specs.Mount{
-			// Hide the host /etc to avoid any side-effects.
-			// For example, bash reads /etc/passwd and if it is
-			// very big, tests can fail by timeout.
-			{
-				Type:        "tmpfs",
-				Destination: "/etc",
-			},
-			// Root is readonly, but many tests want to write to tmpdir.
-			// This creates a writable mount inside the root. Also, when tmpdir points
-			// to "/tmp", it makes the the actual /tmp to be mounted and not a tmpfs
-			// inside the sentry.
-			{
-				Type:        "bind",
-				Destination: TmpDir(),
-				Source:      TmpDir(),
-			},
-		},
-		Hostname: "runsc-test-hostname",
-	}
-}
-
-// SetupRootDir creates a root directory for containers.
-func SetupRootDir() (string, error) {
-	rootDir, err := ioutil.TempDir(TmpDir(), "containers")
-	if err != nil {
-		return "", fmt.Errorf("error creating root dir: %v", err)
-	}
-	return rootDir, nil
-}
-
-// SetupContainer creates a bundle and root dir for the container, generates a
-// test config, and writes the spec to config.json in the bundle dir.
-func SetupContainer(spec *specs.Spec, conf *boot.Config) (rootDir, bundleDir string, err error) {
-	rootDir, err = SetupRootDir()
-	if err != nil {
-		return "", "", err
-	}
-	conf.RootDir = rootDir
-	bundleDir, err = SetupBundleDir(spec)
-	return rootDir, bundleDir, err
-}
-
-// SetupBundleDir creates a bundle dir and writes the spec to config.json.
-func SetupBundleDir(spec *specs.Spec) (bundleDir string, err error) {
-	bundleDir, err = ioutil.TempDir(TmpDir(), "bundle")
-	if err != nil {
-		return "", fmt.Errorf("error creating bundle dir: %v", err)
-	}
-
-	if err = writeSpec(bundleDir, spec); err != nil {
-		return "", fmt.Errorf("error writing spec: %v", err)
-	}
-	return bundleDir, nil
-}
-
-// writeSpec writes the spec to disk in the given directory.
-func writeSpec(dir string, spec *specs.Spec) error {
-	b, err := json.Marshal(spec)
-	if err != nil {
-		return err
-	}
-	return ioutil.WriteFile(filepath.Join(dir, "config.json"), b, 0755)
-}
-
-// UniqueContainerID generates a unique container id for each test.
-//
-// The container id is used to create an abstract unix domain socket, which must
-// be unique.  While the container forbids creating two containers with the same
-// name, sometimes between test runs the socket does not get cleaned up quickly
-// enough, causing container creation to fail.
-func UniqueContainerID() string {
-	// Read 20 random bytes.
-	b := make([]byte, 20)
-	// "[Read] always returns len(p) and a nil error." --godoc
-	if _, err := rand.Read(b); err != nil {
-		panic("rand.Read failed: " + err.Error())
-	}
-	// base32 encode the random bytes, so that the name is a valid
-	// container id and can be used as a socket name in the filesystem.
-	return fmt.Sprintf("test-container-%s", base32.StdEncoding.EncodeToString(b))
-}
-
-// Copy copies file from src to dst.
-func Copy(src, dst string) error {
-	in, err := os.Open(src)
-	if err != nil {
-		return err
-	}
-	defer in.Close()
-
-	out, err := os.Create(dst)
-	if err != nil {
-		return err
-	}
-	defer out.Close()
-
-	_, err = io.Copy(out, in)
-	return err
-}
-
-// Poll is a shorthand function to poll for something with given timeout.
-func Poll(cb func() error, timeout time.Duration) error {
-	ctx, cancel := context.WithTimeout(context.Background(), timeout)
-	defer cancel()
-	b := backoff.WithContext(backoff.NewConstantBackOff(100*time.Millisecond), ctx)
-	return backoff.Retry(cb, b)
-}
-
-// WaitForHTTP tries GET requests on a port until the call succeeds or timeout.
-func WaitForHTTP(port int, timeout time.Duration) error {
-	cb := func() error {
-		c := &http.Client{
-			// Calculate timeout to be able to do minimum 5 attempts.
-			Timeout: timeout / 5,
-		}
-		url := fmt.Sprintf("http://localhost:%d/", port)
-		resp, err := c.Get(url)
-		if err != nil {
-			log.Infof("Waiting %s: %v", url, err)
-			return err
-		}
-		resp.Body.Close()
-		return nil
-	}
-	return Poll(cb, timeout)
-}
-
-// Reaper reaps child processes.
-type Reaper struct {
-	// mu protects ch, which will be nil if the reaper is not running.
-	mu sync.Mutex
-	ch chan os.Signal
-}
-
-// Start starts reaping child processes.
-func (r *Reaper) Start() {
-	r.mu.Lock()
-	defer r.mu.Unlock()
-
-	if r.ch != nil {
-		panic("reaper.Start called on a running reaper")
-	}
-
-	r.ch = make(chan os.Signal, 1)
-	signal.Notify(r.ch, syscall.SIGCHLD)
-
-	go func() {
-		for {
-			r.mu.Lock()
-			ch := r.ch
-			r.mu.Unlock()
-			if ch == nil {
-				return
-			}
-
-			_, ok := <-ch
-			if !ok {
-				// Channel closed.
-				return
-			}
-			for {
-				cpid, _ := syscall.Wait4(-1, nil, syscall.WNOHANG, nil)
-				if cpid < 1 {
-					break
-				}
-			}
-		}
-	}()
-}
-
-// Stop stops reaping child processes.
-func (r *Reaper) Stop() {
-	r.mu.Lock()
-	defer r.mu.Unlock()
-
-	if r.ch == nil {
-		panic("reaper.Stop called on a stopped reaper")
-	}
-
-	signal.Stop(r.ch)
-	close(r.ch)
-	r.ch = nil
-}
-
-// StartReaper is a helper that starts a new Reaper and returns a function to
-// stop it.
-func StartReaper() func() {
-	r := &Reaper{}
-	r.Start()
-	return r.Stop
-}
-
-// WaitUntilRead reads from the given reader until the wanted string is found
-// or until timeout.
-func WaitUntilRead(r io.Reader, want string, split bufio.SplitFunc, timeout time.Duration) error {
-	sc := bufio.NewScanner(r)
-	if split != nil {
-		sc.Split(split)
-	}
-	// done must be accessed atomically. A value greater than 0 indicates
-	// that the read loop can exit.
-	var done uint32
-	doneCh := make(chan struct{})
-	go func() {
-		for sc.Scan() {
-			t := sc.Text()
-			if strings.Contains(t, want) {
-				atomic.StoreUint32(&done, 1)
-				close(doneCh)
-				break
-			}
-			if atomic.LoadUint32(&done) > 0 {
-				break
-			}
-		}
-	}()
-	select {
-	case <-time.After(timeout):
-		atomic.StoreUint32(&done, 1)
-		return fmt.Errorf("timeout waiting to read %q", want)
-	case <-doneCh:
-		return nil
-	}
-}
-
-// KillCommand kills the process running cmd unless it hasn't been started. It
-// returns an error if it cannot kill the process unless the reason is that the
-// process has already exited.
-func KillCommand(cmd *exec.Cmd) error {
-	if cmd.Process == nil {
-		return nil
-	}
-	if err := cmd.Process.Kill(); err != nil {
-		if !strings.Contains(err.Error(), "process already finished") {
-			return fmt.Errorf("failed to kill process %v: %v", cmd, err)
-		}
-	}
-	return nil
-}
-
-// WriteTmpFile writes text to a temporary file, closes the file, and returns
-// the name of the file.
-func WriteTmpFile(pattern, text string) (string, error) {
-	file, err := ioutil.TempFile(TmpDir(), pattern)
-	if err != nil {
-		return "", err
-	}
-	defer file.Close()
-	if _, err := file.Write([]byte(text)); err != nil {
-		return "", err
-	}
-	return file.Name(), nil
-}
-
-// RandomName create a name with a 6 digit random number appended to it.
-func RandomName(prefix string) string {
-	return fmt.Sprintf("%s-%06d", prefix, rand.Int31n(1000000))
-}
-
-// IsStatic returns true iff the given file is a static binary.
-func IsStatic(filename string) (bool, error) {
-	f, err := elf.Open(filename)
-	if err != nil {
-		return false, err
-	}
-	for _, prog := range f.Progs {
-		if prog.Type == elf.PT_INTERP {
-			return false, nil // Has interpreter.
-		}
-	}
-	return true, nil
-}
-
-// TestIndicesForShard returns indices for this test shard based on the
-// TEST_SHARD_INDEX and TEST_TOTAL_SHARDS environment vars.
-//
-// If either of the env vars are not present, then the function will return all
-// tests. If there are more shards than there are tests, then the returned list
-// may be empty.
-func TestIndicesForShard(numTests int) ([]int, error) {
-	var (
-		shardIndex = 0
-		shardTotal = 1
-	)
-
-	indexStr, totalStr := os.Getenv("TEST_SHARD_INDEX"), os.Getenv("TEST_TOTAL_SHARDS")
-	if indexStr != "" && totalStr != "" {
-		// Parse index and total to ints.
-		var err error
-		shardIndex, err = strconv.Atoi(indexStr)
-		if err != nil {
-			return nil, fmt.Errorf("invalid TEST_SHARD_INDEX %q: %v", indexStr, err)
-		}
-		shardTotal, err = strconv.Atoi(totalStr)
-		if err != nil {
-			return nil, fmt.Errorf("invalid TEST_TOTAL_SHARDS %q: %v", totalStr, err)
-		}
-	}
-
-	// Calculate!
-	var indices []int
-	numBlocks := int(math.Ceil(float64(numTests) / float64(shardTotal)))
-	for i := 0; i < numBlocks; i++ {
-		pick := i*shardTotal + shardIndex
-		if pick < numTests {
-			indices = append(indices, pick)
-		}
-	}
-	return indices, nil
-}
diff --git a/runsc/testutil/testutil_runfiles.go b/runsc/testutil/testutil_runfiles.go
deleted file mode 100644
index ece9ea9a1..000000000
--- a/runsc/testutil/testutil_runfiles.go
+++ /dev/null
@@ -1,75 +0,0 @@
-// Copyright 2018 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package testutil
-
-import (
-	"fmt"
-	"os"
-	"path/filepath"
-)
-
-// FindFile searchs for a file inside the test run environment. It returns the
-// full path to the file. It fails if none or more than one file is found.
-func FindFile(path string) (string, error) {
-	wd, err := os.Getwd()
-	if err != nil {
-		return "", err
-	}
-
-	// The test root is demarcated by a path element called "__main__". Search for
-	// it backwards from the working directory.
-	root := wd
-	for {
-		dir, name := filepath.Split(root)
-		if name == "__main__" {
-			break
-		}
-		if len(dir) == 0 {
-			return "", fmt.Errorf("directory __main__ not found in %q", wd)
-		}
-		// Remove ending slash to loop around.
-		root = dir[:len(dir)-1]
-	}
-
-	// Annoyingly, bazel adds the build type to the directory path for go
-	// binaries, but not for c++ binaries. We use two different patterns to
-	// to find our file.
-	patterns := []string{
-		// Try the obvious path first.
-		filepath.Join(root, path),
-		// If it was a go binary, use a wildcard to match the build
-		// type. The pattern is: /test-path/__main__/directories/*/file.
-		filepath.Join(root, filepath.Dir(path), "*", filepath.Base(path)),
-	}
-
-	for _, p := range patterns {
-		matches, err := filepath.Glob(p)
-		if err != nil {
-			// "The only possible returned error is ErrBadPattern,
-			// when pattern is malformed." -godoc
-			return "", fmt.Errorf("error globbing %q: %v", p, err)
-		}
-		switch len(matches) {
-		case 0:
-			// Try the next pattern.
-		case 1:
-			// We found it.
-			return matches[0], nil
-		default:
-			return "", fmt.Errorf("more than one match found for %q: %s", path, matches)
-		}
-	}
-	return "", fmt.Errorf("file %q not found", path)
-}