Diffstat (limited to 'runsc')
40 files changed, 1093 insertions, 421 deletions
diff --git a/runsc/boot/BUILD b/runsc/boot/BUILD index 248f77c34..8c73dc5dc 100644 --- a/runsc/boot/BUILD +++ b/runsc/boot/BUILD @@ -38,6 +38,7 @@ go_library( "//pkg/memutil", "//pkg/rand", "//pkg/refs", + "//pkg/refsvfs2", "//pkg/sentry/arch", "//pkg/sentry/arch:registers_go_proto", "//pkg/sentry/control", @@ -74,6 +75,7 @@ go_library( "//pkg/sentry/platform", "//pkg/sentry/sighandling", "//pkg/sentry/socket/hostinet", + "//pkg/sentry/socket/netfilter", "//pkg/sentry/socket/netlink", "//pkg/sentry/socket/netlink/route", "//pkg/sentry/socket/netlink/uevent", diff --git a/runsc/boot/compat.go b/runsc/boot/compat.go index 7076ae2e2..a3a76b609 100644 --- a/runsc/boot/compat.go +++ b/runsc/boot/compat.go @@ -53,7 +53,7 @@ type compatEmitter struct { func newCompatEmitter(logFD int) (*compatEmitter, error) { nameMap, ok := getSyscallNameMap() if !ok { - return nil, fmt.Errorf("Linux syscall table not found") + return nil, fmt.Errorf("syscall table not found") } c := &compatEmitter{ diff --git a/runsc/boot/controller.go b/runsc/boot/controller.go index 894651519..865126ac5 100644 --- a/runsc/boot/controller.go +++ b/runsc/boot/controller.go @@ -30,6 +30,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/socket/netstack" "gvisor.dev/gvisor/pkg/sentry/state" "gvisor.dev/gvisor/pkg/sentry/time" + "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/sentry/watchdog" "gvisor.dev/gvisor/pkg/tcpip/stack" "gvisor.dev/gvisor/pkg/urpc" @@ -195,7 +196,7 @@ type containerManager struct { // StartRoot will start the root container process. func (cm *containerManager) StartRoot(cid *string, _ *struct{}) error { - log.Debugf("containerManager.StartRoot %q", *cid) + log.Debugf("containerManager.StartRoot, cid: %s", *cid) // Tell the root container to start and wait for the result. cm.startChan <- struct{}{} if err := <-cm.startResultChan; err != nil { @@ -206,14 +207,35 @@ func (cm *containerManager) StartRoot(cid *string, _ *struct{}) error { // Processes retrieves information about processes running in the sandbox. func (cm *containerManager) Processes(cid *string, out *[]*control.Process) error { - log.Debugf("containerManager.Processes: %q", *cid) + log.Debugf("containerManager.Processes, cid: %s", *cid) return control.Processes(cm.l.k, *cid, out) } +// CreateArgs contains arguments to the Create method. +type CreateArgs struct { + // CID is the ID of the container to start. + CID string + + // FilePayload may contain a TTY file for the terminal, if enabled. + urpc.FilePayload +} + // Create creates a container within a sandbox. -func (cm *containerManager) Create(cid *string, _ *struct{}) error { - log.Debugf("containerManager.Create: %q", *cid) - return cm.l.createContainer(*cid) +func (cm *containerManager) Create(args *CreateArgs, _ *struct{}) error { + log.Debugf("containerManager.Create: %s", args.CID) + + if len(args.Files) > 1 { + return fmt.Errorf("start arguments must have at most 1 file for TTY") + } + var tty *fd.FD + if len(args.Files) == 1 { + var err error + tty, err = fd.NewFromFile(args.Files[0]) + if err != nil { + return fmt.Errorf("error dup'ing TTY file: %w", err) + } + } + return cm.l.createContainer(args.CID, tty) } // StartArgs contains arguments to the Start method. @@ -228,20 +250,18 @@ type StartArgs struct { CID string // FilePayload contains, in order: - // * stdin, stdout, and stderr. - // * the file descriptor over which the sandbox will - // request files from its root filesystem. + // * stdin, stdout, and stderr (optional: if terminal is disabled).
+ // * file descriptors to connect to gofer to serve the root filesystem. urpc.FilePayload } // Start runs a created container within a sandbox. func (cm *containerManager) Start(args *StartArgs, _ *struct{}) error { - log.Debugf("containerManager.Start: %+v", args) - // Validate arguments. if args == nil { return errors.New("start missing arguments") } + log.Debugf("containerManager.Start, cid: %s, args: %+v", args.CID, args) if args.Spec == nil { return errors.New("start arguments missing spec") } @@ -251,44 +271,66 @@ func (cm *containerManager) Start(args *StartArgs, _ *struct{}) error { if args.CID == "" { return errors.New("start argument missing container ID") } - if len(args.FilePayload.Files) < 4 { - return fmt.Errorf("start arguments must contain stdin, stderr, and stdout followed by at least one file for the container root gofer") + if len(args.Files) < 1 { + return fmt.Errorf("start arguments must contain at least one file for the container root gofer") } // All validation passed, logs the spec for debugging. specutils.LogSpec(args.Spec) - fds, err := fd.NewFromFiles(args.FilePayload.Files) + goferFiles := args.Files + var stdios []*fd.FD + if !args.Spec.Process.Terminal { + // When not using a terminal, stdios come as the first 3 files in the + // payload. + if l := len(args.Files); l < 4 { + return fmt.Errorf("start arguments (len: %d) must contain stdios and files for the container root gofer", l) + } + var err error + stdios, err = fd.NewFromFiles(goferFiles[:3]) + if err != nil { + return fmt.Errorf("error dup'ing stdio files: %w", err) + } + goferFiles = goferFiles[3:] + } + defer func() { + for _, fd := range stdios { + _ = fd.Close() + } + }() + + goferFDs, err := fd.NewFromFiles(goferFiles) if err != nil { - return err + return fmt.Errorf("error dup'ing gofer files: %w", err) } defer func() { - for _, fd := range fds { + for _, fd := range goferFDs { _ = fd.Close() } }() - if err := cm.l.startContainer(args.Spec, args.Conf, args.CID, fds); err != nil { - log.Debugf("containerManager.Start failed %q: %+v: %v", args.CID, args, err) + + if err := cm.l.startContainer(args.Spec, args.Conf, args.CID, stdios, goferFDs); err != nil { + log.Debugf("containerManager.Start failed, cid: %s, args: %+v, err: %v", args.CID, args, err) return err } - log.Debugf("Container %q started", args.CID) + log.Debugf("Container started, cid: %s", args.CID) return nil } // Destroy stops a container if it is still running and cleans up its // filesystem. func (cm *containerManager) Destroy(cid *string, _ *struct{}) error { - log.Debugf("containerManager.destroy %q", *cid) + log.Debugf("containerManager.destroy, cid: %s", *cid) return cm.l.destroyContainer(*cid) } // ExecuteAsync starts running a command on a created or running sandbox. It // returns the PID of the new process. 
func (cm *containerManager) ExecuteAsync(args *control.ExecArgs, pid *int32) error { - log.Debugf("containerManager.ExecuteAsync: %+v", args) + log.Debugf("containerManager.ExecuteAsync, cid: %s, args: %+v", args.ContainerID, args) tgid, err := cm.l.executeAsync(args) if err != nil { - log.Debugf("containerManager.ExecuteAsync failed: %+v: %v", args, err) + log.Debugf("containerManager.ExecuteAsync failed, cid: %s, args: %+v, err: %v", args.ContainerID, args, err) return err } *pid = int32(tgid) @@ -330,18 +372,18 @@ func (cm *containerManager) Restore(o *RestoreOpts, _ *struct{}) error { log.Debugf("containerManager.Restore") var specFile, deviceFile *os.File - switch numFiles := len(o.FilePayload.Files); numFiles { + switch numFiles := len(o.Files); numFiles { case 2: // The device file is donated to the platform. // Can't take ownership away from os.File. dup them to get a new FD. - fd, err := syscall.Dup(int(o.FilePayload.Files[1].Fd())) + fd, err := syscall.Dup(int(o.Files[1].Fd())) if err != nil { return fmt.Errorf("failed to dup file: %v", err) } deviceFile = os.NewFile(uintptr(fd), "platform device") fallthrough case 1: - specFile = o.FilePayload.Files[0] + specFile = o.Files[0] case 0: return fmt.Errorf("at least one file must be passed to Restore") default: @@ -367,12 +409,20 @@ func (cm *containerManager) Restore(o *RestoreOpts, _ *struct{}) error { cm.l.k = k // Set up the restore environment. + ctx := k.SupervisorContext() mntr := newContainerMounter(cm.l.root.spec, cm.l.root.goferFDs, cm.l.k, cm.l.mountHints) - renv, err := mntr.createRestoreEnvironment(cm.l.root.conf) - if err != nil { - return fmt.Errorf("creating RestoreEnvironment: %v", err) + if kernel.VFS2Enabled { + ctx, err = mntr.configureRestore(ctx, cm.l.root.conf) + if err != nil { + return fmt.Errorf("configuring filesystem restore: %v", err) + } + } else { + renv, err := mntr.createRestoreEnvironment(cm.l.root.conf) + if err != nil { + return fmt.Errorf("creating RestoreEnvironment: %v", err) + } + fs.SetRestoreEnvironment(*renv) } - fs.SetRestoreEnvironment(*renv) // Prepare to load from the state file. if eps, ok := networkStack.(*netstack.Stack); ok { @@ -399,7 +449,7 @@ func (cm *containerManager) Restore(o *RestoreOpts, _ *struct{}) error { // Load the state. loadOpts := state.LoadOpts{Source: specFile} - if err := loadOpts.Load(k, networkStack, time.NewCalibratedClocks()); err != nil { + if err := loadOpts.Load(ctx, k, networkStack, time.NewCalibratedClocks(), &vfs.CompleteRestoreOptions{}); err != nil { return err } @@ -444,9 +494,9 @@ func (cm *containerManager) Resume(_, _ *struct{}) error { // Wait waits for the init process in the given container. func (cm *containerManager) Wait(cid *string, waitStatus *uint32) error { - log.Debugf("containerManager.Wait") + log.Debugf("containerManager.Wait, cid: %s", *cid) err := cm.l.waitContainer(*cid, waitStatus) - log.Debugf("containerManager.Wait returned, waitStatus: %v: %v", waitStatus, err) + log.Debugf("containerManager.Wait returned, cid: %s, waitStatus: %#x, err: %v", *cid, *waitStatus, err) return err } @@ -461,8 +511,10 @@ type WaitPIDArgs struct { // WaitPID waits for the process with PID 'pid' in the sandbox. 
func (cm *containerManager) WaitPID(args *WaitPIDArgs, waitStatus *uint32) error { - log.Debugf("containerManager.Wait") - return cm.l.waitPID(kernel.ThreadID(args.PID), args.CID, waitStatus) + log.Debugf("containerManager.Wait, cid: %s, pid: %d", args.CID, args.PID) + err := cm.l.waitPID(kernel.ThreadID(args.PID), args.CID, waitStatus) + log.Debugf("containerManager.Wait, cid: %s, pid: %d, waitStatus: %#x, err: %v", args.CID, args.PID, *waitStatus, err) + return err } // SignalDeliveryMode enumerates different signal delivery modes. @@ -519,6 +571,6 @@ type SignalArgs struct { // indicated process, to all processes in the container, or to the foreground // process group. func (cm *containerManager) Signal(args *SignalArgs, _ *struct{}) error { - log.Debugf("containerManager.Signal %+v", args) + log.Debugf("containerManager.Signal: cid: %s, PID: %d, signal: %d, mode: %v", args.CID, args.PID, args.Signo, args.Mode) return cm.l.signal(args.CID, args.PID, args.Signo, args.Mode) } diff --git a/runsc/boot/filter/config.go b/runsc/boot/filter/config.go index a7c4ebb0c..4e3bb9ac7 100644 --- a/runsc/boot/filter/config.go +++ b/runsc/boot/filter/config.go @@ -343,6 +343,16 @@ func hostInetFilters() seccomp.SyscallRules { }, { seccomp.MatchAny{}, + seccomp.EqualTo(syscall.SOL_IP), + seccomp.EqualTo(syscall.IP_PKTINFO), + }, + { + seccomp.MatchAny{}, + seccomp.EqualTo(syscall.SOL_IP), + seccomp.EqualTo(syscall.IP_RECVORIGDSTADDR), + }, + { + seccomp.MatchAny{}, seccomp.EqualTo(syscall.SOL_IPV6), seccomp.EqualTo(syscall.IPV6_TCLASS), }, @@ -358,6 +368,11 @@ func hostInetFilters() seccomp.SyscallRules { }, { seccomp.MatchAny{}, + seccomp.EqualTo(syscall.SOL_IPV6), + seccomp.EqualTo(linux.IPV6_RECVORIGDSTADDR), + }, + { + seccomp.MatchAny{}, seccomp.EqualTo(syscall.SOL_SOCKET), seccomp.EqualTo(syscall.SO_ERROR), }, @@ -393,6 +408,11 @@ func hostInetFilters() seccomp.SyscallRules { }, { seccomp.MatchAny{}, + seccomp.EqualTo(syscall.SOL_SOCKET), + seccomp.EqualTo(syscall.SO_TIMESTAMP), + }, + { + seccomp.MatchAny{}, seccomp.EqualTo(syscall.SOL_TCP), seccomp.EqualTo(syscall.TCP_NODELAY), }, @@ -401,6 +421,11 @@ func hostInetFilters() seccomp.SyscallRules { seccomp.EqualTo(syscall.SOL_TCP), seccomp.EqualTo(syscall.TCP_INFO), }, + { + seccomp.MatchAny{}, + seccomp.EqualTo(syscall.SOL_TCP), + seccomp.EqualTo(linux.TCP_INQ), + }, }, syscall.SYS_IOCTL: []seccomp.Rule{ { @@ -449,6 +474,13 @@ func hostInetFilters() seccomp.SyscallRules { }, { seccomp.MatchAny{}, + seccomp.EqualTo(syscall.SOL_SOCKET), + seccomp.EqualTo(syscall.SO_TIMESTAMP), + seccomp.MatchAny{}, + seccomp.EqualTo(4), + }, + { + seccomp.MatchAny{}, seccomp.EqualTo(syscall.SOL_TCP), seccomp.EqualTo(syscall.TCP_NODELAY), seccomp.MatchAny{}, @@ -456,6 +488,13 @@ func hostInetFilters() seccomp.SyscallRules { }, { seccomp.MatchAny{}, + seccomp.EqualTo(syscall.SOL_TCP), + seccomp.EqualTo(linux.TCP_INQ), + seccomp.MatchAny{}, + seccomp.EqualTo(4), + }, + { + seccomp.MatchAny{}, seccomp.EqualTo(syscall.SOL_IP), seccomp.EqualTo(syscall.IP_TOS), seccomp.MatchAny{}, @@ -470,6 +509,20 @@ func hostInetFilters() seccomp.SyscallRules { }, { seccomp.MatchAny{}, + seccomp.EqualTo(syscall.SOL_IP), + seccomp.EqualTo(syscall.IP_PKTINFO), + seccomp.MatchAny{}, + seccomp.EqualTo(4), + }, + { + seccomp.MatchAny{}, + seccomp.EqualTo(syscall.SOL_IP), + seccomp.EqualTo(syscall.IP_RECVORIGDSTADDR), + seccomp.MatchAny{}, + seccomp.EqualTo(4), + }, + { + seccomp.MatchAny{}, seccomp.EqualTo(syscall.SOL_IPV6), seccomp.EqualTo(syscall.IPV6_TCLASS), seccomp.MatchAny{}, @@ -482,6 
+535,13 @@ func hostInetFilters() seccomp.SyscallRules { seccomp.MatchAny{}, seccomp.EqualTo(4), }, + { + seccomp.MatchAny{}, + seccomp.EqualTo(syscall.SOL_IPV6), + seccomp.EqualTo(linux.IPV6_RECVORIGDSTADDR), + seccomp.MatchAny{}, + seccomp.EqualTo(4), + }, }, syscall.SYS_SHUTDOWN: []seccomp.Rule{ { diff --git a/runsc/boot/fs.go b/runsc/boot/fs.go index ddf288456..2b0d2cd51 100644 --- a/runsc/boot/fs.go +++ b/runsc/boot/fs.go @@ -22,15 +22,6 @@ import ( "strings" "syscall" - // Include filesystem types that OCI spec might mount. - _ "gvisor.dev/gvisor/pkg/sentry/fs/dev" - _ "gvisor.dev/gvisor/pkg/sentry/fs/host" - _ "gvisor.dev/gvisor/pkg/sentry/fs/proc" - _ "gvisor.dev/gvisor/pkg/sentry/fs/sys" - _ "gvisor.dev/gvisor/pkg/sentry/fs/tmpfs" - _ "gvisor.dev/gvisor/pkg/sentry/fs/tty" - "gvisor.dev/gvisor/pkg/sentry/vfs" - specs "github.com/opencontainers/runtime-spec/specs-go" "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" @@ -48,9 +39,18 @@ import ( tmpfsvfs2 "gvisor.dev/gvisor/pkg/sentry/fsimpl/tmpfs" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" + "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/runsc/config" "gvisor.dev/gvisor/runsc/specutils" + + // Include filesystem types that OCI spec might mount. + _ "gvisor.dev/gvisor/pkg/sentry/fs/dev" + _ "gvisor.dev/gvisor/pkg/sentry/fs/host" + _ "gvisor.dev/gvisor/pkg/sentry/fs/proc" + _ "gvisor.dev/gvisor/pkg/sentry/fs/sys" + _ "gvisor.dev/gvisor/pkg/sentry/fs/tmpfs" + _ "gvisor.dev/gvisor/pkg/sentry/fs/tty" ) const ( @@ -105,33 +105,28 @@ func addOverlay(ctx context.Context, conf *config.Config, lower *fs.Inode, name // mandatory mounts that are required by the OCI specification. func compileMounts(spec *specs.Spec) []specs.Mount { // Keep track of whether proc and sys were mounted. - var procMounted, sysMounted bool + var procMounted, sysMounted, devMounted, devptsMounted bool var mounts []specs.Mount - // Always mount /dev. - mounts = append(mounts, specs.Mount{ - Type: devtmpfs.Name, - Destination: "/dev", - }) - - mounts = append(mounts, specs.Mount{ - Type: devpts.Name, - Destination: "/dev/pts", - }) - // Mount all submounts from the spec. for _, m := range spec.Mounts { if !specutils.IsSupportedDevMount(m) { log.Warningf("ignoring dev mount at %q", m.Destination) continue } - mounts = append(mounts, m) switch filepath.Clean(m.Destination) { case "/proc": procMounted = true case "/sys": sysMounted = true + case "/dev": + m.Type = devtmpfs.Name + devMounted = true + case "/dev/pts": + m.Type = devpts.Name + devptsMounted = true } + mounts = append(mounts, m) } // Mount proc and sys even if the user did not ask for it, as the spec @@ -149,6 +144,18 @@ func compileMounts(spec *specs.Spec) []specs.Mount { Destination: "/sys", }) } + if !devMounted { + mandatoryMounts = append(mandatoryMounts, specs.Mount{ + Type: devtmpfs.Name, + Destination: "/dev", + }) + } + if !devptsMounted { + mandatoryMounts = append(mandatoryMounts, specs.Mount{ + Type: devpts.Name, + Destination: "/dev/pts", + }) + } // The mandatory mounts should be ordered right after the root, in case // there are submounts of these mandatory mounts already in the spec. 
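The compileMounts hunk above no longer prepends /dev and /dev/pts unconditionally: user-supplied mounts at those destinations keep their position in the spec but are retyped to the sentry-internal filesystems, and mandatory mounts are appended only for destinations the spec omitted. A minimal, self-contained sketch of that rule follows; the compileDevMounts helper and the simplified mount type are illustrative stand-ins for the real compileMounts over specs.Mount, and the "devtmpfs"/"devpts" strings stand in for the devtmpfs.Name and devpts.Name constants:

package main

import (
	"fmt"
	"path/filepath"
)

// mount is a simplified stand-in for specs.Mount.
type mount struct {
	Type        string
	Destination string
}

// compileDevMounts mirrors the hunk above: /dev and /dev/pts mounts from the
// spec keep their position but are forced to the sentry-internal filesystem
// types, and mandatory mounts are added only for destinations the spec did
// not cover.
func compileDevMounts(specMounts []mount) []mount {
	var devMounted, devptsMounted bool
	var mounts []mount
	for _, m := range specMounts {
		switch filepath.Clean(m.Destination) {
		case "/dev":
			m.Type = "devtmpfs"
			devMounted = true
		case "/dev/pts":
			m.Type = "devpts"
			devptsMounted = true
		}
		mounts = append(mounts, m)
	}
	if !devMounted {
		mounts = append(mounts, mount{Type: "devtmpfs", Destination: "/dev"})
	}
	if !devptsMounted {
		mounts = append(mounts, mount{Type: "devpts", Destination: "/dev/pts"})
	}
	return mounts
}

func main() {
	// A spec with its own /dev mount: the type is rewritten in place and only
	// the missing mandatory /dev/pts mount is added.
	fmt.Println(compileDevMounts([]mount{{Type: "bind", Destination: "/dev"}}))
	// Output: [{devtmpfs /dev} {devpts /dev/pts}]
}

Note that the real code collects the mandatory mounts separately and orders them right after the root, as the trailing comment in the hunk explains; the sketch appends them at the end for brevity.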
diff --git a/runsc/boot/loader.go b/runsc/boot/loader.go index 8ad000497..3df013d34 100644 --- a/runsc/boot/loader.go +++ b/runsc/boot/loader.go @@ -35,6 +35,7 @@ import ( "gvisor.dev/gvisor/pkg/memutil" "gvisor.dev/gvisor/pkg/rand" "gvisor.dev/gvisor/pkg/refs" + "gvisor.dev/gvisor/pkg/refsvfs2" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/control" "gvisor.dev/gvisor/pkg/sentry/fdimport" @@ -49,6 +50,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/pgalloc" "gvisor.dev/gvisor/pkg/sentry/platform" "gvisor.dev/gvisor/pkg/sentry/sighandling" + "gvisor.dev/gvisor/pkg/sentry/socket/netfilter" "gvisor.dev/gvisor/pkg/sentry/syscalls/linux/vfs2" "gvisor.dev/gvisor/pkg/sentry/time" "gvisor.dev/gvisor/pkg/sentry/usage" @@ -155,6 +157,11 @@ type execProcess struct { // pidnsPath is the pid namespace path in spec pidnsPath string + + // hostTTY is present when creating a sub-container with terminal enabled. + // TTY file is passed during container create and must be saved until + // container start. + hostTTY *fd.FD } func init() { @@ -476,6 +483,10 @@ func (l *Loader) Destroy() { // save/restore. l.k.Release() + // All sentry-created resources should have been released at this point; + // check for reference leaks. + refsvfs2.DoLeakCheck() + // In the success case, stdioFDs and goferFDs will only contain // released/closed FDs that ownership has been passed over to host FDs and // gofer sessions. Close them here in case of failure. @@ -582,7 +593,9 @@ func (l *Loader) run() error { // Create the root container init task. It will begin running // when the kernel is started. - if _, err := l.createContainerProcess(true, l.sandboxID, &l.root, ep); err != nil { + var err error + _, ep.tty, ep.ttyVFS2, err = l.createContainerProcess(true, l.sandboxID, &l.root) + if err != nil { return err } @@ -621,7 +634,7 @@ func (l *Loader) run() error { } // createContainer creates a new container inside the sandbox. -func (l *Loader) createContainer(cid string) error { +func (l *Loader) createContainer(cid string, tty *fd.FD) error { l.mu.Lock() defer l.mu.Unlock() @@ -629,14 +642,14 @@ func (l *Loader) createContainer(cid string) error { if _, ok := l.processes[eid]; ok { return fmt.Errorf("container %q already exists", cid) } - l.processes[eid] = &execProcess{} + l.processes[eid] = &execProcess{hostTTY: tty} return nil } // startContainer starts a child container. It returns the thread group ID of // the newly created process. Used FDs are either closed or released. It's safe // for the caller to close any remaining files upon return. -func (l *Loader) startContainer(spec *specs.Spec, conf *config.Config, cid string, files []*fd.FD) error { +func (l *Loader) startContainer(spec *specs.Spec, conf *config.Config, cid string, stdioFDs, goferFDs []*fd.FD) error { // Create capabilities. caps, err := specutils.Capabilities(conf.EnableRaw, spec.Process.Capabilities) if err != nil { @@ -689,36 +702,41 @@ func (l *Loader) startContainer(spec *specs.Spec, conf *config.Config, cid strin info := &containerInfo{ conf: conf, spec: spec, - stdioFDs: files[:3], - goferFDs: files[3:], + goferFDs: goferFDs, } info.procArgs, err = createProcessArgs(cid, spec, creds, l.k, pidns) if err != nil { return fmt.Errorf("creating new process: %v", err) } - tg, err := l.createContainerProcess(false, cid, info, ep) + + // Use stdios or TTY depending on the spec configuration. 
+ if spec.Process.Terminal { + if len(stdioFDs) > 0 { + return fmt.Errorf("using TTY, stdios not expected: %v", stdioFDs) + } + if ep.hostTTY == nil { + return fmt.Errorf("terminal enabled but no TTY provided. Did you set --console-socket on create?") + } + info.stdioFDs = []*fd.FD{ep.hostTTY, ep.hostTTY, ep.hostTTY} + ep.hostTTY = nil + } else { + info.stdioFDs = stdioFDs + } + + ep.tg, ep.tty, ep.ttyVFS2, err = l.createContainerProcess(false, cid, info) if err != nil { return err } - - // Success! - l.k.StartProcess(tg) - ep.tg = tg + l.k.StartProcess(ep.tg) return nil } -func (l *Loader) createContainerProcess(root bool, cid string, info *containerInfo, ep *execProcess) (*kernel.ThreadGroup, error) { - console := false - if root { - // Only root container supports terminal for now. - console = info.spec.Process.Terminal - } - +func (l *Loader) createContainerProcess(root bool, cid string, info *containerInfo) (*kernel.ThreadGroup, *host.TTYFileOperations, *hostvfs2.TTYFileDescription, error) { // Create the FD map, which will set stdin, stdout, and stderr. ctx := info.procArgs.NewContext(l.k) - fdTable, ttyFile, ttyFileVFS2, err := createFDTable(ctx, console, info.stdioFDs) + fdTable, ttyFile, ttyFileVFS2, err := createFDTable(ctx, info.spec.Process.Terminal, info.stdioFDs) if err != nil { - return nil, fmt.Errorf("importing fds: %v", err) + return nil, nil, nil, fmt.Errorf("importing fds: %v", err) } // CreateProcess takes a reference on fdTable if successful. We won't need // ours either way. @@ -730,14 +748,14 @@ func (l *Loader) createContainerProcess(root bool, cid string, info *containerIn mntr := newContainerMounter(info.spec, info.goferFDs, l.k, l.mountHints) if root { if err := mntr.processHints(info.conf, info.procArgs.Credentials); err != nil { - return nil, err + return nil, nil, nil, err } } if err := setupContainerFS(ctx, info.conf, mntr, &info.procArgs); err != nil { - return nil, err + return nil, nil, nil, err } - // Add the HOME enviroment variable if it is not already set. + // Add the HOME environment variable if it is not already set. var envv []string if kernel.VFS2Enabled { envv, err = user.MaybeAddExecUserHomeVFS2(ctx, info.procArgs.MountNamespaceVFS2, @@ -748,29 +766,25 @@ func (l *Loader) createContainerProcess(root bool, cid string, info *containerIn info.procArgs.Credentials.RealKUID, info.procArgs.Envv) } if err != nil { - return nil, err + return nil, nil, nil, err } info.procArgs.Envv = envv // Create and start the new process. tg, _, err := l.k.CreateProcess(info.procArgs) if err != nil { - return nil, fmt.Errorf("creating process: %v", err) + return nil, nil, nil, fmt.Errorf("creating process: %v", err) } // CreateProcess takes a reference on FDTable if successful. info.procArgs.FDTable.DecRef(ctx) // Set the foreground process group on the TTY to the global init process // group, since that is what we are about to start running. - if root { - switch { - case ttyFileVFS2 != nil: - ep.ttyVFS2 = ttyFileVFS2 - ttyFileVFS2.InitForegroundProcessGroup(tg.ProcessGroup()) - case ttyFile != nil: - ep.tty = ttyFile - ttyFile.InitForegroundProcessGroup(tg.ProcessGroup()) - } + switch { + case ttyFileVFS2 != nil: + ttyFileVFS2.InitForegroundProcessGroup(tg.ProcessGroup()) + case ttyFile != nil: + ttyFile.InitForegroundProcessGroup(tg.ProcessGroup()) } // Install seccomp filters with the new task if there are any. 
@@ -778,7 +792,7 @@ func (l *Loader) createContainerProcess(root bool, cid string, info *containerIn if info.spec.Linux != nil && info.spec.Linux.Seccomp != nil { program, err := seccomp.BuildProgram(info.spec.Linux.Seccomp) if err != nil { - return nil, fmt.Errorf("building seccomp program: %v", err) + return nil, nil, nil, fmt.Errorf("building seccomp program: %v", err) } if log.IsLogging(log.Debug) { @@ -789,7 +803,7 @@ func (l *Loader) createContainerProcess(root bool, cid string, info *containerIn task := tg.Leader() // NOTE: It seems Flags are ignored by runc so we ignore them too. if err := task.AppendSyscallFilter(program, true); err != nil { - return nil, fmt.Errorf("appending seccomp filters: %v", err) + return nil, nil, nil, fmt.Errorf("appending seccomp filters: %v", err) } } } else { @@ -798,7 +812,7 @@ func (l *Loader) createContainerProcess(root bool, cid string, info *containerIn } } - return tg, nil + return tg, ttyFile, ttyFileVFS2, nil } // startGoferMonitor runs a goroutine to monitor gofer's health. It polls on @@ -882,7 +896,7 @@ func (l *Loader) destroyContainer(cid string) error { } } - log.Debugf("Container destroyed %q", cid) + log.Debugf("Container destroyed, cid: %s", cid) return nil } @@ -1068,7 +1082,12 @@ func newRootNetworkNamespace(conf *config.Config, clock tcpip.Clock, uniqueID st func newEmptySandboxNetworkStack(clock tcpip.Clock, uniqueID stack.UniqueID) (inet.Stack, error) { netProtos := []stack.NetworkProtocolFactory{ipv4.NewProtocol, ipv6.NewProtocol, arp.NewProtocol} - transProtos := []stack.TransportProtocolFactory{tcp.NewProtocol, udp.NewProtocol, icmp.NewProtocol4} + transProtos := []stack.TransportProtocolFactory{ + tcp.NewProtocol, + udp.NewProtocol, + icmp.NewProtocol4, + icmp.NewProtocol6, + } s := netstack.Stack{stack.New(stack.Options{ NetworkProtocols: netProtos, TransportProtocols: transProtos, @@ -1079,6 +1098,7 @@ func newEmptySandboxNetworkStack(clock tcpip.Clock, uniqueID stack.UniqueID) (in // privileges. RawFactory: raw.EndpointFactory{}, UniqueID: uniqueID, + IPTables: netfilter.DefaultLinuxTables(), })} // Enable SACK Recovery. diff --git a/runsc/boot/loader_test.go b/runsc/boot/loader_test.go index e376f944b..b77b4762e 100644 --- a/runsc/boot/loader_test.go +++ b/runsc/boot/loader_test.go @@ -266,7 +266,7 @@ type CreateMountTestcase struct { func createMountTestcases() []*CreateMountTestcase { testCases := []*CreateMountTestcase{ - &CreateMountTestcase{ + { // Only proc. name: "only proc mount", spec: specs.Spec{ @@ -304,11 +304,10 @@ func createMountTestcases() []*CreateMountTestcase { }, }, }, - // /some/deep/path should be mounted, along with /proc, - // /dev, and /sys. + // /some/deep/path should be mounted, along with /proc, /dev, and /sys. expectedPaths: []string{"/some/very/very/deep/path", "/proc", "/dev", "/sys"}, }, - &CreateMountTestcase{ + { // Mounts are nested inside each other. 
name: "nested mounts", spec: specs.Spec{ @@ -352,7 +351,7 @@ func createMountTestcases() []*CreateMountTestcase { expectedPaths: []string{"/foo", "/foo/bar", "/foo/bar/baz", "/foo/qux", "/foo/qux-quz", "/foo/some/very/very/deep/path", "/proc", "/dev", "/sys"}, }, - &CreateMountTestcase{ + { name: "mount inside /dev", spec: specs.Spec{ Root: &specs.Root{ @@ -395,35 +394,37 @@ func createMountTestcases() []*CreateMountTestcase { }, expectedPaths: []string{"/proc", "/dev", "/dev/fd-foo", "/dev/foo", "/dev/bar", "/sys"}, }, - } - - vfsCase := &CreateMountTestcase{ - name: "mounts inside mandatory mounts", - spec: specs.Spec{ - Root: &specs.Root{ - Path: os.TempDir(), - Readonly: true, - }, - Mounts: []specs.Mount{ - { - Destination: "/proc", - Type: "tmpfs", - }, - { - Destination: "/sys/bar", - Type: "tmpfs", + { + name: "mounts inside mandatory mounts", + spec: specs.Spec{ + Root: &specs.Root{ + Path: os.TempDir(), + Readonly: true, }, - - { - Destination: "/tmp/baz", - Type: "tmpfs", + Mounts: []specs.Mount{ + { + Destination: "/proc", + Type: "tmpfs", + }, + { + Destination: "/sys/bar", + Type: "tmpfs", + }, + { + Destination: "/tmp/baz", + Type: "tmpfs", + }, + { + Destination: "/dev/goo", + Type: "tmpfs", + }, }, }, + expectedPaths: []string{"/proc", "/sys", "/sys/bar", "/tmp", "/tmp/baz", "/dev/goo"}, }, - expectedPaths: []string{"/proc", "/sys", "/sys/bar", "/tmp", "/tmp/baz"}, } - return append(testCases, vfsCase) + return testCases } // Test that MountNamespace can be created with various specs. diff --git a/runsc/boot/network.go b/runsc/boot/network.go index 988573640..3d3a813df 100644 --- a/runsc/boot/network.go +++ b/runsc/boot/network.go @@ -28,7 +28,6 @@ import ( "gvisor.dev/gvisor/pkg/tcpip/link/packetsocket" "gvisor.dev/gvisor/pkg/tcpip/link/qdisc/fifo" "gvisor.dev/gvisor/pkg/tcpip/link/sniffer" - "gvisor.dev/gvisor/pkg/tcpip/network/arp" "gvisor.dev/gvisor/pkg/tcpip/network/ipv4" "gvisor.dev/gvisor/pkg/tcpip/network/ipv6" "gvisor.dev/gvisor/pkg/tcpip/stack" @@ -41,9 +40,9 @@ var ( // "::1/8" on "lo" interface. DefaultLoopbackLink = LoopbackLink{ Name: "lo", - Addresses: []net.IP{ - net.IP("\x7f\x00\x00\x01"), - net.IPv6loopback, + Addresses: []IPWithPrefix{ + {Address: net.IP("\x7f\x00\x00\x01"), PrefixLen: 8}, + {Address: net.IPv6loopback, PrefixLen: 128}, }, Routes: []Route{ { @@ -83,7 +82,7 @@ type DefaultRoute struct { type FDBasedLink struct { Name string MTU int - Addresses []net.IP + Addresses []IPWithPrefix Routes []Route GSOMaxSize uint32 SoftwareGSOEnabled bool @@ -100,7 +99,7 @@ type FDBasedLink struct { // LoopbackLink configures a loopback li nk. type LoopbackLink struct { Name string - Addresses []net.IP + Addresses []IPWithPrefix Routes []Route } @@ -118,6 +117,19 @@ type CreateLinksAndRoutesArgs struct { Defaultv6Gateway DefaultRoute } +// IPWithPrefix is an address with its subnet prefix length. +type IPWithPrefix struct { + // Address is a network address. + Address net.IP + + // PrefixLen is the subnet prefix length. + PrefixLen int +} + +func (ip IPWithPrefix) String() string { + return fmt.Sprintf("%s/%d", ip.Address, ip.PrefixLen) +} + // Empty returns true if route hasn't been set. func (r *Route) Empty() bool { return r.Destination.IP == nil && r.Destination.Mask == nil && r.Gateway == nil @@ -265,20 +277,19 @@ func (n *Network) CreateLinksAndRoutes(args *CreateLinksAndRoutesArgs, _ *struct // createNICWithAddrs creates a NIC in the network stack and adds the given // addresses. 
-func (n *Network) createNICWithAddrs(id tcpip.NICID, name string, ep stack.LinkEndpoint, addrs []net.IP) error { +func (n *Network) createNICWithAddrs(id tcpip.NICID, name string, ep stack.LinkEndpoint, addrs []IPWithPrefix) error { opts := stack.NICOptions{Name: name} if err := n.Stack.CreateNICWithOptions(id, sniffer.New(ep), opts); err != nil { return fmt.Errorf("CreateNICWithOptions(%d, _, %+v) failed: %v", id, opts, err) } - // Always start with an arp address for the NIC. - if err := n.Stack.AddAddress(id, arp.ProtocolNumber, arp.ProtocolAddress); err != nil { - return fmt.Errorf("AddAddress(%v, %v, %v) failed: %v", id, arp.ProtocolNumber, arp.ProtocolAddress, err) - } - for _, addr := range addrs { - proto, tcpipAddr := ipToAddressAndProto(addr) - if err := n.Stack.AddAddress(id, proto, tcpipAddr); err != nil { + proto, tcpipAddr := ipToAddressAndProto(addr.Address) + ap := tcpip.AddressWithPrefix{ + Address: tcpipAddr, + PrefixLen: addr.PrefixLen, + } + if err := n.Stack.AddAddressWithPrefix(id, proto, ap); err != nil { return fmt.Errorf("AddAddress(%v, %v, %v) failed: %v", id, proto, tcpipAddr, err) } } diff --git a/runsc/boot/vfs.go b/runsc/boot/vfs.go index 004da5b40..3fd28e516 100644 --- a/runsc/boot/vfs.go +++ b/runsc/boot/vfs.go @@ -210,6 +210,9 @@ func (c *containerMounter) createMountNamespaceVFS2(ctx context.Context, conf *c ReadOnly: c.root.Readonly, GetFilesystemOptions: vfs.GetFilesystemOptions{ Data: strings.Join(data, ","), + InternalData: gofer.InternalFilesystemOptions{ + UniqueID: "/", + }, }, InternalMount: true, } @@ -247,36 +250,76 @@ func (c *containerMounter) configureOverlay(ctx context.Context, creds *auth.Cre overlayOpts := *lowerOpts overlayOpts.GetFilesystemOptions = vfs.GetFilesystemOptions{} - // Next mount upper and lower. Upper is a tmpfs mount to keep all - // modifications inside the sandbox. - upper, err := c.k.VFS().MountDisconnected(ctx, creds, "" /* source */, tmpfs.Name, &upperOpts) - if err != nil { - return nil, nil, fmt.Errorf("failed to create upper layer for overlay, opts: %+v: %v", upperOpts, err) - } - cu := cleanup.Make(func() { upper.DecRef(ctx) }) - defer cu.Clean() - // All writes go to the upper layer, be paranoid and make lower readonly. lowerOpts.ReadOnly = true lower, err := c.k.VFS().MountDisconnected(ctx, creds, "" /* source */, lowerFSName, lowerOpts) if err != nil { return nil, nil, err } - cu.Add(func() { lower.DecRef(ctx) }) + cu := cleanup.Make(func() { lower.DecRef(ctx) }) + defer cu.Clean() - // Propagate the lower layer's root's owner, group, and mode to the upper - // layer's root for consistency with VFS1. - upperRootVD := vfs.MakeVirtualDentry(upper, upper.Root()) + // Determine the lower layer's root's type. 
lowerRootVD := vfs.MakeVirtualDentry(lower, lower.Root()) stat, err := c.k.VFS().StatAt(ctx, creds, &vfs.PathOperation{ Root: lowerRootVD, Start: lowerRootVD, }, &vfs.StatOptions{ - Mask: linux.STATX_UID | linux.STATX_GID | linux.STATX_MODE, + Mask: linux.STATX_UID | linux.STATX_GID | linux.STATX_MODE | linux.STATX_TYPE, }) if err != nil { - return nil, nil, err + return nil, nil, fmt.Errorf("failed to stat lower layer's root: %v", err) + } + if stat.Mask&linux.STATX_TYPE == 0 { + return nil, nil, fmt.Errorf("failed to get file type of lower layer's root") + } + rootType := stat.Mode & linux.S_IFMT + if rootType != linux.S_IFDIR && rootType != linux.S_IFREG { + return nil, nil, fmt.Errorf("lower layer's root has unsupported file type %v", rootType) + } + + // Upper is a tmpfs mount to keep all modifications inside the sandbox. + upperOpts.GetFilesystemOptions.InternalData = tmpfs.FilesystemOpts{ + RootFileType: uint16(rootType), + } + upper, err := c.k.VFS().MountDisconnected(ctx, creds, "" /* source */, tmpfs.Name, &upperOpts) + if err != nil { + return nil, nil, fmt.Errorf("failed to create upper layer for overlay, opts: %+v: %v", upperOpts, err) + } + cu.Add(func() { upper.DecRef(ctx) }) + + // If the overlay mount consists of a regular file, copy up its contents + // from the lower layer, since in the overlay the otherwise-empty upper + // layer file will take precedence. + upperRootVD := vfs.MakeVirtualDentry(upper, upper.Root()) + if rootType == linux.S_IFREG { + lowerFD, err := c.k.VFS().OpenAt(ctx, creds, &vfs.PathOperation{ + Root: lowerRootVD, + Start: lowerRootVD, + }, &vfs.OpenOptions{ + Flags: linux.O_RDONLY, + }) + if err != nil { + return nil, nil, fmt.Errorf("failed to open lower layer root for copying: %v", err) + } + defer lowerFD.DecRef(ctx) + upperFD, err := c.k.VFS().OpenAt(ctx, creds, &vfs.PathOperation{ + Root: upperRootVD, + Start: upperRootVD, + }, &vfs.OpenOptions{ + Flags: linux.O_WRONLY, + }) + if err != nil { + return nil, nil, fmt.Errorf("failed to open upper layer root for copying: %v", err) + } + defer upperFD.DecRef(ctx) + if _, err := vfs.CopyRegularFileData(ctx, upperFD, lowerFD); err != nil { + return nil, nil, fmt.Errorf("failed to copy up overlay file: %v", err) + } } + + // Propagate the lower layer's root's owner, group, and mode to the upper + // layer's root for consistency with VFS1. err = c.k.VFS().SetStatAt(ctx, creds, &vfs.PathOperation{ Root: upperRootVD, Start: upperRootVD, @@ -427,6 +470,7 @@ func (c *containerMounter) getMountNameAndOptionsVFS2(conf *config.Config, m *mo fsName := m.Type useOverlay := false var data []string + var iopts interface{} // Find filesystem name and FS specific data field. switch m.Type { @@ -451,6 +495,9 @@ func (c *containerMounter) getMountNameAndOptionsVFS2(conf *config.Config, m *mo return "", nil, false, fmt.Errorf("9P mount requires a connection FD") } data = p9MountData(m.fd, c.getMountAccessType(m.Mount), true /* vfs2 */) + iopts = gofer.InternalFilesystemOptions{ + UniqueID: m.Destination, + } // If configured, add overlay to all writable mounts. 
useOverlay = conf.Overlay && !mountFlags(m.Options).ReadOnly @@ -462,7 +509,8 @@ func (c *containerMounter) getMountNameAndOptionsVFS2(conf *config.Config, m *mo opts := &vfs.MountOptions{ GetFilesystemOptions: vfs.GetFilesystemOptions{ - Data: strings.Join(data, ","), + Data: strings.Join(data, ","), + InternalData: iopts, }, InternalMount: true, } @@ -667,3 +715,21 @@ func (c *containerMounter) makeMountPoint(ctx context.Context, creds *auth.Crede } return c.k.VFS().MakeSyntheticMountpoint(ctx, dest, root, creds) } + +// configureRestore returns an updated context.Context including filesystem +// state used by restore defined by conf. +func (c *containerMounter) configureRestore(ctx context.Context, conf *config.Config) (context.Context, error) { + fdmap := make(map[string]int) + fdmap["/"] = c.fds.remove() + mounts, err := c.prepareMountsVFS2() + if err != nil { + return ctx, err + } + for i := range c.mounts { + submount := &mounts[i] + if submount.fd >= 0 { + fdmap[submount.Destination] = submount.fd + } + } + return context.WithValue(ctx, gofer.CtxRestoreServerFDMap, fdmap), nil +} diff --git a/runsc/cgroup/cgroup.go b/runsc/cgroup/cgroup.go index 56da21584..e5294de55 100644 --- a/runsc/cgroup/cgroup.go +++ b/runsc/cgroup/cgroup.go @@ -21,6 +21,7 @@ import ( "context", "errors", "fmt", + "io", "io/ioutil", "os", "path/filepath", @@ -198,8 +199,13 @@ func LoadPaths(pid string) (map[string]string, error) { } defer f.Close() + return loadPathsHelper(f) +} + +func loadPathsHelper(cgroup io.Reader) (map[string]string, error) { paths := make(map[string]string) - scanner := bufio.NewScanner(f) + + scanner := bufio.NewScanner(cgroup) for scanner.Scan() { // Format: ID:[name=]controller1,controller2:path // Example: 2:cpu,cpuacct:/user.slice @@ -207,6 +213,9 @@ func LoadPaths(pid string) (map[string]string, error) { if len(tokens) != 3 { return nil, fmt.Errorf("invalid cgroups file, line: %q", scanner.Text()) } + if len(tokens[1]) == 0 { + continue + } for _, ctrlr := range strings.Split(tokens[1], ",") { // Remove prefix for cgroups with no controller, eg. systemd. ctrlr = strings.TrimPrefix(ctrlr, "name=") @@ -225,7 +234,7 @@ type Cgroup struct { Name string `json:"name"` Parents map[string]string `json:"parents"` - Own bool `json:"own"` + Own map[string]bool `json:"own"` } // New creates a new Cgroup instance if the spec includes a cgroup path. @@ -242,9 +251,11 @@ func New(spec *specs.Spec) (*Cgroup, error) { return nil, fmt.Errorf("finding current cgroups: %w", err) } } + own := make(map[string]bool) return &Cgroup{ Name: spec.Linux.CgroupsPath, Parents: parents, + Own: own, }, nil } @@ -252,18 +263,8 @@ func New(spec *specs.Spec) (*Cgroup, error) { // already exists, it means that the caller has already provided a // pre-configured cgroups, and 'res' is ignored. func (c *Cgroup) Install(res *specs.LinuxResources) error { - if _, err := os.Stat(c.makePath("memory")); err == nil { - // If cgroup has already been created; it has been setup by caller. Don't - // make any changes to configuration, just join when sandbox/gofer starts. - log.Debugf("Using pre-created cgroup %q", c.Name) - return nil - } - log.Debugf("Creating cgroup %q", c.Name) - // Mark that cgroup resources are owned by me. - c.Own = true - // The Cleanup object cleans up partially created cgroups when an error occurs. // Errors occurring during cleanup itself are ignored.
clean := cleanup.Make(func() { _ = c.Uninstall() }) @@ -271,6 +272,16 @@ func (c *Cgroup) Install(res *specs.LinuxResources) error { for key, cfg := range controllers { path := c.makePath(key) + if _, err := os.Stat(path); err == nil { + // If cgroup has already been created; it has been setup by caller. Don't + // make any changes to configuration, just join when sandbox/gofer starts. + log.Debugf("Using pre-created cgroup %q", path) + continue + } + + // Mark that cgroup resources are owned by me. + c.Own[key] = true + if err := os.MkdirAll(path, 0755); err != nil { if cfg.optional && errors.Is(err, syscall.EROFS) { log.Infof("Skipping cgroup %q", key) @@ -289,12 +300,12 @@ func (c *Cgroup) Install(res *specs.LinuxResources) error { // Uninstall removes the settings done in Install(). If cgroup path already // existed when Install() was called, Uninstall is a noop. func (c *Cgroup) Uninstall() error { - if !c.Own { - // cgroup is managed by caller, don't touch it. - return nil - } log.Debugf("Deleting cgroup %q", c.Name) for key := range controllers { + if !c.Own[key] { + // cgroup is managed by caller, don't touch it. + continue + } path := c.makePath(key) log.Debugf("Removing cgroup controller for key=%q path=%q", key, path) diff --git a/runsc/cgroup/cgroup_test.go b/runsc/cgroup/cgroup_test.go index 4db5ee5c3..931144cf9 100644 --- a/runsc/cgroup/cgroup_test.go +++ b/runsc/cgroup/cgroup_test.go @@ -29,7 +29,10 @@ func TestUninstallEnoent(t *testing.T) { c := Cgroup{ // set a non-existent name Name: "runsc-test-uninstall-656e6f656e740a", - Own: true, + } + c.Own = make(map[string]bool) + for key := range controllers { + c.Own[key] = true } if err := c.Uninstall(); err != nil { t.Errorf("Uninstall() failed: %v", err) @@ -647,3 +650,83 @@ func TestPids(t *testing.T) { }) } } + +func TestLoadPaths(t *testing.T) { + for _, tc := range []struct { + name string + cgroups string + want map[string]string + err string + }{ + { + name: "abs-path", + cgroups: "0:ctr:/path", + want: map[string]string{"ctr": "/path"}, + }, + { + name: "rel-path", + cgroups: "0:ctr:rel-path", + want: map[string]string{"ctr": "rel-path"}, + }, + { + name: "non-controller", + cgroups: "0:name=systemd:/path", + want: map[string]string{"systemd": "/path"}, + }, + { + name: "empty", + }, + { + name: "multiple", + cgroups: "0:ctr0:/path0\n" + + "1:ctr1:/path1\n" + + "2::/empty\n", + want: map[string]string{ + "ctr0": "/path0", + "ctr1": "/path1", + }, + }, + { + name: "missing-field", + cgroups: "0:nopath\n", + err: "invalid cgroups file", + }, + { + name: "too-many-fields", + cgroups: "0:ctr:/path:extra\n", + err: "invalid cgroups file", + }, + { + name: "multiple-malformed", + cgroups: "0:ctr0:/path0\n" + + "1:ctr1:/path1\n" + + "2:\n", + err: "invalid cgroups file", + }, + } { + t.Run(tc.name, func(t *testing.T) { + r := strings.NewReader(tc.cgroups) + got, err := loadPathsHelper(r) + if len(tc.err) == 0 { + if err != nil { + t.Fatalf("Unexpected error: %v", err) + } + } else if !strings.Contains(err.Error(), tc.err) { + t.Fatalf("Wrong error message, want: *%s*, got: %v", tc.err, err) + } + for key, vWant := range tc.want { + vGot, ok := got[key] + if !ok { + t.Errorf("Missing controller %q", key) + } + if vWant != vGot { + t.Errorf("Wrong controller %q value, want: %q, got: %q", key, vWant, vGot) + } + delete(got, key) + } + for k, v := range got { + t.Errorf("Unexpected controller %q: %q", k, v) + } + }) + } +} diff --git a/runsc/cli/main.go b/runsc/cli/main.go index bca015db5..6c3bf4d21 100644 --- 
a/runsc/cli/main.go +++ b/runsc/cli/main.go @@ -22,6 +22,7 @@ import ( "io/ioutil" "os" "os/signal" + "runtime" "syscall" "time" @@ -82,6 +83,7 @@ func Main(version string) { subcommands.Register(new(cmd.Spec), "") subcommands.Register(new(cmd.State), "") subcommands.Register(new(cmd.Start), "") + subcommands.Register(new(cmd.Symbolize), "") subcommands.Register(new(cmd.Wait), "") // Register internal commands with the internal group name. This causes @@ -207,6 +209,8 @@ func Main(version string) { log.Infof("***************************") log.Infof("Args: %s", os.Args) log.Infof("Version %s", version) + log.Infof("GOOS: %s", runtime.GOOS) + log.Infof("GOARCH: %s", runtime.GOARCH) log.Infof("PID: %d", os.Getpid()) log.Infof("UID: %d, GID: %d", os.Getuid(), os.Getgid()) log.Infof("Configuration:") diff --git a/runsc/cmd/BUILD b/runsc/cmd/BUILD index 2556f6d9e..19520d7ab 100644 --- a/runsc/cmd/BUILD +++ b/runsc/cmd/BUILD @@ -32,6 +32,7 @@ go_library( "start.go", "state.go", "statefile.go", + "symbolize.go", "syscalls.go", "wait.go", ], @@ -39,6 +40,7 @@ go_library( "//runsc:__subpackages__", ], deps = [ + "//pkg/coverage", "//pkg/log", "//pkg/p9", "//pkg/sentry/control", diff --git a/runsc/cmd/boot.go b/runsc/cmd/boot.go index cd419e1aa..2c92e3067 100644 --- a/runsc/cmd/boot.go +++ b/runsc/cmd/boot.go @@ -131,11 +131,11 @@ func (b *Boot) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) return subcommands.ExitUsageError } - // Ensure that if there is a panic, all goroutine stacks are printed. - debug.SetTraceback("system") - conf := args[0].(*config.Config) + // Set traceback level + debug.SetTraceback(conf.Traceback) + if b.attached { // Ensure this process is killed after parent process terminates when // attached mode is enabled. In the unfortunate event that the parent diff --git a/runsc/cmd/checkpoint.go b/runsc/cmd/checkpoint.go index 8fe0c427a..c0bc8f064 100644 --- a/runsc/cmd/checkpoint.go +++ b/runsc/cmd/checkpoint.go @@ -75,7 +75,7 @@ func (c *Checkpoint) Execute(_ context.Context, f *flag.FlagSet, args ...interfa conf := args[0].(*config.Config) waitStatus := args[1].(*syscall.WaitStatus) - cont, err := container.Load(conf.RootDir, id) + cont, err := container.LoadAndCheck(conf.RootDir, id) if err != nil { Fatalf("loading container: %v", err) } @@ -149,6 +149,9 @@ func (c *Checkpoint) Execute(_ context.Context, f *flag.FlagSet, args ...interfa } ws, err := cont.Wait() + if err != nil { + Fatalf("Error waiting for container: %v", err) + } *waitStatus = ws return subcommands.ExitSuccess diff --git a/runsc/cmd/debug.go b/runsc/cmd/debug.go index 132198222..609e8231c 100644 --- a/runsc/cmd/debug.go +++ b/runsc/cmd/debug.go @@ -91,7 +91,7 @@ func (d *Debug) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) return subcommands.ExitUsageError } var err error - c, err = container.Load(conf.RootDir, f.Arg(0)) + c, err = container.LoadAndCheck(conf.RootDir, f.Arg(0)) if err != nil { return Errorf("loading container %q: %v", f.Arg(0), err) } @@ -106,7 +106,7 @@ func (d *Debug) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) return Errorf("listing containers: %v", err) } for _, id := range ids { - candidate, err := container.Load(conf.RootDir, id) + candidate, err := container.LoadAndCheck(conf.RootDir, id) if err != nil { return Errorf("loading container %q: %v", id, err) } diff --git a/runsc/cmd/delete.go b/runsc/cmd/delete.go index 4e49deff8..a25637265 100644 --- a/runsc/cmd/delete.go +++ b/runsc/cmd/delete.go @@ -68,7 +68,7 @@ func (d *Delete) 
Execute(_ context.Context, f *flag.FlagSet, args ...interface{} func (d *Delete) execute(ids []string, conf *config.Config) error { for _, id := range ids { - c, err := container.Load(conf.RootDir, id) + c, err := container.LoadAndCheck(conf.RootDir, id) if err != nil { if os.IsNotExist(err) && d.force { log.Warningf("couldn't find container %q: %v", id, err) diff --git a/runsc/cmd/do.go b/runsc/cmd/do.go index 640de4c47..8a8d9f752 100644 --- a/runsc/cmd/do.go +++ b/runsc/cmd/do.go @@ -81,7 +81,7 @@ func (c *Do) SetFlags(f *flag.FlagSet) { // Execute implements subcommands.Command.Execute. func (c *Do) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) subcommands.ExitStatus { if len(f.Args()) == 0 { - c.Usage() + f.Usage() return subcommands.ExitUsageError } diff --git a/runsc/cmd/events.go b/runsc/cmd/events.go index 25fe2cf1c..3836b7b4e 100644 --- a/runsc/cmd/events.go +++ b/runsc/cmd/events.go @@ -74,7 +74,7 @@ func (evs *Events) Execute(ctx context.Context, f *flag.FlagSet, args ...interfa id := f.Arg(0) conf := args[0].(*config.Config) - c, err := container.Load(conf.RootDir, id) + c, err := container.LoadAndCheck(conf.RootDir, id) if err != nil { Fatalf("loading sandbox: %v", err) } @@ -85,7 +85,12 @@ func (evs *Events) Execute(ctx context.Context, f *flag.FlagSet, args ...interfa ev, err := c.Event() if err != nil { log.Warningf("Error getting events for container: %v", err) + if evs.stats { + return subcommands.ExitFailure + } } + log.Debugf("Events: %+v", ev) + // err must be preserved because it is used below when breaking // out of the loop. b, err := json.Marshal(ev) @@ -101,11 +106,9 @@ func (evs *Events) Execute(ctx context.Context, f *flag.FlagSet, args ...interfa if err != nil { return subcommands.ExitFailure } - break + return subcommands.ExitSuccess } time.Sleep(time.Duration(evs.intervalSec) * time.Second) } - - return subcommands.ExitSuccess } diff --git a/runsc/cmd/exec.go b/runsc/cmd/exec.go index 775ed4b43..eafd6285c 100644 --- a/runsc/cmd/exec.go +++ b/runsc/cmd/exec.go @@ -112,7 +112,7 @@ func (ex *Exec) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) } waitStatus := args[1].(*syscall.WaitStatus) - c, err := container.Load(conf.RootDir, id) + c, err := container.LoadAndCheck(conf.RootDir, id) if err != nil { Fatalf("loading sandbox: %v", err) } @@ -150,7 +150,7 @@ func (ex *Exec) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) } func (ex *Exec) exec(c *container.Container, e *control.ExecArgs, waitStatus *syscall.WaitStatus) subcommands.ExitStatus { - // Start the new process and get it pid. + // Start the new process and get its pid. pid, err := c.Execute(e) if err != nil { return Errorf("executing processes for container: %v", err) diff --git a/runsc/cmd/kill.go b/runsc/cmd/kill.go index 04eee99b2..fe69e2a08 100644 --- a/runsc/cmd/kill.go +++ b/runsc/cmd/kill.go @@ -69,7 +69,7 @@ func (k *Kill) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) Fatalf("it is invalid to specify both --all and --pid") } - c, err := container.Load(conf.RootDir, id) + c, err := container.LoadAndCheck(conf.RootDir, id) if err != nil { Fatalf("loading container: %v", err) } diff --git a/runsc/cmd/list.go b/runsc/cmd/list.go index f92d6fef9..6907eb16a 100644 --- a/runsc/cmd/list.go +++ b/runsc/cmd/list.go @@ -79,7 +79,7 @@ func (l *List) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) // Collect the containers. 
var containers []*container.Container for _, id := range ids { - c, err := container.Load(conf.RootDir, id) + c, err := container.LoadAndCheck(conf.RootDir, id) if err != nil { Fatalf("loading container %q: %v", id, err) } diff --git a/runsc/cmd/pause.go b/runsc/cmd/pause.go index 0eb1402ed..fe7d4e257 100644 --- a/runsc/cmd/pause.go +++ b/runsc/cmd/pause.go @@ -55,7 +55,7 @@ func (*Pause) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) s id := f.Arg(0) conf := args[0].(*config.Config) - cont, err := container.Load(conf.RootDir, id) + cont, err := container.LoadAndCheck(conf.RootDir, id) if err != nil { Fatalf("loading container: %v", err) } diff --git a/runsc/cmd/ps.go b/runsc/cmd/ps.go index bc58c928f..18d7a1436 100644 --- a/runsc/cmd/ps.go +++ b/runsc/cmd/ps.go @@ -60,7 +60,7 @@ func (ps *PS) Execute(ctx context.Context, f *flag.FlagSet, args ...interface{}) id := f.Arg(0) conf := args[0].(*config.Config) - c, err := container.Load(conf.RootDir, id) + c, err := container.LoadAndCheck(conf.RootDir, id) if err != nil { Fatalf("loading sandbox: %v", err) } diff --git a/runsc/cmd/resume.go b/runsc/cmd/resume.go index f24823f99..a00928204 100644 --- a/runsc/cmd/resume.go +++ b/runsc/cmd/resume.go @@ -56,7 +56,7 @@ func (r *Resume) Execute(_ context.Context, f *flag.FlagSet, args ...interface{} id := f.Arg(0) conf := args[0].(*config.Config) - cont, err := container.Load(conf.RootDir, id) + cont, err := container.LoadAndCheck(conf.RootDir, id) if err != nil { Fatalf("loading container: %v", err) } diff --git a/runsc/cmd/start.go b/runsc/cmd/start.go index 88991b521..f6499cc44 100644 --- a/runsc/cmd/start.go +++ b/runsc/cmd/start.go @@ -21,6 +21,7 @@ import ( "gvisor.dev/gvisor/runsc/config" "gvisor.dev/gvisor/runsc/container" "gvisor.dev/gvisor/runsc/flag" + "gvisor.dev/gvisor/runsc/specutils" ) // Start implements subcommands.Command for the "start" command. @@ -54,10 +55,16 @@ func (*Start) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) s id := f.Arg(0) conf := args[0].(*config.Config) - c, err := container.Load(conf.RootDir, id) + c, err := container.LoadAndCheck(conf.RootDir, id) if err != nil { Fatalf("loading container: %v", err) } + // Read the spec again here to ensure flag annotations from the spec are + // applied to "conf". + if _, err := specutils.ReadSpec(c.BundleDir, conf); err != nil { + Fatalf("reading spec: %v", err) + } + if err := c.Start(conf); err != nil { Fatalf("starting container: %v", err) } diff --git a/runsc/cmd/state.go b/runsc/cmd/state.go index 2bd2ab9f8..d8a70dd7f 100644 --- a/runsc/cmd/state.go +++ b/runsc/cmd/state.go @@ -57,7 +57,7 @@ func (*State) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) s id := f.Arg(0) conf := args[0].(*config.Config) - c, err := container.Load(conf.RootDir, id) + c, err := container.LoadAndCheck(conf.RootDir, id) if err != nil { Fatalf("loading container: %v", err) } diff --git a/runsc/cmd/symbolize.go b/runsc/cmd/symbolize.go new file mode 100644 index 000000000..fc0c69358 --- /dev/null +++ b/runsc/cmd/symbolize.go @@ -0,0 +1,91 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package cmd + +import ( + "bufio" + "context" + "os" + "strconv" + "strings" + + "github.com/google/subcommands" + "gvisor.dev/gvisor/pkg/coverage" + "gvisor.dev/gvisor/runsc/flag" +) + +// Symbolize implements subcommands.Command for the "symbolize" command. +type Symbolize struct { + dumpAll bool +} + +// Name implements subcommands.Command.Name. +func (*Symbolize) Name() string { + return "symbolize" +} + +// Synopsis implements subcommands.Command.Synopsis. +func (*Symbolize) Synopsis() string { + return "Convert synthetic instruction pointers from kcov into positions in the runsc source code. Only used when Go coverage is enabled." +} + +// Usage implements subcommands.Command.Usage. +func (*Symbolize) Usage() string { + return `symbolize - converts synthetic instruction pointers into positions in the runsc source code. + +This command takes instruction pointers from stdin and converts them into their +corresponding file names and line/column numbers in the runsc source code. The +inputs are not interpreted as actual addresses, but as synthetic values that are +exposed through /sys/kernel/debug/kcov. One can extract coverage information +from kcov and translate those values into locations in the source code by +running symbolize on the same runsc binary. +` +} + +// SetFlags implements subcommands.Command.SetFlags. +func (c *Symbolize) SetFlags(f *flag.FlagSet) { + f.BoolVar(&c.dumpAll, "all", false, "dump information on all coverage blocks along with their synthetic PCs") +} + +// Execute implements subcommands.Command.Execute. +func (c *Symbolize) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) subcommands.ExitStatus { + if f.NArg() != 0 { + f.Usage() + return subcommands.ExitUsageError + } + if !coverage.KcovAvailable() { + return Errorf("symbolize can only be used when coverage is available.") + } + coverage.InitCoverageData() + + if c.dumpAll { + coverage.WriteAllBlocks(os.Stdout) + return subcommands.ExitSuccess + } + + scanner := bufio.NewScanner(os.Stdin) + for scanner.Scan() { + // Input is always base 16, but may or may not have a leading "0x". 
+ str := strings.TrimPrefix(scanner.Text(), "0x") + pc, err := strconv.ParseUint(str, 16 /* base */, 64 /* bitSize */) + if err != nil { + return Errorf("Failed to symbolize \"%s\": %v", scanner.Text(), err) + } + if err := coverage.Symbolize(os.Stdout, pc); err != nil { + return Errorf("Failed to symbolize \"%s\": %v", scanner.Text(), err) + } + } + return subcommands.ExitSuccess +} diff --git a/runsc/cmd/wait.go b/runsc/cmd/wait.go index 28d0642ed..c1d6aeae2 100644 --- a/runsc/cmd/wait.go +++ b/runsc/cmd/wait.go @@ -72,7 +72,7 @@ func (wt *Wait) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) id := f.Arg(0) conf := args[0].(*config.Config) - c, err := container.Load(conf.RootDir, id) + c, err := container.LoadAndCheck(conf.RootDir, id) if err != nil { Fatalf("loading container: %v", err) } diff --git a/runsc/config/config.go b/runsc/config/config.go index f30f79f68..b02d8e2e1 100644 --- a/runsc/config/config.go +++ b/runsc/config/config.go @@ -37,6 +37,9 @@ type Config struct { // RootDir is the runtime root directory. RootDir string `flag:"root"` + // Traceback changes the Go runtime's traceback level. + Traceback string `flag:"traceback"` + // Debug indicates that debug logging should be enabled. Debug bool `flag:"debug"` diff --git a/runsc/config/flags.go b/runsc/config/flags.go index a5f25cfa2..02ab9255a 100644 --- a/runsc/config/flags.go +++ b/runsc/config/flags.go @@ -29,7 +29,7 @@ import ( var registration sync.Once -// This is the set of flags used to populate Config. +// RegisterFlags registers flags used to populate Config. func RegisterFlags() { registration.Do(func() { // Although these flags are not part of the OCI spec, they are used by @@ -49,6 +49,7 @@ func RegisterFlags() { flag.String("debug-log-format", "text", "log format: text (default), json, or json-k8s.") flag.Bool("alsologtostderr", false, "send log messages to stderr.") flag.Bool("allow-flag-override", false, "allow OCI annotations (dev.gvisor.flag.<name>) to override flags for debugging.") + flag.String("traceback", "system", "golang runtime's traceback level") // Debugging flags: strace related flag.Bool("strace", false, "enable strace.") @@ -70,7 +71,7 @@ func RegisterFlags() { flag.Bool("overlay", false, "wrap filesystem mounts with writable overlay. All modifications are stored in memory inside the sandbox.") flag.Bool("overlayfs-stale-read", true, "assume root mount is an overlay filesystem") flag.Bool("fsgofer-host-uds", false, "allow the gofer to mount Unix Domain Sockets.") - flag.Bool("vfs2", false, "TEST ONLY; use while VFSv2 is landing. This uses the new experimental VFS layer.") + flag.Bool("vfs2", false, "enables VFSv2. This uses the new VFS layer that is faster than the previous one.") flag.Bool("fuse", false, "TEST ONLY; use while FUSE in VFSv2 is landing. This allows the use of the new experimental FUSE filesystem.") // Flags that control sandbox runtime behavior: network related. diff --git a/runsc/console/console.go b/runsc/console/console.go index dbb88e117..b36028792 100644 --- a/runsc/console/console.go +++ b/runsc/console/console.go @@ -24,8 +24,8 @@ import ( "golang.org/x/sys/unix" ) -// NewWithSocket creates pty master/replica pair, sends the master FD over the given -// socket, and returns the replica. +// NewWithSocket creates pty master/replica pair, sends the master FD over the +// given socket, and returns the replica. func NewWithSocket(socketPath string) (*os.File, error) { // Create a new pty master and replica. 
	ptyMaster, ptyReplica, err := pty.Open()
diff --git a/runsc/container/BUILD b/runsc/container/BUILD
index c33755482..8793c8916 100644
--- a/runsc/container/BUILD
+++ b/runsc/container/BUILD
@@ -1,4 +1,4 @@
-load("//tools:defs.bzl", "go_library", "go_test")
+load("//tools:defs.bzl", "go_library", "go_test", "more_shards")

package(licenses = ["notice"])

@@ -24,6 +24,7 @@ go_library(
        "//runsc/boot",
        "//runsc/cgroup",
        "//runsc/config",
+       "//runsc/console",
        "//runsc/sandbox",
        "//runsc/specutils",
        "@com_github_cenkalti_backoff//:go_default_library",
@@ -48,7 +49,7 @@ go_test(
        "//test/cmd/test_app",
    ],
    library = ":container",
-    shard_count = 10,
+    shard_count = more_shards,
    tags = [
        "requires-kvm",
    ],
diff --git a/runsc/container/console_test.go b/runsc/container/console_test.go
index 4228399b8..1b0fdebd6 100644
--- a/runsc/container/console_test.go
+++ b/runsc/container/console_test.go
@@ -18,6 +18,7 @@ import (
 	"bytes"
 	"fmt"
 	"io"
+	"math/rand"
 	"os"
 	"path/filepath"
 	"syscall"
@@ -27,7 +28,6 @@ import (
 	"github.com/kr/pty"
 	"golang.org/x/sys/unix"
 	"gvisor.dev/gvisor/pkg/sentry/control"
-	"gvisor.dev/gvisor/pkg/sentry/kernel"
 	"gvisor.dev/gvisor/pkg/sync"
 	"gvisor.dev/gvisor/pkg/test/testutil"
 	"gvisor.dev/gvisor/pkg/unet"
@@ -38,19 +38,22 @@ import (
 // path is under 108 characters (the unix socket path length limit),
 // relativizing the path if necessary.
 func socketPath(bundleDir string) (string, error) {
-	path := filepath.Join(bundleDir, "socket")
+	num := rand.Intn(10000)
+	path := filepath.Join(bundleDir, fmt.Sprintf("socket-%04d", num))
+	const maxPathLen = 108
+	if len(path) <= maxPathLen {
+		return path, nil
+	}
+
+	// Path is too large, try to make it smaller.
 	cwd, err := os.Getwd()
 	if err != nil {
 		return "", fmt.Errorf("error getting cwd: %v", err)
 	}
-	relPath, err := filepath.Rel(cwd, path)
+	path, err = filepath.Rel(cwd, path)
 	if err != nil {
 		return "", fmt.Errorf("error getting relative path for %q from cwd %q: %v", path, cwd, err)
 	}
-	if len(path) > len(relPath) {
-		path = relPath
-	}
-	const maxPathLen = 108
 	if len(path) > maxPathLen {
 		return "", fmt.Errorf("could not get socket path under length limit %d: %s", maxPathLen, path)
 	}
@@ -159,6 +162,82 @@ func TestConsoleSocket(t *testing.T) {
 	}
 }

+// Test that a pty FD is sent over the console socket if one is provided.
+func TestMultiContainerConsoleSocket(t *testing.T) {
+	for name, conf := range configsWithVFS2(t, all...) {
+		t.Run(name, func(t *testing.T) {
+			rootDir, cleanup, err := testutil.SetupRootDir()
+			if err != nil {
+				t.Fatalf("error creating root dir: %v", err)
+			}
+			defer cleanup()
+			conf.RootDir = rootDir
+
+			// Setup the containers.
+			sleep := []string{"sleep", "100"}
+			tru := []string{"true"}
+			testSpecs, ids := createSpecs(sleep, tru)
+			testSpecs[1].Process.Terminal = true
+
+			bundleDir, cleanup, err := testutil.SetupBundleDir(testSpecs[0])
+			if err != nil {
+				t.Fatalf("error setting up container: %v", err)
+			}
+			defer cleanup()
+
+			args := Args{
+				ID:        ids[0],
+				Spec:      testSpecs[0],
+				BundleDir: bundleDir,
+			}
+			rootCont, err := New(conf, args)
+			if err != nil {
+				t.Fatalf("error creating container: %v", err)
+			}
+			defer rootCont.Destroy()
+			if err := rootCont.Start(conf); err != nil {
+				t.Fatalf("error starting container: %v", err)
+			}
+
+			bundleDir, cleanup, err = testutil.SetupBundleDir(testSpecs[1])
+			if err != nil {
+				t.Fatalf("error setting up container: %v", err)
+			}
+			defer cleanup()
+
+			sock, err := socketPath(bundleDir)
+			if err != nil {
+				t.Fatalf("error getting socket path: %v", err)
+			}
+			srv, cleanup := createConsoleSocket(t, sock)
+			defer cleanup()
+
+			// Create the container and pass the socket name.
+			args = Args{
+				ID:            ids[1],
+				Spec:          testSpecs[1],
+				BundleDir:     bundleDir,
+				ConsoleSocket: sock,
+			}
+			cont, err := New(conf, args)
+			if err != nil {
+				t.Fatalf("error creating container: %v", err)
+			}
+			defer cont.Destroy()
+			if err := cont.Start(conf); err != nil {
+				t.Fatalf("error starting container: %v", err)
+			}
+
+			// Make sure we get a console PTY.
+			ptyMaster, err := receiveConsolePTY(srv)
+			if err != nil {
+				t.Fatalf("error receiving console FD: %v", err)
+			}
+			ptyMaster.Close()
+		})
+	}
+}
+
 // Test that job control signals work on a console created with "exec -ti".
 func TestJobControlSignalExec(t *testing.T) {
 	spec := testutil.NewSpecWithArgs("/bin/sleep", "10000")
@@ -221,9 +300,9 @@ func TestJobControlSignalExec(t *testing.T) {
 	// Make sure all the processes are running.
 	expectedPL := []*control.Process{
 		// Root container process.
-		{PID: 1, Cmd: "sleep", Threads: []kernel.ThreadID{1}},
+		newProcessBuilder().Cmd("sleep").Process(),
 		// Bash from exec process.
-		{PID: 2, Cmd: "bash", Threads: []kernel.ThreadID{2}},
+		newProcessBuilder().PID(2).Cmd("bash").Process(),
 	}
 	if err := waitForProcessList(c, expectedPL); err != nil {
 		t.Error(err)
@@ -233,7 +312,7 @@ func TestJobControlSignalExec(t *testing.T) {
 	ptyMaster.Write([]byte("sleep 100\n"))

 	// Wait for it to start. Sleep's PPID is bash's PID.
-	expectedPL = append(expectedPL, &control.Process{PID: 3, PPID: 2, Cmd: "sleep", Threads: []kernel.ThreadID{3}})
+	expectedPL = append(expectedPL, newProcessBuilder().PID(3).PPID(2).Cmd("sleep").Process())
 	if err := waitForProcessList(c, expectedPL); err != nil {
 		t.Error(err)
 	}
@@ -254,7 +333,7 @@
 	// Sleep is dead, but it may take more time for bash to notice and
 	// change the foreground process back to itself. We know it is done
 	// when bash writes "Terminated" to the pty.
-	if err := testutil.WaitUntilRead(ptyMaster, "Terminated", nil, 5*time.Second); err != nil {
+	if err := testutil.WaitUntilRead(ptyMaster, "Terminated", 5*time.Second); err != nil {
 		t.Fatalf("bash did not take over pty: %v", err)
 	}
@@ -359,7 +438,7 @@ func TestJobControlSignalRootContainer(t *testing.T) {
 	// Wait for bash to start.
	expectedPL := []*control.Process{
-		{PID: 1, Cmd: "bash", Threads: []kernel.ThreadID{1}},
+		newProcessBuilder().PID(1).Cmd("bash").Process(),
 	}
 	if err := waitForProcessList(c, expectedPL); err != nil {
 		t.Fatalf("error waiting for processes: %v", err)
@@ -369,7 +448,7 @@ func TestJobControlSignalRootContainer(t *testing.T) {
 	ptyMaster.Write([]byte("sleep 100\n"))

 	// Wait for sleep to start.
-	expectedPL = append(expectedPL, &control.Process{PID: 2, PPID: 1, Cmd: "sleep", Threads: []kernel.ThreadID{2}})
+	expectedPL = append(expectedPL, newProcessBuilder().PID(2).PPID(1).Cmd("sleep").Process())
 	if err := waitForProcessList(c, expectedPL); err != nil {
 		t.Fatalf("error waiting for processes: %v", err)
 	}
@@ -393,7 +472,7 @@ func TestJobControlSignalRootContainer(t *testing.T) {
 	// Sleep is dead, but it may take more time for bash to notice and
 	// change the foreground process back to itself. We know it is done
 	// when bash writes "Terminated" to the pty.
-	if err := testutil.WaitUntilRead(ptyBuf, "Terminated", nil, 5*time.Second); err != nil {
+	if err := testutil.WaitUntilRead(ptyBuf, "Terminated", 5*time.Second); err != nil {
 		t.Fatalf("bash did not take over pty: %v", err)
 	}
@@ -414,6 +493,104 @@ func TestJobControlSignalRootContainer(t *testing.T) {
 	}
 }

+// Test that terminal works with root and sub-containers.
+func TestMultiContainerTerminal(t *testing.T) {
+	for name, conf := range configsWithVFS2(t, all...) {
+		t.Run(name, func(t *testing.T) {
+			rootDir, cleanup, err := testutil.SetupRootDir()
+			if err != nil {
+				t.Fatalf("error creating root dir: %v", err)
+			}
+			defer cleanup()
+			conf.RootDir = rootDir
+
+			// Don't let bash execute from profile or rc files, otherwise our PID
+			// counts get messed up.
+			bash := []string{"/bin/bash", "--noprofile", "--norc"}
+			testSpecs, ids := createSpecs(bash, bash)
+
+			type termContainer struct {
+				container *Container
+				master    *os.File
+			}
+			var containers []termContainer
+			for i, spec := range testSpecs {
+				bundleDir, cleanup, err := testutil.SetupBundleDir(spec)
+				if err != nil {
+					t.Fatalf("error setting up container: %v", err)
+				}
+				defer cleanup()
+
+				spec.Process.Terminal = true
+				sock, err := socketPath(bundleDir)
+				if err != nil {
+					t.Fatalf("error getting socket path: %v", err)
+				}
+				srv, cleanup := createConsoleSocket(t, sock)
+				defer cleanup()
+
+				// Create the container and pass the socket name.
+				args := Args{
+					ID:            ids[i],
+					Spec:          spec,
+					BundleDir:     bundleDir,
+					ConsoleSocket: sock,
+				}
+				cont, err := New(conf, args)
+				if err != nil {
+					t.Fatalf("error creating container: %v", err)
+				}
+				defer cont.Destroy()
+
+				if err := cont.Start(conf); err != nil {
+					t.Fatalf("error starting container: %v", err)
+				}
+
+				// Make sure we get a console PTY.
+				ptyMaster, err := receiveConsolePTY(srv)
+				if err != nil {
+					t.Fatalf("error receiving console FD: %v", err)
+				}
+				defer ptyMaster.Close()
+
+				containers = append(containers, termContainer{
+					container: cont,
+					master:    ptyMaster,
+				})
+			}
+
+			for _, tc := range containers {
+				// Bash output as well as sandbox output will be written to the PTY
+				// file. Writes after a certain point will block unless we drain the
+				// PTY, so we must continually copy from it.
+				//
+				// We log the output to stderr for debuggability, and also to a buffer,
+				// since we wait on particular output from bash below. We use a custom
+				// blockingBuffer which is thread-safe and also blocks on Read calls,
+				// which makes this a suitable Reader for WaitUntilRead.
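The blockingBuffer type referenced here is declared further down in console_test.go and is unchanged by this diff. The idea can be sketched as follows (illustrative only, assuming a sync.Cond; not the actual implementation):

package sketch

import (
	"bytes"
	"sync"
)

// blockingBuf is a sketch of a thread-safe buffer whose Reads block
// while the buffer is empty, which makes it usable as the Reader
// passed to a polling helper like WaitUntilRead.
type blockingBuf struct {
	mu   sync.Mutex
	cond *sync.Cond
	buf  bytes.Buffer
}

func newBlockingBuf() *blockingBuf {
	b := &blockingBuf{}
	b.cond = sync.NewCond(&b.mu)
	return b
}

func (b *blockingBuf) Write(p []byte) (int, error) {
	b.mu.Lock()
	defer b.mu.Unlock()
	n, err := b.buf.Write(p)
	b.cond.Broadcast() // wake any blocked readers
	return n, err
}

func (b *blockingBuf) Read(p []byte) (int, error) {
	b.mu.Lock()
	defer b.mu.Unlock()
	for b.buf.Len() == 0 {
		b.cond.Wait() // block until a Write arrives
	}
	return b.buf.Read(p)
}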
+				ptyBuf := newBlockingBuffer()
+				tee := io.TeeReader(tc.master, ptyBuf)
+				go io.Copy(os.Stderr, tee)
+
+				// Wait for bash to start.
+				expectedPL := []*control.Process{
+					newProcessBuilder().Cmd("bash").Process(),
+				}
+				if err := waitForProcessList(tc.container, expectedPL); err != nil {
+					t.Fatalf("error waiting for processes: %v", err)
+				}
+
+				// Execute echo command and check that it was executed correctly. Use
+				// a variable so the match is not against the echoed command itself.
+				tc.master.Write([]byte("echo foo-${PWD}-123\n"))
+				if err := testutil.WaitUntilRead(ptyBuf, "foo-/-123", 5*time.Second); err != nil {
+					t.Fatalf("echo didn't execute: %v", err)
+				}
+			}
+		})
+	}
+}
+
 // blockingBuffer is a thread-safe buffer that blocks when reading if the
 // buffer is empty. It implements io.ReadWriter.
 type blockingBuffer struct {
diff --git a/runsc/container/container.go b/runsc/container/container.go
index 63f64ce6e..418a27beb 100644
--- a/runsc/container/container.go
+++ b/runsc/container/container.go
@@ -38,6 +38,7 @@ import (
 	"gvisor.dev/gvisor/runsc/boot"
 	"gvisor.dev/gvisor/runsc/cgroup"
 	"gvisor.dev/gvisor/runsc/config"
+	"gvisor.dev/gvisor/runsc/console"
 	"gvisor.dev/gvisor/runsc/sandbox"
 	"gvisor.dev/gvisor/runsc/specutils"
 )
@@ -79,6 +80,7 @@ func validateID(id string) error {
 // - It calls 'runsc delete'. runc implementation kills --all SIGKILL once
 // again just to be sure, waits, and then proceeds with remaining teardown.
 //
+// Container is thread-unsafe.
 type Container struct {
 	// ID is the container ID.
 	ID string `json:"id"`
@@ -159,9 +161,9 @@ func loadSandbox(rootDir, id string) ([]*Container, error) {
 // container to which id unambiguously refers to. Returns ErrNotExist if
 // container doesn't exist.
 func Load(rootDir, partialID string) (*Container, error) {
-	log.Debugf("Load container %q %q", rootDir, partialID)
+	log.Debugf("Load container, rootDir: %q, partial cid: %s", rootDir, partialID)
 	if err := validateID(partialID); err != nil {
-		return nil, fmt.Errorf("validating id: %v", err)
+		return nil, fmt.Errorf("invalid container id: %v", err)
 	}

 	id, err := findContainerID(rootDir, partialID)
@@ -184,22 +186,31 @@ func Load(rootDir, partialID string) (*Container, error) {
 		}
 		return nil, fmt.Errorf("reading container metadata file %q: %v", state.statePath(), err)
 	}
+	return c, nil
+}
+
+// LoadAndCheck is similar to Load(), but also checks if the container is still
+// running, so that the caller gets an error earlier.
+func LoadAndCheck(rootDir, partialID string) (*Container, error) {
+	c, err := Load(rootDir, partialID)
+	if err != nil {
+		// Preserve error so that callers can distinguish 'not found' errors.
+		return nil, err
+	}

-	// If the status is "Running" or "Created", check that the sandbox
-	// process still exists, and set it to Stopped if it does not.
+	// If the status is "Running" or "Created", check that the sandbox/container
+	// is still running, setting it to Stopped if not.
 	//
 	// This is inherently racy.
-	if c.Status == Running || c.Status == Created {
-		// Check if the sandbox process is still running.
+	switch c.Status {
+	case Created:
 		if !c.isSandboxRunning() {
 			// Sandbox no longer exists, so this container definitely does not exist.
 			c.changeStatus(Stopped)
-		} else if c.Status == Running {
-			// Container state should reflect the actual state of the application, so
-			// we don't consider gofer process here.
-			if err := c.SignalContainer(syscall.Signal(0), false); err != nil {
-				c.changeStatus(Stopped)
-			}
+		}
+	case Running:
+		if err := c.SignalContainer(syscall.Signal(0), false); err != nil {
+			c.changeStatus(Stopped)
 		}
 	}
@@ -271,7 +282,7 @@ type Args struct {
 // indicates that an existing Sandbox should be used. The caller must call
 // Destroy() on the container.
 func New(conf *config.Config, args Args) (*Container, error) {
-	log.Debugf("Create container %q in root dir: %s", args.ID, conf.RootDir)
+	log.Debugf("Create container, cid: %s, rootDir: %q", args.ID, conf.RootDir)
 	if err := validateID(args.ID); err != nil {
 		return nil, err
 	}
@@ -310,7 +321,15 @@ func New(conf *config.Config, args Args) (*Container, error) {
 	// indicate the ID of the sandbox, which is the same as the ID of the
 	// init container in the sandbox.
 	if isRoot(args.Spec) {
-		log.Debugf("Creating new sandbox for container %q", args.ID)
+		log.Debugf("Creating new sandbox for container, cid: %s", args.ID)
+
+		if args.Spec.Linux == nil {
+			args.Spec.Linux = &specs.Linux{}
+		}
+		// Don't force the use of cgroups in tests because they lack permission to do so.
+		if args.Spec.Linux.CgroupsPath == "" && !conf.TestOnlyAllowRunAsCurrentUserWithoutChroot {
+			args.Spec.Linux.CgroupsPath = "/" + args.ID
+		}

 		// Create and join cgroup before processes are created to ensure they are
 		// part of the cgroup from the start (and all their children processes).
@@ -321,7 +340,13 @@ func New(conf *config.Config, args Args) (*Container, error) {
 		if cg != nil {
 			// If there is cgroup config, install it before creating sandbox process.
 			if err := cg.Install(args.Spec.Linux.Resources); err != nil {
-				return nil, fmt.Errorf("configuring cgroup: %v", err)
+				switch {
+				case errors.Is(err, syscall.EACCES) && conf.Rootless:
+					log.Warningf("Skipping cgroup configuration in rootless mode: %v", err)
+					cg = nil
+				default:
+					return nil, fmt.Errorf("configuring cgroup: %v", err)
+				}
 			}
 		}
 		if err := runInCgroup(cg, func() error {
@@ -366,15 +391,30 @@ func New(conf *config.Config, args Args) (*Container, error) {
 		if !ok {
 			return nil, fmt.Errorf("no sandbox ID found when creating container")
 		}
-		log.Debugf("Creating new container %q in sandbox %q", c.ID, sbid)
+		log.Debugf("Creating new container, cid: %s, sandbox: %s", c.ID, sbid)

 		// Find the sandbox associated with this ID.
-		sb, err := Load(conf.RootDir, sbid)
+		sb, err := LoadAndCheck(conf.RootDir, sbid)
 		if err != nil {
 			return nil, err
 		}
 		c.Sandbox = sb.Sandbox
-		if err := c.Sandbox.CreateContainer(c.ID); err != nil {
+
+		// If the console control socket file is provided, then create a new
+		// pty master/replica pair and send the TTY to the sandbox process.
+		var tty *os.File
+		if c.ConsoleSocket != "" {
+			// Create a new TTY pair and send the master on the provided socket.
+			var err error
+			tty, err = console.NewWithSocket(c.ConsoleSocket)
+			if err != nil {
+				return nil, fmt.Errorf("setting up console with socket %q: %w", c.ConsoleSocket, err)
+			}
+			// The TTY file is transferred to the sandbox, so it can be closed here.
+			defer tty.Close()
+		}
+
+		if err := c.Sandbox.CreateContainer(c.ID, tty); err != nil {
 			return nil, err
 		}
 	}
@@ -399,7 +439,7 @@ func New(conf *config.Config, args Args) (*Container, error) {

 // Start starts running the containerized process inside the sandbox.
func (c *Container) Start(conf *config.Config) error { - log.Debugf("Start container %q", c.ID) + log.Debugf("Start container, cid: %s", c.ID) if err := c.Saver.lock(); err != nil { return err @@ -428,11 +468,16 @@ func (c *Container) Start(conf *config.Config) error { // the start (and all their children processes). if err := runInCgroup(c.Sandbox.Cgroup, func() error { // Create the gofer process. - ioFiles, mountsFile, err := c.createGoferProcess(c.Spec, conf, c.BundleDir, false) + goferFiles, mountsFile, err := c.createGoferProcess(c.Spec, conf, c.BundleDir, false) if err != nil { return err } - defer mountsFile.Close() + defer func() { + _ = mountsFile.Close() + for _, f := range goferFiles { + _ = f.Close() + } + }() cleanMounts, err := specutils.ReadMounts(mountsFile) if err != nil { @@ -440,7 +485,14 @@ func (c *Container) Start(conf *config.Config) error { } c.Spec.Mounts = cleanMounts - return c.Sandbox.StartContainer(c.Spec, conf, c.ID, ioFiles) + // Setup stdios if the container is not using terminal. Otherwise TTY was + // already setup in create. + var stdios []*os.File + if !c.Spec.Process.Terminal { + stdios = []*os.File{os.Stdin, os.Stdout, os.Stderr} + } + + return c.Sandbox.StartContainer(c.Spec, conf, c.ID, stdios, goferFiles) }); err != nil { return err } @@ -462,7 +514,7 @@ func (c *Container) Start(conf *config.Config) error { unlock.Clean() // Adjust the oom_score_adj for sandbox. This must be done after saveLocked(). - if err := adjustSandboxOOMScoreAdj(c.Sandbox, c.Saver.RootDir, false); err != nil { + if err := adjustSandboxOOMScoreAdj(c.Sandbox, c.Spec, c.Saver.RootDir, false); err != nil { return err } @@ -474,7 +526,7 @@ func (c *Container) Start(conf *config.Config) error { // Restore takes a container and replaces its kernel and file system // to restore a container from its state file. func (c *Container) Restore(spec *specs.Spec, conf *config.Config, restoreFile string) error { - log.Debugf("Restore container %q", c.ID) + log.Debugf("Restore container, cid: %s", c.ID) if err := c.Saver.lock(); err != nil { return err } @@ -501,7 +553,7 @@ func (c *Container) Restore(spec *specs.Spec, conf *config.Config, restoreFile s // Run is a helper that calls Create + Start + Wait. func Run(conf *config.Config, args Args) (syscall.WaitStatus, error) { - log.Debugf("Run container %q in root dir: %s", args.ID, conf.RootDir) + log.Debugf("Run container, cid: %s, rootDir: %q", args.ID, conf.RootDir) c, err := New(conf, args) if err != nil { return 0, fmt.Errorf("creating container: %v", err) @@ -533,7 +585,7 @@ func Run(conf *config.Config, args Args) (syscall.WaitStatus, error) { // Execute runs the specified command in the container. It returns the PID of // the newly created process. func (c *Container) Execute(args *control.ExecArgs) (int32, error) { - log.Debugf("Execute in container %q, args: %+v", c.ID, args) + log.Debugf("Execute in container, cid: %s, args: %+v", c.ID, args) if err := c.requireStatus("execute in", Created, Running); err != nil { return 0, err } @@ -543,7 +595,7 @@ func (c *Container) Execute(args *control.ExecArgs) (int32, error) { // Event returns events for the container. 
func (c *Container) Event() (*boot.Event, error) { - log.Debugf("Getting events for container %q", c.ID) + log.Debugf("Getting events for container, cid: %s", c.ID) if err := c.requireStatus("get events for", Created, Running, Paused); err != nil { return nil, err } @@ -563,14 +615,19 @@ func (c *Container) SandboxPid() int { // Call to wait on a stopped container is needed to retrieve the exit status // and wait returns immediately. func (c *Container) Wait() (syscall.WaitStatus, error) { - log.Debugf("Wait on container %q", c.ID) - return c.Sandbox.Wait(c.ID) + log.Debugf("Wait on container, cid: %s", c.ID) + ws, err := c.Sandbox.Wait(c.ID) + if err == nil { + // Wait succeeded, container is not running anymore. + c.changeStatus(Stopped) + } + return ws, err } // WaitRootPID waits for process 'pid' in the sandbox's PID namespace and // returns its WaitStatus. func (c *Container) WaitRootPID(pid int32) (syscall.WaitStatus, error) { - log.Debugf("Wait on PID %d in sandbox %q", pid, c.Sandbox.ID) + log.Debugf("Wait on process %d in sandbox, cid: %s", pid, c.Sandbox.ID) if !c.isSandboxRunning() { return 0, fmt.Errorf("sandbox is not running") } @@ -580,7 +637,7 @@ func (c *Container) WaitRootPID(pid int32) (syscall.WaitStatus, error) { // WaitPID waits for process 'pid' in the container's PID namespace and returns // its WaitStatus. func (c *Container) WaitPID(pid int32) (syscall.WaitStatus, error) { - log.Debugf("Wait on PID %d in container %q", pid, c.ID) + log.Debugf("Wait on process %d in container, cid: %s", pid, c.ID) if !c.isSandboxRunning() { return 0, fmt.Errorf("sandbox is not running") } @@ -592,7 +649,7 @@ func (c *Container) WaitPID(pid int32) (syscall.WaitStatus, error) { // SignalContainer returns an error if the container is already stopped. // TODO(b/113680494): Distinguish different error types. func (c *Container) SignalContainer(sig syscall.Signal, all bool) error { - log.Debugf("Signal container %q: %v", c.ID, sig) + log.Debugf("Signal container, cid: %s, signal: %v (%d)", c.ID, sig, sig) // Signaling container in Stopped state is allowed. When all=false, // an error will be returned anyway; when all=true, this allows // sending signal to other processes inside the container even @@ -609,7 +666,7 @@ func (c *Container) SignalContainer(sig syscall.Signal, all bool) error { // SignalProcess sends sig to a specific process in the container. func (c *Container) SignalProcess(sig syscall.Signal, pid int32) error { - log.Debugf("Signal process %d in container %q: %v", pid, c.ID, sig) + log.Debugf("Signal process %d in container, cid: %s, signal: %v (%d)", pid, c.ID, sig, sig) if err := c.requireStatus("signal a process inside", Running); err != nil { return err } @@ -623,15 +680,15 @@ func (c *Container) SignalProcess(sig syscall.Signal, pid int32) error { // container process inside the sandbox. It returns a function that will stop // forwarding signals. 
func (c *Container) ForwardSignals(pid int32, fgProcess bool) func() {
-	log.Debugf("Forwarding all signals to container %q PID %d fgProcess=%t", c.ID, pid, fgProcess)
+	log.Debugf("Forwarding all signals to container, cid: %s, PID: %d, fgProcess: %t", c.ID, pid, fgProcess)
 	stop := sighandling.StartSignalForwarding(func(sig linux.Signal) {
-		log.Debugf("Forwarding signal %d to container %q PID %d fgProcess=%t", sig, c.ID, pid, fgProcess)
+		log.Debugf("Forwarding signal %d to container, cid: %s, PID: %d, fgProcess: %t", sig, c.ID, pid, fgProcess)
 		if err := c.Sandbox.SignalProcess(c.ID, pid, syscall.Signal(sig), fgProcess); err != nil {
 			log.Warningf("error forwarding signal %d to container %q: %v", sig, c.ID, err)
 		}
 	})
 	return func() {
-		log.Debugf("Done forwarding signals to container %q PID %d fgProcess=%t", c.ID, pid, fgProcess)
+		log.Debugf("Done forwarding signals to container, cid: %s, PID: %d, fgProcess: %t", c.ID, pid, fgProcess)
 		stop()
 	}
 }
@@ -639,7 +696,7 @@ func (c *Container) ForwardSignals(pid int32, fgProcess bool) func() {
 // Checkpoint sends the checkpoint call to the container.
 // The statefile will be written to f, the file at the specified image-path.
 func (c *Container) Checkpoint(f *os.File) error {
-	log.Debugf("Checkpoint container %q", c.ID)
+	log.Debugf("Checkpoint container, cid: %s", c.ID)
 	if err := c.requireStatus("checkpoint", Created, Running, Paused); err != nil {
 		return err
 	}
@@ -649,7 +706,7 @@
 // Pause suspends the container and its kernel.
 // The call only succeeds if the container's status is created or running.
 func (c *Container) Pause() error {
-	log.Debugf("Pausing container %q", c.ID)
+	log.Debugf("Pausing container, cid: %s", c.ID)
 	if err := c.Saver.lock(); err != nil {
 		return err
 	}
@@ -660,7 +717,7 @@
 	}

 	if err := c.Sandbox.Pause(c.ID); err != nil {
-		return fmt.Errorf("pausing container: %v", err)
+		return fmt.Errorf("pausing container %q: %v", c.ID, err)
 	}
 	c.changeStatus(Paused)
 	return c.saveLocked()
@@ -669,7 +726,7 @@
 // Resume unpauses the container and its kernel.
 // The call only succeeds if the container's status is paused.
 func (c *Container) Resume() error {
-	log.Debugf("Resuming container %q", c.ID)
+	log.Debugf("Resuming container, cid: %s", c.ID)
 	if err := c.Saver.lock(); err != nil {
 		return err
 	}
@@ -708,7 +765,7 @@
 // Destroy stops all processes and frees all resources associated with the
 // container.
 func (c *Container) Destroy() error {
-	log.Debugf("Destroy container %q", c.ID)
+	log.Debugf("Destroy container, cid: %s", c.ID)

 	if err := c.Saver.lock(); err != nil {
 		return err
@@ -745,14 +802,12 @@
 	c.changeStatus(Stopped)

 	// Adjust oom_score_adj for the sandbox. This must be done after the container
-	// is stopped and the directory at c.Root is removed. Adjustment can be
-	// skipped if the root container is exiting, because it brings down the entire
-	// sandbox.
+	// is stopped and the directory at c.Root is removed.
 	//
 	// Use 'sb' to tell whether it has been executed before because Destroy must
 	// be idempotent.
- if sb != nil && !isRoot(c.Spec) { - if err := adjustSandboxOOMScoreAdj(sb, c.Saver.RootDir, true); err != nil { + if sb != nil { + if err := adjustSandboxOOMScoreAdj(sb, c.Spec, c.Saver.RootDir, true); err != nil { errs = append(errs, err.Error()) } } @@ -781,7 +836,7 @@ func (c *Container) Destroy() error { // // Precondition: container must be locked with container.lock(). func (c *Container) saveLocked() error { - log.Debugf("Save container %q", c.ID) + log.Debugf("Save container, cid: %s", c.ID) if err := c.Saver.saveLocked(c); err != nil { return fmt.Errorf("saving container metadata: %v", err) } @@ -795,7 +850,7 @@ func (c *Container) stop() error { var cgroup *cgroup.Cgroup if c.Sandbox != nil { - log.Debugf("Destroying container %q", c.ID) + log.Debugf("Destroying container, cid: %s", c.ID) if err := c.Sandbox.DestroyContainer(c.ID); err != nil { return fmt.Errorf("destroying container %q: %v", c.ID, err) } @@ -809,7 +864,7 @@ func (c *Container) stop() error { // Try killing gofer if it does not exit with container. if c.GoferPid != 0 { - log.Debugf("Killing gofer for container %q, PID: %d", c.ID, c.GoferPid) + log.Debugf("Killing gofer for container, cid: %s, PID: %d", c.ID, c.GoferPid) if err := syscall.Kill(c.GoferPid, syscall.SIGKILL); err != nil { // The gofer may already be stopped, log the error. log.Warningf("Error sending signal %d to gofer %d: %v", syscall.SIGKILL, c.GoferPid, err) @@ -1082,7 +1137,13 @@ func (c *Container) adjustGoferOOMScoreAdj() error { // TODO(gvisor.dev/issue/238): This call could race with other containers being // created at the same time and end up setting the wrong oom_score_adj to the // sandbox. Use rpc client to synchronize. -func adjustSandboxOOMScoreAdj(s *sandbox.Sandbox, rootDir string, destroy bool) error { +func adjustSandboxOOMScoreAdj(s *sandbox.Sandbox, spec *specs.Spec, rootDir string, destroy bool) error { + // Adjustment can be skipped if the root container is exiting, because it + // brings down the entire sandbox. + if isRoot(spec) && destroy { + return nil + } + containers, err := loadSandbox(rootDir, s.ID) if err != nil { return fmt.Errorf("loading sandbox containers: %v", err) @@ -1096,53 +1157,34 @@ func adjustSandboxOOMScoreAdj(s *sandbox.Sandbox, rootDir string, destroy bool) // Get the lowest score for all containers. var lowScore int scoreFound := false - if len(containers) == 1 && specutils.SpecContainerType(containers[0].Spec) == specutils.ContainerTypeUnspecified { - // This is a single-container sandbox. Set the oom_score_adj to - // the value specified in the OCI bundle. - if containers[0].Spec.Process.OOMScoreAdj != nil { - scoreFound = true - lowScore = *containers[0].Spec.Process.OOMScoreAdj + for _, container := range containers { + // Special multi-container support for CRI. Ignore the root container when + // calculating oom_score_adj for the sandbox because it is the + // infrastructure (pause) container and always has a very low oom_score_adj. + // + // We will use OOMScoreAdj in the single-container case where the + // containerd container-type annotation is not present. + if specutils.SpecContainerType(container.Spec) == specutils.ContainerTypeSandbox { + continue } - } else { - for _, container := range containers { - // Special multi-container support for CRI. Ignore the root - // container when calculating oom_score_adj for the sandbox because - // it is the infrastructure (pause) container and always has a very - // low oom_score_adj. 
- // - // We will use OOMScoreAdj in the single-container case where the - // containerd container-type annotation is not present. - if specutils.SpecContainerType(container.Spec) == specutils.ContainerTypeSandbox { - continue - } - if container.Spec.Process.OOMScoreAdj != nil && (!scoreFound || *container.Spec.Process.OOMScoreAdj < lowScore) { - scoreFound = true - lowScore = *container.Spec.Process.OOMScoreAdj - } + if container.Spec.Process.OOMScoreAdj != nil && (!scoreFound || *container.Spec.Process.OOMScoreAdj < lowScore) { + scoreFound = true + lowScore = *container.Spec.Process.OOMScoreAdj } } // If the container is destroyed and remaining containers have no - // oomScoreAdj specified then we must revert to the oom_score_adj of the - // parent process. + // oomScoreAdj specified then we must revert to the original oom_score_adj + // saved with the root container. if !scoreFound && destroy { - ppid, err := specutils.GetParentPid(s.Pid) - if err != nil { - return fmt.Errorf("getting parent pid of sandbox pid %d: %v", s.Pid, err) - } - pScore, err := specutils.GetOOMScoreAdj(ppid) - if err != nil { - return fmt.Errorf("getting oom_score_adj of parent %d: %v", ppid, err) - } - + lowScore = containers[0].Sandbox.OriginalOOMScoreAdj scoreFound = true - lowScore = pScore } - // Only set oom_score_adj if one of the containers has oom_score_adj set - // in the OCI bundle. If not, we need to inherit the parent process's - // oom_score_adj. + // Only set oom_score_adj if one of the containers has oom_score_adj set. If + // not, oom_score_adj is inherited from the parent process. + // // See: https://github.com/opencontainers/runtime-spec/blob/master/config.md#linux-process if !scoreFound { return nil diff --git a/runsc/container/container_test.go b/runsc/container/container_test.go index cc188f45b..fa99e403a 100644 --- a/runsc/container/container_test.go +++ b/runsc/container/container_test.go @@ -364,7 +364,7 @@ func TestLifecycle(t *testing.T) { defer c.Destroy() // Load the container from disk and check the status. - c, err = Load(rootDir, args.ID) + c, err = LoadAndCheck(rootDir, args.ID) if err != nil { t.Fatalf("error loading container: %v", err) } @@ -387,7 +387,7 @@ func TestLifecycle(t *testing.T) { } // Load the container from disk and check the status. - c, err = Load(rootDir, args.ID) + c, err = LoadAndCheck(rootDir, args.ID) if err != nil { t.Fatalf("error loading container: %v", err) } @@ -428,7 +428,7 @@ func TestLifecycle(t *testing.T) { } // Load the container from disk and check the status. - c, err = Load(rootDir, args.ID) + c, err = LoadAndCheck(rootDir, args.ID) if err != nil { t.Fatalf("error loading container: %v", err) } @@ -451,7 +451,7 @@ func TestLifecycle(t *testing.T) { } // Loading the container by id should fail. 
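The Load-to-LoadAndCheck migration in these call sites relies on the classic signal-0 liveness probe that SignalContainer(syscall.Signal(0), false) performs. A standalone sketch of the underlying idiom:

package sketch

import "syscall"

// pidAlive reports whether a process can currently be signaled.
// Signal 0 performs the existence and permission checks without
// delivering any signal. A fuller implementation might treat EPERM
// (process exists but belongs to someone else) as "alive".
func pidAlive(pid int) bool {
	return syscall.Kill(pid, syscall.Signal(0)) == nil
}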
- if _, err = Load(rootDir, args.ID); err == nil { + if _, err = LoadAndCheck(rootDir, args.ID); err == nil { t.Errorf("expected loading destroyed container to fail, but it did not") } }) @@ -1738,7 +1738,7 @@ func doAbbreviatedIDsTest(t *testing.T, vfs2 bool) { cids[2]: cids[2], } for shortid, longid := range unambiguous { - if _, err := Load(rootDir, shortid); err != nil { + if _, err := LoadAndCheck(rootDir, shortid); err != nil { t.Errorf("%q should resolve to %q: %v", shortid, longid, err) } } @@ -1749,7 +1749,7 @@ func doAbbreviatedIDsTest(t *testing.T, vfs2 bool) { "ba", } for _, shortid := range ambiguous { - if s, err := Load(rootDir, shortid); err == nil { + if s, err := LoadAndCheck(rootDir, shortid); err == nil { t.Errorf("%q should be ambiguous, but resolved to %q", shortid, s.ID) } } @@ -1976,11 +1976,11 @@ func doDestroyNotStartedTest(t *testing.T, vfs2 bool) { // TestDestroyStarting attempts to force a race between start and destroy. func TestDestroyStarting(t *testing.T) { - doDestroyNotStartedTest(t, false) + doDestroyStartingTest(t, false) } func TestDestroyStartedVFS2(t *testing.T) { - doDestroyNotStartedTest(t, true) + doDestroyStartingTest(t, true) } func doDestroyStartingTest(t *testing.T, vfs2 bool) { @@ -2007,7 +2007,7 @@ func doDestroyStartingTest(t *testing.T, vfs2 bool) { // Container is not thread safe, so load another instance to run in // concurrently. - startCont, err := Load(rootDir, args.ID) + startCont, err := LoadAndCheck(rootDir, args.ID) if err != nil { t.Fatalf("error loading container: %v", err) } diff --git a/runsc/container/multi_container_test.go b/runsc/container/multi_container_test.go index 850e80290..45d4e6e6e 100644 --- a/runsc/container/multi_container_test.go +++ b/runsc/container/multi_container_test.go @@ -15,6 +15,7 @@ package container import ( + "encoding/json" "fmt" "io/ioutil" "math" @@ -300,54 +301,21 @@ func TestMultiContainerWait(t *testing.T) { } defer cleanup() - // Check via ps that multiple processes are running. - expectedPL := []*control.Process{ - newProcessBuilder().PID(2).PPID(0).Cmd("sleep").Process(), + // Check that we can wait for the sub-container. + c := containers[1] + if ws, err := c.Wait(); err != nil { + t.Errorf("failed to wait for process %s: %v", c.Spec.Process.Args, err) + } else if es := ws.ExitStatus(); es != 0 { + t.Errorf("process %s exited with non-zero status %d", c.Spec.Process.Args, es) } - if err := waitForProcessList(containers[1], expectedPL); err != nil { - t.Errorf("failed to wait for sleep to start: %v", err) + if _, err := c.Wait(); err != nil { + t.Errorf("wait for stopped container %s shouldn't fail: %v", c.Spec.Process.Args, err) } - // Wait on the short lived container from multiple goroutines. - wg := sync.WaitGroup{} - for i := 0; i < 3; i++ { - wg.Add(1) - go func(c *Container) { - defer wg.Done() - if ws, err := c.Wait(); err != nil { - t.Errorf("failed to wait for process %s: %v", c.Spec.Process.Args, err) - } else if es := ws.ExitStatus(); es != 0 { - t.Errorf("process %s exited with non-zero status %d", c.Spec.Process.Args, es) - } - if _, err := c.Wait(); err != nil { - t.Errorf("wait for stopped container %s shouldn't fail: %v", c.Spec.Process.Args, err) - } - }(containers[1]) - } - - // Also wait via PID. 
-	for i := 0; i < 3; i++ {
-		wg.Add(1)
-		go func(c *Container) {
-			defer wg.Done()
-			const pid = 2
-			if ws, err := c.WaitPID(pid); err != nil {
-				t.Errorf("failed to wait for PID %d: %v", pid, err)
-			} else if es := ws.ExitStatus(); es != 0 {
-				t.Errorf("PID %d exited with non-zero status %d", pid, es)
-			}
-			if _, err := c.WaitPID(pid); err == nil {
-				t.Errorf("wait for stopped PID %d should fail", pid)
-			}
-		}(containers[1])
-	}
-
-	wg.Wait()
-
 	// After Wait returns, ensure that the root container is running and
 	// the child has finished.
-	expectedPL = []*control.Process{
-		newProcessBuilder().Cmd("sleep").Process(),
+	expectedPL := []*control.Process{
+		newProcessBuilder().Cmd("sleep").PID(1).Process(),
 	}
 	if err := waitForProcessList(containers[0], expectedPL); err != nil {
 		t.Errorf("failed to wait for %q to start: %v", strings.Join(containers[0].Spec.Process.Args, " "), err)
@@ -762,7 +730,7 @@ func TestMultiContainerKillAll(t *testing.T) {
 	// processes still running inside.
 	containers[1].SignalContainer(syscall.SIGKILL, false)
 	op := func() error {
-		c, err := Load(conf.RootDir, ids[1])
+		c, err := LoadAndCheck(conf.RootDir, ids[1])
 		if err != nil {
 			return err
 		}
@@ -776,7 +744,7 @@
 		}
 	}

-	c, err := Load(conf.RootDir, ids[1])
+	c, err := LoadAndCheck(conf.RootDir, ids[1])
 	if err != nil {
 		t.Fatalf("failed to load child container %q: %v", c.ID, err)
 	}
@@ -899,7 +867,7 @@ func TestMultiContainerDestroyStarting(t *testing.T) {
 	// Container is not thread safe, so load another instance to run in
 	// concurrently.
-	startCont, err := Load(rootDir, ids[i])
+	startCont, err := LoadAndCheck(rootDir, ids[i])
 	if err != nil {
 		t.Fatalf("error loading container: %v", err)
 	}
@@ -1766,3 +1734,72 @@ func TestMultiContainerHomeEnvDir(t *testing.T) {
 		})
 	}
 }
+
+func TestMultiContainerEvent(t *testing.T) {
+	conf := testutil.TestConfig(t)
+	rootDir, cleanup, err := testutil.SetupRootDir()
+	if err != nil {
+		t.Fatalf("error creating root dir: %v", err)
+	}
+	defer cleanup()
+	conf.RootDir = rootDir
+
+	// Setup the containers.
+	sleep := []string{"/bin/sleep", "100"}
+	quick := []string{"/bin/true"}
+	podSpec, ids := createSpecs(sleep, sleep, quick)
+	containers, cleanup, err := startContainers(conf, podSpec, ids)
+	if err != nil {
+		t.Fatalf("error starting containers: %v", err)
+	}
+	defer cleanup()
+
+	for _, cont := range containers {
+		t.Logf("Running container %s", cont.ID)
+	}
+
+	// Wait for the last container to exit so that the process count checked
+	// below is stable.
+	if ws, err := containers[2].Wait(); err != nil || ws != 0 {
+		t.Fatalf("Container.Wait, status: %v, err: %v", ws, err)
+	}
+
+	// Check events for running containers.
+	for _, cont := range containers[:2] {
+		evt, err := cont.Event()
+		if err != nil {
+			t.Errorf("Container.Events(): %v", err)
+		}
+		if want := "stats"; evt.Type != want {
+			t.Errorf("Wrong event type, want: %s, got: %s", want, evt.Type)
+		}
+		if cont.ID != evt.ID {
+			t.Errorf("Wrong container ID, want: %s, got: %s", cont.ID, evt.ID)
+		}
+		// Event.Data is an interface, so it comes over the wire as a
+		// map[string]string. Marshal and unmarshal it again into the correct
+		// type.
+		data, err := json.Marshal(evt.Data)
+		if err != nil {
+			t.Fatalf("invalid event data: %v", err)
+		}
+		var stats boot.Stats
+		if err := json.Unmarshal(data, &stats); err != nil {
+			t.Fatalf("invalid event data: %v", err)
+		}
+		// One process per remaining container.
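The marshal/unmarshal round-trip above is the usual way to coerce an untyped urpc payload into a concrete struct. A standalone sketch (the struct here is made up for illustration, not the real boot.Stats layout):

package sketch

import "encoding/json"

// pidStats is a hypothetical typed view of one field in the payload.
type pidStats struct {
	Current uint64 `json:"current"`
}

// toTyped re-encodes an untyped value (e.g. a map decoded from the
// wire) as JSON and decodes it into the typed struct.
func toTyped(data interface{}) (pidStats, error) {
	var out pidStats
	raw, err := json.Marshal(data) // back to JSON bytes
	if err != nil {
		return out, err
	}
	err = json.Unmarshal(raw, &out) // decode into the typed view
	return out, err
}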
+		if want := uint64(2); stats.Pids.Current != want {
+			t.Errorf("Wrong number of PIDs, want: %d, got: %d", want, stats.Pids.Current)
+		}
+	}
+
+	// Check that stopped and destroyed containers return an error.
+	if err := containers[1].Destroy(); err != nil {
+		t.Fatalf("container.Destroy: %v", err)
+	}
+	for _, cont := range containers[1:] {
+		_, err := cont.Event()
+		if err == nil {
+			t.Errorf("Container.Events() should have failed, cid: %s, state: %v", cont.ID, cont.Status)
+		}
+	}
+}
diff --git a/runsc/sandbox/network.go b/runsc/sandbox/network.go
index 8f66dd1f8..9e429f7d5 100644
--- a/runsc/sandbox/network.go
+++ b/runsc/sandbox/network.go
@@ -127,7 +127,7 @@ func createInterfacesAndRoutesFromNS(conn *urpc.Client, nsPath string, hardwareG
 	// Get all interfaces in the namespace.
 	ifaces, err := net.Interfaces()
 	if err != nil {
-		return fmt.Errorf("querying interfaces: %v", err)
+		return fmt.Errorf("querying interfaces: %w", err)
 	}

 	isRoot, err := isRootNS()
@@ -148,14 +148,14 @@ func createInterfacesAndRoutesFromNS(conn *urpc.Client, nsPath string, hardwareG
 		allAddrs, err := iface.Addrs()
 		if err != nil {
-			return fmt.Errorf("fetching interface addresses for %q: %v", iface.Name, err)
+			return fmt.Errorf("fetching interface addresses for %q: %w", iface.Name, err)
 		}

 		// We build our own loopback device.
 		if iface.Flags&net.FlagLoopback != 0 {
 			link, err := loopbackLink(iface, allAddrs)
 			if err != nil {
-				return fmt.Errorf("getting loopback link for iface %q: %v", iface.Name, err)
+				return fmt.Errorf("getting loopback link for iface %q: %w", iface.Name, err)
 			}
 			args.LoopbackLinks = append(args.LoopbackLinks, link)
 			continue
@@ -209,7 +209,7 @@ func createInterfacesAndRoutesFromNS(conn *urpc.Client, nsPath string, hardwareG
 		// Get the link for the interface.
 		ifaceLink, err := netlink.LinkByName(iface.Name)
 		if err != nil {
-			return fmt.Errorf("getting link for interface %q: %v", iface.Name, err)
+			return fmt.Errorf("getting link for interface %q: %w", iface.Name, err)
 		}
 		link.LinkAddress = ifaceLink.Attrs().HardwareAddr
@@ -219,7 +219,7 @@ func createInterfacesAndRoutesFromNS(conn *urpc.Client, nsPath string, hardwareG
 			log.Debugf("Creating Channel %d", i)
 			socketEntry, err := createSocket(iface, ifaceLink, hardwareGSO)
 			if err != nil {
-				return fmt.Errorf("failed to createSocket for %s : %v", iface.Name, err)
+				return fmt.Errorf("failed to createSocket for %s : %w", iface.Name, err)
 			}
 			if i == 0 {
 				link.GSOMaxSize = socketEntry.gsoMaxSize
@@ -241,11 +241,12 @@ func createInterfacesAndRoutesFromNS(conn *urpc.Client, nsPath string, hardwareG
 		// Collect the addresses for the interface, enable forwarding,
 		// and remove them from the host.
 		for _, addr := range ipAddrs {
-			link.Addresses = append(link.Addresses, addr.IP)
+			prefix, _ := addr.Mask.Size()
+			link.Addresses = append(link.Addresses, boot.IPWithPrefix{Address: addr.IP, PrefixLen: prefix})

 			// Steal IP address from NIC.
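Both network.go hunks now carry the prefix length alongside each address (boot.IPWithPrefix), recovering it from the mask with net.IPMask.Size. For illustration:

package sketch

import (
	"fmt"
	"net"
)

func prefixDemo() {
	// Mask.Size returns the leading-ones count of the mask (the CIDR
	// prefix length) and the total bit length of the mask.
	_, ipNet, err := net.ParseCIDR("192.0.2.5/24")
	if err != nil {
		panic(err)
	}
	ones, bits := ipNet.Mask.Size()
	fmt.Println(ones, bits) // prints: 24 32
}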
if err := removeAddress(ifaceLink, addr.String()); err != nil { - return fmt.Errorf("removing address %v from device %q: %v", iface.Name, addr, err) + return fmt.Errorf("removing address %v from device %q: %w", addr, iface.Name, err) } } @@ -254,7 +255,7 @@ func createInterfacesAndRoutesFromNS(conn *urpc.Client, nsPath string, hardwareG log.Debugf("Setting up network, config: %+v", args) if err := conn.Call(boot.NetworkCreateLinksAndRoutes, &args, nil); err != nil { - return fmt.Errorf("creating links and routes: %v", err) + return fmt.Errorf("creating links and routes: %w", err) } return nil } @@ -278,8 +279,6 @@ func createSocket(iface net.Interface, ifaceLink netlink.Link, enableGSO bool) ( ll := syscall.SockaddrLinklayer{ Protocol: protocol, Ifindex: iface.Index, - Hatype: 0, // No ARP type. - Pkttype: syscall.PACKET_OTHERHOST, } if err := syscall.Bind(fd, &ll); err != nil { return nil, fmt.Errorf("unable to bind to %q: %v", iface.Name, err) @@ -339,9 +338,15 @@ func loopbackLink(iface net.Interface, addrs []net.Addr) (boot.LoopbackLink, err if !ok { return boot.LoopbackLink{}, fmt.Errorf("address is not IPNet: %+v", addr) } + + prefix, _ := ipNet.Mask.Size() + link.Addresses = append(link.Addresses, boot.IPWithPrefix{ + Address: ipNet.IP, + PrefixLen: prefix, + }) + dst := *ipNet dst.IP = dst.IP.Mask(dst.Mask) - link.Addresses = append(link.Addresses, ipNet.IP) link.Routes = append(link.Routes, boot.Route{ Destination: dst, }) diff --git a/runsc/sandbox/sandbox.go b/runsc/sandbox/sandbox.go index c4309feb3..c84ebcd8a 100644 --- a/runsc/sandbox/sandbox.go +++ b/runsc/sandbox/sandbox.go @@ -66,6 +66,10 @@ type Sandbox struct { // Cgroup has the cgroup configuration for the sandbox. Cgroup *cgroup.Cgroup `json:"cgroup"` + // OriginalOOMScoreAdj stores the value of oom_score_adj when the sandbox + // started, before it may be modified. + OriginalOOMScoreAdj int `json:"originalOomScoreAdj"` + // child is set if a sandbox process is a child of the current process. // // This field isn't saved to json, because only a creator of sandbox @@ -169,7 +173,7 @@ func New(conf *config.Config, args *Args) (*Sandbox, error) { } // CreateContainer creates a non-root container inside the sandbox. -func (s *Sandbox) CreateContainer(cid string) error { +func (s *Sandbox) CreateContainer(cid string, tty *os.File) error { log.Debugf("Create non-root container %q in sandbox %q, PID: %d", cid, s.ID, s.Pid) sandboxConn, err := s.sandboxConnect() if err != nil { @@ -177,7 +181,16 @@ func (s *Sandbox) CreateContainer(cid string) error { } defer sandboxConn.Close() - if err := sandboxConn.Call(boot.ContainerCreate, &cid, nil); err != nil { + var files []*os.File + if tty != nil { + files = []*os.File{tty} + } + + args := boot.CreateArgs{ + CID: cid, + FilePayload: urpc.FilePayload{Files: files}, + } + if err := sandboxConn.Call(boot.ContainerCreate, &args, nil); err != nil { return fmt.Errorf("creating non-root container %q: %v", cid, err) } return nil @@ -207,11 +220,7 @@ func (s *Sandbox) StartRoot(spec *specs.Spec, conf *config.Config) error { } // StartContainer starts running a non-root container inside the sandbox. 
-func (s *Sandbox) StartContainer(spec *specs.Spec, conf *config.Config, cid string, goferFiles []*os.File) error { - for _, f := range goferFiles { - defer f.Close() - } - +func (s *Sandbox) StartContainer(spec *specs.Spec, conf *config.Config, cid string, stdios, goferFiles []*os.File) error { log.Debugf("Start non-root container %q in sandbox %q, PID: %d", cid, s.ID, s.Pid) sandboxConn, err := s.sandboxConnect() if err != nil { @@ -219,15 +228,18 @@ func (s *Sandbox) StartContainer(spec *specs.Spec, conf *config.Config, cid stri } defer sandboxConn.Close() - // The payload must container stdin/stdout/stderr followed by gofer - // files. - files := append([]*os.File{os.Stdin, os.Stdout, os.Stderr}, goferFiles...) + // The payload must contain stdin/stdout/stderr (which may be empty if using + // TTY) followed by gofer files. + payload := urpc.FilePayload{} + payload.Files = append(payload.Files, stdios...) + payload.Files = append(payload.Files, goferFiles...) + // Start running the container. args := boot.StartArgs{ Spec: spec, Conf: conf, CID: cid, - FilePayload: urpc.FilePayload{Files: files}, + FilePayload: payload, } if err := sandboxConn.Call(boot.ContainerStart, &args, nil); err != nil { return fmt.Errorf("starting non-root container %v: %v", spec.Process.Args, err) @@ -739,6 +751,11 @@ func (s *Sandbox) createSandboxProcess(conf *config.Config, args *Args, startSyn } return err } + s.OriginalOOMScoreAdj, err = specutils.GetOOMScoreAdj(cmd.Process.Pid) + if err != nil { + return err + } + s.child = true s.Pid = cmd.Process.Pid log.Infof("Sandbox started, PID: %d", s.Pid) @@ -1133,11 +1150,11 @@ func (s *Sandbox) DestroyContainer(cid string) error { func (s *Sandbox) destroyContainer(cid string) error { if s.IsRootContainer(cid) { - log.Debugf("Destroying root container %q by destroying sandbox", cid) + log.Debugf("Destroying root container by destroying sandbox, cid: %s", cid) return s.destroy() } - log.Debugf("Destroying container %q in sandbox %q", cid, s.ID) + log.Debugf("Destroying container, cid: %s, sandbox: %s", cid, s.ID) conn, err := s.sandboxConnect() if err != nil { return err diff --git a/runsc/specutils/specutils.go b/runsc/specutils/specutils.go index 0392e3e83..fdbba1832 100644 --- a/runsc/specutils/specutils.go +++ b/runsc/specutils/specutils.go @@ -19,6 +19,7 @@ package specutils import ( "encoding/json" "fmt" + "io" "io/ioutil" "os" "path" @@ -169,7 +170,7 @@ func ReadSpec(bundleDir string, conf *config.Config) (*specs.Spec, error) { // ReadSpecFromFile reads an OCI runtime spec from the given File, and // normalizes all relative paths into absolute by prepending the bundle dir. func ReadSpecFromFile(bundleDir string, specFile *os.File, conf *config.Config) (*specs.Spec, error) { - if _, err := specFile.Seek(0, os.SEEK_SET); err != nil { + if _, err := specFile.Seek(0, io.SeekStart); err != nil { return nil, fmt.Errorf("error seeking to beginning of file %q: %v", specFile.Name(), err) } specBytes, err := ioutil.ReadAll(specFile) @@ -344,15 +345,9 @@ func IsSupportedDevMount(m specs.Mount) bool { var existingDevices = []string{ "/dev/fd", "/dev/stdin", "/dev/stdout", "/dev/stderr", "/dev/null", "/dev/zero", "/dev/full", "/dev/random", - "/dev/urandom", "/dev/shm", "/dev/pts", "/dev/ptmx", + "/dev/urandom", "/dev/shm", "/dev/ptmx", } dst := filepath.Clean(m.Destination) - if dst == "/dev" { - // OCI spec uses many different mounts for the things inside of '/dev'. 
We
-		// have a single mount at '/dev' that is always mounted, regardless of
-		// whether it was asked for, as the spec says we SHOULD.
-		return false
-	}
 	for _, dev := range existingDevices {
 		if dst == dev || strings.HasPrefix(dst, dev+"/") {
 			return false
@@ -425,7 +420,7 @@ func Mount(src, dst, typ string, flags uint32) error {
 		// Special case, as there is no source directory for proc mounts.
 		isDir = true
 	} else if fi, err := os.Stat(src); err != nil {
-		return fmt.Errorf("Stat(%q) failed: %v", src, err)
+		return fmt.Errorf("stat(%q) failed: %v", src, err)
 	} else {
 		isDir = fi.IsDir()
 	}
@@ -433,25 +428,25 @@
 	if isDir {
 		// Create the destination directory.
 		if err := os.MkdirAll(dst, 0777); err != nil {
-			return fmt.Errorf("Mkdir(%q) failed: %v", dst, err)
+			return fmt.Errorf("mkdir(%q) failed: %v", dst, err)
 		}
 	} else {
 		// Create the parent destination directory.
 		parent := path.Dir(dst)
 		if err := os.MkdirAll(parent, 0777); err != nil {
-			return fmt.Errorf("Mkdir(%q) failed: %v", parent, err)
+			return fmt.Errorf("mkdir(%q) failed: %v", parent, err)
 		}
 		// Create the destination file if it does not exist.
 		f, err := os.OpenFile(dst, syscall.O_CREAT, 0777)
 		if err != nil {
-			return fmt.Errorf("Open(%q) failed: %v", dst, err)
+			return fmt.Errorf("open(%q) failed: %v", dst, err)
 		}
 		f.Close()
 	}

 	// Do the mount.
 	if err := syscall.Mount(src, dst, typ, uintptr(flags), ""); err != nil {
-		return fmt.Errorf("Mount(%q, %q, %d) failed: %v", src, dst, flags, err)
+		return fmt.Errorf("mount(%q, %q, %d) failed: %v", src, dst, flags, err)
 	}
 	return nil
 }
@@ -486,35 +481,6 @@ func GetOOMScoreAdj(pid int) (int, error) {
 	return strconv.Atoi(strings.TrimSpace(string(data)))
 }

-// GetParentPid gets the parent process ID of the specified PID.
-func GetParentPid(pid int) (int, error) {
-	data, err := ioutil.ReadFile(fmt.Sprintf("/proc/%d/stat", pid))
-	if err != nil {
-		return 0, err
-	}
-
-	var cpid string
-	var name string
-	var state string
-	var ppid int
-	// Parse after the binary name.
-	_, err = fmt.Sscanf(string(data),
-		"%v %v %v %d",
-		// cpid is ignored.
-		&cpid,
-		// name is ignored.
-		&name,
-		// state is ignored.
-		&state,
-		&ppid)
-
-	if err != nil {
-		return 0, err
-	}
-
-	return ppid, nil
-}
-
 // EnvVar looks for a variable value in the env slice assuming the following
 // format: "NAME=VALUE".
 func EnvVar(env []string, name string) (string, bool) {
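The diff ends at EnvVar's signature; its body is unchanged and omitted here. For reference, a lookup with the documented "NAME=VALUE" semantics could be sketched as follows (illustrative, not the actual specutils code):

package sketch

import "strings"

// envVar scans env entries of the form "NAME=VALUE" and returns the
// value and whether the variable was found.
func envVar(env []string, name string) (string, bool) {
	prefix := name + "="
	for _, e := range env {
		if strings.HasPrefix(e, prefix) {
			return e[len(prefix):], true
		}
	}
	return "", false
}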