author | Googler <noreply@google.com> | 2018-04-27 10:37:02 -0700
committer | Adin Scannell <ascannell@google.com> | 2018-04-28 01:44:26 -0400
commit | d02b74a5dcfed4bfc8f2f8e545bca4d2afabb296
tree | 54f95eef73aee6bacbfc736fffc631be2605ed53 /runsc/sandbox
parent | f70210e742919f40aa2f0934a22f1c9ba6dada62
Check in gVisor.
PiperOrigin-RevId: 194583126
Change-Id: Ica1d8821a90f74e7e745962d71801c598c652463
Diffstat (limited to 'runsc/sandbox')
-rw-r--r-- | runsc/sandbox/BUILD | 53
-rw-r--r-- | runsc/sandbox/console.go | 60
-rw-r--r-- | runsc/sandbox/hook.go | 111
-rw-r--r-- | runsc/sandbox/namespace.go | 204
-rw-r--r-- | runsc/sandbox/network.go | 348
-rw-r--r-- | runsc/sandbox/sandbox.go | 666
-rw-r--r-- | runsc/sandbox/sandbox_test.go | 649
-rw-r--r-- | runsc/sandbox/status.go | 56
8 files changed, 2147 insertions, 0 deletions
diff --git a/runsc/sandbox/BUILD b/runsc/sandbox/BUILD new file mode 100644 index 000000000..bdd95903e --- /dev/null +++ b/runsc/sandbox/BUILD @@ -0,0 +1,53 @@ +package(licenses = ["notice"]) # Apache 2.0 + +load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test") + +go_library( + name = "sandbox", + srcs = [ + "console.go", + "hook.go", + "namespace.go", + "network.go", + "sandbox.go", + "status.go", + ], + importpath = "gvisor.googlesource.com/gvisor/runsc/sandbox", + visibility = [ + "//runsc:__subpackages__", + ], + deps = [ + "//pkg/control/client", + "//pkg/control/server", + "//pkg/log", + "//pkg/sentry/control", + "//pkg/urpc", + "//runsc/boot", + "//runsc/specutils", + "@com_github_kr_pty//:go_default_library", + "@com_github_opencontainers_runtime-spec//specs-go:go_default_library", + "@com_github_vishvananda_netlink//:go_default_library", + "@org_golang_x_sys//unix:go_default_library", + ], +) + +go_test( + name = "sandbox_test", + size = "small", + srcs = ["sandbox_test.go"], + pure = "on", + rundir = ".", + deps = [ + "//pkg/abi/linux", + "//pkg/log", + "//pkg/sentry/control", + "//pkg/sentry/kernel/auth", + "//pkg/unet", + "//runsc/boot", + "//runsc/cmd", + "//runsc/sandbox", + "@com_github_google_subcommands//:go_default_library", + "@com_github_opencontainers_runtime-spec//specs-go:go_default_library", + "@org_golang_x_sys//unix:go_default_library", + ], +) diff --git a/runsc/sandbox/console.go b/runsc/sandbox/console.go new file mode 100644 index 000000000..3f133e12a --- /dev/null +++ b/runsc/sandbox/console.go @@ -0,0 +1,60 @@ +// Copyright 2018 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package sandbox + +import ( + "fmt" + "net" + "os" + + "github.com/kr/pty" + "golang.org/x/sys/unix" +) + +// setupConsole creates pty master/slave pair, sends the master FD over the +// given socket, and returns the slave. +func setupConsole(socketPath string) (*os.File, error) { + // Create a new pty master and slave. + ptyMaster, ptySlave, err := pty.Open() + if err != nil { + return nil, fmt.Errorf("error opening pty: %v", err) + } + defer ptyMaster.Close() + + // Get a connection to the socket path. + conn, err := net.Dial("unix", socketPath) + if err != nil { + ptySlave.Close() + return nil, fmt.Errorf("error dial socket %q: %v", socketPath, err) + } + uc, ok := conn.(*net.UnixConn) + if !ok { + ptySlave.Close() + return nil, fmt.Errorf("connection is not a UnixConn: %T", conn) + } + socket, err := uc.File() + if err != nil { + ptySlave.Close() + return nil, fmt.Errorf("error getting file for unix socket %v: %v", uc, err) + } + + // Send the master FD over the connection. 
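// Editorial sketch (not part of this commit): the matching receive side of
// this handoff, for reference. A process that owns the console socket can
// recover the pty master with the same golang.org/x/sys/unix calls already
// imported by this file; the helper name receiveConsoleFD is hypothetical.
func receiveConsoleFD(uc *net.UnixConn) (*os.File, error) {
	f, err := uc.File()
	if err != nil {
		return nil, err
	}
	defer f.Close()

	// Receive the "pty-master" payload plus one SCM_RIGHTS control message.
	buf := make([]byte, 32)
	oob := make([]byte, unix.CmsgSpace(4)) // room for a single 32-bit FD
	_, oobn, _, _, err := unix.Recvmsg(int(f.Fd()), buf, oob, 0)
	if err != nil {
		return nil, fmt.Errorf("recvmsg: %v", err)
	}

	// Unpack the FD carried by the control message.
	msgs, err := unix.ParseSocketControlMessage(oob[:oobn])
	if err != nil || len(msgs) != 1 {
		return nil, fmt.Errorf("expected 1 control message, got %d: %v", len(msgs), err)
	}
	fds, err := unix.ParseUnixRights(&msgs[0])
	if err != nil || len(fds) != 1 {
		return nil, fmt.Errorf("expected 1 fd, got %d: %v", len(fds), err)
	}
	return os.NewFile(uintptr(fds[0]), "pty-master"), nil
}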
+ msg := unix.UnixRights(int(ptyMaster.Fd())) + if err := unix.Sendmsg(int(socket.Fd()), []byte("pty-master"), msg, nil, 0); err != nil { + ptySlave.Close() + return nil, fmt.Errorf("error sending console over unix socket %q: %v", socketPath, err) + } + return ptySlave, nil +} diff --git a/runsc/sandbox/hook.go b/runsc/sandbox/hook.go new file mode 100644 index 000000000..40b064cdc --- /dev/null +++ b/runsc/sandbox/hook.go @@ -0,0 +1,111 @@ +// Copyright 2018 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package sandbox + +import ( + "bytes" + "encoding/json" + "fmt" + "os/exec" + "path/filepath" + "strings" + "time" + + specs "github.com/opencontainers/runtime-spec/specs-go" + "gvisor.googlesource.com/gvisor/pkg/log" +) + +// This file implements hooks as defined in OCI spec: +// https://github.com/opencontainers/runtime-spec/blob/master/config.md#toc22 +// +// "hooks":{ +// "prestart":[{ +// "path":"/usr/bin/dockerd", +// "args":[ +// "libnetwork-setkey", "arg2", +// ] +// }] +// }, + +// executeHooksBestEffort executes hooks and logs warning in case they fail. +// Runs all hooks, always. +func executeHooksBestEffort(hooks []specs.Hook, s specs.State) { + for _, h := range hooks { + if err := executeHook(h, s); err != nil { + log.Warningf("Failure to execute hook %+v, err: %v", h, err) + } + } +} + +// executeHooks executes hooks until the first one fails or they all execute. +func executeHooks(hooks []specs.Hook, s specs.State) error { + for _, h := range hooks { + if err := executeHook(h, s); err != nil { + return err + } + } + return nil +} + +func executeHook(h specs.Hook, s specs.State) error { + log.Debugf("Executing hook %+v, state: %+v", h, s) + + if strings.TrimSpace(h.Path) == "" { + return fmt.Errorf("empty path for hook") + } + if !filepath.IsAbs(h.Path) { + return fmt.Errorf("path for hook is not absolute: %q", h.Path) + } + + b, err := json.Marshal(s) + if err != nil { + return err + } + var stdout, stderr bytes.Buffer + cmd := exec.Cmd{ + Path: h.Path, + Args: h.Args, + Env: h.Env, + Stdin: bytes.NewReader(b), + Stdout: &stdout, + Stderr: &stderr, + } + if err := cmd.Start(); err != nil { + return err + } + + c := make(chan error, 1) + go func() { + c <- cmd.Wait() + }() + + var timer <-chan time.Time + if h.Timeout != nil { + timer = time.After(time.Duration(*h.Timeout) * time.Second) + } + select { + case err := <-c: + if err != nil { + return fmt.Errorf("failure executing hook %q, err: %v\nstdout: %s\nstderr: %s", h.Path, err, stdout.String(), stderr.String()) + } + case <-timer: + cmd.Process.Kill() + cmd.Wait() + return fmt.Errorf("timeout executing hook %q\nstdout: %s\nstderr: %s", h.Path, stdout.String(), stderr.String()) + } + + log.Debugf("Execute hook %q success!", h.Path) + return nil +} diff --git a/runsc/sandbox/namespace.go b/runsc/sandbox/namespace.go new file mode 100644 index 000000000..1d3bcfbb5 --- /dev/null +++ b/runsc/sandbox/namespace.go @@ -0,0 +1,204 @@ +// Copyright 2018 Google Inc. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package sandbox + +import ( + "fmt" + "os" + "os/exec" + "path/filepath" + "runtime" + "syscall" + + specs "github.com/opencontainers/runtime-spec/specs-go" + "golang.org/x/sys/unix" + "gvisor.googlesource.com/gvisor/pkg/log" +) + +// nsCloneFlag returns the clone flag that can be used to set a namespace of +// the given type. +func nsCloneFlag(nst specs.LinuxNamespaceType) uintptr { + switch nst { + case specs.IPCNamespace: + return syscall.CLONE_NEWIPC + case specs.MountNamespace: + return syscall.CLONE_NEWNS + case specs.NetworkNamespace: + return syscall.CLONE_NEWNET + case specs.PIDNamespace: + return syscall.CLONE_NEWPID + case specs.UTSNamespace: + return syscall.CLONE_NEWUTS + case specs.UserNamespace: + return syscall.CLONE_NEWUSER + case specs.CgroupNamespace: + panic("cgroup namespace has no associated clone flag") + default: + panic(fmt.Sprintf("unknown namespace %v", nst)) + } +} + +// nsPath returns the path of the namespace for the current process and the +// given namespace. +func nsPath(nst specs.LinuxNamespaceType) string { + base := "/proc/self/ns" + switch nst { + case specs.CgroupNamespace: + return filepath.Join(base, "cgroup") + case specs.IPCNamespace: + return filepath.Join(base, "ipc") + case specs.MountNamespace: + return filepath.Join(base, "mnt") + case specs.NetworkNamespace: + return filepath.Join(base, "net") + case specs.PIDNamespace: + return filepath.Join(base, "pid") + case specs.UserNamespace: + return filepath.Join(base, "user") + case specs.UTSNamespace: + return filepath.Join(base, "uts") + default: + panic(fmt.Sprintf("unknown namespace %v", nst)) + } +} + +// getNS returns true and the namespace with the given type from the slice of +// namespaces in the spec. It returns false if the slice does not contain a +// namespace with the type. +func getNS(nst specs.LinuxNamespaceType, s *specs.Spec) (specs.LinuxNamespace, bool) { + if s.Linux == nil { + return specs.LinuxNamespace{}, false + } + for _, ns := range s.Linux.Namespaces { + if ns.Type == nst { + return ns, true + } + } + return specs.LinuxNamespace{}, false +} + +// filterNS returns a slice of namespaces from the spec with types that match +// those in the `filter` slice. +func filterNS(filter []specs.LinuxNamespaceType, s *specs.Spec) []specs.LinuxNamespace { + if s.Linux == nil { + return nil + } + var out []specs.LinuxNamespace + for _, nst := range filter { + if ns, ok := getNS(nst, s); ok { + out = append(out, ns) + } + } + return out +} + +// setNS sets the namespace of the given type. It must be called with +// OSThreadLocked. +func setNS(fd, nsType uintptr) error { + if _, _, err := syscall.RawSyscall(unix.SYS_SETNS, fd, nsType, 0); err != 0 { + return err + } + return nil +} + +// applyNS applies the namespace on the current thread and returns a function +// that will restore the namespace to the original value. +// +// Preconditions: Must be called with os thread locked. 
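// Editorial sketch (not part of this commit): a minimal applyNS call site.
// setns(2) affects only the calling thread, so callers pin the goroutine to
// its OS thread for the duration of the switch; joinNetNS in network.go below
// follows exactly this pattern. The path here is hypothetical.
//
//	runtime.LockOSThread()
//	defer runtime.UnlockOSThread()
//
//	restore, err := applyNS(specs.LinuxNamespace{
//		Type: specs.NetworkNamespace,
//		Path: "/proc/1234/ns/net", // some other process's net namespace
//	})
//	if err != nil {
//		return err
//	}
//	defer restore() // switch back before the thread is unlocked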
+func applyNS(ns specs.LinuxNamespace) (func(), error) { + log.Infof("applying namespace %v at path %q", ns.Type, ns.Path) + newNS, err := os.Open(ns.Path) + if err != nil { + return nil, fmt.Errorf("error opening %q: %v", ns.Path, err) + } + defer newNS.Close() + + // Store current netns to restore back after child is started. + curPath := nsPath(ns.Type) + oldNS, err := os.Open(curPath) + if err != nil { + return nil, fmt.Errorf("error opening %q: %v", curPath, err) + } + + // Set netns to the one requested and setup function to restore it back. + flag := nsCloneFlag(ns.Type) + if err := setNS(newNS.Fd(), flag); err != nil { + oldNS.Close() + return nil, fmt.Errorf("error setting namespace of type %v and path %q: %v", ns.Type, ns.Path, err) + } + return func() { + log.Infof("restoring namespace %v", ns.Type) + defer oldNS.Close() + if err := setNS(oldNS.Fd(), flag); err != nil { + panic(fmt.Sprintf("error restoring namespace: of type %v: %v", ns.Type, err)) + } + }, nil +} + +// startInNS joins or creates the given namespaces and calls cmd.Start before +// restoring the namespaces to the original values. +func startInNS(cmd *exec.Cmd, nss []specs.LinuxNamespace) error { + // We are about to setup namespaces, which requires the os thread being + // locked so that Go doesn't change the thread out from under us. + runtime.LockOSThread() + defer runtime.UnlockOSThread() + + if cmd.SysProcAttr == nil { + cmd.SysProcAttr = &syscall.SysProcAttr{} + } + + for _, ns := range nss { + if ns.Path == "" { + // No path. Just set a flag to create a new namespace. + cmd.SysProcAttr.Cloneflags |= nsCloneFlag(ns.Type) + continue + } + // Join the given namespace, and restore the current namespace + // before exiting. + restoreNS, err := applyNS(ns) + if err != nil { + return err + } + defer restoreNS() + } + + return cmd.Start() +} + +// setUIDGIDMappings sets the given uid/gid mappings from the spec on the cmd. +func setUIDGIDMappings(cmd *exec.Cmd, s *specs.Spec) { + if s.Linux == nil { + return + } + if cmd.SysProcAttr == nil { + cmd.SysProcAttr = &syscall.SysProcAttr{} + } + for _, idMap := range s.Linux.UIDMappings { + log.Infof("Mapping host uid %d to container uid %d (size=%d)", idMap.HostID, idMap.ContainerID, idMap.Size) + cmd.SysProcAttr.UidMappings = append(cmd.SysProcAttr.UidMappings, syscall.SysProcIDMap{ + ContainerID: int(idMap.ContainerID), + HostID: int(idMap.HostID), + Size: int(idMap.Size), + }) + } + for _, idMap := range s.Linux.GIDMappings { + log.Infof("Mapping host gid %d to container gid %d (size=%d)", idMap.HostID, idMap.ContainerID, idMap.Size) + cmd.SysProcAttr.GidMappings = append(cmd.SysProcAttr.GidMappings, syscall.SysProcIDMap{ + ContainerID: int(idMap.ContainerID), + HostID: int(idMap.HostID), + Size: int(idMap.Size), + }) + } +} diff --git a/runsc/sandbox/network.go b/runsc/sandbox/network.go new file mode 100644 index 000000000..1b6a1d9a6 --- /dev/null +++ b/runsc/sandbox/network.go @@ -0,0 +1,348 @@ +// Copyright 2018 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +package sandbox + +import ( + "fmt" + "net" + "os" + "path/filepath" + "runtime" + "strconv" + "strings" + "syscall" + + specs "github.com/opencontainers/runtime-spec/specs-go" + "github.com/vishvananda/netlink" + "gvisor.googlesource.com/gvisor/pkg/log" + "gvisor.googlesource.com/gvisor/pkg/urpc" + "gvisor.googlesource.com/gvisor/runsc/boot" +) + +// setupNetwork configures the network stack to mimic the local network +// configuration. Docker uses network namespaces with vnets to configure the +// network for the container. The untrusted app expects to see the same network +// inside the sandbox. Routing and port mapping is handled directly by docker +// with most of network information not even available to the runtime. +// +// Netstack inside the sandbox speaks directly to the device using a raw socket. +// All IP addresses assigned to the NIC, are removed and passed on to netstack's +// device. +// +// If 'conf.Network' is NoNetwork, skips local configuration and creates a +// loopback interface only. +// +// Run the following container to test it: +// docker run -di --runtime=runsc -p 8080:80 -v $PWD:/usr/local/apache2/htdocs/ httpd:2.4 +func setupNetwork(conn *urpc.Client, pid int, spec *specs.Spec, conf *boot.Config) error { + log.Infof("Setting up network") + + // HACK! + // + // When kubernetes starts a pod, it first creates a sandbox with an + // application that just pauses forever. Later, when a container is + // added to the pod, kubernetes will create another sandbox with a + // config that corresponds to the containerized application, and add it + // to the same namespaces as the pause sandbox. + // + // Running a second sandbox currently breaks because the two sandboxes + // have the same network namespace and configuration, and try to create + // a tap device on the same host device which fails. + // + // Runsc will eventually need to detect that this container is meant to + // be run in the same sandbox as the pausing application, and somehow + // make that happen. + // + // For now the following HACK disables networking for the "pause" + // sandbox, allowing the second sandbox to start up successfully. + // + // Cri-o helpfully adds the "ContainerType" annotation that we can use + // to detect whether we are a pod or container. Cri-containerd will + // support this eventually, but does not currently + // (https://github.com/kubernetes-incubator/cri-containerd/issues/512). + // + // Thus, to support cri-containerd, we check if the exec args is + // "/pause", which is pretty gross. + // + // TODO: Remove this once multiple containers per sandbox + // is properly supported. + if spec.Annotations["io.kubernetes.cri-o.ContainerType"] == "sandbox" || spec.Process.Args[0] == "/pause" { + log.Warningf("HACK: Disabling network") + conf.Network = boot.NetworkNone + } + + switch conf.Network { + case boot.NetworkNone: + log.Infof("Network is disabled, create loopback interface only") + if err := createDefaultLoopbackInterface(conn); err != nil { + return fmt.Errorf("error creating default loopback interface: %v", err) + } + case boot.NetworkSandbox: + // Build the path to the net namespace of the sandbox process. + // This is what we will copy. 
+ nsPath := filepath.Join("/proc", strconv.Itoa(pid), "ns/net") + if err := createInterfacesAndRoutesFromNS(conn, nsPath); err != nil { + return fmt.Errorf("error creating interfaces from net namespace %q: %v", nsPath, err) + } + case boot.NetworkHost: + // Nothing to do here. + default: + return fmt.Errorf("Invalid network type: %d", conf.Network) + } + return nil +} + +func createDefaultLoopbackInterface(conn *urpc.Client) error { + link := boot.LoopbackLink{ + Name: "lo", + Addresses: []net.IP{ + net.IP("\x7f\x00\x00\x01"), + net.IPv6loopback, + }, + Routes: []boot.Route{ + { + Destination: net.IP("\x7f\x00\x00\x00"), + Mask: net.IPMask("\xff\x00\x00\x00"), + }, + { + Destination: net.IPv6loopback, + Mask: net.IPMask(strings.Repeat("\xff", 16)), + }, + }, + } + if err := conn.Call(boot.NetworkCreateLinksAndRoutes, &boot.CreateLinksAndRoutesArgs{ + LoopbackLinks: []boot.LoopbackLink{link}, + }, nil); err != nil { + return fmt.Errorf("error creating loopback link and routes: %v", err) + } + return nil +} + +func joinNetNS(nsPath string) (func(), error) { + runtime.LockOSThread() + restoreNS, err := applyNS(specs.LinuxNamespace{ + Type: specs.NetworkNamespace, + Path: nsPath, + }) + if err != nil { + runtime.UnlockOSThread() + return nil, fmt.Errorf("error joining net namespace %q: %v", nsPath, err) + } + return func() { + restoreNS() + runtime.UnlockOSThread() + }, nil +} + +// isRootNS determines whether we are running in the root net namespace. +// +// TODO: Find a better way to detect root network. +func isRootNS(ifaces []net.Interface) bool { + for _, iface := range ifaces { + if iface.Name == "docker0" { + return true + } + } + return false + +} + +// createInterfacesAndRoutesFromNS scrapes the interface and routes from the +// net namespace with the given path, creates them in the sandbox, and removes +// them from the host. +func createInterfacesAndRoutesFromNS(conn *urpc.Client, nsPath string) error { + // Join the network namespace that we will be copying. + restore, err := joinNetNS(nsPath) + if err != nil { + return err + } + defer restore() + + // Get all interfaces in the namespace. + ifaces, err := net.Interfaces() + if err != nil { + return fmt.Errorf("error querying interfaces: %v", err) + } + + if isRootNS(ifaces) { + return fmt.Errorf("cannot run in with network enabled in root network namespace") + } + + // Collect addresses and routes from the interfaces. + var args boot.CreateLinksAndRoutesArgs + for _, iface := range ifaces { + if iface.Flags&net.FlagUp == 0 { + log.Infof("Skipping down interface: %+v", iface) + continue + } + + ifaddrs, err := iface.Addrs() + if err != nil { + return fmt.Errorf("error fetching interface addresses for %q: %v", iface.Name, err) + } + + // We build our own loopback devices. + if iface.Flags&net.FlagLoopback != 0 { + links, err := loopbackLinks(iface, ifaddrs) + if err != nil { + return fmt.Errorf("error getting loopback routes and links for iface %q: %v", iface.Name, err) + } + args.LoopbackLinks = append(args.LoopbackLinks, links...) + continue + } + + // Get the link for the interface. + ifaceLink, err := netlink.LinkByName(iface.Name) + if err != nil { + return fmt.Errorf("error getting link for interface %q: %v", iface.Name, err) + } + + // Create the socket. 
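// Editorial note (not part of this commit): ETH_P_ALL is 0x0003, and
// AF_PACKET sockets take the protocol in network byte order. On a
// little-endian host htons is a plain 16-bit byte swap, which is how the
// 0x0300 constant just below is derived:
//
//	func htons(v uint16) uint16 { return v<<8 | v>>8 }
//
//	htons(0x0003) == 0x0300 // == htons(syscall.ETH_P_ALL)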
+ const protocol = 0x0300 // htons(ETH_P_ALL) + fd, err := syscall.Socket(syscall.AF_PACKET, syscall.SOCK_RAW, protocol) + if err != nil { + return fmt.Errorf("unable to create raw socket: %v", err) + } + deviceFile := os.NewFile(uintptr(fd), "raw-device-fd") + + // Bind to the appropriate device. + ll := syscall.SockaddrLinklayer{ + Protocol: protocol, + Ifindex: ifaceLink.Attrs().Index, + Hatype: 0, // No ARP type. + Pkttype: syscall.PACKET_OTHERHOST, + } + if err := syscall.Bind(fd, &ll); err != nil { + return fmt.Errorf("unable to bind to %q: %v", iface.Name, err) + } + + // Scrape the routes before removing the address, since that + // will remove the routes as well. + routes, def, err := routesForIface(iface) + if err != nil { + return fmt.Errorf("error getting routes for interface %q: %v", iface.Name, err) + } + if def != nil { + if !args.DefaultGateway.Route.Empty() { + return fmt.Errorf("more than one default route found, interface: %v, route: %v, default route: %+v", iface.Name, def, args.DefaultGateway) + } + args.DefaultGateway.Route = *def + args.DefaultGateway.Name = iface.Name + } + + link := boot.FDBasedLink{ + Name: iface.Name, + MTU: iface.MTU, + Routes: routes, + } + + // Collect the addresses for the interface, enable forwarding, + // and remove them from the host. + for _, ifaddr := range ifaddrs { + ipNet, ok := ifaddr.(*net.IPNet) + if !ok { + return fmt.Errorf("address is not IPNet: %t %+v", ifaddr, ifaddr) + } + link.Addresses = append(link.Addresses, ipNet.IP) + + // Steal IP address from NIC. + if err := removeAddress(ifaceLink, ipNet.String()); err != nil { + return fmt.Errorf("error removing address %v from device %q: %v", iface.Name, ipNet, err) + } + } + + args.FilePayload.Files = append(args.FilePayload.Files, deviceFile) + args.FDBasedLinks = append(args.FDBasedLinks, link) + } + + log.Debugf("Setting up network, config: %+v", args) + if err := conn.Call(boot.NetworkCreateLinksAndRoutes, &args, nil); err != nil { + return fmt.Errorf("error creating links and routes: %v", err) + } + return nil +} + +// loopbackLinks collects the links for a loopback interface. +func loopbackLinks(iface net.Interface, addrs []net.Addr) ([]boot.LoopbackLink, error) { + var links []boot.LoopbackLink + for _, addr := range addrs { + ipNet, ok := addr.(*net.IPNet) + if !ok { + return nil, fmt.Errorf("address is not IPNet: %t %+v", addr, addr) + } + links = append(links, boot.LoopbackLink{ + Name: iface.Name, + Addresses: []net.IP{ipNet.IP}, + Routes: []boot.Route{{ + Destination: ipNet.IP.Mask(ipNet.Mask), + Mask: ipNet.Mask, + }}, + }) + } + return links, nil +} + +// routesForIface iterates over all routes for the given interface and converts +// them to boot.Routes. +func routesForIface(iface net.Interface) ([]boot.Route, *boot.Route, error) { + link, err := netlink.LinkByIndex(iface.Index) + if err != nil { + return nil, nil, err + } + rs, err := netlink.RouteList(link, netlink.FAMILY_ALL) + if err != nil { + return nil, nil, fmt.Errorf("error getting routes from %q: %v", iface.Name, err) + } + + var def *boot.Route + var routes []boot.Route + for _, r := range rs { + // Is it a default route? 
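// Editorial note (not part of this commit): rtnetlink reports the default
// route with no destination attribute, so netlink.RouteList yields it with
// Dst == nil and only the gateway set; that is what the nil check just below
// keys off. In `ip route` terms (addresses hypothetical):
//
//	default via 172.17.0.1 dev eth0     <- r.Dst == nil, r.Gw set
//	172.17.0.0/16 dev eth0 scope link   <- r.Dst == 172.17.0.0/16, r.Gw == nil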
+ if r.Dst == nil { + if r.Gw == nil { + return nil, nil, fmt.Errorf("default route with no gateway %q: %+v", iface.Name, r) + } + if def != nil { + return nil, nil, fmt.Errorf("more than one default route found %q, def: %+v, route: %+v", iface.Name, def, r) + } + emptyAddr := net.IPv6zero + if r.Gw.To4() != nil { + emptyAddr = net.IPv4zero + } + // Create a catch all route to the gateway. + def = &boot.Route{ + Destination: emptyAddr, + Mask: net.IPMask(emptyAddr), + Gateway: r.Gw, + } + continue + } + routes = append(routes, boot.Route{ + Destination: r.Dst.IP.Mask(r.Dst.Mask), + Mask: r.Dst.Mask, + }) + } + return routes, def, nil +} + +// removeAddress removes IP address from network device. It's equivalent to: +// ip addr del <ipAndMask> dev <name> +func removeAddress(source netlink.Link, ipAndMask string) error { + addr, err := netlink.ParseAddr(ipAndMask) + if err != nil { + return err + } + return netlink.AddrDel(source, addr) +} diff --git a/runsc/sandbox/sandbox.go b/runsc/sandbox/sandbox.go new file mode 100644 index 000000000..b2fa1d58e --- /dev/null +++ b/runsc/sandbox/sandbox.go @@ -0,0 +1,666 @@ +// Copyright 2018 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package sandbox creates and manipulates sandboxes. +package sandbox + +import ( + "encoding/json" + "fmt" + "io/ioutil" + "os" + "os/exec" + "path/filepath" + "regexp" + "strconv" + "syscall" + "time" + + specs "github.com/opencontainers/runtime-spec/specs-go" + "golang.org/x/sys/unix" + "gvisor.googlesource.com/gvisor/pkg/control/client" + "gvisor.googlesource.com/gvisor/pkg/control/server" + "gvisor.googlesource.com/gvisor/pkg/log" + "gvisor.googlesource.com/gvisor/pkg/sentry/control" + "gvisor.googlesource.com/gvisor/pkg/urpc" + "gvisor.googlesource.com/gvisor/runsc/boot" + "gvisor.googlesource.com/gvisor/runsc/specutils" +) + +// metadataFilename is the name of the metadata file relative to sandboxRoot +// that holds sandbox metadata. +const metadataFilename = "meta.json" + +// See libcontainer/factory_linux.go +var idRegex = regexp.MustCompile(`^[\w+-\.]+$`) + +// validateID validates the sandbox id. +func validateID(id string) error { + if !idRegex.MatchString(id) { + return fmt.Errorf("invalid sandbox id: %v", id) + } + return nil +} + +// Sandbox wraps a child sandbox process, and is responsible for saving and +// loading sandbox metadata to disk. +// +// Within a root directory, we maintain subdirectories for each sandbox named +// with the sandbox id. The sandbox metadata is is stored as json within the +// sandbox directoy in a file named "meta.json". This metadata format is +// defined by us, and is not part of the OCI spec. +// +// Sandboxes must write this metadata file after any change to their internal +// state. The entire sandbox directory is deleted when the sandbox is +// destroyed. +// +// TODO: Protect against concurrent changes to the sandbox metadata +// file. +type Sandbox struct { + // ID is the sandbox ID. 
+ ID string `json:"id"` + + // Spec is the OCI runtime spec that configures this sandbox. + Spec *specs.Spec `json:"spec"` + + // BundleDir is the directory containing the sandbox bundle. + BundleDir string `json:"bundleDir"` + + // SandboxRoot is the directory containing the sandbox metadata file. + SandboxRoot string `json:"sandboxRoot"` + + // CreatedAt is the time the sandbox was created. + CreatedAt time.Time `json:"createdAt"` + + // Owner is the sandbox owner. + Owner string `json:"owner"` + + // ConsoleSocket is the path to a unix domain socket that will receive + // the console FD. It is only used during create, so we don't need to + // store it in the metadata. + ConsoleSocket string `json:"-"` + + // Pid is the pid of the running sandbox. Only valid if Status is + // Created or Running. + Pid int `json:"pid"` + + // GoferPid is the pid of the gofer running along side the sandbox. May be 0 + // if the gofer has been killed or it's not being used. + GoferPid int `json:"goferPid"` + + // Status is the current sandbox Status. + Status Status `json:"status"` +} + +// Create creates the sandbox subprocess and writes the metadata file. Args +// are additional arguments that will be passed to the sandbox process. +func Create(id string, spec *specs.Spec, conf *boot.Config, bundleDir, consoleSocket, pidFile string, args []string) (*Sandbox, error) { + log.Debugf("Create sandbox %q in root dir: %s", id, conf.RootDir) + if err := validateID(id); err != nil { + return nil, err + } + + sandboxRoot := filepath.Join(conf.RootDir, id) + if exists(sandboxRoot) { + return nil, fmt.Errorf("sandbox with id %q already exists: %q ", id, sandboxRoot) + } + + s := &Sandbox{ + ID: id, + Spec: spec, + ConsoleSocket: consoleSocket, + BundleDir: bundleDir, + SandboxRoot: sandboxRoot, + Status: Creating, + Owner: os.Getenv("USER"), + } + + // Create sandbox process. If anything errors between now and the end of this + // function, we MUST clean up all sandbox resources. + if err := s.createProcesses(conf, args); err != nil { + s.Destroy() + return nil, err + } + + // Wait for the control server to come up (or timeout). The sandbox is + // not "created" until that happens. + if err := s.waitForCreated(10 * time.Second); err != nil { + s.Destroy() + return nil, err + } + + s.Status = Created + s.CreatedAt = time.Now() + + // Save the metadata file. + if err := s.save(); err != nil { + s.Destroy() + return nil, err + } + + // Write the pid file. Containerd consideres the create complete after + // this file is created, so it must be the last thing we do. + if pidFile != "" { + if err := ioutil.WriteFile(pidFile, []byte(strconv.Itoa(s.Pid)), 0644); err != nil { + s.Destroy() + return nil, fmt.Errorf("error writing pid file: %v", err) + } + } + + return s, nil +} + +// Run is a helper that calls Create + Start + Wait. +func Run(id string, spec *specs.Spec, conf *boot.Config, bundleDir, consoleSocket, pidFile string, args []string) (syscall.WaitStatus, error) { + s, err := Create(id, spec, conf, bundleDir, consoleSocket, pidFile, args) + if err != nil { + return 0, fmt.Errorf("error creating sandbox: %v", err) + } + if err := s.Start(conf); err != nil { + return 0, fmt.Errorf("error starting sandbox: %v", err) + } + return s.Wait() +} + +// Load loads a sandbox from with the given id from a metadata file. 
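// Editorial sketch (not part of this commit): with the json tags on Sandbox
// above, the meta.json written by save() later in this file looks roughly
// like the following. All values are made up, ConsoleSocket is tagged "-"
// and therefore omitted, and the exact encoding of "status" depends on the
// Status type defined in status.go.
//
//	{
//	  "id": "my-sandbox",
//	  "spec": { ... },
//	  "bundleDir": "/path/to/bundle",
//	  "sandboxRoot": "/var/run/runsc/my-sandbox",
//	  "createdAt": "2018-04-27T10:37:02-07:00",
//	  "owner": "root",
//	  "pid": 1234,
//	  "goferPid": 1235,
//	  "status": ...
//	}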
+func Load(rootDir, id string) (*Sandbox, error) { + log.Debugf("Load sandbox %q %q", rootDir, id) + if err := validateID(id); err != nil { + return nil, err + } + sandboxRoot := filepath.Join(rootDir, id) + if !exists(sandboxRoot) { + return nil, fmt.Errorf("sandbox with id %q does not exist", id) + } + metaFile := filepath.Join(sandboxRoot, metadataFilename) + if !exists(metaFile) { + return nil, fmt.Errorf("sandbox with id %q does not have metadata file %q", id, metaFile) + } + metaBytes, err := ioutil.ReadFile(metaFile) + if err != nil { + return nil, fmt.Errorf("error reading sandbox metadata file %q: %v", metaFile, err) + } + var s Sandbox + if err := json.Unmarshal(metaBytes, &s); err != nil { + return nil, fmt.Errorf("error unmarshaling sandbox metadata from %q: %v", metaFile, err) + } + + // If the status is "Running" or "Created", check that the process + // still exists, and set it to Stopped if it does not. + // + // This is inherintly racey. + if s.Status == Running || s.Status == Created { + // Send signal 0 to check if process exists. + if err := s.Signal(0); err != nil { + // Process no longer exists. + s.Status = Stopped + s.Pid = 0 + } + } + + return &s, nil +} + +// List returns all sandbox ids in the given root directory. +func List(rootDir string) ([]string, error) { + log.Debugf("List sandboxes %q", rootDir) + fs, err := ioutil.ReadDir(rootDir) + if err != nil { + return nil, fmt.Errorf("ReadDir(%s) failed: %v", rootDir, err) + } + var out []string + for _, f := range fs { + out = append(out, f.Name()) + } + return out, nil +} + +// State returns the metadata of the sandbox. +func (s *Sandbox) State() specs.State { + return specs.State{ + Version: specs.Version, + ID: s.ID, + Status: s.Status.String(), + Pid: s.Pid, + Bundle: s.BundleDir, + } +} + +// Start starts running the containerized process inside the sandbox. +func (s *Sandbox) Start(conf *boot.Config) error { + log.Debugf("Start sandbox %q, pid: %d", s.ID, s.Pid) + if s.Status != Created { + return fmt.Errorf("cannot start container in state %s", s.Status) + } + + // "If any prestart hook fails, the runtime MUST generate an error, + // stop and destroy the container". + if s.Spec.Hooks != nil { + if err := executeHooks(s.Spec.Hooks.Prestart, s.State()); err != nil { + s.Destroy() + return err + } + } + + c, err := s.connect() + if err != nil { + s.Destroy() + return err + } + defer c.Close() + + // Configure the network. + if err := setupNetwork(c, s.Pid, s.Spec, conf); err != nil { + s.Destroy() + return fmt.Errorf("error setting up network: %v", err) + } + + // Send a message to the sandbox control server to start the + // application. + if err := c.Call(boot.ApplicationStart, nil, nil); err != nil { + s.Destroy() + return fmt.Errorf("error starting sandbox: %v", err) + } + + // "If any poststart hook fails, the runtime MUST log a warning, but + // the remaining hooks and lifecycle continue as if the hook had + // succeeded". + if s.Spec.Hooks != nil { + executeHooksBestEffort(s.Spec.Hooks.Poststart, s.State()) + } + + s.Status = Running + return s.save() +} + +// Processes retrieves the list of processes and associated metadata inside a +// sandbox. +func (s *Sandbox) Processes() ([]*control.Process, error) { + if s.Status != Running { + return nil, fmt.Errorf("cannot get processes of container %q because it isn't running. 
It is in state %v", s.ID, s.Status) + } + + c, err := s.connect() + if err != nil { + return nil, err + } + defer c.Close() + + var pl []*control.Process + if err := c.Call(boot.ApplicationProcesses, nil, &pl); err != nil { + return nil, fmt.Errorf("error retrieving process data from sandbox: %v", err) + } + return pl, nil +} + +// Execute runs the specified command in the sandbox. +func (s *Sandbox) Execute(e *control.ExecArgs) (syscall.WaitStatus, error) { + log.Debugf("Execute in sandbox %q, pid: %d, args: %+v", s.ID, s.Pid, e) + if s.Status != Created && s.Status != Running { + return 0, fmt.Errorf("cannot exec in container in state %s", s.Status) + } + + log.Debugf("Connecting to sandbox...") + c, err := s.connect() + if err != nil { + return 0, fmt.Errorf("error connecting to control server at pid %d: %v", s.Pid, err) + } + defer c.Close() + + // Send a message to the sandbox control server to start the application. + var waitStatus uint32 + if err := c.Call(boot.ApplicationExecute, e, &waitStatus); err != nil { + return 0, fmt.Errorf("error executing in sandbox: %v", err) + } + + return syscall.WaitStatus(waitStatus), nil +} + +// Event retrieves stats about the sandbox such as memory and CPU utilization. +func (s *Sandbox) Event() (*boot.Event, error) { + if s.Status != Running && s.Status != Created { + return nil, fmt.Errorf("cannot get events for container in state: %s", s.Status) + } + + c, err := s.connect() + if err != nil { + return nil, err + } + defer c.Close() + + var e boot.Event + if err := c.Call(boot.ApplicationEvent, nil, &e); err != nil { + return nil, fmt.Errorf("error retrieving event data from sandbox: %v", err) + } + e.ID = s.ID + return &e, nil +} + +func (s *Sandbox) connect() (*urpc.Client, error) { + log.Debugf("Connecting to sandbox...") + c, err := client.ConnectTo(boot.ControlSocketAddr(s.ID)) + if err != nil { + return nil, fmt.Errorf("error connecting to control server at pid %d: %v", s.Pid, err) + } + return c, nil +} + +func (s *Sandbox) createProcesses(conf *boot.Config, args []string) error { + binPath, err := specutils.BinPath() + if err != nil { + return err + } + + ioFiles, err := s.createGoferProcess(conf, binPath, args) + if err != nil { + return err + } + return s.createSandboxProcess(conf, binPath, args, ioFiles) +} + +func (s *Sandbox) createGoferProcess(conf *boot.Config, binPath string, commonArgs []string) ([]*os.File, error) { + if conf.FileAccess != boot.FileAccessProxy { + // Don't start a gofer. The sandbox will access host FS directly. + return nil, nil + } + + var args []string + args = append(args, commonArgs...) + args = append(args, "gofer", "--bundle", s.BundleDir) + + // Start with root mount and then add any other additional mount. + mountCount := 1 + for _, m := range s.Spec.Mounts { + if specutils.Is9PMount(m) { + mountCount++ + } + } + + sandEnds := make([]*os.File, 0, mountCount) + goferEnds := make([]*os.File, 0, mountCount) + for i := 0; i < mountCount; i++ { + // Create socket that connects the sandbox and gofer. + fds, err := syscall.Socketpair(syscall.AF_UNIX, syscall.SOCK_STREAM, 0) + if err != nil { + return nil, err + } + sandEnds = append(sandEnds, os.NewFile(uintptr(fds[0]), "sandbox io fd")) + + goferEnd := os.NewFile(uintptr(fds[1]), "gofer io fd") + defer goferEnd.Close() + goferEnds = append(goferEnds, goferEnd) + + args = append(args, fmt.Sprintf("--io-fds=%d", 3+i)) + } + + cmd := exec.Command(binPath, args...) 
+ cmd.ExtraFiles = goferEnds + + // Setup any uid/gid mappings, and create or join the configured user + // namespace so the gofer's view of the filesystem aligns with the + // users in the sandbox. + setUIDGIDMappings(cmd, s.Spec) + nss := filterNS([]specs.LinuxNamespaceType{specs.UserNamespace}, s.Spec) + + // Start the gofer in the given namespace. + log.Debugf("Starting gofer: %s %v", binPath, args) + if err := startInNS(cmd, nss); err != nil { + return nil, err + } + s.GoferPid = cmd.Process.Pid + log.Infof("Gofer started, pid: %d", cmd.Process.Pid) + return sandEnds, nil +} + +// createSandboxProcess starts the sandbox as a subprocess by running the "boot" +// command, passing in the bundle dir. +func (s *Sandbox) createSandboxProcess(conf *boot.Config, binPath string, commonArgs []string, ioFiles []*os.File) error { + // nextFD is used to get unused FDs that we can pass to the sandbox. It + // starts at 3 because 0, 1, and 2 are taken by stdin/out/err. + nextFD := 3 + + // Create control server socket here and donate FD to child process because + // it may be in a different network namespace and won't be reachable from + // outside. + fd, err := server.CreateSocket(boot.ControlSocketAddr(s.ID)) + if err != nil { + return fmt.Errorf("error creating control server socket for sandbox %q: %v", s.ID, err) + } + + consoleEnabled := s.ConsoleSocket != "" + + cmd := exec.Command(binPath, commonArgs...) + cmd.SysProcAttr = &syscall.SysProcAttr{} + cmd.Args = append(cmd.Args, + "boot", + "--bundle", s.BundleDir, + "--controller-fd="+strconv.Itoa(nextFD), + fmt.Sprintf("--console=%t", consoleEnabled)) + nextFD++ + + controllerFile := os.NewFile(uintptr(fd), "control_server_socket") + defer controllerFile.Close() + cmd.ExtraFiles = append(cmd.ExtraFiles, controllerFile) + + // If there is a gofer, sends all socket ends to the sandbox. + for _, f := range ioFiles { + defer f.Close() + cmd.ExtraFiles = append(cmd.ExtraFiles, f) + cmd.Args = append(cmd.Args, "--io-fds="+strconv.Itoa(nextFD)) + nextFD++ + } + + // If the console control socket file is provided, then create a new + // pty master/slave pair and set the tty on the sandox process. + if consoleEnabled { + // setupConsole will send the master on the socket, and return + // the slave. + tty, err := setupConsole(s.ConsoleSocket) + if err != nil { + return fmt.Errorf("error setting up control socket %q: %v", s.ConsoleSocket, err) + } + defer tty.Close() + + cmd.Stdin = tty + cmd.Stdout = tty + cmd.Stderr = tty + cmd.SysProcAttr.Setctty = true + cmd.SysProcAttr.Ctty = int(tty.Fd()) + } else { + cmd.Stdin = os.Stdin + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + } + + // Detach from this session, otherwise cmd will get SIGHUP and SIGCONT + // when re-parented. + cmd.SysProcAttr.Setsid = true + + // nss is the set of namespaces to join or create before starting the sandbox + // process. IPC and UTS namespaces from the host are not used as they + // are virtualized inside the sandbox. Be paranoid and run inside an empty + // namespace for these. + log.Infof("Sandbox will be started in empty IPC and UTS namespaces") + nss := []specs.LinuxNamespace{ + specs.LinuxNamespace{Type: specs.IPCNamespace}, + specs.LinuxNamespace{Type: specs.UTSNamespace}, + } + + if conf.Platform == boot.PlatformPtrace { + // TODO: Also set an empty PID namespace so that we limit + // access to other host processes. 
+ log.Infof("Sandbox will be started in the current PID namespace") + } else { + log.Infof("Sandbox will be started in empty PID namespace") + nss = append(nss, specs.LinuxNamespace{Type: specs.PIDNamespace}) + } + + if conf.FileAccess == boot.FileAccessProxy { + log.Infof("Sandbox will be started in empty mount namespace") + nss = append(nss, specs.LinuxNamespace{Type: specs.MountNamespace}) + } else { + log.Infof("Sandbox will be started in the current mount namespace") + } + + // Joins the network namespace if network is enabled. the sandbox talks + // directly to the host network, which may have been configured in the + // namespace. + if ns, ok := getNS(specs.NetworkNamespace, s.Spec); ok && conf.Network != boot.NetworkNone { + log.Infof("Sandbox will be started in the container's network namespace: %+v", ns) + nss = append(nss, ns) + } else { + log.Infof("Sandbox will be started in empty network namespace") + nss = append(nss, specs.LinuxNamespace{Type: specs.NetworkNamespace}) + } + + // User namespace depends on the following options: + // - Host network/filesystem: requires to run inside the user namespace + // specified in the spec or the current namespace if none is configured. + // - Gofer: when using a Gofer, the sandbox process can run isolated in an + // empty namespace. + if conf.Network == boot.NetworkHost || conf.FileAccess == boot.FileAccessDirect { + if userns, ok := getNS(specs.UserNamespace, s.Spec); ok { + log.Infof("Sandbox will be started in container's user namespace: %+v", userns) + nss = append(nss, userns) + setUIDGIDMappings(cmd, s.Spec) + } else { + // TODO: Retrict capabilities since it's using current user + // namespace, i.e. root. + log.Infof("Sandbox will be started in the current user namespace") + } + // When running in the caller's defined user namespace, apply the same + // capabilities to the sandbox process to ensure it abides to the same + // rules. + cmd.Args = append(cmd.Args, "--apply-caps=true") + + } else { + log.Infof("Sandbox will be started in empty user namespace") + nss = append(nss, specs.LinuxNamespace{Type: specs.UserNamespace}) + } + + log.Debugf("Starting sandbox: %s %v", binPath, cmd.Args) + if err := startInNS(cmd, nss); err != nil { + return err + } + s.Pid = cmd.Process.Pid + log.Infof("Sandbox started, pid: %d", s.Pid) + return nil +} + +// waitForCreated waits for the sandbox subprocess control server to be +// running, at which point the sandbox is in Created state. +func (s *Sandbox) waitForCreated(timeout time.Duration) error { + log.Debugf("Waiting for sandbox %q creation", s.ID) + tchan := time.After(timeout) + for { + select { + case <-tchan: + return fmt.Errorf("timed out waiting for sandbox control server") + default: + if c, err := client.ConnectTo(boot.ControlSocketAddr(s.ID)); err == nil { + // It's alive! + c.Close() + return nil + } + } + } +} + +// Wait waits for the containerized process to exit, and returns its WaitStatus. +func (s *Sandbox) Wait() (syscall.WaitStatus, error) { + log.Debugf("Wait on sandbox %q with pid %d", s.ID, s.Pid) + p, err := os.FindProcess(s.Pid) + if err != nil { + // "On Unix systems, FindProcess always succeeds and returns a + // Process for the given pid." + panic(err) + } + ps, err := p.Wait() + if err != nil { + return 0, err + } + return ps.Sys().(syscall.WaitStatus), nil +} + +// Destroy frees all resources associated with the sandbox. +func (s *Sandbox) Destroy() error { + log.Debugf("Destroy sandbox %q", s.ID) + if s.Pid != 0 { + // TODO: Too harsh? 
+ log.Debugf("Killing sandbox %q", s.ID) + sendSignal(s.Pid, unix.SIGKILL) + s.Pid = 0 + } + if s.GoferPid != 0 { + log.Debugf("Killing gofer for sandbox %q", s.ID) + sendSignal(s.GoferPid, unix.SIGKILL) + s.GoferPid = 0 + } + if err := os.RemoveAll(s.SandboxRoot); err != nil { + log.Warningf("Failed to delete sandbox root directory %q, err: %v", s.SandboxRoot, err) + } + + // "If any poststop hook fails, the runtime MUST log a warning, but the + // remaining hooks and lifecycle continue as if the hook had succeeded". + if s.Spec.Hooks != nil && (s.Status == Created || s.Status == Running) { + executeHooksBestEffort(s.Spec.Hooks.Poststop, s.State()) + } + + s.Status = Stopped + return nil +} + +// Signal sends the signal to the sandbox. +func (s *Sandbox) Signal(sig syscall.Signal) error { + log.Debugf("Signal sandbox %q", s.ID) + if s.Status == Stopped { + log.Warningf("sandbox %q not running, not sending signal %v to pid %d", s.ID, sig, s.Pid) + return nil + } + return sendSignal(s.Pid, sig) +} + +func sendSignal(pid int, sig syscall.Signal) error { + if err := syscall.Kill(pid, sig); err != nil { + return fmt.Errorf("error sending signal %d to pid %d: %v", sig, pid, err) + } + return nil +} + +// save saves the sandbox metadata to a file. +func (s *Sandbox) save() error { + log.Debugf("Save sandbox %q", s.ID) + if err := os.MkdirAll(s.SandboxRoot, 0711); err != nil { + return fmt.Errorf("error creating sandbox root directory %q: %v", s.SandboxRoot, err) + } + meta, err := json.Marshal(s) + if err != nil { + return fmt.Errorf("error marshaling sandbox metadata: %v", err) + } + metaFile := filepath.Join(s.SandboxRoot, metadataFilename) + if err := ioutil.WriteFile(metaFile, meta, 0640); err != nil { + return fmt.Errorf("error writing sandbox metadata: %v", err) + } + return nil +} + +// exists returns true if the given file exists. +func exists(f string) bool { + if _, err := os.Stat(f); err == nil { + return true + } else if !os.IsNotExist(err) { + log.Warningf("error checking for file %q: %v", f, err) + } + return false +} diff --git a/runsc/sandbox/sandbox_test.go b/runsc/sandbox/sandbox_test.go new file mode 100644 index 000000000..6c71cac30 --- /dev/null +++ b/runsc/sandbox/sandbox_test.go @@ -0,0 +1,649 @@ +// Copyright 2018 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package sandbox_test + +import ( + "encoding/json" + "fmt" + "io" + "io/ioutil" + "os" + "os/signal" + "path/filepath" + "reflect" + "strings" + "syscall" + "testing" + "time" + + "context" + "flag" + "github.com/google/subcommands" + specs "github.com/opencontainers/runtime-spec/specs-go" + "golang.org/x/sys/unix" + "gvisor.googlesource.com/gvisor/pkg/abi/linux" + "gvisor.googlesource.com/gvisor/pkg/log" + "gvisor.googlesource.com/gvisor/pkg/sentry/control" + "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth" + "gvisor.googlesource.com/gvisor/pkg/unet" + "gvisor.googlesource.com/gvisor/runsc/boot" + "gvisor.googlesource.com/gvisor/runsc/cmd" + "gvisor.googlesource.com/gvisor/runsc/sandbox" +) + +func init() { + log.SetLevel(log.Debug) +} + +// writeSpec writes the spec to disk in the given directory. +func writeSpec(dir string, spec *specs.Spec) error { + b, err := json.Marshal(spec) + if err != nil { + return err + } + return ioutil.WriteFile(filepath.Join(dir, "config.json"), b, 0755) +} + +// newSpecWithArgs creates a simple spec with the given args suitable for use +// in tests. +func newSpecWithArgs(args ...string) *specs.Spec { + spec := &specs.Spec{ + // The host filesystem root is the sandbox root. + Root: &specs.Root{ + Path: "/", + Readonly: true, + }, + Process: &specs.Process{ + Args: args, + Env: []string{ + "PATH=" + os.Getenv("PATH"), + }, + }, + } + return spec +} + +// shutdownSignal will be sent to the sandbox in order to shut down cleanly. +const shutdownSignal = syscall.SIGUSR2 + +// setupSandbox creates a bundle and root dir for the sandbox, generates a test +// config, and writes the spec to config.json in the bundle dir. +func setupSandbox(spec *specs.Spec) (rootDir, bundleDir string, conf *boot.Config, err error) { + rootDir, err = ioutil.TempDir("", "sandboxes") + if err != nil { + return "", "", nil, fmt.Errorf("error creating root dir: %v", err) + } + + bundleDir, err = ioutil.TempDir("", "bundle") + if err != nil { + return "", "", nil, fmt.Errorf("error creating bundle dir: %v", err) + } + + if err = writeSpec(bundleDir, spec); err != nil { + return "", "", nil, fmt.Errorf("error writing spec: %v", err) + } + + conf = &boot.Config{ + RootDir: rootDir, + Network: boot.NetworkNone, + } + + return rootDir, bundleDir, conf, nil +} + +// uniqueSandboxID generates a unique sandbox id for each test. +// +// The sandbox id is used to create an abstract unix domain socket, which must +// be unique. While the sandbox forbids creating two sandboxes with the same +// name, sometimes between test runs the socket does not get cleaned up quickly +// enough, causing sandbox creation to fail. +func uniqueSandboxID() string { + return fmt.Sprintf("test-sandbox-%d", time.Now().UnixNano()) +} + +// waitForProcessList waits for the given process list to show up in the sandbox. +func waitForProcessList(s *sandbox.Sandbox, expected []*control.Process) error { + var got []*control.Process + for start := time.Now(); time.Now().Sub(start) < 10*time.Second; { + var err error + got, err := s.Processes() + if err != nil { + return fmt.Errorf("error getting process data from sandbox: %v", err) + } + if procListsEqual(got, expected) { + return nil + } + // Process might not have started, try again... + time.Sleep(10 * time.Millisecond) + } + return fmt.Errorf("sandbox got process list: %s, want: %s", procListToString(got), procListToString(expected)) +} + +// TestLifecycle tests the basic Create/Start/Signal/Destory sandbox lifecycle. 
+// It verifies after each step that the sandbox can be loaded from disk, and +// has the correct status. +func TestLifecycle(t *testing.T) { + // The sandbox will just sleep for a long time. We will kill it before + // it finishes sleeping. + spec := newSpecWithArgs("sleep", "100") + + rootDir, bundleDir, conf, err := setupSandbox(spec) + if err != nil { + t.Fatalf("error setting up sandbox: %v", err) + } + defer os.RemoveAll(rootDir) + defer os.RemoveAll(bundleDir) + + // expectedPL lists the expected process state of the sandbox. + expectedPL := []*control.Process{ + { + UID: 0, + PID: 1, + PPID: 0, + C: 0, + Cmd: "sleep", + }, + } + // Create the sandbox. + id := uniqueSandboxID() + if _, err := sandbox.Create(id, spec, conf, bundleDir, "", "", nil); err != nil { + t.Fatalf("error creating sandbox: %v", err) + } + // Load the sandbox from disk and check the status. + s, err := sandbox.Load(rootDir, id) + if err != nil { + t.Fatalf("error loading sandbox: %v", err) + } + if got, want := s.Status, sandbox.Created; got != want { + t.Errorf("sandbox status got %v, want %v", got, want) + } + + // List should return the sandbox id. + ids, err := sandbox.List(rootDir) + if err != nil { + t.Fatalf("error listing sandboxes: %v", err) + } + if got, want := ids, []string{id}; !reflect.DeepEqual(got, want) { + t.Errorf("sandbox list got %v, want %v", got, want) + } + + // Start the sandbox. + if err := s.Start(conf); err != nil { + t.Fatalf("error starting sandbox: %v", err) + } + // Load the sandbox from disk and check the status. + s, err = sandbox.Load(rootDir, id) + if err != nil { + t.Fatalf("error loading sandbox: %v", err) + } + if got, want := s.Status, sandbox.Running; got != want { + t.Errorf("sandbox status got %v, want %v", got, want) + } + + // Verify that "sleep 100" is running. + if err := waitForProcessList(s, expectedPL); err != nil { + t.Error(err) + } + + // Send the sandbox a signal, which we catch and use to cleanly + // shutdown. + if err := s.Signal(shutdownSignal); err != nil { + t.Fatalf("error sending signal %v to sandbox: %v", shutdownSignal, err) + } + // Wait for it to die. + if _, err := s.Wait(); err != nil { + t.Fatalf("error waiting on sandbox: %v", err) + } + // Load the sandbox from disk and check the status. + s, err = sandbox.Load(rootDir, id) + if err != nil { + t.Fatalf("error loading sandbox: %v", err) + } + if got, want := s.Status, sandbox.Stopped; got != want { + t.Errorf("sandbox status got %v, want %v", got, want) + } + + // Destroy the sandbox. + if err := s.Destroy(); err != nil { + t.Fatalf("error destroying sandbox: %v", err) + } + + // List should not return the sandbox id. + ids, err = sandbox.List(rootDir) + if err != nil { + t.Fatalf("error listing sandboxes: %v", err) + } + if len(ids) != 0 { + t.Errorf("expected sandbox list to be empty, but got %v", ids) + } + + // Loading the sandbox by id should fail. + if _, err = sandbox.Load(rootDir, id); err == nil { + t.Errorf("expected loading destroyed sandbox to fail, but it did not") + } +} + +// Test the we can execute the application with different path formats. 
+func TestExePath(t *testing.T) { + for _, test := range []struct { + path string + success bool + }{ + {path: "true", success: true}, + {path: "bin/true", success: true}, + {path: "/bin/true", success: true}, + {path: "thisfiledoesntexit", success: false}, + {path: "bin/thisfiledoesntexit", success: false}, + {path: "/bin/thisfiledoesntexit", success: false}, + } { + spec := newSpecWithArgs(test.path) + rootDir, bundleDir, conf, err := setupSandbox(spec) + if err != nil { + t.Fatalf("exec: %s, error setting up sandbox: %v", test.path, err) + } + + ws, err := sandbox.Run(uniqueSandboxID(), spec, conf, bundleDir, "", "", nil) + + os.RemoveAll(rootDir) + os.RemoveAll(bundleDir) + + if test.success { + if err != nil { + t.Errorf("exec: %s, error running sandbox: %v", test.path, err) + } + if ws.ExitStatus() != 0 { + t.Errorf("exec: %s, got exit status %v want %v", test.path, ws.ExitStatus(), 0) + } + } else { + if err == nil { + t.Errorf("exec: %s, got: no error, want: error", test.path) + } + } + } +} + +// Test the we can retrieve the application exit status from the sandbox. +func TestAppExitStatus(t *testing.T) { + // First sandbox will succeed. + succSpec := newSpecWithArgs("true") + + rootDir, bundleDir, conf, err := setupSandbox(succSpec) + if err != nil { + t.Fatalf("error setting up sandbox: %v", err) + } + defer os.RemoveAll(rootDir) + defer os.RemoveAll(bundleDir) + + ws, err := sandbox.Run(uniqueSandboxID(), succSpec, conf, bundleDir, "", "", nil) + if err != nil { + t.Fatalf("error running sandbox: %v", err) + } + if ws.ExitStatus() != 0 { + t.Errorf("got exit status %v want %v", ws.ExitStatus(), 0) + } + + // Second sandbox exits with non-zero status. + wantStatus := 123 + errSpec := newSpecWithArgs("bash", "-c", fmt.Sprintf("exit %d", wantStatus)) + + rootDir2, bundleDir2, conf, err := setupSandbox(errSpec) + if err != nil { + t.Fatalf("error setting up sandbox: %v", err) + } + defer os.RemoveAll(rootDir2) + defer os.RemoveAll(bundleDir2) + + ws, err = sandbox.Run(uniqueSandboxID(), succSpec, conf, bundleDir2, "", "", nil) + if err != nil { + t.Fatalf("error running sandbox: %v", err) + } + if ws.ExitStatus() != wantStatus { + t.Errorf("got exit status %v want %v", ws.ExitStatus(), wantStatus) + } +} + +// TestExec verifies that a sandbox can exec a new program. +func TestExec(t *testing.T) { + const uid = 343 + spec := newSpecWithArgs("sleep", "100") + + rootDir, bundleDir, conf, err := setupSandbox(spec) + if err != nil { + t.Fatalf("error setting up sandbox: %v", err) + } + defer os.RemoveAll(rootDir) + defer os.RemoveAll(bundleDir) + + // Create and start the sandbox. + s, err := sandbox.Create(uniqueSandboxID(), spec, conf, bundleDir, "", "", nil) + if err != nil { + t.Fatalf("error creating sandbox: %v", err) + } + defer s.Destroy() + if err := s.Start(conf); err != nil { + t.Fatalf("error starting sandbox: %v", err) + } + + // expectedPL lists the expected process state of the sandbox. + expectedPL := []*control.Process{ + { + UID: 0, + PID: 1, + PPID: 0, + C: 0, + Cmd: "sleep", + }, + { + UID: uid, + PID: 2, + PPID: 0, + C: 0, + Cmd: "sleep", + }, + } + + // Verify that "sleep 100" is running. + if err := waitForProcessList(s, expectedPL[:1]); err != nil { + t.Error(err) + } + + execArgs := control.ExecArgs{ + Filename: "/bin/sleep", + Argv: []string{"sleep", "5"}, + Envv: []string{"PATH=" + os.Getenv("PATH")}, + WorkingDirectory: "/", + KUID: uid, + Detach: false, + } + + // Verify that "sleep 100" and "sleep 5" are running after exec. 
+	// First, start running exec (which blocks).
+	status := make(chan error, 1)
+	go func() {
+		exitStatus, err := s.Execute(&execArgs)
+		if err != nil {
+			status <- err
+		} else if exitStatus != 0 {
+			status <- fmt.Errorf("failed with exit status: %v", exitStatus)
+		} else {
+			status <- nil
+		}
+	}()
+
+	if err := waitForProcessList(s, expectedPL); err != nil {
+		t.Fatal(err)
+	}
+
+	// Ensure that exec finished without error.
+	select {
+	case <-time.After(10 * time.Second):
+		t.Fatalf("sandbox timed out waiting for exec to finish.")
+	case st := <-status:
+		if st != nil {
+			t.Errorf("sandbox failed to exec %v: %v", execArgs, st)
+		}
+	}
+}
+
+// TestCapabilities verifies that:
+// - Running exec as non-root UID and GID will result in an error (because the
+//   executable file can't be read).
+// - Running exec as non-root with CAP_DAC_OVERRIDE succeeds because it skips
+//   this check.
+func TestCapabilities(t *testing.T) {
+	const uid = 343
+	const gid = 2401
+	spec := newSpecWithArgs("sleep", "100")
+
+	// We generate files in the host temporary directory.
+	spec.Mounts = append(spec.Mounts, specs.Mount{
+		Destination: os.TempDir(),
+		Source:      os.TempDir(),
+		Type:        "bind",
+	})
+
+	rootDir, bundleDir, conf, err := setupSandbox(spec)
+	if err != nil {
+		t.Fatalf("error setting up sandbox: %v", err)
+	}
+	defer os.RemoveAll(rootDir)
+	defer os.RemoveAll(bundleDir)
+
+	// Create and start the sandbox.
+	s, err := sandbox.Create(uniqueSandboxID(), spec, conf, bundleDir, "", "", nil)
+	if err != nil {
+		t.Fatalf("error creating sandbox: %v", err)
+	}
+	defer s.Destroy()
+	if err := s.Start(conf); err != nil {
+		t.Fatalf("error starting sandbox: %v", err)
+	}
+
+	// expectedPL lists the expected process state of the sandbox.
+	expectedPL := []*control.Process{
+		{
+			UID:  0,
+			PID:  1,
+			PPID: 0,
+			C:    0,
+			Cmd:  "sleep",
+		},
+		{
+			UID:  uid,
+			PID:  2,
+			PPID: 0,
+			C:    0,
+			Cmd:  "exe",
+		},
+	}
+	if err := waitForProcessList(s, expectedPL[:1]); err != nil {
+		t.Fatalf("Failed to wait for sleep to start, err: %v", err)
+	}
+
+	// Create an executable that can't be run with the specified UID:GID.
+	// This shouldn't be callable within the sandbox until we add the
+	// CAP_DAC_OVERRIDE capability to skip the access check.
+	exePath := filepath.Join(rootDir, "exe")
+	if err := ioutil.WriteFile(exePath, []byte("#!/bin/sh\necho hello"), 0770); err != nil {
+		t.Fatalf("couldn't create executable: %v", err)
+	}
+	defer os.Remove(exePath)
+
+	// Need to traverse the intermediate directory.
+	os.Chmod(rootDir, 0755)
+
+	execArgs := control.ExecArgs{
+		Filename:         exePath,
+		Argv:             []string{exePath},
+		Envv:             []string{"PATH=" + os.Getenv("PATH")},
+		WorkingDirectory: "/",
+		KUID:             uid,
+		KGID:             gid,
+		Capabilities:     &auth.TaskCapabilities{},
+		Detach:           true,
+	}
+
+	// "exe" should fail because we don't have the necessary permissions.
+	if _, err := s.Execute(&execArgs); err == nil {
+		t.Fatalf("sandbox executed without error, but an error was expected")
+	}
+
+	// Now we run with the capability enabled and should succeed.
+	execArgs.Capabilities = &auth.TaskCapabilities{
+		EffectiveCaps: auth.CapabilitySetOf(linux.CAP_DAC_OVERRIDE),
+	}
+	// First, start running exec.
+	if _, err := s.Execute(&execArgs); err != nil {
+		t.Fatalf("sandbox failed to exec %v: %v", execArgs, err)
+	}
+
+	if err := waitForProcessList(s, expectedPL); err != nil {
+		t.Error(err)
+	}
+}
+
+// Test that a tty FD is sent over the console socket if one is provided.
+func TestConsoleSocket(t *testing.T) {
+	spec := newSpecWithArgs("true")
+	rootDir, bundleDir, conf, err := setupSandbox(spec)
+	if err != nil {
+		t.Fatalf("error setting up sandbox: %v", err)
+	}
+	defer os.RemoveAll(rootDir)
+	defer os.RemoveAll(bundleDir)
+
+	// Create a named socket and start listening. We use a relative path
+	// to avoid overflowing the unix path length limit (108 chars).
+	socketPath := filepath.Join(bundleDir, "socket")
+	cwd, err := os.Getwd()
+	if err != nil {
+		t.Fatalf("error getting cwd: %v", err)
+	}
+	socketRelPath, err := filepath.Rel(cwd, socketPath)
+	if err != nil {
+		t.Fatalf("error getting relative path for %q from cwd %q: %v", socketPath, cwd, err)
+	}
+	if len(socketRelPath) > len(socketPath) {
+		socketRelPath = socketPath
+	}
+	srv, err := unet.BindAndListen(socketRelPath, false)
+	if err != nil {
+		t.Fatalf("error binding and listening to socket %q: %v", socketPath, err)
+	}
+	defer os.Remove(socketPath)
+
+	// Create the sandbox and pass the socket name.
+	id := uniqueSandboxID()
+	s, err := sandbox.Create(id, spec, conf, bundleDir, socketRelPath, "", nil)
+	if err != nil {
+		t.Fatalf("error creating sandbox: %v", err)
+	}
+
+	// Open the other end of the socket.
+	sock, err := srv.Accept()
+	if err != nil {
+		t.Fatalf("error accepting socket connection: %v", err)
+	}
+
+	// Allow 1 FD to be received; that is all we expect.
+	r := sock.Reader(true /* blocking */)
+	r.EnableFDs(1)
+
+	// The socket is closed right after sending the FD, so EOF is
+	// an allowed error.
+	b := [][]byte{{}}
+	if _, err := r.ReadVec(b); err != nil && err != io.EOF {
+		t.Fatalf("error reading from socket connection: %v", err)
+	}
+
+	// We should have gotten a control message.
+	fds, err := r.ExtractFDs()
+	if err != nil {
+		t.Fatalf("error extracting fds from socket connection: %v", err)
+	}
+	if len(fds) != 1 {
+		t.Fatalf("got %d fds from socket, wanted 1", len(fds))
+	}
+
+	// Verify that the fd is a terminal.
+	if _, err := unix.IoctlGetTermios(fds[0], unix.TCGETS); err != nil {
+		t.Errorf("fd is not a terminal (ioctl TCGETS got %v)", err)
+	}
+
+	// Shut it down.
+	if err := s.Destroy(); err != nil {
+		t.Fatalf("error destroying sandbox: %v", err)
+	}
+
+	// Close the socket.
+	if err := srv.Close(); err != nil {
+		t.Fatalf("error closing socket: %v", err)
+	}
+}
+
+// procListsEqual is used to check whether 2 Process lists are equal for all
+// implemented fields.
+func procListsEqual(got, want []*control.Process) bool {
+	if len(got) != len(want) {
+		return false
+	}
+	for i := range got {
+		pd1 := got[i]
+		pd2 := want[i]
+		// Zero out unimplemented and timing dependent fields.
+		pd1.Time, pd2.Time = "", ""
+		pd1.STime, pd2.STime = "", ""
+		pd1.C, pd2.C = 0, 0
+		if *pd1 != *pd2 {
+			return false
+		}
+	}
+	return true
+}
+
+func procListToString(pl []*control.Process) string {
+	strs := make([]string, 0, len(pl))
+	for _, p := range pl {
+		strs = append(strs, fmt.Sprintf("%+v", p))
+	}
+	return fmt.Sprintf("[%s]", strings.Join(strs, ","))
+}
+
+// TestMain acts like runsc if it is called with the "boot" argument, otherwise
+// it just runs the tests. This is required because creating a sandbox will
+// call "/proc/self/exe boot". Normally /proc/self/exe is the runsc binary,
+// but for tests we have to fake it.
+func TestMain(m *testing.M) {
+	// exit writes coverage data before exiting.
+	exit := func(status int) {
+		os.Exit(status)
+	}
+
+	if !flag.Parsed() {
+		flag.Parse()
+	}
+
+	// If we are passed one of the commands then run it.
+	subcommands.Register(new(cmd.Boot), "boot")
+	subcommands.Register(new(cmd.Gofer), "gofer")
+	switch flag.Arg(0) {
+	case "boot", "gofer":
+		// Run the command in a goroutine so we can block the main
+		// thread waiting for shutdownSignal.
+		go func() {
+			conf := &boot.Config{
+				RootDir: "unused-root-dir",
+				Network: boot.NetworkNone,
+			}
+			var ws syscall.WaitStatus
+			subcmdCode := subcommands.Execute(context.Background(), conf, &ws)
+			if subcmdCode != subcommands.ExitSuccess {
+				panic(fmt.Sprintf("command failed to execute, err: %v", subcmdCode))
+			}
+			// Sandbox exited normally. Shut down this process.
+			os.Exit(ws.ExitStatus())
+		}()
+
+		// Shutdown cleanly when the shutdownSignal is received. This
+		// allows us to write coverage data before exiting.
+		sigc := make(chan os.Signal, 1)
+		signal.Notify(sigc, shutdownSignal)
+		<-sigc
+		exit(0)
+	default:
+		// Otherwise run the tests.
+		exit(m.Run())
+	}
+}
diff --git a/runsc/sandbox/status.go b/runsc/sandbox/status.go
new file mode 100644
index 000000000..6fc936aba
--- /dev/null
+++ b/runsc/sandbox/status.go
@@ -0,0 +1,56 @@
+// Copyright 2018 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package sandbox
+
+// Status enumerates sandbox statuses. The statuses and their semantics are
+// part of the runtime CLI spec.
+//
+// TODO: Get precise about the transitions between statuses.
+type Status int
+
+const (
+	// Creating indicates "the container is being created".
+	Creating Status = iota
+
+	// Created indicates "the runtime has finished the create operation and
+	// the container process has neither exited nor executed the
+	// user-specified program".
+	Created
+
+	// Running indicates "the container process has executed the
+	// user-specified program but has not exited".
+	Running
+
+	// Stopped indicates "the container process has exited".
+	Stopped
+)
+
+// String converts a Status to a string. These strings are part of the runtime
+// CLI spec and should not be changed.
+func (s Status) String() string {
+	switch s {
+	case Creating:
+		return "creating"
+	case Created:
+		return "created"
+	case Running:
+		return "running"
+	case Stopped:
+		return "stopped"
+	default:
+		return "unknown"
+	}
+
+}
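For reference, a minimal, hypothetical sketch of the create/start/wait/destroy sequence that TestLifecycle and TestAppExitStatus exercise above. It is not part of this change: it assumes the caller already holds an OCI spec (spec), a boot.Config (conf), a sandbox ID (id), and a bundle directory (bundleDir), the argument order simply mirrors the test calls, and error handling is abbreviated.

	// Create the sandbox process; the two empty strings and the nil argument
	// mirror the placeholder values passed by the tests above.
	s, err := sandbox.Create(id, spec, conf, bundleDir, "", "", nil)
	if err != nil {
		return err
	}
	defer s.Destroy()

	// Start the user-specified program inside the sandbox.
	if err := s.Start(conf); err != nil {
		return err
	}

	// Wait blocks until the application exits and returns its wait status.
	ws, err := s.Wait()
	if err != nil {
		return err
	}
	fmt.Printf("application exited with status %d\n", ws.ExitStatus())

sandbox.Run, used by TestExePath and TestAppExitStatus, combines the create/start/wait steps into a single call and returns the same wait status.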