Diffstat (limited to 'runsc/sandbox')
-rw-r--r--  runsc/sandbox/network.go         | 375
-rw-r--r--  runsc/sandbox/network_unsafe.go  |  56
-rw-r--r--  runsc/sandbox/sandbox.go         | 992
3 files changed, 1423 insertions, 0 deletions
diff --git a/runsc/sandbox/network.go b/runsc/sandbox/network.go new file mode 100644 index 000000000..0460d5f1a --- /dev/null +++ b/runsc/sandbox/network.go @@ -0,0 +1,375 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package sandbox + +import ( + "fmt" + "net" + "os" + "path/filepath" + "runtime" + "strconv" + "strings" + "syscall" + + specs "github.com/opencontainers/runtime-spec/specs-go" + "github.com/vishvananda/netlink" + "golang.org/x/sys/unix" + "gvisor.googlesource.com/gvisor/pkg/log" + "gvisor.googlesource.com/gvisor/pkg/urpc" + "gvisor.googlesource.com/gvisor/runsc/boot" + "gvisor.googlesource.com/gvisor/runsc/specutils" +) + +const ( + // Annotations used to indicate whether the container corresponds to a + // pod or a container within a pod. + crioContainerTypeAnnotation = "io.kubernetes.cri-o.ContainerType" + containerdContainerTypeAnnotation = "io.kubernetes.cri.container-type" +) + +// setupNetwork configures the network stack to mimic the local network +// configuration. Docker uses network namespaces with vnets to configure the +// network for the container. The untrusted app expects to see the same network +// inside the sandbox. Routing and port mapping is handled directly by docker +// with most of network information not even available to the runtime. +// +// Netstack inside the sandbox speaks directly to the device using a raw socket. +// All IP addresses assigned to the NIC, are removed and passed on to netstack's +// device. +// +// If 'conf.Network' is NoNetwork, skips local configuration and creates a +// loopback interface only. +// +// Run the following container to test it: +// docker run -di --runtime=runsc -p 8080:80 -v $PWD:/usr/local/apache2/htdocs/ httpd:2.4 +func setupNetwork(conn *urpc.Client, pid int, spec *specs.Spec, conf *boot.Config) error { + log.Infof("Setting up network") + + switch conf.Network { + case boot.NetworkNone: + log.Infof("Network is disabled, create loopback interface only") + if err := createDefaultLoopbackInterface(conn); err != nil { + return fmt.Errorf("creating default loopback interface: %v", err) + } + case boot.NetworkSandbox: + // Build the path to the net namespace of the sandbox process. + // This is what we will copy. + nsPath := filepath.Join("/proc", strconv.Itoa(pid), "ns/net") + if err := createInterfacesAndRoutesFromNS(conn, nsPath, conf.GSO); err != nil { + return fmt.Errorf("creating interfaces from net namespace %q: %v", nsPath, err) + } + case boot.NetworkHost: + // Nothing to do here. 
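For context, the NetworkSandbox branch above derives the namespace path from the sandbox PID before copying it. A minimal standalone sketch of that derivation and check (the netNSPath helper is illustrative, not part of this change):

package main

import (
	"fmt"
	"os"
	"path/filepath"
	"strconv"
)

// netNSPath returns the /proc path of a process's network namespace.
func netNSPath(pid int) string {
	return filepath.Join("/proc", strconv.Itoa(pid), "ns/net")
}

func main() {
	p := netNSPath(os.Getpid())
	if _, err := os.Stat(p); err != nil {
		fmt.Fprintln(os.Stderr, "no net namespace:", err)
		return
	}
	fmt.Println(p) // e.g. /proc/1234/ns/net
}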
+ default: + return fmt.Errorf("invalid network type: %d", conf.Network) + } + return nil +} + +func createDefaultLoopbackInterface(conn *urpc.Client) error { + link := boot.LoopbackLink{ + Name: "lo", + Addresses: []net.IP{ + net.IP("\x7f\x00\x00\x01"), + net.IPv6loopback, + }, + Routes: []boot.Route{ + { + Destination: net.IP("\x7f\x00\x00\x00"), + Mask: net.IPMask("\xff\x00\x00\x00"), + }, + { + Destination: net.IPv6loopback, + Mask: net.IPMask(strings.Repeat("\xff", 16)), + }, + }, + } + if err := conn.Call(boot.NetworkCreateLinksAndRoutes, &boot.CreateLinksAndRoutesArgs{ + LoopbackLinks: []boot.LoopbackLink{link}, + }, nil); err != nil { + return fmt.Errorf("creating loopback link and routes: %v", err) + } + return nil +} + +func joinNetNS(nsPath string) (func(), error) { + runtime.LockOSThread() + restoreNS, err := specutils.ApplyNS(specs.LinuxNamespace{ + Type: specs.NetworkNamespace, + Path: nsPath, + }) + if err != nil { + runtime.UnlockOSThread() + return nil, fmt.Errorf("joining net namespace %q: %v", nsPath, err) + } + return func() { + restoreNS() + runtime.UnlockOSThread() + }, nil +} + +// isRootNS determines whether we are running in the root net namespace. +// /proc/sys/net/core/rmem_default only exists in root network namespace. +func isRootNS() (bool, error) { + err := syscall.Access("/proc/sys/net/core/rmem_default", syscall.F_OK) + switch err { + case nil: + return true, nil + case syscall.ENOENT: + return false, nil + default: + return false, fmt.Errorf("failed to access /proc/sys/net/core/rmem_default: %v", err) + } +} + +// createInterfacesAndRoutesFromNS scrapes the interface and routes from the +// net namespace with the given path, creates them in the sandbox, and removes +// them from the host. +func createInterfacesAndRoutesFromNS(conn *urpc.Client, nsPath string, enableGSO bool) error { + // Join the network namespace that we will be copying. + restore, err := joinNetNS(nsPath) + if err != nil { + return err + } + defer restore() + + // Get all interfaces in the namespace. + ifaces, err := net.Interfaces() + if err != nil { + return fmt.Errorf("querying interfaces: %v", err) + } + + isRoot, err := isRootNS() + if err != nil { + return err + } + if isRoot { + + return fmt.Errorf("cannot run with network enabled in root network namespace") + } + + // Collect addresses and routes from the interfaces. + var args boot.CreateLinksAndRoutesArgs + for _, iface := range ifaces { + if iface.Flags&net.FlagUp == 0 { + log.Infof("Skipping down interface: %+v", iface) + continue + } + + allAddrs, err := iface.Addrs() + if err != nil { + return fmt.Errorf("fetching interface addresses for %q: %v", iface.Name, err) + } + + // We build our own loopback devices. + if iface.Flags&net.FlagLoopback != 0 { + links, err := loopbackLinks(iface, allAddrs) + if err != nil { + return fmt.Errorf("getting loopback routes and links for iface %q: %v", iface.Name, err) + } + args.LoopbackLinks = append(args.LoopbackLinks, links...) + continue + } + + // Keep only IPv4 addresses. + var ip4addrs []*net.IPNet + for _, ifaddr := range allAddrs { + ipNet, ok := ifaddr.(*net.IPNet) + if !ok { + return fmt.Errorf("address is not IPNet: %+v", ifaddr) + } + if ipNet.IP.To4() == nil { + log.Warningf("IPv6 is not supported, skipping: %v", ipNet) + continue + } + ip4addrs = append(ip4addrs, ipNet) + } + if len(ip4addrs) == 0 { + log.Warningf("No IPv4 address found for interface %q, skipping", iface.Name) + continue + } + + // Create the socket. 
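A note on the constant that follows: ETH_P_ALL is 0x0003 in host order, but AF_PACKET sockets take the protocol in network byte order, so on little-endian hosts it is stored pre-swapped as 0x0300. A standalone sketch of the conversion:

package main

import (
	"fmt"

	"golang.org/x/sys/unix"
)

// htons swaps a uint16 into network byte order on a little-endian host.
func htons(v uint16) uint16 { return v<<8 | v>>8 }

func main() {
	// Prints ETH_P_ALL=0x03 htons(ETH_P_ALL)=0x300 on little-endian hosts.
	fmt.Printf("ETH_P_ALL=%#04x htons(ETH_P_ALL)=%#04x\n",
		unix.ETH_P_ALL, htons(unix.ETH_P_ALL))
}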
+ const protocol = 0x0300 // htons(ETH_P_ALL) + fd, err := syscall.Socket(syscall.AF_PACKET, syscall.SOCK_RAW, protocol) + if err != nil { + return fmt.Errorf("unable to create raw socket: %v", err) + } + deviceFile := os.NewFile(uintptr(fd), "raw-device-fd") + + // Bind to the appropriate device. + ll := syscall.SockaddrLinklayer{ + Protocol: protocol, + Ifindex: iface.Index, + Hatype: 0, // No ARP type. + Pkttype: syscall.PACKET_OTHERHOST, + } + if err := syscall.Bind(fd, &ll); err != nil { + return fmt.Errorf("unable to bind to %q: %v", iface.Name, err) + } + + // Scrape the routes before removing the address, since that + // will remove the routes as well. + routes, def, err := routesForIface(iface) + if err != nil { + return fmt.Errorf("getting routes for interface %q: %v", iface.Name, err) + } + if def != nil { + if !args.DefaultGateway.Route.Empty() { + return fmt.Errorf("more than one default route found, interface: %v, route: %v, default route: %+v", iface.Name, def, args.DefaultGateway) + } + args.DefaultGateway.Route = *def + args.DefaultGateway.Name = iface.Name + } + + link := boot.FDBasedLink{ + Name: iface.Name, + MTU: iface.MTU, + Routes: routes, + } + + // Get the link for the interface. + ifaceLink, err := netlink.LinkByName(iface.Name) + if err != nil { + return fmt.Errorf("getting link for interface %q: %v", iface.Name, err) + } + link.LinkAddress = []byte(ifaceLink.Attrs().HardwareAddr) + + if enableGSO { + gso, err := isGSOEnabled(fd, iface.Name) + if err != nil { + return fmt.Errorf("getting GSO for interface %q: %v", iface.Name, err) + } + if gso { + if err := syscall.SetsockoptInt(fd, syscall.SOL_PACKET, unix.PACKET_VNET_HDR, 1); err != nil { + return fmt.Errorf("unable to enable the PACKET_VNET_HDR option: %v", err) + } + link.GSOMaxSize = ifaceLink.Attrs().GSOMaxSize + } else { + log.Infof("GSO not available in host.") + } + } + + // Use SO_RCVBUFFORCE because on linux the receive buffer for an + // AF_PACKET socket is capped by "net.core.rmem_max". rmem_max + // defaults to a unusually low value of 208KB. This is too low + // for gVisor to be able to receive packets at high throughputs + // without incurring packet drops. + const rcvBufSize = 4 << 20 // 4MB. + + if err := syscall.SetsockoptInt(fd, syscall.SOL_SOCKET, syscall.SO_RCVBUFFORCE, rcvBufSize); err != nil { + return fmt.Errorf("failed to increase socket rcv buffer to %d: %v", rcvBufSize, err) + } + + // Collect the addresses for the interface, enable forwarding, + // and remove them from the host. + for _, addr := range ip4addrs { + link.Addresses = append(link.Addresses, addr.IP) + + // Steal IP address from NIC. + if err := removeAddress(ifaceLink, addr.String()); err != nil { + return fmt.Errorf("removing address %v from device %q: %v", iface.Name, addr, err) + } + } + + args.FilePayload.Files = append(args.FilePayload.Files, deviceFile) + args.FDBasedLinks = append(args.FDBasedLinks, link) + } + + log.Debugf("Setting up network, config: %+v", args) + if err := conn.Call(boot.NetworkCreateLinksAndRoutes, &args, nil); err != nil { + return fmt.Errorf("creating links and routes: %v", err) + } + return nil +} + +// loopbackLinks collects the links for a loopback interface. 
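A note on ordering in the loop above: routes are scraped before the address is removed, because deleting an address also drops the routes derived from it. A sketch of the equivalent host-side operations with the vishvananda/netlink package (interface name and address are hypothetical):

package main

import (
	"fmt"

	"github.com/vishvananda/netlink"
)

func main() {
	link, err := netlink.LinkByName("eth0") // hypothetical interface
	if err != nil {
		fmt.Println(err)
		return
	}
	// 1. Read routes first; deleting the address below would drop them.
	routes, _ := netlink.RouteList(link, netlink.FAMILY_V4)
	fmt.Println("routes:", routes)
	// 2. Then steal the address from the host NIC.
	addr, _ := netlink.ParseAddr("192.0.2.10/24") // hypothetical address
	if err := netlink.AddrDel(link, addr); err != nil {
		fmt.Println("AddrDel:", err)
	}
}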
+func loopbackLinks(iface net.Interface, addrs []net.Addr) ([]boot.LoopbackLink, error) { + var links []boot.LoopbackLink + for _, addr := range addrs { + ipNet, ok := addr.(*net.IPNet) + if !ok { + return nil, fmt.Errorf("address is not IPNet: %+v", addr) + } + links = append(links, boot.LoopbackLink{ + Name: iface.Name, + Addresses: []net.IP{ipNet.IP}, + Routes: []boot.Route{{ + Destination: ipNet.IP.Mask(ipNet.Mask), + Mask: ipNet.Mask, + }}, + }) + } + return links, nil +} + +// routesForIface iterates over all routes for the given interface and converts +// them to boot.Routes. +func routesForIface(iface net.Interface) ([]boot.Route, *boot.Route, error) { + link, err := netlink.LinkByIndex(iface.Index) + if err != nil { + return nil, nil, err + } + rs, err := netlink.RouteList(link, netlink.FAMILY_ALL) + if err != nil { + return nil, nil, fmt.Errorf("getting routes from %q: %v", iface.Name, err) + } + + var def *boot.Route + var routes []boot.Route + for _, r := range rs { + // Is it a default route? + if r.Dst == nil { + if r.Gw == nil { + return nil, nil, fmt.Errorf("default route with no gateway %q: %+v", iface.Name, r) + } + if r.Gw.To4() == nil { + log.Warningf("IPv6 is not supported, skipping default route: %v", r) + continue + } + if def != nil { + return nil, nil, fmt.Errorf("more than one default route found %q, def: %+v, route: %+v", iface.Name, def, r) + } + // Create a catch all route to the gateway. + def = &boot.Route{ + Destination: net.IPv4zero, + Mask: net.IPMask(net.IPv4zero), + Gateway: r.Gw, + } + continue + } + if r.Dst.IP.To4() == nil { + log.Warningf("IPv6 is not supported, skipping route: %v", r) + continue + } + routes = append(routes, boot.Route{ + Destination: r.Dst.IP.Mask(r.Dst.Mask), + Mask: r.Dst.Mask, + Gateway: r.Gw, + }) + } + return routes, def, nil +} + +// removeAddress removes IP address from network device. It's equivalent to: +// ip addr del <ipAndMask> dev <name> +func removeAddress(source netlink.Link, ipAndMask string) error { + addr, err := netlink.ParseAddr(ipAndMask) + if err != nil { + return err + } + return netlink.AddrDel(source, addr) +} diff --git a/runsc/sandbox/network_unsafe.go b/runsc/sandbox/network_unsafe.go new file mode 100644 index 000000000..2a2a0fb7e --- /dev/null +++ b/runsc/sandbox/network_unsafe.go @@ -0,0 +1,56 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
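Before the next file, one detail of routesForIface above made concrete: route destinations are masked down to their network address before being handed to the sandbox, since a kernel route may carry host bits. A tiny sketch:

package main

import (
	"fmt"
	"net"
)

func main() {
	ip := net.ParseIP("192.168.1.37").To4() // hypothetical destination with host bits
	mask := net.CIDRMask(24, 32)
	fmt.Println(ip.Mask(mask)) // 192.168.1.0
}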
+ +package sandbox + +import ( + "syscall" + "unsafe" + + "golang.org/x/sys/unix" +) + +type ethtoolValue struct { + cmd uint32 + val uint32 +} + +type ifreq struct { + ifrName [unix.IFNAMSIZ]byte + ifrData *ethtoolValue +} + +const ( + _ETHTOOL_GGSO = 0x00000023 +) + +func isGSOEnabled(fd int, intf string) (bool, error) { + val := ethtoolValue{ + cmd: _ETHTOOL_GGSO, + } + + var name [unix.IFNAMSIZ]byte + copy(name[:], []byte(intf)) + + ifr := ifreq{ + ifrName: name, + ifrData: &val, + } + + if _, _, err := syscall.Syscall(syscall.SYS_IOCTL, uintptr(fd), unix.SIOCETHTOOL, uintptr(unsafe.Pointer(&ifr))); err != 0 { + return false, err + } + + return val.val != 0, nil +} diff --git a/runsc/sandbox/sandbox.go b/runsc/sandbox/sandbox.go new file mode 100644 index 000000000..47a66afb2 --- /dev/null +++ b/runsc/sandbox/sandbox.go @@ -0,0 +1,992 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package sandbox creates and manipulates sandboxes. +package sandbox + +import ( + "context" + "fmt" + "os" + "os/exec" + "strconv" + "sync" + "syscall" + "time" + + "github.com/cenkalti/backoff" + specs "github.com/opencontainers/runtime-spec/specs-go" + "github.com/syndtr/gocapability/capability" + "gvisor.googlesource.com/gvisor/pkg/control/client" + "gvisor.googlesource.com/gvisor/pkg/control/server" + "gvisor.googlesource.com/gvisor/pkg/log" + "gvisor.googlesource.com/gvisor/pkg/sentry/control" + "gvisor.googlesource.com/gvisor/pkg/sentry/platform/kvm" + "gvisor.googlesource.com/gvisor/pkg/urpc" + "gvisor.googlesource.com/gvisor/runsc/boot" + "gvisor.googlesource.com/gvisor/runsc/cgroup" + "gvisor.googlesource.com/gvisor/runsc/console" + "gvisor.googlesource.com/gvisor/runsc/specutils" +) + +// Sandbox wraps a sandbox process. +// +// It is used to start/stop sandbox process (and associated processes like +// gofers), as well as for running and manipulating containers inside a running +// sandbox. +// +// Note: Sandbox must be immutable because a copy of it is saved for each +// container and changes would not be synchronized to all of them. +type Sandbox struct { + // ID is the id of the sandbox (immutable). By convention, this is the same + // ID as the first container run in the sandbox. + ID string `json:"id"` + + // Pid is the pid of the running sandbox (immutable). May be 0 is the sandbox + // is not running. + Pid int `json:"pid"` + + // Cgroup has the cgroup configuration for the sandbox. + Cgroup *cgroup.Cgroup `json:"cgroup"` + + // child is set if a sandbox process is a child of the current process. + // + // This field isn't saved to json, because only a creator of sandbox + // will have it as a child process. + child bool + + // status is an exit status of a sandbox process. + status syscall.WaitStatus + + // statusMu protects status. + statusMu sync.Mutex +} + +// New creates the sandbox process. The caller must call Destroy() on the +// sandbox. 
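Looking back at network_unsafe.go: the same ETHTOOL_GGSO ioctl can be exercised from a standalone program; any socket FD works as the ioctl target, and the interface name here is an assumption:

package main

import (
	"fmt"
	"syscall"
	"unsafe"

	"golang.org/x/sys/unix"
)

type ethtoolValue struct {
	cmd uint32
	val uint32
}

type ifreq struct {
	ifrName [unix.IFNAMSIZ]byte
	ifrData *ethtoolValue
}

func main() {
	fd, err := syscall.Socket(syscall.AF_INET, syscall.SOCK_DGRAM, 0)
	if err != nil {
		panic(err)
	}
	defer syscall.Close(fd)

	val := ethtoolValue{cmd: 0x23} // _ETHTOOL_GGSO, as in network_unsafe.go
	var ifr ifreq
	copy(ifr.ifrName[:], "eth0") // hypothetical interface name
	ifr.ifrData = &val

	if _, _, errno := syscall.Syscall(syscall.SYS_IOCTL, uintptr(fd),
		unix.SIOCETHTOOL, uintptr(unsafe.Pointer(&ifr))); errno != 0 {
		panic(errno)
	}
	fmt.Println("GSO enabled:", val.val != 0)
}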
+func New(id string, spec *specs.Spec, conf *boot.Config, bundleDir, consoleSocket, userLog string, ioFiles []*os.File, specFile *os.File, cg *cgroup.Cgroup) (*Sandbox, error) { + s := &Sandbox{ID: id, Cgroup: cg} + // The Cleanup object cleans up partially created sandboxes when an error + // occurs. Any errors occurring during cleanup itself are ignored. + c := specutils.MakeCleanup(func() { + err := s.destroy() + log.Warningf("error destroying sandbox: %v", err) + }) + defer c.Clean() + + // Create pipe to synchronize when sandbox process has been booted. + clientSyncFile, sandboxSyncFile, err := os.Pipe() + if err != nil { + return nil, fmt.Errorf("creating pipe for sandbox %q: %v", s.ID, err) + } + defer clientSyncFile.Close() + + // Create the sandbox process. + err = s.createSandboxProcess(spec, conf, bundleDir, consoleSocket, userLog, ioFiles, specFile, sandboxSyncFile) + // sandboxSyncFile has to be closed to be able to detect when the sandbox + // process exits unexpectedly. + sandboxSyncFile.Close() + if err != nil { + return nil, err + } + + // Wait until the sandbox has booted. + b := make([]byte, 1) + if l, err := clientSyncFile.Read(b); err != nil || l != 1 { + return nil, fmt.Errorf("waiting for sandbox to start: %v", err) + } + + c.Release() + return s, nil +} + +// CreateContainer creates a non-root container inside the sandbox. +func (s *Sandbox) CreateContainer(cid string) error { + log.Debugf("Create non-root container %q in sandbox %q, PID: %d", cid, s.ID, s.Pid) + sandboxConn, err := s.sandboxConnect() + if err != nil { + return fmt.Errorf("couldn't connect to sandbox: %v", err) + } + defer sandboxConn.Close() + + if err := sandboxConn.Call(boot.ContainerCreate, &cid, nil); err != nil { + return fmt.Errorf("creating non-root container %q: %v", cid, err) + } + return nil +} + +// StartRoot starts running the root container process inside the sandbox. +func (s *Sandbox) StartRoot(spec *specs.Spec, conf *boot.Config) error { + log.Debugf("Start root sandbox %q, PID: %d", s.ID, s.Pid) + conn, err := s.sandboxConnect() + if err != nil { + return err + } + defer conn.Close() + + // Configure the network. + if err := setupNetwork(conn, s.Pid, spec, conf); err != nil { + return fmt.Errorf("setting up network: %v", err) + } + + // Send a message to the sandbox control server to start the root + // container. + if err := conn.Call(boot.RootContainerStart, &s.ID, nil); err != nil { + return fmt.Errorf("starting root container: %v", err) + } + + return nil +} + +// StartContainer starts running a non-root container inside the sandbox. +func (s *Sandbox) StartContainer(spec *specs.Spec, conf *boot.Config, cid string, goferFiles []*os.File) error { + for _, f := range goferFiles { + defer f.Close() + } + + log.Debugf("Start non-root container %q in sandbox %q, PID: %d", cid, s.ID, s.Pid) + sandboxConn, err := s.sandboxConnect() + if err != nil { + return fmt.Errorf("couldn't connect to sandbox: %v", err) + } + defer sandboxConn.Close() + + // The payload must container stdin/stdout/stderr followed by gofer + // files. + files := append([]*os.File{os.Stdin, os.Stdout, os.Stderr}, goferFiles...) + // Start running the container. 
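Stepping back to New above: the boot synchronization is a one-byte pipe handshake. A reduced standalone sketch of the pattern, with a goroutine standing in for the sandbox process:

package main

import (
	"fmt"
	"os"
)

func main() {
	client, sandbox, err := os.Pipe()
	if err != nil {
		panic(err)
	}
	// The child writes one byte once it has booted.
	go func() {
		defer sandbox.Close()
		sandbox.Write([]byte{0})
	}()
	// The parent blocks until the byte arrives; EOF instead of a byte
	// means the child exited before finishing boot.
	b := make([]byte, 1)
	if n, err := client.Read(b); err != nil || n != 1 {
		fmt.Println("sandbox failed to boot:", err)
		return
	}
	fmt.Println("sandbox booted")
}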
+ args := boot.StartArgs{ + Spec: spec, + Conf: conf, + CID: cid, + FilePayload: urpc.FilePayload{Files: files}, + } + if err := sandboxConn.Call(boot.ContainerStart, &args, nil); err != nil { + return fmt.Errorf("starting non-root container %v: %v", spec.Process.Args, err) + } + return nil +} + +// Restore sends the restore call for a container in the sandbox. +func (s *Sandbox) Restore(cid string, spec *specs.Spec, conf *boot.Config, filename string) error { + log.Debugf("Restore sandbox %q", s.ID) + + rf, err := os.Open(filename) + if err != nil { + return fmt.Errorf("opening restore file %q failed: %v", filename, err) + } + defer rf.Close() + + opt := boot.RestoreOpts{ + FilePayload: urpc.FilePayload{ + Files: []*os.File{rf}, + }, + SandboxID: s.ID, + } + + // If the platform needs a device FD we must pass it in. + if deviceFile, err := deviceFileForPlatform(conf.Platform); err != nil { + return err + } else if deviceFile != nil { + defer deviceFile.Close() + opt.FilePayload.Files = append(opt.FilePayload.Files, deviceFile) + } + + conn, err := s.sandboxConnect() + if err != nil { + return err + } + defer conn.Close() + + // Configure the network. + if err := setupNetwork(conn, s.Pid, spec, conf); err != nil { + return fmt.Errorf("setting up network: %v", err) + } + + // Restore the container and start the root container. + if err := conn.Call(boot.ContainerRestore, &opt, nil); err != nil { + return fmt.Errorf("restoring container %q: %v", cid, err) + } + + return nil +} + +// Processes retrieves the list of processes and associated metadata for a +// given container in this sandbox. +func (s *Sandbox) Processes(cid string) ([]*control.Process, error) { + log.Debugf("Getting processes for container %q in sandbox %q", cid, s.ID) + conn, err := s.sandboxConnect() + if err != nil { + return nil, err + } + defer conn.Close() + + var pl []*control.Process + if err := conn.Call(boot.ContainerProcesses, &cid, &pl); err != nil { + return nil, fmt.Errorf("retrieving process data from sandbox: %v", err) + } + return pl, nil +} + +// Execute runs the specified command in the container. It returns the PID of +// the newly created process. +func (s *Sandbox) Execute(args *control.ExecArgs) (int32, error) { + log.Debugf("Executing new process in container %q in sandbox %q", args.ContainerID, s.ID) + conn, err := s.sandboxConnect() + if err != nil { + return 0, s.connError(err) + } + defer conn.Close() + + // Send a message to the sandbox control server to start the container. + var pid int32 + if err := conn.Call(boot.ContainerExecuteAsync, args, &pid); err != nil { + return 0, fmt.Errorf("executing command %q in sandbox: %v", args, err) + } + return pid, nil +} + +// Event retrieves stats about the sandbox such as memory and CPU utilization. +func (s *Sandbox) Event(cid string) (*boot.Event, error) { + log.Debugf("Getting events for container %q in sandbox %q", cid, s.ID) + conn, err := s.sandboxConnect() + if err != nil { + return nil, err + } + defer conn.Close() + + var e boot.Event + // TODO(b/129292330): Pass in the container id (cid) here. The sandbox + // should return events only for that container. 
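As an aside, every control call in this file follows one urpc shape: a method-name constant, an argument struct (often carrying donated FDs in a urpc.FilePayload), and an optional reply pointer. A schematic sketch, where "Method" and the argument struct are placeholders rather than real endpoints:

package main

import (
	"os"

	"gvisor.googlesource.com/gvisor/pkg/urpc"
)

// call is schematic only: "Method" and the args struct stand in for the
// real boot.* constants and argument types used above.
func call(conn *urpc.Client, files []*os.File) error {
	args := struct {
		urpc.FilePayload // donated FDs ride along with the request
		Data string
	}{
		FilePayload: urpc.FilePayload{Files: files},
		Data:        "payload",
	}
	var reply struct{} // many endpoints have no reply
	return conn.Call("Method", &args, &reply)
}

func main() {}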
+ if err := conn.Call(boot.ContainerEvent, nil, &e); err != nil { + return nil, fmt.Errorf("retrieving event data from sandbox: %v", err) + } + e.ID = cid + return &e, nil +} + +func (s *Sandbox) sandboxConnect() (*urpc.Client, error) { + log.Debugf("Connecting to sandbox %q", s.ID) + conn, err := client.ConnectTo(boot.ControlSocketAddr(s.ID)) + if err != nil { + return nil, s.connError(err) + } + return conn, nil +} + +func (s *Sandbox) connError(err error) error { + return fmt.Errorf("connecting to control server at PID %d: %v", s.Pid, err) +} + +// createSandboxProcess starts the sandbox as a subprocess by running the "boot" +// command, passing in the bundle dir. +func (s *Sandbox) createSandboxProcess(spec *specs.Spec, conf *boot.Config, bundleDir, consoleSocket, userLog string, ioFiles []*os.File, mountsFile, startSyncFile *os.File) error { + // nextFD is used to get unused FDs that we can pass to the sandbox. It + // starts at 3 because 0, 1, and 2 are taken by stdin/out/err. + nextFD := 3 + + binPath := specutils.ExePath + cmd := exec.Command(binPath, conf.ToFlags()...) + cmd.SysProcAttr = &syscall.SysProcAttr{} + + // Open the log files to pass to the sandbox as FDs. + // + // These flags must come BEFORE the "boot" command in cmd.Args. + if conf.LogFilename != "" { + logFile, err := os.OpenFile(conf.LogFilename, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644) + if err != nil { + return fmt.Errorf("opening log file %q: %v", conf.LogFilename, err) + } + defer logFile.Close() + cmd.ExtraFiles = append(cmd.ExtraFiles, logFile) + cmd.Args = append(cmd.Args, "--log-fd="+strconv.Itoa(nextFD)) + nextFD++ + } + if conf.DebugLog != "" { + debugLogFile, err := specutils.DebugLogFile(conf.DebugLog, "boot") + if err != nil { + return fmt.Errorf("opening debug log file in %q: %v", conf.DebugLog, err) + } + defer debugLogFile.Close() + cmd.ExtraFiles = append(cmd.ExtraFiles, debugLogFile) + cmd.Args = append(cmd.Args, "--debug-log-fd="+strconv.Itoa(nextFD)) + nextFD++ + } + + // Add the "boot" command to the args. + // + // All flags after this must be for the boot command + cmd.Args = append(cmd.Args, "boot", "--bundle="+bundleDir) + + // Create a socket for the control server and donate it to the sandbox. + addr := boot.ControlSocketAddr(s.ID) + sockFD, err := server.CreateSocket(addr) + log.Infof("Creating sandbox process with addr: %s", addr[1:]) // skip "\00". + if err != nil { + return fmt.Errorf("creating control server socket for sandbox %q: %v", s.ID, err) + } + controllerFile := os.NewFile(uintptr(sockFD), "control_server_socket") + defer controllerFile.Close() + cmd.ExtraFiles = append(cmd.ExtraFiles, controllerFile) + cmd.Args = append(cmd.Args, "--controller-fd="+strconv.Itoa(nextFD)) + nextFD++ + + defer mountsFile.Close() + cmd.ExtraFiles = append(cmd.ExtraFiles, mountsFile) + cmd.Args = append(cmd.Args, "--mounts-fd="+strconv.Itoa(nextFD)) + nextFD++ + + specFile, err := specutils.OpenSpec(bundleDir) + if err != nil { + return err + } + defer specFile.Close() + cmd.ExtraFiles = append(cmd.ExtraFiles, specFile) + cmd.Args = append(cmd.Args, "--spec-fd="+strconv.Itoa(nextFD)) + nextFD++ + + cmd.ExtraFiles = append(cmd.ExtraFiles, startSyncFile) + cmd.Args = append(cmd.Args, "--start-sync-fd="+strconv.Itoa(nextFD)) + nextFD++ + + // If there is a gofer, sends all socket ends to the sandbox. 
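The nextFD bookkeeping in createSandboxProcess relies on exec.Cmd semantics: entries appended to ExtraFiles become FDs 3, 4, 5, ... in the child, in append order, which is why every donation appends a file and a flag in lockstep. A minimal sketch of donating one FD by number (the flag is hypothetical):

package main

import (
	"fmt"
	"os"
	"os/exec"
	"strconv"
)

func main() {
	f, err := os.Open("/dev/null")
	if err != nil {
		panic(err)
	}
	defer f.Close()

	nextFD := 3 // 0, 1, 2 are the child's stdin/stdout/stderr
	cmd := exec.Command("/bin/true")
	cmd.ExtraFiles = append(cmd.ExtraFiles, f)                        // becomes FD 3 in the child
	cmd.Args = append(cmd.Args, "--donated-fd="+strconv.Itoa(nextFD)) // hypothetical flag
	nextFD++

	if err := cmd.Run(); err != nil {
		panic(err)
	}
	fmt.Println("child saw the file as FD", nextFD-1)
}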
+ for _, f := range ioFiles { + defer f.Close() + cmd.ExtraFiles = append(cmd.ExtraFiles, f) + cmd.Args = append(cmd.Args, "--io-fds="+strconv.Itoa(nextFD)) + nextFD++ + } + + // If the platform needs a device FD we must pass it in. + if deviceFile, err := deviceFileForPlatform(conf.Platform); err != nil { + return err + } else if deviceFile != nil { + defer deviceFile.Close() + cmd.ExtraFiles = append(cmd.ExtraFiles, deviceFile) + cmd.Args = append(cmd.Args, "--device-fd="+strconv.Itoa(nextFD)) + nextFD++ + } + + // The current process' stdio must be passed to the application via the + // --stdio-fds flag. The stdio of the sandbox process itself must not + // be connected to the same FDs, otherwise we risk leaking sandbox + // errors to the application, so we set the sandbox stdio to nil, + // causing them to read/write from the null device. + cmd.Stdin = nil + cmd.Stdout = nil + cmd.Stderr = nil + + // If the console control socket file is provided, then create a new + // pty master/slave pair and set the TTY on the sandbox process. + if consoleSocket != "" { + cmd.Args = append(cmd.Args, "--console=true") + + // console.NewWithSocket will send the master on the given + // socket, and return the slave. + tty, err := console.NewWithSocket(consoleSocket) + if err != nil { + return fmt.Errorf("setting up console with socket %q: %v", consoleSocket, err) + } + defer tty.Close() + + // Set the TTY as a controlling TTY on the sandbox process. + // Note that the Ctty field must be the FD of the TTY in the + // *new* process, not this process. Since we are about to + // assign the TTY to nextFD, we can use that value here. + // stdin, we can use FD 0 here. + cmd.SysProcAttr.Setctty = true + cmd.SysProcAttr.Ctty = nextFD + + // Pass the tty as all stdio fds to sandbox. + for i := 0; i < 3; i++ { + cmd.ExtraFiles = append(cmd.ExtraFiles, tty) + cmd.Args = append(cmd.Args, "--stdio-fds="+strconv.Itoa(nextFD)) + nextFD++ + } + + if conf.Debug { + // If debugging, send the boot process stdio to the + // TTY, so that it is easier to find. + cmd.Stdin = tty + cmd.Stdout = tty + cmd.Stderr = tty + } + } else { + // If not using a console, pass our current stdio as the + // container stdio via flags. + for _, f := range []*os.File{os.Stdin, os.Stdout, os.Stderr} { + cmd.ExtraFiles = append(cmd.ExtraFiles, f) + cmd.Args = append(cmd.Args, "--stdio-fds="+strconv.Itoa(nextFD)) + nextFD++ + } + + if conf.Debug { + // If debugging, send the boot process stdio to the + // this process' stdio, so that is is easier to find. + cmd.Stdin = os.Stdin + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + } + } + + // Detach from this session, otherwise cmd will get SIGHUP and SIGCONT + // when re-parented. + cmd.SysProcAttr.Setsid = true + + // nss is the set of namespaces to join or create before starting the sandbox + // process. Mount, IPC and UTS namespaces from the host are not used as they + // are virtualized inside the sandbox. Be paranoid and run inside an empty + // namespace for these. Don't unshare cgroup because sandbox is added to a + // cgroup in the caller's namespace. + log.Infof("Sandbox will be started in new mount, IPC and UTS namespaces") + nss := []specs.LinuxNamespace{ + {Type: specs.IPCNamespace}, + {Type: specs.MountNamespace}, + {Type: specs.UTSNamespace}, + } + + if conf.Platform == boot.PlatformPtrace { + // TODO(b/75837838): Also set a new PID namespace so that we limit + // access to other host processes. 
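Returning to the console path above: handing the child a controlling TTY takes Setsid plus Setctty, with Ctty numbered in the child's FD space. A reduced sketch, with the console-socket plumbing omitted:

package main

import (
	"os"
	"os/exec"
	"syscall"
)

// startWithTTY launches path with tty as its controlling terminal; in the
// code above the tty comes from console.NewWithSocket.
func startWithTTY(path string, tty *os.File) (*exec.Cmd, error) {
	cmd := exec.Command(path)
	cmd.ExtraFiles = []*os.File{tty} // FD 3 in the child
	cmd.SysProcAttr = &syscall.SysProcAttr{
		Setsid:  true, // new session, required to acquire a controlling TTY
		Setctty: true,
		Ctty:    3, // FD number in the *child*, not in this process
	}
	return cmd, cmd.Start()
}

func main() {}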
+ log.Infof("Sandbox will be started in the current PID namespace") + } else { + log.Infof("Sandbox will be started in a new PID namespace") + nss = append(nss, specs.LinuxNamespace{Type: specs.PIDNamespace}) + cmd.Args = append(cmd.Args, "--pidns=true") + } + + // Joins the network namespace if network is enabled. the sandbox talks + // directly to the host network, which may have been configured in the + // namespace. + if ns, ok := specutils.GetNS(specs.NetworkNamespace, spec); ok && conf.Network != boot.NetworkNone { + log.Infof("Sandbox will be started in the container's network namespace: %+v", ns) + nss = append(nss, ns) + } else if conf.Network == boot.NetworkHost { + log.Infof("Sandbox will be started in the host network namespace") + } else { + log.Infof("Sandbox will be started in new network namespace") + nss = append(nss, specs.LinuxNamespace{Type: specs.NetworkNamespace}) + } + + // User namespace depends on the network type. Host network requires to run + // inside the user namespace specified in the spec or the current namespace + // if none is configured. + if conf.Network == boot.NetworkHost { + if userns, ok := specutils.GetNS(specs.UserNamespace, spec); ok { + log.Infof("Sandbox will be started in container's user namespace: %+v", userns) + nss = append(nss, userns) + specutils.SetUIDGIDMappings(cmd, spec) + } else { + log.Infof("Sandbox will be started in the current user namespace") + } + // When running in the caller's defined user namespace, apply the same + // capabilities to the sandbox process to ensure it abides to the same + // rules. + cmd.Args = append(cmd.Args, "--apply-caps=true") + + // If we have CAP_SYS_ADMIN, we can create an empty chroot and + // bind-mount the executable inside it. + if conf.TestOnlyAllowRunAsCurrentUserWithoutChroot { + log.Warningf("Running sandbox in test mode without chroot. This is only safe in tests!") + + } else if specutils.HasCapabilities(capability.CAP_SYS_ADMIN) { + log.Infof("Sandbox will be started in minimal chroot") + cmd.Args = append(cmd.Args, "--setup-root") + } else { + return fmt.Errorf("can't run sandbox process in minimal chroot since we don't have CAP_SYS_ADMIN") + } + } else { + // If we have CAP_SETUID and CAP_SETGID, then we can also run + // as user nobody. + if conf.TestOnlyAllowRunAsCurrentUserWithoutChroot { + log.Warningf("Running sandbox in test mode as current user (uid=%d gid=%d). This is only safe in tests!", os.Getuid(), os.Getgid()) + log.Warningf("Running sandbox in test mode without chroot. This is only safe in tests!") + } else if specutils.HasCapabilities(capability.CAP_SETUID, capability.CAP_SETGID) { + log.Infof("Sandbox will be started in new user namespace") + nss = append(nss, specs.LinuxNamespace{Type: specs.UserNamespace}) + + // Map nobody in the new namespace to nobody in the parent namespace. + // + // A sandbox process will construct an empty + // root for itself, so it has to have the CAP_SYS_ADMIN + // capability. + // + // FIXME(b/122554829): The current implementations of + // os/exec doesn't allow to set ambient capabilities if + // a process is started in a new user namespace. As a + // workaround, we start the sandbox process with the 0 + // UID and then it constructs a chroot and sets UID to + // nobody. 
https://github.com/golang/go/issues/2315 + const nobody = 65534 + cmd.SysProcAttr.UidMappings = []syscall.SysProcIDMap{ + { + ContainerID: int(0), + HostID: int(nobody - 1), + Size: int(1), + }, + { + ContainerID: int(nobody), + HostID: int(nobody), + Size: int(1), + }, + } + cmd.SysProcAttr.GidMappings = []syscall.SysProcIDMap{ + { + ContainerID: int(nobody), + HostID: int(nobody), + Size: int(1), + }, + } + + // Set credentials to run as user and group nobody. + cmd.SysProcAttr.Credential = &syscall.Credential{ + Uid: 0, + Gid: nobody, + } + cmd.Args = append(cmd.Args, "--setup-root") + } else { + return fmt.Errorf("can't run sandbox process as user nobody since we don't have CAP_SETUID or CAP_SETGID") + } + } + + cmd.Args[0] = "runsc-sandbox" + + if s.Cgroup != nil { + cpuNum, err := s.Cgroup.NumCPU() + if err != nil { + return fmt.Errorf("getting cpu count from cgroups: %v", err) + } + cmd.Args = append(cmd.Args, "--cpu-num", strconv.Itoa(cpuNum)) + + mem, err := s.Cgroup.MemoryLimit() + if err != nil { + return fmt.Errorf("getting memory limit from cgroups: %v", err) + } + // When memory limit is unset, a "large" number is returned. In that case, + // just stick with the default. + if mem < 0x7ffffffffffff000 { + cmd.Args = append(cmd.Args, "--total-memory", strconv.FormatUint(mem, 10)) + } + } + + if userLog != "" { + f, err := os.OpenFile(userLog, os.O_WRONLY|os.O_CREATE|os.O_APPEND, 0664) + if err != nil { + return fmt.Errorf("opening compat log file: %v", err) + } + defer f.Close() + + cmd.ExtraFiles = append(cmd.ExtraFiles, f) + cmd.Args = append(cmd.Args, "--user-log-fd", strconv.Itoa(nextFD)) + nextFD++ + } + + // Add container as the last argument. + cmd.Args = append(cmd.Args, s.ID) + + // Log the FDs we are donating to the sandbox process. + for i, f := range cmd.ExtraFiles { + log.Debugf("Donating FD %d: %q", i+3, f.Name()) + } + + log.Debugf("Starting sandbox: %s %v", binPath, cmd.Args) + log.Debugf("SysProcAttr: %+v", cmd.SysProcAttr) + if err := specutils.StartInNS(cmd, nss); err != nil { + return fmt.Errorf("Sandbox: %v", err) + } + s.child = true + s.Pid = cmd.Process.Pid + log.Infof("Sandbox started, PID: %d", s.Pid) + + return nil +} + +// Wait waits for the containerized process to exit, and returns its WaitStatus. +func (s *Sandbox) Wait(cid string) (syscall.WaitStatus, error) { + log.Debugf("Waiting for container %q in sandbox %q", cid, s.ID) + var ws syscall.WaitStatus + + if conn, err := s.sandboxConnect(); err != nil { + // The sandbox may have exited while before we had a chance to + // wait on it. + log.Warningf("Wait on container %q failed: %v. Will try waiting on the sandbox process instead.", cid, err) + } else { + defer conn.Close() + // Try the Wait RPC to the sandbox. + err = conn.Call(boot.ContainerWait, &cid, &ws) + if err == nil { + // It worked! + return ws, nil + } + // The sandbox may have exited after we connected, but before + // or during the Wait RPC. + log.Warningf("Wait RPC to container %q failed: %v. Will try waiting on the sandbox process instead.", cid, err) + } + + // The sandbox may have already exited, or exited while handling the + // Wait RPC. The best we can do is ask Linux what the sandbox exit + // status was, since in most cases that will be the same as the + // container exit status. 
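One number from createSandboxProcess worth decoding: a cgroup with no memory limit reads back as 0x7ffffffffffff000 (max int64 rounded down to a 4KB page), so only values below that sentinel are forwarded as --total-memory. A sketch of the check:

package main

import "fmt"

func main() {
	const noLimit = uint64(0x7ffffffffffff000) // kernel's "unlimited" readback
	for _, mem := range []uint64{512 << 20, noLimit} {
		if mem < noLimit {
			fmt.Println("--total-memory", mem)
		} else {
			fmt.Println("no cgroup limit; keeping the default")
		}
	}
}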
+ if err := s.waitForStopped(); err != nil { + return ws, err + } + if !s.child { + return ws, fmt.Errorf("sandbox no longer running and its exit status is unavailable") + } + return s.status, nil +} + +// WaitPID waits for process 'pid' in the container's sandbox and returns its +// WaitStatus. +func (s *Sandbox) WaitPID(cid string, pid int32, clearStatus bool) (syscall.WaitStatus, error) { + log.Debugf("Waiting for PID %d in sandbox %q", pid, s.ID) + var ws syscall.WaitStatus + conn, err := s.sandboxConnect() + if err != nil { + return ws, err + } + defer conn.Close() + + args := &boot.WaitPIDArgs{ + PID: pid, + CID: cid, + ClearStatus: clearStatus, + } + if err := conn.Call(boot.ContainerWaitPID, args, &ws); err != nil { + return ws, fmt.Errorf("waiting on PID %d in sandbox %q: %v", pid, s.ID, err) + } + return ws, nil +} + +// IsRootContainer returns true if the specified container ID belongs to the +// root container. +func (s *Sandbox) IsRootContainer(cid string) bool { + return s.ID == cid +} + +// Destroy frees all resources associated with the sandbox. It fails fast and +// is idempotent. +func (s *Sandbox) destroy() error { + log.Debugf("Destroy sandbox %q", s.ID) + if s.Pid != 0 { + log.Debugf("Killing sandbox %q", s.ID) + if err := syscall.Kill(s.Pid, syscall.SIGKILL); err != nil && err != syscall.ESRCH { + return fmt.Errorf("killing sandbox %q PID %q: %v", s.ID, s.Pid, err) + } + if err := s.waitForStopped(); err != nil { + return fmt.Errorf("waiting sandbox %q stop: %v", s.ID, err) + } + } + + return nil +} + +// SignalContainer sends the signal to a container in the sandbox. If all is +// true and signal is SIGKILL, then waits for all processes to exit before +// returning. +func (s *Sandbox) SignalContainer(cid string, sig syscall.Signal, all bool) error { + log.Debugf("Signal sandbox %q", s.ID) + conn, err := s.sandboxConnect() + if err != nil { + return err + } + defer conn.Close() + + mode := boot.DeliverToProcess + if all { + mode = boot.DeliverToAllProcesses + } + + args := boot.SignalArgs{ + CID: cid, + Signo: int32(sig), + Mode: mode, + } + if err := conn.Call(boot.ContainerSignal, &args, nil); err != nil { + return fmt.Errorf("signaling container %q: %v", cid, err) + } + return nil +} + +// SignalProcess sends the signal to a particular process in the container. If +// fgProcess is true, then the signal is sent to the foreground process group +// in the same session that PID belongs to. This is only valid if the process +// is attached to a host TTY. +func (s *Sandbox) SignalProcess(cid string, pid int32, sig syscall.Signal, fgProcess bool) error { + log.Debugf("Signal sandbox %q", s.ID) + conn, err := s.sandboxConnect() + if err != nil { + return err + } + defer conn.Close() + + mode := boot.DeliverToProcess + if fgProcess { + mode = boot.DeliverToForegroundProcessGroup + } + + args := boot.SignalArgs{ + CID: cid, + Signo: int32(sig), + PID: pid, + Mode: mode, + } + if err := conn.Call(boot.ContainerSignal, &args, nil); err != nil { + return fmt.Errorf("signaling container %q PID %d: %v", cid, pid, err) + } + return nil +} + +// Checkpoint sends the checkpoint call for a container in the sandbox. +// The statefile will be written to f. 
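A usage sketch pairing the Checkpoint below with the Restore above: Checkpoint streams state into a donated file, and Restore later reopens it by name on a freshly created sandbox. IDs and the statefile path here are hypothetical:

package main

import (
	"os"

	specs "github.com/opencontainers/runtime-spec/specs-go"
	"gvisor.googlesource.com/gvisor/runsc/boot"
	"gvisor.googlesource.com/gvisor/runsc/sandbox"
)

// checkpointThenRestore is illustrative only; the container ID, statefile
// path, and fresh sandbox are assumptions, not part of this change.
func checkpointThenRestore(old, fresh *sandbox.Sandbox, spec *specs.Spec, conf *boot.Config) error {
	f, err := os.Create("/tmp/ckpt.img") // hypothetical statefile path
	if err != nil {
		return err
	}
	if err := old.Checkpoint("cid", f); err != nil {
		f.Close()
		return err
	}
	f.Close()
	// Restore reopens the statefile by name on the new sandbox.
	return fresh.Restore("cid", spec, conf, "/tmp/ckpt.img")
}

func main() {}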
+func (s *Sandbox) Checkpoint(cid string, f *os.File) error { + log.Debugf("Checkpoint sandbox %q", s.ID) + conn, err := s.sandboxConnect() + if err != nil { + return err + } + defer conn.Close() + + opt := control.SaveOpts{ + FilePayload: urpc.FilePayload{ + Files: []*os.File{f}, + }, + } + + if err := conn.Call(boot.ContainerCheckpoint, &opt, nil); err != nil { + return fmt.Errorf("checkpointing container %q: %v", cid, err) + } + return nil +} + +// Pause sends the pause call for a container in the sandbox. +func (s *Sandbox) Pause(cid string) error { + log.Debugf("Pause sandbox %q", s.ID) + conn, err := s.sandboxConnect() + if err != nil { + return err + } + defer conn.Close() + + if err := conn.Call(boot.ContainerPause, nil, nil); err != nil { + return fmt.Errorf("pausing container %q: %v", cid, err) + } + return nil +} + +// Resume sends the resume call for a container in the sandbox. +func (s *Sandbox) Resume(cid string) error { + log.Debugf("Resume sandbox %q", s.ID) + conn, err := s.sandboxConnect() + if err != nil { + return err + } + defer conn.Close() + + if err := conn.Call(boot.ContainerResume, nil, nil); err != nil { + return fmt.Errorf("resuming container %q: %v", cid, err) + } + return nil +} + +// IsRunning returns true if the sandbox or gofer process is running. +func (s *Sandbox) IsRunning() bool { + if s.Pid != 0 { + // Send a signal 0 to the sandbox process. + if err := syscall.Kill(s.Pid, 0); err == nil { + // Succeeded, process is running. + return true + } + } + return false +} + +// Stacks collects and returns all stacks for the sandbox. +func (s *Sandbox) Stacks() (string, error) { + log.Debugf("Stacks sandbox %q", s.ID) + conn, err := s.sandboxConnect() + if err != nil { + return "", err + } + defer conn.Close() + + var stacks string + if err := conn.Call(boot.SandboxStacks, nil, &stacks); err != nil { + return "", fmt.Errorf("getting sandbox %q stacks: %v", s.ID, err) + } + return stacks, nil +} + +// HeapProfile writes a heap profile to the given file. +func (s *Sandbox) HeapProfile(f *os.File) error { + log.Debugf("Heap profile %q", s.ID) + conn, err := s.sandboxConnect() + if err != nil { + return err + } + defer conn.Close() + + opts := control.ProfileOpts{ + FilePayload: urpc.FilePayload{ + Files: []*os.File{f}, + }, + } + if err := conn.Call(boot.HeapProfile, &opts, nil); err != nil { + return fmt.Errorf("getting sandbox %q heap profile: %v", s.ID, err) + } + return nil +} + +// StartCPUProfile start CPU profile writing to the given file. +func (s *Sandbox) StartCPUProfile(f *os.File) error { + log.Debugf("CPU profile start %q", s.ID) + conn, err := s.sandboxConnect() + if err != nil { + return err + } + defer conn.Close() + + opts := control.ProfileOpts{ + FilePayload: urpc.FilePayload{ + Files: []*os.File{f}, + }, + } + if err := conn.Call(boot.StartCPUProfile, &opts, nil); err != nil { + return fmt.Errorf("starting sandbox %q CPU profile: %v", s.ID, err) + } + return nil +} + +// StopCPUProfile stops a previously started CPU profile. +func (s *Sandbox) StopCPUProfile() error { + log.Debugf("CPU profile stop %q", s.ID) + conn, err := s.sandboxConnect() + if err != nil { + return err + } + defer conn.Close() + + if err := conn.Call(boot.StopCPUProfile, nil, nil); err != nil { + return fmt.Errorf("stopping sandbox %q CPU profile: %v", s.ID, err) + } + return nil +} + +// StartTrace start trace writing to the given file. 
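Typical use of the profiling RPCs above is start, run the workload, stop; a small driver sketch (the output path is hypothetical):

package main

import (
	"os"
	"time"

	"gvisor.googlesource.com/gvisor/runsc/sandbox"
)

// profileFor is an illustrative driver for the CPU-profile RPCs above.
func profileFor(sb *sandbox.Sandbox, d time.Duration) error {
	f, err := os.Create("/tmp/sandbox.prof") // hypothetical output path
	if err != nil {
		return err
	}
	defer f.Close()
	if err := sb.StartCPUProfile(f); err != nil {
		return err
	}
	time.Sleep(d) // let the workload run
	return sb.StopCPUProfile()
}

func main() {}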
+func (s *Sandbox) StartTrace(f *os.File) error { + log.Debugf("Trace start %q", s.ID) + conn, err := s.sandboxConnect() + if err != nil { + return err + } + defer conn.Close() + + opts := control.ProfileOpts{ + FilePayload: urpc.FilePayload{ + Files: []*os.File{f}, + }, + } + if err := conn.Call(boot.StartTrace, &opts, nil); err != nil { + return fmt.Errorf("starting sandbox %q trace: %v", s.ID, err) + } + return nil +} + +// StopTrace stops a previously started trace.. +func (s *Sandbox) StopTrace() error { + log.Debugf("Trace stop %q", s.ID) + conn, err := s.sandboxConnect() + if err != nil { + return err + } + defer conn.Close() + + if err := conn.Call(boot.StopTrace, nil, nil); err != nil { + return fmt.Errorf("stopping sandbox %q trace: %v", s.ID, err) + } + return nil +} + +// DestroyContainer destroys the given container. If it is the root container, +// then the entire sandbox is destroyed. +func (s *Sandbox) DestroyContainer(cid string) error { + if s.IsRootContainer(cid) { + log.Debugf("Destroying root container %q by destroying sandbox", cid) + return s.destroy() + } + + if !s.IsRunning() { + // Sandbox isn't running anymore, container is already destroyed. + return nil + } + + log.Debugf("Destroying container %q in sandbox %q", cid, s.ID) + conn, err := s.sandboxConnect() + if err != nil { + return err + } + defer conn.Close() + if err := conn.Call(boot.ContainerDestroy, &cid, nil); err != nil { + return fmt.Errorf("destroying container %q: %v", cid, err) + } + return nil +} + +func (s *Sandbox) waitForStopped() error { + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + b := backoff.WithContext(backoff.NewConstantBackOff(100*time.Millisecond), ctx) + op := func() error { + if s.child { + s.statusMu.Lock() + defer s.statusMu.Unlock() + if s.Pid == 0 { + return nil + } + // The sandbox process is a child of the current process, + // so we can wait it and collect its zombie. + wpid, err := syscall.Wait4(int(s.Pid), &s.status, syscall.WNOHANG, nil) + if err != nil { + return fmt.Errorf("error waiting the sandbox process: %v", err) + } + if wpid == 0 { + return fmt.Errorf("sandbox is still running") + } + s.Pid = 0 + } else if s.IsRunning() { + return fmt.Errorf("sandbox is still running") + } + return nil + } + return backoff.Retry(op, b) +} + +// deviceFileForPlatform opens the device file for the given platform. If the +// platform does not need a device file, then nil is returned. +func deviceFileForPlatform(p boot.PlatformType) (*os.File, error) { + var ( + f *os.File + err error + ) + switch p { + case boot.PlatformKVM: + f, err = kvm.OpenDevice() + default: + return nil, nil + } + if err != nil { + return nil, fmt.Errorf("opening device file for platform %q: %v", p, err) + } + return f, err +} |
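For reference, waitForStopped's polling is the stock cenkalti/backoff pattern: constant 100ms retries bounded by a 5-second context. A standalone sketch of the same loop, with a timer standing in for "the process exited":

package main

import (
	"context"
	"fmt"
	"time"

	"github.com/cenkalti/backoff"
)

func main() {
	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()
	b := backoff.WithContext(backoff.NewConstantBackOff(100*time.Millisecond), ctx)

	deadline := time.Now().Add(300 * time.Millisecond) // stand-in for process exit
	op := func() error {
		if time.Now().Before(deadline) {
			return fmt.Errorf("sandbox is still running") // retried until ctx expires
		}
		return nil
	}
	if err := backoff.Retry(op, b); err != nil {
		fmt.Println("gave up:", err)
		return
	}
	fmt.Println("stopped")
}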