11 files changed, 386 insertions, 97 deletions
diff --git a/runsc/boot/controller.go b/runsc/boot/controller.go
index ae727f144..1a598199d 100644
--- a/runsc/boot/controller.go
+++ b/runsc/boot/controller.go
@@ -15,9 +15,12 @@
 package boot
 
 import (
+	"errors"
 	"fmt"
 
+	specs "github.com/opencontainers/runtime-spec/specs-go"
 	"gvisor.googlesource.com/gvisor/pkg/control/server"
+	"gvisor.googlesource.com/gvisor/pkg/log"
 	"gvisor.googlesource.com/gvisor/pkg/sentry/arch"
 	"gvisor.googlesource.com/gvisor/pkg/sentry/control"
 	"gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
@@ -50,6 +53,10 @@ const (
 	// ContainerSignal is used to send a signal to a container.
 	ContainerSignal = "containerManager.Signal"
 
+	// ContainerStart is the URPC endpoint for running a non-root container
+	// within a sandbox.
+	ContainerStart = "containerManager.Start"
+
 	// ContainerWait is used to wait on the init process of the container
 	// and return its ExitStatus.
 	ContainerWait = "containerManager.Wait"
@@ -127,10 +134,14 @@ type containerManager struct {
 
 	// watchdog is the kernel watchdog.
 	watchdog *watchdog.Watchdog
+
+	// l is the loader that creates containers and sandboxes.
+	l *Loader
 }
 
 // StartRoot will start the root container process.
 func (cm *containerManager) StartRoot(_, _ *struct{}) error {
+	log.Debugf("containerManager.StartRoot")
 	// Tell the root container to start and wait for the result.
 	cm.startChan <- struct{}{}
 	return <-cm.startResultChan
@@ -138,11 +149,42 @@ func (cm *containerManager) StartRoot(_, _ *struct{}) error {
 
 // Processes retrieves information about processes running in the sandbox.
 func (cm *containerManager) Processes(_, out *[]*control.Process) error {
+	log.Debugf("containerManager.Processes")
 	return control.Processes(cm.k, out)
 }
 
+// StartArgs contains arguments to the Start method.
+type StartArgs struct {
+	// Spec is the spec of the container to start.
+	Spec *specs.Spec
+
+	// Conf is the runsc-specific configuration for the sandbox.
+	// TODO: Separate sandbox and container configs.
+	Conf *Config
+}
+
+// Start runs a created container within a sandbox and returns any start error.
+func (cm *containerManager) Start(args *StartArgs, _ *struct{}) error {
+	log.Debugf("containerManager.Start")
+
+	// Validate arguments.
+	if args == nil {
+		return errors.New("start missing arguments")
+	}
+	if args.Spec == nil {
+		return errors.New("start arguments missing spec")
+	}
+	if args.Conf == nil {
+		return errors.New("start arguments missing config")
+	}
+
+	// Propagate startContainer's error so the uRPC caller sees start failures.
+	return cm.l.startContainer(args, cm.k)
+}
+
 // Execute runs a command on a created or running sandbox.
 func (cm *containerManager) Execute(e *control.ExecArgs, waitStatus *uint32) error {
+	log.Debugf("containerManager.Execute")
 	proc := control.Proc{Kernel: cm.k}
 	if err := proc.Exec(e, waitStatus); err != nil {
 		return fmt.Errorf("error executing: %+v: %v", e, err)
@@ -152,6 +194,7 @@ func (cm *containerManager) Execute(e *control.ExecArgs, waitStatus *uint32) err
 
 // Checkpoint pauses a sandbox and saves its state.
 func (cm *containerManager) Checkpoint(o *control.SaveOpts, _ *struct{}) error {
+	log.Debugf("containerManager.Checkpoint")
 	state := control.State{
 		Kernel:   cm.k,
 		Watchdog: cm.watchdog,
@@ -173,6 +216,7 @@ func (cm *containerManager) Resume(_, _ *struct{}) error {
 
 // Wait waits for the init process in the given container.
 func (cm *containerManager) Wait(cid *string, waitStatus *uint32) error {
+	log.Debugf("containerManager.Wait")
 	// TODO: Use the cid and wait on the init process in that
 	// container. Currently we just wait on PID 1 in the sandbox.
 	tg := cm.k.TaskSet().Root.ThreadGroupWithID(1)
@@ -195,6 +239,7 @@ type SignalArgs struct {
 
 // Signal sends a signal to the init process of the container.
 func (cm *containerManager) Signal(args *SignalArgs, _ *struct{}) error {
+	log.Debugf("containerManager.Signal")
 	// TODO: Use the cid and send the signal to the init
 	// process in theat container. Currently we just signal PID 1 in the
 	// sandbox.
diff --git a/runsc/boot/loader.go b/runsc/boot/loader.go
index 526e8f8bb..d1a413cc7 100644
--- a/runsc/boot/loader.go
+++ b/runsc/boot/loader.go
@@ -12,7 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-// Package boot loads the kernel and runs a container..
+// Package boot loads the kernel and runs a container.
 package boot
 
 import (
@@ -79,8 +79,8 @@ type Loader struct {
 	// container. It should be called when a sandbox is destroyed.
 	stopSignalForwarding func()
 
-	// procArgs refers to the root container task.
-	procArgs kernel.CreateProcessArgs
+	// rootProcArgs refers to the root sandbox init task.
+	rootProcArgs kernel.CreateProcessArgs
 }
 
 func init() {
@@ -117,12 +117,6 @@ func New(spec *specs.Spec, conf *Config, controllerFD, restoreFD int, ioFDs []in
 	}
 	tk.SetClocks(time.NewCalibratedClocks())
 
-	// Create initial limits.
-	ls, err := createLimitSet(spec)
-	if err != nil {
-		return nil, fmt.Errorf("error creating limits: %v", err)
-	}
-
 	// Create capabilities.
 	caps, err := specutils.Capabilities(spec.Process.Capabilities)
 	if err != nil {
@@ -154,13 +148,6 @@ func New(spec *specs.Spec, conf *Config, controllerFD, restoreFD int, ioFDs []in
 		return nil, fmt.Errorf("failed to enable strace: %v", err)
 	}
 
-	// Get the executable path, which is a bit tricky because we have to
-	// inspect the environment PATH which is relative to the root path.
-	exec, err := specutils.GetExecutablePath(spec.Process.Args[0], spec.Root.Path, spec.Process.Env)
-	if err != nil {
-		return nil, fmt.Errorf("error getting executable path: %v", err)
-	}
-
 	// Create an empty network stack because the network namespace may be empty at
 	// this point. Netns is configured before Run() is called. Netstack is
 	// configured using a control uRPC message. Host network is configured inside
@@ -223,16 +210,56 @@ func New(spec *specs.Spec, conf *Config, controllerFD, restoreFD int, ioFDs []in
 		return nil, fmt.Errorf("error creating control server: %v", err)
 	}
 
+	// We don't care about child signals; some platforms can generate a
+	// tremendous number of useless ones (I'm looking at you, ptrace).
+	if err := sighandling.IgnoreChildStop(); err != nil {
+		return nil, fmt.Errorf("failed to ignore child stop signals: %v", err)
+	}
+	// Ensure that most signals received in sentry context are forwarded to
+	// the emulated kernel.
+	stopSignalForwarding := sighandling.StartForwarding(k)
+
+	procArgs, err := newProcess(spec, conf, ioFDs, console, creds, utsns, ipcns, k)
+	if err != nil {
+		return nil, fmt.Errorf("failed to create root process: %v", err)
+	}
+
+	l := &Loader{
+		k:                    k,
+		ctrl:                 ctrl,
+		conf:                 conf,
+		console:              console,
+		watchdog:             watchdog,
+		stopSignalForwarding: stopSignalForwarding,
+		rootProcArgs:         procArgs,
+	}
+	ctrl.manager.l = l
+	return l, nil
+}
+
+// newProcess creates a process that can be run with kernel.CreateProcess.
+func newProcess(spec *specs.Spec, conf *Config, ioFDs []int, console bool, creds *auth.Credentials, utsns *kernel.UTSNamespace, ipcns *kernel.IPCNamespace, k *kernel.Kernel) (kernel.CreateProcessArgs, error) {
+	// Create initial limits.
+	ls, err := createLimitSet(spec)
+	if err != nil {
+		return kernel.CreateProcessArgs{}, fmt.Errorf("error creating limits: %v", err)
+	}
+
+	// Get the executable path, which is a bit tricky because we have to
+	// inspect the environment PATH which is relative to the root path.
+	exec, err := specutils.GetExecutablePath(spec.Process.Args[0], spec.Root.Path, spec.Process.Env)
+	if err != nil {
+		return kernel.CreateProcessArgs{}, fmt.Errorf("error getting executable path: %v", err)
+	}
+
 	// Create the process arguments.
 	procArgs := kernel.CreateProcessArgs{
-		Filename:         exec,
-		Argv:             spec.Process.Args,
-		Envv:             spec.Process.Env,
-		WorkingDirectory: spec.Process.Cwd,
-		Credentials:      creds,
-		// Creating the FDMap requires that we have kernel.Kernel.fdMapUids, so
-		// it must wait until we have a Kernel.
-		Umask:                uint(syscall.Umask(0)),
+		Filename:             exec,
+		Argv:                 spec.Process.Args,
+		Envv:                 spec.Process.Env,
+		WorkingDirectory:     spec.Process.Cwd,
+		Credentials:          creds,
+		Umask:                uint(0022),
 		Limits:               ls,
 		MaxSymlinkTraversals: linux.MaxSymlinkTraversals,
 		UTSNamespace:         utsns,
@@ -240,52 +267,42 @@ func New(spec *specs.Spec, conf *Config, controllerFD, restoreFD int, ioFDs []in
 	}
 	ctx := procArgs.NewContext(k)
 
-	// Use root user to configure mounts. The current user might not have
-	// permission to do so.
-	rootProcArgs := kernel.CreateProcessArgs{
-		WorkingDirectory:     "/",
-		Credentials:          auth.NewRootCredentials(creds.UserNamespace),
-		Umask:                uint(syscall.Umask(0022)),
-		MaxSymlinkTraversals: linux.MaxSymlinkTraversals,
-	}
-	rootCtx := rootProcArgs.NewContext(k)
-
-	// Create the virtual filesystem.
-	mns, err := createMountNamespace(ctx, rootCtx, spec, conf, ioFDs)
-	if err != nil {
-		return nil, fmt.Errorf("error creating mounts: %v", err)
-	}
-	k.SetRootMountNamespace(mns)
-
-	// Create the FD map, which will set stdin, stdout, and stderr.  If console
-	// is true, then ioctl calls will be passed through to the host fd.
+	// Create the FD map, which will set stdin, stdout, and stderr.  If
+	// console is true, then ioctl calls will be passed through to the host
+	// fd.
 	fdm, err := createFDMap(ctx, k, ls, console)
 	if err != nil {
-		return nil, fmt.Errorf("error importing fds: %v", err)
+		return kernel.CreateProcessArgs{}, fmt.Errorf("error importing fds: %v", err)
 	}
 
 	// CreateProcess takes a reference on FDMap if successful. We
 	// won't need ours either way.
 	procArgs.FDMap = fdm
 
-	// We don't care about child signals; some platforms can generate a
-	// tremendous number of useless ones (I'm looking at you, ptrace).
-	if err := sighandling.IgnoreChildStop(); err != nil {
-		return nil, fmt.Errorf("failed to ignore child stop signals: %v", err)
+	// If this is the root container, we also need to setup the root mount
+	// namespace.
+	if k.RootMountNamespace() == nil {
+		// Use root user to configure mounts. The current user might not have
+		// permission to do so.
+		rootProcArgs := kernel.CreateProcessArgs{
+			WorkingDirectory: "/",
+			Credentials:      auth.NewRootCredentials(creds.UserNamespace),
+			// The sentry should run with a umask of 0.
+			Umask:                uint(syscall.Umask(0)),
+			MaxSymlinkTraversals: linux.MaxSymlinkTraversals,
+		}
+		rootCtx := rootProcArgs.NewContext(k)
+
+		// Create the virtual filesystem.
+		mns, err := createMountNamespace(ctx, rootCtx, spec, conf, ioFDs)
+		if err != nil {
+			return kernel.CreateProcessArgs{}, fmt.Errorf("error creating mounts: %v", err)
+		}
+
+		k.SetRootMountNamespace(mns)
 	}
-	// Ensure that most signals received in sentry context are forwarded to
-	// the emulated kernel.
-	stopSignalForwarding := sighandling.StartForwarding(k)
 
-	return &Loader{
-		k:                    k,
-		ctrl:                 ctrl,
-		conf:                 conf,
-		console:              console,
-		watchdog:             watchdog,
-		stopSignalForwarding: stopSignalForwarding,
-		procArgs:             procArgs,
-	}, nil
+	return procArgs, nil
 }
 
 // Destroy cleans up all resources used by the loader.
@@ -350,17 +367,69 @@ func (l *Loader) run() error {
 	}
 
 	// Create the root container init task.
-	if _, err := l.k.CreateProcess(l.procArgs); err != nil {
+	if _, err := l.k.CreateProcess(l.rootProcArgs); err != nil {
 		return fmt.Errorf("failed to create init process: %v", err)
 	}
 
 	// CreateProcess takes a reference on FDMap if successful.
-	l.procArgs.FDMap.DecRef()
+	l.rootProcArgs.FDMap.DecRef()
 
 	l.watchdog.Start()
 	return l.k.Start()
 }
 
+// startContainer builds credentials from args.Spec and creates its process in l.k.
+func (l *Loader) startContainer(args *StartArgs, k *kernel.Kernel) error {
+	spec := args.Spec
+	// Create capabilities.
+	caps, err := specutils.Capabilities(spec.Process.Capabilities)
+	if err != nil {
+		return fmt.Errorf("error creating capabilities: %v", err)
+	}
+
+	// Convert the spec's additional GIDs to KGIDs.
+	extraKGIDs := make([]auth.KGID, 0, len(spec.Process.User.AdditionalGids))
+	for _, GID := range spec.Process.User.AdditionalGids {
+		extraKGIDs = append(extraKGIDs, auth.KGID(GID))
+	}
+
+	// Create credentials. We reuse the root user namespace because the
+	// sentry currently supports only 1 mount namespace, which is tied to a
+	// single user namespace. Thus we must run in the same user namespace
+	// to access mounts.
+	// TODO: Create a new mount namespace for the container.
+	creds := auth.NewUserCredentials(
+		auth.KUID(spec.Process.User.UID),
+		auth.KGID(spec.Process.User.GID),
+		extraKGIDs,
+		caps,
+		l.k.RootUserNamespace())
+
+	// TODO: Start new containers in new PID namespaces when the spec says so.
+
+	procArgs, err := newProcess(
+		args.Spec,
+		args.Conf,
+		nil,   // ioFDs
+		false, // console
+		creds,
+		k.RootUTSNamespace(),
+		k.RootIPCNamespace(),
+		k)
+	if err != nil {
+		return fmt.Errorf("failed to create new process: %v", err)
+	}
+
+	if _, err := l.k.CreateProcess(procArgs); err != nil {
+		return fmt.Errorf("failed to create process in sentry: %v", err)
+	}
+
+	// CreateProcess takes a reference on FDMap if successful.
+	procArgs.FDMap.DecRef()
+
+	return nil
+}
+
 // WaitForStartSignal waits for a start signal from the control server.
 func (l *Loader) WaitForStartSignal() {
 	<-l.ctrl.manager.startChan
diff --git a/runsc/cmd/events.go b/runsc/cmd/events.go
index f221ad3ae..df65ea31d 100644
--- a/runsc/cmd/events.go
+++ b/runsc/cmd/events.go
@@ -76,7 +76,7 @@ func (evs *Events) Execute(ctx context.Context, f *flag.FlagSet, args ...interfa
 
 	c, err := container.Load(conf.RootDir, id)
 	if err != nil {
-		Fatalf("error loading sandox: %v", err)
+		Fatalf("error loading sandbox: %v", err)
 	}
 
 	// Repeatedly get stats from the container.
diff --git a/runsc/cmd/exec.go b/runsc/cmd/exec.go
index 235ed9bc6..cbce07c8e 100644
--- a/runsc/cmd/exec.go
+++ b/runsc/cmd/exec.go
@@ -104,7 +104,7 @@ func (ex *Exec) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
 
 	c, err := container.Load(conf.RootDir, id)
 	if err != nil {
-		Fatalf("error loading sandox: %v", err)
+		Fatalf("error loading sandbox: %v", err)
 	}
 
 	if e.WorkingDirectory == "" {
diff --git a/runsc/cmd/ps.go b/runsc/cmd/ps.go
index 9f9f4d15e..5d219bfdc 100644
--- a/runsc/cmd/ps.go
+++ b/runsc/cmd/ps.go
@@ -62,7 +62,7 @@ func (ps *PS) Execute(ctx context.Context, f *flag.FlagSet, args ...interface{})
 
 	c, err := container.Load(conf.RootDir, id)
 	if err != nil {
-		Fatalf("error loading sandox: %v", err)
+		Fatalf("error loading sandbox: %v", err)
 	}
 	pList, err := c.Processes()
 	if err != nil {
diff --git a/runsc/container/BUILD b/runsc/container/BUILD
index fe477abf2..61e05e1c3 100644
--- a/runsc/container/BUILD
+++ b/runsc/container/BUILD
@@ -37,6 +37,7 @@ go_test(
         "//pkg/sentry/kernel/auth",
         "//pkg/unet",
         "//runsc/container",
+        "//runsc/specutils",
         "//runsc/test/testutil",
         "@com_github_opencontainers_runtime-spec//specs-go:go_default_library",
         "@org_golang_x_sys//unix:go_default_library",
diff --git a/runsc/container/container.go b/runsc/container/container.go
index 571784e07..3b7f95af9 100644
--- a/runsc/container/container.go
+++ b/runsc/container/container.go
@@ -214,22 +214,43 @@ func Create(id string, spec *specs.Spec, conf *boot.Config, bundleDir, consoleSo
 		Owner:         os.Getenv("USER"),
 	}
 
-	// TODO: If the metadata annotations indicate that this
-	// container should be started in another sandbox, we must do so. The
-	// metadata will indicate the ID of the sandbox, which is the same as
-	// the ID of the init container in the sandbox. We can look up that
-	// init container by ID to get the sandbox, then we need to expose a
-	// way to run a new container in the sandbox.
-
-	// Start a new sandbox for this container. Any errors after this point
-	// must destroy the container.
-	s, err := sandbox.Create(id, spec, conf, bundleDir, consoleSocket)
-	if err != nil {
-		c.Destroy()
-		return nil, err
-	}
+	// If the metadata annotations indicate that this container should be
+	// started in an existing sandbox, we must do so. The metadata will
+	// indicate the ID of the sandbox, which is the same as the ID of the
+	// init container in the sandbox.
+	if specutils.ShouldCreateSandbox(spec) {
+		log.Debugf("Creating new sandbox for container %q", id)
+		// Start a new sandbox for this container. Any errors after this point
+		// must destroy the container.
+		s, err := sandbox.Create(id, spec, conf, bundleDir, consoleSocket)
+		if err != nil {
+			c.Destroy()
+			return nil, err
+		}
+		c.Sandbox = s
+	} else {
+		// This is sort of confusing. For a sandbox with a root
+		// container and a child container in it, runsc sees:
+		// * A container struct whose sandbox ID is equal to the
+		//   container ID. This is the root container that is tied to
+		//   the creation of the sandbox.
+		// * A container struct whose sandbox ID is equal to the above
+		//   container/sandbox ID, but that has a different container
+		//   ID. This is the child container.
+		sbid, ok := specutils.SandboxID(spec)
+		if !ok {
+			return nil, fmt.Errorf("no sandbox ID found when creating container")
+		}
+		log.Debugf("Creating new container %q in sandbox %q", c.ID, sbid)
 
-	c.Sandbox = s
+		// Find the sandbox associated with this ID.
+		sb, err := Load(conf.RootDir, sbid)
+		if err != nil {
+			c.Destroy()
+			return nil, err
+		}
+		c.Sandbox = sb.Sandbox
+	}
 	c.Status = Created
 
 	// Save the metadata file.
@@ -242,7 +263,7 @@ func Create(id string, spec *specs.Spec, conf *boot.Config, bundleDir, consoleSo
 	// this file is created, so it must be the last thing we do.
 	if pidFile != "" {
 		if err := ioutil.WriteFile(pidFile, []byte(strconv.Itoa(c.Pid())), 0644); err != nil {
-			s.Destroy()
+			c.Destroy()
 			return nil, fmt.Errorf("error writing pid file: %v", err)
 		}
 	}
@@ -266,9 +287,16 @@ func (c *Container) Start(conf *boot.Config) error {
 		}
 	}
 
-	if err := c.Sandbox.Start(c.ID, c.Spec, conf); err != nil {
-		c.Destroy()
-		return err
+	if specutils.ShouldCreateSandbox(c.Spec) {
+		if err := c.Sandbox.StartRoot(c.Spec, conf); err != nil {
+			c.Destroy()
+			return err
+		}
+	} else {
+		if err := c.Sandbox.Start(c.Spec, conf); err != nil {
+			c.Destroy()
+			return err
+		}
 	}
 
 	// "If any poststart hook fails, the runtime MUST log a warning, but
diff --git a/runsc/container/container_test.go b/runsc/container/container_test.go
index 7f87ea5ab..1116ca170 100644
--- a/runsc/container/container_test.go
+++ b/runsc/container/container_test.go
@@ -36,6 +36,7 @@ import (
 	"gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth"
 	"gvisor.googlesource.com/gvisor/pkg/unet"
 	"gvisor.googlesource.com/gvisor/runsc/container"
+	"gvisor.googlesource.com/gvisor/runsc/specutils"
 	"gvisor.googlesource.com/gvisor/runsc/test/testutil"
 )
 
@@ -51,7 +52,7 @@ func waitForProcessList(s *container.Container, expected []*control.Process) err
 	var got []*control.Process
 	for start := time.Now(); time.Now().Sub(start) < 10*time.Second; {
 		var err error
-		got, err := s.Processes()
+		got, err = s.Processes()
 		if err != nil {
 			return fmt.Errorf("error getting process data from container: %v", err)
 		}
@@ -946,3 +947,73 @@ func TestAbbreviatedIDs(t *testing.T) {
 		}
 	}
 }
+
+// TestMultiContainerSanity checks that it is possible to run 2 dead-simple
+// containers in the same sandbox.
+func TestMultiContainerSanity(t *testing.T) {
+	containerIDs := []string{
+		testutil.UniqueContainerID(),
+		testutil.UniqueContainerID(),
+	}
+	containerAnnotations := []map[string]string{
+		// The first container creates a sandbox.
+		map[string]string{
+			specutils.ContainerdContainerTypeAnnotation: specutils.ContainerdContainerTypeSandbox,
+		},
+		// The second container creates a container within the first
+		// container's sandbox.
+		map[string]string{
+			specutils.ContainerdContainerTypeAnnotation: specutils.ContainerdContainerTypeContainer,
+			specutils.ContainerdSandboxIDAnnotation:     containerIDs[0],
+		},
+	}
+
+	rootDir, err := testutil.SetupRootDir()
+	if err != nil {
+		t.Fatalf("error creating root dir: %v", err)
+	}
+	defer os.RemoveAll(rootDir)
+
+	// Setup the containers.
+	containers := make([]*container.Container, 0, len(containerIDs))
+	for i, annotations := range containerAnnotations {
+		spec := testutil.NewSpecWithArgs("sleep", "100")
+		spec.Annotations = annotations
+		bundleDir, conf, err := testutil.SetupContainerInRoot(rootDir, spec)
+		if err != nil {
+			t.Fatalf("error setting up container: %v", err)
+		}
+		defer os.RemoveAll(bundleDir)
+		cont, err := container.Create(containerIDs[i], spec, conf, bundleDir, "", "")
+		if err != nil {
+			t.Fatalf("error creating container: %v", err)
+		}
+		defer cont.Destroy()
+		if err := cont.Start(conf); err != nil {
+			t.Fatalf("error starting container: %v", err)
+		}
+		containers = append(containers, cont)
+	}
+
+	expectedPL := []*control.Process{
+		{
+			UID:  0,
+			PID:  1,
+			PPID: 0,
+			C:    0,
+			Cmd:  "sleep",
+		},
+		{
+			UID:  0,
+			PID:  2,
+			PPID: 0,
+			C:    0,
+			Cmd:  "sleep",
+		},
+	}
+
+	// Check via ps that multiple processes are running.
+	if err := waitForProcessList(containers[0], expectedPL); err != nil {
+		t.Errorf("failed to wait for sleep to start: %v", err)
+	}
+}
diff --git a/runsc/sandbox/sandbox.go b/runsc/sandbox/sandbox.go
index 0181dc9d4..90b46e247 100644
--- a/runsc/sandbox/sandbox.go
+++ b/runsc/sandbox/sandbox.go
@@ -81,9 +81,9 @@ func Create(id string, spec *specs.Spec, conf *boot.Config, bundleDir, consoleSo
 	return s, nil
 }
 
-// Start starts running the containerized process inside the sandbox.
-func (s *Sandbox) Start(cid string, spec *specs.Spec, conf *boot.Config) error {
-	log.Debugf("Start sandbox %q, pid: %d", s.ID, s.Pid)
+// StartRoot starts running the root container process inside the sandbox.
+func (s *Sandbox) StartRoot(spec *specs.Spec, conf *boot.Config) error {
+	log.Debugf("Start root sandbox %q, pid: %d", s.ID, s.Pid)
 	conn, err := s.connect()
 	if err != nil {
 		return err
@@ -96,9 +96,7 @@ func (s *Sandbox) Start(cid string, spec *specs.Spec, conf *boot.Config) error {
 	}
 
 	// Send a message to the sandbox control server to start the root
-	// container..
-	//
-	// TODO: We need a way to start non-root containers.
+	// container.
 	if err := conn.Call(boot.RootContainerStart, nil, nil); err != nil {
 		return fmt.Errorf("error starting root container %v: %v", spec.Process.Args, err)
 	}
@@ -106,6 +104,26 @@ func (s *Sandbox) Start(cid string, spec *specs.Spec, conf *boot.Config) error {
 	return nil
 }
 
+// Start starts running a non-root container inside the sandbox.
+func (s *Sandbox) Start(spec *specs.Spec, conf *boot.Config) error {
+	log.Debugf("Start non-root container sandbox %q, pid: %d", s.ID, s.Pid)
+	conn, err := s.connect()
+	if err != nil {
+		return err
+	}
+	defer conn.Close()
+
+	args := boot.StartArgs{
+		Spec: spec,
+		Conf: conf,
+	}
+	if err := conn.Call(boot.ContainerStart, args, nil); err != nil {
+		return fmt.Errorf("error starting non-root container %v: %v", spec.Process.Args, err)
+	}
+
+	return nil
+}
+
 // Processes retrieves the list of processes and associated metadata for a
 // given container in this sandbox.
 func (s *Sandbox) Processes(cid string) ([]*control.Process, error) {
@@ -130,11 +148,11 @@ func (s *Sandbox) Execute(cid string, e *control.ExecArgs) (syscall.WaitStatus,
 	log.Debugf("Executing new process in container %q in sandbox %q", cid, s.ID)
 	conn, err := s.connect()
 	if err != nil {
-		return 0, fmt.Errorf("error connecting to control server at pid %d: %v", s.Pid, err)
+		return 0, s.connError(err)
 	}
 	defer conn.Close()
 
-	// Send a message to the sandbox control server to start the container..
+	// Send a message to the sandbox control server to start the container.
 	var waitStatus uint32
 	// TODO: Pass in the container id (cid) here. The sandbox
 	// should execute in the context of that container.
@@ -168,11 +186,15 @@ func (s *Sandbox) connect() (*urpc.Client, error) {
 	log.Debugf("Connecting to sandbox %q", s.ID)
 	conn, err := client.ConnectTo(boot.ControlSocketAddr(s.ID))
 	if err != nil {
-		return nil, fmt.Errorf("error connecting to control server at pid %d: %v", s.Pid, err)
+		return nil, s.connError(err)
 	}
 	return conn, nil
 }
 
+func (s *Sandbox) connError(err error) error {
+	return fmt.Errorf("error connecting to control server at pid %d: %v", s.Pid, err)
+}
+
 func (s *Sandbox) createGoferProcess(spec *specs.Spec, conf *boot.Config, bundleDir, binPath string) ([]*os.File, error) {
 	if conf.FileAccess != boot.FileAccessProxy {
 		// Don't start a gofer. The sandbox will access host FS directly.
@@ -266,7 +288,7 @@ func (s *Sandbox) createSandboxProcess(spec *specs.Spec, conf *boot.Config, bund
 	}
 
 	// If the console control socket file is provided, then create a new
-	// pty master/slave pair and set the tty on the sandox process.
+	// pty master/slave pair and set the tty on the sandbox process.
 	if consoleEnabled {
 		// setupConsole will send the master on the socket, and return
 		// the slave.
diff --git a/runsc/sandbox/sandbox_test.go b/runsc/sandbox/sandbox_test.go
index e25290d5e..fee2de283 100644
--- a/runsc/sandbox/sandbox_test.go
+++ b/runsc/sandbox/sandbox_test.go
@@ -44,7 +44,7 @@ func TestGoferExits(t *testing.T) {
 		t.Fatalf("error creating container: %v", err)
 	}
 	defer s.Destroy()
-	if err := s.Start("123", spec, conf); err != nil {
+	if err := s.StartRoot(spec, conf); err != nil {
 		t.Fatalf("error starting container: %v", err)
 	}
 
diff --git a/runsc/specutils/specutils.go b/runsc/specutils/specutils.go
index 8dae3efb1..c552111f2 100644
--- a/runsc/specutils/specutils.go
+++ b/runsc/specutils/specutils.go
@@ -63,6 +63,26 @@ func ValidateSpec(spec *specs.Spec) error {
 	if spec.Linux != nil && spec.Linux.Seccomp != nil {
 		log.Warningf("Seccomp spec is being ignored")
 	}
+
+	// 2 annotations are used by containerd to support multi-container pods.
+	//   "io.kubernetes.cri.container-type"
+	//   "io.kubernetes.cri.sandbox-id"
+	containerType, hasContainerType := spec.Annotations[ContainerdContainerTypeAnnotation]
+	_, hasSandboxID := spec.Annotations[ContainerdSandboxIDAnnotation]
+	switch {
+	// Non-containerd use won't set a container type.
+	case !hasContainerType:
+	case containerType == ContainerdContainerTypeSandbox:
+	// When starting a container in an existing sandbox, the sandbox ID
+	// must be set.
+	case containerType == ContainerdContainerTypeContainer:
+		if !hasSandboxID {
+			return fmt.Errorf("spec has container-type of %s, but no sandbox ID set", containerType)
+		}
+	default:
+		return fmt.Errorf("unknown container-type: %s", containerType)
+	}
+
 	return nil
 }
 
@@ -82,7 +102,7 @@ func ReadSpec(bundleDir string) (*specs.Spec, error) {
 }
 
 // GetExecutablePath returns the absolute path to the executable, relative to
-// the root.  It searches the environment PATH for the first file that exists
+// the root. It searches the environment PATH for the first file that exists
 // with the given name.
 func GetExecutablePath(exec, root string, env []string) (string, error) {
 	exec = filepath.Clean(exec)
@@ -246,6 +266,39 @@ func BinPath() (string, error) {
 	return binPath, nil
 }
 
+const (
+	// ContainerdContainerTypeAnnotation is the OCI annotation set by
+	// containerd to indicate whether the container to create should have
+	// its own sandbox or a container within an existing sandbox.
+	ContainerdContainerTypeAnnotation = "io.kubernetes.cri.container-type"
+	// ContainerdContainerTypeContainer is the container type value
+	// indicating the container should be created in an existing sandbox.
+	ContainerdContainerTypeContainer = "container"
+	// ContainerdContainerTypeSandbox is the container type value
+	// indicating the container should be created in a new sandbox.
+	ContainerdContainerTypeSandbox = "sandbox"
+
+	// ContainerdSandboxIDAnnotation is the OCI annotation set to indicate
+	// which sandbox the container should be created in when the container
+	// is not the first container in the sandbox.
+	ContainerdSandboxIDAnnotation = "io.kubernetes.cri.sandbox-id"
+)
+
+// ShouldCreateSandbox returns true if the spec indicates that a new sandbox
+// should be created for the container. If false, the container should be
+// started in an existing sandbox.
+func ShouldCreateSandbox(spec *specs.Spec) bool {
+	t, ok := spec.Annotations[ContainerdContainerTypeAnnotation]
+	return !ok || t == ContainerdContainerTypeSandbox
+}
+
+// SandboxID returns the ID of the sandbox to join and whether an ID was found
+// in the spec.
+func SandboxID(spec *specs.Spec) (string, bool) {
+	id, ok := spec.Annotations[ContainerdSandboxIDAnnotation]
+	return id, ok
+}
+
 // WaitForReady waits for a process to become ready. The process is ready when
 // the 'ready' function returns true. It continues to wait if 'ready' returns
 // false. It returns error on timeout, if the process stops or if 'ready' fails.