5 files changed, 1259 insertions, 0 deletions
diff --git a/runsc/container/BUILD b/runsc/container/BUILD
new file mode 100644
index 000000000..c558b4b0a
--- /dev/null
+++ b/runsc/container/BUILD
@@ -0,0 +1,45 @@
+package(licenses = ["notice"])  # Apache 2.0
+
+load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test")
+
+go_library(
+    name = "container",
+    srcs = [
+        "container.go",
+        "hook.go",
+        "status.go",
+    ],
+    importpath = "gvisor.googlesource.com/gvisor/runsc/container",
+    visibility = [
+        "//runsc:__subpackages__",  # Only packages under //runsc may depend on this library.
+    ],
+    deps = [
+        "//pkg/log",
+        "//pkg/sentry/control",
+        "//runsc/boot",
+        "//runsc/sandbox",
+        "//runsc/specutils",
+        "@com_github_opencontainers_runtime-spec//specs-go:go_default_library",
+    ],
+)
+
+go_test(
+    name = "container_test",
+    size = "small",
+    srcs = ["container_test.go"],
+    pure = "on",
+    rundir = ".",
+    deps = [
+        "//pkg/abi/linux",
+        "//pkg/log",
+        "//pkg/sentry/control",
+        "//pkg/sentry/kernel/auth",
+        "//pkg/unet",
+        "//runsc/boot",
+        "//runsc/cmd",
+        "//runsc/container",  # The package under test (tests live in package container_test).
+        "@com_github_google_subcommands//:go_default_library",
+        "@com_github_opencontainers_runtime-spec//specs-go:go_default_library",
+        "@org_golang_x_sys//unix:go_default_library",
+    ],
+)
diff --git a/runsc/container/container.go b/runsc/container/container.go
new file mode 100644
index 000000000..97115cd6b
--- /dev/null
+++ b/runsc/container/container.go
@@ -0,0 +1,380 @@
+// Copyright 2018 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package container creates and manipulates containers.
+package container
+
+import (
+	"encoding/json"
+	"fmt"
+	"io/ioutil"
+	"os"
+	"path/filepath"
+	"regexp"
+	"strconv"
+	"syscall"
+	"time"
+
+	specs "github.com/opencontainers/runtime-spec/specs-go"
+	"gvisor.googlesource.com/gvisor/pkg/log"
+	"gvisor.googlesource.com/gvisor/pkg/sentry/control"
+	"gvisor.googlesource.com/gvisor/runsc/boot"
+	"gvisor.googlesource.com/gvisor/runsc/sandbox"
+	"gvisor.googlesource.com/gvisor/runsc/specutils"
+)
+
+// metadataFilename is the name of the metadata file relative to the container
+// root directory that holds sandbox metadata.
+const metadataFilename = "meta.json"
+
+// validateID validates the container id.
+func validateID(id string) error {
+	// See libcontainer/factory_linux.go.
+	idRegex := regexp.MustCompile(`^[\w+-\.]+$`)
+	if !idRegex.MatchString(id) {
+		return fmt.Errorf("invalid container id: %v", id)
+	}
+	return nil
+}
+
+// Container represents a containerized application. When running, the
+// container is associated with a single Sandbox.
+//
+// Container metadata can be saved and loaded to disk. Within a root directory,
+// we maintain subdirectories for each container named with the container id.
+// The container metadata is stored as json within the container directory
+// in a file named "meta.json". This metadata format is defined by us, and is
+// not part of the OCI spec.
+//
+// Containers must write their metadata file after any change to their internal
+// state. The entire container directory is deleted when the container is
+// destroyed.
+type Container struct {
+	// ID is the container ID. It doubles as the container's directory name.
+	ID string `json:"id"`
+
+	// Spec is the OCI runtime spec that configures this container.
+	Spec *specs.Spec `json:"spec"`
+
+	// BundleDir is the directory containing the container bundle.
+	BundleDir string `json:"bundleDir"`
+
+	// Root is the directory containing the container metadata file.
+	Root string `json:"root"`
+
+	// CreatedAt is the time the container was created.
+	CreatedAt time.Time `json:"createdAt"`
+
+	// Owner is the container owner; Create fills it from the USER env var.
+	Owner string `json:"owner"`
+
+	// ConsoleSocket is the path to a unix domain socket that will receive
+	// the console FD. It is only used during create, so it is excluded
+	// from the saved metadata (json:"-").
+	ConsoleSocket string `json:"-"`
+
+	// Status is the current container Status.
+	Status Status `json:"status"`
+
+	// Sandbox is the sandbox this container is running in. It will be nil
+	// if the container is not in state Running or Created.
+	Sandbox *sandbox.Sandbox `json:"sandbox"`
+}
+
+// Load loads a container with the given id from a metadata file.
+func Load(rootDir, id string) (*Container, error) {
+	log.Debugf("Load container %q %q", rootDir, id)
+	if err := validateID(id); err != nil {
+		return nil, err
+	}
+	cRoot := filepath.Join(rootDir, id)
+	if !exists(cRoot) {
+		return nil, fmt.Errorf("container with id %q does not exist", id)
+	}
+	metaFile := filepath.Join(cRoot, metadataFilename)
+	if !exists(metaFile) {
+		return nil, fmt.Errorf("container with id %q does not have metadata file %q", id, metaFile)
+	}
+	metaBytes, err := ioutil.ReadFile(metaFile)
+	if err != nil {
+		return nil, fmt.Errorf("error reading container metadata file %q: %v", metaFile, err)
+	}
+	var c Container
+	if err := json.Unmarshal(metaBytes, &c); err != nil {
+		return nil, fmt.Errorf("error unmarshaling container metadata from %q: %v", metaFile, err)
+	}
+
+	// If the status is "Running" or "Created", check that the sandbox
+	// process still exists, and set it to Stopped if it does not.
+	//
+	// This is inherently racey.
+	if c.Status == Running || c.Status == Created {
+		// Send signal 0 to check if container still exists.
+		if err := c.Signal(0); err != nil {
+			// Container no longer exists.
+			c.Status = Stopped
+			c.Sandbox = nil
+		}
+	}
+
+	return &c, nil
+}
+
+// List returns the name of every entry in rootDir; each is a container id.
+func List(rootDir string) ([]string, error) {
+	log.Debugf("List containers %q", rootDir)
+	entries, err := ioutil.ReadDir(rootDir)
+	if err != nil {
+		return nil, fmt.Errorf("ReadDir(%s) failed: %v", rootDir, err)
+	}
+	var ids []string
+	for i := range entries {
+		ids = append(ids, entries[i].Name())
+	}
+	return ids, nil
+}
+
+// Create creates the container in a new Sandbox process and saves its
+// metadata. Reusing an existing Sandbox is not implemented yet (see TODO).
+func Create(id string, spec *specs.Spec, conf *boot.Config, bundleDir, consoleSocket, pidFile string) (*Container, error) {
+	log.Debugf("Create container %q in root dir: %s", id, conf.RootDir)
+	if err := validateID(id); err != nil {
+		return nil, err
+	}
+	if err := specutils.ValidateSpec(spec); err != nil {
+		return nil, err
+	}
+
+	containerRoot := filepath.Join(conf.RootDir, id)
+	if exists(containerRoot) {
+		return nil, fmt.Errorf("container with id %q already exists: %q ", id, containerRoot)
+	}
+
+	c := &Container{
+		ID:            id,
+		Spec:          spec,
+		ConsoleSocket: consoleSocket,
+		BundleDir:     bundleDir,
+		Root:          containerRoot,
+		Status:        Creating,
+		Owner:         os.Getenv("USER"),
+	}
+
+	// TODO: If the metadata annotations indicate that this
+	// container should be started in another sandbox, we must do so. The
+	// metadata will indicate the ID of the sandbox, which is the same as
+	// the ID of the init container in the sandbox. We can look up that
+	// init container by ID to get the sandbox, then we need to expose a
+	// way to run a new container in the sandbox.
+
+	// Start a new sandbox for this container. Any errors after this point
+	// must destroy the container.
+	s, err := sandbox.Create(id, spec, conf, bundleDir, consoleSocket)
+	if err != nil {
+		c.Destroy()
+		return nil, err
+	}
+
+	c.Sandbox = s
+	c.Status = Created
+
+	// Save the metadata file.
+	if err := c.save(); err != nil {
+		c.Destroy()
+		return nil, err
+	}
+
+	// Write the pid file last: containerd considers the create complete once
+	// it exists. On failure destroy the whole container, not just the sandbox.
+	if pidFile != "" {
+		if err := ioutil.WriteFile(pidFile, []byte(strconv.Itoa(c.Pid())), 0644); err != nil {
+			c.Destroy()
+			return nil, fmt.Errorf("error writing pid file: %v", err)
+		}
+	}
+
+	return c, nil
+}
+
+// Start launches the containerized process in the sandbox of a Created container.
+func (c *Container) Start(conf *boot.Config) error {
+	log.Debugf("Start container %q", c.ID)
+	if c.Status != Created {
+		return fmt.Errorf("cannot start container in state %s", c.Status)
+	}
+
+	// A failing prestart hook is fatal: the runtime must report the error
+	// and stop and destroy the container.
+	if h := c.Spec.Hooks; h != nil {
+		if err := executeHooks(h.Prestart, c.State()); err != nil {
+			c.Destroy()
+			return err
+		}
+	}
+
+	if err := c.Sandbox.Start(c.ID, c.Spec, conf); err != nil {
+		c.Destroy()
+		return err
+	}
+
+	// A failing poststart hook only produces a warning; the remaining
+	// hooks and the rest of the lifecycle continue as if it had
+	// succeeded.
+	if h := c.Spec.Hooks; h != nil {
+		executeHooksBestEffort(h.Poststart, c.State())
+	}
+
+	c.Status = Running
+	return c.save()
+}
+
+// Run creates the container, starts it, and returns the result of Wait.
+func Run(id string, spec *specs.Spec, conf *boot.Config, bundleDir, consoleSocket, pidFile string) (syscall.WaitStatus, error) {
+	log.Debugf("Run container %q in root dir: %s", id, conf.RootDir)
+	c, createErr := Create(id, spec, conf, bundleDir, consoleSocket, pidFile)
+	if createErr != nil {
+		return 0, fmt.Errorf("error creating container: %v", createErr)
+	}
+	if startErr := c.Start(conf); startErr != nil {
+		return 0, fmt.Errorf("error starting container: %v", startErr)
+	}
+	return c.Wait()
+}
+
+// Execute runs the command described by e in a Created or Running container.
+func (c *Container) Execute(e *control.ExecArgs) (syscall.WaitStatus, error) {
+	log.Debugf("Execute in container %q, args: %+v", c.ID, e)
+	if usable := c.Status == Created || c.Status == Running; !usable {
+		return 0, fmt.Errorf("cannot exec in container in state %s", c.Status)
+	}
+	return c.Sandbox.Execute(c.ID, e)
+}
+
+// Event asks the sandbox for the events of a Created or Running container.
+func (c *Container) Event() (*boot.Event, error) {
+	log.Debugf("Getting events for container %q", c.ID)
+	if st := c.Status; st != Running && st != Created {
+		return nil, fmt.Errorf("cannot get events for container in state: %s", st)
+	}
+	return c.Sandbox.Event(c.ID)
+}
+
+// Pid reports the pid of the sandbox hosting the container while the container
+// is Created or Running, and -1 in every other state.
+func (c *Container) Pid() int {
+	if c.Status == Running || c.Status == Created {
+		return c.Sandbox.Pid
+	}
+	return -1
+}
+
+// Wait blocks until the container exits and returns the sandbox-reported WaitStatus.
+func (c *Container) Wait() (syscall.WaitStatus, error) {
+	log.Debugf("Wait on container %q", c.ID)
+	return c.Sandbox.Wait(c.ID) // NOTE(review): unlike Signal, no Status check guards c.Sandbox here.
+}
+
+// Signal forwards sig to the sandbox; on a Stopped container it only warns.
+func (c *Container) Signal(sig syscall.Signal) error {
+	log.Debugf("Signal container %q", c.ID)
+	if c.Status != Stopped {
+		return c.Sandbox.Signal(c.ID, sig)
+	}
+	log.Warningf("container %q not running, not sending signal %v", c.ID, sig)
+	return nil
+}
+
+// State returns the metadata of the container.
+func (c *Container) State() specs.State {
+	return specs.State{
+		Version: specs.Version,
+		ID:      c.ID,
+		Status:  c.Status.String(),
+		Pid:     c.Pid(),
+		Bundle:  c.BundleDir,
+	}
+}
+
+// Processes returns the processes inside the container together with their
+// metadata. It fails unless the container is in the Running state.
+func (c *Container) Processes() ([]*control.Process, error) {
+	if c.Status == Running {
+		return c.Sandbox.Processes(c.ID)
+	}
+	return nil, fmt.Errorf("cannot get processes of container %q because it isn't running. It is in state %v", c.ID, c.Status)
+}
+
+// Destroy stops the container, deletes its metadata directory, runs any
+// poststop hooks, and tears down the sandbox when this container owns it.
+func (c *Container) Destroy() error {
+	log.Debugf("Destroy container %q", c.ID)
+
+	// First stop the container. The sandbox is nil once the container has
+	// stopped or if it was never created, so there is nothing to stop then.
+	if c.Sandbox != nil {
+		if err := c.Sandbox.Stop(c.ID); err != nil {
+			return err
+		}
+	}
+
+	// Then destroy all the metadata; a failure is logged, not returned.
+	if err := os.RemoveAll(c.Root); err != nil {
+		log.Warningf("Failed to delete container root directory %q, err: %v", c.Root, err)
+	}
+
+	// "If any poststop hook fails, the runtime MUST log a warning, but the
+	// remaining hooks and lifecycle continue as if the hook had succeeded".
+	if c.Spec.Hooks != nil && (c.Status == Created || c.Status == Running) {
+		executeHooksBestEffort(c.Spec.Hooks.Poststop, c.State())
+	}
+
+	// If we are the first container in the sandbox, take the sandbox down
+	// as well.
+	if c.Sandbox != nil && c.Sandbox.ID == c.ID {
+		if err := c.Sandbox.Destroy(); err != nil {
+			log.Warningf("Failed to destroy sandbox %q: %v", c.Sandbox.ID, err)
+		}
+	}
+
+	c.Sandbox = nil
+	c.Status = Stopped
+	return nil
+}
+
+// save creates c.Root if needed and writes the container as JSON into it.
+func (c *Container) save() error {
+	log.Debugf("Save container %q", c.ID)
+	if err := os.MkdirAll(c.Root, 0711); err != nil {
+		return fmt.Errorf("error creating container root directory %q: %v", c.Root, err)
+	}
+	encoded, err := json.Marshal(c)
+	if err != nil {
+		return fmt.Errorf("error marshaling container metadata: %v", err)
+	}
+	err = ioutil.WriteFile(filepath.Join(c.Root, metadataFilename), encoded, 0640)
+	if err != nil {
+		return fmt.Errorf("error writing container metadata: %v", err)
+	}
+	return nil
+}
+
+// exists reports whether os.Stat succeeds on f. A Stat error other than
+// "does not exist" is logged as a warning and still reported as false.
+func exists(f string) bool {
+	_, err := os.Stat(f)
+	if err != nil && !os.IsNotExist(err) {
+		log.Warningf("error checking for file %q: %v", f, err)
+	}
+	return err == nil
+}
diff --git a/runsc/container/container_test.go b/runsc/container/container_test.go
new file mode 100644
index 000000000..67efd2f9e
--- /dev/null
+++ b/runsc/container/container_test.go
@@ -0,0 +1,669 @@
+// Copyright 2018 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package container_test
+
+import (
+	"encoding/json"
+	"fmt"
+	"io"
+	"io/ioutil"
+	"os"
+	"os/signal"
+	"path/filepath"
+	"reflect"
+	"strings"
+	"syscall"
+	"testing"
+	"time"
+
+	"context"
+	"flag"
+	"github.com/google/subcommands"
+	specs "github.com/opencontainers/runtime-spec/specs-go"
+	"golang.org/x/sys/unix"
+	"gvisor.googlesource.com/gvisor/pkg/abi/linux"
+	"gvisor.googlesource.com/gvisor/pkg/log"
+	"gvisor.googlesource.com/gvisor/pkg/sentry/control"
+	"gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth"
+	"gvisor.googlesource.com/gvisor/pkg/unet"
+	"gvisor.googlesource.com/gvisor/runsc/boot"
+	"gvisor.googlesource.com/gvisor/runsc/cmd"
+	"gvisor.googlesource.com/gvisor/runsc/container"
+)
+
+func init() {
+	log.SetLevel(log.Debug) // Log at debug level for every test in this file.
+}
+
+// writeSpec JSON-encodes spec and writes it to config.json inside dir.
+func writeSpec(dir string, spec *specs.Spec) error {
+	encoded, err := json.Marshal(spec)
+	if err == nil {
+		err = ioutil.WriteFile(filepath.Join(dir, "config.json"), encoded, 0755)
+	}
+	return err
+}
+
+// newSpecWithArgs builds a minimal spec for tests whose process runs args on
+// a read-only root.
+func newSpecWithArgs(args ...string) *specs.Spec {
+	// The host filesystem root is the container root.
+	root := &specs.Root{
+		Path:     "/",
+		Readonly: true,
+	}
+	// Only PATH is passed through from the host environment.
+	process := &specs.Process{
+		Args: args,
+		Env:  []string{"PATH=" + os.Getenv("PATH")},
+	}
+	return &specs.Spec{
+		Root:    root,
+		Process: process,
+	}
+}
+
+// shutdownSignal is sent to the sandbox to shut it down cleanly (see TestMain).
+const shutdownSignal = syscall.SIGUSR2
+
+// setupContainer creates a root dir and a bundle dir holding spec as config.json,
+// and builds a test config. Dirs already created are removed if a later step fails.
+func setupContainer(spec *specs.Spec) (rootDir, bundleDir string, conf *boot.Config, err error) {
+	rootDir, err = ioutil.TempDir("", "containers")
+	if err != nil {
+		return "", "", nil, fmt.Errorf("error creating root dir: %v", err)
+	}
+	bundleDir, err = ioutil.TempDir("", "bundle")
+	if err != nil {
+		os.RemoveAll(rootDir)
+		return "", "", nil, fmt.Errorf("error creating bundle dir: %v", err)
+	}
+
+	if err = writeSpec(bundleDir, spec); err != nil {
+		os.RemoveAll(rootDir)
+		os.RemoveAll(bundleDir)
+		return "", "", nil, fmt.Errorf("error writing spec: %v", err)
+	}
+
+	conf = &boot.Config{
+		RootDir: rootDir,
+		Network: boot.NetworkNone,
+		// Don't add flags when calling subprocesses: the test runner does not
+		// know them all, and we control the subprocess Config anyway.
+		TestModeNoFlags: true,
+	}
+	return rootDir, bundleDir, conf, nil
+}
+
+// uniqueContainerID returns a container id derived from the current time in
+// nanoseconds, so that each test gets its own id. The id names an abstract
+// unix domain socket, which must be unique: a socket left over from an earlier
+// run is sometimes not cleaned up in time, and creating a container with the
+// same id would then fail.
+func uniqueContainerID() string {
+	nanos := time.Now().UnixNano()
+	return fmt.Sprintf("test-container-%d", nanos)
+}
+
+// waitForProcessList polls for up to 10s until the container's process list equals expected.
+func waitForProcessList(s *container.Container, expected []*control.Process) error {
+	var got []*control.Process
+	for start := time.Now(); time.Since(start) < 10*time.Second; {
+		var err error
+		got, err = s.Processes() // "=" keeps the outer got, so the timeout error shows the last list.
+		if err != nil {
+			return fmt.Errorf("error getting process data from container: %v", err)
+		}
+		if procListsEqual(got, expected) {
+			return nil
+		}
+		// Process might not have started, try again...
+		time.Sleep(10 * time.Millisecond)
+	}
+	return fmt.Errorf("container got process list: %s, want: %s", procListToString(got), procListToString(expected))
+}
+
+// TestLifecycle tests the basic Create/Start/Signal/Destroy container lifecycle.
+// It verifies after each step that the container can be loaded from disk, and
+// has the correct status.
+func TestLifecycle(t *testing.T) {
+	// The container will just sleep for a long time. We will kill it before
+	// it finishes sleeping.
+	spec := newSpecWithArgs("sleep", "100")
+
+	rootDir, bundleDir, conf, err := setupContainer(spec)
+	if err != nil {
+		t.Fatalf("error setting up container: %v", err)
+	}
+	defer os.RemoveAll(rootDir)
+	defer os.RemoveAll(bundleDir)
+
+	// expectedPL lists the expected process state of the container.
+	expectedPL := []*control.Process{
+		{
+			UID:  0,
+			PID:  1,
+			PPID: 0,
+			C:    0,
+			Cmd:  "sleep",
+		},
+	}
+	// Create the container.
+	id := uniqueContainerID()
+	if _, err := container.Create(id, spec, conf, bundleDir, "", ""); err != nil {
+		t.Fatalf("error creating container: %v", err)
+	}
+	// Load the container from disk and check the status (expect Created).
+	s, err := container.Load(rootDir, id)
+	if err != nil {
+		t.Fatalf("error loading container: %v", err)
+	}
+	if got, want := s.Status, container.Created; got != want {
+		t.Errorf("container status got %v, want %v", got, want)
+	}
+
+	// List should return exactly this container id.
+	ids, err := container.List(rootDir)
+	if err != nil {
+		t.Fatalf("error listing containers: %v", err)
+	}
+	if got, want := ids, []string{id}; !reflect.DeepEqual(got, want) {
+		t.Errorf("container list got %v, want %v", got, want)
+	}
+
+	// Start the container.
+	if err := s.Start(conf); err != nil {
+		t.Fatalf("error starting container: %v", err)
+	}
+	// Load the container from disk and check the status (expect Running).
+	s, err = container.Load(rootDir, id)
+	if err != nil {
+		t.Fatalf("error loading container: %v", err)
+	}
+	if got, want := s.Status, container.Running; got != want {
+		t.Errorf("container status got %v, want %v", got, want)
+	}
+
+	// Verify that "sleep 100" is running.
+	if err := waitForProcessList(s, expectedPL); err != nil {
+		t.Error(err)
+	}
+
+	// Send the container shutdownSignal, which TestMain catches in the
+	// boot/gofer process and uses to exit cleanly.
+	if err := s.Signal(shutdownSignal); err != nil {
+		t.Fatalf("error sending signal %v to container: %v", shutdownSignal, err)
+	}
+	// Wait for it to die.
+	if _, err := s.Wait(); err != nil {
+		t.Fatalf("error waiting on container: %v", err)
+	}
+	// Load the container from disk and check the status (expect Stopped).
+	s, err = container.Load(rootDir, id)
+	if err != nil {
+		t.Fatalf("error loading container: %v", err)
+	}
+	if got, want := s.Status, container.Stopped; got != want {
+		t.Errorf("container status got %v, want %v", got, want)
+	}
+
+	// Destroy the container.
+	if err := s.Destroy(); err != nil {
+		t.Fatalf("error destroying container: %v", err)
+	}
+
+	// List should not return the container id.
+	ids, err = container.List(rootDir)
+	if err != nil {
+		t.Fatalf("error listing containers: %v", err)
+	}
+	if len(ids) != 0 {
+		t.Errorf("expected container list to be empty, but got %v", ids)
+	}
+
+	// Loading the container by id should fail.
+	if _, err = container.Load(rootDir, id); err == nil {
+		t.Errorf("expected loading destroyed container to fail, but it did not")
+	}
+}
+
+// TestExePath tests that we can execute the application with different path formats.
+func TestExePath(t *testing.T) {
+	cases := []struct {
+		path    string
+		success bool
+	}{
+		{path: "true", success: true},
+		{path: "bin/true", success: true},
+		{path: "/bin/true", success: true},
+		{path: "thisfiledoesntexit", success: false},
+		{path: "bin/thisfiledoesntexit", success: false},
+		{path: "/bin/thisfiledoesntexit", success: false},
+	}
+	for _, tc := range cases {
+		spec := newSpecWithArgs(tc.path)
+		rootDir, bundleDir, conf, err := setupContainer(spec)
+		if err != nil {
+			t.Fatalf("exec: %s, error setting up container: %v", tc.path, err)
+		}
+
+		ws, err := container.Run(uniqueContainerID(), spec, conf, bundleDir, "", "")
+
+		// Remove the temp dirs right away, before the result is examined.
+		os.RemoveAll(rootDir)
+		os.RemoveAll(bundleDir)
+
+		if tc.success {
+			if err != nil {
+				t.Errorf("exec: %s, error running container: %v", tc.path, err)
+			}
+			if ws.ExitStatus() != 0 {
+				t.Errorf("exec: %s, got exit status %v want %v", tc.path, ws.ExitStatus(), 0)
+			}
+		} else if err == nil {
+			t.Errorf("exec: %s, got: no error, want: error", tc.path)
+		}
+	}
+}
+
+// TestAppExitStatus tests that we can retrieve the application exit status from the container.
+func TestAppExitStatus(t *testing.T) {
+	// First container will succeed.
+	succSpec := newSpecWithArgs("true")
+
+	rootDir, bundleDir, conf, err := setupContainer(succSpec)
+	if err != nil {
+		t.Fatalf("error setting up container: %v", err)
+	}
+	defer os.RemoveAll(rootDir)
+	defer os.RemoveAll(bundleDir)
+
+	ws, err := container.Run(uniqueContainerID(), succSpec, conf, bundleDir, "", "")
+	if err != nil {
+		t.Fatalf("error running container: %v", err)
+	}
+	if ws.ExitStatus() != 0 {
+		t.Errorf("got exit status %v want %v", ws.ExitStatus(), 0)
+	}
+
+	// Second container exits with non-zero status.
+	wantStatus := 123
+	errSpec := newSpecWithArgs("bash", "-c", fmt.Sprintf("exit %d", wantStatus))
+
+	rootDir2, bundleDir2, conf, err := setupContainer(errSpec)
+	if err != nil {
+		t.Fatalf("error setting up container: %v", err)
+	}
+	defer os.RemoveAll(rootDir2)
+	defer os.RemoveAll(bundleDir2)
+
+	ws, err = container.Run(uniqueContainerID(), errSpec, conf, bundleDir2, "", "")
+	if err != nil {
+		t.Fatalf("error running container: %v", err)
+	}
+	if ws.ExitStatus() != wantStatus {
+		t.Errorf("got exit status %v want %v", ws.ExitStatus(), wantStatus)
+	}
+}
+
+// TestExec verifies that a container can exec a new program.
+func TestExec(t *testing.T) {
+	const uid = 343
+	spec := newSpecWithArgs("sleep", "100")
+
+	rootDir, bundleDir, conf, err := setupContainer(spec)
+	if err != nil {
+		t.Fatalf("error setting up container: %v", err)
+	}
+	defer os.RemoveAll(rootDir)
+	defer os.RemoveAll(bundleDir)
+
+	// Create and start the container.
+	s, err := container.Create(uniqueContainerID(), spec, conf, bundleDir, "", "")
+	if err != nil {
+		t.Fatalf("error creating container: %v", err)
+	}
+	defer s.Destroy()
+	if err := s.Start(conf); err != nil {
+		t.Fatalf("error starting container: %v", err)
+	}
+
+	// expectedPL lists the expected process state of the container.
+	expectedPL := []*control.Process{
+		{
+			UID:  0,
+			PID:  1,
+			PPID: 0,
+			C:    0,
+			Cmd:  "sleep",
+		},
+		{
+			UID:  uid,
+			PID:  2,
+			PPID: 0,
+			C:    0,
+			Cmd:  "sleep",
+		},
+	}
+
+	// Verify that "sleep 100" is running.
+	if err := waitForProcessList(s, expectedPL[:1]); err != nil {
+		t.Error(err)
+	}
+
+	execArgs := control.ExecArgs{
+		Filename:         "/bin/sleep",
+		Argv:             []string{"sleep", "5"},
+		Envv:             []string{"PATH=" + os.Getenv("PATH")},
+		WorkingDirectory: "/",
+		KUID:             uid,
+	}
+
+	// Verify that "sleep 100" and "sleep 5" are running after exec.
+	// First, start running exec (which blocks).
+	status := make(chan error, 1)
+	go func() {
+		exitStatus, err := s.Execute(&execArgs)
+		if err != nil {
+			status <- err
+		} else if exitStatus != 0 {
+			status <- fmt.Errorf("failed with exit status: %v", exitStatus)
+		} else {
+			status <- nil
+		}
+	}()
+
+	if err := waitForProcessList(s, expectedPL); err != nil {
+		t.Fatal(err)
+	}
+
+	// Ensure that exec finished without error. Report st, the error sent by
+	// the goroutine; the outer err is nil at this point.
+	select {
+	case <-time.After(10 * time.Second):
+		t.Fatalf("container timed out waiting for exec to finish.")
+	case st := <-status:
+		if st != nil {
+			t.Errorf("container failed to exec %v: %v", execArgs, st)
+		}
+	}
+}
+
+// TestCapabilities verifies that:
+// - Running exec as non-root UID and GID will result in an error (because the
+//   executable file can't be read).
+// - Running exec as non-root with CAP_DAC_OVERRIDE succeeds because it skips
+//   this check.
+func TestCapabilities(t *testing.T) {
+	const uid = 343
+	const gid = 2401
+	spec := newSpecWithArgs("sleep", "100")
+
+	// We generate files in the host temporary directory.
+	spec.Mounts = append(spec.Mounts, specs.Mount{
+		Destination: os.TempDir(),
+		Source:      os.TempDir(),
+		Type:        "bind",
+	})
+
+	rootDir, bundleDir, conf, err := setupContainer(spec)
+	if err != nil {
+		t.Fatalf("error setting up container: %v", err)
+	}
+	defer os.RemoveAll(rootDir)
+	defer os.RemoveAll(bundleDir)
+
+	// Create and start the container.
+	s, err := container.Create(uniqueContainerID(), spec, conf, bundleDir, "", "")
+	if err != nil {
+		t.Fatalf("error creating container: %v", err)
+	}
+	defer s.Destroy()
+	if err := s.Start(conf); err != nil {
+		t.Fatalf("error starting container: %v", err)
+	}
+
+	// expectedPL lists the expected process state of the container.
+	expectedPL := []*control.Process{
+		{
+			UID:  0,
+			PID:  1,
+			PPID: 0,
+			C:    0,
+			Cmd:  "sleep",
+		},
+		{
+			UID:  uid,
+			PID:  2,
+			PPID: 0,
+			C:    0,
+			Cmd:  "exe",
+		},
+	}
+	if err := waitForProcessList(s, expectedPL[:1]); err != nil {
+		t.Fatalf("Failed to wait for sleep to start, err: %v", err)
+	}
+
+	// Create an executable that can't be run with the specified UID:GID until
+	// the CAP_DAC_OVERRIDE capability is added to skip the access check.
+	exePath := filepath.Join(rootDir, "exe")
+	if err := ioutil.WriteFile(exePath, []byte("#!/bin/sh\necho hello"), 0770); err != nil {
+		t.Fatalf("couldn't create executable: %v", err)
+	}
+	defer os.Remove(exePath)
+
+	// Need to traverse the intermediate directory.
+	if err := os.Chmod(rootDir, 0755); err != nil {
+		t.Fatalf("couldn't chmod %q: %v", rootDir, err)
+	}
+
+	execArgs := control.ExecArgs{
+		Filename:         exePath,
+		Argv:             []string{exePath},
+		Envv:             []string{"PATH=" + os.Getenv("PATH")},
+		WorkingDirectory: "/",
+		KUID:             uid,
+		KGID:             gid,
+		Capabilities:     &auth.TaskCapabilities{},
+	}
+
+	// "exe" should fail because we don't have the necessary permissions.
+	if _, err := s.Execute(&execArgs); err == nil {
+		t.Fatalf("container executed without error, but an error was expected")
+	}
+
+	// Now we run with the capability enabled; "exe" should not fail this time.
+	execArgs.Capabilities = &auth.TaskCapabilities{
+		EffectiveCaps: auth.CapabilitySetOf(linux.CAP_DAC_OVERRIDE),
+	}
+	if _, err := s.Execute(&execArgs); err != nil {
+		t.Fatalf("container failed to exec %v: %v", execArgs, err)
+	}
+}
+
+// TestConsoleSocket tests that a tty FD is sent over the console socket if one is provided.
+func TestConsoleSocket(t *testing.T) {
+	spec := newSpecWithArgs("true")
+	rootDir, bundleDir, conf, err := setupContainer(spec)
+	if err != nil {
+		t.Fatalf("error setting up container: %v", err)
+	}
+	defer os.RemoveAll(rootDir)
+	defer os.RemoveAll(bundleDir)
+
+	// Create a named socket and start listening. We use a relative path
+	// to avoid overflowing the unix path length limit (108 chars).
+	socketPath := filepath.Join(bundleDir, "socket")
+	cwd, err := os.Getwd()
+	if err != nil {
+		t.Fatalf("error getting cwd: %v", err)
+	}
+	socketRelPath, err := filepath.Rel(cwd, socketPath)
+	if err != nil {
+		t.Fatalf("error getting relative path for %q from cwd %q: %v", socketPath, cwd, err)
+	}
+	if len(socketRelPath) > len(socketPath) {
+		socketRelPath = socketPath
+	}
+	srv, err := unet.BindAndListen(socketRelPath, false)
+	if err != nil {
+		t.Fatalf("error binding and listening to socket %q: %v", socketPath, err)
+	}
+	defer os.Remove(socketPath)
+
+	// Create the container and pass the socket name.
+	id := uniqueContainerID()
+	s, err := container.Create(id, spec, conf, bundleDir, socketRelPath, "")
+	if err != nil {
+		t.Fatalf("error creating container: %v", err)
+	}
+
+	// Open the other end of the socket.
+	sock, err := srv.Accept()
+	if err != nil {
+		t.Fatalf("error accepting socket connection: %v", err)
+	}
+
+	// Allow 1 fd to be received, which is all we expect.
+	r := sock.Reader(true /* blocking */)
+	r.EnableFDs(1)
+
+	// The socket is closed right after sending the FD, so EOF is
+	// an allowed error.
+	b := [][]byte{{}}
+	if _, err := r.ReadVec(b); err != nil && err != io.EOF {
+		t.Fatalf("error reading from socket connection: %v", err)
+	}
+
+	// We should have gotten a control message.
+	fds, err := r.ExtractFDs()
+	if err != nil {
+		t.Fatalf("error extracting fds from socket connection: %v", err)
+	}
+	if len(fds) != 1 {
+		t.Fatalf("got %d fds from socket, wanted 1", len(fds))
+	}
+
+	// Verify that the fd is a terminal.
+	if _, err := unix.IoctlGetTermios(fds[0], unix.TCGETS); err != nil {
+		t.Errorf("fd is not a terminal (ioctl TCGETS got %v)", err)
+	}
+
+	// Shut it down.
+	if err := s.Destroy(); err != nil {
+		t.Fatalf("error destroying container: %v", err)
+	}
+
+	// Close socket.
+	if err := srv.Close(); err != nil {
+		t.Fatalf("error closing socket: %v", err)
+	}
+}
+
+// TestSpecUnsupported expects Create to fail with "is not supported" for this spec.
+func TestSpecUnsupported(t *testing.T) {
+	spec := newSpecWithArgs("/bin/true")
+	spec.Process.SelinuxLabel = "somelabel"
+
+	// These are normally set by docker and will just cause warnings to be logged.
+	spec.Process.ApparmorProfile = "someprofile"
+	spec.Linux = &specs.Linux{Seccomp: &specs.LinuxSeccomp{}}
+
+	rootDir, bundleDir, conf, err := setupContainer(spec)
+	if err != nil {
+		t.Fatalf("error setting up container: %v", err)
+	}
+	defer os.RemoveAll(rootDir)
+	defer os.RemoveAll(bundleDir)
+
+	_, err = container.Create(uniqueContainerID(), spec, conf, bundleDir, "", "")
+	if !(err != nil && strings.Contains(err.Error(), "is not supported")) {
+		t.Errorf("container.Create() wrong error, got: %v, want: *is not supported, spec.Process: %+v", err, spec.Process)
+	}
+}
+
+// procListsEqual reports whether 2 Process lists are equal for all
+// implemented fields. It compares copies, so neither list is modified.
+func procListsEqual(got, want []*control.Process) bool {
+	if len(got) != len(want) {
+		return false
+	}
+	for i := range got {
+		// Work on copies so that the callers' entries are left untouched.
+		pd1, pd2 := *got[i], *want[i]
+		// Zero out unimplemented and timing dependent fields.
+		pd1.Time, pd2.Time = "", ""
+		pd1.STime, pd2.STime = "", ""
+		pd1.C, pd2.C = 0, 0
+		if pd1 != pd2 {
+			return false
+		}
+	}
+	return true
+}
+
+func procListToString(pl []*control.Process) string { // Renders pl as "[p1,p2,...]" using %+v.
+	parts := make([]string, len(pl))
+	for i := range pl {
+		parts[i] = fmt.Sprintf("%+v", pl[i])
+	}
+	return "[" + strings.Join(parts, ",") + "]"
+}
+
+// TestMain acts like runsc if it is called with the "boot" or "gofer" argument,
+// otherwise it just runs the tests. This is required because creating a
+// container will call "/proc/self/exe boot". Normally /proc/self/exe is the
+// runsc binary, but for tests we have to fake it.
+func TestMain(m *testing.M) {
+	// exit is the single exit point. NOTE(review): it only calls os.Exit; no coverage data is written here.
+	exit := func(status int) {
+		os.Exit(status)
+	}
+
+	if !flag.Parsed() {
+		flag.Parse()
+	}
+
+	// If we are passed one of the commands then run it.
+	subcommands.Register(new(cmd.Boot), "boot")
+	subcommands.Register(new(cmd.Gofer), "gofer")
+	switch flag.Arg(0) {
+	case "boot", "gofer":
+		// Run the command in a goroutine so we can block the main
+		// thread waiting for shutdownSignal.
+		go func() {
+			conf := &boot.Config{
+				RootDir: "unused-root-dir",
+				Network: boot.NetworkNone,
+			}
+			var ws syscall.WaitStatus
+			subcmdCode := subcommands.Execute(context.Background(), conf, &ws)
+			if subcmdCode != subcommands.ExitSuccess {
+				panic(fmt.Sprintf("command failed to execute, err: %v", subcmdCode))
+			}
+			// Container exited normally. Shut down this process.
+			os.Exit(ws.ExitStatus())
+		}()
+
+		// Shutdown cleanly when the shutdownSignal is received. The
+		// process then exits with status 0 through exit.
+		sigc := make(chan os.Signal, 1)
+		signal.Notify(sigc, shutdownSignal)
+		<-sigc
+		exit(0)
+	default:
+		// Otherwise run the tests.
+		exit(m.Run())
+	}
+}
diff --git a/runsc/container/hook.go b/runsc/container/hook.go
new file mode 100644
index 000000000..3d93ca0be
--- /dev/null
+++ b/runsc/container/hook.go
@@ -0,0 +1,111 @@
+// Copyright 2018 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package container
+
+import (
+	"bytes"
+	"encoding/json"
+	"fmt"
+	"os/exec"
+	"path/filepath"
+	"strings"
+	"time"
+
+	specs "github.com/opencontainers/runtime-spec/specs-go"
+	"gvisor.googlesource.com/gvisor/pkg/log"
+)
+
+// This file implements hooks as defined in the OCI runtime spec:
+// https://github.com/opencontainers/runtime-spec/blob/master/config.md#toc22
+//
+// "hooks":{
+// 		"prestart":[{
+// 			"path":"/usr/bin/dockerd",
+// 			"args":[
+// 				"libnetwork-setkey", "arg2"
+// 			]
+// 		}]
+// },
+
+// executeHooksBestEffort runs every hook in order. A hook that fails is only
+// logged as a warning; the remaining hooks still run.
+func executeHooksBestEffort(hooks []specs.Hook, s specs.State) {
+	for i := range hooks {
+		if err := executeHook(hooks[i], s); err != nil {
+			log.Warningf("Failure to execute hook %+v, err: %v", hooks[i], err)
+		}
+	}
+}
+
+// executeHooks runs hooks in order and stops at the first failure, returning
+// that hook's error. It returns nil if every hook succeeds (or none are given).
+func executeHooks(hooks []specs.Hook, s specs.State) error {
+	var err error
+	for i := 0; err == nil && i < len(hooks); i++ {
+		err = executeHook(hooks[i], s)
+	}
+	return err
+}
+
+// executeHook runs one hook: h.Path (non-empty, absolute) is started with
+// h.Args and h.Env and is fed s, JSON-encoded, on stdin. An error is returned
+// if it cannot run, fails, or is killed for exceeding h.Timeout (in seconds).
+func executeHook(h specs.Hook, s specs.State) error {
+	log.Debugf("Executing hook %+v, state: %+v", h, s)
+	if strings.TrimSpace(h.Path) == "" {
+		return fmt.Errorf("empty path for hook")
+	}
+	if !filepath.IsAbs(h.Path) {
+		return fmt.Errorf("path for hook is not absolute: %q", h.Path)
+	}
+
+	b, err := json.Marshal(s)
+	if err != nil {
+		return err
+	}
+	var stdout, stderr bytes.Buffer
+	cmd := exec.Cmd{
+		Path:   h.Path,
+		Args:   h.Args,
+		Env:    h.Env,
+		Stdin:  bytes.NewReader(b),
+		Stdout: &stdout,
+		Stderr: &stderr,
+	}
+	if err := cmd.Start(); err != nil {
+		return err
+	}
+
+	c := make(chan error, 1) // Buffered so the waiter below never blocks.
+	go func() { c <- cmd.Wait() }()
+	var timer <-chan time.Time // Left nil, i.e. never ready, without a timeout.
+	if h.Timeout != nil {
+		timer = time.After(time.Duration(*h.Timeout) * time.Second)
+	}
+	select {
+	case err := <-c:
+		if err != nil {
+			return fmt.Errorf("failure executing hook %q, err: %v\nstdout: %s\nstderr: %s", h.Path, err, stdout.String(), stderr.String())
+		}
+	case <-timer:
+		// Wait is already in flight in the goroutine above; calling it again is
+		// invalid. Kill, then take its result so stdout/stderr are complete.
+		cmd.Process.Kill()
+		<-c
+		return fmt.Errorf("timeout executing hook %q\nstdout: %s\nstderr: %s", h.Path, stdout.String(), stderr.String())
+	}
+	log.Debugf("Execute hook %q success!", h.Path)
+	return nil
+}
diff --git a/runsc/container/status.go b/runsc/container/status.go
new file mode 100644
index 000000000..8da1b4e89
--- /dev/null
+++ b/runsc/container/status.go
@@ -0,0 +1,54 @@
+// Copyright 2018 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package container
+
+// Status enumerates the states a container can be in. The set of states and
+// their meanings are part of the runtime CLI spec.
+type Status int
+
+const (
+	// Creating indicates "the container is being created".
+	Creating Status = iota
+
+	// Created indicates "the runtime has finished the create operation and
+	// the container process has neither exited nor executed the
+	// user-specified program".
+	Created
+
+	// Running indicates "the container process has executed the
+	// user-specified program but has not exited".
+	Running
+
+	// Stopped indicates "the container process has exited".
+	Stopped
+)
+
+// String returns the name of s as used by the runtime CLI spec. These names
+// are part of that spec and must not be changed. Any value that is not one of
+// the declared statuses yields "unknown".
+func (s Status) String() string {
+	// Indexed by Status value, so the table follows the const block above.
+	names := [...]string{
+		Creating: "creating",
+		Created:  "created",
+		Running:  "running",
+		Stopped:  "stopped",
+	}
+	// Values below Creating or above Stopped have no entry in the table.
+	if s < 0 || int(s) >= len(names) {
+		return "unknown"
+	}
+	return names[s]
+}