summaryrefslogtreecommitdiffhomepage
path: root/runsc/container
diff options
context:
space:
mode:
authorNicolas Lacasse <nlacasse@google.com>2018-05-15 10:17:19 -0700
committerShentubot <shentubot@google.com>2018-05-15 10:18:03 -0700
commit205f1027e6beb84101439172b3c776c2671b5be8 (patch)
tree10294e667ee529e140c474c475e7309cb72ea1d8 /runsc/container
parented02ac4f668ec41063cd51cbbd451baba9e9a6e7 (diff)
Refactor the Sandbox package into Sandbox + Container.
This is a necessary prerequisite for supporting multiple containers in a single sandbox. All the commands (in cmd package) now call operations on Containers (container package). When a Container first starts, it will create a Sandbox with the same ID. The Sandbox class is now simpler, as it only knows how to create boot/gofer processes, and how to forward commands into the running boot process. There are TODOs sprinkled around for additional support for multiple containers. Most notably, we need to detect when a container is intended to run in an existing sandbox (by reading the metadata), and then have some way to signal to the sandbox to start a new container. Other urpc calls into the sandbox need to pass the container ID, so the sandbox can run the operation on the given container. These are only half-plumbed through right now. PiperOrigin-RevId: 196688269 Change-Id: I1ecf4abbb9dd8987a53ae509df19341aaf42b5b0
Diffstat (limited to 'runsc/container')
-rw-r--r--runsc/container/BUILD45
-rw-r--r--runsc/container/container.go380
-rw-r--r--runsc/container/container_test.go669
-rw-r--r--runsc/container/hook.go111
-rw-r--r--runsc/container/status.go54
5 files changed, 1259 insertions, 0 deletions
diff --git a/runsc/container/BUILD b/runsc/container/BUILD
new file mode 100644
index 000000000..c558b4b0a
--- /dev/null
+++ b/runsc/container/BUILD
@@ -0,0 +1,45 @@
+package(licenses = ["notice"]) # Apache 2.0
+
+load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test")
+
+go_library(
+ name = "container",
+ srcs = [
+ "container.go",
+ "hook.go",
+ "status.go",
+ ],
+ importpath = "gvisor.googlesource.com/gvisor/runsc/container",
+ visibility = [
+ "//runsc:__subpackages__",
+ ],
+ deps = [
+ "//pkg/log",
+ "//pkg/sentry/control",
+ "//runsc/boot",
+ "//runsc/sandbox",
+ "//runsc/specutils",
+ "@com_github_opencontainers_runtime-spec//specs-go:go_default_library",
+ ],
+)
+
+go_test(
+ name = "container_test",
+ size = "small",
+ srcs = ["container_test.go"],
+ pure = "on",
+ rundir = ".",
+ deps = [
+ "//pkg/abi/linux",
+ "//pkg/log",
+ "//pkg/sentry/control",
+ "//pkg/sentry/kernel/auth",
+ "//pkg/unet",
+ "//runsc/boot",
+ "//runsc/cmd",
+ "//runsc/container",
+ "@com_github_google_subcommands//:go_default_library",
+ "@com_github_opencontainers_runtime-spec//specs-go:go_default_library",
+ "@org_golang_x_sys//unix:go_default_library",
+ ],
+)
diff --git a/runsc/container/container.go b/runsc/container/container.go
new file mode 100644
index 000000000..97115cd6b
--- /dev/null
+++ b/runsc/container/container.go
@@ -0,0 +1,380 @@
+// Copyright 2018 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package container creates and manipulates containers.
+package container
+
+import (
+ "encoding/json"
+ "fmt"
+ "io/ioutil"
+ "os"
+ "path/filepath"
+ "regexp"
+ "strconv"
+ "syscall"
+ "time"
+
+ specs "github.com/opencontainers/runtime-spec/specs-go"
+ "gvisor.googlesource.com/gvisor/pkg/log"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/control"
+ "gvisor.googlesource.com/gvisor/runsc/boot"
+ "gvisor.googlesource.com/gvisor/runsc/sandbox"
+ "gvisor.googlesource.com/gvisor/runsc/specutils"
+)
+
+// metadataFilename is the name of the metadata file relative to the container
+// root directory that holds container metadata.
+const metadataFilename = "meta.json"
+
+// validateID validates the container id.
+//
+// It returns nil when id is non-empty and consists solely of characters
+// matched by the pattern below, and an "invalid container id" error otherwise.
+func validateID(id string) error {
+ // See libcontainer/factory_linux.go.
+ //
+ // NOTE(review): inside the character class, `+-\.` is parsed as the range
+ // '+' through '.', so ',' is accepted in addition to '+', '-' and '.'.
+ // Confirm whether that is intended. The pattern is also recompiled on
+ // every call.
+ idRegex := regexp.MustCompile(`^[\w+-\.]+$`)
+ if !idRegex.MatchString(id) {
+ return fmt.Errorf("invalid container id: %v", id)
+ }
+ return nil
+}
+
+// Container represents a containerized application. When running, the
+// container is associated with a single Sandbox.
+//
+// Container metadata can be saved and loaded to disk. Within a root directory,
+// we maintain subdirectories for each container named with the container id.
+// The container metadata is stored as json within the container directory
+// in a file named "meta.json". This metadata format is defined by us, and is
+// not part of the OCI spec.
+//
+// Containers must write their metadata file after any change to their internal
+// state. The entire container directory is deleted when the container is
+// destroyed.
+type Container struct {
+ // ID is the container ID.
+ ID string `json:"id"`
+
+ // Spec is the OCI runtime spec that configures this container.
+ Spec *specs.Spec `json:"spec"`
+
+ // BundleDir is the directory containing the container bundle.
+ BundleDir string `json:"bundleDir"`
+
+ // Root is the directory containing the container metadata file. It is
+ // the container id joined onto the configured root directory.
+ Root string `json:"root"`
+
+ // CreatedAt is the time the container was created.
+ CreatedAt time.Time `json:"createdAt"`
+
+ // Owner is the container owner. Create fills it from the USER
+ // environment variable.
+ Owner string `json:"owner"`
+
+ // ConsoleSocket is the path to a unix domain socket that will receive
+ // the console FD. It is only used during create, so we don't need to
+ // store it in the metadata.
+ ConsoleSocket string `json:"-"`
+
+ // Status is the current container Status.
+ Status Status `json:"status"`
+
+ // Sandbox is the sandbox this container is running in. It will be nil
+ // if the container is not in state Running or Created.
+ Sandbox *sandbox.Sandbox `json:"sandbox"`
+}
+
+// Load loads a container with the given id from a metadata file.
+//
+// The metadata is read from "<rootDir>/<id>/meta.json". An error is returned
+// when id is invalid, when the container directory or its metadata file does
+// not exist, or when the file cannot be read or unmarshaled.
+//
+// If the stored status is Running or Created but signalling the container
+// fails, the returned Container has Status Stopped and a nil Sandbox. That
+// adjustment is made only on the returned value; Load does not write it back
+// to the metadata file.
+func Load(rootDir, id string) (*Container, error) {
+ log.Debugf("Load container %q %q", rootDir, id)
+ if err := validateID(id); err != nil {
+ return nil, err
+ }
+ cRoot := filepath.Join(rootDir, id)
+ if !exists(cRoot) {
+ return nil, fmt.Errorf("container with id %q does not exist", id)
+ }
+ metaFile := filepath.Join(cRoot, metadataFilename)
+ if !exists(metaFile) {
+ return nil, fmt.Errorf("container with id %q does not have metadata file %q", id, metaFile)
+ }
+ metaBytes, err := ioutil.ReadFile(metaFile)
+ if err != nil {
+ return nil, fmt.Errorf("error reading container metadata file %q: %v", metaFile, err)
+ }
+ var c Container
+ if err := json.Unmarshal(metaBytes, &c); err != nil {
+ return nil, fmt.Errorf("error unmarshaling container metadata from %q: %v", metaFile, err)
+ }
+
+ // If the status is "Running" or "Created", check that the sandbox
+ // process still exists, and set it to Stopped if it does not.
+ //
+ // This is inherently racy: the sandbox may exit right after the check.
+ if c.Status == Running || c.Status == Created {
+ // Send signal 0 to check if container still exists.
+ if err := c.Signal(0); err != nil {
+ // Container no longer exists.
+ c.Status = Stopped
+ c.Sandbox = nil
+ }
+ }
+
+ return &c, nil
+}
+
+// List returns the names of all entries found directly under rootDir. Each
+// container keeps its metadata in a subdirectory named after its id, so
+// these names are the container ids.
+func List(rootDir string) ([]string, error) {
+ log.Debugf("List containers %q", rootDir)
+ entries, err := ioutil.ReadDir(rootDir)
+ if err != nil {
+ return nil, fmt.Errorf("ReadDir(%s) failed: %v", rootDir, err)
+ }
+ var ids []string
+ for i := range entries {
+ ids = append(ids, entries[i].Name())
+ }
+ return ids, nil
+}
+
+// Create creates the container in a new Sandbox process, unless the metadata
+// indicates that an existing Sandbox should be used.
+//
+// It validates id and spec, refuses to reuse an existing container directory
+// under conf.RootDir, starts a sandbox with the same id, saves the container
+// metadata and, when pidFile is non-empty, writes the sandbox pid to it. On
+// success the returned container is in state Created. On any failure after
+// the sandbox has been requested, the container is destroyed before the
+// error is returned.
+func Create(id string, spec *specs.Spec, conf *boot.Config, bundleDir, consoleSocket, pidFile string) (*Container, error) {
+ log.Debugf("Create container %q in root dir: %s", id, conf.RootDir)
+ if err := validateID(id); err != nil {
+ return nil, err
+ }
+ if err := specutils.ValidateSpec(spec); err != nil {
+ return nil, err
+ }
+
+ containerRoot := filepath.Join(conf.RootDir, id)
+ if exists(containerRoot) {
+ return nil, fmt.Errorf("container with id %q already exists: %q ", id, containerRoot)
+ }
+
+ c := &Container{
+ ID: id,
+ Spec: spec,
+ ConsoleSocket: consoleSocket,
+ BundleDir: bundleDir,
+ Root: containerRoot,
+ Status: Creating,
+ Owner: os.Getenv("USER"),
+ // Record the creation time so that it is persisted in the metadata
+ // instead of being left at the zero time.
+ CreatedAt: time.Now(),
+ }
+
+ // TODO: If the metadata annotations indicates that this
+ // container should be started in another sandbox, we must do so. The
+ // metadata will indicate the ID of the sandbox, which is the same as
+ // the ID of the init container in the sandbox. We can look up that
+ // init container by ID to get the sandbox, then we need to expose a
+ // way to run a new container in the sandbox.
+
+ // Start a new sandbox for this container. Any errors after this point
+ // must destroy the container.
+ s, err := sandbox.Create(id, spec, conf, bundleDir, consoleSocket)
+ if err != nil {
+ c.Destroy()
+ return nil, err
+ }
+
+ c.Sandbox = s
+ c.Status = Created
+
+ // Save the metadata file.
+ if err := c.save(); err != nil {
+ c.Destroy()
+ return nil, err
+ }
+
+ // Write the pid file. Containerd considers the create complete after
+ // this file is created, so it must be the last thing we do.
+ if pidFile != "" {
+ if err := ioutil.WriteFile(pidFile, []byte(strconv.Itoa(c.Pid())), 0644); err != nil {
+ // Destroy the whole container, not just the sandbox, so that
+ // the metadata saved above does not linger and block a later
+ // Create with the same id.
+ c.Destroy()
+ return nil, fmt.Errorf("error writing pid file: %v", err)
+ }
+ }
+
+ return c, nil
+}
+
+// Start starts running the containerized process inside the sandbox.
+//
+// The container must be in state Created. Prestart hooks run first; if one of
+// them fails, or if the sandbox fails to start, the container is destroyed and
+// the error is returned (the result of Destroy itself is ignored). Poststart
+// hooks are best effort. On success the status becomes Running and the
+// metadata is saved; the error from save is returned.
+func (c *Container) Start(conf *boot.Config) error {
+ log.Debugf("Start container %q", c.ID)
+ if c.Status != Created {
+ return fmt.Errorf("cannot start container in state %s", c.Status)
+ }
+
+ // "If any prestart hook fails, the runtime MUST generate an error,
+ // stop and destroy the container".
+ if c.Spec.Hooks != nil {
+ if err := executeHooks(c.Spec.Hooks.Prestart, c.State()); err != nil {
+ c.Destroy()
+ return err
+ }
+ }
+
+ if err := c.Sandbox.Start(c.ID, c.Spec, conf); err != nil {
+ c.Destroy()
+ return err
+ }
+
+ // "If any poststart hook fails, the runtime MUST log a warning, but
+ // the remaining hooks and lifecycle continue as if the hook had
+ // succeeded".
+ //
+ // NOTE: c.State() is evaluated before Status is set to Running below,
+ // so the state handed to poststart hooks still reports Created.
+ if c.Spec.Hooks != nil {
+ executeHooksBestEffort(c.Spec.Hooks.Poststart, c.State())
+ }
+
+ c.Status = Running
+ return c.save()
+}
+
+// Run creates a container, starts it and waits for it to exit, returning the
+// wait status reported by Wait. A failure in Create or Start is wrapped and
+// returned together with a zero WaitStatus.
+func Run(id string, spec *specs.Spec, conf *boot.Config, bundleDir, consoleSocket, pidFile string) (syscall.WaitStatus, error) {
+ log.Debugf("Run container %q in root dir: %s", id, conf.RootDir)
+
+ c, createErr := Create(id, spec, conf, bundleDir, consoleSocket, pidFile)
+ if createErr != nil {
+ return 0, fmt.Errorf("error creating container: %v", createErr)
+ }
+
+ startErr := c.Start(conf)
+ if startErr != nil {
+ return 0, fmt.Errorf("error starting container: %v", startErr)
+ }
+
+ return c.Wait()
+}
+
+// Execute runs the command described by e inside the container's sandbox and
+// returns its wait status. It is only permitted while the container is in
+// state Created or Running.
+func (c *Container) Execute(e *control.ExecArgs) (syscall.WaitStatus, error) {
+ log.Debugf("Execute in container %q, args: %+v", c.ID, e)
+ switch c.Status {
+ case Created, Running:
+ return c.Sandbox.Execute(c.ID, e)
+ default:
+ return 0, fmt.Errorf("cannot exec in container in state %s", c.Status)
+ }
+}
+
+// Event asks the sandbox for the events of this container. It is only
+// permitted while the container is in state Running or Created.
+func (c *Container) Event() (*boot.Event, error) {
+ log.Debugf("Getting events for container %q", c.ID)
+ switch c.Status {
+ case Running, Created:
+ return c.Sandbox.Event(c.ID)
+ default:
+ return nil, fmt.Errorf("cannot get events for container in state: %s", c.Status)
+ }
+}
+
+// Pid reports the pid of the sandbox hosting the container. It returns -1
+// unless the container is in state Running or Created.
+func (c *Container) Pid() int {
+ switch c.Status {
+ case Running, Created:
+ return c.Sandbox.Pid
+ default:
+ return -1
+ }
+}
+
+// Wait waits for the container to exit, and returns its WaitStatus.
+//
+// NOTE(review): unlike Execute and Event, Wait does not check c.Status before
+// using c.Sandbox, which is documented to be nil when the container is not
+// Running or Created. Confirm that Sandbox.Wait tolerates a nil receiver or
+// that callers never wait on a stopped container.
+func (c *Container) Wait() (syscall.WaitStatus, error) {
+ log.Debugf("Wait on container %q", c.ID)
+ return c.Sandbox.Wait(c.ID)
+}
+
+// Signal sends the signal to the container.
+//
+// When the container is already Stopped, nothing is sent: a warning is logged
+// and nil is returned. Otherwise the signal is forwarded to the sandbox and
+// its error is returned. Load uses Signal(0) as a liveness probe.
+func (c *Container) Signal(sig syscall.Signal) error {
+ log.Debugf("Signal container %q", c.ID)
+ if c.Status == Stopped {
+ log.Warningf("container %q not running, not sending signal %v", c.ID, sig)
+ return nil
+ }
+ return c.Sandbox.Signal(c.ID, sig)
+}
+
+// State returns the metadata of the container.
+//
+// The result is an OCI specs.State built from the container's ID, the string
+// form of its Status, its bundle directory and the value of Pid(), which is
+// -1 when the container is neither Running nor Created.
+func (c *Container) State() specs.State {
+ return specs.State{
+ Version: specs.Version,
+ ID: c.ID,
+ Status: c.Status.String(),
+ Pid: c.Pid(),
+ Bundle: c.BundleDir,
+ }
+}
+
+// Processes asks the sandbox for the processes, and their associated
+// metadata, that live inside the container. It fails unless the container is
+// in state Running.
+func (c *Container) Processes() ([]*control.Process, error) {
+ if c.Status == Running {
+ return c.Sandbox.Processes(c.ID)
+ }
+ return nil, fmt.Errorf("cannot get processes of container %q because it isn't running. It is in state %v", c.ID, c.Status)
+}
+
+// Destroy frees all resources associated with the container.
+//
+// It stops the container in its sandbox (when there is one), runs the
+// poststop hooks on a best-effort basis if the container was Created or
+// Running, deletes the container's metadata directory and, when this
+// container owns the sandbox (same ID), destroys the sandbox too. Afterwards
+// the container has a nil Sandbox and Status Stopped.
+//
+// Only a failure to stop the container is returned as an error; every later
+// failure is logged as a warning.
+func (c *Container) Destroy() error {
+ log.Debugf("Destroy container %q", c.ID)
+
+ // First stop the container. Sandbox is nil when the container never got
+ // a sandbox or was loaded after its sandbox died, so there is nothing to
+ // stop in that case.
+ if c.Sandbox != nil {
+ if err := c.Sandbox.Stop(c.ID); err != nil {
+ return err
+ }
+ }
+
+ // "If any poststop hook fails, the runtime MUST log a warning, but the
+ // remaining hooks and lifecycle continue as if the hook had succeeded".
+ if c.Spec.Hooks != nil && (c.Status == Created || c.Status == Running) {
+ executeHooksBestEffort(c.Spec.Hooks.Poststop, c.State())
+ }
+
+ // Then destroy all the metadata.
+ if err := os.RemoveAll(c.Root); err != nil {
+ log.Warningf("Failed to delete container root directory %q, err: %v", c.Root, err)
+ }
+
+ // If we are the first container in the sandbox, take the sandbox down
+ // as well.
+ if c.Sandbox != nil && c.Sandbox.ID == c.ID {
+ if err := c.Sandbox.Destroy(); err != nil {
+ log.Warningf("Failed to destroy sandbox %q: %v", c.Sandbox.ID, err)
+ }
+ }
+
+ c.Sandbox = nil
+ c.Status = Stopped
+ return nil
+}
+
+// save saves the container metadata to a file.
+//
+// The container root directory is created if needed (mode 0711) and the
+// JSON-encoded Container is written to "meta.json" inside it (mode 0640),
+// replacing any previous contents. The file is written in place with
+// ioutil.WriteFile rather than via a temporary file and rename.
+func (c *Container) save() error {
+ log.Debugf("Save container %q", c.ID)
+ if err := os.MkdirAll(c.Root, 0711); err != nil {
+ return fmt.Errorf("error creating container root directory %q: %v", c.Root, err)
+ }
+ meta, err := json.Marshal(c)
+ if err != nil {
+ return fmt.Errorf("error marshaling container metadata: %v", err)
+ }
+ metaFile := filepath.Join(c.Root, metadataFilename)
+ if err := ioutil.WriteFile(metaFile, meta, 0640); err != nil {
+ return fmt.Errorf("error writing container metadata: %v", err)
+ }
+ return nil
+}
+
+// exists reports whether f can be stat'ed. A stat failure other than "does
+// not exist" is logged as a warning and treated as "does not exist".
+func exists(f string) bool {
+ _, err := os.Stat(f)
+ switch {
+ case err == nil:
+ return true
+ case !os.IsNotExist(err):
+ log.Warningf("error checking for file %q: %v", f, err)
+ }
+ return false
+}
diff --git a/runsc/container/container_test.go b/runsc/container/container_test.go
new file mode 100644
index 000000000..67efd2f9e
--- /dev/null
+++ b/runsc/container/container_test.go
@@ -0,0 +1,669 @@
+// Copyright 2018 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package container_test
+
+import (
+ "encoding/json"
+ "fmt"
+ "io"
+ "io/ioutil"
+ "os"
+ "os/signal"
+ "path/filepath"
+ "reflect"
+ "strings"
+ "syscall"
+ "testing"
+ "time"
+
+ "context"
+ "flag"
+ "github.com/google/subcommands"
+ specs "github.com/opencontainers/runtime-spec/specs-go"
+ "golang.org/x/sys/unix"
+ "gvisor.googlesource.com/gvisor/pkg/abi/linux"
+ "gvisor.googlesource.com/gvisor/pkg/log"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/control"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth"
+ "gvisor.googlesource.com/gvisor/pkg/unet"
+ "gvisor.googlesource.com/gvisor/runsc/boot"
+ "gvisor.googlesource.com/gvisor/runsc/cmd"
+ "gvisor.googlesource.com/gvisor/runsc/container"
+)
+
+// init raises the log level to Debug so that the Debugf calls made by the
+// code under test are emitted during the tests.
+func init() {
+ log.SetLevel(log.Debug)
+}
+
+// writeSpec serializes spec as JSON into the file "config.json" inside dir.
+func writeSpec(dir string, spec *specs.Spec) error {
+ specPath := filepath.Join(dir, "config.json")
+ data, err := json.Marshal(spec)
+ if err != nil {
+ return err
+ }
+ return ioutil.WriteFile(specPath, data, 0755)
+}
+
+// newSpecWithArgs builds a minimal spec for tests: the process runs args with
+// the test's own PATH, on a read-only root.
+func newSpecWithArgs(args ...string) *specs.Spec {
+ // The host filesystem root is the container root.
+ root := &specs.Root{
+ Path: "/",
+ Readonly: true,
+ }
+ process := &specs.Process{
+ Args: args,
+ Env: []string{"PATH=" + os.Getenv("PATH")},
+ }
+ return &specs.Spec{
+ Root: root,
+ Process: process,
+ }
+}
+
+// shutdownSignal will be sent to the sandbox in order to shut down cleanly.
+const shutdownSignal = syscall.SIGUSR2
+
+// setupContainer creates a bundle and root dir for the container, generates a
+// test config, and writes the spec to config.json in the bundle dir.
+//
+// On success the caller owns both temporary directories and must remove them.
+// On failure empty paths are returned, so any directory created so far is
+// removed here instead of being leaked.
+func setupContainer(spec *specs.Spec) (rootDir, bundleDir string, conf *boot.Config, err error) {
+ rootDir, err = ioutil.TempDir("", "containers")
+ if err != nil {
+ return "", "", nil, fmt.Errorf("error creating root dir: %v", err)
+ }
+
+ bundleDir, err = ioutil.TempDir("", "bundle")
+ if err != nil {
+ os.RemoveAll(rootDir)
+ return "", "", nil, fmt.Errorf("error creating bundle dir: %v", err)
+ }
+
+ if err = writeSpec(bundleDir, spec); err != nil {
+ os.RemoveAll(rootDir)
+ os.RemoveAll(bundleDir)
+ return "", "", nil, fmt.Errorf("error writing spec: %v", err)
+ }
+
+ conf = &boot.Config{
+ RootDir: rootDir,
+ Network: boot.NetworkNone,
+ // Don't add flags when calling subprocesses, since the test
+ // runner does not know about all the flags. We control the
+ // Config in the subprocess anyways, so it does not matter.
+ TestModeNoFlags: true,
+ }
+
+ return rootDir, bundleDir, conf, nil
+}
+
+// uniqueContainerID generates a unique container id for each test.
+//
+// The container id is used to create an abstract unix domain socket, which must
+// be unique. While the container forbids creating two containers with the same
+// name, sometimes between test runs the socket does not get cleaned up quickly
+// enough, causing container creation to fail.
+//
+// The id is "test-container-" followed by the current time in nanoseconds.
+func uniqueContainerID() string {
+ return fmt.Sprintf("test-container-%d", time.Now().UnixNano())
+}
+
+// waitForProcessList waits for the given process list to show up in the container.
+//
+// It polls s.Processes() every 10ms for up to 10 seconds. It returns nil as
+// soon as the list matches expected (per procListsEqual), returns immediately
+// if Processes fails, and otherwise returns an error that shows the last list
+// observed next to the expected one.
+func waitForProcessList(s *container.Container, expected []*control.Process) error {
+ var got []*control.Process
+ for start := time.Now(); time.Since(start) < 10*time.Second; {
+ var err error
+ // Assign with "=" rather than ":=" so that the outer got is updated;
+ // a shadowed copy would leave the timeout message below printing an
+ // empty list.
+ got, err = s.Processes()
+ if err != nil {
+ return fmt.Errorf("error getting process data from container: %v", err)
+ }
+ if procListsEqual(got, expected) {
+ return nil
+ }
+ // Process might not have started, try again...
+ time.Sleep(10 * time.Millisecond)
+ }
+ return fmt.Errorf("container got process list: %s, want: %s", procListToString(got), procListToString(expected))
+}
+
+// TestLifecycle tests the basic Create/Start/Signal/Destroy container lifecycle.
+// It verifies after each step that the container can be loaded from disk, and
+// has the correct status.
+func TestLifecycle(t *testing.T) {
+ // The container will just sleep for a long time. We will kill it before
+ // it finishes sleeping.
+ spec := newSpecWithArgs("sleep", "100")
+
+ rootDir, bundleDir, conf, err := setupContainer(spec)
+ if err != nil {
+ t.Fatalf("error setting up container: %v", err)
+ }
+ defer os.RemoveAll(rootDir)
+ defer os.RemoveAll(bundleDir)
+
+ // expectedPL lists the expected process state of the container.
+ expectedPL := []*control.Process{
+ {
+ UID: 0,
+ PID: 1,
+ PPID: 0,
+ C: 0,
+ Cmd: "sleep",
+ },
+ }
+ // Create the container. The returned value is deliberately discarded:
+ // every later step works on a copy re-loaded from the metadata on disk.
+ id := uniqueContainerID()
+ if _, err := container.Create(id, spec, conf, bundleDir, "", ""); err != nil {
+ t.Fatalf("error creating container: %v", err)
+ }
+ // Load the container from disk and check the status.
+ s, err := container.Load(rootDir, id)
+ if err != nil {
+ t.Fatalf("error loading container: %v", err)
+ }
+ if got, want := s.Status, container.Created; got != want {
+ t.Errorf("container status got %v, want %v", got, want)
+ }
+
+ // List should return the container id.
+ ids, err := container.List(rootDir)
+ if err != nil {
+ t.Fatalf("error listing containers: %v", err)
+ }
+ if got, want := ids, []string{id}; !reflect.DeepEqual(got, want) {
+ t.Errorf("container list got %v, want %v", got, want)
+ }
+
+ // Start the container.
+ if err := s.Start(conf); err != nil {
+ t.Fatalf("error starting container: %v", err)
+ }
+ // Load the container from disk and check the status.
+ s, err = container.Load(rootDir, id)
+ if err != nil {
+ t.Fatalf("error loading container: %v", err)
+ }
+ if got, want := s.Status, container.Running; got != want {
+ t.Errorf("container status got %v, want %v", got, want)
+ }
+
+ // Verify that "sleep 100" is running.
+ if err := waitForProcessList(s, expectedPL); err != nil {
+ t.Error(err)
+ }
+
+ // Send the container a signal, which we catch and use to cleanly
+ // shutdown.
+ if err := s.Signal(shutdownSignal); err != nil {
+ t.Fatalf("error sending signal %v to container: %v", shutdownSignal, err)
+ }
+ // Wait for it to die.
+ if _, err := s.Wait(); err != nil {
+ t.Fatalf("error waiting on container: %v", err)
+ }
+ // Load the container from disk and check the status. The metadata on
+ // disk still says Running; Load is expected to report Stopped because
+ // its liveness probe of the sandbox fails.
+ s, err = container.Load(rootDir, id)
+ if err != nil {
+ t.Fatalf("error loading container: %v", err)
+ }
+ if got, want := s.Status, container.Stopped; got != want {
+ t.Errorf("container status got %v, want %v", got, want)
+ }
+
+ // Destroy the container.
+ if err := s.Destroy(); err != nil {
+ t.Fatalf("error destroying container: %v", err)
+ }
+
+ // List should not return the container id.
+ ids, err = container.List(rootDir)
+ if err != nil {
+ t.Fatalf("error listing containers: %v", err)
+ }
+ if len(ids) != 0 {
+ t.Errorf("expected container list to be empty, but got %v", ids)
+ }
+
+ // Loading the container by id should fail.
+ if _, err = container.Load(rootDir, id); err == nil {
+ t.Errorf("expected loading destroyed container to fail, but it did not")
+ }
+}
+
+// TestExePath tests that we can execute the application with different path
+// formats: a bare name, a relative path and an absolute path, each for a
+// binary that exists and for one that does not.
+func TestExePath(t *testing.T) {
+ for _, test := range []struct {
+ path string
+ success bool
+ }{
+ {path: "true", success: true},
+ {path: "bin/true", success: true},
+ {path: "/bin/true", success: true},
+ {path: "thisfiledoesntexit", success: false},
+ {path: "bin/thisfiledoesntexit", success: false},
+ {path: "/bin/thisfiledoesntexit", success: false},
+ } {
+ spec := newSpecWithArgs(test.path)
+ rootDir, bundleDir, conf, err := setupContainer(spec)
+ if err != nil {
+ t.Fatalf("exec: %s, error setting up container: %v", test.path, err)
+ }
+
+ ws, err := container.Run(uniqueContainerID(), spec, conf, bundleDir, "", "")
+
+ // Clean up right away rather than with defer, which would only run
+ // when the whole test function returns.
+ os.RemoveAll(rootDir)
+ os.RemoveAll(bundleDir)
+
+ if test.success {
+ if err != nil {
+ t.Errorf("exec: %s, error running container: %v", test.path, err)
+ }
+ if ws.ExitStatus() != 0 {
+ t.Errorf("exec: %s, got exit status %v want %v", test.path, ws.ExitStatus(), 0)
+ }
+ } else {
+ if err == nil {
+ t.Errorf("exec: %s, got: no error, want: error", test.path)
+ }
+ }
+ }
+}
+
+// TestAppExitStatus tests that we can retrieve the application exit status
+// from the container, both for a process that succeeds and for one that
+// exits with a specific non-zero status.
+func TestAppExitStatus(t *testing.T) {
+ // First container will succeed.
+ succSpec := newSpecWithArgs("true")
+
+ rootDir, bundleDir, conf, err := setupContainer(succSpec)
+ if err != nil {
+ t.Fatalf("error setting up container: %v", err)
+ }
+ defer os.RemoveAll(rootDir)
+ defer os.RemoveAll(bundleDir)
+
+ ws, err := container.Run(uniqueContainerID(), succSpec, conf, bundleDir, "", "")
+ if err != nil {
+ t.Fatalf("error running container: %v", err)
+ }
+ if ws.ExitStatus() != 0 {
+ t.Errorf("got exit status %v want %v", ws.ExitStatus(), 0)
+ }
+
+ // Second container exits with non-zero status.
+ wantStatus := 123
+ errSpec := newSpecWithArgs("bash", "-c", fmt.Sprintf("exit %d", wantStatus))
+
+ rootDir2, bundleDir2, conf, err := setupContainer(errSpec)
+ if err != nil {
+ t.Fatalf("error setting up container: %v", err)
+ }
+ defer os.RemoveAll(rootDir2)
+ defer os.RemoveAll(bundleDir2)
+
+ // Run the failing spec itself, so that the spec handed to Run matches
+ // the config.json written into bundleDir2.
+ ws, err = container.Run(uniqueContainerID(), errSpec, conf, bundleDir2, "", "")
+ if err != nil {
+ t.Fatalf("error running container: %v", err)
+ }
+ if ws.ExitStatus() != wantStatus {
+ t.Errorf("got exit status %v want %v", ws.ExitStatus(), wantStatus)
+ }
+}
+
+// TestExec verifies that a container can exec a new program.
+//
+// It starts a container running "sleep 100", execs "sleep 5" as a different
+// UID, checks that both processes show up in the process list and finally
+// that the exec'd process finishes without error.
+func TestExec(t *testing.T) {
+ const uid = 343
+ spec := newSpecWithArgs("sleep", "100")
+
+ rootDir, bundleDir, conf, err := setupContainer(spec)
+ if err != nil {
+ t.Fatalf("error setting up container: %v", err)
+ }
+ defer os.RemoveAll(rootDir)
+ defer os.RemoveAll(bundleDir)
+
+ // Create and start the container.
+ s, err := container.Create(uniqueContainerID(), spec, conf, bundleDir, "", "")
+ if err != nil {
+ t.Fatalf("error creating container: %v", err)
+ }
+ defer s.Destroy()
+ if err := s.Start(conf); err != nil {
+ t.Fatalf("error starting container: %v", err)
+ }
+
+ // expectedPL lists the expected process state of the container.
+ expectedPL := []*control.Process{
+ {
+ UID: 0,
+ PID: 1,
+ PPID: 0,
+ C: 0,
+ Cmd: "sleep",
+ },
+ {
+ UID: uid,
+ PID: 2,
+ PPID: 0,
+ C: 0,
+ Cmd: "sleep",
+ },
+ }
+
+ // Verify that "sleep 100" is running.
+ if err := waitForProcessList(s, expectedPL[:1]); err != nil {
+ t.Error(err)
+ }
+
+ execArgs := control.ExecArgs{
+ Filename: "/bin/sleep",
+ Argv: []string{"sleep", "5"},
+ Envv: []string{"PATH=" + os.Getenv("PATH")},
+ WorkingDirectory: "/",
+ KUID: uid,
+ }
+
+ // Verify that "sleep 100" and "sleep 5" are running after exec.
+ // First, start running exec (which blocks). The channel is buffered so
+ // that the goroutine can finish even if the test stops waiting for it.
+ status := make(chan error, 1)
+ go func() {
+ exitStatus, err := s.Execute(&execArgs)
+ if err != nil {
+ status <- err
+ } else if exitStatus != 0 {
+ status <- fmt.Errorf("failed with exit status: %v", exitStatus)
+ } else {
+ status <- nil
+ }
+ }()
+
+ if err := waitForProcessList(s, expectedPL); err != nil {
+ t.Fatal(err)
+ }
+
+ // Ensure that exec finished without error.
+ select {
+ case <-time.After(10 * time.Second):
+ t.Fatalf("container timed out waiting for exec to finish.")
+ case st := <-status:
+ if st != nil {
+ // Report the error received from the exec goroutine; the outer
+ // err is nil at this point.
+ t.Errorf("container failed to exec %v: %v", execArgs, st)
+ }
+ }
+}
+
+// TestCapabilities verifies that:
+// - Running exec as non-root UID and GID will result in an error (because the
+// executable file can't be read).
+// - Running exec as non-root with CAP_DAC_OVERRIDE succeeds because it skips
+// this check.
+func TestCapabilities(t *testing.T) {
+ const uid = 343
+ const gid = 2401
+ spec := newSpecWithArgs("sleep", "100")
+
+ // We generate files in the host temporary directory.
+ spec.Mounts = append(spec.Mounts, specs.Mount{
+ Destination: os.TempDir(),
+ Source: os.TempDir(),
+ Type: "bind",
+ })
+
+ rootDir, bundleDir, conf, err := setupContainer(spec)
+ if err != nil {
+ t.Fatalf("error setting up container: %v", err)
+ }
+ defer os.RemoveAll(rootDir)
+ defer os.RemoveAll(bundleDir)
+
+ // Create and start the container.
+ s, err := container.Create(uniqueContainerID(), spec, conf, bundleDir, "", "")
+ if err != nil {
+ t.Fatalf("error creating container: %v", err)
+ }
+ defer s.Destroy()
+ if err := s.Start(conf); err != nil {
+ t.Fatalf("error starting container: %v", err)
+ }
+
+ // expectedPL lists the expected process state of the container.
+ expectedPL := []*control.Process{
+ {
+ UID: 0,
+ PID: 1,
+ PPID: 0,
+ C: 0,
+ Cmd: "sleep",
+ },
+ {
+ UID: uid,
+ PID: 2,
+ PPID: 0,
+ C: 0,
+ Cmd: "exe",
+ },
+ }
+ if err := waitForProcessList(s, expectedPL[:1]); err != nil {
+ t.Fatalf("Failed to wait for sleep to start, err: %v", err)
+ }
+
+ // Create an executable that can't be run with the specified UID:GID.
+ // This shouldn't be callable within the container until we add the
+ // CAP_DAC_OVERRIDE capability to skip the access check.
+ exePath := filepath.Join(rootDir, "exe")
+ if err := ioutil.WriteFile(exePath, []byte("#!/bin/sh\necho hello"), 0770); err != nil {
+ t.Fatalf("couldn't create executable: %v", err)
+ }
+ defer os.Remove(exePath)
+
+ // Need to traverse the intermediate directory. If this fails, the
+ // "expected error" check below could pass for the wrong reason, so
+ // fail loudly instead of ignoring the error.
+ if err := os.Chmod(rootDir, 0755); err != nil {
+ t.Fatalf("couldn't chmod %q: %v", rootDir, err)
+ }
+
+ execArgs := control.ExecArgs{
+ Filename: exePath,
+ Argv: []string{exePath},
+ Envv: []string{"PATH=" + os.Getenv("PATH")},
+ WorkingDirectory: "/",
+ KUID: uid,
+ KGID: gid,
+ Capabilities: &auth.TaskCapabilities{},
+ }
+
+ // "exe" should fail because we don't have the necessary permissions.
+ if _, err := s.Execute(&execArgs); err == nil {
+ t.Fatalf("container executed without error, but an error was expected")
+ }
+
+ // Now we run with the capability enabled and should succeed.
+ execArgs.Capabilities = &auth.TaskCapabilities{
+ EffectiveCaps: auth.CapabilitySetOf(linux.CAP_DAC_OVERRIDE),
+ }
+ // "exe" should not fail this time.
+ if _, err := s.Execute(&execArgs); err != nil {
+ t.Fatalf("container failed to exec %v: %v", execArgs, err)
+ }
+}
+
+// TestConsoleSocket tests that a tty FD is sent over the console socket if
+// one is provided.
+func TestConsoleSocket(t *testing.T) {
+ spec := newSpecWithArgs("true")
+ rootDir, bundleDir, conf, err := setupContainer(spec)
+ if err != nil {
+ t.Fatalf("error setting up container: %v", err)
+ }
+ defer os.RemoveAll(rootDir)
+ defer os.RemoveAll(bundleDir)
+
+ // Create a named socket and start listening. We use a relative path
+ // to avoid overflowing the unix path length limit (108 chars).
+ socketPath := filepath.Join(bundleDir, "socket")
+ cwd, err := os.Getwd()
+ if err != nil {
+ t.Fatalf("error getting cwd: %v", err)
+ }
+ socketRelPath, err := filepath.Rel(cwd, socketPath)
+ if err != nil {
+ t.Fatalf("error getting relative path for %q from cwd %q: %v", socketPath, cwd, err)
+ }
+ // Fall back to the absolute path when the relative one is longer.
+ if len(socketRelPath) > len(socketPath) {
+ socketRelPath = socketPath
+ }
+ srv, err := unet.BindAndListen(socketRelPath, false)
+ if err != nil {
+ t.Fatalf("error binding and listening to socket %q: %v", socketPath, err)
+ }
+ defer os.Remove(socketPath)
+
+ // Create the container and pass the socket name.
+ id := uniqueContainerID()
+ s, err := container.Create(id, spec, conf, bundleDir, socketRelPath, "")
+ if err != nil {
+ t.Fatalf("error creating container: %v", err)
+ }
+
+ // Open the other end of the socket.
+ sock, err := srv.Accept()
+ if err != nil {
+ t.Fatalf("error accepting socket connection: %v", err)
+ }
+
+ // Allow 1 fd to be received, which is exactly what we expect.
+ r := sock.Reader(true /* blocking */)
+ r.EnableFDs(1)
+
+ // The socket is closed right after sending the FD, so EOF is
+ // an allowed error.
+ b := [][]byte{{}}
+ if _, err := r.ReadVec(b); err != nil && err != io.EOF {
+ t.Fatalf("error reading from socket connection: %v", err)
+ }
+
+ // We should have gotten a control message.
+ fds, err := r.ExtractFDs()
+ if err != nil {
+ t.Fatalf("error extracting fds from socket connection: %v", err)
+ }
+ if len(fds) != 1 {
+ t.Fatalf("got %d fds from socket, wanted 1", len(fds))
+ }
+
+ // Verify that the fd is a terminal.
+ if _, err := unix.IoctlGetTermios(fds[0], unix.TCGETS); err != nil {
+ t.Errorf("fd is not a terminal (ioctl TCGETS got %v)", err)
+ }
+
+ // Shut it down.
+ if err := s.Destroy(); err != nil {
+ t.Fatalf("error destroying container: %v", err)
+ }
+
+ // Close socket.
+ if err := srv.Close(); err != nil {
+ t.Fatalf("error closing socket: %v", err)
+ }
+}
+
+// TestSpecUnsupported checks that container.Create fails with an error
+// containing "is not supported" when the spec sets an SELinux label. The
+// AppArmor profile and seccomp settings are added as well; per the comment
+// below they should only produce warnings.
+func TestSpecUnsupported(t *testing.T) {
+ spec := newSpecWithArgs("/bin/true")
+ spec.Process.SelinuxLabel = "somelabel"
+
+ // These are normally set by docker and will just cause warnings to be logged.
+ spec.Process.ApparmorProfile = "someprofile"
+ spec.Linux = &specs.Linux{Seccomp: &specs.LinuxSeccomp{}}
+
+ rootDir, bundleDir, conf, err := setupContainer(spec)
+ if err != nil {
+ t.Fatalf("error setting up container: %v", err)
+ }
+ defer os.RemoveAll(rootDir)
+ defer os.RemoveAll(bundleDir)
+
+ id := uniqueContainerID()
+ _, err = container.Create(id, spec, conf, bundleDir, "", "")
+ if err == nil || !strings.Contains(err.Error(), "is not supported") {
+ t.Errorf("container.Create() wrong error, got: %v, want: *is not supported, spec.Process: %+v", err, spec.Process)
+ }
+}
+
+// procListsEqual is used to check whether 2 Process lists are equal for all
+// implemented fields.
+//
+// The lists must have the same length and match element by element once the
+// Time, STime and C fields are ignored. The comparison works on copies, so
+// neither got nor want is modified.
+func procListsEqual(got, want []*control.Process) bool {
+ if len(got) != len(want) {
+ return false
+ }
+ for i := range got {
+ // Copy the structs so that zeroing the ignored fields does not
+ // overwrite the caller's data, which is printed later in failure
+ // messages.
+ pd1, pd2 := *got[i], *want[i]
+ // Zero out unimplemented and timing dependent fields.
+ pd1.Time, pd2.Time = "", ""
+ pd1.STime, pd2.STime = "", ""
+ pd1.C, pd2.C = 0, 0
+ if pd1 != pd2 {
+ return false
+ }
+ }
+ return true
+}
+
+// procListToString renders pl as "[p1,p2,...]", formatting every entry with
+// %+v, for use in test failure messages.
+func procListToString(pl []*control.Process) string {
+ parts := make([]string, len(pl))
+ for i, p := range pl {
+ parts[i] = fmt.Sprintf("%+v", p)
+ }
+ return "[" + strings.Join(parts, ",") + "]"
+}
+
+// TestMain acts like runsc if it is called with the "boot" argument, otherwise
+// it just runs the tests. This is required because creating a container will
+// call "/proc/self/exe boot". Normally /proc/self/exe is the runsc binary,
+// but for tests we have to fake it.
+//
+// The "gofer" argument is handled the same way as "boot".
+func TestMain(m *testing.M) {
+ // exit is the hook through which the process exits after the tests or
+ // after shutdownSignal.
+ // NOTE(review): the original comment said it "writes coverage data before
+ // exiting", but as written it only calls os.Exit.
+ exit := func(status int) {
+ os.Exit(status)
+ }
+
+ if !flag.Parsed() {
+ flag.Parse()
+ }
+
+ // If we are passed one of the commands then run it.
+ subcommands.Register(new(cmd.Boot), "boot")
+ subcommands.Register(new(cmd.Gofer), "gofer")
+ switch flag.Arg(0) {
+ case "boot", "gofer":
+ // Run the command in a goroutine so we can block the main
+ // thread waiting for shutdownSignal.
+ go func() {
+ conf := &boot.Config{
+ RootDir: "unused-root-dir",
+ Network: boot.NetworkNone,
+ }
+ var ws syscall.WaitStatus
+ subcmdCode := subcommands.Execute(context.Background(), conf, &ws)
+ if subcmdCode != subcommands.ExitSuccess {
+ panic(fmt.Sprintf("command failed to execute, err: %v", subcmdCode))
+ }
+ // Container exited normally. Shut down this process. This path
+ // calls os.Exit directly and does not go through exit above.
+ os.Exit(ws.ExitStatus())
+ }()
+
+ // Shutdown cleanly when the shutdownSignal is received. This
+ // allows us to write coverage data before exiting.
+ sigc := make(chan os.Signal, 1)
+ signal.Notify(sigc, shutdownSignal)
+ <-sigc
+ exit(0)
+ default:
+ // Otherwise run the tests.
+ exit(m.Run())
+ }
+}
diff --git a/runsc/container/hook.go b/runsc/container/hook.go
new file mode 100644
index 000000000..3d93ca0be
--- /dev/null
+++ b/runsc/container/hook.go
@@ -0,0 +1,111 @@
+// Copyright 2018 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package container
+
+import (
+ "bytes"
+ "encoding/json"
+ "fmt"
+ "os/exec"
+ "path/filepath"
+ "strings"
+ "time"
+
+ specs "github.com/opencontainers/runtime-spec/specs-go"
+ "gvisor.googlesource.com/gvisor/pkg/log"
+)
+
+// This file implements hooks as defined in OCI spec:
+// https://github.com/opencontainers/runtime-spec/blob/master/config.md#toc22
+//
+// "hooks":{
+// "prestart":[{
+// "path":"/usr/bin/dockerd",
+// "args":[
+// "libnetwork-setkey", "arg2"
+// ]
+// }]
+// },
+
+// executeHooksBestEffort runs every hook in order. A failing hook is logged
+// as a warning and does not prevent the remaining hooks from running.
+func executeHooksBestEffort(hooks []specs.Hook, s specs.State) {
+	for i := range hooks {
+		hook := hooks[i]
+		err := executeHook(hook, s)
+		if err == nil {
+			continue
+		}
+		log.Warningf("Failure to execute hook %+v, err: %v", hook, err)
+	}
+}
+
+// executeHooks runs the hooks in order and stops at the first failure,
+// returning that hook's error. It returns nil when every hook succeeds.
+func executeHooks(hooks []specs.Hook, s specs.State) error {
+	for i := range hooks {
+		err := executeHook(hooks[i], s)
+		if err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+// executeHook runs a single hook and waits for it to finish.
+//
+// The hook's path must be non-empty and absolute. The state s is marshaled to
+// JSON and supplied to the hook on stdin; the hook's stdout and stderr are
+// captured and included in any error returned. If h.Timeout is set, the hook
+// is killed once that many seconds elapse and a timeout error is returned.
+// Without a timeout the call blocks until the hook exits.
+//
+// It returns nil only when the hook started and exited successfully.
+func executeHook(h specs.Hook, s specs.State) error {
+	log.Debugf("Executing hook %+v, state: %+v", h, s)
+
+	if strings.TrimSpace(h.Path) == "" {
+		return fmt.Errorf("empty path for hook")
+	}
+	if !filepath.IsAbs(h.Path) {
+		return fmt.Errorf("path for hook is not absolute: %q", h.Path)
+	}
+
+	b, err := json.Marshal(s)
+	if err != nil {
+		return err
+	}
+	var stdout, stderr bytes.Buffer
+	cmd := exec.Cmd{
+		Path:   h.Path,
+		Args:   h.Args,
+		Env:    h.Env,
+		Stdin:  bytes.NewReader(b),
+		Stdout: &stdout,
+		Stderr: &stderr,
+	}
+	if err := cmd.Start(); err != nil {
+		return err
+	}
+
+	// Wait in a separate goroutine so the select below can also watch the
+	// timeout. The channel is buffered so the goroutine never blocks on send.
+	c := make(chan error, 1)
+	go func() {
+		c <- cmd.Wait()
+	}()
+
+	// A nil channel blocks forever, so with no timeout configured the select
+	// below only ever completes through the wait result.
+	var timer <-chan time.Time
+	if h.Timeout != nil {
+		t := time.NewTimer(time.Duration(*h.Timeout) * time.Second)
+		// Release the timer promptly when the hook finishes first.
+		defer t.Stop()
+		timer = t.C
+	}
+	select {
+	case err := <-c:
+		if err != nil {
+			return fmt.Errorf("failure executing hook %q, err: %v\nstdout: %s\nstderr: %s", h.Path, err, stdout.String(), stderr.String())
+		}
+	case <-timer:
+		// The Kill error is deliberately ignored: the process may have
+		// exited on its own between the timer firing and this call.
+		cmd.Process.Kill()
+		// Wait must be called only once per command and is already running
+		// in the goroutine above. Receive its result instead of calling it
+		// again, so the process is reaped and the output copiers are done
+		// before the buffers are read.
+		<-c
+		return fmt.Errorf("timeout executing hook %q\nstdout: %s\nstderr: %s", h.Path, stdout.String(), stderr.String())
+	}
+
+	log.Debugf("Execute hook %q success!", h.Path)
+	return nil
+}
diff --git a/runsc/container/status.go b/runsc/container/status.go
new file mode 100644
index 000000000..8da1b4e89
--- /dev/null
+++ b/runsc/container/status.go
@@ -0,0 +1,54 @@
+// Copyright 2018 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package container
+
+// Status enumerates container statuses. The statuses and their semantics are
+// part of the runtime CLI spec.
+//
+// Values are assigned sequentially with iota starting at zero, so the zero
+// value of Status is Creating.
+type Status int
+
+const (
+ // Creating indicates "the container is being created".
+ Creating Status = iota
+
+ // Created indicates "the runtime has finished the create operation and
+ // the container process has neither exited nor executed the
+ // user-specified program".
+ Created
+
+ // Running indicates "the container process has executed the
+ // user-specified program but has not exited".
+ Running
+
+ // Stopped indicates "the container process has exited".
+ Stopped
+)
+
+// String returns the textual name of the status. The names are part of the
+// runtime CLI spec and should not be changed. Any value outside the declared
+// statuses is reported as "unknown".
+func (s Status) String() string {
+	names := [...]string{
+		Creating: "creating",
+		Created:  "created",
+		Running:  "running",
+		Stopped:  "stopped",
+	}
+	if s < 0 || int(s) >= len(names) {
+		return "unknown"
+	}
+	return names[s]
+}