author:    Googler <noreply@google.com> 2018-04-27 10:37:02 -0700
committer: Adin Scannell <ascannell@google.com> 2018-04-28 01:44:26 -0400
commit:    d02b74a5dcfed4bfc8f2f8e545bca4d2afabb296 (patch)
tree:      54f95eef73aee6bacbfc736fffc631be2605ed53 /runsc/sandbox
parent:    f70210e742919f40aa2f0934a22f1c9ba6dada62 (diff)

Check in gVisor.

PiperOrigin-RevId: 194583126
Change-Id: Ica1d8821a90f74e7e745962d71801c598c652463
Diffstat (limited to 'runsc/sandbox')
-rw-r--r--  runsc/sandbox/BUILD              53
-rw-r--r--  runsc/sandbox/console.go         60
-rw-r--r--  runsc/sandbox/hook.go           111
-rw-r--r--  runsc/sandbox/namespace.go      204
-rw-r--r--  runsc/sandbox/network.go        348
-rw-r--r--  runsc/sandbox/sandbox.go        666
-rw-r--r--  runsc/sandbox/sandbox_test.go   649
-rw-r--r--  runsc/sandbox/status.go          56
8 files changed, 2147 insertions, 0 deletions
diff --git a/runsc/sandbox/BUILD b/runsc/sandbox/BUILD
new file mode 100644
index 000000000..bdd95903e
--- /dev/null
+++ b/runsc/sandbox/BUILD
@@ -0,0 +1,53 @@
+package(licenses = ["notice"]) # Apache 2.0
+
+load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test")
+
+go_library(
+ name = "sandbox",
+ srcs = [
+ "console.go",
+ "hook.go",
+ "namespace.go",
+ "network.go",
+ "sandbox.go",
+ "status.go",
+ ],
+ importpath = "gvisor.googlesource.com/gvisor/runsc/sandbox",
+ visibility = [
+ "//runsc:__subpackages__",
+ ],
+ deps = [
+ "//pkg/control/client",
+ "//pkg/control/server",
+ "//pkg/log",
+ "//pkg/sentry/control",
+ "//pkg/urpc",
+ "//runsc/boot",
+ "//runsc/specutils",
+ "@com_github_kr_pty//:go_default_library",
+ "@com_github_opencontainers_runtime-spec//specs-go:go_default_library",
+ "@com_github_vishvananda_netlink//:go_default_library",
+ "@org_golang_x_sys//unix:go_default_library",
+ ],
+)
+
+go_test(
+ name = "sandbox_test",
+ size = "small",
+ srcs = ["sandbox_test.go"],
+ pure = "on",
+ rundir = ".",
+ deps = [
+ "//pkg/abi/linux",
+ "//pkg/log",
+ "//pkg/sentry/control",
+ "//pkg/sentry/kernel/auth",
+ "//pkg/unet",
+ "//runsc/boot",
+ "//runsc/cmd",
+ "//runsc/sandbox",
+ "@com_github_google_subcommands//:go_default_library",
+ "@com_github_opencontainers_runtime-spec//specs-go:go_default_library",
+ "@org_golang_x_sys//unix:go_default_library",
+ ],
+)
diff --git a/runsc/sandbox/console.go b/runsc/sandbox/console.go
new file mode 100644
index 000000000..3f133e12a
--- /dev/null
+++ b/runsc/sandbox/console.go
@@ -0,0 +1,60 @@
+// Copyright 2018 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package sandbox
+
+import (
+ "fmt"
+ "net"
+ "os"
+
+ "github.com/kr/pty"
+ "golang.org/x/sys/unix"
+)
+
+// setupConsole creates a pty master/slave pair, sends the master FD over the
+// given socket, and returns the slave.
+func setupConsole(socketPath string) (*os.File, error) {
+ // Create a new pty master and slave.
+ ptyMaster, ptySlave, err := pty.Open()
+ if err != nil {
+ return nil, fmt.Errorf("error opening pty: %v", err)
+ }
+ defer ptyMaster.Close()
+
+ // Get a connection to the socket path.
+ conn, err := net.Dial("unix", socketPath)
+ if err != nil {
+ ptySlave.Close()
+ return nil, fmt.Errorf("error dial socket %q: %v", socketPath, err)
+ }
+ uc, ok := conn.(*net.UnixConn)
+ if !ok {
+ ptySlave.Close()
+ return nil, fmt.Errorf("connection is not a UnixConn: %T", conn)
+ }
+ socket, err := uc.File()
+ if err != nil {
+ ptySlave.Close()
+ return nil, fmt.Errorf("error getting file for unix socket %v: %v", uc, err)
+ }
+
+ // Send the master FD over the connection.
+ msg := unix.UnixRights(int(ptyMaster.Fd()))
+ if err := unix.Sendmsg(int(socket.Fd()), []byte("pty-master"), msg, nil, 0); err != nil {
+ ptySlave.Close()
+ return nil, fmt.Errorf("error sending console over unix socket %q: %v", socketPath, err)
+ }
+ return ptySlave, nil
+}
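setupConsole's peer is whichever process supplied the console socket (containerd, for example). For context, a minimal sketch of that receiving side, assuming a listener on the same socket path; the receiveConsole helper, the package name, and the buffer sizes are illustrative and not part of this change:

    package consoleexample

    import (
        "fmt"
        "net"
        "os"

        "golang.org/x/sys/unix"
    )

    // receiveConsole accepts the connection made by setupConsole and extracts
    // the pty master FD from the SCM_RIGHTS control message.
    func receiveConsole(socketPath string) (*os.File, error) {
        l, err := net.Listen("unix", socketPath)
        if err != nil {
            return nil, err
        }
        defer l.Close()

        conn, err := l.Accept()
        if err != nil {
            return nil, err
        }
        defer conn.Close()

        buf := make([]byte, 32)                // room for the "pty-master" tag
        oob := make([]byte, unix.CmsgSpace(4)) // room for exactly one FD
        _, oobn, _, _, err := conn.(*net.UnixConn).ReadMsgUnix(buf, oob)
        if err != nil {
            return nil, err
        }

        // ParseUnixRights recovers the FD attached via unix.UnixRights above.
        msgs, err := unix.ParseSocketControlMessage(oob[:oobn])
        if err != nil || len(msgs) != 1 {
            return nil, fmt.Errorf("expected 1 control message, got %d: %v", len(msgs), err)
        }
        fds, err := unix.ParseUnixRights(&msgs[0])
        if err != nil || len(fds) != 1 {
            return nil, fmt.Errorf("expected 1 fd, got %d: %v", len(fds), err)
        }
        return os.NewFile(uintptr(fds[0]), "pty-master"), nil
    }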
diff --git a/runsc/sandbox/hook.go b/runsc/sandbox/hook.go
new file mode 100644
index 000000000..40b064cdc
--- /dev/null
+++ b/runsc/sandbox/hook.go
@@ -0,0 +1,111 @@
+// Copyright 2018 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package sandbox
+
+import (
+ "bytes"
+ "encoding/json"
+ "fmt"
+ "os/exec"
+ "path/filepath"
+ "strings"
+ "time"
+
+ specs "github.com/opencontainers/runtime-spec/specs-go"
+ "gvisor.googlesource.com/gvisor/pkg/log"
+)
+
+// This file implements hooks as defined in the OCI spec:
+// https://github.com/opencontainers/runtime-spec/blob/master/config.md#toc22
+//
+// "hooks":{
+// "prestart":[{
+// "path":"/usr/bin/dockerd",
+// "args":[
+// "libnetwork-setkey", "arg2",
+// ]
+// }]
+// },
+
+// executeHooksBestEffort executes hooks and logs a warning if they fail. It
+// always runs all hooks.
+func executeHooksBestEffort(hooks []specs.Hook, s specs.State) {
+ for _, h := range hooks {
+ if err := executeHook(h, s); err != nil {
+ log.Warningf("Failure to execute hook %+v, err: %v", h, err)
+ }
+ }
+}
+
+// executeHooks executes hooks until the first one fails or they all execute.
+func executeHooks(hooks []specs.Hook, s specs.State) error {
+ for _, h := range hooks {
+ if err := executeHook(h, s); err != nil {
+ return err
+ }
+ }
+ return nil
+}
+
+func executeHook(h specs.Hook, s specs.State) error {
+ log.Debugf("Executing hook %+v, state: %+v", h, s)
+
+ if strings.TrimSpace(h.Path) == "" {
+ return fmt.Errorf("empty path for hook")
+ }
+ if !filepath.IsAbs(h.Path) {
+ return fmt.Errorf("path for hook is not absolute: %q", h.Path)
+ }
+
+ b, err := json.Marshal(s)
+ if err != nil {
+ return err
+ }
+ var stdout, stderr bytes.Buffer
+ cmd := exec.Cmd{
+ Path: h.Path,
+ Args: h.Args,
+ Env: h.Env,
+ Stdin: bytes.NewReader(b),
+ Stdout: &stdout,
+ Stderr: &stderr,
+ }
+ if err := cmd.Start(); err != nil {
+ return err
+ }
+
+ c := make(chan error, 1)
+ go func() {
+ c <- cmd.Wait()
+ }()
+
+ var timer <-chan time.Time
+ if h.Timeout != nil {
+ timer = time.After(time.Duration(*h.Timeout) * time.Second)
+ }
+ select {
+ case err := <-c:
+ if err != nil {
+ return fmt.Errorf("failure executing hook %q, err: %v\nstdout: %s\nstderr: %s", h.Path, err, stdout.String(), stderr.String())
+ }
+ case <-timer:
+ cmd.Process.Kill()
+ cmd.Wait()
+ return fmt.Errorf("timeout executing hook %q\nstdout: %s\nstderr: %s", h.Path, stdout.String(), stderr.String())
+ }
+
+ log.Debugf("Execute hook %q success!", h.Path)
+ return nil
+}
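A hypothetical in-package sketch of how these helpers are meant to be driven (the hook path, args, and state values are made up) showing a prestart hook with a 5-second timeout:

    // exampleRunPrestart is a hypothetical sketch; it is not part of this change.
    func exampleRunPrestart() error {
        timeout := 5 // interpreted as seconds by executeHook
        hooks := []specs.Hook{{
            Path:    "/usr/bin/dockerd", // must be absolute, per executeHook
            Args:    []string{"dockerd", "libnetwork-setkey", "arg2"},
            Timeout: &timeout,
        }}
        state := specs.State{
            Version: specs.Version,
            ID:      "sandbox-123",
            Status:  "created",
            Pid:     1234,
            Bundle:  "/path/to/bundle",
        }
        // Prestart failures must abort startup, so use the fail-fast variant;
        // poststart/poststop would use executeHooksBestEffort instead.
        return executeHooks(hooks, state)
    }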
diff --git a/runsc/sandbox/namespace.go b/runsc/sandbox/namespace.go
new file mode 100644
index 000000000..1d3bcfbb5
--- /dev/null
+++ b/runsc/sandbox/namespace.go
@@ -0,0 +1,204 @@
+// Copyright 2018 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package sandbox
+
+import (
+ "fmt"
+ "os"
+ "os/exec"
+ "path/filepath"
+ "runtime"
+ "syscall"
+
+ specs "github.com/opencontainers/runtime-spec/specs-go"
+ "golang.org/x/sys/unix"
+ "gvisor.googlesource.com/gvisor/pkg/log"
+)
+
+// nsCloneFlag returns the clone flag that can be used to set a namespace of
+// the given type.
+func nsCloneFlag(nst specs.LinuxNamespaceType) uintptr {
+ switch nst {
+ case specs.IPCNamespace:
+ return syscall.CLONE_NEWIPC
+ case specs.MountNamespace:
+ return syscall.CLONE_NEWNS
+ case specs.NetworkNamespace:
+ return syscall.CLONE_NEWNET
+ case specs.PIDNamespace:
+ return syscall.CLONE_NEWPID
+ case specs.UTSNamespace:
+ return syscall.CLONE_NEWUTS
+ case specs.UserNamespace:
+ return syscall.CLONE_NEWUSER
+ case specs.CgroupNamespace:
+ panic("cgroup namespace has no associated clone flag")
+ default:
+ panic(fmt.Sprintf("unknown namespace %v", nst))
+ }
+}
+
+// nsPath returns the path of the namespace for the current process and the
+// given namespace.
+func nsPath(nst specs.LinuxNamespaceType) string {
+ base := "/proc/self/ns"
+ switch nst {
+ case specs.CgroupNamespace:
+ return filepath.Join(base, "cgroup")
+ case specs.IPCNamespace:
+ return filepath.Join(base, "ipc")
+ case specs.MountNamespace:
+ return filepath.Join(base, "mnt")
+ case specs.NetworkNamespace:
+ return filepath.Join(base, "net")
+ case specs.PIDNamespace:
+ return filepath.Join(base, "pid")
+ case specs.UserNamespace:
+ return filepath.Join(base, "user")
+ case specs.UTSNamespace:
+ return filepath.Join(base, "uts")
+ default:
+ panic(fmt.Sprintf("unknown namespace %v", nst))
+ }
+}
+
+// getNS returns the namespace of the given type from the spec's namespace
+// slice, along with true if it was found. It returns false if the slice does
+// not contain a namespace of that type.
+func getNS(nst specs.LinuxNamespaceType, s *specs.Spec) (specs.LinuxNamespace, bool) {
+ if s.Linux == nil {
+ return specs.LinuxNamespace{}, false
+ }
+ for _, ns := range s.Linux.Namespaces {
+ if ns.Type == nst {
+ return ns, true
+ }
+ }
+ return specs.LinuxNamespace{}, false
+}
+
+// filterNS returns a slice of namespaces from the spec with types that match
+// those in the `filter` slice.
+func filterNS(filter []specs.LinuxNamespaceType, s *specs.Spec) []specs.LinuxNamespace {
+ if s.Linux == nil {
+ return nil
+ }
+ var out []specs.LinuxNamespace
+ for _, nst := range filter {
+ if ns, ok := getNS(nst, s); ok {
+ out = append(out, ns)
+ }
+ }
+ return out
+}
+
+// setNS sets the namespace of the given type. It must be called with the OS
+// thread locked.
+func setNS(fd, nsType uintptr) error {
+ if _, _, err := syscall.RawSyscall(unix.SYS_SETNS, fd, nsType, 0); err != 0 {
+ return err
+ }
+ return nil
+}
+
+// applyNS applies the namespace on the current thread and returns a function
+// that will restore the namespace to the original value.
+//
+// Preconditions: Must be called with os thread locked.
+func applyNS(ns specs.LinuxNamespace) (func(), error) {
+ log.Infof("applying namespace %v at path %q", ns.Type, ns.Path)
+ newNS, err := os.Open(ns.Path)
+ if err != nil {
+ return nil, fmt.Errorf("error opening %q: %v", ns.Path, err)
+ }
+ defer newNS.Close()
+
+ // Save the current namespace so it can be restored later.
+ curPath := nsPath(ns.Type)
+ oldNS, err := os.Open(curPath)
+ if err != nil {
+ return nil, fmt.Errorf("error opening %q: %v", curPath, err)
+ }
+
+ // Switch to the requested namespace and build a function to restore the original.
+ flag := nsCloneFlag(ns.Type)
+ if err := setNS(newNS.Fd(), flag); err != nil {
+ oldNS.Close()
+ return nil, fmt.Errorf("error setting namespace of type %v and path %q: %v", ns.Type, ns.Path, err)
+ }
+ return func() {
+ log.Infof("restoring namespace %v", ns.Type)
+ defer oldNS.Close()
+ if err := setNS(oldNS.Fd(), flag); err != nil {
+ panic(fmt.Sprintf("error restoring namespace: of type %v: %v", ns.Type, err))
+ }
+ }, nil
+}
+
+// startInNS joins or creates the given namespaces and calls cmd.Start before
+// restoring the namespaces to the original values.
+func startInNS(cmd *exec.Cmd, nss []specs.LinuxNamespace) error {
+ // We are about to set up namespaces, which requires the OS thread to be
+ // locked so that Go doesn't move this goroutine to another thread.
+ runtime.LockOSThread()
+ defer runtime.UnlockOSThread()
+
+ if cmd.SysProcAttr == nil {
+ cmd.SysProcAttr = &syscall.SysProcAttr{}
+ }
+
+ for _, ns := range nss {
+ if ns.Path == "" {
+ // No path. Just set a flag to create a new namespace.
+ cmd.SysProcAttr.Cloneflags |= nsCloneFlag(ns.Type)
+ continue
+ }
+ // Join the given namespace, and restore the current namespace
+ // before exiting.
+ restoreNS, err := applyNS(ns)
+ if err != nil {
+ return err
+ }
+ defer restoreNS()
+ }
+
+ return cmd.Start()
+}
+
+// setUIDGIDMappings sets the given uid/gid mappings from the spec on the cmd.
+func setUIDGIDMappings(cmd *exec.Cmd, s *specs.Spec) {
+ if s.Linux == nil {
+ return
+ }
+ if cmd.SysProcAttr == nil {
+ cmd.SysProcAttr = &syscall.SysProcAttr{}
+ }
+ for _, idMap := range s.Linux.UIDMappings {
+ log.Infof("Mapping host uid %d to container uid %d (size=%d)", idMap.HostID, idMap.ContainerID, idMap.Size)
+ cmd.SysProcAttr.UidMappings = append(cmd.SysProcAttr.UidMappings, syscall.SysProcIDMap{
+ ContainerID: int(idMap.ContainerID),
+ HostID: int(idMap.HostID),
+ Size: int(idMap.Size),
+ })
+ }
+ for _, idMap := range s.Linux.GIDMappings {
+ log.Infof("Mapping host gid %d to container gid %d (size=%d)", idMap.HostID, idMap.ContainerID, idMap.Size)
+ cmd.SysProcAttr.GidMappings = append(cmd.SysProcAttr.GidMappings, syscall.SysProcIDMap{
+ ContainerID: int(idMap.ContainerID),
+ HostID: int(idMap.HostID),
+ Size: int(idMap.Size),
+ })
+ }
+}
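Putting applyNS and startInNS together, a hypothetical in-package sketch (the child binary and namespace path are made up): join an existing network namespace while creating a fresh UTS namespace for the child.

    // exampleStartInNS is a hypothetical sketch; it is not part of this change.
    func exampleStartInNS() error {
        cmd := exec.Command("/bin/true")
        nss := []specs.LinuxNamespace{
            // Has a path: the current thread temporarily joins it via applyNS,
            // so the child inherits it, and the thread is restored afterwards.
            {Type: specs.NetworkNamespace, Path: "/proc/1234/ns/net"},
            // No path: only CLONE_NEWUTS is set, so the child gets a new one.
            {Type: specs.UTSNamespace},
        }
        if err := startInNS(cmd, nss); err != nil {
            return err
        }
        return cmd.Wait()
    }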
diff --git a/runsc/sandbox/network.go b/runsc/sandbox/network.go
new file mode 100644
index 000000000..1b6a1d9a6
--- /dev/null
+++ b/runsc/sandbox/network.go
@@ -0,0 +1,348 @@
+// Copyright 2018 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package sandbox
+
+import (
+ "fmt"
+ "net"
+ "os"
+ "path/filepath"
+ "runtime"
+ "strconv"
+ "strings"
+ "syscall"
+
+ specs "github.com/opencontainers/runtime-spec/specs-go"
+ "github.com/vishvananda/netlink"
+ "gvisor.googlesource.com/gvisor/pkg/log"
+ "gvisor.googlesource.com/gvisor/pkg/urpc"
+ "gvisor.googlesource.com/gvisor/runsc/boot"
+)
+
+// setupNetwork configures the network stack to mimic the local network
+// configuration. Docker uses network namespaces with vnets to configure the
+// network for the container. The untrusted app expects to see the same network
+// inside the sandbox. Routing and port mapping are handled directly by Docker,
+// with most of the network information not even available to the runtime.
+//
+// Netstack inside the sandbox speaks directly to the device using a raw socket.
+// All IP addresses assigned to the NIC are removed and passed on to netstack's
+// device.
+//
+// If 'conf.Network' is NetworkNone, local configuration is skipped and only a
+// loopback interface is created.
+//
+// Run the following container to test it:
+// docker run -di --runtime=runsc -p 8080:80 -v $PWD:/usr/local/apache2/htdocs/ httpd:2.4
+func setupNetwork(conn *urpc.Client, pid int, spec *specs.Spec, conf *boot.Config) error {
+ log.Infof("Setting up network")
+
+ // HACK!
+ //
+ // When kubernetes starts a pod, it first creates a sandbox with an
+ // application that just pauses forever. Later, when a container is
+ // added to the pod, kubernetes will create another sandbox with a
+ // config that corresponds to the containerized application, and add it
+ // to the same namespaces as the pause sandbox.
+ //
+ // Running a second sandbox currently breaks because the two sandboxes
+ // have the same network namespace and configuration, and try to create
+ // a tap device on the same host device which fails.
+ //
+ // Runsc will eventually need to detect that this container is meant to
+ // be run in the same sandbox as the pausing application, and somehow
+ // make that happen.
+ //
+ // For now the following HACK disables networking for the "pause"
+ // sandbox, allowing the second sandbox to start up successfully.
+ //
+ // Cri-o helpfully adds the "ContainerType" annotation that we can use
+ // to detect whether we are a pod or container. Cri-containerd will
+ // support this eventually, but does not currently
+ // (https://github.com/kubernetes-incubator/cri-containerd/issues/512).
+ //
+// Thus, to support cri-containerd, we check whether the first exec argument
+// is "/pause", which is pretty gross.
+ //
+ // TODO: Remove this once multiple containers per sandbox
+ // is properly supported.
+ if spec.Annotations["io.kubernetes.cri-o.ContainerType"] == "sandbox" || spec.Process.Args[0] == "/pause" {
+ log.Warningf("HACK: Disabling network")
+ conf.Network = boot.NetworkNone
+ }
+
+ switch conf.Network {
+ case boot.NetworkNone:
+ log.Infof("Network is disabled, create loopback interface only")
+ if err := createDefaultLoopbackInterface(conn); err != nil {
+ return fmt.Errorf("error creating default loopback interface: %v", err)
+ }
+ case boot.NetworkSandbox:
+ // Build the path to the net namespace of the sandbox process.
+ // This is what we will copy.
+ nsPath := filepath.Join("/proc", strconv.Itoa(pid), "ns/net")
+ if err := createInterfacesAndRoutesFromNS(conn, nsPath); err != nil {
+ return fmt.Errorf("error creating interfaces from net namespace %q: %v", nsPath, err)
+ }
+ case boot.NetworkHost:
+ // Nothing to do here.
+ default:
+ return fmt.Errorf("Invalid network type: %d", conf.Network)
+ }
+ return nil
+}
+
+func createDefaultLoopbackInterface(conn *urpc.Client) error {
+ link := boot.LoopbackLink{
+ Name: "lo",
+ Addresses: []net.IP{
+ net.IP("\x7f\x00\x00\x01"),
+ net.IPv6loopback,
+ },
+ Routes: []boot.Route{
+ {
+ Destination: net.IP("\x7f\x00\x00\x00"),
+ Mask: net.IPMask("\xff\x00\x00\x00"),
+ },
+ {
+ Destination: net.IPv6loopback,
+ Mask: net.IPMask(strings.Repeat("\xff", 16)),
+ },
+ },
+ }
+ if err := conn.Call(boot.NetworkCreateLinksAndRoutes, &boot.CreateLinksAndRoutesArgs{
+ LoopbackLinks: []boot.LoopbackLink{link},
+ }, nil); err != nil {
+ return fmt.Errorf("error creating loopback link and routes: %v", err)
+ }
+ return nil
+}
+
+func joinNetNS(nsPath string) (func(), error) {
+ runtime.LockOSThread()
+ restoreNS, err := applyNS(specs.LinuxNamespace{
+ Type: specs.NetworkNamespace,
+ Path: nsPath,
+ })
+ if err != nil {
+ runtime.UnlockOSThread()
+ return nil, fmt.Errorf("error joining net namespace %q: %v", nsPath, err)
+ }
+ return func() {
+ restoreNS()
+ runtime.UnlockOSThread()
+ }, nil
+}
+
+// isRootNS determines whether we are running in the root net namespace.
+//
+// TODO: Find a better way to detect root network.
+func isRootNS(ifaces []net.Interface) bool {
+ for _, iface := range ifaces {
+ if iface.Name == "docker0" {
+ return true
+ }
+ }
+ return false
+}
+
+// createInterfacesAndRoutesFromNS scrapes the interface and routes from the
+// net namespace with the given path, creates them in the sandbox, and removes
+// them from the host.
+func createInterfacesAndRoutesFromNS(conn *urpc.Client, nsPath string) error {
+ // Join the network namespace that we will be copying.
+ restore, err := joinNetNS(nsPath)
+ if err != nil {
+ return err
+ }
+ defer restore()
+
+ // Get all interfaces in the namespace.
+ ifaces, err := net.Interfaces()
+ if err != nil {
+ return fmt.Errorf("error querying interfaces: %v", err)
+ }
+
+ if isRootNS(ifaces) {
+ return fmt.Errorf("cannot run in with network enabled in root network namespace")
+ }
+
+ // Collect addresses and routes from the interfaces.
+ var args boot.CreateLinksAndRoutesArgs
+ for _, iface := range ifaces {
+ if iface.Flags&net.FlagUp == 0 {
+ log.Infof("Skipping down interface: %+v", iface)
+ continue
+ }
+
+ ifaddrs, err := iface.Addrs()
+ if err != nil {
+ return fmt.Errorf("error fetching interface addresses for %q: %v", iface.Name, err)
+ }
+
+ // We build our own loopback devices.
+ if iface.Flags&net.FlagLoopback != 0 {
+ links, err := loopbackLinks(iface, ifaddrs)
+ if err != nil {
+ return fmt.Errorf("error getting loopback routes and links for iface %q: %v", iface.Name, err)
+ }
+ args.LoopbackLinks = append(args.LoopbackLinks, links...)
+ continue
+ }
+
+ // Get the link for the interface.
+ ifaceLink, err := netlink.LinkByName(iface.Name)
+ if err != nil {
+ return fmt.Errorf("error getting link for interface %q: %v", iface.Name, err)
+ }
+
+ // Create the socket.
+ const protocol = 0x0300 // htons(ETH_P_ALL)
+ fd, err := syscall.Socket(syscall.AF_PACKET, syscall.SOCK_RAW, protocol)
+ if err != nil {
+ return fmt.Errorf("unable to create raw socket: %v", err)
+ }
+ deviceFile := os.NewFile(uintptr(fd), "raw-device-fd")
+
+ // Bind to the appropriate device.
+ ll := syscall.SockaddrLinklayer{
+ Protocol: protocol,
+ Ifindex: ifaceLink.Attrs().Index,
+ Hatype: 0, // No ARP type.
+ Pkttype: syscall.PACKET_OTHERHOST,
+ }
+ if err := syscall.Bind(fd, &ll); err != nil {
+ return fmt.Errorf("unable to bind to %q: %v", iface.Name, err)
+ }
+
+ // Scrape the routes before removing the address, since that
+ // will remove the routes as well.
+ routes, def, err := routesForIface(iface)
+ if err != nil {
+ return fmt.Errorf("error getting routes for interface %q: %v", iface.Name, err)
+ }
+ if def != nil {
+ if !args.DefaultGateway.Route.Empty() {
+ return fmt.Errorf("more than one default route found, interface: %v, route: %v, default route: %+v", iface.Name, def, args.DefaultGateway)
+ }
+ args.DefaultGateway.Route = *def
+ args.DefaultGateway.Name = iface.Name
+ }
+
+ link := boot.FDBasedLink{
+ Name: iface.Name,
+ MTU: iface.MTU,
+ Routes: routes,
+ }
+
+ // Collect the addresses for the interface, enable forwarding,
+ // and remove them from the host.
+ for _, ifaddr := range ifaddrs {
+ ipNet, ok := ifaddr.(*net.IPNet)
+ if !ok {
+ return fmt.Errorf("address is not IPNet: %t %+v", ifaddr, ifaddr)
+ }
+ link.Addresses = append(link.Addresses, ipNet.IP)
+
+ // Steal IP address from NIC.
+ if err := removeAddress(ifaceLink, ipNet.String()); err != nil {
+ return fmt.Errorf("error removing address %v from device %q: %v", iface.Name, ipNet, err)
+ }
+ }
+
+ args.FilePayload.Files = append(args.FilePayload.Files, deviceFile)
+ args.FDBasedLinks = append(args.FDBasedLinks, link)
+ }
+
+ log.Debugf("Setting up network, config: %+v", args)
+ if err := conn.Call(boot.NetworkCreateLinksAndRoutes, &args, nil); err != nil {
+ return fmt.Errorf("error creating links and routes: %v", err)
+ }
+ return nil
+}
+
+// loopbackLinks collects the links for a loopback interface.
+func loopbackLinks(iface net.Interface, addrs []net.Addr) ([]boot.LoopbackLink, error) {
+ var links []boot.LoopbackLink
+ for _, addr := range addrs {
+ ipNet, ok := addr.(*net.IPNet)
+ if !ok {
+ return nil, fmt.Errorf("address is not IPNet: %t %+v", addr, addr)
+ }
+ links = append(links, boot.LoopbackLink{
+ Name: iface.Name,
+ Addresses: []net.IP{ipNet.IP},
+ Routes: []boot.Route{{
+ Destination: ipNet.IP.Mask(ipNet.Mask),
+ Mask: ipNet.Mask,
+ }},
+ })
+ }
+ return links, nil
+}
+
+// routesForIface iterates over all routes for the given interface and converts
+// them to boot.Routes.
+func routesForIface(iface net.Interface) ([]boot.Route, *boot.Route, error) {
+ link, err := netlink.LinkByIndex(iface.Index)
+ if err != nil {
+ return nil, nil, err
+ }
+ rs, err := netlink.RouteList(link, netlink.FAMILY_ALL)
+ if err != nil {
+ return nil, nil, fmt.Errorf("error getting routes from %q: %v", iface.Name, err)
+ }
+
+ var def *boot.Route
+ var routes []boot.Route
+ for _, r := range rs {
+ // Is it a default route?
+ if r.Dst == nil {
+ if r.Gw == nil {
+ return nil, nil, fmt.Errorf("default route with no gateway %q: %+v", iface.Name, r)
+ }
+ if def != nil {
+ return nil, nil, fmt.Errorf("more than one default route found %q, def: %+v, route: %+v", iface.Name, def, r)
+ }
+ emptyAddr := net.IPv6zero
+ if r.Gw.To4() != nil {
+ emptyAddr = net.IPv4zero
+ }
+ // Create a catch-all route to the gateway.
+ def = &boot.Route{
+ Destination: emptyAddr,
+ Mask: net.IPMask(emptyAddr),
+ Gateway: r.Gw,
+ }
+ continue
+ }
+ routes = append(routes, boot.Route{
+ Destination: r.Dst.IP.Mask(r.Dst.Mask),
+ Mask: r.Dst.Mask,
+ })
+ }
+ return routes, def, nil
+}
+
+// removeAddress removes an IP address from a network device. It's equivalent to:
+// ip addr del <ipAndMask> dev <name>
+func removeAddress(source netlink.Link, ipAndMask string) error {
+ addr, err := netlink.ParseAddr(ipAndMask)
+ if err != nil {
+ return err
+ }
+ return netlink.AddrDel(source, addr)
+}
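The const protocol = 0x0300 used for the raw socket above is ETH_P_ALL (0x0003) with its bytes swapped into network order, which AF_PACKET sockets expect. A small sanity-check sketch (hypothetical, and it assumes a little-endian host, where htons is a plain byte swap):

    package protoexample

    import (
        "fmt"
        "syscall"
    )

    // htons swaps the two bytes of a 16-bit value; on a little-endian host
    // this converts host byte order to network (big-endian) byte order.
    func htons(v uint16) uint16 { return v<<8 | v>>8 }

    func exampleProtocol() {
        // syscall.ETH_P_ALL is 0x0003 in host order.
        fmt.Printf("htons(ETH_P_ALL) = %#04x\n", htons(uint16(syscall.ETH_P_ALL))) // 0x0300
    }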
diff --git a/runsc/sandbox/sandbox.go b/runsc/sandbox/sandbox.go
new file mode 100644
index 000000000..b2fa1d58e
--- /dev/null
+++ b/runsc/sandbox/sandbox.go
@@ -0,0 +1,666 @@
+// Copyright 2018 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package sandbox creates and manipulates sandboxes.
+package sandbox
+
+import (
+ "encoding/json"
+ "fmt"
+ "io/ioutil"
+ "os"
+ "os/exec"
+ "path/filepath"
+ "regexp"
+ "strconv"
+ "syscall"
+ "time"
+
+ specs "github.com/opencontainers/runtime-spec/specs-go"
+ "golang.org/x/sys/unix"
+ "gvisor.googlesource.com/gvisor/pkg/control/client"
+ "gvisor.googlesource.com/gvisor/pkg/control/server"
+ "gvisor.googlesource.com/gvisor/pkg/log"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/control"
+ "gvisor.googlesource.com/gvisor/pkg/urpc"
+ "gvisor.googlesource.com/gvisor/runsc/boot"
+ "gvisor.googlesource.com/gvisor/runsc/specutils"
+)
+
+// metadataFilename is the name of the metadata file relative to sandboxRoot
+// that holds sandbox metadata.
+const metadataFilename = "meta.json"
+
+// See libcontainer/factory_linux.go
+var idRegex = regexp.MustCompile(`^[\w+-\.]+$`)
+
+// validateID validates the sandbox id.
+func validateID(id string) error {
+ if !idRegex.MatchString(id) {
+ return fmt.Errorf("invalid sandbox id: %v", id)
+ }
+ return nil
+}
+
+// Sandbox wraps a child sandbox process, and is responsible for saving and
+// loading sandbox metadata to disk.
+//
+// Within a root directory, we maintain subdirectories for each sandbox named
+// with the sandbox id. The sandbox metadata is stored as JSON within the
+// sandbox directory in a file named "meta.json". This metadata format is
+// defined by us, and is not part of the OCI spec.
+//
+// Sandboxes must write this metadata file after any change to their internal
+// state. The entire sandbox directory is deleted when the sandbox is
+// destroyed.
+//
+// TODO: Protect against concurrent changes to the sandbox metadata
+// file.
+type Sandbox struct {
+ // ID is the sandbox ID.
+ ID string `json:"id"`
+
+ // Spec is the OCI runtime spec that configures this sandbox.
+ Spec *specs.Spec `json:"spec"`
+
+ // BundleDir is the directory containing the sandbox bundle.
+ BundleDir string `json:"bundleDir"`
+
+ // SandboxRoot is the directory containing the sandbox metadata file.
+ SandboxRoot string `json:"sandboxRoot"`
+
+ // CreatedAt is the time the sandbox was created.
+ CreatedAt time.Time `json:"createdAt"`
+
+ // Owner is the sandbox owner.
+ Owner string `json:"owner"`
+
+ // ConsoleSocket is the path to a unix domain socket that will receive
+ // the console FD. It is only used during create, so we don't need to
+ // store it in the metadata.
+ ConsoleSocket string `json:"-"`
+
+ // Pid is the pid of the running sandbox. Only valid if Status is
+ // Created or Running.
+ Pid int `json:"pid"`
+
+ // GoferPid is the pid of the gofer running alongside the sandbox. May be 0
+ // if the gofer has been killed or it's not being used.
+ GoferPid int `json:"goferPid"`
+
+ // Status is the current sandbox Status.
+ Status Status `json:"status"`
+}
+
+// Create creates the sandbox subprocess and writes the metadata file. Args
+// are additional arguments that will be passed to the sandbox process.
+func Create(id string, spec *specs.Spec, conf *boot.Config, bundleDir, consoleSocket, pidFile string, args []string) (*Sandbox, error) {
+ log.Debugf("Create sandbox %q in root dir: %s", id, conf.RootDir)
+ if err := validateID(id); err != nil {
+ return nil, err
+ }
+
+ sandboxRoot := filepath.Join(conf.RootDir, id)
+ if exists(sandboxRoot) {
+ return nil, fmt.Errorf("sandbox with id %q already exists: %q ", id, sandboxRoot)
+ }
+
+ s := &Sandbox{
+ ID: id,
+ Spec: spec,
+ ConsoleSocket: consoleSocket,
+ BundleDir: bundleDir,
+ SandboxRoot: sandboxRoot,
+ Status: Creating,
+ Owner: os.Getenv("USER"),
+ }
+
+ // Create sandbox process. If anything errors between now and the end of this
+ // function, we MUST clean up all sandbox resources.
+ if err := s.createProcesses(conf, args); err != nil {
+ s.Destroy()
+ return nil, err
+ }
+
+ // Wait for the control server to come up (or timeout). The sandbox is
+ // not "created" until that happens.
+ if err := s.waitForCreated(10 * time.Second); err != nil {
+ s.Destroy()
+ return nil, err
+ }
+
+ s.Status = Created
+ s.CreatedAt = time.Now()
+
+ // Save the metadata file.
+ if err := s.save(); err != nil {
+ s.Destroy()
+ return nil, err
+ }
+
+ // Write the pid file. Containerd considers the create complete after
+ // this file is created, so it must be the last thing we do.
+ if pidFile != "" {
+ if err := ioutil.WriteFile(pidFile, []byte(strconv.Itoa(s.Pid)), 0644); err != nil {
+ s.Destroy()
+ return nil, fmt.Errorf("error writing pid file: %v", err)
+ }
+ }
+
+ return s, nil
+}
+
+// Run is a helper that calls Create + Start + Wait.
+func Run(id string, spec *specs.Spec, conf *boot.Config, bundleDir, consoleSocket, pidFile string, args []string) (syscall.WaitStatus, error) {
+ s, err := Create(id, spec, conf, bundleDir, consoleSocket, pidFile, args)
+ if err != nil {
+ return 0, fmt.Errorf("error creating sandbox: %v", err)
+ }
+ if err := s.Start(conf); err != nil {
+ return 0, fmt.Errorf("error starting sandbox: %v", err)
+ }
+ return s.Wait()
+}
+
+// Load loads the sandbox with the given id from its metadata file.
+func Load(rootDir, id string) (*Sandbox, error) {
+ log.Debugf("Load sandbox %q %q", rootDir, id)
+ if err := validateID(id); err != nil {
+ return nil, err
+ }
+ sandboxRoot := filepath.Join(rootDir, id)
+ if !exists(sandboxRoot) {
+ return nil, fmt.Errorf("sandbox with id %q does not exist", id)
+ }
+ metaFile := filepath.Join(sandboxRoot, metadataFilename)
+ if !exists(metaFile) {
+ return nil, fmt.Errorf("sandbox with id %q does not have metadata file %q", id, metaFile)
+ }
+ metaBytes, err := ioutil.ReadFile(metaFile)
+ if err != nil {
+ return nil, fmt.Errorf("error reading sandbox metadata file %q: %v", metaFile, err)
+ }
+ var s Sandbox
+ if err := json.Unmarshal(metaBytes, &s); err != nil {
+ return nil, fmt.Errorf("error unmarshaling sandbox metadata from %q: %v", metaFile, err)
+ }
+
+ // If the status is "Running" or "Created", check that the process
+ // still exists, and set it to Stopped if it does not.
+ //
+// This is inherently racy.
+ if s.Status == Running || s.Status == Created {
+ // Send signal 0 to check if process exists.
+ if err := s.Signal(0); err != nil {
+ // Process no longer exists.
+ s.Status = Stopped
+ s.Pid = 0
+ }
+ }
+
+ return &s, nil
+}
+
+// List returns all sandbox ids in the given root directory.
+func List(rootDir string) ([]string, error) {
+ log.Debugf("List sandboxes %q", rootDir)
+ fs, err := ioutil.ReadDir(rootDir)
+ if err != nil {
+ return nil, fmt.Errorf("ReadDir(%s) failed: %v", rootDir, err)
+ }
+ var out []string
+ for _, f := range fs {
+ out = append(out, f.Name())
+ }
+ return out, nil
+}
+
+// State returns the metadata of the sandbox.
+func (s *Sandbox) State() specs.State {
+ return specs.State{
+ Version: specs.Version,
+ ID: s.ID,
+ Status: s.Status.String(),
+ Pid: s.Pid,
+ Bundle: s.BundleDir,
+ }
+}
+
+// Start starts running the containerized process inside the sandbox.
+func (s *Sandbox) Start(conf *boot.Config) error {
+ log.Debugf("Start sandbox %q, pid: %d", s.ID, s.Pid)
+ if s.Status != Created {
+ return fmt.Errorf("cannot start container in state %s", s.Status)
+ }
+
+ // "If any prestart hook fails, the runtime MUST generate an error,
+ // stop and destroy the container".
+ if s.Spec.Hooks != nil {
+ if err := executeHooks(s.Spec.Hooks.Prestart, s.State()); err != nil {
+ s.Destroy()
+ return err
+ }
+ }
+
+ c, err := s.connect()
+ if err != nil {
+ s.Destroy()
+ return err
+ }
+ defer c.Close()
+
+ // Configure the network.
+ if err := setupNetwork(c, s.Pid, s.Spec, conf); err != nil {
+ s.Destroy()
+ return fmt.Errorf("error setting up network: %v", err)
+ }
+
+ // Send a message to the sandbox control server to start the
+ // application.
+ if err := c.Call(boot.ApplicationStart, nil, nil); err != nil {
+ s.Destroy()
+ return fmt.Errorf("error starting sandbox: %v", err)
+ }
+
+ // "If any poststart hook fails, the runtime MUST log a warning, but
+ // the remaining hooks and lifecycle continue as if the hook had
+ // succeeded".
+ if s.Spec.Hooks != nil {
+ executeHooksBestEffort(s.Spec.Hooks.Poststart, s.State())
+ }
+
+ s.Status = Running
+ return s.save()
+}
+
+// Processes retrieves the list of processes and associated metadata inside a
+// sandbox.
+func (s *Sandbox) Processes() ([]*control.Process, error) {
+ if s.Status != Running {
+ return nil, fmt.Errorf("cannot get processes of container %q because it isn't running. It is in state %v", s.ID, s.Status)
+ }
+
+ c, err := s.connect()
+ if err != nil {
+ return nil, err
+ }
+ defer c.Close()
+
+ var pl []*control.Process
+ if err := c.Call(boot.ApplicationProcesses, nil, &pl); err != nil {
+ return nil, fmt.Errorf("error retrieving process data from sandbox: %v", err)
+ }
+ return pl, nil
+}
+
+// Execute runs the specified command in the sandbox.
+func (s *Sandbox) Execute(e *control.ExecArgs) (syscall.WaitStatus, error) {
+ log.Debugf("Execute in sandbox %q, pid: %d, args: %+v", s.ID, s.Pid, e)
+ if s.Status != Created && s.Status != Running {
+ return 0, fmt.Errorf("cannot exec in container in state %s", s.Status)
+ }
+
+ log.Debugf("Connecting to sandbox...")
+ c, err := s.connect()
+ if err != nil {
+ return 0, fmt.Errorf("error connecting to control server at pid %d: %v", s.Pid, err)
+ }
+ defer c.Close()
+
+ // Send a message to the sandbox control server to start the application.
+ var waitStatus uint32
+ if err := c.Call(boot.ApplicationExecute, e, &waitStatus); err != nil {
+ return 0, fmt.Errorf("error executing in sandbox: %v", err)
+ }
+
+ return syscall.WaitStatus(waitStatus), nil
+}
+
+// Event retrieves stats about the sandbox such as memory and CPU utilization.
+func (s *Sandbox) Event() (*boot.Event, error) {
+ if s.Status != Running && s.Status != Created {
+ return nil, fmt.Errorf("cannot get events for container in state: %s", s.Status)
+ }
+
+ c, err := s.connect()
+ if err != nil {
+ return nil, err
+ }
+ defer c.Close()
+
+ var e boot.Event
+ if err := c.Call(boot.ApplicationEvent, nil, &e); err != nil {
+ return nil, fmt.Errorf("error retrieving event data from sandbox: %v", err)
+ }
+ e.ID = s.ID
+ return &e, nil
+}
+
+func (s *Sandbox) connect() (*urpc.Client, error) {
+ log.Debugf("Connecting to sandbox...")
+ c, err := client.ConnectTo(boot.ControlSocketAddr(s.ID))
+ if err != nil {
+ return nil, fmt.Errorf("error connecting to control server at pid %d: %v", s.Pid, err)
+ }
+ return c, nil
+}
+
+func (s *Sandbox) createProcesses(conf *boot.Config, args []string) error {
+ binPath, err := specutils.BinPath()
+ if err != nil {
+ return err
+ }
+
+ ioFiles, err := s.createGoferProcess(conf, binPath, args)
+ if err != nil {
+ return err
+ }
+ return s.createSandboxProcess(conf, binPath, args, ioFiles)
+}
+
+func (s *Sandbox) createGoferProcess(conf *boot.Config, binPath string, commonArgs []string) ([]*os.File, error) {
+ if conf.FileAccess != boot.FileAccessProxy {
+ // Don't start a gofer. The sandbox will access host FS directly.
+ return nil, nil
+ }
+
+ var args []string
+ args = append(args, commonArgs...)
+ args = append(args, "gofer", "--bundle", s.BundleDir)
+
+ // Start with root mount and then add any other additional mount.
+ mountCount := 1
+ for _, m := range s.Spec.Mounts {
+ if specutils.Is9PMount(m) {
+ mountCount++
+ }
+ }
+
+ sandEnds := make([]*os.File, 0, mountCount)
+ goferEnds := make([]*os.File, 0, mountCount)
+ for i := 0; i < mountCount; i++ {
+ // Create socket that connects the sandbox and gofer.
+ fds, err := syscall.Socketpair(syscall.AF_UNIX, syscall.SOCK_STREAM, 0)
+ if err != nil {
+ return nil, err
+ }
+ sandEnds = append(sandEnds, os.NewFile(uintptr(fds[0]), "sandbox io fd"))
+
+ goferEnd := os.NewFile(uintptr(fds[1]), "gofer io fd")
+ defer goferEnd.Close()
+ goferEnds = append(goferEnds, goferEnd)
+
+ args = append(args, fmt.Sprintf("--io-fds=%d", 3+i))
+ }
+
+ cmd := exec.Command(binPath, args...)
+ cmd.ExtraFiles = goferEnds
+
+ // Setup any uid/gid mappings, and create or join the configured user
+ // namespace so the gofer's view of the filesystem aligns with the
+ // users in the sandbox.
+ setUIDGIDMappings(cmd, s.Spec)
+ nss := filterNS([]specs.LinuxNamespaceType{specs.UserNamespace}, s.Spec)
+
+ // Start the gofer in the given namespace.
+ log.Debugf("Starting gofer: %s %v", binPath, args)
+ if err := startInNS(cmd, nss); err != nil {
+ return nil, err
+ }
+ s.GoferPid = cmd.Process.Pid
+ log.Infof("Gofer started, pid: %d", cmd.Process.Pid)
+ return sandEnds, nil
+}
+
+// createSandboxProcess starts the sandbox as a subprocess by running the "boot"
+// command, passing in the bundle dir.
+func (s *Sandbox) createSandboxProcess(conf *boot.Config, binPath string, commonArgs []string, ioFiles []*os.File) error {
+ // nextFD is used to get unused FDs that we can pass to the sandbox. It
+ // starts at 3 because 0, 1, and 2 are taken by stdin/out/err.
+ nextFD := 3
+
+ // Create control server socket here and donate FD to child process because
+ // it may be in a different network namespace and won't be reachable from
+ // outside.
+ fd, err := server.CreateSocket(boot.ControlSocketAddr(s.ID))
+ if err != nil {
+ return fmt.Errorf("error creating control server socket for sandbox %q: %v", s.ID, err)
+ }
+
+ consoleEnabled := s.ConsoleSocket != ""
+
+ cmd := exec.Command(binPath, commonArgs...)
+ cmd.SysProcAttr = &syscall.SysProcAttr{}
+ cmd.Args = append(cmd.Args,
+ "boot",
+ "--bundle", s.BundleDir,
+ "--controller-fd="+strconv.Itoa(nextFD),
+ fmt.Sprintf("--console=%t", consoleEnabled))
+ nextFD++
+
+ controllerFile := os.NewFile(uintptr(fd), "control_server_socket")
+ defer controllerFile.Close()
+ cmd.ExtraFiles = append(cmd.ExtraFiles, controllerFile)
+
+ // If there is a gofer, send all socket ends to the sandbox.
+ for _, f := range ioFiles {
+ defer f.Close()
+ cmd.ExtraFiles = append(cmd.ExtraFiles, f)
+ cmd.Args = append(cmd.Args, "--io-fds="+strconv.Itoa(nextFD))
+ nextFD++
+ }
+
+ // If the console control socket file is provided, then create a new
+ // pty master/slave pair and set the tty on the sandbox process.
+ if consoleEnabled {
+ // setupConsole will send the master on the socket, and return
+ // the slave.
+ tty, err := setupConsole(s.ConsoleSocket)
+ if err != nil {
+ return fmt.Errorf("error setting up control socket %q: %v", s.ConsoleSocket, err)
+ }
+ defer tty.Close()
+
+ cmd.Stdin = tty
+ cmd.Stdout = tty
+ cmd.Stderr = tty
+ cmd.SysProcAttr.Setctty = true
+ cmd.SysProcAttr.Ctty = int(tty.Fd())
+ } else {
+ cmd.Stdin = os.Stdin
+ cmd.Stdout = os.Stdout
+ cmd.Stderr = os.Stderr
+ }
+
+ // Detach from this session, otherwise cmd will get SIGHUP and SIGCONT
+ // when re-parented.
+ cmd.SysProcAttr.Setsid = true
+
+ // nss is the set of namespaces to join or create before starting the sandbox
+ // process. IPC and UTS namespaces from the host are not used as they
+ // are virtualized inside the sandbox. Be paranoid and run inside an empty
+ // namespace for these.
+ log.Infof("Sandbox will be started in empty IPC and UTS namespaces")
+ nss := []specs.LinuxNamespace{
+ specs.LinuxNamespace{Type: specs.IPCNamespace},
+ specs.LinuxNamespace{Type: specs.UTSNamespace},
+ }
+
+ if conf.Platform == boot.PlatformPtrace {
+ // TODO: Also set an empty PID namespace so that we limit
+ // access to other host processes.
+ log.Infof("Sandbox will be started in the current PID namespace")
+ } else {
+ log.Infof("Sandbox will be started in empty PID namespace")
+ nss = append(nss, specs.LinuxNamespace{Type: specs.PIDNamespace})
+ }
+
+ if conf.FileAccess == boot.FileAccessProxy {
+ log.Infof("Sandbox will be started in empty mount namespace")
+ nss = append(nss, specs.LinuxNamespace{Type: specs.MountNamespace})
+ } else {
+ log.Infof("Sandbox will be started in the current mount namespace")
+ }
+
+ // Join the network namespace if networking is enabled. The sandbox talks
+ // directly to the host network, which may have been configured in the
+ // namespace.
+ if ns, ok := getNS(specs.NetworkNamespace, s.Spec); ok && conf.Network != boot.NetworkNone {
+ log.Infof("Sandbox will be started in the container's network namespace: %+v", ns)
+ nss = append(nss, ns)
+ } else {
+ log.Infof("Sandbox will be started in empty network namespace")
+ nss = append(nss, specs.LinuxNamespace{Type: specs.NetworkNamespace})
+ }
+
+ // User namespace depends on the following options:
+ // - Host network/filesystem: requires running inside the user namespace
+ // specified in the spec or the current namespace if none is configured.
+ // - Gofer: when using a Gofer, the sandbox process can run isolated in an
+ // empty namespace.
+ if conf.Network == boot.NetworkHost || conf.FileAccess == boot.FileAccessDirect {
+ if userns, ok := getNS(specs.UserNamespace, s.Spec); ok {
+ log.Infof("Sandbox will be started in container's user namespace: %+v", userns)
+ nss = append(nss, userns)
+ setUIDGIDMappings(cmd, s.Spec)
+ } else {
+ // TODO: Restrict capabilities since it's using the current user
+ // namespace, i.e. root.
+ log.Infof("Sandbox will be started in the current user namespace")
+ }
+ // When running in the caller's defined user namespace, apply the same
+ // capabilities to the sandbox process to ensure it abides by the same
+ // rules.
+ cmd.Args = append(cmd.Args, "--apply-caps=true")
+
+ } else {
+ log.Infof("Sandbox will be started in empty user namespace")
+ nss = append(nss, specs.LinuxNamespace{Type: specs.UserNamespace})
+ }
+
+ log.Debugf("Starting sandbox: %s %v", binPath, cmd.Args)
+ if err := startInNS(cmd, nss); err != nil {
+ return err
+ }
+ s.Pid = cmd.Process.Pid
+ log.Infof("Sandbox started, pid: %d", s.Pid)
+ return nil
+}
+
+// waitForCreated waits for the sandbox subprocess control server to be
+// running, at which point the sandbox is in Created state.
+func (s *Sandbox) waitForCreated(timeout time.Duration) error {
+ log.Debugf("Waiting for sandbox %q creation", s.ID)
+ tchan := time.After(timeout)
+ for {
+ select {
+ case <-tchan:
+ return fmt.Errorf("timed out waiting for sandbox control server")
+ default:
+ if c, err := client.ConnectTo(boot.ControlSocketAddr(s.ID)); err == nil {
+ // It's alive!
+ c.Close()
+ return nil
+ }
+ }
+ }
+}
+
+// Wait waits for the containerized process to exit, and returns its WaitStatus.
+func (s *Sandbox) Wait() (syscall.WaitStatus, error) {
+ log.Debugf("Wait on sandbox %q with pid %d", s.ID, s.Pid)
+ p, err := os.FindProcess(s.Pid)
+ if err != nil {
+ // "On Unix systems, FindProcess always succeeds and returns a
+ // Process for the given pid."
+ panic(err)
+ }
+ ps, err := p.Wait()
+ if err != nil {
+ return 0, err
+ }
+ return ps.Sys().(syscall.WaitStatus), nil
+}
+
+// Destroy frees all resources associated with the sandbox.
+func (s *Sandbox) Destroy() error {
+ log.Debugf("Destroy sandbox %q", s.ID)
+ if s.Pid != 0 {
+ // TODO: Too harsh?
+ log.Debugf("Killing sandbox %q", s.ID)
+ sendSignal(s.Pid, unix.SIGKILL)
+ s.Pid = 0
+ }
+ if s.GoferPid != 0 {
+ log.Debugf("Killing gofer for sandbox %q", s.ID)
+ sendSignal(s.GoferPid, unix.SIGKILL)
+ s.GoferPid = 0
+ }
+ if err := os.RemoveAll(s.SandboxRoot); err != nil {
+ log.Warningf("Failed to delete sandbox root directory %q, err: %v", s.SandboxRoot, err)
+ }
+
+ // "If any poststop hook fails, the runtime MUST log a warning, but the
+ // remaining hooks and lifecycle continue as if the hook had succeeded".
+ if s.Spec.Hooks != nil && (s.Status == Created || s.Status == Running) {
+ executeHooksBestEffort(s.Spec.Hooks.Poststop, s.State())
+ }
+
+ s.Status = Stopped
+ return nil
+}
+
+// Signal sends the signal to the sandbox.
+func (s *Sandbox) Signal(sig syscall.Signal) error {
+ log.Debugf("Signal sandbox %q", s.ID)
+ if s.Status == Stopped {
+ log.Warningf("sandbox %q not running, not sending signal %v to pid %d", s.ID, sig, s.Pid)
+ return nil
+ }
+ return sendSignal(s.Pid, sig)
+}
+
+func sendSignal(pid int, sig syscall.Signal) error {
+ if err := syscall.Kill(pid, sig); err != nil {
+ return fmt.Errorf("error sending signal %d to pid %d: %v", sig, pid, err)
+ }
+ return nil
+}
+
+// save saves the sandbox metadata to a file.
+func (s *Sandbox) save() error {
+ log.Debugf("Save sandbox %q", s.ID)
+ if err := os.MkdirAll(s.SandboxRoot, 0711); err != nil {
+ return fmt.Errorf("error creating sandbox root directory %q: %v", s.SandboxRoot, err)
+ }
+ meta, err := json.Marshal(s)
+ if err != nil {
+ return fmt.Errorf("error marshaling sandbox metadata: %v", err)
+ }
+ metaFile := filepath.Join(s.SandboxRoot, metadataFilename)
+ if err := ioutil.WriteFile(metaFile, meta, 0640); err != nil {
+ return fmt.Errorf("error writing sandbox metadata: %v", err)
+ }
+ return nil
+}
+
+// exists returns true if the given file exists.
+func exists(f string) bool {
+ if _, err := os.Stat(f); err == nil {
+ return true
+ } else if !os.IsNotExist(err) {
+ log.Warningf("error checking for file %q: %v", f, err)
+ }
+ return false
+}
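The exported surface above composes into the usual container lifecycle; Run is just Create + Start + Wait. A hypothetical in-package sketch of a caller driving the steps individually (the sandbox id is made up):

    // exampleLifecycle is a hypothetical sketch; it is not part of this change.
    func exampleLifecycle(spec *specs.Spec, conf *boot.Config, bundleDir string) error {
        // Create launches the gofer and sandbox processes, waits for the
        // control server to come up, and writes meta.json.
        s, err := Create("example-sandbox", spec, conf, bundleDir, "", "", nil)
        if err != nil {
            return err
        }
        // Destroy kills the processes, removes the sandbox root, and runs
        // poststop hooks when the sandbox was Created or Running.
        defer s.Destroy()

        // Start runs prestart hooks, configures the network, and tells the
        // control server to start the application.
        if err := s.Start(conf); err != nil {
            return err
        }
        // Wait blocks until the sandbox process exits.
        ws, err := s.Wait()
        if err != nil {
            return err
        }
        log.Infof("application exited with status %d", ws.ExitStatus())
        return nil
    }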
diff --git a/runsc/sandbox/sandbox_test.go b/runsc/sandbox/sandbox_test.go
new file mode 100644
index 000000000..6c71cac30
--- /dev/null
+++ b/runsc/sandbox/sandbox_test.go
@@ -0,0 +1,649 @@
+// Copyright 2018 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package sandbox_test
+
+import (
+ "encoding/json"
+ "fmt"
+ "io"
+ "io/ioutil"
+ "os"
+ "os/signal"
+ "path/filepath"
+ "reflect"
+ "strings"
+ "syscall"
+ "testing"
+ "time"
+
+ "context"
+ "flag"
+ "github.com/google/subcommands"
+ specs "github.com/opencontainers/runtime-spec/specs-go"
+ "golang.org/x/sys/unix"
+ "gvisor.googlesource.com/gvisor/pkg/abi/linux"
+ "gvisor.googlesource.com/gvisor/pkg/log"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/control"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth"
+ "gvisor.googlesource.com/gvisor/pkg/unet"
+ "gvisor.googlesource.com/gvisor/runsc/boot"
+ "gvisor.googlesource.com/gvisor/runsc/cmd"
+ "gvisor.googlesource.com/gvisor/runsc/sandbox"
+)
+
+func init() {
+ log.SetLevel(log.Debug)
+}
+
+// writeSpec writes the spec to disk in the given directory.
+func writeSpec(dir string, spec *specs.Spec) error {
+ b, err := json.Marshal(spec)
+ if err != nil {
+ return err
+ }
+ return ioutil.WriteFile(filepath.Join(dir, "config.json"), b, 0755)
+}
+
+// newSpecWithArgs creates a simple spec with the given args suitable for use
+// in tests.
+func newSpecWithArgs(args ...string) *specs.Spec {
+ spec := &specs.Spec{
+ // The host filesystem root is the sandbox root.
+ Root: &specs.Root{
+ Path: "/",
+ Readonly: true,
+ },
+ Process: &specs.Process{
+ Args: args,
+ Env: []string{
+ "PATH=" + os.Getenv("PATH"),
+ },
+ },
+ }
+ return spec
+}
+
+// shutdownSignal will be sent to the sandbox in order to shut down cleanly.
+const shutdownSignal = syscall.SIGUSR2
+
+// setupSandbox creates a bundle and root dir for the sandbox, generates a test
+// config, and writes the spec to config.json in the bundle dir.
+func setupSandbox(spec *specs.Spec) (rootDir, bundleDir string, conf *boot.Config, err error) {
+ rootDir, err = ioutil.TempDir("", "sandboxes")
+ if err != nil {
+ return "", "", nil, fmt.Errorf("error creating root dir: %v", err)
+ }
+
+ bundleDir, err = ioutil.TempDir("", "bundle")
+ if err != nil {
+ return "", "", nil, fmt.Errorf("error creating bundle dir: %v", err)
+ }
+
+ if err = writeSpec(bundleDir, spec); err != nil {
+ return "", "", nil, fmt.Errorf("error writing spec: %v", err)
+ }
+
+ conf = &boot.Config{
+ RootDir: rootDir,
+ Network: boot.NetworkNone,
+ }
+
+ return rootDir, bundleDir, conf, nil
+}
+
+// uniqueSandboxID generates a unique sandbox id for each test.
+//
+// The sandbox id is used to create an abstract unix domain socket, which must
+// be unique. While the runtime forbids creating two sandboxes with the same
+// id, sometimes between test runs the socket does not get cleaned up quickly
+// enough, causing sandbox creation to fail.
+func uniqueSandboxID() string {
+ return fmt.Sprintf("test-sandbox-%d", time.Now().UnixNano())
+}
+
+// waitForProcessList waits for the given process list to show up in the sandbox.
+func waitForProcessList(s *sandbox.Sandbox, expected []*control.Process) error {
+ var got []*control.Process
+ for start := time.Now(); time.Now().Sub(start) < 10*time.Second; {
+ var err error
+ got, err = s.Processes()
+ if err != nil {
+ return fmt.Errorf("error getting process data from sandbox: %v", err)
+ }
+ if procListsEqual(got, expected) {
+ return nil
+ }
+ // Process might not have started, try again...
+ time.Sleep(10 * time.Millisecond)
+ }
+ return fmt.Errorf("sandbox got process list: %s, want: %s", procListToString(got), procListToString(expected))
+}
+
+// TestLifecycle tests the basic Create/Start/Signal/Destroy sandbox lifecycle.
+// It verifies after each step that the sandbox can be loaded from disk, and
+// has the correct status.
+func TestLifecycle(t *testing.T) {
+ // The sandbox will just sleep for a long time. We will kill it before
+ // it finishes sleeping.
+ spec := newSpecWithArgs("sleep", "100")
+
+ rootDir, bundleDir, conf, err := setupSandbox(spec)
+ if err != nil {
+ t.Fatalf("error setting up sandbox: %v", err)
+ }
+ defer os.RemoveAll(rootDir)
+ defer os.RemoveAll(bundleDir)
+
+ // expectedPL lists the expected process state of the sandbox.
+ expectedPL := []*control.Process{
+ {
+ UID: 0,
+ PID: 1,
+ PPID: 0,
+ C: 0,
+ Cmd: "sleep",
+ },
+ }
+ // Create the sandbox.
+ id := uniqueSandboxID()
+ if _, err := sandbox.Create(id, spec, conf, bundleDir, "", "", nil); err != nil {
+ t.Fatalf("error creating sandbox: %v", err)
+ }
+ // Load the sandbox from disk and check the status.
+ s, err := sandbox.Load(rootDir, id)
+ if err != nil {
+ t.Fatalf("error loading sandbox: %v", err)
+ }
+ if got, want := s.Status, sandbox.Created; got != want {
+ t.Errorf("sandbox status got %v, want %v", got, want)
+ }
+
+ // List should return the sandbox id.
+ ids, err := sandbox.List(rootDir)
+ if err != nil {
+ t.Fatalf("error listing sandboxes: %v", err)
+ }
+ if got, want := ids, []string{id}; !reflect.DeepEqual(got, want) {
+ t.Errorf("sandbox list got %v, want %v", got, want)
+ }
+
+ // Start the sandbox.
+ if err := s.Start(conf); err != nil {
+ t.Fatalf("error starting sandbox: %v", err)
+ }
+ // Load the sandbox from disk and check the status.
+ s, err = sandbox.Load(rootDir, id)
+ if err != nil {
+ t.Fatalf("error loading sandbox: %v", err)
+ }
+ if got, want := s.Status, sandbox.Running; got != want {
+ t.Errorf("sandbox status got %v, want %v", got, want)
+ }
+
+ // Verify that "sleep 100" is running.
+ if err := waitForProcessList(s, expectedPL); err != nil {
+ t.Error(err)
+ }
+
+ // Send the sandbox a signal, which we catch and use to cleanly
+ // shutdown.
+ if err := s.Signal(shutdownSignal); err != nil {
+ t.Fatalf("error sending signal %v to sandbox: %v", shutdownSignal, err)
+ }
+ // Wait for it to die.
+ if _, err := s.Wait(); err != nil {
+ t.Fatalf("error waiting on sandbox: %v", err)
+ }
+ // Load the sandbox from disk and check the status.
+ s, err = sandbox.Load(rootDir, id)
+ if err != nil {
+ t.Fatalf("error loading sandbox: %v", err)
+ }
+ if got, want := s.Status, sandbox.Stopped; got != want {
+ t.Errorf("sandbox status got %v, want %v", got, want)
+ }
+
+ // Destroy the sandbox.
+ if err := s.Destroy(); err != nil {
+ t.Fatalf("error destroying sandbox: %v", err)
+ }
+
+ // List should not return the sandbox id.
+ ids, err = sandbox.List(rootDir)
+ if err != nil {
+ t.Fatalf("error listing sandboxes: %v", err)
+ }
+ if len(ids) != 0 {
+ t.Errorf("expected sandbox list to be empty, but got %v", ids)
+ }
+
+ // Loading the sandbox by id should fail.
+ if _, err = sandbox.Load(rootDir, id); err == nil {
+ t.Errorf("expected loading destroyed sandbox to fail, but it did not")
+ }
+}
+
+// Test that we can execute the application with different path formats.
+func TestExePath(t *testing.T) {
+ for _, test := range []struct {
+ path string
+ success bool
+ }{
+ {path: "true", success: true},
+ {path: "bin/true", success: true},
+ {path: "/bin/true", success: true},
+ {path: "thisfiledoesntexit", success: false},
+ {path: "bin/thisfiledoesntexit", success: false},
+ {path: "/bin/thisfiledoesntexit", success: false},
+ } {
+ spec := newSpecWithArgs(test.path)
+ rootDir, bundleDir, conf, err := setupSandbox(spec)
+ if err != nil {
+ t.Fatalf("exec: %s, error setting up sandbox: %v", test.path, err)
+ }
+
+ ws, err := sandbox.Run(uniqueSandboxID(), spec, conf, bundleDir, "", "", nil)
+
+ os.RemoveAll(rootDir)
+ os.RemoveAll(bundleDir)
+
+ if test.success {
+ if err != nil {
+ t.Errorf("exec: %s, error running sandbox: %v", test.path, err)
+ }
+ if ws.ExitStatus() != 0 {
+ t.Errorf("exec: %s, got exit status %v want %v", test.path, ws.ExitStatus(), 0)
+ }
+ } else {
+ if err == nil {
+ t.Errorf("exec: %s, got: no error, want: error", test.path)
+ }
+ }
+ }
+}
+
+// Test that we can retrieve the application exit status from the sandbox.
+func TestAppExitStatus(t *testing.T) {
+ // First sandbox will succeed.
+ succSpec := newSpecWithArgs("true")
+
+ rootDir, bundleDir, conf, err := setupSandbox(succSpec)
+ if err != nil {
+ t.Fatalf("error setting up sandbox: %v", err)
+ }
+ defer os.RemoveAll(rootDir)
+ defer os.RemoveAll(bundleDir)
+
+ ws, err := sandbox.Run(uniqueSandboxID(), succSpec, conf, bundleDir, "", "", nil)
+ if err != nil {
+ t.Fatalf("error running sandbox: %v", err)
+ }
+ if ws.ExitStatus() != 0 {
+ t.Errorf("got exit status %v want %v", ws.ExitStatus(), 0)
+ }
+
+ // Second sandbox exits with non-zero status.
+ wantStatus := 123
+ errSpec := newSpecWithArgs("bash", "-c", fmt.Sprintf("exit %d", wantStatus))
+
+ rootDir2, bundleDir2, conf, err := setupSandbox(errSpec)
+ if err != nil {
+ t.Fatalf("error setting up sandbox: %v", err)
+ }
+ defer os.RemoveAll(rootDir2)
+ defer os.RemoveAll(bundleDir2)
+
+ ws, err = sandbox.Run(uniqueSandboxID(), errSpec, conf, bundleDir2, "", "", nil)
+ if err != nil {
+ t.Fatalf("error running sandbox: %v", err)
+ }
+ if ws.ExitStatus() != wantStatus {
+ t.Errorf("got exit status %v want %v", ws.ExitStatus(), wantStatus)
+ }
+}
+
+// TestExec verifies that a sandbox can exec a new program.
+func TestExec(t *testing.T) {
+ const uid = 343
+ spec := newSpecWithArgs("sleep", "100")
+
+ rootDir, bundleDir, conf, err := setupSandbox(spec)
+ if err != nil {
+ t.Fatalf("error setting up sandbox: %v", err)
+ }
+ defer os.RemoveAll(rootDir)
+ defer os.RemoveAll(bundleDir)
+
+ // Create and start the sandbox.
+ s, err := sandbox.Create(uniqueSandboxID(), spec, conf, bundleDir, "", "", nil)
+ if err != nil {
+ t.Fatalf("error creating sandbox: %v", err)
+ }
+ defer s.Destroy()
+ if err := s.Start(conf); err != nil {
+ t.Fatalf("error starting sandbox: %v", err)
+ }
+
+ // expectedPL lists the expected process state of the sandbox.
+ expectedPL := []*control.Process{
+ {
+ UID: 0,
+ PID: 1,
+ PPID: 0,
+ C: 0,
+ Cmd: "sleep",
+ },
+ {
+ UID: uid,
+ PID: 2,
+ PPID: 0,
+ C: 0,
+ Cmd: "sleep",
+ },
+ }
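+ // The second entry corresponds to the "sleep 5" process started via exec below.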
+
+ // Verify that "sleep 100" is running.
+ if err := waitForProcessList(s, expectedPL[:1]); err != nil {
+ t.Error(err)
+ }
+
+ execArgs := control.ExecArgs{
+ Filename: "/bin/sleep",
+ Argv: []string{"sleep", "5"},
+ Envv: []string{"PATH=" + os.Getenv("PATH")},
+ WorkingDirectory: "/",
+ KUID: uid,
+ Detach: false,
+ }
+
+ // Verify that "sleep 100" and "sleep 5" are running after exec.
+ // First, start running exec (which blocks).
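+ // The channel is buffered so the goroutine can report its result without blocking.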
+ status := make(chan error, 1)
+ go func() {
+ exitStatus, err := s.Execute(&execArgs)
+ if err != nil {
+ status <- err
+ } else if exitStatus != 0 {
+ status <- fmt.Errorf("failed with exit status: %v", exitStatus)
+ } else {
+ status <- nil
+ }
+ }()
+
+ if err := waitForProcessList(s, expectedPL); err != nil {
+ t.Fatal(err)
+ }
+
+ // Ensure that exec finished without error.
+ select {
+ case <-time.After(10 * time.Second):
+ t.Fatalf("sandbox timed out waiting for exec to finish.")
+ case st := <-status:
+ if st != nil {
+ t.Errorf("sandbox failed to exec %v: %v", execArgs, err)
+ }
+ }
+}
+
+// TestCapabilities verifies that:
+// - Running exec as non-root UID and GID will result in an error (because the
+// executable file can't be read).
+// - Running exec as non-root with CAP_DAC_OVERRIDE succeeds because it skips
+// this check.
+func TestCapabilities(t *testing.T) {
+ const uid = 343
+ const gid = 2401
+ spec := newSpecWithArgs("sleep", "100")
+
+ // We generate files in the host temporary directory.
+ spec.Mounts = append(spec.Mounts, specs.Mount{
+ Destination: os.TempDir(),
+ Source: os.TempDir(),
+ Type: "bind",
+ })
+
+ rootDir, bundleDir, conf, err := setupSandbox(spec)
+ if err != nil {
+ t.Fatalf("error setting up sandbox: %v", err)
+ }
+ defer os.RemoveAll(rootDir)
+ defer os.RemoveAll(bundleDir)
+
+ // Create and start the sandbox.
+ s, err := sandbox.Create(uniqueSandboxID(), spec, conf, bundleDir, "", "", nil)
+ if err != nil {
+ t.Fatalf("error creating sandbox: %v", err)
+ }
+ defer s.Destroy()
+ if err := s.Start(conf); err != nil {
+ t.Fatalf("error starting sandbox: %v", err)
+ }
+
+ // expectedPL lists the expected process state of the sandbox.
+ expectedPL := []*control.Process{
+ {
+ UID: 0,
+ PID: 1,
+ PPID: 0,
+ C: 0,
+ Cmd: "sleep",
+ },
+ {
+ UID: uid,
+ PID: 2,
+ PPID: 0,
+ C: 0,
+ Cmd: "exe",
+ },
+ }
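+ // The second entry is the "exe" script created and executed below.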
+ if err := waitForProcessList(s, expectedPL[:1]); err != nil {
+ t.Fatalf("Failed to wait for sleep to start, err: %v", err)
+ }
+
+ // Create an executable that can't be run with the specified UID:GID.
+ // This shouldn't be callable within the sandbox until we add the
+ // CAP_DAC_OVERRIDE capability to skip the access check.
+ exePath := filepath.Join(rootDir, "exe")
+ if err := ioutil.WriteFile(exePath, []byte("#!/bin/sh\necho hello"), 0770); err != nil {
+ t.Fatalf("couldn't create executable: %v", err)
+ }
+ defer os.Remove(exePath)
+
+ // Need to traverse the intermediate directory.
+ if err := os.Chmod(rootDir, 0755); err != nil {
+ t.Fatalf("error chmodding root dir: %v", err)
+ }
+
+ execArgs := control.ExecArgs{
+ Filename: exePath,
+ Argv: []string{exePath},
+ Envv: []string{"PATH=" + os.Getenv("PATH")},
+ WorkingDirectory: "/",
+ KUID: uid,
+ KGID: gid,
+ Capabilities: &auth.TaskCapabilities{},
+ Detach: true,
+ }
+
+ // "exe" should fail because we don't have the necessary permissions.
+ if _, err := s.Execute(&execArgs); err == nil {
+ t.Fatalf("sandbox executed without error, but an error was expected")
+ }
+
+ // Now we run with the capability enabled and should succeed.
+ execArgs.Capabilities = &auth.TaskCapabilities{
+ EffectiveCaps: auth.CapabilitySetOf(linux.CAP_DAC_OVERRIDE),
+ }
+ // First, start running exec.
+ if _, err := s.Execute(&execArgs); err != nil {
+ t.Fatalf("sandbox failed to exec %v: %v", execArgs, err)
+ }
+
+ if err := waitForProcessList(s, expectedPL); err != nil {
+ t.Error(err)
+ }
+}
+
+// TestConsoleSocket verifies that a tty FD is sent over the console socket if one is provided.
+func TestConsoleSocket(t *testing.T) {
+ spec := newSpecWithArgs("true")
+ rootDir, bundleDir, conf, err := setupSandbox(spec)
+ if err != nil {
+ t.Fatalf("error setting up sandbox: %v", err)
+ }
+ defer os.RemoveAll(rootDir)
+ defer os.RemoveAll(bundleDir)
+
+ // Create a named socket and start listening. We use a relative path
+ // to avoid overflowing the unix path length limit (108 chars).
+ socketPath := filepath.Join(bundleDir, "socket")
+ cwd, err := os.Getwd()
+ if err != nil {
+ t.Fatalf("error getting cwd: %v", err)
+ }
+ socketRelPath, err := filepath.Rel(cwd, socketPath)
+ if err != nil {
+ t.Fatalf("error getting relative path for %q from cwd %q: %v", socketPath, cwd, err)
+ }
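+ // Fall back to the absolute path if the relative one is no shorter.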
+ if len(socketRelPath) > len(socketPath) {
+ socketRelPath = socketPath
+ }
+ srv, err := unet.BindAndListen(socketRelPath, false)
+ if err != nil {
+ t.Fatalf("error binding and listening to socket %q: %v", socketPath, err)
+ }
+ defer os.Remove(socketPath)
+
+ // Create the sandbox and pass the socket name.
+ id := uniqueSandboxID()
+ s, err := sandbox.Create(id, spec, conf, bundleDir, socketRelPath, "", nil)
+ if err != nil {
+ t.Fatalf("error creating sandbox: %v", err)
+ }
+
+ // Open the other end of the socket.
+ sock, err := srv.Accept()
+ if err != nil {
+ t.Fatalf("error accepting socket connection: %v", err)
+ }
+
+ // Allow 1 fd to be received; we expect exactly one (the pty master).
+ r := sock.Reader(true /* blocking */)
+ r.EnableFDs(1)
+
+ // The socket is closed right after sending the FD, so EOF is
+ // an allowed error.
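+ // A single empty buffer suffices: we only care about the control message, not the data.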
+ b := [][]byte{{}}
+ if _, err := r.ReadVec(b); err != nil && err != io.EOF {
+ t.Fatalf("error reading from socket connection: %v", err)
+ }
+
+ // We should have gotten a control message.
+ fds, err := r.ExtractFDs()
+ if err != nil {
+ t.Fatalf("error extracting fds from socket connection: %v", err)
+ }
+ if len(fds) != 1 {
+ t.Fatalf("got %d fds from socket, wanted 1", len(fds))
+ }
+
+ // Verify that the fd is a terminal.
+ if _, err := unix.IoctlGetTermios(fds[0], unix.TCGETS); err != nil {
+ t.Errorf("fd is not a terminal (ioctl TGGETS got %v)", err)
+ }
+
+ // Shut it down.
+ if err := s.Destroy(); err != nil {
+ t.Fatalf("error destroying sandbox: %v", err)
+ }
+
+ // Close socket.
+ if err := srv.Close(); err != nil {
+ t.Fatalf("error destroying sandbox: %v", err)
+ }
+}
+
+// procListsEqual is used to check whether 2 Process lists are equal for all
+// implemented fields.
+func procListsEqual(got, want []*control.Process) bool {
+ if len(got) != len(want) {
+ return false
+ }
+ for i := range got {
+ pd1 := got[i]
+ pd2 := want[i]
+ // Zero out unimplemented and timing-dependent fields.
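+ // Note that this mutates the structs in both input slices.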
+ pd1.Time, pd2.Time = "", ""
+ pd1.STime, pd2.STime = "", ""
+ pd1.C, pd2.C = 0, 0
+ if *pd1 != *pd2 {
+ return false
+ }
+ }
+ return true
+}
+
+func procListToString(pl []*control.Process) string {
+ strs := make([]string, 0, len(pl))
+ for _, p := range pl {
+ strs = append(strs, fmt.Sprintf("%+v", p))
+ }
+ return fmt.Sprintf("[%s]", strings.Join(strs, ","))
+}
+
+// TestMain acts like runsc if it is called with the "boot" or "gofer"
+// argument, otherwise it just runs the tests. This is required because
+// creating a sandbox will call "/proc/self/exe boot". Normally
+// /proc/self/exe is the runsc binary, but for tests we have to fake it.
+func TestMain(m *testing.M) {
+ // exit wraps os.Exit so that pre-exit work, such as writing coverage data, has a single hook point.
+ exit := func(status int) {
+ os.Exit(status)
+ }
+
+ if !flag.Parsed() {
+ flag.Parse()
+ }
+
+ // If we are passed one of the commands then run it.
+ subcommands.Register(new(cmd.Boot), "boot")
+ subcommands.Register(new(cmd.Gofer), "gofer")
+ switch flag.Arg(0) {
+ case "boot", "gofer":
+ // Run the command in a goroutine so we can block the main
+ // thread waiting for shutdownSignal.
+ go func() {
+ conf := &boot.Config{
+ RootDir: "unused-root-dir",
+ Network: boot.NetworkNone,
+ }
+ var ws syscall.WaitStatus
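+ // ws is filled in by the subcommand with the sandbox's wait status.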
+ subcmdCode := subcommands.Execute(context.Background(), conf, &ws)
+ if subcmdCode != subcommands.ExitSuccess {
+ panic(fmt.Sprintf("command failed to execute, err: %v", subcmdCode))
+ }
+ // Sandbox exited normally. Shut down this process.
+ os.Exit(ws.ExitStatus())
+ }()
+
+ // Shutdown cleanly when the shutdownSignal is received. This
+ // allows us to write coverage data before exiting.
+ sigc := make(chan os.Signal, 1)
+ signal.Notify(sigc, shutdownSignal)
+ <-sigc
+ exit(0)
+ default:
+ // Otherwise run the tests.
+ exit(m.Run())
+ }
+}
diff --git a/runsc/sandbox/status.go b/runsc/sandbox/status.go
new file mode 100644
index 000000000..6fc936aba
--- /dev/null
+++ b/runsc/sandbox/status.go
@@ -0,0 +1,56 @@
+// Copyright 2018 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package sandbox
+
+// Status enumerates sandbox statuses. The statuses and their semantics are
+// part of the runtime CLI spec.
+//
+// TODO: Get precise about the transitions between statuses.
+type Status int
+
+const (
+ // Creating indicates "the container is being created".
+ Creating Status = iota
+
+ // Created indicates "the runtime has finished the create operation and
+ // the container process has neither exited nor executed the
+ // user-specified program".
+ Created
+
+ // Running indicates "the container process has executed the
+ // user-specified program but has not exited".
+ Running
+
+ // Stopped indicates "the container process has exited".
+ Stopped
+)
+
+// String converts a Status to a string. These strings are part of the runtime
+// CLI spec and should not be changed.
+func (s Status) String() string {
+ switch s {
+ case Creating:
+ return "creating"
+ case Created:
+ return "created"
+ case Running:
+ return "running"
+ case Stopped:
+ return "stopped"
+ default:
+ return "unknown"
+ }
+}