summaryrefslogtreecommitdiffhomepage
path: root/runsc/cmd
diff options
context:
space:
mode:
Diffstat (limited to 'runsc/cmd')
-rw-r--r--runsc/cmd/boot.go257
-rw-r--r--runsc/cmd/capability.go157
-rw-r--r--runsc/cmd/checkpoint.go150
-rw-r--r--runsc/cmd/chroot.go97
-rw-r--r--runsc/cmd/cmd.go117
-rw-r--r--runsc/cmd/create.go103
-rw-r--r--runsc/cmd/debug.go185
-rw-r--r--runsc/cmd/delete.go87
-rw-r--r--runsc/cmd/do.go310
-rw-r--r--runsc/cmd/events.go111
-rw-r--r--runsc/cmd/exec.go486
-rw-r--r--runsc/cmd/gofer.go446
-rw-r--r--runsc/cmd/kill.go154
-rw-r--r--runsc/cmd/list.go117
-rw-r--r--runsc/cmd/path.go28
-rw-r--r--runsc/cmd/pause.go68
-rw-r--r--runsc/cmd/ps.go86
-rw-r--r--runsc/cmd/restore.go106
-rw-r--r--runsc/cmd/resume.go69
-rw-r--r--runsc/cmd/run.go87
-rw-r--r--runsc/cmd/spec.go182
-rw-r--r--runsc/cmd/start.go65
-rw-r--r--runsc/cmd/state.go76
-rw-r--r--runsc/cmd/wait.go127
24 files changed, 3671 insertions, 0 deletions
diff --git a/runsc/cmd/boot.go b/runsc/cmd/boot.go
new file mode 100644
index 000000000..3a547d4aa
--- /dev/null
+++ b/runsc/cmd/boot.go
@@ -0,0 +1,257 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package cmd
+
+import (
+ "context"
+ "os"
+ "runtime/debug"
+ "strings"
+ "syscall"
+
+ "flag"
+ "github.com/google/subcommands"
+ specs "github.com/opencontainers/runtime-spec/specs-go"
+ "gvisor.googlesource.com/gvisor/pkg/log"
+ "gvisor.googlesource.com/gvisor/runsc/boot"
+ "gvisor.googlesource.com/gvisor/runsc/specutils"
+)
+
+// Boot implements subcommands.Command for the "boot" command which starts a
+// new sandbox. It should not be called directly.
+type Boot struct {
+ // bundleDir is the directory containing the OCI spec.
+ bundleDir string
+
+ // specFD is the file descriptor that the spec will be read from.
+ specFD int
+
+ // controllerFD is the file descriptor of a stream socket for the
+ // control server that is donated to this process.
+ controllerFD int
+
+ // deviceFD is the file descriptor for the platform device file.
+ deviceFD int
+
+ // ioFDs is the list of FDs used to connect to FS gofers.
+ ioFDs intFlags
+
+ // stdioFDs are the fds for stdin, stdout, and stderr. They must be
+ // provided in that order.
+ stdioFDs intFlags
+
+ // console is set to true if the sandbox should allow terminal ioctl(2)
+ // syscalls.
+ console bool
+
+ // applyCaps determines if capabilities defined in the spec should be applied
+ // to the process.
+ applyCaps bool
+
+ // setUpChroot is set to true if the sandbox is started in an empty root.
+ setUpRoot bool
+
+ // cpuNum number of CPUs to create inside the sandbox.
+ cpuNum int
+
+ // totalMem sets the initial amount of total memory to report back to the
+ // container.
+ totalMem uint64
+
+ // userLogFD is the file descriptor to write user logs to.
+ userLogFD int
+
+ // startSyncFD is the file descriptor to synchronize runsc and sandbox.
+ startSyncFD int
+
+ // mountsFD is the file descriptor to read list of mounts after they have
+ // been resolved (direct paths, no symlinks). They are resolved outside the
+ // sandbox (e.g. gofer) and sent through this FD.
+ mountsFD int
+
+ // pidns is set if the sanadbox is in its own pid namespace.
+ pidns bool
+}
+
+// Name implements subcommands.Command.Name.
+func (*Boot) Name() string {
+ return "boot"
+}
+
+// Synopsis implements subcommands.Command.Synopsis.
+func (*Boot) Synopsis() string {
+ return "launch a sandbox process (internal use only)"
+}
+
+// Usage implements subcommands.Command.Usage.
+func (*Boot) Usage() string {
+ return `boot [flags] <container id>`
+}
+
+// SetFlags implements subcommands.Command.SetFlags.
+func (b *Boot) SetFlags(f *flag.FlagSet) {
+ f.StringVar(&b.bundleDir, "bundle", "", "required path to the root of the bundle directory")
+ f.IntVar(&b.specFD, "spec-fd", -1, "required fd with the container spec")
+ f.IntVar(&b.controllerFD, "controller-fd", -1, "required FD of a stream socket for the control server that must be donated to this process")
+ f.IntVar(&b.deviceFD, "device-fd", -1, "FD for the platform device file")
+ f.Var(&b.ioFDs, "io-fds", "list of FDs to connect 9P clients. They must follow this order: root first, then mounts as defined in the spec")
+ f.Var(&b.stdioFDs, "stdio-fds", "list of FDs containing sandbox stdin, stdout, and stderr in that order")
+ f.BoolVar(&b.console, "console", false, "set to true if the sandbox should allow terminal ioctl(2) syscalls")
+ f.BoolVar(&b.applyCaps, "apply-caps", false, "if true, apply capabilities defined in the spec to the process")
+ f.BoolVar(&b.setUpRoot, "setup-root", false, "if true, set up an empty root for the process")
+ f.BoolVar(&b.pidns, "pidns", false, "if true, the sandbox is in its own PID namespace")
+ f.IntVar(&b.cpuNum, "cpu-num", 0, "number of CPUs to create inside the sandbox")
+ f.Uint64Var(&b.totalMem, "total-memory", 0, "sets the initial amount of total memory to report back to the container")
+ f.IntVar(&b.userLogFD, "user-log-fd", 0, "file descriptor to write user logs to. 0 means no logging.")
+ f.IntVar(&b.startSyncFD, "start-sync-fd", -1, "required FD to used to synchronize sandbox startup")
+ f.IntVar(&b.mountsFD, "mounts-fd", -1, "mountsFD is the file descriptor to read list of mounts after they have been resolved (direct paths, no symlinks).")
+}
+
+// Execute implements subcommands.Command.Execute. It starts a sandbox in a
+// waiting state.
+func (b *Boot) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) subcommands.ExitStatus {
+ if b.specFD == -1 || b.controllerFD == -1 || b.startSyncFD == -1 || f.NArg() != 1 {
+ f.Usage()
+ return subcommands.ExitUsageError
+ }
+
+ // Ensure that if there is a panic, all goroutine stacks are printed.
+ debug.SetTraceback("all")
+
+ if b.setUpRoot {
+ if err := setUpChroot(b.pidns); err != nil {
+ Fatalf("error setting up chroot: %v", err)
+ }
+
+ if !b.applyCaps {
+ // Remove --setup-root arg to call myself.
+ var args []string
+ for _, arg := range os.Args {
+ if !strings.Contains(arg, "setup-root") {
+ args = append(args, arg)
+ }
+ }
+ // Note that we've already read the spec from the spec FD, and
+ // we will read it again after the exec call. This works
+ // because the ReadSpecFromFile function seeks to the beginning
+ // of the file before reading.
+ if err := callSelfAsNobody(args); err != nil {
+ Fatalf("%v", err)
+ }
+ panic("callSelfAsNobody must never return success")
+ }
+ }
+
+ // Get the spec from the specFD.
+ specFile := os.NewFile(uintptr(b.specFD), "spec file")
+ defer specFile.Close()
+ spec, err := specutils.ReadSpecFromFile(b.bundleDir, specFile)
+ if err != nil {
+ Fatalf("reading spec: %v", err)
+ }
+ specutils.LogSpec(spec)
+
+ conf := args[0].(*boot.Config)
+ waitStatus := args[1].(*syscall.WaitStatus)
+
+ if b.applyCaps {
+ caps := spec.Process.Capabilities
+ if caps == nil {
+ caps = &specs.LinuxCapabilities{}
+ }
+ if conf.Platform == boot.PlatformPtrace {
+ // Ptrace platform requires extra capabilities.
+ const c = "CAP_SYS_PTRACE"
+ caps.Bounding = append(caps.Bounding, c)
+ caps.Effective = append(caps.Effective, c)
+ caps.Permitted = append(caps.Permitted, c)
+ }
+
+ // Remove --apply-caps arg to call myself.
+ var args []string
+ for _, arg := range os.Args {
+ if !strings.Contains(arg, "setup-root") && !strings.Contains(arg, "apply-caps") {
+ args = append(args, arg)
+ }
+ }
+
+ // Note that we've already read the spec from the spec FD, and
+ // we will read it again after the exec call. This works
+ // because the ReadSpecFromFile function seeks to the beginning
+ // of the file before reading.
+ if err := setCapsAndCallSelf(args, caps); err != nil {
+ Fatalf("%v", err)
+ }
+ panic("setCapsAndCallSelf must never return success")
+ }
+
+ // Read resolved mount list and replace the original one from the spec.
+ mountsFile := os.NewFile(uintptr(b.mountsFD), "mounts file")
+ cleanMounts, err := specutils.ReadMounts(mountsFile)
+ if err != nil {
+ mountsFile.Close()
+ Fatalf("Error reading mounts file: %v", err)
+ }
+ mountsFile.Close()
+ spec.Mounts = cleanMounts
+
+ // Create the loader.
+ bootArgs := boot.Args{
+ ID: f.Arg(0),
+ Spec: spec,
+ Conf: conf,
+ ControllerFD: b.controllerFD,
+ Device: os.NewFile(uintptr(b.deviceFD), "platform device"),
+ GoferFDs: b.ioFDs.GetArray(),
+ StdioFDs: b.stdioFDs.GetArray(),
+ Console: b.console,
+ NumCPU: b.cpuNum,
+ TotalMem: b.totalMem,
+ UserLogFD: b.userLogFD,
+ }
+ l, err := boot.New(bootArgs)
+ if err != nil {
+ Fatalf("creating loader: %v", err)
+ }
+
+ // Fatalf exits the process and doesn't run defers.
+ // 'l' must be destroyed explicitly after this point!
+
+ // Notify the parent process the sandbox has booted (and that the controller
+ // is up).
+ startSyncFile := os.NewFile(uintptr(b.startSyncFD), "start-sync file")
+ buf := make([]byte, 1)
+ if w, err := startSyncFile.Write(buf); err != nil || w != 1 {
+ l.Destroy()
+ Fatalf("unable to write into the start-sync descriptor: %v", err)
+ }
+ // Closes startSyncFile because 'l.Run()' only returns when the sandbox exits.
+ startSyncFile.Close()
+
+ // Wait for the start signal from runsc.
+ l.WaitForStartSignal()
+
+ // Run the application and wait for it to finish.
+ if err := l.Run(); err != nil {
+ l.Destroy()
+ Fatalf("running sandbox: %v", err)
+ }
+
+ ws := l.WaitExit()
+ log.Infof("application exiting with %+v", ws)
+ *waitStatus = syscall.WaitStatus(ws.Status())
+ l.Destroy()
+ return subcommands.ExitSuccess
+}
diff --git a/runsc/cmd/capability.go b/runsc/cmd/capability.go
new file mode 100644
index 000000000..312e5b471
--- /dev/null
+++ b/runsc/cmd/capability.go
@@ -0,0 +1,157 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package cmd
+
+import (
+ "fmt"
+
+ specs "github.com/opencontainers/runtime-spec/specs-go"
+ "github.com/syndtr/gocapability/capability"
+ "gvisor.googlesource.com/gvisor/pkg/log"
+)
+
+var allCapTypes = []capability.CapType{
+ capability.BOUNDS,
+ capability.EFFECTIVE,
+ capability.PERMITTED,
+ capability.INHERITABLE,
+ capability.AMBIENT,
+}
+
+// applyCaps applies the capabilities in the spec to the current thread.
+//
+// Note that it must be called with current thread locked.
+func applyCaps(caps *specs.LinuxCapabilities) error {
+ // Load current capabilities to trim the ones not permitted.
+ curCaps, err := capability.NewPid2(0)
+ if err != nil {
+ return err
+ }
+ if err := curCaps.Load(); err != nil {
+ return err
+ }
+
+ // Create an empty capability set to populate.
+ newCaps, err := capability.NewPid2(0)
+ if err != nil {
+ return err
+ }
+
+ for _, c := range allCapTypes {
+ if !newCaps.Empty(c) {
+ panic("unloaded capabilities must be empty")
+ }
+ set, err := trimCaps(getCaps(c, caps), curCaps)
+ if err != nil {
+ return err
+ }
+ newCaps.Set(c, set...)
+ }
+
+ if err := newCaps.Apply(capability.CAPS | capability.BOUNDS | capability.AMBS); err != nil {
+ return err
+ }
+ log.Infof("Capabilities applied: %+v", newCaps)
+ return nil
+}
+
+func getCaps(which capability.CapType, caps *specs.LinuxCapabilities) []string {
+ switch which {
+ case capability.BOUNDS:
+ return caps.Bounding
+ case capability.EFFECTIVE:
+ return caps.Effective
+ case capability.PERMITTED:
+ return caps.Permitted
+ case capability.INHERITABLE:
+ return caps.Inheritable
+ case capability.AMBIENT:
+ return caps.Ambient
+ }
+ panic(fmt.Sprint("invalid capability type:", which))
+}
+
+func trimCaps(names []string, setter capability.Capabilities) ([]capability.Cap, error) {
+ wantedCaps, err := capsFromNames(names)
+ if err != nil {
+ return nil, err
+ }
+
+ // Trim down capabilities that aren't possible to acquire.
+ var caps []capability.Cap
+ for _, c := range wantedCaps {
+ // Capability rules are more complicated than this, but this catches most
+ // problems with tests running with non-privileged user.
+ if setter.Get(capability.PERMITTED, c) {
+ caps = append(caps, c)
+ } else {
+ log.Warningf("Capability %q is not permitted, dropping it.", c)
+ }
+ }
+ return caps, nil
+}
+
+func capsFromNames(names []string) ([]capability.Cap, error) {
+ var caps []capability.Cap
+ for _, name := range names {
+ cap, ok := capFromName[name]
+ if !ok {
+ return nil, fmt.Errorf("invalid capability %q", name)
+ }
+ caps = append(caps, cap)
+ }
+ return caps, nil
+}
+
+var capFromName = map[string]capability.Cap{
+ "CAP_CHOWN": capability.CAP_CHOWN,
+ "CAP_DAC_OVERRIDE": capability.CAP_DAC_OVERRIDE,
+ "CAP_DAC_READ_SEARCH": capability.CAP_DAC_READ_SEARCH,
+ "CAP_FOWNER": capability.CAP_FOWNER,
+ "CAP_FSETID": capability.CAP_FSETID,
+ "CAP_KILL": capability.CAP_KILL,
+ "CAP_SETGID": capability.CAP_SETGID,
+ "CAP_SETUID": capability.CAP_SETUID,
+ "CAP_SETPCAP": capability.CAP_SETPCAP,
+ "CAP_LINUX_IMMUTABLE": capability.CAP_LINUX_IMMUTABLE,
+ "CAP_NET_BIND_SERVICE": capability.CAP_NET_BIND_SERVICE,
+ "CAP_NET_BROADCAST": capability.CAP_NET_BROADCAST,
+ "CAP_NET_ADMIN": capability.CAP_NET_ADMIN,
+ "CAP_NET_RAW": capability.CAP_NET_RAW,
+ "CAP_IPC_LOCK": capability.CAP_IPC_LOCK,
+ "CAP_IPC_OWNER": capability.CAP_IPC_OWNER,
+ "CAP_SYS_MODULE": capability.CAP_SYS_MODULE,
+ "CAP_SYS_RAWIO": capability.CAP_SYS_RAWIO,
+ "CAP_SYS_CHROOT": capability.CAP_SYS_CHROOT,
+ "CAP_SYS_PTRACE": capability.CAP_SYS_PTRACE,
+ "CAP_SYS_PACCT": capability.CAP_SYS_PACCT,
+ "CAP_SYS_ADMIN": capability.CAP_SYS_ADMIN,
+ "CAP_SYS_BOOT": capability.CAP_SYS_BOOT,
+ "CAP_SYS_NICE": capability.CAP_SYS_NICE,
+ "CAP_SYS_RESOURCE": capability.CAP_SYS_RESOURCE,
+ "CAP_SYS_TIME": capability.CAP_SYS_TIME,
+ "CAP_SYS_TTY_CONFIG": capability.CAP_SYS_TTY_CONFIG,
+ "CAP_MKNOD": capability.CAP_MKNOD,
+ "CAP_LEASE": capability.CAP_LEASE,
+ "CAP_AUDIT_WRITE": capability.CAP_AUDIT_WRITE,
+ "CAP_AUDIT_CONTROL": capability.CAP_AUDIT_CONTROL,
+ "CAP_SETFCAP": capability.CAP_SETFCAP,
+ "CAP_MAC_OVERRIDE": capability.CAP_MAC_OVERRIDE,
+ "CAP_MAC_ADMIN": capability.CAP_MAC_ADMIN,
+ "CAP_SYSLOG": capability.CAP_SYSLOG,
+ "CAP_WAKE_ALARM": capability.CAP_WAKE_ALARM,
+ "CAP_BLOCK_SUSPEND": capability.CAP_BLOCK_SUSPEND,
+ "CAP_AUDIT_READ": capability.CAP_AUDIT_READ,
+}
diff --git a/runsc/cmd/checkpoint.go b/runsc/cmd/checkpoint.go
new file mode 100644
index 000000000..96d3c3378
--- /dev/null
+++ b/runsc/cmd/checkpoint.go
@@ -0,0 +1,150 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package cmd
+
+import (
+ "context"
+ "os"
+ "path/filepath"
+ "syscall"
+
+ "flag"
+ "github.com/google/subcommands"
+ "gvisor.googlesource.com/gvisor/pkg/log"
+ "gvisor.googlesource.com/gvisor/runsc/boot"
+ "gvisor.googlesource.com/gvisor/runsc/container"
+ "gvisor.googlesource.com/gvisor/runsc/specutils"
+)
+
+// File containing the container's saved image/state within the given image-path's directory.
+const checkpointFileName = "checkpoint.img"
+
+// Checkpoint implements subcommands.Command for the "checkpoint" command.
+type Checkpoint struct {
+ imagePath string
+ leaveRunning bool
+}
+
+// Name implements subcommands.Command.Name.
+func (*Checkpoint) Name() string {
+ return "checkpoint"
+}
+
+// Synopsis implements subcommands.Command.Synopsis.
+func (*Checkpoint) Synopsis() string {
+ return "checkpoint current state of container (experimental)"
+}
+
+// Usage implements subcommands.Command.Usage.
+func (*Checkpoint) Usage() string {
+ return `checkpoint [flags] <container id> - save current state of container.
+`
+}
+
+// SetFlags implements subcommands.Command.SetFlags.
+func (c *Checkpoint) SetFlags(f *flag.FlagSet) {
+ f.StringVar(&c.imagePath, "image-path", "", "directory path to saved container image")
+ f.BoolVar(&c.leaveRunning, "leave-running", false, "restart the container after checkpointing")
+
+ // Unimplemented flags necessary for compatibility with docker.
+ var wp string
+ f.StringVar(&wp, "work-path", "", "ignored")
+}
+
+// Execute implements subcommands.Command.Execute.
+func (c *Checkpoint) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) subcommands.ExitStatus {
+
+ if f.NArg() != 1 {
+ f.Usage()
+ return subcommands.ExitUsageError
+ }
+
+ id := f.Arg(0)
+ conf := args[0].(*boot.Config)
+ waitStatus := args[1].(*syscall.WaitStatus)
+
+ cont, err := container.Load(conf.RootDir, id)
+ if err != nil {
+ Fatalf("loading container: %v", err)
+ }
+
+ if c.imagePath == "" {
+ Fatalf("image-path flag must be provided")
+ }
+
+ if err := os.MkdirAll(c.imagePath, 0755); err != nil {
+ Fatalf("making directories at path provided: %v", err)
+ }
+
+ fullImagePath := filepath.Join(c.imagePath, checkpointFileName)
+
+ // Create the image file and open for writing.
+ file, err := os.OpenFile(fullImagePath, os.O_CREATE|os.O_EXCL|os.O_RDWR, 0644)
+ if err != nil {
+ Fatalf("os.OpenFile(%q) failed: %v", fullImagePath, err)
+ }
+ defer file.Close()
+
+ if err := cont.Checkpoint(file); err != nil {
+ Fatalf("checkpoint failed: %v", err)
+ }
+
+ if !c.leaveRunning {
+ return subcommands.ExitSuccess
+ }
+
+ // TODO(b/110843694): Make it possible to restore into same container.
+ // For now, we can fake it by destroying the container and making a
+ // new container with the same ID. This hack does not work with docker
+ // which uses the container pid to ensure that the restore-container is
+ // actually the same as the checkpoint-container. By restoring into
+ // the same container, we will solve the docker incompatibility.
+
+ // Restore into new container with same ID.
+ bundleDir := cont.BundleDir
+ if bundleDir == "" {
+ Fatalf("setting bundleDir")
+ }
+
+ spec, err := specutils.ReadSpec(bundleDir)
+ if err != nil {
+ Fatalf("reading spec: %v", err)
+ }
+
+ specutils.LogSpec(spec)
+
+ if cont.ConsoleSocket != "" {
+ log.Warningf("ignoring console socket since it cannot be restored")
+ }
+
+ if err := cont.Destroy(); err != nil {
+ Fatalf("destroying container: %v", err)
+ }
+
+ cont, err = container.Create(id, spec, conf, bundleDir, "", "", "")
+ if err != nil {
+ Fatalf("restoring container: %v", err)
+ }
+ defer cont.Destroy()
+
+ if err := cont.Restore(spec, conf, fullImagePath); err != nil {
+ Fatalf("starting container: %v", err)
+ }
+
+ ws, err := cont.Wait()
+ *waitStatus = ws
+
+ return subcommands.ExitSuccess
+}
diff --git a/runsc/cmd/chroot.go b/runsc/cmd/chroot.go
new file mode 100644
index 000000000..1a774db04
--- /dev/null
+++ b/runsc/cmd/chroot.go
@@ -0,0 +1,97 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package cmd
+
+import (
+ "fmt"
+ "os"
+ "path/filepath"
+ "syscall"
+
+ "gvisor.googlesource.com/gvisor/pkg/log"
+ "gvisor.googlesource.com/gvisor/runsc/specutils"
+)
+
+// mountInChroot creates the destination mount point in the given chroot and
+// mounts the source.
+func mountInChroot(chroot, src, dst, typ string, flags uint32) error {
+ chrootDst := filepath.Join(chroot, dst)
+ log.Infof("Mounting %q at %q", src, chrootDst)
+
+ if err := specutils.Mount(src, chrootDst, typ, flags); err != nil {
+ return fmt.Errorf("error mounting %q at %q: %v", src, chrootDst, err)
+ }
+ return nil
+}
+
+func pivotRoot(root string) error {
+ if err := os.Chdir(root); err != nil {
+ return fmt.Errorf("error changing working directory: %v", err)
+ }
+ // pivot_root(new_root, put_old) moves the root filesystem (old_root)
+ // of the calling process to the directory put_old and makes new_root
+ // the new root filesystem of the calling process.
+ //
+ // pivot_root(".", ".") makes a mount of the working directory the new
+ // root filesystem, so it will be moved in "/" and then the old_root
+ // will be moved to "/" too. The parent mount of the old_root will be
+ // new_root, so after umounting the old_root, we will see only
+ // the new_root in "/".
+ if err := syscall.PivotRoot(".", "."); err != nil {
+ return fmt.Errorf("error changing root filesystem: %v", err)
+ }
+
+ if err := syscall.Unmount(".", syscall.MNT_DETACH); err != nil {
+ return fmt.Errorf("error umounting the old root file system: %v", err)
+ }
+ return nil
+}
+
+// setUpChroot creates an empty directory with runsc mounted at /runsc and proc
+// mounted at /proc.
+func setUpChroot(pidns bool) error {
+ // We are a new mount namespace, so we can use /tmp as a directory to
+ // construct a new root.
+ chroot := os.TempDir()
+
+ log.Infof("Setting up sandbox chroot in %q", chroot)
+
+ // Convert all shared mounts into slave to be sure that nothing will be
+ // propagated outside of our namespace.
+ if err := syscall.Mount("", "/", "", syscall.MS_SLAVE|syscall.MS_REC, ""); err != nil {
+ return fmt.Errorf("error converting mounts: %v", err)
+ }
+
+ if err := syscall.Mount("runsc-root", chroot, "tmpfs", syscall.MS_NOSUID|syscall.MS_NODEV|syscall.MS_NOEXEC, ""); err != nil {
+ return fmt.Errorf("error mounting tmpfs in choot: %v", err)
+ }
+
+ if pidns {
+ flags := uint32(syscall.MS_NOSUID | syscall.MS_NODEV | syscall.MS_NOEXEC | syscall.MS_RDONLY)
+ if err := mountInChroot(chroot, "proc", "/proc", "proc", flags); err != nil {
+ return fmt.Errorf("error mounting proc in chroot: %v", err)
+ }
+ } else {
+ if err := mountInChroot(chroot, "/proc", "/proc", "bind", syscall.MS_BIND|syscall.MS_RDONLY|syscall.MS_REC); err != nil {
+ return fmt.Errorf("error mounting proc in chroot: %v", err)
+ }
+ }
+
+ if err := syscall.Mount("", chroot, "", syscall.MS_REMOUNT|syscall.MS_RDONLY|syscall.MS_BIND, ""); err != nil {
+ return fmt.Errorf("error remounting chroot in read-only: %v", err)
+ }
+
+ return pivotRoot(chroot)
+}
diff --git a/runsc/cmd/cmd.go b/runsc/cmd/cmd.go
new file mode 100644
index 000000000..a2fc377d1
--- /dev/null
+++ b/runsc/cmd/cmd.go
@@ -0,0 +1,117 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package cmd holds implementations of the runsc commands.
+package cmd
+
+import (
+ "fmt"
+ "os"
+ "runtime"
+ "strconv"
+ "syscall"
+
+ "github.com/google/subcommands"
+ specs "github.com/opencontainers/runtime-spec/specs-go"
+ "gvisor.googlesource.com/gvisor/pkg/log"
+ "gvisor.googlesource.com/gvisor/runsc/specutils"
+)
+
+// Errorf logs to stderr and returns subcommands.ExitFailure.
+func Errorf(s string, args ...interface{}) subcommands.ExitStatus {
+ // If runsc is being invoked by docker or cri-o, then we might not have
+ // access to stderr, so we log a serious-looking warning in addition to
+ // writing to stderr.
+ log.Warningf("FATAL ERROR: "+s, args...)
+ fmt.Fprintf(os.Stderr, s+"\n", args...)
+ // Return an error that is unlikely to be used by the application.
+ return subcommands.ExitFailure
+}
+
+// Fatalf logs to stderr and exits with a failure status code.
+func Fatalf(s string, args ...interface{}) {
+ Errorf(s, args...)
+ os.Exit(128)
+}
+
+// intFlags can be used with int flags that appear multiple times.
+type intFlags []int
+
+// String implements flag.Value.
+func (i *intFlags) String() string {
+ return fmt.Sprintf("%v", *i)
+}
+
+// Get implements flag.Value.
+func (i *intFlags) Get() interface{} {
+ return i
+}
+
+// GetArray returns array of FDs.
+func (i *intFlags) GetArray() []int {
+ return *i
+}
+
+// Set implements flag.Value.
+func (i *intFlags) Set(s string) error {
+ fd, err := strconv.Atoi(s)
+ if err != nil {
+ return fmt.Errorf("invalid flag value: %v", err)
+ }
+ if fd < 0 {
+ return fmt.Errorf("flag value must be greater than 0: %d", fd)
+ }
+ *i = append(*i, fd)
+ return nil
+}
+
+// setCapsAndCallSelf sets capabilities to the current thread and then execve's
+// itself again with the arguments specified in 'args' to restart the process
+// with the desired capabilities.
+func setCapsAndCallSelf(args []string, caps *specs.LinuxCapabilities) error {
+ // Keep thread locked while capabilities are changed.
+ runtime.LockOSThread()
+ defer runtime.UnlockOSThread()
+
+ if err := applyCaps(caps); err != nil {
+ return fmt.Errorf("applyCaps() failed: %v", err)
+ }
+ binPath := specutils.ExePath
+
+ log.Infof("Execve %q again, bye!", binPath)
+ err := syscall.Exec(binPath, args, []string{})
+ return fmt.Errorf("error executing %s: %v", binPath, err)
+}
+
+// callSelfAsNobody sets UID and GID to nobody and then execve's itself again.
+func callSelfAsNobody(args []string) error {
+ // Keep thread locked while user/group are changed.
+ runtime.LockOSThread()
+ defer runtime.UnlockOSThread()
+
+ const nobody = 65534
+
+ if _, _, err := syscall.RawSyscall(syscall.SYS_SETGID, uintptr(nobody), 0, 0); err != 0 {
+ return fmt.Errorf("error setting uid: %v", err)
+ }
+ if _, _, err := syscall.RawSyscall(syscall.SYS_SETUID, uintptr(nobody), 0, 0); err != 0 {
+ return fmt.Errorf("error setting gid: %v", err)
+ }
+
+ binPath := specutils.ExePath
+
+ log.Infof("Execve %q again, bye!", binPath)
+ err := syscall.Exec(binPath, args, []string{})
+ return fmt.Errorf("error executing %s: %v", binPath, err)
+}
diff --git a/runsc/cmd/create.go b/runsc/cmd/create.go
new file mode 100644
index 000000000..629c198fd
--- /dev/null
+++ b/runsc/cmd/create.go
@@ -0,0 +1,103 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package cmd
+
+import (
+ "context"
+
+ "flag"
+ "github.com/google/subcommands"
+ "gvisor.googlesource.com/gvisor/runsc/boot"
+ "gvisor.googlesource.com/gvisor/runsc/container"
+ "gvisor.googlesource.com/gvisor/runsc/specutils"
+)
+
+// Create implements subcommands.Command for the "create" command.
+type Create struct {
+ // bundleDir is the path to the bundle directory (defaults to the
+ // current working directory).
+ bundleDir string
+
+ // pidFile is the filename that the sandbox pid will be written to.
+ // This file should only be created once the container process inside
+ // the sandbox is ready to use.
+ pidFile string
+
+ // consoleSocket is the path to an AF_UNIX socket which will receive a
+ // file descriptor referencing the master end of the console's
+ // pseudoterminal. This is ignored unless spec.Process.Terminal is
+ // true.
+ consoleSocket string
+
+ // userLog is the path to send user-visible logs to. This log is different
+ // from debug logs. The former is meant to be consumed by the users and should
+ // contain only information that is relevant to the person running the
+ // container, e.g. unsuported syscalls, while the later is more verbose and
+ // consumed by developers.
+ userLog string
+}
+
+// Name implements subcommands.Command.Name.
+func (*Create) Name() string {
+ return "create"
+}
+
+// Synopsis implements subcommands.Command.Synopsis.
+func (*Create) Synopsis() string {
+ return "create a secure container"
+}
+
+// Usage implements subcommands.Command.Usage.
+func (*Create) Usage() string {
+ return `create [flags] <container id> - create a secure container
+`
+}
+
+// SetFlags implements subcommands.Command.SetFlags.
+func (c *Create) SetFlags(f *flag.FlagSet) {
+ f.StringVar(&c.bundleDir, "bundle", "", "path to the root of the bundle directory, defaults to the current directory")
+ f.StringVar(&c.consoleSocket, "console-socket", "", "path to an AF_UNIX socket which will receive a file descriptor referencing the master end of the console's pseudoterminal")
+ f.StringVar(&c.pidFile, "pid-file", "", "filename that the container pid will be written to")
+ f.StringVar(&c.userLog, "user-log", "", "filename to send user-visible logs to. Empty means no logging.")
+}
+
+// Execute implements subcommands.Command.Execute.
+func (c *Create) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) subcommands.ExitStatus {
+ if f.NArg() != 1 {
+ f.Usage()
+ return subcommands.ExitUsageError
+ }
+
+ id := f.Arg(0)
+ conf := args[0].(*boot.Config)
+
+ bundleDir := c.bundleDir
+ if bundleDir == "" {
+ bundleDir = getwdOrDie()
+ }
+ spec, err := specutils.ReadSpec(bundleDir)
+ if err != nil {
+ Fatalf("reading spec: %v", err)
+ }
+ specutils.LogSpec(spec)
+
+ // Create the container. A new sandbox will be created for the
+ // container unless the metadata specifies that it should be run in an
+ // existing container.
+ if _, err := container.Create(id, spec, conf, bundleDir, c.consoleSocket, c.pidFile, c.userLog); err != nil {
+ Fatalf("creating container: %v", err)
+ }
+ return subcommands.ExitSuccess
+}
diff --git a/runsc/cmd/debug.go b/runsc/cmd/debug.go
new file mode 100644
index 000000000..27eb51172
--- /dev/null
+++ b/runsc/cmd/debug.go
@@ -0,0 +1,185 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package cmd
+
+import (
+ "context"
+ "os"
+ "syscall"
+ "time"
+
+ "flag"
+ "github.com/google/subcommands"
+ "gvisor.googlesource.com/gvisor/pkg/log"
+ "gvisor.googlesource.com/gvisor/runsc/boot"
+ "gvisor.googlesource.com/gvisor/runsc/container"
+)
+
+// Debug implements subcommands.Command for the "debug" command.
+type Debug struct {
+ pid int
+ stacks bool
+ signal int
+ profileHeap string
+ profileCPU string
+ profileDelay int
+ trace string
+}
+
+// Name implements subcommands.Command.
+func (*Debug) Name() string {
+ return "debug"
+}
+
+// Synopsis implements subcommands.Command.
+func (*Debug) Synopsis() string {
+ return "shows a variety of debug information"
+}
+
+// Usage implements subcommands.Command.
+func (*Debug) Usage() string {
+ return `debug [flags] <container id>`
+}
+
+// SetFlags implements subcommands.Command.
+func (d *Debug) SetFlags(f *flag.FlagSet) {
+ f.IntVar(&d.pid, "pid", 0, "sandbox process ID. Container ID is not necessary if this is set")
+ f.BoolVar(&d.stacks, "stacks", false, "if true, dumps all sandbox stacks to the log")
+ f.StringVar(&d.profileHeap, "profile-heap", "", "writes heap profile to the given file.")
+ f.StringVar(&d.profileCPU, "profile-cpu", "", "writes CPU profile to the given file.")
+ f.IntVar(&d.profileDelay, "profile-delay", 5, "amount of time to wait before stoping CPU profile")
+ f.StringVar(&d.trace, "trace", "", "writes an execution trace to the given file.")
+ f.IntVar(&d.signal, "signal", -1, "sends signal to the sandbox")
+}
+
+// Execute implements subcommands.Command.Execute.
+func (d *Debug) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) subcommands.ExitStatus {
+ var c *container.Container
+ conf := args[0].(*boot.Config)
+
+ if d.pid == 0 {
+ // No pid, container ID must have been provided.
+ if f.NArg() != 1 {
+ f.Usage()
+ return subcommands.ExitUsageError
+ }
+ var err error
+ c, err = container.Load(conf.RootDir, f.Arg(0))
+ if err != nil {
+ Fatalf("loading container %q: %v", f.Arg(0), err)
+ }
+ } else {
+ if f.NArg() != 0 {
+ f.Usage()
+ return subcommands.ExitUsageError
+ }
+ // Go over all sandboxes and find the one that matches PID.
+ ids, err := container.List(conf.RootDir)
+ if err != nil {
+ Fatalf("listing containers: %v", err)
+ }
+ for _, id := range ids {
+ candidate, err := container.Load(conf.RootDir, id)
+ if err != nil {
+ Fatalf("loading container %q: %v", id, err)
+ }
+ if candidate.SandboxPid() == d.pid {
+ c = candidate
+ break
+ }
+ }
+ if c == nil {
+ Fatalf("container with PID %d not found", d.pid)
+ }
+ }
+
+ if c.Sandbox == nil || !c.Sandbox.IsRunning() {
+ Fatalf("container sandbox is not running")
+ }
+ log.Infof("Found sandbox %q, PID: %d", c.Sandbox.ID, c.Sandbox.Pid)
+
+ if d.signal > 0 {
+ log.Infof("Sending signal %d to process: %d", d.signal, c.Sandbox.Pid)
+ if err := syscall.Kill(c.Sandbox.Pid, syscall.Signal(d.signal)); err != nil {
+ Fatalf("failed to send signal %d to processs %d", d.signal, c.Sandbox.Pid)
+ }
+ }
+ if d.stacks {
+ log.Infof("Retrieving sandbox stacks")
+ stacks, err := c.Sandbox.Stacks()
+ if err != nil {
+ Fatalf("retrieving stacks: %v", err)
+ }
+ log.Infof(" *** Stack dump ***\n%s", stacks)
+ }
+ if d.profileHeap != "" {
+ f, err := os.Create(d.profileHeap)
+ if err != nil {
+ Fatalf(err.Error())
+ }
+ defer f.Close()
+
+ if err := c.Sandbox.HeapProfile(f); err != nil {
+ Fatalf(err.Error())
+ }
+ log.Infof("Heap profile written to %q", d.profileHeap)
+ }
+
+ delay := false
+ if d.profileCPU != "" {
+ delay = true
+ f, err := os.Create(d.profileCPU)
+ if err != nil {
+ Fatalf(err.Error())
+ }
+ defer func() {
+ f.Close()
+ if err := c.Sandbox.StopCPUProfile(); err != nil {
+ Fatalf(err.Error())
+ }
+ log.Infof("CPU profile written to %q", d.profileCPU)
+ }()
+ if err := c.Sandbox.StartCPUProfile(f); err != nil {
+ Fatalf(err.Error())
+ }
+ log.Infof("CPU profile started for %d sec, writing to %q", d.profileDelay, d.profileCPU)
+ }
+ if d.trace != "" {
+ delay = true
+ f, err := os.Create(d.trace)
+ if err != nil {
+ Fatalf(err.Error())
+ }
+ defer func() {
+ f.Close()
+ if err := c.Sandbox.StopTrace(); err != nil {
+ Fatalf(err.Error())
+ }
+ log.Infof("Trace written to %q", d.trace)
+ }()
+ if err := c.Sandbox.StartTrace(f); err != nil {
+ Fatalf(err.Error())
+ }
+ log.Infof("Tracing started for %d sec, writing to %q", d.profileDelay, d.trace)
+
+ }
+
+ if delay {
+ time.Sleep(time.Duration(d.profileDelay) * time.Second)
+
+ }
+
+ return subcommands.ExitSuccess
+}
diff --git a/runsc/cmd/delete.go b/runsc/cmd/delete.go
new file mode 100644
index 000000000..9039723e9
--- /dev/null
+++ b/runsc/cmd/delete.go
@@ -0,0 +1,87 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package cmd
+
+import (
+ "context"
+ "fmt"
+ "os"
+
+ "flag"
+ "github.com/google/subcommands"
+ "gvisor.googlesource.com/gvisor/pkg/log"
+ "gvisor.googlesource.com/gvisor/runsc/boot"
+ "gvisor.googlesource.com/gvisor/runsc/container"
+)
+
+// Delete implements subcommands.Command for the "delete" command.
+type Delete struct {
+ // force indicates that the container should be terminated if running.
+ force bool
+}
+
+// Name implements subcommands.Command.Name.
+func (*Delete) Name() string {
+ return "delete"
+}
+
+// Synopsis implements subcommands.Command.Synopsis.
+func (*Delete) Synopsis() string {
+ return "delete resources held by a container"
+}
+
+// Usage implements subcommands.Command.Usage.
+func (*Delete) Usage() string {
+ return `delete [flags] <container ids>`
+}
+
+// SetFlags implements subcommands.Command.SetFlags.
+func (d *Delete) SetFlags(f *flag.FlagSet) {
+ f.BoolVar(&d.force, "force", false, "terminate container if running")
+}
+
+// Execute implements subcommands.Command.Execute.
+func (d *Delete) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) subcommands.ExitStatus {
+ if f.NArg() == 0 {
+ f.Usage()
+ return subcommands.ExitUsageError
+ }
+
+ conf := args[0].(*boot.Config)
+ if err := d.execute(f.Args(), conf); err != nil {
+ Fatalf("%v", err)
+ }
+ return subcommands.ExitSuccess
+}
+
+func (d *Delete) execute(ids []string, conf *boot.Config) error {
+ for _, id := range ids {
+ c, err := container.Load(conf.RootDir, id)
+ if err != nil {
+ if os.IsNotExist(err) && d.force {
+ log.Warningf("couldn't find container %q: %v", id, err)
+ return nil
+ }
+ return fmt.Errorf("loading container %q: %v", id, err)
+ }
+ if !d.force && c.Status != container.Created && c.Status != container.Stopped {
+ return fmt.Errorf("cannot delete container that is not stopped without --force flag")
+ }
+ if err := c.Destroy(); err != nil {
+ return fmt.Errorf("destroying container: %v", err)
+ }
+ }
+ return nil
+}
diff --git a/runsc/cmd/do.go b/runsc/cmd/do.go
new file mode 100644
index 000000000..8ea59046c
--- /dev/null
+++ b/runsc/cmd/do.go
@@ -0,0 +1,310 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package cmd
+
+import (
+ "context"
+ "encoding/json"
+ "fmt"
+ "io/ioutil"
+ "math/rand"
+ "os"
+ "os/exec"
+ "path/filepath"
+ "strconv"
+ "strings"
+ "syscall"
+
+ "flag"
+ "github.com/google/subcommands"
+ specs "github.com/opencontainers/runtime-spec/specs-go"
+ "gvisor.googlesource.com/gvisor/pkg/log"
+ "gvisor.googlesource.com/gvisor/runsc/boot"
+ "gvisor.googlesource.com/gvisor/runsc/container"
+ "gvisor.googlesource.com/gvisor/runsc/specutils"
+)
+
+// Do implements subcommands.Command for the "do" command. It sets up a simple
+// sandbox and executes the command inside it. See Usage() for more details.
+type Do struct {
+ root string
+ cwd string
+ ip string
+ networkNamespace bool
+}
+
+// Name implements subcommands.Command.Name.
+func (*Do) Name() string {
+ return "do"
+}
+
+// Synopsis implements subcommands.Command.Synopsis.
+func (*Do) Synopsis() string {
+ return "Simplistic way to execute a command inside the sandbox. It's to be used for testing only."
+}
+
+// Usage implements subcommands.Command.Usage.
+func (*Do) Usage() string {
+ return `do [flags] <cmd> - runs a command.
+
+This command starts a sandbox with host filesystem mounted inside as readonly,
+with a writable tmpfs overlay on top of it. The given command is executed inside
+the sandbox. It's to be used to quickly test applications without having to
+install or run docker. It doesn't give nearly as many options and it's to be
+used for testing only.
+`
+}
+
+// SetFlags implements subcommands.Command.SetFlags.
+func (c *Do) SetFlags(f *flag.FlagSet) {
+ f.StringVar(&c.root, "root", "/", `path to the root directory, defaults to "/"`)
+ f.StringVar(&c.cwd, "cwd", ".", "path to the current directory, defaults to the current directory")
+ f.StringVar(&c.ip, "ip", "192.168.10.2", "IPv4 address for the sandbox")
+ f.BoolVar(&c.networkNamespace, "netns", true, "run in a new network namespace")
+}
+
+// Execute implements subcommands.Command.Execute.
+func (c *Do) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) subcommands.ExitStatus {
+ if len(f.Args()) == 0 {
+ c.Usage()
+ return subcommands.ExitUsageError
+ }
+
+ conf := args[0].(*boot.Config)
+ waitStatus := args[1].(*syscall.WaitStatus)
+
+ // Map the entire host file system, but make it readonly with a writable
+ // overlay on top (ignore --overlay option).
+ conf.Overlay = true
+
+ hostname, err := os.Hostname()
+ if err != nil {
+ return Errorf("Error to retrieve hostname: %v", err)
+ }
+
+ absRoot, err := resolvePath(c.root)
+ if err != nil {
+ return Errorf("Error resolving root: %v", err)
+ }
+ absCwd, err := resolvePath(c.cwd)
+ if err != nil {
+ return Errorf("Error resolving current directory: %v", err)
+ }
+
+ spec := &specs.Spec{
+ Root: &specs.Root{
+ Path: absRoot,
+ },
+ Process: &specs.Process{
+ Cwd: absCwd,
+ Args: f.Args(),
+ Env: os.Environ(),
+ Capabilities: specutils.AllCapabilities(),
+ },
+ Hostname: hostname,
+ }
+
+ specutils.LogSpec(spec)
+
+ cid := fmt.Sprintf("runsc-%06d", rand.Int31n(1000000))
+ if !c.networkNamespace {
+ if conf.Network != boot.NetworkHost {
+ Fatalf("The current network namespace can be used only if --network=host is set", nil)
+ }
+ } else if conf.Network != boot.NetworkNone {
+ clean, err := c.setupNet(cid, spec)
+ if err != nil {
+ return Errorf("Error setting up network: %v", err)
+ }
+ defer clean()
+ }
+
+ out, err := json.Marshal(spec)
+ if err != nil {
+ return Errorf("Error to marshal spec: %v", err)
+ }
+ tmpDir, err := ioutil.TempDir("", "runsc-do")
+ if err != nil {
+ return Errorf("Error to create tmp dir: %v", err)
+ }
+ defer os.RemoveAll(tmpDir)
+
+ log.Infof("Changing configuration RootDir to %q", tmpDir)
+ conf.RootDir = tmpDir
+
+ cfgPath := filepath.Join(tmpDir, "config.json")
+ if err := ioutil.WriteFile(cfgPath, out, 0755); err != nil {
+ return Errorf("Error write spec: %v", err)
+ }
+
+ ws, err := container.Run(cid, spec, conf, tmpDir, "", "", "", false)
+ if err != nil {
+ return Errorf("running container: %v", err)
+ }
+
+ *waitStatus = ws
+ return subcommands.ExitSuccess
+}
+
+func resolvePath(path string) (string, error) {
+ var err error
+ path, err = filepath.Abs(path)
+ if err != nil {
+ return "", fmt.Errorf("resolving %q: %v", path, err)
+ }
+ path = filepath.Clean(path)
+ if err := syscall.Access(path, 0); err != nil {
+ return "", fmt.Errorf("unable to access %q: %v", path, err)
+ }
+ return path, nil
+}
+
+func (c *Do) setupNet(cid string, spec *specs.Spec) (func(), error) {
+ dev, err := defaultDevice()
+ if err != nil {
+ return nil, err
+ }
+ peerIP, err := calculatePeerIP(c.ip)
+ if err != nil {
+ return nil, err
+ }
+ veth, peer := deviceNames(cid)
+
+ cmds := []string{
+ fmt.Sprintf("ip link add %s type veth peer name %s", veth, peer),
+
+ // Setup device outside the namespace.
+ fmt.Sprintf("ip addr add %s/24 dev %s", peerIP, peer),
+ fmt.Sprintf("ip link set %s up", peer),
+
+ // Setup device inside the namespace.
+ fmt.Sprintf("ip netns add %s", cid),
+ fmt.Sprintf("ip link set %s netns %s", veth, cid),
+ fmt.Sprintf("ip netns exec %s ip addr add %s/24 dev %s", cid, c.ip, veth),
+ fmt.Sprintf("ip netns exec %s ip link set %s up", cid, veth),
+ fmt.Sprintf("ip netns exec %s ip link set lo up", cid),
+ fmt.Sprintf("ip netns exec %s ip route add default via %s", cid, peerIP),
+
+ // Enable network access.
+ "sysctl -w net.ipv4.ip_forward=1",
+ fmt.Sprintf("iptables -t nat -A POSTROUTING -s %s -o %s -j MASQUERADE", c.ip, dev),
+ fmt.Sprintf("iptables -A FORWARD -i %s -o %s -j ACCEPT", dev, peer),
+ fmt.Sprintf("iptables -A FORWARD -o %s -i %s -j ACCEPT", dev, peer),
+ }
+
+ for _, cmd := range cmds {
+ log.Debugf("Run %q", cmd)
+ args := strings.Split(cmd, " ")
+ c := exec.Command(args[0], args[1:]...)
+ if err := c.Run(); err != nil {
+ return nil, fmt.Errorf("failed to run %q: %v", cmd, err)
+ }
+ }
+
+ if err := makeFile("/etc/resolv.conf", "nameserver 8.8.8.8\n", spec); err != nil {
+ return nil, err
+ }
+ if err := makeFile("/etc/hostname", cid+"\n", spec); err != nil {
+ return nil, err
+ }
+ hosts := fmt.Sprintf("127.0.0.1\tlocalhost\n%s\t%s\n", c.ip, cid)
+ if err := makeFile("/etc/hosts", hosts, spec); err != nil {
+ return nil, err
+ }
+
+ if spec.Linux == nil {
+ spec.Linux = &specs.Linux{}
+ }
+ netns := specs.LinuxNamespace{
+ Type: specs.NetworkNamespace,
+ Path: filepath.Join("/var/run/netns", cid),
+ }
+ spec.Linux.Namespaces = append(spec.Linux.Namespaces, netns)
+
+ return func() { c.cleanNet(cid, dev) }, nil
+}
+
+func (c *Do) cleanNet(cid, dev string) {
+ veth, peer := deviceNames(cid)
+
+ cmds := []string{
+ fmt.Sprintf("ip link delete %s", peer),
+ fmt.Sprintf("ip netns delete %s", cid),
+
+ fmt.Sprintf("iptables -t nat -D POSTROUTING -s %s/24 -o %s -j MASQUERADE", c.ip, dev),
+ fmt.Sprintf("iptables -D FORWARD -i %s -o %s -j ACCEPT", dev, veth),
+ fmt.Sprintf("iptables -D FORWARD -o %s -i %s -j ACCEPT", dev, veth),
+ }
+
+ for _, cmd := range cmds {
+ log.Debugf("Run %q", cmd)
+ args := strings.Split(cmd, " ")
+ c := exec.Command(args[0], args[1:]...)
+ if err := c.Run(); err != nil {
+ log.Warningf("Failed to run %q: %v", cmd, err)
+ }
+ }
+}
+
+func deviceNames(cid string) (string, string) {
+ // Device name is limited to 15 letters.
+ return "ve-" + cid, "vp-" + cid
+
+}
+
+func defaultDevice() (string, error) {
+ out, err := exec.Command("ip", "route", "list", "default").CombinedOutput()
+ if err != nil {
+ return "", err
+ }
+ parts := strings.Split(string(out), " ")
+ if len(parts) < 5 {
+ return "", fmt.Errorf("malformed %q output: %q", "ip route list default", string(out))
+ }
+ return parts[4], nil
+}
+
+func makeFile(dest, content string, spec *specs.Spec) error {
+ tmpFile, err := ioutil.TempFile("", filepath.Base(dest))
+ if err != nil {
+ return err
+ }
+ if _, err := tmpFile.WriteString(content); err != nil {
+ return err
+ }
+ spec.Mounts = append(spec.Mounts, specs.Mount{
+ Source: tmpFile.Name(),
+ Destination: dest,
+ Type: "bind",
+ Options: []string{"ro"},
+ })
+ return nil
+}
+
+func calculatePeerIP(ip string) (string, error) {
+ parts := strings.Split(ip, ".")
+ if len(parts) != 4 {
+ return "", fmt.Errorf("invalid IP format %q", ip)
+ }
+ n, err := strconv.Atoi(parts[3])
+ if err != nil {
+ return "", fmt.Errorf("invalid IP format %q: %v", ip, err)
+ }
+ n++
+ if n > 255 {
+ n = 1
+ }
+ return fmt.Sprintf("%s.%s.%s.%d", parts[0], parts[1], parts[2], n), nil
+}
diff --git a/runsc/cmd/events.go b/runsc/cmd/events.go
new file mode 100644
index 000000000..c6bc8fc3a
--- /dev/null
+++ b/runsc/cmd/events.go
@@ -0,0 +1,111 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package cmd
+
+import (
+ "context"
+ "encoding/json"
+ "os"
+ "time"
+
+ "flag"
+ "github.com/google/subcommands"
+ "gvisor.googlesource.com/gvisor/pkg/log"
+ "gvisor.googlesource.com/gvisor/runsc/boot"
+ "gvisor.googlesource.com/gvisor/runsc/container"
+)
+
+// Events implements subcommands.Command for the "events" command.
+type Events struct {
+ // The interval between stats reporting.
+ intervalSec int
+ // If true, events will print a single group of stats and exit.
+ stats bool
+}
+
+// Name implements subcommands.Command.Name.
+func (*Events) Name() string {
+ return "events"
+}
+
+// Synopsis implements subcommands.Command.Synopsis.
+func (*Events) Synopsis() string {
+ return "display container events such as OOM notifications, cpu, memory, and IO usage statistics"
+}
+
+// Usage implements subcommands.Command.Usage.
+func (*Events) Usage() string {
+ return `<container-id>
+
+Where "<container-id>" is the name for the instance of the container.
+
+The events command displays information about the container. By default the
+information is displayed once every 5 seconds.
+
+OPTIONS:
+`
+}
+
+// SetFlags implements subcommands.Command.SetFlags.
+func (evs *Events) SetFlags(f *flag.FlagSet) {
+ f.IntVar(&evs.intervalSec, "interval", 5, "set the stats collection interval, in seconds")
+ f.BoolVar(&evs.stats, "stats", false, "display the container's stats then exit")
+}
+
+// Execute implements subcommands.Command.Execute.
+func (evs *Events) Execute(ctx context.Context, f *flag.FlagSet, args ...interface{}) subcommands.ExitStatus {
+ if f.NArg() != 1 {
+ f.Usage()
+ return subcommands.ExitUsageError
+ }
+
+ id := f.Arg(0)
+ conf := args[0].(*boot.Config)
+
+ c, err := container.Load(conf.RootDir, id)
+ if err != nil {
+ Fatalf("loading sandbox: %v", err)
+ }
+
+ // Repeatedly get stats from the container.
+ for {
+ // Get the event and print it as JSON.
+ ev, err := c.Event()
+ if err != nil {
+ log.Warningf("Error getting events for container: %v", err)
+ }
+ // err must be preserved because it is used below when breaking
+ // out of the loop.
+ b, err := json.Marshal(ev)
+ if err != nil {
+ log.Warningf("Error while marshalling event %v: %v", ev, err)
+ } else {
+ os.Stdout.Write(b)
+ }
+
+ // If we're only running once, break. If we're only running
+ // once and there was an error, the command failed.
+ if evs.stats {
+ if err != nil {
+ return subcommands.ExitFailure
+ }
+ break
+ }
+
+ time.Sleep(time.Duration(evs.intervalSec) * time.Second)
+ }
+
+ return subcommands.ExitSuccess
+}
diff --git a/runsc/cmd/exec.go b/runsc/cmd/exec.go
new file mode 100644
index 000000000..52fd7ac4b
--- /dev/null
+++ b/runsc/cmd/exec.go
@@ -0,0 +1,486 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package cmd
+
+import (
+ "context"
+ "encoding/json"
+ "fmt"
+ "io/ioutil"
+ "os"
+ "os/exec"
+ "path/filepath"
+ "strconv"
+ "strings"
+ "syscall"
+ "time"
+
+ "flag"
+ "github.com/google/subcommands"
+ specs "github.com/opencontainers/runtime-spec/specs-go"
+ "gvisor.googlesource.com/gvisor/pkg/log"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/control"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth"
+ "gvisor.googlesource.com/gvisor/pkg/urpc"
+ "gvisor.googlesource.com/gvisor/runsc/boot"
+ "gvisor.googlesource.com/gvisor/runsc/console"
+ "gvisor.googlesource.com/gvisor/runsc/container"
+ "gvisor.googlesource.com/gvisor/runsc/specutils"
+)
+
+const privateClearStatusFlag = "private-clear-status"
+
+// Exec implements subcommands.Command for the "exec" command.
+type Exec struct {
+ cwd string
+ env stringSlice
+ // user contains the UID and GID with which to run the new process.
+ user user
+ extraKGIDs stringSlice
+ caps stringSlice
+ detach bool
+ clearStatus bool
+ processPath string
+ pidFile string
+ internalPidFile string
+
+ // consoleSocket is the path to an AF_UNIX socket which will receive a
+ // file descriptor referencing the master end of the console's
+ // pseudoterminal.
+ consoleSocket string
+}
+
+// Name implements subcommands.Command.Name.
+func (*Exec) Name() string {
+ return "exec"
+}
+
+// Synopsis implements subcommands.Command.Synopsis.
+func (*Exec) Synopsis() string {
+ return "execute new process inside the container"
+}
+
+// Usage implements subcommands.Command.Usage.
+func (*Exec) Usage() string {
+ return `exec [command options] <container-id> <command> [command options] || --process process.json <container-id>
+
+
+Where "<container-id>" is the name for the instance of the container and
+"<command>" is the command to be executed in the container.
+"<command>" can't be empty unless a "-process" flag provided.
+
+EXAMPLE:
+If the container is configured to run /bin/ps the following will
+output a list of processes running in the container:
+
+ # runc exec <container-id> ps
+
+OPTIONS:
+`
+}
+
+// SetFlags implements subcommands.Command.SetFlags.
+func (ex *Exec) SetFlags(f *flag.FlagSet) {
+ f.StringVar(&ex.cwd, "cwd", "", "current working directory")
+ f.Var(&ex.env, "env", "set environment variables (e.g. '-env PATH=/bin -env TERM=xterm')")
+ f.Var(&ex.user, "user", "UID (format: <uid>[:<gid>])")
+ f.Var(&ex.extraKGIDs, "additional-gids", "additional gids")
+ f.Var(&ex.caps, "cap", "add a capability to the bounding set for the process")
+ f.BoolVar(&ex.detach, "detach", false, "detach from the container's process")
+ f.StringVar(&ex.processPath, "process", "", "path to the process.json")
+ f.StringVar(&ex.pidFile, "pid-file", "", "filename that the container pid will be written to")
+ f.StringVar(&ex.internalPidFile, "internal-pid-file", "", "filename that the container-internal pid will be written to")
+ f.StringVar(&ex.consoleSocket, "console-socket", "", "path to an AF_UNIX socket which will receive a file descriptor referencing the master end of the console's pseudoterminal")
+
+ // This flag clears the status of the exec'd process upon completion. It is
+ // only used when we fork due to --detach being set on the parent.
+ f.BoolVar(&ex.clearStatus, privateClearStatusFlag, true, "private flag, do not use")
+}
+
+// Execute implements subcommands.Command.Execute. It starts a process in an
+// already created container.
+func (ex *Exec) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) subcommands.ExitStatus {
+ e, id, err := ex.parseArgs(f)
+ if err != nil {
+ Fatalf("parsing process spec: %v", err)
+ }
+ conf := args[0].(*boot.Config)
+ waitStatus := args[1].(*syscall.WaitStatus)
+
+ c, err := container.Load(conf.RootDir, id)
+ if err != nil {
+ Fatalf("loading sandbox: %v", err)
+ }
+
+ // Replace empty settings with defaults from container.
+ if e.WorkingDirectory == "" {
+ e.WorkingDirectory = c.Spec.Process.Cwd
+ }
+ if e.Envv == nil {
+ e.Envv, err = resolveEnvs(c.Spec.Process.Env, ex.env)
+ if err != nil {
+ Fatalf("getting environment variables: %v", err)
+ }
+ }
+ if e.Capabilities == nil {
+ // enableRaw is set to true to prevent the filtering out of
+ // CAP_NET_RAW. This is the opposite of Create() because exec
+ // requires the capability to be set explicitly, while 'docker
+ // run' sets it by default.
+ e.Capabilities, err = specutils.Capabilities(true /* enableRaw */, c.Spec.Process.Capabilities)
+ if err != nil {
+ Fatalf("creating capabilities: %v", err)
+ }
+ }
+
+ // containerd expects an actual process to represent the container being
+ // executed. If detach was specified, starts a child in non-detach mode,
+ // write the child's PID to the pid file. So when the container returns, the
+ // child process will also return and signal containerd.
+ if ex.detach {
+ return ex.execAndWait(waitStatus)
+ }
+
+ // Start the new process and get it pid.
+ pid, err := c.Execute(e)
+ if err != nil {
+ Fatalf("getting processes for container: %v", err)
+ }
+
+ if e.StdioIsPty {
+ // Forward signals sent to this process to the foreground
+ // process in the sandbox.
+ stopForwarding := c.ForwardSignals(pid, true /* fgProcess */)
+ defer stopForwarding()
+ }
+
+ // Write the sandbox-internal pid if required.
+ if ex.internalPidFile != "" {
+ pidStr := []byte(strconv.Itoa(int(pid)))
+ if err := ioutil.WriteFile(ex.internalPidFile, pidStr, 0644); err != nil {
+ Fatalf("writing internal pid file %q: %v", ex.internalPidFile, err)
+ }
+ }
+
+ // Generate the pid file after the internal pid file is generated, so that users
+ // can safely assume that the internal pid file is ready after `runsc exec -d`
+ // returns.
+ if ex.pidFile != "" {
+ if err := ioutil.WriteFile(ex.pidFile, []byte(strconv.Itoa(os.Getpid())), 0644); err != nil {
+ Fatalf("writing pid file: %v", err)
+ }
+ }
+
+ // Wait for the process to exit.
+ ws, err := c.WaitPID(pid, ex.clearStatus)
+ if err != nil {
+ Fatalf("waiting on pid %d: %v", pid, err)
+ }
+ *waitStatus = ws
+ return subcommands.ExitSuccess
+}
+
+func (ex *Exec) execAndWait(waitStatus *syscall.WaitStatus) subcommands.ExitStatus {
+ binPath := specutils.ExePath
+ var args []string
+
+ // The command needs to write a pid file so that execAndWait can tell
+ // when it has started. If no pid-file was provided, we should use a
+ // filename in a temp directory.
+ pidFile := ex.pidFile
+ if pidFile == "" {
+ tmpDir, err := ioutil.TempDir("", "exec-pid-")
+ if err != nil {
+ Fatalf("creating TempDir: %v", err)
+ }
+ defer os.RemoveAll(tmpDir)
+ pidFile = filepath.Join(tmpDir, "pid")
+ args = append(args, "--pid-file="+pidFile)
+ }
+
+ // Add the rest of the args, excluding the "detach" flag.
+ for _, a := range os.Args[1:] {
+ if strings.Contains(a, "detach") {
+ // Replace with the "private-clear-status" flag, which tells
+ // the new process it's a detached child and shouldn't
+ // clear the exit status of the sentry process.
+ args = append(args, fmt.Sprintf("--%s=false", privateClearStatusFlag))
+ } else {
+ args = append(args, a)
+ }
+ }
+
+ cmd := exec.Command(binPath, args...)
+ cmd.Args[0] = "runsc-exec"
+
+ // Exec stdio defaults to current process stdio.
+ cmd.Stdin = os.Stdin
+ cmd.Stdout = os.Stdout
+ cmd.Stderr = os.Stderr
+
+ // If the console control socket file is provided, then create a new
+ // pty master/slave pair and set the TTY on the sandbox process.
+ if ex.consoleSocket != "" {
+ // Create a new TTY pair and send the master on the provided
+ // socket.
+ tty, err := console.NewWithSocket(ex.consoleSocket)
+ if err != nil {
+ Fatalf("setting up console with socket %q: %v", ex.consoleSocket, err)
+ }
+ defer tty.Close()
+
+ // Set stdio to the new TTY slave.
+ cmd.Stdin = tty
+ cmd.Stdout = tty
+ cmd.Stderr = tty
+ cmd.SysProcAttr = &syscall.SysProcAttr{
+ Setsid: true,
+ Setctty: true,
+ Ctty: int(tty.Fd()),
+ }
+ }
+
+ if err := cmd.Start(); err != nil {
+ Fatalf("failure to start child exec process, err: %v", err)
+ }
+
+ log.Infof("Started child (PID: %d) to exec and wait: %s %s", cmd.Process.Pid, binPath, args)
+
+ // Wait for PID file to ensure that child process has started. Otherwise,
+ // '--process' file is deleted as soon as this process returns and the child
+ // may fail to read it.
+ ready := func() (bool, error) {
+ pidb, err := ioutil.ReadFile(pidFile)
+ if err == nil {
+ // File appeared, check whether pid is fully written.
+ pid, err := strconv.Atoi(string(pidb))
+ if err != nil {
+ return false, nil
+ }
+ return pid == cmd.Process.Pid, nil
+ }
+ if pe, ok := err.(*os.PathError); !ok || pe.Err != syscall.ENOENT {
+ return false, err
+ }
+ // No file yet, continue to wait...
+ return false, nil
+ }
+ if err := specutils.WaitForReady(cmd.Process.Pid, 10*time.Second, ready); err != nil {
+ Fatalf("unexpected error waiting for PID file, err: %v", err)
+ }
+
+ *waitStatus = 0
+ return subcommands.ExitSuccess
+}
+
+// parseArgs parses exec information from the command line or a JSON file
+// depending on whether the --process flag was used. Returns an ExecArgs and
+// the ID of the container to be used.
+func (ex *Exec) parseArgs(f *flag.FlagSet) (*control.ExecArgs, string, error) {
+ if ex.processPath == "" {
+ // Requires at least a container ID and command.
+ if f.NArg() < 2 {
+ f.Usage()
+ return nil, "", fmt.Errorf("both a container-id and command are required")
+ }
+ e, err := ex.argsFromCLI(f.Args()[1:])
+ return e, f.Arg(0), err
+ }
+ // Requires only the container ID.
+ if f.NArg() != 1 {
+ f.Usage()
+ return nil, "", fmt.Errorf("a container-id is required")
+ }
+ e, err := ex.argsFromProcessFile()
+ return e, f.Arg(0), err
+}
+
+func (ex *Exec) argsFromCLI(argv []string) (*control.ExecArgs, error) {
+ extraKGIDs := make([]auth.KGID, 0, len(ex.extraKGIDs))
+ for _, s := range ex.extraKGIDs {
+ kgid, err := strconv.Atoi(s)
+ if err != nil {
+ Fatalf("parsing GID: %s, %v", s, err)
+ }
+ extraKGIDs = append(extraKGIDs, auth.KGID(kgid))
+ }
+
+ var caps *auth.TaskCapabilities
+ if len(ex.caps) > 0 {
+ var err error
+ caps, err = capabilities(ex.caps)
+ if err != nil {
+ return nil, fmt.Errorf("capabilities error: %v", err)
+ }
+ }
+
+ return &control.ExecArgs{
+ Argv: argv,
+ WorkingDirectory: ex.cwd,
+ KUID: ex.user.kuid,
+ KGID: ex.user.kgid,
+ ExtraKGIDs: extraKGIDs,
+ Capabilities: caps,
+ StdioIsPty: ex.consoleSocket != "",
+ FilePayload: urpc.FilePayload{[]*os.File{os.Stdin, os.Stdout, os.Stderr}},
+ }, nil
+}
+
+func (ex *Exec) argsFromProcessFile() (*control.ExecArgs, error) {
+ f, err := os.Open(ex.processPath)
+ if err != nil {
+ return nil, fmt.Errorf("error opening process file: %s, %v", ex.processPath, err)
+ }
+ defer f.Close()
+ var p specs.Process
+ if err := json.NewDecoder(f).Decode(&p); err != nil {
+ return nil, fmt.Errorf("error parsing process file: %s, %v", ex.processPath, err)
+ }
+ return argsFromProcess(&p)
+}
+
+// argsFromProcess performs all the non-IO conversion from the Process struct
+// to ExecArgs.
+func argsFromProcess(p *specs.Process) (*control.ExecArgs, error) {
+ // Create capabilities.
+ var caps *auth.TaskCapabilities
+ if p.Capabilities != nil {
+ var err error
+ // enableRaw is set to true to prevent the filtering out of
+ // CAP_NET_RAW. This is the opposite of Create() because exec
+ // requires the capability to be set explicitly, while 'docker
+ // run' sets it by default.
+ caps, err = specutils.Capabilities(true /* enableRaw */, p.Capabilities)
+ if err != nil {
+ return nil, fmt.Errorf("error creating capabilities: %v", err)
+ }
+ }
+
+ // Convert the spec's additional GIDs to KGIDs.
+ extraKGIDs := make([]auth.KGID, 0, len(p.User.AdditionalGids))
+ for _, GID := range p.User.AdditionalGids {
+ extraKGIDs = append(extraKGIDs, auth.KGID(GID))
+ }
+
+ return &control.ExecArgs{
+ Argv: p.Args,
+ Envv: p.Env,
+ WorkingDirectory: p.Cwd,
+ KUID: auth.KUID(p.User.UID),
+ KGID: auth.KGID(p.User.GID),
+ ExtraKGIDs: extraKGIDs,
+ Capabilities: caps,
+ StdioIsPty: p.Terminal,
+ FilePayload: urpc.FilePayload{Files: []*os.File{os.Stdin, os.Stdout, os.Stderr}},
+ }, nil
+}
+
+// resolveEnvs transforms lists of environment variables into a single list of
+// environment variables. If a variable is defined multiple times, the last
+// value is used.
+func resolveEnvs(envs ...[]string) ([]string, error) {
+ // First create a map of variable names to values. This removes any
+ // duplicates.
+ envMap := make(map[string]string)
+ for _, env := range envs {
+ for _, str := range env {
+ parts := strings.SplitN(str, "=", 2)
+ if len(parts) != 2 {
+ return nil, fmt.Errorf("invalid variable: %s", str)
+ }
+ envMap[parts[0]] = parts[1]
+ }
+ }
+ // Reassemble envMap into a list of environment variables of the form
+ // NAME=VALUE.
+ env := make([]string, 0, len(envMap))
+ for k, v := range envMap {
+ env = append(env, fmt.Sprintf("%s=%s", k, v))
+ }
+ return env, nil
+}
+
+// capabilities takes a list of capabilities as strings and returns an
+// auth.TaskCapabilities struct with those capabilities in every capability set.
+// This mimics runc's behavior.
+func capabilities(cs []string) (*auth.TaskCapabilities, error) {
+ var specCaps specs.LinuxCapabilities
+ for _, cap := range cs {
+ specCaps.Ambient = append(specCaps.Ambient, cap)
+ specCaps.Bounding = append(specCaps.Bounding, cap)
+ specCaps.Effective = append(specCaps.Effective, cap)
+ specCaps.Inheritable = append(specCaps.Inheritable, cap)
+ specCaps.Permitted = append(specCaps.Permitted, cap)
+ }
+ // enableRaw is set to true to prevent the filtering out of
+ // CAP_NET_RAW. This is the opposite of Create() because exec requires
+ // the capability to be set explicitly, while 'docker run' sets it by
+ // default.
+ return specutils.Capabilities(true /* enableRaw */, &specCaps)
+}
+
+// stringSlice allows a flag to be used multiple times, where each occurrence
+// adds a value to the flag. For example, a flag called "x" could be invoked
+// via "runsc exec -x foo -x bar", and the corresponding stringSlice would be
+// {"x", "y"}.
+type stringSlice []string
+
+// String implements flag.Value.String.
+func (ss *stringSlice) String() string {
+ return fmt.Sprintf("%v", *ss)
+}
+
+// Get implements flag.Value.Get.
+func (ss *stringSlice) Get() interface{} {
+ return ss
+}
+
+// Set implements flag.Value.Set.
+func (ss *stringSlice) Set(s string) error {
+ *ss = append(*ss, s)
+ return nil
+}
+
+// user allows -user to convey a UID and, optionally, a GID separated by a
+// colon.
+type user struct {
+ kuid auth.KUID
+ kgid auth.KGID
+}
+
+func (u *user) String() string {
+ return fmt.Sprintf("%+v", *u)
+}
+
+func (u *user) Get() interface{} {
+ return u
+}
+
+func (u *user) Set(s string) error {
+ parts := strings.SplitN(s, ":", 2)
+ kuid, err := strconv.Atoi(parts[0])
+ if err != nil {
+ return fmt.Errorf("couldn't parse UID: %s", parts[0])
+ }
+ u.kuid = auth.KUID(kuid)
+ if len(parts) > 1 {
+ kgid, err := strconv.Atoi(parts[1])
+ if err != nil {
+ return fmt.Errorf("couldn't parse GID: %s", parts[1])
+ }
+ u.kgid = auth.KGID(kgid)
+ }
+ return nil
+}
diff --git a/runsc/cmd/gofer.go b/runsc/cmd/gofer.go
new file mode 100644
index 000000000..bccb29397
--- /dev/null
+++ b/runsc/cmd/gofer.go
@@ -0,0 +1,446 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package cmd
+
+import (
+ "context"
+ "encoding/json"
+ "fmt"
+ "os"
+ "path/filepath"
+ "strings"
+ "sync"
+ "syscall"
+
+ "flag"
+ "github.com/google/subcommands"
+ specs "github.com/opencontainers/runtime-spec/specs-go"
+ "gvisor.googlesource.com/gvisor/pkg/log"
+ "gvisor.googlesource.com/gvisor/pkg/p9"
+ "gvisor.googlesource.com/gvisor/pkg/unet"
+ "gvisor.googlesource.com/gvisor/runsc/boot"
+ "gvisor.googlesource.com/gvisor/runsc/fsgofer"
+ "gvisor.googlesource.com/gvisor/runsc/fsgofer/filter"
+ "gvisor.googlesource.com/gvisor/runsc/specutils"
+)
+
+var caps = []string{
+ "CAP_CHOWN",
+ "CAP_DAC_OVERRIDE",
+ "CAP_DAC_READ_SEARCH",
+ "CAP_FOWNER",
+ "CAP_FSETID",
+ "CAP_SYS_CHROOT",
+}
+
+// goferCaps is the minimal set of capabilities needed by the Gofer to operate
+// on files.
+var goferCaps = &specs.LinuxCapabilities{
+ Bounding: caps,
+ Effective: caps,
+ Permitted: caps,
+}
+
+// Gofer implements subcommands.Command for the "gofer" command, which starts a
+// filesystem gofer. This command should not be called directly.
+type Gofer struct {
+ bundleDir string
+ ioFDs intFlags
+ applyCaps bool
+ setUpRoot bool
+
+ panicOnWrite bool
+ specFD int
+ mountsFD int
+}
+
+// Name implements subcommands.Command.
+func (*Gofer) Name() string {
+ return "gofer"
+}
+
+// Synopsis implements subcommands.Command.
+func (*Gofer) Synopsis() string {
+ return "launch a gofer process that serves files over 9P protocol (internal use only)"
+}
+
+// Usage implements subcommands.Command.
+func (*Gofer) Usage() string {
+ return `gofer [flags]`
+}
+
+// SetFlags implements subcommands.Command.
+func (g *Gofer) SetFlags(f *flag.FlagSet) {
+ f.StringVar(&g.bundleDir, "bundle", "", "path to the root of the bundle directory, defaults to the current directory")
+ f.Var(&g.ioFDs, "io-fds", "list of FDs to connect 9P servers. They must follow this order: root first, then mounts as defined in the spec")
+ f.BoolVar(&g.applyCaps, "apply-caps", true, "if true, apply capabilities to restrict what the Gofer process can do")
+ f.BoolVar(&g.panicOnWrite, "panic-on-write", false, "if true, panics on attempts to write to RO mounts. RW mounts are unnaffected")
+ f.BoolVar(&g.setUpRoot, "setup-root", true, "if true, set up an empty root for the process")
+ f.IntVar(&g.specFD, "spec-fd", -1, "required fd with the container spec")
+ f.IntVar(&g.mountsFD, "mounts-fd", -1, "mountsFD is the file descriptor to write list of mounts after they have been resolved (direct paths, no symlinks).")
+}
+
+// Execute implements subcommands.Command.
+func (g *Gofer) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) subcommands.ExitStatus {
+ if g.bundleDir == "" || len(g.ioFDs) < 1 || g.specFD < 0 {
+ f.Usage()
+ return subcommands.ExitUsageError
+ }
+
+ specFile := os.NewFile(uintptr(g.specFD), "spec file")
+ defer specFile.Close()
+ spec, err := specutils.ReadSpecFromFile(g.bundleDir, specFile)
+ if err != nil {
+ Fatalf("reading spec: %v", err)
+ }
+
+ conf := args[0].(*boot.Config)
+
+ if g.setUpRoot {
+ if err := setupRootFS(spec, conf); err != nil {
+ Fatalf("Error setting up root FS: %v", err)
+ }
+ }
+ if g.applyCaps {
+ // Disable caps when calling myself again.
+ // Note: minimal argument handling for the default case to keep it simple.
+ args := os.Args
+ args = append(args, "--apply-caps=false", "--setup-root=false")
+ if err := setCapsAndCallSelf(args, goferCaps); err != nil {
+ Fatalf("Unable to apply caps: %v", err)
+ }
+ panic("unreachable")
+ }
+
+ // Find what path is going to be served by this gofer.
+ root := spec.Root.Path
+ if !conf.TestOnlyAllowRunAsCurrentUserWithoutChroot {
+ root = "/root"
+ }
+
+ // Resolve mount points paths, then replace mounts from our spec and send the
+ // mount list over to the sandbox, so they are both in sync.
+ //
+ // Note that all mount points have been mounted in the proper location in
+ // setupRootFS().
+ cleanMounts, err := resolveMounts(spec.Mounts, root)
+ if err != nil {
+ Fatalf("Failure to resolve mounts: %v", err)
+ }
+ spec.Mounts = cleanMounts
+ go func() {
+ if err := g.writeMounts(cleanMounts); err != nil {
+ panic(fmt.Sprintf("Failed to write mounts: %v", err))
+ }
+ }()
+
+ specutils.LogSpec(spec)
+
+ // fsgofer should run with a umask of 0, because we want to preserve file
+ // modes exactly as sent by the sandbox, which will have applied its own umask.
+ syscall.Umask(0)
+
+ if err := syscall.Chroot(root); err != nil {
+ Fatalf("failed to chroot to %q: %v", root, err)
+ }
+ if err := syscall.Chdir("/"); err != nil {
+ Fatalf("changing working dir: %v", err)
+ }
+ log.Infof("Process chroot'd to %q", root)
+
+ // Start with root mount, then add any other additional mount as needed.
+ ats := make([]p9.Attacher, 0, len(spec.Mounts)+1)
+ ap, err := fsgofer.NewAttachPoint("/", fsgofer.Config{
+ ROMount: spec.Root.Readonly,
+ PanicOnWrite: g.panicOnWrite,
+ })
+ if err != nil {
+ Fatalf("creating attach point: %v", err)
+ }
+ ats = append(ats, ap)
+ log.Infof("Serving %q mapped to %q on FD %d (ro: %t)", "/", root, g.ioFDs[0], spec.Root.Readonly)
+
+ mountIdx := 1 // first one is the root
+ for _, m := range spec.Mounts {
+ if specutils.Is9PMount(m) {
+ cfg := fsgofer.Config{
+ ROMount: isReadonlyMount(m.Options),
+ PanicOnWrite: g.panicOnWrite,
+ }
+ ap, err := fsgofer.NewAttachPoint(m.Destination, cfg)
+ if err != nil {
+ Fatalf("creating attach point: %v", err)
+ }
+ ats = append(ats, ap)
+
+ if mountIdx >= len(g.ioFDs) {
+ Fatalf("no FD found for mount. Did you forget --io-fd? mount: %d, %v", len(g.ioFDs), m)
+ }
+ log.Infof("Serving %q mapped on FD %d (ro: %t)", m.Destination, g.ioFDs[mountIdx], cfg.ROMount)
+ mountIdx++
+ }
+ }
+ if mountIdx != len(g.ioFDs) {
+ Fatalf("too many FDs passed for mounts. mounts: %d, FDs: %d", mountIdx, len(g.ioFDs))
+ }
+
+ if err := filter.Install(); err != nil {
+ Fatalf("installing seccomp filters: %v", err)
+ }
+
+ runServers(ats, g.ioFDs)
+ return subcommands.ExitSuccess
+}
+
+func runServers(ats []p9.Attacher, ioFDs []int) {
+ // Run the loops and wait for all to exit.
+ var wg sync.WaitGroup
+ for i, ioFD := range ioFDs {
+ wg.Add(1)
+ go func(ioFD int, at p9.Attacher) {
+ socket, err := unet.NewSocket(ioFD)
+ if err != nil {
+ Fatalf("creating server on FD %d: %v", ioFD, err)
+ }
+ s := p9.NewServer(at)
+ if err := s.Handle(socket); err != nil {
+ Fatalf("P9 server returned error. Gofer is shutting down. FD: %d, err: %v", ioFD, err)
+ }
+ wg.Done()
+ }(ioFD, ats[i])
+ }
+ wg.Wait()
+ log.Infof("All 9P servers exited.")
+}
+
+func (g *Gofer) writeMounts(mounts []specs.Mount) error {
+ bytes, err := json.Marshal(mounts)
+ if err != nil {
+ return err
+ }
+
+ f := os.NewFile(uintptr(g.mountsFD), "mounts file")
+ defer f.Close()
+
+ for written := 0; written < len(bytes); {
+ w, err := f.Write(bytes[written:])
+ if err != nil {
+ return err
+ }
+ written += w
+ }
+ return nil
+}
+
+func isReadonlyMount(opts []string) bool {
+ for _, o := range opts {
+ if o == "ro" {
+ return true
+ }
+ }
+ return false
+}
+
+func setupRootFS(spec *specs.Spec, conf *boot.Config) error {
+ // Convert all shared mounts into slaves to be sure that nothing will be
+ // propagated outside of our namespace.
+ if err := syscall.Mount("", "/", "", syscall.MS_SLAVE|syscall.MS_REC, ""); err != nil {
+ Fatalf("error converting mounts: %v", err)
+ }
+
+ root := spec.Root.Path
+ if !conf.TestOnlyAllowRunAsCurrentUserWithoutChroot {
+ // FIXME: runsc can't be re-executed without
+ // /proc, so we create a tmpfs mount, mount ./proc and ./root
+ // there, then move this mount to the root and after
+ // setCapsAndCallSelf, runsc will chroot into /root.
+ //
+ // We need a directory to construct a new root and we know that
+ // runsc can't start without /proc, so we can use it for this.
+ flags := uintptr(syscall.MS_NOSUID | syscall.MS_NODEV | syscall.MS_NOEXEC)
+ if err := syscall.Mount("runsc-root", "/proc", "tmpfs", flags, ""); err != nil {
+ Fatalf("error mounting tmpfs: %v", err)
+ }
+
+ // Prepare tree structure for pivot_root(2).
+ os.Mkdir("/proc/proc", 0755)
+ os.Mkdir("/proc/root", 0755)
+ if err := syscall.Mount("runsc-proc", "/proc/proc", "proc", flags|syscall.MS_RDONLY, ""); err != nil {
+ Fatalf("error mounting proc: %v", err)
+ }
+ root = "/proc/root"
+ }
+
+ // Mount root path followed by submounts.
+ if err := syscall.Mount(spec.Root.Path, root, "bind", syscall.MS_BIND|syscall.MS_REC, ""); err != nil {
+ return fmt.Errorf("mounting root on root (%q) err: %v", root, err)
+ }
+
+ flags := uint32(syscall.MS_SLAVE | syscall.MS_REC)
+ if spec.Linux != nil && spec.Linux.RootfsPropagation != "" {
+ flags = specutils.PropOptionsToFlags([]string{spec.Linux.RootfsPropagation})
+ }
+ if err := syscall.Mount("", root, "", uintptr(flags), ""); err != nil {
+ return fmt.Errorf("mounting root (%q) with flags: %#x, err: %v", root, flags, err)
+ }
+
+ // Replace the current spec, with the clean spec with symlinks resolved.
+ if err := setupMounts(spec.Mounts, root); err != nil {
+ Fatalf("error setting up FS: %v", err)
+ }
+
+ // Create working directory if needed.
+ if spec.Process.Cwd != "" {
+ dst, err := resolveSymlinks(root, spec.Process.Cwd)
+ if err != nil {
+ return fmt.Errorf("resolving symlinks to %q: %v", spec.Process.Cwd, err)
+ }
+ if err := os.MkdirAll(dst, 0755); err != nil {
+ return fmt.Errorf("creating working directory %q: %v", spec.Process.Cwd, err)
+ }
+ }
+
+ // Check if root needs to be remounted as readonly.
+ if spec.Root.Readonly {
+ // If root is a mount point but not read-only, we can change mount options
+ // to make it read-only for extra safety.
+ log.Infof("Remounting root as readonly: %q", root)
+ flags := uintptr(syscall.MS_BIND | syscall.MS_REMOUNT | syscall.MS_RDONLY | syscall.MS_REC)
+ if err := syscall.Mount(root, root, "bind", flags, ""); err != nil {
+ return fmt.Errorf("remounting root as read-only with source: %q, target: %q, flags: %#x, err: %v", root, root, flags, err)
+ }
+ }
+
+ if !conf.TestOnlyAllowRunAsCurrentUserWithoutChroot {
+ if err := pivotRoot("/proc"); err != nil {
+ Fatalf("faild to change the root file system: %v", err)
+ }
+ if err := os.Chdir("/"); err != nil {
+ Fatalf("failed to change working directory")
+ }
+ }
+ return nil
+}
+
+// setupMounts binds mount all mounts specified in the spec in their correct
+// location inside root. It will resolve relative paths and symlinks. It also
+// creates directories as needed.
+func setupMounts(mounts []specs.Mount, root string) error {
+ for _, m := range mounts {
+ if m.Type != "bind" || !specutils.IsSupportedDevMount(m) {
+ continue
+ }
+
+ dst, err := resolveSymlinks(root, m.Destination)
+ if err != nil {
+ return fmt.Errorf("resolving symlinks to %q: %v", m.Destination, err)
+ }
+
+ flags := specutils.OptionsToFlags(m.Options) | syscall.MS_BIND
+ log.Infof("Mounting src: %q, dst: %q, flags: %#x", m.Source, dst, flags)
+ if err := specutils.Mount(m.Source, dst, m.Type, flags); err != nil {
+ return fmt.Errorf("mounting %v: %v", m, err)
+ }
+
+ // Set propagation options that cannot be set together with other options.
+ flags = specutils.PropOptionsToFlags(m.Options)
+ if flags != 0 {
+ if err := syscall.Mount("", dst, "", uintptr(flags), ""); err != nil {
+ return fmt.Errorf("mount dst: %q, flags: %#x, err: %v", dst, flags, err)
+ }
+ }
+ }
+ return nil
+}
+
+// resolveMounts resolved relative paths and symlinks to mount points.
+//
+// Note: mount points must already be in place for resolution to work.
+// Otherwise, it may follow symlinks to locations that would be overwritten
+// with another mount point and return the wrong location. In short, make sure
+// setupMounts() has been called before.
+func resolveMounts(mounts []specs.Mount, root string) ([]specs.Mount, error) {
+ cleanMounts := make([]specs.Mount, 0, len(mounts))
+ for _, m := range mounts {
+ if m.Type != "bind" || !specutils.IsSupportedDevMount(m) {
+ cleanMounts = append(cleanMounts, m)
+ continue
+ }
+ dst, err := resolveSymlinks(root, m.Destination)
+ if err != nil {
+ return nil, fmt.Errorf("resolving symlinks to %q: %v", m.Destination, err)
+ }
+ relDst, err := filepath.Rel(root, dst)
+ if err != nil {
+ panic(fmt.Sprintf("%q could not be made relative to %q: %v", dst, root, err))
+ }
+ cpy := m
+ cpy.Destination = filepath.Join("/", relDst)
+ cleanMounts = append(cleanMounts, cpy)
+ }
+ return cleanMounts, nil
+}
+
+// ResolveSymlinks walks 'rel' having 'root' as the root directory. If there are
+// symlinks, they are evaluated relative to 'root' to ensure the end result is
+// the same as if the process was running inside the container.
+func resolveSymlinks(root, rel string) (string, error) {
+ return resolveSymlinksImpl(root, root, rel, 255)
+}
+
+func resolveSymlinksImpl(root, base, rel string, followCount uint) (string, error) {
+ if followCount == 0 {
+ return "", fmt.Errorf("too many symlinks to follow, path: %q", filepath.Join(base, rel))
+ }
+
+ rel = filepath.Clean(rel)
+ for _, name := range strings.Split(rel, string(filepath.Separator)) {
+ if name == "" {
+ continue
+ }
+ // Note that Join() resolves things like ".." and returns a clean path.
+ path := filepath.Join(base, name)
+ if !strings.HasPrefix(path, root) {
+ // One cannot '..' their way out of root.
+ path = root
+ continue
+ }
+ fi, err := os.Lstat(path)
+ if err != nil {
+ if !os.IsNotExist(err) {
+ return "", err
+ }
+ // Not found means there is no symlink to check. Just keep walking dirs.
+ base = path
+ continue
+ }
+ if fi.Mode()&os.ModeSymlink != 0 {
+ link, err := os.Readlink(path)
+ if err != nil {
+ return "", err
+ }
+ if filepath.IsAbs(link) {
+ base = root
+ }
+ base, err = resolveSymlinksImpl(root, base, link, followCount-1)
+ if err != nil {
+ return "", err
+ }
+ continue
+ }
+ base = path
+ }
+ return base, nil
+}
diff --git a/runsc/cmd/kill.go b/runsc/cmd/kill.go
new file mode 100644
index 000000000..aed5f3291
--- /dev/null
+++ b/runsc/cmd/kill.go
@@ -0,0 +1,154 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package cmd
+
+import (
+ "context"
+ "fmt"
+ "strconv"
+ "strings"
+ "syscall"
+
+ "flag"
+ "github.com/google/subcommands"
+ "golang.org/x/sys/unix"
+ "gvisor.googlesource.com/gvisor/runsc/boot"
+ "gvisor.googlesource.com/gvisor/runsc/container"
+)
+
+// Kill implements subcommands.Command for the "kill" command.
+type Kill struct {
+ all bool
+ pid int
+}
+
+// Name implements subcommands.Command.Name.
+func (*Kill) Name() string {
+ return "kill"
+}
+
+// Synopsis implements subcommands.Command.Synopsis.
+func (*Kill) Synopsis() string {
+ return "sends a signal to the container"
+}
+
+// Usage implements subcommands.Command.Usage.
+func (*Kill) Usage() string {
+ return `kill <container id> [signal]`
+}
+
+// SetFlags implements subcommands.Command.SetFlags.
+func (k *Kill) SetFlags(f *flag.FlagSet) {
+ f.BoolVar(&k.all, "all", false, "send the specified signal to all processes inside the container")
+ f.IntVar(&k.pid, "pid", 0, "send the specified signal to a specific process")
+}
+
+// Execute implements subcommands.Command.Execute.
+func (k *Kill) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) subcommands.ExitStatus {
+ if f.NArg() == 0 || f.NArg() > 2 {
+ f.Usage()
+ return subcommands.ExitUsageError
+ }
+
+ id := f.Arg(0)
+ conf := args[0].(*boot.Config)
+
+ if k.pid != 0 && k.all {
+ Fatalf("it is invalid to specify both --all and --pid")
+ }
+
+ c, err := container.Load(conf.RootDir, id)
+ if err != nil {
+ Fatalf("loading container: %v", err)
+ }
+
+ // The OCI command-line spec says that the signal should be specified
+ // via a flag, but runc (and things that call runc) pass it as an
+ // argument.
+ signal := f.Arg(1)
+ if signal == "" {
+ signal = "TERM"
+ }
+
+ sig, err := parseSignal(signal)
+ if err != nil {
+ Fatalf("%v", err)
+ }
+
+ if k.pid != 0 {
+ if err := c.SignalProcess(sig, int32(k.pid)); err != nil {
+ Fatalf("failed to signal pid %d: %v", k.pid, err)
+ }
+ } else {
+ if err := c.SignalContainer(sig, k.all); err != nil {
+ Fatalf("%v", err)
+ }
+ }
+ return subcommands.ExitSuccess
+}
+
+func parseSignal(s string) (syscall.Signal, error) {
+ n, err := strconv.Atoi(s)
+ if err == nil {
+ sig := syscall.Signal(n)
+ for _, msig := range signalMap {
+ if sig == msig {
+ return sig, nil
+ }
+ }
+ return -1, fmt.Errorf("unknown signal %q", s)
+ }
+ if sig, ok := signalMap[strings.TrimPrefix(strings.ToUpper(s), "SIG")]; ok {
+ return sig, nil
+ }
+ return -1, fmt.Errorf("unknown signal %q", s)
+}
+
+var signalMap = map[string]syscall.Signal{
+ "ABRT": unix.SIGABRT,
+ "ALRM": unix.SIGALRM,
+ "BUS": unix.SIGBUS,
+ "CHLD": unix.SIGCHLD,
+ "CLD": unix.SIGCLD,
+ "CONT": unix.SIGCONT,
+ "FPE": unix.SIGFPE,
+ "HUP": unix.SIGHUP,
+ "ILL": unix.SIGILL,
+ "INT": unix.SIGINT,
+ "IO": unix.SIGIO,
+ "IOT": unix.SIGIOT,
+ "KILL": unix.SIGKILL,
+ "PIPE": unix.SIGPIPE,
+ "POLL": unix.SIGPOLL,
+ "PROF": unix.SIGPROF,
+ "PWR": unix.SIGPWR,
+ "QUIT": unix.SIGQUIT,
+ "SEGV": unix.SIGSEGV,
+ "STKFLT": unix.SIGSTKFLT,
+ "STOP": unix.SIGSTOP,
+ "SYS": unix.SIGSYS,
+ "TERM": unix.SIGTERM,
+ "TRAP": unix.SIGTRAP,
+ "TSTP": unix.SIGTSTP,
+ "TTIN": unix.SIGTTIN,
+ "TTOU": unix.SIGTTOU,
+ "URG": unix.SIGURG,
+ "USR1": unix.SIGUSR1,
+ "USR2": unix.SIGUSR2,
+ "VTALRM": unix.SIGVTALRM,
+ "WINCH": unix.SIGWINCH,
+ "XCPU": unix.SIGXCPU,
+ "XFSZ": unix.SIGXFSZ,
+}
diff --git a/runsc/cmd/list.go b/runsc/cmd/list.go
new file mode 100644
index 000000000..1f5ca2473
--- /dev/null
+++ b/runsc/cmd/list.go
@@ -0,0 +1,117 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package cmd
+
+import (
+ "context"
+ "encoding/json"
+ "fmt"
+ "os"
+ "text/tabwriter"
+ "time"
+
+ "flag"
+ "github.com/google/subcommands"
+ specs "github.com/opencontainers/runtime-spec/specs-go"
+ "gvisor.googlesource.com/gvisor/runsc/boot"
+ "gvisor.googlesource.com/gvisor/runsc/container"
+)
+
+// List implements subcommands.Command for the "list" command for the "list" command.
+type List struct {
+ quiet bool
+ format string
+}
+
+// Name implements subcommands.command.name.
+func (*List) Name() string {
+ return "list"
+}
+
+// Synopsis implements subcommands.Command.Synopsis.
+func (*List) Synopsis() string {
+ return "list containers started by runsc with the given root"
+}
+
+// Usage implements subcommands.Command.Usage.
+func (*List) Usage() string {
+ return `list [flags]`
+}
+
+// SetFlags implements subcommands.Command.SetFlags.
+func (l *List) SetFlags(f *flag.FlagSet) {
+ f.BoolVar(&l.quiet, "quiet", false, "only list container ids")
+ f.StringVar(&l.format, "format", "text", "output format: 'text' (default) or 'json'")
+}
+
+// Execute implements subcommands.Command.Execute.
+func (l *List) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) subcommands.ExitStatus {
+ if f.NArg() != 0 {
+ f.Usage()
+ return subcommands.ExitUsageError
+ }
+
+ conf := args[0].(*boot.Config)
+ ids, err := container.List(conf.RootDir)
+ if err != nil {
+ Fatalf("%v", err)
+ }
+
+ if l.quiet {
+ for _, id := range ids {
+ fmt.Println(id)
+ }
+ return subcommands.ExitSuccess
+ }
+
+ // Collect the containers.
+ var containers []*container.Container
+ for _, id := range ids {
+ c, err := container.Load(conf.RootDir, id)
+ if err != nil {
+ Fatalf("loading container %q: %v", id, err)
+ }
+ containers = append(containers, c)
+ }
+
+ switch l.format {
+ case "text":
+ // Print a nice table.
+ w := tabwriter.NewWriter(os.Stdout, 12, 1, 3, ' ', 0)
+ fmt.Fprint(w, "ID\tPID\tSTATUS\tBUNDLE\tCREATED\tOWNER\n")
+ for _, c := range containers {
+ fmt.Fprintf(w, "%s\t%d\t%s\t%s\t%s\t%s\n",
+ c.ID,
+ c.SandboxPid(),
+ c.Status,
+ c.BundleDir,
+ c.CreatedAt.Format(time.RFC3339Nano),
+ c.Owner)
+ }
+ w.Flush()
+ case "json":
+ // Print just the states.
+ var states []specs.State
+ for _, c := range containers {
+ states = append(states, c.State())
+ }
+ if err := json.NewEncoder(os.Stdout).Encode(states); err != nil {
+ Fatalf("marshaling container state: %v", err)
+ }
+ default:
+ Fatalf("unknown list format %q", l.format)
+ }
+ return subcommands.ExitSuccess
+}
diff --git a/runsc/cmd/path.go b/runsc/cmd/path.go
new file mode 100644
index 000000000..0e9ef7fa5
--- /dev/null
+++ b/runsc/cmd/path.go
@@ -0,0 +1,28 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package cmd
+
+import (
+ "os"
+)
+
+// getwdOrDie returns the current working directory and dies if it cannot.
+func getwdOrDie() string {
+ wd, err := os.Getwd()
+ if err != nil {
+ Fatalf("getting current working directory: %v", err)
+ }
+ return wd
+}
diff --git a/runsc/cmd/pause.go b/runsc/cmd/pause.go
new file mode 100644
index 000000000..11b36aa10
--- /dev/null
+++ b/runsc/cmd/pause.go
@@ -0,0 +1,68 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package cmd
+
+import (
+ "context"
+
+ "flag"
+ "github.com/google/subcommands"
+ "gvisor.googlesource.com/gvisor/runsc/boot"
+ "gvisor.googlesource.com/gvisor/runsc/container"
+)
+
+// Pause implements subcommands.Command for the "pause" command.
+type Pause struct{}
+
+// Name implements subcommands.Command.Name.
+func (*Pause) Name() string {
+ return "pause"
+}
+
+// Synopsis implements subcommands.Command.Synopsis.
+func (*Pause) Synopsis() string {
+ return "pause suspends all processes in a container"
+}
+
+// Usage implements subcommands.Command.Usage.
+func (*Pause) Usage() string {
+ return `pause <container id> - pause process in instance of container.`
+}
+
+// SetFlags implements subcommands.Command.SetFlags.
+func (*Pause) SetFlags(f *flag.FlagSet) {
+}
+
+// Execute implements subcommands.Command.Execute.
+func (*Pause) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) subcommands.ExitStatus {
+ if f.NArg() != 1 {
+ f.Usage()
+ return subcommands.ExitUsageError
+ }
+
+ id := f.Arg(0)
+ conf := args[0].(*boot.Config)
+
+ cont, err := container.Load(conf.RootDir, id)
+ if err != nil {
+ Fatalf("loading container: %v", err)
+ }
+
+ if err := cont.Pause(); err != nil {
+ Fatalf("pause failed: %v", err)
+ }
+
+ return subcommands.ExitSuccess
+}
diff --git a/runsc/cmd/ps.go b/runsc/cmd/ps.go
new file mode 100644
index 000000000..3a3e6f17a
--- /dev/null
+++ b/runsc/cmd/ps.go
@@ -0,0 +1,86 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package cmd
+
+import (
+ "context"
+ "fmt"
+
+ "flag"
+ "github.com/google/subcommands"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/control"
+ "gvisor.googlesource.com/gvisor/runsc/boot"
+ "gvisor.googlesource.com/gvisor/runsc/container"
+)
+
+// PS implements subcommands.Command for the "ps" command.
+type PS struct {
+ format string
+}
+
+// Name implements subcommands.Command.Name.
+func (*PS) Name() string {
+ return "ps"
+}
+
+// Synopsis implements subcommands.Command.Synopsis.
+func (*PS) Synopsis() string {
+ return "ps displays the processes running inside a container"
+}
+
+// Usage implements subcommands.Command.Usage.
+func (*PS) Usage() string {
+ return "<container-id> [ps options]"
+}
+
+// SetFlags implements subcommands.Command.SetFlags.
+func (ps *PS) SetFlags(f *flag.FlagSet) {
+ f.StringVar(&ps.format, "format", "table", "output format. Select one of: table or json (default: table)")
+}
+
+// Execute implements subcommands.Command.Execute.
+func (ps *PS) Execute(ctx context.Context, f *flag.FlagSet, args ...interface{}) subcommands.ExitStatus {
+ if f.NArg() != 1 {
+ f.Usage()
+ return subcommands.ExitUsageError
+ }
+
+ id := f.Arg(0)
+ conf := args[0].(*boot.Config)
+
+ c, err := container.Load(conf.RootDir, id)
+ if err != nil {
+ Fatalf("loading sandbox: %v", err)
+ }
+ pList, err := c.Processes()
+ if err != nil {
+ Fatalf("getting processes for container: %v", err)
+ }
+
+ switch ps.format {
+ case "table":
+ fmt.Println(control.ProcessListToTable(pList))
+ case "json":
+ o, err := control.PrintPIDsJSON(pList)
+ if err != nil {
+ Fatalf("generating JSON: %v", err)
+ }
+ fmt.Println(o)
+ default:
+ Fatalf("unsupported format: %s", ps.format)
+ }
+
+ return subcommands.ExitSuccess
+}
diff --git a/runsc/cmd/restore.go b/runsc/cmd/restore.go
new file mode 100644
index 000000000..3ab2f5676
--- /dev/null
+++ b/runsc/cmd/restore.go
@@ -0,0 +1,106 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package cmd
+
+import (
+ "context"
+ "path/filepath"
+ "syscall"
+
+ "flag"
+ "github.com/google/subcommands"
+ "gvisor.googlesource.com/gvisor/runsc/boot"
+ "gvisor.googlesource.com/gvisor/runsc/container"
+ "gvisor.googlesource.com/gvisor/runsc/specutils"
+)
+
+// Restore implements subcommands.Command for the "restore" command.
+type Restore struct {
+ // Restore flags are a super-set of those for Create.
+ Create
+
+ // imagePath is the path to the saved container image
+ imagePath string
+
+ // detach indicates that runsc has to start a process and exit without waiting it.
+ detach bool
+}
+
+// Name implements subcommands.Command.Name.
+func (*Restore) Name() string {
+ return "restore"
+}
+
+// Synopsis implements subcommands.Command.Synopsis.
+func (*Restore) Synopsis() string {
+ return "restore a saved state of container (experimental)"
+}
+
+// Usage implements subcommands.Command.Usage.
+func (*Restore) Usage() string {
+ return `restore [flags] <container id> - restore saved state of container.
+`
+}
+
+// SetFlags implements subcommands.Command.SetFlags.
+func (r *Restore) SetFlags(f *flag.FlagSet) {
+ r.Create.SetFlags(f)
+ f.StringVar(&r.imagePath, "image-path", "", "directory path to saved container image")
+ f.BoolVar(&r.detach, "detach", false, "detach from the container's process")
+
+ // Unimplemented flags necessary for compatibility with docker.
+
+ var nsr bool
+ f.BoolVar(&nsr, "no-subreaper", false, "ignored")
+
+ var wp string
+ f.StringVar(&wp, "work-path", "", "ignored")
+}
+
+// Execute implements subcommands.Command.Execute.
+func (r *Restore) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) subcommands.ExitStatus {
+ if f.NArg() != 1 {
+ f.Usage()
+ return subcommands.ExitUsageError
+ }
+
+ id := f.Arg(0)
+ conf := args[0].(*boot.Config)
+ waitStatus := args[1].(*syscall.WaitStatus)
+
+ bundleDir := r.bundleDir
+ if bundleDir == "" {
+ bundleDir = getwdOrDie()
+ }
+ spec, err := specutils.ReadSpec(bundleDir)
+ if err != nil {
+ Fatalf("reading spec: %v", err)
+ }
+ specutils.LogSpec(spec)
+
+ if r.imagePath == "" {
+ Fatalf("image-path flag must be provided")
+ }
+
+ conf.RestoreFile = filepath.Join(r.imagePath, checkpointFileName)
+
+ ws, err := container.Run(id, spec, conf, bundleDir, r.consoleSocket, r.pidFile, r.userLog, r.detach)
+ if err != nil {
+ Fatalf("running container: %v", err)
+ }
+ *waitStatus = ws
+
+ return subcommands.ExitSuccess
+}
diff --git a/runsc/cmd/resume.go b/runsc/cmd/resume.go
new file mode 100644
index 000000000..9a2ade41e
--- /dev/null
+++ b/runsc/cmd/resume.go
@@ -0,0 +1,69 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package cmd
+
+import (
+ "context"
+
+ "flag"
+ "github.com/google/subcommands"
+ "gvisor.googlesource.com/gvisor/runsc/boot"
+ "gvisor.googlesource.com/gvisor/runsc/container"
+)
+
+// Resume implements subcommands.Command for the "resume" command.
+type Resume struct{}
+
+// Name implements subcommands.Command.Name.
+func (*Resume) Name() string {
+ return "resume"
+}
+
+// Synopsis implements subcommands.Command.Synopsis.
+func (*Resume) Synopsis() string {
+ return "Resume unpauses a paused container"
+}
+
+// Usage implements subcommands.Command.Usage.
+func (*Resume) Usage() string {
+ return `resume <container id> - resume a paused container.
+`
+}
+
+// SetFlags implements subcommands.Command.SetFlags.
+func (r *Resume) SetFlags(f *flag.FlagSet) {
+}
+
+// Execute implements subcommands.Command.Execute.
+func (r *Resume) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) subcommands.ExitStatus {
+ if f.NArg() != 1 {
+ f.Usage()
+ return subcommands.ExitUsageError
+ }
+
+ id := f.Arg(0)
+ conf := args[0].(*boot.Config)
+
+ cont, err := container.Load(conf.RootDir, id)
+ if err != nil {
+ Fatalf("loading container: %v", err)
+ }
+
+ if err := cont.Resume(); err != nil {
+ Fatalf("resume failed: %v", err)
+ }
+
+ return subcommands.ExitSuccess
+}
diff --git a/runsc/cmd/run.go b/runsc/cmd/run.go
new file mode 100644
index 000000000..c228b4f93
--- /dev/null
+++ b/runsc/cmd/run.go
@@ -0,0 +1,87 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package cmd
+
+import (
+ "context"
+ "syscall"
+
+ "flag"
+ "github.com/google/subcommands"
+ "gvisor.googlesource.com/gvisor/runsc/boot"
+ "gvisor.googlesource.com/gvisor/runsc/container"
+ "gvisor.googlesource.com/gvisor/runsc/specutils"
+)
+
+// Run implements subcommands.Command for the "run" command.
+type Run struct {
+ // Run flags are a super-set of those for Create.
+ Create
+
+ // detach indicates that runsc has to start a process and exit without waiting it.
+ detach bool
+}
+
+// Name implements subcommands.Command.Name.
+func (*Run) Name() string {
+ return "run"
+}
+
+// Synopsis implements subcommands.Command.Synopsis.
+func (*Run) Synopsis() string {
+ return "create and run a secure container"
+}
+
+// Usage implements subcommands.Command.Usage.
+func (*Run) Usage() string {
+ return `run [flags] <container id> - create and run a secure container.
+`
+}
+
+// SetFlags implements subcommands.Command.SetFlags.
+func (r *Run) SetFlags(f *flag.FlagSet) {
+ f.BoolVar(&r.detach, "detach", false, "detach from the container's process")
+ r.Create.SetFlags(f)
+}
+
+// Execute implements subcommands.Command.Execute.
+func (r *Run) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) subcommands.ExitStatus {
+ if f.NArg() != 1 {
+ f.Usage()
+ return subcommands.ExitUsageError
+ }
+
+ id := f.Arg(0)
+ conf := args[0].(*boot.Config)
+ waitStatus := args[1].(*syscall.WaitStatus)
+
+ bundleDir := r.bundleDir
+ if bundleDir == "" {
+ bundleDir = getwdOrDie()
+ }
+ spec, err := specutils.ReadSpec(bundleDir)
+ if err != nil {
+ Fatalf("reading spec: %v", err)
+ }
+ specutils.LogSpec(spec)
+
+ ws, err := container.Run(id, spec, conf, bundleDir, r.consoleSocket, r.pidFile, r.userLog, r.detach)
+ if err != nil {
+ Fatalf("running container: %v", err)
+ }
+
+ *waitStatus = ws
+ return subcommands.ExitSuccess
+}
diff --git a/runsc/cmd/spec.go b/runsc/cmd/spec.go
new file mode 100644
index 000000000..344da13ba
--- /dev/null
+++ b/runsc/cmd/spec.go
@@ -0,0 +1,182 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package cmd
+
+import (
+ "context"
+ "io/ioutil"
+ "os"
+ "path/filepath"
+
+ "flag"
+ "github.com/google/subcommands"
+)
+
+var specTemplate = []byte(`{
+ "ociVersion": "1.0.0",
+ "process": {
+ "terminal": true,
+ "user": {
+ "uid": 0,
+ "gid": 0
+ },
+ "args": [
+ "sh"
+ ],
+ "env": [
+ "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin",
+ "TERM=xterm"
+ ],
+ "cwd": "/",
+ "capabilities": {
+ "bounding": [
+ "CAP_AUDIT_WRITE",
+ "CAP_KILL",
+ "CAP_NET_BIND_SERVICE"
+ ],
+ "effective": [
+ "CAP_AUDIT_WRITE",
+ "CAP_KILL",
+ "CAP_NET_BIND_SERVICE"
+ ],
+ "inheritable": [
+ "CAP_AUDIT_WRITE",
+ "CAP_KILL",
+ "CAP_NET_BIND_SERVICE"
+ ],
+ "permitted": [
+ "CAP_AUDIT_WRITE",
+ "CAP_KILL",
+ "CAP_NET_BIND_SERVICE"
+ ],
+ "ambient": [
+ "CAP_AUDIT_WRITE",
+ "CAP_KILL",
+ "CAP_NET_BIND_SERVICE"
+ ]
+ },
+ "rlimits": [
+ {
+ "type": "RLIMIT_NOFILE",
+ "hard": 1024,
+ "soft": 1024
+ }
+ ]
+ },
+ "root": {
+ "path": "rootfs",
+ "readonly": true
+ },
+ "hostname": "runsc",
+ "mounts": [
+ {
+ "destination": "/proc",
+ "type": "proc",
+ "source": "proc"
+ },
+ {
+ "destination": "/dev",
+ "type": "tmpfs",
+ "source": "tmpfs",
+ "options": []
+ },
+ {
+ "destination": "/sys",
+ "type": "sysfs",
+ "source": "sysfs",
+ "options": [
+ "nosuid",
+ "noexec",
+ "nodev",
+ "ro"
+ ]
+ }
+ ],
+ "linux": {
+ "namespaces": [
+ {
+ "type": "pid"
+ },
+ {
+ "type": "network"
+ },
+ {
+ "type": "ipc"
+ },
+ {
+ "type": "uts"
+ },
+ {
+ "type": "mount"
+ }
+ ]
+ }
+}`)
+
+// Spec implements subcommands.Command for the "spec" command.
+type Spec struct {
+ bundle string
+}
+
+// Name implements subcommands.Command.Name.
+func (*Spec) Name() string {
+ return "spec"
+}
+
+// Synopsis implements subcommands.Command.Synopsis.
+func (*Spec) Synopsis() string {
+ return "create a new OCI bundle specification file"
+}
+
+// Usage implements subcommands.Command.Usage.
+func (*Spec) Usage() string {
+ return `spec [options] - create a new OCI bundle specification file.
+
+The spec command creates a new specification file (config.json) for a new OCI bundle.
+
+The specification file is a starter file that runs the "sh" command in the container. You
+should edit the file to suit your needs. You can find out more about the format of the
+specification file by visiting the OCI runtime spec repository:
+https://github.com/opencontainers/runtime-spec/
+
+EXAMPLE:
+ $ mkdir -p bundle/rootfs
+ $ cd bundle
+ $ runsc spec
+ $ docker export $(docker create hello-world) | tar -xf - -C rootfs
+ $ sed -i 's;"sh";"/hello";' config.json
+ $ sudo runsc run hello
+
+`
+}
+
+// SetFlags implements subcommands.Command.SetFlags.
+func (s *Spec) SetFlags(f *flag.FlagSet) {
+ f.StringVar(&s.bundle, "bundle", ".", "path to the root of the OCI bundle")
+}
+
+// Execute implements subcommands.Command.Execute.
+func (s *Spec) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) subcommands.ExitStatus {
+ confPath := filepath.Join(s.bundle, "config.json")
+ if _, err := os.Stat(confPath); !os.IsNotExist(err) {
+ Fatalf("file %q already exists", confPath)
+ }
+
+ if err := ioutil.WriteFile(confPath, specTemplate, 0664); err != nil {
+ Fatalf("writing to %q: %v", confPath, err)
+ }
+
+ return subcommands.ExitSuccess
+}
diff --git a/runsc/cmd/start.go b/runsc/cmd/start.go
new file mode 100644
index 000000000..657726251
--- /dev/null
+++ b/runsc/cmd/start.go
@@ -0,0 +1,65 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package cmd
+
+import (
+ "context"
+
+ "flag"
+ "github.com/google/subcommands"
+ "gvisor.googlesource.com/gvisor/runsc/boot"
+ "gvisor.googlesource.com/gvisor/runsc/container"
+)
+
+// Start implements subcommands.Command for the "start" command.
+type Start struct{}
+
+// Name implements subcommands.Command.Name.
+func (*Start) Name() string {
+ return "start"
+}
+
+// Synopsis implements subcommands.Command.Synopsis.
+func (*Start) Synopsis() string {
+ return "start a secure container"
+}
+
+// Usage implements subcommands.Command.Usage.
+func (*Start) Usage() string {
+ return `start <container id> - start a secure container.`
+}
+
+// SetFlags implements subcommands.Command.SetFlags.
+func (*Start) SetFlags(f *flag.FlagSet) {}
+
+// Execute implements subcommands.Command.Execute.
+func (*Start) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) subcommands.ExitStatus {
+ if f.NArg() != 1 {
+ f.Usage()
+ return subcommands.ExitUsageError
+ }
+
+ id := f.Arg(0)
+ conf := args[0].(*boot.Config)
+
+ c, err := container.Load(conf.RootDir, id)
+ if err != nil {
+ Fatalf("loading container: %v", err)
+ }
+ if err := c.Start(conf); err != nil {
+ Fatalf("starting container: %v", err)
+ }
+ return subcommands.ExitSuccess
+}
diff --git a/runsc/cmd/state.go b/runsc/cmd/state.go
new file mode 100644
index 000000000..f0d449b19
--- /dev/null
+++ b/runsc/cmd/state.go
@@ -0,0 +1,76 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package cmd
+
+import (
+ "context"
+ "encoding/json"
+ "os"
+
+ "flag"
+ "github.com/google/subcommands"
+ "gvisor.googlesource.com/gvisor/pkg/log"
+ "gvisor.googlesource.com/gvisor/runsc/boot"
+ "gvisor.googlesource.com/gvisor/runsc/container"
+)
+
+// State implements subcommands.Command for the "state" command.
+type State struct{}
+
+// Name implements subcommands.Command.Name.
+func (*State) Name() string {
+ return "state"
+}
+
+// Synopsis implements subcommands.Command.Synopsis.
+func (*State) Synopsis() string {
+ return "get the state of a container"
+}
+
+// Usage implements subcommands.Command.Usage.
+func (*State) Usage() string {
+ return `state [flags] <container id> - get the state of a container`
+}
+
+// SetFlags implements subcommands.Command.SetFlags.
+func (*State) SetFlags(f *flag.FlagSet) {}
+
+// Execute implements subcommands.Command.Execute.
+func (*State) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) subcommands.ExitStatus {
+ if f.NArg() != 1 {
+ f.Usage()
+ return subcommands.ExitUsageError
+ }
+
+ id := f.Arg(0)
+ conf := args[0].(*boot.Config)
+
+ c, err := container.Load(conf.RootDir, id)
+ if err != nil {
+ Fatalf("loading container: %v", err)
+ }
+ log.Debugf("Returning state for container %+v", c)
+
+ state := c.State()
+ log.Debugf("State: %+v", state)
+
+ // Write json-encoded state directly to stdout.
+ b, err := json.MarshalIndent(state, "", " ")
+ if err != nil {
+ Fatalf("marshaling container state: %v", err)
+ }
+ os.Stdout.Write(b)
+ return subcommands.ExitSuccess
+}
diff --git a/runsc/cmd/wait.go b/runsc/cmd/wait.go
new file mode 100644
index 000000000..a55a682f3
--- /dev/null
+++ b/runsc/cmd/wait.go
@@ -0,0 +1,127 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package cmd
+
+import (
+ "context"
+ "encoding/json"
+ "os"
+ "syscall"
+
+ "flag"
+ "github.com/google/subcommands"
+ "gvisor.googlesource.com/gvisor/runsc/boot"
+ "gvisor.googlesource.com/gvisor/runsc/container"
+)
+
+const (
+ unsetPID = -1
+)
+
+// Wait implements subcommands.Command for the "wait" command.
+type Wait struct {
+ rootPID int
+ pid int
+}
+
+// Name implements subcommands.Command.Name.
+func (*Wait) Name() string {
+ return "wait"
+}
+
+// Synopsis implements subcommands.Command.Synopsis.
+func (*Wait) Synopsis() string {
+ return "wait on a process inside a container"
+}
+
+// Usage implements subcommands.Command.Usage.
+func (*Wait) Usage() string {
+ return `wait [flags] <container id>`
+}
+
+// SetFlags implements subcommands.Command.SetFlags.
+func (wt *Wait) SetFlags(f *flag.FlagSet) {
+ f.IntVar(&wt.rootPID, "rootpid", unsetPID, "select a PID in the sandbox root PID namespace to wait on instead of the container's root process")
+ f.IntVar(&wt.pid, "pid", unsetPID, "select a PID in the container's PID namespace to wait on instead of the container's root process")
+}
+
+// Execute implements subcommands.Command.Execute. It waits for a process in a
+// container to exit before returning.
+func (wt *Wait) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) subcommands.ExitStatus {
+ if f.NArg() != 1 {
+ f.Usage()
+ return subcommands.ExitUsageError
+ }
+ // You can't specify both -pid and -rootpid.
+ if wt.rootPID != unsetPID && wt.pid != unsetPID {
+ Fatalf("only one of -pid and -rootPid can be set")
+ }
+
+ id := f.Arg(0)
+ conf := args[0].(*boot.Config)
+
+ c, err := container.Load(conf.RootDir, id)
+ if err != nil {
+ Fatalf("loading container: %v", err)
+ }
+
+ var waitStatus syscall.WaitStatus
+ switch {
+ // Wait on the whole container.
+ case wt.rootPID == unsetPID && wt.pid == unsetPID:
+ ws, err := c.Wait()
+ if err != nil {
+ Fatalf("waiting on container %q: %v", c.ID, err)
+ }
+ waitStatus = ws
+ // Wait on a PID in the root PID namespace.
+ case wt.rootPID != unsetPID:
+ ws, err := c.WaitRootPID(int32(wt.rootPID), true /* clearStatus */)
+ if err != nil {
+ Fatalf("waiting on PID in root PID namespace %d in container %q: %v", wt.rootPID, c.ID, err)
+ }
+ waitStatus = ws
+ // Wait on a PID in the container's PID namespace.
+ case wt.pid != unsetPID:
+ ws, err := c.WaitPID(int32(wt.pid), true /* clearStatus */)
+ if err != nil {
+ Fatalf("waiting on PID %d in container %q: %v", wt.pid, c.ID, err)
+ }
+ waitStatus = ws
+ }
+ result := waitResult{
+ ID: id,
+ ExitStatus: exitStatus(waitStatus),
+ }
+ // Write json-encoded wait result directly to stdout.
+ if err := json.NewEncoder(os.Stdout).Encode(result); err != nil {
+ Fatalf("marshaling wait result: %v", err)
+ }
+ return subcommands.ExitSuccess
+}
+
+type waitResult struct {
+ ID string `json:"id"`
+ ExitStatus int `json:"exitStatus"`
+}
+
+// exitStatus returns the correct exit status for a process based on if it
+// was signaled or exited cleanly.
+func exitStatus(status syscall.WaitStatus) int {
+ if status.Signaled() {
+ return 128 + int(status.Signal())
+ }
+ return status.ExitStatus()
+}