diff options
Diffstat (limited to 'runsc/cmd')
-rw-r--r-- | runsc/cmd/boot.go | 257 | ||||
-rw-r--r-- | runsc/cmd/capability.go | 157 | ||||
-rw-r--r-- | runsc/cmd/checkpoint.go | 150 | ||||
-rw-r--r-- | runsc/cmd/chroot.go | 97 | ||||
-rw-r--r-- | runsc/cmd/cmd.go | 117 | ||||
-rw-r--r-- | runsc/cmd/create.go | 103 | ||||
-rw-r--r-- | runsc/cmd/debug.go | 185 | ||||
-rw-r--r-- | runsc/cmd/delete.go | 87 | ||||
-rw-r--r-- | runsc/cmd/do.go | 310 | ||||
-rw-r--r-- | runsc/cmd/events.go | 111 | ||||
-rw-r--r-- | runsc/cmd/exec.go | 486 | ||||
-rw-r--r-- | runsc/cmd/gofer.go | 446 | ||||
-rw-r--r-- | runsc/cmd/kill.go | 154 | ||||
-rw-r--r-- | runsc/cmd/list.go | 117 | ||||
-rw-r--r-- | runsc/cmd/path.go | 28 | ||||
-rw-r--r-- | runsc/cmd/pause.go | 68 | ||||
-rw-r--r-- | runsc/cmd/ps.go | 86 | ||||
-rw-r--r-- | runsc/cmd/restore.go | 106 | ||||
-rw-r--r-- | runsc/cmd/resume.go | 69 | ||||
-rw-r--r-- | runsc/cmd/run.go | 87 | ||||
-rw-r--r-- | runsc/cmd/spec.go | 182 | ||||
-rw-r--r-- | runsc/cmd/start.go | 65 | ||||
-rw-r--r-- | runsc/cmd/state.go | 76 | ||||
-rw-r--r-- | runsc/cmd/wait.go | 127 |
24 files changed, 3671 insertions, 0 deletions
diff --git a/runsc/cmd/boot.go b/runsc/cmd/boot.go new file mode 100644 index 000000000..3a547d4aa --- /dev/null +++ b/runsc/cmd/boot.go @@ -0,0 +1,257 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package cmd + +import ( + "context" + "os" + "runtime/debug" + "strings" + "syscall" + + "flag" + "github.com/google/subcommands" + specs "github.com/opencontainers/runtime-spec/specs-go" + "gvisor.googlesource.com/gvisor/pkg/log" + "gvisor.googlesource.com/gvisor/runsc/boot" + "gvisor.googlesource.com/gvisor/runsc/specutils" +) + +// Boot implements subcommands.Command for the "boot" command which starts a +// new sandbox. It should not be called directly. +type Boot struct { + // bundleDir is the directory containing the OCI spec. + bundleDir string + + // specFD is the file descriptor that the spec will be read from. + specFD int + + // controllerFD is the file descriptor of a stream socket for the + // control server that is donated to this process. + controllerFD int + + // deviceFD is the file descriptor for the platform device file. + deviceFD int + + // ioFDs is the list of FDs used to connect to FS gofers. + ioFDs intFlags + + // stdioFDs are the fds for stdin, stdout, and stderr. They must be + // provided in that order. + stdioFDs intFlags + + // console is set to true if the sandbox should allow terminal ioctl(2) + // syscalls. 
+ console bool + + // applyCaps determines if capabilities defined in the spec should be applied + // to the process. + applyCaps bool + + // setUpChroot is set to true if the sandbox is started in an empty root. + setUpRoot bool + + // cpuNum number of CPUs to create inside the sandbox. + cpuNum int + + // totalMem sets the initial amount of total memory to report back to the + // container. + totalMem uint64 + + // userLogFD is the file descriptor to write user logs to. + userLogFD int + + // startSyncFD is the file descriptor to synchronize runsc and sandbox. + startSyncFD int + + // mountsFD is the file descriptor to read list of mounts after they have + // been resolved (direct paths, no symlinks). They are resolved outside the + // sandbox (e.g. gofer) and sent through this FD. + mountsFD int + + // pidns is set if the sanadbox is in its own pid namespace. + pidns bool +} + +// Name implements subcommands.Command.Name. +func (*Boot) Name() string { + return "boot" +} + +// Synopsis implements subcommands.Command.Synopsis. +func (*Boot) Synopsis() string { + return "launch a sandbox process (internal use only)" +} + +// Usage implements subcommands.Command.Usage. +func (*Boot) Usage() string { + return `boot [flags] <container id>` +} + +// SetFlags implements subcommands.Command.SetFlags. +func (b *Boot) SetFlags(f *flag.FlagSet) { + f.StringVar(&b.bundleDir, "bundle", "", "required path to the root of the bundle directory") + f.IntVar(&b.specFD, "spec-fd", -1, "required fd with the container spec") + f.IntVar(&b.controllerFD, "controller-fd", -1, "required FD of a stream socket for the control server that must be donated to this process") + f.IntVar(&b.deviceFD, "device-fd", -1, "FD for the platform device file") + f.Var(&b.ioFDs, "io-fds", "list of FDs to connect 9P clients. 
They must follow this order: root first, then mounts as defined in the spec") + f.Var(&b.stdioFDs, "stdio-fds", "list of FDs containing sandbox stdin, stdout, and stderr in that order") + f.BoolVar(&b.console, "console", false, "set to true if the sandbox should allow terminal ioctl(2) syscalls") + f.BoolVar(&b.applyCaps, "apply-caps", false, "if true, apply capabilities defined in the spec to the process") + f.BoolVar(&b.setUpRoot, "setup-root", false, "if true, set up an empty root for the process") + f.BoolVar(&b.pidns, "pidns", false, "if true, the sandbox is in its own PID namespace") + f.IntVar(&b.cpuNum, "cpu-num", 0, "number of CPUs to create inside the sandbox") + f.Uint64Var(&b.totalMem, "total-memory", 0, "sets the initial amount of total memory to report back to the container") + f.IntVar(&b.userLogFD, "user-log-fd", 0, "file descriptor to write user logs to. 0 means no logging.") + f.IntVar(&b.startSyncFD, "start-sync-fd", -1, "required FD to used to synchronize sandbox startup") + f.IntVar(&b.mountsFD, "mounts-fd", -1, "mountsFD is the file descriptor to read list of mounts after they have been resolved (direct paths, no symlinks).") +} + +// Execute implements subcommands.Command.Execute. It starts a sandbox in a +// waiting state. +func (b *Boot) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) subcommands.ExitStatus { + if b.specFD == -1 || b.controllerFD == -1 || b.startSyncFD == -1 || f.NArg() != 1 { + f.Usage() + return subcommands.ExitUsageError + } + + // Ensure that if there is a panic, all goroutine stacks are printed. + debug.SetTraceback("all") + + if b.setUpRoot { + if err := setUpChroot(b.pidns); err != nil { + Fatalf("error setting up chroot: %v", err) + } + + if !b.applyCaps { + // Remove --setup-root arg to call myself. 
+ var args []string + for _, arg := range os.Args { + if !strings.Contains(arg, "setup-root") { + args = append(args, arg) + } + } + // Note that we've already read the spec from the spec FD, and + // we will read it again after the exec call. This works + // because the ReadSpecFromFile function seeks to the beginning + // of the file before reading. + if err := callSelfAsNobody(args); err != nil { + Fatalf("%v", err) + } + panic("callSelfAsNobody must never return success") + } + } + + // Get the spec from the specFD. + specFile := os.NewFile(uintptr(b.specFD), "spec file") + defer specFile.Close() + spec, err := specutils.ReadSpecFromFile(b.bundleDir, specFile) + if err != nil { + Fatalf("reading spec: %v", err) + } + specutils.LogSpec(spec) + + conf := args[0].(*boot.Config) + waitStatus := args[1].(*syscall.WaitStatus) + + if b.applyCaps { + caps := spec.Process.Capabilities + if caps == nil { + caps = &specs.LinuxCapabilities{} + } + if conf.Platform == boot.PlatformPtrace { + // Ptrace platform requires extra capabilities. + const c = "CAP_SYS_PTRACE" + caps.Bounding = append(caps.Bounding, c) + caps.Effective = append(caps.Effective, c) + caps.Permitted = append(caps.Permitted, c) + } + + // Remove --apply-caps arg to call myself. + var args []string + for _, arg := range os.Args { + if !strings.Contains(arg, "setup-root") && !strings.Contains(arg, "apply-caps") { + args = append(args, arg) + } + } + + // Note that we've already read the spec from the spec FD, and + // we will read it again after the exec call. This works + // because the ReadSpecFromFile function seeks to the beginning + // of the file before reading. + if err := setCapsAndCallSelf(args, caps); err != nil { + Fatalf("%v", err) + } + panic("setCapsAndCallSelf must never return success") + } + + // Read resolved mount list and replace the original one from the spec. 
+ mountsFile := os.NewFile(uintptr(b.mountsFD), "mounts file") + cleanMounts, err := specutils.ReadMounts(mountsFile) + if err != nil { + mountsFile.Close() + Fatalf("Error reading mounts file: %v", err) + } + mountsFile.Close() + spec.Mounts = cleanMounts + + // Create the loader. + bootArgs := boot.Args{ + ID: f.Arg(0), + Spec: spec, + Conf: conf, + ControllerFD: b.controllerFD, + Device: os.NewFile(uintptr(b.deviceFD), "platform device"), + GoferFDs: b.ioFDs.GetArray(), + StdioFDs: b.stdioFDs.GetArray(), + Console: b.console, + NumCPU: b.cpuNum, + TotalMem: b.totalMem, + UserLogFD: b.userLogFD, + } + l, err := boot.New(bootArgs) + if err != nil { + Fatalf("creating loader: %v", err) + } + + // Fatalf exits the process and doesn't run defers. + // 'l' must be destroyed explicitly after this point! + + // Notify the parent process the sandbox has booted (and that the controller + // is up). + startSyncFile := os.NewFile(uintptr(b.startSyncFD), "start-sync file") + buf := make([]byte, 1) + if w, err := startSyncFile.Write(buf); err != nil || w != 1 { + l.Destroy() + Fatalf("unable to write into the start-sync descriptor: %v", err) + } + // Closes startSyncFile because 'l.Run()' only returns when the sandbox exits. + startSyncFile.Close() + + // Wait for the start signal from runsc. + l.WaitForStartSignal() + + // Run the application and wait for it to finish. + if err := l.Run(); err != nil { + l.Destroy() + Fatalf("running sandbox: %v", err) + } + + ws := l.WaitExit() + log.Infof("application exiting with %+v", ws) + *waitStatus = syscall.WaitStatus(ws.Status()) + l.Destroy() + return subcommands.ExitSuccess +} diff --git a/runsc/cmd/capability.go b/runsc/cmd/capability.go new file mode 100644 index 000000000..312e5b471 --- /dev/null +++ b/runsc/cmd/capability.go @@ -0,0 +1,157 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package cmd + +import ( + "fmt" + + specs "github.com/opencontainers/runtime-spec/specs-go" + "github.com/syndtr/gocapability/capability" + "gvisor.googlesource.com/gvisor/pkg/log" +) + +var allCapTypes = []capability.CapType{ + capability.BOUNDS, + capability.EFFECTIVE, + capability.PERMITTED, + capability.INHERITABLE, + capability.AMBIENT, +} + +// applyCaps applies the capabilities in the spec to the current thread. +// +// Note that it must be called with current thread locked. +func applyCaps(caps *specs.LinuxCapabilities) error { + // Load current capabilities to trim the ones not permitted. + curCaps, err := capability.NewPid2(0) + if err != nil { + return err + } + if err := curCaps.Load(); err != nil { + return err + } + + // Create an empty capability set to populate. + newCaps, err := capability.NewPid2(0) + if err != nil { + return err + } + + for _, c := range allCapTypes { + if !newCaps.Empty(c) { + panic("unloaded capabilities must be empty") + } + set, err := trimCaps(getCaps(c, caps), curCaps) + if err != nil { + return err + } + newCaps.Set(c, set...) 
+ } + + if err := newCaps.Apply(capability.CAPS | capability.BOUNDS | capability.AMBS); err != nil { + return err + } + log.Infof("Capabilities applied: %+v", newCaps) + return nil +} + +func getCaps(which capability.CapType, caps *specs.LinuxCapabilities) []string { + switch which { + case capability.BOUNDS: + return caps.Bounding + case capability.EFFECTIVE: + return caps.Effective + case capability.PERMITTED: + return caps.Permitted + case capability.INHERITABLE: + return caps.Inheritable + case capability.AMBIENT: + return caps.Ambient + } + panic(fmt.Sprint("invalid capability type:", which)) +} + +func trimCaps(names []string, setter capability.Capabilities) ([]capability.Cap, error) { + wantedCaps, err := capsFromNames(names) + if err != nil { + return nil, err + } + + // Trim down capabilities that aren't possible to acquire. + var caps []capability.Cap + for _, c := range wantedCaps { + // Capability rules are more complicated than this, but this catches most + // problems with tests running with non-privileged user. 
+ if setter.Get(capability.PERMITTED, c) { + caps = append(caps, c) + } else { + log.Warningf("Capability %q is not permitted, dropping it.", c) + } + } + return caps, nil +} + +func capsFromNames(names []string) ([]capability.Cap, error) { + var caps []capability.Cap + for _, name := range names { + cap, ok := capFromName[name] + if !ok { + return nil, fmt.Errorf("invalid capability %q", name) + } + caps = append(caps, cap) + } + return caps, nil +} + +var capFromName = map[string]capability.Cap{ + "CAP_CHOWN": capability.CAP_CHOWN, + "CAP_DAC_OVERRIDE": capability.CAP_DAC_OVERRIDE, + "CAP_DAC_READ_SEARCH": capability.CAP_DAC_READ_SEARCH, + "CAP_FOWNER": capability.CAP_FOWNER, + "CAP_FSETID": capability.CAP_FSETID, + "CAP_KILL": capability.CAP_KILL, + "CAP_SETGID": capability.CAP_SETGID, + "CAP_SETUID": capability.CAP_SETUID, + "CAP_SETPCAP": capability.CAP_SETPCAP, + "CAP_LINUX_IMMUTABLE": capability.CAP_LINUX_IMMUTABLE, + "CAP_NET_BIND_SERVICE": capability.CAP_NET_BIND_SERVICE, + "CAP_NET_BROADCAST": capability.CAP_NET_BROADCAST, + "CAP_NET_ADMIN": capability.CAP_NET_ADMIN, + "CAP_NET_RAW": capability.CAP_NET_RAW, + "CAP_IPC_LOCK": capability.CAP_IPC_LOCK, + "CAP_IPC_OWNER": capability.CAP_IPC_OWNER, + "CAP_SYS_MODULE": capability.CAP_SYS_MODULE, + "CAP_SYS_RAWIO": capability.CAP_SYS_RAWIO, + "CAP_SYS_CHROOT": capability.CAP_SYS_CHROOT, + "CAP_SYS_PTRACE": capability.CAP_SYS_PTRACE, + "CAP_SYS_PACCT": capability.CAP_SYS_PACCT, + "CAP_SYS_ADMIN": capability.CAP_SYS_ADMIN, + "CAP_SYS_BOOT": capability.CAP_SYS_BOOT, + "CAP_SYS_NICE": capability.CAP_SYS_NICE, + "CAP_SYS_RESOURCE": capability.CAP_SYS_RESOURCE, + "CAP_SYS_TIME": capability.CAP_SYS_TIME, + "CAP_SYS_TTY_CONFIG": capability.CAP_SYS_TTY_CONFIG, + "CAP_MKNOD": capability.CAP_MKNOD, + "CAP_LEASE": capability.CAP_LEASE, + "CAP_AUDIT_WRITE": capability.CAP_AUDIT_WRITE, + "CAP_AUDIT_CONTROL": capability.CAP_AUDIT_CONTROL, + "CAP_SETFCAP": capability.CAP_SETFCAP, + "CAP_MAC_OVERRIDE": 
capability.CAP_MAC_OVERRIDE, + "CAP_MAC_ADMIN": capability.CAP_MAC_ADMIN, + "CAP_SYSLOG": capability.CAP_SYSLOG, + "CAP_WAKE_ALARM": capability.CAP_WAKE_ALARM, + "CAP_BLOCK_SUSPEND": capability.CAP_BLOCK_SUSPEND, + "CAP_AUDIT_READ": capability.CAP_AUDIT_READ, +} diff --git a/runsc/cmd/checkpoint.go b/runsc/cmd/checkpoint.go new file mode 100644 index 000000000..96d3c3378 --- /dev/null +++ b/runsc/cmd/checkpoint.go @@ -0,0 +1,150 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package cmd + +import ( + "context" + "os" + "path/filepath" + "syscall" + + "flag" + "github.com/google/subcommands" + "gvisor.googlesource.com/gvisor/pkg/log" + "gvisor.googlesource.com/gvisor/runsc/boot" + "gvisor.googlesource.com/gvisor/runsc/container" + "gvisor.googlesource.com/gvisor/runsc/specutils" +) + +// File containing the container's saved image/state within the given image-path's directory. +const checkpointFileName = "checkpoint.img" + +// Checkpoint implements subcommands.Command for the "checkpoint" command. +type Checkpoint struct { + imagePath string + leaveRunning bool +} + +// Name implements subcommands.Command.Name. +func (*Checkpoint) Name() string { + return "checkpoint" +} + +// Synopsis implements subcommands.Command.Synopsis. +func (*Checkpoint) Synopsis() string { + return "checkpoint current state of container (experimental)" +} + +// Usage implements subcommands.Command.Usage. 
+func (*Checkpoint) Usage() string {
+	return `checkpoint [flags] <container id> - save current state of container.
+`
+}
+
+// SetFlags implements subcommands.Command.SetFlags.
+func (c *Checkpoint) SetFlags(f *flag.FlagSet) {
+	f.StringVar(&c.imagePath, "image-path", "", "directory path to saved container image")
+	f.BoolVar(&c.leaveRunning, "leave-running", false, "restart the container after checkpointing")
+
+	// Unimplemented flags necessary for compatibility with docker.
+	var wp string
+	f.StringVar(&wp, "work-path", "", "ignored")
+}
+
+// Execute implements subcommands.Command.Execute.
+//
+// It expects exactly one positional argument (the container ID), a
+// *boot.Config in args[0] and a *syscall.WaitStatus in args[1]. The container
+// state is written to checkpointFileName inside the --image-path directory,
+// which is created if needed; the image file itself must not already exist
+// (it is opened with O_EXCL).
+//
+// With --leave-running the container is destroyed, re-created under the same
+// ID, restored from the image just written, and waited on; its wait status is
+// stored through args[1].
+func (c *Checkpoint) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) subcommands.ExitStatus {
+
+	if f.NArg() != 1 {
+		f.Usage()
+		return subcommands.ExitUsageError
+	}
+
+	id := f.Arg(0)
+	conf := args[0].(*boot.Config)
+	waitStatus := args[1].(*syscall.WaitStatus)
+
+	cont, err := container.Load(conf.RootDir, id)
+	if err != nil {
+		Fatalf("loading container: %v", err)
+	}
+
+	if c.imagePath == "" {
+		Fatalf("image-path flag must be provided")
+	}
+
+	if err := os.MkdirAll(c.imagePath, 0755); err != nil {
+		Fatalf("making directories at path provided: %v", err)
+	}
+
+	fullImagePath := filepath.Join(c.imagePath, checkpointFileName)
+
+	// Create the image file and open for writing.
+	file, err := os.OpenFile(fullImagePath, os.O_CREATE|os.O_EXCL|os.O_RDWR, 0644)
+	if err != nil {
+		Fatalf("os.OpenFile(%q) failed: %v", fullImagePath, err)
+	}
+	defer file.Close()
+
+	if err := cont.Checkpoint(file); err != nil {
+		Fatalf("checkpoint failed: %v", err)
+	}
+
+	if !c.leaveRunning {
+		return subcommands.ExitSuccess
+	}
+
+	// TODO(b/110843694): Make it possible to restore into same container.
+	// For now, we can fake it by destroying the container and making a
+	// new container with the same ID. This hack does not work with docker
+	// which uses the container pid to ensure that the restore-container is
+	// actually the same as the checkpoint-container. By restoring into
+	// the same container, we will solve the docker incompatibility.
+
+	// Restore into new container with same ID.
+	bundleDir := cont.BundleDir
+	if bundleDir == "" {
+		Fatalf("setting bundleDir")
+	}
+
+	spec, err := specutils.ReadSpec(bundleDir)
+	if err != nil {
+		Fatalf("reading spec: %v", err)
+	}
+
+	specutils.LogSpec(spec)
+
+	if cont.ConsoleSocket != "" {
+		log.Warningf("ignoring console socket since it cannot be restored")
+	}
+
+	if err := cont.Destroy(); err != nil {
+		Fatalf("destroying container: %v", err)
+	}
+
+	cont, err = container.Create(id, spec, conf, bundleDir, "", "", "")
+	if err != nil {
+		Fatalf("restoring container: %v", err)
+	}
+	defer cont.Destroy()
+
+	if err := cont.Restore(spec, conf, fullImagePath); err != nil {
+		Fatalf("starting container: %v", err)
+	}
+
+	// The wait status is only meaningful when Wait succeeded; do not report
+	// success to the caller on a failed wait.
+	ws, err := cont.Wait()
+	if err != nil {
+		Fatalf("waiting on container: %v", err)
+	}
+	*waitStatus = ws
+
+	return subcommands.ExitSuccess
+}
diff --git a/runsc/cmd/chroot.go b/runsc/cmd/chroot.go
new file mode 100644
index 000000000..1a774db04
--- /dev/null
+++ b/runsc/cmd/chroot.go
@@ -0,0 +1,97 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package cmd
+
+import (
+	"fmt"
+	"os"
+	"path/filepath"
+	"syscall"
+
+	"gvisor.googlesource.com/gvisor/pkg/log"
+	"gvisor.googlesource.com/gvisor/runsc/specutils"
+)
+
+// mountInChroot creates the destination mount point in the given chroot and
+// mounts the source.
+func mountInChroot(chroot, src, dst, typ string, flags uint32) error { + chrootDst := filepath.Join(chroot, dst) + log.Infof("Mounting %q at %q", src, chrootDst) + + if err := specutils.Mount(src, chrootDst, typ, flags); err != nil { + return fmt.Errorf("error mounting %q at %q: %v", src, chrootDst, err) + } + return nil +} + +func pivotRoot(root string) error { + if err := os.Chdir(root); err != nil { + return fmt.Errorf("error changing working directory: %v", err) + } + // pivot_root(new_root, put_old) moves the root filesystem (old_root) + // of the calling process to the directory put_old and makes new_root + // the new root filesystem of the calling process. + // + // pivot_root(".", ".") makes a mount of the working directory the new + // root filesystem, so it will be moved in "/" and then the old_root + // will be moved to "/" too. The parent mount of the old_root will be + // new_root, so after umounting the old_root, we will see only + // the new_root in "/". + if err := syscall.PivotRoot(".", "."); err != nil { + return fmt.Errorf("error changing root filesystem: %v", err) + } + + if err := syscall.Unmount(".", syscall.MNT_DETACH); err != nil { + return fmt.Errorf("error umounting the old root file system: %v", err) + } + return nil +} + +// setUpChroot creates an empty directory with runsc mounted at /runsc and proc +// mounted at /proc. +func setUpChroot(pidns bool) error { + // We are a new mount namespace, so we can use /tmp as a directory to + // construct a new root. + chroot := os.TempDir() + + log.Infof("Setting up sandbox chroot in %q", chroot) + + // Convert all shared mounts into slave to be sure that nothing will be + // propagated outside of our namespace. 
+ if err := syscall.Mount("", "/", "", syscall.MS_SLAVE|syscall.MS_REC, ""); err != nil { + return fmt.Errorf("error converting mounts: %v", err) + } + + if err := syscall.Mount("runsc-root", chroot, "tmpfs", syscall.MS_NOSUID|syscall.MS_NODEV|syscall.MS_NOEXEC, ""); err != nil { + return fmt.Errorf("error mounting tmpfs in choot: %v", err) + } + + if pidns { + flags := uint32(syscall.MS_NOSUID | syscall.MS_NODEV | syscall.MS_NOEXEC | syscall.MS_RDONLY) + if err := mountInChroot(chroot, "proc", "/proc", "proc", flags); err != nil { + return fmt.Errorf("error mounting proc in chroot: %v", err) + } + } else { + if err := mountInChroot(chroot, "/proc", "/proc", "bind", syscall.MS_BIND|syscall.MS_RDONLY|syscall.MS_REC); err != nil { + return fmt.Errorf("error mounting proc in chroot: %v", err) + } + } + + if err := syscall.Mount("", chroot, "", syscall.MS_REMOUNT|syscall.MS_RDONLY|syscall.MS_BIND, ""); err != nil { + return fmt.Errorf("error remounting chroot in read-only: %v", err) + } + + return pivotRoot(chroot) +} diff --git a/runsc/cmd/cmd.go b/runsc/cmd/cmd.go new file mode 100644 index 000000000..a2fc377d1 --- /dev/null +++ b/runsc/cmd/cmd.go @@ -0,0 +1,117 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package cmd holds implementations of the runsc commands. 
+package cmd
+
+import (
+	"fmt"
+	"os"
+	"runtime"
+	"strconv"
+	"syscall"
+
+	"github.com/google/subcommands"
+	specs "github.com/opencontainers/runtime-spec/specs-go"
+	"gvisor.googlesource.com/gvisor/pkg/log"
+	"gvisor.googlesource.com/gvisor/runsc/specutils"
+)
+
+// Errorf logs to stderr and returns subcommands.ExitFailure.
+func Errorf(s string, args ...interface{}) subcommands.ExitStatus {
+	// If runsc is being invoked by docker or cri-o, then we might not have
+	// access to stderr, so we log a serious-looking warning in addition to
+	// writing to stderr.
+	log.Warningf("FATAL ERROR: "+s, args...)
+	fmt.Fprintf(os.Stderr, s+"\n", args...)
+	// Return an error that is unlikely to be used by the application.
+	return subcommands.ExitFailure
+}
+
+// Fatalf logs to stderr and exits with a failure status code.
+//
+// It terminates the process with os.Exit(128), so deferred functions in the
+// caller do not run.
+func Fatalf(s string, args ...interface{}) {
+	Errorf(s, args...)
+	os.Exit(128)
+}
+
+// intFlags can be used with int flags that appear multiple times.
+type intFlags []int
+
+// String implements flag.Value.
+func (i *intFlags) String() string {
+	return fmt.Sprintf("%v", *i)
+}
+
+// Get implements flag.Getter.
+func (i *intFlags) Get() interface{} {
+	return i
+}
+
+// GetArray returns array of FDs.
+func (i *intFlags) GetArray() []int {
+	return *i
+}
+
+// Set implements flag.Value. Each call appends one parsed value, so the flag
+// may be repeated on the command line. Zero is accepted; only negative values
+// are rejected.
+func (i *intFlags) Set(s string) error {
+	fd, err := strconv.Atoi(s)
+	if err != nil {
+		return fmt.Errorf("invalid flag value: %v", err)
+	}
+	if fd < 0 {
+		return fmt.Errorf("flag value must not be negative: %d", fd)
+	}
+	*i = append(*i, fd)
+	return nil
+}
+
+// setCapsAndCallSelf sets capabilities to the current thread and then execve's
+// itself again with the arguments specified in 'args' to restart the process
+// with the desired capabilities.
+//
+// On success the exec replaces the process image and this function does not
+// return; any value returned is therefore an error.
+func setCapsAndCallSelf(args []string, caps *specs.LinuxCapabilities) error {
+	// Keep thread locked while capabilities are changed.
+	runtime.LockOSThread()
+	defer runtime.UnlockOSThread()
+
+	if err := applyCaps(caps); err != nil {
+		return fmt.Errorf("applyCaps() failed: %v", err)
+	}
+	binPath := specutils.ExePath
+
+	log.Infof("Execve %q again, bye!", binPath)
+	// The new image is started with an empty environment.
+	err := syscall.Exec(binPath, args, []string{})
+	return fmt.Errorf("error executing %s: %v", binPath, err)
+}
+
+// callSelfAsNobody sets GID and UID to nobody and then execve's itself again.
+//
+// On success the exec replaces the process image and this function does not
+// return; any value returned is therefore an error.
+func callSelfAsNobody(args []string) error {
+	// Keep thread locked while user/group are changed.
+	runtime.LockOSThread()
+	defer runtime.UnlockOSThread()
+
+	const nobody = 65534
+
+	// The group is changed before the user: once the UID is no longer
+	// privileged, changing the GID would normally not be permitted anymore.
+	if _, _, err := syscall.RawSyscall(syscall.SYS_SETGID, uintptr(nobody), 0, 0); err != 0 {
+		return fmt.Errorf("error setting gid: %v", err)
+	}
+	if _, _, err := syscall.RawSyscall(syscall.SYS_SETUID, uintptr(nobody), 0, 0); err != 0 {
+		return fmt.Errorf("error setting uid: %v", err)
+	}
+
+	binPath := specutils.ExePath
+
+	log.Infof("Execve %q again, bye!", binPath)
+	// The new image is started with an empty environment.
+	err := syscall.Exec(binPath, args, []string{})
+	return fmt.Errorf("error executing %s: %v", binPath, err)
+}
diff --git a/runsc/cmd/create.go b/runsc/cmd/create.go
new file mode 100644
index 000000000..629c198fd
--- /dev/null
+++ b/runsc/cmd/create.go
@@ -0,0 +1,103 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+ +package cmd + +import ( + "context" + + "flag" + "github.com/google/subcommands" + "gvisor.googlesource.com/gvisor/runsc/boot" + "gvisor.googlesource.com/gvisor/runsc/container" + "gvisor.googlesource.com/gvisor/runsc/specutils" +) + +// Create implements subcommands.Command for the "create" command. +type Create struct { + // bundleDir is the path to the bundle directory (defaults to the + // current working directory). + bundleDir string + + // pidFile is the filename that the sandbox pid will be written to. + // This file should only be created once the container process inside + // the sandbox is ready to use. + pidFile string + + // consoleSocket is the path to an AF_UNIX socket which will receive a + // file descriptor referencing the master end of the console's + // pseudoterminal. This is ignored unless spec.Process.Terminal is + // true. + consoleSocket string + + // userLog is the path to send user-visible logs to. This log is different + // from debug logs. The former is meant to be consumed by the users and should + // contain only information that is relevant to the person running the + // container, e.g. unsuported syscalls, while the later is more verbose and + // consumed by developers. + userLog string +} + +// Name implements subcommands.Command.Name. +func (*Create) Name() string { + return "create" +} + +// Synopsis implements subcommands.Command.Synopsis. +func (*Create) Synopsis() string { + return "create a secure container" +} + +// Usage implements subcommands.Command.Usage. +func (*Create) Usage() string { + return `create [flags] <container id> - create a secure container +` +} + +// SetFlags implements subcommands.Command.SetFlags. 
+func (c *Create) SetFlags(f *flag.FlagSet) { + f.StringVar(&c.bundleDir, "bundle", "", "path to the root of the bundle directory, defaults to the current directory") + f.StringVar(&c.consoleSocket, "console-socket", "", "path to an AF_UNIX socket which will receive a file descriptor referencing the master end of the console's pseudoterminal") + f.StringVar(&c.pidFile, "pid-file", "", "filename that the container pid will be written to") + f.StringVar(&c.userLog, "user-log", "", "filename to send user-visible logs to. Empty means no logging.") +} + +// Execute implements subcommands.Command.Execute. +func (c *Create) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) subcommands.ExitStatus { + if f.NArg() != 1 { + f.Usage() + return subcommands.ExitUsageError + } + + id := f.Arg(0) + conf := args[0].(*boot.Config) + + bundleDir := c.bundleDir + if bundleDir == "" { + bundleDir = getwdOrDie() + } + spec, err := specutils.ReadSpec(bundleDir) + if err != nil { + Fatalf("reading spec: %v", err) + } + specutils.LogSpec(spec) + + // Create the container. A new sandbox will be created for the + // container unless the metadata specifies that it should be run in an + // existing container. + if _, err := container.Create(id, spec, conf, bundleDir, c.consoleSocket, c.pidFile, c.userLog); err != nil { + Fatalf("creating container: %v", err) + } + return subcommands.ExitSuccess +} diff --git a/runsc/cmd/debug.go b/runsc/cmd/debug.go new file mode 100644 index 000000000..27eb51172 --- /dev/null +++ b/runsc/cmd/debug.go @@ -0,0 +1,185 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package cmd + +import ( + "context" + "os" + "syscall" + "time" + + "flag" + "github.com/google/subcommands" + "gvisor.googlesource.com/gvisor/pkg/log" + "gvisor.googlesource.com/gvisor/runsc/boot" + "gvisor.googlesource.com/gvisor/runsc/container" +) + +// Debug implements subcommands.Command for the "debug" command. +type Debug struct { + pid int + stacks bool + signal int + profileHeap string + profileCPU string + profileDelay int + trace string +} + +// Name implements subcommands.Command. +func (*Debug) Name() string { + return "debug" +} + +// Synopsis implements subcommands.Command. +func (*Debug) Synopsis() string { + return "shows a variety of debug information" +} + +// Usage implements subcommands.Command. +func (*Debug) Usage() string { + return `debug [flags] <container id>` +} + +// SetFlags implements subcommands.Command. +func (d *Debug) SetFlags(f *flag.FlagSet) { + f.IntVar(&d.pid, "pid", 0, "sandbox process ID. Container ID is not necessary if this is set") + f.BoolVar(&d.stacks, "stacks", false, "if true, dumps all sandbox stacks to the log") + f.StringVar(&d.profileHeap, "profile-heap", "", "writes heap profile to the given file.") + f.StringVar(&d.profileCPU, "profile-cpu", "", "writes CPU profile to the given file.") + f.IntVar(&d.profileDelay, "profile-delay", 5, "amount of time to wait before stoping CPU profile") + f.StringVar(&d.trace, "trace", "", "writes an execution trace to the given file.") + f.IntVar(&d.signal, "signal", -1, "sends signal to the sandbox") +} + +// Execute implements subcommands.Command.Execute. 
+func (d *Debug) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) subcommands.ExitStatus { + var c *container.Container + conf := args[0].(*boot.Config) + + if d.pid == 0 { + // No pid, container ID must have been provided. + if f.NArg() != 1 { + f.Usage() + return subcommands.ExitUsageError + } + var err error + c, err = container.Load(conf.RootDir, f.Arg(0)) + if err != nil { + Fatalf("loading container %q: %v", f.Arg(0), err) + } + } else { + if f.NArg() != 0 { + f.Usage() + return subcommands.ExitUsageError + } + // Go over all sandboxes and find the one that matches PID. + ids, err := container.List(conf.RootDir) + if err != nil { + Fatalf("listing containers: %v", err) + } + for _, id := range ids { + candidate, err := container.Load(conf.RootDir, id) + if err != nil { + Fatalf("loading container %q: %v", id, err) + } + if candidate.SandboxPid() == d.pid { + c = candidate + break + } + } + if c == nil { + Fatalf("container with PID %d not found", d.pid) + } + } + + if c.Sandbox == nil || !c.Sandbox.IsRunning() { + Fatalf("container sandbox is not running") + } + log.Infof("Found sandbox %q, PID: %d", c.Sandbox.ID, c.Sandbox.Pid) + + if d.signal > 0 { + log.Infof("Sending signal %d to process: %d", d.signal, c.Sandbox.Pid) + if err := syscall.Kill(c.Sandbox.Pid, syscall.Signal(d.signal)); err != nil { + Fatalf("failed to send signal %d to processs %d", d.signal, c.Sandbox.Pid) + } + } + if d.stacks { + log.Infof("Retrieving sandbox stacks") + stacks, err := c.Sandbox.Stacks() + if err != nil { + Fatalf("retrieving stacks: %v", err) + } + log.Infof(" *** Stack dump ***\n%s", stacks) + } + if d.profileHeap != "" { + f, err := os.Create(d.profileHeap) + if err != nil { + Fatalf(err.Error()) + } + defer f.Close() + + if err := c.Sandbox.HeapProfile(f); err != nil { + Fatalf(err.Error()) + } + log.Infof("Heap profile written to %q", d.profileHeap) + } + + delay := false + if d.profileCPU != "" { + delay = true + f, err := os.Create(d.profileCPU) + 
if err != nil { + Fatalf(err.Error()) + } + defer func() { + f.Close() + if err := c.Sandbox.StopCPUProfile(); err != nil { + Fatalf(err.Error()) + } + log.Infof("CPU profile written to %q", d.profileCPU) + }() + if err := c.Sandbox.StartCPUProfile(f); err != nil { + Fatalf(err.Error()) + } + log.Infof("CPU profile started for %d sec, writing to %q", d.profileDelay, d.profileCPU) + } + if d.trace != "" { + delay = true + f, err := os.Create(d.trace) + if err != nil { + Fatalf(err.Error()) + } + defer func() { + f.Close() + if err := c.Sandbox.StopTrace(); err != nil { + Fatalf(err.Error()) + } + log.Infof("Trace written to %q", d.trace) + }() + if err := c.Sandbox.StartTrace(f); err != nil { + Fatalf(err.Error()) + } + log.Infof("Tracing started for %d sec, writing to %q", d.profileDelay, d.trace) + + } + + if delay { + time.Sleep(time.Duration(d.profileDelay) * time.Second) + + } + + return subcommands.ExitSuccess +} diff --git a/runsc/cmd/delete.go b/runsc/cmd/delete.go new file mode 100644 index 000000000..9039723e9 --- /dev/null +++ b/runsc/cmd/delete.go @@ -0,0 +1,87 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
// Delete implements subcommands.Command for the "delete" command.
type Delete struct {
	// force indicates that the container should be terminated if running.
	// Without it, only containers whose status is Created or Stopped are
	// deleted. With it, a container ID that cannot be found is logged as a
	// warning rather than returned as an error.
	force bool
}
+func (d *Delete) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) subcommands.ExitStatus { + if f.NArg() == 0 { + f.Usage() + return subcommands.ExitUsageError + } + + conf := args[0].(*boot.Config) + if err := d.execute(f.Args(), conf); err != nil { + Fatalf("%v", err) + } + return subcommands.ExitSuccess +} + +func (d *Delete) execute(ids []string, conf *boot.Config) error { + for _, id := range ids { + c, err := container.Load(conf.RootDir, id) + if err != nil { + if os.IsNotExist(err) && d.force { + log.Warningf("couldn't find container %q: %v", id, err) + return nil + } + return fmt.Errorf("loading container %q: %v", id, err) + } + if !d.force && c.Status != container.Created && c.Status != container.Stopped { + return fmt.Errorf("cannot delete container that is not stopped without --force flag") + } + if err := c.Destroy(); err != nil { + return fmt.Errorf("destroying container: %v", err) + } + } + return nil +} diff --git a/runsc/cmd/do.go b/runsc/cmd/do.go new file mode 100644 index 000000000..8ea59046c --- /dev/null +++ b/runsc/cmd/do.go @@ -0,0 +1,310 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
// Do implements subcommands.Command for the "do" command. It sets up a simple
// sandbox and executes the command inside it. See Usage() for more details.
type Do struct {
	// root is the path to the root directory (--root, default "/").
	root string
	// cwd is the path to the current directory (--cwd, default ".").
	cwd string
	// ip is the IPv4 address for the sandbox (--ip).
	ip string
	// networkNamespace selects running in a new network namespace
	// (--netns, enabled by default).
	networkNamespace bool
}
+func (c *Do) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) subcommands.ExitStatus { + if len(f.Args()) == 0 { + c.Usage() + return subcommands.ExitUsageError + } + + conf := args[0].(*boot.Config) + waitStatus := args[1].(*syscall.WaitStatus) + + // Map the entire host file system, but make it readonly with a writable + // overlay on top (ignore --overlay option). + conf.Overlay = true + + hostname, err := os.Hostname() + if err != nil { + return Errorf("Error to retrieve hostname: %v", err) + } + + absRoot, err := resolvePath(c.root) + if err != nil { + return Errorf("Error resolving root: %v", err) + } + absCwd, err := resolvePath(c.cwd) + if err != nil { + return Errorf("Error resolving current directory: %v", err) + } + + spec := &specs.Spec{ + Root: &specs.Root{ + Path: absRoot, + }, + Process: &specs.Process{ + Cwd: absCwd, + Args: f.Args(), + Env: os.Environ(), + Capabilities: specutils.AllCapabilities(), + }, + Hostname: hostname, + } + + specutils.LogSpec(spec) + + cid := fmt.Sprintf("runsc-%06d", rand.Int31n(1000000)) + if !c.networkNamespace { + if conf.Network != boot.NetworkHost { + Fatalf("The current network namespace can be used only if --network=host is set", nil) + } + } else if conf.Network != boot.NetworkNone { + clean, err := c.setupNet(cid, spec) + if err != nil { + return Errorf("Error setting up network: %v", err) + } + defer clean() + } + + out, err := json.Marshal(spec) + if err != nil { + return Errorf("Error to marshal spec: %v", err) + } + tmpDir, err := ioutil.TempDir("", "runsc-do") + if err != nil { + return Errorf("Error to create tmp dir: %v", err) + } + defer os.RemoveAll(tmpDir) + + log.Infof("Changing configuration RootDir to %q", tmpDir) + conf.RootDir = tmpDir + + cfgPath := filepath.Join(tmpDir, "config.json") + if err := ioutil.WriteFile(cfgPath, out, 0755); err != nil { + return Errorf("Error write spec: %v", err) + } + + ws, err := container.Run(cid, spec, conf, tmpDir, "", "", "", false) + if err != 
nil { + return Errorf("running container: %v", err) + } + + *waitStatus = ws + return subcommands.ExitSuccess +} + +func resolvePath(path string) (string, error) { + var err error + path, err = filepath.Abs(path) + if err != nil { + return "", fmt.Errorf("resolving %q: %v", path, err) + } + path = filepath.Clean(path) + if err := syscall.Access(path, 0); err != nil { + return "", fmt.Errorf("unable to access %q: %v", path, err) + } + return path, nil +} + +func (c *Do) setupNet(cid string, spec *specs.Spec) (func(), error) { + dev, err := defaultDevice() + if err != nil { + return nil, err + } + peerIP, err := calculatePeerIP(c.ip) + if err != nil { + return nil, err + } + veth, peer := deviceNames(cid) + + cmds := []string{ + fmt.Sprintf("ip link add %s type veth peer name %s", veth, peer), + + // Setup device outside the namespace. + fmt.Sprintf("ip addr add %s/24 dev %s", peerIP, peer), + fmt.Sprintf("ip link set %s up", peer), + + // Setup device inside the namespace. + fmt.Sprintf("ip netns add %s", cid), + fmt.Sprintf("ip link set %s netns %s", veth, cid), + fmt.Sprintf("ip netns exec %s ip addr add %s/24 dev %s", cid, c.ip, veth), + fmt.Sprintf("ip netns exec %s ip link set %s up", cid, veth), + fmt.Sprintf("ip netns exec %s ip link set lo up", cid), + fmt.Sprintf("ip netns exec %s ip route add default via %s", cid, peerIP), + + // Enable network access. + "sysctl -w net.ipv4.ip_forward=1", + fmt.Sprintf("iptables -t nat -A POSTROUTING -s %s -o %s -j MASQUERADE", c.ip, dev), + fmt.Sprintf("iptables -A FORWARD -i %s -o %s -j ACCEPT", dev, peer), + fmt.Sprintf("iptables -A FORWARD -o %s -i %s -j ACCEPT", dev, peer), + } + + for _, cmd := range cmds { + log.Debugf("Run %q", cmd) + args := strings.Split(cmd, " ") + c := exec.Command(args[0], args[1:]...) 
+ if err := c.Run(); err != nil { + return nil, fmt.Errorf("failed to run %q: %v", cmd, err) + } + } + + if err := makeFile("/etc/resolv.conf", "nameserver 8.8.8.8\n", spec); err != nil { + return nil, err + } + if err := makeFile("/etc/hostname", cid+"\n", spec); err != nil { + return nil, err + } + hosts := fmt.Sprintf("127.0.0.1\tlocalhost\n%s\t%s\n", c.ip, cid) + if err := makeFile("/etc/hosts", hosts, spec); err != nil { + return nil, err + } + + if spec.Linux == nil { + spec.Linux = &specs.Linux{} + } + netns := specs.LinuxNamespace{ + Type: specs.NetworkNamespace, + Path: filepath.Join("/var/run/netns", cid), + } + spec.Linux.Namespaces = append(spec.Linux.Namespaces, netns) + + return func() { c.cleanNet(cid, dev) }, nil +} + +func (c *Do) cleanNet(cid, dev string) { + veth, peer := deviceNames(cid) + + cmds := []string{ + fmt.Sprintf("ip link delete %s", peer), + fmt.Sprintf("ip netns delete %s", cid), + + fmt.Sprintf("iptables -t nat -D POSTROUTING -s %s/24 -o %s -j MASQUERADE", c.ip, dev), + fmt.Sprintf("iptables -D FORWARD -i %s -o %s -j ACCEPT", dev, veth), + fmt.Sprintf("iptables -D FORWARD -o %s -i %s -j ACCEPT", dev, veth), + } + + for _, cmd := range cmds { + log.Debugf("Run %q", cmd) + args := strings.Split(cmd, " ") + c := exec.Command(args[0], args[1:]...) + if err := c.Run(); err != nil { + log.Warningf("Failed to run %q: %v", cmd, err) + } + } +} + +func deviceNames(cid string) (string, string) { + // Device name is limited to 15 letters. 
// calculatePeerIP returns the address to assign to the host side of the veth
// pair: the given dotted-quad IPv4 address with its last octet incremented by
// one, wrapping from 255 to 1.
//
// An error is returned when ip does not consist of exactly four decimal
// octets in the range 0-255. No attempt is made to avoid network or broadcast
// addresses.
func calculatePeerIP(ip string) (string, error) {
	parts := strings.Split(ip, ".")
	if len(parts) != 4 {
		return "", fmt.Errorf("invalid IP format %q", ip)
	}

	// Validate every octet so that malformed input is rejected here rather
	// than producing an address that fails later inside an "ip" command.
	// After the loop, n holds the value of the last octet.
	n := 0
	for _, p := range parts {
		v, err := strconv.Atoi(p)
		if err != nil {
			return "", fmt.Errorf("invalid IP format %q: %v", ip, err)
		}
		if v < 0 || v > 255 {
			return "", fmt.Errorf("invalid IP format %q: octet %d out of range", ip, v)
		}
		n = v
	}

	n++
	if n > 255 {
		n = 1
	}
	return fmt.Sprintf("%s.%s.%s.%d", parts[0], parts[1], parts[2], n), nil
}
+ +package cmd + +import ( + "context" + "encoding/json" + "os" + "time" + + "flag" + "github.com/google/subcommands" + "gvisor.googlesource.com/gvisor/pkg/log" + "gvisor.googlesource.com/gvisor/runsc/boot" + "gvisor.googlesource.com/gvisor/runsc/container" +) + +// Events implements subcommands.Command for the "events" command. +type Events struct { + // The interval between stats reporting. + intervalSec int + // If true, events will print a single group of stats and exit. + stats bool +} + +// Name implements subcommands.Command.Name. +func (*Events) Name() string { + return "events" +} + +// Synopsis implements subcommands.Command.Synopsis. +func (*Events) Synopsis() string { + return "display container events such as OOM notifications, cpu, memory, and IO usage statistics" +} + +// Usage implements subcommands.Command.Usage. +func (*Events) Usage() string { + return `<container-id> + +Where "<container-id>" is the name for the instance of the container. + +The events command displays information about the container. By default the +information is displayed once every 5 seconds. + +OPTIONS: +` +} + +// SetFlags implements subcommands.Command.SetFlags. +func (evs *Events) SetFlags(f *flag.FlagSet) { + f.IntVar(&evs.intervalSec, "interval", 5, "set the stats collection interval, in seconds") + f.BoolVar(&evs.stats, "stats", false, "display the container's stats then exit") +} + +// Execute implements subcommands.Command.Execute. +func (evs *Events) Execute(ctx context.Context, f *flag.FlagSet, args ...interface{}) subcommands.ExitStatus { + if f.NArg() != 1 { + f.Usage() + return subcommands.ExitUsageError + } + + id := f.Arg(0) + conf := args[0].(*boot.Config) + + c, err := container.Load(conf.RootDir, id) + if err != nil { + Fatalf("loading sandbox: %v", err) + } + + // Repeatedly get stats from the container. + for { + // Get the event and print it as JSON. 
+ ev, err := c.Event() + if err != nil { + log.Warningf("Error getting events for container: %v", err) + } + // err must be preserved because it is used below when breaking + // out of the loop. + b, err := json.Marshal(ev) + if err != nil { + log.Warningf("Error while marshalling event %v: %v", ev, err) + } else { + os.Stdout.Write(b) + } + + // If we're only running once, break. If we're only running + // once and there was an error, the command failed. + if evs.stats { + if err != nil { + return subcommands.ExitFailure + } + break + } + + time.Sleep(time.Duration(evs.intervalSec) * time.Second) + } + + return subcommands.ExitSuccess +} diff --git a/runsc/cmd/exec.go b/runsc/cmd/exec.go new file mode 100644 index 000000000..52fd7ac4b --- /dev/null +++ b/runsc/cmd/exec.go @@ -0,0 +1,486 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
// Exec implements subcommands.Command for the "exec" command.
type Exec struct {
	// cwd is the current working directory for the new process (--cwd).
	cwd string
	// env collects the environment variables given with --env; the flag may
	// be repeated.
	env stringSlice
	// user contains the UID and GID with which to run the new process.
	user user
	// extraKGIDs collects the additional GIDs given with --additional-gids.
	extraKGIDs stringSlice
	// caps collects the capabilities given with --cap.
	caps stringSlice
	// detach requests detaching from the container's process (--detach).
	detach bool
	// clearStatus is set by the private "private-clear-status" flag and is
	// passed to WaitPID when waiting for the exec'd process.
	clearStatus bool
	// processPath is the path to the process.json (--process).
	processPath string
	// pidFile is the file the pid of this runsc process is written to
	// (--pid-file).
	pidFile string
	// internalPidFile is the file the container-internal pid is written to
	// (--internal-pid-file).
	internalPidFile string

	// consoleSocket is the path to an AF_UNIX socket which will receive a
	// file descriptor referencing the master end of the console's
	// pseudoterminal.
	consoleSocket string
}
+ +EXAMPLE: +If the container is configured to run /bin/ps the following will +output a list of processes running in the container: + + # runc exec <container-id> ps + +OPTIONS: +` +} + +// SetFlags implements subcommands.Command.SetFlags. +func (ex *Exec) SetFlags(f *flag.FlagSet) { + f.StringVar(&ex.cwd, "cwd", "", "current working directory") + f.Var(&ex.env, "env", "set environment variables (e.g. '-env PATH=/bin -env TERM=xterm')") + f.Var(&ex.user, "user", "UID (format: <uid>[:<gid>])") + f.Var(&ex.extraKGIDs, "additional-gids", "additional gids") + f.Var(&ex.caps, "cap", "add a capability to the bounding set for the process") + f.BoolVar(&ex.detach, "detach", false, "detach from the container's process") + f.StringVar(&ex.processPath, "process", "", "path to the process.json") + f.StringVar(&ex.pidFile, "pid-file", "", "filename that the container pid will be written to") + f.StringVar(&ex.internalPidFile, "internal-pid-file", "", "filename that the container-internal pid will be written to") + f.StringVar(&ex.consoleSocket, "console-socket", "", "path to an AF_UNIX socket which will receive a file descriptor referencing the master end of the console's pseudoterminal") + + // This flag clears the status of the exec'd process upon completion. It is + // only used when we fork due to --detach being set on the parent. + f.BoolVar(&ex.clearStatus, privateClearStatusFlag, true, "private flag, do not use") +} + +// Execute implements subcommands.Command.Execute. It starts a process in an +// already created container. +func (ex *Exec) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) subcommands.ExitStatus { + e, id, err := ex.parseArgs(f) + if err != nil { + Fatalf("parsing process spec: %v", err) + } + conf := args[0].(*boot.Config) + waitStatus := args[1].(*syscall.WaitStatus) + + c, err := container.Load(conf.RootDir, id) + if err != nil { + Fatalf("loading sandbox: %v", err) + } + + // Replace empty settings with defaults from container. 
+ if e.WorkingDirectory == "" { + e.WorkingDirectory = c.Spec.Process.Cwd + } + if e.Envv == nil { + e.Envv, err = resolveEnvs(c.Spec.Process.Env, ex.env) + if err != nil { + Fatalf("getting environment variables: %v", err) + } + } + if e.Capabilities == nil { + // enableRaw is set to true to prevent the filtering out of + // CAP_NET_RAW. This is the opposite of Create() because exec + // requires the capability to be set explicitly, while 'docker + // run' sets it by default. + e.Capabilities, err = specutils.Capabilities(true /* enableRaw */, c.Spec.Process.Capabilities) + if err != nil { + Fatalf("creating capabilities: %v", err) + } + } + + // containerd expects an actual process to represent the container being + // executed. If detach was specified, starts a child in non-detach mode, + // write the child's PID to the pid file. So when the container returns, the + // child process will also return and signal containerd. + if ex.detach { + return ex.execAndWait(waitStatus) + } + + // Start the new process and get it pid. + pid, err := c.Execute(e) + if err != nil { + Fatalf("getting processes for container: %v", err) + } + + if e.StdioIsPty { + // Forward signals sent to this process to the foreground + // process in the sandbox. + stopForwarding := c.ForwardSignals(pid, true /* fgProcess */) + defer stopForwarding() + } + + // Write the sandbox-internal pid if required. + if ex.internalPidFile != "" { + pidStr := []byte(strconv.Itoa(int(pid))) + if err := ioutil.WriteFile(ex.internalPidFile, pidStr, 0644); err != nil { + Fatalf("writing internal pid file %q: %v", ex.internalPidFile, err) + } + } + + // Generate the pid file after the internal pid file is generated, so that users + // can safely assume that the internal pid file is ready after `runsc exec -d` + // returns. 
+ if ex.pidFile != "" { + if err := ioutil.WriteFile(ex.pidFile, []byte(strconv.Itoa(os.Getpid())), 0644); err != nil { + Fatalf("writing pid file: %v", err) + } + } + + // Wait for the process to exit. + ws, err := c.WaitPID(pid, ex.clearStatus) + if err != nil { + Fatalf("waiting on pid %d: %v", pid, err) + } + *waitStatus = ws + return subcommands.ExitSuccess +} + +func (ex *Exec) execAndWait(waitStatus *syscall.WaitStatus) subcommands.ExitStatus { + binPath := specutils.ExePath + var args []string + + // The command needs to write a pid file so that execAndWait can tell + // when it has started. If no pid-file was provided, we should use a + // filename in a temp directory. + pidFile := ex.pidFile + if pidFile == "" { + tmpDir, err := ioutil.TempDir("", "exec-pid-") + if err != nil { + Fatalf("creating TempDir: %v", err) + } + defer os.RemoveAll(tmpDir) + pidFile = filepath.Join(tmpDir, "pid") + args = append(args, "--pid-file="+pidFile) + } + + // Add the rest of the args, excluding the "detach" flag. + for _, a := range os.Args[1:] { + if strings.Contains(a, "detach") { + // Replace with the "private-clear-status" flag, which tells + // the new process it's a detached child and shouldn't + // clear the exit status of the sentry process. + args = append(args, fmt.Sprintf("--%s=false", privateClearStatusFlag)) + } else { + args = append(args, a) + } + } + + cmd := exec.Command(binPath, args...) + cmd.Args[0] = "runsc-exec" + + // Exec stdio defaults to current process stdio. + cmd.Stdin = os.Stdin + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + + // If the console control socket file is provided, then create a new + // pty master/slave pair and set the TTY on the sandbox process. + if ex.consoleSocket != "" { + // Create a new TTY pair and send the master on the provided + // socket. 
+ tty, err := console.NewWithSocket(ex.consoleSocket) + if err != nil { + Fatalf("setting up console with socket %q: %v", ex.consoleSocket, err) + } + defer tty.Close() + + // Set stdio to the new TTY slave. + cmd.Stdin = tty + cmd.Stdout = tty + cmd.Stderr = tty + cmd.SysProcAttr = &syscall.SysProcAttr{ + Setsid: true, + Setctty: true, + Ctty: int(tty.Fd()), + } + } + + if err := cmd.Start(); err != nil { + Fatalf("failure to start child exec process, err: %v", err) + } + + log.Infof("Started child (PID: %d) to exec and wait: %s %s", cmd.Process.Pid, binPath, args) + + // Wait for PID file to ensure that child process has started. Otherwise, + // '--process' file is deleted as soon as this process returns and the child + // may fail to read it. + ready := func() (bool, error) { + pidb, err := ioutil.ReadFile(pidFile) + if err == nil { + // File appeared, check whether pid is fully written. + pid, err := strconv.Atoi(string(pidb)) + if err != nil { + return false, nil + } + return pid == cmd.Process.Pid, nil + } + if pe, ok := err.(*os.PathError); !ok || pe.Err != syscall.ENOENT { + return false, err + } + // No file yet, continue to wait... + return false, nil + } + if err := specutils.WaitForReady(cmd.Process.Pid, 10*time.Second, ready); err != nil { + Fatalf("unexpected error waiting for PID file, err: %v", err) + } + + *waitStatus = 0 + return subcommands.ExitSuccess +} + +// parseArgs parses exec information from the command line or a JSON file +// depending on whether the --process flag was used. Returns an ExecArgs and +// the ID of the container to be used. +func (ex *Exec) parseArgs(f *flag.FlagSet) (*control.ExecArgs, string, error) { + if ex.processPath == "" { + // Requires at least a container ID and command. + if f.NArg() < 2 { + f.Usage() + return nil, "", fmt.Errorf("both a container-id and command are required") + } + e, err := ex.argsFromCLI(f.Args()[1:]) + return e, f.Arg(0), err + } + // Requires only the container ID. 
+ if f.NArg() != 1 { + f.Usage() + return nil, "", fmt.Errorf("a container-id is required") + } + e, err := ex.argsFromProcessFile() + return e, f.Arg(0), err +} + +func (ex *Exec) argsFromCLI(argv []string) (*control.ExecArgs, error) { + extraKGIDs := make([]auth.KGID, 0, len(ex.extraKGIDs)) + for _, s := range ex.extraKGIDs { + kgid, err := strconv.Atoi(s) + if err != nil { + Fatalf("parsing GID: %s, %v", s, err) + } + extraKGIDs = append(extraKGIDs, auth.KGID(kgid)) + } + + var caps *auth.TaskCapabilities + if len(ex.caps) > 0 { + var err error + caps, err = capabilities(ex.caps) + if err != nil { + return nil, fmt.Errorf("capabilities error: %v", err) + } + } + + return &control.ExecArgs{ + Argv: argv, + WorkingDirectory: ex.cwd, + KUID: ex.user.kuid, + KGID: ex.user.kgid, + ExtraKGIDs: extraKGIDs, + Capabilities: caps, + StdioIsPty: ex.consoleSocket != "", + FilePayload: urpc.FilePayload{[]*os.File{os.Stdin, os.Stdout, os.Stderr}}, + }, nil +} + +func (ex *Exec) argsFromProcessFile() (*control.ExecArgs, error) { + f, err := os.Open(ex.processPath) + if err != nil { + return nil, fmt.Errorf("error opening process file: %s, %v", ex.processPath, err) + } + defer f.Close() + var p specs.Process + if err := json.NewDecoder(f).Decode(&p); err != nil { + return nil, fmt.Errorf("error parsing process file: %s, %v", ex.processPath, err) + } + return argsFromProcess(&p) +} + +// argsFromProcess performs all the non-IO conversion from the Process struct +// to ExecArgs. +func argsFromProcess(p *specs.Process) (*control.ExecArgs, error) { + // Create capabilities. + var caps *auth.TaskCapabilities + if p.Capabilities != nil { + var err error + // enableRaw is set to true to prevent the filtering out of + // CAP_NET_RAW. This is the opposite of Create() because exec + // requires the capability to be set explicitly, while 'docker + // run' sets it by default. 
// resolveEnvs transforms lists of environment variables into a single list of
// environment variables. If a variable is defined multiple times, the last
// value is used.
//
// The result is deterministic: each variable keeps the position at which its
// name first appeared. Every entry must have the form NAME=VALUE; an entry
// without "=" yields an error. The returned slice is never nil on success.
func resolveEnvs(envs ...[]string) ([]string, error) {
	total := 0
	for _, env := range envs {
		total += len(env)
	}

	// position maps a variable name to its index in resolved, so a later
	// definition overwrites the earlier one in place.
	position := make(map[string]int, total)
	resolved := make([]string, 0, total)
	for _, env := range envs {
		for _, str := range env {
			eq := strings.IndexByte(str, '=')
			if eq < 0 {
				return nil, fmt.Errorf("invalid variable: %s", str)
			}
			name := str[:eq]
			if i, ok := position[name]; ok {
				resolved[i] = str
				continue
			}
			position[name] = len(resolved)
			resolved = append(resolved, str)
		}
	}
	return resolved, nil
}
// stringSlice is a flag value that accumulates: every occurrence of the flag
// on the command line appends one more element. For example, for a flag
// called "x", "runsc exec -x foo -x bar" produces the stringSlice
// {"foo", "bar"}.
type stringSlice []string

// String implements flag.Value.String. It renders the collected values the
// way fmt prints a slice with %v.
func (ss *stringSlice) String() string {
	return fmt.Sprint(*ss)
}

// Get implements flag.Getter.Get and returns the receiver itself.
func (ss *stringSlice) Get() interface{} {
	return ss
}

// Set implements flag.Value.Set. It appends s and never fails.
func (ss *stringSlice) Set(s string) error {
	grown := append(*ss, s)
	*ss = grown
	return nil
}
+type user struct { + kuid auth.KUID + kgid auth.KGID +} + +func (u *user) String() string { + return fmt.Sprintf("%+v", *u) +} + +func (u *user) Get() interface{} { + return u +} + +func (u *user) Set(s string) error { + parts := strings.SplitN(s, ":", 2) + kuid, err := strconv.Atoi(parts[0]) + if err != nil { + return fmt.Errorf("couldn't parse UID: %s", parts[0]) + } + u.kuid = auth.KUID(kuid) + if len(parts) > 1 { + kgid, err := strconv.Atoi(parts[1]) + if err != nil { + return fmt.Errorf("couldn't parse GID: %s", parts[1]) + } + u.kgid = auth.KGID(kgid) + } + return nil +} diff --git a/runsc/cmd/gofer.go b/runsc/cmd/gofer.go new file mode 100644 index 000000000..bccb29397 --- /dev/null +++ b/runsc/cmd/gofer.go @@ -0,0 +1,446 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package cmd + +import ( + "context" + "encoding/json" + "fmt" + "os" + "path/filepath" + "strings" + "sync" + "syscall" + + "flag" + "github.com/google/subcommands" + specs "github.com/opencontainers/runtime-spec/specs-go" + "gvisor.googlesource.com/gvisor/pkg/log" + "gvisor.googlesource.com/gvisor/pkg/p9" + "gvisor.googlesource.com/gvisor/pkg/unet" + "gvisor.googlesource.com/gvisor/runsc/boot" + "gvisor.googlesource.com/gvisor/runsc/fsgofer" + "gvisor.googlesource.com/gvisor/runsc/fsgofer/filter" + "gvisor.googlesource.com/gvisor/runsc/specutils" +) + +var caps = []string{ + "CAP_CHOWN", + "CAP_DAC_OVERRIDE", + "CAP_DAC_READ_SEARCH", + "CAP_FOWNER", + "CAP_FSETID", + "CAP_SYS_CHROOT", +} + +// goferCaps is the minimal set of capabilities needed by the Gofer to operate +// on files. +var goferCaps = &specs.LinuxCapabilities{ + Bounding: caps, + Effective: caps, + Permitted: caps, +} + +// Gofer implements subcommands.Command for the "gofer" command, which starts a +// filesystem gofer. This command should not be called directly. +type Gofer struct { + bundleDir string + ioFDs intFlags + applyCaps bool + setUpRoot bool + + panicOnWrite bool + specFD int + mountsFD int +} + +// Name implements subcommands.Command. +func (*Gofer) Name() string { + return "gofer" +} + +// Synopsis implements subcommands.Command. +func (*Gofer) Synopsis() string { + return "launch a gofer process that serves files over 9P protocol (internal use only)" +} + +// Usage implements subcommands.Command. +func (*Gofer) Usage() string { + return `gofer [flags]` +} + +// SetFlags implements subcommands.Command. +func (g *Gofer) SetFlags(f *flag.FlagSet) { + f.StringVar(&g.bundleDir, "bundle", "", "path to the root of the bundle directory, defaults to the current directory") + f.Var(&g.ioFDs, "io-fds", "list of FDs to connect 9P servers. 
They must follow this order: root first, then mounts as defined in the spec") + f.BoolVar(&g.applyCaps, "apply-caps", true, "if true, apply capabilities to restrict what the Gofer process can do") + f.BoolVar(&g.panicOnWrite, "panic-on-write", false, "if true, panics on attempts to write to RO mounts. RW mounts are unnaffected") + f.BoolVar(&g.setUpRoot, "setup-root", true, "if true, set up an empty root for the process") + f.IntVar(&g.specFD, "spec-fd", -1, "required fd with the container spec") + f.IntVar(&g.mountsFD, "mounts-fd", -1, "mountsFD is the file descriptor to write list of mounts after they have been resolved (direct paths, no symlinks).") +} + +// Execute implements subcommands.Command. +func (g *Gofer) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) subcommands.ExitStatus { + if g.bundleDir == "" || len(g.ioFDs) < 1 || g.specFD < 0 { + f.Usage() + return subcommands.ExitUsageError + } + + specFile := os.NewFile(uintptr(g.specFD), "spec file") + defer specFile.Close() + spec, err := specutils.ReadSpecFromFile(g.bundleDir, specFile) + if err != nil { + Fatalf("reading spec: %v", err) + } + + conf := args[0].(*boot.Config) + + if g.setUpRoot { + if err := setupRootFS(spec, conf); err != nil { + Fatalf("Error setting up root FS: %v", err) + } + } + if g.applyCaps { + // Disable caps when calling myself again. + // Note: minimal argument handling for the default case to keep it simple. + args := os.Args + args = append(args, "--apply-caps=false", "--setup-root=false") + if err := setCapsAndCallSelf(args, goferCaps); err != nil { + Fatalf("Unable to apply caps: %v", err) + } + panic("unreachable") + } + + // Find what path is going to be served by this gofer. + root := spec.Root.Path + if !conf.TestOnlyAllowRunAsCurrentUserWithoutChroot { + root = "/root" + } + + // Resolve mount points paths, then replace mounts from our spec and send the + // mount list over to the sandbox, so they are both in sync. 
+ // + // Note that all mount points have been mounted in the proper location in + // setupRootFS(). + cleanMounts, err := resolveMounts(spec.Mounts, root) + if err != nil { + Fatalf("Failure to resolve mounts: %v", err) + } + spec.Mounts = cleanMounts + go func() { + if err := g.writeMounts(cleanMounts); err != nil { + panic(fmt.Sprintf("Failed to write mounts: %v", err)) + } + }() + + specutils.LogSpec(spec) + + // fsgofer should run with a umask of 0, because we want to preserve file + // modes exactly as sent by the sandbox, which will have applied its own umask. + syscall.Umask(0) + + if err := syscall.Chroot(root); err != nil { + Fatalf("failed to chroot to %q: %v", root, err) + } + if err := syscall.Chdir("/"); err != nil { + Fatalf("changing working dir: %v", err) + } + log.Infof("Process chroot'd to %q", root) + + // Start with root mount, then add any other additional mount as needed. + ats := make([]p9.Attacher, 0, len(spec.Mounts)+1) + ap, err := fsgofer.NewAttachPoint("/", fsgofer.Config{ + ROMount: spec.Root.Readonly, + PanicOnWrite: g.panicOnWrite, + }) + if err != nil { + Fatalf("creating attach point: %v", err) + } + ats = append(ats, ap) + log.Infof("Serving %q mapped to %q on FD %d (ro: %t)", "/", root, g.ioFDs[0], spec.Root.Readonly) + + mountIdx := 1 // first one is the root + for _, m := range spec.Mounts { + if specutils.Is9PMount(m) { + cfg := fsgofer.Config{ + ROMount: isReadonlyMount(m.Options), + PanicOnWrite: g.panicOnWrite, + } + ap, err := fsgofer.NewAttachPoint(m.Destination, cfg) + if err != nil { + Fatalf("creating attach point: %v", err) + } + ats = append(ats, ap) + + if mountIdx >= len(g.ioFDs) { + Fatalf("no FD found for mount. Did you forget --io-fd? mount: %d, %v", len(g.ioFDs), m) + } + log.Infof("Serving %q mapped on FD %d (ro: %t)", m.Destination, g.ioFDs[mountIdx], cfg.ROMount) + mountIdx++ + } + } + if mountIdx != len(g.ioFDs) { + Fatalf("too many FDs passed for mounts. 
mounts: %d, FDs: %d", mountIdx, len(g.ioFDs)) + } + + if err := filter.Install(); err != nil { + Fatalf("installing seccomp filters: %v", err) + } + + runServers(ats, g.ioFDs) + return subcommands.ExitSuccess +} + +func runServers(ats []p9.Attacher, ioFDs []int) { + // Run the loops and wait for all to exit. + var wg sync.WaitGroup + for i, ioFD := range ioFDs { + wg.Add(1) + go func(ioFD int, at p9.Attacher) { + socket, err := unet.NewSocket(ioFD) + if err != nil { + Fatalf("creating server on FD %d: %v", ioFD, err) + } + s := p9.NewServer(at) + if err := s.Handle(socket); err != nil { + Fatalf("P9 server returned error. Gofer is shutting down. FD: %d, err: %v", ioFD, err) + } + wg.Done() + }(ioFD, ats[i]) + } + wg.Wait() + log.Infof("All 9P servers exited.") +} + +func (g *Gofer) writeMounts(mounts []specs.Mount) error { + bytes, err := json.Marshal(mounts) + if err != nil { + return err + } + + f := os.NewFile(uintptr(g.mountsFD), "mounts file") + defer f.Close() + + for written := 0; written < len(bytes); { + w, err := f.Write(bytes[written:]) + if err != nil { + return err + } + written += w + } + return nil +} + +func isReadonlyMount(opts []string) bool { + for _, o := range opts { + if o == "ro" { + return true + } + } + return false +} + +func setupRootFS(spec *specs.Spec, conf *boot.Config) error { + // Convert all shared mounts into slaves to be sure that nothing will be + // propagated outside of our namespace. + if err := syscall.Mount("", "/", "", syscall.MS_SLAVE|syscall.MS_REC, ""); err != nil { + Fatalf("error converting mounts: %v", err) + } + + root := spec.Root.Path + if !conf.TestOnlyAllowRunAsCurrentUserWithoutChroot { + // FIXME: runsc can't be re-executed without + // /proc, so we create a tmpfs mount, mount ./proc and ./root + // there, then move this mount to the root and after + // setCapsAndCallSelf, runsc will chroot into /root. 
+ // + // We need a directory to construct a new root and we know that + // runsc can't start without /proc, so we can use it for this. + flags := uintptr(syscall.MS_NOSUID | syscall.MS_NODEV | syscall.MS_NOEXEC) + if err := syscall.Mount("runsc-root", "/proc", "tmpfs", flags, ""); err != nil { + Fatalf("error mounting tmpfs: %v", err) + } + + // Prepare tree structure for pivot_root(2). + os.Mkdir("/proc/proc", 0755) + os.Mkdir("/proc/root", 0755) + if err := syscall.Mount("runsc-proc", "/proc/proc", "proc", flags|syscall.MS_RDONLY, ""); err != nil { + Fatalf("error mounting proc: %v", err) + } + root = "/proc/root" + } + + // Mount root path followed by submounts. + if err := syscall.Mount(spec.Root.Path, root, "bind", syscall.MS_BIND|syscall.MS_REC, ""); err != nil { + return fmt.Errorf("mounting root on root (%q) err: %v", root, err) + } + + flags := uint32(syscall.MS_SLAVE | syscall.MS_REC) + if spec.Linux != nil && spec.Linux.RootfsPropagation != "" { + flags = specutils.PropOptionsToFlags([]string{spec.Linux.RootfsPropagation}) + } + if err := syscall.Mount("", root, "", uintptr(flags), ""); err != nil { + return fmt.Errorf("mounting root (%q) with flags: %#x, err: %v", root, flags, err) + } + + // Bind mount the spec's mounts inside root, resolving symlinks in their destinations. + if err := setupMounts(spec.Mounts, root); err != nil { + Fatalf("error setting up FS: %v", err) + } + + // Create working directory if needed. + if spec.Process.Cwd != "" { + dst, err := resolveSymlinks(root, spec.Process.Cwd) + if err != nil { + return fmt.Errorf("resolving symlinks to %q: %v", spec.Process.Cwd, err) + } + if err := os.MkdirAll(dst, 0755); err != nil { + return fmt.Errorf("creating working directory %q: %v", spec.Process.Cwd, err) + } + } + + // Check if root needs to be remounted as readonly. + if spec.Root.Readonly { + // If root is a mount point but not read-only, we can change mount options + // to make it read-only for extra safety. 
+ log.Infof("Remounting root as readonly: %q", root) + flags := uintptr(syscall.MS_BIND | syscall.MS_REMOUNT | syscall.MS_RDONLY | syscall.MS_REC) + if err := syscall.Mount(root, root, "bind", flags, ""); err != nil { + return fmt.Errorf("remounting root as read-only with source: %q, target: %q, flags: %#x, err: %v", root, root, flags, err) + } + } + + if !conf.TestOnlyAllowRunAsCurrentUserWithoutChroot { + if err := pivotRoot("/proc"); err != nil { + Fatalf("faild to change the root file system: %v", err) + } + if err := os.Chdir("/"); err != nil { + Fatalf("failed to change working directory") + } + } + return nil +} + +// setupMounts bind mounts all mounts specified in the spec in their correct +// location inside root. It will resolve relative paths and symlinks. It also +// creates directories as needed. +func setupMounts(mounts []specs.Mount, root string) error { + for _, m := range mounts { + if m.Type != "bind" || !specutils.IsSupportedDevMount(m) { + continue + } + + dst, err := resolveSymlinks(root, m.Destination) + if err != nil { + return fmt.Errorf("resolving symlinks to %q: %v", m.Destination, err) + } + + flags := specutils.OptionsToFlags(m.Options) | syscall.MS_BIND + log.Infof("Mounting src: %q, dst: %q, flags: %#x", m.Source, dst, flags) + if err := specutils.Mount(m.Source, dst, m.Type, flags); err != nil { + return fmt.Errorf("mounting %v: %v", m, err) + } + + // Set propagation options that cannot be set together with other options. + flags = specutils.PropOptionsToFlags(m.Options) + if flags != 0 { + if err := syscall.Mount("", dst, "", uintptr(flags), ""); err != nil { + return fmt.Errorf("mount dst: %q, flags: %#x, err: %v", dst, flags, err) + } + } + } + return nil +} + +// resolveMounts resolves relative paths and symlinks to mount points. +// +// Note: mount points must already be in place for resolution to work. 
+// Otherwise, it may follow symlinks to locations that would be overwritten +// with another mount point and return the wrong location. In short, make sure +// setupMounts() has been called before. +func resolveMounts(mounts []specs.Mount, root string) ([]specs.Mount, error) { + cleanMounts := make([]specs.Mount, 0, len(mounts)) + for _, m := range mounts { + if m.Type != "bind" || !specutils.IsSupportedDevMount(m) { + cleanMounts = append(cleanMounts, m) + continue + } + dst, err := resolveSymlinks(root, m.Destination) + if err != nil { + return nil, fmt.Errorf("resolving symlinks to %q: %v", m.Destination, err) + } + relDst, err := filepath.Rel(root, dst) + if err != nil { + panic(fmt.Sprintf("%q could not be made relative to %q: %v", dst, root, err)) + } + cpy := m + cpy.Destination = filepath.Join("/", relDst) + cleanMounts = append(cleanMounts, cpy) + } + return cleanMounts, nil +} + +// resolveSymlinks walks 'rel' having 'root' as the root directory. If there are +// symlinks, they are evaluated relative to 'root' to ensure the end result is +// the same as if the process was running inside the container. +func resolveSymlinks(root, rel string) (string, error) { + return resolveSymlinksImpl(root, root, rel, 255) +} + +func resolveSymlinksImpl(root, base, rel string, followCount uint) (string, error) { + if followCount == 0 { + return "", fmt.Errorf("too many symlinks to follow, path: %q", filepath.Join(base, rel)) + } + + rel = filepath.Clean(rel) + for _, name := range strings.Split(rel, string(filepath.Separator)) { + if name == "" { + continue + } + // Note that Join() resolves things like ".." and returns a clean path. + path := filepath.Join(base, name) + if !strings.HasPrefix(path, root) { + // One cannot '..' their way out of root. + path = root + continue + } + fi, err := os.Lstat(path) + if err != nil { + if !os.IsNotExist(err) { + return "", err + } + // Not found means there is no symlink to check. Just keep walking dirs. 
+ base = path + continue + } + if fi.Mode()&os.ModeSymlink != 0 { + link, err := os.Readlink(path) + if err != nil { + return "", err + } + if filepath.IsAbs(link) { + base = root + } + base, err = resolveSymlinksImpl(root, base, link, followCount-1) + if err != nil { + return "", err + } + continue + } + base = path + } + return base, nil +} diff --git a/runsc/cmd/kill.go b/runsc/cmd/kill.go new file mode 100644 index 000000000..aed5f3291 --- /dev/null +++ b/runsc/cmd/kill.go @@ -0,0 +1,154 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package cmd + +import ( + "context" + "fmt" + "strconv" + "strings" + "syscall" + + "flag" + "github.com/google/subcommands" + "golang.org/x/sys/unix" + "gvisor.googlesource.com/gvisor/runsc/boot" + "gvisor.googlesource.com/gvisor/runsc/container" +) + +// Kill implements subcommands.Command for the "kill" command. +type Kill struct { + all bool + pid int +} + +// Name implements subcommands.Command.Name. +func (*Kill) Name() string { + return "kill" +} + +// Synopsis implements subcommands.Command.Synopsis. +func (*Kill) Synopsis() string { + return "sends a signal to the container" +} + +// Usage implements subcommands.Command.Usage. +func (*Kill) Usage() string { + return `kill <container id> [signal]` +} + +// SetFlags implements subcommands.Command.SetFlags. 
+func (k *Kill) SetFlags(f *flag.FlagSet) { + f.BoolVar(&k.all, "all", false, "send the specified signal to all processes inside the container") + f.IntVar(&k.pid, "pid", 0, "send the specified signal to a specific process") +} + +// Execute implements subcommands.Command.Execute. +func (k *Kill) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) subcommands.ExitStatus { + if f.NArg() == 0 || f.NArg() > 2 { + f.Usage() + return subcommands.ExitUsageError + } + + id := f.Arg(0) + conf := args[0].(*boot.Config) + + if k.pid != 0 && k.all { + Fatalf("it is invalid to specify both --all and --pid") + } + + c, err := container.Load(conf.RootDir, id) + if err != nil { + Fatalf("loading container: %v", err) + } + + // The OCI command-line spec says that the signal should be specified + // via a flag, but runc (and things that call runc) pass it as an + // argument. + signal := f.Arg(1) + if signal == "" { + signal = "TERM" + } + + sig, err := parseSignal(signal) + if err != nil { + Fatalf("%v", err) + } + + if k.pid != 0 { + if err := c.SignalProcess(sig, int32(k.pid)); err != nil { + Fatalf("failed to signal pid %d: %v", k.pid, err) + } + } else { + if err := c.SignalContainer(sig, k.all); err != nil { + Fatalf("%v", err) + } + } + return subcommands.ExitSuccess +} + +func parseSignal(s string) (syscall.Signal, error) { + n, err := strconv.Atoi(s) + if err == nil { + sig := syscall.Signal(n) + for _, msig := range signalMap { + if sig == msig { + return sig, nil + } + } + return -1, fmt.Errorf("unknown signal %q", s) + } + if sig, ok := signalMap[strings.TrimPrefix(strings.ToUpper(s), "SIG")]; ok { + return sig, nil + } + return -1, fmt.Errorf("unknown signal %q", s) +} + +var signalMap = map[string]syscall.Signal{ + "ABRT": unix.SIGABRT, + "ALRM": unix.SIGALRM, + "BUS": unix.SIGBUS, + "CHLD": unix.SIGCHLD, + "CLD": unix.SIGCLD, + "CONT": unix.SIGCONT, + "FPE": unix.SIGFPE, + "HUP": unix.SIGHUP, + "ILL": unix.SIGILL, + "INT": unix.SIGINT, + "IO": 
unix.SIGIO, + "IOT": unix.SIGIOT, + "KILL": unix.SIGKILL, + "PIPE": unix.SIGPIPE, + "POLL": unix.SIGPOLL, + "PROF": unix.SIGPROF, + "PWR": unix.SIGPWR, + "QUIT": unix.SIGQUIT, + "SEGV": unix.SIGSEGV, + "STKFLT": unix.SIGSTKFLT, + "STOP": unix.SIGSTOP, + "SYS": unix.SIGSYS, + "TERM": unix.SIGTERM, + "TRAP": unix.SIGTRAP, + "TSTP": unix.SIGTSTP, + "TTIN": unix.SIGTTIN, + "TTOU": unix.SIGTTOU, + "URG": unix.SIGURG, + "USR1": unix.SIGUSR1, + "USR2": unix.SIGUSR2, + "VTALRM": unix.SIGVTALRM, + "WINCH": unix.SIGWINCH, + "XCPU": unix.SIGXCPU, + "XFSZ": unix.SIGXFSZ, +} diff --git a/runsc/cmd/list.go b/runsc/cmd/list.go new file mode 100644 index 000000000..1f5ca2473 --- /dev/null +++ b/runsc/cmd/list.go @@ -0,0 +1,117 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package cmd + +import ( + "context" + "encoding/json" + "fmt" + "os" + "text/tabwriter" + "time" + + "flag" + "github.com/google/subcommands" + specs "github.com/opencontainers/runtime-spec/specs-go" + "gvisor.googlesource.com/gvisor/runsc/boot" + "gvisor.googlesource.com/gvisor/runsc/container" +) + +// List implements subcommands.Command for the "list" command. +type List struct { + quiet bool + format string +} + +// Name implements subcommands.Command.Name. +func (*List) Name() string { + return "list" +} + +// Synopsis implements subcommands.Command.Synopsis. 
+func (*List) Synopsis() string { + return "list containers started by runsc with the given root" +} + +// Usage implements subcommands.Command.Usage. +func (*List) Usage() string { + return `list [flags]` +} + +// SetFlags implements subcommands.Command.SetFlags. +func (l *List) SetFlags(f *flag.FlagSet) { + f.BoolVar(&l.quiet, "quiet", false, "only list container ids") + f.StringVar(&l.format, "format", "text", "output format: 'text' (default) or 'json'") +} + +// Execute implements subcommands.Command.Execute. +func (l *List) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) subcommands.ExitStatus { + if f.NArg() != 0 { + f.Usage() + return subcommands.ExitUsageError + } + + conf := args[0].(*boot.Config) + ids, err := container.List(conf.RootDir) + if err != nil { + Fatalf("%v", err) + } + + if l.quiet { + for _, id := range ids { + fmt.Println(id) + } + return subcommands.ExitSuccess + } + + // Collect the containers. + var containers []*container.Container + for _, id := range ids { + c, err := container.Load(conf.RootDir, id) + if err != nil { + Fatalf("loading container %q: %v", id, err) + } + containers = append(containers, c) + } + + switch l.format { + case "text": + // Print a nice table. + w := tabwriter.NewWriter(os.Stdout, 12, 1, 3, ' ', 0) + fmt.Fprint(w, "ID\tPID\tSTATUS\tBUNDLE\tCREATED\tOWNER\n") + for _, c := range containers { + fmt.Fprintf(w, "%s\t%d\t%s\t%s\t%s\t%s\n", + c.ID, + c.SandboxPid(), + c.Status, + c.BundleDir, + c.CreatedAt.Format(time.RFC3339Nano), + c.Owner) + } + w.Flush() + case "json": + // Print just the states. 
+ var states []specs.State + for _, c := range containers { + states = append(states, c.State()) + } + if err := json.NewEncoder(os.Stdout).Encode(states); err != nil { + Fatalf("marshaling container state: %v", err) + } + default: + Fatalf("unknown list format %q", l.format) + } + return subcommands.ExitSuccess +} diff --git a/runsc/cmd/path.go b/runsc/cmd/path.go new file mode 100644 index 000000000..0e9ef7fa5 --- /dev/null +++ b/runsc/cmd/path.go @@ -0,0 +1,28 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package cmd + +import ( + "os" +) + +// getwdOrDie returns the current working directory and dies if it cannot. +func getwdOrDie() string { + wd, err := os.Getwd() + if err != nil { + Fatalf("getting current working directory: %v", err) + } + return wd +} diff --git a/runsc/cmd/pause.go b/runsc/cmd/pause.go new file mode 100644 index 000000000..11b36aa10 --- /dev/null +++ b/runsc/cmd/pause.go @@ -0,0 +1,68 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +package cmd + +import ( + "context" + + "flag" + "github.com/google/subcommands" + "gvisor.googlesource.com/gvisor/runsc/boot" + "gvisor.googlesource.com/gvisor/runsc/container" +) + +// Pause implements subcommands.Command for the "pause" command. +type Pause struct{} + +// Name implements subcommands.Command.Name. +func (*Pause) Name() string { + return "pause" +} + +// Synopsis implements subcommands.Command.Synopsis. +func (*Pause) Synopsis() string { + return "pause suspends all processes in a container" +} + +// Usage implements subcommands.Command.Usage. +func (*Pause) Usage() string { + return `pause <container id> - pause process in instance of container.` +} + +// SetFlags implements subcommands.Command.SetFlags. +func (*Pause) SetFlags(f *flag.FlagSet) { +} + +// Execute implements subcommands.Command.Execute. +func (*Pause) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) subcommands.ExitStatus { + if f.NArg() != 1 { + f.Usage() + return subcommands.ExitUsageError + } + + id := f.Arg(0) + conf := args[0].(*boot.Config) + + cont, err := container.Load(conf.RootDir, id) + if err != nil { + Fatalf("loading container: %v", err) + } + + if err := cont.Pause(); err != nil { + Fatalf("pause failed: %v", err) + } + + return subcommands.ExitSuccess +} diff --git a/runsc/cmd/ps.go b/runsc/cmd/ps.go new file mode 100644 index 000000000..3a3e6f17a --- /dev/null +++ b/runsc/cmd/ps.go @@ -0,0 +1,86 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package cmd + +import ( + "context" + "fmt" + + "flag" + "github.com/google/subcommands" + "gvisor.googlesource.com/gvisor/pkg/sentry/control" + "gvisor.googlesource.com/gvisor/runsc/boot" + "gvisor.googlesource.com/gvisor/runsc/container" +) + +// PS implements subcommands.Command for the "ps" command. +type PS struct { + format string +} + +// Name implements subcommands.Command.Name. +func (*PS) Name() string { + return "ps" +} + +// Synopsis implements subcommands.Command.Synopsis. +func (*PS) Synopsis() string { + return "ps displays the processes running inside a container" +} + +// Usage implements subcommands.Command.Usage. +func (*PS) Usage() string { + return "<container-id> [ps options]" +} + +// SetFlags implements subcommands.Command.SetFlags. +func (ps *PS) SetFlags(f *flag.FlagSet) { + f.StringVar(&ps.format, "format", "table", "output format. Select one of: table or json (default: table)") +} + +// Execute implements subcommands.Command.Execute. 
+func (ps *PS) Execute(ctx context.Context, f *flag.FlagSet, args ...interface{}) subcommands.ExitStatus { + if f.NArg() != 1 { + f.Usage() + return subcommands.ExitUsageError + } + + id := f.Arg(0) + conf := args[0].(*boot.Config) + + c, err := container.Load(conf.RootDir, id) + if err != nil { + Fatalf("loading sandbox: %v", err) + } + pList, err := c.Processes() + if err != nil { + Fatalf("getting processes for container: %v", err) + } + + switch ps.format { + case "table": + fmt.Println(control.ProcessListToTable(pList)) + case "json": + o, err := control.PrintPIDsJSON(pList) + if err != nil { + Fatalf("generating JSON: %v", err) + } + fmt.Println(o) + default: + Fatalf("unsupported format: %s", ps.format) + } + + return subcommands.ExitSuccess +} diff --git a/runsc/cmd/restore.go b/runsc/cmd/restore.go new file mode 100644 index 000000000..3ab2f5676 --- /dev/null +++ b/runsc/cmd/restore.go @@ -0,0 +1,106 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package cmd + +import ( + "context" + "path/filepath" + "syscall" + + "flag" + "github.com/google/subcommands" + "gvisor.googlesource.com/gvisor/runsc/boot" + "gvisor.googlesource.com/gvisor/runsc/container" + "gvisor.googlesource.com/gvisor/runsc/specutils" +) + +// Restore implements subcommands.Command for the "restore" command. +type Restore struct { + // Restore flags are a super-set of those for Create. 
+ Create + + // imagePath is the path to the saved container image. + imagePath string + + // detach indicates that runsc has to start a process and exit without waiting for it. + detach bool +} + +// Name implements subcommands.Command.Name. +func (*Restore) Name() string { + return "restore" +} + +// Synopsis implements subcommands.Command.Synopsis. +func (*Restore) Synopsis() string { + return "restore a saved state of container (experimental)" +} + +// Usage implements subcommands.Command.Usage. +func (*Restore) Usage() string { + return `restore [flags] <container id> - restore saved state of container. +` +} + +// SetFlags implements subcommands.Command.SetFlags. +func (r *Restore) SetFlags(f *flag.FlagSet) { + r.Create.SetFlags(f) + f.StringVar(&r.imagePath, "image-path", "", "directory path to saved container image") + f.BoolVar(&r.detach, "detach", false, "detach from the container's process") + + // Unimplemented flags necessary for compatibility with docker. + + var nsr bool + f.BoolVar(&nsr, "no-subreaper", false, "ignored") + + var wp string + f.StringVar(&wp, "work-path", "", "ignored") +} + +// Execute implements subcommands.Command.Execute. 
+func (r *Restore) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) subcommands.ExitStatus { + if f.NArg() != 1 { + f.Usage() + return subcommands.ExitUsageError + } + + id := f.Arg(0) + conf := args[0].(*boot.Config) + waitStatus := args[1].(*syscall.WaitStatus) + + bundleDir := r.bundleDir + if bundleDir == "" { + bundleDir = getwdOrDie() + } + spec, err := specutils.ReadSpec(bundleDir) + if err != nil { + Fatalf("reading spec: %v", err) + } + specutils.LogSpec(spec) + + if r.imagePath == "" { + Fatalf("image-path flag must be provided") + } + + conf.RestoreFile = filepath.Join(r.imagePath, checkpointFileName) + + ws, err := container.Run(id, spec, conf, bundleDir, r.consoleSocket, r.pidFile, r.userLog, r.detach) + if err != nil { + Fatalf("running container: %v", err) + } + *waitStatus = ws + + return subcommands.ExitSuccess +} diff --git a/runsc/cmd/resume.go b/runsc/cmd/resume.go new file mode 100644 index 000000000..9a2ade41e --- /dev/null +++ b/runsc/cmd/resume.go @@ -0,0 +1,69 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package cmd + +import ( + "context" + + "flag" + "github.com/google/subcommands" + "gvisor.googlesource.com/gvisor/runsc/boot" + "gvisor.googlesource.com/gvisor/runsc/container" +) + +// Resume implements subcommands.Command for the "resume" command. +type Resume struct{} + +// Name implements subcommands.Command.Name. 
+func (*Resume) Name() string { + return "resume" +} + +// Synopsis implements subcommands.Command.Synopsis. +func (*Resume) Synopsis() string { + return "Resume unpauses a paused container" +} + +// Usage implements subcommands.Command.Usage. +func (*Resume) Usage() string { + return `resume <container id> - resume a paused container. +` +} + +// SetFlags implements subcommands.Command.SetFlags. +func (r *Resume) SetFlags(f *flag.FlagSet) { +} + +// Execute implements subcommands.Command.Execute. +func (r *Resume) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) subcommands.ExitStatus { + if f.NArg() != 1 { + f.Usage() + return subcommands.ExitUsageError + } + + id := f.Arg(0) + conf := args[0].(*boot.Config) + + cont, err := container.Load(conf.RootDir, id) + if err != nil { + Fatalf("loading container: %v", err) + } + + if err := cont.Resume(); err != nil { + Fatalf("resume failed: %v", err) + } + + return subcommands.ExitSuccess +} diff --git a/runsc/cmd/run.go b/runsc/cmd/run.go new file mode 100644 index 000000000..c228b4f93 --- /dev/null +++ b/runsc/cmd/run.go @@ -0,0 +1,87 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package cmd + +import ( + "context" + "syscall" + + "flag" + "github.com/google/subcommands" + "gvisor.googlesource.com/gvisor/runsc/boot" + "gvisor.googlesource.com/gvisor/runsc/container" + "gvisor.googlesource.com/gvisor/runsc/specutils" +) + +// Run implements subcommands.Command for the "run" command. +type Run struct { + // Run flags are a super-set of those for Create. + Create + + // detach indicates that runsc has to start a process and exit without waiting it. + detach bool +} + +// Name implements subcommands.Command.Name. +func (*Run) Name() string { + return "run" +} + +// Synopsis implements subcommands.Command.Synopsis. +func (*Run) Synopsis() string { + return "create and run a secure container" +} + +// Usage implements subcommands.Command.Usage. +func (*Run) Usage() string { + return `run [flags] <container id> - create and run a secure container. +` +} + +// SetFlags implements subcommands.Command.SetFlags. +func (r *Run) SetFlags(f *flag.FlagSet) { + f.BoolVar(&r.detach, "detach", false, "detach from the container's process") + r.Create.SetFlags(f) +} + +// Execute implements subcommands.Command.Execute. 
+func (r *Run) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) subcommands.ExitStatus { + if f.NArg() != 1 { + f.Usage() + return subcommands.ExitUsageError + } + + id := f.Arg(0) + conf := args[0].(*boot.Config) + waitStatus := args[1].(*syscall.WaitStatus) + + bundleDir := r.bundleDir + if bundleDir == "" { + bundleDir = getwdOrDie() + } + spec, err := specutils.ReadSpec(bundleDir) + if err != nil { + Fatalf("reading spec: %v", err) + } + specutils.LogSpec(spec) + + ws, err := container.Run(id, spec, conf, bundleDir, r.consoleSocket, r.pidFile, r.userLog, r.detach) + if err != nil { + Fatalf("running container: %v", err) + } + + *waitStatus = ws + return subcommands.ExitSuccess +} diff --git a/runsc/cmd/spec.go b/runsc/cmd/spec.go new file mode 100644 index 000000000..344da13ba --- /dev/null +++ b/runsc/cmd/spec.go @@ -0,0 +1,182 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package cmd + +import ( + "context" + "io/ioutil" + "os" + "path/filepath" + + "flag" + "github.com/google/subcommands" +) + +var specTemplate = []byte(`{ + "ociVersion": "1.0.0", + "process": { + "terminal": true, + "user": { + "uid": 0, + "gid": 0 + }, + "args": [ + "sh" + ], + "env": [ + "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", + "TERM=xterm" + ], + "cwd": "/", + "capabilities": { + "bounding": [ + "CAP_AUDIT_WRITE", + "CAP_KILL", + "CAP_NET_BIND_SERVICE" + ], + "effective": [ + "CAP_AUDIT_WRITE", + "CAP_KILL", + "CAP_NET_BIND_SERVICE" + ], + "inheritable": [ + "CAP_AUDIT_WRITE", + "CAP_KILL", + "CAP_NET_BIND_SERVICE" + ], + "permitted": [ + "CAP_AUDIT_WRITE", + "CAP_KILL", + "CAP_NET_BIND_SERVICE" + ], + "ambient": [ + "CAP_AUDIT_WRITE", + "CAP_KILL", + "CAP_NET_BIND_SERVICE" + ] + }, + "rlimits": [ + { + "type": "RLIMIT_NOFILE", + "hard": 1024, + "soft": 1024 + } + ] + }, + "root": { + "path": "rootfs", + "readonly": true + }, + "hostname": "runsc", + "mounts": [ + { + "destination": "/proc", + "type": "proc", + "source": "proc" + }, + { + "destination": "/dev", + "type": "tmpfs", + "source": "tmpfs", + "options": [] + }, + { + "destination": "/sys", + "type": "sysfs", + "source": "sysfs", + "options": [ + "nosuid", + "noexec", + "nodev", + "ro" + ] + } + ], + "linux": { + "namespaces": [ + { + "type": "pid" + }, + { + "type": "network" + }, + { + "type": "ipc" + }, + { + "type": "uts" + }, + { + "type": "mount" + } + ] + } +}`) + +// Spec implements subcommands.Command for the "spec" command. +type Spec struct { + bundle string +} + +// Name implements subcommands.Command.Name. +func (*Spec) Name() string { + return "spec" +} + +// Synopsis implements subcommands.Command.Synopsis. +func (*Spec) Synopsis() string { + return "create a new OCI bundle specification file" +} + +// Usage implements subcommands.Command.Usage. +func (*Spec) Usage() string { + return `spec [options] - create a new OCI bundle specification file. 
+ +The spec command creates a new specification file (config.json) for a new OCI bundle. + +The specification file is a starter file that runs the "sh" command in the container. You +should edit the file to suit your needs. You can find out more about the format of the +specification file by visiting the OCI runtime spec repository: +https://github.com/opencontainers/runtime-spec/ + +EXAMPLE: + $ mkdir -p bundle/rootfs + $ cd bundle + $ runsc spec + $ docker export $(docker create hello-world) | tar -xf - -C rootfs + $ sed -i 's;"sh";"/hello";' config.json + $ sudo runsc run hello + +` +} + +// SetFlags implements subcommands.Command.SetFlags. +func (s *Spec) SetFlags(f *flag.FlagSet) { + f.StringVar(&s.bundle, "bundle", ".", "path to the root of the OCI bundle") +} + +// Execute implements subcommands.Command.Execute. +func (s *Spec) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) subcommands.ExitStatus { + confPath := filepath.Join(s.bundle, "config.json") + if _, err := os.Stat(confPath); !os.IsNotExist(err) { + Fatalf("file %q already exists", confPath) + } + + if err := ioutil.WriteFile(confPath, specTemplate, 0664); err != nil { + Fatalf("writing to %q: %v", confPath, err) + } + + return subcommands.ExitSuccess +} diff --git a/runsc/cmd/start.go b/runsc/cmd/start.go new file mode 100644 index 000000000..657726251 --- /dev/null +++ b/runsc/cmd/start.go @@ -0,0 +1,65 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +package cmd + +import ( + "context" + + "flag" + "github.com/google/subcommands" + "gvisor.googlesource.com/gvisor/runsc/boot" + "gvisor.googlesource.com/gvisor/runsc/container" +) + +// Start implements subcommands.Command for the "start" command. +type Start struct{} + +// Name implements subcommands.Command.Name. +func (*Start) Name() string { + return "start" +} + +// Synopsis implements subcommands.Command.Synopsis. +func (*Start) Synopsis() string { + return "start a secure container" +} + +// Usage implements subcommands.Command.Usage. +func (*Start) Usage() string { + return `start <container id> - start a secure container.` +} + +// SetFlags implements subcommands.Command.SetFlags. +func (*Start) SetFlags(f *flag.FlagSet) {} + +// Execute implements subcommands.Command.Execute. +func (*Start) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) subcommands.ExitStatus { + if f.NArg() != 1 { + f.Usage() + return subcommands.ExitUsageError + } + + id := f.Arg(0) + conf := args[0].(*boot.Config) + + c, err := container.Load(conf.RootDir, id) + if err != nil { + Fatalf("loading container: %v", err) + } + if err := c.Start(conf); err != nil { + Fatalf("starting container: %v", err) + } + return subcommands.ExitSuccess +} diff --git a/runsc/cmd/state.go b/runsc/cmd/state.go new file mode 100644 index 000000000..f0d449b19 --- /dev/null +++ b/runsc/cmd/state.go @@ -0,0 +1,76 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package cmd + +import ( + "context" + "encoding/json" + "os" + + "flag" + "github.com/google/subcommands" + "gvisor.googlesource.com/gvisor/pkg/log" + "gvisor.googlesource.com/gvisor/runsc/boot" + "gvisor.googlesource.com/gvisor/runsc/container" +) + +// State implements subcommands.Command for the "state" command. +type State struct{} + +// Name implements subcommands.Command.Name. +func (*State) Name() string { + return "state" +} + +// Synopsis implements subcommands.Command.Synopsis. +func (*State) Synopsis() string { + return "get the state of a container" +} + +// Usage implements subcommands.Command.Usage. +func (*State) Usage() string { + return `state [flags] <container id> - get the state of a container` +} + +// SetFlags implements subcommands.Command.SetFlags. +func (*State) SetFlags(f *flag.FlagSet) {} + +// Execute implements subcommands.Command.Execute. +func (*State) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) subcommands.ExitStatus { + if f.NArg() != 1 { + f.Usage() + return subcommands.ExitUsageError + } + + id := f.Arg(0) + conf := args[0].(*boot.Config) + + c, err := container.Load(conf.RootDir, id) + if err != nil { + Fatalf("loading container: %v", err) + } + log.Debugf("Returning state for container %+v", c) + + state := c.State() + log.Debugf("State: %+v", state) + + // Write json-encoded state directly to stdout. 
+ b, err := json.MarshalIndent(state, "", " ") + if err != nil { + Fatalf("marshaling container state: %v", err) + } + os.Stdout.Write(b) + return subcommands.ExitSuccess +} diff --git a/runsc/cmd/wait.go b/runsc/cmd/wait.go new file mode 100644 index 000000000..a55a682f3 --- /dev/null +++ b/runsc/cmd/wait.go @@ -0,0 +1,127 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package cmd + +import ( + "context" + "encoding/json" + "os" + "syscall" + + "flag" + "github.com/google/subcommands" + "gvisor.googlesource.com/gvisor/runsc/boot" + "gvisor.googlesource.com/gvisor/runsc/container" +) + +const ( + unsetPID = -1 +) + +// Wait implements subcommands.Command for the "wait" command. +type Wait struct { + rootPID int + pid int +} + +// Name implements subcommands.Command.Name. +func (*Wait) Name() string { + return "wait" +} + +// Synopsis implements subcommands.Command.Synopsis. +func (*Wait) Synopsis() string { + return "wait on a process inside a container" +} + +// Usage implements subcommands.Command.Usage. +func (*Wait) Usage() string { + return `wait [flags] <container id>` +} + +// SetFlags implements subcommands.Command.SetFlags. 
+func (wt *Wait) SetFlags(f *flag.FlagSet) { + f.IntVar(&wt.rootPID, "rootpid", unsetPID, "select a PID in the sandbox root PID namespace to wait on instead of the container's root process") + f.IntVar(&wt.pid, "pid", unsetPID, "select a PID in the container's PID namespace to wait on instead of the container's root process") +} + +// Execute implements subcommands.Command.Execute. It waits for a process in a +// container to exit before returning. +func (wt *Wait) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) subcommands.ExitStatus { + if f.NArg() != 1 { + f.Usage() + return subcommands.ExitUsageError + } + // You can't specify both -pid and -rootpid. + if wt.rootPID != unsetPID && wt.pid != unsetPID { + Fatalf("only one of -pid and -rootPid can be set") + } + + id := f.Arg(0) + conf := args[0].(*boot.Config) + + c, err := container.Load(conf.RootDir, id) + if err != nil { + Fatalf("loading container: %v", err) + } + + var waitStatus syscall.WaitStatus + switch { + // Wait on the whole container. + case wt.rootPID == unsetPID && wt.pid == unsetPID: + ws, err := c.Wait() + if err != nil { + Fatalf("waiting on container %q: %v", c.ID, err) + } + waitStatus = ws + // Wait on a PID in the root PID namespace. + case wt.rootPID != unsetPID: + ws, err := c.WaitRootPID(int32(wt.rootPID), true /* clearStatus */) + if err != nil { + Fatalf("waiting on PID in root PID namespace %d in container %q: %v", wt.rootPID, c.ID, err) + } + waitStatus = ws + // Wait on a PID in the container's PID namespace. + case wt.pid != unsetPID: + ws, err := c.WaitPID(int32(wt.pid), true /* clearStatus */) + if err != nil { + Fatalf("waiting on PID %d in container %q: %v", wt.pid, c.ID, err) + } + waitStatus = ws + } + result := waitResult{ + ID: id, + ExitStatus: exitStatus(waitStatus), + } + // Write json-encoded wait result directly to stdout. 
+ if err := json.NewEncoder(os.Stdout).Encode(result); err != nil { + Fatalf("marshaling wait result: %v", err) + } + return subcommands.ExitSuccess +} + +type waitResult struct { + ID string `json:"id"` + ExitStatus int `json:"exitStatus"` +} + +// exitStatus returns the correct exit status for a process based on if it +// was signaled or exited cleanly. +func exitStatus(status syscall.WaitStatus) int { + if status.Signaled() { + return 128 + int(status.Signal()) + } + return status.ExitStatus() +} |