summaryrefslogtreecommitdiffhomepage
path: root/pkg/sentry/control
diff options
context:
space:
mode:
Diffstat (limited to 'pkg/sentry/control')
-rw-r--r--pkg/sentry/control/BUILD52
-rw-r--r--pkg/sentry/control/control.go17
-rw-r--r--pkg/sentry/control/logging.go136
-rw-r--r--pkg/sentry/control/pprof.go209
-rw-r--r--pkg/sentry/control/proc.go416
-rw-r--r--pkg/sentry/control/proc_test.go166
-rw-r--r--pkg/sentry/control/state.go73
7 files changed, 1069 insertions, 0 deletions
diff --git a/pkg/sentry/control/BUILD b/pkg/sentry/control/BUILD
new file mode 100644
index 000000000..2c5d14be5
--- /dev/null
+++ b/pkg/sentry/control/BUILD
@@ -0,0 +1,52 @@
+load("//tools:defs.bzl", "go_library", "go_test")
+
+package(licenses = ["notice"])
+
+go_library(
+ name = "control",
+ srcs = [
+ "control.go",
+ "logging.go",
+ "pprof.go",
+ "proc.go",
+ "state.go",
+ ],
+ visibility = [
+ "//:sandbox",
+ ],
+ deps = [
+ "//pkg/abi/linux",
+ "//pkg/fd",
+ "//pkg/log",
+ "//pkg/sentry/fdimport",
+ "//pkg/sentry/fs",
+ "//pkg/sentry/fs/host",
+ "//pkg/sentry/fs/user",
+ "//pkg/sentry/fsimpl/host",
+ "//pkg/sentry/kernel",
+ "//pkg/sentry/kernel/auth",
+ "//pkg/sentry/kernel/time",
+ "//pkg/sentry/limits",
+ "//pkg/sentry/state",
+ "//pkg/sentry/strace",
+ "//pkg/sentry/usage",
+ "//pkg/sentry/vfs",
+ "//pkg/sentry/watchdog",
+ "//pkg/sync",
+ "//pkg/tcpip/link/sniffer",
+ "//pkg/urpc",
+ "@org_golang_x_sys//unix:go_default_library",
+ ],
+)
+
+go_test(
+ name = "control_test",
+ size = "small",
+ srcs = ["proc_test.go"],
+ library = ":control",
+ deps = [
+ "//pkg/log",
+ "//pkg/sentry/kernel/time",
+ "//pkg/sentry/usage",
+ ],
+)
diff --git a/pkg/sentry/control/control.go b/pkg/sentry/control/control.go
new file mode 100644
index 000000000..6060b9b4f
--- /dev/null
+++ b/pkg/sentry/control/control.go
@@ -0,0 +1,17 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package control contains types that expose control server methods, and can
+// be used to configure and interact with a running sandbox process.
+package control
diff --git a/pkg/sentry/control/logging.go b/pkg/sentry/control/logging.go
new file mode 100644
index 000000000..8a500a515
--- /dev/null
+++ b/pkg/sentry/control/logging.go
@@ -0,0 +1,136 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package control
+
+import (
+ "fmt"
+ "sync/atomic"
+
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/sentry/strace"
+ "gvisor.dev/gvisor/pkg/tcpip/link/sniffer"
+)
+
+// LoggingArgs are the arguments to use for changing the logging
+// level and strace list.
+type LoggingArgs struct {
+ // SetLevel is a flag used to indicate that we should update
+ // the logging level. We should be able to change the strace
+ // list without affecting the logging level and vice versa.
+ SetLevel bool
+
+ // Level is the log level that will be set if SetLevel is true.
+ Level log.Level
+
+ // SetLogPackets indicates that we should update the log packets flag.
+ SetLogPackets bool
+
+ // LogPackets is the actual value to set for LogPackets.
+ // SetLogPackets must be enabled to indicate that we're changing
+ // the value.
+ LogPackets bool
+
+ // SetStrace is a flag used to indicate that strace related
+ // arguments were passed in.
+ SetStrace bool
+
+ // EnableStrace is a flag from the CLI that specifies whether to
+ // enable strace at all. If this flag is false then a completely
+ // pristine copy of the syscall table will be swapped in. This
+ // approach is used to remain consistent with an empty strace
+ // whitelist meaning trace all system calls.
+ EnableStrace bool
+
+ // Strace is the whitelist of syscalls to trace to log. If this
+ // and StraceEventWhitelist are empty trace all system calls.
+ StraceWhitelist []string
+
+ // SetEventStrace is a flag used to indicate that event strace
+ // related arguments were passed in.
+ SetEventStrace bool
+
+ // StraceEventWhitelist is the whitelist of syscalls to trace
+ // to event log.
+ StraceEventWhitelist []string
+}
+
+// Logging provides functions related to logging.
+type Logging struct{}
+
+// Change will change the log level and strace arguments. Although
+// this functions signature requires an error it never actually
+// returns an error. It's required by the URPC interface.
+// Additionally, it may look odd that this is the only method
+// attached to an empty struct but this is also part of how
+// URPC dispatches.
+func (l *Logging) Change(args *LoggingArgs, code *int) error {
+ if args.SetLevel {
+ // Logging uses an atomic for the level so this is thread safe.
+ log.SetLevel(args.Level)
+ }
+
+ if args.SetLogPackets {
+ if args.LogPackets {
+ atomic.StoreUint32(&sniffer.LogPackets, 1)
+ } else {
+ atomic.StoreUint32(&sniffer.LogPackets, 0)
+ }
+ log.Infof("LogPackets set to: %v", atomic.LoadUint32(&sniffer.LogPackets))
+ }
+
+ if args.SetStrace {
+ if err := l.configureStrace(args); err != nil {
+ return fmt.Errorf("error configuring strace: %v", err)
+ }
+ }
+
+ if args.SetEventStrace {
+ if err := l.configureEventStrace(args); err != nil {
+ return fmt.Errorf("error configuring event strace: %v", err)
+ }
+ }
+
+ return nil
+}
+
+func (l *Logging) configureStrace(args *LoggingArgs) error {
+ if args.EnableStrace {
+ // Install the whitelist specified.
+ if len(args.StraceWhitelist) > 0 {
+ if err := strace.Enable(args.StraceWhitelist, strace.SinkTypeLog); err != nil {
+ return err
+ }
+ } else {
+ // For convenience, if strace is enabled but whitelist
+ // is empty, enable everything to log.
+ strace.EnableAll(strace.SinkTypeLog)
+ }
+ } else {
+ // Uninstall all strace functions.
+ strace.Disable(strace.SinkTypeLog)
+ }
+ return nil
+}
+
+func (l *Logging) configureEventStrace(args *LoggingArgs) error {
+ if len(args.StraceEventWhitelist) > 0 {
+ if err := strace.Enable(args.StraceEventWhitelist, strace.SinkTypeEvent); err != nil {
+ return err
+ }
+ } else {
+ strace.Disable(strace.SinkTypeEvent)
+ }
+ return nil
+}
diff --git a/pkg/sentry/control/pprof.go b/pkg/sentry/control/pprof.go
new file mode 100644
index 000000000..663e51989
--- /dev/null
+++ b/pkg/sentry/control/pprof.go
@@ -0,0 +1,209 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package control
+
+import (
+ "errors"
+ "runtime"
+ "runtime/pprof"
+ "runtime/trace"
+
+ "gvisor.dev/gvisor/pkg/fd"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/sync"
+ "gvisor.dev/gvisor/pkg/urpc"
+)
+
+var errNoOutput = errors.New("no output writer provided")
+
+// ProfileOpts contains options for the StartCPUProfile/Goroutine RPC call.
+type ProfileOpts struct {
+ // File is the filesystem path for the profile.
+ File string `json:"path"`
+
+ // FilePayload is the destination for the profiling output.
+ urpc.FilePayload
+}
+
+// Profile includes profile-related RPC stubs. It provides a way to
+// control the built-in pprof facility in sentry via sentryctl.
+//
+// The following options to sentryctl are added:
+//
+// - collect CPU profile on-demand.
+// sentryctl -pid <pid> pprof-cpu-start
+// sentryctl -pid <pid> pprof-cpu-stop
+//
+// - dump out the stack trace of current go routines.
+// sentryctl -pid <pid> pprof-goroutine
+type Profile struct {
+ // mu protects the fields below.
+ mu sync.Mutex
+
+ // cpuFile is the current CPU profile output file.
+ cpuFile *fd.FD
+
+ // traceFile is the current execution trace output file.
+ traceFile *fd.FD
+
+ // Kernel is the kernel under profile.
+ Kernel *kernel.Kernel
+}
+
+// StartCPUProfile is an RPC stub which starts recording the CPU profile in a
+// file.
+func (p *Profile) StartCPUProfile(o *ProfileOpts, _ *struct{}) error {
+ if len(o.FilePayload.Files) < 1 {
+ return errNoOutput
+ }
+
+ output, err := fd.NewFromFile(o.FilePayload.Files[0])
+ if err != nil {
+ return err
+ }
+
+ p.mu.Lock()
+ defer p.mu.Unlock()
+
+ // Returns an error if profiling is already started.
+ if err := pprof.StartCPUProfile(output); err != nil {
+ output.Close()
+ return err
+ }
+
+ p.cpuFile = output
+ return nil
+}
+
+// StopCPUProfile is an RPC stub which stops the CPU profiling and flush out the
+// profile data. It takes no argument.
+func (p *Profile) StopCPUProfile(_, _ *struct{}) error {
+ p.mu.Lock()
+ defer p.mu.Unlock()
+
+ if p.cpuFile == nil {
+ return errors.New("CPU profiling not started")
+ }
+
+ pprof.StopCPUProfile()
+ p.cpuFile.Close()
+ p.cpuFile = nil
+ return nil
+}
+
+// HeapProfile generates a heap profile for the sentry.
+func (p *Profile) HeapProfile(o *ProfileOpts, _ *struct{}) error {
+ if len(o.FilePayload.Files) < 1 {
+ return errNoOutput
+ }
+ output := o.FilePayload.Files[0]
+ defer output.Close()
+ runtime.GC() // Get up-to-date statistics.
+ if err := pprof.WriteHeapProfile(output); err != nil {
+ return err
+ }
+ return nil
+}
+
+// GoroutineProfile is an RPC stub which dumps out the stack trace for all
+// running goroutines.
+func (p *Profile) GoroutineProfile(o *ProfileOpts, _ *struct{}) error {
+ if len(o.FilePayload.Files) < 1 {
+ return errNoOutput
+ }
+ output := o.FilePayload.Files[0]
+ defer output.Close()
+ if err := pprof.Lookup("goroutine").WriteTo(output, 2); err != nil {
+ return err
+ }
+ return nil
+}
+
+// BlockProfile is an RPC stub which dumps out the stack trace that led to
+// blocking on synchronization primitives.
+func (p *Profile) BlockProfile(o *ProfileOpts, _ *struct{}) error {
+ if len(o.FilePayload.Files) < 1 {
+ return errNoOutput
+ }
+ output := o.FilePayload.Files[0]
+ defer output.Close()
+ if err := pprof.Lookup("block").WriteTo(output, 0); err != nil {
+ return err
+ }
+ return nil
+}
+
+// MutexProfile is an RPC stub which dumps out the stack trace of holders of
+// contended mutexes.
+func (p *Profile) MutexProfile(o *ProfileOpts, _ *struct{}) error {
+ if len(o.FilePayload.Files) < 1 {
+ return errNoOutput
+ }
+ output := o.FilePayload.Files[0]
+ defer output.Close()
+ if err := pprof.Lookup("mutex").WriteTo(output, 0); err != nil {
+ return err
+ }
+ return nil
+}
+
+// StartTrace is an RPC stub which starts collection of an execution trace.
+func (p *Profile) StartTrace(o *ProfileOpts, _ *struct{}) error {
+ if len(o.FilePayload.Files) < 1 {
+ return errNoOutput
+ }
+
+ output, err := fd.NewFromFile(o.FilePayload.Files[0])
+ if err != nil {
+ return err
+ }
+
+ p.mu.Lock()
+ defer p.mu.Unlock()
+
+ // Returns an error if profiling is already started.
+ if err := trace.Start(output); err != nil {
+ output.Close()
+ return err
+ }
+
+ // Ensure all trace contexts are registered.
+ p.Kernel.RebuildTraceContexts()
+
+ p.traceFile = output
+ return nil
+}
+
+// StopTrace is an RPC stub which stops collection of an ongoing execution
+// trace and flushes the trace data. It takes no argument.
+func (p *Profile) StopTrace(_, _ *struct{}) error {
+ p.mu.Lock()
+ defer p.mu.Unlock()
+
+ if p.traceFile == nil {
+ return errors.New("Execution tracing not started")
+ }
+
+ // Similarly to the case above, if tasks have not ended traces, we will
+ // lose information. Thus we need to rebuild the tasks in order to have
+ // complete information. This will not lose information if multiple
+ // traces are overlapping.
+ p.Kernel.RebuildTraceContexts()
+
+ trace.Stop()
+ p.traceFile.Close()
+ p.traceFile = nil
+ return nil
+}
diff --git a/pkg/sentry/control/proc.go b/pkg/sentry/control/proc.go
new file mode 100644
index 000000000..1bae7cfaf
--- /dev/null
+++ b/pkg/sentry/control/proc.go
@@ -0,0 +1,416 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package control
+
+import (
+ "bytes"
+ "encoding/json"
+ "fmt"
+ "sort"
+ "strings"
+ "text/tabwriter"
+ "time"
+
+ "golang.org/x/sys/unix"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/fdimport"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/fs/host"
+ "gvisor.dev/gvisor/pkg/sentry/fs/user"
+ hostvfs2 "gvisor.dev/gvisor/pkg/sentry/fsimpl/host"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+ ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time"
+ "gvisor.dev/gvisor/pkg/sentry/limits"
+ "gvisor.dev/gvisor/pkg/sentry/usage"
+ "gvisor.dev/gvisor/pkg/sentry/vfs"
+ "gvisor.dev/gvisor/pkg/urpc"
+)
+
+// Proc includes task-related functions.
+//
+// At the moment, this is limited to exec support.
+type Proc struct {
+ Kernel *kernel.Kernel
+}
+
+// ExecArgs is the set of arguments to exec.
+type ExecArgs struct {
+ // Filename is the filename to load.
+ //
+ // If this is provided as "", then the file will be guessed via Argv[0].
+ Filename string `json:"filename"`
+
+ // Argv is a list of arguments.
+ Argv []string `json:"argv"`
+
+ // Envv is a list of environment variables.
+ Envv []string `json:"envv"`
+
+ // MountNamespace is the mount namespace to execute the new process in.
+ // A reference on MountNamespace must be held for the lifetime of the
+ // ExecArgs. If MountNamespace is nil, it will default to the init
+ // process's MountNamespace.
+ MountNamespace *fs.MountNamespace
+
+ // MountNamespaceVFS2 is the mount namespace to execute the new process in.
+ // A reference on MountNamespace must be held for the lifetime of the
+ // ExecArgs. If MountNamespace is nil, it will default to the init
+ // process's MountNamespace.
+ MountNamespaceVFS2 *vfs.MountNamespace
+
+ // WorkingDirectory defines the working directory for the new process.
+ WorkingDirectory string `json:"wd"`
+
+ // KUID is the UID to run with in the root user namespace. Defaults to
+ // root if not set explicitly.
+ KUID auth.KUID
+
+ // KGID is the GID to run with in the root user namespace. Defaults to
+ // the root group if not set explicitly.
+ KGID auth.KGID
+
+ // ExtraKGIDs is the list of additional groups to which the user belongs.
+ ExtraKGIDs []auth.KGID
+
+ // Capabilities is the list of capabilities to give to the process.
+ Capabilities *auth.TaskCapabilities
+
+ // StdioIsPty indicates that FDs 0, 1, and 2 are connected to a host pty FD.
+ StdioIsPty bool
+
+ // FilePayload determines the files to give to the new process.
+ urpc.FilePayload
+
+ // ContainerID is the container for the process being executed.
+ ContainerID string
+
+ // PIDNamespace is the pid namespace for the process being executed.
+ PIDNamespace *kernel.PIDNamespace
+}
+
+// String prints the arguments as a string.
+func (args ExecArgs) String() string {
+ if len(args.Argv) == 0 {
+ return args.Filename
+ }
+ a := make([]string, len(args.Argv))
+ copy(a, args.Argv)
+ if args.Filename != "" {
+ a[0] = args.Filename
+ }
+ return strings.Join(a, " ")
+}
+
+// Exec runs a new task.
+func (proc *Proc) Exec(args *ExecArgs, waitStatus *uint32) error {
+ newTG, _, _, _, err := proc.execAsync(args)
+ if err != nil {
+ return err
+ }
+
+ // Wait for completion.
+ newTG.WaitExited()
+ *waitStatus = newTG.ExitStatus().Status()
+ return nil
+}
+
+// ExecAsync runs a new task, but doesn't wait for it to finish. It is defined
+// as a function rather than a method to avoid exposing execAsync as an RPC.
+func ExecAsync(proc *Proc, args *ExecArgs) (*kernel.ThreadGroup, kernel.ThreadID, *host.TTYFileOperations, *hostvfs2.TTYFileDescription, error) {
+ return proc.execAsync(args)
+}
+
+// execAsync runs a new task, but doesn't wait for it to finish. It returns the
+// newly created thread group and its PID. If the stdio FDs are TTYs, then a
+// TTYFileOperations that wraps the TTY is also returned.
+func (proc *Proc) execAsync(args *ExecArgs) (*kernel.ThreadGroup, kernel.ThreadID, *host.TTYFileOperations, *hostvfs2.TTYFileDescription, error) {
+ // Import file descriptors.
+ fdTable := proc.Kernel.NewFDTable()
+ defer fdTable.DecRef()
+
+ creds := auth.NewUserCredentials(
+ args.KUID,
+ args.KGID,
+ args.ExtraKGIDs,
+ args.Capabilities,
+ proc.Kernel.RootUserNamespace())
+
+ initArgs := kernel.CreateProcessArgs{
+ Filename: args.Filename,
+ Argv: args.Argv,
+ Envv: args.Envv,
+ WorkingDirectory: args.WorkingDirectory,
+ MountNamespace: args.MountNamespace,
+ MountNamespaceVFS2: args.MountNamespaceVFS2,
+ Credentials: creds,
+ FDTable: fdTable,
+ Umask: 0022,
+ Limits: limits.NewLimitSet(),
+ MaxSymlinkTraversals: linux.MaxSymlinkTraversals,
+ UTSNamespace: proc.Kernel.RootUTSNamespace(),
+ IPCNamespace: proc.Kernel.RootIPCNamespace(),
+ AbstractSocketNamespace: proc.Kernel.RootAbstractSocketNamespace(),
+ ContainerID: args.ContainerID,
+ PIDNamespace: args.PIDNamespace,
+ }
+ if initArgs.MountNamespace != nil {
+ // initArgs must hold a reference on MountNamespace, which will
+ // be donated to the new process in CreateProcess.
+ initArgs.MountNamespace.IncRef()
+ }
+ if initArgs.MountNamespaceVFS2 != nil {
+ // initArgs must hold a reference on MountNamespaceVFS2, which will
+ // be donated to the new process in CreateProcess.
+ initArgs.MountNamespaceVFS2.IncRef()
+ }
+ ctx := initArgs.NewContext(proc.Kernel)
+
+ if kernel.VFS2Enabled {
+ // Get the full path to the filename from the PATH env variable.
+ if initArgs.MountNamespaceVFS2 == nil {
+ // Set initArgs so that 'ctx' returns the namespace.
+ //
+ // MountNamespaceVFS2 adds a reference to the namespace, which is
+ // transferred to the new process.
+ initArgs.MountNamespaceVFS2 = proc.Kernel.GlobalInit().Leader().MountNamespaceVFS2()
+ }
+ } else {
+ if initArgs.MountNamespace == nil {
+ // Set initArgs so that 'ctx' returns the namespace.
+ initArgs.MountNamespace = proc.Kernel.GlobalInit().Leader().MountNamespace()
+
+ // initArgs must hold a reference on MountNamespace, which will
+ // be donated to the new process in CreateProcess.
+ initArgs.MountNamespace.IncRef()
+ }
+ }
+ resolved, err := user.ResolveExecutablePath(ctx, &initArgs)
+ if err != nil {
+ return nil, 0, nil, nil, err
+ }
+ initArgs.Filename = resolved
+
+ fds := make([]int, len(args.FilePayload.Files))
+ for i, file := range args.FilePayload.Files {
+ if kernel.VFS2Enabled {
+ // Need to dup to remove ownership from os.File.
+ dup, err := unix.Dup(int(file.Fd()))
+ if err != nil {
+ return nil, 0, nil, nil, fmt.Errorf("duplicating payload files: %w", err)
+ }
+ fds[i] = dup
+ } else {
+ // VFS1 dups the file on import.
+ fds[i] = int(file.Fd())
+ }
+ }
+ ttyFile, ttyFileVFS2, err := fdimport.Import(ctx, fdTable, args.StdioIsPty, fds)
+ if err != nil {
+ if kernel.VFS2Enabled {
+ for _, fd := range fds {
+ unix.Close(fd)
+ }
+ }
+ return nil, 0, nil, nil, err
+ }
+
+ tg, tid, err := proc.Kernel.CreateProcess(initArgs)
+ if err != nil {
+ return nil, 0, nil, nil, err
+ }
+
+ // Set the foreground process group on the TTY before starting the process.
+ switch {
+ case ttyFile != nil:
+ ttyFile.InitForegroundProcessGroup(tg.ProcessGroup())
+ case ttyFileVFS2 != nil:
+ ttyFileVFS2.InitForegroundProcessGroup(tg.ProcessGroup())
+ }
+
+ // Start the newly created process.
+ proc.Kernel.StartProcess(tg)
+
+ return tg, tid, ttyFile, ttyFileVFS2, nil
+}
+
+// PsArgs is the set of arguments to ps.
+type PsArgs struct {
+ // JSON will force calls to Ps to return the result as a JSON payload.
+ JSON bool
+}
+
+// Ps provides a process listing for the running kernel.
+func (proc *Proc) Ps(args *PsArgs, out *string) error {
+ var p []*Process
+ if e := Processes(proc.Kernel, "", &p); e != nil {
+ return e
+ }
+ if !args.JSON {
+ *out = ProcessListToTable(p)
+ } else {
+ s, e := ProcessListToJSON(p)
+ if e != nil {
+ return e
+ }
+ *out = s
+ }
+ return nil
+}
+
+// Process contains information about a single process in a Sandbox.
+type Process struct {
+ UID auth.KUID `json:"uid"`
+ PID kernel.ThreadID `json:"pid"`
+ // Parent PID
+ PPID kernel.ThreadID `json:"ppid"`
+ Threads []kernel.ThreadID `json:"threads"`
+ // Processor utilization
+ C int32 `json:"c"`
+ // TTY name of the process. Will be of the form "pts/N" if there is a
+ // TTY, or "?" if there is not.
+ TTY string `json:"tty"`
+ // Start time
+ STime string `json:"stime"`
+ // CPU time
+ Time string `json:"time"`
+ // Executable shortname (e.g. "sh" for /bin/sh)
+ Cmd string `json:"cmd"`
+}
+
+// ProcessListToTable prints a table with the following format:
+// UID PID PPID C TTY STIME TIME CMD
+// 0 1 0 0 pty/4 14:04 505262ns tail
+func ProcessListToTable(pl []*Process) string {
+ var buf bytes.Buffer
+ tw := tabwriter.NewWriter(&buf, 10, 1, 3, ' ', 0)
+ fmt.Fprint(tw, "UID\tPID\tPPID\tC\tTTY\tSTIME\tTIME\tCMD")
+ for _, d := range pl {
+ fmt.Fprintf(tw, "\n%d\t%d\t%d\t%d\t%s\t%s\t%s\t%s",
+ d.UID,
+ d.PID,
+ d.PPID,
+ d.C,
+ d.TTY,
+ d.STime,
+ d.Time,
+ d.Cmd)
+ }
+ tw.Flush()
+ return buf.String()
+}
+
+// ProcessListToJSON will return the JSON representation of ps.
+func ProcessListToJSON(pl []*Process) (string, error) {
+ b, err := json.MarshalIndent(pl, "", " ")
+ if err != nil {
+ return "", fmt.Errorf("couldn't marshal process list %v: %v", pl, err)
+ }
+ return string(b), nil
+}
+
+// PrintPIDsJSON prints a JSON object containing only the PIDs in pl. This
+// behavior is the same as runc's.
+func PrintPIDsJSON(pl []*Process) (string, error) {
+ pids := make([]kernel.ThreadID, 0, len(pl))
+ for _, d := range pl {
+ pids = append(pids, d.PID)
+ }
+ b, err := json.Marshal(pids)
+ if err != nil {
+ return "", fmt.Errorf("couldn't marshal PIDs %v: %v", pids, err)
+ }
+ return string(b), nil
+}
+
+// Processes retrieves information about processes running in the sandbox with
+// the given container id. All processes are returned if 'containerID' is empty.
+func Processes(k *kernel.Kernel, containerID string, out *[]*Process) error {
+ ts := k.TaskSet()
+ now := k.RealtimeClock().Now()
+ for _, tg := range ts.Root.ThreadGroups() {
+ pidns := tg.PIDNamespace()
+ pid := pidns.IDOfThreadGroup(tg)
+
+ // If tg has already been reaped ignore it.
+ if pid == 0 {
+ continue
+ }
+ if containerID != "" && containerID != tg.Leader().ContainerID() {
+ continue
+ }
+
+ ppid := kernel.ThreadID(0)
+ if p := tg.Leader().Parent(); p != nil {
+ ppid = pidns.IDOfThreadGroup(p.ThreadGroup())
+ }
+ threads := tg.MemberIDs(pidns)
+ *out = append(*out, &Process{
+ UID: tg.Leader().Credentials().EffectiveKUID,
+ PID: pid,
+ PPID: ppid,
+ Threads: threads,
+ STime: formatStartTime(now, tg.Leader().StartTime()),
+ C: percentCPU(tg.CPUStats(), tg.Leader().StartTime(), now),
+ Time: tg.CPUStats().SysTime.String(),
+ Cmd: tg.Leader().Name(),
+ TTY: ttyName(tg.TTY()),
+ })
+ }
+ sort.Slice(*out, func(i, j int) bool { return (*out)[i].PID < (*out)[j].PID })
+ return nil
+}
+
+// formatStartTime formats startTime depending on the current time:
+// - If startTime was today, HH:MM is used.
+// - If startTime was not today but was this year, MonDD is used (e.g. Jan02)
+// - If startTime was not this year, the year is used.
+func formatStartTime(now, startTime ktime.Time) string {
+ nowS, nowNs := now.Unix()
+ n := time.Unix(nowS, nowNs)
+ startTimeS, startTimeNs := startTime.Unix()
+ st := time.Unix(startTimeS, startTimeNs)
+ format := "15:04"
+ if st.YearDay() != n.YearDay() {
+ format = "Jan02"
+ }
+ if st.Year() != n.Year() {
+ format = "2006"
+ }
+ return st.Format(format)
+}
+
+func percentCPU(stats usage.CPUStats, startTime, now ktime.Time) int32 {
+ // Note: In procps, there is an option to include child CPU stats. As
+ // it is disabled by default, we do not include them.
+ total := stats.UserTime + stats.SysTime
+ lifetime := now.Sub(startTime)
+ if lifetime <= 0 {
+ return 0
+ }
+ percentCPU := total * 100 / lifetime
+ // Cap at 99% since procps does the same.
+ if percentCPU > 99 {
+ percentCPU = 99
+ }
+ return int32(percentCPU)
+}
+
+func ttyName(tty *kernel.TTY) string {
+ if tty == nil {
+ return "?"
+ }
+ return fmt.Sprintf("pts/%d", tty.Index)
+}
diff --git a/pkg/sentry/control/proc_test.go b/pkg/sentry/control/proc_test.go
new file mode 100644
index 000000000..0a88459b2
--- /dev/null
+++ b/pkg/sentry/control/proc_test.go
@@ -0,0 +1,166 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package control
+
+import (
+ "testing"
+
+ "gvisor.dev/gvisor/pkg/log"
+ ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time"
+ "gvisor.dev/gvisor/pkg/sentry/usage"
+)
+
+func init() {
+ log.SetLevel(log.Debug)
+}
+
+// Tests that ProcessData.Table() prints with the correct format.
+func TestProcessListTable(t *testing.T) {
+ testCases := []struct {
+ pl []*Process
+ expected string
+ }{
+ {
+ pl: []*Process{},
+ expected: "UID PID PPID C TTY STIME TIME CMD",
+ },
+ {
+ pl: []*Process{
+ {
+ UID: 0,
+ PID: 0,
+ PPID: 0,
+ C: 0,
+ TTY: "?",
+ STime: "0",
+ Time: "0",
+ Cmd: "zero",
+ },
+ {
+ UID: 1,
+ PID: 1,
+ PPID: 1,
+ C: 1,
+ TTY: "pts/4",
+ STime: "1",
+ Time: "1",
+ Cmd: "one",
+ },
+ },
+ expected: `UID PID PPID C TTY STIME TIME CMD
+0 0 0 0 ? 0 0 zero
+1 1 1 1 pts/4 1 1 one`,
+ },
+ }
+
+ for _, tc := range testCases {
+ output := ProcessListToTable(tc.pl)
+
+ if tc.expected != output {
+ t.Errorf("PrintTable(%v): got:\n%s\nwant:\n%s", tc.pl, output, tc.expected)
+ }
+ }
+}
+
+func TestProcessListJSON(t *testing.T) {
+ testCases := []struct {
+ pl []*Process
+ expected string
+ }{
+ {
+ pl: []*Process{},
+ expected: "[]",
+ },
+ {
+ pl: []*Process{
+ {
+ UID: 0,
+ PID: 0,
+ PPID: 0,
+ C: 0,
+ STime: "0",
+ Time: "0",
+ Cmd: "zero",
+ },
+ {
+ UID: 1,
+ PID: 1,
+ PPID: 1,
+ C: 1,
+ STime: "1",
+ Time: "1",
+ Cmd: "one",
+ },
+ },
+ expected: "[0,1]",
+ },
+ }
+
+ for _, tc := range testCases {
+ output, err := PrintPIDsJSON(tc.pl)
+ if err != nil {
+ t.Errorf("failed to generate JSON: %v", err)
+ }
+
+ if tc.expected != output {
+ t.Errorf("PrintJSON(%v): got:\n%s\nwant:\n%s", tc.pl, output, tc.expected)
+ }
+ }
+}
+
+func TestPercentCPU(t *testing.T) {
+ testCases := []struct {
+ stats usage.CPUStats
+ startTime ktime.Time
+ now ktime.Time
+ expected int32
+ }{
+ {
+ // Verify that 100% use is capped at 99.
+ stats: usage.CPUStats{UserTime: 1e9, SysTime: 1e9},
+ startTime: ktime.FromNanoseconds(7e9),
+ now: ktime.FromNanoseconds(9e9),
+ expected: 99,
+ },
+ {
+ // Verify that if usage > lifetime, we get at most 99%
+ // usage.
+ stats: usage.CPUStats{UserTime: 2e9, SysTime: 2e9},
+ startTime: ktime.FromNanoseconds(7e9),
+ now: ktime.FromNanoseconds(9e9),
+ expected: 99,
+ },
+ {
+ // Verify that 50% usage is reported correctly.
+ stats: usage.CPUStats{UserTime: 1e9, SysTime: 1e9},
+ startTime: ktime.FromNanoseconds(12e9),
+ now: ktime.FromNanoseconds(16e9),
+ expected: 50,
+ },
+ {
+ // Verify that 0% usage is reported correctly.
+ stats: usage.CPUStats{UserTime: 0, SysTime: 0},
+ startTime: ktime.FromNanoseconds(12e9),
+ now: ktime.FromNanoseconds(14e9),
+ expected: 0,
+ },
+ }
+
+ for _, tc := range testCases {
+ if pcpu := percentCPU(tc.stats, tc.startTime, tc.now); pcpu != tc.expected {
+ t.Errorf("percentCPU(%v, %v, %v): got %d, want %d", tc.stats, tc.startTime, tc.now, pcpu, tc.expected)
+ }
+ }
+}
diff --git a/pkg/sentry/control/state.go b/pkg/sentry/control/state.go
new file mode 100644
index 000000000..41feeffe3
--- /dev/null
+++ b/pkg/sentry/control/state.go
@@ -0,0 +1,73 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package control
+
+import (
+ "errors"
+
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/sentry/state"
+ "gvisor.dev/gvisor/pkg/sentry/watchdog"
+ "gvisor.dev/gvisor/pkg/urpc"
+)
+
+// ErrInvalidFiles is returned when the urpc call to Save does not include an
+// appropriate file payload (e.g. there is no output file!).
+var ErrInvalidFiles = errors.New("exactly one file must be provided")
+
+// State includes state-related functions.
+type State struct {
+ Kernel *kernel.Kernel
+ Watchdog *watchdog.Watchdog
+}
+
+// SaveOpts contains options for the Save RPC call.
+type SaveOpts struct {
+ // Key is used for state integrity check.
+ Key []byte `json:"key"`
+
+ // Metadata is the set of metadata to prepend to the state file.
+ Metadata map[string]string `json:"metadata"`
+
+ // FilePayload contains the destination for the state.
+ urpc.FilePayload
+}
+
+// Save saves the running system.
+func (s *State) Save(o *SaveOpts, _ *struct{}) error {
+ // Create an output stream.
+ if len(o.FilePayload.Files) != 1 {
+ return ErrInvalidFiles
+ }
+ defer o.FilePayload.Files[0].Close()
+
+ // Save to the first provided stream.
+ saveOpts := state.SaveOpts{
+ Destination: o.FilePayload.Files[0],
+ Key: o.Key,
+ Metadata: o.Metadata,
+ Callback: func(err error) {
+ if err == nil {
+ log.Infof("Save succeeded: exiting...")
+ } else {
+ log.Warningf("Save failed: exiting...")
+ s.Kernel.SetSaveError(err)
+ }
+ s.Kernel.Kill(kernel.ExitStatus{})
+ },
+ }
+ return saveOpts.Save(s.Kernel, s.Watchdog)
+}