diff options
Diffstat (limited to 'pkg/sentry/control')
-rw-r--r-- | pkg/sentry/control/BUILD | 52 | ||||
-rw-r--r-- | pkg/sentry/control/control.go | 17 | ||||
-rw-r--r-- | pkg/sentry/control/logging.go | 136 | ||||
-rw-r--r-- | pkg/sentry/control/pprof.go | 209 | ||||
-rw-r--r-- | pkg/sentry/control/proc.go | 416 | ||||
-rw-r--r-- | pkg/sentry/control/proc_test.go | 166 | ||||
-rw-r--r-- | pkg/sentry/control/state.go | 73 |
7 files changed, 1069 insertions, 0 deletions
diff --git a/pkg/sentry/control/BUILD b/pkg/sentry/control/BUILD new file mode 100644 index 000000000..2c5d14be5 --- /dev/null +++ b/pkg/sentry/control/BUILD @@ -0,0 +1,52 @@ +load("//tools:defs.bzl", "go_library", "go_test") + +package(licenses = ["notice"]) + +go_library( + name = "control", + srcs = [ + "control.go", + "logging.go", + "pprof.go", + "proc.go", + "state.go", + ], + visibility = [ + "//:sandbox", + ], + deps = [ + "//pkg/abi/linux", + "//pkg/fd", + "//pkg/log", + "//pkg/sentry/fdimport", + "//pkg/sentry/fs", + "//pkg/sentry/fs/host", + "//pkg/sentry/fs/user", + "//pkg/sentry/fsimpl/host", + "//pkg/sentry/kernel", + "//pkg/sentry/kernel/auth", + "//pkg/sentry/kernel/time", + "//pkg/sentry/limits", + "//pkg/sentry/state", + "//pkg/sentry/strace", + "//pkg/sentry/usage", + "//pkg/sentry/vfs", + "//pkg/sentry/watchdog", + "//pkg/sync", + "//pkg/tcpip/link/sniffer", + "//pkg/urpc", + "@org_golang_x_sys//unix:go_default_library", + ], +) + +go_test( + name = "control_test", + size = "small", + srcs = ["proc_test.go"], + library = ":control", + deps = [ + "//pkg/log", + "//pkg/sentry/kernel/time", + "//pkg/sentry/usage", + ], +) diff --git a/pkg/sentry/control/control.go b/pkg/sentry/control/control.go new file mode 100644 index 000000000..6060b9b4f --- /dev/null +++ b/pkg/sentry/control/control.go @@ -0,0 +1,17 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package control contains types that expose control server methods, and can +// be used to configure and interact with a running sandbox process. +package control diff --git a/pkg/sentry/control/logging.go b/pkg/sentry/control/logging.go new file mode 100644 index 000000000..8a500a515 --- /dev/null +++ b/pkg/sentry/control/logging.go @@ -0,0 +1,136 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package control + +import ( + "fmt" + "sync/atomic" + + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/sentry/strace" + "gvisor.dev/gvisor/pkg/tcpip/link/sniffer" +) + +// LoggingArgs are the arguments to use for changing the logging +// level and strace list. +type LoggingArgs struct { + // SetLevel is a flag used to indicate that we should update + // the logging level. We should be able to change the strace + // list without affecting the logging level and vice versa. + SetLevel bool + + // Level is the log level that will be set if SetLevel is true. + Level log.Level + + // SetLogPackets indicates that we should update the log packets flag. + SetLogPackets bool + + // LogPackets is the actual value to set for LogPackets. + // SetLogPackets must be enabled to indicate that we're changing + // the value. + LogPackets bool + + // SetStrace is a flag used to indicate that strace related + // arguments were passed in. + SetStrace bool + + // EnableStrace is a flag from the CLI that specifies whether to + // enable strace at all. If this flag is false then a completely + // pristine copy of the syscall table will be swapped in. This + // approach is used to remain consistent with an empty strace + // whitelist meaning trace all system calls. + EnableStrace bool + + // Strace is the whitelist of syscalls to trace to log. If this + // and StraceEventWhitelist are empty trace all system calls. + StraceWhitelist []string + + // SetEventStrace is a flag used to indicate that event strace + // related arguments were passed in. + SetEventStrace bool + + // StraceEventWhitelist is the whitelist of syscalls to trace + // to event log. + StraceEventWhitelist []string +} + +// Logging provides functions related to logging. +type Logging struct{} + +// Change will change the log level and strace arguments. Although +// this functions signature requires an error it never actually +// returns an error. It's required by the URPC interface. +// Additionally, it may look odd that this is the only method +// attached to an empty struct but this is also part of how +// URPC dispatches. +func (l *Logging) Change(args *LoggingArgs, code *int) error { + if args.SetLevel { + // Logging uses an atomic for the level so this is thread safe. + log.SetLevel(args.Level) + } + + if args.SetLogPackets { + if args.LogPackets { + atomic.StoreUint32(&sniffer.LogPackets, 1) + } else { + atomic.StoreUint32(&sniffer.LogPackets, 0) + } + log.Infof("LogPackets set to: %v", atomic.LoadUint32(&sniffer.LogPackets)) + } + + if args.SetStrace { + if err := l.configureStrace(args); err != nil { + return fmt.Errorf("error configuring strace: %v", err) + } + } + + if args.SetEventStrace { + if err := l.configureEventStrace(args); err != nil { + return fmt.Errorf("error configuring event strace: %v", err) + } + } + + return nil +} + +func (l *Logging) configureStrace(args *LoggingArgs) error { + if args.EnableStrace { + // Install the whitelist specified. + if len(args.StraceWhitelist) > 0 { + if err := strace.Enable(args.StraceWhitelist, strace.SinkTypeLog); err != nil { + return err + } + } else { + // For convenience, if strace is enabled but whitelist + // is empty, enable everything to log. + strace.EnableAll(strace.SinkTypeLog) + } + } else { + // Uninstall all strace functions. + strace.Disable(strace.SinkTypeLog) + } + return nil +} + +func (l *Logging) configureEventStrace(args *LoggingArgs) error { + if len(args.StraceEventWhitelist) > 0 { + if err := strace.Enable(args.StraceEventWhitelist, strace.SinkTypeEvent); err != nil { + return err + } + } else { + strace.Disable(strace.SinkTypeEvent) + } + return nil +} diff --git a/pkg/sentry/control/pprof.go b/pkg/sentry/control/pprof.go new file mode 100644 index 000000000..663e51989 --- /dev/null +++ b/pkg/sentry/control/pprof.go @@ -0,0 +1,209 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package control + +import ( + "errors" + "runtime" + "runtime/pprof" + "runtime/trace" + + "gvisor.dev/gvisor/pkg/fd" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sync" + "gvisor.dev/gvisor/pkg/urpc" +) + +var errNoOutput = errors.New("no output writer provided") + +// ProfileOpts contains options for the StartCPUProfile/Goroutine RPC call. +type ProfileOpts struct { + // File is the filesystem path for the profile. + File string `json:"path"` + + // FilePayload is the destination for the profiling output. + urpc.FilePayload +} + +// Profile includes profile-related RPC stubs. It provides a way to +// control the built-in pprof facility in sentry via sentryctl. +// +// The following options to sentryctl are added: +// +// - collect CPU profile on-demand. +// sentryctl -pid <pid> pprof-cpu-start +// sentryctl -pid <pid> pprof-cpu-stop +// +// - dump out the stack trace of current go routines. +// sentryctl -pid <pid> pprof-goroutine +type Profile struct { + // mu protects the fields below. + mu sync.Mutex + + // cpuFile is the current CPU profile output file. + cpuFile *fd.FD + + // traceFile is the current execution trace output file. + traceFile *fd.FD + + // Kernel is the kernel under profile. + Kernel *kernel.Kernel +} + +// StartCPUProfile is an RPC stub which starts recording the CPU profile in a +// file. +func (p *Profile) StartCPUProfile(o *ProfileOpts, _ *struct{}) error { + if len(o.FilePayload.Files) < 1 { + return errNoOutput + } + + output, err := fd.NewFromFile(o.FilePayload.Files[0]) + if err != nil { + return err + } + + p.mu.Lock() + defer p.mu.Unlock() + + // Returns an error if profiling is already started. + if err := pprof.StartCPUProfile(output); err != nil { + output.Close() + return err + } + + p.cpuFile = output + return nil +} + +// StopCPUProfile is an RPC stub which stops the CPU profiling and flush out the +// profile data. It takes no argument. +func (p *Profile) StopCPUProfile(_, _ *struct{}) error { + p.mu.Lock() + defer p.mu.Unlock() + + if p.cpuFile == nil { + return errors.New("CPU profiling not started") + } + + pprof.StopCPUProfile() + p.cpuFile.Close() + p.cpuFile = nil + return nil +} + +// HeapProfile generates a heap profile for the sentry. +func (p *Profile) HeapProfile(o *ProfileOpts, _ *struct{}) error { + if len(o.FilePayload.Files) < 1 { + return errNoOutput + } + output := o.FilePayload.Files[0] + defer output.Close() + runtime.GC() // Get up-to-date statistics. + if err := pprof.WriteHeapProfile(output); err != nil { + return err + } + return nil +} + +// GoroutineProfile is an RPC stub which dumps out the stack trace for all +// running goroutines. +func (p *Profile) GoroutineProfile(o *ProfileOpts, _ *struct{}) error { + if len(o.FilePayload.Files) < 1 { + return errNoOutput + } + output := o.FilePayload.Files[0] + defer output.Close() + if err := pprof.Lookup("goroutine").WriteTo(output, 2); err != nil { + return err + } + return nil +} + +// BlockProfile is an RPC stub which dumps out the stack trace that led to +// blocking on synchronization primitives. +func (p *Profile) BlockProfile(o *ProfileOpts, _ *struct{}) error { + if len(o.FilePayload.Files) < 1 { + return errNoOutput + } + output := o.FilePayload.Files[0] + defer output.Close() + if err := pprof.Lookup("block").WriteTo(output, 0); err != nil { + return err + } + return nil +} + +// MutexProfile is an RPC stub which dumps out the stack trace of holders of +// contended mutexes. +func (p *Profile) MutexProfile(o *ProfileOpts, _ *struct{}) error { + if len(o.FilePayload.Files) < 1 { + return errNoOutput + } + output := o.FilePayload.Files[0] + defer output.Close() + if err := pprof.Lookup("mutex").WriteTo(output, 0); err != nil { + return err + } + return nil +} + +// StartTrace is an RPC stub which starts collection of an execution trace. +func (p *Profile) StartTrace(o *ProfileOpts, _ *struct{}) error { + if len(o.FilePayload.Files) < 1 { + return errNoOutput + } + + output, err := fd.NewFromFile(o.FilePayload.Files[0]) + if err != nil { + return err + } + + p.mu.Lock() + defer p.mu.Unlock() + + // Returns an error if profiling is already started. + if err := trace.Start(output); err != nil { + output.Close() + return err + } + + // Ensure all trace contexts are registered. + p.Kernel.RebuildTraceContexts() + + p.traceFile = output + return nil +} + +// StopTrace is an RPC stub which stops collection of an ongoing execution +// trace and flushes the trace data. It takes no argument. +func (p *Profile) StopTrace(_, _ *struct{}) error { + p.mu.Lock() + defer p.mu.Unlock() + + if p.traceFile == nil { + return errors.New("Execution tracing not started") + } + + // Similarly to the case above, if tasks have not ended traces, we will + // lose information. Thus we need to rebuild the tasks in order to have + // complete information. This will not lose information if multiple + // traces are overlapping. + p.Kernel.RebuildTraceContexts() + + trace.Stop() + p.traceFile.Close() + p.traceFile = nil + return nil +} diff --git a/pkg/sentry/control/proc.go b/pkg/sentry/control/proc.go new file mode 100644 index 000000000..1bae7cfaf --- /dev/null +++ b/pkg/sentry/control/proc.go @@ -0,0 +1,416 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package control + +import ( + "bytes" + "encoding/json" + "fmt" + "sort" + "strings" + "text/tabwriter" + "time" + + "golang.org/x/sys/unix" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/fdimport" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/fs/host" + "gvisor.dev/gvisor/pkg/sentry/fs/user" + hostvfs2 "gvisor.dev/gvisor/pkg/sentry/fsimpl/host" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/kernel/auth" + ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" + "gvisor.dev/gvisor/pkg/sentry/limits" + "gvisor.dev/gvisor/pkg/sentry/usage" + "gvisor.dev/gvisor/pkg/sentry/vfs" + "gvisor.dev/gvisor/pkg/urpc" +) + +// Proc includes task-related functions. +// +// At the moment, this is limited to exec support. +type Proc struct { + Kernel *kernel.Kernel +} + +// ExecArgs is the set of arguments to exec. +type ExecArgs struct { + // Filename is the filename to load. + // + // If this is provided as "", then the file will be guessed via Argv[0]. + Filename string `json:"filename"` + + // Argv is a list of arguments. + Argv []string `json:"argv"` + + // Envv is a list of environment variables. + Envv []string `json:"envv"` + + // MountNamespace is the mount namespace to execute the new process in. + // A reference on MountNamespace must be held for the lifetime of the + // ExecArgs. If MountNamespace is nil, it will default to the init + // process's MountNamespace. + MountNamespace *fs.MountNamespace + + // MountNamespaceVFS2 is the mount namespace to execute the new process in. + // A reference on MountNamespace must be held for the lifetime of the + // ExecArgs. If MountNamespace is nil, it will default to the init + // process's MountNamespace. + MountNamespaceVFS2 *vfs.MountNamespace + + // WorkingDirectory defines the working directory for the new process. + WorkingDirectory string `json:"wd"` + + // KUID is the UID to run with in the root user namespace. Defaults to + // root if not set explicitly. + KUID auth.KUID + + // KGID is the GID to run with in the root user namespace. Defaults to + // the root group if not set explicitly. + KGID auth.KGID + + // ExtraKGIDs is the list of additional groups to which the user belongs. + ExtraKGIDs []auth.KGID + + // Capabilities is the list of capabilities to give to the process. + Capabilities *auth.TaskCapabilities + + // StdioIsPty indicates that FDs 0, 1, and 2 are connected to a host pty FD. + StdioIsPty bool + + // FilePayload determines the files to give to the new process. + urpc.FilePayload + + // ContainerID is the container for the process being executed. + ContainerID string + + // PIDNamespace is the pid namespace for the process being executed. + PIDNamespace *kernel.PIDNamespace +} + +// String prints the arguments as a string. +func (args ExecArgs) String() string { + if len(args.Argv) == 0 { + return args.Filename + } + a := make([]string, len(args.Argv)) + copy(a, args.Argv) + if args.Filename != "" { + a[0] = args.Filename + } + return strings.Join(a, " ") +} + +// Exec runs a new task. +func (proc *Proc) Exec(args *ExecArgs, waitStatus *uint32) error { + newTG, _, _, _, err := proc.execAsync(args) + if err != nil { + return err + } + + // Wait for completion. + newTG.WaitExited() + *waitStatus = newTG.ExitStatus().Status() + return nil +} + +// ExecAsync runs a new task, but doesn't wait for it to finish. It is defined +// as a function rather than a method to avoid exposing execAsync as an RPC. +func ExecAsync(proc *Proc, args *ExecArgs) (*kernel.ThreadGroup, kernel.ThreadID, *host.TTYFileOperations, *hostvfs2.TTYFileDescription, error) { + return proc.execAsync(args) +} + +// execAsync runs a new task, but doesn't wait for it to finish. It returns the +// newly created thread group and its PID. If the stdio FDs are TTYs, then a +// TTYFileOperations that wraps the TTY is also returned. +func (proc *Proc) execAsync(args *ExecArgs) (*kernel.ThreadGroup, kernel.ThreadID, *host.TTYFileOperations, *hostvfs2.TTYFileDescription, error) { + // Import file descriptors. + fdTable := proc.Kernel.NewFDTable() + defer fdTable.DecRef() + + creds := auth.NewUserCredentials( + args.KUID, + args.KGID, + args.ExtraKGIDs, + args.Capabilities, + proc.Kernel.RootUserNamespace()) + + initArgs := kernel.CreateProcessArgs{ + Filename: args.Filename, + Argv: args.Argv, + Envv: args.Envv, + WorkingDirectory: args.WorkingDirectory, + MountNamespace: args.MountNamespace, + MountNamespaceVFS2: args.MountNamespaceVFS2, + Credentials: creds, + FDTable: fdTable, + Umask: 0022, + Limits: limits.NewLimitSet(), + MaxSymlinkTraversals: linux.MaxSymlinkTraversals, + UTSNamespace: proc.Kernel.RootUTSNamespace(), + IPCNamespace: proc.Kernel.RootIPCNamespace(), + AbstractSocketNamespace: proc.Kernel.RootAbstractSocketNamespace(), + ContainerID: args.ContainerID, + PIDNamespace: args.PIDNamespace, + } + if initArgs.MountNamespace != nil { + // initArgs must hold a reference on MountNamespace, which will + // be donated to the new process in CreateProcess. + initArgs.MountNamespace.IncRef() + } + if initArgs.MountNamespaceVFS2 != nil { + // initArgs must hold a reference on MountNamespaceVFS2, which will + // be donated to the new process in CreateProcess. + initArgs.MountNamespaceVFS2.IncRef() + } + ctx := initArgs.NewContext(proc.Kernel) + + if kernel.VFS2Enabled { + // Get the full path to the filename from the PATH env variable. + if initArgs.MountNamespaceVFS2 == nil { + // Set initArgs so that 'ctx' returns the namespace. + // + // MountNamespaceVFS2 adds a reference to the namespace, which is + // transferred to the new process. + initArgs.MountNamespaceVFS2 = proc.Kernel.GlobalInit().Leader().MountNamespaceVFS2() + } + } else { + if initArgs.MountNamespace == nil { + // Set initArgs so that 'ctx' returns the namespace. + initArgs.MountNamespace = proc.Kernel.GlobalInit().Leader().MountNamespace() + + // initArgs must hold a reference on MountNamespace, which will + // be donated to the new process in CreateProcess. + initArgs.MountNamespace.IncRef() + } + } + resolved, err := user.ResolveExecutablePath(ctx, &initArgs) + if err != nil { + return nil, 0, nil, nil, err + } + initArgs.Filename = resolved + + fds := make([]int, len(args.FilePayload.Files)) + for i, file := range args.FilePayload.Files { + if kernel.VFS2Enabled { + // Need to dup to remove ownership from os.File. + dup, err := unix.Dup(int(file.Fd())) + if err != nil { + return nil, 0, nil, nil, fmt.Errorf("duplicating payload files: %w", err) + } + fds[i] = dup + } else { + // VFS1 dups the file on import. + fds[i] = int(file.Fd()) + } + } + ttyFile, ttyFileVFS2, err := fdimport.Import(ctx, fdTable, args.StdioIsPty, fds) + if err != nil { + if kernel.VFS2Enabled { + for _, fd := range fds { + unix.Close(fd) + } + } + return nil, 0, nil, nil, err + } + + tg, tid, err := proc.Kernel.CreateProcess(initArgs) + if err != nil { + return nil, 0, nil, nil, err + } + + // Set the foreground process group on the TTY before starting the process. + switch { + case ttyFile != nil: + ttyFile.InitForegroundProcessGroup(tg.ProcessGroup()) + case ttyFileVFS2 != nil: + ttyFileVFS2.InitForegroundProcessGroup(tg.ProcessGroup()) + } + + // Start the newly created process. + proc.Kernel.StartProcess(tg) + + return tg, tid, ttyFile, ttyFileVFS2, nil +} + +// PsArgs is the set of arguments to ps. +type PsArgs struct { + // JSON will force calls to Ps to return the result as a JSON payload. + JSON bool +} + +// Ps provides a process listing for the running kernel. +func (proc *Proc) Ps(args *PsArgs, out *string) error { + var p []*Process + if e := Processes(proc.Kernel, "", &p); e != nil { + return e + } + if !args.JSON { + *out = ProcessListToTable(p) + } else { + s, e := ProcessListToJSON(p) + if e != nil { + return e + } + *out = s + } + return nil +} + +// Process contains information about a single process in a Sandbox. +type Process struct { + UID auth.KUID `json:"uid"` + PID kernel.ThreadID `json:"pid"` + // Parent PID + PPID kernel.ThreadID `json:"ppid"` + Threads []kernel.ThreadID `json:"threads"` + // Processor utilization + C int32 `json:"c"` + // TTY name of the process. Will be of the form "pts/N" if there is a + // TTY, or "?" if there is not. + TTY string `json:"tty"` + // Start time + STime string `json:"stime"` + // CPU time + Time string `json:"time"` + // Executable shortname (e.g. "sh" for /bin/sh) + Cmd string `json:"cmd"` +} + +// ProcessListToTable prints a table with the following format: +// UID PID PPID C TTY STIME TIME CMD +// 0 1 0 0 pty/4 14:04 505262ns tail +func ProcessListToTable(pl []*Process) string { + var buf bytes.Buffer + tw := tabwriter.NewWriter(&buf, 10, 1, 3, ' ', 0) + fmt.Fprint(tw, "UID\tPID\tPPID\tC\tTTY\tSTIME\tTIME\tCMD") + for _, d := range pl { + fmt.Fprintf(tw, "\n%d\t%d\t%d\t%d\t%s\t%s\t%s\t%s", + d.UID, + d.PID, + d.PPID, + d.C, + d.TTY, + d.STime, + d.Time, + d.Cmd) + } + tw.Flush() + return buf.String() +} + +// ProcessListToJSON will return the JSON representation of ps. +func ProcessListToJSON(pl []*Process) (string, error) { + b, err := json.MarshalIndent(pl, "", " ") + if err != nil { + return "", fmt.Errorf("couldn't marshal process list %v: %v", pl, err) + } + return string(b), nil +} + +// PrintPIDsJSON prints a JSON object containing only the PIDs in pl. This +// behavior is the same as runc's. +func PrintPIDsJSON(pl []*Process) (string, error) { + pids := make([]kernel.ThreadID, 0, len(pl)) + for _, d := range pl { + pids = append(pids, d.PID) + } + b, err := json.Marshal(pids) + if err != nil { + return "", fmt.Errorf("couldn't marshal PIDs %v: %v", pids, err) + } + return string(b), nil +} + +// Processes retrieves information about processes running in the sandbox with +// the given container id. All processes are returned if 'containerID' is empty. +func Processes(k *kernel.Kernel, containerID string, out *[]*Process) error { + ts := k.TaskSet() + now := k.RealtimeClock().Now() + for _, tg := range ts.Root.ThreadGroups() { + pidns := tg.PIDNamespace() + pid := pidns.IDOfThreadGroup(tg) + + // If tg has already been reaped ignore it. + if pid == 0 { + continue + } + if containerID != "" && containerID != tg.Leader().ContainerID() { + continue + } + + ppid := kernel.ThreadID(0) + if p := tg.Leader().Parent(); p != nil { + ppid = pidns.IDOfThreadGroup(p.ThreadGroup()) + } + threads := tg.MemberIDs(pidns) + *out = append(*out, &Process{ + UID: tg.Leader().Credentials().EffectiveKUID, + PID: pid, + PPID: ppid, + Threads: threads, + STime: formatStartTime(now, tg.Leader().StartTime()), + C: percentCPU(tg.CPUStats(), tg.Leader().StartTime(), now), + Time: tg.CPUStats().SysTime.String(), + Cmd: tg.Leader().Name(), + TTY: ttyName(tg.TTY()), + }) + } + sort.Slice(*out, func(i, j int) bool { return (*out)[i].PID < (*out)[j].PID }) + return nil +} + +// formatStartTime formats startTime depending on the current time: +// - If startTime was today, HH:MM is used. +// - If startTime was not today but was this year, MonDD is used (e.g. Jan02) +// - If startTime was not this year, the year is used. +func formatStartTime(now, startTime ktime.Time) string { + nowS, nowNs := now.Unix() + n := time.Unix(nowS, nowNs) + startTimeS, startTimeNs := startTime.Unix() + st := time.Unix(startTimeS, startTimeNs) + format := "15:04" + if st.YearDay() != n.YearDay() { + format = "Jan02" + } + if st.Year() != n.Year() { + format = "2006" + } + return st.Format(format) +} + +func percentCPU(stats usage.CPUStats, startTime, now ktime.Time) int32 { + // Note: In procps, there is an option to include child CPU stats. As + // it is disabled by default, we do not include them. + total := stats.UserTime + stats.SysTime + lifetime := now.Sub(startTime) + if lifetime <= 0 { + return 0 + } + percentCPU := total * 100 / lifetime + // Cap at 99% since procps does the same. + if percentCPU > 99 { + percentCPU = 99 + } + return int32(percentCPU) +} + +func ttyName(tty *kernel.TTY) string { + if tty == nil { + return "?" + } + return fmt.Sprintf("pts/%d", tty.Index) +} diff --git a/pkg/sentry/control/proc_test.go b/pkg/sentry/control/proc_test.go new file mode 100644 index 000000000..0a88459b2 --- /dev/null +++ b/pkg/sentry/control/proc_test.go @@ -0,0 +1,166 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package control + +import ( + "testing" + + "gvisor.dev/gvisor/pkg/log" + ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" + "gvisor.dev/gvisor/pkg/sentry/usage" +) + +func init() { + log.SetLevel(log.Debug) +} + +// Tests that ProcessData.Table() prints with the correct format. +func TestProcessListTable(t *testing.T) { + testCases := []struct { + pl []*Process + expected string + }{ + { + pl: []*Process{}, + expected: "UID PID PPID C TTY STIME TIME CMD", + }, + { + pl: []*Process{ + { + UID: 0, + PID: 0, + PPID: 0, + C: 0, + TTY: "?", + STime: "0", + Time: "0", + Cmd: "zero", + }, + { + UID: 1, + PID: 1, + PPID: 1, + C: 1, + TTY: "pts/4", + STime: "1", + Time: "1", + Cmd: "one", + }, + }, + expected: `UID PID PPID C TTY STIME TIME CMD +0 0 0 0 ? 0 0 zero +1 1 1 1 pts/4 1 1 one`, + }, + } + + for _, tc := range testCases { + output := ProcessListToTable(tc.pl) + + if tc.expected != output { + t.Errorf("PrintTable(%v): got:\n%s\nwant:\n%s", tc.pl, output, tc.expected) + } + } +} + +func TestProcessListJSON(t *testing.T) { + testCases := []struct { + pl []*Process + expected string + }{ + { + pl: []*Process{}, + expected: "[]", + }, + { + pl: []*Process{ + { + UID: 0, + PID: 0, + PPID: 0, + C: 0, + STime: "0", + Time: "0", + Cmd: "zero", + }, + { + UID: 1, + PID: 1, + PPID: 1, + C: 1, + STime: "1", + Time: "1", + Cmd: "one", + }, + }, + expected: "[0,1]", + }, + } + + for _, tc := range testCases { + output, err := PrintPIDsJSON(tc.pl) + if err != nil { + t.Errorf("failed to generate JSON: %v", err) + } + + if tc.expected != output { + t.Errorf("PrintJSON(%v): got:\n%s\nwant:\n%s", tc.pl, output, tc.expected) + } + } +} + +func TestPercentCPU(t *testing.T) { + testCases := []struct { + stats usage.CPUStats + startTime ktime.Time + now ktime.Time + expected int32 + }{ + { + // Verify that 100% use is capped at 99. + stats: usage.CPUStats{UserTime: 1e9, SysTime: 1e9}, + startTime: ktime.FromNanoseconds(7e9), + now: ktime.FromNanoseconds(9e9), + expected: 99, + }, + { + // Verify that if usage > lifetime, we get at most 99% + // usage. + stats: usage.CPUStats{UserTime: 2e9, SysTime: 2e9}, + startTime: ktime.FromNanoseconds(7e9), + now: ktime.FromNanoseconds(9e9), + expected: 99, + }, + { + // Verify that 50% usage is reported correctly. + stats: usage.CPUStats{UserTime: 1e9, SysTime: 1e9}, + startTime: ktime.FromNanoseconds(12e9), + now: ktime.FromNanoseconds(16e9), + expected: 50, + }, + { + // Verify that 0% usage is reported correctly. + stats: usage.CPUStats{UserTime: 0, SysTime: 0}, + startTime: ktime.FromNanoseconds(12e9), + now: ktime.FromNanoseconds(14e9), + expected: 0, + }, + } + + for _, tc := range testCases { + if pcpu := percentCPU(tc.stats, tc.startTime, tc.now); pcpu != tc.expected { + t.Errorf("percentCPU(%v, %v, %v): got %d, want %d", tc.stats, tc.startTime, tc.now, pcpu, tc.expected) + } + } +} diff --git a/pkg/sentry/control/state.go b/pkg/sentry/control/state.go new file mode 100644 index 000000000..41feeffe3 --- /dev/null +++ b/pkg/sentry/control/state.go @@ -0,0 +1,73 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package control + +import ( + "errors" + + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/state" + "gvisor.dev/gvisor/pkg/sentry/watchdog" + "gvisor.dev/gvisor/pkg/urpc" +) + +// ErrInvalidFiles is returned when the urpc call to Save does not include an +// appropriate file payload (e.g. there is no output file!). +var ErrInvalidFiles = errors.New("exactly one file must be provided") + +// State includes state-related functions. +type State struct { + Kernel *kernel.Kernel + Watchdog *watchdog.Watchdog +} + +// SaveOpts contains options for the Save RPC call. +type SaveOpts struct { + // Key is used for state integrity check. + Key []byte `json:"key"` + + // Metadata is the set of metadata to prepend to the state file. + Metadata map[string]string `json:"metadata"` + + // FilePayload contains the destination for the state. + urpc.FilePayload +} + +// Save saves the running system. +func (s *State) Save(o *SaveOpts, _ *struct{}) error { + // Create an output stream. + if len(o.FilePayload.Files) != 1 { + return ErrInvalidFiles + } + defer o.FilePayload.Files[0].Close() + + // Save to the first provided stream. + saveOpts := state.SaveOpts{ + Destination: o.FilePayload.Files[0], + Key: o.Key, + Metadata: o.Metadata, + Callback: func(err error) { + if err == nil { + log.Infof("Save succeeded: exiting...") + } else { + log.Warningf("Save failed: exiting...") + s.Kernel.SetSaveError(err) + } + s.Kernel.Kill(kernel.ExitStatus{}) + }, + } + return saveOpts.Save(s.Kernel, s.Watchdog) +} |