diff options
author | Michael Pratt <mpratt@google.com> | 2021-09-13 18:07:29 -0400 |
---|---|---|
committer | Michael Pratt <mpratt@google.com> | 2021-09-16 11:20:37 -0400 |
commit | bd296e799bd3eceaa3c3f3db1227f9dba62bb1a1 (patch) | |
tree | 94dd1cd89df012d7159b0922e5f2f53914e73e8a | |
parent | 9bdeddd8826f98251584567b4157c63b297b7825 (diff) |
runsc: add global profile collection flags
Add global flags -profile-{block,cpu,heap,mutex} and -trace which
enable collection of the specified profile for the entire duration of a
container execution. This provides a way to definitively start profiling
before that application starts, rather than attempting to race with an
out-of-band `runsc debug`.
Note that only the main boot process is profiled.
This exposed a bug in Task.traceExecEvent: a crash when tracing and
-race are enabled. traceExecEvent is called off of the task goroutine,
but uses the Task as a context, which is a violation of the Task
contract. Switching to the AsyncContext fixes the issue.
Fixes #220
-rw-r--r-- | pkg/sentry/control/pprof.go | 30 | ||||
-rw-r--r-- | pkg/sentry/kernel/task_log.go | 6 | ||||
-rw-r--r-- | runsc/boot/BUILD | 1 | ||||
-rw-r--r-- | runsc/boot/loader.go | 36 | ||||
-rw-r--r-- | runsc/boot/profile.go | 95 | ||||
-rw-r--r-- | runsc/cmd/boot.go | 50 | ||||
-rw-r--r-- | runsc/cmd/debug.go | 7 | ||||
-rw-r--r-- | runsc/config/config.go | 35 | ||||
-rw-r--r-- | runsc/config/flags.go | 5 | ||||
-rw-r--r-- | runsc/container/container_test.go | 43 | ||||
-rw-r--r-- | runsc/sandbox/sandbox.go | 55 |
11 files changed, 337 insertions, 26 deletions
diff --git a/pkg/sentry/control/pprof.go b/pkg/sentry/control/pprof.go index 2f3664c57..f721b7236 100644 --- a/pkg/sentry/control/pprof.go +++ b/pkg/sentry/control/pprof.go @@ -26,6 +26,23 @@ import ( "gvisor.dev/gvisor/pkg/urpc" ) +const ( + // DefaultBlockProfileRate is the default profiling rate for block + // profiles. + // + // The default here is 10%, which will record a stacktrace 10% of the + // time when blocking occurs. Since these events should not be super + // frequent, we expect this to achieve a reasonable balance between + // collecting the data we need and imposing a high performance cost + // (e.g. skewing even the CPU profile). + DefaultBlockProfileRate = 10 + + // DefaultMutexProfileRate is the default profiling rate for mutex + // profiles. Like the block rate above, we use a default rate of 10% + // for the same reasons. + DefaultMutexProfileRate = 10 +) + // Profile includes profile-related RPC stubs. It provides a way to // control the built-in runtime profiling facilities. // @@ -175,12 +192,8 @@ func (p *Profile) Block(o *BlockProfileOpts, _ *struct{}) error { defer p.blockMu.Unlock() // Always set the rate. We then wait to collect a profile at this rate, - // and disable when we're done. Note that the default here is 10%, which - // will record a stacktrace 10% of the time when blocking occurs. Since - // these events should not be super frequent, we expect this to achieve - // a reasonable balance between collecting the data we need and imposing - // a high performance cost (e.g. skewing even the CPU profile). - rate := 10 + // and disable when we're done. + rate := DefaultBlockProfileRate if o.Rate != 0 { rate = o.Rate } @@ -220,9 +233,8 @@ func (p *Profile) Mutex(o *MutexProfileOpts, _ *struct{}) error { p.mutexMu.Lock() defer p.mutexMu.Unlock() - // Always set the fraction. Like the block rate above, we use - // a default rate of 10% for the same reasons. - fraction := 10 + // Always set the fraction. + fraction := DefaultMutexProfileRate if o.Fraction != 0 { fraction = o.Fraction } diff --git a/pkg/sentry/kernel/task_log.go b/pkg/sentry/kernel/task_log.go index 8de08151a..c5b099559 100644 --- a/pkg/sentry/kernel/task_log.go +++ b/pkg/sentry/kernel/task_log.go @@ -249,5 +249,9 @@ func (t *Task) traceExecEvent(image *TaskImage) { return } defer file.DecRef(t) - trace.Logf(t.traceContext, traceCategory, "exec: %s", file.PathnameWithDeleted(t)) + + // traceExecEvent function may be called before the task goroutine + // starts, so we must use the async context. + name := file.PathnameWithDeleted(t.AsyncContext()) + trace.Logf(t.traceContext, traceCategory, "exec: %s", name) } diff --git a/runsc/boot/BUILD b/runsc/boot/BUILD index b8585c1e9..ff7a5a44b 100644 --- a/runsc/boot/BUILD +++ b/runsc/boot/BUILD @@ -15,6 +15,7 @@ go_library( "limits.go", "loader.go", "network.go", + "profile.go", "strace.go", "vfs.go", ], diff --git a/runsc/boot/loader.go b/runsc/boot/loader.go index 1dd0048ac..b46d84e5a 100644 --- a/runsc/boot/loader.go +++ b/runsc/boot/loader.go @@ -120,6 +120,10 @@ type Loader struct { // container. It should be called when a sandbox is destroyed. stopSignalForwarding func() + // stopProfiling stops profiling started at container creation. It + // should be called when a sandbox is destroyed. + stopProfiling func() + // restore is set to true if we are restoring a container. restore bool @@ -199,6 +203,21 @@ type Args struct { TotalMem uint64 // UserLogFD is the file descriptor to write user logs to. UserLogFD int + // ProfileBlockFD is the file descriptor to write a block profile to. + // Valid if >=0. + ProfileBlockFD int + // ProfileCPUFD is the file descriptor to write a CPU profile to. + // Valid if >=0. + ProfileCPUFD int + // ProfileHeapFD is the file descriptor to write a heap profile to. + // Valid if >=0. + ProfileHeapFD int + // ProfileMutexFD is the file descriptor to write a mutex profile to. + // Valid if >=0. + ProfileMutexFD int + // TraceFD is the file descriptor to write a Go execution trace to. + // Valid if >=0. + TraceFD int } // make sure stdioFDs are always the same on initial start and on restore @@ -207,6 +226,8 @@ const startingStdioFD = 256 // New initializes a new kernel loader configured by spec. // New also handles setting up a kernel for restoring a container. func New(args Args) (*Loader, error) { + stopProfiling := startProfiling(args) + // We initialize the rand package now to make sure /dev/urandom is pre-opened // on kernels that do not support getrandom(2). if err := rand.Init(); err != nil { @@ -400,12 +421,13 @@ func New(args Args) (*Loader, error) { eid := execID{cid: args.ID} l := &Loader{ - k: k, - watchdog: dog, - sandboxID: args.ID, - processes: map[execID]*execProcess{eid: {}}, - mountHints: mountHints, - root: info, + k: k, + watchdog: dog, + sandboxID: args.ID, + processes: map[execID]*execProcess{eid: {}}, + mountHints: mountHints, + root: info, + stopProfiling: stopProfiling, } // We don't care about child signals; some platforms can generate a @@ -498,6 +520,8 @@ func (l *Loader) Destroy() { for _, f := range l.root.goferFDs { _ = f.Close() } + + l.stopProfiling() } func createPlatform(conf *config.Config, deviceFile *os.File) (platform.Platform, error) { diff --git a/runsc/boot/profile.go b/runsc/boot/profile.go new file mode 100644 index 000000000..3ecd3e532 --- /dev/null +++ b/runsc/boot/profile.go @@ -0,0 +1,95 @@ +// Copyright 2021 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package boot + +import ( + "os" + "runtime" + "runtime/pprof" + "runtime/trace" + + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/sentry/control" +) + +// startProfiling initiates profiling as defined by the ProfileConfig, and +// returns a function that should be called to stop profiling. +func startProfiling(args Args) func() { + var onStopProfiling []func() + stopProfiling := func() { + for _, f := range onStopProfiling { + f() + } + } + + if args.ProfileBlockFD >= 0 { + file := os.NewFile(uintptr(args.ProfileBlockFD), "profile-block") + + runtime.SetBlockProfileRate(control.DefaultBlockProfileRate) + onStopProfiling = append(onStopProfiling, func() { + if err := pprof.Lookup("block").WriteTo(file, 0); err != nil { + log.Warningf("Error writing block profile: %v", err) + } + file.Close() + runtime.SetBlockProfileRate(0) + }) + } + + if args.ProfileCPUFD >= 0 { + file := os.NewFile(uintptr(args.ProfileCPUFD), "profile-cpu") + + pprof.StartCPUProfile(file) + onStopProfiling = append(onStopProfiling, func() { + pprof.StopCPUProfile() + file.Close() + }) + } + + if args.ProfileHeapFD >= 0 { + file := os.NewFile(uintptr(args.ProfileHeapFD), "profile-heap") + + onStopProfiling = append(onStopProfiling, func() { + if err := pprof.Lookup("heap").WriteTo(file, 0); err != nil { + log.Warningf("Error writing heap profile: %v", err) + } + file.Close() + }) + } + + if args.ProfileMutexFD >= 0 { + file := os.NewFile(uintptr(args.ProfileMutexFD), "profile-mutex") + + prev := runtime.SetMutexProfileFraction(control.DefaultMutexProfileRate) + onStopProfiling = append(onStopProfiling, func() { + if err := pprof.Lookup("mutex").WriteTo(file, 0); err != nil { + log.Warningf("Error writing mutex profile: %v", err) + } + file.Close() + runtime.SetMutexProfileFraction(prev) + }) + } + + if args.TraceFD >= 0 { + file := os.NewFile(uintptr(args.TraceFD), "trace") + + trace.Start(file) + onStopProfiling = append(onStopProfiling, func() { + trace.Stop() + file.Close() + }) + } + + return stopProfiling +} diff --git a/runsc/cmd/boot.go b/runsc/cmd/boot.go index f5c9821b2..e33a7f3cb 100644 --- a/runsc/cmd/boot.go +++ b/runsc/cmd/boot.go @@ -79,6 +79,26 @@ type Boot struct { // sandbox (e.g. gofer) and sent through this FD. mountsFD int + // profileBlockFD is the file descriptor to write a block profile to. + // Valid if >= 0. + profileBlockFD int + + // profileCPUFD is the file descriptor to write a CPU profile to. + // Valid if >= 0. + profileCPUFD int + + // profileHeapFD is the file descriptor to write a heap profile to. + // Valid if >= 0. + profileHeapFD int + + // profileMutexFD is the file descriptor to write a mutex profile to. + // Valid if >= 0. + profileMutexFD int + + // traceFD is the file descriptor to write a Go execution trace to. + // Valid if >= 0. + traceFD int + // pidns is set if the sandbox is in its own pid namespace. pidns bool @@ -119,6 +139,11 @@ func (b *Boot) SetFlags(f *flag.FlagSet) { f.IntVar(&b.userLogFD, "user-log-fd", 0, "file descriptor to write user logs to. 0 means no logging.") f.IntVar(&b.startSyncFD, "start-sync-fd", -1, "required FD to used to synchronize sandbox startup") f.IntVar(&b.mountsFD, "mounts-fd", -1, "mountsFD is the file descriptor to read list of mounts after they have been resolved (direct paths, no symlinks).") + f.IntVar(&b.profileBlockFD, "profile-block-fd", -1, "file descriptor to write block profile to. -1 disables profiling.") + f.IntVar(&b.profileCPUFD, "profile-cpu-fd", -1, "file descriptor to write CPU profile to. -1 disables profiling.") + f.IntVar(&b.profileHeapFD, "profile-heap-fd", -1, "file descriptor to write heap profile to. -1 disables profiling.") + f.IntVar(&b.profileMutexFD, "profile-mutex-fd", -1, "file descriptor to write mutex profile to. -1 disables profiling.") + f.IntVar(&b.traceFD, "trace-fd", -1, "file descriptor to write Go execution trace to. -1 disables tracing.") f.BoolVar(&b.attached, "attached", false, "if attached is true, kills the sandbox process when the parent process terminates") } @@ -213,16 +238,21 @@ func (b *Boot) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) // Create the loader. bootArgs := boot.Args{ - ID: f.Arg(0), - Spec: spec, - Conf: conf, - ControllerFD: b.controllerFD, - Device: os.NewFile(uintptr(b.deviceFD), "platform device"), - GoferFDs: b.ioFDs.GetArray(), - StdioFDs: b.stdioFDs.GetArray(), - NumCPU: b.cpuNum, - TotalMem: b.totalMem, - UserLogFD: b.userLogFD, + ID: f.Arg(0), + Spec: spec, + Conf: conf, + ControllerFD: b.controllerFD, + Device: os.NewFile(uintptr(b.deviceFD), "platform device"), + GoferFDs: b.ioFDs.GetArray(), + StdioFDs: b.stdioFDs.GetArray(), + NumCPU: b.cpuNum, + TotalMem: b.totalMem, + UserLogFD: b.userLogFD, + ProfileBlockFD: b.profileBlockFD, + ProfileCPUFD: b.profileCPUFD, + ProfileHeapFD: b.profileHeapFD, + ProfileMutexFD: b.profileMutexFD, + TraceFD: b.traceFD, } l, err := boot.New(bootArgs) if err != nil { diff --git a/runsc/cmd/debug.go b/runsc/cmd/debug.go index b2137ef46..318753728 100644 --- a/runsc/cmd/debug.go +++ b/runsc/cmd/debug.go @@ -90,6 +90,13 @@ func (d *Debug) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) var c *container.Container conf := args[0].(*config.Config) + if conf.ProfileBlock != "" || conf.ProfileCPU != "" || conf.ProfileHeap != "" || conf.ProfileMutex != "" { + return Errorf("global -profile-{block,cpu,heap,mutex} flags have no effect on runsc debug. Pass runsc debug -profile-{block,cpu,heap,mutex} instead") + } + if conf.TraceFile != "" { + return Errorf("global -trace flag has no effect on runsc debug. Pass runsc debug -trace instead") + } + if d.pid == 0 { // No pid, container ID must have been provided. if f.NArg() != 1 { diff --git a/runsc/config/config.go b/runsc/config/config.go index a230baa29..a562f7bf4 100644 --- a/runsc/config/config.go +++ b/runsc/config/config.go @@ -140,6 +140,26 @@ type Config struct { // ProfileEnable is set to prepare the sandbox to be profiled. ProfileEnable bool `flag:"profile"` + // ProfileBlock collects a block profile to the passed file for the + // duration of the container execution. Requires ProfileEnabled. + ProfileBlock string `flag:"profile-block"` + + // ProfileCPU collects a CPU profile to the passed file for the + // duration of the container execution. Requires ProfileEnabled. + ProfileCPU string `flag:"profile-cpu"` + + // ProfileHeap collects a heap profile to the passed file for the + // duration of the container execution. Requires ProfileEnabled. + ProfileHeap string `flag:"profile-heap"` + + // ProfileMutex collects a mutex profile to the passed file for the + // duration of the container execution. Requires ProfileEnabled. + ProfileMutex string `flag:"profile-mutex"` + + // TraceFile collects a Go runtime execution trace to the passed file + // for the duration of the container execution. + TraceFile string `flag:"trace"` + // Controls defines the controls that may be enabled. Controls controlConfig `flag:"controls"` @@ -207,6 +227,21 @@ func (c *Config) validate() error { if c.NumNetworkChannels <= 0 { return fmt.Errorf("num_network_channels must be > 0, got: %d", c.NumNetworkChannels) } + // Require profile flags to explicitly opt-in to profiling with + // -profile rather than implying it since these options have security + // implications. + if c.ProfileBlock != "" && !c.ProfileEnable { + return fmt.Errorf("profile-block flag requires enabling profiling with profile flag") + } + if c.ProfileCPU != "" && !c.ProfileEnable { + return fmt.Errorf("profile-cpu flag requires enabling profiling with profile flag") + } + if c.ProfileHeap != "" && !c.ProfileEnable { + return fmt.Errorf("profile-heap flag requires enabling profiling with profile flag") + } + if c.ProfileMutex != "" && !c.ProfileEnable { + return fmt.Errorf("profile-mutex flag requires enabling profiling with profile flag") + } return nil } diff --git a/runsc/config/flags.go b/runsc/config/flags.go index cc5aba474..1bf23951a 100644 --- a/runsc/config/flags.go +++ b/runsc/config/flags.go @@ -63,6 +63,11 @@ func RegisterFlags() { flag.Var(watchdogActionPtr(watchdog.LogWarning), "watchdog-action", "sets what action the watchdog takes when triggered: log (default), panic.") flag.Int("panic-signal", -1, "register signal handling that panics. Usually set to SIGUSR2(12) to troubleshoot hangs. -1 disables it.") flag.Bool("profile", false, "prepares the sandbox to use Golang profiler. Note that enabling profiler loosens the seccomp protection added to the sandbox (DO NOT USE IN PRODUCTION).") + flag.String("profile-block", "", "collects a block profile to this file path for the duration of the container execution. Requires -profile=true.") + flag.String("profile-cpu", "", "collects a CPU profile to this file path for the duration of the container execution. Requires -profile=true.") + flag.String("profile-heap", "", "collects a heap profile to this file path for the duration of the container execution. Requires -profile=true.") + flag.String("profile-mutex", "", "collects a mutex profile to this file path for the duration of the container execution. Requires -profile=true.") + flag.String("trace", "", "collects a Go runtime execution trace to this file path for the duration of the container execution.") flag.Bool("rootless", false, "it allows the sandbox to be started with a user that is not root. Sandbox and Gofer processes may run with same privileges as current user.") flag.Var(leakModePtr(refs.NoLeakChecking), "ref-leak-mode", "sets reference leak check mode: disabled (default), log-names, log-traces.") flag.Bool("cpu-num-from-quota", false, "set cpu number to cpu quota (least integer greater or equal to quota value, but not less than 2)") diff --git a/runsc/container/container_test.go b/runsc/container/container_test.go index 681f5c1a9..69dcf3f03 100644 --- a/runsc/container/container_test.go +++ b/runsc/container/container_test.go @@ -2829,3 +2829,46 @@ func TestStream(t *testing.T) { t.Errorf("out got %s, want include %s", buf, want) } } + +// TestProfile checks that profiling options generate profiles. +func TestProfile(t *testing.T) { + // Perform a non-trivial amount of work so we actually capture + // something in the profiles. + spec := testutil.NewSpecWithArgs("/bin/bash", "-c", "true") + conf := testutil.TestConfig(t) + conf.ProfileEnable = true + conf.ProfileBlock = filepath.Join(t.TempDir(), "block.pprof") + conf.ProfileCPU = filepath.Join(t.TempDir(), "cpu.pprof") + conf.ProfileHeap = filepath.Join(t.TempDir(), "heap.pprof") + conf.ProfileMutex = filepath.Join(t.TempDir(), "mutex.pprof") + conf.TraceFile = filepath.Join(t.TempDir(), "trace.out") + + _, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf) + if err != nil { + t.Fatalf("error setting up container: %v", err) + } + defer cleanup() + + args := Args{ + ID: testutil.RandomContainerID(), + Spec: spec, + BundleDir: bundleDir, + Attached: true, + } + + _, err = Run(conf, args) + if err != nil { + t.Fatalf("Creating container: %v", err) + } + + // Basic test; simply assert that the profiles are not empty. + for _, name := range []string{conf.ProfileBlock, conf.ProfileCPU, conf.ProfileHeap, conf.ProfileMutex, conf.TraceFile} { + fi, err := os.Stat(name) + if err != nil { + t.Fatalf("Unable to stat profile file %s: %v", name, err) + } + if fi.Size() == 0 { + t.Errorf("Profile file %s is empty: %+v", name, fi) + } + } +} diff --git a/runsc/sandbox/sandbox.go b/runsc/sandbox/sandbox.go index 9fbce6bd6..f4a37cedc 100644 --- a/runsc/sandbox/sandbox.go +++ b/runsc/sandbox/sandbox.go @@ -490,6 +490,61 @@ func (s *Sandbox) createSandboxProcess(conf *config.Config, args *Args, startSyn cmd.Args = append(cmd.Args, "--start-sync-fd="+strconv.Itoa(nextFD)) nextFD++ + if conf.ProfileBlock != "" { + blockFile, err := os.OpenFile(conf.ProfileBlock, os.O_CREATE|os.O_WRONLY, 0644) + if err != nil { + return fmt.Errorf("opening block profiling file %q: %v", conf.ProfileBlock, err) + } + defer blockFile.Close() + cmd.ExtraFiles = append(cmd.ExtraFiles, blockFile) + cmd.Args = append(cmd.Args, "--profile-block-fd="+strconv.Itoa(nextFD)) + nextFD++ + } + + if conf.ProfileCPU != "" { + cpuFile, err := os.OpenFile(conf.ProfileCPU, os.O_CREATE|os.O_WRONLY, 0644) + if err != nil { + return fmt.Errorf("opening cpu profiling file %q: %v", conf.ProfileCPU, err) + } + defer cpuFile.Close() + cmd.ExtraFiles = append(cmd.ExtraFiles, cpuFile) + cmd.Args = append(cmd.Args, "--profile-cpu-fd="+strconv.Itoa(nextFD)) + nextFD++ + } + + if conf.ProfileHeap != "" { + heapFile, err := os.OpenFile(conf.ProfileHeap, os.O_CREATE|os.O_WRONLY, 0644) + if err != nil { + return fmt.Errorf("opening heap profiling file %q: %v", conf.ProfileHeap, err) + } + defer heapFile.Close() + cmd.ExtraFiles = append(cmd.ExtraFiles, heapFile) + cmd.Args = append(cmd.Args, "--profile-heap-fd="+strconv.Itoa(nextFD)) + nextFD++ + } + + if conf.ProfileMutex != "" { + mutexFile, err := os.OpenFile(conf.ProfileMutex, os.O_CREATE|os.O_WRONLY, 0644) + if err != nil { + return fmt.Errorf("opening mutex profiling file %q: %v", conf.ProfileMutex, err) + } + defer mutexFile.Close() + cmd.ExtraFiles = append(cmd.ExtraFiles, mutexFile) + cmd.Args = append(cmd.Args, "--profile-mutex-fd="+strconv.Itoa(nextFD)) + nextFD++ + } + + if conf.TraceFile != "" { + traceFile, err := os.OpenFile(conf.TraceFile, os.O_CREATE|os.O_WRONLY, 0644) + if err != nil { + return fmt.Errorf("opening trace file %q: %v", conf.TraceFile, err) + } + defer traceFile.Close() + cmd.ExtraFiles = append(cmd.ExtraFiles, traceFile) + cmd.Args = append(cmd.Args, "--trace-fd="+strconv.Itoa(nextFD)) + nextFD++ + } + // If there is a gofer, sends all socket ends to the sandbox. for _, f := range args.IOFiles { defer f.Close() |