author     gVisor bot <gvisor-bot@google.com>  2021-01-05 21:33:03 +0000
committer  gVisor bot <gvisor-bot@google.com>  2021-01-05 21:33:03 +0000
commit     ba74ccdec1f21b071355a4768d5fc0d5b036d33e (patch)
tree       64ce8912999828dfd55101a17bc2f012a79f0599
parent     cd29587db7a9b3cf298450778a9c335426b5fa00 (diff)
parent     b06e5bc5b0913d3740b435d8753a2569220e0a33 (diff)

Merge release-20201208.0-109-gb06e5bc5b (automated)
-rw-r--r--  pkg/control/server/server.go  |  5
-rw-r--r--  pkg/sentry/control/pprof.go   | 56
-rw-r--r--  pkg/urpc/urpc.go              | 19
-rw-r--r--  runsc/boot/controller.go      | 16
-rw-r--r--  runsc/cmd/debug.go            | 83
-rw-r--r--  runsc/sandbox/sandbox.go      |  3

6 files changed, 126 insertions, 56 deletions
diff --git a/pkg/control/server/server.go b/pkg/control/server/server.go
index 41abe1f2d..629dae8f4 100644
--- a/pkg/control/server/server.go
+++ b/pkg/control/server/server.go
@@ -67,9 +67,10 @@ func (s *Server) Wait() {
// and the server should not be used afterwards.
func (s *Server) Stop() {
s.socket.Close()
- s.wg.Wait()
+ s.Wait()
- // This will cause existing clients to be terminated safely.
+ // This will cause existing clients to be terminated safely. If any
+ // registered handler has a Stop callback, it will be called.
s.server.Stop()
}
diff --git a/pkg/sentry/control/pprof.go b/pkg/sentry/control/pprof.go
index b78e29416..2f3664c57 100644
--- a/pkg/sentry/control/pprof.go
+++ b/pkg/sentry/control/pprof.go
@@ -50,17 +50,17 @@ type Profile struct {
done chan struct{}
}
-// NewProfile returns a new Profile object, and a stop callback.
-//
-// The stop callback should be used at most once.
-func NewProfile(k *kernel.Kernel) (*Profile, func()) {
- p := &Profile{
+// NewProfile returns a new Profile object.
+func NewProfile(k *kernel.Kernel) *Profile {
+ return &Profile{
kernel: k,
done: make(chan struct{}),
}
- return p, func() {
- close(p.done)
- }
+}
+
+// Stop implements urpc.Stopper.Stop.
+func (p *Profile) Stop() {
+ close(p.done)
}
// CPUProfileOpts contains options specifically for CPU profiles.
@@ -70,9 +70,6 @@ type CPUProfileOpts struct {
// Duration is the duration of the profile.
Duration time.Duration `json:"duration"`
-
- // Hz is the rate, which may be zero.
- Hz int `json:"hz"`
}
// CPU is an RPC stub which collects a CPU profile.
@@ -81,19 +78,13 @@ func (p *Profile) CPU(o *CPUProfileOpts, _ *struct{}) error {
return nil // Allowed.
}
- output, err := fd.NewFromFile(o.FilePayload.Files[0])
- if err != nil {
- return err
- }
+ output := o.FilePayload.Files[0]
defer output.Close()
p.cpuMu.Lock()
defer p.cpuMu.Unlock()
// Returns an error if profiling is already started.
- if o.Hz != 0 {
- runtime.SetCPUProfileRate(o.Hz)
- }
if err := pprof.StartCPUProfile(output); err != nil {
return err
}
@@ -112,6 +103,11 @@ func (p *Profile) CPU(o *CPUProfileOpts, _ *struct{}) error {
type HeapProfileOpts struct {
// FilePayload is the destination for the profiling output.
urpc.FilePayload
+
+ // Delay is the sleep time, similar to Duration. It may not affect
+ // the data collected, however, since the heap profile reflects only
+ // the memory still allocated when the snapshot is taken.
+ Delay time.Duration `json:"delay"`
}
// Heap generates a heap profile.
@@ -123,7 +119,16 @@ func (p *Profile) Heap(o *HeapProfileOpts, _ *struct{}) error {
output := o.FilePayload.Files[0]
defer output.Close()
- runtime.GC() // Get up-to-date statistics.
+ // Wait for the given delay.
+ select {
+ case <-time.After(o.Delay):
+ case <-p.done:
+ }
+
+ // Get up-to-date statistics.
+ runtime.GC()
+
+ // Write the given profile.
return pprof.WriteHeapProfile(output)
}
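
The new Stop method and the Delay handling above combine into one pattern: a profile collection sleeps on a select between a timer and the done channel, and Stop (invoked on server shutdown) closes done to wake every pending collection early. A minimal, self-contained sketch of that pattern using hypothetical names (collector, collect), not the control.Profile code itself:

package main

import (
	"fmt"
	"time"
)

// collector mimics Profile's done-channel pattern.
type collector struct {
	done chan struct{}
}

// Stop plays the role of urpc.Stopper.Stop: called once at shutdown, it
// unblocks every collection still waiting on its delay.
func (c *collector) Stop() {
	close(c.done)
}

// collect waits for the requested delay or until Stop is called, whichever
// comes first, then takes its snapshot.
func (c *collector) collect(delay time.Duration) {
	select {
	case <-time.After(delay):
	case <-c.done:
	}
	fmt.Println("snapshot taken")
}

func main() {
	c := &collector{done: make(chan struct{})}
	go c.collect(time.Hour) // Would otherwise block for an hour.
	time.Sleep(10 * time.Millisecond)
	c.Stop() // Shutdown path: the pending collect returns immediately.
	time.Sleep(10 * time.Millisecond)
}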
@@ -170,8 +175,12 @@ func (p *Profile) Block(o *BlockProfileOpts, _ *struct{}) error {
defer p.blockMu.Unlock()
// Always set the rate. We then wait to collect a profile at this rate,
- // and disable when we're done.
- rate := 1
+ // and disable when we're done. Note that the default here is 10%, which
+ // will record a stacktrace 10% of the time when blocking occurs. Since
+ // these events should not be super frequent, we expect this to strike a
+ // reasonable balance between collecting the data we need and the
+ // performance cost of doing so (e.g. skewing even the CPU profile).
+ rate := 10
if o.Rate != 0 {
rate = o.Rate
}
@@ -211,8 +220,9 @@ func (p *Profile) Mutex(o *MutexProfileOpts, _ *struct{}) error {
p.mutexMu.Lock()
defer p.mutexMu.Unlock()
- // Always set the fraction.
- fraction := 1
+ // Always set the fraction. Like the block rate above, we use
+ // a default rate of 10% for the same reasons.
+ fraction := 10
if o.Fraction != 0 {
fraction = o.Fraction
}
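
For reference, the block and mutex knobs set here are plain Go runtime controls. A standalone sketch (standard library only, not the control.Profile code) that enables both with the same numeric default of 10 chosen above and then writes the accumulated profiles:

package main

import (
	"os"
	"runtime"
	"runtime/pprof"
	"time"
)

func main() {
	// Enable sampling with the default used above; a mutex fraction of 10
	// reports roughly one in ten contention events.
	runtime.SetBlockProfileRate(10)
	runtime.SetMutexProfileFraction(10)
	defer runtime.SetBlockProfileRate(0) // Disable when done.
	defer runtime.SetMutexProfileFraction(0)

	time.Sleep(time.Second) // Stand-in for the profiling window.

	blockOut, err := os.Create("block.pb.gz")
	if err != nil {
		panic(err)
	}
	defer blockOut.Close()
	if err := pprof.Lookup("block").WriteTo(blockOut, 0); err != nil {
		panic(err)
	}

	mutexOut, err := os.Create("mutex.pb.gz")
	if err != nil {
		panic(err)
	}
	defer mutexOut.Close()
	if err := pprof.Lookup("mutex").WriteTo(mutexOut, 0); err != nil {
		panic(err)
	}
}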
diff --git a/pkg/urpc/urpc.go b/pkg/urpc/urpc.go
index dfd23032c..0e9a829f6 100644
--- a/pkg/urpc/urpc.go
+++ b/pkg/urpc/urpc.go
@@ -170,6 +170,9 @@ type Server struct {
// methods is the set of server methods.
methods map[string]registeredMethod
+ // stoppers are all registered stoppers.
+ stoppers []Stopper
+
// clients is a map of clients.
clients map[*unet.Socket]clientState
@@ -195,6 +198,12 @@ func NewServerWithCallback(afterRPCCallback func()) *Server {
}
}
+// Stopper is an optional interface that, when implemented, allows an object
+// to have a callback executed when the server is shutting down.
+type Stopper interface {
+ Stop()
+}
+
// Register registers the given object as an RPC receiver.
//
// This functions the same way as the built-in RPC package, but it does not
@@ -206,6 +215,7 @@ func (s *Server) Register(obj interface{}) {
defer s.mu.Unlock()
typ := reflect.TypeOf(obj)
+ stopper, hasStop := obj.(Stopper)
// If we got a pointer, deref it to the underlying object. We need this to
// obtain the name of the underlying type.
@@ -221,6 +231,10 @@ func (s *Server) Register(obj interface{}) {
// Can't be anonymous.
panic("type not named.")
}
+ if hasStop && method.Name == "Stop" {
+ s.stoppers = append(s.stoppers, stopper)
+ continue // Legal stop method.
+ }
prettyName := typDeref.Name() + "." + method.Name
if _, ok := s.methods[prettyName]; ok {
@@ -448,6 +462,11 @@ func (s *Server) Stop() {
// Wait for all outstanding requests.
defer s.wg.Wait()
+ // Call any Stop callbacks.
+ for _, stopper := range s.stoppers {
+ stopper.Stop()
+ }
+
// Close all known clients.
s.mu.Lock()
defer s.mu.Unlock()
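
Taken together, Register treats a Stop method on a Stopper as a shutdown callback rather than an RPC method, and Server.Stop invokes every recorded callback. A minimal standalone sketch of that registration logic using hypothetical names (registry, profiler), illustrating the pattern rather than reproducing the urpc implementation:

package main

import (
	"fmt"
	"reflect"
)

// Stopper mirrors the optional interface above: implementers get a shutdown
// callback instead of exposing Stop as an RPC method.
type Stopper interface {
	Stop()
}

type registry struct {
	methods  map[string]struct{}
	stoppers []Stopper
}

// register walks the exported methods of obj, records Stop as a shutdown
// callback when obj implements Stopper, and treats everything else as an
// RPC method.
func (r *registry) register(obj interface{}) {
	stopper, hasStop := obj.(Stopper)
	typ := reflect.TypeOf(obj)
	name := reflect.Indirect(reflect.ValueOf(obj)).Type().Name()
	for i := 0; i < typ.NumMethod(); i++ {
		method := typ.Method(i)
		if hasStop && method.Name == "Stop" {
			r.stoppers = append(r.stoppers, stopper)
			continue // Shutdown callback, not an RPC.
		}
		r.methods[name+"."+method.Name] = struct{}{}
	}
}

// stop invokes every recorded callback, as Server.Stop does above.
func (r *registry) stop() {
	for _, s := range r.stoppers {
		s.Stop()
	}
}

type profiler struct{ done chan struct{} }

func (p *profiler) Stop()                               { close(p.done) }
func (p *profiler) Heap(_ *struct{}, _ *struct{}) error { return nil }

func main() {
	r := &registry{methods: make(map[string]struct{})}
	p := &profiler{done: make(chan struct{})}
	r.register(p)
	fmt.Println(r.methods) // map[profiler.Heap:{}]
	r.stop()               // Closes p.done.
	<-p.done               // Does not block: done is closed.
}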
diff --git a/runsc/boot/controller.go b/runsc/boot/controller.go
index 9008e1282..cb5d8ea31 100644
--- a/runsc/boot/controller.go
+++ b/runsc/boot/controller.go
@@ -129,14 +129,6 @@ type controller struct {
// manager holds the containerManager methods.
manager *containerManager
-
- // pprof holds the profile instance if enabled. It may be nil.
- pprof *control.Profile
-
- // stopProfiling has the callback to stop profiling calls. As
- // this may be executed only once at most, it will be set to nil
- // after it is executed for the first time.
- stopProfiling func()
}
// newController creates a new controller. The caller must call
@@ -167,18 +159,14 @@ func newController(fd int, l *Loader) (*controller, error) {
ctrl.srv.Register(&control.Logging{})
if l.root.conf.ProfileEnable {
- ctrl.pprof, ctrl.stopProfiling = control.NewProfile(l.k)
- ctrl.srv.Register(ctrl.pprof)
+ ctrl.srv.Register(control.NewProfile(l.k))
}
return ctrl, nil
}
func (c *controller) stop() {
- if c.stopProfiling != nil {
- c.stopProfiling()
- c.stopProfiling = nil
- }
+ c.srv.Stop()
}
// containerManager manages sandbox containers.
diff --git a/runsc/cmd/debug.go b/runsc/cmd/debug.go
index 843dce01d..b84142b0d 100644
--- a/runsc/cmd/debug.go
+++ b/runsc/cmd/debug.go
@@ -17,6 +17,7 @@ package cmd
import (
"context"
"os"
+ "os/signal"
"strconv"
"strings"
"sync"
@@ -44,6 +45,7 @@ type Debug struct {
strace string
logLevel string
logPackets string
+ delay time.Duration
duration time.Duration
ps bool
}
@@ -71,7 +73,8 @@ func (d *Debug) SetFlags(f *flag.FlagSet) {
f.StringVar(&d.profileCPU, "profile-cpu", "", "writes CPU profile to the given file.")
f.StringVar(&d.profileBlock, "profile-block", "", "writes block profile to the given file.")
f.StringVar(&d.profileMutex, "profile-mutex", "", "writes mutex profile to the given file.")
- f.DurationVar(&d.duration, "duration", time.Second, "amount of time to wait for CPU and trace profiles.")
+ f.DurationVar(&d.delay, "delay", time.Hour, "amount of time to delay for collecting heap and goroutine profiles.")
+ f.DurationVar(&d.duration, "duration", time.Hour, "amount of time to wait for CPU and trace profiles.")
f.StringVar(&d.trace, "trace", "", "writes an execution trace to the given file.")
f.IntVar(&d.signal, "signal", -1, "sends signal to the sandbox")
f.StringVar(&d.strace, "strace", "", `A comma separated list of syscalls to trace. "all" enables all traces, "off" disables all.`)
@@ -144,16 +147,6 @@ func (d *Debug) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
}
log.Infof(" *** Stack dump ***\n%s", stacks)
}
- if d.profileHeap != "" {
- f, err := os.OpenFile(d.profileHeap, os.O_CREATE|os.O_TRUNC, 0644)
- if err != nil {
- return Errorf("error opening heap profile output: %v", err)
- }
- defer f.Close()
- if err := c.Sandbox.HeapProfile(f); err != nil {
- return Errorf("error collecting heap profile: %v", err)
- }
- }
if d.strace != "" || len(d.logLevel) != 0 || len(d.logPackets) != 0 {
args := control.LoggingArgs{}
switch strings.ToLower(d.strace) {
@@ -224,13 +217,22 @@ func (d *Debug) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
// Open profiling files.
var (
+ heapFile *os.File
cpuFile *os.File
traceFile *os.File
blockFile *os.File
mutexFile *os.File
)
+ if d.profileHeap != "" {
+ f, err := os.OpenFile(d.profileHeap, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0644)
+ if err != nil {
+ return Errorf("error opening heap profile output: %v", err)
+ }
+ defer f.Close()
+ heapFile = f
+ }
if d.profileCPU != "" {
- f, err := os.OpenFile(d.profileCPU, os.O_CREATE|os.O_TRUNC, 0644)
+ f, err := os.OpenFile(d.profileCPU, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0644)
if err != nil {
return Errorf("error opening cpu profile output: %v", err)
}
@@ -238,14 +240,14 @@ func (d *Debug) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
cpuFile = f
}
if d.trace != "" {
- f, err := os.OpenFile(d.trace, os.O_CREATE|os.O_TRUNC, 0644)
+ f, err := os.OpenFile(d.trace, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0644)
if err != nil {
return Errorf("error opening trace profile output: %v", err)
}
traceFile = f
}
if d.profileBlock != "" {
- f, err := os.OpenFile(d.profileBlock, os.O_CREATE|os.O_TRUNC, 0644)
+ f, err := os.OpenFile(d.profileBlock, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0644)
if err != nil {
return Errorf("error opening blocking profile output: %v", err)
}
@@ -253,7 +255,7 @@ func (d *Debug) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
blockFile = f
}
if d.profileMutex != "" {
- f, err := os.OpenFile(d.profileMutex, os.O_CREATE|os.O_TRUNC, 0644)
+ f, err := os.OpenFile(d.profileMutex, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0644)
if err != nil {
return Errorf("error opening mutex profile output: %v", err)
}
@@ -264,11 +266,19 @@ func (d *Debug) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
// Collect profiles.
var (
wg sync.WaitGroup
+ heapErr error
cpuErr error
traceErr error
blockErr error
mutexErr error
)
+ if heapFile != nil {
+ wg.Add(1)
+ go func() {
+ defer wg.Done()
+ heapErr = c.Sandbox.HeapProfile(heapFile, d.delay)
+ }()
+ }
if cpuFile != nil {
wg.Add(1)
go func() {
@@ -298,20 +308,61 @@ func (d *Debug) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
}()
}
- wg.Wait()
+ // While waiting, catch signals so that we can try to exit gracefully
+ // rather than being killed mid-collection. If we cannot wait for wg,
+ // then we will not be able to read the errors below safely.
+ readyChan := make(chan struct{})
+ go func() {
+ defer close(readyChan)
+ wg.Wait()
+ }()
+ signals := make(chan os.Signal, 1)
+ signal.Notify(signals, syscall.SIGTERM, syscall.SIGINT)
+ select {
+ case <-readyChan:
+ break // Safe to proceed.
+ case <-signals:
+ log.Infof("caught signal, waiting at most one more second.")
+ select {
+ case <-signals:
+ log.Infof("caught second signal, exiting immediately.")
+ os.Exit(1) // Not finished.
+ case <-time.After(time.Second):
+ log.Infof("timeout, exiting.")
+ os.Exit(1) // Not finished.
+ case <-readyChan:
+ break // Safe to proceed.
+ }
+ }
+
+ // Collect all errors.
errorCount := 0
+ if heapErr != nil {
+ errorCount++
+ log.Infof("error collecting heap profile: %v", heapErr)
+ os.Remove(heapFile.Name())
+ }
if cpuErr != nil {
+ errorCount++
log.Infof("error collecting cpu profile: %v", cpuErr)
+ os.Remove(cpuFile.Name())
}
if traceErr != nil {
+ errorCount++
log.Infof("error collecting trace profile: %v", traceErr)
+ os.Remove(traceFile.Name())
}
if blockErr != nil {
+ errorCount++
log.Infof("error collecting block profile: %v", blockErr)
+ os.Remove(blockFile.Name())
}
if mutexErr != nil {
+ errorCount++
log.Infof("error collecting mutex profile: %v", mutexErr)
+ os.Remove(mutexFile.Name())
}
+
if errorCount > 0 {
return subcommands.ExitFailure
}
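
The readyChan/signal dance above is a general pattern: wait for all collection goroutines, but let one SIGTERM/SIGINT start a one-second grace period, with a second signal or the timeout forcing an exit. A self-contained sketch of the same pattern outside runsc:

package main

import (
	"log"
	"os"
	"os/signal"
	"sync"
	"syscall"
	"time"
)

func main() {
	var wg sync.WaitGroup
	wg.Add(1)
	go func() {
		defer wg.Done()
		time.Sleep(2 * time.Second) // Stand-in for profile collection.
	}()

	// readyChan is closed once all workers are done; only then is it safe
	// to read their results.
	readyChan := make(chan struct{})
	go func() {
		defer close(readyChan)
		wg.Wait()
	}()

	signals := make(chan os.Signal, 1)
	signal.Notify(signals, syscall.SIGTERM, syscall.SIGINT)
	select {
	case <-readyChan:
		// All work finished; safe to proceed.
	case <-signals:
		log.Printf("caught signal, waiting at most one more second")
		select {
		case <-signals:
			log.Printf("caught second signal, exiting immediately")
			os.Exit(1)
		case <-time.After(time.Second):
			log.Printf("timeout, exiting")
			os.Exit(1)
		case <-readyChan:
			// Work finished during the grace period.
		}
	}
	log.Printf("all collections finished")
}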
diff --git a/runsc/sandbox/sandbox.go b/runsc/sandbox/sandbox.go
index c1d13a58d..cfee9e63d 100644
--- a/runsc/sandbox/sandbox.go
+++ b/runsc/sandbox/sandbox.go
@@ -991,7 +991,7 @@ func (s *Sandbox) Stacks() (string, error) {
}
// HeapProfile writes a heap profile to the given file.
-func (s *Sandbox) HeapProfile(f *os.File) error {
+func (s *Sandbox) HeapProfile(f *os.File, delay time.Duration) error {
log.Debugf("Heap profile %q", s.ID)
conn, err := s.sandboxConnect()
if err != nil {
@@ -1001,6 +1001,7 @@ func (s *Sandbox) HeapProfile(f *os.File) error {
opts := control.HeapProfileOpts{
FilePayload: urpc.FilePayload{Files: []*os.File{f}},
+ Delay: delay,
}
return conn.Call(boot.HeapProfile, &opts, nil)
}
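
With these pieces in place, a delayed heap profile can be requested from a running sandbox with something like runsc debug --profile-heap=/tmp/heap.prof --delay=30s <container-id> (hypothetical paths and values); the collection sleeps for the delay, or until the controller shuts down, before the snapshot is written to the supplied file.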