path: root/pkg/sentry/watchdog/watchdog.go
Diffstat (limited to 'pkg/sentry/watchdog/watchdog.go')
-rw-r--r--  pkg/sentry/watchdog/watchdog.go  374
1 file changed, 374 insertions(+), 0 deletions(-)
diff --git a/pkg/sentry/watchdog/watchdog.go b/pkg/sentry/watchdog/watchdog.go
new file mode 100644
index 000000000..748273366
--- /dev/null
+++ b/pkg/sentry/watchdog/watchdog.go
@@ -0,0 +1,374 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package watchdog is responsible for monitoring the sentry for tasks that may
+// potentially be stuck or looping indefinitely, causing hard-to-debug hangs in
+// the untrusted app.
+//
+// It works by periodically querying all tasks to check whether they are in user
+// mode (RunUser), kernel mode (RunSys), or blocked in the kernel (OffCPU). Tasks
+// that have been running in kernel mode for a long time in the same syscall
+// without blocking are considered stuck and are reported.
+//
+// When a stuck task is detected, the watchdog can take one of the following actions:
+// 1. LogWarning: Logs a warning message followed by a stack dump of all goroutines.
+//    If a task continues to be stuck, the message will repeat every minute, unless
+//    a new stuck task is detected.
+// 2. Panic: same as above, followed by panic().
+//
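+// A minimal usage sketch (illustrative only; assumes a configured *kernel.Kernel k):
+//
+//   w := watchdog.New(k, watchdog.DefaultOpts)
+//   w.Start()
+//   // ... run the sentry workload ...
+//   w.Stop()
+//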
+package watchdog
+
+import (
+ "bytes"
+ "fmt"
+ "time"
+
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/metric"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time"
+ "gvisor.dev/gvisor/pkg/sync"
+)
+
+// Opts configures the watchdog.
+type Opts struct {
+ // TaskTimeout is the amount of time to allow a task to execute the
+ // same syscall without blocking before it's declared stuck.
+ TaskTimeout time.Duration
+
+ // TaskTimeoutAction indicates what action to take when a stuck task
+ // is detected.
+ TaskTimeoutAction Action
+
+ // StartupTimeout is the amount of time to allow between watchdog
+ // creation and calling watchdog.Start.
+ StartupTimeout time.Duration
+
+ // StartupTimeoutAction indicates what action to take when
+ // watchdog.Start is not called within the timeout.
+ StartupTimeoutAction Action
+}
+
+// DefaultOpts is a default set of options for the watchdog.
+var DefaultOpts = Opts{
+ // Task timeout.
+ TaskTimeout: 3 * time.Minute,
+ TaskTimeoutAction: LogWarning,
+
+ // Startup timeout.
+ StartupTimeout: 30 * time.Second,
+ StartupTimeoutAction: LogWarning,
+}
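+
+// A caller that prefers to crash on stuck tasks could, for example, copy
+// DefaultOpts and override the action before constructing the watchdog
+// (illustrative sketch; assumes a *kernel.Kernel k):
+//
+//   opts := watchdog.DefaultOpts
+//   opts.TaskTimeoutAction = watchdog.Panic
+//   w := watchdog.New(k, opts)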
+
+// descheduleThreshold is the amount of time scheduling needs to be off before the entire wait period
+// is discounted from the task's last update time. It's set high enough that small scheduling delays won't
+// trigger it.
+const descheduleThreshold = 1 * time.Second
+
+var (
+ stuckStartup = metric.MustCreateNewUint64Metric("/watchdog/stuck_startup_detected", true /* sync */, "Incremented once on startup watchdog timeout")
+ stuckTasks = metric.MustCreateNewUint64Metric("/watchdog/stuck_tasks_detected", true /* sync */, "Cumulative count of stuck tasks detected")
+)
+
+// Amount of time to wait before dumping the stack to the log again when the same task(s) remain stuck.
+var stackDumpSameTaskPeriod = time.Minute
+
+// Action defines what action to take when a stuck task is detected.
+type Action int
+
+const (
+ // LogWarning logs a warning message followed by a stack trace.
+ LogWarning Action = iota
+
+ // Panic will do the same logging as LogWarning and panic().
+ Panic
+)
+
+// String returns Action's string representation.
+func (a Action) String() string {
+ switch a {
+ case LogWarning:
+ return "LogWarning"
+ case Panic:
+ return "Panic"
+ default:
+ panic(fmt.Sprintf("Invalid action: %d", a))
+ }
+}
+
+// Watchdog is the main watchdog type. It controls a goroutine that periodically
+// analyses all tasks and reports if any of them appear to be stuck.
+type Watchdog struct {
+ // Configuration options are embedded.
+ Opts
+
+ // period indicates how often to check all tasks. It's calculated based on
+ // opts.TaskTimeout.
+ period time.Duration
+
+ // k is where the tasks come from.
+ k *kernel.Kernel
+
+ // stop is used to notify the watchdog that it should stop.
+ stop chan struct{}
+
+ // done is used to notify when the watchdog has stopped.
+ done chan struct{}
+
+ // offenders map contains all tasks that are currently stuck.
+ offenders map[*kernel.Task]*offender
+
+ // lastStackDump tracks the last time a stack dump was generated to prevent
+ // spamming the log.
+ lastStackDump time.Time
+
+ // lastRun is set to the last time the watchdog executed a monitoring loop.
+ lastRun ktime.Time
+
+ // mu protects the fields below.
+ mu sync.Mutex
+
+ // running is true if the watchdog is running.
+ running bool
+
+ // startCalled is true if Start has ever been called. It remains true
+ // even if Stop is called.
+ startCalled bool
+}
+
+type offender struct {
+ lastUpdateTime ktime.Time
+}
+
+// New creates a new watchdog.
+func New(k *kernel.Kernel, opts Opts) *Watchdog {
+ // 4 is arbitrary; it just shouldn't prolong detection much beyond 'TaskTimeout'.
+ period := opts.TaskTimeout / 4
+ w := &Watchdog{
+ Opts: opts,
+ k: k,
+ period: period,
+ offenders: make(map[*kernel.Task]*offender),
+ stop: make(chan struct{}),
+ done: make(chan struct{}),
+ }
+
+ // Handle StartupTimeout if it exists.
+ if w.StartupTimeout > 0 {
+ log.Infof("Watchdog waiting %v for startup", w.StartupTimeout)
+ go w.waitForStart() // S/R-SAFE: watchdog is stopped during save and restarted after restore.
+ }
+
+ return w
+}
+
+// Start starts the watchdog.
+func (w *Watchdog) Start() {
+ w.mu.Lock()
+ defer w.mu.Unlock()
+ w.startCalled = true
+
+ if w.running {
+ return
+ }
+
+ if w.TaskTimeout == 0 {
+ log.Infof("Watchdog task timeout disabled")
+ return
+ }
+ w.lastRun = w.k.MonotonicClock().Now()
+
+ log.Infof("Starting watchdog, period: %v, timeout: %v, action: %v", w.period, w.TaskTimeout, w.TaskTimeoutAction)
+ go w.loop() // S/R-SAFE: watchdog is stopped during save and restarted after restore.
+ w.running = true
+}
+
+// Stop requests the watchdog to stop and waits for it to do so.
+func (w *Watchdog) Stop() {
+ if w.TaskTimeout == 0 {
+ return
+ }
+
+ w.mu.Lock()
+ defer w.mu.Unlock()
+ if !w.running {
+ return
+ }
+ log.Infof("Stopping watchdog")
+ w.stop <- struct{}{}
+ <-w.done
+ w.running = false
+ log.Infof("Watchdog stopped")
+}
+
+// waitForStart waits for Start to be called and takes action if it does not
+// happen within the startup timeout.
+func (w *Watchdog) waitForStart() {
+ <-time.After(w.StartupTimeout)
+ w.mu.Lock()
+ defer w.mu.Unlock()
+ if w.startCalled {
+ // We are fine.
+ return
+ }
+
+ stuckStartup.Increment()
+
+ var buf bytes.Buffer
+ buf.WriteString(fmt.Sprintf("Watchdog.Start() not called within %s", w.StartupTimeout))
+ w.doAction(w.StartupTimeoutAction, false, &buf)
+}
+
+// loop is the main watchdog routine. It only returns when 'Stop()' is called.
+func (w *Watchdog) loop() {
+ // Loop until someone stops it.
+ for {
+ select {
+ case <-w.stop:
+ w.done <- struct{}{}
+ return
+ case <-time.After(w.period):
+ w.runTurn()
+ }
+ }
+}
+
+// runTurn runs a single pass over all tasks and reports anything it finds.
+func (w *Watchdog) runTurn() {
+ // Someone needs to watch the watchdog. The call below can get stuck if there
+ // is a deadlock affecting root's PID namespace mutex. Run it in a goroutine
+ // and report if it takes too long to return.
+ var tasks []*kernel.Task
+ done := make(chan struct{})
+ go func() { // S/R-SAFE: watchdog is stopped and restarted during S/R.
+ tasks = w.k.TaskSet().Root.Tasks()
+ close(done)
+ }()
+
+ select {
+ case <-done:
+ case <-time.After(w.TaskTimeout):
+ // Report if the watchdog is not making progress.
+ // No one is watching the watchdog watcher though.
+ w.reportStuckWatchdog()
+ <-done
+ }
+
+ newOffenders := make(map[*kernel.Task]*offender)
+ newTaskFound := false
+ now := ktime.FromNanoseconds(int64(w.k.CPUClockNow() * uint64(linux.ClockTick)))
+
+ // The process may be running with a low CPU limit, making tasks appear stuck because
+ // they are starved of CPU cycles. The estimate is that tasks could have been starved
+ // since the last time the watchdog ran. If the watchdog detects that scheduling
+ // is off, it discounts the entire duration since the last run from 'lastUpdateTime'.
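+ // For example (illustrative numbers): with the default 3 minute TaskTimeout the
+ // period is 45s; if this run starts more than descheduleThreshold (1s) later than
+ // 'lastRun + period', the whole duration since 'lastRun' is treated as starvation
+ // and subtracted from each task's elapsed time below.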
+ discount := time.Duration(0)
+ if now.Sub(w.lastRun.Add(w.period)) > descheduleThreshold {
+ discount = now.Sub(w.lastRun)
+ }
+ w.lastRun = now
+
+ log.Infof("Watchdog starting loop, tasks: %d, discount: %v", len(tasks), discount)
+ for _, t := range tasks {
+ tsched := t.TaskGoroutineSchedInfo()
+
+ // An offender is a task running inside the kernel for longer than the specified timeout.
+ if tsched.State == kernel.TaskGoroutineRunningSys {
+ lastUpdateTime := ktime.FromNanoseconds(int64(tsched.Timestamp * uint64(linux.ClockTick)))
+ elapsed := now.Sub(lastUpdateTime) - discount
+ if elapsed > w.TaskTimeout {
+ tc, ok := w.offenders[t]
+ if !ok {
+ // New stuck task detected.
+ //
+ // Note that tasks blocked doing IO may be considered stuck in the kernel,
+ // unless they are surrounded by
+ // Task.UninterruptibleSleepStart/Finish.
+ tc = &offender{lastUpdateTime: lastUpdateTime}
+ stuckTasks.Increment()
+ newTaskFound = true
+ }
+ newOffenders[t] = tc
+ }
+ }
+ }
+ if len(newOffenders) > 0 {
+ w.report(newOffenders, newTaskFound, now)
+ }
+
+ // Remember which tasks have been reported.
+ w.offenders = newOffenders
+}
+
+// report takes appropriate action when a stuck task is detected.
+func (w *Watchdog) report(offenders map[*kernel.Task]*offender, newTaskFound bool, now ktime.Time) {
+ var buf bytes.Buffer
+ buf.WriteString(fmt.Sprintf("Sentry detected %d stuck task(s):\n", len(offenders)))
+ for t, o := range offenders {
+ tid := w.k.TaskSet().Root.IDOfTask(t)
+ buf.WriteString(fmt.Sprintf("\tTask tid: %v (%#x), entered RunSys state %v ago.\n", tid, uint64(tid), now.Sub(o.lastUpdateTime)))
+ }
+
+ buf.WriteString("Search for '(*Task).run(0x..., 0x<tid>)' in the stack dump to find the offending goroutine")
+
+ // Force stack dump only if a new task is detected.
+ w.doAction(w.TaskTimeoutAction, newTaskFound, &buf)
+}
+
+func (w *Watchdog) reportStuckWatchdog() {
+ var buf bytes.Buffer
+ buf.WriteString("Watchdog goroutine is stuck")
+ w.doAction(w.TaskTimeoutAction, false, &buf)
+}
+
+// doAction will take the given action. If the action is LogWarning, the stack
+// is not always dumped to the log to prevent log flooding. "forceStack"
+// guarantees that the stack will be dumped regardless.
+func (w *Watchdog) doAction(action Action, forceStack bool, msg *bytes.Buffer) {
+ switch action {
+ case LogWarning:
+ // Dump the stack only if forced or if some time has passed since the last time a
+ // stack dump was generated.
+ if !forceStack && time.Since(w.lastStackDump) < stackDumpSameTaskPeriod {
+ msg.WriteString("\n...[stack dump skipped]...")
+ log.Warningf(msg.String())
+ return
+ }
+ log.TracebackAll(msg.String())
+ w.lastStackDump = time.Now()
+
+ case Panic:
+ // Panic will skip over running tasks, which are likely the culprit here. So manually
+ // dump all stacks before panicking.
+ log.TracebackAll(msg.String())
+
+ // Attempt to flush metrics, timeout and move on in case metrics are stuck as well.
+ metricsEmitted := make(chan struct{}, 1)
+ go func() { // S/R-SAFE: watchdog is stopped during save and restarted after restore.
+ // Flush metrics before killing process.
+ metric.EmitMetricUpdate()
+ metricsEmitted <- struct{}{}
+ }()
+ select {
+ case <-metricsEmitted:
+ case <-time.After(1 * time.Second):
+ }
+ panic(fmt.Sprintf("%s\nStacks for running Gs are skipped while panicking.", msg.String()))
+
+ default:
+ panic(fmt.Sprintf("Unknown watchdog action %v", action))
+
+ }
+}