diff options
author | Fabricio Voznika <fvoznika@google.com> | 2018-11-20 17:23:14 -0800 |
---|---|---|
committer | Shentubot <shentubot@google.com> | 2018-11-20 17:24:19 -0800 |
commit | 5236b78242677612ac71b19cee85b3bf4cca4008 (patch) | |
tree | cb9eeeef288516a8e41d8f50538a6920d9f36db3 | |
parent | f894610c572976026f4cf6841f4095718827e4f8 (diff) |
Dumps stacks if watchdog thread is stuck
PiperOrigin-RevId: 222332703
Change-Id: Id5c3cf79591c5d2949895b4e323e63c48c679820
-rw-r--r-- | pkg/sentry/watchdog/watchdog.go | 28 |
1 files changed, 27 insertions, 1 deletions
diff --git a/pkg/sentry/watchdog/watchdog.go b/pkg/sentry/watchdog/watchdog.go index 75b11237f..c49b537a5 100644 --- a/pkg/sentry/watchdog/watchdog.go +++ b/pkg/sentry/watchdog/watchdog.go @@ -190,7 +190,24 @@ func (w *Watchdog) loop() { // runTurn runs a single pass over all tasks and reports anything it finds. func (w *Watchdog) runTurn() { - tasks := w.k.TaskSet().Root.Tasks() + // Someone needs to watch the watchdog. The call below can get stuck if there + // is a deadlock affecting root's PID namespace mutex. Run it in a goroutine + // and report if it takes too long to return. + var tasks []*kernel.Task + done := make(chan struct{}) + go func() { // S/R-SAFE: watchdog is stopped and restarted during S/R. + tasks = w.k.TaskSet().Root.Tasks() + close(done) + }() + + select { + case <-done: + case <-time.After(w.taskTimeout): + // Report if the watchdog is not making progress. + // No one is watching the watchdog watcher though. + w.reportStuckWatchdog() + <-done + } newOffenders := make(map[*kernel.Task]*offender) newTaskFound := false @@ -245,7 +262,16 @@ func (w *Watchdog) report(offenders map[*kernel.Task]*offender, newTaskFound boo buf.WriteString(fmt.Sprintf("\tTask tid: %v (%#x), entered RunSys state %v ago.\n", tid, uint64(tid), now.Sub(o.lastUpdateTime))) } buf.WriteString("Search for '(*Task).run(0x..., 0x<tid>)' in the stack dump to find the offending goroutine") + w.onStuckTask(newTaskFound, &buf) +} + +func (w *Watchdog) reportStuckWatchdog() { + var buf bytes.Buffer + buf.WriteString("Watchdog goroutine is stuck:\n") + w.onStuckTask(true, &buf) +} +func (w *Watchdog) onStuckTask(newTaskFound bool, buf *bytes.Buffer) { switch w.timeoutAction { case LogWarning: // Dump stack only if a new task is detected or if it sometime has passed since |