From 5236b78242677612ac71b19cee85b3bf4cca4008 Mon Sep 17 00:00:00 2001 From: Fabricio Voznika Date: Tue, 20 Nov 2018 17:23:14 -0800 Subject: Dumps stacks if watchdog thread is stuck PiperOrigin-RevId: 222332703 Change-Id: Id5c3cf79591c5d2949895b4e323e63c48c679820 --- pkg/sentry/watchdog/watchdog.go | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/pkg/sentry/watchdog/watchdog.go b/pkg/sentry/watchdog/watchdog.go index 75b11237f..c49b537a5 100644 --- a/pkg/sentry/watchdog/watchdog.go +++ b/pkg/sentry/watchdog/watchdog.go @@ -190,7 +190,24 @@ func (w *Watchdog) loop() { // runTurn runs a single pass over all tasks and reports anything it finds. func (w *Watchdog) runTurn() { - tasks := w.k.TaskSet().Root.Tasks() + // Someone needs to watch the watchdog. The call below can get stuck if there + // is a deadlock affecting root's PID namespace mutex. Run it in a goroutine + // and report if it takes too long to return. + var tasks []*kernel.Task + done := make(chan struct{}) + go func() { // S/R-SAFE: watchdog is stopped and restarted during S/R. + tasks = w.k.TaskSet().Root.Tasks() + close(done) + }() + + select { + case <-done: + case <-time.After(w.taskTimeout): + // Report if the watchdog is not making progress. + // No one is watching the watchdog watcher though. 
+ w.reportStuckWatchdog() + <-done + } newOffenders := make(map[*kernel.Task]*offender) newTaskFound := false @@ -245,7 +262,16 @@ func (w *Watchdog) report(offenders map[*kernel.Task]*offender, newTaskFound boo buf.WriteString(fmt.Sprintf("\tTask tid: %v (%#x), entered RunSys state %v ago.\n", tid, uint64(tid), now.Sub(o.lastUpdateTime))) } buf.WriteString("Search for '(*Task).run(0x..., 0x)' in the stack dump to find the offending goroutine") + w.onStuckTask(newTaskFound, &buf) +} + +func (w *Watchdog) reportStuckWatchdog() { + var buf bytes.Buffer + buf.WriteString("Watchdog goroutine is stuck:\n") + w.onStuckTask(true, &buf) +} +func (w *Watchdog) onStuckTask(newTaskFound bool, buf *bytes.Buffer) { switch w.timeoutAction { case LogWarning: // Dump stack only if a new task is detected or if it sometime has passed since -- cgit v1.2.3