From 47f025461e6fdf8da99c780b17c2227696e83845 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Thu, 22 Jul 2021 15:37:37 -0700 Subject: runsc: Wait for child processes without timeouts * First, we don't need to poll child processes. * Second, the 5-second timeout is too short if a host is overloaded. * Third, the timeout can hide bugs in the code when we wait for a process that isn't going to exit. PiperOrigin-RevId: 386337586 --- runsc/sandbox/sandbox.go | 33 ++++++++++++++++----------------- 1 file changed, 16 insertions(+), 17 deletions(-) (limited to 'runsc/sandbox') diff --git a/runsc/sandbox/sandbox.go b/runsc/sandbox/sandbox.go index 48efbb0b8..5fb7dc834 100644 --- a/runsc/sandbox/sandbox.go +++ b/runsc/sandbox/sandbox.go @@ -1157,27 +1157,26 @@ func (s *Sandbox) destroyContainer(cid string) error { } func (s *Sandbox) waitForStopped() error { + if s.child { + s.statusMu.Lock() + defer s.statusMu.Unlock() + if s.Pid == 0 { + return nil + } + // The sandbox process is a child of the current process, + // so we can wait it and collect its zombie. + if _, err := unix.Wait4(int(s.Pid), &s.status, 0, nil); err != nil { + return fmt.Errorf("error waiting the sandbox process: %v", err) + } + s.Pid = 0 + return nil + } + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) defer cancel() b := backoff.WithContext(backoff.NewConstantBackOff(100*time.Millisecond), ctx) op := func() error { - if s.child { - s.statusMu.Lock() - defer s.statusMu.Unlock() - if s.Pid == 0 { - return nil - } - // The sandbox process is a child of the current process, - // so we can wait it and collect its zombie. - wpid, err := unix.Wait4(int(s.Pid), &s.status, unix.WNOHANG, nil) - if err != nil { - return fmt.Errorf("error waiting the sandbox process: %v", err) - } - if wpid == 0 { - return fmt.Errorf("sandbox is still running") - } - s.Pid = 0 - } else if s.IsRunning() { + if s.IsRunning() { return fmt.Errorf("sandbox is still running") } return nil -- cgit v1.2.3