diff options
author | Fabricio Voznika <fvoznika@google.com> | 2020-10-05 17:38:49 -0700 |
---|---|---|
committer | gVisor bot <gvisor-bot@google.com> | 2020-10-05 17:40:23 -0700 |
commit | 9e64b9f3a5c162e8cb79096fef9e502d7e9af463 (patch) | |
tree | dc3b9d8802cfcab9b58b9891d0a8cc9bb2b79bf4 /runsc/sandbox | |
parent | 8ef7cebcbb6b7a1fa4a8725421cd04f1c73398b1 (diff) |
Fix gofer monitor prematurely destroying container
When all container tasks finish, they release the mount which in turn
will close the 9P session to the gofer. The gofer exits when the connection
closes, triggering the gofer monitor. The gofer monitor will _think_ that
the gofer died prematurely and destroy the container. Then when the caller
attempts to wait for the container, e.g. to get the exit code, wait fails
saying the container doesn't exist.
Gofer monitor now just SIGKILLs the container, and let the normal teardown
process to happen, which will evetually destroy the container at the right
time. Also, fixed an issue with exec racing with container's init process
exiting.
Closes #1487
PiperOrigin-RevId: 335537350
Diffstat (limited to 'runsc/sandbox')
-rw-r--r-- | runsc/sandbox/sandbox.go | 39 |
1 files changed, 27 insertions, 12 deletions
diff --git a/runsc/sandbox/sandbox.go b/runsc/sandbox/sandbox.go index a8f4f64a5..c4309feb3 100644 --- a/runsc/sandbox/sandbox.go +++ b/runsc/sandbox/sandbox.go @@ -72,11 +72,14 @@ type Sandbox struct { // will have it as a child process. child bool - // status is an exit status of a sandbox process. - status syscall.WaitStatus - // statusMu protects status. statusMu sync.Mutex + + // status is the exit status of a sandbox process. It's only set if the + // child==true and the sandbox was waited on. This field allows for multiple + // threads to wait on sandbox and get the exit code, since Linux will return + // WaitStatus to one of the waiters only. + status syscall.WaitStatus } // Args is used to configure a new sandbox. @@ -746,35 +749,47 @@ func (s *Sandbox) createSandboxProcess(conf *config.Config, args *Args, startSyn // Wait waits for the containerized process to exit, and returns its WaitStatus. func (s *Sandbox) Wait(cid string) (syscall.WaitStatus, error) { log.Debugf("Waiting for container %q in sandbox %q", cid, s.ID) - var ws syscall.WaitStatus if conn, err := s.sandboxConnect(); err != nil { - // The sandbox may have exited while before we had a chance to - // wait on it. + // The sandbox may have exited while before we had a chance to wait on it. + // There is nothing we can do for subcontainers. For the init container, we + // can try to get the sandbox exit code. + if !s.IsRootContainer(cid) { + return syscall.WaitStatus(0), err + } log.Warningf("Wait on container %q failed: %v. Will try waiting on the sandbox process instead.", cid, err) } else { defer conn.Close() + // Try the Wait RPC to the sandbox. + var ws syscall.WaitStatus err = conn.Call(boot.ContainerWait, &cid, &ws) if err == nil { // It worked! return ws, nil } + // See comment above. + if !s.IsRootContainer(cid) { + return syscall.WaitStatus(0), err + } + // The sandbox may have exited after we connected, but before // or during the Wait RPC. log.Warningf("Wait RPC to container %q failed: %v. Will try waiting on the sandbox process instead.", cid, err) } - // The sandbox may have already exited, or exited while handling the - // Wait RPC. The best we can do is ask Linux what the sandbox exit - // status was, since in most cases that will be the same as the - // container exit status. + // The sandbox may have already exited, or exited while handling the Wait RPC. + // The best we can do is ask Linux what the sandbox exit status was, since in + // most cases that will be the same as the container exit status. if err := s.waitForStopped(); err != nil { - return ws, err + return syscall.WaitStatus(0), err } if !s.child { - return ws, fmt.Errorf("sandbox no longer running and its exit status is unavailable") + return syscall.WaitStatus(0), fmt.Errorf("sandbox no longer running and its exit status is unavailable") } + + s.statusMu.Lock() + defer s.statusMu.Unlock() return s.status, nil } |