-rw-r--r--  runsc/boot/loader.go                      34
-rw-r--r--  runsc/container/multi_container_test.go   13
-rw-r--r--  runsc/sandbox/sandbox.go                  39
3 files changed, 52 insertions, 34 deletions
diff --git a/runsc/boot/loader.go b/runsc/boot/loader.go
index d5391f78a..dee2c4fbb 100644
--- a/runsc/boot/loader.go
+++ b/runsc/boot/loader.go
@@ -798,7 +798,7 @@ func (l *Loader) createContainerProcess(root bool, cid string, info *containerIn
}
// startGoferMonitor runs a goroutine to monitor gofer's health. It polls on
-// the gofer FDs looking for disconnects, and destroys the container if a
+// the gofer FDs looking for disconnects, and kills the container processes if a
// disconnect occurs in any of the gofer FDs.
func (l *Loader) startGoferMonitor(cid string, goferFDs []*fd.FD) {
go func() {
@@ -819,18 +819,15 @@ func (l *Loader) startGoferMonitor(cid string, goferFDs []*fd.FD) {
panic(fmt.Sprintf("Error monitoring gofer FDs: %v", err))
}
- // Check if the gofer has stopped as part of normal container destruction.
- // This is done just to avoid sending an annoying error message to the log.
- // Note that there is a small race window in between mu.Unlock() and the
- // lock being reacquired in destroyContainer(), but it's harmless to call
- // destroyContainer() multiple times.
l.mu.Lock()
- _, ok := l.processes[execID{cid: cid}]
- l.mu.Unlock()
- if ok {
- log.Infof("Gofer socket disconnected, destroying container %q", cid)
- if err := l.destroyContainer(cid); err != nil {
- log.Warningf("Error destroying container %q after gofer stopped: %v", cid, err)
+ defer l.mu.Unlock()
+
+ // The gofer could have been stopped due to a normal container shutdown.
+ // Check if the container has not stopped yet.
+ if tg, _ := l.tryThreadGroupFromIDLocked(execID{cid: cid}); tg != nil {
+ log.Infof("Gofer socket disconnected, killing container %q", cid)
+ if err := l.signalAllProcesses(cid, int32(linux.SIGKILL)); err != nil {
+ log.Warningf("Error killing container %q after gofer stopped: %v", cid, err)
}
}
}()
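
For background on what this monitor goroutine reacts to: a gofer disconnect surfaces as a hang-up event on the donated FDs. Below is a minimal sketch of that polling pattern in isolation; it is not the gvisor implementation, it assumes plain file descriptors plus golang.org/x/sys/unix, and the helper name waitForDisconnect is illustrative.

```go
// Hedged sketch, not the gvisor code: block until any of the given FDs
// reports a hang-up, i.e. the peer (here, the gofer) closed its end.
package gofermonitor

import (
	"fmt"

	"golang.org/x/sys/unix"
)

// waitForDisconnect polls fds until at least one of them is hung up or errors.
func waitForDisconnect(fds []int) error {
	pollFDs := make([]unix.PollFd, 0, len(fds))
	for _, fd := range fds {
		// POLLHUP and POLLERR are reported regardless of the Events mask;
		// requesting POLLHUP just makes the intent explicit.
		pollFDs = append(pollFDs, unix.PollFd{Fd: int32(fd), Events: unix.POLLHUP})
	}
	for {
		n, err := unix.Poll(pollFDs, -1 /* block with no timeout */)
		if err == unix.EINTR {
			continue // interrupted by a signal, retry
		}
		if err != nil {
			return fmt.Errorf("polling gofer FDs: %w", err)
		}
		if n > 0 {
			return nil // at least one FD reported POLLHUP/POLLERR
		}
	}
}
```

Once such a disconnect is observed, the goroutine above takes l.mu and, if the container is still running, sends SIGKILL to its processes.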
@@ -899,17 +896,24 @@ func (l *Loader) executeAsync(args *control.ExecArgs) (kernel.ThreadID, error) {
return 0, fmt.Errorf("container %q not started", args.ContainerID)
}
- // Get the container MountNamespace from the Task.
+ // Get the container MountNamespace from the Task. Trying to acquire a ref
+ // may fail if this races with task exit.
if kernel.VFS2Enabled {
// task.MountNamespace() does not take a ref, so we must do so ourselves.
args.MountNamespaceVFS2 = tg.Leader().MountNamespaceVFS2()
- args.MountNamespaceVFS2.IncRef()
+ if !args.MountNamespaceVFS2.TryIncRef() {
+ return 0, fmt.Errorf("container %q has stopped", args.ContainerID)
+ }
} else {
+ var reffed bool
tg.Leader().WithMuLocked(func(t *kernel.Task) {
// task.MountNamespace() does not take a ref, so we must do so ourselves.
args.MountNamespace = t.MountNamespace()
- args.MountNamespace.IncRef()
+ reffed = args.MountNamespace.TryIncRef()
})
+ if !reffed {
+ return 0, fmt.Errorf("container %q has stopped", args.ContainerID)
+ }
}
// Add the HOME environment variable if it is not already set.
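
The switch from IncRef to TryIncRef handles exactly that race: the task may be exiting and dropping the last reference to its mount namespace concurrently with executeAsync. The following is a minimal sketch of the try-increment idea only; gvisor's own refs machinery is more involved, and the type and method bodies here are illustrative.

```go
// Hedged sketch of the try-increment pattern; not gvisor's refs package.
package refsketch

import "sync/atomic"

// refCounted is a toy reference-counted object.
type refCounted struct {
	refs int64 // number of live references; 0 means already released
}

// TryIncRef takes a new reference only if the object is still alive. It fails
// when the count has already dropped to zero, which is the race executeAsync
// has to tolerate when the task exits concurrently.
func (r *refCounted) TryIncRef() bool {
	for {
		old := atomic.LoadInt64(&r.refs)
		if old == 0 {
			return false // too late, the last reference is gone
		}
		if atomic.CompareAndSwapInt64(&r.refs, old, old+1) {
			return true
		}
		// Lost a race with another IncRef/DecRef; retry.
	}
}

// DecRef drops a reference; cleanup would run when the count reaches zero.
func (r *refCounted) DecRef() {
	if atomic.AddInt64(&r.refs, -1) == 0 {
		// Release resources here.
	}
}
```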
diff --git a/runsc/container/multi_container_test.go b/runsc/container/multi_container_test.go
index 952215ec1..850e80290 100644
--- a/runsc/container/multi_container_test.go
+++ b/runsc/container/multi_container_test.go
@@ -480,7 +480,7 @@ func TestMultiContainerMount(t *testing.T) {
// TestMultiContainerSignal checks that it is possible to signal individual
// containers without killing the entire sandbox.
func TestMultiContainerSignal(t *testing.T) {
- for name, conf := range configs(t, all...) {
+ for name, conf := range configsWithVFS2(t, all...) {
t.Run(name, func(t *testing.T) {
rootDir, cleanup, err := testutil.SetupRootDir()
if err != nil {
@@ -1691,12 +1691,11 @@ func TestMultiContainerRunNonRoot(t *testing.T) {
}
// TestMultiContainerHomeEnvDir tests that the HOME environment variable is set
-// for root containers, sub-containers, and execed processes.
+// for root containers, sub-containers, and exec'ed processes.
func TestMultiContainerHomeEnvDir(t *testing.T) {
- // TODO(gvisor.dev/issue/1487): VFSv2 configs failing.
// NOTE: Don't use overlay since we need changes to persist to the temp dir
// outside the sandbox.
- for testName, conf := range configs(t, noOverlay...) {
+ for testName, conf := range configsWithVFS2(t, noOverlay...) {
t.Run(testName, func(t *testing.T) {
rootDir, cleanup, err := testutil.SetupRootDir()
@@ -1718,9 +1717,9 @@ func TestMultiContainerHomeEnvDir(t *testing.T) {
// We will sleep in the root container in order to ensure that the root
// container doesn't terminate before sub containers can be created.
- rootCmd := []string{"/bin/sh", "-c", fmt.Sprintf("printf \"$HOME\" > %s; sleep 1000", homeDirs["root"].Name())}
- subCmd := []string{"/bin/sh", "-c", fmt.Sprintf("printf \"$HOME\" > %s", homeDirs["sub"].Name())}
- execCmd := fmt.Sprintf("printf \"$HOME\" > %s", homeDirs["exec"].Name())
+ rootCmd := []string{"/bin/sh", "-c", fmt.Sprintf(`printf "$HOME" > %s; sleep 1000`, homeDirs["root"].Name())}
+ subCmd := []string{"/bin/sh", "-c", fmt.Sprintf(`printf "$HOME" > %s`, homeDirs["sub"].Name())}
+ execCmd := fmt.Sprintf(`printf "$HOME" > %s`, homeDirs["exec"].Name())
// Setup the containers, a root container and sub container.
specConfig, ids := createSpecs(rootCmd, subCmd)
diff --git a/runsc/sandbox/sandbox.go b/runsc/sandbox/sandbox.go
index a8f4f64a5..c4309feb3 100644
--- a/runsc/sandbox/sandbox.go
+++ b/runsc/sandbox/sandbox.go
@@ -72,11 +72,14 @@ type Sandbox struct {
// will have it as a child process.
child bool
- // status is an exit status of a sandbox process.
- status syscall.WaitStatus
-
// statusMu protects status.
statusMu sync.Mutex
+
+ // status is the exit status of the sandbox process. It's only set if
+ // child==true and the sandbox was waited on. This field allows multiple
+ // threads to wait on the sandbox and get the exit code, since Linux will
+ // return the WaitStatus to only one of the waiters.
+ status syscall.WaitStatus
}
// Args is used to configure a new sandbox.
@@ -746,35 +749,47 @@ func (s *Sandbox) createSandboxProcess(conf *config.Config, args *Args, startSyn
// Wait waits for the containerized process to exit, and returns its WaitStatus.
func (s *Sandbox) Wait(cid string) (syscall.WaitStatus, error) {
log.Debugf("Waiting for container %q in sandbox %q", cid, s.ID)
- var ws syscall.WaitStatus
if conn, err := s.sandboxConnect(); err != nil {
- // The sandbox may have exited while before we had a chance to
- // wait on it.
+ // The sandbox may have exited before we had a chance to wait on it.
+ // There is nothing we can do for subcontainers. For the init container, we
+ // can try to get the sandbox exit code.
+ if !s.IsRootContainer(cid) {
+ return syscall.WaitStatus(0), err
+ }
log.Warningf("Wait on container %q failed: %v. Will try waiting on the sandbox process instead.", cid, err)
} else {
defer conn.Close()
+
// Try the Wait RPC to the sandbox.
+ var ws syscall.WaitStatus
err = conn.Call(boot.ContainerWait, &cid, &ws)
if err == nil {
// It worked!
return ws, nil
}
+ // See comment above.
+ if !s.IsRootContainer(cid) {
+ return syscall.WaitStatus(0), err
+ }
+
// The sandbox may have exited after we connected, but before
// or during the Wait RPC.
log.Warningf("Wait RPC to container %q failed: %v. Will try waiting on the sandbox process instead.", cid, err)
}
- // The sandbox may have already exited, or exited while handling the
- // Wait RPC. The best we can do is ask Linux what the sandbox exit
- // status was, since in most cases that will be the same as the
- // container exit status.
+ // The sandbox may have already exited, or exited while handling the Wait RPC.
+ // The best we can do is ask Linux what the sandbox exit status was, since in
+ // most cases that will be the same as the container exit status.
if err := s.waitForStopped(); err != nil {
- return ws, err
+ return syscall.WaitStatus(0), err
}
if !s.child {
- return ws, fmt.Errorf("sandbox no longer running and its exit status is unavailable")
+ return syscall.WaitStatus(0), fmt.Errorf("sandbox no longer running and its exit status is unavailable")
}
+
+ s.statusMu.Lock()
+ defer s.statusMu.Unlock()
return s.status, nil
}
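
The statusMu/status pair exists because the kernel reports a child's exit status to a single waiter; once the sandbox process has been reaped, later Wait callers can only read the cached value. Below is a minimal sketch of that reap-once, read-many pattern, illustrative only: it uses a plain os/exec child rather than the real sandbox process, and the child type and its fields are hypothetical stand-ins for the Sandbox fields above.

```go
// Hedged sketch of caching an exit status for multiple waiters; the real
// Sandbox uses its own wait logic, this only illustrates statusMu/status.
package waitsketch

import (
	"os/exec"
	"sync"
	"syscall"
)

// child wraps a process so that many goroutines can call Wait and all see
// the same exit status, even though the kernel reaps the process only once.
type child struct {
	cmd *exec.Cmd

	reapOnce sync.Once
	statusMu sync.Mutex
	status   syscall.WaitStatus
}

func (c *child) Wait() syscall.WaitStatus {
	c.reapOnce.Do(func() {
		// Only the first caller actually waits on (and reaps) the process;
		// concurrent callers block here until it finishes.
		_ = c.cmd.Wait() // an *exec.ExitError still fills in ProcessState
		c.statusMu.Lock()
		defer c.statusMu.Unlock()
		if state := c.cmd.ProcessState; state != nil {
			c.status = state.Sys().(syscall.WaitStatus)
		}
	})
	c.statusMu.Lock()
	defer c.statusMu.Unlock()
	return c.status
}
```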