author    Fabricio Voznika <fvoznika@google.com>   2019-09-04 18:55:39 -0700
committer gVisor bot <gvisor-bot@google.com>       2019-09-04 18:56:49 -0700
commit    0f5cdc1e00488823f1f7b9884c15b899677362b6
tree      16da94bb0a1f3505998e10c5261b8616e478bf1d
parent    bcddd0a4778916f6a5347246a81fbe236050d2c4
Resolve flakes with TestMultiContainerDestroy
Some processes are reparented to the root container depending on the kill order, and the root container would not reap them in time, so some zombie processes were still present when the test checked. Fix this by running the second container inside a PID namespace.

PiperOrigin-RevId: 267278591
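The fix hinges on giving the fork-bomb container its own PID namespace. Below is a minimal standalone sketch of that spec change, assuming the opencontainers runtime-spec Go types (the same specs package the updated test uses); it illustrates the technique and is not code from this commit:

package main

import (
	"fmt"

	specs "github.com/opencontainers/runtime-spec/specs-go"
)

// addPIDNamespace gives a container spec its own PID namespace. With a
// private PID namespace, orphaned processes re-parent to that container's
// init rather than to the root container, so no zombies leak into the root.
func addPIDNamespace(spec *specs.Spec) {
	if spec.Linux == nil {
		spec.Linux = &specs.Linux{}
	}
	spec.Linux.Namespaces = append(spec.Linux.Namespaces,
		specs.LinuxNamespace{Type: specs.PIDNamespace})
}

func main() {
	s := &specs.Spec{}
	addPIDNamespace(s)
	fmt.Println(s.Linux.Namespaces) // [{pid }]
}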
-rw-r--r--  runsc/boot/loader.go                     49
-rw-r--r--  runsc/container/multi_container_test.go  14
2 files changed, 32 insertions(+), 31 deletions(-)
diff --git a/runsc/boot/loader.go b/runsc/boot/loader.go
index 19b738705..823a34619 100644
--- a/runsc/boot/loader.go
+++ b/runsc/boot/loader.go
@@ -760,26 +760,34 @@ func (l *Loader) destroyContainer(cid string) error {
if err := l.signalAllProcesses(cid, int32(linux.SIGKILL)); err != nil {
return fmt.Errorf("sending SIGKILL to all container processes: %v", err)
}
+ // Wait for all processes that belong to the container to exit (including
+ // exec'd processes).
+ for _, t := range l.k.TaskSet().Root.Tasks() {
+ if t.ContainerID() == cid {
+ t.ThreadGroup().WaitExited()
+ }
+ }
+
+ // At this point, all processes inside of the container have exited,
+ // releasing all references to the container's MountNamespace and
+ // causing all submounts and overlays to be unmounted.
+ //
+ // Since the container's MountNamespace has been released,
+ // MountNamespace.destroy() will have executed, but that function may
+ // trigger async close operations. We must wait for those to complete
+ // before returning, otherwise the caller may kill the gofer before
+ // they complete, causing a cascade of failing RPCs.
+ fs.AsyncBarrier()
}
- // Remove all container thread groups from the map.
+ // No failure can occur from this point on. Remove all container thread groups
+ // from the map.
for key := range l.processes {
if key.cid == cid {
delete(l.processes, key)
}
}
- // At this point, all processes inside of the container have exited,
- // releasing all references to the container's MountNamespace and
- // causing all submounts and overlays to be unmounted.
- //
- // Since the container's MountNamespace has been released,
- // MountNamespace.destroy() will have executed, but that function may
- // trigger async close operations. We must wait for those to complete
- // before returning, otherwise the caller may kill the gofer before
- // they complete, causing a cascade of failing RPCs.
- fs.AsyncBarrier()
-
log.Debugf("Container destroyed %q", cid)
return nil
}
@@ -1037,21 +1045,8 @@ func (l *Loader) signalAllProcesses(cid string, signo int32) error {
// the signal is delivered. This prevents process leaks when SIGKILL is
// sent to the entire container.
l.k.Pause()
- if err := l.k.SendContainerSignal(cid, &arch.SignalInfo{Signo: signo}); err != nil {
- l.k.Unpause()
- return err
- }
- l.k.Unpause()
-
- // If SIGKILLing all processes, wait for them to exit.
- if linux.Signal(signo) == linux.SIGKILL {
- for _, t := range l.k.TaskSet().Root.Tasks() {
- if t.ContainerID() == cid {
- t.ThreadGroup().WaitExited()
- }
- }
- }
- return nil
+ defer l.k.Unpause()
+ return l.k.SendContainerSignal(cid, &arch.SignalInfo{Signo: signo})
}
// threadGroupFromID same as threadGroupFromIDLocked except that it acquires
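The second loader.go hunk replaces the two manual Unpause calls with a single deferred one, so the kernel is resumed on every return path. A self-contained sketch of the same pattern follows; toyKernel is a stand-in for gVisor's kernel.Kernel, not its real API:

package main

import "fmt"

// toyKernel stands in for gVisor's kernel.Kernel; only the Pause/Unpause
// pairing matters here.
type toyKernel struct{ paused bool }

func (k *toyKernel) Pause()   { k.paused = true }
func (k *toyKernel) Unpause() { k.paused = false }

// send simulates signal delivery that can fail partway through.
func (k *toyKernel) send() error { return fmt.Errorf("delivery failed") }

// signalAll pauses the kernel so no new processes can be created while the
// signal is delivered, and defers Unpause so that every return path,
// including the error path, resumes the kernel exactly once.
func signalAll(k *toyKernel) error {
	k.Pause()
	defer k.Unpause()
	return k.send()
}

func main() {
	k := &toyKernel{}
	err := signalAll(k)
	fmt.Println(err, "; paused after return:", k.paused) // delivery failed ; paused after return: false
}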
diff --git a/runsc/container/multi_container_test.go b/runsc/container/multi_container_test.go
index 6e5f23ff2..bd45a5118 100644
--- a/runsc/container/multi_container_test.go
+++ b/runsc/container/multi_container_test.go
@@ -549,10 +549,16 @@ func TestMultiContainerDestroy(t *testing.T) {
t.Logf("Running test with conf: %+v", conf)
// First container will remain intact while the second container is killed.
- specs, ids := createSpecs(
- []string{app, "reaper"},
+ podSpecs, ids := createSpecs(
+ []string{"sleep", "100"},
[]string{app, "fork-bomb"})
- containers, cleanup, err := startContainers(conf, specs, ids)
+
+ // Run the fork bomb in a PID namespace to prevent its processes from being
+ // re-parented to PID=1 in the root container.
+ podSpecs[1].Linux = &specs.Linux{
+ Namespaces: []specs.LinuxNamespace{{Type: "pid"}},
+ }
+ containers, cleanup, err := startContainers(conf, podSpecs, ids)
if err != nil {
t.Fatalf("error starting containers: %v", err)
}
@@ -580,7 +586,7 @@ func TestMultiContainerDestroy(t *testing.T) {
if err != nil {
t.Fatalf("error getting process data from sandbox: %v", err)
}
- expectedPL := []*control.Process{{PID: 1, Cmd: "test_app"}}
+ expectedPL := []*control.Process{{PID: 1, Cmd: "sleep"}}
if !procListsEqual(pss, expectedPL) {
t.Errorf("container got process list: %s, want: %s", procListToString(pss), procListToString(expectedPL))
}
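procListsEqual and procListToString are test helpers defined outside this hunk. Below is a minimal, self-contained sketch of the comparison such a helper likely performs; proc is a stand-in for control.Process, modeling only the PID and Cmd fields the expected list above uses:

package main

import "fmt"

// proc stands in for gVisor's control.Process; only the two fields checked
// by the test's expected list are modeled.
type proc struct {
	PID int32
	Cmd string
}

// procListsEqual reports whether two process lists match element-wise on
// PID and Cmd.
func procListsEqual(got, want []*proc) bool {
	if len(got) != len(want) {
		return false
	}
	for i := range got {
		if got[i].PID != want[i].PID || got[i].Cmd != want[i].Cmd {
			return false
		}
	}
	return true
}

func main() {
	got := []*proc{{PID: 1, Cmd: "sleep"}}
	want := []*proc{{PID: 1, Cmd: "sleep"}}
	fmt.Println(procListsEqual(got, want)) // true
}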