diff options
author | Andrei Vagin <avagin@google.com> | 2019-01-11 10:31:21 -0800 |
---|---|---|
committer | Shentubot <shentubot@google.com> | 2019-01-11 10:32:26 -0800 |
commit | f8c8f241540fa79b47090ce4808c2c0cfbe44a12 (patch) | |
tree | 493726adeeb4f6c38619ec49c42cbd72d0578097 | |
parent | bde588ff05cad3591025a1e313eebe61cd82e421 (diff) |
runsc: Collect zombies of sandbox and gofer processes
And we need to wait a gofer process before cgroup.Uninstall,
because it is running in the sandbox cgroups.
PiperOrigin-RevId: 228904020
Change-Id: Iaf8826d5b9626db32d4057a1c505a8d7daaeb8f9
-rw-r--r-- | runsc/container/container.go | 41 | ||||
-rw-r--r-- | runsc/container/container_test.go | 34 | ||||
-rw-r--r-- | runsc/container/multi_container_test.go | 16 | ||||
-rw-r--r-- | runsc/sandbox/sandbox.go | 30 |
4 files changed, 85 insertions, 36 deletions
diff --git a/runsc/container/container.go b/runsc/container/container.go index dc9ef86fa..544e7a250 100644 --- a/runsc/container/container.go +++ b/runsc/container/container.go @@ -119,6 +119,12 @@ type Container struct { // be 0 if the gofer has been killed. GoferPid int `json:"goferPid"` + // goferIsChild is set if a gofer process is a child of the current process. + // + // This field isn't saved to json, because only a creator of a gofer + // process will have it as a child process. + goferIsChild bool + // Sandbox is the sandbox this container is running in. It's set when the // container is created and reset when the sandbox is destroyed. Sandbox *sandbox.Sandbox `json:"sandbox"` @@ -708,11 +714,14 @@ func (c *Container) save() error { // root containers), and waits for the container or sandbox and the gofer // to stop. If any of them doesn't stop before timeout, an error is returned. func (c *Container) stop() error { + var cgroup *cgroup.Cgroup + if c.Sandbox != nil { log.Debugf("Destroying container %q", c.ID) if err := c.Sandbox.DestroyContainer(c.ID); err != nil { return fmt.Errorf("error destroying container %q: %v", c.ID, err) } + cgroup = c.Sandbox.Cgroup // Only set sandbox to nil after it has been told to destroy the container. c.Sandbox = nil } @@ -725,7 +734,18 @@ func (c *Container) stop() error { log.Warningf("Error sending signal %d to gofer %d: %v", syscall.SIGKILL, c.GoferPid, err) } } - return c.waitForStopped() + + if err := c.waitForStopped(); err != nil { + return err + } + + // Gofer is running in cgroups, so Cgroup.Uninstall has to be called after it. + if cgroup != nil { + if err := cgroup.Uninstall(); err != nil { + return err + } + } + return nil } func (c *Container) waitForStopped() error { @@ -738,12 +758,24 @@ func (c *Container) waitForStopped() error { return fmt.Errorf("container is still running") } } - if c.GoferPid != 0 { - if err := syscall.Kill(c.GoferPid, 0); err == nil { + if c.GoferPid == 0 { + return nil + } + if c.goferIsChild { + // The gofer process is a child of the current process, + // so we can wait it and collect its zombie. + wpid, err := syscall.Wait4(int(c.GoferPid), nil, syscall.WNOHANG, nil) + if err != nil { + return fmt.Errorf("error waiting the gofer process: %v", err) + } + if wpid == 0 { return fmt.Errorf("gofer is still running") } - c.GoferPid = 0 + + } else if err := syscall.Kill(c.GoferPid, 0); err == nil { + return fmt.Errorf("gofer is still running") } + c.GoferPid = 0 return nil } return backoff.Retry(op, b) @@ -816,6 +848,7 @@ func (c *Container) createGoferProcess(spec *specs.Spec, conf *boot.Config, bund } log.Infof("Gofer started, PID: %d", cmd.Process.Pid) c.GoferPid = cmd.Process.Pid + c.goferIsChild = true return sandEnds, nil } diff --git a/runsc/container/container_test.go b/runsc/container/container_test.go index 45a36e583..affb51fab 100644 --- a/runsc/container/container_test.go +++ b/runsc/container/container_test.go @@ -39,9 +39,6 @@ import ( "gvisor.googlesource.com/gvisor/runsc/test/testutil" ) -// childReaper reaps child processes. -var childReaper *testutil.Reaper - // waitForProcessList waits for the given process list to show up in the container. func waitForProcessList(cont *Container, want []*control.Process) error { cb := func() error { @@ -75,6 +72,18 @@ func waitForProcessCount(cont *Container, want int) error { return testutil.Poll(cb, 30*time.Second) } +func blockUntilWaitable(pid int) error { + _, _, err := testutil.RetryEintr(func() (uintptr, uintptr, error) { + var err error + _, _, err1 := syscall.Syscall6(syscall.SYS_WAITID, 1, uintptr(pid), 0, syscall.WEXITED|syscall.WNOWAIT, 0, 0) + if err1 != 0 { + err = err1 + } + return 0, 0, err + }) + return err +} + // procListsEqual is used to check whether 2 Process lists are equal for all // implemented fields. func procListsEqual(got, want []*control.Process) bool { @@ -256,6 +265,11 @@ func configs(opts ...configOption) []*boot.Config { // It verifies after each step that the container can be loaded from disk, and // has the correct status. func TestLifecycle(t *testing.T) { + // Start the child reaper. + childReaper := &testutil.Reaper{} + childReaper.Start() + defer childReaper.Stop() + for _, conf := range configs(all...) { t.Logf("Running test with conf: %+v", conf) // The container will just sleep for a long time. We will kill it before @@ -1505,10 +1519,7 @@ func TestGoferExits(t *testing.T) { t.Fatalf("error killing sandbox process: %v", err) } - _, _, err = testutil.RetryEintr(func() (uintptr, uintptr, error) { - cpid, err := syscall.Wait4(c.GoferPid, nil, 0, nil) - return uintptr(cpid), 0, err - }) + err = blockUntilWaitable(c.GoferPid) if err != nil && err != syscall.ECHILD { t.Errorf("error waiting for gofer to exit: %v", err) } @@ -1576,10 +1587,6 @@ func TestUserLog(t *testing.T) { } func TestWaitOnExitedSandbox(t *testing.T) { - // Disable the childReaper for this test. - childReaper.Stop() - defer childReaper.Start() - for _, conf := range configs(all...) { t.Logf("Running test with conf: %+v", conf) @@ -1712,10 +1719,5 @@ func TestMain(m *testing.M) { } testutil.RunAsRoot() - // Start the child reaper. - childReaper = &testutil.Reaper{} - childReaper.Start() - defer childReaper.Stop() - os.Exit(m.Run()) } diff --git a/runsc/container/multi_container_test.go b/runsc/container/multi_container_test.go index e431f5aec..6b3c41a9b 100644 --- a/runsc/container/multi_container_test.go +++ b/runsc/container/multi_container_test.go @@ -359,7 +359,7 @@ func TestMultiContainerSignal(t *testing.T) { cpid, err := syscall.Wait4(goferPid, nil, 0, nil) return uintptr(cpid), 0, err }) - if err != nil && err != syscall.ECHILD { + if err != syscall.ECHILD { t.Errorf("error waiting for gofer to exit: %v", err) } // Make sure process 1 is still running. @@ -379,18 +379,12 @@ func TestMultiContainerSignal(t *testing.T) { } // Ensure that container's gofer and sandbox process are no more. - _, _, err = testutil.RetryEintr(func() (uintptr, uintptr, error) { - cpid, err := syscall.Wait4(containers[0].GoferPid, nil, 0, nil) - return uintptr(cpid), 0, err - }) + err = blockUntilWaitable(containers[0].GoferPid) if err != nil && err != syscall.ECHILD { t.Errorf("error waiting for gofer to exit: %v", err) } - _, _, err = testutil.RetryEintr(func() (uintptr, uintptr, error) { - cpid, err := syscall.Wait4(containers[0].Sandbox.Pid, nil, 0, nil) - return uintptr(cpid), 0, err - }) + err = blockUntilWaitable(containers[0].Sandbox.Pid) if err != nil && err != syscall.ECHILD { t.Errorf("error waiting for sandbox to exit: %v", err) } @@ -399,6 +393,10 @@ func TestMultiContainerSignal(t *testing.T) { if err := containers[0].SignalContainer(syscall.SIGKILL, false); err == nil { t.Errorf("sandbox %q shouldn't exist, but we were able to signal it", containers[0].Sandbox.ID) } + + if err := containers[0].Destroy(); err != nil { + t.Errorf("failed to destroy container: %v", err) + } } } diff --git a/runsc/sandbox/sandbox.go b/runsc/sandbox/sandbox.go index 9e95a11b4..fe55ddab8 100644 --- a/runsc/sandbox/sandbox.go +++ b/runsc/sandbox/sandbox.go @@ -62,6 +62,12 @@ type Sandbox struct { // Cgroup has the cgroup configuration for the sandbox. Cgroup *cgroup.Cgroup `json:"cgroup"` + + // child is set if a sandbox process is a child of the current process. + // + // This field isn't saved to json, because only a creator of sandbox + // will have it as a child process. + child bool } // New creates the sandbox process. The caller must call Destroy() on the @@ -70,7 +76,10 @@ func New(id string, spec *specs.Spec, conf *boot.Config, bundleDir, consoleSocke s := &Sandbox{ID: id, Cgroup: cg} // The Cleanup object cleans up partially created sandboxes when an error // occurs. Any errors occurring during cleanup itself are ignored. - c := specutils.MakeCleanup(func() { _ = s.destroy() }) + c := specutils.MakeCleanup(func() { + err := s.destroy() + log.Warningf("error destroying sandbox: %v", err) + }) defer c.Clean() // Create pipe to synchronize when sandbox process has been booted. @@ -578,6 +587,7 @@ func (s *Sandbox) createSandboxProcess(spec *specs.Spec, conf *boot.Config, bund if err := specutils.StartInNS(cmd, nss); err != nil { return err } + s.child = true s.Pid = cmd.Process.Pid log.Infof("Sandbox started, PID: %d", s.Pid) @@ -666,11 +676,6 @@ func (s *Sandbox) destroy() error { } } - if s.Cgroup != nil { - if err := s.Cgroup.Uninstall(); err != nil { - return err - } - } if s.Chroot != "" { if err := tearDownChroot(s.Chroot); err != nil { return err @@ -846,7 +851,18 @@ func (s *Sandbox) waitForStopped() error { defer cancel() b := backoff.WithContext(backoff.NewConstantBackOff(100*time.Millisecond), ctx) op := func() error { - if s.IsRunning() { + if s.child && s.Pid != 0 { + // The sandbox process is a child of the current process, + // so we can wait it and collect its zombie. + wpid, err := syscall.Wait4(int(s.Pid), nil, syscall.WNOHANG, nil) + if err != nil { + return fmt.Errorf("error waiting the sandbox process: %v", err) + } + if wpid == 0 { + return fmt.Errorf("sandbox is still running") + } + s.Pid = 0 + } else if s.IsRunning() { return fmt.Errorf("sandbox is still running") } return nil |