summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorAndrei Vagin <avagin@google.com>2019-01-11 10:31:21 -0800
committerShentubot <shentubot@google.com>2019-01-11 10:32:26 -0800
commitf8c8f241540fa79b47090ce4808c2c0cfbe44a12 (patch)
tree493726adeeb4f6c38619ec49c42cbd72d0578097
parentbde588ff05cad3591025a1e313eebe61cd82e421 (diff)
runsc: Collect zombies of sandbox and gofer processes
And we need to wait a gofer process before cgroup.Uninstall, because it is running in the sandbox cgroups. PiperOrigin-RevId: 228904020 Change-Id: Iaf8826d5b9626db32d4057a1c505a8d7daaeb8f9
-rw-r--r--runsc/container/container.go41
-rw-r--r--runsc/container/container_test.go34
-rw-r--r--runsc/container/multi_container_test.go16
-rw-r--r--runsc/sandbox/sandbox.go30
4 files changed, 85 insertions, 36 deletions
diff --git a/runsc/container/container.go b/runsc/container/container.go
index dc9ef86fa..544e7a250 100644
--- a/runsc/container/container.go
+++ b/runsc/container/container.go
@@ -119,6 +119,12 @@ type Container struct {
// be 0 if the gofer has been killed.
GoferPid int `json:"goferPid"`
+ // goferIsChild is set if a gofer process is a child of the current process.
+ //
+ // This field isn't saved to json, because only a creator of a gofer
+ // process will have it as a child process.
+ goferIsChild bool
+
// Sandbox is the sandbox this container is running in. It's set when the
// container is created and reset when the sandbox is destroyed.
Sandbox *sandbox.Sandbox `json:"sandbox"`
@@ -708,11 +714,14 @@ func (c *Container) save() error {
// root containers), and waits for the container or sandbox and the gofer
// to stop. If any of them doesn't stop before timeout, an error is returned.
func (c *Container) stop() error {
+ var cgroup *cgroup.Cgroup
+
if c.Sandbox != nil {
log.Debugf("Destroying container %q", c.ID)
if err := c.Sandbox.DestroyContainer(c.ID); err != nil {
return fmt.Errorf("error destroying container %q: %v", c.ID, err)
}
+ cgroup = c.Sandbox.Cgroup
// Only set sandbox to nil after it has been told to destroy the container.
c.Sandbox = nil
}
@@ -725,7 +734,18 @@ func (c *Container) stop() error {
log.Warningf("Error sending signal %d to gofer %d: %v", syscall.SIGKILL, c.GoferPid, err)
}
}
- return c.waitForStopped()
+
+ if err := c.waitForStopped(); err != nil {
+ return err
+ }
+
+ // Gofer is running in cgroups, so Cgroup.Uninstall has to be called after it.
+ if cgroup != nil {
+ if err := cgroup.Uninstall(); err != nil {
+ return err
+ }
+ }
+ return nil
}
func (c *Container) waitForStopped() error {
@@ -738,12 +758,24 @@ func (c *Container) waitForStopped() error {
return fmt.Errorf("container is still running")
}
}
- if c.GoferPid != 0 {
- if err := syscall.Kill(c.GoferPid, 0); err == nil {
+ if c.GoferPid == 0 {
+ return nil
+ }
+ if c.goferIsChild {
+ // The gofer process is a child of the current process,
+ // so we can wait it and collect its zombie.
+ wpid, err := syscall.Wait4(int(c.GoferPid), nil, syscall.WNOHANG, nil)
+ if err != nil {
+ return fmt.Errorf("error waiting the gofer process: %v", err)
+ }
+ if wpid == 0 {
return fmt.Errorf("gofer is still running")
}
- c.GoferPid = 0
+
+ } else if err := syscall.Kill(c.GoferPid, 0); err == nil {
+ return fmt.Errorf("gofer is still running")
}
+ c.GoferPid = 0
return nil
}
return backoff.Retry(op, b)
@@ -816,6 +848,7 @@ func (c *Container) createGoferProcess(spec *specs.Spec, conf *boot.Config, bund
}
log.Infof("Gofer started, PID: %d", cmd.Process.Pid)
c.GoferPid = cmd.Process.Pid
+ c.goferIsChild = true
return sandEnds, nil
}
diff --git a/runsc/container/container_test.go b/runsc/container/container_test.go
index 45a36e583..affb51fab 100644
--- a/runsc/container/container_test.go
+++ b/runsc/container/container_test.go
@@ -39,9 +39,6 @@ import (
"gvisor.googlesource.com/gvisor/runsc/test/testutil"
)
-// childReaper reaps child processes.
-var childReaper *testutil.Reaper
-
// waitForProcessList waits for the given process list to show up in the container.
func waitForProcessList(cont *Container, want []*control.Process) error {
cb := func() error {
@@ -75,6 +72,18 @@ func waitForProcessCount(cont *Container, want int) error {
return testutil.Poll(cb, 30*time.Second)
}
+func blockUntilWaitable(pid int) error {
+ _, _, err := testutil.RetryEintr(func() (uintptr, uintptr, error) {
+ var err error
+ _, _, err1 := syscall.Syscall6(syscall.SYS_WAITID, 1, uintptr(pid), 0, syscall.WEXITED|syscall.WNOWAIT, 0, 0)
+ if err1 != 0 {
+ err = err1
+ }
+ return 0, 0, err
+ })
+ return err
+}
+
// procListsEqual is used to check whether 2 Process lists are equal for all
// implemented fields.
func procListsEqual(got, want []*control.Process) bool {
@@ -256,6 +265,11 @@ func configs(opts ...configOption) []*boot.Config {
// It verifies after each step that the container can be loaded from disk, and
// has the correct status.
func TestLifecycle(t *testing.T) {
+ // Start the child reaper.
+ childReaper := &testutil.Reaper{}
+ childReaper.Start()
+ defer childReaper.Stop()
+
for _, conf := range configs(all...) {
t.Logf("Running test with conf: %+v", conf)
// The container will just sleep for a long time. We will kill it before
@@ -1505,10 +1519,7 @@ func TestGoferExits(t *testing.T) {
t.Fatalf("error killing sandbox process: %v", err)
}
- _, _, err = testutil.RetryEintr(func() (uintptr, uintptr, error) {
- cpid, err := syscall.Wait4(c.GoferPid, nil, 0, nil)
- return uintptr(cpid), 0, err
- })
+ err = blockUntilWaitable(c.GoferPid)
if err != nil && err != syscall.ECHILD {
t.Errorf("error waiting for gofer to exit: %v", err)
}
@@ -1576,10 +1587,6 @@ func TestUserLog(t *testing.T) {
}
func TestWaitOnExitedSandbox(t *testing.T) {
- // Disable the childReaper for this test.
- childReaper.Stop()
- defer childReaper.Start()
-
for _, conf := range configs(all...) {
t.Logf("Running test with conf: %+v", conf)
@@ -1712,10 +1719,5 @@ func TestMain(m *testing.M) {
}
testutil.RunAsRoot()
- // Start the child reaper.
- childReaper = &testutil.Reaper{}
- childReaper.Start()
- defer childReaper.Stop()
-
os.Exit(m.Run())
}
diff --git a/runsc/container/multi_container_test.go b/runsc/container/multi_container_test.go
index e431f5aec..6b3c41a9b 100644
--- a/runsc/container/multi_container_test.go
+++ b/runsc/container/multi_container_test.go
@@ -359,7 +359,7 @@ func TestMultiContainerSignal(t *testing.T) {
cpid, err := syscall.Wait4(goferPid, nil, 0, nil)
return uintptr(cpid), 0, err
})
- if err != nil && err != syscall.ECHILD {
+ if err != syscall.ECHILD {
t.Errorf("error waiting for gofer to exit: %v", err)
}
// Make sure process 1 is still running.
@@ -379,18 +379,12 @@ func TestMultiContainerSignal(t *testing.T) {
}
// Ensure that container's gofer and sandbox process are no more.
- _, _, err = testutil.RetryEintr(func() (uintptr, uintptr, error) {
- cpid, err := syscall.Wait4(containers[0].GoferPid, nil, 0, nil)
- return uintptr(cpid), 0, err
- })
+ err = blockUntilWaitable(containers[0].GoferPid)
if err != nil && err != syscall.ECHILD {
t.Errorf("error waiting for gofer to exit: %v", err)
}
- _, _, err = testutil.RetryEintr(func() (uintptr, uintptr, error) {
- cpid, err := syscall.Wait4(containers[0].Sandbox.Pid, nil, 0, nil)
- return uintptr(cpid), 0, err
- })
+ err = blockUntilWaitable(containers[0].Sandbox.Pid)
if err != nil && err != syscall.ECHILD {
t.Errorf("error waiting for sandbox to exit: %v", err)
}
@@ -399,6 +393,10 @@ func TestMultiContainerSignal(t *testing.T) {
if err := containers[0].SignalContainer(syscall.SIGKILL, false); err == nil {
t.Errorf("sandbox %q shouldn't exist, but we were able to signal it", containers[0].Sandbox.ID)
}
+
+ if err := containers[0].Destroy(); err != nil {
+ t.Errorf("failed to destroy container: %v", err)
+ }
}
}
diff --git a/runsc/sandbox/sandbox.go b/runsc/sandbox/sandbox.go
index 9e95a11b4..fe55ddab8 100644
--- a/runsc/sandbox/sandbox.go
+++ b/runsc/sandbox/sandbox.go
@@ -62,6 +62,12 @@ type Sandbox struct {
// Cgroup has the cgroup configuration for the sandbox.
Cgroup *cgroup.Cgroup `json:"cgroup"`
+
+ // child is set if a sandbox process is a child of the current process.
+ //
+ // This field isn't saved to json, because only a creator of sandbox
+ // will have it as a child process.
+ child bool
}
// New creates the sandbox process. The caller must call Destroy() on the
@@ -70,7 +76,10 @@ func New(id string, spec *specs.Spec, conf *boot.Config, bundleDir, consoleSocke
s := &Sandbox{ID: id, Cgroup: cg}
// The Cleanup object cleans up partially created sandboxes when an error
// occurs. Any errors occurring during cleanup itself are ignored.
- c := specutils.MakeCleanup(func() { _ = s.destroy() })
+ c := specutils.MakeCleanup(func() {
+ err := s.destroy()
+ log.Warningf("error destroying sandbox: %v", err)
+ })
defer c.Clean()
// Create pipe to synchronize when sandbox process has been booted.
@@ -578,6 +587,7 @@ func (s *Sandbox) createSandboxProcess(spec *specs.Spec, conf *boot.Config, bund
if err := specutils.StartInNS(cmd, nss); err != nil {
return err
}
+ s.child = true
s.Pid = cmd.Process.Pid
log.Infof("Sandbox started, PID: %d", s.Pid)
@@ -666,11 +676,6 @@ func (s *Sandbox) destroy() error {
}
}
- if s.Cgroup != nil {
- if err := s.Cgroup.Uninstall(); err != nil {
- return err
- }
- }
if s.Chroot != "" {
if err := tearDownChroot(s.Chroot); err != nil {
return err
@@ -846,7 +851,18 @@ func (s *Sandbox) waitForStopped() error {
defer cancel()
b := backoff.WithContext(backoff.NewConstantBackOff(100*time.Millisecond), ctx)
op := func() error {
- if s.IsRunning() {
+ if s.child && s.Pid != 0 {
+ // The sandbox process is a child of the current process,
+ // so we can wait it and collect its zombie.
+ wpid, err := syscall.Wait4(int(s.Pid), nil, syscall.WNOHANG, nil)
+ if err != nil {
+ return fmt.Errorf("error waiting the sandbox process: %v", err)
+ }
+ if wpid == 0 {
+ return fmt.Errorf("sandbox is still running")
+ }
+ s.Pid = 0
+ } else if s.IsRunning() {
return fmt.Errorf("sandbox is still running")
}
return nil