From b461be88a8036ca0455aceb8b6c723d6b6fded4f Mon Sep 17 00:00:00 2001
From: Fabricio Voznika
Date: Fri, 2 Aug 2019 13:46:42 -0700
Subject: Stops container if gofer is killed

Each gofer now has a goroutine that polls on the FDs used to
communicate with the sandbox. The respective container is destroyed
if any of the FDs is closed.

Closes #601

PiperOrigin-RevId: 261383725
---
 runsc/boot/BUILD                        |  1 +
 runsc/boot/controller.go                |  2 +-
 runsc/boot/fs.go                        |  6 +--
 runsc/boot/loader.go                    | 92 ++++++++++++++++++++++++++-------
 runsc/boot/loader_test.go               |  4 +-
 runsc/boot/user_test.go                 |  2 +-
 runsc/container/BUILD                   |  1 +
 runsc/container/container_test.go       |  2 +-
 runsc/container/multi_container_test.go | 86 ++++++++++++++++++++++++++++--
 runsc/specutils/specutils.go            | 11 ++++
 runsc/test/testutil/testutil.go         | 11 ----
 11 files changed, 174 insertions(+), 44 deletions(-)

diff --git a/runsc/boot/BUILD b/runsc/boot/BUILD
index 5025401dd..588bb8851 100644
--- a/runsc/boot/BUILD
+++ b/runsc/boot/BUILD
@@ -88,6 +88,7 @@ go_library(
         "//runsc/specutils",
         "@com_github_golang_protobuf//proto:go_default_library",
         "@com_github_opencontainers_runtime-spec//specs-go:go_default_library",
+        "@org_golang_x_sys//unix:go_default_library",
     ],
 )
 
diff --git a/runsc/boot/controller.go b/runsc/boot/controller.go
index f1e4a9ba4..0285f599d 100644
--- a/runsc/boot/controller.go
+++ b/runsc/boot/controller.go
@@ -347,7 +347,7 @@ func (cm *containerManager) Restore(o *RestoreOpts, _ *struct{}) error {
 	cm.l.k = k
 
 	// Set up the restore environment.
-	mntr := newContainerMounter(cm.l.spec, "", cm.l.goferFDs, cm.l.k, cm.l.mountHints)
+	mntr := newContainerMounter(cm.l.spec, cm.l.goferFDs, cm.l.k, cm.l.mountHints)
 	renv, err := mntr.createRestoreEnvironment(cm.l.conf)
 	if err != nil {
 		return fmt.Errorf("creating RestoreEnvironment: %v", err)
diff --git a/runsc/boot/fs.go b/runsc/boot/fs.go
index 98ce40991..b6eeacf98 100644
--- a/runsc/boot/fs.go
+++ b/runsc/boot/fs.go
@@ -476,9 +476,6 @@ func (p *podMountHints) findMount(mount specs.Mount) *mountHint {
 }
 
 type containerMounter struct {
-	// cid is the container ID. May be set to empty for the root container.
-	cid string
-
 	root *specs.Root
 
 	// mounts is the set of submounts for the container. It's a copy from the spec
@@ -493,9 +490,8 @@ type containerMounter struct {
 	hints *podMountHints
 }
 
-func newContainerMounter(spec *specs.Spec, cid string, goferFDs []int, k *kernel.Kernel, hints *podMountHints) *containerMounter {
+func newContainerMounter(spec *specs.Spec, goferFDs []int, k *kernel.Kernel, hints *podMountHints) *containerMounter {
 	return &containerMounter{
-		cid:    cid,
 		root:   spec.Root,
 		mounts: compileMounts(spec),
 		fds:    fdDispenser{fds: goferFDs},
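The loader.go changes that follow implement the monitor itself. In isolation, the detection mechanism looks like the standalone sketch below. This is illustrative only, not runsc code: it assumes Linux and golang.org/x/sys/unix, and a socketpair stands in for the sandbox-gofer channel.

package main

import (
	"fmt"
	"time"

	"golang.org/x/sys/unix"
)

func main() {
	// A connected socket pair stands in for the sandbox<->gofer channel.
	fds, err := unix.Socketpair(unix.AF_UNIX, unix.SOCK_STREAM, 0)
	if err != nil {
		panic(err)
	}

	// Simulate the gofer dying: its end of the socket gets closed.
	go func() {
		time.Sleep(100 * time.Millisecond)
		unix.Close(fds[1])
	}()

	// Watch the sentry's end for a hangup. POLLRDHUP fires when the peer
	// closes its write side; POLLHUP when the connection is torn down.
	events := []unix.PollFd{{
		Fd:     int32(fds[0]),
		Events: unix.POLLHUP | unix.POLLRDHUP,
	}}
	for {
		n, err := unix.Ppoll(events, nil, nil)
		if err == unix.EINTR {
			continue // interrupted by a signal; retry, as RetryEintr does
		}
		if err != nil {
			panic(err)
		}
		if n > 0 {
			fmt.Printf("peer closed, revents=%#x\n", events[0].Revents)
			return
		}
	}
}

Closing the peer end makes Ppoll return with POLLHUP/POLLRDHUP set in Revents, which is exactly the condition startGoferMonitor below treats as gofer death.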
diff --git a/runsc/boot/loader.go b/runsc/boot/loader.go
index 77e1aa456..434f1ca77 100644
--- a/runsc/boot/loader.go
+++ b/runsc/boot/loader.go
@@ -27,6 +27,7 @@ import (
 	gtime "time"
 
 	specs "github.com/opencontainers/runtime-spec/specs-go"
+	"golang.org/x/sys/unix"
 	"gvisor.dev/gvisor/pkg/abi/linux"
 	"gvisor.dev/gvisor/pkg/cpuid"
 	"gvisor.dev/gvisor/pkg/log"
@@ -524,11 +525,9 @@ func (l *Loader) run() error {
 		// ours either way.
 		l.rootProcArgs.FDTable = fdTable
 
-	// cid for root container can be empty. Only subcontainers need it to set
-	// the mount location.
-	mntr := newContainerMounter(l.spec, "", l.goferFDs, l.k, l.mountHints)
-
-	// Setup the root container.
+	// Setup the root container file system.
+	l.startGoferMonitor(l.sandboxID, l.goferFDs)
+	mntr := newContainerMounter(l.spec, l.goferFDs, l.k, l.mountHints)
 	if err := mntr.setupRootContainer(ctx, ctx, l.conf, func(mns *fs.MountNamespace) {
 		l.rootProcArgs.MountNamespace = mns
 	}); err != nil {
@@ -687,7 +686,9 @@ func (l *Loader) startContainer(spec *specs.Spec, conf *Config, cid string, file
 		goferFDs = append(goferFDs, fd)
 	}
 
-	mntr := newContainerMounter(spec, cid, goferFDs, l.k, l.mountHints)
+	// Setup the child container file system.
+	l.startGoferMonitor(cid, goferFDs)
+	mntr := newContainerMounter(spec, goferFDs, l.k, l.mountHints)
 	if err := mntr.setupChildContainer(conf, &procArgs); err != nil {
 		return fmt.Errorf("configuring container FS: %v", err)
 	}
@@ -710,17 +711,59 @@ func (l *Loader) startContainer(spec *specs.Spec, conf *Config, cid string, file
 	return nil
 }
 
+// startGoferMonitor runs a goroutine to monitor the gofer's health. It
+// polls the gofer FDs looking for disconnects and destroys the container
+// when any of them is closed.
+func (l *Loader) startGoferMonitor(cid string, goferFDs []int) {
+	go func() {
+		log.Debugf("Monitoring gofer health for container %q", cid)
+		var events []unix.PollFd
+		for _, fd := range goferFDs {
+			events = append(events, unix.PollFd{
+				Fd:     int32(fd),
+				Events: unix.POLLHUP | unix.POLLRDHUP,
+			})
+		}
+		_, _, err := specutils.RetryEintr(func() (uintptr, uintptr, error) {
+			// Use ppoll instead of poll because it's already whitelisted in seccomp.
+			n, err := unix.Ppoll(events, nil, nil)
+			return uintptr(n), 0, err
+		})
+		if err != nil {
+			panic(fmt.Sprintf("Error monitoring gofer FDs: %v", err))
+		}
+
+		// Check if the gofer has stopped as part of normal container destruction.
+		// This is done just to avoid sending an annoying error message to the log.
+		// Note that there is a small race window in between mu.Unlock() and the
+		// lock being reacquired in destroyContainer(), but it's harmless to call
+		// destroyContainer() multiple times.
+		l.mu.Lock()
+		_, ok := l.processes[execID{cid: cid}]
+		l.mu.Unlock()
+		if ok {
+			log.Infof("Gofer socket disconnected, destroying container %q", cid)
+			if err := l.destroyContainer(cid); err != nil {
+				log.Warningf("Error destroying container %q after gofer stopped: %v", cid, err)
+			}
+		}
+	}()
+}
+
 // destroyContainer stops a container if it is still running and cleans up its
 // filesystem.
 func (l *Loader) destroyContainer(cid string) error {
 	l.mu.Lock()
 	defer l.mu.Unlock()
 
-	// Has the container started?
-	_, _, err := l.threadGroupFromIDLocked(execID{cid: cid})
+	_, _, started, err := l.threadGroupFromIDLocked(execID{cid: cid})
+	if err != nil {
+		// Container doesn't exist.
+		return err
+	}
 
-	// If the container has started, kill and wait for all processes.
-	if err == nil {
+	// The container exists; has it been started?
+	if started {
 		if err := l.signalAllProcesses(cid, int32(linux.SIGKILL)); err != nil {
 			return fmt.Errorf("sending SIGKILL to all container processes: %v", err)
 		}
@@ -754,9 +797,12 @@ func (l *Loader) executeAsync(args *control.ExecArgs) (kernel.ThreadID, error) {
 	l.mu.Lock()
 	defer l.mu.Unlock()
 
-	tg, _, err := l.threadGroupFromIDLocked(execID{cid: args.ContainerID})
+	tg, _, started, err := l.threadGroupFromIDLocked(execID{cid: args.ContainerID})
 	if err != nil {
-		return 0, fmt.Errorf("no such container: %q", args.ContainerID)
+		return 0, err
+	}
+	if !started {
+		return 0, fmt.Errorf("container %q not started", args.ContainerID)
 	}
 
 	// Get the container MountNamespace from the Task.
@@ -1018,22 +1064,30 @@ func (l *Loader) signalAllProcesses(cid string, signo int32) error {
 func (l *Loader) threadGroupFromID(key execID) (*kernel.ThreadGroup, *host.TTYFileOperations, error) {
 	l.mu.Lock()
 	defer l.mu.Unlock()
-	return l.threadGroupFromIDLocked(key)
+	tg, tty, ok, err := l.threadGroupFromIDLocked(key)
+	if err != nil {
+		return nil, nil, err
+	}
+	if !ok {
+		return nil, nil, fmt.Errorf("container %q not started", key.cid)
+	}
+	return tg, tty, nil
 }
 
 // threadGroupFromIDLocked returns the thread group and TTY for the given
 // execution ID. TTY may be nil if the process is not attached to a terminal.
-// Returns error if execution ID is invalid or if container/process has not
-// started yet. Caller must hold 'mu'.
-func (l *Loader) threadGroupFromIDLocked(key execID) (*kernel.ThreadGroup, *host.TTYFileOperations, error) {
+// Also returns a boolean indicating whether the container has already started.
+// Returns error if execution ID is invalid or if the container cannot be
+// found (maybe it has been deleted). Caller must hold 'mu'.
+func (l *Loader) threadGroupFromIDLocked(key execID) (*kernel.ThreadGroup, *host.TTYFileOperations, bool, error) {
 	ep := l.processes[key]
 	if ep == nil {
-		return nil, nil, fmt.Errorf("container not found")
+		return nil, nil, false, fmt.Errorf("container %q not found", key.cid)
 	}
 	if ep.tg == nil {
-		return nil, nil, fmt.Errorf("container not started")
+		return nil, nil, false, nil
 	}
-	return ep.tg, ep.tty, nil
+	return ep.tg, ep.tty, true, nil
 }
 
 func init() {
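The threadGroupFromIDLocked change above splits what used to be a single error into two conditions: "container not found" remains an error, while "found but not started" becomes a boolean. That lets destroyContainer clean up a container that exists but never started, instead of bailing out. A hypothetical reduction of that contract (all names here are illustrative, not runsc code):

package main

import "fmt"

// threadGroup stands in for *kernel.ThreadGroup; it stays nil until the
// container's first process starts.
type threadGroup struct{}

type execEntry struct{ tg *threadGroup }

type loader struct{ processes map[string]*execEntry }

// lookupLocked mirrors the new threadGroupFromIDLocked contract: an error
// means the container cannot be found at all, while the bool reports
// whether it has started.
func (l *loader) lookupLocked(cid string) (*threadGroup, bool, error) {
	ep, ok := l.processes[cid]
	if !ok {
		return nil, false, fmt.Errorf("container %q not found", cid)
	}
	if ep.tg == nil {
		return nil, false, nil // exists, but not started yet
	}
	return ep.tg, true, nil
}

func main() {
	l := &loader{processes: map[string]*execEntry{
		"created": {},                   // created, not yet started
		"running": {tg: &threadGroup{}}, // started
	}}
	for _, cid := range []string{"missing", "created", "running"} {
		_, started, err := l.lookupLocked(cid)
		// destroyContainer proceeds whenever err == nil; it only signals
		// processes when started is true.
		fmt.Printf("%-7s started=%-5v err=%v\n", cid, started, err)
	}
}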
diff --git a/runsc/boot/loader_test.go b/runsc/boot/loader_test.go
index ff713660d..e0e32b9d5 100644
--- a/runsc/boot/loader_test.go
+++ b/runsc/boot/loader_test.go
@@ -408,7 +408,7 @@ func TestCreateMountNamespace(t *testing.T) {
 			mns = m
 			ctx.(*contexttest.TestContext).RegisterValue(fs.CtxRoot, mns.Root())
 		}
-		mntr := newContainerMounter(&tc.spec, "", []int{sandEnd}, nil, &podMountHints{})
+		mntr := newContainerMounter(&tc.spec, []int{sandEnd}, nil, &podMountHints{})
 		if err := mntr.setupRootContainer(ctx, ctx, conf, setMountNS); err != nil {
 			t.Fatalf("createMountNamespace test case %q failed: %v", tc.name, err)
 		}
@@ -614,7 +614,7 @@ func TestRestoreEnvironment(t *testing.T) {
 	for _, tc := range testCases {
 		t.Run(tc.name, func(t *testing.T) {
 			conf := testConfig()
-			mntr := newContainerMounter(tc.spec, "", tc.ioFDs, nil, &podMountHints{})
+			mntr := newContainerMounter(tc.spec, tc.ioFDs, nil, &podMountHints{})
 			actualRenv, err := mntr.createRestoreEnvironment(conf)
 			if !tc.errorExpected && err != nil {
 				t.Fatalf("could not create restore environment for test:%s", tc.name)
diff --git a/runsc/boot/user_test.go b/runsc/boot/user_test.go
index 834003430..01f666507 100644
--- a/runsc/boot/user_test.go
+++ b/runsc/boot/user_test.go
@@ -169,7 +169,7 @@ func TestGetExecUserHome(t *testing.T) {
 			mns = m
 			ctx.(*contexttest.TestContext).RegisterValue(fs.CtxRoot, mns.Root())
 		}
-		mntr := newContainerMounter(spec, "", []int{sandEnd}, nil, &podMountHints{})
+		mntr := newContainerMounter(spec, []int{sandEnd}, nil, &podMountHints{})
 		if err := mntr.setupRootContainer(ctx, ctx, conf, setMountNS); err != nil {
 			t.Fatalf("failed to create mount namespace: %v", err)
 		}
diff --git a/runsc/container/BUILD b/runsc/container/BUILD
index e246c38ae..de8202bb1 100644
--- a/runsc/container/BUILD
+++ b/runsc/container/BUILD
@@ -49,6 +49,7 @@ go_test(
         "//pkg/abi/linux",
         "//pkg/log",
         "//pkg/sentry/control",
+        "//pkg/sentry/kernel",
         "//pkg/sentry/kernel/auth",
         "//pkg/unet",
         "//pkg/urpc",
diff --git a/runsc/container/container_test.go b/runsc/container/container_test.go
index ff68c586e..af128bf1c 100644
--- a/runsc/container/container_test.go
+++ b/runsc/container/container_test.go
@@ -76,7 +76,7 @@ func waitForProcessCount(cont *Container, want int) error {
 }
 
 func blockUntilWaitable(pid int) error {
-	_, _, err := testutil.RetryEintr(func() (uintptr, uintptr, error) {
+	_, _, err := specutils.RetryEintr(func() (uintptr, uintptr, error) {
 		var err error
 		_, _, err1 := syscall.Syscall6(syscall.SYS_WAITID, 1, uintptr(pid), 0, syscall.WEXITED|syscall.WNOWAIT, 0, 0)
 		if err1 != 0 {
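For context on blockUntilWaitable above: it relies on waitid(2) with WEXITED|WNOWAIT, which blocks until the child is waitable but does not reap it, so the test's reaper can still collect the exit status. A hedged standalone sketch of that trick, assuming Linux (the constant 1 is waitid's P_PID idtype, which Go's syscall package does not export, and Linux accepts a NULL siginfo pointer):

package main

import (
	"fmt"
	"os/exec"
	"syscall"
)

// pPID is waitid's idtype_t P_PID.
const pPID = 1

func main() {
	cmd := exec.Command("/bin/true")
	if err := cmd.Start(); err != nil {
		panic(err)
	}
	pid := cmd.Process.Pid

	// Block until the child exits, but do NOT reap it (WNOWAIT), mirroring
	// blockUntilWaitable above. siginfo is NULL (0).
	for {
		_, _, errno := syscall.Syscall6(syscall.SYS_WAITID, pPID, uintptr(pid), 0,
			syscall.WEXITED|syscall.WNOWAIT, 0, 0)
		if errno == syscall.EINTR {
			continue // the same condition RetryEintr handles
		}
		if errno != 0 {
			panic(errno)
		}
		break
	}
	fmt.Println("child exited and is still waitable")
	cmd.Wait() // the real reap; works because WNOWAIT left the status behind
}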
diff --git a/runsc/container/multi_container_test.go b/runsc/container/multi_container_test.go
index 978a422f5..2ef065a15 100644
--- a/runsc/container/multi_container_test.go
+++ b/runsc/container/multi_container_test.go
@@ -29,6 +29,7 @@ import (
 
 	specs "github.com/opencontainers/runtime-spec/specs-go"
 	"gvisor.dev/gvisor/pkg/sentry/control"
+	"gvisor.dev/gvisor/pkg/sentry/kernel"
 	"gvisor.dev/gvisor/runsc/boot"
 	"gvisor.dev/gvisor/runsc/specutils"
 	"gvisor.dev/gvisor/runsc/test/testutil"
@@ -488,7 +489,7 @@ func TestMultiContainerSignal(t *testing.T) {
 		if err := containers[1].Destroy(); err != nil {
 			t.Errorf("failed to destroy container: %v", err)
 		}
-		_, _, err = testutil.RetryEintr(func() (uintptr, uintptr, error) {
+		_, _, err = specutils.RetryEintr(func() (uintptr, uintptr, error) {
 			cpid, err := syscall.Wait4(goferPid, nil, 0, nil)
 			return uintptr(cpid), 0, err
 		})
@@ -905,9 +906,9 @@ func TestMultiContainerDifferentFilesystems(t *testing.T) {
 	}
 }
 
-// TestMultiContainerGoferStop tests that IO operations continue to work after
-// containers have been stopped and gofers killed.
-func TestMultiContainerGoferStop(t *testing.T) {
+// TestMultiContainerContainerDestroyStress tests that IO operations continue
+// to work after containers have been stopped and gofers killed.
+func TestMultiContainerContainerDestroyStress(t *testing.T) {
 	app, err := testutil.FindFile("runsc/container/test_app/test_app")
 	if err != nil {
 		t.Fatal("error finding test_app:", err)
@@ -1345,3 +1346,80 @@ func TestMultiContainerMultiRootCanHandleFDs(t *testing.T) {
 		}
 	}
 }
+
+// Test that container is destroyed when Gofer is killed.
+func TestMultiContainerGoferKilled(t *testing.T) {
+	sleep := []string{"sleep", "100"}
+	specs, ids := createSpecs(sleep, sleep, sleep)
+	conf := testutil.TestConfig()
+	containers, cleanup, err := startContainers(conf, specs, ids)
+	if err != nil {
+		t.Fatalf("error starting containers: %v", err)
+	}
+	defer cleanup()
+
+	// Ensure container is running.
+	c := containers[2]
+	expectedPL := []*control.Process{
+		{PID: 3, Cmd: "sleep"},
+	}
+	if err := waitForProcessList(c, expectedPL); err != nil {
+		t.Errorf("failed to wait for sleep to start: %v", err)
+	}
+
+	// Kill container's gofer.
+	if err := syscall.Kill(c.GoferPid, syscall.SIGKILL); err != nil {
+		t.Fatalf("syscall.Kill(%d, SIGKILL)=%v", c.GoferPid, err)
+	}
+
+	// Wait until container stops.
+	if err := waitForProcessList(c, nil); err != nil {
+		t.Errorf("Container %q was not stopped after gofer death: %v", c.ID, err)
+	}
+
+	// Check that container isn't running anymore.
+	args := &control.ExecArgs{Argv: []string{"/bin/true"}}
+	if _, err := c.executeSync(args); err == nil {
+		t.Fatalf("Container %q was not stopped after gofer death", c.ID)
+	}
+
+	// Check that other containers are unaffected.
+	for i, c := range containers {
+		if i == 2 {
+			continue // container[2] has been killed.
+		}
+		pl := []*control.Process{
+			{PID: kernel.ThreadID(i + 1), Cmd: "sleep"},
+		}
+		if err := waitForProcessList(c, pl); err != nil {
+			t.Errorf("Container %q was affected by another container: %v", c.ID, err)
+		}
+		args := &control.ExecArgs{Argv: []string{"/bin/true"}}
+		if _, err := c.executeSync(args); err != nil {
+			t.Fatalf("Container %q was affected by another container: %v", c.ID, err)
+		}
+	}
+
+	// Kill root container's gofer to bring entire sandbox down.
+	c = containers[0]
+	if err := syscall.Kill(c.GoferPid, syscall.SIGKILL); err != nil {
+		t.Fatalf("syscall.Kill(%d, SIGKILL)=%v", c.GoferPid, err)
+	}
+
+	// Wait until sandbox stops. waitForProcessList will loop until sandbox exits
+	// and RPC errors out.
+	impossiblePL := []*control.Process{
+		{PID: 100, Cmd: "non-existent-process"},
+	}
+	if err := waitForProcessList(c, impossiblePL); err == nil {
+		t.Fatalf("Sandbox was not killed after gofer death")
+	}
+
+	// Check that entire sandbox isn't running anymore.
+	for _, c := range containers {
+		args := &control.ExecArgs{Argv: []string{"/bin/true"}}
+		if _, err := c.executeSync(args); err == nil {
+			t.Fatalf("Container %q was not stopped after gofer death", c.ID)
+		}
+	}
+}
diff --git a/runsc/specutils/specutils.go b/runsc/specutils/specutils.go
index 0b40e38a3..2eec92349 100644
--- a/runsc/specutils/specutils.go
+++ b/runsc/specutils/specutils.go
@@ -492,3 +492,14 @@ func (c *Cleanup) Clean() {
 func (c *Cleanup) Release() {
 	c.clean = nil
 }
+
+// RetryEintr retries the function until an error other than EINTR is
+// returned.
+func RetryEintr(f func() (uintptr, uintptr, error)) (uintptr, uintptr, error) {
+	for {
+		r1, r2, err := f()
+		if err != syscall.EINTR {
+			return r1, r2, err
+		}
+	}
+}
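RetryEintr now lives in specutils so both the sandbox (the ppoll in startGoferMonitor) and the tests (wait4, waitid) can share it. A self-contained usage sketch follows; the helper body is copied from the diff above, while everything around it is illustrative:

package main

import (
	"fmt"
	"syscall"
)

// RetryEintr is reproduced from runsc/specutils above: it re-invokes f for
// as long as the wrapped syscall keeps failing with EINTR.
func RetryEintr(f func() (uintptr, uintptr, error)) (uintptr, uintptr, error) {
	for {
		r1, r2, err := f()
		if err != syscall.EINTR {
			return r1, r2, err
		}
	}
}

func main() {
	pid, err := syscall.ForkExec("/bin/true", []string{"true"}, nil)
	if err != nil {
		panic(err)
	}

	// Wait4 may be interrupted by a signal and return EINTR; the wrapper
	// retries until the wait completes or fails for real.
	cpid, _, err := RetryEintr(func() (uintptr, uintptr, error) {
		c, err := syscall.Wait4(pid, nil, 0, nil)
		return uintptr(c), 0, err
	})
	fmt.Printf("reaped pid %d, err=%v\n", cpid, err)
}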
+ } + pl := []*control.Process{ + {PID: kernel.ThreadID(i + 1), Cmd: "sleep"}, + } + if err := waitForProcessList(c, pl); err != nil { + t.Errorf("Container %q was affected by another container: %v", c.ID, err) + } + args := &control.ExecArgs{Argv: []string{"/bin/true"}} + if _, err := c.executeSync(args); err != nil { + t.Fatalf("Container %q was affected by another container: %v", c.ID, err) + } + } + + // Kill root container's gofer to bring entire sandbox down. + c = containers[0] + if err := syscall.Kill(c.GoferPid, syscall.SIGKILL); err != nil { + t.Fatalf("syscall.Kill(%d, SIGKILL)=%v", c.GoferPid, err) + } + + // Wait until sandbox stops. waitForProcessList will loop until sandbox exits + // and RPC errors out. + impossiblePL := []*control.Process{ + {PID: 100, Cmd: "non-existent-process"}, + } + if err := waitForProcessList(c, impossiblePL); err == nil { + t.Fatalf("Sandbox was not killed after gofer death") + } + + // Check that entire sandbox isn't running anymore. + for _, c := range containers { + args := &control.ExecArgs{Argv: []string{"/bin/true"}} + if _, err := c.executeSync(args); err == nil { + t.Fatalf("Container %q was not stopped after gofer death", c.ID) + } + } +} diff --git a/runsc/specutils/specutils.go b/runsc/specutils/specutils.go index 0b40e38a3..2eec92349 100644 --- a/runsc/specutils/specutils.go +++ b/runsc/specutils/specutils.go @@ -492,3 +492,14 @@ func (c *Cleanup) Clean() { func (c *Cleanup) Release() { c.clean = nil } + +// RetryEintr retries the function until an error different than EINTR is +// returned. +func RetryEintr(f func() (uintptr, uintptr, error)) (uintptr, uintptr, error) { + for { + r1, r2, err := f() + if err != syscall.EINTR { + return r1, r2, err + } + } +} diff --git a/runsc/test/testutil/testutil.go b/runsc/test/testutil/testutil.go index a98675bfc..406f6d08a 100644 --- a/runsc/test/testutil/testutil.go +++ b/runsc/test/testutil/testutil.go @@ -348,17 +348,6 @@ func StartReaper() func() { return r.Stop } -// RetryEintr retries the function until an error different than EINTR is -// returned. -func RetryEintr(f func() (uintptr, uintptr, error)) (uintptr, uintptr, error) { - for { - r1, r2, err := f() - if err != syscall.EINTR { - return r1, r2, err - } - } -} - // WaitUntilRead reads from the given reader until the wanted string is found // or until timeout. func WaitUntilRead(r io.Reader, want string, split bufio.SplitFunc, timeout time.Duration) error { -- cgit v1.2.3