From b461be88a8036ca0455aceb8b6c723d6b6fded4f Mon Sep 17 00:00:00 2001
From: Fabricio Voznika
Date: Fri, 2 Aug 2019 13:46:42 -0700
Subject: Stops container if gofer is killed

Each gofer now has a goroutine that polls on the FDs used to
communicate with the sandbox. The respective container is destroyed
if any of the FDs is closed.

Closes #601

PiperOrigin-RevId: 261383725
---
 runsc/boot/BUILD                        |  1 +
 runsc/boot/controller.go                |  2 +-
 runsc/boot/fs.go                        |  6 +--
 runsc/boot/loader.go                    | 92 ++++++++++++++++++++++++++-------
 runsc/boot/loader_test.go               |  4 +-
 runsc/boot/user_test.go                 |  2 +-
 runsc/container/BUILD                   |  1 +
 runsc/container/container_test.go       |  2 +-
 runsc/container/multi_container_test.go | 86 ++++++++++++++++++++++++++++--
 runsc/specutils/specutils.go            | 11 ++++
 runsc/test/testutil/testutil.go         | 11 ----
 11 files changed, 174 insertions(+), 44 deletions(-)

diff --git a/runsc/boot/BUILD b/runsc/boot/BUILD
index 5025401dd..588bb8851 100644
--- a/runsc/boot/BUILD
+++ b/runsc/boot/BUILD
@@ -88,6 +88,7 @@ go_library(
         "//runsc/specutils",
         "@com_github_golang_protobuf//proto:go_default_library",
         "@com_github_opencontainers_runtime-spec//specs-go:go_default_library",
+        "@org_golang_x_sys//unix:go_default_library",
     ],
 )
 
diff --git a/runsc/boot/controller.go b/runsc/boot/controller.go
index f1e4a9ba4..0285f599d 100644
--- a/runsc/boot/controller.go
+++ b/runsc/boot/controller.go
@@ -347,7 +347,7 @@ func (cm *containerManager) Restore(o *RestoreOpts, _ *struct{}) error {
 	cm.l.k = k
 
 	// Set up the restore environment.
-	mntr := newContainerMounter(cm.l.spec, "", cm.l.goferFDs, cm.l.k, cm.l.mountHints)
+	mntr := newContainerMounter(cm.l.spec, cm.l.goferFDs, cm.l.k, cm.l.mountHints)
 	renv, err := mntr.createRestoreEnvironment(cm.l.conf)
 	if err != nil {
 		return fmt.Errorf("creating RestoreEnvironment: %v", err)
diff --git a/runsc/boot/fs.go b/runsc/boot/fs.go
index 98ce40991..b6eeacf98 100644
--- a/runsc/boot/fs.go
+++ b/runsc/boot/fs.go
@@ -476,9 +476,6 @@ func (p *podMountHints) findMount(mount specs.Mount) *mountHint {
 }
 
 type containerMounter struct {
-	// cid is the container ID. May be set to empty for the root container.
-	cid string
-
 	root *specs.Root
 
 	// mounts is the set of submounts for the container. It's a copy from the spec
@@ -493,9 +490,8 @@ type containerMounter struct {
 	hints *podMountHints
 }
 
-func newContainerMounter(spec *specs.Spec, cid string, goferFDs []int, k *kernel.Kernel, hints *podMountHints) *containerMounter {
+func newContainerMounter(spec *specs.Spec, goferFDs []int, k *kernel.Kernel, hints *podMountHints) *containerMounter {
 	return &containerMounter{
-		cid:    cid,
 		root:   spec.Root,
 		mounts: compileMounts(spec),
 		fds:    fdDispenser{fds: goferFDs},
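The loader.go changes that follow implement the monitor itself. In isolation, the detection mechanism looks like the standalone sketch below. This is illustrative only, not runsc code: it assumes Linux and golang.org/x/sys/unix, and a socketpair stands in for the sandbox-gofer channel.

package main

import (
	"fmt"
	"time"

	"golang.org/x/sys/unix"
)

func main() {
	// A connected socket pair stands in for the sandbox<->gofer channel.
	fds, err := unix.Socketpair(unix.AF_UNIX, unix.SOCK_STREAM, 0)
	if err != nil {
		panic(err)
	}

	// Simulate the gofer dying: its end of the socket gets closed.
	go func() {
		time.Sleep(100 * time.Millisecond)
		unix.Close(fds[1])
	}()

	// Watch the sentry's end for a hangup. POLLRDHUP fires when the peer
	// closes its write side; POLLHUP when the connection is torn down.
	events := []unix.PollFd{{
		Fd:     int32(fds[0]),
		Events: unix.POLLHUP | unix.POLLRDHUP,
	}}
	for {
		n, err := unix.Ppoll(events, nil, nil)
		if err == unix.EINTR {
			continue // interrupted by a signal; retry, as RetryEintr does
		}
		if err != nil {
			panic(err)
		}
		if n > 0 {
			fmt.Printf("peer closed, revents=%#x\n", events[0].Revents)
			return
		}
	}
}

Closing the peer end makes Ppoll return with POLLHUP/POLLRDHUP set in Revents, which is exactly the condition startGoferMonitor below treats as gofer death.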
diff --git a/runsc/boot/loader.go b/runsc/boot/loader.go
index 77e1aa456..434f1ca77 100644
--- a/runsc/boot/loader.go
+++ b/runsc/boot/loader.go
@@ -27,6 +27,7 @@ import (
 	gtime "time"
 
 	specs "github.com/opencontainers/runtime-spec/specs-go"
+	"golang.org/x/sys/unix"
 	"gvisor.dev/gvisor/pkg/abi/linux"
 	"gvisor.dev/gvisor/pkg/cpuid"
 	"gvisor.dev/gvisor/pkg/log"
@@ -524,11 +525,9 @@ func (l *Loader) run() error {
 		// ours either way.
 		l.rootProcArgs.FDTable = fdTable
 
-	// cid for root container can be empty. Only subcontainers need it to set
-	// the mount location.
-	mntr := newContainerMounter(l.spec, "", l.goferFDs, l.k, l.mountHints)
-
-	// Setup the root container.
+	// Setup the root container file system.
+	l.startGoferMonitor(l.sandboxID, l.goferFDs)
+	mntr := newContainerMounter(l.spec, l.goferFDs, l.k, l.mountHints)
 	if err := mntr.setupRootContainer(ctx, ctx, l.conf, func(mns *fs.MountNamespace) {
 		l.rootProcArgs.MountNamespace = mns
 	}); err != nil {
@@ -687,7 +686,9 @@ func (l *Loader) startContainer(spec *specs.Spec, conf *Config, cid string, file
 		goferFDs = append(goferFDs, fd)
 	}
 
-	mntr := newContainerMounter(spec, cid, goferFDs, l.k, l.mountHints)
+	// Setup the child container file system.
+	l.startGoferMonitor(cid, goferFDs)
+	mntr := newContainerMounter(spec, goferFDs, l.k, l.mountHints)
 	if err := mntr.setupChildContainer(conf, &procArgs); err != nil {
 		return fmt.Errorf("configuring container FS: %v", err)
 	}
@@ -710,17 +711,59 @@ func (l *Loader) startContainer(spec *specs.Spec, conf *Config, cid string, file
 	return nil
 }
 
+// startGoferMonitor runs a goroutine to monitor the gofer's health. It
+// polls the gofer FDs looking for disconnects and destroys the container
+// when any of them is closed.
+func (l *Loader) startGoferMonitor(cid string, goferFDs []int) {
+	go func() {
+		log.Debugf("Monitoring gofer health for container %q", cid)
+		var events []unix.PollFd
+		for _, fd := range goferFDs {
+			events = append(events, unix.PollFd{
+				Fd:     int32(fd),
+				Events: unix.POLLHUP | unix.POLLRDHUP,
+			})
+		}
+		_, _, err := specutils.RetryEintr(func() (uintptr, uintptr, error) {
+			// Use ppoll instead of poll because it's already whitelisted in seccomp.
+			n, err := unix.Ppoll(events, nil, nil)
+			return uintptr(n), 0, err
+		})
+		if err != nil {
+			panic(fmt.Sprintf("Error monitoring gofer FDs: %v", err))
+		}
+
+		// Check if the gofer has stopped as part of normal container destruction.
+		// This is done just to avoid sending an annoying error message to the log.
+		// Note that there is a small race window in between mu.Unlock() and the
+		// lock being reacquired in destroyContainer(), but it's harmless to call
+		// destroyContainer() multiple times.
+		l.mu.Lock()
+		_, ok := l.processes[execID{cid: cid}]
+		l.mu.Unlock()
+		if ok {
+			log.Infof("Gofer socket disconnected, destroying container %q", cid)
+			if err := l.destroyContainer(cid); err != nil {
+				log.Warningf("Error destroying container %q after gofer stopped: %v", cid, err)
+			}
+		}
+	}()
+}
+
 // destroyContainer stops a container if it is still running and cleans up its
 // filesystem.
 func (l *Loader) destroyContainer(cid string) error {
 	l.mu.Lock()
 	defer l.mu.Unlock()
 
-	// Has the container started?
-	_, _, err := l.threadGroupFromIDLocked(execID{cid: cid})
+	_, _, started, err := l.threadGroupFromIDLocked(execID{cid: cid})
+	if err != nil {
+		// Container doesn't exist.
+		return err
+	}
 
-	// If the container has started, kill and wait for all processes.
-	if err == nil {
+	// The container exists; has it been started?
+	if started {
 		if err := l.signalAllProcesses(cid, int32(linux.SIGKILL)); err != nil {
 			return fmt.Errorf("sending SIGKILL to all container processes: %v", err)
 		}
@@ -754,9 +797,12 @@ func (l *Loader) executeAsync(args *control.ExecArgs) (kernel.ThreadID, error) {
 	l.mu.Lock()
 	defer l.mu.Unlock()
 
-	tg, _, err := l.threadGroupFromIDLocked(execID{cid: args.ContainerID})
+	tg, _, started, err := l.threadGroupFromIDLocked(execID{cid: args.ContainerID})
 	if err != nil {
-		return 0, fmt.Errorf("no such container: %q", args.ContainerID)
+		return 0, err
+	}
+	if !started {
+		return 0, fmt.Errorf("container %q not started", args.ContainerID)
 	}
 
 	// Get the container MountNamespace from the Task.
@@ -1018,22 +1064,30 @@ func (l *Loader) signalAllProcesses(cid string, signo int32) error {
 func (l *Loader) threadGroupFromID(key execID) (*kernel.ThreadGroup, *host.TTYFileOperations, error) {
 	l.mu.Lock()
 	defer l.mu.Unlock()
-	return l.threadGroupFromIDLocked(key)
+	tg, tty, ok, err := l.threadGroupFromIDLocked(key)
+	if err != nil {
+		return nil, nil, err
+	}
+	if !ok {
+		return nil, nil, fmt.Errorf("container %q not started", key.cid)
+	}
+	return tg, tty, nil
 }
 
 // threadGroupFromIDLocked returns the thread group and TTY for the given
 // execution ID. TTY may be nil if the process is not attached to a terminal.
-// Returns error if execution ID is invalid or if container/process has not
-// started yet. Caller must hold 'mu'.
-func (l *Loader) threadGroupFromIDLocked(key execID) (*kernel.ThreadGroup, *host.TTYFileOperations, error) {
+// Also returns a boolean indicating whether the container has already started.
+// Returns error if execution ID is invalid or if the container cannot be
+// found (maybe it has been deleted). Caller must hold 'mu'.
+func (l *Loader) threadGroupFromIDLocked(key execID) (*kernel.ThreadGroup, *host.TTYFileOperations, bool, error) {
 	ep := l.processes[key]
 	if ep == nil {
-		return nil, nil, fmt.Errorf("container not found")
+		return nil, nil, false, fmt.Errorf("container %q not found", key.cid)
 	}
 	if ep.tg == nil {
-		return nil, nil, fmt.Errorf("container not started")
+		return nil, nil, false, nil
 	}
-	return ep.tg, ep.tty, nil
+	return ep.tg, ep.tty, true, nil
 }
 
 func init() {
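The threadGroupFromIDLocked change above splits what used to be a single error into two conditions: "container not found" remains an error, while "found but not started" becomes a boolean. That lets destroyContainer clean up a container that exists but never started, instead of bailing out. A hypothetical reduction of that contract (all names here are illustrative, not runsc code):

package main

import "fmt"

// threadGroup stands in for *kernel.ThreadGroup; it stays nil until the
// container's first process starts.
type threadGroup struct{}

type execEntry struct{ tg *threadGroup }

type loader struct{ processes map[string]*execEntry }

// lookupLocked mirrors the new threadGroupFromIDLocked contract: an error
// means the container cannot be found at all, while the bool reports
// whether it has started.
func (l *loader) lookupLocked(cid string) (*threadGroup, bool, error) {
	ep, ok := l.processes[cid]
	if !ok {
		return nil, false, fmt.Errorf("container %q not found", cid)
	}
	if ep.tg == nil {
		return nil, false, nil // exists, but not started yet
	}
	return ep.tg, true, nil
}

func main() {
	l := &loader{processes: map[string]*execEntry{
		"created": {},                   // created, not yet started
		"running": {tg: &threadGroup{}}, // started
	}}
	for _, cid := range []string{"missing", "created", "running"} {
		_, started, err := l.lookupLocked(cid)
		// destroyContainer proceeds whenever err == nil; it only signals
		// processes when started is true.
		fmt.Printf("%-7s started=%-5v err=%v\n", cid, started, err)
	}
}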
diff --git a/runsc/boot/loader_test.go b/runsc/boot/loader_test.go
index ff713660d..e0e32b9d5 100644
--- a/runsc/boot/loader_test.go
+++ b/runsc/boot/loader_test.go
@@ -408,7 +408,7 @@ func TestCreateMountNamespace(t *testing.T) {
 			mns = m
 			ctx.(*contexttest.TestContext).RegisterValue(fs.CtxRoot, mns.Root())
 		}
-		mntr := newContainerMounter(&tc.spec, "", []int{sandEnd}, nil, &podMountHints{})
+		mntr := newContainerMounter(&tc.spec, []int{sandEnd}, nil, &podMountHints{})
 		if err := mntr.setupRootContainer(ctx, ctx, conf, setMountNS); err != nil {
 			t.Fatalf("createMountNamespace test case %q failed: %v", tc.name, err)
 		}
@@ -614,7 +614,7 @@ func TestRestoreEnvironment(t *testing.T) {
 	for _, tc := range testCases {
 		t.Run(tc.name, func(t *testing.T) {
 			conf := testConfig()
-			mntr := newContainerMounter(tc.spec, "", tc.ioFDs, nil, &podMountHints{})
+			mntr := newContainerMounter(tc.spec, tc.ioFDs, nil, &podMountHints{})
 			actualRenv, err := mntr.createRestoreEnvironment(conf)
 			if !tc.errorExpected && err != nil {
 				t.Fatalf("could not create restore environment for test:%s", tc.name)
diff --git a/runsc/boot/user_test.go b/runsc/boot/user_test.go
index 834003430..01f666507 100644
--- a/runsc/boot/user_test.go
+++ b/runsc/boot/user_test.go
@@ -169,7 +169,7 @@ func TestGetExecUserHome(t *testing.T) {
 			mns = m
 			ctx.(*contexttest.TestContext).RegisterValue(fs.CtxRoot, mns.Root())
 		}
-		mntr := newContainerMounter(spec, "", []int{sandEnd}, nil, &podMountHints{})
+		mntr := newContainerMounter(spec, []int{sandEnd}, nil, &podMountHints{})
 		if err := mntr.setupRootContainer(ctx, ctx, conf, setMountNS); err != nil {
 			t.Fatalf("failed to create mount namespace: %v", err)
 		}
diff --git a/runsc/container/BUILD b/runsc/container/BUILD
index e246c38ae..de8202bb1 100644
--- a/runsc/container/BUILD
+++ b/runsc/container/BUILD
@@ -49,6 +49,7 @@ go_test(
         "//pkg/abi/linux",
         "//pkg/log",
         "//pkg/sentry/control",
+        "//pkg/sentry/kernel",
         "//pkg/sentry/kernel/auth",
         "//pkg/unet",
         "//pkg/urpc",
diff --git a/runsc/container/container_test.go b/runsc/container/container_test.go
index ff68c586e..af128bf1c 100644
--- a/runsc/container/container_test.go
+++ b/runsc/container/container_test.go
@@ -76,7 +76,7 @@ func waitForProcessCount(cont *Container, want int) error {
 }
 
 func blockUntilWaitable(pid int) error {
-	_, _, err := testutil.RetryEintr(func() (uintptr, uintptr, error) {
+	_, _, err := specutils.RetryEintr(func() (uintptr, uintptr, error) {
 		var err error
 		_, _, err1 := syscall.Syscall6(syscall.SYS_WAITID, 1, uintptr(pid), 0, syscall.WEXITED|syscall.WNOWAIT, 0, 0)
 		if err1 != 0 {
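For context on blockUntilWaitable above: it relies on waitid(2) with WEXITED|WNOWAIT, which blocks until the child is waitable but does not reap it, so the test's reaper can still collect the exit status. A hedged standalone sketch of that trick, assuming Linux (the constant 1 is waitid's P_PID idtype, which Go's syscall package does not export, and Linux accepts a NULL siginfo pointer):

package main

import (
	"fmt"
	"os/exec"
	"syscall"
)

// pPID is waitid's idtype_t P_PID.
const pPID = 1

func main() {
	cmd := exec.Command("/bin/true")
	if err := cmd.Start(); err != nil {
		panic(err)
	}
	pid := cmd.Process.Pid

	// Block until the child exits, but do NOT reap it (WNOWAIT), mirroring
	// blockUntilWaitable above. siginfo is NULL (0).
	for {
		_, _, errno := syscall.Syscall6(syscall.SYS_WAITID, pPID, uintptr(pid), 0,
			syscall.WEXITED|syscall.WNOWAIT, 0, 0)
		if errno == syscall.EINTR {
			continue // the same condition RetryEintr handles
		}
		if errno != 0 {
			panic(errno)
		}
		break
	}
	fmt.Println("child exited and is still waitable")
	cmd.Wait() // the real reap; works because WNOWAIT left the status behind
}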
diff --git a/runsc/container/multi_container_test.go b/runsc/container/multi_container_test.go
index 978a422f5..2ef065a15 100644
--- a/runsc/container/multi_container_test.go
+++ b/runsc/container/multi_container_test.go
@@ -29,6 +29,7 @@ import (
 
 	specs "github.com/opencontainers/runtime-spec/specs-go"
 	"gvisor.dev/gvisor/pkg/sentry/control"
+	"gvisor.dev/gvisor/pkg/sentry/kernel"
 	"gvisor.dev/gvisor/runsc/boot"
 	"gvisor.dev/gvisor/runsc/specutils"
 	"gvisor.dev/gvisor/runsc/test/testutil"
@@ -488,7 +489,7 @@ func TestMultiContainerSignal(t *testing.T) {
 		if err := containers[1].Destroy(); err != nil {
 			t.Errorf("failed to destroy container: %v", err)
 		}
-		_, _, err = testutil.RetryEintr(func() (uintptr, uintptr, error) {
+		_, _, err = specutils.RetryEintr(func() (uintptr, uintptr, error) {
 			cpid, err := syscall.Wait4(goferPid, nil, 0, nil)
 			return uintptr(cpid), 0, err
 		})
@@ -905,9 +906,9 @@ func TestMultiContainerDifferentFilesystems(t *testing.T) {
 	}
 }
 
-// TestMultiContainerGoferStop tests that IO operations continue to work after
-// containers have been stopped and gofers killed.
-func TestMultiContainerGoferStop(t *testing.T) {
+// TestMultiContainerContainerDestroyStress tests that IO operations continue
+// to work after containers have been stopped and gofers killed.
+func TestMultiContainerContainerDestroyStress(t *testing.T) {
 	app, err := testutil.FindFile("runsc/container/test_app/test_app")
 	if err != nil {
 		t.Fatal("error finding test_app:", err)
@@ -1345,3 +1346,80 @@ func TestMultiContainerMultiRootCanHandleFDs(t *testing.T) {
 		}
 	}
 }
+
+// Test that container is destroyed when Gofer is killed.
+func TestMultiContainerGoferKilled(t *testing.T) {
+	sleep := []string{"sleep", "100"}
+	specs, ids := createSpecs(sleep, sleep, sleep)
+	conf := testutil.TestConfig()
+	containers, cleanup, err := startContainers(conf, specs, ids)
+	if err != nil {
+		t.Fatalf("error starting containers: %v", err)
+	}
+	defer cleanup()
+
+	// Ensure container is running.
+	c := containers[2]
+	expectedPL := []*control.Process{
+		{PID: 3, Cmd: "sleep"},
+	}
+	if err := waitForProcessList(c, expectedPL); err != nil {
+		t.Errorf("failed to wait for sleep to start: %v", err)
+	}
+
+	// Kill container's gofer.
+	if err := syscall.Kill(c.GoferPid, syscall.SIGKILL); err != nil {
+		t.Fatalf("syscall.Kill(%d, SIGKILL)=%v", c.GoferPid, err)
+	}
+
+	// Wait until container stops.
+	if err := waitForProcessList(c, nil); err != nil {
+		t.Errorf("Container %q was not stopped after gofer death: %v", c.ID, err)
+	}
+
+	// Check that container isn't running anymore.
+	args := &control.ExecArgs{Argv: []string{"/bin/true"}}
+	if _, err := c.executeSync(args); err == nil {
+		t.Fatalf("Container %q was not stopped after gofer death", c.ID)
+	}
+
+	// Check that other containers are unaffected.
+	for i, c := range containers {
+		if i == 2 {
+			continue // container[2] has been killed.
+		}
+		pl := []*control.Process{
+			{PID: kernel.ThreadID(i + 1), Cmd: "sleep"},
+		}
+		if err := waitForProcessList(c, pl); err != nil {
+			t.Errorf("Container %q was affected by another container: %v", c.ID, err)
+		}
+		args := &control.ExecArgs{Argv: []string{"/bin/true"}}
+		if _, err := c.executeSync(args); err != nil {
+			t.Fatalf("Container %q was affected by another container: %v", c.ID, err)
+		}
+	}
+
+	// Kill root container's gofer to bring entire sandbox down.
+	c = containers[0]
+	if err := syscall.Kill(c.GoferPid, syscall.SIGKILL); err != nil {
+		t.Fatalf("syscall.Kill(%d, SIGKILL)=%v", c.GoferPid, err)
+	}
+
+	// Wait until sandbox stops. waitForProcessList will loop until sandbox exits
+	// and RPC errors out.
+	impossiblePL := []*control.Process{
+		{PID: 100, Cmd: "non-existent-process"},
+	}
+	if err := waitForProcessList(c, impossiblePL); err == nil {
+		t.Fatalf("Sandbox was not killed after gofer death")
+	}
+
+	// Check that entire sandbox isn't running anymore.
+	for _, c := range containers {
+		args := &control.ExecArgs{Argv: []string{"/bin/true"}}
+		if _, err := c.executeSync(args); err == nil {
+			t.Fatalf("Container %q was not stopped after gofer death", c.ID)
+		}
+	}
+}
diff --git a/runsc/specutils/specutils.go b/runsc/specutils/specutils.go
index 0b40e38a3..2eec92349 100644
--- a/runsc/specutils/specutils.go
+++ b/runsc/specutils/specutils.go
@@ -492,3 +492,14 @@ func (c *Cleanup) Clean() {
 func (c *Cleanup) Release() {
 	c.clean = nil
 }
+
+// RetryEintr retries the function until an error other than EINTR is
+// returned.
+func RetryEintr(f func() (uintptr, uintptr, error)) (uintptr, uintptr, error) {
+	for {
+		r1, r2, err := f()
+		if err != syscall.EINTR {
+			return r1, r2, err
+		}
+	}
+}
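RetryEintr now lives in specutils so both the sandbox (the ppoll in startGoferMonitor) and the tests (wait4, waitid) can share it. A self-contained usage sketch follows; the helper body is copied from the diff above, while everything around it is illustrative:

package main

import (
	"fmt"
	"syscall"
)

// RetryEintr is reproduced from runsc/specutils above: it re-invokes f for
// as long as the wrapped syscall keeps failing with EINTR.
func RetryEintr(f func() (uintptr, uintptr, error)) (uintptr, uintptr, error) {
	for {
		r1, r2, err := f()
		if err != syscall.EINTR {
			return r1, r2, err
		}
	}
}

func main() {
	pid, err := syscall.ForkExec("/bin/true", []string{"true"}, nil)
	if err != nil {
		panic(err)
	}

	// Wait4 may be interrupted by a signal and return EINTR; the wrapper
	// retries until the wait completes or fails for real.
	cpid, _, err := RetryEintr(func() (uintptr, uintptr, error) {
		c, err := syscall.Wait4(pid, nil, 0, nil)
		return uintptr(c), 0, err
	})
	fmt.Printf("reaped pid %d, err=%v\n", cpid, err)
}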
+ } + pl := []*control.Process{ + {PID: kernel.ThreadID(i + 1), Cmd: "sleep"}, + } + if err := waitForProcessList(c, pl); err != nil { + t.Errorf("Container %q was affected by another container: %v", c.ID, err) + } + args := &control.ExecArgs{Argv: []string{"/bin/true"}} + if _, err := c.executeSync(args); err != nil { + t.Fatalf("Container %q was affected by another container: %v", c.ID, err) + } + } + + // Kill root container's gofer to bring entire sandbox down. + c = containers[0] + if err := syscall.Kill(c.GoferPid, syscall.SIGKILL); err != nil { + t.Fatalf("syscall.Kill(%d, SIGKILL)=%v", c.GoferPid, err) + } + + // Wait until sandbox stops. waitForProcessList will loop until sandbox exits + // and RPC errors out. + impossiblePL := []*control.Process{ + {PID: 100, Cmd: "non-existent-process"}, + } + if err := waitForProcessList(c, impossiblePL); err == nil { + t.Fatalf("Sandbox was not killed after gofer death") + } + + // Check that entire sandbox isn't running anymore. + for _, c := range containers { + args := &control.ExecArgs{Argv: []string{"/bin/true"}} + if _, err := c.executeSync(args); err == nil { + t.Fatalf("Container %q was not stopped after gofer death", c.ID) + } + } +} diff --git a/runsc/specutils/specutils.go b/runsc/specutils/specutils.go index 0b40e38a3..2eec92349 100644 --- a/runsc/specutils/specutils.go +++ b/runsc/specutils/specutils.go @@ -492,3 +492,14 @@ func (c *Cleanup) Clean() { func (c *Cleanup) Release() { c.clean = nil } + +// RetryEintr retries the function until an error different than EINTR is +// returned. +func RetryEintr(f func() (uintptr, uintptr, error)) (uintptr, uintptr, error) { + for { + r1, r2, err := f() + if err != syscall.EINTR { + return r1, r2, err + } + } +} diff --git a/runsc/test/testutil/testutil.go b/runsc/test/testutil/testutil.go index a98675bfc..406f6d08a 100644 --- a/runsc/test/testutil/testutil.go +++ b/runsc/test/testutil/testutil.go @@ -348,17 +348,6 @@ func StartReaper() func() { return r.Stop } -// RetryEintr retries the function until an error different than EINTR is -// returned. -func RetryEintr(f func() (uintptr, uintptr, error)) (uintptr, uintptr, error) { - for { - r1, r2, err := f() - if err != syscall.EINTR { - return r1, r2, err - } - } -} - // WaitUntilRead reads from the given reader until the wanted string is found // or until timeout. func WaitUntilRead(r io.Reader, want string, split bufio.SplitFunc, timeout time.Duration) error { -- cgit v1.2.3