From 915d76aa924c08b1fcb80a58e3caa24529a23d04 Mon Sep 17 00:00:00 2001 From: Nicolas Lacasse Date: Wed, 19 Sep 2018 18:52:53 -0700 Subject: Add container.Destroy urpc method. This method will: 1. Stop the container process if it is still running. 2. Unmount all sanadbox-internal mounts for the container. 3. Delete the contaner root directory inside the sandbox. Destroy is idempotent, and safe to call concurrantly. This fixes a bug where after stopping a container, we cannot unmount the container root directory on the host. This bug occured because the sandbox dirent cache was holding a dirent with a host fd corresponding to a file inside the container root on the host. The dirent cache did not know that the container had exited, and kept the FD open, preventing us from unmounting on the host. Now that we unmount (and flush) all container mounts inside the sandbox, any host FDs donated by the gofer will be closed, and we can unmount the container root on the host. PiperOrigin-RevId: 213737693 Change-Id: I28c0ff4cd19a08014cdd72fec5154497e92aacc9 --- runsc/boot/controller.go | 96 +++++++++++++++++++++++++++++++-- runsc/boot/fs.go | 8 +-- runsc/container/container.go | 13 ++--- runsc/container/multi_container_test.go | 70 ++++++++++++++++++++++++ runsc/sandbox/sandbox.go | 20 +++++++ 5 files changed, 190 insertions(+), 17 deletions(-) (limited to 'runsc') diff --git a/runsc/boot/controller.go b/runsc/boot/controller.go index f5b0d371c..b4594c8b0 100644 --- a/runsc/boot/controller.go +++ b/runsc/boot/controller.go @@ -21,8 +21,10 @@ import ( "path" specs "github.com/opencontainers/runtime-spec/specs-go" + "gvisor.googlesource.com/gvisor/pkg/abi/linux" "gvisor.googlesource.com/gvisor/pkg/control/server" "gvisor.googlesource.com/gvisor/pkg/log" + "gvisor.googlesource.com/gvisor/pkg/sentry/arch" "gvisor.googlesource.com/gvisor/pkg/sentry/control" "gvisor.googlesource.com/gvisor/pkg/sentry/fs" "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" @@ -30,6 +32,7 @@ import ( "gvisor.googlesource.com/gvisor/pkg/sentry/state" "gvisor.googlesource.com/gvisor/pkg/sentry/time" "gvisor.googlesource.com/gvisor/pkg/sentry/watchdog" + "gvisor.googlesource.com/gvisor/pkg/syserror" "gvisor.googlesource.com/gvisor/pkg/urpc" ) @@ -37,6 +40,10 @@ const ( // ContainerCheckpoint checkpoints a container. ContainerCheckpoint = "containerManager.Checkpoint" + // ContainerDestroy is used to stop a non-root container and free all + // associated resources in the sandbox. + ContainerDestroy = "containerManager.Destroy" + // ContainerEvent is the URPC endpoint for getting stats about the // container used by "runsc events". ContainerEvent = "containerManager.Event" @@ -58,9 +65,6 @@ const ( // ContainerResume unpauses the paused container. ContainerResume = "containerManager.Resume" - // ContainerWaitForLoader blocks until the container's loader has been created. - ContainerWaitForLoader = "containerManager.WaitForLoader" - // ContainerSignal is used to send a signal to a container. ContainerSignal = "containerManager.Signal" @@ -72,6 +76,9 @@ const ( // and return its ExitStatus. ContainerWait = "containerManager.Wait" + // ContainerWaitForLoader blocks until the container's loader has been created. + ContainerWaitForLoader = "containerManager.WaitForLoader" + // ContainerWaitPID is used to wait on a process with a certain PID in // the sandbox and return its ExitStatus. ContainerWaitPID = "containerManager.WaitPID" @@ -228,6 +235,89 @@ func (cm *containerManager) Start(args *StartArgs, _ *struct{}) error { return nil } +// Destroy stops a container if it is still running and cleans up its +// filesystem. +func (cm *containerManager) Destroy(cid *string, _ *struct{}) error { + log.Debugf("containerManager.destroy %q", *cid) + cm.l.mu.Lock() + defer cm.l.mu.Unlock() + + if tg, ok := cm.l.containerRootTGs[*cid]; ok { + // Send SIGKILL to threadgroup. + if err := tg.SendSignal(&arch.SignalInfo{ + Signo: int32(linux.SIGKILL), + Code: arch.SignalInfoUser, + }); err == nil { + // SIGKILL sent. Now wait for it to exit. + log.Debugf("Waiting for container process to exit.") + tg.WaitExited() + log.Debugf("Container process exited.") + } else if err != syserror.ESRCH { + return fmt.Errorf("error sending SIGKILL to container %q: %v", *cid, err) + } + + // Remove the container thread group from the map. + delete(cm.l.containerRootTGs, *cid) + } + + // Clean up the filesystem by unmounting all mounts for this container + // and deleting the container root directory. + + // First get a reference to the container root directory. + mns := cm.l.k.RootMountNamespace() + mnsRoot := mns.Root() + defer mnsRoot.DecRef() + ctx := cm.l.rootProcArgs.NewContext(cm.l.k) + containerRoot := path.Join(ChildContainersDir, *cid) + containerRootDirent, err := mns.FindInode(ctx, mnsRoot, nil, containerRoot, linux.MaxSymlinkTraversals) + if err == syserror.ENOENT { + // Container must have been destroyed already. That's fine. + return nil + } + if err != nil { + return fmt.Errorf("error finding container root directory %q: %v", containerRoot, err) + } + defer containerRootDirent.DecRef() + + // Iterate through all submounts and unmount them. We unmount lazily by + // setting detach=true, so we can unmount in any order. + for _, m := range containerRootDirent.Inode.MountSource.Submounts() { + root := m.Root() + defer root.DecRef() + + // Do a best-effort unmount by flushing the refs and unmount + // with "detach only = true". + log.Debugf("Unmounting container submount %q", root.BaseName()) + m.FlushDirentRefs() + if err := mns.Unmount(ctx, root, true /* detach only */); err != nil { + return fmt.Errorf("error unmounting container submount %q: %v", root.BaseName(), err) + } + } + + // Unmount the container root itself. + log.Debugf("Unmounting container root %q", containerRoot) + containerRootDirent.Inode.MountSource.FlushDirentRefs() + if err := mns.Unmount(ctx, containerRootDirent, true /* detach only */); err != nil { + return fmt.Errorf("error unmounting container root mount %q: %v", containerRootDirent.BaseName(), err) + } + + // Get a reference to the parent directory and remove the root + // container directory. + containersDirDirent, err := mns.FindInode(ctx, mnsRoot, nil, ChildContainersDir, linux.MaxSymlinkTraversals) + if err != nil { + return fmt.Errorf("error finding containers directory %q: %v", ChildContainersDir, err) + } + defer containersDirDirent.DecRef() + log.Debugf("Deleting container root %q", containerRoot) + if err := containersDirDirent.RemoveDirectory(ctx, mnsRoot, *cid); err != nil { + return fmt.Errorf("error removing directory %q: %v", containerRoot, err) + } + + // We made it! + log.Debugf("Destroyed container %q", *cid) + return nil +} + // ExecArgs contains arguments to Execute. type ExecArgs struct { control.ExecArgs diff --git a/runsc/boot/fs.go b/runsc/boot/fs.go index 59ae5faae..110f67de8 100644 --- a/runsc/boot/fs.go +++ b/runsc/boot/fs.go @@ -49,9 +49,9 @@ const ( // Device name for root mount. rootDevice = "9pfs-/" - // childContainersDir is the directory where child container root + // ChildContainersDir is the directory where child container root // filesystems are mounted. - childContainersDir = "/__runsc_containers__" + ChildContainersDir = "/__runsc_containers__" // Filesystems that runsc supports. bind = "bind" @@ -89,7 +89,7 @@ func createMountNamespace(userCtx context.Context, rootCtx context.Context, spec // each child container. mounts = append(mounts, specs.Mount{ Type: tmpfs, - Destination: childContainersDir, + Destination: ChildContainersDir, }) } fds := &fdDispenser{fds: ioFDs} @@ -639,7 +639,7 @@ func setFileSystemForProcess(procArgs *kernel.CreateProcessArgs, spec *specs.Spe // Make directories for submounts within the container. rootDir := mns.Root() defer rootDir.DecRef() - containerRoot := filepath.Join(childContainersDir, cid) + containerRoot := filepath.Join(ChildContainersDir, cid) mkdirAll(ctx, mns, containerRoot) // Mount the container's root filesystem to the newly created diff --git a/runsc/container/container.go b/runsc/container/container.go index 3be88066c..a2582611a 100644 --- a/runsc/container/container.go +++ b/runsc/container/container.go @@ -544,16 +544,9 @@ func (c *Container) save() error { // to stop. If any of them doesn't stop before timeout, an error is returned. func (c *Container) stop() error { if c.Sandbox != nil && c.Sandbox.IsRunning() { - log.Debugf("Killing container %q", c.ID) - if c.Sandbox.IsRootContainer(c.ID) { - if err := c.Sandbox.Destroy(); err != nil { - return fmt.Errorf("error destroying sandbox %q: %v", c.Sandbox.ID, err) - } - } else { - if err := c.Signal(syscall.SIGKILL); err != nil { - // The container may already be stopped, log the error. - log.Warningf("Error sending signal %d to container %q: %v", syscall.SIGKILL, c.ID, err) - } + log.Debugf("Destroying container %q", c.ID) + if err := c.Sandbox.DestroyContainer(c.ID); err != nil { + return fmt.Errorf("error destroying container %q: %v", c.ID, err) } } diff --git a/runsc/container/multi_container_test.go b/runsc/container/multi_container_test.go index d6418efb6..0df587e30 100644 --- a/runsc/container/multi_container_test.go +++ b/runsc/container/multi_container_test.go @@ -17,6 +17,7 @@ package container import ( "io/ioutil" "os" + "path" "path/filepath" "strings" "sync" @@ -25,6 +26,7 @@ import ( specs "github.com/opencontainers/runtime-spec/specs-go" "gvisor.googlesource.com/gvisor/pkg/sentry/control" + "gvisor.googlesource.com/gvisor/runsc/boot" "gvisor.googlesource.com/gvisor/runsc/specutils" "gvisor.googlesource.com/gvisor/runsc/test/testutil" ) @@ -431,3 +433,71 @@ func TestMultiContainerSignal(t *testing.T) { } } } + +// TestMultiContainerDestroy checks that container are properly cleaned-up when +// they are destroyed. +func TestMultiContainerDestroy(t *testing.T) { + for _, conf := range configs(all...) { + t.Logf("Running test with conf: %+v", conf) + + rootDir, err := testutil.SetupRootDir() + if err != nil { + t.Fatalf("error creating root dir: %v", err) + } + defer os.RemoveAll(rootDir) + + // Two containers that will run for a long time. We will + // destroy the second one. + specs, ids := createSpecs([]string{"sleep", "100"}, []string{"sleep", "100"}) + + // Setup the containers. + var containers []*Container + for i, spec := range specs { + conf := testutil.TestConfig() + bundleDir, err := testutil.SetupContainerInRoot(rootDir, spec, conf) + if err != nil { + t.Fatalf("error setting up container: %v", err) + } + defer os.RemoveAll(bundleDir) + cont, err := Create(ids[i], spec, conf, bundleDir, "", "") + if err != nil { + t.Fatalf("error creating container: %v", err) + } + defer cont.Destroy() + if err := cont.Start(conf); err != nil { + t.Fatalf("error starting container: %v", err) + } + containers = append(containers, cont) + } + + // Exec in the root container to check for the existence of the + // second containers root filesystem directory. + contDir := path.Join(boot.ChildContainersDir, containers[1].ID) + args := &control.ExecArgs{ + Filename: "/usr/bin/test", + Argv: []string{"test", "-d", contDir}, + } + if ws, err := containers[0].executeSync(args); err != nil { + t.Fatalf("error executing %+v: %v", args, err) + } else if ws.ExitStatus() != 0 { + t.Errorf("exec 'test -f %q' got exit status %d, wanted 0", contDir, ws.ExitStatus()) + } + + // Destory the second container. + if err := containers[1].Destroy(); err != nil { + t.Fatalf("error destroying container: %v", err) + } + + // Now the container dir should be gone. + if ws, err := containers[0].executeSync(args); err != nil { + t.Fatalf("error executing %+v: %v", args, err) + } else if ws.ExitStatus() == 0 { + t.Errorf("exec 'test -f %q' got exit status 0, wanted non-zero", contDir) + } + + // Check that cont.Destroy is safe to call multiple times. + if err := containers[1].Destroy(); err != nil { + t.Errorf("error destroying container: %v", err) + } + } +} diff --git a/runsc/sandbox/sandbox.go b/runsc/sandbox/sandbox.go index f58d111bf..75739255d 100644 --- a/runsc/sandbox/sandbox.go +++ b/runsc/sandbox/sandbox.go @@ -666,6 +666,26 @@ func (s *Sandbox) Stacks() (string, error) { return stacks, nil } +// DestroyContainer destroys the given container. If it is the root container, +// then the entire sandbox is destroyed. +func (s *Sandbox) DestroyContainer(cid string) error { + if s.IsRootContainer(cid) { + log.Debugf("Destroying root container %q by destroying sandbox", cid) + return s.Destroy() + } + + log.Debugf("Destroying container %q in sandbox %q", cid, s.ID) + conn, err := s.sandboxConnect() + if err != nil { + return err + } + defer conn.Close() + if err := conn.Call(boot.ContainerDestroy, &cid, nil); err != nil { + return fmt.Errorf("error destroying container %q: %v", cid, err) + } + return nil +} + func (s *Sandbox) waitForStopped() error { ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) defer cancel() -- cgit v1.2.3