diff options
-rw-r--r-- | runsc/boot/controller.go | 96 | ||||
-rw-r--r-- | runsc/boot/fs.go | 8 | ||||
-rw-r--r-- | runsc/container/container.go | 13 | ||||
-rw-r--r-- | runsc/container/multi_container_test.go | 70 | ||||
-rw-r--r-- | runsc/sandbox/sandbox.go | 20 |
5 files changed, 190 insertions, 17 deletions
diff --git a/runsc/boot/controller.go b/runsc/boot/controller.go index f5b0d371c..b4594c8b0 100644 --- a/runsc/boot/controller.go +++ b/runsc/boot/controller.go @@ -21,8 +21,10 @@ import ( "path" specs "github.com/opencontainers/runtime-spec/specs-go" + "gvisor.googlesource.com/gvisor/pkg/abi/linux" "gvisor.googlesource.com/gvisor/pkg/control/server" "gvisor.googlesource.com/gvisor/pkg/log" + "gvisor.googlesource.com/gvisor/pkg/sentry/arch" "gvisor.googlesource.com/gvisor/pkg/sentry/control" "gvisor.googlesource.com/gvisor/pkg/sentry/fs" "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" @@ -30,6 +32,7 @@ import ( "gvisor.googlesource.com/gvisor/pkg/sentry/state" "gvisor.googlesource.com/gvisor/pkg/sentry/time" "gvisor.googlesource.com/gvisor/pkg/sentry/watchdog" + "gvisor.googlesource.com/gvisor/pkg/syserror" "gvisor.googlesource.com/gvisor/pkg/urpc" ) @@ -37,6 +40,10 @@ const ( // ContainerCheckpoint checkpoints a container. ContainerCheckpoint = "containerManager.Checkpoint" + // ContainerDestroy is used to stop a non-root container and free all + // associated resources in the sandbox. + ContainerDestroy = "containerManager.Destroy" + // ContainerEvent is the URPC endpoint for getting stats about the // container used by "runsc events". ContainerEvent = "containerManager.Event" @@ -58,9 +65,6 @@ const ( // ContainerResume unpauses the paused container. ContainerResume = "containerManager.Resume" - // ContainerWaitForLoader blocks until the container's loader has been created. - ContainerWaitForLoader = "containerManager.WaitForLoader" - // ContainerSignal is used to send a signal to a container. ContainerSignal = "containerManager.Signal" @@ -72,6 +76,9 @@ const ( // and return its ExitStatus. ContainerWait = "containerManager.Wait" + // ContainerWaitForLoader blocks until the container's loader has been created. + ContainerWaitForLoader = "containerManager.WaitForLoader" + // ContainerWaitPID is used to wait on a process with a certain PID in // the sandbox and return its ExitStatus. ContainerWaitPID = "containerManager.WaitPID" @@ -228,6 +235,89 @@ func (cm *containerManager) Start(args *StartArgs, _ *struct{}) error { return nil } +// Destroy stops a container if it is still running and cleans up its +// filesystem. +func (cm *containerManager) Destroy(cid *string, _ *struct{}) error { + log.Debugf("containerManager.destroy %q", *cid) + cm.l.mu.Lock() + defer cm.l.mu.Unlock() + + if tg, ok := cm.l.containerRootTGs[*cid]; ok { + // Send SIGKILL to threadgroup. + if err := tg.SendSignal(&arch.SignalInfo{ + Signo: int32(linux.SIGKILL), + Code: arch.SignalInfoUser, + }); err == nil { + // SIGKILL sent. Now wait for it to exit. + log.Debugf("Waiting for container process to exit.") + tg.WaitExited() + log.Debugf("Container process exited.") + } else if err != syserror.ESRCH { + return fmt.Errorf("error sending SIGKILL to container %q: %v", *cid, err) + } + + // Remove the container thread group from the map. + delete(cm.l.containerRootTGs, *cid) + } + + // Clean up the filesystem by unmounting all mounts for this container + // and deleting the container root directory. + + // First get a reference to the container root directory. + mns := cm.l.k.RootMountNamespace() + mnsRoot := mns.Root() + defer mnsRoot.DecRef() + ctx := cm.l.rootProcArgs.NewContext(cm.l.k) + containerRoot := path.Join(ChildContainersDir, *cid) + containerRootDirent, err := mns.FindInode(ctx, mnsRoot, nil, containerRoot, linux.MaxSymlinkTraversals) + if err == syserror.ENOENT { + // Container must have been destroyed already. That's fine. + return nil + } + if err != nil { + return fmt.Errorf("error finding container root directory %q: %v", containerRoot, err) + } + defer containerRootDirent.DecRef() + + // Iterate through all submounts and unmount them. We unmount lazily by + // setting detach=true, so we can unmount in any order. + for _, m := range containerRootDirent.Inode.MountSource.Submounts() { + root := m.Root() + defer root.DecRef() + + // Do a best-effort unmount by flushing the refs and unmount + // with "detach only = true". + log.Debugf("Unmounting container submount %q", root.BaseName()) + m.FlushDirentRefs() + if err := mns.Unmount(ctx, root, true /* detach only */); err != nil { + return fmt.Errorf("error unmounting container submount %q: %v", root.BaseName(), err) + } + } + + // Unmount the container root itself. + log.Debugf("Unmounting container root %q", containerRoot) + containerRootDirent.Inode.MountSource.FlushDirentRefs() + if err := mns.Unmount(ctx, containerRootDirent, true /* detach only */); err != nil { + return fmt.Errorf("error unmounting container root mount %q: %v", containerRootDirent.BaseName(), err) + } + + // Get a reference to the parent directory and remove the root + // container directory. + containersDirDirent, err := mns.FindInode(ctx, mnsRoot, nil, ChildContainersDir, linux.MaxSymlinkTraversals) + if err != nil { + return fmt.Errorf("error finding containers directory %q: %v", ChildContainersDir, err) + } + defer containersDirDirent.DecRef() + log.Debugf("Deleting container root %q", containerRoot) + if err := containersDirDirent.RemoveDirectory(ctx, mnsRoot, *cid); err != nil { + return fmt.Errorf("error removing directory %q: %v", containerRoot, err) + } + + // We made it! + log.Debugf("Destroyed container %q", *cid) + return nil +} + // ExecArgs contains arguments to Execute. type ExecArgs struct { control.ExecArgs diff --git a/runsc/boot/fs.go b/runsc/boot/fs.go index 59ae5faae..110f67de8 100644 --- a/runsc/boot/fs.go +++ b/runsc/boot/fs.go @@ -49,9 +49,9 @@ const ( // Device name for root mount. rootDevice = "9pfs-/" - // childContainersDir is the directory where child container root + // ChildContainersDir is the directory where child container root // filesystems are mounted. - childContainersDir = "/__runsc_containers__" + ChildContainersDir = "/__runsc_containers__" // Filesystems that runsc supports. bind = "bind" @@ -89,7 +89,7 @@ func createMountNamespace(userCtx context.Context, rootCtx context.Context, spec // each child container. mounts = append(mounts, specs.Mount{ Type: tmpfs, - Destination: childContainersDir, + Destination: ChildContainersDir, }) } fds := &fdDispenser{fds: ioFDs} @@ -639,7 +639,7 @@ func setFileSystemForProcess(procArgs *kernel.CreateProcessArgs, spec *specs.Spe // Make directories for submounts within the container. rootDir := mns.Root() defer rootDir.DecRef() - containerRoot := filepath.Join(childContainersDir, cid) + containerRoot := filepath.Join(ChildContainersDir, cid) mkdirAll(ctx, mns, containerRoot) // Mount the container's root filesystem to the newly created diff --git a/runsc/container/container.go b/runsc/container/container.go index 3be88066c..a2582611a 100644 --- a/runsc/container/container.go +++ b/runsc/container/container.go @@ -544,16 +544,9 @@ func (c *Container) save() error { // to stop. If any of them doesn't stop before timeout, an error is returned. func (c *Container) stop() error { if c.Sandbox != nil && c.Sandbox.IsRunning() { - log.Debugf("Killing container %q", c.ID) - if c.Sandbox.IsRootContainer(c.ID) { - if err := c.Sandbox.Destroy(); err != nil { - return fmt.Errorf("error destroying sandbox %q: %v", c.Sandbox.ID, err) - } - } else { - if err := c.Signal(syscall.SIGKILL); err != nil { - // The container may already be stopped, log the error. - log.Warningf("Error sending signal %d to container %q: %v", syscall.SIGKILL, c.ID, err) - } + log.Debugf("Destroying container %q", c.ID) + if err := c.Sandbox.DestroyContainer(c.ID); err != nil { + return fmt.Errorf("error destroying container %q: %v", c.ID, err) } } diff --git a/runsc/container/multi_container_test.go b/runsc/container/multi_container_test.go index d6418efb6..0df587e30 100644 --- a/runsc/container/multi_container_test.go +++ b/runsc/container/multi_container_test.go @@ -17,6 +17,7 @@ package container import ( "io/ioutil" "os" + "path" "path/filepath" "strings" "sync" @@ -25,6 +26,7 @@ import ( specs "github.com/opencontainers/runtime-spec/specs-go" "gvisor.googlesource.com/gvisor/pkg/sentry/control" + "gvisor.googlesource.com/gvisor/runsc/boot" "gvisor.googlesource.com/gvisor/runsc/specutils" "gvisor.googlesource.com/gvisor/runsc/test/testutil" ) @@ -431,3 +433,71 @@ func TestMultiContainerSignal(t *testing.T) { } } } + +// TestMultiContainerDestroy checks that container are properly cleaned-up when +// they are destroyed. +func TestMultiContainerDestroy(t *testing.T) { + for _, conf := range configs(all...) { + t.Logf("Running test with conf: %+v", conf) + + rootDir, err := testutil.SetupRootDir() + if err != nil { + t.Fatalf("error creating root dir: %v", err) + } + defer os.RemoveAll(rootDir) + + // Two containers that will run for a long time. We will + // destroy the second one. + specs, ids := createSpecs([]string{"sleep", "100"}, []string{"sleep", "100"}) + + // Setup the containers. + var containers []*Container + for i, spec := range specs { + conf := testutil.TestConfig() + bundleDir, err := testutil.SetupContainerInRoot(rootDir, spec, conf) + if err != nil { + t.Fatalf("error setting up container: %v", err) + } + defer os.RemoveAll(bundleDir) + cont, err := Create(ids[i], spec, conf, bundleDir, "", "") + if err != nil { + t.Fatalf("error creating container: %v", err) + } + defer cont.Destroy() + if err := cont.Start(conf); err != nil { + t.Fatalf("error starting container: %v", err) + } + containers = append(containers, cont) + } + + // Exec in the root container to check for the existence of the + // second containers root filesystem directory. + contDir := path.Join(boot.ChildContainersDir, containers[1].ID) + args := &control.ExecArgs{ + Filename: "/usr/bin/test", + Argv: []string{"test", "-d", contDir}, + } + if ws, err := containers[0].executeSync(args); err != nil { + t.Fatalf("error executing %+v: %v", args, err) + } else if ws.ExitStatus() != 0 { + t.Errorf("exec 'test -f %q' got exit status %d, wanted 0", contDir, ws.ExitStatus()) + } + + // Destory the second container. + if err := containers[1].Destroy(); err != nil { + t.Fatalf("error destroying container: %v", err) + } + + // Now the container dir should be gone. + if ws, err := containers[0].executeSync(args); err != nil { + t.Fatalf("error executing %+v: %v", args, err) + } else if ws.ExitStatus() == 0 { + t.Errorf("exec 'test -f %q' got exit status 0, wanted non-zero", contDir) + } + + // Check that cont.Destroy is safe to call multiple times. + if err := containers[1].Destroy(); err != nil { + t.Errorf("error destroying container: %v", err) + } + } +} diff --git a/runsc/sandbox/sandbox.go b/runsc/sandbox/sandbox.go index f58d111bf..75739255d 100644 --- a/runsc/sandbox/sandbox.go +++ b/runsc/sandbox/sandbox.go @@ -666,6 +666,26 @@ func (s *Sandbox) Stacks() (string, error) { return stacks, nil } +// DestroyContainer destroys the given container. If it is the root container, +// then the entire sandbox is destroyed. +func (s *Sandbox) DestroyContainer(cid string) error { + if s.IsRootContainer(cid) { + log.Debugf("Destroying root container %q by destroying sandbox", cid) + return s.Destroy() + } + + log.Debugf("Destroying container %q in sandbox %q", cid, s.ID) + conn, err := s.sandboxConnect() + if err != nil { + return err + } + defer conn.Close() + if err := conn.Call(boot.ContainerDestroy, &cid, nil); err != nil { + return fmt.Errorf("error destroying container %q: %v", cid, err) + } + return nil +} + func (s *Sandbox) waitForStopped() error { ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) defer cancel() |