diff options
author | Nicolas Lacasse <nlacasse@google.com> | 2018-09-19 18:52:53 -0700 |
---|---|---|
committer | Shentubot <shentubot@google.com> | 2018-09-19 18:54:14 -0700 |
commit | 915d76aa924c08b1fcb80a58e3caa24529a23d04 (patch) | |
tree | 7b43890499455af77c2d8fa279bf15838e69e643 /runsc/boot | |
parent | b873e388f36ee7251059d54a963c27a55be50ab5 (diff) |
Add container.Destroy urpc method.
This method will:
1. Stop the container process if it is still running.
2. Unmount all sanadbox-internal mounts for the container.
3. Delete the contaner root directory inside the sandbox.
Destroy is idempotent, and safe to call concurrantly.
This fixes a bug where after stopping a container, we cannot unmount the
container root directory on the host. This bug occured because the sandbox
dirent cache was holding a dirent with a host fd corresponding to a file inside
the container root on the host. The dirent cache did not know that the
container had exited, and kept the FD open, preventing us from unmounting on
the host.
Now that we unmount (and flush) all container mounts inside the sandbox, any
host FDs donated by the gofer will be closed, and we can unmount the container
root on the host.
PiperOrigin-RevId: 213737693
Change-Id: I28c0ff4cd19a08014cdd72fec5154497e92aacc9
Diffstat (limited to 'runsc/boot')
-rw-r--r-- | runsc/boot/controller.go | 96 | ||||
-rw-r--r-- | runsc/boot/fs.go | 8 |
2 files changed, 97 insertions, 7 deletions
diff --git a/runsc/boot/controller.go b/runsc/boot/controller.go index f5b0d371c..b4594c8b0 100644 --- a/runsc/boot/controller.go +++ b/runsc/boot/controller.go @@ -21,8 +21,10 @@ import ( "path" specs "github.com/opencontainers/runtime-spec/specs-go" + "gvisor.googlesource.com/gvisor/pkg/abi/linux" "gvisor.googlesource.com/gvisor/pkg/control/server" "gvisor.googlesource.com/gvisor/pkg/log" + "gvisor.googlesource.com/gvisor/pkg/sentry/arch" "gvisor.googlesource.com/gvisor/pkg/sentry/control" "gvisor.googlesource.com/gvisor/pkg/sentry/fs" "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" @@ -30,6 +32,7 @@ import ( "gvisor.googlesource.com/gvisor/pkg/sentry/state" "gvisor.googlesource.com/gvisor/pkg/sentry/time" "gvisor.googlesource.com/gvisor/pkg/sentry/watchdog" + "gvisor.googlesource.com/gvisor/pkg/syserror" "gvisor.googlesource.com/gvisor/pkg/urpc" ) @@ -37,6 +40,10 @@ const ( // ContainerCheckpoint checkpoints a container. ContainerCheckpoint = "containerManager.Checkpoint" + // ContainerDestroy is used to stop a non-root container and free all + // associated resources in the sandbox. + ContainerDestroy = "containerManager.Destroy" + // ContainerEvent is the URPC endpoint for getting stats about the // container used by "runsc events". ContainerEvent = "containerManager.Event" @@ -58,9 +65,6 @@ const ( // ContainerResume unpauses the paused container. ContainerResume = "containerManager.Resume" - // ContainerWaitForLoader blocks until the container's loader has been created. - ContainerWaitForLoader = "containerManager.WaitForLoader" - // ContainerSignal is used to send a signal to a container. ContainerSignal = "containerManager.Signal" @@ -72,6 +76,9 @@ const ( // and return its ExitStatus. ContainerWait = "containerManager.Wait" + // ContainerWaitForLoader blocks until the container's loader has been created. + ContainerWaitForLoader = "containerManager.WaitForLoader" + // ContainerWaitPID is used to wait on a process with a certain PID in // the sandbox and return its ExitStatus. ContainerWaitPID = "containerManager.WaitPID" @@ -228,6 +235,89 @@ func (cm *containerManager) Start(args *StartArgs, _ *struct{}) error { return nil } +// Destroy stops a container if it is still running and cleans up its +// filesystem. +func (cm *containerManager) Destroy(cid *string, _ *struct{}) error { + log.Debugf("containerManager.destroy %q", *cid) + cm.l.mu.Lock() + defer cm.l.mu.Unlock() + + if tg, ok := cm.l.containerRootTGs[*cid]; ok { + // Send SIGKILL to threadgroup. + if err := tg.SendSignal(&arch.SignalInfo{ + Signo: int32(linux.SIGKILL), + Code: arch.SignalInfoUser, + }); err == nil { + // SIGKILL sent. Now wait for it to exit. + log.Debugf("Waiting for container process to exit.") + tg.WaitExited() + log.Debugf("Container process exited.") + } else if err != syserror.ESRCH { + return fmt.Errorf("error sending SIGKILL to container %q: %v", *cid, err) + } + + // Remove the container thread group from the map. + delete(cm.l.containerRootTGs, *cid) + } + + // Clean up the filesystem by unmounting all mounts for this container + // and deleting the container root directory. + + // First get a reference to the container root directory. + mns := cm.l.k.RootMountNamespace() + mnsRoot := mns.Root() + defer mnsRoot.DecRef() + ctx := cm.l.rootProcArgs.NewContext(cm.l.k) + containerRoot := path.Join(ChildContainersDir, *cid) + containerRootDirent, err := mns.FindInode(ctx, mnsRoot, nil, containerRoot, linux.MaxSymlinkTraversals) + if err == syserror.ENOENT { + // Container must have been destroyed already. That's fine. + return nil + } + if err != nil { + return fmt.Errorf("error finding container root directory %q: %v", containerRoot, err) + } + defer containerRootDirent.DecRef() + + // Iterate through all submounts and unmount them. We unmount lazily by + // setting detach=true, so we can unmount in any order. + for _, m := range containerRootDirent.Inode.MountSource.Submounts() { + root := m.Root() + defer root.DecRef() + + // Do a best-effort unmount by flushing the refs and unmount + // with "detach only = true". + log.Debugf("Unmounting container submount %q", root.BaseName()) + m.FlushDirentRefs() + if err := mns.Unmount(ctx, root, true /* detach only */); err != nil { + return fmt.Errorf("error unmounting container submount %q: %v", root.BaseName(), err) + } + } + + // Unmount the container root itself. + log.Debugf("Unmounting container root %q", containerRoot) + containerRootDirent.Inode.MountSource.FlushDirentRefs() + if err := mns.Unmount(ctx, containerRootDirent, true /* detach only */); err != nil { + return fmt.Errorf("error unmounting container root mount %q: %v", containerRootDirent.BaseName(), err) + } + + // Get a reference to the parent directory and remove the root + // container directory. + containersDirDirent, err := mns.FindInode(ctx, mnsRoot, nil, ChildContainersDir, linux.MaxSymlinkTraversals) + if err != nil { + return fmt.Errorf("error finding containers directory %q: %v", ChildContainersDir, err) + } + defer containersDirDirent.DecRef() + log.Debugf("Deleting container root %q", containerRoot) + if err := containersDirDirent.RemoveDirectory(ctx, mnsRoot, *cid); err != nil { + return fmt.Errorf("error removing directory %q: %v", containerRoot, err) + } + + // We made it! + log.Debugf("Destroyed container %q", *cid) + return nil +} + // ExecArgs contains arguments to Execute. type ExecArgs struct { control.ExecArgs diff --git a/runsc/boot/fs.go b/runsc/boot/fs.go index 59ae5faae..110f67de8 100644 --- a/runsc/boot/fs.go +++ b/runsc/boot/fs.go @@ -49,9 +49,9 @@ const ( // Device name for root mount. rootDevice = "9pfs-/" - // childContainersDir is the directory where child container root + // ChildContainersDir is the directory where child container root // filesystems are mounted. - childContainersDir = "/__runsc_containers__" + ChildContainersDir = "/__runsc_containers__" // Filesystems that runsc supports. bind = "bind" @@ -89,7 +89,7 @@ func createMountNamespace(userCtx context.Context, rootCtx context.Context, spec // each child container. mounts = append(mounts, specs.Mount{ Type: tmpfs, - Destination: childContainersDir, + Destination: ChildContainersDir, }) } fds := &fdDispenser{fds: ioFDs} @@ -639,7 +639,7 @@ func setFileSystemForProcess(procArgs *kernel.CreateProcessArgs, spec *specs.Spe // Make directories for submounts within the container. rootDir := mns.Root() defer rootDir.DecRef() - containerRoot := filepath.Join(childContainersDir, cid) + containerRoot := filepath.Join(ChildContainersDir, cid) mkdirAll(ctx, mns, containerRoot) // Mount the container's root filesystem to the newly created |