summaryrefslogtreecommitdiffhomepage
path: root/runsc/boot
diff options
context:
space:
mode:
authorFabricio Voznika <fvoznika@google.com>2018-09-28 12:20:56 -0700
committerShentubot <shentubot@google.com>2018-09-28 12:22:21 -0700
commit2496d9b4b6343154525f73e9583a4a60bebcfa30 (patch)
tree3ac4c3c1ea5813a2c3a32ea8b4d05e01db0d99d1 /runsc/boot
parentfb65b0b471621b430969fe1c3009bee68209bf67 (diff)
Make runsc kill and delete more conformant to the "spec"
PiperOrigin-RevId: 214976251 Change-Id: I631348c3886f41f63d0e77e7c4f21b3ede2ab521
Diffstat (limited to 'runsc/boot')
-rw-r--r--runsc/boot/controller.go89
-rw-r--r--runsc/boot/fs.go67
-rw-r--r--runsc/boot/loader.go67
3 files changed, 119 insertions, 104 deletions
diff --git a/runsc/boot/controller.go b/runsc/boot/controller.go
index 362e74df5..98356e8b7 100644
--- a/runsc/boot/controller.go
+++ b/runsc/boot/controller.go
@@ -21,10 +21,8 @@ import (
"path"
specs "github.com/opencontainers/runtime-spec/specs-go"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
"gvisor.googlesource.com/gvisor/pkg/control/server"
"gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/pkg/sentry/arch"
"gvisor.googlesource.com/gvisor/pkg/sentry/control"
"gvisor.googlesource.com/gvisor/pkg/sentry/fs"
"gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
@@ -32,7 +30,6 @@ import (
"gvisor.googlesource.com/gvisor/pkg/sentry/state"
"gvisor.googlesource.com/gvisor/pkg/sentry/time"
"gvisor.googlesource.com/gvisor/pkg/sentry/watchdog"
- "gvisor.googlesource.com/gvisor/pkg/syserror"
"gvisor.googlesource.com/gvisor/pkg/urpc"
)
@@ -247,91 +244,7 @@ func (cm *containerManager) Start(args *StartArgs, _ *struct{}) error {
// filesystem.
func (cm *containerManager) Destroy(cid *string, _ *struct{}) error {
log.Debugf("containerManager.destroy %q", *cid)
- cm.l.mu.Lock()
- defer cm.l.mu.Unlock()
-
- key := execID{cid: *cid}
- if tg, ok := cm.l.processes[key]; ok {
- // Send SIGKILL to threadgroup.
- if err := tg.SendSignal(&arch.SignalInfo{
- Signo: int32(linux.SIGKILL),
- Code: arch.SignalInfoUser,
- }); err == nil {
- // SIGKILL sent. Now wait for it to exit.
- log.Debugf("Waiting for container process to exit.")
- tg.WaitExited()
- log.Debugf("Container process exited.")
- } else if err != syserror.ESRCH {
- return fmt.Errorf("error sending SIGKILL to container %q: %v", *cid, err)
- }
-
- // Remove the container thread group from the map.
- delete(cm.l.processes, key)
- }
-
- // Clean up the filesystem by unmounting all mounts for this container
- // and deleting the container root directory.
-
- // First get a reference to the container root directory.
- mns := cm.l.k.RootMountNamespace()
- mnsRoot := mns.Root()
- defer mnsRoot.DecRef()
- ctx := cm.l.rootProcArgs.NewContext(cm.l.k)
- containerRoot := path.Join(ChildContainersDir, *cid)
- containerRootDirent, err := mns.FindInode(ctx, mnsRoot, nil, containerRoot, linux.MaxSymlinkTraversals)
- if err == syserror.ENOENT {
- // Container must have been destroyed already. That's fine.
- return nil
- }
- if err != nil {
- return fmt.Errorf("error finding container root directory %q: %v", containerRoot, err)
- }
- defer containerRootDirent.DecRef()
-
- // Iterate through all submounts and unmount them. We unmount lazily by
- // setting detach=true, so we can unmount in any order.
- for _, m := range containerRootDirent.Inode.MountSource.Submounts() {
- root := m.Root()
- defer root.DecRef()
-
- // Do a best-effort unmount by flushing the refs and unmount
- // with "detach only = true".
- log.Debugf("Unmounting container submount %q", root.BaseName())
- m.FlushDirentRefs()
- if err := mns.Unmount(ctx, root, true /* detach only */); err != nil {
- return fmt.Errorf("error unmounting container submount %q: %v", root.BaseName(), err)
- }
- }
-
- // Unmount the container root itself.
- log.Debugf("Unmounting container root %q", containerRoot)
- containerRootDirent.Inode.MountSource.FlushDirentRefs()
- if err := mns.Unmount(ctx, containerRootDirent, true /* detach only */); err != nil {
- return fmt.Errorf("error unmounting container root mount %q: %v", containerRootDirent.BaseName(), err)
- }
-
- // Get a reference to the parent directory and remove the root
- // container directory.
- containersDirDirent, err := mns.FindInode(ctx, mnsRoot, nil, ChildContainersDir, linux.MaxSymlinkTraversals)
- if err != nil {
- return fmt.Errorf("error finding containers directory %q: %v", ChildContainersDir, err)
- }
- defer containersDirDirent.DecRef()
- log.Debugf("Deleting container root %q", containerRoot)
- if err := containersDirDirent.RemoveDirectory(ctx, mnsRoot, *cid); err != nil {
- return fmt.Errorf("error removing directory %q: %v", containerRoot, err)
- }
-
- // Flushing dirent references triggers many async close operations. We
- // must wait for those to complete before returning, otherwise the
- // caller may kill the gofer before they complete, causing a cascade of
- // failing RPCs.
- log.Infof("Waiting for async filesystem operations to complete")
- fs.AsyncBarrier()
-
- // We made it!
- log.Debugf("Destroyed container %q", *cid)
- return nil
+ return cm.l.destroyContainer(*cid)
}
// ExecuteAsync starts running a command on a created or running sandbox. It
diff --git a/runsc/boot/fs.go b/runsc/boot/fs.go
index 22d5f621c..9e8fea7e1 100644
--- a/runsc/boot/fs.go
+++ b/runsc/boot/fs.go
@@ -16,6 +16,7 @@ package boot
import (
"fmt"
+ "path"
"path/filepath"
"strconv"
"strings"
@@ -576,9 +577,9 @@ func subtargets(root string, mnts []specs.Mount) []string {
return targets
}
-// setFileSystemForProcess is used to set up the file system and amend the procArgs accordingly.
+// setupContainerFS is used to set up the file system and amend the procArgs accordingly.
// procArgs are passed by reference and the FDMap field is modified. It dups stdioFDs.
-func setFileSystemForProcess(procArgs *kernel.CreateProcessArgs, spec *specs.Spec, conf *Config, stdioFDs, goferFDs []int, console bool, creds *auth.Credentials, ls *limits.LimitSet, k *kernel.Kernel, cid string) error {
+func setupContainerFS(procArgs *kernel.CreateProcessArgs, spec *specs.Spec, conf *Config, stdioFDs, goferFDs []int, console bool, creds *auth.Credentials, ls *limits.LimitSet, k *kernel.Kernel, cid string) error {
ctx := procArgs.NewContext(k)
// Create the FD map, which will set stdin, stdout, and stderr. If
@@ -676,3 +677,65 @@ func setExecutablePath(ctx context.Context, mns *fs.MountNamespace, procArgs *ke
procArgs.Filename = f
return nil
}
+
+// destroyContainerFS cleans up the filesystem by unmounting all mounts for the
+// given container and deleting the container root directory.
+func destroyContainerFS(ctx context.Context, cid string, k *kernel.Kernel) error {
+ // First get a reference to the container root directory.
+ mns := k.RootMountNamespace()
+ mnsRoot := mns.Root()
+ defer mnsRoot.DecRef()
+ containerRoot := path.Join(ChildContainersDir, cid)
+ containerRootDirent, err := mns.FindInode(ctx, mnsRoot, nil, containerRoot, linux.MaxSymlinkTraversals)
+ if err == syserror.ENOENT {
+ // Container must have been destroyed already. That's fine.
+ return nil
+ }
+ if err != nil {
+ return fmt.Errorf("error finding container root directory %q: %v", containerRoot, err)
+ }
+ defer containerRootDirent.DecRef()
+
+ // Iterate through all submounts and unmount them. We unmount lazily by
+ // setting detach=true, so we can unmount in any order.
+ for _, m := range containerRootDirent.Inode.MountSource.Submounts() {
+ root := m.Root()
+ defer root.DecRef()
+
+ // Do a best-effort unmount by flushing the refs and unmount
+ // with "detach only = true".
+ log.Debugf("Unmounting container submount %q", root.BaseName())
+ m.FlushDirentRefs()
+ if err := mns.Unmount(ctx, root, true /* detach only */); err != nil {
+ return fmt.Errorf("error unmounting container submount %q: %v", root.BaseName(), err)
+ }
+ }
+
+ // Unmount the container root itself.
+ log.Debugf("Unmounting container root %q", containerRoot)
+ containerRootDirent.Inode.MountSource.FlushDirentRefs()
+ if err := mns.Unmount(ctx, containerRootDirent, true /* detach only */); err != nil {
+ return fmt.Errorf("error unmounting container root mount %q: %v", containerRootDirent.BaseName(), err)
+ }
+
+ // Get a reference to the parent directory and remove the root
+ // container directory.
+ containersDirDirent, err := mns.FindInode(ctx, mnsRoot, nil, ChildContainersDir, linux.MaxSymlinkTraversals)
+ if err != nil {
+ return fmt.Errorf("error finding containers directory %q: %v", ChildContainersDir, err)
+ }
+ defer containersDirDirent.DecRef()
+ log.Debugf("Deleting container root %q", containerRoot)
+ if err := containersDirDirent.RemoveDirectory(ctx, mnsRoot, cid); err != nil {
+ return fmt.Errorf("error removing directory %q: %v", containerRoot, err)
+ }
+
+ // Flushing dirent references triggers many async close operations. We
+ // must wait for those to complete before returning, otherwise the
+ // caller may kill the gofer before they complete, causing a cascade of
+ // failing RPCs.
+ log.Infof("Waiting for async filesystem operations to complete")
+ fs.AsyncBarrier()
+
+ return nil
+}
diff --git a/runsc/boot/loader.go b/runsc/boot/loader.go
index 52c251812..1e2a12280 100644
--- a/runsc/boot/loader.go
+++ b/runsc/boot/loader.go
@@ -112,9 +112,6 @@ type Loader struct {
// have the corresponding pid set.
//
// processes is guardded by mu.
- //
- // TODO: When containers are removed via `runsc delete`,
- // processes should be cleaned up.
processes map[execID]*kernel.ThreadGroup
}
@@ -385,7 +382,7 @@ func (l *Loader) run() error {
// If we are restoring, we do not want to create a process.
// l.restore is set by the container manager when a restore call is made.
if !l.restore {
- if err := setFileSystemForProcess(
+ if err := setupContainerFS(
&l.rootProcArgs,
l.spec,
l.conf,
@@ -476,7 +473,7 @@ func (l *Loader) startContainer(k *kernel.Kernel, spec *specs.Spec, conf *Config
stdioFDs := ioFDs[:3]
goferFDs := ioFDs[3:]
- if err := setFileSystemForProcess(
+ if err := setupContainerFS(
&procArgs,
spec,
conf,
@@ -519,6 +516,34 @@ func (l *Loader) startContainer(k *kernel.Kernel, spec *specs.Spec, conf *Config
return nil
}
+// destroyContainer stops a container if it is still running and cleans up its
+// filesystem.
+func (l *Loader) destroyContainer(cid string) error {
+ // First kill and wait for all processes in the container.
+ if err := l.signal(cid, int32(linux.SIGKILL), true /*all*/); err != nil {
+ return fmt.Errorf("failed to SIGKILL all container processes: %v", err)
+ }
+
+ l.mu.Lock()
+ defer l.mu.Unlock()
+
+ // Remove all container thread groups from the map.
+ for key := range l.processes {
+ if key.cid == cid {
+ delete(l.processes, key)
+ }
+ }
+
+ ctx := l.rootProcArgs.NewContext(l.k)
+ if err := destroyContainerFS(ctx, cid, l.k); err != nil {
+ return fmt.Errorf("failed to destroy filesystem for container %q: %v", cid, err)
+ }
+
+ // We made it!
+ log.Debugf("Container destroyed %q", cid)
+ return nil
+}
+
func (l *Loader) executeAsync(args *control.ExecArgs) (kernel.ThreadID, error) {
// Get the container Root Dirent from the Task, since we must run this
// process with the same Root.
@@ -669,13 +694,27 @@ func (l *Loader) signal(cid string, signo int32, all bool) error {
}
si := arch.SignalInfo{Signo: signo}
- if all {
- // Pause the kernel to prevent new processes from being created while
- // the signal is delivered. This prevents process leaks when SIGKILL is
- // sent to the entire container.
- l.k.Pause()
- defer l.k.Unpause()
- return l.k.SendContainerSignal(cid, &si)
- }
- return tg.Leader().SendSignal(&si)
+ if !all {
+ return tg.Leader().SendSignal(&si)
+ }
+
+ // Pause the kernel to prevent new processes from being created while
+ // the signal is delivered. This prevents process leaks when SIGKILL is
+ // sent to the entire container.
+ l.k.Pause()
+ if err := l.k.SendContainerSignal(cid, &si); err != nil {
+ l.k.Unpause()
+ return err
+ }
+ l.k.Unpause()
+
+ // If killing all processes, wait for them to exit.
+ if all && linux.Signal(signo) == linux.SIGKILL {
+ for _, t := range l.k.TaskSet().Root.Tasks() {
+ if t.ContainerID() == cid {
+ t.ThreadGroup().WaitExited()
+ }
+ }
+ }
+ return nil
}