diff options
author | Kevin Krakauer <krakauer@google.com> | 2018-09-17 16:24:05 -0700 |
---|---|---|
committer | Shentubot <shentubot@google.com> | 2018-09-17 16:25:24 -0700 |
commit | bb88c187c5457df14fa78e5e6b6f48cbc90fb489 (patch) | |
tree | a92886651d7657480b7f696ebe7a5f774916a1cb /runsc | |
parent | ab6fa44588233fa48d1ae0bf7d9b0d9e984a6af0 (diff) |
runsc: Enable waiting on exited processes.
This makes `runsc wait` behave more like waitpid()/wait4() in that:
- Once a process has run to completion, you can wait on it and get its exit
code.
- Processes not waited on will consume memory (like a zombie process)
PiperOrigin-RevId: 213358916
Change-Id: I5b5eca41ce71eea68e447380df8c38361a4d1558
Diffstat (limited to 'runsc')
-rw-r--r-- | runsc/boot/controller.go | 33 | ||||
-rw-r--r-- | runsc/boot/loader.go | 114 | ||||
-rw-r--r-- | runsc/boot/loader_test.go | 25 | ||||
-rw-r--r-- | runsc/cmd/exec.go | 14 | ||||
-rw-r--r-- | runsc/cmd/wait.go | 4 | ||||
-rw-r--r-- | runsc/container/container.go | 8 | ||||
-rw-r--r-- | runsc/container/container_test.go | 4 | ||||
-rw-r--r-- | runsc/container/multi_container_test.go | 94 | ||||
-rw-r--r-- | runsc/sandbox/sandbox.go | 7 |
9 files changed, 231 insertions, 72 deletions
diff --git a/runsc/boot/controller.go b/runsc/boot/controller.go index 4d41dcd6c..dc9359092 100644 --- a/runsc/boot/controller.go +++ b/runsc/boot/controller.go @@ -242,32 +242,11 @@ type ExecArgs struct { // returns the pid of the new process. func (cm *containerManager) ExecuteAsync(args *ExecArgs, pid *int32) error { log.Debugf("containerManager.ExecuteAsync: %+v", args) - - // Get the container Root Dirent from the Task, since we must run this - // process with the same Root. - cm.l.mu.Lock() - tg, ok := cm.l.containerRootTGs[args.CID] - cm.l.mu.Unlock() - if !ok { - return fmt.Errorf("cannot exec in container %q: no such container", args.CID) - } - tg.Leader().WithMuLocked(func(t *kernel.Task) { - args.Root = t.FSContext().RootDirectory() - }) - if args.Root != nil { - defer args.Root.DecRef() - } - - // Start the process. - proc := control.Proc{Kernel: cm.l.k} - newTG, err := control.ExecAsync(&proc, &args.ExecArgs) + tgid, err := cm.l.executeAsync(&args.ExecArgs, args.CID) if err != nil { - return fmt.Errorf("error executing: %+v: %v", args, err) + return err } - - // Return the pid of the newly-created process. - ts := cm.l.k.TaskSet() - *pid = int32(ts.Root.IDOfThreadGroup(newTG)) + *pid = int32(tgid) return nil } @@ -409,12 +388,16 @@ type WaitPIDArgs struct { // CID is the container ID. CID string + + // ClearStatus determines whether the exit status of the process should + // be cleared when WaitPID returns. + ClearStatus bool } // WaitPID waits for the process with PID 'pid' in the sandbox. func (cm *containerManager) WaitPID(args *WaitPIDArgs, waitStatus *uint32) error { log.Debugf("containerManager.Wait") - return cm.l.waitPID(kernel.ThreadID(args.PID), args.CID, waitStatus) + return cm.l.waitPID(kernel.ThreadID(args.PID), args.CID, args.ClearStatus, waitStatus) } // SignalArgs are arguments to the Signal method. diff --git a/runsc/boot/loader.go b/runsc/boot/loader.go index 5e9ccb96f..665240ab6 100644 --- a/runsc/boot/loader.go +++ b/runsc/boot/loader.go @@ -31,6 +31,7 @@ import ( "gvisor.googlesource.com/gvisor/pkg/cpuid" "gvisor.googlesource.com/gvisor/pkg/log" "gvisor.googlesource.com/gvisor/pkg/sentry/arch" + "gvisor.googlesource.com/gvisor/pkg/sentry/control" "gvisor.googlesource.com/gvisor/pkg/sentry/inet" "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth" @@ -103,7 +104,7 @@ type Loader struct { // sandboxID is the ID for the whole sandbox. sandboxID string - // mu guards containerRootTGs. + // mu guards containerRootTGs and execProcesses. mu sync.Mutex // containerRootTGs maps container IDs to their root processes. It @@ -111,7 +112,24 @@ type Loader struct { // call methods on particular containers. // // containerRootTGs is guarded by mu. + // + // TODO: When containers are removed via `runsc delete`, + // containerRootTGs should be cleaned up. containerRootTGs map[string]*kernel.ThreadGroup + + // execProcesses maps each invocation of exec to the process it spawns. + // + // execProcesses is guardded by mu. + // + // TODO: When containers are removed via `runsc delete`, + // execProcesses should be cleaned up. + execProcesses map[execID]*kernel.ThreadGroup +} + +// execID uniquely identifies a sentry process. +type execID struct { + cid string + pid kernel.ThreadID } func init() { @@ -385,7 +403,8 @@ func (l *Loader) run() error { } // Create the root container init task. - if _, err := l.k.CreateProcess(l.rootProcArgs); err != nil { + _, _, err := l.k.CreateProcess(l.rootProcArgs) + if err != nil { return fmt.Errorf("failed to create init process: %v", err) } @@ -393,6 +412,11 @@ func (l *Loader) run() error { l.rootProcArgs.FDMap.DecRef() } + if l.execProcesses != nil { + return fmt.Errorf("there shouldn't already be a cache of exec'd processes, but found: %v", l.execProcesses) + } + l.execProcesses = make(map[execID]*kernel.ThreadGroup) + // Start signal forwarding only after an init process is created. l.stopSignalForwarding = l.startSignalForwarding() @@ -467,7 +491,7 @@ func (l *Loader) startContainer(k *kernel.Kernel, spec *specs.Spec, conf *Config return fmt.Errorf("error setting executable path for %+v: %v", procArgs, err) } - tg, err := l.k.CreateProcess(procArgs) + tg, _, err := l.k.CreateProcess(procArgs) if err != nil { return fmt.Errorf("failed to create process in sentry: %v", err) } @@ -482,6 +506,40 @@ func (l *Loader) startContainer(k *kernel.Kernel, spec *specs.Spec, conf *Config return nil } +func (l *Loader) executeAsync(args *control.ExecArgs, cid string) (kernel.ThreadID, error) { + // Get the container Root Dirent from the Task, since we must run this + // process with the same Root. + l.mu.Lock() + tg, ok := l.containerRootTGs[cid] + l.mu.Unlock() + if !ok { + return 0, fmt.Errorf("cannot exec in container %q: no such container", cid) + } + tg.Leader().WithMuLocked(func(t *kernel.Task) { + args.Root = t.FSContext().RootDirectory() + }) + if args.Root != nil { + defer args.Root.DecRef() + } + + // Start the process. + proc := control.Proc{Kernel: l.k} + tg, tgid, err := control.ExecAsync(&proc, args) + if err != nil { + return 0, fmt.Errorf("error executing: %+v: %v", args, err) + } + + // Insert the process into execProcesses so that we can wait on it + // later. + l.mu.Lock() + defer l.mu.Unlock() + eid := execID{cid: cid, pid: tgid} + l.execProcesses[eid] = tg + log.Debugf("updated execProcesses: %v", l.execProcesses) + + return tgid, nil +} + // TODO: Per-container namespaces must be supported for -pid. // waitContainer waits for the root process of a container to exit. @@ -500,39 +558,59 @@ func (l *Loader) waitContainer(cid string, waitStatus *uint32) error { // consider the container exited. // TODO: Multiple calls to waitContainer() should return // the same exit status. - defer func() { - l.mu.Lock() - defer l.mu.Unlock() - // TODO: Containers don't map 1:1 with their root - // processes. Container exits should be managed explicitly - // rather than via PID. - delete(l.containerRootTGs, cid) - }() - l.wait(tg, waitStatus) + ws := l.wait(tg) + *waitStatus = ws + + l.mu.Lock() + defer l.mu.Unlock() + delete(l.containerRootTGs, cid) + return nil } -func (l *Loader) waitPID(tgid kernel.ThreadID, cid string, waitStatus *uint32) error { +func (l *Loader) waitPID(tgid kernel.ThreadID, cid string, clearStatus bool, waitStatus *uint32) error { // TODO: Containers all currently share a PID namespace. // When per-container PID namespaces are supported, wait should use cid // to find the appropriate PID namespace. /*if cid != l.sandboxID { return errors.New("non-sandbox PID namespaces are not yet implemented") }*/ - // TODO: This won't work if the exec process already exited. - tg := l.k.TaskSet().Root.ThreadGroupWithID(kernel.ThreadID(tgid)) + + // If the process was started via runsc exec, it will have an + // entry in l.execProcesses. + l.mu.Lock() + eid := execID{cid: cid, pid: tgid} + tg, ok := l.execProcesses[eid] + l.mu.Unlock() + if ok { + ws := l.wait(tg) + *waitStatus = ws + if clearStatus { + // Remove tg from the cache. + l.mu.Lock() + delete(l.execProcesses, eid) + log.Debugf("updated execProcesses (removal): %v", l.execProcesses) + l.mu.Unlock() + } + return nil + } + + // This process wasn't created by runsc exec or start, so just find it + // by pid and hope it hasn't exited yet. + tg = l.k.TaskSet().Root.ThreadGroupWithID(kernel.ThreadID(tgid)) if tg == nil { return fmt.Errorf("no thread group with ID %d", tgid) } - l.wait(tg, waitStatus) + ws := l.wait(tg) + *waitStatus = ws return nil } // wait waits for the process with TGID 'tgid' in a container's PID namespace // to exit. -func (l *Loader) wait(tg *kernel.ThreadGroup, waitStatus *uint32) { +func (l *Loader) wait(tg *kernel.ThreadGroup) uint32 { tg.WaitExited() - *waitStatus = tg.ExitStatus().Status() + return tg.ExitStatus().Status() } func (l *Loader) setRootContainerID(cid string) { diff --git a/runsc/boot/loader_test.go b/runsc/boot/loader_test.go index 9398292ff..a8a796445 100644 --- a/runsc/boot/loader_test.go +++ b/runsc/boot/loader_test.go @@ -111,11 +111,11 @@ func createLoader() (*Loader, func(), error) { // TestRun runs a simple application in a sandbox and checks that it succeeds. func TestRun(t *testing.T) { - s, cleanup, err := createLoader() + l, cleanup, err := createLoader() if err != nil { t.Fatalf("error creating loader: %v", err) } - defer s.Destroy() + defer l.Destroy() defer cleanup() // Start a goroutine to read the start chan result, otherwise Run will @@ -124,12 +124,13 @@ func TestRun(t *testing.T) { var wg sync.WaitGroup wg.Add(1) go func() { - resultChanErr = <-s.ctrl.manager.startResultChan + resultChanErr = <-l.ctrl.manager.startResultChan wg.Done() }() - // Run the container.. - if err := s.Run(); err != nil { + // Run the container. + l.setRootContainerID("foo") + if err := l.Run(); err != nil { t.Errorf("error running container: %v", err) } @@ -140,7 +141,7 @@ func TestRun(t *testing.T) { } // Wait for the application to exit. It should succeed. - if status := s.WaitExit(); status.Code != 0 || status.Signo != 0 { + if status := l.WaitExit(); status.Code != 0 || status.Signo != 0 { t.Errorf("application exited with status %+v, want 0", status) } } @@ -148,24 +149,24 @@ func TestRun(t *testing.T) { // TestStartSignal tests that the controller Start message will cause // WaitForStartSignal to return. func TestStartSignal(t *testing.T) { - s, cleanup, err := createLoader() + l, cleanup, err := createLoader() if err != nil { t.Fatalf("error creating loader: %v", err) } - defer s.Destroy() + defer l.Destroy() defer cleanup() // We aren't going to wait on this application, so the control server // needs to be shut down manually. - defer s.ctrl.srv.Stop() + defer l.ctrl.srv.Stop() // Start a goroutine that calls WaitForStartSignal and writes to a // channel when it returns. waitFinished := make(chan struct{}) go func() { - s.WaitForStartSignal() + l.WaitForStartSignal() // Pretend that Run() executed and returned no error. - s.ctrl.manager.startResultChan <- nil + l.ctrl.manager.startResultChan <- nil waitFinished <- struct{}{} }() @@ -181,7 +182,7 @@ func TestStartSignal(t *testing.T) { // Trigger the control server StartRoot method. cid := "foo" - if err := s.ctrl.manager.StartRoot(&cid, nil); err != nil { + if err := l.ctrl.manager.StartRoot(&cid, nil); err != nil { t.Errorf("error calling StartRoot: %v", err) } diff --git a/runsc/cmd/exec.go b/runsc/cmd/exec.go index 0d1fa6e20..957c4f0ff 100644 --- a/runsc/cmd/exec.go +++ b/runsc/cmd/exec.go @@ -49,6 +49,7 @@ type Exec struct { extraKGIDs stringSlice caps stringSlice detach bool + clearStatus bool processPath string pidFile string internalPidFile string @@ -100,6 +101,9 @@ func (ex *Exec) SetFlags(f *flag.FlagSet) { f.StringVar(&ex.pidFile, "pid-file", "", "filename that the container pid will be written to") f.StringVar(&ex.internalPidFile, "internal-pid-file", "", "filename that the container-internal pid will be written to") f.StringVar(&ex.consoleSocket, "console-socket", "", "path to an AF_UNIX socket which will receive a file descriptor referencing the master end of the console's pseudoterminal") + + // clear-status is expected to only be set when we fork due to --detach being set. + f.BoolVar(&ex.clearStatus, "clear-status", true, "clear the status of the exec'd process upon completion") } // Execute implements subcommands.Command.Execute. It starts a process in an @@ -163,7 +167,7 @@ func (ex *Exec) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) } // Wait for the process to exit. - ws, err := c.WaitPID(pid) + ws, err := c.WaitPID(pid, ex.clearStatus) if err != nil { Fatalf("error waiting on pid %d: %v", pid, err) } @@ -194,10 +198,16 @@ func (ex *Exec) execAndWait(waitStatus *syscall.WaitStatus) subcommands.ExitStat // Add the rest of the args, excluding the "detach" flag. for _, a := range os.Args[1:] { - if !strings.Contains(a, "detach") { + if strings.Contains(a, "detach") { + // Replace with the "clear-status" flag, which tells + // the new process it's a detached child and shouldn't + // clear the exit status of the sentry process. + args = append(args, "--clear-status=false") + } else { args = append(args, a) } } + cmd := exec.Command(binPath, args...) // Exec stdio defaults to current process stdio. diff --git a/runsc/cmd/wait.go b/runsc/cmd/wait.go index b41edc725..956349140 100644 --- a/runsc/cmd/wait.go +++ b/runsc/cmd/wait.go @@ -88,14 +88,14 @@ func (wt *Wait) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) waitStatus = ws // Wait on a PID in the root PID namespace. case wt.rootPID != unsetPID: - ws, err := c.WaitRootPID(int32(wt.rootPID)) + ws, err := c.WaitRootPID(int32(wt.rootPID), true /* clearStatus */) if err != nil { Fatalf("error waiting on PID in root PID namespace %d in container %q: %v", wt.rootPID, c.ID, err) } waitStatus = ws // Wait on a PID in the container's PID namespace. case wt.pid != unsetPID: - ws, err := c.WaitPID(int32(wt.pid)) + ws, err := c.WaitPID(int32(wt.pid), true /* clearStatus */) if err != nil { Fatalf("error waiting on PID %d in container %q: %v", wt.pid, c.ID, err) } diff --git a/runsc/container/container.go b/runsc/container/container.go index 792b7967b..a24c6cc31 100644 --- a/runsc/container/container.go +++ b/runsc/container/container.go @@ -398,22 +398,22 @@ func (c *Container) Wait() (syscall.WaitStatus, error) { // WaitRootPID waits for process 'pid' in the sandbox's PID namespace and // returns its WaitStatus. -func (c *Container) WaitRootPID(pid int32) (syscall.WaitStatus, error) { +func (c *Container) WaitRootPID(pid int32, clearStatus bool) (syscall.WaitStatus, error) { log.Debugf("Wait on pid %d in sandbox %q", pid, c.Sandbox.ID) if c.Sandbox == nil || !c.Sandbox.IsRunning() { return 0, fmt.Errorf("container sandbox is not running") } - return c.Sandbox.WaitPID(pid, c.Sandbox.ID) + return c.Sandbox.WaitPID(c.Sandbox.ID, pid, clearStatus) } // WaitPID waits for process 'pid' in the container's PID namespace and returns // its WaitStatus. -func (c *Container) WaitPID(pid int32) (syscall.WaitStatus, error) { +func (c *Container) WaitPID(pid int32, clearStatus bool) (syscall.WaitStatus, error) { log.Debugf("Wait on pid %d in container %q", pid, c.ID) if c.Sandbox == nil || !c.Sandbox.IsRunning() { return 0, fmt.Errorf("container sandbox is not running") } - return c.Sandbox.WaitPID(pid, c.ID) + return c.Sandbox.WaitPID(c.ID, pid, clearStatus) } // Signal sends the signal to the container. diff --git a/runsc/container/container_test.go b/runsc/container/container_test.go index ab1823f1c..5fe80f20f 100644 --- a/runsc/container/container_test.go +++ b/runsc/container/container_test.go @@ -551,7 +551,7 @@ func TestExec(t *testing.T) { args := &control.ExecArgs{ Filename: "/bin/sleep", - Argv: []string{"sleep", "5"}, + Argv: []string{"/bin/sleep", "5"}, WorkingDirectory: "/", KUID: uid, } @@ -1598,7 +1598,7 @@ func (cont *Container) executeSync(args *control.ExecArgs) (syscall.WaitStatus, if err != nil { return 0, fmt.Errorf("error executing: %v", err) } - ws, err := cont.WaitPID(pid) + ws, err := cont.WaitPID(pid, true /* clearStatus */) if err != nil { return 0, fmt.Errorf("error waiting: %v", err) } diff --git a/runsc/container/multi_container_test.go b/runsc/container/multi_container_test.go index 84e0ec080..09888cb86 100644 --- a/runsc/container/multi_container_test.go +++ b/runsc/container/multi_container_test.go @@ -163,16 +163,15 @@ func TestMultiContainerWait(t *testing.T) { go func(c *Container) { defer wg.Done() const pid = 2 - if ws, err := c.WaitPID(pid); err != nil { + if ws, err := c.WaitPID(pid, true /* clearStatus */); err != nil { t.Errorf("failed to wait for PID %d: %v", pid, err) } else if es := ws.ExitStatus(); es != 0 { t.Errorf("PID %d exited with non-zero status %d", pid, es) } - if _, err := c.WaitPID(pid); err == nil { + if _, err := c.WaitPID(pid, true /* clearStatus */); err == nil { t.Errorf("wait for stopped PID %d should fail", pid) } - // TODO: use 'container[1]' when PID namespace is supported. - }(containers[0]) + }(containers[1]) } wg.Wait() @@ -184,6 +183,93 @@ func TestMultiContainerWait(t *testing.T) { } } +// TestExecWait ensures what we can wait containers and individual processes in the +// sandbox that have already exited. +func TestExecWait(t *testing.T) { + rootDir, err := testutil.SetupRootDir() + if err != nil { + t.Fatalf("error creating root dir: %v", err) + } + defer os.RemoveAll(rootDir) + + // The first container should run the entire duration of the test. + cmd1 := []string{"sleep", "100"} + // We'll wait on the second container, which is much shorter lived. + cmd2 := []string{"sleep", "1"} + specs, ids := createSpecs(cmd1, cmd2) + + // Setup the containers. + var containers []*Container + for i, spec := range specs { + conf := testutil.TestConfig() + bundleDir, err := testutil.SetupContainerInRoot(rootDir, spec, conf) + if err != nil { + t.Fatalf("error setting up container: %v", err) + } + defer os.RemoveAll(bundleDir) + cont, err := Create(ids[i], spec, conf, bundleDir, "", "") + if err != nil { + t.Fatalf("error creating container: %v", err) + } + defer cont.Destroy() + if err := cont.Start(conf); err != nil { + t.Fatalf("error starting container: %v", err) + } + containers = append(containers, cont) + } + + // Check via ps that multiple processes are running. + expectedPL := []*control.Process{ + {PID: 1, Cmd: "sleep"}, + {PID: 2, Cmd: "sleep"}, + } + if err := waitForProcessList(containers[0], expectedPL); err != nil { + t.Fatalf("failed to wait for sleep to start: %v", err) + } + + // Wait for the second container to finish. + if err := waitForProcessList(containers[0], expectedPL[:1]); err != nil { + t.Fatalf("failed to wait for second container to stop: %v", err) + } + + // Get the second container exit status. + if ws, err := containers[1].Wait(); err != nil { + t.Fatalf("failed to wait for process %s: %v", containers[1].Spec.Process.Args, err) + } else if es := ws.ExitStatus(); es != 0 { + t.Fatalf("process %s exited with non-zero status %d", containers[1].Spec.Process.Args, es) + } + if _, err := containers[1].Wait(); err == nil { + t.Fatalf("wait for stopped process %s should fail", containers[1].Spec.Process.Args) + } + + // Execute another process in the first container. + args := &control.ExecArgs{ + Filename: "/bin/sleep", + Argv: []string{"/bin/sleep", "1"}, + WorkingDirectory: "/", + KUID: 0, + } + pid, err := containers[0].Execute(args) + if err != nil { + t.Fatalf("error executing: %v", err) + } + + // Wait for the exec'd process to exit. + if err := waitForProcessList(containers[0], expectedPL[:1]); err != nil { + t.Fatalf("failed to wait for second container to stop: %v", err) + } + + // Get the exit status from the exec'd process. + if ws, err := containers[0].WaitPID(pid, true /* clearStatus */); err != nil { + t.Fatalf("failed to wait for process %+v with pid %d: %v", args, pid, err) + } else if es := ws.ExitStatus(); es != 0 { + t.Fatalf("process %+v exited with non-zero status %d", args, es) + } + if _, err := containers[0].WaitPID(pid, true /* clearStatus */); err == nil { + t.Fatalf("wait for stopped process %+v should fail", args) + } +} + // TestMultiContainerMount tests that bind mounts can be used with multiple // containers. func TestMultiContainerMount(t *testing.T) { diff --git a/runsc/sandbox/sandbox.go b/runsc/sandbox/sandbox.go index 8c4d0d495..3b10fd20e 100644 --- a/runsc/sandbox/sandbox.go +++ b/runsc/sandbox/sandbox.go @@ -522,7 +522,7 @@ func (s *Sandbox) Wait(cid string) (syscall.WaitStatus, error) { // WaitPID waits for process 'pid' in the container's sandbox and returns its // WaitStatus. -func (s *Sandbox) WaitPID(pid int32, cid string) (syscall.WaitStatus, error) { +func (s *Sandbox) WaitPID(cid string, pid int32, clearStatus bool) (syscall.WaitStatus, error) { log.Debugf("Waiting for PID %d in sandbox %q", pid, s.ID) var ws syscall.WaitStatus conn, err := s.sandboxConnect() @@ -532,8 +532,9 @@ func (s *Sandbox) WaitPID(pid int32, cid string) (syscall.WaitStatus, error) { defer conn.Close() args := &boot.WaitPIDArgs{ - PID: pid, - CID: cid, + PID: pid, + CID: cid, + ClearStatus: clearStatus, } if err := conn.Call(boot.ContainerWaitPID, args, &ws); err != nil { return ws, fmt.Errorf("error waiting on PID %d in sandbox %q: %v", pid, s.ID, err) |