runsc: Enable waiting on exited processes.

This makes `runsc wait` behave more like waitpid()/wait4() in that:
- Once a process has run to completion, you can wait on it and get its exit
  code.
- Processes not waited on will consume memory (like a zombie process)

PiperOrigin-RevId: 213358916
Change-Id: I5b5eca41ce71eea68e447380df8c38361a4d1558
author: Kevin Krakauer <krakauer@google.com> 2018-09-17 16:24:05 -0700
committer: Shentubot <shentubot@google.com> 2018-09-17 16:25:24 -0700
commit: bb88c187c5457df14fa78e5e6b6f48cbc90fb489 (patch)
tree: a92886651d7657480b7f696ebe7a5f774916a1cb /runsc/boot
parent: ab6fa44588233fa48d1ae0bf7d9b0d9e984a6af0 (diff)
3 files changed, 117 insertions, 55 deletions
diff --git a/runsc/boot/controller.go b/runsc/boot/controller.go
index 4d41dcd6c..dc9359092 100644
--- a/runsc/boot/controller.go
+++ b/runsc/boot/controller.go
@@ -242,32 +242,11 @@ type ExecArgs struct {
 // returns the pid of the new process.
 func (cm *containerManager) ExecuteAsync(args *ExecArgs, pid *int32) error {
 	log.Debugf("containerManager.ExecuteAsync: %+v", args)
-
-	// Get the container Root Dirent from the Task, since we must run this
-	// process with the same Root.
-	cm.l.mu.Lock()
-	tg, ok := cm.l.containerRootTGs[args.CID]
-	cm.l.mu.Unlock()
-	if !ok {
-		return fmt.Errorf("cannot exec in container %q: no such container", args.CID)
-	}
-	tg.Leader().WithMuLocked(func(t *kernel.Task) {
-		args.Root = t.FSContext().RootDirectory()
-	})
-	if args.Root != nil {
-		defer args.Root.DecRef()
-	}
-
-	// Start the process.
-	proc := control.Proc{Kernel: cm.l.k}
-	newTG, err := control.ExecAsync(&proc, &args.ExecArgs)
+	tgid, err := cm.l.executeAsync(&args.ExecArgs, args.CID)
 	if err != nil {
-		return fmt.Errorf("error executing: %+v: %v", args, err)
+		return err
 	}
-
-	// Return the pid of the newly-created process.
-	ts := cm.l.k.TaskSet()
-	*pid = int32(ts.Root.IDOfThreadGroup(newTG))
+	*pid = int32(tgid)
 	return nil
 }
 
@@ -409,12 +388,16 @@ type WaitPIDArgs struct {
 
 	// CID is the container ID.
 	CID string
+
+	// ClearStatus determines whether the exit status of the process should
+	// be cleared when WaitPID returns.
+	ClearStatus bool
 }
 
 // WaitPID waits for the process with PID 'pid' in the sandbox.
 func (cm *containerManager) WaitPID(args *WaitPIDArgs, waitStatus *uint32) error {
 	log.Debugf("containerManager.Wait")
-	return cm.l.waitPID(kernel.ThreadID(args.PID), args.CID, waitStatus)
+	return cm.l.waitPID(kernel.ThreadID(args.PID), args.CID, args.ClearStatus, waitStatus)
 }
 
 // SignalArgs are arguments to the Signal method.
diff --git a/runsc/boot/loader.go b/runsc/boot/loader.go
index 5e9ccb96f..665240ab6 100644
--- a/runsc/boot/loader.go
+++ b/runsc/boot/loader.go
@@ -31,6 +31,7 @@ import (
 	"gvisor.googlesource.com/gvisor/pkg/cpuid"
 	"gvisor.googlesource.com/gvisor/pkg/log"
 	"gvisor.googlesource.com/gvisor/pkg/sentry/arch"
+	"gvisor.googlesource.com/gvisor/pkg/sentry/control"
 	"gvisor.googlesource.com/gvisor/pkg/sentry/inet"
 	"gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
 	"gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth"
@@ -103,7 +104,7 @@ type Loader struct {
 	// sandboxID is the ID for the whole sandbox.
 	sandboxID string
 
-	// mu guards containerRootTGs.
+	// mu guards containerRootTGs and execProcesses.
 	mu sync.Mutex
 
 	// containerRootTGs maps container IDs to their root processes. It
@@ -111,7 +112,24 @@ type Loader struct {
 	// call methods on particular containers.
 	//
 	// containerRootTGs is guarded by mu.
+	//
+	// TODO: When containers are removed via `runsc delete`,
+	// containerRootTGs should be cleaned up.
 	containerRootTGs map[string]*kernel.ThreadGroup
+
+	// execProcesses maps each invocation of exec to the process it spawns.
+	//
+	// execProcesses is guarded by mu.
+	//
+	// TODO: When containers are removed via `runsc delete`,
+	// execProcesses should be cleaned up.
+	execProcesses map[execID]*kernel.ThreadGroup
+}
+
+// execID uniquely identifies a sentry process.
+type execID struct {
+	cid string
+	pid kernel.ThreadID
 }
 
 func init() {
@@ -385,7 +403,8 @@ func (l *Loader) run() error {
 		}
 
 		// Create the root container init task.
-		if _, err := l.k.CreateProcess(l.rootProcArgs); err != nil {
+		_, _, err := l.k.CreateProcess(l.rootProcArgs)
+		if err != nil {
 			return fmt.Errorf("failed to create init process: %v", err)
 		}
 
@@ -393,6 +412,11 @@ func (l *Loader) run() error {
 		l.rootProcArgs.FDMap.DecRef()
 	}
 
+	if l.execProcesses != nil {
+		return fmt.Errorf("there shouldn't already be a cache of exec'd processes, but found: %v", l.execProcesses)
+	}
+	l.execProcesses = make(map[execID]*kernel.ThreadGroup)
+
 	// Start signal forwarding only after an init process is created.
 	l.stopSignalForwarding = l.startSignalForwarding()
 
@@ -467,7 +491,7 @@ func (l *Loader) startContainer(k *kernel.Kernel, spec *specs.Spec, conf *Config
 		return fmt.Errorf("error setting executable path for %+v: %v", procArgs, err)
 	}
 
-	tg, err := l.k.CreateProcess(procArgs)
+	tg, _, err := l.k.CreateProcess(procArgs)
 	if err != nil {
 		return fmt.Errorf("failed to create process in sentry: %v", err)
 	}
@@ -482,6 +506,40 @@ func (l *Loader) startContainer(k *kernel.Kernel, spec *specs.Spec, conf *Config
 	return nil
 }
 
+func (l *Loader) executeAsync(args *control.ExecArgs, cid string) (kernel.ThreadID, error) {
+	// Get the container Root Dirent from the Task, since we must run this
+	// process with the same Root.
+	l.mu.Lock()
+	tg, ok := l.containerRootTGs[cid]
+	l.mu.Unlock()
+	if !ok {
+		return 0, fmt.Errorf("cannot exec in container %q: no such container", cid)
+	}
+	tg.Leader().WithMuLocked(func(t *kernel.Task) {
+		args.Root = t.FSContext().RootDirectory()
+	})
+	if args.Root != nil {
+		defer args.Root.DecRef()
+	}
+
+	// Start the process.
+	proc := control.Proc{Kernel: l.k}
+	tg, tgid, err := control.ExecAsync(&proc, args)
+	if err != nil {
+		return 0, fmt.Errorf("error executing: %+v: %v", args, err)
+	}
+
+	// Insert the process into execProcesses so that we can wait on it
+	// later.
+	l.mu.Lock()
+	defer l.mu.Unlock()
+	eid := execID{cid: cid, pid: tgid}
+	l.execProcesses[eid] = tg
+	log.Debugf("updated execProcesses: %v", l.execProcesses)
+
+	return tgid, nil
+}
+
 // TODO: Per-container namespaces must be supported for -pid.
 
 // waitContainer waits for the root process of a container to exit.
@@ -500,39 +558,59 @@ func (l *Loader) waitContainer(cid string, waitStatus *uint32) error {
 	// consider the container exited.
 	// TODO: Multiple calls to waitContainer() should return
 	// the same exit status.
-	defer func() {
-		l.mu.Lock()
-		defer l.mu.Unlock()
-		// TODO: Containers don't map 1:1 with their root
-		// processes. Container exits should be managed explicitly
-		// rather than via PID.
-		delete(l.containerRootTGs, cid)
-	}()
-	l.wait(tg, waitStatus)
+	ws := l.wait(tg)
+	*waitStatus = ws
+
+	l.mu.Lock()
+	defer l.mu.Unlock()
+	delete(l.containerRootTGs, cid)
+
 	return nil
 }
 
-func (l *Loader) waitPID(tgid kernel.ThreadID, cid string, waitStatus *uint32) error {
+func (l *Loader) waitPID(tgid kernel.ThreadID, cid string, clearStatus bool, waitStatus *uint32) error {
 	// TODO: Containers all currently share a PID namespace.
 	// When per-container PID namespaces are supported, wait should use cid
 	// to find the appropriate PID namespace.
 	/*if cid != l.sandboxID {
 		return errors.New("non-sandbox PID namespaces are not yet implemented")
 	}*/
-	// TODO: This won't work if the exec process already exited.
-	tg := l.k.TaskSet().Root.ThreadGroupWithID(kernel.ThreadID(tgid))
+
+	// If the process was started via runsc exec, it will have an
+	// entry in l.execProcesses.
+	l.mu.Lock()
+	eid := execID{cid: cid, pid: tgid}
+	tg, ok := l.execProcesses[eid]
+	l.mu.Unlock()
+	if ok {
+		ws := l.wait(tg)
+		*waitStatus = ws
+		if clearStatus {
+			// Remove tg from the cache.
+			l.mu.Lock()
+			delete(l.execProcesses, eid)
+			log.Debugf("updated execProcesses (removal): %v", l.execProcesses)
+			l.mu.Unlock()
+		}
+		return nil
+	}
+
+	// This process wasn't created by runsc exec or start, so just find it
+	// by pid and hope it hasn't exited yet.
+	tg = l.k.TaskSet().Root.ThreadGroupWithID(kernel.ThreadID(tgid))
 	if tg == nil {
 		return fmt.Errorf("no thread group with ID %d", tgid)
 	}
-	l.wait(tg, waitStatus)
+	ws := l.wait(tg)
+	*waitStatus = ws
 	return nil
 }
 
 // wait waits for the process with TGID 'tgid' in a container's PID namespace
 // to exit.
-func (l *Loader) wait(tg *kernel.ThreadGroup, waitStatus *uint32) {
+func (l *Loader) wait(tg *kernel.ThreadGroup) uint32 {
 	tg.WaitExited()
-	*waitStatus = tg.ExitStatus().Status()
+	return tg.ExitStatus().Status()
 }
 
 func (l *Loader) setRootContainerID(cid string) {
diff --git a/runsc/boot/loader_test.go b/runsc/boot/loader_test.go
index 9398292ff..a8a796445 100644
--- a/runsc/boot/loader_test.go
+++ b/runsc/boot/loader_test.go
@@ -111,11 +111,11 @@ func createLoader() (*Loader, func(), error) {
 
 // TestRun runs a simple application in a sandbox and checks that it succeeds.
 func TestRun(t *testing.T) {
-	s, cleanup, err := createLoader()
+	l, cleanup, err := createLoader()
 	if err != nil {
 		t.Fatalf("error creating loader: %v", err)
 	}
-	defer s.Destroy()
+	defer l.Destroy()
 	defer cleanup()
 
 	// Start a goroutine to read the start chan result, otherwise Run will
@@ -124,12 +124,13 @@ func TestRun(t *testing.T) {
 	var wg sync.WaitGroup
 	wg.Add(1)
 	go func() {
-		resultChanErr = <-s.ctrl.manager.startResultChan
+		resultChanErr = <-l.ctrl.manager.startResultChan
 		wg.Done()
 	}()
 
-	// Run the container..
-	if err := s.Run(); err != nil {
+	// Run the container.
+	l.setRootContainerID("foo")
+	if err := l.Run(); err != nil {
 		t.Errorf("error running container: %v", err)
 	}
 
@@ -140,7 +141,7 @@ func TestRun(t *testing.T) {
 	}
 
 	// Wait for the application to exit.  It should succeed.
-	if status := s.WaitExit(); status.Code != 0 || status.Signo != 0 {
+	if status := l.WaitExit(); status.Code != 0 || status.Signo != 0 {
 		t.Errorf("application exited with status %+v, want 0", status)
 	}
 }
@@ -148,24 +149,24 @@ func TestRun(t *testing.T) {
 // TestStartSignal tests that the controller Start message will cause
 // WaitForStartSignal to return.
 func TestStartSignal(t *testing.T) {
-	s, cleanup, err := createLoader()
+	l, cleanup, err := createLoader()
 	if err != nil {
 		t.Fatalf("error creating loader: %v", err)
 	}
-	defer s.Destroy()
+	defer l.Destroy()
 	defer cleanup()
 
 	// We aren't going to wait on this application, so the control server
 	// needs to be shut down manually.
-	defer s.ctrl.srv.Stop()
+	defer l.ctrl.srv.Stop()
 
 	// Start a goroutine that calls WaitForStartSignal and writes to a
 	// channel when it returns.
 	waitFinished := make(chan struct{})
 	go func() {
-		s.WaitForStartSignal()
+		l.WaitForStartSignal()
 		// Pretend that Run() executed and returned no error.
-		s.ctrl.manager.startResultChan <- nil
+		l.ctrl.manager.startResultChan <- nil
 		waitFinished <- struct{}{}
 	}()
 
@@ -181,7 +182,7 @@ func TestStartSignal(t *testing.T) {
 
 	// Trigger the control server StartRoot method.
 	cid := "foo"
-	if err := s.ctrl.manager.StartRoot(&cid, nil); err != nil {
+	if err := l.ctrl.manager.StartRoot(&cid, nil); err != nil {
 		t.Errorf("error calling StartRoot: %v", err)
 	}
author	Kevin Krakauer <krakauer@google.com>	2018-09-17 16:24:05 -0700
committer	Shentubot <shentubot@google.com>	2018-09-17 16:25:24 -0700
commit	bb88c187c5457df14fa78e5e6b6f48cbc90fb489 (patch)
tree	a92886651d7657480b7f696ebe7a5f774916a1cb /runsc/boot
parent	ab6fa44588233fa48d1ae0bf7d9b0d9e984a6af0 (diff)