author: Kevin Krakauer <krakauer@google.com> 2018-09-17 16:24:05 -0700
committer: Shentubot <shentubot@google.com> 2018-09-17 16:25:24 -0700
commit: bb88c187c5457df14fa78e5e6b6f48cbc90fb489 (patch)
tree: a92886651d7657480b7f696ebe7a5f774916a1cb /runsc/boot
parent: ab6fa44588233fa48d1ae0bf7d9b0d9e984a6af0 (diff)
runsc: Enable waiting on exited processes.
This makes `runsc wait` behave more like waitpid()/wait4() in that:
- Once a process has run to completion, you can wait on it and get its exit code.
- Processes not waited on will consume memory (like a zombie process).

PiperOrigin-RevId: 213358916
Change-Id: I5b5eca41ce71eea68e447380df8c38361a4d1558
Diffstat (limited to 'runsc/boot')
-rw-r--r--  runsc/boot/controller.go   33
-rw-r--r--  runsc/boot/loader.go       114
-rw-r--r--  runsc/boot/loader_test.go  25
3 files changed, 117 insertions(+), 55 deletions(-)
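The commit message describes exit statuses that are retained until a caller waits for them, much like zombie processes. As a rough illustration of that idea (the types and names below are hypothetical, not the gVisor implementation), a status cache keyed by container ID and PID could look like this:

package main

import (
	"fmt"
	"sync"
)

// execID identifies one exec'd process by container ID and PID, mirroring
// the key type introduced in loader.go below.
type execID struct {
	cid string
	pid int32
}

// statusCache retains exit statuses of finished processes until a caller
// waits with clear=true, analogous to a zombie process table.
type statusCache struct {
	mu       sync.Mutex
	statuses map[execID]uint32
}

func newStatusCache() *statusCache {
	return &statusCache{statuses: make(map[execID]uint32)}
}

// record stores the exit status of a process that has run to completion.
func (c *statusCache) record(id execID, status uint32) {
	c.mu.Lock()
	defer c.mu.Unlock()
	c.statuses[id] = status
}

// wait returns the retained status; with clear=true the entry is removed,
// so the memory it consumes is released.
func (c *statusCache) wait(id execID, clear bool) (uint32, bool) {
	c.mu.Lock()
	defer c.mu.Unlock()
	ws, ok := c.statuses[id]
	if ok && clear {
		delete(c.statuses, id)
	}
	return ws, ok
}

func main() {
	cache := newStatusCache()
	cache.record(execID{cid: "foo", pid: 42}, 0)
	ws, ok := cache.wait(execID{cid: "foo", pid: 42}, true)
	fmt.Println(ws, ok) // 0 true; a second wait would find nothing.
}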
diff --git a/runsc/boot/controller.go b/runsc/boot/controller.go
index 4d41dcd6c..dc9359092 100644
--- a/runsc/boot/controller.go
+++ b/runsc/boot/controller.go
@@ -242,32 +242,11 @@ type ExecArgs struct {
// returns the pid of the new process.
func (cm *containerManager) ExecuteAsync(args *ExecArgs, pid *int32) error {
log.Debugf("containerManager.ExecuteAsync: %+v", args)
-
- // Get the container Root Dirent from the Task, since we must run this
- // process with the same Root.
- cm.l.mu.Lock()
- tg, ok := cm.l.containerRootTGs[args.CID]
- cm.l.mu.Unlock()
- if !ok {
- return fmt.Errorf("cannot exec in container %q: no such container", args.CID)
- }
- tg.Leader().WithMuLocked(func(t *kernel.Task) {
- args.Root = t.FSContext().RootDirectory()
- })
- if args.Root != nil {
- defer args.Root.DecRef()
- }
-
- // Start the process.
- proc := control.Proc{Kernel: cm.l.k}
- newTG, err := control.ExecAsync(&proc, &args.ExecArgs)
+ tgid, err := cm.l.executeAsync(&args.ExecArgs, args.CID)
if err != nil {
- return fmt.Errorf("error executing: %+v: %v", args, err)
+ return err
}
-
- // Return the pid of the newly-created process.
- ts := cm.l.k.TaskSet()
- *pid = int32(ts.Root.IDOfThreadGroup(newTG))
+ *pid = int32(tgid)
return nil
}
@@ -409,12 +388,16 @@ type WaitPIDArgs struct {
// CID is the container ID.
CID string
+
+ // ClearStatus determines whether the exit status of the process should
+ // be cleared when WaitPID returns.
+ ClearStatus bool
}
// WaitPID waits for the process with PID 'pid' in the sandbox.
func (cm *containerManager) WaitPID(args *WaitPIDArgs, waitStatus *uint32) error {
log.Debugf("containerManager.Wait")
- return cm.l.waitPID(kernel.ThreadID(args.PID), args.CID, waitStatus)
+ return cm.l.waitPID(kernel.ThreadID(args.PID), args.CID, args.ClearStatus, waitStatus)
}
// SignalArgs are arguments to the Signal method.
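The new ClearStatus field lets the caller decide whether WaitPID consumes the retained exit status or leaves it cached for a later wait. Assuming the waitStatus value follows the usual wait(2) encoding, a caller could decode it with Go's syscall.WaitStatus; a small caller-side sketch (describeWaitStatus is an illustrative helper, not part of this change):

package main

import (
	"fmt"
	"syscall"
)

// describeWaitStatus decodes a raw wait status, the same way wait4()
// results are normally interpreted.
func describeWaitStatus(raw uint32) string {
	ws := syscall.WaitStatus(raw)
	switch {
	case ws.Exited():
		return fmt.Sprintf("exited with code %d", ws.ExitStatus())
	case ws.Signaled():
		return fmt.Sprintf("killed by signal %v", ws.Signal())
	default:
		return fmt.Sprintf("raw status %#x", raw)
	}
}

func main() {
	// 0x100 carries exit code 1 in the high byte of the wait status.
	fmt.Println(describeWaitStatus(0x100))
}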
diff --git a/runsc/boot/loader.go b/runsc/boot/loader.go
index 5e9ccb96f..665240ab6 100644
--- a/runsc/boot/loader.go
+++ b/runsc/boot/loader.go
@@ -31,6 +31,7 @@ import (
"gvisor.googlesource.com/gvisor/pkg/cpuid"
"gvisor.googlesource.com/gvisor/pkg/log"
"gvisor.googlesource.com/gvisor/pkg/sentry/arch"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/control"
"gvisor.googlesource.com/gvisor/pkg/sentry/inet"
"gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
"gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth"
@@ -103,7 +104,7 @@ type Loader struct {
// sandboxID is the ID for the whole sandbox.
sandboxID string
- // mu guards containerRootTGs.
+ // mu guards containerRootTGs and execProcesses.
mu sync.Mutex
// containerRootTGs maps container IDs to their root processes. It
@@ -111,7 +112,24 @@ type Loader struct {
// call methods on particular containers.
//
// containerRootTGs is guarded by mu.
+ //
+ // TODO: When containers are removed via `runsc delete`,
+ // containerRootTGs should be cleaned up.
containerRootTGs map[string]*kernel.ThreadGroup
+
+ // execProcesses maps each invocation of exec to the process it spawns.
+ //
+ // execProcesses is guarded by mu.
+ //
+ // TODO: When containers are removed via `runsc delete`,
+ // execProcesses should be cleaned up.
+ execProcesses map[execID]*kernel.ThreadGroup
+}
+
+// execID uniquely identifies a sentry process.
+type execID struct {
+ cid string
+ pid kernel.ThreadID
}
func init() {
@@ -385,7 +403,8 @@ func (l *Loader) run() error {
}
// Create the root container init task.
- if _, err := l.k.CreateProcess(l.rootProcArgs); err != nil {
+ _, _, err := l.k.CreateProcess(l.rootProcArgs)
+ if err != nil {
return fmt.Errorf("failed to create init process: %v", err)
}
@@ -393,6 +412,11 @@ func (l *Loader) run() error {
l.rootProcArgs.FDMap.DecRef()
}
+ if l.execProcesses != nil {
+ return fmt.Errorf("there shouldn't already be a cache of exec'd processes, but found: %v", l.execProcesses)
+ }
+ l.execProcesses = make(map[execID]*kernel.ThreadGroup)
+
// Start signal forwarding only after an init process is created.
l.stopSignalForwarding = l.startSignalForwarding()
@@ -467,7 +491,7 @@ func (l *Loader) startContainer(k *kernel.Kernel, spec *specs.Spec, conf *Config
return fmt.Errorf("error setting executable path for %+v: %v", procArgs, err)
}
- tg, err := l.k.CreateProcess(procArgs)
+ tg, _, err := l.k.CreateProcess(procArgs)
if err != nil {
return fmt.Errorf("failed to create process in sentry: %v", err)
}
@@ -482,6 +506,40 @@ func (l *Loader) startContainer(k *kernel.Kernel, spec *specs.Spec, conf *Config
return nil
}
+func (l *Loader) executeAsync(args *control.ExecArgs, cid string) (kernel.ThreadID, error) {
+ // Get the container Root Dirent from the Task, since we must run this
+ // process with the same Root.
+ l.mu.Lock()
+ tg, ok := l.containerRootTGs[cid]
+ l.mu.Unlock()
+ if !ok {
+ return 0, fmt.Errorf("cannot exec in container %q: no such container", cid)
+ }
+ tg.Leader().WithMuLocked(func(t *kernel.Task) {
+ args.Root = t.FSContext().RootDirectory()
+ })
+ if args.Root != nil {
+ defer args.Root.DecRef()
+ }
+
+ // Start the process.
+ proc := control.Proc{Kernel: l.k}
+ tg, tgid, err := control.ExecAsync(&proc, args)
+ if err != nil {
+ return 0, fmt.Errorf("error executing: %+v: %v", args, err)
+ }
+
+ // Insert the process into execProcesses so that we can wait on it
+ // later.
+ l.mu.Lock()
+ defer l.mu.Unlock()
+ eid := execID{cid: cid, pid: tgid}
+ l.execProcesses[eid] = tg
+ log.Debugf("updated execProcesses: %v", l.execProcesses)
+
+ return tgid, nil
+}
+
// TODO: Per-container namespaces must be supported for -pid.
// waitContainer waits for the root process of a container to exit.
@@ -500,39 +558,59 @@ func (l *Loader) waitContainer(cid string, waitStatus *uint32) error {
// consider the container exited.
// TODO: Multiple calls to waitContainer() should return
// the same exit status.
- defer func() {
- l.mu.Lock()
- defer l.mu.Unlock()
- // TODO: Containers don't map 1:1 with their root
- // processes. Container exits should be managed explicitly
- // rather than via PID.
- delete(l.containerRootTGs, cid)
- }()
- l.wait(tg, waitStatus)
+ ws := l.wait(tg)
+ *waitStatus = ws
+
+ l.mu.Lock()
+ defer l.mu.Unlock()
+ delete(l.containerRootTGs, cid)
+
return nil
}
-func (l *Loader) waitPID(tgid kernel.ThreadID, cid string, waitStatus *uint32) error {
+func (l *Loader) waitPID(tgid kernel.ThreadID, cid string, clearStatus bool, waitStatus *uint32) error {
// TODO: Containers all currently share a PID namespace.
// When per-container PID namespaces are supported, wait should use cid
// to find the appropriate PID namespace.
/*if cid != l.sandboxID {
return errors.New("non-sandbox PID namespaces are not yet implemented")
}*/
- // TODO: This won't work if the exec process already exited.
- tg := l.k.TaskSet().Root.ThreadGroupWithID(kernel.ThreadID(tgid))
+
+ // If the process was started via runsc exec, it will have an
+ // entry in l.execProcesses.
+ l.mu.Lock()
+ eid := execID{cid: cid, pid: tgid}
+ tg, ok := l.execProcesses[eid]
+ l.mu.Unlock()
+ if ok {
+ ws := l.wait(tg)
+ *waitStatus = ws
+ if clearStatus {
+ // Remove tg from the cache.
+ l.mu.Lock()
+ delete(l.execProcesses, eid)
+ log.Debugf("updated execProcesses (removal): %v", l.execProcesses)
+ l.mu.Unlock()
+ }
+ return nil
+ }
+
+ // This process wasn't created by runsc exec or start, so just find it
+ // by pid and hope it hasn't exited yet.
+ tg = l.k.TaskSet().Root.ThreadGroupWithID(kernel.ThreadID(tgid))
if tg == nil {
return fmt.Errorf("no thread group with ID %d", tgid)
}
- l.wait(tg, waitStatus)
+ ws := l.wait(tg)
+ *waitStatus = ws
return nil
}
// wait waits for the process with TGID 'tgid' in a container's PID namespace
// to exit.
-func (l *Loader) wait(tg *kernel.ThreadGroup, waitStatus *uint32) {
+func (l *Loader) wait(tg *kernel.ThreadGroup) uint32 {
tg.WaitExited()
- *waitStatus = tg.ExitStatus().Status()
+ return tg.ExitStatus().Status()
}
func (l *Loader) setRootContainerID(cid string) {
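In the waitPID path above, l.mu is held only around the execProcesses lookup and the optional deletion, never across the blocking wait itself, so one long wait does not stall other control calls. A stripped-down sketch of that lookup-then-wait-then-clear pattern (the proc type and its channel-based exit notification are stand-ins, not the sentry's ThreadGroup):

package main

import (
	"fmt"
	"sync"
)

// proc is a hypothetical stand-in for a thread group: done is closed when
// the process exits and status holds its exit code afterwards.
type proc struct {
	done   chan struct{}
	status uint32
}

type waiter struct {
	mu    sync.Mutex
	procs map[string]*proc
}

// waitPID looks the process up under the lock, then waits with the lock
// released so that other callers are not blocked behind a long wait.
func (w *waiter) waitPID(id string, clear bool) (uint32, error) {
	w.mu.Lock()
	p, ok := w.procs[id]
	w.mu.Unlock()
	if !ok {
		return 0, fmt.Errorf("no process with ID %q", id)
	}

	<-p.done // Block until the process has exited.

	if clear {
		w.mu.Lock()
		delete(w.procs, id)
		w.mu.Unlock()
	}
	return p.status, nil
}

func main() {
	p := &proc{done: make(chan struct{}), status: 0}
	w := &waiter{procs: map[string]*proc{"foo/42": p}}
	close(p.done) // Simulate the process exiting.
	ws, err := w.waitPID("foo/42", true)
	fmt.Println(ws, err)
}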
diff --git a/runsc/boot/loader_test.go b/runsc/boot/loader_test.go
index 9398292ff..a8a796445 100644
--- a/runsc/boot/loader_test.go
+++ b/runsc/boot/loader_test.go
@@ -111,11 +111,11 @@ func createLoader() (*Loader, func(), error) {
// TestRun runs a simple application in a sandbox and checks that it succeeds.
func TestRun(t *testing.T) {
- s, cleanup, err := createLoader()
+ l, cleanup, err := createLoader()
if err != nil {
t.Fatalf("error creating loader: %v", err)
}
- defer s.Destroy()
+ defer l.Destroy()
defer cleanup()
// Start a goroutine to read the start chan result, otherwise Run will
@@ -124,12 +124,13 @@ func TestRun(t *testing.T) {
var wg sync.WaitGroup
wg.Add(1)
go func() {
- resultChanErr = <-s.ctrl.manager.startResultChan
+ resultChanErr = <-l.ctrl.manager.startResultChan
wg.Done()
}()
- // Run the container..
- if err := s.Run(); err != nil {
+ // Run the container.
+ l.setRootContainerID("foo")
+ if err := l.Run(); err != nil {
t.Errorf("error running container: %v", err)
}
@@ -140,7 +141,7 @@ func TestRun(t *testing.T) {
}
// Wait for the application to exit. It should succeed.
- if status := s.WaitExit(); status.Code != 0 || status.Signo != 0 {
+ if status := l.WaitExit(); status.Code != 0 || status.Signo != 0 {
t.Errorf("application exited with status %+v, want 0", status)
}
}
@@ -148,24 +149,24 @@ func TestRun(t *testing.T) {
// TestStartSignal tests that the controller Start message will cause
// WaitForStartSignal to return.
func TestStartSignal(t *testing.T) {
- s, cleanup, err := createLoader()
+ l, cleanup, err := createLoader()
if err != nil {
t.Fatalf("error creating loader: %v", err)
}
- defer s.Destroy()
+ defer l.Destroy()
defer cleanup()
// We aren't going to wait on this application, so the control server
// needs to be shut down manually.
- defer s.ctrl.srv.Stop()
+ defer l.ctrl.srv.Stop()
// Start a goroutine that calls WaitForStartSignal and writes to a
// channel when it returns.
waitFinished := make(chan struct{})
go func() {
- s.WaitForStartSignal()
+ l.WaitForStartSignal()
// Pretend that Run() executed and returned no error.
- s.ctrl.manager.startResultChan <- nil
+ l.ctrl.manager.startResultChan <- nil
waitFinished <- struct{}{}
}()
@@ -181,7 +182,7 @@ func TestStartSignal(t *testing.T) {
// Trigger the control server StartRoot method.
cid := "foo"
- if err := s.ctrl.manager.StartRoot(&cid, nil); err != nil {
+ if err := l.ctrl.manager.StartRoot(&cid, nil); err != nil {
t.Errorf("error calling StartRoot: %v", err)
}