diff options
author | Kevin Krakauer <krakauer@google.com> | 2018-09-17 16:24:05 -0700 |
---|---|---|
committer | Shentubot <shentubot@google.com> | 2018-09-17 16:25:24 -0700 |
commit | bb88c187c5457df14fa78e5e6b6f48cbc90fb489 (patch) | |
tree | a92886651d7657480b7f696ebe7a5f774916a1cb /runsc/boot | |
parent | ab6fa44588233fa48d1ae0bf7d9b0d9e984a6af0 (diff) |
runsc: Enable waiting on exited processes.
This makes `runsc wait` behave more like waitpid()/wait4() in that:
- Once a process has run to completion, you can wait on it and get its exit
code.
- Processes not waited on will consume memory (like a zombie process)
PiperOrigin-RevId: 213358916
Change-Id: I5b5eca41ce71eea68e447380df8c38361a4d1558
Diffstat (limited to 'runsc/boot')
-rw-r--r-- | runsc/boot/controller.go | 33 | ||||
-rw-r--r-- | runsc/boot/loader.go | 114 | ||||
-rw-r--r-- | runsc/boot/loader_test.go | 25 |
3 files changed, 117 insertions, 55 deletions
diff --git a/runsc/boot/controller.go b/runsc/boot/controller.go index 4d41dcd6c..dc9359092 100644 --- a/runsc/boot/controller.go +++ b/runsc/boot/controller.go @@ -242,32 +242,11 @@ type ExecArgs struct { // returns the pid of the new process. func (cm *containerManager) ExecuteAsync(args *ExecArgs, pid *int32) error { log.Debugf("containerManager.ExecuteAsync: %+v", args) - - // Get the container Root Dirent from the Task, since we must run this - // process with the same Root. - cm.l.mu.Lock() - tg, ok := cm.l.containerRootTGs[args.CID] - cm.l.mu.Unlock() - if !ok { - return fmt.Errorf("cannot exec in container %q: no such container", args.CID) - } - tg.Leader().WithMuLocked(func(t *kernel.Task) { - args.Root = t.FSContext().RootDirectory() - }) - if args.Root != nil { - defer args.Root.DecRef() - } - - // Start the process. - proc := control.Proc{Kernel: cm.l.k} - newTG, err := control.ExecAsync(&proc, &args.ExecArgs) + tgid, err := cm.l.executeAsync(&args.ExecArgs, args.CID) if err != nil { - return fmt.Errorf("error executing: %+v: %v", args, err) + return err } - - // Return the pid of the newly-created process. - ts := cm.l.k.TaskSet() - *pid = int32(ts.Root.IDOfThreadGroup(newTG)) + *pid = int32(tgid) return nil } @@ -409,12 +388,16 @@ type WaitPIDArgs struct { // CID is the container ID. CID string + + // ClearStatus determines whether the exit status of the process should + // be cleared when WaitPID returns. + ClearStatus bool } // WaitPID waits for the process with PID 'pid' in the sandbox. func (cm *containerManager) WaitPID(args *WaitPIDArgs, waitStatus *uint32) error { log.Debugf("containerManager.Wait") - return cm.l.waitPID(kernel.ThreadID(args.PID), args.CID, waitStatus) + return cm.l.waitPID(kernel.ThreadID(args.PID), args.CID, args.ClearStatus, waitStatus) } // SignalArgs are arguments to the Signal method. diff --git a/runsc/boot/loader.go b/runsc/boot/loader.go index 5e9ccb96f..665240ab6 100644 --- a/runsc/boot/loader.go +++ b/runsc/boot/loader.go @@ -31,6 +31,7 @@ import ( "gvisor.googlesource.com/gvisor/pkg/cpuid" "gvisor.googlesource.com/gvisor/pkg/log" "gvisor.googlesource.com/gvisor/pkg/sentry/arch" + "gvisor.googlesource.com/gvisor/pkg/sentry/control" "gvisor.googlesource.com/gvisor/pkg/sentry/inet" "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth" @@ -103,7 +104,7 @@ type Loader struct { // sandboxID is the ID for the whole sandbox. sandboxID string - // mu guards containerRootTGs. + // mu guards containerRootTGs and execProcesses. mu sync.Mutex // containerRootTGs maps container IDs to their root processes. It @@ -111,7 +112,24 @@ type Loader struct { // call methods on particular containers. // // containerRootTGs is guarded by mu. + // + // TODO: When containers are removed via `runsc delete`, + // containerRootTGs should be cleaned up. containerRootTGs map[string]*kernel.ThreadGroup + + // execProcesses maps each invocation of exec to the process it spawns. + // + // execProcesses is guardded by mu. + // + // TODO: When containers are removed via `runsc delete`, + // execProcesses should be cleaned up. + execProcesses map[execID]*kernel.ThreadGroup +} + +// execID uniquely identifies a sentry process. +type execID struct { + cid string + pid kernel.ThreadID } func init() { @@ -385,7 +403,8 @@ func (l *Loader) run() error { } // Create the root container init task. - if _, err := l.k.CreateProcess(l.rootProcArgs); err != nil { + _, _, err := l.k.CreateProcess(l.rootProcArgs) + if err != nil { return fmt.Errorf("failed to create init process: %v", err) } @@ -393,6 +412,11 @@ func (l *Loader) run() error { l.rootProcArgs.FDMap.DecRef() } + if l.execProcesses != nil { + return fmt.Errorf("there shouldn't already be a cache of exec'd processes, but found: %v", l.execProcesses) + } + l.execProcesses = make(map[execID]*kernel.ThreadGroup) + // Start signal forwarding only after an init process is created. l.stopSignalForwarding = l.startSignalForwarding() @@ -467,7 +491,7 @@ func (l *Loader) startContainer(k *kernel.Kernel, spec *specs.Spec, conf *Config return fmt.Errorf("error setting executable path for %+v: %v", procArgs, err) } - tg, err := l.k.CreateProcess(procArgs) + tg, _, err := l.k.CreateProcess(procArgs) if err != nil { return fmt.Errorf("failed to create process in sentry: %v", err) } @@ -482,6 +506,40 @@ func (l *Loader) startContainer(k *kernel.Kernel, spec *specs.Spec, conf *Config return nil } +func (l *Loader) executeAsync(args *control.ExecArgs, cid string) (kernel.ThreadID, error) { + // Get the container Root Dirent from the Task, since we must run this + // process with the same Root. + l.mu.Lock() + tg, ok := l.containerRootTGs[cid] + l.mu.Unlock() + if !ok { + return 0, fmt.Errorf("cannot exec in container %q: no such container", cid) + } + tg.Leader().WithMuLocked(func(t *kernel.Task) { + args.Root = t.FSContext().RootDirectory() + }) + if args.Root != nil { + defer args.Root.DecRef() + } + + // Start the process. + proc := control.Proc{Kernel: l.k} + tg, tgid, err := control.ExecAsync(&proc, args) + if err != nil { + return 0, fmt.Errorf("error executing: %+v: %v", args, err) + } + + // Insert the process into execProcesses so that we can wait on it + // later. + l.mu.Lock() + defer l.mu.Unlock() + eid := execID{cid: cid, pid: tgid} + l.execProcesses[eid] = tg + log.Debugf("updated execProcesses: %v", l.execProcesses) + + return tgid, nil +} + // TODO: Per-container namespaces must be supported for -pid. // waitContainer waits for the root process of a container to exit. @@ -500,39 +558,59 @@ func (l *Loader) waitContainer(cid string, waitStatus *uint32) error { // consider the container exited. // TODO: Multiple calls to waitContainer() should return // the same exit status. - defer func() { - l.mu.Lock() - defer l.mu.Unlock() - // TODO: Containers don't map 1:1 with their root - // processes. Container exits should be managed explicitly - // rather than via PID. - delete(l.containerRootTGs, cid) - }() - l.wait(tg, waitStatus) + ws := l.wait(tg) + *waitStatus = ws + + l.mu.Lock() + defer l.mu.Unlock() + delete(l.containerRootTGs, cid) + return nil } -func (l *Loader) waitPID(tgid kernel.ThreadID, cid string, waitStatus *uint32) error { +func (l *Loader) waitPID(tgid kernel.ThreadID, cid string, clearStatus bool, waitStatus *uint32) error { // TODO: Containers all currently share a PID namespace. // When per-container PID namespaces are supported, wait should use cid // to find the appropriate PID namespace. /*if cid != l.sandboxID { return errors.New("non-sandbox PID namespaces are not yet implemented") }*/ - // TODO: This won't work if the exec process already exited. - tg := l.k.TaskSet().Root.ThreadGroupWithID(kernel.ThreadID(tgid)) + + // If the process was started via runsc exec, it will have an + // entry in l.execProcesses. + l.mu.Lock() + eid := execID{cid: cid, pid: tgid} + tg, ok := l.execProcesses[eid] + l.mu.Unlock() + if ok { + ws := l.wait(tg) + *waitStatus = ws + if clearStatus { + // Remove tg from the cache. + l.mu.Lock() + delete(l.execProcesses, eid) + log.Debugf("updated execProcesses (removal): %v", l.execProcesses) + l.mu.Unlock() + } + return nil + } + + // This process wasn't created by runsc exec or start, so just find it + // by pid and hope it hasn't exited yet. + tg = l.k.TaskSet().Root.ThreadGroupWithID(kernel.ThreadID(tgid)) if tg == nil { return fmt.Errorf("no thread group with ID %d", tgid) } - l.wait(tg, waitStatus) + ws := l.wait(tg) + *waitStatus = ws return nil } // wait waits for the process with TGID 'tgid' in a container's PID namespace // to exit. -func (l *Loader) wait(tg *kernel.ThreadGroup, waitStatus *uint32) { +func (l *Loader) wait(tg *kernel.ThreadGroup) uint32 { tg.WaitExited() - *waitStatus = tg.ExitStatus().Status() + return tg.ExitStatus().Status() } func (l *Loader) setRootContainerID(cid string) { diff --git a/runsc/boot/loader_test.go b/runsc/boot/loader_test.go index 9398292ff..a8a796445 100644 --- a/runsc/boot/loader_test.go +++ b/runsc/boot/loader_test.go @@ -111,11 +111,11 @@ func createLoader() (*Loader, func(), error) { // TestRun runs a simple application in a sandbox and checks that it succeeds. func TestRun(t *testing.T) { - s, cleanup, err := createLoader() + l, cleanup, err := createLoader() if err != nil { t.Fatalf("error creating loader: %v", err) } - defer s.Destroy() + defer l.Destroy() defer cleanup() // Start a goroutine to read the start chan result, otherwise Run will @@ -124,12 +124,13 @@ func TestRun(t *testing.T) { var wg sync.WaitGroup wg.Add(1) go func() { - resultChanErr = <-s.ctrl.manager.startResultChan + resultChanErr = <-l.ctrl.manager.startResultChan wg.Done() }() - // Run the container.. - if err := s.Run(); err != nil { + // Run the container. + l.setRootContainerID("foo") + if err := l.Run(); err != nil { t.Errorf("error running container: %v", err) } @@ -140,7 +141,7 @@ func TestRun(t *testing.T) { } // Wait for the application to exit. It should succeed. - if status := s.WaitExit(); status.Code != 0 || status.Signo != 0 { + if status := l.WaitExit(); status.Code != 0 || status.Signo != 0 { t.Errorf("application exited with status %+v, want 0", status) } } @@ -148,24 +149,24 @@ func TestRun(t *testing.T) { // TestStartSignal tests that the controller Start message will cause // WaitForStartSignal to return. func TestStartSignal(t *testing.T) { - s, cleanup, err := createLoader() + l, cleanup, err := createLoader() if err != nil { t.Fatalf("error creating loader: %v", err) } - defer s.Destroy() + defer l.Destroy() defer cleanup() // We aren't going to wait on this application, so the control server // needs to be shut down manually. - defer s.ctrl.srv.Stop() + defer l.ctrl.srv.Stop() // Start a goroutine that calls WaitForStartSignal and writes to a // channel when it returns. waitFinished := make(chan struct{}) go func() { - s.WaitForStartSignal() + l.WaitForStartSignal() // Pretend that Run() executed and returned no error. - s.ctrl.manager.startResultChan <- nil + l.ctrl.manager.startResultChan <- nil waitFinished <- struct{}{} }() @@ -181,7 +182,7 @@ func TestStartSignal(t *testing.T) { // Trigger the control server StartRoot method. cid := "foo" - if err := s.ctrl.manager.StartRoot(&cid, nil); err != nil { + if err := l.ctrl.manager.StartRoot(&cid, nil); err != nil { t.Errorf("error calling StartRoot: %v", err) } |