summaryrefslogtreecommitdiffhomepage
path: root/runsc/container
diff options
context:
space:
mode:
authorLantao Liu <lantaol@google.com>2018-09-13 16:36:53 -0700
committerShentubot <shentubot@google.com>2018-09-13 16:38:03 -0700
commitbde2a91433cfbac426577a691bf13817115b53be (patch)
tree1403a6e5ffca3345da142bf68535763b6f34e5a9 /runsc/container
parentadf8f339703922211886d3e5588160f65bc131b3 (diff)
runsc: Support container signal/wait.
This CL: 1) Fix `runsc wait`, it now also works after the container exits; 2) Generate correct container state in Load; 2) Make sure `Destory` cleanup everything before successfully return. PiperOrigin-RevId: 212900107 Change-Id: Ie129cbb9d74f8151a18364f1fc0b2603eac4109a
Diffstat (limited to 'runsc/container')
-rw-r--r--runsc/container/container.go178
-rw-r--r--runsc/container/container_test.go52
2 files changed, 122 insertions, 108 deletions
diff --git a/runsc/container/container.go b/runsc/container/container.go
index 38848d02f..792b7967b 100644
--- a/runsc/container/container.go
+++ b/runsc/container/container.go
@@ -136,13 +136,17 @@ func Load(rootDir, id string) (*Container, error) {
// This is inherently racey.
if c.Status == Running || c.Status == Created {
// Check if the sandbox process is still running.
- if c.IsRunning() {
- // TODO: Send a message into the sandbox to
- // see if this particular container is still running.
- } else {
+ if !c.Sandbox.IsRunning() {
// Sandbox no longer exists, so this container definitely does not exist.
c.Status = Stopped
c.Sandbox = nil
+ } else if c.Status == Running {
+ // Container state should reflect the actual state of
+ // the application, so we don't consider gofer process
+ // here.
+ if err := c.Signal(syscall.Signal(0)); err != nil {
+ c.Status = Stopped
+ }
}
}
@@ -382,10 +386,12 @@ func (c *Container) Pid() int {
}
// Wait waits for the container to exit, and returns its WaitStatus.
+// Call to wait on a stopped container is needed to retrieve the exit status
+// and wait returns immediately.
func (c *Container) Wait() (syscall.WaitStatus, error) {
log.Debugf("Wait on container %q", c.ID)
- if c.Status == Stopped {
- return 0, fmt.Errorf("container is stopped")
+ if c.Sandbox == nil || !c.Sandbox.IsRunning() {
+ return 0, fmt.Errorf("container sandbox is not running")
}
return c.Sandbox.Wait(c.ID)
}
@@ -394,8 +400,8 @@ func (c *Container) Wait() (syscall.WaitStatus, error) {
// returns its WaitStatus.
func (c *Container) WaitRootPID(pid int32) (syscall.WaitStatus, error) {
log.Debugf("Wait on pid %d in sandbox %q", pid, c.Sandbox.ID)
- if c.Status == Stopped {
- return 0, fmt.Errorf("container is stopped")
+ if c.Sandbox == nil || !c.Sandbox.IsRunning() {
+ return 0, fmt.Errorf("container sandbox is not running")
}
return c.Sandbox.WaitPID(pid, c.Sandbox.ID)
}
@@ -404,29 +410,19 @@ func (c *Container) WaitRootPID(pid int32) (syscall.WaitStatus, error) {
// its WaitStatus.
func (c *Container) WaitPID(pid int32) (syscall.WaitStatus, error) {
log.Debugf("Wait on pid %d in container %q", pid, c.ID)
- if c.Status == Stopped {
- return 0, fmt.Errorf("container is stopped")
- }
- ws, err := c.Sandbox.WaitPID(pid, c.ID)
- if err != nil {
- return 0, err
- }
- if c.Sandbox.IsRootContainer(c.ID) {
- // If waiting for the root, give some time for the sandbox process to exit
- // to prevent races with resources that might still be in use.
- if err := c.waitForStopped(); err != nil {
- return 0, err
- }
+ if c.Sandbox == nil || !c.Sandbox.IsRunning() {
+ return 0, fmt.Errorf("container sandbox is not running")
}
- return ws, nil
+ return c.Sandbox.WaitPID(pid, c.ID)
}
// Signal sends the signal to the container.
+// Signal returns an error if the container is already stopped.
+// TODO: Distinguish different error types.
func (c *Container) Signal(sig syscall.Signal) error {
log.Debugf("Signal container %q", c.ID)
if c.Status == Stopped {
- log.Warningf("container %q not running, not sending signal %v", c.ID, sig)
- return nil
+ return fmt.Errorf("container sandbox is stopped")
}
// TODO: Query the container for its state, then save it.
return c.Sandbox.Signal(c.ID, sig)
@@ -437,8 +433,7 @@ func (c *Container) Signal(sig syscall.Signal) error {
func (c *Container) Checkpoint(f *os.File) error {
log.Debugf("Checkpoint container %q", c.ID)
if c.Status == Stopped {
- log.Warningf("container %q not running, not checkpointing", c.ID)
- return nil
+ return fmt.Errorf("container sandbox is stopped")
}
return c.Sandbox.Checkpoint(c.ID, f)
}
@@ -496,93 +491,36 @@ func (c *Container) Processes() ([]*control.Process, error) {
}
// Destroy frees all resources associated with the container.
+// Destroy returns error if any step fails, and the function can be safely retried.
func (c *Container) Destroy() error {
log.Debugf("Destroy container %q", c.ID)
- // First stop the container.
- if c.Sandbox != nil {
- if err := c.Sandbox.Stop(c.ID); err != nil {
- return err
- }
+ if err := c.stop(); err != nil {
+ return fmt.Errorf("error stopping container: %v", err)
}
- // "If any poststop hook fails, the runtime MUST log a warning, but the
- // remaining hooks and lifecycle continue as if the hook had succeeded" -OCI spec.
- if c.Spec.Hooks != nil && (c.Status == Created || c.Status == Running) {
- executeHooksBestEffort(c.Spec.Hooks.Poststop, c.State())
- }
-
- // If we are the first container in the sandbox, take the sandbox down
- // as well.
- if c.Sandbox != nil && c.Sandbox.IsRootContainer(c.ID) {
- if err := c.Sandbox.Destroy(); err != nil {
- log.Warningf("Failed to destroy sandbox %q: %v", c.Sandbox.ID, err)
- }
- }
- c.Status = Stopped
- c.Sandbox = nil
-
- if err := c.destroyGofer(); err != nil {
- return fmt.Errorf("error destroying gofer: %v", err)
+ if err := destroyFS(c.Spec); err != nil {
+ return fmt.Errorf("error destroying container fs: %v", err)
}
if err := os.RemoveAll(c.Root); err != nil && !os.IsNotExist(err) {
return fmt.Errorf("error deleting container root directory %q: %v", c.Root, err)
}
- return nil
-}
-
-func (c *Container) destroyGofer() error {
- if c.GoferPid != 0 {
- log.Debugf("Killing gofer for container %q, PID: %d", c.ID, c.GoferPid)
- if err := syscall.Kill(c.GoferPid, syscall.SIGKILL); err != nil {
- log.Warningf("error sending signal %d to pid %d: %v", syscall.SIGKILL, c.GoferPid, err)
- }
- }
-
- // Gofer process may take some time to teardown. Retry in case of failure.
- ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second)
- defer cancel()
- b := backoff.WithContext(backoff.NewConstantBackOff(100*time.Millisecond), ctx)
- err := backoff.Retry(func() error { return destroyFS(c.Spec) }, b)
- if err == nil {
- // Success!
- c.GoferPid = 0
- }
- return err
-}
-
-// IsRunning returns true if the sandbox or gofer process is running.
-func (c *Container) IsRunning() bool {
- if c.Sandbox != nil && c.Sandbox.IsRunning() {
- return true
- }
- if c.GoferPid != 0 {
- // Send a signal 0 to the gofer process.
- if err := syscall.Kill(c.GoferPid, 0); err == nil {
- log.Warningf("Found orphan gofer process, pid: %d", c.GoferPid)
- if err := c.destroyGofer(); err != nil {
- log.Warningf("Error destroying gofer: %v", err)
- }
-
- // Don't wait for gofer to die. Return 'running' and hope gofer is dead
- // next time around.
- return true
- }
+ // "If any poststop hook fails, the runtime MUST log a warning, but the
+ // remaining hooks and lifecycle continue as if the hook had succeeded" -OCI spec.
+ // Based on the OCI, "The post-stop hooks MUST be called after the container is
+ // deleted but before the delete operation returns"
+ // Run it here to:
+ // 1) Conform to the OCI.
+ // 2) Make sure it only runs once, because the root has been deleted, the container
+ // can't be loaded again.
+ if c.Spec.Hooks != nil {
+ executeHooksBestEffort(c.Spec.Hooks.Poststop, c.State())
}
- return false
-}
-// DestroyAndWait frees all resources associated with the container
-// and waits for destroy to finish before returning.
-//
-// TODO: This only works for single container.
-func (c *Container) DestroyAndWait() error {
- if err := c.Destroy(); err != nil {
- return fmt.Errorf("error destroying container %v: %v", c, err)
- }
- return c.waitForStopped()
+ c.Status = Stopped
+ return nil
}
// save saves the container metadata to a file.
@@ -602,13 +540,49 @@ func (c *Container) save() error {
return nil
}
+// stop stops the container (for regular containers) or the sandbox (for
+// root containers), and waits for the container or sandbox and the gofer
+// to stop. If any of them doesn't stop before timeout, an error is returned.
+func (c *Container) stop() error {
+ if c.Sandbox != nil && c.Sandbox.IsRunning() {
+ log.Debugf("Killing container %q", c.ID)
+ if c.Sandbox.IsRootContainer(c.ID) {
+ if err := c.Sandbox.Destroy(); err != nil {
+ return fmt.Errorf("error destroying sandbox %q: %v", c.Sandbox.ID, err)
+ }
+ } else {
+ if err := c.Signal(syscall.SIGKILL); err != nil {
+ // The container may already be stopped, log the error.
+ log.Warningf("Error sending signal %d to container %q: %v", syscall.SIGKILL, c.ID, err)
+ }
+ }
+ }
+
+ // Try killing gofer if it does not exit with container.
+ if c.GoferPid != 0 {
+ log.Debugf("Killing gofer for container %q, PID: %d", c.ID, c.GoferPid)
+ if err := syscall.Kill(c.GoferPid, syscall.SIGKILL); err != nil {
+ // The gofer may already be stopped, log the error.
+ log.Warningf("Error sending signal %d to gofer %d: %v", syscall.SIGKILL, c.GoferPid, err)
+ }
+ }
+ return c.waitForStopped()
+}
+
func (c *Container) waitForStopped() error {
ctx, cancel := context.WithTimeout(context.Background(), 1*time.Second)
defer cancel()
b := backoff.WithContext(backoff.NewConstantBackOff(100*time.Millisecond), ctx)
op := func() error {
- if !c.IsRunning() {
- return fmt.Errorf("container is still running")
+ if c.Sandbox != nil && c.Sandbox.IsRunning() {
+ if err := c.Signal(syscall.Signal(0)); err == nil {
+ return fmt.Errorf("container is still running")
+ }
+ }
+ if c.GoferPid != 0 {
+ if err := syscall.Kill(c.GoferPid, 0); err == nil {
+ return fmt.Errorf("gofer is still running")
+ }
}
return nil
}
diff --git a/runsc/container/container_test.go b/runsc/container/container_test.go
index 790334249..ab1823f1c 100644
--- a/runsc/container/container_test.go
+++ b/runsc/container/container_test.go
@@ -200,6 +200,7 @@ func run(spec *specs.Spec, conf *boot.Config) error {
if err := s.Start(conf); err != nil {
return fmt.Errorf("error starting container: %v", err)
}
+
ws, err := s.Wait()
if err != nil {
return fmt.Errorf("error waiting on container: %v", err)
@@ -251,6 +252,35 @@ func configs(opts ...configOption) []*boot.Config {
return cs
}
+// In normal runsc usage, sandbox processes will be parented to
+// init and init will reap them. However, in the test environment
+// the test runner is the parent and will not reap the sandbox
+// processes, so we must do it ourselves, or else they will be left
+// as zombies.
+// The function returns a wait group, and the caller can reap
+// children synchronously by waiting on the wait group.
+func reapChildren(c *Container) (*sync.WaitGroup, error) {
+ var wg sync.WaitGroup
+ p, err := os.FindProcess(c.Sandbox.Pid)
+ if err != nil {
+ return nil, fmt.Errorf("error finding sandbox process: %v", err)
+ }
+ g, err := os.FindProcess(c.GoferPid)
+ if err != nil {
+ return nil, fmt.Errorf("error finding gofer process: %v", err)
+ }
+ wg.Add(2)
+ go func() {
+ p.Wait()
+ wg.Done()
+ }()
+ go func() {
+ g.Wait()
+ wg.Done()
+ }()
+ return &wg, nil
+}
+
// TestLifecycle tests the basic Create/Start/Signal/Destroy container lifecycle.
// It verifies after each step that the container can be loaded from disk, and
// has the correct status.
@@ -306,6 +336,7 @@ func TestLifecycle(t *testing.T) {
if err := s.Start(conf); err != nil {
t.Fatalf("error starting container: %v", err)
}
+
// Load the container from disk and check the status.
s, err = Load(rootDir, id)
if err != nil {
@@ -352,10 +383,11 @@ func TestLifecycle(t *testing.T) {
// and init will reap the sandbox. However, in this case the
// test runner is the parent and will not reap the sandbox
// process, so we must do it ourselves.
- p, _ := os.FindProcess(s.Sandbox.Pid)
- p.Wait()
- g, _ := os.FindProcess(s.GoferPid)
- g.Wait()
+ reapWg, err := reapChildren(s)
+ if err != nil {
+ t.Fatalf("error reaping children: %v", err)
+ }
+ reapWg.Wait()
// Load the container from disk and check the status.
s, err = Load(rootDir, id)
@@ -1164,6 +1196,11 @@ func TestConsoleSocket(t *testing.T) {
t.Errorf("fd is not a terminal (ioctl TGGETS got %v)", err)
}
+ // Reap the sandbox process.
+ if _, err := reapChildren(s); err != nil {
+ t.Fatalf("error reaping children: %v", err)
+ }
+
// Shut it down.
if err := s.Destroy(); err != nil {
t.Fatalf("error destroying container: %v", err)
@@ -1259,6 +1296,7 @@ func TestReadonlyRoot(t *testing.T) {
if err := s.Start(conf); err != nil {
t.Fatalf("error starting container: %v", err)
}
+
ws, err := s.Wait()
if err != nil {
t.Fatalf("error waiting on container: %v", err)
@@ -1302,6 +1340,7 @@ func TestReadonlyMount(t *testing.T) {
if err := s.Start(conf); err != nil {
t.Fatalf("error starting container: %v", err)
}
+
ws, err := s.Wait()
if err != nil {
t.Fatalf("error waiting on container: %v", err)
@@ -1547,8 +1586,9 @@ func TestGoferExits(t *testing.T) {
if _, err := gofer.Wait(); err != nil {
t.Fatalf("error waiting for gofer process: %v", err)
}
- if c.IsRunning() {
- t.Errorf("container shouldn't be running, container: %+v", c)
+
+ if err := c.waitForStopped(); err != nil {
+ t.Errorf("container is not stopped: %v", err)
}
}