From d6d165cb0b8147461388287ffd4cfee221940123 Mon Sep 17 00:00:00 2001 From: Fabricio Voznika Date: Tue, 21 Aug 2018 13:13:34 -0700 Subject: Initial change for multi-gofer support PiperOrigin-RevId: 209647293 Change-Id: I980fca1257ea3fcce796388a049c353b0303a8a5 --- runsc/container/BUILD | 2 +- runsc/container/container.go | 111 ++++++++++++++++++++++---------------- runsc/container/container_test.go | 47 +++++++++++++++- 3 files changed, 112 insertions(+), 48 deletions(-) (limited to 'runsc/container') diff --git a/runsc/container/BUILD b/runsc/container/BUILD index d4c650892..1171355c8 100644 --- a/runsc/container/BUILD +++ b/runsc/container/BUILD @@ -23,10 +23,10 @@ go_library( deps = [ "//pkg/log", "//pkg/sentry/control", - "//pkg/syserror", "//runsc/boot", "//runsc/sandbox", "//runsc/specutils", + "@com_github_cenkalti_backoff//:go_default_library", "@com_github_opencontainers_runtime-spec//specs-go:go_default_library", ], ) diff --git a/runsc/container/container.go b/runsc/container/container.go index da2ce0d25..8bd47aac1 100644 --- a/runsc/container/container.go +++ b/runsc/container/container.go @@ -16,6 +16,7 @@ package container import ( + "context" "encoding/json" "fmt" "io/ioutil" @@ -27,10 +28,10 @@ import ( "syscall" "time" + "github.com/cenkalti/backoff" specs "github.com/opencontainers/runtime-spec/specs-go" "gvisor.googlesource.com/gvisor/pkg/log" "gvisor.googlesource.com/gvisor/pkg/sentry/control" - "gvisor.googlesource.com/gvisor/pkg/syserror" "gvisor.googlesource.com/gvisor/runsc/boot" "gvisor.googlesource.com/gvisor/runsc/sandbox" "gvisor.googlesource.com/gvisor/runsc/specutils" @@ -89,6 +90,10 @@ type Container struct { // Status is the current container Status. Status Status `json:"status"` + // GoferPid is the pid of the gofer running along side the sandbox. May + // be 0 if the gofer has been killed or it's not being used. + GoferPid int `json:"goferPid"` + // Sandbox is the sandbox this container is running in. It will be nil // if the container is not in state Running or Created. Sandbox *sandbox.Sandbox `json:"sandbox"` @@ -130,12 +135,11 @@ func Load(rootDir, id string) (*Container, error) { // This is inherently racey. if c.Status == Running || c.Status == Created { // Check if the sandbox process is still running. - if c.Sandbox.IsRunning() { + if c.IsRunning() { // TODO: Send a message into the sandbox to // see if this particular container is still running. } else { - // Sandbox no longer exists, so this container - // definitely does not exist. + // Sandbox no longer exists, so this container definitely does not exist. c.Status = Stopped c.Sandbox = nil } @@ -221,12 +225,13 @@ func Create(id string, spec *specs.Spec, conf *boot.Config, bundleDir, consoleSo log.Debugf("Creating new sandbox for container %q", id) // Start a new sandbox for this container. Any errors after this point // must destroy the container. - s, err := sandbox.Create(id, spec, conf, bundleDir, consoleSocket) + s, goferPid, err := sandbox.Create(id, spec, conf, bundleDir, consoleSocket) if err != nil { c.Destroy() return nil, err } c.Sandbox = s + c.GoferPid = goferPid } else { // This is sort of confusing. For a sandbox with a root // container and a child container in it, runsc sees: @@ -398,7 +403,18 @@ func (c *Container) WaitPID(pid int32) (syscall.WaitStatus, error) { if c.Status == Stopped { return 0, fmt.Errorf("container is stopped") } - return c.Sandbox.WaitPID(pid, c.ID) + ws, err := c.Sandbox.WaitPID(pid, c.ID) + if err != nil { + return 0, err + } + if c.Sandbox.IsRootContainer(c.ID) { + // If waiting for the root, give some time for the sandbox process to exit + // to prevent races with resources that might still be in use. + if err := c.waitForStopped(); err != nil { + return 0, err + } + } + return ws, nil } // Signal sends the signal to the container. @@ -502,6 +518,14 @@ func (c *Container) Destroy() error { log.Warningf("Failed to destroy sandbox %q: %v", c.Sandbox.ID, err) } } + if c.GoferPid != 0 { + log.Debugf("Killing gofer for container %q, PID: %d", c.ID, c.GoferPid) + if err := syscall.Kill(c.GoferPid, syscall.SIGKILL); err != nil { + log.Warningf("error sending signal %d to pid %d: %v", syscall.SIGKILL, c.GoferPid, err) + } else { + c.GoferPid = 0 + } + } c.Sandbox = nil c.Status = Stopped @@ -509,29 +533,38 @@ func (c *Container) Destroy() error { return nil } -// DestroyAndWait frees all resources associated with the container -// and waits for destroy to finish before returning. -func (c *Container) DestroyAndWait() error { - sandboxPid := c.Sandbox.Pid - goferPid := c.Sandbox.GoferPid - - if err := c.Destroy(); err != nil { - return fmt.Errorf("error destroying container %v: %v", c, err) +// IsRunning returns true if the sandbox or gofer process is running. +func (c *Container) IsRunning() bool { + if c.Status == Stopped { + return false } - - if sandboxPid != 0 { - if err := waitForDeath(sandboxPid, 5*time.Second); err != nil { - return fmt.Errorf("error waiting for sandbox death: %v", err) - } + if c.Sandbox != nil && c.Sandbox.IsRunning() { + return true } + if c.GoferPid != 0 { + // Send a signal 0 to the gofer process. + if err := syscall.Kill(c.GoferPid, 0); err == nil { + log.Warningf("Found orphan gofer process, pid: %d", c.GoferPid) + // Attempt to kill gofer if it's orphan. + syscall.Kill(c.GoferPid, syscall.SIGKILL) - if goferPid != 0 { - if err := waitForDeath(goferPid, 5*time.Second); err != nil { - return fmt.Errorf("error waiting for gofer death: %v", err) + // Don't wait for gofer to die. Return 'running' and hope gofer is dead + // next time around. + return true } } + return false +} - return nil +// DestroyAndWait frees all resources associated with the container +// and waits for destroy to finish before returning. +// +// TODO: This only works for single container. +func (c *Container) DestroyAndWait() error { + if err := c.Destroy(); err != nil { + return fmt.Errorf("error destroying container %v: %v", c, err) + } + return c.waitForStopped() } // save saves the container metadata to a file. @@ -551,29 +584,15 @@ func (c *Container) save() error { return nil } -// waitForDeath ensures that process is dead before proceeding. -// -// This is racy because the kernel can potentially reuse the pid in the time -// between the process' death and the first check after the process has ended. -func waitForDeath(pid int, timeout time.Duration) error { - backoff := 1 * time.Millisecond - for start := time.Now(); time.Now().Sub(start) < timeout; { - - if err := syscall.Kill(pid, 0); err != nil { - if err == syserror.ESRCH { - // pid does not exist so process must have died - return nil - } - return fmt.Errorf("error killing pid (%d): %v", pid, err) - } - // pid is still alive. - - // Process continues to run, backoff and retry. - time.Sleep(backoff) - backoff *= 2 - if backoff > 1*time.Second { - backoff = 1 * time.Second +func (c *Container) waitForStopped() error { + ctx, cancel := context.WithTimeout(context.Background(), 1*time.Second) + defer cancel() + b := backoff.WithContext(backoff.NewConstantBackOff(100*time.Millisecond), ctx) + op := func() error { + if !c.IsRunning() { + return fmt.Errorf("container is still running") } + return nil } - return fmt.Errorf("timed out waiting for process (%d)", pid) + return backoff.Retry(op, b) } diff --git a/runsc/container/container_test.go b/runsc/container/container_test.go index a2da63afd..dadf8445b 100644 --- a/runsc/container/container_test.go +++ b/runsc/container/container_test.go @@ -350,7 +350,7 @@ func TestLifecycle(t *testing.T) { // ourselves. p, _ := os.FindProcess(s.Sandbox.Pid) p.Wait() - g, _ := os.FindProcess(s.Sandbox.GoferPid) + g, _ := os.FindProcess(s.GoferPid) g.Wait() // Load the container from disk and check the status. @@ -1701,3 +1701,48 @@ func TestContainerVolumeContentsShared(t *testing.T) { t.Errorf("stat %q got error %v, wanted ErrNotExist", filename, err) } } + +func TestGoferExits(t *testing.T) { + spec := testutil.NewSpecWithArgs("/bin/sleep", "10000") + conf := testutil.TestConfig() + rootDir, bundleDir, err := testutil.SetupContainer(spec, conf) + if err != nil { + t.Fatalf("error setting up container: %v", err) + } + defer os.RemoveAll(rootDir) + defer os.RemoveAll(bundleDir) + + // Create and start the container. + c, err := container.Create(testutil.UniqueContainerID(), spec, conf, bundleDir, "", "") + if err != nil { + t.Fatalf("error creating container: %v", err) + } + defer c.Destroy() + if err := c.Start(conf); err != nil { + t.Fatalf("error starting container: %v", err) + } + + sandboxProc, err := os.FindProcess(c.Sandbox.Pid) + if err != nil { + t.Fatalf("error finding sandbox process: %v", err) + } + gofer, err := os.FindProcess(c.GoferPid) + if err != nil { + t.Fatalf("error finding sandbox process: %v", err) + } + + // Kill sandbox and expect gofer to exit on its own. + if err := sandboxProc.Kill(); err != nil { + t.Fatalf("error killing sandbox process: %v", err) + } + if _, err := sandboxProc.Wait(); err != nil { + t.Fatalf("error waiting for sandbox process: %v", err) + } + + if _, err := gofer.Wait(); err != nil { + t.Fatalf("error waiting for gofer process: %v", err) + } + if c.IsRunning() { + t.Errorf("container shouldn't be running, container: %+v", c) + } +} -- cgit v1.2.3