Added leave-running flag for checkpoint.

The leave-running flag lets the container keep running after a checkpoint: once the old container has been destroyed, the checkpoint is immediately restored into a new container with the same container ID.

Updates #80.
PiperOrigin-RevId: 202695426
Change-Id: Iac50437f5afda018dc18b24bb8ddb935983cf336
author: Brielle Broder <bbroder@google.com> 2018-06-29 13:08:41 -0700
committer: Shentubot <shentubot@google.com> 2018-06-29 13:09:33 -0700
commit: 25e315c2e1764a9b0a1b70196e1108c00d172f48 (patch)
tree: 0d4a2795f59796818174b540c9619bd583607899 /runsc
parent: 23f49097c77213175e9b11755c28c3ff5ccc1118 (diff)
3 files changed, 109 insertions, 6 deletions
diff --git a/runsc/cmd/checkpoint.go b/runsc/cmd/checkpoint.go
index a28eb0f02..9348289ca 100644
--- a/runsc/cmd/checkpoint.go
+++ b/runsc/cmd/checkpoint.go
@@ -17,12 +17,15 @@ package cmd
 import (
 	"os"
 	"path/filepath"
+	"syscall"
 
 	"context"
 	"flag"
 	"github.com/google/subcommands"
+	"gvisor.googlesource.com/gvisor/pkg/log"
 	"gvisor.googlesource.com/gvisor/runsc/boot"
 	"gvisor.googlesource.com/gvisor/runsc/container"
+	"gvisor.googlesource.com/gvisor/runsc/specutils"
 )
 
 // File containing the container's saved image/state within the given image-path's directory.
@@ -30,7 +33,8 @@ const checkpointFileName = "checkpoint.img"
 
 // Checkpoint implements subcommands.Command for the "checkpoint" command.
 type Checkpoint struct {
-	imagePath string
+	imagePath    string // value of -image-path: directory path to the saved container image
+	leaveRunning bool   // value of -leave-running: restore the container after checkpointing
 }
 
 // Name implements subcommands.Command.Name.
@@ -51,14 +55,12 @@ func (*Checkpoint) Usage() string {
 
 // SetFlags implements subcommands.Command.SetFlags.
 func (c *Checkpoint) SetFlags(f *flag.FlagSet) {
-	f.StringVar(&c.imagePath, "image-path", "", "path to saved container image")
+	f.StringVar(&c.imagePath, "image-path", "", "directory path to saved container image")
+	f.BoolVar(&c.leaveRunning, "leave-running", false, "restart the container after checkpointing") // acted on in Execute
 
 	// Unimplemented flags necessary for compatibility with docker.
 	var wp string
 	f.StringVar(&wp, "work-path", "", "ignored")
-
-	var lr bool
-	f.BoolVar(&lr, "leave-running", false, "ignored")
 }
 
 // Execute implements subcommands.Command.Execute.
@@ -71,6 +73,7 @@ func (c *Checkpoint) Execute(_ context.Context, f *flag.FlagSet, args ...interfa
 
 	id := f.Arg(0)
 	conf := args[0].(*boot.Config)
+	waitStatus := args[1].(*syscall.WaitStatus)
 
 	cont, err := container.Load(conf.RootDir, id)
 	if err != nil {
@@ -98,5 +101,49 @@ func (c *Checkpoint) Execute(_ context.Context, f *flag.FlagSet, args ...interfa
 		Fatalf("checkpoint failed: %v", err)
 	}
 
+	if !c.leaveRunning {
+		return subcommands.ExitSuccess
+	}
+
+	// TODO: Make it possible to restore into same container.
+	// For now, we can fake it by destroying the container and making a
+	// new container with the same ID. This hack does not work with docker
+	// which uses the container pid to ensure that the restore-container is
+	// actually the same as the checkpoint-container. By restoring into
+	// the same container, we will solve the docker incompatibility.
+
+	// Restore into new container with same ID.
+	bundleDir := cont.BundleDir
+	if bundleDir == "" {
+		Fatalf("error setting bundleDir")
+	}
+
+	spec, err := specutils.ReadSpec(bundleDir)
+	if err != nil {
+		Fatalf("error reading spec: %v", err)
+	}
+
+	specutils.LogSpec(spec)
+
+	if cont.ConsoleSocket != "" {
+		log.Warningf("ignoring console socket since it cannot be restored")
+	}
+
+	if err := cont.DestroyAndWait(); err != nil {
+		Fatalf("error destroying container: %v", err)
+	}
+
+	cont, err = container.Create(id, spec, conf, bundleDir, "", "", fullImagePath)
+	if err != nil {
+		Fatalf("error restoring container: %v", err)
+	}
+
+	if err := cont.Start(conf); err != nil {
+		Fatalf("error starting container: %v", err)
+	}
+
+	ws, err := cont.Wait()
+	*waitStatus = ws
+
 	return subcommands.ExitSuccess
 }
diff --git a/runsc/container/BUILD b/runsc/container/BUILD
index 61e05e1c3..679d7e097 100644
--- a/runsc/container/BUILD
+++ b/runsc/container/BUILD
@@ -16,6 +16,7 @@ go_library(
     deps = [
         "//pkg/log",
         "//pkg/sentry/control",
+        "//pkg/syserror",
         "//runsc/boot",
         "//runsc/sandbox",
         "//runsc/specutils",
diff --git a/runsc/container/container.go b/runsc/container/container.go
index 042c76577..8dca721f6 100644
--- a/runsc/container/container.go
+++ b/runsc/container/container.go
@@ -30,6 +30,7 @@ import (
 	specs "github.com/opencontainers/runtime-spec/specs-go"
 	"gvisor.googlesource.com/gvisor/pkg/log"
 	"gvisor.googlesource.com/gvisor/pkg/sentry/control"
+	"gvisor.googlesource.com/gvisor/pkg/syserror"
 	"gvisor.googlesource.com/gvisor/runsc/boot"
 	"gvisor.googlesource.com/gvisor/runsc/sandbox"
 	"gvisor.googlesource.com/gvisor/runsc/specutils"
@@ -100,11 +101,12 @@ type Container struct {
 func Load(rootDir, id string) (*Container, error) {
 	log.Debugf("Load container %q %q", rootDir, id)
 	if err := validateID(id); err != nil {
-		return nil, err
+		return nil, fmt.Errorf("error validating id: %v", err)
 	}
 
 	cRoot, err := findContainerRoot(rootDir, id)
 	if err != nil {
+		// Preserve error so that callers can distinguish 'not found' errors.
 		return nil, err
 	}
 
@@ -471,6 +473,32 @@ func (c *Container) Destroy() error {
 
 	c.Sandbox = nil
 	c.Status = Stopped
+
+	return nil
+}
+
+// DestroyAndWait calls Destroy and then blocks until the sandbox and gofer
+// processes are gone, allowing 5 seconds for each. A nil Sandbox is tolerated.
+func (c *Container) DestroyAndWait() error {
+	// Capture the pids up front: Destroy resets c.Sandbox to nil.
+	var sandboxPid, goferPid int
+	if c.Sandbox != nil {
+		sandboxPid, goferPid = c.Sandbox.Pid, c.Sandbox.GoferPid
+	}
+	if err := c.Destroy(); err != nil {
+		return fmt.Errorf("error destroying container %v: %v", c, err)
+	}
+	// Skip the wait for any pid that was not recorded (zero).
+	if sandboxPid != 0 {
+		if err := waitForDeath(sandboxPid, 5*time.Second); err != nil {
+			return fmt.Errorf("error waiting for sandbox death: %v", err)
+		}
+	}
+	if goferPid != 0 {
+		if err := waitForDeath(goferPid, 5*time.Second); err != nil {
+			return fmt.Errorf("error waiting for gofer death: %v", err)
+		}
+	}
 	return nil
 }
 
@@ -490,3 +518,30 @@ func (c *Container) save() error {
 	}
 	return nil
 }
+
+// waitForDeath polls pid with signal 0 until the process no longer exists,
+// backing off exponentially (1ms doubling, capped at 1s) for up to timeout.
+//
+// This is racy because the kernel can potentially reuse the pid in the time
+// between the process' death and the first check after the process has ended.
+func waitForDeath(pid int, timeout time.Duration) error {
+	backoff := 1 * time.Millisecond
+	for start := time.Now(); ; backoff *= 2 {
+		err := syscall.Kill(pid, 0) // signal 0 probes without delivering a signal
+		if err == syserror.ESRCH {
+			return nil // pid does not exist so process must have died
+		}
+		if err != nil {
+			return fmt.Errorf("error killing pid (%d): %v", pid, err)
+		}
+		// Check the deadline only after probing, so that a death during the
+		// final sleep (or a timeout <= 0) is not misreported as a timeout.
+		if time.Since(start) >= timeout {
+			return fmt.Errorf("timed out waiting for process (%d)", pid)
+		}
+		if backoff > 1*time.Second {
+			backoff = 1 * time.Second
+		}
+		time.Sleep(backoff)
+	}
+}
author	Brielle Broder <bbroder@google.com>	2018-06-29 13:08:41 -0700
committer	Shentubot <shentubot@google.com>	2018-06-29 13:09:33 -0700
commit	25e315c2e1764a9b0a1b70196e1108c00d172f48 (patch)
tree	0d4a2795f59796818174b540c9619bd583607899 /runsc
parent	23f49097c77213175e9b11755c28c3ff5ccc1118 (diff)