5 files changed, 132 insertions, 41 deletions
diff --git a/runsc/cmd/do.go b/runsc/cmd/do.go
index b184bd402..7d1310c96 100644
--- a/runsc/cmd/do.go
+++ b/runsc/cmd/do.go
@@ -166,15 +166,33 @@ func (c *Do) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) su
 		return Errorf("Error write spec: %v", err)
 	}
 
-	runArgs := container.Args{
+	containerArgs := container.Args{
 		ID:        cid,
 		Spec:      spec,
 		BundleDir: tmpDir,
 		Attached:  true,
 	}
-	ws, err := container.Run(conf, runArgs)
+	ct, err := container.New(conf, containerArgs)
 	if err != nil {
-		return Errorf("running container: %v", err)
+		return Errorf("creating container: %v", err)
+	}
+	defer ct.Destroy()
+
+	if err := ct.Start(conf); err != nil {
+		return Errorf("starting container: %v", err)
+	}
+
+	// Forward signals to init in the container. Thus if we get SIGINT from
+	// ^C, the container gracefully exits, and we can clean up.
+	//
+	// N.B. There is still a window before this where a signal may kill
+	// this process, skipping cleanup.
+	stopForwarding := ct.ForwardSignals(0 /* pid */, false /* fgProcess */)
+	defer stopForwarding()
+
+	ws, err := ct.Wait()
+	if err != nil {
+		return Errorf("waiting for container: %v", err)
 	}
 
 	*waitStatus = ws
@@ -237,20 +255,27 @@ func (c *Do) setupNet(cid string, spec *specs.Spec) (func(), error) {
 	for _, cmd := range cmds {
 		log.Debugf("Run %q", cmd)
 		args := strings.Split(cmd, " ")
-		c := exec.Command(args[0], args[1:]...)
-		if err := c.Run(); err != nil {
+		cmd := exec.Command(args[0], args[1:]...)
+		if err := cmd.Run(); err != nil {
+			c.cleanupNet(cid, dev, "", "", "")
 			return nil, fmt.Errorf("failed to run %q: %v", cmd, err)
 		}
 	}
 
-	if err := makeFile("/etc/resolv.conf", "nameserver 8.8.8.8\n", spec); err != nil {
+	resolvPath, err := makeFile("/etc/resolv.conf", "nameserver 8.8.8.8\n", spec)
+	if err != nil {
+		c.cleanupNet(cid, dev, "", "", "")
 		return nil, err
 	}
-	if err := makeFile("/etc/hostname", cid+"\n", spec); err != nil {
+	hostnamePath, err := makeFile("/etc/hostname", cid+"\n", spec)
+	if err != nil {
+		c.cleanupNet(cid, dev, resolvPath, "", "")
 		return nil, err
 	}
 	hosts := fmt.Sprintf("127.0.0.1\tlocalhost\n%s\t%s\n", c.ip, cid)
-	if err := makeFile("/etc/hosts", hosts, spec); err != nil {
+	hostsPath, err := makeFile("/etc/hosts", hosts, spec)
+	if err != nil {
+		c.cleanupNet(cid, dev, resolvPath, hostnamePath, "")
 		return nil, err
 	}
 
@@ -263,19 +288,22 @@ func (c *Do) setupNet(cid string, spec *specs.Spec) (func(), error) {
 	}
 	spec.Linux.Namespaces = append(spec.Linux.Namespaces, netns)
 
-	return func() { c.cleanNet(cid, dev) }, nil
+	return func() { c.cleanupNet(cid, dev, resolvPath, hostnamePath, hostsPath) }, nil
 }
 
-func (c *Do) cleanNet(cid, dev string) {
-	veth, peer := deviceNames(cid)
+// cleanupNet tries to clean up the network setup in setupNet.
+//
+// It may be called when setupNet is only partially complete, in which case it
+// will clean up as much as possible, logging warnings for the rest.
+//
+// Unfortunately none of this can be automatically cleaned up on process exit;
+// we must do so explicitly.
+func (c *Do) cleanupNet(cid, dev, resolvPath, hostnamePath, hostsPath string) {
+	_, peer := deviceNames(cid)
 
 	cmds := []string{
 		fmt.Sprintf("ip link delete %s", peer),
 		fmt.Sprintf("ip netns delete %s", cid),
-
-		fmt.Sprintf("iptables -t nat -D POSTROUTING -s %s/24 -o %s -j MASQUERADE", c.ip, dev),
-		fmt.Sprintf("iptables -D FORWARD -i %s -o %s -j ACCEPT", dev, veth),
-		fmt.Sprintf("iptables -D FORWARD -o %s -i %s -j ACCEPT", dev, veth),
 	}
 
 	for _, cmd := range cmds {
@@ -286,6 +314,10 @@ func (c *Do) cleanNet(cid, dev string) {
 			log.Warningf("Failed to run %q: %v", cmd, err)
 		}
 	}
+
+	tryRemove(resolvPath)
+	tryRemove(hostnamePath)
+	tryRemove(hostsPath)
 }
 
 func deviceNames(cid string) (string, string) {
@@ -306,13 +338,16 @@ func defaultDevice() (string, error) {
 	return parts[4], nil
 }
 
-func makeFile(dest, content string, spec *specs.Spec) error {
+func makeFile(dest, content string, spec *specs.Spec) (string, error) {
 	tmpFile, err := ioutil.TempFile("", filepath.Base(dest))
 	if err != nil {
-		return err
+		return "", err
 	}
 	if _, err := tmpFile.WriteString(content); err != nil {
-		return err
+		if err := os.Remove(tmpFile.Name()); err != nil {
+			log.Warningf("Failed to remove %q: %v", tmpFile.Name(), err)
+		}
+		return "", err
 	}
 	spec.Mounts = append(spec.Mounts, specs.Mount{
 		Source:      tmpFile.Name(),
@@ -320,7 +355,17 @@ func makeFile(dest, content string, spec *specs.Spec) error {
 		Type:        "bind",
 		Options:     []string{"ro"},
 	})
-	return nil
+	return tmpFile.Name(), nil
+}
+
+func tryRemove(path string) {
+	if path == "" {
+		return
+	}
+
+	if err := os.Remove(path); err != nil {
+		log.Warningf("Failed to remove %q: %v", path, err)
+	}
 }
 
 func calculatePeerIP(ip string) (string, error) {
diff --git a/runsc/container/BUILD b/runsc/container/BUILD
index 331b8e866..46154df60 100644
--- a/runsc/container/BUILD
+++ b/runsc/container/BUILD
@@ -15,8 +15,10 @@ go_library(
         "//test:__subpackages__",
     ],
     deps = [
+        "//pkg/abi/linux",
         "//pkg/log",
         "//pkg/sentry/control",
+        "//pkg/sentry/sighandling",
         "//pkg/sync",
         "//runsc/boot",
         "//runsc/cgroup",
diff --git a/runsc/container/container.go b/runsc/container/container.go
index 117ea7d7b..8539f252d 100644
--- a/runsc/container/container.go
+++ b/runsc/container/container.go
@@ -22,7 +22,6 @@ import (
 	"io/ioutil"
 	"os"
 	"os/exec"
-	"os/signal"
 	"regexp"
 	"strconv"
 	"strings"
@@ -31,8 +30,10 @@ import (
 
 	"github.com/cenkalti/backoff"
 	specs "github.com/opencontainers/runtime-spec/specs-go"
+	"gvisor.dev/gvisor/pkg/abi/linux"
 	"gvisor.dev/gvisor/pkg/log"
 	"gvisor.dev/gvisor/pkg/sentry/control"
+	"gvisor.dev/gvisor/pkg/sentry/sighandling"
 	"gvisor.dev/gvisor/runsc/boot"
 	"gvisor.dev/gvisor/runsc/cgroup"
 	"gvisor.dev/gvisor/runsc/sandbox"
@@ -421,7 +422,7 @@ func (c *Container) Start(conf *boot.Config) error {
 			return err
 		}
 	} else {
-		// Join cgroup to strt gofer process to ensure it's part of the cgroup from
+		// Join cgroup to start gofer process to ensure it's part of the cgroup from
 		// the start (and all their children processes).
 		if err := runInCgroup(c.Sandbox.Cgroup, func() error {
 			// Create the gofer process.
@@ -621,21 +622,15 @@ func (c *Container) SignalProcess(sig syscall.Signal, pid int32) error {
 // forwarding signals.
 func (c *Container) ForwardSignals(pid int32, fgProcess bool) func() {
 	log.Debugf("Forwarding all signals to container %q PID %d fgProcess=%t", c.ID, pid, fgProcess)
-	sigCh := make(chan os.Signal, 1)
-	signal.Notify(sigCh)
-	go func() {
-		for s := range sigCh {
-			log.Debugf("Forwarding signal %d to container %q PID %d fgProcess=%t", s, c.ID, pid, fgProcess)
-			if err := c.Sandbox.SignalProcess(c.ID, pid, s.(syscall.Signal), fgProcess); err != nil {
-				log.Warningf("error forwarding signal %d to container %q: %v", s, c.ID, err)
-			}
+	stop := sighandling.StartSignalForwarding(func(sig linux.Signal) {
+		log.Debugf("Forwarding signal %d to container %q PID %d fgProcess=%t", sig, c.ID, pid, fgProcess)
+		if err := c.Sandbox.SignalProcess(c.ID, pid, syscall.Signal(sig), fgProcess); err != nil {
+			log.Warningf("error forwarding signal %d to container %q: %v", sig, c.ID, err)
 		}
-		log.Debugf("Done forwarding signals to container %q PID %d fgProcess=%t", c.ID, pid, fgProcess)
-	}()
-
+	})
 	return func() {
-		signal.Stop(sigCh)
-		close(sigCh)
+		log.Debugf("Done forwarding signals to container %q PID %d fgProcess=%t", c.ID, pid, fgProcess)
+		stop()
 	}
 }
 
diff --git a/runsc/container/container_test.go b/runsc/container/container_test.go
index c963c2153..f607fe8af 100644
--- a/runsc/container/container_test.go
+++ b/runsc/container/container_test.go
@@ -281,6 +281,18 @@ func configs(t *testing.T, opts ...configOption) map[string]*boot.Config {
 	return cs
 }
 
+func configsWithVFS2(t *testing.T, opts []configOption) map[string]*boot.Config {
+	vfs1 := configs(t, opts...)
+	vfs2 := configs(t, opts...)
+
+	for key, value := range vfs2 {
+		value.VFS2 = true
+		vfs1[key+"VFS2"] = value
+	}
+
+	return vfs1
+}
+
 // TestLifecycle tests the basic Create/Start/Signal/Destroy container lifecycle.
 // It verifies after each step that the container can be loaded from disk, and
 // has the correct status.
@@ -290,7 +302,7 @@ func TestLifecycle(t *testing.T) {
 	childReaper.Start()
 	defer childReaper.Stop()
 
-	for name, conf := range configs(t, all...) {
+	for name, conf := range configsWithVFS2(t, all) {
 		t.Run(name, func(t *testing.T) {
 			// The container will just sleep for a long time.  We will kill it before
 			// it finishes sleeping.
@@ -464,7 +476,7 @@ func TestExePath(t *testing.T) {
 		t.Fatalf("error making directory: %v", err)
 	}
 
-	for name, conf := range configs(t, overlay) {
+	for name, conf := range configsWithVFS2(t, []configOption{overlay}) {
 		t.Run(name, func(t *testing.T) {
 			for _, test := range []struct {
 				path    string
@@ -1329,7 +1341,7 @@ func TestRunNonRoot(t *testing.T) {
 // TestMountNewDir checks that runsc will create destination directory if it
 // doesn't exit.
 func TestMountNewDir(t *testing.T) {
-	for name, conf := range configs(t, overlay) {
+	for name, conf := range configsWithVFS2(t, []configOption{overlay}) {
 		t.Run(name, func(t *testing.T) {
 			root, err := ioutil.TempDir(testutil.TmpDir(), "root")
 			if err != nil {
@@ -1358,7 +1370,7 @@ func TestMountNewDir(t *testing.T) {
 }
 
 func TestReadonlyRoot(t *testing.T) {
-	for name, conf := range configs(t, overlay) {
+	for name, conf := range configsWithVFS2(t, []configOption{overlay}) {
 		t.Run(name, func(t *testing.T) {
 			spec := testutil.NewSpecWithArgs("/bin/touch", "/foo")
 			spec.Root.Readonly = true
@@ -1476,7 +1488,7 @@ func TestUIDMap(t *testing.T) {
 }
 
 func TestReadonlyMount(t *testing.T) {
-	for name, conf := range configs(t, overlay) {
+	for name, conf := range configsWithVFS2(t, []configOption{overlay}) {
 		t.Run(name, func(t *testing.T) {
 			dir, err := ioutil.TempDir(testutil.TmpDir(), "ro-mount")
 			spec := testutil.NewSpecWithArgs("/bin/touch", path.Join(dir, "file"))
@@ -1548,6 +1560,14 @@ func TestBindMountByOption(t *testing.T) {
 // TestAbbreviatedIDs checks that runsc supports using abbreviated container
 // IDs in place of full IDs.
 func TestAbbreviatedIDs(t *testing.T) {
+	doAbbreviatedIDsTest(t, false)
+}
+
+func TestAbbreviatedIDsVFS2(t *testing.T) {
+	doAbbreviatedIDsTest(t, true)
+}
+
+func doAbbreviatedIDsTest(t *testing.T, vfs2 bool) {
 	rootDir, cleanup, err := testutil.SetupRootDir()
 	if err != nil {
 		t.Fatalf("error creating root dir: %v", err)
@@ -1556,6 +1576,7 @@ func TestAbbreviatedIDs(t *testing.T) {
 
 	conf := testutil.TestConfig(t)
 	conf.RootDir = rootDir
+	conf.VFS2 = vfs2
 
 	cids := []string{
 		"foo-" + testutil.RandomContainerID(),
@@ -1611,9 +1632,19 @@ func TestAbbreviatedIDs(t *testing.T) {
 }
 
 func TestGoferExits(t *testing.T) {
+	doGoferExitTest(t, false)
+}
+
+func TestGoferExitsVFS2(t *testing.T) {
+	doGoferExitTest(t, true)
+}
+
+func doGoferExitTest(t *testing.T, vfs2 bool) {
 	spec := testutil.NewSpecWithArgs("/bin/sleep", "10000")
 	conf := testutil.TestConfig(t)
+	conf.VFS2 = vfs2
 	_, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf)
+
 	if err != nil {
 		t.Fatalf("error setting up container: %v", err)
 	}
@@ -1733,7 +1764,7 @@ func TestUserLog(t *testing.T) {
 }
 
 func TestWaitOnExitedSandbox(t *testing.T) {
-	for name, conf := range configs(t, all...) {
+	for name, conf := range configsWithVFS2(t, all) {
 		t.Run(name, func(t *testing.T) {
 			// Run a shell that sleeps for 1 second and then exits with a
 			// non-zero code.
@@ -1786,8 +1817,17 @@ func TestWaitOnExitedSandbox(t *testing.T) {
 }
 
 func TestDestroyNotStarted(t *testing.T) {
+	doDestroyNotStartedTest(t, false)
+}
+
+func TestDestroyNotStartedVFS2(t *testing.T) {
+	doDestroyNotStartedTest(t, true)
+}
+
+func doDestroyNotStartedTest(t *testing.T, vfs2 bool) {
 	spec := testutil.NewSpecWithArgs("/bin/sleep", "100")
 	conf := testutil.TestConfig(t)
+	conf.VFS2 = vfs2
 	_, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf)
 	if err != nil {
 		t.Fatalf("error setting up container: %v", err)
@@ -1811,9 +1851,18 @@ func TestDestroyNotStarted(t *testing.T) {
 
 // TestDestroyStarting attempts to force a race between start and destroy.
 func TestDestroyStarting(t *testing.T) {
+	doDestroyStartingTest(t, false)
+}
+
+func TestDestroyStartedVFS2(t *testing.T) {
+	doDestroyStartingTest(t, true)
+}
+
+func doDestroyStartingTest(t *testing.T, vfs2 bool) {
 	for i := 0; i < 10; i++ {
 		spec := testutil.NewSpecWithArgs("/bin/sleep", "100")
 		conf := testutil.TestConfig(t)
+		conf.VFS2 = vfs2
 		rootDir, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf)
 		if err != nil {
 			t.Fatalf("error setting up container: %v", err)
diff --git a/runsc/main.go b/runsc/main.go
index 2baba90f8..8e594c58e 100644
--- a/runsc/main.go
+++ b/runsc/main.go
@@ -330,7 +330,7 @@ func main() {
 		log.Infof("Exiting with status: %v", ws)
 		if ws.Signaled() {
 			// No good way to return it, emulate what the shell does. Maybe raise
-			// signall to self?
+			// signal to self?
 			os.Exit(128 + int(ws.Signal()))
 		}
 		os.Exit(ws.ExitStatus())