9 files changed, 203 insertions, 77 deletions
diff --git a/runsc/boot/controller.go b/runsc/boot/controller.go
index 17e774e0c..8125d5061 100644
--- a/runsc/boot/controller.go
+++ b/runsc/boot/controller.go
@@ -101,11 +101,14 @@ const (
 
 // Profiling related commands (see pprof.go for more details).
 const (
-	StartCPUProfile = "Profile.StartCPUProfile"
-	StopCPUProfile  = "Profile.StopCPUProfile"
-	HeapProfile     = "Profile.HeapProfile"
-	StartTrace      = "Profile.StartTrace"
-	StopTrace       = "Profile.StopTrace"
+	StartCPUProfile  = "Profile.StartCPUProfile"
+	StopCPUProfile   = "Profile.StopCPUProfile"
+	HeapProfile      = "Profile.HeapProfile"
+	GoroutineProfile = "Profile.GoroutineProfile"
+	BlockProfile     = "Profile.BlockProfile"
+	MutexProfile     = "Profile.MutexProfile"
+	StartTrace       = "Profile.StartTrace"
+	StopTrace        = "Profile.StopTrace"
 )
 
 // Logging related commands (see logging.go for more details).
diff --git a/runsc/boot/filter/config.go b/runsc/boot/filter/config.go
index a4627905e..f459d1973 100644
--- a/runsc/boot/filter/config.go
+++ b/runsc/boot/filter/config.go
@@ -284,6 +284,7 @@ var allowedSyscalls = seccomp.SyscallRules{
 		{seccomp.AllowAny{}, seccomp.AllowValue(syscall.SHUT_RDWR)},
 	},
 	syscall.SYS_SIGALTSTACK:     {},
+	unix.SYS_STATX:              {},
 	syscall.SYS_SYNC_FILE_RANGE: {},
 	syscall.SYS_TGKILL: []seccomp.Rule{
 		{
diff --git a/runsc/cmd/boot.go b/runsc/cmd/boot.go
index 0f3da69a0..0938944a6 100644
--- a/runsc/cmd/boot.go
+++ b/runsc/cmd/boot.go
@@ -23,6 +23,7 @@ import (
 
 	"github.com/google/subcommands"
 	specs "github.com/opencontainers/runtime-spec/specs-go"
+	"golang.org/x/sys/unix"
 	"gvisor.dev/gvisor/pkg/log"
 	"gvisor.dev/gvisor/runsc/boot"
 	"gvisor.dev/gvisor/runsc/boot/platforms"
@@ -82,8 +83,13 @@ type Boot struct {
 	// sandbox (e.g. gofer) and sent through this FD.
 	mountsFD int
 
-	// pidns is set if the sanadbox is in its own pid namespace.
+	// pidns is set if the sandbox is in its own pid namespace.
 	pidns bool
+
+	// attached is set to true to kill the sandbox process when the parent process
+	// terminates. This flag is set when the command execve's itself because
+	// parent death signal doesn't propagate through execve when uid/gid changes.
+	attached bool
 }
 
 // Name implements subcommands.Command.Name.
@@ -118,6 +124,7 @@ func (b *Boot) SetFlags(f *flag.FlagSet) {
 	f.IntVar(&b.userLogFD, "user-log-fd", 0, "file descriptor to write user logs to. 0 means no logging.")
 	f.IntVar(&b.startSyncFD, "start-sync-fd", -1, "required FD to used to synchronize sandbox startup")
 	f.IntVar(&b.mountsFD, "mounts-fd", -1, "mountsFD is the file descriptor to read list of mounts after they have been resolved (direct paths, no symlinks).")
+	f.BoolVar(&b.attached, "attached", false, "if attached is true, kills the sandbox process when the parent process terminates")
 }
 
 // Execute implements subcommands.Command.Execute.  It starts a sandbox in a
@@ -133,29 +140,32 @@ func (b *Boot) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
 
 	conf := args[0].(*boot.Config)
 
+	if b.attached {
+		// Ensure this process is killed after parent process terminates when
+		// attached mode is enabled. In the unfortunate event that the parent
+		// terminates before this point, this process leaks.
+		if err := unix.Prctl(unix.PR_SET_PDEATHSIG, uintptr(unix.SIGKILL), 0, 0, 0); err != nil {
+			Fatalf("error setting parent death signal: %v", err)
+		}
+	}
+
 	if b.setUpRoot {
 		if err := setUpChroot(b.pidns); err != nil {
 			Fatalf("error setting up chroot: %v", err)
 		}
 
-		if !b.applyCaps {
-			// Remove --setup-root arg to call myself.
-			var args []string
-			for _, arg := range os.Args {
-				if !strings.Contains(arg, "setup-root") {
-					args = append(args, arg)
-				}
-			}
-			if !conf.Rootless {
-				// Note that we've already read the spec from the spec FD, and
-				// we will read it again after the exec call. This works
-				// because the ReadSpecFromFile function seeks to the beginning
-				// of the file before reading.
-				if err := callSelfAsNobody(args); err != nil {
-					Fatalf("%v", err)
-				}
-				panic("callSelfAsNobody must never return success")
+		if !b.applyCaps && !conf.Rootless {
+			// Remove --apply-caps arg to call myself. It has already been done.
+			args := prepareArgs(b.attached, "setup-root")
+
+			// Note that we've already read the spec from the spec FD, and
+			// we will read it again after the exec call. This works
+			// because the ReadSpecFromFile function seeks to the beginning
+			// of the file before reading.
+			if err := callSelfAsNobody(args); err != nil {
+				Fatalf("%v", err)
 			}
+			panic("callSelfAsNobody must never return success")
 		}
 	}
 
@@ -181,13 +191,9 @@ func (b *Boot) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
 			caps.Permitted = append(caps.Permitted, c)
 		}
 
-		// Remove --apply-caps arg to call myself.
-		var args []string
-		for _, arg := range os.Args {
-			if !strings.Contains(arg, "setup-root") && !strings.Contains(arg, "apply-caps") {
-				args = append(args, arg)
-			}
-		}
+		// Remove --apply-caps and --setup-root arg to call myself. Both have
+		// already been done.
+		args := prepareArgs(b.attached, "setup-root", "apply-caps")
 
 		// Note that we've already read the spec from the spec FD, and
 		// we will read it again after the exec call. This works
@@ -258,3 +264,22 @@ func (b *Boot) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
 	l.Destroy()
 	return subcommands.ExitSuccess
 }
+
+func prepareArgs(attached bool, exclude ...string) []string {
+	var args []string
+	for _, arg := range os.Args {
+		for _, excl := range exclude {
+			if strings.Contains(arg, excl) {
+				goto skip
+			}
+		}
+		args = append(args, arg)
+		if attached && arg == "boot" {
+			// Strategicaly place "--attached" after the command. This is needed
+			// to ensure the new process is killed when the parent process terminates.
+			args = append(args, "--attached")
+		}
+	skip:
+	}
+	return args
+}
diff --git a/runsc/cmd/debug.go b/runsc/cmd/debug.go
index 79965460e..b5de2588b 100644
--- a/runsc/cmd/debug.go
+++ b/runsc/cmd/debug.go
@@ -32,17 +32,20 @@ import (
 
 // Debug implements subcommands.Command for the "debug" command.
 type Debug struct {
-	pid         int
-	stacks      bool
-	signal      int
-	profileHeap string
-	profileCPU  string
-	trace       string
-	strace      string
-	logLevel    string
-	logPackets  string
-	duration    time.Duration
-	ps          bool
+	pid              int
+	stacks           bool
+	signal           int
+	profileHeap      string
+	profileCPU       string
+	profileGoroutine string
+	profileBlock     string
+	profileMutex     string
+	trace            string
+	strace           string
+	logLevel         string
+	logPackets       string
+	duration         time.Duration
+	ps               bool
 }
 
 // Name implements subcommands.Command.
@@ -66,6 +69,9 @@ func (d *Debug) SetFlags(f *flag.FlagSet) {
 	f.BoolVar(&d.stacks, "stacks", false, "if true, dumps all sandbox stacks to the log")
 	f.StringVar(&d.profileHeap, "profile-heap", "", "writes heap profile to the given file.")
 	f.StringVar(&d.profileCPU, "profile-cpu", "", "writes CPU profile to the given file.")
+	f.StringVar(&d.profileGoroutine, "profile-goroutine", "", "writes goroutine profile to the given file.")
+	f.StringVar(&d.profileBlock, "profile-block", "", "writes block profile to the given file.")
+	f.StringVar(&d.profileMutex, "profile-mutex", "", "writes mutex profile to the given file.")
 	f.DurationVar(&d.duration, "duration", time.Second, "amount of time to wait for CPU and trace profiles")
 	f.StringVar(&d.trace, "trace", "", "writes an execution trace to the given file.")
 	f.IntVar(&d.signal, "signal", -1, "sends signal to the sandbox")
@@ -147,6 +153,42 @@ func (d *Debug) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
 		}
 		log.Infof("Heap profile written to %q", d.profileHeap)
 	}
+	if d.profileGoroutine != "" {
+		f, err := os.Create(d.profileGoroutine)
+		if err != nil {
+			return Errorf(err.Error())
+		}
+		defer f.Close()
+
+		if err := c.Sandbox.GoroutineProfile(f); err != nil {
+			return Errorf(err.Error())
+		}
+		log.Infof("Goroutine profile written to %q", d.profileGoroutine)
+	}
+	if d.profileBlock != "" {
+		f, err := os.Create(d.profileBlock)
+		if err != nil {
+			return Errorf(err.Error())
+		}
+		defer f.Close()
+
+		if err := c.Sandbox.BlockProfile(f); err != nil {
+			return Errorf(err.Error())
+		}
+		log.Infof("Block profile written to %q", d.profileBlock)
+	}
+	if d.profileMutex != "" {
+		f, err := os.Create(d.profileMutex)
+		if err != nil {
+			return Errorf(err.Error())
+		}
+		defer f.Close()
+
+		if err := c.Sandbox.MutexProfile(f); err != nil {
+			return Errorf(err.Error())
+		}
+		log.Infof("Mutex profile written to %q", d.profileMutex)
+	}
 
 	delay := false
 	if d.profileCPU != "" {
diff --git a/runsc/container/console_test.go b/runsc/container/console_test.go
index c2518d52b..651615d4c 100644
--- a/runsc/container/console_test.go
+++ b/runsc/container/console_test.go
@@ -333,13 +333,13 @@ func TestJobControlSignalRootContainer(t *testing.T) {
 	// file. Writes after a certain point will block unless we drain the
 	// PTY, so we must continually copy from it.
 	//
-	// We log the output to stdout for debugabilitly, and also to a buffer,
+	// We log the output to stderr for debugabilitly, and also to a buffer,
 	// since we wait on particular output from bash below. We use a custom
 	// blockingBuffer which is thread-safe and also blocks on Read calls,
 	// which makes this a suitable Reader for WaitUntilRead.
 	ptyBuf := newBlockingBuffer()
 	tee := io.TeeReader(ptyMaster, ptyBuf)
-	go io.Copy(os.Stdout, tee)
+	go io.Copy(os.Stderr, tee)
 
 	// Start the container.
 	if err := c.Start(conf); err != nil {
diff --git a/runsc/container/container_test.go b/runsc/container/container_test.go
index bdd65b498..442e80ac0 100644
--- a/runsc/container/container_test.go
+++ b/runsc/container/container_test.go
@@ -124,23 +124,6 @@ func procListsEqual(got, want []*control.Process) (bool, error) {
 	return true, nil
 }
 
-// getAndCheckProcLists is similar to waitForProcessList, but does not wait and retry the
-// test for equality. This is because we already confirmed that exec occurred.
-func getAndCheckProcLists(cont *Container, want []*control.Process) error {
-	got, err := cont.Processes()
-	if err != nil {
-		return fmt.Errorf("error getting process data from container: %v", err)
-	}
-	equal, err := procListsEqual(got, want)
-	if err != nil {
-		return err
-	}
-	if equal {
-		return nil
-	}
-	return fmt.Errorf("container got process list: %s, want: %s", procListToString(got), procListToString(want))
-}
-
 func procListToString(pl []*control.Process) string {
 	strs := make([]string, 0, len(pl))
 	for _, p := range pl {
@@ -2092,7 +2075,7 @@ func TestOverlayfsStaleRead(t *testing.T) {
 	defer out.Close()
 
 	const want = "foobar"
-	cmd := fmt.Sprintf("cat %q && echo %q> %q && cp %q %q", in.Name(), want, in.Name(), in.Name(), out.Name())
+	cmd := fmt.Sprintf("cat %q >&2 && echo %q> %q && cp %q %q", in.Name(), want, in.Name(), in.Name(), out.Name())
 	spec := testutil.NewSpecWithArgs("/bin/bash", "-c", cmd)
 	if err := run(spec, conf); err != nil {
 		t.Fatalf("Error running container: %v", err)
diff --git a/runsc/sandbox/sandbox.go b/runsc/sandbox/sandbox.go
index 67e27df4d..8de75ae57 100644
--- a/runsc/sandbox/sandbox.go
+++ b/runsc/sandbox/sandbox.go
@@ -444,6 +444,12 @@ func (s *Sandbox) createSandboxProcess(conf *boot.Config, args *Args, startSyncF
 		nextFD++
 	}
 
+	// TODO(b/151157106): syscall tests fail by timeout if asyncpreemptoff
+	// isn't set.
+	if conf.Platform == "kvm" {
+		cmd.Env = append(cmd.Env, "GODEBUG=asyncpreemptoff=1")
+	}
+
 	// The current process' stdio must be passed to the application via the
 	// --stdio-fds flag. The stdio of the sandbox process itself must not
 	// be connected to the same FDs, otherwise we risk leaking sandbox
@@ -695,6 +701,13 @@ func (s *Sandbox) createSandboxProcess(conf *boot.Config, args *Args, startSyncF
 		nextFD++
 	}
 
+	if args.Attached {
+		// Kill sandbox if parent process exits in attached mode.
+		cmd.SysProcAttr.Pdeathsig = syscall.SIGKILL
+		// Tells boot that any process it creates must have pdeathsig set.
+		cmd.Args = append(cmd.Args, "--attached")
+	}
+
 	// Add container as the last argument.
 	cmd.Args = append(cmd.Args, s.ID)
 
@@ -703,11 +716,6 @@ func (s *Sandbox) createSandboxProcess(conf *boot.Config, args *Args, startSyncF
 		log.Debugf("Donating FD %d: %q", i+3, f.Name())
 	}
 
-	if args.Attached {
-		// Kill sandbox if parent process exits in attached mode.
-		cmd.SysProcAttr.Pdeathsig = syscall.SIGKILL
-	}
-
 	log.Debugf("Starting sandbox: %s %v", binPath, cmd.Args)
 	log.Debugf("SysProcAttr: %+v", cmd.SysProcAttr)
 	if err := specutils.StartInNS(cmd, nss); err != nil {
@@ -990,6 +998,66 @@ func (s *Sandbox) StopCPUProfile() error {
 	return nil
 }
 
+// GoroutineProfile writes a goroutine profile to the given file.
+func (s *Sandbox) GoroutineProfile(f *os.File) error {
+	log.Debugf("Goroutine profile %q", s.ID)
+	conn, err := s.sandboxConnect()
+	if err != nil {
+		return err
+	}
+	defer conn.Close()
+
+	opts := control.ProfileOpts{
+		FilePayload: urpc.FilePayload{
+			Files: []*os.File{f},
+		},
+	}
+	if err := conn.Call(boot.GoroutineProfile, &opts, nil); err != nil {
+		return fmt.Errorf("getting sandbox %q goroutine profile: %v", s.ID, err)
+	}
+	return nil
+}
+
+// BlockProfile writes a block profile to the given file.
+func (s *Sandbox) BlockProfile(f *os.File) error {
+	log.Debugf("Block profile %q", s.ID)
+	conn, err := s.sandboxConnect()
+	if err != nil {
+		return err
+	}
+	defer conn.Close()
+
+	opts := control.ProfileOpts{
+		FilePayload: urpc.FilePayload{
+			Files: []*os.File{f},
+		},
+	}
+	if err := conn.Call(boot.BlockProfile, &opts, nil); err != nil {
+		return fmt.Errorf("getting sandbox %q block profile: %v", s.ID, err)
+	}
+	return nil
+}
+
+// MutexProfile writes a mutex profile to the given file.
+func (s *Sandbox) MutexProfile(f *os.File) error {
+	log.Debugf("Mutex profile %q", s.ID)
+	conn, err := s.sandboxConnect()
+	if err != nil {
+		return err
+	}
+	defer conn.Close()
+
+	opts := control.ProfileOpts{
+		FilePayload: urpc.FilePayload{
+			Files: []*os.File{f},
+		},
+	}
+	if err := conn.Call(boot.MutexProfile, &opts, nil); err != nil {
+		return fmt.Errorf("getting sandbox %q mutex profile: %v", s.ID, err)
+	}
+	return nil
+}
+
 // StartTrace start trace  writing to the given file.
 func (s *Sandbox) StartTrace(f *os.File) error {
 	log.Debugf("Trace start %q", s.ID)
diff --git a/runsc/specutils/namespace.go b/runsc/specutils/namespace.go
index c7dd3051c..60bb7b7ee 100644
--- a/runsc/specutils/namespace.go
+++ b/runsc/specutils/namespace.go
@@ -252,6 +252,9 @@ func MaybeRunAsRoot() error {
 		},
 		Credential:                 &syscall.Credential{Uid: 0, Gid: 0},
 		GidMappingsEnableSetgroups: false,
+
+		// Make sure child is killed when the parent terminates.
+		Pdeathsig: syscall.SIGKILL,
 	}
 
 	cmd.Env = os.Environ()
diff --git a/runsc/testutil/testutil.go b/runsc/testutil/testutil.go
index 92d677e71..51e487715 100644
--- a/runsc/testutil/testutil.go
+++ b/runsc/testutil/testutil.go
@@ -87,18 +87,19 @@ func TestConfig() *boot.Config {
 		logDir = dir + "/"
 	}
 	return &boot.Config{
-		Debug:           true,
-		DebugLog:        logDir,
-		LogFormat:       "text",
-		DebugLogFormat:  "text",
-		AlsoLogToStderr: true,
-		LogPackets:      true,
-		Network:         boot.NetworkNone,
-		Strace:          true,
-		Platform:        "ptrace",
-		FileAccess:      boot.FileAccessExclusive,
+		Debug:              true,
+		DebugLog:           logDir,
+		LogFormat:          "text",
+		DebugLogFormat:     "text",
+		AlsoLogToStderr:    true,
+		LogPackets:         true,
+		Network:            boot.NetworkNone,
+		Strace:             true,
+		Platform:           "ptrace",
+		FileAccess:         boot.FileAccessExclusive,
+		NumNetworkChannels: 1,
+
 		TestOnlyAllowRunAsCurrentUserWithoutChroot: true,
-		NumNetworkChannels:                         1,
 	}
 }