From 67a2ab1438cdccbe045143bbfaa807cf83110ebc Mon Sep 17 00:00:00 2001
From: Adin Scannell <ascannell@google.com>
Date: Tue, 3 Sep 2019 22:01:34 -0700
Subject: Impose order on test scripts.

The simple test script has gotten out of control. Shard this script into
different pieces and attempt to impose order on overall test structure. This
change helps lay some of the foundations for future improvements.

 * The runsc/test directories are moved into just test/.
 * The runsc/test/testutil package is split into logical pieces.
 * The scripts/ directory contains new top-level targets.
 * Each test is now responsible for building targets it requires.
 * The install functionality is moved into `runsc` itself for simplicity.
 * The existing kokoro run_tests.sh file now just calls all (can be split).

After this change is merged, I will create multiple distinct workflows for
Kokoro, one for each of the scripts currently targeted by `run_tests.sh` today,
which should dramatically reduce the time-to-run for the Kokoro tests, and
provides a better foundation for further improvements to the infrastructure.

PiperOrigin-RevId: 267081397
---
 test/e2e/BUILD               |  31 ++++
 test/e2e/exec_test.go        | 156 +++++++++++++++++++
 test/e2e/integration.go      |  16 ++
 test/e2e/integration_test.go | 348 +++++++++++++++++++++++++++++++++++++++++++
 test/e2e/regression_test.go  |  45 ++++++
 5 files changed, 596 insertions(+)
 create mode 100644 test/e2e/BUILD
 create mode 100644 test/e2e/exec_test.go
 create mode 100644 test/e2e/integration.go
 create mode 100644 test/e2e/integration_test.go
 create mode 100644 test/e2e/regression_test.go

(limited to 'test/e2e')

diff --git a/test/e2e/BUILD b/test/e2e/BUILD
new file mode 100644
index 000000000..99442cffb
--- /dev/null
+++ b/test/e2e/BUILD
@@ -0,0 +1,31 @@
+load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test")
+
+package(licenses = ["notice"])
+
+go_test(
+    name = "integration_test",
+    size = "large",
+    srcs = [
+        "exec_test.go",
+        "integration_test.go",
+        "regression_test.go",
+    ],
+    embed = [":integration"],
+    tags = [
+        # Requires docker and runsc to be configured before the test runs.
+        "manual",
+        "local",
+    ],
+    visibility = ["//:sandbox"],
+    deps = [
+        "//pkg/abi/linux",
+        "//runsc/dockerutil",
+        "//runsc/testutil",
+    ],
+)
+
+go_library(
+    name = "integration",
+    srcs = ["integration.go"],
+    importpath = "gvisor.dev/gvisor/test/integration",
+)
diff --git a/test/e2e/exec_test.go b/test/e2e/exec_test.go
new file mode 100644
index 000000000..ce2c4f689
--- /dev/null
+++ b/test/e2e/exec_test.go
@@ -0,0 +1,156 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package image provides end-to-end integration tests for runsc. These tests
+// require docker and runsc to be installed on the machine.
+//
+// Each test calls docker commands to start up a container, and tests that it
+// is behaving properly, with various runsc commands. The container is killed
+// and deleted at the end.
+
+package integration
+
+import (
+	"fmt"
+	"strconv"
+	"strings"
+	"syscall"
+	"testing"
+	"time"
+
+	"gvisor.dev/gvisor/pkg/abi/linux"
+	"gvisor.dev/gvisor/runsc/dockerutil"
+)
+
+func TestExecCapabilities(t *testing.T) {
+	if err := dockerutil.Pull("alpine"); err != nil {
+		t.Fatalf("docker pull failed: %v", err)
+	}
+	d := dockerutil.MakeDocker("exec-test")
+
+	// Start the container.
+	if err := d.Run("alpine", "sh", "-c", "cat /proc/self/status; sleep 100"); err != nil {
+		t.Fatalf("docker run failed: %v", err)
+	}
+	defer d.CleanUp()
+
+	matches, err := d.WaitForOutputSubmatch("CapEff:\t([0-9a-f]+)\n", 5*time.Second)
+	if err != nil {
+		t.Fatalf("WaitForOutputSubmatch() timeout: %v", err)
+	}
+	if len(matches) != 2 {
+		t.Fatalf("There should be a match for the whole line and the capability bitmask")
+	}
+	capString := matches[1]
+	t.Log("Root capabilities:", capString)
+
+	// CAP_NET_RAW was in the capability set for the container, but was
+	// removed. However, `exec` does not remove it. Verify that it's not
+	// set in the container, then re-add it for comparison.
+	caps, err := strconv.ParseUint(capString, 16, 64)
+	if err != nil {
+		t.Fatalf("failed to convert capabilities %q: %v", capString, err)
+	}
+	if caps&(1<<uint64(linux.CAP_NET_RAW)) != 0 {
+		t.Fatalf("CAP_NET_RAW should be filtered, but is set in the container: %x", caps)
+	}
+	caps |= 1 << uint64(linux.CAP_NET_RAW)
+	want := fmt.Sprintf("CapEff:\t%016x\n", caps)
+
+	// Now check that exec'd process capabilities match the root.
+	got, err := d.Exec("grep", "CapEff:", "/proc/self/status")
+	if err != nil {
+		t.Fatalf("docker exec failed: %v", err)
+	}
+	if got != want {
+		t.Errorf("wrong capabilities, got: %q, want: %q", got, want)
+	}
+}
+
+func TestExecJobControl(t *testing.T) {
+	if err := dockerutil.Pull("alpine"); err != nil {
+		t.Fatalf("docker pull failed: %v", err)
+	}
+	d := dockerutil.MakeDocker("exec-job-control-test")
+
+	// Start the container.
+	if err := d.Run("alpine", "sleep", "1000"); err != nil {
+		t.Fatalf("docker run failed: %v", err)
+	}
+	defer d.CleanUp()
+
+	// Exec 'sh' with an attached pty.
+	cmd, ptmx, err := d.ExecWithTerminal("sh")
+	if err != nil {
+		t.Fatalf("docker exec failed: %v", err)
+	}
+	defer ptmx.Close()
+
+	// Call "sleep 100 | cat" in the shell.  We pipe to cat so that there
+	// will be two processes in the foreground process group.
+	if _, err := ptmx.Write([]byte("sleep 100 | cat\n")); err != nil {
+		t.Fatalf("error writing to pty: %v", err)
+	}
+
+	// Give shell a few seconds to start executing the sleep.
+	time.Sleep(2 * time.Second)
+
+	// Send a ^C to the pty, which should kill sleep and cat, but not the
+	// shell.  \x03 is ASCII "end of text", which is the same as ^C.
+	if _, err := ptmx.Write([]byte{'\x03'}); err != nil {
+		t.Fatalf("error writing to pty: %v", err)
+	}
+
+	// The shell should still be alive at this point. Sleep should have
+	// exited with code 2+128=130. We'll exit with 10 plus that number, so
+	// that we can be sure that the shell did not get signalled.
+	if _, err := ptmx.Write([]byte("exit $(expr $? + 10)\n")); err != nil {
+		t.Fatalf("error writing to pty: %v", err)
+	}
+
+	// Exec process should exit with code 10+130=140.
+	ps, err := cmd.Process.Wait()
+	if err != nil {
+		t.Fatalf("error waiting for exec process: %v", err)
+	}
+	ws := ps.Sys().(syscall.WaitStatus)
+	if !ws.Exited() {
+		t.Errorf("ws.Exited got false, want true")
+	}
+	if got, want := ws.ExitStatus(), 140; got != want {
+		t.Errorf("ws.ExitStatus got %d, want %d", got, want)
+	}
+}
+
+// Test that failure to exec returns proper error message.
+func TestExecError(t *testing.T) {
+	if err := dockerutil.Pull("alpine"); err != nil {
+		t.Fatalf("docker pull failed: %v", err)
+	}
+	d := dockerutil.MakeDocker("exec-error-test")
+
+	// Start the container.
+	if err := d.Run("alpine", "sleep", "1000"); err != nil {
+		t.Fatalf("docker run failed: %v", err)
+	}
+	defer d.CleanUp()
+
+	_, err := d.Exec("no_can_find")
+	if err == nil {
+		t.Fatalf("docker exec didn't fail")
+	}
+	if want := `error finding executable "no_can_find" in PATH`; !strings.Contains(err.Error(), want) {
+		t.Fatalf("docker exec wrong error, got: %s, want: .*%s.*", err.Error(), want)
+	}
+}
diff --git a/test/e2e/integration.go b/test/e2e/integration.go
new file mode 100644
index 000000000..4cd5f6c24
--- /dev/null
+++ b/test/e2e/integration.go
@@ -0,0 +1,16 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package integration is empty. See integration_test.go for description.
+package integration
diff --git a/test/e2e/integration_test.go b/test/e2e/integration_test.go
new file mode 100644
index 000000000..7cc0de129
--- /dev/null
+++ b/test/e2e/integration_test.go
@@ -0,0 +1,348 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package integration provides end-to-end integration tests for runsc.
+//
+// Each test calls docker commands to start up a container, and tests that it is
+// behaving properly, with various runsc commands. The container is killed and
+// deleted at the end.
+//
+// Setup instruction in test/README.md.
+package integration
+
+import (
+	"flag"
+	"fmt"
+	"net"
+	"net/http"
+	"os"
+	"strconv"
+	"strings"
+	"syscall"
+	"testing"
+	"time"
+
+	"gvisor.dev/gvisor/runsc/dockerutil"
+	"gvisor.dev/gvisor/runsc/testutil"
+)
+
+// httpRequestSucceeds issues a GET to http://server:port using client and
+// returns nil only if the request completes with status 200 OK.
+func httpRequestSucceeds(client http.Client, server string, port int) error {
+	resp, err := client.Get(fmt.Sprintf("http://%s:%d", server, port))
+	if err != nil {
+		return fmt.Errorf("error reaching http server: %v", err)
+	}
+	defer resp.Body.Close() // Body is unused, but must be closed to free the connection.
+	if want := http.StatusOK; resp.StatusCode != want {
+		return fmt.Errorf("wrong response code, got: %d, want: %d", resp.StatusCode, want)
+	}
+	return nil
+}
+
+// TestLifeCycle tests a basic Create/Start/Stop docker container life cycle.
+func TestLifeCycle(t *testing.T) {
+	if err := dockerutil.Pull("nginx"); err != nil {
+		t.Fatal("docker pull failed:", err)
+	}
+	d := dockerutil.MakeDocker("lifecycle-test")
+	if err := d.Create("-p", "80", "nginx"); err != nil {
+		t.Fatal("docker create failed:", err)
+	}
+	if err := d.Start(); err != nil {
+		d.CleanUp()
+		t.Fatal("docker start failed:", err)
+	}
+
+	// Test that container is working
+	port, err := d.FindPort(80)
+	if err != nil {
+		t.Fatal("docker.FindPort(80) failed: ", err)
+	}
+	if err := testutil.WaitForHTTP(port, 30*time.Second); err != nil {
+		t.Fatal("WaitForHTTP() timeout:", err)
+	}
+	client := http.Client{Timeout: time.Duration(2 * time.Second)}
+	if err := httpRequestSucceeds(client, "localhost", port); err != nil {
+		t.Error("http request failed:", err)
+	}
+
+	if err := d.Stop(); err != nil {
+		d.CleanUp()
+		t.Fatal("docker stop failed:", err)
+	}
+	if err := d.Remove(); err != nil {
+		t.Fatal("docker rm failed:", err)
+	}
+}
+
+func TestPauseResume(t *testing.T) {
+	const img = "gcr.io/gvisor-presubmit/python-hello"
+	if !testutil.IsCheckpointSupported() {
+		t.Log("Checkpoint is not supported, skipping test.")
+		return
+	}
+
+	if err := dockerutil.Pull(img); err != nil {
+		t.Fatal("docker pull failed:", err)
+	}
+	d := dockerutil.MakeDocker("pause-resume-test")
+	if err := d.Run("-p", "8080", img); err != nil {
+		t.Fatalf("docker run failed: %v", err)
+	}
+	defer d.CleanUp()
+
+	// Find where port 8080 is mapped to.
+	port, err := d.FindPort(8080)
+	if err != nil {
+		t.Fatal("docker.FindPort(8080) failed:", err)
+	}
+
+	// Wait until it's up and running.
+	if err := testutil.WaitForHTTP(port, 30*time.Second); err != nil {
+		t.Fatal("WaitForHTTP() timeout:", err)
+	}
+
+	// Check that container is working.
+	client := http.Client{Timeout: time.Duration(2 * time.Second)}
+	if err := httpRequestSucceeds(client, "localhost", port); err != nil {
+		t.Error("http request failed:", err)
+	}
+
+	if err := d.Pause(); err != nil {
+		t.Fatal("docker pause failed:", err)
+	}
+
+	// Check if container is paused.
+	switch _, err := client.Get(fmt.Sprintf("http://localhost:%d", port)); v := err.(type) {
+	case nil:
+		t.Errorf("http req expected to fail but it succeeded")
+	case net.Error:
+		if !v.Timeout() {
+			t.Errorf("http req got error %v, wanted timeout", v)
+		}
+	default:
+		t.Errorf("http req got unexpected error %v", v)
+	}
+
+	if err := d.Unpause(); err != nil {
+		t.Fatal("docker unpause failed:", err)
+	}
+
+	// Wait until it's up and running.
+	if err := testutil.WaitForHTTP(port, 30*time.Second); err != nil {
+		t.Fatal("WaitForHTTP() timeout:", err)
+	}
+
+	// Check if container is working again.
+	if err := httpRequestSucceeds(client, "localhost", port); err != nil {
+		t.Error("http request failed:", err)
+	}
+}
+
+func TestCheckpointRestore(t *testing.T) {
+	const img = "gcr.io/gvisor-presubmit/python-hello"
+	if !testutil.IsCheckpointSupported() {
+		t.Log("Checkpoint is not supported, skipping test.")
+		return
+	}
+
+	if err := dockerutil.Pull(img); err != nil {
+		t.Fatal("docker pull failed:", err)
+	}
+	d := dockerutil.MakeDocker("save-restore-test")
+	if err := d.Run("-p", "8080", img); err != nil {
+		t.Fatalf("docker run failed: %v", err)
+	}
+	defer d.CleanUp()
+
+	if err := d.Checkpoint("test"); err != nil {
+		t.Fatal("docker checkpoint failed:", err)
+	}
+
+	if _, err := d.Wait(30 * time.Second); err != nil {
+		t.Fatal(err)
+	}
+
+	if err := d.Restore("test"); err != nil {
+		t.Fatal("docker restore failed:", err)
+	}
+
+	// Find where port 8080 is mapped to.
+	port, err := d.FindPort(8080)
+	if err != nil {
+		t.Fatal("docker.FindPort(8080) failed:", err)
+	}
+
+	// Wait until it's up and running.
+	if err := testutil.WaitForHTTP(port, 30*time.Second); err != nil {
+		t.Fatal("WaitForHTTP() timeout:", err)
+	}
+
+	// Check if container is working again.
+	client := http.Client{Timeout: time.Duration(2 * time.Second)}
+	if err := httpRequestSucceeds(client, "localhost", port); err != nil {
+		t.Error("http request failed:", err)
+	}
+}
+
+// Create client and server that talk to each other using the local IP.
+func TestConnectToSelf(t *testing.T) {
+	d := dockerutil.MakeDocker("connect-to-self-test")
+
+	// Creates server that replies "server" and exits. Sleeps at the end because
+	// 'docker exec' gets killed if the init process exits before it can finish.
+	if err := d.Run("ubuntu:trusty", "/bin/sh", "-c", "echo server | nc -l -p 8080 && sleep 1"); err != nil {
+		t.Fatal("docker run failed:", err)
+	}
+	defer d.CleanUp()
+
+	// Finds IP address for host.
+	ip, err := d.Exec("/bin/sh", "-c", "cat /etc/hosts | grep ${HOSTNAME} | awk '{print $1}'")
+	if err != nil {
+		t.Fatal("docker exec failed:", err)
+	}
+	ip = strings.TrimRight(ip, "\n")
+
+	// Runs client that sends "client" to the server and exits.
+	reply, err := d.Exec("/bin/sh", "-c", fmt.Sprintf("echo client | nc %s 8080", ip))
+	if err != nil {
+		t.Fatal("docker exec failed:", err)
+	}
+
+	// Ensure both client and server got the message from each other.
+	if want := "server\n"; reply != want {
+		t.Errorf("Error on server, want: %q, got: %q", want, reply)
+	}
+	if _, err := d.WaitForOutput("^client\n$", 1*time.Second); err != nil {
+		t.Fatal("docker.WaitForOutput(client) timeout:", err)
+	}
+}
+
+func TestMemLimit(t *testing.T) {
+	if err := dockerutil.Pull("alpine"); err != nil {
+		t.Fatal("docker pull failed:", err)
+	}
+	d := dockerutil.MakeDocker("cgroup-test")
+	cmd := "cat /proc/meminfo | grep MemTotal: | awk '{print $2}'"
+	out, err := d.RunFg("--memory=500MB", "alpine", "sh", "-c", cmd)
+	if err != nil {
+		t.Fatal("docker run failed:", err)
+	}
+	defer d.CleanUp()
+
+	// Remove warning message that swap isn't present.
+	if strings.HasPrefix(out, "WARNING") {
+		lines := strings.Split(out, "\n")
+		if len(lines) != 3 {
+			t.Fatalf("invalid output: %s", out)
+		}
+		out = lines[1]
+	}
+
+	got, err := strconv.ParseUint(strings.TrimSpace(out), 10, 64)
+	if err != nil {
+		t.Fatalf("failed to parse %q: %v", out, err)
+	}
+	if want := uint64(500 * 1024); got != want {
+		t.Errorf("MemTotal got: %d, want: %d", got, want)
+	}
+}
+
+func TestNumCPU(t *testing.T) {
+	if err := dockerutil.Pull("alpine"); err != nil {
+		t.Fatal("docker pull failed:", err)
+	}
+	d := dockerutil.MakeDocker("cgroup-test")
+	cmd := "cat /proc/cpuinfo | grep 'processor.*:' | wc -l"
+	out, err := d.RunFg("--cpuset-cpus=0", "alpine", "sh", "-c", cmd)
+	if err != nil {
+		t.Fatal("docker run failed:", err)
+	}
+	defer d.CleanUp()
+
+	got, err := strconv.Atoi(strings.TrimSpace(out))
+	if err != nil {
+		t.Fatalf("failed to parse %q: %v", out, err)
+	}
+	if want := 1; got != want {
+		t.Errorf("NumCPU got: %d, want: %d", got, want)
+	}
+}
+
+// TestJobControl tests that job control characters are handled properly.
+func TestJobControl(t *testing.T) {
+	if err := dockerutil.Pull("alpine"); err != nil {
+		t.Fatalf("docker pull failed: %v", err)
+	}
+	d := dockerutil.MakeDocker("job-control-test")
+
+	// Start the container with an attached PTY.
+	_, ptmx, err := d.RunWithPty("alpine", "sh")
+	if err != nil {
+		t.Fatalf("docker run failed: %v", err)
+	}
+	defer ptmx.Close()
+	defer d.CleanUp()
+
+	// Call "sleep 100" in the shell.
+	if _, err := ptmx.Write([]byte("sleep 100\n")); err != nil {
+		t.Fatalf("error writing to pty: %v", err)
+	}
+
+	// Give shell a few seconds to start executing the sleep.
+	time.Sleep(2 * time.Second)
+
+	// Send a ^C to the pty, which should kill sleep, but not the shell.
+	// \x03 is ASCII "end of text", which is the same as ^C.
+	if _, err := ptmx.Write([]byte{'\x03'}); err != nil {
+		t.Fatalf("error writing to pty: %v", err)
+	}
+
+	// The shell should still be alive at this point. Sleep should have
+	// exited with code 2+128=130. We'll exit with 10 plus that number, so
+	// that we can be sure that the shell did not get signalled.
+	if _, err := ptmx.Write([]byte("exit $(expr $? + 10)\n")); err != nil {
+		t.Fatalf("error writing to pty: %v", err)
+	}
+
+	// Wait for the container to exit.
+	got, err := d.Wait(5 * time.Second)
+	if err != nil {
+		t.Fatalf("error getting exit code: %v", err)
+	}
+	// Container should exit with code 10+130=140.
+	if want := syscall.WaitStatus(140); got != want {
+		t.Errorf("container exited with code %d want %d", got, want)
+	}
+}
+
+// TestTmpFile checks that files inside '/tmp' are not overridden. In addition,
+// it checks that working dir is created if it doesn't exist.
+func TestTmpFile(t *testing.T) {
+	if err := dockerutil.Pull("alpine"); err != nil {
+		t.Fatal("docker pull failed:", err)
+	}
+	d := dockerutil.MakeDocker("tmp-file-test")
+	if err := d.Run("-w=/tmp/foo/bar", "--read-only", "alpine", "touch", "/tmp/foo/bar/file"); err != nil {
+		t.Fatal("docker run failed:", err)
+	}
+	defer d.CleanUp()
+}
+
+func TestMain(m *testing.M) {
+	dockerutil.EnsureSupportedDockerVersion()
+	flag.Parse()
+	os.Exit(m.Run())
+}
diff --git a/test/e2e/regression_test.go b/test/e2e/regression_test.go
new file mode 100644
index 000000000..2488be383
--- /dev/null
+++ b/test/e2e/regression_test.go
@@ -0,0 +1,45 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package integration
+
+import (
+	"strings"
+	"testing"
+
+	"gvisor.dev/gvisor/runsc/dockerutil"
+)
+
+// Test that UDS can be created using overlay when parent directory is in lower
+// layer only (b/134090485).
+//
+// Prerequisite: the directory where the socket file is created must not have
+// been open for write before bind(2) is called.
+func TestBindOverlay(t *testing.T) {
+	if err := dockerutil.Pull("ubuntu:trusty"); err != nil {
+		t.Fatal("docker pull failed:", err)
+	}
+	d := dockerutil.MakeDocker("bind-overlay-test")
+
+	cmd := "nc -l -U /var/run/sock & p=$! && sleep 1 && echo foobar-asdf | nc -U /var/run/sock && wait $p"
+	got, err := d.RunFg("ubuntu:trusty", "bash", "-c", cmd)
+	if err != nil {
+		t.Fatal("docker run failed:", err)
+	}
+	// Register cleanup before the assertion below, which may call t.Fatalf.
+	defer d.CleanUp()
+	if want := "foobar-asdf"; !strings.Contains(got, want) {
+		t.Fatalf("docker run output is missing %q: %s", want, got)
+	}
+}
-- 
cgit v1.2.3


From 112736c579690b4fee61e842a65a62fe29b1acb5 Mon Sep 17 00:00:00 2001
From: Nicolas Lacasse <nlacasse@google.com>
Date: Mon, 23 Sep 2019 14:46:07 -0700
Subject: Add test that runsc exec inherits the same environment as run.

PiperOrigin-RevId: 270764996
---
 test/e2e/exec_test.go | 27 +++++++++++++++++++++++++--
 1 file changed, 25 insertions(+), 2 deletions(-)

(limited to 'test/e2e')

diff --git a/test/e2e/exec_test.go b/test/e2e/exec_test.go
index ce2c4f689..267679268 100644
--- a/test/e2e/exec_test.go
+++ b/test/e2e/exec_test.go
@@ -12,8 +12,8 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-// Package image provides end-to-end integration tests for runsc. These tests
-// require docker and runsc to be installed on the machine.
+// Package integration provides end-to-end integration tests for runsc. These
+// tests require docker and runsc to be installed on the machine.
 //
 // Each test calls docker commands to start up a container, and tests that it
 // is behaving properly, with various runsc commands. The container is killed
@@ -154,3 +154,26 @@ func TestExecError(t *testing.T) {
 		t.Fatalf("docker exec wrong error, got: %s, want: .*%s.*", err.Error(), want)
 	}
 }
+
+// Test that exec inherits environment from run.
+func TestExecEnv(t *testing.T) {
+	if err := dockerutil.Pull("alpine"); err != nil {
+		t.Fatalf("docker pull failed: %v", err)
+	}
+	d := dockerutil.MakeDocker("exec-env-test")
+
+	// Start the container with env FOO=BAR.
+	if err := d.Run("-e", "FOO=BAR", "alpine", "sleep", "1000"); err != nil {
+		t.Fatalf("docker run failed: %v", err)
+	}
+	defer d.CleanUp()
+
+	// Exec "echo $FOO".
+	got, err := d.Exec("/bin/sh", "-c", "echo $FOO")
+	if err != nil {
+		t.Fatalf("docker exec failed: %v", err)
+	}
+	if want := "BAR"; !strings.Contains(got, want) {
+		t.Errorf("wanted exec output to contain %q, got %q", want, got)
+	}
+}
-- 
cgit v1.2.3


From f2ea8e6b249d729d4616ee219c0472bfff93a575 Mon Sep 17 00:00:00 2001
From: Nicolas Lacasse <nlacasse@google.com>
Date: Mon, 23 Sep 2019 17:04:45 -0700
Subject: Always set HOME env var with `runsc exec`.

We already do this for `runsc run`, but need to do the same for `runsc exec`.

PiperOrigin-RevId: 270793459
---
 runsc/boot/BUILD               |  1 +
 runsc/boot/loader.go           | 32 +++++++++++++++-----------------
 runsc/boot/user.go             | 28 ++++++++++++++++++++++++++--
 runsc/boot/user_test.go        |  3 ++-
 runsc/cmd/exec.go              |  1 +
 runsc/dockerutil/dockerutil.go |  8 ++++++++
 test/e2e/exec_test.go          | 42 ++++++++++++++++++++++++++++++++++++++++++
 7 files changed, 95 insertions(+), 20 deletions(-)

(limited to 'test/e2e')

diff --git a/runsc/boot/BUILD b/runsc/boot/BUILD
index 588bb8851..54d1ab129 100644
--- a/runsc/boot/BUILD
+++ b/runsc/boot/BUILD
@@ -109,6 +109,7 @@ go_test(
         "//pkg/sentry/arch:registers_go_proto",
         "//pkg/sentry/context/contexttest",
         "//pkg/sentry/fs",
+        "//pkg/sentry/kernel/auth",
         "//pkg/unet",
         "//runsc/fsgofer",
         "@com_github_opencontainers_runtime-spec//specs-go:go_default_library",
diff --git a/runsc/boot/loader.go b/runsc/boot/loader.go
index 823a34619..d824d7dc5 100644
--- a/runsc/boot/loader.go
+++ b/runsc/boot/loader.go
@@ -20,7 +20,6 @@ import (
 	mrand "math/rand"
 	"os"
 	"runtime"
-	"strings"
 	"sync"
 	"sync/atomic"
 	"syscall"
@@ -535,23 +534,12 @@ func (l *Loader) run() error {
 			return err
 		}
 
-		// Read /etc/passwd for the user's HOME directory and set the HOME
-		// environment variable as required by POSIX if it is not overridden by
-		// the user.
-		hasHomeEnvv := false
-		for _, envv := range l.rootProcArgs.Envv {
-			if strings.HasPrefix(envv, "HOME=") {
-				hasHomeEnvv = true
-			}
-		}
-		if !hasHomeEnvv {
-			homeDir, err := getExecUserHome(ctx, l.rootProcArgs.MountNamespace, uint32(l.rootProcArgs.Credentials.RealKUID))
-			if err != nil {
-				return fmt.Errorf("error reading exec user: %v", err)
-			}
-
-			l.rootProcArgs.Envv = append(l.rootProcArgs.Envv, "HOME="+homeDir)
+		// Add the HOME environment variable if it is not already set.
+		envv, err := maybeAddExecUserHome(ctx, l.rootProcArgs.MountNamespace, l.rootProcArgs.Credentials.RealKUID, l.rootProcArgs.Envv)
+		if err != nil {
+			return err
 		}
+		l.rootProcArgs.Envv = envv
 
 		// Create the root container init task. It will begin running
 		// when the kernel is started.
@@ -815,6 +803,16 @@ func (l *Loader) executeAsync(args *control.ExecArgs) (kernel.ThreadID, error) {
 	})
 	defer args.MountNamespace.DecRef()
 
+	// Add the HOME environment variable if it is not already set.
+	root := args.MountNamespace.Root()
+	defer root.DecRef()
+	ctx := fs.WithRoot(l.k.SupervisorContext(), root)
+	envv, err := maybeAddExecUserHome(ctx, args.MountNamespace, args.KUID, args.Envv)
+	if err != nil {
+		return 0, err
+	}
+	args.Envv = envv
+
 	// Start the process.
 	proc := control.Proc{Kernel: l.k}
 	args.PIDNamespace = tg.PIDNamespace()
diff --git a/runsc/boot/user.go b/runsc/boot/user.go
index d1d423a5c..56cc12ee0 100644
--- a/runsc/boot/user.go
+++ b/runsc/boot/user.go
@@ -16,6 +16,7 @@ package boot
 
 import (
 	"bufio"
+	"fmt"
 	"io"
 	"strconv"
 	"strings"
@@ -23,6 +24,7 @@ import (
 	"gvisor.dev/gvisor/pkg/abi/linux"
 	"gvisor.dev/gvisor/pkg/sentry/context"
 	"gvisor.dev/gvisor/pkg/sentry/fs"
+	"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
 	"gvisor.dev/gvisor/pkg/sentry/usermem"
 )
 
@@ -42,7 +44,7 @@ func (r *fileReader) Read(buf []byte) (int, error) {
 
 // getExecUserHome returns the home directory of the executing user read from
 // /etc/passwd as read from the container filesystem.
-func getExecUserHome(ctx context.Context, rootMns *fs.MountNamespace, uid uint32) (string, error) {
+func getExecUserHome(ctx context.Context, rootMns *fs.MountNamespace, uid auth.KUID) (string, error) {
 	// The default user home directory to return if no user matching the user
 	// if found in the /etc/passwd found in the image.
 	const defaultHome = "/"
@@ -82,7 +84,7 @@ func getExecUserHome(ctx context.Context, rootMns *fs.MountNamespace, uid uint32
 		File: f,
 	}
 
-	homeDir, err := findHomeInPasswd(uid, r, defaultHome)
+	homeDir, err := findHomeInPasswd(uint32(uid), r, defaultHome)
 	if err != nil {
 		return "", err
 	}
@@ -90,6 +92,28 @@ func getExecUserHome(ctx context.Context, rootMns *fs.MountNamespace, uid uint32
 	return homeDir, nil
 }
 
+// maybeAddExecUserHome returns a new slice with the HOME environment variable
+// set if the slice does not already contain it, otherwise it returns the
+// original slice unmodified.
+func maybeAddExecUserHome(ctx context.Context, mns *fs.MountNamespace, uid auth.KUID, envv []string) ([]string, error) {
+	// Check if the envv already contains HOME.
+	for _, env := range envv {
+		if strings.HasPrefix(env, "HOME=") {
+			// We have it. Return the original slice unmodified.
+			return envv, nil
+		}
+	}
+
+	// Read /etc/passwd for the user's HOME directory and set the HOME
+	// environment variable as required by POSIX if it is not overridden by
+	// the user.
+	homeDir, err := getExecUserHome(ctx, mns, uid)
+	if err != nil {
+		return nil, fmt.Errorf("error reading exec user: %v", err)
+	}
+	return append(envv, "HOME="+homeDir), nil
+}
+
 // findHomeInPasswd parses a passwd file and returns the given user's home
 // directory. This function does it's best to replicate the runc's behavior.
 func findHomeInPasswd(uid uint32, passwd io.Reader, defaultHome string) (string, error) {
diff --git a/runsc/boot/user_test.go b/runsc/boot/user_test.go
index 906baf3e5..9aee2ad07 100644
--- a/runsc/boot/user_test.go
+++ b/runsc/boot/user_test.go
@@ -25,6 +25,7 @@ import (
 	specs "github.com/opencontainers/runtime-spec/specs-go"
 	"gvisor.dev/gvisor/pkg/sentry/context/contexttest"
 	"gvisor.dev/gvisor/pkg/sentry/fs"
+	"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
 )
 
 func setupTempDir() (string, error) {
@@ -68,7 +69,7 @@ func setupPasswd(contents string, perms os.FileMode) func() (string, error) {
 // TestGetExecUserHome tests the getExecUserHome function.
 func TestGetExecUserHome(t *testing.T) {
 	tests := map[string]struct {
-		uid        uint32
+		uid        auth.KUID
 		createRoot func() (string, error)
 		expected   string
 	}{
diff --git a/runsc/cmd/exec.go b/runsc/cmd/exec.go
index e817eff77..bf1225e1c 100644
--- a/runsc/cmd/exec.go
+++ b/runsc/cmd/exec.go
@@ -127,6 +127,7 @@ func (ex *Exec) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
 			Fatalf("getting environment variables: %v", err)
 		}
 	}
+
 	if e.Capabilities == nil {
 		// enableRaw is set to true to prevent the filtering out of
 		// CAP_NET_RAW. This is the opposite of Create() because exec
diff --git a/runsc/dockerutil/dockerutil.go b/runsc/dockerutil/dockerutil.go
index c073d8f75..e37ec0ffd 100644
--- a/runsc/dockerutil/dockerutil.go
+++ b/runsc/dockerutil/dockerutil.go
@@ -287,6 +287,14 @@ func (d *Docker) Exec(args ...string) (string, error) {
 	return do(a...)
 }
 
+// ExecAsUser calls 'docker exec' as the given user with the arguments
+// provided.
+func (d *Docker) ExecAsUser(user string, args ...string) (string, error) {
+	a := []string{"exec", "--user", user, d.Name}
+	a = append(a, args...)
+	return do(a...)
+}
+
 // ExecWithTerminal calls 'docker exec -it' with the arguments provided and
 // attaches a pty to stdio.
 func (d *Docker) ExecWithTerminal(args ...string) (*exec.Cmd, *os.File, error) {
diff --git a/test/e2e/exec_test.go b/test/e2e/exec_test.go
index 267679268..7238c2afe 100644
--- a/test/e2e/exec_test.go
+++ b/test/e2e/exec_test.go
@@ -177,3 +177,45 @@ func TestExecEnv(t *testing.T) {
 		t.Errorf("wanted exec output to contain %q, got %q", want, got)
 	}
 }
+
+// Test that exec always has HOME environment set, even when not set in run.
+func TestExecEnvHasHome(t *testing.T) {
+	// Base alpine image does not have any environment variables set.
+	if err := dockerutil.Pull("alpine"); err != nil {
+		t.Fatalf("docker pull failed: %v", err)
+	}
+	d := dockerutil.MakeDocker("exec-env-test")
+
+	// We will check that HOME is set for root user, and also for a new
+	// non-root user we will create.
+	newUID := 1234
+	newHome := "/foo/bar"
+
+	// Create a new user with a home directory, and then sleep.
+	script := fmt.Sprintf(`
+	mkdir -p -m 777 %s && \
+	adduser foo -D -u %d -h %s && \
+	sleep 1000`, newHome, newUID, newHome)
+	if err := d.Run("alpine", "/bin/sh", "-c", script); err != nil {
+		t.Fatalf("docker run failed: %v", err)
+	}
+	defer d.CleanUp()
+
+	// Exec "echo $HOME", and expect to see "/root".
+	got, err := d.Exec("/bin/sh", "-c", "echo $HOME")
+	if err != nil {
+		t.Fatalf("docker exec failed: %v", err)
+	}
+	if want := "/root"; !strings.Contains(got, want) {
+		t.Errorf("wanted exec output to contain %q, got %q", want, got)
+	}
+
+	// Execute the same as uid 1234 (newUID) and expect newHome.
+	got, err = d.ExecAsUser(strconv.Itoa(newUID), "/bin/sh", "-c", "echo $HOME")
+	if err != nil {
+		t.Fatalf("docker exec failed: %v", err)
+	}
+	if want := newHome; !strings.Contains(got, want) {
+		t.Errorf("wanted exec output to contain %q, got %q", want, got)
+	}
+}
-- 
cgit v1.2.3


From 0b02c3d5e5bae87f5cdbf4ae20dad8344bef32c2 Mon Sep 17 00:00:00 2001
From: Fabricio Voznika <fvoznika@google.com>
Date: Tue, 1 Oct 2019 11:48:24 -0700
Subject: Prevent CAP_NET_RAW from appearing in exec

'docker exec' was getting CAP_NET_RAW even when --net-raw=false
because it was not filtered out when copying the container's
capabilities.

PiperOrigin-RevId: 272260451
---
 runsc/cmd/exec.go                    | 52 ++++++++++++++---------------
 runsc/cmd/exec_test.go               |  4 +--
 runsc/container/BUILD                |  1 +
 runsc/container/container_test.go    | 25 ++++++++++++++
 runsc/container/test_app/test_app.go | 65 ++++++++++++++++++++++++++++++++++++
 runsc/dockerutil/dockerutil.go       |  9 ++++-
 runsc/specutils/BUILD                |  1 +
 runsc/specutils/specutils.go         | 10 ++++++
 test/e2e/BUILD                       |  2 ++
 test/e2e/exec_test.go                | 65 +++++++++++++++++++++++++++---------
 10 files changed, 190 insertions(+), 44 deletions(-)

(limited to 'test/e2e')

diff --git a/runsc/cmd/exec.go b/runsc/cmd/exec.go
index bf1225e1c..d1e99243b 100644
--- a/runsc/cmd/exec.go
+++ b/runsc/cmd/exec.go
@@ -105,11 +105,11 @@ func (ex *Exec) SetFlags(f *flag.FlagSet) {
 // Execute implements subcommands.Command.Execute. It starts a process in an
 // already created container.
 func (ex *Exec) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) subcommands.ExitStatus {
-	e, id, err := ex.parseArgs(f)
+	conf := args[0].(*boot.Config)
+	e, id, err := ex.parseArgs(f, conf.EnableRaw)
 	if err != nil {
 		Fatalf("parsing process spec: %v", err)
 	}
-	conf := args[0].(*boot.Config)
 	waitStatus := args[1].(*syscall.WaitStatus)
 
 	c, err := container.Load(conf.RootDir, id)
@@ -117,6 +117,9 @@ func (ex *Exec) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
 		Fatalf("loading sandbox: %v", err)
 	}
 
+	log.Debugf("Exec arguments: %+v", e)
+	log.Debugf("Exec capablities: %+v", e.Capabilities)
+
 	// Replace empty settings with defaults from container.
 	if e.WorkingDirectory == "" {
 		e.WorkingDirectory = c.Spec.Process.Cwd
@@ -129,14 +132,11 @@ func (ex *Exec) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
 	}
 
 	if e.Capabilities == nil {
-		// enableRaw is set to true to prevent the filtering out of
-		// CAP_NET_RAW. This is the opposite of Create() because exec
-		// requires the capability to be set explicitly, while 'docker
-		// run' sets it by default.
-		e.Capabilities, err = specutils.Capabilities(true /* enableRaw */, c.Spec.Process.Capabilities)
+		e.Capabilities, err = specutils.Capabilities(conf.EnableRaw, c.Spec.Process.Capabilities)
 		if err != nil {
 			Fatalf("creating capabilities: %v", err)
 		}
+		log.Infof("Using exec capabilities from container: %+v", e.Capabilities)
 	}
 
 	// containerd expects an actual process to represent the container being
@@ -283,14 +283,14 @@ func (ex *Exec) execChildAndWait(waitStatus *syscall.WaitStatus) subcommands.Exi
 // parseArgs parses exec information from the command line or a JSON file
 // depending on whether the --process flag was used. Returns an ExecArgs and
 // the ID of the container to be used.
-func (ex *Exec) parseArgs(f *flag.FlagSet) (*control.ExecArgs, string, error) {
+func (ex *Exec) parseArgs(f *flag.FlagSet, enableRaw bool) (*control.ExecArgs, string, error) {
 	if ex.processPath == "" {
 		// Requires at least a container ID and command.
 		if f.NArg() < 2 {
 			f.Usage()
 			return nil, "", fmt.Errorf("both a container-id and command are required")
 		}
-		e, err := ex.argsFromCLI(f.Args()[1:])
+		e, err := ex.argsFromCLI(f.Args()[1:], enableRaw)
 		return e, f.Arg(0), err
 	}
 	// Requires only the container ID.
@@ -298,11 +298,11 @@ func (ex *Exec) parseArgs(f *flag.FlagSet) (*control.ExecArgs, string, error) {
 		f.Usage()
 		return nil, "", fmt.Errorf("a container-id is required")
 	}
-	e, err := ex.argsFromProcessFile()
+	e, err := ex.argsFromProcessFile(enableRaw)
 	return e, f.Arg(0), err
 }
 
-func (ex *Exec) argsFromCLI(argv []string) (*control.ExecArgs, error) {
+func (ex *Exec) argsFromCLI(argv []string, enableRaw bool) (*control.ExecArgs, error) {
 	extraKGIDs := make([]auth.KGID, 0, len(ex.extraKGIDs))
 	for _, s := range ex.extraKGIDs {
 		kgid, err := strconv.Atoi(s)
@@ -315,7 +315,7 @@ func (ex *Exec) argsFromCLI(argv []string) (*control.ExecArgs, error) {
 	var caps *auth.TaskCapabilities
 	if len(ex.caps) > 0 {
 		var err error
-		caps, err = capabilities(ex.caps)
+		caps, err = capabilities(ex.caps, enableRaw)
 		if err != nil {
 			return nil, fmt.Errorf("capabilities error: %v", err)
 		}
@@ -333,7 +333,7 @@ func (ex *Exec) argsFromCLI(argv []string) (*control.ExecArgs, error) {
 	}, nil
 }
 
-func (ex *Exec) argsFromProcessFile() (*control.ExecArgs, error) {
+func (ex *Exec) argsFromProcessFile(enableRaw bool) (*control.ExecArgs, error) {
 	f, err := os.Open(ex.processPath)
 	if err != nil {
 		return nil, fmt.Errorf("error opening process file: %s, %v", ex.processPath, err)
@@ -343,21 +343,21 @@ func (ex *Exec) argsFromProcessFile() (*control.ExecArgs, error) {
 	if err := json.NewDecoder(f).Decode(&p); err != nil {
 		return nil, fmt.Errorf("error parsing process file: %s, %v", ex.processPath, err)
 	}
-	return argsFromProcess(&p)
+	return argsFromProcess(&p, enableRaw)
 }
 
 // argsFromProcess performs all the non-IO conversion from the Process struct
 // to ExecArgs.
-func argsFromProcess(p *specs.Process) (*control.ExecArgs, error) {
+func argsFromProcess(p *specs.Process, enableRaw bool) (*control.ExecArgs, error) {
 	// Create capabilities.
 	var caps *auth.TaskCapabilities
 	if p.Capabilities != nil {
 		var err error
-		// enableRaw is set to true to prevent the filtering out of
-		// CAP_NET_RAW. This is the opposite of Create() because exec
-		// requires the capability to be set explicitly, while 'docker
-		// run' sets it by default.
-		caps, err = specutils.Capabilities(true /* enableRaw */, p.Capabilities)
+		// Starting from Docker 19, capabilities are explicitly set for exec (instead
+		// of nil like before). So we can't distinguish 'exec' from
+		// 'exec --privileged', as both specify CAP_NET_RAW. Therefore, filter
+		// CAP_NET_RAW in the same way as container start.
+		caps, err = specutils.Capabilities(enableRaw, p.Capabilities)
 		if err != nil {
 			return nil, fmt.Errorf("error creating capabilities: %v", err)
 		}
@@ -410,7 +410,7 @@ func resolveEnvs(envs ...[]string) ([]string, error) {
 // capabilities takes a list of capabilities as strings and returns an
 // auth.TaskCapabilities struct with those capabilities in every capability set.
 // This mimics runc's behavior.
-func capabilities(cs []string) (*auth.TaskCapabilities, error) {
+func capabilities(cs []string, enableRaw bool) (*auth.TaskCapabilities, error) {
 	var specCaps specs.LinuxCapabilities
 	for _, cap := range cs {
 		specCaps.Ambient = append(specCaps.Ambient, cap)
@@ -419,11 +419,11 @@ func capabilities(cs []string) (*auth.TaskCapabilities, error) {
 		specCaps.Inheritable = append(specCaps.Inheritable, cap)
 		specCaps.Permitted = append(specCaps.Permitted, cap)
 	}
-	// enableRaw is set to true to prevent the filtering out of
-	// CAP_NET_RAW. This is the opposite of Create() because exec requires
-	// the capability to be set explicitly, while 'docker run' sets it by
-	// default.
-	return specutils.Capabilities(true /* enableRaw */, &specCaps)
+	// Starting from Docker 19, capabilities are explicitly set for exec (instead
+	// of nil like before). So we can't distinguish 'exec' from
+	// 'exec --privileged', as both specify CAP_NET_RAW. Therefore, filter
+	// CAP_NET_RAW in the same way as container start.
+	return specutils.Capabilities(enableRaw, &specCaps)
 }
 
 // stringSlice allows a flag to be used multiple times, where each occurrence
diff --git a/runsc/cmd/exec_test.go b/runsc/cmd/exec_test.go
index eb38a431f..a1e980d08 100644
--- a/runsc/cmd/exec_test.go
+++ b/runsc/cmd/exec_test.go
@@ -91,7 +91,7 @@ func TestCLIArgs(t *testing.T) {
 	}
 
 	for _, tc := range testCases {
-		e, err := tc.ex.argsFromCLI(tc.argv)
+		e, err := tc.ex.argsFromCLI(tc.argv, true)
 		if err != nil {
 			t.Errorf("argsFromCLI(%+v): got error: %+v", tc.ex, err)
 		} else if !cmp.Equal(*e, tc.expected, cmpopts.IgnoreUnexported(os.File{})) {
@@ -144,7 +144,7 @@ func TestJSONArgs(t *testing.T) {
 	}
 
 	for _, tc := range testCases {
-		e, err := argsFromProcess(&tc.p)
+		e, err := argsFromProcess(&tc.p, true)
 		if err != nil {
 			t.Errorf("argsFromProcess(%+v): got error: %+v", tc.p, err)
 		} else if !cmp.Equal(*e, tc.expected, cmpopts.IgnoreUnexported(os.File{})) {
diff --git a/runsc/container/BUILD b/runsc/container/BUILD
index bc1fa25e3..26d1cd5ab 100644
--- a/runsc/container/BUILD
+++ b/runsc/container/BUILD
@@ -47,6 +47,7 @@ go_test(
     ],
     deps = [
         "//pkg/abi/linux",
+        "//pkg/bits",
         "//pkg/log",
         "//pkg/sentry/control",
         "//pkg/sentry/kernel",
diff --git a/runsc/container/container_test.go b/runsc/container/container_test.go
index 2ac12e5b6..519f5ed9b 100644
--- a/runsc/container/container_test.go
+++ b/runsc/container/container_test.go
@@ -34,6 +34,7 @@ import (
 	"github.com/cenkalti/backoff"
 	specs "github.com/opencontainers/runtime-spec/specs-go"
 	"gvisor.dev/gvisor/pkg/abi/linux"
+	"gvisor.dev/gvisor/pkg/bits"
 	"gvisor.dev/gvisor/pkg/log"
 	"gvisor.dev/gvisor/pkg/sentry/control"
 	"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
@@ -2049,6 +2050,30 @@ func TestMountSymlink(t *testing.T) {
 	}
 }
 
+// Check that --net-raw controls whether CAP_NET_RAW is granted.
+func TestNetRaw(t *testing.T) {
+	capNetRaw := strconv.FormatUint(bits.MaskOf64(int(linux.CAP_NET_RAW)), 10)
+	app, err := testutil.FindFile("runsc/container/test_app/test_app")
+	if err != nil {
+		t.Fatal("error finding test_app:", err)
+	}
+
+	for _, enableRaw := range []bool{true, false} {
+		conf := testutil.TestConfig()
+		conf.EnableRaw = enableRaw
+
+		test := "--enabled"
+		if !enableRaw {
+			test = "--disabled"
+		}
+
+		spec := testutil.NewSpecWithArgs(app, "capability", test, capNetRaw)
+		if err := run(spec, conf); err != nil {
+			t.Fatalf("Error running container: %v", err)
+		}
+	}
+}
+
 // executeSync synchronously executes a new process.
 func (cont *Container) executeSync(args *control.ExecArgs) (syscall.WaitStatus, error) {
 	pid, err := cont.Execute(args)
diff --git a/runsc/container/test_app/test_app.go b/runsc/container/test_app/test_app.go
index 7f735c254..913d781c6 100644
--- a/runsc/container/test_app/test_app.go
+++ b/runsc/container/test_app/test_app.go
@@ -19,10 +19,12 @@ package main
 import (
 	"context"
 	"fmt"
+	"io/ioutil"
 	"log"
 	"net"
 	"os"
 	"os/exec"
+	"regexp"
 	"strconv"
 	sys "syscall"
 	"time"
@@ -35,6 +37,7 @@ import (
 func main() {
 	subcommands.Register(subcommands.HelpCommand(), "")
 	subcommands.Register(subcommands.FlagsCommand(), "")
+	subcommands.Register(new(capability), "")
 	subcommands.Register(new(fdReceiver), "")
 	subcommands.Register(new(fdSender), "")
 	subcommands.Register(new(forkBomb), "")
@@ -287,3 +290,65 @@ func (s *syscall) Execute(ctx context.Context, f *flag.FlagSet, args ...interfac
 	}
 	return subcommands.ExitSuccess
 }
+
+type capability struct {
+	enabled  uint64
+	disabled uint64
+}
+
+// Name implements subcommands.Command.
+func (*capability) Name() string {
+	return "capability"
+}
+
+// Synopsis implements subcommands.Command.
+func (*capability) Synopsis() string {
+	return "checks if effective capabilities are set/unset"
+}
+
+// Usage implements subcommands.Command.
+func (*capability) Usage() string {
+	return "capability [--enabled=number] [--disabled=number]"
+}
+
+// SetFlags implements subcommands.Command.
+func (c *capability) SetFlags(f *flag.FlagSet) {
+	f.Uint64Var(&c.enabled, "enabled", 0, "")
+	f.Uint64Var(&c.disabled, "disabled", 0, "")
+}
+
+// Execute implements subcommands.Command.
+func (c *capability) Execute(ctx context.Context, f *flag.FlagSet, args ...interface{}) subcommands.ExitStatus {
+	if c.enabled == 0 && c.disabled == 0 {
+		fmt.Println("One of the flags must be set")
+		return subcommands.ExitUsageError
+	}
+
+	status, err := ioutil.ReadFile("/proc/self/status")
+	if err != nil {
+		fmt.Printf("Error reading %q: %v\n", "proc/self/status", err)
+		return subcommands.ExitFailure
+	}
+	re := regexp.MustCompile("CapEff:\t([0-9a-f]+)\n")
+	matches := re.FindStringSubmatch(string(status))
+	if matches == nil || len(matches) != 2 {
+		fmt.Printf("Effective capabilities not found in\n%s\n", status)
+		return subcommands.ExitFailure
+	}
+	caps, err := strconv.ParseUint(matches[1], 16, 64)
+	if err != nil {
+		fmt.Printf("failed to convert capabilities %q: %v\n", matches[1], err)
+		return subcommands.ExitFailure
+	}
+
+	if c.enabled != 0 && (caps&c.enabled) != c.enabled {
+		fmt.Printf("Missing capabilities, want: %#x: got: %#x\n", c.enabled, caps)
+		return subcommands.ExitFailure
+	}
+	if c.disabled != 0 && (caps&c.disabled) != 0 {
+		fmt.Printf("Extra capabilities found, dont_want: %#x: got: %#x\n", c.disabled, caps)
+		return subcommands.ExitFailure
+	}
+
+	return subcommands.ExitSuccess
+}
diff --git a/runsc/dockerutil/dockerutil.go b/runsc/dockerutil/dockerutil.go
index e37ec0ffd..57f6ae8de 100644
--- a/runsc/dockerutil/dockerutil.go
+++ b/runsc/dockerutil/dockerutil.go
@@ -282,7 +282,14 @@ func (d *Docker) Logs() (string, error) {
 
 // Exec calls 'docker exec' with the arguments provided.
 func (d *Docker) Exec(args ...string) (string, error) {
-	a := []string{"exec", d.Name}
+	return d.ExecWithFlags(nil, args...)
+}
+
+// ExecWithFlags calls 'docker exec <flags> name <args>'.
+func (d *Docker) ExecWithFlags(flags []string, args ...string) (string, error) {
+	a := []string{"exec"}
+	a = append(a, flags...)
+	a = append(a, d.Name)
 	a = append(a, args...)
 	return do(a...)
 }
diff --git a/runsc/specutils/BUILD b/runsc/specutils/BUILD
index fbfb8e2f8..fa58313a0 100644
--- a/runsc/specutils/BUILD
+++ b/runsc/specutils/BUILD
@@ -13,6 +13,7 @@ go_library(
     visibility = ["//:sandbox"],
     deps = [
         "//pkg/abi/linux",
+        "//pkg/bits",
         "//pkg/log",
         "//pkg/sentry/kernel/auth",
         "@com_github_cenkalti_backoff//:go_default_library",
diff --git a/runsc/specutils/specutils.go b/runsc/specutils/specutils.go
index cb9e58dfb..591abe458 100644
--- a/runsc/specutils/specutils.go
+++ b/runsc/specutils/specutils.go
@@ -31,6 +31,7 @@ import (
 	"github.com/cenkalti/backoff"
 	specs "github.com/opencontainers/runtime-spec/specs-go"
 	"gvisor.dev/gvisor/pkg/abi/linux"
+	"gvisor.dev/gvisor/pkg/bits"
 	"gvisor.dev/gvisor/pkg/log"
 	"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
 )
@@ -241,6 +242,15 @@ func AllCapabilities() *specs.LinuxCapabilities {
 	}
 }
 
+// AllCapabilitiesUint64 returns a bitmask containing all capabilities set.
+func AllCapabilitiesUint64() uint64 {
+	var rv uint64
+	for _, cap := range capFromName {
+		rv |= bits.MaskOf64(int(cap))
+	}
+	return rv
+}
+
 var capFromName = map[string]linux.Capability{
 	"CAP_CHOWN":            linux.CAP_CHOWN,
 	"CAP_DAC_OVERRIDE":     linux.CAP_DAC_OVERRIDE,
diff --git a/test/e2e/BUILD b/test/e2e/BUILD
index 99442cffb..4fe03a220 100644
--- a/test/e2e/BUILD
+++ b/test/e2e/BUILD
@@ -19,7 +19,9 @@ go_test(
     visibility = ["//:sandbox"],
     deps = [
         "//pkg/abi/linux",
+        "//pkg/bits",
         "//runsc/dockerutil",
+        "//runsc/specutils",
         "//runsc/testutil",
     ],
 )
diff --git a/test/e2e/exec_test.go b/test/e2e/exec_test.go
index 7238c2afe..88d26e865 100644
--- a/test/e2e/exec_test.go
+++ b/test/e2e/exec_test.go
@@ -30,14 +30,17 @@ import (
 	"time"
 
 	"gvisor.dev/gvisor/pkg/abi/linux"
+	"gvisor.dev/gvisor/pkg/bits"
 	"gvisor.dev/gvisor/runsc/dockerutil"
+	"gvisor.dev/gvisor/runsc/specutils"
 )
 
+// Test that exec uses the exact same capability set as the container.
 func TestExecCapabilities(t *testing.T) {
 	if err := dockerutil.Pull("alpine"); err != nil {
 		t.Fatalf("docker pull failed: %v", err)
 	}
-	d := dockerutil.MakeDocker("exec-test")
+	d := dockerutil.MakeDocker("exec-capabilities-test")
 
 	// Start the container.
 	if err := d.Run("alpine", "sh", "-c", "cat /proc/self/status; sleep 100"); err != nil {
@@ -52,27 +55,59 @@ func TestExecCapabilities(t *testing.T) {
 	if len(matches) != 2 {
 		t.Fatalf("There should be a match for the whole line and the capability bitmask")
 	}
-	capString := matches[1]
-	t.Log("Root capabilities:", capString)
+	want := fmt.Sprintf("CapEff:\t%s\n", matches[1])
+	t.Log("Root capabilities:", want)
 
-	// CAP_NET_RAW was in the capability set for the container, but was
-	// removed. However, `exec` does not remove it. Verify that it's not
-	// set in the container, then re-add it for comparison.
-	caps, err := strconv.ParseUint(capString, 16, 64)
+	// Now check that exec'd process capabilities match the root.
+	got, err := d.Exec("grep", "CapEff:", "/proc/self/status")
 	if err != nil {
-		t.Fatalf("failed to convert capabilities %q: %v", capString, err)
+		t.Fatalf("docker exec failed: %v", err)
 	}
-	if caps&(1<<uint64(linux.CAP_NET_RAW)) != 0 {
-		t.Fatalf("CAP_NET_RAW should be filtered, but is set in the container: %x", caps)
+	t.Logf("CapEff: %v", got)
+	if got != want {
+		t.Errorf("wrong capabilities, got: %q, want: %q", got, want)
 	}
-	caps |= 1 << uint64(linux.CAP_NET_RAW)
-	want := fmt.Sprintf("CapEff:\t%016x\n", caps)
+}
 
-	// Now check that exec'd process capabilities match the root.
-	got, err := d.Exec("grep", "CapEff:", "/proc/self/status")
+// Test that 'exec --privileged' adds all capabilities, except for CAP_NET_RAW
+// which is removed from the container when --net-raw=false.
+func TestExecPrivileged(t *testing.T) {
+	if err := dockerutil.Pull("alpine"); err != nil {
+		t.Fatalf("docker pull failed: %v", err)
+	}
+	d := dockerutil.MakeDocker("exec-privileged-test")
+
+	// Start the container with all capabilities dropped.
+	if err := d.Run("--cap-drop=all", "alpine", "sh", "-c", "cat /proc/self/status; sleep 100"); err != nil {
+		t.Fatalf("docker run failed: %v", err)
+	}
+	defer d.CleanUp()
+
+	// Check that all capabilities were dropped from the container.
+	matches, err := d.WaitForOutputSubmatch("CapEff:\t([0-9a-f]+)\n", 5*time.Second)
+	if err != nil {
+		t.Fatalf("WaitForOutputSubmatch() timeout: %v", err)
+	}
+	if len(matches) != 2 {
+		t.Fatalf("There should be a match for the whole line and the capability bitmask")
+	}
+	containerCaps, err := strconv.ParseUint(matches[1], 16, 64)
+	if err != nil {
+		t.Fatalf("failed to convert capabilities %q: %v", matches[1], err)
+	}
+	t.Logf("Container capabilities: %#x", containerCaps)
+	if containerCaps != 0 {
+		t.Fatalf("Container should have no capabilities: %x", containerCaps)
+	}
+
+	// Check that 'exec --privileged' adds all capabilities, except
+	// for CAP_NET_RAW.
+	got, err := d.ExecWithFlags([]string{"--privileged"}, "grep", "CapEff:", "/proc/self/status")
 	if err != nil {
 		t.Fatalf("docker exec failed: %v", err)
 	}
+	t.Logf("Exec CapEff: %v", got)
+	want := fmt.Sprintf("CapEff:\t%016x\n", specutils.AllCapabilitiesUint64()&^bits.MaskOf64(int(linux.CAP_NET_RAW)))
 	if got != want {
 		t.Errorf("wrong capabilities, got: %q, want: %q", got, want)
 	}
@@ -184,7 +219,7 @@ func TestExecEnvHasHome(t *testing.T) {
 	if err := dockerutil.Pull("alpine"); err != nil {
 		t.Fatalf("docker pull failed: %v", err)
 	}
-	d := dockerutil.MakeDocker("exec-env-test")
+	d := dockerutil.MakeDocker("exec-env-home-test")
 
 	// We will check that HOME is set for root user, and also for a new
 	// non-root user we will create.
-- 
cgit v1.2.3