diff options
author | Andrei Vagin <avagin@google.com> | 2019-10-02 13:00:07 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2019-10-02 13:00:07 -0700 |
commit | 9a875306dbabcf335a2abccc08119a1b67d0e51a (patch) | |
tree | 0f72c12e951a5eee7156df7a5d63351bc89befa6 /runsc/container | |
parent | 38bc0b6b6addd25ceec4f66ef1af41c1e61e2985 (diff) | |
parent | 03ce4dd86c9acd6b6148f68d5d2cf025d8c254bb (diff) |
Merge branch 'master' into pr_syscall_linux
Diffstat (limited to 'runsc/container')
-rw-r--r-- | runsc/container/BUILD | 3 | ||||
-rw-r--r-- | runsc/container/console_test.go | 2 | ||||
-rw-r--r-- | runsc/container/container.go | 113 | ||||
-rw-r--r-- | runsc/container/container_test.go | 74 | ||||
-rw-r--r-- | runsc/container/multi_container_test.go | 71 | ||||
-rw-r--r-- | runsc/container/shared_volume_test.go | 2 | ||||
-rw-r--r-- | runsc/container/test_app/BUILD | 2 | ||||
-rw-r--r-- | runsc/container/test_app/fds.go | 4 | ||||
-rw-r--r-- | runsc/container/test_app/test_app.go | 67 |
9 files changed, 285 insertions, 53 deletions
diff --git a/runsc/container/BUILD b/runsc/container/BUILD index de8202bb1..26d1cd5ab 100644 --- a/runsc/container/BUILD +++ b/runsc/container/BUILD @@ -47,6 +47,7 @@ go_test( ], deps = [ "//pkg/abi/linux", + "//pkg/bits", "//pkg/log", "//pkg/sentry/control", "//pkg/sentry/kernel", @@ -56,7 +57,7 @@ go_test( "//runsc/boot", "//runsc/boot/platforms", "//runsc/specutils", - "//runsc/test/testutil", + "//runsc/testutil", "@com_github_cenkalti_backoff//:go_default_library", "@com_github_kr_pty//:go_default_library", "@com_github_opencontainers_runtime-spec//specs-go:go_default_library", diff --git a/runsc/container/console_test.go b/runsc/container/console_test.go index e9372989f..7d67c3a75 100644 --- a/runsc/container/console_test.go +++ b/runsc/container/console_test.go @@ -30,7 +30,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/control" "gvisor.dev/gvisor/pkg/unet" "gvisor.dev/gvisor/pkg/urpc" - "gvisor.dev/gvisor/runsc/test/testutil" + "gvisor.dev/gvisor/runsc/testutil" ) // socketPath creates a path inside bundleDir and ensures that the returned diff --git a/runsc/container/container.go b/runsc/container/container.go index bbb364214..a721c1c31 100644 --- a/runsc/container/container.go +++ b/runsc/container/container.go @@ -513,9 +513,16 @@ func (c *Container) Start(conf *boot.Config) error { return err } - // Adjust the oom_score_adj for sandbox and gofers. This must be done after + // Adjust the oom_score_adj for sandbox. This must be done after // save(). - return c.adjustOOMScoreAdj(conf) + err = adjustSandboxOOMScoreAdj(c.Sandbox, c.RootContainerDir, false) + if err != nil { + return err + } + + // Set container's oom_score_adj to the gofer since it is dedicated to + // the container, in case the gofer uses up too much memory. + return c.adjustGoferOOMScoreAdj() } // Restore takes a container and replaces its kernel and file system @@ -782,6 +789,9 @@ func (c *Container) Destroy() error { } defer unlock() + // Stored for later use as stop() sets c.Sandbox to nil. + sb := c.Sandbox + if err := c.stop(); err != nil { err = fmt.Errorf("stopping container: %v", err) log.Warningf("%v", err) @@ -796,6 +806,16 @@ func (c *Container) Destroy() error { c.changeStatus(Stopped) + // Adjust oom_score_adj for the sandbox. This must be done after the + // container is stopped and the directory at c.Root is removed. + // We must test if the sandbox is nil because Destroy should be + // idempotent. + if sb != nil { + if err := adjustSandboxOOMScoreAdj(sb, c.RootContainerDir, true); err != nil { + errs = append(errs, err.Error()) + } + } + // "If any poststop hook fails, the runtime MUST log a warning, but the // remaining hooks and lifecycle continue as if the hook had succeeded" -OCI spec. // Based on the OCI, "The post-stop hooks MUST be called after the container is @@ -926,7 +946,14 @@ func (c *Container) createGoferProcess(spec *specs.Spec, conf *boot.Config, bund } if conf.DebugLog != "" { - debugLogFile, err := specutils.DebugLogFile(conf.DebugLog, "gofer") + test := "" + if len(conf.TestOnlyTestNameEnv) != 0 { + // Fetch test name if one is provided and the test only flag was set. + if t, ok := specutils.EnvVar(spec.Process.Env, conf.TestOnlyTestNameEnv); ok { + test = t + } + } + debugLogFile, err := specutils.DebugLogFile(conf.DebugLog, "gofer", test) if err != nil { return nil, nil, fmt.Errorf("opening debug log file in %q: %v", conf.DebugLog, err) } @@ -1139,35 +1166,82 @@ func runInCgroup(cg *cgroup.Cgroup, fn func() error) error { return fn() } -// adjustOOMScoreAdj sets the oom_score_adj for the sandbox and all gofers. +// adjustGoferOOMScoreAdj sets the oom_store_adj for the container's gofer. +func (c *Container) adjustGoferOOMScoreAdj() error { + if c.GoferPid != 0 && c.Spec.Process.OOMScoreAdj != nil { + if err := setOOMScoreAdj(c.GoferPid, *c.Spec.Process.OOMScoreAdj); err != nil { + return fmt.Errorf("setting gofer oom_score_adj for container %q: %v", c.ID, err) + } + } + + return nil +} + +// adjustSandboxOOMScoreAdj sets the oom_score_adj for the sandbox. // oom_score_adj is set to the lowest oom_score_adj among the containers // running in the sandbox. // // TODO(gvisor.dev/issue/512): This call could race with other containers being // created at the same time and end up setting the wrong oom_score_adj to the // sandbox. -func (c *Container) adjustOOMScoreAdj(conf *boot.Config) error { - // If this container's OOMScoreAdj is nil then we can exit early as no - // change should be made to oom_score_adj for the sandbox. - if c.Spec.Process.OOMScoreAdj == nil { - return nil - } - - containers, err := loadSandbox(conf.RootDir, c.Sandbox.ID) +func adjustSandboxOOMScoreAdj(s *sandbox.Sandbox, rootDir string, destroy bool) error { + containers, err := loadSandbox(rootDir, s.ID) if err != nil { return fmt.Errorf("loading sandbox containers: %v", err) } + // Do nothing if the sandbox has been terminated. + if len(containers) == 0 { + return nil + } + // Get the lowest score for all containers. var lowScore int scoreFound := false - for _, container := range containers { - if container.Spec.Process.OOMScoreAdj != nil && (!scoreFound || *container.Spec.Process.OOMScoreAdj < lowScore) { + if len(containers) == 1 && len(containers[0].Spec.Annotations[specutils.ContainerdContainerTypeAnnotation]) == 0 { + // This is a single-container sandbox. Set the oom_score_adj to + // the value specified in the OCI bundle. + if containers[0].Spec.Process.OOMScoreAdj != nil { scoreFound = true - lowScore = *container.Spec.Process.OOMScoreAdj + lowScore = *containers[0].Spec.Process.OOMScoreAdj + } + } else { + for _, container := range containers { + // Special multi-container support for CRI. Ignore the root + // container when calculating oom_score_adj for the sandbox because + // it is the infrastructure (pause) container and always has a very + // low oom_score_adj. + // + // We will use OOMScoreAdj in the single-container case where the + // containerd container-type annotation is not present. + if container.Spec.Annotations[specutils.ContainerdContainerTypeAnnotation] == specutils.ContainerdContainerTypeSandbox { + continue + } + + if container.Spec.Process.OOMScoreAdj != nil && (!scoreFound || *container.Spec.Process.OOMScoreAdj < lowScore) { + scoreFound = true + lowScore = *container.Spec.Process.OOMScoreAdj + } } } + // If the container is destroyed and remaining containers have no + // oomScoreAdj specified then we must revert to the oom_score_adj of the + // parent process. + if !scoreFound && destroy { + ppid, err := specutils.GetParentPid(s.Pid) + if err != nil { + return fmt.Errorf("getting parent pid of sandbox pid %d: %v", s.Pid, err) + } + pScore, err := specutils.GetOOMScoreAdj(ppid) + if err != nil { + return fmt.Errorf("getting oom_score_adj of parent %d: %v", ppid, err) + } + + scoreFound = true + lowScore = pScore + } + // Only set oom_score_adj if one of the containers has oom_score_adj set // in the OCI bundle. If not, we need to inherit the parent process's // oom_score_adj. @@ -1177,15 +1251,10 @@ func (c *Container) adjustOOMScoreAdj(conf *boot.Config) error { } // Set the lowest of all containers oom_score_adj to the sandbox. - if err := setOOMScoreAdj(c.Sandbox.Pid, lowScore); err != nil { - return fmt.Errorf("setting oom_score_adj for sandbox %q: %v", c.Sandbox.ID, err) + if err := setOOMScoreAdj(s.Pid, lowScore); err != nil { + return fmt.Errorf("setting oom_score_adj for sandbox %q: %v", s.ID, err) } - // Set container's oom_score_adj to the gofer since it is dedicated to the - // container, in case the gofer uses up too much memory. - if err := setOOMScoreAdj(c.GoferPid, *c.Spec.Process.OOMScoreAdj); err != nil { - return fmt.Errorf("setting gofer oom_score_adj for container %q: %v", c.ID, err) - } return nil } diff --git a/runsc/container/container_test.go b/runsc/container/container_test.go index af128bf1c..519f5ed9b 100644 --- a/runsc/container/container_test.go +++ b/runsc/container/container_test.go @@ -16,6 +16,7 @@ package container import ( "bytes" + "flag" "fmt" "io" "io/ioutil" @@ -33,13 +34,14 @@ import ( "github.com/cenkalti/backoff" specs "github.com/opencontainers/runtime-spec/specs-go" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/bits" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/sentry/control" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/runsc/boot" "gvisor.dev/gvisor/runsc/boot/platforms" "gvisor.dev/gvisor/runsc/specutils" - "gvisor.dev/gvisor/runsc/test/testutil" + "gvisor.dev/gvisor/runsc/testutil" ) // waitForProcessList waits for the given process list to show up in the container. @@ -155,12 +157,7 @@ func waitForFile(f *os.File) error { return nil } - timeout := 5 * time.Second - if testutil.RaceEnabled { - // Race makes slow things even slow, so bump the timeout. - timeout = 3 * timeout - } - return testutil.Poll(op, timeout) + return testutil.Poll(op, 30*time.Second) } // readOutputNum reads a file at given filepath and returns the int at the @@ -254,10 +251,6 @@ func configs(opts ...configOption) []*boot.Config { // TODO(b/112165693): KVM tests are flaky. Disable until fixed. continue - // TODO(b/68787993): KVM doesn't work with --race. - if testutil.RaceEnabled { - continue - } c.Platform = platforms.KVM case nonExclusiveFS: c.FileAccess = boot.FileAccessShared @@ -1310,10 +1303,13 @@ func TestRunNonRoot(t *testing.T) { t.Logf("Running test with conf: %+v", conf) spec := testutil.NewSpecWithArgs("/bin/true") + + // Set a random user/group with no access to "blocked" dir. spec.Process.User.UID = 343 spec.Process.User.GID = 2401 + spec.Process.Capabilities = nil - // User that container runs as can't list '$TMP/blocked' and would fail to + // User running inside container can't list '$TMP/blocked' and would fail to // mount it. dir, err := ioutil.TempDir(testutil.TmpDir(), "blocked") if err != nil { @@ -1327,6 +1323,17 @@ func TestRunNonRoot(t *testing.T) { t.Fatalf("os.MkDir(%q) failed: %v", dir, err) } + src, err := ioutil.TempDir(testutil.TmpDir(), "src") + if err != nil { + t.Fatalf("ioutil.TempDir() failed: %v", err) + } + + spec.Mounts = append(spec.Mounts, specs.Mount{ + Destination: dir, + Source: src, + Type: "bind", + }) + if err := run(spec, conf); err != nil { t.Fatalf("error running sandbox: %v", err) } @@ -1637,22 +1644,27 @@ func TestGoferExits(t *testing.T) { } func TestRootNotMount(t *testing.T) { - if testutil.RaceEnabled { - // Requires statically linked binary, since it's mapping the root to a - // random dir, libs cannot be located. - t.Skip("race makes test_app not statically linked") - } - appSym, err := testutil.FindFile("runsc/container/test_app/test_app") if err != nil { t.Fatal("error finding test_app:", err) } + app, err := filepath.EvalSymlinks(appSym) if err != nil { t.Fatalf("error resolving %q symlink: %v", appSym, err) } log.Infof("App path %q is a symlink to %q", appSym, app) + static, err := testutil.IsStatic(app) + if err != nil { + t.Fatalf("error reading application binary: %v", err) + } + if !static { + // This happens during race builds; we cannot map in shared + // libraries also, so we need to skip the test. + t.Skip() + } + root := filepath.Dir(app) exe := "/" + filepath.Base(app) log.Infof("Executing %q in %q", exe, root) @@ -2038,6 +2050,30 @@ func TestMountSymlink(t *testing.T) { } } +// Check that --net-raw disables the CAP_NET_RAW capability. +func TestNetRaw(t *testing.T) { + capNetRaw := strconv.FormatUint(bits.MaskOf64(int(linux.CAP_NET_RAW)), 10) + app, err := testutil.FindFile("runsc/container/test_app/test_app") + if err != nil { + t.Fatal("error finding test_app:", err) + } + + for _, enableRaw := range []bool{true, false} { + conf := testutil.TestConfig() + conf.EnableRaw = enableRaw + + test := "--enabled" + if !enableRaw { + test = "--disabled" + } + + spec := testutil.NewSpecWithArgs(app, "capability", test, capNetRaw) + if err := run(spec, conf); err != nil { + t.Fatalf("Error running container: %v", err) + } + } +} + // executeSync synchronously executes a new process. func (cont *Container) executeSync(args *control.ExecArgs) (syscall.WaitStatus, error) { pid, err := cont.Execute(args) @@ -2053,10 +2089,10 @@ func (cont *Container) executeSync(args *control.ExecArgs) (syscall.WaitStatus, func TestMain(m *testing.M) { log.SetLevel(log.Debug) + flag.Parse() if err := testutil.ConfigureExePath(); err != nil { panic(err.Error()) } specutils.MaybeRunAsRoot() - os.Exit(m.Run()) } diff --git a/runsc/container/multi_container_test.go b/runsc/container/multi_container_test.go index 2d51fecc6..bd45a5118 100644 --- a/runsc/container/multi_container_test.go +++ b/runsc/container/multi_container_test.go @@ -32,7 +32,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/runsc/boot" "gvisor.dev/gvisor/runsc/specutils" - "gvisor.dev/gvisor/runsc/test/testutil" + "gvisor.dev/gvisor/runsc/testutil" ) func createSpecs(cmds ...[]string) ([]*specs.Spec, []string) { @@ -549,10 +549,16 @@ func TestMultiContainerDestroy(t *testing.T) { t.Logf("Running test with conf: %+v", conf) // First container will remain intact while the second container is killed. - specs, ids := createSpecs( - []string{app, "reaper"}, + podSpecs, ids := createSpecs( + []string{"sleep", "100"}, []string{app, "fork-bomb"}) - containers, cleanup, err := startContainers(conf, specs, ids) + + // Run the fork bomb in a PID namespace to prevent processes to be + // re-parented to PID=1 in the root container. + podSpecs[1].Linux = &specs.Linux{ + Namespaces: []specs.LinuxNamespace{{Type: "pid"}}, + } + containers, cleanup, err := startContainers(conf, podSpecs, ids) if err != nil { t.Fatalf("error starting containers: %v", err) } @@ -580,7 +586,7 @@ func TestMultiContainerDestroy(t *testing.T) { if err != nil { t.Fatalf("error getting process data from sandbox: %v", err) } - expectedPL := []*control.Process{{PID: 1, Cmd: "test_app"}} + expectedPL := []*control.Process{{PID: 1, Cmd: "sleep"}} if !procListsEqual(pss, expectedPL) { t.Errorf("container got process list: %s, want: %s", procListToString(pss), procListToString(expectedPL)) } @@ -1485,3 +1491,58 @@ func TestMultiContainerLoadSandbox(t *testing.T) { t.Errorf("containers not found: %v", wantIDs) } } + +// TestMultiContainerRunNonRoot checks that child container can be configured +// when running as non-privileged user. +func TestMultiContainerRunNonRoot(t *testing.T) { + cmdRoot := []string{"/bin/sleep", "100"} + cmdSub := []string{"/bin/true"} + podSpecs, ids := createSpecs(cmdRoot, cmdSub) + + // User running inside container can't list '$TMP/blocked' and would fail to + // mount it. + blocked, err := ioutil.TempDir(testutil.TmpDir(), "blocked") + if err != nil { + t.Fatalf("ioutil.TempDir() failed: %v", err) + } + if err := os.Chmod(blocked, 0700); err != nil { + t.Fatalf("os.MkDir(%q) failed: %v", blocked, err) + } + dir := path.Join(blocked, "test") + if err := os.Mkdir(dir, 0755); err != nil { + t.Fatalf("os.MkDir(%q) failed: %v", dir, err) + } + + src, err := ioutil.TempDir(testutil.TmpDir(), "src") + if err != nil { + t.Fatalf("ioutil.TempDir() failed: %v", err) + } + + // Set a random user/group with no access to "blocked" dir. + podSpecs[1].Process.User.UID = 343 + podSpecs[1].Process.User.GID = 2401 + podSpecs[1].Process.Capabilities = nil + + podSpecs[1].Mounts = append(podSpecs[1].Mounts, specs.Mount{ + Destination: dir, + Source: src, + Type: "bind", + }) + + conf := testutil.TestConfig() + pod, cleanup, err := startContainers(conf, podSpecs, ids) + if err != nil { + t.Fatalf("error starting containers: %v", err) + } + defer cleanup() + + // Once all containers are started, wait for the child container to exit. + // This means that the volume was mounted properly. + ws, err := pod[1].Wait() + if err != nil { + t.Fatalf("running child container: %v", err) + } + if !ws.Exited() || ws.ExitStatus() != 0 { + t.Fatalf("child container failed, waitStatus: %v", ws) + } +} diff --git a/runsc/container/shared_volume_test.go b/runsc/container/shared_volume_test.go index 1f90d2462..dc4194134 100644 --- a/runsc/container/shared_volume_test.go +++ b/runsc/container/shared_volume_test.go @@ -25,7 +25,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/control" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/runsc/boot" - "gvisor.dev/gvisor/runsc/test/testutil" + "gvisor.dev/gvisor/runsc/testutil" ) // TestSharedVolume checks that modifications to a volume mount are propagated diff --git a/runsc/container/test_app/BUILD b/runsc/container/test_app/BUILD index 82dbd54d2..9bf9e6e9d 100644 --- a/runsc/container/test_app/BUILD +++ b/runsc/container/test_app/BUILD @@ -13,7 +13,7 @@ go_binary( visibility = ["//runsc/container:__pkg__"], deps = [ "//pkg/unet", - "//runsc/test/testutil", + "//runsc/testutil", "@com_github_google_subcommands//:go_default_library", ], ) diff --git a/runsc/container/test_app/fds.go b/runsc/container/test_app/fds.go index c12809cab..a90cc1662 100644 --- a/runsc/container/test_app/fds.go +++ b/runsc/container/test_app/fds.go @@ -24,7 +24,7 @@ import ( "flag" "github.com/google/subcommands" "gvisor.dev/gvisor/pkg/unet" - "gvisor.dev/gvisor/runsc/test/testutil" + "gvisor.dev/gvisor/runsc/testutil" ) const fileContents = "foobarbaz" @@ -60,7 +60,7 @@ func (fds *fdSender) Execute(ctx context.Context, f *flag.FlagSet, args ...inter log.Fatalf("socket flag must be set") } - dir, err := ioutil.TempDir(testutil.TmpDir(), "") + dir, err := ioutil.TempDir("", "") if err != nil { log.Fatalf("TempDir failed: %v", err) } diff --git a/runsc/container/test_app/test_app.go b/runsc/container/test_app/test_app.go index 6578c7b41..913d781c6 100644 --- a/runsc/container/test_app/test_app.go +++ b/runsc/container/test_app/test_app.go @@ -19,22 +19,25 @@ package main import ( "context" "fmt" + "io/ioutil" "log" "net" "os" "os/exec" + "regexp" "strconv" sys "syscall" "time" "flag" "github.com/google/subcommands" - "gvisor.dev/gvisor/runsc/test/testutil" + "gvisor.dev/gvisor/runsc/testutil" ) func main() { subcommands.Register(subcommands.HelpCommand(), "") subcommands.Register(subcommands.FlagsCommand(), "") + subcommands.Register(new(capability), "") subcommands.Register(new(fdReceiver), "") subcommands.Register(new(fdSender), "") subcommands.Register(new(forkBomb), "") @@ -287,3 +290,65 @@ func (s *syscall) Execute(ctx context.Context, f *flag.FlagSet, args ...interfac } return subcommands.ExitSuccess } + +type capability struct { + enabled uint64 + disabled uint64 +} + +// Name implements subcommands.Command. +func (*capability) Name() string { + return "capability" +} + +// Synopsis implements subcommands.Command. +func (*capability) Synopsis() string { + return "checks if effective capabilities are set/unset" +} + +// Usage implements subcommands.Command. +func (*capability) Usage() string { + return "capability [--enabled=number] [--disabled=number]" +} + +// SetFlags implements subcommands.Command. +func (c *capability) SetFlags(f *flag.FlagSet) { + f.Uint64Var(&c.enabled, "enabled", 0, "") + f.Uint64Var(&c.disabled, "disabled", 0, "") +} + +// Execute implements subcommands.Command. +func (c *capability) Execute(ctx context.Context, f *flag.FlagSet, args ...interface{}) subcommands.ExitStatus { + if c.enabled == 0 && c.disabled == 0 { + fmt.Println("One of the flags must be set") + return subcommands.ExitUsageError + } + + status, err := ioutil.ReadFile("/proc/self/status") + if err != nil { + fmt.Printf("Error reading %q: %v\n", "proc/self/status", err) + return subcommands.ExitFailure + } + re := regexp.MustCompile("CapEff:\t([0-9a-f]+)\n") + matches := re.FindStringSubmatch(string(status)) + if matches == nil || len(matches) != 2 { + fmt.Printf("Effective capabilities not found in\n%s\n", status) + return subcommands.ExitFailure + } + caps, err := strconv.ParseUint(matches[1], 16, 64) + if err != nil { + fmt.Printf("failed to convert capabilities %q: %v\n", matches[1], err) + return subcommands.ExitFailure + } + + if c.enabled != 0 && (caps&c.enabled) != c.enabled { + fmt.Printf("Missing capabilities, want: %#x: got: %#x\n", c.enabled, caps) + return subcommands.ExitFailure + } + if c.disabled != 0 && (caps&c.disabled) != 0 { + fmt.Printf("Extra capabilities found, dont_want: %#x: got: %#x\n", c.disabled, caps) + return subcommands.ExitFailure + } + + return subcommands.ExitSuccess +} |