Diffstat (limited to 'runsc/container')
-rw-r--r--   runsc/container/BUILD                    |   1
-rw-r--r--   runsc/container/container.go             | 111
-rw-r--r--   runsc/container/container_test.go        |  18
-rw-r--r--   runsc/container/multi_container_test.go  | 213
4 files changed, 329 insertions, 14 deletions
diff --git a/runsc/container/BUILD b/runsc/container/BUILD
index e246c38ae..de8202bb1 100644
--- a/runsc/container/BUILD
+++ b/runsc/container/BUILD
@@ -49,6 +49,7 @@ go_test(
         "//pkg/abi/linux",
         "//pkg/log",
         "//pkg/sentry/control",
+        "//pkg/sentry/kernel",
         "//pkg/sentry/kernel/auth",
         "//pkg/unet",
         "//pkg/urpc",
diff --git a/runsc/container/container.go b/runsc/container/container.go
index 8320bb2ca..bbb364214 100644
--- a/runsc/container/container.go
+++ b/runsc/container/container.go
@@ -138,6 +138,34 @@ type Container struct {
     RootContainerDir string
 }

+// loadSandbox loads all containers that belong to the sandbox with the given
+// ID.
+func loadSandbox(rootDir, id string) ([]*Container, error) {
+    cids, err := List(rootDir)
+    if err != nil {
+        return nil, err
+    }
+
+    // Load the container metadata.
+    var containers []*Container
+    for _, cid := range cids {
+        container, err := Load(rootDir, cid)
+        if err != nil {
+            // Container file may not exist if it raced with creation/deletion or
+            // directory was left behind. Load provides a snapshot in time, so it's
+            // fine to skip it.
+            if os.IsNotExist(err) {
+                continue
+            }
+            return nil, fmt.Errorf("loading container %q: %v", id, err)
+        }
+        if container.Sandbox.ID == id {
+            containers = append(containers, container)
+        }
+    }
+    return containers, nil
+}
+
 // Load loads a container with the given id from a metadata file. id may be an
 // abbreviation of the full container id, in which case Load loads the
 // container to which id unambiguously refers to.
@@ -180,7 +208,7 @@ func Load(rootDir, id string) (*Container, error) {
     // If the status is "Running" or "Created", check that the sandbox
     // process still exists, and set it to Stopped if it does not.
     //
-    // This is inherently racey.
+    // This is inherently racy.
     if c.Status == Running || c.Status == Created {
         // Check if the sandbox process is still running.
         if !c.isSandboxRunning() {
@@ -237,7 +265,13 @@ func List(rootDir string) ([]string, error) {
     }
     var out []string
     for _, f := range fs {
-        out = append(out, f.Name())
+        // Filter out directories that do not belong to a container.
+        cid := f.Name()
+        if validateID(cid) == nil {
+            if _, err := os.Stat(filepath.Join(rootDir, cid, metadataFilename)); err == nil {
+                out = append(out, f.Name())
+            }
+        }
     }
     return out, nil
 }
@@ -475,7 +509,13 @@ func (c *Container) Start(conf *boot.Config) error {
     }

     c.changeStatus(Running)
-    return c.save()
+    if err := c.save(); err != nil {
+        return err
+    }
+
+    // Adjust the oom_score_adj for sandbox and gofers. This must be done after
+    // save().
+    return c.adjustOOMScoreAdj(conf)
 }

 // Restore takes a container and replaces its kernel and file system
@@ -1098,3 +1138,68 @@ func runInCgroup(cg *cgroup.Cgroup, fn func() error) error {
     }
     return fn()
 }
+
+// adjustOOMScoreAdj sets the oom_score_adj for the sandbox and all gofers.
+// oom_score_adj is set to the lowest oom_score_adj among the containers
+// running in the sandbox.
+//
+// TODO(gvisor.dev/issue/512): This call could race with other containers being
+// created at the same time and end up setting the wrong oom_score_adj to the
+// sandbox.
+func (c *Container) adjustOOMScoreAdj(conf *boot.Config) error {
+    // If this container's OOMScoreAdj is nil then we can exit early as no
+    // change should be made to oom_score_adj for the sandbox.
+    if c.Spec.Process.OOMScoreAdj == nil {
+        return nil
+    }
+
+    containers, err := loadSandbox(conf.RootDir, c.Sandbox.ID)
+    if err != nil {
+        return fmt.Errorf("loading sandbox containers: %v", err)
+    }
+
+    // Get the lowest score for all containers.
+    var lowScore int
+    scoreFound := false
+    for _, container := range containers {
+        if container.Spec.Process.OOMScoreAdj != nil && (!scoreFound || *container.Spec.Process.OOMScoreAdj < lowScore) {
+            scoreFound = true
+            lowScore = *container.Spec.Process.OOMScoreAdj
+        }
+    }
+
+    // Only set oom_score_adj if one of the containers has oom_score_adj set
+    // in the OCI bundle. If not, we need to inherit the parent process's
+    // oom_score_adj.
+    // See: https://github.com/opencontainers/runtime-spec/blob/master/config.md#linux-process
+    if !scoreFound {
+        return nil
+    }
+
+    // Set the lowest of all containers' oom_score_adj to the sandbox.
+    if err := setOOMScoreAdj(c.Sandbox.Pid, lowScore); err != nil {
+        return fmt.Errorf("setting oom_score_adj for sandbox %q: %v", c.Sandbox.ID, err)
+    }
+
+    // Set container's oom_score_adj to the gofer since it is dedicated to the
+    // container, in case the gofer uses up too much memory.
+    if err := setOOMScoreAdj(c.GoferPid, *c.Spec.Process.OOMScoreAdj); err != nil {
+        return fmt.Errorf("setting gofer oom_score_adj for container %q: %v", c.ID, err)
+    }
+    return nil
+}
+
+// setOOMScoreAdj sets oom_score_adj to the given value for the given PID.
+// /proc must be available and mounted read-write. scoreAdj should be between
+// -1000 and 1000.
+func setOOMScoreAdj(pid int, scoreAdj int) error {
+    f, err := os.OpenFile(fmt.Sprintf("/proc/%d/oom_score_adj", pid), os.O_WRONLY, 0644)
+    if err != nil {
+        return err
+    }
+    defer f.Close()
+    if _, err := f.WriteString(strconv.Itoa(scoreAdj)); err != nil {
+        return err
+    }
+    return nil
+}
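For reference, a minimal standalone sketch (not part of this change) of how the value written by setOOMScoreAdj can be read back from /proc/<pid>/oom_score_adj for verification. The readOOMScoreAdj helper below is hypothetical and only illustrates the /proc interface the change relies on.

// Sketch: read back oom_score_adj for a PID. Assumes /proc is mounted.
package main

import (
	"fmt"
	"io/ioutil"
	"os"
	"strconv"
	"strings"
)

// readOOMScoreAdj returns the current oom_score_adj of the given PID by
// reading /proc/<pid>/oom_score_adj (illustrative helper, not in the diff).
func readOOMScoreAdj(pid int) (int, error) {
	data, err := ioutil.ReadFile(fmt.Sprintf("/proc/%d/oom_score_adj", pid))
	if err != nil {
		return 0, err
	}
	return strconv.Atoi(strings.TrimSpace(string(data)))
}

func main() {
	// Read this process's own oom_score_adj as a quick sanity check.
	score, err := readOOMScoreAdj(os.Getpid())
	if err != nil {
		fmt.Fprintln(os.Stderr, "reading oom_score_adj:", err)
		os.Exit(1)
	}
	fmt.Printf("oom_score_adj for pid %d: %d\n", os.Getpid(), score)
}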
diff --git a/runsc/container/container_test.go b/runsc/container/container_test.go
index ff68c586e..3d4f304f3 100644
--- a/runsc/container/container_test.go
+++ b/runsc/container/container_test.go
@@ -76,7 +76,7 @@ func waitForProcessCount(cont *Container, want int) error {
 }

 func blockUntilWaitable(pid int) error {
-    _, _, err := testutil.RetryEintr(func() (uintptr, uintptr, error) {
+    _, _, err := specutils.RetryEintr(func() (uintptr, uintptr, error) {
         var err error
         _, _, err1 := syscall.Syscall6(syscall.SYS_WAITID, 1, uintptr(pid), 0, syscall.WEXITED|syscall.WNOWAIT, 0, 0)
         if err1 != 0 {
@@ -1310,10 +1310,13 @@ func TestRunNonRoot(t *testing.T) {
         t.Logf("Running test with conf: %+v", conf)

         spec := testutil.NewSpecWithArgs("/bin/true")
+
+        // Set a random user/group with no access to "blocked" dir.
         spec.Process.User.UID = 343
         spec.Process.User.GID = 2401
+        spec.Process.Capabilities = nil

-        // User that container runs as can't list '$TMP/blocked' and would fail to
+        // User running inside container can't list '$TMP/blocked' and would fail to
         // mount it.
         dir, err := ioutil.TempDir(testutil.TmpDir(), "blocked")
         if err != nil {
@@ -1327,6 +1330,17 @@ func TestRunNonRoot(t *testing.T) {
             t.Fatalf("os.MkDir(%q) failed: %v", dir, err)
         }

+        src, err := ioutil.TempDir(testutil.TmpDir(), "src")
+        if err != nil {
+            t.Fatalf("ioutil.TempDir() failed: %v", err)
+        }
+
+        spec.Mounts = append(spec.Mounts, specs.Mount{
+            Destination: dir,
+            Source:      src,
+            Type:        "bind",
+        })
+
         if err := run(spec, conf); err != nil {
             t.Fatalf("error running sandbox: %v", err)
         }
diff --git a/runsc/container/multi_container_test.go b/runsc/container/multi_container_test.go
index 978a422f5..ae03d24b4 100644
--- a/runsc/container/multi_container_test.go
+++ b/runsc/container/multi_container_test.go
@@ -29,6 +29,7 @@ import (

     specs "github.com/opencontainers/runtime-spec/specs-go"
     "gvisor.dev/gvisor/pkg/sentry/control"
+    "gvisor.dev/gvisor/pkg/sentry/kernel"
     "gvisor.dev/gvisor/runsc/boot"
     "gvisor.dev/gvisor/runsc/specutils"
     "gvisor.dev/gvisor/runsc/test/testutil"
@@ -59,11 +60,14 @@ func createSpecs(cmds ...[]string) ([]*specs.Spec, []string) {
 }

 func startContainers(conf *boot.Config, specs []*specs.Spec, ids []string) ([]*Container, func(), error) {
-    rootDir, err := testutil.SetupRootDir()
-    if err != nil {
-        return nil, nil, fmt.Errorf("error creating root dir: %v", err)
+    // Setup root dir if one hasn't been provided.
+    if len(conf.RootDir) == 0 {
+        rootDir, err := testutil.SetupRootDir()
+        if err != nil {
+            return nil, nil, fmt.Errorf("error creating root dir: %v", err)
+        }
+        conf.RootDir = rootDir
     }
-    conf.RootDir = rootDir

     var containers []*Container
     var bundles []string
@@ -74,7 +78,7 @@ func startContainers(conf *boot.Config, specs []*specs.Spec, ids []string) ([]*C
         for _, b := range bundles {
             os.RemoveAll(b)
         }
-        os.RemoveAll(rootDir)
+        os.RemoveAll(conf.RootDir)
     }
     for i, spec := range specs {
         bundleDir, err := testutil.SetupBundleDir(spec)
@@ -488,7 +492,7 @@ func TestMultiContainerSignal(t *testing.T) {
     if err := containers[1].Destroy(); err != nil {
         t.Errorf("failed to destroy container: %v", err)
     }
-    _, _, err = testutil.RetryEintr(func() (uintptr, uintptr, error) {
+    _, _, err = specutils.RetryEintr(func() (uintptr, uintptr, error) {
         cpid, err := syscall.Wait4(goferPid, nil, 0, nil)
         return uintptr(cpid), 0, err
     })
@@ -905,9 +909,9 @@ func TestMultiContainerDifferentFilesystems(t *testing.T) {
     }
 }

-// TestMultiContainerGoferStop tests that IO operations continue to work after
-// containers have been stopped and gofers killed.
-func TestMultiContainerGoferStop(t *testing.T) {
+// TestMultiContainerContainerDestroyStress tests that IO operations continue
+// to work after containers have been stopped and gofers killed.
+func TestMultiContainerContainerDestroyStress(t *testing.T) {
     app, err := testutil.FindFile("runsc/container/test_app/test_app")
     if err != nil {
         t.Fatal("error finding test_app:", err)
     }
@@ -1345,3 +1349,194 @@ func TestMultiContainerMultiRootCanHandleFDs(t *testing.T) {
         }
     }
 }
+
+// Test that container is destroyed when Gofer is killed.
+func TestMultiContainerGoferKilled(t *testing.T) {
+    sleep := []string{"sleep", "100"}
+    specs, ids := createSpecs(sleep, sleep, sleep)
+    conf := testutil.TestConfig()
+    containers, cleanup, err := startContainers(conf, specs, ids)
+    if err != nil {
+        t.Fatalf("error starting containers: %v", err)
+    }
+    defer cleanup()
+
+    // Ensure container is running
+    c := containers[2]
+    expectedPL := []*control.Process{
+        {PID: 3, Cmd: "sleep"},
+    }
+    if err := waitForProcessList(c, expectedPL); err != nil {
+        t.Errorf("failed to wait for sleep to start: %v", err)
+    }
+
+    // Kill container's gofer.
+    if err := syscall.Kill(c.GoferPid, syscall.SIGKILL); err != nil {
+        t.Fatalf("syscall.Kill(%d, SIGKILL)=%v", c.GoferPid, err)
+    }
+
+    // Wait until container stops.
+    if err := waitForProcessList(c, nil); err != nil {
+        t.Errorf("Container %q was not stopped after gofer death: %v", c.ID, err)
+    }
+
+    // Check that container isn't running anymore.
+    args := &control.ExecArgs{Argv: []string{"/bin/true"}}
+    if _, err := c.executeSync(args); err == nil {
+        t.Fatalf("Container %q was not stopped after gofer death", c.ID)
+    }
+
+    // Check that other containers are unaffected.
+    for i, c := range containers {
+        if i == 2 {
+            continue // container[2] has been killed.
+        }
+        pl := []*control.Process{
+            {PID: kernel.ThreadID(i + 1), Cmd: "sleep"},
+        }
+        if err := waitForProcessList(c, pl); err != nil {
+            t.Errorf("Container %q was affected by another container: %v", c.ID, err)
+        }
+        args := &control.ExecArgs{Argv: []string{"/bin/true"}}
+        if _, err := c.executeSync(args); err != nil {
+            t.Fatalf("Container %q was affected by another container: %v", c.ID, err)
+        }
+    }
+
+    // Kill root container's gofer to bring entire sandbox down.
+    c = containers[0]
+    if err := syscall.Kill(c.GoferPid, syscall.SIGKILL); err != nil {
+        t.Fatalf("syscall.Kill(%d, SIGKILL)=%v", c.GoferPid, err)
+    }
+
+    // Wait until sandbox stops. waitForProcessList will loop until sandbox exits
+    // and RPC errors out.
+    impossiblePL := []*control.Process{
+        {PID: 100, Cmd: "non-existent-process"},
+    }
+    if err := waitForProcessList(c, impossiblePL); err == nil {
+        t.Fatalf("Sandbox was not killed after gofer death")
+    }
+
+    // Check that entire sandbox isn't running anymore.
+    for _, c := range containers {
+        args := &control.ExecArgs{Argv: []string{"/bin/true"}}
+        if _, err := c.executeSync(args); err == nil {
+            t.Fatalf("Container %q was not stopped after gofer death", c.ID)
+        }
+    }
+}
+
+func TestMultiContainerLoadSandbox(t *testing.T) {
+    sleep := []string{"sleep", "100"}
+    specs, ids := createSpecs(sleep, sleep, sleep)
+    conf := testutil.TestConfig()
+
+    // Create containers for the sandbox.
+    wants, cleanup, err := startContainers(conf, specs, ids)
+    if err != nil {
+        t.Fatalf("error starting containers: %v", err)
+    }
+    defer cleanup()
+
+    // Then create unrelated containers.
+    for i := 0; i < 3; i++ {
+        specs, ids = createSpecs(sleep, sleep, sleep)
+        _, cleanup, err = startContainers(conf, specs, ids)
+        if err != nil {
+            t.Fatalf("error starting containers: %v", err)
+        }
+        defer cleanup()
+    }
+
+    // Create an unrelated directory under root.
+    dir := filepath.Join(conf.RootDir, "not-a-container")
+    if err := os.MkdirAll(dir, 0755); err != nil {
+        t.Fatalf("os.MkdirAll(%q)=%v", dir, err)
+    }
+
+    // Create a valid but empty container directory.
+    randomCID := testutil.UniqueContainerID()
+    dir = filepath.Join(conf.RootDir, randomCID)
+    if err := os.MkdirAll(dir, 0755); err != nil {
+        t.Fatalf("os.MkdirAll(%q)=%v", dir, err)
+    }
+
+    // Load the sandbox and check that the correct containers were returned.
+    id := wants[0].Sandbox.ID
+    gots, err := loadSandbox(conf.RootDir, id)
+    if err != nil {
+        t.Fatalf("loadSandbox()=%v", err)
+    }
+    wantIDs := make(map[string]struct{})
+    for _, want := range wants {
+        wantIDs[want.ID] = struct{}{}
+    }
+    for _, got := range gots {
+        if got.Sandbox.ID != id {
+            t.Errorf("wrong sandbox ID, got: %v, want: %v", got.Sandbox.ID, id)
+        }
+        if _, ok := wantIDs[got.ID]; !ok {
+            t.Errorf("wrong container ID, got: %v, wants: %v", got.ID, wantIDs)
+        }
+        delete(wantIDs, got.ID)
+    }
+    if len(wantIDs) != 0 {
+        t.Errorf("containers not found: %v", wantIDs)
+    }
+}
+
+// TestMultiContainerRunNonRoot checks that child container can be configured
+// when running as non-privileged user.
+func TestMultiContainerRunNonRoot(t *testing.T) {
+    cmdRoot := []string{"/bin/sleep", "100"}
+    cmdSub := []string{"/bin/true"}
+    podSpecs, ids := createSpecs(cmdRoot, cmdSub)
+
+    // User running inside container can't list '$TMP/blocked' and would fail to
+    // mount it.
+    blocked, err := ioutil.TempDir(testutil.TmpDir(), "blocked")
+    if err != nil {
+        t.Fatalf("ioutil.TempDir() failed: %v", err)
+    }
+    if err := os.Chmod(blocked, 0700); err != nil {
+        t.Fatalf("os.MkDir(%q) failed: %v", blocked, err)
+    }
+    dir := path.Join(blocked, "test")
+    if err := os.Mkdir(dir, 0755); err != nil {
+        t.Fatalf("os.MkDir(%q) failed: %v", dir, err)
+    }
+
+    src, err := ioutil.TempDir(testutil.TmpDir(), "src")
+    if err != nil {
+        t.Fatalf("ioutil.TempDir() failed: %v", err)
+    }
+
+    // Set a random user/group with no access to "blocked" dir.
+    podSpecs[1].Process.User.UID = 343
+    podSpecs[1].Process.User.GID = 2401
+    podSpecs[1].Process.Capabilities = nil
+
+    podSpecs[1].Mounts = append(podSpecs[1].Mounts, specs.Mount{
+        Destination: dir,
+        Source:      src,
+        Type:        "bind",
+    })
+
+    conf := testutil.TestConfig()
+    pod, cleanup, err := startContainers(conf, podSpecs, ids)
+    if err != nil {
+        t.Fatalf("error starting containers: %v", err)
+    }
+    defer cleanup()
+
+    // Once all containers are started, wait for the child container to exit.
+    // This means that the volume was mounted properly.
+    ws, err := pod[1].Wait()
+    if err != nil {
+        t.Fatalf("running child container: %v", err)
+    }
+    if !ws.Exited() || ws.ExitStatus() != 0 {
+        t.Fatalf("child container failed, waitStatus: %v", ws)
+    }
+}
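For readers following adjustOOMScoreAdj above, here is a self-contained sketch of the same selection rule: take the lowest oom_score_adj across the sandbox's containers, and skip the update when none of them sets one so the value is inherited from the parent. The lowestOOMScoreAdj helper and the example values are illustrative only, not part of this change.

// Sketch: pick the lowest optional oom_score_adj, mirroring adjustOOMScoreAdj.
package main

import "fmt"

// lowestOOMScoreAdj returns the smallest of the provided optional scores, and
// false if none is set (in which case the caller should leave oom_score_adj
// untouched so it is inherited from the parent process).
func lowestOOMScoreAdj(scores []*int) (int, bool) {
	var low int
	found := false
	for _, s := range scores {
		if s != nil && (!found || *s < low) {
			found = true
			low = *s
		}
	}
	return low, found
}

func main() {
	a, b := -500, 100
	// Two containers set a score, one does not; the sandbox gets -500.
	if low, ok := lowestOOMScoreAdj([]*int{&a, nil, &b}); ok {
		fmt.Println("sandbox oom_score_adj:", low)
	}
	// No container sets a score; oom_score_adj is inherited unchanged.
	if _, ok := lowestOOMScoreAdj([]*int{nil, nil}); !ok {
		fmt.Println("no oom_score_adj set; inheriting from parent")
	}
}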