diff options
author | Fabricio Voznika <fvoznika@google.com> | 2019-01-16 12:47:21 -0800 |
---|---|---|
committer | Shentubot <shentubot@google.com> | 2019-01-16 12:48:32 -0800 |
commit | e4d3ca7263291b43cdc49c7553c62608be062cd9 (patch) | |
tree | 47b8dee17087a36e1fc34c8acc48c798f2d2f383 | |
parent | 92cf3764e032740f0c84a1b242c54b99f45a6bf0 (diff) |
Prevent internal tmpfs mount to override files in /tmp
Runsc wants to mount /tmp using internal tmpfs implementation for
performance. However, it risks hiding files that may exist under
/tmp in case it's present in the container. Now, it only mounts
over /tmp iff:
- /tmp was not explicitly asked to be mounted
- /tmp is empty
If any of this is not true, then /tmp maps to the container's
image /tmp.
Note: checkpoint doesn't have sentry FS mounted to check if /tmp
is empty. It simply looks for explicit mounts right now.
PiperOrigin-RevId: 229607856
Change-Id: I10b6dae7ac157ef578efc4dfceb089f3b94cde06
-rw-r--r-- | runsc/boot/fs.go | 174 | ||||
-rw-r--r-- | runsc/boot/loader_test.go | 21 | ||||
-rw-r--r-- | runsc/container/multi_container_test.go | 8 | ||||
-rw-r--r-- | runsc/test/integration/integration_test.go | 13 | ||||
-rw-r--r-- | test/syscalls/BUILD | 1 | ||||
-rw-r--r-- | test/syscalls/syscall_test_runner.go | 8 |
6 files changed, 157 insertions, 68 deletions
diff --git a/runsc/boot/fs.go b/runsc/boot/fs.go index 942315d6e..e0c8291ac 100644 --- a/runsc/boot/fs.go +++ b/runsc/boot/fs.go @@ -81,9 +81,11 @@ func (f *fdDispenser) empty() bool { return len(f.fds) == 0 } -// createMountNamespace creates a mount namespace containing the root filesystem +// setupRootContainerFS creates a mount namespace containing the root filesystem // and all mounts. 'rootCtx' is used to walk directories to find mount points. -func createMountNamespace(userCtx context.Context, rootCtx context.Context, spec *specs.Spec, conf *Config, goferFDs []int) (*fs.MountNamespace, error) { +// 'setMountNS' is called after namespace is created. It must set the mount NS +// to 'rootCtx'. +func setupRootContainerFS(userCtx context.Context, rootCtx context.Context, spec *specs.Spec, conf *Config, goferFDs []int, setMountNS func(*fs.MountNamespace)) error { mounts := compileMounts(spec) // Create a tmpfs mount where we create and mount a root filesystem for @@ -96,32 +98,24 @@ func createMountNamespace(userCtx context.Context, rootCtx context.Context, spec fds := &fdDispenser{fds: goferFDs} rootInode, err := createRootMount(rootCtx, spec, conf, fds, mounts) if err != nil { - return nil, fmt.Errorf("failed to create root mount: %v", err) + return fmt.Errorf("failed to create root mount: %v", err) } mns, err := fs.NewMountNamespace(userCtx, rootInode) if err != nil { - return nil, fmt.Errorf("failed to create root mount namespace: %v", err) + return fmt.Errorf("failed to create root mount namespace: %v", err) } + setMountNS(mns) root := mns.Root() defer root.DecRef() - for _, m := range mounts { - if err := mountSubmount(rootCtx, conf, mns, root, fds, m, mounts); err != nil { - return nil, fmt.Errorf("mount submount: %v", err) - } - } - - if !fds.empty() { - return nil, fmt.Errorf("not all mount points were consumed, remaining: %v", fds) - } - return mns, nil + return mountSubmounts(rootCtx, conf, mns, root, mounts, fds) } // compileMounts returns the supported mounts from the mount spec, adding any // mandatory mounts that are required by the OCI specification. func compileMounts(spec *specs.Spec) []specs.Mount { - // Keep track of whether proc, sys, and tmp were mounted. - var procMounted, sysMounted, tmpMounted bool + // Keep track of whether proc and sys were mounted. + var procMounted, sysMounted bool var mounts []specs.Mount // Always mount /dev. @@ -147,8 +141,6 @@ func compileMounts(spec *specs.Spec) []specs.Mount { procMounted = true case "/sys": sysMounted = true - case "/tmp": - tmpMounted = true } } @@ -168,20 +160,6 @@ func compileMounts(spec *specs.Spec) []specs.Mount { }) } - // Technically we don't have to mount tmpfs at /tmp, as we could just - // rely on the host /tmp, but this is a nice optimization, and fixes - // some apps that call mknod in /tmp. - if !tmpMounted { - // TODO: If the host /tmp (or a mount at /tmp) has - // files in it, we should overlay our tmpfs implementation over - // that. Until then, the /tmp mount will always appear empty at - // container creation. - mandatoryMounts = append(mandatoryMounts, specs.Mount{ - Type: tmpfs, - Destination: "/tmp", - }) - } - // The mandatory mounts should be ordered right after the root, in case // there are submounts of these mandatory mounts already in the spec. mounts = append(mounts[:0], append(mandatoryMounts, mounts[0:]...)...) @@ -288,6 +266,23 @@ func getMountNameAndOptions(conf *Config, m specs.Mount, fds *fdDispenser) (stri return fsName, opts, useOverlay, err } +func mountSubmounts(ctx context.Context, conf *Config, mns *fs.MountNamespace, root *fs.Dirent, mounts []specs.Mount, fds *fdDispenser) error { + for _, m := range mounts { + if err := mountSubmount(ctx, conf, mns, root, fds, m, mounts); err != nil { + return fmt.Errorf("mount submount %q: %v", m.Destination, err) + } + } + + if err := mountTmp(ctx, conf, mns, root, fds, mounts); err != nil { + return fmt.Errorf("mount submount %q: %v", "tmp", err) + } + + if !fds.empty() { + return fmt.Errorf("not all mount points were consumed, remaining: %v", fds) + } + return nil +} + // mountSubmount mounts volumes inside the container's root. Because mounts may // be readonly, a lower ramfs overlay is added to create the mount point dir. // Another overlay is added with tmpfs on top if Config.Overlay is true. @@ -453,11 +448,27 @@ func createRestoreEnvironment(spec *specs.Spec, conf *Config, fds *fdDispenser) renv.MountSources[rootFsName] = append(renv.MountSources[rootFsName], rootMount) // Add submounts. + var tmpMounted bool for _, m := range compileMounts(spec) { if err := addRestoreMount(conf, renv, m, fds); err != nil { return nil, err } + if filepath.Clean(m.Destination) == "/tmp" { + tmpMounted = true + } } + + // TODO: handle '/tmp' properly (see mountTmp()). + if !tmpMounted { + tmpMount := specs.Mount{ + Type: tmpfs, + Destination: "/tmp", + } + if err := addRestoreMount(conf, renv, tmpMount, fds); err != nil { + return nil, err + } + } + return renv, nil } @@ -555,28 +566,13 @@ func setupContainerFS(procArgs *kernel.CreateProcessArgs, spec *specs.Spec, conf mns := k.RootMountNamespace() if mns == nil { // Setup the root container. - - // Create the virtual filesystem. - mns, err := createMountNamespace(ctx, rootCtx, spec, conf, goferFDs) - if err != nil { - return fmt.Errorf("error creating mounts: %v", err) - } - k.SetRootMountNamespace(mns) - - // We're done with root container. - return nil + return setupRootContainerFS(ctx, rootCtx, spec, conf, goferFDs, func(mns *fs.MountNamespace) { + k.SetRootMountNamespace(mns) + }) } // Setup a child container. - - // Create the container's root filesystem mount. log.Infof("Creating new process in child container.") - fds := &fdDispenser{fds: append([]int{}, goferFDs...)} - rootInode, err := createRootMount(rootCtx, spec, conf, fds, nil) - if err != nil { - return fmt.Errorf("error creating filesystem for container: %v", err) - } - globalRoot := mns.Root() defer globalRoot.DecRef() @@ -595,6 +591,13 @@ func setupContainerFS(procArgs *kernel.CreateProcessArgs, spec *specs.Spec, conf } defer containerRoot.DecRef() + // Create the container's root filesystem mount. + fds := &fdDispenser{fds: goferFDs} + rootInode, err := createRootMount(rootCtx, spec, conf, fds, nil) + if err != nil { + return fmt.Errorf("error creating filesystem for container: %v", err) + } + // Mount the container's root filesystem to the newly created mount point. if err := mns.Mount(ctx, containerRoot, rootInode); err != nil { return fmt.Errorf("mount container root: %v", err) @@ -606,20 +609,20 @@ func setupContainerFS(procArgs *kernel.CreateProcessArgs, spec *specs.Spec, conf if err != nil { return fmt.Errorf("find container mount point %q: %v", cid, err) } + cu := specutils.MakeCleanup(func() { containerRoot.DecRef() }) + defer cu.Clean() log.Infof("Mounted child's root fs to %q", filepath.Join(ChildContainersDir, cid)) + // Set process root here, so 'rootCtx.Value(CtxRoot)' will return it. + procArgs.Root = containerRoot + // Mount all submounts. mounts := compileMounts(spec) - for _, m := range mounts { - if err := mountSubmount(rootCtx, conf, k.RootMountNamespace(), containerRoot, fds, m, mounts); err != nil { - containerRoot.DecRef() - return fmt.Errorf("error mounting filesystem for container: %v", err) - } + if err := mountSubmounts(rootCtx, conf, mns, containerRoot, mounts, fds); err != nil { + return err } - - // Set the procArgs root directory. - procArgs.Root = containerRoot + cu.Release() return nil } @@ -705,3 +708,58 @@ func destroyContainerFS(ctx context.Context, cid string, k *kernel.Kernel) error return nil } + +// mountTmp mounts an internal tmpfs at '/tmp' if it's safe to do so. +// Technically we don't have to mount tmpfs at /tmp, as we could just rely on +// the host /tmp, but this is a nice optimization, and fixes some apps that call +// mknod in /tmp. It's unsafe to mount tmpfs if: +// 1. /tmp is mounted explictly: we should not override user's wish +// 2. /tmp is not empty: mounting tmpfs would hide existing files in /tmp +// +// Note that when there are submounts inside of '/tmp', directories for the +// mount points must be present, making '/tmp' not empty anymore. +func mountTmp(ctx context.Context, conf *Config, mns *fs.MountNamespace, root *fs.Dirent, fds *fdDispenser, mounts []specs.Mount) error { + for _, m := range mounts { + if filepath.Clean(m.Destination) == "/tmp" { + log.Debugf("Explict %q mount found, skipping internal tmpfs, mount: %+v", "/tmp", m) + return nil + } + } + + maxTraversals := uint(0) + tmp, err := mns.FindInode(ctx, root, root, "tmp", &maxTraversals) + switch err { + case nil: + // Found '/tmp' in filesystem, check if it's empty. + defer tmp.DecRef() + f, err := tmp.Inode.GetFile(ctx, tmp, fs.FileFlags{Read: true, Directory: true}) + if err != nil { + return err + } + defer f.DecRef() + serializer := &fs.CollectEntriesSerializer{} + if err := f.Readdir(ctx, serializer); err != nil { + return err + } + // If more than "." and ".." is found, skip internal tmpfs to prevent hiding + // existing files. + if len(serializer.Order) > 2 { + log.Infof("Skipping internal tmpfs on top %q, because it's not empty", "/tmp") + return nil + } + log.Infof("Mounting internal tmpfs on top of empty %q", "/tmp") + fallthrough + + case syserror.ENOENT: + // No '/tmp' found (or fallthrough from above). Safe to mount internal + // tmpfs. + tmpMount := specs.Mount{ + Type: tmpfs, + Destination: "/tmp", + } + return mountSubmount(ctx, conf, mns, root, fds, tmpMount, mounts) + + default: + return err + } +} diff --git a/runsc/boot/loader_test.go b/runsc/boot/loader_test.go index 0ed3002e0..4fcc0faea 100644 --- a/runsc/boot/loader_test.go +++ b/runsc/boot/loader_test.go @@ -398,16 +398,21 @@ func TestCreateMountNamespace(t *testing.T) { } defer cleanup() - mm, err := createMountNamespace(ctx, ctx, &tc.spec, conf, []int{sandEnd}) - if err != nil { + // setupRootContainerFS needs to find root from the context after the + // namespace is created. + var mns *fs.MountNamespace + setMountNS := func(m *fs.MountNamespace) { + mns = m + ctx.(*contexttest.TestContext).RegisterValue(fs.CtxRoot, mns.Root()) + } + if err := setupRootContainerFS(ctx, ctx, &tc.spec, conf, []int{sandEnd}, setMountNS); err != nil { t.Fatalf("createMountNamespace test case %q failed: %v", tc.name, err) } - defer mm.DecRef() - root := mm.Root() + root := mns.Root() defer root.DecRef() for _, p := range tc.expectedPaths { maxTraversals := uint(0) - if d, err := mm.FindInode(ctx, root, root, p, &maxTraversals); err != nil { + if d, err := mns.FindInode(ctx, root, root, p, &maxTraversals); err != nil { t.Errorf("expected path %v to exist with spec %v, but got error %v", p, tc.spec, err) } else { d.DecRef() @@ -570,13 +575,13 @@ func TestRestoreEnvironment(t *testing.T) { }, "tmpfs": { { - Dev: "none", - }, - { Dev: "none", Flags: fs.MountSourceFlags{NoAtime: true}, Data: "uid=1022", }, + { + Dev: "none", + }, }, "devtmpfs": { { diff --git a/runsc/container/multi_container_test.go b/runsc/container/multi_container_test.go index 6b3c41a9b..8490999ea 100644 --- a/runsc/container/multi_container_test.go +++ b/runsc/container/multi_container_test.go @@ -739,6 +739,11 @@ func TestMultiContainerGoferStop(t *testing.T) { t.Fatal("error finding test_app:", err) } + dir, err := ioutil.TempDir(testutil.TmpDir(), "gofer-stop-test") + if err != nil { + t.Fatal("ioutil.TempDir failed:", err) + } + // Setup containers. Root container just reaps children, while the others // perform some IOs. Children are executed in 3 batches of 10. Within the // batch there is overlap between containers starting and being destroyed. In @@ -746,7 +751,8 @@ func TestMultiContainerGoferStop(t *testing.T) { cmds := [][]string{{app, "reaper"}} const batchSize = 10 for i := 0; i < 3*batchSize; i++ { - cmds = append(cmds, []string{"sh", "-c", "find /bin -type f | head | xargs -I SRC cp SRC /tmp/output"}) + cmd := "find /bin -type f | head | xargs -I SRC cp SRC " + dir + cmds = append(cmds, []string{"sh", "-c", cmd}) } allSpecs, allIDs := createSpecs(cmds...) diff --git a/runsc/test/integration/integration_test.go b/runsc/test/integration/integration_test.go index 526b3a7a1..4a2770d48 100644 --- a/runsc/test/integration/integration_test.go +++ b/runsc/test/integration/integration_test.go @@ -279,6 +279,19 @@ func TestJobControl(t *testing.T) { } } +// TestTmpFile checks that files inside '/tmp' are not overridden. In addition, +// it checks that working dir is created if it doesn't exit. +func TestTmpFile(t *testing.T) { + if err := testutil.Pull("alpine"); err != nil { + t.Fatal("docker pull failed:", err) + } + d := testutil.MakeDocker("tmp-file-test") + if err := d.Run("-w=/tmp/foo/bar", "--read-only", "alpine", "touch", "/tmp/foo/bar/file"); err != nil { + t.Fatal("docker run failed:", err) + } + defer d.CleanUp() +} + func TestMain(m *testing.M) { testutil.EnsureSupportedDockerVersion() os.Exit(m.Run()) diff --git a/test/syscalls/BUILD b/test/syscalls/BUILD index 674e4b5b1..c46ac77f7 100644 --- a/test/syscalls/BUILD +++ b/test/syscalls/BUILD @@ -538,6 +538,7 @@ go_binary( "//runsc/specutils", "//runsc/test/testutil", "//test/syscalls/gtest", + "@com_github_opencontainers_runtime-spec//specs-go:go_default_library", "@org_golang_x_sys//unix:go_default_library", ], ) diff --git a/test/syscalls/syscall_test_runner.go b/test/syscalls/syscall_test_runner.go index 1f2ff9864..e5c2358a0 100644 --- a/test/syscalls/syscall_test_runner.go +++ b/test/syscalls/syscall_test_runner.go @@ -29,6 +29,7 @@ import ( "syscall" "testing" + specs "github.com/opencontainers/runtime-spec/specs-go" "golang.org/x/sys/unix" "gvisor.googlesource.com/gvisor/pkg/log" "gvisor.googlesource.com/gvisor/runsc/specutils" @@ -107,7 +108,12 @@ func runTestCaseRunsc(testBin string, tc gtest.TestCase, t *testing.T) { // Mark the root as writeable, as some tests attempt to // write to the rootfs, and expect EACCES, not EROFS. spec.Root.Readonly = false - spec.Mounts = nil + + // Forces '/tmp' to be mounted as tmpfs, otherwise test that rely on features + // available in gVisor's tmpfs and not gofers, may fail. + spec.Mounts = []specs.Mount{ + {Destination: "/tmp", Type: "tmpfs"}, + } // Set environment variable that indicates we are // running in gVisor and with the given platform. |