summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorFabricio Voznika <fvoznika@google.com>2019-01-16 12:47:21 -0800
committerShentubot <shentubot@google.com>2019-01-16 12:48:32 -0800
commite4d3ca7263291b43cdc49c7553c62608be062cd9 (patch)
tree47b8dee17087a36e1fc34c8acc48c798f2d2f383
parent92cf3764e032740f0c84a1b242c54b99f45a6bf0 (diff)
Prevent internal tmpfs mount to override files in /tmp
Runsc wants to mount /tmp using internal tmpfs implementation for performance. However, it risks hiding files that may exist under /tmp in case it's present in the container. Now, it only mounts over /tmp iff: - /tmp was not explicitly asked to be mounted - /tmp is empty If any of this is not true, then /tmp maps to the container's image /tmp. Note: checkpoint doesn't have sentry FS mounted to check if /tmp is empty. It simply looks for explicit mounts right now. PiperOrigin-RevId: 229607856 Change-Id: I10b6dae7ac157ef578efc4dfceb089f3b94cde06
-rw-r--r--runsc/boot/fs.go174
-rw-r--r--runsc/boot/loader_test.go21
-rw-r--r--runsc/container/multi_container_test.go8
-rw-r--r--runsc/test/integration/integration_test.go13
-rw-r--r--test/syscalls/BUILD1
-rw-r--r--test/syscalls/syscall_test_runner.go8
6 files changed, 157 insertions, 68 deletions
diff --git a/runsc/boot/fs.go b/runsc/boot/fs.go
index 942315d6e..e0c8291ac 100644
--- a/runsc/boot/fs.go
+++ b/runsc/boot/fs.go
@@ -81,9 +81,11 @@ func (f *fdDispenser) empty() bool {
return len(f.fds) == 0
}
-// createMountNamespace creates a mount namespace containing the root filesystem
+// setupRootContainerFS creates a mount namespace containing the root filesystem
// and all mounts. 'rootCtx' is used to walk directories to find mount points.
-func createMountNamespace(userCtx context.Context, rootCtx context.Context, spec *specs.Spec, conf *Config, goferFDs []int) (*fs.MountNamespace, error) {
+// 'setMountNS' is called after namespace is created. It must set the mount NS
+// to 'rootCtx'.
+func setupRootContainerFS(userCtx context.Context, rootCtx context.Context, spec *specs.Spec, conf *Config, goferFDs []int, setMountNS func(*fs.MountNamespace)) error {
mounts := compileMounts(spec)
// Create a tmpfs mount where we create and mount a root filesystem for
@@ -96,32 +98,24 @@ func createMountNamespace(userCtx context.Context, rootCtx context.Context, spec
fds := &fdDispenser{fds: goferFDs}
rootInode, err := createRootMount(rootCtx, spec, conf, fds, mounts)
if err != nil {
- return nil, fmt.Errorf("failed to create root mount: %v", err)
+ return fmt.Errorf("failed to create root mount: %v", err)
}
mns, err := fs.NewMountNamespace(userCtx, rootInode)
if err != nil {
- return nil, fmt.Errorf("failed to create root mount namespace: %v", err)
+ return fmt.Errorf("failed to create root mount namespace: %v", err)
}
+ setMountNS(mns)
root := mns.Root()
defer root.DecRef()
- for _, m := range mounts {
- if err := mountSubmount(rootCtx, conf, mns, root, fds, m, mounts); err != nil {
- return nil, fmt.Errorf("mount submount: %v", err)
- }
- }
-
- if !fds.empty() {
- return nil, fmt.Errorf("not all mount points were consumed, remaining: %v", fds)
- }
- return mns, nil
+ return mountSubmounts(rootCtx, conf, mns, root, mounts, fds)
}
// compileMounts returns the supported mounts from the mount spec, adding any
// mandatory mounts that are required by the OCI specification.
func compileMounts(spec *specs.Spec) []specs.Mount {
- // Keep track of whether proc, sys, and tmp were mounted.
- var procMounted, sysMounted, tmpMounted bool
+ // Keep track of whether proc and sys were mounted.
+ var procMounted, sysMounted bool
var mounts []specs.Mount
// Always mount /dev.
@@ -147,8 +141,6 @@ func compileMounts(spec *specs.Spec) []specs.Mount {
procMounted = true
case "/sys":
sysMounted = true
- case "/tmp":
- tmpMounted = true
}
}
@@ -168,20 +160,6 @@ func compileMounts(spec *specs.Spec) []specs.Mount {
})
}
- // Technically we don't have to mount tmpfs at /tmp, as we could just
- // rely on the host /tmp, but this is a nice optimization, and fixes
- // some apps that call mknod in /tmp.
- if !tmpMounted {
- // TODO: If the host /tmp (or a mount at /tmp) has
- // files in it, we should overlay our tmpfs implementation over
- // that. Until then, the /tmp mount will always appear empty at
- // container creation.
- mandatoryMounts = append(mandatoryMounts, specs.Mount{
- Type: tmpfs,
- Destination: "/tmp",
- })
- }
-
// The mandatory mounts should be ordered right after the root, in case
// there are submounts of these mandatory mounts already in the spec.
mounts = append(mounts[:0], append(mandatoryMounts, mounts[0:]...)...)
@@ -288,6 +266,23 @@ func getMountNameAndOptions(conf *Config, m specs.Mount, fds *fdDispenser) (stri
return fsName, opts, useOverlay, err
}
+func mountSubmounts(ctx context.Context, conf *Config, mns *fs.MountNamespace, root *fs.Dirent, mounts []specs.Mount, fds *fdDispenser) error {
+ for _, m := range mounts {
+ if err := mountSubmount(ctx, conf, mns, root, fds, m, mounts); err != nil {
+ return fmt.Errorf("mount submount %q: %v", m.Destination, err)
+ }
+ }
+
+ if err := mountTmp(ctx, conf, mns, root, fds, mounts); err != nil {
+ return fmt.Errorf("mount submount %q: %v", "tmp", err)
+ }
+
+ if !fds.empty() {
+ return fmt.Errorf("not all mount points were consumed, remaining: %v", fds)
+ }
+ return nil
+}
+
// mountSubmount mounts volumes inside the container's root. Because mounts may
// be readonly, a lower ramfs overlay is added to create the mount point dir.
// Another overlay is added with tmpfs on top if Config.Overlay is true.
@@ -453,11 +448,27 @@ func createRestoreEnvironment(spec *specs.Spec, conf *Config, fds *fdDispenser)
renv.MountSources[rootFsName] = append(renv.MountSources[rootFsName], rootMount)
// Add submounts.
+ var tmpMounted bool
for _, m := range compileMounts(spec) {
if err := addRestoreMount(conf, renv, m, fds); err != nil {
return nil, err
}
+ if filepath.Clean(m.Destination) == "/tmp" {
+ tmpMounted = true
+ }
}
+
+ // TODO: handle '/tmp' properly (see mountTmp()).
+ if !tmpMounted {
+ tmpMount := specs.Mount{
+ Type: tmpfs,
+ Destination: "/tmp",
+ }
+ if err := addRestoreMount(conf, renv, tmpMount, fds); err != nil {
+ return nil, err
+ }
+ }
+
return renv, nil
}
@@ -555,28 +566,13 @@ func setupContainerFS(procArgs *kernel.CreateProcessArgs, spec *specs.Spec, conf
mns := k.RootMountNamespace()
if mns == nil {
// Setup the root container.
-
- // Create the virtual filesystem.
- mns, err := createMountNamespace(ctx, rootCtx, spec, conf, goferFDs)
- if err != nil {
- return fmt.Errorf("error creating mounts: %v", err)
- }
- k.SetRootMountNamespace(mns)
-
- // We're done with root container.
- return nil
+ return setupRootContainerFS(ctx, rootCtx, spec, conf, goferFDs, func(mns *fs.MountNamespace) {
+ k.SetRootMountNamespace(mns)
+ })
}
// Setup a child container.
-
- // Create the container's root filesystem mount.
log.Infof("Creating new process in child container.")
- fds := &fdDispenser{fds: append([]int{}, goferFDs...)}
- rootInode, err := createRootMount(rootCtx, spec, conf, fds, nil)
- if err != nil {
- return fmt.Errorf("error creating filesystem for container: %v", err)
- }
-
globalRoot := mns.Root()
defer globalRoot.DecRef()
@@ -595,6 +591,13 @@ func setupContainerFS(procArgs *kernel.CreateProcessArgs, spec *specs.Spec, conf
}
defer containerRoot.DecRef()
+ // Create the container's root filesystem mount.
+ fds := &fdDispenser{fds: goferFDs}
+ rootInode, err := createRootMount(rootCtx, spec, conf, fds, nil)
+ if err != nil {
+ return fmt.Errorf("error creating filesystem for container: %v", err)
+ }
+
// Mount the container's root filesystem to the newly created mount point.
if err := mns.Mount(ctx, containerRoot, rootInode); err != nil {
return fmt.Errorf("mount container root: %v", err)
@@ -606,20 +609,20 @@ func setupContainerFS(procArgs *kernel.CreateProcessArgs, spec *specs.Spec, conf
if err != nil {
return fmt.Errorf("find container mount point %q: %v", cid, err)
}
+ cu := specutils.MakeCleanup(func() { containerRoot.DecRef() })
+ defer cu.Clean()
log.Infof("Mounted child's root fs to %q", filepath.Join(ChildContainersDir, cid))
+ // Set process root here, so 'rootCtx.Value(CtxRoot)' will return it.
+ procArgs.Root = containerRoot
+
// Mount all submounts.
mounts := compileMounts(spec)
- for _, m := range mounts {
- if err := mountSubmount(rootCtx, conf, k.RootMountNamespace(), containerRoot, fds, m, mounts); err != nil {
- containerRoot.DecRef()
- return fmt.Errorf("error mounting filesystem for container: %v", err)
- }
+ if err := mountSubmounts(rootCtx, conf, mns, containerRoot, mounts, fds); err != nil {
+ return err
}
-
- // Set the procArgs root directory.
- procArgs.Root = containerRoot
+ cu.Release()
return nil
}
@@ -705,3 +708,58 @@ func destroyContainerFS(ctx context.Context, cid string, k *kernel.Kernel) error
return nil
}
+
+// mountTmp mounts an internal tmpfs at '/tmp' if it's safe to do so.
+// Technically we don't have to mount tmpfs at /tmp, as we could just rely on
+// the host /tmp, but this is a nice optimization, and fixes some apps that call
+// mknod in /tmp. It's unsafe to mount tmpfs if:
+// 1. /tmp is mounted explictly: we should not override user's wish
+// 2. /tmp is not empty: mounting tmpfs would hide existing files in /tmp
+//
+// Note that when there are submounts inside of '/tmp', directories for the
+// mount points must be present, making '/tmp' not empty anymore.
+func mountTmp(ctx context.Context, conf *Config, mns *fs.MountNamespace, root *fs.Dirent, fds *fdDispenser, mounts []specs.Mount) error {
+ for _, m := range mounts {
+ if filepath.Clean(m.Destination) == "/tmp" {
+ log.Debugf("Explict %q mount found, skipping internal tmpfs, mount: %+v", "/tmp", m)
+ return nil
+ }
+ }
+
+ maxTraversals := uint(0)
+ tmp, err := mns.FindInode(ctx, root, root, "tmp", &maxTraversals)
+ switch err {
+ case nil:
+ // Found '/tmp' in filesystem, check if it's empty.
+ defer tmp.DecRef()
+ f, err := tmp.Inode.GetFile(ctx, tmp, fs.FileFlags{Read: true, Directory: true})
+ if err != nil {
+ return err
+ }
+ defer f.DecRef()
+ serializer := &fs.CollectEntriesSerializer{}
+ if err := f.Readdir(ctx, serializer); err != nil {
+ return err
+ }
+ // If more than "." and ".." is found, skip internal tmpfs to prevent hiding
+ // existing files.
+ if len(serializer.Order) > 2 {
+ log.Infof("Skipping internal tmpfs on top %q, because it's not empty", "/tmp")
+ return nil
+ }
+ log.Infof("Mounting internal tmpfs on top of empty %q", "/tmp")
+ fallthrough
+
+ case syserror.ENOENT:
+ // No '/tmp' found (or fallthrough from above). Safe to mount internal
+ // tmpfs.
+ tmpMount := specs.Mount{
+ Type: tmpfs,
+ Destination: "/tmp",
+ }
+ return mountSubmount(ctx, conf, mns, root, fds, tmpMount, mounts)
+
+ default:
+ return err
+ }
+}
diff --git a/runsc/boot/loader_test.go b/runsc/boot/loader_test.go
index 0ed3002e0..4fcc0faea 100644
--- a/runsc/boot/loader_test.go
+++ b/runsc/boot/loader_test.go
@@ -398,16 +398,21 @@ func TestCreateMountNamespace(t *testing.T) {
}
defer cleanup()
- mm, err := createMountNamespace(ctx, ctx, &tc.spec, conf, []int{sandEnd})
- if err != nil {
+ // setupRootContainerFS needs to find root from the context after the
+ // namespace is created.
+ var mns *fs.MountNamespace
+ setMountNS := func(m *fs.MountNamespace) {
+ mns = m
+ ctx.(*contexttest.TestContext).RegisterValue(fs.CtxRoot, mns.Root())
+ }
+ if err := setupRootContainerFS(ctx, ctx, &tc.spec, conf, []int{sandEnd}, setMountNS); err != nil {
t.Fatalf("createMountNamespace test case %q failed: %v", tc.name, err)
}
- defer mm.DecRef()
- root := mm.Root()
+ root := mns.Root()
defer root.DecRef()
for _, p := range tc.expectedPaths {
maxTraversals := uint(0)
- if d, err := mm.FindInode(ctx, root, root, p, &maxTraversals); err != nil {
+ if d, err := mns.FindInode(ctx, root, root, p, &maxTraversals); err != nil {
t.Errorf("expected path %v to exist with spec %v, but got error %v", p, tc.spec, err)
} else {
d.DecRef()
@@ -570,13 +575,13 @@ func TestRestoreEnvironment(t *testing.T) {
},
"tmpfs": {
{
- Dev: "none",
- },
- {
Dev: "none",
Flags: fs.MountSourceFlags{NoAtime: true},
Data: "uid=1022",
},
+ {
+ Dev: "none",
+ },
},
"devtmpfs": {
{
diff --git a/runsc/container/multi_container_test.go b/runsc/container/multi_container_test.go
index 6b3c41a9b..8490999ea 100644
--- a/runsc/container/multi_container_test.go
+++ b/runsc/container/multi_container_test.go
@@ -739,6 +739,11 @@ func TestMultiContainerGoferStop(t *testing.T) {
t.Fatal("error finding test_app:", err)
}
+ dir, err := ioutil.TempDir(testutil.TmpDir(), "gofer-stop-test")
+ if err != nil {
+ t.Fatal("ioutil.TempDir failed:", err)
+ }
+
// Setup containers. Root container just reaps children, while the others
// perform some IOs. Children are executed in 3 batches of 10. Within the
// batch there is overlap between containers starting and being destroyed. In
@@ -746,7 +751,8 @@ func TestMultiContainerGoferStop(t *testing.T) {
cmds := [][]string{{app, "reaper"}}
const batchSize = 10
for i := 0; i < 3*batchSize; i++ {
- cmds = append(cmds, []string{"sh", "-c", "find /bin -type f | head | xargs -I SRC cp SRC /tmp/output"})
+ cmd := "find /bin -type f | head | xargs -I SRC cp SRC " + dir
+ cmds = append(cmds, []string{"sh", "-c", cmd})
}
allSpecs, allIDs := createSpecs(cmds...)
diff --git a/runsc/test/integration/integration_test.go b/runsc/test/integration/integration_test.go
index 526b3a7a1..4a2770d48 100644
--- a/runsc/test/integration/integration_test.go
+++ b/runsc/test/integration/integration_test.go
@@ -279,6 +279,19 @@ func TestJobControl(t *testing.T) {
}
}
+// TestTmpFile checks that files inside '/tmp' are not overridden. In addition,
+// it checks that working dir is created if it doesn't exit.
+func TestTmpFile(t *testing.T) {
+ if err := testutil.Pull("alpine"); err != nil {
+ t.Fatal("docker pull failed:", err)
+ }
+ d := testutil.MakeDocker("tmp-file-test")
+ if err := d.Run("-w=/tmp/foo/bar", "--read-only", "alpine", "touch", "/tmp/foo/bar/file"); err != nil {
+ t.Fatal("docker run failed:", err)
+ }
+ defer d.CleanUp()
+}
+
func TestMain(m *testing.M) {
testutil.EnsureSupportedDockerVersion()
os.Exit(m.Run())
diff --git a/test/syscalls/BUILD b/test/syscalls/BUILD
index 674e4b5b1..c46ac77f7 100644
--- a/test/syscalls/BUILD
+++ b/test/syscalls/BUILD
@@ -538,6 +538,7 @@ go_binary(
"//runsc/specutils",
"//runsc/test/testutil",
"//test/syscalls/gtest",
+ "@com_github_opencontainers_runtime-spec//specs-go:go_default_library",
"@org_golang_x_sys//unix:go_default_library",
],
)
diff --git a/test/syscalls/syscall_test_runner.go b/test/syscalls/syscall_test_runner.go
index 1f2ff9864..e5c2358a0 100644
--- a/test/syscalls/syscall_test_runner.go
+++ b/test/syscalls/syscall_test_runner.go
@@ -29,6 +29,7 @@ import (
"syscall"
"testing"
+ specs "github.com/opencontainers/runtime-spec/specs-go"
"golang.org/x/sys/unix"
"gvisor.googlesource.com/gvisor/pkg/log"
"gvisor.googlesource.com/gvisor/runsc/specutils"
@@ -107,7 +108,12 @@ func runTestCaseRunsc(testBin string, tc gtest.TestCase, t *testing.T) {
// Mark the root as writeable, as some tests attempt to
// write to the rootfs, and expect EACCES, not EROFS.
spec.Root.Readonly = false
- spec.Mounts = nil
+
+ // Forces '/tmp' to be mounted as tmpfs, otherwise test that rely on features
+ // available in gVisor's tmpfs and not gofers, may fail.
+ spec.Mounts = []specs.Mount{
+ {Destination: "/tmp", Type: "tmpfs"},
+ }
// Set environment variable that indicates we are
// running in gVisor and with the given platform.