diff options
-rw-r--r-- | pkg/sentry/fs/mounts.go | 3 | ||||
-rw-r--r-- | runsc/boot/fs.go | 159 | ||||
-rw-r--r-- | runsc/boot/loader.go | 17 | ||||
-rw-r--r-- | runsc/boot/loader_test.go | 17 | ||||
-rw-r--r-- | runsc/boot/user_test.go | 12 | ||||
-rw-r--r-- | runsc/container/container_test.go | 16 | ||||
-rw-r--r-- | runsc/container/multi_container_test.go | 55 |
7 files changed, 179 insertions, 100 deletions
diff --git a/pkg/sentry/fs/mounts.go b/pkg/sentry/fs/mounts.go index 9b713e785..ac0398bd9 100644 --- a/pkg/sentry/fs/mounts.go +++ b/pkg/sentry/fs/mounts.go @@ -171,8 +171,6 @@ type MountNamespace struct { // NewMountNamespace returns a new MountNamespace, with the provided node at the // root, and the given cache size. A root must always be provided. func NewMountNamespace(ctx context.Context, root *Inode) (*MountNamespace, error) { - creds := auth.CredentialsFromContext(ctx) - // Set the root dirent and id on the root mount. The reference returned from // NewDirent will be donated to the MountNamespace constructed below. d := NewDirent(ctx, root, "/") @@ -181,6 +179,7 @@ func NewMountNamespace(ctx context.Context, root *Inode) (*MountNamespace, error d: newRootMount(1, d), } + creds := auth.CredentialsFromContext(ctx) mns := MountNamespace{ userns: creds.UserNamespace, root: d, diff --git a/runsc/boot/fs.go b/runsc/boot/fs.go index b6eeacf98..34c674840 100644 --- a/runsc/boot/fs.go +++ b/runsc/boot/fs.go @@ -25,19 +25,21 @@ import ( // Include filesystem types that OCI spec might mount. _ "gvisor.dev/gvisor/pkg/sentry/fs/dev" - "gvisor.dev/gvisor/pkg/sentry/fs/gofer" _ "gvisor.dev/gvisor/pkg/sentry/fs/host" _ "gvisor.dev/gvisor/pkg/sentry/fs/proc" - "gvisor.dev/gvisor/pkg/sentry/fs/ramfs" _ "gvisor.dev/gvisor/pkg/sentry/fs/sys" _ "gvisor.dev/gvisor/pkg/sentry/fs/tmpfs" _ "gvisor.dev/gvisor/pkg/sentry/fs/tty" specs "github.com/opencontainers/runtime-spec/specs-go" + "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/sentry/context" "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/fs/gofer" + "gvisor.dev/gvisor/pkg/sentry/fs/ramfs" "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/runsc/specutils" ) @@ -261,6 +263,18 @@ func subtargets(root string, mnts []specs.Mount) []string { return targets } +func setupContainerFS(ctx context.Context, conf *Config, mntr *containerMounter, procArgs *kernel.CreateProcessArgs) error { + mns, err := mntr.setupFS(conf, procArgs) + if err != nil { + return err + } + + // Set namespace here so that it can be found in ctx. + procArgs.MountNamespace = mns + + return setExecutablePath(ctx, procArgs) +} + // setExecutablePath sets the procArgs.Filename by searching the PATH for an // executable matching the procArgs.Argv[0]. func setExecutablePath(ctx context.Context, procArgs *kernel.CreateProcessArgs) error { @@ -500,73 +514,95 @@ func newContainerMounter(spec *specs.Spec, goferFDs []int, k *kernel.Kernel, hin } } -// setupChildContainer is used to set up the file system for non-root containers -// and amend the procArgs accordingly. This is the main entry point for this -// rest of functions in this file. procArgs are passed by reference and the -// FDMap field is modified. It dups stdioFDs. -func (c *containerMounter) setupChildContainer(conf *Config, procArgs *kernel.CreateProcessArgs) error { - // Setup a child container. - log.Infof("Creating new process in child container.") - - // Create a new root inode and mount namespace for the container. - rootCtx := c.k.SupervisorContext() - rootInode, err := c.createRootMount(rootCtx, conf) - if err != nil { - return fmt.Errorf("creating filesystem for container: %v", err) +// processHints processes annotations that container hints about how volumes +// should be mounted (e.g. a volume shared between containers). It must be +// called for the root container only. +func (c *containerMounter) processHints(conf *Config) error { + ctx := c.k.SupervisorContext() + for _, hint := range c.hints.mounts { + log.Infof("Mounting master of shared mount %q from %q type %q", hint.name, hint.mount.Source, hint.mount.Type) + inode, err := c.mountSharedMaster(ctx, conf, hint) + if err != nil { + return fmt.Errorf("mounting shared master %q: %v", hint.name, err) + } + hint.root = inode } - mns, err := fs.NewMountNamespace(rootCtx, rootInode) + return nil +} + +// setupFS is used to set up the file system for all containers. This is the +// main entry point method, with most of the other being internal only. It +// returns the mount namespace that is created for the container. +func (c *containerMounter) setupFS(conf *Config, procArgs *kernel.CreateProcessArgs) (*fs.MountNamespace, error) { + log.Infof("Configuring container's file system") + + // Create context with root credentials to mount the filesystem (the current + // user may not be privileged enough). + rootProcArgs := *procArgs + rootProcArgs.WorkingDirectory = "/" + rootProcArgs.Credentials = auth.NewRootCredentials(procArgs.Credentials.UserNamespace) + rootProcArgs.Umask = 0022 + rootProcArgs.MaxSymlinkTraversals = linux.MaxSymlinkTraversals + rootCtx := rootProcArgs.NewContext(c.k) + + mns, err := c.createMountNamespace(rootCtx, conf) if err != nil { - return fmt.Errorf("creating new mount namespace for container: %v", err) + return nil, err } - procArgs.MountNamespace = mns - root := mns.Root() - defer root.DecRef() - // Mount all submounts. - if err := c.mountSubmounts(rootCtx, conf, mns, root); err != nil { - return err + // Set namespace here so that it can be found in rootCtx. + rootProcArgs.MountNamespace = mns + + if err := c.mountSubmounts(rootCtx, conf, mns); err != nil { + return nil, err } - return c.checkDispenser() + return mns, nil } -func (c *containerMounter) checkDispenser() error { - if !c.fds.empty() { - return fmt.Errorf("not all gofer FDs were consumed, remaining: %v", c.fds) +func (c *containerMounter) createMountNamespace(ctx context.Context, conf *Config) (*fs.MountNamespace, error) { + rootInode, err := c.createRootMount(ctx, conf) + if err != nil { + return nil, fmt.Errorf("creating filesystem for container: %v", err) } - return nil + mns, err := fs.NewMountNamespace(ctx, rootInode) + if err != nil { + return nil, fmt.Errorf("creating new mount namespace for container: %v", err) + } + return mns, nil } -// setupRootContainer creates a mount namespace containing the root filesystem -// and all mounts. 'rootCtx' is used to walk directories to find mount points. -// The 'setMountNS' callback is called after the mount namespace is created and -// will get a reference on that namespace. The callback must ensure that the -// rootCtx has the provided mount namespace. -func (c *containerMounter) setupRootContainer(userCtx context.Context, rootCtx context.Context, conf *Config, setMountNS func(*fs.MountNamespace)) error { - for _, hint := range c.hints.mounts { - log.Infof("Mounting master of shared mount %q from %q type %q", hint.name, hint.mount.Source, hint.mount.Type) - inode, err := c.mountSharedMaster(rootCtx, conf, hint) - if err != nil { - return fmt.Errorf("mounting shared master %q: %v", hint.name, err) +func (c *containerMounter) mountSubmounts(ctx context.Context, conf *Config, mns *fs.MountNamespace) error { + root := mns.Root() + defer root.DecRef() + + for _, m := range c.mounts { + log.Debugf("Mounting %q to %q, type: %s, options: %s", m.Source, m.Destination, m.Type, m.Options) + if hint := c.hints.findMount(m); hint != nil && hint.isSupported() { + if err := c.mountSharedSubmount(ctx, mns, root, m, hint); err != nil { + return fmt.Errorf("mount shared mount %q to %q: %v", hint.name, m.Destination, err) + } + } else { + if err := c.mountSubmount(ctx, conf, mns, root, m); err != nil { + return fmt.Errorf("mount submount %q: %v", m.Destination, err) + } } - hint.root = inode } - rootInode, err := c.createRootMount(rootCtx, conf) - if err != nil { - return fmt.Errorf("creating root mount: %v", err) + if err := c.mountTmp(ctx, conf, mns, root); err != nil { + return fmt.Errorf("mount submount %q: %v", "tmp", err) } - mns, err := fs.NewMountNamespace(userCtx, rootInode) - if err != nil { - return fmt.Errorf("creating root mount namespace: %v", err) + + if err := c.checkDispenser(); err != nil { + return err } - setMountNS(mns) + return nil +} - root := mns.Root() - defer root.DecRef() - if err := c.mountSubmounts(rootCtx, conf, mns, root); err != nil { - return fmt.Errorf("mounting submounts: %v", err) +func (c *containerMounter) checkDispenser() error { + if !c.fds.empty() { + return fmt.Errorf("not all gofer FDs were consumed, remaining: %v", c.fds) } - return c.checkDispenser() + return nil } // mountSharedMaster mounts the master of a volume that is shared among @@ -684,25 +720,6 @@ func (c *containerMounter) getMountNameAndOptions(conf *Config, m specs.Mount) ( return fsName, opts, useOverlay, err } -func (c *containerMounter) mountSubmounts(ctx context.Context, conf *Config, mns *fs.MountNamespace, root *fs.Dirent) error { - for _, m := range c.mounts { - if hint := c.hints.findMount(m); hint != nil && hint.isSupported() { - if err := c.mountSharedSubmount(ctx, mns, root, m, hint); err != nil { - return fmt.Errorf("mount shared mount %q to %q: %v", hint.name, m.Destination, err) - } - } else { - if err := c.mountSubmount(ctx, conf, mns, root, m); err != nil { - return fmt.Errorf("mount submount %q: %v", m.Destination, err) - } - } - } - - if err := c.mountTmp(ctx, conf, mns, root); err != nil { - return fmt.Errorf("mount submount %q: %v", "tmp", err) - } - return nil -} - // mountSubmount mounts volumes inside the container's root. Because mounts may // be readonly, a lower ramfs overlay is added to create the mount point dir. // Another overlay is added with tmpfs on top if Config.Overlay is true. diff --git a/runsc/boot/loader.go b/runsc/boot/loader.go index f91158027..02dd080fe 100644 --- a/runsc/boot/loader.go +++ b/runsc/boot/loader.go @@ -527,14 +527,12 @@ func (l *Loader) run() error { // Setup the root container file system. l.startGoferMonitor(l.sandboxID, l.goferFDs) + mntr := newContainerMounter(l.spec, l.goferFDs, l.k, l.mountHints) - if err := mntr.setupRootContainer(ctx, ctx, l.conf, func(mns *fs.MountNamespace) { - l.rootProcArgs.MountNamespace = mns - }); err != nil { + if err := mntr.processHints(l.conf); err != nil { return err } - - if err := setExecutablePath(ctx, &l.rootProcArgs); err != nil { + if err := setupContainerFS(ctx, l.conf, mntr, &l.rootProcArgs); err != nil { return err } @@ -687,13 +685,10 @@ func (l *Loader) startContainer(spec *specs.Spec, conf *Config, cid string, file // Setup the child container file system. l.startGoferMonitor(cid, goferFDs) - mntr := newContainerMounter(spec, goferFDs, l.k, l.mountHints) - if err := mntr.setupChildContainer(conf, &procArgs); err != nil { - return fmt.Errorf("configuring container FS: %v", err) - } - if err := setExecutablePath(ctx, &procArgs); err != nil { - return fmt.Errorf("setting executable path for %+v: %v", procArgs, err) + mntr := newContainerMounter(spec, goferFDs, l.k, l.mountHints) + if err := setupContainerFS(ctx, conf, mntr, &procArgs); err != nil { + return err } // Create and start the new process. diff --git a/runsc/boot/loader_test.go b/runsc/boot/loader_test.go index e0e32b9d5..147ff7703 100644 --- a/runsc/boot/loader_test.go +++ b/runsc/boot/loader_test.go @@ -401,17 +401,16 @@ func TestCreateMountNamespace(t *testing.T) { } defer cleanup() - // setupRootContainer needs to find root from the context after the - // namespace is created. - var mns *fs.MountNamespace - setMountNS := func(m *fs.MountNamespace) { - mns = m - ctx.(*contexttest.TestContext).RegisterValue(fs.CtxRoot, mns.Root()) - } mntr := newContainerMounter(&tc.spec, []int{sandEnd}, nil, &podMountHints{}) - if err := mntr.setupRootContainer(ctx, ctx, conf, setMountNS); err != nil { - t.Fatalf("createMountNamespace test case %q failed: %v", tc.name, err) + mns, err := mntr.createMountNamespace(ctx, conf) + if err != nil { + t.Fatalf("failed to create mount namespace: %v", err) } + ctx = fs.WithRoot(ctx, mns.Root()) + if err := mntr.mountSubmounts(ctx, conf, mns); err != nil { + t.Fatalf("failed to create mount namespace: %v", err) + } + root := mns.Root() defer root.DecRef() for _, p := range tc.expectedPaths { diff --git a/runsc/boot/user_test.go b/runsc/boot/user_test.go index 01f666507..906baf3e5 100644 --- a/runsc/boot/user_test.go +++ b/runsc/boot/user_test.go @@ -164,13 +164,13 @@ func TestGetExecUserHome(t *testing.T) { }, } - var mns *fs.MountNamespace - setMountNS := func(m *fs.MountNamespace) { - mns = m - ctx.(*contexttest.TestContext).RegisterValue(fs.CtxRoot, mns.Root()) - } mntr := newContainerMounter(spec, []int{sandEnd}, nil, &podMountHints{}) - if err := mntr.setupRootContainer(ctx, ctx, conf, setMountNS); err != nil { + mns, err := mntr.createMountNamespace(ctx, conf) + if err != nil { + t.Fatalf("failed to create mount namespace: %v", err) + } + ctx = fs.WithRoot(ctx, mns.Root()) + if err := mntr.mountSubmounts(ctx, conf, mns); err != nil { t.Fatalf("failed to create mount namespace: %v", err) } diff --git a/runsc/container/container_test.go b/runsc/container/container_test.go index af128bf1c..3d4f304f3 100644 --- a/runsc/container/container_test.go +++ b/runsc/container/container_test.go @@ -1310,10 +1310,13 @@ func TestRunNonRoot(t *testing.T) { t.Logf("Running test with conf: %+v", conf) spec := testutil.NewSpecWithArgs("/bin/true") + + // Set a random user/group with no access to "blocked" dir. spec.Process.User.UID = 343 spec.Process.User.GID = 2401 + spec.Process.Capabilities = nil - // User that container runs as can't list '$TMP/blocked' and would fail to + // User running inside container can't list '$TMP/blocked' and would fail to // mount it. dir, err := ioutil.TempDir(testutil.TmpDir(), "blocked") if err != nil { @@ -1327,6 +1330,17 @@ func TestRunNonRoot(t *testing.T) { t.Fatalf("os.MkDir(%q) failed: %v", dir, err) } + src, err := ioutil.TempDir(testutil.TmpDir(), "src") + if err != nil { + t.Fatalf("ioutil.TempDir() failed: %v", err) + } + + spec.Mounts = append(spec.Mounts, specs.Mount{ + Destination: dir, + Source: src, + Type: "bind", + }) + if err := run(spec, conf); err != nil { t.Fatalf("error running sandbox: %v", err) } diff --git a/runsc/container/multi_container_test.go b/runsc/container/multi_container_test.go index 2d51fecc6..ae03d24b4 100644 --- a/runsc/container/multi_container_test.go +++ b/runsc/container/multi_container_test.go @@ -1485,3 +1485,58 @@ func TestMultiContainerLoadSandbox(t *testing.T) { t.Errorf("containers not found: %v", wantIDs) } } + +// TestMultiContainerRunNonRoot checks that child container can be configured +// when running as non-privileged user. +func TestMultiContainerRunNonRoot(t *testing.T) { + cmdRoot := []string{"/bin/sleep", "100"} + cmdSub := []string{"/bin/true"} + podSpecs, ids := createSpecs(cmdRoot, cmdSub) + + // User running inside container can't list '$TMP/blocked' and would fail to + // mount it. + blocked, err := ioutil.TempDir(testutil.TmpDir(), "blocked") + if err != nil { + t.Fatalf("ioutil.TempDir() failed: %v", err) + } + if err := os.Chmod(blocked, 0700); err != nil { + t.Fatalf("os.MkDir(%q) failed: %v", blocked, err) + } + dir := path.Join(blocked, "test") + if err := os.Mkdir(dir, 0755); err != nil { + t.Fatalf("os.MkDir(%q) failed: %v", dir, err) + } + + src, err := ioutil.TempDir(testutil.TmpDir(), "src") + if err != nil { + t.Fatalf("ioutil.TempDir() failed: %v", err) + } + + // Set a random user/group with no access to "blocked" dir. + podSpecs[1].Process.User.UID = 343 + podSpecs[1].Process.User.GID = 2401 + podSpecs[1].Process.Capabilities = nil + + podSpecs[1].Mounts = append(podSpecs[1].Mounts, specs.Mount{ + Destination: dir, + Source: src, + Type: "bind", + }) + + conf := testutil.TestConfig() + pod, cleanup, err := startContainers(conf, podSpecs, ids) + if err != nil { + t.Fatalf("error starting containers: %v", err) + } + defer cleanup() + + // Once all containers are started, wait for the child container to exit. + // This means that the volume was mounted properly. + ws, err := pod[1].Wait() + if err != nil { + t.Fatalf("running child container: %v", err) + } + if !ws.Exited() || ws.ExitStatus() != 0 { + t.Fatalf("child container failed, waitStatus: %v", ws) + } +} |