From f7428af9c11cd47e6252a3fbf24db411e513c241 Mon Sep 17 00:00:00 2001 From: Nicolas Lacasse Date: Wed, 19 Jun 2019 09:20:10 -0700 Subject: Add MountNamespace to task. This allows tasks to have distinct mount namespace, instead of all sharing the kernel's root mount namespace. Currently, the only way for a task to get a different mount namespace than the kernel's root is by explicitly setting a different MountNamespace in CreateProcessArgs, and nothing does this (yet). In a follow-up CL, we will set CreateProcessArgs.MountNamespace when creating a new container inside runsc. Note that "MountNamespace" is a poor term for this thing. It's more like a distinct VFS tree. When we get around to adding real mount namespaces, this will need a better naem. PiperOrigin-RevId: 254009310 --- pkg/sentry/kernel/kernel.go | 49 ++++++++++++++++++++++++++++++++++++--------- 1 file changed, 39 insertions(+), 10 deletions(-) (limited to 'pkg/sentry/kernel/kernel.go') diff --git a/pkg/sentry/kernel/kernel.go b/pkg/sentry/kernel/kernel.go index 07ae592c4..9fe9eb914 100644 --- a/pkg/sentry/kernel/kernel.go +++ b/pkg/sentry/kernel/kernel.go @@ -381,9 +381,23 @@ func (k *Kernel) SaveTo(w io.Writer) error { // flushMountSourceRefs flushes the MountSources for all mounted filesystems // and open FDs. func (k *Kernel) flushMountSourceRefs() error { - // Flush all mount sources for currently mounted filesystems. + // Flush all mount sources for currently mounted filesystems in the + // root mount namespace. k.mounts.FlushMountSourceRefs() + // Some tasks may have other mount namespaces; flush those as well. + flushed := make(map[*fs.MountNamespace]struct{}) + k.tasks.mu.RLock() + k.tasks.forEachThreadGroupLocked(func(tg *ThreadGroup) { + if _, ok := flushed[tg.mounts]; ok { + // Already flushed. + return + } + tg.mounts.FlushMountSourceRefs() + flushed[tg.mounts] = struct{}{} + }) + k.tasks.mu.RUnlock() + // There may be some open FDs whose filesystems have been unmounted. We // must flush those as well. return k.tasks.forEachFDPaused(func(desc descriptor) error { @@ -611,12 +625,18 @@ type CreateProcessArgs struct { // AbstractSocketNamespace is the initial Abstract Socket namespace. AbstractSocketNamespace *AbstractSocketNamespace + // MountNamespace optionally contains the mount namespace for this + // process. If nil, the kernel's mount namespace is used. + // + // Anyone setting MountNamespace must donate a reference (i.e. + // increment it). + MountNamespace *fs.MountNamespace + // Root optionally contains the dirent that serves as the root for the // process. If nil, the mount namespace's root is used as the process' // root. // - // Anyone setting Root must donate a reference (i.e. increment it) to - // keep it alive until it is decremented by CreateProcess. + // Anyone setting Root must donate a reference (i.e. increment it). Root *fs.Dirent // ContainerID is the container that the process belongs to. @@ -715,20 +735,29 @@ func (k *Kernel) CreateProcess(args CreateProcessArgs) (*ThreadGroup, ThreadID, return nil, 0, fmt.Errorf("no kernel MountNamespace") } - tg := k.newThreadGroup(k.tasks.Root, NewSignalHandlers(), linux.SIGCHLD, args.Limits, k.monotonicClock) + // Grab the mount namespace. + mounts := args.MountNamespace + if mounts == nil { + // If no MountNamespace was configured, then use the kernel's + // root mount namespace, with an extra reference that will be + // donated to the task. + mounts = k.mounts + mounts.IncRef() + } + + tg := k.newThreadGroup(mounts, k.tasks.Root, NewSignalHandlers(), linux.SIGCHLD, args.Limits, k.monotonicClock) ctx := args.NewContext(k) // Grab the root directory. root := args.Root if root == nil { - root = fs.RootFromContext(ctx) - // Is the root STILL nil? - if root == nil { - return nil, 0, fmt.Errorf("CreateProcessArgs.Root was not provided, and failed to get root from context") - } + // If no Root was configured, then get it from the + // MountNamespace. + root = mounts.Root() } + // The call to newFSContext below will take a reference on root, so we + // don't need to hold this one. defer root.DecRef() - args.Root = nil // Grab the working directory. remainingTraversals := uint(args.MaxSymlinkTraversals) -- cgit v1.2.3