From aaaefdf9cadf033fa281b612315c3227f5ab1c7a Mon Sep 17 00:00:00 2001 From: Nicolas Lacasse Date: Fri, 2 Aug 2019 11:21:50 -0700 Subject: Remove kernel.mounts. We can get the mount namespace from the CreateProcessArgs in all cases where we need it. This also gets rid of kernel.Destroy method, since the only thing it was doing was DecRefing the mounts. Removing the need to call kernel.SetRootMountNamespace also allowed for some more simplifications in the container fs setup code. PiperOrigin-RevId: 261357060 --- pkg/sentry/control/proc.go | 17 ++------- pkg/sentry/fs/context.go | 24 +++++++++++++ pkg/sentry/fs/mounts.go | 11 ++++++ pkg/sentry/kernel/kernel.go | 86 ++++++++------------------------------------- 4 files changed, 53 insertions(+), 85 deletions(-) (limited to 'pkg') diff --git a/pkg/sentry/control/proc.go b/pkg/sentry/control/proc.go index 3f9772b87..c35faeb4c 100644 --- a/pkg/sentry/control/proc.go +++ b/pkg/sentry/control/proc.go @@ -56,15 +56,10 @@ type ExecArgs struct { // MountNamespace is the mount namespace to execute the new process in. // A reference on MountNamespace must be held for the lifetime of the - // ExecArgs. If MountNamespace is nil, it will default to the kernel's - // root MountNamespace. + // ExecArgs. If MountNamespace is nil, it will default to the init + // process's MountNamespace. MountNamespace *fs.MountNamespace - // Root defines the root directory for the new process. A reference on - // Root must be held for the lifetime of the ExecArgs. If Root is nil, - // it will default to the VFS root. - Root *fs.Dirent - // WorkingDirectory defines the working directory for the new process. WorkingDirectory string `json:"wd"` @@ -155,7 +150,6 @@ func (proc *Proc) execAsync(args *ExecArgs) (*kernel.ThreadGroup, kernel.ThreadI Envv: args.Envv, WorkingDirectory: args.WorkingDirectory, MountNamespace: args.MountNamespace, - Root: args.Root, Credentials: creds, FDTable: fdTable, Umask: 0022, @@ -167,11 +161,6 @@ func (proc *Proc) execAsync(args *ExecArgs) (*kernel.ThreadGroup, kernel.ThreadI ContainerID: args.ContainerID, PIDNamespace: args.PIDNamespace, } - if initArgs.Root != nil { - // initArgs must hold a reference on Root, which will be - // donated to the new process in CreateProcess. - initArgs.Root.IncRef() - } if initArgs.MountNamespace != nil { // initArgs must hold a reference on MountNamespace, which will // be donated to the new process in CreateProcess. @@ -184,7 +173,7 @@ func (proc *Proc) execAsync(args *ExecArgs) (*kernel.ThreadGroup, kernel.ThreadI paths := fs.GetPath(initArgs.Envv) mns := initArgs.MountNamespace if mns == nil { - mns = proc.Kernel.RootMountNamespace() + mns = proc.Kernel.GlobalInit().Leader().MountNamespace() } f, err := mns.ResolveExecutablePath(ctx, initArgs.WorkingDirectory, initArgs.Argv[0], paths) if err != nil { diff --git a/pkg/sentry/fs/context.go b/pkg/sentry/fs/context.go index 51b4c7ee1..dd427de5d 100644 --- a/pkg/sentry/fs/context.go +++ b/pkg/sentry/fs/context.go @@ -112,3 +112,27 @@ func DirentCacheLimiterFromContext(ctx context.Context) *DirentCacheLimiter { } return nil } + +type rootContext struct { + context.Context + root *Dirent +} + +// WithRoot returns a copy of ctx with the given root. +func WithRoot(ctx context.Context, root *Dirent) context.Context { + return &rootContext{ + Context: ctx, + root: root, + } +} + +// Value implements Context.Value. +func (rc rootContext) Value(key interface{}) interface{} { + switch key { + case CtxRoot: + rc.root.IncRef() + return rc.root + default: + return rc.Context.Value(key) + } +} diff --git a/pkg/sentry/fs/mounts.go b/pkg/sentry/fs/mounts.go index 728575864..9b713e785 100644 --- a/pkg/sentry/fs/mounts.go +++ b/pkg/sentry/fs/mounts.go @@ -219,6 +219,13 @@ func (mns *MountNamespace) flushMountSourceRefsLocked() { } } + if mns.root == nil { + // No root? This MountSource must have already been destroyed. + // This can happen when a Save is triggered while a process is + // exiting. There is nothing to flush. + return + } + // Flush root's MountSource references. mns.root.Inode.MountSource.FlushDirentRefs() } @@ -249,6 +256,10 @@ func (mns *MountNamespace) destroy() { // Drop reference on the root. mns.root.DecRef() + // Ensure that root cannot be accessed via this MountNamespace any + // more. + mns.root = nil + // Wait for asynchronous work (queued by dropping Dirent references // above) to complete before destroying this MountNamespace. AsyncBarrier() diff --git a/pkg/sentry/kernel/kernel.go b/pkg/sentry/kernel/kernel.go index 53c25e49e..56a329f83 100644 --- a/pkg/sentry/kernel/kernel.go +++ b/pkg/sentry/kernel/kernel.go @@ -112,11 +112,6 @@ type Kernel struct { rootIPCNamespace *IPCNamespace rootAbstractSocketNamespace *AbstractSocketNamespace - // mounts holds the state of the virtual filesystem. mounts is initially - // nil, and must be set by calling Kernel.SetRootMountNamespace before - // Kernel.CreateProcess can succeed. - mounts *fs.MountNamespace - // futexes is the "root" futex.Manager, from which all others are forked. // This is necessary to ensure that shared futexes are coherent across all // tasks, including those created by CreateProcess. @@ -392,11 +387,7 @@ func (k *Kernel) SaveTo(w io.Writer) error { // flushMountSourceRefs flushes the MountSources for all mounted filesystems // and open FDs. func (k *Kernel) flushMountSourceRefs() error { - // Flush all mount sources for currently mounted filesystems in the - // root mount namespace. - k.mounts.FlushMountSourceRefs() - - // Some tasks may have other mount namespaces; flush those as well. + // Flush all mount sources for currently mounted filesystems in each task. flushed := make(map[*fs.MountNamespace]struct{}) k.tasks.mu.RLock() k.tasks.forEachThreadGroupLocked(func(tg *ThreadGroup) { @@ -573,16 +564,6 @@ func (k *Kernel) LoadFrom(r io.Reader, net inet.Stack) error { return nil } -// Destroy releases resources owned by k. -// -// Preconditions: There must be no task goroutines running in k. -func (k *Kernel) Destroy() { - if k.mounts != nil { - k.mounts.DecRef() - k.mounts = nil - } -} - // UniqueID returns a unique identifier. func (k *Kernel) UniqueID() uint64 { id := atomic.AddUint64(&k.uniqueID, 1) @@ -646,19 +627,12 @@ type CreateProcessArgs struct { AbstractSocketNamespace *AbstractSocketNamespace // MountNamespace optionally contains the mount namespace for this - // process. If nil, the kernel's mount namespace is used. + // process. If nil, the init process's mount namespace is used. // // Anyone setting MountNamespace must donate a reference (i.e. // increment it). MountNamespace *fs.MountNamespace - // Root optionally contains the dirent that serves as the root for the - // process. If nil, the mount namespace's root is used as the process' - // root. - // - // Anyone setting Root must donate a reference (i.e. increment it). - Root *fs.Dirent - // ContainerID is the container that the process belongs to. ContainerID string } @@ -696,16 +670,10 @@ func (ctx *createProcessContext) Value(key interface{}) interface{} { case auth.CtxCredentials: return ctx.args.Credentials case fs.CtxRoot: - if ctx.args.Root != nil { - // Take a reference on the root dirent that will be - // given to the caller. - ctx.args.Root.IncRef() - return ctx.args.Root - } - if ctx.k.mounts != nil { - // MountNamespace.Root() will take a reference on the - // root dirent for us. - return ctx.k.mounts.Root() + if ctx.args.MountNamespace != nil { + // MountNamespace.Root() will take a reference on the root + // dirent for us. + return ctx.args.MountNamespace.Root() } return nil case fs.CtxDirentCacheLimiter: @@ -749,30 +717,18 @@ func (k *Kernel) CreateProcess(args CreateProcessArgs) (*ThreadGroup, ThreadID, defer k.extMu.Unlock() log.Infof("EXEC: %v", args.Argv) - if k.mounts == nil { - return nil, 0, fmt.Errorf("no kernel MountNamespace") - } - // Grab the mount namespace. mounts := args.MountNamespace if mounts == nil { - // If no MountNamespace was configured, then use the kernel's - // root mount namespace, with an extra reference that will be - // donated to the task. - mounts = k.mounts + mounts = k.GlobalInit().Leader().MountNamespace() mounts.IncRef() } tg := k.newThreadGroup(mounts, args.PIDNamespace, NewSignalHandlers(), linux.SIGCHLD, args.Limits, k.monotonicClock) ctx := args.NewContext(k) - // Grab the root directory. - root := args.Root - if root == nil { - // If no Root was configured, then get it from the - // MountNamespace. - root = mounts.Root() - } + // Get the root directory from the MountNamespace. + root := mounts.Root() // The call to newFSContext below will take a reference on root, so we // don't need to hold this one. defer root.DecRef() @@ -782,7 +738,7 @@ func (k *Kernel) CreateProcess(args CreateProcessArgs) (*ThreadGroup, ThreadID, wd := root // Default. if args.WorkingDirectory != "" { var err error - wd, err = k.mounts.FindInode(ctx, root, nil, args.WorkingDirectory, &remainingTraversals) + wd, err = mounts.FindInode(ctx, root, nil, args.WorkingDirectory, &remainingTraversals) if err != nil { return nil, 0, fmt.Errorf("failed to find initial working directory %q: %v", args.WorkingDirectory, err) } @@ -811,8 +767,7 @@ func (k *Kernel) CreateProcess(args CreateProcessArgs) (*ThreadGroup, ThreadID, // Create a fresh task context. remainingTraversals = uint(args.MaxSymlinkTraversals) - tc, se := k.LoadTaskImage(ctx, k.mounts, root, wd, &remainingTraversals, args.Filename, args.File, args.Argv, args.Envv, k.featureSet) - + tc, se := k.LoadTaskImage(ctx, mounts, root, wd, &remainingTraversals, args.Filename, args.File, args.Argv, args.Envv, k.featureSet) if se != nil { return nil, 0, errors.New(se.String()) } @@ -1056,20 +1011,6 @@ func (k *Kernel) RootAbstractSocketNamespace() *AbstractSocketNamespace { return k.rootAbstractSocketNamespace } -// RootMountNamespace returns the MountNamespace. -func (k *Kernel) RootMountNamespace() *fs.MountNamespace { - k.extMu.Lock() - defer k.extMu.Unlock() - return k.mounts -} - -// SetRootMountNamespace sets the MountNamespace. -func (k *Kernel) SetRootMountNamespace(mounts *fs.MountNamespace) { - k.extMu.Lock() - defer k.extMu.Unlock() - k.mounts = mounts -} - // NetworkStack returns the network stack. NetworkStack may return nil if no // network stack is available. func (k *Kernel) NetworkStack() inet.Stack { @@ -1260,7 +1201,10 @@ func (ctx supervisorContext) Value(key interface{}) interface{} { // The supervisor context is global root. return auth.NewRootCredentials(ctx.k.rootUserNamespace) case fs.CtxRoot: - return ctx.k.mounts.Root() + if ctx.k.globalInit != nil { + return ctx.k.globalInit.mounts.Root() + } + return nil case fs.CtxDirentCacheLimiter: return ctx.k.DirentCacheLimiter case ktime.CtxRealtimeClock: -- cgit v1.2.3