diff options
Diffstat (limited to 'pkg/sentry')
-rw-r--r-- | pkg/sentry/control/proc.go | 17 | ||||
-rw-r--r-- | pkg/sentry/fs/context.go | 24 | ||||
-rw-r--r-- | pkg/sentry/fs/mounts.go | 11 | ||||
-rw-r--r-- | pkg/sentry/kernel/kernel.go | 86 |
4 files changed, 53 insertions, 85 deletions
diff --git a/pkg/sentry/control/proc.go b/pkg/sentry/control/proc.go index 3f9772b87..c35faeb4c 100644 --- a/pkg/sentry/control/proc.go +++ b/pkg/sentry/control/proc.go @@ -56,15 +56,10 @@ type ExecArgs struct { // MountNamespace is the mount namespace to execute the new process in. // A reference on MountNamespace must be held for the lifetime of the - // ExecArgs. If MountNamespace is nil, it will default to the kernel's - // root MountNamespace. + // ExecArgs. If MountNamespace is nil, it will default to the init + // process's MountNamespace. MountNamespace *fs.MountNamespace - // Root defines the root directory for the new process. A reference on - // Root must be held for the lifetime of the ExecArgs. If Root is nil, - // it will default to the VFS root. - Root *fs.Dirent - // WorkingDirectory defines the working directory for the new process. WorkingDirectory string `json:"wd"` @@ -155,7 +150,6 @@ func (proc *Proc) execAsync(args *ExecArgs) (*kernel.ThreadGroup, kernel.ThreadI Envv: args.Envv, WorkingDirectory: args.WorkingDirectory, MountNamespace: args.MountNamespace, - Root: args.Root, Credentials: creds, FDTable: fdTable, Umask: 0022, @@ -167,11 +161,6 @@ func (proc *Proc) execAsync(args *ExecArgs) (*kernel.ThreadGroup, kernel.ThreadI ContainerID: args.ContainerID, PIDNamespace: args.PIDNamespace, } - if initArgs.Root != nil { - // initArgs must hold a reference on Root, which will be - // donated to the new process in CreateProcess. - initArgs.Root.IncRef() - } if initArgs.MountNamespace != nil { // initArgs must hold a reference on MountNamespace, which will // be donated to the new process in CreateProcess. @@ -184,7 +173,7 @@ func (proc *Proc) execAsync(args *ExecArgs) (*kernel.ThreadGroup, kernel.ThreadI paths := fs.GetPath(initArgs.Envv) mns := initArgs.MountNamespace if mns == nil { - mns = proc.Kernel.RootMountNamespace() + mns = proc.Kernel.GlobalInit().Leader().MountNamespace() } f, err := mns.ResolveExecutablePath(ctx, initArgs.WorkingDirectory, initArgs.Argv[0], paths) if err != nil { diff --git a/pkg/sentry/fs/context.go b/pkg/sentry/fs/context.go index 51b4c7ee1..dd427de5d 100644 --- a/pkg/sentry/fs/context.go +++ b/pkg/sentry/fs/context.go @@ -112,3 +112,27 @@ func DirentCacheLimiterFromContext(ctx context.Context) *DirentCacheLimiter { } return nil } + +type rootContext struct { + context.Context + root *Dirent +} + +// WithRoot returns a copy of ctx with the given root. +func WithRoot(ctx context.Context, root *Dirent) context.Context { + return &rootContext{ + Context: ctx, + root: root, + } +} + +// Value implements Context.Value. +func (rc rootContext) Value(key interface{}) interface{} { + switch key { + case CtxRoot: + rc.root.IncRef() + return rc.root + default: + return rc.Context.Value(key) + } +} diff --git a/pkg/sentry/fs/mounts.go b/pkg/sentry/fs/mounts.go index 728575864..9b713e785 100644 --- a/pkg/sentry/fs/mounts.go +++ b/pkg/sentry/fs/mounts.go @@ -219,6 +219,13 @@ func (mns *MountNamespace) flushMountSourceRefsLocked() { } } + if mns.root == nil { + // No root? This MountSource must have already been destroyed. + // This can happen when a Save is triggered while a process is + // exiting. There is nothing to flush. + return + } + // Flush root's MountSource references. mns.root.Inode.MountSource.FlushDirentRefs() } @@ -249,6 +256,10 @@ func (mns *MountNamespace) destroy() { // Drop reference on the root. mns.root.DecRef() + // Ensure that root cannot be accessed via this MountNamespace any + // more. + mns.root = nil + // Wait for asynchronous work (queued by dropping Dirent references // above) to complete before destroying this MountNamespace. AsyncBarrier() diff --git a/pkg/sentry/kernel/kernel.go b/pkg/sentry/kernel/kernel.go index 53c25e49e..56a329f83 100644 --- a/pkg/sentry/kernel/kernel.go +++ b/pkg/sentry/kernel/kernel.go @@ -112,11 +112,6 @@ type Kernel struct { rootIPCNamespace *IPCNamespace rootAbstractSocketNamespace *AbstractSocketNamespace - // mounts holds the state of the virtual filesystem. mounts is initially - // nil, and must be set by calling Kernel.SetRootMountNamespace before - // Kernel.CreateProcess can succeed. - mounts *fs.MountNamespace - // futexes is the "root" futex.Manager, from which all others are forked. // This is necessary to ensure that shared futexes are coherent across all // tasks, including those created by CreateProcess. @@ -392,11 +387,7 @@ func (k *Kernel) SaveTo(w io.Writer) error { // flushMountSourceRefs flushes the MountSources for all mounted filesystems // and open FDs. func (k *Kernel) flushMountSourceRefs() error { - // Flush all mount sources for currently mounted filesystems in the - // root mount namespace. - k.mounts.FlushMountSourceRefs() - - // Some tasks may have other mount namespaces; flush those as well. + // Flush all mount sources for currently mounted filesystems in each task. flushed := make(map[*fs.MountNamespace]struct{}) k.tasks.mu.RLock() k.tasks.forEachThreadGroupLocked(func(tg *ThreadGroup) { @@ -573,16 +564,6 @@ func (k *Kernel) LoadFrom(r io.Reader, net inet.Stack) error { return nil } -// Destroy releases resources owned by k. -// -// Preconditions: There must be no task goroutines running in k. -func (k *Kernel) Destroy() { - if k.mounts != nil { - k.mounts.DecRef() - k.mounts = nil - } -} - // UniqueID returns a unique identifier. func (k *Kernel) UniqueID() uint64 { id := atomic.AddUint64(&k.uniqueID, 1) @@ -646,19 +627,12 @@ type CreateProcessArgs struct { AbstractSocketNamespace *AbstractSocketNamespace // MountNamespace optionally contains the mount namespace for this - // process. If nil, the kernel's mount namespace is used. + // process. If nil, the init process's mount namespace is used. // // Anyone setting MountNamespace must donate a reference (i.e. // increment it). MountNamespace *fs.MountNamespace - // Root optionally contains the dirent that serves as the root for the - // process. If nil, the mount namespace's root is used as the process' - // root. - // - // Anyone setting Root must donate a reference (i.e. increment it). - Root *fs.Dirent - // ContainerID is the container that the process belongs to. ContainerID string } @@ -696,16 +670,10 @@ func (ctx *createProcessContext) Value(key interface{}) interface{} { case auth.CtxCredentials: return ctx.args.Credentials case fs.CtxRoot: - if ctx.args.Root != nil { - // Take a reference on the root dirent that will be - // given to the caller. - ctx.args.Root.IncRef() - return ctx.args.Root - } - if ctx.k.mounts != nil { - // MountNamespace.Root() will take a reference on the - // root dirent for us. - return ctx.k.mounts.Root() + if ctx.args.MountNamespace != nil { + // MountNamespace.Root() will take a reference on the root + // dirent for us. + return ctx.args.MountNamespace.Root() } return nil case fs.CtxDirentCacheLimiter: @@ -749,30 +717,18 @@ func (k *Kernel) CreateProcess(args CreateProcessArgs) (*ThreadGroup, ThreadID, defer k.extMu.Unlock() log.Infof("EXEC: %v", args.Argv) - if k.mounts == nil { - return nil, 0, fmt.Errorf("no kernel MountNamespace") - } - // Grab the mount namespace. mounts := args.MountNamespace if mounts == nil { - // If no MountNamespace was configured, then use the kernel's - // root mount namespace, with an extra reference that will be - // donated to the task. - mounts = k.mounts + mounts = k.GlobalInit().Leader().MountNamespace() mounts.IncRef() } tg := k.newThreadGroup(mounts, args.PIDNamespace, NewSignalHandlers(), linux.SIGCHLD, args.Limits, k.monotonicClock) ctx := args.NewContext(k) - // Grab the root directory. - root := args.Root - if root == nil { - // If no Root was configured, then get it from the - // MountNamespace. - root = mounts.Root() - } + // Get the root directory from the MountNamespace. + root := mounts.Root() // The call to newFSContext below will take a reference on root, so we // don't need to hold this one. defer root.DecRef() @@ -782,7 +738,7 @@ func (k *Kernel) CreateProcess(args CreateProcessArgs) (*ThreadGroup, ThreadID, wd := root // Default. if args.WorkingDirectory != "" { var err error - wd, err = k.mounts.FindInode(ctx, root, nil, args.WorkingDirectory, &remainingTraversals) + wd, err = mounts.FindInode(ctx, root, nil, args.WorkingDirectory, &remainingTraversals) if err != nil { return nil, 0, fmt.Errorf("failed to find initial working directory %q: %v", args.WorkingDirectory, err) } @@ -811,8 +767,7 @@ func (k *Kernel) CreateProcess(args CreateProcessArgs) (*ThreadGroup, ThreadID, // Create a fresh task context. remainingTraversals = uint(args.MaxSymlinkTraversals) - tc, se := k.LoadTaskImage(ctx, k.mounts, root, wd, &remainingTraversals, args.Filename, args.File, args.Argv, args.Envv, k.featureSet) - + tc, se := k.LoadTaskImage(ctx, mounts, root, wd, &remainingTraversals, args.Filename, args.File, args.Argv, args.Envv, k.featureSet) if se != nil { return nil, 0, errors.New(se.String()) } @@ -1056,20 +1011,6 @@ func (k *Kernel) RootAbstractSocketNamespace() *AbstractSocketNamespace { return k.rootAbstractSocketNamespace } -// RootMountNamespace returns the MountNamespace. -func (k *Kernel) RootMountNamespace() *fs.MountNamespace { - k.extMu.Lock() - defer k.extMu.Unlock() - return k.mounts -} - -// SetRootMountNamespace sets the MountNamespace. -func (k *Kernel) SetRootMountNamespace(mounts *fs.MountNamespace) { - k.extMu.Lock() - defer k.extMu.Unlock() - k.mounts = mounts -} - // NetworkStack returns the network stack. NetworkStack may return nil if no // network stack is available. func (k *Kernel) NetworkStack() inet.Stack { @@ -1260,7 +1201,10 @@ func (ctx supervisorContext) Value(key interface{}) interface{} { // The supervisor context is global root. return auth.NewRootCredentials(ctx.k.rootUserNamespace) case fs.CtxRoot: - return ctx.k.mounts.Root() + if ctx.k.globalInit != nil { + return ctx.k.globalInit.mounts.Root() + } + return nil case fs.CtxDirentCacheLimiter: return ctx.k.DirentCacheLimiter case ktime.CtxRealtimeClock: |