summary refs log tree commit diff homepage
path: root/pkg/sentry
diff options
context:
space:
mode:
Diffstat (limited to 'pkg/sentry')
-rw-r--r-- pkg/sentry/control/proc.go 17
-rw-r--r-- pkg/sentry/fs/context.go 24
-rw-r--r-- pkg/sentry/fs/mounts.go 11
-rw-r--r-- pkg/sentry/kernel/kernel.go 86
4 files changed, 53 insertions, 85 deletions
diff --git a/pkg/sentry/control/proc.go b/pkg/sentry/control/proc.go
index 3f9772b87..c35faeb4c 100644
--- a/pkg/sentry/control/proc.go
+++ b/pkg/sentry/control/proc.go
@@ -56,15 +56,10 @@ type ExecArgs struct {
// MountNamespace is the mount namespace to execute the new process in.
// A reference on MountNamespace must be held for the lifetime of the
- // ExecArgs. If MountNamespace is nil, it will default to the kernel's
- // root MountNamespace.
+ // ExecArgs. If MountNamespace is nil, it will default to the init
+ // process's MountNamespace.
MountNamespace *fs.MountNamespace
- // Root defines the root directory for the new process. A reference on
- // Root must be held for the lifetime of the ExecArgs. If Root is nil,
- // it will default to the VFS root.
- Root *fs.Dirent
-
// WorkingDirectory defines the working directory for the new process.
WorkingDirectory string `json:"wd"`
@@ -155,7 +150,6 @@ func (proc *Proc) execAsync(args *ExecArgs) (*kernel.ThreadGroup, kernel.ThreadI
Envv: args.Envv,
WorkingDirectory: args.WorkingDirectory,
MountNamespace: args.MountNamespace,
- Root: args.Root,
Credentials: creds,
FDTable: fdTable,
Umask: 0022,
@@ -167,11 +161,6 @@ func (proc *Proc) execAsync(args *ExecArgs) (*kernel.ThreadGroup, kernel.ThreadI
ContainerID: args.ContainerID,
PIDNamespace: args.PIDNamespace,
}
- if initArgs.Root != nil {
- // initArgs must hold a reference on Root, which will be
- // donated to the new process in CreateProcess.
- initArgs.Root.IncRef()
- }
if initArgs.MountNamespace != nil {
// initArgs must hold a reference on MountNamespace, which will
// be donated to the new process in CreateProcess.
@@ -184,7 +173,7 @@ func (proc *Proc) execAsync(args *ExecArgs) (*kernel.ThreadGroup, kernel.ThreadI
paths := fs.GetPath(initArgs.Envv)
mns := initArgs.MountNamespace
if mns == nil {
- mns = proc.Kernel.RootMountNamespace()
+ mns = proc.Kernel.GlobalInit().Leader().MountNamespace()
}
f, err := mns.ResolveExecutablePath(ctx, initArgs.WorkingDirectory, initArgs.Argv[0], paths)
if err != nil {
diff --git a/pkg/sentry/fs/context.go b/pkg/sentry/fs/context.go
index 51b4c7ee1..dd427de5d 100644
--- a/pkg/sentry/fs/context.go
+++ b/pkg/sentry/fs/context.go
@@ -112,3 +112,27 @@ func DirentCacheLimiterFromContext(ctx context.Context) *DirentCacheLimiter {
}
return nil
}
+
+type rootContext struct {
+ context.Context
+ root *Dirent
+}
+
+// WithRoot returns a copy of ctx with the given root.
+func WithRoot(ctx context.Context, root *Dirent) context.Context {
+ return &rootContext{
+ Context: ctx,
+ root: root,
+ }
+}
+
+// Value implements Context.Value.
+func (rc rootContext) Value(key interface{}) interface{} {
+ switch key {
+ case CtxRoot:
+ rc.root.IncRef()
+ return rc.root
+ default:
+ return rc.Context.Value(key)
+ }
+}
diff --git a/pkg/sentry/fs/mounts.go b/pkg/sentry/fs/mounts.go
index 728575864..9b713e785 100644
--- a/pkg/sentry/fs/mounts.go
+++ b/pkg/sentry/fs/mounts.go
@@ -219,6 +219,13 @@ func (mns *MountNamespace) flushMountSourceRefsLocked() {
}
}
+ if mns.root == nil {
+ // No root? This MountNamespace must have already been destroyed.
+ // This can happen when a Save is triggered while a process is
+ // exiting. There is nothing to flush.
+ return
+ }
+
// Flush root's MountSource references.
mns.root.Inode.MountSource.FlushDirentRefs()
}
@@ -249,6 +256,10 @@ func (mns *MountNamespace) destroy() {
// Drop reference on the root.
mns.root.DecRef()
+ // Ensure that root cannot be accessed via this MountNamespace any
+ // more.
+ mns.root = nil
+
// Wait for asynchronous work (queued by dropping Dirent references
// above) to complete before destroying this MountNamespace.
AsyncBarrier()
diff --git a/pkg/sentry/kernel/kernel.go b/pkg/sentry/kernel/kernel.go
index 53c25e49e..56a329f83 100644
--- a/pkg/sentry/kernel/kernel.go
+++ b/pkg/sentry/kernel/kernel.go
@@ -112,11 +112,6 @@ type Kernel struct {
rootIPCNamespace *IPCNamespace
rootAbstractSocketNamespace *AbstractSocketNamespace
- // mounts holds the state of the virtual filesystem. mounts is initially
- // nil, and must be set by calling Kernel.SetRootMountNamespace before
- // Kernel.CreateProcess can succeed.
- mounts *fs.MountNamespace
-
// futexes is the "root" futex.Manager, from which all others are forked.
// This is necessary to ensure that shared futexes are coherent across all
// tasks, including those created by CreateProcess.
@@ -392,11 +387,7 @@ func (k *Kernel) SaveTo(w io.Writer) error {
// flushMountSourceRefs flushes the MountSources for all mounted filesystems
// and open FDs.
func (k *Kernel) flushMountSourceRefs() error {
- // Flush all mount sources for currently mounted filesystems in the
- // root mount namespace.
- k.mounts.FlushMountSourceRefs()
-
- // Some tasks may have other mount namespaces; flush those as well.
+ // Flush all mount sources for currently mounted filesystems in each task.
flushed := make(map[*fs.MountNamespace]struct{})
k.tasks.mu.RLock()
k.tasks.forEachThreadGroupLocked(func(tg *ThreadGroup) {
@@ -573,16 +564,6 @@ func (k *Kernel) LoadFrom(r io.Reader, net inet.Stack) error {
return nil
}
-// Destroy releases resources owned by k.
-//
-// Preconditions: There must be no task goroutines running in k.
-func (k *Kernel) Destroy() {
- if k.mounts != nil {
- k.mounts.DecRef()
- k.mounts = nil
- }
-}
-
// UniqueID returns a unique identifier.
func (k *Kernel) UniqueID() uint64 {
id := atomic.AddUint64(&k.uniqueID, 1)
@@ -646,19 +627,12 @@ type CreateProcessArgs struct {
AbstractSocketNamespace *AbstractSocketNamespace
// MountNamespace optionally contains the mount namespace for this
- // process. If nil, the kernel's mount namespace is used.
+ // process. If nil, the init process's mount namespace is used.
//
// Anyone setting MountNamespace must donate a reference (i.e.
// increment it).
MountNamespace *fs.MountNamespace
- // Root optionally contains the dirent that serves as the root for the
- // process. If nil, the mount namespace's root is used as the process'
- // root.
- //
- // Anyone setting Root must donate a reference (i.e. increment it).
- Root *fs.Dirent
-
// ContainerID is the container that the process belongs to.
ContainerID string
}
@@ -696,16 +670,10 @@ func (ctx *createProcessContext) Value(key interface{}) interface{} {
case auth.CtxCredentials:
return ctx.args.Credentials
case fs.CtxRoot:
- if ctx.args.Root != nil {
- // Take a reference on the root dirent that will be
- // given to the caller.
- ctx.args.Root.IncRef()
- return ctx.args.Root
- }
- if ctx.k.mounts != nil {
- // MountNamespace.Root() will take a reference on the
- // root dirent for us.
- return ctx.k.mounts.Root()
+ if ctx.args.MountNamespace != nil {
+ // MountNamespace.Root() will take a reference on the root
+ // dirent for us.
+ return ctx.args.MountNamespace.Root()
}
return nil
case fs.CtxDirentCacheLimiter:
@@ -749,30 +717,18 @@ func (k *Kernel) CreateProcess(args CreateProcessArgs) (*ThreadGroup, ThreadID,
defer k.extMu.Unlock()
log.Infof("EXEC: %v", args.Argv)
- if k.mounts == nil {
- return nil, 0, fmt.Errorf("no kernel MountNamespace")
- }
-
// Grab the mount namespace.
mounts := args.MountNamespace
if mounts == nil {
- // If no MountNamespace was configured, then use the kernel's
- // root mount namespace, with an extra reference that will be
- // donated to the task.
- mounts = k.mounts
+ mounts = k.GlobalInit().Leader().MountNamespace()
mounts.IncRef()
}
tg := k.newThreadGroup(mounts, args.PIDNamespace, NewSignalHandlers(), linux.SIGCHLD, args.Limits, k.monotonicClock)
ctx := args.NewContext(k)
- // Grab the root directory.
- root := args.Root
- if root == nil {
- // If no Root was configured, then get it from the
- // MountNamespace.
- root = mounts.Root()
- }
+ // Get the root directory from the MountNamespace.
+ root := mounts.Root()
// The call to newFSContext below will take a reference on root, so we
// don't need to hold this one.
defer root.DecRef()
@@ -782,7 +738,7 @@ func (k *Kernel) CreateProcess(args CreateProcessArgs) (*ThreadGroup, ThreadID,
wd := root // Default.
if args.WorkingDirectory != "" {
var err error
- wd, err = k.mounts.FindInode(ctx, root, nil, args.WorkingDirectory, &remainingTraversals)
+ wd, err = mounts.FindInode(ctx, root, nil, args.WorkingDirectory, &remainingTraversals)
if err != nil {
return nil, 0, fmt.Errorf("failed to find initial working directory %q: %v", args.WorkingDirectory, err)
}
@@ -811,8 +767,7 @@ func (k *Kernel) CreateProcess(args CreateProcessArgs) (*ThreadGroup, ThreadID,
// Create a fresh task context.
remainingTraversals = uint(args.MaxSymlinkTraversals)
- tc, se := k.LoadTaskImage(ctx, k.mounts, root, wd, &remainingTraversals, args.Filename, args.File, args.Argv, args.Envv, k.featureSet)
-
+ tc, se := k.LoadTaskImage(ctx, mounts, root, wd, &remainingTraversals, args.Filename, args.File, args.Argv, args.Envv, k.featureSet)
if se != nil {
return nil, 0, errors.New(se.String())
}
@@ -1056,20 +1011,6 @@ func (k *Kernel) RootAbstractSocketNamespace() *AbstractSocketNamespace {
return k.rootAbstractSocketNamespace
}
-// RootMountNamespace returns the MountNamespace.
-func (k *Kernel) RootMountNamespace() *fs.MountNamespace {
- k.extMu.Lock()
- defer k.extMu.Unlock()
- return k.mounts
-}
-
-// SetRootMountNamespace sets the MountNamespace.
-func (k *Kernel) SetRootMountNamespace(mounts *fs.MountNamespace) {
- k.extMu.Lock()
- defer k.extMu.Unlock()
- k.mounts = mounts
-}
-
// NetworkStack returns the network stack. NetworkStack may return nil if no
// network stack is available.
func (k *Kernel) NetworkStack() inet.Stack {
@@ -1260,7 +1201,10 @@ func (ctx supervisorContext) Value(key interface{}) interface{} {
// The supervisor context is global root.
return auth.NewRootCredentials(ctx.k.rootUserNamespace)
case fs.CtxRoot:
- return ctx.k.mounts.Root()
+ if ctx.k.globalInit != nil {
+ return ctx.k.globalInit.mounts.Root()
+ }
+ return nil
case fs.CtxDirentCacheLimiter:
return ctx.k.DirentCacheLimiter
case ktime.CtxRealtimeClock: