summaryrefslogtreecommitdiffhomepage
path: root/pkg/sentry/kernel/kernel.go
diff options
context:
space:
mode:
Diffstat (limited to 'pkg/sentry/kernel/kernel.go')
-rw-r--r--pkg/sentry/kernel/kernel.go107
1 files changed, 68 insertions, 39 deletions
diff --git a/pkg/sentry/kernel/kernel.go b/pkg/sentry/kernel/kernel.go
index f253a81d9..47dadc43a 100644
--- a/pkg/sentry/kernel/kernel.go
+++ b/pkg/sentry/kernel/kernel.go
@@ -39,34 +39,34 @@ import (
"sync/atomic"
"time"
- "gvisor.googlesource.com/gvisor/pkg/abi/linux"
- "gvisor.googlesource.com/gvisor/pkg/cpuid"
- "gvisor.googlesource.com/gvisor/pkg/eventchannel"
- "gvisor.googlesource.com/gvisor/pkg/log"
- "gvisor.googlesource.com/gvisor/pkg/refs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/arch"
- "gvisor.googlesource.com/gvisor/pkg/sentry/context"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs/timerfd"
- "gvisor.googlesource.com/gvisor/pkg/sentry/hostcpu"
- "gvisor.googlesource.com/gvisor/pkg/sentry/inet"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/epoll"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/futex"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/sched"
- ktime "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time"
- "gvisor.googlesource.com/gvisor/pkg/sentry/limits"
- "gvisor.googlesource.com/gvisor/pkg/sentry/loader"
- "gvisor.googlesource.com/gvisor/pkg/sentry/mm"
- "gvisor.googlesource.com/gvisor/pkg/sentry/pgalloc"
- "gvisor.googlesource.com/gvisor/pkg/sentry/platform"
- "gvisor.googlesource.com/gvisor/pkg/sentry/socket/netlink/port"
- sentrytime "gvisor.googlesource.com/gvisor/pkg/sentry/time"
- "gvisor.googlesource.com/gvisor/pkg/sentry/unimpl"
- uspb "gvisor.googlesource.com/gvisor/pkg/sentry/unimpl/unimplemented_syscall_go_proto"
- "gvisor.googlesource.com/gvisor/pkg/sentry/uniqueid"
- "gvisor.googlesource.com/gvisor/pkg/state"
- "gvisor.googlesource.com/gvisor/pkg/tcpip"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/cpuid"
+ "gvisor.dev/gvisor/pkg/eventchannel"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/refs"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
+ "gvisor.dev/gvisor/pkg/sentry/context"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/fs/timerfd"
+ "gvisor.dev/gvisor/pkg/sentry/hostcpu"
+ "gvisor.dev/gvisor/pkg/sentry/inet"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/epoll"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/futex"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/sched"
+ ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time"
+ "gvisor.dev/gvisor/pkg/sentry/limits"
+ "gvisor.dev/gvisor/pkg/sentry/loader"
+ "gvisor.dev/gvisor/pkg/sentry/mm"
+ "gvisor.dev/gvisor/pkg/sentry/pgalloc"
+ "gvisor.dev/gvisor/pkg/sentry/platform"
+ "gvisor.dev/gvisor/pkg/sentry/socket/netlink/port"
+ sentrytime "gvisor.dev/gvisor/pkg/sentry/time"
+ "gvisor.dev/gvisor/pkg/sentry/unimpl"
+ uspb "gvisor.dev/gvisor/pkg/sentry/unimpl/unimplemented_syscall_go_proto"
+ "gvisor.dev/gvisor/pkg/sentry/uniqueid"
+ "gvisor.dev/gvisor/pkg/state"
+ "gvisor.dev/gvisor/pkg/tcpip"
)
// Kernel represents an emulated Linux kernel. It must be initialized by calling
@@ -381,9 +381,23 @@ func (k *Kernel) SaveTo(w io.Writer) error {
// flushMountSourceRefs flushes the MountSources for all mounted filesystems
// and open FDs.
func (k *Kernel) flushMountSourceRefs() error {
- // Flush all mount sources for currently mounted filesystems.
+ // Flush all mount sources for currently mounted filesystems in the
+ // root mount namespace.
k.mounts.FlushMountSourceRefs()
+ // Some tasks may have other mount namespaces; flush those as well.
+ flushed := make(map[*fs.MountNamespace]struct{})
+ k.tasks.mu.RLock()
+ k.tasks.forEachThreadGroupLocked(func(tg *ThreadGroup) {
+ if _, ok := flushed[tg.mounts]; ok {
+ // Already flushed.
+ return
+ }
+ tg.mounts.FlushMountSourceRefs()
+ flushed[tg.mounts] = struct{}{}
+ })
+ k.tasks.mu.RUnlock()
+
// There may be some open FDs whose filesystems have been unmounted. We
// must flush those as well.
return k.tasks.forEachFDPaused(func(desc descriptor) error {
@@ -611,12 +625,18 @@ type CreateProcessArgs struct {
// AbstractSocketNamespace is the initial Abstract Socket namespace.
AbstractSocketNamespace *AbstractSocketNamespace
+ // MountNamespace optionally contains the mount namespace for this
+ // process. If nil, the kernel's mount namespace is used.
+ //
+ // Anyone setting MountNamespace must donate a reference (i.e.
+ // increment it).
+ MountNamespace *fs.MountNamespace
+
// Root optionally contains the dirent that serves as the root for the
// process. If nil, the mount namespace's root is used as the process'
// root.
//
- // Anyone setting Root must donate a reference (i.e. increment it) to
- // keep it alive until it is decremented by CreateProcess.
+ // Anyone setting Root must donate a reference (i.e. increment it).
Root *fs.Dirent
// ContainerID is the container that the process belongs to.
@@ -659,7 +679,7 @@ func (ctx *createProcessContext) Value(key interface{}) interface{} {
return ctx.args.Credentials
case fs.CtxRoot:
if ctx.args.Root != nil {
- // Take a refernce on the root dirent that will be
+ // Take a reference on the root dirent that will be
// given to the caller.
ctx.args.Root.IncRef()
return ctx.args.Root
@@ -715,20 +735,29 @@ func (k *Kernel) CreateProcess(args CreateProcessArgs) (*ThreadGroup, ThreadID,
return nil, 0, fmt.Errorf("no kernel MountNamespace")
}
- tg := k.newThreadGroup(k.tasks.Root, NewSignalHandlers(), linux.SIGCHLD, args.Limits, k.monotonicClock)
+ // Grab the mount namespace.
+ mounts := args.MountNamespace
+ if mounts == nil {
+ // If no MountNamespace was configured, then use the kernel's
+ // root mount namespace, with an extra reference that will be
+ // donated to the task.
+ mounts = k.mounts
+ mounts.IncRef()
+ }
+
+ tg := k.newThreadGroup(mounts, k.tasks.Root, NewSignalHandlers(), linux.SIGCHLD, args.Limits, k.monotonicClock)
ctx := args.NewContext(k)
// Grab the root directory.
root := args.Root
if root == nil {
- root = fs.RootFromContext(ctx)
- // Is the root STILL nil?
- if root == nil {
- return nil, 0, fmt.Errorf("CreateProcessArgs.Root was not provided, and failed to get root from context")
- }
+ // If no Root was configured, then get it from the
+ // MountNamespace.
+ root = mounts.Root()
}
+ // The call to newFSContext below will take a reference on root, so we
+ // don't need to hold this one.
defer root.DecRef()
- args.Root = nil
// Grab the working directory.
remainingTraversals := uint(args.MaxSymlinkTraversals)