diff options
Diffstat (limited to 'pkg/sentry/kernel/kernel.go')
-rw-r--r-- | pkg/sentry/kernel/kernel.go | 107 |
1 files changed, 68 insertions, 39 deletions
diff --git a/pkg/sentry/kernel/kernel.go b/pkg/sentry/kernel/kernel.go index f253a81d9..47dadc43a 100644 --- a/pkg/sentry/kernel/kernel.go +++ b/pkg/sentry/kernel/kernel.go @@ -39,34 +39,34 @@ import ( "sync/atomic" "time" - "gvisor.googlesource.com/gvisor/pkg/abi/linux" - "gvisor.googlesource.com/gvisor/pkg/cpuid" - "gvisor.googlesource.com/gvisor/pkg/eventchannel" - "gvisor.googlesource.com/gvisor/pkg/log" - "gvisor.googlesource.com/gvisor/pkg/refs" - "gvisor.googlesource.com/gvisor/pkg/sentry/arch" - "gvisor.googlesource.com/gvisor/pkg/sentry/context" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/sentry/fs/timerfd" - "gvisor.googlesource.com/gvisor/pkg/sentry/hostcpu" - "gvisor.googlesource.com/gvisor/pkg/sentry/inet" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/epoll" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/futex" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/sched" - ktime "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time" - "gvisor.googlesource.com/gvisor/pkg/sentry/limits" - "gvisor.googlesource.com/gvisor/pkg/sentry/loader" - "gvisor.googlesource.com/gvisor/pkg/sentry/mm" - "gvisor.googlesource.com/gvisor/pkg/sentry/pgalloc" - "gvisor.googlesource.com/gvisor/pkg/sentry/platform" - "gvisor.googlesource.com/gvisor/pkg/sentry/socket/netlink/port" - sentrytime "gvisor.googlesource.com/gvisor/pkg/sentry/time" - "gvisor.googlesource.com/gvisor/pkg/sentry/unimpl" - uspb "gvisor.googlesource.com/gvisor/pkg/sentry/unimpl/unimplemented_syscall_go_proto" - "gvisor.googlesource.com/gvisor/pkg/sentry/uniqueid" - "gvisor.googlesource.com/gvisor/pkg/state" - "gvisor.googlesource.com/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/cpuid" + "gvisor.dev/gvisor/pkg/eventchannel" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/refs" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/fs/timerfd" + "gvisor.dev/gvisor/pkg/sentry/hostcpu" + "gvisor.dev/gvisor/pkg/sentry/inet" + "gvisor.dev/gvisor/pkg/sentry/kernel/auth" + "gvisor.dev/gvisor/pkg/sentry/kernel/epoll" + "gvisor.dev/gvisor/pkg/sentry/kernel/futex" + "gvisor.dev/gvisor/pkg/sentry/kernel/sched" + ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" + "gvisor.dev/gvisor/pkg/sentry/limits" + "gvisor.dev/gvisor/pkg/sentry/loader" + "gvisor.dev/gvisor/pkg/sentry/mm" + "gvisor.dev/gvisor/pkg/sentry/pgalloc" + "gvisor.dev/gvisor/pkg/sentry/platform" + "gvisor.dev/gvisor/pkg/sentry/socket/netlink/port" + sentrytime "gvisor.dev/gvisor/pkg/sentry/time" + "gvisor.dev/gvisor/pkg/sentry/unimpl" + uspb "gvisor.dev/gvisor/pkg/sentry/unimpl/unimplemented_syscall_go_proto" + "gvisor.dev/gvisor/pkg/sentry/uniqueid" + "gvisor.dev/gvisor/pkg/state" + "gvisor.dev/gvisor/pkg/tcpip" ) // Kernel represents an emulated Linux kernel. It must be initialized by calling @@ -381,9 +381,23 @@ func (k *Kernel) SaveTo(w io.Writer) error { // flushMountSourceRefs flushes the MountSources for all mounted filesystems // and open FDs. func (k *Kernel) flushMountSourceRefs() error { - // Flush all mount sources for currently mounted filesystems. + // Flush all mount sources for currently mounted filesystems in the + // root mount namespace. k.mounts.FlushMountSourceRefs() + // Some tasks may have other mount namespaces; flush those as well. + flushed := make(map[*fs.MountNamespace]struct{}) + k.tasks.mu.RLock() + k.tasks.forEachThreadGroupLocked(func(tg *ThreadGroup) { + if _, ok := flushed[tg.mounts]; ok { + // Already flushed. + return + } + tg.mounts.FlushMountSourceRefs() + flushed[tg.mounts] = struct{}{} + }) + k.tasks.mu.RUnlock() + // There may be some open FDs whose filesystems have been unmounted. We // must flush those as well. return k.tasks.forEachFDPaused(func(desc descriptor) error { @@ -611,12 +625,18 @@ type CreateProcessArgs struct { // AbstractSocketNamespace is the initial Abstract Socket namespace. AbstractSocketNamespace *AbstractSocketNamespace + // MountNamespace optionally contains the mount namespace for this + // process. If nil, the kernel's mount namespace is used. + // + // Anyone setting MountNamespace must donate a reference (i.e. + // increment it). + MountNamespace *fs.MountNamespace + // Root optionally contains the dirent that serves as the root for the // process. If nil, the mount namespace's root is used as the process' // root. // - // Anyone setting Root must donate a reference (i.e. increment it) to - // keep it alive until it is decremented by CreateProcess. + // Anyone setting Root must donate a reference (i.e. increment it). Root *fs.Dirent // ContainerID is the container that the process belongs to. @@ -659,7 +679,7 @@ func (ctx *createProcessContext) Value(key interface{}) interface{} { return ctx.args.Credentials case fs.CtxRoot: if ctx.args.Root != nil { - // Take a refernce on the root dirent that will be + // Take a reference on the root dirent that will be // given to the caller. ctx.args.Root.IncRef() return ctx.args.Root @@ -715,20 +735,29 @@ func (k *Kernel) CreateProcess(args CreateProcessArgs) (*ThreadGroup, ThreadID, return nil, 0, fmt.Errorf("no kernel MountNamespace") } - tg := k.newThreadGroup(k.tasks.Root, NewSignalHandlers(), linux.SIGCHLD, args.Limits, k.monotonicClock) + // Grab the mount namespace. + mounts := args.MountNamespace + if mounts == nil { + // If no MountNamespace was configured, then use the kernel's + // root mount namespace, with an extra reference that will be + // donated to the task. + mounts = k.mounts + mounts.IncRef() + } + + tg := k.newThreadGroup(mounts, k.tasks.Root, NewSignalHandlers(), linux.SIGCHLD, args.Limits, k.monotonicClock) ctx := args.NewContext(k) // Grab the root directory. root := args.Root if root == nil { - root = fs.RootFromContext(ctx) - // Is the root STILL nil? - if root == nil { - return nil, 0, fmt.Errorf("CreateProcessArgs.Root was not provided, and failed to get root from context") - } + // If no Root was configured, then get it from the + // MountNamespace. + root = mounts.Root() } + // The call to newFSContext below will take a reference on root, so we + // don't need to hold this one. defer root.DecRef() - args.Root = nil // Grab the working directory. remainingTraversals := uint(args.MaxSymlinkTraversals) |