summaryrefslogtreecommitdiffhomepage
path: root/pkg/sentry/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'pkg/sentry/kernel')
-rw-r--r--pkg/sentry/kernel/BUILD12
-rw-r--r--pkg/sentry/kernel/context.go3
-rw-r--r--pkg/sentry/kernel/ipc_namespace.go14
-rw-r--r--pkg/sentry/kernel/kernel.go57
-rw-r--r--pkg/sentry/kernel/shm/BUILD1
-rw-r--r--pkg/sentry/kernel/shm/shm.go46
-rw-r--r--pkg/sentry/kernel/task.go6
-rw-r--r--pkg/sentry/kernel/task_clone.go5
-rw-r--r--pkg/sentry/kernel/task_exit.go1
-rw-r--r--pkg/sentry/kernel/task_start.go1
10 files changed, 114 insertions, 32 deletions
diff --git a/pkg/sentry/kernel/BUILD b/pkg/sentry/kernel/BUILD
index 5de70aecb..9a24c6bdb 100644
--- a/pkg/sentry/kernel/BUILD
+++ b/pkg/sentry/kernel/BUILD
@@ -97,6 +97,17 @@ go_template_instance(
)
go_template_instance(
+ name = "ipc_namespace_refs",
+ out = "ipc_namespace_refs.go",
+ package = "kernel",
+ prefix = "IPCNamespace",
+ template = "//pkg/refs_vfs2:refs_template",
+ types = {
+ "T": "IPCNamespace",
+ },
+)
+
+go_template_instance(
name = "process_group_refs",
out = "process_group_refs.go",
package = "kernel",
@@ -137,6 +148,7 @@ go_library(
"fs_context.go",
"fs_context_refs.go",
"ipc_namespace.go",
+ "ipc_namespace_refs.go",
"kcov.go",
"kcov_unsafe.go",
"kernel.go",
diff --git a/pkg/sentry/kernel/context.go b/pkg/sentry/kernel/context.go
index dd5f0f5fa..bb94769c4 100644
--- a/pkg/sentry/kernel/context.go
+++ b/pkg/sentry/kernel/context.go
@@ -81,7 +81,8 @@ func UTSNamespaceFromContext(ctx context.Context) *UTSNamespace {
}
// IPCNamespaceFromContext returns the IPC namespace in which ctx is executing,
-// or nil if there is no such IPC namespace.
+// or nil if there is no such IPC namespace. It takes a reference on the
+// namespace.
func IPCNamespaceFromContext(ctx context.Context) *IPCNamespace {
if v := ctx.Value(CtxIPCNamespace); v != nil {
return v.(*IPCNamespace)
diff --git a/pkg/sentry/kernel/ipc_namespace.go b/pkg/sentry/kernel/ipc_namespace.go
index 80a070d7e..3f34ee0db 100644
--- a/pkg/sentry/kernel/ipc_namespace.go
+++ b/pkg/sentry/kernel/ipc_namespace.go
@@ -15,6 +15,7 @@
package kernel
import (
+ "gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/sentry/kernel/semaphore"
"gvisor.dev/gvisor/pkg/sentry/kernel/shm"
@@ -24,6 +25,8 @@ import (
//
// +stateify savable
type IPCNamespace struct {
+ IPCNamespaceRefs
+
// User namespace which owns this IPC namespace. Immutable.
userNS *auth.UserNamespace
@@ -33,11 +36,13 @@ type IPCNamespace struct {
// NewIPCNamespace creates a new IPC namespace.
func NewIPCNamespace(userNS *auth.UserNamespace) *IPCNamespace {
- return &IPCNamespace{
+ ns := &IPCNamespace{
userNS: userNS,
semaphores: semaphore.NewRegistry(userNS),
shms: shm.NewRegistry(userNS),
}
+ ns.EnableLeakCheck()
+ return ns
}
// SemaphoreRegistry returns the semaphore set registry for this namespace.
@@ -50,6 +55,13 @@ func (i *IPCNamespace) ShmRegistry() *shm.Registry {
return i.shms
}
+// DecRef implements refs_vfs2.RefCounter.DecRef.
+func (i *IPCNamespace) DecRef(ctx context.Context) {
+ i.IPCNamespaceRefs.DecRef(func() {
+ i.shms.Release(ctx)
+ })
+}
+
// IPCNamespace returns the task's IPC namespace.
func (t *Task) IPCNamespace() *IPCNamespace {
t.mu.Lock()
diff --git a/pkg/sentry/kernel/kernel.go b/pkg/sentry/kernel/kernel.go
index d6c21adb7..652cbb732 100644
--- a/pkg/sentry/kernel/kernel.go
+++ b/pkg/sentry/kernel/kernel.go
@@ -828,7 +828,9 @@ func (ctx *createProcessContext) Value(key interface{}) interface{} {
case CtxUTSNamespace:
return ctx.args.UTSNamespace
case CtxIPCNamespace:
- return ctx.args.IPCNamespace
+ ipcns := ctx.args.IPCNamespace
+ ipcns.IncRef()
+ return ipcns
case auth.CtxCredentials:
return ctx.args.Credentials
case fs.CtxRoot:
@@ -841,14 +843,16 @@ func (ctx *createProcessContext) Value(key interface{}) interface{} {
if ctx.args.MountNamespaceVFS2 == nil {
return nil
}
- // MountNamespaceVFS2.Root() takes a reference on the root dirent for us.
- return ctx.args.MountNamespaceVFS2.Root()
+ root := ctx.args.MountNamespaceVFS2.Root()
+ root.IncRef()
+ return root
case vfs.CtxMountNamespace:
if ctx.k.globalInit == nil {
return nil
}
- // MountNamespaceVFS2 takes a reference for us.
- return ctx.k.GlobalInit().Leader().MountNamespaceVFS2()
+ mntns := ctx.k.GlobalInit().Leader().MountNamespaceVFS2()
+ mntns.IncRef()
+ return mntns
case fs.CtxDirentCacheLimiter:
return ctx.k.DirentCacheLimiter
case inet.CtxStack:
@@ -904,14 +908,13 @@ func (k *Kernel) CreateProcess(args CreateProcessArgs) (*ThreadGroup, ThreadID,
if VFS2Enabled {
mntnsVFS2 = args.MountNamespaceVFS2
if mntnsVFS2 == nil {
- // MountNamespaceVFS2 adds a reference to the namespace, which is
- // transferred to the new process.
+ // Add a reference to the namespace, which is transferred to the new process.
mntnsVFS2 = k.globalInit.Leader().MountNamespaceVFS2()
+ mntnsVFS2.IncRef()
}
// Get the root directory from the MountNamespace.
root := mntnsVFS2.Root()
- // The call to newFSContext below will take a reference on root, so we
- // don't need to hold this one.
+ root.IncRef()
defer root.DecRef(ctx)
// Grab the working directory.
@@ -1373,8 +1376,9 @@ func (k *Kernel) RootUTSNamespace() *UTSNamespace {
return k.rootUTSNamespace
}
-// RootIPCNamespace returns the root IPCNamespace.
+// RootIPCNamespace takes a reference and returns the root IPCNamespace.
func (k *Kernel) RootIPCNamespace() *IPCNamespace {
+ k.rootIPCNamespace.IncRef()
return k.rootIPCNamespace
}
@@ -1635,7 +1639,9 @@ func (ctx supervisorContext) Value(key interface{}) interface{} {
case CtxUTSNamespace:
return ctx.k.rootUTSNamespace
case CtxIPCNamespace:
- return ctx.k.rootIPCNamespace
+ ipcns := ctx.k.rootIPCNamespace
+ ipcns.IncRef()
+ return ipcns
case auth.CtxCredentials:
// The supervisor context is global root.
return auth.NewRootCredentials(ctx.k.rootUserNamespace)
@@ -1648,16 +1654,16 @@ func (ctx supervisorContext) Value(key interface{}) interface{} {
if ctx.k.globalInit == nil {
return vfs.VirtualDentry{}
}
- mntns := ctx.k.GlobalInit().Leader().MountNamespaceVFS2()
- defer mntns.DecRef(ctx)
- // Root() takes a reference on the root dirent for us.
- return mntns.Root()
+ root := ctx.k.GlobalInit().Leader().MountNamespaceVFS2().Root()
+ root.IncRef()
+ return root
case vfs.CtxMountNamespace:
if ctx.k.globalInit == nil {
return nil
}
- // MountNamespaceVFS2() takes a reference for us.
- return ctx.k.GlobalInit().Leader().MountNamespaceVFS2()
+ mntns := ctx.k.GlobalInit().Leader().MountNamespaceVFS2()
+ mntns.IncRef()
+ return mntns
case fs.CtxDirentCacheLimiter:
return ctx.k.DirentCacheLimiter
case inet.CtxStack:
@@ -1738,3 +1744,20 @@ func (k *Kernel) ShmMount() *vfs.Mount {
func (k *Kernel) SocketMount() *vfs.Mount {
return k.socketMount
}
+
+// Release releases resources owned by k.
+//
+// Precondition: This should only be called after the kernel is fully
+// initialized, e.g. after k.Start() has been called.
+func (k *Kernel) Release() {
+ ctx := k.SupervisorContext()
+ if VFS2Enabled {
+ k.hostMount.DecRef(ctx)
+ k.pipeMount.DecRef(ctx)
+ k.shmMount.DecRef(ctx)
+ k.socketMount.DecRef(ctx)
+ k.vfs.Release(ctx)
+ }
+ k.timekeeper.Destroy()
+ k.vdso.Release(ctx)
+}
diff --git a/pkg/sentry/kernel/shm/BUILD b/pkg/sentry/kernel/shm/BUILD
index b7e4b480d..f8a382fd8 100644
--- a/pkg/sentry/kernel/shm/BUILD
+++ b/pkg/sentry/kernel/shm/BUILD
@@ -27,6 +27,7 @@ go_library(
"//pkg/context",
"//pkg/log",
"//pkg/refs",
+ "//pkg/refs_vfs2",
"//pkg/sentry/device",
"//pkg/sentry/fs",
"//pkg/sentry/kernel/auth",
diff --git a/pkg/sentry/kernel/shm/shm.go b/pkg/sentry/kernel/shm/shm.go
index 00c03585e..ebbebf46b 100644
--- a/pkg/sentry/kernel/shm/shm.go
+++ b/pkg/sentry/kernel/shm/shm.go
@@ -321,9 +321,32 @@ func (r *Registry) remove(s *Shm) {
r.totalPages -= s.effectiveSize / usermem.PageSize
}
+// Release drops the self-reference of each active shm segment in the registry.
+// It is called when the kernel.IPCNamespace containing r is being destroyed.
+func (r *Registry) Release(ctx context.Context) {
+ // Because Shm.DecRef() may acquire the same locks, collect the segments to
+ // release first. Note that this should not race with any updates to r, since
+ // the IPC namespace containing it has no more references.
+ toRelease := make([]*Shm, 0)
+ r.mu.Lock()
+ for _, s := range r.keysToShms {
+ s.mu.Lock()
+ if !s.pendingDestruction {
+ toRelease = append(toRelease, s)
+ }
+ s.mu.Unlock()
+ }
+ r.mu.Unlock()
+
+ for _, s := range toRelease {
+ r.dissociateKey(s)
+ s.DecRef(ctx)
+ }
+}
+
// Shm represents a single shared memory segment.
//
-// Shm segment are backed directly by an allocation from platform memory.
+// Shm segments are backed directly by an allocation from platform memory.
// Segments are always mapped as a whole, greatly simplifying how mappings are
// tracked. However note that mremap and munmap calls may cause the vma for a
// segment to become fragmented; which requires special care when unmapping a
@@ -652,17 +675,20 @@ func (s *Shm) MarkDestroyed(ctx context.Context) {
s.registry.dissociateKey(s)
s.mu.Lock()
- defer s.mu.Unlock()
- if !s.pendingDestruction {
- s.pendingDestruction = true
- // Drop the self-reference so destruction occurs when all
- // external references are gone.
- //
- // N.B. This cannot be the final DecRef, as the caller also
- // holds a reference.
- s.DecRef(ctx)
+ if s.pendingDestruction {
+ s.mu.Unlock()
return
}
+ s.pendingDestruction = true
+ s.mu.Unlock()
+
+ // Drop the self-reference so destruction occurs when all
+ // external references are gone.
+ //
+ // N.B. This cannot be the final DecRef, as the caller also
+ // holds a reference.
+ s.DecRef(ctx)
+ return
}
// checkOwnership verifies whether a segment may be accessed by ctx as an
diff --git a/pkg/sentry/kernel/task.go b/pkg/sentry/kernel/task.go
index f796e0fa3..037971393 100644
--- a/pkg/sentry/kernel/task.go
+++ b/pkg/sentry/kernel/task.go
@@ -656,7 +656,9 @@ func (t *Task) Value(key interface{}) interface{} {
case CtxUTSNamespace:
return t.utsns
case CtxIPCNamespace:
- return t.ipcns
+ ipcns := t.IPCNamespace()
+ ipcns.IncRef()
+ return ipcns
case CtxTask:
return t
case auth.CtxCredentials:
@@ -735,7 +737,6 @@ func (t *Task) SyscallRestartBlock() SyscallRestartBlock {
func (t *Task) IsChrooted() bool {
if VFS2Enabled {
realRoot := t.mountNamespaceVFS2.Root()
- defer realRoot.DecRef(t)
root := t.fsContext.RootDirectoryVFS2()
defer root.DecRef(t)
return root != realRoot
@@ -868,7 +869,6 @@ func (t *Task) MountNamespace() *fs.MountNamespace {
func (t *Task) MountNamespaceVFS2() *vfs.MountNamespace {
t.mu.Lock()
defer t.mu.Unlock()
- t.mountNamespaceVFS2.IncRef()
return t.mountNamespaceVFS2
}
diff --git a/pkg/sentry/kernel/task_clone.go b/pkg/sentry/kernel/task_clone.go
index fce1064a7..7a053f369 100644
--- a/pkg/sentry/kernel/task_clone.go
+++ b/pkg/sentry/kernel/task_clone.go
@@ -203,6 +203,8 @@ func (t *Task) Clone(opts *CloneOptions) (ThreadID, *SyscallControl, error) {
// Note that "If CLONE_NEWIPC is set, then create the process in a new IPC
// namespace"
ipcns = NewIPCNamespace(userns)
+ } else {
+ ipcns.IncRef()
}
netns := t.NetworkNamespace()
@@ -218,6 +220,7 @@ func (t *Task) Clone(opts *CloneOptions) (ThreadID, *SyscallControl, error) {
tc, err := t.tc.Fork(t, t.k, !opts.NewAddressSpace)
if err != nil {
+ ipcns.DecRef(t)
return 0, nil, err
}
// clone() returns 0 in the child.
@@ -227,6 +230,7 @@ func (t *Task) Clone(opts *CloneOptions) (ThreadID, *SyscallControl, error) {
}
if opts.SetTLS {
if !tc.Arch.SetTLS(uintptr(opts.TLS)) {
+ ipcns.DecRef(t)
return 0, nil, syserror.EPERM
}
}
@@ -509,6 +513,7 @@ func (t *Task) Unshare(opts *SharingOptions) error {
}
// Note that "If CLONE_NEWIPC is set, then create the process in a new IPC
// namespace"
+ t.ipcns.DecRef(t)
t.ipcns = NewIPCNamespace(creds.UserNamespace)
}
var oldFDTable *FDTable
diff --git a/pkg/sentry/kernel/task_exit.go b/pkg/sentry/kernel/task_exit.go
index b400a8b41..239551eb6 100644
--- a/pkg/sentry/kernel/task_exit.go
+++ b/pkg/sentry/kernel/task_exit.go
@@ -280,6 +280,7 @@ func (*runExitMain) execute(t *Task) taskRunState {
t.mountNamespaceVFS2.DecRef(t)
t.mountNamespaceVFS2 = nil
}
+ t.ipcns.DecRef(t)
t.mu.Unlock()
// If this is the last task to exit from the thread group, release the
diff --git a/pkg/sentry/kernel/task_start.go b/pkg/sentry/kernel/task_start.go
index 64c1e120a..6e2ff573a 100644
--- a/pkg/sentry/kernel/task_start.go
+++ b/pkg/sentry/kernel/task_start.go
@@ -104,6 +104,7 @@ func (ts *TaskSet) NewTask(cfg *TaskConfig) (*Task, error) {
cfg.TaskContext.release()
cfg.FSContext.DecRef(t)
cfg.FDTable.DecRef(t)
+ cfg.IPCNamespace.DecRef(t)
if cfg.MountNamespaceVFS2 != nil {
cfg.MountNamespaceVFS2.DecRef(t)
}