summary | refs | log | tree | commit | diff | homepage
path: root/pkg/sentry/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'pkg/sentry/kernel')
-rw-r--r-- pkg/sentry/kernel/BUILD 1
-rw-r--r-- pkg/sentry/kernel/kcov.go 19
-rw-r--r-- pkg/sentry/kernel/kcov_unsafe.go 6
-rw-r--r-- pkg/sentry/kernel/kernel.go 12
-rw-r--r-- pkg/sentry/kernel/pipe/vfs.go 3
-rw-r--r-- pkg/sentry/kernel/task_clone.go 21
-rw-r--r-- pkg/sentry/kernel/task_exit.go 2
-rw-r--r-- pkg/sentry/kernel/task_start.go 14
-rw-r--r-- pkg/sentry/kernel/thread_group.go 7
9 files changed, 54 insertions, 31 deletions
diff --git a/pkg/sentry/kernel/BUILD b/pkg/sentry/kernel/BUILD
index 9a24c6bdb..c0de72eef 100644
--- a/pkg/sentry/kernel/BUILD
+++ b/pkg/sentry/kernel/BUILD
@@ -218,6 +218,7 @@ go_library(
"//pkg/amutex",
"//pkg/bits",
"//pkg/bpf",
+ "//pkg/cleanup",
"//pkg/context",
"//pkg/coverage",
"//pkg/cpuid",
diff --git a/pkg/sentry/kernel/kcov.go b/pkg/sentry/kernel/kcov.go
index 060c056df..4fcdfc541 100644
--- a/pkg/sentry/kernel/kcov.go
+++ b/pkg/sentry/kernel/kcov.go
@@ -199,23 +199,25 @@ func (kcov *Kcov) DisableTrace(ctx context.Context) error {
}
kcov.mode = linux.KCOV_MODE_INIT
kcov.owningTask = nil
- kcov.mappable = nil
+ if kcov.mappable != nil {
+ kcov.mappable.DecRef(ctx)
+ kcov.mappable = nil
+ }
return nil
}
// Clear resets the mode and clears the owning task and memory mapping for kcov.
// It is called when the fd corresponding to kcov is closed. Note that the mode
// needs to be set so that the next call to kcov.TaskWork() will exit early.
-func (kcov *Kcov) Clear() {
+func (kcov *Kcov) Clear(ctx context.Context) {
kcov.mu.Lock()
- kcov.clearLocked()
- kcov.mu.Unlock()
-}
-
-func (kcov *Kcov) clearLocked() {
kcov.mode = linux.KCOV_MODE_INIT
kcov.owningTask = nil
- kcov.mappable = nil
+ if kcov.mappable != nil {
+ kcov.mappable.DecRef(ctx)
+ kcov.mappable = nil
+ }
+ kcov.mu.Unlock()
}
// OnTaskExit is called when the owning task exits. It is similar to
@@ -254,6 +256,7 @@ func (kcov *Kcov) ConfigureMMap(ctx context.Context, opts *memmap.MMapOpts) erro
// will look different under /proc/[pid]/maps than they do on Linux.
kcov.mappable = mm.NewSpecialMappable(fmt.Sprintf("[kcov:%d]", t.ThreadID()), kcov.mfp, fr)
}
+ kcov.mappable.IncRef()
opts.Mappable = kcov.mappable
opts.MappingIdentity = kcov.mappable
return nil
diff --git a/pkg/sentry/kernel/kcov_unsafe.go b/pkg/sentry/kernel/kcov_unsafe.go
index 6f64022eb..6f8a0266b 100644
--- a/pkg/sentry/kernel/kcov_unsafe.go
+++ b/pkg/sentry/kernel/kcov_unsafe.go
@@ -20,9 +20,9 @@ import (
"gvisor.dev/gvisor/pkg/safemem"
)
-// countBlock provides a safemem.BlockSeq for k.count.
+// countBlock provides a safemem.BlockSeq for kcov.count.
//
// Like k.count, the block returned is protected by k.mu.
-func (k *Kcov) countBlock() safemem.BlockSeq {
- return safemem.BlockSeqOf(safemem.BlockFromSafePointer(unsafe.Pointer(&k.count), int(unsafe.Sizeof(k.count))))
+func (kcov *Kcov) countBlock() safemem.BlockSeq {
+ return safemem.BlockSeqOf(safemem.BlockFromSafePointer(unsafe.Pointer(&kcov.count), int(unsafe.Sizeof(kcov.count))))
}
diff --git a/pkg/sentry/kernel/kernel.go b/pkg/sentry/kernel/kernel.go
index 652cbb732..0eb2bf7bd 100644
--- a/pkg/sentry/kernel/kernel.go
+++ b/pkg/sentry/kernel/kernel.go
@@ -39,6 +39,7 @@ import (
"time"
"gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/cleanup"
"gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/cpuid"
"gvisor.dev/gvisor/pkg/eventchannel"
@@ -340,7 +341,7 @@ func (k *Kernel) Init(args InitKernelArgs) error {
return fmt.Errorf("Timekeeper is nil")
}
if args.Timekeeper.clocks == nil {
- return fmt.Errorf("Must call Timekeeper.SetClocks() before Kernel.Init()")
+ return fmt.Errorf("must call Timekeeper.SetClocks() before Kernel.Init()")
}
if args.RootUserNamespace == nil {
return fmt.Errorf("RootUserNamespace is nil")
@@ -365,7 +366,7 @@ func (k *Kernel) Init(args InitKernelArgs) error {
k.useHostCores = true
maxCPU, err := hostcpu.MaxPossibleCPU()
if err != nil {
- return fmt.Errorf("Failed to get maximum CPU number: %v", err)
+ return fmt.Errorf("failed to get maximum CPU number: %v", err)
}
minAppCores := uint(maxCPU) + 1
if k.applicationCores < minAppCores {
@@ -966,6 +967,10 @@ func (k *Kernel) CreateProcess(args CreateProcessArgs) (*ThreadGroup, ThreadID,
}
tg := k.NewThreadGroup(mntns, args.PIDNamespace, NewSignalHandlers(), linux.SIGCHLD, args.Limits)
+ cu := cleanup.Make(func() {
+ tg.Release(ctx)
+ })
+ defer cu.Clean()
// Check which file to start from.
switch {
@@ -1025,13 +1030,14 @@ func (k *Kernel) CreateProcess(args CreateProcessArgs) (*ThreadGroup, ThreadID,
MountNamespaceVFS2: mntnsVFS2,
ContainerID: args.ContainerID,
}
- t, err := k.tasks.NewTask(config)
+ t, err := k.tasks.NewTask(ctx, config)
if err != nil {
return nil, 0, err
}
t.traceExecEvent(tc) // Simulate exec for tracing.
// Success.
+ cu.Release()
tgid := k.tasks.Root.IDOfThreadGroup(tg)
if k.globalInit == nil {
k.globalInit = tg
diff --git a/pkg/sentry/kernel/pipe/vfs.go b/pkg/sentry/kernel/pipe/vfs.go
index f61039f5b..1a152142b 100644
--- a/pkg/sentry/kernel/pipe/vfs.go
+++ b/pkg/sentry/kernel/pipe/vfs.go
@@ -237,8 +237,7 @@ func (fd *VFSPipeFD) Ioctl(ctx context.Context, uio usermem.IO, args arch.Syscal
// PipeSize implements fcntl(F_GETPIPE_SZ).
func (fd *VFSPipeFD) PipeSize() int64 {
- // Inline Pipe.FifoSize() rather than calling it with nil Context and
- // fs.File and ignoring the returned error (which is always nil).
+ // Inline Pipe.FifoSize() since we don't have a fs.File.
fd.pipe.mu.Lock()
defer fd.pipe.mu.Unlock()
return fd.pipe.max
diff --git a/pkg/sentry/kernel/task_clone.go b/pkg/sentry/kernel/task_clone.go
index 7a053f369..682080c14 100644
--- a/pkg/sentry/kernel/task_clone.go
+++ b/pkg/sentry/kernel/task_clone.go
@@ -19,6 +19,7 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/bpf"
+ "gvisor.dev/gvisor/pkg/cleanup"
"gvisor.dev/gvisor/pkg/sentry/inet"
"gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/usermem"
@@ -206,6 +207,10 @@ func (t *Task) Clone(opts *CloneOptions) (ThreadID, *SyscallControl, error) {
} else {
ipcns.IncRef()
}
+ cu := cleanup.Make(func() {
+ ipcns.DecRef(t)
+ })
+ defer cu.Clean()
netns := t.NetworkNamespace()
if opts.NewNetworkNamespace {
@@ -216,13 +221,18 @@ func (t *Task) Clone(opts *CloneOptions) (ThreadID, *SyscallControl, error) {
mntnsVFS2 := t.mountNamespaceVFS2
if mntnsVFS2 != nil {
mntnsVFS2.IncRef()
+ cu.Add(func() {
+ mntnsVFS2.DecRef(t)
+ })
}
tc, err := t.tc.Fork(t, t.k, !opts.NewAddressSpace)
if err != nil {
- ipcns.DecRef(t)
return 0, nil, err
}
+ cu.Add(func() {
+ tc.release()
+ })
// clone() returns 0 in the child.
tc.Arch.SetReturn(0)
if opts.Stack != 0 {
@@ -230,7 +240,6 @@ func (t *Task) Clone(opts *CloneOptions) (ThreadID, *SyscallControl, error) {
}
if opts.SetTLS {
if !tc.Arch.SetTLS(uintptr(opts.TLS)) {
- ipcns.DecRef(t)
return 0, nil, syserror.EPERM
}
}
@@ -299,11 +308,11 @@ func (t *Task) Clone(opts *CloneOptions) (ThreadID, *SyscallControl, error) {
} else {
cfg.InheritParent = t
}
- nt, err := t.tg.pidns.owner.NewTask(cfg)
+ nt, err := t.tg.pidns.owner.NewTask(t, cfg)
+ // If NewTask succeeds, we transfer references to nt. If NewTask fails, it does
+ // the cleanup for us.
+ cu.Release()
if err != nil {
- if opts.NewThreadGroup {
- tg.release(t)
- }
return 0, nil, err
}
diff --git a/pkg/sentry/kernel/task_exit.go b/pkg/sentry/kernel/task_exit.go
index 239551eb6..ce7b9641d 100644
--- a/pkg/sentry/kernel/task_exit.go
+++ b/pkg/sentry/kernel/task_exit.go
@@ -286,7 +286,7 @@ func (*runExitMain) execute(t *Task) taskRunState {
// If this is the last task to exit from the thread group, release the
// thread group's resources.
if lastExiter {
- t.tg.release(t)
+ t.tg.Release(t)
}
// Detach tracees.
diff --git a/pkg/sentry/kernel/task_start.go b/pkg/sentry/kernel/task_start.go
index 6e2ff573a..8e28230cc 100644
--- a/pkg/sentry/kernel/task_start.go
+++ b/pkg/sentry/kernel/task_start.go
@@ -16,6 +16,7 @@ package kernel
import (
"gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/inet"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
@@ -98,15 +99,18 @@ type TaskConfig struct {
// NewTask creates a new task defined by cfg.
//
// NewTask does not start the returned task; the caller must call Task.Start.
-func (ts *TaskSet) NewTask(cfg *TaskConfig) (*Task, error) {
+//
+// If successful, NewTask transfers references held by cfg to the new task.
+// Otherwise, NewTask releases them.
+func (ts *TaskSet) NewTask(ctx context.Context, cfg *TaskConfig) (*Task, error) {
t, err := ts.newTask(cfg)
if err != nil {
cfg.TaskContext.release()
- cfg.FSContext.DecRef(t)
- cfg.FDTable.DecRef(t)
- cfg.IPCNamespace.DecRef(t)
+ cfg.FSContext.DecRef(ctx)
+ cfg.FDTable.DecRef(ctx)
+ cfg.IPCNamespace.DecRef(ctx)
if cfg.MountNamespaceVFS2 != nil {
- cfg.MountNamespaceVFS2.DecRef(t)
+ cfg.MountNamespaceVFS2.DecRef(ctx)
}
return nil, err
}
diff --git a/pkg/sentry/kernel/thread_group.go b/pkg/sentry/kernel/thread_group.go
index 0b34c0099..a183b28c1 100644
--- a/pkg/sentry/kernel/thread_group.go
+++ b/pkg/sentry/kernel/thread_group.go
@@ -18,6 +18,7 @@ import (
"sync/atomic"
"gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/fs"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
@@ -307,8 +308,8 @@ func (tg *ThreadGroup) Limits() *limits.LimitSet {
return tg.limits
}
-// release releases the thread group's resources.
-func (tg *ThreadGroup) release(t *Task) {
+// Release releases the thread group's resources.
+func (tg *ThreadGroup) Release(ctx context.Context) {
// Timers must be destroyed without holding the TaskSet or signal mutexes
// since timers send signals with Timer.mu locked.
tg.itimerRealTimer.Destroy()
@@ -325,7 +326,7 @@ func (tg *ThreadGroup) release(t *Task) {
it.DestroyTimer()
}
if tg.mounts != nil {
- tg.mounts.DecRef(t)
+ tg.mounts.DecRef(ctx)
}
}