summary refs log tree commit diff homepage
path: root/pkg/sentry/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'pkg/sentry/kernel')
-rw-r--r-- pkg/sentry/kernel/BUILD 2
-rw-r--r-- pkg/sentry/kernel/aio.go 50
-rw-r--r-- pkg/sentry/kernel/context.go 17
-rw-r--r-- pkg/sentry/kernel/kernel.go 17
-rw-r--r-- pkg/sentry/kernel/seccomp.go 2
-rw-r--r-- pkg/sentry/kernel/syscalls_state.go 10
-rw-r--r-- pkg/sentry/kernel/syslog.go 6
-rw-r--r-- pkg/sentry/kernel/task.go 85
-rw-r--r-- pkg/sentry/kernel/task_acct.go 4
-rw-r--r-- pkg/sentry/kernel/task_block.go 44
-rw-r--r-- pkg/sentry/kernel/task_clone.go 14
-rw-r--r-- pkg/sentry/kernel/task_context.go 272
-rw-r--r-- pkg/sentry/kernel/task_exec.go 21
-rw-r--r-- pkg/sentry/kernel/task_exit.go 2
-rw-r--r-- pkg/sentry/kernel/task_futex.go 2
-rw-r--r-- pkg/sentry/kernel/task_image.go 173
-rw-r--r-- pkg/sentry/kernel/task_log.go 7
-rw-r--r-- pkg/sentry/kernel/task_run.go 22
-rw-r--r-- pkg/sentry/kernel/task_sched.go 12
-rw-r--r-- pkg/sentry/kernel/task_signals.go 12
-rw-r--r-- pkg/sentry/kernel/task_start.go 12
21 files changed, 459 insertions, 327 deletions
diff --git a/pkg/sentry/kernel/BUILD b/pkg/sentry/kernel/BUILD
index 90dd4a047..0ee60569c 100644
--- a/pkg/sentry/kernel/BUILD
+++ b/pkg/sentry/kernel/BUILD
@@ -184,6 +184,7 @@ go_library(
"task_exit.go",
"task_futex.go",
"task_identity.go",
+ "task_image.go",
"task_list.go",
"task_log.go",
"task_net.go",
@@ -224,6 +225,7 @@ go_library(
"//pkg/cpuid",
"//pkg/eventchannel",
"//pkg/fspath",
+ "//pkg/goid",
"//pkg/log",
"//pkg/marshal",
"//pkg/marshal/primitive",
diff --git a/pkg/sentry/kernel/aio.go b/pkg/sentry/kernel/aio.go
index 0ac78c0b8..ec36d1a49 100644
--- a/pkg/sentry/kernel/aio.go
+++ b/pkg/sentry/kernel/aio.go
@@ -15,10 +15,7 @@
package kernel
import (
- "time"
-
"gvisor.dev/gvisor/pkg/context"
- "gvisor.dev/gvisor/pkg/log"
)
// AIOCallback is a function that does asynchronous I/O on behalf of a task.
@@ -26,7 +23,7 @@ type AIOCallback func(context.Context)
// QueueAIO queues an AIOCallback which will be run asynchronously.
func (t *Task) QueueAIO(cb AIOCallback) {
- ctx := taskAsyncContext{t: t}
+ ctx := t.AsyncContext()
wg := &t.TaskSet().aioGoroutines
wg.Add(1)
go func() {
@@ -34,48 +31,3 @@ func (t *Task) QueueAIO(cb AIOCallback) {
wg.Done()
}()
}
-
-type taskAsyncContext struct {
- context.NoopSleeper
- t *Task
-}
-
-// Debugf implements log.Logger.Debugf.
-func (ctx taskAsyncContext) Debugf(format string, v ...interface{}) {
- ctx.t.Debugf(format, v...)
-}
-
-// Infof implements log.Logger.Infof.
-func (ctx taskAsyncContext) Infof(format string, v ...interface{}) {
- ctx.t.Infof(format, v...)
-}
-
-// Warningf implements log.Logger.Warningf.
-func (ctx taskAsyncContext) Warningf(format string, v ...interface{}) {
- ctx.t.Warningf(format, v...)
-}
-
-// IsLogging implements log.Logger.IsLogging.
-func (ctx taskAsyncContext) IsLogging(level log.Level) bool {
- return ctx.t.IsLogging(level)
-}
-
-// Deadline implements context.Context.Deadline.
-func (ctx taskAsyncContext) Deadline() (time.Time, bool) {
- return ctx.t.Deadline()
-}
-
-// Done implements context.Context.Done.
-func (ctx taskAsyncContext) Done() <-chan struct{} {
- return ctx.t.Done()
-}
-
-// Err implements context.Context.Err.
-func (ctx taskAsyncContext) Err() error {
- return ctx.t.Err()
-}
-
-// Value implements context.Context.Value.
-func (ctx taskAsyncContext) Value(key interface{}) interface{} {
- return ctx.t.Value(key)
-}
diff --git a/pkg/sentry/kernel/context.go b/pkg/sentry/kernel/context.go
index bb94769c4..a8596410f 100644
--- a/pkg/sentry/kernel/context.go
+++ b/pkg/sentry/kernel/context.go
@@ -15,8 +15,6 @@
package kernel
import (
- "time"
-
"gvisor.dev/gvisor/pkg/context"
)
@@ -98,18 +96,3 @@ func TaskFromContext(ctx context.Context) *Task {
}
return nil
}
-
-// Deadline implements context.Context.Deadline.
-func (*Task) Deadline() (time.Time, bool) {
- return time.Time{}, false
-}
-
-// Done implements context.Context.Done.
-func (*Task) Done() <-chan struct{} {
- return nil
-}
-
-// Err implements context.Context.Err.
-func (*Task) Err() error {
- return nil
-}
diff --git a/pkg/sentry/kernel/kernel.go b/pkg/sentry/kernel/kernel.go
index 9b2be44d4..2cdcdfc1f 100644
--- a/pkg/sentry/kernel/kernel.go
+++ b/pkg/sentry/kernel/kernel.go
@@ -632,7 +632,7 @@ func (k *Kernel) invalidateUnsavableMappings(ctx context.Context) error {
defer k.tasks.mu.RUnlock()
for t := range k.tasks.Root.tids {
// We can skip locking Task.mu here since the kernel is paused.
- if mm := t.tc.MemoryManager; mm != nil {
+ if mm := t.image.MemoryManager; mm != nil {
if _, ok := invalidated[mm]; !ok {
if err := mm.InvalidateUnsavable(ctx); err != nil {
return err
@@ -642,7 +642,7 @@ func (k *Kernel) invalidateUnsavableMappings(ctx context.Context) error {
}
// I really wish we just had a sync.Map of all MMs...
if r, ok := t.runState.(*runSyscallAfterExecStop); ok {
- if err := r.tc.MemoryManager.InvalidateUnsavable(ctx); err != nil {
+ if err := r.image.MemoryManager.InvalidateUnsavable(ctx); err != nil {
return err
}
}
@@ -1017,7 +1017,7 @@ func (k *Kernel) CreateProcess(args CreateProcessArgs) (*ThreadGroup, ThreadID,
Features: k.featureSet,
}
- tc, se := k.LoadTaskImage(ctx, loadArgs)
+ image, se := k.LoadTaskImage(ctx, loadArgs)
if se != nil {
return nil, 0, errors.New(se.String())
}
@@ -1030,7 +1030,7 @@ func (k *Kernel) CreateProcess(args CreateProcessArgs) (*ThreadGroup, ThreadID,
config := &TaskConfig{
Kernel: k,
ThreadGroup: tg,
- TaskContext: tc,
+ TaskImage: image,
FSContext: fsContext,
FDTable: args.FDTable,
Credentials: args.Credentials,
@@ -1046,7 +1046,7 @@ func (k *Kernel) CreateProcess(args CreateProcessArgs) (*ThreadGroup, ThreadID,
if err != nil {
return nil, 0, err
}
- t.traceExecEvent(tc) // Simulate exec for tracing.
+ t.traceExecEvent(image) // Simulate exec for tracing.
// Success.
cu.Release()
@@ -1359,6 +1359,13 @@ func (k *Kernel) SendContainerSignal(cid string, info *arch.SignalInfo) error {
// not have meaningful trace data. Rebuilding here ensures that we can do so
// after tracing has been enabled.
func (k *Kernel) RebuildTraceContexts() {
+ // We need to pause all task goroutines because Task.rebuildTraceContext()
+ // replaces Task.traceContext and Task.traceTask, which are
+ // task-goroutine-exclusive (i.e. the task goroutine assumes that it can
+ // access them without synchronization) for performance.
+ k.Pause()
+ defer k.Unpause()
+
k.extMu.Lock()
defer k.extMu.Unlock()
k.tasks.mu.RLock()
diff --git a/pkg/sentry/kernel/seccomp.go b/pkg/sentry/kernel/seccomp.go
index 387edfa91..60917e7d3 100644
--- a/pkg/sentry/kernel/seccomp.go
+++ b/pkg/sentry/kernel/seccomp.go
@@ -106,7 +106,7 @@ func (t *Task) checkSeccompSyscall(sysno int32, args arch.SyscallArguments, ip u
func (t *Task) evaluateSyscallFilters(sysno int32, args arch.SyscallArguments, ip usermem.Addr) uint32 {
data := linux.SeccompData{
Nr: sysno,
- Arch: t.tc.st.AuditNumber,
+ Arch: t.image.st.AuditNumber,
InstructionPointer: uint64(ip),
}
// data.args is []uint64 and args is []arch.SyscallArgument (uintptr), so
diff --git a/pkg/sentry/kernel/syscalls_state.go b/pkg/sentry/kernel/syscalls_state.go
index 90f890495..0b17a562e 100644
--- a/pkg/sentry/kernel/syscalls_state.go
+++ b/pkg/sentry/kernel/syscalls_state.go
@@ -30,18 +30,18 @@ type syscallTableInfo struct {
}
// saveSt saves the SyscallTable.
-func (tc *TaskContext) saveSt() syscallTableInfo {
+func (image *TaskImage) saveSt() syscallTableInfo {
return syscallTableInfo{
- OS: tc.st.OS,
- Arch: tc.st.Arch,
+ OS: image.st.OS,
+ Arch: image.st.Arch,
}
}
// loadSt loads the SyscallTable.
-func (tc *TaskContext) loadSt(sti syscallTableInfo) {
+func (image *TaskImage) loadSt(sti syscallTableInfo) {
st, ok := LookupSyscallTable(sti.OS, sti.Arch)
if !ok {
panic(fmt.Sprintf("syscall table not found for OS %v, Arch %v", sti.OS, sti.Arch))
}
- tc.st = st // Save the table reference.
+ image.st = st // Save the table reference.
}
diff --git a/pkg/sentry/kernel/syslog.go b/pkg/sentry/kernel/syslog.go
index a83ce219c..3fee7aa68 100644
--- a/pkg/sentry/kernel/syslog.go
+++ b/pkg/sentry/kernel/syslog.go
@@ -75,6 +75,12 @@ func (s *syslog) Log() []byte {
"Checking naughty and nice process list...", // Check it up to twice.
"Granting licence to kill(2)...", // British spelling for British movie.
"Letting the watchdogs out...",
+ "Conjuring /dev/null black hole...",
+ "Adversarially training Redcode AI...",
+ "Singleplexing /dev/ptmx...",
+ "Recruiting cron-ies...",
+ "Verifying that no non-zero bytes made their way into /dev/zero...",
+ "Accelerating teletypewriter to 9600 baud...",
}
selectMessage := func() string {
diff --git a/pkg/sentry/kernel/task.go b/pkg/sentry/kernel/task.go
index 037971393..c0ab53c94 100644
--- a/pkg/sentry/kernel/task.go
+++ b/pkg/sentry/kernel/task.go
@@ -21,7 +21,6 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/bpf"
- "gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/fs"
"gvisor.dev/gvisor/pkg/sentry/inet"
@@ -29,11 +28,7 @@ import (
"gvisor.dev/gvisor/pkg/sentry/kernel/futex"
"gvisor.dev/gvisor/pkg/sentry/kernel/sched"
ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time"
- "gvisor.dev/gvisor/pkg/sentry/limits"
- "gvisor.dev/gvisor/pkg/sentry/pgalloc"
"gvisor.dev/gvisor/pkg/sentry/platform"
- "gvisor.dev/gvisor/pkg/sentry/unimpl"
- "gvisor.dev/gvisor/pkg/sentry/uniqueid"
"gvisor.dev/gvisor/pkg/sentry/usage"
"gvisor.dev/gvisor/pkg/sentry/vfs"
"gvisor.dev/gvisor/pkg/sync"
@@ -63,6 +58,12 @@ import (
type Task struct {
taskNode
+ // goid is the task goroutine's ID. goid is owned by the task goroutine,
+ // but since it's used to detect cases where non-task goroutines
+ // incorrectly access state owned by, or exclusive to, the task goroutine,
+ // goid is always accessed using atomic memory operations.
+ goid int64 `state:"nosave"`
+
// runState is what the task goroutine is executing if it is not stopped.
// If runState is nil, the task goroutine should exit or has exited.
// runState is exclusive to the task goroutine.
@@ -83,7 +84,7 @@ type Task struct {
// taskWork is exclusive to the task goroutine.
taskWork []TaskWorker
- // haveSyscallReturn is true if tc.Arch().Return() represents a value
+ // haveSyscallReturn is true if image.Arch().Return() represents a value
// returned by a syscall (or set by ptrace after a syscall).
//
// haveSyscallReturn is exclusive to the task goroutine.
@@ -257,10 +258,10 @@ type Task struct {
// mu protects some of the following fields.
mu sync.Mutex `state:"nosave"`
- // tc holds task data provided by the ELF loader.
+ // image holds task data provided by the ELF loader.
//
- // tc is protected by mu, and is owned by the task goroutine.
- tc TaskContext
+ // image is protected by mu, and is owned by the task goroutine.
+ image TaskImage
// fsContext is the task's filesystem context.
//
@@ -274,7 +275,7 @@ type Task struct {
// If vforkParent is not nil, it is the task that created this task with
// vfork() or clone(CLONE_VFORK), and should have its vforkStop ended when
- // this TaskContext is released.
+ // this TaskImage is released.
//
// vforkParent is protected by the TaskSet mutex.
vforkParent *Task
@@ -641,64 +642,6 @@ func (t *Task) Kernel() *Kernel {
return t.k
}
-// Value implements context.Context.Value.
-//
-// Preconditions: The caller must be running on the task goroutine (as implied
-// by the requirements of context.Context).
-func (t *Task) Value(key interface{}) interface{} {
- switch key {
- case CtxCanTrace:
- return t.CanTrace
- case CtxKernel:
- return t.k
- case CtxPIDNamespace:
- return t.tg.pidns
- case CtxUTSNamespace:
- return t.utsns
- case CtxIPCNamespace:
- ipcns := t.IPCNamespace()
- ipcns.IncRef()
- return ipcns
- case CtxTask:
- return t
- case auth.CtxCredentials:
- return t.Credentials()
- case context.CtxThreadGroupID:
- return int32(t.ThreadGroup().ID())
- case fs.CtxRoot:
- return t.fsContext.RootDirectory()
- case vfs.CtxRoot:
- return t.fsContext.RootDirectoryVFS2()
- case vfs.CtxMountNamespace:
- t.mountNamespaceVFS2.IncRef()
- return t.mountNamespaceVFS2
- case fs.CtxDirentCacheLimiter:
- return t.k.DirentCacheLimiter
- case inet.CtxStack:
- return t.NetworkContext()
- case ktime.CtxRealtimeClock:
- return t.k.RealtimeClock()
- case limits.CtxLimits:
- return t.tg.limits
- case pgalloc.CtxMemoryFile:
- return t.k.mf
- case pgalloc.CtxMemoryFileProvider:
- return t.k
- case platform.CtxPlatform:
- return t.k
- case uniqueid.CtxGlobalUniqueID:
- return t.k.UniqueID()
- case uniqueid.CtxGlobalUniqueIDProvider:
- return t.k
- case uniqueid.CtxInotifyCookie:
- return t.k.GenerateInotifyCookie()
- case unimpl.CtxEvents:
- return t.k
- default:
- return nil
- }
-}
-
// SetClearTID sets t's cleartid.
//
// Preconditions: The caller must be running on the task goroutine.
@@ -751,12 +694,12 @@ func (t *Task) IsChrooted() bool {
return root != realRoot
}
-// TaskContext returns t's TaskContext.
+// TaskImage returns t's TaskImage.
//
// Precondition: The caller must be running on the task goroutine, or t.mu must
// be locked.
-func (t *Task) TaskContext() *TaskContext {
- return &t.tc
+func (t *Task) TaskImage() *TaskImage {
+ return &t.image
}
// FSContext returns t's FSContext. FSContext does not take an additional
diff --git a/pkg/sentry/kernel/task_acct.go b/pkg/sentry/kernel/task_acct.go
index 5f3e60fe8..e574997f7 100644
--- a/pkg/sentry/kernel/task_acct.go
+++ b/pkg/sentry/kernel/task_acct.go
@@ -136,14 +136,14 @@ func (tg *ThreadGroup) IOUsage() *usage.IO {
func (t *Task) Name() string {
t.mu.Lock()
defer t.mu.Unlock()
- return t.tc.Name
+ return t.image.Name
}
// SetName changes t's name.
func (t *Task) SetName(name string) {
t.mu.Lock()
defer t.mu.Unlock()
- t.tc.Name = name
+ t.image.Name = name
t.Debugf("Set thread name to %q", name)
}
diff --git a/pkg/sentry/kernel/task_block.go b/pkg/sentry/kernel/task_block.go
index 4a4a69ee2..9419f2e95 100644
--- a/pkg/sentry/kernel/task_block.go
+++ b/pkg/sentry/kernel/task_block.go
@@ -20,6 +20,7 @@ import (
"time"
ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time"
+ "gvisor.dev/gvisor/pkg/sync"
"gvisor.dev/gvisor/pkg/syserror"
)
@@ -32,6 +33,8 @@ import (
//
// - An error which is nil if an event is received from C, ETIMEDOUT if the timeout
// expired, and syserror.ErrInterrupted if t is interrupted.
+//
+// Preconditions: The caller must be running on the task goroutine.
func (t *Task) BlockWithTimeout(C chan struct{}, haveTimeout bool, timeout time.Duration) (time.Duration, error) {
if !haveTimeout {
return timeout, t.block(C, nil)
@@ -112,7 +115,14 @@ func (t *Task) Block(C <-chan struct{}) error {
// block blocks a task on one of many events.
// N.B. defer is too expensive to be used here.
+//
+// Preconditions: The caller must be running on the task goroutine.
func (t *Task) block(C <-chan struct{}, timerChan <-chan struct{}) error {
+ // This function is very hot; skip this check outside of +race builds.
+ if sync.RaceEnabled {
+ t.assertTaskGoroutine()
+ }
+
// Fast path if the request is already done.
select {
case <-C:
@@ -156,33 +166,39 @@ func (t *Task) block(C <-chan struct{}, timerChan <-chan struct{}) error {
}
}
-// SleepStart implements amutex.Sleeper.SleepStart.
+// SleepStart implements context.ChannelSleeper.SleepStart.
func (t *Task) SleepStart() <-chan struct{} {
+ t.assertTaskGoroutine()
t.Deactivate()
t.accountTaskGoroutineEnter(TaskGoroutineBlockedInterruptible)
return t.interruptChan
}
-// SleepFinish implements amutex.Sleeper.SleepFinish.
+// SleepFinish implements context.ChannelSleeper.SleepFinish.
func (t *Task) SleepFinish(success bool) {
if !success {
- // The interrupted notification is consumed only at the top-level
- // (Run). Therefore we attempt to reset the pending notification.
- // This will also elide our next entry back into the task, so we
- // will process signals, state changes, etc.
+ // Our caller received from t.interruptChan; we need to re-send to it
+ // to ensure that t.interrupted() is still true.
t.interruptSelf()
}
t.accountTaskGoroutineLeave(TaskGoroutineBlockedInterruptible)
t.Activate()
}
-// Interrupted implements amutex.Sleeper.Interrupted
+// Interrupted implements context.ChannelSleeper.Interrupted.
func (t *Task) Interrupted() bool {
- return len(t.interruptChan) != 0
+ if t.interrupted() {
+ return true
+ }
+ // Indicate that t's task goroutine is still responsive (i.e. reset the
+ // watchdog timer).
+ t.accountTaskGoroutineRunning()
+ return false
}
// UninterruptibleSleepStart implements context.Context.UninterruptibleSleepStart.
func (t *Task) UninterruptibleSleepStart(deactivate bool) {
+ t.assertTaskGoroutine()
if deactivate {
t.Deactivate()
}
@@ -198,13 +214,17 @@ func (t *Task) UninterruptibleSleepFinish(activate bool) {
}
// interrupted returns true if interrupt or interruptSelf has been called at
-// least once since the last call to interrupted.
+// least once since the last call to unsetInterrupted.
func (t *Task) interrupted() bool {
+ return len(t.interruptChan) != 0
+}
+
+// unsetInterrupted causes interrupted to return false until the next call to
+// interrupt or interruptSelf.
+func (t *Task) unsetInterrupted() {
select {
case <-t.interruptChan:
- return true
default:
- return false
}
}
@@ -220,9 +240,7 @@ func (t *Task) interrupt() {
func (t *Task) interruptSelf() {
select {
case t.interruptChan <- struct{}{}:
- t.Debugf("Interrupt queued")
default:
- t.Debugf("Dropping duplicate interrupt")
}
// platform.Context.Interrupt() is unnecessary since a task goroutine
// calling interruptSelf() cannot also be blocked in
diff --git a/pkg/sentry/kernel/task_clone.go b/pkg/sentry/kernel/task_clone.go
index 527344162..f305e69c0 100644
--- a/pkg/sentry/kernel/task_clone.go
+++ b/pkg/sentry/kernel/task_clone.go
@@ -115,7 +115,7 @@ type CloneOptions struct {
ParentTID usermem.Addr
// If Vfork is true, place the parent in vforkStop until the cloned task
- // releases its TaskContext.
+ // releases its TaskImage.
Vfork bool
// If Untraced is true, do not report PTRACE_EVENT_CLONE/FORK/VFORK for
@@ -226,20 +226,20 @@ func (t *Task) Clone(opts *CloneOptions) (ThreadID, *SyscallControl, error) {
})
}
- tc, err := t.tc.Fork(t, t.k, !opts.NewAddressSpace)
+ image, err := t.image.Fork(t, t.k, !opts.NewAddressSpace)
if err != nil {
return 0, nil, err
}
cu.Add(func() {
- tc.release()
+ image.release()
})
// clone() returns 0 in the child.
- tc.Arch.SetReturn(0)
+ image.Arch.SetReturn(0)
if opts.Stack != 0 {
- tc.Arch.SetStack(uintptr(opts.Stack))
+ image.Arch.SetStack(uintptr(opts.Stack))
}
if opts.SetTLS {
- if !tc.Arch.SetTLS(uintptr(opts.TLS)) {
+ if !image.Arch.SetTLS(uintptr(opts.TLS)) {
return 0, nil, syserror.EPERM
}
}
@@ -288,7 +288,7 @@ func (t *Task) Clone(opts *CloneOptions) (ThreadID, *SyscallControl, error) {
Kernel: t.k,
ThreadGroup: tg,
SignalMask: t.SignalMask(),
- TaskContext: tc,
+ TaskImage: image,
FSContext: fsContext,
FDTable: fdTable,
Credentials: creds,
diff --git a/pkg/sentry/kernel/task_context.go b/pkg/sentry/kernel/task_context.go
index d1136461a..70b0699dc 100644
--- a/pkg/sentry/kernel/task_context.go
+++ b/pkg/sentry/kernel/task_context.go
@@ -1,4 +1,4 @@
-// Copyright 2018 The gVisor Authors.
+// Copyright 2020 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -15,159 +15,175 @@
package kernel
import (
- "fmt"
+ "time"
- "gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
- "gvisor.dev/gvisor/pkg/sentry/arch"
- "gvisor.dev/gvisor/pkg/sentry/kernel/futex"
- "gvisor.dev/gvisor/pkg/sentry/loader"
- "gvisor.dev/gvisor/pkg/sentry/mm"
- "gvisor.dev/gvisor/pkg/syserr"
- "gvisor.dev/gvisor/pkg/usermem"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/inet"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+ ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time"
+ "gvisor.dev/gvisor/pkg/sentry/limits"
+ "gvisor.dev/gvisor/pkg/sentry/pgalloc"
+ "gvisor.dev/gvisor/pkg/sentry/platform"
+ "gvisor.dev/gvisor/pkg/sentry/unimpl"
+ "gvisor.dev/gvisor/pkg/sentry/uniqueid"
+ "gvisor.dev/gvisor/pkg/sentry/vfs"
+ "gvisor.dev/gvisor/pkg/sync"
)
-var errNoSyscalls = syserr.New("no syscall table found", linux.ENOEXEC)
-
-// Auxmap contains miscellaneous data for the task.
-type Auxmap map[string]interface{}
-
-// TaskContext is the subset of a task's data that is provided by the loader.
-//
-// +stateify savable
-type TaskContext struct {
- // Name is the thread name set by the prctl(PR_SET_NAME) system call.
- Name string
-
- // Arch is the architecture-specific context (registers, etc.)
- Arch arch.Context
-
- // MemoryManager is the task's address space.
- MemoryManager *mm.MemoryManager
+// Deadline implements context.Context.Deadline.
+func (t *Task) Deadline() (time.Time, bool) {
+ return time.Time{}, false
+}
- // fu implements futexes in the address space.
- fu *futex.Manager
+// Done implements context.Context.Done.
+func (t *Task) Done() <-chan struct{} {
+ return nil
+}
- // st is the task's syscall table.
- st *SyscallTable `state:".(syscallTableInfo)"`
+// Err implements context.Context.Err.
+func (t *Task) Err() error {
+ return nil
}
-// release releases all resources held by the TaskContext. release is called by
-// the task when it execs into a new TaskContext or exits.
-func (tc *TaskContext) release() {
- // Nil out pointers so that if the task is saved after release, it doesn't
- // follow the pointers to possibly now-invalid objects.
- if tc.MemoryManager != nil {
- tc.MemoryManager.DecUsers(context.Background())
- tc.MemoryManager = nil
+// Value implements context.Context.Value.
+//
+// Preconditions: The caller must be running on the task goroutine.
+func (t *Task) Value(key interface{}) interface{} {
+ // This function is very hot; skip this check outside of +race builds.
+ if sync.RaceEnabled {
+ t.assertTaskGoroutine()
}
- tc.fu = nil
+ return t.contextValue(key, true /* isTaskGoroutine */)
}
-// Fork returns a duplicate of tc. The copied TaskContext always has an
-// independent arch.Context. If shareAddressSpace is true, the copied
-// TaskContext shares an address space with the original; otherwise, the copied
-// TaskContext has an independent address space that is initially a duplicate
-// of the original's.
-func (tc *TaskContext) Fork(ctx context.Context, k *Kernel, shareAddressSpace bool) (*TaskContext, error) {
- newTC := &TaskContext{
- Name: tc.Name,
- Arch: tc.Arch.Fork(),
- st: tc.st,
- }
- if shareAddressSpace {
- newTC.MemoryManager = tc.MemoryManager
- if newTC.MemoryManager != nil {
- if !newTC.MemoryManager.IncUsers() {
- // Shouldn't be possible since tc.MemoryManager should be a
- // counted user.
- panic(fmt.Sprintf("TaskContext.Fork called with userless TaskContext.MemoryManager"))
- }
+func (t *Task) contextValue(key interface{}, isTaskGoroutine bool) interface{} {
+ switch key {
+ case CtxCanTrace:
+ return t.CanTrace
+ case CtxKernel:
+ return t.k
+ case CtxPIDNamespace:
+ return t.tg.pidns
+ case CtxUTSNamespace:
+ if !isTaskGoroutine {
+ t.mu.Lock()
+ defer t.mu.Unlock()
+ }
+ return t.utsns
+ case CtxIPCNamespace:
+ if !isTaskGoroutine {
+ t.mu.Lock()
+ defer t.mu.Unlock()
+ }
+ ipcns := t.ipcns
+ ipcns.IncRef()
+ return ipcns
+ case CtxTask:
+ return t
+ case auth.CtxCredentials:
+ return t.creds.Load()
+ case context.CtxThreadGroupID:
+ return int32(t.tg.ID())
+ case fs.CtxRoot:
+ if !isTaskGoroutine {
+ t.mu.Lock()
+ defer t.mu.Unlock()
+ }
+ return t.fsContext.RootDirectory()
+ case vfs.CtxRoot:
+ if !isTaskGoroutine {
+ t.mu.Lock()
+ defer t.mu.Unlock()
}
- newTC.fu = tc.fu
- } else {
- newMM, err := tc.MemoryManager.Fork(ctx)
- if err != nil {
- return nil, err
+ return t.fsContext.RootDirectoryVFS2()
+ case vfs.CtxMountNamespace:
+ if !isTaskGoroutine {
+ t.mu.Lock()
+ defer t.mu.Unlock()
}
- newTC.MemoryManager = newMM
- newTC.fu = k.futexes.Fork()
+ t.mountNamespaceVFS2.IncRef()
+ return t.mountNamespaceVFS2
+ case fs.CtxDirentCacheLimiter:
+ return t.k.DirentCacheLimiter
+ case inet.CtxStack:
+ return t.NetworkContext()
+ case ktime.CtxRealtimeClock:
+ return t.k.RealtimeClock()
+ case limits.CtxLimits:
+ return t.tg.limits
+ case pgalloc.CtxMemoryFile:
+ return t.k.mf
+ case pgalloc.CtxMemoryFileProvider:
+ return t.k
+ case platform.CtxPlatform:
+ return t.k
+ case uniqueid.CtxGlobalUniqueID:
+ return t.k.UniqueID()
+ case uniqueid.CtxGlobalUniqueIDProvider:
+ return t.k
+ case uniqueid.CtxInotifyCookie:
+ return t.k.GenerateInotifyCookie()
+ case unimpl.CtxEvents:
+ return t.k
+ default:
+ return nil
}
- return newTC, nil
}
-// Arch returns t's arch.Context.
-//
-// Preconditions: The caller must be running on the task goroutine, or t.mu
-// must be locked.
-func (t *Task) Arch() arch.Context {
- return t.tc.Arch
+// taskAsyncContext implements context.Context for a goroutine that performs
+// work on behalf of a Task, but is not the task goroutine.
+type taskAsyncContext struct {
+ context.NoopSleeper
+
+ t *Task
}
-// MemoryManager returns t's MemoryManager. MemoryManager does not take an
-// additional reference on the returned MM.
-//
-// Preconditions: The caller must be running on the task goroutine, or t.mu
-// must be locked.
-func (t *Task) MemoryManager() *mm.MemoryManager {
- return t.tc.MemoryManager
+// AsyncContext returns a context.Context representing t. The returned
+// context.Context is intended for use by goroutines other than t's task
+// goroutine; for example, signal delivery to t will not interrupt goroutines
+// that are blocking using the returned context.Context.
+func (t *Task) AsyncContext() context.Context {
+ return taskAsyncContext{t: t}
}
-// SyscallTable returns t's syscall table.
-//
-// Preconditions: The caller must be running on the task goroutine, or t.mu
-// must be locked.
-func (t *Task) SyscallTable() *SyscallTable {
- return t.tc.st
+// Debugf implements log.Logger.Debugf.
+func (ctx taskAsyncContext) Debugf(format string, v ...interface{}) {
+ ctx.t.Debugf(format, v...)
}
-// Stack returns the userspace stack.
-//
-// Preconditions: The caller must be running on the task goroutine, or t.mu
-// must be locked.
-func (t *Task) Stack() *arch.Stack {
- return &arch.Stack{
- Arch: t.Arch(),
- IO: t.MemoryManager(),
- Bottom: usermem.Addr(t.Arch().Stack()),
- }
+// Infof implements log.Logger.Infof.
+func (ctx taskAsyncContext) Infof(format string, v ...interface{}) {
+ ctx.t.Infof(format, v...)
}
-// LoadTaskImage loads a specified file into a new TaskContext.
-//
-// args.MemoryManager does not need to be set by the caller.
-func (k *Kernel) LoadTaskImage(ctx context.Context, args loader.LoadArgs) (*TaskContext, *syserr.Error) {
- // If File is not nil, we should load that instead of resolving Filename.
- if args.File != nil {
- args.Filename = args.File.PathnameWithDeleted(ctx)
- }
+// Warningf implements log.Logger.Warningf.
+func (ctx taskAsyncContext) Warningf(format string, v ...interface{}) {
+ ctx.t.Warningf(format, v...)
+}
+
+// IsLogging implements log.Logger.IsLogging.
+func (ctx taskAsyncContext) IsLogging(level log.Level) bool {
+ return ctx.t.IsLogging(level)
+}
- // Prepare a new user address space to load into.
- m := mm.NewMemoryManager(k, k, k.SleepForAddressSpaceActivation)
- defer m.DecUsers(ctx)
- args.MemoryManager = m
+// Deadline implements context.Context.Deadline.
+func (ctx taskAsyncContext) Deadline() (time.Time, bool) {
+ return time.Time{}, false
+}
- os, ac, name, err := loader.Load(ctx, args, k.extraAuxv, k.vdso)
- if err != nil {
- return nil, err
- }
+// Done implements context.Context.Done.
+func (ctx taskAsyncContext) Done() <-chan struct{} {
+ return nil
+}
- // Lookup our new syscall table.
- st, ok := LookupSyscallTable(os, ac.Arch())
- if !ok {
- // No syscall table found. This means that the ELF binary does not match
- // the architecture.
- return nil, errNoSyscalls
- }
+// Err implements context.Context.Err.
+func (ctx taskAsyncContext) Err() error {
+ return nil
+}
- if !m.IncUsers() {
- panic("Failed to increment users count on new MM")
- }
- return &TaskContext{
- Name: name,
- Arch: ac,
- MemoryManager: m,
- fu: k.futexes.Fork(),
- st: st,
- }, nil
+// Value implements context.Context.Value.
+func (ctx taskAsyncContext) Value(key interface{}) interface{} {
+ return ctx.t.contextValue(key, false /* isTaskGoroutine */)
}
diff --git a/pkg/sentry/kernel/task_exec.go b/pkg/sentry/kernel/task_exec.go
index 412d471d3..d9897e802 100644
--- a/pkg/sentry/kernel/task_exec.go
+++ b/pkg/sentry/kernel/task_exec.go
@@ -83,11 +83,12 @@ type execStop struct{}
func (*execStop) Killable() bool { return true }
// Execve implements the execve(2) syscall by killing all other tasks in its
-// thread group and switching to newTC. Execve always takes ownership of newTC.
+// thread group and switching to newImage. Execve always takes ownership of
+// newImage.
//
// Preconditions: The caller must be running Task.doSyscallInvoke on the task
// goroutine.
-func (t *Task) Execve(newTC *TaskContext) (*SyscallControl, error) {
+func (t *Task) Execve(newImage *TaskImage) (*SyscallControl, error) {
t.tg.pidns.owner.mu.Lock()
defer t.tg.pidns.owner.mu.Unlock()
t.tg.signalHandlers.mu.Lock()
@@ -96,7 +97,7 @@ func (t *Task) Execve(newTC *TaskContext) (*SyscallControl, error) {
if t.tg.exiting || t.tg.execing != nil {
// We lost to a racing group-exit, kill, or exec from another thread
// and should just exit.
- newTC.release()
+ newImage.release()
return nil, syserror.EINTR
}
@@ -118,7 +119,7 @@ func (t *Task) Execve(newTC *TaskContext) (*SyscallControl, error) {
t.beginInternalStopLocked((*execStop)(nil))
}
- return &SyscallControl{next: &runSyscallAfterExecStop{newTC}, ignoreReturn: true}, nil
+ return &SyscallControl{next: &runSyscallAfterExecStop{newImage}, ignoreReturn: true}, nil
}
// The runSyscallAfterExecStop state continues execve(2) after all siblings of
@@ -126,16 +127,16 @@ func (t *Task) Execve(newTC *TaskContext) (*SyscallControl, error) {
//
// +stateify savable
type runSyscallAfterExecStop struct {
- tc *TaskContext
+ image *TaskImage
}
func (r *runSyscallAfterExecStop) execute(t *Task) taskRunState {
- t.traceExecEvent(r.tc)
+ t.traceExecEvent(r.image)
t.tg.pidns.owner.mu.Lock()
t.tg.execing = nil
if t.killed() {
t.tg.pidns.owner.mu.Unlock()
- r.tc.release()
+ r.image.release()
return (*runInterrupt)(nil)
}
// We are the thread group leader now. Save our old thread ID for
@@ -214,7 +215,7 @@ func (r *runSyscallAfterExecStop) execute(t *Task) taskRunState {
// executables (set-user/group-ID bits and file capabilities). This
// allows us to unconditionally enable user dumpability on the new mm.
// See fs/exec.c:setup_new_exec.
- r.tc.MemoryManager.SetDumpability(mm.UserDumpable)
+ r.image.MemoryManager.SetDumpability(mm.UserDumpable)
// Switch to the new process.
t.MemoryManager().Deactivate()
@@ -222,8 +223,8 @@ func (r *runSyscallAfterExecStop) execute(t *Task) taskRunState {
// Update credentials to reflect the execve. This should precede switching
// MMs to ensure that dumpability has been reset first, if needed.
t.updateCredsForExecLocked()
- t.tc.release()
- t.tc = *r.tc
+ t.image.release()
+ t.image = *r.image
t.mu.Unlock()
t.unstopVforkParent()
t.p.FullStateChanged()
diff --git a/pkg/sentry/kernel/task_exit.go b/pkg/sentry/kernel/task_exit.go
index ce7b9641d..c5137c282 100644
--- a/pkg/sentry/kernel/task_exit.go
+++ b/pkg/sentry/kernel/task_exit.go
@@ -266,7 +266,7 @@ func (*runExitMain) execute(t *Task) taskRunState {
t.updateRSSLocked()
t.tg.pidns.owner.mu.Unlock()
t.mu.Lock()
- t.tc.release()
+ t.image.release()
t.mu.Unlock()
// Releasing the MM unblocks a blocked CLONE_VFORK parent.
diff --git a/pkg/sentry/kernel/task_futex.go b/pkg/sentry/kernel/task_futex.go
index c80391475..195c7da9b 100644
--- a/pkg/sentry/kernel/task_futex.go
+++ b/pkg/sentry/kernel/task_futex.go
@@ -26,7 +26,7 @@ import (
// Preconditions: The caller must be running on the task goroutine, or t.mu
// must be locked.
func (t *Task) Futex() *futex.Manager {
- return t.tc.fu
+ return t.image.fu
}
// SwapUint32 implements futex.Target.SwapUint32.
diff --git a/pkg/sentry/kernel/task_image.go b/pkg/sentry/kernel/task_image.go
new file mode 100644
index 000000000..ce5fbd299
--- /dev/null
+++ b/pkg/sentry/kernel/task_image.go
@@ -0,0 +1,173 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package kernel
+
+import (
+ "fmt"
+
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/futex"
+ "gvisor.dev/gvisor/pkg/sentry/loader"
+ "gvisor.dev/gvisor/pkg/sentry/mm"
+ "gvisor.dev/gvisor/pkg/syserr"
+ "gvisor.dev/gvisor/pkg/usermem"
+)
+
+var errNoSyscalls = syserr.New("no syscall table found", linux.ENOEXEC) // returned by LoadTaskImage when LookupSyscallTable fails
+
+// Auxmap contains miscellaneous data for the task, stored as arbitrary values keyed by string.
+type Auxmap map[string]interface{}
+
+// TaskImage is the subset of a task's data that is provided by the loader (see LoadTaskImage).
+//
+// +stateify savable
+type TaskImage struct {
+ // Name is the thread name: seeded from the loader by LoadTaskImage, settable via prctl(PR_SET_NAME).
+ Name string
+
+ // Arch is the architecture-specific context (registers, etc.); Fork gives each copy its own.
+ Arch arch.Context
+
+ // MemoryManager is the task's address space. release drops a user count on it and nils the field.
+ MemoryManager *mm.MemoryManager
+
+ // fu implements futexes in the address space; Fork shares it only when the address space is shared.
+ fu *futex.Manager
+
+ // st is the task's syscall table; LoadTaskImage looks it up from the loader-reported OS and arch.
+ st *SyscallTable `state:".(syscallTableInfo)"`
+}
+
+// release releases all resources held by the TaskImage: it drops the image's MemoryManager user
+// count and clears fu. It is called by the task when it execs into a new TaskImage or exits.
+func (image *TaskImage) release() {
+ // Nil out pointers so that if the task is saved after release, it doesn't
+ // follow the pointers to possibly now-invalid objects.
+ if image.MemoryManager != nil { // nil if never set or already released; nothing to drop then
+ image.MemoryManager.DecUsers(context.Background()) // release takes no ctx, so use Background
+ image.MemoryManager = nil
+ }
+ image.fu = nil
+}
+
+// Fork returns a duplicate of image. The copy always has an independent arch.Context and keeps
+// the same Name and syscall table. If shareAddressSpace is true, the copy shares the original's
+// MemoryManager (taking an extra user count on it) and futex manager; otherwise it gets a forked
+// MemoryManager, initially a duplicate of the original's, and a futex manager forked from k's.
+// If forking the MemoryManager fails, Fork returns a nil TaskImage and that error.
+func (image *TaskImage) Fork(ctx context.Context, k *Kernel, shareAddressSpace bool) (*TaskImage, error) {
+ newImage := &TaskImage{
+ Name: image.Name,
+ Arch: image.Arch.Fork(),
+ st: image.st,
+ }
+ if shareAddressSpace {
+ newImage.MemoryManager = image.MemoryManager
+ if newImage.MemoryManager != nil {
+ if !newImage.MemoryManager.IncUsers() {
+ // Shouldn't be possible since image.MemoryManager should be a
+ // counted user.
+ panic(fmt.Sprintf("TaskImage.Fork called with userless TaskImage.MemoryManager")) // NOTE(review): Sprintf has no format arguments
+ }
+ }
+ newImage.fu = image.fu // same address space, same futex manager
+ } else {
+ newMM, err := image.MemoryManager.Fork(ctx) // NOTE(review): no nil check on image.MemoryManager in this branch
+ if err != nil {
+ return nil, err
+ }
+ newImage.MemoryManager = newMM
+ newImage.fu = k.futexes.Fork()
+ }
+ return newImage, nil
+}
+
+// Arch returns t's arch.Context, as stored in t's TaskImage.
+//
+// Preconditions: The caller must be running on the task goroutine, or t.mu
+// must be locked.
+func (t *Task) Arch() arch.Context {
+ return t.image.Arch
+}
+
+// MemoryManager returns t's MemoryManager, which is nil once t's TaskImage has been released.
+// MemoryManager does not take an additional reference on the returned MM.
+//
+// Preconditions: The caller must be running on the task goroutine, or t.mu
+// must be locked.
+func (t *Task) MemoryManager() *mm.MemoryManager {
+ return t.image.MemoryManager
+}
+
+// SyscallTable returns t's syscall table, as stored in t's TaskImage.
+//
+// Preconditions: The caller must be running on the task goroutine, or t.mu
+// must be locked.
+func (t *Task) SyscallTable() *SyscallTable {
+ return t.image.st
+}
+
+// Stack returns a newly allocated arch.Stack describing t's userspace stack.
+//
+// Preconditions: The caller must be running on the task goroutine, or t.mu
+// must be locked.
+func (t *Task) Stack() *arch.Stack {
+ return &arch.Stack{
+ Arch: t.Arch(),
+ IO: t.MemoryManager(), // stack memory is accessed through the task's MemoryManager
+ Bottom: usermem.Addr(t.Arch().Stack()), // address reported by the arch.Context's Stack method
+ }
+}
+
+// LoadTaskImage loads a specified file into a new TaskImage backed by a fresh MemoryManager. It
+// returns a nil TaskImage and an error if loading fails or no syscall table matches the binary.
+// args.MemoryManager does not need to be set by the caller; it is overwritten here.
+func (k *Kernel) LoadTaskImage(ctx context.Context, args loader.LoadArgs) (*TaskImage, *syserr.Error) {
+ // If File is not nil, we should load that instead of resolving Filename.
+ if args.File != nil {
+ args.Filename = args.File.PathnameWithDeleted(ctx)
+ }
+
+ // Prepare a new user address space to load into.
+ m := mm.NewMemoryManager(k, k, k.SleepForAddressSpaceActivation)
+ defer m.DecUsers(ctx) // runs on every return path; presumably drops the user count held since NewMemoryManager
+ args.MemoryManager = m
+
+ os, ac, name, err := loader.Load(ctx, args, k.extraAuxv, k.vdso)
+ if err != nil {
+ return nil, err
+ }
+
+ // Lookup our new syscall table.
+ st, ok := LookupSyscallTable(os, ac.Arch())
+ if !ok {
+ // No syscall table found. This means that the ELF binary does not match
+ // the architecture.
+ return nil, errNoSyscalls
+ }
+
+ if !m.IncUsers() { // user count owned by the returned TaskImage; outlives the deferred DecUsers
+ panic("Failed to increment users count on new MM")
+ }
+ return &TaskImage{
+ Name: name,
+ Arch: ac,
+ MemoryManager: m,
+ fu: k.futexes.Fork(),
+ st: st,
+ }, nil
+}
diff --git a/pkg/sentry/kernel/task_log.go b/pkg/sentry/kernel/task_log.go
index d23cea802..c70e5e6ce 100644
--- a/pkg/sentry/kernel/task_log.go
+++ b/pkg/sentry/kernel/task_log.go
@@ -19,6 +19,7 @@ import (
"runtime/trace"
"sort"
+ "gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/pkg/usermem"
)
@@ -215,7 +216,7 @@ func (t *Task) rebuildTraceContext(tid ThreadID) {
// arbitrarily large (in general it won't be, especially for cases
// where we're collecting a brief profile), so using the TID is a
// reasonable compromise in this case.
- t.traceContext, t.traceTask = trace.NewTask(t, fmt.Sprintf("tid:%d", tid))
+ t.traceContext, t.traceTask = trace.NewTask(context.Background(), fmt.Sprintf("tid:%d", tid))
}
// traceCloneEvent is called when a new task is spawned.
@@ -237,11 +238,11 @@ func (t *Task) traceExitEvent() {
}
// traceExecEvent is called when a task calls exec.
-func (t *Task) traceExecEvent(tc *TaskContext) {
+func (t *Task) traceExecEvent(image *TaskImage) {
if !trace.IsEnabled() {
return
}
- file := tc.MemoryManager.Executable()
+ file := image.MemoryManager.Executable()
if file == nil {
trace.Logf(t.traceContext, traceCategory, "exec: << unknown >>")
return
diff --git a/pkg/sentry/kernel/task_run.go b/pkg/sentry/kernel/task_run.go
index 8dc3fec90..3ccecf4b6 100644
--- a/pkg/sentry/kernel/task_run.go
+++ b/pkg/sentry/kernel/task_run.go
@@ -16,11 +16,13 @@ package kernel
import (
"bytes"
+ "fmt"
"runtime"
"runtime/trace"
"sync/atomic"
"gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/goid"
"gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/hostcpu"
ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time"
@@ -57,6 +59,8 @@ type taskRunState interface {
// make it visible in stack dumps. A goroutine for a given task can be identified
// searching for Task.run()'s argument value.
func (t *Task) run(threadID uintptr) {
+ atomic.StoreInt64(&t.goid, goid.Get())
+
// Construct t.blockingTimer here. We do this here because we can't
// reconstruct t.blockingTimer during restore in Task.afterLoad(), because
// kernel.timekeeper.SetClocks() hasn't been called yet.
@@ -99,6 +103,9 @@ func (t *Task) run(threadID uintptr) {
t.tg.pidns.owner.runningGoroutines.Done()
t.p.Release()
+ // Deferring this store triggers a false positive in the race
+ // detector (https://github.com/golang/go/issues/42599).
+ atomic.StoreInt64(&t.goid, 0)
// Keep argument alive because stack trace for dead variables may not be correct.
runtime.KeepAlive(threadID)
return
@@ -317,7 +324,7 @@ func (app *runApp) execute(t *Task) taskRunState {
// region. We should be able to easily identify
// vsyscalls by having a <fault><syscall> pair.
if at.Execute {
- if sysno, ok := t.tc.st.LookupEmulate(addr); ok {
+ if sysno, ok := t.image.st.LookupEmulate(addr); ok {
return t.doVsyscall(addr, sysno)
}
}
@@ -375,6 +382,19 @@ func (app *runApp) execute(t *Task) taskRunState {
}
}
+// assertTaskGoroutine panics if the caller is not running on t's task goroutine, i.e. if the
+// current goroutine ID (goid.Get) differs from the ID atomically loaded from t.goid.
+func (t *Task) assertTaskGoroutine() {
+ if got, want := goid.Get(), atomic.LoadInt64(&t.goid); got != want {
+ panic(fmt.Sprintf("running on goroutine %d (task goroutine for kernel.Task %p is %d)", got, t, want))
+ }
+}
+
+// GoroutineID returns the ID of t's task goroutine, or 0 once Task.run has reset it on exit.
+func (t *Task) GoroutineID() int64 {
+ return atomic.LoadInt64(&t.goid)
+}
+
// waitGoroutineStoppedOrExited blocks until t's task goroutine stops or exits.
func (t *Task) waitGoroutineStoppedOrExited() {
t.goroutineStopped.Wait()
diff --git a/pkg/sentry/kernel/task_sched.go b/pkg/sentry/kernel/task_sched.go
index 52c55d13d..9ba5f8d78 100644
--- a/pkg/sentry/kernel/task_sched.go
+++ b/pkg/sentry/kernel/task_sched.go
@@ -157,6 +157,18 @@ func (t *Task) accountTaskGoroutineLeave(state TaskGoroutineState) {
t.goschedSeq.EndWrite()
}
+// accountTaskGoroutineRunning folds elapsed CPU clock time into SysTicks. Preconditions: The caller must be running on the task goroutine.
+func (t *Task) accountTaskGoroutineRunning() {
+ now := t.k.CPUClockNow()
+ if t.gosched.State != TaskGoroutineRunningSys { // any other recorded state is treated as a bug
+ panic(fmt.Sprintf("Task goroutine in state %v (expected %v)", t.gosched.State, TaskGoroutineRunningSys))
+ }
+ t.goschedSeq.BeginWrite()
+ t.gosched.SysTicks += now - t.gosched.Timestamp // charge the time since the last timestamp to sys
+ t.gosched.Timestamp = now // start the next accounting interval from now
+ t.goschedSeq.EndWrite()
+}
+
// TaskGoroutineSchedInfo returns a copy of t's task goroutine scheduling info.
// Most clients should use t.CPUStats() instead.
func (t *Task) TaskGoroutineSchedInfo() TaskGoroutineSchedInfo {
diff --git a/pkg/sentry/kernel/task_signals.go b/pkg/sentry/kernel/task_signals.go
index ebdb83061..42dd3e278 100644
--- a/pkg/sentry/kernel/task_signals.go
+++ b/pkg/sentry/kernel/task_signals.go
@@ -619,9 +619,6 @@ func (t *Task) setSignalMaskLocked(mask linux.SignalSet) {
return
}
})
- // We have to re-issue the interrupt consumed by t.interrupted() since
- // it might have been for a different reason.
- t.interruptSelf()
}
// Conversely, if the new mask unblocks any signals that were blocked by
@@ -931,10 +928,10 @@ func (t *Task) signalStop(target *Task, code int32, status int32) {
type runInterrupt struct{}
func (*runInterrupt) execute(t *Task) taskRunState {
- // Interrupts are de-duplicated (if t is interrupted twice before
- // t.interrupted() is called, t.interrupted() will only return true once),
- // so early exits from this function must re-enter the runInterrupt state
- // to check for more interrupt-signaled conditions.
+ // Interrupts are de-duplicated (t.unsetInterrupted() will undo the effect
+ // of all previous calls to t.interrupted() regardless of how many such
+ // calls there have been), so early exits from this function must re-enter
+ // the runInterrupt state to check for more interrupt-signaled conditions.
t.tg.signalHandlers.mu.Lock()
@@ -1080,6 +1077,7 @@ func (*runInterrupt) execute(t *Task) taskRunState {
return t.deliverSignal(info, act)
}
+ t.unsetInterrupted()
t.tg.signalHandlers.mu.Unlock()
return (*runApp)(nil)
}
diff --git a/pkg/sentry/kernel/task_start.go b/pkg/sentry/kernel/task_start.go
index 8e28230cc..36e1384f1 100644
--- a/pkg/sentry/kernel/task_start.go
+++ b/pkg/sentry/kernel/task_start.go
@@ -46,10 +46,10 @@ type TaskConfig struct {
// SignalMask is the new task's initial signal mask.
SignalMask linux.SignalSet
- // TaskContext is the TaskContext of the new task. Ownership of the
- // TaskContext is transferred to TaskSet.NewTask, whether or not it
+ // TaskImage is the TaskImage of the new task. Ownership of the
+ // TaskImage is transferred to TaskSet.NewTask, whether or not it
// succeeds.
- TaskContext *TaskContext
+ TaskImage *TaskImage
// FSContext is the FSContext of the new task. A reference must be held on
// FSContext, which is transferred to TaskSet.NewTask whether or not it
@@ -105,7 +105,7 @@ type TaskConfig struct {
func (ts *TaskSet) NewTask(ctx context.Context, cfg *TaskConfig) (*Task, error) {
t, err := ts.newTask(cfg)
if err != nil {
- cfg.TaskContext.release()
+ cfg.TaskImage.release()
cfg.FSContext.DecRef(ctx)
cfg.FDTable.DecRef(ctx)
cfg.IPCNamespace.DecRef(ctx)
@@ -121,7 +121,7 @@ func (ts *TaskSet) NewTask(ctx context.Context, cfg *TaskConfig) (*Task, error)
// of cfg if it succeeds.
func (ts *TaskSet) newTask(cfg *TaskConfig) (*Task, error) {
tg := cfg.ThreadGroup
- tc := cfg.TaskContext
+ image := cfg.TaskImage
t := &Task{
taskNode: taskNode{
tg: tg,
@@ -132,7 +132,7 @@ func (ts *TaskSet) newTask(cfg *TaskConfig) (*Task, error) {
interruptChan: make(chan struct{}, 1),
signalMask: cfg.SignalMask,
signalStack: arch.SignalStack{Flags: arch.SignalStackFlagDisable},
- tc: *tc,
+ image: *image,
fsContext: cfg.FSContext,
fdTable: cfg.FDTable,
p: cfg.Kernel.Platform.NewContext(),