summary refs log tree commit diff homepage
path: root/pkg/sentry
diff options
context:
space:
mode:
Diffstat (limited to 'pkg/sentry')
-rw-r--r-- pkg/sentry/kernel/ptrace.go 242
-rw-r--r-- pkg/sentry/kernel/sessions.go 2
-rw-r--r-- pkg/sentry/kernel/task.go 42
-rw-r--r-- pkg/sentry/kernel/task_exit.go 26
-rw-r--r-- pkg/sentry/kernel/task_signals.go 220
-rw-r--r-- pkg/sentry/kernel/thread_group.go 47
6 files changed, 368 insertions, 211 deletions
diff --git a/pkg/sentry/kernel/ptrace.go b/pkg/sentry/kernel/ptrace.go
index fa7a0d141..e8043bf8a 100644
--- a/pkg/sentry/kernel/ptrace.go
+++ b/pkg/sentry/kernel/ptrace.go
@@ -193,6 +193,10 @@ type ptraceStop struct {
// If frozen is true, the stopped task's tracer is currently operating on
// it, so Task.Kill should not remove the stop.
frozen bool
+
+ // If listen is true, the stopped task's tracer invoked PTRACE_LISTEN, so
+ // ptraceFreeze should fail.
+ listen bool
}
// Killable implements TaskStop.Killable.
@@ -216,11 +220,11 @@ func (t *Task) beginPtraceStopLocked() bool {
// is what prevents tasks from entering ptrace-stops after being killed.
// Note that if t was SIGKILLed and beingPtraceStopLocked is being called
// for PTRACE_EVENT_EXIT, the task will have dequeued the signal before
- // entering the exit path, so t.killable() will no longer return true. This
- // is consistent with Linux: "Bugs: ... A SIGKILL signal may still cause a
- // PTRACE_EVENT_EXIT stop before actual signal death. This may be changed
- // in the future; SIGKILL is meant to always immediately kill tasks even
- // under ptrace. Last confirmed on Linux 3.13." - ptrace(2)
+ // entering the exit path, so t.killedLocked() will no longer return true.
+ // This is consistent with Linux: "Bugs: ... A SIGKILL signal may still
+ // cause a PTRACE_EVENT_EXIT stop before actual signal death. This may be
+ // changed in the future; SIGKILL is meant to always immediately kill tasks
+ // even under ptrace. Last confirmed on Linux 3.13." - ptrace(2)
if t.killedLocked() {
return false
}
@@ -230,6 +234,10 @@ func (t *Task) beginPtraceStopLocked() bool {
// Preconditions: The TaskSet mutex must be locked.
func (t *Task) ptraceTrapLocked(code int32) {
+ // This is unconditional in ptrace_stop().
+ t.tg.signalHandlers.mu.Lock()
+ t.trapStopPending = false
+ t.tg.signalHandlers.mu.Unlock()
t.ptraceCode = code
t.ptraceSiginfo = &arch.SignalInfo{
Signo: int32(linux.SIGTRAP),
@@ -260,6 +268,9 @@ func (t *Task) ptraceFreeze() bool {
if !ok {
return false
}
+ if s.listen {
+ return false
+ }
s.frozen = true
return true
}
@@ -273,6 +284,12 @@ func (t *Task) ptraceUnfreeze() {
// preventing its thread group from completing execve.
t.tg.signalHandlers.mu.Lock()
defer t.tg.signalHandlers.mu.Unlock()
+ t.ptraceUnfreezeLocked()
+}
+
+// Preconditions: t must be in a frozen ptraceStop. t's signal mutex must be
+// locked.
+func (t *Task) ptraceUnfreezeLocked() {
// Do this even if the task has been killed to ensure a panic if t.stop is
// nil or not a ptraceStop.
t.stop.(*ptraceStop).frozen = false
@@ -336,8 +353,9 @@ func (t *Task) ptraceTraceme() error {
return nil
}
-// ptraceAttach implements ptrace(PTRACE_ATTACH, target). t is the caller.
-func (t *Task) ptraceAttach(target *Task) error {
+// ptraceAttach implements ptrace(PTRACE_ATTACH, target) if seize is false, and
+// ptrace(PTRACE_SEIZE, target, 0, opts) if seize is true. t is the caller.
+func (t *Task) ptraceAttach(target *Task, seize bool, opts uintptr) error {
if t.tg == target.tg {
return syserror.EPERM
}
@@ -355,19 +373,31 @@ func (t *Task) ptraceAttach(target *Task) error {
if target.exitState >= TaskExitZombie {
return syserror.EPERM
}
+ if seize {
+ if err := t.ptraceSetOptionsLocked(opts); err != nil {
+ return syserror.EIO
+ }
+ }
target.ptraceTracer.Store(t)
t.ptraceTracees[target] = struct{}{}
+ target.ptraceSeized = seize
target.tg.signalHandlers.mu.Lock()
- target.sendSignalLocked(&arch.SignalInfo{
- Signo: int32(linux.SIGSTOP),
- Code: arch.SignalInfoUser,
- }, false /* group */)
+ // "Unlike PTRACE_ATTACH, PTRACE_SEIZE does not stop the process." -
+ // ptrace(2)
+ if !seize {
+ target.sendSignalLocked(&arch.SignalInfo{
+ Signo: int32(linux.SIGSTOP),
+ Code: arch.SignalInfoUser,
+ }, false /* group */)
+ }
// Undocumented Linux feature: If the tracee is already group-stopped (and
// consequently will not report the SIGSTOP just sent), force it to leave
// and re-enter the stop so that it will switch to a ptrace-stop.
if target.stop == (*groupStop)(nil) {
- target.groupStopRequired = true
+ target.trapStopPending = true
target.endInternalStopLocked()
+ // TODO: Linux blocks ptrace_attach() until the task has
+ // entered the ptrace-stop (or exited) via JOBCTL_TRAPPING.
}
target.tg.signalHandlers.mu.Unlock()
return nil
@@ -418,6 +448,7 @@ func (t *Task) exitPtrace() {
//
// Preconditions: The TaskSet mutex must be locked for writing.
func (t *Task) forgetTracerLocked() {
+ t.ptraceSeized = false
t.ptraceOpts = ptraceOptions{}
t.ptraceSyscallMode = ptraceSyscallNone
t.ptraceSinglestep = false
@@ -426,21 +457,25 @@ func (t *Task) forgetTracerLocked() {
t.exitTracerAcked = true
t.exitNotifyLocked(true)
}
- // If t is ptrace-stopped, but its thread group is in a group stop and t is
- // eligible to participate, make it do so. This is essentially the reverse
- // of the special case in ptraceAttach, which converts a group stop to a
- // ptrace stop. ("Handling of restart from group-stop is currently buggy,
- // but the "as planned" behavior is to leave tracee stopped and waiting for
- // SIGCONT." - ptrace(2))
t.tg.signalHandlers.mu.Lock()
defer t.tg.signalHandlers.mu.Unlock()
- if t.stop == nil {
- return
+ // Unset t.trapStopPending, which might have been set by PTRACE_INTERRUPT. If
+ // it wasn't, it will be reset via t.groupStopPending after the following.
+ t.trapStopPending = false
+ // If t's thread group is in a group stop and t is eligible to participate,
+ // make it do so. This is essentially the reverse of the special case in
+ // ptraceAttach, which converts a group stop to a ptrace stop. ("Handling
+ // of restart from group-stop is currently buggy, but the "as planned"
+ // behavior is to leave tracee stopped and waiting for SIGCONT." -
+ // ptrace(2))
+ if (t.tg.groupStopComplete || t.tg.groupStopPendingCount != 0) && !t.groupStopPending && t.exitState < TaskExitInitiated {
+ t.groupStopPending = true
+ // t already participated in the group stop when it unset
+ // groupStopPending.
+ t.groupStopAcknowledged = true
+ t.interrupt()
}
if _, ok := t.stop.(*ptraceStop); ok {
- if t.exitState < TaskExitInitiated && t.tg.groupStopPhase >= groupStopInitiated {
- t.groupStopRequired = true
- }
t.endInternalStopLocked()
}
}
@@ -460,9 +495,9 @@ func (t *Task) ptraceSignalLocked(info *arch.SignalInfo) bool {
// The tracer might change this signal into a stop signal, in which case
// any SIGCONT received after the signal was originally dequeued should
// cancel it. This is consistent with Linux.
- if t.tg.groupStopPhase == groupStopNone {
- t.tg.groupStopPhase = groupStopDequeued
- }
+ t.tg.groupStopDequeued = true
+ // This is unconditional in ptrace_stop().
+ t.trapStopPending = false
// Can't lock the TaskSet mutex while holding a signal mutex.
t.tg.signalHandlers.mu.Unlock()
defer t.tg.signalHandlers.mu.Lock()
@@ -612,22 +647,27 @@ func (t *Task) ptraceClone(kind ptraceCloneKind, child *Task, opts *CloneOptions
if tracer != nil {
child.ptraceTracer.Store(tracer)
tracer.ptraceTracees[child] = struct{}{}
+ // "The "seized" behavior ... is inherited by children that are
+ // automatically attached using PTRACE_O_TRACEFORK,
+ // PTRACE_O_TRACEVFORK, and PTRACE_O_TRACECLONE." - ptrace(2)
+ child.ptraceSeized = t.ptraceSeized
// "Flags are inherited by new tracees created and "auto-attached"
// via active PTRACE_O_TRACEFORK, PTRACE_O_TRACEVFORK, or
- // PTRACE_O_TRACECLONE options."
+ // PTRACE_O_TRACECLONE options." - ptrace(2)
child.ptraceOpts = t.ptraceOpts
child.tg.signalHandlers.mu.Lock()
- // If the child is PT_SEIZED (currently not possible in the sentry
- // because PTRACE_SEIZE is unimplemented, but for future
- // reference), Linux just sets JOBCTL_TRAP_STOP instead, so the
- // child skips signal-delivery-stop and goes directly to
- // group-stop.
- //
- // The child will self-t.interrupt() when its task goroutine starts
+ // "PTRACE_SEIZE: ... Automatically attached children stop with
+ // PTRACE_EVENT_STOP and WSTOPSIG(status) returns SIGTRAP instead
+ // of having SIGSTOP signal delivered to them." - ptrace(2)
+ if child.ptraceSeized {
+ child.trapStopPending = true
+ } else {
+ child.pendingSignals.enqueue(&arch.SignalInfo{
+ Signo: int32(linux.SIGSTOP),
+ }, nil)
+ }
+ // The child will self-interrupt() when its task goroutine starts
// running, so we don't have to.
- child.pendingSignals.enqueue(&arch.SignalInfo{
- Signo: int32(linux.SIGSTOP),
- }, nil)
child.tg.signalHandlers.mu.Unlock()
}
}
@@ -681,6 +721,9 @@ func (t *Task) ptraceExec(oldTID ThreadID) {
// Employing PTRACE_GETSIGINFO for this signal returns si_code set to 0
// (SI_USER). This signal may be blocked by signal mask, and thus may be
// delivered (much) later." - ptrace(2)
+ if t.ptraceSeized {
+ return
+ }
t.tg.signalHandlers.mu.Lock()
defer t.tg.signalHandlers.mu.Unlock()
t.sendSignalLocked(&arch.SignalInfo{
@@ -749,6 +792,57 @@ func (t *Task) ptraceKill(target *Task) error {
return nil
}
+func (t *Task) ptraceInterrupt(target *Task) error {
+ t.tg.pidns.owner.mu.Lock()
+ defer t.tg.pidns.owner.mu.Unlock()
+ if target.Tracer() != t {
+ return syserror.ESRCH
+ }
+ if !target.ptraceSeized {
+ return syserror.EIO
+ }
+ target.tg.signalHandlers.mu.Lock()
+ defer target.tg.signalHandlers.mu.Unlock()
+ if target.killedLocked() || target.exitState >= TaskExitInitiated {
+ return nil
+ }
+ target.trapStopPending = true
+ if s, ok := target.stop.(*ptraceStop); ok && s.listen {
+ target.endInternalStopLocked()
+ }
+ target.interrupt()
+ return nil
+}
+
+// Preconditions: The TaskSet mutex must be locked for writing. t must have a
+// tracer.
+func (t *Task) ptraceSetOptionsLocked(opts uintptr) error {
+ const valid = uintptr(linux.PTRACE_O_EXITKILL |
+ linux.PTRACE_O_TRACESYSGOOD |
+ linux.PTRACE_O_TRACECLONE |
+ linux.PTRACE_O_TRACEEXEC |
+ linux.PTRACE_O_TRACEEXIT |
+ linux.PTRACE_O_TRACEFORK |
+ linux.PTRACE_O_TRACESECCOMP |
+ linux.PTRACE_O_TRACEVFORK |
+ linux.PTRACE_O_TRACEVFORKDONE)
+ if opts&^valid != 0 {
+ return syserror.EINVAL
+ }
+ t.ptraceOpts = ptraceOptions{
+ ExitKill: opts&linux.PTRACE_O_EXITKILL != 0,
+ SysGood: opts&linux.PTRACE_O_TRACESYSGOOD != 0,
+ TraceClone: opts&linux.PTRACE_O_TRACECLONE != 0,
+ TraceExec: opts&linux.PTRACE_O_TRACEEXEC != 0,
+ TraceExit: opts&linux.PTRACE_O_TRACEEXIT != 0,
+ TraceFork: opts&linux.PTRACE_O_TRACEFORK != 0,
+ TraceSeccomp: opts&linux.PTRACE_O_TRACESECCOMP != 0,
+ TraceVfork: opts&linux.PTRACE_O_TRACEVFORK != 0,
+ TraceVforkDone: opts&linux.PTRACE_O_TRACEVFORKDONE != 0,
+ }
+ return nil
+}
+
// Ptrace implements the ptrace system call.
func (t *Task) Ptrace(req int64, pid ThreadID, addr, data usermem.Addr) error {
// PTRACE_TRACEME ignores all other arguments.
@@ -762,16 +856,23 @@ func (t *Task) Ptrace(req int64, pid ThreadID, addr, data usermem.Addr) error {
return syserror.ESRCH
}
- // PTRACE_ATTACH (and PTRACE_SEIZE, which is unimplemented) do not require
- // that target is not already a tracee.
- if req == linux.PTRACE_ATTACH {
- return t.ptraceAttach(target)
+ // PTRACE_ATTACH and PTRACE_SEIZE do not require that target is not already
+ // a tracee.
+ if req == linux.PTRACE_ATTACH || req == linux.PTRACE_SEIZE {
+ seize := req == linux.PTRACE_SEIZE
+ if seize && addr != 0 {
+ return syserror.EIO
+ }
+ return t.ptraceAttach(target, seize, uintptr(data))
}
- // PTRACE_KILL (and PTRACE_INTERRUPT, which is unimplemented) require that
- // the target is a tracee, but does not require that it is ptrace-stopped.
+ // PTRACE_KILL and PTRACE_INTERRUPT require that the target is a tracee,
+ // but do not require that it is ptrace-stopped.
if req == linux.PTRACE_KILL {
return t.ptraceKill(target)
}
+ if req == linux.PTRACE_INTERRUPT {
+ return t.ptraceInterrupt(target)
+ }
// All other ptrace requests require that the target is a ptrace-stopped
// tracee, and freeze the ptrace-stop so the tracee can be operated on.
t.tg.pidns.owner.mu.RLock()
@@ -801,6 +902,8 @@ func (t *Task) Ptrace(req int64, pid ThreadID, addr, data usermem.Addr) error {
t.UninterruptibleSleepFinish(true)
// Resuming commands end the ptrace stop, but only if successful.
+ // PTRACE_LISTEN ends the ptrace stop if trapNotifyPending is already set on the
+ // target.
switch req {
case linux.PTRACE_DETACH:
if err := t.ptraceDetach(target, linux.Signal(data)); err != nil {
@@ -808,37 +911,65 @@ func (t *Task) Ptrace(req int64, pid ThreadID, addr, data usermem.Addr) error {
return err
}
return nil
+
case linux.PTRACE_CONT:
if err := target.ptraceUnstop(ptraceSyscallNone, false, linux.Signal(data)); err != nil {
target.ptraceUnfreeze()
return err
}
return nil
+
case linux.PTRACE_SYSCALL:
if err := target.ptraceUnstop(ptraceSyscallIntercept, false, linux.Signal(data)); err != nil {
target.ptraceUnfreeze()
return err
}
return nil
+
case linux.PTRACE_SINGLESTEP:
if err := target.ptraceUnstop(ptraceSyscallNone, true, linux.Signal(data)); err != nil {
target.ptraceUnfreeze()
return err
}
return nil
+
case linux.PTRACE_SYSEMU:
if err := target.ptraceUnstop(ptraceSyscallEmu, false, linux.Signal(data)); err != nil {
target.ptraceUnfreeze()
return err
}
return nil
+
case linux.PTRACE_SYSEMU_SINGLESTEP:
if err := target.ptraceUnstop(ptraceSyscallEmu, true, linux.Signal(data)); err != nil {
target.ptraceUnfreeze()
return err
}
return nil
+
+ case linux.PTRACE_LISTEN:
+ t.tg.pidns.owner.mu.RLock()
+ defer t.tg.pidns.owner.mu.RUnlock()
+ if !target.ptraceSeized {
+ return syserror.EIO
+ }
+ if target.ptraceSiginfo == nil {
+ return syserror.EIO
+ }
+ if target.ptraceSiginfo.Code>>8 != linux.PTRACE_EVENT_STOP {
+ return syserror.EIO
+ }
+ target.tg.signalHandlers.mu.Lock()
+ defer target.tg.signalHandlers.mu.Unlock()
+ if target.trapNotifyPending {
+ target.endInternalStopLocked()
+ } else {
+ target.stop.(*ptraceStop).listen = true
+ target.ptraceUnfreezeLocked()
+ }
+ return nil
}
+
// All other ptrace requests expect us to unfreeze the stop.
defer target.ptraceUnfreeze()
@@ -958,30 +1089,7 @@ func (t *Task) Ptrace(req int64, pid ThreadID, addr, data usermem.Addr) error {
case linux.PTRACE_SETOPTIONS:
t.tg.pidns.owner.mu.Lock()
defer t.tg.pidns.owner.mu.Unlock()
- validOpts := uintptr(linux.PTRACE_O_EXITKILL |
- linux.PTRACE_O_TRACESYSGOOD |
- linux.PTRACE_O_TRACECLONE |
- linux.PTRACE_O_TRACEEXEC |
- linux.PTRACE_O_TRACEEXIT |
- linux.PTRACE_O_TRACEFORK |
- linux.PTRACE_O_TRACESECCOMP |
- linux.PTRACE_O_TRACEVFORK |
- linux.PTRACE_O_TRACEVFORKDONE)
- if uintptr(data)&^validOpts != 0 {
- return syserror.EINVAL
- }
- target.ptraceOpts = ptraceOptions{
- ExitKill: data&linux.PTRACE_O_EXITKILL != 0,
- SysGood: data&linux.PTRACE_O_TRACESYSGOOD != 0,
- TraceClone: data&linux.PTRACE_O_TRACECLONE != 0,
- TraceExec: data&linux.PTRACE_O_TRACEEXEC != 0,
- TraceExit: data&linux.PTRACE_O_TRACEEXIT != 0,
- TraceFork: data&linux.PTRACE_O_TRACEFORK != 0,
- TraceSeccomp: data&linux.PTRACE_O_TRACESECCOMP != 0,
- TraceVfork: data&linux.PTRACE_O_TRACEVFORK != 0,
- TraceVforkDone: data&linux.PTRACE_O_TRACEVFORKDONE != 0,
- }
- return nil
+ return target.ptraceSetOptionsLocked(uintptr(data))
case linux.PTRACE_GETEVENTMSG:
t.tg.pidns.owner.mu.RLock()
diff --git a/pkg/sentry/kernel/sessions.go b/pkg/sentry/kernel/sessions.go
index 6fd65f2b0..ae6daac60 100644
--- a/pkg/sentry/kernel/sessions.go
+++ b/pkg/sentry/kernel/sessions.go
@@ -204,7 +204,7 @@ func (pg *ProcessGroup) handleOrphan() {
return
}
tg.signalHandlers.mu.Lock()
- if tg.groupStopPhase == groupStopComplete {
+ if tg.groupStopComplete {
hasStopped = true
}
tg.signalHandlers.mu.Unlock()
diff --git a/pkg/sentry/kernel/task.go b/pkg/sentry/kernel/task.go
index e9f133c0b..f958aba26 100644
--- a/pkg/sentry/kernel/task.go
+++ b/pkg/sentry/kernel/task.go
@@ -133,28 +133,42 @@ type Task struct {
// signalStack is exclusive to the task goroutine.
signalStack arch.SignalStack
- // If groupStopRequired is true, the task should enter a group stop in the
- // interrupt path. groupStopRequired is not redundant with
- // tg.groupStopPhase != groupStopNone, because ptrace allows tracers to
- // resume individual tasks from a group stop without ending the group stop
- // as a whole.
+ // If groupStopPending is true, the task should participate in a group
+ // stop in the interrupt path.
//
- // groupStopRequired is analogous to JOBCTL_TRAP_STOP in Linux, except that
- // Linux only uses that flag for ptraced tasks.
+ // groupStopPending is analogous to JOBCTL_STOP_PENDING in Linux.
//
- // groupStopRequired is protected by the signal mutex.
- groupStopRequired bool
+ // groupStopPending is protected by the signal mutex.
+ groupStopPending bool
// If groupStopAcknowledged is true, the task has already acknowledged that
// it is entering the most recent group stop that has been initiated on its
- // thread group. groupStopAcknowledged is only meaningful if
- // tg.groupStopPhase == groupStopInitiated.
+ // thread group.
//
// groupStopAcknowledged is analogous to !JOBCTL_STOP_CONSUME in Linux.
//
// groupStopAcknowledged is protected by the signal mutex.
groupStopAcknowledged bool
+ // If trapStopPending is true, the task goroutine should enter a
+ // PTRACE_INTERRUPT-induced stop from the interrupt path.
+ //
+ // trapStopPending is analogous to JOBCTL_TRAP_STOP in Linux, except that
+ // Linux also sets JOBCTL_TRAP_STOP when a ptraced task detects
+ // JOBCTL_STOP_PENDING.
+ //
+ // trapStopPending is protected by the signal mutex.
+ trapStopPending bool
+
+ // If trapNotifyPending is true, this task is PTRACE_SEIZEd, and a group
+ // stop has begun or ended since the last time the task entered a
+ // ptrace-stop from the group-stop path.
+ //
+ // trapNotifyPending is analogous to JOBCTL_TRAP_NOTIFY in Linux.
+ //
+ // trapNotifyPending is protected by the signal mutex.
+ trapNotifyPending bool
+
// If stop is not nil, it is the internally-initiated condition that
// currently prevents the task goroutine from running.
//
@@ -296,6 +310,12 @@ type Task struct {
// ptraceTracees is protected by the TaskSet mutex.
ptraceTracees map[*Task]struct{}
+ // ptraceSeized is true if ptraceTracer attached to this task with
+ // PTRACE_SEIZE.
+ //
+ // ptraceSeized is protected by the TaskSet mutex.
+ ptraceSeized bool
+
// ptraceOpts contains ptrace options explicitly set by the tracer. If
// ptraceTracer is nil, ptraceOpts is expected to be the zero value.
//
diff --git a/pkg/sentry/kernel/task_exit.go b/pkg/sentry/kernel/task_exit.go
index 791cc9831..b9c558ccb 100644
--- a/pkg/sentry/kernel/task_exit.go
+++ b/pkg/sentry/kernel/task_exit.go
@@ -122,7 +122,6 @@ func (t *Task) killLocked() {
if t.stop != nil && t.stop.Killable() {
t.endInternalStopLocked()
}
- t.groupStopRequired = false
t.pendingSignals.enqueue(&arch.SignalInfo{
Signo: int32(linux.SIGKILL),
// Linux just sets SIGKILL in the pending signal bitmask without
@@ -304,33 +303,16 @@ func (t *Task) exitThreadGroup() bool {
t.setSignalMaskLocked(^linux.SignalSet(0))
// Check if this task's exit interacts with an initiated group stop.
- if t.tg.groupStopPhase != groupStopInitiated {
+ if !t.groupStopPending {
t.tg.signalHandlers.mu.Unlock()
return last
}
- if t.groupStopAcknowledged {
- // Un-acknowledge the group stop.
- t.tg.groupStopCount--
- t.groupStopAcknowledged = false
- // If the group stop wasn't complete before, then there is still at
- // least one other task that hasn't acknowledged the group stop, so
- // it is still not complete now.
- t.tg.signalHandlers.mu.Unlock()
- return last
- }
- if t.tg.groupStopCount != t.tg.activeTasks {
- t.tg.signalHandlers.mu.Unlock()
- return last
- }
- t.Debugf("Completing group stop")
- t.tg.groupStopPhase = groupStopComplete
- t.tg.groupStopWaitable = true
+ t.groupStopPending = false
sig := t.tg.groupStopSignal
- t.tg.groupContNotify = false
- t.tg.groupContWaitable = false
+ notifyParent := t.participateGroupStopLocked()
// signalStop must be called with t's signal mutex unlocked.
t.tg.signalHandlers.mu.Unlock()
- if t.tg.leader.parent != nil {
+ if notifyParent && t.tg.leader.parent != nil {
t.tg.leader.parent.signalStop(t, arch.CLD_STOPPED, int32(sig))
t.tg.leader.parent.tg.eventQueue.Notify(EventChildGroupStop)
}
diff --git a/pkg/sentry/kernel/task_signals.go b/pkg/sentry/kernel/task_signals.go
index 583acddb1..6a204aa59 100644
--- a/pkg/sentry/kernel/task_signals.go
+++ b/pkg/sentry/kernel/task_signals.go
@@ -748,48 +748,21 @@ type groupStop struct{}
// Killable implements TaskStop.Killable.
func (*groupStop) Killable() bool { return true }
-type groupStopPhase int
-
-const (
- // groupStopNone indicates that a thread group is not in, or attempting to
- // enter or leave, a group stop.
- groupStopNone groupStopPhase = iota
-
- // groupStopDequeued indicates that at least one task in a thread group has
- // dequeued a stop signal (or dequeued any signal and entered a
- // signal-delivery-stop as a result, which allows ptrace to change the
- // signal into a stop signal), but temporarily dropped the signal mutex
- // without initiating the group stop.
- //
- // groupStopDequeued is analogous to JOBCTL_STOP_DEQUEUED in Linux.
- groupStopDequeued
-
- // groupStopInitiated indicates that a task in a thread group has initiated
- // a group stop, but not all tasks in the thread group have acknowledged
- // entering the group stop.
- //
- // groupStopInitiated is represented by JOBCTL_STOP_PENDING &&
- // !SIGNAL_STOP_STOPPED in Linux.
- groupStopInitiated
-
- // groupStopComplete indicates that all tasks in a thread group have
- // acknowledged entering the group stop, and the last one to do so has
- // notified the thread group's parent.
- //
- // groupStopComplete is represented by JOBCTL_STOP_PENDING &&
- // SIGNAL_STOP_STOPPED in Linux.
- groupStopComplete
-)
-
// initiateGroupStop attempts to initiate a group stop based on a
// previously-dequeued stop signal.
//
// Preconditions: The caller must be running on the task goroutine.
func (t *Task) initiateGroupStop(info *arch.SignalInfo) {
+ t.tg.pidns.owner.mu.RLock()
+ defer t.tg.pidns.owner.mu.RUnlock()
t.tg.signalHandlers.mu.Lock()
defer t.tg.signalHandlers.mu.Unlock()
- if t.tg.groupStopPhase != groupStopDequeued {
- t.Debugf("Signal %d: not stopping thread group: lost to racing signal", info.Signo)
+ if t.groupStopPending {
+ t.Debugf("Signal %d: not stopping thread group: lost to racing stop signal", info.Signo)
+ return
+ }
+ if !t.tg.groupStopDequeued {
+ t.Debugf("Signal %d: not stopping thread group: lost to racing SIGCONT", info.Signo)
return
}
if t.tg.exiting {
@@ -800,15 +773,27 @@ func (t *Task) initiateGroupStop(info *arch.SignalInfo) {
t.Debugf("Signal %d: not stopping thread group: lost to racing execve", info.Signo)
return
}
- t.Debugf("Signal %d: stopping thread group", info.Signo)
- t.tg.groupStopPhase = groupStopInitiated
- t.tg.groupStopSignal = linux.Signal(info.Signo)
- t.tg.groupStopCount = 0
+ if !t.tg.groupStopComplete {
+ t.tg.groupStopSignal = linux.Signal(info.Signo)
+ }
+ t.tg.groupStopPendingCount = 0
for t2 := t.tg.tasks.Front(); t2 != nil; t2 = t2.Next() {
- t2.groupStopRequired = true
+ if t2.killedLocked() || t2.exitState >= TaskExitInitiated {
+ t2.groupStopPending = false
+ continue
+ }
+ t2.groupStopPending = true
t2.groupStopAcknowledged = false
+ if t2.ptraceSeized {
+ t2.trapNotifyPending = true
+ if s, ok := t2.stop.(*ptraceStop); ok && s.listen {
+ t2.endInternalStopLocked()
+ }
+ }
t2.interrupt()
+ t.tg.groupStopPendingCount++
}
+ t.Debugf("Signal %d: stopping %d threads in thread group", info.Signo, t.tg.groupStopPendingCount)
}
// endGroupStopLocked ensures that all prior stop signals received by tg are
@@ -820,37 +805,77 @@ func (tg *ThreadGroup) endGroupStopLocked(broadcast bool) {
// Discard all previously-queued stop signals.
linux.ForEachSignal(StopSignals, tg.discardSpecificLocked)
- if tg.groupStopPhase != groupStopNone {
- tg.leader.Debugf("Ending group stop currently in phase %d", tg.groupStopPhase)
- if tg.groupStopPhase == groupStopInitiated || tg.groupStopPhase == groupStopComplete {
- tg.groupStopSignal = 0
- for t := tg.tasks.Front(); t != nil; t = t.Next() {
- if _, ok := t.stop.(*groupStop); ok {
- t.endInternalStopLocked()
- }
+ if tg.groupStopPendingCount == 0 && !tg.groupStopComplete {
+ return
+ }
+
+ completeStr := "incomplete"
+ if tg.groupStopComplete {
+ completeStr = "complete"
+ }
+ tg.leader.Debugf("Ending %s group stop with %d threads pending", completeStr, tg.groupStopPendingCount)
+ for t := tg.tasks.Front(); t != nil; t = t.Next() {
+ t.groupStopPending = false
+ if t.ptraceSeized {
+ t.trapNotifyPending = true
+ if s, ok := t.stop.(*ptraceStop); ok && s.listen {
+ t.endInternalStopLocked()
}
- if broadcast {
- // Instead of notifying the parent here, set groupContNotify so
- // that one of the continuing tasks does so. (Linux does
- // something similar.) The reason we do this is to keep locking
- // sane. In order to send a signal to the parent, we need to
- // lock its signal mutex, but we're already holding tg's signal
- // mutex, and the TaskSet mutex must be locked for writing for
- // us to hold two signal mutexes. Since we don't want to
- // require this for endGroupStopLocked (which is called from
- // signal-sending paths), nor do we want to lose atomicity by
- // releasing the mutexes we're already holding, just let the
- // continuing thread group deal with it.
- tg.groupContNotify = true
- tg.groupContInterrupted = tg.groupStopPhase == groupStopInitiated
- tg.groupContWaitable = true
+ } else {
+ if _, ok := t.stop.(*groupStop); ok {
+ t.endInternalStopLocked()
}
}
- // If groupStopPhase was groupStopDequeued, setting it to groupStopNone
- // will cause following calls to initiateGroupStop to recognize that
- // the group stop has been cancelled.
- tg.groupStopPhase = groupStopNone
}
+ if broadcast {
+ // Instead of notifying the parent here, set groupContNotify so that
+ // one of the continuing tasks does so. (Linux does something similar.)
+ // The reason we do this is to keep locking sane. In order to send a
+ // signal to the parent, we need to lock its signal mutex, but we're
+ // already holding tg's signal mutex, and the TaskSet mutex must be
+ // locked for writing for us to hold two signal mutexes. Since we don't
+ // want to require this for endGroupStopLocked (which is called from
+ // signal-sending paths), nor do we want to lose atomicity by releasing
+ // the mutexes we're already holding, just let the continuing thread
+ // group deal with it.
+ tg.groupContNotify = true
+ tg.groupContInterrupted = !tg.groupStopComplete
+ tg.groupContWaitable = true
+ }
+ // Unsetting groupStopDequeued will cause racing calls to initiateGroupStop
+ // to recognize that the group stop has been cancelled.
+ tg.groupStopDequeued = false
+ tg.groupStopSignal = 0
+ tg.groupStopPendingCount = 0
+ tg.groupStopComplete = false
+ tg.groupStopWaitable = false
+}
+
+// participateGroupStopLocked is called to handle thread group side effects
+// after t unsets t.groupStopPending. The caller must handle task side effects
+// (e.g. placing the task goroutine into the group stop). It returns true if
+// the caller must notify t.tg.leader's parent of a completed group stop (which
+// participateGroupStopLocked cannot do due to holding the wrong locks).
+//
+// Preconditions: The signal mutex must be locked.
+func (t *Task) participateGroupStopLocked() bool {
+ if t.groupStopAcknowledged {
+ return false
+ }
+ t.groupStopAcknowledged = true
+ t.tg.groupStopPendingCount--
+ if t.tg.groupStopPendingCount != 0 {
+ return false
+ }
+ if t.tg.groupStopComplete {
+ return false
+ }
+ t.Debugf("Completing group stop")
+ t.tg.groupStopComplete = true
+ t.tg.groupStopWaitable = true
+ t.tg.groupContNotify = false
+ t.tg.groupContWaitable = false
+ return true
}
// signalStop sends a signal to t's thread group of a new group stop, group
@@ -899,7 +924,7 @@ func (*runInterrupt) execute(t *Task) taskRunState {
// leader's) tracer are in the same thread group, deduplicate
// notifications.
notifyParent := t.tg.leader.parent != nil
- if tracer := t.tg.leader.ptraceTracer.Load().(*Task); tracer != nil {
+ if tracer := t.tg.leader.Tracer(); tracer != nil {
if notifyParent && tracer.tg == t.tg.leader.parent.tg {
notifyParent = false
}
@@ -938,23 +963,21 @@ func (*runInterrupt) execute(t *Task) taskRunState {
return (*runInterrupt)(nil)
}
- // Do we need to enter a group stop?
- if t.groupStopRequired {
- t.groupStopRequired = false
+ // Do we need to enter a group stop or related ptrace stop? This path is
+ // analogous to Linux's kernel/signal.c:get_signal() => do_signal_stop()
+ // (with ptrace enabled) and do_jobctl_trap().
+ if t.groupStopPending || t.trapStopPending || t.trapNotifyPending {
sig := t.tg.groupStopSignal
notifyParent := false
- if !t.groupStopAcknowledged {
- t.groupStopAcknowledged = true
- t.tg.groupStopCount++
- if t.tg.groupStopCount == t.tg.activeTasks {
- t.Debugf("Completing group stop")
- notifyParent = true
- t.tg.groupStopPhase = groupStopComplete
- t.tg.groupStopWaitable = true
- t.tg.groupContNotify = false
- t.tg.groupContWaitable = false
- }
+ if t.groupStopPending {
+ t.groupStopPending = false
+ // We care about t.tg.groupStopSignal (for tracer notification)
+ // even if this doesn't complete a group stop, so keep the
+ // value of sig we've already read.
+ notifyParent = t.participateGroupStopLocked()
}
+ t.trapStopPending = false
+ t.trapNotifyPending = false
// Drop the signal mutex so we can take the TaskSet mutex.
t.tg.signalHandlers.mu.Unlock()
@@ -963,8 +986,26 @@ func (*runInterrupt) execute(t *Task) taskRunState {
notifyParent = false
}
if tracer := t.Tracer(); tracer != nil {
- t.ptraceCode = int32(sig)
- t.ptraceSiginfo = nil
+ if t.ptraceSeized {
+ if sig == 0 {
+ sig = linux.SIGTRAP
+ }
+ // "If tracee was attached using PTRACE_SEIZE, group-stop is
+ // indicated by PTRACE_EVENT_STOP: status>>16 ==
+ // PTRACE_EVENT_STOP. This allows detection of group-stops
+ // without requiring an extra PTRACE_GETSIGINFO call." -
+ // "Group-stop", ptrace(2)
+ t.ptraceCode = int32(sig) | linux.PTRACE_EVENT_STOP<<8
+ t.ptraceSiginfo = &arch.SignalInfo{
+ Signo: int32(sig),
+ Code: t.ptraceCode,
+ }
+ t.ptraceSiginfo.SetPid(int32(t.tg.pidns.tids[t]))
+ t.ptraceSiginfo.SetUid(int32(t.Credentials().RealKUID.In(t.UserNamespace()).OrOverflow()))
+ } else {
+ t.ptraceCode = int32(sig)
+ t.ptraceSiginfo = nil
+ }
if t.beginPtraceStopLocked() {
tracer.signalStop(t, arch.CLD_STOPPED, int32(sig))
// For consistency with Linux, if the parent and tracer are in the
@@ -994,12 +1035,11 @@ func (*runInterrupt) execute(t *Task) taskRunState {
// Are there signals pending?
if info := t.dequeueSignalLocked(t.signalMask); info != nil {
- if linux.SignalSetOf(linux.Signal(info.Signo))&StopSignals != 0 && t.tg.groupStopPhase == groupStopNone {
- // Indicate that we've dequeued a stop signal before
- // unlocking the signal mutex; initiateGroupStop will check
- // that the phase hasn't changed (or is at least another
- // "stop signal dequeued" phase) after relocking it.
- t.tg.groupStopPhase = groupStopDequeued
+ if linux.SignalSetOf(linux.Signal(info.Signo))&StopSignals != 0 {
+ // Indicate that we've dequeued a stop signal before unlocking the
+ // signal mutex; initiateGroupStop will check for races with
+ // endGroupStopLocked after relocking it.
+ t.tg.groupStopDequeued = true
}
if t.ptraceSignalLocked(info) {
// Dequeueing the signal action must wait until after the
diff --git a/pkg/sentry/kernel/thread_group.go b/pkg/sentry/kernel/thread_group.go
index d7652f57c..1b7b74319 100644
--- a/pkg/sentry/kernel/thread_group.go
+++ b/pkg/sentry/kernel/thread_group.go
@@ -60,25 +60,35 @@ type ThreadGroup struct {
// pendingSignals is protected by the signal mutex.
pendingSignals pendingSignals
- // groupStopPhase indicates the state of a group stop in progress on the
- // thread group, if any.
+ // If groupStopDequeued is true, a task in the thread group has dequeued a
+ // stop signal, but has not yet initiated the group stop.
//
- // groupStopPhase is protected by the signal mutex.
- groupStopPhase groupStopPhase
+ // groupStopDequeued is analogous to Linux's JOBCTL_STOP_DEQUEUED.
+ //
+ // groupStopDequeued is protected by the signal mutex.
+ groupStopDequeued bool
// groupStopSignal is the signal that caused a group stop to be initiated.
- // groupStopSignal is only meaningful if groupStopPhase is
- // groupStopInitiated or groupStopComplete.
//
// groupStopSignal is protected by the signal mutex.
groupStopSignal linux.Signal
- // groupStopCount is the number of non-exited tasks in the thread group
- // that have acknowledged an initiated group stop. groupStopCount is only
- // meaningful if groupStopPhase is groupStopInitiated.
+ // groupStopPendingCount is the number of active tasks in the thread group
+ // for which Task.groupStopPending is set.
+ //
+ // groupStopPendingCount is analogous to Linux's
+ // signal_struct::group_stop_count.
//
- // groupStopCount is protected by the signal mutex.
- groupStopCount int
+ // groupStopPendingCount is protected by the signal mutex.
+ groupStopPendingCount int
+
+ // If groupStopComplete is true, groupStopPendingCount transitioned from
+ // non-zero to zero without an intervening SIGCONT.
+ //
+ // groupStopComplete is analogous to Linux's SIGNAL_STOP_STOPPED.
+ //
+ // groupStopComplete is protected by the signal mutex.
+ groupStopComplete bool
// If groupStopWaitable is true, the thread group is indicating a waitable
// group stop event (as defined by EventChildGroupStop).
@@ -91,14 +101,9 @@ type ThreadGroup struct {
// If groupContNotify is true, then a SIGCONT has recently ended a group
// stop on this thread group, and the first task to observe it should
- // notify its parent.
- //
- // groupContNotify is protected by the signal mutex.
- groupContNotify bool
-
- // If groupContNotify is true, groupContInterrupted is true iff SIGCONT
- // ended a group stop in phase groupStopInitiated. If groupContNotify is
- // false, groupContInterrupted is meaningless.
+ // notify its parent. groupContInterrupted is true iff SIGCONT ended an
+ // incomplete group stop. If groupContNotify is false, groupContInterrupted is
+ // meaningless.
//
// Analogues in Linux:
//
@@ -110,7 +115,9 @@ type ThreadGroup struct {
//
// - !groupContNotify is represented by neither flag being set.
//
- // groupContInterrupted is protected by the signal mutex.
+ // groupContNotify and groupContInterrupted are protected by the signal
+ // mutex.
+ groupContNotify bool
groupContInterrupted bool
// If groupContWaitable is true, the thread group is indicating a waitable