summaryrefslogtreecommitdiffhomepage
path: root/pkg/sentry/kernel/task_signals.go
diff options
context:
space:
mode:
Diffstat (limited to 'pkg/sentry/kernel/task_signals.go')
-rw-r--r--pkg/sentry/kernel/task_signals.go1110
1 files changed, 1110 insertions, 0 deletions
diff --git a/pkg/sentry/kernel/task_signals.go b/pkg/sentry/kernel/task_signals.go
new file mode 100644
index 000000000..654cf7525
--- /dev/null
+++ b/pkg/sentry/kernel/task_signals.go
@@ -0,0 +1,1110 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package kernel
+
+// This file defines the behavior of task signal handling.
+
+import (
+ "fmt"
+ "sync/atomic"
+ "time"
+
+ "gvisor.googlesource.com/gvisor/pkg/abi/linux"
+ "gvisor.googlesource.com/gvisor/pkg/eventchannel"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/arch"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth"
+ ucspb "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/uncaught_signal_go_proto"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
+ "gvisor.googlesource.com/gvisor/pkg/syserror"
+)
+
+// SignalAction is an internal signal action.
+type SignalAction int
+
+// Available signal actions.
+// Note that although we refer the complete set internally,
+// the application is only capable of using the Default and
+// Ignore actions from the system call interface.
+const (
+ SignalActionTerm SignalAction = iota
+ SignalActionCore
+ SignalActionStop
+ SignalActionIgnore
+ SignalActionHandler
+)
+
+// Default signal handler actions. Note that for most signals,
+// (except SIGKILL and SIGSTOP) these can be overridden by the app.
+var defaultActions = map[linux.Signal]SignalAction{
+ // POSIX.1-1990 standard.
+ linux.SIGHUP: SignalActionTerm,
+ linux.SIGINT: SignalActionTerm,
+ linux.SIGQUIT: SignalActionCore,
+ linux.SIGILL: SignalActionCore,
+ linux.SIGABRT: SignalActionCore,
+ linux.SIGFPE: SignalActionCore,
+ linux.SIGKILL: SignalActionTerm, // but see ThreadGroup.applySignalSideEffects
+ linux.SIGSEGV: SignalActionCore,
+ linux.SIGPIPE: SignalActionTerm,
+ linux.SIGALRM: SignalActionTerm,
+ linux.SIGTERM: SignalActionTerm,
+ linux.SIGUSR1: SignalActionTerm,
+ linux.SIGUSR2: SignalActionTerm,
+ linux.SIGCHLD: SignalActionIgnore,
+ linux.SIGCONT: SignalActionIgnore, // but see ThreadGroup.applySignalSideEffects
+ linux.SIGSTOP: SignalActionStop,
+ linux.SIGTSTP: SignalActionStop,
+ linux.SIGTTIN: SignalActionStop,
+ linux.SIGTTOU: SignalActionStop,
+ // POSIX.1-2001 standard.
+ linux.SIGBUS: SignalActionCore,
+ linux.SIGPROF: SignalActionTerm,
+ linux.SIGSYS: SignalActionCore,
+ linux.SIGTRAP: SignalActionCore,
+ linux.SIGURG: SignalActionIgnore,
+ linux.SIGVTALRM: SignalActionTerm,
+ linux.SIGXCPU: SignalActionCore,
+ linux.SIGXFSZ: SignalActionCore,
+ // The rest on linux.
+ linux.SIGSTKFLT: SignalActionTerm,
+ linux.SIGIO: SignalActionTerm,
+ linux.SIGPWR: SignalActionTerm,
+ linux.SIGWINCH: SignalActionIgnore,
+}
+
+// computeAction figures out what to do given a signal number
+// and an arch.SignalAct. SIGSTOP always results in a SignalActionStop,
+// and SIGKILL always results in a SignalActionTerm.
+// Signal 0 is always ignored as many programs use it for various internal functions
+// and don't expect it to do anything.
+//
+// In the event the signal is not one of these, act.Handler determines what
+// happens next.
+// If act.Handler is:
+// 0, the default action is taken;
+// 1, the signal is ignored;
+// anything else, the function returns SignalActionHandler.
+func computeAction(sig linux.Signal, act arch.SignalAct) SignalAction {
+ switch sig {
+ case linux.SIGSTOP:
+ return SignalActionStop
+ case linux.SIGKILL:
+ return SignalActionTerm
+ case linux.Signal(0):
+ return SignalActionIgnore
+ }
+
+ switch act.Handler {
+ case arch.SignalActDefault:
+ return defaultActions[sig]
+ case arch.SignalActIgnore:
+ return SignalActionIgnore
+ default:
+ return SignalActionHandler
+ }
+}
+
+// UnblockableSignals contains the set of signals which cannot be blocked.
+var UnblockableSignals = linux.MakeSignalSet(linux.SIGKILL, linux.SIGSTOP)
+
+// StopSignals is the set of signals whose default action is SignalActionStop.
+var StopSignals = linux.MakeSignalSet(linux.SIGSTOP, linux.SIGTSTP, linux.SIGTTIN, linux.SIGTTOU)
+
+// dequeueSignalLocked returns a pending signal that is *not* included in mask.
+// If there are no pending unmasked signals, dequeueSignalLocked returns nil.
+//
+// Preconditions: t.tg.signalHandlers.mu must be locked.
+func (t *Task) dequeueSignalLocked(mask linux.SignalSet) *arch.SignalInfo {
+ if info := t.pendingSignals.dequeue(mask); info != nil {
+ return info
+ }
+ return t.tg.pendingSignals.dequeue(mask)
+}
+
+// discardSpecificLocked removes all instances of the given signal from all
+// signal queues in tg.
+//
+// Preconditions: The signal mutex must be locked.
+func (tg *ThreadGroup) discardSpecificLocked(sig linux.Signal) {
+ tg.pendingSignals.discardSpecific(sig)
+ for t := tg.tasks.Front(); t != nil; t = t.Next() {
+ t.pendingSignals.discardSpecific(sig)
+ }
+}
+
+// PendingSignals returns the set of pending signals.
+func (t *Task) PendingSignals() linux.SignalSet {
+ t.tg.pidns.owner.mu.RLock()
+ defer t.tg.pidns.owner.mu.RUnlock()
+ t.tg.signalHandlers.mu.Lock()
+ defer t.tg.signalHandlers.mu.Unlock()
+ return t.pendingSignals.pendingSet | t.tg.pendingSignals.pendingSet
+}
+
+// deliverSignal delivers the given signal and returns the following run state.
+func (t *Task) deliverSignal(info *arch.SignalInfo, act arch.SignalAct) taskRunState {
+ sigact := computeAction(linux.Signal(info.Signo), act)
+
+ if t.haveSyscallReturn {
+ if sre, ok := SyscallRestartErrnoFromReturn(t.Arch().Return()); ok {
+ // Signals that are ignored, cause a thread group stop, or
+ // terminate the thread group do not interact with interrupted
+ // syscalls; in Linux terms, they are never returned to the signal
+ // handling path from get_signal => get_signal_to_deliver. The
+ // behavior of an interrupted syscall is determined by the first
+ // signal that is actually handled (by userspace).
+ if sigact == SignalActionHandler {
+ switch {
+ case sre == ERESTARTNOHAND:
+ fallthrough
+ case sre == ERESTART_RESTARTBLOCK:
+ fallthrough
+ case (sre == ERESTARTSYS && !act.IsRestart()):
+ t.Debugf("Not restarting syscall %d after errno %d: interrupted by signal %d", t.Arch().SyscallNo(), sre, info.Signo)
+ t.Arch().SetReturn(uintptr(-t.ExtractErrno(syserror.EINTR, -1)))
+ default:
+ t.Debugf("Restarting syscall %d after errno %d: interrupted by signal %d", t.Arch().SyscallNo(), sre, info.Signo)
+ t.Arch().RestartSyscall()
+ }
+ }
+ }
+ }
+
+ switch sigact {
+ case SignalActionTerm, SignalActionCore:
+ // "Default action is to terminate the process." - signal(7)
+ t.Debugf("Signal %d: terminating thread group", info.Signo)
+
+ // Emit an event channel messages related to this uncaught signal.
+ ucs := &ucspb.UncaughtSignal{
+ Tid: int32(t.Kernel().TaskSet().Root.IDOfTask(t)),
+ Pid: int32(t.Kernel().TaskSet().Root.IDOfThreadGroup(t.ThreadGroup())),
+ Registers: t.Arch().StateData().Proto(),
+ SignalNumber: info.Signo,
+ }
+
+ // Attach an fault address if appropriate.
+ switch linux.Signal(info.Signo) {
+ case linux.SIGSEGV, linux.SIGFPE, linux.SIGILL, linux.SIGTRAP, linux.SIGBUS:
+ ucs.FaultAddr = info.Addr()
+ }
+
+ eventchannel.Emit(ucs)
+
+ t.PrepareGroupExit(ExitStatus{Signo: int(info.Signo)})
+ return (*runExit)(nil)
+
+ case SignalActionStop:
+ // "Default action is to stop the process."
+ t.initiateGroupStop(info)
+
+ case SignalActionIgnore:
+ // "Default action is to ignore the signal."
+ t.Debugf("Signal %d: ignored", info.Signo)
+
+ case SignalActionHandler:
+ // Try to deliver the signal to the user-configured handler.
+ t.Debugf("Signal %d: delivering to handler", info.Signo)
+ if err := t.deliverSignalToHandler(info, act); err != nil {
+ // This is not a warning, it can occur during normal operation.
+ t.Debugf("Failed to deliver signal %+v to user handler: %v", info, err)
+
+ // Send a forced SIGSEGV. If the signal that couldn't be delivered
+ // was a SIGSEGV, force the handler to SIG_DFL.
+ t.forceSignal(linux.SIGSEGV, linux.Signal(info.Signo) == linux.SIGSEGV /* unconditional */)
+ t.SendSignal(SignalInfoPriv(linux.SIGSEGV))
+ }
+
+ default:
+ panic(fmt.Sprintf("Unknown signal action %+v, %d?", info, computeAction(linux.Signal(info.Signo), act)))
+ }
+ return (*runInterrupt)(nil)
+}
+
+// deliverSignalToHandler changes the task's userspace state to enter the given
+// user-configured handler for the given signal.
+func (t *Task) deliverSignalToHandler(info *arch.SignalInfo, act arch.SignalAct) error {
+ // Signal delivery to an application handler interrupts restartable
+ // sequences.
+ t.rseqInterrupt()
+
+ // Are executing on the main stack,
+ // or the provided alternate stack?
+ sp := usermem.Addr(t.Arch().Stack())
+
+ // N.B. This is a *copy* of the alternate stack that the user's signal
+ // handler expects to see in its ucontext (even if it's not in use).
+ alt := t.signalStack
+ if act.IsOnStack() && alt.IsEnabled() {
+ alt.SetOnStack()
+ if !alt.Contains(sp) {
+ sp = usermem.Addr(alt.Top())
+ }
+ }
+
+ // Set up the signal handler. If we have a saved signal mask, the signal
+ // handler should run with the current mask, but sigreturn should restore
+ // the saved one.
+ st := &arch.Stack{t.Arch(), t.MemoryManager(), sp}
+ mask := t.signalMask
+ if t.haveSavedSignalMask {
+ mask = t.savedSignalMask
+ }
+ if err := t.Arch().SignalSetup(st, &act, info, &alt, mask); err != nil {
+ return err
+ }
+ t.haveSavedSignalMask = false
+
+ // Add our signal mask.
+ newMask := t.signalMask | act.Mask
+ if !act.IsNoDefer() {
+ newMask |= linux.SignalSetOf(linux.Signal(info.Signo))
+ }
+ t.SetSignalMask(newMask)
+
+ return nil
+}
+
+var ctrlResume = &SyscallControl{ignoreReturn: true}
+
+// SignalReturn implements sigreturn(2) (if rt is false) or rt_sigreturn(2) (if
+// rt is true).
+func (t *Task) SignalReturn(rt bool) (*SyscallControl, error) {
+ st := t.Stack()
+ sigset, alt, err := t.Arch().SignalRestore(st, rt)
+ if err != nil {
+ return nil, err
+ }
+
+ // Attempt to record the given signal stack. Note that we silently
+ // ignore failures here, as does Linux. Only an EFAULT may be
+ // generated, but SignalRestore has already deserialized the entire
+ // frame successfully.
+ t.SetSignalStack(alt)
+
+ // Restore our signal mask. SIGKILL and SIGSTOP should not be blocked.
+ t.SetSignalMask(sigset &^ UnblockableSignals)
+
+ return ctrlResume, nil
+}
+
+// Sigtimedwait implements the semantics of sigtimedwait(2).
+//
+// Preconditions: The caller must be running on the task goroutine. t.exitState
+// < TaskExitZombie.
+func (t *Task) Sigtimedwait(set linux.SignalSet, timeout time.Duration) (*arch.SignalInfo, error) {
+ // set is the set of signals we're interested in; invert it to get the set
+ // of signals to block.
+ mask := ^(set &^ UnblockableSignals)
+
+ t.tg.signalHandlers.mu.Lock()
+ defer t.tg.signalHandlers.mu.Unlock()
+ if info := t.dequeueSignalLocked(mask); info != nil {
+ return info, nil
+ }
+
+ if timeout == 0 {
+ return nil, syserror.EAGAIN
+ }
+
+ // Unblock signals we're waiting for. Remember the original signal mask so
+ // that Task.sendSignalTimerLocked doesn't discard ignored signals that
+ // we're temporarily unblocking.
+ t.realSignalMask = t.signalMask
+ t.setSignalMaskLocked(t.signalMask & mask)
+
+ // Wait for a timeout or new signal.
+ t.tg.signalHandlers.mu.Unlock()
+ _, err := t.BlockWithTimeout(nil, true, timeout)
+ t.tg.signalHandlers.mu.Lock()
+
+ // Restore the original signal mask.
+ t.setSignalMaskLocked(t.realSignalMask)
+ t.realSignalMask = 0
+
+ if info := t.dequeueSignalLocked(mask); info != nil {
+ return info, nil
+ }
+ if err == syserror.ETIMEDOUT {
+ return nil, syserror.EAGAIN
+ }
+ return nil, err
+}
+
+// SendSignal sends the given signal to t.
+//
+// The following errors may be returned:
+//
+// syserror.ESRCH - The task has exited.
+// syserror.EINVAL - The signal is not valid.
+// syserror.EAGAIN - THe signal is realtime, and cannot be queued.
+//
+func (t *Task) SendSignal(info *arch.SignalInfo) error {
+ t.tg.pidns.owner.mu.RLock()
+ defer t.tg.pidns.owner.mu.RUnlock()
+ t.tg.signalHandlers.mu.Lock()
+ defer t.tg.signalHandlers.mu.Unlock()
+ return t.sendSignalLocked(info, false /* group */)
+}
+
+// SendGroupSignal sends the given signal to t's thread group.
+func (t *Task) SendGroupSignal(info *arch.SignalInfo) error {
+ t.tg.pidns.owner.mu.RLock()
+ defer t.tg.pidns.owner.mu.RUnlock()
+ t.tg.signalHandlers.mu.Lock()
+ defer t.tg.signalHandlers.mu.Unlock()
+ return t.sendSignalLocked(info, true /* group */)
+}
+
+// SendSignal sends the given signal to tg, using tg's leader to determine if
+// the signal is blocked.
+func (tg *ThreadGroup) SendSignal(info *arch.SignalInfo) error {
+ tg.pidns.owner.mu.RLock()
+ defer tg.pidns.owner.mu.RUnlock()
+ tg.signalHandlers.mu.Lock()
+ defer tg.signalHandlers.mu.Unlock()
+ return tg.leader.sendSignalLocked(info, true /* group */)
+}
+
+func (t *Task) sendSignalLocked(info *arch.SignalInfo, group bool) error {
+ return t.sendSignalTimerLocked(info, group, nil)
+}
+
+func (t *Task) sendSignalTimerLocked(info *arch.SignalInfo, group bool, timer *IntervalTimer) error {
+ if t.exitState == TaskExitDead {
+ return syserror.ESRCH
+ }
+ sig := linux.Signal(info.Signo)
+ if sig == 0 {
+ return nil
+ }
+ if !sig.IsValid() {
+ return syserror.EINVAL
+ }
+
+ // Signal side effects apply even if the signal is ultimately discarded.
+ t.tg.applySignalSideEffectsLocked(sig)
+
+ // TODO: "Only signals for which the "init" process has established a
+ // signal handler can be sent to the "init" process by other members of the
+ // PID namespace. This restriction applies even to privileged processes,
+ // and prevents other members of the PID namespace from accidentally
+ // killing the "init" process." - pid_namespaces(7). We don't currently do
+ // this for child namespaces, though we should; we also don't do this for
+ // the root namespace (the same restriction applies to global init on
+ // Linux), where whether or not we should is much murkier. In practice,
+ // most sandboxed applications are not prepared to function as an init
+ // process.
+
+ // Unmasked, ignored signals are discarded without being queued, unless
+ // they will be visible to a tracer. Even for group signals, it's the
+ // originally-targeted task's signal mask and tracer that matter; compare
+ // Linux's kernel/signal.c:__send_signal() => prepare_signal() =>
+ // sig_ignored().
+ ignored := computeAction(sig, t.tg.signalHandlers.actions[sig]) == SignalActionIgnore
+ if sigset := linux.SignalSetOf(sig); sigset&t.signalMask == 0 && sigset&t.realSignalMask == 0 && ignored && !t.hasTracer() {
+ t.Debugf("Discarding ignored signal %d", sig)
+ if timer != nil {
+ timer.signalRejectedLocked()
+ }
+ return nil
+ }
+
+ q := &t.pendingSignals
+ if group {
+ q = &t.tg.pendingSignals
+ }
+ if !q.enqueue(info, timer) {
+ if sig.IsRealtime() {
+ return syserror.EAGAIN
+ }
+ t.Debugf("Discarding duplicate signal %d", sig)
+ if timer != nil {
+ timer.signalRejectedLocked()
+ }
+ return nil
+ }
+
+ // Find a receiver to notify. Note that the task we choose to notify, if
+ // any, may not be the task that actually dequeues and handles the signal;
+ // e.g. a racing signal mask change may cause the notified task to become
+ // ineligible, or a racing sibling task may dequeue the signal first.
+ if t.canReceiveSignalLocked(sig) {
+ t.Debugf("Notified of signal %d", sig)
+ t.interrupt()
+ return nil
+ }
+ if group {
+ if nt := t.tg.findSignalReceiverLocked(sig); nt != nil {
+ nt.Debugf("Notified of group signal %d", sig)
+ nt.interrupt()
+ return nil
+ }
+ }
+ t.Debugf("No task notified of signal %d", sig)
+ return nil
+}
+
+func (tg *ThreadGroup) applySignalSideEffectsLocked(sig linux.Signal) {
+ switch {
+ case linux.SignalSetOf(sig)&StopSignals != 0:
+ // Stop signals cause all prior SIGCONT to be discarded. (This is
+ // despite the fact this has little effect since SIGCONT's most
+ // important effect is applied when the signal is sent in the branch
+ // below, not when the signal is delivered.)
+ tg.discardSpecificLocked(linux.SIGCONT)
+ case sig == linux.SIGCONT:
+ // "The SIGCONT signal has a side effect of waking up (all threads of)
+ // a group-stopped process. This side effect happens before
+ // signal-delivery-stop. The tracer can't suppress this side effect (it
+ // can only suppress signal injection, which only causes the SIGCONT
+ // handler to not be executed in the tracee, if such a handler is
+ // installed." - ptrace(2)
+ tg.endGroupStopLocked(true)
+ case sig == linux.SIGKILL:
+ // "SIGKILL does not generate signal-delivery-stop and therefore the
+ // tracer can't suppress it. SIGKILL kills even within system calls
+ // (syscall-exit-stop is not generated prior to death by SIGKILL)." -
+ // ptrace(2)
+ //
+ // Note that this differs from ThreadGroup.requestExit in that it
+ // ignores tg.execing.
+ if !tg.exiting {
+ tg.exiting = true
+ tg.exitStatus = ExitStatus{Signo: int(linux.SIGKILL)}
+ }
+ for t := tg.tasks.Front(); t != nil; t = t.Next() {
+ t.killLocked()
+ }
+ }
+}
+
+// canReceiveSignalLocked returns true if t should be interrupted to receive
+// the given signal. canReceiveSignalLocked is analogous to Linux's
+// kernel/signal.c:wants_signal(), but see below for divergences.
+//
+// Preconditions: The signal mutex must be locked.
+func (t *Task) canReceiveSignalLocked(sig linux.Signal) bool {
+ // - Do not choose tasks that are blocking the signal.
+ if linux.SignalSetOf(sig)&t.signalMask != 0 {
+ return false
+ }
+ // - No need to check Task.exitState, as the exit path sets every bit in the
+ // signal mask when it transitions from TaskExitNone to TaskExitInitiated.
+ // - No special case for SIGKILL: SIGKILL already interrupted all tasks in the
+ // task group via applySignalSideEffects => killLocked.
+ // - Do not choose stopped tasks, which cannot handle signals.
+ if t.stop != nil {
+ return false
+ }
+ // - TODO(b/38173783): No special case for when t is also the sending task,
+ // because the identity of the sender is unknown.
+ // - Do not choose tasks that have already been interrupted, as they may be
+ // busy handling another signal.
+ if len(t.interruptChan) != 0 {
+ return false
+ }
+ return true
+}
+
+// findSignalReceiverLocked returns a task in tg that should be interrupted to
+// receive the given signal. If no such task exists, findSignalReceiverLocked
+// returns nil.
+//
+// Linux actually records curr_target to balance the group signal targets.
+//
+// Preconditions: The signal mutex must be locked.
+func (tg *ThreadGroup) findSignalReceiverLocked(sig linux.Signal) *Task {
+ for t := tg.tasks.Front(); t != nil; t = t.Next() {
+ if t.canReceiveSignalLocked(sig) {
+ return t
+ }
+ }
+ return nil
+}
+
+// forceSignal ensures that the task is not ignoring or blocking the given
+// signal. If unconditional is true, forceSignal takes action even if the
+// signal isn't being ignored or blocked.
+func (t *Task) forceSignal(sig linux.Signal, unconditional bool) {
+ t.tg.pidns.owner.mu.RLock()
+ defer t.tg.pidns.owner.mu.RUnlock()
+ t.tg.signalHandlers.mu.Lock()
+ defer t.tg.signalHandlers.mu.Unlock()
+ t.forceSignalLocked(sig, unconditional)
+}
+
+func (t *Task) forceSignalLocked(sig linux.Signal, unconditional bool) {
+ blocked := linux.SignalSetOf(sig)&t.signalMask != 0
+ act := t.tg.signalHandlers.actions[sig]
+ ignored := act.Handler == arch.SignalActIgnore
+ if blocked || ignored || unconditional {
+ act.Handler = arch.SignalActDefault
+ t.tg.signalHandlers.actions[sig] = act
+ if blocked {
+ t.setSignalMaskLocked(t.signalMask &^ linux.SignalSetOf(sig))
+ }
+ }
+}
+
+// SignalMask returns a copy of t's signal mask.
+func (t *Task) SignalMask() linux.SignalSet {
+ return linux.SignalSet(atomic.LoadUint64((*uint64)(&t.signalMask)))
+}
+
+// SetSignalMask sets t's signal mask.
+//
+// Preconditions: SetSignalMask can only be called by the task goroutine.
+// t.exitState < TaskExitZombie.
+func (t *Task) SetSignalMask(mask linux.SignalSet) {
+ // By precondition, t prevents t.tg from completing an execve and mutating
+ // t.tg.signalHandlers, so we can skip the TaskSet mutex.
+ t.tg.signalHandlers.mu.Lock()
+ t.setSignalMaskLocked(mask)
+ t.tg.signalHandlers.mu.Unlock()
+}
+
+// Preconditions: The signal mutex must be locked.
+func (t *Task) setSignalMaskLocked(mask linux.SignalSet) {
+ oldMask := t.signalMask
+ atomic.StoreUint64((*uint64)(&t.signalMask), uint64(mask))
+
+ // If the new mask blocks any signals that were not blocked by the old
+ // mask, and at least one such signal is pending in tg.pendingSignals, and
+ // t has been woken, it could be the case that t was woken to handle that
+ // signal, but will no longer do so as a result of its new signal mask, so
+ // we have to pick a replacement.
+ blocked := mask &^ oldMask
+ blockedGroupPending := blocked & t.tg.pendingSignals.pendingSet
+ if blockedGroupPending != 0 && t.interrupted() {
+ linux.ForEachSignal(blockedGroupPending, func(sig linux.Signal) {
+ if nt := t.tg.findSignalReceiverLocked(sig); nt != nil {
+ nt.interrupt()
+ return
+ }
+ })
+ // We have to re-issue the interrupt consumed by t.interrupted() since
+ // it might have been for a different reason.
+ t.interruptSelf()
+ }
+
+ // Conversely, if the new mask unblocks any signals that were blocked by
+ // the old mask, and at least one such signal is pending, we may now need
+ // to handle that signal.
+ unblocked := oldMask &^ mask
+ unblockedPending := unblocked & (t.pendingSignals.pendingSet | t.tg.pendingSignals.pendingSet)
+ if unblockedPending != 0 {
+ t.interruptSelf()
+ }
+}
+
+// SetSavedSignalMask sets the saved signal mask (see Task.savedSignalMask's
+// comment).
+//
+// Preconditions: SetSavedSignalMask can only be called by the task goroutine.
+func (t *Task) SetSavedSignalMask(mask linux.SignalSet) {
+ t.savedSignalMask = mask
+ t.haveSavedSignalMask = true
+}
+
+// SignalStack returns the task-private signal stack.
+func (t *Task) SignalStack() arch.SignalStack {
+ alt := t.signalStack
+ if t.onSignalStack(alt) {
+ alt.Flags |= arch.SignalStackFlagOnStack
+ }
+ return alt
+}
+
+// onSignalStack returns true if the task is executing on the given signal stack.
+func (t *Task) onSignalStack(alt arch.SignalStack) bool {
+ sp := usermem.Addr(t.Arch().Stack())
+ return alt.Contains(sp)
+}
+
+// SetSignalStack sets the task-private signal stack.
+//
+// This value may not be changed if the task is currently executing on the
+// signal stack, i.e. if t.onSignalStack returns true. In this case, this
+// function will return false. Otherwise, true is returned.
+func (t *Task) SetSignalStack(alt arch.SignalStack) bool {
+ // Check that we're not executing on the stack.
+ if t.onSignalStack(t.signalStack) {
+ return false
+ }
+
+ if alt.Flags&arch.SignalStackFlagDisable != 0 {
+ // Don't record anything beyond the flags.
+ t.signalStack = arch.SignalStack{
+ Flags: arch.SignalStackFlagDisable,
+ }
+ } else {
+ // Mask out irrelevant parts: only disable matters.
+ alt.Flags &= arch.SignalStackFlagDisable
+ t.signalStack = alt
+ }
+ return true
+}
+
+// SetSignalAct atomically sets the thread group's signal action for signal sig
+// to *actptr (if actptr is not nil) and returns the old signal action.
+func (tg *ThreadGroup) SetSignalAct(sig linux.Signal, actptr *arch.SignalAct) (arch.SignalAct, error) {
+ if !sig.IsValid() {
+ return arch.SignalAct{}, syserror.EINVAL
+ }
+
+ tg.pidns.owner.mu.RLock()
+ defer tg.pidns.owner.mu.RUnlock()
+ sh := tg.signalHandlers
+ sh.mu.Lock()
+ defer sh.mu.Unlock()
+ oldact := sh.actions[sig]
+ if actptr != nil {
+ if sig == linux.SIGKILL || sig == linux.SIGSTOP {
+ return oldact, syserror.EINVAL
+ }
+
+ act := *actptr
+ act.Mask &^= UnblockableSignals
+ sh.actions[sig] = act
+ // From POSIX, by way of Linux:
+ //
+ // "Setting a signal action to SIG_IGN for a signal that is pending
+ // shall cause the pending signal to be discarded, whether or not it is
+ // blocked."
+ //
+ // "Setting a signal action to SIG_DFL for a signal that is pending and
+ // whose default action is to ignore the signal (for example, SIGCHLD),
+ // shall cause the pending signal to be discarded, whether or not it is
+ // blocked."
+ if computeAction(sig, act) == SignalActionIgnore {
+ tg.discardSpecificLocked(sig)
+ }
+ }
+ return oldact, nil
+}
+
+// CopyOutSignalAct converts the given SignalAct into an architecture-specific
+// type and then copies it out to task memory.
+func (t *Task) CopyOutSignalAct(addr usermem.Addr, s *arch.SignalAct) error {
+ n := t.Arch().NewSignalAct()
+ n.SerializeFrom(s)
+ _, err := t.CopyOut(addr, n)
+ return err
+}
+
+// CopyInSignalAct copies an architecture-specific sigaction type from task
+// memory and then converts it into a SignalAct.
+func (t *Task) CopyInSignalAct(addr usermem.Addr) (arch.SignalAct, error) {
+ n := t.Arch().NewSignalAct()
+ var s arch.SignalAct
+ if _, err := t.CopyIn(addr, n); err != nil {
+ return s, err
+ }
+ n.DeserializeTo(&s)
+ return s, nil
+}
+
+// CopyOutSignalStack converts the given SignalStack into an
+// architecture-specific type and then copies it out to task memory.
+func (t *Task) CopyOutSignalStack(addr usermem.Addr, s *arch.SignalStack) error {
+ n := t.Arch().NewSignalStack()
+ n.SerializeFrom(s)
+ _, err := t.CopyOut(addr, n)
+ return err
+}
+
+// CopyInSignalStack copies an architecture-specific stack_t from task memory
+// and then converts it into a SignalStack.
+func (t *Task) CopyInSignalStack(addr usermem.Addr) (arch.SignalStack, error) {
+ n := t.Arch().NewSignalStack()
+ var s arch.SignalStack
+ if _, err := t.CopyIn(addr, n); err != nil {
+ return s, err
+ }
+ n.DeserializeTo(&s)
+ return s, nil
+}
+
+// groupStop is a TaskStop placed on tasks that have received a stop signal
+// (SIGSTOP, SIGTSTP, SIGTTIN, SIGTTOU). (The term "group-stop" originates from
+// the ptrace man page.)
+//
+// +stateify savable
+type groupStop struct{}
+
+// Killable implements TaskStop.Killable.
+func (*groupStop) Killable() bool { return true }
+
+// initiateGroupStop attempts to initiate a group stop based on a
+// previously-dequeued stop signal.
+//
+// Preconditions: The caller must be running on the task goroutine.
+func (t *Task) initiateGroupStop(info *arch.SignalInfo) {
+ t.tg.pidns.owner.mu.RLock()
+ defer t.tg.pidns.owner.mu.RUnlock()
+ t.tg.signalHandlers.mu.Lock()
+ defer t.tg.signalHandlers.mu.Unlock()
+ if t.groupStopPending {
+ t.Debugf("Signal %d: not stopping thread group: lost to racing stop signal", info.Signo)
+ return
+ }
+ if !t.tg.groupStopDequeued {
+ t.Debugf("Signal %d: not stopping thread group: lost to racing SIGCONT", info.Signo)
+ return
+ }
+ if t.tg.exiting {
+ t.Debugf("Signal %d: not stopping thread group: lost to racing group exit", info.Signo)
+ return
+ }
+ if t.tg.execing != nil {
+ t.Debugf("Signal %d: not stopping thread group: lost to racing execve", info.Signo)
+ return
+ }
+ if !t.tg.groupStopComplete {
+ t.tg.groupStopSignal = linux.Signal(info.Signo)
+ }
+ t.tg.groupStopPendingCount = 0
+ for t2 := t.tg.tasks.Front(); t2 != nil; t2 = t2.Next() {
+ if t2.killedLocked() || t2.exitState >= TaskExitInitiated {
+ t2.groupStopPending = false
+ continue
+ }
+ t2.groupStopPending = true
+ t2.groupStopAcknowledged = false
+ if t2.ptraceSeized {
+ t2.trapNotifyPending = true
+ if s, ok := t2.stop.(*ptraceStop); ok && s.listen {
+ t2.endInternalStopLocked()
+ }
+ }
+ t2.interrupt()
+ t.tg.groupStopPendingCount++
+ }
+ t.Debugf("Signal %d: stopping %d threads in thread group", info.Signo, t.tg.groupStopPendingCount)
+}
+
+// endGroupStopLocked ensures that all prior stop signals received by tg are
+// not stopping tg and will not stop tg in the future. If broadcast is true,
+// parent and tracer notification will be scheduled if appropriate.
+//
+// Preconditions: The signal mutex must be locked.
+func (tg *ThreadGroup) endGroupStopLocked(broadcast bool) {
+ // Discard all previously-queued stop signals.
+ linux.ForEachSignal(StopSignals, tg.discardSpecificLocked)
+
+ if tg.groupStopPendingCount == 0 && !tg.groupStopComplete {
+ return
+ }
+
+ completeStr := "incomplete"
+ if tg.groupStopComplete {
+ completeStr = "complete"
+ }
+ tg.leader.Debugf("Ending %s group stop with %d threads pending", completeStr, tg.groupStopPendingCount)
+ for t := tg.tasks.Front(); t != nil; t = t.Next() {
+ t.groupStopPending = false
+ if t.ptraceSeized {
+ t.trapNotifyPending = true
+ if s, ok := t.stop.(*ptraceStop); ok && s.listen {
+ t.endInternalStopLocked()
+ }
+ } else {
+ if _, ok := t.stop.(*groupStop); ok {
+ t.endInternalStopLocked()
+ }
+ }
+ }
+ if broadcast {
+ // Instead of notifying the parent here, set groupContNotify so that
+ // one of the continuing tasks does so. (Linux does something similar.)
+ // The reason we do this is to keep locking sane. In order to send a
+ // signal to the parent, we need to lock its signal mutex, but we're
+ // already holding tg's signal mutex, and the TaskSet mutex must be
+ // locked for writing for us to hold two signal mutexes. Since we don't
+ // want to require this for endGroupStopLocked (which is called from
+ // signal-sending paths), nor do we want to lose atomicity by releasing
+ // the mutexes we're already holding, just let the continuing thread
+ // group deal with it.
+ tg.groupContNotify = true
+ tg.groupContInterrupted = !tg.groupStopComplete
+ tg.groupContWaitable = true
+ }
+ // Unsetting groupStopDequeued will cause racing calls to initiateGroupStop
+ // to recognize that the group stop has been cancelled.
+ tg.groupStopDequeued = false
+ tg.groupStopSignal = 0
+ tg.groupStopPendingCount = 0
+ tg.groupStopComplete = false
+ tg.groupStopWaitable = false
+}
+
+// participateGroupStopLocked is called to handle thread group side effects
+// after t unsets t.groupStopPending. The caller must handle task side effects
+// (e.g. placing the task goroutine into the group stop). It returns true if
+// the caller must notify t.tg.leader's parent of a completed group stop (which
+// participateGroupStopLocked cannot do due to holding the wrong locks).
+//
+// Preconditions: The signal mutex must be locked.
+func (t *Task) participateGroupStopLocked() bool {
+ if t.groupStopAcknowledged {
+ return false
+ }
+ t.groupStopAcknowledged = true
+ t.tg.groupStopPendingCount--
+ if t.tg.groupStopPendingCount != 0 {
+ return false
+ }
+ if t.tg.groupStopComplete {
+ return false
+ }
+ t.Debugf("Completing group stop")
+ t.tg.groupStopComplete = true
+ t.tg.groupStopWaitable = true
+ t.tg.groupContNotify = false
+ t.tg.groupContWaitable = false
+ return true
+}
+
+// signalStop sends a signal to t's thread group of a new group stop, group
+// continue, or ptrace stop, if appropriate. code and status are set in the
+// signal sent to tg, if any.
+//
+// Preconditions: The TaskSet mutex must be locked (for reading or writing).
+func (t *Task) signalStop(target *Task, code int32, status int32) {
+ t.tg.signalHandlers.mu.Lock()
+ defer t.tg.signalHandlers.mu.Unlock()
+ act, ok := t.tg.signalHandlers.actions[linux.SIGCHLD]
+ if !ok || (act.Handler != arch.SignalActIgnore && act.Flags&arch.SignalFlagNoCldStop == 0) {
+ sigchld := &arch.SignalInfo{
+ Signo: int32(linux.SIGCHLD),
+ Code: code,
+ }
+ sigchld.SetPid(int32(t.tg.pidns.tids[target]))
+ sigchld.SetUid(int32(target.Credentials().RealKUID.In(t.UserNamespace()).OrOverflow()))
+ sigchld.SetStatus(status)
+ // TODO(b/72102453): Set utime, stime.
+ t.sendSignalLocked(sigchld, true /* group */)
+ }
+}
+
+// The runInterrupt state handles conditions indicated by interrupts.
+//
+// +stateify savable
+type runInterrupt struct{}
+
+func (*runInterrupt) execute(t *Task) taskRunState {
+ // Interrupts are de-duplicated (if t is interrupted twice before
+ // t.interrupted() is called, t.interrupted() will only return true once),
+ // so early exits from this function must re-enter the runInterrupt state
+ // to check for more interrupt-signaled conditions.
+
+ t.tg.signalHandlers.mu.Lock()
+
+ // Did we just leave a group stop?
+ if t.tg.groupContNotify {
+ t.tg.groupContNotify = false
+ sig := t.tg.groupStopSignal
+ intr := t.tg.groupContInterrupted
+ t.tg.signalHandlers.mu.Unlock()
+ t.tg.pidns.owner.mu.RLock()
+ // For consistency with Linux, if the parent and (thread group
+ // leader's) tracer are in the same thread group, deduplicate
+ // notifications.
+ notifyParent := t.tg.leader.parent != nil
+ if tracer := t.tg.leader.Tracer(); tracer != nil {
+ if notifyParent && tracer.tg == t.tg.leader.parent.tg {
+ notifyParent = false
+ }
+ // Sending CLD_STOPPED to the tracer doesn't really make any sense;
+ // the thread group leader may have already entered the stop and
+ // notified its tracer accordingly. But it's consistent with
+ // Linux...
+ if intr {
+ tracer.signalStop(t.tg.leader, arch.CLD_STOPPED, int32(sig))
+ if !notifyParent {
+ tracer.tg.eventQueue.Notify(EventGroupContinue | EventTraceeStop | EventChildGroupStop)
+ } else {
+ tracer.tg.eventQueue.Notify(EventGroupContinue | EventTraceeStop)
+ }
+ } else {
+ tracer.signalStop(t.tg.leader, arch.CLD_CONTINUED, int32(sig))
+ tracer.tg.eventQueue.Notify(EventGroupContinue)
+ }
+ }
+ if notifyParent {
+ // If groupContInterrupted, do as Linux does and pretend the group
+ // stop completed just before it ended. The theoretical behavior in
+ // this case would be to send a SIGCHLD indicating the completed
+ // stop, followed by a SIGCHLD indicating the continue. However,
+ // SIGCHLD is a standard signal, so the latter would always be
+ // dropped. Hence sending only the former is equivalent.
+ if intr {
+ t.tg.leader.parent.signalStop(t.tg.leader, arch.CLD_STOPPED, int32(sig))
+ t.tg.leader.parent.tg.eventQueue.Notify(EventGroupContinue | EventChildGroupStop)
+ } else {
+ t.tg.leader.parent.signalStop(t.tg.leader, arch.CLD_CONTINUED, int32(sig))
+ t.tg.leader.parent.tg.eventQueue.Notify(EventGroupContinue)
+ }
+ }
+ t.tg.pidns.owner.mu.RUnlock()
+ return (*runInterrupt)(nil)
+ }
+
+ // Do we need to enter a group stop or related ptrace stop? This path is
+ // analogous to Linux's kernel/signal.c:get_signal() => do_signal_stop()
+ // (with ptrace enabled) and do_jobctl_trap().
+ if t.groupStopPending || t.trapStopPending || t.trapNotifyPending {
+ sig := t.tg.groupStopSignal
+ notifyParent := false
+ if t.groupStopPending {
+ t.groupStopPending = false
+ // We care about t.tg.groupStopSignal (for tracer notification)
+ // even if this doesn't complete a group stop, so keep the
+ // value of sig we've already read.
+ notifyParent = t.participateGroupStopLocked()
+ }
+ t.trapStopPending = false
+ t.trapNotifyPending = false
+ // Drop the signal mutex so we can take the TaskSet mutex.
+ t.tg.signalHandlers.mu.Unlock()
+
+ t.tg.pidns.owner.mu.RLock()
+ if t.tg.leader.parent == nil {
+ notifyParent = false
+ }
+ if tracer := t.Tracer(); tracer != nil {
+ if t.ptraceSeized {
+ if sig == 0 {
+ sig = linux.SIGTRAP
+ }
+ // "If tracee was attached using PTRACE_SEIZE, group-stop is
+ // indicated by PTRACE_EVENT_STOP: status>>16 ==
+ // PTRACE_EVENT_STOP. This allows detection of group-stops
+ // without requiring an extra PTRACE_GETSIGINFO call." -
+ // "Group-stop", ptrace(2)
+ t.ptraceCode = int32(sig) | linux.PTRACE_EVENT_STOP<<8
+ t.ptraceSiginfo = &arch.SignalInfo{
+ Signo: int32(sig),
+ Code: t.ptraceCode,
+ }
+ t.ptraceSiginfo.SetPid(int32(t.tg.pidns.tids[t]))
+ t.ptraceSiginfo.SetUid(int32(t.Credentials().RealKUID.In(t.UserNamespace()).OrOverflow()))
+ } else {
+ t.ptraceCode = int32(sig)
+ t.ptraceSiginfo = nil
+ }
+ if t.beginPtraceStopLocked() {
+ tracer.signalStop(t, arch.CLD_STOPPED, int32(sig))
+ // For consistency with Linux, if the parent and tracer are in the
+ // same thread group, deduplicate notification signals.
+ if notifyParent && tracer.tg == t.tg.leader.parent.tg {
+ notifyParent = false
+ tracer.tg.eventQueue.Notify(EventChildGroupStop | EventTraceeStop)
+ } else {
+ tracer.tg.eventQueue.Notify(EventTraceeStop)
+ }
+ }
+ } else {
+ t.tg.signalHandlers.mu.Lock()
+ if !t.killedLocked() {
+ t.beginInternalStopLocked((*groupStop)(nil))
+ }
+ t.tg.signalHandlers.mu.Unlock()
+ }
+ if notifyParent {
+ t.tg.leader.parent.signalStop(t.tg.leader, arch.CLD_STOPPED, int32(sig))
+ t.tg.leader.parent.tg.eventQueue.Notify(EventChildGroupStop)
+ }
+ t.tg.pidns.owner.mu.RUnlock()
+
+ return (*runInterrupt)(nil)
+ }
+
+ // Are there signals pending?
+ if info := t.dequeueSignalLocked(t.signalMask); info != nil {
+ if linux.SignalSetOf(linux.Signal(info.Signo))&StopSignals != 0 {
+ // Indicate that we've dequeued a stop signal before unlocking the
+ // signal mutex; initiateGroupStop will check for races with
+ // endGroupStopLocked after relocking it.
+ t.tg.groupStopDequeued = true
+ }
+ if t.ptraceSignalLocked(info) {
+ // Dequeueing the signal action must wait until after the
+ // signal-delivery-stop ends since the tracer can change or
+ // suppress the signal.
+ t.tg.signalHandlers.mu.Unlock()
+ return (*runInterruptAfterSignalDeliveryStop)(nil)
+ }
+ act := t.tg.signalHandlers.dequeueAction(linux.Signal(info.Signo))
+ t.tg.signalHandlers.mu.Unlock()
+ return t.deliverSignal(info, act)
+ }
+
+ t.tg.signalHandlers.mu.Unlock()
+ return (*runApp)(nil)
+}
+
+// +stateify savable
+type runInterruptAfterSignalDeliveryStop struct{}
+
+func (*runInterruptAfterSignalDeliveryStop) execute(t *Task) taskRunState {
+ t.tg.pidns.owner.mu.Lock()
+ // Can't defer unlock: deliverSignal must be called without holding TaskSet
+ // mutex.
+ sig := linux.Signal(t.ptraceCode)
+ defer func() {
+ t.ptraceSiginfo = nil
+ }()
+ if !sig.IsValid() {
+ t.tg.pidns.owner.mu.Unlock()
+ return (*runInterrupt)(nil)
+ }
+ info := t.ptraceSiginfo
+ if sig != linux.Signal(info.Signo) {
+ info.Signo = int32(sig)
+ info.Errno = 0
+ info.Code = arch.SignalInfoUser
+ // pid isn't a valid field for all signal numbers, but Linux
+ // doesn't care (kernel/signal.c:ptrace_signal()).
+ //
+ // Linux uses t->parent for the tid and uid here, which is the tracer
+ // if it hasn't detached or the real parent otherwise.
+ parent := t.parent
+ if tracer := t.Tracer(); tracer != nil {
+ parent = tracer
+ }
+ if parent == nil {
+ // Tracer has detached and t was created by Kernel.CreateProcess().
+ // Pretend the parent is in an ancestor PID + user namespace.
+ info.SetPid(0)
+ info.SetUid(int32(auth.OverflowUID))
+ } else {
+ info.SetPid(int32(t.tg.pidns.tids[parent]))
+ info.SetUid(int32(parent.Credentials().RealKUID.In(t.UserNamespace()).OrOverflow()))
+ }
+ }
+ t.tg.signalHandlers.mu.Lock()
+ t.tg.pidns.owner.mu.Unlock()
+ // If the signal is masked, re-queue it.
+ if linux.SignalSetOf(sig)&t.signalMask != 0 {
+ t.sendSignalLocked(info, false /* group */)
+ t.tg.signalHandlers.mu.Unlock()
+ return (*runInterrupt)(nil)
+ }
+ act := t.tg.signalHandlers.dequeueAction(linux.Signal(info.Signo))
+ t.tg.signalHandlers.mu.Unlock()
+ return t.deliverSignal(info, act)
+}