Check in gVisor.

PiperOrigin-RevId: 194583126 Change-Id: Ica1d8821a90f74e7e745962d71801c598c652463
author: Googler <noreply@google.com> 2018-04-27 10:37:02 -0700
committer: Adin Scannell <ascannell@google.com> 2018-04-28 01:44:26 -0400
commit: d02b74a5dcfed4bfc8f2f8e545bca4d2afabb296 (patch)
tree: 54f95eef73aee6bacbfc736fffc631be2605ed53 /pkg/sentry/syscalls/linux/sys_signal.go
parent: f70210e742919f40aa2f0934a22f1c9ba6dada62 (diff)
1 files changed, 553 insertions, 0 deletions
diff --git a/pkg/sentry/syscalls/linux/sys_signal.go b/pkg/sentry/syscalls/linux/sys_signal.go
new file mode 100644
index 000000000..93b3f531a
--- /dev/null
+++ b/pkg/sentry/syscalls/linux/sys_signal.go
@@ -0,0 +1,553 @@
+// Copyright 2018 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package linux
+
+import (
+	"math"
+	"time"
+
+	"gvisor.googlesource.com/gvisor/pkg/abi/linux"
+	"gvisor.googlesource.com/gvisor/pkg/sentry/arch"
+	"gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
+	"gvisor.googlesource.com/gvisor/pkg/syserror"
+)
+
+// mayKill reports whether t is allowed to send sig to target.
+//
+// kill(2) states the rule as: "For a process to have permission to send a
+// signal it must
+// - either be privileged (CAP_KILL), or
+// - the real or effective user ID of the sending process must be equal to the
+// real or saved set-user-ID of the target process.
+//
+// In the case of SIGCONT it suffices when the sending and receiving processes
+// belong to the same session."
+//
+// Equivalent to kernel/signal.c:check_kill_permission.
+func mayKill(t *kernel.Task, target *kernel.Task, sig linux.Signal) bool {
+	switch {
+	case t.ThreadGroup() == target.ThreadGroup():
+		// check_kill_permission additionally lets tasks that share a
+		// thread group signal each other. kill(2) does not mention this
+		// because kill cannot address individual tasks.
+		return true
+	case t.HasCapabilityIn(linux.CAP_KILL, target.UserNamespace()):
+		// Privileged in the target's user namespace.
+		return true
+	}
+
+	// The sender's real or effective UID must match the target's real or
+	// saved UID.
+	sender, receiver := t.Credentials(), target.Credentials()
+	switch {
+	case sender.EffectiveKUID == receiver.SavedKUID,
+		sender.EffectiveKUID == receiver.RealKUID,
+		sender.RealKUID == receiver.SavedKUID,
+		sender.RealKUID == receiver.RealKUID:
+		return true
+	}
+
+	// Last resort: SIGCONT may be sent between tasks of the same session.
+	return sig == linux.SIGCONT && target.ThreadGroup().Session() == t.ThreadGroup().Session()
+}
+
+// Kill implements linux syscall kill(2).
+//
+// args[0] is the pid selector and args[1] is the signal number. A positive
+// selector names a single process, -1 names every thread group the caller may
+// signal, and 0 or a value below -1 names a process group.
+func Kill(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
+	pid := kernel.ThreadID(args[0].Int())
+	sig := linux.Signal(args[1].Int())
+
+	if pid > 0 {
+		// "If pid is positive, then signal sig is sent to the process with the
+		// ID specified by pid." - kill(2)
+		//
+		// The target may die between TaskWithID and SendGroupSignal when
+		// racing with execve; in that case SendGroupSignal reports ESRCH and
+		// the lookup is retried. Compare Linux's
+		// kernel/signal.c:kill_pid_info().
+		for {
+			target := t.PIDNamespace().TaskWithID(pid)
+			switch {
+			case target == nil:
+				return 0, nil, syserror.ESRCH
+			case !mayKill(t, target, sig):
+				return 0, nil, syserror.EPERM
+			}
+			si := &arch.SignalInfo{
+				Code:  arch.SignalInfoUser,
+				Signo: int32(sig),
+			}
+			si.SetPid(int32(target.PIDNamespace().IDOfTask(t)))
+			si.SetUid(int32(t.Credentials().RealKUID.In(target.UserNamespace()).OrOverflow()))
+			err := target.SendGroupSignal(si)
+			if err == syserror.ESRCH {
+				continue
+			}
+			return 0, nil, err
+		}
+	}
+
+	if pid == -1 {
+		// "If pid equals -1, then sig is sent to every process for which the
+		// calling process has permission to send signals, except for process 1
+		// (init), but see below. ... POSIX.1-2001 requires that kill(-1,sig)
+		// send sig to all processes that the calling process may send signals
+		// to, except possibly for some implementation-defined system
+		// processes. Linux allows a process to signal itself, but on Linux the
+		// call kill(-1,sig) does not signal the calling process."
+		var lastErr error
+		sent := 0
+		for _, tg := range t.PIDNamespace().ThreadGroups() {
+			// Skip the caller's own thread group, init, and any thread
+			// group the caller may not signal. For pid == -1 the returned
+			// error is the last non-EPERM error from any call to
+			// group_send_sig_info, so a permission failure is not recorded.
+			if tg == t.ThreadGroup() ||
+				t.PIDNamespace().IDOfThreadGroup(tg) == kernel.InitTID ||
+				!mayKill(t, tg.Leader(), sig) {
+				continue
+			}
+
+			// Whether or not kill returns an error may depend on the
+			// iteration order, here and in the process group case. At
+			// least the semantics documented by the man page are
+			// implemented: "On success (at least one signal was sent),
+			// zero is returned."
+			si := &arch.SignalInfo{
+				Code:  arch.SignalInfoUser,
+				Signo: int32(sig),
+			}
+			si.SetPid(int32(tg.PIDNamespace().IDOfTask(t)))
+			si.SetUid(int32(t.Credentials().RealKUID.In(tg.Leader().UserNamespace()).OrOverflow()))
+			err := tg.SendSignal(si)
+			if err == syserror.ESRCH {
+				// The task exited while we were iterating. This race
+				// would not normally exist on Linux, so it is
+				// suppressed.
+				continue
+			}
+			sent++
+			if err != nil {
+				lastErr = err
+			}
+		}
+		if sent == 0 {
+			return 0, nil, syserror.ESRCH
+		}
+		return 0, nil, lastErr
+	}
+
+	// "If pid equals 0, then sig is sent to every process in the process
+	// group of the calling process."
+	//
+	// "If pid is less than -1, then sig is sent to every process
+	// in the process group whose ID is -pid."
+	pgid := kernel.ProcessGroupID(-pid)
+	if pgid == 0 {
+		pgid = t.PIDNamespace().IDOfProcessGroup(t.ThreadGroup().ProcessGroup())
+	}
+
+	// When signalling a process group, the returned error is the last error
+	// from any call to group_send_sig_info. ESRCH is the result when no
+	// thread group matched.
+	lastErr := syserror.ESRCH
+	for _, tg := range t.PIDNamespace().ThreadGroups() {
+		if t.PIDNamespace().IDOfProcessGroup(tg.ProcessGroup()) != pgid {
+			continue
+		}
+		if !mayKill(t, tg.Leader(), sig) {
+			lastErr = syserror.EPERM
+			continue
+		}
+
+		si := &arch.SignalInfo{
+			Code:  arch.SignalInfoUser,
+			Signo: int32(sig),
+		}
+		si.SetPid(int32(tg.PIDNamespace().IDOfTask(t)))
+		si.SetUid(int32(t.Credentials().RealKUID.In(tg.Leader().UserNamespace()).OrOverflow()))
+		// An ESRCH from SendSignal means the task exited while we were
+		// iterating; it is ignored rather than recorded.
+		err := tg.SendSignal(si)
+		if err == syserror.ESRCH {
+			continue
+		}
+		lastErr = err
+	}
+	return 0, nil, lastErr
+}
+
+// tkillSigInfo builds the SignalInfo used when sender signals receiver via
+// tkill(2) or tgkill(2). The sender's thread group ID and real UID are
+// expressed in the receiver's PID and user namespaces respectively.
+func tkillSigInfo(sender, receiver *kernel.Task, sig linux.Signal) *arch.SignalInfo {
+	si := &arch.SignalInfo{
+		Code:  arch.SignalInfoTkill,
+		Signo: int32(sig),
+	}
+
+	senderTGID := receiver.PIDNamespace().IDOfThreadGroup(sender.ThreadGroup())
+	si.SetPid(int32(senderTGID))
+
+	senderUID := sender.Credentials().RealKUID.In(receiver.UserNamespace()).OrOverflow()
+	si.SetUid(int32(senderUID))
+
+	return si
+}
+
+// Tkill implements linux syscall tkill(2).
+//
+// args[0] is the thread ID to signal and args[1] is the signal number.
+func Tkill(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
+	tid := kernel.ThreadID(args[0].Int())
+	sig := linux.Signal(args[1].Int())
+
+	// N.B. Contrary to the man page, Linux rejects tid <= 0 with EINVAL.
+	// Not every signal syscall behaves this way.
+	if tid <= 0 {
+		return 0, nil, syserror.EINVAL
+	}
+
+	target := t.PIDNamespace().TaskWithID(tid)
+	switch {
+	case target == nil:
+		return 0, nil, syserror.ESRCH
+	case !mayKill(t, target, sig):
+		return 0, nil, syserror.EPERM
+	}
+
+	si := tkillSigInfo(t, target, sig)
+	return 0, nil, target.SendSignal(si)
+}
+
+// Tgkill implements linux syscall tgkill(2).
+//
+// args[0] is the thread group ID, args[1] is the thread ID within that group,
+// and args[2] is the signal number.
+func Tgkill(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
+	tgid := kernel.ThreadID(args[0].Int())
+	tid := kernel.ThreadID(args[1].Int())
+	sig := linux.Signal(args[2].Int())
+
+	// N.B. Contrary to the man page, Linux rejects tgid/tid <= 0 with
+	// EINVAL. Not every signal syscall behaves this way.
+	if tid <= 0 || tgid <= 0 {
+		return 0, nil, syserror.EINVAL
+	}
+
+	// The thread must exist and must belong to the named thread group.
+	targetTG := t.PIDNamespace().ThreadGroupWithID(tgid)
+	target := t.PIDNamespace().TaskWithID(tid)
+	switch {
+	case targetTG == nil, target == nil, target.ThreadGroup() != targetTG:
+		return 0, nil, syserror.ESRCH
+	case !mayKill(t, target, sig):
+		return 0, nil, syserror.EPERM
+	}
+
+	si := tkillSigInfo(t, target, sig)
+	return 0, nil, target.SendSignal(si)
+}
+
+// RtSigaction implements linux syscall rt_sigaction(2).
+//
+// args[0] is the signal number, args[1] is the address of the new action,
+// args[2] is the address that receives the previous action, and args[3] is
+// the size in bytes of the caller's signal set. When args[1] is 0 a nil action
+// is passed to SetSignalAct; when args[2] is 0 the previous action is not
+// copied out.
+func RtSigaction(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
+	sig := linux.Signal(args[0].Int())
+	newactarg := args[1].Pointer()
+	oldactarg := args[2].Pointer()
+	sigsetsize := args[3].SizeT()
+
+	// Linux fails rt_sigaction with EINVAL when sigsetsize does not match
+	// the kernel's sigset_t size, before touching either user pointer.
+	if sigsetsize != linux.SignalSetSize {
+		return 0, nil, syserror.EINVAL
+	}
+
+	var newactptr *arch.SignalAct
+	if newactarg != 0 {
+		newact, err := t.CopyInSignalAct(newactarg)
+		if err != nil {
+			return 0, nil, err
+		}
+		newactptr = &newact
+	}
+	oldact, err := t.ThreadGroup().SetSignalAct(sig, newactptr)
+	if err != nil {
+		return 0, nil, err
+	}
+	if oldactarg != 0 {
+		if err := t.CopyOutSignalAct(oldactarg, &oldact); err != nil {
+			return 0, nil, err
+		}
+	}
+	return 0, nil, nil
+}
+
+// Sigreturn implements linux syscall sigreturn(2).
+func Sigreturn(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
+	ctrl, err := t.SignalReturn(false)
+	return 0, ctrl, err
+}
+
+// RtSigreturn implements linux syscall rt_sigreturn(2).
+func RtSigreturn(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
+	ctrl, err := t.SignalReturn(true)
+	return 0, ctrl, err
+}
+
+// RtSigprocmask implements linux syscall rt_sigprocmask(2).
+//
+// args[0] is the operation (SIG_BLOCK, SIG_UNBLOCK or SIG_SETMASK), args[1]
+// is the address of the set to apply (0 to leave the mask unchanged), args[2]
+// is the address that receives the mask as it was on entry (0 to skip), and
+// args[3] is the size of the signal set, which must equal linux.SignalSetSize.
+func RtSigprocmask(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
+	how := args[0].Int()
+	setaddr := args[1].Pointer()
+	oldaddr := args[2].Pointer()
+	sigsetsize := args[3].SizeT()
+
+	if sigsetsize != linux.SignalSetSize {
+		return 0, nil, syserror.EINVAL
+	}
+
+	// Snapshot the mask before any modification; this is the value reported
+	// back through oldaddr.
+	oldmask := t.SignalMask()
+	if setaddr != 0 {
+		mask, err := copyInSigSet(t, setaddr, sigsetsize)
+		if err != nil {
+			return 0, nil, err
+		}
+
+		// Derive the new mask first, then install it in one place. An
+		// unknown operation leaves the mask untouched.
+		next := mask
+		switch how {
+		case linux.SIG_SETMASK:
+			// next is already the requested set.
+		case linux.SIG_BLOCK:
+			next = oldmask | mask
+		case linux.SIG_UNBLOCK:
+			next = oldmask &^ mask
+		default:
+			return 0, nil, syserror.EINVAL
+		}
+		t.SetSignalMask(next)
+	}
+
+	if oldaddr == 0 {
+		return 0, nil, nil
+	}
+	return 0, nil, copyOutSigSet(t, oldaddr, oldmask)
+}
+
+// Sigaltstack implements linux syscall sigaltstack(2).
+//
+// args[0] is the address of the new alternate stack description (0 to leave
+// it unchanged) and args[1] is the address that receives the current one (0
+// to skip). The current stack is reported before any new stack is installed.
+func Sigaltstack(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
+	setaddr := args[0].Pointer()
+	oldaddr := args[1].Pointer()
+
+	if oldaddr != 0 {
+		current := t.SignalStack()
+		// Report whether the task is executing on the alternate stack.
+		if t.OnSignalStack(current) {
+			current.Flags |= arch.SignalStackFlagOnStack
+		}
+		if err := t.CopyOutSignalStack(oldaddr, &current); err != nil {
+			return 0, nil, err
+		}
+	}
+
+	if setaddr == 0 {
+		return 0, nil, nil
+	}
+
+	// The alternate stack cannot be replaced while the task is running on
+	// it.
+	if t.OnSignalStack(t.SignalStack()) {
+		return 0, nil, syserror.EPERM
+	}
+	replacement, err := t.CopyInSignalStack(setaddr)
+	if err != nil {
+		return 0, nil, err
+	}
+	if err := t.SetSignalStack(replacement); err != nil {
+		return 0, nil, err
+	}
+	return 0, nil, nil
+}
+
+// Pause implements linux syscall pause(2).
+//
+// The task blocks on a nil channel, and the result of that block is passed
+// through syserror.ConvertIntr with kernel.ERESTARTNOHAND.
+func Pause(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
+	blockErr := t.Block(nil)
+	return 0, nil, syserror.ConvertIntr(blockErr, kernel.ERESTARTNOHAND)
+}
+
+// sigtimedwait waits up to timeout for one of the signals in mask to become
+// pending and dequeues it.
+//
+// It returns the dequeued signal on success, EAGAIN if timeout is zero or
+// expires with no matching signal, and EINTR if the wait was interrupted by a
+// signal outside mask.
+func sigtimedwait(t *kernel.Task, mask linux.SignalSet, timeout time.Duration) (*arch.SignalInfo, error) {
+	// Fast path: a matching signal is already pending.
+	if pending := t.TakeSignal(^mask); pending != nil {
+		return pending, nil
+	}
+
+	// Nothing pending and the caller asked not to wait.
+	if timeout == 0 {
+		return nil, syserror.EAGAIN
+	}
+
+	// Temporarily unblock the signals of interest, then block until either
+	// the timeout expires or a signal arrives. The original mask is restored
+	// before the outcome is examined.
+	saved := t.SignalMask()
+	t.SetSignalMask(saved &^ mask)
+	_, err := t.BlockWithTimeout(nil, true, timeout)
+	t.SetSignalMask(saved)
+
+	if err == syserror.ErrInterrupted {
+		if got := t.TakeSignal(^mask); got != nil {
+			// One of the awaited signals arrived.
+			return got, nil
+		}
+		// Interrupted by a signal we were not waiting for.
+		return nil, syserror.EINTR
+	}
+	if err == syserror.ETIMEDOUT {
+		// Timed out and still no signals.
+		return nil, syserror.EAGAIN
+	}
+
+	// The event channel passed to BlockWithTimeout was nil, so a timeout and
+	// an interrupt are the only two ways the block could have ended.
+	panic("unreachable")
+}
+
+// RtSigpending implements linux syscall rt_sigpending(2).
+//
+// The task's pending signal set is copied out to the address in args[0].
+func RtSigpending(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
+	if _, err := t.CopyOut(args[0].Pointer(), t.PendingSignals()); err != nil {
+		return 0, nil, err
+	}
+	return 0, nil, nil
+}
+
+// RtSigtimedwait implements linux syscall rt_sigtimedwait(2).
+func RtSigtimedwait(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
+	sigset := args[0].Pointer()
+	siginfo := args[1].Pointer()
+	timespec := args[2].Pointer()
+	sigsetsize := args[3].SizeT()
+
+	mask, err := copyInSigSet(t, sigset, sigsetsize)
+	if err != nil {
+		return 0, nil, err
+	}
+
+	var timeout time.Duration
+	if timespec != 0 {
+		d, err := copyTimespecIn(t, timespec)
+		if err != nil {
+			return 0, nil, err
+		}
+		if !d.Valid() {
+			return 0, nil, syserror.EINVAL
+		}
+		timeout = time.Duration(d.ToNsecCapped())
+	} else {
+		timeout = time.Duration(math.MaxInt64)
+	}
+
+	si, err := sigtimedwait(t, mask, timeout)
+	if err != nil {
+		return 0, nil, err
+	}
+
+	if si != nil {
+		if siginfo != 0 {
+			si.FixSignalCodeForUser()
+			if _, err := t.CopyOut(siginfo, si); err != nil {
+				return 0, nil, err
+			}
+		}
+		return uintptr(si.Signo), nil, nil
+	}
+
+	// sigtimedwait's not supposed to return nil si and err...
+	return 0, nil, nil
+}
+
+// RtSigqueueinfo implements linux syscall rt_sigqueueinfo(2).
+//
+// args[0] is the ID of the task whose thread group is signalled, args[1] is
+// the signal number, and args[2] is the address of the caller-supplied
+// siginfo.
+func RtSigqueueinfo(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
+	pid := kernel.ThreadID(args[0].Int())
+	sig := linux.Signal(args[1].Int())
+	infoAddr := args[2].Pointer()
+
+	// Copy in the caller's siginfo and force Signo to the requested signal
+	// (Linux overrides it in the same way). The si_code restriction is
+	// enforced in the loop below. RtTgsigqueueinfo carries the same logic
+	// and should be kept in sync.
+	var info arch.SignalInfo
+	if _, err := t.CopyIn(infoAddr, &info); err != nil {
+		return 0, nil, err
+	}
+	info.Signo = int32(sig)
+
+	// Retry while the send reports ESRCH, to handle a target that dies
+	// between lookup and delivery when racing with execve, as Kill does.
+	for {
+		// The signal goes to the thread group of the named task.
+		target := t.PIDNamespace().TaskWithID(pid)
+		if target == nil {
+			return 0, nil, syserror.ESRCH
+		}
+
+		// A sender other than the receiver may not use si_codes reserved
+		// for the kernel, nor SI_TKILL.
+		restricted := info.Code >= 0 || info.Code == arch.SignalInfoTkill
+		if restricted && target != t {
+			return 0, nil, syserror.EPERM
+		}
+
+		if !mayKill(t, target, sig) {
+			return 0, nil, syserror.EPERM
+		}
+
+		err := target.SendGroupSignal(&info)
+		if err == syserror.ESRCH {
+			continue
+		}
+		return 0, nil, err
+	}
+}
+
+// RtTgsigqueueinfo implements linux syscall rt_tgsigqueueinfo(2).
+//
+// args[0] is the thread group ID, args[1] is the thread ID within that group,
+// args[2] is the signal number, and args[3] is the address of the
+// caller-supplied siginfo. The signal is sent to the single named task.
+func RtTgsigqueueinfo(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
+	tgid := kernel.ThreadID(args[0].Int())
+	tid := kernel.ThreadID(args[1].Int())
+	sig := linux.Signal(args[2].Int())
+	infoAddr := args[3].Pointer()
+
+	// N.B. Contrary to the man page, Linux rejects tgid/tid <= 0 with
+	// EINVAL. Not every signal syscall behaves this way.
+	if tid <= 0 || tgid <= 0 {
+		return 0, nil, syserror.EINVAL
+	}
+
+	// Copy in the caller's siginfo and force Signo to the requested signal,
+	// mirroring RtSigqueueinfo.
+	var info arch.SignalInfo
+	if _, err := t.CopyIn(infoAddr, &info); err != nil {
+		return 0, nil, err
+	}
+	info.Signo = int32(sig)
+
+	// The thread must exist and must belong to the named thread group.
+	targetTG := t.PIDNamespace().ThreadGroupWithID(tgid)
+	target := t.PIDNamespace().TaskWithID(tid)
+	switch {
+	case targetTG == nil, target == nil, target.ThreadGroup() != targetTG:
+		return 0, nil, syserror.ESRCH
+	}
+
+	// A sender other than the receiver may not use si_codes reserved for
+	// the kernel, nor SI_TKILL.
+	restricted := info.Code >= 0 || info.Code == arch.SignalInfoTkill
+	switch {
+	case restricted && target != t:
+		return 0, nil, syserror.EPERM
+	case !mayKill(t, target, sig):
+		return 0, nil, syserror.EPERM
+	}
+	return 0, nil, target.SendSignal(&info)
+}
+
+// RtSigsuspend implements linux syscall rt_sigsuspend(2).
+//
+// args[0] is the address of the signal mask to install for the duration of
+// the wait and args[1] is the size in bytes of that mask. The mask that was
+// in effect on entry is recorded with SetSavedSignalMask before blocking.
+func RtSigsuspend(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
+	sigset := args[0].Pointer()
+	sigsetsize := args[1].SizeT()
+
+	// Linux fails rt_sigsuspend with EINVAL when sigsetsize does not match
+	// the kernel's sigset_t size, before reading the mask.
+	if sigsetsize != linux.SignalSetSize {
+		return 0, nil, syserror.EINVAL
+	}
+
+	// Copy in the signal mask.
+	var mask linux.SignalSet
+	if _, err := t.CopyIn(sigset, &mask); err != nil {
+		return 0, nil, err
+	}
+	// Signals in kernel.UnblockableSignals are never added to the mask.
+	mask &^= kernel.UnblockableSignals
+
+	// Swap the mask.
+	oldmask := t.SignalMask()
+	t.SetSignalMask(mask)
+	t.SetSavedSignalMask(oldmask)
+
+	// Perform the wait.
+	return 0, nil, syserror.ConvertIntr(t.Block(nil), kernel.ERESTARTNOHAND)
+}
+
+// RestartSyscall implements the linux syscall restart_syscall(2).
+//
+// If the task has a restart block, its Restart method supplies the result.
+// Otherwise the call fails with EINTR.
+func RestartSyscall(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
+	block := t.SyscallRestartBlock()
+	if block == nil {
+		// A nil restart block should not normally be seen here, but ptrace
+		// may have set ERESTART_RESTARTBLOCK without the current syscall
+		// having set up a restart block. If ptrace did not manipulate the
+		// return value, a nil restart block is a bug. Linux guarantees the
+		// restart function is never null by (re)initializing it with one
+		// that turns the restart into EINTR; that behaviour is emulated
+		// here.
+		t.Debugf("Restart block missing in restart_syscall(2). Did ptrace inject a return value of ERESTART_RESTARTBLOCK?")
+		return 0, nil, syserror.EINTR
+	}
+
+	n, err := block.Restart(t)
+	return n, nil, err
+}
author	Googler <noreply@google.com>	2018-04-27 10:37:02 -0700
committer	Adin Scannell <ascannell@google.com>	2018-04-28 01:44:26 -0400
commit	d02b74a5dcfed4bfc8f2f8e545bca4d2afabb296 (patch)
tree	54f95eef73aee6bacbfc736fffc631be2605ed53 /pkg/sentry/syscalls/linux/sys_signal.go
parent	f70210e742919f40aa2f0934a22f1c9ba6dada62 (diff)