From b52cbd60280342f25411561702e97fe650fdaa9c Mon Sep 17 00:00:00 2001 From: Michael Pratt Date: Wed, 17 Apr 2019 13:42:16 -0700 Subject: Don't allow sigtimedwait to catch unblockable signals The existing logic attempting to do this is incorrect. Unary ^ has higher precedence than &^, so mask always has UnblockableSignals cleared, allowing dequeueSignalLocked to dequeue unblockable signals (which allows userspace to ignore them). Switch the logic so that unblockable signals are always masked. PiperOrigin-RevId: 244058487 Change-Id: Ib19630ac04068a1fbfb9dc4a8eab1ccbdb21edc3 --- pkg/sentry/kernel/task_signals.go | 2 +- test/syscalls/linux/BUILD | 1 + test/syscalls/linux/sigtimedwait.cc | 76 +++++++++++++++++++++++++++++++++++++ 3 files changed, 78 insertions(+), 1 deletion(-) diff --git a/pkg/sentry/kernel/task_signals.go b/pkg/sentry/kernel/task_signals.go index e177562d7..3a8e61900 100644 --- a/pkg/sentry/kernel/task_signals.go +++ b/pkg/sentry/kernel/task_signals.go @@ -307,7 +307,7 @@ func (t *Task) SignalReturn(rt bool) (*SyscallControl, error) { func (t *Task) Sigtimedwait(set linux.SignalSet, timeout time.Duration) (*arch.SignalInfo, error) { // set is the set of signals we're interested in; invert it to get the set // of signals to block. - mask := ^set &^ UnblockableSignals + mask := ^(set &^ UnblockableSignals) t.tg.signalHandlers.mu.Lock() defer t.tg.signalHandlers.mu.Unlock() diff --git a/test/syscalls/linux/BUILD b/test/syscalls/linux/BUILD index 38faba267..d99733fc9 100644 --- a/test/syscalls/linux/BUILD +++ b/test/syscalls/linux/BUILD @@ -1825,6 +1825,7 @@ cc_binary( srcs = ["sigtimedwait.cc"], linkstatic = 1, deps = [ + "//test/util:file_descriptor", "//test/util:logging", "//test/util:signal_util", "//test/util:test_util", diff --git a/test/syscalls/linux/sigtimedwait.cc b/test/syscalls/linux/sigtimedwait.cc index 3a350fc28..1df9c013f 100644 --- a/test/syscalls/linux/sigtimedwait.cc +++ b/test/syscalls/linux/sigtimedwait.cc @@ -18,6 +18,7 @@ #include "gtest/gtest.h" #include "absl/time/clock.h" #include "absl/time/time.h" +#include "test/util/file_descriptor.h" #include "test/util/logging.h" #include "test/util/signal_util.h" #include "test/util/test_util.h" @@ -163,6 +164,81 @@ TEST(SigtimedwaitTest, ChildExitGeneratedSIGCHLDWithHandler) { EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0) << status; } +// sigtimedwait cannot catch SIGKILL. +TEST(SigtimedwaitTest, SIGKILLUncaught) { + // This is a regression test for sigtimedwait dequeuing SIGKILLs, thus + // preventing the task from exiting. + // + // The explanation below is specific to behavior in gVisor. The Linux behavior + // here is irrelevant because without a bug that prevents delivery of SIGKILL, + // none of this behavior is visible (in Linux or gVisor). + // + // SIGKILL is rather intrusive. Simply sending the SIGKILL marks + // ThreadGroup.exitStatus as exiting with SIGKILL, before the SIGKILL is even + // delivered. + // + // As a result, we cannot simply exit the child with a different exit code if + // it survives and expect to see that code in waitpid because: + // 1. PrepareGroupExit will override Task.exitStatus with + // ThreadGroup.exitStatus. + // 2. waitpid(2) will always return ThreadGroup.exitStatus rather than + // Task.exitStatus. + // + // We could use exit(2) to set Task.exitStatus without override, and a SIGCHLD + // handler to receive Task.exitStatus in the parent, but with that much + // test complexity, it is cleaner to simply use a pipe to notify the parent + // that we survived. + constexpr auto kSigtimedwaitSetupTime = absl::Seconds(2); + + int pipe_fds[2]; + ASSERT_THAT(pipe(pipe_fds), SyscallSucceeds()); + FileDescriptor rfd(pipe_fds[0]); + FileDescriptor wfd(pipe_fds[1]); + + pid_t pid = fork(); + if (pid == 0) { + rfd.reset(); + + sigset_t mask; + sigemptyset(&mask); + sigaddset(&mask, SIGKILL); + RetryEINTR(sigtimedwait)(&mask, nullptr, nullptr); + + // Survived. + char c = 'a'; + TEST_PCHECK(WriteFd(wfd.get(), &c, 1) == 1); + _exit(1); + } + ASSERT_THAT(pid, SyscallSucceeds()); + + wfd.reset(); + + // Wait for child to block in sigtimedwait, then kill it. + absl::SleepFor(kSigtimedwaitSetupTime); + + // Sending SIGKILL will attempt to enqueue the signal twice: once in the + // normal signal sending path, and once to all Tasks in the ThreadGroup when + // applying SIGKILL side-effects. + // + // If we use kill(2), the former will be on the ThreadGroup signal queue and + // the latter will be on the Task signal queue. sigtimedwait can only dequeue + // one signal, so the other would kill the Task, masking bugs. + // + // If we use tkill(2), the former will be on the Task signal queue and the + // latter will be dropped as a duplicate. Then sigtimedwait can theoretically + // dequeue the single SIGKILL. + EXPECT_THAT(syscall(SYS_tkill, pid, SIGKILL), SyscallSucceeds()); + + int status; + EXPECT_THAT(RetryEINTR(waitpid)(pid, &status, 0), + SyscallSucceedsWithValue(pid)); + EXPECT_TRUE(WIFSIGNALED(status) && WTERMSIG(status) == SIGKILL) << status; + + // Child shouldn't have survived. + char c; + EXPECT_THAT(ReadFd(rfd.get(), &c, 1), SyscallSucceedsWithValue(0)); +} + TEST(SigtimedwaitTest, IgnoredUnmaskedSignal) { constexpr int kSigno = SIGUSR1; constexpr auto kSigtimedwaitSetupTime = absl::Seconds(2); -- cgit v1.2.3