Deflake PtraceTest.SeizeSetOptions.

PiperOrigin-RevId: 242226319
Change-Id: Iefc78656841315f6b7d48bd85db451486850264d
author: Jamie Liu <jamieliu@google.com> 2019-04-05 17:53:24 -0700
committer: Shentubot <shentubot@google.com> 2019-04-05 17:54:31 -0700
commit: 124bafc81c7291d31cbe2a74f9bda155d0f71469 (patch)
tree: 4384dc79fe33e7a88ceb0b46e07893a94b895688 /test/syscalls/linux
parent: ee7e6d33b2a017a53bebfdc55d182f53474d4d7d (diff)
1 files changed, 37 insertions, 1 deletions
diff --git a/test/syscalls/linux/ptrace.cc b/test/syscalls/linux/ptrace.cc
index 8fc0045ce..668d49128 100644
--- a/test/syscalls/linux/ptrace.cc
+++ b/test/syscalls/linux/ptrace.cc
@@ -1152,10 +1152,46 @@ TEST(PtraceTest, SeizeSetOptions) {
   EXPECT_TRUE(WIFSTOPPED(status) && WSTOPSIG(status) == (SIGTRAP | 0x80))
       << " status " << status;
 
-  // SIGKILL the child (detaching the tracer) and wait for it to exit.
+  // Clean up the child.
   ASSERT_THAT(kill(child_pid, SIGKILL), SyscallSucceeds());
   ASSERT_THAT(waitpid(child_pid, &status, 0),
               SyscallSucceedsWithValue(child_pid));
+  if (WIFSTOPPED(status) && WSTOPSIG(status) == (SIGTRAP | 0x80)) {
+    // "SIGKILL kills even within system calls (syscall-exit-stop is not
+    // generated prior to death by SIGKILL). The net effect is that SIGKILL
+    // always kills the process (all its threads), even if some threads of the
+    // process are ptraced." - ptrace(2). This is technically true, but...
+    //
+    // When we send SIGKILL to the child, kernel/signal.c:complete_signal() =>
+    // signal_wake_up(resume=1) kicks the tracee out of the syscall-enter-stop.
+    // The pending SIGKILL causes the syscall to be skipped, but the child
+    // thread still reports syscall-exit before checking for pending signals; in
+    // current kernels, this is
+    // arch/x86/entry/common.c:syscall_return_slowpath() =>
+    // syscall_slow_exit_work() =>
+    // include/linux/tracehook.h:tracehook_report_syscall_exit() =>
+    // ptrace_report_syscall() => kernel/signal.c:ptrace_notify() =>
+    // ptrace_do_notify() => ptrace_stop().
+    //
+    // ptrace_stop() sets the task's state to TASK_TRACED and the task's
+    // exit_code to SIGTRAP|0x80 (passed by ptrace_report_syscall()), then calls
+    // freezable_schedule(). freezable_schedule() eventually reaches
+    // __schedule(), which detects signal_pending_state() due to the pending
+    // SIGKILL, sets the task's state back to TASK_RUNNING, and returns without
+    // descheduling. Thus, the task never enters syscall-exit-stop. However, if
+    // our wait4() => kernel/exit.c:wait_task_stopped() racily observes the
+    // TASK_TRACED state and the non-zero exit code set by ptrace_stop() before
+    // __schedule() sets the state back to TASK_RUNNING, it will return the
+    // task's exit_code as status W_STOPCODE(SIGTRAP|0x80). So we get a spurious
+    // syscall-exit-stop notification, and need to wait4() again for task exit.
+    //
+    // gVisor is not susceptible to this race because
+    // kernel.Task.waitCollectTraceeStopLocked() checks specifically for an
+    // active ptraceStop, which is not initiated if SIGKILL is pending.
+    LOG(INFO) << "Observed syscall-exit after SIGKILL";
+    ASSERT_THAT(waitpid(child_pid, &status, 0),
+                SyscallSucceedsWithValue(child_pid));
+  }
   EXPECT_TRUE(WIFSIGNALED(status) && WTERMSIG(status) == SIGKILL)
       << " status " << status;
 }
author	Jamie Liu <jamieliu@google.com>	2019-04-05 17:53:24 -0700
committer	Shentubot <shentubot@google.com>	2019-04-05 17:54:31 -0700
commit	124bafc81c7291d31cbe2a74f9bda155d0f71469 (patch)
tree	4384dc79fe33e7a88ceb0b46e07893a94b895688 /test/syscalls/linux
parent	ee7e6d33b2a017a53bebfdc55d182f53474d4d7d (diff)