summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
-rw-r--r--pkg/sentry/kernel/task_exit.go164
-rw-r--r--pkg/sentry/syscalls/linux/sys_thread.go10
-rw-r--r--test/syscalls/linux/BUILD2
-rw-r--r--test/syscalls/linux/wait.cc185
4 files changed, 236 insertions, 125 deletions
diff --git a/pkg/sentry/kernel/task_exit.go b/pkg/sentry/kernel/task_exit.go
index 6e9701b01..2e1e46582 100644
--- a/pkg/sentry/kernel/task_exit.go
+++ b/pkg/sentry/kernel/task_exit.go
@@ -782,6 +782,10 @@ type WaitOptions struct {
// for.
CloneTasks bool
+ // If SiblingChildren is true, events from children tasks of any task
+ // in the thread group of the waiter are eligible to be waited for.
+ SiblingChildren bool
+
// Events is a bitwise combination of the events defined above that specify
// what events are of interest to the call to Wait.
Events waiter.EventMask
@@ -869,87 +873,109 @@ func (t *Task) waitOnce(opts *WaitOptions) (*WaitResult, error) {
t.tg.pidns.owner.mu.Lock()
defer t.tg.pidns.owner.mu.Unlock()
- // Without the (unimplemented) __WNOTHREAD flag, a task can wait on the
- // children and tracees of any task in the same thread group.
- for parent := t.tg.tasks.Front(); parent != nil; parent = parent.Next() {
- for child := range parent.children {
- if !opts.matchesTask(child, parent.tg.pidns) {
- continue
- }
- // Non-leaders don't notify parents on exit and aren't eligible to
- // be waited on.
- if opts.Events&EventExit != 0 && child == child.tg.leader && !child.exitParentAcked {
- anyWaitableTasks = true
- if wr := t.waitCollectZombieLocked(child, opts, false); wr != nil {
- return wr, nil
- }
- }
- // Check for group stops and continues. Tasks that have passed
- // TaskExitInitiated can no longer participate in group stops.
- if opts.Events&(EventChildGroupStop|EventGroupContinue) == 0 {
- continue
- }
- if child.exitState >= TaskExitInitiated {
- continue
- }
- // If the waiter is in the same thread group as the task's
- // tracer, do not report its group stops; they will be reported
- // as ptrace stops instead. This also skips checking for group
- // continues, but they'll be checked for when scanning tracees
- // below. (Per kernel/exit.c:wait_consider_task(): "If a
- // ptracer wants to distinguish the two events for its own
- // children, it should create a separate process which takes
- // the role of real parent.")
- if tracer := child.Tracer(); tracer != nil && tracer.tg == parent.tg {
- continue
+ if opts.SiblingChildren {
+ // We can wait on the children and tracees of any task in the
+ // same thread group.
+ for parent := t.tg.tasks.Front(); parent != nil; parent = parent.Next() {
+ wr, any := t.waitParentLocked(opts, parent)
+ if wr != nil {
+ return wr, nil
}
+ anyWaitableTasks = anyWaitableTasks || any
+ }
+ } else {
+ // We can only wait on this task.
+ var wr *WaitResult
+ wr, anyWaitableTasks = t.waitParentLocked(opts, t)
+ if wr != nil {
+ return wr, nil
+ }
+ }
+
+ if anyWaitableTasks {
+ return nil, ErrNoWaitableEvent
+ }
+ return nil, syserror.ECHILD
+}
+
+// Preconditions: The TaskSet mutex must be locked for writing.
+func (t *Task) waitParentLocked(opts *WaitOptions, parent *Task) (*WaitResult, bool) {
+ anyWaitableTasks := false
+
+ for child := range parent.children {
+ if !opts.matchesTask(child, parent.tg.pidns) {
+ continue
+ }
+ // Non-leaders don't notify parents on exit and aren't eligible to
+ // be waited on.
+ if opts.Events&EventExit != 0 && child == child.tg.leader && !child.exitParentAcked {
anyWaitableTasks = true
- if opts.Events&EventChildGroupStop != 0 {
- if wr := t.waitCollectChildGroupStopLocked(child, opts); wr != nil {
- return wr, nil
- }
- }
- if opts.Events&EventGroupContinue != 0 {
- if wr := t.waitCollectGroupContinueLocked(child, opts); wr != nil {
- return wr, nil
- }
+ if wr := t.waitCollectZombieLocked(child, opts, false); wr != nil {
+ return wr, anyWaitableTasks
}
}
- for tracee := range parent.ptraceTracees {
- if !opts.matchesTask(tracee, parent.tg.pidns) {
- continue
- }
- // Non-leaders do notify tracers on exit.
- if opts.Events&EventExit != 0 && !tracee.exitTracerAcked {
- anyWaitableTasks = true
- if wr := t.waitCollectZombieLocked(tracee, opts, true); wr != nil {
- return wr, nil
- }
- }
- if opts.Events&(EventTraceeStop|EventGroupContinue) == 0 {
- continue
+ // Check for group stops and continues. Tasks that have passed
+ // TaskExitInitiated can no longer participate in group stops.
+ if opts.Events&(EventChildGroupStop|EventGroupContinue) == 0 {
+ continue
+ }
+ if child.exitState >= TaskExitInitiated {
+ continue
+ }
+ // If the waiter is in the same thread group as the task's
+ // tracer, do not report its group stops; they will be reported
+ // as ptrace stops instead. This also skips checking for group
+ // continues, but they'll be checked for when scanning tracees
+ // below. (Per kernel/exit.c:wait_consider_task(): "If a
+ // ptracer wants to distinguish the two events for its own
+ // children, it should create a separate process which takes
+ // the role of real parent.")
+ if tracer := child.Tracer(); tracer != nil && tracer.tg == parent.tg {
+ continue
+ }
+ anyWaitableTasks = true
+ if opts.Events&EventChildGroupStop != 0 {
+ if wr := t.waitCollectChildGroupStopLocked(child, opts); wr != nil {
+ return wr, anyWaitableTasks
}
- if tracee.exitState >= TaskExitInitiated {
- continue
+ }
+ if opts.Events&EventGroupContinue != 0 {
+ if wr := t.waitCollectGroupContinueLocked(child, opts); wr != nil {
+ return wr, anyWaitableTasks
}
+ }
+ }
+ for tracee := range parent.ptraceTracees {
+ if !opts.matchesTask(tracee, parent.tg.pidns) {
+ continue
+ }
+ // Non-leaders do notify tracers on exit.
+ if opts.Events&EventExit != 0 && !tracee.exitTracerAcked {
anyWaitableTasks = true
- if opts.Events&EventTraceeStop != 0 {
- if wr := t.waitCollectTraceeStopLocked(tracee, opts); wr != nil {
- return wr, nil
- }
+ if wr := t.waitCollectZombieLocked(tracee, opts, true); wr != nil {
+ return wr, anyWaitableTasks
}
- if opts.Events&EventGroupContinue != 0 {
- if wr := t.waitCollectGroupContinueLocked(tracee, opts); wr != nil {
- return wr, nil
- }
+ }
+ if opts.Events&(EventTraceeStop|EventGroupContinue) == 0 {
+ continue
+ }
+ if tracee.exitState >= TaskExitInitiated {
+ continue
+ }
+ anyWaitableTasks = true
+ if opts.Events&EventTraceeStop != 0 {
+ if wr := t.waitCollectTraceeStopLocked(tracee, opts); wr != nil {
+ return wr, anyWaitableTasks
+ }
+ }
+ if opts.Events&EventGroupContinue != 0 {
+ if wr := t.waitCollectGroupContinueLocked(tracee, opts); wr != nil {
+ return wr, anyWaitableTasks
}
}
}
- if anyWaitableTasks {
- return nil, ErrNoWaitableEvent
- }
- return nil, syserror.ECHILD
+ return nil, anyWaitableTasks
}
// Preconditions: The TaskSet mutex must be locked for writing.
diff --git a/pkg/sentry/syscalls/linux/sys_thread.go b/pkg/sentry/syscalls/linux/sys_thread.go
index cc441460c..14fa7ef92 100644
--- a/pkg/sentry/syscalls/linux/sys_thread.go
+++ b/pkg/sentry/syscalls/linux/sys_thread.go
@@ -183,7 +183,7 @@ func Vfork(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall
// wait4 waits for the given child process to exit.
func wait4(t *kernel.Task, pid int, statusAddr usermem.Addr, options int, rusageAddr usermem.Addr) (uintptr, error) {
- if options&^(linux.WNOHANG|linux.WUNTRACED|linux.WCONTINUED|linux.WALL|linux.WCLONE) != 0 {
+ if options&^(linux.WNOHANG|linux.WUNTRACED|linux.WCONTINUED|linux.WNOTHREAD|linux.WALL|linux.WCLONE) != 0 {
return 0, syscall.EINVAL
}
wopts := kernel.WaitOptions{
@@ -227,6 +227,9 @@ func wait4(t *kernel.Task, pid int, statusAddr usermem.Addr, options int, rusage
if options&linux.WNOHANG == 0 {
wopts.BlockInterruptErr = kernel.ERESTARTSYS
}
+ if options&linux.WNOTHREAD == 0 {
+ wopts.SiblingChildren = true
+ }
wr, err := t.Wait(&wopts)
if err != nil {
@@ -278,7 +281,7 @@ func Waitid(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal
options := int(args[3].Uint())
rusageAddr := args[4].Pointer()
- if options&^(linux.WNOHANG|linux.WEXITED|linux.WSTOPPED|linux.WCONTINUED|linux.WNOWAIT) != 0 {
+ if options&^(linux.WNOHANG|linux.WEXITED|linux.WSTOPPED|linux.WCONTINUED|linux.WNOWAIT|linux.WNOTHREAD) != 0 {
return 0, nil, syscall.EINVAL
}
if options&(linux.WEXITED|linux.WSTOPPED|linux.WCONTINUED) == 0 {
@@ -310,6 +313,9 @@ func Waitid(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal
if options&linux.WNOHANG == 0 {
wopts.BlockInterruptErr = kernel.ERESTARTSYS
}
+ if options&linux.WNOTHREAD == 0 {
+ wopts.SiblingChildren = true
+ }
wr, err := t.Wait(&wopts)
if err != nil {
diff --git a/test/syscalls/linux/BUILD b/test/syscalls/linux/BUILD
index e8caf31fc..014679ec5 100644
--- a/test/syscalls/linux/BUILD
+++ b/test/syscalls/linux/BUILD
@@ -3179,7 +3179,9 @@ cc_binary(
"//test/util:signal_util",
"//test/util:test_main",
"//test/util:test_util",
+ "//test/util:thread_util",
"@com_google_absl//absl/strings",
+ "@com_google_absl//absl/synchronization",
"@com_google_absl//absl/time",
"@com_google_googletest//:gtest",
],
diff --git a/test/syscalls/linux/wait.cc b/test/syscalls/linux/wait.cc
index 50d0725a7..da3b97828 100644
--- a/test/syscalls/linux/wait.cc
+++ b/test/syscalls/linux/wait.cc
@@ -21,11 +21,13 @@
#include <unistd.h>
#include <functional>
+#include <tuple>
#include <vector>
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "absl/strings/str_cat.h"
+#include "absl/synchronization/mutex.h"
#include "absl/time/clock.h"
#include "absl/time/time.h"
#include "test/util/cleanup.h"
@@ -34,6 +36,7 @@
#include "test/util/posix_error.h"
#include "test/util/signal_util.h"
#include "test/util/test_util.h"
+#include "test/util/thread_util.h"
using ::testing::UnorderedElementsAre;
@@ -42,10 +45,8 @@ using ::testing::UnorderedElementsAre;
//
// NOTE(b/22640830,b/27680907,b/29049891): Some functionality is not tested as
// it is not currently supported by gVisor:
-// * UID in waitid(2) siginfo.
// * Process groups.
// * Core dump status (WCOREDUMP).
-// * Linux only option __WNOTHREAD.
//
// Tests for waiting on stopped/continued children are in sigstop.cc.
@@ -357,13 +358,22 @@ INSTANTIATE_TEST_SUITE_P(
return static_cast<pid_t>(si.si_pid);
}));
-// Fixture for tests parameterized by a function that takes the PID of a
-// specific child to wait for, waits for it to exit, and checks that it exits
-// with the given code.
+// Fixture for tests parameterized by a (sysno, function) tuple. The function
+// takes the PID of a specific child to wait for, waits for it to exit, and
+// checks that it exits with the given code.
class WaitSpecificChildTest
- : public ::testing::TestWithParam<std::function<PosixError(pid_t, int)>> {
+ : public ::testing::TestWithParam<
+ std::tuple<int, std::function<PosixError(pid_t, int, int)>>> {
protected:
- PosixError WaitFor(pid_t pid, int code) { return GetParam()(pid, code); }
+ int Sysno() { return std::get<0>(GetParam()); }
+
+ PosixError WaitForWithOptions(pid_t pid, int options, int code) {
+ return std::get<1>(GetParam())(pid, options, code);
+ }
+
+ PosixError WaitFor(pid_t pid, int code) {
+ return std::get<1>(GetParam())(pid, 0, code);
+ }
};
// Wait for specific child to exit.
@@ -432,6 +442,75 @@ TEST_P(WaitSpecificChildTest, AfterExit) {
EXPECT_NO_ERRNO(WaitFor(child, 0));
}
+// Wait for child of sibling thread.
+TEST_P(WaitSpecificChildTest, SiblingChildren) {
+ absl::Mutex mu;
+ pid_t child;
+ bool ready = false;
+ bool stop = false;
+
+ ScopedThread t([&] {
+ absl::MutexLock ml(&mu);
+ EXPECT_THAT(child = ForkAndExit(0, 0), SyscallSucceeds());
+ ready = true;
+ mu.Await(absl::Condition(&stop));
+ });
+
+ // N.B. This must be declared after ScopedThread, so it is destructed first,
+ // thus waking the thread.
+ absl::MutexLock ml(&mu);
+ mu.Await(absl::Condition(&ready));
+
+ EXPECT_NO_ERRNO(WaitFor(child, 0));
+
+ // Keep the sibling alive until after we've waited so the child isn't
+ // reparented.
+ stop = true;
+}
+
+// Waiting for child of sibling thread not allowed with WNOTHREAD.
+TEST_P(WaitSpecificChildTest, SiblingChildrenWNOTHREAD) {
+ // Linux added WNOTHREAD support to waitid(2) in
+ // 91c4e8ea8f05916df0c8a6f383508ac7c9e10dba ("wait: allow sys_waitid() to
+ // accept __WNOTHREAD/__WCLONE/__WALL"). i.e., Linux 4.7.
+ //
+ // Skip the test if it isn't supported yet.
+ if (Sysno() == SYS_waitid) {
+ int ret = waitid(P_ALL, 0, nullptr, WEXITED | WNOHANG | __WNOTHREAD);
+ SKIP_IF(ret < 0 && errno == EINVAL);
+ }
+
+ absl::Mutex mu;
+ pid_t child;
+ bool ready = false;
+ bool stop = false;
+
+ ScopedThread t([&] {
+ absl::MutexLock ml(&mu);
+ EXPECT_THAT(child = ForkAndExit(0, 0), SyscallSucceeds());
+ ready = true;
+ mu.Await(absl::Condition(&stop));
+
+ // This thread can wait on child.
+ EXPECT_NO_ERRNO(WaitForWithOptions(child, __WNOTHREAD, 0));
+ });
+
+ // N.B. This must be declared after ScopedThread, so it is destructed first,
+ // thus waking the thread.
+ absl::MutexLock ml(&mu);
+ mu.Await(absl::Condition(&ready));
+
+ // This thread can't wait on child.
+ EXPECT_THAT(
+ WaitForWithOptions(child, __WNOTHREAD, 0),
+ PosixErrorIs(ECHILD, ::testing::AnyOf(::testing::StrEq("waitid"),
+ ::testing::StrEq("wait4"))));
+
+ // Keep the sibling alive until after we've waited so the child isn't
+ // reparented.
+ stop = true;
+}
+
// Wait for specific child to exit.
// A non-CLONE_THREAD child which sends SIGCHLD upon exit behaves much like
// a forked process.
@@ -551,55 +630,53 @@ TEST_P(WaitSpecificChildTest, AfterChildExecve) {
EXPECT_NO_ERRNO(WaitFor(child, 0));
}
-INSTANTIATE_TEST_SUITE_P(
- Waiters, WaitSpecificChildTest,
- ::testing::Values(
- [](pid_t pid, int code) -> PosixError {
- int status;
- auto const rv = Wait4(pid, &status, 0, nullptr);
- MaybeSave();
- if (rv < 0) {
- return PosixError(errno, "wait4");
- } else if (rv != pid) {
- return PosixError(EINVAL, absl::StrCat("unexpected pid: got ", rv,
- ", wanted ", pid));
- }
- if (!WIFEXITED(status) || WEXITSTATUS(status) != code) {
- return PosixError(
- EINVAL, absl::StrCat("unexpected wait status: got ", status,
- ", wanted ", code));
- }
- return NoError();
- },
- [](pid_t pid, int code) -> PosixError {
- siginfo_t si;
- auto const rv = Waitid(P_PID, pid, &si, WEXITED);
- MaybeSave();
- if (rv < 0) {
- return PosixError(errno, "waitid");
- }
- if (si.si_pid != pid) {
- return PosixError(EINVAL,
- absl::StrCat("unexpected pid: got ", si.si_pid,
+PosixError CheckWait4(pid_t pid, int options, int code) {
+ int status;
+ auto const rv = Wait4(pid, &status, options, nullptr);
+ MaybeSave();
+ if (rv < 0) {
+ return PosixError(errno, "wait4");
+ } else if (rv != pid) {
+ return PosixError(
+ EINVAL, absl::StrCat("unexpected pid: got ", rv, ", wanted ", pid));
+ }
+ if (!WIFEXITED(status) || WEXITSTATUS(status) != code) {
+ return PosixError(EINVAL, absl::StrCat("unexpected wait status: got ",
+ status, ", wanted ", code));
+ }
+ return NoError();
+};
+
+PosixError CheckWaitid(pid_t pid, int options, int code) {
+ siginfo_t si;
+ auto const rv = Waitid(P_PID, pid, &si, options | WEXITED);
+ MaybeSave();
+ if (rv < 0) {
+ return PosixError(errno, "waitid");
+ }
+ if (si.si_pid != pid) {
+ return PosixError(EINVAL, absl::StrCat("unexpected pid: got ", si.si_pid,
", wanted ", pid));
- }
- if (si.si_signo != SIGCHLD) {
- return PosixError(
- EINVAL, absl::StrCat("unexpected signo: got ", si.si_signo,
- ", wanted ", SIGCHLD));
- }
- if (si.si_status != code) {
- return PosixError(
- EINVAL, absl::StrCat("unexpected status: got ", si.si_status,
- ", wanted ", code));
- }
- if (si.si_code != CLD_EXITED) {
- return PosixError(EINVAL,
- absl::StrCat("unexpected code: got ", si.si_code,
+ }
+ if (si.si_signo != SIGCHLD) {
+ return PosixError(EINVAL, absl::StrCat("unexpected signo: got ",
+ si.si_signo, ", wanted ", SIGCHLD));
+ }
+ if (si.si_status != code) {
+ return PosixError(EINVAL, absl::StrCat("unexpected status: got ",
+ si.si_status, ", wanted ", code));
+ }
+ if (si.si_code != CLD_EXITED) {
+ return PosixError(EINVAL, absl::StrCat("unexpected code: got ", si.si_code,
", wanted ", CLD_EXITED));
- }
- return NoError();
- }));
+ }
+ return NoError();
+}
+
+INSTANTIATE_TEST_SUITE_P(
+ Waiters, WaitSpecificChildTest,
+ ::testing::Values(std::make_tuple(SYS_wait4, CheckWait4),
+ std::make_tuple(SYS_waitid, CheckWaitid)));
// WIFEXITED, WIFSIGNALED, WTERMSIG indicate signal exit.
TEST(WaitTest, SignalExit) {