Diffstat (limited to 'pkg/sentry')
-rw-r--r--  pkg/sentry/kernel/task_exit.go            164
-rw-r--r--  pkg/sentry/syscalls/linux/sys_thread.go    10
2 files changed, 103 insertions, 71 deletions
diff --git a/pkg/sentry/kernel/task_exit.go b/pkg/sentry/kernel/task_exit.go
index 6e9701b01..2e1e46582 100644
--- a/pkg/sentry/kernel/task_exit.go
+++ b/pkg/sentry/kernel/task_exit.go
@@ -782,6 +782,10 @@ type WaitOptions struct {
// for.
CloneTasks bool
+ // If SiblingChildren is true, events from the child tasks of any task
+ // in the waiter's thread group are eligible to be waited for.
+ SiblingChildren bool
+
// Events is a bitwise combination of the events defined above that specify
// what events are of interest to the call to Wait.
Events waiter.EventMask
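
For context, a minimal sketch of how a sentry-side caller could opt into the new field. Only the identifiers that appear in this change (kernel.WaitOptions, SiblingChildren, Events, BlockInterruptErr, Task.Wait) are taken from the source; the package name, import path, and surrounding function are assumptions.

package example // hypothetical caller; real users of this field live in pkg/sentry/syscalls/linux

import "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" // import path assumed from this repository layout

// waitForAnySiblingChild blocks until a child of any task in t's thread
// group exits, i.e. the wait4 behaviour when __WNOTHREAD is not passed.
func waitForAnySiblingChild(t *kernel.Task) (*kernel.WaitResult, error) {
	wopts := kernel.WaitOptions{
		Events:            kernel.EventExit,
		SiblingChildren:   true, // the field added above
		BlockInterruptErr: kernel.ERESTARTSYS,
	}
	return t.Wait(&wopts)
}
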
@@ -869,87 +873,109 @@ func (t *Task) waitOnce(opts *WaitOptions) (*WaitResult, error) {
t.tg.pidns.owner.mu.Lock()
defer t.tg.pidns.owner.mu.Unlock()
- // Without the (unimplemented) __WNOTHREAD flag, a task can wait on the
- // children and tracees of any task in the same thread group.
- for parent := t.tg.tasks.Front(); parent != nil; parent = parent.Next() {
- for child := range parent.children {
- if !opts.matchesTask(child, parent.tg.pidns) {
- continue
- }
- // Non-leaders don't notify parents on exit and aren't eligible to
- // be waited on.
- if opts.Events&EventExit != 0 && child == child.tg.leader && !child.exitParentAcked {
- anyWaitableTasks = true
- if wr := t.waitCollectZombieLocked(child, opts, false); wr != nil {
- return wr, nil
- }
- }
- // Check for group stops and continues. Tasks that have passed
- // TaskExitInitiated can no longer participate in group stops.
- if opts.Events&(EventChildGroupStop|EventGroupContinue) == 0 {
- continue
- }
- if child.exitState >= TaskExitInitiated {
- continue
- }
- // If the waiter is in the same thread group as the task's
- // tracer, do not report its group stops; they will be reported
- // as ptrace stops instead. This also skips checking for group
- // continues, but they'll be checked for when scanning tracees
- // below. (Per kernel/exit.c:wait_consider_task(): "If a
- // ptracer wants to distinguish the two events for its own
- // children, it should create a separate process which takes
- // the role of real parent.")
- if tracer := child.Tracer(); tracer != nil && tracer.tg == parent.tg {
- continue
+ if opts.SiblingChildren {
+ // We can wait on the children and tracees of any task in the
+ // same thread group.
+ for parent := t.tg.tasks.Front(); parent != nil; parent = parent.Next() {
+ wr, any := t.waitParentLocked(opts, parent)
+ if wr != nil {
+ return wr, nil
}
+ anyWaitableTasks = anyWaitableTasks || any
+ }
+ } else {
+ // We can only wait on the children and tracees of this task.
+ var wr *WaitResult
+ wr, anyWaitableTasks = t.waitParentLocked(opts, t)
+ if wr != nil {
+ return wr, nil
+ }
+ }
+
+ if anyWaitableTasks {
+ return nil, ErrNoWaitableEvent
+ }
+ return nil, syserror.ECHILD
+}
+
+// Preconditions: The TaskSet mutex must be locked for writing.
+func (t *Task) waitParentLocked(opts *WaitOptions, parent *Task) (*WaitResult, bool) {
+ anyWaitableTasks := false
+
+ for child := range parent.children {
+ if !opts.matchesTask(child, parent.tg.pidns) {
+ continue
+ }
+ // Non-leaders don't notify parents on exit and aren't eligible to
+ // be waited on.
+ if opts.Events&EventExit != 0 && child == child.tg.leader && !child.exitParentAcked {
anyWaitableTasks = true
- if opts.Events&EventChildGroupStop != 0 {
- if wr := t.waitCollectChildGroupStopLocked(child, opts); wr != nil {
- return wr, nil
- }
- }
- if opts.Events&EventGroupContinue != 0 {
- if wr := t.waitCollectGroupContinueLocked(child, opts); wr != nil {
- return wr, nil
- }
+ if wr := t.waitCollectZombieLocked(child, opts, false); wr != nil {
+ return wr, anyWaitableTasks
}
}
- for tracee := range parent.ptraceTracees {
- if !opts.matchesTask(tracee, parent.tg.pidns) {
- continue
- }
- // Non-leaders do notify tracers on exit.
- if opts.Events&EventExit != 0 && !tracee.exitTracerAcked {
- anyWaitableTasks = true
- if wr := t.waitCollectZombieLocked(tracee, opts, true); wr != nil {
- return wr, nil
- }
- }
- if opts.Events&(EventTraceeStop|EventGroupContinue) == 0 {
- continue
+ // Check for group stops and continues. Tasks that have passed
+ // TaskExitInitiated can no longer participate in group stops.
+ if opts.Events&(EventChildGroupStop|EventGroupContinue) == 0 {
+ continue
+ }
+ if child.exitState >= TaskExitInitiated {
+ continue
+ }
+ // If the waiter is in the same thread group as the task's
+ // tracer, do not report its group stops; they will be reported
+ // as ptrace stops instead. This also skips checking for group
+ // continues, but they'll be checked for when scanning tracees
+ // below. (Per kernel/exit.c:wait_consider_task(): "If a
+ // ptracer wants to distinguish the two events for its own
+ // children, it should create a separate process which takes
+ // the role of real parent.")
+ if tracer := child.Tracer(); tracer != nil && tracer.tg == parent.tg {
+ continue
+ }
+ anyWaitableTasks = true
+ if opts.Events&EventChildGroupStop != 0 {
+ if wr := t.waitCollectChildGroupStopLocked(child, opts); wr != nil {
+ return wr, anyWaitableTasks
}
- if tracee.exitState >= TaskExitInitiated {
- continue
+ }
+ if opts.Events&EventGroupContinue != 0 {
+ if wr := t.waitCollectGroupContinueLocked(child, opts); wr != nil {
+ return wr, anyWaitableTasks
}
+ }
+ }
+ for tracee := range parent.ptraceTracees {
+ if !opts.matchesTask(tracee, parent.tg.pidns) {
+ continue
+ }
+ // Non-leaders do notify tracers on exit.
+ if opts.Events&EventExit != 0 && !tracee.exitTracerAcked {
anyWaitableTasks = true
- if opts.Events&EventTraceeStop != 0 {
- if wr := t.waitCollectTraceeStopLocked(tracee, opts); wr != nil {
- return wr, nil
- }
+ if wr := t.waitCollectZombieLocked(tracee, opts, true); wr != nil {
+ return wr, anyWaitableTasks
}
- if opts.Events&EventGroupContinue != 0 {
- if wr := t.waitCollectGroupContinueLocked(tracee, opts); wr != nil {
- return wr, nil
- }
+ }
+ if opts.Events&(EventTraceeStop|EventGroupContinue) == 0 {
+ continue
+ }
+ if tracee.exitState >= TaskExitInitiated {
+ continue
+ }
+ anyWaitableTasks = true
+ if opts.Events&EventTraceeStop != 0 {
+ if wr := t.waitCollectTraceeStopLocked(tracee, opts); wr != nil {
+ return wr, anyWaitableTasks
+ }
+ }
+ if opts.Events&EventGroupContinue != 0 {
+ if wr := t.waitCollectGroupContinueLocked(tracee, opts); wr != nil {
+ return wr, anyWaitableTasks
}
}
}
- if anyWaitableTasks {
- return nil, ErrNoWaitableEvent
- }
- return nil, syserror.ECHILD
+ return nil, anyWaitableTasks
}
// Preconditions: The TaskSet mutex must be locked for writing.
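
The refactor above changes the return contract: waitParentLocked reports both an optional result and whether anything waitable was seen, while waitOnce stops at the first result and ORs the booleans across the thread group. Below is a self-contained model of that control flow, using hypothetical names and placeholder types rather than sentry code.

package main

import (
	"errors"
	"fmt"
)

var (
	errNoWaitableEvent = errors.New("no waitable event yet") // stand-in for ErrNoWaitableEvent
	errECHILD          = errors.New("no child processes")    // stand-in for syserror.ECHILD
)

type result struct{ pid int }

// waitAcrossParents scans one parent at a time, returns the first
// deliverable event, and otherwise remembers whether anything was waitable,
// mirroring the waitOnce/waitParentLocked split above.
func waitAcrossParents(parents []int, scan func(parent int) (*result, bool)) (*result, error) {
	anyWaitable := false
	for _, p := range parents {
		wr, any := scan(p)
		if wr != nil {
			return wr, nil
		}
		anyWaitable = anyWaitable || any
	}
	if anyWaitable {
		return nil, errNoWaitableEvent // caller should block and retry
	}
	return nil, errECHILD // nothing will ever become waitable
}

func main() {
	scan := func(parent int) (*result, bool) {
		switch parent {
		case 2:
			return nil, true // has a child, but no event yet
		case 3:
			return &result{pid: 30}, true // has a reapable child
		}
		return nil, false // no eligible children
	}
	fmt.Println(waitAcrossParents([]int{1, 2, 3}, scan)) // &{30} <nil>
	fmt.Println(waitAcrossParents([]int{1, 2}, scan))    // <nil> no waitable event yet
	fmt.Println(waitAcrossParents([]int{1}, scan))       // <nil> no child processes
}
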
diff --git a/pkg/sentry/syscalls/linux/sys_thread.go b/pkg/sentry/syscalls/linux/sys_thread.go
index cc441460c..14fa7ef92 100644
--- a/pkg/sentry/syscalls/linux/sys_thread.go
+++ b/pkg/sentry/syscalls/linux/sys_thread.go
@@ -183,7 +183,7 @@ func Vfork(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall
// wait4 waits for the given child process to exit.
func wait4(t *kernel.Task, pid int, statusAddr usermem.Addr, options int, rusageAddr usermem.Addr) (uintptr, error) {
- if options&^(linux.WNOHANG|linux.WUNTRACED|linux.WCONTINUED|linux.WALL|linux.WCLONE) != 0 {
+ if options&^(linux.WNOHANG|linux.WUNTRACED|linux.WCONTINUED|linux.WNOTHREAD|linux.WALL|linux.WCLONE) != 0 {
return 0, syscall.EINVAL
}
wopts := kernel.WaitOptions{
@@ -227,6 +227,9 @@ func wait4(t *kernel.Task, pid int, statusAddr usermem.Addr, options int, rusage
if options&linux.WNOHANG == 0 {
wopts.BlockInterruptErr = kernel.ERESTARTSYS
}
+ if options&linux.WNOTHREAD == 0 {
+ wopts.SiblingChildren = true
+ }
wr, err := t.Wait(&wopts)
if err != nil {
@@ -278,7 +281,7 @@ func Waitid(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal
options := int(args[3].Uint())
rusageAddr := args[4].Pointer()
- if options&^(linux.WNOHANG|linux.WEXITED|linux.WSTOPPED|linux.WCONTINUED|linux.WNOWAIT) != 0 {
+ if options&^(linux.WNOHANG|linux.WEXITED|linux.WSTOPPED|linux.WCONTINUED|linux.WNOWAIT|linux.WNOTHREAD) != 0 {
return 0, nil, syscall.EINVAL
}
if options&(linux.WEXITED|linux.WSTOPPED|linux.WCONTINUED) == 0 {
@@ -310,6 +313,9 @@ func Waitid(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal
if options&linux.WNOHANG == 0 {
wopts.BlockInterruptErr = kernel.ERESTARTSYS
}
+ if options&linux.WNOTHREAD == 0 {
+ wopts.SiblingChildren = true
+ }
wr, err := t.Wait(&wopts)
if err != nil {
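
Taken together, the syscall-layer change is: accept __WNOTHREAD in the allowed-options mask, then default to sibling-children waiting unless it is set (matching Linux, where __WNOTHREAD narrows the wait to the calling thread's own children and tracees). Below is a self-contained sketch of that option handling; the constants carry Linux's numeric values but are local stand-ins, not the sentry's abi/linux package.

package main

import (
	"errors"
	"fmt"
)

const (
	wNOHANG    = 0x00000001
	wUNTRACED  = 0x00000002
	wCONTINUED = 0x00000008
	wNOTHREAD  = 0x20000000 // __WNOTHREAD
	wALL       = 0x40000000 // __WALL
	wCLONE     = 0x80000000 // __WCLONE
)

var errEINVAL = errors.New("EINVAL")

type waitOptions struct {
	SiblingChildren bool // wait on children of sibling threads too
	NonBlocking     bool // WNOHANG
}

// parseWait4Options mirrors the validation and mapping added above: unknown
// bits are rejected, and the *absence* of __WNOTHREAD enables sibling waiting.
func parseWait4Options(options int) (waitOptions, error) {
	if options&^(wNOHANG|wUNTRACED|wCONTINUED|wNOTHREAD|wALL|wCLONE) != 0 {
		return waitOptions{}, errEINVAL
	}
	return waitOptions{
		SiblingChildren: options&wNOTHREAD == 0,
		NonBlocking:     options&wNOHANG != 0,
	}, nil
}

func main() {
	fmt.Println(parseWait4Options(wNOHANG))             // {true true} <nil>
	fmt.Println(parseWait4Options(wNOHANG | wNOTHREAD)) // {false true} <nil>
	fmt.Println(parseWait4Options(0x00000010))          // {false false} EINVAL
}
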