diff options
Diffstat (limited to 'pkg')
-rw-r--r-- | pkg/sentry/kernel/task_exit.go | 164 | ||||
-rw-r--r-- | pkg/sentry/syscalls/linux/sys_thread.go | 10 |
2 files changed, 103 insertions, 71 deletions
diff --git a/pkg/sentry/kernel/task_exit.go b/pkg/sentry/kernel/task_exit.go index 6e9701b01..2e1e46582 100644 --- a/pkg/sentry/kernel/task_exit.go +++ b/pkg/sentry/kernel/task_exit.go @@ -782,6 +782,10 @@ type WaitOptions struct { // for. CloneTasks bool + // If SiblingChildren is true, events from children tasks of any task + // in the thread group of the waiter are eligible to be waited for. + SiblingChildren bool + // Events is a bitwise combination of the events defined above that specify // what events are of interest to the call to Wait. Events waiter.EventMask @@ -869,87 +873,109 @@ func (t *Task) waitOnce(opts *WaitOptions) (*WaitResult, error) { t.tg.pidns.owner.mu.Lock() defer t.tg.pidns.owner.mu.Unlock() - // Without the (unimplemented) __WNOTHREAD flag, a task can wait on the - // children and tracees of any task in the same thread group. - for parent := t.tg.tasks.Front(); parent != nil; parent = parent.Next() { - for child := range parent.children { - if !opts.matchesTask(child, parent.tg.pidns) { - continue - } - // Non-leaders don't notify parents on exit and aren't eligible to - // be waited on. - if opts.Events&EventExit != 0 && child == child.tg.leader && !child.exitParentAcked { - anyWaitableTasks = true - if wr := t.waitCollectZombieLocked(child, opts, false); wr != nil { - return wr, nil - } - } - // Check for group stops and continues. Tasks that have passed - // TaskExitInitiated can no longer participate in group stops. - if opts.Events&(EventChildGroupStop|EventGroupContinue) == 0 { - continue - } - if child.exitState >= TaskExitInitiated { - continue - } - // If the waiter is in the same thread group as the task's - // tracer, do not report its group stops; they will be reported - // as ptrace stops instead. This also skips checking for group - // continues, but they'll be checked for when scanning tracees - // below. (Per kernel/exit.c:wait_consider_task(): "If a - // ptracer wants to distinguish the two events for its own - // children, it should create a separate process which takes - // the role of real parent.") - if tracer := child.Tracer(); tracer != nil && tracer.tg == parent.tg { - continue + if opts.SiblingChildren { + // We can wait on the children and tracees of any task in the + // same thread group. + for parent := t.tg.tasks.Front(); parent != nil; parent = parent.Next() { + wr, any := t.waitParentLocked(opts, parent) + if wr != nil { + return wr, nil } + anyWaitableTasks = anyWaitableTasks || any + } + } else { + // We can only wait on this task. + var wr *WaitResult + wr, anyWaitableTasks = t.waitParentLocked(opts, t) + if wr != nil { + return wr, nil + } + } + + if anyWaitableTasks { + return nil, ErrNoWaitableEvent + } + return nil, syserror.ECHILD +} + +// Preconditions: The TaskSet mutex must be locked for writing. +func (t *Task) waitParentLocked(opts *WaitOptions, parent *Task) (*WaitResult, bool) { + anyWaitableTasks := false + + for child := range parent.children { + if !opts.matchesTask(child, parent.tg.pidns) { + continue + } + // Non-leaders don't notify parents on exit and aren't eligible to + // be waited on. + if opts.Events&EventExit != 0 && child == child.tg.leader && !child.exitParentAcked { anyWaitableTasks = true - if opts.Events&EventChildGroupStop != 0 { - if wr := t.waitCollectChildGroupStopLocked(child, opts); wr != nil { - return wr, nil - } - } - if opts.Events&EventGroupContinue != 0 { - if wr := t.waitCollectGroupContinueLocked(child, opts); wr != nil { - return wr, nil - } + if wr := t.waitCollectZombieLocked(child, opts, false); wr != nil { + return wr, anyWaitableTasks } } - for tracee := range parent.ptraceTracees { - if !opts.matchesTask(tracee, parent.tg.pidns) { - continue - } - // Non-leaders do notify tracers on exit. - if opts.Events&EventExit != 0 && !tracee.exitTracerAcked { - anyWaitableTasks = true - if wr := t.waitCollectZombieLocked(tracee, opts, true); wr != nil { - return wr, nil - } - } - if opts.Events&(EventTraceeStop|EventGroupContinue) == 0 { - continue + // Check for group stops and continues. Tasks that have passed + // TaskExitInitiated can no longer participate in group stops. + if opts.Events&(EventChildGroupStop|EventGroupContinue) == 0 { + continue + } + if child.exitState >= TaskExitInitiated { + continue + } + // If the waiter is in the same thread group as the task's + // tracer, do not report its group stops; they will be reported + // as ptrace stops instead. This also skips checking for group + // continues, but they'll be checked for when scanning tracees + // below. (Per kernel/exit.c:wait_consider_task(): "If a + // ptracer wants to distinguish the two events for its own + // children, it should create a separate process which takes + // the role of real parent.") + if tracer := child.Tracer(); tracer != nil && tracer.tg == parent.tg { + continue + } + anyWaitableTasks = true + if opts.Events&EventChildGroupStop != 0 { + if wr := t.waitCollectChildGroupStopLocked(child, opts); wr != nil { + return wr, anyWaitableTasks } - if tracee.exitState >= TaskExitInitiated { - continue + } + if opts.Events&EventGroupContinue != 0 { + if wr := t.waitCollectGroupContinueLocked(child, opts); wr != nil { + return wr, anyWaitableTasks } + } + } + for tracee := range parent.ptraceTracees { + if !opts.matchesTask(tracee, parent.tg.pidns) { + continue + } + // Non-leaders do notify tracers on exit. + if opts.Events&EventExit != 0 && !tracee.exitTracerAcked { anyWaitableTasks = true - if opts.Events&EventTraceeStop != 0 { - if wr := t.waitCollectTraceeStopLocked(tracee, opts); wr != nil { - return wr, nil - } + if wr := t.waitCollectZombieLocked(tracee, opts, true); wr != nil { + return wr, anyWaitableTasks } - if opts.Events&EventGroupContinue != 0 { - if wr := t.waitCollectGroupContinueLocked(tracee, opts); wr != nil { - return wr, nil - } + } + if opts.Events&(EventTraceeStop|EventGroupContinue) == 0 { + continue + } + if tracee.exitState >= TaskExitInitiated { + continue + } + anyWaitableTasks = true + if opts.Events&EventTraceeStop != 0 { + if wr := t.waitCollectTraceeStopLocked(tracee, opts); wr != nil { + return wr, anyWaitableTasks + } + } + if opts.Events&EventGroupContinue != 0 { + if wr := t.waitCollectGroupContinueLocked(tracee, opts); wr != nil { + return wr, anyWaitableTasks } } } - if anyWaitableTasks { - return nil, ErrNoWaitableEvent - } - return nil, syserror.ECHILD + return nil, anyWaitableTasks } // Preconditions: The TaskSet mutex must be locked for writing. diff --git a/pkg/sentry/syscalls/linux/sys_thread.go b/pkg/sentry/syscalls/linux/sys_thread.go index cc441460c..14fa7ef92 100644 --- a/pkg/sentry/syscalls/linux/sys_thread.go +++ b/pkg/sentry/syscalls/linux/sys_thread.go @@ -183,7 +183,7 @@ func Vfork(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall // wait4 waits for the given child process to exit. func wait4(t *kernel.Task, pid int, statusAddr usermem.Addr, options int, rusageAddr usermem.Addr) (uintptr, error) { - if options&^(linux.WNOHANG|linux.WUNTRACED|linux.WCONTINUED|linux.WALL|linux.WCLONE) != 0 { + if options&^(linux.WNOHANG|linux.WUNTRACED|linux.WCONTINUED|linux.WNOTHREAD|linux.WALL|linux.WCLONE) != 0 { return 0, syscall.EINVAL } wopts := kernel.WaitOptions{ @@ -227,6 +227,9 @@ func wait4(t *kernel.Task, pid int, statusAddr usermem.Addr, options int, rusage if options&linux.WNOHANG == 0 { wopts.BlockInterruptErr = kernel.ERESTARTSYS } + if options&linux.WNOTHREAD == 0 { + wopts.SiblingChildren = true + } wr, err := t.Wait(&wopts) if err != nil { @@ -278,7 +281,7 @@ func Waitid(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal options := int(args[3].Uint()) rusageAddr := args[4].Pointer() - if options&^(linux.WNOHANG|linux.WEXITED|linux.WSTOPPED|linux.WCONTINUED|linux.WNOWAIT) != 0 { + if options&^(linux.WNOHANG|linux.WEXITED|linux.WSTOPPED|linux.WCONTINUED|linux.WNOWAIT|linux.WNOTHREAD) != 0 { return 0, nil, syscall.EINVAL } if options&(linux.WEXITED|linux.WSTOPPED|linux.WCONTINUED) == 0 { @@ -310,6 +313,9 @@ func Waitid(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal if options&linux.WNOHANG == 0 { wopts.BlockInterruptErr = kernel.ERESTARTSYS } + if options&linux.WNOTHREAD == 0 { + wopts.SiblingChildren = true + } wr, err := t.Wait(&wopts) if err != nil { |