diff options
-rw-r--r-- | pkg/abi/linux/prctl.go | 5 | ||||
-rw-r--r-- | pkg/abi/linux/ptrace.go | 6 | ||||
-rw-r--r-- | pkg/sentry/fsimpl/proc/BUILD | 1 | ||||
-rw-r--r-- | pkg/sentry/fsimpl/proc/tasks_sys.go | 3 | ||||
-rw-r--r-- | pkg/sentry/fsimpl/proc/yama.go | 80 | ||||
-rw-r--r-- | pkg/sentry/kernel/kernel.go | 15 | ||||
-rw-r--r-- | pkg/sentry/kernel/ptrace.go | 175 | ||||
-rw-r--r-- | pkg/sentry/kernel/task.go | 7 | ||||
-rw-r--r-- | pkg/sentry/kernel/task_exit.go | 3 | ||||
-rw-r--r-- | pkg/sentry/kernel/threads.go | 2 | ||||
-rw-r--r-- | pkg/sentry/syscalls/linux/error.go | 19 | ||||
-rw-r--r-- | pkg/sentry/syscalls/linux/sys_prctl.go | 18 | ||||
-rw-r--r-- | pkg/sentry/syscalls/linux/vfs2/aio.go | 2 | ||||
-rw-r--r-- | runsc/boot/filter/extra_filters_race.go | 1 | ||||
-rw-r--r-- | runsc/fsgofer/filter/extra_filters_race.go | 1 | ||||
-rw-r--r-- | test/syscalls/linux/BUILD | 1 | ||||
-rw-r--r-- | test/syscalls/linux/ptrace.cc | 1151 |
17 files changed, 1462 insertions, 28 deletions
diff --git a/pkg/abi/linux/prctl.go b/pkg/abi/linux/prctl.go index 391cfaa1c..41118c3b4 100644 --- a/pkg/abi/linux/prctl.go +++ b/pkg/abi/linux/prctl.go @@ -144,6 +144,11 @@ const ( // PR_MPX_DISABLE_MANAGEMENT disables kernel management of Memory // Protection eXtensions (MPX) bounds tables. PR_MPX_DISABLE_MANAGEMENT = 44 + + // PR_SET_PTRACER allows a specific process (or any, if PR_SET_PTRACER_ANY is + // specified) to ptrace the current task. + PR_SET_PTRACER = 0x59616d61 + PR_SET_PTRACER_ANY = -1 ) // From <asm/prctl.h> diff --git a/pkg/abi/linux/ptrace.go b/pkg/abi/linux/ptrace.go index 23e605ab2..db1c6a0d8 100644 --- a/pkg/abi/linux/ptrace.go +++ b/pkg/abi/linux/ptrace.go @@ -87,3 +87,9 @@ const ( PTRACE_O_EXITKILL = 1 << 20 PTRACE_O_SUSPEND_SECCOMP = 1 << 21 ) + +// YAMA ptrace_scope levels from security/yama/yama_lsm.c. +const ( + YAMA_SCOPE_DISABLED = 0 + YAMA_SCOPE_RELATIONAL = 1 +) diff --git a/pkg/sentry/fsimpl/proc/BUILD b/pkg/sentry/fsimpl/proc/BUILD index 5196a2a80..d47a4fff9 100644 --- a/pkg/sentry/fsimpl/proc/BUILD +++ b/pkg/sentry/fsimpl/proc/BUILD @@ -75,6 +75,7 @@ go_library( "tasks_files.go", "tasks_inode_refs.go", "tasks_sys.go", + "yama.go", ], visibility = ["//pkg/sentry:internal"], deps = [ diff --git a/pkg/sentry/fsimpl/proc/tasks_sys.go b/pkg/sentry/fsimpl/proc/tasks_sys.go index 25c407d98..fd7823daa 100644 --- a/pkg/sentry/fsimpl/proc/tasks_sys.go +++ b/pkg/sentry/fsimpl/proc/tasks_sys.go @@ -48,6 +48,9 @@ func (fs *filesystem) newSysDir(ctx context.Context, root *auth.Credentials, k * "shmall": fs.newInode(ctx, root, 0444, shmData(linux.SHMALL)), "shmmax": fs.newInode(ctx, root, 0444, shmData(linux.SHMMAX)), "shmmni": fs.newInode(ctx, root, 0444, shmData(linux.SHMMNI)), + "yama": fs.newStaticDir(ctx, root, map[string]kernfs.Inode{ + "ptrace_scope": fs.newYAMAPtraceScopeFile(ctx, k, root), + }), }), "vm": fs.newStaticDir(ctx, root, map[string]kernfs.Inode{ "mmap_min_addr": fs.newInode(ctx, root, 0444, &mmapMinAddrData{k: k}), diff --git 
a/pkg/sentry/fsimpl/proc/yama.go b/pkg/sentry/fsimpl/proc/yama.go new file mode 100644 index 000000000..aebfe8944 --- /dev/null +++ b/pkg/sentry/fsimpl/proc/yama.go @@ -0,0 +1,80 @@ +// Copyright 2021 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package proc + +import ( + "bytes" + "fmt" + "sync/atomic" + + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/kernel/auth" + "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" +) + +func (fs *filesystem) newYAMAPtraceScopeFile(ctx context.Context, k *kernel.Kernel, creds *auth.Credentials) kernfs.Inode { + s := &yamaPtraceScope{level: &k.YAMAPtraceScope} + s.Init(ctx, creds, linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), s, 0644) + return s +} + +// yamaPtraceScope implements vfs.WritableDynamicBytesSource for +// /sys/kernel/yama/ptrace_scope. +// +// +stateify savable +type yamaPtraceScope struct { + kernfs.DynamicBytesFile + + // level is the ptrace_scope level. + level *int32 +} + +// Generate implements vfs.DynamicBytesSource.Generate. +func (s *yamaPtraceScope) Generate(ctx context.Context, buf *bytes.Buffer) error { + _, err := fmt.Fprintf(buf, "%d\n", atomic.LoadInt32(s.level)) + return err +} + +// Write implements vfs.WritableDynamicBytesSource.Write. 
+func (s *yamaPtraceScope) Write(ctx context.Context, src usermem.IOSequence, offset int64) (int64, error) { + if offset != 0 { + // Ignore partial writes. + return 0, syserror.EINVAL + } + if src.NumBytes() == 0 { + return 0, nil + } + + // Limit the amount of memory allocated. + src = src.TakeFirst(usermem.PageSize - 1) + + var v int32 + n, err := usermem.CopyInt32StringInVec(ctx, src.IO, src.Addrs, &v, src.Opts) + if err != nil { + return 0, err + } + + // We do not support YAMA levels > YAMA_SCOPE_RELATIONAL. + if v < linux.YAMA_SCOPE_DISABLED || v > linux.YAMA_SCOPE_RELATIONAL { + return 0, syserror.EINVAL + } + + atomic.StoreInt32(s.level, v) + return n, nil +} diff --git a/pkg/sentry/kernel/kernel.go b/pkg/sentry/kernel/kernel.go index ef4e934a1..43065b45a 100644 --- a/pkg/sentry/kernel/kernel.go +++ b/pkg/sentry/kernel/kernel.go @@ -282,6 +282,18 @@ type Kernel struct { // If set to true, report address space activation waits as if the task is in // external wait so that the watchdog doesn't report the task stuck. SleepForAddressSpaceActivation bool + + // Exceptions to YAMA ptrace restrictions. Each key-value pair represents a + // tracee-tracer relationship. The key is a process (technically, the thread + // group leader) that can be traced by any thread that is a descendant of the + // value. If the value is nil, then anyone can trace the process represented by + // the key. + // + // ptraceExceptions is protected by the TaskSet mutex. + ptraceExceptions map[*Task]*Task + + // YAMAPtraceScope is the current level of YAMA ptrace restrictions. + YAMAPtraceScope int32 } // InitKernelArgs holds arguments to Init. 
@@ -382,6 +394,8 @@ func (k *Kernel) Init(args InitKernelArgs) error { k.monotonicClock = &timekeeperClock{tk: args.Timekeeper, c: sentrytime.Monotonic} k.futexes = futex.NewManager() k.netlinkPorts = port.New() + k.ptraceExceptions = make(map[*Task]*Task) + k.YAMAPtraceScope = linux.YAMA_SCOPE_RELATIONAL if VFS2Enabled { ctx := k.SupervisorContext() @@ -425,7 +439,6 @@ func (k *Kernel) Init(args InitKernelArgs) error { k.socketsVFS2 = make(map[*vfs.FileDescription]*SocketRecord) } - return nil } diff --git a/pkg/sentry/kernel/ptrace.go b/pkg/sentry/kernel/ptrace.go index cef58a590..c3980350a 100644 --- a/pkg/sentry/kernel/ptrace.go +++ b/pkg/sentry/kernel/ptrace.go @@ -16,6 +16,7 @@ package kernel import ( "fmt" + "sync/atomic" "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/marshal/primitive" @@ -95,7 +96,11 @@ const ( // checks for access mode PTRACE_MODE_ATTACH; otherwise, it checks for access // mode PTRACE_MODE_READ. // -// NOTE(b/30815691): The result of CanTrace is immediately stale (e.g., a +// In Linux, ptrace access restrictions may be configured by LSMs. While we do +// not support LSMs, we do add additional restrictions based on the commoncap +// and YAMA LSMs. +// +// TODO(gvisor.dev/issue/212): The result of CanTrace is immediately stale (e.g., a // racing setuid(2) may change traceability). This may pose a risk when a task // changes from traceable to not traceable. This is only problematic across // execve, where privileges may increase. @@ -103,7 +108,7 @@ const ( // We currently do not implement privileged executables (set-user/group-ID bits // and file capabilities), so that case is not reachable. func (t *Task) CanTrace(target *Task, attach bool) bool { - // "1. If the calling thread and the target thread are in the same thread + // "If the calling thread and the target thread are in the same thread // group, access is always allowed." 
- ptrace(2) // // Note: Strictly speaking, prior to 73af963f9f30 ("__ptrace_may_access() @@ -115,9 +120,57 @@ func (t *Task) CanTrace(target *Task, attach bool) bool { return true } + if !t.canTraceStandard(target, attach) { + return false + } + + // YAMA only supported for vfs2. + if !VFS2Enabled { + return true + } + + if atomic.LoadInt32(&t.k.YAMAPtraceScope) == linux.YAMA_SCOPE_RELATIONAL { + t.tg.pidns.owner.mu.RLock() + defer t.tg.pidns.owner.mu.RUnlock() + if !t.canTraceYAMALocked(target) { + return false + } + } + return true +} + +// canTraceLocked is the same as CanTrace, except the caller must already hold +// the TaskSet mutex (for reading or writing). +func (t *Task) canTraceLocked(target *Task, attach bool) bool { + if t.tg == target.tg { + return true + } + + if !t.canTraceStandard(target, attach) { + return false + } + + // YAMA only supported for vfs2. + if !VFS2Enabled { + return true + } + + if atomic.LoadInt32(&t.k.YAMAPtraceScope) == linux.YAMA_SCOPE_RELATIONAL { + if !t.canTraceYAMALocked(target) { + return false + } + } + return true +} + +// canTraceStandard performs standard ptrace access checks as defined by +// kernel/ptrace.c:__ptrace_may_access as well as the commoncap LSM +// implementation of the security_ptrace_access_check() interface, which is +// always invoked. +func (t *Task) canTraceStandard(target *Task, attach bool) bool { // """ - // 2. If the access mode specifies PTRACE_MODE_FSCREDS (ED: snipped, - // doesn't exist until Linux 4.5). + // TODO(gvisor.dev/issue/260): 1. If the access mode specifies + // PTRACE_MODE_FSCREDS (ED: snipped, doesn't exist until Linux 4.5). // // Otherwise, the access mode specifies PTRACE_MODE_REALCREDS, so use the // caller's real UID and GID for the checks in the next step. (Most APIs @@ -125,7 +178,7 @@ func (t *Task) CanTrace(target *Task, attach bool) bool { // historical reasons, the PTRACE_MODE_REALCREDS check uses the real IDs // instead.) // - // 3. 
Deny access if neither of the following is true: + // 2. Deny access if neither of the following is true: // // - The real, effective, and saved-set user IDs of the target match the // caller's user ID, *and* the real, effective, and saved-set group IDs of @@ -134,15 +187,12 @@ func (t *Task) CanTrace(target *Task, attach bool) bool { // - The caller has the CAP_SYS_PTRACE capability in the user namespace of // the target. // - // 4. Deny access if the target process "dumpable" attribute has a value + // 3. Deny access if the target process "dumpable" attribute has a value // other than 1 (SUID_DUMP_USER; see the discussion of PR_SET_DUMPABLE in // prctl(2)), and the caller does not have the CAP_SYS_PTRACE capability in // the user namespace of the target process. // - // 5. The kernel LSM security_ptrace_access_check() interface is invoked to - // see if ptrace access is permitted. The results depend on the LSM(s). The - // implementation of this interface in the commoncap LSM performs the - // following steps: + // 4. The commoncap LSM performs the following steps: // // a) If the access mode includes PTRACE_MODE_FSCREDS, then use the // caller's effective capability set; otherwise (the access mode specifies @@ -188,6 +238,94 @@ func (t *Task) CanTrace(target *Task, attach bool) bool { return true } +// canTraceYAMALocked performs ptrace access checks as defined by the YAMA LSM +// implementation of the security_ptrace_access_check() interface, with YAMA +// configured to mode 1. This is a common default among various Linux +// distributions. +// +// It only permits the tracer to proceed if one of the following conditions is +// met: +// +// a) The tracer is already attached to the tracee. +// +// b) The target is a descendant of the tracer. +// +// c) The target has explicitly given permission to the tracer through the +// PR_SET_PTRACER prctl. +// +// d) The tracer has CAP_SYS_PTRACE. +// +// See security/yama/yama_lsm.c:yama_ptrace_access_check. 
+// +// Precondition: the TaskSet mutex must be locked (for reading or writing). +func (t *Task) canTraceYAMALocked(target *Task) bool { + if tracer := target.Tracer(); tracer != nil { + if tracer.tg == t.tg { + return true + } + } + if target.isYAMADescendantOfLocked(t) { + return true + } + if target.hasYAMAExceptionForLocked(t) { + return true + } + if t.HasCapabilityIn(linux.CAP_SYS_PTRACE, target.UserNamespace()) { + return true + } + return false +} + +// Determines whether t is considered a descendant of ancestor for the purposes +// of YAMA permissions (specifically, whether t's thread group is descended from +// ancestor's). +// +// Precondition: the TaskSet mutex must be locked (for reading or writing). +func (t *Task) isYAMADescendantOfLocked(ancestor *Task) bool { + walker := t + for walker != nil { + if walker.tg.leader == ancestor.tg.leader { + return true + } + walker = walker.parent + } + return false +} + +// Precondition: the TaskSet mutex must be locked (for reading or writing). +func (t *Task) hasYAMAExceptionForLocked(tracer *Task) bool { + allowed, ok := t.k.ptraceExceptions[t] + if !ok { + return false + } + return allowed == nil || tracer.isYAMADescendantOfLocked(allowed) +} + +// ClearYAMAException removes any YAMA exception with t as the tracee. +func (t *Task) ClearYAMAException() { + t.tg.pidns.owner.mu.Lock() + defer t.tg.pidns.owner.mu.Unlock() + tracee := t.tg.leader + delete(t.k.ptraceExceptions, tracee) +} + +// SetYAMAException creates a YAMA exception allowing all descendants of tracer +// to trace t. If tracer is nil, then any task is allowed to trace t. +// +// If there was an existing exception, it is overwritten with the new one. 
+func (t *Task) SetYAMAException(tracer *Task) { + t.tg.pidns.owner.mu.Lock() + defer t.tg.pidns.owner.mu.Unlock() + + tracee := t.tg.leader + tracee.ptraceYAMAExceptionAdded = true + if tracer != nil { + tracer.ptraceYAMAExceptionAdded = true + } + + t.k.ptraceExceptions[tracee] = tracer +} + // Tracer returns t's ptrace Tracer. func (t *Task) Tracer() *Task { return t.ptraceTracer.Load().(*Task) @@ -358,7 +496,7 @@ func (t *Task) ptraceTraceme() error { // returning nil here is correct. return nil } - if !t.parent.CanTrace(t, true) { + if !t.parent.canTraceLocked(t, true) { return syserror.EPERM } if t.parent.exitState != TaskExitNone { @@ -377,11 +515,11 @@ func (t *Task) ptraceAttach(target *Task, seize bool, opts uintptr) error { if t.tg == target.tg { return syserror.EPERM } - if !t.CanTrace(target, true) { - return syserror.EPERM - } t.tg.pidns.owner.mu.Lock() defer t.tg.pidns.owner.mu.Unlock() + if !t.canTraceLocked(target, true) { + return syserror.EPERM + } if target.hasTracer() { return syserror.EPERM } @@ -459,6 +597,15 @@ func (t *Task) exitPtrace() { } // "nil maps cannot be saved" t.ptraceTracees = make(map[*Task]struct{}) + + if t.ptraceYAMAExceptionAdded { + delete(t.k.ptraceExceptions, t) + for tracee, tracer := range t.k.ptraceExceptions { + if tracer == t { + delete(t.k.ptraceExceptions, tracee) + } + } + } } // forgetTracerLocked detaches t's tracer and ensures that t is no longer diff --git a/pkg/sentry/kernel/task.go b/pkg/sentry/kernel/task.go index c0ab53c94..36141dd09 100644 --- a/pkg/sentry/kernel/task.go +++ b/pkg/sentry/kernel/task.go @@ -403,6 +403,13 @@ type Task struct { // ptraceEventMsg is protected by the TaskSet mutex. ptraceEventMsg uint64 + // ptraceYAMAExceptionAdded is true if a YAMA exception involving the task has + // been added before. This is used during task exit to decide whether we need + // to clean up YAMA exceptions. + // + // ptraceYAMAExceptionAdded is protected by the TaskSet mutex. 
+ ptraceYAMAExceptionAdded bool + // The struct that holds the IO-related usage. The ioUsage pointer is // immutable. ioUsage *usage.IO diff --git a/pkg/sentry/kernel/task_exit.go b/pkg/sentry/kernel/task_exit.go index f7765fa3a..ad59e4f60 100644 --- a/pkg/sentry/kernel/task_exit.go +++ b/pkg/sentry/kernel/task_exit.go @@ -694,7 +694,8 @@ func (t *Task) exitNotifyLocked(fromPtraceDetach bool) { } if t.parent != nil { delete(t.parent.children, t) - t.parent = nil + // Do not clear t.parent. It may still be needed after the task has exited + // (for example, to perform ptrace access checks on /proc/[pid] files). } } } diff --git a/pkg/sentry/kernel/threads.go b/pkg/sentry/kernel/threads.go index e9da99067..09d070ec8 100644 --- a/pkg/sentry/kernel/threads.go +++ b/pkg/sentry/kernel/threads.go @@ -55,7 +55,7 @@ const InitTID ThreadID = 1 // // +stateify savable type TaskSet struct { - // mu protects all relationships betweens tasks and thread groups in the + // mu protects all relationships between tasks and thread groups in the // TaskSet. (mu is approximately equivalent to Linux's tasklist_lock.) mu sync.RWMutex `state:"nosave"` diff --git a/pkg/sentry/syscalls/linux/error.go b/pkg/sentry/syscalls/linux/error.go index d1778d029..5bd526b73 100644 --- a/pkg/sentry/syscalls/linux/error.go +++ b/pkg/sentry/syscalls/linux/error.go @@ -18,6 +18,7 @@ import ( "io" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/metric" "gvisor.dev/gvisor/pkg/sentry/fs" @@ -36,16 +37,16 @@ var ( // errors, we may consume the error and return only the partial read/write. // // op and f are used only for panics. 
-func HandleIOErrorVFS2(t *kernel.Task, partialResult bool, ioerr, intr error, op string, f *vfs.FileDescription) error { - known, err := handleIOErrorImpl(t, partialResult, ioerr, intr, op) +func HandleIOErrorVFS2(ctx context.Context, partialResult bool, ioerr, intr error, op string, f *vfs.FileDescription) error { + known, err := handleIOErrorImpl(ctx, partialResult, ioerr, intr, op) if err != nil { return err } if !known { // An unknown error is encountered with a partial read/write. fs := f.Mount().Filesystem().VirtualFilesystem() - root := vfs.RootFromContext(t) - name, _ := fs.PathnameWithDeleted(t, root, f.VirtualDentry()) + root := vfs.RootFromContext(ctx) + name, _ := fs.PathnameWithDeleted(ctx, root, f.VirtualDentry()) log.Traceback("Invalid request partialResult %v and err (type %T) %v for %s operation on %q", partialResult, ioerr, ioerr, op, name) partialResultOnce.Do(partialResultMetric.Increment) } @@ -56,8 +57,8 @@ func HandleIOErrorVFS2(t *kernel.Task, partialResult bool, ioerr, intr error, op // errors, we may consume the error and return only the partial read/write. // // op and f are used only for panics. -func handleIOError(t *kernel.Task, partialResult bool, ioerr, intr error, op string, f *fs.File) error { - known, err := handleIOErrorImpl(t, partialResult, ioerr, intr, op) +func handleIOError(ctx context.Context, partialResult bool, ioerr, intr error, op string, f *fs.File) error { + known, err := handleIOErrorImpl(ctx, partialResult, ioerr, intr, op) if err != nil { return err } @@ -74,7 +75,7 @@ func handleIOError(t *kernel.Task, partialResult bool, ioerr, intr error, op str // errors, we may consume the error and return only the partial read/write. // // Returns false if error is unknown. 
-func handleIOErrorImpl(t *kernel.Task, partialResult bool, err, intr error, op string) (bool, error) { +func handleIOErrorImpl(ctx context.Context, partialResult bool, err, intr error, op string) (bool, error) { switch err { case nil: // Typical successful syscall. @@ -85,6 +86,10 @@ func handleIOErrorImpl(t *kernel.Task, partialResult bool, err, intr error, op s // they will see 0. return true, nil case syserror.ErrExceedsFileSizeLimit: + t := kernel.TaskFromContext(ctx) + if t == nil { + panic("I/O error should only occur from a context associated with a Task") + } // Ignore partialResult because this error only applies to // normal files, and for those files we cannot accumulate // write results. diff --git a/pkg/sentry/syscalls/linux/sys_prctl.go b/pkg/sentry/syscalls/linux/sys_prctl.go index a892d2c62..9890dd946 100644 --- a/pkg/sentry/syscalls/linux/sys_prctl.go +++ b/pkg/sentry/syscalls/linux/sys_prctl.go @@ -172,6 +172,24 @@ func Prctl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall } return 1, nil, nil + case linux.PR_SET_PTRACER: + pid := args[1].Int() + switch pid { + case 0: + t.ClearYAMAException() + return 0, nil, nil + case linux.PR_SET_PTRACER_ANY: + t.SetYAMAException(nil) + return 0, nil, nil + default: + tracer := t.PIDNamespace().TaskWithID(kernel.ThreadID(pid)) + if tracer == nil { + return 0, nil, syserror.EINVAL + } + t.SetYAMAException(tracer) + return 0, nil, nil + } + case linux.PR_SET_SECCOMP: if args[1].Int() != linux.SECCOMP_MODE_FILTER { // Unsupported mode. diff --git a/pkg/sentry/syscalls/linux/vfs2/aio.go b/pkg/sentry/syscalls/linux/vfs2/aio.go index 1365a5a62..de6789a65 100644 --- a/pkg/sentry/syscalls/linux/vfs2/aio.go +++ b/pkg/sentry/syscalls/linux/vfs2/aio.go @@ -177,7 +177,7 @@ func getAIOCallback(t *kernel.Task, fd, eventFD *vfs.FileDescription, cbAddr use // Update the result. 
if err != nil { - err = slinux.HandleIOErrorVFS2(t, ev.Result != 0 /* partial */, err, nil /* never interrupted */, "aio", fd) + err = slinux.HandleIOErrorVFS2(ctx, ev.Result != 0 /* partial */, err, nil /* never interrupted */, "aio", fd) ev.Result = -int64(kernel.ExtractErrno(err, 0)) } diff --git a/runsc/boot/filter/extra_filters_race.go b/runsc/boot/filter/extra_filters_race.go index 9ff80276a..5b99eb8cd 100644 --- a/runsc/boot/filter/extra_filters_race.go +++ b/runsc/boot/filter/extra_filters_race.go @@ -27,6 +27,7 @@ func instrumentationFilters() seccomp.SyscallRules { Report("TSAN is enabled: syscall filters less restrictive!") return seccomp.SyscallRules{ syscall.SYS_BRK: {}, + syscall.SYS_CLOCK_NANOSLEEP: {}, syscall.SYS_CLONE: {}, syscall.SYS_FUTEX: {}, syscall.SYS_MMAP: {}, diff --git a/runsc/fsgofer/filter/extra_filters_race.go b/runsc/fsgofer/filter/extra_filters_race.go index 20a0732be..cbd5c487e 100644 --- a/runsc/fsgofer/filter/extra_filters_race.go +++ b/runsc/fsgofer/filter/extra_filters_race.go @@ -28,6 +28,7 @@ func instrumentationFilters() seccomp.SyscallRules { log.Warningf("*** SECCOMP WARNING: TSAN is enabled: syscall filters less restrictive!") return seccomp.SyscallRules{ syscall.SYS_BRK: {}, + syscall.SYS_CLOCK_NANOSLEEP: {}, syscall.SYS_CLONE: {}, syscall.SYS_FUTEX: {}, syscall.SYS_MADVISE: {}, diff --git a/test/syscalls/linux/BUILD b/test/syscalls/linux/BUILD index 1a492bca3..f871426f0 100644 --- a/test/syscalls/linux/BUILD +++ b/test/syscalls/linux/BUILD @@ -1814,6 +1814,7 @@ cc_binary( "@com_google_absl//absl/flags:flag", "@com_google_absl//absl/time", gtest, + "//test/util:capability_util", "//test/util:file_descriptor", "//test/util:fs_util", "//test/util:logging", diff --git a/test/syscalls/linux/ptrace.cc b/test/syscalls/linux/ptrace.cc index 13c19d4a8..d1d7c6f84 100644 --- a/test/syscalls/linux/ptrace.cc +++ b/test/syscalls/linux/ptrace.cc @@ -15,7 +15,9 @@ #include <elf.h> #include <signal.h> #include <stddef.h> +#include 
<sys/prctl.h> #include <sys/ptrace.h> +#include <sys/socket.h> #include <sys/time.h> #include <sys/types.h> #include <sys/user.h> @@ -30,6 +32,7 @@ #include "absl/flags/flag.h" #include "absl/time/clock.h" #include "absl/time/time.h" +#include "test/util/capability_util.h" #include "test/util/fs_util.h" #include "test/util/logging.h" #include "test/util/memory_util.h" @@ -45,6 +48,49 @@ ABSL_FLAG(bool, ptrace_test_execve_child, false, "If true, run the " "PtraceExecveTest_Execve_GetRegs_PeekUser_SIGKILL_TraceClone_" "TraceExit child workload."); +ABSL_FLAG(bool, ptrace_test_trace_descendants_allowed, false, + "If set, run the child workload for " + "PtraceTest_TraceDescendantsAllowed."); +ABSL_FLAG(bool, ptrace_test_prctl_set_ptracer_pid, false, + "If set, run the child workload for PtraceTest_PrctlSetPtracerPID."); +ABSL_FLAG(bool, ptrace_test_prctl_set_ptracer_any, false, + "If set, run the child workload for PtraceTest_PrctlSetPtracerAny."); +ABSL_FLAG(bool, ptrace_test_prctl_clear_ptracer, false, + "If set, run the child workload for PtraceTest_PrctlClearPtracer."); +ABSL_FLAG(bool, ptrace_test_prctl_replace_ptracer, false, + "If set, run the child workload for PtraceTest_PrctlReplacePtracer."); +ABSL_FLAG(int, ptrace_test_prctl_replace_ptracer_tid, -1, + "Specifies the replacement tracer tid in the child workload for " + "PtraceTest_PrctlReplacePtracer."); +ABSL_FLAG(bool, ptrace_test_prctl_set_ptracer_and_exit_tracee_thread, false, + "If set, run the child workload for " + "PtraceTest_PrctlSetPtracerPersistsPastTraceeThreadExit."); +ABSL_FLAG(bool, ptrace_test_prctl_set_ptracer_and_exec_non_leader, false, + "If set, run the child workload for " + "PtraceTest_PrctlSetPtracerDoesNotPersistPastNonLeaderExec."); +ABSL_FLAG(bool, ptrace_test_prctl_set_ptracer_and_exit_tracer_thread, false, + "If set, run the child workload for " + "PtraceTest_PrctlSetPtracerDoesNotPersistPastTracerThreadExit."); +ABSL_FLAG(int, 
ptrace_test_prctl_set_ptracer_and_exit_tracer_thread_tid, -1, + "Specifies the tracee tid in the child workload for " + "PtraceTest_PrctlSetPtracerDoesNotPersistPastTracerThreadExit."); +ABSL_FLAG(bool, ptrace_test_prctl_set_ptracer_respects_tracer_thread_id, false, + "If set, run the child workload for PtraceTest_PrctlSetPtracePID."); +ABSL_FLAG(int, ptrace_test_prctl_set_ptracer_respects_tracer_thread_id_tid, -1, + "Specifies the thread tid to be traced in the child workload " + "for PtraceTest_PrctlSetPtracerRespectsTracerThreadID."); + +ABSL_FLAG(bool, ptrace_test_tracee, false, + "If true, run the tracee process for the " + "PrctlSetPtracerDoesNotPersistPastLeaderExec and " + "PrctlSetPtracerDoesNotPersistPastNonLeaderExec workloads."); +ABSL_FLAG(int, ptrace_test_trace_tid, -1, + "If set, run a process to ptrace attach to the thread with the " + "specified pid for the PrctlSetPtracerRespectsTracerThreadID " + "workload."); +ABSL_FLAG(int, ptrace_test_fd, -1, + "Specifies the fd used for communication between tracer and tracee " + "processes across exec."); namespace gvisor { namespace testing { @@ -78,10 +124,10 @@ void RaiseSignal(int sig) { TEST_PCHECK(tgkill(pid, tid, sig) == 0); } +constexpr char kYamaPtraceScopePath[] = "/proc/sys/kernel/yama/ptrace_scope"; + // Returns the Yama ptrace scope. PosixErrorOr<int> YamaPtraceScope() { - constexpr char kYamaPtraceScopePath[] = "/proc/sys/kernel/yama/ptrace_scope"; - ASSIGN_OR_RETURN_ERRNO(bool exists, Exists(kYamaPtraceScopePath)); if (!exists) { // File doesn't exist means no Yama, so the scope is disabled -> 0. 
@@ -99,6 +145,22 @@ PosixErrorOr<int> YamaPtraceScope() { return scope; } +int CheckPtraceAttach(pid_t pid) { + int ret = ptrace(PTRACE_ATTACH, pid, 0, 0); + MaybeSave(); + if (ret < 0) { + return ret; + } + + int status; + TEST_PCHECK(waitpid(pid, &status, 0) == pid); + MaybeSave(); + TEST_CHECK(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP); + TEST_PCHECK(ptrace(PTRACE_DETACH, pid, 0, 0) == 0); + MaybeSave(); + return 0; +} + TEST(PtraceTest, AttachSelf) { EXPECT_THAT(ptrace(PTRACE_ATTACH, gettid(), 0, 0), SyscallFailsWithErrno(EPERM)); @@ -111,10 +173,995 @@ TEST(PtraceTest, AttachSameThreadGroup) { }); } +TEST(PtraceTest, TraceParentNotAllowed) { + SKIP_IF(ASSERT_NO_ERRNO_AND_VALUE(YamaPtraceScope()) < 1); + ASSERT_NO_ERRNO(SetCapability(CAP_SYS_PTRACE, false)); + + pid_t const child_pid = fork(); + if (child_pid == 0) { + TEST_CHECK(CheckPtraceAttach(getppid()) == -1); + TEST_PCHECK(errno == EPERM); + _exit(0); + } + ASSERT_THAT(child_pid, SyscallSucceeds()); + + int status; + ASSERT_THAT(waitpid(child_pid, &status, 0), SyscallSucceeds()); + EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0) + << " status " << status; +} + +TEST(PtraceTest, TraceNonDescendantNotAllowed) { + SKIP_IF(ASSERT_NO_ERRNO_AND_VALUE(YamaPtraceScope()) < 1); + ASSERT_NO_ERRNO(SetCapability(CAP_SYS_PTRACE, false)); + + pid_t const tracee_pid = fork(); + if (tracee_pid == 0) { + while (true) { + SleepSafe(absl::Seconds(1)); + } + } + ASSERT_THAT(tracee_pid, SyscallSucceeds()); + + pid_t const tracer_pid = fork(); + if (tracer_pid == 0) { + TEST_CHECK(CheckPtraceAttach(tracee_pid) == -1); + TEST_PCHECK(errno == EPERM); + _exit(0); + } + ASSERT_THAT(tracer_pid, SyscallSucceeds()); + + // Clean up tracer. + int status; + ASSERT_THAT(waitpid(tracer_pid, &status, 0), SyscallSucceeds()); + EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0); + + // Clean up tracee. 
+ ASSERT_THAT(kill(tracee_pid, SIGKILL), SyscallSucceeds()); + ASSERT_THAT(waitpid(tracee_pid, &status, 0), + SyscallSucceedsWithValue(tracee_pid)); + EXPECT_TRUE(WIFSIGNALED(status) && WTERMSIG(status) == SIGKILL) + << " status " << status; +} + +TEST(PtraceTest, TraceNonDescendantWithCapabilityAllowed) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_PTRACE))); + // Skip if disallowed by YAMA despite having CAP_SYS_PTRACE. + SKIP_IF(ASSERT_NO_ERRNO_AND_VALUE(YamaPtraceScope()) > 2); + + pid_t const tracee_pid = fork(); + if (tracee_pid == 0) { + while (true) { + SleepSafe(absl::Seconds(1)); + } + } + ASSERT_THAT(tracee_pid, SyscallSucceeds()); + + pid_t const tracer_pid = fork(); + if (tracer_pid == 0) { + TEST_PCHECK(CheckPtraceAttach(tracee_pid) == 0); + _exit(0); + } + ASSERT_THAT(tracer_pid, SyscallSucceeds()); + + // Clean up tracer. + int status; + ASSERT_THAT(waitpid(tracer_pid, &status, 0), SyscallSucceeds()); + EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0); + + // Clean up tracee. + ASSERT_THAT(kill(tracee_pid, SIGKILL), SyscallSucceeds()); + ASSERT_THAT(waitpid(tracee_pid, &status, 0), + SyscallSucceedsWithValue(tracee_pid)); + EXPECT_TRUE(WIFSIGNALED(status) && WTERMSIG(status) == SIGKILL) + << " status " << status; +} + +TEST(PtraceTest, TraceDescendantsAllowed) { + SKIP_IF(ASSERT_NO_ERRNO_AND_VALUE(YamaPtraceScope()) > 1); + ASSERT_NO_ERRNO(SetCapability(CAP_SYS_PTRACE, false)); + + // Use socket pair to communicate tids to this process from its grandchild. + int sockets[2]; + ASSERT_THAT(socketpair(AF_UNIX, SOCK_STREAM, 0, sockets), SyscallSucceeds()); + + // Allocate vector before forking (not async-signal-safe). + ExecveArray const owned_child_argv = { + "/proc/self/exe", "--ptrace_test_trace_descendants_allowed", + "--ptrace_test_fd", std::to_string(sockets[0])}; + char* const* const child_argv = owned_child_argv.get(); + + pid_t const child_pid = fork(); + if (child_pid == 0) { + // In child process. 
+ TEST_PCHECK(close(sockets[1]) == 0); + pid_t const grandchild_pid = fork(); + if (grandchild_pid == 0) { + // This test will create a new thread in the grandchild process. + // pthread_create(2) isn't async-signal-safe, so we execve() first. + execve(child_argv[0], child_argv, /* envp = */ nullptr); + TEST_PCHECK_MSG(false, "Survived execve to test child"); + } + TEST_PCHECK(grandchild_pid > 0); + MaybeSave(); + + // Wait for grandchild. Our parent process will kill it once it's done. + int status; + TEST_PCHECK(waitpid(grandchild_pid, &status, 0) == grandchild_pid); + TEST_CHECK(WIFSIGNALED(status) && WTERMSIG(status) == SIGKILL); + MaybeSave(); + _exit(0); + } + ASSERT_THAT(child_pid, SyscallSucceeds()); + ASSERT_THAT(close(sockets[0]), SyscallSucceeds()); + + // We should be able to attach to any thread in the grandchild. + pid_t grandchild_tid1, grandchild_tid2; + ASSERT_THAT(read(sockets[1], &grandchild_tid1, sizeof(grandchild_tid1)), + SyscallSucceedsWithValue(sizeof(grandchild_tid1))); + ASSERT_THAT(read(sockets[1], &grandchild_tid2, sizeof(grandchild_tid2)), + SyscallSucceedsWithValue(sizeof(grandchild_tid2))); + + EXPECT_THAT(CheckPtraceAttach(grandchild_tid1), SyscallSucceeds()); + EXPECT_THAT(CheckPtraceAttach(grandchild_tid2), SyscallSucceeds()); + + // Clean up grandchild. + ASSERT_THAT(kill(grandchild_tid1, SIGKILL), SyscallSucceeds()); + + // Clean up child. + int status; + ASSERT_THAT(waitpid(child_pid, &status, 0), + SyscallSucceedsWithValue(child_pid)); + EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0) + << " status " << status; +} + +[[noreturn]] void RunTraceDescendantsAllowed(int fd) { + // Let the tracer know our tid through the socket fd. + pid_t const tid = gettid(); + TEST_PCHECK(write(fd, &tid, sizeof(tid)) == sizeof(tid)); + MaybeSave(); + + ScopedThread t([fd] { + // See if any arbitrary thread (whose tid differs from the process id) can + // be traced as well. 
+ pid_t const tid = gettid(); + TEST_PCHECK(write(fd, &tid, sizeof(tid)) == sizeof(tid)); + MaybeSave(); + while (true) { + SleepSafe(absl::Seconds(1)); + } + }); + + while (true) { + SleepSafe(absl::Seconds(1)); + } +} + +TEST(PtraceTest, PrctlSetPtracerInvalidPID) { + // EINVAL should also be returned if PR_SET_PTRACER is not supported. + EXPECT_THAT(prctl(PR_SET_PTRACER, 123456789), SyscallFailsWithErrno(EINVAL)); +} + +TEST(PtraceTest, PrctlSetPtracerPID) { + SKIP_IF(ASSERT_NO_ERRNO_AND_VALUE(YamaPtraceScope()) != 1); + + ASSERT_NO_ERRNO(SetCapability(CAP_SYS_PTRACE, false)); + + // Use sockets to synchronize between tracer and tracee. + int sockets[2]; + ASSERT_THAT(socketpair(AF_UNIX, SOCK_STREAM, 0, sockets), SyscallSucceeds()); + + // Allocate vector before forking (not async-signal-safe). + ExecveArray const owned_child_argv = { + "/proc/self/exe", "--ptrace_test_prctl_set_ptracer_pid", + "--ptrace_test_fd", std::to_string(sockets[0])}; + char* const* const child_argv = owned_child_argv.get(); + + pid_t const tracee_pid = fork(); + if (tracee_pid == 0) { + TEST_PCHECK(close(sockets[1]) == 0); + // This test will create a new thread in the child process. + // pthread_create(2) isn't async-signal-safe, so we execve() first. + execve(child_argv[0], child_argv, /* envp = */ nullptr); + TEST_PCHECK_MSG(false, "Survived execve to test child"); + } + ASSERT_THAT(tracee_pid, SyscallSucceeds()); + ASSERT_THAT(close(sockets[0]), SyscallSucceeds()); + + pid_t const tracer_pid = fork(); + if (tracer_pid == 0) { + // Wait until tracee has called prctl. + char done; + TEST_PCHECK(read(sockets[1], &done, 1) == 1); + MaybeSave(); + + TEST_PCHECK(CheckPtraceAttach(tracee_pid) == 0); + _exit(0); + } + ASSERT_THAT(tracer_pid, SyscallSucceeds()); + + // Clean up tracer. + int status; + ASSERT_THAT(waitpid(tracer_pid, &status, 0), SyscallSucceeds()); + EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0); + + // Clean up tracee. 
+ ASSERT_THAT(kill(tracee_pid, SIGKILL), SyscallSucceeds()); + ASSERT_THAT(waitpid(tracee_pid, &status, 0), + SyscallSucceedsWithValue(tracee_pid)); + EXPECT_TRUE(WIFSIGNALED(status) && WTERMSIG(status) == SIGKILL) + << " status " << status; +} + +[[noreturn]] void RunPrctlSetPtracerPID(int fd) { + ScopedThread t([fd] { + // Perform prctl in a separate thread to verify that it is process-wide. + TEST_PCHECK(prctl(PR_SET_PTRACER, getppid()) == 0); + MaybeSave(); + // Indicate that the prctl has been set. + TEST_PCHECK(write(fd, "x", 1) == 1); + MaybeSave(); + }); + while (true) { + SleepSafe(absl::Seconds(1)); + } +} + +TEST(PtraceTest, PrctlSetPtracerAny) { + SKIP_IF(ASSERT_NO_ERRNO_AND_VALUE(YamaPtraceScope()) != 1); + ASSERT_NO_ERRNO(SetCapability(CAP_SYS_PTRACE, false)); + + // Use sockets to synchronize between tracer and tracee. + int sockets[2]; + ASSERT_THAT(socketpair(AF_UNIX, SOCK_STREAM, 0, sockets), SyscallSucceeds()); + + // Allocate vector before forking (not async-signal-safe). + ExecveArray const owned_child_argv = { + "/proc/self/exe", "--ptrace_test_prctl_set_ptracer_any", + "--ptrace_test_fd", std::to_string(sockets[0])}; + char* const* const child_argv = owned_child_argv.get(); + + pid_t const tracee_pid = fork(); + if (tracee_pid == 0) { + // This test will create a new thread in the child process. + // pthread_create(2) isn't async-signal-safe, so we execve() first. + TEST_PCHECK(close(sockets[1]) == 0); + execve(child_argv[0], child_argv, /* envp = */ nullptr); + TEST_PCHECK_MSG(false, "Survived execve to test child"); + } + ASSERT_THAT(tracee_pid, SyscallSucceeds()); + ASSERT_THAT(close(sockets[0]), SyscallSucceeds()); + + pid_t const tracer_pid = fork(); + if (tracer_pid == 0) { + // Wait until tracee has called prctl. + char done; + TEST_PCHECK(read(sockets[1], &done, 1) == 1); + MaybeSave(); + + TEST_PCHECK(CheckPtraceAttach(tracee_pid) == 0); + _exit(0); + } + ASSERT_THAT(tracer_pid, SyscallSucceeds()); + + // Clean up tracer. 
+ int status; + ASSERT_THAT(waitpid(tracer_pid, &status, 0), SyscallSucceeds()); + EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0) + << " status " << status; + + // Clean up tracee. + ASSERT_THAT(kill(tracee_pid, SIGKILL), SyscallSucceeds()); + ASSERT_THAT(waitpid(tracee_pid, &status, 0), + SyscallSucceedsWithValue(tracee_pid)); + EXPECT_TRUE(WIFSIGNALED(status) && WTERMSIG(status) == SIGKILL) + << " status " << status; +} + +[[noreturn]] void RunPrctlSetPtracerAny(int fd) { + ScopedThread t([fd] { + // Perform prctl in a separate thread to verify that it is process-wide. + TEST_PCHECK(prctl(PR_SET_PTRACER, PR_SET_PTRACER_ANY) == 0); + MaybeSave(); + // Indicate that the prctl has been set. + TEST_PCHECK(write(fd, "x", 1) == 1); + MaybeSave(); + }); + while (true) { + SleepSafe(absl::Seconds(1)); + } +} + +TEST(PtraceTest, PrctlClearPtracer) { + SKIP_IF(ASSERT_NO_ERRNO_AND_VALUE(YamaPtraceScope()) != 1); + ASSERT_NO_ERRNO(SetCapability(CAP_SYS_PTRACE, false)); + + // Use sockets to synchronize between tracer and tracee. + int sockets[2]; + ASSERT_THAT(socketpair(AF_UNIX, SOCK_STREAM, 0, sockets), SyscallSucceeds()); + + // Allocate vector before forking (not async-signal-safe). + ExecveArray const owned_child_argv = { + "/proc/self/exe", "--ptrace_test_prctl_clear_ptracer", "--ptrace_test_fd", + std::to_string(sockets[0])}; + char* const* const child_argv = owned_child_argv.get(); + + pid_t const tracee_pid = fork(); + if (tracee_pid == 0) { + // This test will create a new thread in the child process. + // pthread_create(2) isn't async-signal-safe, so we execve() first. + TEST_PCHECK(close(sockets[1]) == 0); + execve(child_argv[0], child_argv, /* envp = */ nullptr); + TEST_PCHECK_MSG(false, "Survived execve to test child"); + } + ASSERT_THAT(tracee_pid, SyscallSucceeds()); + ASSERT_THAT(close(sockets[0]), SyscallSucceeds()); + + pid_t const tracer_pid = fork(); + if (tracer_pid == 0) { + // Wait until tracee has called prctl. 
+ char done; + TEST_PCHECK(read(sockets[1], &done, 1) == 1); + MaybeSave(); + + TEST_CHECK(CheckPtraceAttach(tracee_pid) == -1); + TEST_PCHECK(errno == EPERM); + _exit(0); + } + ASSERT_THAT(tracer_pid, SyscallSucceeds()); + + // Clean up tracer. + int status; + ASSERT_THAT(waitpid(tracer_pid, &status, 0), SyscallSucceeds()); + EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0) + << " status " << status; + + // Clean up tracee. + ASSERT_THAT(kill(tracee_pid, SIGKILL), SyscallSucceeds()); + ASSERT_THAT(waitpid(tracee_pid, &status, 0), + SyscallSucceedsWithValue(tracee_pid)); + EXPECT_TRUE(WIFSIGNALED(status) && WTERMSIG(status) == SIGKILL) + << " status " << status; +} + +[[noreturn]] void RunPrctlClearPtracer(int fd) { + ScopedThread t([fd] { + // Perform prctl in a separate thread to verify that it is process-wide. + TEST_PCHECK(prctl(PR_SET_PTRACER, PR_SET_PTRACER_ANY) == 0); + MaybeSave(); + TEST_PCHECK(prctl(PR_SET_PTRACER, 0) == 0); + MaybeSave(); + // Indicate that the prctl has been set/cleared. + TEST_PCHECK(write(fd, "x", 1) == 1); + MaybeSave(); + }); + while (true) { + SleepSafe(absl::Seconds(1)); + } +} + +TEST(PtraceTest, PrctlReplacePtracer) { + SKIP_IF(ASSERT_NO_ERRNO_AND_VALUE(YamaPtraceScope()) != 1); + ASSERT_NO_ERRNO(SetCapability(CAP_SYS_PTRACE, false)); + + pid_t const unused_pid = fork(); + if (unused_pid == 0) { + while (true) { + SleepSafe(absl::Seconds(1)); + } + } + ASSERT_THAT(unused_pid, SyscallSucceeds()); + + // Use sockets to synchronize between tracer and tracee. + int sockets[2]; + ASSERT_THAT(socketpair(AF_UNIX, SOCK_STREAM, 0, sockets), SyscallSucceeds()); + + // Allocate vector before forking (not async-signal-safe). 
+ ExecveArray const owned_child_argv = { + "/proc/self/exe", + "--ptrace_test_prctl_replace_ptracer", + "--ptrace_test_prctl_replace_ptracer_tid", + std::to_string(unused_pid), + "--ptrace_test_fd", + std::to_string(sockets[0])}; + char* const* const child_argv = owned_child_argv.get(); + + pid_t const tracee_pid = fork(); + if (tracee_pid == 0) { + TEST_PCHECK(close(sockets[1]) == 0); + // This test will create a new thread in the child process. + // pthread_create(2) isn't async-signal-safe, so we execve() first. + execve(child_argv[0], child_argv, /* envp = */ nullptr); + TEST_PCHECK_MSG(false, "Survived execve to test child"); + } + ASSERT_THAT(tracee_pid, SyscallSucceeds()); + ASSERT_THAT(close(sockets[0]), SyscallSucceeds()); + + pid_t const tracer_pid = fork(); + if (tracer_pid == 0) { + // Wait until tracee has called prctl. + char done; + TEST_PCHECK(read(sockets[1], &done, 1) == 1); + MaybeSave(); + + TEST_CHECK(CheckPtraceAttach(tracee_pid) == -1); + TEST_PCHECK(errno == EPERM); + _exit(0); + } + ASSERT_THAT(tracer_pid, SyscallSucceeds()); + + // Clean up tracer. + int status; + ASSERT_THAT(waitpid(tracer_pid, &status, 0), SyscallSucceeds()); + EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0) + << " status " << status; + + // Clean up tracee. + ASSERT_THAT(kill(tracee_pid, SIGKILL), SyscallSucceeds()); + ASSERT_THAT(waitpid(tracee_pid, &status, 0), + SyscallSucceedsWithValue(tracee_pid)); + EXPECT_TRUE(WIFSIGNALED(status) && WTERMSIG(status) == SIGKILL) + << " status " << status; + + // Clean up unused. 
+ ASSERT_THAT(kill(unused_pid, SIGKILL), SyscallSucceeds()); + ASSERT_THAT(waitpid(unused_pid, &status, 0), + SyscallSucceedsWithValue(unused_pid)); + EXPECT_TRUE(WIFSIGNALED(status) && WTERMSIG(status) == SIGKILL) + << " status " << status; +} + +[[noreturn]] void RunPrctlReplacePtracer(int new_tracer_pid, int fd) { + TEST_PCHECK(prctl(PR_SET_PTRACER, getppid()) == 0); + MaybeSave(); + + ScopedThread t([new_tracer_pid, fd] { + TEST_PCHECK(prctl(PR_SET_PTRACER, new_tracer_pid) == 0); + MaybeSave(); + // Indicate that the prctl has been set. + TEST_PCHECK(write(fd, "x", 1) == 1); + MaybeSave(); + }); + while (true) { + SleepSafe(absl::Seconds(1)); + } +} + +// Tests that YAMA exceptions store tracees by thread group leader. Exceptions +// are preserved even after the tracee thread exits, as long as the tracee's +// thread group leader is still around. +TEST(PtraceTest, PrctlSetPtracerPersistsPastTraceeThreadExit) { + SKIP_IF(ASSERT_NO_ERRNO_AND_VALUE(YamaPtraceScope()) != 1); + ASSERT_NO_ERRNO(SetCapability(CAP_SYS_PTRACE, false)); + + // Use sockets to synchronize between tracer and tracee. + int sockets[2]; + ASSERT_THAT(socketpair(AF_UNIX, SOCK_STREAM, 0, sockets), SyscallSucceeds()); + + // Allocate vector before forking (not async-signal-safe). + ExecveArray const owned_child_argv = { + "/proc/self/exe", + "--ptrace_test_prctl_set_ptracer_and_exit_tracee_thread", + "--ptrace_test_fd", std::to_string(sockets[0])}; + char* const* const child_argv = owned_child_argv.get(); + + pid_t const tracee_pid = fork(); + if (tracee_pid == 0) { + // This test will create a new thread in the child process. + // pthread_create(2) isn't async-signal-safe, so we execve() first. 
+ TEST_PCHECK(close(sockets[1]) == 0); + execve(child_argv[0], child_argv, /* envp = */ nullptr); + TEST_PCHECK_MSG(false, "Survived execve to test child"); + } + ASSERT_THAT(tracee_pid, SyscallSucceeds()); + ASSERT_THAT(close(sockets[0]), SyscallSucceeds()); + + pid_t const tracer_pid = fork(); + if (tracer_pid == 0) { + // Wait until the tracee thread calling prctl has terminated. + char done; + TEST_PCHECK(read(sockets[1], &done, 1) == 1); + MaybeSave(); + + TEST_PCHECK(CheckPtraceAttach(tracee_pid) == 0); + _exit(0); + } + ASSERT_THAT(tracer_pid, SyscallSucceeds()); + + // Clean up tracer. + int status; + ASSERT_THAT(waitpid(tracer_pid, &status, 0), SyscallSucceeds()); + EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0) + << " status " << status; + + // Clean up tracee. + ASSERT_THAT(kill(tracee_pid, SIGKILL), SyscallSucceeds()); + ASSERT_THAT(waitpid(tracee_pid, &status, 0), + SyscallSucceedsWithValue(tracee_pid)); + EXPECT_TRUE(WIFSIGNALED(status) && WTERMSIG(status) == SIGKILL) + << " status " << status; +} + +[[noreturn]] void RunPrctlSetPtracerPersistsPastTraceeThreadExit(int fd) { + ScopedThread t([] { + TEST_PCHECK(prctl(PR_SET_PTRACER, PR_SET_PTRACER_ANY) == 0); + MaybeSave(); + }); + t.Join(); + // Indicate that thread setting the prctl has exited. + TEST_PCHECK(write(fd, "x", 1) == 1); + MaybeSave(); + + while (true) { + SleepSafe(absl::Seconds(1)); + } +} + +// Tests that YAMA exceptions store tracees by thread group leader. Exceptions +// are preserved across exec as long as the thread group leader does not change, +// even if the tracee thread is terminated. +TEST(PtraceTest, PrctlSetPtracerPersistsPastLeaderExec) { + SKIP_IF(ASSERT_NO_ERRNO_AND_VALUE(YamaPtraceScope()) != 1); + ASSERT_NO_ERRNO(SetCapability(CAP_SYS_PTRACE, false)); + + // Use sockets to synchronize between tracer and tracee. 
+ int sockets[2]; + ASSERT_THAT(socketpair(AF_UNIX, SOCK_STREAM, 0, sockets), SyscallSucceeds()); + + // Allocate vector before forking (not async-signal-safe). + ExecveArray const owned_child_argv = { + "/proc/self/exe", "--ptrace_test_tracee", "--ptrace_test_fd", + std::to_string(sockets[0])}; + char* const* const child_argv = owned_child_argv.get(); + + pid_t const tracee_pid = fork(); + if (tracee_pid == 0) { + TEST_PCHECK(close(sockets[1]) == 0); + TEST_PCHECK(prctl(PR_SET_PTRACER, PR_SET_PTRACER_ANY) == 0); + MaybeSave(); + + // This test will create a new thread in the child process. + // pthread_create(2) isn't async-signal-safe, so we execve() first. + execve(child_argv[0], child_argv, /* envp = */ nullptr); + TEST_PCHECK_MSG(false, "Survived execve to test child"); + } + ASSERT_THAT(tracee_pid, SyscallSucceeds()); + ASSERT_THAT(close(sockets[0]), SyscallSucceeds()); + + pid_t const tracer_pid = fork(); + if (tracer_pid == 0) { + // Wait until the tracee has exec'd. + char done; + TEST_PCHECK(read(sockets[1], &done, 1) == 1); + MaybeSave(); + + TEST_PCHECK(CheckPtraceAttach(tracee_pid) == 0); + _exit(0); + } + ASSERT_THAT(tracer_pid, SyscallSucceeds()); + + // Clean up tracer. + int status; + ASSERT_THAT(waitpid(tracer_pid, &status, 0), SyscallSucceeds()); + EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0) + << " status " << status; + + // Clean up tracee. + ASSERT_THAT(kill(tracee_pid, SIGKILL), SyscallSucceeds()); + ASSERT_THAT(waitpid(tracee_pid, &status, 0), + SyscallSucceedsWithValue(tracee_pid)); + EXPECT_TRUE(WIFSIGNALED(status) && WTERMSIG(status) == SIGKILL) + << " status " << status; +} + +[[noreturn]] void RunTracee(int fd) { + // Indicate that we have exec'd. + TEST_PCHECK(write(fd, "x", 1) == 1); + MaybeSave(); + + while (true) { + SleepSafe(absl::Seconds(1)); + } +} + +// Tests that YAMA exceptions store tracees by thread group leader. Exceptions +// are cleared if the tracee process's thread group leader is terminated by +// exec. 
+TEST(PtraceTest, PrctlSetPtracerDoesNotPersistPastNonLeaderExec) { + SKIP_IF(ASSERT_NO_ERRNO_AND_VALUE(YamaPtraceScope()) != 1); + ASSERT_NO_ERRNO(SetCapability(CAP_SYS_PTRACE, false)); + + // Use sockets to synchronize between tracer and tracee. + int sockets[2]; + ASSERT_THAT(socketpair(AF_UNIX, SOCK_STREAM, 0, sockets), SyscallSucceeds()); + + // Allocate vector before forking (not async-signal-safe). + ExecveArray const owned_child_argv = { + "/proc/self/exe", "--ptrace_test_prctl_set_ptracer_and_exec_non_leader", + "--ptrace_test_fd", std::to_string(sockets[0])}; + char* const* const child_argv = owned_child_argv.get(); + + pid_t const tracee_pid = fork(); + if (tracee_pid == 0) { + // This test will create a new thread in the child process. + // pthread_create(2) isn't async-signal-safe, so we execve() first. + TEST_PCHECK(close(sockets[1]) == 0); + execve(child_argv[0], child_argv, /* envp = */ nullptr); + TEST_PCHECK_MSG(false, "Survived execve to test child"); + } + ASSERT_THAT(tracee_pid, SyscallSucceeds()); + ASSERT_THAT(close(sockets[0]), SyscallSucceeds()); + + pid_t const tracer_pid = fork(); + if (tracer_pid == 0) { + // Wait until the tracee has exec'd. + char done; + TEST_PCHECK(read(sockets[1], &done, 1) == 1); + MaybeSave(); + + TEST_CHECK(CheckPtraceAttach(tracee_pid) == -1); + TEST_PCHECK(errno == EPERM); + _exit(0); + } + ASSERT_THAT(tracer_pid, SyscallSucceeds()); + + // Clean up tracer. + int status; + ASSERT_THAT(waitpid(tracer_pid, &status, 0), SyscallSucceeds()); + EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0) + << " status " << status; + + // Clean up tracee. 
+ ASSERT_THAT(kill(tracee_pid, SIGKILL), SyscallSucceeds()); + ASSERT_THAT(waitpid(tracee_pid, &status, 0), + SyscallSucceedsWithValue(tracee_pid)); + EXPECT_TRUE(WIFSIGNALED(status) && WTERMSIG(status) == SIGKILL) + << " status " << status; +} + +[[noreturn]] void RunPrctlSetPtracerDoesNotPersistPastNonLeaderExec(int fd) { + ScopedThread t([fd] { + TEST_PCHECK(prctl(PR_SET_PTRACER, PR_SET_PTRACER_ANY) == 0); + MaybeSave(); + + ExecveArray const owned_child_argv = { + "/proc/self/exe", "--ptrace_test_tracee", "--ptrace_test_fd", + std::to_string(fd)}; + char* const* const child_argv = owned_child_argv.get(); + + execve(child_argv[0], child_argv, /* envp = */ nullptr); + TEST_PCHECK_MSG(false, "Survived execve to test child"); + }); + t.Join(); + TEST_CHECK_MSG(false, "Survived execve? (main)"); + _exit(1); +} + +// Tests that YAMA exceptions store the tracer itself rather than the thread +// group leader. Exceptions are cleared when the tracer task exits, rather than +// when its thread group leader exits. +TEST(PtraceTest, PrctlSetPtracerDoesNotPersistPastTracerThreadExit) { + SKIP_IF(ASSERT_NO_ERRNO_AND_VALUE(YamaPtraceScope()) != 1); + + // Use sockets to synchronize between tracer and tracee. + int sockets[2]; + ASSERT_THAT(socketpair(AF_UNIX, SOCK_STREAM, 0, sockets), SyscallSucceeds()); + + pid_t const tracee_pid = fork(); + if (tracee_pid == 0) { + TEST_PCHECK(close(sockets[1]) == 0); + pid_t tracer_tid; + TEST_PCHECK(read(sockets[0], &tracer_tid, sizeof(tracer_tid)) == + sizeof(tracer_tid)); + MaybeSave(); + + TEST_PCHECK(prctl(PR_SET_PTRACER, tracer_tid) == 0); + MaybeSave(); + // Indicate that the prctl has been set. + TEST_PCHECK(write(sockets[0], "x", 1) == 1); + MaybeSave(); + + while (true) { + SleepSafe(absl::Seconds(1)); + } + } + ASSERT_THAT(tracee_pid, SyscallSucceeds()); + ASSERT_THAT(close(sockets[0]), SyscallSucceeds()); + + // Allocate vector before forking (not async-signal-safe). 
+ ExecveArray const owned_child_argv = { + "/proc/self/exe", + "--ptrace_test_prctl_set_ptracer_and_exit_tracer_thread", + "--ptrace_test_prctl_set_ptracer_and_exit_tracer_thread_tid", + std::to_string(tracee_pid), + "--ptrace_test_fd", + std::to_string(sockets[1])}; + char* const* const child_argv = owned_child_argv.get(); + + pid_t const tracer_pid = fork(); + if (tracer_pid == 0) { + // This test will create a new thread in the child process. + // pthread_create(2) isn't async-signal-safe, so we execve() first. + execve(child_argv[0], child_argv, /* envp = */ nullptr); + TEST_PCHECK_MSG(false, "Survived execve to test child"); + } + ASSERT_THAT(tracer_pid, SyscallSucceeds()); + + // Clean up tracer. + int status; + ASSERT_THAT(waitpid(tracer_pid, &status, 0), SyscallSucceeds()); + EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0) + << " status " << status; + + // Clean up tracee. + ASSERT_THAT(kill(tracee_pid, SIGKILL), SyscallSucceeds()); + ASSERT_THAT(waitpid(tracee_pid, &status, 0), + SyscallSucceedsWithValue(tracee_pid)); + EXPECT_TRUE(WIFSIGNALED(status) && WTERMSIG(status) == SIGKILL) + << " status " << status; +} + +[[noreturn]] void RunPrctlSetPtracerDoesNotPersistPastTracerThreadExit( + int tracee_tid, int fd) { + TEST_PCHECK(SetCapability(CAP_SYS_PTRACE, false).ok()); + + ScopedThread t([fd] { + pid_t const tracer_tid = gettid(); + TEST_PCHECK(write(fd, &tracer_tid, sizeof(tracer_tid)) == + sizeof(tracer_tid)); + + // Wait until the prctl has been set. + char done; + TEST_PCHECK(read(fd, &done, 1) == 1); + MaybeSave(); + }); + t.Join(); + + // Sleep for a bit before verifying the invalidation. The thread exit above + // should cause the ptrace exception to be invalidated, but in Linux, this is + // not done immediately. The YAMA exception is dropped during + // __put_task_struct(), which occurs (at the earliest) one RCU grace period + // after exit_notify() ==> release_task(). 
+ SleepSafe(absl::Milliseconds(100)); + + TEST_CHECK(CheckPtraceAttach(tracee_tid) == -1); + TEST_PCHECK(errno == EPERM); + _exit(0); +} + +// Tests that YAMA exceptions store the tracer thread itself rather than the +// thread group leader. Exceptions are preserved across exec in the tracer +// thread, even if the thread group leader is terminated. +TEST(PtraceTest, PrctlSetPtracerRespectsTracerThreadID) { + SKIP_IF(ASSERT_NO_ERRNO_AND_VALUE(YamaPtraceScope()) != 1); + + // Use sockets to synchronize between tracer and tracee. + int sockets[2]; + ASSERT_THAT(socketpair(AF_UNIX, SOCK_STREAM, 0, sockets), SyscallSucceeds()); + + pid_t const tracee_pid = fork(); + if (tracee_pid == 0) { + TEST_PCHECK(close(sockets[1]) == 0); + pid_t tracer_tid; + TEST_PCHECK(read(sockets[0], &tracer_tid, sizeof(tracer_tid)) == + sizeof(tracer_tid)); + MaybeSave(); + + TEST_PCHECK(prctl(PR_SET_PTRACER, tracer_tid) == 0); + MaybeSave(); + // Indicate that the prctl has been set. + TEST_PCHECK(write(sockets[0], "x", 1) == 1); + MaybeSave(); + + while (true) { + SleepSafe(absl::Seconds(1)); + } + } + ASSERT_THAT(tracee_pid, SyscallSucceeds()); + ASSERT_THAT(close(sockets[0]), SyscallSucceeds()); + + // Allocate vector before forking (not async-signal-safe). + ExecveArray const owned_child_argv = { + "/proc/self/exe", + "--ptrace_test_prctl_set_ptracer_respects_tracer_thread_id", + "--ptrace_test_prctl_set_ptracer_respects_tracer_thread_id_tid", + std::to_string(tracee_pid), + "--ptrace_test_fd", + std::to_string(sockets[1])}; + char* const* const child_argv = owned_child_argv.get(); + + pid_t const tracer_pid = fork(); + if (tracer_pid == 0) { + // This test will create a new thread in the child process. + // pthread_create(2) isn't async-signal-safe, so we execve() first. + execve(child_argv[0], child_argv, /* envp = */ nullptr); + TEST_PCHECK_MSG(false, "Survived execve to test child"); + } + ASSERT_THAT(tracer_pid, SyscallSucceeds()); + + // Clean up tracer. 
+ int status; + ASSERT_THAT(waitpid(tracer_pid, &status, 0), SyscallSucceeds()); + EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0) + << " status " << status; + + // Clean up tracee. + ASSERT_THAT(kill(tracee_pid, SIGKILL), SyscallSucceeds()); + ASSERT_THAT(waitpid(tracee_pid, &status, 0), + SyscallSucceedsWithValue(tracee_pid)); + EXPECT_TRUE(WIFSIGNALED(status) && WTERMSIG(status) == SIGKILL) + << " status " << status; +} + +[[noreturn]] void RunPrctlSetPtracerRespectsTracerThreadID(int tracee_tid, + int fd) { + // Create a separate thread for tracing (i.e., not the thread group + // leader). After the subsequent execve(), the current thread group leader + // will no longer exist, but the YAMA exception installed with this + // thread should still be valid. + ScopedThread t([tracee_tid, fd] { + pid_t const tracer_tid = gettid(); + TEST_PCHECK(write(fd, &tracer_tid, sizeof(tracer_tid)) == sizeof(tracer_tid)); + MaybeSave(); + + // Wait until the tracee has made the PR_SET_PTRACER prctl. + char done; + TEST_PCHECK(read(fd, &done, 1) == 1); + MaybeSave(); + + ExecveArray const owned_child_argv = { + "/proc/self/exe", "--ptrace_test_trace_tid", std::to_string(tracee_tid), + "--ptrace_test_fd", std::to_string(fd)}; + char* const* const child_argv = owned_child_argv.get(); + + execve(child_argv[0], child_argv, /* envp = */ nullptr); + TEST_PCHECK_MSG(false, "Survived execve to test child"); + }); + t.Join(); + TEST_CHECK_MSG(false, "Survived execve? (main)"); + _exit(1); +} + +[[noreturn]] void RunTraceTID(int tracee_tid, int fd) { + TEST_PCHECK(SetCapability(CAP_SYS_PTRACE, false).ok()); + TEST_PCHECK(CheckPtraceAttach(tracee_tid) == 0); + _exit(0); +} + +// Tests that removing a YAMA exception does not affect a tracer that is already +// attached. 
+TEST(PtraceTest, PrctlClearPtracerDoesNotAffectCurrentTracer) { + SKIP_IF(ASSERT_NO_ERRNO_AND_VALUE(YamaPtraceScope()) != 1); + ASSERT_NO_ERRNO(SetCapability(CAP_SYS_PTRACE, false)); + + // Use sockets to synchronize between tracer and tracee. + int sockets[2]; + ASSERT_THAT(socketpair(AF_UNIX, SOCK_STREAM, 0, sockets), SyscallSucceeds()); + + pid_t const tracee_pid = fork(); + if (tracee_pid == 0) { + TEST_PCHECK(close(sockets[1]) == 0); + TEST_PCHECK(prctl(PR_SET_PTRACER, PR_SET_PTRACER_ANY) == 0); + MaybeSave(); + // Indicate that the prctl has been set. + TEST_PCHECK(write(sockets[0], "x", 1) == 1); + MaybeSave(); + + // Wait until tracer has attached before clearing PR_SET_PTRACER. + char done; + TEST_PCHECK(read(sockets[0], &done, 1) == 1); + MaybeSave(); + + TEST_PCHECK(prctl(PR_SET_PTRACER, 0) == 0); + MaybeSave(); + // Indicate that the prctl has been cleared. + TEST_PCHECK(write(sockets[0], "x", 1) == 1); + MaybeSave(); + + while (true) { + SleepSafe(absl::Seconds(1)); + } + } + ASSERT_THAT(tracee_pid, SyscallSucceeds()); + ASSERT_THAT(close(sockets[0]), SyscallSucceeds()); + + std::string mem_path = "/proc/" + std::to_string(tracee_pid) + "/mem"; + pid_t const tracer_pid = fork(); + if (tracer_pid == 0) { + // Wait until tracee has called prctl, or else we won't be able to attach. + char done; + TEST_PCHECK(read(sockets[1], &done, 1) == 1); + MaybeSave(); + + TEST_PCHECK(ptrace(PTRACE_ATTACH, tracee_pid, 0, 0) == 0); + MaybeSave(); + // Indicate that we have attached. + TEST_PCHECK(write(sockets[1], &done, 1) == 1); + MaybeSave(); + + // Block until tracee enters signal-delivery-stop as a result of the + // SIGSTOP sent by PTRACE_ATTACH. + int status; + TEST_PCHECK(waitpid(tracee_pid, &status, 0) == tracee_pid); + MaybeSave(); + TEST_CHECK(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP); + MaybeSave(); + + TEST_PCHECK(ptrace(PTRACE_CONT, tracee_pid, 0, 0) == 0); + MaybeSave(); + + // Wait until tracee has cleared PR_SET_PTRACER. 
Even though it was cleared, + // we should still be able to access /proc/[pid]/mem because we are already + // attached. + TEST_PCHECK(read(sockets[1], &done, 1) == 1); + MaybeSave(); + TEST_PCHECK(open(mem_path.c_str(), O_RDONLY) != -1); + MaybeSave(); + _exit(0); + } + ASSERT_THAT(tracer_pid, SyscallSucceeds()); + + // Clean up tracer. + int status; + ASSERT_THAT(waitpid(tracer_pid, &status, 0), SyscallSucceeds()); + EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0) + << " status " << status; + + // Clean up tracee. + ASSERT_THAT(kill(tracee_pid, SIGKILL), SyscallSucceeds()); + ASSERT_THAT(waitpid(tracee_pid, &status, 0), + SyscallSucceedsWithValue(tracee_pid)); + EXPECT_TRUE(WIFSIGNALED(status) && WTERMSIG(status) == SIGKILL) + << " status " << status; +} + +TEST(PtraceTest, PrctlNotInherited) { + SKIP_IF(ASSERT_NO_ERRNO_AND_VALUE(YamaPtraceScope()) != 1); + ASSERT_NO_ERRNO(SetCapability(CAP_SYS_PTRACE, false)); + + // Allow any ptracer. This should not affect the child processes. + ASSERT_THAT(prctl(PR_SET_PTRACER, PR_SET_PTRACER_ANY), SyscallSucceeds()); + + pid_t const tracee_pid = fork(); + if (tracee_pid == 0) { + while (true) { + SleepSafe(absl::Seconds(1)); + } + } + ASSERT_THAT(tracee_pid, SyscallSucceeds()); + + pid_t const tracer_pid = fork(); + if (tracer_pid == 0) { + TEST_CHECK(CheckPtraceAttach(tracee_pid) == -1); + TEST_PCHECK(errno == EPERM); + _exit(0); + } + ASSERT_THAT(tracer_pid, SyscallSucceeds()); + + // Clean up tracer. + int status; + ASSERT_THAT(waitpid(tracer_pid, &status, 0), SyscallSucceeds()); + EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0) + << " status " << status; + + // Clean up tracee. 
+ ASSERT_THAT(kill(tracee_pid, SIGKILL), SyscallSucceeds()); + ASSERT_THAT(waitpid(tracee_pid, &status, 0), + SyscallSucceedsWithValue(tracee_pid)); + EXPECT_TRUE(WIFSIGNALED(status) && WTERMSIG(status) == SIGKILL) + << " status " << status; +} + TEST(PtraceTest, AttachParent_PeekData_PokeData_SignalSuppression) { // Yama prevents attaching to a parent. Skip the test if the scope is anything // except disabled. - SKIP_IF(ASSERT_NO_ERRNO_AND_VALUE(YamaPtraceScope()) > 0); + const int yama_scope = ASSERT_NO_ERRNO_AND_VALUE(YamaPtraceScope()); + SKIP_IF(yama_scope > 1); + if (yama_scope == 1) { + // Allow child to trace us. + ASSERT_THAT(prctl(PR_SET_PTRACER, PR_SET_PTRACER_ANY), SyscallSucceeds()); + } // Test PTRACE_POKE/PEEKDATA on both anonymous and file mappings. const auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); @@ -1238,6 +2285,46 @@ TEST(PtraceTest, SeizeSetOptions) { << " status " << status; } +TEST(PtraceTest, SetYAMAPtraceScope) { + SKIP_IF(IsRunningWithVFS1()); + + // Do not modify the ptrace scope on the host. + SKIP_IF(!IsRunningOnGvisor()); + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_ADMIN))); + + const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE( + Open(std::string(kYamaPtraceScopePath), O_RDWR)); + + ASSERT_THAT(write(fd.get(), "0", 1), SyscallSucceedsWithValue(1)); + + ASSERT_THAT(lseek(fd.get(), 0, SEEK_SET), SyscallSucceeds()); + std::vector<char> buf(10); + EXPECT_THAT(read(fd.get(), buf.data(), buf.size()), SyscallSucceeds()); + EXPECT_STREQ(buf.data(), "0\n"); + + // Test that a child can attach to its parent when ptrace_scope is 0. 
+ ASSERT_NO_ERRNO(SetCapability(CAP_SYS_PTRACE, false)); + pid_t const child_pid = fork(); + if (child_pid == 0) { + TEST_PCHECK(CheckPtraceAttach(getppid()) == 0); + _exit(0); + } + ASSERT_THAT(child_pid, SyscallSucceeds()); + + int status; + ASSERT_THAT(waitpid(child_pid, &status, 0), SyscallSucceeds()); + EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0) + << " status " << status; + + // Set ptrace_scope back to 1 (and try writing with a newline). + ASSERT_THAT(lseek(fd.get(), 0, SEEK_SET), SyscallSucceeds()); + ASSERT_THAT(write(fd.get(), "1\n", 2), SyscallSucceedsWithValue(2)); + + ASSERT_THAT(lseek(fd.get(), 0, SEEK_SET), SyscallSucceeds()); + EXPECT_THAT(read(fd.get(), buf.data(), buf.size()), SyscallSucceeds()); + EXPECT_STREQ(buf.data(), "1\n"); +} + } // namespace } // namespace testing @@ -1250,5 +2337,63 @@ int main(int argc, char** argv) { gvisor::testing::RunExecveChild(); } + int fd = absl::GetFlag(FLAGS_ptrace_test_fd); + + if (absl::GetFlag(FLAGS_ptrace_test_trace_descendants_allowed)) { + gvisor::testing::RunTraceDescendantsAllowed(fd); + } + + if (absl::GetFlag(FLAGS_ptrace_test_prctl_set_ptracer_pid)) { + gvisor::testing::RunPrctlSetPtracerPID(fd); + } + + if (absl::GetFlag(FLAGS_ptrace_test_prctl_set_ptracer_any)) { + gvisor::testing::RunPrctlSetPtracerAny(fd); + } + + if (absl::GetFlag(FLAGS_ptrace_test_prctl_clear_ptracer)) { + gvisor::testing::RunPrctlClearPtracer(fd); + } + + if (absl::GetFlag(FLAGS_ptrace_test_prctl_replace_ptracer)) { + gvisor::testing::RunPrctlReplacePtracer( + absl::GetFlag(FLAGS_ptrace_test_prctl_replace_ptracer_tid), fd); + } + + if (absl::GetFlag( + FLAGS_ptrace_test_prctl_set_ptracer_and_exit_tracee_thread)) { + gvisor::testing::RunPrctlSetPtracerPersistsPastTraceeThreadExit(fd); + } + + if (absl::GetFlag(FLAGS_ptrace_test_prctl_set_ptracer_and_exec_non_leader)) { + gvisor::testing::RunPrctlSetPtracerDoesNotPersistPastNonLeaderExec( + fd); + } + + if (absl::GetFlag( + 
FLAGS_ptrace_test_prctl_set_ptracer_and_exit_tracer_thread)) { + gvisor::testing::RunPrctlSetPtracerDoesNotPersistPastTracerThreadExit( + absl::GetFlag( + FLAGS_ptrace_test_prctl_set_ptracer_and_exit_tracer_thread_tid), + fd); + } + + if (absl::GetFlag( + FLAGS_ptrace_test_prctl_set_ptracer_respects_tracer_thread_id)) { + gvisor::testing::RunPrctlSetPtracerRespectsTracerThreadID( + absl::GetFlag( + FLAGS_ptrace_test_prctl_set_ptracer_respects_tracer_thread_id_tid), + fd); + } + + if (absl::GetFlag(FLAGS_ptrace_test_tracee)) { + gvisor::testing::RunTracee(fd); + } + + int pid = absl::GetFlag(FLAGS_ptrace_test_trace_tid); + if (pid != -1) { + gvisor::testing::RunTraceTID(pid, fd); + } + return gvisor::testing::RunAllTests(); } |