summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
-rw-r--r--pkg/abi/linux/prctl.go5
-rw-r--r--pkg/abi/linux/ptrace.go6
-rw-r--r--pkg/sentry/fsimpl/proc/BUILD1
-rw-r--r--pkg/sentry/fsimpl/proc/tasks_sys.go3
-rw-r--r--pkg/sentry/fsimpl/proc/yama.go80
-rw-r--r--pkg/sentry/kernel/kernel.go15
-rw-r--r--pkg/sentry/kernel/ptrace.go175
-rw-r--r--pkg/sentry/kernel/task.go7
-rw-r--r--pkg/sentry/kernel/task_exit.go3
-rw-r--r--pkg/sentry/kernel/threads.go2
-rw-r--r--pkg/sentry/syscalls/linux/error.go19
-rw-r--r--pkg/sentry/syscalls/linux/sys_prctl.go18
-rw-r--r--pkg/sentry/syscalls/linux/vfs2/aio.go2
-rw-r--r--runsc/boot/filter/extra_filters_race.go1
-rw-r--r--runsc/fsgofer/filter/extra_filters_race.go1
-rw-r--r--test/syscalls/linux/BUILD1
-rw-r--r--test/syscalls/linux/ptrace.cc1151
17 files changed, 1462 insertions, 28 deletions
diff --git a/pkg/abi/linux/prctl.go b/pkg/abi/linux/prctl.go
index 391cfaa1c..41118c3b4 100644
--- a/pkg/abi/linux/prctl.go
+++ b/pkg/abi/linux/prctl.go
@@ -144,6 +144,11 @@ const (
// PR_MPX_DISABLE_MANAGEMENT disables kernel management of Memory
// Protection eXtensions (MPX) bounds tables.
PR_MPX_DISABLE_MANAGEMENT = 44
+
+ // PR_SET_PTRACER allows a specific process (or any, if PR_SET_PTRACER_ANY is
+ // specified) to ptrace the current task.
+ PR_SET_PTRACER = 0x59616d61
+ PR_SET_PTRACER_ANY = -1
)
// From <asm/prctl.h>
diff --git a/pkg/abi/linux/ptrace.go b/pkg/abi/linux/ptrace.go
index 23e605ab2..db1c6a0d8 100644
--- a/pkg/abi/linux/ptrace.go
+++ b/pkg/abi/linux/ptrace.go
@@ -87,3 +87,9 @@ const (
PTRACE_O_EXITKILL = 1 << 20
PTRACE_O_SUSPEND_SECCOMP = 1 << 21
)
+
+// YAMA ptrace_scope levels from security/yama/yama_lsm.c.
+const (
+ YAMA_SCOPE_DISABLED = 0
+ YAMA_SCOPE_RELATIONAL = 1
+)
diff --git a/pkg/sentry/fsimpl/proc/BUILD b/pkg/sentry/fsimpl/proc/BUILD
index 5196a2a80..d47a4fff9 100644
--- a/pkg/sentry/fsimpl/proc/BUILD
+++ b/pkg/sentry/fsimpl/proc/BUILD
@@ -75,6 +75,7 @@ go_library(
"tasks_files.go",
"tasks_inode_refs.go",
"tasks_sys.go",
+ "yama.go",
],
visibility = ["//pkg/sentry:internal"],
deps = [
diff --git a/pkg/sentry/fsimpl/proc/tasks_sys.go b/pkg/sentry/fsimpl/proc/tasks_sys.go
index 25c407d98..fd7823daa 100644
--- a/pkg/sentry/fsimpl/proc/tasks_sys.go
+++ b/pkg/sentry/fsimpl/proc/tasks_sys.go
@@ -48,6 +48,9 @@ func (fs *filesystem) newSysDir(ctx context.Context, root *auth.Credentials, k *
"shmall": fs.newInode(ctx, root, 0444, shmData(linux.SHMALL)),
"shmmax": fs.newInode(ctx, root, 0444, shmData(linux.SHMMAX)),
"shmmni": fs.newInode(ctx, root, 0444, shmData(linux.SHMMNI)),
+ "yama": fs.newStaticDir(ctx, root, map[string]kernfs.Inode{
+ "ptrace_scope": fs.newYAMAPtraceScopeFile(ctx, k, root),
+ }),
}),
"vm": fs.newStaticDir(ctx, root, map[string]kernfs.Inode{
"mmap_min_addr": fs.newInode(ctx, root, 0444, &mmapMinAddrData{k: k}),
diff --git a/pkg/sentry/fsimpl/proc/yama.go b/pkg/sentry/fsimpl/proc/yama.go
new file mode 100644
index 000000000..aebfe8944
--- /dev/null
+++ b/pkg/sentry/fsimpl/proc/yama.go
@@ -0,0 +1,80 @@
+// Copyright 2021 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package proc
+
+import (
+ "bytes"
+ "fmt"
+ "sync/atomic"
+
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+ "gvisor.dev/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/usermem"
+)
+
+func (fs *filesystem) newYAMAPtraceScopeFile(ctx context.Context, k *kernel.Kernel, creds *auth.Credentials) kernfs.Inode {
+ s := &yamaPtraceScope{level: &k.YAMAPtraceScope}
+ s.Init(ctx, creds, linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), s, 0644)
+ return s
+}
+
+// yamaPtraceScope implements vfs.WritableDynamicBytesSource for
+// /proc/sys/kernel/yama/ptrace_scope.
+//
+// +stateify savable
+type yamaPtraceScope struct {
+ kernfs.DynamicBytesFile
+
+ // level is the ptrace_scope level.
+ level *int32
+}
+
+// Generate implements vfs.DynamicBytesSource.Generate.
+func (s *yamaPtraceScope) Generate(ctx context.Context, buf *bytes.Buffer) error {
+ _, err := fmt.Fprintf(buf, "%d\n", atomic.LoadInt32(s.level))
+ return err
+}
+
+// Write implements vfs.WritableDynamicBytesSource.Write.
+func (s *yamaPtraceScope) Write(ctx context.Context, src usermem.IOSequence, offset int64) (int64, error) {
+ if offset != 0 {
+ // Writes must start at offset 0; anything else is rejected with EINVAL.
+ return 0, syserror.EINVAL
+ }
+ if src.NumBytes() == 0 {
+ return 0, nil
+ }
+
+ // Limit the amount of memory allocated.
+ src = src.TakeFirst(usermem.PageSize - 1)
+
+ var v int32
+ n, err := usermem.CopyInt32StringInVec(ctx, src.IO, src.Addrs, &v, src.Opts)
+ if err != nil {
+ return 0, err
+ }
+
+ // We do not support YAMA levels > YAMA_SCOPE_RELATIONAL.
+ if v < linux.YAMA_SCOPE_DISABLED || v > linux.YAMA_SCOPE_RELATIONAL {
+ return 0, syserror.EINVAL
+ }
+
+ atomic.StoreInt32(s.level, v)
+ return n, nil
+}
diff --git a/pkg/sentry/kernel/kernel.go b/pkg/sentry/kernel/kernel.go
index ef4e934a1..43065b45a 100644
--- a/pkg/sentry/kernel/kernel.go
+++ b/pkg/sentry/kernel/kernel.go
@@ -282,6 +282,18 @@ type Kernel struct {
// If set to true, report address space activation waits as if the task is in
// external wait so that the watchdog doesn't report the task stuck.
SleepForAddressSpaceActivation bool
+
+ // Exceptions to YAMA ptrace restrictions. Each key-value pair represents a
+ // tracee-tracer relationship. The key is a process (technically, the thread
+ // group leader) that can be traced by any thread that is a descendant of the
+ // value. If the value is nil, then anyone can trace the process represented by
+ // the key.
+ //
+ // ptraceExceptions is protected by the TaskSet mutex.
+ ptraceExceptions map[*Task]*Task
+
+ // YAMAPtraceScope is the current level of YAMA ptrace restrictions.
+ YAMAPtraceScope int32
}
// InitKernelArgs holds arguments to Init.
@@ -382,6 +394,8 @@ func (k *Kernel) Init(args InitKernelArgs) error {
k.monotonicClock = &timekeeperClock{tk: args.Timekeeper, c: sentrytime.Monotonic}
k.futexes = futex.NewManager()
k.netlinkPorts = port.New()
+ k.ptraceExceptions = make(map[*Task]*Task)
+ k.YAMAPtraceScope = linux.YAMA_SCOPE_RELATIONAL
if VFS2Enabled {
ctx := k.SupervisorContext()
@@ -425,7 +439,6 @@ func (k *Kernel) Init(args InitKernelArgs) error {
k.socketsVFS2 = make(map[*vfs.FileDescription]*SocketRecord)
}
-
return nil
}
diff --git a/pkg/sentry/kernel/ptrace.go b/pkg/sentry/kernel/ptrace.go
index cef58a590..c3980350a 100644
--- a/pkg/sentry/kernel/ptrace.go
+++ b/pkg/sentry/kernel/ptrace.go
@@ -16,6 +16,7 @@ package kernel
import (
"fmt"
+ "sync/atomic"
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/marshal/primitive"
@@ -95,7 +96,11 @@ const (
// checks for access mode PTRACE_MODE_ATTACH; otherwise, it checks for access
// mode PTRACE_MODE_READ.
//
-// NOTE(b/30815691): The result of CanTrace is immediately stale (e.g., a
+// In Linux, ptrace access restrictions may be configured by LSMs. While we do
+// not support LSMs, we do add additional restrictions based on the commoncap
+// and YAMA LSMs.
+//
+// TODO(gvisor.dev/issue/212): The result of CanTrace is immediately stale (e.g., a
// racing setuid(2) may change traceability). This may pose a risk when a task
// changes from traceable to not traceable. This is only problematic across
// execve, where privileges may increase.
@@ -103,7 +108,7 @@ const (
// We currently do not implement privileged executables (set-user/group-ID bits
// and file capabilities), so that case is not reachable.
func (t *Task) CanTrace(target *Task, attach bool) bool {
- // "1. If the calling thread and the target thread are in the same thread
+ // "If the calling thread and the target thread are in the same thread
// group, access is always allowed." - ptrace(2)
//
// Note: Strictly speaking, prior to 73af963f9f30 ("__ptrace_may_access()
@@ -115,9 +120,57 @@ func (t *Task) CanTrace(target *Task, attach bool) bool {
return true
}
+ if !t.canTraceStandard(target, attach) {
+ return false
+ }
+
+ // YAMA only supported for vfs2.
+ if !VFS2Enabled {
+ return true
+ }
+
+ if atomic.LoadInt32(&t.k.YAMAPtraceScope) == linux.YAMA_SCOPE_RELATIONAL {
+ t.tg.pidns.owner.mu.RLock()
+ defer t.tg.pidns.owner.mu.RUnlock()
+ if !t.canTraceYAMALocked(target) {
+ return false
+ }
+ }
+ return true
+}
+
+// canTraceLocked is the same as CanTrace, except the caller must already hold
+// the TaskSet mutex (for reading or writing).
+func (t *Task) canTraceLocked(target *Task, attach bool) bool {
+ if t.tg == target.tg {
+ return true
+ }
+
+ if !t.canTraceStandard(target, attach) {
+ return false
+ }
+
+ // YAMA only supported for vfs2.
+ if !VFS2Enabled {
+ return true
+ }
+
+ if atomic.LoadInt32(&t.k.YAMAPtraceScope) == linux.YAMA_SCOPE_RELATIONAL {
+ if !t.canTraceYAMALocked(target) {
+ return false
+ }
+ }
+ return true
+}
+
+// canTraceStandard performs standard ptrace access checks as defined by
+// kernel/ptrace.c:__ptrace_may_access as well as the commoncap LSM
+// implementation of the security_ptrace_access_check() interface, which is
+// always invoked.
+func (t *Task) canTraceStandard(target *Task, attach bool) bool {
// """
- // 2. If the access mode specifies PTRACE_MODE_FSCREDS (ED: snipped,
- // doesn't exist until Linux 4.5).
+ // TODO(gvisor.dev/issue/260): 1. If the access mode specifies
+ // PTRACE_MODE_FSCREDS (ED: snipped, doesn't exist until Linux 4.5).
//
// Otherwise, the access mode specifies PTRACE_MODE_REALCREDS, so use the
// caller's real UID and GID for the checks in the next step. (Most APIs
@@ -125,7 +178,7 @@ func (t *Task) CanTrace(target *Task, attach bool) bool {
// historical reasons, the PTRACE_MODE_REALCREDS check uses the real IDs
// instead.)
//
- // 3. Deny access if neither of the following is true:
+ // 2. Deny access if neither of the following is true:
//
// - The real, effective, and saved-set user IDs of the target match the
// caller's user ID, *and* the real, effective, and saved-set group IDs of
@@ -134,15 +187,12 @@ func (t *Task) CanTrace(target *Task, attach bool) bool {
// - The caller has the CAP_SYS_PTRACE capability in the user namespace of
// the target.
//
- // 4. Deny access if the target process "dumpable" attribute has a value
+ // 3. Deny access if the target process "dumpable" attribute has a value
// other than 1 (SUID_DUMP_USER; see the discussion of PR_SET_DUMPABLE in
// prctl(2)), and the caller does not have the CAP_SYS_PTRACE capability in
// the user namespace of the target process.
//
- // 5. The kernel LSM security_ptrace_access_check() interface is invoked to
- // see if ptrace access is permitted. The results depend on the LSM(s). The
- // implementation of this interface in the commoncap LSM performs the
- // following steps:
+ // 4. The commoncap LSM performs the following steps:
//
// a) If the access mode includes PTRACE_MODE_FSCREDS, then use the
// caller's effective capability set; otherwise (the access mode specifies
@@ -188,6 +238,94 @@ func (t *Task) CanTrace(target *Task, attach bool) bool {
return true
}
+// canTraceYAMALocked performs ptrace access checks as defined by the YAMA LSM
+// implementation of the security_ptrace_access_check() interface, with YAMA
+// configured to mode 1. This is a common default among various Linux
+// distributions.
+//
+// It only permits the tracer to proceed if one of the following conditions is
+// met:
+//
+// a) The tracer is already attached to the tracee.
+//
+// b) The target is a descendant of the tracer.
+//
+// c) The target has explicitly given permission to the tracer through the
+// PR_SET_PTRACER prctl.
+//
+// d) The tracer has CAP_SYS_PTRACE in the target's user namespace.
+//
+// See security/yama/yama_lsm.c:yama_ptrace_access_check.
+//
+// Precondition: the TaskSet mutex must be locked (for reading or writing).
+func (t *Task) canTraceYAMALocked(target *Task) bool {
+ if tracer := target.Tracer(); tracer != nil {
+ if tracer.tg == t.tg {
+ return true
+ }
+ }
+ if target.isYAMADescendantOfLocked(t) {
+ return true
+ }
+ if target.hasYAMAExceptionForLocked(t) {
+ return true
+ }
+ if t.HasCapabilityIn(linux.CAP_SYS_PTRACE, target.UserNamespace()) {
+ return true
+ }
+ return false
+}
+
+// isYAMADescendantOfLocked determines whether t is considered a descendant of
+// ancestor for the purposes of YAMA permissions (specifically, whether t's
+// thread group is descended from ancestor's).
+//
+// Precondition: the TaskSet mutex must be locked (for reading or writing).
+func (t *Task) isYAMADescendantOfLocked(ancestor *Task) bool {
+ walker := t
+ for walker != nil {
+ if walker.tg.leader == ancestor.tg.leader {
+ return true
+ }
+ walker = walker.parent
+ }
+ return false
+}
+
+// hasYAMAExceptionForLocked returns true if t's thread group has a YAMA
+// exception in k.ptraceExceptions that permits tracer to trace it.
+//
+// Exceptions are keyed by thread group leader, so the lookup uses t.tg.leader
+// rather than t itself; otherwise an exception would be missed whenever the
+// target is a non-leader thread.
+//
+// Precondition: the TaskSet mutex must be locked (for reading or writing).
+func (t *Task) hasYAMAExceptionForLocked(tracer *Task) bool {
+ allowed, ok := t.k.ptraceExceptions[t.tg.leader]
+ if !ok {
+ return false
+ }
+ // A nil value means any task may trace the thread group.
+ return allowed == nil || tracer.isYAMADescendantOfLocked(allowed)
+}
+
+// ClearYAMAException removes any YAMA exception with t as the tracee.
+func (t *Task) ClearYAMAException() {
+ t.tg.pidns.owner.mu.Lock()
+ defer t.tg.pidns.owner.mu.Unlock()
+ tracee := t.tg.leader
+ delete(t.k.ptraceExceptions, tracee)
+}
+
+// SetYAMAException creates a YAMA exception allowing all descendants of tracer
+// to trace t. If tracer is nil, then any task is allowed to trace t.
+//
+// If there was an existing exception, it is overwritten with the new one.
+func (t *Task) SetYAMAException(tracer *Task) {
+ t.tg.pidns.owner.mu.Lock()
+ defer t.tg.pidns.owner.mu.Unlock()
+
+ tracee := t.tg.leader
+ tracee.ptraceYAMAExceptionAdded = true
+ if tracer != nil {
+ tracer.ptraceYAMAExceptionAdded = true
+ }
+
+ t.k.ptraceExceptions[tracee] = tracer
+}
+
// Tracer returns t's ptrace Tracer.
func (t *Task) Tracer() *Task {
return t.ptraceTracer.Load().(*Task)
@@ -358,7 +496,7 @@ func (t *Task) ptraceTraceme() error {
// returning nil here is correct.
return nil
}
- if !t.parent.CanTrace(t, true) {
+ if !t.parent.canTraceLocked(t, true) {
return syserror.EPERM
}
if t.parent.exitState != TaskExitNone {
@@ -377,11 +515,11 @@ func (t *Task) ptraceAttach(target *Task, seize bool, opts uintptr) error {
if t.tg == target.tg {
return syserror.EPERM
}
- if !t.CanTrace(target, true) {
- return syserror.EPERM
- }
t.tg.pidns.owner.mu.Lock()
defer t.tg.pidns.owner.mu.Unlock()
+ if !t.canTraceLocked(target, true) {
+ return syserror.EPERM
+ }
if target.hasTracer() {
return syserror.EPERM
}
@@ -459,6 +597,15 @@ func (t *Task) exitPtrace() {
}
// "nil maps cannot be saved"
t.ptraceTracees = make(map[*Task]struct{})
+
+ if t.ptraceYAMAExceptionAdded {
+ delete(t.k.ptraceExceptions, t)
+ for tracee, tracer := range t.k.ptraceExceptions {
+ if tracer == t {
+ delete(t.k.ptraceExceptions, tracee)
+ }
+ }
+ }
}
// forgetTracerLocked detaches t's tracer and ensures that t is no longer
diff --git a/pkg/sentry/kernel/task.go b/pkg/sentry/kernel/task.go
index c0ab53c94..36141dd09 100644
--- a/pkg/sentry/kernel/task.go
+++ b/pkg/sentry/kernel/task.go
@@ -403,6 +403,13 @@ type Task struct {
// ptraceEventMsg is protected by the TaskSet mutex.
ptraceEventMsg uint64
+ // ptraceYAMAExceptionAdded is true if a YAMA exception involving the task has
+ // been added before. This is used during task exit to decide whether we need
+ // to clean up YAMA exceptions.
+ //
+ // ptraceYAMAExceptionAdded is protected by the TaskSet mutex.
+ ptraceYAMAExceptionAdded bool
+
// The struct that holds the IO-related usage. The ioUsage pointer is
// immutable.
ioUsage *usage.IO
diff --git a/pkg/sentry/kernel/task_exit.go b/pkg/sentry/kernel/task_exit.go
index f7765fa3a..ad59e4f60 100644
--- a/pkg/sentry/kernel/task_exit.go
+++ b/pkg/sentry/kernel/task_exit.go
@@ -694,7 +694,8 @@ func (t *Task) exitNotifyLocked(fromPtraceDetach bool) {
}
if t.parent != nil {
delete(t.parent.children, t)
- t.parent = nil
+ // Do not clear t.parent. It may still be needed after the task has exited
+ // (for example, to perform ptrace access checks on /proc/[pid] files).
}
}
}
diff --git a/pkg/sentry/kernel/threads.go b/pkg/sentry/kernel/threads.go
index e9da99067..09d070ec8 100644
--- a/pkg/sentry/kernel/threads.go
+++ b/pkg/sentry/kernel/threads.go
@@ -55,7 +55,7 @@ const InitTID ThreadID = 1
//
// +stateify savable
type TaskSet struct {
- // mu protects all relationships betweens tasks and thread groups in the
+ // mu protects all relationships between tasks and thread groups in the
// TaskSet. (mu is approximately equivalent to Linux's tasklist_lock.)
mu sync.RWMutex `state:"nosave"`
diff --git a/pkg/sentry/syscalls/linux/error.go b/pkg/sentry/syscalls/linux/error.go
index d1778d029..5bd526b73 100644
--- a/pkg/sentry/syscalls/linux/error.go
+++ b/pkg/sentry/syscalls/linux/error.go
@@ -18,6 +18,7 @@ import (
"io"
"gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/pkg/metric"
"gvisor.dev/gvisor/pkg/sentry/fs"
@@ -36,16 +37,16 @@ var (
// errors, we may consume the error and return only the partial read/write.
//
// op and f are used only for panics.
-func HandleIOErrorVFS2(t *kernel.Task, partialResult bool, ioerr, intr error, op string, f *vfs.FileDescription) error {
- known, err := handleIOErrorImpl(t, partialResult, ioerr, intr, op)
+func HandleIOErrorVFS2(ctx context.Context, partialResult bool, ioerr, intr error, op string, f *vfs.FileDescription) error {
+ known, err := handleIOErrorImpl(ctx, partialResult, ioerr, intr, op)
if err != nil {
return err
}
if !known {
// An unknown error is encountered with a partial read/write.
fs := f.Mount().Filesystem().VirtualFilesystem()
- root := vfs.RootFromContext(t)
- name, _ := fs.PathnameWithDeleted(t, root, f.VirtualDentry())
+ root := vfs.RootFromContext(ctx)
+ name, _ := fs.PathnameWithDeleted(ctx, root, f.VirtualDentry())
log.Traceback("Invalid request partialResult %v and err (type %T) %v for %s operation on %q", partialResult, ioerr, ioerr, op, name)
partialResultOnce.Do(partialResultMetric.Increment)
}
@@ -56,8 +57,8 @@ func HandleIOErrorVFS2(t *kernel.Task, partialResult bool, ioerr, intr error, op
// errors, we may consume the error and return only the partial read/write.
//
// op and f are used only for panics.
-func handleIOError(t *kernel.Task, partialResult bool, ioerr, intr error, op string, f *fs.File) error {
- known, err := handleIOErrorImpl(t, partialResult, ioerr, intr, op)
+func handleIOError(ctx context.Context, partialResult bool, ioerr, intr error, op string, f *fs.File) error {
+ known, err := handleIOErrorImpl(ctx, partialResult, ioerr, intr, op)
if err != nil {
return err
}
@@ -74,7 +75,7 @@ func handleIOError(t *kernel.Task, partialResult bool, ioerr, intr error, op str
// errors, we may consume the error and return only the partial read/write.
//
// Returns false if error is unknown.
-func handleIOErrorImpl(t *kernel.Task, partialResult bool, err, intr error, op string) (bool, error) {
+func handleIOErrorImpl(ctx context.Context, partialResult bool, err, intr error, op string) (bool, error) {
switch err {
case nil:
// Typical successful syscall.
@@ -85,6 +86,10 @@ func handleIOErrorImpl(t *kernel.Task, partialResult bool, err, intr error, op s
// they will see 0.
return true, nil
case syserror.ErrExceedsFileSizeLimit:
+ t := kernel.TaskFromContext(ctx)
+ if t == nil {
+ panic("I/O error should only occur from a context associated with a Task")
+ }
// Ignore partialResult because this error only applies to
// normal files, and for those files we cannot accumulate
// write results.
diff --git a/pkg/sentry/syscalls/linux/sys_prctl.go b/pkg/sentry/syscalls/linux/sys_prctl.go
index a892d2c62..9890dd946 100644
--- a/pkg/sentry/syscalls/linux/sys_prctl.go
+++ b/pkg/sentry/syscalls/linux/sys_prctl.go
@@ -172,6 +172,24 @@ func Prctl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall
}
return 1, nil, nil
+ case linux.PR_SET_PTRACER:
+ pid := args[1].Int()
+ switch pid {
+ case 0:
+ t.ClearYAMAException()
+ return 0, nil, nil
+ case linux.PR_SET_PTRACER_ANY:
+ t.SetYAMAException(nil)
+ return 0, nil, nil
+ default:
+ tracer := t.PIDNamespace().TaskWithID(kernel.ThreadID(pid))
+ if tracer == nil {
+ return 0, nil, syserror.EINVAL
+ }
+ t.SetYAMAException(tracer)
+ return 0, nil, nil
+ }
+
case linux.PR_SET_SECCOMP:
if args[1].Int() != linux.SECCOMP_MODE_FILTER {
// Unsupported mode.
diff --git a/pkg/sentry/syscalls/linux/vfs2/aio.go b/pkg/sentry/syscalls/linux/vfs2/aio.go
index 1365a5a62..de6789a65 100644
--- a/pkg/sentry/syscalls/linux/vfs2/aio.go
+++ b/pkg/sentry/syscalls/linux/vfs2/aio.go
@@ -177,7 +177,7 @@ func getAIOCallback(t *kernel.Task, fd, eventFD *vfs.FileDescription, cbAddr use
// Update the result.
if err != nil {
- err = slinux.HandleIOErrorVFS2(t, ev.Result != 0 /* partial */, err, nil /* never interrupted */, "aio", fd)
+ err = slinux.HandleIOErrorVFS2(ctx, ev.Result != 0 /* partial */, err, nil /* never interrupted */, "aio", fd)
ev.Result = -int64(kernel.ExtractErrno(err, 0))
}
diff --git a/runsc/boot/filter/extra_filters_race.go b/runsc/boot/filter/extra_filters_race.go
index 9ff80276a..5b99eb8cd 100644
--- a/runsc/boot/filter/extra_filters_race.go
+++ b/runsc/boot/filter/extra_filters_race.go
@@ -27,6 +27,7 @@ func instrumentationFilters() seccomp.SyscallRules {
Report("TSAN is enabled: syscall filters less restrictive!")
return seccomp.SyscallRules{
syscall.SYS_BRK: {},
+ syscall.SYS_CLOCK_NANOSLEEP: {},
syscall.SYS_CLONE: {},
syscall.SYS_FUTEX: {},
syscall.SYS_MMAP: {},
diff --git a/runsc/fsgofer/filter/extra_filters_race.go b/runsc/fsgofer/filter/extra_filters_race.go
index 20a0732be..cbd5c487e 100644
--- a/runsc/fsgofer/filter/extra_filters_race.go
+++ b/runsc/fsgofer/filter/extra_filters_race.go
@@ -28,6 +28,7 @@ func instrumentationFilters() seccomp.SyscallRules {
log.Warningf("*** SECCOMP WARNING: TSAN is enabled: syscall filters less restrictive!")
return seccomp.SyscallRules{
syscall.SYS_BRK: {},
+ syscall.SYS_CLOCK_NANOSLEEP: {},
syscall.SYS_CLONE: {},
syscall.SYS_FUTEX: {},
syscall.SYS_MADVISE: {},
diff --git a/test/syscalls/linux/BUILD b/test/syscalls/linux/BUILD
index 1a492bca3..f871426f0 100644
--- a/test/syscalls/linux/BUILD
+++ b/test/syscalls/linux/BUILD
@@ -1814,6 +1814,7 @@ cc_binary(
"@com_google_absl//absl/flags:flag",
"@com_google_absl//absl/time",
gtest,
+ "//test/util:capability_util",
"//test/util:file_descriptor",
"//test/util:fs_util",
"//test/util:logging",
diff --git a/test/syscalls/linux/ptrace.cc b/test/syscalls/linux/ptrace.cc
index 13c19d4a8..d1d7c6f84 100644
--- a/test/syscalls/linux/ptrace.cc
+++ b/test/syscalls/linux/ptrace.cc
@@ -15,7 +15,9 @@
#include <elf.h>
#include <signal.h>
#include <stddef.h>
+#include <sys/prctl.h>
#include <sys/ptrace.h>
+#include <sys/socket.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/user.h>
@@ -30,6 +32,7 @@
#include "absl/flags/flag.h"
#include "absl/time/clock.h"
#include "absl/time/time.h"
+#include "test/util/capability_util.h"
#include "test/util/fs_util.h"
#include "test/util/logging.h"
#include "test/util/memory_util.h"
@@ -45,6 +48,49 @@ ABSL_FLAG(bool, ptrace_test_execve_child, false,
"If true, run the "
"PtraceExecveTest_Execve_GetRegs_PeekUser_SIGKILL_TraceClone_"
"TraceExit child workload.");
+ABSL_FLAG(bool, ptrace_test_trace_descendants_allowed, false,
+ "If set, run the child workload for "
+ "PtraceTest_TraceDescendantsAllowed.");
+ABSL_FLAG(bool, ptrace_test_prctl_set_ptracer_pid, false,
+ "If set, run the child workload for PtraceTest_PrctlSetPtracerPID.");
+ABSL_FLAG(bool, ptrace_test_prctl_set_ptracer_any, false,
+ "If set, run the child workload for PtraceTest_PrctlSetPtracerAny.");
+ABSL_FLAG(bool, ptrace_test_prctl_clear_ptracer, false,
+ "If set, run the child workload for PtraceTest_PrctlClearPtracer.");
+ABSL_FLAG(bool, ptrace_test_prctl_replace_ptracer, false,
+ "If set, run the child workload for PtraceTest_PrctlReplacePtracer.");
+ABSL_FLAG(int, ptrace_test_prctl_replace_ptracer_tid, -1,
+ "Specifies the replacement tracer tid in the child workload for "
+ "PtraceTest_PrctlReplacePtracer.");
+ABSL_FLAG(bool, ptrace_test_prctl_set_ptracer_and_exit_tracee_thread, false,
+ "If set, run the child workload for "
+ "PtraceTest_PrctlSetPtracerPersistsPastTraceeThreadExit.");
+ABSL_FLAG(bool, ptrace_test_prctl_set_ptracer_and_exec_non_leader, false,
+ "If set, run the child workload for "
+ "PtraceTest_PrctlSetPtracerDoesNotPersistPastNonLeaderExec.");
+ABSL_FLAG(bool, ptrace_test_prctl_set_ptracer_and_exit_tracer_thread, false,
+ "If set, run the child workload for "
+ "PtraceTest_PrctlSetPtracerDoesNotPersistPastTracerThreadExit.");
+ABSL_FLAG(int, ptrace_test_prctl_set_ptracer_and_exit_tracer_thread_tid, -1,
+ "Specifies the tracee tid in the child workload for "
+ "PtraceTest_PrctlSetPtracerDoesNotPersistPastTracerThreadExit.");
+ABSL_FLAG(bool, ptrace_test_prctl_set_ptracer_respects_tracer_thread_id, false,
+ "If set, run the child workload for "
+ "PtraceTest_PrctlSetPtracerRespectsTracerThreadID.");
+ABSL_FLAG(int, ptrace_test_prctl_set_ptracer_respects_tracer_thread_id_tid, -1,
+ "Specifies the thread tid to be traced in the child workload "
+ "for PtraceTest_PrctlSetPtracerRespectsTracerThreadID.");
+
+ABSL_FLAG(bool, ptrace_test_tracee, false,
+ "If true, run the tracee process for the "
+ "PrctlSetPtracerDoesNotPersistPastLeaderExec and "
+ "PrctlSetPtracerDoesNotPersistPastNonLeaderExec workloads.");
+ABSL_FLAG(int, ptrace_test_trace_tid, -1,
+ "If set, run a process to ptrace attach to the thread with the "
+ "specified pid for the PrctlSetPtracerRespectsTracerThreadID "
+ "workload.");
+ABSL_FLAG(int, ptrace_test_fd, -1,
+ "Specifies the fd used for communication between tracer and tracee "
+ "processes across exec.");
namespace gvisor {
namespace testing {
@@ -78,10 +124,10 @@ void RaiseSignal(int sig) {
TEST_PCHECK(tgkill(pid, tid, sig) == 0);
}
+constexpr char kYamaPtraceScopePath[] = "/proc/sys/kernel/yama/ptrace_scope";
+
// Returns the Yama ptrace scope.
PosixErrorOr<int> YamaPtraceScope() {
- constexpr char kYamaPtraceScopePath[] = "/proc/sys/kernel/yama/ptrace_scope";
-
ASSIGN_OR_RETURN_ERRNO(bool exists, Exists(kYamaPtraceScopePath));
if (!exists) {
// File doesn't exist means no Yama, so the scope is disabled -> 0.
@@ -99,6 +145,22 @@ PosixErrorOr<int> YamaPtraceScope() {
return scope;
}
+int CheckPtraceAttach(pid_t pid) {
+ int ret = ptrace(PTRACE_ATTACH, pid, 0, 0);
+ MaybeSave();
+ if (ret < 0) {
+ return ret;
+ }
+
+ int status;
+ TEST_PCHECK(waitpid(pid, &status, 0) == pid);
+ MaybeSave();
+ TEST_CHECK(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP);
+ TEST_PCHECK(ptrace(PTRACE_DETACH, pid, 0, 0) == 0);
+ MaybeSave();
+ return 0;
+}
+
TEST(PtraceTest, AttachSelf) {
EXPECT_THAT(ptrace(PTRACE_ATTACH, gettid(), 0, 0),
SyscallFailsWithErrno(EPERM));
@@ -111,10 +173,995 @@ TEST(PtraceTest, AttachSameThreadGroup) {
});
}
+TEST(PtraceTest, TraceParentNotAllowed) {  // A forked child's attach to its parent must fail with EPERM.
+ SKIP_IF(ASSERT_NO_ERRNO_AND_VALUE(YamaPtraceScope()) < 1);  // Skipped when the Yama scope is 0.
+ ASSERT_NO_ERRNO(SetCapability(CAP_SYS_PTRACE, false));  // Presumably drops CAP_SYS_PTRACE; confirm against SetCapability.
+
+ pid_t const child_pid = fork();
+ if (child_pid == 0) {
+ TEST_CHECK(CheckPtraceAttach(getppid()) == -1);
+ TEST_PCHECK(errno == EPERM);
+ _exit(0);
+ }
+ ASSERT_THAT(child_pid, SyscallSucceeds());
+
+ int status;  // Reap the child and require a normal exit with code 0.
+ ASSERT_THAT(waitpid(child_pid, &status, 0), SyscallSucceeds());
+ EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0)
+ << " status " << status;
+}
+
+// Verifies that, when the Yama scope is at least 1 and CAP_SYS_PTRACE has been
+// turned off via SetCapability, a process cannot attach to a sibling process:
+// the attach attempt must fail with EPERM.
+TEST(PtraceTest, TraceNonDescendantNotAllowed) {
+  SKIP_IF(ASSERT_NO_ERRNO_AND_VALUE(YamaPtraceScope()) < 1);
+  ASSERT_NO_ERRNO(SetCapability(CAP_SYS_PTRACE, false));
+
+  // The tracee idles until this process kills it below.
+  pid_t const tracee_pid = fork();
+  if (tracee_pid == 0) {
+    while (true) {
+      SleepSafe(absl::Seconds(1));
+    }
+  }
+  ASSERT_THAT(tracee_pid, SyscallSucceeds());
+
+  pid_t const tracer_pid = fork();
+  if (tracer_pid == 0) {
+    TEST_CHECK(CheckPtraceAttach(tracee_pid) == -1);
+    TEST_PCHECK(errno == EPERM);
+    _exit(0);
+  }
+  // Must be fatal: with tracer_pid == -1 the waitpid() below would wait for
+  // any child and could block on the tracee, which never exits on its own.
+  ASSERT_THAT(tracer_pid, SyscallSucceeds());
+
+  // Clean up tracer.
+  int status;
+  ASSERT_THAT(waitpid(tracer_pid, &status, 0), SyscallSucceeds());
+  EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0)
+      << " status " << status;
+
+  // Clean up tracee.
+  ASSERT_THAT(kill(tracee_pid, SIGKILL), SyscallSucceeds());
+  ASSERT_THAT(waitpid(tracee_pid, &status, 0),
+              SyscallSucceedsWithValue(tracee_pid));
+  EXPECT_TRUE(WIFSIGNALED(status) && WTERMSIG(status) == SIGKILL)
+      << " status " << status;
+}
+
+// Verifies that a process holding CAP_SYS_PTRACE can attach to a sibling
+// process. Skipped without the capability or when the Yama scope is above 2.
+TEST(PtraceTest, TraceNonDescendantWithCapabilityAllowed) {
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_PTRACE)));
+  // Skip if disallowed by YAMA despite having CAP_SYS_PTRACE.
+  SKIP_IF(ASSERT_NO_ERRNO_AND_VALUE(YamaPtraceScope()) > 2);
+
+  // The tracee idles until this process kills it below.
+  pid_t const tracee_pid = fork();
+  if (tracee_pid == 0) {
+    while (true) {
+      SleepSafe(absl::Seconds(1));
+    }
+  }
+  ASSERT_THAT(tracee_pid, SyscallSucceeds());
+
+  pid_t const tracer_pid = fork();
+  if (tracer_pid == 0) {
+    TEST_PCHECK(CheckPtraceAttach(tracee_pid) == 0);
+    _exit(0);
+  }
+  ASSERT_THAT(tracer_pid, SyscallSucceeds());
+
+  // Clean up tracer. Include the raw wait status on failure, as the other
+  // tests in this file do.
+  int status;
+  ASSERT_THAT(waitpid(tracer_pid, &status, 0), SyscallSucceeds());
+  EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0)
+      << " status " << status;
+
+  // Clean up tracee.
+  ASSERT_THAT(kill(tracee_pid, SIGKILL), SyscallSucceeds());
+  ASSERT_THAT(waitpid(tracee_pid, &status, 0),
+              SyscallSucceedsWithValue(tracee_pid));
+  EXPECT_TRUE(WIFSIGNALED(status) && WTERMSIG(status) == SIGKILL)
+      << " status " << status;
+}
+
+TEST(PtraceTest, TraceDescendantsAllowed) {  // This process attaches to two threads of its grandchild.
+ SKIP_IF(ASSERT_NO_ERRNO_AND_VALUE(YamaPtraceScope()) > 1);  // Skipped when the Yama scope is above 1.
+ ASSERT_NO_ERRNO(SetCapability(CAP_SYS_PTRACE, false));
+
+ // Use socket pair to communicate tids to this process from its grandchild.
+ int sockets[2];
+ ASSERT_THAT(socketpair(AF_UNIX, SOCK_STREAM, 0, sockets), SyscallSucceeds());
+
+ // Allocate vector before forking (not async-signal-safe).
+ ExecveArray const owned_child_argv = {
+ "/proc/self/exe", "--ptrace_test_trace_descendants_allowed",
+ "--ptrace_test_fd", std::to_string(sockets[0])};
+ char* const* const child_argv = owned_child_argv.get();
+
+ pid_t const child_pid = fork();
+ if (child_pid == 0) {
+ // In child process.
+ TEST_PCHECK(close(sockets[1]) == 0);
+ pid_t const grandchild_pid = fork();
+ if (grandchild_pid == 0) {
+ // This test will create a new thread in the grandchild process.
+ // pthread_create(2) isn't async-signal-safe, so we execve() first.
+ execve(child_argv[0], child_argv, /* envp = */ nullptr);
+ TEST_PCHECK_MSG(false, "Survived execve to test child");
+ }
+ TEST_PCHECK(grandchild_pid > 0);
+ MaybeSave();
+
+ // Wait for grandchild. Our parent process will kill it once it's done.
+ int status;
+ TEST_PCHECK(waitpid(grandchild_pid, &status, 0) == grandchild_pid);
+ TEST_CHECK(WIFSIGNALED(status) && WTERMSIG(status) == SIGKILL);
+ MaybeSave();
+ _exit(0);
+ }
+ ASSERT_THAT(child_pid, SyscallSucceeds());
+ ASSERT_THAT(close(sockets[0]), SyscallSucceeds());  // This process only reads from sockets[1].
+
+ // We should be able to attach to any thread in the grandchild.
+ pid_t grandchild_tid1, grandchild_tid2;
+ ASSERT_THAT(read(sockets[1], &grandchild_tid1, sizeof(grandchild_tid1)),
+ SyscallSucceedsWithValue(sizeof(grandchild_tid1)));
+ ASSERT_THAT(read(sockets[1], &grandchild_tid2, sizeof(grandchild_tid2)),
+ SyscallSucceedsWithValue(sizeof(grandchild_tid2)));
+
+ EXPECT_THAT(CheckPtraceAttach(grandchild_tid1), SyscallSucceeds());
+ EXPECT_THAT(CheckPtraceAttach(grandchild_tid2), SyscallSucceeds());
+
+ // Clean up grandchild.
+ ASSERT_THAT(kill(grandchild_tid1, SIGKILL), SyscallSucceeds());
+
+ // Clean up child.
+ int status;  // The child exits 0 only after it has reaped the SIGKILLed grandchild.
+ ASSERT_THAT(waitpid(child_pid, &status, 0),
+ SyscallSucceedsWithValue(child_pid));
+ EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0)
+ << " status " << status;
+}
+
+[[noreturn]] void RunTraceDescendantsAllowed(int fd) {  // Writes the tids of two threads to fd, then sleeps forever.
+ // Let the tracer know our tid through the socket fd.
+ pid_t const tid = gettid();
+ TEST_PCHECK(write(fd, &tid, sizeof(tid)) == sizeof(tid));
+ MaybeSave();
+
+ ScopedThread t([fd] {
+ // See if any arbitrary thread (whose tid differs from the process id) can
+ // be traced as well.
+ pid_t const tid = gettid();
+ TEST_PCHECK(write(fd, &tid, sizeof(tid)) == sizeof(tid));
+ MaybeSave();
+ while (true) {  // Keep this thread alive so it stays attachable.
+ SleepSafe(absl::Seconds(1));
+ }
+ });
+
+ while (true) {  // The calling thread never returns either.
+ SleepSafe(absl::Seconds(1));
+ }
+}
+
+TEST(PtraceTest, PrctlSetPtracerInvalidPID) {  // PR_SET_PTRACER with pid 123456789 must fail with EINVAL.
+ // EINVAL should also be returned if PR_SET_PTRACER is not supported.
+ EXPECT_THAT(prctl(PR_SET_PTRACER, 123456789), SyscallFailsWithErrno(EINVAL));  // Presumably no task has this pid; TODO confirm.
+}
+
+// Verifies that after the tracee calls prctl(PR_SET_PTRACER, <parent pid>), a
+// separate tracer process forked by this test can attach to it. The tracee
+// runs the --ptrace_test_prctl_set_ptracer_pid workload and reports over a
+// socket pair once the prctl has been issued. Only runs at Yama scope 1.
+TEST(PtraceTest, PrctlSetPtracerPID) {
+  SKIP_IF(ASSERT_NO_ERRNO_AND_VALUE(YamaPtraceScope()) != 1);
+
+  ASSERT_NO_ERRNO(SetCapability(CAP_SYS_PTRACE, false));
+
+  // Use sockets to synchronize between tracer and tracee.
+  int sockets[2];
+  ASSERT_THAT(socketpair(AF_UNIX, SOCK_STREAM, 0, sockets), SyscallSucceeds());
+
+  // Allocate vector before forking (not async-signal-safe).
+  ExecveArray const owned_child_argv = {
+      "/proc/self/exe", "--ptrace_test_prctl_set_ptracer_pid",
+      "--ptrace_test_fd", std::to_string(sockets[0])};
+  char* const* const child_argv = owned_child_argv.get();
+
+  pid_t const tracee_pid = fork();
+  if (tracee_pid == 0) {
+    TEST_PCHECK(close(sockets[1]) == 0);
+    // This test will create a new thread in the child process.
+    // pthread_create(2) isn't async-signal-safe, so we execve() first.
+    execve(child_argv[0], child_argv, /* envp = */ nullptr);
+    TEST_PCHECK_MSG(false, "Survived execve to test child");
+  }
+  ASSERT_THAT(tracee_pid, SyscallSucceeds());
+  ASSERT_THAT(close(sockets[0]), SyscallSucceeds());
+
+  pid_t const tracer_pid = fork();
+  if (tracer_pid == 0) {
+    // Wait until tracee has called prctl.
+    char done;
+    TEST_PCHECK(read(sockets[1], &done, 1) == 1);
+    MaybeSave();
+
+    TEST_PCHECK(CheckPtraceAttach(tracee_pid) == 0);
+    _exit(0);
+  }
+  ASSERT_THAT(tracer_pid, SyscallSucceeds());
+
+  // Clean up tracer. Include the raw wait status on failure, as the other
+  // tests in this file do.
+  int status;
+  ASSERT_THAT(waitpid(tracer_pid, &status, 0), SyscallSucceeds());
+  EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0)
+      << " status " << status;
+
+  // Clean up tracee.
+  ASSERT_THAT(kill(tracee_pid, SIGKILL), SyscallSucceeds());
+  ASSERT_THAT(waitpid(tracee_pid, &status, 0),
+              SyscallSucceedsWithValue(tracee_pid));
+  EXPECT_TRUE(WIFSIGNALED(status) && WTERMSIG(status) == SIGKILL)
+      << " status " << status;
+}
+
+[[noreturn]] void RunPrctlSetPtracerPID(int fd) {  // Tracee workload: name our parent as allowed ptracer, signal on fd, then idle.
+ ScopedThread t([fd] {
+ // Perform prctl in a separate thread to verify that it is process-wide.
+ TEST_PCHECK(prctl(PR_SET_PTRACER, getppid()) == 0);  // The allowed tracer is our parent's pid.
+ MaybeSave();
+ // Indicate that the prctl has been set.
+ TEST_PCHECK(write(fd, "x", 1) == 1);
+ MaybeSave();
+ });
+ while (true) {  // Idle forever; this function never returns.
+ SleepSafe(absl::Seconds(1));
+ }
+}
+
+TEST(PtraceTest, PrctlSetPtracerAny) {  // A sibling tracer must be able to attach once the tracee signals on the socket.
+ SKIP_IF(ASSERT_NO_ERRNO_AND_VALUE(YamaPtraceScope()) != 1);  // Only runs at Yama scope 1.
+ ASSERT_NO_ERRNO(SetCapability(CAP_SYS_PTRACE, false));
+
+ // Use sockets to synchronize between tracer and tracee.
+ int sockets[2];
+ ASSERT_THAT(socketpair(AF_UNIX, SOCK_STREAM, 0, sockets), SyscallSucceeds());
+
+ // Allocate vector before forking (not async-signal-safe).
+ ExecveArray const owned_child_argv = {
+ "/proc/self/exe", "--ptrace_test_prctl_set_ptracer_any",
+ "--ptrace_test_fd", std::to_string(sockets[0])};
+ char* const* const child_argv = owned_child_argv.get();
+
+ pid_t const tracee_pid = fork();
+ if (tracee_pid == 0) {
+ // This test will create a new thread in the child process.
+ // pthread_create(2) isn't async-signal-safe, so we execve() first.
+ TEST_PCHECK(close(sockets[1]) == 0);
+ execve(child_argv[0], child_argv, /* envp = */ nullptr);
+ TEST_PCHECK_MSG(false, "Survived execve to test child");
+ }
+ ASSERT_THAT(tracee_pid, SyscallSucceeds());
+ ASSERT_THAT(close(sockets[0]), SyscallSucceeds());  // sockets[0] is the tracee's end.
+
+ pid_t const tracer_pid = fork();
+ if (tracer_pid == 0) {
+ // Wait until tracee has called prctl.
+ char done;
+ TEST_PCHECK(read(sockets[1], &done, 1) == 1);
+ MaybeSave();
+
+ TEST_PCHECK(CheckPtraceAttach(tracee_pid) == 0);
+ _exit(0);
+ }
+ ASSERT_THAT(tracer_pid, SyscallSucceeds());
+
+ // Clean up tracer.
+ int status;
+ ASSERT_THAT(waitpid(tracer_pid, &status, 0), SyscallSucceeds());
+ EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0)
+ << " status " << status;
+
+ // Clean up tracee.
+ ASSERT_THAT(kill(tracee_pid, SIGKILL), SyscallSucceeds());
+ ASSERT_THAT(waitpid(tracee_pid, &status, 0),
+ SyscallSucceedsWithValue(tracee_pid));
+ EXPECT_TRUE(WIFSIGNALED(status) && WTERMSIG(status) == SIGKILL)
+ << " status " << status;
+}
+
+[[noreturn]] void RunPrctlSetPtracerAny(int fd) {  // Tracee workload: set PR_SET_PTRACER_ANY, signal on fd, then idle.
+ ScopedThread t([fd] {
+ // Perform prctl in a separate thread to verify that it is process-wide.
+ TEST_PCHECK(prctl(PR_SET_PTRACER, PR_SET_PTRACER_ANY) == 0);
+ MaybeSave();
+ // Indicate that the prctl has been set.
+ TEST_PCHECK(write(fd, "x", 1) == 1);
+ MaybeSave();
+ });
+ while (true) {  // Idle forever; this function never returns.
+ SleepSafe(absl::Seconds(1));
+ }
+}
+
+TEST(PtraceTest, PrctlClearPtracer) {  // After the tracee sets and then clears its ptracer, a sibling's attach must fail with EPERM.
+ SKIP_IF(ASSERT_NO_ERRNO_AND_VALUE(YamaPtraceScope()) != 1);  // Only runs at Yama scope 1.
+ ASSERT_NO_ERRNO(SetCapability(CAP_SYS_PTRACE, false));
+
+ // Use sockets to synchronize between tracer and tracee.
+ int sockets[2];
+ ASSERT_THAT(socketpair(AF_UNIX, SOCK_STREAM, 0, sockets), SyscallSucceeds());
+
+ // Allocate vector before forking (not async-signal-safe).
+ ExecveArray const owned_child_argv = {
+ "/proc/self/exe", "--ptrace_test_prctl_clear_ptracer", "--ptrace_test_fd",
+ std::to_string(sockets[0])};
+ char* const* const child_argv = owned_child_argv.get();
+
+ pid_t const tracee_pid = fork();
+ if (tracee_pid == 0) {
+ // This test will create a new thread in the child process.
+ // pthread_create(2) isn't async-signal-safe, so we execve() first.
+ TEST_PCHECK(close(sockets[1]) == 0);
+ execve(child_argv[0], child_argv, /* envp = */ nullptr);
+ TEST_PCHECK_MSG(false, "Survived execve to test child");
+ }
+ ASSERT_THAT(tracee_pid, SyscallSucceeds());
+ ASSERT_THAT(close(sockets[0]), SyscallSucceeds());
+
+ pid_t const tracer_pid = fork();
+ if (tracer_pid == 0) {
+ // Wait until tracee has called prctl.
+ char done;
+ TEST_PCHECK(read(sockets[1], &done, 1) == 1);
+ MaybeSave();
+
+ TEST_CHECK(CheckPtraceAttach(tracee_pid) == -1);  // The attach is expected to be refused.
+ TEST_PCHECK(errno == EPERM);
+ _exit(0);
+ }
+ ASSERT_THAT(tracer_pid, SyscallSucceeds());
+
+ // Clean up tracer.
+ int status;
+ ASSERT_THAT(waitpid(tracer_pid, &status, 0), SyscallSucceeds());
+ EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0)
+ << " status " << status;
+
+ // Clean up tracee.
+ ASSERT_THAT(kill(tracee_pid, SIGKILL), SyscallSucceeds());
+ ASSERT_THAT(waitpid(tracee_pid, &status, 0),
+ SyscallSucceedsWithValue(tracee_pid));
+ EXPECT_TRUE(WIFSIGNALED(status) && WTERMSIG(status) == SIGKILL)
+ << " status " << status;
+}
+
+[[noreturn]] void RunPrctlClearPtracer(int fd) {  // Tracee workload: set PR_SET_PTRACER_ANY, reset it with 0, signal on fd, then idle.
+ ScopedThread t([fd] {
+ // Perform prctl in a separate thread to verify that it is process-wide.
+ TEST_PCHECK(prctl(PR_SET_PTRACER, PR_SET_PTRACER_ANY) == 0);
+ MaybeSave();
+ TEST_PCHECK(prctl(PR_SET_PTRACER, 0) == 0);  // An argument of 0 is what this test uses to clear the setting.
+ MaybeSave();
+ // Indicate that the prctl has been set/cleared.
+ TEST_PCHECK(write(fd, "x", 1) == 1);
+ MaybeSave();
+ });
+ while (true) {  // Idle forever; this function never returns.
+ SleepSafe(absl::Seconds(1));
+ }
+}
+
+TEST(PtraceTest, PrctlReplacePtracer) {  // Once the tracee names unused_pid as its ptracer, a different sibling's attach must fail with EPERM.
+ SKIP_IF(ASSERT_NO_ERRNO_AND_VALUE(YamaPtraceScope()) != 1);  // Only runs at Yama scope 1.
+ ASSERT_NO_ERRNO(SetCapability(CAP_SYS_PTRACE, false));
+
+ pid_t const unused_pid = fork();  // Idle process whose pid is handed to the tracee as the replacement ptracer.
+ if (unused_pid == 0) {
+ while (true) {
+ SleepSafe(absl::Seconds(1));
+ }
+ }
+ ASSERT_THAT(unused_pid, SyscallSucceeds());
+
+ // Use sockets to synchronize between tracer and tracee.
+ int sockets[2];
+ ASSERT_THAT(socketpair(AF_UNIX, SOCK_STREAM, 0, sockets), SyscallSucceeds());
+
+ // Allocate vector before forking (not async-signal-safe).
+ ExecveArray const owned_child_argv = {
+ "/proc/self/exe",
+ "--ptrace_test_prctl_replace_ptracer",
+ "--ptrace_test_prctl_replace_ptracer_tid",
+ std::to_string(unused_pid),
+ "--ptrace_test_fd",
+ std::to_string(sockets[0])};
+ char* const* const child_argv = owned_child_argv.get();
+
+ pid_t const tracee_pid = fork();
+ if (tracee_pid == 0) {
+ TEST_PCHECK(close(sockets[1]) == 0);
+ // This test will create a new thread in the child process.
+ // pthread_create(2) isn't async-signal-safe, so we execve() first.
+ execve(child_argv[0], child_argv, /* envp = */ nullptr);
+ TEST_PCHECK_MSG(false, "Survived execve to test child");
+ }
+ ASSERT_THAT(tracee_pid, SyscallSucceeds());
+ ASSERT_THAT(close(sockets[0]), SyscallSucceeds());
+
+ pid_t const tracer_pid = fork();
+ if (tracer_pid == 0) {
+ // Wait until tracee has called prctl.
+ char done;
+ TEST_PCHECK(read(sockets[1], &done, 1) == 1);
+ MaybeSave();
+
+ TEST_CHECK(CheckPtraceAttach(tracee_pid) == -1);  // The attach is expected to be refused.
+ TEST_PCHECK(errno == EPERM);
+ _exit(0);
+ }
+ ASSERT_THAT(tracer_pid, SyscallSucceeds());
+
+ // Clean up tracer.
+ int status;
+ ASSERT_THAT(waitpid(tracer_pid, &status, 0), SyscallSucceeds());
+ EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0)
+ << " status " << status;
+
+ // Clean up tracee.
+ ASSERT_THAT(kill(tracee_pid, SIGKILL), SyscallSucceeds());
+ ASSERT_THAT(waitpid(tracee_pid, &status, 0),
+ SyscallSucceedsWithValue(tracee_pid));
+ EXPECT_TRUE(WIFSIGNALED(status) && WTERMSIG(status) == SIGKILL)
+ << " status " << status;
+
+ // Clean up unused.
+ ASSERT_THAT(kill(unused_pid, SIGKILL), SyscallSucceeds());
+ ASSERT_THAT(waitpid(unused_pid, &status, 0),
+ SyscallSucceedsWithValue(unused_pid));
+ EXPECT_TRUE(WIFSIGNALED(status) && WTERMSIG(status) == SIGKILL)
+ << " status " << status;
+}
+
+[[noreturn]] void RunPrctlReplacePtracer(int new_tracer_pid, int fd) {  // Tracee workload: set the parent as ptracer, then overwrite it with new_tracer_pid.
+ TEST_PCHECK(prctl(PR_SET_PTRACER, getppid()) == 0);  // First setting, made on the calling thread.
+ MaybeSave();
+
+ ScopedThread t([new_tracer_pid, fd] {
+ TEST_PCHECK(prctl(PR_SET_PTRACER, new_tracer_pid) == 0);  // Second setting, made from another thread.
+ MaybeSave();
+ // Indicate that the prctl has been set.
+ TEST_PCHECK(write(fd, "x", 1) == 1);
+ MaybeSave();
+ });
+ while (true) {  // Idle forever; this function never returns.
+ SleepSafe(absl::Seconds(1));
+ }
+}
+
+// Tests that YAMA exceptions store tracees by thread group leader. Exceptions
+// are preserved even after the tracee thread exits, as long as the tracee's
+// thread group leader is still around.
+TEST(PtraceTest, PrctlSetPtracerPersistsPastTraceeThreadExit) {
+ SKIP_IF(ASSERT_NO_ERRNO_AND_VALUE(YamaPtraceScope()) != 1);  // Only runs at Yama scope 1.
+ ASSERT_NO_ERRNO(SetCapability(CAP_SYS_PTRACE, false));
+
+ // Use sockets to synchronize between tracer and tracee.
+ int sockets[2];
+ ASSERT_THAT(socketpair(AF_UNIX, SOCK_STREAM, 0, sockets), SyscallSucceeds());
+
+ // Allocate vector before forking (not async-signal-safe).
+ ExecveArray const owned_child_argv = {
+ "/proc/self/exe",
+ "--ptrace_test_prctl_set_ptracer_and_exit_tracee_thread",
+ "--ptrace_test_fd", std::to_string(sockets[0])};
+ char* const* const child_argv = owned_child_argv.get();
+
+ pid_t const tracee_pid = fork();
+ if (tracee_pid == 0) {
+ // This test will create a new thread in the child process.
+ // pthread_create(2) isn't async-signal-safe, so we execve() first.
+ TEST_PCHECK(close(sockets[1]) == 0);
+ execve(child_argv[0], child_argv, /* envp = */ nullptr);
+ TEST_PCHECK_MSG(false, "Survived execve to test child");
+ }
+ ASSERT_THAT(tracee_pid, SyscallSucceeds());
+ ASSERT_THAT(close(sockets[0]), SyscallSucceeds());
+
+ pid_t const tracer_pid = fork();
+ if (tracer_pid == 0) {
+ // Wait until the tracee thread calling prctl has terminated.
+ char done;
+ TEST_PCHECK(read(sockets[1], &done, 1) == 1);
+ MaybeSave();
+
+ TEST_PCHECK(CheckPtraceAttach(tracee_pid) == 0);  // The attach is expected to succeed.
+ _exit(0);
+ }
+ ASSERT_THAT(tracer_pid, SyscallSucceeds());
+
+ // Clean up tracer.
+ int status;
+ ASSERT_THAT(waitpid(tracer_pid, &status, 0), SyscallSucceeds());
+ EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0)
+ << " status " << status;
+
+ // Clean up tracee.
+ ASSERT_THAT(kill(tracee_pid, SIGKILL), SyscallSucceeds());
+ ASSERT_THAT(waitpid(tracee_pid, &status, 0),
+ SyscallSucceedsWithValue(tracee_pid));
+ EXPECT_TRUE(WIFSIGNALED(status) && WTERMSIG(status) == SIGKILL)
+ << " status " << status;
+}
+
+[[noreturn]] void RunPrctlSetPtracerPersistsPastTraceeThreadExit(int fd) {  // Tracee workload: a short-lived thread sets PR_SET_PTRACER_ANY.
+ ScopedThread t([] {
+ TEST_PCHECK(prctl(PR_SET_PTRACER, PR_SET_PTRACER_ANY) == 0);
+ MaybeSave();
+ });
+ t.Join();  // Signal on fd only after the thread that made the prctl has been joined.
+ // Indicate that thread setting the prctl has exited.
+ TEST_PCHECK(write(fd, "x", 1) == 1);
+ MaybeSave();
+
+ while (true) {  // Idle forever; this function never returns.
+ SleepSafe(absl::Seconds(1));
+ }
+}
+
+// Tests that YAMA exceptions store tracees by thread group leader. Exceptions
+// are preserved across exec as long as the thread group leader does not change,
+// even if the tracee thread is terminated.
+TEST(PtraceTest, PrctlSetPtracerPersistsPastLeaderExec) {
+ SKIP_IF(ASSERT_NO_ERRNO_AND_VALUE(YamaPtraceScope()) != 1);  // Only runs at Yama scope 1.
+ ASSERT_NO_ERRNO(SetCapability(CAP_SYS_PTRACE, false));
+
+ // Use sockets to synchronize between tracer and tracee.
+ int sockets[2];
+ ASSERT_THAT(socketpair(AF_UNIX, SOCK_STREAM, 0, sockets), SyscallSucceeds());
+
+ // Allocate vector before forking (not async-signal-safe).
+ ExecveArray const owned_child_argv = {
+ "/proc/self/exe", "--ptrace_test_tracee", "--ptrace_test_fd",
+ std::to_string(sockets[0])};
+ char* const* const child_argv = owned_child_argv.get();
+
+ pid_t const tracee_pid = fork();
+ if (tracee_pid == 0) {
+ TEST_PCHECK(close(sockets[1]) == 0);
+ TEST_PCHECK(prctl(PR_SET_PTRACER, PR_SET_PTRACER_ANY) == 0);  // Set before the execve() below.
+ MaybeSave();
+
+ // This test will create a new thread in the child process.
+ // pthread_create(2) isn't async-signal-safe, so we execve() first.
+ execve(child_argv[0], child_argv, /* envp = */ nullptr);
+ TEST_PCHECK_MSG(false, "Survived execve to test child");
+ }
+ ASSERT_THAT(tracee_pid, SyscallSucceeds());
+ ASSERT_THAT(close(sockets[0]), SyscallSucceeds());
+
+ pid_t const tracer_pid = fork();
+ if (tracer_pid == 0) {
+ // Wait until the tracee has exec'd.
+ char done;
+ TEST_PCHECK(read(sockets[1], &done, 1) == 1);
+ MaybeSave();
+
+ TEST_PCHECK(CheckPtraceAttach(tracee_pid) == 0);  // The attach is expected to succeed after the exec.
+ _exit(0);
+ }
+ ASSERT_THAT(tracer_pid, SyscallSucceeds());
+
+ // Clean up tracer.
+ int status;
+ ASSERT_THAT(waitpid(tracer_pid, &status, 0), SyscallSucceeds());
+ EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0)
+ << " status " << status;
+
+ // Clean up tracee.
+ ASSERT_THAT(kill(tracee_pid, SIGKILL), SyscallSucceeds());
+ ASSERT_THAT(waitpid(tracee_pid, &status, 0),
+ SyscallSucceedsWithValue(tracee_pid));
+ EXPECT_TRUE(WIFSIGNALED(status) && WTERMSIG(status) == SIGKILL)
+ << " status " << status;
+}
+
+[[noreturn]] void RunTracee(int fd) {  // Post-exec tracee workload: write one byte to fd, then idle.
+ // Indicate that we have exec'd.
+ TEST_PCHECK(write(fd, "x", 1) == 1);
+ MaybeSave();
+
+ while (true) {  // Idle forever; this function never returns.
+ SleepSafe(absl::Seconds(1));
+ }
+}
+
+// Tests that YAMA exceptions store tracees by thread group leader. Exceptions
+// are cleared if the tracee process's thread group leader is terminated by
+// exec.
+TEST(PtraceTest, PrctlSetPtracerDoesNotPersistPastNonLeaderExec) {
+ SKIP_IF(ASSERT_NO_ERRNO_AND_VALUE(YamaPtraceScope()) != 1);  // Only runs at Yama scope 1.
+ ASSERT_NO_ERRNO(SetCapability(CAP_SYS_PTRACE, false));
+
+ // Use sockets to synchronize between tracer and tracee.
+ int sockets[2];
+ ASSERT_THAT(socketpair(AF_UNIX, SOCK_STREAM, 0, sockets), SyscallSucceeds());
+
+ // Allocate vector before forking (not async-signal-safe).
+ ExecveArray const owned_child_argv = {
+ "/proc/self/exe", "--ptrace_test_prctl_set_ptracer_and_exec_non_leader",
+ "--ptrace_test_fd", std::to_string(sockets[0])};
+ char* const* const child_argv = owned_child_argv.get();
+
+ pid_t const tracee_pid = fork();
+ if (tracee_pid == 0) {
+ // This test will create a new thread in the child process.
+ // pthread_create(2) isn't async-signal-safe, so we execve() first.
+ TEST_PCHECK(close(sockets[1]) == 0);
+ execve(child_argv[0], child_argv, /* envp = */ nullptr);
+ TEST_PCHECK_MSG(false, "Survived execve to test child");
+ }
+ ASSERT_THAT(tracee_pid, SyscallSucceeds());
+ ASSERT_THAT(close(sockets[0]), SyscallSucceeds());
+
+ pid_t const tracer_pid = fork();
+ if (tracer_pid == 0) {
+ // Wait until the tracee has exec'd.
+ char done;
+ TEST_PCHECK(read(sockets[1], &done, 1) == 1);
+ MaybeSave();
+
+ TEST_CHECK(CheckPtraceAttach(tracee_pid) == -1);  // The attach is expected to be refused.
+ TEST_PCHECK(errno == EPERM);
+ _exit(0);
+ }
+ ASSERT_THAT(tracer_pid, SyscallSucceeds());
+
+ // Clean up tracer.
+ int status;
+ ASSERT_THAT(waitpid(tracer_pid, &status, 0), SyscallSucceeds());
+ EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0)
+ << " status " << status;
+
+ // Clean up tracee.
+ ASSERT_THAT(kill(tracee_pid, SIGKILL), SyscallSucceeds());
+ ASSERT_THAT(waitpid(tracee_pid, &status, 0),
+ SyscallSucceedsWithValue(tracee_pid));
+ EXPECT_TRUE(WIFSIGNALED(status) && WTERMSIG(status) == SIGKILL)
+ << " status " << status;
+}
+
+[[noreturn]] void RunPrctlSetPtracerDoesNotPersistPastNonLeaderExec(int fd) {  // Tracee workload: a non-leader thread sets PR_SET_PTRACER_ANY, then execve()s.
+ ScopedThread t([fd] {
+ TEST_PCHECK(prctl(PR_SET_PTRACER, PR_SET_PTRACER_ANY) == 0);
+ MaybeSave();
+
+ ExecveArray const owned_child_argv = {
+ "/proc/self/exe", "--ptrace_test_tracee", "--ptrace_test_fd",
+ std::to_string(fd)};
+ char* const* const child_argv = owned_child_argv.get();
+
+ execve(child_argv[0], child_argv, /* envp = */ nullptr);  // Re-executes this binary with --ptrace_test_tracee.
+ TEST_PCHECK_MSG(false, "Survived execve to test child");
+ });
+ t.Join();  // Not expected to return; reaching the lines below is treated as a failure.
+ TEST_CHECK_MSG(false, "Survived execve? (main)");
+ _exit(1);
+}
+
+// Tests that YAMA exceptions store the tracer itself rather than the thread
+// group leader. Exceptions are cleared when the tracer task exits, rather than
+// when its thread group leader exits.
+TEST(PtraceTest, PrctlSetPtracerDoesNotPersistPastTracerThreadExit) {
+ SKIP_IF(ASSERT_NO_ERRNO_AND_VALUE(YamaPtraceScope()) != 1);  // Only runs at Yama scope 1.
+
+ // Use sockets to synchronize between tracer and tracee.
+ int sockets[2];
+ ASSERT_THAT(socketpair(AF_UNIX, SOCK_STREAM, 0, sockets), SyscallSucceeds());
+
+ pid_t const tracee_pid = fork();
+ if (tracee_pid == 0) {
+ TEST_PCHECK(close(sockets[1]) == 0);
+ pid_t tracer_tid;  // Read from the socket, then passed to PR_SET_PTRACER.
+ TEST_PCHECK(read(sockets[0], &tracer_tid, sizeof(tracer_tid)) ==
+ sizeof(tracer_tid));
+ MaybeSave();
+
+ TEST_PCHECK(prctl(PR_SET_PTRACER, tracer_tid) == 0);
+ MaybeSave();
+ // Indicate that the prctl has been set.
+ TEST_PCHECK(write(sockets[0], "x", 1) == 1);
+ MaybeSave();
+
+ while (true) {
+ SleepSafe(absl::Seconds(1));
+ }
+ }
+ ASSERT_THAT(tracee_pid, SyscallSucceeds());
+ ASSERT_THAT(close(sockets[0]), SyscallSucceeds());
+
+ // Allocate vector before forking (not async-signal-safe).
+ ExecveArray const owned_child_argv = {
+ "/proc/self/exe",
+ "--ptrace_test_prctl_set_ptracer_and_exit_tracer_thread",
+ "--ptrace_test_prctl_set_ptracer_and_exit_tracer_thread_tid",
+ std::to_string(tracee_pid),
+ "--ptrace_test_fd",
+ std::to_string(sockets[1])};
+ char* const* const child_argv = owned_child_argv.get();
+
+ pid_t const tracer_pid = fork();
+ if (tracer_pid == 0) {
+ // This test will create a new thread in the child process.
+ // pthread_create(2) isn't async-signal-safe, so we execve() first.
+ execve(child_argv[0], child_argv, /* envp = */ nullptr);
+ TEST_PCHECK_MSG(false, "Survived execve to test child");
+ }
+ ASSERT_THAT(tracer_pid, SyscallSucceeds());
+
+ // Clean up tracer.
+ int status;
+ ASSERT_THAT(waitpid(tracer_pid, &status, 0), SyscallSucceeds());
+ EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0)
+ << " status " << status;
+
+ // Clean up tracee.
+ ASSERT_THAT(kill(tracee_pid, SIGKILL), SyscallSucceeds());
+ ASSERT_THAT(waitpid(tracee_pid, &status, 0),
+ SyscallSucceedsWithValue(tracee_pid));
+ EXPECT_TRUE(WIFSIGNALED(status) && WTERMSIG(status) == SIGKILL)
+ << " status " << status;
+}
+
+[[noreturn]] void RunPrctlSetPtracerDoesNotPersistPastTracerThreadExit(
+ int tracee_tid, int fd) {  // Tracer workload: a thread reports its tid and exits; the attach afterwards must fail with EPERM.
+ TEST_PCHECK(SetCapability(CAP_SYS_PTRACE, false).ok());
+
+ ScopedThread t([fd] {
+ pid_t const tracer_tid = gettid();  // The tid of this short-lived thread is what gets sent over fd.
+ TEST_PCHECK(write(fd, &tracer_tid, sizeof(tracer_tid)) ==
+ sizeof(tracer_tid));
+
+ // Wait until the prctl has been set.
+ char done;
+ TEST_PCHECK(read(fd, &done, 1) == 1);
+ MaybeSave();
+ });
+ t.Join();
+
+ // Sleep for a bit before verifying the invalidation. The thread exit above
+ // should cause the ptrace exception to be invalidated, but in Linux, this is
+ // not done immediately. The YAMA exception is dropped during
+ // __put_task_struct(), which occurs (at the earliest) one RCU grace period
+ // after exit_notify() ==> release_task().
+ SleepSafe(absl::Milliseconds(100));
+
+ TEST_CHECK(CheckPtraceAttach(tracee_tid) == -1);  // Attempted from the calling thread, after the reporting thread was joined.
+ TEST_PCHECK(errno == EPERM);
+ _exit(0);
+}
+
+// Tests that YAMA exceptions store the tracer thread itself rather than the
+// thread group leader. Exceptions are preserved across exec in the tracer
+// thread, even if the thread group leader is terminated.
+TEST(PtraceTest, PrctlSetPtracerRespectsTracerThreadID) {
+ SKIP_IF(ASSERT_NO_ERRNO_AND_VALUE(YamaPtraceScope()) != 1);  // Only runs at Yama scope 1.
+
+ // Use sockets to synchronize between tracer and tracee.
+ int sockets[2];
+ ASSERT_THAT(socketpair(AF_UNIX, SOCK_STREAM, 0, sockets), SyscallSucceeds());
+
+ pid_t const tracee_pid = fork();
+ if (tracee_pid == 0) {
+ TEST_PCHECK(close(sockets[1]) == 0);
+ pid_t tracer_tid;  // Read from the socket, then passed to PR_SET_PTRACER.
+ TEST_PCHECK(read(sockets[0], &tracer_tid, sizeof(tracer_tid)) ==
+ sizeof(tracer_tid));
+ MaybeSave();
+
+ TEST_PCHECK(prctl(PR_SET_PTRACER, tracer_tid) == 0);
+ MaybeSave();
+ // Indicate that the prctl has been set.
+ TEST_PCHECK(write(sockets[0], "x", 1) == 1);
+ MaybeSave();
+
+ while (true) {
+ SleepSafe(absl::Seconds(1));
+ }
+ }
+ ASSERT_THAT(tracee_pid, SyscallSucceeds());
+ ASSERT_THAT(close(sockets[0]), SyscallSucceeds());
+
+ // Allocate vector before forking (not async-signal-safe).
+ ExecveArray const owned_child_argv = {
+ "/proc/self/exe",
+ "--ptrace_test_prctl_set_ptracer_respects_tracer_thread_id",
+ "--ptrace_test_prctl_set_ptracer_respects_tracer_thread_id_tid",
+ std::to_string(tracee_pid),
+ "--ptrace_test_fd",
+ std::to_string(sockets[1])};
+ char* const* const child_argv = owned_child_argv.get();
+
+ pid_t const tracer_pid = fork();
+ if (tracer_pid == 0) {
+ // This test will create a new thread in the child process.
+ // pthread_create(2) isn't async-signal-safe, so we execve() first.
+ execve(child_argv[0], child_argv, /* envp = */ nullptr);
+ TEST_PCHECK_MSG(false, "Survived execve to test child");
+ }
+ ASSERT_THAT(tracer_pid, SyscallSucceeds());
+
+ // Clean up tracer.
+ int status;
+ ASSERT_THAT(waitpid(tracer_pid, &status, 0), SyscallSucceeds());
+ EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0)
+ << " status " << status;
+
+ // Clean up tracee.
+ ASSERT_THAT(kill(tracee_pid, SIGKILL), SyscallSucceeds());
+ ASSERT_THAT(waitpid(tracee_pid, &status, 0),
+ SyscallSucceedsWithValue(tracee_pid));
+ EXPECT_TRUE(WIFSIGNALED(status) && WTERMSIG(status) == SIGKILL)
+ << " status " << status;
+}
+
+// Tracer workload for PrctlSetPtracerRespectsTracerThreadID. A non-leader
+// thread writes its tid to `fd`, waits for a one-byte notification on `fd`,
+// and then execve()s this binary with --ptrace_test_trace_tid set to
+// `tracee_tid`. Never returns: surviving the execve is treated as a failure.
+[[noreturn]] void RunPrctlSetPtracerRespectsTracerThreadID(int tracee_tid,
+                                                           int fd) {
+  // Create a separate thread for tracing (i.e., not the thread group
+  // leader). After the subsequent execve(), the current thread group leader
+  // will no longer exist, but the YAMA exception installed with this
+  // thread should still be valid.
+  ScopedThread t([tracee_tid, fd] {
+    pid_t const tracer_tid = gettid();
+    // Compare against the full size: a bare write() result is also truthy on
+    // failure (-1) and on a short write.
+    TEST_PCHECK(write(fd, &tracer_tid, sizeof(tracer_tid)) ==
+                sizeof(tracer_tid));
+    MaybeSave();
+
+    // Wait until the tracee has made the PR_SET_PTRACER prctl.
+    char done;
+    TEST_PCHECK(read(fd, &done, 1) == 1);
+    MaybeSave();
+
+    ExecveArray const owned_child_argv = {
+        "/proc/self/exe", "--ptrace_test_trace_tid", std::to_string(tracee_tid),
+        "--ptrace_test_fd", std::to_string(fd)};
+    char* const* const child_argv = owned_child_argv.get();
+
+    execve(child_argv[0], child_argv, /* envp = */ nullptr);
+    TEST_PCHECK_MSG(false, "Survived execve to test child");
+  });
+  t.Join();
+  TEST_CHECK_MSG(false, "Survived execve? (main)");
+  _exit(1);
+}
+
+[[noreturn]] void RunTraceTID(int tracee_tid, int fd) {  // Attach to tracee_tid and exit 0 on success; fd is not used here.
+ TEST_PCHECK(SetCapability(CAP_SYS_PTRACE, false).ok());
+ TEST_PCHECK(CheckPtraceAttach(tracee_tid) == 0);
+ _exit(0);
+}
+
+// Tests that removing a YAMA exception does not affect a tracer that is already
+// attached.
+TEST(PtraceTest, PrctlClearPtracerDoesNotAffectCurrentTracer) {
+ SKIP_IF(ASSERT_NO_ERRNO_AND_VALUE(YamaPtraceScope()) != 1);  // Only runs at Yama scope 1.
+ ASSERT_NO_ERRNO(SetCapability(CAP_SYS_PTRACE, false));
+
+ // Use sockets to synchronize between tracer and tracee.
+ int sockets[2];
+ ASSERT_THAT(socketpair(AF_UNIX, SOCK_STREAM, 0, sockets), SyscallSucceeds());
+
+ pid_t const tracee_pid = fork();
+ if (tracee_pid == 0) {
+ TEST_PCHECK(close(sockets[1]) == 0);
+ TEST_PCHECK(prctl(PR_SET_PTRACER, PR_SET_PTRACER_ANY) == 0);
+ MaybeSave();
+ // Indicate that the prctl has been set.
+ TEST_PCHECK(write(sockets[0], "x", 1) == 1);
+ MaybeSave();
+
+ // Wait until tracer has attached before clearing PR_SET_PTRACER.
+ char done;
+ TEST_PCHECK(read(sockets[0], &done, 1) == 1);
+ MaybeSave();
+
+ TEST_PCHECK(prctl(PR_SET_PTRACER, 0) == 0);
+ MaybeSave();
+ // Indicate that the prctl has been cleared.
+ TEST_PCHECK(write(sockets[0], "x", 1) == 1);
+ MaybeSave();
+
+ while (true) {
+ SleepSafe(absl::Seconds(1));
+ }
+ }
+ ASSERT_THAT(tracee_pid, SyscallSucceeds());
+ ASSERT_THAT(close(sockets[0]), SyscallSucceeds());
+
+ std::string mem_path = "/proc/" + std::to_string(tracee_pid) + "/mem";  // Built before fork(); the tracer child only reads it.
+ pid_t const tracer_pid = fork();
+ if (tracer_pid == 0) {
+ // Wait until tracee has called prctl, or else we won't be able to attach.
+ char done;
+ TEST_PCHECK(read(sockets[1], &done, 1) == 1);
+ MaybeSave();
+
+ TEST_PCHECK(ptrace(PTRACE_ATTACH, tracee_pid, 0, 0) == 0);
+ MaybeSave();
+ // Indicate that we have attached.
+ TEST_PCHECK(write(sockets[1], &done, 1) == 1);
+ MaybeSave();
+
+ // Block until tracee enters signal-delivery-stop as a result of the
+ // SIGSTOP sent by PTRACE_ATTACH.
+ int status;
+ TEST_PCHECK(waitpid(tracee_pid, &status, 0) == tracee_pid);
+ MaybeSave();
+ TEST_CHECK(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP);
+ MaybeSave();
+
+ TEST_PCHECK(ptrace(PTRACE_CONT, tracee_pid, 0, 0) == 0);  // Resume the tracee so it can go on to clear the prctl.
+ MaybeSave();
+
+ // Wait until tracee has cleared PR_SET_PTRACER. Even though it was cleared,
+ // we should still be able to access /proc/[pid]/mem because we are already
+ // attached.
+ TEST_PCHECK(read(sockets[1], &done, 1) == 1);
+ MaybeSave();
+ TEST_PCHECK(open(mem_path.c_str(), O_RDONLY) != -1);  // The fd is not closed; the process exits right after.
+ MaybeSave();
+ _exit(0);
+ }
+ ASSERT_THAT(tracer_pid, SyscallSucceeds());
+
+ // Clean up tracer.
+ int status;
+ ASSERT_THAT(waitpid(tracer_pid, &status, 0), SyscallSucceeds());
+ EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0)
+ << " status " << status;
+
+ // Clean up tracee.
+ ASSERT_THAT(kill(tracee_pid, SIGKILL), SyscallSucceeds());
+ ASSERT_THAT(waitpid(tracee_pid, &status, 0),
+ SyscallSucceedsWithValue(tracee_pid));
+ EXPECT_TRUE(WIFSIGNALED(status) && WTERMSIG(status) == SIGKILL)
+ << " status " << status;
+}
+
+TEST(PtraceTest, PrctlNotInherited) {  // PR_SET_PTRACER_ANY set in this process must not let a sibling attach to a child forked afterwards.
+ SKIP_IF(ASSERT_NO_ERRNO_AND_VALUE(YamaPtraceScope()) != 1);  // Only runs at Yama scope 1.
+ ASSERT_NO_ERRNO(SetCapability(CAP_SYS_PTRACE, false));
+
+ // Allow any ptracer. This should not affect the child processes.
+ ASSERT_THAT(prctl(PR_SET_PTRACER, PR_SET_PTRACER_ANY), SyscallSucceeds());
+
+ pid_t const tracee_pid = fork();
+ if (tracee_pid == 0) {
+ while (true) {
+ SleepSafe(absl::Seconds(1));
+ }
+ }
+ ASSERT_THAT(tracee_pid, SyscallSucceeds());
+
+ pid_t const tracer_pid = fork();
+ if (tracer_pid == 0) {
+ TEST_CHECK(CheckPtraceAttach(tracee_pid) == -1);  // The attach is expected to be refused.
+ TEST_PCHECK(errno == EPERM);
+ _exit(0);
+ }
+ ASSERT_THAT(tracer_pid, SyscallSucceeds());
+
+ // Clean up tracer.
+ int status;
+ ASSERT_THAT(waitpid(tracer_pid, &status, 0), SyscallSucceeds());
+ EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0)
+ << " status " << status;
+
+ // Clean up tracee.
+ ASSERT_THAT(kill(tracee_pid, SIGKILL), SyscallSucceeds());
+ ASSERT_THAT(waitpid(tracee_pid, &status, 0),
+ SyscallSucceedsWithValue(tracee_pid));
+ EXPECT_TRUE(WIFSIGNALED(status) && WTERMSIG(status) == SIGKILL)
+ << " status " << status;
+}
+
TEST(PtraceTest, AttachParent_PeekData_PokeData_SignalSuppression) {
// Yama restricts attaching to a parent. Skip the test if the scope is stricter
// than 1; at scope 1 the parent opts in with PR_SET_PTRACER below.
- SKIP_IF(ASSERT_NO_ERRNO_AND_VALUE(YamaPtraceScope()) > 0);
+ const int yama_scope = ASSERT_NO_ERRNO_AND_VALUE(YamaPtraceScope());
+ SKIP_IF(yama_scope > 1);
+ if (yama_scope == 1) {
+ // Allow child to trace us.
+ ASSERT_THAT(prctl(PR_SET_PTRACER, PR_SET_PTRACER_ANY), SyscallSucceeds());
+ }
// Test PTRACE_POKE/PEEKDATA on both anonymous and file mappings.
const auto file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
@@ -1238,6 +2285,46 @@ TEST(PtraceTest, SeizeSetOptions) {
<< " status " << status;
}
+// Exercises writing and reading the YAMA ptrace_scope file at
+// kYamaPtraceScopePath: writes "0" and reads it back, checks that a child can
+// then attach to its parent, and finally writes "1\n" and reads that back.
+// Skipped on VFS1, when not running on gVisor, and without CAP_SYS_ADMIN.
+TEST(PtraceTest, SetYAMAPtraceScope) {
+ SKIP_IF(IsRunningWithVFS1());
+
+ // Do not modify the ptrace scope on the host.
+ SKIP_IF(!IsRunningOnGvisor());
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_ADMIN)));
+
+ const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(
+ Open(std::string(kYamaPtraceScopePath), O_RDWR));
+
+ // Set the scope to 0, written without a trailing newline.
+ ASSERT_THAT(write(fd.get(), "0", 1), SyscallSucceedsWithValue(1));
+
+ // Rewind and read the value back. buf is zero-initialized and larger than
+ // the expected contents, so a short read leaves it NUL-terminated for
+ // EXPECT_STREQ.
+ ASSERT_THAT(lseek(fd.get(), 0, SEEK_SET), SyscallSucceeds());
+ std::vector<char> buf(10);
+ EXPECT_THAT(read(fd.get(), buf.data(), buf.size()), SyscallSucceeds());
+ EXPECT_STREQ(buf.data(), "0\n");
+
+ // Test that a child can attach to its parent when ptrace_scope is 0.
+ // NOTE(review): CAP_SYS_PTRACE is presumably dropped first so the attach is
+ // governed by the scope rather than by the capability -- confirm.
+ ASSERT_NO_ERRNO(SetCapability(CAP_SYS_PTRACE, false));
+ pid_t const child_pid = fork();
+ if (child_pid == 0) {
+ TEST_PCHECK(CheckPtraceAttach(getppid()) == 0);
+ _exit(0);
+ }
+ ASSERT_THAT(child_pid, SyscallSucceeds());
+
+ // Reap the child; anything other than a clean exit with status 0 fails the
+ // test.
+ int status;
+ ASSERT_THAT(waitpid(child_pid, &status, 0), SyscallSucceeds());
+ EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0)
+ << " status " << status;
+
+ // Set ptrace_scope back to 1 (and try writing with a newline).
+ // NOTE(review): 1 is hard-coded here rather than restored from a value read
+ // before the test, and this step is not reached if an earlier ASSERT fails --
+ // confirm 1 is always the initial scope in this environment.
+ ASSERT_THAT(lseek(fd.get(), 0, SEEK_SET), SyscallSucceeds());
+ ASSERT_THAT(write(fd.get(), "1\n", 2), SyscallSucceedsWithValue(2));
+
+ // Read the new value back; the read overwrites the start of buf and the
+ // remaining bytes are still zero.
+ ASSERT_THAT(lseek(fd.get(), 0, SEEK_SET), SyscallSucceeds());
+ EXPECT_THAT(read(fd.get(), buf.data(), buf.size()), SyscallSucceeds());
+ EXPECT_STREQ(buf.data(), "1\n");
+}
+
} // namespace
} // namespace testing
@@ -1250,5 +2337,63 @@ int main(int argc, char** argv) {
gvisor::testing::RunExecveChild();
}
+ int fd = absl::GetFlag(FLAGS_ptrace_test_fd);
+
+ if (absl::GetFlag(FLAGS_ptrace_test_trace_descendants_allowed)) {
+ gvisor::testing::RunTraceDescendantsAllowed(fd);
+ }
+
+ if (absl::GetFlag(FLAGS_ptrace_test_prctl_set_ptracer_pid)) {
+ gvisor::testing::RunPrctlSetPtracerPID(fd);
+ }
+
+ if (absl::GetFlag(FLAGS_ptrace_test_prctl_set_ptracer_any)) {
+ gvisor::testing::RunPrctlSetPtracerAny(fd);
+ }
+
+ if (absl::GetFlag(FLAGS_ptrace_test_prctl_clear_ptracer)) {
+ gvisor::testing::RunPrctlClearPtracer(fd);
+ }
+
+ if (absl::GetFlag(FLAGS_ptrace_test_prctl_replace_ptracer)) {
+ gvisor::testing::RunPrctlReplacePtracer(
+ absl::GetFlag(FLAGS_ptrace_test_prctl_replace_ptracer_tid), fd);
+ }
+
+ if (absl::GetFlag(
+ FLAGS_ptrace_test_prctl_set_ptracer_and_exit_tracee_thread)) {
+ gvisor::testing::RunPrctlSetPtracerPersistsPastTraceeThreadExit(fd);
+ }
+
+ if (absl::GetFlag(FLAGS_ptrace_test_prctl_set_ptracer_and_exec_non_leader)) {
+ gvisor::testing::RunPrctlSetPtracerDoesNotPersistPastNonLeaderExec(
+ fd);
+ }
+
+ if (absl::GetFlag(
+ FLAGS_ptrace_test_prctl_set_ptracer_and_exit_tracer_thread)) {
+ gvisor::testing::RunPrctlSetPtracerDoesNotPersistPastTracerThreadExit(
+ absl::GetFlag(
+ FLAGS_ptrace_test_prctl_set_ptracer_and_exit_tracer_thread_tid),
+ fd);
+ }
+
+ if (absl::GetFlag(
+ FLAGS_ptrace_test_prctl_set_ptracer_respects_tracer_thread_id)) {
+ gvisor::testing::RunPrctlSetPtracerRespectsTracerThreadID(
+ absl::GetFlag(
+ FLAGS_ptrace_test_prctl_set_ptracer_respects_tracer_thread_id_tid),
+ fd);
+ }
+
+ if (absl::GetFlag(FLAGS_ptrace_test_tracee)) {
+ gvisor::testing::RunTracee(fd);
+ }
+
+ int pid = absl::GetFlag(FLAGS_ptrace_test_trace_tid);
+ if (pid != -1) {
+ gvisor::testing::RunTraceTID(pid, fd);
+ }
+
return gvisor::testing::RunAllTests();
}