From acd516cfe2920006a5d2760c78bd2245d498023a Mon Sep 17 00:00:00 2001 From: Dean Deng Date: Wed, 24 Feb 2021 02:01:38 -0800 Subject: Add YAMA security module restrictions on ptrace(2). Restrict ptrace(2) according to the default configurations of the YAMA security module (mode 1), which is a common default among various Linux distributions. The new access checks only permit the tracer to proceed if one of the following conditions is met: a) The tracer is already attached to the tracee. b) The target is a descendant of the tracer. c) The target has explicitly given permission to the tracer through the PR_SET_PTRACER prctl. d) The tracer has CAP_SYS_PTRACE. See security/yama/yama_lsm.c for more details. Note that these checks are added to CanTrace, which is checked for PTRACE_ATTACH as well as some other operations, e.g., checking a process' memory layout through /proc/[pid]/mem. Since this patch adds restrictions to ptrace, it may break compatibility for applications run by non-root users that, for instance, rely on being able to trace processes that are not descended from the tracer (e.g., `gdb -p`). YAMA restrictions can be turned off by setting /proc/sys/kernel/yama/ptrace_scope to 0, or exceptions can be made on a per-process basis with the PR_SET_PTRACER prctl. Reported-by: syzbot+622822d8bca08c99e8c8@syzkaller.appspotmail.com PiperOrigin-RevId: 359237723 --- pkg/sentry/kernel/kernel.go | 15 +++- pkg/sentry/kernel/ptrace.go | 175 +++++++++++++++++++++++++++++++++++++---- pkg/sentry/kernel/task.go | 7 ++ pkg/sentry/kernel/task_exit.go | 3 +- pkg/sentry/kernel/threads.go | 2 +- 5 files changed, 185 insertions(+), 17 deletions(-) (limited to 'pkg/sentry/kernel') diff --git a/pkg/sentry/kernel/kernel.go b/pkg/sentry/kernel/kernel.go index ef4e934a1..43065b45a 100644 --- a/pkg/sentry/kernel/kernel.go +++ b/pkg/sentry/kernel/kernel.go @@ -282,6 +282,18 @@ type Kernel struct { // If set to true, report address space activation waits as if the task is in // external wait so that the watchdog doesn't report the task stuck. SleepForAddressSpaceActivation bool + + // Exceptions to YAMA ptrace restrictions. Each key-value pair represents a + // tracee-tracer relationship. The key is a process (technically, the thread + // group leader) that can be traced by any thread that is a descendant of the + // value. If the value is nil, then anyone can trace the process represented by + // the key. + // + // ptraceExceptions is protected by the TaskSet mutex. + ptraceExceptions map[*Task]*Task + + // YAMAPtraceScope is the current level of YAMA ptrace restrictions. + YAMAPtraceScope int32 } // InitKernelArgs holds arguments to Init. @@ -382,6 +394,8 @@ func (k *Kernel) Init(args InitKernelArgs) error { k.monotonicClock = &timekeeperClock{tk: args.Timekeeper, c: sentrytime.Monotonic} k.futexes = futex.NewManager() k.netlinkPorts = port.New() + k.ptraceExceptions = make(map[*Task]*Task) + k.YAMAPtraceScope = linux.YAMA_SCOPE_RELATIONAL if VFS2Enabled { ctx := k.SupervisorContext() @@ -425,7 +439,6 @@ func (k *Kernel) Init(args InitKernelArgs) error { k.socketsVFS2 = make(map[*vfs.FileDescription]*SocketRecord) } - return nil } diff --git a/pkg/sentry/kernel/ptrace.go b/pkg/sentry/kernel/ptrace.go index cef58a590..c3980350a 100644 --- a/pkg/sentry/kernel/ptrace.go +++ b/pkg/sentry/kernel/ptrace.go @@ -16,6 +16,7 @@ package kernel import ( "fmt" + "sync/atomic" "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/marshal/primitive" @@ -95,7 +96,11 @@ const ( // checks for access mode PTRACE_MODE_ATTACH; otherwise, it checks for access // mode PTRACE_MODE_READ. // -// NOTE(b/30815691): The result of CanTrace is immediately stale (e.g., a +// In Linux, ptrace access restrictions may be configured by LSMs. While we do +// not support LSMs, we do add additional restrictions based on the commoncap +// and YAMA LSMs. +// +// TODO(gvisor.dev/issue/212): The result of CanTrace is immediately stale (e.g., a // racing setuid(2) may change traceability). This may pose a risk when a task // changes from traceable to not traceable. This is only problematic across // execve, where privileges may increase. @@ -103,7 +108,7 @@ const ( // We currently do not implement privileged executables (set-user/group-ID bits // and file capabilities), so that case is not reachable. func (t *Task) CanTrace(target *Task, attach bool) bool { - // "1. If the calling thread and the target thread are in the same thread + // "If the calling thread and the target thread are in the same thread // group, access is always allowed." - ptrace(2) // // Note: Strictly speaking, prior to 73af963f9f30 ("__ptrace_may_access() @@ -115,9 +120,57 @@ func (t *Task) CanTrace(target *Task, attach bool) bool { return true } + if !t.canTraceStandard(target, attach) { + return false + } + + // YAMA only supported for vfs2. + if !VFS2Enabled { + return true + } + + if atomic.LoadInt32(&t.k.YAMAPtraceScope) == linux.YAMA_SCOPE_RELATIONAL { + t.tg.pidns.owner.mu.RLock() + defer t.tg.pidns.owner.mu.RUnlock() + if !t.canTraceYAMALocked(target) { + return false + } + } + return true +} + +// canTraceLocked is the same as CanTrace, except the caller must already hold +// the TaskSet mutex (for reading or writing). +func (t *Task) canTraceLocked(target *Task, attach bool) bool { + if t.tg == target.tg { + return true + } + + if !t.canTraceStandard(target, attach) { + return false + } + + // YAMA only supported for vfs2. + if !VFS2Enabled { + return true + } + + if atomic.LoadInt32(&t.k.YAMAPtraceScope) == linux.YAMA_SCOPE_RELATIONAL { + if !t.canTraceYAMALocked(target) { + return false + } + } + return true +} + +// canTraceStandard performs standard ptrace access checks as defined by +// kernel/ptrace.c:__ptrace_may_access as well as the commoncap LSM +// implementation of the security_ptrace_access_check() interface, which is +// always invoked. +func (t *Task) canTraceStandard(target *Task, attach bool) bool { // """ - // 2. If the access mode specifies PTRACE_MODE_FSCREDS (ED: snipped, - // doesn't exist until Linux 4.5). + // TODO(gvisor.dev/issue/260): 1. If the access mode specifies + // PTRACE_MODE_FSCREDS (ED: snipped, doesn't exist until Linux 4.5). // // Otherwise, the access mode specifies PTRACE_MODE_REALCREDS, so use the // caller's real UID and GID for the checks in the next step. (Most APIs @@ -125,7 +178,7 @@ func (t *Task) CanTrace(target *Task, attach bool) bool { // historical reasons, the PTRACE_MODE_REALCREDS check uses the real IDs // instead.) // - // 3. Deny access if neither of the following is true: + // 2. Deny access if neither of the following is true: // // - The real, effective, and saved-set user IDs of the target match the // caller's user ID, *and* the real, effective, and saved-set group IDs of @@ -134,15 +187,12 @@ func (t *Task) CanTrace(target *Task, attach bool) bool { // - The caller has the CAP_SYS_PTRACE capability in the user namespace of // the target. // - // 4. Deny access if the target process "dumpable" attribute has a value + // 3. Deny access if the target process "dumpable" attribute has a value // other than 1 (SUID_DUMP_USER; see the discussion of PR_SET_DUMPABLE in // prctl(2)), and the caller does not have the CAP_SYS_PTRACE capability in // the user namespace of the target process. // - // 5. The kernel LSM security_ptrace_access_check() interface is invoked to - // see if ptrace access is permitted. The results depend on the LSM(s). The - // implementation of this interface in the commoncap LSM performs the - // following steps: + // 4. The commoncap LSM performs the following steps: // // a) If the access mode includes PTRACE_MODE_FSCREDS, then use the // caller's effective capability set; otherwise (the access mode specifies @@ -188,6 +238,94 @@ func (t *Task) CanTrace(target *Task, attach bool) bool { return true } +// canTraceYAMALocked performs ptrace access checks as defined by the YAMA LSM +// implementation of the security_ptrace_access_check() interface, with YAMA +// configured to mode 1. This is a common default among various Linux +// distributions. +// +// It only permits the tracer to proceed if one of the following conditions is +// met: +// +// a) The tracer is already attached to the tracee. +// +// b) The target is a descendant of the tracer. +// +// c) The target has explicitly given permission to the tracer through the +// PR_SET_PTRACER prctl. +// +// d) The tracer has CAP_SYS_PTRACE. +// +// See security/yama/yama_lsm.c:yama_ptrace_access_check. +// +// Precondition: the TaskSet mutex must be locked (for reading or writing). +func (t *Task) canTraceYAMALocked(target *Task) bool { + if tracer := target.Tracer(); tracer != nil { + if tracer.tg == t.tg { + return true + } + } + if target.isYAMADescendantOfLocked(t) { + return true + } + if target.hasYAMAExceptionForLocked(t) { + return true + } + if t.HasCapabilityIn(linux.CAP_SYS_PTRACE, target.UserNamespace()) { + return true + } + return false +} + +// Determines whether t is considered a descendant of ancestor for the purposes +// of YAMA permissions (specifically, whether t's thread group is descended from +// ancestor's). +// +// Precondition: the TaskSet mutex must be locked (for reading or writing). +func (t *Task) isYAMADescendantOfLocked(ancestor *Task) bool { + walker := t + for walker != nil { + if walker.tg.leader == ancestor.tg.leader { + return true + } + walker = walker.parent + } + return false +} + +// Precondition: the TaskSet mutex must be locked (for reading or writing). +func (t *Task) hasYAMAExceptionForLocked(tracer *Task) bool { + allowed, ok := t.k.ptraceExceptions[t] + if !ok { + return false + } + return allowed == nil || tracer.isYAMADescendantOfLocked(allowed) +} + +// ClearYAMAException removes any YAMA exception with t as the tracee. +func (t *Task) ClearYAMAException() { + t.tg.pidns.owner.mu.Lock() + defer t.tg.pidns.owner.mu.Unlock() + tracee := t.tg.leader + delete(t.k.ptraceExceptions, tracee) +} + +// SetYAMAException creates a YAMA exception allowing all descendants of tracer +// to trace t. If tracer is nil, then any task is allowed to trace t. +// +// If there was an existing exception, it is overwritten with the new one. +func (t *Task) SetYAMAException(tracer *Task) { + t.tg.pidns.owner.mu.Lock() + defer t.tg.pidns.owner.mu.Unlock() + + tracee := t.tg.leader + tracee.ptraceYAMAExceptionAdded = true + if tracer != nil { + tracer.ptraceYAMAExceptionAdded = true + } + + t.k.ptraceExceptions[tracee] = tracer +} + // Tracer returns t's ptrace Tracer. func (t *Task) Tracer() *Task { return t.ptraceTracer.Load().(*Task) @@ -358,7 +496,7 @@ func (t *Task) ptraceTraceme() error { // returning nil here is correct. return nil } - if !t.parent.CanTrace(t, true) { + if !t.parent.canTraceLocked(t, true) { return syserror.EPERM } if t.parent.exitState != TaskExitNone { @@ -377,11 +515,11 @@ func (t *Task) ptraceAttach(target *Task, seize bool, opts uintptr) error { if t.tg == target.tg { return syserror.EPERM } - if !t.CanTrace(target, true) { - return syserror.EPERM - } t.tg.pidns.owner.mu.Lock() defer t.tg.pidns.owner.mu.Unlock() + if !t.canTraceLocked(target, true) { + return syserror.EPERM + } if target.hasTracer() { return syserror.EPERM } @@ -459,6 +597,15 @@ func (t *Task) exitPtrace() { } // "nil maps cannot be saved" t.ptraceTracees = make(map[*Task]struct{}) + + if t.ptraceYAMAExceptionAdded { + delete(t.k.ptraceExceptions, t) + for tracee, tracer := range t.k.ptraceExceptions { + if tracer == t { + delete(t.k.ptraceExceptions, tracee) + } + } + } } // forgetTracerLocked detaches t's tracer and ensures that t is no longer diff --git a/pkg/sentry/kernel/task.go b/pkg/sentry/kernel/task.go index c0ab53c94..36141dd09 100644 --- a/pkg/sentry/kernel/task.go +++ b/pkg/sentry/kernel/task.go @@ -403,6 +403,13 @@ type Task struct { // ptraceEventMsg is protected by the TaskSet mutex. ptraceEventMsg uint64 + // ptraceYAMAExceptionAdded is true if a YAMA exception involving the task has + // been added before. This is used during task exit to decide whether we need + // to clean up YAMA exceptions. + // + // ptraceYAMAExceptionAdded is protected by the TaskSet mutex. + ptraceYAMAExceptionAdded bool + // The struct that holds the IO-related usage. The ioUsage pointer is // immutable. ioUsage *usage.IO diff --git a/pkg/sentry/kernel/task_exit.go b/pkg/sentry/kernel/task_exit.go index f7765fa3a..ad59e4f60 100644 --- a/pkg/sentry/kernel/task_exit.go +++ b/pkg/sentry/kernel/task_exit.go @@ -694,7 +694,8 @@ func (t *Task) exitNotifyLocked(fromPtraceDetach bool) { } if t.parent != nil { delete(t.parent.children, t) - t.parent = nil + // Do not clear t.parent. It may be still be needed after the task has exited + // (for example, to perform ptrace access checks on /proc/[pid] files). } } } diff --git a/pkg/sentry/kernel/threads.go b/pkg/sentry/kernel/threads.go index e9da99067..09d070ec8 100644 --- a/pkg/sentry/kernel/threads.go +++ b/pkg/sentry/kernel/threads.go @@ -55,7 +55,7 @@ const InitTID ThreadID = 1 // // +stateify savable type TaskSet struct { - // mu protects all relationships betweens tasks and thread groups in the + // mu protects all relationships between tasks and thread groups in the // TaskSet. (mu is approximately equivalent to Linux's tasklist_lock.) mu sync.RWMutex `state:"nosave"` -- cgit v1.2.3