diff options
author | Dean Deng <deandeng@google.com> | 2021-02-24 02:01:38 -0800 |
---|---|---|
committer | gVisor bot <gvisor-bot@google.com> | 2021-02-24 02:03:16 -0800 |
commit | acd516cfe2920006a5d2760c78bd2245d498023a (patch) | |
tree | a60c57de55b821e6dd33bf2cfa37feb09903c2a2 /pkg/sentry | |
parent | 6e000d3424c0eed685483f54348c6b0a752a0435 (diff) |
Add YAMA security module restrictions on ptrace(2).
Restrict ptrace(2) according to the default configurations of the YAMA security
module (mode 1), which is a common default among various Linux distributions.
The new access checks only permit the tracer to proceed if one of the following
conditions is met:
a) The tracer is already attached to the tracee.
b) The target is a descendant of the tracer.
c) The target has explicitly given permission to the tracer through the
PR_SET_PTRACER prctl.
d) The tracer has CAP_SYS_PTRACE.
See security/yama/yama_lsm.c for more details.
Note that these checks are added to CanTrace, which is checked for
PTRACE_ATTACH as well as some other operations, e.g., checking a process'
memory layout through /proc/[pid]/mem.
Since this patch adds restrictions to ptrace, it may break compatibility for
applications run by non-root users that, for instance, rely on being able to
trace processes that are not descended from the tracer (e.g., `gdb -p`). YAMA
restrictions can be turned off by setting /proc/sys/kernel/yama/ptrace_scope
to 0, or exceptions can be made on a per-process basis with the PR_SET_PTRACER
prctl.
Reported-by: syzbot+622822d8bca08c99e8c8@syzkaller.appspotmail.com
PiperOrigin-RevId: 359237723
Diffstat (limited to 'pkg/sentry')
-rw-r--r-- | pkg/sentry/fsimpl/proc/BUILD | 1 | ||||
-rw-r--r-- | pkg/sentry/fsimpl/proc/tasks_sys.go | 3 | ||||
-rw-r--r-- | pkg/sentry/fsimpl/proc/yama.go | 80 | ||||
-rw-r--r-- | pkg/sentry/kernel/kernel.go | 15 | ||||
-rw-r--r-- | pkg/sentry/kernel/ptrace.go | 175 | ||||
-rw-r--r-- | pkg/sentry/kernel/task.go | 7 | ||||
-rw-r--r-- | pkg/sentry/kernel/task_exit.go | 3 | ||||
-rw-r--r-- | pkg/sentry/kernel/threads.go | 2 | ||||
-rw-r--r-- | pkg/sentry/syscalls/linux/sys_prctl.go | 18 |
9 files changed, 287 insertions, 17 deletions
diff --git a/pkg/sentry/fsimpl/proc/BUILD b/pkg/sentry/fsimpl/proc/BUILD index 5196a2a80..d47a4fff9 100644 --- a/pkg/sentry/fsimpl/proc/BUILD +++ b/pkg/sentry/fsimpl/proc/BUILD @@ -75,6 +75,7 @@ go_library( "tasks_files.go", "tasks_inode_refs.go", "tasks_sys.go", + "yama.go", ], visibility = ["//pkg/sentry:internal"], deps = [ diff --git a/pkg/sentry/fsimpl/proc/tasks_sys.go b/pkg/sentry/fsimpl/proc/tasks_sys.go index 25c407d98..fd7823daa 100644 --- a/pkg/sentry/fsimpl/proc/tasks_sys.go +++ b/pkg/sentry/fsimpl/proc/tasks_sys.go @@ -48,6 +48,9 @@ func (fs *filesystem) newSysDir(ctx context.Context, root *auth.Credentials, k * "shmall": fs.newInode(ctx, root, 0444, shmData(linux.SHMALL)), "shmmax": fs.newInode(ctx, root, 0444, shmData(linux.SHMMAX)), "shmmni": fs.newInode(ctx, root, 0444, shmData(linux.SHMMNI)), + "yama": fs.newStaticDir(ctx, root, map[string]kernfs.Inode{ + "ptrace_scope": fs.newYAMAPtraceScopeFile(ctx, k, root), + }), }), "vm": fs.newStaticDir(ctx, root, map[string]kernfs.Inode{ "mmap_min_addr": fs.newInode(ctx, root, 0444, &mmapMinAddrData{k: k}), diff --git a/pkg/sentry/fsimpl/proc/yama.go b/pkg/sentry/fsimpl/proc/yama.go new file mode 100644 index 000000000..aebfe8944 --- /dev/null +++ b/pkg/sentry/fsimpl/proc/yama.go @@ -0,0 +1,80 @@ +// Copyright 2021 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package proc + +import ( + "bytes" + "fmt" + "sync/atomic" + + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/kernel/auth" + "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" +) + +func (fs *filesystem) newYAMAPtraceScopeFile(ctx context.Context, k *kernel.Kernel, creds *auth.Credentials) kernfs.Inode { + s := &yamaPtraceScope{level: &k.YAMAPtraceScope} + s.Init(ctx, creds, linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), s, 0644) + return s +} + +// yamaPtraceScope implements vfs.WritableDynamicBytesSource for +// /sys/kernel/yama/ptrace_scope. +// +// +stateify savable +type yamaPtraceScope struct { + kernfs.DynamicBytesFile + + // level is the ptrace_scope level. + level *int32 +} + +// Generate implements vfs.DynamicBytesSource.Generate. +func (s *yamaPtraceScope) Generate(ctx context.Context, buf *bytes.Buffer) error { + _, err := fmt.Fprintf(buf, "%d\n", atomic.LoadInt32(s.level)) + return err +} + +// Write implements vfs.WritableDynamicBytesSource.Write. +func (s *yamaPtraceScope) Write(ctx context.Context, src usermem.IOSequence, offset int64) (int64, error) { + if offset != 0 { + // Ignore partial writes. + return 0, syserror.EINVAL + } + if src.NumBytes() == 0 { + return 0, nil + } + + // Limit the amount of memory allocated. + src = src.TakeFirst(usermem.PageSize - 1) + + var v int32 + n, err := usermem.CopyInt32StringInVec(ctx, src.IO, src.Addrs, &v, src.Opts) + if err != nil { + return 0, err + } + + // We do not support YAMA levels > YAMA_SCOPE_RELATIONAL. + if v < linux.YAMA_SCOPE_DISABLED || v > linux.YAMA_SCOPE_RELATIONAL { + return 0, syserror.EINVAL + } + + atomic.StoreInt32(s.level, v) + return n, nil +} diff --git a/pkg/sentry/kernel/kernel.go b/pkg/sentry/kernel/kernel.go index ef4e934a1..43065b45a 100644 --- a/pkg/sentry/kernel/kernel.go +++ b/pkg/sentry/kernel/kernel.go @@ -282,6 +282,18 @@ type Kernel struct { // If set to true, report address space activation waits as if the task is in // external wait so that the watchdog doesn't report the task stuck. SleepForAddressSpaceActivation bool + + // Exceptions to YAMA ptrace restrictions. Each key-value pair represents a + // tracee-tracer relationship. The key is a process (technically, the thread + // group leader) that can be traced by any thread that is a descendant of the + // value. If the value is nil, then anyone can trace the process represented by + // the key. + // + // ptraceExceptions is protected by the TaskSet mutex. + ptraceExceptions map[*Task]*Task + + // YAMAPtraceScope is the current level of YAMA ptrace restrictions. + YAMAPtraceScope int32 } // InitKernelArgs holds arguments to Init. @@ -382,6 +394,8 @@ func (k *Kernel) Init(args InitKernelArgs) error { k.monotonicClock = &timekeeperClock{tk: args.Timekeeper, c: sentrytime.Monotonic} k.futexes = futex.NewManager() k.netlinkPorts = port.New() + k.ptraceExceptions = make(map[*Task]*Task) + k.YAMAPtraceScope = linux.YAMA_SCOPE_RELATIONAL if VFS2Enabled { ctx := k.SupervisorContext() @@ -425,7 +439,6 @@ func (k *Kernel) Init(args InitKernelArgs) error { k.socketsVFS2 = make(map[*vfs.FileDescription]*SocketRecord) } - return nil } diff --git a/pkg/sentry/kernel/ptrace.go b/pkg/sentry/kernel/ptrace.go index cef58a590..c3980350a 100644 --- a/pkg/sentry/kernel/ptrace.go +++ b/pkg/sentry/kernel/ptrace.go @@ -16,6 +16,7 @@ package kernel import ( "fmt" + "sync/atomic" "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/marshal/primitive" @@ -95,7 +96,11 @@ const ( // checks for access mode PTRACE_MODE_ATTACH; otherwise, it checks for access // mode PTRACE_MODE_READ. // -// NOTE(b/30815691): The result of CanTrace is immediately stale (e.g., a +// In Linux, ptrace access restrictions may be configured by LSMs. While we do +// not support LSMs, we do add additional restrictions based on the commoncap +// and YAMA LSMs. +// +// TODO(gvisor.dev/issue/212): The result of CanTrace is immediately stale (e.g., a // racing setuid(2) may change traceability). This may pose a risk when a task // changes from traceable to not traceable. This is only problematic across // execve, where privileges may increase. @@ -103,7 +108,7 @@ const ( // We currently do not implement privileged executables (set-user/group-ID bits // and file capabilities), so that case is not reachable. func (t *Task) CanTrace(target *Task, attach bool) bool { - // "1. If the calling thread and the target thread are in the same thread + // "If the calling thread and the target thread are in the same thread // group, access is always allowed." - ptrace(2) // // Note: Strictly speaking, prior to 73af963f9f30 ("__ptrace_may_access() @@ -115,9 +120,57 @@ func (t *Task) CanTrace(target *Task, attach bool) bool { return true } + if !t.canTraceStandard(target, attach) { + return false + } + + // YAMA only supported for vfs2. + if !VFS2Enabled { + return true + } + + if atomic.LoadInt32(&t.k.YAMAPtraceScope) == linux.YAMA_SCOPE_RELATIONAL { + t.tg.pidns.owner.mu.RLock() + defer t.tg.pidns.owner.mu.RUnlock() + if !t.canTraceYAMALocked(target) { + return false + } + } + return true +} + +// canTraceLocked is the same as CanTrace, except the caller must already hold +// the TaskSet mutex (for reading or writing). +func (t *Task) canTraceLocked(target *Task, attach bool) bool { + if t.tg == target.tg { + return true + } + + if !t.canTraceStandard(target, attach) { + return false + } + + // YAMA only supported for vfs2. + if !VFS2Enabled { + return true + } + + if atomic.LoadInt32(&t.k.YAMAPtraceScope) == linux.YAMA_SCOPE_RELATIONAL { + if !t.canTraceYAMALocked(target) { + return false + } + } + return true +} + +// canTraceStandard performs standard ptrace access checks as defined by +// kernel/ptrace.c:__ptrace_may_access as well as the commoncap LSM +// implementation of the security_ptrace_access_check() interface, which is +// always invoked. +func (t *Task) canTraceStandard(target *Task, attach bool) bool { // """ - // 2. If the access mode specifies PTRACE_MODE_FSCREDS (ED: snipped, - // doesn't exist until Linux 4.5). + // TODO(gvisor.dev/issue/260): 1. If the access mode specifies + // PTRACE_MODE_FSCREDS (ED: snipped, doesn't exist until Linux 4.5). // // Otherwise, the access mode specifies PTRACE_MODE_REALCREDS, so use the // caller's real UID and GID for the checks in the next step. (Most APIs @@ -125,7 +178,7 @@ func (t *Task) CanTrace(target *Task, attach bool) bool { // historical reasons, the PTRACE_MODE_REALCREDS check uses the real IDs // instead.) // - // 3. Deny access if neither of the following is true: + // 2. Deny access if neither of the following is true: // // - The real, effective, and saved-set user IDs of the target match the // caller's user ID, *and* the real, effective, and saved-set group IDs of @@ -134,15 +187,12 @@ func (t *Task) CanTrace(target *Task, attach bool) bool { // - The caller has the CAP_SYS_PTRACE capability in the user namespace of // the target. // - // 4. Deny access if the target process "dumpable" attribute has a value + // 3. Deny access if the target process "dumpable" attribute has a value // other than 1 (SUID_DUMP_USER; see the discussion of PR_SET_DUMPABLE in // prctl(2)), and the caller does not have the CAP_SYS_PTRACE capability in // the user namespace of the target process. // - // 5. The kernel LSM security_ptrace_access_check() interface is invoked to - // see if ptrace access is permitted. The results depend on the LSM(s). The - // implementation of this interface in the commoncap LSM performs the - // following steps: + // 4. The commoncap LSM performs the following steps: // // a) If the access mode includes PTRACE_MODE_FSCREDS, then use the // caller's effective capability set; otherwise (the access mode specifies @@ -188,6 +238,94 @@ func (t *Task) CanTrace(target *Task, attach bool) bool { return true } +// canTraceYAMALocked performs ptrace access checks as defined by the YAMA LSM +// implementation of the security_ptrace_access_check() interface, with YAMA +// configured to mode 1. This is a common default among various Linux +// distributions. +// +// It only permits the tracer to proceed if one of the following conditions is +// met: +// +// a) The tracer is already attached to the tracee. +// +// b) The target is a descendant of the tracer. +// +// c) The target has explicitly given permission to the tracer through the +// PR_SET_PTRACER prctl. +// +// d) The tracer has CAP_SYS_PTRACE. +// +// See security/yama/yama_lsm.c:yama_ptrace_access_check. +// +// Precondition: the TaskSet mutex must be locked (for reading or writing). +func (t *Task) canTraceYAMALocked(target *Task) bool { + if tracer := target.Tracer(); tracer != nil { + if tracer.tg == t.tg { + return true + } + } + if target.isYAMADescendantOfLocked(t) { + return true + } + if target.hasYAMAExceptionForLocked(t) { + return true + } + if t.HasCapabilityIn(linux.CAP_SYS_PTRACE, target.UserNamespace()) { + return true + } + return false +} + +// Determines whether t is considered a descendant of ancestor for the purposes +// of YAMA permissions (specifically, whether t's thread group is descended from +// ancestor's). +// +// Precondition: the TaskSet mutex must be locked (for reading or writing). +func (t *Task) isYAMADescendantOfLocked(ancestor *Task) bool { + walker := t + for walker != nil { + if walker.tg.leader == ancestor.tg.leader { + return true + } + walker = walker.parent + } + return false +} + +// Precondition: the TaskSet mutex must be locked (for reading or writing). +func (t *Task) hasYAMAExceptionForLocked(tracer *Task) bool { + allowed, ok := t.k.ptraceExceptions[t] + if !ok { + return false + } + return allowed == nil || tracer.isYAMADescendantOfLocked(allowed) +} + +// ClearYAMAException removes any YAMA exception with t as the tracee. +func (t *Task) ClearYAMAException() { + t.tg.pidns.owner.mu.Lock() + defer t.tg.pidns.owner.mu.Unlock() + tracee := t.tg.leader + delete(t.k.ptraceExceptions, tracee) +} + +// SetYAMAException creates a YAMA exception allowing all descendants of tracer +// to trace t. If tracer is nil, then any task is allowed to trace t. +// +// If there was an existing exception, it is overwritten with the new one. +func (t *Task) SetYAMAException(tracer *Task) { + t.tg.pidns.owner.mu.Lock() + defer t.tg.pidns.owner.mu.Unlock() + + tracee := t.tg.leader + tracee.ptraceYAMAExceptionAdded = true + if tracer != nil { + tracer.ptraceYAMAExceptionAdded = true + } + + t.k.ptraceExceptions[tracee] = tracer +} + // Tracer returns t's ptrace Tracer. func (t *Task) Tracer() *Task { return t.ptraceTracer.Load().(*Task) @@ -358,7 +496,7 @@ func (t *Task) ptraceTraceme() error { // returning nil here is correct. return nil } - if !t.parent.CanTrace(t, true) { + if !t.parent.canTraceLocked(t, true) { return syserror.EPERM } if t.parent.exitState != TaskExitNone { @@ -377,11 +515,11 @@ func (t *Task) ptraceAttach(target *Task, seize bool, opts uintptr) error { if t.tg == target.tg { return syserror.EPERM } - if !t.CanTrace(target, true) { - return syserror.EPERM - } t.tg.pidns.owner.mu.Lock() defer t.tg.pidns.owner.mu.Unlock() + if !t.canTraceLocked(target, true) { + return syserror.EPERM + } if target.hasTracer() { return syserror.EPERM } @@ -459,6 +597,15 @@ func (t *Task) exitPtrace() { } // "nil maps cannot be saved" t.ptraceTracees = make(map[*Task]struct{}) + + if t.ptraceYAMAExceptionAdded { + delete(t.k.ptraceExceptions, t) + for tracee, tracer := range t.k.ptraceExceptions { + if tracer == t { + delete(t.k.ptraceExceptions, tracee) + } + } + } } // forgetTracerLocked detaches t's tracer and ensures that t is no longer diff --git a/pkg/sentry/kernel/task.go b/pkg/sentry/kernel/task.go index c0ab53c94..36141dd09 100644 --- a/pkg/sentry/kernel/task.go +++ b/pkg/sentry/kernel/task.go @@ -403,6 +403,13 @@ type Task struct { // ptraceEventMsg is protected by the TaskSet mutex. ptraceEventMsg uint64 + // ptraceYAMAExceptionAdded is true if a YAMA exception involving the task has + // been added before. This is used during task exit to decide whether we need + // to clean up YAMA exceptions. + // + // ptraceYAMAExceptionAdded is protected by the TaskSet mutex. + ptraceYAMAExceptionAdded bool + // The struct that holds the IO-related usage. The ioUsage pointer is // immutable. ioUsage *usage.IO diff --git a/pkg/sentry/kernel/task_exit.go b/pkg/sentry/kernel/task_exit.go index f7765fa3a..ad59e4f60 100644 --- a/pkg/sentry/kernel/task_exit.go +++ b/pkg/sentry/kernel/task_exit.go @@ -694,7 +694,8 @@ func (t *Task) exitNotifyLocked(fromPtraceDetach bool) { } if t.parent != nil { delete(t.parent.children, t) - t.parent = nil + // Do not clear t.parent. It may be still be needed after the task has exited + // (for example, to perform ptrace access checks on /proc/[pid] files). } } } diff --git a/pkg/sentry/kernel/threads.go b/pkg/sentry/kernel/threads.go index e9da99067..09d070ec8 100644 --- a/pkg/sentry/kernel/threads.go +++ b/pkg/sentry/kernel/threads.go @@ -55,7 +55,7 @@ const InitTID ThreadID = 1 // // +stateify savable type TaskSet struct { - // mu protects all relationships betweens tasks and thread groups in the + // mu protects all relationships between tasks and thread groups in the // TaskSet. (mu is approximately equivalent to Linux's tasklist_lock.) mu sync.RWMutex `state:"nosave"` diff --git a/pkg/sentry/syscalls/linux/sys_prctl.go b/pkg/sentry/syscalls/linux/sys_prctl.go index a892d2c62..9890dd946 100644 --- a/pkg/sentry/syscalls/linux/sys_prctl.go +++ b/pkg/sentry/syscalls/linux/sys_prctl.go @@ -172,6 +172,24 @@ func Prctl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall } return 1, nil, nil + case linux.PR_SET_PTRACER: + pid := args[1].Int() + switch pid { + case 0: + t.ClearYAMAException() + return 0, nil, nil + case linux.PR_SET_PTRACER_ANY: + t.SetYAMAException(nil) + return 0, nil, nil + default: + tracer := t.PIDNamespace().TaskWithID(kernel.ThreadID(pid)) + if tracer == nil { + return 0, nil, syserror.EINVAL + } + t.SetYAMAException(tracer) + return 0, nil, nil + } + case linux.PR_SET_SECCOMP: if args[1].Int() != linux.SECCOMP_MODE_FILTER { // Unsupported mode. |