diff options
author | gVisor bot <gvisor-bot@google.com> | 2021-02-24 10:07:45 +0000 |
---|---|---|
committer | gVisor bot <gvisor-bot@google.com> | 2021-02-24 10:07:45 +0000 |
commit | a004d3309c5d8320408b4661940e6b0839eef69b (patch) | |
tree | a1295517d02e17401932b9f0db45523f5a475868 /pkg | |
parent | 2b0d4ebbbaa18506fdba8988210fc0f2009ca379 (diff) | |
parent | acd516cfe2920006a5d2760c78bd2245d498023a (diff) |
Merge release-20210208.0-85-gacd516cfe (automated)
Diffstat (limited to 'pkg')
-rw-r--r-- | pkg/abi/linux/prctl.go | 5 | ||||
-rw-r--r-- | pkg/abi/linux/ptrace.go | 6 | ||||
-rw-r--r-- | pkg/sentry/fsimpl/proc/proc_state_autogen.go | 27 | ||||
-rw-r--r-- | pkg/sentry/fsimpl/proc/tasks_sys.go | 3 | ||||
-rw-r--r-- | pkg/sentry/fsimpl/proc/yama.go | 80 | ||||
-rw-r--r-- | pkg/sentry/kernel/kernel.go | 15 | ||||
-rw-r--r-- | pkg/sentry/kernel/kernel_state_autogen.go | 97 | ||||
-rw-r--r-- | pkg/sentry/kernel/ptrace.go | 175 | ||||
-rw-r--r-- | pkg/sentry/kernel/task.go | 7 | ||||
-rw-r--r-- | pkg/sentry/kernel/task_exit.go | 3 | ||||
-rw-r--r-- | pkg/sentry/kernel/threads.go | 2 | ||||
-rw-r--r-- | pkg/sentry/syscalls/linux/sys_prctl.go | 18 |
12 files changed, 377 insertions, 61 deletions
diff --git a/pkg/abi/linux/prctl.go b/pkg/abi/linux/prctl.go index 391cfaa1c..41118c3b4 100644 --- a/pkg/abi/linux/prctl.go +++ b/pkg/abi/linux/prctl.go @@ -144,6 +144,11 @@ const ( // PR_MPX_DISABLE_MANAGEMENT disables kernel management of Memory // Protection eXtensions (MPX) bounds tables. PR_MPX_DISABLE_MANAGEMENT = 44 + + // PR_SET_PTRACER allows a specific process (or any, if PR_SET_PTRACER_ANY is + // specified) to ptrace the current task. + PR_SET_PTRACER = 0x59616d61 + PR_SET_PTRACER_ANY = -1 ) // From <asm/prctl.h> diff --git a/pkg/abi/linux/ptrace.go b/pkg/abi/linux/ptrace.go index 23e605ab2..db1c6a0d8 100644 --- a/pkg/abi/linux/ptrace.go +++ b/pkg/abi/linux/ptrace.go @@ -87,3 +87,9 @@ const ( PTRACE_O_EXITKILL = 1 << 20 PTRACE_O_SUSPEND_SECCOMP = 1 << 21 ) + +// YAMA ptrace_scope levels from security/yama/yama_lsm.c. +const ( + YAMA_SCOPE_DISABLED = 0 + YAMA_SCOPE_RELATIONAL = 1 +) diff --git a/pkg/sentry/fsimpl/proc/proc_state_autogen.go b/pkg/sentry/fsimpl/proc/proc_state_autogen.go index f2ba64a7f..6b511ab7d 100644 --- a/pkg/sentry/fsimpl/proc/proc_state_autogen.go +++ b/pkg/sentry/fsimpl/proc/proc_state_autogen.go @@ -1994,6 +1994,32 @@ func (ipf *ipForwarding) StateLoad(stateSourceObject state.Source) { stateSourceObject.Load(2, &ipf.enabled) } +func (s *yamaPtraceScope) StateTypeName() string { + return "pkg/sentry/fsimpl/proc.yamaPtraceScope" +} + +func (s *yamaPtraceScope) StateFields() []string { + return []string{ + "DynamicBytesFile", + "level", + } +} + +func (s *yamaPtraceScope) beforeSave() {} + +func (s *yamaPtraceScope) StateSave(stateSinkObject state.Sink) { + s.beforeSave() + stateSinkObject.Save(0, &s.DynamicBytesFile) + stateSinkObject.Save(1, &s.level) +} + +func (s *yamaPtraceScope) afterLoad() {} + +func (s *yamaPtraceScope) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &s.DynamicBytesFile) + stateSourceObject.Load(1, &s.level) +} + func init() { state.Register((*fdDirInodeRefs)(nil)) 
state.Register((*fdInfoDirInodeRefs)(nil)) @@ -2064,4 +2090,5 @@ func init() { state.Register((*tcpRecoveryData)(nil)) state.Register((*tcpMemData)(nil)) state.Register((*ipForwarding)(nil)) + state.Register((*yamaPtraceScope)(nil)) } diff --git a/pkg/sentry/fsimpl/proc/tasks_sys.go b/pkg/sentry/fsimpl/proc/tasks_sys.go index 25c407d98..fd7823daa 100644 --- a/pkg/sentry/fsimpl/proc/tasks_sys.go +++ b/pkg/sentry/fsimpl/proc/tasks_sys.go @@ -48,6 +48,9 @@ func (fs *filesystem) newSysDir(ctx context.Context, root *auth.Credentials, k * "shmall": fs.newInode(ctx, root, 0444, shmData(linux.SHMALL)), "shmmax": fs.newInode(ctx, root, 0444, shmData(linux.SHMMAX)), "shmmni": fs.newInode(ctx, root, 0444, shmData(linux.SHMMNI)), + "yama": fs.newStaticDir(ctx, root, map[string]kernfs.Inode{ + "ptrace_scope": fs.newYAMAPtraceScopeFile(ctx, k, root), + }), }), "vm": fs.newStaticDir(ctx, root, map[string]kernfs.Inode{ "mmap_min_addr": fs.newInode(ctx, root, 0444, &mmapMinAddrData{k: k}), diff --git a/pkg/sentry/fsimpl/proc/yama.go b/pkg/sentry/fsimpl/proc/yama.go new file mode 100644 index 000000000..aebfe8944 --- /dev/null +++ b/pkg/sentry/fsimpl/proc/yama.go @@ -0,0 +1,80 @@ +// Copyright 2021 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package proc + +import ( + "bytes" + "fmt" + "sync/atomic" + + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/kernel/auth" + "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" +) + +func (fs *filesystem) newYAMAPtraceScopeFile(ctx context.Context, k *kernel.Kernel, creds *auth.Credentials) kernfs.Inode { + s := &yamaPtraceScope{level: &k.YAMAPtraceScope} + s.Init(ctx, creds, linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), s, 0644) + return s +} + +// yamaPtraceScope implements vfs.WritableDynamicBytesSource for +// /proc/sys/kernel/yama/ptrace_scope. +// +// +stateify savable +type yamaPtraceScope struct { + kernfs.DynamicBytesFile + + // level is the ptrace_scope level. + level *int32 +} + +// Generate implements vfs.DynamicBytesSource.Generate. +func (s *yamaPtraceScope) Generate(ctx context.Context, buf *bytes.Buffer) error { + _, err := fmt.Fprintf(buf, "%d\n", atomic.LoadInt32(s.level)) + return err +} + +// Write implements vfs.WritableDynamicBytesSource.Write. +func (s *yamaPtraceScope) Write(ctx context.Context, src usermem.IOSequence, offset int64) (int64, error) { + if offset != 0 { + // Ignore partial writes. + return 0, syserror.EINVAL + } + if src.NumBytes() == 0 { + return 0, nil + } + + // Limit the amount of memory allocated. + src = src.TakeFirst(usermem.PageSize - 1) + + var v int32 + n, err := usermem.CopyInt32StringInVec(ctx, src.IO, src.Addrs, &v, src.Opts) + if err != nil { + return 0, err + } + + // We do not support YAMA levels > YAMA_SCOPE_RELATIONAL. 
+ if v < linux.YAMA_SCOPE_DISABLED || v > linux.YAMA_SCOPE_RELATIONAL { + return 0, syserror.EINVAL + } + + atomic.StoreInt32(s.level, v) + return n, nil +} diff --git a/pkg/sentry/kernel/kernel.go b/pkg/sentry/kernel/kernel.go index ef4e934a1..43065b45a 100644 --- a/pkg/sentry/kernel/kernel.go +++ b/pkg/sentry/kernel/kernel.go @@ -282,6 +282,18 @@ type Kernel struct { // If set to true, report address space activation waits as if the task is in // external wait so that the watchdog doesn't report the task stuck. SleepForAddressSpaceActivation bool + + // Exceptions to YAMA ptrace restrictions. Each key-value pair represents a + // tracee-tracer relationship. The key is a process (technically, the thread + // group leader) that can be traced by any thread that is a descendant of the + // value. If the value is nil, then anyone can trace the process represented by + // the key. + // + // ptraceExceptions is protected by the TaskSet mutex. + ptraceExceptions map[*Task]*Task + + // YAMAPtraceScope is the current level of YAMA ptrace restrictions. + YAMAPtraceScope int32 } // InitKernelArgs holds arguments to Init. 
@@ -382,6 +394,8 @@ func (k *Kernel) Init(args InitKernelArgs) error { k.monotonicClock = &timekeeperClock{tk: args.Timekeeper, c: sentrytime.Monotonic} k.futexes = futex.NewManager() k.netlinkPorts = port.New() + k.ptraceExceptions = make(map[*Task]*Task) + k.YAMAPtraceScope = linux.YAMA_SCOPE_RELATIONAL if VFS2Enabled { ctx := k.SupervisorContext() @@ -425,7 +439,6 @@ func (k *Kernel) Init(args InitKernelArgs) error { k.socketsVFS2 = make(map[*vfs.FileDescription]*SocketRecord) } - return nil } diff --git a/pkg/sentry/kernel/kernel_state_autogen.go b/pkg/sentry/kernel/kernel_state_autogen.go index fc4843f36..12b076fc6 100644 --- a/pkg/sentry/kernel/kernel_state_autogen.go +++ b/pkg/sentry/kernel/kernel_state_autogen.go @@ -331,6 +331,8 @@ func (k *Kernel) StateFields() []string { "shmMount", "socketMount", "SleepForAddressSpaceActivation", + "ptraceExceptions", + "YAMAPtraceScope", } } @@ -377,6 +379,8 @@ func (k *Kernel) StateSave(stateSinkObject state.Sink) { stateSinkObject.Save(34, &k.shmMount) stateSinkObject.Save(35, &k.socketMount) stateSinkObject.Save(36, &k.SleepForAddressSpaceActivation) + stateSinkObject.Save(37, &k.ptraceExceptions) + stateSinkObject.Save(38, &k.YAMAPtraceScope) } func (k *Kernel) afterLoad() {} @@ -417,6 +421,8 @@ func (k *Kernel) StateLoad(stateSourceObject state.Source) { stateSourceObject.Load(34, &k.shmMount) stateSourceObject.Load(35, &k.socketMount) stateSourceObject.Load(36, &k.SleepForAddressSpaceActivation) + stateSourceObject.Load(37, &k.ptraceExceptions) + stateSourceObject.Load(38, &k.YAMAPtraceScope) stateSourceObject.LoadValue(24, new([]tcpip.Endpoint), func(y interface{}) { k.loadDanglingEndpoints(y.([]tcpip.Endpoint)) }) stateSourceObject.LoadValue(28, new(*device.Registry), func(y interface{}) { k.loadDeviceRegistry(y.(*device.Registry)) }) } @@ -1179,6 +1185,7 @@ func (t *Task) StateFields() []string { "ptraceCode", "ptraceSiginfo", "ptraceEventMsg", + "ptraceYAMAExceptionAdded", "ioUsage", "creds", "utsns", @@ 
-1214,7 +1221,7 @@ func (t *Task) StateSave(stateSinkObject state.Sink) { var ptraceTracerValue *Task = t.savePtraceTracer() stateSinkObject.SaveValue(31, ptraceTracerValue) var syscallFiltersValue []bpf.Program = t.saveSyscallFilters() - stateSinkObject.SaveValue(47, syscallFiltersValue) + stateSinkObject.SaveValue(48, syscallFiltersValue) stateSinkObject.Save(0, &t.taskNode) stateSinkObject.Save(1, &t.runState) stateSinkObject.Save(2, &t.taskWorkCount) @@ -1254,27 +1261,28 @@ func (t *Task) StateSave(stateSinkObject state.Sink) { stateSinkObject.Save(37, &t.ptraceCode) stateSinkObject.Save(38, &t.ptraceSiginfo) stateSinkObject.Save(39, &t.ptraceEventMsg) - stateSinkObject.Save(40, &t.ioUsage) - stateSinkObject.Save(41, &t.creds) - stateSinkObject.Save(42, &t.utsns) - stateSinkObject.Save(43, &t.ipcns) - stateSinkObject.Save(44, &t.abstractSockets) - stateSinkObject.Save(45, &t.mountNamespaceVFS2) - stateSinkObject.Save(46, &t.parentDeathSignal) - stateSinkObject.Save(48, &t.cleartid) - stateSinkObject.Save(49, &t.allowedCPUMask) - stateSinkObject.Save(50, &t.cpu) - stateSinkObject.Save(51, &t.niceness) - stateSinkObject.Save(52, &t.numaPolicy) - stateSinkObject.Save(53, &t.numaNodeMask) - stateSinkObject.Save(54, &t.netns) - stateSinkObject.Save(55, &t.rseqCPU) - stateSinkObject.Save(56, &t.oldRSeqCPUAddr) - stateSinkObject.Save(57, &t.rseqAddr) - stateSinkObject.Save(58, &t.rseqSignature) - stateSinkObject.Save(59, &t.robustList) - stateSinkObject.Save(60, &t.startTime) - stateSinkObject.Save(61, &t.kcov) + stateSinkObject.Save(40, &t.ptraceYAMAExceptionAdded) + stateSinkObject.Save(41, &t.ioUsage) + stateSinkObject.Save(42, &t.creds) + stateSinkObject.Save(43, &t.utsns) + stateSinkObject.Save(44, &t.ipcns) + stateSinkObject.Save(45, &t.abstractSockets) + stateSinkObject.Save(46, &t.mountNamespaceVFS2) + stateSinkObject.Save(47, &t.parentDeathSignal) + stateSinkObject.Save(49, &t.cleartid) + stateSinkObject.Save(50, &t.allowedCPUMask) + stateSinkObject.Save(51, 
&t.cpu) + stateSinkObject.Save(52, &t.niceness) + stateSinkObject.Save(53, &t.numaPolicy) + stateSinkObject.Save(54, &t.numaNodeMask) + stateSinkObject.Save(55, &t.netns) + stateSinkObject.Save(56, &t.rseqCPU) + stateSinkObject.Save(57, &t.oldRSeqCPUAddr) + stateSinkObject.Save(58, &t.rseqAddr) + stateSinkObject.Save(59, &t.rseqSignature) + stateSinkObject.Save(60, &t.robustList) + stateSinkObject.Save(61, &t.startTime) + stateSinkObject.Save(62, &t.kcov) } func (t *Task) StateLoad(stateSourceObject state.Source) { @@ -1317,29 +1325,30 @@ func (t *Task) StateLoad(stateSourceObject state.Source) { stateSourceObject.Load(37, &t.ptraceCode) stateSourceObject.Load(38, &t.ptraceSiginfo) stateSourceObject.Load(39, &t.ptraceEventMsg) - stateSourceObject.Load(40, &t.ioUsage) - stateSourceObject.Load(41, &t.creds) - stateSourceObject.Load(42, &t.utsns) - stateSourceObject.Load(43, &t.ipcns) - stateSourceObject.Load(44, &t.abstractSockets) - stateSourceObject.Load(45, &t.mountNamespaceVFS2) - stateSourceObject.Load(46, &t.parentDeathSignal) - stateSourceObject.Load(48, &t.cleartid) - stateSourceObject.Load(49, &t.allowedCPUMask) - stateSourceObject.Load(50, &t.cpu) - stateSourceObject.Load(51, &t.niceness) - stateSourceObject.Load(52, &t.numaPolicy) - stateSourceObject.Load(53, &t.numaNodeMask) - stateSourceObject.Load(54, &t.netns) - stateSourceObject.Load(55, &t.rseqCPU) - stateSourceObject.Load(56, &t.oldRSeqCPUAddr) - stateSourceObject.Load(57, &t.rseqAddr) - stateSourceObject.Load(58, &t.rseqSignature) - stateSourceObject.Load(59, &t.robustList) - stateSourceObject.Load(60, &t.startTime) - stateSourceObject.Load(61, &t.kcov) + stateSourceObject.Load(40, &t.ptraceYAMAExceptionAdded) + stateSourceObject.Load(41, &t.ioUsage) + stateSourceObject.Load(42, &t.creds) + stateSourceObject.Load(43, &t.utsns) + stateSourceObject.Load(44, &t.ipcns) + stateSourceObject.Load(45, &t.abstractSockets) + stateSourceObject.Load(46, &t.mountNamespaceVFS2) + stateSourceObject.Load(47, 
&t.parentDeathSignal) + stateSourceObject.Load(49, &t.cleartid) + stateSourceObject.Load(50, &t.allowedCPUMask) + stateSourceObject.Load(51, &t.cpu) + stateSourceObject.Load(52, &t.niceness) + stateSourceObject.Load(53, &t.numaPolicy) + stateSourceObject.Load(54, &t.numaNodeMask) + stateSourceObject.Load(55, &t.netns) + stateSourceObject.Load(56, &t.rseqCPU) + stateSourceObject.Load(57, &t.oldRSeqCPUAddr) + stateSourceObject.Load(58, &t.rseqAddr) + stateSourceObject.Load(59, &t.rseqSignature) + stateSourceObject.Load(60, &t.robustList) + stateSourceObject.Load(61, &t.startTime) + stateSourceObject.Load(62, &t.kcov) stateSourceObject.LoadValue(31, new(*Task), func(y interface{}) { t.loadPtraceTracer(y.(*Task)) }) - stateSourceObject.LoadValue(47, new([]bpf.Program), func(y interface{}) { t.loadSyscallFilters(y.([]bpf.Program)) }) + stateSourceObject.LoadValue(48, new([]bpf.Program), func(y interface{}) { t.loadSyscallFilters(y.([]bpf.Program)) }) stateSourceObject.AfterLoad(t.afterLoad) } diff --git a/pkg/sentry/kernel/ptrace.go b/pkg/sentry/kernel/ptrace.go index cef58a590..c3980350a 100644 --- a/pkg/sentry/kernel/ptrace.go +++ b/pkg/sentry/kernel/ptrace.go @@ -16,6 +16,7 @@ package kernel import ( "fmt" + "sync/atomic" "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/marshal/primitive" @@ -95,7 +96,11 @@ const ( // checks for access mode PTRACE_MODE_ATTACH; otherwise, it checks for access // mode PTRACE_MODE_READ. // -// NOTE(b/30815691): The result of CanTrace is immediately stale (e.g., a +// In Linux, ptrace access restrictions may be configured by LSMs. While we do +// not support LSMs, we do add additional restrictions based on the commoncap +// and YAMA LSMs. +// +// TODO(gvisor.dev/issue/212): The result of CanTrace is immediately stale (e.g., a // racing setuid(2) may change traceability). This may pose a risk when a task // changes from traceable to not traceable. This is only problematic across // execve, where privileges may increase. 
@@ -103,7 +108,7 @@ const ( // We currently do not implement privileged executables (set-user/group-ID bits // and file capabilities), so that case is not reachable. func (t *Task) CanTrace(target *Task, attach bool) bool { - // "1. If the calling thread and the target thread are in the same thread + // "If the calling thread and the target thread are in the same thread // group, access is always allowed." - ptrace(2) // // Note: Strictly speaking, prior to 73af963f9f30 ("__ptrace_may_access() @@ -115,9 +120,57 @@ func (t *Task) CanTrace(target *Task, attach bool) bool { return true } + if !t.canTraceStandard(target, attach) { + return false + } + + // YAMA only supported for vfs2. + if !VFS2Enabled { + return true + } + + if atomic.LoadInt32(&t.k.YAMAPtraceScope) == linux.YAMA_SCOPE_RELATIONAL { + t.tg.pidns.owner.mu.RLock() + defer t.tg.pidns.owner.mu.RUnlock() + if !t.canTraceYAMALocked(target) { + return false + } + } + return true +} + +// canTraceLocked is the same as CanTrace, except the caller must already hold +// the TaskSet mutex (for reading or writing). +func (t *Task) canTraceLocked(target *Task, attach bool) bool { + if t.tg == target.tg { + return true + } + + if !t.canTraceStandard(target, attach) { + return false + } + + // YAMA only supported for vfs2. + if !VFS2Enabled { + return true + } + + if atomic.LoadInt32(&t.k.YAMAPtraceScope) == linux.YAMA_SCOPE_RELATIONAL { + if !t.canTraceYAMALocked(target) { + return false + } + } + return true +} + +// canTraceStandard performs standard ptrace access checks as defined by +// kernel/ptrace.c:__ptrace_may_access as well as the commoncap LSM +// implementation of the security_ptrace_access_check() interface, which is +// always invoked. +func (t *Task) canTraceStandard(target *Task, attach bool) bool { // """ - // 2. If the access mode specifies PTRACE_MODE_FSCREDS (ED: snipped, - // doesn't exist until Linux 4.5). + // TODO(gvisor.dev/issue/260): 1. 
If the access mode specifies + // PTRACE_MODE_FSCREDS (ED: snipped, doesn't exist until Linux 4.5). // // Otherwise, the access mode specifies PTRACE_MODE_REALCREDS, so use the // caller's real UID and GID for the checks in the next step. (Most APIs @@ -125,7 +178,7 @@ func (t *Task) CanTrace(target *Task, attach bool) bool { // historical reasons, the PTRACE_MODE_REALCREDS check uses the real IDs // instead.) // - // 3. Deny access if neither of the following is true: + // 2. Deny access if neither of the following is true: // // - The real, effective, and saved-set user IDs of the target match the // caller's user ID, *and* the real, effective, and saved-set group IDs of @@ -134,15 +187,12 @@ func (t *Task) CanTrace(target *Task, attach bool) bool { // - The caller has the CAP_SYS_PTRACE capability in the user namespace of // the target. // - // 4. Deny access if the target process "dumpable" attribute has a value + // 3. Deny access if the target process "dumpable" attribute has a value // other than 1 (SUID_DUMP_USER; see the discussion of PR_SET_DUMPABLE in // prctl(2)), and the caller does not have the CAP_SYS_PTRACE capability in // the user namespace of the target process. // - // 5. The kernel LSM security_ptrace_access_check() interface is invoked to - // see if ptrace access is permitted. The results depend on the LSM(s). The - // implementation of this interface in the commoncap LSM performs the - // following steps: + // 4. The commoncap LSM performs the following steps: // // a) If the access mode includes PTRACE_MODE_FSCREDS, then use the // caller's effective capability set; otherwise (the access mode specifies @@ -188,6 +238,94 @@ func (t *Task) CanTrace(target *Task, attach bool) bool { return true } +// canTraceYAMALocked performs ptrace access checks as defined by the YAMA LSM +// implementation of the security_ptrace_access_check() interface, with YAMA +// configured to mode 1. This is a common default among various Linux +// distributions. 
+// +// It only permits the tracer to proceed if one of the following conditions is +// met: +// +// a) The tracer is already attached to the tracee. +// +// b) The target is a descendant of the tracer. +// +// c) The target has explicitly given permission to the tracer through the +// PR_SET_PTRACER prctl. +// +// d) The tracer has CAP_SYS_PTRACE. +// +// See security/yama/yama_lsm.c:yama_ptrace_access_check. +// +// Precondition: the TaskSet mutex must be locked (for reading or writing). +func (t *Task) canTraceYAMALocked(target *Task) bool { + if tracer := target.Tracer(); tracer != nil { + if tracer.tg == t.tg { + return true + } + } + if target.isYAMADescendantOfLocked(t) { + return true + } + if target.hasYAMAExceptionForLocked(t) { + return true + } + if t.HasCapabilityIn(linux.CAP_SYS_PTRACE, target.UserNamespace()) { + return true + } + return false +} + +// Determines whether t is considered a descendant of ancestor for the purposes +// of YAMA permissions (specifically, whether t's thread group is descended from +// ancestor's). +// +// Precondition: the TaskSet mutex must be locked (for reading or writing). +func (t *Task) isYAMADescendantOfLocked(ancestor *Task) bool { + walker := t + for walker != nil { + if walker.tg.leader == ancestor.tg.leader { + return true + } + walker = walker.parent + } + return false +} + +// Precondition: the TaskSet mutex must be locked (for reading or writing). +func (t *Task) hasYAMAExceptionForLocked(tracer *Task) bool { + allowed, ok := t.k.ptraceExceptions[t] + if !ok { + return false + } + return allowed == nil || tracer.isYAMADescendantOfLocked(allowed) +} + +// ClearYAMAException removes any YAMA exception with t as the tracee. +func (t *Task) ClearYAMAException() { + t.tg.pidns.owner.mu.Lock() + defer t.tg.pidns.owner.mu.Unlock() + tracee := t.tg.leader + delete(t.k.ptraceExceptions, tracee) +} + +// SetYAMAException creates a YAMA exception allowing all descendants of tracer +// to trace t. 
If tracer is nil, then any task is allowed to trace t. +// +// If there was an existing exception, it is overwritten with the new one. +func (t *Task) SetYAMAException(tracer *Task) { + t.tg.pidns.owner.mu.Lock() + defer t.tg.pidns.owner.mu.Unlock() + + tracee := t.tg.leader + tracee.ptraceYAMAExceptionAdded = true + if tracer != nil { + tracer.ptraceYAMAExceptionAdded = true + } + + t.k.ptraceExceptions[tracee] = tracer +} + // Tracer returns t's ptrace Tracer. func (t *Task) Tracer() *Task { return t.ptraceTracer.Load().(*Task) @@ -358,7 +496,7 @@ func (t *Task) ptraceTraceme() error { // returning nil here is correct. return nil } - if !t.parent.CanTrace(t, true) { + if !t.parent.canTraceLocked(t, true) { return syserror.EPERM } if t.parent.exitState != TaskExitNone { @@ -377,11 +515,11 @@ func (t *Task) ptraceAttach(target *Task, seize bool, opts uintptr) error { if t.tg == target.tg { return syserror.EPERM } - if !t.CanTrace(target, true) { - return syserror.EPERM - } t.tg.pidns.owner.mu.Lock() defer t.tg.pidns.owner.mu.Unlock() + if !t.canTraceLocked(target, true) { + return syserror.EPERM + } if target.hasTracer() { return syserror.EPERM } @@ -459,6 +597,15 @@ func (t *Task) exitPtrace() { } // "nil maps cannot be saved" t.ptraceTracees = make(map[*Task]struct{}) + + if t.ptraceYAMAExceptionAdded { + delete(t.k.ptraceExceptions, t) + for tracee, tracer := range t.k.ptraceExceptions { + if tracer == t { + delete(t.k.ptraceExceptions, tracee) + } + } + } } // forgetTracerLocked detaches t's tracer and ensures that t is no longer diff --git a/pkg/sentry/kernel/task.go b/pkg/sentry/kernel/task.go index c0ab53c94..36141dd09 100644 --- a/pkg/sentry/kernel/task.go +++ b/pkg/sentry/kernel/task.go @@ -403,6 +403,13 @@ type Task struct { // ptraceEventMsg is protected by the TaskSet mutex. ptraceEventMsg uint64 + // ptraceYAMAExceptionAdded is true if a YAMA exception involving the task has + // been added before. 
This is used during task exit to decide whether we need + // to clean up YAMA exceptions. + // + // ptraceYAMAExceptionAdded is protected by the TaskSet mutex. + ptraceYAMAExceptionAdded bool + // The struct that holds the IO-related usage. The ioUsage pointer is // immutable. ioUsage *usage.IO diff --git a/pkg/sentry/kernel/task_exit.go b/pkg/sentry/kernel/task_exit.go index f7765fa3a..ad59e4f60 100644 --- a/pkg/sentry/kernel/task_exit.go +++ b/pkg/sentry/kernel/task_exit.go @@ -694,7 +694,8 @@ func (t *Task) exitNotifyLocked(fromPtraceDetach bool) { } if t.parent != nil { delete(t.parent.children, t) - t.parent = nil + // Do not clear t.parent. It may still be needed after the task has exited + // (for example, to perform ptrace access checks on /proc/[pid] files). } } } diff --git a/pkg/sentry/kernel/threads.go b/pkg/sentry/kernel/threads.go index e9da99067..09d070ec8 100644 --- a/pkg/sentry/kernel/threads.go +++ b/pkg/sentry/kernel/threads.go @@ -55,7 +55,7 @@ const InitTID ThreadID = 1 // // +stateify savable type TaskSet struct { - // mu protects all relationships betweens tasks and thread groups in the + // mu protects all relationships between tasks and thread groups in the // TaskSet. (mu is approximately equivalent to Linux's tasklist_lock.) 
mu sync.RWMutex `state:"nosave"` diff --git a/pkg/sentry/syscalls/linux/sys_prctl.go b/pkg/sentry/syscalls/linux/sys_prctl.go index a892d2c62..9890dd946 100644 --- a/pkg/sentry/syscalls/linux/sys_prctl.go +++ b/pkg/sentry/syscalls/linux/sys_prctl.go @@ -172,6 +172,24 @@ func Prctl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall } return 1, nil, nil + case linux.PR_SET_PTRACER: + pid := args[1].Int() + switch pid { + case 0: + t.ClearYAMAException() + return 0, nil, nil + case linux.PR_SET_PTRACER_ANY: + t.SetYAMAException(nil) + return 0, nil, nil + default: + tracer := t.PIDNamespace().TaskWithID(kernel.ThreadID(pid)) + if tracer == nil { + return 0, nil, syserror.EINVAL + } + t.SetYAMAException(tracer) + return 0, nil, nil + } + case linux.PR_SET_SECCOMP: if args[1].Int() != linux.SECCOMP_MODE_FILTER { // Unsupported mode. |