summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
-rw-r--r--pkg/abi/linux/prctl.go5
-rw-r--r--pkg/abi/linux/ptrace.go6
-rw-r--r--pkg/sentry/fsimpl/proc/proc_state_autogen.go27
-rw-r--r--pkg/sentry/fsimpl/proc/tasks_sys.go3
-rw-r--r--pkg/sentry/fsimpl/proc/yama.go80
-rw-r--r--pkg/sentry/kernel/kernel.go15
-rw-r--r--pkg/sentry/kernel/kernel_state_autogen.go97
-rw-r--r--pkg/sentry/kernel/ptrace.go175
-rw-r--r--pkg/sentry/kernel/task.go7
-rw-r--r--pkg/sentry/kernel/task_exit.go3
-rw-r--r--pkg/sentry/kernel/threads.go2
-rw-r--r--pkg/sentry/syscalls/linux/sys_prctl.go18
12 files changed, 377 insertions, 61 deletions
diff --git a/pkg/abi/linux/prctl.go b/pkg/abi/linux/prctl.go
index 391cfaa1c..41118c3b4 100644
--- a/pkg/abi/linux/prctl.go
+++ b/pkg/abi/linux/prctl.go
@@ -144,6 +144,11 @@ const (
// PR_MPX_DISABLE_MANAGEMENT disables kernel management of Memory
// Protection eXtensions (MPX) bounds tables.
PR_MPX_DISABLE_MANAGEMENT = 44
+
+ // PR_SET_PTRACER allows a specific process (or any, if PR_SET_PTRACER_ANY is
+ // specified) to ptrace the current task.
+ PR_SET_PTRACER = 0x59616d61
+ PR_SET_PTRACER_ANY = -1
)
// From <asm/prctl.h>
diff --git a/pkg/abi/linux/ptrace.go b/pkg/abi/linux/ptrace.go
index 23e605ab2..db1c6a0d8 100644
--- a/pkg/abi/linux/ptrace.go
+++ b/pkg/abi/linux/ptrace.go
@@ -87,3 +87,9 @@ const (
PTRACE_O_EXITKILL = 1 << 20
PTRACE_O_SUSPEND_SECCOMP = 1 << 21
)
+
+// YAMA ptrace_scope levels, mirroring the values in Linux security/yama/yama_lsm.c.
+const (
+ YAMA_SCOPE_DISABLED = 0 // kernel.Task.CanTrace performs no YAMA check at this level.
+ YAMA_SCOPE_RELATIONAL = 1 // kernel.Task.CanTrace additionally requires the YAMA relational check to pass.
+)
diff --git a/pkg/sentry/fsimpl/proc/proc_state_autogen.go b/pkg/sentry/fsimpl/proc/proc_state_autogen.go
index f2ba64a7f..6b511ab7d 100644
--- a/pkg/sentry/fsimpl/proc/proc_state_autogen.go
+++ b/pkg/sentry/fsimpl/proc/proc_state_autogen.go
@@ -1994,6 +1994,32 @@ func (ipf *ipForwarding) StateLoad(stateSourceObject state.Source) {
stateSourceObject.Load(2, &ipf.enabled)
}
+func (s *yamaPtraceScope) StateTypeName() string { // Returns the package-qualified type name.
+ return "pkg/sentry/fsimpl/proc.yamaPtraceScope"
+}
+
+func (s *yamaPtraceScope) StateFields() []string { // Field names; positions match the indices used by StateSave and StateLoad.
+ return []string{
+ "DynamicBytesFile",
+ "level",
+ }
+}
+
+func (s *yamaPtraceScope) beforeSave() {} // No pre-save work is needed for this type.
+
+func (s *yamaPtraceScope) StateSave(stateSinkObject state.Sink) { // Saves each field under its index.
+ s.beforeSave() // Hook runs before any field is saved.
+ stateSinkObject.Save(0, &s.DynamicBytesFile)
+ stateSinkObject.Save(1, &s.level)
+}
+
+func (s *yamaPtraceScope) afterLoad() {} // No post-load work is needed for this type.
+
+func (s *yamaPtraceScope) StateLoad(stateSourceObject state.Source) { // Loads each field from the index it was saved under.
+ stateSourceObject.Load(0, &s.DynamicBytesFile)
+ stateSourceObject.Load(1, &s.level)
+}
+
func init() {
state.Register((*fdDirInodeRefs)(nil))
state.Register((*fdInfoDirInodeRefs)(nil))
@@ -2064,4 +2090,5 @@ func init() {
state.Register((*tcpRecoveryData)(nil))
state.Register((*tcpMemData)(nil))
state.Register((*ipForwarding)(nil))
+ state.Register((*yamaPtraceScope)(nil))
}
diff --git a/pkg/sentry/fsimpl/proc/tasks_sys.go b/pkg/sentry/fsimpl/proc/tasks_sys.go
index 25c407d98..fd7823daa 100644
--- a/pkg/sentry/fsimpl/proc/tasks_sys.go
+++ b/pkg/sentry/fsimpl/proc/tasks_sys.go
@@ -48,6 +48,9 @@ func (fs *filesystem) newSysDir(ctx context.Context, root *auth.Credentials, k *
"shmall": fs.newInode(ctx, root, 0444, shmData(linux.SHMALL)),
"shmmax": fs.newInode(ctx, root, 0444, shmData(linux.SHMMAX)),
"shmmni": fs.newInode(ctx, root, 0444, shmData(linux.SHMMNI)),
+ "yama": fs.newStaticDir(ctx, root, map[string]kernfs.Inode{
+ "ptrace_scope": fs.newYAMAPtraceScopeFile(ctx, k, root),
+ }),
}),
"vm": fs.newStaticDir(ctx, root, map[string]kernfs.Inode{
"mmap_min_addr": fs.newInode(ctx, root, 0444, &mmapMinAddrData{k: k}),
diff --git a/pkg/sentry/fsimpl/proc/yama.go b/pkg/sentry/fsimpl/proc/yama.go
new file mode 100644
index 000000000..aebfe8944
--- /dev/null
+++ b/pkg/sentry/fsimpl/proc/yama.go
@@ -0,0 +1,80 @@
+// Copyright 2021 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package proc
+
+import (
+ "bytes"
+ "fmt"
+ "sync/atomic"
+
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+ "gvisor.dev/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/usermem"
+)
+
+// newYAMAPtraceScopeFile builds the inode for the yama/ptrace_scope file. The
+// inode holds a pointer to k.YAMAPtraceScope, so reads and writes of the file
+// operate directly on the kernel-wide setting.
+func (fs *filesystem) newYAMAPtraceScopeFile(ctx context.Context, k *kernel.Kernel, creds *auth.Credentials) kernfs.Inode {
+	scopeFile := new(yamaPtraceScope)
+	scopeFile.level = &k.YAMAPtraceScope
+	// The file acts as its own dynamic bytes source and is created with mode 0644.
+	scopeFile.Init(ctx, creds, linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), scopeFile, 0644)
+	return scopeFile
+}
+
+// yamaPtraceScope implements vfs.WritableDynamicBytesSource for
+// /proc/sys/kernel/yama/ptrace_scope.
+//
+// +stateify savable
+type yamaPtraceScope struct {
+ kernfs.DynamicBytesFile
+
+ // level points to the ptrace_scope level stored in kernel.Kernel.YAMAPtraceScope.
+ level *int32 // Read and written only through sync/atomic in Generate and Write.
+}
+
+// Generate implements vfs.DynamicBytesSource.Generate. It writes the current
+// ptrace_scope level to buf as a decimal number followed by a newline.
+func (s *yamaPtraceScope) Generate(ctx context.Context, buf *bytes.Buffer) error {
+	current := atomic.LoadInt32(s.level)
+	if _, err := fmt.Fprintf(buf, "%d\n", current); err != nil {
+		return err
+	}
+	return nil
+}
+
+// Write implements vfs.WritableDynamicBytesSource.Write.
+//
+// Only writes at offset 0 are accepted; an empty write is a no-op. The value
+// read from src must lie in [YAMA_SCOPE_DISABLED, YAMA_SCOPE_RELATIONAL],
+// otherwise EINVAL is returned and the level is left untouched.
+func (s *yamaPtraceScope) Write(ctx context.Context, src usermem.IOSequence, offset int64) (int64, error) {
+	switch {
+	case offset != 0:
+		// Ignore partial writes.
+		return 0, syserror.EINVAL
+	case src.NumBytes() == 0:
+		return 0, nil
+	}
+
+	// Cap the input at just under one page to limit the memory allocated.
+	bounded := src.TakeFirst(usermem.PageSize - 1)
+
+	var requested int32
+	consumed, err := usermem.CopyInt32StringInVec(ctx, bounded.IO, bounded.Addrs, &requested, bounded.Opts)
+	if err != nil {
+		return 0, err
+	}
+
+	// We do not support YAMA levels > YAMA_SCOPE_RELATIONAL.
+	if requested >= linux.YAMA_SCOPE_DISABLED && requested <= linux.YAMA_SCOPE_RELATIONAL {
+		atomic.StoreInt32(s.level, requested)
+		return consumed, nil
+	}
+	return 0, syserror.EINVAL
+}
diff --git a/pkg/sentry/kernel/kernel.go b/pkg/sentry/kernel/kernel.go
index ef4e934a1..43065b45a 100644
--- a/pkg/sentry/kernel/kernel.go
+++ b/pkg/sentry/kernel/kernel.go
@@ -282,6 +282,18 @@ type Kernel struct {
// If set to true, report address space activation waits as if the task is in
// external wait so that the watchdog doesn't report the task stuck.
SleepForAddressSpaceActivation bool
+
+ // Exceptions to YAMA ptrace restrictions. Each key-value pair represents a
+ // tracee-tracer relationship. The key is a process (technically, the thread
+ // group leader) that can be traced by any thread that is a descendant of the
+ // value. If the value is nil, then anyone can trace the process represented by
+ // the key.
+ //
+ // ptraceExceptions is protected by the TaskSet mutex.
+ ptraceExceptions map[*Task]*Task
+
+ // YAMAPtraceScope is the current level of YAMA ptrace restrictions.
+ YAMAPtraceScope int32
}
// InitKernelArgs holds arguments to Init.
@@ -382,6 +394,8 @@ func (k *Kernel) Init(args InitKernelArgs) error {
k.monotonicClock = &timekeeperClock{tk: args.Timekeeper, c: sentrytime.Monotonic}
k.futexes = futex.NewManager()
k.netlinkPorts = port.New()
+ k.ptraceExceptions = make(map[*Task]*Task)
+ k.YAMAPtraceScope = linux.YAMA_SCOPE_RELATIONAL
if VFS2Enabled {
ctx := k.SupervisorContext()
@@ -425,7 +439,6 @@ func (k *Kernel) Init(args InitKernelArgs) error {
k.socketsVFS2 = make(map[*vfs.FileDescription]*SocketRecord)
}
-
return nil
}
diff --git a/pkg/sentry/kernel/kernel_state_autogen.go b/pkg/sentry/kernel/kernel_state_autogen.go
index fc4843f36..12b076fc6 100644
--- a/pkg/sentry/kernel/kernel_state_autogen.go
+++ b/pkg/sentry/kernel/kernel_state_autogen.go
@@ -331,6 +331,8 @@ func (k *Kernel) StateFields() []string {
"shmMount",
"socketMount",
"SleepForAddressSpaceActivation",
+ "ptraceExceptions",
+ "YAMAPtraceScope",
}
}
@@ -377,6 +379,8 @@ func (k *Kernel) StateSave(stateSinkObject state.Sink) {
stateSinkObject.Save(34, &k.shmMount)
stateSinkObject.Save(35, &k.socketMount)
stateSinkObject.Save(36, &k.SleepForAddressSpaceActivation)
+ stateSinkObject.Save(37, &k.ptraceExceptions)
+ stateSinkObject.Save(38, &k.YAMAPtraceScope)
}
func (k *Kernel) afterLoad() {}
@@ -417,6 +421,8 @@ func (k *Kernel) StateLoad(stateSourceObject state.Source) {
stateSourceObject.Load(34, &k.shmMount)
stateSourceObject.Load(35, &k.socketMount)
stateSourceObject.Load(36, &k.SleepForAddressSpaceActivation)
+ stateSourceObject.Load(37, &k.ptraceExceptions)
+ stateSourceObject.Load(38, &k.YAMAPtraceScope)
stateSourceObject.LoadValue(24, new([]tcpip.Endpoint), func(y interface{}) { k.loadDanglingEndpoints(y.([]tcpip.Endpoint)) })
stateSourceObject.LoadValue(28, new(*device.Registry), func(y interface{}) { k.loadDeviceRegistry(y.(*device.Registry)) })
}
@@ -1179,6 +1185,7 @@ func (t *Task) StateFields() []string {
"ptraceCode",
"ptraceSiginfo",
"ptraceEventMsg",
+ "ptraceYAMAExceptionAdded",
"ioUsage",
"creds",
"utsns",
@@ -1214,7 +1221,7 @@ func (t *Task) StateSave(stateSinkObject state.Sink) {
var ptraceTracerValue *Task = t.savePtraceTracer()
stateSinkObject.SaveValue(31, ptraceTracerValue)
var syscallFiltersValue []bpf.Program = t.saveSyscallFilters()
- stateSinkObject.SaveValue(47, syscallFiltersValue)
+ stateSinkObject.SaveValue(48, syscallFiltersValue)
stateSinkObject.Save(0, &t.taskNode)
stateSinkObject.Save(1, &t.runState)
stateSinkObject.Save(2, &t.taskWorkCount)
@@ -1254,27 +1261,28 @@ func (t *Task) StateSave(stateSinkObject state.Sink) {
stateSinkObject.Save(37, &t.ptraceCode)
stateSinkObject.Save(38, &t.ptraceSiginfo)
stateSinkObject.Save(39, &t.ptraceEventMsg)
- stateSinkObject.Save(40, &t.ioUsage)
- stateSinkObject.Save(41, &t.creds)
- stateSinkObject.Save(42, &t.utsns)
- stateSinkObject.Save(43, &t.ipcns)
- stateSinkObject.Save(44, &t.abstractSockets)
- stateSinkObject.Save(45, &t.mountNamespaceVFS2)
- stateSinkObject.Save(46, &t.parentDeathSignal)
- stateSinkObject.Save(48, &t.cleartid)
- stateSinkObject.Save(49, &t.allowedCPUMask)
- stateSinkObject.Save(50, &t.cpu)
- stateSinkObject.Save(51, &t.niceness)
- stateSinkObject.Save(52, &t.numaPolicy)
- stateSinkObject.Save(53, &t.numaNodeMask)
- stateSinkObject.Save(54, &t.netns)
- stateSinkObject.Save(55, &t.rseqCPU)
- stateSinkObject.Save(56, &t.oldRSeqCPUAddr)
- stateSinkObject.Save(57, &t.rseqAddr)
- stateSinkObject.Save(58, &t.rseqSignature)
- stateSinkObject.Save(59, &t.robustList)
- stateSinkObject.Save(60, &t.startTime)
- stateSinkObject.Save(61, &t.kcov)
+ stateSinkObject.Save(40, &t.ptraceYAMAExceptionAdded)
+ stateSinkObject.Save(41, &t.ioUsage)
+ stateSinkObject.Save(42, &t.creds)
+ stateSinkObject.Save(43, &t.utsns)
+ stateSinkObject.Save(44, &t.ipcns)
+ stateSinkObject.Save(45, &t.abstractSockets)
+ stateSinkObject.Save(46, &t.mountNamespaceVFS2)
+ stateSinkObject.Save(47, &t.parentDeathSignal)
+ stateSinkObject.Save(49, &t.cleartid)
+ stateSinkObject.Save(50, &t.allowedCPUMask)
+ stateSinkObject.Save(51, &t.cpu)
+ stateSinkObject.Save(52, &t.niceness)
+ stateSinkObject.Save(53, &t.numaPolicy)
+ stateSinkObject.Save(54, &t.numaNodeMask)
+ stateSinkObject.Save(55, &t.netns)
+ stateSinkObject.Save(56, &t.rseqCPU)
+ stateSinkObject.Save(57, &t.oldRSeqCPUAddr)
+ stateSinkObject.Save(58, &t.rseqAddr)
+ stateSinkObject.Save(59, &t.rseqSignature)
+ stateSinkObject.Save(60, &t.robustList)
+ stateSinkObject.Save(61, &t.startTime)
+ stateSinkObject.Save(62, &t.kcov)
}
func (t *Task) StateLoad(stateSourceObject state.Source) {
@@ -1317,29 +1325,30 @@ func (t *Task) StateLoad(stateSourceObject state.Source) {
stateSourceObject.Load(37, &t.ptraceCode)
stateSourceObject.Load(38, &t.ptraceSiginfo)
stateSourceObject.Load(39, &t.ptraceEventMsg)
- stateSourceObject.Load(40, &t.ioUsage)
- stateSourceObject.Load(41, &t.creds)
- stateSourceObject.Load(42, &t.utsns)
- stateSourceObject.Load(43, &t.ipcns)
- stateSourceObject.Load(44, &t.abstractSockets)
- stateSourceObject.Load(45, &t.mountNamespaceVFS2)
- stateSourceObject.Load(46, &t.parentDeathSignal)
- stateSourceObject.Load(48, &t.cleartid)
- stateSourceObject.Load(49, &t.allowedCPUMask)
- stateSourceObject.Load(50, &t.cpu)
- stateSourceObject.Load(51, &t.niceness)
- stateSourceObject.Load(52, &t.numaPolicy)
- stateSourceObject.Load(53, &t.numaNodeMask)
- stateSourceObject.Load(54, &t.netns)
- stateSourceObject.Load(55, &t.rseqCPU)
- stateSourceObject.Load(56, &t.oldRSeqCPUAddr)
- stateSourceObject.Load(57, &t.rseqAddr)
- stateSourceObject.Load(58, &t.rseqSignature)
- stateSourceObject.Load(59, &t.robustList)
- stateSourceObject.Load(60, &t.startTime)
- stateSourceObject.Load(61, &t.kcov)
+ stateSourceObject.Load(40, &t.ptraceYAMAExceptionAdded)
+ stateSourceObject.Load(41, &t.ioUsage)
+ stateSourceObject.Load(42, &t.creds)
+ stateSourceObject.Load(43, &t.utsns)
+ stateSourceObject.Load(44, &t.ipcns)
+ stateSourceObject.Load(45, &t.abstractSockets)
+ stateSourceObject.Load(46, &t.mountNamespaceVFS2)
+ stateSourceObject.Load(47, &t.parentDeathSignal)
+ stateSourceObject.Load(49, &t.cleartid)
+ stateSourceObject.Load(50, &t.allowedCPUMask)
+ stateSourceObject.Load(51, &t.cpu)
+ stateSourceObject.Load(52, &t.niceness)
+ stateSourceObject.Load(53, &t.numaPolicy)
+ stateSourceObject.Load(54, &t.numaNodeMask)
+ stateSourceObject.Load(55, &t.netns)
+ stateSourceObject.Load(56, &t.rseqCPU)
+ stateSourceObject.Load(57, &t.oldRSeqCPUAddr)
+ stateSourceObject.Load(58, &t.rseqAddr)
+ stateSourceObject.Load(59, &t.rseqSignature)
+ stateSourceObject.Load(60, &t.robustList)
+ stateSourceObject.Load(61, &t.startTime)
+ stateSourceObject.Load(62, &t.kcov)
stateSourceObject.LoadValue(31, new(*Task), func(y interface{}) { t.loadPtraceTracer(y.(*Task)) })
- stateSourceObject.LoadValue(47, new([]bpf.Program), func(y interface{}) { t.loadSyscallFilters(y.([]bpf.Program)) })
+ stateSourceObject.LoadValue(48, new([]bpf.Program), func(y interface{}) { t.loadSyscallFilters(y.([]bpf.Program)) })
stateSourceObject.AfterLoad(t.afterLoad)
}
diff --git a/pkg/sentry/kernel/ptrace.go b/pkg/sentry/kernel/ptrace.go
index cef58a590..c3980350a 100644
--- a/pkg/sentry/kernel/ptrace.go
+++ b/pkg/sentry/kernel/ptrace.go
@@ -16,6 +16,7 @@ package kernel
import (
"fmt"
+ "sync/atomic"
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/marshal/primitive"
@@ -95,7 +96,11 @@ const (
// checks for access mode PTRACE_MODE_ATTACH; otherwise, it checks for access
// mode PTRACE_MODE_READ.
//
-// NOTE(b/30815691): The result of CanTrace is immediately stale (e.g., a
+// In Linux, ptrace access restrictions may be configured by LSMs. While we do
+// not support LSMs, we do add additional restrictions based on the commoncap
+// and YAMA LSMs.
+//
+// TODO(gvisor.dev/issue/212): The result of CanTrace is immediately stale (e.g., a
// racing setuid(2) may change traceability). This may pose a risk when a task
// changes from traceable to not traceable. This is only problematic across
// execve, where privileges may increase.
@@ -103,7 +108,7 @@ const (
// We currently do not implement privileged executables (set-user/group-ID bits
// and file capabilities), so that case is not reachable.
func (t *Task) CanTrace(target *Task, attach bool) bool {
- // "1. If the calling thread and the target thread are in the same thread
+ // "If the calling thread and the target thread are in the same thread
// group, access is always allowed." - ptrace(2)
//
// Note: Strictly speaking, prior to 73af963f9f30 ("__ptrace_may_access()
@@ -115,9 +120,57 @@ func (t *Task) CanTrace(target *Task, attach bool) bool {
return true
}
+ if !t.canTraceStandard(target, attach) {
+ return false
+ }
+
+ // YAMA only supported for vfs2.
+ if !VFS2Enabled {
+ return true
+ }
+
+ if atomic.LoadInt32(&t.k.YAMAPtraceScope) == linux.YAMA_SCOPE_RELATIONAL {
+ t.tg.pidns.owner.mu.RLock()
+ defer t.tg.pidns.owner.mu.RUnlock()
+ if !t.canTraceYAMALocked(target) {
+ return false
+ }
+ }
+ return true
+}
+
+// canTraceLocked performs the same access decision as CanTrace, but never
+// acquires the TaskSet mutex itself.
+//
+// Precondition: the caller must hold the TaskSet mutex (for reading or
+// writing).
+func (t *Task) canTraceLocked(target *Task, attach bool) bool {
+	// Threads of one thread group may always access each other.
+	if t.tg == target.tg {
+		return true
+	}
+
+	switch {
+	case !t.canTraceStandard(target, attach):
+		return false
+	case !VFS2Enabled:
+		// YAMA only supported for vfs2.
+		return true
+	case atomic.LoadInt32(&t.k.YAMAPtraceScope) != linux.YAMA_SCOPE_RELATIONAL:
+		// No YAMA restriction is applied at any other scope level.
+		return true
+	}
+	return t.canTraceYAMALocked(target)
+}
+
+// canTraceStandard performs standard ptrace access checks as defined by
+// kernel/ptrace.c:__ptrace_may_access as well as the commoncap LSM
+// implementation of the security_ptrace_access_check() interface, which is
+// always invoked.
+func (t *Task) canTraceStandard(target *Task, attach bool) bool {
// """
- // 2. If the access mode specifies PTRACE_MODE_FSCREDS (ED: snipped,
- // doesn't exist until Linux 4.5).
+ // TODO(gvisor.dev/issue/260): 1. If the access mode specifies
+ // PTRACE_MODE_FSCREDS (ED: snipped, doesn't exist until Linux 4.5).
//
// Otherwise, the access mode specifies PTRACE_MODE_REALCREDS, so use the
// caller's real UID and GID for the checks in the next step. (Most APIs
@@ -125,7 +178,7 @@ func (t *Task) CanTrace(target *Task, attach bool) bool {
// historical reasons, the PTRACE_MODE_REALCREDS check uses the real IDs
// instead.)
//
- // 3. Deny access if neither of the following is true:
+ // 2. Deny access if neither of the following is true:
//
// - The real, effective, and saved-set user IDs of the target match the
// caller's user ID, *and* the real, effective, and saved-set group IDs of
@@ -134,15 +187,12 @@ func (t *Task) CanTrace(target *Task, attach bool) bool {
// - The caller has the CAP_SYS_PTRACE capability in the user namespace of
// the target.
//
- // 4. Deny access if the target process "dumpable" attribute has a value
+ // 3. Deny access if the target process "dumpable" attribute has a value
// other than 1 (SUID_DUMP_USER; see the discussion of PR_SET_DUMPABLE in
// prctl(2)), and the caller does not have the CAP_SYS_PTRACE capability in
// the user namespace of the target process.
//
- // 5. The kernel LSM security_ptrace_access_check() interface is invoked to
- // see if ptrace access is permitted. The results depend on the LSM(s). The
- // implementation of this interface in the commoncap LSM performs the
- // following steps:
+ // 4. The commoncap LSM performs the following steps:
//
// a) If the access mode includes PTRACE_MODE_FSCREDS, then use the
// caller's effective capability set; otherwise (the access mode specifies
@@ -188,6 +238,94 @@ func (t *Task) CanTrace(target *Task, attach bool) bool {
return true
}
+// canTraceYAMALocked applies the ptrace access rules of the YAMA LSM
+// (security/yama/yama_lsm.c:yama_ptrace_access_check) as they behave when YAMA
+// is configured to mode 1, a common default among Linux distributions.
+//
+// Access is granted when at least one of the following holds:
+//
+//   - target's current tracer belongs to t's thread group;
+//   - target is a descendant of t;
+//   - target has explicitly granted permission through the PR_SET_PTRACER
+//     prctl;
+//   - t has CAP_SYS_PTRACE in target's user namespace.
+//
+// Precondition: the TaskSet mutex must be locked (for reading or writing).
+func (t *Task) canTraceYAMALocked(target *Task) bool {
+	tracer := target.Tracer()
+	switch {
+	case tracer != nil && tracer.tg == t.tg:
+		return true
+	case target.isYAMADescendantOfLocked(t):
+		return true
+	case target.hasYAMAExceptionForLocked(t):
+		return true
+	default:
+		return t.HasCapabilityIn(linux.CAP_SYS_PTRACE, target.UserNamespace())
+	}
+}
+
+// isYAMADescendantOfLocked reports whether, for the purposes of YAMA
+// permissions, t counts as a descendant of ancestor. It follows the parent
+// chain starting at t itself and succeeds as soon as a task on that chain
+// shares its thread group leader with ancestor.
+//
+// Precondition: the TaskSet mutex must be locked (for reading or writing).
+func (t *Task) isYAMADescendantOfLocked(ancestor *Task) bool {
+	for cur := t; cur != nil; cur = cur.parent {
+		if cur.tg.leader == ancestor.tg.leader {
+			return true
+		}
+	}
+	return false
+}
+
+// hasYAMAExceptionForLocked reports whether a YAMA exception permits tracer to
+// trace t.
+//
+// Exceptions are stored in Kernel.ptraceExceptions keyed by the tracee's thread
+// group leader (see SetYAMAException and ClearYAMAException), so the lookup
+// uses t's leader rather than t itself. Looking up t directly would miss the
+// exception whenever t is a non-leader thread of the tracee process.
+//
+// Precondition: the TaskSet mutex must be locked (for reading or writing).
+func (t *Task) hasYAMAExceptionForLocked(tracer *Task) bool {
+	allowed, ok := t.k.ptraceExceptions[t.tg.leader]
+	if !ok {
+		return false
+	}
+	// A nil value means that any task may trace the process; otherwise the
+	// tracer must be a descendant of the recorded task.
+	return allowed == nil || tracer.isYAMADescendantOfLocked(allowed)
+}
+
+// ClearYAMAException drops the YAMA exception, if one exists, whose tracee is
+// t's thread group leader. It takes the TaskSet mutex for writing.
+func (t *Task) ClearYAMAException() {
+	t.tg.pidns.owner.mu.Lock()
+	defer t.tg.pidns.owner.mu.Unlock()
+	// Exceptions are keyed by the thread group leader, not by t itself.
+	delete(t.k.ptraceExceptions, t.tg.leader)
+}
+
+// SetYAMAException records a YAMA exception that lets every descendant of
+// tracer trace t's process. A nil tracer lets any task trace it.
+//
+// The exception is stored under t's thread group leader and replaces any
+// exception already recorded for it. The TaskSet mutex is taken for writing.
+func (t *Task) SetYAMAException(tracer *Task) {
+	t.tg.pidns.owner.mu.Lock()
+	defer t.tg.pidns.owner.mu.Unlock()
+
+	leader := t.tg.leader
+
+	// Flag every task involved so that task exit knows it has YAMA exceptions
+	// to clean up.
+	leader.ptraceYAMAExceptionAdded = true
+	if tracer != nil {
+		tracer.ptraceYAMAExceptionAdded = true
+	}
+
+	t.k.ptraceExceptions[leader] = tracer
+}
+
// Tracer returns t's ptrace Tracer.
func (t *Task) Tracer() *Task {
return t.ptraceTracer.Load().(*Task)
@@ -358,7 +496,7 @@ func (t *Task) ptraceTraceme() error {
// returning nil here is correct.
return nil
}
- if !t.parent.CanTrace(t, true) {
+ if !t.parent.canTraceLocked(t, true) {
return syserror.EPERM
}
if t.parent.exitState != TaskExitNone {
@@ -377,11 +515,11 @@ func (t *Task) ptraceAttach(target *Task, seize bool, opts uintptr) error {
if t.tg == target.tg {
return syserror.EPERM
}
- if !t.CanTrace(target, true) {
- return syserror.EPERM
- }
t.tg.pidns.owner.mu.Lock()
defer t.tg.pidns.owner.mu.Unlock()
+ if !t.canTraceLocked(target, true) {
+ return syserror.EPERM
+ }
if target.hasTracer() {
return syserror.EPERM
}
@@ -459,6 +597,15 @@ func (t *Task) exitPtrace() {
}
// "nil maps cannot be saved"
t.ptraceTracees = make(map[*Task]struct{})
+
+ if t.ptraceYAMAExceptionAdded {
+ delete(t.k.ptraceExceptions, t)
+ for tracee, tracer := range t.k.ptraceExceptions {
+ if tracer == t {
+ delete(t.k.ptraceExceptions, tracee)
+ }
+ }
+ }
}
// forgetTracerLocked detaches t's tracer and ensures that t is no longer
diff --git a/pkg/sentry/kernel/task.go b/pkg/sentry/kernel/task.go
index c0ab53c94..36141dd09 100644
--- a/pkg/sentry/kernel/task.go
+++ b/pkg/sentry/kernel/task.go
@@ -403,6 +403,13 @@ type Task struct {
// ptraceEventMsg is protected by the TaskSet mutex.
ptraceEventMsg uint64
+ // ptraceYAMAExceptionAdded is true if a YAMA exception involving the task has
+ // been added before. This is used during task exit to decide whether we need
+ // to clean up YAMA exceptions.
+ //
+ // ptraceYAMAExceptionAdded is protected by the TaskSet mutex.
+ ptraceYAMAExceptionAdded bool
+
// The struct that holds the IO-related usage. The ioUsage pointer is
// immutable.
ioUsage *usage.IO
diff --git a/pkg/sentry/kernel/task_exit.go b/pkg/sentry/kernel/task_exit.go
index f7765fa3a..ad59e4f60 100644
--- a/pkg/sentry/kernel/task_exit.go
+++ b/pkg/sentry/kernel/task_exit.go
@@ -694,7 +694,8 @@ func (t *Task) exitNotifyLocked(fromPtraceDetach bool) {
}
if t.parent != nil {
delete(t.parent.children, t)
- t.parent = nil
+ // Do not clear t.parent. It may still be needed after the task has exited
+ // (for example, to perform ptrace access checks on /proc/[pid] files).
}
}
}
diff --git a/pkg/sentry/kernel/threads.go b/pkg/sentry/kernel/threads.go
index e9da99067..09d070ec8 100644
--- a/pkg/sentry/kernel/threads.go
+++ b/pkg/sentry/kernel/threads.go
@@ -55,7 +55,7 @@ const InitTID ThreadID = 1
//
// +stateify savable
type TaskSet struct {
- // mu protects all relationships betweens tasks and thread groups in the
+ // mu protects all relationships between tasks and thread groups in the
// TaskSet. (mu is approximately equivalent to Linux's tasklist_lock.)
mu sync.RWMutex `state:"nosave"`
diff --git a/pkg/sentry/syscalls/linux/sys_prctl.go b/pkg/sentry/syscalls/linux/sys_prctl.go
index a892d2c62..9890dd946 100644
--- a/pkg/sentry/syscalls/linux/sys_prctl.go
+++ b/pkg/sentry/syscalls/linux/sys_prctl.go
@@ -172,6 +172,24 @@ func Prctl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall
}
return 1, nil, nil
+ case linux.PR_SET_PTRACER:
+ pid := args[1].Int()
+ switch pid {
+ case 0:
+ t.ClearYAMAException()
+ return 0, nil, nil
+ case linux.PR_SET_PTRACER_ANY:
+ t.SetYAMAException(nil)
+ return 0, nil, nil
+ default:
+ tracer := t.PIDNamespace().TaskWithID(kernel.ThreadID(pid))
+ if tracer == nil {
+ return 0, nil, syserror.EINVAL
+ }
+ t.SetYAMAException(tracer)
+ return 0, nil, nil
+ }
+
case linux.PR_SET_SECCOMP:
if args[1].Int() != linux.SECCOMP_MODE_FILTER {
// Unsupported mode.