Diffstat (limited to 'pkg/sentry')
-rw-r--r--  pkg/sentry/fsimpl/proc/BUILD              1
-rw-r--r--  pkg/sentry/fsimpl/proc/tasks_sys.go       3
-rw-r--r--  pkg/sentry/fsimpl/proc/yama.go           80
-rw-r--r--  pkg/sentry/kernel/kernel.go              15
-rw-r--r--  pkg/sentry/kernel/ptrace.go             175
-rw-r--r--  pkg/sentry/kernel/task.go                 7
-rw-r--r--  pkg/sentry/kernel/task_exit.go            3
-rw-r--r--  pkg/sentry/kernel/threads.go              2
-rw-r--r--  pkg/sentry/syscalls/linux/sys_prctl.go   18
9 files changed, 287 insertions, 17 deletions
diff --git a/pkg/sentry/fsimpl/proc/BUILD b/pkg/sentry/fsimpl/proc/BUILD
index 5196a2a80..d47a4fff9 100644
--- a/pkg/sentry/fsimpl/proc/BUILD
+++ b/pkg/sentry/fsimpl/proc/BUILD
@@ -75,6 +75,7 @@ go_library(
"tasks_files.go",
"tasks_inode_refs.go",
"tasks_sys.go",
+ "yama.go",
],
visibility = ["//pkg/sentry:internal"],
deps = [
diff --git a/pkg/sentry/fsimpl/proc/tasks_sys.go b/pkg/sentry/fsimpl/proc/tasks_sys.go
index 25c407d98..fd7823daa 100644
--- a/pkg/sentry/fsimpl/proc/tasks_sys.go
+++ b/pkg/sentry/fsimpl/proc/tasks_sys.go
@@ -48,6 +48,9 @@ func (fs *filesystem) newSysDir(ctx context.Context, root *auth.Credentials, k *
"shmall": fs.newInode(ctx, root, 0444, shmData(linux.SHMALL)),
"shmmax": fs.newInode(ctx, root, 0444, shmData(linux.SHMMAX)),
"shmmni": fs.newInode(ctx, root, 0444, shmData(linux.SHMMNI)),
+ "yama": fs.newStaticDir(ctx, root, map[string]kernfs.Inode{
+ "ptrace_scope": fs.newYAMAPtraceScopeFile(ctx, k, root),
+ }),
}),
"vm": fs.newStaticDir(ctx, root, map[string]kernfs.Inode{
"mmap_min_addr": fs.newInode(ctx, root, 0444, &mmapMinAddrData{k: k}),
diff --git a/pkg/sentry/fsimpl/proc/yama.go b/pkg/sentry/fsimpl/proc/yama.go
new file mode 100644
index 000000000..aebfe8944
--- /dev/null
+++ b/pkg/sentry/fsimpl/proc/yama.go
@@ -0,0 +1,80 @@
+// Copyright 2021 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package proc
+
+import (
+ "bytes"
+ "fmt"
+ "sync/atomic"
+
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+ "gvisor.dev/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/usermem"
+)
+
+func (fs *filesystem) newYAMAPtraceScopeFile(ctx context.Context, k *kernel.Kernel, creds *auth.Credentials) kernfs.Inode {
+ s := &yamaPtraceScope{level: &k.YAMAPtraceScope}
+ s.Init(ctx, creds, linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), s, 0644)
+ return s
+}
+
+// yamaPtraceScope implements vfs.WritableDynamicBytesSource for
+// /sys/kernel/yama/ptrace_scope.
+//
+// +stateify savable
+type yamaPtraceScope struct {
+ kernfs.DynamicBytesFile
+
+ // level is the ptrace_scope level.
+ level *int32
+}
+
+// Generate implements vfs.DynamicBytesSource.Generate.
+func (s *yamaPtraceScope) Generate(ctx context.Context, buf *bytes.Buffer) error {
+ _, err := fmt.Fprintf(buf, "%d\n", atomic.LoadInt32(s.level))
+ return err
+}
+
+// Write implements vfs.WritableDynamicBytesSource.Write.
+func (s *yamaPtraceScope) Write(ctx context.Context, src usermem.IOSequence, offset int64) (int64, error) {
+ if offset != 0 {
+ // Ignore partial writes.
+ return 0, syserror.EINVAL
+ }
+ if src.NumBytes() == 0 {
+ return 0, nil
+ }
+
+ // Limit the amount of memory allocated.
+ src = src.TakeFirst(usermem.PageSize - 1)
+
+ var v int32
+ n, err := usermem.CopyInt32StringInVec(ctx, src.IO, src.Addrs, &v, src.Opts)
+ if err != nil {
+ return 0, err
+ }
+
+ // We do not support YAMA levels > YAMA_SCOPE_RELATIONAL.
+ if v < linux.YAMA_SCOPE_DISABLED || v > linux.YAMA_SCOPE_RELATIONAL {
+ return 0, syserror.EINVAL
+ }
+
+ atomic.StoreInt32(s.level, v)
+ return n, nil
+}
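
A quick way to see the new control in action (this snippet is illustrative, not part of the commit): a minimal Go program that reads the sysctl and, assuming it has write permission on the 0644 file (typically root), lowers it to YAMA_SCOPE_DISABLED. Values above 1 are rejected with EINVAL by the Write implementation above.

package main

import (
	"fmt"
	"os"
	"strings"
)

// Path of the sysctl file added by this change.
const ptraceScopePath = "/proc/sys/kernel/yama/ptrace_scope"

func main() {
	// Read the current level; the sentry default is 1 (YAMA_SCOPE_RELATIONAL).
	data, err := os.ReadFile(ptraceScopePath)
	if err != nil {
		fmt.Fprintln(os.Stderr, "read:", err)
		os.Exit(1)
	}
	fmt.Println("current ptrace_scope:", strings.TrimSpace(string(data)))

	// Lower the level to 0 (YAMA_SCOPE_DISABLED); 2 and 3 are not supported
	// by this change and would fail with EINVAL.
	if err := os.WriteFile(ptraceScopePath, []byte("0\n"), 0644); err != nil {
		fmt.Fprintln(os.Stderr, "write:", err)
		os.Exit(1)
	}
}
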
diff --git a/pkg/sentry/kernel/kernel.go b/pkg/sentry/kernel/kernel.go
index ef4e934a1..43065b45a 100644
--- a/pkg/sentry/kernel/kernel.go
+++ b/pkg/sentry/kernel/kernel.go
@@ -282,6 +282,18 @@ type Kernel struct {
// If set to true, report address space activation waits as if the task is in
// external wait so that the watchdog doesn't report the task stuck.
SleepForAddressSpaceActivation bool
+
+ // Exceptions to YAMA ptrace restrictions. Each key-value pair represents a
+ // tracee-tracer relationship. The key is a process (technically, the thread
+ // group leader) that can be traced by any thread that is a descendant of the
+ // value. If the value is nil, then anyone can trace the process represented by
+ // the key.
+ //
+ // ptraceExceptions is protected by the TaskSet mutex.
+ ptraceExceptions map[*Task]*Task
+
+ // YAMAPtraceScope is the current level of YAMA ptrace restrictions.
+ YAMAPtraceScope int32
}
// InitKernelArgs holds arguments to Init.
@@ -382,6 +394,8 @@ func (k *Kernel) Init(args InitKernelArgs) error {
k.monotonicClock = &timekeeperClock{tk: args.Timekeeper, c: sentrytime.Monotonic}
k.futexes = futex.NewManager()
k.netlinkPorts = port.New()
+ k.ptraceExceptions = make(map[*Task]*Task)
+ k.YAMAPtraceScope = linux.YAMA_SCOPE_RELATIONAL
if VFS2Enabled {
ctx := k.SupervisorContext()
@@ -425,7 +439,6 @@ func (k *Kernel) Init(args InitKernelArgs) error {
k.socketsVFS2 = make(map[*vfs.FileDescription]*SocketRecord)
}
-
return nil
}
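
For orientation (not part of the diff), the full set of Linux YAMA levels that YAMAPtraceScope mirrors is sketched below. The first two correspond to the abi/linux constants used in this change; levels 2 and 3 are standard Linux values shown only for context, since this change rejects them. The package name is hypothetical.

package yamaref

// Linux ptrace_scope levels, per Documentation/admin-guide/LSM/Yama.rst.
const (
	// YAMA_SCOPE_DISABLED: classic ptrace permission checks only.
	YAMA_SCOPE_DISABLED = 0
	// YAMA_SCOPE_RELATIONAL: the tracer must already be attached, be an
	// ancestor of the tracee, be named via PR_SET_PTRACER, or hold
	// CAP_SYS_PTRACE. This is the sentry default set in Kernel.Init above.
	YAMA_SCOPE_RELATIONAL = 1
	// YAMA_SCOPE_CAPABILITY: only CAP_SYS_PTRACE tracers may attach
	// (not implemented by this change).
	YAMA_SCOPE_CAPABILITY = 2
	// YAMA_SCOPE_NO_ATTACH: no process may be attached to (not implemented).
	YAMA_SCOPE_NO_ATTACH = 3
)
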
diff --git a/pkg/sentry/kernel/ptrace.go b/pkg/sentry/kernel/ptrace.go
index cef58a590..c3980350a 100644
--- a/pkg/sentry/kernel/ptrace.go
+++ b/pkg/sentry/kernel/ptrace.go
@@ -16,6 +16,7 @@ package kernel
import (
"fmt"
+ "sync/atomic"
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/marshal/primitive"
@@ -95,7 +96,11 @@ const (
// checks for access mode PTRACE_MODE_ATTACH; otherwise, it checks for access
// mode PTRACE_MODE_READ.
//
-// NOTE(b/30815691): The result of CanTrace is immediately stale (e.g., a
+// In Linux, ptrace access restrictions may be configured by LSMs. While we do
+// not support LSMs, we do add additional restrictions based on the commoncap
+// and YAMA LSMs.
+//
+// TODO(gvisor.dev/issue/212): The result of CanTrace is immediately stale (e.g., a
// racing setuid(2) may change traceability). This may pose a risk when a task
// changes from traceable to not traceable. This is only problematic across
// execve, where privileges may increase.
@@ -103,7 +108,7 @@ const (
// We currently do not implement privileged executables (set-user/group-ID bits
// and file capabilities), so that case is not reachable.
func (t *Task) CanTrace(target *Task, attach bool) bool {
- // "1. If the calling thread and the target thread are in the same thread
+ // "If the calling thread and the target thread are in the same thread
// group, access is always allowed." - ptrace(2)
//
// Note: Strictly speaking, prior to 73af963f9f30 ("__ptrace_may_access()
@@ -115,9 +120,57 @@ func (t *Task) CanTrace(target *Task, attach bool) bool {
return true
}
+ if !t.canTraceStandard(target, attach) {
+ return false
+ }
+
+ // YAMA only supported for vfs2.
+ if !VFS2Enabled {
+ return true
+ }
+
+ if atomic.LoadInt32(&t.k.YAMAPtraceScope) == linux.YAMA_SCOPE_RELATIONAL {
+ t.tg.pidns.owner.mu.RLock()
+ defer t.tg.pidns.owner.mu.RUnlock()
+ if !t.canTraceYAMALocked(target) {
+ return false
+ }
+ }
+ return true
+}
+
+// canTraceLocked is the same as CanTrace, except the caller must already hold
+// the TaskSet mutex (for reading or writing).
+func (t *Task) canTraceLocked(target *Task, attach bool) bool {
+ if t.tg == target.tg {
+ return true
+ }
+
+ if !t.canTraceStandard(target, attach) {
+ return false
+ }
+
+ // YAMA only supported for vfs2.
+ if !VFS2Enabled {
+ return true
+ }
+
+ if atomic.LoadInt32(&t.k.YAMAPtraceScope) == linux.YAMA_SCOPE_RELATIONAL {
+ if !t.canTraceYAMALocked(target) {
+ return false
+ }
+ }
+ return true
+}
+
+// canTraceStandard performs standard ptrace access checks as defined by
+// kernel/ptrace.c:__ptrace_may_access as well as the commoncap LSM
+// implementation of the security_ptrace_access_check() interface, which is
+// always invoked.
+func (t *Task) canTraceStandard(target *Task, attach bool) bool {
// """
- // 2. If the access mode specifies PTRACE_MODE_FSCREDS (ED: snipped,
- // doesn't exist until Linux 4.5).
+ // TODO(gvisor.dev/issue/260): 1. If the access mode specifies
+ // PTRACE_MODE_FSCREDS (ED: snipped, doesn't exist until Linux 4.5).
//
// Otherwise, the access mode specifies PTRACE_MODE_REALCREDS, so use the
// caller's real UID and GID for the checks in the next step. (Most APIs
@@ -125,7 +178,7 @@ func (t *Task) CanTrace(target *Task, attach bool) bool {
// historical reasons, the PTRACE_MODE_REALCREDS check uses the real IDs
// instead.)
//
- // 3. Deny access if neither of the following is true:
+ // 2. Deny access if neither of the following is true:
//
// - The real, effective, and saved-set user IDs of the target match the
// caller's user ID, *and* the real, effective, and saved-set group IDs of
@@ -134,15 +187,12 @@ func (t *Task) CanTrace(target *Task, attach bool) bool {
// - The caller has the CAP_SYS_PTRACE capability in the user namespace of
// the target.
//
- // 4. Deny access if the target process "dumpable" attribute has a value
+ // 3. Deny access if the target process "dumpable" attribute has a value
// other than 1 (SUID_DUMP_USER; see the discussion of PR_SET_DUMPABLE in
// prctl(2)), and the caller does not have the CAP_SYS_PTRACE capability in
// the user namespace of the target process.
//
- // 5. The kernel LSM security_ptrace_access_check() interface is invoked to
- // see if ptrace access is permitted. The results depend on the LSM(s). The
- // implementation of this interface in the commoncap LSM performs the
- // following steps:
+ // 4. The commoncap LSM performs the following steps:
//
// a) If the access mode includes PTRACE_MODE_FSCREDS, then use the
// caller's effective capability set; otherwise (the access mode specifies
@@ -188,6 +238,94 @@ func (t *Task) CanTrace(target *Task, attach bool) bool {
return true
}
+// canTraceYAMALocked performs ptrace access checks as defined by the YAMA LSM
+// implementation of the security_ptrace_access_check() interface, with YAMA
+// configured to mode 1. This is a common default among various Linux
+// distributions.
+//
+// It only permits the tracer to proceed if one of the following conditions is
+// met:
+//
+// a) The tracer is already attached to the tracee.
+//
+// b) The target is a descendant of the tracer.
+//
+// c) The target has explicitly given permission to the tracer through the
+// PR_SET_PTRACER prctl.
+//
+// d) The tracer has CAP_SYS_PTRACE.
+//
+// See security/yama/yama_lsm.c:yama_ptrace_access_check.
+//
+// Precondition: the TaskSet mutex must be locked (for reading or writing).
+func (t *Task) canTraceYAMALocked(target *Task) bool {
+ if tracer := target.Tracer(); tracer != nil {
+ if tracer.tg == t.tg {
+ return true
+ }
+ }
+ if target.isYAMADescendantOfLocked(t) {
+ return true
+ }
+ if target.hasYAMAExceptionForLocked(t) {
+ return true
+ }
+ if t.HasCapabilityIn(linux.CAP_SYS_PTRACE, target.UserNamespace()) {
+ return true
+ }
+ return false
+}
+
+// isYAMADescendantOfLocked determines whether t is considered a descendant of
+// ancestor for the purposes of YAMA permissions (specifically, whether t's
+// thread group is descended from ancestor's).
+//
+// Precondition: the TaskSet mutex must be locked (for reading or writing).
+func (t *Task) isYAMADescendantOfLocked(ancestor *Task) bool {
+ walker := t
+ for walker != nil {
+ if walker.tg.leader == ancestor.tg.leader {
+ return true
+ }
+ walker = walker.parent
+ }
+ return false
+}
+
+// Precondition: the TaskSet mutex must be locked (for reading or writing).
+func (t *Task) hasYAMAExceptionForLocked(tracer *Task) bool {
+ allowed, ok := t.k.ptraceExceptions[t]
+ if !ok {
+ return false
+ }
+ return allowed == nil || tracer.isYAMADescendantOfLocked(allowed)
+}
+
+// ClearYAMAException removes any YAMA exception with t as the tracee.
+func (t *Task) ClearYAMAException() {
+ t.tg.pidns.owner.mu.Lock()
+ defer t.tg.pidns.owner.mu.Unlock()
+ tracee := t.tg.leader
+ delete(t.k.ptraceExceptions, tracee)
+}
+
+// SetYAMAException creates a YAMA exception allowing all descendants of tracer
+// to trace t. If tracer is nil, then any task is allowed to trace t.
+//
+// If there was an existing exception, it is overwritten with the new one.
+func (t *Task) SetYAMAException(tracer *Task) {
+ t.tg.pidns.owner.mu.Lock()
+ defer t.tg.pidns.owner.mu.Unlock()
+
+ tracee := t.tg.leader
+ tracee.ptraceYAMAExceptionAdded = true
+ if tracer != nil {
+ tracer.ptraceYAMAExceptionAdded = true
+ }
+
+ t.k.ptraceExceptions[tracee] = tracer
+}
+
// Tracer returns t's ptrace Tracer.
func (t *Task) Tracer() *Task {
return t.ptraceTracer.Load().(*Task)
@@ -358,7 +496,7 @@ func (t *Task) ptraceTraceme() error {
// returning nil here is correct.
return nil
}
- if !t.parent.CanTrace(t, true) {
+ if !t.parent.canTraceLocked(t, true) {
return syserror.EPERM
}
if t.parent.exitState != TaskExitNone {
@@ -377,11 +515,11 @@ func (t *Task) ptraceAttach(target *Task, seize bool, opts uintptr) error {
if t.tg == target.tg {
return syserror.EPERM
}
- if !t.CanTrace(target, true) {
- return syserror.EPERM
- }
t.tg.pidns.owner.mu.Lock()
defer t.tg.pidns.owner.mu.Unlock()
+ if !t.canTraceLocked(target, true) {
+ return syserror.EPERM
+ }
if target.hasTracer() {
return syserror.EPERM
}
@@ -459,6 +597,15 @@ func (t *Task) exitPtrace() {
}
// "nil maps cannot be saved"
t.ptraceTracees = make(map[*Task]struct{})
+
+ if t.ptraceYAMAExceptionAdded {
+ delete(t.k.ptraceExceptions, t)
+ for tracee, tracer := range t.k.ptraceExceptions {
+ if tracer == t {
+ delete(t.k.ptraceExceptions, tracee)
+ }
+ }
+ }
}
// forgetTracerLocked detaches t's tracer and ensures that t is no longer
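
To make condition (c) of canTraceYAMALocked concrete, here is a hedged userspace sketch (not part of the commit) of how a tracee whitelists a specific, non-descendant tracer under YAMA_SCOPE_RELATIONAL using golang.org/x/sys/unix; debuggerPID is a hypothetical stand-in for the real tracer's PID.

package main

import (
	"fmt"
	"os"

	"golang.org/x/sys/unix"
)

func main() {
	// Hypothetical PID of a non-descendant process (e.g. a crash handler)
	// that should be allowed to PTRACE_ATTACH to this process.
	debuggerPID := 1234

	// Equivalent to prctl(PR_SET_PTRACER, debuggerPID, 0, 0, 0); handled by
	// the PR_SET_PTRACER case added in sys_prctl.go below, which calls
	// Task.SetYAMAException.
	if err := unix.Prctl(unix.PR_SET_PTRACER, uintptr(debuggerPID), 0, 0, 0); err != nil {
		fmt.Fprintln(os.Stderr, "prctl(PR_SET_PTRACER):", err)
		os.Exit(1)
	}

	// Passing 0 clears the exception (ClearYAMAException), and
	// PR_SET_PTRACER_ANY allows any process to attach (SetYAMAException(nil)).
}
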
diff --git a/pkg/sentry/kernel/task.go b/pkg/sentry/kernel/task.go
index c0ab53c94..36141dd09 100644
--- a/pkg/sentry/kernel/task.go
+++ b/pkg/sentry/kernel/task.go
@@ -403,6 +403,13 @@ type Task struct {
// ptraceEventMsg is protected by the TaskSet mutex.
ptraceEventMsg uint64
+ // ptraceYAMAExceptionAdded is true if a YAMA exception involving the task has
+ // been added before. This is used during task exit to decide whether we need
+ // to clean up YAMA exceptions.
+ //
+ // ptraceYAMAExceptionAdded is protected by the TaskSet mutex.
+ ptraceYAMAExceptionAdded bool
+
// The struct that holds the IO-related usage. The ioUsage pointer is
// immutable.
ioUsage *usage.IO
diff --git a/pkg/sentry/kernel/task_exit.go b/pkg/sentry/kernel/task_exit.go
index f7765fa3a..ad59e4f60 100644
--- a/pkg/sentry/kernel/task_exit.go
+++ b/pkg/sentry/kernel/task_exit.go
@@ -694,7 +694,8 @@ func (t *Task) exitNotifyLocked(fromPtraceDetach bool) {
}
if t.parent != nil {
delete(t.parent.children, t)
- t.parent = nil
+ // Do not clear t.parent. It may still be needed after the task has exited
+ // (for example, to perform ptrace access checks on /proc/[pid] files).
}
}
}
diff --git a/pkg/sentry/kernel/threads.go b/pkg/sentry/kernel/threads.go
index e9da99067..09d070ec8 100644
--- a/pkg/sentry/kernel/threads.go
+++ b/pkg/sentry/kernel/threads.go
@@ -55,7 +55,7 @@ const InitTID ThreadID = 1
//
// +stateify savable
type TaskSet struct {
- // mu protects all relationships betweens tasks and thread groups in the
+ // mu protects all relationships between tasks and thread groups in the
// TaskSet. (mu is approximately equivalent to Linux's tasklist_lock.)
mu sync.RWMutex `state:"nosave"`
diff --git a/pkg/sentry/syscalls/linux/sys_prctl.go b/pkg/sentry/syscalls/linux/sys_prctl.go
index a892d2c62..9890dd946 100644
--- a/pkg/sentry/syscalls/linux/sys_prctl.go
+++ b/pkg/sentry/syscalls/linux/sys_prctl.go
@@ -172,6 +172,24 @@ func Prctl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall
}
return 1, nil, nil
+ case linux.PR_SET_PTRACER:
+ pid := args[1].Int()
+ switch pid {
+ case 0:
+ t.ClearYAMAException()
+ return 0, nil, nil
+ case linux.PR_SET_PTRACER_ANY:
+ t.SetYAMAException(nil)
+ return 0, nil, nil
+ default:
+ tracer := t.PIDNamespace().TaskWithID(kernel.ThreadID(pid))
+ if tracer == nil {
+ return 0, nil, syserror.EINVAL
+ }
+ t.SetYAMAException(tracer)
+ return 0, nil, nil
+ }
+
case linux.PR_SET_SECCOMP:
if args[1].Int() != linux.SECCOMP_MODE_FILTER {
// Unsupported mode.