summaryrefslogtreecommitdiffhomepage
path: root/pkg
diff options
context:
space:
mode:
Diffstat (limited to 'pkg')
-rw-r--r-- pkg/abi/linux/prctl.go 7
-rw-r--r-- pkg/sentry/fs/proc/inode.go 40
-rw-r--r-- pkg/sentry/fs/proc/task.go 17
-rw-r--r-- pkg/sentry/kernel/ptrace.go 17
-rw-r--r-- pkg/sentry/kernel/task_exec.go 7
-rw-r--r-- pkg/sentry/kernel/task_identity.go 24
-rw-r--r-- pkg/sentry/mm/lifecycle.go 6
-rw-r--r-- pkg/sentry/mm/metadata.go 30
-rw-r--r-- pkg/sentry/mm/mm.go 6
-rw-r--r-- pkg/sentry/syscalls/linux/sys_prctl.go 33
10 files changed, 176 insertions, 11 deletions
diff --git a/pkg/abi/linux/prctl.go b/pkg/abi/linux/prctl.go
index 0428282dd..391cfaa1c 100644
--- a/pkg/abi/linux/prctl.go
+++ b/pkg/abi/linux/prctl.go
@@ -155,3 +155,10 @@ const (
ARCH_GET_GS = 0x1004
ARCH_SET_CPUID = 0x1012
)
+
+// Dumpability values exchanged with userspace by prctl(PR_GET_DUMPABLE) and prctl(PR_SET_DUMPABLE); defined in include/linux/sched/coredump.h.
+const (
+ SUID_DUMP_DISABLE = 0 // Not dumpable; the prctl handler maps this to and from mm.NotDumpable.
+ SUID_DUMP_USER = 1 // Dumpable as the current user; maps to and from mm.UserDumpable.
+ SUID_DUMP_ROOT = 2 // Dumpable as root; reported by PR_GET_DUMPABLE for mm.RootDumpable, rejected with EINVAL by PR_SET_DUMPABLE.
+)
diff --git a/pkg/sentry/fs/proc/inode.go b/pkg/sentry/fs/proc/inode.go
index 379569823..986bc0a45 100644
--- a/pkg/sentry/fs/proc/inode.go
+++ b/pkg/sentry/fs/proc/inode.go
@@ -21,11 +21,14 @@ import (
"gvisor.googlesource.com/gvisor/pkg/sentry/fs/fsutil"
"gvisor.googlesource.com/gvisor/pkg/sentry/fs/proc/device"
"gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/mm"
"gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
)
// taskOwnedInodeOps wraps an fs.InodeOperations and overrides the UnstableAttr
-// method to return the task as the owner.
+// method to return either the task or root as the owner, depending on the
+// task's dumpability.
//
// +stateify savable
type taskOwnedInodeOps struct {
@@ -41,9 +44,42 @@ func (i *taskOwnedInodeOps) UnstableAttr(ctx context.Context, inode *fs.Inode) (
if err != nil {
return fs.UnstableAttr{}, err
}
- // Set the task owner as the file owner.
+
+ // By default, set the task owner as the file owner.
creds := i.t.Credentials()
uattr.Owner = fs.FileOwner{creds.EffectiveKUID, creds.EffectiveKGID}
+
+ // Linux doesn't apply dumpability adjustments to world
+ // readable/executable directories so that applications can stat
+ // /proc/PID to determine the effective UID of a process. See
+ // fs/proc/base.c:task_dump_owner.
+ if fs.IsDir(inode.StableAttr) && uattr.Perms == fs.FilePermsFromMode(0555) {
+ return uattr, nil
+ }
+
+ // If the task is not user-dumpable, then root owns the file: root in
+ // the task's user namespace if it maps to a valid ID, else global root.
+ var m *mm.MemoryManager
+ i.t.WithMuLocked(func(t *kernel.Task) {
+ m = t.MemoryManager()
+ })
+
+ if m == nil {
+ uattr.Owner.UID = auth.RootKUID
+ uattr.Owner.GID = auth.RootKGID
+ } else if m.Dumpability() != mm.UserDumpable {
+ if kuid := creds.UserNamespace.MapToKUID(auth.RootUID); kuid.Ok() {
+ uattr.Owner.UID = kuid
+ } else {
+ uattr.Owner.UID = auth.RootKUID
+ }
+ if kgid := creds.UserNamespace.MapToKGID(auth.RootGID); kgid.Ok() {
+ uattr.Owner.GID = kgid
+ } else {
+ uattr.Owner.GID = auth.RootKGID
+ }
+ }
+
return uattr, nil
}
diff --git a/pkg/sentry/fs/proc/task.go b/pkg/sentry/fs/proc/task.go
index 77e03d349..21a965f90 100644
--- a/pkg/sentry/fs/proc/task.go
+++ b/pkg/sentry/fs/proc/task.go
@@ -96,7 +96,7 @@ func (p *proc) newTaskDir(t *kernel.Task, msrc *fs.MountSource, showSubtasks boo
contents["cgroup"] = newCGroupInode(t, msrc, p.cgroupControllers)
}
- // TODO(b/31916171): Set EUID/EGID based on dumpability.
+ // N.B. taskOwnedInodeOps enforces dumpability-based ownership.
d := &taskDir{
Dir: *ramfs.NewDir(t, contents, fs.RootOwner, fs.FilePermsFromMode(0555)),
t: t,
@@ -667,6 +667,21 @@ func newComm(t *kernel.Task, msrc *fs.MountSource) *fs.Inode {
return newProcInode(c, msrc, fs.SpecialFile, t)
}
+// Check implements fs.InodeOperations.Check: it reports whether the caller in ctx is granted the access requested in p.
+func (c *comm) Check(ctx context.Context, inode *fs.Inode, p fs.PermMask) bool {
+ // Members of the owning task's thread group may always read or write
+ // this file. See fs/proc/base.c:proc_tid_comm_permission.
+ //
+ // N.B. This check is currently a no-op, as we don't yet support writing
+ // and this file is world-readable anyway.
+ t := kernel.TaskFromContext(ctx) // A nil result is tolerated by the check below.
+ if t != nil && t.ThreadGroup() == c.t.ThreadGroup() && !p.Execute {
+ return true // Same thread group and no execute access requested.
+ }
+
+ return fs.ContextCanAccessFile(ctx, inode, p) // Otherwise fall back to fs.ContextCanAccessFile.
+}
+
// GetFile implements fs.InodeOperations.GetFile.
func (c *comm) GetFile(ctx context.Context, dirent *fs.Dirent, flags fs.FileFlags) (*fs.File, error) {
return fs.NewFile(ctx, dirent, flags, &commFile{t: c.t}), nil
diff --git a/pkg/sentry/kernel/ptrace.go b/pkg/sentry/kernel/ptrace.go
index 4423e7efd..193447b17 100644
--- a/pkg/sentry/kernel/ptrace.go
+++ b/pkg/sentry/kernel/ptrace.go
@@ -19,6 +19,7 @@ import (
"gvisor.googlesource.com/gvisor/pkg/abi/linux"
"gvisor.googlesource.com/gvisor/pkg/sentry/arch"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/mm"
"gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
"gvisor.googlesource.com/gvisor/pkg/syserror"
)
@@ -92,6 +93,14 @@ const (
// ptrace(2), subsection "Ptrace access mode checking". If attach is true, it
// checks for access mode PTRACE_MODE_ATTACH; otherwise, it checks for access
// mode PTRACE_MODE_READ.
+//
+// NOTE(b/30815691): The result of CanTrace is immediately stale (e.g., a
+// racing setuid(2) may change traceability). This may pose a risk when a task
+// changes from traceable to not traceable. This is only problematic across
+// execve, where privileges may increase.
+//
+// We currently do not implement privileged executables (set-user/group-ID bits
+// and file capabilities), so that case is not reachable.
func (t *Task) CanTrace(target *Task, attach bool) bool {
// "1. If the calling thread and the target thread are in the same thread
// group, access is always allowed." - ptrace(2)
@@ -162,7 +171,13 @@ func (t *Task) CanTrace(target *Task, attach bool) bool {
if cgid := callerCreds.RealKGID; cgid != targetCreds.RealKGID || cgid != targetCreds.EffectiveKGID || cgid != targetCreds.SavedKGID {
return false
}
- // TODO(b/31916171): dumpability check
+ var targetMM *mm.MemoryManager
+ target.WithMuLocked(func(t *Task) {
+ targetMM = t.MemoryManager()
+ })
+ if targetMM != nil && targetMM.Dumpability() != mm.UserDumpable {
+ return false
+ }
if callerCreds.UserNamespace != targetCreds.UserNamespace {
return false
}
diff --git a/pkg/sentry/kernel/task_exec.go b/pkg/sentry/kernel/task_exec.go
index 5d1425d5c..35d5cb90c 100644
--- a/pkg/sentry/kernel/task_exec.go
+++ b/pkg/sentry/kernel/task_exec.go
@@ -68,6 +68,7 @@ import (
"gvisor.googlesource.com/gvisor/pkg/abi/linux"
"gvisor.googlesource.com/gvisor/pkg/sentry/arch"
"gvisor.googlesource.com/gvisor/pkg/sentry/fs"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/mm"
"gvisor.googlesource.com/gvisor/pkg/syserror"
)
@@ -198,6 +199,12 @@ func (r *runSyscallAfterExecStop) execute(t *Task) taskRunState {
return flags.CloseOnExec
})
+ // NOTE(b/30815691): We currently do not implement privileged
+ // executables (set-user/group-ID bits and file capabilities). This
+ // allows us to unconditionally enable user dumpability on the new mm.
+ // See fs/exec.c:setup_new_exec.
+ r.tc.MemoryManager.SetDumpability(mm.UserDumpable)
+
// Switch to the new process.
t.MemoryManager().Deactivate()
t.mu.Lock()
diff --git a/pkg/sentry/kernel/task_identity.go b/pkg/sentry/kernel/task_identity.go
index 17f08729a..ec95f78d0 100644
--- a/pkg/sentry/kernel/task_identity.go
+++ b/pkg/sentry/kernel/task_identity.go
@@ -17,6 +17,7 @@ package kernel
import (
"gvisor.googlesource.com/gvisor/pkg/abi/linux"
"gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/mm"
"gvisor.googlesource.com/gvisor/pkg/syserror"
)
@@ -206,8 +207,17 @@ func (t *Task) setKUIDsUncheckedLocked(newR, newE, newS auth.KUID) {
// (filesystem UIDs aren't implemented, nor are any of the capabilities in
// question)
- // Not documented, but compare Linux's kernel/cred.c:commit_creds().
if oldE != newE {
+ // "[dumpability] is reset to the current value contained in
+ // the file /proc/sys/fs/suid_dumpable (which by default has
+ // the value 0), in the following circumstances: The process's
+ // effective user or group ID is changed." - prctl(2)
+ //
+ // (suid_dumpable isn't implemented, so we just use the
+ // default.)
+ t.MemoryManager().SetDumpability(mm.NotDumpable)
+
+ // Not documented, but compare Linux's kernel/cred.c:commit_creds().
t.parentDeathSignal = 0
}
}
@@ -303,8 +313,18 @@ func (t *Task) setKGIDsUncheckedLocked(newR, newE, newS auth.KGID) {
t.creds = t.creds.Fork() // See doc for creds.
t.creds.RealKGID, t.creds.EffectiveKGID, t.creds.SavedKGID = newR, newE, newS
- // Not documented, but compare Linux's kernel/cred.c:commit_creds().
if oldE != newE {
+ // "[dumpability] is reset to the current value contained in
+ // the file /proc/sys/fs/suid_dumpable (which by default has
+ // the value 0), in the following circumstances: The process's
+ // effective user or group ID is changed." - prctl(2)
+ //
+ // (suid_dumpable isn't implemented, so we just use the
+ // default.)
+ t.MemoryManager().SetDumpability(mm.NotDumpable)
+
+ // Not documented, but compare Linux's
+ // kernel/cred.c:commit_creds().
t.parentDeathSignal = 0
}
}
diff --git a/pkg/sentry/mm/lifecycle.go b/pkg/sentry/mm/lifecycle.go
index 7a65a62a2..7646d5ab2 100644
--- a/pkg/sentry/mm/lifecycle.go
+++ b/pkg/sentry/mm/lifecycle.go
@@ -37,6 +37,7 @@ func NewMemoryManager(p platform.Platform, mfp pgalloc.MemoryFileProvider) *Memo
privateRefs: &privateRefs{},
users: 1,
auxv: arch.Auxv{},
+ dumpability: UserDumpable,
aioManager: aioManager{contexts: make(map[uint64]*AIOContext)},
}
}
@@ -79,8 +80,9 @@ func (mm *MemoryManager) Fork(ctx context.Context) (*MemoryManager, error) {
envv: mm.envv,
auxv: append(arch.Auxv(nil), mm.auxv...),
// IncRef'd below, once we know that there isn't an error.
- executable: mm.executable,
- aioManager: aioManager{contexts: make(map[uint64]*AIOContext)},
+ executable: mm.executable,
+ dumpability: mm.dumpability,
+ aioManager: aioManager{contexts: make(map[uint64]*AIOContext)},
}
// Copy vmas.
diff --git a/pkg/sentry/mm/metadata.go b/pkg/sentry/mm/metadata.go
index 9768e51f1..c218006ee 100644
--- a/pkg/sentry/mm/metadata.go
+++ b/pkg/sentry/mm/metadata.go
@@ -20,6 +20,36 @@ import (
"gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
)
+// Dumpability describes whether, and under which ownership, core dumps should be created.
+type Dumpability int
+
+const (
+ // NotDumpable indicates that core dumps should never be created.
+ NotDumpable Dumpability = iota // 0, and therefore the zero value of Dumpability.
+
+ // UserDumpable indicates that core dumps should be created, owned by
+ // the current user.
+ UserDumpable // 1
+
+ // RootDumpable indicates that core dumps should be created, owned by
+ // root.
+ RootDumpable // 2
+)
+
+// Dumpability returns mm's current dumpability, read while holding metadataMu.
+func (mm *MemoryManager) Dumpability() Dumpability {
+ mm.metadataMu.Lock() // Held for the duration of the read; released by the deferred Unlock.
+ defer mm.metadataMu.Unlock()
+ return mm.dumpability
+}
+
+// SetDumpability replaces mm's dumpability with d, written while holding metadataMu.
+func (mm *MemoryManager) SetDumpability(d Dumpability) {
+ mm.metadataMu.Lock() // Held for the duration of the write; released by the deferred Unlock.
+ defer mm.metadataMu.Unlock()
+ mm.dumpability = d
+}
+
// ArgvStart returns the start of the application argument vector.
//
// There is no guarantee that this value is sensible w.r.t. ArgvEnd.
diff --git a/pkg/sentry/mm/mm.go b/pkg/sentry/mm/mm.go
index eb6defa2b..0a026ff8c 100644
--- a/pkg/sentry/mm/mm.go
+++ b/pkg/sentry/mm/mm.go
@@ -219,6 +219,12 @@ type MemoryManager struct {
// executable is protected by metadataMu.
executable *fs.Dirent
+ // dumpability describes if and how this MemoryManager may be dumped to
+ // userspace.
+ //
+ // dumpability is protected by metadataMu.
+ dumpability Dumpability
+
// aioManager keeps track of AIOContexts used for async IOs. AIOManager
// must be cloned when CLONE_VM is used.
aioManager aioManager
diff --git a/pkg/sentry/syscalls/linux/sys_prctl.go b/pkg/sentry/syscalls/linux/sys_prctl.go
index 117ae1a0e..1b7e5616b 100644
--- a/pkg/sentry/syscalls/linux/sys_prctl.go
+++ b/pkg/sentry/syscalls/linux/sys_prctl.go
@@ -15,6 +15,7 @@
package linux
import (
+ "fmt"
"syscall"
"gvisor.googlesource.com/gvisor/pkg/abi/linux"
@@ -23,6 +24,7 @@ import (
"gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
"gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth"
"gvisor.googlesource.com/gvisor/pkg/sentry/kernel/kdefs"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/mm"
)
// Prctl implements linux syscall prctl(2).
@@ -44,6 +46,33 @@ func Prctl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall
_, err := t.CopyOut(args[1].Pointer(), int32(t.ParentDeathSignal()))
return 0, nil, err
+ case linux.PR_GET_DUMPABLE:
+ d := t.MemoryManager().Dumpability()
+ switch d {
+ case mm.NotDumpable:
+ return linux.SUID_DUMP_DISABLE, nil, nil
+ case mm.UserDumpable:
+ return linux.SUID_DUMP_USER, nil, nil
+ case mm.RootDumpable:
+ return linux.SUID_DUMP_ROOT, nil, nil
+ default:
+ panic(fmt.Sprintf("Unknown dumpability %v", d))
+ }
+
+ case linux.PR_SET_DUMPABLE:
+ var d mm.Dumpability
+ switch args[1].Int() {
+ case linux.SUID_DUMP_DISABLE:
+ d = mm.NotDumpable
+ case linux.SUID_DUMP_USER:
+ d = mm.UserDumpable
+ default:
+ // N.B. Userspace may not pass SUID_DUMP_ROOT.
+ return 0, nil, syscall.EINVAL
+ }
+ t.MemoryManager().SetDumpability(d)
+ return 0, nil, nil
+
case linux.PR_GET_KEEPCAPS:
if t.Credentials().KeepCaps {
return 1, nil, nil
@@ -171,9 +200,7 @@ func Prctl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall
}
return 0, nil, t.DropBoundingCapability(cp)
- case linux.PR_GET_DUMPABLE,
- linux.PR_SET_DUMPABLE,
- linux.PR_GET_TIMING,
+ case linux.PR_GET_TIMING,
linux.PR_SET_TIMING,
linux.PR_GET_TSC,
linux.PR_SET_TSC,