summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorNicolas Lacasse <nlacasse@google.com>2021-08-27 13:09:28 -0700
committergVisor bot <gvisor-bot@google.com>2021-08-27 13:18:49 -0700
commitf7281c6cb9bbf3e5757adf52a7820499b5a7483b (patch)
tree58bcf863ec15020cadfc87fd6157ac9b057b1a9a
parent0db19ea910def9848d0f53f65f993270ed579a8f (diff)
Fix lock order violations: mm.mappingMu > Task.mu.
Document this ordering in mm/mm.go. PiperOrigin-RevId: 393413203
-rw-r--r--pkg/sentry/fs/proc/exec_args.go2
-rw-r--r--pkg/sentry/fs/proc/task.go116
-rw-r--r--pkg/sentry/fsimpl/proc/task_files.go61
-rw-r--r--pkg/sentry/kernel/task_exec.go8
-rw-r--r--pkg/sentry/kernel/task_exit.go9
-rw-r--r--pkg/sentry/kernel/task_image.go2
-rw-r--r--pkg/sentry/mm/mm.go1
7 files changed, 91 insertions, 108 deletions
diff --git a/pkg/sentry/fs/proc/exec_args.go b/pkg/sentry/fs/proc/exec_args.go
index 379429ab2..75dc5d204 100644
--- a/pkg/sentry/fs/proc/exec_args.go
+++ b/pkg/sentry/fs/proc/exec_args.go
@@ -107,7 +107,7 @@ func (f *execArgFile) Read(ctx context.Context, _ *fs.File, dst usermem.IOSequen
return 0, linuxerr.EINVAL
}
- m, err := getTaskMM(f.t)
+ m, err := getTaskMMIncRef(f.t)
if err != nil {
return 0, err
}
diff --git a/pkg/sentry/fs/proc/task.go b/pkg/sentry/fs/proc/task.go
index 89a799b21..03f2a882d 100644
--- a/pkg/sentry/fs/proc/task.go
+++ b/pkg/sentry/fs/proc/task.go
@@ -41,10 +41,24 @@ import (
// LINT.IfChange
-// getTaskMM returns t's MemoryManager. If getTaskMM succeeds, the MemoryManager's
-// users count is incremented, and must be decremented by the caller when it is
-// no longer in use.
-func getTaskMM(t *kernel.Task) (*mm.MemoryManager, error) {
+// getTaskMM gets the kernel task's MemoryManager. No additional reference is
+// taken on mm here. This is safe because MemoryManager.destroy is required to
+// leave the MemoryManager in a state where it's still usable as a
+// DynamicBytesSource.
+func getTaskMM(t *kernel.Task) *mm.MemoryManager {
+ var tmm *mm.MemoryManager
+ t.WithMuLocked(func(t *kernel.Task) {
+ if mm := t.MemoryManager(); mm != nil {
+ tmm = mm
+ }
+ })
+ return tmm
+}
+
+// getTaskMMIncRef returns t's MemoryManager. If getTaskMMIncRef succeeds, the
+// MemoryManager's users count is incremented, and must be decremented by the
+// caller when it is no longer in use.
+func getTaskMMIncRef(t *kernel.Task) (*mm.MemoryManager, error) {
if t.ExitState() == kernel.TaskExitDead {
return nil, linuxerr.ESRCH
}
@@ -269,21 +283,18 @@ func (e *exe) executable() (file fsbridge.File, err error) {
if err := checkTaskState(e.t); err != nil {
return nil, err
}
- e.t.WithMuLocked(func(t *kernel.Task) {
- mm := t.MemoryManager()
- if mm == nil {
- err = linuxerr.EACCES
- return
- }
+ mm := getTaskMM(e.t)
+ if mm == nil {
+ return nil, linuxerr.EACCES
+ }
- // The MemoryManager may be destroyed, in which case
- // MemoryManager.destroy will simply set the executable to nil
- // (with locks held).
- file = mm.Executable()
- if file == nil {
- err = linuxerr.ESRCH
- }
- })
+ // The MemoryManager may be destroyed, in which case
+ // MemoryManager.destroy will simply set the executable to nil
+ // (with locks held).
+ file = mm.Executable()
+ if file == nil {
+ err = linuxerr.ESRCH
+ }
return
}
@@ -463,7 +474,7 @@ func (m *memDataFile) Read(ctx context.Context, _ *fs.File, dst usermem.IOSequen
if dst.NumBytes() == 0 {
return 0, nil
}
- mm, err := getTaskMM(m.t)
+ mm, err := getTaskMMIncRef(m.t)
if err != nil {
return 0, nil
}
@@ -494,22 +505,9 @@ func newMaps(ctx context.Context, t *kernel.Task, msrc *fs.MountSource) *fs.Inod
return newProcInode(ctx, seqfile.NewSeqFile(ctx, &mapsData{t}), msrc, fs.SpecialFile, t)
}
-func (md *mapsData) mm() *mm.MemoryManager {
- var tmm *mm.MemoryManager
- md.t.WithMuLocked(func(t *kernel.Task) {
- if mm := t.MemoryManager(); mm != nil {
- // No additional reference is taken on mm here. This is safe
- // because MemoryManager.destroy is required to leave the
- // MemoryManager in a state where it's still usable as a SeqSource.
- tmm = mm
- }
- })
- return tmm
-}
-
// NeedsUpdate implements seqfile.SeqSource.NeedsUpdate.
func (md *mapsData) NeedsUpdate(generation int64) bool {
- if mm := md.mm(); mm != nil {
+ if mm := getTaskMM(md.t); mm != nil {
return mm.NeedsUpdate(generation)
}
return true
@@ -517,7 +515,7 @@ func (md *mapsData) NeedsUpdate(generation int64) bool {
// ReadSeqFileData implements seqfile.SeqSource.ReadSeqFileData.
func (md *mapsData) ReadSeqFileData(ctx context.Context, h seqfile.SeqHandle) ([]seqfile.SeqData, int64) {
- if mm := md.mm(); mm != nil {
+ if mm := getTaskMM(md.t); mm != nil {
return mm.ReadMapsSeqFileData(ctx, h)
}
return []seqfile.SeqData{}, 0
@@ -534,22 +532,9 @@ func newSmaps(ctx context.Context, t *kernel.Task, msrc *fs.MountSource) *fs.Ino
return newProcInode(ctx, seqfile.NewSeqFile(ctx, &smapsData{t}), msrc, fs.SpecialFile, t)
}
-func (sd *smapsData) mm() *mm.MemoryManager {
- var tmm *mm.MemoryManager
- sd.t.WithMuLocked(func(t *kernel.Task) {
- if mm := t.MemoryManager(); mm != nil {
- // No additional reference is taken on mm here. This is safe
- // because MemoryManager.destroy is required to leave the
- // MemoryManager in a state where it's still usable as a SeqSource.
- tmm = mm
- }
- })
- return tmm
-}
-
// NeedsUpdate implements seqfile.SeqSource.NeedsUpdate.
func (sd *smapsData) NeedsUpdate(generation int64) bool {
- if mm := sd.mm(); mm != nil {
+ if mm := getTaskMM(sd.t); mm != nil {
return mm.NeedsUpdate(generation)
}
return true
@@ -557,7 +542,7 @@ func (sd *smapsData) NeedsUpdate(generation int64) bool {
// ReadSeqFileData implements seqfile.SeqSource.ReadSeqFileData.
func (sd *smapsData) ReadSeqFileData(ctx context.Context, h seqfile.SeqHandle) ([]seqfile.SeqData, int64) {
- if mm := sd.mm(); mm != nil {
+ if mm := getTaskMM(sd.t); mm != nil {
return mm.ReadSmapsSeqFileData(ctx, h)
}
return []seqfile.SeqData{}, 0
@@ -627,12 +612,10 @@ func (s *taskStatData) ReadSeqFileData(ctx context.Context, h seqfile.SeqHandle)
fmt.Fprintf(&buf, "%d ", linux.ClockTFromDuration(s.t.StartTime().Sub(s.t.Kernel().Timekeeper().BootTime())))
var vss, rss uint64
- s.t.WithMuLocked(func(t *kernel.Task) {
- if mm := t.MemoryManager(); mm != nil {
- vss = mm.VirtualMemorySize()
- rss = mm.ResidentSetSize()
- }
- })
+ if mm := getTaskMM(s.t); mm != nil {
+ vss = mm.VirtualMemorySize()
+ rss = mm.ResidentSetSize()
+ }
fmt.Fprintf(&buf, "%d %d ", vss, rss/hostarch.PageSize)
// rsslim.
@@ -677,12 +660,10 @@ func (s *statmData) ReadSeqFileData(ctx context.Context, h seqfile.SeqHandle) ([
}
var vss, rss uint64
- s.t.WithMuLocked(func(t *kernel.Task) {
- if mm := t.MemoryManager(); mm != nil {
- vss = mm.VirtualMemorySize()
- rss = mm.ResidentSetSize()
- }
- })
+ if mm := getTaskMM(s.t); mm != nil {
+ vss = mm.VirtualMemorySize()
+ rss = mm.ResidentSetSize()
+ }
var buf bytes.Buffer
fmt.Fprintf(&buf, "%d %d 0 0 0 0 0\n", vss/hostarch.PageSize, rss/hostarch.PageSize)
@@ -734,12 +715,13 @@ func (s *statusData) ReadSeqFileData(ctx context.Context, h seqfile.SeqHandle) (
if fdTable := t.FDTable(); fdTable != nil {
fds = fdTable.CurrentMaxFDs()
}
- if mm := t.MemoryManager(); mm != nil {
- vss = mm.VirtualMemorySize()
- rss = mm.ResidentSetSize()
- data = mm.VirtualDataSize()
- }
})
+
+ if mm := getTaskMM(s.t); mm != nil {
+ vss = mm.VirtualMemorySize()
+ rss = mm.ResidentSetSize()
+ data = mm.VirtualDataSize()
+ }
fmt.Fprintf(&buf, "FDSize:\t%d\n", fds)
fmt.Fprintf(&buf, "VmSize:\t%d kB\n", vss>>10)
fmt.Fprintf(&buf, "VmRSS:\t%d kB\n", rss>>10)
@@ -925,7 +907,7 @@ func (f *auxvecFile) Read(ctx context.Context, _ *fs.File, dst usermem.IOSequenc
return 0, linuxerr.EINVAL
}
- m, err := getTaskMM(f.t)
+ m, err := getTaskMMIncRef(f.t)
if err != nil {
return 0, err
}
diff --git a/pkg/sentry/fsimpl/proc/task_files.go b/pkg/sentry/fsimpl/proc/task_files.go
index 34b0c4f63..d3f9cf489 100644
--- a/pkg/sentry/fsimpl/proc/task_files.go
+++ b/pkg/sentry/fsimpl/proc/task_files.go
@@ -40,7 +40,7 @@ import (
// Linux 3.18, the limit is five lines." - user_namespaces(7)
const maxIDMapLines = 5
-// mm gets the kernel task's MemoryManager. No additional reference is taken on
+// getMM gets the kernel task's MemoryManager. No additional reference is taken on
// mm here. This is safe because MemoryManager.destroy is required to leave the
// MemoryManager in a state where it's still usable as a DynamicBytesSource.
func getMM(task *kernel.Task) *mm.MemoryManager {
@@ -608,12 +608,10 @@ func (s *taskStatData) Generate(ctx context.Context, buf *bytes.Buffer) error {
fmt.Fprintf(buf, "%d ", linux.ClockTFromDuration(s.task.StartTime().Sub(s.task.Kernel().Timekeeper().BootTime())))
var vss, rss uint64
- s.task.WithMuLocked(func(t *kernel.Task) {
- if mm := t.MemoryManager(); mm != nil {
- vss = mm.VirtualMemorySize()
- rss = mm.ResidentSetSize()
- }
- })
+ if mm := getMM(s.task); mm != nil {
+ vss = mm.VirtualMemorySize()
+ rss = mm.ResidentSetSize()
+ }
fmt.Fprintf(buf, "%d %d ", vss, rss/hostarch.PageSize)
// rsslim.
@@ -649,13 +647,10 @@ var _ dynamicInode = (*statmData)(nil)
// Generate implements vfs.DynamicBytesSource.Generate.
func (s *statmData) Generate(ctx context.Context, buf *bytes.Buffer) error {
var vss, rss uint64
- s.task.WithMuLocked(func(t *kernel.Task) {
- if mm := t.MemoryManager(); mm != nil {
- vss = mm.VirtualMemorySize()
- rss = mm.ResidentSetSize()
- }
- })
-
+ if mm := getMM(s.task); mm != nil {
+ vss = mm.VirtualMemorySize()
+ rss = mm.ResidentSetSize()
+ }
fmt.Fprintf(buf, "%d %d 0 0 0 0 0\n", vss/hostarch.PageSize, rss/hostarch.PageSize)
return nil
}
@@ -779,12 +774,12 @@ func (s *statusFD) Generate(ctx context.Context, buf *bytes.Buffer) error {
if fdTable := t.FDTable(); fdTable != nil {
fds = fdTable.CurrentMaxFDs()
}
- if mm := t.MemoryManager(); mm != nil {
- vss = mm.VirtualMemorySize()
- rss = mm.ResidentSetSize()
- data = mm.VirtualDataSize()
- }
})
+ if mm := getMM(s.task); mm != nil {
+ vss = mm.VirtualMemorySize()
+ rss = mm.ResidentSetSize()
+ data = mm.VirtualDataSize()
+ }
// Filesystem user/group IDs aren't implemented; effective UID/GID are used
// instead.
fmt.Fprintf(buf, "Uid:\t%d\t%d\t%d\t%d\n", ruid, euid, suid, euid)
@@ -945,25 +940,17 @@ func (s *exeSymlink) Getlink(ctx context.Context, _ *vfs.Mount) (vfs.VirtualDent
return vfs.VirtualDentry{}, "", err
}
- var err error
- var exec fsbridge.File
- s.task.WithMuLocked(func(t *kernel.Task) {
- mm := t.MemoryManager()
- if mm == nil {
- err = linuxerr.EACCES
- return
- }
+ mm := getMM(s.task)
+ if mm == nil {
+ return vfs.VirtualDentry{}, "", linuxerr.EACCES
+ }
- // The MemoryManager may be destroyed, in which case
- // MemoryManager.destroy will simply set the executable to nil
- // (with locks held).
- exec = mm.Executable()
- if exec == nil {
- err = linuxerr.ESRCH
- }
- })
- if err != nil {
- return vfs.VirtualDentry{}, "", err
+ // The MemoryManager may be destroyed, in which case
+ // MemoryManager.destroy will simply set the executable to nil
+ // (with locks held).
+ exec := mm.Executable()
+ if exec == nil {
+ return vfs.VirtualDentry{}, "", linuxerr.ESRCH
}
defer exec.DecRef(ctx)
diff --git a/pkg/sentry/kernel/task_exec.go b/pkg/sentry/kernel/task_exec.go
index 9175b911c..db91fc4d8 100644
--- a/pkg/sentry/kernel/task_exec.go
+++ b/pkg/sentry/kernel/task_exec.go
@@ -222,9 +222,15 @@ func (r *runSyscallAfterExecStop) execute(t *Task) taskRunState {
// Update credentials to reflect the execve. This should precede switching
// MMs to ensure that dumpability has been reset first, if needed.
t.updateCredsForExecLocked()
- t.image.release()
+ oldImage := t.image
t.image = *r.image
t.mu.Unlock()
+
+ // Don't hold t.mu while calling t.image.release(), that may
+ // attempt to acquire TaskImage.MemoryManager.mappingMu, a lock order
+ // violation.
+ oldImage.release()
+
t.unstopVforkParent()
t.p.FullStateChanged()
// NOTE(b/30316266): All locks must be dropped prior to calling Activate.
diff --git a/pkg/sentry/kernel/task_exit.go b/pkg/sentry/kernel/task_exit.go
index 342e5debe..b3931445b 100644
--- a/pkg/sentry/kernel/task_exit.go
+++ b/pkg/sentry/kernel/task_exit.go
@@ -230,9 +230,16 @@ func (*runExitMain) execute(t *Task) taskRunState {
t.tg.pidns.owner.mu.Lock()
t.updateRSSLocked()
t.tg.pidns.owner.mu.Unlock()
+
+ // Release the task image resources. Accessing these fields must be
+ // done with t.mu held, but the mm.DecUsers() call must be done outside
+ // of that lock.
t.mu.Lock()
- t.image.release()
+ mm := t.image.MemoryManager
+ t.image.MemoryManager = nil
+ t.image.fu = nil
t.mu.Unlock()
+ mm.DecUsers(t)
// Releasing the MM unblocks a blocked CLONE_VFORK parent.
t.unstopVforkParent()
diff --git a/pkg/sentry/kernel/task_image.go b/pkg/sentry/kernel/task_image.go
index c132c27ef..6002ffb42 100644
--- a/pkg/sentry/kernel/task_image.go
+++ b/pkg/sentry/kernel/task_image.go
@@ -53,7 +53,7 @@ type TaskImage struct {
}
// release releases all resources held by the TaskImage. release is called by
-// the task when it execs into a new TaskImage or exits.
+// the task when it execs into a new TaskImage.
func (image *TaskImage) release() {
// Nil out pointers so that if the task is saved after release, it doesn't
// follow the pointers to possibly now-invalid objects.
diff --git a/pkg/sentry/mm/mm.go b/pkg/sentry/mm/mm.go
index 57969b26c..0fca59b64 100644
--- a/pkg/sentry/mm/mm.go
+++ b/pkg/sentry/mm/mm.go
@@ -28,6 +28,7 @@
// memmap.File locks
// mm.aioManager.mu
// mm.AIOContext.mu
+// kernel.TaskSet.mu
//
// Only mm.MemoryManager.Fork is permitted to lock mm.MemoryManager.activeMu in
// multiple mm.MemoryManagers, as it does so in a well-defined order (forked