From d321f575e2acec6f1077ab09ff0a089fa6ac0ec6 Mon Sep 17 00:00:00 2001
From: Nicolas Lacasse <nlacasse@google.com>
Date: Wed, 9 Jan 2019 10:28:20 -0800
Subject: Fix lock order violation.

overlayFileOperations.Readdir was holding overlay.copyMu while calling
DirentReaddir, which then attempts to take take the corresponding Dirent.mu,
causing a lock order violation. (See lock order documentation in
fs/copy_up.go.)

We only actually need to hold copyMu during readdirEntries(), so holding the
lock is moved in there, thus avoiding the lock order violation.

A new lock was added to protect overlayFileOperations.dirCache. We were
inadvertently relying on copyMu to protect this.  There is no reason it should
not have its own lock.

PiperOrigin-RevId: 228542473
Change-Id: I03c3a368c8cbc0b5a79d50cc486fc94adaddc1c2
---
 pkg/sentry/fs/file_overlay.go | 37 +++++++++++++++++++++++++++++++------
 1 file changed, 31 insertions(+), 6 deletions(-)

(limited to 'pkg/sentry')

diff --git a/pkg/sentry/fs/file_overlay.go b/pkg/sentry/fs/file_overlay.go
index 9b958b64b..cd231bdef 100644
--- a/pkg/sentry/fs/file_overlay.go
+++ b/pkg/sentry/fs/file_overlay.go
@@ -82,9 +82,14 @@ type overlayFileOperations struct {
 	upper *File
 	lower *File
 
-	// dirCursor is a directory cursor for a directory in an overlay.
+	// dirCursor is a directory cursor for a directory in an overlay. It is
+	// protected by File.mu of the owning file, which is held during
+	// Readdir and Seek calls.
 	dirCursor string
 
+	// dirCacheMu protects dirCache.
+	dirCacheMu sync.RWMutex `state:"nosave"`
+
 	// dirCache is cache of DentAttrs from upper and lower Inodes.
 	dirCache *SortedDentryMap
 }
@@ -180,21 +185,38 @@ func (f *overlayFileOperations) Readdir(ctx context.Context, file *File, seriali
 	// Otherwise proceed with usual overlay readdir.
 	o := file.Dirent.Inode.overlay
 
-	o.copyMu.RLock()
-	defer o.copyMu.RUnlock()
-
-	var err error
-	f.dirCache, err = readdirEntries(ctx, o)
+	// readdirEntries holds o.copyUpMu to ensure that copy-up does not
+	// occur while calculating the readir results.
+	//
+	// However, it is possible for a copy-up to occur after the call to
+	// readdirEntries, but before setting f.dirCache. This is OK, since
+	// copy-up only does not change the children in a way that would affect
+	// the children returned in dirCache. Copy-up only moves
+	// files/directories between layers in the overlay.
+	//
+	// It is also possible for Readdir to race with a Create operation
+	// (which may trigger a copy-up during it's execution). Depending on
+	// whether the Create happens before or after the readdirEntries call,
+	// the newly created file may or may not appear in the readdir results.
+	// But this can only be caused by a real race between readdir and
+	// create syscalls, so it's also OK.
+	dirCache, err := readdirEntries(ctx, o)
 	if err != nil {
 		return file.Offset(), err
 	}
 
+	f.dirCacheMu.Lock()
+	f.dirCache = dirCache
+	f.dirCacheMu.Unlock()
+
 	return DirentReaddir(ctx, file.Dirent, f, root, dirCtx, file.Offset())
 }
 
 // IterateDir implements DirIterator.IterateDir.
 func (f *overlayFileOperations) IterateDir(ctx context.Context, dirCtx *DirCtx, offset int) (int, error) {
+	f.dirCacheMu.RLock()
 	n, err := GenericReaddir(dirCtx, f.dirCache)
+	f.dirCacheMu.RUnlock()
 	return offset + n, err
 }
 
@@ -323,6 +345,9 @@ func (*overlayFileOperations) Ioctl(ctx context.Context, io usermem.IO, args arc
 // readdirEntries returns a sorted map of directory entries from the
 // upper and/or lower filesystem.
 func readdirEntries(ctx context.Context, o *overlayEntry) (*SortedDentryMap, error) {
+	o.copyMu.RLock()
+	defer o.copyMu.RUnlock()
+
 	// Assert that there is at least one upper or lower entry.
 	if o.upper == nil && o.lower == nil {
 		panic("invalid overlayEntry, needs at least one Inode")
-- 
cgit v1.2.3