From 1947c873423cabcaf5e67b667542421ade7414ff Mon Sep 17 00:00:00 2001
From: Fabricio Voznika <fvoznika@google.com>
Date: Mon, 3 May 2021 13:47:45 -0700
Subject: Fix deadlock in /proc/[pid]/fd/[num]

In order to resolve path names, fsSymlink.Readlink() may need to reenter
kernfs. Change the code so that kernfs.Inode.Readlink() is called without
locks and document the new contract.

PiperOrigin-RevId: 371770222
---
 pkg/sentry/fsimpl/kernfs/filesystem.go | 12 ++++++++++--
 pkg/sentry/fsimpl/kernfs/kernfs.go     |  6 ++++++
 2 files changed, 16 insertions(+), 2 deletions(-)

(limited to 'pkg/sentry/fsimpl/kernfs')

diff --git a/pkg/sentry/fsimpl/kernfs/filesystem.go b/pkg/sentry/fsimpl/kernfs/filesystem.go
index badca4d9f..f50b0fb08 100644
--- a/pkg/sentry/fsimpl/kernfs/filesystem.go
+++ b/pkg/sentry/fsimpl/kernfs/filesystem.go
@@ -612,16 +612,24 @@ afterTrailingSymlink:
 
 // ReadlinkAt implements vfs.FilesystemImpl.ReadlinkAt.
 func (fs *Filesystem) ReadlinkAt(ctx context.Context, rp *vfs.ResolvingPath) (string, error) {
-	fs.mu.RLock()
 	defer fs.processDeferredDecRefs(ctx)
-	defer fs.mu.RUnlock()
+
+	fs.mu.RLock()
 	d, err := fs.walkExistingLocked(ctx, rp)
 	if err != nil {
+		fs.mu.RUnlock()
 		return "", err
 	}
 	if !d.isSymlink() {
+		fs.mu.RUnlock()
 		return "", syserror.EINVAL
 	}
+
+	// Inode.Readlink() cannot be called holding fs locks.
+	d.IncRef()
+	defer d.DecRef(ctx)
+	fs.mu.RUnlock()
+
 	return d.inode.Readlink(ctx, rp.Mount())
 }
 
diff --git a/pkg/sentry/fsimpl/kernfs/kernfs.go b/pkg/sentry/fsimpl/kernfs/kernfs.go
index 16486eeae..6f699c9cd 100644
--- a/pkg/sentry/fsimpl/kernfs/kernfs.go
+++ b/pkg/sentry/fsimpl/kernfs/kernfs.go
@@ -534,6 +534,9 @@ func (d *Dentry) FSLocalPath() string {
 // - Checking that dentries passed to methods are of the appropriate file type.
 // - Checking permissions.
 //
+// Inode functions may be called holding filesystem wide locks and are not
+// allowed to call vfs functions that may reenter, unless otherwise noted.
+//
 // Specific responsibilities of implementations are documented below.
 type Inode interface {
 	// Methods related to reference counting. A generic implementation is
@@ -680,6 +683,9 @@ type inodeDirectory interface {
 type inodeSymlink interface {
 	// Readlink returns the target of a symbolic link. If an inode is not a
 	// symlink, the implementation should return EINVAL.
+	//
+	// Readlink is called with no kernfs locks held, so it may reenter if needed
+	// to resolve symlink targets.
 	Readlink(ctx context.Context, mnt *vfs.Mount) (string, error)
 
 	// Getlink returns the target of a symbolic link, as used by path
-- 
cgit v1.2.3