From 6f60a2b0a27a742690aa6acd5df1912ccb5fc8d3 Mon Sep 17 00:00:00 2001
From: Etienne Perot <eperot@google.com>
Date: Thu, 3 Dec 2020 06:18:03 -0800
Subject: Implement `fcntl` options `F_GETSIG` and `F_SETSIG`.

These options allow overriding the signal that gets sent to the process when
I/O operations are available on the file descriptor, rather than the default
`SIGIO` signal. Doing so also populates `siginfo` to contain extra information
about which file descriptor caused the event (`si_fd`) and what events happened
on it (`si_band`). The logic around which FD is populated within `si_fd`
matches Linux's, which means it has some weird edge cases where that value may
not actually refer to a file descriptor that is still valid.

This CL also ports the extra save/restore (S/R) logic for async handlers in
VFS2. Without this, async I/O handlers aren't properly re-registered after a
save/restore cycle.

PiperOrigin-RevId: 345436598
---
 pkg/sentry/kernel/epoll/epoll.go   |  7 ++-
 pkg/sentry/kernel/fasync/BUILD     |  2 +
 pkg/sentry/kernel/fasync/fasync.go | 96 ++++++++++++++++++++++++++++++++------
 3 files changed, 88 insertions(+), 17 deletions(-)

(limited to 'pkg/sentry/kernel')

diff --git a/pkg/sentry/kernel/epoll/epoll.go b/pkg/sentry/kernel/epoll/epoll.go
index 15519f0df..61aeca044 100644
--- a/pkg/sentry/kernel/epoll/epoll.go
+++ b/pkg/sentry/kernel/epoll/epoll.go
@@ -273,7 +273,7 @@ func (e *EventPoll) ReadEvents(max int) []linux.EpollEvent {
 //
 // Callback is called when one of the files we're polling becomes ready. It
 // moves said file to the readyList if it's currently in the waiting list.
-func (p *pollEntry) Callback(*waiter.Entry) {
+func (p *pollEntry) Callback(*waiter.Entry, waiter.EventMask) {
 	e := p.epoll
 
 	e.listsMu.Lock()
@@ -306,9 +306,8 @@ func (e *EventPoll) initEntryReadiness(entry *pollEntry) {
 	f.EventRegister(&entry.waiter, entry.mask)
 
 	// Check if the file happens to already be in a ready state.
-	ready := f.Readiness(entry.mask) & entry.mask
-	if ready != 0 {
-		entry.Callback(&entry.waiter)
+	if ready := f.Readiness(entry.mask) & entry.mask; ready != 0 {
+		entry.Callback(&entry.waiter, ready)
 	}
 }
 
diff --git a/pkg/sentry/kernel/fasync/BUILD b/pkg/sentry/kernel/fasync/BUILD
index 2b3955598..f855f038b 100644
--- a/pkg/sentry/kernel/fasync/BUILD
+++ b/pkg/sentry/kernel/fasync/BUILD
@@ -8,11 +8,13 @@ go_library(
     visibility = ["//:sandbox"],
     deps = [
         "//pkg/abi/linux",
+        "//pkg/sentry/arch",
         "//pkg/sentry/fs",
         "//pkg/sentry/kernel",
         "//pkg/sentry/kernel/auth",
         "//pkg/sentry/vfs",
         "//pkg/sync",
+        "//pkg/syserror",
         "//pkg/waiter",
     ],
 )
diff --git a/pkg/sentry/kernel/fasync/fasync.go b/pkg/sentry/kernel/fasync/fasync.go
index 153d2cd9b..b66d61c6f 100644
--- a/pkg/sentry/kernel/fasync/fasync.go
+++ b/pkg/sentry/kernel/fasync/fasync.go
@@ -17,22 +17,45 @@ package fasync
 
 import (
 	"gvisor.dev/gvisor/pkg/abi/linux"
+	"gvisor.dev/gvisor/pkg/sentry/arch"
 	"gvisor.dev/gvisor/pkg/sentry/fs"
 	"gvisor.dev/gvisor/pkg/sentry/kernel"
 	"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
 	"gvisor.dev/gvisor/pkg/sentry/vfs"
 	"gvisor.dev/gvisor/pkg/sync"
+	"gvisor.dev/gvisor/pkg/syserror"
 	"gvisor.dev/gvisor/pkg/waiter"
 )
 
-// New creates a new fs.FileAsync.
-func New() fs.FileAsync {
-	return &FileAsync{}
+// Table to convert waiter event masks into si_band siginfo codes.
+// Taken from fs/fcntl.c:band_table.
+var bandTable = map[waiter.EventMask]int64{
+	// POLL_IN
+	waiter.EventIn: linux.EPOLLIN | linux.EPOLLRDNORM,
+	// POLL_OUT
+	waiter.EventOut: linux.EPOLLOUT | linux.EPOLLWRNORM | linux.EPOLLWRBAND,
+	// POLL_ERR
+	waiter.EventErr: linux.EPOLLERR,
+	// POLL_PRI
+	waiter.EventPri: linux.EPOLLPRI | linux.EPOLLRDBAND,
+	// POLL_HUP
+	waiter.EventHUp: linux.EPOLLHUP | linux.EPOLLERR,
 }
 
-// NewVFS2 creates a new vfs.FileAsync.
-func NewVFS2() vfs.FileAsync {
-	return &FileAsync{}
+// New returns a function that creates a new fs.FileAsync with the given file
+// descriptor.
+func New(fd int) func() fs.FileAsync {
+	return func() fs.FileAsync {
+		return &FileAsync{fd: fd}
+	}
+}
+
+// NewVFS2 returns a function that creates a new vfs.FileAsync with the given
+// file descriptor.
+func NewVFS2(fd int) func() vfs.FileAsync {
+	return func() vfs.FileAsync {
+		return &FileAsync{fd: fd}
+	}
 }
 
 // FileAsync sends signals when the registered file is ready for IO.
@@ -42,6 +65,12 @@ type FileAsync struct {
 	// e is immutable after first use (which is protected by mu below).
 	e waiter.Entry
 
+	// fd is the file descriptor to notify about.
+	// It is immutable, set at allocation time. This matches Linux semantics in
+	// fs/fcntl.c:fasync_helper.
+	// The fd value is passed to the signal recipient in siginfo.si_fd.
+	fd int
+
 	// regMu protects registeration and unregistration actions on e.
 	//
 	// regMu must be held while registration decisions are being made
@@ -56,6 +85,10 @@ type FileAsync struct {
 	mu         sync.Mutex `state:"nosave"`
 	requester  *auth.Credentials
 	registered bool
+	// signal is the signal to deliver upon I/O being available.
+	// The default value ("zero signal") means the default SIGIO signal will be
+	// delivered.
+	signal linux.Signal
 
 	// Only one of the following is allowed to be non-nil.
 	recipientPG *kernel.ProcessGroup
@@ -64,10 +97,10 @@ type FileAsync struct {
 }
 
 // Callback sends a signal.
-func (a *FileAsync) Callback(e *waiter.Entry) {
+func (a *FileAsync) Callback(e *waiter.Entry, mask waiter.EventMask) {
 	a.mu.Lock()
+	defer a.mu.Unlock()
 	if !a.registered {
-		a.mu.Unlock()
 		return
 	}
 	t := a.recipientT
@@ -80,19 +113,34 @@ func (a *FileAsync) Callback(e *waiter.Entry) {
 	}
 	if t == nil {
 		// No recipient has been registered.
-		a.mu.Unlock()
 		return
 	}
 	c := t.Credentials()
 	// Logic from sigio_perm in fs/fcntl.c.
-	if a.requester.EffectiveKUID == 0 ||
+	permCheck := (a.requester.EffectiveKUID == 0 ||
 		a.requester.EffectiveKUID == c.SavedKUID ||
 		a.requester.EffectiveKUID == c.RealKUID ||
 		a.requester.RealKUID == c.SavedKUID ||
-		a.requester.RealKUID == c.RealKUID {
-		t.SendSignal(kernel.SignalInfoPriv(linux.SIGIO))
+		a.requester.RealKUID == c.RealKUID)
+	if !permCheck {
+		return
 	}
-	a.mu.Unlock()
+	signalInfo := &arch.SignalInfo{
+		Signo: int32(linux.SIGIO),
+		Code:  arch.SignalInfoKernel,
+	}
+	if a.signal != 0 {
+		signalInfo.Signo = int32(a.signal)
+		signalInfo.SetFD(uint32(a.fd))
+		var band int64
+		for m, bandCode := range bandTable {
+			if m&mask != 0 {
+				band |= bandCode
+			}
+		}
+		signalInfo.SetBand(band)
+	}
+	t.SendSignal(signalInfo)
 }
 
 // Register sets the file which will be monitored for IO events.
@@ -186,3 +234,25 @@ func (a *FileAsync) ClearOwner() {
 	a.recipientTG = nil
 	a.recipientPG = nil
 }
+
+// Signal returns which signal will be sent to the signal recipient.
+// A value of zero means the signal to deliver wasn't customized, which means
+// the default signal (SIGIO) will be delivered.
+func (a *FileAsync) Signal() linux.Signal {
+	a.mu.Lock()
+	defer a.mu.Unlock()
+	return a.signal
+}
+
+// SetSignal overrides which signal to send when I/O is available.
+// The default behavior can be reset by specifying signal zero, which means
+// to send SIGIO.
+func (a *FileAsync) SetSignal(signal linux.Signal) error {
+	if signal != 0 && !signal.IsValid() {
+		return syserror.EINVAL
+	}
+	a.mu.Lock()
+	defer a.mu.Unlock()
+	a.signal = signal
+	return nil
+}
-- 
cgit v1.2.3