author     Michael Pratt <mpratt@google.com>   2019-04-17 12:13:46 -0700
committer  Shentubot <shentubot@google.com>    2019-04-17 12:15:01 -0700
commit     08d99c5fbea76ecc92038280387d24ecdf7ed814 (patch)
tree       76df71b51b5515098e8c61978c441e8c530526ff /pkg/sentry/syscalls/linux/sys_poll.go
parent     e091b4e7c07056e32120ab25cc9a78ed24f7c754 (diff)
Convert poll/select to operate more directly on linux.PollFD
Currently, doPoll copies the user struct pollfd array into a []syscalls.PollFD, which contains the internal kdefs.FD and waiter.EventMask types. While these are currently binary-compatible with the Linux versions, we generally discourage copying directly into internal types (someone may inadvertently change kdefs.FD to uint64). Instead, copy directly into a []linux.PollFD, which will certainly be binary compatible.

Most of syscalls/polling.go is included directly in syscalls/linux/sys_poll.go, as it can then operate directly on linux.PollFD. The additional syscalls.PollFD type provides little value.

I've also added explicit conversion functions for waiter.EventMask, which creates the possibility of a different binary format.

PiperOrigin-RevId: 244042947
Change-Id: I24e5b642002a32b3afb95a9dcb80d4acd1288abf
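The conversion helpers referenced in the diff below, waiter.EventMaskFromLinux and EventMask.ToLinux, live in pkg/waiter and are not shown in this file. The following is a minimal sketch of what such helpers might look like, assuming the internal bit layout currently mirrors the Linux poll bits; the constant values and the allEvents name are illustrative, not the actual pkg/waiter definitions.

package waiter

// EventMask is the sentry-internal readiness bit set. The values happen to
// mirror Linux's poll bits today, but only these two helpers may rely on that.
type EventMask uint16

const (
	EventIn  EventMask = 0x01 // linux.POLLIN
	EventPri EventMask = 0x02 // linux.POLLPRI
	EventOut EventMask = 0x04 // linux.POLLOUT
	EventErr EventMask = 0x08 // linux.POLLERR
	EventHUp EventMask = 0x10 // linux.POLLHUP

	allEvents EventMask = 0x1f
)

// EventMaskFromLinux converts a Linux poll event mask to an internal
// EventMask. Going through an explicit function keeps callers honest if the
// internal representation ever diverges from the Linux one.
func EventMaskFromLinux(e uint32) EventMask {
	// Today this is a masked copy; a bit-by-bit translation would replace
	// it if the formats diverge.
	return EventMask(e) & allEvents
}

// ToLinux converts an internal EventMask to its Linux poll representation.
func (e EventMask) ToLinux() uint32 {
	return uint32(e)
}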
Diffstat (limited to 'pkg/sentry/syscalls/linux/sys_poll.go')
-rw-r--r--  pkg/sentry/syscalls/linux/sys_poll.go  133
1 file changed, 120 insertions(+), 13 deletions(-)
diff --git a/pkg/sentry/syscalls/linux/sys_poll.go b/pkg/sentry/syscalls/linux/sys_poll.go
index 0cf6aad7f..23fcb907f 100644
--- a/pkg/sentry/syscalls/linux/sys_poll.go
+++ b/pkg/sentry/syscalls/linux/sys_poll.go
@@ -19,11 +19,11 @@ import (
"gvisor.googlesource.com/gvisor/pkg/abi/linux"
"gvisor.googlesource.com/gvisor/pkg/sentry/arch"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
"gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
"gvisor.googlesource.com/gvisor/pkg/sentry/kernel/kdefs"
ktime "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time"
"gvisor.googlesource.com/gvisor/pkg/sentry/limits"
- "gvisor.googlesource.com/gvisor/pkg/sentry/syscalls"
"gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
"gvisor.googlesource.com/gvisor/pkg/syserror"
"gvisor.googlesource.com/gvisor/pkg/waiter"
@@ -37,23 +37,130 @@ const fileCap = 1024 * 1024
const (
// selectReadEvents is analogous to the Linux kernel's
// fs/select.c:POLLIN_SET.
- selectReadEvents = waiter.EventIn | waiter.EventHUp | waiter.EventErr
+ selectReadEvents = linux.POLLIN | linux.POLLHUP | linux.POLLERR
// selectWriteEvents is analogous to the Linux kernel's
// fs/select.c:POLLOUT_SET.
- selectWriteEvents = waiter.EventOut | waiter.EventErr
+ selectWriteEvents = linux.POLLOUT | linux.POLLERR
// selectExceptEvents is analogous to the Linux kernel's
// fs/select.c:POLLEX_SET.
- selectExceptEvents = waiter.EventPri
+ selectExceptEvents = linux.POLLPRI
)
+// pollState tracks the associated file descriptor and waiter of a PollFD.
+type pollState struct {
+ file *fs.File
+ waiter waiter.Entry
+}
+
+// initReadiness gets the current ready mask for the file represented by the FD
+// stored in pfd.FD. If a channel is passed in, the waiter entry in "state" is
+// used to register with the file for event notifications, and a reference to
+// the file is stored in "state".
+func initReadiness(t *kernel.Task, pfd *linux.PollFD, state *pollState, ch chan struct{}) {
+ if pfd.FD < 0 {
+ pfd.REvents = 0
+ return
+ }
+
+ file := t.FDMap().GetFile(kdefs.FD(pfd.FD))
+ if file == nil {
+ pfd.REvents = linux.POLLNVAL
+ return
+ }
+
+ if ch == nil {
+ defer file.DecRef()
+ } else {
+ state.file = file
+ state.waiter, _ = waiter.NewChannelEntry(ch)
+ file.EventRegister(&state.waiter, waiter.EventMaskFromLinux(uint32(pfd.Events)))
+ }
+
+ r := file.Readiness(waiter.EventMaskFromLinux(uint32(pfd.Events)))
+ pfd.REvents = int16(r.ToLinux()) & pfd.Events
+}
+
+// releaseState releases all the pollState in "state".
+func releaseState(state []pollState) {
+ for i := range state {
+ if state[i].file != nil {
+ state[i].file.EventUnregister(&state[i].waiter)
+ state[i].file.DecRef()
+ }
+ }
+}
+
+// pollBlock polls the PollFDs in "pfd" with a bounded time specified in "timeout"
+// when "timeout" is greater than zero.
+//
+// pollBlock returns the remaining timeout, which is always 0 on a timeout; and 0 or
+// positive if interrupted by a signal.
+func pollBlock(t *kernel.Task, pfd []linux.PollFD, timeout time.Duration) (time.Duration, uintptr, error) {
+ var ch chan struct{}
+ if timeout != 0 {
+ ch = make(chan struct{}, 1)
+ }
+
+ // Register for event notification in the files involved if we may
+ // block (timeout not zero). Once we find a file that has a non-zero
+ // result, we stop registering for events but still go through all files
+ // to get their ready masks.
+ state := make([]pollState, len(pfd))
+ defer releaseState(state)
+ n := uintptr(0)
+ for i := range pfd {
+ initReadiness(t, &pfd[i], &state[i], ch)
+ if pfd[i].REvents != 0 {
+ n++
+ ch = nil
+ }
+ }
+
+ if timeout == 0 {
+ return timeout, n, nil
+ }
+
+ forever := timeout < 0
+
+ for n == 0 {
+ var err error
+ // Wait for a notification.
+ timeout, err = t.BlockWithTimeout(ch, !forever, timeout)
+ if err != nil {
+ if err == syserror.ETIMEDOUT {
+ err = nil
+ }
+ return timeout, 0, err
+ }
+
+ // We got notified, count how many files are ready. If none,
+ // then this was a spurious notification, and we just go back
+ // to sleep with the remaining timeout.
+ for i := range state {
+ if state[i].file == nil {
+ continue
+ }
+
+ r := state[i].file.Readiness(waiter.EventMaskFromLinux(uint32(pfd[i].Events)))
+ rl := int16(r.ToLinux()) & pfd[i].Events
+ if rl != 0 {
+ pfd[i].REvents = rl
+ n++
+ }
+ }
+ }
+
+ return timeout, n, nil
+}
+
func doPoll(t *kernel.Task, pfdAddr usermem.Addr, nfds uint, timeout time.Duration) (time.Duration, uintptr, error) {
if uint64(nfds) > t.ThreadGroup().Limits().GetCapped(limits.NumberOfFiles, fileCap) {
return timeout, 0, syserror.EINVAL
}
- pfd := make([]syscalls.PollFD, nfds)
+ pfd := make([]linux.PollFD, nfds)
if nfds > 0 {
if _, err := t.CopyIn(pfdAddr, &pfd); err != nil {
return timeout, 0, err
@@ -65,9 +172,9 @@ func doPoll(t *kernel.Task, pfdAddr usermem.Addr, nfds uint, timeout time.Durati
// polling, changing event masks here is an application-visible difference.
// (Linux also doesn't copy out event masks at all, only revents.)
for i := range pfd {
- pfd[i].Events |= waiter.EventHUp | waiter.EventErr
+ pfd[i].Events |= linux.POLLHUP | linux.POLLERR
}
- remainingTimeout, n, err := syscalls.Poll(t, pfd, timeout)
+ remainingTimeout, n, err := pollBlock(t, pfd, timeout)
err = syserror.ConvertIntr(err, syserror.EINTR)
// The poll entries are copied out regardless of whether
@@ -136,8 +243,8 @@ func doSelect(t *kernel.Task, nfds int, readFDs, writeFDs, exceptFDs usermem.Add
}
// Build the PollFD array.
- pfd := make([]syscalls.PollFD, 0, fdCount)
- fd := kdefs.FD(0)
+ pfd := make([]linux.PollFD, 0, fdCount)
+ var fd int32
for i := 0; i < byteCount; i++ {
rV, wV, eV := r[i], w[i], e[i]
v := rV | wV | eV
@@ -148,13 +255,13 @@ func doSelect(t *kernel.Task, nfds int, readFDs, writeFDs, exceptFDs usermem.Add
// immediately to ensure we don't leak. Note, another thread
// might be about to close fd. This is racy, but that's
// OK. Linux is racy in the same way.
- file := t.FDMap().GetFile(fd)
+ file := t.FDMap().GetFile(kdefs.FD(fd))
if file == nil {
return 0, syserror.EBADF
}
file.DecRef()
- mask := waiter.EventMask(0)
+ var mask int16
if (rV & m) != 0 {
mask |= selectReadEvents
}
@@ -167,7 +274,7 @@ func doSelect(t *kernel.Task, nfds int, readFDs, writeFDs, exceptFDs usermem.Add
mask |= selectExceptEvents
}
- pfd = append(pfd, syscalls.PollFD{
+ pfd = append(pfd, linux.PollFD{
FD: fd,
Events: mask,
})
@@ -179,7 +286,7 @@ func doSelect(t *kernel.Task, nfds int, readFDs, writeFDs, exceptFDs usermem.Add
}
// Do the syscall, then count the number of bits set.
- _, _, err := syscalls.Poll(t, pfd, timeout)
+ _, _, err := pollBlock(t, pfd, timeout)
if err != nil {
return 0, syserror.ConvertIntr(err, syserror.EINTR)
}
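For context, the bitmap walk in doSelect above can be read in isolation as follows. This is a standalone sketch, not the sentry code: it omits doSelect's FDMap lookup (the EBADF check), the nfds/byte-count handling, and the final pollBlock call, and the buildSelectPollFDs name and the pollFD type are illustrative stand-ins for the real linux.PollFD.

package main

import "fmt"

const (
	pollIn  = 0x0001
	pollPri = 0x0002
	pollOut = 0x0004
	pollErr = 0x0008
	pollHUp = 0x0010

	// Mirror the POLLIN_SET/POLLOUT_SET/POLLEX_SET constants from the diff.
	selectReadEvents   = pollIn | pollHUp | pollErr
	selectWriteEvents  = pollOut | pollErr
	selectExceptEvents = pollPri
)

// pollFD stands in for linux.PollFD.
type pollFD struct {
	FD     int32
	Events int16
}

// buildSelectPollFDs walks the read/write/except bitmaps byte by byte, like
// the loop in doSelect, and emits one pollFD per fd present in any set. It
// assumes the three slices have equal length, as doSelect guarantees via
// byteCount.
func buildSelectPollFDs(r, w, e []byte) []pollFD {
	var pfd []pollFD
	var fd int32
	for i := range r {
		v := r[i] | w[i] | e[i]
		for m := byte(1); m != 0; m <<= 1 {
			if v&m != 0 {
				var mask int16
				if r[i]&m != 0 {
					mask |= selectReadEvents
				}
				if w[i]&m != 0 {
					mask |= selectWriteEvents
				}
				if e[i]&m != 0 {
					mask |= selectExceptEvents
				}
				pfd = append(pfd, pollFD{FD: fd, Events: mask})
			}
			fd++
		}
	}
	return pfd
}

func main() {
	// fds 0 and 2 readable, fd 2 also writable.
	fmt.Println(buildSelectPollFDs([]byte{0b101}, []byte{0b100}, []byte{0x00}))
	// Output: [{0 25} {2 29}]
}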