diff options
author | Michael Pratt <mpratt@google.com> | 2019-04-17 12:13:46 -0700 |
---|---|---|
committer | Shentubot <shentubot@google.com> | 2019-04-17 12:15:01 -0700 |
commit | 08d99c5fbea76ecc92038280387d24ecdf7ed814 (patch) | |
tree | 76df71b51b5515098e8c61978c441e8c530526ff /pkg/sentry/syscalls/linux/sys_poll.go | |
parent | e091b4e7c07056e32120ab25cc9a78ed24f7c754 (diff) |
Convert poll/select to operate more directly on linux.PollFD
Current, doPoll copies the user struct pollfd array into a
[]syscalls.PollFD, which contains internal kdefs.FD and
waiter.EventMask types. While these are currently binary-compatible with
the Linux versions, we generally discourage copying directly to internal
types (someone may inadvertantly change kdefs.FD to uint64).
Instead, copy directly to a []linux.PollFD, which will certainly be
binary compatible. Most of syscalls/polling.go is included directly into
syscalls/linux/sys_poll.go, as it can then operate directly on
linux.PollFD. The additional syscalls.PollFD type is providing little
value.
I've also added explicit conversion functions for waiter.EventMask,
which creates the possibility of a different binary format.
PiperOrigin-RevId: 244042947
Change-Id: I24e5b642002a32b3afb95a9dcb80d4acd1288abf
Diffstat (limited to 'pkg/sentry/syscalls/linux/sys_poll.go')
-rw-r--r-- | pkg/sentry/syscalls/linux/sys_poll.go | 133 |
1 files changed, 120 insertions, 13 deletions
diff --git a/pkg/sentry/syscalls/linux/sys_poll.go b/pkg/sentry/syscalls/linux/sys_poll.go index 0cf6aad7f..23fcb907f 100644 --- a/pkg/sentry/syscalls/linux/sys_poll.go +++ b/pkg/sentry/syscalls/linux/sys_poll.go @@ -19,11 +19,11 @@ import ( "gvisor.googlesource.com/gvisor/pkg/abi/linux" "gvisor.googlesource.com/gvisor/pkg/sentry/arch" + "gvisor.googlesource.com/gvisor/pkg/sentry/fs" "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/kdefs" ktime "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time" "gvisor.googlesource.com/gvisor/pkg/sentry/limits" - "gvisor.googlesource.com/gvisor/pkg/sentry/syscalls" "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" "gvisor.googlesource.com/gvisor/pkg/syserror" "gvisor.googlesource.com/gvisor/pkg/waiter" @@ -37,23 +37,130 @@ const fileCap = 1024 * 1024 const ( // selectReadEvents is analogous to the Linux kernel's // fs/select.c:POLLIN_SET. - selectReadEvents = waiter.EventIn | waiter.EventHUp | waiter.EventErr + selectReadEvents = linux.POLLIN | linux.POLLHUP | linux.POLLERR // selectWriteEvents is analogous to the Linux kernel's // fs/select.c:POLLOUT_SET. - selectWriteEvents = waiter.EventOut | waiter.EventErr + selectWriteEvents = linux.POLLOUT | linux.POLLERR // selectExceptEvents is analogous to the Linux kernel's // fs/select.c:POLLEX_SET. - selectExceptEvents = waiter.EventPri + selectExceptEvents = linux.POLLPRI ) +// pollState tracks the associated file descriptor and waiter of a PollFD. +type pollState struct { + file *fs.File + waiter waiter.Entry +} + +// initReadiness gets the current ready mask for the file represented by the FD +// stored in pfd.FD. If a channel is passed in, the waiter entry in "state" is +// used to register with the file for event notifications, and a reference to +// the file is stored in "state". +func initReadiness(t *kernel.Task, pfd *linux.PollFD, state *pollState, ch chan struct{}) { + if pfd.FD < 0 { + pfd.REvents = 0 + return + } + + file := t.FDMap().GetFile(kdefs.FD(pfd.FD)) + if file == nil { + pfd.REvents = linux.POLLNVAL + return + } + + if ch == nil { + defer file.DecRef() + } else { + state.file = file + state.waiter, _ = waiter.NewChannelEntry(ch) + file.EventRegister(&state.waiter, waiter.EventMaskFromLinux(uint32(pfd.Events))) + } + + r := file.Readiness(waiter.EventMaskFromLinux(uint32(pfd.Events))) + pfd.REvents = int16(r.ToLinux()) & pfd.Events +} + +// releaseState releases all the pollState in "state". +func releaseState(state []pollState) { + for i := range state { + if state[i].file != nil { + state[i].file.EventUnregister(&state[i].waiter) + state[i].file.DecRef() + } + } +} + +// pollBlock polls the PollFDs in "pfd" with a bounded time specified in "timeout" +// when "timeout" is greater than zero. +// +// pollBlock returns the remaining timeout, which is always 0 on a timeout; and 0 or +// positive if interrupted by a signal. +func pollBlock(t *kernel.Task, pfd []linux.PollFD, timeout time.Duration) (time.Duration, uintptr, error) { + var ch chan struct{} + if timeout != 0 { + ch = make(chan struct{}, 1) + } + + // Register for event notification in the files involved if we may + // block (timeout not zero). Once we find a file that has a non-zero + // result, we stop registering for events but still go through all files + // to get their ready masks. + state := make([]pollState, len(pfd)) + defer releaseState(state) + n := uintptr(0) + for i := range pfd { + initReadiness(t, &pfd[i], &state[i], ch) + if pfd[i].REvents != 0 { + n++ + ch = nil + } + } + + if timeout == 0 { + return timeout, n, nil + } + + forever := timeout < 0 + + for n == 0 { + var err error + // Wait for a notification. + timeout, err = t.BlockWithTimeout(ch, !forever, timeout) + if err != nil { + if err == syserror.ETIMEDOUT { + err = nil + } + return timeout, 0, err + } + + // We got notified, count how many files are ready. If none, + // then this was a spurious notification, and we just go back + // to sleep with the remaining timeout. + for i := range state { + if state[i].file == nil { + continue + } + + r := state[i].file.Readiness(waiter.EventMaskFromLinux(uint32(pfd[i].Events))) + rl := int16(r.ToLinux()) & pfd[i].Events + if rl != 0 { + pfd[i].REvents = rl + n++ + } + } + } + + return timeout, n, nil +} + func doPoll(t *kernel.Task, pfdAddr usermem.Addr, nfds uint, timeout time.Duration) (time.Duration, uintptr, error) { if uint64(nfds) > t.ThreadGroup().Limits().GetCapped(limits.NumberOfFiles, fileCap) { return timeout, 0, syserror.EINVAL } - pfd := make([]syscalls.PollFD, nfds) + pfd := make([]linux.PollFD, nfds) if nfds > 0 { if _, err := t.CopyIn(pfdAddr, &pfd); err != nil { return timeout, 0, err @@ -65,9 +172,9 @@ func doPoll(t *kernel.Task, pfdAddr usermem.Addr, nfds uint, timeout time.Durati // polling, changing event masks here is an application-visible difference. // (Linux also doesn't copy out event masks at all, only revents.) for i := range pfd { - pfd[i].Events |= waiter.EventHUp | waiter.EventErr + pfd[i].Events |= linux.POLLHUP | linux.POLLERR } - remainingTimeout, n, err := syscalls.Poll(t, pfd, timeout) + remainingTimeout, n, err := pollBlock(t, pfd, timeout) err = syserror.ConvertIntr(err, syserror.EINTR) // The poll entries are copied out regardless of whether @@ -136,8 +243,8 @@ func doSelect(t *kernel.Task, nfds int, readFDs, writeFDs, exceptFDs usermem.Add } // Build the PollFD array. - pfd := make([]syscalls.PollFD, 0, fdCount) - fd := kdefs.FD(0) + pfd := make([]linux.PollFD, 0, fdCount) + var fd int32 for i := 0; i < byteCount; i++ { rV, wV, eV := r[i], w[i], e[i] v := rV | wV | eV @@ -148,13 +255,13 @@ func doSelect(t *kernel.Task, nfds int, readFDs, writeFDs, exceptFDs usermem.Add // immediately to ensure we don't leak. Note, another thread // might be about to close fd. This is racy, but that's // OK. Linux is racy in the same way. - file := t.FDMap().GetFile(fd) + file := t.FDMap().GetFile(kdefs.FD(fd)) if file == nil { return 0, syserror.EBADF } file.DecRef() - mask := waiter.EventMask(0) + var mask int16 if (rV & m) != 0 { mask |= selectReadEvents } @@ -167,7 +274,7 @@ func doSelect(t *kernel.Task, nfds int, readFDs, writeFDs, exceptFDs usermem.Add mask |= selectExceptEvents } - pfd = append(pfd, syscalls.PollFD{ + pfd = append(pfd, linux.PollFD{ FD: fd, Events: mask, }) @@ -179,7 +286,7 @@ func doSelect(t *kernel.Task, nfds int, readFDs, writeFDs, exceptFDs usermem.Add } // Do the syscall, then count the number of bits set. - _, _, err := syscalls.Poll(t, pfd, timeout) + _, _, err := pollBlock(t, pfd, timeout) if err != nil { return 0, syserror.ConvertIntr(err, syserror.EINTR) } |