From 08d99c5fbea76ecc92038280387d24ecdf7ed814 Mon Sep 17 00:00:00 2001 From: Michael Pratt Date: Wed, 17 Apr 2019 12:13:46 -0700 Subject: Convert poll/select to operate more directly on linux.PollFD Currently, doPoll copies the user struct pollfd array into a []syscalls.PollFD, which contains internal kdefs.FD and waiter.EventMask types. While these are currently binary-compatible with the Linux versions, we generally discourage copying directly to internal types (someone may inadvertently change kdefs.FD to uint64). Instead, copy directly to a []linux.PollFD, which will certainly be binary compatible. Most of syscalls/polling.go is included directly into syscalls/linux/sys_poll.go, as it can then operate directly on linux.PollFD. The additional syscalls.PollFD type is providing little value. I've also added explicit conversion functions for waiter.EventMask, which creates the possibility of a different binary format. PiperOrigin-RevId: 244042947 Change-Id: I24e5b642002a32b3afb95a9dcb80d4acd1288abf --- pkg/sentry/syscalls/BUILD | 2 - pkg/sentry/syscalls/linux/sys_epoll.go | 2 +- pkg/sentry/syscalls/linux/sys_poll.go | 133 ++++++++++++++++++++++++++++---- pkg/sentry/syscalls/polling.go | 137 --------------------------------- 4 files changed, 121 insertions(+), 153 deletions(-) delete mode 100644 pkg/sentry/syscalls/polling.go (limited to 'pkg/sentry/syscalls') diff --git a/pkg/sentry/syscalls/BUILD b/pkg/sentry/syscalls/BUILD index 6b5469e45..877318fa9 100644 --- a/pkg/sentry/syscalls/BUILD +++ b/pkg/sentry/syscalls/BUILD @@ -6,7 +6,6 @@ go_library( name = "syscalls", srcs = [ "epoll.go", - "polling.go", "syscalls.go", ], importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/syscalls", @@ -14,7 +13,6 @@ go_library( deps = [ "//pkg/abi/linux", "//pkg/sentry/arch", - "//pkg/sentry/fs", "//pkg/sentry/kernel", "//pkg/sentry/kernel/epoll", "//pkg/sentry/kernel/kdefs", diff --git a/pkg/sentry/syscalls/linux/sys_epoll.go b/pkg/sentry/syscalls/linux/sys_epoll.go index 
62272efcd..200c46355 100644 --- a/pkg/sentry/syscalls/linux/sys_epoll.go +++ b/pkg/sentry/syscalls/linux/sys_epoll.go @@ -87,7 +87,7 @@ func EpollCtl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc flags |= epoll.EdgeTriggered } - mask = waiter.EventMask(e.Events) + mask = waiter.EventMaskFromLinux(e.Events) data[0] = e.Fd data[1] = e.Pad } diff --git a/pkg/sentry/syscalls/linux/sys_poll.go b/pkg/sentry/syscalls/linux/sys_poll.go index 0cf6aad7f..23fcb907f 100644 --- a/pkg/sentry/syscalls/linux/sys_poll.go +++ b/pkg/sentry/syscalls/linux/sys_poll.go @@ -19,11 +19,11 @@ import ( "gvisor.googlesource.com/gvisor/pkg/abi/linux" "gvisor.googlesource.com/gvisor/pkg/sentry/arch" + "gvisor.googlesource.com/gvisor/pkg/sentry/fs" "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/kdefs" ktime "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time" "gvisor.googlesource.com/gvisor/pkg/sentry/limits" - "gvisor.googlesource.com/gvisor/pkg/sentry/syscalls" "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" "gvisor.googlesource.com/gvisor/pkg/syserror" "gvisor.googlesource.com/gvisor/pkg/waiter" @@ -37,23 +37,130 @@ const fileCap = 1024 * 1024 const ( // selectReadEvents is analogous to the Linux kernel's // fs/select.c:POLLIN_SET. - selectReadEvents = waiter.EventIn | waiter.EventHUp | waiter.EventErr + selectReadEvents = linux.POLLIN | linux.POLLHUP | linux.POLLERR // selectWriteEvents is analogous to the Linux kernel's // fs/select.c:POLLOUT_SET. - selectWriteEvents = waiter.EventOut | waiter.EventErr + selectWriteEvents = linux.POLLOUT | linux.POLLERR // selectExceptEvents is analogous to the Linux kernel's // fs/select.c:POLLEX_SET. - selectExceptEvents = waiter.EventPri + selectExceptEvents = linux.POLLPRI ) +// pollState tracks the associated file descriptor and waiter of a PollFD. 
+type pollState struct { + file *fs.File + waiter waiter.Entry +} + +// initReadiness gets the current ready mask for the file represented by the FD +// stored in pfd.FD. If a channel is passed in, the waiter entry in "state" is +// used to register with the file for event notifications, and a reference to +// the file is stored in "state". +func initReadiness(t *kernel.Task, pfd *linux.PollFD, state *pollState, ch chan struct{}) { + if pfd.FD < 0 { + pfd.REvents = 0 + return + } + + file := t.FDMap().GetFile(kdefs.FD(pfd.FD)) + if file == nil { + pfd.REvents = linux.POLLNVAL + return + } + + if ch == nil { + defer file.DecRef() + } else { + state.file = file + state.waiter, _ = waiter.NewChannelEntry(ch) + file.EventRegister(&state.waiter, waiter.EventMaskFromLinux(uint32(pfd.Events))) + } + + r := file.Readiness(waiter.EventMaskFromLinux(uint32(pfd.Events))) + pfd.REvents = int16(r.ToLinux()) & pfd.Events +} + +// releaseState releases all the pollState in "state". +func releaseState(state []pollState) { + for i := range state { + if state[i].file != nil { + state[i].file.EventUnregister(&state[i].waiter) + state[i].file.DecRef() + } + } +} + +// pollBlock polls the PollFDs in "pfd" with a bounded time specified in "timeout" +// when "timeout" is greater than zero. +// +// pollBlock returns the remaining timeout, which is always 0 on a timeout; and 0 or +// positive if interrupted by a signal. +func pollBlock(t *kernel.Task, pfd []linux.PollFD, timeout time.Duration) (time.Duration, uintptr, error) { + var ch chan struct{} + if timeout != 0 { + ch = make(chan struct{}, 1) + } + + // Register for event notification in the files involved if we may + // block (timeout not zero). Once we find a file that has a non-zero + // result, we stop registering for events but still go through all files + // to get their ready masks. 
+ state := make([]pollState, len(pfd)) + defer releaseState(state) + n := uintptr(0) + for i := range pfd { + initReadiness(t, &pfd[i], &state[i], ch) + if pfd[i].REvents != 0 { + n++ + ch = nil + } + } + + if timeout == 0 { + return timeout, n, nil + } + + forever := timeout < 0 + + for n == 0 { + var err error + // Wait for a notification. + timeout, err = t.BlockWithTimeout(ch, !forever, timeout) + if err != nil { + if err == syserror.ETIMEDOUT { + err = nil + } + return timeout, 0, err + } + + // We got notified, count how many files are ready. If none, + // then this was a spurious notification, and we just go back + // to sleep with the remaining timeout. + for i := range state { + if state[i].file == nil { + continue + } + + r := state[i].file.Readiness(waiter.EventMaskFromLinux(uint32(pfd[i].Events))) + rl := int16(r.ToLinux()) & pfd[i].Events + if rl != 0 { + pfd[i].REvents = rl + n++ + } + } + } + + return timeout, n, nil +} + func doPoll(t *kernel.Task, pfdAddr usermem.Addr, nfds uint, timeout time.Duration) (time.Duration, uintptr, error) { if uint64(nfds) > t.ThreadGroup().Limits().GetCapped(limits.NumberOfFiles, fileCap) { return timeout, 0, syserror.EINVAL } - pfd := make([]syscalls.PollFD, nfds) + pfd := make([]linux.PollFD, nfds) if nfds > 0 { if _, err := t.CopyIn(pfdAddr, &pfd); err != nil { return timeout, 0, err @@ -65,9 +172,9 @@ func doPoll(t *kernel.Task, pfdAddr usermem.Addr, nfds uint, timeout time.Durati // polling, changing event masks here is an application-visible difference. // (Linux also doesn't copy out event masks at all, only revents.) 
for i := range pfd { - pfd[i].Events |= waiter.EventHUp | waiter.EventErr + pfd[i].Events |= linux.POLLHUP | linux.POLLERR } - remainingTimeout, n, err := syscalls.Poll(t, pfd, timeout) + remainingTimeout, n, err := pollBlock(t, pfd, timeout) err = syserror.ConvertIntr(err, syserror.EINTR) // The poll entries are copied out regardless of whether @@ -136,8 +243,8 @@ func doSelect(t *kernel.Task, nfds int, readFDs, writeFDs, exceptFDs usermem.Add } // Build the PollFD array. - pfd := make([]syscalls.PollFD, 0, fdCount) - fd := kdefs.FD(0) + pfd := make([]linux.PollFD, 0, fdCount) + var fd int32 for i := 0; i < byteCount; i++ { rV, wV, eV := r[i], w[i], e[i] v := rV | wV | eV @@ -148,13 +255,13 @@ func doSelect(t *kernel.Task, nfds int, readFDs, writeFDs, exceptFDs usermem.Add // immediately to ensure we don't leak. Note, another thread // might be about to close fd. This is racy, but that's // OK. Linux is racy in the same way. - file := t.FDMap().GetFile(fd) + file := t.FDMap().GetFile(kdefs.FD(fd)) if file == nil { return 0, syserror.EBADF } file.DecRef() - mask := waiter.EventMask(0) + var mask int16 if (rV & m) != 0 { mask |= selectReadEvents } @@ -167,7 +274,7 @@ func doSelect(t *kernel.Task, nfds int, readFDs, writeFDs, exceptFDs usermem.Add mask |= selectExceptEvents } - pfd = append(pfd, syscalls.PollFD{ + pfd = append(pfd, linux.PollFD{ FD: fd, Events: mask, }) @@ -179,7 +286,7 @@ func doSelect(t *kernel.Task, nfds int, readFDs, writeFDs, exceptFDs usermem.Add } // Do the syscall, then count the number of bits set. 
- _, _, err := syscalls.Poll(t, pfd, timeout) + _, _, err := pollBlock(t, pfd, timeout) if err != nil { return 0, syserror.ConvertIntr(err, syserror.EINTR) } diff --git a/pkg/sentry/syscalls/polling.go b/pkg/sentry/syscalls/polling.go deleted file mode 100644 index 2b33d6c19..000000000 --- a/pkg/sentry/syscalls/polling.go +++ /dev/null @@ -1,137 +0,0 @@ -// Copyright 2018 Google LLC -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package syscalls - -import ( - "syscall" - "time" - - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/kdefs" - "gvisor.googlesource.com/gvisor/pkg/waiter" -) - -// PollFD describes a pollable FD. -type PollFD struct { - FD kdefs.FD - Events waiter.EventMask - REvents waiter.EventMask -} - -// pollState tracks the associated file descriptor and waiter of a PollFD. -type pollState struct { - file *fs.File - waiter waiter.Entry -} - -// initReadiness gets the current ready mask for the file represented by the FD -// stored in pfd.FD. If a channel is passed in, the waiter entry in "state" is -// used to register with the file for event notifications, and a reference to -// the file is stored in "state". 
-func (pfd *PollFD) initReadiness(t *kernel.Task, state *pollState, ch chan struct{}) { - if pfd.FD < 0 { - pfd.REvents = 0 - return - } - - file := t.FDMap().GetFile(pfd.FD) - if file == nil { - pfd.REvents = waiter.EventNVal - return - } - - if ch == nil { - defer file.DecRef() - } else { - state.file = file - state.waiter, _ = waiter.NewChannelEntry(ch) - file.EventRegister(&state.waiter, pfd.Events) - } - - pfd.REvents = file.Readiness(pfd.Events) & pfd.Events -} - -// releaseState releases all the pollState in "state". -func releaseState(state []pollState) { - for i := range state { - if state[i].file != nil { - state[i].file.EventUnregister(&state[i].waiter) - state[i].file.DecRef() - } - } -} - -// Poll polls the PollFDs in "pfd" with a bounded time specified in "timeout" -// when "timeout" is greater than zero. -// -// Poll returns the remaining timeout, which is always 0 on a timeout; and 0 or -// positive if interrupted by a signal. -func Poll(t *kernel.Task, pfd []PollFD, timeout time.Duration) (time.Duration, uintptr, error) { - var ch chan struct{} - if timeout != 0 { - ch = make(chan struct{}, 1) - } - - // Register for event notification in the files involved if we may - // block (timeout not zero). Once we find a file that has a non-zero - // result, we stop registering for events but still go through all files - // to get their ready masks. - state := make([]pollState, len(pfd)) - defer releaseState(state) - n := uintptr(0) - for i := range pfd { - pfd[i].initReadiness(t, &state[i], ch) - if pfd[i].REvents != 0 { - n++ - ch = nil - } - } - - if timeout == 0 { - return timeout, n, nil - } - - forever := timeout < 0 - - for n == 0 { - var err error - // Wait for a notification. - timeout, err = t.BlockWithTimeout(ch, !forever, timeout) - if err != nil { - if err == syscall.ETIMEDOUT { - err = nil - } - return timeout, 0, err - } - - // We got notified, count how many files are ready. 
If none, - // then this was a spurious notification, and we just go back - // to sleep with the remaining timeout. - for i := range state { - if state[i].file == nil { - continue - } - - ready := state[i].file.Readiness(pfd[i].Events) & pfd[i].Events - if ready != 0 { - pfd[i].REvents = ready - n++ - } - } - } - - return timeout, n, nil -} -- cgit v1.2.3