diff options
author | Michael Pratt <mpratt@google.com> | 2019-04-17 12:13:46 -0700 |
---|---|---|
committer | Shentubot <shentubot@google.com> | 2019-04-17 12:15:01 -0700 |
commit | 08d99c5fbea76ecc92038280387d24ecdf7ed814 (patch) | |
tree | 76df71b51b5515098e8c61978c441e8c530526ff /pkg | |
parent | e091b4e7c07056e32120ab25cc9a78ed24f7c754 (diff) |
Convert poll/select to operate more directly on linux.PollFD
Current, doPoll copies the user struct pollfd array into a
[]syscalls.PollFD, which contains internal kdefs.FD and
waiter.EventMask types. While these are currently binary-compatible with
the Linux versions, we generally discourage copying directly to internal
types (someone may inadvertantly change kdefs.FD to uint64).
Instead, copy directly to a []linux.PollFD, which will certainly be
binary compatible. Most of syscalls/polling.go is included directly into
syscalls/linux/sys_poll.go, as it can then operate directly on
linux.PollFD. The additional syscalls.PollFD type is providing little
value.
I've also added explicit conversion functions for waiter.EventMask,
which creates the possibility of a different binary format.
PiperOrigin-RevId: 244042947
Change-Id: I24e5b642002a32b3afb95a9dcb80d4acd1288abf
Diffstat (limited to 'pkg')
-rw-r--r-- | pkg/fdnotifier/fdnotifier.go | 6 | ||||
-rw-r--r-- | pkg/fdnotifier/poll_unsafe.go | 6 | ||||
-rw-r--r-- | pkg/sentry/socket/rpcinet/notifier/notifier.go | 8 | ||||
-rw-r--r-- | pkg/sentry/syscalls/BUILD | 2 | ||||
-rw-r--r-- | pkg/sentry/syscalls/linux/sys_epoll.go | 2 | ||||
-rw-r--r-- | pkg/sentry/syscalls/linux/sys_poll.go | 133 | ||||
-rw-r--r-- | pkg/sentry/syscalls/polling.go | 137 | ||||
-rw-r--r-- | pkg/waiter/waiter.go | 26 |
8 files changed, 153 insertions, 167 deletions
diff --git a/pkg/fdnotifier/fdnotifier.go b/pkg/fdnotifier/fdnotifier.go index 624b1a0c5..aa4906ca0 100644 --- a/pkg/fdnotifier/fdnotifier.go +++ b/pkg/fdnotifier/fdnotifier.go @@ -12,6 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. +// +build linux + // Package fdnotifier contains an adapter that translates IO events (e.g., a // file became readable/writable) from native FDs to the notifications in the // waiter package. It uses epoll in edge-triggered mode to receive notifications @@ -70,7 +72,7 @@ func (n *notifier) waitFD(fd int32, fi *fdInfo, mask waiter.EventMask) error { } e := syscall.EpollEvent{ - Events: uint32(mask) | -syscall.EPOLLET, + Events: mask.ToLinux() | -syscall.EPOLLET, Fd: fd, } @@ -155,7 +157,7 @@ func (n *notifier) waitAndNotify() error { n.mu.Lock() for i := 0; i < v; i++ { if fi, ok := n.fdMap[e[i].Fd]; ok { - fi.queue.Notify(waiter.EventMask(e[i].Events)) + fi.queue.Notify(waiter.EventMaskFromLinux(e[i].Events)) } } n.mu.Unlock() diff --git a/pkg/fdnotifier/poll_unsafe.go b/pkg/fdnotifier/poll_unsafe.go index 8459d4c74..05be9aeb5 100644 --- a/pkg/fdnotifier/poll_unsafe.go +++ b/pkg/fdnotifier/poll_unsafe.go @@ -12,6 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. +// +build linux + package fdnotifier import ( @@ -30,7 +32,7 @@ func NonBlockingPoll(fd int32, mask waiter.EventMask) waiter.EventMask { revents int16 }{ fd: fd, - events: int16(mask), + events: int16(mask.ToLinux()), } for { @@ -51,7 +53,7 @@ func NonBlockingPoll(fd int32, mask waiter.EventMask) waiter.EventMask { } // Otherwise we got the ready events in the revents field. - return waiter.EventMask(e.revents) + return waiter.EventMaskFromLinux(uint32(e.revents)) } } diff --git a/pkg/sentry/socket/rpcinet/notifier/notifier.go b/pkg/sentry/socket/rpcinet/notifier/notifier.go index 73c255c33..d9bda78b0 100644 --- a/pkg/sentry/socket/rpcinet/notifier/notifier.go +++ b/pkg/sentry/socket/rpcinet/notifier/notifier.go @@ -76,7 +76,7 @@ func (n *Notifier) waitFD(fd uint32, fi *fdInfo, mask waiter.EventMask) error { } e := pb.EpollEvent{ - Events: uint32(mask) | -syscall.EPOLLET, + Events: mask.ToLinux() | -syscall.EPOLLET, Fd: fd, } @@ -178,7 +178,7 @@ func (n *Notifier) waitAndNotify() error { n.mu.Lock() for _, e := range res.(*pb.EpollWaitResponse_Events).Events.Events { if fi, ok := n.fdMap[e.Fd]; ok { - fi.queue.Notify(waiter.EventMask(e.Events)) + fi.queue.Notify(waiter.EventMaskFromLinux(e.Events)) } } n.mu.Unlock() @@ -214,7 +214,7 @@ func (n *Notifier) HasFD(fd uint32) bool { // although the syscall is non-blocking. func (n *Notifier) NonBlockingPoll(fd uint32, mask waiter.EventMask) waiter.EventMask { for { - id, c := n.rpcConn.NewRequest(pb.SyscallRequest{Args: &pb.SyscallRequest_Poll{&pb.PollRequest{Fd: fd, Events: uint32(mask)}}}, false /* ignoreResult */) + id, c := n.rpcConn.NewRequest(pb.SyscallRequest{Args: &pb.SyscallRequest_Poll{&pb.PollRequest{Fd: fd, Events: mask.ToLinux()}}}, false /* ignoreResult */) <-c res := n.rpcConn.Request(id).Result.(*pb.SyscallResponse_Poll).Poll.Result @@ -225,6 +225,6 @@ func (n *Notifier) NonBlockingPoll(fd uint32, mask waiter.EventMask) waiter.Even return mask } - return waiter.EventMask(res.(*pb.PollResponse_Events).Events) + return waiter.EventMaskFromLinux(res.(*pb.PollResponse_Events).Events) } } diff --git a/pkg/sentry/syscalls/BUILD b/pkg/sentry/syscalls/BUILD index 6b5469e45..877318fa9 100644 --- a/pkg/sentry/syscalls/BUILD +++ b/pkg/sentry/syscalls/BUILD @@ -6,7 +6,6 @@ go_library( name = "syscalls", srcs = [ "epoll.go", - "polling.go", "syscalls.go", ], importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/syscalls", @@ -14,7 +13,6 @@ go_library( deps = [ "//pkg/abi/linux", "//pkg/sentry/arch", - "//pkg/sentry/fs", "//pkg/sentry/kernel", "//pkg/sentry/kernel/epoll", "//pkg/sentry/kernel/kdefs", diff --git a/pkg/sentry/syscalls/linux/sys_epoll.go b/pkg/sentry/syscalls/linux/sys_epoll.go index 62272efcd..200c46355 100644 --- a/pkg/sentry/syscalls/linux/sys_epoll.go +++ b/pkg/sentry/syscalls/linux/sys_epoll.go @@ -87,7 +87,7 @@ func EpollCtl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc flags |= epoll.EdgeTriggered } - mask = waiter.EventMask(e.Events) + mask = waiter.EventMaskFromLinux(e.Events) data[0] = e.Fd data[1] = e.Pad } diff --git a/pkg/sentry/syscalls/linux/sys_poll.go b/pkg/sentry/syscalls/linux/sys_poll.go index 0cf6aad7f..23fcb907f 100644 --- a/pkg/sentry/syscalls/linux/sys_poll.go +++ b/pkg/sentry/syscalls/linux/sys_poll.go @@ -19,11 +19,11 @@ import ( "gvisor.googlesource.com/gvisor/pkg/abi/linux" "gvisor.googlesource.com/gvisor/pkg/sentry/arch" + "gvisor.googlesource.com/gvisor/pkg/sentry/fs" "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/kdefs" ktime "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time" "gvisor.googlesource.com/gvisor/pkg/sentry/limits" - "gvisor.googlesource.com/gvisor/pkg/sentry/syscalls" "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" "gvisor.googlesource.com/gvisor/pkg/syserror" "gvisor.googlesource.com/gvisor/pkg/waiter" @@ -37,23 +37,130 @@ const fileCap = 1024 * 1024 const ( // selectReadEvents is analogous to the Linux kernel's // fs/select.c:POLLIN_SET. - selectReadEvents = waiter.EventIn | waiter.EventHUp | waiter.EventErr + selectReadEvents = linux.POLLIN | linux.POLLHUP | linux.POLLERR // selectWriteEvents is analogous to the Linux kernel's // fs/select.c:POLLOUT_SET. - selectWriteEvents = waiter.EventOut | waiter.EventErr + selectWriteEvents = linux.POLLOUT | linux.POLLERR // selectExceptEvents is analogous to the Linux kernel's // fs/select.c:POLLEX_SET. - selectExceptEvents = waiter.EventPri + selectExceptEvents = linux.POLLPRI ) +// pollState tracks the associated file descriptor and waiter of a PollFD. +type pollState struct { + file *fs.File + waiter waiter.Entry +} + +// initReadiness gets the current ready mask for the file represented by the FD +// stored in pfd.FD. If a channel is passed in, the waiter entry in "state" is +// used to register with the file for event notifications, and a reference to +// the file is stored in "state". +func initReadiness(t *kernel.Task, pfd *linux.PollFD, state *pollState, ch chan struct{}) { + if pfd.FD < 0 { + pfd.REvents = 0 + return + } + + file := t.FDMap().GetFile(kdefs.FD(pfd.FD)) + if file == nil { + pfd.REvents = linux.POLLNVAL + return + } + + if ch == nil { + defer file.DecRef() + } else { + state.file = file + state.waiter, _ = waiter.NewChannelEntry(ch) + file.EventRegister(&state.waiter, waiter.EventMaskFromLinux(uint32(pfd.Events))) + } + + r := file.Readiness(waiter.EventMaskFromLinux(uint32(pfd.Events))) + pfd.REvents = int16(r.ToLinux()) & pfd.Events +} + +// releaseState releases all the pollState in "state". +func releaseState(state []pollState) { + for i := range state { + if state[i].file != nil { + state[i].file.EventUnregister(&state[i].waiter) + state[i].file.DecRef() + } + } +} + +// pollBlock polls the PollFDs in "pfd" with a bounded time specified in "timeout" +// when "timeout" is greater than zero. +// +// pollBlock returns the remaining timeout, which is always 0 on a timeout; and 0 or +// positive if interrupted by a signal. +func pollBlock(t *kernel.Task, pfd []linux.PollFD, timeout time.Duration) (time.Duration, uintptr, error) { + var ch chan struct{} + if timeout != 0 { + ch = make(chan struct{}, 1) + } + + // Register for event notification in the files involved if we may + // block (timeout not zero). Once we find a file that has a non-zero + // result, we stop registering for events but still go through all files + // to get their ready masks. + state := make([]pollState, len(pfd)) + defer releaseState(state) + n := uintptr(0) + for i := range pfd { + initReadiness(t, &pfd[i], &state[i], ch) + if pfd[i].REvents != 0 { + n++ + ch = nil + } + } + + if timeout == 0 { + return timeout, n, nil + } + + forever := timeout < 0 + + for n == 0 { + var err error + // Wait for a notification. + timeout, err = t.BlockWithTimeout(ch, !forever, timeout) + if err != nil { + if err == syserror.ETIMEDOUT { + err = nil + } + return timeout, 0, err + } + + // We got notified, count how many files are ready. If none, + // then this was a spurious notification, and we just go back + // to sleep with the remaining timeout. + for i := range state { + if state[i].file == nil { + continue + } + + r := state[i].file.Readiness(waiter.EventMaskFromLinux(uint32(pfd[i].Events))) + rl := int16(r.ToLinux()) & pfd[i].Events + if rl != 0 { + pfd[i].REvents = rl + n++ + } + } + } + + return timeout, n, nil +} + func doPoll(t *kernel.Task, pfdAddr usermem.Addr, nfds uint, timeout time.Duration) (time.Duration, uintptr, error) { if uint64(nfds) > t.ThreadGroup().Limits().GetCapped(limits.NumberOfFiles, fileCap) { return timeout, 0, syserror.EINVAL } - pfd := make([]syscalls.PollFD, nfds) + pfd := make([]linux.PollFD, nfds) if nfds > 0 { if _, err := t.CopyIn(pfdAddr, &pfd); err != nil { return timeout, 0, err @@ -65,9 +172,9 @@ func doPoll(t *kernel.Task, pfdAddr usermem.Addr, nfds uint, timeout time.Durati // polling, changing event masks here is an application-visible difference. // (Linux also doesn't copy out event masks at all, only revents.) for i := range pfd { - pfd[i].Events |= waiter.EventHUp | waiter.EventErr + pfd[i].Events |= linux.POLLHUP | linux.POLLERR } - remainingTimeout, n, err := syscalls.Poll(t, pfd, timeout) + remainingTimeout, n, err := pollBlock(t, pfd, timeout) err = syserror.ConvertIntr(err, syserror.EINTR) // The poll entries are copied out regardless of whether @@ -136,8 +243,8 @@ func doSelect(t *kernel.Task, nfds int, readFDs, writeFDs, exceptFDs usermem.Add } // Build the PollFD array. - pfd := make([]syscalls.PollFD, 0, fdCount) - fd := kdefs.FD(0) + pfd := make([]linux.PollFD, 0, fdCount) + var fd int32 for i := 0; i < byteCount; i++ { rV, wV, eV := r[i], w[i], e[i] v := rV | wV | eV @@ -148,13 +255,13 @@ func doSelect(t *kernel.Task, nfds int, readFDs, writeFDs, exceptFDs usermem.Add // immediately to ensure we don't leak. Note, another thread // might be about to close fd. This is racy, but that's // OK. Linux is racy in the same way. - file := t.FDMap().GetFile(fd) + file := t.FDMap().GetFile(kdefs.FD(fd)) if file == nil { return 0, syserror.EBADF } file.DecRef() - mask := waiter.EventMask(0) + var mask int16 if (rV & m) != 0 { mask |= selectReadEvents } @@ -167,7 +274,7 @@ func doSelect(t *kernel.Task, nfds int, readFDs, writeFDs, exceptFDs usermem.Add mask |= selectExceptEvents } - pfd = append(pfd, syscalls.PollFD{ + pfd = append(pfd, linux.PollFD{ FD: fd, Events: mask, }) @@ -179,7 +286,7 @@ func doSelect(t *kernel.Task, nfds int, readFDs, writeFDs, exceptFDs usermem.Add } // Do the syscall, then count the number of bits set. - _, _, err := syscalls.Poll(t, pfd, timeout) + _, _, err := pollBlock(t, pfd, timeout) if err != nil { return 0, syserror.ConvertIntr(err, syserror.EINTR) } diff --git a/pkg/sentry/syscalls/polling.go b/pkg/sentry/syscalls/polling.go deleted file mode 100644 index 2b33d6c19..000000000 --- a/pkg/sentry/syscalls/polling.go +++ /dev/null @@ -1,137 +0,0 @@ -// Copyright 2018 Google LLC -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package syscalls - -import ( - "syscall" - "time" - - "gvisor.googlesource.com/gvisor/pkg/sentry/fs" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/kdefs" - "gvisor.googlesource.com/gvisor/pkg/waiter" -) - -// PollFD describes a pollable FD. -type PollFD struct { - FD kdefs.FD - Events waiter.EventMask - REvents waiter.EventMask -} - -// pollState tracks the associated file descriptor and waiter of a PollFD. -type pollState struct { - file *fs.File - waiter waiter.Entry -} - -// initReadiness gets the current ready mask for the file represented by the FD -// stored in pfd.FD. If a channel is passed in, the waiter entry in "state" is -// used to register with the file for event notifications, and a reference to -// the file is stored in "state". -func (pfd *PollFD) initReadiness(t *kernel.Task, state *pollState, ch chan struct{}) { - if pfd.FD < 0 { - pfd.REvents = 0 - return - } - - file := t.FDMap().GetFile(pfd.FD) - if file == nil { - pfd.REvents = waiter.EventNVal - return - } - - if ch == nil { - defer file.DecRef() - } else { - state.file = file - state.waiter, _ = waiter.NewChannelEntry(ch) - file.EventRegister(&state.waiter, pfd.Events) - } - - pfd.REvents = file.Readiness(pfd.Events) & pfd.Events -} - -// releaseState releases all the pollState in "state". -func releaseState(state []pollState) { - for i := range state { - if state[i].file != nil { - state[i].file.EventUnregister(&state[i].waiter) - state[i].file.DecRef() - } - } -} - -// Poll polls the PollFDs in "pfd" with a bounded time specified in "timeout" -// when "timeout" is greater than zero. -// -// Poll returns the remaining timeout, which is always 0 on a timeout; and 0 or -// positive if interrupted by a signal. -func Poll(t *kernel.Task, pfd []PollFD, timeout time.Duration) (time.Duration, uintptr, error) { - var ch chan struct{} - if timeout != 0 { - ch = make(chan struct{}, 1) - } - - // Register for event notification in the files involved if we may - // block (timeout not zero). Once we find a file that has a non-zero - // result, we stop registering for events but still go through all files - // to get their ready masks. - state := make([]pollState, len(pfd)) - defer releaseState(state) - n := uintptr(0) - for i := range pfd { - pfd[i].initReadiness(t, &state[i], ch) - if pfd[i].REvents != 0 { - n++ - ch = nil - } - } - - if timeout == 0 { - return timeout, n, nil - } - - forever := timeout < 0 - - for n == 0 { - var err error - // Wait for a notification. - timeout, err = t.BlockWithTimeout(ch, !forever, timeout) - if err != nil { - if err == syscall.ETIMEDOUT { - err = nil - } - return timeout, 0, err - } - - // We got notified, count how many files are ready. If none, - // then this was a spurious notification, and we just go back - // to sleep with the remaining timeout. - for i := range state { - if state[i].file == nil { - continue - } - - ready := state[i].file.Readiness(pfd[i].Events) & pfd[i].Events - if ready != 0 { - pfd[i].REvents = ready - n++ - } - } - } - - return timeout, n, nil -} diff --git a/pkg/waiter/waiter.go b/pkg/waiter/waiter.go index fd429f733..a6c9dff3c 100644 --- a/pkg/waiter/waiter.go +++ b/pkg/waiter/waiter.go @@ -67,14 +67,28 @@ type EventMask uint16 // Events that waiters can wait on. The meaning is the same as those in the // poll() syscall. const ( - EventIn EventMask = 0x01 // syscall.EPOLLIN - EventPri EventMask = 0x02 // syscall.EPOLLPRI - EventOut EventMask = 0x04 // syscall.EPOLLOUT - EventErr EventMask = 0x08 // syscall.EPOLLERR - EventHUp EventMask = 0x10 // syscall.EPOLLHUP - EventNVal EventMask = 0x20 // Not defined in syscall. + EventIn EventMask = 0x01 // POLLIN + EventPri EventMask = 0x02 // POLLPRI + EventOut EventMask = 0x04 // POLLOUT + EventErr EventMask = 0x08 // POLLERR + EventHUp EventMask = 0x10 // POLLHUP + + allEvents EventMask = 0x1f ) +// EventMaskFromLinux returns an EventMask representing the supported events +// from the Linux events e, which is in the format used by poll(2). +func EventMaskFromLinux(e uint32) EventMask { + // Our flag definitions are currently identical to Linux. + return EventMask(e) & allEvents +} + +// ToLinux returns e in the format used by Linux poll(2). +func (e EventMask) ToLinux() uint32 { + // Our flag definitions are currently identical to Linux. + return uint32(e) +} + // Waitable contains the methods that need to be implemented by waitable // objects. type Waitable interface { |