summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorMichael Pratt <mpratt@google.com>2019-04-17 12:13:46 -0700
committerShentubot <shentubot@google.com>2019-04-17 12:15:01 -0700
commit08d99c5fbea76ecc92038280387d24ecdf7ed814 (patch)
tree76df71b51b5515098e8c61978c441e8c530526ff
parente091b4e7c07056e32120ab25cc9a78ed24f7c754 (diff)
Convert poll/select to operate more directly on linux.PollFD
Currently, doPoll copies the user struct pollfd array into a []syscalls.PollFD, which contains internal kdefs.FD and waiter.EventMask types. While these are currently binary-compatible with the Linux versions, we generally discourage copying directly to internal types (someone may inadvertently change kdefs.FD to uint64). Instead, copy directly to a []linux.PollFD, which will certainly be binary compatible. Most of syscalls/polling.go is included directly into syscalls/linux/sys_poll.go, as it can then operate directly on linux.PollFD. The additional syscalls.PollFD type is providing little value. I've also added explicit conversion functions for waiter.EventMask, which creates the possibility of a different binary format. PiperOrigin-RevId: 244042947 Change-Id: I24e5b642002a32b3afb95a9dcb80d4acd1288abf
-rw-r--r--pkg/fdnotifier/fdnotifier.go6
-rw-r--r--pkg/fdnotifier/poll_unsafe.go6
-rw-r--r--pkg/sentry/socket/rpcinet/notifier/notifier.go8
-rw-r--r--pkg/sentry/syscalls/BUILD2
-rw-r--r--pkg/sentry/syscalls/linux/sys_epoll.go2
-rw-r--r--pkg/sentry/syscalls/linux/sys_poll.go133
-rw-r--r--pkg/sentry/syscalls/polling.go137
-rw-r--r--pkg/waiter/waiter.go26
8 files changed, 153 insertions, 167 deletions
diff --git a/pkg/fdnotifier/fdnotifier.go b/pkg/fdnotifier/fdnotifier.go
index 624b1a0c5..aa4906ca0 100644
--- a/pkg/fdnotifier/fdnotifier.go
+++ b/pkg/fdnotifier/fdnotifier.go
@@ -12,6 +12,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.
+// +build linux
+
// Package fdnotifier contains an adapter that translates IO events (e.g., a
// file became readable/writable) from native FDs to the notifications in the
// waiter package. It uses epoll in edge-triggered mode to receive notifications
@@ -70,7 +72,7 @@ func (n *notifier) waitFD(fd int32, fi *fdInfo, mask waiter.EventMask) error {
}
e := syscall.EpollEvent{
- Events: uint32(mask) | -syscall.EPOLLET,
+ Events: mask.ToLinux() | -syscall.EPOLLET,
Fd: fd,
}
@@ -155,7 +157,7 @@ func (n *notifier) waitAndNotify() error {
n.mu.Lock()
for i := 0; i < v; i++ {
if fi, ok := n.fdMap[e[i].Fd]; ok {
- fi.queue.Notify(waiter.EventMask(e[i].Events))
+ fi.queue.Notify(waiter.EventMaskFromLinux(e[i].Events))
}
}
n.mu.Unlock()
diff --git a/pkg/fdnotifier/poll_unsafe.go b/pkg/fdnotifier/poll_unsafe.go
index 8459d4c74..05be9aeb5 100644
--- a/pkg/fdnotifier/poll_unsafe.go
+++ b/pkg/fdnotifier/poll_unsafe.go
@@ -12,6 +12,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.
+// +build linux
+
package fdnotifier
import (
@@ -30,7 +32,7 @@ func NonBlockingPoll(fd int32, mask waiter.EventMask) waiter.EventMask {
revents int16
}{
fd: fd,
- events: int16(mask),
+ events: int16(mask.ToLinux()),
}
for {
@@ -51,7 +53,7 @@ func NonBlockingPoll(fd int32, mask waiter.EventMask) waiter.EventMask {
}
// Otherwise we got the ready events in the revents field.
- return waiter.EventMask(e.revents)
+ return waiter.EventMaskFromLinux(uint32(e.revents))
}
}
diff --git a/pkg/sentry/socket/rpcinet/notifier/notifier.go b/pkg/sentry/socket/rpcinet/notifier/notifier.go
index 73c255c33..d9bda78b0 100644
--- a/pkg/sentry/socket/rpcinet/notifier/notifier.go
+++ b/pkg/sentry/socket/rpcinet/notifier/notifier.go
@@ -76,7 +76,7 @@ func (n *Notifier) waitFD(fd uint32, fi *fdInfo, mask waiter.EventMask) error {
}
e := pb.EpollEvent{
- Events: uint32(mask) | -syscall.EPOLLET,
+ Events: mask.ToLinux() | -syscall.EPOLLET,
Fd: fd,
}
@@ -178,7 +178,7 @@ func (n *Notifier) waitAndNotify() error {
n.mu.Lock()
for _, e := range res.(*pb.EpollWaitResponse_Events).Events.Events {
if fi, ok := n.fdMap[e.Fd]; ok {
- fi.queue.Notify(waiter.EventMask(e.Events))
+ fi.queue.Notify(waiter.EventMaskFromLinux(e.Events))
}
}
n.mu.Unlock()
@@ -214,7 +214,7 @@ func (n *Notifier) HasFD(fd uint32) bool {
// although the syscall is non-blocking.
func (n *Notifier) NonBlockingPoll(fd uint32, mask waiter.EventMask) waiter.EventMask {
for {
- id, c := n.rpcConn.NewRequest(pb.SyscallRequest{Args: &pb.SyscallRequest_Poll{&pb.PollRequest{Fd: fd, Events: uint32(mask)}}}, false /* ignoreResult */)
+ id, c := n.rpcConn.NewRequest(pb.SyscallRequest{Args: &pb.SyscallRequest_Poll{&pb.PollRequest{Fd: fd, Events: mask.ToLinux()}}}, false /* ignoreResult */)
<-c
res := n.rpcConn.Request(id).Result.(*pb.SyscallResponse_Poll).Poll.Result
@@ -225,6 +225,6 @@ func (n *Notifier) NonBlockingPoll(fd uint32, mask waiter.EventMask) waiter.Even
return mask
}
- return waiter.EventMask(res.(*pb.PollResponse_Events).Events)
+ return waiter.EventMaskFromLinux(res.(*pb.PollResponse_Events).Events)
}
}
diff --git a/pkg/sentry/syscalls/BUILD b/pkg/sentry/syscalls/BUILD
index 6b5469e45..877318fa9 100644
--- a/pkg/sentry/syscalls/BUILD
+++ b/pkg/sentry/syscalls/BUILD
@@ -6,7 +6,6 @@ go_library(
name = "syscalls",
srcs = [
"epoll.go",
- "polling.go",
"syscalls.go",
],
importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/syscalls",
@@ -14,7 +13,6 @@ go_library(
deps = [
"//pkg/abi/linux",
"//pkg/sentry/arch",
- "//pkg/sentry/fs",
"//pkg/sentry/kernel",
"//pkg/sentry/kernel/epoll",
"//pkg/sentry/kernel/kdefs",
diff --git a/pkg/sentry/syscalls/linux/sys_epoll.go b/pkg/sentry/syscalls/linux/sys_epoll.go
index 62272efcd..200c46355 100644
--- a/pkg/sentry/syscalls/linux/sys_epoll.go
+++ b/pkg/sentry/syscalls/linux/sys_epoll.go
@@ -87,7 +87,7 @@ func EpollCtl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc
flags |= epoll.EdgeTriggered
}
- mask = waiter.EventMask(e.Events)
+ mask = waiter.EventMaskFromLinux(e.Events)
data[0] = e.Fd
data[1] = e.Pad
}
diff --git a/pkg/sentry/syscalls/linux/sys_poll.go b/pkg/sentry/syscalls/linux/sys_poll.go
index 0cf6aad7f..23fcb907f 100644
--- a/pkg/sentry/syscalls/linux/sys_poll.go
+++ b/pkg/sentry/syscalls/linux/sys_poll.go
@@ -19,11 +19,11 @@ import (
"gvisor.googlesource.com/gvisor/pkg/abi/linux"
"gvisor.googlesource.com/gvisor/pkg/sentry/arch"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
"gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
"gvisor.googlesource.com/gvisor/pkg/sentry/kernel/kdefs"
ktime "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time"
"gvisor.googlesource.com/gvisor/pkg/sentry/limits"
- "gvisor.googlesource.com/gvisor/pkg/sentry/syscalls"
"gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
"gvisor.googlesource.com/gvisor/pkg/syserror"
"gvisor.googlesource.com/gvisor/pkg/waiter"
@@ -37,23 +37,130 @@ const fileCap = 1024 * 1024
const (
// selectReadEvents is analogous to the Linux kernel's
// fs/select.c:POLLIN_SET.
- selectReadEvents = waiter.EventIn | waiter.EventHUp | waiter.EventErr
+ selectReadEvents = linux.POLLIN | linux.POLLHUP | linux.POLLERR
// selectWriteEvents is analogous to the Linux kernel's
// fs/select.c:POLLOUT_SET.
- selectWriteEvents = waiter.EventOut | waiter.EventErr
+ selectWriteEvents = linux.POLLOUT | linux.POLLERR
// selectExceptEvents is analogous to the Linux kernel's
// fs/select.c:POLLEX_SET.
- selectExceptEvents = waiter.EventPri
+ selectExceptEvents = linux.POLLPRI
)
+// pollState tracks the associated file descriptor and waiter of a PollFD.
+type pollState struct {
+ file *fs.File
+ waiter waiter.Entry
+}
+
+// initReadiness gets the current ready mask for the file represented by the FD
+// stored in pfd.FD. If a channel is passed in, the waiter entry in "state" is
+// used to register with the file for event notifications, and a reference to
+// the file is stored in "state".
+func initReadiness(t *kernel.Task, pfd *linux.PollFD, state *pollState, ch chan struct{}) {
+ if pfd.FD < 0 {
+ pfd.REvents = 0
+ return
+ }
+
+ file := t.FDMap().GetFile(kdefs.FD(pfd.FD))
+ if file == nil {
+ pfd.REvents = linux.POLLNVAL
+ return
+ }
+
+ if ch == nil {
+ defer file.DecRef()
+ } else {
+ state.file = file
+ state.waiter, _ = waiter.NewChannelEntry(ch)
+ file.EventRegister(&state.waiter, waiter.EventMaskFromLinux(uint32(pfd.Events)))
+ }
+
+ r := file.Readiness(waiter.EventMaskFromLinux(uint32(pfd.Events)))
+ pfd.REvents = int16(r.ToLinux()) & pfd.Events
+}
+
+// releaseState releases all the pollState in "state".
+func releaseState(state []pollState) {
+ for i := range state {
+ if state[i].file != nil {
+ state[i].file.EventUnregister(&state[i].waiter)
+ state[i].file.DecRef()
+ }
+ }
+}
+
+// pollBlock polls the PollFDs in "pfd" with a bounded time specified in "timeout"
+// when "timeout" is greater than zero.
+//
+// pollBlock returns the remaining timeout, which is always 0 on a timeout; and 0 or
+// positive if interrupted by a signal.
+func pollBlock(t *kernel.Task, pfd []linux.PollFD, timeout time.Duration) (time.Duration, uintptr, error) {
+ var ch chan struct{}
+ if timeout != 0 {
+ ch = make(chan struct{}, 1)
+ }
+
+ // Register for event notification in the files involved if we may
+ // block (timeout not zero). Once we find a file that has a non-zero
+ // result, we stop registering for events but still go through all files
+ // to get their ready masks.
+ state := make([]pollState, len(pfd))
+ defer releaseState(state)
+ n := uintptr(0)
+ for i := range pfd {
+ initReadiness(t, &pfd[i], &state[i], ch)
+ if pfd[i].REvents != 0 {
+ n++
+ ch = nil
+ }
+ }
+
+ if timeout == 0 {
+ return timeout, n, nil
+ }
+
+ forever := timeout < 0
+
+ for n == 0 {
+ var err error
+ // Wait for a notification.
+ timeout, err = t.BlockWithTimeout(ch, !forever, timeout)
+ if err != nil {
+ if err == syserror.ETIMEDOUT {
+ err = nil
+ }
+ return timeout, 0, err
+ }
+
+ // We got notified, count how many files are ready. If none,
+ // then this was a spurious notification, and we just go back
+ // to sleep with the remaining timeout.
+ for i := range state {
+ if state[i].file == nil {
+ continue
+ }
+
+ r := state[i].file.Readiness(waiter.EventMaskFromLinux(uint32(pfd[i].Events)))
+ rl := int16(r.ToLinux()) & pfd[i].Events
+ if rl != 0 {
+ pfd[i].REvents = rl
+ n++
+ }
+ }
+ }
+
+ return timeout, n, nil
+}
+
func doPoll(t *kernel.Task, pfdAddr usermem.Addr, nfds uint, timeout time.Duration) (time.Duration, uintptr, error) {
if uint64(nfds) > t.ThreadGroup().Limits().GetCapped(limits.NumberOfFiles, fileCap) {
return timeout, 0, syserror.EINVAL
}
- pfd := make([]syscalls.PollFD, nfds)
+ pfd := make([]linux.PollFD, nfds)
if nfds > 0 {
if _, err := t.CopyIn(pfdAddr, &pfd); err != nil {
return timeout, 0, err
@@ -65,9 +172,9 @@ func doPoll(t *kernel.Task, pfdAddr usermem.Addr, nfds uint, timeout time.Durati
// polling, changing event masks here is an application-visible difference.
// (Linux also doesn't copy out event masks at all, only revents.)
for i := range pfd {
- pfd[i].Events |= waiter.EventHUp | waiter.EventErr
+ pfd[i].Events |= linux.POLLHUP | linux.POLLERR
}
- remainingTimeout, n, err := syscalls.Poll(t, pfd, timeout)
+ remainingTimeout, n, err := pollBlock(t, pfd, timeout)
err = syserror.ConvertIntr(err, syserror.EINTR)
// The poll entries are copied out regardless of whether
@@ -136,8 +243,8 @@ func doSelect(t *kernel.Task, nfds int, readFDs, writeFDs, exceptFDs usermem.Add
}
// Build the PollFD array.
- pfd := make([]syscalls.PollFD, 0, fdCount)
- fd := kdefs.FD(0)
+ pfd := make([]linux.PollFD, 0, fdCount)
+ var fd int32
for i := 0; i < byteCount; i++ {
rV, wV, eV := r[i], w[i], e[i]
v := rV | wV | eV
@@ -148,13 +255,13 @@ func doSelect(t *kernel.Task, nfds int, readFDs, writeFDs, exceptFDs usermem.Add
// immediately to ensure we don't leak. Note, another thread
// might be about to close fd. This is racy, but that's
// OK. Linux is racy in the same way.
- file := t.FDMap().GetFile(fd)
+ file := t.FDMap().GetFile(kdefs.FD(fd))
if file == nil {
return 0, syserror.EBADF
}
file.DecRef()
- mask := waiter.EventMask(0)
+ var mask int16
if (rV & m) != 0 {
mask |= selectReadEvents
}
@@ -167,7 +274,7 @@ func doSelect(t *kernel.Task, nfds int, readFDs, writeFDs, exceptFDs usermem.Add
mask |= selectExceptEvents
}
- pfd = append(pfd, syscalls.PollFD{
+ pfd = append(pfd, linux.PollFD{
FD: fd,
Events: mask,
})
@@ -179,7 +286,7 @@ func doSelect(t *kernel.Task, nfds int, readFDs, writeFDs, exceptFDs usermem.Add
}
// Do the syscall, then count the number of bits set.
- _, _, err := syscalls.Poll(t, pfd, timeout)
+ _, _, err := pollBlock(t, pfd, timeout)
if err != nil {
return 0, syserror.ConvertIntr(err, syserror.EINTR)
}
diff --git a/pkg/sentry/syscalls/polling.go b/pkg/sentry/syscalls/polling.go
deleted file mode 100644
index 2b33d6c19..000000000
--- a/pkg/sentry/syscalls/polling.go
+++ /dev/null
@@ -1,137 +0,0 @@
-// Copyright 2018 Google LLC
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package syscalls
-
-import (
- "syscall"
- "time"
-
- "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/kdefs"
- "gvisor.googlesource.com/gvisor/pkg/waiter"
-)
-
-// PollFD describes a pollable FD.
-type PollFD struct {
- FD kdefs.FD
- Events waiter.EventMask
- REvents waiter.EventMask
-}
-
-// pollState tracks the associated file descriptor and waiter of a PollFD.
-type pollState struct {
- file *fs.File
- waiter waiter.Entry
-}
-
-// initReadiness gets the current ready mask for the file represented by the FD
-// stored in pfd.FD. If a channel is passed in, the waiter entry in "state" is
-// used to register with the file for event notifications, and a reference to
-// the file is stored in "state".
-func (pfd *PollFD) initReadiness(t *kernel.Task, state *pollState, ch chan struct{}) {
- if pfd.FD < 0 {
- pfd.REvents = 0
- return
- }
-
- file := t.FDMap().GetFile(pfd.FD)
- if file == nil {
- pfd.REvents = waiter.EventNVal
- return
- }
-
- if ch == nil {
- defer file.DecRef()
- } else {
- state.file = file
- state.waiter, _ = waiter.NewChannelEntry(ch)
- file.EventRegister(&state.waiter, pfd.Events)
- }
-
- pfd.REvents = file.Readiness(pfd.Events) & pfd.Events
-}
-
-// releaseState releases all the pollState in "state".
-func releaseState(state []pollState) {
- for i := range state {
- if state[i].file != nil {
- state[i].file.EventUnregister(&state[i].waiter)
- state[i].file.DecRef()
- }
- }
-}
-
-// Poll polls the PollFDs in "pfd" with a bounded time specified in "timeout"
-// when "timeout" is greater than zero.
-//
-// Poll returns the remaining timeout, which is always 0 on a timeout; and 0 or
-// positive if interrupted by a signal.
-func Poll(t *kernel.Task, pfd []PollFD, timeout time.Duration) (time.Duration, uintptr, error) {
- var ch chan struct{}
- if timeout != 0 {
- ch = make(chan struct{}, 1)
- }
-
- // Register for event notification in the files involved if we may
- // block (timeout not zero). Once we find a file that has a non-zero
- // result, we stop registering for events but still go through all files
- // to get their ready masks.
- state := make([]pollState, len(pfd))
- defer releaseState(state)
- n := uintptr(0)
- for i := range pfd {
- pfd[i].initReadiness(t, &state[i], ch)
- if pfd[i].REvents != 0 {
- n++
- ch = nil
- }
- }
-
- if timeout == 0 {
- return timeout, n, nil
- }
-
- forever := timeout < 0
-
- for n == 0 {
- var err error
- // Wait for a notification.
- timeout, err = t.BlockWithTimeout(ch, !forever, timeout)
- if err != nil {
- if err == syscall.ETIMEDOUT {
- err = nil
- }
- return timeout, 0, err
- }
-
- // We got notified, count how many files are ready. If none,
- // then this was a spurious notification, and we just go back
- // to sleep with the remaining timeout.
- for i := range state {
- if state[i].file == nil {
- continue
- }
-
- ready := state[i].file.Readiness(pfd[i].Events) & pfd[i].Events
- if ready != 0 {
- pfd[i].REvents = ready
- n++
- }
- }
- }
-
- return timeout, n, nil
-}
diff --git a/pkg/waiter/waiter.go b/pkg/waiter/waiter.go
index fd429f733..a6c9dff3c 100644
--- a/pkg/waiter/waiter.go
+++ b/pkg/waiter/waiter.go
@@ -67,14 +67,28 @@ type EventMask uint16
// Events that waiters can wait on. The meaning is the same as those in the
// poll() syscall.
const (
- EventIn EventMask = 0x01 // syscall.EPOLLIN
- EventPri EventMask = 0x02 // syscall.EPOLLPRI
- EventOut EventMask = 0x04 // syscall.EPOLLOUT
- EventErr EventMask = 0x08 // syscall.EPOLLERR
- EventHUp EventMask = 0x10 // syscall.EPOLLHUP
- EventNVal EventMask = 0x20 // Not defined in syscall.
+ EventIn EventMask = 0x01 // POLLIN
+ EventPri EventMask = 0x02 // POLLPRI
+ EventOut EventMask = 0x04 // POLLOUT
+ EventErr EventMask = 0x08 // POLLERR
+ EventHUp EventMask = 0x10 // POLLHUP
+
+ allEvents EventMask = 0x1f
)
+// EventMaskFromLinux returns an EventMask representing the supported events
+// from the Linux events e, which is in the format used by poll(2).
+func EventMaskFromLinux(e uint32) EventMask {
+ // Our flag definitions are currently identical to Linux.
+ return EventMask(e) & allEvents
+}
+
+// ToLinux returns e in the format used by Linux poll(2).
+func (e EventMask) ToLinux() uint32 {
+ // Our flag definitions are currently identical to Linux.
+ return uint32(e)
+}
+
// Waitable contains the methods that need to be implemented by waitable
// objects.
type Waitable interface {