summaryrefslogtreecommitdiffhomepage
path: root/pkg/fdnotifier
diff options
context:
space:
mode:
Diffstat (limited to 'pkg/fdnotifier')
-rw-r--r--pkg/fdnotifier/BUILD14
-rw-r--r--pkg/fdnotifier/fdnotifier.go200
-rw-r--r--pkg/fdnotifier/poll_unsafe.go74
3 files changed, 288 insertions, 0 deletions
diff --git a/pkg/fdnotifier/BUILD b/pkg/fdnotifier/BUILD
new file mode 100644
index 000000000..27d378d5b
--- /dev/null
+++ b/pkg/fdnotifier/BUILD
@@ -0,0 +1,14 @@
+load("//tools/go_stateify:defs.bzl", "go_library")
+
+package(licenses = ["notice"]) # Apache 2.0
+
+go_library(
+ name = "fdnotifier",
+ srcs = [
+ "fdnotifier.go",
+ "poll_unsafe.go",
+ ],
+ importpath = "gvisor.googlesource.com/gvisor/pkg/fdnotifier",
+ visibility = ["//:sandbox"],
+ deps = ["//pkg/waiter"],
+)
diff --git a/pkg/fdnotifier/fdnotifier.go b/pkg/fdnotifier/fdnotifier.go
new file mode 100644
index 000000000..624b1a0c5
--- /dev/null
+++ b/pkg/fdnotifier/fdnotifier.go
@@ -0,0 +1,200 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package fdnotifier contains an adapter that translates IO events (e.g., a
+// file became readable/writable) from native FDs to the notifications in the
+// waiter package. It uses epoll in edge-triggered mode to receive notifications
+// for registered FDs.
+package fdnotifier
+
+import (
+ "fmt"
+ "sync"
+ "syscall"
+
+ "gvisor.googlesource.com/gvisor/pkg/waiter"
+)
+
+type fdInfo struct {
+ queue *waiter.Queue
+ waiting bool
+}
+
+// notifier holds all the state necessary to issue notifications when IO events
+// occur in the observed FDs.
+type notifier struct {
+ // epFD is the epoll file descriptor used to register for io
+ // notifications.
+ epFD int
+
+ // mu protects fdMap.
+ mu sync.Mutex
+
+ // fdMap maps file descriptors to their notification queues and waiting
+ // status.
+ fdMap map[int32]*fdInfo
+}
+
+// newNotifier creates a new notifier object.
+func newNotifier() (*notifier, error) {
+ epfd, err := syscall.EpollCreate1(0)
+ if err != nil {
+ return nil, err
+ }
+
+ w := &notifier{
+ epFD: epfd,
+ fdMap: make(map[int32]*fdInfo),
+ }
+
+ go w.waitAndNotify() // S/R-SAFE: no waiter exists during save / load.
+
+ return w, nil
+}
+
+// waitFD waits on mask for fd. The fdMap mutex must be hold.
+func (n *notifier) waitFD(fd int32, fi *fdInfo, mask waiter.EventMask) error {
+ if !fi.waiting && mask == 0 {
+ return nil
+ }
+
+ e := syscall.EpollEvent{
+ Events: uint32(mask) | -syscall.EPOLLET,
+ Fd: fd,
+ }
+
+ switch {
+ case !fi.waiting && mask != 0:
+ if err := syscall.EpollCtl(n.epFD, syscall.EPOLL_CTL_ADD, int(fd), &e); err != nil {
+ return err
+ }
+ fi.waiting = true
+ case fi.waiting && mask == 0:
+ syscall.EpollCtl(n.epFD, syscall.EPOLL_CTL_DEL, int(fd), nil)
+ fi.waiting = false
+ case fi.waiting && mask != 0:
+ if err := syscall.EpollCtl(n.epFD, syscall.EPOLL_CTL_MOD, int(fd), &e); err != nil {
+ return err
+ }
+ }
+
+ return nil
+}
+
+// addFD adds an FD to the list of FDs observed by n.
+func (n *notifier) addFD(fd int32, queue *waiter.Queue) {
+ n.mu.Lock()
+ defer n.mu.Unlock()
+
+ // Panic if we're already notifying on this FD.
+ if _, ok := n.fdMap[fd]; ok {
+ panic(fmt.Sprintf("File descriptor %v added twice", fd))
+ }
+
+ // We have nothing to wait for at the moment. Just add it to the map.
+ n.fdMap[fd] = &fdInfo{queue: queue}
+}
+
+// updateFD updates the set of events the fd needs to be notified on.
+func (n *notifier) updateFD(fd int32) error {
+ n.mu.Lock()
+ defer n.mu.Unlock()
+
+ if fi, ok := n.fdMap[fd]; ok {
+ return n.waitFD(fd, fi, fi.queue.Events())
+ }
+
+ return nil
+}
+
+// RemoveFD removes an FD from the list of FDs observed by n.
+func (n *notifier) removeFD(fd int32) {
+ n.mu.Lock()
+ defer n.mu.Unlock()
+
+ // Remove from map, then from epoll object.
+ n.waitFD(fd, n.fdMap[fd], 0)
+ delete(n.fdMap, fd)
+}
+
+// hasFD returns true if the fd is in the list of observed FDs.
+func (n *notifier) hasFD(fd int32) bool {
+ n.mu.Lock()
+ defer n.mu.Unlock()
+
+ _, ok := n.fdMap[fd]
+ return ok
+}
+
+// waitAndNotify run is its own goroutine and loops waiting for io event
+// notifications from the epoll object. Once notifications arrive, they are
+// dispatched to the registered queue.
+func (n *notifier) waitAndNotify() error {
+ e := make([]syscall.EpollEvent, 100)
+ for {
+ v, err := epollWait(n.epFD, e, -1)
+ if err == syscall.EINTR {
+ continue
+ }
+
+ if err != nil {
+ return err
+ }
+
+ n.mu.Lock()
+ for i := 0; i < v; i++ {
+ if fi, ok := n.fdMap[e[i].Fd]; ok {
+ fi.queue.Notify(waiter.EventMask(e[i].Events))
+ }
+ }
+ n.mu.Unlock()
+ }
+}
+
+var shared struct {
+ notifier *notifier
+ once sync.Once
+ initErr error
+}
+
+// AddFD adds an FD to the list of observed FDs.
+func AddFD(fd int32, queue *waiter.Queue) error {
+ shared.once.Do(func() {
+ shared.notifier, shared.initErr = newNotifier()
+ })
+
+ if shared.initErr != nil {
+ return shared.initErr
+ }
+
+ shared.notifier.addFD(fd, queue)
+ return nil
+}
+
+// UpdateFD updates the set of events the fd needs to be notified on.
+func UpdateFD(fd int32) error {
+ return shared.notifier.updateFD(fd)
+}
+
+// RemoveFD removes an FD from the list of observed FDs.
+func RemoveFD(fd int32) {
+ shared.notifier.removeFD(fd)
+}
+
+// HasFD returns true if the FD is in the list of observed FDs.
+//
+// This should only be used by tests to assert that FDs are correctly registered.
+func HasFD(fd int32) bool {
+ return shared.notifier.hasFD(fd)
+}
diff --git a/pkg/fdnotifier/poll_unsafe.go b/pkg/fdnotifier/poll_unsafe.go
new file mode 100644
index 000000000..8459d4c74
--- /dev/null
+++ b/pkg/fdnotifier/poll_unsafe.go
@@ -0,0 +1,74 @@
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package fdnotifier
+
+import (
+ "syscall"
+ "unsafe"
+
+ "gvisor.googlesource.com/gvisor/pkg/waiter"
+)
+
+// NonBlockingPoll polls the given FD in non-blocking fashion. It is used just
+// to query the FD's current state.
+func NonBlockingPoll(fd int32, mask waiter.EventMask) waiter.EventMask {
+ e := struct {
+ fd int32
+ events int16
+ revents int16
+ }{
+ fd: fd,
+ events: int16(mask),
+ }
+
+ for {
+ n, _, err := syscall.RawSyscall(syscall.SYS_POLL, uintptr(unsafe.Pointer(&e)), 1, 0)
+ // Interrupted by signal, try again.
+ if err == syscall.EINTR {
+ continue
+ }
+ // If an error occur we'll conservatively say the FD is ready for
+ // whatever is being checked.
+ if err != 0 {
+ return mask
+ }
+
+ // If no FDs were returned, it wasn't ready for anything.
+ if n == 0 {
+ return 0
+ }
+
+ // Otherwise we got the ready events in the revents field.
+ return waiter.EventMask(e.revents)
+ }
+}
+
+// epollWait performs a blocking wait on epfd.
+//
+// Preconditions:
+// * len(events) > 0
+func epollWait(epfd int, events []syscall.EpollEvent, msec int) (int, error) {
+ if len(events) == 0 {
+ panic("Empty events passed to EpollWait")
+ }
+
+ // We actually use epoll_pwait with NULL sigmask instead of epoll_wait
+ // since that is what the Go >= 1.11 runtime prefers.
+ r, _, e := syscall.Syscall6(syscall.SYS_EPOLL_PWAIT, uintptr(epfd), uintptr(unsafe.Pointer(&events[0])), uintptr(len(events)), uintptr(msec), 0, 0)
+ if e != 0 {
+ return 0, e
+ }
+ return int(r), nil
+}