diff options
Diffstat (limited to 'pkg')
-rw-r--r-- | pkg/abi/linux/BUILD | 1 | ||||
-rw-r--r-- | pkg/abi/linux/eventfd.go | 22 | ||||
-rw-r--r-- | pkg/sentry/fs/fsutil/inode_cached.go | 3 | ||||
-rw-r--r-- | pkg/sentry/kernel/eventfd/BUILD | 2 | ||||
-rw-r--r-- | pkg/sentry/kernel/eventfd/eventfd.go | 119 |
5 files changed, 139 insertions, 8 deletions
diff --git a/pkg/abi/linux/BUILD b/pkg/abi/linux/BUILD index e164945cf..ae7e4378c 100644 --- a/pkg/abi/linux/BUILD +++ b/pkg/abi/linux/BUILD @@ -30,6 +30,7 @@ go_library( "dev.go", "elf.go", "errors.go", + "eventfd.go", "exec.go", "fcntl.go", "file.go", diff --git a/pkg/abi/linux/eventfd.go b/pkg/abi/linux/eventfd.go new file mode 100644 index 000000000..bc0fb44d2 --- /dev/null +++ b/pkg/abi/linux/eventfd.go @@ -0,0 +1,22 @@ +// Copyright 2018 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package linux + +// Constants for eventfd2(2). +const ( + EFD_SEMAPHORE = 0x1 + EFD_CLOEXEC = O_CLOEXEC + EFD_NONBLOCK = O_NONBLOCK +) diff --git a/pkg/sentry/fs/fsutil/inode_cached.go b/pkg/sentry/fs/fsutil/inode_cached.go index 7c0f96ac2..cba642a8f 100644 --- a/pkg/sentry/fs/fsutil/inode_cached.go +++ b/pkg/sentry/fs/fsutil/inode_cached.go @@ -44,8 +44,7 @@ import ( // // CachingInodeOperations implements Mappable for the CachedFileObject: // -// - If CachedFileObject.FD returns a value >= 0 and the current platform shares -// a host fd table with the sentry, then the value of CachedFileObject.FD +// - If CachedFileObject.FD returns a value >= 0 then the file descriptor // will be memory mapped on the host. // // - Otherwise, the contents of CachedFileObject are buffered into memory diff --git a/pkg/sentry/kernel/eventfd/BUILD b/pkg/sentry/kernel/eventfd/BUILD index 2d5a3c693..561ced852 100644 --- a/pkg/sentry/kernel/eventfd/BUILD +++ b/pkg/sentry/kernel/eventfd/BUILD @@ -21,6 +21,7 @@ go_library( importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/eventfd", visibility = ["//pkg/sentry:internal"], deps = [ + "//pkg/abi/linux", "//pkg/refs", "//pkg/sentry/context", "//pkg/sentry/fs", @@ -30,6 +31,7 @@ go_library( "//pkg/state", "//pkg/syserror", "//pkg/waiter", + "//pkg/waiter/fdnotifier", ], ) diff --git a/pkg/sentry/kernel/eventfd/eventfd.go b/pkg/sentry/kernel/eventfd/eventfd.go index c9333719e..bd50bd9fe 100644 --- a/pkg/sentry/kernel/eventfd/eventfd.go +++ b/pkg/sentry/kernel/eventfd/eventfd.go @@ -21,6 +21,7 @@ import ( "sync" "syscall" + "gvisor.googlesource.com/gvisor/pkg/abi/linux" "gvisor.googlesource.com/gvisor/pkg/sentry/context" "gvisor.googlesource.com/gvisor/pkg/sentry/fs" "gvisor.googlesource.com/gvisor/pkg/sentry/fs/anon" @@ -28,10 +29,12 @@ import ( "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" "gvisor.googlesource.com/gvisor/pkg/syserror" "gvisor.googlesource.com/gvisor/pkg/waiter" + "gvisor.googlesource.com/gvisor/pkg/waiter/fdnotifier" ) // EventOperations represents an event with the semantics of Linux's file-based event -// notification (eventfd). +// notification (eventfd). Eventfds are usually internal to the Sentry but in certain +// situations they may be converted into a host-backed eventfd. type EventOperations struct { fsutil.NoopRelease `state:"nosave"` fsutil.PipeSeek `state:"nosave"` @@ -46,13 +49,16 @@ type EventOperations struct { // Queue is used to notify interested parties when the event object // becomes readable or writable. - waiter.Queue `state:"nosave"` + wq waiter.Queue `state:"nosave"` // val is the current value of the event counter. val uint64 // semMode specifies whether the event is in "semaphore" mode. semMode bool + + // hostfd indicates whether this eventfd is passed through to the host. + hostfd int } // New creates a new event object with the supplied initial value and mode. @@ -62,9 +68,48 @@ func New(ctx context.Context, initVal uint64, semMode bool) *fs.File { return fs.NewFile(ctx, dirent, fs.FileFlags{Read: true, Write: true}, &EventOperations{ val: initVal, semMode: semMode, + hostfd: -1, }) } +// HostFD returns the host eventfd associated with this event. +func (e *EventOperations) HostFD() (int, error) { + e.mu.Lock() + defer e.mu.Unlock() + if e.hostfd >= 0 { + return e.hostfd, nil + } + + flags := linux.EFD_NONBLOCK + if e.semMode { + flags |= linux.EFD_SEMAPHORE + } + + fd, _, err := syscall.Syscall(syscall.SYS_EVENTFD2, uintptr(e.val), uintptr(flags), 0) + if err != 0 { + return -1, err + } + + if err := fdnotifier.AddFD(int32(fd), &e.wq); err != nil { + syscall.Close(int(fd)) + return -1, err + } + + e.hostfd = int(fd) + return e.hostfd, nil +} + +// Release implements fs.FileOperations.Release. +func (e *EventOperations) Release() { + e.mu.Lock() + defer e.mu.Unlock() + if e.hostfd >= 0 { + fdnotifier.RemoveFD(int32(e.hostfd)) + syscall.Close(e.hostfd) + e.hostfd = -1 + } +} + // Read implements fs.FileOperations.Read. func (e *EventOperations) Read(ctx context.Context, _ *fs.File, dst usermem.IOSequence, _ int64) (int64, error) { if dst.NumBytes() < 8 { @@ -87,9 +132,29 @@ func (e *EventOperations) Write(ctx context.Context, _ *fs.File, src usermem.IOS return 8, nil } +// Must be called with e.mu locked. +func (e *EventOperations) hostRead(ctx context.Context, dst usermem.IOSequence) error { + var buf [8]byte + + if _, err := syscall.Read(e.hostfd, buf[:]); err != nil { + if err == syscall.EWOULDBLOCK { + return syserror.ErrWouldBlock + } + return err + } + + _, err := dst.CopyOut(ctx, buf[:]) + return err +} + func (e *EventOperations) read(ctx context.Context, dst usermem.IOSequence) error { e.mu.Lock() + if e.hostfd >= 0 { + defer e.mu.Unlock() + return e.hostRead(ctx, dst) + } + // We can't complete the read if the value is currently zero. if e.val == 0 { e.mu.Unlock() @@ -112,7 +177,7 @@ func (e *EventOperations) read(ctx context.Context, dst usermem.IOSequence) erro // Notify writers. We do this even if we were already writable because // it is possible that a writer is waiting to write the maximum value // to the event. - e.Notify(waiter.EventOut) + e.wq.Notify(waiter.EventOut) var buf [8]byte usermem.ByteOrder.PutUint64(buf[:], val) @@ -120,6 +185,17 @@ func (e *EventOperations) read(ctx context.Context, dst usermem.IOSequence) erro return err } +// Must be called with e.mu locked. +func (e *EventOperations) hostWrite(val uint64) error { + var buf [8]byte + usermem.ByteOrder.PutUint64(buf[:], val) + _, err := syscall.Write(e.hostfd, buf[:]) + if err == syscall.EWOULDBLOCK { + return syserror.ErrWouldBlock + } + return err +} + func (e *EventOperations) write(ctx context.Context, src usermem.IOSequence) error { var buf [8]byte if _, err := src.CopyIn(ctx, buf[:]); err != nil { @@ -138,6 +214,11 @@ func (e *EventOperations) Signal(val uint64) error { e.mu.Lock() + if e.hostfd >= 0 { + defer e.mu.Unlock() + return e.hostWrite(val) + } + // We only allow writes that won't cause the value to go over the max // uint64 minus 1. if val > math.MaxUint64-1-e.val { @@ -149,16 +230,20 @@ func (e *EventOperations) Signal(val uint64) error { e.mu.Unlock() // Always trigger a notification. - e.Notify(waiter.EventIn) + e.wq.Notify(waiter.EventIn) return nil } // Readiness returns the ready events for the event fd. func (e *EventOperations) Readiness(mask waiter.EventMask) waiter.EventMask { - ready := waiter.EventMask(0) - e.mu.Lock() + if e.hostfd >= 0 { + defer e.mu.Unlock() + return fdnotifier.NonBlockingPoll(int32(e.hostfd), mask) + } + + ready := waiter.EventMask(0) if e.val > 0 { ready |= waiter.EventIn } @@ -170,3 +255,25 @@ func (e *EventOperations) Readiness(mask waiter.EventMask) waiter.EventMask { return mask & ready } + +// EventRegister implements waiter.Waitable.EventRegister. +func (e *EventOperations) EventRegister(entry *waiter.Entry, mask waiter.EventMask) { + e.wq.EventRegister(entry, mask) + + e.mu.Lock() + defer e.mu.Unlock() + if e.hostfd >= 0 { + fdnotifier.UpdateFD(int32(e.hostfd)) + } +} + +// EventUnregister implements waiter.Waitable.EventUnregister. +func (e *EventOperations) EventUnregister(entry *waiter.Entry) { + e.wq.EventUnregister(entry) + + e.mu.Lock() + defer e.mu.Unlock() + if e.hostfd >= 0 { + fdnotifier.UpdateFD(int32(e.hostfd)) + } +} |