summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
-rw-r--r-- pkg/sentry/strace/linux64_amd64.go 1
-rw-r--r-- pkg/sentry/strace/linux64_arm64.go 1
-rw-r--r-- pkg/sentry/syscalls/epoll.go 8
-rw-r--r-- pkg/sentry/syscalls/linux/linux64.go 2
-rw-r--r-- pkg/sentry/syscalls/linux/sys_epoll.go 56
-rw-r--r-- pkg/sentry/syscalls/linux/vfs2/epoll.go 52
-rw-r--r-- pkg/sentry/syscalls/linux/vfs2/vfs2.go 2
-rw-r--r-- test/syscalls/linux/epoll.cc 53
-rw-r--r-- test/util/test_util.h 9
9 files changed, 160 insertions, 24 deletions
diff --git a/pkg/sentry/strace/linux64_amd64.go b/pkg/sentry/strace/linux64_amd64.go
index 71b92eaee..d66befe81 100644
--- a/pkg/sentry/strace/linux64_amd64.go
+++ b/pkg/sentry/strace/linux64_amd64.go
@@ -371,6 +371,7 @@ var linuxAMD64 = SyscallMap{
433: makeSyscallInfo("fspick", FD, Path, Hex),
434: makeSyscallInfo("pidfd_open", Hex, Hex),
435: makeSyscallInfo("clone3", Hex, Hex),
+ 441: makeSyscallInfo("epoll_pwait2", FD, EpollEvents, Hex, Timespec, SigSet),
}
func init() {
diff --git a/pkg/sentry/strace/linux64_arm64.go b/pkg/sentry/strace/linux64_arm64.go
index bd7361a52..1a2d7d75f 100644
--- a/pkg/sentry/strace/linux64_arm64.go
+++ b/pkg/sentry/strace/linux64_arm64.go
@@ -312,6 +312,7 @@ var linuxARM64 = SyscallMap{
433: makeSyscallInfo("fspick", FD, Path, Hex),
434: makeSyscallInfo("pidfd_open", Hex, Hex),
435: makeSyscallInfo("clone3", Hex, Hex),
+ 441: makeSyscallInfo("epoll_pwait2", FD, EpollEvents, Hex, Timespec, SigSet),
}
func init() {
diff --git a/pkg/sentry/syscalls/epoll.go b/pkg/sentry/syscalls/epoll.go
index e115683f8..3b4d79889 100644
--- a/pkg/sentry/syscalls/epoll.go
+++ b/pkg/sentry/syscalls/epoll.go
@@ -119,7 +119,7 @@ func RemoveEpoll(t *kernel.Task, epfd int32, fd int32) error {
}
// WaitEpoll implements the epoll_wait(2) linux syscall.
-func WaitEpoll(t *kernel.Task, fd int32, max int, timeout int) ([]linux.EpollEvent, error) {
+func WaitEpoll(t *kernel.Task, fd int32, max int, timeoutInNanos int64) ([]linux.EpollEvent, error) {
// Get epoll from the file descriptor.
epollfile := t.GetFile(fd)
if epollfile == nil {
@@ -136,7 +136,7 @@ func WaitEpoll(t *kernel.Task, fd int32, max int, timeout int) ([]linux.EpollEve
// Try to read events and return right away if we got them or if the
// caller requested a non-blocking "wait".
r := e.ReadEvents(max)
- if len(r) != 0 || timeout == 0 {
+ if len(r) != 0 || timeoutInNanos == 0 {
return r, nil
}
@@ -144,8 +144,8 @@ func WaitEpoll(t *kernel.Task, fd int32, max int, timeout int) ([]linux.EpollEve
// and register with the epoll object for readability events.
var haveDeadline bool
var deadline ktime.Time
- if timeout > 0 {
- timeoutDur := time.Duration(timeout) * time.Millisecond
+ if timeoutInNanos > 0 {
+ timeoutDur := time.Duration(timeoutInNanos) * time.Nanosecond
deadline = t.Kernel().MonotonicClock().Now().Add(timeoutDur)
haveDeadline = true
}
diff --git a/pkg/sentry/syscalls/linux/linux64.go b/pkg/sentry/syscalls/linux/linux64.go
index 2d2212605..090c5ffcb 100644
--- a/pkg/sentry/syscalls/linux/linux64.go
+++ b/pkg/sentry/syscalls/linux/linux64.go
@@ -404,6 +404,7 @@ var AMD64 = &kernel.SyscallTable{
433: syscalls.ErrorWithEvent("fspick", syserror.ENOSYS, "", nil),
434: syscalls.ErrorWithEvent("pidfd_open", syserror.ENOSYS, "", nil),
435: syscalls.ErrorWithEvent("clone3", syserror.ENOSYS, "", nil),
+ 441: syscalls.Supported("epoll_pwait2", EpollPwait2),
},
Emulate: map[hostarch.Addr]uintptr{
0xffffffffff600000: 96, // vsyscall gettimeofday(2)
@@ -722,6 +723,7 @@ var ARM64 = &kernel.SyscallTable{
433: syscalls.ErrorWithEvent("fspick", syserror.ENOSYS, "", nil),
434: syscalls.ErrorWithEvent("pidfd_open", syserror.ENOSYS, "", nil),
435: syscalls.ErrorWithEvent("clone3", syserror.ENOSYS, "", nil),
+ 441: syscalls.Supported("epoll_pwait2", EpollPwait2),
},
Emulate: map[hostarch.Addr]uintptr{},
Missing: func(t *kernel.Task, sysno uintptr, args arch.SyscallArguments) (uintptr, error) {
diff --git a/pkg/sentry/syscalls/linux/sys_epoll.go b/pkg/sentry/syscalls/linux/sys_epoll.go
index 7f460d30b..69cbc98d0 100644
--- a/pkg/sentry/syscalls/linux/sys_epoll.go
+++ b/pkg/sentry/syscalls/linux/sys_epoll.go
@@ -16,6 +16,7 @@ package linux
import (
"gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sentry/kernel/epoll"
@@ -104,14 +105,8 @@ func EpollCtl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc
}
}
-// EpollWait implements the epoll_wait(2) linux syscall.
-func EpollWait(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
- epfd := args[0].Int()
- eventsAddr := args[1].Pointer()
- maxEvents := int(args[2].Int())
- timeout := int(args[3].Int())
-
- r, err := syscalls.WaitEpoll(t, epfd, maxEvents, timeout)
+func waitEpoll(t *kernel.Task, fd int32, eventsAddr hostarch.Addr, max int, timeoutInNanos int64) (uintptr, *kernel.SyscallControl, error) {
+ r, err := syscalls.WaitEpoll(t, fd, max, timeoutInNanos)
if err != nil {
return 0, nil, syserror.ConvertIntr(err, syserror.EINTR)
}
@@ -123,6 +118,17 @@ func EpollWait(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys
}
return uintptr(len(r)), nil, nil
+
+}
+
+// EpollWait implements the epoll_wait(2) linux syscall.
+func EpollWait(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
+ epfd := args[0].Int()
+ eventsAddr := args[1].Pointer()
+ maxEvents := int(args[2].Int())
+ // Convert milliseconds to nanoseconds.
+ timeoutInNanos := int64(args[3].Int()) * 1000000
+ return waitEpoll(t, epfd, eventsAddr, maxEvents, timeoutInNanos)
}
// EpollPwait implements the epoll_pwait(2) linux syscall.
@@ -144,4 +150,38 @@ func EpollPwait(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sy
return EpollWait(t, args)
}
+// EpollPwait2 implements the epoll_pwait2(2) linux syscall.
+func EpollPwait2(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
+ epfd := args[0].Int()
+ eventsAddr := args[1].Pointer()
+ maxEvents := int(args[2].Int())
+ timeoutPtr := args[3].Pointer()
+ maskAddr := args[4].Pointer()
+ maskSize := uint(args[5].Uint())
+ haveTimeout := timeoutPtr != 0
+
+ var timeoutInNanos int64 = -1
+ if haveTimeout {
+ timeout, err := copyTimespecIn(t, timeoutPtr)
+ if err != nil {
+ return 0, nil, err
+ }
+ timeoutInNanos = timeout.ToNsec()
+
+ }
+
+ if maskAddr != 0 {
+ mask, err := CopyInSigSet(t, maskAddr, maskSize)
+ if err != nil {
+ return 0, nil, err
+ }
+
+ oldmask := t.SignalMask()
+ t.SetSignalMask(mask)
+ t.SetSavedSignalMask(oldmask)
+ }
+
+ return waitEpoll(t, epfd, eventsAddr, maxEvents, timeoutInNanos)
+}
+
// LINT.ThenChange(vfs2/epoll.go)
diff --git a/pkg/sentry/syscalls/linux/vfs2/epoll.go b/pkg/sentry/syscalls/linux/vfs2/epoll.go
index b980aa43e..047d955b6 100644
--- a/pkg/sentry/syscalls/linux/vfs2/epoll.go
+++ b/pkg/sentry/syscalls/linux/vfs2/epoll.go
@@ -19,6 +19,7 @@ import (
"time"
"gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/kernel"
ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time"
@@ -118,13 +119,7 @@ func EpollCtl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc
}
}
-// EpollWait implements Linux syscall epoll_wait(2).
-func EpollWait(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
- epfd := args[0].Int()
- eventsAddr := args[1].Pointer()
- maxEvents := int(args[2].Int())
- timeout := int(args[3].Int())
-
+func waitEpoll(t *kernel.Task, epfd int32, eventsAddr hostarch.Addr, maxEvents int, timeoutInNanos int64) (uintptr, *kernel.SyscallControl, error) {
var _EP_MAX_EVENTS = math.MaxInt32 / sizeofEpollEvent // Linux: fs/eventpoll.c:EP_MAX_EVENTS
if maxEvents <= 0 || maxEvents > _EP_MAX_EVENTS {
return 0, nil, syserror.EINVAL
@@ -158,7 +153,7 @@ func EpollWait(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys
}
return 0, nil, err
}
- if timeout == 0 {
+ if timeoutInNanos == 0 {
return 0, nil, nil
}
// In the first iteration of this loop, register with the epoll
@@ -173,8 +168,8 @@ func EpollWait(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys
defer epfile.EventUnregister(&w)
} else {
// Set up the timer if a timeout was specified.
- if timeout > 0 && !haveDeadline {
- timeoutDur := time.Duration(timeout) * time.Millisecond
+ if timeoutInNanos > 0 && !haveDeadline {
+ timeoutDur := time.Duration(timeoutInNanos) * time.Nanosecond
deadline = t.Kernel().MonotonicClock().Now().Add(timeoutDur)
haveDeadline = true
}
@@ -186,6 +181,17 @@ func EpollWait(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys
}
}
}
+
+}
+
+// EpollWait implements Linux syscall epoll_wait(2).
+func EpollWait(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
+ epfd := args[0].Int()
+ eventsAddr := args[1].Pointer()
+ maxEvents := int(args[2].Int())
+ timeoutInNanos := int64(args[3].Int()) * 1000000
+
+ return waitEpoll(t, epfd, eventsAddr, maxEvents, timeoutInNanos)
}
// EpollPwait implements Linux syscall epoll_pwait(2).
@@ -199,3 +205,29 @@ func EpollPwait(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sy
return EpollWait(t, args)
}
+
+// EpollPwait2 implements Linux syscall epoll_pwait2(2).
+func EpollPwait2(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
+ epfd := args[0].Int()
+ eventsAddr := args[1].Pointer()
+ maxEvents := int(args[2].Int())
+ timeoutPtr := args[3].Pointer()
+ maskAddr := args[4].Pointer()
+ maskSize := uint(args[5].Uint())
+ haveTimeout := timeoutPtr != 0
+
+ var timeoutInNanos int64 = -1
+ if haveTimeout {
+ var timeout linux.Timespec
+ if _, err := timeout.CopyIn(t, timeoutPtr); err != nil {
+ return 0, nil, err
+ }
+ timeoutInNanos = timeout.ToNsec()
+ }
+
+ if err := setTempSignalSet(t, maskAddr, maskSize); err != nil {
+ return 0, nil, err
+ }
+
+ return waitEpoll(t, epfd, eventsAddr, maxEvents, timeoutInNanos)
+}
diff --git a/pkg/sentry/syscalls/linux/vfs2/vfs2.go b/pkg/sentry/syscalls/linux/vfs2/vfs2.go
index c50fd97eb..0fc81e694 100644
--- a/pkg/sentry/syscalls/linux/vfs2/vfs2.go
+++ b/pkg/sentry/syscalls/linux/vfs2/vfs2.go
@@ -159,6 +159,7 @@ func Override() {
s.Table[327] = syscalls.Supported("preadv2", Preadv2)
s.Table[328] = syscalls.Supported("pwritev2", Pwritev2)
s.Table[332] = syscalls.Supported("statx", Statx)
+ s.Table[441] = syscalls.Supported("epoll_pwait2", EpollPwait2)
s.Init()
// Override ARM64.
@@ -269,6 +270,7 @@ func Override() {
s.Table[286] = syscalls.Supported("preadv2", Preadv2)
s.Table[287] = syscalls.Supported("pwritev2", Pwritev2)
s.Table[291] = syscalls.Supported("statx", Statx)
+ s.Table[441] = syscalls.Supported("epoll_pwait2", EpollPwait2)
s.Init()
}
diff --git a/test/syscalls/linux/epoll.cc b/test/syscalls/linux/epoll.cc
index b180f633c..af3d27894 100644
--- a/test/syscalls/linux/epoll.cc
+++ b/test/syscalls/linux/epoll.cc
@@ -39,6 +39,15 @@ namespace {
constexpr int kFDsPerEpoll = 3;
constexpr uint64_t kMagicConstant = 0x0102030405060708;
+#ifndef SYS_epoll_pwait2
+#define SYS_epoll_pwait2 441
+#endif
+
+int epoll_pwait2(int fd, struct epoll_event* events, int maxevents,
+ const struct timespec* timeout, const sigset_t* sigset) {
+ return syscall(SYS_epoll_pwait2, fd, events, maxevents, timeout, sigset);
+}
+
TEST(EpollTest, AllWritable) {
auto epollfd = ASSERT_NO_ERRNO_AND_VALUE(NewEpollFD());
std::vector<FileDescriptor> eventfds;
@@ -144,6 +153,50 @@ TEST(EpollTest, Timeout) {
EXPECT_GT(ms_elapsed(begin, end), kTimeoutMs - 1);
}
+TEST(EpollTest, EpollPwait2Timeout) {
+ auto epollfd = ASSERT_NO_ERRNO_AND_VALUE(NewEpollFD());
+ // 200 milliseconds.
+ constexpr int kTimeoutNs = 200000000;
+ struct timespec timeout;
+ timeout.tv_sec = 0;
+ timeout.tv_nsec = 0;
+ struct timespec begin;
+ struct timespec end;
+ struct epoll_event result[kFDsPerEpoll];
+
+ std::vector<FileDescriptor> eventfds;
+ for (int i = 0; i < kFDsPerEpoll; i++) {
+ eventfds.push_back(ASSERT_NO_ERRNO_AND_VALUE(NewEventFD()));
+ ASSERT_NO_ERRNO(RegisterEpollFD(epollfd.get(), eventfds[i].get(), EPOLLIN,
+ kMagicConstant + i));
+ }
+
+ // Pass valid arguments so that the syscall won't be blocked indefinitely
+ // nor return errno EINVAL.
+ //
+ // The syscall returns immediately when timeout is zero,
+ // even if no events are available.
+ SKIP_IF(!IsRunningOnGvisor() &&
+ epoll_pwait2(epollfd.get(), result, kFDsPerEpoll, &timeout, nullptr) <
+ 0 &&
+ errno == ENOSYS);
+
+ {
+ const DisableSave ds; // Timing-related.
+ EXPECT_THAT(clock_gettime(CLOCK_MONOTONIC, &begin), SyscallSucceeds());
+
+ timeout.tv_nsec = kTimeoutNs;
+ ASSERT_THAT(RetryEINTR(epoll_pwait2)(epollfd.get(), result, kFDsPerEpoll,
+ &timeout, nullptr),
+ SyscallSucceedsWithValue(0));
+ EXPECT_THAT(clock_gettime(CLOCK_MONOTONIC, &end), SyscallSucceeds());
+ }
+
+ // Check the lower bound on the timeout. Checking for an upper bound is
+ // fragile because Linux can overrun the timeout due to scheduling delays.
+ EXPECT_GT(ns_elapsed(begin, end), kTimeoutNs - 1);
+}
+
void* writer(void* arg) {
int fd = *reinterpret_cast<int*>(arg);
uint64_t tmp = 1;
diff --git a/test/util/test_util.h b/test/util/test_util.h
index 876ff58db..bcbb388ed 100644
--- a/test/util/test_util.h
+++ b/test/util/test_util.h
@@ -272,10 +272,15 @@ PosixErrorOr<std::vector<OpenFd>> GetOpenFDs();
// Returns the number of hard links to a path.
PosixErrorOr<uint64_t> Links(const std::string& path);
+inline uint64_t ns_elapsed(const struct timespec& begin,
+ const struct timespec& end) {
+ return (end.tv_sec - begin.tv_sec) * 1000000000 +
+ (end.tv_nsec - begin.tv_nsec);
+}
+
inline uint64_t ms_elapsed(const struct timespec& begin,
const struct timespec& end) {
- return (end.tv_sec - begin.tv_sec) * 1000 +
- (end.tv_nsec - begin.tv_nsec) / 1000000;
+ return ns_elapsed(begin, end) / 1000000;
}
namespace internal {