diff options
author | Fabricio Voznika <fvoznika@google.com> | 2018-12-27 14:58:41 -0800 |
---|---|---|
committer | Shentubot <shentubot@google.com> | 2018-12-27 14:59:50 -0800 |
commit | 46e6577014c849d7306c63905db25f3c695fa7e7 (patch) | |
tree | f0beec40f3bbda38bf209809b091cf537b66cba8 | |
parent | bce2f9751f415da869d04ccb53833b024373666d (diff) |
Fix deadlock between epoll_wait and getdents
epoll_wait acquires EventPoll.listsMu (in EventPoll.ReadEvents) and
then calls Inotify.Readiness which tries to acquire Inotify.evMu.
getdents acquires Inotify.evMu (in Inotify.queueEvent) and then calls
readyCallback.Callback which tries to acquire EventPoll.listsMu.
The fix is to release Inotify.evMu before calling Queue.Notify. Queue
is thread-safe and doesn't require Inotify.evMu to be held.
Closes #121
PiperOrigin-RevId: 227066695
Change-Id: Id29364bb940d1727f33a5dff9a3c52f390c15761
-rw-r--r-- | pkg/sentry/fs/inotify.go | 13 | ||||
-rw-r--r-- | test/syscalls/linux/BUILD | 3 | ||||
-rw-r--r-- | test/syscalls/linux/epoll.cc | 29 | ||||
-rw-r--r-- | test/syscalls/linux/inotify.cc | 55 | ||||
-rw-r--r-- | test/util/BUILD | 13 | ||||
-rw-r--r-- | test/util/epoll_util.cc | 52 | ||||
-rw-r--r-- | test/util/epoll_util.h | 36 |
7 files changed, 169 insertions, 32 deletions
diff --git a/pkg/sentry/fs/inotify.go b/pkg/sentry/fs/inotify.go index f251df0d1..51ece5ed0 100644 --- a/pkg/sentry/fs/inotify.go +++ b/pkg/sentry/fs/inotify.go @@ -42,14 +42,14 @@ type Inotify struct { // user, since we may aggressively reuse an id on S/R. id uint64 - // evMu *only* protects the event queue. We need a separate lock because + waiter.Queue `state:"nosave"` + + // evMu *only* protects the events list. We need a separate lock because // while queuing events, a watch needs to lock the event queue, and using mu // for that would violate lock ordering since at that point the calling // goroutine already holds Watch.target.Watches.mu. evMu sync.Mutex `state:"nosave"` - waiter.Queue `state:"nosave"` - // A list of pending events for this inotify instance. Protected by evMu. events ilist.List @@ -212,7 +212,6 @@ func (i *Inotify) Ioctl(ctx context.Context, io usermem.IO, args arch.SyscallArg func (i *Inotify) queueEvent(ev *Event) { i.evMu.Lock() - defer i.evMu.Unlock() // Check if we should coalesce the event we're about to queue with the last // one currently in the queue. Events are coalesced if they are identical. @@ -221,11 +220,17 @@ func (i *Inotify) queueEvent(ev *Event) { // "Coalesce" the two events by simply not queuing the new one. We // don't need to raise a waiter.EventIn notification because no new // data is available for reading. + i.evMu.Unlock() return } } i.events.PushBack(ev) + + // Release mutex before notifying waiters because we don't control what they + // can do. + i.evMu.Unlock() + i.Queue.Notify(waiter.EventIn) } diff --git a/test/syscalls/linux/BUILD b/test/syscalls/linux/BUILD index 0f9b406d8..ae33d14da 100644 --- a/test/syscalls/linux/BUILD +++ b/test/syscalls/linux/BUILD @@ -496,6 +496,7 @@ cc_binary( srcs = ["epoll.cc"], linkstatic = 1, deps = [ + "//test/util:epoll_util", "//test/util:file_descriptor", "//test/util:posix_error", "//test/util:test_main", @@ -844,6 +845,7 @@ cc_binary( srcs = ["inotify.cc"], linkstatic = 1, deps = [ + "//test/util:epoll_util", "//test/util:file_descriptor", "//test/util:fs_util", "//test/util:temp_path", @@ -852,6 +854,7 @@ cc_binary( "//test/util:thread_util", "@com_google_absl//absl/strings", "@com_google_absl//absl/strings:str_format", + "@com_google_absl//absl/time", ], ) diff --git a/test/syscalls/linux/epoll.cc b/test/syscalls/linux/epoll.cc index 9ae87c00b..46fba7b2d 100644 --- a/test/syscalls/linux/epoll.cc +++ b/test/syscalls/linux/epoll.cc @@ -25,6 +25,7 @@ #include <unistd.h> #include "gtest/gtest.h" +#include "test/util/epoll_util.h" #include "test/util/file_descriptor.h" #include "test/util/posix_error.h" #include "test/util/test_util.h" @@ -37,18 +38,6 @@ namespace { constexpr int kFDsPerEpoll = 3; constexpr uint64_t kMagicConstant = 0x0102030405060708; -// Returns a new epoll file descriptor. -PosixErrorOr<FileDescriptor> NewEpollFD() { - // "Since Linux 2.6.8, the size argument is ignored, but must be greater than - // zero." - epoll_create(2) - int fd = epoll_create(/* size = */ 1); - MaybeSave(); - if (fd < 0) { - return PosixError(errno, "epoll_create"); - } - return FileDescriptor(fd); -} - // Returns a new eventfd. PosixErrorOr<FileDescriptor> NewEventFD() { int fd = eventfd(/* initval = */ 0, /* flags = */ 0); @@ -59,22 +48,6 @@ PosixErrorOr<FileDescriptor> NewEventFD() { return FileDescriptor(fd); } -// Registers `target_fd` with the epoll instance represented by `epoll_fd` for -// the epoll events `events`. Events on `target_fd` will be indicated by setting -// data.u64 to `data` in the returned epoll_event. -PosixError RegisterEpollFD(int epoll_fd, int target_fd, int events, - uint64_t data) { - struct epoll_event event; - event.events = events; - event.data.u64 = data; - int rc = epoll_ctl(epoll_fd, EPOLL_CTL_ADD, target_fd, &event); - MaybeSave(); - if (rc < 0) { - return PosixError(errno, "epoll_ctl"); - } - return NoError(); -} - uint64_t ms_elapsed(const struct timespec* begin, const struct timespec* end) { return (end->tv_sec - begin->tv_sec) * 1000 + (end->tv_nsec - begin->tv_nsec) / 1000000; diff --git a/test/syscalls/linux/inotify.cc b/test/syscalls/linux/inotify.cc index 0e361496c..167ca44a8 100644 --- a/test/syscalls/linux/inotify.cc +++ b/test/syscalls/linux/inotify.cc @@ -14,9 +14,12 @@ #include <fcntl.h> #include <libgen.h> +#include <sched.h> +#include <sys/epoll.h> #include <sys/inotify.h> #include <sys/ioctl.h> +#include <atomic> #include <list> #include <string> #include <vector> @@ -24,6 +27,9 @@ #include "absl/strings/str_cat.h" #include "absl/strings/str_format.h" #include "absl/strings/str_join.h" +#include "absl/time/clock.h" +#include "absl/time/time.h" +#include "test/util/epoll_util.h" #include "test/util/file_descriptor.h" #include "test/util/fs_util.h" #include "test/util/temp_path.h" @@ -1512,6 +1518,55 @@ TEST(Inotify, ControlEvents) { ASSERT_THAT(events2, Are({})); } +// Regression test to ensure epoll and directory access doesn't deadlock. +TEST(Inotify, EpollNoDeadlock) { + const DisableSave ds; // Too many syscalls. + + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK)); + + // Create lots of directories and watch all of them. + const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + std::vector<TempPath> children; + for (size_t i = 0; i < 1000; ++i) { + auto child = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirIn(root.path())); + ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(fd.get(), child.path(), IN_ACCESS)); + children.emplace_back(std::move(child)); + } + + // Run epoll_wait constantly in a separate thread. + std::atomic<bool> done(false); + ScopedThread th([&fd, &done] { + for (auto start = absl::Now(); absl::Now() - start < absl::Seconds(5);) { + FileDescriptor epoll_fd = ASSERT_NO_ERRNO_AND_VALUE(NewEpollFD()); + ASSERT_NO_ERRNO(RegisterEpollFD(epoll_fd.get(), fd.get(), + EPOLLIN | EPOLLOUT | EPOLLET, 0)); + struct epoll_event result[1]; + EXPECT_THAT(RetryEINTR(epoll_wait)(epoll_fd.get(), result, 1, -1), + SyscallSucceedsWithValue(1)); + + sched_yield(); + } + done = true; + }); + + // While epoll thread is running, constantly access all directories to + // generate inotify events. + while (!done) { + std::vector<std::string> files = + ASSERT_NO_ERRNO_AND_VALUE(ListDir(root.path(), false)); + ASSERT_EQ(files.size(), 1002); + for (const auto& child : files) { + if (child == "." || child == "..") { + continue; + } + ASSERT_NO_ERRNO_AND_VALUE(ListDir(JoinPath(root.path(), child), false)); + } + sched_yield(); + } +} + } // namespace } // namespace testing } // namespace gvisor diff --git a/test/util/BUILD b/test/util/BUILD index f981a8d1d..10507eae4 100644 --- a/test/util/BUILD +++ b/test/util/BUILD @@ -259,3 +259,16 @@ cc_library( srcs = ["test_main.cc"], deps = [":test_util"], ) + +cc_library( + name = "epoll_util", + testonly = 1, + srcs = ["epoll_util.cc"], + hdrs = ["epoll_util.h"], + deps = [ + ":file_descriptor", + ":posix_error", + ":save_util", + "@com_google_googletest//:gtest", + ], +) diff --git a/test/util/epoll_util.cc b/test/util/epoll_util.cc new file mode 100644 index 000000000..0b95aa8cd --- /dev/null +++ b/test/util/epoll_util.cc @@ -0,0 +1,52 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "test/util/epoll_util.h" + +#include <sys/epoll.h> + +#include "gmock/gmock.h" +#include "test/util/file_descriptor.h" +#include "test/util/posix_error.h" +#include "test/util/save_util.h" + +namespace gvisor { +namespace testing { + +PosixErrorOr<FileDescriptor> NewEpollFD(int size) { + // "Since Linux 2.6.8, the size argument is ignored, but must be greater than + // zero." - epoll_create(2) + int fd = epoll_create(size); + MaybeSave(); + if (fd < 0) { + return PosixError(errno, "epoll_create"); + } + return FileDescriptor(fd); +} + +PosixError RegisterEpollFD(int epoll_fd, int target_fd, int events, + uint64_t data) { + struct epoll_event event; + event.events = events; + event.data.u64 = data; + int rc = epoll_ctl(epoll_fd, EPOLL_CTL_ADD, target_fd, &event); + MaybeSave(); + if (rc < 0) { + return PosixError(errno, "epoll_ctl"); + } + return NoError(); +} + +} // namespace testing +} // namespace gvisor diff --git a/test/util/epoll_util.h b/test/util/epoll_util.h new file mode 100644 index 000000000..521e7a3d3 --- /dev/null +++ b/test/util/epoll_util.h @@ -0,0 +1,36 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef GVISOR_TEST_UTIL_EPOLL_UTIL_H_ +#define GVISOR_TEST_UTIL_EPOLL_UTIL_H_ + +#include "test/util/file_descriptor.h" +#include "test/util/posix_error.h" + +namespace gvisor { +namespace testing { + +// Returns a new epoll file descriptor. +PosixErrorOr<FileDescriptor> NewEpollFD(int size = 1); + +// Registers `target_fd` with the epoll instance represented by `epoll_fd` for +// the epoll events `events`. Events on `target_fd` will be indicated by setting +// data.u64 to `data` in the returned epoll_event. +PosixError RegisterEpollFD(int epoll_fd, int target_fd, int events, + uint64_t data); + +} // namespace testing +} // namespace gvisor + +#endif // GVISOR_TEST_UTIL_EPOLL_UTIL_H_ |