summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorDean Deng <deandeng@google.com>2020-06-26 13:46:01 -0700
committergVisor bot <gvisor-bot@google.com>2020-06-26 13:47:48 -0700
commit54a31e219ca9d6086a367213a92d2a72ce3af07b (patch)
tree72fa69f3f94115c56b2c884527d7c3b93b3ba593
parentcfd049da87f1a8ce5b9b20c65ab5ccd84cdaf3f1 (diff)
Support inotify IN_ONESHOT.
Also, while we're here, make sure that gofer inotify events are generated when files are created in remote revalidating mode. Updates #1479. PiperOrigin-RevId: 318536354
-rw-r--r--pkg/sentry/fsimpl/gofer/filesystem.go10
-rw-r--r--pkg/sentry/syscalls/linux/vfs2/inotify.go2
-rw-r--r--pkg/sentry/vfs/inotify.go84
-rw-r--r--test/syscalls/linux/inotify.cc48
4 files changed, 115 insertions, 29 deletions
diff --git a/pkg/sentry/fsimpl/gofer/filesystem.go b/pkg/sentry/fsimpl/gofer/filesystem.go
index d253c996c..73bac738d 100644
--- a/pkg/sentry/fsimpl/gofer/filesystem.go
+++ b/pkg/sentry/fsimpl/gofer/filesystem.go
@@ -389,7 +389,15 @@ func (fs *filesystem) doCreateAt(ctx context.Context, rp *vfs.ResolvingPath, dir
// RPC will fail with EEXIST like we would have. If the RPC succeeds, and a
// stale dentry exists, the dentry will fail revalidation next time it's
// used.
- return createInRemoteDir(parent, name)
+ if err := createInRemoteDir(parent, name); err != nil {
+ return err
+ }
+ ev := linux.IN_CREATE
+ if dir {
+ ev |= linux.IN_ISDIR
+ }
+ parent.watches.Notify(name, uint32(ev), 0, vfs.InodeEvent, false /* unlinked */)
+ return nil
}
if child := parent.children[name]; child != nil {
return syserror.EEXIST
diff --git a/pkg/sentry/syscalls/linux/vfs2/inotify.go b/pkg/sentry/syscalls/linux/vfs2/inotify.go
index 74e5287b7..5d98134a5 100644
--- a/pkg/sentry/syscalls/linux/vfs2/inotify.go
+++ b/pkg/sentry/syscalls/linux/vfs2/inotify.go
@@ -81,7 +81,7 @@ func InotifyAddWatch(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kern
// "EINVAL: The given event mask contains no valid events."
// -- inotify_add_watch(2)
- if validBits := mask & linux.ALL_INOTIFY_BITS; validBits == 0 {
+ if mask&linux.ALL_INOTIFY_BITS == 0 {
return 0, nil, syserror.EINVAL
}
diff --git a/pkg/sentry/vfs/inotify.go b/pkg/sentry/vfs/inotify.go
index 35960394c..509034531 100644
--- a/pkg/sentry/vfs/inotify.go
+++ b/pkg/sentry/vfs/inotify.go
@@ -447,29 +447,51 @@ func (w *Watches) Remove(id uint64) {
return
}
- if _, ok := w.ws[id]; !ok {
- // While there's technically no problem with silently ignoring a missing
- // watch, this is almost certainly a bug.
- panic(fmt.Sprintf("Attempt to remove a watch, but no watch found with provided id %+v.", id))
+ // It is possible for w.Remove() to be called for the same watch multiple
+ // times. See the treatment of one-shot watches in Watches.Notify().
+ if _, ok := w.ws[id]; ok {
+ delete(w.ws, id)
}
- delete(w.ws, id)
}
// Notify queues a new event with watches in this set. Watches with
// IN_EXCL_UNLINK are skipped if the event is coming from a child that has been
// unlinked.
func (w *Watches) Notify(name string, events, cookie uint32, et EventType, unlinked bool) {
- // N.B. We don't defer the unlocks because Notify is in the hot path of
- // all IO operations, and the defer costs too much for small IO
- // operations.
+ var hasExpired bool
w.mu.RLock()
for _, watch := range w.ws {
if unlinked && watch.ExcludeUnlinked() && et == PathEvent {
continue
}
- watch.Notify(name, events, cookie)
+ if watch.Notify(name, events, cookie) {
+ hasExpired = true
+ }
+ }
+ w.mu.RUnlock()
+
+ if hasExpired {
+ w.cleanupExpiredWatches()
+ }
+}
+
+// This function is relatively expensive and should only be called where there
+// are expired watches.
+func (w *Watches) cleanupExpiredWatches() {
+ // Because of lock ordering, we cannot acquire Inotify.mu for each watch
+ // owner while holding w.mu. As a result, store expired watches locally
+ // before removing.
+ var toRemove []*Watch
+ w.mu.RLock()
+ for _, watch := range w.ws {
+ if atomic.LoadInt32(&watch.expired) == 1 {
+ toRemove = append(toRemove, watch)
+ }
}
w.mu.RUnlock()
+ for _, watch := range toRemove {
+ watch.owner.RmWatch(watch.wd)
+ }
}
// HandleDeletion is called when the watch target is destroyed. Clear the
@@ -478,16 +500,10 @@ func (w *Watches) Notify(name string, events, cookie uint32, et EventType, unlin
func (w *Watches) HandleDeletion() {
w.Notify("", linux.IN_DELETE_SELF, 0, InodeEvent, true /* unlinked */)
- // We can't hold w.mu while calling watch.handleDeletion to preserve lock
- // ordering w.r.t to the owner inotify instances. Instead, atomically move
- // the watches map into a local variable so we can iterate over it safely.
- //
- // Because of this however, it is possible for the watches' owners to reach
- // this inode while the inode has no refs. This is still safe because the
- // owners can only reach the inode until this function finishes calling
- // watch.handleDeletion below and the inode is guaranteed to exist in the
- // meantime. But we still have to be very careful not to rely on inode state
- // that may have been already destroyed.
+ // As in Watches.Notify, we can't hold w.mu while acquiring Inotify.mu for
+ // the owner of each watch being deleted. Instead, atomically store the
+ // watches map in a local variable and set it to nil so we can iterate over
+ // it with the assurance that there will be no concurrent accesses.
var ws map[uint64]*Watch
w.mu.Lock()
ws = w.ws
@@ -519,17 +535,28 @@ func (w *Watches) HandleDeletion() {
// +stateify savable
type Watch struct {
// Inotify instance which owns this watch.
+ //
+ // This field is immutable after creation.
owner *Inotify
// Descriptor for this watch. This is unique across an inotify instance.
+ //
+ // This field is immutable after creation.
wd int32
// target is a dentry representing the watch target. Its watch set contains this watch.
+ //
+ // This field is immutable after creation.
target *Dentry
// Events being monitored via this watch. Must be accessed with atomic
// memory operations.
mask uint32
+
+ // expired is set to 1 to indicate that this watch is a one-shot that has
+ // already sent a notification and therefore can be removed. Must be accessed
+ // with atomic memory operations.
+ expired int32
}
// OwnerID returns the id of the inotify instance that owns this watch.
@@ -546,12 +573,20 @@ func (w *Watch) ExcludeUnlinked() bool {
return atomic.LoadUint32(&w.mask)&linux.IN_EXCL_UNLINK != 0
}
-// Notify queues a new event on this watch.
-func (w *Watch) Notify(name string, events uint32, cookie uint32) {
+// Notify queues a new event on this watch. Returns true if this is a one-shot
+// watch that should be deleted, after this event was successfully queued.
+func (w *Watch) Notify(name string, events uint32, cookie uint32) bool {
+ if atomic.LoadInt32(&w.expired) == 1 {
+ // This is a one-shot watch that is already in the process of being
+ // removed. This may happen if a second event reaches the watch target
+ // before this watch has been removed.
+ return false
+ }
+
mask := atomic.LoadUint32(&w.mask)
if mask&events == 0 {
// We weren't watching for this event.
- return
+ return false
}
// Event mask should include bits matched from the watch plus all control
@@ -560,6 +595,11 @@ func (w *Watch) Notify(name string, events uint32, cookie uint32) {
effectiveMask := unmaskableBits | mask
matchedEvents := effectiveMask & events
w.owner.queueEvent(newEvent(w.wd, name, matchedEvents, cookie))
+ if mask&linux.IN_ONESHOT != 0 {
+ atomic.StoreInt32(&w.expired, 1)
+ return true
+ }
+ return false
}
// Event represents a struct inotify_event from linux.
diff --git a/test/syscalls/linux/inotify.cc b/test/syscalls/linux/inotify.cc
index 731c7046c..bdb645c35 100644
--- a/test/syscalls/linux/inotify.cc
+++ b/test/syscalls/linux/inotify.cc
@@ -1485,20 +1485,26 @@ TEST(Inotify, DuplicateWatchReturnsSameWatchDescriptor) {
TEST(Inotify, UnmatchedEventsAreDiscarded) {
const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
- const TempPath file1 =
+ TempPath file1 =
ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(root.path()));
const FileDescriptor fd =
ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK));
- ASSERT_NO_ERRNO_AND_VALUE(InotifyAddWatch(fd.get(), file1.path(), IN_ACCESS));
+ const int wd = ASSERT_NO_ERRNO_AND_VALUE(
+ InotifyAddWatch(fd.get(), file1.path(), IN_ACCESS));
- const FileDescriptor file1_fd =
+ FileDescriptor file1_fd =
ASSERT_NO_ERRNO_AND_VALUE(Open(file1.path(), O_WRONLY));
- const std::vector<Event> events =
- ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get()));
+ std::vector<Event> events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get()));
// We only asked for access events, the open event should be discarded.
ASSERT_THAT(events, Are({}));
+
+ // IN_IGNORED events are always generated, regardless of the mask.
+ file1_fd.reset();
+ file1.reset();
+ events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(fd.get()));
+ ASSERT_THAT(events, Are({Event(IN_IGNORED, wd)}));
}
TEST(Inotify, AddWatchWithInvalidEventMaskFails) {
@@ -2073,6 +2079,38 @@ TEST(Inotify, ExcludeUnlinkInodeEvents_NoRandomSave) {
}));
}
+TEST(Inotify, OneShot) {
+ // TODO(gvisor.dev/issue/1624): IN_ONESHOT not supported in VFS1.
+ SKIP_IF(IsRunningWithVFS1());
+
+ const TempPath file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+ const FileDescriptor inotify_fd =
+ ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK));
+
+ const int wd = ASSERT_NO_ERRNO_AND_VALUE(
+ InotifyAddWatch(inotify_fd.get(), file.path(), IN_MODIFY | IN_ONESHOT));
+
+ // Open an fd, write to it, and then close it.
+ FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_WRONLY));
+ ASSERT_THAT(write(fd.get(), "x", 1), SyscallSucceedsWithValue(1));
+ fd.reset();
+
+ // We should get a single event followed by IN_IGNORED indicating removal
+ // of the one-shot watch. Prior activity (i.e. open) that is not in the mask
+ // should not trigger removal, and activity after removal (i.e. close) should
+ // not generate events.
+ std::vector<Event> events =
+ ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(inotify_fd.get()));
+ EXPECT_THAT(events, Are({
+ Event(IN_MODIFY, wd),
+ Event(IN_IGNORED, wd),
+ }));
+
+ // The watch should already have been removed.
+ EXPECT_THAT(inotify_rm_watch(inotify_fd.get(), wd),
+ SyscallFailsWithErrno(EINVAL));
+}
+
// This test helps verify that the lock order of filesystem and inotify locks
// is respected when inotify instances and watch targets are concurrently being
// destroyed.