summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorDean Deng <deandeng@google.com>2020-06-23 18:31:53 -0700
committergVisor bot <gvisor-bot@google.com>2020-06-23 18:33:04 -0700
commit65a587dedf1a30b3614a66532d2b448026b9c540 (patch)
tree425422cc8115fa4534dba7e270becfd2d4201723
parentacf519a77b480e8d974186568bd66eaa89bac024 (diff)
Complete inotify IN_EXCL_UNLINK implementation in VFS2.
Events were only skipped on parent directories after their children were unlinked; events on the unlinked file itself need to be skipped as well. As a result, all Watches.Notify() calls need to know whether the dentry where the call came from was unlinked. Updates #1479. PiperOrigin-RevId: 317979476
-rw-r--r--pkg/sentry/fsimpl/gofer/filesystem.go8
-rw-r--r--pkg/sentry/fsimpl/gofer/gofer.go4
-rw-r--r--pkg/sentry/fsimpl/tmpfs/filesystem.go8
-rw-r--r--pkg/sentry/fsimpl/tmpfs/tmpfs.go10
-rw-r--r--pkg/sentry/vfs/inotify.go39
-rw-r--r--test/syscalls/linux/inotify.cc84
6 files changed, 95 insertions, 58 deletions
diff --git a/pkg/sentry/fsimpl/gofer/filesystem.go b/pkg/sentry/fsimpl/gofer/filesystem.go
index 0321fd384..d253c996c 100644
--- a/pkg/sentry/fsimpl/gofer/filesystem.go
+++ b/pkg/sentry/fsimpl/gofer/filesystem.go
@@ -375,7 +375,7 @@ func (fs *filesystem) doCreateAt(ctx context.Context, rp *vfs.ResolvingPath, dir
if dir {
ev |= linux.IN_ISDIR
}
- parent.watches.Notify(name, uint32(ev), 0, vfs.InodeEvent)
+ parent.watches.Notify(name, uint32(ev), 0, vfs.InodeEvent, false /* unlinked */)
return nil
}
if fs.opts.interop == InteropModeShared {
@@ -409,7 +409,7 @@ func (fs *filesystem) doCreateAt(ctx context.Context, rp *vfs.ResolvingPath, dir
if dir {
ev |= linux.IN_ISDIR
}
- parent.watches.Notify(name, uint32(ev), 0, vfs.InodeEvent)
+ parent.watches.Notify(name, uint32(ev), 0, vfs.InodeEvent, false /* unlinked */)
return nil
}
@@ -543,7 +543,7 @@ func (fs *filesystem) unlinkAt(ctx context.Context, rp *vfs.ResolvingPath, dir b
// Generate inotify events for rmdir or unlink.
if dir {
- parent.watches.Notify(name, linux.IN_DELETE|linux.IN_ISDIR, 0, vfs.InodeEvent)
+ parent.watches.Notify(name, linux.IN_DELETE|linux.IN_ISDIR, 0, vfs.InodeEvent, true /* unlinked */)
} else {
var cw *vfs.Watches
if child != nil {
@@ -1040,7 +1040,7 @@ func (d *dentry) createAndOpenChildLocked(ctx context.Context, rp *vfs.Resolving
}
childVFSFD = &fd.vfsfd
}
- d.watches.Notify(name, linux.IN_CREATE, 0, vfs.PathEvent)
+ d.watches.Notify(name, linux.IN_CREATE, 0, vfs.PathEvent, false /* unlinked */)
return childVFSFD, nil
}
diff --git a/pkg/sentry/fsimpl/gofer/gofer.go b/pkg/sentry/fsimpl/gofer/gofer.go
index c6c284ff3..b0b6d8c64 100644
--- a/pkg/sentry/fsimpl/gofer/gofer.go
+++ b/pkg/sentry/fsimpl/gofer/gofer.go
@@ -1066,9 +1066,9 @@ func (d *dentry) InotifyWithParent(events, cookie uint32, et vfs.EventType) {
d.fs.renameMu.RLock()
// The ordering below is important, Linux always notifies the parent first.
if d.parent != nil {
- d.parent.watches.NotifyWithExclusions(d.name, events, cookie, et, d.isDeleted())
+ d.parent.watches.Notify(d.name, events, cookie, et, d.isDeleted())
}
- d.watches.Notify("", events, cookie, et)
+ d.watches.Notify("", events, cookie, et, d.isDeleted())
d.fs.renameMu.RUnlock()
}
diff --git a/pkg/sentry/fsimpl/tmpfs/filesystem.go b/pkg/sentry/fsimpl/tmpfs/filesystem.go
index 85d8e37b2..f766b7ce2 100644
--- a/pkg/sentry/fsimpl/tmpfs/filesystem.go
+++ b/pkg/sentry/fsimpl/tmpfs/filesystem.go
@@ -182,7 +182,7 @@ func (fs *filesystem) doCreateAt(rp *vfs.ResolvingPath, dir bool, create func(pa
if dir {
ev |= linux.IN_ISDIR
}
- parentDir.inode.watches.Notify(name, uint32(ev), 0, vfs.InodeEvent)
+ parentDir.inode.watches.Notify(name, uint32(ev), 0, vfs.InodeEvent, false /* unlinked */)
parentDir.inode.touchCMtime()
return nil
}
@@ -251,7 +251,7 @@ func (fs *filesystem) LinkAt(ctx context.Context, rp *vfs.ResolvingPath, vd vfs.
return syserror.EMLINK
}
i.incLinksLocked()
- i.watches.Notify("", linux.IN_ATTRIB, 0, vfs.InodeEvent)
+ i.watches.Notify("", linux.IN_ATTRIB, 0, vfs.InodeEvent, false /* unlinked */)
parentDir.insertChildLocked(fs.newDentry(i), name)
return nil
})
@@ -368,7 +368,7 @@ afterTrailingSymlink:
if err != nil {
return nil, err
}
- parentDir.inode.watches.Notify(name, linux.IN_CREATE, 0, vfs.PathEvent)
+ parentDir.inode.watches.Notify(name, linux.IN_CREATE, 0, vfs.PathEvent, false /* unlinked */)
parentDir.inode.touchCMtime()
return fd, nil
}
@@ -625,7 +625,7 @@ func (fs *filesystem) RmdirAt(ctx context.Context, rp *vfs.ResolvingPath) error
return err
}
parentDir.removeChildLocked(child)
- parentDir.inode.watches.Notify(name, linux.IN_DELETE|linux.IN_ISDIR, 0, vfs.InodeEvent)
+ parentDir.inode.watches.Notify(name, linux.IN_DELETE|linux.IN_ISDIR, 0, vfs.InodeEvent, true /* unlinked */)
// Remove links for child, child/., and child/..
child.inode.decLinksLocked()
child.inode.decLinksLocked()
diff --git a/pkg/sentry/fsimpl/tmpfs/tmpfs.go b/pkg/sentry/fsimpl/tmpfs/tmpfs.go
index a85bfc968..d7f4f0779 100644
--- a/pkg/sentry/fsimpl/tmpfs/tmpfs.go
+++ b/pkg/sentry/fsimpl/tmpfs/tmpfs.go
@@ -259,14 +259,16 @@ func (d *dentry) InotifyWithParent(events, cookie uint32, et vfs.EventType) {
events |= linux.IN_ISDIR
}
+ // tmpfs never calls VFS.InvalidateDentry(), so d.vfsd.IsDead() indicates
+ // that d was deleted.
+ deleted := d.vfsd.IsDead()
+
d.inode.fs.mu.RLock()
// The ordering below is important, Linux always notifies the parent first.
if d.parent != nil {
- // tmpfs never calls VFS.InvalidateDentry(), so d.vfsd.IsDead() indicates
- // that d was deleted.
- d.parent.inode.watches.NotifyWithExclusions(d.name, events, cookie, et, d.vfsd.IsDead())
+ d.parent.inode.watches.Notify(d.name, events, cookie, et, deleted)
}
- d.inode.watches.Notify("", events, cookie, et)
+ d.inode.watches.Notify("", events, cookie, et, deleted)
d.inode.fs.mu.RUnlock()
}
diff --git a/pkg/sentry/vfs/inotify.go b/pkg/sentry/vfs/inotify.go
index 36e7a3767..6043a0566 100644
--- a/pkg/sentry/vfs/inotify.go
+++ b/pkg/sentry/vfs/inotify.go
@@ -467,21 +467,16 @@ func (w *Watches) Remove(id uint64) {
delete(w.ws, id)
}
-// Notify queues a new event with all watches in this set.
-func (w *Watches) Notify(name string, events, cookie uint32, et EventType) {
- w.NotifyWithExclusions(name, events, cookie, et, false)
-}
-
-// NotifyWithExclusions queues a new event with watches in this set. Watches
-// with IN_EXCL_UNLINK are skipped if the event is coming from a child that
-// has been unlinked.
-func (w *Watches) NotifyWithExclusions(name string, events, cookie uint32, et EventType, unlinked bool) {
+// Notify queues a new event with watches in this set. Watches with
+// IN_EXCL_UNLINK are skipped if the event is coming from a child that has been
+// unlinked.
+func (w *Watches) Notify(name string, events, cookie uint32, et EventType, unlinked bool) {
// N.B. We don't defer the unlocks because Notify is in the hot path of
// all IO operations, and the defer costs too much for small IO
// operations.
w.mu.RLock()
for _, watch := range w.ws {
- if unlinked && watch.ExcludeUnlinkedChildren() && et == PathEvent {
+ if unlinked && watch.ExcludeUnlinked() && et == PathEvent {
continue
}
watch.Notify(name, events, cookie)
@@ -492,7 +487,7 @@ func (w *Watches) NotifyWithExclusions(name string, events, cookie uint32, et Ev
// HandleDeletion is called when the watch target is destroyed to emit
// the appropriate events.
func (w *Watches) HandleDeletion() {
- w.Notify("", linux.IN_DELETE_SELF, 0, InodeEvent)
+ w.Notify("", linux.IN_DELETE_SELF, 0, InodeEvent, true /* unlinked */)
// We can't hold w.mu while calling watch.handleDeletion to preserve lock
// ordering w.r.t to the owner inotify instances. Instead, atomically move
@@ -539,14 +534,12 @@ func (w *Watch) OwnerID() uint64 {
return w.owner.id
}
-// ExcludeUnlinkedChildren indicates whether the watched object should continue
-// to be notified of events of its children after they have been unlinked, e.g.
-// for an open file descriptor.
+// ExcludeUnlinked indicates whether the watched object should continue to be
+// notified of events originating from a path that has been unlinked.
//
-// TODO(gvisor.dev/issue/1479): Implement IN_EXCL_UNLINK.
-// We can do this by keeping track of the set of unlinked children in Watches
-// to skip notification.
-func (w *Watch) ExcludeUnlinkedChildren() bool {
+// For example, if "foo/bar" is opened and then unlinked, operations on the
+// open fd may be ignored by watches on "foo" and "foo/bar" with IN_EXCL_UNLINK.
+func (w *Watch) ExcludeUnlinked() bool {
return atomic.LoadUint32(&w.mask)&linux.IN_EXCL_UNLINK != 0
}
@@ -710,10 +703,10 @@ func InotifyEventFromStatMask(mask uint32) uint32 {
// parent/child notifications, the child is notified first in this case.
func InotifyRemoveChild(self, parent *Watches, name string) {
if self != nil {
- self.Notify("", linux.IN_ATTRIB, 0, InodeEvent)
+ self.Notify("", linux.IN_ATTRIB, 0, InodeEvent, true /* unlinked */)
}
if parent != nil {
- parent.Notify(name, linux.IN_DELETE, 0, InodeEvent)
+ parent.Notify(name, linux.IN_DELETE, 0, InodeEvent, true /* unlinked */)
}
}
@@ -726,13 +719,13 @@ func InotifyRename(ctx context.Context, renamed, oldParent, newParent *Watches,
}
cookie := uniqueid.InotifyCookie(ctx)
if oldParent != nil {
- oldParent.Notify(oldName, dirEv|linux.IN_MOVED_FROM, cookie, InodeEvent)
+ oldParent.Notify(oldName, dirEv|linux.IN_MOVED_FROM, cookie, InodeEvent, false /* unlinked */)
}
if newParent != nil {
- newParent.Notify(newName, dirEv|linux.IN_MOVED_TO, cookie, InodeEvent)
+ newParent.Notify(newName, dirEv|linux.IN_MOVED_TO, cookie, InodeEvent, false /* unlinked */)
}
// Somewhat surprisingly, self move events do not have a cookie.
if renamed != nil {
- renamed.Notify("", linux.IN_MOVE_SELF, 0, InodeEvent)
+ renamed.Notify("", linux.IN_MOVE_SELF, 0, InodeEvent, false /* unlinked */)
}
}
diff --git a/test/syscalls/linux/inotify.cc b/test/syscalls/linux/inotify.cc
index 13e096e9c..731c7046c 100644
--- a/test/syscalls/linux/inotify.cc
+++ b/test/syscalls/linux/inotify.cc
@@ -1840,9 +1840,7 @@ TEST(Inotify, IncludeUnlinkedFile_NoRandomSave) {
const TempPath dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
const TempPath file = ASSERT_NO_ERRNO_AND_VALUE(
TempPath::CreateFileWith(dir.path(), "123", TempPath::kDefaultFileMode));
-
- const FileDescriptor fd =
- ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDWR));
+ FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDWR));
const FileDescriptor inotify_fd =
ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK));
@@ -1855,7 +1853,7 @@ TEST(Inotify, IncludeUnlinkedFile_NoRandomSave) {
int val = 0;
ASSERT_THAT(read(fd.get(), &val, sizeof(val)), SyscallSucceeds());
ASSERT_THAT(write(fd.get(), &val, sizeof(val)), SyscallSucceeds());
- const std::vector<Event> events =
+ std::vector<Event> events =
ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(inotify_fd.get()));
EXPECT_THAT(events, Are({
Event(IN_ATTRIB, file_wd),
@@ -1865,6 +1863,15 @@ TEST(Inotify, IncludeUnlinkedFile_NoRandomSave) {
Event(IN_MODIFY, dir_wd, Basename(file.path())),
Event(IN_MODIFY, file_wd),
}));
+
+ fd.reset();
+ events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(inotify_fd.get()));
+ EXPECT_THAT(events, Are({
+ Event(IN_CLOSE_WRITE, dir_wd, Basename(file.path())),
+ Event(IN_CLOSE_WRITE, file_wd),
+ Event(IN_DELETE_SELF, file_wd),
+ Event(IN_IGNORED, file_wd),
+ }));
}
// Watches created with IN_EXCL_UNLINK will stop emitting events on fds for
@@ -1881,13 +1888,14 @@ TEST(Inotify, ExcludeUnlink_NoRandomSave) {
const TempPath file =
ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileIn(dir.path()));
- const FileDescriptor fd =
- ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDWR));
+ FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDWR));
const FileDescriptor inotify_fd =
ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK));
- const int wd = ASSERT_NO_ERRNO_AND_VALUE(InotifyAddWatch(
+ const int dir_wd = ASSERT_NO_ERRNO_AND_VALUE(InotifyAddWatch(
inotify_fd.get(), dir.path(), IN_ALL_EVENTS | IN_EXCL_UNLINK));
+ const int file_wd = ASSERT_NO_ERRNO_AND_VALUE(InotifyAddWatch(
+ inotify_fd.get(), file.path(), IN_ALL_EVENTS | IN_EXCL_UNLINK));
// Unlink the child, which should cause further operations on the open file
// descriptor to be ignored.
@@ -1895,14 +1903,28 @@ TEST(Inotify, ExcludeUnlink_NoRandomSave) {
int val = 0;
ASSERT_THAT(write(fd.get(), &val, sizeof(val)), SyscallSucceeds());
ASSERT_THAT(read(fd.get(), &val, sizeof(val)), SyscallSucceeds());
- const std::vector<Event> events =
+ std::vector<Event> events =
ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(inotify_fd.get()));
- EXPECT_THAT(events, Are({Event(IN_DELETE, wd, Basename(file.path()))}));
+ EXPECT_THAT(events, Are({
+ Event(IN_ATTRIB, file_wd),
+ Event(IN_DELETE, dir_wd, Basename(file.path())),
+ }));
+
+ fd.reset();
+ events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(inotify_fd.get()));
+ ASSERT_THAT(events, Are({
+ Event(IN_DELETE_SELF, file_wd),
+ Event(IN_IGNORED, file_wd),
+ }));
}
// We need to disable S/R because there are filesystems where we cannot re-open
// fds to an unlinked file across S/R, e.g. gofer-backed filesytems.
TEST(Inotify, ExcludeUnlinkDirectory_NoRandomSave) {
+ // TODO(gvisor.dev/issue/1624): This test fails on VFS1. Remove once VFS1 is
+ // deleted.
+ SKIP_IF(IsRunningWithVFS1());
+
const DisableSave ds;
const TempPath parent = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
@@ -1912,20 +1934,29 @@ TEST(Inotify, ExcludeUnlinkDirectory_NoRandomSave) {
const FileDescriptor inotify_fd =
ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK));
- const FileDescriptor fd =
+ FileDescriptor fd =
ASSERT_NO_ERRNO_AND_VALUE(Open(dirPath.c_str(), O_RDONLY | O_DIRECTORY));
- const int wd = ASSERT_NO_ERRNO_AND_VALUE(InotifyAddWatch(
+ const int parent_wd = ASSERT_NO_ERRNO_AND_VALUE(InotifyAddWatch(
inotify_fd.get(), parent.path(), IN_ALL_EVENTS | IN_EXCL_UNLINK));
+ const int self_wd = ASSERT_NO_ERRNO_AND_VALUE(InotifyAddWatch(
+ inotify_fd.get(), dir.path(), IN_ALL_EVENTS | IN_EXCL_UNLINK));
// Unlink the dir, and then close the open fd.
ASSERT_THAT(rmdir(dirPath.c_str()), SyscallSucceeds());
dir.reset();
- const std::vector<Event> events =
+ std::vector<Event> events =
ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(inotify_fd.get()));
// No close event should appear.
ASSERT_THAT(events,
- Are({Event(IN_DELETE | IN_ISDIR, wd, Basename(dirPath))}));
+ Are({Event(IN_DELETE | IN_ISDIR, parent_wd, Basename(dirPath))}));
+
+ fd.reset();
+ events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(inotify_fd.get()));
+ ASSERT_THAT(events, Are({
+ Event(IN_DELETE_SELF, self_wd),
+ Event(IN_IGNORED, self_wd),
+ }));
}
// If "dir/child" and "dir/child2" are links to the same file, and "dir/child"
@@ -1989,10 +2020,6 @@ TEST(Inotify, ExcludeUnlinkInodeEvents_NoRandomSave) {
const FileDescriptor fd =
ASSERT_NO_ERRNO_AND_VALUE(Open(file.path().c_str(), O_RDWR));
- const FileDescriptor inotify_fd =
- ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK));
- const int wd = ASSERT_NO_ERRNO_AND_VALUE(InotifyAddWatch(
- inotify_fd.get(), dir.path(), IN_ALL_EVENTS | IN_EXCL_UNLINK));
// NOTE(b/157163751): Create another link before unlinking. This is needed for
// the gofer filesystem in gVisor, where open fds will not work once the link
@@ -2006,6 +2033,13 @@ TEST(Inotify, ExcludeUnlinkInodeEvents_NoRandomSave) {
ASSERT_THAT(rc, SyscallSucceeds());
}
+ const FileDescriptor inotify_fd =
+ ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK));
+ const int dir_wd = ASSERT_NO_ERRNO_AND_VALUE(InotifyAddWatch(
+ inotify_fd.get(), dir.path(), IN_ALL_EVENTS | IN_EXCL_UNLINK));
+ const int file_wd = ASSERT_NO_ERRNO_AND_VALUE(InotifyAddWatch(
+ inotify_fd.get(), file.path(), IN_ALL_EVENTS | IN_EXCL_UNLINK));
+
// Even after unlinking, inode-level operations will trigger events regardless
// of IN_EXCL_UNLINK.
ASSERT_THAT(unlink(file.path().c_str()), SyscallSucceeds());
@@ -2015,20 +2049,28 @@ TEST(Inotify, ExcludeUnlinkInodeEvents_NoRandomSave) {
std::vector<Event> events =
ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(inotify_fd.get()));
EXPECT_THAT(events, Are({
- Event(IN_DELETE, wd, Basename(file.path())),
- Event(IN_MODIFY, wd, Basename(file.path())),
+ Event(IN_ATTRIB, file_wd),
+ Event(IN_DELETE, dir_wd, Basename(file.path())),
+ Event(IN_MODIFY, dir_wd, Basename(file.path())),
+ Event(IN_MODIFY, file_wd),
}));
const struct timeval times[2] = {{1, 0}, {2, 0}};
ASSERT_THAT(futimes(fd.get(), times), SyscallSucceeds());
events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(inotify_fd.get()));
- EXPECT_THAT(events, Are({Event(IN_ATTRIB, wd, Basename(file.path()))}));
+ EXPECT_THAT(events, Are({
+ Event(IN_ATTRIB, dir_wd, Basename(file.path())),
+ Event(IN_ATTRIB, file_wd),
+ }));
// S/R is disabled on this entire test due to behavior with unlink; it must
// also be disabled after this point because of fchmod.
ASSERT_THAT(fchmod(fd.get(), 0777), SyscallSucceeds());
events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(inotify_fd.get()));
- EXPECT_THAT(events, Are({Event(IN_ATTRIB, wd, Basename(file.path()))}));
+ EXPECT_THAT(events, Are({
+ Event(IN_ATTRIB, dir_wd, Basename(file.path())),
+ Event(IN_ATTRIB, file_wd),
+ }));
}
// This test helps verify that the lock order of filesystem and inotify locks