summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorDean Deng <deandeng@google.com>2021-01-26 11:15:25 -0800
committergVisor bot <gvisor-bot@google.com>2021-01-26 11:17:05 -0800
commit203890b13447a41c484bfeb737958d1ae01383c9 (patch)
treed94630031ccce3ad6c38c065d7d6b00a1da7a8ac
parent9ba24d449f8c62756b809610ec4cd02222e18bd3 (diff)
Move inotify events from syscall to vfs layer.
This also causes inotify events to be generated when reading files for exec. This change also requires us to adjust splice+inotify tests due to discrepancies between gVisor and Linux behavior. Note that these discrepancies existed before; we just did not exercise them previously. See comment for more details. Fixes #5348. PiperOrigin-RevId: 353907187
-rw-r--r--pkg/sentry/syscalls/linux/vfs2/read_write.go39
-rw-r--r--pkg/sentry/syscalls/linux/vfs2/setstat.go13
-rw-r--r--pkg/sentry/syscalls/linux/vfs2/splice.go12
-rw-r--r--pkg/sentry/vfs/file_description.go24
-rw-r--r--test/syscalls/linux/inotify.cc31
5 files changed, 38 insertions, 81 deletions
diff --git a/pkg/sentry/syscalls/linux/vfs2/read_write.go b/pkg/sentry/syscalls/linux/vfs2/read_write.go
index b77b29dcc..c7417840f 100644
--- a/pkg/sentry/syscalls/linux/vfs2/read_write.go
+++ b/pkg/sentry/syscalls/linux/vfs2/read_write.go
@@ -93,17 +93,11 @@ func Readv(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall
func read(t *kernel.Task, file *vfs.FileDescription, dst usermem.IOSequence, opts vfs.ReadOptions) (int64, error) {
n, err := file.Read(t, dst, opts)
if err != syserror.ErrWouldBlock {
- if n > 0 {
- file.Dentry().InotifyWithParent(t, linux.IN_ACCESS, 0, vfs.PathEvent)
- }
return n, err
}
allowBlock, deadline, hasDeadline := blockPolicy(t, file)
if !allowBlock {
- if n > 0 {
- file.Dentry().InotifyWithParent(t, linux.IN_ACCESS, 0, vfs.PathEvent)
- }
return n, err
}
@@ -134,9 +128,6 @@ func read(t *kernel.Task, file *vfs.FileDescription, dst usermem.IOSequence, opt
}
file.EventUnregister(&w)
- if total > 0 {
- file.Dentry().InotifyWithParent(t, linux.IN_ACCESS, 0, vfs.PathEvent)
- }
return total, err
}
@@ -257,17 +248,11 @@ func Preadv2(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca
func pread(t *kernel.Task, file *vfs.FileDescription, dst usermem.IOSequence, offset int64, opts vfs.ReadOptions) (int64, error) {
n, err := file.PRead(t, dst, offset, opts)
if err != syserror.ErrWouldBlock {
- if n > 0 {
- file.Dentry().InotifyWithParent(t, linux.IN_ACCESS, 0, vfs.PathEvent)
- }
return n, err
}
allowBlock, deadline, hasDeadline := blockPolicy(t, file)
if !allowBlock {
- if n > 0 {
- file.Dentry().InotifyWithParent(t, linux.IN_ACCESS, 0, vfs.PathEvent)
- }
return n, err
}
@@ -297,10 +282,6 @@ func pread(t *kernel.Task, file *vfs.FileDescription, dst usermem.IOSequence, of
}
}
file.EventUnregister(&w)
-
- if total > 0 {
- file.Dentry().InotifyWithParent(t, linux.IN_ACCESS, 0, vfs.PathEvent)
- }
return total, err
}
@@ -363,17 +344,11 @@ func Writev(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal
func write(t *kernel.Task, file *vfs.FileDescription, src usermem.IOSequence, opts vfs.WriteOptions) (int64, error) {
n, err := file.Write(t, src, opts)
if err != syserror.ErrWouldBlock {
- if n > 0 {
- file.Dentry().InotifyWithParent(t, linux.IN_MODIFY, 0, vfs.PathEvent)
- }
return n, err
}
allowBlock, deadline, hasDeadline := blockPolicy(t, file)
if !allowBlock {
- if n > 0 {
- file.Dentry().InotifyWithParent(t, linux.IN_MODIFY, 0, vfs.PathEvent)
- }
return n, err
}
@@ -403,10 +378,6 @@ func write(t *kernel.Task, file *vfs.FileDescription, src usermem.IOSequence, op
}
}
file.EventUnregister(&w)
-
- if total > 0 {
- file.Dentry().InotifyWithParent(t, linux.IN_MODIFY, 0, vfs.PathEvent)
- }
return total, err
}
@@ -527,17 +498,11 @@ func Pwritev2(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc
func pwrite(t *kernel.Task, file *vfs.FileDescription, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (int64, error) {
n, err := file.PWrite(t, src, offset, opts)
if err != syserror.ErrWouldBlock {
- if n > 0 {
- file.Dentry().InotifyWithParent(t, linux.IN_MODIFY, 0, vfs.PathEvent)
- }
return n, err
}
allowBlock, deadline, hasDeadline := blockPolicy(t, file)
if !allowBlock {
- if n > 0 {
- file.Dentry().InotifyWithParent(t, linux.IN_ACCESS, 0, vfs.PathEvent)
- }
return n, err
}
@@ -567,10 +532,6 @@ func pwrite(t *kernel.Task, file *vfs.FileDescription, src usermem.IOSequence, o
}
}
file.EventUnregister(&w)
-
- if total > 0 {
- file.Dentry().InotifyWithParent(t, linux.IN_ACCESS, 0, vfs.PathEvent)
- }
return total, err
}
diff --git a/pkg/sentry/syscalls/linux/vfs2/setstat.go b/pkg/sentry/syscalls/linux/vfs2/setstat.go
index 1ee37e5a8..903169dc2 100644
--- a/pkg/sentry/syscalls/linux/vfs2/setstat.go
+++ b/pkg/sentry/syscalls/linux/vfs2/setstat.go
@@ -220,7 +220,6 @@ func Fallocate(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys
length := args[3].Int64()
file := t.GetFileVFS2(fd)
-
if file == nil {
return 0, nil, syserror.EBADF
}
@@ -229,23 +228,18 @@ func Fallocate(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys
if !file.IsWritable() {
return 0, nil, syserror.EBADF
}
-
if mode != 0 {
return 0, nil, syserror.ENOTSUP
}
-
if offset < 0 || length <= 0 {
return 0, nil, syserror.EINVAL
}
size := offset + length
-
if size < 0 {
return 0, nil, syserror.EFBIG
}
-
limit := limits.FromContext(t).Get(limits.FileSize).Cur
-
if uint64(size) >= limit {
t.SendSignal(&arch.SignalInfo{
Signo: int32(linux.SIGXFSZ),
@@ -254,12 +248,7 @@ func Fallocate(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys
return 0, nil, syserror.EFBIG
}
- if err := file.Allocate(t, mode, uint64(offset), uint64(length)); err != nil {
- return 0, nil, err
- }
-
- file.Dentry().InotifyWithParent(t, linux.IN_MODIFY, 0, vfs.PathEvent)
- return 0, nil, nil
+ return 0, nil, file.Allocate(t, mode, uint64(offset), uint64(length))
}
// Utime implements Linux syscall utime(2).
diff --git a/pkg/sentry/syscalls/linux/vfs2/splice.go b/pkg/sentry/syscalls/linux/vfs2/splice.go
index 8bb763a47..19e175203 100644
--- a/pkg/sentry/syscalls/linux/vfs2/splice.go
+++ b/pkg/sentry/syscalls/linux/vfs2/splice.go
@@ -170,13 +170,6 @@ func Splice(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal
}
}
- if n != 0 {
- // On Linux, inotify behavior is not very consistent with splice(2). We try
- // our best to emulate Linux for very basic calls to splice, where for some
- // reason, events are generated for output files, but not input files.
- outFile.Dentry().InotifyWithParent(t, linux.IN_MODIFY, 0, vfs.PathEvent)
- }
-
// We can only pass a single file to handleIOError, so pick inFile arbitrarily.
// This is used only for debugging purposes.
return uintptr(n), nil, slinux.HandleIOErrorVFS2(t, n != 0, err, syserror.ERESTARTSYS, "splice", outFile)
@@ -256,8 +249,6 @@ func Tee(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallCo
}
if n != 0 {
- outFile.Dentry().InotifyWithParent(t, linux.IN_MODIFY, 0, vfs.PathEvent)
-
// If a partial write is completed, the error is dropped. Log it here.
if err != nil && err != io.EOF && err != syserror.ErrWouldBlock {
log.Debugf("tee completed a partial write with error: %v", err)
@@ -449,9 +440,6 @@ func Sendfile(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc
}
if total != 0 {
- inFile.Dentry().InotifyWithParent(t, linux.IN_ACCESS, 0, vfs.PathEvent)
- outFile.Dentry().InotifyWithParent(t, linux.IN_MODIFY, 0, vfs.PathEvent)
-
if err != nil && err != io.EOF && err != syserror.ErrWouldBlock {
// If a partial write is completed, the error is dropped. Log it here.
log.Debugf("sendfile completed a partial write with error: %v", err)
diff --git a/pkg/sentry/vfs/file_description.go b/pkg/sentry/vfs/file_description.go
index 326e35688..f612a71b2 100644
--- a/pkg/sentry/vfs/file_description.go
+++ b/pkg/sentry/vfs/file_description.go
@@ -566,7 +566,11 @@ func (fd *FileDescription) Allocate(ctx context.Context, mode, offset, length ui
if !fd.IsWritable() {
return syserror.EBADF
}
- return fd.impl.Allocate(ctx, mode, offset, length)
+ if err := fd.impl.Allocate(ctx, mode, offset, length); err != nil {
+ return err
+ }
+ fd.Dentry().InotifyWithParent(ctx, linux.IN_MODIFY, 0, PathEvent)
+ return nil
}
// Readiness implements waiter.Waitable.Readiness.
@@ -602,6 +606,9 @@ func (fd *FileDescription) PRead(ctx context.Context, dst usermem.IOSequence, of
}
start := fsmetric.StartReadWait()
n, err := fd.impl.PRead(ctx, dst, offset, opts)
+ if n > 0 {
+ fd.Dentry().InotifyWithParent(ctx, linux.IN_ACCESS, 0, PathEvent)
+ }
fsmetric.Reads.Increment()
fsmetric.FinishReadWait(fsmetric.ReadWait, start)
return n, err
@@ -614,6 +621,9 @@ func (fd *FileDescription) Read(ctx context.Context, dst usermem.IOSequence, opt
}
start := fsmetric.StartReadWait()
n, err := fd.impl.Read(ctx, dst, opts)
+ if n > 0 {
+ fd.Dentry().InotifyWithParent(ctx, linux.IN_ACCESS, 0, PathEvent)
+ }
fsmetric.Reads.Increment()
fsmetric.FinishReadWait(fsmetric.ReadWait, start)
return n, err
@@ -629,7 +639,11 @@ func (fd *FileDescription) PWrite(ctx context.Context, src usermem.IOSequence, o
if !fd.writable {
return 0, syserror.EBADF
}
- return fd.impl.PWrite(ctx, src, offset, opts)
+ n, err := fd.impl.PWrite(ctx, src, offset, opts)
+ if n > 0 {
+ fd.Dentry().InotifyWithParent(ctx, linux.IN_MODIFY, 0, PathEvent)
+ }
+ return n, err
}
// Write is similar to PWrite, but does not specify an offset.
@@ -637,7 +651,11 @@ func (fd *FileDescription) Write(ctx context.Context, src usermem.IOSequence, op
if !fd.writable {
return 0, syserror.EBADF
}
- return fd.impl.Write(ctx, src, opts)
+ n, err := fd.impl.Write(ctx, src, opts)
+ if n > 0 {
+ fd.Dentry().InotifyWithParent(ctx, linux.IN_MODIFY, 0, PathEvent)
+ }
+ return n, err
}
// IterDirents invokes cb on each entry in the directory represented by fd. If
diff --git a/test/syscalls/linux/inotify.cc b/test/syscalls/linux/inotify.cc
index 1d3bef31c..a88c89e20 100644
--- a/test/syscalls/linux/inotify.cc
+++ b/test/syscalls/linux/inotify.cc
@@ -1809,11 +1809,9 @@ TEST(Inotify, Sendfile) {
EXPECT_THAT(out_events, Are({Event(IN_MODIFY, out_wd)}));
}
-// On Linux, inotify behavior is not very consistent with splice(2). We try our
-// best to emulate Linux for very basic calls to splice.
TEST(Inotify, SpliceOnWatchTarget) {
- int pipes[2];
- ASSERT_THAT(pipe2(pipes, O_NONBLOCK), SyscallSucceeds());
+ int pipefds[2];
+ ASSERT_THAT(pipe2(pipefds, O_NONBLOCK), SyscallSucceeds());
const TempPath dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
const FileDescriptor inotify_fd =
@@ -1828,15 +1826,20 @@ TEST(Inotify, SpliceOnWatchTarget) {
const int file_wd = ASSERT_NO_ERRNO_AND_VALUE(
InotifyAddWatch(inotify_fd.get(), file.path(), IN_ALL_EVENTS));
- EXPECT_THAT(splice(fd.get(), nullptr, pipes[1], nullptr, 1, /*flags=*/0),
+ EXPECT_THAT(splice(fd.get(), nullptr, pipefds[1], nullptr, 1, /*flags=*/0),
SyscallSucceedsWithValue(1));
- // Surprisingly, events are not generated in Linux if we read from a file.
+ // Surprisingly, events may not be generated in Linux if we read from a file.
+ // fs/splice.c:generic_file_splice_read, which is used most often, does not
+ // generate events, whereas fs/splice.c:default_file_splice_read does.
std::vector<Event> events =
ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(inotify_fd.get()));
- ASSERT_THAT(events, Are({}));
+ if (IsRunningOnGvisor() && !IsRunningWithVFS1()) {
+ ASSERT_THAT(events, Are({Event(IN_ACCESS, dir_wd, Basename(file.path())),
+ Event(IN_ACCESS, file_wd)}));
+ }
- EXPECT_THAT(splice(pipes[0], nullptr, fd.get(), nullptr, 1, /*flags=*/0),
+ EXPECT_THAT(splice(pipefds[0], nullptr, fd.get(), nullptr, 1, /*flags=*/0),
SyscallSucceedsWithValue(1));
events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(inotify_fd.get()));
@@ -1847,8 +1850,8 @@ TEST(Inotify, SpliceOnWatchTarget) {
}
TEST(Inotify, SpliceOnInotifyFD) {
- int pipes[2];
- ASSERT_THAT(pipe2(pipes, O_NONBLOCK), SyscallSucceeds());
+ int pipefds[2];
+ ASSERT_THAT(pipe2(pipefds, O_NONBLOCK), SyscallSucceeds());
const TempPath root = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
const FileDescriptor fd =
@@ -1864,11 +1867,11 @@ TEST(Inotify, SpliceOnInotifyFD) {
char buf;
EXPECT_THAT(read(file1_fd.get(), &buf, 1), SyscallSucceeds());
- EXPECT_THAT(splice(fd.get(), nullptr, pipes[1], nullptr,
+ EXPECT_THAT(splice(fd.get(), nullptr, pipefds[1], nullptr,
sizeof(struct inotify_event) + 1, SPLICE_F_NONBLOCK),
SyscallSucceedsWithValue(sizeof(struct inotify_event)));
- const FileDescriptor read_fd(pipes[0]);
+ const FileDescriptor read_fd(pipefds[0]);
const std::vector<Event> events =
ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(read_fd.get()));
ASSERT_THAT(events, Are({Event(IN_ACCESS, watcher)}));
@@ -1966,9 +1969,7 @@ TEST(Inotify, Xattr) {
}
TEST(Inotify, Exec) {
- // TODO(gvisor.dev/issues/5348)
- SKIP_IF(IsRunningOnGvisor());
-
+ SKIP_IF(IsRunningWithVFS1());
const FileDescriptor fd =
ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK));
const int wd = ASSERT_NO_ERRNO_AND_VALUE(