diff options
author | Andrei Vagin <avagin@google.com> | 2019-06-24 17:44:00 -0700 |
---|---|---|
committer | gVisor bot <gvisor-bot@google.com> | 2019-06-24 17:45:02 -0700 |
commit | e9ea7230f7dc70d3e1bb5ae32b6927209cafb465 (patch) | |
tree | 2f8b5aacade58f48ad3f4748c5fe6875f2761f0b | |
parent | 7f5d0afe525af4728ed5ec75193e9e4560d9558c (diff) |
fs: synchronize concurrent writes into files with O_APPEND
For files with O_APPEND, a file write operation gets a file size and uses it as
offset to call an inode write operation. This means that all other operations
which can change a file size should be blocked while the write operation doesn't
complete.
PiperOrigin-RevId: 254873771
-rw-r--r-- | pkg/sentry/fs/file.go | 14 | ||||
-rw-r--r-- | pkg/sentry/fs/inode.go | 19 | ||||
-rw-r--r-- | pkg/sentry/fs/splice.go | 2 | ||||
-rw-r--r-- | test/syscalls/linux/BUILD | 3 | ||||
-rw-r--r-- | test/syscalls/linux/open.cc | 37 |
5 files changed, 69 insertions, 6 deletions
diff --git a/pkg/sentry/fs/file.go b/pkg/sentry/fs/file.go index 55ffe6c0c..8e1f5674d 100644 --- a/pkg/sentry/fs/file.go +++ b/pkg/sentry/fs/file.go @@ -310,9 +310,11 @@ func (f *File) Writev(ctx context.Context, src usermem.IOSequence) (int64, error return 0, syserror.ErrInterrupted } + unlockAppendMu := f.Dirent.Inode.lockAppendMu(f.Flags().Append) // Handle append mode. if f.Flags().Append { if err := f.offsetForAppend(ctx, &f.offset); err != nil { + unlockAppendMu() f.mu.Unlock() return 0, err } @@ -322,6 +324,7 @@ func (f *File) Writev(ctx context.Context, src usermem.IOSequence) (int64, error limit, ok := f.checkLimit(ctx, f.offset) switch { case ok && limit == 0: + unlockAppendMu() f.mu.Unlock() return 0, syserror.ErrExceedsFileSizeLimit case ok: @@ -333,6 +336,7 @@ func (f *File) Writev(ctx context.Context, src usermem.IOSequence) (int64, error if n >= 0 && !f.flags.NonSeekable { atomic.StoreInt64(&f.offset, f.offset+n) } + unlockAppendMu() f.mu.Unlock() return n, err } @@ -348,13 +352,11 @@ func (f *File) Pwritev(ctx context.Context, src usermem.IOSequence, offset int64 // However, on Linux, if a file is opened with O_APPEND, pwrite() // appends data to the end of the file, regardless of the value of // offset." + unlockAppendMu := f.Dirent.Inode.lockAppendMu(f.Flags().Append) + defer unlockAppendMu() + if f.Flags().Append { - if !f.mu.Lock(ctx) { - return 0, syserror.ErrInterrupted - } - defer f.mu.Unlock() if err := f.offsetForAppend(ctx, &offset); err != nil { - f.mu.Unlock() return 0, err } } @@ -373,7 +375,7 @@ func (f *File) Pwritev(ctx context.Context, src usermem.IOSequence, offset int64 // offsetForAppend sets the given offset to the end of the file. // -// Precondition: the underlying file mutex should be held. +// Precondition: the file.Dirent.Inode.appendMu mutex should be held for writing. func (f *File) offsetForAppend(ctx context.Context, offset *int64) error { uattr, err := f.Dirent.Inode.UnstableAttr(ctx) if err != nil { diff --git a/pkg/sentry/fs/inode.go b/pkg/sentry/fs/inode.go index a889586aa..e4aae1135 100644 --- a/pkg/sentry/fs/inode.go +++ b/pkg/sentry/fs/inode.go @@ -15,6 +15,8 @@ package fs import ( + "sync" + "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/metric" @@ -55,6 +57,12 @@ type Inode struct { // overlay is the overlay entry for this Inode. overlay *overlayEntry + + // appendMu is used to synchronize write operations into files which + // have been opened with O_APPEND. Operations which change a file size + // have to take this lock for read. Write operations to files with + // O_APPEND have to take this lock for write. + appendMu sync.RWMutex `state:"nosave"` } // LockCtx is an Inode's lock context and contains different personalities of locks; both @@ -337,6 +345,8 @@ func (i *Inode) Truncate(ctx context.Context, d *Dirent, size int64) error { if i.overlay != nil { return overlayTruncate(ctx, i.overlay, d, size) } + i.appendMu.RLock() + defer i.appendMu.RUnlock() return i.InodeOperations.Truncate(ctx, i, size) } @@ -438,3 +448,12 @@ func (i *Inode) CheckCapability(ctx context.Context, cp linux.Capability) bool { } return creds.HasCapability(cp) } + +func (i *Inode) lockAppendMu(appendMode bool) func() { + if appendMode { + i.appendMu.Lock() + return i.appendMu.Unlock + } + i.appendMu.RLock() + return i.appendMu.RUnlock +} diff --git a/pkg/sentry/fs/splice.go b/pkg/sentry/fs/splice.go index 978dc679b..eed1c2854 100644 --- a/pkg/sentry/fs/splice.go +++ b/pkg/sentry/fs/splice.go @@ -88,6 +88,8 @@ func Splice(ctx context.Context, dst *File, src *File, opts SpliceOpts) (int64, // Check append-only mode and the limit. if !dstPipe { + unlock := dst.Dirent.Inode.lockAppendMu(dst.Flags().Append) + defer unlock() if dst.Flags().Append { if opts.DstOffset { // We need to acquire the lock. diff --git a/test/syscalls/linux/BUILD b/test/syscalls/linux/BUILD index bfe3dfe04..8a24d8c0b 100644 --- a/test/syscalls/linux/BUILD +++ b/test/syscalls/linux/BUILD @@ -1177,6 +1177,7 @@ cc_binary( "//test/util:temp_path", "//test/util:test_main", "//test/util:test_util", + "//test/util:thread_util", "@com_google_absl//absl/strings", "@com_google_googletest//:gtest", ], @@ -2940,6 +2941,8 @@ cc_binary( testonly = 1, srcs = ["tcp_socket.cc"], linkstatic = 1, + # FIXME(b/135470853) + tags = ["flaky"], deps = [ ":socket_test_util", "//test/util:file_descriptor", diff --git a/test/syscalls/linux/open.cc b/test/syscalls/linux/open.cc index 42646bb02..e0525f386 100644 --- a/test/syscalls/linux/open.cc +++ b/test/syscalls/linux/open.cc @@ -28,6 +28,7 @@ #include "test/util/fs_util.h" #include "test/util/temp_path.h" #include "test/util/test_util.h" +#include "test/util/thread_util.h" namespace gvisor { namespace testing { @@ -214,6 +215,42 @@ TEST_F(OpenTest, AppendOnly) { SyscallSucceedsWithValue(kBufSize * 3)); } +TEST_F(OpenTest, AppendConcurrentWrite) { + constexpr int kThreadCount = 5; + constexpr int kBytesPerThread = 10000; + std::unique_ptr<ScopedThread> threads[kThreadCount]; + + // In case of the uncached policy, we expect that a file system can be changed + // externally, so we create a new inode each time when we open a file and we + // can't guarantee that writes to files with O_APPEND will work correctly. + SKIP_IF(getenv("GVISOR_GOFER_UNCACHED")); + + EXPECT_THAT(truncate(test_file_name_.c_str(), 0), SyscallSucceeds()); + + std::string filename = test_file_name_; + DisableSave ds; // Too many syscalls. + // Start kThreadCount threads which will write concurrently into the same + // file. + for (int i = 0; i < kThreadCount; i++) { + threads[i] = absl::make_unique<ScopedThread>([filename]() { + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(filename, O_RDWR | O_APPEND)); + + for (int j = 0; j < kBytesPerThread; j++) { + EXPECT_THAT(WriteFd(fd.get(), &j, 1), SyscallSucceedsWithValue(1)); + } + }); + } + for (int i = 0; i < kThreadCount; i++) { + threads[i]->Join(); + } + + // Check that the size of the file is correct. + struct stat st; + EXPECT_THAT(stat(test_file_name_.c_str(), &st), SyscallSucceeds()); + EXPECT_EQ(st.st_size, kThreadCount * kBytesPerThread); +} + TEST_F(OpenTest, Truncate) { { // First write some data to the new file and close it. |