summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorAndrei Vagin <avagin@google.com>2019-06-24 17:44:00 -0700
committergVisor bot <gvisor-bot@google.com>2019-06-24 17:45:02 -0700
commite9ea7230f7dc70d3e1bb5ae32b6927209cafb465 (patch)
tree2f8b5aacade58f48ad3f4748c5fe6875f2761f0b
parent7f5d0afe525af4728ed5ec75193e9e4560d9558c (diff)
fs: synchronize concurrent writes into files with O_APPEND
For files with O_APPEND, a file write operation gets a file size and uses it as offset to call an inode write operation. This means that all other operations which can change a file size should be blocked while the write operation doesn't complete. PiperOrigin-RevId: 254873771
-rw-r--r--pkg/sentry/fs/file.go14
-rw-r--r--pkg/sentry/fs/inode.go19
-rw-r--r--pkg/sentry/fs/splice.go2
-rw-r--r--test/syscalls/linux/BUILD3
-rw-r--r--test/syscalls/linux/open.cc37
5 files changed, 69 insertions, 6 deletions
diff --git a/pkg/sentry/fs/file.go b/pkg/sentry/fs/file.go
index 55ffe6c0c..8e1f5674d 100644
--- a/pkg/sentry/fs/file.go
+++ b/pkg/sentry/fs/file.go
@@ -310,9 +310,11 @@ func (f *File) Writev(ctx context.Context, src usermem.IOSequence) (int64, error
return 0, syserror.ErrInterrupted
}
+ unlockAppendMu := f.Dirent.Inode.lockAppendMu(f.Flags().Append)
// Handle append mode.
if f.Flags().Append {
if err := f.offsetForAppend(ctx, &f.offset); err != nil {
+ unlockAppendMu()
f.mu.Unlock()
return 0, err
}
@@ -322,6 +324,7 @@ func (f *File) Writev(ctx context.Context, src usermem.IOSequence) (int64, error
limit, ok := f.checkLimit(ctx, f.offset)
switch {
case ok && limit == 0:
+ unlockAppendMu()
f.mu.Unlock()
return 0, syserror.ErrExceedsFileSizeLimit
case ok:
@@ -333,6 +336,7 @@ func (f *File) Writev(ctx context.Context, src usermem.IOSequence) (int64, error
if n >= 0 && !f.flags.NonSeekable {
atomic.StoreInt64(&f.offset, f.offset+n)
}
+ unlockAppendMu()
f.mu.Unlock()
return n, err
}
@@ -348,13 +352,11 @@ func (f *File) Pwritev(ctx context.Context, src usermem.IOSequence, offset int64
// However, on Linux, if a file is opened with O_APPEND, pwrite()
// appends data to the end of the file, regardless of the value of
// offset."
+ unlockAppendMu := f.Dirent.Inode.lockAppendMu(f.Flags().Append)
+ defer unlockAppendMu()
+
if f.Flags().Append {
- if !f.mu.Lock(ctx) {
- return 0, syserror.ErrInterrupted
- }
- defer f.mu.Unlock()
if err := f.offsetForAppend(ctx, &offset); err != nil {
- f.mu.Unlock()
return 0, err
}
}
@@ -373,7 +375,7 @@ func (f *File) Pwritev(ctx context.Context, src usermem.IOSequence, offset int64
// offsetForAppend sets the given offset to the end of the file.
//
-// Precondition: the underlying file mutex should be held.
+// Precondition: the file.Dirent.Inode.appendMu mutex should be held for writing.
func (f *File) offsetForAppend(ctx context.Context, offset *int64) error {
uattr, err := f.Dirent.Inode.UnstableAttr(ctx)
if err != nil {
diff --git a/pkg/sentry/fs/inode.go b/pkg/sentry/fs/inode.go
index a889586aa..e4aae1135 100644
--- a/pkg/sentry/fs/inode.go
+++ b/pkg/sentry/fs/inode.go
@@ -15,6 +15,8 @@
package fs
import (
+ "sync"
+
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/pkg/metric"
@@ -55,6 +57,12 @@ type Inode struct {
// overlay is the overlay entry for this Inode.
overlay *overlayEntry
+
+ // appendMu is used to synchronize write operations into files which
+ // have been opened with O_APPEND. Operations which change a file size
+ // have to take this lock for read. Write operations to files with
+ // O_APPEND have to take this lock for write.
+ appendMu sync.RWMutex `state:"nosave"`
}
// LockCtx is an Inode's lock context and contains different personalities of locks; both
@@ -337,6 +345,8 @@ func (i *Inode) Truncate(ctx context.Context, d *Dirent, size int64) error {
if i.overlay != nil {
return overlayTruncate(ctx, i.overlay, d, size)
}
+ i.appendMu.RLock()
+ defer i.appendMu.RUnlock()
return i.InodeOperations.Truncate(ctx, i, size)
}
@@ -438,3 +448,12 @@ func (i *Inode) CheckCapability(ctx context.Context, cp linux.Capability) bool {
}
return creds.HasCapability(cp)
}
+
+func (i *Inode) lockAppendMu(appendMode bool) func() {
+ if appendMode {
+ i.appendMu.Lock()
+ return i.appendMu.Unlock
+ }
+ i.appendMu.RLock()
+ return i.appendMu.RUnlock
+}
diff --git a/pkg/sentry/fs/splice.go b/pkg/sentry/fs/splice.go
index 978dc679b..eed1c2854 100644
--- a/pkg/sentry/fs/splice.go
+++ b/pkg/sentry/fs/splice.go
@@ -88,6 +88,8 @@ func Splice(ctx context.Context, dst *File, src *File, opts SpliceOpts) (int64,
// Check append-only mode and the limit.
if !dstPipe {
+ unlock := dst.Dirent.Inode.lockAppendMu(dst.Flags().Append)
+ defer unlock()
if dst.Flags().Append {
if opts.DstOffset {
// We need to acquire the lock.
diff --git a/test/syscalls/linux/BUILD b/test/syscalls/linux/BUILD
index bfe3dfe04..8a24d8c0b 100644
--- a/test/syscalls/linux/BUILD
+++ b/test/syscalls/linux/BUILD
@@ -1177,6 +1177,7 @@ cc_binary(
"//test/util:temp_path",
"//test/util:test_main",
"//test/util:test_util",
+ "//test/util:thread_util",
"@com_google_absl//absl/strings",
"@com_google_googletest//:gtest",
],
@@ -2940,6 +2941,8 @@ cc_binary(
testonly = 1,
srcs = ["tcp_socket.cc"],
linkstatic = 1,
+ # FIXME(b/135470853)
+ tags = ["flaky"],
deps = [
":socket_test_util",
"//test/util:file_descriptor",
diff --git a/test/syscalls/linux/open.cc b/test/syscalls/linux/open.cc
index 42646bb02..e0525f386 100644
--- a/test/syscalls/linux/open.cc
+++ b/test/syscalls/linux/open.cc
@@ -28,6 +28,7 @@
#include "test/util/fs_util.h"
#include "test/util/temp_path.h"
#include "test/util/test_util.h"
+#include "test/util/thread_util.h"
namespace gvisor {
namespace testing {
@@ -214,6 +215,42 @@ TEST_F(OpenTest, AppendOnly) {
SyscallSucceedsWithValue(kBufSize * 3));
}
+TEST_F(OpenTest, AppendConcurrentWrite) {
+ constexpr int kThreadCount = 5;
+ constexpr int kBytesPerThread = 10000;
+ std::unique_ptr<ScopedThread> threads[kThreadCount];
+
+ // In case of the uncached policy, we expect that a file system can be changed
+ // externally, so we create a new inode each time when we open a file and we
+ // can't guarantee that writes to files with O_APPEND will work correctly.
+ SKIP_IF(getenv("GVISOR_GOFER_UNCACHED"));
+
+ EXPECT_THAT(truncate(test_file_name_.c_str(), 0), SyscallSucceeds());
+
+ std::string filename = test_file_name_;
+ DisableSave ds; // Too many syscalls.
+ // Start kThreadCount threads which will write concurrently into the same
+ // file.
+ for (int i = 0; i < kThreadCount; i++) {
+ threads[i] = absl::make_unique<ScopedThread>([filename]() {
+ const FileDescriptor fd =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(filename, O_RDWR | O_APPEND));
+
+ for (int j = 0; j < kBytesPerThread; j++) {
+ EXPECT_THAT(WriteFd(fd.get(), &j, 1), SyscallSucceedsWithValue(1));
+ }
+ });
+ }
+ for (int i = 0; i < kThreadCount; i++) {
+ threads[i]->Join();
+ }
+
+ // Check that the size of the file is correct.
+ struct stat st;
+ EXPECT_THAT(stat(test_file_name_.c_str(), &st), SyscallSucceeds());
+ EXPECT_EQ(st.st_size, kThreadCount * kBytesPerThread);
+}
+
TEST_F(OpenTest, Truncate) {
{
// First write some data to the new file and close it.