From e9ea7230f7dc70d3e1bb5ae32b6927209cafb465 Mon Sep 17 00:00:00 2001
From: Andrei Vagin <avagin@google.com>
Date: Mon, 24 Jun 2019 17:44:00 -0700
Subject: fs: synchronize concurrent writes into files with O_APPEND

For files with O_APPEND, a file write operation gets a file size and uses it as
offset to call an inode write operation. This means that all other operations
which can change a file size should be blocked while the write operation doesn't
complete.

PiperOrigin-RevId: 254873771
---
 pkg/sentry/fs/file.go   | 14 ++++++++------
 pkg/sentry/fs/inode.go  | 19 +++++++++++++++++++
 pkg/sentry/fs/splice.go |  2 ++
 3 files changed, 29 insertions(+), 6 deletions(-)

(limited to 'pkg/sentry/fs')

diff --git a/pkg/sentry/fs/file.go b/pkg/sentry/fs/file.go
index 55ffe6c0c..8e1f5674d 100644
--- a/pkg/sentry/fs/file.go
+++ b/pkg/sentry/fs/file.go
@@ -310,9 +310,11 @@ func (f *File) Writev(ctx context.Context, src usermem.IOSequence) (int64, error
 		return 0, syserror.ErrInterrupted
 	}
 
+	unlockAppendMu := f.Dirent.Inode.lockAppendMu(f.Flags().Append)
 	// Handle append mode.
 	if f.Flags().Append {
 		if err := f.offsetForAppend(ctx, &f.offset); err != nil {
+			unlockAppendMu()
 			f.mu.Unlock()
 			return 0, err
 		}
@@ -322,6 +324,7 @@ func (f *File) Writev(ctx context.Context, src usermem.IOSequence) (int64, error
 	limit, ok := f.checkLimit(ctx, f.offset)
 	switch {
 	case ok && limit == 0:
+		unlockAppendMu()
 		f.mu.Unlock()
 		return 0, syserror.ErrExceedsFileSizeLimit
 	case ok:
@@ -333,6 +336,7 @@ func (f *File) Writev(ctx context.Context, src usermem.IOSequence) (int64, error
 	if n >= 0 && !f.flags.NonSeekable {
 		atomic.StoreInt64(&f.offset, f.offset+n)
 	}
+	unlockAppendMu()
 	f.mu.Unlock()
 	return n, err
 }
@@ -348,13 +352,11 @@ func (f *File) Pwritev(ctx context.Context, src usermem.IOSequence, offset int64
 	// However, on Linux, if a file is opened with O_APPEND,  pwrite()
 	// appends data to the end of the file, regardless of the value of
 	// offset."
+	unlockAppendMu := f.Dirent.Inode.lockAppendMu(f.Flags().Append)
+	defer unlockAppendMu()
+
 	if f.Flags().Append {
-		if !f.mu.Lock(ctx) {
-			return 0, syserror.ErrInterrupted
-		}
-		defer f.mu.Unlock()
 		if err := f.offsetForAppend(ctx, &offset); err != nil {
-			f.mu.Unlock()
 			return 0, err
 		}
 	}
@@ -373,7 +375,7 @@ func (f *File) Pwritev(ctx context.Context, src usermem.IOSequence, offset int64
 
 // offsetForAppend sets the given offset to the end of the file.
 //
-// Precondition: the underlying file mutex should be held.
+// Precondition: the file.Dirent.Inode.appendMu mutex should be held for writing.
 func (f *File) offsetForAppend(ctx context.Context, offset *int64) error {
 	uattr, err := f.Dirent.Inode.UnstableAttr(ctx)
 	if err != nil {
diff --git a/pkg/sentry/fs/inode.go b/pkg/sentry/fs/inode.go
index a889586aa..e4aae1135 100644
--- a/pkg/sentry/fs/inode.go
+++ b/pkg/sentry/fs/inode.go
@@ -15,6 +15,8 @@
 package fs
 
 import (
+	"sync"
+
 	"gvisor.dev/gvisor/pkg/abi/linux"
 	"gvisor.dev/gvisor/pkg/log"
 	"gvisor.dev/gvisor/pkg/metric"
@@ -55,6 +57,12 @@ type Inode struct {
 
 	// overlay is the overlay entry for this Inode.
 	overlay *overlayEntry
+
+	// appendMu is used to synchronize write operations into files which
+	// have been opened with O_APPEND. Operations which change a file size
+	// have to take this lock for read. Write operations to files with
+	// O_APPEND have to take this lock for write.
+	appendMu sync.RWMutex `state:"nosave"`
 }
 
 // LockCtx is an Inode's lock context and contains different personalities of locks; both
@@ -337,6 +345,8 @@ func (i *Inode) Truncate(ctx context.Context, d *Dirent, size int64) error {
 	if i.overlay != nil {
 		return overlayTruncate(ctx, i.overlay, d, size)
 	}
+	i.appendMu.RLock()
+	defer i.appendMu.RUnlock()
 	return i.InodeOperations.Truncate(ctx, i, size)
 }
 
@@ -438,3 +448,12 @@ func (i *Inode) CheckCapability(ctx context.Context, cp linux.Capability) bool {
 	}
 	return creds.HasCapability(cp)
 }
+
+func (i *Inode) lockAppendMu(appendMode bool) func() {
+	if appendMode {
+		i.appendMu.Lock()
+		return i.appendMu.Unlock
+	}
+	i.appendMu.RLock()
+	return i.appendMu.RUnlock
+}
diff --git a/pkg/sentry/fs/splice.go b/pkg/sentry/fs/splice.go
index 978dc679b..eed1c2854 100644
--- a/pkg/sentry/fs/splice.go
+++ b/pkg/sentry/fs/splice.go
@@ -88,6 +88,8 @@ func Splice(ctx context.Context, dst *File, src *File, opts SpliceOpts) (int64,
 
 	// Check append-only mode and the limit.
 	if !dstPipe {
+		unlock := dst.Dirent.Inode.lockAppendMu(dst.Flags().Append)
+		defer unlock()
 		if dst.Flags().Append {
 			if opts.DstOffset {
 				// We need to acquire the lock.
-- 
cgit v1.2.3