summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorDean Deng <deandeng@google.com>2020-08-27 16:52:21 -0700
committerAndrei Vagin <avagin@gmail.com>2020-09-09 17:53:10 -0700
commitbb089f9c9075a78e8bde7ff946bac77badc08894 (patch)
tree86026686e411a273239a8a5637853194c6e97022
parent05166f14c93323d6279987ae3fe9a803ad188ade (diff)
Fix vfs2 pipe behavior when splicing to a non-pipe.
Fixes *.sh Java runtime tests, where splice()-ing from a pipe to /dev/zero would not actually empty the pipe. There was no guarantee that the data would actually be consumed on a splice operation unless the output file's implementation of Write/PWrite actually called VFSPipeFD.CopyIn. Now, whatever bytes are "written" are consumed regardless of whether CopyIn is called or not. Furthermore, the number of bytes in the IOSequence for reads is now capped at the amount of data actually available. Before, splicing to /dev/zero would always return the requested splice size without taking the actual available data into account. This change also refactors the case where an input file is spliced into an output pipe so that it follows a similar pattern, which is arguably cleaner anyway. Updates #3576. PiperOrigin-RevId: 328843954
-rw-r--r--pkg/buffer/BUILD2
-rw-r--r--pkg/sentry/kernel/pipe/pipe.go14
-rw-r--r--pkg/sentry/kernel/pipe/vfs.go63
-rw-r--r--pkg/sentry/syscalls/linux/vfs2/splice.go35
-rw-r--r--test/syscalls/linux/sendfile.cc36
-rw-r--r--test/syscalls/linux/splice.cc64
6 files changed, 174 insertions, 40 deletions
diff --git a/pkg/buffer/BUILD b/pkg/buffer/BUILD
index dcd086298..b03d46d18 100644
--- a/pkg/buffer/BUILD
+++ b/pkg/buffer/BUILD
@@ -26,8 +26,10 @@ go_library(
],
visibility = ["//visibility:public"],
deps = [
+ "//pkg/context",
"//pkg/log",
"//pkg/safemem",
+ "//pkg/usermem",
],
)
diff --git a/pkg/sentry/kernel/pipe/pipe.go b/pkg/sentry/kernel/pipe/pipe.go
index 297e8f28f..c410c96aa 100644
--- a/pkg/sentry/kernel/pipe/pipe.go
+++ b/pkg/sentry/kernel/pipe/pipe.go
@@ -200,17 +200,17 @@ type readOps struct {
//
// Precondition: this pipe must have readers.
func (p *Pipe) read(ctx context.Context, ops readOps) (int64, error) {
- // Don't block for a zero-length read even if the pipe is empty.
- if ops.left() == 0 {
- return 0, nil
- }
-
p.mu.Lock()
defer p.mu.Unlock()
return p.readLocked(ctx, ops)
}
func (p *Pipe) readLocked(ctx context.Context, ops readOps) (int64, error) {
+ // Don't block for a zero-length read even if the pipe is empty.
+ if ops.left() == 0 {
+ return 0, nil
+ }
+
// Is the pipe empty?
if p.view.Size() == 0 {
if !p.HasWriters() {
@@ -388,6 +388,10 @@ func (p *Pipe) rwReadiness() waiter.EventMask {
func (p *Pipe) queued() int64 {
p.mu.Lock()
defer p.mu.Unlock()
+ return p.queuedLocked()
+}
+
+func (p *Pipe) queuedLocked() int64 {
return p.view.Size()
}
diff --git a/pkg/sentry/kernel/pipe/vfs.go b/pkg/sentry/kernel/pipe/vfs.go
index 28f998e45..f223d59e1 100644
--- a/pkg/sentry/kernel/pipe/vfs.go
+++ b/pkg/sentry/kernel/pipe/vfs.go
@@ -244,19 +244,57 @@ func (fd *VFSPipeFD) SetPipeSize(size int64) (int64, error) {
return fd.pipe.SetFifoSize(size)
}
-// IOSequence returns a useremm.IOSequence that reads up to count bytes from,
-// or writes up to count bytes to, fd.
-func (fd *VFSPipeFD) IOSequence(count int64) usermem.IOSequence {
- return usermem.IOSequence{
+// SpliceToNonPipe performs a splice operation from fd to a non-pipe file.
+func (fd *VFSPipeFD) SpliceToNonPipe(ctx context.Context, out *vfs.FileDescription, off, count int64) (int64, error) {
+ fd.pipe.mu.Lock()
+ defer fd.pipe.mu.Unlock()
+
+ // Cap the sequence at number of bytes actually available.
+ v := fd.pipe.queuedLocked()
+ if v < count {
+ count = v
+ }
+ src := usermem.IOSequence{
IO: fd,
Addrs: usermem.AddrRangeSeqOf(usermem.AddrRange{0, usermem.Addr(count)}),
}
+
+ var (
+ n int64
+ err error
+ )
+ if off == -1 {
+ n, err = out.Write(ctx, src, vfs.WriteOptions{})
+ } else {
+ n, err = out.PWrite(ctx, src, off, vfs.WriteOptions{})
+ }
+ if n > 0 {
+ fd.pipe.view.TrimFront(n)
+ }
+ return n, err
+}
+
+// SpliceFromNonPipe performs a splice operation from a non-pipe file to fd.
+func (fd *VFSPipeFD) SpliceFromNonPipe(ctx context.Context, in *vfs.FileDescription, off, count int64) (int64, error) {
+ fd.pipe.mu.Lock()
+ defer fd.pipe.mu.Unlock()
+
+ dst := usermem.IOSequence{
+ IO: fd,
+ Addrs: usermem.AddrRangeSeqOf(usermem.AddrRange{0, usermem.Addr(count)}),
+ }
+
+ if off == -1 {
+ return in.Read(ctx, dst, vfs.ReadOptions{})
+ }
+ return in.PRead(ctx, dst, off, vfs.ReadOptions{})
}
-// CopyIn implements usermem.IO.CopyIn.
+// CopyIn implements usermem.IO.CopyIn. Note that it is the caller's
+// responsibility to trim fd.pipe.view after the read is completed.
func (fd *VFSPipeFD) CopyIn(ctx context.Context, addr usermem.Addr, dst []byte, opts usermem.IOOpts) (int, error) {
origCount := int64(len(dst))
- n, err := fd.pipe.read(ctx, readOps{
+ n, err := fd.pipe.readLocked(ctx, readOps{
left: func() int64 {
return int64(len(dst))
},
@@ -265,7 +303,6 @@ func (fd *VFSPipeFD) CopyIn(ctx context.Context, addr usermem.Addr, dst []byte,
},
read: func(view *buffer.View) (int64, error) {
n, err := view.ReadAt(dst, 0)
- view.TrimFront(int64(n))
return int64(n), err
},
})
@@ -281,7 +318,7 @@ func (fd *VFSPipeFD) CopyIn(ctx context.Context, addr usermem.Addr, dst []byte,
// CopyOut implements usermem.IO.CopyOut.
func (fd *VFSPipeFD) CopyOut(ctx context.Context, addr usermem.Addr, src []byte, opts usermem.IOOpts) (int, error) {
origCount := int64(len(src))
- n, err := fd.pipe.write(ctx, writeOps{
+ n, err := fd.pipe.writeLocked(ctx, writeOps{
left: func() int64 {
return int64(len(src))
},
@@ -305,7 +342,7 @@ func (fd *VFSPipeFD) CopyOut(ctx context.Context, addr usermem.Addr, src []byte,
// ZeroOut implements usermem.IO.ZeroOut.
func (fd *VFSPipeFD) ZeroOut(ctx context.Context, addr usermem.Addr, toZero int64, opts usermem.IOOpts) (int64, error) {
origCount := toZero
- n, err := fd.pipe.write(ctx, writeOps{
+ n, err := fd.pipe.writeLocked(ctx, writeOps{
left: func() int64 {
return toZero
},
@@ -326,14 +363,15 @@ func (fd *VFSPipeFD) ZeroOut(ctx context.Context, addr usermem.Addr, toZero int6
return n, err
}
-// CopyInTo implements usermem.IO.CopyInTo.
+// CopyInTo implements usermem.IO.CopyInTo. Note that it is the caller's
+// responsibility to trim fd.pipe.view after the read is completed.
func (fd *VFSPipeFD) CopyInTo(ctx context.Context, ars usermem.AddrRangeSeq, dst safemem.Writer, opts usermem.IOOpts) (int64, error) {
count := ars.NumBytes()
if count == 0 {
return 0, nil
}
origCount := count
- n, err := fd.pipe.read(ctx, readOps{
+ n, err := fd.pipe.readLocked(ctx, readOps{
left: func() int64 {
return count
},
@@ -342,7 +380,6 @@ func (fd *VFSPipeFD) CopyInTo(ctx context.Context, ars usermem.AddrRangeSeq, dst
},
read: func(view *buffer.View) (int64, error) {
n, err := view.ReadToSafememWriter(dst, uint64(count))
- view.TrimFront(int64(n))
return int64(n), err
},
})
@@ -362,7 +399,7 @@ func (fd *VFSPipeFD) CopyOutFrom(ctx context.Context, ars usermem.AddrRangeSeq,
return 0, nil
}
origCount := count
- n, err := fd.pipe.write(ctx, writeOps{
+ n, err := fd.pipe.writeLocked(ctx, writeOps{
left: func() int64 {
return count
},
diff --git a/pkg/sentry/syscalls/linux/vfs2/splice.go b/pkg/sentry/syscalls/linux/vfs2/splice.go
index 75bfa2c79..192411393 100644
--- a/pkg/sentry/syscalls/linux/vfs2/splice.go
+++ b/pkg/sentry/syscalls/linux/vfs2/splice.go
@@ -131,18 +131,14 @@ func Splice(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal
case inIsPipe && outIsPipe:
n, err = pipe.Splice(t, outPipeFD, inPipeFD, count)
case inIsPipe:
+ n, err = inPipeFD.SpliceToNonPipe(t, outFile, outOffset, count)
if outOffset != -1 {
- n, err = outFile.PWrite(t, inPipeFD.IOSequence(count), outOffset, vfs.WriteOptions{})
outOffset += n
- } else {
- n, err = outFile.Write(t, inPipeFD.IOSequence(count), vfs.WriteOptions{})
}
case outIsPipe:
+ n, err = outPipeFD.SpliceFromNonPipe(t, inFile, inOffset, count)
if inOffset != -1 {
- n, err = inFile.PRead(t, outPipeFD.IOSequence(count), inOffset, vfs.ReadOptions{})
inOffset += n
- } else {
- n, err = inFile.Read(t, outPipeFD.IOSequence(count), vfs.ReadOptions{})
}
default:
panic("not possible")
@@ -341,17 +337,15 @@ func Sendfile(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc
if outIsPipe {
for n < count {
var spliceN int64
- if offset != -1 {
- spliceN, err = inFile.PRead(t, outPipeFD.IOSequence(count), offset, vfs.ReadOptions{})
- offset += spliceN
- } else {
- spliceN, err = inFile.Read(t, outPipeFD.IOSequence(count), vfs.ReadOptions{})
- }
+ spliceN, err = outPipeFD.SpliceFromNonPipe(t, inFile, offset, count)
if spliceN == 0 && err == io.EOF {
// We reached the end of the file. Eat the error and exit the loop.
err = nil
break
}
+ if offset != -1 {
+ offset += spliceN
+ }
n += spliceN
if err == syserror.ErrWouldBlock && !nonBlock {
err = dw.waitForBoth(t)
@@ -371,19 +365,18 @@ func Sendfile(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc
} else {
readN, err = inFile.Read(t, usermem.BytesIOSequence(buf), vfs.ReadOptions{})
}
- if readN == 0 && err == io.EOF {
- // We reached the end of the file. Eat the error and exit the loop.
- err = nil
+ if readN == 0 && err != nil {
+ if err == io.EOF {
+ // We reached the end of the file. Eat the error before exiting the loop.
+ err = nil
+ }
break
}
n += readN
- if err != nil {
- break
- }
// Write all of the bytes that we read. This may need
// multiple write calls to complete.
- wbuf := buf[:n]
+ wbuf := buf[:readN]
for len(wbuf) > 0 {
var writeN int64
writeN, err = outFile.Write(t, usermem.BytesIOSequence(wbuf), vfs.WriteOptions{})
@@ -398,6 +391,10 @@ func Sendfile(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc
notWritten := int64(len(wbuf))
n -= notWritten
if offset != -1 {
+ // TODO(gvisor.dev/issue/3779): The inFile offset will be incorrect if we
+ // roll back, because it has already been advanced by the full amount.
+ // Merely seeking on inFile does not work, because there may be concurrent
+ // file operations.
offset -= notWritten
}
break
diff --git a/test/syscalls/linux/sendfile.cc b/test/syscalls/linux/sendfile.cc
index 64123e904..e65387f59 100644
--- a/test/syscalls/linux/sendfile.cc
+++ b/test/syscalls/linux/sendfile.cc
@@ -198,7 +198,39 @@ TEST(SendFileTest, SendAndUpdateFileOffset) {
EXPECT_EQ(absl::string_view(kData, kHalfDataSize),
absl::string_view(actual, bytes_sent));
- // Verify that the input file offset has been updated
+ // Verify that the input file offset has been updated.
+ ASSERT_THAT(read(inf.get(), &actual, kDataSize - bytes_sent),
+ SyscallSucceedsWithValue(kHalfDataSize));
+ EXPECT_EQ(
+ absl::string_view(kData + kDataSize - bytes_sent, kDataSize - bytes_sent),
+ absl::string_view(actual, kHalfDataSize));
+}
+
+TEST(SendFileTest, SendToDevZeroAndUpdateFileOffset) {
+ // Create temp files.
+ // Test input string length must be > 2 AND even.
+ constexpr char kData[] = "The slings and arrows of outrageous fortune,";
+ constexpr int kDataSize = sizeof(kData) - 1;
+ constexpr int kHalfDataSize = kDataSize / 2;
+ const TempPath in_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
+ GetAbsoluteTestTmpdir(), kData, TempPath::kDefaultFileMode));
+
+ // Open the input file as read only.
+ const FileDescriptor inf =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(in_file.path(), O_RDONLY));
+
+ // Open /dev/zero as write only.
+ const FileDescriptor outf =
+ ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/zero", O_WRONLY));
+
+ // Send data and verify that sendfile returns the correct value.
+ int bytes_sent;
+ EXPECT_THAT(
+ bytes_sent = sendfile(outf.get(), inf.get(), nullptr, kHalfDataSize),
+ SyscallSucceedsWithValue(kHalfDataSize));
+
+ char actual[kHalfDataSize];
+ // Verify that the input file offset has been updated.
ASSERT_THAT(read(inf.get(), &actual, kDataSize - bytes_sent),
SyscallSucceedsWithValue(kHalfDataSize));
EXPECT_EQ(
@@ -250,7 +282,7 @@ TEST(SendFileTest, SendAndUpdateFileOffsetFromNonzeroStartingPoint) {
EXPECT_EQ(absl::string_view(kData + kQuarterDataSize, kHalfDataSize),
absl::string_view(actual, bytes_sent));
- // Verify that the input file offset has been updated
+ // Verify that the input file offset has been updated.
ASSERT_THAT(read(inf.get(), &actual, kQuarterDataSize),
SyscallSucceedsWithValue(kQuarterDataSize));
diff --git a/test/syscalls/linux/splice.cc b/test/syscalls/linux/splice.cc
index 08fc4b1b7..be3fb4840 100644
--- a/test/syscalls/linux/splice.cc
+++ b/test/syscalls/linux/splice.cc
@@ -342,7 +342,7 @@ TEST(SpliceTest, FromPipe) {
ASSERT_THAT(write(wfd.get(), buf.data(), buf.size()),
SyscallSucceedsWithValue(kPageSize));
- // Open the input file.
+ // Open the output file.
const TempPath out_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
const FileDescriptor out_fd =
ASSERT_NO_ERRNO_AND_VALUE(Open(out_file.path(), O_RDWR));
@@ -364,6 +364,40 @@ TEST(SpliceTest, FromPipe) {
EXPECT_EQ(memcmp(rbuf.data(), buf.data(), buf.size()), 0);
}
+TEST(SpliceTest, FromPipeMultiple) {
+ // Create a new pipe.
+ int fds[2];
+ ASSERT_THAT(pipe(fds), SyscallSucceeds());
+ const FileDescriptor rfd(fds[0]);
+ const FileDescriptor wfd(fds[1]);
+
+ std::string buf = "abcABC123";
+ ASSERT_THAT(write(wfd.get(), buf.c_str(), buf.size()),
+ SyscallSucceedsWithValue(buf.size()));
+
+ // Open the output file.
+ const TempPath out_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
+ const FileDescriptor out_fd =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(out_file.path(), O_RDWR));
+
+ // Splice from the pipe to the output file over several calls.
+ EXPECT_THAT(splice(rfd.get(), nullptr, out_fd.get(), nullptr, 3, 0),
+ SyscallSucceedsWithValue(3));
+ EXPECT_THAT(splice(rfd.get(), nullptr, out_fd.get(), nullptr, 3, 0),
+ SyscallSucceedsWithValue(3));
+ EXPECT_THAT(splice(rfd.get(), nullptr, out_fd.get(), nullptr, 3, 0),
+ SyscallSucceedsWithValue(3));
+
+ // Reset cursor to zero so that we can check the contents.
+ ASSERT_THAT(lseek(out_fd.get(), 0, SEEK_SET), SyscallSucceedsWithValue(0));
+
+ // Contents should be equal.
+ std::vector<char> rbuf(buf.size());
+ ASSERT_THAT(read(out_fd.get(), rbuf.data(), rbuf.size()),
+ SyscallSucceedsWithValue(rbuf.size()));
+ EXPECT_EQ(memcmp(rbuf.data(), buf.c_str(), buf.size()), 0);
+}
+
TEST(SpliceTest, FromPipeOffset) {
// Create a new pipe.
int fds[2];
@@ -693,6 +727,34 @@ TEST(SpliceTest, FromPipeMaxFileSize) {
EXPECT_EQ(memcmp(rbuf.data(), buf.data(), buf.size()), 0);
}
+TEST(SpliceTest, FromPipeToDevZero) {
+ // Create a new pipe.
+ int fds[2];
+ ASSERT_THAT(pipe(fds), SyscallSucceeds());
+ const FileDescriptor rfd(fds[0]);
+ FileDescriptor wfd(fds[1]);
+
+ // Fill with some random data.
+ std::vector<char> buf(kPageSize);
+ RandomizeBuffer(buf.data(), buf.size());
+ ASSERT_THAT(write(wfd.get(), buf.data(), buf.size()),
+ SyscallSucceedsWithValue(kPageSize));
+
+ const FileDescriptor zero =
+ ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/zero", O_WRONLY));
+
+ // Close the write end to prevent blocking below.
+ wfd.reset();
+
+ // Splice to /dev/zero. The first call should empty the pipe, and the return
+ // value should not exceed the number of bytes available for reading.
+ EXPECT_THAT(
+ splice(rfd.get(), nullptr, zero.get(), nullptr, kPageSize + 123, 0),
+ SyscallSucceedsWithValue(kPageSize));
+ EXPECT_THAT(splice(rfd.get(), nullptr, zero.get(), nullptr, 1, 0),
+ SyscallSucceedsWithValue(0));
+}
+
} // namespace
} // namespace testing