diff options
author | Dean Deng <deandeng@google.com> | 2020-08-27 16:52:21 -0700 |
---|---|---|
committer | Andrei Vagin <avagin@gmail.com> | 2020-09-09 17:53:10 -0700 |
commit | bb089f9c9075a78e8bde7ff946bac77badc08894 (patch) | |
tree | 86026686e411a273239a8a5637853194c6e97022 | |
parent | 05166f14c93323d6279987ae3fe9a803ad188ade (diff) |
Fix vfs2 pipe behavior when splicing to a non-pipe.
Fixes *.sh Java runtime tests, where splice()-ing from a pipe to /dev/zero
would not actually empty the pipe.
There was no guarantee that the data would actually be consumed on a splice
operation unless the output file's implementation of Write/PWrite actually
called VFSPipeFD.CopyIn. Now, whatever bytes are "written" are consumed
regardless of whether CopyIn is called or not.
Furthermore, the number of bytes in the IOSequence for reads is now capped at
the amount of data actually available. Before, splicing to /dev/zero would
always return the requested splice size without taking the actual available
data into account.
This change also refactors the case where an input file is spliced into an
output pipe so that it follows a similar pattern, which is arguably cleaner
anyway.
Updates #3576.
PiperOrigin-RevId: 328843954
-rw-r--r-- | pkg/buffer/BUILD | 2 | ||||
-rw-r--r-- | pkg/sentry/kernel/pipe/pipe.go | 14 | ||||
-rw-r--r-- | pkg/sentry/kernel/pipe/vfs.go | 63 | ||||
-rw-r--r-- | pkg/sentry/syscalls/linux/vfs2/splice.go | 35 | ||||
-rw-r--r-- | test/syscalls/linux/sendfile.cc | 36 | ||||
-rw-r--r-- | test/syscalls/linux/splice.cc | 64 |
6 files changed, 174 insertions, 40 deletions
diff --git a/pkg/buffer/BUILD b/pkg/buffer/BUILD index dcd086298..b03d46d18 100644 --- a/pkg/buffer/BUILD +++ b/pkg/buffer/BUILD @@ -26,8 +26,10 @@ go_library( ], visibility = ["//visibility:public"], deps = [ + "//pkg/context", "//pkg/log", "//pkg/safemem", + "//pkg/usermem", ], ) diff --git a/pkg/sentry/kernel/pipe/pipe.go b/pkg/sentry/kernel/pipe/pipe.go index 297e8f28f..c410c96aa 100644 --- a/pkg/sentry/kernel/pipe/pipe.go +++ b/pkg/sentry/kernel/pipe/pipe.go @@ -200,17 +200,17 @@ type readOps struct { // // Precondition: this pipe must have readers. func (p *Pipe) read(ctx context.Context, ops readOps) (int64, error) { - // Don't block for a zero-length read even if the pipe is empty. - if ops.left() == 0 { - return 0, nil - } - p.mu.Lock() defer p.mu.Unlock() return p.readLocked(ctx, ops) } func (p *Pipe) readLocked(ctx context.Context, ops readOps) (int64, error) { + // Don't block for a zero-length read even if the pipe is empty. + if ops.left() == 0 { + return 0, nil + } + // Is the pipe empty? if p.view.Size() == 0 { if !p.HasWriters() { @@ -388,6 +388,10 @@ func (p *Pipe) rwReadiness() waiter.EventMask { func (p *Pipe) queued() int64 { p.mu.Lock() defer p.mu.Unlock() + return p.queuedLocked() +} + +func (p *Pipe) queuedLocked() int64 { return p.view.Size() } diff --git a/pkg/sentry/kernel/pipe/vfs.go b/pkg/sentry/kernel/pipe/vfs.go index 28f998e45..f223d59e1 100644 --- a/pkg/sentry/kernel/pipe/vfs.go +++ b/pkg/sentry/kernel/pipe/vfs.go @@ -244,19 +244,57 @@ func (fd *VFSPipeFD) SetPipeSize(size int64) (int64, error) { return fd.pipe.SetFifoSize(size) } -// IOSequence returns a useremm.IOSequence that reads up to count bytes from, -// or writes up to count bytes to, fd. -func (fd *VFSPipeFD) IOSequence(count int64) usermem.IOSequence { - return usermem.IOSequence{ +// SpliceToNonPipe performs a splice operation from fd to a non-pipe file. +func (fd *VFSPipeFD) SpliceToNonPipe(ctx context.Context, out *vfs.FileDescription, off, count int64) (int64, error) { + fd.pipe.mu.Lock() + defer fd.pipe.mu.Unlock() + + // Cap the sequence at number of bytes actually available. + v := fd.pipe.queuedLocked() + if v < count { + count = v + } + src := usermem.IOSequence{ IO: fd, Addrs: usermem.AddrRangeSeqOf(usermem.AddrRange{0, usermem.Addr(count)}), } + + var ( + n int64 + err error + ) + if off == -1 { + n, err = out.Write(ctx, src, vfs.WriteOptions{}) + } else { + n, err = out.PWrite(ctx, src, off, vfs.WriteOptions{}) + } + if n > 0 { + fd.pipe.view.TrimFront(n) + } + return n, err +} + +// SpliceFromNonPipe performs a splice operation from a non-pipe file to fd. +func (fd *VFSPipeFD) SpliceFromNonPipe(ctx context.Context, in *vfs.FileDescription, off, count int64) (int64, error) { + fd.pipe.mu.Lock() + defer fd.pipe.mu.Unlock() + + dst := usermem.IOSequence{ + IO: fd, + Addrs: usermem.AddrRangeSeqOf(usermem.AddrRange{0, usermem.Addr(count)}), + } + + if off == -1 { + return in.Read(ctx, dst, vfs.ReadOptions{}) + } + return in.PRead(ctx, dst, off, vfs.ReadOptions{}) } -// CopyIn implements usermem.IO.CopyIn. +// CopyIn implements usermem.IO.CopyIn. Note that it is the caller's +// responsibility to trim fd.pipe.view after the read is completed. func (fd *VFSPipeFD) CopyIn(ctx context.Context, addr usermem.Addr, dst []byte, opts usermem.IOOpts) (int, error) { origCount := int64(len(dst)) - n, err := fd.pipe.read(ctx, readOps{ + n, err := fd.pipe.readLocked(ctx, readOps{ left: func() int64 { return int64(len(dst)) }, @@ -265,7 +303,6 @@ func (fd *VFSPipeFD) CopyIn(ctx context.Context, addr usermem.Addr, dst []byte, }, read: func(view *buffer.View) (int64, error) { n, err := view.ReadAt(dst, 0) - view.TrimFront(int64(n)) return int64(n), err }, }) @@ -281,7 +318,7 @@ func (fd *VFSPipeFD) CopyIn(ctx context.Context, addr usermem.Addr, dst []byte, // CopyOut implements usermem.IO.CopyOut. func (fd *VFSPipeFD) CopyOut(ctx context.Context, addr usermem.Addr, src []byte, opts usermem.IOOpts) (int, error) { origCount := int64(len(src)) - n, err := fd.pipe.write(ctx, writeOps{ + n, err := fd.pipe.writeLocked(ctx, writeOps{ left: func() int64 { return int64(len(src)) }, @@ -305,7 +342,7 @@ func (fd *VFSPipeFD) CopyOut(ctx context.Context, addr usermem.Addr, src []byte, // ZeroOut implements usermem.IO.ZeroOut. func (fd *VFSPipeFD) ZeroOut(ctx context.Context, addr usermem.Addr, toZero int64, opts usermem.IOOpts) (int64, error) { origCount := toZero - n, err := fd.pipe.write(ctx, writeOps{ + n, err := fd.pipe.writeLocked(ctx, writeOps{ left: func() int64 { return toZero }, @@ -326,14 +363,15 @@ func (fd *VFSPipeFD) ZeroOut(ctx context.Context, addr usermem.Addr, toZero int6 return n, err } -// CopyInTo implements usermem.IO.CopyInTo. +// CopyInTo implements usermem.IO.CopyInTo. Note that it is the caller's +// responsibility to trim fd.pipe.view after the read is completed. func (fd *VFSPipeFD) CopyInTo(ctx context.Context, ars usermem.AddrRangeSeq, dst safemem.Writer, opts usermem.IOOpts) (int64, error) { count := ars.NumBytes() if count == 0 { return 0, nil } origCount := count - n, err := fd.pipe.read(ctx, readOps{ + n, err := fd.pipe.readLocked(ctx, readOps{ left: func() int64 { return count }, @@ -342,7 +380,6 @@ func (fd *VFSPipeFD) CopyInTo(ctx context.Context, ars usermem.AddrRangeSeq, dst }, read: func(view *buffer.View) (int64, error) { n, err := view.ReadToSafememWriter(dst, uint64(count)) - view.TrimFront(int64(n)) return int64(n), err }, }) @@ -362,7 +399,7 @@ func (fd *VFSPipeFD) CopyOutFrom(ctx context.Context, ars usermem.AddrRangeSeq, return 0, nil } origCount := count - n, err := fd.pipe.write(ctx, writeOps{ + n, err := fd.pipe.writeLocked(ctx, writeOps{ left: func() int64 { return count }, diff --git a/pkg/sentry/syscalls/linux/vfs2/splice.go b/pkg/sentry/syscalls/linux/vfs2/splice.go index 75bfa2c79..192411393 100644 --- a/pkg/sentry/syscalls/linux/vfs2/splice.go +++ b/pkg/sentry/syscalls/linux/vfs2/splice.go @@ -131,18 +131,14 @@ func Splice(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal case inIsPipe && outIsPipe: n, err = pipe.Splice(t, outPipeFD, inPipeFD, count) case inIsPipe: + n, err = inPipeFD.SpliceToNonPipe(t, outFile, outOffset, count) if outOffset != -1 { - n, err = outFile.PWrite(t, inPipeFD.IOSequence(count), outOffset, vfs.WriteOptions{}) outOffset += n - } else { - n, err = outFile.Write(t, inPipeFD.IOSequence(count), vfs.WriteOptions{}) } case outIsPipe: + n, err = outPipeFD.SpliceFromNonPipe(t, inFile, inOffset, count) if inOffset != -1 { - n, err = inFile.PRead(t, outPipeFD.IOSequence(count), inOffset, vfs.ReadOptions{}) inOffset += n - } else { - n, err = inFile.Read(t, outPipeFD.IOSequence(count), vfs.ReadOptions{}) } default: panic("not possible") @@ -341,17 +337,15 @@ func Sendfile(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc if outIsPipe { for n < count { var spliceN int64 - if offset != -1 { - spliceN, err = inFile.PRead(t, outPipeFD.IOSequence(count), offset, vfs.ReadOptions{}) - offset += spliceN - } else { - spliceN, err = inFile.Read(t, outPipeFD.IOSequence(count), vfs.ReadOptions{}) - } + spliceN, err = outPipeFD.SpliceFromNonPipe(t, inFile, offset, count) if spliceN == 0 && err == io.EOF { // We reached the end of the file. Eat the error and exit the loop. err = nil break } + if offset != -1 { + offset += spliceN + } n += spliceN if err == syserror.ErrWouldBlock && !nonBlock { err = dw.waitForBoth(t) @@ -371,19 +365,18 @@ func Sendfile(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc } else { readN, err = inFile.Read(t, usermem.BytesIOSequence(buf), vfs.ReadOptions{}) } - if readN == 0 && err == io.EOF { - // We reached the end of the file. Eat the error and exit the loop. - err = nil + if readN == 0 && err != nil { + if err == io.EOF { + // We reached the end of the file. Eat the error before exiting the loop. + err = nil + } break } n += readN - if err != nil { - break - } // Write all of the bytes that we read. This may need // multiple write calls to complete. - wbuf := buf[:n] + wbuf := buf[:readN] for len(wbuf) > 0 { var writeN int64 writeN, err = outFile.Write(t, usermem.BytesIOSequence(wbuf), vfs.WriteOptions{}) @@ -398,6 +391,10 @@ func Sendfile(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc notWritten := int64(len(wbuf)) n -= notWritten if offset != -1 { + // TODO(gvisor.dev/issue/3779): The inFile offset will be incorrect if we + // roll back, because it has already been advanced by the full amount. + // Merely seeking on inFile does not work, because there may be concurrent + // file operations. offset -= notWritten } break diff --git a/test/syscalls/linux/sendfile.cc b/test/syscalls/linux/sendfile.cc index 64123e904..e65387f59 100644 --- a/test/syscalls/linux/sendfile.cc +++ b/test/syscalls/linux/sendfile.cc @@ -198,7 +198,39 @@ TEST(SendFileTest, SendAndUpdateFileOffset) { EXPECT_EQ(absl::string_view(kData, kHalfDataSize), absl::string_view(actual, bytes_sent)); - // Verify that the input file offset has been updated + // Verify that the input file offset has been updated. + ASSERT_THAT(read(inf.get(), &actual, kDataSize - bytes_sent), + SyscallSucceedsWithValue(kHalfDataSize)); + EXPECT_EQ( + absl::string_view(kData + kDataSize - bytes_sent, kDataSize - bytes_sent), + absl::string_view(actual, kHalfDataSize)); +} + +TEST(SendFileTest, SendToDevZeroAndUpdateFileOffset) { + // Create temp files. + // Test input string length must be > 2 AND even. + constexpr char kData[] = "The slings and arrows of outrageous fortune,"; + constexpr int kDataSize = sizeof(kData) - 1; + constexpr int kHalfDataSize = kDataSize / 2; + const TempPath in_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + GetAbsoluteTestTmpdir(), kData, TempPath::kDefaultFileMode)); + + // Open the input file as read only. + const FileDescriptor inf = + ASSERT_NO_ERRNO_AND_VALUE(Open(in_file.path(), O_RDONLY)); + + // Open /dev/zero as write only. + const FileDescriptor outf = + ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/zero", O_WRONLY)); + + // Send data and verify that sendfile returns the correct value. + int bytes_sent; + EXPECT_THAT( + bytes_sent = sendfile(outf.get(), inf.get(), nullptr, kHalfDataSize), + SyscallSucceedsWithValue(kHalfDataSize)); + + char actual[kHalfDataSize]; + // Verify that the input file offset has been updated. ASSERT_THAT(read(inf.get(), &actual, kDataSize - bytes_sent), SyscallSucceedsWithValue(kHalfDataSize)); EXPECT_EQ( @@ -250,7 +282,7 @@ TEST(SendFileTest, SendAndUpdateFileOffsetFromNonzeroStartingPoint) { EXPECT_EQ(absl::string_view(kData + kQuarterDataSize, kHalfDataSize), absl::string_view(actual, bytes_sent)); - // Verify that the input file offset has been updated + // Verify that the input file offset has been updated. ASSERT_THAT(read(inf.get(), &actual, kQuarterDataSize), SyscallSucceedsWithValue(kQuarterDataSize)); diff --git a/test/syscalls/linux/splice.cc b/test/syscalls/linux/splice.cc index 08fc4b1b7..be3fb4840 100644 --- a/test/syscalls/linux/splice.cc +++ b/test/syscalls/linux/splice.cc @@ -342,7 +342,7 @@ TEST(SpliceTest, FromPipe) { ASSERT_THAT(write(wfd.get(), buf.data(), buf.size()), SyscallSucceedsWithValue(kPageSize)); - // Open the input file. + // Open the output file. const TempPath out_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); const FileDescriptor out_fd = ASSERT_NO_ERRNO_AND_VALUE(Open(out_file.path(), O_RDWR)); @@ -364,6 +364,40 @@ TEST(SpliceTest, FromPipe) { EXPECT_EQ(memcmp(rbuf.data(), buf.data(), buf.size()), 0); } +TEST(SpliceTest, FromPipeMultiple) { + // Create a new pipe. + int fds[2]; + ASSERT_THAT(pipe(fds), SyscallSucceeds()); + const FileDescriptor rfd(fds[0]); + const FileDescriptor wfd(fds[1]); + + std::string buf = "abcABC123"; + ASSERT_THAT(write(wfd.get(), buf.c_str(), buf.size()), + SyscallSucceedsWithValue(buf.size())); + + // Open the output file. + const TempPath out_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + const FileDescriptor out_fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(out_file.path(), O_RDWR)); + + // Splice from the pipe to the output file over several calls. + EXPECT_THAT(splice(rfd.get(), nullptr, out_fd.get(), nullptr, 3, 0), + SyscallSucceedsWithValue(3)); + EXPECT_THAT(splice(rfd.get(), nullptr, out_fd.get(), nullptr, 3, 0), + SyscallSucceedsWithValue(3)); + EXPECT_THAT(splice(rfd.get(), nullptr, out_fd.get(), nullptr, 3, 0), + SyscallSucceedsWithValue(3)); + + // Reset cursor to zero so that we can check the contents. + ASSERT_THAT(lseek(out_fd.get(), 0, SEEK_SET), SyscallSucceedsWithValue(0)); + + // Contents should be equal. + std::vector<char> rbuf(buf.size()); + ASSERT_THAT(read(out_fd.get(), rbuf.data(), rbuf.size()), + SyscallSucceedsWithValue(rbuf.size())); + EXPECT_EQ(memcmp(rbuf.data(), buf.c_str(), buf.size()), 0); +} + TEST(SpliceTest, FromPipeOffset) { // Create a new pipe. int fds[2]; @@ -693,6 +727,34 @@ TEST(SpliceTest, FromPipeMaxFileSize) { EXPECT_EQ(memcmp(rbuf.data(), buf.data(), buf.size()), 0); } +TEST(SpliceTest, FromPipeToDevZero) { + // Create a new pipe. + int fds[2]; + ASSERT_THAT(pipe(fds), SyscallSucceeds()); + const FileDescriptor rfd(fds[0]); + FileDescriptor wfd(fds[1]); + + // Fill with some random data. + std::vector<char> buf(kPageSize); + RandomizeBuffer(buf.data(), buf.size()); + ASSERT_THAT(write(wfd.get(), buf.data(), buf.size()), + SyscallSucceedsWithValue(kPageSize)); + + const FileDescriptor zero = + ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/zero", O_WRONLY)); + + // Close the write end to prevent blocking below. + wfd.reset(); + + // Splice to /dev/zero. The first call should empty the pipe, and the return + // value should not exceed the number of bytes available for reading. + EXPECT_THAT( + splice(rfd.get(), nullptr, zero.get(), nullptr, kPageSize + 123, 0), + SyscallSucceedsWithValue(kPageSize)); + EXPECT_THAT(splice(rfd.get(), nullptr, zero.get(), nullptr, 1, 0), + SyscallSucceedsWithValue(0)); +} + } // namespace } // namespace testing |