diff options
-rw-r--r-- | pkg/buffer/BUILD | 2 | ||||
-rw-r--r-- | pkg/sentry/kernel/pipe/pipe.go | 14 | ||||
-rw-r--r-- | pkg/sentry/kernel/pipe/vfs.go | 63 | ||||
-rw-r--r-- | pkg/sentry/syscalls/linux/vfs2/splice.go | 35 | ||||
-rw-r--r-- | test/syscalls/linux/sendfile.cc | 36 | ||||
-rw-r--r-- | test/syscalls/linux/splice.cc | 64 |
6 files changed, 174 insertions, 40 deletions
diff --git a/pkg/buffer/BUILD b/pkg/buffer/BUILD index dcd086298..b03d46d18 100644 --- a/pkg/buffer/BUILD +++ b/pkg/buffer/BUILD @@ -26,8 +26,10 @@ go_library( ], visibility = ["//visibility:public"], deps = [ + "//pkg/context", "//pkg/log", "//pkg/safemem", + "//pkg/usermem", ], ) diff --git a/pkg/sentry/kernel/pipe/pipe.go b/pkg/sentry/kernel/pipe/pipe.go index 297e8f28f..c410c96aa 100644 --- a/pkg/sentry/kernel/pipe/pipe.go +++ b/pkg/sentry/kernel/pipe/pipe.go @@ -200,17 +200,17 @@ type readOps struct { // // Precondition: this pipe must have readers. func (p *Pipe) read(ctx context.Context, ops readOps) (int64, error) { - // Don't block for a zero-length read even if the pipe is empty. - if ops.left() == 0 { - return 0, nil - } - p.mu.Lock() defer p.mu.Unlock() return p.readLocked(ctx, ops) } func (p *Pipe) readLocked(ctx context.Context, ops readOps) (int64, error) { + // Don't block for a zero-length read even if the pipe is empty. + if ops.left() == 0 { + return 0, nil + } + // Is the pipe empty? if p.view.Size() == 0 { if !p.HasWriters() { @@ -388,6 +388,10 @@ func (p *Pipe) rwReadiness() waiter.EventMask { func (p *Pipe) queued() int64 { p.mu.Lock() defer p.mu.Unlock() + return p.queuedLocked() +} + +func (p *Pipe) queuedLocked() int64 { return p.view.Size() } diff --git a/pkg/sentry/kernel/pipe/vfs.go b/pkg/sentry/kernel/pipe/vfs.go index 28f998e45..f223d59e1 100644 --- a/pkg/sentry/kernel/pipe/vfs.go +++ b/pkg/sentry/kernel/pipe/vfs.go @@ -244,19 +244,57 @@ func (fd *VFSPipeFD) SetPipeSize(size int64) (int64, error) { return fd.pipe.SetFifoSize(size) } -// IOSequence returns a useremm.IOSequence that reads up to count bytes from, -// or writes up to count bytes to, fd. -func (fd *VFSPipeFD) IOSequence(count int64) usermem.IOSequence { - return usermem.IOSequence{ +// SpliceToNonPipe performs a splice operation from fd to a non-pipe file. +func (fd *VFSPipeFD) SpliceToNonPipe(ctx context.Context, out *vfs.FileDescription, off, count int64) (int64, error) { + fd.pipe.mu.Lock() + defer fd.pipe.mu.Unlock() + + // Cap the sequence at number of bytes actually available. + v := fd.pipe.queuedLocked() + if v < count { + count = v + } + src := usermem.IOSequence{ IO: fd, Addrs: usermem.AddrRangeSeqOf(usermem.AddrRange{0, usermem.Addr(count)}), } + + var ( + n int64 + err error + ) + if off == -1 { + n, err = out.Write(ctx, src, vfs.WriteOptions{}) + } else { + n, err = out.PWrite(ctx, src, off, vfs.WriteOptions{}) + } + if n > 0 { + fd.pipe.view.TrimFront(n) + } + return n, err +} + +// SpliceFromNonPipe performs a splice operation from a non-pipe file to fd. +func (fd *VFSPipeFD) SpliceFromNonPipe(ctx context.Context, in *vfs.FileDescription, off, count int64) (int64, error) { + fd.pipe.mu.Lock() + defer fd.pipe.mu.Unlock() + + dst := usermem.IOSequence{ + IO: fd, + Addrs: usermem.AddrRangeSeqOf(usermem.AddrRange{0, usermem.Addr(count)}), + } + + if off == -1 { + return in.Read(ctx, dst, vfs.ReadOptions{}) + } + return in.PRead(ctx, dst, off, vfs.ReadOptions{}) } -// CopyIn implements usermem.IO.CopyIn. +// CopyIn implements usermem.IO.CopyIn. Note that it is the caller's +// responsibility to trim fd.pipe.view after the read is completed. func (fd *VFSPipeFD) CopyIn(ctx context.Context, addr usermem.Addr, dst []byte, opts usermem.IOOpts) (int, error) { origCount := int64(len(dst)) - n, err := fd.pipe.read(ctx, readOps{ + n, err := fd.pipe.readLocked(ctx, readOps{ left: func() int64 { return int64(len(dst)) }, @@ -265,7 +303,6 @@ func (fd *VFSPipeFD) CopyIn(ctx context.Context, addr usermem.Addr, dst []byte, }, read: func(view *buffer.View) (int64, error) { n, err := view.ReadAt(dst, 0) - view.TrimFront(int64(n)) return int64(n), err }, }) @@ -281,7 +318,7 @@ func (fd *VFSPipeFD) CopyIn(ctx context.Context, addr usermem.Addr, dst []byte, // CopyOut implements usermem.IO.CopyOut. func (fd *VFSPipeFD) CopyOut(ctx context.Context, addr usermem.Addr, src []byte, opts usermem.IOOpts) (int, error) { origCount := int64(len(src)) - n, err := fd.pipe.write(ctx, writeOps{ + n, err := fd.pipe.writeLocked(ctx, writeOps{ left: func() int64 { return int64(len(src)) }, @@ -305,7 +342,7 @@ func (fd *VFSPipeFD) CopyOut(ctx context.Context, addr usermem.Addr, src []byte, // ZeroOut implements usermem.IO.ZeroOut. func (fd *VFSPipeFD) ZeroOut(ctx context.Context, addr usermem.Addr, toZero int64, opts usermem.IOOpts) (int64, error) { origCount := toZero - n, err := fd.pipe.write(ctx, writeOps{ + n, err := fd.pipe.writeLocked(ctx, writeOps{ left: func() int64 { return toZero }, @@ -326,14 +363,15 @@ func (fd *VFSPipeFD) ZeroOut(ctx context.Context, addr usermem.Addr, toZero int6 return n, err } -// CopyInTo implements usermem.IO.CopyInTo. +// CopyInTo implements usermem.IO.CopyInTo. Note that it is the caller's +// responsibility to trim fd.pipe.view after the read is completed. func (fd *VFSPipeFD) CopyInTo(ctx context.Context, ars usermem.AddrRangeSeq, dst safemem.Writer, opts usermem.IOOpts) (int64, error) { count := ars.NumBytes() if count == 0 { return 0, nil } origCount := count - n, err := fd.pipe.read(ctx, readOps{ + n, err := fd.pipe.readLocked(ctx, readOps{ left: func() int64 { return count }, @@ -342,7 +380,6 @@ func (fd *VFSPipeFD) CopyInTo(ctx context.Context, ars usermem.AddrRangeSeq, dst }, read: func(view *buffer.View) (int64, error) { n, err := view.ReadToSafememWriter(dst, uint64(count)) - view.TrimFront(int64(n)) return int64(n), err }, }) @@ -362,7 +399,7 @@ func (fd *VFSPipeFD) CopyOutFrom(ctx context.Context, ars usermem.AddrRangeSeq, return 0, nil } origCount := count - n, err := fd.pipe.write(ctx, writeOps{ + n, err := fd.pipe.writeLocked(ctx, writeOps{ left: func() int64 { return count }, diff --git a/pkg/sentry/syscalls/linux/vfs2/splice.go b/pkg/sentry/syscalls/linux/vfs2/splice.go index 75bfa2c79..192411393 100644 --- a/pkg/sentry/syscalls/linux/vfs2/splice.go +++ b/pkg/sentry/syscalls/linux/vfs2/splice.go @@ -131,18 +131,14 @@ func Splice(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal case inIsPipe && outIsPipe: n, err = pipe.Splice(t, outPipeFD, inPipeFD, count) case inIsPipe: + n, err = inPipeFD.SpliceToNonPipe(t, outFile, outOffset, count) if outOffset != -1 { - n, err = outFile.PWrite(t, inPipeFD.IOSequence(count), outOffset, vfs.WriteOptions{}) outOffset += n - } else { - n, err = outFile.Write(t, inPipeFD.IOSequence(count), vfs.WriteOptions{}) } case outIsPipe: + n, err = outPipeFD.SpliceFromNonPipe(t, inFile, inOffset, count) if inOffset != -1 { - n, err = inFile.PRead(t, outPipeFD.IOSequence(count), inOffset, vfs.ReadOptions{}) inOffset += n - } else { - n, err = inFile.Read(t, outPipeFD.IOSequence(count), vfs.ReadOptions{}) } default: panic("not possible") @@ -341,17 +337,15 @@ func Sendfile(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc if outIsPipe { for n < count { var spliceN int64 - if offset != -1 { - spliceN, err = inFile.PRead(t, outPipeFD.IOSequence(count), offset, vfs.ReadOptions{}) - offset += spliceN - } else { - spliceN, err = inFile.Read(t, outPipeFD.IOSequence(count), vfs.ReadOptions{}) - } + spliceN, err = outPipeFD.SpliceFromNonPipe(t, inFile, offset, count) if spliceN == 0 && err == io.EOF { // We reached the end of the file. Eat the error and exit the loop. err = nil break } + if offset != -1 { + offset += spliceN + } n += spliceN if err == syserror.ErrWouldBlock && !nonBlock { err = dw.waitForBoth(t) @@ -371,19 +365,18 @@ func Sendfile(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc } else { readN, err = inFile.Read(t, usermem.BytesIOSequence(buf), vfs.ReadOptions{}) } - if readN == 0 && err == io.EOF { - // We reached the end of the file. Eat the error and exit the loop. - err = nil + if readN == 0 && err != nil { + if err == io.EOF { + // We reached the end of the file. Eat the error before exiting the loop. + err = nil + } break } n += readN - if err != nil { - break - } // Write all of the bytes that we read. This may need // multiple write calls to complete. - wbuf := buf[:n] + wbuf := buf[:readN] for len(wbuf) > 0 { var writeN int64 writeN, err = outFile.Write(t, usermem.BytesIOSequence(wbuf), vfs.WriteOptions{}) @@ -398,6 +391,10 @@ func Sendfile(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc notWritten := int64(len(wbuf)) n -= notWritten if offset != -1 { + // TODO(gvisor.dev/issue/3779): The inFile offset will be incorrect if we + // roll back, because it has already been advanced by the full amount. + // Merely seeking on inFile does not work, because there may be concurrent + // file operations. offset -= notWritten } break diff --git a/test/syscalls/linux/sendfile.cc b/test/syscalls/linux/sendfile.cc index 64123e904..e65387f59 100644 --- a/test/syscalls/linux/sendfile.cc +++ b/test/syscalls/linux/sendfile.cc @@ -198,7 +198,39 @@ TEST(SendFileTest, SendAndUpdateFileOffset) { EXPECT_EQ(absl::string_view(kData, kHalfDataSize), absl::string_view(actual, bytes_sent)); - // Verify that the input file offset has been updated + // Verify that the input file offset has been updated. + ASSERT_THAT(read(inf.get(), &actual, kDataSize - bytes_sent), + SyscallSucceedsWithValue(kHalfDataSize)); + EXPECT_EQ( + absl::string_view(kData + kDataSize - bytes_sent, kDataSize - bytes_sent), + absl::string_view(actual, kHalfDataSize)); +} + +TEST(SendFileTest, SendToDevZeroAndUpdateFileOffset) { + // Create temp files. + // Test input string length must be > 2 AND even. + constexpr char kData[] = "The slings and arrows of outrageous fortune,"; + constexpr int kDataSize = sizeof(kData) - 1; + constexpr int kHalfDataSize = kDataSize / 2; + const TempPath in_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + GetAbsoluteTestTmpdir(), kData, TempPath::kDefaultFileMode)); + + // Open the input file as read only. + const FileDescriptor inf = + ASSERT_NO_ERRNO_AND_VALUE(Open(in_file.path(), O_RDONLY)); + + // Open /dev/zero as write only. + const FileDescriptor outf = + ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/zero", O_WRONLY)); + + // Send data and verify that sendfile returns the correct value. + int bytes_sent; + EXPECT_THAT( + bytes_sent = sendfile(outf.get(), inf.get(), nullptr, kHalfDataSize), + SyscallSucceedsWithValue(kHalfDataSize)); + + char actual[kHalfDataSize]; + // Verify that the input file offset has been updated. ASSERT_THAT(read(inf.get(), &actual, kDataSize - bytes_sent), SyscallSucceedsWithValue(kHalfDataSize)); EXPECT_EQ( @@ -250,7 +282,7 @@ TEST(SendFileTest, SendAndUpdateFileOffsetFromNonzeroStartingPoint) { EXPECT_EQ(absl::string_view(kData + kQuarterDataSize, kHalfDataSize), absl::string_view(actual, bytes_sent)); - // Verify that the input file offset has been updated + // Verify that the input file offset has been updated. ASSERT_THAT(read(inf.get(), &actual, kQuarterDataSize), SyscallSucceedsWithValue(kQuarterDataSize)); diff --git a/test/syscalls/linux/splice.cc b/test/syscalls/linux/splice.cc index 08fc4b1b7..be3fb4840 100644 --- a/test/syscalls/linux/splice.cc +++ b/test/syscalls/linux/splice.cc @@ -342,7 +342,7 @@ TEST(SpliceTest, FromPipe) { ASSERT_THAT(write(wfd.get(), buf.data(), buf.size()), SyscallSucceedsWithValue(kPageSize)); - // Open the input file. + // Open the output file. const TempPath out_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); const FileDescriptor out_fd = ASSERT_NO_ERRNO_AND_VALUE(Open(out_file.path(), O_RDWR)); @@ -364,6 +364,40 @@ TEST(SpliceTest, FromPipe) { EXPECT_EQ(memcmp(rbuf.data(), buf.data(), buf.size()), 0); } +TEST(SpliceTest, FromPipeMultiple) { + // Create a new pipe. + int fds[2]; + ASSERT_THAT(pipe(fds), SyscallSucceeds()); + const FileDescriptor rfd(fds[0]); + const FileDescriptor wfd(fds[1]); + + std::string buf = "abcABC123"; + ASSERT_THAT(write(wfd.get(), buf.c_str(), buf.size()), + SyscallSucceedsWithValue(buf.size())); + + // Open the output file. + const TempPath out_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + const FileDescriptor out_fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(out_file.path(), O_RDWR)); + + // Splice from the pipe to the output file over several calls. + EXPECT_THAT(splice(rfd.get(), nullptr, out_fd.get(), nullptr, 3, 0), + SyscallSucceedsWithValue(3)); + EXPECT_THAT(splice(rfd.get(), nullptr, out_fd.get(), nullptr, 3, 0), + SyscallSucceedsWithValue(3)); + EXPECT_THAT(splice(rfd.get(), nullptr, out_fd.get(), nullptr, 3, 0), + SyscallSucceedsWithValue(3)); + + // Reset cursor to zero so that we can check the contents. + ASSERT_THAT(lseek(out_fd.get(), 0, SEEK_SET), SyscallSucceedsWithValue(0)); + + // Contents should be equal. + std::vector<char> rbuf(buf.size()); + ASSERT_THAT(read(out_fd.get(), rbuf.data(), rbuf.size()), + SyscallSucceedsWithValue(rbuf.size())); + EXPECT_EQ(memcmp(rbuf.data(), buf.c_str(), buf.size()), 0); +} + TEST(SpliceTest, FromPipeOffset) { // Create a new pipe. int fds[2]; @@ -693,6 +727,34 @@ TEST(SpliceTest, FromPipeMaxFileSize) { EXPECT_EQ(memcmp(rbuf.data(), buf.data(), buf.size()), 0); } +TEST(SpliceTest, FromPipeToDevZero) { + // Create a new pipe. + int fds[2]; + ASSERT_THAT(pipe(fds), SyscallSucceeds()); + const FileDescriptor rfd(fds[0]); + FileDescriptor wfd(fds[1]); + + // Fill with some random data. + std::vector<char> buf(kPageSize); + RandomizeBuffer(buf.data(), buf.size()); + ASSERT_THAT(write(wfd.get(), buf.data(), buf.size()), + SyscallSucceedsWithValue(kPageSize)); + + const FileDescriptor zero = + ASSERT_NO_ERRNO_AND_VALUE(Open("/dev/zero", O_WRONLY)); + + // Close the write end to prevent blocking below. + wfd.reset(); + + // Splice to /dev/zero. The first call should empty the pipe, and the return + // value should not exceed the number of bytes available for reading. + EXPECT_THAT( + splice(rfd.get(), nullptr, zero.get(), nullptr, kPageSize + 123, 0), + SyscallSucceedsWithValue(kPageSize)); + EXPECT_THAT(splice(rfd.get(), nullptr, zero.get(), nullptr, 1, 0), + SyscallSucceedsWithValue(0)); +} + } // namespace } // namespace testing |