diff options
author | Adin Scannell <ascannell@google.com> | 2019-05-21 15:17:05 -0700 |
---|---|---|
committer | Shentubot <shentubot@google.com> | 2019-05-21 15:18:12 -0700 |
commit | 9cdae51feca5cee9faa198161b92a0aeece52d6c (patch) | |
tree | 9a2a22b2e2c99f70505d9d8700bdc8dcac06e11d /pkg/sentry/syscalls | |
parent | adeb99709bda40e62363b229464b8ae3a90e237b (diff) |
Add basic plumbing for splice and stub implementation.
This does not actually implement an efficient splice or sendfile. Rather, it
adds a generic plumbing to the file internals so that this can be added. All
file implementations use the stub fileutil.NoSplice implementation, which
causes sendfile and splice to fall back to an internal copy.
A basic splice system call interface is added, along with a test.
PiperOrigin-RevId: 249335960
Change-Id: Ic5568be2af0a505c19e7aec66d5af2480ab0939b
Diffstat (limited to 'pkg/sentry/syscalls')
-rw-r--r-- | pkg/sentry/syscalls/linux/BUILD | 1 | ||||
-rw-r--r-- | pkg/sentry/syscalls/linux/linux64.go | 2 | ||||
-rw-r--r-- | pkg/sentry/syscalls/linux/sys_file.go | 98 | ||||
-rw-r--r-- | pkg/sentry/syscalls/linux/sys_splice.go | 293 |
4 files changed, 295 insertions, 99 deletions
diff --git a/pkg/sentry/syscalls/linux/BUILD b/pkg/sentry/syscalls/linux/BUILD index 6e2843b36..f76989ae2 100644 --- a/pkg/sentry/syscalls/linux/BUILD +++ b/pkg/sentry/syscalls/linux/BUILD @@ -34,6 +34,7 @@ go_library( "sys_shm.go", "sys_signal.go", "sys_socket.go", + "sys_splice.go", "sys_stat.go", "sys_sync.go", "sys_sysinfo.go", diff --git a/pkg/sentry/syscalls/linux/linux64.go b/pkg/sentry/syscalls/linux/linux64.go index 9a460ebdf..3e4d312af 100644 --- a/pkg/sentry/syscalls/linux/linux64.go +++ b/pkg/sentry/syscalls/linux/linux64.go @@ -407,7 +407,7 @@ var AMD64 = &kernel.SyscallTable{ 273: syscalls.Error(syscall.ENOSYS), // @Syscall(GetRobustList, note:Obsolete) 274: syscalls.Error(syscall.ENOSYS), - // 275: @Syscall(Splice), TODO(b/29354098) + 275: Splice, // 276: @Syscall(Tee), TODO(b/29354098) 277: SyncFileRange, // 278: @Syscall(Vmsplice), TODO(b/29354098) diff --git a/pkg/sentry/syscalls/linux/sys_file.go b/pkg/sentry/syscalls/linux/sys_file.go index 1764bb4b6..8a80cd430 100644 --- a/pkg/sentry/syscalls/linux/sys_file.go +++ b/pkg/sentry/syscalls/linux/sys_file.go @@ -15,7 +15,6 @@ package linux import ( - "io" "syscall" "gvisor.googlesource.com/gvisor/pkg/abi/linux" @@ -2025,103 +2024,6 @@ func Flock(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall return 0, nil, nil } -// Sendfile implements linux system call sendfile(2). -func Sendfile(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - outFD := kdefs.FD(args[0].Int()) - inFD := kdefs.FD(args[1].Int()) - offsetAddr := args[2].Pointer() - count := int64(args[3].SizeT()) - - // Don't send a negative number of bytes. - if count < 0 { - return 0, nil, syserror.EINVAL - } - - if count > int64(kernel.MAX_RW_COUNT) { - count = int64(kernel.MAX_RW_COUNT) - } - - // Get files. - outFile := t.FDMap().GetFile(outFD) - if outFile == nil { - return 0, nil, syserror.EBADF - } - defer outFile.DecRef() - - inFile := t.FDMap().GetFile(inFD) - if inFile == nil { - return 0, nil, syserror.EBADF - } - defer inFile.DecRef() - - // Verify that the outfile is writable. - outFlags := outFile.Flags() - if !outFlags.Write { - return 0, nil, syserror.EBADF - } - - // Verify that the outfile Append flag is not set. - if outFlags.Append { - return 0, nil, syserror.EINVAL - } - - // Verify that we have a regular infile. - // http://elixir.free-electrons.com/linux/latest/source/fs/splice.c#L933 - if !fs.IsRegular(inFile.Dirent.Inode.StableAttr) { - return 0, nil, syserror.EINVAL - } - - // Verify that the infile is readable. - if !inFile.Flags().Read { - return 0, nil, syserror.EBADF - } - - // Setup for sending data. - var n int64 - var err error - w := &fs.FileWriter{t, outFile} - hasOffset := offsetAddr != 0 - // If we have a provided offset. - if hasOffset { - // Verify that when offset address is not null, infile must be seekable - if !inFile.Flags().Pread { - return 0, nil, syserror.ESPIPE - } - // Copy in the offset. - var offset int64 - if _, err := t.CopyIn(offsetAddr, &offset); err != nil { - return 0, nil, err - } - if offset < 0 { - return 0, nil, syserror.EINVAL - } - // Send data using Preadv. - r := io.NewSectionReader(&fs.FileReader{t, inFile}, offset, count) - n, err = io.Copy(w, r) - // Copy out the new offset. - if _, err := t.CopyOut(offsetAddr, n+offset); err != nil { - return 0, nil, err - } - // If we don't have a provided offset. - } else { - // Send data using readv. - inOff := inFile.Offset() - r := &io.LimitedReader{R: &fs.FileReader{t, inFile}, N: count} - n, err = io.Copy(w, r) - inOff += n - if inFile.Offset() != inOff { - // Adjust file position in case more bytes were read than written. - if _, err := inFile.Seek(t, fs.SeekSet, inOff); err != nil { - return 0, nil, syserror.EIO - } - } - } - - // We can only pass a single file to handleIOError, so pick inFile - // arbitrarily. - return uintptr(n), nil, handleIOError(t, n != 0, err, kernel.ERESTARTSYS, "sendfile", inFile) -} - const ( memfdPrefix = "/memfd:" memfdAllFlags = uint32(linux.MFD_CLOEXEC | linux.MFD_ALLOW_SEALING) diff --git a/pkg/sentry/syscalls/linux/sys_splice.go b/pkg/sentry/syscalls/linux/sys_splice.go new file mode 100644 index 000000000..37303606f --- /dev/null +++ b/pkg/sentry/syscalls/linux/sys_splice.go @@ -0,0 +1,293 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package linux + +import ( + "gvisor.googlesource.com/gvisor/pkg/abi/linux" + "gvisor.googlesource.com/gvisor/pkg/sentry/arch" + "gvisor.googlesource.com/gvisor/pkg/sentry/fs" + "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" + "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/kdefs" + "gvisor.googlesource.com/gvisor/pkg/syserror" + "gvisor.googlesource.com/gvisor/pkg/waiter" +) + +// doSplice implements a blocking splice operation. +func doSplice(t *kernel.Task, outFile, inFile *fs.File, opts fs.SpliceOpts, nonBlocking bool) (int64, error) { + var ( + total int64 + n int64 + err error + ch chan struct{} + inW bool + outW bool + ) + for opts.Length > 0 { + n, err = fs.Splice(t, outFile, inFile, opts) + opts.Length -= n + total += n + if err != syserror.ErrWouldBlock { + break + } else if err == syserror.ErrWouldBlock && nonBlocking { + break + } + + // Are we a registered waiter? + if ch == nil { + ch = make(chan struct{}, 1) + } + if !inW && inFile.Readiness(EventMaskRead) == 0 && !inFile.Flags().NonBlocking { + w, _ := waiter.NewChannelEntry(ch) + inFile.EventRegister(&w, EventMaskRead) + defer inFile.EventUnregister(&w) + inW = true // Registered. + } else if !outW && outFile.Readiness(EventMaskWrite) == 0 && !outFile.Flags().NonBlocking { + w, _ := waiter.NewChannelEntry(ch) + outFile.EventRegister(&w, EventMaskWrite) + defer outFile.EventUnregister(&w) + outW = true // Registered. + } + + // Was anything registered? If no, everything is non-blocking. + if !inW && !outW { + break + } + + // Block until there's data. + if err = t.Block(ch); err != nil { + break + } + } + + return total, err +} + +// Sendfile implements linux system call sendfile(2). +func Sendfile(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { + outFD := kdefs.FD(args[0].Int()) + inFD := kdefs.FD(args[1].Int()) + offsetAddr := args[2].Pointer() + count := int64(args[3].SizeT()) + + // Don't send a negative number of bytes. + if count < 0 { + return 0, nil, syserror.EINVAL + } + + // Get files. + outFile := t.FDMap().GetFile(outFD) + if outFile == nil { + return 0, nil, syserror.EBADF + } + defer outFile.DecRef() + + inFile := t.FDMap().GetFile(inFD) + if inFile == nil { + return 0, nil, syserror.EBADF + } + defer inFile.DecRef() + + // Verify that the outfile Append flag is not set. Note that fs.Splice + // itself validates that the output file is writable. + if outFile.Flags().Append { + return 0, nil, syserror.EBADF + } + + // Verify that we have a regular infile. This is a requirement; the + // same check appears in Linux (fs/splice.c:splice_direct_to_actor). + if !fs.IsRegular(inFile.Dirent.Inode.StableAttr) { + return 0, nil, syserror.EINVAL + } + + var ( + n int64 + err error + ) + if offsetAddr != 0 { + // Verify that when offset address is not null, infile must be + // seekable. The fs.Splice routine itself validates basic read. + if !inFile.Flags().Pread { + return 0, nil, syserror.ESPIPE + } + + // Copy in the offset. + var offset int64 + if _, err := t.CopyIn(offsetAddr, &offset); err != nil { + return 0, nil, err + } + + // The offset must be valid. + if offset < 0 { + return 0, nil, syserror.EINVAL + } + + // Do the splice. + n, err = doSplice(t, outFile, inFile, fs.SpliceOpts{ + Length: count, + SrcOffset: true, + SrcStart: offset, + }, false) + + // Copy out the new offset. + if _, err := t.CopyOut(offsetAddr, n+offset); err != nil { + return 0, nil, err + } + } else { + // Send data using splice. + n, err = doSplice(t, outFile, inFile, fs.SpliceOpts{ + Length: count, + }, false) + } + + // We can only pass a single file to handleIOError, so pick inFile + // arbitrarily. This is used only for debugging purposes. + return uintptr(n), nil, handleIOError(t, n != 0, err, kernel.ERESTARTSYS, "sendfile", inFile) +} + +// Splice implements splice(2). +func Splice(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { + inFD := kdefs.FD(args[0].Int()) + inOffset := args[1].Pointer() + outFD := kdefs.FD(args[2].Int()) + outOffset := args[3].Pointer() + count := int64(args[4].SizeT()) + flags := args[5].Int() + + // Check for invalid flags. + if flags&^(linux.SPLICE_F_MOVE|linux.SPLICE_F_NONBLOCK|linux.SPLICE_F_MORE|linux.SPLICE_F_GIFT) != 0 { + return 0, nil, syserror.EINVAL + } + + // Only non-blocking is meaningful. Note that unlike in Linux, this + // flag is applied consistently. We will have either fully blocking or + // non-blocking behavior below, regardless of the underlying files + // being spliced to. It's unclear if this is a bug or not yet. + nonBlocking := (flags & linux.SPLICE_F_NONBLOCK) != 0 + + // Get files. + outFile := t.FDMap().GetFile(outFD) + if outFile == nil { + return 0, nil, syserror.EBADF + } + defer outFile.DecRef() + + inFile := t.FDMap().GetFile(inFD) + if inFile == nil { + return 0, nil, syserror.EBADF + } + defer inFile.DecRef() + + // Construct our options. + // + // Note that exactly one of the underlying buffers must be a pipe. We + // don't actually have this constraint internally, but we enforce it + // for the semantics of the call. + opts := fs.SpliceOpts{ + Length: count, + } + switch { + case fs.IsPipe(inFile.Dirent.Inode.StableAttr) && !fs.IsPipe(outFile.Dirent.Inode.StableAttr): + if inOffset != 0 { + return 0, nil, syserror.ESPIPE + } + if outOffset != 0 { + var offset int64 + if _, err := t.CopyIn(outOffset, &offset); err != nil { + return 0, nil, err + } + // Use the destination offset. + opts.DstOffset = true + opts.DstStart = offset + } + case !fs.IsPipe(inFile.Dirent.Inode.StableAttr) && fs.IsPipe(outFile.Dirent.Inode.StableAttr): + if outOffset != 0 { + return 0, nil, syserror.ESPIPE + } + if inOffset != 0 { + var offset int64 + if _, err := t.CopyIn(inOffset, &offset); err != nil { + return 0, nil, err + } + // Use the source offset. + opts.SrcOffset = true + opts.SrcStart = offset + } + case fs.IsPipe(inFile.Dirent.Inode.StableAttr) && fs.IsPipe(outFile.Dirent.Inode.StableAttr): + if inOffset != 0 || outOffset != 0 { + return 0, nil, syserror.ESPIPE + } + default: + return 0, nil, syserror.EINVAL + } + + // We may not refer to the same pipe; otherwise it's a continuous loop. + if inFile.Dirent.Inode.StableAttr.InodeID == outFile.Dirent.Inode.StableAttr.InodeID { + return 0, nil, syserror.EINVAL + } + + // Splice data. + n, err := doSplice(t, outFile, inFile, opts, nonBlocking) + + // See above; inFile is chosen arbitrarily here. + return uintptr(n), nil, handleIOError(t, n != 0, err, kernel.ERESTARTSYS, "splice", inFile) +} + +// Tee imlements tee(2). +func Tee(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { + inFD := kdefs.FD(args[0].Int()) + outFD := kdefs.FD(args[1].Int()) + count := int64(args[2].SizeT()) + flags := args[3].Int() + + // Check for invalid flags. + if flags&^(linux.SPLICE_F_MOVE|linux.SPLICE_F_NONBLOCK|linux.SPLICE_F_MORE|linux.SPLICE_F_GIFT) != 0 { + return 0, nil, syserror.EINVAL + } + + // Only non-blocking is meaningful. + nonBlocking := (flags & linux.SPLICE_F_NONBLOCK) != 0 + + // Get files. + outFile := t.FDMap().GetFile(outFD) + if outFile == nil { + return 0, nil, syserror.EBADF + } + defer outFile.DecRef() + + inFile := t.FDMap().GetFile(inFD) + if inFile == nil { + return 0, nil, syserror.EBADF + } + defer inFile.DecRef() + + // All files must be pipes. + if !fs.IsPipe(inFile.Dirent.Inode.StableAttr) || !fs.IsPipe(outFile.Dirent.Inode.StableAttr) { + return 0, nil, syserror.EINVAL + } + + // We may not refer to the same pipe; see above. + if inFile.Dirent.Inode.StableAttr.InodeID == outFile.Dirent.Inode.StableAttr.InodeID { + return 0, nil, syserror.EINVAL + } + + // Splice data. + n, err := doSplice(t, outFile, inFile, fs.SpliceOpts{ + Length: count, + Dup: true, + }, nonBlocking) + + // See above; inFile is chosen arbitrarily here. + return uintptr(n), nil, handleIOError(t, n != 0, err, kernel.ERESTARTSYS, "tee", inFile) +} |