diff options
Diffstat (limited to 'pkg/sentry/syscalls/linux')
-rw-r--r-- | pkg/sentry/syscalls/linux/BUILD | 1 | ||||
-rw-r--r-- | pkg/sentry/syscalls/linux/linux64.go | 2 | ||||
-rw-r--r-- | pkg/sentry/syscalls/linux/sys_file.go | 98 | ||||
-rw-r--r-- | pkg/sentry/syscalls/linux/sys_splice.go | 293 |
4 files changed, 295 insertions, 99 deletions
diff --git a/pkg/sentry/syscalls/linux/BUILD b/pkg/sentry/syscalls/linux/BUILD index 6e2843b36..f76989ae2 100644 --- a/pkg/sentry/syscalls/linux/BUILD +++ b/pkg/sentry/syscalls/linux/BUILD @@ -34,6 +34,7 @@ go_library( "sys_shm.go", "sys_signal.go", "sys_socket.go", + "sys_splice.go", "sys_stat.go", "sys_sync.go", "sys_sysinfo.go", diff --git a/pkg/sentry/syscalls/linux/linux64.go b/pkg/sentry/syscalls/linux/linux64.go index 9a460ebdf..3e4d312af 100644 --- a/pkg/sentry/syscalls/linux/linux64.go +++ b/pkg/sentry/syscalls/linux/linux64.go @@ -407,7 +407,7 @@ var AMD64 = &kernel.SyscallTable{ 273: syscalls.Error(syscall.ENOSYS), // @Syscall(GetRobustList, note:Obsolete) 274: syscalls.Error(syscall.ENOSYS), - // 275: @Syscall(Splice), TODO(b/29354098) + 275: Splice, // 276: @Syscall(Tee), TODO(b/29354098) 277: SyncFileRange, // 278: @Syscall(Vmsplice), TODO(b/29354098) diff --git a/pkg/sentry/syscalls/linux/sys_file.go b/pkg/sentry/syscalls/linux/sys_file.go index 1764bb4b6..8a80cd430 100644 --- a/pkg/sentry/syscalls/linux/sys_file.go +++ b/pkg/sentry/syscalls/linux/sys_file.go @@ -15,7 +15,6 @@ package linux import ( - "io" "syscall" "gvisor.googlesource.com/gvisor/pkg/abi/linux" @@ -2025,103 +2024,6 @@ func Flock(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall return 0, nil, nil } -// Sendfile implements linux system call sendfile(2). -func Sendfile(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - outFD := kdefs.FD(args[0].Int()) - inFD := kdefs.FD(args[1].Int()) - offsetAddr := args[2].Pointer() - count := int64(args[3].SizeT()) - - // Don't send a negative number of bytes. - if count < 0 { - return 0, nil, syserror.EINVAL - } - - if count > int64(kernel.MAX_RW_COUNT) { - count = int64(kernel.MAX_RW_COUNT) - } - - // Get files. 
- outFile := t.FDMap().GetFile(outFD) - if outFile == nil { - return 0, nil, syserror.EBADF - } - defer outFile.DecRef() - - inFile := t.FDMap().GetFile(inFD) - if inFile == nil { - return 0, nil, syserror.EBADF - } - defer inFile.DecRef() - - // Verify that the outfile is writable. - outFlags := outFile.Flags() - if !outFlags.Write { - return 0, nil, syserror.EBADF - } - - // Verify that the outfile Append flag is not set. - if outFlags.Append { - return 0, nil, syserror.EINVAL - } - - // Verify that we have a regular infile. - // http://elixir.free-electrons.com/linux/latest/source/fs/splice.c#L933 - if !fs.IsRegular(inFile.Dirent.Inode.StableAttr) { - return 0, nil, syserror.EINVAL - } - - // Verify that the infile is readable. - if !inFile.Flags().Read { - return 0, nil, syserror.EBADF - } - - // Setup for sending data. - var n int64 - var err error - w := &fs.FileWriter{t, outFile} - hasOffset := offsetAddr != 0 - // If we have a provided offset. - if hasOffset { - // Verify that when offset address is not null, infile must be seekable - if !inFile.Flags().Pread { - return 0, nil, syserror.ESPIPE - } - // Copy in the offset. - var offset int64 - if _, err := t.CopyIn(offsetAddr, &offset); err != nil { - return 0, nil, err - } - if offset < 0 { - return 0, nil, syserror.EINVAL - } - // Send data using Preadv. - r := io.NewSectionReader(&fs.FileReader{t, inFile}, offset, count) - n, err = io.Copy(w, r) - // Copy out the new offset. - if _, err := t.CopyOut(offsetAddr, n+offset); err != nil { - return 0, nil, err - } - // If we don't have a provided offset. - } else { - // Send data using readv. - inOff := inFile.Offset() - r := &io.LimitedReader{R: &fs.FileReader{t, inFile}, N: count} - n, err = io.Copy(w, r) - inOff += n - if inFile.Offset() != inOff { - // Adjust file position in case more bytes were read than written. 
- if _, err := inFile.Seek(t, fs.SeekSet, inOff); err != nil { - return 0, nil, syserror.EIO - } - } - } - - // We can only pass a single file to handleIOError, so pick inFile - // arbitrarily. - return uintptr(n), nil, handleIOError(t, n != 0, err, kernel.ERESTARTSYS, "sendfile", inFile) -} - const ( memfdPrefix = "/memfd:" memfdAllFlags = uint32(linux.MFD_CLOEXEC | linux.MFD_ALLOW_SEALING) diff --git a/pkg/sentry/syscalls/linux/sys_splice.go b/pkg/sentry/syscalls/linux/sys_splice.go new file mode 100644 index 000000000..37303606f --- /dev/null +++ b/pkg/sentry/syscalls/linux/sys_splice.go @@ -0,0 +1,293 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package linux + +import ( + "gvisor.googlesource.com/gvisor/pkg/abi/linux" + "gvisor.googlesource.com/gvisor/pkg/sentry/arch" + "gvisor.googlesource.com/gvisor/pkg/sentry/fs" + "gvisor.googlesource.com/gvisor/pkg/sentry/kernel" + "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/kdefs" + "gvisor.googlesource.com/gvisor/pkg/syserror" + "gvisor.googlesource.com/gvisor/pkg/waiter" +) + +// doSplice implements a blocking splice operation. 
+func doSplice(t *kernel.Task, outFile, inFile *fs.File, opts fs.SpliceOpts, nonBlocking bool) (int64, error) { + var ( + total int64 + n int64 + err error + ch chan struct{} + inW bool + outW bool + ) + for opts.Length > 0 { + n, err = fs.Splice(t, outFile, inFile, opts) + opts.Length -= n + total += n + if err != syserror.ErrWouldBlock { + break + } else if err == syserror.ErrWouldBlock && nonBlocking { + break + } + + // Are we a registered waiter? + if ch == nil { + ch = make(chan struct{}, 1) + } + if !inW && inFile.Readiness(EventMaskRead) == 0 && !inFile.Flags().NonBlocking { + w, _ := waiter.NewChannelEntry(ch) + inFile.EventRegister(&w, EventMaskRead) + defer inFile.EventUnregister(&w) + inW = true // Registered. + } else if !outW && outFile.Readiness(EventMaskWrite) == 0 && !outFile.Flags().NonBlocking { + w, _ := waiter.NewChannelEntry(ch) + outFile.EventRegister(&w, EventMaskWrite) + defer outFile.EventUnregister(&w) + outW = true // Registered. + } + + // Was anything registered? If no, everything is non-blocking. + if !inW && !outW { + break + } + + // Block until there's data. + if err = t.Block(ch); err != nil { + break + } + } + + return total, err +} + +// Sendfile implements linux system call sendfile(2). +func Sendfile(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { + outFD := kdefs.FD(args[0].Int()) + inFD := kdefs.FD(args[1].Int()) + offsetAddr := args[2].Pointer() + count := int64(args[3].SizeT()) + + // Don't send a negative number of bytes. + if count < 0 { + return 0, nil, syserror.EINVAL + } + + // Get files. + outFile := t.FDMap().GetFile(outFD) + if outFile == nil { + return 0, nil, syserror.EBADF + } + defer outFile.DecRef() + + inFile := t.FDMap().GetFile(inFD) + if inFile == nil { + return 0, nil, syserror.EBADF + } + defer inFile.DecRef() + + // Verify that the outfile Append flag is not set. Note that fs.Splice + // itself validates that the output file is writable. 
+ if outFile.Flags().Append { + return 0, nil, syserror.EBADF + } + + // Verify that we have a regular infile. This is a requirement; the + // same check appears in Linux (fs/splice.c:splice_direct_to_actor). + if !fs.IsRegular(inFile.Dirent.Inode.StableAttr) { + return 0, nil, syserror.EINVAL + } + + var ( + n int64 + err error + ) + if offsetAddr != 0 { + // Verify that when offset address is not null, infile must be + // seekable. The fs.Splice routine itself validates basic read. + if !inFile.Flags().Pread { + return 0, nil, syserror.ESPIPE + } + + // Copy in the offset. + var offset int64 + if _, err := t.CopyIn(offsetAddr, &offset); err != nil { + return 0, nil, err + } + + // The offset must be valid. + if offset < 0 { + return 0, nil, syserror.EINVAL + } + + // Do the splice. + n, err = doSplice(t, outFile, inFile, fs.SpliceOpts{ + Length: count, + SrcOffset: true, + SrcStart: offset, + }, false) + + // Copy out the new offset. + if _, err := t.CopyOut(offsetAddr, n+offset); err != nil { + return 0, nil, err + } + } else { + // Send data using splice. + n, err = doSplice(t, outFile, inFile, fs.SpliceOpts{ + Length: count, + }, false) + } + + // We can only pass a single file to handleIOError, so pick inFile + // arbitrarily. This is used only for debugging purposes. + return uintptr(n), nil, handleIOError(t, n != 0, err, kernel.ERESTARTSYS, "sendfile", inFile) +} + +// Splice implements splice(2). +func Splice(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { + inFD := kdefs.FD(args[0].Int()) + inOffset := args[1].Pointer() + outFD := kdefs.FD(args[2].Int()) + outOffset := args[3].Pointer() + count := int64(args[4].SizeT()) + flags := args[5].Int() + + // Check for invalid flags. + if flags&^(linux.SPLICE_F_MOVE|linux.SPLICE_F_NONBLOCK|linux.SPLICE_F_MORE|linux.SPLICE_F_GIFT) != 0 { + return 0, nil, syserror.EINVAL + } + + // Only non-blocking is meaningful. 
Note that unlike in Linux, this + // flag is applied consistently. We will have either fully blocking or + // non-blocking behavior below, regardless of the underlying files + // being spliced to. It's unclear if this is a bug or not yet. + nonBlocking := (flags & linux.SPLICE_F_NONBLOCK) != 0 + + // Get files. + outFile := t.FDMap().GetFile(outFD) + if outFile == nil { + return 0, nil, syserror.EBADF + } + defer outFile.DecRef() + + inFile := t.FDMap().GetFile(inFD) + if inFile == nil { + return 0, nil, syserror.EBADF + } + defer inFile.DecRef() + + // Construct our options. + // + // Note that exactly one of the underlying buffers must be a pipe. We + // don't actually have this constraint internally, but we enforce it + // for the semantics of the call. + opts := fs.SpliceOpts{ + Length: count, + } + switch { + case fs.IsPipe(inFile.Dirent.Inode.StableAttr) && !fs.IsPipe(outFile.Dirent.Inode.StableAttr): + if inOffset != 0 { + return 0, nil, syserror.ESPIPE + } + if outOffset != 0 { + var offset int64 + if _, err := t.CopyIn(outOffset, &offset); err != nil { + return 0, nil, err + } + // Use the destination offset. + opts.DstOffset = true + opts.DstStart = offset + } + case !fs.IsPipe(inFile.Dirent.Inode.StableAttr) && fs.IsPipe(outFile.Dirent.Inode.StableAttr): + if outOffset != 0 { + return 0, nil, syserror.ESPIPE + } + if inOffset != 0 { + var offset int64 + if _, err := t.CopyIn(inOffset, &offset); err != nil { + return 0, nil, err + } + // Use the source offset. + opts.SrcOffset = true + opts.SrcStart = offset + } + case fs.IsPipe(inFile.Dirent.Inode.StableAttr) && fs.IsPipe(outFile.Dirent.Inode.StableAttr): + if inOffset != 0 || outOffset != 0 { + return 0, nil, syserror.ESPIPE + } + default: + return 0, nil, syserror.EINVAL + } + + // We may not refer to the same pipe; otherwise it's a continuous loop. + if inFile.Dirent.Inode.StableAttr.InodeID == outFile.Dirent.Inode.StableAttr.InodeID { + return 0, nil, syserror.EINVAL + } + + // Splice data. 
+ n, err := doSplice(t, outFile, inFile, opts, nonBlocking) + + // See above; inFile is chosen arbitrarily here. + return uintptr(n), nil, handleIOError(t, n != 0, err, kernel.ERESTARTSYS, "splice", inFile) +} + +// Tee implements tee(2). +func Tee(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { + inFD := kdefs.FD(args[0].Int()) + outFD := kdefs.FD(args[1].Int()) + count := int64(args[2].SizeT()) + flags := args[3].Int() + + // Check for invalid flags. + if flags&^(linux.SPLICE_F_MOVE|linux.SPLICE_F_NONBLOCK|linux.SPLICE_F_MORE|linux.SPLICE_F_GIFT) != 0 { + return 0, nil, syserror.EINVAL + } + + // Only non-blocking is meaningful. + nonBlocking := (flags & linux.SPLICE_F_NONBLOCK) != 0 + + // Get files. + outFile := t.FDMap().GetFile(outFD) + if outFile == nil { + return 0, nil, syserror.EBADF + } + defer outFile.DecRef() + + inFile := t.FDMap().GetFile(inFD) + if inFile == nil { + return 0, nil, syserror.EBADF + } + defer inFile.DecRef() + + // All files must be pipes. + if !fs.IsPipe(inFile.Dirent.Inode.StableAttr) || !fs.IsPipe(outFile.Dirent.Inode.StableAttr) { + return 0, nil, syserror.EINVAL + } + + // We may not refer to the same pipe; see above. + if inFile.Dirent.Inode.StableAttr.InodeID == outFile.Dirent.Inode.StableAttr.InodeID { + return 0, nil, syserror.EINVAL + } + + // Splice data. + n, err := doSplice(t, outFile, inFile, fs.SpliceOpts{ + Length: count, + Dup: true, + }, nonBlocking) + + // See above; inFile is chosen arbitrarily here. + return uintptr(n), nil, handleIOError(t, n != 0, err, kernel.ERESTARTSYS, "tee", inFile) +} |