summaryrefslogtreecommitdiffhomepage
path: root/pkg/sentry/syscalls/linux
diff options
context:
space:
mode:
Diffstat (limited to 'pkg/sentry/syscalls/linux')
-rw-r--r--pkg/sentry/syscalls/linux/BUILD1
-rw-r--r--pkg/sentry/syscalls/linux/linux64.go2
-rw-r--r--pkg/sentry/syscalls/linux/sys_file.go98
-rw-r--r--pkg/sentry/syscalls/linux/sys_splice.go293
4 files changed, 295 insertions, 99 deletions
diff --git a/pkg/sentry/syscalls/linux/BUILD b/pkg/sentry/syscalls/linux/BUILD
index 6e2843b36..f76989ae2 100644
--- a/pkg/sentry/syscalls/linux/BUILD
+++ b/pkg/sentry/syscalls/linux/BUILD
@@ -34,6 +34,7 @@ go_library(
"sys_shm.go",
"sys_signal.go",
"sys_socket.go",
+ "sys_splice.go",
"sys_stat.go",
"sys_sync.go",
"sys_sysinfo.go",
diff --git a/pkg/sentry/syscalls/linux/linux64.go b/pkg/sentry/syscalls/linux/linux64.go
index 9a460ebdf..3e4d312af 100644
--- a/pkg/sentry/syscalls/linux/linux64.go
+++ b/pkg/sentry/syscalls/linux/linux64.go
@@ -407,7 +407,7 @@ var AMD64 = &kernel.SyscallTable{
273: syscalls.Error(syscall.ENOSYS),
// @Syscall(GetRobustList, note:Obsolete)
274: syscalls.Error(syscall.ENOSYS),
- // 275: @Syscall(Splice), TODO(b/29354098)
+ 275: Splice,
// 276: @Syscall(Tee), TODO(b/29354098)
277: SyncFileRange,
// 278: @Syscall(Vmsplice), TODO(b/29354098)
diff --git a/pkg/sentry/syscalls/linux/sys_file.go b/pkg/sentry/syscalls/linux/sys_file.go
index 1764bb4b6..8a80cd430 100644
--- a/pkg/sentry/syscalls/linux/sys_file.go
+++ b/pkg/sentry/syscalls/linux/sys_file.go
@@ -15,7 +15,6 @@
package linux
import (
- "io"
"syscall"
"gvisor.googlesource.com/gvisor/pkg/abi/linux"
@@ -2025,103 +2024,6 @@ func Flock(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall
return 0, nil, nil
}
-// Sendfile implements linux system call sendfile(2).
-func Sendfile(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
- outFD := kdefs.FD(args[0].Int())
- inFD := kdefs.FD(args[1].Int())
- offsetAddr := args[2].Pointer()
- count := int64(args[3].SizeT())
-
- // Don't send a negative number of bytes.
- if count < 0 {
- return 0, nil, syserror.EINVAL
- }
-
- if count > int64(kernel.MAX_RW_COUNT) {
- count = int64(kernel.MAX_RW_COUNT)
- }
-
- // Get files.
- outFile := t.FDMap().GetFile(outFD)
- if outFile == nil {
- return 0, nil, syserror.EBADF
- }
- defer outFile.DecRef()
-
- inFile := t.FDMap().GetFile(inFD)
- if inFile == nil {
- return 0, nil, syserror.EBADF
- }
- defer inFile.DecRef()
-
- // Verify that the outfile is writable.
- outFlags := outFile.Flags()
- if !outFlags.Write {
- return 0, nil, syserror.EBADF
- }
-
- // Verify that the outfile Append flag is not set.
- if outFlags.Append {
- return 0, nil, syserror.EINVAL
- }
-
- // Verify that we have a regular infile.
- // http://elixir.free-electrons.com/linux/latest/source/fs/splice.c#L933
- if !fs.IsRegular(inFile.Dirent.Inode.StableAttr) {
- return 0, nil, syserror.EINVAL
- }
-
- // Verify that the infile is readable.
- if !inFile.Flags().Read {
- return 0, nil, syserror.EBADF
- }
-
- // Setup for sending data.
- var n int64
- var err error
- w := &fs.FileWriter{t, outFile}
- hasOffset := offsetAddr != 0
- // If we have a provided offset.
- if hasOffset {
- // Verify that when offset address is not null, infile must be seekable
- if !inFile.Flags().Pread {
- return 0, nil, syserror.ESPIPE
- }
- // Copy in the offset.
- var offset int64
- if _, err := t.CopyIn(offsetAddr, &offset); err != nil {
- return 0, nil, err
- }
- if offset < 0 {
- return 0, nil, syserror.EINVAL
- }
- // Send data using Preadv.
- r := io.NewSectionReader(&fs.FileReader{t, inFile}, offset, count)
- n, err = io.Copy(w, r)
- // Copy out the new offset.
- if _, err := t.CopyOut(offsetAddr, n+offset); err != nil {
- return 0, nil, err
- }
- // If we don't have a provided offset.
- } else {
- // Send data using readv.
- inOff := inFile.Offset()
- r := &io.LimitedReader{R: &fs.FileReader{t, inFile}, N: count}
- n, err = io.Copy(w, r)
- inOff += n
- if inFile.Offset() != inOff {
- // Adjust file position in case more bytes were read than written.
- if _, err := inFile.Seek(t, fs.SeekSet, inOff); err != nil {
- return 0, nil, syserror.EIO
- }
- }
- }
-
- // We can only pass a single file to handleIOError, so pick inFile
- // arbitrarily.
- return uintptr(n), nil, handleIOError(t, n != 0, err, kernel.ERESTARTSYS, "sendfile", inFile)
-}
-
const (
memfdPrefix = "/memfd:"
memfdAllFlags = uint32(linux.MFD_CLOEXEC | linux.MFD_ALLOW_SEALING)
diff --git a/pkg/sentry/syscalls/linux/sys_splice.go b/pkg/sentry/syscalls/linux/sys_splice.go
new file mode 100644
index 000000000..37303606f
--- /dev/null
+++ b/pkg/sentry/syscalls/linux/sys_splice.go
@@ -0,0 +1,293 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package linux
+
+import (
+ "gvisor.googlesource.com/gvisor/pkg/abi/linux"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/arch"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/kdefs"
+ "gvisor.googlesource.com/gvisor/pkg/syserror"
+ "gvisor.googlesource.com/gvisor/pkg/waiter"
+)
+
+// doSplice moves up to opts.Length bytes from inFile to outFile by calling
+// fs.Splice in a loop, and returns the number of bytes moved together with
+// the last error seen.
+//
+// The loop stops when opts.Length is exhausted, when fs.Splice returns
+// anything other than syserror.ErrWouldBlock, when nonBlocking is set, when
+// neither file can be waited on, or when t.Block is interrupted. Otherwise
+// the task waits for inFile to become readable and/or outFile to become
+// writable and then tries again.
+//
+// opts is received by value, so the offset bookkeeping below is not visible
+// to the caller.
+func doSplice(t *kernel.Task, outFile, inFile *fs.File, opts fs.SpliceOpts, nonBlocking bool) (int64, error) {
+	var (
+		total int64
+		n     int64
+		err   error
+		ch    chan struct{}
+		inW   bool
+		outW  bool
+	)
+	for opts.Length > 0 {
+		n, err = fs.Splice(t, outFile, inFile, opts)
+		opts.Length -= n
+		total += n
+
+		// Keep explicit offsets in step with the bytes already moved, so
+		// that a retry continues where the previous attempt stopped
+		// instead of transferring the same range a second time.
+		if opts.SrcOffset {
+			opts.SrcStart += n
+		}
+		if opts.DstOffset {
+			opts.DstStart += n
+		}
+
+		// Only a would-block result on a blocking call is retried.
+		if err != syserror.ErrWouldBlock || nonBlocking {
+			break
+		}
+
+		// Both waiters share one channel; it is created on first use.
+		if ch == nil {
+			ch = make(chan struct{}, 1)
+		}
+
+		// Register on whichever side is not ready. The deferred
+		// unregistration runs when doSplice returns, so a registration
+		// stays live across all following iterations.
+		registered := false
+		if !inW && inFile.Readiness(EventMaskRead) == 0 && !inFile.Flags().NonBlocking {
+			w, _ := waiter.NewChannelEntry(ch)
+			inFile.EventRegister(&w, EventMaskRead)
+			defer inFile.EventUnregister(&w)
+			inW = true // Registered.
+			registered = true
+		} else if !outW && outFile.Readiness(EventMaskWrite) == 0 && !outFile.Flags().NonBlocking {
+			w, _ := waiter.NewChannelEntry(ch)
+			outFile.EventRegister(&w, EventMaskWrite)
+			defer outFile.EventUnregister(&w)
+			outW = true // Registered.
+			registered = true
+		}
+
+		// Was anything registered? If no, everything is non-blocking.
+		if !inW && !outW {
+			break
+		}
+
+		// The file may have become ready between the readiness check and
+		// the registration above; that notification would never reach
+		// ch. Retry the splice once before sleeping so such an event is
+		// not lost. This happens at most once per file. The stale
+		// would-block result is cleared so it is not reported if the
+		// loop ends because nothing is left to transfer.
+		if registered {
+			err = nil
+			continue
+		}
+
+		// Block until there's data.
+		if err = t.Block(ch); err != nil {
+			break
+		}
+	}
+
+	return total, err
+}
+
+// Sendfile implements linux system call sendfile(2).
+//
+// Arguments, in order: the output FD, the input FD, a user pointer to an
+// optional int64 input offset (0 means "use and advance the file offset"),
+// and the number of bytes to transfer.
+//
+// Errors returned directly by this function:
+//   - EINVAL: count is negative, the output file has the Append flag set,
+//     the input is not a regular file, or the supplied offset is negative.
+//   - EBADF: either FD does not refer to an open file.
+//   - ESPIPE: an offset was supplied but the input file lacks the Pread flag.
+//
+// Failures from copying the offset in or out are returned unchanged. Any
+// error from the transfer itself is translated by handleIOError.
+func Sendfile(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
+	outFD := kdefs.FD(args[0].Int())
+	inFD := kdefs.FD(args[1].Int())
+	offsetAddr := args[2].Pointer()
+	count := int64(args[3].SizeT())
+
+	// Don't send a negative number of bytes.
+	if count < 0 {
+		return 0, nil, syserror.EINVAL
+	}
+
+	// Cap a single call at MAX_RW_COUNT bytes.
+	if count > int64(kernel.MAX_RW_COUNT) {
+		count = int64(kernel.MAX_RW_COUNT)
+	}
+
+	// Get files.
+	outFile := t.FDMap().GetFile(outFD)
+	if outFile == nil {
+		return 0, nil, syserror.EBADF
+	}
+	defer outFile.DecRef()
+
+	inFile := t.FDMap().GetFile(inFD)
+	if inFile == nil {
+		return 0, nil, syserror.EBADF
+	}
+	defer inFile.DecRef()
+
+	// Verify that the outfile Append flag is not set. sendfile(2) documents
+	// EINVAL (not EBADF) for an O_APPEND destination. Note that fs.Splice
+	// itself validates that the output file is writable.
+	if outFile.Flags().Append {
+		return 0, nil, syserror.EINVAL
+	}
+
+	// Verify that we have a regular infile. This is a requirement; the
+	// same check appears in Linux (fs/splice.c:splice_direct_to_actor).
+	if !fs.IsRegular(inFile.Dirent.Inode.StableAttr) {
+		return 0, nil, syserror.EINVAL
+	}
+
+	var (
+		n   int64
+		err error
+	)
+	if offsetAddr != 0 {
+		// Verify that when offset address is not null, infile must be
+		// seekable. The fs.Splice routine itself validates basic read.
+		if !inFile.Flags().Pread {
+			return 0, nil, syserror.ESPIPE
+		}
+
+		// Copy in the offset.
+		var offset int64
+		if _, err := t.CopyIn(offsetAddr, &offset); err != nil {
+			return 0, nil, err
+		}
+
+		// The offset must be valid.
+		if offset < 0 {
+			return 0, nil, syserror.EINVAL
+		}
+
+		// Do the splice, reading from the explicit offset.
+		n, err = doSplice(t, outFile, inFile, fs.SpliceOpts{
+			Length:    count,
+			SrcOffset: true,
+			SrcStart:  offset,
+		}, false)
+
+		// Copy out the new offset: the starting offset plus the number
+		// of bytes actually transferred.
+		if _, err := t.CopyOut(offsetAddr, n+offset); err != nil {
+			return 0, nil, err
+		}
+	} else {
+		// No explicit offset: send data using splice with default
+		// options.
+		n, err = doSplice(t, outFile, inFile, fs.SpliceOpts{
+			Length: count,
+		}, false)
+	}
+
+	// We can only pass a single file to handleIOError, so pick inFile
+	// arbitrarily. This is used only for debugging purposes.
+	return uintptr(n), nil, handleIOError(t, n != 0, err, kernel.ERESTARTSYS, "sendfile", inFile)
+}
+
+// Splice implements splice(2).
+//
+// Arguments, in order: the input FD, a user pointer to an optional int64
+// input offset, the output FD, a user pointer to an optional int64 output
+// offset, the number of bytes to move, and the SPLICE_F_* flags.
+//
+// Errors returned directly by this function:
+//   - EINVAL: unknown flag bits, neither file is a pipe, a supplied offset
+//     is negative, or both files share the same inode ID.
+//   - EBADF: either FD does not refer to an open file.
+//   - ESPIPE: an offset was supplied for an end that is a pipe.
+//
+// Failures from copying an offset in or out are returned unchanged. Any
+// error from the transfer itself is translated by handleIOError.
+func Splice(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
+	inFD := kdefs.FD(args[0].Int())
+	inOffset := args[1].Pointer()
+	outFD := kdefs.FD(args[2].Int())
+	outOffset := args[3].Pointer()
+	count := int64(args[4].SizeT())
+	flags := args[5].Int()
+
+	// Check for invalid flags.
+	if flags&^(linux.SPLICE_F_MOVE|linux.SPLICE_F_NONBLOCK|linux.SPLICE_F_MORE|linux.SPLICE_F_GIFT) != 0 {
+		return 0, nil, syserror.EINVAL
+	}
+
+	// Only non-blocking is meaningful. Note that unlike in Linux, this
+	// flag is applied consistently. We will have either fully blocking or
+	// non-blocking behavior below, regardless of the underlying files
+	// being spliced to. It's unclear if this is a bug or not yet.
+	nonBlocking := (flags & linux.SPLICE_F_NONBLOCK) != 0
+
+	// Get files.
+	outFile := t.FDMap().GetFile(outFD)
+	if outFile == nil {
+		return 0, nil, syserror.EBADF
+	}
+	defer outFile.DecRef()
+
+	inFile := t.FDMap().GetFile(inFD)
+	if inFile == nil {
+		return 0, nil, syserror.EBADF
+	}
+	defer inFile.DecRef()
+
+	// Construct our options.
+	//
+	// Note that at least one of the underlying buffers must be a pipe. We
+	// don't actually have this constraint internally, but we enforce it
+	// for the semantics of the call. An offset may only be supplied for
+	// the end that is not a pipe.
+	opts := fs.SpliceOpts{
+		Length: count,
+	}
+	inPipe := fs.IsPipe(inFile.Dirent.Inode.StableAttr)
+	outPipe := fs.IsPipe(outFile.Dirent.Inode.StableAttr)
+	switch {
+	case inPipe && !outPipe:
+		if inOffset != 0 {
+			return 0, nil, syserror.ESPIPE
+		}
+		if outOffset != 0 {
+			var offset int64
+			if _, err := t.CopyIn(outOffset, &offset); err != nil {
+				return 0, nil, err
+			}
+			// The offset must be valid.
+			if offset < 0 {
+				return 0, nil, syserror.EINVAL
+			}
+			// Use the destination offset.
+			opts.DstOffset = true
+			opts.DstStart = offset
+		}
+	case !inPipe && outPipe:
+		if outOffset != 0 {
+			return 0, nil, syserror.ESPIPE
+		}
+		if inOffset != 0 {
+			var offset int64
+			if _, err := t.CopyIn(inOffset, &offset); err != nil {
+				return 0, nil, err
+			}
+			// The offset must be valid.
+			if offset < 0 {
+				return 0, nil, syserror.EINVAL
+			}
+			// Use the source offset.
+			opts.SrcOffset = true
+			opts.SrcStart = offset
+		}
+	case inPipe && outPipe:
+		if inOffset != 0 || outOffset != 0 {
+			return 0, nil, syserror.ESPIPE
+		}
+	default:
+		return 0, nil, syserror.EINVAL
+	}
+
+	// We may not refer to the same pipe; otherwise it's a continuous loop.
+	if inFile.Dirent.Inode.StableAttr.InodeID == outFile.Dirent.Inode.StableAttr.InodeID {
+		return 0, nil, syserror.EINVAL
+	}
+
+	// Splice data. opts is passed by value, so SrcStart and DstStart below
+	// still hold the offsets supplied by the caller.
+	n, err := doSplice(t, outFile, inFile, opts, nonBlocking)
+
+	// Report the advanced offset back to user memory, in the same way
+	// Sendfile does for its offset argument. At most one of these applies.
+	if opts.SrcOffset {
+		if _, err := t.CopyOut(inOffset, opts.SrcStart+n); err != nil {
+			return 0, nil, err
+		}
+	}
+	if opts.DstOffset {
+		if _, err := t.CopyOut(outOffset, opts.DstStart+n); err != nil {
+			return 0, nil, err
+		}
+	}
+
+	// Only a single file can be passed to handleIOError; inFile is chosen
+	// arbitrarily here.
+	return uintptr(n), nil, handleIOError(t, n != 0, err, kernel.ERESTARTSYS, "splice", inFile)
+}
+
+// Tee imlements tee(2).
+func Tee(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
+ inFD := kdefs.FD(args[0].Int())
+ outFD := kdefs.FD(args[1].Int())
+ count := int64(args[2].SizeT())
+ flags := args[3].Int()
+
+ // Check for invalid flags.
+ if flags&^(linux.SPLICE_F_MOVE|linux.SPLICE_F_NONBLOCK|linux.SPLICE_F_MORE|linux.SPLICE_F_GIFT) != 0 {
+ return 0, nil, syserror.EINVAL
+ }
+
+ // Only non-blocking is meaningful.
+ nonBlocking := (flags & linux.SPLICE_F_NONBLOCK) != 0
+
+ // Get files.
+ outFile := t.FDMap().GetFile(outFD)
+ if outFile == nil {
+ return 0, nil, syserror.EBADF
+ }
+ defer outFile.DecRef()
+
+ inFile := t.FDMap().GetFile(inFD)
+ if inFile == nil {
+ return 0, nil, syserror.EBADF
+ }
+ defer inFile.DecRef()
+
+ // All files must be pipes.
+ if !fs.IsPipe(inFile.Dirent.Inode.StableAttr) || !fs.IsPipe(outFile.Dirent.Inode.StableAttr) {
+ return 0, nil, syserror.EINVAL
+ }
+
+ // We may not refer to the same pipe; see above.
+ if inFile.Dirent.Inode.StableAttr.InodeID == outFile.Dirent.Inode.StableAttr.InodeID {
+ return 0, nil, syserror.EINVAL
+ }
+
+ // Splice data.
+ n, err := doSplice(t, outFile, inFile, fs.SpliceOpts{
+ Length: count,
+ Dup: true,
+ }, nonBlocking)
+
+ // See above; inFile is chosen arbitrarily here.
+ return uintptr(n), nil, handleIOError(t, n != 0, err, kernel.ERESTARTSYS, "tee", inFile)
+}