summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorJustine Olshan <justineolshan@google.com>2018-08-10 16:09:52 -0700
committerShentubot <shentubot@google.com>2018-08-10 16:11:01 -0700
commitae6f092fe117a738df34e072ef5ba01a41c89222 (patch)
tree24f8492ac032e39b2a2908f7e1383224b34f6b64
parent36c940b093af58d02eb6e7fd186f14cce84a8dd9 (diff)
Implemented the splice(2) syscall.
Currently the implementation matches the behavior of moving data between two file descriptors. However, it does not implement this through zero-copy movement. Thus, this code is a starting point to build the more complex implementation. PiperOrigin-RevId: 208284483 Change-Id: Ibde79520a3d50bc26aead7ad4f128d2be31db14e
-rw-r--r--pkg/abi/linux/BUILD1
-rw-r--r--pkg/abi/linux/splice.go20
-rw-r--r--pkg/sentry/syscalls/linux/BUILD1
-rw-r--r--pkg/sentry/syscalls/linux/linux64.go2
-rw-r--r--pkg/sentry/syscalls/linux/sys_splice.go260
5 files changed, 283 insertions, 1 deletions
diff --git a/pkg/abi/linux/BUILD b/pkg/abi/linux/BUILD
index ac4ceefbc..9a44c2042 100644
--- a/pkg/abi/linux/BUILD
+++ b/pkg/abi/linux/BUILD
@@ -43,6 +43,7 @@ go_library(
"shm.go",
"signal.go",
"socket.go",
+ "splice.go",
"time.go",
"tty.go",
"uio.go",
diff --git a/pkg/abi/linux/splice.go b/pkg/abi/linux/splice.go
new file mode 100644
index 000000000..9331ec84b
--- /dev/null
+++ b/pkg/abi/linux/splice.go
@@ -0,0 +1,20 @@
+// Copyright 2018 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package linux
+
+// Flags for splice(2).
+const (
+ SPLICE_F_NONBLOCK = 2
+)
diff --git a/pkg/sentry/syscalls/linux/BUILD b/pkg/sentry/syscalls/linux/BUILD
index bbdfad9da..62423c0fa 100644
--- a/pkg/sentry/syscalls/linux/BUILD
+++ b/pkg/sentry/syscalls/linux/BUILD
@@ -34,6 +34,7 @@ go_library(
"sys_shm.go",
"sys_signal.go",
"sys_socket.go",
+ "sys_splice.go",
"sys_stat.go",
"sys_sync.go",
"sys_sysinfo.go",
diff --git a/pkg/sentry/syscalls/linux/linux64.go b/pkg/sentry/syscalls/linux/linux64.go
index c102af101..485c96202 100644
--- a/pkg/sentry/syscalls/linux/linux64.go
+++ b/pkg/sentry/syscalls/linux/linux64.go
@@ -319,7 +319,7 @@ var AMD64 = &kernel.SyscallTable{
272: Unshare,
273: syscalls.Error(syscall.ENOSYS), // SetRobustList, obsolete
274: syscalls.Error(syscall.ENOSYS), // GetRobustList, obsolete
- // 275: Splice, TODO
+ 275: Splice,
// 276: Tee, TODO
// 277: SyncFileRange, TODO
// 278: Vmsplice, TODO
diff --git a/pkg/sentry/syscalls/linux/sys_splice.go b/pkg/sentry/syscalls/linux/sys_splice.go
new file mode 100644
index 000000000..8151e3599
--- /dev/null
+++ b/pkg/sentry/syscalls/linux/sys_splice.go
@@ -0,0 +1,260 @@
+// Copyright 2018 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package linux
+
+import (
+ "io"
+
+ "gvisor.googlesource.com/gvisor/pkg/abi/linux"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/arch"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/kernel"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/kdefs"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/pipe"
+ "gvisor.googlesource.com/gvisor/pkg/syserror"
+)
+
+// Splice implements linux syscall splice(2).
+func Splice(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
+ fdIn := kdefs.FD(args[0].Int())
+ offIn := args[1].Pointer()
+ fdOut := kdefs.FD(args[2].Int())
+ offOut := args[3].Pointer()
+ size := int64(args[4].SizeT())
+ flags := uint(args[5].Uint())
+
+ fileIn := t.FDMap().GetFile(fdIn)
+ if fileIn == nil {
+ return 0, nil, syserror.EBADF
+ }
+ defer fileIn.DecRef()
+ fileOut := t.FDMap().GetFile(fdOut)
+ if fileOut == nil {
+ return 0, nil, syserror.EBADF
+ }
+ defer fileOut.DecRef()
+
+ // Check for whether we have pipes.
+ ipipe := fs.IsPipe(fileIn.Dirent.Inode.StableAttr)
+ opipe := fs.IsPipe(fileOut.Dirent.Inode.StableAttr)
+ if (ipipe && offIn != 0) || (opipe && offOut != 0) {
+ return 0, nil, syserror.ESPIPE
+ }
+
+ // Check if both file descriptors are pipes.
+ if ipipe && opipe {
+ var readPipe *pipe.Pipe
+ switch p := fileIn.FileOperations.(type) {
+ case *pipe.Reader:
+ readPipe = p.ReaderWriter.Pipe
+ case *pipe.ReaderWriter:
+ readPipe = p.Pipe
+ default:
+ return 0, nil, syserror.EBADF
+ }
+ var writePipe *pipe.Pipe
+ switch p := fileOut.FileOperations.(type) {
+ case *pipe.Writer:
+ writePipe = p.ReaderWriter.Pipe
+ case *pipe.ReaderWriter:
+ writePipe = p.Pipe
+ default:
+ return 0, nil, syserror.EBADF
+ }
+
+ // Splicing with two ends of the same pipe is not allowed.
+ if readPipe == writePipe {
+ return 0, nil, syserror.EINVAL
+ }
+ spliced, err := splicePipeToPipe(t, fileIn, fileOut, size, flags)
+ if err != nil {
+ return 0, nil, err
+ }
+ return uintptr(spliced), nil, nil
+ }
+
+ // Check if the file descriptor that contains the data to move is a pipe.
+ if ipipe {
+ flagsOut := fileOut.Flags()
+ offset := uint64(fileOut.Offset())
+
+ // If there is an offset for the file, ensure the file has the Pwrite flag.
+ if offOut != 0 {
+ if !flagsOut.Pwrite {
+ return 0, nil, syserror.EINVAL
+ }
+ if _, err := t.CopyIn(offOut, &offset); err != nil {
+ return 0, nil, err
+ }
+ }
+
+ if !flagsOut.Write {
+ return 0, nil, syserror.EBADF
+ }
+
+ if flagsOut.Append {
+ return 0, nil, syserror.EINVAL
+ }
+
+ switch fileIn.FileOperations.(type) {
+ case *pipe.Reader, *pipe.ReaderWriter:
+ // If the pipe in is a Reader or ReaderWriter, we can continue.
+ default:
+ return 0, nil, syserror.EBADF
+ }
+ spliced, err := spliceWrite(t, fileIn, fileOut, size, offset, flags)
+ if err != nil {
+ return 0, nil, err
+ }
+
+ // Make sure value that offset points to is updated.
+ if offOut == 0 {
+ fileOut.Seek(t, fs.SeekSet, spliced+int64(offset))
+ } else if _, err := t.CopyOut(offOut, spliced+int64(offset)); err != nil {
+ return 0, nil, err
+ }
+ return uintptr(spliced), nil, nil
+ }
+
+ // Check if the file descriptor that the data will be moved to is a pipe.
+ if opipe {
+ flagsIn := fileIn.Flags()
+ offset := uint64(fileIn.Offset())
+
+ // If there is an offset for the file, ensure the file has the Pread flag.
+ if offIn != 0 {
+ if !flagsIn.Pread {
+ return 0, nil, syserror.EINVAL
+ }
+ if _, err := t.CopyIn(offIn, &offset); err != nil {
+ return 0, nil, err
+ }
+ }
+
+ if !flagsIn.Read {
+ return 0, nil, syserror.EBADF
+ }
+
+ switch fileOut.FileOperations.(type) {
+ case *pipe.Writer, *pipe.ReaderWriter:
+ // If the pipe out is a Writer or ReaderWriter, we can continue.
+ default:
+ return 0, nil, syserror.EBADF
+ }
+ spliced, err := spliceRead(t, fileIn, fileOut, size, offset, flags)
+ if err != nil {
+ return 0, nil, err
+ }
+
+ // Make sure value that offset points to is updated.
+ if offIn == 0 {
+ fileOut.Seek(t, fs.SeekSet, spliced+int64(offset))
+ } else if _, err := t.CopyOut(offIn, spliced+int64(offset)); err != nil {
+ return 0, nil, err
+ }
+ return uintptr(spliced), nil, nil
+ }
+
+ // Splice requires one of the file descriptors to be a pipe.
+ return 0, nil, syserror.EINVAL
+}
+
+// splicePipeToPipe moves data from one pipe to another pipe.
+// TODO: Implement with zero copy movement/without copying between
+// user and kernel address spaces.
+func splicePipeToPipe(t *kernel.Task, inPipe *fs.File, outPipe *fs.File, size int64, flags uint) (int64, error) {
+ w := &fs.FileWriter{t, outPipe}
+ if flags == linux.SPLICE_F_NONBLOCK {
+ r := &io.LimitedReader{R: &fs.FileReader{t, inPipe}, N: size}
+ return io.Copy(w, r)
+ }
+ var n int64
+ for read := int64(0); read < size; {
+ var err error
+ r := &io.LimitedReader{R: &fs.FileReader{t, inPipe}, N: size}
+ n, err = io.Copy(w, r)
+ if err != nil && err != syserror.ErrWouldBlock {
+ return 0, err
+ }
+ read += n
+ }
+ return n, nil
+}
+
+// spliceRead moves data from a file to a pipe.
+// TODO: Implement with zero copy movement/without copying between
+// user and kernel address spaces.
+func spliceRead(t *kernel.Task, inFile *fs.File, outPipe *fs.File, size int64, offset uint64, flags uint) (int64, error) {
+ w := &fs.FileWriter{t, outPipe}
+ if flags == linux.SPLICE_F_NONBLOCK {
+ r := io.NewSectionReader(&fs.FileReader{t, inFile}, int64(offset), size)
+ return io.Copy(w, r)
+ }
+ var n int64
+ for read := int64(0); read < size; {
+ r := io.NewSectionReader(&fs.FileReader{t, inFile}, int64(offset), size)
+ var err error
+ n, err = io.Copy(w, r)
+ if err != nil && err != syserror.ErrWouldBlock {
+ return 0, err
+ }
+ read += n
+ }
+ return n, nil
+}
+
+// offsetWriter implements io.Writer on a section of an underlying
+// WriterAt starting from the offset and ending at the limit.
+type offsetWriter struct {
+ w io.WriterAt
+ off int64
+ limit int64
+}
+
+// Write implements io.Writer.Write and writes the content of the offsetWriter
+// starting at the offset and ending at the limit into the given buffer.
+func (o *offsetWriter) Write(p []byte) (n int, err error) {
+ if o.off >= o.limit {
+ return 0, io.EOF
+ }
+ if max := o.limit - o.off; int64(len(p)) > max {
+ p = p[0:max]
+ }
+ n, err = o.w.WriteAt(p, o.off)
+ o.off += int64(n)
+ return n, err
+}
+
+// spliceWrite moves data from a pipe to a file.
+// TODO: Implement with zero copy movement/without copying between
+// user and kernel address spaces.
+func spliceWrite(t *kernel.Task, inPipe *fs.File, outFile *fs.File, size int64, offset uint64, flags uint) (int64, error) {
+ w := &offsetWriter{&fs.FileWriter{t, outFile}, int64(offset), int64(offset) + size}
+ if flags == linux.SPLICE_F_NONBLOCK {
+ r := &io.LimitedReader{R: &fs.FileReader{t, inPipe}, N: size}
+ return io.Copy(w, r)
+ }
+ var n int64
+ for read := int64(0); read < size; {
+ var err error
+ r := &io.LimitedReader{R: &fs.FileReader{t, inPipe}, N: size}
+ n, err = io.Copy(w, r)
+ if err != nil && err != syserror.ErrWouldBlock {
+ return 0, err
+ }
+ read += n
+ }
+ return n, nil
+}