Diffstat (limited to 'pkg/sentry/fs/splice.go')
-rw-r--r-- | pkg/sentry/fs/splice.go | 181 |
1 files changed, 181 insertions, 0 deletions
diff --git a/pkg/sentry/fs/splice.go b/pkg/sentry/fs/splice.go
new file mode 100644
index 000000000..33da82868
--- /dev/null
+++ b/pkg/sentry/fs/splice.go
@@ -0,0 +1,181 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package fs
+
+import (
+        "io"
+        "sync/atomic"
+
+        "gvisor.dev/gvisor/pkg/context"
+        "gvisor.dev/gvisor/pkg/syserror"
+)
+
+// Splice moves data to dst, directly from src.
+//
+// File offsets are updated only if DstOffset and SrcOffset are not set.
+func Splice(ctx context.Context, dst *File, src *File, opts SpliceOpts) (int64, error) {
+        // Verify basic file flag permissions.
+        if !dst.Flags().Write || !src.Flags().Read {
+                return 0, syserror.EBADF
+        }
+
+        // Check whether or not the objects being spliced are stream-oriented
+        // (i.e. pipes or sockets). For all stream-oriented files and files
+        // where a specific offset is not requested, we acquire the file mutex.
+        // This has two important side effects. First, it provides the standard
+        // protection against concurrent writes that would mutate the offset.
+        // Second, it prevents Splice deadlocks. Only internal anonymous files
+        // implement the ReadFrom and WriteTo methods directly, and since such
+        // anonymous files are referred to by a unique fs.File object, we know
+        // that the file mutex takes strict precedence over internal locks.
+        // Since we enforce lock ordering here, we can't deadlock by using
+        // a file in two different splice operations simultaneously.
+        srcPipe := !IsRegular(src.Dirent.Inode.StableAttr)
+        dstPipe := !IsRegular(dst.Dirent.Inode.StableAttr)
+        dstAppend := !dstPipe && dst.Flags().Append
+        srcLock := srcPipe || !opts.SrcOffset
+        dstLock := dstPipe || !opts.DstOffset || dstAppend
+
+        switch {
+        case srcLock && dstLock:
+                switch {
+                case dst.UniqueID < src.UniqueID:
+                        // Acquire dst first.
+                        if !dst.mu.Lock(ctx) {
+                                return 0, syserror.ErrInterrupted
+                        }
+                        if !src.mu.Lock(ctx) {
+                                dst.mu.Unlock()
+                                return 0, syserror.ErrInterrupted
+                        }
+                case dst.UniqueID > src.UniqueID:
+                        // Acquire src first.
+                        if !src.mu.Lock(ctx) {
+                                return 0, syserror.ErrInterrupted
+                        }
+                        if !dst.mu.Lock(ctx) {
+                                src.mu.Unlock()
+                                return 0, syserror.ErrInterrupted
+                        }
+                case dst.UniqueID == src.UniqueID:
+                        // Acquire only one lock; it's the same file. This is a
+                        // bit of an edge case, but presumably it's possible.
+                        if !dst.mu.Lock(ctx) {
+                                return 0, syserror.ErrInterrupted
+                        }
+                        srcLock = false // Only need one unlock.
+                }
+                // Use both offsets (locked).
+                opts.DstStart = dst.offset
+                opts.SrcStart = src.offset
+        case dstLock:
+                // Acquire only dst.
+                if !dst.mu.Lock(ctx) {
+                        return 0, syserror.ErrInterrupted
+                }
+                opts.DstStart = dst.offset // Safe: locked.
+        case srcLock:
+                // Acquire only src.
+                if !src.mu.Lock(ctx) {
+                        return 0, syserror.ErrInterrupted
+                }
+                opts.SrcStart = src.offset // Safe: locked.
+        }
+
+        var err error
+        if dstAppend {
+                unlock := dst.Dirent.Inode.lockAppendMu(dst.Flags().Append)
+                defer unlock()
+
+                // Figure out the appropriate offset to use.
+                err = dst.offsetForAppend(ctx, &opts.DstStart)
+        }
+        if err == nil && !dstPipe {
+                // Enforce file limits.
+                limit, ok := dst.checkLimit(ctx, opts.DstStart)
+                switch {
+                case ok && limit == 0:
+                        err = syserror.ErrExceedsFileSizeLimit
+                case ok && limit < opts.Length:
+                        opts.Length = limit // Cap the write.
+                }
+        }
+        if err != nil {
+                if dstLock {
+                        dst.mu.Unlock()
+                }
+                if srcLock {
+                        src.mu.Unlock()
+                }
+                return 0, err
+        }
+
+        // Construct readers and writers for the splice. This is used to
+        // provide a safer locking path for the WriteTo/ReadFrom operations
+        // (since they would otherwise go through public interface methods that
+        // conflict with the locking done above), and simplifies the fallback path.
+        w := &lockedWriter{
+                Ctx:    ctx,
+                File:   dst,
+                Offset: opts.DstStart,
+        }
+        r := &lockedReader{
+                Ctx:    ctx,
+                File:   src,
+                Offset: opts.SrcStart,
+        }
+
+        // Attempt to do a WriteTo; this is likely the most efficient.
+        n, err := src.FileOperations.WriteTo(ctx, src, w, opts.Length, opts.Dup)
+        if n == 0 && err == syserror.ENOSYS && !opts.Dup {
+                // Attempt as a ReadFrom. Like a WriteTo, a ReadFrom may also be
+                // more efficient than a copy if buffers are cached or readily
+                // available. (It's unlikely that they can actually be donated).
+                n, err = dst.FileOperations.ReadFrom(ctx, dst, r, opts.Length)
+        }
+
+        // Support one last fallback option, but only if at least one of
+        // the source and destination is a regular file. This is because
+        // if we block at some point, we could lose data. If the source is
+        // not a pipe then reading is not destructive; if the destination
+        // is a regular file, then it is guaranteed not to block writing.
+        if n == 0 && err == syserror.ENOSYS && !opts.Dup && (!dstPipe || !srcPipe) {
+                // Fall back to an in-kernel copy.
+                n, err = io.Copy(w, &io.LimitedReader{
+                        R: r,
+                        N: opts.Length,
+                })
+        }
+
+        // Update offsets, if required.
+        if n > 0 {
+                if !dstPipe && !opts.DstOffset {
+                        atomic.StoreInt64(&dst.offset, dst.offset+n)
+                }
+                if !srcPipe && !opts.SrcOffset {
+                        atomic.StoreInt64(&src.offset, src.offset+n)
+                }
+        }
+
+        // Drop locks.
+        if dstLock {
+                dst.mu.Unlock()
+        }
+        if srcLock {
+                src.mu.Unlock()
+        }
+
+        return n, err
+}
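
Note on the UniqueID-ordered locking above: acquiring the two file mutexes lowest-ID-first is a standard deadlock-avoidance pattern, so two concurrent splices over the same pair of files (in either direction) cannot deadlock. A minimal standalone sketch of the same idea, using a plain sync.Mutex and a made-up resource type rather than gVisor's interruptible file mutex (illustration only, not part of this change):

package lockorder

import "sync"

// resource is a stand-in for fs.File: a lockable object with a stable,
// unique ID. Hypothetical type, for illustration only.
type resource struct {
        id uint64
        mu sync.Mutex
}

// lockPair acquires both locks in a globally consistent order (lowest ID
// first), so concurrent calls with the arguments swapped cannot deadlock.
// If both arguments are the same object, it is locked only once, mirroring
// the dst.UniqueID == src.UniqueID case in the diff above.
func lockPair(a, b *resource) (unlock func()) {
        if a == b {
                a.mu.Lock()
                return func() { a.mu.Unlock() }
        }
        if a.id > b.id {
                a, b = b, a
        }
        a.mu.Lock()
        b.mu.Lock()
        return func() {
                b.mu.Unlock()
                a.mu.Unlock()
        }
}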
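
For orientation, a hedged sketch of how a caller (for example, a sendfile-style syscall path) might drive Splice with an explicit destination offset. The doSendfile helper and its parameters are hypothetical, and the SpliceOpts field semantics are assumed from the diff above (DstOffset/SrcOffset select an explicit start offset and suppress the corresponding file-offset update); this is not the actual syscall implementation:

package sketch

import (
        "gvisor.dev/gvisor/pkg/context"
        "gvisor.dev/gvisor/pkg/sentry/fs"
)

// doSendfile is a hypothetical helper: splice up to count bytes from src
// into dst at the explicit offset dstOff, leaving dst's own file offset
// untouched while advancing src's.
func doSendfile(ctx context.Context, dst, src *fs.File, dstOff, count int64) (int64, error) {
        return fs.Splice(ctx, dst, src, fs.SpliceOpts{
                Length:    count,
                DstOffset: true,   // use DstStart instead of dst's offset; dst.offset is not advanced
                DstStart:  dstOff,
                // SrcOffset is left false: read from src's current offset,
                // which is advanced by the number of bytes spliced.
        })
}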