From 492229d0176c1af2ab4ea4cf91bf211e940b5b12 Mon Sep 17 00:00:00 2001 From: Jamie Liu Date: Tue, 4 Feb 2020 11:28:36 -0800 Subject: VFS2 gofer client Updates #1198 Opening host pipes (by spinning in fdpipe) and host sockets is not yet complete, and will be done in a future CL. Major differences from VFS1 gofer client (sentry/fs/gofer), with varying levels of backportability: - "Cache policies" are replaced by InteropMode, which control the behavior of timestamps in addition to caching. Under InteropModeExclusive (analogous to cacheAll) and InteropModeWritethrough (analogous to cacheAllWritethrough), client timestamps are *not* written back to the server (it is not possible in 9P or Linux for clients to set ctime, so writing back client-authoritative timestamps results in incoherence between atime/mtime and ctime). Under InteropModeShared (analogous to cacheRemoteRevalidating), client timestamps are not used at all (remote filesystem clocks are authoritative). cacheNone is translated to InteropModeShared + new option filesystemOptions.specialRegularFiles. - Under InteropModeShared, "unstable attribute" reloading for permission checks, lookup, and revalidation are fused, which is feasible in VFS2 since gofer.filesystem controls path resolution. This results in a ~33% reduction in RPCs for filesystem operations compared to cacheRemoteRevalidating. For example, consider stat("/foo/bar/baz") where "/foo/bar/baz" fails revalidation, resulting in the instantiation of a new dentry: VFS1 RPCs: getattr("/") // fs.MountNamespace.FindLink() => fs.Inode.CheckPermission() => gofer.inodeOperations.check() => gofer.inodeOperations.UnstableAttr() walkgetattr("/", "foo") = fid1 // fs.Dirent.walk() => gofer.session.Revalidate() => gofer.cachePolicy.Revalidate() clunk(fid1) getattr("/foo") // CheckPermission walkgetattr("/foo", "bar") = fid2 // Revalidate clunk(fid2) getattr("/foo/bar") // CheckPermission walkgetattr("/foo/bar", "baz") = fid3 // Revalidate clunk(fid3) walkgetattr("/foo/bar", "baz") = fid4 // fs.Dirent.walk() => gofer.inodeOperations.Lookup getattr("/foo/bar/baz") // linux.stat() => gofer.inodeOperations.UnstableAttr() VFS2 RPCs: getattr("/") // gofer.filesystem.walkExistingLocked() walkgetattr("/", "foo") = fid1 // gofer.filesystem.stepExistingLocked() clunk(fid1) // No getattr: walkgetattr already updated metadata for permission check walkgetattr("/foo", "bar") = fid2 clunk(fid2) walkgetattr("/foo/bar", "baz") = fid3 // No clunk: fid3 used for new gofer.dentry // No getattr: walkgetattr already updated metadata for stat() - gofer.filesystem.unlinkAt() does not require instantiation of a dentry that represents the file to be deleted. Updates #898. - gofer.regularFileFD.OnClose() skips Tflushf for regular files under InteropModeExclusive, as it's nonsensical to request a remote file flush without flushing locally-buffered writes to that remote file first. - Symlink targets are cached when InteropModeShared is not in effect. - p9.QID.Path (which is already required to be unique for each file within a server, and is accordingly already synthesized from device/inode numbers in all known gofers) is used as-is for inode numbers, rather than being mapped along with attr.RDev in the client to yet another synthetic inode number. - Relevant parts of fsutil.CachingInodeOperations are inlined directly into gofer package code. This avoids having to duplicate part of its functionality in fsutil.HostMappable. PiperOrigin-RevId: 293190213 --- pkg/sentry/fsimpl/gofer/handle.go | 135 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 135 insertions(+) create mode 100644 pkg/sentry/fsimpl/gofer/handle.go (limited to 'pkg/sentry/fsimpl/gofer/handle.go') diff --git a/pkg/sentry/fsimpl/gofer/handle.go b/pkg/sentry/fsimpl/gofer/handle.go new file mode 100644 index 000000000..cfe66f797 --- /dev/null +++ b/pkg/sentry/fsimpl/gofer/handle.go @@ -0,0 +1,135 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package gofer + +import ( + "syscall" + + "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/p9" + "gvisor.dev/gvisor/pkg/safemem" +) + +// handle represents a remote "open file descriptor", consisting of an opened +// fid (p9.File) and optionally a host file descriptor. +type handle struct { + file p9file + fd int32 // -1 if unavailable +} + +// Preconditions: read || write. +func openHandle(ctx context.Context, file p9file, read, write, trunc bool) (handle, error) { + _, newfile, err := file.walk(ctx, nil) + if err != nil { + return handle{fd: -1}, err + } + var flags p9.OpenFlags + switch { + case read && !write: + flags = p9.ReadOnly + case !read && write: + flags = p9.WriteOnly + case read && write: + flags = p9.ReadWrite + } + if trunc { + flags |= p9.OpenTruncate + } + fdobj, _, _, err := newfile.open(ctx, flags) + if err != nil { + newfile.close(ctx) + return handle{fd: -1}, err + } + fd := int32(-1) + if fdobj != nil { + fd = int32(fdobj.Release()) + } + return handle{ + file: newfile, + fd: fd, + }, nil +} + +func (h *handle) close(ctx context.Context) { + h.file.close(ctx) + h.file = p9file{} + if h.fd >= 0 { + syscall.Close(int(h.fd)) + h.fd = -1 + } +} + +func (h *handle) readToBlocksAt(ctx context.Context, dsts safemem.BlockSeq, offset uint64) (uint64, error) { + if dsts.IsEmpty() { + return 0, nil + } + if h.fd >= 0 { + ctx.UninterruptibleSleepStart(false) + n, err := hostPreadv(h.fd, dsts, int64(offset)) + ctx.UninterruptibleSleepFinish(false) + return n, err + } + if dsts.NumBlocks() == 1 && !dsts.Head().NeedSafecopy() { + n, err := h.file.readAt(ctx, dsts.Head().ToSlice(), offset) + return uint64(n), err + } + // Buffer the read since p9.File.ReadAt() takes []byte. + buf := make([]byte, dsts.NumBytes()) + n, err := h.file.readAt(ctx, buf, offset) + if n == 0 { + return 0, err + } + if cp, cperr := safemem.CopySeq(dsts, safemem.BlockSeqOf(safemem.BlockFromSafeSlice(buf[:n]))); cperr != nil { + return cp, cperr + } + return uint64(n), err +} + +func (h *handle) writeFromBlocksAt(ctx context.Context, srcs safemem.BlockSeq, offset uint64) (uint64, error) { + if srcs.IsEmpty() { + return 0, nil + } + if h.fd >= 0 { + ctx.UninterruptibleSleepStart(false) + n, err := hostPwritev(h.fd, srcs, int64(offset)) + ctx.UninterruptibleSleepFinish(false) + return n, err + } + if srcs.NumBlocks() == 1 && !srcs.Head().NeedSafecopy() { + n, err := h.file.writeAt(ctx, srcs.Head().ToSlice(), offset) + return uint64(n), err + } + // Buffer the write since p9.File.WriteAt() takes []byte. + buf := make([]byte, srcs.NumBytes()) + cp, cperr := safemem.CopySeq(safemem.BlockSeqOf(safemem.BlockFromSafeSlice(buf)), srcs) + if cp == 0 { + return 0, cperr + } + n, err := h.file.writeAt(ctx, buf[:cp], offset) + if err != nil { + return uint64(n), err + } + return cp, cperr +} + +func (h *handle) sync(ctx context.Context) error { + if h.fd >= 0 { + ctx.UninterruptibleSleepStart(false) + err := syscall.Fsync(int(h.fd)) + ctx.UninterruptibleSleepFinish(false) + return err + } + return h.file.fsync(ctx) +} -- cgit v1.2.3