summary refs log tree commit diff homepage
path: root/pkg/sentry/fsimpl/gofer
diff options
context:
space:
mode:
Diffstat (limited to 'pkg/sentry/fsimpl/gofer')
-rw-r--r-- pkg/sentry/fsimpl/gofer/BUILD 55
-rw-r--r-- pkg/sentry/fsimpl/gofer/directory.go 194
-rw-r--r-- pkg/sentry/fsimpl/gofer/filesystem.go 1103
-rw-r--r-- pkg/sentry/fsimpl/gofer/gofer.go 1150
-rw-r--r-- pkg/sentry/fsimpl/gofer/handle.go 135
-rw-r--r-- pkg/sentry/fsimpl/gofer/handle_unsafe.go 66
-rw-r--r-- pkg/sentry/fsimpl/gofer/p9file.go 219
-rw-r--r-- pkg/sentry/fsimpl/gofer/pagemath.go 31
-rw-r--r-- pkg/sentry/fsimpl/gofer/regular_file.go 872
-rw-r--r-- pkg/sentry/fsimpl/gofer/special_file.go 159
-rw-r--r-- pkg/sentry/fsimpl/gofer/symlink.go 47
-rw-r--r-- pkg/sentry/fsimpl/gofer/time.go 75
12 files changed, 0 insertions, 4106 deletions
diff --git a/pkg/sentry/fsimpl/gofer/BUILD b/pkg/sentry/fsimpl/gofer/BUILD
deleted file mode 100644
index 4ba76a1e8..000000000
--- a/pkg/sentry/fsimpl/gofer/BUILD
+++ /dev/null
@@ -1,55 +0,0 @@
-load("//tools:defs.bzl", "go_library")
-load("//tools/go_generics:defs.bzl", "go_template_instance")
-
-licenses(["notice"])
-
-go_template_instance(
- name = "dentry_list",
- out = "dentry_list.go",
- package = "gofer",
- prefix = "dentry",
- template = "//pkg/ilist:generic_list",
- types = {
- "Element": "*dentry",
- "Linker": "*dentry",
- },
-)
-
-go_library(
- name = "gofer",
- srcs = [
- "dentry_list.go",
- "directory.go",
- "filesystem.go",
- "gofer.go",
- "handle.go",
- "handle_unsafe.go",
- "p9file.go",
- "pagemath.go",
- "regular_file.go",
- "special_file.go",
- "symlink.go",
- "time.go",
- ],
- visibility = ["//pkg/sentry:internal"],
- deps = [
- "//pkg/abi/linux",
- "//pkg/context",
- "//pkg/fd",
- "//pkg/fspath",
- "//pkg/log",
- "//pkg/p9",
- "//pkg/safemem",
- "//pkg/sentry/fs/fsutil",
- "//pkg/sentry/kernel/auth",
- "//pkg/sentry/kernel/time",
- "//pkg/sentry/memmap",
- "//pkg/sentry/pgalloc",
- "//pkg/sentry/platform",
- "//pkg/sentry/usage",
- "//pkg/sentry/vfs",
- "//pkg/syserror",
- "//pkg/unet",
- "//pkg/usermem",
- ],
-)
diff --git a/pkg/sentry/fsimpl/gofer/directory.go b/pkg/sentry/fsimpl/gofer/directory.go
deleted file mode 100644
index 5dbfc6250..000000000
--- a/pkg/sentry/fsimpl/gofer/directory.go
+++ /dev/null
@@ -1,194 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package gofer
-
-import (
- "sync"
- "sync/atomic"
-
- "gvisor.dev/gvisor/pkg/abi/linux"
- "gvisor.dev/gvisor/pkg/context"
- "gvisor.dev/gvisor/pkg/p9"
- "gvisor.dev/gvisor/pkg/sentry/vfs"
- "gvisor.dev/gvisor/pkg/syserror"
-)
-
-func (d *dentry) isDir() bool {
- return d.fileType() == linux.S_IFDIR
-}
-
-// Preconditions: d.dirMu must be locked. d.isDir(). fs.opts.interop !=
-// InteropModeShared.
-func (d *dentry) cacheNegativeChildLocked(name string) {
- if d.negativeChildren == nil {
- d.negativeChildren = make(map[string]struct{})
- }
- d.negativeChildren[name] = struct{}{}
-}
-
-type directoryFD struct {
- fileDescription
- vfs.DirectoryFileDescriptionDefaultImpl
-
- mu sync.Mutex
- off int64
- dirents []vfs.Dirent
-}
-
-// Release implements vfs.FileDescriptionImpl.Release.
-func (fd *directoryFD) Release() {
-}
-
-// IterDirents implements vfs.FileDescriptionImpl.IterDirents.
-func (fd *directoryFD) IterDirents(ctx context.Context, cb vfs.IterDirentsCallback) error {
- fd.mu.Lock()
- defer fd.mu.Unlock()
-
- if fd.dirents == nil {
- ds, err := fd.dentry().getDirents(ctx)
- if err != nil {
- return err
- }
- fd.dirents = ds
- }
-
- for fd.off < int64(len(fd.dirents)) {
- if err := cb.Handle(fd.dirents[fd.off]); err != nil {
- return err
- }
- fd.off++
- }
- return nil
-}
-
-// Preconditions: d.isDir(). There exists at least one directoryFD representing d.
-func (d *dentry) getDirents(ctx context.Context) ([]vfs.Dirent, error) {
- // 9P2000.L's readdir does not specify behavior in the presence of
- // concurrent mutation of an iterated directory, so implementations may
- // duplicate or omit entries in this case, which violates POSIX semantics.
- // Thus we read all directory entries while holding d.dirMu to exclude
- // directory mutations. (Note that it is impossible for the client to
- // exclude concurrent mutation from other remote filesystem users. Since
- // there is no way to detect if the server has incorrectly omitted
- // directory entries, we simply assume that the server is well-behaved
- // under InteropModeShared.) This is inconsistent with Linux (which appears
- // to assume that directory fids have the correct semantics, and translates
- // struct file_operations::readdir calls directly to readdir RPCs), but is
- // consistent with VFS1.
- //
- // NOTE(b/135560623): In particular, some gofer implementations may not
- // retain state between calls to Readdir, so may not provide a coherent
- // directory stream in the presence of mutation.
-
- d.fs.renameMu.RLock()
- defer d.fs.renameMu.RUnlock()
- d.dirMu.Lock()
- defer d.dirMu.Unlock()
- if d.dirents != nil {
- return d.dirents, nil
- }
-
- // It's not clear if 9P2000.L's readdir is expected to return "." and "..",
- // so we generate them here.
- parent := d.vfsd.ParentOrSelf().Impl().(*dentry)
- dirents := []vfs.Dirent{
- {
- Name: ".",
- Type: linux.DT_DIR,
- Ino: d.ino,
- NextOff: 1,
- },
- {
- Name: "..",
- Type: uint8(atomic.LoadUint32(&parent.mode) >> 12),
- Ino: parent.ino,
- NextOff: 2,
- },
- }
- off := uint64(0)
- const count = 64 * 1024 // for consistency with the vfs1 client
- d.handleMu.RLock()
- defer d.handleMu.RUnlock()
- if !d.handleReadable {
- // This should not be possible because a readable handle should have
- // been opened when the calling directoryFD was opened.
- panic("gofer.dentry.getDirents called without a readable handle")
- }
- for {
- p9ds, err := d.handle.file.readdir(ctx, off, count)
- if err != nil {
- return nil, err
- }
- if len(p9ds) == 0 {
- // Cache dirents for future directoryFDs if permitted.
- if d.fs.opts.interop != InteropModeShared {
- d.dirents = dirents
- }
- return dirents, nil
- }
- for _, p9d := range p9ds {
- if p9d.Name == "." || p9d.Name == ".." {
- continue
- }
- dirent := vfs.Dirent{
- Name: p9d.Name,
- Ino: p9d.QID.Path,
- NextOff: int64(len(dirents) + 1),
- }
- // p9 does not expose 9P2000.U's DMDEVICE, DMNAMEDPIPE, or
- // DMSOCKET.
- switch p9d.Type {
- case p9.TypeSymlink:
- dirent.Type = linux.DT_LNK
- case p9.TypeDir:
- dirent.Type = linux.DT_DIR
- default:
- dirent.Type = linux.DT_REG
- }
- dirents = append(dirents, dirent)
- }
- off = p9ds[len(p9ds)-1].Offset
- }
-}
-
-// Seek implements vfs.FileDescriptionImpl.Seek.
-func (fd *directoryFD) Seek(ctx context.Context, offset int64, whence int32) (int64, error) {
- fd.mu.Lock()
- defer fd.mu.Unlock()
-
- switch whence {
- case linux.SEEK_SET:
- if offset < 0 {
- return 0, syserror.EINVAL
- }
- if offset == 0 {
- // Ensure that the next call to fd.IterDirents() calls
- // fd.dentry().getDirents().
- fd.dirents = nil
- }
- fd.off = offset
- return fd.off, nil
- case linux.SEEK_CUR:
- offset += fd.off
- if offset < 0 {
- return 0, syserror.EINVAL
- }
- // Don't clear fd.dirents in this case, even if offset == 0.
- fd.off = offset
- return fd.off, nil
- default:
- return 0, syserror.EINVAL
- }
-}
diff --git a/pkg/sentry/fsimpl/gofer/filesystem.go b/pkg/sentry/fsimpl/gofer/filesystem.go
deleted file mode 100644
index 38e4cdbc5..000000000
--- a/pkg/sentry/fsimpl/gofer/filesystem.go
+++ /dev/null
@@ -1,1103 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package gofer
-
-import (
- "sync"
-
- "gvisor.dev/gvisor/pkg/abi/linux"
- "gvisor.dev/gvisor/pkg/context"
- "gvisor.dev/gvisor/pkg/fspath"
- "gvisor.dev/gvisor/pkg/p9"
- "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
- "gvisor.dev/gvisor/pkg/sentry/vfs"
- "gvisor.dev/gvisor/pkg/syserror"
-)
-
-// Sync implements vfs.FilesystemImpl.Sync.
-func (fs *filesystem) Sync(ctx context.Context) error {
- // Snapshot current dentries and special files.
- fs.syncMu.Lock()
- ds := make([]*dentry, 0, len(fs.dentries))
- for d := range fs.dentries {
- ds = append(ds, d)
- }
- sffds := make([]*specialFileFD, 0, len(fs.specialFileFDs))
- for sffd := range fs.specialFileFDs {
- sffds = append(sffds, sffd)
- }
- fs.syncMu.Unlock()
-
- // Return the first error we encounter, but sync everything we can
- // regardless.
- var retErr error
-
- // Sync regular files.
- for _, d := range ds {
- if !d.TryIncRef() {
- continue
- }
- err := d.syncSharedHandle(ctx)
- d.DecRef()
- if err != nil && retErr == nil {
- retErr = err
- }
- }
-
- // Sync special files, which may be writable but do not use dentry shared
- // handles (so they won't be synced by the above).
- for _, sffd := range sffds {
- if !sffd.vfsfd.TryIncRef() {
- continue
- }
- err := sffd.Sync(ctx)
- sffd.vfsfd.DecRef()
- if err != nil && retErr == nil {
- retErr = err
- }
- }
-
- return retErr
-}
-
-// maxFilenameLen is the maximum length of a filename. This is dictated by 9P's
-// encoding of strings, which uses 2 bytes for the length prefix.
-const maxFilenameLen = (1 << 16) - 1
-
-// dentrySlicePool is a pool of *[]*dentry used to store dentries for which
-// dentry.checkCachingLocked() must be called. The pool holds pointers to
-// slices because Go lacks generics, so sync.Pool operates on interface{}, so
-// every call to (what should be) sync.Pool<[]*dentry>.Put() would otherwise
-// allocate a copy of the slice header on the heap.
-var dentrySlicePool = sync.Pool{
- New: func() interface{} {
- ds := make([]*dentry, 0, 4) // arbitrary non-zero initial capacity
- return &ds
- },
-}
-
-func appendDentry(ds *[]*dentry, d *dentry) *[]*dentry {
- if ds == nil {
- ds = dentrySlicePool.Get().(*[]*dentry)
- }
- *ds = append(*ds, d)
- return ds
-}
-
-// Preconditions: ds != nil.
-func putDentrySlice(ds *[]*dentry) {
- // Allow dentries to be GC'd.
- for i := range *ds {
- (*ds)[i] = nil
- }
- *ds = (*ds)[:0]
- dentrySlicePool.Put(ds)
-}
-
-// stepLocked resolves rp.Component() to an existing file, starting from the
-// given directory.
-//
-// Dentries which may become cached as a result of the traversal are appended
-// to *ds.
-//
-// Preconditions: fs.renameMu must be locked. d.dirMu must be locked.
-// !rp.Done(). If fs.opts.interop == InteropModeShared, then d's cached
-// metadata must be up to date.
-func (fs *filesystem) stepLocked(ctx context.Context, rp *vfs.ResolvingPath, d *dentry, ds **[]*dentry) (*dentry, error) {
- if !d.isDir() {
- return nil, syserror.ENOTDIR
- }
- if err := d.checkPermissions(rp.Credentials(), vfs.MayExec, true); err != nil {
- return nil, err
- }
-afterSymlink:
- name := rp.Component()
- if name == "." {
- rp.Advance()
- return d, nil
- }
- if name == ".." {
- parentVFSD, err := rp.ResolveParent(&d.vfsd)
- if err != nil {
- return nil, err
- }
- parent := parentVFSD.Impl().(*dentry)
- if fs.opts.interop == InteropModeShared {
- // We must assume that parentVFSD is correct, because if d has been
- // moved elsewhere in the remote filesystem so that its parent has
- // changed, we have no way of determining its new parent's location
- // in the filesystem. Get updated metadata for parentVFSD.
- _, attrMask, attr, err := parent.file.getAttr(ctx, dentryAttrMask())
- if err != nil {
- return nil, err
- }
- parent.updateFromP9Attrs(attrMask, &attr)
- }
- rp.Advance()
- return parent, nil
- }
- childVFSD, err := rp.ResolveChild(&d.vfsd, name)
- if err != nil {
- return nil, err
- }
- // FIXME(jamieliu): Linux performs revalidation before mount lookup
- // (fs/namei.c:lookup_fast() => __d_lookup_rcu(), d_revalidate(),
- // __follow_mount_rcu()).
- child, err := fs.revalidateChildLocked(ctx, rp.VirtualFilesystem(), d, name, childVFSD, ds)
- if err != nil {
- return nil, err
- }
- if child == nil {
- return nil, syserror.ENOENT
- }
- if child.isSymlink() && rp.ShouldFollowSymlink() {
- target, err := child.readlink(ctx, rp.Mount())
- if err != nil {
- return nil, err
- }
- if err := rp.HandleSymlink(target); err != nil {
- return nil, err
- }
- goto afterSymlink // don't check the current directory again
- }
- rp.Advance()
- return child, nil
-}
-
-// revalidateChildLocked must be called after a call to parent.vfsd.Child(name)
-// or vfs.ResolvingPath.ResolveChild(name) returns childVFSD (which may be
-// nil) to verify that the returned child (or lack thereof) is correct. If no file
-// exists at name, revalidateChildLocked returns (nil, nil).
-//
-// Preconditions: fs.renameMu must be locked. parent.dirMu must be locked.
-// parent.isDir(). name is not "." or "..".
-//
-// Postconditions: If revalidateChildLocked returns a non-nil dentry, its
-// cached metadata is up to date.
-func (fs *filesystem) revalidateChildLocked(ctx context.Context, vfsObj *vfs.VirtualFilesystem, parent *dentry, name string, childVFSD *vfs.Dentry, ds **[]*dentry) (*dentry, error) {
- if childVFSD != nil && fs.opts.interop != InteropModeShared {
- // We have a cached dentry that is assumed to be correct.
- return childVFSD.Impl().(*dentry), nil
- }
- // We either don't have a cached dentry or need to verify that it's still
- // correct, either of which requires a remote lookup. Check if this name is
- // valid before performing the lookup.
- if len(name) > maxFilenameLen {
- return nil, syserror.ENAMETOOLONG
- }
- // Check if we've already cached this lookup with a negative result.
- if _, ok := parent.negativeChildren[name]; ok {
- return nil, nil
- }
- // Perform the remote lookup.
- qid, file, attrMask, attr, err := parent.file.walkGetAttrOne(ctx, name)
- if err != nil && err != syserror.ENOENT {
- return nil, err
- }
- if childVFSD != nil {
- child := childVFSD.Impl().(*dentry)
- if !file.isNil() && qid.Path == child.ino {
- // The file at this path hasn't changed. Just update cached
- // metadata.
- file.close(ctx)
- child.updateFromP9Attrs(attrMask, &attr)
- return child, nil
- }
- // The file at this path has changed or no longer exists. Remove
- // the stale dentry from the tree, and re-evaluate its caching
- // status (i.e. if it has 0 references, drop it).
- vfsObj.ForceDeleteDentry(childVFSD)
- *ds = appendDentry(*ds, child)
- childVFSD = nil
- }
- if file.isNil() {
- // No file exists at this path now. Cache the negative lookup if
- // allowed.
- if fs.opts.interop != InteropModeShared {
- parent.cacheNegativeChildLocked(name)
- }
- return nil, nil
- }
- // Create a new dentry representing the file.
- child, err := fs.newDentry(ctx, file, qid, attrMask, &attr)
- if err != nil {
- file.close(ctx)
- return nil, err
- }
- parent.IncRef() // reference held by child on its parent
- parent.vfsd.InsertChild(&child.vfsd, name)
- // For now, child has 0 references, so our caller should call
- // child.checkCachingLocked().
- *ds = appendDentry(*ds, child)
- return child, nil
-}
-
-// walkParentDirLocked resolves all but the last path component of rp to an
-// existing directory, starting from the given directory (which is usually
-// rp.Start().Impl().(*dentry)). It does not check that the returned directory
-// is searchable by the provider of rp.
-//
-// Preconditions: fs.renameMu must be locked. !rp.Done(). If fs.opts.interop ==
-// InteropModeShared, then d's cached metadata must be up to date.
-func (fs *filesystem) walkParentDirLocked(ctx context.Context, rp *vfs.ResolvingPath, d *dentry, ds **[]*dentry) (*dentry, error) {
- for !rp.Final() {
- d.dirMu.Lock()
- next, err := fs.stepLocked(ctx, rp, d, ds)
- d.dirMu.Unlock()
- if err != nil {
- return nil, err
- }
- d = next
- }
- if !d.isDir() {
- return nil, syserror.ENOTDIR
- }
- return d, nil
-}
-
-// resolveLocked resolves rp to an existing file.
-//
-// Preconditions: fs.renameMu must be locked.
-func (fs *filesystem) resolveLocked(ctx context.Context, rp *vfs.ResolvingPath, ds **[]*dentry) (*dentry, error) {
- d := rp.Start().Impl().(*dentry)
- if fs.opts.interop == InteropModeShared {
- // Get updated metadata for rp.Start() as required by fs.stepLocked().
- if err := d.updateFromGetattr(ctx); err != nil {
- return nil, err
- }
- }
- for !rp.Done() {
- d.dirMu.Lock()
- next, err := fs.stepLocked(ctx, rp, d, ds)
- d.dirMu.Unlock()
- if err != nil {
- return nil, err
- }
- d = next
- }
- if rp.MustBeDir() && !d.isDir() {
- return nil, syserror.ENOTDIR
- }
- return d, nil
-}
-
-// doCreateAt checks that creating a file at rp is permitted, then invokes
-// create to do so.
-//
-// Preconditions: !rp.Done(). For the final path component in rp,
-// !rp.ShouldFollowSymlink().
-func (fs *filesystem) doCreateAt(ctx context.Context, rp *vfs.ResolvingPath, dir bool, create func(parent *dentry, name string) error) error {
- var ds *[]*dentry
- fs.renameMu.RLock()
- defer fs.renameMuRUnlockAndCheckCaching(&ds)
- start := rp.Start().Impl().(*dentry)
- if fs.opts.interop == InteropModeShared {
- // Get updated metadata for start as required by
- // fs.walkParentDirLocked().
- if err := start.updateFromGetattr(ctx); err != nil {
- return err
- }
- }
- parent, err := fs.walkParentDirLocked(ctx, rp, start, &ds)
- if err != nil {
- return err
- }
- if err := parent.checkPermissions(rp.Credentials(), vfs.MayWrite|vfs.MayExec, true); err != nil {
- return err
- }
- if parent.isDeleted() {
- return syserror.ENOENT
- }
- name := rp.Component()
- if name == "." || name == ".." {
- return syserror.EEXIST
- }
- if len(name) > maxFilenameLen {
- return syserror.ENAMETOOLONG
- }
- if !dir && rp.MustBeDir() {
- return syserror.ENOENT
- }
- mnt := rp.Mount()
- if err := mnt.CheckBeginWrite(); err != nil {
- return err
- }
- defer mnt.EndWrite()
- parent.dirMu.Lock()
- defer parent.dirMu.Unlock()
- if fs.opts.interop == InteropModeShared {
- // The existence of a dentry at name would be inconclusive because the
- // file it represents may have been deleted from the remote filesystem,
- // so we would need to make an RPC to revalidate the dentry. Just
- // attempt the file creation RPC instead. If a file does exist, the RPC
- // will fail with EEXIST like we would have. If the RPC succeeds, and a
- // stale dentry exists, the dentry will fail revalidation next time
- // it's used.
- return create(parent, name)
- }
- if parent.vfsd.Child(name) != nil {
- return syserror.EEXIST
- }
- // No cached dentry exists; however, there might still be an existing file
- // at name. As above, we attempt the file creation RPC anyway.
- if err := create(parent, name); err != nil {
- return err
- }
- parent.touchCMtime(ctx)
- delete(parent.negativeChildren, name)
- parent.dirents = nil
- return nil
-}
-
-// Preconditions: !rp.Done().
-func (fs *filesystem) unlinkAt(ctx context.Context, rp *vfs.ResolvingPath, dir bool) error {
- var ds *[]*dentry
- fs.renameMu.RLock()
- defer fs.renameMuRUnlockAndCheckCaching(&ds)
- start := rp.Start().Impl().(*dentry)
- if fs.opts.interop == InteropModeShared {
- // Get updated metadata for start as required by
- // fs.walkParentDirLocked().
- if err := start.updateFromGetattr(ctx); err != nil {
- return err
- }
- }
- parent, err := fs.walkParentDirLocked(ctx, rp, start, &ds)
- if err != nil {
- return err
- }
- if err := parent.checkPermissions(rp.Credentials(), vfs.MayWrite|vfs.MayExec, true); err != nil {
- return err
- }
- if err := rp.Mount().CheckBeginWrite(); err != nil {
- return err
- }
- defer rp.Mount().EndWrite()
-
- name := rp.Component()
- if dir {
- if name == "." {
- return syserror.EINVAL
- }
- if name == ".." {
- return syserror.ENOTEMPTY
- }
- } else {
- if name == "." || name == ".." {
- return syserror.EISDIR
- }
- }
- vfsObj := rp.VirtualFilesystem()
- mntns := vfs.MountNamespaceFromContext(ctx)
- defer mntns.DecRef()
- parent.dirMu.Lock()
- defer parent.dirMu.Unlock()
- childVFSD := parent.vfsd.Child(name)
- var child *dentry
- // We only need a dentry representing the file at name if it can be a mount
- // point. If childVFSD is nil, then it can't be a mount point. If childVFSD
- // is non-nil but stale, the actual file can't be a mount point either; we
- // detect this case by just speculatively calling PrepareDeleteDentry and
- // only revalidating the dentry if that fails (indicating that the existing
- // dentry is a mount point).
- if childVFSD != nil {
- child = childVFSD.Impl().(*dentry)
- if err := vfsObj.PrepareDeleteDentry(mntns, childVFSD); err != nil {
- child, err = fs.revalidateChildLocked(ctx, vfsObj, parent, name, childVFSD, &ds)
- if err != nil {
- return err
- }
- if child != nil {
- childVFSD = &child.vfsd
- if err := vfsObj.PrepareDeleteDentry(mntns, childVFSD); err != nil {
- return err
- }
- } else {
- childVFSD = nil
- }
- }
- } else if _, ok := parent.negativeChildren[name]; ok {
- return syserror.ENOENT
- }
- flags := uint32(0)
- if dir {
- if child != nil && !child.isDir() {
- return syserror.ENOTDIR
- }
- flags = linux.AT_REMOVEDIR
- } else {
- if child != nil && child.isDir() {
- return syserror.EISDIR
- }
- if rp.MustBeDir() {
- return syserror.ENOTDIR
- }
- }
- err = parent.file.unlinkAt(ctx, name, flags)
- if err != nil {
- if childVFSD != nil {
- vfsObj.AbortDeleteDentry(childVFSD)
- }
- return err
- }
- if fs.opts.interop != InteropModeShared {
- parent.touchCMtime(ctx)
- parent.cacheNegativeChildLocked(name)
- parent.dirents = nil
- }
- if child != nil {
- child.setDeleted()
- vfsObj.CommitDeleteDentry(childVFSD)
- ds = appendDentry(ds, child)
- }
- return nil
-}
-
-// renameMuRUnlockAndCheckCaching calls fs.renameMu.RUnlock(), then calls
-// dentry.checkCachingLocked on all dentries in *ds with fs.renameMu locked for
-// writing.
-//
-// ds is a pointer-to-pointer since defer evaluates its arguments immediately,
-// but dentry slices are allocated lazily, and it's much easier to say "defer
-// fs.renameMuRUnlockAndCheckCaching(&ds)" than "defer func() {
-// fs.renameMuRUnlockAndCheckCaching(ds) }()" to work around this.
-func (fs *filesystem) renameMuRUnlockAndCheckCaching(ds **[]*dentry) {
- fs.renameMu.RUnlock()
- if *ds == nil {
- return
- }
- if len(**ds) != 0 {
- fs.renameMu.Lock()
- for _, d := range **ds {
- d.checkCachingLocked()
- }
- fs.renameMu.Unlock()
- }
- putDentrySlice(*ds)
-}
-
-func (fs *filesystem) renameMuUnlockAndCheckCaching(ds **[]*dentry) {
- if *ds == nil {
- fs.renameMu.Unlock()
- return
- }
- for _, d := range **ds {
- d.checkCachingLocked()
- }
- fs.renameMu.Unlock()
- putDentrySlice(*ds)
-}
-
-// AccessAt implements vfs.FilesystemImpl.AccessAt.
-func (fs *filesystem) AccessAt(ctx context.Context, rp *vfs.ResolvingPath, creds *auth.Credentials, ats vfs.AccessTypes) error {
- var ds *[]*dentry
- fs.renameMu.RLock()
- defer fs.renameMuRUnlockAndCheckCaching(&ds)
- d, err := fs.resolveLocked(ctx, rp, &ds)
- if err != nil {
- return err
- }
- return d.checkPermissions(creds, ats, d.isDir())
-}
-
-// GetDentryAt implements vfs.FilesystemImpl.GetDentryAt.
-func (fs *filesystem) GetDentryAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.GetDentryOptions) (*vfs.Dentry, error) {
- var ds *[]*dentry
- fs.renameMu.RLock()
- defer fs.renameMuRUnlockAndCheckCaching(&ds)
- d, err := fs.resolveLocked(ctx, rp, &ds)
- if err != nil {
- return nil, err
- }
- if opts.CheckSearchable {
- if !d.isDir() {
- return nil, syserror.ENOTDIR
- }
- if err := d.checkPermissions(rp.Credentials(), vfs.MayExec, true); err != nil {
- return nil, err
- }
- }
- d.IncRef()
- return &d.vfsd, nil
-}
-
-// GetParentDentryAt implements vfs.FilesystemImpl.GetParentDentryAt.
-func (fs *filesystem) GetParentDentryAt(ctx context.Context, rp *vfs.ResolvingPath) (*vfs.Dentry, error) {
- var ds *[]*dentry
- fs.renameMu.RLock()
- defer fs.renameMuRUnlockAndCheckCaching(&ds)
- start := rp.Start().Impl().(*dentry)
- if fs.opts.interop == InteropModeShared {
- // Get updated metadata for start as required by
- // fs.walkParentDirLocked().
- if err := start.updateFromGetattr(ctx); err != nil {
- return nil, err
- }
- }
- d, err := fs.walkParentDirLocked(ctx, rp, start, &ds)
- if err != nil {
- return nil, err
- }
- d.IncRef()
- return &d.vfsd, nil
-}
-
-// LinkAt implements vfs.FilesystemImpl.LinkAt.
-func (fs *filesystem) LinkAt(ctx context.Context, rp *vfs.ResolvingPath, vd vfs.VirtualDentry) error {
- return fs.doCreateAt(ctx, rp, false /* dir */, func(parent *dentry, childName string) error {
- if rp.Mount() != vd.Mount() {
- return syserror.EXDEV
- }
- // 9P2000.L supports hard links, but we don't.
- return syserror.EPERM
- })
-}
-
-// MkdirAt implements vfs.FilesystemImpl.MkdirAt.
-func (fs *filesystem) MkdirAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.MkdirOptions) error {
- return fs.doCreateAt(ctx, rp, true /* dir */, func(parent *dentry, name string) error {
- creds := rp.Credentials()
- _, err := parent.file.mkdir(ctx, name, (p9.FileMode)(opts.Mode), (p9.UID)(creds.EffectiveKUID), (p9.GID)(creds.EffectiveKGID))
- return err
- })
-}
-
-// MknodAt implements vfs.FilesystemImpl.MknodAt.
-func (fs *filesystem) MknodAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.MknodOptions) error {
- return fs.doCreateAt(ctx, rp, false /* dir */, func(parent *dentry, name string) error {
- creds := rp.Credentials()
- _, err := parent.file.mknod(ctx, name, (p9.FileMode)(opts.Mode), opts.DevMajor, opts.DevMinor, (p9.UID)(creds.EffectiveKUID), (p9.GID)(creds.EffectiveKGID))
- return err
- })
-}
-
-// OpenAt implements vfs.FilesystemImpl.OpenAt.
-func (fs *filesystem) OpenAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.OpenOptions) (*vfs.FileDescription, error) {
- // Reject O_TMPFILE, which is not supported; supporting it correctly in the
- // presence of other remote filesystem users requires remote filesystem
- // support, and it isn't clear that there's any way to implement this in
- // 9P.
- if opts.Flags&linux.O_TMPFILE != 0 {
- return nil, syserror.EOPNOTSUPP
- }
- mayCreate := opts.Flags&linux.O_CREAT != 0
- mustCreate := opts.Flags&(linux.O_CREAT|linux.O_EXCL) == (linux.O_CREAT | linux.O_EXCL)
-
- var ds *[]*dentry
- fs.renameMu.RLock()
- defer fs.renameMuRUnlockAndCheckCaching(&ds)
-
- start := rp.Start().Impl().(*dentry)
- if fs.opts.interop == InteropModeShared {
- // Get updated metadata for start as required by fs.stepLocked().
- if err := start.updateFromGetattr(ctx); err != nil {
- return nil, err
- }
- }
- if rp.Done() {
- return start.openLocked(ctx, rp, &opts)
- }
-
-afterTrailingSymlink:
- parent, err := fs.walkParentDirLocked(ctx, rp, start, &ds)
- if err != nil {
- return nil, err
- }
- // Check for search permission in the parent directory.
- if err := parent.checkPermissions(rp.Credentials(), vfs.MayExec, true); err != nil {
- return nil, err
- }
- // Determine whether or not we need to create a file.
- parent.dirMu.Lock()
- child, err := fs.stepLocked(ctx, rp, parent, &ds)
- if err == syserror.ENOENT && mayCreate {
- fd, err := parent.createAndOpenChildLocked(ctx, rp, &opts)
- parent.dirMu.Unlock()
- return fd, err
- }
- if err != nil {
- parent.dirMu.Unlock()
- return nil, err
- }
- // Open existing child or follow symlink.
- parent.dirMu.Unlock()
- if mustCreate {
- return nil, syserror.EEXIST
- }
- if child.isSymlink() && rp.ShouldFollowSymlink() {
- target, err := child.readlink(ctx, rp.Mount())
- if err != nil {
- return nil, err
- }
- if err := rp.HandleSymlink(target); err != nil {
- return nil, err
- }
- start = parent
- goto afterTrailingSymlink
- }
- return child.openLocked(ctx, rp, &opts)
-}
-
-// Preconditions: fs.renameMu must be locked.
-func (d *dentry) openLocked(ctx context.Context, rp *vfs.ResolvingPath, opts *vfs.OpenOptions) (*vfs.FileDescription, error) {
- ats := vfs.AccessTypesForOpenFlags(opts)
- if err := d.checkPermissions(rp.Credentials(), ats, d.isDir()); err != nil {
- return nil, err
- }
- mnt := rp.Mount()
- filetype := d.fileType()
- switch {
- case filetype == linux.S_IFREG && !d.fs.opts.regularFilesUseSpecialFileFD:
- if err := d.ensureSharedHandle(ctx, ats&vfs.MayRead != 0, ats&vfs.MayWrite != 0, opts.Flags&linux.O_TRUNC != 0); err != nil {
- return nil, err
- }
- fd := &regularFileFD{}
- if err := fd.vfsfd.Init(fd, opts.Flags, mnt, &d.vfsd, &vfs.FileDescriptionOptions{
- AllowDirectIO: true,
- }); err != nil {
- return nil, err
- }
- return &fd.vfsfd, nil
- case filetype == linux.S_IFDIR:
- // Can't open directories with O_CREAT.
- if opts.Flags&linux.O_CREAT != 0 {
- return nil, syserror.EISDIR
- }
- // Can't open directories writably.
- if ats&vfs.MayWrite != 0 {
- return nil, syserror.EISDIR
- }
- if opts.Flags&linux.O_DIRECT != 0 {
- return nil, syserror.EINVAL
- }
- if err := d.ensureSharedHandle(ctx, ats&vfs.MayRead != 0, false /* write */, false /* trunc */); err != nil {
- return nil, err
- }
- fd := &directoryFD{}
- if err := fd.vfsfd.Init(fd, opts.Flags, mnt, &d.vfsd, &vfs.FileDescriptionOptions{}); err != nil {
- return nil, err
- }
- return &fd.vfsfd, nil
- case filetype == linux.S_IFLNK:
- // Can't open symlinks without O_PATH (which is unimplemented).
- return nil, syserror.ELOOP
- default:
- if opts.Flags&linux.O_DIRECT != 0 {
- return nil, syserror.EINVAL
- }
- h, err := openHandle(ctx, d.file, ats&vfs.MayRead != 0, ats&vfs.MayWrite != 0, opts.Flags&linux.O_TRUNC != 0)
- if err != nil {
- return nil, err
- }
- fd := &specialFileFD{
- handle: h,
- }
- if err := fd.vfsfd.Init(fd, opts.Flags, mnt, &d.vfsd, &vfs.FileDescriptionOptions{}); err != nil {
- h.close(ctx)
- return nil, err
- }
- return &fd.vfsfd, nil
- }
-}
-
-// Preconditions: d.fs.renameMu must be locked. d.dirMu must be locked.
-func (d *dentry) createAndOpenChildLocked(ctx context.Context, rp *vfs.ResolvingPath, opts *vfs.OpenOptions) (*vfs.FileDescription, error) {
- if err := d.checkPermissions(rp.Credentials(), vfs.MayWrite, true); err != nil {
- return nil, err
- }
- if d.isDeleted() {
- return nil, syserror.ENOENT
- }
- mnt := rp.Mount()
- if err := mnt.CheckBeginWrite(); err != nil {
- return nil, err
- }
- defer mnt.EndWrite()
-
- // 9P2000.L's lcreate takes a fid representing the parent directory, and
- // converts it into an open fid representing the created file, so we need
- // to duplicate the directory fid first.
- _, dirfile, err := d.file.walk(ctx, nil)
- if err != nil {
- return nil, err
- }
- creds := rp.Credentials()
- name := rp.Component()
- fdobj, openFile, createQID, _, err := dirfile.create(ctx, name, (p9.OpenFlags)(opts.Flags), (p9.FileMode)(opts.Mode), (p9.UID)(creds.EffectiveKUID), (p9.GID)(creds.EffectiveKGID))
- if err != nil {
- dirfile.close(ctx)
- return nil, err
- }
- // Then we need to walk to the file we just created to get a non-open fid
- // representing it, and to get its metadata. This must use d.file since, as
- // explained above, dirfile was invalidated by dirfile.create().
- walkQID, nonOpenFile, attrMask, attr, err := d.file.walkGetAttrOne(ctx, name)
- if err != nil {
- openFile.close(ctx)
- if fdobj != nil {
- fdobj.Close()
- }
- return nil, err
- }
- // Sanity-check that we walked to the file we created.
- if createQID.Path != walkQID.Path {
- // Probably due to concurrent remote filesystem mutation?
- ctx.Warningf("gofer.dentry.createAndOpenChildLocked: created file has QID %v before walk, QID %v after (interop=%v)", createQID, walkQID, d.fs.opts.interop)
- nonOpenFile.close(ctx)
- openFile.close(ctx)
- if fdobj != nil {
- fdobj.Close()
- }
- return nil, syserror.EAGAIN
- }
-
- // Construct the new dentry.
- child, err := d.fs.newDentry(ctx, nonOpenFile, createQID, attrMask, &attr)
- if err != nil {
- nonOpenFile.close(ctx)
- openFile.close(ctx)
- if fdobj != nil {
- fdobj.Close()
- }
- return nil, err
- }
- // Incorporate the fid that was opened by lcreate.
- useRegularFileFD := child.fileType() == linux.S_IFREG && !d.fs.opts.regularFilesUseSpecialFileFD
- if useRegularFileFD {
- child.handleMu.Lock()
- child.handle.file = openFile
- if fdobj != nil {
- child.handle.fd = int32(fdobj.Release())
- }
- child.handleReadable = vfs.MayReadFileWithOpenFlags(opts.Flags)
- child.handleWritable = vfs.MayWriteFileWithOpenFlags(opts.Flags)
- child.handleMu.Unlock()
- }
- // Take a reference on the new dentry to be held by the new file
- // description. (This reference also means that the new dentry is not
- // eligible for caching yet, so we don't need to append to a dentry slice.)
- child.refs = 1
- // Insert the dentry into the tree.
- d.IncRef() // reference held by child on its parent d
- d.vfsd.InsertChild(&child.vfsd, name)
- if d.fs.opts.interop != InteropModeShared {
- d.touchCMtime(ctx)
- delete(d.negativeChildren, name)
- d.dirents = nil
- }
-
- // Finally, construct a file description representing the created file.
- var childVFSFD *vfs.FileDescription
- mnt.IncRef()
- if useRegularFileFD {
- fd := &regularFileFD{}
- if err := fd.vfsfd.Init(fd, opts.Flags, mnt, &child.vfsd, &vfs.FileDescriptionOptions{
- AllowDirectIO: true,
- }); err != nil {
- return nil, err
- }
- childVFSFD = &fd.vfsfd
- } else {
- fd := &specialFileFD{
- handle: handle{
- file: openFile,
- fd: -1,
- },
- }
- if fdobj != nil {
- fd.handle.fd = int32(fdobj.Release())
- }
- if err := fd.vfsfd.Init(fd, opts.Flags, mnt, &child.vfsd, &vfs.FileDescriptionOptions{}); err != nil {
- fd.handle.close(ctx)
- return nil, err
- }
- childVFSFD = &fd.vfsfd
- }
- return childVFSFD, nil
-}
-
-// ReadlinkAt implements vfs.FilesystemImpl.ReadlinkAt.
-func (fs *filesystem) ReadlinkAt(ctx context.Context, rp *vfs.ResolvingPath) (string, error) {
- var ds *[]*dentry
- fs.renameMu.RLock()
- defer fs.renameMuRUnlockAndCheckCaching(&ds)
- d, err := fs.resolveLocked(ctx, rp, &ds)
- if err != nil {
- return "", err
- }
- if !d.isSymlink() {
- return "", syserror.EINVAL
- }
- return d.readlink(ctx, rp.Mount())
-}
-
-// RenameAt implements vfs.FilesystemImpl.RenameAt.
-func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldParentVD vfs.VirtualDentry, oldName string, opts vfs.RenameOptions) error {
- if opts.Flags != 0 {
- // Requires 9P support.
- return syserror.EINVAL
- }
-
- var ds *[]*dentry
- fs.renameMu.Lock()
- defer fs.renameMuUnlockAndCheckCaching(&ds)
- newParent, err := fs.walkParentDirLocked(ctx, rp, rp.Start().Impl().(*dentry), &ds)
- if err != nil {
- return err
- }
- newName := rp.Component()
- if newName == "." || newName == ".." {
- return syserror.EBUSY
- }
- mnt := rp.Mount()
- if mnt != oldParentVD.Mount() {
- return syserror.EXDEV
- }
- if err := mnt.CheckBeginWrite(); err != nil {
- return err
- }
- defer mnt.EndWrite()
-
- oldParent := oldParentVD.Dentry().Impl().(*dentry)
- if fs.opts.interop == InteropModeShared {
- if err := oldParent.updateFromGetattr(ctx); err != nil {
- return err
- }
- }
- if err := oldParent.checkPermissions(rp.Credentials(), vfs.MayWrite|vfs.MayExec, true); err != nil {
- return err
- }
- vfsObj := rp.VirtualFilesystem()
- // We need a dentry representing the renamed file since, if it's a
- // directory, we need to check for write permission on it.
- oldParent.dirMu.Lock()
- defer oldParent.dirMu.Unlock()
- renamed, err := fs.revalidateChildLocked(ctx, vfsObj, oldParent, oldName, oldParent.vfsd.Child(oldName), &ds)
- if err != nil {
- return err
- }
- if renamed == nil {
- return syserror.ENOENT
- }
- if renamed.isDir() {
- if renamed == newParent || renamed.vfsd.IsAncestorOf(&newParent.vfsd) {
- return syserror.EINVAL
- }
- if oldParent != newParent {
- if err := renamed.checkPermissions(rp.Credentials(), vfs.MayWrite, true); err != nil {
- return err
- }
- }
- } else {
- if opts.MustBeDir || rp.MustBeDir() {
- return syserror.ENOTDIR
- }
- }
-
- if oldParent != newParent {
- if err := newParent.checkPermissions(rp.Credentials(), vfs.MayWrite|vfs.MayExec, true); err != nil {
- return err
- }
- newParent.dirMu.Lock()
- defer newParent.dirMu.Unlock()
- }
- if newParent.isDeleted() {
- return syserror.ENOENT
- }
- replacedVFSD := newParent.vfsd.Child(newName)
- var replaced *dentry
- // This is similar to unlinkAt, except:
- //
- // - We revalidate the replaced dentry unconditionally for simplicity.
- //
- // - If rp.MustBeDir(), then we need a dentry representing the replaced
- // file regardless to confirm that it's a directory.
- if replacedVFSD != nil || rp.MustBeDir() {
- replaced, err = fs.revalidateChildLocked(ctx, vfsObj, newParent, newName, replacedVFSD, &ds)
- if err != nil {
- return err
- }
- if replaced != nil {
- if replaced.isDir() {
- if !renamed.isDir() {
- return syserror.EISDIR
- }
- } else {
- if rp.MustBeDir() || renamed.isDir() {
- return syserror.ENOTDIR
- }
- }
- replacedVFSD = &replaced.vfsd
- } else {
- replacedVFSD = nil
- }
- }
-
- if oldParent == newParent && oldName == newName {
- return nil
- }
- mntns := vfs.MountNamespaceFromContext(ctx)
- defer mntns.DecRef()
- if err := vfsObj.PrepareRenameDentry(mntns, &renamed.vfsd, replacedVFSD); err != nil {
- return err
- }
- if err := renamed.file.rename(ctx, newParent.file, newName); err != nil {
- vfsObj.AbortRenameDentry(&renamed.vfsd, replacedVFSD)
- return err
- }
- if fs.opts.interop != InteropModeShared {
- oldParent.cacheNegativeChildLocked(oldName)
- oldParent.dirents = nil
- delete(newParent.negativeChildren, newName)
- newParent.dirents = nil
- }
- vfsObj.CommitRenameReplaceDentry(&renamed.vfsd, &newParent.vfsd, newName, replacedVFSD)
- return nil
-}
-
-// RmdirAt implements vfs.FilesystemImpl.RmdirAt.
-func (fs *filesystem) RmdirAt(ctx context.Context, rp *vfs.ResolvingPath) error {
- return fs.unlinkAt(ctx, rp, true /* dir */)
-}
-
-// SetStatAt implements vfs.FilesystemImpl.SetStatAt.
-func (fs *filesystem) SetStatAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.SetStatOptions) error {
- var ds *[]*dentry
- fs.renameMu.RLock()
- defer fs.renameMuRUnlockAndCheckCaching(&ds)
- d, err := fs.resolveLocked(ctx, rp, &ds)
- if err != nil {
- return err
- }
- return d.setStat(ctx, rp.Credentials(), &opts.Stat, rp.Mount())
-}
-
-// StatAt implements vfs.FilesystemImpl.StatAt.
-func (fs *filesystem) StatAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.StatOptions) (linux.Statx, error) {
- var ds *[]*dentry
- fs.renameMu.RLock()
- defer fs.renameMuRUnlockAndCheckCaching(&ds)
- d, err := fs.resolveLocked(ctx, rp, &ds)
- if err != nil {
- return linux.Statx{}, err
- }
- // Since walking updates metadata for all traversed dentries under
- // InteropModeShared, including the returned one, we can return cached
- // metadata here regardless of fs.opts.interop.
- var stat linux.Statx
- d.statTo(&stat)
- return stat, nil
-}
-
-// StatFSAt implements vfs.FilesystemImpl.StatFSAt.
-func (fs *filesystem) StatFSAt(ctx context.Context, rp *vfs.ResolvingPath) (linux.Statfs, error) {
- var ds *[]*dentry
- fs.renameMu.RLock()
- defer fs.renameMuRUnlockAndCheckCaching(&ds)
- d, err := fs.resolveLocked(ctx, rp, &ds)
- if err != nil {
- return linux.Statfs{}, err
- }
- fsstat, err := d.file.statFS(ctx)
- if err != nil {
- return linux.Statfs{}, err
- }
- nameLen := uint64(fsstat.NameLength)
- if nameLen > maxFilenameLen {
- nameLen = maxFilenameLen
- }
- return linux.Statfs{
- // This is primarily for distinguishing a gofer file system in
- // tests. Testing is important, so instead of defining
- // something completely random, use a standard value.
- Type: linux.V9FS_MAGIC,
- BlockSize: int64(fsstat.BlockSize),
- Blocks: fsstat.Blocks,
- BlocksFree: fsstat.BlocksFree,
- BlocksAvailable: fsstat.BlocksAvailable,
- Files: fsstat.Files,
- FilesFree: fsstat.FilesFree,
- NameLength: nameLen,
- }, nil
-}
-
-// SymlinkAt implements vfs.FilesystemImpl.SymlinkAt.
-func (fs *filesystem) SymlinkAt(ctx context.Context, rp *vfs.ResolvingPath, target string) error {
- return fs.doCreateAt(ctx, rp, false /* dir */, func(parent *dentry, name string) error {
- creds := rp.Credentials()
- _, err := parent.file.symlink(ctx, target, name, (p9.UID)(creds.EffectiveKUID), (p9.GID)(creds.EffectiveKGID))
- return err
- })
-}
-
-// UnlinkAt implements vfs.FilesystemImpl.UnlinkAt.
-func (fs *filesystem) UnlinkAt(ctx context.Context, rp *vfs.ResolvingPath) error {
- return fs.unlinkAt(ctx, rp, false /* dir */)
-}
-
-// ListxattrAt implements vfs.FilesystemImpl.ListxattrAt.
-func (fs *filesystem) ListxattrAt(ctx context.Context, rp *vfs.ResolvingPath) ([]string, error) {
- var ds *[]*dentry
- fs.renameMu.RLock()
- defer fs.renameMuRUnlockAndCheckCaching(&ds)
- d, err := fs.resolveLocked(ctx, rp, &ds)
- if err != nil {
- return nil, err
- }
- return d.listxattr(ctx)
-}
-
-// GetxattrAt implements vfs.FilesystemImpl.GetxattrAt.
-func (fs *filesystem) GetxattrAt(ctx context.Context, rp *vfs.ResolvingPath, name string) (string, error) {
- var ds *[]*dentry
- fs.renameMu.RLock()
- defer fs.renameMuRUnlockAndCheckCaching(&ds)
- d, err := fs.resolveLocked(ctx, rp, &ds)
- if err != nil {
- return "", err
- }
- return d.getxattr(ctx, name)
-}
-
-// SetxattrAt implements vfs.FilesystemImpl.SetxattrAt.
-func (fs *filesystem) SetxattrAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.SetxattrOptions) error {
- var ds *[]*dentry
- fs.renameMu.RLock()
- defer fs.renameMuRUnlockAndCheckCaching(&ds)
- d, err := fs.resolveLocked(ctx, rp, &ds)
- if err != nil {
- return err
- }
- return d.setxattr(ctx, &opts)
-}
-
-// RemovexattrAt implements vfs.FilesystemImpl.RemovexattrAt.
-func (fs *filesystem) RemovexattrAt(ctx context.Context, rp *vfs.ResolvingPath, name string) error {
- var ds *[]*dentry
- fs.renameMu.RLock()
- defer fs.renameMuRUnlockAndCheckCaching(&ds)
- d, err := fs.resolveLocked(ctx, rp, &ds)
- if err != nil {
- return err
- }
- return d.removexattr(ctx, name)
-}
-
-// PrependPath implements vfs.FilesystemImpl.PrependPath.
-func (fs *filesystem) PrependPath(ctx context.Context, vfsroot, vd vfs.VirtualDentry, b *fspath.Builder) error {
- fs.renameMu.RLock()
- defer fs.renameMu.RUnlock()
- return vfs.GenericPrependPath(vfsroot, vd, b)
-}
diff --git a/pkg/sentry/fsimpl/gofer/gofer.go b/pkg/sentry/fsimpl/gofer/gofer.go
deleted file mode 100644
index c4a8f0b38..000000000
--- a/pkg/sentry/fsimpl/gofer/gofer.go
+++ /dev/null
@@ -1,1150 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// Package gofer provides a filesystem implementation that is backed by a 9p
-// server, interchangeably referred to as "gofers" throughout this package.
-//
-// Lock order:
-// regularFileFD/directoryFD.mu
-// filesystem.renameMu
-// dentry.dirMu
-// filesystem.syncMu
-// dentry.metadataMu
-// *** "memmap.Mappable locks" below this point
-// dentry.mapsMu
-// *** "memmap.Mappable locks taken by Translate" below this point
-// dentry.handleMu
-// dentry.dataMu
-//
-// Locking dentry.dirMu in multiple dentries requires holding
-// filesystem.renameMu for writing.
-package gofer
-
-import (
- "fmt"
- "strconv"
- "sync"
- "sync/atomic"
- "syscall"
-
- "gvisor.dev/gvisor/pkg/abi/linux"
- "gvisor.dev/gvisor/pkg/context"
- "gvisor.dev/gvisor/pkg/log"
- "gvisor.dev/gvisor/pkg/p9"
- "gvisor.dev/gvisor/pkg/sentry/fs/fsutil"
- "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
- "gvisor.dev/gvisor/pkg/sentry/memmap"
- "gvisor.dev/gvisor/pkg/sentry/pgalloc"
- "gvisor.dev/gvisor/pkg/sentry/vfs"
- "gvisor.dev/gvisor/pkg/syserror"
- "gvisor.dev/gvisor/pkg/unet"
- "gvisor.dev/gvisor/pkg/usermem"
-)
-
-// Name is the default filesystem name.
-const Name = "9p"
-
-// FilesystemType implements vfs.FilesystemType.
-type FilesystemType struct{}
-
-// filesystem implements vfs.FilesystemImpl.
-type filesystem struct {
- vfsfs vfs.Filesystem
-
- // mfp is used to allocate memory that caches regular file contents. mfp is
- // immutable.
- mfp pgalloc.MemoryFileProvider
-
- // Immutable options.
- opts filesystemOptions
-
- // client is the client used by this filesystem. client is immutable.
- client *p9.Client
-
- // uid and gid are the effective KUID and KGID of the filesystem's creator,
- // and are used as the owner and group for files that don't specify one.
- // uid and gid are immutable.
- uid auth.KUID
- gid auth.KGID
-
- // renameMu serves two purposes:
- //
- // - It synchronizes path resolution with renaming initiated by this
- // client.
- //
- // - It is held by path resolution to ensure that reachable dentries remain
- // valid. A dentry is reachable by path resolution if it has a non-zero
- // reference count (such that it is usable as vfs.ResolvingPath.Start() or
- // is reachable from its children), or if it is a child dentry (such that
- // it is reachable from its parent).
- renameMu sync.RWMutex
-
- // cachedDentries contains all dentries with 0 references. (Due to race
- // conditions, it may also contain dentries with non-zero references.)
- // cachedDentriesLen is the number of dentries in cachedDentries. These
- // fields are protected by renameMu.
- cachedDentries dentryList
- cachedDentriesLen uint64
-
- // dentries contains all dentries in this filesystem. specialFileFDs
- // contains all open specialFileFDs. These fields are protected by syncMu.
- syncMu sync.Mutex
- dentries map[*dentry]struct{}
- specialFileFDs map[*specialFileFD]struct{}
-}
-
-type filesystemOptions struct {
- // "Standard" 9P options.
- fd int
- aname string
- interop InteropMode // derived from the "cache" mount option
- msize uint32
- version string
-
- // maxCachedDentries is the maximum number of dentries with 0 references
- // retained by the client.
- maxCachedDentries uint64
-
- // If forcePageCache is true, host FDs may not be used for application
- // memory mappings even if available; instead, the client must perform its
- // own caching of regular file pages. This is primarily useful for testing.
- forcePageCache bool
-
- // If limitHostFDTranslation is true, apply maxFillRange() constraints to
- // host FD mappings returned by dentry.(memmap.Mappable).Translate(). This
- // makes memory accounting behavior more consistent between cases where
- // host FDs are / are not available, but may increase the frequency of
- // sentry-handled page faults on files for which a host FD is available.
- limitHostFDTranslation bool
-
- // If overlayfsStaleRead is true, O_RDONLY host FDs provided by the remote
- // filesystem may not be coherent with writable host FDs opened later, so
- // mappings of the former must be replaced by mappings of the latter. This
- // is usually only the case when the remote filesystem is an overlayfs
- // mount on Linux < 4.19.
- overlayfsStaleRead bool
-
- // If regularFilesUseSpecialFileFD is true, application FDs representing
- // regular files will use distinct file handles for each FD, in the same
- // way that application FDs representing "special files" such as sockets
- // do. Note that this disables client caching and mmap for regular files.
- regularFilesUseSpecialFileFD bool
-}
-
-// InteropMode controls the client's interaction with other remote filesystem
-// users.
-type InteropMode uint32
-
-const (
- // InteropModeExclusive is appropriate when the filesystem client is the
- // only user of the remote filesystem.
- //
- // - The client may cache arbitrary filesystem state (file data, metadata,
- // filesystem structure, etc.).
- //
- // - Client changes to filesystem state may be sent to the remote
- // filesystem asynchronously, except when server permission checks are
- // necessary.
- //
- // - File timestamps are based on client clocks. This ensures that users of
- // the client observe timestamps that are coherent with their own clocks
- // and consistent with Linux's semantics. However, since it is not always
- // possible for clients to set arbitrary atimes and mtimes, and never
- // possible for clients to set arbitrary ctimes, file timestamp changes are
- // stored in the client only and never sent to the remote filesystem.
- InteropModeExclusive InteropMode = iota
-
- // InteropModeWritethrough is appropriate when there are read-only users of
- // the remote filesystem that expect to observe changes made by the
- // filesystem client.
- //
- // - The client may cache arbitrary filesystem state.
- //
- // - Client changes to filesystem state must be sent to the remote
- // filesystem synchronously.
- //
- // - File timestamps are based on client clocks. As a corollary, access
- // timestamp changes from other remote filesystem users will not be visible
- // to the client.
- InteropModeWritethrough
-
- // InteropModeShared is appropriate when there are users of the remote
- // filesystem that may mutate its state other than the client.
- //
- // - The client must verify cached filesystem state before using it.
- //
- // - Client changes to filesystem state must be sent to the remote
- // filesystem synchronously.
- //
- // - File timestamps are based on server clocks. This is necessary to
- // ensure that timestamp changes are synchronized between remote filesystem
- // users.
- //
- // Note that the correctness of InteropModeShared depends on the server
- // correctly implementing 9P fids (i.e. each fid immutably represents a
- // single filesystem object), even in the presence of remote filesystem
- // mutations from other users. If this is violated, the behavior of the
- // client is undefined.
- InteropModeShared
-)
-
-// GetFilesystem implements vfs.FilesystemType.GetFilesystem.
-func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.VirtualFilesystem, creds *auth.Credentials, source string, opts vfs.GetFilesystemOptions) (*vfs.Filesystem, *vfs.Dentry, error) {
- mfp := pgalloc.MemoryFileProviderFromContext(ctx)
- if mfp == nil {
- ctx.Warningf("gofer.FilesystemType.GetFilesystem: context does not provide a pgalloc.MemoryFileProvider")
- return nil, nil, syserror.EINVAL
- }
-
- mopts := vfs.GenericParseMountOptions(opts.Data)
- var fsopts filesystemOptions
-
- // Check that the transport is "fd".
- trans, ok := mopts["trans"]
- if !ok {
- ctx.Warningf("gofer.FilesystemType.GetFilesystem: transport must be specified as 'trans=fd'")
- return nil, nil, syserror.EINVAL
- }
- delete(mopts, "trans")
- if trans != "fd" {
- ctx.Warningf("gofer.FilesystemType.GetFilesystem: unsupported transport: trans=%s", trans)
- return nil, nil, syserror.EINVAL
- }
-
- // Check that read and write FDs are provided and identical.
- rfdstr, ok := mopts["rfdno"]
- if !ok {
- ctx.Warningf("gofer.FilesystemType.GetFilesystem: read FD must be specified as 'rfdno=<file descriptor>")
- return nil, nil, syserror.EINVAL
- }
- delete(mopts, "rfdno")
- rfd, err := strconv.Atoi(rfdstr)
- if err != nil {
- ctx.Warningf("gofer.FilesystemType.GetFilesystem: invalid read FD: rfdno=%s", rfdstr)
- return nil, nil, syserror.EINVAL
- }
- wfdstr, ok := mopts["wfdno"]
- if !ok {
- ctx.Warningf("gofer.FilesystemType.GetFilesystem: write FD must be specified as 'wfdno=<file descriptor>")
- return nil, nil, syserror.EINVAL
- }
- delete(mopts, "wfdno")
- wfd, err := strconv.Atoi(wfdstr)
- if err != nil {
- ctx.Warningf("gofer.FilesystemType.GetFilesystem: invalid write FD: wfdno=%s", wfdstr)
- return nil, nil, syserror.EINVAL
- }
- if rfd != wfd {
- ctx.Warningf("gofer.FilesystemType.GetFilesystem: read FD (%d) and write FD (%d) must be equal", rfd, wfd)
- return nil, nil, syserror.EINVAL
- }
- fsopts.fd = rfd
-
- // Get the attach name.
- fsopts.aname = "/"
- if aname, ok := mopts["aname"]; ok {
- delete(mopts, "aname")
- fsopts.aname = aname
- }
-
- // Parse the cache policy. For historical reasons, this defaults to the
- // least generally-applicable option, InteropModeExclusive.
- fsopts.interop = InteropModeExclusive
- if cache, ok := mopts["cache"]; ok {
- delete(mopts, "cache")
- switch cache {
- case "fscache":
- fsopts.interop = InteropModeExclusive
- case "fscache_writethrough":
- fsopts.interop = InteropModeWritethrough
- case "none":
- fsopts.regularFilesUseSpecialFileFD = true
- fallthrough
- case "remote_revalidating":
- fsopts.interop = InteropModeShared
- default:
- ctx.Warningf("gofer.FilesystemType.GetFilesystem: invalid cache policy: cache=%s", cache)
- return nil, nil, syserror.EINVAL
- }
- }
-
- // Parse the 9P message size.
- fsopts.msize = 1024 * 1024 // 1M, tested to give good enough performance up to 64M
- if msizestr, ok := mopts["msize"]; ok {
- delete(mopts, "msize")
- msize, err := strconv.ParseUint(msizestr, 10, 32)
- if err != nil {
- ctx.Warningf("gofer.FilesystemType.GetFilesystem: invalid message size: msize=%s", msizestr)
- return nil, nil, syserror.EINVAL
- }
- fsopts.msize = uint32(msize)
- }
-
- // Parse the 9P protocol version.
- fsopts.version = p9.HighestVersionString()
- if version, ok := mopts["version"]; ok {
- delete(mopts, "version")
- fsopts.version = version
- }
-
- // Parse the dentry cache limit.
- fsopts.maxCachedDentries = 1000
- if str, ok := mopts["dentry_cache_limit"]; ok {
- delete(mopts, "dentry_cache_limit")
- maxCachedDentries, err := strconv.ParseUint(str, 10, 64)
- if err != nil {
- ctx.Warningf("gofer.FilesystemType.GetFilesystem: invalid dentry cache limit: dentry_cache_limit=%s", str)
- return nil, nil, syserror.EINVAL
- }
- fsopts.maxCachedDentries = maxCachedDentries
- }
-
- // Handle simple flags.
- if _, ok := mopts["force_page_cache"]; ok {
- delete(mopts, "force_page_cache")
- fsopts.forcePageCache = true
- }
- if _, ok := mopts["limit_host_fd_translation"]; ok {
- delete(mopts, "limit_host_fd_translation")
- fsopts.limitHostFDTranslation = true
- }
- if _, ok := mopts["overlayfs_stale_read"]; ok {
- delete(mopts, "overlayfs_stale_read")
- fsopts.overlayfsStaleRead = true
- }
- // fsopts.regularFilesUseSpecialFileFD can only be enabled by specifying
- // "cache=none".
-
- // Check for unparsed options.
- if len(mopts) != 0 {
- ctx.Warningf("gofer.FilesystemType.GetFilesystem: unknown options: %v", mopts)
- return nil, nil, syserror.EINVAL
- }
-
- // Establish a connection with the server.
- conn, err := unet.NewSocket(fsopts.fd)
- if err != nil {
- return nil, nil, err
- }
-
- // Perform version negotiation with the server.
- ctx.UninterruptibleSleepStart(false)
- client, err := p9.NewClient(conn, fsopts.msize, fsopts.version)
- ctx.UninterruptibleSleepFinish(false)
- if err != nil {
- conn.Close()
- return nil, nil, err
- }
- // Ownership of conn has been transferred to client.
-
- // Perform attach to obtain the filesystem root.
- ctx.UninterruptibleSleepStart(false)
- attached, err := client.Attach(fsopts.aname)
- ctx.UninterruptibleSleepFinish(false)
- if err != nil {
- client.Close()
- return nil, nil, err
- }
- attachFile := p9file{attached}
- qid, attrMask, attr, err := attachFile.getAttr(ctx, dentryAttrMask())
- if err != nil {
- attachFile.close(ctx)
- client.Close()
- return nil, nil, err
- }
-
- // Construct the filesystem object.
- fs := &filesystem{
- mfp: mfp,
- opts: fsopts,
- uid: creds.EffectiveKUID,
- gid: creds.EffectiveKGID,
- client: client,
- dentries: make(map[*dentry]struct{}),
- specialFileFDs: make(map[*specialFileFD]struct{}),
- }
- fs.vfsfs.Init(vfsObj, fs)
-
- // Construct the root dentry.
- root, err := fs.newDentry(ctx, attachFile, qid, attrMask, &attr)
- if err != nil {
- attachFile.close(ctx)
- fs.vfsfs.DecRef()
- return nil, nil, err
- }
- // Set the root's reference count to 2. One reference is returned to the
- // caller, and the other is deliberately leaked to prevent the root from
- // being "cached" and subsequently evicted. Its resources will still be
- // cleaned up by fs.Release().
- root.refs = 2
-
- return &fs.vfsfs, &root.vfsd, nil
-}
-
-// Release implements vfs.FilesystemImpl.Release.
-func (fs *filesystem) Release() {
- ctx := context.Background()
- mf := fs.mfp.MemoryFile()
-
- fs.syncMu.Lock()
- for d := range fs.dentries {
- d.handleMu.Lock()
- d.dataMu.Lock()
- if d.handleWritable {
- // Write dirty cached data to the remote file.
- if err := fsutil.SyncDirtyAll(ctx, &d.cache, &d.dirty, d.size, fs.mfp.MemoryFile(), d.handle.writeFromBlocksAt); err != nil {
- log.Warningf("gofer.filesystem.Release: failed to flush dentry: %v", err)
- }
- // TODO(jamieliu): Do we need to flushf/fsync d?
- }
- // Discard cached pages.
- d.cache.DropAll(mf)
- d.dirty.RemoveAll()
- d.dataMu.Unlock()
- // Close the host fd if one exists.
- if d.handle.fd >= 0 {
- syscall.Close(int(d.handle.fd))
- d.handle.fd = -1
- }
- d.handleMu.Unlock()
- }
- // There can't be any specialFileFDs still using fs, since each such
- // FileDescription would hold a reference on a Mount holding a reference on
- // fs.
- fs.syncMu.Unlock()
-
- // Close the connection to the server. This implicitly clunks all fids.
- fs.client.Close()
-}
-
-// dentry implements vfs.DentryImpl.
-type dentry struct {
- vfsd vfs.Dentry
-
- // refs is the reference count. Each dentry holds a reference on its
- // parent, even if disowned. refs is accessed using atomic memory
- // operations.
- refs int64
-
- // fs is the owning filesystem. fs is immutable.
- fs *filesystem
-
- // We don't support hard links, so each dentry maps 1:1 to an inode.
-
- // file is the unopened p9.File that backs this dentry. file is immutable.
- file p9file
-
- // If deleted is non-zero, the file represented by this dentry has been
- // deleted. deleted is accessed using atomic memory operations.
- deleted uint32
-
- // If cached is true, dentryEntry links dentry into
- // filesystem.cachedDentries. cached and dentryEntry are protected by
- // filesystem.renameMu.
- cached bool
- dentryEntry
-
- dirMu sync.Mutex
-
- // If this dentry represents a directory, and InteropModeShared is not in
- // effect, negativeChildren is a set of child names in this directory that
- // are known not to exist. negativeChildren is protected by dirMu.
- negativeChildren map[string]struct{}
-
- // If this dentry represents a directory, InteropModeShared is not in
- // effect, and dirents is not nil, it is a cache of all entries in the
- // directory, in the order they were returned by the server. dirents is
- // protected by dirMu.
- dirents []vfs.Dirent
-
- // Cached metadata; protected by metadataMu and accessed using atomic
- // memory operations unless otherwise specified.
- metadataMu sync.Mutex
- ino uint64 // immutable
- mode uint32 // type is immutable, perms are mutable
- uid uint32 // auth.KUID, but stored as raw uint32 for sync/atomic
- gid uint32 // auth.KGID, but ...
- blockSize uint32 // 0 if unknown
- // Timestamps, all nsecs from the Unix epoch.
- atime int64
- mtime int64
- ctime int64
- btime int64
- // File size, protected by both metadataMu and dataMu (i.e. both must be
- // locked to mutate it).
- size uint64
-
- mapsMu sync.Mutex
-
- // If this dentry represents a regular file, mappings tracks mappings of
- // the file into memmap.MappingSpaces. mappings is protected by mapsMu.
- mappings memmap.MappingSet
-
- // If this dentry represents a regular file or directory:
- //
- // - handle is the I/O handle used by all regularFileFDs/directoryFDs
- // representing this dentry.
- //
- // - handleReadable is true if handle is readable.
- //
- // - handleWritable is true if handle is writable.
- //
- // Invariants:
- //
- // - If handleReadable == handleWritable == false, then handle.file == nil
- // (i.e. there is no open handle). Conversely, if handleReadable ||
- // handleWritable == true, then handle.file != nil (i.e. there is an open
- // handle).
- //
- // - handleReadable and handleWritable cannot transition from true to false
- // (i.e. handles may not be downgraded).
- //
- // These fields are protected by handleMu.
- handleMu sync.RWMutex
- handle handle
- handleReadable bool
- handleWritable bool
-
- dataMu sync.RWMutex
-
- // If this dentry represents a regular file that is client-cached, cache
- // maps offsets into the cached file to offsets into
- // filesystem.mfp.MemoryFile() that store the file's data. cache is
- // protected by dataMu.
- cache fsutil.FileRangeSet
-
- // If this dentry represents a regular file that is client-cached, dirty
- // tracks dirty segments in cache. dirty is protected by dataMu.
- dirty fsutil.DirtySet
-
- // pf implements platform.File for mappings of handle.fd.
- pf dentryPlatformFile
-
- // If this dentry represents a symbolic link, InteropModeShared is not in
- // effect, and haveTarget is true, target is the symlink target. haveTarget
- // and target are protected by dataMu.
- haveTarget bool
- target string
-}
-
-// dentryAttrMask returns a p9.AttrMask enabling all attributes used by the
-// gofer client.
-func dentryAttrMask() p9.AttrMask {
- return p9.AttrMask{
- Mode: true,
- UID: true,
- GID: true,
- ATime: true,
- MTime: true,
- CTime: true,
- Size: true,
- BTime: true,
- }
-}
-
-// newDentry creates a new dentry representing the given file. The dentry
-// initially has no references, but is not cached; it is the caller's
-// responsibility to set the dentry's reference count and/or call
-// dentry.checkCachingLocked() as appropriate.
-func (fs *filesystem) newDentry(ctx context.Context, file p9file, qid p9.QID, mask p9.AttrMask, attr *p9.Attr) (*dentry, error) {
- if !mask.Mode {
- ctx.Warningf("can't create gofer.dentry without file type")
- return nil, syserror.EIO
- }
- if attr.Mode.FileType() == p9.ModeRegular && !mask.Size {
- ctx.Warningf("can't create regular file gofer.dentry without file size")
- return nil, syserror.EIO
- }
-
- d := &dentry{
- fs: fs,
- file: file,
- ino: qid.Path,
- mode: uint32(attr.Mode),
- uid: uint32(fs.uid),
- gid: uint32(fs.gid),
- blockSize: usermem.PageSize,
- handle: handle{
- fd: -1,
- },
- }
- d.pf.dentry = d
- if mask.UID {
- d.uid = uint32(attr.UID)
- }
- if mask.GID {
- d.gid = uint32(attr.GID)
- }
- if mask.Size {
- d.size = attr.Size
- }
- if attr.BlockSize != 0 {
- d.blockSize = uint32(attr.BlockSize)
- }
- if mask.ATime {
- d.atime = dentryTimestampFromP9(attr.ATimeSeconds, attr.ATimeNanoSeconds)
- }
- if mask.MTime {
- d.mtime = dentryTimestampFromP9(attr.MTimeSeconds, attr.MTimeNanoSeconds)
- }
- if mask.CTime {
- d.ctime = dentryTimestampFromP9(attr.CTimeSeconds, attr.CTimeNanoSeconds)
- }
- if mask.BTime {
- d.btime = dentryTimestampFromP9(attr.BTimeSeconds, attr.BTimeNanoSeconds)
- }
- d.vfsd.Init(d)
-
- fs.syncMu.Lock()
- fs.dentries[d] = struct{}{}
- fs.syncMu.Unlock()
- return d, nil
-}
-
-// updateFromP9Attrs is called to update d's metadata after an update from the
-// remote filesystem.
-func (d *dentry) updateFromP9Attrs(mask p9.AttrMask, attr *p9.Attr) {
- d.metadataMu.Lock()
- if mask.Mode {
- if got, want := uint32(attr.Mode.FileType()), d.fileType(); got != want {
- d.metadataMu.Unlock()
- panic(fmt.Sprintf("gofer.dentry file type changed from %#o to %#o", want, got))
- }
- atomic.StoreUint32(&d.mode, uint32(attr.Mode))
- }
- if mask.UID {
- atomic.StoreUint32(&d.uid, uint32(attr.UID))
- }
- if mask.GID {
- atomic.StoreUint32(&d.gid, uint32(attr.GID))
- }
- // There is no P9_GETATTR_* bit for I/O block size.
- if attr.BlockSize != 0 {
- atomic.StoreUint32(&d.blockSize, uint32(attr.BlockSize))
- }
- if mask.ATime {
- atomic.StoreInt64(&d.atime, dentryTimestampFromP9(attr.ATimeSeconds, attr.ATimeNanoSeconds))
- }
- if mask.MTime {
- atomic.StoreInt64(&d.mtime, dentryTimestampFromP9(attr.MTimeSeconds, attr.MTimeNanoSeconds))
- }
- if mask.CTime {
- atomic.StoreInt64(&d.ctime, dentryTimestampFromP9(attr.CTimeSeconds, attr.CTimeNanoSeconds))
- }
- if mask.BTime {
- atomic.StoreInt64(&d.btime, dentryTimestampFromP9(attr.BTimeSeconds, attr.BTimeNanoSeconds))
- }
- if mask.Size {
- d.dataMu.Lock()
- atomic.StoreUint64(&d.size, attr.Size)
- d.dataMu.Unlock()
- }
- d.metadataMu.Unlock()
-}
-
-func (d *dentry) updateFromGetattr(ctx context.Context) error {
- // Use d.handle.file, which represents a 9P fid that has been opened, in
- // preference to d.file, which represents a 9P fid that has not. This may
- // be significantly more efficient in some implementations.
- var (
- file p9file
- handleMuRLocked bool
- )
- d.handleMu.RLock()
- if !d.handle.file.isNil() {
- file = d.handle.file
- handleMuRLocked = true
- } else {
- file = d.file
- d.handleMu.RUnlock()
- }
- _, attrMask, attr, err := file.getAttr(ctx, dentryAttrMask())
- if handleMuRLocked {
- d.handleMu.RUnlock()
- }
- if err != nil {
- return err
- }
- d.updateFromP9Attrs(attrMask, &attr)
- return nil
-}
-
-func (d *dentry) fileType() uint32 {
- return atomic.LoadUint32(&d.mode) & linux.S_IFMT
-}
-
-func (d *dentry) statTo(stat *linux.Statx) {
- stat.Mask = linux.STATX_TYPE | linux.STATX_MODE | linux.STATX_NLINK | linux.STATX_UID | linux.STATX_GID | linux.STATX_ATIME | linux.STATX_MTIME | linux.STATX_CTIME | linux.STATX_INO | linux.STATX_SIZE | linux.STATX_BLOCKS | linux.STATX_BTIME
- stat.Blksize = atomic.LoadUint32(&d.blockSize)
- stat.Nlink = 1
- if d.isDir() {
- stat.Nlink = 2
- }
- stat.UID = atomic.LoadUint32(&d.uid)
- stat.GID = atomic.LoadUint32(&d.gid)
- stat.Mode = uint16(atomic.LoadUint32(&d.mode))
- stat.Ino = d.ino
- stat.Size = atomic.LoadUint64(&d.size)
- // This is consistent with regularFileFD.Seek(), which treats regular files
- // as having no holes.
- stat.Blocks = (stat.Size + 511) / 512
- stat.Atime = statxTimestampFromDentry(atomic.LoadInt64(&d.atime))
- stat.Btime = statxTimestampFromDentry(atomic.LoadInt64(&d.btime))
- stat.Ctime = statxTimestampFromDentry(atomic.LoadInt64(&d.ctime))
- stat.Mtime = statxTimestampFromDentry(atomic.LoadInt64(&d.mtime))
- // TODO(jamieliu): device number
-}
-
-func (d *dentry) setStat(ctx context.Context, creds *auth.Credentials, stat *linux.Statx, mnt *vfs.Mount) error {
- if stat.Mask == 0 {
- return nil
- }
- if stat.Mask&^(linux.STATX_MODE|linux.STATX_UID|linux.STATX_GID|linux.STATX_ATIME|linux.STATX_MTIME|linux.STATX_SIZE) != 0 {
- return syserror.EPERM
- }
- if err := vfs.CheckSetStat(creds, stat, uint16(atomic.LoadUint32(&d.mode))&^linux.S_IFMT, auth.KUID(atomic.LoadUint32(&d.uid)), auth.KGID(atomic.LoadUint32(&d.gid))); err != nil {
- return err
- }
- if err := mnt.CheckBeginWrite(); err != nil {
- return err
- }
- defer mnt.EndWrite()
- setLocalAtime := false
- setLocalMtime := false
- if d.fs.opts.interop != InteropModeShared {
- // Timestamp updates will be handled locally.
- setLocalAtime = stat.Mask&linux.STATX_ATIME != 0
- setLocalMtime = stat.Mask&linux.STATX_MTIME != 0
- stat.Mask &^= linux.STATX_ATIME | linux.STATX_MTIME
- if !setLocalMtime && (stat.Mask&linux.STATX_SIZE != 0) {
- // Truncate updates mtime.
- setLocalMtime = true
- stat.Mtime.Nsec = linux.UTIME_NOW
- }
- }
- d.metadataMu.Lock()
- defer d.metadataMu.Unlock()
- if stat.Mask != 0 {
- if err := d.file.setAttr(ctx, p9.SetAttrMask{
- Permissions: stat.Mask&linux.STATX_MODE != 0,
- UID: stat.Mask&linux.STATX_UID != 0,
- GID: stat.Mask&linux.STATX_GID != 0,
- Size: stat.Mask&linux.STATX_SIZE != 0,
- ATime: stat.Mask&linux.STATX_ATIME != 0,
- MTime: stat.Mask&linux.STATX_MTIME != 0,
- ATimeNotSystemTime: stat.Atime.Nsec != linux.UTIME_NOW,
- MTimeNotSystemTime: stat.Mtime.Nsec != linux.UTIME_NOW,
- }, p9.SetAttr{
- Permissions: p9.FileMode(stat.Mode),
- UID: p9.UID(stat.UID),
- GID: p9.GID(stat.GID),
- Size: stat.Size,
- ATimeSeconds: uint64(stat.Atime.Sec),
- ATimeNanoSeconds: uint64(stat.Atime.Nsec),
- MTimeSeconds: uint64(stat.Mtime.Sec),
- MTimeNanoSeconds: uint64(stat.Mtime.Nsec),
- }); err != nil {
- return err
- }
- }
- if d.fs.opts.interop == InteropModeShared {
- // There's no point to updating d's metadata in this case since it'll
- // be overwritten by revalidation before the next time it's used
- // anyway. (InteropModeShared inhibits client caching of regular file
- // data, so there's no cache to truncate either.)
- return nil
- }
- now, haveNow := nowFromContext(ctx)
- if !haveNow {
- ctx.Warningf("gofer.dentry.setStat: current time not available")
- }
- if stat.Mask&linux.STATX_MODE != 0 {
- atomic.StoreUint32(&d.mode, d.fileType()|uint32(stat.Mode))
- }
- if stat.Mask&linux.STATX_UID != 0 {
- atomic.StoreUint32(&d.uid, stat.UID)
- }
- if stat.Mask&linux.STATX_GID != 0 {
- atomic.StoreUint32(&d.gid, stat.GID)
- }
- if setLocalAtime {
- if stat.Atime.Nsec == linux.UTIME_NOW {
- if haveNow {
- atomic.StoreInt64(&d.atime, now)
- }
- } else {
- atomic.StoreInt64(&d.atime, dentryTimestampFromStatx(stat.Atime))
- }
- }
- if setLocalMtime {
- if stat.Mtime.Nsec == linux.UTIME_NOW {
- if haveNow {
- atomic.StoreInt64(&d.mtime, now)
- }
- } else {
- atomic.StoreInt64(&d.mtime, dentryTimestampFromStatx(stat.Mtime))
- }
- }
- if haveNow {
- atomic.StoreInt64(&d.ctime, now)
- }
- if stat.Mask&linux.STATX_SIZE != 0 {
- d.dataMu.Lock()
- oldSize := d.size
- d.size = stat.Size
- // d.dataMu must be unlocked to lock d.mapsMu and invalidate mappings
- // below. This allows concurrent calls to Read/Translate/etc. These
- // functions synchronize with truncation by refusing to use cache
- // contents beyond the new d.size. (We are still holding d.metadataMu,
- // so we can't race with Write or another truncate.)
- d.dataMu.Unlock()
- if d.size < oldSize {
- oldpgend := pageRoundUp(oldSize)
- newpgend := pageRoundUp(d.size)
- if oldpgend != newpgend {
- d.mapsMu.Lock()
- d.mappings.Invalidate(memmap.MappableRange{newpgend, oldpgend}, memmap.InvalidateOpts{
- // Compare Linux's mm/truncate.c:truncate_setsize() =>
- // truncate_pagecache() =>
- // mm/memory.c:unmap_mapping_range(evencows=1).
- InvalidatePrivate: true,
- })
- d.mapsMu.Unlock()
- }
- // We are now guaranteed that there are no translations of
- // truncated pages, and can remove them from the cache. Since
- // truncated pages have been removed from the remote file, they
- // should be dropped without being written back.
- d.dataMu.Lock()
- d.cache.Truncate(d.size, d.fs.mfp.MemoryFile())
- d.dirty.KeepClean(memmap.MappableRange{d.size, oldpgend})
- d.dataMu.Unlock()
- }
- }
- return nil
-}
-
-func (d *dentry) checkPermissions(creds *auth.Credentials, ats vfs.AccessTypes, isDir bool) error {
- return vfs.GenericCheckPermissions(creds, ats, isDir, uint16(atomic.LoadUint32(&d.mode))&0777, auth.KUID(atomic.LoadUint32(&d.uid)), auth.KGID(atomic.LoadUint32(&d.gid)))
-}
-
-// IncRef implements vfs.DentryImpl.IncRef.
-func (d *dentry) IncRef() {
- // d.refs may be 0 if d.fs.renameMu is locked, which serializes against
- // d.checkCachingLocked().
- atomic.AddInt64(&d.refs, 1)
-}
-
-// TryIncRef implements vfs.DentryImpl.TryIncRef.
-func (d *dentry) TryIncRef() bool {
- for {
- refs := atomic.LoadInt64(&d.refs)
- if refs == 0 {
- return false
- }
- if atomic.CompareAndSwapInt64(&d.refs, refs, refs+1) {
- return true
- }
- }
-}
-
-// DecRef implements vfs.DentryImpl.DecRef.
-func (d *dentry) DecRef() {
- if refs := atomic.AddInt64(&d.refs, -1); refs == 0 {
- d.fs.renameMu.Lock()
- d.checkCachingLocked()
- d.fs.renameMu.Unlock()
- } else if refs < 0 {
- panic("gofer.dentry.DecRef() called without holding a reference")
- }
-}
-
-// checkCachingLocked should be called after d's reference count becomes 0 or it
-// becomes disowned.
-//
-// Preconditions: d.fs.renameMu must be locked for writing.
-func (d *dentry) checkCachingLocked() {
- // Dentries with a non-zero reference count must be retained. (The only way
- // to obtain a reference on a dentry with zero references is via path
- // resolution, which requires renameMu, so if d.refs is zero then it will
- // remain zero while we hold renameMu for writing.)
- if atomic.LoadInt64(&d.refs) != 0 {
- if d.cached {
- d.fs.cachedDentries.Remove(d)
- d.fs.cachedDentriesLen--
- d.cached = false
- }
- return
- }
- // Non-child dentries with zero references are no longer reachable by path
- // resolution and should be dropped immediately.
- if d.vfsd.Parent() == nil || d.vfsd.IsDisowned() {
- if d.cached {
- d.fs.cachedDentries.Remove(d)
- d.fs.cachedDentriesLen--
- d.cached = false
- }
- d.destroyLocked()
- return
- }
- // If d is already cached, just move it to the front of the LRU.
- if d.cached {
- d.fs.cachedDentries.Remove(d)
- d.fs.cachedDentries.PushFront(d)
- return
- }
- // Cache the dentry, then evict the least recently used cached dentry if
- // the cache becomes over-full.
- d.fs.cachedDentries.PushFront(d)
- d.fs.cachedDentriesLen++
- d.cached = true
- if d.fs.cachedDentriesLen > d.fs.opts.maxCachedDentries {
- victim := d.fs.cachedDentries.Back()
- d.fs.cachedDentries.Remove(victim)
- d.fs.cachedDentriesLen--
- victim.cached = false
- // victim.refs may have become non-zero from an earlier path
- // resolution since it was inserted into fs.cachedDentries; see
- // dentry.incRefLocked(). Either way, we brought
- // fs.cachedDentriesLen back down to fs.opts.maxCachedDentries, so
- // we don't loop.
- if atomic.LoadInt64(&victim.refs) == 0 {
- if victimParentVFSD := victim.vfsd.Parent(); victimParentVFSD != nil {
- victimParent := victimParentVFSD.Impl().(*dentry)
- victimParent.dirMu.Lock()
- if !victim.vfsd.IsDisowned() {
- // victim can't be a mount point (in any mount
- // namespace), since VFS holds references on mount
- // points.
- d.fs.vfsfs.VirtualFilesystem().ForceDeleteDentry(&victim.vfsd)
- // We're only deleting the dentry, not the file it
- // represents, so we don't need to update
- // victimParent.dirents etc.
- }
- victimParent.dirMu.Unlock()
- }
- victim.destroyLocked()
- }
- }
-}
-
-// Preconditions: d.fs.renameMu must be locked for writing. d.refs == 0. d is
-// not a child dentry.
-func (d *dentry) destroyLocked() {
- ctx := context.Background()
- d.handleMu.Lock()
- if !d.handle.file.isNil() {
- mf := d.fs.mfp.MemoryFile()
- d.dataMu.Lock()
- // Write dirty pages back to the remote filesystem.
- if d.handleWritable {
- if err := fsutil.SyncDirtyAll(ctx, &d.cache, &d.dirty, d.size, mf, d.handle.writeFromBlocksAt); err != nil {
- log.Warningf("gofer.dentry.DecRef: failed to write dirty data back: %v", err)
- }
- }
- // Discard cached data.
- d.cache.DropAll(mf)
- d.dirty.RemoveAll()
- d.dataMu.Unlock()
- // Clunk open fids and close open host FDs.
- d.handle.close(ctx)
- }
- d.handleMu.Unlock()
- d.file.close(ctx)
- // Remove d from the set of all dentries.
- d.fs.syncMu.Lock()
- delete(d.fs.dentries, d)
- d.fs.syncMu.Unlock()
- // Drop the reference held by d on its parent.
- if parentVFSD := d.vfsd.Parent(); parentVFSD != nil {
- parent := parentVFSD.Impl().(*dentry)
- // This is parent.DecRef() without recursive locking of d.fs.renameMu.
- if refs := atomic.AddInt64(&parent.refs, -1); refs == 0 {
- parent.checkCachingLocked()
- } else if refs < 0 {
- panic("gofer.dentry.DecRef() called without holding a reference")
- }
- }
-}
-
-func (d *dentry) isDeleted() bool {
- return atomic.LoadUint32(&d.deleted) != 0
-}
-
-func (d *dentry) setDeleted() {
- atomic.StoreUint32(&d.deleted, 1)
-}
-
-func (d *dentry) listxattr(ctx context.Context) ([]string, error) {
- return nil, syserror.ENOTSUP
-}
-
-func (d *dentry) getxattr(ctx context.Context, name string) (string, error) {
- // TODO(jamieliu): add vfs.GetxattrOptions.Size
- return d.file.getXattr(ctx, name, linux.XATTR_SIZE_MAX)
-}
-
-func (d *dentry) setxattr(ctx context.Context, opts *vfs.SetxattrOptions) error {
- return d.file.setXattr(ctx, opts.Name, opts.Value, opts.Flags)
-}
-
-func (d *dentry) removexattr(ctx context.Context, name string) error {
- return syserror.ENOTSUP
-}
-
-// Preconditions: d.isRegularFile() || d.isDirectory().
-func (d *dentry) ensureSharedHandle(ctx context.Context, read, write, trunc bool) error {
- // O_TRUNC unconditionally requires us to obtain a new handle (opened with
- // O_TRUNC).
- if !trunc {
- d.handleMu.RLock()
- if (!read || d.handleReadable) && (!write || d.handleWritable) {
- // The current handle is sufficient.
- d.handleMu.RUnlock()
- return nil
- }
- d.handleMu.RUnlock()
- }
-
- haveOldFD := false
- d.handleMu.Lock()
- if (read && !d.handleReadable) || (write && !d.handleWritable) || trunc {
- // Get a new handle.
- wantReadable := d.handleReadable || read
- wantWritable := d.handleWritable || write
- h, err := openHandle(ctx, d.file, wantReadable, wantWritable, trunc)
- if err != nil {
- d.handleMu.Unlock()
- return err
- }
- if !d.handle.file.isNil() {
- // Check that old and new handles are compatible: If the old handle
- // includes a host file descriptor but the new one does not, or
- // vice versa, old and new memory mappings may be incoherent.
- haveOldFD = d.handle.fd >= 0
- haveNewFD := h.fd >= 0
- if haveOldFD != haveNewFD {
- d.handleMu.Unlock()
- ctx.Warningf("gofer.dentry.ensureSharedHandle: can't change host FD availability from %v to %v across dentry handle upgrade", haveOldFD, haveNewFD)
- h.close(ctx)
- return syserror.EIO
- }
- if haveOldFD {
- // We may have raced with callers of d.pf.FD() that are now
- // using the old file descriptor, preventing us from safely
- // closing it. We could handle this by invalidating existing
- // memmap.Translations, but this is expensive. Instead, use
- // dup3 to make the old file descriptor refer to the new file
- // description, then close the new file descriptor (which is no
- // longer needed). Racing callers may use the old or new file
- // description, but this doesn't matter since they refer to the
- // same file (unless d.fs.opts.overlayfsStaleRead is true,
- // which we handle separately).
- if err := syscall.Dup3(int(h.fd), int(d.handle.fd), 0); err != nil {
- d.handleMu.Unlock()
- ctx.Warningf("gofer.dentry.ensureSharedHandle: failed to dup fd %d to fd %d: %v", h.fd, d.handle.fd, err)
- h.close(ctx)
- return err
- }
- syscall.Close(int(h.fd))
- h.fd = d.handle.fd
- if d.fs.opts.overlayfsStaleRead {
- // Replace sentry mappings of the old FD with mappings of
- // the new FD, since the two are not necessarily coherent.
- if err := d.pf.hostFileMapper.RegenerateMappings(int(h.fd)); err != nil {
- d.handleMu.Unlock()
- ctx.Warningf("gofer.dentry.ensureSharedHandle: failed to replace sentry mappings of old FD with mappings of new FD: %v", err)
- h.close(ctx)
- return err
- }
- }
- // Clunk the old fid before making the new handle visible (by
- // unlocking d.handleMu).
- d.handle.file.close(ctx)
- }
- }
- // Switch to the new handle.
- d.handle = h
- d.handleReadable = wantReadable
- d.handleWritable = wantWritable
- }
- d.handleMu.Unlock()
-
- if d.fs.opts.overlayfsStaleRead && haveOldFD {
- // Invalidate application mappings that may be using the old FD; they
- // will be replaced with mappings using the new FD after future calls
- // to d.Translate(). This requires holding d.mapsMu, which precedes
- // d.handleMu in the lock order.
- d.mapsMu.Lock()
- d.mappings.InvalidateAll(memmap.InvalidateOpts{})
- d.mapsMu.Unlock()
- }
-
- return nil
-}
-
-// fileDescription is embedded by gofer implementations of
-// vfs.FileDescriptionImpl.
-type fileDescription struct {
- vfsfd vfs.FileDescription
- vfs.FileDescriptionDefaultImpl
-}
-
-func (fd *fileDescription) filesystem() *filesystem {
- return fd.vfsfd.Mount().Filesystem().Impl().(*filesystem)
-}
-
-func (fd *fileDescription) dentry() *dentry {
- return fd.vfsfd.Dentry().Impl().(*dentry)
-}
-
-// Stat implements vfs.FileDescriptionImpl.Stat.
-func (fd *fileDescription) Stat(ctx context.Context, opts vfs.StatOptions) (linux.Statx, error) {
- d := fd.dentry()
- if d.fs.opts.interop == InteropModeShared && opts.Mask&(linux.STATX_MODE|linux.STATX_UID|linux.STATX_GID|linux.STATX_ATIME|linux.STATX_MTIME|linux.STATX_CTIME|linux.STATX_SIZE|linux.STATX_BLOCKS|linux.STATX_BTIME) != 0 && opts.Sync != linux.AT_STATX_DONT_SYNC {
- // TODO(jamieliu): Use specialFileFD.handle.file for the getattr if
- // available?
- if err := d.updateFromGetattr(ctx); err != nil {
- return linux.Statx{}, err
- }
- }
- var stat linux.Statx
- d.statTo(&stat)
- return stat, nil
-}
-
-// SetStat implements vfs.FileDescriptionImpl.SetStat.
-func (fd *fileDescription) SetStat(ctx context.Context, opts vfs.SetStatOptions) error {
- return fd.dentry().setStat(ctx, auth.CredentialsFromContext(ctx), &opts.Stat, fd.vfsfd.Mount())
-}
-
-// Listxattr implements vfs.FileDescriptionImpl.Listxattr.
-func (fd *fileDescription) Listxattr(ctx context.Context) ([]string, error) {
- return fd.dentry().listxattr(ctx)
-}
-
-// Getxattr implements vfs.FileDescriptionImpl.Getxattr.
-func (fd *fileDescription) Getxattr(ctx context.Context, name string) (string, error) {
- return fd.dentry().getxattr(ctx, name)
-}
-
-// Setxattr implements vfs.FileDescriptionImpl.Setxattr.
-func (fd *fileDescription) Setxattr(ctx context.Context, opts vfs.SetxattrOptions) error {
- return fd.dentry().setxattr(ctx, &opts)
-}
-
-// Removexattr implements vfs.FileDescriptionImpl.Removexattr.
-func (fd *fileDescription) Removexattr(ctx context.Context, name string) error {
- return fd.dentry().removexattr(ctx, name)
-}
diff --git a/pkg/sentry/fsimpl/gofer/handle.go b/pkg/sentry/fsimpl/gofer/handle.go
deleted file mode 100644
index cfe66f797..000000000
--- a/pkg/sentry/fsimpl/gofer/handle.go
+++ /dev/null
@@ -1,135 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package gofer
-
-import (
- "syscall"
-
- "gvisor.dev/gvisor/pkg/context"
- "gvisor.dev/gvisor/pkg/p9"
- "gvisor.dev/gvisor/pkg/safemem"
-)
-
-// handle represents a remote "open file descriptor", consisting of an opened
-// fid (p9.File) and optionally a host file descriptor.
-type handle struct {
- file p9file
- fd int32 // -1 if unavailable
-}
-
-// Preconditions: read || write.
-func openHandle(ctx context.Context, file p9file, read, write, trunc bool) (handle, error) {
- _, newfile, err := file.walk(ctx, nil)
- if err != nil {
- return handle{fd: -1}, err
- }
- var flags p9.OpenFlags
- switch {
- case read && !write:
- flags = p9.ReadOnly
- case !read && write:
- flags = p9.WriteOnly
- case read && write:
- flags = p9.ReadWrite
- }
- if trunc {
- flags |= p9.OpenTruncate
- }
- fdobj, _, _, err := newfile.open(ctx, flags)
- if err != nil {
- newfile.close(ctx)
- return handle{fd: -1}, err
- }
- fd := int32(-1)
- if fdobj != nil {
- fd = int32(fdobj.Release())
- }
- return handle{
- file: newfile,
- fd: fd,
- }, nil
-}
-
-func (h *handle) close(ctx context.Context) {
- h.file.close(ctx)
- h.file = p9file{}
- if h.fd >= 0 {
- syscall.Close(int(h.fd))
- h.fd = -1
- }
-}
-
-func (h *handle) readToBlocksAt(ctx context.Context, dsts safemem.BlockSeq, offset uint64) (uint64, error) {
- if dsts.IsEmpty() {
- return 0, nil
- }
- if h.fd >= 0 {
- ctx.UninterruptibleSleepStart(false)
- n, err := hostPreadv(h.fd, dsts, int64(offset))
- ctx.UninterruptibleSleepFinish(false)
- return n, err
- }
- if dsts.NumBlocks() == 1 && !dsts.Head().NeedSafecopy() {
- n, err := h.file.readAt(ctx, dsts.Head().ToSlice(), offset)
- return uint64(n), err
- }
- // Buffer the read since p9.File.ReadAt() takes []byte.
- buf := make([]byte, dsts.NumBytes())
- n, err := h.file.readAt(ctx, buf, offset)
- if n == 0 {
- return 0, err
- }
- if cp, cperr := safemem.CopySeq(dsts, safemem.BlockSeqOf(safemem.BlockFromSafeSlice(buf[:n]))); cperr != nil {
- return cp, cperr
- }
- return uint64(n), err
-}
-
-func (h *handle) writeFromBlocksAt(ctx context.Context, srcs safemem.BlockSeq, offset uint64) (uint64, error) {
- if srcs.IsEmpty() {
- return 0, nil
- }
- if h.fd >= 0 {
- ctx.UninterruptibleSleepStart(false)
- n, err := hostPwritev(h.fd, srcs, int64(offset))
- ctx.UninterruptibleSleepFinish(false)
- return n, err
- }
- if srcs.NumBlocks() == 1 && !srcs.Head().NeedSafecopy() {
- n, err := h.file.writeAt(ctx, srcs.Head().ToSlice(), offset)
- return uint64(n), err
- }
- // Buffer the write since p9.File.WriteAt() takes []byte.
- buf := make([]byte, srcs.NumBytes())
- cp, cperr := safemem.CopySeq(safemem.BlockSeqOf(safemem.BlockFromSafeSlice(buf)), srcs)
- if cp == 0 {
- return 0, cperr
- }
- n, err := h.file.writeAt(ctx, buf[:cp], offset)
- if err != nil {
- return uint64(n), err
- }
- return cp, cperr
-}
-
-func (h *handle) sync(ctx context.Context) error {
- if h.fd >= 0 {
- ctx.UninterruptibleSleepStart(false)
- err := syscall.Fsync(int(h.fd))
- ctx.UninterruptibleSleepFinish(false)
- return err
- }
- return h.file.fsync(ctx)
-}
diff --git a/pkg/sentry/fsimpl/gofer/handle_unsafe.go b/pkg/sentry/fsimpl/gofer/handle_unsafe.go
deleted file mode 100644
index 19560ab26..000000000
--- a/pkg/sentry/fsimpl/gofer/handle_unsafe.go
+++ /dev/null
@@ -1,66 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package gofer
-
-import (
- "syscall"
- "unsafe"
-
- "gvisor.dev/gvisor/pkg/safemem"
-)
-
-// Preconditions: !dsts.IsEmpty().
-func hostPreadv(fd int32, dsts safemem.BlockSeq, off int64) (uint64, error) {
- // No buffering is necessary regardless of safecopy; host syscalls will
- // return EFAULT if appropriate, instead of raising SIGBUS.
- if dsts.NumBlocks() == 1 {
- // Use pread() instead of preadv() to avoid iovec allocation and
- // copying.
- dst := dsts.Head()
- n, _, e := syscall.Syscall6(syscall.SYS_PREAD64, uintptr(fd), dst.Addr(), uintptr(dst.Len()), uintptr(off), 0, 0)
- if e != 0 {
- return 0, e
- }
- return uint64(n), nil
- }
- iovs := safemem.IovecsFromBlockSeq(dsts)
- n, _, e := syscall.Syscall6(syscall.SYS_PREADV, uintptr(fd), uintptr((unsafe.Pointer)(&iovs[0])), uintptr(len(iovs)), uintptr(off), 0, 0)
- if e != 0 {
- return 0, e
- }
- return uint64(n), nil
-}
-
-// Preconditions: !srcs.IsEmpty().
-func hostPwritev(fd int32, srcs safemem.BlockSeq, off int64) (uint64, error) {
- // No buffering is necessary regardless of safecopy; host syscalls will
- // return EFAULT if appropriate, instead of raising SIGBUS.
- if srcs.NumBlocks() == 1 {
- // Use pwrite() instead of pwritev() to avoid iovec allocation and
- // copying.
- src := srcs.Head()
- n, _, e := syscall.Syscall6(syscall.SYS_PWRITE64, uintptr(fd), src.Addr(), uintptr(src.Len()), uintptr(off), 0, 0)
- if e != 0 {
- return 0, e
- }
- return uint64(n), nil
- }
- iovs := safemem.IovecsFromBlockSeq(srcs)
- n, _, e := syscall.Syscall6(syscall.SYS_PWRITEV, uintptr(fd), uintptr((unsafe.Pointer)(&iovs[0])), uintptr(len(iovs)), uintptr(off), 0, 0)
- if e != 0 {
- return 0, e
- }
- return uint64(n), nil
-}
diff --git a/pkg/sentry/fsimpl/gofer/p9file.go b/pkg/sentry/fsimpl/gofer/p9file.go
deleted file mode 100644
index 755ac2985..000000000
--- a/pkg/sentry/fsimpl/gofer/p9file.go
+++ /dev/null
@@ -1,219 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package gofer
-
-import (
- "gvisor.dev/gvisor/pkg/context"
- "gvisor.dev/gvisor/pkg/fd"
- "gvisor.dev/gvisor/pkg/p9"
- "gvisor.dev/gvisor/pkg/syserror"
-)
-
-// p9file is a wrapper around p9.File that provides methods that are
-// Context-aware.
-type p9file struct {
- file p9.File
-}
-
-func (f p9file) isNil() bool {
- return f.file == nil
-}
-
-func (f p9file) walk(ctx context.Context, names []string) ([]p9.QID, p9file, error) {
- ctx.UninterruptibleSleepStart(false)
- qids, newfile, err := f.file.Walk(names)
- ctx.UninterruptibleSleepFinish(false)
- return qids, p9file{newfile}, err
-}
-
-func (f p9file) walkGetAttr(ctx context.Context, names []string) ([]p9.QID, p9file, p9.AttrMask, p9.Attr, error) {
- ctx.UninterruptibleSleepStart(false)
- qids, newfile, attrMask, attr, err := f.file.WalkGetAttr(names)
- ctx.UninterruptibleSleepFinish(false)
- return qids, p9file{newfile}, attrMask, attr, err
-}
-
-// walkGetAttrOne is a wrapper around p9.File.WalkGetAttr that takes a single
-// path component and returns a single qid.
-func (f p9file) walkGetAttrOne(ctx context.Context, name string) (p9.QID, p9file, p9.AttrMask, p9.Attr, error) {
- ctx.UninterruptibleSleepStart(false)
- qids, newfile, attrMask, attr, err := f.file.WalkGetAttr([]string{name})
- ctx.UninterruptibleSleepFinish(false)
- if err != nil {
- return p9.QID{}, p9file{}, p9.AttrMask{}, p9.Attr{}, err
- }
- if len(qids) != 1 {
- ctx.Warningf("p9.File.WalkGetAttr returned %d qids (%v), wanted 1", len(qids), qids)
- if newfile != nil {
- p9file{newfile}.close(ctx)
- }
- return p9.QID{}, p9file{}, p9.AttrMask{}, p9.Attr{}, syserror.EIO
- }
- return qids[0], p9file{newfile}, attrMask, attr, nil
-}
-
-func (f p9file) statFS(ctx context.Context) (p9.FSStat, error) {
- ctx.UninterruptibleSleepStart(false)
- fsstat, err := f.file.StatFS()
- ctx.UninterruptibleSleepFinish(false)
- return fsstat, err
-}
-
-func (f p9file) getAttr(ctx context.Context, req p9.AttrMask) (p9.QID, p9.AttrMask, p9.Attr, error) {
- ctx.UninterruptibleSleepStart(false)
- qid, attrMask, attr, err := f.file.GetAttr(req)
- ctx.UninterruptibleSleepFinish(false)
- return qid, attrMask, attr, err
-}
-
-func (f p9file) setAttr(ctx context.Context, valid p9.SetAttrMask, attr p9.SetAttr) error {
- ctx.UninterruptibleSleepStart(false)
- err := f.file.SetAttr(valid, attr)
- ctx.UninterruptibleSleepFinish(false)
- return err
-}
-
-func (f p9file) getXattr(ctx context.Context, name string, size uint64) (string, error) {
- ctx.UninterruptibleSleepStart(false)
- val, err := f.file.GetXattr(name, size)
- ctx.UninterruptibleSleepFinish(false)
- return val, err
-}
-
-func (f p9file) setXattr(ctx context.Context, name, value string, flags uint32) error {
- ctx.UninterruptibleSleepStart(false)
- err := f.file.SetXattr(name, value, flags)
- ctx.UninterruptibleSleepFinish(false)
- return err
-}
-
-func (f p9file) allocate(ctx context.Context, mode p9.AllocateMode, offset, length uint64) error {
- ctx.UninterruptibleSleepStart(false)
- err := f.file.Allocate(mode, offset, length)
- ctx.UninterruptibleSleepFinish(false)
- return err
-}
-
-func (f p9file) close(ctx context.Context) error {
- ctx.UninterruptibleSleepStart(false)
- err := f.file.Close()
- ctx.UninterruptibleSleepFinish(false)
- return err
-}
-
-func (f p9file) open(ctx context.Context, flags p9.OpenFlags) (*fd.FD, p9.QID, uint32, error) {
- ctx.UninterruptibleSleepStart(false)
- fdobj, qid, iounit, err := f.file.Open(flags)
- ctx.UninterruptibleSleepFinish(false)
- return fdobj, qid, iounit, err
-}
-
-func (f p9file) readAt(ctx context.Context, p []byte, offset uint64) (int, error) {
- ctx.UninterruptibleSleepStart(false)
- n, err := f.file.ReadAt(p, offset)
- ctx.UninterruptibleSleepFinish(false)
- return n, err
-}
-
-func (f p9file) writeAt(ctx context.Context, p []byte, offset uint64) (int, error) {
- ctx.UninterruptibleSleepStart(false)
- n, err := f.file.WriteAt(p, offset)
- ctx.UninterruptibleSleepFinish(false)
- return n, err
-}
-
-func (f p9file) fsync(ctx context.Context) error {
- ctx.UninterruptibleSleepStart(false)
- err := f.file.FSync()
- ctx.UninterruptibleSleepFinish(false)
- return err
-}
-
-func (f p9file) create(ctx context.Context, name string, flags p9.OpenFlags, permissions p9.FileMode, uid p9.UID, gid p9.GID) (*fd.FD, p9file, p9.QID, uint32, error) {
- ctx.UninterruptibleSleepStart(false)
- fdobj, newfile, qid, iounit, err := f.file.Create(name, flags, permissions, uid, gid)
- ctx.UninterruptibleSleepFinish(false)
- return fdobj, p9file{newfile}, qid, iounit, err
-}
-
-func (f p9file) mkdir(ctx context.Context, name string, permissions p9.FileMode, uid p9.UID, gid p9.GID) (p9.QID, error) {
- ctx.UninterruptibleSleepStart(false)
- qid, err := f.file.Mkdir(name, permissions, uid, gid)
- ctx.UninterruptibleSleepFinish(false)
- return qid, err
-}
-
-func (f p9file) symlink(ctx context.Context, oldName string, newName string, uid p9.UID, gid p9.GID) (p9.QID, error) {
- ctx.UninterruptibleSleepStart(false)
- qid, err := f.file.Symlink(oldName, newName, uid, gid)
- ctx.UninterruptibleSleepFinish(false)
- return qid, err
-}
-
-func (f p9file) link(ctx context.Context, target p9file, newName string) error {
- ctx.UninterruptibleSleepStart(false)
- err := f.file.Link(target.file, newName)
- ctx.UninterruptibleSleepFinish(false)
- return err
-}
-
-func (f p9file) mknod(ctx context.Context, name string, mode p9.FileMode, major uint32, minor uint32, uid p9.UID, gid p9.GID) (p9.QID, error) {
- ctx.UninterruptibleSleepStart(false)
- qid, err := f.file.Mknod(name, mode, major, minor, uid, gid)
- ctx.UninterruptibleSleepFinish(false)
- return qid, err
-}
-
-func (f p9file) rename(ctx context.Context, newDir p9file, newName string) error {
- ctx.UninterruptibleSleepStart(false)
- err := f.file.Rename(newDir.file, newName)
- ctx.UninterruptibleSleepFinish(false)
- return err
-}
-
-func (f p9file) unlinkAt(ctx context.Context, name string, flags uint32) error {
- ctx.UninterruptibleSleepStart(false)
- err := f.file.UnlinkAt(name, flags)
- ctx.UninterruptibleSleepFinish(false)
- return err
-}
-
-func (f p9file) readdir(ctx context.Context, offset uint64, count uint32) ([]p9.Dirent, error) {
- ctx.UninterruptibleSleepStart(false)
- dirents, err := f.file.Readdir(offset, count)
- ctx.UninterruptibleSleepFinish(false)
- return dirents, err
-}
-
-func (f p9file) readlink(ctx context.Context) (string, error) {
- ctx.UninterruptibleSleepStart(false)
- target, err := f.file.Readlink()
- ctx.UninterruptibleSleepFinish(false)
- return target, err
-}
-
-func (f p9file) flush(ctx context.Context) error {
- ctx.UninterruptibleSleepStart(false)
- err := f.file.Flush()
- ctx.UninterruptibleSleepFinish(false)
- return err
-}
-
-func (f p9file) connect(ctx context.Context, flags p9.ConnectFlags) (*fd.FD, error) {
- ctx.UninterruptibleSleepStart(false)
- fdobj, err := f.file.Connect(flags)
- ctx.UninterruptibleSleepFinish(false)
- return fdobj, err
-}
diff --git a/pkg/sentry/fsimpl/gofer/pagemath.go b/pkg/sentry/fsimpl/gofer/pagemath.go
deleted file mode 100644
index 847cb0784..000000000
--- a/pkg/sentry/fsimpl/gofer/pagemath.go
+++ /dev/null
@@ -1,31 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package gofer
-
-import (
- "gvisor.dev/gvisor/pkg/usermem"
-)
-
-// This are equivalent to usermem.Addr.RoundDown/Up, but without the
-// potentially truncating conversion to usermem.Addr. This is necessary because
-// there is no way to define generic "PageRoundDown/Up" functions in Go.
-
-func pageRoundDown(x uint64) uint64 {
- return x &^ (usermem.PageSize - 1)
-}
-
-func pageRoundUp(x uint64) uint64 {
- return pageRoundDown(x + usermem.PageSize - 1)
-}
diff --git a/pkg/sentry/fsimpl/gofer/regular_file.go b/pkg/sentry/fsimpl/gofer/regular_file.go
deleted file mode 100644
index e95209661..000000000
--- a/pkg/sentry/fsimpl/gofer/regular_file.go
+++ /dev/null
@@ -1,872 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package gofer
-
-import (
- "fmt"
- "io"
- "math"
- "sync"
- "sync/atomic"
-
- "gvisor.dev/gvisor/pkg/abi/linux"
- "gvisor.dev/gvisor/pkg/context"
- "gvisor.dev/gvisor/pkg/log"
- "gvisor.dev/gvisor/pkg/safemem"
- "gvisor.dev/gvisor/pkg/sentry/fs/fsutil"
- "gvisor.dev/gvisor/pkg/sentry/memmap"
- "gvisor.dev/gvisor/pkg/sentry/pgalloc"
- "gvisor.dev/gvisor/pkg/sentry/platform"
- "gvisor.dev/gvisor/pkg/sentry/usage"
- "gvisor.dev/gvisor/pkg/sentry/vfs"
- "gvisor.dev/gvisor/pkg/syserror"
- "gvisor.dev/gvisor/pkg/usermem"
-)
-
-func (d *dentry) isRegularFile() bool {
- return d.fileType() == linux.S_IFREG
-}
-
-type regularFileFD struct {
- fileDescription
-
- // off is the file offset. off is protected by mu.
- mu sync.Mutex
- off int64
-}
-
-// Release implements vfs.FileDescriptionImpl.Release.
-func (fd *regularFileFD) Release() {
-}
-
-// OnClose implements vfs.FileDescriptionImpl.OnClose.
-func (fd *regularFileFD) OnClose(ctx context.Context) error {
- if !fd.vfsfd.IsWritable() {
- return nil
- }
- // Skip flushing if writes may be buffered by the client, since (as with
- // the VFS1 client) we don't flush buffered writes on close anyway.
- d := fd.dentry()
- if d.fs.opts.interop == InteropModeExclusive {
- return nil
- }
- d.handleMu.RLock()
- defer d.handleMu.RUnlock()
- return d.handle.file.flush(ctx)
-}
-
-// PRead implements vfs.FileDescriptionImpl.PRead.
-func (fd *regularFileFD) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts vfs.ReadOptions) (int64, error) {
- if offset < 0 {
- return 0, syserror.EINVAL
- }
- if opts.Flags != 0 {
- return 0, syserror.EOPNOTSUPP
- }
-
- // Check for reading at EOF before calling into MM (but not under
- // InteropModeShared, which makes d.size unreliable).
- d := fd.dentry()
- if d.fs.opts.interop != InteropModeShared && uint64(offset) >= atomic.LoadUint64(&d.size) {
- return 0, io.EOF
- }
-
- if fd.vfsfd.StatusFlags()&linux.O_DIRECT != 0 {
- // Lock d.metadataMu for the rest of the read to prevent d.size from
- // changing.
- d.metadataMu.Lock()
- defer d.metadataMu.Unlock()
- // Write dirty cached pages that will be touched by the read back to
- // the remote file.
- if err := d.writeback(ctx, offset, dst.NumBytes()); err != nil {
- return 0, err
- }
- }
-
- rw := getDentryReadWriter(ctx, d, offset)
- if fd.vfsfd.StatusFlags()&linux.O_DIRECT != 0 {
- // Require the read to go to the remote file.
- rw.direct = true
- }
- n, err := dst.CopyOutFrom(ctx, rw)
- putDentryReadWriter(rw)
- if d.fs.opts.interop != InteropModeShared {
- // Compare Linux's mm/filemap.c:do_generic_file_read() => file_accessed().
- d.touchAtime(ctx, fd.vfsfd.Mount())
- }
- return n, err
-}
-
-// Read implements vfs.FileDescriptionImpl.Read.
-func (fd *regularFileFD) Read(ctx context.Context, dst usermem.IOSequence, opts vfs.ReadOptions) (int64, error) {
- fd.mu.Lock()
- n, err := fd.PRead(ctx, dst, fd.off, opts)
- fd.off += n
- fd.mu.Unlock()
- return n, err
-}
-
-// PWrite implements vfs.FileDescriptionImpl.PWrite.
-func (fd *regularFileFD) PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (int64, error) {
- if offset < 0 {
- return 0, syserror.EINVAL
- }
- if opts.Flags != 0 {
- return 0, syserror.EOPNOTSUPP
- }
-
- d := fd.dentry()
- d.metadataMu.Lock()
- defer d.metadataMu.Unlock()
- if d.fs.opts.interop != InteropModeShared {
- // Compare Linux's mm/filemap.c:__generic_file_write_iter() =>
- // file_update_time(). This is d.touchCMtime(), but without locking
- // d.metadataMu (recursively).
- if now, ok := nowFromContext(ctx); ok {
- atomic.StoreInt64(&d.mtime, now)
- atomic.StoreInt64(&d.ctime, now)
- }
- }
- if fd.vfsfd.StatusFlags()&linux.O_DIRECT != 0 {
- // Write dirty cached pages that will be touched by the write back to
- // the remote file.
- if err := d.writeback(ctx, offset, src.NumBytes()); err != nil {
- return 0, err
- }
- // Remove touched pages from the cache.
- pgstart := pageRoundDown(uint64(offset))
- pgend := pageRoundUp(uint64(offset + src.NumBytes()))
- if pgend < pgstart {
- return 0, syserror.EINVAL
- }
- mr := memmap.MappableRange{pgstart, pgend}
- var freed []platform.FileRange
- d.dataMu.Lock()
- cseg := d.cache.LowerBoundSegment(mr.Start)
- for cseg.Ok() && cseg.Start() < mr.End {
- cseg = d.cache.Isolate(cseg, mr)
- freed = append(freed, platform.FileRange{cseg.Value(), cseg.Value() + cseg.Range().Length()})
- cseg = d.cache.Remove(cseg).NextSegment()
- }
- d.dataMu.Unlock()
- // Invalidate mappings of removed pages.
- d.mapsMu.Lock()
- d.mappings.Invalidate(mr, memmap.InvalidateOpts{})
- d.mapsMu.Unlock()
- // Finally free pages removed from the cache.
- mf := d.fs.mfp.MemoryFile()
- for _, freedFR := range freed {
- mf.DecRef(freedFR)
- }
- }
- rw := getDentryReadWriter(ctx, d, offset)
- if fd.vfsfd.StatusFlags()&linux.O_DIRECT != 0 {
- // Require the write to go to the remote file.
- rw.direct = true
- }
- n, err := src.CopyInTo(ctx, rw)
- putDentryReadWriter(rw)
- if n != 0 && fd.vfsfd.StatusFlags()&(linux.O_DSYNC|linux.O_SYNC) != 0 {
- // Write dirty cached pages touched by the write back to the remote
- // file.
- if err := d.writeback(ctx, offset, src.NumBytes()); err != nil {
- return 0, err
- }
- // Request the remote filesystem to sync the remote file.
- if err := d.handle.file.fsync(ctx); err != nil {
- return 0, err
- }
- }
- return n, err
-}
-
-// Write implements vfs.FileDescriptionImpl.Write.
-func (fd *regularFileFD) Write(ctx context.Context, src usermem.IOSequence, opts vfs.WriteOptions) (int64, error) {
- fd.mu.Lock()
- n, err := fd.PWrite(ctx, src, fd.off, opts)
- fd.off += n
- fd.mu.Unlock()
- return n, err
-}
-
-type dentryReadWriter struct {
- ctx context.Context
- d *dentry
- off uint64
- direct bool
-}
-
-var dentryReadWriterPool = sync.Pool{
- New: func() interface{} {
- return &dentryReadWriter{}
- },
-}
-
-func getDentryReadWriter(ctx context.Context, d *dentry, offset int64) *dentryReadWriter {
- rw := dentryReadWriterPool.Get().(*dentryReadWriter)
- rw.ctx = ctx
- rw.d = d
- rw.off = uint64(offset)
- rw.direct = false
- return rw
-}
-
-func putDentryReadWriter(rw *dentryReadWriter) {
- rw.ctx = nil
- rw.d = nil
- dentryReadWriterPool.Put(rw)
-}
-
-// ReadToBlocks implements safemem.Reader.ReadToBlocks.
-func (rw *dentryReadWriter) ReadToBlocks(dsts safemem.BlockSeq) (uint64, error) {
- if dsts.IsEmpty() {
- return 0, nil
- }
-
- // If we have a mmappable host FD (which must be used here to ensure
- // coherence with memory-mapped I/O), or if InteropModeShared is in effect
- // (which prevents us from caching file contents and makes dentry.size
- // unreliable), or if the file was opened O_DIRECT, read directly from
- // dentry.handle without locking dentry.dataMu.
- rw.d.handleMu.RLock()
- if (rw.d.handle.fd >= 0 && !rw.d.fs.opts.forcePageCache) || rw.d.fs.opts.interop == InteropModeShared || rw.direct {
- n, err := rw.d.handle.readToBlocksAt(rw.ctx, dsts, rw.off)
- rw.d.handleMu.RUnlock()
- rw.off += n
- return n, err
- }
-
- // Otherwise read from/through the cache.
- mf := rw.d.fs.mfp.MemoryFile()
- fillCache := mf.ShouldCacheEvictable()
- var dataMuUnlock func()
- if fillCache {
- rw.d.dataMu.Lock()
- dataMuUnlock = rw.d.dataMu.Unlock
- } else {
- rw.d.dataMu.RLock()
- dataMuUnlock = rw.d.dataMu.RUnlock
- }
-
- // Compute the range to read (limited by file size and overflow-checked).
- if rw.off >= rw.d.size {
- dataMuUnlock()
- rw.d.handleMu.RUnlock()
- return 0, io.EOF
- }
- end := rw.d.size
- if rend := rw.off + dsts.NumBytes(); rend > rw.off && rend < end {
- end = rend
- }
-
- var done uint64
- seg, gap := rw.d.cache.Find(rw.off)
- for rw.off < end {
- mr := memmap.MappableRange{rw.off, end}
- switch {
- case seg.Ok():
- // Get internal mappings from the cache.
- ims, err := mf.MapInternal(seg.FileRangeOf(seg.Range().Intersect(mr)), usermem.Read)
- if err != nil {
- dataMuUnlock()
- rw.d.handleMu.RUnlock()
- return done, err
- }
-
- // Copy from internal mappings.
- n, err := safemem.CopySeq(dsts, ims)
- done += n
- rw.off += n
- dsts = dsts.DropFirst64(n)
- if err != nil {
- dataMuUnlock()
- rw.d.handleMu.RUnlock()
- return done, err
- }
-
- // Continue.
- seg, gap = seg.NextNonEmpty()
-
- case gap.Ok():
- gapMR := gap.Range().Intersect(mr)
- if fillCache {
- // Read into the cache, then re-enter the loop to read from the
- // cache.
- reqMR := memmap.MappableRange{
- Start: pageRoundDown(gapMR.Start),
- End: pageRoundUp(gapMR.End),
- }
- optMR := gap.Range()
- err := rw.d.cache.Fill(rw.ctx, reqMR, maxFillRange(reqMR, optMR), mf, usage.PageCache, rw.d.handle.readToBlocksAt)
- mf.MarkEvictable(rw.d, pgalloc.EvictableRange{optMR.Start, optMR.End})
- seg, gap = rw.d.cache.Find(rw.off)
- if !seg.Ok() {
- dataMuUnlock()
- rw.d.handleMu.RUnlock()
- return done, err
- }
- // err might have occurred in part of gap.Range() outside
- // gapMR. Forget about it for now; if the error matters and
- // persists, we'll run into it again in a later iteration of
- // this loop.
- } else {
- // Read directly from the file.
- gapDsts := dsts.TakeFirst64(gapMR.Length())
- n, err := rw.d.handle.readToBlocksAt(rw.ctx, gapDsts, gapMR.Start)
- done += n
- rw.off += n
- dsts = dsts.DropFirst64(n)
- // Partial reads are fine. But we must stop reading.
- if n != gapDsts.NumBytes() || err != nil {
- dataMuUnlock()
- rw.d.handleMu.RUnlock()
- return done, err
- }
-
- // Continue.
- seg, gap = gap.NextSegment(), fsutil.FileRangeGapIterator{}
- }
- }
- }
- dataMuUnlock()
- rw.d.handleMu.RUnlock()
- return done, nil
-}
-
-// WriteFromBlocks implements safemem.Writer.WriteFromBlocks.
-//
-// Preconditions: rw.d.metadataMu must be locked.
-func (rw *dentryReadWriter) WriteFromBlocks(srcs safemem.BlockSeq) (uint64, error) {
- if srcs.IsEmpty() {
- return 0, nil
- }
-
- // If we have a mmappable host FD (which must be used here to ensure
- // coherence with memory-mapped I/O), or if InteropModeShared is in effect
- // (which prevents us from caching file contents), or if the file was
- // opened with O_DIRECT, write directly to dentry.handle without locking
- // dentry.dataMu.
- rw.d.handleMu.RLock()
- if (rw.d.handle.fd >= 0 && !rw.d.fs.opts.forcePageCache) || rw.d.fs.opts.interop == InteropModeShared || rw.direct {
- n, err := rw.d.handle.writeFromBlocksAt(rw.ctx, srcs, rw.off)
- rw.off += n
- rw.d.dataMu.Lock()
- if rw.off > rw.d.size {
- atomic.StoreUint64(&rw.d.size, rw.off)
- // The remote file's size will implicitly be extended to the correct
- // value when we write back to it.
- }
- rw.d.dataMu.Unlock()
- rw.d.handleMu.RUnlock()
- return n, err
- }
-
- // Otherwise write to/through the cache.
- mf := rw.d.fs.mfp.MemoryFile()
- rw.d.dataMu.Lock()
-
- // Compute the range to write (overflow-checked).
- start := rw.off
- end := rw.off + srcs.NumBytes()
- if end <= rw.off {
- end = math.MaxInt64
- }
-
- var (
- done uint64
- retErr error
- )
- seg, gap := rw.d.cache.Find(rw.off)
- for rw.off < end {
- mr := memmap.MappableRange{rw.off, end}
- switch {
- case seg.Ok():
- // Get internal mappings from the cache.
- segMR := seg.Range().Intersect(mr)
- ims, err := mf.MapInternal(seg.FileRangeOf(segMR), usermem.Write)
- if err != nil {
- retErr = err
- goto exitLoop
- }
-
- // Copy to internal mappings.
- n, err := safemem.CopySeq(ims, srcs)
- done += n
- rw.off += n
- srcs = srcs.DropFirst64(n)
- rw.d.dirty.MarkDirty(segMR)
- if err != nil {
- retErr = err
- goto exitLoop
- }
-
- // Continue.
- seg, gap = seg.NextNonEmpty()
-
- case gap.Ok():
- // Write directly to the file. At present, we never fill the cache
- // when writing, since doing so can convert small writes into
- // inefficient read-modify-write cycles, and we have no mechanism
- // for detecting or avoiding this.
- gapMR := gap.Range().Intersect(mr)
- gapSrcs := srcs.TakeFirst64(gapMR.Length())
- n, err := rw.d.handle.writeFromBlocksAt(rw.ctx, gapSrcs, gapMR.Start)
- done += n
- rw.off += n
- srcs = srcs.DropFirst64(n)
- // Partial writes are fine. But we must stop writing.
- if n != gapSrcs.NumBytes() || err != nil {
- retErr = err
- goto exitLoop
- }
-
- // Continue.
- seg, gap = gap.NextSegment(), fsutil.FileRangeGapIterator{}
- }
- }
-exitLoop:
- if rw.off > rw.d.size {
- atomic.StoreUint64(&rw.d.size, rw.off)
- // The remote file's size will implicitly be extended to the correct
- // value when we write back to it.
- }
- // If InteropModeWritethrough is in effect, flush written data back to the
- // remote filesystem.
- if rw.d.fs.opts.interop == InteropModeWritethrough && done != 0 {
- if err := fsutil.SyncDirty(rw.ctx, memmap.MappableRange{
- Start: start,
- End: rw.off,
- }, &rw.d.cache, &rw.d.dirty, rw.d.size, mf, rw.d.handle.writeFromBlocksAt); err != nil {
- // We have no idea how many bytes were actually flushed.
- rw.off = start
- done = 0
- retErr = err
- }
- }
- rw.d.dataMu.Unlock()
- rw.d.handleMu.RUnlock()
- return done, retErr
-}
-
-func (d *dentry) writeback(ctx context.Context, offset, size int64) error {
- if size == 0 {
- return nil
- }
- d.handleMu.RLock()
- defer d.handleMu.RUnlock()
- d.dataMu.Lock()
- defer d.dataMu.Unlock()
- // Compute the range of valid bytes (overflow-checked).
- if uint64(offset) >= d.size {
- return nil
- }
- end := int64(d.size)
- if rend := offset + size; rend > offset && rend < end {
- end = rend
- }
- return fsutil.SyncDirty(ctx, memmap.MappableRange{
- Start: uint64(offset),
- End: uint64(end),
- }, &d.cache, &d.dirty, d.size, d.fs.mfp.MemoryFile(), d.handle.writeFromBlocksAt)
-}
-
-// Seek implements vfs.FileDescriptionImpl.Seek.
-func (fd *regularFileFD) Seek(ctx context.Context, offset int64, whence int32) (int64, error) {
- fd.mu.Lock()
- defer fd.mu.Unlock()
- switch whence {
- case linux.SEEK_SET:
- // Use offset as specified.
- case linux.SEEK_CUR:
- offset += fd.off
- case linux.SEEK_END, linux.SEEK_DATA, linux.SEEK_HOLE:
- // Ensure file size is up to date.
- d := fd.dentry()
- if fd.filesystem().opts.interop == InteropModeShared {
- if err := d.updateFromGetattr(ctx); err != nil {
- return 0, err
- }
- }
- size := int64(atomic.LoadUint64(&d.size))
- // For SEEK_DATA and SEEK_HOLE, treat the file as a single contiguous
- // block of data.
- switch whence {
- case linux.SEEK_END:
- offset += size
- case linux.SEEK_DATA:
- if offset > size {
- return 0, syserror.ENXIO
- }
- // Use offset as specified.
- case linux.SEEK_HOLE:
- if offset > size {
- return 0, syserror.ENXIO
- }
- offset = size
- }
- default:
- return 0, syserror.EINVAL
- }
- if offset < 0 {
- return 0, syserror.EINVAL
- }
- fd.off = offset
- return offset, nil
-}
-
-// Sync implements vfs.FileDescriptionImpl.Sync.
-func (fd *regularFileFD) Sync(ctx context.Context) error {
- return fd.dentry().syncSharedHandle(ctx)
-}
-
-func (d *dentry) syncSharedHandle(ctx context.Context) error {
- d.handleMu.RLock()
- if !d.handleWritable {
- d.handleMu.RUnlock()
- return nil
- }
- d.dataMu.Lock()
- // Write dirty cached data to the remote file.
- err := fsutil.SyncDirtyAll(ctx, &d.cache, &d.dirty, d.size, d.fs.mfp.MemoryFile(), d.handle.writeFromBlocksAt)
- d.dataMu.Unlock()
- if err == nil {
- // Sync the remote file.
- err = d.handle.sync(ctx)
- }
- d.handleMu.RUnlock()
- return err
-}
-
-// ConfigureMMap implements vfs.FileDescriptionImpl.ConfigureMMap.
-func (fd *regularFileFD) ConfigureMMap(ctx context.Context, opts *memmap.MMapOpts) error {
- d := fd.dentry()
- switch d.fs.opts.interop {
- case InteropModeExclusive:
- // Any mapping is fine.
- case InteropModeWritethrough:
- // Shared writable mappings require a host FD, since otherwise we can't
- // synchronously flush memory-mapped writes to the remote file.
- if opts.Private || !opts.MaxPerms.Write {
- break
- }
- fallthrough
- case InteropModeShared:
- // All mappings require a host FD to be coherent with other filesystem
- // users.
- if d.fs.opts.forcePageCache {
- // Whether or not we have a host FD, we're not allowed to use it.
- return syserror.ENODEV
- }
- d.handleMu.RLock()
- haveFD := d.handle.fd >= 0
- d.handleMu.RUnlock()
- if !haveFD {
- return syserror.ENODEV
- }
- default:
- panic(fmt.Sprintf("unknown InteropMode %v", d.fs.opts.interop))
- }
- // After this point, d may be used as a memmap.Mappable.
- d.pf.hostFileMapperInitOnce.Do(d.pf.hostFileMapper.Init)
- return vfs.GenericConfigureMMap(&fd.vfsfd, d, opts)
-}
-
-func (d *dentry) mayCachePages() bool {
- if d.fs.opts.interop == InteropModeShared {
- return false
- }
- if d.fs.opts.forcePageCache {
- return true
- }
- d.handleMu.RLock()
- haveFD := d.handle.fd >= 0
- d.handleMu.RUnlock()
- return haveFD
-}
-
-// AddMapping implements memmap.Mappable.AddMapping.
-func (d *dentry) AddMapping(ctx context.Context, ms memmap.MappingSpace, ar usermem.AddrRange, offset uint64, writable bool) error {
- d.mapsMu.Lock()
- mapped := d.mappings.AddMapping(ms, ar, offset, writable)
- // Do this unconditionally since whether we have a host FD can change
- // across save/restore.
- for _, r := range mapped {
- d.pf.hostFileMapper.IncRefOn(r)
- }
- if d.mayCachePages() {
- // d.Evict() will refuse to evict memory-mapped pages, so tell the
- // MemoryFile to not bother trying.
- mf := d.fs.mfp.MemoryFile()
- for _, r := range mapped {
- mf.MarkUnevictable(d, pgalloc.EvictableRange{r.Start, r.End})
- }
- }
- d.mapsMu.Unlock()
- return nil
-}
-
-// RemoveMapping implements memmap.Mappable.RemoveMapping.
-func (d *dentry) RemoveMapping(ctx context.Context, ms memmap.MappingSpace, ar usermem.AddrRange, offset uint64, writable bool) {
- d.mapsMu.Lock()
- unmapped := d.mappings.RemoveMapping(ms, ar, offset, writable)
- for _, r := range unmapped {
- d.pf.hostFileMapper.DecRefOn(r)
- }
- if d.mayCachePages() {
- // Pages that are no longer referenced by any application memory
- // mappings are now considered unused; allow MemoryFile to evict them
- // when necessary.
- mf := d.fs.mfp.MemoryFile()
- d.dataMu.Lock()
- for _, r := range unmapped {
- // Since these pages are no longer mapped, they are no longer
- // concurrently dirtyable by a writable memory mapping.
- d.dirty.AllowClean(r)
- mf.MarkEvictable(d, pgalloc.EvictableRange{r.Start, r.End})
- }
- d.dataMu.Unlock()
- }
- d.mapsMu.Unlock()
-}
-
-// CopyMapping implements memmap.Mappable.CopyMapping.
-func (d *dentry) CopyMapping(ctx context.Context, ms memmap.MappingSpace, srcAR, dstAR usermem.AddrRange, offset uint64, writable bool) error {
- return d.AddMapping(ctx, ms, dstAR, offset, writable)
-}
-
-// Translate implements memmap.Mappable.Translate.
-func (d *dentry) Translate(ctx context.Context, required, optional memmap.MappableRange, at usermem.AccessType) ([]memmap.Translation, error) {
- d.handleMu.RLock()
- if d.handle.fd >= 0 && !d.fs.opts.forcePageCache {
- d.handleMu.RUnlock()
- mr := optional
- if d.fs.opts.limitHostFDTranslation {
- mr = maxFillRange(required, optional)
- }
- return []memmap.Translation{
- {
- Source: mr,
- File: &d.pf,
- Offset: mr.Start,
- Perms: usermem.AnyAccess,
- },
- }, nil
- }
-
- d.dataMu.Lock()
-
- // Constrain translations to d.size (rounded up) to prevent translation to
- // pages that may be concurrently truncated.
- pgend := pageRoundUp(d.size)
- var beyondEOF bool
- if required.End > pgend {
- if required.Start >= pgend {
- d.dataMu.Unlock()
- d.handleMu.RUnlock()
- return nil, &memmap.BusError{io.EOF}
- }
- beyondEOF = true
- required.End = pgend
- }
- if optional.End > pgend {
- optional.End = pgend
- }
-
- mf := d.fs.mfp.MemoryFile()
- cerr := d.cache.Fill(ctx, required, maxFillRange(required, optional), mf, usage.PageCache, d.handle.readToBlocksAt)
-
- var ts []memmap.Translation
- var translatedEnd uint64
- for seg := d.cache.FindSegment(required.Start); seg.Ok() && seg.Start() < required.End; seg, _ = seg.NextNonEmpty() {
- segMR := seg.Range().Intersect(optional)
- // TODO(jamieliu): Make Translations writable even if writability is
- // not required if already kept-dirty by another writable translation.
- perms := usermem.AccessType{
- Read: true,
- Execute: true,
- }
- if at.Write {
- // From this point forward, this memory can be dirtied through the
- // mapping at any time.
- d.dirty.KeepDirty(segMR)
- perms.Write = true
- }
- ts = append(ts, memmap.Translation{
- Source: segMR,
- File: mf,
- Offset: seg.FileRangeOf(segMR).Start,
- Perms: perms,
- })
- translatedEnd = segMR.End
- }
-
- d.dataMu.Unlock()
- d.handleMu.RUnlock()
-
- // Don't return the error returned by c.cache.Fill if it occurred outside
- // of required.
- if translatedEnd < required.End && cerr != nil {
- return ts, &memmap.BusError{cerr}
- }
- if beyondEOF {
- return ts, &memmap.BusError{io.EOF}
- }
- return ts, nil
-}
-
-func maxFillRange(required, optional memmap.MappableRange) memmap.MappableRange {
- const maxReadahead = 64 << 10 // 64 KB, chosen arbitrarily
- if required.Length() >= maxReadahead {
- return required
- }
- if optional.Length() <= maxReadahead {
- return optional
- }
- optional.Start = required.Start
- if optional.Length() <= maxReadahead {
- return optional
- }
- optional.End = optional.Start + maxReadahead
- return optional
-}
-
-// InvalidateUnsavable implements memmap.Mappable.InvalidateUnsavable.
-func (d *dentry) InvalidateUnsavable(ctx context.Context) error {
- // Whether we have a host fd (and consequently what platform.File is
- // mapped) can change across save/restore, so invalidate all translations
- // unconditionally.
- d.mapsMu.Lock()
- defer d.mapsMu.Unlock()
- d.mappings.InvalidateAll(memmap.InvalidateOpts{})
-
- // Write the cache's contents back to the remote file so that if we have a
- // host fd after restore, the remote file's contents are coherent.
- mf := d.fs.mfp.MemoryFile()
- d.dataMu.Lock()
- defer d.dataMu.Unlock()
- if err := fsutil.SyncDirtyAll(ctx, &d.cache, &d.dirty, d.size, mf, d.handle.writeFromBlocksAt); err != nil {
- return err
- }
-
- // Discard the cache so that it's not stored in saved state. This is safe
- // because per InvalidateUnsavable invariants, no new translations can have
- // been returned after we invalidated all existing translations above.
- d.cache.DropAll(mf)
- d.dirty.RemoveAll()
-
- return nil
-}
-
-// Evict implements pgalloc.EvictableMemoryUser.Evict.
-func (d *dentry) Evict(ctx context.Context, er pgalloc.EvictableRange) {
- d.mapsMu.Lock()
- defer d.mapsMu.Unlock()
- d.dataMu.Lock()
- defer d.dataMu.Unlock()
-
- mr := memmap.MappableRange{er.Start, er.End}
- mf := d.fs.mfp.MemoryFile()
- // Only allow pages that are no longer memory-mapped to be evicted.
- for mgap := d.mappings.LowerBoundGap(mr.Start); mgap.Ok() && mgap.Start() < mr.End; mgap = mgap.NextGap() {
- mgapMR := mgap.Range().Intersect(mr)
- if mgapMR.Length() == 0 {
- continue
- }
- if err := fsutil.SyncDirty(ctx, mgapMR, &d.cache, &d.dirty, d.size, mf, d.handle.writeFromBlocksAt); err != nil {
- log.Warningf("Failed to writeback cached data %v: %v", mgapMR, err)
- }
- d.cache.Drop(mgapMR, mf)
- d.dirty.KeepClean(mgapMR)
- }
-}
-
-// dentryPlatformFile implements platform.File. It exists solely because dentry
-// cannot implement both vfs.DentryImpl.IncRef and platform.File.IncRef.
-//
-// dentryPlatformFile is only used when a host FD representing the remote file
-// is available (i.e. dentry.handle.fd >= 0), and that FD is used for
-// application memory mappings (i.e. !filesystem.opts.forcePageCache).
-type dentryPlatformFile struct {
- *dentry
-
- // fdRefs counts references on platform.File offsets. fdRefs is protected
- // by dentry.dataMu.
- fdRefs fsutil.FrameRefSet
-
- // If this dentry represents a regular file, and handle.fd >= 0,
- // hostFileMapper caches mappings of handle.fd.
- hostFileMapper fsutil.HostFileMapper
-
- // hostFileMapperInitOnce is used to lazily initialize hostFileMapper.
- hostFileMapperInitOnce sync.Once
-}
-
-// IncRef implements platform.File.IncRef.
-func (d *dentryPlatformFile) IncRef(fr platform.FileRange) {
- d.dataMu.Lock()
- seg, gap := d.fdRefs.Find(fr.Start)
- for {
- switch {
- case seg.Ok() && seg.Start() < fr.End:
- seg = d.fdRefs.Isolate(seg, fr)
- seg.SetValue(seg.Value() + 1)
- seg, gap = seg.NextNonEmpty()
- case gap.Ok() && gap.Start() < fr.End:
- newRange := gap.Range().Intersect(fr)
- usage.MemoryAccounting.Inc(newRange.Length(), usage.Mapped)
- seg, gap = d.fdRefs.InsertWithoutMerging(gap, newRange, 1).NextNonEmpty()
- default:
- d.fdRefs.MergeAdjacent(fr)
- d.dataMu.Unlock()
- return
- }
- }
-}
-
-// DecRef implements platform.File.DecRef.
-func (d *dentryPlatformFile) DecRef(fr platform.FileRange) {
- d.dataMu.Lock()
- seg := d.fdRefs.FindSegment(fr.Start)
-
- for seg.Ok() && seg.Start() < fr.End {
- seg = d.fdRefs.Isolate(seg, fr)
- if old := seg.Value(); old == 1 {
- usage.MemoryAccounting.Dec(seg.Range().Length(), usage.Mapped)
- seg = d.fdRefs.Remove(seg).NextSegment()
- } else {
- seg.SetValue(old - 1)
- seg = seg.NextSegment()
- }
- }
- d.fdRefs.MergeAdjacent(fr)
- d.dataMu.Unlock()
-
-}
-
-// MapInternal implements platform.File.MapInternal.
-func (d *dentryPlatformFile) MapInternal(fr platform.FileRange, at usermem.AccessType) (safemem.BlockSeq, error) {
- d.handleMu.RLock()
- bs, err := d.hostFileMapper.MapInternal(fr, int(d.handle.fd), at.Write)
- d.handleMu.RUnlock()
- return bs, err
-}
-
-// FD implements platform.File.FD.
-func (d *dentryPlatformFile) FD() int {
- d.handleMu.RLock()
- fd := d.handle.fd
- d.handleMu.RUnlock()
- return int(fd)
-}
diff --git a/pkg/sentry/fsimpl/gofer/special_file.go b/pkg/sentry/fsimpl/gofer/special_file.go
deleted file mode 100644
index 08c691c47..000000000
--- a/pkg/sentry/fsimpl/gofer/special_file.go
+++ /dev/null
@@ -1,159 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package gofer
-
-import (
- "sync"
-
- "gvisor.dev/gvisor/pkg/abi/linux"
- "gvisor.dev/gvisor/pkg/context"
- "gvisor.dev/gvisor/pkg/safemem"
- "gvisor.dev/gvisor/pkg/sentry/vfs"
- "gvisor.dev/gvisor/pkg/syserror"
- "gvisor.dev/gvisor/pkg/usermem"
-)
-
-// specialFileFD implements vfs.FileDescriptionImpl for files other than
-// regular files, directories, and symlinks: pipes, sockets, etc. It is also
-// used for regular files when filesystemOptions.specialRegularFiles is in
-// effect. specialFileFD differs from regularFileFD by using per-FD handles
-// instead of shared per-dentry handles, and never buffering I/O.
-type specialFileFD struct {
- fileDescription
-
- // handle is immutable.
- handle handle
-
- // off is the file offset. off is protected by mu. (POSIX 2.9.7 only
- // requires operations using the file offset to be atomic for regular files
- // and symlinks; however, since specialFileFD may be used for regular
- // files, we apply this atomicity unconditionally.)
- mu sync.Mutex
- off int64
-}
-
-// Release implements vfs.FileDescriptionImpl.Release.
-func (fd *specialFileFD) Release() {
- fd.handle.close(context.Background())
- fs := fd.vfsfd.Mount().Filesystem().Impl().(*filesystem)
- fs.syncMu.Lock()
- delete(fs.specialFileFDs, fd)
- fs.syncMu.Unlock()
-}
-
-// OnClose implements vfs.FileDescriptionImpl.OnClose.
-func (fd *specialFileFD) OnClose(ctx context.Context) error {
- if !fd.vfsfd.IsWritable() {
- return nil
- }
- return fd.handle.file.flush(ctx)
-}
-
-// PRead implements vfs.FileDescriptionImpl.PRead.
-func (fd *specialFileFD) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts vfs.ReadOptions) (int64, error) {
- if offset < 0 {
- return 0, syserror.EINVAL
- }
- if opts.Flags != 0 {
- return 0, syserror.EOPNOTSUPP
- }
-
- // Going through dst.CopyOutFrom() holds MM locks around file operations of
- // unknown duration. For regularFileFD, doing so is necessary to support
- // mmap due to lock ordering; MM locks precede dentry.dataMu. That doesn't
- // hold here since specialFileFD doesn't client-cache data. Just buffer the
- // read instead.
- if d := fd.dentry(); d.fs.opts.interop != InteropModeShared {
- d.touchAtime(ctx, fd.vfsfd.Mount())
- }
- buf := make([]byte, dst.NumBytes())
- n, err := fd.handle.readToBlocksAt(ctx, safemem.BlockSeqOf(safemem.BlockFromSafeSlice(buf)), uint64(offset))
- if n == 0 {
- return 0, err
- }
- if cp, cperr := dst.CopyOut(ctx, buf[:n]); cperr != nil {
- return int64(cp), cperr
- }
- return int64(n), err
-}
-
-// Read implements vfs.FileDescriptionImpl.Read.
-func (fd *specialFileFD) Read(ctx context.Context, dst usermem.IOSequence, opts vfs.ReadOptions) (int64, error) {
- fd.mu.Lock()
- n, err := fd.PRead(ctx, dst, fd.off, opts)
- fd.off += n
- fd.mu.Unlock()
- return n, err
-}
-
-// PWrite implements vfs.FileDescriptionImpl.PWrite.
-func (fd *specialFileFD) PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (int64, error) {
- if offset < 0 {
- return 0, syserror.EINVAL
- }
- if opts.Flags != 0 {
- return 0, syserror.EOPNOTSUPP
- }
-
- // Do a buffered write. See rationale in PRead.
- if d := fd.dentry(); d.fs.opts.interop != InteropModeShared {
- d.touchCMtime(ctx)
- }
- buf := make([]byte, src.NumBytes())
- // Don't do partial writes if we get a partial read from src.
- if _, err := src.CopyIn(ctx, buf); err != nil {
- return 0, err
- }
- n, err := fd.handle.writeFromBlocksAt(ctx, safemem.BlockSeqOf(safemem.BlockFromSafeSlice(buf)), uint64(offset))
- return int64(n), err
-}
-
-// Write implements vfs.FileDescriptionImpl.Write.
-func (fd *specialFileFD) Write(ctx context.Context, src usermem.IOSequence, opts vfs.WriteOptions) (int64, error) {
- fd.mu.Lock()
- n, err := fd.PWrite(ctx, src, fd.off, opts)
- fd.off += n
- fd.mu.Unlock()
- return n, err
-}
-
-// Seek implements vfs.FileDescriptionImpl.Seek.
-func (fd *specialFileFD) Seek(ctx context.Context, offset int64, whence int32) (int64, error) {
- fd.mu.Lock()
- defer fd.mu.Unlock()
- switch whence {
- case linux.SEEK_SET:
- // Use offset as given.
- case linux.SEEK_CUR:
- offset += fd.off
- default:
- // SEEK_END, SEEK_DATA, and SEEK_HOLE aren't supported since it's not
- // clear that file size is even meaningful for these files.
- return 0, syserror.EINVAL
- }
- if offset < 0 {
- return 0, syserror.EINVAL
- }
- fd.off = offset
- return offset, nil
-}
-
-// Sync implements vfs.FileDescriptionImpl.Sync.
-func (fd *specialFileFD) Sync(ctx context.Context) error {
- if !fd.vfsfd.IsWritable() {
- return nil
- }
- return fd.handle.sync(ctx)
-}
diff --git a/pkg/sentry/fsimpl/gofer/symlink.go b/pkg/sentry/fsimpl/gofer/symlink.go
deleted file mode 100644
index adf43be60..000000000
--- a/pkg/sentry/fsimpl/gofer/symlink.go
+++ /dev/null
@@ -1,47 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package gofer
-
-import (
- "gvisor.dev/gvisor/pkg/abi/linux"
- "gvisor.dev/gvisor/pkg/context"
- "gvisor.dev/gvisor/pkg/sentry/vfs"
-)
-
-// isSymlink reports whether d's file type is linux.S_IFLNK.
-func (d *dentry) isSymlink() bool {
-	ft := d.fileType()
-	return ft == linux.S_IFLNK
-}
-
-// readlink returns the target of the symlink represented by d.
-//
-// When the filesystem's interop mode is InteropModeShared, the target is
-// always fetched from d.file. In every other mode the access time is touched
-// and the target is served from d.target once a successful read has populated
-// it; d.dataMu is held across the lookup and the cache fill.
-//
-// Precondition: d.isSymlink().
-func (d *dentry) readlink(ctx context.Context, mnt *vfs.Mount) (string, error) {
-	if d.fs.opts.interop == InteropModeShared {
-		// No caching in shared mode: always ask the remote file.
-		return d.file.readlink(ctx)
-	}
-
-	d.touchAtime(ctx, mnt)
-	d.dataMu.Lock()
-	defer d.dataMu.Unlock()
-	if d.haveTarget {
-		return d.target, nil
-	}
-	target, err := d.file.readlink(ctx)
-	if err == nil {
-		// Remember the result so later calls skip the remote read.
-		d.haveTarget = true
-		d.target = target
-	}
-	return target, err
-}
diff --git a/pkg/sentry/fsimpl/gofer/time.go b/pkg/sentry/fsimpl/gofer/time.go
deleted file mode 100644
index 7598ec6a8..000000000
--- a/pkg/sentry/fsimpl/gofer/time.go
+++ /dev/null
@@ -1,75 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package gofer
-
-import (
- "sync/atomic"
-
- "gvisor.dev/gvisor/pkg/abi/linux"
- "gvisor.dev/gvisor/pkg/context"
- ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time"
- "gvisor.dev/gvisor/pkg/sentry/vfs"
-)
-
-// dentryTimestampFromP9 combines a seconds/nanoseconds pair into a single
-// nanosecond count. The sum is computed in uint64 and then converted to int64.
-func dentryTimestampFromP9(s, ns uint64) int64 {
-	total := s*1e9 + ns
-	return int64(total)
-}
-
-// dentryTimestampFromStatx flattens a statx timestamp into a single
-// nanosecond count: ts.Sec scaled by 1e9 plus ts.Nsec.
-func dentryTimestampFromStatx(ts linux.StatxTimestamp) int64 {
-	nsec := int64(ts.Nsec)
-	return ts.Sec*1e9 + nsec
-}
-
-// statxTimestampFromDentry splits a nanosecond count into the seconds and
-// nanoseconds fields of a linux.StatxTimestamp.
-//
-// The result is normalized so that Nsec always lies in [0, 1e9), including
-// for negative ns. Go's / and % truncate toward zero, so a negative ns would
-// otherwise produce a negative remainder that wraps to a huge value when
-// converted to uint32. With the adjustment below, the value round-trips
-// through dentryTimestampFromStatx for negative inputs as well.
-func statxTimestampFromDentry(ns int64) linux.StatxTimestamp {
-	sec, nsec := ns/1e9, ns%1e9
-	if nsec < 0 {
-		// Borrow one second so the nanosecond part becomes non-negative.
-		sec--
-		nsec += 1e9
-	}
-	return linux.StatxTimestamp{
-		Sec:  sec,
-		Nsec: uint32(nsec),
-	}
-}
-
-// nowFromContext returns the current time in nanoseconds as read from the
-// realtime clock obtained from ctx. The boolean is false, and the time 0,
-// when ctx yields no such clock.
-func nowFromContext(ctx context.Context) (int64, bool) {
-	clock := ktime.RealtimeClockFromContext(ctx)
-	if clock == nil {
-		return 0, false
-	}
-	return clock.Now().Nanoseconds(), true
-}
-
-// touchAtime sets d's access time to the current time taken from ctx. It
-// does nothing if mnt.CheckBeginWrite fails or if ctx provides no clock.
-//
-// Preconditions: fs.interop != InteropModeShared.
-func (d *dentry) touchAtime(ctx context.Context, mnt *vfs.Mount) {
-	if err := mnt.CheckBeginWrite(); err != nil {
-		return
-	}
-	// Every path past this point must balance the successful CheckBeginWrite.
-	defer mnt.EndWrite()
-
-	now, ok := nowFromContext(ctx)
-	if !ok {
-		return
-	}
-	d.metadataMu.Lock()
-	atomic.StoreInt64(&d.atime, now)
-	d.metadataMu.Unlock()
-}
-
-// touchCMtime sets both d's modification time and change time to the current
-// time taken from ctx. It does nothing if ctx provides no clock.
-//
-// Preconditions: fs.interop != InteropModeShared. The caller has successfully
-// called vfs.Mount.CheckBeginWrite().
-func (d *dentry) touchCMtime(ctx context.Context) {
-	now, ok := nowFromContext(ctx)
-	if !ok {
-		return
-	}
-	d.metadataMu.Lock()
-	defer d.metadataMu.Unlock()
-	// The same instant is recorded for both timestamps.
-	atomic.StoreInt64(&d.mtime, now)
-	atomic.StoreInt64(&d.ctime, now)
-}