summary | refs | log | tree | commit | diff | homepage
path: root/pkg/sentry/fs/gofer/inode.go
diff options
context:
space:
mode:
Diffstat (limited to 'pkg/sentry/fs/gofer/inode.go')
-rw-r--r-- pkg/sentry/fs/gofer/inode.go 554
1 files changed, 554 insertions, 0 deletions
diff --git a/pkg/sentry/fs/gofer/inode.go b/pkg/sentry/fs/gofer/inode.go
new file mode 100644
index 000000000..454242923
--- /dev/null
+++ b/pkg/sentry/fs/gofer/inode.go
@@ -0,0 +1,554 @@
+// Copyright 2018 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package gofer
+
+import (
+ "errors"
+ "sync"
+ "syscall"
+
+ "gvisor.googlesource.com/gvisor/pkg/abi/linux"
+ "gvisor.googlesource.com/gvisor/pkg/fd"
+ "gvisor.googlesource.com/gvisor/pkg/log"
+ "gvisor.googlesource.com/gvisor/pkg/p9"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/context"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/device"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/fs/fdpipe"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/fs/fsutil"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/fs/host"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/memmap"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/safemem"
+ "gvisor.googlesource.com/gvisor/pkg/syserror"
+)
+
+// inodeOperations implements fs.InodeOperations; per-file state lives in fileState.
+type inodeOperations struct {
+ fsutil.InodeNotVirtual `state:"nosave"`
+ fsutil.InodeNoExtendedAttributes `state:"nosave"`
+ fsutil.DeprecatedFileOperations `state:"nosave"`
+
+ // fileState implements fsutil.CachedFileObject. It exists
+ // to break a circular load dependency between inodeOperations
+ // and cachingInodeOps (below).
+ fileState *inodeFileState `state:"wait"`
+
+ // cachingInodeOps implements memmap.Mappable for inodeOperations.
+ cachingInodeOps *fsutil.CachingInodeOperations
+
+ // readdirMu protects readdirCache and serializes concurrent Readdirs.
+ readdirMu sync.Mutex `state:"nosave"`
+
+ // readdirCache is a cache of readdir results in the form of
+ // an fs.SortedDentryMap.
+ //
+ // It starts out as nil and is initialized lazily under readdirMu;
+ // invalidating the cache means setting it back to nil.
+ readdirCache *fs.SortedDentryMap `state:"nosave"`
+}
+
+// inodeFileState implements fsutil.CachedFileObject and otherwise fully
+// encapsulates state that needs to be manually loaded on restore for
+// this file object.
+//
+// This unfortunate structure exists because fsutil.CachingInodeOperations
+// defines afterLoad and therefore cannot be lazily loaded (to break a
+// circular load dependency between it and inodeOperations). Even with
+// lazy loading, this approach defines the dependencies between objects
+// and the expected load behavior more concretely.
+type inodeFileState struct {
+ // s is common file system state for Gofers.
+ s *session `state:"wait"`
+
+ // MultiDeviceKey consists of:
+ //
+ // * Device: file system device from a specific gofer.
+ // * SecondaryDevice: unique identifier of the attach point.
+ // * Inode: the inode of this resource, unique per Device.
+ //
+ // These fields combined enable consistent hashing of virtual inodes
+ // on goferDevice.
+ key device.MultiDeviceKey `state:"nosave"`
+
+ // file is the p9 file that contains a single unopened fid.
+ file contextFile `state:"nosave"`
+
+ // sattr caches the stable attributes.
+ sattr fs.StableAttr `state:"wait"`
+
+ // handlesMu protects the below fields.
+ handlesMu sync.RWMutex `state:"nosave"`
+
+ // Do minimal open handle caching: only for read-only filesystems.
+ readonly *handles `state:"nosave"`
+
+ // Maintain readthrough handles for populating page caches.
+ readthrough *handles `state:"nosave"`
+
+ // Maintain writeback handles for syncing from page caches.
+ writeback *handles `state:"nosave"`
+
+ // writebackRW indicates whether writeback is opened read-write. If
+ // it is not and a read-write handle could replace writeback (above),
+ // then writeback is replaced with the read-write handle. This
+ // ensures that files that were first opened write-only and then
+ // later are opened read-write to be mapped can in fact be mapped.
+ writebackRW bool
+
+ // loading is acquired when the inodeFileState begins an asynchronous
+ // load. It is released when the load is complete. Callers that require
+ // all state to be available should call waitForLoad() to ensure that.
+ loading sync.Mutex `state:".(struct{})"`
+
+ // savedUAttr is only allocated during S/R. It points to the save-time
+ // unstable attributes and is used to validate restore-time ones.
+ //
+ // Note that these unstable attributes are only used to detect cross-S/R
+ // external file system metadata changes. They may differ from the
+ // cached unstable attributes in cachingInodeOps, as that might differ
+ // from the external file system attributes if there had been WriteOut
+ // failures. S/R is transparent to Sentry and the latter will continue
+ // using its cached values after restore.
+ savedUAttr *fs.UnstableAttr
+}
+
+// Release closes the p9 file and drops this object's reference on every
+// handle set (readonly, readthrough, writeback) that has been installed.
+func (i *inodeFileState) Release(ctx context.Context) {
+	i.file.close(ctx)
+
+	// Handle sets that were never installed are nil and are skipped.
+	for _, h := range []*handles{i.readonly, i.readthrough, i.writeback} {
+		if h != nil {
+			h.DecRef()
+		}
+	}
+}
+
+// setHandlesForCachedIO records h as the handle set used for cached reads
+// and/or writes through fsutil.CachingInodeOperations, depending on flags.
+// Every slot that ends up pointing at h takes its own reference on h.
+func (i *inodeFileState) setHandlesForCachedIO(flags fs.FileFlags, h *handles) {
+	i.handlesMu.Lock()
+	defer i.handlesMu.Unlock()
+
+	// The first readable handle set becomes the readthrough handles.
+	if flags.Read && i.readthrough == nil {
+		h.IncRef()
+		i.readthrough = h
+	}
+
+	if !flags.Write {
+		return
+	}
+
+	switch {
+	case i.writeback == nil:
+		// The first writable handle set becomes the writeback handles.
+		h.IncRef()
+		i.writeback = h
+	case flags.Read && !i.writebackRW:
+		// Upgrade a write-only writeback to this read-write handle set.
+		i.writeback.DecRef()
+		h.IncRef()
+		i.writeback = h
+	}
+	if flags.Read {
+		i.writebackRW = true
+	}
+}
+
+// getCachedHandles returns cached handles that can be shared between opens,
+// together with true, or (nil, false) when no cached handles apply. These
+// handles should only be used if the mount supports caching. This is
+// distinct from fsutil.CachingInodeOperations, which is used for a limited
+// set of file types (those that can be mapped).
+//
+// The returned handles carry a reference taken on behalf of the caller.
+func (i *inodeFileState) getCachedHandles(ctx context.Context, flags fs.FileFlags, msrc *fs.MountSource) (*handles, bool) {
+	i.handlesMu.Lock()
+	defer i.handlesMu.Unlock()
+
+	// Only read-only opens on a read-only mount share handles.
+	if !flags.Read || flags.Write || !msrc.Flags.ReadOnly {
+		return nil, false
+	}
+
+	if i.readonly == nil {
+		// Nothing cached yet: open once and remember the result.
+		h, err := newHandles(ctx, i.file, flags)
+		if err != nil {
+			return nil, false
+		}
+		i.readonly = h
+	}
+
+	i.readonly.IncRef()
+	return i.readonly, true
+}
+
+// ReadToBlocksAt implements fsutil.CachedFileObject.ReadToBlocksAt by
+// reading through the readthrough handles at the given offset.
+func (i *inodeFileState) ReadToBlocksAt(ctx context.Context, dsts safemem.BlockSeq, offset uint64) (uint64, error) {
+	i.handlesMu.RLock()
+	defer i.handlesMu.RUnlock()
+
+	rw := i.readthrough.readWriterAt(ctx, int64(offset))
+	return rw.ReadToBlocks(dsts)
+}
+
+// WriteFromBlocksAt implements fsutil.CachedFileObject.WriteFromBlocksAt by
+// writing through the writeback handles at the given offset.
+func (i *inodeFileState) WriteFromBlocksAt(ctx context.Context, srcs safemem.BlockSeq, offset uint64) (uint64, error) {
+	i.handlesMu.RLock()
+	defer i.handlesMu.RUnlock()
+
+	rw := i.writeback.readWriterAt(ctx, int64(offset))
+	return rw.WriteFromBlocks(srcs)
+}
+
+// SetMaskedAttributes implements fsutil.CachedFileObject.SetMaskedAttributes.
+//
+// The attributes selected by mask are forwarded to the p9 file via setAttr;
+// an empty mask returns nil without issuing a request.
+func (i *inodeFileState) SetMaskedAttributes(ctx context.Context, mask fs.AttrMask, attr fs.UnstableAttr) error {
+	if mask.Empty() {
+		return nil
+	}
+
+	atimeSec, atimeNsec := attr.AccessTime.Unix()
+	mtimeSec, mtimeNsec := attr.ModificationTime.Unix()
+
+	// Setting the access or modification time implies an update of the
+	// status change time. Updating status change time to a time earlier
+	// than the system time is not possible.
+	valid := p9.SetAttrMask{
+		Permissions:        mask.Perms,
+		Size:               mask.Size,
+		UID:                mask.UID,
+		GID:                mask.GID,
+		ATime:              mask.AccessTime,
+		ATimeNotSystemTime: true,
+		MTime:              mask.ModificationTime,
+		MTimeNotSystemTime: true,
+	}
+	values := p9.SetAttr{
+		Permissions:      p9.FileMode(attr.Perms.LinuxMode()),
+		UID:              p9.UID(attr.Owner.UID),
+		GID:              p9.GID(attr.Owner.GID),
+		Size:             uint64(attr.Size),
+		ATimeSeconds:     uint64(atimeSec),
+		ATimeNanoSeconds: uint64(atimeNsec),
+		MTimeSeconds:     uint64(mtimeSec),
+		MTimeNanoSeconds: uint64(mtimeNsec),
+	}
+	return i.file.setAttr(ctx, valid, values)
+}
+
+// Sync implements fsutil.CachedFileObject.Sync. It fsyncs the writeback
+// handles' file, and is a no-op when no writeback handles are installed.
+func (i *inodeFileState) Sync(ctx context.Context) error {
+	i.handlesMu.RLock()
+	defer i.handlesMu.RUnlock()
+
+	if wb := i.writeback; wb != nil {
+		return wb.File.fsync(ctx)
+	}
+	return nil
+}
+
+// FD implements fsutil.CachedFileObject.FD.
+//
+// FD meets the requirements of fsutil.CachedFileObject.FD because p9.File.Open
+// returns a host file descriptor to back _both_ readthrough and writeback or
+// not at all (e.g. both are nil).
+//
+// It returns -1 when the selected handles have no host file, and panics if
+// neither writeback nor readthrough handles have been installed.
+func (i *inodeFileState) FD() int {
+	i.handlesMu.RLock()
+	defer i.handlesMu.RUnlock()
+
+	// If this file is mapped, then it must have been opened read-write
+	// and i.writeback was upgraded to a read-write handle, so prefer
+	// that. Otherwise the file may only have been opened readable so
+	// far, and readthrough is the only way to access it.
+	h := i.writeback
+	if h == nil {
+		h = i.readthrough
+	}
+
+	// Assert that the file was actually opened.
+	if h == nil {
+		panic("cannot get host FD for a file that was never opened")
+	}
+	if h.Host == nil {
+		return -1
+	}
+	return int(h.Host.FD())
+}
+
+// waitForLoad blocks until any restore-issued loading is done.
+func (i *inodeFileState) waitForLoad() {
+ // This is not a no-op. The loading mutex is held upon restore until
+ // all loading actions are done, so acquiring it waits for them.
+ i.loading.Lock()
+ i.loading.Unlock()
+}
+
+// unstableAttr fetches the attributes of the p9 file with getattr and
+// converts them to an fs.UnstableAttr using the session's mounter and client.
+func (i *inodeFileState) unstableAttr(ctx context.Context) (fs.UnstableAttr, error) {
+	_, mask, attr, err := getattr(ctx, i.file)
+	if err != nil {
+		return fs.UnstableAttr{}, err
+	}
+
+	uattr := unstable(ctx, mask, attr, i.s.mounter, i.s.client)
+	return uattr, nil
+}
+
+// session returns the gofer session recorded in this inode's file state.
+func (i *inodeOperations) session() *session {
+	state := i.fileState
+	return state.s
+}
+
+// Release implements fs.InodeOperations.Release by releasing the file state and then cachingInodeOps.
+func (i *inodeOperations) Release(ctx context.Context) {
+ i.fileState.Release(ctx)
+ i.cachingInodeOps.Release()
+}
+
+// Mappable implements fs.InodeOperations.Mappable. Only regular files on a
+// session whose cache policy is not cacheNone are mappable; everything else
+// yields nil.
+func (i *inodeOperations) Mappable(inode *fs.Inode) memmap.Mappable {
+	if isFileCachable(i.session(), inode) {
+		return i.cachingInodeOps
+	}
+	return nil
+}
+
+// isCachable reports whether inode is a regular file or a directory on a
+// session whose cache policy is not cacheNone.
+func isCachable(session *session, inode *fs.Inode) bool {
+	if session.cachePolicy == cacheNone {
+		return false
+	}
+	return fs.IsFile(inode.StableAttr) || fs.IsDir(inode.StableAttr)
+}
+
+// isFileCachable reports whether inode is a regular file on a session whose
+// cache policy is not cacheNone.
+func isFileCachable(session *session, inode *fs.Inode) bool {
+	if session.cachePolicy == cacheNone {
+		return false
+	}
+	return fs.IsFile(inode.StableAttr)
+}
+
+// UnstableAttr implements fs.InodeOperations.UnstableAttr. Cachable inodes
+// are answered by cachingInodeOps; all others query the file state directly.
+func (i *inodeOperations) UnstableAttr(ctx context.Context, inode *fs.Inode) (fs.UnstableAttr, error) {
+	if !isCachable(i.session(), inode) {
+		return i.fileState.unstableAttr(ctx)
+	}
+	return i.cachingInodeOps.UnstableAttr(ctx, inode)
+}
+
+// Check implements fs.InodeOperations.Check by delegating to fs.ContextCanAccessFile.
+func (i *inodeOperations) Check(ctx context.Context, inode *fs.Inode, p fs.PermMask) bool {
+ return fs.ContextCanAccessFile(ctx, inode, p)
+}
+
+// GetFile implements fs.InodeOperations.GetFile. Sockets and pipes have
+// dedicated open paths; every other inode type goes through getFileDefault.
+func (i *inodeOperations) GetFile(ctx context.Context, d *fs.Dirent, flags fs.FileFlags) (*fs.File, error) {
+	typ := d.Inode.StableAttr.Type
+	if typ == fs.Socket {
+		return i.getFileSocket(ctx, d, flags)
+	}
+	if typ == fs.Pipe {
+		return i.getFilePipe(ctx, d, flags)
+	}
+	return i.getFileDefault(ctx, d, flags)
+}
+
+// getFileSocket connects to the socket behind this inode and wraps the
+// resulting descriptor in a host socket file bound to d.
+func (i *inodeOperations) getFileSocket(ctx context.Context, d *fs.Dirent, flags fs.FileFlags) (*fs.File, error) {
+	sock, err := i.fileState.file.connect(ctx, p9.AnonymousSocket)
+	if err != nil {
+		// Any connect failure is reported to the caller as EIO.
+		return nil, syscall.EIO
+	}
+
+	file, err := host.NewSocketWithDirent(ctx, d, sock, flags)
+	if err != nil {
+		// The descriptor was not handed off; close it here.
+		sock.Close()
+		return nil, err
+	}
+	return file, nil
+}
+
+// getFilePipe opens the named pipe behind this inode.
+//
+// It first tries to open the pipe as a host pipe via fdpipe.Open. If that
+// reports errNotHostFile, the pipe was never a host file and is instead
+// backed by freshly opened handles. Any other error is returned as-is with a
+// nil file; no fs.File is constructed around nil pipe operations.
+func (i *inodeOperations) getFilePipe(ctx context.Context, d *fs.Dirent, flags fs.FileFlags) (*fs.File, error) {
+	// Try to open as a host pipe.
+	pipeOps, err := fdpipe.Open(ctx, i, flags)
+	if err == nil {
+		return fs.NewFile(ctx, d, flags, pipeOps), nil
+	}
+	if err != errNotHostFile {
+		// A genuine failure: do not build a file from the nil pipeOps.
+		return nil, err
+	}
+
+	// If the error is due to the fact that this was never a host pipe, then back
+	// this file with its dirent.
+	h, err := newHandles(ctx, i.fileState.file, flags)
+	if err != nil {
+		return nil, err
+	}
+	return NewFile(ctx, d, flags, i, h), nil
+}
+
+// errNotHostFile indicates that the file is not a host file and should be treated as a remote file.
+var errNotHostFile = errors.New("not a host file")
+
+// NonBlockingOpen implements fdpipe.NonBlockingOpener for opening host named pipes.
+//
+// It returns errNotHostFile when the open succeeds without yielding a host
+// file, which tells the caller to treat the file as a remote file.
+func (i *inodeOperations) NonBlockingOpen(ctx context.Context, p fs.PermMask) (*fd.FD, error) {
+	i.fileState.waitForLoad()
+
+	// Clone the fid; the clone is what gets opened, and it is closed
+	// again on return.
+	_, clone, err := i.fileState.file.walk(ctx, nil)
+	if err != nil {
+		log.Warningf("Open Walk failed: %v", err)
+		return nil, err
+	}
+	defer clone.close(ctx)
+
+	openFlags, err := openFlagsFromPerms(p)
+	if err != nil {
+		log.Warningf("Open flags %s parsing failed: %v", p, err)
+		return nil, err
+	}
+
+	hostFile, _, _, err := clone.open(ctx, openFlags)
+	if err == nil && hostFile == nil {
+		// No error and no host file: this was never a host file to
+		// begin with, and should be treated like a remote file.
+		return nil, errNotHostFile
+	}
+	return hostFile, err
+}
+
+// getFileDefault opens the inode through p9 handles. For cachable regular
+// files it first tries the shared cached handles and afterwards registers
+// the handles for cached I/O; otherwise it simply opens new handles.
+func (i *inodeOperations) getFileDefault(ctx context.Context, d *fs.Dirent, flags fs.FileFlags) (*fs.File, error) {
+	cachable := isFileCachable(i.session(), d.Inode)
+
+	var (
+		h  *handles
+		ok bool
+	)
+	if cachable {
+		h, ok = i.fileState.getCachedHandles(ctx, flags, d.Inode.MountSource)
+	}
+	if !ok {
+		// No shared handles are available; open fresh ones.
+		var err error
+		if h, err = newHandles(ctx, i.fileState.file, flags); err != nil {
+			return nil, err
+		}
+	}
+	if cachable {
+		i.fileState.setHandlesForCachedIO(flags, h)
+	}
+
+	return NewFile(ctx, d, flags, i, h), nil
+}
+
+// SetPermissions implements fs.InodeOperations.SetPermissions. It reports
+// whether the permissions were applied.
+func (i *inodeOperations) SetPermissions(ctx context.Context, inode *fs.Inode, p fs.FilePermissions) bool {
+	if !isCachable(i.session(), inode) {
+		// Execute the chmod directly against the p9 file.
+		err := i.fileState.file.setAttr(
+			ctx,
+			p9.SetAttrMask{Permissions: true},
+			p9.SetAttr{Permissions: p9.FileMode(p.LinuxMode())},
+		)
+		return err == nil
+	}
+	return i.cachingInodeOps.SetPermissions(ctx, inode, p)
+}
+
+// SetOwner implements fs.InodeOperations.SetOwner. Only the IDs in owner
+// that are Ok are changed; if neither is, nothing is sent.
+func (i *inodeOperations) SetOwner(ctx context.Context, inode *fs.Inode, owner fs.FileOwner) error {
+	// Save the roundtrip when there is nothing to change.
+	if !(owner.UID.Ok() || owner.GID.Ok()) {
+		return nil
+	}
+
+	if isCachable(i.session(), inode) {
+		return i.cachingInodeOps.SetOwner(ctx, inode, owner)
+	}
+
+	var (
+		mask p9.SetAttrMask
+		attr p9.SetAttr
+	)
+	if owner.UID.Ok() {
+		mask.UID, attr.UID = true, p9.UID(owner.UID)
+	}
+	if owner.GID.Ok() {
+		mask.GID, attr.GID = true, p9.GID(owner.GID)
+	}
+	return i.fileState.file.setAttr(ctx, mask, attr)
+}
+
+// SetTimestamps implements fs.InodeOperations.SetTimestamps. Cachable inodes
+// go through cachingInodeOps; all others call utimes on the p9 file.
+func (i *inodeOperations) SetTimestamps(ctx context.Context, inode *fs.Inode, ts fs.TimeSpec) error {
+	if !isCachable(i.session(), inode) {
+		return utimes(ctx, i.fileState.file, ts)
+	}
+	return i.cachingInodeOps.SetTimestamps(ctx, inode, ts)
+}
+
+// Truncate implements fs.InodeOperations.Truncate. Cachable files are
+// truncated through cachingInodeOps; otherwise the size is set directly on
+// the p9 file.
+func (i *inodeOperations) Truncate(ctx context.Context, inode *fs.Inode, length int64) error {
+	// This can only be called for files anyway.
+	if !isFileCachable(i.session(), inode) {
+		sizeOnly := p9.SetAttrMask{Size: true}
+		return i.fileState.file.setAttr(ctx, sizeOnly, p9.SetAttr{Size: uint64(length)})
+	}
+	return i.cachingInodeOps.Truncate(ctx, inode, length)
+}
+
+// WriteOut implements fs.InodeOperations.WriteOut. It is a no-op for inodes
+// that are not cachable.
+func (i *inodeOperations) WriteOut(ctx context.Context, inode *fs.Inode) error {
+	if isCachable(i.session(), inode) {
+		return i.cachingInodeOps.WriteOut(ctx, inode)
+	}
+	return nil
+}
+
+// Readlink implements fs.InodeOperations.Readlink. It fails with ENOLINK
+// when inode is not a symlink.
+func (i *inodeOperations) Readlink(ctx context.Context, inode *fs.Inode) (string, error) {
+	if fs.IsSymlink(inode.StableAttr) {
+		return i.fileState.file.readlink(ctx)
+	}
+	return "", syscall.ENOLINK
+}
+
+// Getlink implements fs.InodeOperations.Getlink. For symlinks it always
+// returns fs.ErrResolveViaReadlink; for anything else it fails with ENOLINK.
+func (i *inodeOperations) Getlink(context.Context, *fs.Inode) (*fs.Dirent, error) {
+	if fs.IsSymlink(i.fileState.sattr) {
+		return nil, fs.ErrResolveViaReadlink
+	}
+	return nil, syserror.ENOLINK
+}
+
+// StatFS makes a StatFS request on the p9 file and converts the reply into
+// an fs.Info.
+func (i *inodeOperations) StatFS(ctx context.Context) (fs.Info, error) {
+	fsstat, err := i.fileState.file.statFS(ctx)
+	if err != nil {
+		return fs.Info{}, err
+	}
+
+	// If blocks available is non-zero, prefer that over blocks free.
+	freeBlocks := fsstat.BlocksFree
+	if fsstat.BlocksAvailable != 0 {
+		freeBlocks = fsstat.BlocksAvailable
+	}
+
+	return fs.Info{
+		// This is primarily for distinguishing a gofer file system in
+		// tests. Testing is important, so instead of defining
+		// something completely random, use a standard value.
+		Type:        linux.V9FS_MAGIC,
+		TotalBlocks: fsstat.Blocks,
+		FreeBlocks:  freeBlocks,
+		TotalFiles:  fsstat.Files,
+		FreeFiles:   fsstat.FilesFree,
+	}, nil
+}
+
+func init() {
+ syserror.AddErrorUnwrapper(func(err error) (syscall.Errno, bool) {
+ if _, ok := err.(p9.ErrSocket); ok {
+ // Treat as an I/O error.
+ return syscall.EIO, true
+ }
+ return 0, false
+ })
+}
+
+// AddLink implements fs.InodeOperations.AddLink, but is currently a no-op.
+// FIXME: Remove this from InodeOperations altogether.
+func (*inodeOperations) AddLink() {}
+
+// DropLink implements fs.InodeOperations.DropLink, but is currently a no-op.
+// FIXME: Remove this from InodeOperations altogether.
+func (*inodeOperations) DropLink() {}
+
+// NotifyStatusChange implements fs.InodeOperations.NotifyStatusChange, but is currently a no-op.
+// FIXME: Remove this from InodeOperations altogether.
+func (i *inodeOperations) NotifyStatusChange(ctx context.Context) {}