diff options
Diffstat (limited to 'pkg/sentry/vfs/dentry.go')
-rw-r--r-- | pkg/sentry/vfs/dentry.go | 324 |
1 files changed, 324 insertions, 0 deletions
diff --git a/pkg/sentry/vfs/dentry.go b/pkg/sentry/vfs/dentry.go new file mode 100644 index 000000000..cea3e6955 --- /dev/null +++ b/pkg/sentry/vfs/dentry.go @@ -0,0 +1,324 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package vfs + +import ( + "sync/atomic" + + "gvisor.dev/gvisor/pkg/sync" + "gvisor.dev/gvisor/pkg/syserror" +) + +// Dentry represents a node in a Filesystem tree at which a file exists. +// +// Dentries are reference-counted. Unless otherwise specified, all Dentry +// methods require that a reference is held. +// +// Dentry is loosely analogous to Linux's struct dentry, but: +// +// - VFS does not associate Dentries with inodes. gVisor interacts primarily +// with filesystems that are accessed through filesystem APIs (as opposed to +// raw block devices); many such APIs support only paths and file descriptors, +// and not inodes. Furthermore, when parties outside the scope of VFS can +// rename inodes on such filesystems, VFS generally cannot "follow" the rename, +// both due to synchronization issues and because it may not even be able to +// name the destination path; this implies that it would in fact be incorrect +// for Dentries to be associated with inodes on such filesystems. Consequently, +// operations that are inode operations in Linux are FilesystemImpl methods +// and/or FileDescriptionImpl methods in gVisor's VFS. 
// Filesystems that do support inodes may store appropriate state in
// implementations of DentryImpl.
//
// - VFS does not require that Dentries are instantiated for all paths accessed
// through VFS, only those that are tracked beyond the scope of a single
// Filesystem operation. This includes file descriptions, mount points, mount
// roots, process working directories, and chroots. This avoids instantiation
// of Dentries for operations on mutable remote filesystems that can't actually
// cache any state in the Dentry.
//
// - VFS does not track filesystem structure (i.e. relationships between
// Dentries), since both the relevant state and synchronization are
// filesystem-specific.
//
// - For the reasons above, VFS is not directly responsible for managing Dentry
// lifetime. Dentry reference counts only indicate the extent to which VFS
// requires Dentries to exist; Filesystems may elect to cache or discard
// Dentries with zero references.
//
// +stateify savable
type Dentry struct {
	// mu synchronizes deletion/invalidation and mounting over this Dentry.
	// The Prepare*Dentry methods return with mu held; the matching
	// Abort*/Commit* methods release it.
	mu sync.Mutex `state:"nosave"`

	// dead is true if the file represented by this Dentry has been deleted (by
	// CommitDeleteDentry or CommitRenameReplaceDentry) or invalidated (by
	// InvalidateDentry). dead is protected by mu.
	dead bool

	// mounts is the number of Mounts for which this Dentry is Mount.point.
	// mounts is accessed using atomic memory operations (see isMounted).
	mounts uint32

	// impl is the DentryImpl associated with this Dentry. impl is immutable.
	// This should be the last field in Dentry.
	impl DentryImpl
}

// Init must be called before first use of d.
//
// Init only records impl as d's DentryImpl; all other fields of d are left
// untouched.
func (d *Dentry) Init(impl DentryImpl) {
	d.impl = impl
}

// Impl returns the DentryImpl associated with d, i.e. the value previously
// passed to Init.
func (d *Dentry) Impl() DentryImpl {
	return d.impl
}

// DentryImpl contains implementation details for a Dentry.
// Implementations of DentryImpl should contain their associated Dentry by
// value as their first field.
type DentryImpl interface {
	// IncRef increments the Dentry's reference count. A Dentry with a non-zero
	// reference count must remain coherent with the state of the filesystem.
	IncRef()

	// TryIncRef increments the Dentry's reference count and returns true. If
	// the Dentry's reference count is zero, TryIncRef may do nothing and
	// return false. (It is also permitted to succeed if it can restore the
	// guarantee that the Dentry is coherent with the state of the filesystem.)
	//
	// TryIncRef does not require that a reference is held on the Dentry.
	TryIncRef() bool

	// DecRef decrements the Dentry's reference count.
	DecRef()

	// InotifyWithParent notifies all watches on the targets represented by this
	// dentry and its parent. The parent's watches are notified first, followed
	// by this dentry's.
	//
	// InotifyWithParent automatically adds the IN_ISDIR flag for dentries
	// representing directories.
	//
	// Note that the events may not actually propagate up to the user, depending
	// on the event masks.
	InotifyWithParent(events, cookie uint32, et EventType)

	// Watches returns the set of inotify watches for the file corresponding to
	// the Dentry. Dentries that are hard links to the same underlying file
	// share the same watches.
	//
	// Watches may return nil if the dentry belongs to a FilesystemImpl that
	// does not support inotify. If an implementation returns a non-nil watch
	// set, it must always return a non-nil watch set. Likewise, if an
	// implementation returns a nil watch set, it must always return a nil watch
	// set.
	//
	// The caller does not need to hold a reference on the dentry.
	Watches() *Watches

	// OnZeroWatches is called whenever the number of watches on a dentry drops
	// to zero. This is needed by some FilesystemImpls (e.g. gofer) to manage
	// dentry lifetime.
	//
	// The caller does not need to hold a reference on the dentry. OnZeroWatches
	// may acquire inotify locks, so to prevent deadlock, no inotify locks should
	// be held by the caller.
	OnZeroWatches()
}

// IncRef increments d's reference count by delegating to d's DentryImpl.
func (d *Dentry) IncRef() {
	d.impl.IncRef()
}

// TryIncRef increments d's reference count and returns true. If d's reference
// count is zero, TryIncRef may instead do nothing and return false. The
// decision is made by d's DentryImpl.
func (d *Dentry) TryIncRef() bool {
	return d.impl.TryIncRef()
}

// DecRef decrements d's reference count by delegating to d's DentryImpl.
func (d *Dentry) DecRef() {
	d.impl.DecRef()
}

// IsDead returns true if d has been deleted or invalidated by its owning
// filesystem.
func (d *Dentry) IsDead() bool {
	// dead is protected by mu, so it is read with mu held. Note that this
	// blocks while a Prepare*Dentry caller is holding mu.
	d.mu.Lock()
	defer d.mu.Unlock()
	return d.dead
}

// isMounted returns true if d.mounts is non-zero, i.e. if d is Mount.point
// for at least one Mount. The counter is loaded atomically; d.mu is not
// required.
func (d *Dentry) isMounted() bool {
	return atomic.LoadUint32(&d.mounts) != 0
}

// InotifyWithParent notifies all watches on the targets represented by d and
// its parent of events. The work is delegated to d's DentryImpl.
func (d *Dentry) InotifyWithParent(events, cookie uint32, et EventType) {
	d.impl.InotifyWithParent(events, cookie, et)
}

// Watches returns the set of inotify watches associated with d.
//
// Watches will return nil if d belongs to a FilesystemImpl that does not
// support inotify.
func (d *Dentry) Watches() *Watches {
	return d.impl.Watches()
}

// OnZeroWatches performs cleanup tasks whenever the number of watches on a
// dentry drops to zero. The work is delegated to d's DentryImpl.
func (d *Dentry) OnZeroWatches() {
	d.impl.OnZeroWatches()
}

// The following functions are exported so that filesystem implementations can
// use them. The vfs package, and users of VFS, should not call these
// functions.

// PrepareDeleteDentry must be called before attempting to delete the file
// represented by d. If PrepareDeleteDentry succeeds, the caller must call
// AbortDeleteDentry or CommitDeleteDentry depending on the deletion's outcome.
+func (vfs *VirtualFilesystem) PrepareDeleteDentry(mntns *MountNamespace, d *Dentry) error { + vfs.mountMu.Lock() + if mntns.mountpoints[d] != 0 { + vfs.mountMu.Unlock() + return syserror.EBUSY + } + d.mu.Lock() + vfs.mountMu.Unlock() + // Return with d.mu locked to block attempts to mount over it; it will be + // unlocked by AbortDeleteDentry or CommitDeleteDentry. + return nil +} + +// AbortDeleteDentry must be called after PrepareDeleteDentry if the deletion +// fails. +func (vfs *VirtualFilesystem) AbortDeleteDentry(d *Dentry) { + d.mu.Unlock() +} + +// CommitDeleteDentry must be called after PrepareDeleteDentry if the deletion +// succeeds. +func (vfs *VirtualFilesystem) CommitDeleteDentry(d *Dentry) { + d.dead = true + d.mu.Unlock() + if d.isMounted() { + vfs.forgetDeadMountpoint(d) + } +} + +// InvalidateDentry is called when d ceases to represent the file it formerly +// did for reasons outside of VFS' control (e.g. d represents the local state +// of a file on a remote filesystem on which the file has already been +// deleted). +func (vfs *VirtualFilesystem) InvalidateDentry(d *Dentry) { + d.mu.Lock() + d.dead = true + d.mu.Unlock() + if d.isMounted() { + vfs.forgetDeadMountpoint(d) + } +} + +// PrepareRenameDentry must be called before attempting to rename the file +// represented by from. If to is not nil, it represents the file that will be +// replaced or exchanged by the rename. If PrepareRenameDentry succeeds, the +// caller must call AbortRenameDentry, CommitRenameReplaceDentry, or +// CommitRenameExchangeDentry depending on the rename's outcome. +// +// Preconditions: If to is not nil, it must be a child Dentry from the same +// Filesystem. from != to. 
+func (vfs *VirtualFilesystem) PrepareRenameDentry(mntns *MountNamespace, from, to *Dentry) error { + vfs.mountMu.Lock() + if mntns.mountpoints[from] != 0 { + vfs.mountMu.Unlock() + return syserror.EBUSY + } + if to != nil { + if mntns.mountpoints[to] != 0 { + vfs.mountMu.Unlock() + return syserror.EBUSY + } + to.mu.Lock() + } + from.mu.Lock() + vfs.mountMu.Unlock() + // Return with from.mu and to.mu locked, which will be unlocked by + // AbortRenameDentry, CommitRenameReplaceDentry, or + // CommitRenameExchangeDentry. + return nil +} + +// AbortRenameDentry must be called after PrepareRenameDentry if the rename +// fails. +func (vfs *VirtualFilesystem) AbortRenameDentry(from, to *Dentry) { + from.mu.Unlock() + if to != nil { + to.mu.Unlock() + } +} + +// CommitRenameReplaceDentry must be called after the file represented by from +// is renamed without RENAME_EXCHANGE. If to is not nil, it represents the file +// that was replaced by from. +// +// Preconditions: PrepareRenameDentry was previously called on from and to. +func (vfs *VirtualFilesystem) CommitRenameReplaceDentry(from, to *Dentry) { + from.mu.Unlock() + if to != nil { + to.dead = true + to.mu.Unlock() + if to.isMounted() { + vfs.forgetDeadMountpoint(to) + } + } +} + +// CommitRenameExchangeDentry must be called after the files represented by +// from and to are exchanged by rename(RENAME_EXCHANGE). +// +// Preconditions: PrepareRenameDentry was previously called on from and to. +func (vfs *VirtualFilesystem) CommitRenameExchangeDentry(from, to *Dentry) { + from.mu.Unlock() + to.mu.Unlock() +} + +// forgetDeadMountpoint is called when a mount point is deleted or invalidated +// to umount all mounts using it in all other mount namespaces. +// +// forgetDeadMountpoint is analogous to Linux's +// fs/namespace.c:__detach_mounts(). 
+func (vfs *VirtualFilesystem) forgetDeadMountpoint(d *Dentry) { + var ( + vdsToDecRef []VirtualDentry + mountsToDecRef []*Mount + ) + vfs.mountMu.Lock() + vfs.mounts.seq.BeginWrite() + for mnt := range vfs.mountpoints[d] { + vdsToDecRef, mountsToDecRef = vfs.umountRecursiveLocked(mnt, &umountRecursiveOptions{}, vdsToDecRef, mountsToDecRef) + } + vfs.mounts.seq.EndWrite() + vfs.mountMu.Unlock() + for _, vd := range vdsToDecRef { + vd.DecRef() + } + for _, mnt := range mountsToDecRef { + mnt.DecRef() + } +} |