summaryrefslogtreecommitdiffhomepage
path: root/pkg/sentry/vfs/dentry.go
diff options
context:
space:
mode:
Diffstat (limited to 'pkg/sentry/vfs/dentry.go')
-rw-r--r--pkg/sentry/vfs/dentry.go347
1 files changed, 347 insertions, 0 deletions
diff --git a/pkg/sentry/vfs/dentry.go b/pkg/sentry/vfs/dentry.go
new file mode 100644
index 000000000..45912fc58
--- /dev/null
+++ b/pkg/sentry/vfs/dentry.go
@@ -0,0 +1,347 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package vfs
+
+import (
+ "fmt"
+ "sync/atomic"
+
+ "gvisor.dev/gvisor/pkg/syserror"
+)
+
+// Dentry represents a node in a Filesystem tree which may represent a file.
+//
+// Dentries are reference-counted. Unless otherwise specified, all Dentry
+// methods require that a reference is held.
+//
+// A Dentry transitions through up to 3 different states through its lifetime:
+//
+// - Dentries are initially "independent". Independent Dentries have no parent,
+// and consequently no name.
+//
+// - Dentry.InsertChild() causes an independent Dentry to become a "child" of
+// another Dentry. A child node has a parent node, and a name in that parent,
+// both of which are mutable by DentryMoveChild(). Each child Dentry's name is
+// unique within its parent.
+//
+// - Dentry.RemoveChild() causes a child Dentry to become "disowned". A
+// disowned Dentry can still refer to its former parent and its former name in
+// said parent, but the disowned Dentry is no longer reachable from its parent,
+// and a new Dentry with the same name may become a child of the parent. (This
+// is analogous to a struct dentry being "unhashed" in Linux.)
+//
+// Dentry is loosely analogous to Linux's struct dentry, but:
+//
+// - VFS does not associate Dentries with inodes. gVisor interacts primarily
+// with filesystems that are accessed through filesystem APIs (as opposed to
+// raw block devices); many such APIs support only paths and file descriptors,
+// and not inodes. Furthermore, when parties outside the scope of VFS can
+// rename inodes on such filesystems, VFS generally cannot "follow" the rename,
+// both due to synchronization issues and because it may not even be able to
+// name the destination path; this implies that it would in fact be *incorrect*
+// for Dentries to be associated with inodes on such filesystems. Consequently,
+// operations that are inode operations in Linux are FilesystemImpl methods
+// and/or FileDescriptionImpl methods in gVisor's VFS. Filesystems that do
+// support inodes may store appropriate state in implementations of DentryImpl.
+//
+// - VFS does not provide synchronization for mutable Dentry fields, other than
+// mount-related ones.
+//
+// - VFS does not require that Dentries are instantiated for all paths accessed
+// through VFS, only those that are tracked beyond the scope of a single
+// Filesystem operation. This includes file descriptions, mount points, mount
+// roots, process working directories, and chroots. This avoids instantiation
+// of Dentries for operations on mutable remote filesystems that can't actually
+// cache any state in the Dentry.
+//
+// - For the reasons above, VFS is not directly responsible for managing Dentry
+// lifetime. Dentry reference counts only indicate the extent to which VFS
+// requires Dentries to exist; Filesystems may elect to cache or discard
+// Dentries with zero references.
+type Dentry struct {
+ // parent is this Dentry's parent in this Filesystem. If this Dentry is
+ // independent, parent is nil.
+ parent *Dentry
+
+ // name is this Dentry's name in parent.
+ name string
+
+ flags uint32
+
+ // mounts is the number of Mounts for which this Dentry is Mount.point.
+ // mounts is accessed using atomic memory operations.
+ mounts uint32
+
+ // children are child Dentries.
+ children map[string]*Dentry
+
+ // impl is the DentryImpl associated with this Dentry. impl is immutable.
+ // This should be the last field in Dentry.
+ impl DentryImpl
+}
+
+const (
+ // dflagsDisownedMask is set in Dentry.flags if the Dentry has been
+ // disowned.
+ dflagsDisownedMask = 1 << iota
+)
+
+// Init must be called before first use of d.
+func (d *Dentry) Init(impl DentryImpl) {
+ d.impl = impl
+}
+
+// Impl returns the DentryImpl associated with d.
+func (d *Dentry) Impl() DentryImpl {
+ return d.impl
+}
+
+// DentryImpl contains implementation details for a Dentry. Implementations of
+// DentryImpl should contain their associated Dentry by value as their first
+// field.
+type DentryImpl interface {
+ // IncRef increments the Dentry's reference count. A Dentry with a non-zero
+ // reference count must remain coherent with the state of the filesystem.
+ IncRef(fs *Filesystem)
+
+ // TryIncRef increments the Dentry's reference count and returns true. If
+ // the Dentry's reference count is zero, TryIncRef may do nothing and
+ // return false. (It is also permitted to succeed if it can restore the
+ // guarantee that the Dentry is coherent with the state of the filesystem.)
+ //
+ // TryIncRef does not require that a reference is held on the Dentry.
+ TryIncRef(fs *Filesystem) bool
+
+ // DecRef decrements the Dentry's reference count.
+ DecRef(fs *Filesystem)
+}
+
+// IsDisowned returns true if d is disowned.
+func (d *Dentry) IsDisowned() bool {
+ return atomic.LoadUint32(&d.flags)&dflagsDisownedMask != 0
+}
+
+// Preconditions: !d.IsDisowned().
+func (d *Dentry) setDisowned() {
+ atomic.AddUint32(&d.flags, dflagsDisownedMask)
+}
+
+func (d *Dentry) isMounted() bool {
+ return atomic.LoadUint32(&d.mounts) != 0
+}
+
+func (d *Dentry) incRef(fs *Filesystem) {
+ d.impl.IncRef(fs)
+}
+
+func (d *Dentry) tryIncRef(fs *Filesystem) bool {
+ return d.impl.TryIncRef(fs)
+}
+
+func (d *Dentry) decRef(fs *Filesystem) {
+ d.impl.DecRef(fs)
+}
+
+// These functions are exported so that filesystem implementations can use
+// them. The vfs package, and users of VFS, should not call these functions.
+// Unless otherwise specified, these methods require that there are no
+// concurrent mutators of d.
+
+// Name returns d's name in its parent in its owning Filesystem. If d is
+// independent, Name returns an empty string.
+func (d *Dentry) Name() string {
+ return d.name
+}
+
+// Parent returns d's parent in its owning Filesystem. It does not take a
+// reference on the returned Dentry. If d is independent, Parent returns nil.
+func (d *Dentry) Parent() *Dentry {
+ return d.parent
+}
+
+// ParentOrSelf is equivalent to Parent, but returns d if d is independent.
+func (d *Dentry) ParentOrSelf() *Dentry {
+ if d.parent == nil {
+ return d
+ }
+ return d.parent
+}
+
+// Child returns d's child with the given name in its owning Filesystem. It
+// does not take a reference on the returned Dentry. If no such child exists,
+// Child returns nil.
+func (d *Dentry) Child(name string) *Dentry {
+ return d.children[name]
+}
+
+// HasChildren returns true if d has any children.
+func (d *Dentry) HasChildren() bool {
+ return len(d.children) != 0
+}
+
+// InsertChild makes child a child of d with the given name.
+//
+// InsertChild is a mutator of d and child.
+//
+// Preconditions: child must be an independent Dentry. d and child must be from
+// the same Filesystem. d must not already have a child with the given name.
+func (d *Dentry) InsertChild(child *Dentry, name string) {
+ if checkInvariants {
+ if _, ok := d.children[name]; ok {
+ panic(fmt.Sprintf("parent already contains a child named %q", name))
+ }
+ if child.parent != nil || child.name != "" {
+ panic(fmt.Sprintf("child is not independent: parent = %v, name = %q", child.parent, child.name))
+ }
+ }
+ if d.children == nil {
+ d.children = make(map[string]*Dentry)
+ }
+ d.children[name] = child
+ child.parent = d
+ child.name = name
+}
+
+// PrepareDeleteDentry must be called before attempting to delete the file
+// represented by d. If PrepareDeleteDentry succeeds, the caller must call
+// AbortDeleteDentry or CommitDeleteDentry depending on the deletion's outcome.
+//
+// Preconditions: d is a child Dentry.
+func (vfs *VirtualFilesystem) PrepareDeleteDentry(mntns *MountNamespace, d *Dentry) error {
+ if checkInvariants {
+ if d.parent == nil {
+ panic("d is independent")
+ }
+ if d.IsDisowned() {
+ panic("d is already disowned")
+ }
+ }
+ vfs.mountMu.RLock()
+ if _, ok := mntns.mountpoints[d]; ok {
+ vfs.mountMu.RUnlock()
+ return syserror.EBUSY
+ }
+ // Return with vfs.mountMu locked, which will be unlocked by
+ // AbortDeleteDentry or CommitDeleteDentry.
+ return nil
+}
+
+// AbortDeleteDentry must be called after PrepareDeleteDentry if the deletion
+// fails.
+func (vfs *VirtualFilesystem) AbortDeleteDentry() {
+ vfs.mountMu.RUnlock()
+}
+
+// CommitDeleteDentry must be called after the file represented by d is
+// deleted, and causes d to become disowned.
+//
+// Preconditions: PrepareDeleteDentry was previously called on d.
+func (vfs *VirtualFilesystem) CommitDeleteDentry(d *Dentry) {
+ delete(d.parent.children, d.name)
+ d.setDisowned()
+ // TODO: lazily unmount mounts at d
+ vfs.mountMu.RUnlock()
+}
+
+// DeleteDentry combines PrepareDeleteDentry and CommitDeleteDentry, as
+// appropriate for in-memory filesystems that don't need to ensure that some
+// external state change succeeds before committing the deletion.
+func (vfs *VirtualFilesystem) DeleteDentry(mntns *MountNamespace, d *Dentry) error {
+ if err := vfs.PrepareDeleteDentry(mntns, d); err != nil {
+ return err
+ }
+ vfs.CommitDeleteDentry(d)
+ return nil
+}
+
+// PrepareRenameDentry must be called before attempting to rename the file
+// represented by from. If to is not nil, it represents the file that will be
+// replaced or exchanged by the rename. If PrepareRenameDentry succeeds, the
+// caller must call AbortRenameDentry, CommitRenameReplaceDentry, or
+// CommitRenameExchangeDentry depending on the rename's outcome.
+//
+// Preconditions: from is a child Dentry. If to is not nil, it must be a child
+// Dentry from the same Filesystem.
+func (vfs *VirtualFilesystem) PrepareRenameDentry(mntns *MountNamespace, from, to *Dentry) error {
+ if checkInvariants {
+ if from.parent == nil {
+ panic("from is independent")
+ }
+ if from.IsDisowned() {
+ panic("from is already disowned")
+ }
+ if to != nil {
+ if to.parent == nil {
+ panic("to is independent")
+ }
+ if to.IsDisowned() {
+ panic("to is already disowned")
+ }
+ }
+ }
+ vfs.mountMu.RLock()
+ if _, ok := mntns.mountpoints[from]; ok {
+ vfs.mountMu.RUnlock()
+ return syserror.EBUSY
+ }
+ if to != nil {
+ if _, ok := mntns.mountpoints[to]; ok {
+ vfs.mountMu.RUnlock()
+ return syserror.EBUSY
+ }
+ }
+ // Return with vfs.mountMu locked, which will be unlocked by
+ // AbortRenameDentry, CommitRenameReplaceDentry, or
+ // CommitRenameExchangeDentry.
+ return nil
+}
+
+// AbortRenameDentry must be called after PrepareRenameDentry if the rename
+// fails.
+func (vfs *VirtualFilesystem) AbortRenameDentry() {
+ vfs.mountMu.RUnlock()
+}
+
+// CommitRenameReplaceDentry must be called after the file represented by from
+// is renamed without RENAME_EXCHANGE. If to is not nil, it represents the file
+// that was replaced by from.
+//
+// Preconditions: PrepareRenameDentry was previously called on from and to.
+// newParent.Child(newName) == to.
+func (vfs *VirtualFilesystem) CommitRenameReplaceDentry(from, newParent *Dentry, newName string, to *Dentry) {
+ if to != nil {
+ to.setDisowned()
+ // TODO: lazily unmount mounts at d
+ }
+ if newParent.children == nil {
+ newParent.children = make(map[string]*Dentry)
+ }
+ newParent.children[newName] = from
+ from.parent = newParent
+ from.name = newName
+ vfs.mountMu.RUnlock()
+}
+
+// CommitRenameExchangeDentry must be called after the files represented by
+// from and to are exchanged by rename(RENAME_EXCHANGE).
+//
+// Preconditions: PrepareRenameDentry was previously called on from and to.
+func (vfs *VirtualFilesystem) CommitRenameExchangeDentry(from, to *Dentry) {
+ from.parent, to.parent = to.parent, from.parent
+ from.name, to.name = to.name, from.name
+ from.parent.children[from.name] = from
+ to.parent.children[to.name] = to
+ vfs.mountMu.RUnlock()
+}