// Copyright 2019 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package vfs

import (
	"sync/atomic"

	"gvisor.dev/gvisor/pkg/context"
	"gvisor.dev/gvisor/pkg/errors/linuxerr"
	"gvisor.dev/gvisor/pkg/sync"
)

// Dentry represents a node in a Filesystem tree at which a file exists.
//
// Dentries are reference-counted. Unless otherwise specified, all Dentry
// methods require that a reference is held.
//
// Dentry is loosely analogous to Linux's struct dentry, but:
//
// - VFS does not associate Dentries with inodes. gVisor interacts primarily
// with filesystems that are accessed through filesystem APIs (as opposed to
// raw block devices); many such APIs support only paths and file descriptors,
// and not inodes. Furthermore, when parties outside the scope of VFS can
// rename inodes on such filesystems, VFS generally cannot "follow" the rename,
// both due to synchronization issues and because it may not even be able to
// name the destination path; this implies that it would in fact be incorrect
// for Dentries to be associated with inodes on such filesystems. Consequently,
// operations that are inode operations in Linux are FilesystemImpl methods
// and/or FileDescriptionImpl methods in gVisor's VFS. Filesystems that do
// support inodes may store appropriate state in implementations of DentryImpl.
//
// - VFS does not require that Dentries are instantiated for all paths accessed
// through VFS, only those that are tracked beyond the scope of a single
// Filesystem operation. This includes file descriptions, mount points, mount
// roots, process working directories, and chroots. This avoids instantiation
// of Dentries for operations on mutable remote filesystems that can't actually
// cache any state in the Dentry.
//
// - VFS does not track filesystem structure (i.e. relationships between
// Dentries), since both the relevant state and synchronization are
// filesystem-specific.
//
// - For the reasons above, VFS is not directly responsible for managing Dentry
// lifetime. Dentry reference counts only indicate the extent to which VFS
// requires Dentries to exist; Filesystems may elect to cache or discard
// Dentries with zero references.
//
// +stateify savable
type Dentry struct {
	// mu synchronizes deletion/invalidation and mounting over this Dentry.
	mu sync.Mutex `state:"nosave"`

	// dead is true if the file represented by this Dentry has been deleted (by
	// CommitDeleteDentry or CommitRenameReplaceDentry) or invalidated (by
	// InvalidateDentry). dead is protected by mu.
	dead bool

	// mounts is the number of Mounts for which this Dentry is Mount.point.
	// mounts is accessed using atomic memory operations.
	mounts uint32

	// impl is the DentryImpl associated with this Dentry. impl is immutable.
	// This should be the last field in Dentry.
	impl DentryImpl
}

// Init must be called before first use of d.
func (d *Dentry) Init(impl DentryImpl) {
	d.impl = impl
}

// Impl returns the DentryImpl associated with d.
func (d *Dentry) Impl() DentryImpl {
	return d.impl
}

// DentryImpl contains implementation details for a Dentry. Implementations of
// DentryImpl should contain their associated Dentry by value as their first
// field.
//
// +stateify savable
type DentryImpl interface {
	// IncRef increments the Dentry's reference count. A Dentry with a non-zero
	// reference count must remain coherent with the state of the filesystem.
	IncRef()

	// TryIncRef increments the Dentry's reference count and returns true. If
	// the Dentry's reference count is zero, TryIncRef may do nothing and
	// return false. (It is also permitted to succeed if it can restore the
	// guarantee that the Dentry is coherent with the state of the filesystem.)
	//
	// TryIncRef does not require that a reference is held on the Dentry.
	TryIncRef() bool

	// DecRef decrements the Dentry's reference count.
	DecRef(ctx context.Context)

	// InotifyWithParent notifies all watches on the targets represented by this
	// dentry and its parent. The parent's watches are notified first, followed
	// by this dentry's.
	//
	// InotifyWithParent automatically adds the IN_ISDIR flag for dentries
	// representing directories.
	//
	// Note that the events may not actually propagate up to the user, depending
	// on the event masks.
	InotifyWithParent(ctx context.Context, events, cookie uint32, et EventType)

	// Watches returns the set of inotify watches for the file corresponding to
	// the Dentry. Dentries that are hard links to the same underlying file
	// share the same watches.
	//
	// Watches may return nil if the dentry belongs to a FilesystemImpl that
	// does not support inotify. If an implementation returns a non-nil watch
	// set, it must always return a non-nil watch set. Likewise, if an
	// implementation returns a nil watch set, it must always return a nil watch
	// set.
	//
	// The caller does not need to hold a reference on the dentry.
	Watches() *Watches

	// OnZeroWatches is called whenever the number of watches on a dentry drops
	// to zero. This is needed by some FilesystemImpls (e.g. gofer) to manage
	// dentry lifetime.
	//
	// The caller does not need to hold a reference on the dentry. OnZeroWatches
	// may acquire inotify locks, so to prevent deadlock, no inotify locks should
	// be held by the caller.
	OnZeroWatches(ctx context.Context)
}

// IncRef increments d's reference count.
func (d *Dentry) IncRef() {
	d.impl.IncRef()
}

// TryIncRef increments d's reference count and returns true. If d's reference
// count is zero, TryIncRef may instead do nothing and return false.
func (d *Dentry) TryIncRef() bool {
	return d.impl.TryIncRef()
}

// DecRef decrements d's reference count.
func (d *Dentry) DecRef(ctx context.Context) {
	d.impl.DecRef(ctx)
}

// IsDead returns true if d has been deleted or invalidated by its owning
// filesystem.
func (d *Dentry) IsDead() bool {
	d.mu.Lock()
	defer d.mu.Unlock()
	return d.dead
}

func (d *Dentry) isMounted() bool {
	return atomic.LoadUint32(&d.mounts) != 0
}

// InotifyWithParent notifies all watches on the targets represented by d and
// its parent of events.
func (d *Dentry) InotifyWithParent(ctx context.Context, events, cookie uint32, et EventType) {
	d.impl.InotifyWithParent(ctx, events, cookie, et)
}

// Watches returns the set of inotify watches associated with d.
//
// Watches will return nil if d belongs to a FilesystemImpl that does not
// support inotify.
func (d *Dentry) Watches() *Watches {
	return d.impl.Watches()
}

// OnZeroWatches performs cleanup tasks whenever the number of watches on a
// dentry drops to zero.
func (d *Dentry) OnZeroWatches(ctx context.Context) {
	d.impl.OnZeroWatches(ctx)
}

// The following functions are exported so that filesystem implementations can
// use them. The vfs package, and users of VFS, should not call these
// functions.

// PrepareDeleteDentry must be called before attempting to delete the file
// represented by d. If PrepareDeleteDentry succeeds, the caller must call
// AbortDeleteDentry or CommitDeleteDentry depending on the deletion's outcome.
func (vfs *VirtualFilesystem) PrepareDeleteDentry(mntns *MountNamespace, d *Dentry) error {
	vfs.mountMu.Lock()
	if mntns.mountpoints[d] != 0 {
		vfs.mountMu.Unlock()
		return linuxerr.EBUSY
	}
	d.mu.Lock()
	vfs.mountMu.Unlock()
	// Return with d.mu locked to block attempts to mount over it; it will be
	// unlocked by AbortDeleteDentry or CommitDeleteDentry.
	return nil
}

// AbortDeleteDentry must be called after PrepareDeleteDentry if the deletion
// fails.
// +checklocks:d.mu
func (vfs *VirtualFilesystem) AbortDeleteDentry(d *Dentry) {
	d.mu.Unlock()
}

// CommitDeleteDentry must be called after PrepareDeleteDentry if the deletion
// succeeds.
// +checklocks:d.mu
func (vfs *VirtualFilesystem) CommitDeleteDentry(ctx context.Context, d *Dentry) {
	d.dead = true
	d.mu.Unlock()
	if d.isMounted() {
		vfs.forgetDeadMountpoint(ctx, d)
	}
}

// InvalidateDentry is called when d ceases to represent the file it formerly
// did for reasons outside of VFS' control (e.g. d represents the local state
// of a file on a remote filesystem on which the file has already been
// deleted).
func (vfs *VirtualFilesystem) InvalidateDentry(ctx context.Context, d *Dentry) {
	d.mu.Lock()
	d.dead = true
	d.mu.Unlock()
	if d.isMounted() {
		vfs.forgetDeadMountpoint(ctx, d)
	}
}

// PrepareRenameDentry must be called before attempting to rename the file
// represented by from. If to is not nil, it represents the file that will be
// replaced or exchanged by the rename. If PrepareRenameDentry succeeds, the
// caller must call AbortRenameDentry, CommitRenameReplaceDentry, or
// CommitRenameExchangeDentry depending on the rename's outcome.
//
// Preconditions:
// * If to is not nil, it must be a child Dentry from the same Filesystem.
// * from != to.
func (vfs *VirtualFilesystem) PrepareRenameDentry(mntns *MountNamespace, from, to *Dentry) error {
	vfs.mountMu.Lock()
	if mntns.mountpoints[from] != 0 {
		vfs.mountMu.Unlock()
		return linuxerr.EBUSY
	}
	if to != nil {
		if mntns.mountpoints[to] != 0 {
			vfs.mountMu.Unlock()
			return linuxerr.EBUSY
		}
		to.mu.Lock()
	}
	from.mu.Lock()
	vfs.mountMu.Unlock()
	// Return with from.mu and to.mu locked, which will be unlocked by
	// AbortRenameDentry, CommitRenameReplaceDentry, or
	// CommitRenameExchangeDentry.
	return nil
}

// AbortRenameDentry must be called after PrepareRenameDentry if the rename
// fails.
// +checklocks:from.mu
// +checklocks:to.mu
func (vfs *VirtualFilesystem) AbortRenameDentry(from, to *Dentry) {
	from.mu.Unlock()
	if to != nil {
		to.mu.Unlock()
	}
}

// CommitRenameReplaceDentry must be called after the file represented by from
// is renamed without RENAME_EXCHANGE. If to is not nil, it represents the file
// that was replaced by from.
//
// Preconditions: PrepareRenameDentry was previously called on from and to.
// +checklocks:from.mu
// +checklocks:to.mu
func (vfs *VirtualFilesystem) CommitRenameReplaceDentry(ctx context.Context, from, to *Dentry) {
	from.mu.Unlock()
	if to != nil {
		to.dead = true
		to.mu.Unlock()
		if to.isMounted() {
			vfs.forgetDeadMountpoint(ctx, to)
		}
	}
}

// CommitRenameExchangeDentry must be called after the files represented by
// from and to are exchanged by rename(RENAME_EXCHANGE).
//
// Preconditions: PrepareRenameDentry was previously called on from and to.
// +checklocks:from.mu
// +checklocks:to.mu
func (vfs *VirtualFilesystem) CommitRenameExchangeDentry(from, to *Dentry) {
	from.mu.Unlock()
	to.mu.Unlock()
}

// forgetDeadMountpoint is called when a mount point is deleted or invalidated
// to umount all mounts using it in all other mount namespaces.
//
// forgetDeadMountpoint is analogous to Linux's
// fs/namespace.c:__detach_mounts().
func (vfs *VirtualFilesystem) forgetDeadMountpoint(ctx context.Context, d *Dentry) {
	var (
		vdsToDecRef    []VirtualDentry
		mountsToDecRef []*Mount
	)
	vfs.mountMu.Lock()
	vfs.mounts.seq.BeginWrite()
	for mnt := range vfs.mountpoints[d] {
		vdsToDecRef, mountsToDecRef = vfs.umountRecursiveLocked(mnt, &umountRecursiveOptions{}, vdsToDecRef, mountsToDecRef)
	}
	vfs.mounts.seq.EndWrite()
	vfs.mountMu.Unlock()
	for _, vd := range vdsToDecRef {
		vd.DecRef(ctx)
	}
	for _, mnt := range mountsToDecRef {
		mnt.DecRef(ctx)
	}
}