// Copyright 2018 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package fs

import (
	"fmt"

	"gvisor.dev/gvisor/pkg/context"
	"gvisor.dev/gvisor/pkg/sync"
)

// Watches is the collection of inotify watches on an inode.
//
// +stateify savable
type Watches struct {
	// mu protects the fields below.
	mu sync.RWMutex `state:"nosave"`

	// ws is the map of active watches in this collection, keyed by the inotify
	// instance id of the owner.
	ws map[uint64]*Watch

	// unlinked indicates whether the target inode was ever unlinked. This is a
	// hack to figure out if we should queue a IN_DELETE_SELF event when this
	// watches collection is being destroyed, since otherwise we have no way of
	// knowing if the target inode is going down due to a deletion or
	// revalidation.
	unlinked bool
}

func newWatches() *Watches {
	return &Watches{}
}

// MarkUnlinked indicates the target for this set of watches to be unlinked.
// This has implications for the IN_EXCL_UNLINK flag.
func (w *Watches) MarkUnlinked() {
	w.mu.Lock()
	defer w.mu.Unlock()
	w.unlinked = true
}

// Lookup returns a matching watch with the given id. Returns nil if no such
// watch exists. Note that the result returned by this method only remains valid
// if the inotify instance owning the watch is locked, preventing modification
// of the returned watch and preventing the replacement of the watch by another
// one from the same instance (since there may be at most one watch per
// instance, per target).
func (w *Watches) Lookup(id uint64) *Watch {
	w.mu.Lock()
	defer w.mu.Unlock()
	return w.ws[id]
}

// Add adds watch into this set of watches. The watch being added must be unique
// - its ID() should not collide with any existing watches.
func (w *Watches) Add(watch *Watch) {
	w.mu.Lock()
	defer w.mu.Unlock()

	// Sanity check, the new watch shouldn't collide with an existing
	// watch. Silently replacing an existing watch would result in a ref leak on
	// this inode. We could handle this collision by calling Unpin() on the
	// existing watch, but then we end up leaking watch descriptor ids at the
	// inotify level.
	if _, exists := w.ws[watch.ID()]; exists {
		panic(fmt.Sprintf("Watch collision with ID %+v", watch.ID()))
	}
	if w.ws == nil {
		w.ws = make(map[uint64]*Watch)
	}
	w.ws[watch.ID()] = watch
}

// Remove removes a watch with the given id from this set of watches. The caller
// is responsible for generating any watch removal event, as appropriate. The
// provided id must match an existing watch in this collection.
func (w *Watches) Remove(id uint64) {
	w.mu.Lock()
	defer w.mu.Unlock()

	if w.ws == nil {
		// This watch set is being destroyed. The thread executing the
		// destructor is already in the process of deleting all our watches. We
		// got here with no refs on the inode because we raced with the
		// destructor notifying all the watch owners of the inode's destruction.
		// See the comment in Watches.TargetDestroyed for why this race exists.
		return
	}

	watch, ok := w.ws[id]
	if !ok {
		// While there's technically no problem with silently ignoring a missing
		// watch, this is almost certainly a bug.
		panic(fmt.Sprintf("Attempt to remove a watch, but no watch found with provided id %+v.", id))
	}
	delete(w.ws, watch.ID())
}

// Notify queues a new event with all watches in this set.
func (w *Watches) Notify(name string, events, cookie uint32) {
	// N.B. We don't defer the unlocks because Notify is in the hot path of
	// all IO operations, and the defer costs too much for small IO
	// operations.
	w.mu.RLock()
	for _, watch := range w.ws {
		if name != "" && w.unlinked && !watch.NotifyParentAfterUnlink() {
			// IN_EXCL_UNLINK - By default, when watching events on the children
			// of a directory, events are generated for children even after they
			// have been unlinked from the directory. This can result in large
			// numbers of uninteresting events for some applications (e.g., if
			// watching /tmp, in which many applications create temporary files
			// whose names are immediately unlinked). Specifying IN_EXCL_UNLINK
			// changes the default behavior, so that events are not generated
			// for children after they have been unlinked from the watched
			// directory.  -- inotify(7)
			//
			// We know we're dealing with events for a parent when the name
			// isn't empty.
			continue
		}
		watch.Notify(name, events, cookie)
	}
	w.mu.RUnlock()
}

// Unpin unpins dirent from all watches in this set.
func (w *Watches) Unpin(ctx context.Context, d *Dirent) {
	w.mu.RLock()
	defer w.mu.RUnlock()
	for _, watch := range w.ws {
		watch.Unpin(ctx, d)
	}
}

// targetDestroyed is called by the inode destructor to notify the watch owners
// of the impending destruction of the watch target.
func (w *Watches) targetDestroyed() {
	var ws map[uint64]*Watch

	// We can't hold w.mu while calling watch.TargetDestroyed to preserve lock
	// ordering w.r.t to the owner inotify instances. Instead, atomically move
	// the watches map into a local variable so we can iterate over it safely.
	//
	// Because of this however, it is possible for the watches' owners to reach
	// this inode while the inode has no refs. This is still safe because the
	// owners can only reach the inode until this function finishes calling
	// watch.TargetDestroyed() below and the inode is guaranteed to exist in the
	// meanwhile. But we still have to be very careful not to rely on inode
	// state that may have been already destroyed.
	w.mu.Lock()
	ws = w.ws
	w.ws = nil
	w.mu.Unlock()

	for _, watch := range ws {
		watch.TargetDestroyed()
	}
}