// Copyright 2018 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package kernel

import (
	"bytes"
	"fmt"
	"sort"
	"sync"
	"sync/atomic"
	"syscall"

	"gvisor.dev/gvisor/pkg/abi/linux"
	"gvisor.dev/gvisor/pkg/refs"
	"gvisor.dev/gvisor/pkg/sentry/fs"
	"gvisor.dev/gvisor/pkg/sentry/fs/lock"
	"gvisor.dev/gvisor/pkg/sentry/kernel/kdefs"
	"gvisor.dev/gvisor/pkg/sentry/limits"
)

// FDs is an ordering of FD's that can be made stable.
type FDs []kdefs.FD

func (f FDs) Len() int {
	return len(f)
}

func (f FDs) Swap(i, j int) {
	f[i], f[j] = f[j], f[i]
}

func (f FDs) Less(i, j int) bool {
	return f[i] < f[j]
}

// FDFlags define flags for an individual descriptor.
//
// +stateify savable
type FDFlags struct {
	// CloseOnExec indicates the descriptor should be closed on exec.
	CloseOnExec bool
}

// ToLinuxFileFlags converts a kernel.FDFlags object to a Linux file flags
// representation.
func (f FDFlags) ToLinuxFileFlags() (mask uint) {
	if f.CloseOnExec {
		mask |= linux.O_CLOEXEC
	}
	return
}

// ToLinuxFDFlags converts a kernel.FDFlags object to a Linux descriptor flags
// representation.
func (f FDFlags) ToLinuxFDFlags() (mask uint) {
	if f.CloseOnExec {
		mask |= linux.FD_CLOEXEC
	}
	return
}

// descriptor holds the details about a file descriptor, namely a pointer the
// file itself and the descriptor flags.
//
// +stateify savable
type descriptor struct {
	file  *fs.File
	flags FDFlags
}

// FDMap is used to manage File references and flags.
//
// +stateify savable
type FDMap struct {
	refs.AtomicRefCount
	k     *Kernel
	files map[kdefs.FD]descriptor
	mu    sync.RWMutex `state:"nosave"`
	uid   uint64
}

// ID returns a unique identifier for this FDMap.
func (f *FDMap) ID() uint64 {
	return f.uid
}

// NewFDMap allocates a new FDMap that may be used by tasks in k.
func (k *Kernel) NewFDMap() *FDMap {
	return &FDMap{
		k:     k,
		files: make(map[kdefs.FD]descriptor),
		uid:   atomic.AddUint64(&k.fdMapUids, 1),
	}
}

// destroy removes all of the file descriptors from the map.
func (f *FDMap) destroy() {
	f.RemoveIf(func(*fs.File, FDFlags) bool {
		return true
	})
}

// DecRef implements RefCounter.DecRef with destructor f.destroy.
func (f *FDMap) DecRef() {
	f.DecRefWithDestructor(f.destroy)
}

// Size returns the number of file descriptor slots currently allocated.
func (f *FDMap) Size() int {
	f.mu.RLock()
	defer f.mu.RUnlock()

	return len(f.files)
}

// String is a stringer for FDMap.
func (f *FDMap) String() string {
	f.mu.RLock()
	defer f.mu.RUnlock()

	var b bytes.Buffer
	for k, v := range f.files {
		n, _ := v.file.Dirent.FullName(nil /* root */)
		b.WriteString(fmt.Sprintf("\tfd:%d => name %s\n", k, n))
	}
	return b.String()
}

// NewFDFrom allocates a new FD guaranteed to be the lowest number available
// greater than or equal to from. This property is important as Unix programs
// tend to count on this allocation order.
func (f *FDMap) NewFDFrom(fd kdefs.FD, file *fs.File, flags FDFlags, limitSet *limits.LimitSet) (kdefs.FD, error) {
	if fd < 0 {
		// Don't accept negative FDs.
		return 0, syscall.EINVAL
	}

	f.mu.Lock()
	defer f.mu.Unlock()

	// Finds the lowest fd not in the handles map.
	lim := limitSet.Get(limits.NumberOfFiles)
	for i := fd; lim.Cur == limits.Infinity || i < kdefs.FD(lim.Cur); i++ {
		if _, ok := f.files[i]; !ok {
			file.IncRef()
			f.files[i] = descriptor{file, flags}
			return i, nil
		}
	}

	return -1, syscall.EMFILE
}

// NewFDAt sets the file reference for the given FD. If there is an
// active reference for that FD, the ref count for that existing reference
// is decremented.
func (f *FDMap) NewFDAt(fd kdefs.FD, file *fs.File, flags FDFlags, limitSet *limits.LimitSet) error {
	if fd < 0 {
		// Don't accept negative FDs.
		return syscall.EBADF
	}

	// In this one case we do not do a defer of the Unlock.  The
	// reason is that we must have done all the work needed for
	// discarding any old open file before we return to the
	// caller. In other words, the DecRef(), below, must have
	// completed by the time we return to the caller to ensure
	// side effects are, in fact, effected. A classic example is
	// dup2(fd1, fd2); if fd2 was already open, it must be closed,
	// and we don't want to resume the caller until it is; we have
	// to block on the DecRef(). Hence we can not just do a 'go
	// oldfile.DecRef()', since there would be no guarantee that
	// it would be done before we the caller resumed. Since we
	// must wait for the DecRef() to finish, and that could take
	// time, it's best to first call f.muUnlock beore so we are
	// not blocking other uses of this FDMap on the DecRef() call.
	f.mu.Lock()
	oldDesc, oldExists := f.files[fd]
	lim := limitSet.Get(limits.NumberOfFiles).Cur
	// if we're closing one then the effective limit is one
	// more than the actual limit.
	if oldExists && lim != limits.Infinity {
		lim++
	}
	if lim != limits.Infinity && fd >= kdefs.FD(lim) {
		f.mu.Unlock()
		return syscall.EMFILE
	}

	file.IncRef()
	f.files[fd] = descriptor{file, flags}
	f.mu.Unlock()

	if oldExists {
		oldDesc.file.DecRef()
	}
	return nil
}

// SetFlags sets the flags for the given file descriptor, if it is valid.
func (f *FDMap) SetFlags(fd kdefs.FD, flags FDFlags) {
	f.mu.Lock()
	defer f.mu.Unlock()

	desc, ok := f.files[fd]
	if !ok {
		return
	}

	f.files[fd] = descriptor{desc.file, flags}
}

// GetDescriptor returns a reference to the file and the flags for the FD. It
// bumps its reference count as well. It returns nil if there is no File
// for the FD, i.e. if the FD is invalid. The caller must use DecRef
// when they are done.
func (f *FDMap) GetDescriptor(fd kdefs.FD) (*fs.File, FDFlags) {
	f.mu.RLock()
	defer f.mu.RUnlock()

	if desc, ok := f.files[fd]; ok {
		desc.file.IncRef()
		return desc.file, desc.flags
	}
	return nil, FDFlags{}
}

// GetFile returns a reference to the File for the FD and bumps
// its reference count as well. It returns nil if there is no File
// for the FD, i.e. if the FD is invalid. The caller must use DecRef
// when they are done.
func (f *FDMap) GetFile(fd kdefs.FD) *fs.File {
	f.mu.RLock()
	if desc, ok := f.files[fd]; ok {
		desc.file.IncRef()
		f.mu.RUnlock()
		return desc.file
	}
	f.mu.RUnlock()
	return nil
}

// fds returns an ordering of FDs.
func (f *FDMap) fds() FDs {
	fds := make(FDs, 0, len(f.files))
	for fd := range f.files {
		fds = append(fds, fd)
	}
	sort.Sort(fds)
	return fds
}

// GetFDs returns a list of valid fds.
func (f *FDMap) GetFDs() FDs {
	f.mu.RLock()
	defer f.mu.RUnlock()
	return f.fds()
}

// GetRefs returns a stable slice of references to all files and bumps the
// reference count on each.  The caller must use DecRef on each reference when
// they're done using the slice.
func (f *FDMap) GetRefs() []*fs.File {
	f.mu.RLock()
	defer f.mu.RUnlock()

	fds := f.fds()
	fs := make([]*fs.File, 0, len(fds))
	for _, fd := range fds {
		desc := f.files[fd]
		desc.file.IncRef()
		fs = append(fs, desc.file)
	}
	return fs
}

// Fork returns an independent FDMap pointing to the same descriptors.
func (f *FDMap) Fork() *FDMap {
	f.mu.RLock()
	defer f.mu.RUnlock()

	clone := f.k.NewFDMap()

	// Grab a extra reference for every file.
	for fd, desc := range f.files {
		desc.file.IncRef()
		clone.files[fd] = desc
	}

	// That's it!
	return clone
}

// unlock releases all file locks held by this FDMap's uid.  Must only be
// called on a non-nil *fs.File.
func (f *FDMap) unlock(file *fs.File) {
	id := lock.UniqueID(f.ID())
	file.Dirent.Inode.LockCtx.Posix.UnlockRegion(id, lock.LockRange{0, lock.LockEOF})
}

// inotifyFileClose generates the appropriate inotify events for f being closed.
func inotifyFileClose(f *fs.File) {
	var ev uint32
	d := f.Dirent

	if fs.IsDir(d.Inode.StableAttr) {
		ev |= linux.IN_ISDIR
	}

	if f.Flags().Write {
		ev |= linux.IN_CLOSE_WRITE
	} else {
		ev |= linux.IN_CLOSE_NOWRITE
	}

	d.InotifyEvent(ev, 0)
}

// Remove removes an FD from the FDMap, and returns (File, true) if a File
// one was found. Callers are expected to decrement the reference count on
// the File. Otherwise returns (nil, false).
func (f *FDMap) Remove(fd kdefs.FD) (*fs.File, bool) {
	f.mu.Lock()
	desc := f.files[fd]
	delete(f.files, fd)
	f.mu.Unlock()
	if desc.file != nil {
		f.unlock(desc.file)
		inotifyFileClose(desc.file)
		return desc.file, true
	}
	return nil, false
}

// RemoveIf removes all FDs where cond is true.
func (f *FDMap) RemoveIf(cond func(*fs.File, FDFlags) bool) {
	var removed []*fs.File
	f.mu.Lock()
	for fd, desc := range f.files {
		if desc.file != nil && cond(desc.file, desc.flags) {
			delete(f.files, fd)
			removed = append(removed, desc.file)
		}
	}
	f.mu.Unlock()

	for _, file := range removed {
		f.unlock(file)
		inotifyFileClose(file)
		file.DecRef()
	}
}