// Copyright 2018 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package gofer

import (
	"fmt"
	"syscall"

	"gvisor.dev/gvisor/pkg/context"
	"gvisor.dev/gvisor/pkg/log"
	"gvisor.dev/gvisor/pkg/p9"
	"gvisor.dev/gvisor/pkg/sentry/device"
	"gvisor.dev/gvisor/pkg/sentry/fs"
	"gvisor.dev/gvisor/pkg/sentry/kernel/pipe"
	"gvisor.dev/gvisor/pkg/sentry/socket/unix/transport"
	"gvisor.dev/gvisor/pkg/syserror"
	"gvisor.dev/gvisor/pkg/usermem"
)

// maxFilenameLen is the maximum length of a filename, in bytes. This is
// dictated by 9P's encoding of strings, which uses 2 bytes for the length
// prefix, so the largest representable length is 65535.
//
// The operations in this file reject longer names with ENAMETOOLONG before
// issuing any request.
const maxFilenameLen = (1 << 16) - 1

// changeType returns mode with the bits covered by p9.FileModeMask replaced by
// newType. Every bit of mode outside that mask is carried over unchanged.
//
// It panics if newType has any bit set outside p9.FileModeMask.
func changeType(mode p9.FileMode, newType p9.FileMode) p9.FileMode {
	if stray := newType &^ p9.FileModeMask; stray != 0 {
		panic(fmt.Sprintf("newType contained more bits than just file mode: %x", newType))
	}
	return (mode &^ p9.FileModeMask) | newType
}

// Lookup loads an Inode at name into a Dirent based on the session's cache
// policy.
//
// Names longer than maxFilenameLen fail with ENAMETOOLONG. If the policy
// caches readdir results and a populated readdir cache does not contain name,
// the lookup is answered without contacting the gofer. A missing child is
// reported as a negative Dirent when the policy caches negative dirents, and
// as ENOENT otherwise.
//
// If the session has overrides and the walked file matches a registered
// internal pipe, the returned Dirent wraps that pipe's Inode (with an extra
// reference taken) instead of a freshly constructed one.
func (i *inodeOperations) Lookup(ctx context.Context, dir *fs.Inode, name string) (*fs.Dirent, error) {
	if len(name) > maxFilenameLen {
		return nil, syserror.ENAMETOOLONG
	}

	cp := i.session().cachePolicy
	if cp.cacheReaddir() {
		// Check to see if we have readdirCache that indicates the
		// child does not exist.  Avoid holding readdirMu longer than
		// we need to.
		i.readdirMu.Lock()
		if i.readdirCache != nil && !i.readdirCache.Contains(name) {
			// No such child.
			i.readdirMu.Unlock()
			if cp.cacheNegativeDirents() {
				return fs.NewNegativeDirent(name), nil
			}
			return nil, syserror.ENOENT
		}
		i.readdirMu.Unlock()
	}

	// Get a p9.File for name.
	qids, newFile, mask, p9attr, err := i.fileState.file.walkGetAttr(ctx, []string{name})
	if err != nil {
		if err == syscall.ENOENT {
			if cp.cacheNegativeDirents() {
				// Return a negative Dirent. It will stay cached until something
				// is created over it.
				return fs.NewNegativeDirent(name), nil
			}
			return nil, syserror.ENOENT
		}
		return nil, err
	}
	// qids[0] is indexed below; validate the reply the same way Create does
	// rather than panicking on a malformed response.
	if len(qids) != 1 {
		log.Warningf("WalkGetAttr(%s) succeeded, but returned %d QIDs (%v), wanted 1", name, len(qids), qids)
		newFile.close(ctx)
		return nil, syserror.EIO
	}

	if i.session().overrides != nil {
		// Check if file belongs to a internal named pipe. Note that it doesn't need
		// to check for sockets because it's done in newInodeOperations below.
		deviceKey := device.MultiDeviceKey{
			Device:          p9attr.RDev,
			SecondaryDevice: i.session().connID,
			Inode:           qids[0].Path,
		}
		unlock := i.session().overrides.lock()
		if pipeInode := i.session().overrides.getPipe(deviceKey); pipeInode != nil {
			unlock()
			// The registered pipe inode is returned as-is, so the file
			// obtained from the walk above is not handed to anyone. Close
			// it here so it is not leaked.
			newFile.close(ctx)
			pipeInode.IncRef()
			return fs.NewDirent(ctx, pipeInode, name), nil
		}
		unlock()
	}

	// Construct the Inode operations. newFile is handed over to them.
	sattr, node := newInodeOperations(ctx, i.fileState.s, newFile, qids[0], mask, p9attr)

	// Construct a positive Dirent.
	return fs.NewDirent(ctx, fs.NewInode(ctx, node, dir.MountSource, sattr), name), nil
}

// Create creates a file called name under this directory and returns an open
// fs.File for it.
//
// flags must request read and/or write access; Create panics otherwise. The
// file owner taken from ctx and the permissions in perm are passed to the
// gofer. Names longer than maxFilenameLen fail with ENAMETOOLONG.
func (i *inodeOperations) Create(ctx context.Context, dir *fs.Inode, name string, flags fs.FileFlags, perm fs.FilePermissions) (*fs.File, error) {
	if len(name) > maxFilenameLen {
		return nil, syserror.ENAMETOOLONG
	}

	// Creating through a fid turns that fid into the newly created, opened
	// file. Operate on a clone so this directory's own fid is left alone.
	_, created, err := i.fileState.file.walk(ctx, nil)
	if err != nil {
		return nil, err
	}

	// Translate the requested access mode into p9 OpenFlags.
	var mode p9.OpenFlags
	if flags.Read && flags.Write {
		mode = p9.ReadWrite
	} else if flags.Read {
		mode = p9.ReadOnly
	} else if flags.Write {
		mode = p9.WriteOnly
	} else {
		panic(fmt.Sprintf("Create called with unknown or unset open flags: %v", flags))
	}

	creds := fs.FileOwnerFromContext(ctx)
	hostFD, err := created.create(ctx, name, mode, p9.FileMode(perm.LinuxMode()), p9.UID(creds.UID), p9.GID(creds.GID))
	if err != nil {
		// Nothing was created; only the clone needs releasing.
		created.close(ctx)
		return nil, err
	}

	i.touchModificationAndStatusChangeTime(ctx, dir)

	// releaseOpened drops the opened handles on the failure paths below.
	// hostFD may be nil, so it is closed only when present.
	releaseOpened := func() {
		created.close(ctx)
		if hostFD != nil {
			hostFD.Close()
		}
	}

	// Walk to the new file again to obtain an unopened p9.File (which can be
	// cloned and re-opened later) together with its attributes. Both are
	// needed to build the inodeOperations.
	qids, fresh, attrMask, attr, err := i.fileState.file.walkGetAttr(ctx, []string{name})
	if err != nil {
		releaseOpened()
		return nil, err
	}
	if n := len(qids); n != 1 {
		log.Warningf("WalkGetAttr(%s) succeeded, but returned %d QIDs (%v), wanted 1", name, n, qids)
		releaseOpened()
		fresh.close(ctx)
		return nil, syserror.EIO
	}

	// Build the InodeOperations and a positive Dirent around them.
	sattr, childOps := newInodeOperations(ctx, i.fileState.s, fresh, qids[0], attrMask, attr)
	dirent := fs.NewDirent(ctx, fs.NewInode(ctx, childOps, dir.MountSource, sattr), name)
	defer dirent.DecRef()

	// Wrap the opened handles, sharing them through the file state if allowed.
	opened := handles{File: created, Host: hostFD}
	opened.EnableLeakCheck("gofer.handles")
	if childOps.fileState.canShareHandles() {
		childOps.fileState.handlesMu.Lock()
		childOps.fileState.setSharedHandlesLocked(flags, &opened)
		childOps.fileState.handlesMu.Unlock()
	}
	return NewFile(ctx, dirent, name, flags, childOps, &opened), nil
}

// CreateLink creates a symbolic link called newname under this directory whose
// target is oldname, owned by the file owner taken from ctx.
//
// Names longer than maxFilenameLen fail with ENAMETOOLONG.
func (i *inodeOperations) CreateLink(ctx context.Context, dir *fs.Inode, oldname string, newname string) error {
	if len(newname) > maxFilenameLen {
		return syserror.ENAMETOOLONG
	}

	creds := fs.FileOwnerFromContext(ctx)
	_, err := i.fileState.file.symlink(ctx, oldname, newname, p9.UID(creds.UID), p9.GID(creds.GID))
	if err != nil {
		return err
	}

	// The directory gained an entry.
	i.touchModificationAndStatusChangeTime(ctx, dir)
	return nil
}

// CreateHardLink implements InodeOperations.CreateHardLink.
//
// It links target into this directory under newName. It returns EXDEV when
// target is not backed by *inodeOperations, and ENAMETOOLONG when newName
// exceeds maxFilenameLen.
func (i *inodeOperations) CreateHardLink(ctx context.Context, inode *fs.Inode, target *fs.Inode, newName string) error {
	if len(newName) > maxFilenameLen {
		return syserror.ENAMETOOLONG
	}

	tgt, isGofer := target.InodeOperations.(*inodeOperations)
	if !isGofer {
		return syscall.EXDEV
	}

	err := i.fileState.file.link(ctx, &tgt.fileState.file, newName)
	if err != nil {
		return err
	}

	// The target gained a name; reflect that in its cached link count.
	if policy := i.session().cachePolicy; policy.cacheUAttrs(inode) {
		tgt.cachingInodeOps.IncLinks(ctx)
	}
	i.touchModificationAndStatusChangeTime(ctx, inode)
	return nil
}

// CreateDirectory creates a directory named s under this directory, with the
// permissions in perm and the file owner taken from ctx.
//
// Names longer than maxFilenameLen fail with ENAMETOOLONG.
func (i *inodeOperations) CreateDirectory(ctx context.Context, dir *fs.Inode, s string, perm fs.FilePermissions) error {
	if len(s) > maxFilenameLen {
		return syserror.ENAMETOOLONG
	}

	creds := fs.FileOwnerFromContext(ctx)
	_, err := i.fileState.file.mkdir(ctx, s, p9.FileMode(perm.LinuxMode()), p9.UID(creds.UID), p9.GID(creds.GID))
	if err != nil {
		return err
	}

	policy := i.session().cachePolicy
	if policy.cacheUAttrs(dir) {
		// Bump this directory's cached link count.
		//
		// N.B. This will update the modification time.
		i.cachingInodeOps.IncLinks(ctx)
	}
	if policy.cacheReaddir() {
		// The cached listing no longer matches the directory.
		i.markDirectoryDirty()
	}
	return nil
}

// Bind implements InodeOperations.Bind.
//
// It creates a backing file called name, presents it as a socket, and
// registers ep for it in the session's overrides. It returns EOPNOTSUPP when
// the session has no overrides and ENAMETOOLONG when name exceeds
// maxFilenameLen.
func (i *inodeOperations) Bind(ctx context.Context, dir *fs.Inode, name string, ep transport.BoundEndpoint, perm fs.FilePermissions) (*fs.Dirent, error) {
	if len(name) > maxFilenameLen {
		return nil, syserror.ENAMETOOLONG
	}

	overrides := i.session().overrides
	if overrides == nil {
		return nil, syscall.EOPNOTSUPP
	}

	// Keep the override map stable for the whole creation.
	release := overrides.lock()
	defer release()

	sattr, sockOps, err := i.createEndpointFile(ctx, dir, name, perm, p9.ModeSocket)
	if err != nil {
		return nil, err
	}

	// Build the positive Dirent and register the endpoint under the file's key.
	dirent := fs.NewDirent(ctx, fs.NewInode(ctx, sockOps, dir.MountSource, sattr), name)
	overrides.addBoundEndpoint(sockOps.fileState.key, dirent, ep)
	return dirent, nil
}

// CreateFifo implements fs.InodeOperations.CreateFifo.
//
// It first asks the gofer to mknod a named pipe. If that fails with EPERM and
// the session has overrides, it falls back to an internal fifo. Any other
// failure is returned as-is.
func (i *inodeOperations) CreateFifo(ctx context.Context, dir *fs.Inode, name string, perm fs.FilePermissions) error {
	if len(name) > maxFilenameLen {
		return syserror.ENAMETOOLONG
	}

	creds := fs.FileOwnerFromContext(ctx)
	fifoMode := p9.FileMode(perm.LinuxMode()) | p9.ModeNamedPipe

	// N.B. FIFOs use major/minor numbers 0.
	_, err := i.fileState.file.mknod(ctx, name, fifoMode, 0, 0, p9.UID(creds.UID), p9.GID(creds.GID))
	if err == nil {
		i.touchModificationAndStatusChangeTime(ctx, dir)
		return nil
	}

	// EPERM is taken to mean the gofer does not support mknod; with overrides
	// available an internal fifo can stand in for it.
	if i.session().overrides != nil && err == syscall.EPERM {
		return i.createInternalFifo(ctx, dir, name, creds, perm)
	}
	return err
}

// createInternalFifo creates a backing file called name in the gofer and
// registers a named pipe for it in the session's overrides.
//
// It returns EPERM when the session has no overrides. The owner parameter is
// not used by this function.
func (i *inodeOperations) createInternalFifo(ctx context.Context, dir *fs.Inode, name string, owner fs.FileOwner, perm fs.FilePermissions) error {
	overrides := i.session().overrides
	if overrides == nil {
		return syserror.EPERM
	}

	// Keep the override map stable for the whole creation.
	release := overrides.lock()
	defer release()

	sattr, backing, err := i.createEndpointFile(ctx, dir, name, perm, p9.ModeNamedPipe)
	if err != nil {
		return err
	}

	// Create the pipe and wrap the gofer-backed operations in a fifo around it.
	fifoOps := &fifo{
		InodeOperations: pipe.NewInodeOperations(ctx, perm, pipe.NewPipe(true /* isNamed */, pipe.DefaultPipeSize, usermem.PageSize)),
		fileIops:        backing,
	}

	// One Inode is registered in the override map; the positive Dirent gets
	// a separate Inode built over the same operations.
	pipeInode := fs.NewInode(ctx, fifoOps, dir.MountSource, sattr)
	dirent := fs.NewDirent(ctx, fs.NewInode(ctx, fifoOps, dir.MountSource, sattr), name)
	overrides.addPipe(backing.fileState.key, dirent, pipeInode)
	return nil
}

// createEndpointFile creates a regular file called name in the gofer and
// returns stable attributes and inodeOperations for it, with the file type
// overridden to fileType (e.g. p9.ModeSocket or p9.ModeNamedPipe). The caller
// is expected to register the returned operations' key in the session's
// overrides map.
//
// Caller must hold the session's overrides lock, as Bind and
// createInternalFifo do.
func (i *inodeOperations) createEndpointFile(ctx context.Context, dir *fs.Inode, name string, perm fs.FilePermissions, fileType p9.FileMode) (fs.StableAttr, *inodeOperations, error) {
	_, dirClone, err := i.fileState.file.walk(ctx, nil)
	if err != nil {
		return fs.StableAttr{}, nil, err
	}
	// We're not going to use dirClone after return.
	defer dirClone.close(ctx)

	// Create a regular file in the gofer and then mark it as a socket by
	// adding this inode key in the 'overrides' map.
	owner := fs.FileOwnerFromContext(ctx)
	hostFile, err := dirClone.create(ctx, name, p9.ReadWrite, p9.FileMode(perm.LinuxMode()), p9.UID(owner.UID), p9.GID(owner.GID))
	if err != nil {
		return fs.StableAttr{}, nil, err
	}
	// We're not going to use this file. create may return no host file (Create
	// guards the same return value), so only close one that was handed back.
	if hostFile != nil {
		hostFile.Close()
	}

	i.touchModificationAndStatusChangeTime(ctx, dir)

	// Get the attributes of the file to create inode key. After create,
	// dirClone refers to the new file rather than the directory (see the
	// comment in Create).
	qid, mask, attr, err := getattr(ctx, dirClone)
	if err != nil {
		return fs.StableAttr{}, nil, err
	}

	// Get an unopened p9.File for the file we created so that it can be
	// cloned and re-opened multiple times after creation.
	_, unopened, err := i.fileState.file.walk(ctx, []string{name})
	if err != nil {
		return fs.StableAttr{}, nil, err
	}

	// Construct new inode with file type overridden.
	attr.Mode = changeType(attr.Mode, fileType)
	sattr, iops := newInodeOperations(ctx, i.fileState.s, unopened, qid, mask, attr)
	return sattr, iops, nil
}

// Remove implements InodeOperations.Remove.
//
// It unlinks name from this directory. When the session has overrides and the
// file is a socket or pipe, the override map is locked for the duration of the
// unlink and the file's entry is removed from it once the unlink succeeds.
// Names longer than maxFilenameLen fail with ENAMETOOLONG.
func (i *inodeOperations) Remove(ctx context.Context, dir *fs.Inode, name string) error {
	if len(name) > maxFilenameLen {
		return syserror.ENAMETOOLONG
	}

	var key *device.MultiDeviceKey
	if i.session().overrides != nil {
		// Find out if file being deleted is a socket or pipe that needs to be
		// removed from endpoint map.
		if d, err := i.Lookup(ctx, dir, name); err == nil {
			defer d.DecRef()

			// Lookup can return a negative Dirent with a nil error.
			// NOTE(review): a negative Dirent is presumed to carry a nil
			// Inode; skip it rather than dereferencing it. The unlink below
			// then reports the outcome.
			if d.Inode != nil && (fs.IsSocket(d.Inode.StableAttr) || fs.IsPipe(d.Inode.StableAttr)) {
				switch iops := d.Inode.InodeOperations.(type) {
				case *inodeOperations:
					key = &iops.fileState.key
				case *fifo:
					key = &iops.fileIops.fileState.key
				}

				// Stabilize the override map while deletion is in progress.
				// Deferred calls run in reverse order, so the lock is released
				// before d's reference is dropped.
				unlock := i.session().overrides.lock()
				defer unlock()
			}
		}
	}

	if err := i.fileState.file.unlinkAt(ctx, name, 0); err != nil {
		return err
	}
	if key != nil {
		i.session().overrides.remove(*key)
	}
	i.touchModificationAndStatusChangeTime(ctx, dir)

	return nil
}

// RemoveDirectory implements InodeOperations.RemoveDirectory.
//
// It unlinks the directory called name from this directory, then refreshes
// whatever cached state the session's cache policy keeps for the parent.
// Names longer than maxFilenameLen fail with ENAMETOOLONG.
func (i *inodeOperations) RemoveDirectory(ctx context.Context, dir *fs.Inode, name string) error {
	if len(name) > maxFilenameLen {
		return syserror.ENAMETOOLONG
	}

	// atRemoveDir is the value of AT_REMOVEDIR.
	const atRemoveDir = 0x200
	err := i.fileState.file.unlinkAt(ctx, name, atRemoveDir)
	if err != nil {
		return err
	}

	policy := i.session().cachePolicy
	if policy.cacheUAttrs(dir) {
		// The parent lost a subdirectory; lower its cached link count.
		i.cachingInodeOps.DecLinks(ctx)
	}
	if policy.cacheReaddir() {
		// The cached listing no longer matches the directory.
		i.markDirectoryDirty()
	}
	return nil
}

// Rename moves this node to newName under newParent and then brings the
// cached state of the node and both parents up to date.
//
// It returns EXDEV when either parent is not backed by *inodeOperations, and
// ENAMETOOLONG when newName exceeds maxFilenameLen. replacement reports
// whether the rename replaces an existing entry in newParent. oldName is not
// used by this implementation.
func (i *inodeOperations) Rename(ctx context.Context, inode *fs.Inode, oldParent *fs.Inode, oldName string, newParent *fs.Inode, newName string, replacement bool) error {
	if len(newName) > maxFilenameLen {
		return syserror.ENAMETOOLONG
	}

	// Both parents must be gofer-backed; new parent is checked first.
	dstOps, ok := newParent.InodeOperations.(*inodeOperations)
	if !ok {
		return syscall.EXDEV
	}
	srcOps, ok := oldParent.InodeOperations.(*inodeOperations)
	if !ok {
		return syscall.EXDEV
	}

	// Perform the rename itself.
	err := i.fileState.file.rename(ctx, dstOps.fileState.file, newName)
	if err != nil {
		return err
	}

	policy := i.session().cachePolicy

	// Moving a directory changes the link counts of its parents.
	if fs.IsDir(i.fileState.sattr) {
		if policy.cacheUAttrs(oldParent) {
			srcOps.cachingInodeOps.DecLinks(ctx)
		}
		// The new parent only gains a link when an entry was added; a
		// replacement leaves its total unchanged.
		if policy.cacheUAttrs(newParent) && !replacement {
			dstOps.cachingInodeOps.IncLinks(ctx)
		}
	}

	// Cached listings of the affected directories are now stale.
	if policy.cacheReaddir() {
		srcOps.markDirectoryDirty()
		if oldParent != newParent {
			dstOps.markDirectoryDirty()
		}
	}

	// Rename always updates ctime.
	if policy.cacheUAttrs(inode) {
		i.cachingInodeOps.TouchStatusChangeTime(ctx)
	}
	return nil
}

// touchModificationAndStatusChangeTime refreshes this directory's cached state
// after its contents changed: it touches the cached modification and status
// change times when the cache policy caches attributes for inode, and drops
// the readdir cache when the policy caches readdir results.
func (i *inodeOperations) touchModificationAndStatusChangeTime(ctx context.Context, inode *fs.Inode) {
	policy := i.session().cachePolicy
	if policy.cacheUAttrs(inode) {
		i.cachingInodeOps.TouchModificationAndStatusChangeTime(ctx)
	}
	if policy.cacheReaddir() {
		// The cached listing no longer matches the directory.
		i.markDirectoryDirty()
	}
}

// markDirectoryDirty discards cached data for this directory so that the node
// does not keep serving stale state across multiple open directory handles.
//
// At present the only such data is the readdir cache, which is cleared while
// holding readdirMu.
func (i *inodeOperations) markDirectoryDirty() {
	i.readdirMu.Lock()
	i.readdirCache = nil
	i.readdirMu.Unlock()
}