// Copyright 2018 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package gofer

import (
	"errors"
	"sync"
	"syscall"

	"gvisor.googlesource.com/gvisor/pkg/abi/linux"
	"gvisor.googlesource.com/gvisor/pkg/fd"
	"gvisor.googlesource.com/gvisor/pkg/log"
	"gvisor.googlesource.com/gvisor/pkg/p9"
	"gvisor.googlesource.com/gvisor/pkg/sentry/context"
	"gvisor.googlesource.com/gvisor/pkg/sentry/device"
	"gvisor.googlesource.com/gvisor/pkg/sentry/fs"
	"gvisor.googlesource.com/gvisor/pkg/sentry/fs/fdpipe"
	"gvisor.googlesource.com/gvisor/pkg/sentry/fs/fsutil"
	"gvisor.googlesource.com/gvisor/pkg/sentry/fs/host"
	"gvisor.googlesource.com/gvisor/pkg/sentry/memmap"
	"gvisor.googlesource.com/gvisor/pkg/sentry/safemem"
	"gvisor.googlesource.com/gvisor/pkg/syserror"
)

// inodeOperations implements fs.InodeOperations.
//
// +stateify savable
type inodeOperations struct {
	fsutil.InodeNotVirtual           `state:"nosave"`
	fsutil.InodeNoExtendedAttributes `state:"nosave"`

	// fileState implements fs.CachedFileObject. It exists
	// to break a circular load dependency between inodeOperations
	// and cachingInodeOps (below).
	fileState *inodeFileState `state:"wait"`

	// cachingInodeOps implement memmap.Mappable for inodeOperations.
	cachingInodeOps *fsutil.CachingInodeOperations

	// readdirMu protects readdirCache and concurrent Readdirs.
	readdirMu sync.Mutex `state:"nosave"`

	// readdirCache is a cache of readdir results in the form of
	// a fs.SortedDentryMap.
	//
	// Starts out as nil, and is initialized under readdirMu lazily;
	// invalidating the cache means setting it to nil.
	readdirCache *fs.SortedDentryMap `state:"nosave"`
}

// inodeFileState implements fs.CachedFileObject and otherwise fully
// encapsulates state that needs to be manually loaded on restore for
// this file object.
//
// This unfortunate structure exists because fs.CachingInodeOperations
// defines afterLoad and therefore cannot be lazily loaded (to break a
// circular load dependency between it and inodeOperations). Even with
// lazy loading, this approach defines the dependencies between objects
// and the expected load behavior more concretely.
//
// +stateify savable
type inodeFileState struct {
	// s is common file system state for Gofers.
	s *session `state:"wait"`

	// MultiDeviceKey consists of:
	//
	// * Device:          file system device from a specific gofer.
	// * SecondaryDevice: unique identifier of the attach point.
	// * Inode:           the inode of this resource, unique per Device.=
	//
	// These fields combined enable consistent hashing of virtual inodes
	// on goferDevice.
	key device.MultiDeviceKey `state:"nosave"`

	// file is the p9 file that contains a single unopened fid.
	file contextFile `state:"nosave"`

	// sattr caches the stable attributes.
	sattr fs.StableAttr `state:"wait"`

	// handlesMu protects the below fields.
	handlesMu sync.RWMutex `state:"nosave"`

	// If readHandles is non-nil, it holds handles that are either read-only or
	// read/write. If writeHandles is non-nil, it holds write-only handles if
	// writeHandlesRW is false, and read/write handles if writeHandlesRW is
	// true.
	//
	// Once readHandles becomes non-nil, it can't be changed until
	// inodeFileState.Release(), because of a defect in the
	// fsutil.CachedFileObject interface: there's no way for the caller of
	// fsutil.CachedFileObject.FD() to keep the returned FD open, so if we
	// racily replace readHandles after inodeFileState.FD() has returned
	// readHandles.Host.FD(), fsutil.CachingInodeOperations may use a closed
	// FD. writeHandles can be changed if writeHandlesRW is false, since
	// inodeFileState.FD() can't return a write-only FD, but can't be changed
	// if writeHandlesRW is true for the same reason.
	readHandles    *handles `state:"nosave"`
	writeHandles   *handles `state:"nosave"`
	writeHandlesRW bool     `state:"nosave"`

	// loading is acquired when the inodeFileState begins an asynchronous
	// load. It releases when the load is complete. Callers that require all
	// state to be available should call waitForLoad() to ensure that.
	loading sync.Mutex `state:".(struct{})"`

	// savedUAttr is only allocated during S/R. It points to the save-time
	// unstable attributes and is used to validate restore-time ones.
	//
	// Note that these unstable attributes are only used to detect cross-S/R
	// external file system metadata changes. They may differ from the
	// cached unstable attributes in cachingInodeOps, as that might differ
	// from the external file system attributes if there had been WriteOut
	// failures. S/R is transparent to Sentry and the latter will continue
	// using its cached values after restore.
	savedUAttr *fs.UnstableAttr

	// hostMappable is created when using 'cacheRemoteRevalidating' to map pages
	// directly from host.
	hostMappable *fsutil.HostMappable
}

// Release releases file handles.
func (i *inodeFileState) Release(ctx context.Context) {
	i.file.close(ctx)
	if i.readHandles != nil {
		i.readHandles.DecRef()
	}
	if i.writeHandles != nil {
		i.writeHandles.DecRef()
	}
}

func (i *inodeFileState) canShareHandles() bool {
	// Only share handles for regular files, since for other file types,
	// distinct handles may have special semantics even if they represent the
	// same file. Disable handle sharing for cache policy cacheNone, since this
	// is legacy behavior.
	return fs.IsFile(i.sattr) && i.s.cachePolicy != cacheNone
}

// Preconditions: i.handlesMu must be locked for writing.
func (i *inodeFileState) setSharedHandlesLocked(flags fs.FileFlags, h *handles) {
	if flags.Read && i.readHandles == nil {
		h.IncRef()
		i.readHandles = h
	}
	if flags.Write {
		if i.writeHandles == nil {
			h.IncRef()
			i.writeHandles = h
			i.writeHandlesRW = flags.Read
		} else if !i.writeHandlesRW && flags.Read {
			// Upgrade i.writeHandles.
			i.writeHandles.DecRef()
			h.IncRef()
			i.writeHandles = h
			i.writeHandlesRW = flags.Read
		}
	}
}

// getHandles returns a set of handles for a new file using i opened with the
// given flags.
func (i *inodeFileState) getHandles(ctx context.Context, flags fs.FileFlags) (*handles, error) {
	if !i.canShareHandles() {
		return newHandles(ctx, i.file, flags)
	}
	i.handlesMu.Lock()
	defer i.handlesMu.Unlock()
	// Do we already have usable shared handles?
	if flags.Write {
		if i.writeHandles != nil && (i.writeHandlesRW || !flags.Read) {
			i.writeHandles.IncRef()
			return i.writeHandles, nil
		}
	} else if i.readHandles != nil {
		i.readHandles.IncRef()
		return i.readHandles, nil
	}
	// No; get new handles and cache them for future sharing.
	h, err := newHandles(ctx, i.file, flags)
	if err != nil {
		return nil, err
	}
	i.setSharedHandlesLocked(flags, h)
	return h, nil
}

// ReadToBlocksAt implements fsutil.CachedFileObject.ReadToBlocksAt.
func (i *inodeFileState) ReadToBlocksAt(ctx context.Context, dsts safemem.BlockSeq, offset uint64) (uint64, error) {
	i.handlesMu.RLock()
	defer i.handlesMu.RUnlock()
	return i.readHandles.readWriterAt(ctx, int64(offset)).ReadToBlocks(dsts)
}

// WriteFromBlocksAt implements fsutil.CachedFileObject.WriteFromBlocksAt.
func (i *inodeFileState) WriteFromBlocksAt(ctx context.Context, srcs safemem.BlockSeq, offset uint64) (uint64, error) {
	i.handlesMu.RLock()
	defer i.handlesMu.RUnlock()
	return i.writeHandles.readWriterAt(ctx, int64(offset)).WriteFromBlocks(srcs)
}

// SetMaskedAttributes implements fsutil.CachedFileObject.SetMaskedAttributes.
func (i *inodeFileState) SetMaskedAttributes(ctx context.Context, mask fs.AttrMask, attr fs.UnstableAttr) error {
	if i.skipSetAttr(mask) {
		return nil
	}
	as, ans := attr.AccessTime.Unix()
	ms, mns := attr.ModificationTime.Unix()
	// An update of status change time is implied by mask.AccessTime
	// or mask.ModificationTime. Updating status change time to a
	// time earlier than the system time is not possible.
	return i.file.setAttr(
		ctx,
		p9.SetAttrMask{
			Permissions:        mask.Perms,
			Size:               mask.Size,
			UID:                mask.UID,
			GID:                mask.GID,
			ATime:              mask.AccessTime,
			ATimeNotSystemTime: true,
			MTime:              mask.ModificationTime,
			MTimeNotSystemTime: true,
		}, p9.SetAttr{
			Permissions:      p9.FileMode(attr.Perms.LinuxMode()),
			UID:              p9.UID(attr.Owner.UID),
			GID:              p9.GID(attr.Owner.GID),
			Size:             uint64(attr.Size),
			ATimeSeconds:     uint64(as),
			ATimeNanoSeconds: uint64(ans),
			MTimeSeconds:     uint64(ms),
			MTimeNanoSeconds: uint64(mns),
		})
}

// skipSetAttr checks if attribute change can be skipped. It can be skipped
// when:
//   - Mask is empty
//   - Mask contains only attributes that cannot be set in the gofer
//   - Mask contains only atime and/or mtime, and host FD exists
//
// Updates to atime and mtime can be skipped because cached value will be
// "close enough" to host value, given that operation went directly to host FD.
// Skipping atime updates is particularly important to reduce the number of
// operations sent to the Gofer for readonly files.
func (i *inodeFileState) skipSetAttr(mask fs.AttrMask) bool {
	// First remove attributes that cannot be updated.
	cpy := mask
	cpy.Type = false
	cpy.DeviceID = false
	cpy.InodeID = false
	cpy.BlockSize = false
	cpy.Usage = false
	cpy.Links = false
	if cpy.Empty() {
		return true
	}

	// Then check if more than just atime and mtime is being set.
	cpy.AccessTime = false
	cpy.ModificationTime = false
	if !cpy.Empty() {
		return false
	}

	i.handlesMu.RLock()
	defer i.handlesMu.RUnlock()
	return (i.readHandles != nil && i.readHandles.Host != nil) ||
		(i.writeHandles != nil && i.writeHandles.Host != nil)
}

// Sync implements fsutil.CachedFileObject.Sync.
func (i *inodeFileState) Sync(ctx context.Context) error {
	i.handlesMu.RLock()
	defer i.handlesMu.RUnlock()
	if i.writeHandles == nil {
		return nil
	}
	return i.writeHandles.File.fsync(ctx)
}

// FD implements fsutil.CachedFileObject.FD.
func (i *inodeFileState) FD() int {
	i.handlesMu.RLock()
	defer i.handlesMu.RUnlock()
	if i.writeHandlesRW && i.writeHandles != nil && i.writeHandles.Host != nil {
		return int(i.writeHandles.Host.FD())
	}
	if i.readHandles != nil && i.readHandles.Host != nil {
		return int(i.readHandles.Host.FD())
	}
	return -1
}

// waitForLoad makes sure any restore-issued loading is done.
func (i *inodeFileState) waitForLoad() {
	// This is not a no-op. The loading mutex is hold upon restore until
	// all loading actions are done.
	i.loading.Lock()
	i.loading.Unlock()
}

func (i *inodeFileState) unstableAttr(ctx context.Context) (fs.UnstableAttr, error) {
	_, valid, pattr, err := getattr(ctx, i.file)
	if err != nil {
		return fs.UnstableAttr{}, err
	}
	return unstable(ctx, valid, pattr, i.s.mounter, i.s.client), nil
}

// session extracts the gofer's session from the MountSource.
func (i *inodeOperations) session() *session {
	return i.fileState.s
}

// Release implements fs.InodeOperations.Release.
func (i *inodeOperations) Release(ctx context.Context) {
	i.cachingInodeOps.Release()

	// Releasing the fileState may make RPCs to the gofer. There is
	// no need to wait for those to return, so we can do this
	// asynchronously.
	//
	// We use AsyncWithContext to avoid needing to allocate an extra
	// anonymous function on the heap.
	fs.AsyncWithContext(ctx, i.fileState.Release)
}

// Mappable implements fs.InodeOperations.Mappable.
func (i *inodeOperations) Mappable(inode *fs.Inode) memmap.Mappable {
	if i.session().cachePolicy.useCachingInodeOps(inode) {
		return i.cachingInodeOps
	}
	// This check is necessary because it's returning an interface type.
	if i.fileState.hostMappable != nil {
		return i.fileState.hostMappable
	}
	return nil
}

// UnstableAttr implements fs.InodeOperations.UnstableAttr.
func (i *inodeOperations) UnstableAttr(ctx context.Context, inode *fs.Inode) (fs.UnstableAttr, error) {
	if i.session().cachePolicy.cacheUAttrs(inode) {
		return i.cachingInodeOps.UnstableAttr(ctx, inode)
	}
	return i.fileState.unstableAttr(ctx)
}

// Check implements fs.InodeOperations.Check.
func (i *inodeOperations) Check(ctx context.Context, inode *fs.Inode, p fs.PermMask) bool {
	return fs.ContextCanAccessFile(ctx, inode, p)
}

// GetFile implements fs.InodeOperations.GetFile.
func (i *inodeOperations) GetFile(ctx context.Context, d *fs.Dirent, flags fs.FileFlags) (*fs.File, error) {
	switch d.Inode.StableAttr.Type {
	case fs.Socket:
		return i.getFileSocket(ctx, d, flags)
	case fs.Pipe:
		return i.getFilePipe(ctx, d, flags)
	default:
		return i.getFileDefault(ctx, d, flags)
	}
}

func (i *inodeOperations) getFileSocket(ctx context.Context, d *fs.Dirent, flags fs.FileFlags) (*fs.File, error) {
	f, err := i.fileState.file.connect(ctx, p9.AnonymousSocket)
	if err != nil {
		return nil, syscall.EIO
	}
	fsf, err := host.NewSocketWithDirent(ctx, d, f, flags)
	if err != nil {
		f.Close()
		return nil, err
	}
	return fsf, nil
}

func (i *inodeOperations) getFilePipe(ctx context.Context, d *fs.Dirent, flags fs.FileFlags) (*fs.File, error) {
	// Try to open as a host pipe; if that doesn't work, handle it normally.
	pipeOps, err := fdpipe.Open(ctx, i, flags)
	if err == errNotHostFile {
		return i.getFileDefault(ctx, d, flags)
	}
	if err != nil {
		return nil, err
	}
	return fs.NewFile(ctx, d, flags, pipeOps), nil
}

// errNotHostFile indicates that the file is not a host file.
var errNotHostFile = errors.New("not a host file")

// NonBlockingOpen implements fdpipe.NonBlockingOpener for opening host named pipes.
func (i *inodeOperations) NonBlockingOpen(ctx context.Context, p fs.PermMask) (*fd.FD, error) {
	i.fileState.waitForLoad()

	// Get a cloned fid which we will open.
	_, newFile, err := i.fileState.file.walk(ctx, nil)
	if err != nil {
		log.Warningf("Open Walk failed: %v", err)
		return nil, err
	}
	defer newFile.close(ctx)

	flags, err := openFlagsFromPerms(p)
	if err != nil {
		log.Warningf("Open flags %s parsing failed: %v", p, err)
		return nil, err
	}
	hostFile, _, _, err := newFile.open(ctx, flags)
	// If the host file returned is nil and the error is nil,
	// then this was never a host file to begin with, and should
	// be treated like a remote file.
	if hostFile == nil && err == nil {
		return nil, errNotHostFile
	}
	return hostFile, err
}

func (i *inodeOperations) getFileDefault(ctx context.Context, d *fs.Dirent, flags fs.FileFlags) (*fs.File, error) {
	h, err := i.fileState.getHandles(ctx, flags)
	if err != nil {
		return nil, err
	}
	return NewFile(ctx, d, d.BaseName(), flags, i, h), nil
}

// SetPermissions implements fs.InodeOperations.SetPermissions.
func (i *inodeOperations) SetPermissions(ctx context.Context, inode *fs.Inode, p fs.FilePermissions) bool {
	if i.session().cachePolicy.cacheUAttrs(inode) {
		return i.cachingInodeOps.SetPermissions(ctx, inode, p)
	}

	mask := p9.SetAttrMask{Permissions: true}
	pattr := p9.SetAttr{Permissions: p9.FileMode(p.LinuxMode())}
	// Execute the chmod.
	return i.fileState.file.setAttr(ctx, mask, pattr) == nil
}

// SetOwner implements fs.InodeOperations.SetOwner.
func (i *inodeOperations) SetOwner(ctx context.Context, inode *fs.Inode, owner fs.FileOwner) error {
	// Save the roundtrip.
	if !owner.UID.Ok() && !owner.GID.Ok() {
		return nil
	}

	if i.session().cachePolicy.cacheUAttrs(inode) {
		return i.cachingInodeOps.SetOwner(ctx, inode, owner)
	}

	var mask p9.SetAttrMask
	var attr p9.SetAttr
	if owner.UID.Ok() {
		mask.UID = true
		attr.UID = p9.UID(owner.UID)
	}
	if owner.GID.Ok() {
		mask.GID = true
		attr.GID = p9.GID(owner.GID)
	}
	return i.fileState.file.setAttr(ctx, mask, attr)
}

// SetTimestamps implements fs.InodeOperations.SetTimestamps.
func (i *inodeOperations) SetTimestamps(ctx context.Context, inode *fs.Inode, ts fs.TimeSpec) error {
	if i.session().cachePolicy.cacheUAttrs(inode) {
		return i.cachingInodeOps.SetTimestamps(ctx, inode, ts)
	}

	return utimes(ctx, i.fileState.file, ts)
}

// Truncate implements fs.InodeOperations.Truncate.
func (i *inodeOperations) Truncate(ctx context.Context, inode *fs.Inode, length int64) error {
	// This can only be called for files anyway.
	if i.session().cachePolicy.useCachingInodeOps(inode) {
		return i.cachingInodeOps.Truncate(ctx, inode, length)
	}
	if i.session().cachePolicy == cacheRemoteRevalidating {
		return i.fileState.hostMappable.Truncate(ctx, length)
	}

	return i.fileState.file.setAttr(ctx, p9.SetAttrMask{Size: true}, p9.SetAttr{Size: uint64(length)})
}

// WriteOut implements fs.InodeOperations.WriteOut.
func (i *inodeOperations) WriteOut(ctx context.Context, inode *fs.Inode) error {
	if !i.session().cachePolicy.cacheUAttrs(inode) {
		return nil
	}

	return i.cachingInodeOps.WriteOut(ctx, inode)
}

// Readlink implements fs.InodeOperations.Readlink.
func (i *inodeOperations) Readlink(ctx context.Context, inode *fs.Inode) (string, error) {
	if !fs.IsSymlink(inode.StableAttr) {
		return "", syscall.ENOLINK
	}
	return i.fileState.file.readlink(ctx)
}

// Getlink implementfs fs.InodeOperations.Getlink.
func (i *inodeOperations) Getlink(context.Context, *fs.Inode) (*fs.Dirent, error) {
	if !fs.IsSymlink(i.fileState.sattr) {
		return nil, syserror.ENOLINK
	}
	return nil, fs.ErrResolveViaReadlink
}

// StatFS makes a StatFS request.
func (i *inodeOperations) StatFS(ctx context.Context) (fs.Info, error) {
	fsstat, err := i.fileState.file.statFS(ctx)
	if err != nil {
		return fs.Info{}, err
	}

	info := fs.Info{
		// This is primarily for distinguishing a gofer file system in
		// tests. Testing is important, so instead of defining
		// something completely random, use a standard value.
		Type:        linux.V9FS_MAGIC,
		TotalBlocks: fsstat.Blocks,
		FreeBlocks:  fsstat.BlocksFree,
		TotalFiles:  fsstat.Files,
		FreeFiles:   fsstat.FilesFree,
	}

	// If blocks available is non-zero, prefer that.
	if fsstat.BlocksAvailable != 0 {
		info.FreeBlocks = fsstat.BlocksAvailable
	}

	return info, nil
}

func (i *inodeOperations) configureMMap(file *fs.File, opts *memmap.MMapOpts) error {
	if i.session().cachePolicy.useCachingInodeOps(file.Dirent.Inode) {
		return fsutil.GenericConfigureMMap(file, i.cachingInodeOps, opts)
	}
	if i.fileState.hostMappable != nil {
		return fsutil.GenericConfigureMMap(file, i.fileState.hostMappable, opts)
	}
	return syserror.ENODEV
}

func init() {
	syserror.AddErrorUnwrapper(func(err error) (syscall.Errno, bool) {
		if _, ok := err.(p9.ErrSocket); ok {
			// Treat as an I/O error.
			return syscall.EIO, true
		}
		return 0, false
	})
}

// AddLink implements InodeOperations.AddLink, but is currently a noop.
// FIXME: Remove this from InodeOperations altogether.
func (*inodeOperations) AddLink() {}

// DropLink implements InodeOperations.DropLink, but is currently a noop.
// FIXME: Remove this from InodeOperations altogether.
func (*inodeOperations) DropLink() {}

// NotifyStatusChange implements fs.InodeOperations.NotifyStatusChange.
// FIXME: Remove this from InodeOperations altogether.
func (i *inodeOperations) NotifyStatusChange(ctx context.Context) {}