// Copyright 2018 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package gofer

import (
	"fmt"
	"syscall"

	"gvisor.googlesource.com/gvisor/pkg/log"
	"gvisor.googlesource.com/gvisor/pkg/metric"
	"gvisor.googlesource.com/gvisor/pkg/p9"
	"gvisor.googlesource.com/gvisor/pkg/sentry/context"
	"gvisor.googlesource.com/gvisor/pkg/sentry/device"
	"gvisor.googlesource.com/gvisor/pkg/sentry/fs"
	"gvisor.googlesource.com/gvisor/pkg/sentry/fs/fsutil"
	"gvisor.googlesource.com/gvisor/pkg/sentry/memmap"
	"gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
	"gvisor.googlesource.com/gvisor/pkg/syserror"
	"gvisor.googlesource.com/gvisor/pkg/waiter"
)

var openedWX = metric.MustCreateNewUint64Metric("/gofer/opened_write_execute_file", true /* sync */, "Number of times a writable+executable file was opened from a gofer.")

// fileOperations implements fs.FileOperations for a remote file system.
//
// +stateify savable
type fileOperations struct {
	fsutil.NoIoctl     `state:"nosave"`
	waiter.AlwaysReady `state:"nosave"`

	// inodeOperations is the inodeOperations backing the file. It is protected
	// by a reference held by File.Dirent.Inode which is stable until
	// FileOperations.Release is called.
	inodeOperations *inodeOperations `state:"wait"`

	// dirCursor is the directory cursor.
	dirCursor string

	// handles are the opened remote file system handles, which may
	// be shared with other files.
	handles *handles `state:"nosave"`

	// flags are the flags used to open handles.
	flags fs.FileFlags `state:"wait"`
}

// fileOperations implements fs.FileOperations.
var _ fs.FileOperations = (*fileOperations)(nil)

// NewFile returns a file. NewFile is not appropriate with host pipes and sockets.
//
// The `name` argument is only used to log a warning if we are returning a
// writeable+executable file. (A metric counter is incremented in this case as
// well.) Note that we cannot call d.BaseName() directly in this function,
// because that would lead to a lock order violation, since this is called in
// d.Create which holds d.mu, while d.BaseName() takes d.parent.mu, and the two
// locks must be taken in the opposite order.
func NewFile(ctx context.Context, dirent *fs.Dirent, name string, flags fs.FileFlags, i *inodeOperations, handles *handles) *fs.File {
	// Remote file systems enforce readability/writability at an offset,
	// see fs/9p/vfs_inode.c:v9fs_vfs_atomic_open -> fs/open.c:finish_open.
	flags.Pread = true
	flags.Pwrite = true

	if fs.IsFile(dirent.Inode.StableAttr) {
		// If cache policy is "remote revalidating", then we must
		// ensure that we have a host FD. Otherwise, the
		// sentry-internal page cache will be used, and we can end up
		// in an inconsistent state if the remote file changes.
		cp := dirent.Inode.InodeOperations.(*inodeOperations).session().cachePolicy
		if cp == cacheRemoteRevalidating && handles.Host == nil {
			panic(fmt.Sprintf("remote-revalidating cache policy requires gofer to donate host FD, but file %q did not have host FD", name))
		}
	}

	f := &fileOperations{
		inodeOperations: i,
		handles:         handles,
		flags:           flags,
	}
	if flags.Write {
		if err := dirent.Inode.CheckPermission(ctx, fs.PermMask{Execute: true}); err == nil {
			openedWX.Increment()
			log.Warningf("Opened a writable executable: %q", name)
		}
	}
	return fs.NewFile(ctx, dirent, flags, f)
}

// Release implements fs.FileOpeations.Release.
func (f *fileOperations) Release() {
	f.handles.DecRef()
}

// Readdir implements fs.FileOperations.Readdir.
func (f *fileOperations) Readdir(ctx context.Context, file *fs.File, serializer fs.DentrySerializer) (int64, error) {
	root := fs.RootFromContext(ctx)
	defer root.DecRef()

	dirCtx := &fs.DirCtx{
		Serializer: serializer,
		DirCursor:  &f.dirCursor,
	}
	n, err := fs.DirentReaddir(ctx, file.Dirent, f, root, dirCtx, file.Offset())
	if f.inodeOperations.session().cachePolicy.cacheUAttrs(file.Dirent.Inode) {
		f.inodeOperations.cachingInodeOps.TouchAccessTime(ctx, file.Dirent.Inode)
	}
	return n, err
}

// IterateDir implements fs.DirIterator.IterateDir.
func (f *fileOperations) IterateDir(ctx context.Context, dirCtx *fs.DirCtx, offset int) (int, error) {
	f.inodeOperations.readdirMu.Lock()
	defer f.inodeOperations.readdirMu.Unlock()

	// Fetch directory entries if needed.
	if !f.inodeOperations.session().cachePolicy.cacheReaddir() || f.inodeOperations.readdirCache == nil {
		entries, err := f.readdirAll(ctx)
		if err != nil {
			return offset, err
		}

		// Cache the readdir result.
		f.inodeOperations.readdirCache = fs.NewSortedDentryMap(entries)
	}

	// Serialize the entries.
	n, err := fs.GenericReaddir(dirCtx, f.inodeOperations.readdirCache)
	return offset + n, err
}

// readdirAll fetches fs.DentAttrs for f, using the attributes of g.
func (f *fileOperations) readdirAll(ctx context.Context) (map[string]fs.DentAttr, error) {
	entries := make(map[string]fs.DentAttr)
	var readOffset uint64
	for {
		// We choose some arbitrary high number of directory entries (64k) and call
		// Readdir until we've exhausted them all.
		dirents, err := f.handles.File.readdir(ctx, readOffset, 64*1024)
		if err != nil {
			return nil, err
		}
		if len(dirents) == 0 {
			// We're done, we reached EOF.
			break
		}

		// The last dirent contains the offset into the next set of dirents.  The gofer
		// returns the offset as an index into directories, not as a byte offset, because
		// converting a byte offset to an index into directories entries is a huge pain.
		// But everything is fine if we're consistent.
		readOffset = dirents[len(dirents)-1].Offset

		for _, dirent := range dirents {
			if dirent.Name == "." || dirent.Name == ".." {
				// These must not be included in Readdir results.
				continue
			}

			// Find a best approximation of the type.
			var nt fs.InodeType
			switch dirent.Type {
			case p9.TypeDir:
				nt = fs.Directory
			case p9.TypeSymlink:
				nt = fs.Symlink
			default:
				nt = fs.RegularFile
			}

			// Install the DentAttr.
			entries[dirent.Name] = fs.DentAttr{
				Type: nt,
				// Construct the key to find the virtual inode.
				// Directory entries reside on the same Device
				// and SecondaryDevice as their parent.
				InodeID: goferDevice.Map(device.MultiDeviceKey{
					Device:          f.inodeOperations.fileState.key.Device,
					SecondaryDevice: f.inodeOperations.fileState.key.SecondaryDevice,
					Inode:           dirent.QID.Path,
				}),
			}
		}
	}

	return entries, nil
}

// Write implements fs.FileOperations.Write.
func (f *fileOperations) Write(ctx context.Context, file *fs.File, src usermem.IOSequence, offset int64) (int64, error) {
	if fs.IsDir(file.Dirent.Inode.StableAttr) {
		// Not all remote file systems enforce this so this client does.
		return 0, syserror.EISDIR
	}
	cp := f.inodeOperations.session().cachePolicy
	if cp.usePageCache(file.Dirent.Inode) {
		n, err := f.inodeOperations.cachingInodeOps.Write(ctx, src, offset)
		if err != nil {
			return n, err
		}
		if cp.writeThrough(file.Dirent.Inode) {
			// Write out the file.
			err = f.inodeOperations.cachingInodeOps.WriteOut(ctx, file.Dirent.Inode)
		}
		return n, err
	}
	return src.CopyInTo(ctx, f.handles.readWriterAt(ctx, offset))
}

// Read implements fs.FileOperations.Read.
func (f *fileOperations) Read(ctx context.Context, file *fs.File, dst usermem.IOSequence, offset int64) (int64, error) {
	if fs.IsDir(file.Dirent.Inode.StableAttr) {
		// Not all remote file systems enforce this so this client does.
		return 0, syserror.EISDIR
	}

	if f.inodeOperations.session().cachePolicy.usePageCache(file.Dirent.Inode) {
		return f.inodeOperations.cachingInodeOps.Read(ctx, file, dst, offset)
	}
	return dst.CopyOutFrom(ctx, f.handles.readWriterAt(ctx, offset))
}

// Fsync implements fs.FileOperations.Fsync.
func (f *fileOperations) Fsync(ctx context.Context, file *fs.File, start int64, end int64, syncType fs.SyncType) error {
	switch syncType {
	case fs.SyncAll, fs.SyncData:
		if err := file.Dirent.Inode.WriteOut(ctx); err != nil {
			return err
		}
		fallthrough
	case fs.SyncBackingStorage:
		// Sync remote caches.
		if f.handles.Host != nil {
			// Sync the host fd directly.
			return syscall.Fsync(f.handles.Host.FD())
		}
		// Otherwise sync on the p9.File handle.
		return f.handles.File.fsync(ctx)
	}
	panic("invalid sync type")
}

// Flush implements fs.FileOperations.Flush.
func (f *fileOperations) Flush(ctx context.Context, file *fs.File) error {
	// If this file is not opened writable then there is nothing to flush.
	// We do this because some p9 server implementations of Flush are
	// over-zealous.
	//
	// FIXME: weaken these implementations and remove this check.
	if !file.Flags().Write {
		return nil
	}
	// Execute the flush.
	return f.handles.File.flush(ctx)
}

// ConfigureMMap implements fs.FileOperations.ConfigureMMap.
func (f *fileOperations) ConfigureMMap(ctx context.Context, file *fs.File, opts *memmap.MMapOpts) error {
	if !f.inodeOperations.session().cachePolicy.usePageCache(file.Dirent.Inode) {
		return syserror.ENODEV
	}
	return fsutil.GenericConfigureMMap(file, f.inodeOperations.cachingInodeOps, opts)
}

// Seek implements fs.FileOperations.Seek.
func (f *fileOperations) Seek(ctx context.Context, file *fs.File, whence fs.SeekWhence, offset int64) (int64, error) {
	return fsutil.SeekWithDirCursor(ctx, file, whence, offset, &f.dirCursor)
}