// Copyright 2018 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package gofer

import (
	"fmt"

	"gvisor.dev/gvisor/pkg/p9"
	"gvisor.dev/gvisor/pkg/refs"
	"gvisor.dev/gvisor/pkg/sentry/context"
	"gvisor.dev/gvisor/pkg/sentry/device"
	"gvisor.dev/gvisor/pkg/sentry/fs"
	"gvisor.dev/gvisor/pkg/sentry/fs/fsutil"
	"gvisor.dev/gvisor/pkg/sentry/socket/unix/transport"
	"gvisor.dev/gvisor/pkg/sync"
	"gvisor.dev/gvisor/pkg/unet"
)

// DefaultDirentCacheSize is the default dirent cache size for 9P mounts. It can
// be adjusted independently from the other dirent caches.
var DefaultDirentCacheSize uint64 = fs.DefaultDirentCacheSize

// +stateify savable
type endpointMaps struct {
	// mu protexts the direntMap, the keyMap, and the pathMap below.
	mu sync.RWMutex `state:"nosave"`

	// direntMap links sockets to their dirents.
	// It is filled concurrently with the keyMap and is stored upon save.
	// Before saving, this map is used to populate the pathMap.
	direntMap map[transport.BoundEndpoint]*fs.Dirent

	// keyMap links MultiDeviceKeys (containing inode IDs) to their sockets.
	// It is not stored during save because the inode ID may change upon restore.
	keyMap map[device.MultiDeviceKey]transport.BoundEndpoint `state:"nosave"`

	// pathMap links the sockets to their paths.
	// It is filled before saving from the direntMap and is stored upon save.
	// Upon restore, this map is used to re-populate the keyMap.
	pathMap map[transport.BoundEndpoint]string
}

// add adds the endpoint to the maps.
// A reference is taken on the dirent argument.
//
// Precondition: maps must have been locked with 'lock'.
func (e *endpointMaps) add(key device.MultiDeviceKey, d *fs.Dirent, ep transport.BoundEndpoint) {
	e.keyMap[key] = ep
	d.IncRef()
	e.direntMap[ep] = d
}

// remove deletes the key from the maps.
//
// Precondition: maps must have been locked with 'lock'.
func (e *endpointMaps) remove(key device.MultiDeviceKey) {
	endpoint := e.get(key)
	delete(e.keyMap, key)

	d := e.direntMap[endpoint]
	d.DecRef()
	delete(e.direntMap, endpoint)
}

// lock blocks other addition and removal operations from happening while
// the backing file is being created or deleted. Returns a function that unlocks
// the endpoint map.
func (e *endpointMaps) lock() func() {
	e.mu.Lock()
	return func() { e.mu.Unlock() }
}

// get returns the endpoint mapped to the given key.
//
// Precondition: maps must have been locked for reading.
func (e *endpointMaps) get(key device.MultiDeviceKey) transport.BoundEndpoint {
	return e.keyMap[key]
}

// session holds state for each 9p session established during sys_mount.
//
// +stateify savable
type session struct {
	refs.AtomicRefCount

	// msize is the value of the msize mount option, see fs/gofer/fs.go.
	msize uint32 `state:"wait"`

	// version is the value of the version mount option, see fs/gofer/fs.go.
	version string `state:"wait"`

	// cachePolicy is the cache policy.
	cachePolicy cachePolicy `state:"wait"`

	// aname is the value of the aname mount option, see fs/gofer/fs.go.
	aname string `state:"wait"`

	// The client associated with this session. This will be initialized lazily.
	client *p9.Client `state:"nosave"`

	// The p9.File pointing to attachName via the client. This will be initialized
	// lazily.
	attach contextFile `state:"nosave"`

	// Flags provided to the mount.
	superBlockFlags fs.MountSourceFlags `state:"wait"`

	// limitHostFDTranslation is the value used for
	// CachingInodeOperationsOptions.LimitHostFDTranslation for all
	// CachingInodeOperations created by the session.
	limitHostFDTranslation bool

	// overlayfsStaleRead when set causes the readonly handle to be invalidated
	// after file is open for write.
	overlayfsStaleRead bool

	// connID is a unique identifier for the session connection.
	connID string `state:"wait"`

	// inodeMappings contains mappings of fs.Inodes associated with this session
	// to paths relative to the attach point, where inodeMappings is keyed by
	// Inode.StableAttr.InodeID.
	inodeMappings map[uint64]string `state:"wait"`

	// mounter is the EUID/EGID that mounted this file system.
	mounter fs.FileOwner `state:"wait"`

	// endpoints is used to map inodes that represent socket files to their
	// corresponding endpoint. Socket files are created as regular files in the
	// gofer and their presence in this map indicate that they should indeed be
	// socket files. This allows unix domain sockets to be used with paths that
	// belong to a gofer.
	//
	// TODO(gvisor.dev/issue/1200): there are few possible races with someone
	// stat'ing the file and another deleting it concurrently, where the file
	// will not be reported as socket file.
	endpoints *endpointMaps `state:"wait"`
}

// Destroy tears down the session.
func (s *session) Destroy() {
	s.client.Close()
}

// Revalidate implements MountSourceOperations.Revalidate.
func (s *session) Revalidate(ctx context.Context, name string, parent, child *fs.Inode) bool {
	return s.cachePolicy.revalidate(ctx, name, parent, child)
}

// Keep implements MountSourceOperations.Keep.
func (s *session) Keep(d *fs.Dirent) bool {
	return s.cachePolicy.keep(d)
}

// CacheReaddir implements MountSourceOperations.CacheReaddir.
func (s *session) CacheReaddir() bool {
	return s.cachePolicy.cacheReaddir()
}

// ResetInodeMappings implements fs.MountSourceOperations.ResetInodeMappings.
func (s *session) ResetInodeMappings() {
	s.inodeMappings = make(map[uint64]string)
}

// SaveInodeMapping implements fs.MountSourceOperations.SaveInodeMapping.
func (s *session) SaveInodeMapping(inode *fs.Inode, path string) {
	// This is very unintuitive. We *CANNOT* trust the inode's StableAttrs,
	// because overlay copyUp may have changed them out from under us.
	// So much for "immutable".
	sattr := inode.InodeOperations.(*inodeOperations).fileState.sattr
	s.inodeMappings[sattr.InodeID] = path
}

// newInodeOperations creates a new 9p fs.InodeOperations backed by a p9.File and attributes
// (p9.QID, p9.AttrMask, p9.Attr).
//
// Endpoints lock must not be held if socket == false.
func newInodeOperations(ctx context.Context, s *session, file contextFile, qid p9.QID, valid p9.AttrMask, attr p9.Attr, socket bool) (fs.StableAttr, *inodeOperations) {
	deviceKey := device.MultiDeviceKey{
		Device:          attr.RDev,
		SecondaryDevice: s.connID,
		Inode:           qid.Path,
	}

	sattr := fs.StableAttr{
		Type:      ntype(attr),
		DeviceID:  goferDevice.DeviceID(),
		InodeID:   goferDevice.Map(deviceKey),
		BlockSize: bsize(attr),
	}

	if s.endpoints != nil {
		if socket {
			sattr.Type = fs.Socket
		} else {
			// If unix sockets are allowed on this filesystem, check if this file is
			// supposed to be a socket file.
			unlock := s.endpoints.lock()
			if s.endpoints.get(deviceKey) != nil {
				sattr.Type = fs.Socket
			}
			unlock()
		}
	}

	fileState := &inodeFileState{
		s:     s,
		file:  file,
		sattr: sattr,
		key:   deviceKey,
	}
	if s.cachePolicy == cacheRemoteRevalidating && fs.IsFile(sattr) {
		fileState.hostMappable = fsutil.NewHostMappable(fileState)
	}

	uattr := unstable(ctx, valid, attr, s.mounter, s.client)
	return sattr, &inodeOperations{
		fileState: fileState,
		cachingInodeOps: fsutil.NewCachingInodeOperations(ctx, fileState, uattr, fsutil.CachingInodeOperationsOptions{
			ForcePageCache:         s.superBlockFlags.ForcePageCache,
			LimitHostFDTranslation: s.limitHostFDTranslation,
		}),
	}
}

// Root returns the root of a 9p mount. This mount is bound to a 9p server
// based on conn. Otherwise configuration parameters are:
//
// * dev:         connection id
// * filesystem:  the filesystem backing the mount
// * superBlockFlags:  the mount flags describing general mount options
// * opts:        parsed 9p mount options
func Root(ctx context.Context, dev string, filesystem fs.Filesystem, superBlockFlags fs.MountSourceFlags, o opts) (*fs.Inode, error) {
	// The mounting EUID/EGID will be cached by this file system. This will
	// be used to assign ownership to files that the Gofer owns.
	mounter := fs.FileOwnerFromContext(ctx)

	conn, err := unet.NewSocket(o.fd)
	if err != nil {
		return nil, err
	}

	// Construct the session.
	s := session{
		connID:                 dev,
		msize:                  o.msize,
		version:                o.version,
		cachePolicy:            o.policy,
		aname:                  o.aname,
		superBlockFlags:        superBlockFlags,
		limitHostFDTranslation: o.limitHostFDTranslation,
		overlayfsStaleRead:     o.overlayfsStaleRead,
		mounter:                mounter,
	}
	s.EnableLeakCheck("gofer.session")

	if o.privateunixsocket {
		s.endpoints = newEndpointMaps()
	}

	// Construct the MountSource with the session and superBlockFlags.
	m := fs.NewMountSource(ctx, &s, filesystem, superBlockFlags)

	// Given that gofer files can consume host FDs, restrict the number
	// of files that can be held by the cache.
	m.SetDirentCacheMaxSize(DefaultDirentCacheSize)
	m.SetDirentCacheLimiter(fs.DirentCacheLimiterFromContext(ctx))

	// Send the Tversion request.
	s.client, err = p9.NewClient(conn, s.msize, s.version)
	if err != nil {
		// Drop our reference on the session, it needs to be torn down.
		s.DecRef()
		return nil, err
	}

	// Notify that we're about to call the Gofer and block.
	ctx.UninterruptibleSleepStart(false)
	// Send the Tattach request.
	s.attach.file, err = s.client.Attach(s.aname)
	ctx.UninterruptibleSleepFinish(false)
	if err != nil {
		// Same as above.
		s.DecRef()
		return nil, err
	}

	qid, valid, attr, err := s.attach.getAttr(ctx, p9.AttrMaskAll())
	if err != nil {
		s.attach.close(ctx)
		// Same as above, but after we execute the Close request.
		s.DecRef()
		return nil, err
	}

	sattr, iops := newInodeOperations(ctx, &s, s.attach, qid, valid, attr, false)
	return fs.NewInode(ctx, iops, m, sattr), nil
}

// newEndpointMaps creates a new endpointMaps.
func newEndpointMaps() *endpointMaps {
	return &endpointMaps{
		direntMap: make(map[transport.BoundEndpoint]*fs.Dirent),
		keyMap:    make(map[device.MultiDeviceKey]transport.BoundEndpoint),
		pathMap:   make(map[transport.BoundEndpoint]string),
	}
}

// fillKeyMap populates key and dirent maps upon restore from saved
// pathmap.
func (s *session) fillKeyMap(ctx context.Context) error {
	unlock := s.endpoints.lock()
	defer unlock()

	for ep, dirPath := range s.endpoints.pathMap {
		_, file, err := s.attach.walk(ctx, splitAbsolutePath(dirPath))
		if err != nil {
			return fmt.Errorf("error filling endpointmaps, failed to walk to %q: %v", dirPath, err)
		}

		qid, _, attr, err := file.getAttr(ctx, p9.AttrMaskAll())
		if err != nil {
			return fmt.Errorf("failed to get file attributes of %s: %v", dirPath, err)
		}

		key := device.MultiDeviceKey{
			Device:          attr.RDev,
			SecondaryDevice: s.connID,
			Inode:           qid.Path,
		}

		s.endpoints.keyMap[key] = ep
	}
	return nil
}

// fillPathMap populates paths for endpoints from dirents in direntMap
// before save.
func (s *session) fillPathMap() error {
	unlock := s.endpoints.lock()
	defer unlock()

	for ep, dir := range s.endpoints.direntMap {
		mountRoot := dir.MountRoot()
		defer mountRoot.DecRef()
		dirPath, _ := dir.FullName(mountRoot)
		if dirPath == "" {
			return fmt.Errorf("error getting path from dirent")
		}
		s.endpoints.pathMap[ep] = dirPath
	}
	return nil
}

// restoreEndpointMaps recreates and fills the key and dirent maps.
func (s *session) restoreEndpointMaps(ctx context.Context) error {
	// When restoring, only need to create the keyMap because the dirent and path
	// maps got stored through the save.
	s.endpoints.keyMap = make(map[device.MultiDeviceKey]transport.BoundEndpoint)
	if err := s.fillKeyMap(ctx); err != nil {
		return fmt.Errorf("failed to insert sockets into endpoint map: %v", err)
	}

	// Re-create pathMap because it can no longer be trusted as socket paths can
	// change while process continues to run. Empty pathMap will be re-filled upon
	// next save.
	s.endpoints.pathMap = make(map[transport.BoundEndpoint]string)
	return nil
}