// Copyright 2020 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package vfs2

import (
	"gvisor.dev/gvisor/pkg/abi/linux"
	"gvisor.dev/gvisor/pkg/sentry/arch"
	"gvisor.dev/gvisor/pkg/sentry/kernel"
	slinux "gvisor.dev/gvisor/pkg/sentry/syscalls/linux"
	"gvisor.dev/gvisor/pkg/sentry/vfs"
	"gvisor.dev/gvisor/pkg/syserror"
	"gvisor.dev/gvisor/pkg/usermem"
	"gvisor.dev/gvisor/pkg/waiter"
)

const (
	eventMaskRead  = waiter.EventIn | waiter.EventHUp | waiter.EventErr
	eventMaskWrite = waiter.EventOut | waiter.EventHUp | waiter.EventErr
)

// Read implements Linux syscall read(2).
func Read(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
	fd := args[0].Int()
	addr := args[1].Pointer()
	size := args[2].SizeT()

	file := t.GetFileVFS2(fd)
	if file == nil {
		return 0, nil, syserror.EBADF
	}
	defer file.DecRef()

	// Check that the size is legitimate.
	si := int(size)
	if si < 0 {
		return 0, nil, syserror.EINVAL
	}

	// Get the destination of the read.
	dst, err := t.SingleIOSequence(addr, si, usermem.IOOpts{
		AddressSpaceActive: true,
	})
	if err != nil {
		return 0, nil, err
	}

	n, err := read(t, file, dst, vfs.ReadOptions{})
	t.IOUsage().AccountReadSyscall(n)
	return uintptr(n), nil, slinux.HandleIOErrorVFS2(t, n != 0, err, kernel.ERESTARTSYS, "read", file)
}

// Readv implements Linux syscall readv(2).
func Readv(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
	fd := args[0].Int()
	addr := args[1].Pointer()
	iovcnt := int(args[2].Int())

	file := t.GetFileVFS2(fd)
	if file == nil {
		return 0, nil, syserror.EBADF
	}
	defer file.DecRef()

	// Get the destination of the read.
	dst, err := t.IovecsIOSequence(addr, iovcnt, usermem.IOOpts{
		AddressSpaceActive: true,
	})
	if err != nil {
		return 0, nil, err
	}

	n, err := read(t, file, dst, vfs.ReadOptions{})
	t.IOUsage().AccountReadSyscall(n)
	return uintptr(n), nil, slinux.HandleIOErrorVFS2(t, n != 0, err, kernel.ERESTARTSYS, "readv", file)
}

func read(t *kernel.Task, file *vfs.FileDescription, dst usermem.IOSequence, opts vfs.ReadOptions) (int64, error) {
	n, err := file.Read(t, dst, opts)
	if err != syserror.ErrWouldBlock || file.StatusFlags()&linux.O_NONBLOCK != 0 {
		return n, err
	}

	// Register for notifications.
	w, ch := waiter.NewChannelEntry(nil)
	file.EventRegister(&w, eventMaskRead)

	total := n
	for {
		// Shorten dst to reflect bytes previously read.
		dst = dst.DropFirst(int(n))

		// Issue the request and break out if it completes with anything other than
		// "would block".
		n, err := file.Read(t, dst, opts)
		total += n
		if err != syserror.ErrWouldBlock {
			break
		}
		if err := t.Block(ch); err != nil {
			break
		}
	}
	file.EventUnregister(&w)

	return total, err
}

// Pread64 implements Linux syscall pread64(2).
func Pread64(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
	fd := args[0].Int()
	addr := args[1].Pointer()
	size := args[2].SizeT()
	offset := args[3].Int64()

	file := t.GetFileVFS2(fd)
	if file == nil {
		return 0, nil, syserror.EBADF
	}
	defer file.DecRef()

	// Check that the offset is legitimate.
	if offset < 0 {
		return 0, nil, syserror.EINVAL
	}

	// Check that the size is legitimate.
	si := int(size)
	if si < 0 {
		return 0, nil, syserror.EINVAL
	}

	// Get the destination of the read.
	dst, err := t.SingleIOSequence(addr, si, usermem.IOOpts{
		AddressSpaceActive: true,
	})
	if err != nil {
		return 0, nil, err
	}

	n, err := pread(t, file, dst, offset, vfs.ReadOptions{})
	t.IOUsage().AccountReadSyscall(n)
	return uintptr(n), nil, slinux.HandleIOErrorVFS2(t, n != 0, err, kernel.ERESTARTSYS, "pread64", file)
}

// Preadv implements Linux syscall preadv(2).
func Preadv(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
	fd := args[0].Int()
	addr := args[1].Pointer()
	iovcnt := int(args[2].Int())
	offset := args[3].Int64()

	file := t.GetFileVFS2(fd)
	if file == nil {
		return 0, nil, syserror.EBADF
	}
	defer file.DecRef()

	// Check that the offset is legitimate.
	if offset < 0 {
		return 0, nil, syserror.EINVAL
	}

	// Get the destination of the read.
	dst, err := t.IovecsIOSequence(addr, iovcnt, usermem.IOOpts{
		AddressSpaceActive: true,
	})
	if err != nil {
		return 0, nil, err
	}

	n, err := pread(t, file, dst, offset, vfs.ReadOptions{})
	t.IOUsage().AccountReadSyscall(n)
	return uintptr(n), nil, slinux.HandleIOErrorVFS2(t, n != 0, err, kernel.ERESTARTSYS, "preadv", file)
}

// Preadv2 implements Linux syscall preadv2(2).
func Preadv2(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
	// While the glibc signature is
	// preadv2(int fd, struct iovec* iov, int iov_cnt, off_t offset, int flags)
	// the actual syscall
	// (https://elixir.bootlin.com/linux/v5.5/source/fs/read_write.c#L1142)
	// splits the offset argument into a high/low value for compatibility with
	// 32-bit architectures. The flags argument is the 6th argument (index 5).
	fd := args[0].Int()
	addr := args[1].Pointer()
	iovcnt := int(args[2].Int())
	offset := args[3].Int64()
	flags := args[5].Int()

	file := t.GetFileVFS2(fd)
	if file == nil {
		return 0, nil, syserror.EBADF
	}
	defer file.DecRef()

	// Check that the offset is legitimate.
	if offset < -1 {
		return 0, nil, syserror.EINVAL
	}

	// Get the destination of the read.
	dst, err := t.IovecsIOSequence(addr, iovcnt, usermem.IOOpts{
		AddressSpaceActive: true,
	})
	if err != nil {
		return 0, nil, err
	}

	opts := vfs.ReadOptions{
		Flags: uint32(flags),
	}
	var n int64
	if offset == -1 {
		n, err = read(t, file, dst, opts)
	} else {
		n, err = pread(t, file, dst, offset, opts)
	}
	t.IOUsage().AccountReadSyscall(n)
	return uintptr(n), nil, slinux.HandleIOErrorVFS2(t, n != 0, err, kernel.ERESTARTSYS, "preadv2", file)
}

func pread(t *kernel.Task, file *vfs.FileDescription, dst usermem.IOSequence, offset int64, opts vfs.ReadOptions) (int64, error) {
	n, err := file.PRead(t, dst, offset, opts)
	if err != syserror.ErrWouldBlock || file.StatusFlags()&linux.O_NONBLOCK != 0 {
		return n, err
	}

	// Register for notifications.
	w, ch := waiter.NewChannelEntry(nil)
	file.EventRegister(&w, eventMaskRead)

	total := n
	for {
		// Shorten dst to reflect bytes previously read.
		dst = dst.DropFirst(int(n))

		// Issue the request and break out if it completes with anything other than
		// "would block".
		n, err := file.PRead(t, dst, offset+total, opts)
		total += n
		if err != syserror.ErrWouldBlock {
			break
		}
		if err := t.Block(ch); err != nil {
			break
		}
	}
	file.EventUnregister(&w)

	return total, err
}

// Write implements Linux syscall write(2).
func Write(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
	fd := args[0].Int()
	addr := args[1].Pointer()
	size := args[2].SizeT()

	file := t.GetFileVFS2(fd)
	if file == nil {
		return 0, nil, syserror.EBADF
	}
	defer file.DecRef()

	// Check that the size is legitimate.
	si := int(size)
	if si < 0 {
		return 0, nil, syserror.EINVAL
	}

	// Get the source of the write.
	src, err := t.SingleIOSequence(addr, si, usermem.IOOpts{
		AddressSpaceActive: true,
	})
	if err != nil {
		return 0, nil, err
	}

	n, err := write(t, file, src, vfs.WriteOptions{})
	t.IOUsage().AccountWriteSyscall(n)
	return uintptr(n), nil, slinux.HandleIOErrorVFS2(t, n != 0, err, kernel.ERESTARTSYS, "write", file)
}

// Writev implements Linux syscall writev(2).
func Writev(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
	fd := args[0].Int()
	addr := args[1].Pointer()
	iovcnt := int(args[2].Int())

	file := t.GetFileVFS2(fd)
	if file == nil {
		return 0, nil, syserror.EBADF
	}
	defer file.DecRef()

	// Get the source of the write.
	src, err := t.IovecsIOSequence(addr, iovcnt, usermem.IOOpts{
		AddressSpaceActive: true,
	})
	if err != nil {
		return 0, nil, err
	}

	n, err := write(t, file, src, vfs.WriteOptions{})
	t.IOUsage().AccountWriteSyscall(n)
	return uintptr(n), nil, slinux.HandleIOErrorVFS2(t, n != 0, err, kernel.ERESTARTSYS, "writev", file)
}

func write(t *kernel.Task, file *vfs.FileDescription, src usermem.IOSequence, opts vfs.WriteOptions) (int64, error) {
	n, err := file.Write(t, src, opts)
	if err != syserror.ErrWouldBlock || file.StatusFlags()&linux.O_NONBLOCK != 0 {
		return n, err
	}

	// Register for notifications.
	w, ch := waiter.NewChannelEntry(nil)
	file.EventRegister(&w, eventMaskWrite)

	total := n
	for {
		// Shorten src to reflect bytes previously written.
		src = src.DropFirst(int(n))

		// Issue the request and break out if it completes with anything other than
		// "would block".
		n, err := file.Write(t, src, opts)
		total += n
		if err != syserror.ErrWouldBlock {
			break
		}
		if err := t.Block(ch); err != nil {
			break
		}
	}
	file.EventUnregister(&w)

	return total, err
}

// Pwrite64 implements Linux syscall pwrite64(2).
func Pwrite64(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
	fd := args[0].Int()
	addr := args[1].Pointer()
	size := args[2].SizeT()
	offset := args[3].Int64()

	file := t.GetFileVFS2(fd)
	if file == nil {
		return 0, nil, syserror.EBADF
	}
	defer file.DecRef()

	// Check that the offset is legitimate.
	if offset < 0 {
		return 0, nil, syserror.EINVAL
	}

	// Check that the size is legitimate.
	si := int(size)
	if si < 0 {
		return 0, nil, syserror.EINVAL
	}

	// Get the source of the write.
	src, err := t.SingleIOSequence(addr, si, usermem.IOOpts{
		AddressSpaceActive: true,
	})
	if err != nil {
		return 0, nil, err
	}

	n, err := pwrite(t, file, src, offset, vfs.WriteOptions{})
	t.IOUsage().AccountWriteSyscall(n)
	return uintptr(n), nil, slinux.HandleIOErrorVFS2(t, n != 0, err, kernel.ERESTARTSYS, "pwrite64", file)
}

// Pwritev implements Linux syscall pwritev(2).
func Pwritev(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
	fd := args[0].Int()
	addr := args[1].Pointer()
	iovcnt := int(args[2].Int())
	offset := args[3].Int64()

	file := t.GetFileVFS2(fd)
	if file == nil {
		return 0, nil, syserror.EBADF
	}
	defer file.DecRef()

	// Check that the offset is legitimate.
	if offset < 0 {
		return 0, nil, syserror.EINVAL
	}

	// Get the source of the write.
	src, err := t.IovecsIOSequence(addr, iovcnt, usermem.IOOpts{
		AddressSpaceActive: true,
	})
	if err != nil {
		return 0, nil, err
	}

	n, err := pwrite(t, file, src, offset, vfs.WriteOptions{})
	t.IOUsage().AccountReadSyscall(n)
	return uintptr(n), nil, slinux.HandleIOErrorVFS2(t, n != 0, err, kernel.ERESTARTSYS, "pwritev", file)
}

// Pwritev2 implements Linux syscall pwritev2(2).
func Pwritev2(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
	// While the glibc signature is
	// pwritev2(int fd, struct iovec* iov, int iov_cnt, off_t offset, int flags)
	// the actual syscall
	// (https://elixir.bootlin.com/linux/v5.5/source/fs/read_write.c#L1162)
	// splits the offset argument into a high/low value for compatibility with
	// 32-bit architectures. The flags argument is the 6th argument (index 5).
	fd := args[0].Int()
	addr := args[1].Pointer()
	iovcnt := int(args[2].Int())
	offset := args[3].Int64()
	flags := args[5].Int()

	file := t.GetFileVFS2(fd)
	if file == nil {
		return 0, nil, syserror.EBADF
	}
	defer file.DecRef()

	// Check that the offset is legitimate.
	if offset < -1 {
		return 0, nil, syserror.EINVAL
	}

	// Get the source of the write.
	src, err := t.IovecsIOSequence(addr, iovcnt, usermem.IOOpts{
		AddressSpaceActive: true,
	})
	if err != nil {
		return 0, nil, err
	}

	opts := vfs.WriteOptions{
		Flags: uint32(flags),
	}
	var n int64
	if offset == -1 {
		n, err = write(t, file, src, opts)
	} else {
		n, err = pwrite(t, file, src, offset, opts)
	}
	t.IOUsage().AccountWriteSyscall(n)
	return uintptr(n), nil, slinux.HandleIOErrorVFS2(t, n != 0, err, kernel.ERESTARTSYS, "pwritev2", file)
}

func pwrite(t *kernel.Task, file *vfs.FileDescription, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (int64, error) {
	n, err := file.PWrite(t, src, offset, opts)
	if err != syserror.ErrWouldBlock || file.StatusFlags()&linux.O_NONBLOCK != 0 {
		return n, err
	}

	// Register for notifications.
	w, ch := waiter.NewChannelEntry(nil)
	file.EventRegister(&w, eventMaskWrite)

	total := n
	for {
		// Shorten src to reflect bytes previously written.
		src = src.DropFirst(int(n))

		// Issue the request and break out if it completes with anything other than
		// "would block".
		n, err := file.PWrite(t, src, offset+total, opts)
		total += n
		if err != syserror.ErrWouldBlock {
			break
		}
		if err := t.Block(ch); err != nil {
			break
		}
	}
	file.EventUnregister(&w)

	return total, err
}

// Lseek implements Linux syscall lseek(2).
func Lseek(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
	fd := args[0].Int()
	offset := args[1].Int64()
	whence := args[2].Int()

	file := t.GetFileVFS2(fd)
	if file == nil {
		return 0, nil, syserror.EBADF
	}
	defer file.DecRef()

	newoff, err := file.Seek(t, offset, whence)
	return uintptr(newoff), nil, err
}