summaryrefslogtreecommitdiffhomepage
path: root/pkg/sentry/syscalls
diff options
context:
space:
mode:
Diffstat (limited to 'pkg/sentry/syscalls')
-rw-r--r--pkg/sentry/syscalls/linux/vfs2/fd.go54
-rw-r--r--pkg/sentry/syscalls/linux/vfs2/filesystem.go50
-rw-r--r--pkg/sentry/syscalls/linux/vfs2/ioctl.go43
-rw-r--r--pkg/sentry/syscalls/linux/vfs2/sync.go42
-rw-r--r--pkg/sentry/syscalls/linux/vfs2/vfs2.go2
5 files changed, 162 insertions, 29 deletions
diff --git a/pkg/sentry/syscalls/linux/vfs2/fd.go b/pkg/sentry/syscalls/linux/vfs2/fd.go
index 7e4c6a56e..517394ba9 100644
--- a/pkg/sentry/syscalls/linux/vfs2/fd.go
+++ b/pkg/sentry/syscalls/linux/vfs2/fd.go
@@ -156,11 +156,10 @@ func Fcntl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall
}
return uintptr(n), nil, nil
case linux.F_GETOWN:
- a := file.AsyncHandler()
- if a == nil {
+ owner, hasOwner := getAsyncOwner(t, file)
+ if !hasOwner {
return 0, nil, nil
}
- owner := getAsyncOwner(t, a.(*fasync.FileAsync))
if owner.Type == linux.F_OWNER_PGRP {
return uintptr(-owner.PID), nil, nil
}
@@ -176,26 +175,21 @@ func Fcntl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall
ownerType = linux.F_OWNER_PGRP
who = -who
}
- a := file.SetAsyncHandler(fasync.NewVFS2).(*fasync.FileAsync)
- return 0, nil, setAsyncOwner(t, a, ownerType, who)
+ return 0, nil, setAsyncOwner(t, file, ownerType, who)
case linux.F_GETOWN_EX:
- a := file.AsyncHandler()
- if a == nil {
+ owner, hasOwner := getAsyncOwner(t, file)
+ if !hasOwner {
return 0, nil, nil
}
- addr := args[2].Pointer()
- owner := getAsyncOwner(t, a.(*fasync.FileAsync))
- _, err := t.CopyOut(addr, &owner)
+ _, err := t.CopyOut(args[2].Pointer(), &owner)
return 0, nil, err
case linux.F_SETOWN_EX:
- addr := args[2].Pointer()
var owner linux.FOwnerEx
- n, err := t.CopyIn(addr, &owner)
+ n, err := t.CopyIn(args[2].Pointer(), &owner)
if err != nil {
return 0, nil, err
}
- a := file.SetAsyncHandler(fasync.NewVFS2).(*fasync.FileAsync)
- return uintptr(n), nil, setAsyncOwner(t, a, owner.Type, owner.PID)
+ return uintptr(n), nil, setAsyncOwner(t, file, owner.Type, owner.PID)
case linux.F_GETPIPE_SZ:
pipefile, ok := file.Impl().(*pipe.VFSPipeFD)
if !ok {
@@ -219,30 +213,48 @@ func Fcntl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall
}
}
-func getAsyncOwner(t *kernel.Task, a *fasync.FileAsync) linux.FOwnerEx {
- ot, otg, opg := a.Owner()
+func getAsyncOwner(t *kernel.Task, fd *vfs.FileDescription) (ownerEx linux.FOwnerEx, hasOwner bool) {
+ a := fd.AsyncHandler()
+ if a == nil {
+ return linux.FOwnerEx{}, false
+ }
+
+ ot, otg, opg := a.(*fasync.FileAsync).Owner()
switch {
case ot != nil:
return linux.FOwnerEx{
Type: linux.F_OWNER_TID,
PID: int32(t.PIDNamespace().IDOfTask(ot)),
- }
+ }, true
case otg != nil:
return linux.FOwnerEx{
Type: linux.F_OWNER_PID,
PID: int32(t.PIDNamespace().IDOfThreadGroup(otg)),
- }
+ }, true
case opg != nil:
return linux.FOwnerEx{
Type: linux.F_OWNER_PGRP,
PID: int32(t.PIDNamespace().IDOfProcessGroup(opg)),
- }
+ }, true
default:
- return linux.FOwnerEx{}
+ return linux.FOwnerEx{}, true
}
}
-func setAsyncOwner(t *kernel.Task, a *fasync.FileAsync, ownerType, pid int32) error {
+func setAsyncOwner(t *kernel.Task, fd *vfs.FileDescription, ownerType, pid int32) error {
+ switch ownerType {
+ case linux.F_OWNER_TID, linux.F_OWNER_PID, linux.F_OWNER_PGRP:
+ // Acceptable type.
+ default:
+ return syserror.EINVAL
+ }
+
+ a := fd.SetAsyncHandler(fasync.NewVFS2).(*fasync.FileAsync)
+ if pid == 0 {
+ a.ClearOwner()
+ return nil
+ }
+
switch ownerType {
case linux.F_OWNER_TID:
task := t.PIDNamespace().TaskWithID(kernel.ThreadID(pid))
diff --git a/pkg/sentry/syscalls/linux/vfs2/filesystem.go b/pkg/sentry/syscalls/linux/vfs2/filesystem.go
index 5dac77e4d..b12b5967b 100644
--- a/pkg/sentry/syscalls/linux/vfs2/filesystem.go
+++ b/pkg/sentry/syscalls/linux/vfs2/filesystem.go
@@ -18,6 +18,7 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/sentry/limits"
"gvisor.dev/gvisor/pkg/sentry/vfs"
"gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/usermem"
@@ -239,6 +240,55 @@ func renameat(t *kernel.Task, olddirfd int32, oldpathAddr usermem.Addr, newdirfd
})
}
+// Fallocate implements linux system call fallocate(2).
+func Fallocate(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
+ fd := args[0].Int()
+ mode := args[1].Uint64()
+ offset := args[2].Int64()
+ length := args[3].Int64()
+
+ file := t.GetFileVFS2(fd)
+
+ if file == nil {
+ return 0, nil, syserror.EBADF
+ }
+ defer file.DecRef()
+
+ if !file.IsWritable() {
+ return 0, nil, syserror.EBADF
+ }
+
+ if mode != 0 {
+ return 0, nil, syserror.ENOTSUP
+ }
+
+ if offset < 0 || length <= 0 {
+ return 0, nil, syserror.EINVAL
+ }
+
+ size := offset + length
+
+ if size < 0 {
+ return 0, nil, syserror.EFBIG
+ }
+
+ limit := limits.FromContext(t).Get(limits.FileSize).Cur
+
+ if uint64(size) >= limit {
+ t.SendSignal(&arch.SignalInfo{
+ Signo: int32(linux.SIGXFSZ),
+ Code: arch.SignalInfoUser,
+ })
+ return 0, nil, syserror.EFBIG
+ }
+
+ return 0, nil, file.Impl().Allocate(t, mode, uint64(offset), uint64(length))
+
+ // File length modified, generate notification.
+ // TODO(gvisor.dev/issue/1479): Reenable when Inotify is ported.
+ // file.Dirent.InotifyEvent(linux.IN_MODIFY, 0)
+}
+
// Rmdir implements Linux syscall rmdir(2).
func Rmdir(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
pathAddr := args[0].Pointer()
diff --git a/pkg/sentry/syscalls/linux/vfs2/ioctl.go b/pkg/sentry/syscalls/linux/vfs2/ioctl.go
index 0399c0db4..fd6ab94b2 100644
--- a/pkg/sentry/syscalls/linux/vfs2/ioctl.go
+++ b/pkg/sentry/syscalls/linux/vfs2/ioctl.go
@@ -57,6 +57,49 @@ func Ioctl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall
flags &^= linux.O_NONBLOCK
}
return 0, nil, file.SetStatusFlags(t, t.Credentials(), flags)
+
+ case linux.FIOASYNC:
+ var set int32
+ if _, err := t.CopyIn(args[2].Pointer(), &set); err != nil {
+ return 0, nil, err
+ }
+ flags := file.StatusFlags()
+ if set != 0 {
+ flags |= linux.O_ASYNC
+ } else {
+ flags &^= linux.O_ASYNC
+ }
+ file.SetStatusFlags(t, t.Credentials(), flags)
+ return 0, nil, nil
+
+ case linux.FIOGETOWN, linux.SIOCGPGRP:
+ var who int32
+ owner, hasOwner := getAsyncOwner(t, file)
+ if hasOwner {
+ if owner.Type == linux.F_OWNER_PGRP {
+ who = -owner.PID
+ } else {
+ who = owner.PID
+ }
+ }
+ _, err := t.CopyOut(args[2].Pointer(), &who)
+ return 0, nil, err
+
+ case linux.FIOSETOWN, linux.SIOCSPGRP:
+ var who int32
+ if _, err := t.CopyIn(args[2].Pointer(), &who); err != nil {
+ return 0, nil, err
+ }
+ ownerType := int32(linux.F_OWNER_PID)
+ if who < 0 {
+ // Check for overflow before flipping the sign.
+ if who-1 > who {
+ return 0, nil, syserror.EINVAL
+ }
+ ownerType = linux.F_OWNER_PGRP
+ who = -who
+ }
+ return 0, nil, setAsyncOwner(t, file, ownerType, who)
}
ret, err := file.Ioctl(t, t.MemoryManager(), args)
diff --git a/pkg/sentry/syscalls/linux/vfs2/sync.go b/pkg/sentry/syscalls/linux/vfs2/sync.go
index 365250b0b..0d0ebf46a 100644
--- a/pkg/sentry/syscalls/linux/vfs2/sync.go
+++ b/pkg/sentry/syscalls/linux/vfs2/sync.go
@@ -65,10 +65,8 @@ func SyncFileRange(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel
nbytes := args[2].Int64()
flags := args[3].Uint()
- if offset < 0 {
- return 0, nil, syserror.EINVAL
- }
- if nbytes < 0 {
+ // Check for negative values and overflow.
+ if offset < 0 || offset+nbytes < 0 {
return 0, nil, syserror.EINVAL
}
if flags&^(linux.SYNC_FILE_RANGE_WAIT_BEFORE|linux.SYNC_FILE_RANGE_WRITE|linux.SYNC_FILE_RANGE_WAIT_AFTER) != 0 {
@@ -81,7 +79,37 @@ func SyncFileRange(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel
}
defer file.DecRef()
- // TODO(gvisor.dev/issue/1897): Avoid writeback of data ranges outside of
- // [offset, offset+nbytes).
- return 0, nil, file.Sync(t)
+ // TODO(gvisor.dev/issue/1897): Currently, the only file syncing we support
+ // is a full-file sync, i.e. fsync(2). As a result, there are severe
+ // limitations on how much we support sync_file_range:
+ // - In Linux, sync_file_range(2) doesn't write out the file's metadata, even
+ // if the file size is changed. We do.
+ // - We always sync the entire file instead of [offset, offset+nbytes).
+ // - We do not support the use of WAIT_BEFORE without WAIT_AFTER. For
+ // correctness, we would have to perform a write-out every time WAIT_BEFORE
+ // was used, but this would be much more expensive than expected if there
+ // were no write-out operations in progress.
+ // - Whenever WAIT_AFTER is used, we sync the file.
+ // - Ignore WRITE. If this flag is used with WAIT_AFTER, then the file will
+ // be synced anyway. If this flag is used without WAIT_AFTER, then it is
+ // safe (and less expensive) to do nothing, because the syscall will not
+ // wait for the write-out to complete--we only need to make sure that the
+ // next time WAIT_BEFORE or WAIT_AFTER are used, the write-out completes.
+ // - According to fs/sync.c, WAIT_BEFORE|WAIT_AFTER "will detect any I/O
+ // errors or ENOSPC conditions and will return those to the caller, after
+ // clearing the EIO and ENOSPC flags in the address_space." We don't do
+ // this.
+
+ if flags&linux.SYNC_FILE_RANGE_WAIT_BEFORE != 0 &&
+ flags&linux.SYNC_FILE_RANGE_WAIT_AFTER == 0 {
+ t.Kernel().EmitUnimplementedEvent(t)
+ return 0, nil, syserror.ENOSYS
+ }
+
+ if flags&linux.SYNC_FILE_RANGE_WAIT_AFTER != 0 {
+ if err := file.Sync(t); err != nil {
+ return 0, nil, syserror.ConvertIntr(err, kernel.ERESTARTSYS)
+ }
+ }
+ return 0, nil, nil
}
diff --git a/pkg/sentry/syscalls/linux/vfs2/vfs2.go b/pkg/sentry/syscalls/linux/vfs2/vfs2.go
index 9e60c4a1c..8f497ecc7 100644
--- a/pkg/sentry/syscalls/linux/vfs2/vfs2.go
+++ b/pkg/sentry/syscalls/linux/vfs2/vfs2.go
@@ -138,7 +138,7 @@ func Override() {
s.Table[282] = syscalls.Supported("signalfd", Signalfd)
s.Table[283] = syscalls.Supported("timerfd_create", TimerfdCreate)
s.Table[284] = syscalls.Supported("eventfd", Eventfd)
- delete(s.Table, 285) // fallocate
+ s.Table[285] = syscalls.PartiallySupported("fallocate", Fallocate, "Not all options are supported.", nil)
s.Table[286] = syscalls.Supported("timerfd_settime", TimerfdSettime)
s.Table[287] = syscalls.Supported("timerfd_gettime", TimerfdGettime)
s.Table[288] = syscalls.Supported("accept4", Accept4)