diff options
Diffstat (limited to 'pkg/sentry/syscalls')
-rw-r--r-- | pkg/sentry/syscalls/linux/linux64.go | 4 | ||||
-rw-r--r-- | pkg/sentry/syscalls/linux/sys_file.go | 16 | ||||
-rw-r--r-- | pkg/sentry/syscalls/linux/vfs2/fd.go | 78 | ||||
-rw-r--r-- | pkg/sentry/syscalls/linux/vfs2/ioctl.go | 4 | ||||
-rw-r--r-- | pkg/sentry/syscalls/linux/vfs2/lock.go | 4 | ||||
-rw-r--r-- | pkg/sentry/syscalls/linux/vfs2/read_write.go | 39 | ||||
-rw-r--r-- | pkg/sentry/syscalls/linux/vfs2/setstat.go | 13 | ||||
-rw-r--r-- | pkg/sentry/syscalls/linux/vfs2/splice.go | 12 | ||||
-rw-r--r-- | pkg/sentry/syscalls/linux/vfs2/sync.go | 4 |
9 files changed, 92 insertions, 82 deletions
diff --git a/pkg/sentry/syscalls/linux/linux64.go b/pkg/sentry/syscalls/linux/linux64.go index a72df62f6..62d1e8f8b 100644 --- a/pkg/sentry/syscalls/linux/linux64.go +++ b/pkg/sentry/syscalls/linux/linux64.go @@ -503,8 +503,8 @@ var ARM64 = &kernel.SyscallTable{ 72: syscalls.Supported("pselect", Pselect), 73: syscalls.Supported("ppoll", Ppoll), 74: syscalls.PartiallySupported("signalfd4", Signalfd4, "Semantics are slightly different.", []string{"gvisor.dev/issue/139"}), - 75: syscalls.ErrorWithEvent("vmsplice", syserror.ENOSYS, "", []string{"gvisor.dev/issue/138"}), // TODO(b/29354098) - 76: syscalls.PartiallySupported("splice", Splice, "Stub implementation.", []string{"gvisor.dev/issue/138"}), // TODO(b/29354098) + 75: syscalls.ErrorWithEvent("vmsplice", syserror.ENOSYS, "", []string{"gvisor.dev/issue/138"}), // TODO(b/29354098) + 76: syscalls.Supported("splice", Splice), 77: syscalls.Supported("tee", Tee), 78: syscalls.Supported("readlinkat", Readlinkat), 79: syscalls.Supported("fstatat", Fstatat), diff --git a/pkg/sentry/syscalls/linux/sys_file.go b/pkg/sentry/syscalls/linux/sys_file.go index c33571f43..a6253626e 100644 --- a/pkg/sentry/syscalls/linux/sys_file.go +++ b/pkg/sentry/syscalls/linux/sys_file.go @@ -1014,12 +1014,12 @@ func Fcntl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall } if cmd == linux.F_SETLK { // Non-blocking lock, provide a nil lock.Blocker. - if !file.Dirent.Inode.LockCtx.Posix.LockRegion(t.FDTable(), lock.ReadLock, rng, nil) { + if !file.Dirent.Inode.LockCtx.Posix.LockRegionVFS1(t.FDTable(), lock.ReadLock, rng, nil) { return 0, nil, syserror.EAGAIN } } else { // Blocking lock, pass in the task to satisfy the lock.Blocker interface. - if !file.Dirent.Inode.LockCtx.Posix.LockRegion(t.FDTable(), lock.ReadLock, rng, t) { + if !file.Dirent.Inode.LockCtx.Posix.LockRegionVFS1(t.FDTable(), lock.ReadLock, rng, t) { return 0, nil, syserror.EINTR } } @@ -1030,12 +1030,12 @@ func Fcntl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall } if cmd == linux.F_SETLK { // Non-blocking lock, provide a nil lock.Blocker. - if !file.Dirent.Inode.LockCtx.Posix.LockRegion(t.FDTable(), lock.WriteLock, rng, nil) { + if !file.Dirent.Inode.LockCtx.Posix.LockRegionVFS1(t.FDTable(), lock.WriteLock, rng, nil) { return 0, nil, syserror.EAGAIN } } else { // Blocking lock, pass in the task to satisfy the lock.Blocker interface. - if !file.Dirent.Inode.LockCtx.Posix.LockRegion(t.FDTable(), lock.WriteLock, rng, t) { + if !file.Dirent.Inode.LockCtx.Posix.LockRegionVFS1(t.FDTable(), lock.WriteLock, rng, t) { return 0, nil, syserror.EINTR } } @@ -2167,24 +2167,24 @@ func Flock(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall case linux.LOCK_EX: if nonblocking { // Since we're nonblocking we pass a nil lock.Blocker implementation. - if !file.Dirent.Inode.LockCtx.BSD.LockRegion(file, lock.WriteLock, rng, nil) { + if !file.Dirent.Inode.LockCtx.BSD.LockRegionVFS1(file, lock.WriteLock, rng, nil) { return 0, nil, syserror.EWOULDBLOCK } } else { // Because we're blocking we will pass the task to satisfy the lock.Blocker interface. - if !file.Dirent.Inode.LockCtx.BSD.LockRegion(file, lock.WriteLock, rng, t) { + if !file.Dirent.Inode.LockCtx.BSD.LockRegionVFS1(file, lock.WriteLock, rng, t) { return 0, nil, syserror.EINTR } } case linux.LOCK_SH: if nonblocking { // Since we're nonblocking we pass a nil lock.Blocker implementation. - if !file.Dirent.Inode.LockCtx.BSD.LockRegion(file, lock.ReadLock, rng, nil) { + if !file.Dirent.Inode.LockCtx.BSD.LockRegionVFS1(file, lock.ReadLock, rng, nil) { return 0, nil, syserror.EWOULDBLOCK } } else { // Because we're blocking we will pass the task to satisfy the lock.Blocker interface. - if !file.Dirent.Inode.LockCtx.BSD.LockRegion(file, lock.ReadLock, rng, t) { + if !file.Dirent.Inode.LockCtx.BSD.LockRegionVFS1(file, lock.ReadLock, rng, t) { return 0, nil, syserror.EINTR } } diff --git a/pkg/sentry/syscalls/linux/vfs2/fd.go b/pkg/sentry/syscalls/linux/vfs2/fd.go index 7dd9ef857..1a31898e8 100644 --- a/pkg/sentry/syscalls/linux/vfs2/fd.go +++ b/pkg/sentry/syscalls/linux/vfs2/fd.go @@ -123,6 +123,15 @@ func Fcntl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall } defer file.DecRef(t) + if file.StatusFlags()&linux.O_PATH != 0 { + switch cmd { + case linux.F_DUPFD, linux.F_DUPFD_CLOEXEC, linux.F_GETFD, linux.F_SETFD, linux.F_GETFL: + // allowed + default: + return 0, nil, syserror.EBADF + } + } + switch cmd { case linux.F_DUPFD, linux.F_DUPFD_CLOEXEC: minfd := args[2].Int() @@ -205,8 +214,12 @@ func Fcntl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall } err := tmpfs.AddSeals(file, args[2].Uint()) return 0, nil, err - case linux.F_SETLK, linux.F_SETLKW: - return 0, nil, posixLock(t, args, file, cmd) + case linux.F_SETLK: + return 0, nil, posixLock(t, args, file, false /* blocking */) + case linux.F_SETLKW: + return 0, nil, posixLock(t, args, file, true /* blocking */) + case linux.F_GETLK: + return 0, nil, posixTestLock(t, args, file) case linux.F_GETSIG: a := file.AsyncHandler() if a == nil { @@ -292,7 +305,49 @@ func setAsyncOwner(t *kernel.Task, fd int, file *vfs.FileDescription, ownerType, } } -func posixLock(t *kernel.Task, args arch.SyscallArguments, file *vfs.FileDescription, cmd int32) error { +func posixTestLock(t *kernel.Task, args arch.SyscallArguments, file *vfs.FileDescription) error { + // Copy in the lock request. + flockAddr := args[2].Pointer() + var flock linux.Flock + if _, err := flock.CopyIn(t, flockAddr); err != nil { + return err + } + var typ lock.LockType + switch flock.Type { + case linux.F_RDLCK: + typ = lock.ReadLock + case linux.F_WRLCK: + typ = lock.WriteLock + default: + return syserror.EINVAL + } + r, err := file.ComputeLockRange(t, uint64(flock.Start), uint64(flock.Len), flock.Whence) + if err != nil { + return err + } + + newFlock, err := file.TestPOSIX(t, t.FDTable(), typ, r) + if err != nil { + return err + } + newFlock.PID = translatePID(t.PIDNamespace().Root(), t.PIDNamespace(), newFlock.PID) + if _, err = newFlock.CopyOut(t, flockAddr); err != nil { + return err + } + return nil +} + +// translatePID translates a pid from one namespace to another. Note that this +// may race with task termination/creation, in which case the original task +// corresponding to pid may no longer exist. This is used to implement the +// F_GETLK fcntl, which has the same potential race in Linux as well (i.e., +// there is no synchronization between retrieving the lock PID and translating +// it). See fs/locks.c:posix_lock_to_flock. +func translatePID(old, new *kernel.PIDNamespace, pid int32) int32 { + return int32(new.IDOfTask(old.TaskWithID(kernel.ThreadID(pid)))) +} + +func posixLock(t *kernel.Task, args arch.SyscallArguments, file *vfs.FileDescription, blocking bool) error { // Copy in the lock request. flockAddr := args[2].Pointer() var flock linux.Flock @@ -301,25 +356,30 @@ func posixLock(t *kernel.Task, args arch.SyscallArguments, file *vfs.FileDescrip } var blocker lock.Blocker - if cmd == linux.F_SETLKW { + if blocking { blocker = t } + r, err := file.ComputeLockRange(t, uint64(flock.Start), uint64(flock.Len), flock.Whence) + if err != nil { + return err + } + switch flock.Type { case linux.F_RDLCK: if !file.IsReadable() { return syserror.EBADF } - return file.LockPOSIX(t, t.FDTable(), lock.ReadLock, uint64(flock.Start), uint64(flock.Len), flock.Whence, blocker) + return file.LockPOSIX(t, t.FDTable(), int32(t.TGIDInRoot()), lock.ReadLock, r, blocker) case linux.F_WRLCK: if !file.IsWritable() { return syserror.EBADF } - return file.LockPOSIX(t, t.FDTable(), lock.WriteLock, uint64(flock.Start), uint64(flock.Len), flock.Whence, blocker) + return file.LockPOSIX(t, t.FDTable(), int32(t.TGIDInRoot()), lock.WriteLock, r, blocker) case linux.F_UNLCK: - return file.UnlockPOSIX(t, t.FDTable(), uint64(flock.Start), uint64(flock.Len), flock.Whence) + return file.UnlockPOSIX(t, t.FDTable(), r) default: return syserror.EINVAL @@ -344,6 +404,10 @@ func Fadvise64(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys } defer file.DecRef(t) + if file.StatusFlags()&linux.O_PATH != 0 { + return 0, nil, syserror.EBADF + } + // If the FD refers to a pipe or FIFO, return error. if _, isPipe := file.Impl().(*pipe.VFSPipeFD); isPipe { return 0, nil, syserror.ESPIPE diff --git a/pkg/sentry/syscalls/linux/vfs2/ioctl.go b/pkg/sentry/syscalls/linux/vfs2/ioctl.go index 20c264fef..c7c3fed57 100644 --- a/pkg/sentry/syscalls/linux/vfs2/ioctl.go +++ b/pkg/sentry/syscalls/linux/vfs2/ioctl.go @@ -32,6 +32,10 @@ func Ioctl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall } defer file.DecRef(t) + if file.StatusFlags()&linux.O_PATH != 0 { + return 0, nil, syserror.EBADF + } + // Handle ioctls that apply to all FDs. switch args[1].Int() { case linux.FIONCLEX: diff --git a/pkg/sentry/syscalls/linux/vfs2/lock.go b/pkg/sentry/syscalls/linux/vfs2/lock.go index b910b5a74..d1452a04d 100644 --- a/pkg/sentry/syscalls/linux/vfs2/lock.go +++ b/pkg/sentry/syscalls/linux/vfs2/lock.go @@ -44,11 +44,11 @@ func Flock(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall switch operation { case linux.LOCK_EX: - if err := file.LockBSD(t, lock.WriteLock, blocker); err != nil { + if err := file.LockBSD(t, int32(t.TGIDInRoot()), lock.WriteLock, blocker); err != nil { return 0, nil, err } case linux.LOCK_SH: - if err := file.LockBSD(t, lock.ReadLock, blocker); err != nil { + if err := file.LockBSD(t, int32(t.TGIDInRoot()), lock.ReadLock, blocker); err != nil { return 0, nil, err } case linux.LOCK_UN: diff --git a/pkg/sentry/syscalls/linux/vfs2/read_write.go b/pkg/sentry/syscalls/linux/vfs2/read_write.go index b77b29dcc..c7417840f 100644 --- a/pkg/sentry/syscalls/linux/vfs2/read_write.go +++ b/pkg/sentry/syscalls/linux/vfs2/read_write.go @@ -93,17 +93,11 @@ func Readv(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall func read(t *kernel.Task, file *vfs.FileDescription, dst usermem.IOSequence, opts vfs.ReadOptions) (int64, error) { n, err := file.Read(t, dst, opts) if err != syserror.ErrWouldBlock { - if n > 0 { - file.Dentry().InotifyWithParent(t, linux.IN_ACCESS, 0, vfs.PathEvent) - } return n, err } allowBlock, deadline, hasDeadline := blockPolicy(t, file) if !allowBlock { - if n > 0 { - file.Dentry().InotifyWithParent(t, linux.IN_ACCESS, 0, vfs.PathEvent) - } return n, err } @@ -134,9 +128,6 @@ func read(t *kernel.Task, file *vfs.FileDescription, dst usermem.IOSequence, opt } file.EventUnregister(&w) - if total > 0 { - file.Dentry().InotifyWithParent(t, linux.IN_ACCESS, 0, vfs.PathEvent) - } return total, err } @@ -257,17 +248,11 @@ func Preadv2(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca func pread(t *kernel.Task, file *vfs.FileDescription, dst usermem.IOSequence, offset int64, opts vfs.ReadOptions) (int64, error) { n, err := file.PRead(t, dst, offset, opts) if err != syserror.ErrWouldBlock { - if n > 0 { - file.Dentry().InotifyWithParent(t, linux.IN_ACCESS, 0, vfs.PathEvent) - } return n, err } allowBlock, deadline, hasDeadline := blockPolicy(t, file) if !allowBlock { - if n > 0 { - file.Dentry().InotifyWithParent(t, linux.IN_ACCESS, 0, vfs.PathEvent) - } return n, err } @@ -297,10 +282,6 @@ func pread(t *kernel.Task, file *vfs.FileDescription, dst usermem.IOSequence, of } } file.EventUnregister(&w) - - if total > 0 { - file.Dentry().InotifyWithParent(t, linux.IN_ACCESS, 0, vfs.PathEvent) - } return total, err } @@ -363,17 +344,11 @@ func Writev(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal func write(t *kernel.Task, file *vfs.FileDescription, src usermem.IOSequence, opts vfs.WriteOptions) (int64, error) { n, err := file.Write(t, src, opts) if err != syserror.ErrWouldBlock { - if n > 0 { - file.Dentry().InotifyWithParent(t, linux.IN_MODIFY, 0, vfs.PathEvent) - } return n, err } allowBlock, deadline, hasDeadline := blockPolicy(t, file) if !allowBlock { - if n > 0 { - file.Dentry().InotifyWithParent(t, linux.IN_MODIFY, 0, vfs.PathEvent) - } return n, err } @@ -403,10 +378,6 @@ func write(t *kernel.Task, file *vfs.FileDescription, src usermem.IOSequence, op } } file.EventUnregister(&w) - - if total > 0 { - file.Dentry().InotifyWithParent(t, linux.IN_MODIFY, 0, vfs.PathEvent) - } return total, err } @@ -527,17 +498,11 @@ func Pwritev2(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc func pwrite(t *kernel.Task, file *vfs.FileDescription, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (int64, error) { n, err := file.PWrite(t, src, offset, opts) if err != syserror.ErrWouldBlock { - if n > 0 { - file.Dentry().InotifyWithParent(t, linux.IN_MODIFY, 0, vfs.PathEvent) - } return n, err } allowBlock, deadline, hasDeadline := blockPolicy(t, file) if !allowBlock { - if n > 0 { - file.Dentry().InotifyWithParent(t, linux.IN_ACCESS, 0, vfs.PathEvent) - } return n, err } @@ -567,10 +532,6 @@ func pwrite(t *kernel.Task, file *vfs.FileDescription, src usermem.IOSequence, o } } file.EventUnregister(&w) - - if total > 0 { - file.Dentry().InotifyWithParent(t, linux.IN_ACCESS, 0, vfs.PathEvent) - } return total, err } diff --git a/pkg/sentry/syscalls/linux/vfs2/setstat.go b/pkg/sentry/syscalls/linux/vfs2/setstat.go index 1ee37e5a8..903169dc2 100644 --- a/pkg/sentry/syscalls/linux/vfs2/setstat.go +++ b/pkg/sentry/syscalls/linux/vfs2/setstat.go @@ -220,7 +220,6 @@ func Fallocate(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys length := args[3].Int64() file := t.GetFileVFS2(fd) - if file == nil { return 0, nil, syserror.EBADF } @@ -229,23 +228,18 @@ func Fallocate(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys if !file.IsWritable() { return 0, nil, syserror.EBADF } - if mode != 0 { return 0, nil, syserror.ENOTSUP } - if offset < 0 || length <= 0 { return 0, nil, syserror.EINVAL } size := offset + length - if size < 0 { return 0, nil, syserror.EFBIG } - limit := limits.FromContext(t).Get(limits.FileSize).Cur - if uint64(size) >= limit { t.SendSignal(&arch.SignalInfo{ Signo: int32(linux.SIGXFSZ), @@ -254,12 +248,7 @@ func Fallocate(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys return 0, nil, syserror.EFBIG } - if err := file.Allocate(t, mode, uint64(offset), uint64(length)); err != nil { - return 0, nil, err - } - - file.Dentry().InotifyWithParent(t, linux.IN_MODIFY, 0, vfs.PathEvent) - return 0, nil, nil + return 0, nil, file.Allocate(t, mode, uint64(offset), uint64(length)) } // Utime implements Linux syscall utime(2). diff --git a/pkg/sentry/syscalls/linux/vfs2/splice.go b/pkg/sentry/syscalls/linux/vfs2/splice.go index 8bb763a47..19e175203 100644 --- a/pkg/sentry/syscalls/linux/vfs2/splice.go +++ b/pkg/sentry/syscalls/linux/vfs2/splice.go @@ -170,13 +170,6 @@ func Splice(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal } } - if n != 0 { - // On Linux, inotify behavior is not very consistent with splice(2). We try - // our best to emulate Linux for very basic calls to splice, where for some - // reason, events are generated for output files, but not input files. - outFile.Dentry().InotifyWithParent(t, linux.IN_MODIFY, 0, vfs.PathEvent) - } - // We can only pass a single file to handleIOError, so pick inFile arbitrarily. // This is used only for debugging purposes. return uintptr(n), nil, slinux.HandleIOErrorVFS2(t, n != 0, err, syserror.ERESTARTSYS, "splice", outFile) @@ -256,8 +249,6 @@ func Tee(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallCo } if n != 0 { - outFile.Dentry().InotifyWithParent(t, linux.IN_MODIFY, 0, vfs.PathEvent) - // If a partial write is completed, the error is dropped. Log it here. if err != nil && err != io.EOF && err != syserror.ErrWouldBlock { log.Debugf("tee completed a partial write with error: %v", err) @@ -449,9 +440,6 @@ func Sendfile(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc } if total != 0 { - inFile.Dentry().InotifyWithParent(t, linux.IN_ACCESS, 0, vfs.PathEvent) - outFile.Dentry().InotifyWithParent(t, linux.IN_MODIFY, 0, vfs.PathEvent) - if err != nil && err != io.EOF && err != syserror.ErrWouldBlock { // If a partial write is completed, the error is dropped. Log it here. log.Debugf("sendfile completed a partial write with error: %v", err) diff --git a/pkg/sentry/syscalls/linux/vfs2/sync.go b/pkg/sentry/syscalls/linux/vfs2/sync.go index 6e9b599e2..1f8a5878c 100644 --- a/pkg/sentry/syscalls/linux/vfs2/sync.go +++ b/pkg/sentry/syscalls/linux/vfs2/sync.go @@ -36,6 +36,10 @@ func Syncfs(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal } defer file.DecRef(t) + if file.StatusFlags()&linux.O_PATH != 0 { + return 0, nil, syserror.EBADF + } + return 0, nil, file.SyncFS(t) } |