diff options
Diffstat (limited to 'pkg/sentry/syscalls')
-rw-r--r-- | pkg/sentry/syscalls/linux/BUILD | 1 | ||||
-rw-r--r-- | pkg/sentry/syscalls/linux/linux64.go | 8 | ||||
-rw-r--r-- | pkg/sentry/syscalls/linux/sys_membarrier.go | 70 | ||||
-rw-r--r-- | pkg/sentry/syscalls/linux/sys_mmap.go | 9 | ||||
-rw-r--r-- | pkg/sentry/syscalls/linux/sys_sched.go | 2 | ||||
-rw-r--r-- | pkg/sentry/syscalls/linux/vfs2/mmap.go | 12 | ||||
-rw-r--r-- | pkg/sentry/syscalls/linux/vfs2/splice.go | 67 | ||||
-rw-r--r-- | pkg/sentry/syscalls/linux/vfs2/vfs2.go | 6 |
8 files changed, 137 insertions, 38 deletions
diff --git a/pkg/sentry/syscalls/linux/BUILD b/pkg/sentry/syscalls/linux/BUILD index 75752b2e6..a2e441448 100644 --- a/pkg/sentry/syscalls/linux/BUILD +++ b/pkg/sentry/syscalls/linux/BUILD @@ -21,6 +21,7 @@ go_library( "sys_identity.go", "sys_inotify.go", "sys_lseek.go", + "sys_membarrier.go", "sys_mempolicy.go", "sys_mmap.go", "sys_mount.go", diff --git a/pkg/sentry/syscalls/linux/linux64.go b/pkg/sentry/syscalls/linux/linux64.go index 5f26697d2..9c9def7cd 100644 --- a/pkg/sentry/syscalls/linux/linux64.go +++ b/pkg/sentry/syscalls/linux/linux64.go @@ -376,7 +376,7 @@ var AMD64 = &kernel.SyscallTable{ 321: syscalls.CapError("bpf", linux.CAP_SYS_ADMIN, "", nil), 322: syscalls.Supported("execveat", Execveat), 323: syscalls.ErrorWithEvent("userfaultfd", syserror.ENOSYS, "", []string{"gvisor.dev/issue/266"}), // TODO(b/118906345) - 324: syscalls.ErrorWithEvent("membarrier", syserror.ENOSYS, "", []string{"gvisor.dev/issue/267"}), // TODO(gvisor.dev/issue/267) + 324: syscalls.PartiallySupported("membarrier", Membarrier, "Not supported on all platforms.", nil), 325: syscalls.PartiallySupported("mlock2", Mlock2, "Stub implementation. The sandbox lacks appropriate permissions.", nil), // Syscalls implemented after 325 are "backports" from versions @@ -527,8 +527,8 @@ var ARM64 = &kernel.SyscallTable{ 96: syscalls.Supported("set_tid_address", SetTidAddress), 97: syscalls.PartiallySupported("unshare", Unshare, "Mount, cgroup namespaces not supported. Network namespaces supported but must be empty.", nil), 98: syscalls.PartiallySupported("futex", Futex, "Robust futexes not supported.", nil), - 99: syscalls.Error("set_robust_list", syserror.ENOSYS, "Obsolete.", nil), - 100: syscalls.Error("get_robust_list", syserror.ENOSYS, "Obsolete.", nil), + 99: syscalls.Supported("set_robust_list", SetRobustList), + 100: syscalls.Supported("get_robust_list", GetRobustList), 101: syscalls.Supported("nanosleep", Nanosleep), 102: syscalls.Supported("getitimer", Getitimer), 103: syscalls.Supported("setitimer", Setitimer), @@ -695,7 +695,7 @@ var ARM64 = &kernel.SyscallTable{ 280: syscalls.CapError("bpf", linux.CAP_SYS_ADMIN, "", nil), 281: syscalls.Supported("execveat", Execveat), 282: syscalls.ErrorWithEvent("userfaultfd", syserror.ENOSYS, "", []string{"gvisor.dev/issue/266"}), // TODO(b/118906345) - 283: syscalls.ErrorWithEvent("membarrier", syserror.ENOSYS, "", []string{"gvisor.dev/issue/267"}), // TODO(gvisor.dev/issue/267) + 283: syscalls.PartiallySupported("membarrier", Membarrier, "Not supported on all platforms.", nil), 284: syscalls.PartiallySupported("mlock2", Mlock2, "Stub implementation. The sandbox lacks appropriate permissions.", nil), // Syscalls after 284 are "backports" from versions of Linux after 4.4. diff --git a/pkg/sentry/syscalls/linux/sys_membarrier.go b/pkg/sentry/syscalls/linux/sys_membarrier.go new file mode 100644 index 000000000..288cd8512 --- /dev/null +++ b/pkg/sentry/syscalls/linux/sys_membarrier.go @@ -0,0 +1,70 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package linux + +import ( + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/syserror" +) + +// Membarrier implements syscall membarrier(2). +func Membarrier(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { + cmd := args[0].Int() + flags := args[1].Int() + + p := t.Kernel().Platform + if !p.HaveGlobalMemoryBarrier() { + // Event for applications that want membarrier on a configuration that + // doesn't support them. + t.Kernel().EmitUnimplementedEvent(t) + return 0, nil, syserror.ENOSYS + } + + if flags != 0 { + return 0, nil, syserror.EINVAL + } + + switch cmd { + case linux.MEMBARRIER_CMD_QUERY: + const supportedCommands = linux.MEMBARRIER_CMD_GLOBAL | + linux.MEMBARRIER_CMD_GLOBAL_EXPEDITED | + linux.MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED | + linux.MEMBARRIER_CMD_PRIVATE_EXPEDITED | + linux.MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED + return supportedCommands, nil, nil + case linux.MEMBARRIER_CMD_PRIVATE_EXPEDITED: + if !t.MemoryManager().IsMembarrierPrivateEnabled() { + return 0, nil, syserror.EPERM + } + fallthrough + case linux.MEMBARRIER_CMD_GLOBAL, linux.MEMBARRIER_CMD_GLOBAL_EXPEDITED: + return 0, nil, p.GlobalMemoryBarrier() + case linux.MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED: + // no-op + return 0, nil, nil + case linux.MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED: + t.MemoryManager().EnableMembarrierPrivate() + return 0, nil, nil + case linux.MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE, linux.MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE: + // We're aware of these, but they aren't implemented since no platform + // supports them yet. + t.Kernel().EmitUnimplementedEvent(t) + fallthrough + default: + return 0, nil, syserror.EINVAL + } +} diff --git a/pkg/sentry/syscalls/linux/sys_mmap.go b/pkg/sentry/syscalls/linux/sys_mmap.go index 8ab062bca..cd8dfdfa4 100644 --- a/pkg/sentry/syscalls/linux/sys_mmap.go +++ b/pkg/sentry/syscalls/linux/sys_mmap.go @@ -100,6 +100,15 @@ func Mmap(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallC if err := file.ConfigureMMap(t, &opts); err != nil { return 0, nil, err } + } else if shared { + // Back shared anonymous mappings with a special mappable. + opts.Offset = 0 + m, err := mm.NewSharedAnonMappable(opts.Length, t.Kernel()) + if err != nil { + return 0, nil, err + } + opts.MappingIdentity = m // transfers ownership of m to opts + opts.Mappable = m } rv, err := t.MemoryManager().MMap(t, opts) diff --git a/pkg/sentry/syscalls/linux/sys_sched.go b/pkg/sentry/syscalls/linux/sys_sched.go index cd6f4dd94..bfcf44b6f 100644 --- a/pkg/sentry/syscalls/linux/sys_sched.go +++ b/pkg/sentry/syscalls/linux/sys_sched.go @@ -30,7 +30,7 @@ const ( // // +marshal type SchedParam struct { - schedPriority int64 + schedPriority int32 } // SchedGetparam implements linux syscall sched_getparam(2). diff --git a/pkg/sentry/syscalls/linux/vfs2/mmap.go b/pkg/sentry/syscalls/linux/vfs2/mmap.go index dc05c2994..9d9dbf775 100644 --- a/pkg/sentry/syscalls/linux/vfs2/mmap.go +++ b/pkg/sentry/syscalls/linux/vfs2/mmap.go @@ -17,6 +17,7 @@ package vfs2 import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/fsimpl/tmpfs" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/syserror" @@ -85,6 +86,17 @@ func Mmap(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallC if err := file.ConfigureMMap(t, &opts); err != nil { return 0, nil, err } + } else if shared { + // Back shared anonymous mappings with an anonymous tmpfs file. + opts.Offset = 0 + file, err := tmpfs.NewZeroFile(t, t.Credentials(), t.Kernel().ShmMount(), opts.Length) + if err != nil { + return 0, nil, err + } + defer file.DecRef(t) + if err := file.ConfigureMMap(t, &opts); err != nil { + return 0, nil, err + } } rv, err := t.MemoryManager().MMap(t, opts) diff --git a/pkg/sentry/syscalls/linux/vfs2/splice.go b/pkg/sentry/syscalls/linux/vfs2/splice.go index f55d74cd2..bf5c1171f 100644 --- a/pkg/sentry/syscalls/linux/vfs2/splice.go +++ b/pkg/sentry/syscalls/linux/vfs2/splice.go @@ -23,6 +23,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/kernel/pipe" + slinux "gvisor.dev/gvisor/pkg/sentry/syscalls/linux" "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" @@ -146,11 +147,6 @@ func Splice(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal panic("at least one end of splice must be a pipe") } - if n == 0 && err == io.EOF { - // We reached the end of the file. Eat the error and exit the loop. - err = nil - break - } if n != 0 || err != syserror.ErrWouldBlock || nonBlock { break } @@ -171,15 +167,16 @@ func Splice(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal } } - if n == 0 { - return 0, nil, err + if n != 0 { + // On Linux, inotify behavior is not very consistent with splice(2). We try + // our best to emulate Linux for very basic calls to splice, where for some + // reason, events are generated for output files, but not input files. + outFile.Dentry().InotifyWithParent(t, linux.IN_MODIFY, 0, vfs.PathEvent) } - // On Linux, inotify behavior is not very consistent with splice(2). We try - // our best to emulate Linux for very basic calls to splice, where for some - // reason, events are generated for output files, but not input files. - outFile.Dentry().InotifyWithParent(t, linux.IN_MODIFY, 0, vfs.PathEvent) - return uintptr(n), nil, nil + // We can only pass a single file to handleIOError, so pick inFile arbitrarily. + // This is used only for debugging purposes. + return uintptr(n), nil, slinux.HandleIOErrorVFS2(t, n != 0, err, syserror.ERESTARTSYS, "splice", outFile) } // Tee implements Linux syscall tee(2). @@ -251,11 +248,20 @@ func Tee(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallCo break } } - if n == 0 { - return 0, nil, err + + if n != 0 { + outFile.Dentry().InotifyWithParent(t, linux.IN_MODIFY, 0, vfs.PathEvent) + + // If a partial write is completed, the error is dropped. Log it here. + if err != nil && err != io.EOF && err != syserror.ErrWouldBlock { + log.Debugf("tee completed a partial write with error: %v", err) + err = nil + } } - outFile.Dentry().InotifyWithParent(t, linux.IN_MODIFY, 0, vfs.PathEvent) - return uintptr(n), nil, nil + + // We can only pass a single file to handleIOError, so pick inFile arbitrarily. + // This is used only for debugging purposes. + return uintptr(n), nil, slinux.HandleIOErrorVFS2(t, n != 0, err, syserror.ERESTARTSYS, "tee", inFile) } // Sendfile implements linux system call sendfile(2). @@ -348,11 +354,6 @@ func Sendfile(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc for n < count { var spliceN int64 spliceN, err = outPipeFD.SpliceFromNonPipe(t, inFile, offset, count) - if spliceN == 0 && err == io.EOF { - // We reached the end of the file. Eat the error and exit the loop. - err = nil - break - } if offset != -1 { offset += spliceN } @@ -375,13 +376,6 @@ func Sendfile(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc } else { readN, err = inFile.Read(t, usermem.BytesIOSequence(buf), vfs.ReadOptions{}) } - if readN == 0 && err != nil { - if err == io.EOF { - // We reached the end of the file. Eat the error before exiting the loop. - err = nil - } - break - } n += readN // Write all of the bytes that we read. This may need @@ -432,13 +426,20 @@ func Sendfile(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc } } - if n == 0 { - return 0, nil, err + if n != 0 { + inFile.Dentry().InotifyWithParent(t, linux.IN_ACCESS, 0, vfs.PathEvent) + outFile.Dentry().InotifyWithParent(t, linux.IN_MODIFY, 0, vfs.PathEvent) + + if err != nil && err != io.EOF && err != syserror.ErrWouldBlock { + // If a partial write is completed, the error is dropped. Log it here. + log.Debugf("sendfile completed a partial write with error: %v", err) + err = nil + } } - inFile.Dentry().InotifyWithParent(t, linux.IN_ACCESS, 0, vfs.PathEvent) - outFile.Dentry().InotifyWithParent(t, linux.IN_MODIFY, 0, vfs.PathEvent) - return uintptr(n), nil, nil + // We can only pass a single file to handleIOError, so pick inFile arbitrarily. + // This is used only for debugging purposes. + return uintptr(n), nil, slinux.HandleIOErrorVFS2(t, n != 0, err, syserror.ERESTARTSYS, "sendfile", inFile) } // dualWaiter is used to wait on one or both vfs.FileDescriptions. It is not diff --git a/pkg/sentry/syscalls/linux/vfs2/vfs2.go b/pkg/sentry/syscalls/linux/vfs2/vfs2.go index 0df3bd449..c50fd97eb 100644 --- a/pkg/sentry/syscalls/linux/vfs2/vfs2.go +++ b/pkg/sentry/syscalls/linux/vfs2/vfs2.go @@ -163,6 +163,7 @@ func Override() { // Override ARM64. s = linux.ARM64 + s.Table[2] = syscalls.PartiallySupported("io_submit", IoSubmit, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}) s.Table[5] = syscalls.Supported("setxattr", SetXattr) s.Table[6] = syscalls.Supported("lsetxattr", Lsetxattr) s.Table[7] = syscalls.Supported("fsetxattr", Fsetxattr) @@ -200,6 +201,7 @@ func Override() { s.Table[44] = syscalls.Supported("fstatfs", Fstatfs) s.Table[45] = syscalls.Supported("truncate", Truncate) s.Table[46] = syscalls.Supported("ftruncate", Ftruncate) + s.Table[47] = syscalls.PartiallySupported("fallocate", Fallocate, "Not all options are supported.", nil) s.Table[48] = syscalls.Supported("faccessat", Faccessat) s.Table[49] = syscalls.Supported("chdir", Chdir) s.Table[50] = syscalls.Supported("fchdir", Fchdir) @@ -221,12 +223,14 @@ func Override() { s.Table[68] = syscalls.Supported("pwrite64", Pwrite64) s.Table[69] = syscalls.Supported("preadv", Preadv) s.Table[70] = syscalls.Supported("pwritev", Pwritev) + s.Table[71] = syscalls.Supported("sendfile", Sendfile) s.Table[72] = syscalls.Supported("pselect", Pselect) s.Table[73] = syscalls.Supported("ppoll", Ppoll) s.Table[74] = syscalls.Supported("signalfd4", Signalfd4) s.Table[76] = syscalls.Supported("splice", Splice) s.Table[77] = syscalls.Supported("tee", Tee) s.Table[78] = syscalls.Supported("readlinkat", Readlinkat) + s.Table[79] = syscalls.Supported("newfstatat", Newfstatat) s.Table[80] = syscalls.Supported("fstat", Fstat) s.Table[81] = syscalls.Supported("sync", Sync) s.Table[82] = syscalls.Supported("fsync", Fsync) @@ -251,8 +255,10 @@ func Override() { s.Table[210] = syscalls.Supported("shutdown", Shutdown) s.Table[211] = syscalls.Supported("sendmsg", SendMsg) s.Table[212] = syscalls.Supported("recvmsg", RecvMsg) + s.Table[213] = syscalls.Supported("readahead", Readahead) s.Table[221] = syscalls.Supported("execve", Execve) s.Table[222] = syscalls.Supported("mmap", Mmap) + s.Table[223] = syscalls.PartiallySupported("fadvise64", Fadvise64, "Not all options are supported.", nil) s.Table[242] = syscalls.Supported("accept4", Accept4) s.Table[243] = syscalls.Supported("recvmmsg", RecvMMsg) s.Table[267] = syscalls.Supported("syncfs", Syncfs) |