summary | refs | log | tree | commit | diff | homepage
path: root/pkg/sentry/syscalls
diff options
context:
space:
mode:
Diffstat (limited to 'pkg/sentry/syscalls')
-rw-r--r-- pkg/sentry/syscalls/linux/BUILD 1
-rw-r--r-- pkg/sentry/syscalls/linux/linux64.go 8
-rw-r--r-- pkg/sentry/syscalls/linux/sys_file.go 1
-rw-r--r-- pkg/sentry/syscalls/linux/sys_membarrier.go 103
-rw-r--r-- pkg/sentry/syscalls/linux/sys_sysinfo.go 12
-rw-r--r-- pkg/sentry/syscalls/linux/vfs2/execve.go 3
-rw-r--r-- pkg/sentry/syscalls/linux/vfs2/vfs2.go 6
7 files changed, 124 insertions, 10 deletions
diff --git a/pkg/sentry/syscalls/linux/BUILD b/pkg/sentry/syscalls/linux/BUILD
index 75752b2e6..a2e441448 100644
--- a/pkg/sentry/syscalls/linux/BUILD
+++ b/pkg/sentry/syscalls/linux/BUILD
@@ -21,6 +21,7 @@ go_library(
"sys_identity.go",
"sys_inotify.go",
"sys_lseek.go",
+ "sys_membarrier.go",
"sys_mempolicy.go",
"sys_mmap.go",
"sys_mount.go",
diff --git a/pkg/sentry/syscalls/linux/linux64.go b/pkg/sentry/syscalls/linux/linux64.go
index 5f26697d2..9c9def7cd 100644
--- a/pkg/sentry/syscalls/linux/linux64.go
+++ b/pkg/sentry/syscalls/linux/linux64.go
@@ -376,7 +376,7 @@ var AMD64 = &kernel.SyscallTable{
321: syscalls.CapError("bpf", linux.CAP_SYS_ADMIN, "", nil),
322: syscalls.Supported("execveat", Execveat),
323: syscalls.ErrorWithEvent("userfaultfd", syserror.ENOSYS, "", []string{"gvisor.dev/issue/266"}), // TODO(b/118906345)
- 324: syscalls.ErrorWithEvent("membarrier", syserror.ENOSYS, "", []string{"gvisor.dev/issue/267"}), // TODO(gvisor.dev/issue/267)
+ 324: syscalls.PartiallySupported("membarrier", Membarrier, "Not supported on all platforms.", nil),
325: syscalls.PartiallySupported("mlock2", Mlock2, "Stub implementation. The sandbox lacks appropriate permissions.", nil),
// Syscalls implemented after 325 are "backports" from versions
@@ -527,8 +527,8 @@ var ARM64 = &kernel.SyscallTable{
96: syscalls.Supported("set_tid_address", SetTidAddress),
97: syscalls.PartiallySupported("unshare", Unshare, "Mount, cgroup namespaces not supported. Network namespaces supported but must be empty.", nil),
98: syscalls.PartiallySupported("futex", Futex, "Robust futexes not supported.", nil),
- 99: syscalls.Error("set_robust_list", syserror.ENOSYS, "Obsolete.", nil),
- 100: syscalls.Error("get_robust_list", syserror.ENOSYS, "Obsolete.", nil),
+ 99: syscalls.Supported("set_robust_list", SetRobustList),
+ 100: syscalls.Supported("get_robust_list", GetRobustList),
101: syscalls.Supported("nanosleep", Nanosleep),
102: syscalls.Supported("getitimer", Getitimer),
103: syscalls.Supported("setitimer", Setitimer),
@@ -695,7 +695,7 @@ var ARM64 = &kernel.SyscallTable{
280: syscalls.CapError("bpf", linux.CAP_SYS_ADMIN, "", nil),
281: syscalls.Supported("execveat", Execveat),
282: syscalls.ErrorWithEvent("userfaultfd", syserror.ENOSYS, "", []string{"gvisor.dev/issue/266"}), // TODO(b/118906345)
- 283: syscalls.ErrorWithEvent("membarrier", syserror.ENOSYS, "", []string{"gvisor.dev/issue/267"}), // TODO(gvisor.dev/issue/267)
+ 283: syscalls.PartiallySupported("membarrier", Membarrier, "Not supported on all platforms.", nil),
284: syscalls.PartiallySupported("mlock2", Mlock2, "Stub implementation. The sandbox lacks appropriate permissions.", nil),
// Syscalls after 284 are "backports" from versions of Linux after 4.4.
diff --git a/pkg/sentry/syscalls/linux/sys_file.go b/pkg/sentry/syscalls/linux/sys_file.go
index 98331eb3c..519066a47 100644
--- a/pkg/sentry/syscalls/linux/sys_file.go
+++ b/pkg/sentry/syscalls/linux/sys_file.go
@@ -84,6 +84,7 @@ func fileOpOn(t *kernel.Task, dirFD int32, path string, resolve bool, fn func(ro
}
rel = f.Dirent
if !fs.IsDir(rel.Inode.StableAttr) {
+ f.DecRef(t)
return syserror.ENOTDIR
}
}
diff --git a/pkg/sentry/syscalls/linux/sys_membarrier.go b/pkg/sentry/syscalls/linux/sys_membarrier.go
new file mode 100644
index 000000000..63ee5d435
--- /dev/null
+++ b/pkg/sentry/syscalls/linux/sys_membarrier.go
@@ -0,0 +1,103 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package linux
+
+import (
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/syserror"
+)
+
+// Membarrier implements syscall membarrier(2), dispatching on the command in args[0] and validating the flags in args[1].
+func Membarrier(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
+ cmd := args[0].Int() // Command selector.
+ flags := args[1].Uint() // Flags; only MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ below accepts a non-zero value.
+
+ switch cmd {
+ case linux.MEMBARRIER_CMD_QUERY: // Report the supported commands as a bitmask.
+ if flags != 0 {
+ return 0, nil, syserror.EINVAL
+ }
+ var supportedCommands uintptr // Stays 0 if neither capability check below passes.
+ if t.Kernel().Platform.HaveGlobalMemoryBarrier() { // Barrier commands are advertised only when the platform reports a global memory barrier.
+ supportedCommands |= linux.MEMBARRIER_CMD_GLOBAL |
+ linux.MEMBARRIER_CMD_GLOBAL_EXPEDITED |
+ linux.MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED |
+ linux.MEMBARRIER_CMD_PRIVATE_EXPEDITED |
+ linux.MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED
+ }
+ if t.RSeqAvailable() { // RSEQ commands are advertised only when t.RSeqAvailable() is true.
+ supportedCommands |= linux.MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ |
+ linux.MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ
+ }
+ return supportedCommands, nil, nil
+ case linux.MEMBARRIER_CMD_GLOBAL, linux.MEMBARRIER_CMD_GLOBAL_EXPEDITED, linux.MEMBARRIER_CMD_PRIVATE_EXPEDITED: // All three are served by Platform.GlobalMemoryBarrier().
+ if flags != 0 {
+ return 0, nil, syserror.EINVAL
+ }
+ if !t.Kernel().Platform.HaveGlobalMemoryBarrier() { // Same condition QUERY uses to advertise these commands.
+ return 0, nil, syserror.EINVAL
+ }
+ if cmd == linux.MEMBARRIER_CMD_PRIVATE_EXPEDITED && !t.MemoryManager().IsMembarrierPrivateEnabled() { // NOTE(review): presumably enabled by EnableMembarrierPrivate() in the REGISTER_PRIVATE_EXPEDITED case — confirm in MemoryManager.
+ return 0, nil, syserror.EPERM
+ }
+ return 0, nil, t.Kernel().Platform.GlobalMemoryBarrier()
+ case linux.MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED:
+ if flags != 0 {
+ return 0, nil, syserror.EINVAL
+ }
+ if !t.Kernel().Platform.HaveGlobalMemoryBarrier() {
+ return 0, nil, syserror.EINVAL
+ }
+ // no-op: registration succeeds without recording any state here.
+ return 0, nil, nil
+ case linux.MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED:
+ if flags != 0 {
+ return 0, nil, syserror.EINVAL
+ }
+ if !t.Kernel().Platform.HaveGlobalMemoryBarrier() {
+ return 0, nil, syserror.EINVAL
+ }
+ t.MemoryManager().EnableMembarrierPrivate()
+ return 0, nil, nil
+ case linux.MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ:
+ if flags&^linux.MEMBARRIER_CMD_FLAG_CPU != 0 { // Any flag bit other than MEMBARRIER_CMD_FLAG_CPU is rejected.
+ return 0, nil, syserror.EINVAL
+ }
+ if !t.RSeqAvailable() {
+ return 0, nil, syserror.EINVAL
+ }
+ if !t.MemoryManager().IsMembarrierRSeqEnabled() { // NOTE(review): presumably enabled by EnableMembarrierRSeq() in the REGISTER_PRIVATE_EXPEDITED_RSEQ case — confirm in MemoryManager.
+ return 0, nil, syserror.EPERM
+ }
+ // MEMBARRIER_CMD_FLAG_CPU and cpu_id are ignored since we don't have
+ // the ability to preempt specific CPUs.
+ return 0, nil, t.Kernel().Platform.PreemptAllCPUs()
+ case linux.MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ:
+ if flags != 0 {
+ return 0, nil, syserror.EINVAL
+ }
+ if !t.RSeqAvailable() {
+ return 0, nil, syserror.EINVAL
+ }
+ t.MemoryManager().EnableMembarrierRSeq()
+ return 0, nil, nil
+ default:
+ // Probably a command we don't implement; emit an unimplemented event and fail with EINVAL.
+ t.Kernel().EmitUnimplementedEvent(t)
+ return 0, nil, syserror.EINVAL
+ }
+}
diff --git a/pkg/sentry/syscalls/linux/sys_sysinfo.go b/pkg/sentry/syscalls/linux/sys_sysinfo.go
index 674d341b6..6320593f0 100644
--- a/pkg/sentry/syscalls/linux/sys_sysinfo.go
+++ b/pkg/sentry/syscalls/linux/sys_sysinfo.go
@@ -26,8 +26,12 @@ func Sysinfo(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca
addr := args[0].Pointer()
mf := t.Kernel().MemoryFile()
- mf.UpdateUsage()
- _, totalUsage := usage.MemoryAccounting.Copy()
+ mfUsage, err := mf.TotalUsage()
+ if err != nil {
+ return 0, nil, err
+ }
+ memStats, _ := usage.MemoryAccounting.Copy()
+ totalUsage := mfUsage + memStats.Mapped
totalSize := usage.TotalMemory(mf.TotalSize(), totalUsage)
memFree := totalSize - totalUsage
if memFree > totalSize {
@@ -37,12 +41,12 @@ func Sysinfo(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca
// Only a subset of the fields in sysinfo_t make sense to return.
si := linux.Sysinfo{
- Procs: uint16(len(t.PIDNamespace().Tasks())),
+ Procs: uint16(t.Kernel().TaskSet().Root.NumTasks()),
Uptime: t.Kernel().MonotonicClock().Now().Seconds(),
TotalRAM: totalSize,
FreeRAM: memFree,
Unit: 1,
}
- _, err := si.CopyOut(t, addr)
+ _, err = si.CopyOut(t, addr)
return 0, nil, err
}
diff --git a/pkg/sentry/syscalls/linux/vfs2/execve.go b/pkg/sentry/syscalls/linux/vfs2/execve.go
index 066ee0863..c8ce2aabc 100644
--- a/pkg/sentry/syscalls/linux/vfs2/execve.go
+++ b/pkg/sentry/syscalls/linux/vfs2/execve.go
@@ -110,8 +110,7 @@ func execveat(t *kernel.Task, dirfd int32, pathnameAddr, argvAddr, envvAddr user
}
// Load the new TaskContext.
- mntns := t.MountNamespaceVFS2() // FIXME(jamieliu): useless refcount change
- defer mntns.DecRef(t)
+ mntns := t.MountNamespaceVFS2()
wd := t.FSContext().WorkingDirectoryVFS2()
defer wd.DecRef(t)
remainingTraversals := uint(linux.MaxSymlinkTraversals)
diff --git a/pkg/sentry/syscalls/linux/vfs2/vfs2.go b/pkg/sentry/syscalls/linux/vfs2/vfs2.go
index 0df3bd449..c50fd97eb 100644
--- a/pkg/sentry/syscalls/linux/vfs2/vfs2.go
+++ b/pkg/sentry/syscalls/linux/vfs2/vfs2.go
@@ -163,6 +163,7 @@ func Override() {
// Override ARM64.
s = linux.ARM64
+ s.Table[2] = syscalls.PartiallySupported("io_submit", IoSubmit, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"})
s.Table[5] = syscalls.Supported("setxattr", SetXattr)
s.Table[6] = syscalls.Supported("lsetxattr", Lsetxattr)
s.Table[7] = syscalls.Supported("fsetxattr", Fsetxattr)
@@ -200,6 +201,7 @@ func Override() {
s.Table[44] = syscalls.Supported("fstatfs", Fstatfs)
s.Table[45] = syscalls.Supported("truncate", Truncate)
s.Table[46] = syscalls.Supported("ftruncate", Ftruncate)
+ s.Table[47] = syscalls.PartiallySupported("fallocate", Fallocate, "Not all options are supported.", nil)
s.Table[48] = syscalls.Supported("faccessat", Faccessat)
s.Table[49] = syscalls.Supported("chdir", Chdir)
s.Table[50] = syscalls.Supported("fchdir", Fchdir)
@@ -221,12 +223,14 @@ func Override() {
s.Table[68] = syscalls.Supported("pwrite64", Pwrite64)
s.Table[69] = syscalls.Supported("preadv", Preadv)
s.Table[70] = syscalls.Supported("pwritev", Pwritev)
+ s.Table[71] = syscalls.Supported("sendfile", Sendfile)
s.Table[72] = syscalls.Supported("pselect", Pselect)
s.Table[73] = syscalls.Supported("ppoll", Ppoll)
s.Table[74] = syscalls.Supported("signalfd4", Signalfd4)
s.Table[76] = syscalls.Supported("splice", Splice)
s.Table[77] = syscalls.Supported("tee", Tee)
s.Table[78] = syscalls.Supported("readlinkat", Readlinkat)
+ s.Table[79] = syscalls.Supported("newfstatat", Newfstatat)
s.Table[80] = syscalls.Supported("fstat", Fstat)
s.Table[81] = syscalls.Supported("sync", Sync)
s.Table[82] = syscalls.Supported("fsync", Fsync)
@@ -251,8 +255,10 @@ func Override() {
s.Table[210] = syscalls.Supported("shutdown", Shutdown)
s.Table[211] = syscalls.Supported("sendmsg", SendMsg)
s.Table[212] = syscalls.Supported("recvmsg", RecvMsg)
+ s.Table[213] = syscalls.Supported("readahead", Readahead)
s.Table[221] = syscalls.Supported("execve", Execve)
s.Table[222] = syscalls.Supported("mmap", Mmap)
+ s.Table[223] = syscalls.PartiallySupported("fadvise64", Fadvise64, "Not all options are supported.", nil)
s.Table[242] = syscalls.Supported("accept4", Accept4)
s.Table[243] = syscalls.Supported("recvmmsg", RecvMMsg)
s.Table[267] = syscalls.Supported("syncfs", Syncfs)