Diffstat (limited to 'pkg/sentry/syscalls/linux')
-rw-r--r--  pkg/sentry/syscalls/linux/linux64.go      15
-rw-r--r--  pkg/sentry/syscalls/linux/sys_mmap.go    106
-rw-r--r--  pkg/sentry/syscalls/linux/sys_rlimit.go    1
3 files changed, 47 insertions, 75 deletions
diff --git a/pkg/sentry/syscalls/linux/linux64.go b/pkg/sentry/syscalls/linux/linux64.go
index e855590e6..7a5c93f9b 100644
--- a/pkg/sentry/syscalls/linux/linux64.go
+++ b/pkg/sentry/syscalls/linux/linux64.go
@@ -196,11 +196,11 @@ var AMD64 = &kernel.SyscallTable{
145: SchedGetscheduler,
146: SchedGetPriorityMax,
147: SchedGetPriorityMin,
- 148: syscalls.ErrorWithEvent(syscall.EPERM), // SchedRrGetInterval,
- 149: Mlock,
- 150: Munlock,
- 151: Mlockall,
- 152: Munlockall,
+ 148: syscalls.ErrorWithEvent(syscall.EPERM), // SchedRrGetInterval,
+ 149: syscalls.Error(nil), // Mlock, TODO
+ 150: syscalls.Error(nil), // Munlock, TODO
+ 151: syscalls.Error(nil), // Mlockall, TODO
+ 152: syscalls.Error(nil), // Munlockall, TODO
153: syscalls.CapError(linux.CAP_SYS_TTY_CONFIG), // Vhangup,
154: syscalls.Error(syscall.EPERM), // ModifyLdt,
155: syscalls.Error(syscall.EPERM), // PivotRoot,
@@ -373,9 +373,8 @@ var AMD64 = &kernel.SyscallTable{
// 322: Execveat, TODO
// 323: Userfaultfd, TODO
// 324: Membarrier, TODO
- 325: Mlock2,
- // Syscalls after 325 are "backports" from versions of Linux after 4.4.
- // 326: CopyFileRange,
+ // Syscalls after 325 are backports from 4.6.
+ 325: syscalls.Error(nil), // Mlock2, TODO
327: Preadv2,
328: Pwritev2,
},
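The entries above swap real handlers for syscalls.Error(nil) stubs. As a rough sketch of the pattern (types simplified for self-containment; gvisor's actual kernel.SyscallFn also takes a *kernel.Task and arch.SyscallArguments and returns a *kernel.SyscallControl), a nil error makes each mlock-family call report success without doing anything, matching the TODO comments:

package main

import "fmt"

// SyscallFn stands in for gvisor's kernel.SyscallFn; simplified here so the
// sketch compiles on its own.
type SyscallFn func(args ...uintptr) (uintptr, error)

// Error builds a handler that unconditionally returns err. Error(nil) is
// therefore a silent no-op stub: the syscall "succeeds" without side effects,
// which is how mlock/munlock/mlockall/munlockall/mlock2 are stubbed above.
func Error(err error) SyscallFn {
	return func(...uintptr) (uintptr, error) {
		return 0, err
	}
}

func main() {
	mlock := Error(nil) // 149: Mlock, stubbed
	if _, err := mlock(0, 4096); err == nil {
		fmt.Println("mlock: no-op success")
	}
}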
diff --git a/pkg/sentry/syscalls/linux/sys_mmap.go b/pkg/sentry/syscalls/linux/sys_mmap.go
index 8732861e0..145f7846c 100644
--- a/pkg/sentry/syscalls/linux/sys_mmap.go
+++ b/pkg/sentry/syscalls/linux/sys_mmap.go
@@ -69,9 +69,6 @@ func Mmap(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallC
GrowsDown: linux.MAP_GROWSDOWN&flags != 0,
Precommit: linux.MAP_POPULATE&flags != 0,
}
- if linux.MAP_LOCKED&flags != 0 {
- opts.MLockMode = memmap.MLockEager
- }
defer func() {
if opts.MappingIdentity != nil {
opts.MappingIdentity.DecRef()
@@ -387,6 +384,16 @@ func Msync(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall
addr := args[0].Pointer()
length := args[1].SizeT()
flags := args[2].Int()
+ if addr != addr.RoundDown() {
+ return 0, nil, syserror.EINVAL
+ }
+ if length == 0 {
+ return 0, nil, nil
+ }
+ la, ok := usermem.Addr(length).RoundUp()
+ if !ok {
+ return 0, nil, syserror.ENOMEM
+ }
// "The flags argument should specify exactly one of MS_ASYNC and MS_SYNC,
// and may additionally include the MS_INVALIDATE bit. ... However, Linux
// permits a call to msync() that specifies neither of these flags, with
@@ -399,72 +406,39 @@ func Msync(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall
if sync && flags&linux.MS_ASYNC != 0 {
return 0, nil, syserror.EINVAL
}
- err := t.MemoryManager().MSync(t, addr, uint64(length), mm.MSyncOpts{
- Sync: sync,
- Invalidate: flags&linux.MS_INVALIDATE != 0,
- })
- // MSync calls fsync, the same interrupt conversion rules apply, see
- // mm/msync.c, fsync POSIX.1-2008.
- return 0, nil, syserror.ConvertIntr(err, kernel.ERESTARTSYS)
-}
-
-// Mlock implements linux syscall mlock(2).
-func Mlock(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
- addr := args[0].Pointer()
- length := args[1].SizeT()
-
- return 0, nil, t.MemoryManager().MLock(t, addr, uint64(length), memmap.MLockEager)
-}
-
-// Mlock2 implements linux syscall mlock2(2).
-func Mlock2(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
- addr := args[0].Pointer()
- length := args[1].SizeT()
- flags := args[2].Int()
-
- if flags&^(linux.MLOCK_ONFAULT) != 0 {
+ // MS_INVALIDATE "asks to invalidate other mappings of the same file (so
+ // that they can be updated with the fresh values just written)". This is a
+ // no-op given that shared memory exists. However, MS_INVALIDATE can also
+ // be used to detect mlocks: "EBUSY: MS_INVALIDATE was specified in flags,
+ // and a memory lock exists for the specified address range." Given that
+ // mlock is stubbed out, it's unsafe to pass MS_INVALIDATE silently since
+ // some user program could be using it for synchronization.
+ if flags&linux.MS_INVALIDATE != 0 {
return 0, nil, syserror.EINVAL
}
-
- mode := memmap.MLockEager
- if flags&linux.MLOCK_ONFAULT != 0 {
- mode = memmap.MLockLazy
- }
- return 0, nil, t.MemoryManager().MLock(t, addr, uint64(length), mode)
-}
-
-// Munlock implements linux syscall munlock(2).
-func Munlock(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
- addr := args[0].Pointer()
- length := args[1].SizeT()
-
- return 0, nil, t.MemoryManager().MLock(t, addr, uint64(length), memmap.MLockNone)
-}
-
-// Mlockall implements linux syscall mlockall(2).
-func Mlockall(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
- flags := args[0].Int()
-
- if flags&^(linux.MCL_CURRENT|linux.MCL_FUTURE|linux.MCL_ONFAULT) != 0 {
- return 0, nil, syserror.EINVAL
+ // MS_SYNC "requests an update and waits for it to complete."
+ if sync {
+ err := t.MemoryManager().Sync(t, addr, uint64(la))
+ // Sync calls fsync, the same interrupt conversion rules apply, see
+ // mm/msync.c, fsync POSIX.1-2008.
+ return 0, nil, syserror.ConvertIntr(err, kernel.ERESTARTSYS)
+ }
+ // MS_ASYNC "specifies that an update be scheduled, but the call returns
+ // immediately". As long as dirty pages are tracked and eventually written
+ // back, this is a no-op. (Correspondingly: "Since Linux 2.6.19, MS_ASYNC
+ // is in fact a no-op, since the kernel properly tracks dirty pages and
+ // flushes them to storage as necessary.")
+ //
+ // However: "ENOMEM: The indicated memory (or part of it) was not mapped."
+ // This applies even for MS_ASYNC.
+ ar, ok := addr.ToRange(uint64(la))
+ if !ok {
+ return 0, nil, syserror.ENOMEM
}
-
- mode := memmap.MLockEager
- if flags&linux.MCL_ONFAULT != 0 {
- mode = memmap.MLockLazy
+ mapped := t.MemoryManager().VirtualMemorySizeRange(ar)
+ if mapped != uint64(la) {
+ return 0, nil, syserror.ENOMEM
}
- return 0, nil, t.MemoryManager().MLockAll(t, mm.MLockAllOpts{
- Current: flags&linux.MCL_CURRENT != 0,
- Future: flags&linux.MCL_FUTURE != 0,
- Mode: mode,
- })
-}
-
-// Munlockall implements linux syscall munlockall(2).
-func Munlockall(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
- return 0, nil, t.MemoryManager().MLockAll(t, mm.MLockAllOpts{
- Current: true,
- Future: true,
- Mode: memmap.MLockNone,
- })
+ return 0, nil, nil
}
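The rewritten Msync front-loads its argument checks before treating the flags as no-ops. Below is a self-contained sketch of that validation order; pageSize and the overflow tests are stand-ins for usermem.Addr's RoundDown/RoundUp/ToRange helpers, and the final mapped-range check against VirtualMemorySizeRange is noted in a comment rather than reproduced:

package main

import (
	"fmt"
	"syscall"
)

const pageSize = 4096 // assumed page size; usermem derives this from the platform

// msyncCheck mirrors the validation order above: EINVAL for an unaligned
// address, success for zero length, ENOMEM when rounding the length up to a
// page boundary overflows or the resulting range wraps the address space.
// The real Msync additionally returns ENOMEM unless the entire range is
// mapped (the VirtualMemorySizeRange comparison), which this sketch omits.
func msyncCheck(addr, length uint64) error {
	if addr%pageSize != 0 { // addr != addr.RoundDown()
		return syscall.EINVAL
	}
	if length == 0 {
		return nil
	}
	la := (length + pageSize - 1) &^ (pageSize - 1)
	if la < length { // usermem.Addr(length).RoundUp() reported overflow
		return syscall.ENOMEM
	}
	if addr+la < addr { // addr.ToRange(la) would fail
		return syscall.ENOMEM
	}
	return nil
}

func main() {
	fmt.Println(msyncCheck(4097, 100))        // invalid argument
	fmt.Println(msyncCheck(4096, 0))          // <nil>
	fmt.Println(msyncCheck(4096, ^uint64(0))) // cannot allocate memory
}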
diff --git a/pkg/sentry/syscalls/linux/sys_rlimit.go b/pkg/sentry/syscalls/linux/sys_rlimit.go
index b0b216045..2f16e1791 100644
--- a/pkg/sentry/syscalls/linux/sys_rlimit.go
+++ b/pkg/sentry/syscalls/linux/sys_rlimit.go
@@ -90,7 +90,6 @@ var setableLimits = map[limits.LimitType]struct{}{
limits.CPU: {},
limits.Data: {},
limits.FileSize: {},
- limits.MemoryLocked: {},
limits.Stack: {},
// These are not enforced, but we include them here to avoid returning
// EPERM, since some apps expect them to succeed.
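With limits.MemoryLocked dropped from setableLimits, a guest call to setrlimit(RLIMIT_MEMLOCK) presumably now fails with EPERM rather than appearing to succeed (the surrounding comment implies limits absent from this map take that path). A user-space probe, using golang.org/x/sys/unix:

package main

import (
	"fmt"

	"golang.org/x/sys/unix"
)

func main() {
	// Lowering RLIMIT_MEMLOCK is unprivileged on native Linux and normally
	// succeeds; under the sentry, with MemoryLocked no longer settable, this
	// is expected to return EPERM.
	lim := unix.Rlimit{Cur: 0, Max: 0}
	err := unix.Setrlimit(unix.RLIMIT_MEMLOCK, &lim)
	fmt.Println("setrlimit(RLIMIT_MEMLOCK):", err)
}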