Diffstat (limited to 'pkg/sentry/syscalls')
-rw-r--r--  pkg/sentry/syscalls/linux/linux64.go    |  15
-rw-r--r--  pkg/sentry/syscalls/linux/sys_mmap.go   | 106
-rw-r--r--  pkg/sentry/syscalls/linux/sys_rlimit.go |   1
3 files changed, 75 insertions, 47 deletions
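In short: the previously stubbed mlock(2), munlock(2), mlockall(2), munlockall(2), and mlock2(2) entries are wired to real implementations backed by the sentry's memory manager, mmap(2) gains MAP_LOCKED support, msync(2) is rebuilt on top of mm.MSync, and RLIMIT_MEMLOCK becomes settable. From inside the sandbox the new surface can be exercised with the standard wrappers. A minimal sketch using golang.org/x/sys/unix (demo code, not part of this change; exact errors depend on RLIMIT_MEMLOCK and capabilities):

package main

import (
	"fmt"

	"golang.org/x/sys/unix"
)

func main() {
	buf := make([]byte, 4096)

	// mlock/munlock one page (entries 149/150 in the syscall table below).
	if err := unix.Mlock(buf); err != nil {
		fmt.Println("mlock:", err) // ENOMEM/EPERM under a tight RLIMIT_MEMLOCK
	}
	if err := unix.Munlock(buf); err != nil {
		fmt.Println("munlock:", err)
	}

	// mlockall with MCL_ONFAULT corresponds to memmap.MLockLazy in the
	// sentry: pages are locked as they are faulted in, not eagerly.
	if err := unix.Mlockall(unix.MCL_CURRENT | unix.MCL_ONFAULT); err != nil {
		fmt.Println("mlockall:", err)
	}
	// munlockall clears Current and Future locking (mode MLockNone).
	if err := unix.Munlockall(); err != nil {
		fmt.Println("munlockall:", err)
	}
}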
diff --git a/pkg/sentry/syscalls/linux/linux64.go b/pkg/sentry/syscalls/linux/linux64.go
index 2aab948da..cc5ebb955 100644
--- a/pkg/sentry/syscalls/linux/linux64.go
+++ b/pkg/sentry/syscalls/linux/linux64.go
@@ -196,11 +196,11 @@ var AMD64 = &kernel.SyscallTable{
 	145: SchedGetscheduler,
 	146: SchedGetPriorityMax,
 	147: SchedGetPriorityMin,
-	148: syscalls.ErrorWithEvent(syscall.EPERM), // SchedRrGetInterval,
-	149: syscalls.Error(nil),                    // Mlock, TODO
-	150: syscalls.Error(nil),                    // Munlock, TODO
-	151: syscalls.Error(nil),                    // Mlockall, TODO
-	152: syscalls.Error(nil),                    // Munlockall, TODO
+	148: syscalls.ErrorWithEvent(syscall.EPERM),      // SchedRrGetInterval,
+	149: Mlock,
+	150: Munlock,
+	151: Mlockall,
+	152: Munlockall,
 	153: syscalls.CapError(linux.CAP_SYS_TTY_CONFIG), // Vhangup,
 	154: syscalls.Error(syscall.EPERM),               // ModifyLdt,
 	155: syscalls.Error(syscall.EPERM),               // PivotRoot,
@@ -373,8 +373,9 @@ var AMD64 = &kernel.SyscallTable{
 	// 322: Execveat, TODO
 	// 323: Userfaultfd, TODO
 	// 324: Membarrier, TODO
-	// Syscalls after 325 are backports from 4.6.
-	325: syscalls.Error(nil), // Mlock2, TODO
+	325: Mlock2,
+	// Syscalls after 325 are "backports" from versions of Linux after 4.4.
+	// 326: CopyFileRange,
 	327: Preadv2,
 	// 328: Pwritev2, TODO
 },
diff --git a/pkg/sentry/syscalls/linux/sys_mmap.go b/pkg/sentry/syscalls/linux/sys_mmap.go
index 145f7846c..8732861e0 100644
--- a/pkg/sentry/syscalls/linux/sys_mmap.go
+++ b/pkg/sentry/syscalls/linux/sys_mmap.go
@@ -69,6 +69,9 @@ func Mmap(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallC
 		GrowsDown: linux.MAP_GROWSDOWN&flags != 0,
 		Precommit: linux.MAP_POPULATE&flags != 0,
 	}
+	if linux.MAP_LOCKED&flags != 0 {
+		opts.MLockMode = memmap.MLockEager
+	}
 	defer func() {
 		if opts.MappingIdentity != nil {
 			opts.MappingIdentity.DecRef()
@@ -384,16 +387,6 @@ func Msync(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall
 	length := args[1].SizeT()
 	flags := args[2].Int()
 
-	if addr != addr.RoundDown() {
-		return 0, nil, syserror.EINVAL
-	}
-	if length == 0 {
-		return 0, nil, nil
-	}
-	la, ok := usermem.Addr(length).RoundUp()
-	if !ok {
-		return 0, nil, syserror.ENOMEM
-	}
 	// "The flags argument should specify exactly one of MS_ASYNC and MS_SYNC,
 	// and may additionally include the MS_INVALIDATE bit. ... However, Linux
 	// permits a call to msync() that specifies neither of these flags, with
@@ -406,39 +399,72 @@ func Msync(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall
 	if sync && flags&linux.MS_ASYNC != 0 {
 		return 0, nil, syserror.EINVAL
 	}
+	err := t.MemoryManager().MSync(t, addr, uint64(length), mm.MSyncOpts{
+		Sync:       sync,
+		Invalidate: flags&linux.MS_INVALIDATE != 0,
+	})
+	// MSync calls fsync, the same interrupt conversion rules apply, see
+	// mm/msync.c, fsync POSIX.1-2008.
+	return 0, nil, syserror.ConvertIntr(err, kernel.ERESTARTSYS)
+}
+
+// Mlock implements linux syscall mlock(2).
+func Mlock(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
+	addr := args[0].Pointer()
+	length := args[1].SizeT()
+
+	return 0, nil, t.MemoryManager().MLock(t, addr, uint64(length), memmap.MLockEager)
+}
 
-	// MS_INVALIDATE "asks to invalidate other mappings of the same file (so
-	// that they can be updated with the fresh values just written)". This is a
-	// no-op given that shared memory exists. However, MS_INVALIDATE can also
-	// be used to detect mlocks: "EBUSY: MS_INVALIDATE was specified in flags,
-	// and a memory lock exists for the specified address range." Given that
-	// mlock is stubbed out, it's unsafe to pass MS_INVALIDATE silently since
-	// some user program could be using it for synchronization.
-	if flags&linux.MS_INVALIDATE != 0 {
+// Mlock2 implements linux syscall mlock2(2).
+func Mlock2(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
+	addr := args[0].Pointer()
+	length := args[1].SizeT()
+	flags := args[2].Int()
+
+	if flags&^(linux.MLOCK_ONFAULT) != 0 {
 		return 0, nil, syserror.EINVAL
 	}
-	// MS_SYNC "requests an update and waits for it to complete."
-	if sync {
-		err := t.MemoryManager().Sync(t, addr, uint64(la))
-		// Sync calls fsync, the same interrupt conversion rules apply, see
-		// mm/msync.c, fsync POSIX.1-2008.
-		return 0, nil, syserror.ConvertIntr(err, kernel.ERESTARTSYS)
-	}
-	// MS_ASYNC "specifies that an update be scheduled, but the call returns
-	// immediately". As long as dirty pages are tracked and eventually written
-	// back, this is a no-op. (Correspondingly: "Since Linux 2.6.19, MS_ASYNC
-	// is in fact a no-op, since the kernel properly tracks dirty pages and
-	// flushes them to storage as necessary.")
-	//
-	// However: "ENOMEM: The indicated memory (or part of it) was not mapped."
-	// This applies even for MS_ASYNC.
-	ar, ok := addr.ToRange(uint64(la))
-	if !ok {
-		return 0, nil, syserror.ENOMEM
+
+	mode := memmap.MLockEager
+	if flags&linux.MLOCK_ONFAULT != 0 {
+		mode = memmap.MLockLazy
 	}
-	mapped := t.MemoryManager().VirtualMemorySizeRange(ar)
-	if mapped != uint64(la) {
-		return 0, nil, syserror.ENOMEM
+	return 0, nil, t.MemoryManager().MLock(t, addr, uint64(length), mode)
+}
+
+// Munlock implements linux syscall munlock(2).
+func Munlock(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
+	addr := args[0].Pointer()
+	length := args[1].SizeT()
+
+	return 0, nil, t.MemoryManager().MLock(t, addr, uint64(length), memmap.MLockNone)
+}
+
+// Mlockall implements linux syscall mlockall(2).
+func Mlockall(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
+	flags := args[0].Int()
+
+	if flags&^(linux.MCL_CURRENT|linux.MCL_FUTURE|linux.MCL_ONFAULT) != 0 {
+		return 0, nil, syserror.EINVAL
 	}
-	return 0, nil, nil
+
+	mode := memmap.MLockEager
+	if flags&linux.MCL_ONFAULT != 0 {
+		mode = memmap.MLockLazy
+	}
+	return 0, nil, t.MemoryManager().MLockAll(t, mm.MLockAllOpts{
+		Current: flags&linux.MCL_CURRENT != 0,
+		Future:  flags&linux.MCL_FUTURE != 0,
+		Mode:    mode,
+	})
+}
+
+// Munlockall implements linux syscall munlockall(2).
+func Munlockall(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
+	return 0, nil, t.MemoryManager().MLockAll(t, mm.MLockAllOpts{
+		Current: true,
+		Future:  true,
+		Mode:    memmap.MLockNone,
+	})
 }
diff --git a/pkg/sentry/syscalls/linux/sys_rlimit.go b/pkg/sentry/syscalls/linux/sys_rlimit.go
index 2f16e1791..b0b216045 100644
--- a/pkg/sentry/syscalls/linux/sys_rlimit.go
+++ b/pkg/sentry/syscalls/linux/sys_rlimit.go
@@ -90,6 +90,7 @@ var setableLimits = map[limits.LimitType]struct{}{
 	limits.CPU:      {},
 	limits.Data:     {},
 	limits.FileSize: {},
+	limits.MemoryLocked: {},
 	limits.Stack:    {},
 	// These are not enforced, but we include them here to avoid returning
 	// EPERM, since some apps expect them to succeed.
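The sys_rlimit.go hunk matters for the new syscalls: adding limits.MemoryLocked to setableLimits lets setrlimit(RLIMIT_MEMLOCK) succeed inside the sandbox instead of returning EPERM, giving mm.MLock a limit to enforce. A minimal sketch of that interaction, again using golang.org/x/sys/unix (demo code, not from this change; per mlock(2) the failure is ENOMEM, or EPERM when the limit is 0 and the caller lacks CAP_IPC_LOCK):

package main

import (
	"fmt"

	"golang.org/x/sys/unix"
)

func main() {
	// Cap locked memory at one page.
	lim := unix.Rlimit{Cur: 4096, Max: 4096}
	if err := unix.Setrlimit(unix.RLIMIT_MEMLOCK, &lim); err != nil {
		fmt.Println("setrlimit:", err) // failed with EPERM before this change
		return
	}

	// Two pages exceed the limit, so mlock should fail with ENOMEM.
	buf := make([]byte, 8192)
	if err := unix.Mlock(buf); err != nil {
		fmt.Println("mlock over limit:", err)
	}
}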