diff options
author | Jamie Liu <jamieliu@google.com> | 2018-12-17 11:37:38 -0800 |
---|---|---|
committer | Shentubot <shentubot@google.com> | 2018-12-17 11:38:59 -0800 |
commit | 2421006426445a1827422c2dbdd6fc6a47087147 (patch) | |
tree | 49aa2bc113c208fc117aff8a036866a7260090e5 /pkg/sentry/mm/vma.go | |
parent | 54694086dfb02a6f8453f043a44ffd10bb5a7070 (diff) |
Implement mlock(), kind of.
Currently mlock() and friends do nothing whatsoever. However, mlocking
is directly application-visible in a number of ways; for example,
madvise(MADV_DONTNEED) and msync(MS_INVALIDATE) both fail on mlocked
regions. We handle this inconsistently: MADV_DONTNEED is too important
to not work, but MS_INVALIDATE is rejected.
Change MM to track mlocked regions in a manner consistent with Linux.
It still will not actually pin pages into host physical memory, but:
- mlock() will now cause sentry memory management to precommit mlocked
pages.
- MADV_DONTNEED and MS_INVALIDATE will interact with mlocked pages as
described above, i.e. both will fail on mlocked regions.
PiperOrigin-RevId: 225861605
Change-Id: Iee187204979ac9a4d15d0e037c152c0902c8d0ee
Diffstat (limited to 'pkg/sentry/mm/vma.go')
-rw-r--r-- | pkg/sentry/mm/vma.go | 38 |
1 file changed, 38 insertions, 0 deletions
```diff
diff --git a/pkg/sentry/mm/vma.go b/pkg/sentry/mm/vma.go
index 5c2c802f6..28ba9f2f5 100644
--- a/pkg/sentry/mm/vma.go
+++ b/pkg/sentry/mm/vma.go
@@ -17,8 +17,10 @@ package mm
 import (
 	"fmt"
 
+	"gvisor.googlesource.com/gvisor/pkg/abi/linux"
 	"gvisor.googlesource.com/gvisor/pkg/sentry/arch"
 	"gvisor.googlesource.com/gvisor/pkg/sentry/context"
+	"gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth"
 	"gvisor.googlesource.com/gvisor/pkg/sentry/limits"
 	"gvisor.googlesource.com/gvisor/pkg/sentry/memmap"
 	"gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
@@ -53,6 +55,23 @@ func (mm *MemoryManager) createVMALocked(ctx context.Context, opts memmap.MMapOp
 		return vmaIterator{}, usermem.AddrRange{}, syserror.ENOMEM
 	}
 
+	if opts.MLockMode != memmap.MLockNone {
+		// Check against RLIMIT_MEMLOCK.
+		if creds := auth.CredentialsFromContext(ctx); !creds.HasCapabilityIn(linux.CAP_IPC_LOCK, creds.UserNamespace.Root()) {
+			mlockLimit := limits.FromContext(ctx).Get(limits.MemoryLocked).Cur
+			if mlockLimit == 0 {
+				return vmaIterator{}, usermem.AddrRange{}, syserror.EPERM
+			}
+			newLockedAS := mm.lockedAS + opts.Length
+			if opts.Unmap {
+				newLockedAS -= mm.mlockedBytesRangeLocked(ar)
+			}
+			if newLockedAS > mlockLimit {
+				return vmaIterator{}, usermem.AddrRange{}, syserror.EAGAIN
+			}
+		}
+	}
+
 	// Remove overwritten mappings. This ordering is consistent with Linux:
 	// compare Linux's mm/mmap.c:mmap_region() => do_munmap(),
 	// file->f_op->mmap().
@@ -85,10 +104,14 @@ func (mm *MemoryManager) createVMALocked(ctx context.Context, opts memmap.MMapOp
 		maxPerms: opts.MaxPerms,
 		private: opts.Private,
 		growsDown: opts.GrowsDown,
+		mlockMode: opts.MLockMode,
 		id: opts.MappingIdentity,
 		hint: opts.Hint,
 	})
 	mm.usageAS += opts.Length
+	if opts.MLockMode != memmap.MLockNone {
+		mm.lockedAS += opts.Length
+	}
 
 	return vseg, ar, nil
 }
@@ -201,6 +224,17 @@ func (mm *MemoryManager) findHighestAvailableLocked(length, alignment uint64, bo
 	return 0, syserror.ENOMEM
 }
 
+// Preconditions: mm.mappingMu must be locked.
+func (mm *MemoryManager) mlockedBytesRangeLocked(ar usermem.AddrRange) uint64 {
+	var total uint64
+	for vseg := mm.vmas.LowerBoundSegment(ar.Start); vseg.Ok() && vseg.Start() < ar.End; vseg = vseg.NextSegment() {
+		if vseg.ValuePtr().mlockMode != memmap.MLockNone {
+			total += uint64(vseg.Range().Intersect(ar).Length())
+		}
+	}
+	return total
+}
+
 // getVMAsLocked ensures that vmas exist for all addresses in ar, and support
 // access of type (at, ignorePermissions). It returns:
 //
@@ -338,6 +372,9 @@ func (mm *MemoryManager) removeVMAsLocked(ctx context.Context, ar usermem.AddrRa
 			vma.id.DecRef()
 		}
 		mm.usageAS -= uint64(vmaAR.Length())
+		if vma.mlockMode != memmap.MLockNone {
+			mm.lockedAS -= uint64(vmaAR.Length())
+		}
 		vgap = mm.vmas.Remove(vseg)
 		vseg = vgap.NextSegment()
 	}
@@ -368,6 +405,7 @@ func (vmaSetFunctions) Merge(ar1 usermem.AddrRange, vma1 vma, ar2 usermem.AddrRa
 		vma1.maxPerms != vma2.maxPerms ||
 		vma1.private != vma2.private ||
 		vma1.growsDown != vma2.growsDown ||
+		vma1.mlockMode != vma2.mlockMode ||
 		vma1.id != vma2.id ||
 		vma1.hint != vma2.hint {
 		return vma{}, false
```