summaryrefslogtreecommitdiffhomepage
path: root/pkg/sentry/mm/vma.go
diff options
context:
space:
mode:
author Jamie Liu <jamieliu@google.com> 2018-12-17 11:37:38 -0800
committer Shentubot <shentubot@google.com> 2018-12-17 11:38:59 -0800
commit 2421006426445a1827422c2dbdd6fc6a47087147 (patch)
tree 49aa2bc113c208fc117aff8a036866a7260090e5 /pkg/sentry/mm/vma.go
parent 54694086dfb02a6f8453f043a44ffd10bb5a7070 (diff)
Implement mlock(), kind of.
Currently mlock() and friends do nothing whatsoever. However, mlocking is
directly application-visible in a number of ways; for example,
madvise(MADV_DONTNEED) and msync(MS_INVALIDATE) both fail on mlocked regions.
We handle this inconsistently: MADV_DONTNEED is too important to not work, but
MS_INVALIDATE is rejected.

Change MM to track mlocked regions in a manner consistent with Linux. It still
will not actually pin pages into host physical memory, but:

- mlock() will now cause sentry memory management to precommit mlocked pages.

- MADV_DONTNEED and MS_INVALIDATE will interact with mlocked pages as described
  above.

PiperOrigin-RevId: 225861605
Change-Id: Iee187204979ac9a4d15d0e037c152c0902c8d0ee
Diffstat (limited to 'pkg/sentry/mm/vma.go')
-rw-r--r-- pkg/sentry/mm/vma.go 38
1 files changed, 38 insertions, 0 deletions
diff --git a/pkg/sentry/mm/vma.go b/pkg/sentry/mm/vma.go
index 5c2c802f6..28ba9f2f5 100644
--- a/pkg/sentry/mm/vma.go
+++ b/pkg/sentry/mm/vma.go
@@ -17,8 +17,10 @@ package mm
import (
"fmt"
+ "gvisor.googlesource.com/gvisor/pkg/abi/linux"
"gvisor.googlesource.com/gvisor/pkg/sentry/arch"
"gvisor.googlesource.com/gvisor/pkg/sentry/context"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth"
"gvisor.googlesource.com/gvisor/pkg/sentry/limits"
"gvisor.googlesource.com/gvisor/pkg/sentry/memmap"
"gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
@@ -53,6 +55,23 @@ func (mm *MemoryManager) createVMALocked(ctx context.Context, opts memmap.MMapOp
return vmaIterator{}, usermem.AddrRange{}, syserror.ENOMEM
}
+ if opts.MLockMode != memmap.MLockNone {
+ // Check against RLIMIT_MEMLOCK.
+ if creds := auth.CredentialsFromContext(ctx); !creds.HasCapabilityIn(linux.CAP_IPC_LOCK, creds.UserNamespace.Root()) {
+ mlockLimit := limits.FromContext(ctx).Get(limits.MemoryLocked).Cur
+ if mlockLimit == 0 {
+ return vmaIterator{}, usermem.AddrRange{}, syserror.EPERM
+ }
+ newLockedAS := mm.lockedAS + opts.Length
+ if opts.Unmap {
+ newLockedAS -= mm.mlockedBytesRangeLocked(ar)
+ }
+ if newLockedAS > mlockLimit {
+ return vmaIterator{}, usermem.AddrRange{}, syserror.EAGAIN
+ }
+ }
+ }
+
// Remove overwritten mappings. This ordering is consistent with Linux:
// compare Linux's mm/mmap.c:mmap_region() => do_munmap(),
// file->f_op->mmap().
@@ -85,10 +104,14 @@ func (mm *MemoryManager) createVMALocked(ctx context.Context, opts memmap.MMapOp
maxPerms: opts.MaxPerms,
private: opts.Private,
growsDown: opts.GrowsDown,
+ mlockMode: opts.MLockMode,
id: opts.MappingIdentity,
hint: opts.Hint,
})
mm.usageAS += opts.Length
+ if opts.MLockMode != memmap.MLockNone {
+ mm.lockedAS += opts.Length
+ }
return vseg, ar, nil
}
@@ -201,6 +224,17 @@ func (mm *MemoryManager) findHighestAvailableLocked(length, alignment uint64, bo
return 0, syserror.ENOMEM
}
+// mlockedBytesRangeLocked returns the number of bytes of ar that are covered by vmas whose mlockMode is not memmap.MLockNone. Preconditions: mm.mappingMu must be locked.
+func (mm *MemoryManager) mlockedBytesRangeLocked(ar usermem.AddrRange) uint64 {
+ var total uint64
+ for vseg := mm.vmas.LowerBoundSegment(ar.Start); vseg.Ok() && vseg.Start() < ar.End; vseg = vseg.NextSegment() {
+ if vseg.ValuePtr().mlockMode != memmap.MLockNone {
+ total += uint64(vseg.Range().Intersect(ar).Length()) // count only the part of this vma that lies inside ar
+ }
+ }
+ return total
+}
+
// getVMAsLocked ensures that vmas exist for all addresses in ar, and support
// access of type (at, ignorePermissions). It returns:
//
@@ -338,6 +372,9 @@ func (mm *MemoryManager) removeVMAsLocked(ctx context.Context, ar usermem.AddrRa
vma.id.DecRef()
}
mm.usageAS -= uint64(vmaAR.Length())
+ if vma.mlockMode != memmap.MLockNone {
+ mm.lockedAS -= uint64(vmaAR.Length())
+ }
vgap = mm.vmas.Remove(vseg)
vseg = vgap.NextSegment()
}
@@ -368,6 +405,7 @@ func (vmaSetFunctions) Merge(ar1 usermem.AddrRange, vma1 vma, ar2 usermem.AddrRa
vma1.maxPerms != vma2.maxPerms ||
vma1.private != vma2.private ||
vma1.growsDown != vma2.growsDown ||
+ vma1.mlockMode != vma2.mlockMode ||
vma1.id != vma2.id ||
vma1.hint != vma2.hint {
return vma{}, false