summaryrefslogtreecommitdiffhomepage
path: root/pkg/sentry/mm
diff options
context:
space:
mode:
Diffstat (limited to 'pkg/sentry/mm')
-rw-r--r--pkg/sentry/mm/lifecycle.go37
-rw-r--r--pkg/sentry/mm/mm.go3
-rwxr-xr-xpkg/sentry/mm/mm_state_autogen.go2
-rw-r--r--pkg/sentry/mm/syscalls.go26
-rw-r--r--pkg/sentry/mm/vma.go1
5 files changed, 69 insertions, 0 deletions
diff --git a/pkg/sentry/mm/lifecycle.go b/pkg/sentry/mm/lifecycle.go
index 06e4372ff..4e9ca1de6 100644
--- a/pkg/sentry/mm/lifecycle.go
+++ b/pkg/sentry/mm/lifecycle.go
@@ -86,10 +86,22 @@ func (mm *MemoryManager) Fork(ctx context.Context) (*MemoryManager, error) {
}
// Copy vmas.
+ dontforks := false
dstvgap := mm2.vmas.FirstGap()
for srcvseg := mm.vmas.FirstSegment(); srcvseg.Ok(); srcvseg = srcvseg.NextSegment() {
vma := srcvseg.Value() // makes a copy of the vma
vmaAR := srcvseg.Range()
+
+ if vma.dontfork {
+ length := uint64(vmaAR.Length())
+ mm2.usageAS -= length
+ if vma.isPrivateDataLocked() {
+ mm2.dataAS -= length
+ }
+ dontforks = true
+ continue
+ }
+
// Inform the Mappable, if any, of the new mapping.
if vma.mappable != nil {
if err := vma.mappable.AddMapping(ctx, mm2, vmaAR, vma.off, vma.canWriteMappableLocked()); err != nil {
@@ -118,6 +130,10 @@ func (mm *MemoryManager) Fork(ctx context.Context) (*MemoryManager, error) {
defer mm2.activeMu.Unlock()
mm.activeMu.Lock()
defer mm.activeMu.Unlock()
+ if dontforks {
+ defer mm.pmas.MergeRange(mm.applicationAddrRange())
+ }
+ srcvseg := mm.vmas.FirstSegment()
dstpgap := mm2.pmas.FirstGap()
var unmapAR usermem.AddrRange
for srcpseg := mm.pmas.FirstSegment(); srcpseg.Ok(); srcpseg = srcpseg.NextSegment() {
@@ -125,6 +141,27 @@ func (mm *MemoryManager) Fork(ctx context.Context) (*MemoryManager, error) {
if !pma.private {
continue
}
+
+ if dontforks {
+ // Find the 'vma' that contains the starting address
+ // associated with the 'pma' (there must be one).
+ srcvseg = srcvseg.seekNextLowerBound(srcpseg.Start())
+ if checkInvariants {
+ if !srcvseg.Ok() {
+ panic(fmt.Sprintf("no vma covers pma range %v", srcpseg.Range()))
+ }
+ if srcpseg.Start() < srcvseg.Start() {
+ panic(fmt.Sprintf("vma %v ran ahead of pma %v", srcvseg.Range(), srcpseg.Range()))
+ }
+ }
+
+ srcpseg = mm.pmas.Isolate(srcpseg, srcvseg.Range())
+ if srcvseg.ValuePtr().dontfork {
+ continue
+ }
+ pma = srcpseg.ValuePtr()
+ }
+
if !pma.needCOW {
pma.needCOW = true
if pma.effectivePerms.Write {
diff --git a/pkg/sentry/mm/mm.go b/pkg/sentry/mm/mm.go
index 2ec2ad99b..7bb96b159 100644
--- a/pkg/sentry/mm/mm.go
+++ b/pkg/sentry/mm/mm.go
@@ -274,6 +274,9 @@ type vma struct {
// metag, none of which we currently support.
growsDown bool `state:"manual"`
+ // dontfork is the MADV_DONTFORK setting for this vma configured by madvise().
+ dontfork bool
+
mlockMode memmap.MLockMode
// numaPolicy is the NUMA policy for this vma set by mbind().
diff --git a/pkg/sentry/mm/mm_state_autogen.go b/pkg/sentry/mm/mm_state_autogen.go
index 29f2f5634..3b5af2401 100755
--- a/pkg/sentry/mm/mm_state_autogen.go
+++ b/pkg/sentry/mm/mm_state_autogen.go
@@ -194,6 +194,7 @@ func (x *vma) save(m state.Map) {
m.SaveValue("realPerms", realPerms)
m.Save("mappable", &x.mappable)
m.Save("off", &x.off)
+ m.Save("dontfork", &x.dontfork)
m.Save("mlockMode", &x.mlockMode)
m.Save("numaPolicy", &x.numaPolicy)
m.Save("numaNodemask", &x.numaNodemask)
@@ -205,6 +206,7 @@ func (x *vma) afterLoad() {}
func (x *vma) load(m state.Map) {
m.Load("mappable", &x.mappable)
m.Load("off", &x.off)
+ m.Load("dontfork", &x.dontfork)
m.Load("mlockMode", &x.mlockMode)
m.Load("numaPolicy", &x.numaPolicy)
m.Load("numaNodemask", &x.numaNodemask)
diff --git a/pkg/sentry/mm/syscalls.go b/pkg/sentry/mm/syscalls.go
index 9aa39e31d..c2466c988 100644
--- a/pkg/sentry/mm/syscalls.go
+++ b/pkg/sentry/mm/syscalls.go
@@ -1026,6 +1026,32 @@ func (mm *MemoryManager) SetNumaPolicy(addr usermem.Addr, length uint64, policy
}
}
+// SetDontFork implements the semantics of madvise MADV_DONTFORK.
+func (mm *MemoryManager) SetDontFork(addr usermem.Addr, length uint64, dontfork bool) error {
+ ar, ok := addr.ToRange(length)
+ if !ok {
+ return syserror.EINVAL
+ }
+
+ mm.mappingMu.Lock()
+ defer mm.mappingMu.Unlock()
+ defer func() {
+ mm.vmas.MergeRange(ar)
+ mm.vmas.MergeAdjacent(ar)
+ }()
+
+ for vseg := mm.vmas.LowerBoundSegment(ar.Start); vseg.Ok() && vseg.Start() < ar.End; vseg = vseg.NextSegment() {
+ vseg = mm.vmas.Isolate(vseg, ar)
+ vma := vseg.ValuePtr()
+ vma.dontfork = dontfork
+ }
+
+ if mm.vmas.SpanRange(ar) != ar.Length() {
+ return syserror.ENOMEM
+ }
+ return nil
+}
+
// Decommit implements the semantics of Linux's madvise(MADV_DONTNEED).
func (mm *MemoryManager) Decommit(addr usermem.Addr, length uint64) error {
ar, ok := addr.ToRange(length)
diff --git a/pkg/sentry/mm/vma.go b/pkg/sentry/mm/vma.go
index 9f846cdb8..074e2b141 100644
--- a/pkg/sentry/mm/vma.go
+++ b/pkg/sentry/mm/vma.go
@@ -439,6 +439,7 @@ func (vmaSetFunctions) Merge(ar1 usermem.AddrRange, vma1 vma, ar2 usermem.AddrRa
vma1.mlockMode != vma2.mlockMode ||
vma1.numaPolicy != vma2.numaPolicy ||
vma1.numaNodemask != vma2.numaNodemask ||
+ vma1.dontfork != vma2.dontfork ||
vma1.id != vma2.id ||
vma1.hint != vma2.hint {
return vma{}, false