author | Jamie Liu <jamieliu@google.com> | 2019-03-25 12:41:36 -0700
committer | Shentubot <shentubot@google.com> | 2019-03-25 12:42:43 -0700
commit | f3723f805989d0c5956fbb6aa88b1cd9ac20753c (patch)
tree | f8992ae20287a20ab4325a0fa8f5da213283470f /pkg/sentry/mm
parent | ddc05e3053e387be9c81aa98c621b6fc92b01000 (diff)
Call memmap.Mappable.Translate with more conservative usermem.AccessType.
MM.insertPMAsLocked() passes vma.maxPerms to memmap.Mappable.Translate
(although it unsets AccessType.Write if the vma is private). This
somewhat simplifies handling of pmas, since it means only COW-break
needs to replace existing pmas. However, it also means that a MAP_SHARED
mapping of a file opened O_RDWR dirties the file, regardless of the
mapping's permissions and whether or not the mapping is ever actually
written to with I/O that ignores permissions (e.g.
ptrace(PTRACE_POKEDATA)).
To fix this:
- Change the pma-getting path to request only the permissions that are
required for the calling access.
- Change memmap.Mappable.Translate to take requested permissions, and
return allowed permissions. This preserves the existing behavior in the
common cases where the memmap.Mappable isn't
fsutil.CachingInodeOperations and doesn't care if the translated
platform.File pages are written to.
- Change the MM.getPMAsLocked path to support permission upgrading of
pmas outside of copy-on-write.
PiperOrigin-RevId: 240196979
Change-Id: Ie0147c62c1fbc409467a6fa16269a413f3d7d571
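To make the permission model described above concrete, here is a minimal, self-contained sketch (not part of the change itself) of how a pma's permissions are now derived: the owning vma's effective and maximum permissions are masked by the permissions returned by memmap.Mappable.Translate (pma.translatePerms), and Write is withheld while the pma still requires copy-on-write. The AccessType type, Intersect helper, and pmaPerms function below are simplified, hypothetical stand-ins for the usermem types and the logic in pkg/sentry/mm/pma.go.

```go
package main

import "fmt"

// AccessType is a simplified stand-in for usermem.AccessType.
type AccessType struct {
	Read, Write, Execute bool
}

// Intersect returns the permissions allowed by both a and b, mirroring
// usermem.AccessType.Intersect.
func (a AccessType) Intersect(b AccessType) AccessType {
	return AccessType{
		Read:    a.Read && b.Read,
		Write:   a.Write && b.Write,
		Execute: a.Execute && b.Execute,
	}
}

// pmaPerms is a hypothetical helper illustrating the rule documented in the
// mm.go hunk below: pma.effectivePerms and pma.maxPerms are vma.effectivePerms
// and vma.maxPerms masked by pma.translatePerms, with Write disallowed while
// pma.needCOW is true.
func pmaPerms(vmaEffective, vmaMax, translatePerms AccessType, needCOW bool) (effective, maximum AccessType) {
	effective = vmaEffective.Intersect(translatePerms)
	maximum = vmaMax.Intersect(translatePerms)
	if needCOW {
		effective.Write = false
		maximum.Write = false
	}
	return effective, maximum
}

func main() {
	// A private read/write mapping whose Mappable only granted read access in
	// its Translations: the resulting pma stays read-only until copy-on-write
	// break upgrades it.
	eff, maxP := pmaPerms(
		AccessType{Read: true, Write: true}, // vma.effectivePerms
		AccessType{Read: true, Write: true}, // vma.maxPerms
		AccessType{Read: true},              // pma.translatePerms
		true,                                // pma.needCOW
	)
	fmt.Printf("effectivePerms=%+v maxPerms=%+v\n", eff, maxP)
}
```

Because translatePerms can now be narrower than what the vma allows, getPMAsLocked can later upgrade a pma in place by calling Translate again with a broader requested AccessType; that is the permission-upgrade path outside of copy-on-write mentioned above.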
Diffstat (limited to 'pkg/sentry/mm')
-rw-r--r-- | pkg/sentry/mm/address_space.go | 2
-rw-r--r-- | pkg/sentry/mm/aio_context.go | 1
-rw-r--r-- | pkg/sentry/mm/debug.go | 6
-rw-r--r-- | pkg/sentry/mm/io.go | 20
-rw-r--r-- | pkg/sentry/mm/lifecycle.go | 4
-rw-r--r-- | pkg/sentry/mm/mm.go | 27
-rw-r--r-- | pkg/sentry/mm/pma.go | 664
-rw-r--r-- | pkg/sentry/mm/special_mappable.go | 1
-rw-r--r-- | pkg/sentry/mm/syscalls.go | 22
9 files changed, 374 insertions, 373 deletions
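Before the file-by-file diff, an illustrative sketch (again, not taken from the change) of the shape of the revised Translate contract: the Mappable receives the access type being requested and reports, per Translation, the permissions for which that Translation is valid. The types below are simplified stand-ins for the memmap and usermem types, and singleRangeMappable is a hypothetical Mappable backed by one contiguous file range, modeled on the aio_context.go and special_mappable.go hunks below, which simply report usermem.AnyAccess.

```go
package main

import (
	"context"
	"fmt"
)

// AccessType is a simplified stand-in for usermem.AccessType.
type AccessType struct{ Read, Write, Execute bool }

// AnyAccess mirrors usermem.AnyAccess: valid for any access type.
var AnyAccess = AccessType{Read: true, Write: true, Execute: true}

// SupersetOf reports whether a allows at least the accesses that b does.
func (a AccessType) SupersetOf(b AccessType) bool {
	return (a.Read || !b.Read) && (a.Write || !b.Write) && (a.Execute || !b.Execute)
}

// MappableRange is a range of offsets into a Mappable.
type MappableRange struct{ Start, End uint64 }

// Translation describes a range of backing memory; Perms is the field added
// by this change: the permissions for which the Translation is valid.
type Translation struct {
	Source MappableRange
	Offset uint64
	Perms  AccessType
}

// singleRangeMappable is a hypothetical Mappable-like type backed by a single
// contiguous range starting at frStart in some memory file.
type singleRangeMappable struct {
	frStart uint64
}

// Translate returns one Translation covering optional and valid for all
// accesses, following the "Perms: usermem.AnyAccess" pattern added to
// aioMappable and SpecialMappable in this change. (The real method also
// returns the backing platform.File and trims optional to the mapped range.)
func (m *singleRangeMappable) Translate(ctx context.Context, required, optional MappableRange, at AccessType) ([]Translation, error) {
	return []Translation{{
		Source: optional,
		Offset: m.frStart + optional.Start,
		Perms:  AnyAccess,
	}}, nil
}

func main() {
	m := &singleRangeMappable{frStart: 0x10000}
	ts, err := m.Translate(context.Background(), MappableRange{0, 0x1000}, MappableRange{0, 0x2000}, AccessType{Read: true})
	fmt.Println(err, ts[0].Perms.SupersetOf(AccessType{Read: true})) // <nil> true
}
```

A permission-aware Mappable such as fsutil.CachingInodeOperations can instead return only the permissions it actually granted for the requested access, so that, per the commit message, a MAP_SHARED mapping that is never written to no longer dirties the file.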
diff --git a/pkg/sentry/mm/address_space.go b/pkg/sentry/mm/address_space.go index 90cfef746..4dddcf7b5 100644 --- a/pkg/sentry/mm/address_space.go +++ b/pkg/sentry/mm/address_space.go @@ -179,7 +179,7 @@ func (mm *MemoryManager) mapASLocked(pseg pmaIterator, ar usermem.AddrRange, pre pma := pseg.ValuePtr() pmaAR := pseg.Range() pmaMapAR := pmaAR.Intersect(mapAR) - perms := pma.vmaEffectivePerms + perms := pma.effectivePerms if pma.needCOW { perms.Write = false } diff --git a/pkg/sentry/mm/aio_context.go b/pkg/sentry/mm/aio_context.go index 6cec6387a..f7ff06de0 100644 --- a/pkg/sentry/mm/aio_context.go +++ b/pkg/sentry/mm/aio_context.go @@ -302,6 +302,7 @@ func (m *aioMappable) Translate(ctx context.Context, required, optional memmap.M Source: source, File: m.mfp.MemoryFile(), Offset: m.fr.Start + source.Start, + Perms: usermem.AnyAccess, }, }, err } diff --git a/pkg/sentry/mm/debug.go b/pkg/sentry/mm/debug.go index d341b9c07..d075ee1ca 100644 --- a/pkg/sentry/mm/debug.go +++ b/pkg/sentry/mm/debug.go @@ -68,12 +68,12 @@ func (pseg pmaIterator) debugStringEntryLocked() []byte { fmt.Fprintf(&b, "%08x-%08x ", pseg.Start(), pseg.End()) pma := pseg.ValuePtr() - if pma.vmaEffectivePerms.Read { + if pma.effectivePerms.Read { b.WriteByte('r') } else { b.WriteByte('-') } - if pma.vmaEffectivePerms.Write { + if pma.effectivePerms.Write { if pma.needCOW { b.WriteByte('c') } else { @@ -82,7 +82,7 @@ func (pseg pmaIterator) debugStringEntryLocked() []byte { } else { b.WriteByte('-') } - if pma.vmaEffectivePerms.Execute { + if pma.effectivePerms.Execute { b.WriteByte('x') } else { b.WriteByte('-') diff --git a/pkg/sentry/mm/io.go b/pkg/sentry/mm/io.go index e0cebef84..81787a6fd 100644 --- a/pkg/sentry/mm/io.go +++ b/pkg/sentry/mm/io.go @@ -466,9 +466,7 @@ func (mm *MemoryManager) handleASIOFault(ctx context.Context, addr usermem.Addr, // Ensure that we have usable pmas. mm.activeMu.Lock() - pseg, pend, err := mm.getPMAsLocked(ctx, vseg, ar, pmaOpts{ - breakCOW: at.Write, - }) + pseg, pend, err := mm.getPMAsLocked(ctx, vseg, ar, at) mm.mappingMu.RUnlock() if pendaddr := pend.Start(); pendaddr < ar.End { if pendaddr <= ar.Start { @@ -498,14 +496,10 @@ func (mm *MemoryManager) handleASIOFault(ctx context.Context, addr usermem.Addr, // // Preconditions: 0 < ar.Length() <= math.MaxInt64. func (mm *MemoryManager) withInternalMappings(ctx context.Context, ar usermem.AddrRange, at usermem.AccessType, ignorePermissions bool, f func(safemem.BlockSeq) (uint64, error)) (int64, error) { - po := pmaOpts{ - breakCOW: at.Write, - } - // If pmas are already available, we can do IO without touching mm.vmas or // mm.mappingMu. mm.activeMu.RLock() - if pseg := mm.existingPMAsLocked(ar, at, ignorePermissions, po, true /* needInternalMappings */); pseg.Ok() { + if pseg := mm.existingPMAsLocked(ar, at, ignorePermissions, true /* needInternalMappings */); pseg.Ok() { n, err := f(mm.internalMappingsLocked(pseg, ar)) mm.activeMu.RUnlock() // Do not convert errors returned by f to EFAULT. @@ -526,7 +520,7 @@ func (mm *MemoryManager) withInternalMappings(ctx context.Context, ar usermem.Ad // Ensure that we have usable pmas. 
mm.activeMu.Lock() - pseg, pend, perr := mm.getPMAsLocked(ctx, vseg, ar, po) + pseg, pend, perr := mm.getPMAsLocked(ctx, vseg, ar, at) mm.mappingMu.RUnlock() if pendaddr := pend.Start(); pendaddr < ar.End { if pendaddr <= ar.Start { @@ -578,14 +572,10 @@ func (mm *MemoryManager) withVecInternalMappings(ctx context.Context, ars userme return mm.withInternalMappings(ctx, ars.Head(), at, ignorePermissions, f) } - po := pmaOpts{ - breakCOW: at.Write, - } - // If pmas are already available, we can do IO without touching mm.vmas or // mm.mappingMu. mm.activeMu.RLock() - if mm.existingVecPMAsLocked(ars, at, ignorePermissions, po, true /* needInternalMappings */) { + if mm.existingVecPMAsLocked(ars, at, ignorePermissions, true /* needInternalMappings */) { n, err := f(mm.vecInternalMappingsLocked(ars)) mm.activeMu.RUnlock() // Do not convert errors returned by f to EFAULT. @@ -603,7 +593,7 @@ func (mm *MemoryManager) withVecInternalMappings(ctx context.Context, ars userme // Ensure that we have usable pmas. mm.activeMu.Lock() - pars, perr := mm.getVecPMAsLocked(ctx, vars, po) + pars, perr := mm.getVecPMAsLocked(ctx, vars, at) mm.mappingMu.RUnlock() if pars.NumBytes() == 0 { mm.activeMu.Unlock() diff --git a/pkg/sentry/mm/lifecycle.go b/pkg/sentry/mm/lifecycle.go index a71286f14..2fe03172c 100644 --- a/pkg/sentry/mm/lifecycle.go +++ b/pkg/sentry/mm/lifecycle.go @@ -124,7 +124,7 @@ func (mm *MemoryManager) Fork(ctx context.Context) (*MemoryManager, error) { } if !pma.needCOW { pma.needCOW = true - if pma.vmaEffectivePerms.Write { + if pma.effectivePerms.Write { // We don't want to unmap the whole address space, even though // doing so would reduce calls to unmapASLocked(), because mm // will most likely continue to be used after the fork, so @@ -139,7 +139,9 @@ func (mm *MemoryManager) Fork(ctx context.Context) (*MemoryManager, error) { } unmapAR = srcpseg.Range() } + pma.effectivePerms.Write = false } + pma.maxPerms.Write = false } fr := srcpseg.fileRange() mm2.incPrivateRef(fr) diff --git a/pkg/sentry/mm/mm.go b/pkg/sentry/mm/mm.go index 6ed838d64..a3417a46e 100644 --- a/pkg/sentry/mm/mm.go +++ b/pkg/sentry/mm/mm.go @@ -71,9 +71,6 @@ type MemoryManager struct { // ownership is shared by one or more pmas instead of being owned by a // memmap.Mappable). // - // NOTE: This should be replaced using refcounts on - // platform.File. - // // privateRefs is immutable. privateRefs *privateRefs @@ -374,13 +371,27 @@ type pma struct { file platform.File `state:"nosave"` // off is the offset into file at which this pma begins. + // + // Note that pmas do *not* hold references on offsets in file! If private + // is true, MemoryManager.privateRefs holds the reference instead. If + // private is false, the corresponding memmap.Mappable holds the reference + // instead (per memmap.Mappable.Translate requirement). off uint64 - // vmaEffectivePerms and vmaMaxPerms are duplicated from the - // corresponding vma so that the IO implementation can avoid iterating - // mm.vmas when pmas already exist. - vmaEffectivePerms usermem.AccessType - vmaMaxPerms usermem.AccessType + // translatePerms is the permissions returned by memmap.Mappable.Translate. + // If private is true, translatePerms is usermem.AnyAccess. + translatePerms usermem.AccessType + + // effectivePerms is the permissions allowed for non-ignorePermissions + // accesses. maxPerms is the permissions allowed for ignorePermissions + // accesses. 
These are vma.effectivePerms and vma.maxPerms respectively, + // masked by pma.translatePerms and with Write disallowed if pma.needCOW is + // true. + // + // These are stored in the pma so that the IO implementation can avoid + // iterating mm.vmas when pmas already exist. + effectivePerms usermem.AccessType + maxPerms usermem.AccessType // needCOW is true if writes to the mapping must be propagated to a copy. needCOW bool diff --git a/pkg/sentry/mm/pma.go b/pkg/sentry/mm/pma.go index bb779a45b..e090537cc 100644 --- a/pkg/sentry/mm/pma.go +++ b/pkg/sentry/mm/pma.go @@ -27,18 +27,13 @@ import ( "gvisor.googlesource.com/gvisor/pkg/syserror" ) -type pmaOpts struct { - // If breakCOW is true, pmas must not be copy-on-write. - breakCOW bool -} - // existingPMAsLocked checks that pmas exist for all addresses in ar, and // support access of type (at, ignorePermissions). If so, it returns an // iterator to the pma containing ar.Start. Otherwise it returns a terminal // iterator. // // Preconditions: mm.activeMu must be locked. ar.Length() != 0. -func (mm *MemoryManager) existingPMAsLocked(ar usermem.AddrRange, at usermem.AccessType, ignorePermissions bool, opts pmaOpts, needInternalMappings bool) pmaIterator { +func (mm *MemoryManager) existingPMAsLocked(ar usermem.AddrRange, at usermem.AccessType, ignorePermissions bool, needInternalMappings bool) pmaIterator { if checkInvariants { if !ar.WellFormed() || ar.Length() <= 0 { panic(fmt.Sprintf("invalid ar: %v", ar)) @@ -49,16 +44,11 @@ func (mm *MemoryManager) existingPMAsLocked(ar usermem.AddrRange, at usermem.Acc pseg := first for pseg.Ok() { pma := pseg.ValuePtr() - perms := pma.vmaEffectivePerms + perms := pma.effectivePerms if ignorePermissions { - perms = pma.vmaMaxPerms + perms = pma.maxPerms } if !perms.SupersetOf(at) { - // These are the vma's permissions, so the caller will get an error - // when they try to get new pmas. - return pmaIterator{} - } - if opts.breakCOW && pma.needCOW { return pmaIterator{} } if needInternalMappings && pma.internalMappings.IsEmpty() { @@ -79,17 +69,17 @@ func (mm *MemoryManager) existingPMAsLocked(ar usermem.AddrRange, at usermem.Acc // and support access of type (at, ignorePermissions). // // Preconditions: mm.activeMu must be locked. -func (mm *MemoryManager) existingVecPMAsLocked(ars usermem.AddrRangeSeq, at usermem.AccessType, ignorePermissions bool, opts pmaOpts, needInternalMappings bool) bool { +func (mm *MemoryManager) existingVecPMAsLocked(ars usermem.AddrRangeSeq, at usermem.AccessType, ignorePermissions bool, needInternalMappings bool) bool { for ; !ars.IsEmpty(); ars = ars.Tail() { - if ar := ars.Head(); ar.Length() != 0 && !mm.existingPMAsLocked(ar, at, ignorePermissions, opts, needInternalMappings).Ok() { + if ar := ars.Head(); ar.Length() != 0 && !mm.existingPMAsLocked(ar, at, ignorePermissions, needInternalMappings).Ok() { return false } } return true } -// getPMAsLocked ensures that pmas exist for all addresses in ar, subject to -// opts. It returns: +// getPMAsLocked ensures that pmas exist for all addresses in ar, and support +// access of type at. It returns: // // - An iterator to the pma containing ar.Start. If no pma contains ar.Start, // the iterator is unspecified. @@ -102,8 +92,9 @@ func (mm *MemoryManager) existingVecPMAsLocked(ars usermem.AddrRangeSeq, at user // // Preconditions: mm.mappingMu must be locked. mm.activeMu must be locked for // writing. ar.Length() != 0. vseg.Range().Contains(ar.Start). vmas must exist -// for all addresses in ar. 
-func (mm *MemoryManager) getPMAsLocked(ctx context.Context, vseg vmaIterator, ar usermem.AddrRange, opts pmaOpts) (pmaIterator, pmaGapIterator, error) { +// for all addresses in ar, and support accesses of type at (i.e. permission +// checks must have been performed against vmas). +func (mm *MemoryManager) getPMAsLocked(ctx context.Context, vseg vmaIterator, ar usermem.AddrRange, at usermem.AccessType) (pmaIterator, pmaGapIterator, error) { if checkInvariants { if !ar.WellFormed() || ar.Length() <= 0 { panic(fmt.Sprintf("invalid ar: %v", ar)) @@ -125,54 +116,40 @@ func (mm *MemoryManager) getPMAsLocked(ctx context.Context, vseg vmaIterator, ar } ar = usermem.AddrRange{ar.Start.RoundDown(), end} - pstart, pend, perr := mm.ensurePMAsLocked(ctx, vseg, ar) + pstart, pend, perr := mm.getPMAsInternalLocked(ctx, vseg, ar, at) if pend.Start() <= ar.Start { return pmaIterator{}, pend, perr } - // ensurePMAsLocked may not have pstart due to iterator invalidation. We - // need it, either to return it immediately or to pass to - // breakCopyOnWriteLocked. + // getPMAsInternalLocked may not have returned pstart due to iterator + // invalidation. if !pstart.Ok() { pstart = mm.findOrSeekPrevUpperBoundPMA(ar.Start, pend) } - - var cowerr error - if opts.breakCOW { - if pend.Start() < ar.End { - // Adjust ar to reflect missing pmas. - ar.End = pend.Start() - } - var invalidated bool - pend, invalidated, cowerr = mm.breakCopyOnWriteLocked(pstart, ar) - if pend.Start() <= ar.Start { - return pmaIterator{}, pend, cowerr - } - if invalidated { - pstart = mm.findOrSeekPrevUpperBoundPMA(ar.Start, pend) - } - } - - if cowerr != nil { - return pstart, pend, cowerr - } if perr != nil { return pstart, pend, perr } return pstart, pend, alignerr } -// getVecPMAsLocked ensures that pmas exist for all addresses in ars. It -// returns the subset of ars for which pmas exist. If this is not equal to ars, -// it returns a non-nil error explaining why. +// getVecPMAsLocked ensures that pmas exist for all addresses in ars, and +// support access of type at. It returns the subset of ars for which pmas +// exist. If this is not equal to ars, it returns a non-nil error explaining +// why. // // Preconditions: mm.mappingMu must be locked. mm.activeMu must be locked for -// writing. vmas must exist for all addresses in ars. -func (mm *MemoryManager) getVecPMAsLocked(ctx context.Context, ars usermem.AddrRangeSeq, opts pmaOpts) (usermem.AddrRangeSeq, error) { +// writing. vmas must exist for all addresses in ars, and support accesses of +// type at (i.e. permission checks must have been performed against vmas). +func (mm *MemoryManager) getVecPMAsLocked(ctx context.Context, ars usermem.AddrRangeSeq, at usermem.AccessType) (usermem.AddrRangeSeq, error) { for arsit := ars; !arsit.IsEmpty(); arsit = arsit.Tail() { ar := arsit.Head() if ar.Length() == 0 { continue } + if checkInvariants { + if !ar.WellFormed() { + panic(fmt.Sprintf("invalid ar: %v", ar)) + } + } // Page-align ar so that all AddrRanges are aligned. 
end, ok := ar.End.RoundUp() @@ -183,26 +160,7 @@ func (mm *MemoryManager) getVecPMAsLocked(ctx context.Context, ars usermem.AddrR } ar = usermem.AddrRange{ar.Start.RoundDown(), end} - pstart, pend, perr := mm.ensurePMAsLocked(ctx, mm.vmas.FindSegment(ar.Start), ar) - if pend.Start() <= ar.Start { - return truncatedAddrRangeSeq(ars, arsit, pend.Start()), perr - } - - var cowerr error - if opts.breakCOW { - if !pstart.Ok() { - pstart = mm.findOrSeekPrevUpperBoundPMA(ar.Start, pend) - } - if pend.Start() < ar.End { - // Adjust ar to reflect missing pmas. - ar.End = pend.Start() - } - pend, _, cowerr = mm.breakCopyOnWriteLocked(pstart, ar) - } - - if cowerr != nil { - return truncatedAddrRangeSeq(ars, arsit, pend.Start()), cowerr - } + _, pend, perr := mm.getPMAsInternalLocked(ctx, mm.vmas.FindSegment(ar.Start), ar, at) if perr != nil { return truncatedAddrRangeSeq(ars, arsit, pend.Start()), perr } @@ -214,64 +172,303 @@ func (mm *MemoryManager) getVecPMAsLocked(ctx context.Context, ars usermem.AddrR return ars, nil } -// ensurePMAsLocked ensures that pmas exist for all addresses in ar. It returns: +// getPMAsInternalLocked is equivalent to getPMAsLocked, with the following +// exceptions: // -// - An iterator to the pma containing ar.Start, on a best-effort basis (that -// is, the returned iterator may be terminal, even if such a pma exists). -// Returning this iterator on a best-effort basis allows callers that require -// it to use it when it's cheaply available, while also avoiding the overhead -// of retrieving it when it's not. -// -// - An iterator to the gap after the last pma containing an address in ar. If -// pmas exist for no addresses in ar, the iterator is to a gap that begins -// before ar.Start. +// - getPMAsInternalLocked returns a pmaIterator on a best-effort basis (that +// is, the returned iterator may be terminal, even if a pma that contains +// ar.Start exists). Returning this iterator on a best-effort basis allows +// callers that require it to use it when it's cheaply available, while also +// avoiding the overhead of retrieving it when it's not. // -// - An error that is non-nil if pmas exist for only a subset of ar. +// - getPMAsInternalLocked additionally requires that ar is page-aligned. // -// Preconditions: mm.mappingMu must be locked. mm.activeMu must be locked for -// writing. ar.Length() != 0. ar must be page-aligned. -// vseg.Range().Contains(ar.Start). vmas must exist for all addresses in ar. -func (mm *MemoryManager) ensurePMAsLocked(ctx context.Context, vseg vmaIterator, ar usermem.AddrRange) (pmaIterator, pmaGapIterator, error) { +// getPMAsInternalLocked is an implementation helper for getPMAsLocked and +// getVecPMAsLocked; other clients should call one of those instead. +func (mm *MemoryManager) getPMAsInternalLocked(ctx context.Context, vseg vmaIterator, ar usermem.AddrRange, at usermem.AccessType) (pmaIterator, pmaGapIterator, error) { if checkInvariants { if !ar.WellFormed() || ar.Length() <= 0 || !ar.IsPageAligned() { panic(fmt.Sprintf("invalid ar: %v", ar)) } + if !vseg.Ok() { + panic("terminal vma iterator") + } if !vseg.Range().Contains(ar.Start) { panic(fmt.Sprintf("initial vma %v does not cover start of ar %v", vseg.Range(), ar)) } } - pstart, pgap := mm.pmas.Find(ar.Start) - if pstart.Ok() { - pgap = pstart.NextGap() - } - for pgap.Start() < ar.End { - if pgap.Range().Length() == 0 { - pgap = pgap.NextGap() - continue - } - // A single pgap might be spanned by multiple vmas. Insert pmas to - // cover the first (vma, pgap) pair. 
- pgapAR := pgap.Range().Intersect(ar) - vseg = vseg.seekNextLowerBound(pgapAR.Start) - if checkInvariants { - if !vseg.Ok() { - panic(fmt.Sprintf("no vma after %#x", pgapAR.Start)) - } - if pgapAR.Start < vseg.Start() { - panic(fmt.Sprintf("no vma in [%#x, %#x)", pgapAR.Start, vseg.Start())) + mf := mm.mfp.MemoryFile() + // Limit the range we allocate to ar, aligned to privateAllocUnit. + maskAR := privateAligned(ar) + didUnmapAS := false + // The range in which we iterate vmas and pmas is still limited to ar, to + // ensure that we don't allocate or COW-break a pma we don't need. + pseg, pgap := mm.pmas.Find(ar.Start) + pstart := pseg + for { + // Get pmas for this vma. + vsegAR := vseg.Range().Intersect(ar) + vma := vseg.ValuePtr() + pmaLoop: + for { + switch { + case pgap.Ok() && pgap.Start() < vsegAR.End: + // Need a pma here. + optAR := vseg.Range().Intersect(pgap.Range()) + if checkInvariants { + if optAR.Length() <= 0 { + panic(fmt.Sprintf("vseg %v and pgap %v do not overlap", vseg, pgap)) + } + } + if vma.mappable == nil { + // Private anonymous mappings get pmas by allocating. + allocAR := optAR.Intersect(maskAR) + fr, err := mf.Allocate(uint64(allocAR.Length()), usage.Anonymous) + if err != nil { + return pstart, pgap, err + } + if checkInvariants { + if !fr.WellFormed() || fr.Length() != uint64(allocAR.Length()) { + panic(fmt.Sprintf("Allocate(%v) returned invalid FileRange %v", allocAR.Length(), fr)) + } + } + mm.addRSSLocked(allocAR) + mm.incPrivateRef(fr) + mf.IncRef(fr) + pseg, pgap = mm.pmas.Insert(pgap, allocAR, pma{ + file: mf, + off: fr.Start, + translatePerms: usermem.AnyAccess, + effectivePerms: vma.effectivePerms, + maxPerms: vma.maxPerms, + // Since we just allocated this memory and have the + // only reference, the new pma does not need + // copy-on-write. + private: true, + }).NextNonEmpty() + pstart = pmaIterator{} // iterators invalidated + } else { + // Other mappings get pmas by translating. + optMR := vseg.mappableRangeOf(optAR) + reqAR := optAR.Intersect(ar) + reqMR := vseg.mappableRangeOf(reqAR) + perms := at + if vma.private { + // This pma will be copy-on-write; don't require write + // permission, but do require read permission to + // facilitate the copy. + // + // If at.Write is true, we will need to break + // copy-on-write immediately, which occurs after + // translation below. + perms.Read = true + perms.Write = false + } + ts, err := vma.mappable.Translate(ctx, reqMR, optMR, perms) + if checkInvariants { + if err := memmap.CheckTranslateResult(reqMR, optMR, perms, ts, err); err != nil { + panic(fmt.Sprintf("Mappable(%T).Translate(%v, %v, %v): %v", vma.mappable, reqMR, optMR, perms, err)) + } + } + // Install a pma for each translation. + if len(ts) == 0 { + return pstart, pgap, err + } + pstart = pmaIterator{} // iterators invalidated + for _, t := range ts { + newpmaAR := vseg.addrRangeOf(t.Source) + newpma := pma{ + file: t.File, + off: t.Offset, + translatePerms: t.Perms, + effectivePerms: vma.effectivePerms.Intersect(t.Perms), + maxPerms: vma.maxPerms.Intersect(t.Perms), + } + if vma.private { + newpma.effectivePerms.Write = false + newpma.maxPerms.Write = false + newpma.needCOW = true + } + mm.addRSSLocked(newpmaAR) + t.File.IncRef(t.FileRange()) + // This is valid because memmap.Mappable.Translate is + // required to return Translations in increasing + // Translation.Source order. 
+ pseg = mm.pmas.Insert(pgap, newpmaAR, newpma) + pgap = pseg.NextGap() + } + // The error returned by Translate is only significant if + // it occurred before ar.End. + if err != nil && vseg.addrRangeOf(ts[len(ts)-1].Source).End < ar.End { + return pstart, pgap, err + } + // Rewind pseg to the first pma inserted and continue the + // loop to check if we need to break copy-on-write. + pseg, pgap = mm.findOrSeekPrevUpperBoundPMA(vseg.addrRangeOf(ts[0].Source).Start, pgap), pmaGapIterator{} + continue + } + + case pseg.Ok() && pseg.Start() < vsegAR.End: + oldpma := pseg.ValuePtr() + if at.Write && mm.isPMACopyOnWriteLocked(vseg, pseg) { + // Break copy-on-write by copying. + if checkInvariants { + if !oldpma.maxPerms.Read { + panic(fmt.Sprintf("pma %v needs to be copied for writing, but is not readable: %v", pseg.Range(), oldpma)) + } + } + copyAR := pseg.Range().Intersect(maskAR) + // Get internal mappings from the pma to copy from. + if err := pseg.getInternalMappingsLocked(); err != nil { + return pstart, pseg.PrevGap(), err + } + // Copy contents. + fr, err := mf.AllocateAndFill(uint64(copyAR.Length()), usage.Anonymous, &safemem.BlockSeqReader{mm.internalMappingsLocked(pseg, copyAR)}) + if _, ok := err.(safecopy.BusError); ok { + // If we got SIGBUS during the copy, deliver SIGBUS to + // userspace (instead of SIGSEGV) if we're breaking + // copy-on-write due to application page fault. + err = &memmap.BusError{err} + } + if fr.Length() == 0 { + return pstart, pseg.PrevGap(), err + } + // Unmap all of maskAR, not just copyAR, to minimize host + // syscalls. AddressSpace mappings must be removed before + // mm.decPrivateRef(). + if !didUnmapAS { + mm.unmapASLocked(maskAR) + didUnmapAS = true + } + // Replace the pma with a copy in the part of the address + // range where copying was successful. This doesn't change + // RSS. + copyAR.End = copyAR.Start + usermem.Addr(fr.Length()) + if copyAR != pseg.Range() { + pseg = mm.pmas.Isolate(pseg, copyAR) + pstart = pmaIterator{} // iterators invalidated + } + oldpma = pseg.ValuePtr() + if oldpma.private { + mm.decPrivateRef(pseg.fileRange()) + } + oldpma.file.DecRef(pseg.fileRange()) + mm.incPrivateRef(fr) + mf.IncRef(fr) + oldpma.file = mf + oldpma.off = fr.Start + oldpma.translatePerms = usermem.AnyAccess + oldpma.effectivePerms = vma.effectivePerms + oldpma.maxPerms = vma.maxPerms + oldpma.needCOW = false + oldpma.private = true + oldpma.internalMappings = safemem.BlockSeq{} + // Try to merge the pma with its neighbors. + if prev := pseg.PrevSegment(); prev.Ok() { + if merged := mm.pmas.Merge(prev, pseg); merged.Ok() { + pseg = merged + pstart = pmaIterator{} // iterators invalidated + } + } + if next := pseg.NextSegment(); next.Ok() { + if merged := mm.pmas.Merge(pseg, next); merged.Ok() { + pseg = merged + pstart = pmaIterator{} // iterators invalidated + } + } + // The error returned by AllocateAndFill is only + // significant if it occurred before ar.End. + if err != nil && pseg.End() < ar.End { + return pstart, pseg.NextGap(), err + } + // Ensure pseg and pgap are correct for the next iteration + // of the loop. + pseg, pgap = pseg.NextNonEmpty() + } else if !oldpma.translatePerms.SupersetOf(at) { + // Get new pmas (with sufficient permissions) by calling + // memmap.Mappable.Translate again. + if checkInvariants { + if oldpma.private { + panic(fmt.Sprintf("private pma %v has non-maximal pma.translatePerms: %v", pseg.Range(), oldpma)) + } + } + // Allow the entire pma to be replaced. 
+ optAR := pseg.Range() + optMR := vseg.mappableRangeOf(optAR) + reqAR := optAR.Intersect(ar) + reqMR := vseg.mappableRangeOf(reqAR) + perms := oldpma.translatePerms.Union(at) + ts, err := vma.mappable.Translate(ctx, reqMR, optMR, perms) + if checkInvariants { + if err := memmap.CheckTranslateResult(reqMR, optMR, perms, ts, err); err != nil { + panic(fmt.Sprintf("Mappable(%T).Translate(%v, %v, %v): %v", vma.mappable, reqMR, optMR, perms, err)) + } + } + // Remove the part of the existing pma covered by new + // Translations, then insert new pmas. This doesn't change + // RSS. Note that we don't need to call unmapASLocked: any + // existing AddressSpace mappings are still valid (though + // less permissive than the new pmas indicate) until + // Invalidate is called, and will be replaced by future + // calls to mapASLocked. + if len(ts) == 0 { + return pstart, pseg.PrevGap(), err + } + transMR := memmap.MappableRange{ts[0].Source.Start, ts[len(ts)-1].Source.End} + transAR := vseg.addrRangeOf(transMR) + pseg = mm.pmas.Isolate(pseg, transAR) + pseg.ValuePtr().file.DecRef(pseg.fileRange()) + pgap = mm.pmas.Remove(pseg) + pstart = pmaIterator{} // iterators invalidated + for _, t := range ts { + newpmaAR := vseg.addrRangeOf(t.Source) + newpma := pma{ + file: t.File, + off: t.Offset, + translatePerms: t.Perms, + effectivePerms: vma.effectivePerms.Intersect(t.Perms), + maxPerms: vma.maxPerms.Intersect(t.Perms), + } + if vma.private { + newpma.effectivePerms.Write = false + newpma.maxPerms.Write = false + newpma.needCOW = true + } + t.File.IncRef(t.FileRange()) + pseg = mm.pmas.Insert(pgap, newpmaAR, newpma) + pgap = pseg.NextGap() + } + // The error returned by Translate is only significant if + // it occurred before ar.End. + if err != nil && pseg.End() < ar.End { + return pstart, pgap, err + } + // Ensure pseg and pgap are correct for the next iteration + // of the loop. + if pgap.Range().Length() == 0 { + pseg, pgap = pgap.NextSegment(), pmaGapIterator{} + } else { + pseg = pmaIterator{} + } + } else { + // We have a usable pma; continue. + pseg, pgap = pseg.NextNonEmpty() + } + + default: + break pmaLoop } } - var err error - pgap, err = mm.insertPMAsLocked(ctx, vseg, pgap, ar) - // insertPMAsLocked most likely invalidated iterators, so pstart is now - // unknown. - pstart = pmaIterator{} - if err != nil { - return pstart, pgap, err + // Go to the next vma. + if ar.End <= vseg.End() { + if pgap.Ok() { + return pstart, pgap, nil + } + return pstart, pseg.PrevGap(), nil } + vseg = vseg.NextSegment() } - return pstart, pgap, nil } const ( @@ -299,215 +496,16 @@ func privateAligned(ar usermem.AddrRange) usermem.AddrRange { return aligned } -// insertPMAsLocked inserts pmas into pgap corresponding to the vma iterated by -// vseg, spanning at least ar. It returns: -// -// - An iterator to the gap after the last pma containing an address in ar. If -// pmas exist for no addresses in ar, the iterator is to a gap that begins -// before ar.Start. -// -// - An error that is non-nil if pmas exist for only a subset of ar. -// -// Preconditions: mm.mappingMu must be locked. mm.activeMu must be locked for -// writing. vseg.Range().Intersect(pgap.Range()).Intersect(ar).Length() != 0. -// ar must be page-aligned. 
-func (mm *MemoryManager) insertPMAsLocked(ctx context.Context, vseg vmaIterator, pgap pmaGapIterator, ar usermem.AddrRange) (pmaGapIterator, error) { - optAR := vseg.Range().Intersect(pgap.Range()) - if checkInvariants { - if optAR.Length() <= 0 { - panic(fmt.Sprintf("vseg %v and pgap %v do not overlap", vseg, pgap)) - } - if !ar.WellFormed() || ar.Length() <= 0 || !ar.IsPageAligned() { - panic(fmt.Sprintf("invalid ar %v", ar)) - } - } - vma := vseg.ValuePtr() - - // Private anonymous mappings get pmas by allocating. - if vma.mappable == nil { - // Limit the range we allocate to ar, aligned to privateAllocUnit. - maskAR := privateAligned(ar) - allocAR := optAR.Intersect(maskAR) - mf := mm.mfp.MemoryFile() - fr, err := mf.Allocate(uint64(allocAR.Length()), usage.Anonymous) - if err != nil { - return pgap, err - } - mm.incPrivateRef(fr) - - if checkInvariants { - if !fr.WellFormed() || fr.Length() != uint64(allocAR.Length()) { - panic(fmt.Sprintf("Allocate(%v) returned invalid FileRange %v", allocAR.Length(), fr)) - } - } - - mm.addRSSLocked(allocAR) - mf.IncRef(fr) - - return mm.pmas.Insert(pgap, allocAR, pma{ - file: mf, - off: fr.Start, - vmaEffectivePerms: vma.effectivePerms, - vmaMaxPerms: vma.maxPerms, - private: true, - // Since we just allocated this memory and have the only reference, - // the new pma does not need copy-on-write. - }).NextGap(), nil - } - - // Other mappings get pmas by translating. Limit the required range - // to ar. - optMR := vseg.mappableRangeOf(optAR) - reqAR := optAR.Intersect(ar) - reqMR := vseg.mappableRangeOf(reqAR) - perms := vma.maxPerms - if vma.private { - perms.Write = false - } - ts, err := vma.mappable.Translate(ctx, reqMR, optMR, perms) - if checkInvariants { - if err := memmap.CheckTranslateResult(reqMR, optMR, ts, err); err != nil { - panic(fmt.Sprintf("Mappable(%T).Translate(%v, %v): %v", vma.mappable, reqMR, optMR, err)) - } - } - - // Install a pma for each Translation. - for _, t := range ts { - // This is valid because memmap.Mappable.Translate is required to - // return Translations in increasing Translation.Source order. - addrRange := vseg.addrRangeOf(t.Source) - mm.addRSSLocked(addrRange) - pseg := mm.pmas.Insert(pgap, addrRange, pma{ - file: t.File, - off: t.Offset, - vmaEffectivePerms: vma.effectivePerms, - vmaMaxPerms: vma.maxPerms, - needCOW: vma.private, - }) - // The new pseg may have been merged with existing segments, only take a - // ref on the inserted range. - t.File.IncRef(pseg.fileRangeOf(addrRange)) - pgap = pseg.NextGap() - } - - // Even if Translate returned an error, if we got to ar.End, - // insertPMAsLocked succeeded. - if ar.End <= pgap.Start() { - return pgap, nil - } - return pgap, err -} - -// breakCopyOnWriteLocked ensures that pmas in ar are not copy-on-write. It -// returns: +// isPMACopyOnWriteLocked returns true if the contents of the pma represented +// by pseg must be copied to a new private pma to be written to. // -// - An iterator to the gap after the last non-COW pma containing an address in -// ar. If non-COW pmas exist for no addresses in ar, the iterator is to a gap -// that begins before ar.Start. +// If the pma is a copy-on-write private pma, and holds the only reference on +// the memory it maps, isPMACopyOnWriteLocked will take ownership of the memory +// and update the pma to indicate that it does not require copy-on-write. // -// - A boolean that is true if iterators into mm.pmas may have been -// invalidated. -// -// - An error that is non-nil if non-COW pmas exist for only a subset of ar. 
-// -// Preconditions: mm.activeMu must be locked for writing. ar.Length() != 0. ar -// must be page-aligned. pseg.Range().Contains(ar.Start). pmas must exist for -// all addresses in ar. -func (mm *MemoryManager) breakCopyOnWriteLocked(pseg pmaIterator, ar usermem.AddrRange) (pmaGapIterator, bool, error) { - if checkInvariants { - if !ar.WellFormed() || ar.Length() <= 0 || !ar.IsPageAligned() { - panic(fmt.Sprintf("invalid ar: %v", ar)) - } - if !pseg.Range().Contains(ar.Start) { - panic(fmt.Sprintf("initial pma %v does not cover start of ar %v", pseg.Range(), ar)) - } - } - - // Limit the range we copy to ar, aligned to privateAllocUnit. - maskAR := privateAligned(ar) - var invalidatedIterators, didUnmapAS bool - mf := mm.mfp.MemoryFile() - for { - if mm.isPMACopyOnWriteLocked(pseg) { - // Determine the range to copy. - copyAR := pseg.Range().Intersect(maskAR) - - // Get internal mappings from the pma to copy from. - if err := pseg.getInternalMappingsLocked(); err != nil { - return pseg.PrevGap(), invalidatedIterators, err - } - - // Copy contents. - fr, err := mf.AllocateAndFill(uint64(copyAR.Length()), usage.Anonymous, &safemem.BlockSeqReader{mm.internalMappingsLocked(pseg, copyAR)}) - if _, ok := err.(safecopy.BusError); ok { - // If we got SIGBUS during the copy, deliver SIGBUS to - // userspace (instead of SIGSEGV) if we're breaking - // copy-on-write due to application page fault. - err = &memmap.BusError{err} - } - if fr.Length() == 0 { - return pseg.PrevGap(), invalidatedIterators, err - } - mm.incPrivateRef(fr) - mf.IncRef(fr) - - // Unmap all of maskAR, not just copyAR, to minimize host syscalls. - // AddressSpace mappings must be removed before mm.decPrivateRef(). - if !didUnmapAS { - mm.unmapASLocked(maskAR) - didUnmapAS = true - } - - // Replace the pma with a copy in the part of the address range - // where copying was successful. - copyAR.End = copyAR.Start + usermem.Addr(fr.Length()) - if copyAR != pseg.Range() { - pseg = mm.pmas.Isolate(pseg, copyAR) - invalidatedIterators = true - } - pma := pseg.ValuePtr() - if pma.private { - mm.decPrivateRef(pseg.fileRange()) - } - pma.file.DecRef(pseg.fileRange()) - - pma.file = mf - pma.off = fr.Start - pma.private = true - pma.needCOW = false - pma.internalMappings = safemem.BlockSeq{} - - // Try to merge pma with its neighbors. - if prev := pseg.PrevSegment(); prev.Ok() { - if merged := mm.pmas.Merge(prev, pseg); merged.Ok() { - pseg = merged - invalidatedIterators = true - } - } - if next := pseg.NextSegment(); next.Ok() { - if merged := mm.pmas.Merge(pseg, next); merged.Ok() { - pseg = merged - invalidatedIterators = true - } - } - - // If an error occurred after ar.End, breakCopyOnWriteLocked still - // did its job, so discard the error. - if err != nil && pseg.End() < ar.End { - return pseg.NextGap(), invalidatedIterators, err - } - } - // This checks against ar.End, not maskAR.End, so we will never break - // COW on a pma that does not intersect ar. - if ar.End <= pseg.End() { - return pseg.NextGap(), invalidatedIterators, nil - } - pseg = pseg.NextSegment() - } -} - -// Preconditions: mm.activeMu must be locked for writing. -func (mm *MemoryManager) isPMACopyOnWriteLocked(pseg pmaIterator) bool { +// Preconditions: vseg.Range().IsSupersetOf(pseg.Range()). mm.mappingMu must be +// locked. mm.activeMu must be locked for writing. 
+func (mm *MemoryManager) isPMACopyOnWriteLocked(vseg vmaIterator, pseg pmaIterator) bool { pma := pseg.ValuePtr() if !pma.needCOW { return false @@ -526,6 +524,10 @@ func (mm *MemoryManager) isPMACopyOnWriteLocked(pseg pmaIterator) bool { rseg := mm.privateRefs.refs.FindSegment(fr.Start) if rseg.Ok() && rseg.Value() == 1 && fr.End <= rseg.End() { pma.needCOW = false + // pma.private => pma.translatePerms == usermem.AnyAccess + vma := vseg.ValuePtr() + pma.effectivePerms = vma.effectivePerms + pma.maxPerms = vma.maxPerms return false } return true @@ -617,9 +619,7 @@ func (mm *MemoryManager) Pin(ctx context.Context, ar usermem.AddrRange, at userm // Ensure that we have usable pmas. mm.activeMu.Lock() - pseg, pend, perr := mm.getPMAsLocked(ctx, vseg, ar, pmaOpts{ - breakCOW: at.Write, - }) + pseg, pend, perr := mm.getPMAsLocked(ctx, vseg, ar, at) mm.mappingMu.RUnlock() if pendaddr := pend.Start(); pendaddr < ar.End { if pendaddr <= ar.Start { @@ -925,8 +925,9 @@ func (pmaSetFunctions) ClearValue(pma *pma) { func (pmaSetFunctions) Merge(ar1 usermem.AddrRange, pma1 pma, ar2 usermem.AddrRange, pma2 pma) (pma, bool) { if pma1.file != pma2.file || pma1.off+uint64(ar1.Length()) != pma2.off || - pma1.vmaEffectivePerms != pma2.vmaEffectivePerms || - pma1.vmaMaxPerms != pma2.vmaMaxPerms || + pma1.translatePerms != pma2.translatePerms || + pma1.effectivePerms != pma2.effectivePerms || + pma1.maxPerms != pma2.maxPerms || pma1.needCOW != pma2.needCOW || pma1.private != pma2.private { return pma{}, false @@ -979,20 +980,13 @@ func (mm *MemoryManager) findOrSeekPrevUpperBoundPMA(addr usermem.Addr, pgap pma func (pseg pmaIterator) getInternalMappingsLocked() error { pma := pseg.ValuePtr() if pma.internalMappings.IsEmpty() { - // Internal mappings are used for ignorePermissions accesses, - // so we need to use vma.maxPerms instead of - // vma.effectivePerms. However, we will never execute - // application code through an internal mapping, and we don't - // actually need a writable mapping if copy-on-write is in - // effect. (But get a writable mapping anyway if the pma is - // private, so that if breakCopyOnWriteLocked => - // isPMACopyOnWriteLocked takes ownership of the pma instead of - // copying, it doesn't need to get a new mapping.) - perms := pma.vmaMaxPerms + // This must use maxPerms (instead of perms) because some permission + // constraints are only visible to vmas; for example, mappings of + // read-only files have vma.maxPerms.Write unset, but this may not be + // visible to the memmap.Mappable. + perms := pma.maxPerms + // We will never execute application code through an internal mapping. perms.Execute = false - if pma.needCOW && !pma.private { - perms.Write = false - } ims, err := pma.file.MapInternal(pseg.fileRange(), perms) if err != nil { return err diff --git a/pkg/sentry/mm/special_mappable.go b/pkg/sentry/mm/special_mappable.go index aa94d7d6a..cfbf7a104 100644 --- a/pkg/sentry/mm/special_mappable.go +++ b/pkg/sentry/mm/special_mappable.go @@ -102,6 +102,7 @@ func (m *SpecialMappable) Translate(ctx context.Context, required, optional memm Source: source, File: m.mfp.MemoryFile(), Offset: m.fr.Start + source.Start, + Perms: usermem.AnyAccess, }, }, err } diff --git a/pkg/sentry/mm/syscalls.go b/pkg/sentry/mm/syscalls.go index b56e0d3b9..3725c98aa 100644 --- a/pkg/sentry/mm/syscalls.go +++ b/pkg/sentry/mm/syscalls.go @@ -54,9 +54,7 @@ func (mm *MemoryManager) HandleUserFault(ctx context.Context, addr usermem.Addr, // Ensure that we have a usable pma. 
mm.activeMu.Lock() - pseg, _, err := mm.getPMAsLocked(ctx, vseg, ar, pmaOpts{ - breakCOW: at.Write, - }) + pseg, _, err := mm.getPMAsLocked(ctx, vseg, ar, at) mm.mappingMu.RUnlock() if err != nil { mm.activeMu.Unlock() @@ -186,7 +184,7 @@ func (mm *MemoryManager) populateVMA(ctx context.Context, vseg vmaIterator, ar u } // Ensure that we have usable pmas. - pseg, _, err := mm.getPMAsLocked(ctx, vseg, ar, pmaOpts{}) + pseg, _, err := mm.getPMAsLocked(ctx, vseg, ar, usermem.NoAccess) if err != nil { // mm/util.c:vm_mmap_pgoff() ignores the error, if any, from // mm/gup.c:mm_populate(). If it matters, we'll get it again when @@ -231,7 +229,7 @@ func (mm *MemoryManager) populateVMAAndUnlock(ctx context.Context, vseg vmaItera // mm.mappingMu doesn't need to be write-locked for getPMAsLocked, and it // isn't needed at all for mapASLocked. mm.mappingMu.DowngradeLock() - pseg, _, err := mm.getPMAsLocked(ctx, vseg, ar, pmaOpts{}) + pseg, _, err := mm.getPMAsLocked(ctx, vseg, ar, usermem.NoAccess) mm.mappingMu.RUnlock() if err != nil { mm.activeMu.Unlock() @@ -651,13 +649,17 @@ func (mm *MemoryManager) MProtect(addr usermem.Addr, length uint64, realPerms us for pseg.Ok() && pseg.Start() < vseg.End() { if pseg.Range().Overlaps(vseg.Range()) { pseg = mm.pmas.Isolate(pseg, vseg.Range()) - if !effectivePerms.SupersetOf(pseg.ValuePtr().vmaEffectivePerms) && !didUnmapAS { + pma := pseg.ValuePtr() + if !effectivePerms.SupersetOf(pma.effectivePerms) && !didUnmapAS { // Unmap all of ar, not just vseg.Range(), to minimize host // syscalls. mm.unmapASLocked(ar) didUnmapAS = true } - pseg.ValuePtr().vmaEffectivePerms = effectivePerms + pma.effectivePerms = effectivePerms.Intersect(pma.translatePerms) + if pma.needCOW { + pma.effectivePerms.Write = false + } } pseg = pseg.NextSegment() } @@ -828,7 +830,7 @@ func (mm *MemoryManager) MLock(ctx context.Context, addr usermem.Addr, length ui mm.mappingMu.RUnlock() return syserror.ENOMEM } - _, _, err := mm.getPMAsLocked(ctx, vseg, vseg.Range().Intersect(ar), pmaOpts{}) + _, _, err := mm.getPMAsLocked(ctx, vseg, vseg.Range().Intersect(ar), usermem.NoAccess) if err != nil { mm.activeMu.Unlock() mm.mappingMu.RUnlock() @@ -923,7 +925,7 @@ func (mm *MemoryManager) MLockAll(ctx context.Context, opts MLockAllOpts) error mm.mappingMu.DowngradeLock() for vseg := mm.vmas.FirstSegment(); vseg.Ok(); vseg = vseg.NextSegment() { if vseg.ValuePtr().effectivePerms.Any() { - mm.getPMAsLocked(ctx, vseg, vseg.Range(), pmaOpts{}) + mm.getPMAsLocked(ctx, vseg, vseg.Range(), usermem.NoAccess) } } @@ -981,7 +983,7 @@ func (mm *MemoryManager) Decommit(addr usermem.Addr, length uint64) error { } for pseg.Ok() && pseg.Start() < vsegAR.End { pma := pseg.ValuePtr() - if pma.private && !mm.isPMACopyOnWriteLocked(pseg) { + if pma.private && !mm.isPMACopyOnWriteLocked(vseg, pseg) { psegAR := pseg.Range().Intersect(ar) if vsegAR.IsSupersetOf(psegAR) && vma.mappable == nil { if err := mf.Decommit(pseg.fileRangeOf(psegAR)); err == nil { |