// Copyright 2018 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package mm

import (
	"fmt"

	"gvisor.dev/gvisor/pkg/context"
	"gvisor.dev/gvisor/pkg/errors/linuxerr"
	"gvisor.dev/gvisor/pkg/hostarch"
	"gvisor.dev/gvisor/pkg/safecopy"
	"gvisor.dev/gvisor/pkg/safemem"
	"gvisor.dev/gvisor/pkg/sentry/memmap"
	"gvisor.dev/gvisor/pkg/sentry/usage"
)

// existingPMAsLocked checks that pmas exist for all addresses in ar, and
// support access of type (at, ignorePermissions). If so, it returns an
// iterator to the pma containing ar.Start. Otherwise it returns a terminal
// iterator.
//
// Preconditions:
// * mm.activeMu must be locked.
// * ar.Length() != 0.
func (mm *MemoryManager) existingPMAsLocked(ar hostarch.AddrRange, at hostarch.AccessType, ignorePermissions bool, needInternalMappings bool) pmaIterator {
	if checkInvariants {
		if !ar.WellFormed() || ar.Length() == 0 {
			panic(fmt.Sprintf("invalid ar: %v", ar))
		}
	}

	first := mm.pmas.FindSegment(ar.Start)
	pseg := first
	for pseg.Ok() {
		pma := pseg.ValuePtr()
		perms := pma.effectivePerms
		if ignorePermissions {
			perms = pma.maxPerms
		}
		if !perms.SupersetOf(at) {
			return pmaIterator{}
		}
		if needInternalMappings && pma.internalMappings.IsEmpty() {
			return pmaIterator{}
		}

		if ar.End <= pseg.End() {
			return first
		}
		pseg, _ = pseg.NextNonEmpty()
	}

	// Ran out of pmas before reaching ar.End.
	return pmaIterator{}
}

// existingVecPMAsLocked returns true if pmas exist for all addresses in ars,
// and support access of type (at, ignorePermissions).
//
// Preconditions: mm.activeMu must be locked.
func (mm *MemoryManager) existingVecPMAsLocked(ars hostarch.AddrRangeSeq, at hostarch.AccessType, ignorePermissions bool, needInternalMappings bool) bool {
	for ; !ars.IsEmpty(); ars = ars.Tail() {
		if ar := ars.Head(); ar.Length() != 0 && !mm.existingPMAsLocked(ar, at, ignorePermissions, needInternalMappings).Ok() {
			return false
		}
	}
	return true
}

// getPMAsLocked ensures that pmas exist for all addresses in ar, and support
// access of type at. It returns:
//
// - An iterator to the pma containing ar.Start. If no pma contains ar.Start,
// the iterator is unspecified.
//
// - An iterator to the gap after the last pma containing an address in ar. If
// pmas exist for no addresses in ar, the iterator is to a gap that begins
// before ar.Start.
//
// - An error that is non-nil if pmas exist for only a subset of ar.
//
// Preconditions:
// * mm.mappingMu must be locked.
// * mm.activeMu must be locked for writing.
// * ar.Length() != 0.
// * vseg.Range().Contains(ar.Start).
// * vmas must exist for all addresses in ar, and support accesses of type at
//   (i.e. permission checks must have been performed against vmas).
func (mm *MemoryManager) getPMAsLocked(ctx context.Context, vseg vmaIterator, ar hostarch.AddrRange, at hostarch.AccessType) (pmaIterator, pmaGapIterator, error) {
	if checkInvariants {
		if !ar.WellFormed() || ar.Length() == 0 {
			panic(fmt.Sprintf("invalid ar: %v", ar))
		}
		if !vseg.Ok() {
			panic("terminal vma iterator")
		}
		if !vseg.Range().Contains(ar.Start) {
			panic(fmt.Sprintf("initial vma %v does not cover start of ar %v", vseg.Range(), ar))
		}
	}

	// Page-align ar so that all AddrRanges are aligned.
	end, ok := ar.End.RoundUp()
	var alignerr error
	if !ok {
		end = ar.End.RoundDown()
		alignerr = linuxerr.EFAULT
	}
	ar = hostarch.AddrRange{ar.Start.RoundDown(), end}

	pstart, pend, perr := mm.getPMAsInternalLocked(ctx, vseg, ar, at)
	if pend.Start() <= ar.Start {
		return pmaIterator{}, pend, perr
	}
	// getPMAsInternalLocked may not have returned pstart due to iterator
	// invalidation.
	if !pstart.Ok() {
		pstart = mm.findOrSeekPrevUpperBoundPMA(ar.Start, pend)
	}
	if perr != nil {
		return pstart, pend, perr
	}
	return pstart, pend, alignerr
}

// getVecPMAsLocked ensures that pmas exist for all addresses in ars, and
// support access of type at. It returns the subset of ars for which pmas
// exist. If this is not equal to ars, it returns a non-nil error explaining
// why.
//
// Preconditions:
// * mm.mappingMu must be locked.
// * mm.activeMu must be locked for writing.
// * vmas must exist for all addresses in ars, and support accesses of type at
//   (i.e. permission checks must have been performed against vmas).
func (mm *MemoryManager) getVecPMAsLocked(ctx context.Context, ars hostarch.AddrRangeSeq, at hostarch.AccessType) (hostarch.AddrRangeSeq, error) {
	for arsit := ars; !arsit.IsEmpty(); arsit = arsit.Tail() {
		ar := arsit.Head()
		if ar.Length() == 0 {
			continue
		}
		if checkInvariants {
			if !ar.WellFormed() {
				panic(fmt.Sprintf("invalid ar: %v", ar))
			}
		}

		// Page-align ar so that all AddrRanges are aligned.
		end, ok := ar.End.RoundUp()
		var alignerr error
		if !ok {
			end = ar.End.RoundDown()
			alignerr = linuxerr.EFAULT
		}
		ar = hostarch.AddrRange{ar.Start.RoundDown(), end}

		_, pend, perr := mm.getPMAsInternalLocked(ctx, mm.vmas.FindSegment(ar.Start), ar, at)
		if perr != nil {
			return truncatedAddrRangeSeq(ars, arsit, pend.Start()), perr
		}
		if alignerr != nil {
			return truncatedAddrRangeSeq(ars, arsit, pend.Start()), alignerr
		}
	}

	return ars, nil
}
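
// The following is an illustrative sketch, not part of the original file: it
// shows the caller-side lock ordering that getPMAsLocked's preconditions
// imply (mm.mappingMu acquired before mm.activeMu), mirroring the pattern
// used by Pin later in this file. The helper name ensurePMAsSketch and its
// all-or-nothing error handling are assumptions made for this example; real
// callers (like Pin) typically truncate ar on partial success instead.
func (mm *MemoryManager) ensurePMAsSketch(ctx context.Context, ar hostarch.AddrRange, at hostarch.AccessType) error {
	// ar must be non-empty (a getPMAsLocked precondition).
	// Ensure that usable vmas exist; mm.mappingMu is taken first.
	mm.mappingMu.RLock()
	vseg, vend, verr := mm.getVMAsLocked(ctx, ar, at, false /* ignorePermissions */)
	if vend.Start() < ar.End {
		mm.mappingMu.RUnlock()
		return verr
	}
	// Ensure that usable pmas exist; mm.activeMu is taken for writing while
	// mm.mappingMu is still held, after which mm.mappingMu can be dropped.
	mm.activeMu.Lock()
	_, pend, perr := mm.getPMAsLocked(ctx, vseg, ar, at)
	mm.mappingMu.RUnlock()
	pendaddr := pend.Start()
	mm.activeMu.Unlock()
	if pendaddr < ar.End {
		return perr
	}
	return nil
}
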
// getPMAsInternalLocked is equivalent to getPMAsLocked, with the following
// exceptions:
//
// - getPMAsInternalLocked returns a pmaIterator on a best-effort basis (that
// is, the returned iterator may be terminal, even if a pma that contains
// ar.Start exists). Returning this iterator on a best-effort basis allows
// callers that require it to use it when it's cheaply available, while also
// avoiding the overhead of retrieving it when it's not.
//
// - getPMAsInternalLocked additionally requires that ar is page-aligned.
//
// getPMAsInternalLocked is an implementation helper for getPMAsLocked and
// getVecPMAsLocked; other clients should call one of those instead.
func (mm *MemoryManager) getPMAsInternalLocked(ctx context.Context, vseg vmaIterator, ar hostarch.AddrRange, at hostarch.AccessType) (pmaIterator, pmaGapIterator, error) {
	if checkInvariants {
		if !ar.WellFormed() || ar.Length() == 0 || !ar.IsPageAligned() {
			panic(fmt.Sprintf("invalid ar: %v", ar))
		}
		if !vseg.Ok() {
			panic("terminal vma iterator")
		}
		if !vseg.Range().Contains(ar.Start) {
			panic(fmt.Sprintf("initial vma %v does not cover start of ar %v", vseg.Range(), ar))
		}
	}

	mf := mm.mfp.MemoryFile()
	// Limit the range we allocate to ar, aligned to privateAllocUnit.
	maskAR := privateAligned(ar)
	didUnmapAS := false
	// The range in which we iterate vmas and pmas is still limited to ar, to
	// ensure that we don't allocate or COW-break a pma we don't need.
	pseg, pgap := mm.pmas.Find(ar.Start)
	pstart := pseg
	for {
		// Get pmas for this vma.
		vsegAR := vseg.Range().Intersect(ar)
		vma := vseg.ValuePtr()
	pmaLoop:
		for {
			switch {
			case pgap.Ok() && pgap.Start() < vsegAR.End:
				// Need a pma here.
				optAR := vseg.Range().Intersect(pgap.Range())
				if checkInvariants {
					if optAR.Length() == 0 {
						panic(fmt.Sprintf("vseg %v and pgap %v do not overlap", vseg, pgap))
					}
				}
				if vma.mappable == nil {
					// Private anonymous mappings get pmas by allocating.
					allocAR := optAR.Intersect(maskAR)
					fr, err := mf.Allocate(uint64(allocAR.Length()), usage.Anonymous)
					if err != nil {
						return pstart, pgap, err
					}
					if checkInvariants {
						if !fr.WellFormed() || fr.Length() != uint64(allocAR.Length()) {
							panic(fmt.Sprintf("Allocate(%v) returned invalid FileRange %v", allocAR.Length(), fr))
						}
					}
					mm.addRSSLocked(allocAR)
					mm.incPrivateRef(fr)
					mf.IncRef(fr)
					pseg, pgap = mm.pmas.Insert(pgap, allocAR, pma{
						file:           mf,
						off:            fr.Start,
						translatePerms: hostarch.AnyAccess,
						effectivePerms: vma.effectivePerms,
						maxPerms:       vma.maxPerms,
						// Since we just allocated this memory and have the
						// only reference, the new pma does not need
						// copy-on-write.
						private: true,
					}).NextNonEmpty()
					pstart = pmaIterator{} // iterators invalidated
				} else {
					// Other mappings get pmas by translating.
					optMR := vseg.mappableRangeOf(optAR)
					reqAR := optAR.Intersect(ar)
					reqMR := vseg.mappableRangeOf(reqAR)
					perms := at
					if vma.private {
						// This pma will be copy-on-write; don't require write
						// permission, but do require read permission to
						// facilitate the copy.
						//
						// If at.Write is true, we will need to break
						// copy-on-write immediately, which occurs after
						// translation below.
						perms.Read = true
						perms.Write = false
					}
					ts, err := vma.mappable.Translate(ctx, reqMR, optMR, perms)
					if checkInvariants {
						if err := memmap.CheckTranslateResult(reqMR, optMR, perms, ts, err); err != nil {
							panic(fmt.Sprintf("Mappable(%T).Translate(%v, %v, %v): %v", vma.mappable, reqMR, optMR, perms, err))
						}
					}

					// Install a pma for each translation.
					if len(ts) == 0 {
						return pstart, pgap, err
					}
					pstart = pmaIterator{} // iterators invalidated
					for _, t := range ts {
						newpmaAR := vseg.addrRangeOf(t.Source)
						newpma := pma{
							file:           t.File,
							off:            t.Offset,
							translatePerms: t.Perms,
							effectivePerms: vma.effectivePerms.Intersect(t.Perms),
							maxPerms:       vma.maxPerms.Intersect(t.Perms),
						}
						if vma.private {
							newpma.effectivePerms.Write = false
							newpma.maxPerms.Write = false
							newpma.needCOW = true
						}
						mm.addRSSLocked(newpmaAR)
						t.File.IncRef(t.FileRange())
						// This is valid because memmap.Mappable.Translate is
						// required to return Translations in increasing
						// Translation.Source order.
						pseg = mm.pmas.Insert(pgap, newpmaAR, newpma)
						pgap = pseg.NextGap()
					}

					// The error returned by Translate is only significant if
					// it occurred before ar.End.
					if err != nil && vseg.addrRangeOf(ts[len(ts)-1].Source).End < ar.End {
						return pstart, pgap, err
					}

					// Rewind pseg to the first pma inserted and continue the
					// loop to check if we need to break copy-on-write.
					pseg, pgap = mm.findOrSeekPrevUpperBoundPMA(vseg.addrRangeOf(ts[0].Source).Start, pgap), pmaGapIterator{}
					continue
				}

			case pseg.Ok() && pseg.Start() < vsegAR.End:
				oldpma := pseg.ValuePtr()
				if at.Write && mm.isPMACopyOnWriteLocked(vseg, pseg) {
					// Break copy-on-write by copying.
					if checkInvariants {
						if !oldpma.maxPerms.Read {
							panic(fmt.Sprintf("pma %v needs to be copied for writing, but is not readable: %v", pseg.Range(), oldpma))
						}
					}
					// The majority of copy-on-write breaks on executable pages
					// come from:
					//
					// - The ELF loader, which must zero out bytes on the last
					// page of each segment after the end of the segment.
					//
					// - gdb's use of ptrace to insert breakpoints.
					//
					// Neither of these cases has enough spatial locality to
					// benefit from copying nearby pages, so if the vma is
					// executable, only copy the pages required.
					var copyAR hostarch.AddrRange
					if vseg.ValuePtr().effectivePerms.Execute {
						copyAR = pseg.Range().Intersect(ar)
					} else {
						copyAR = pseg.Range().Intersect(maskAR)
					}
					// Get internal mappings from the pma to copy from.
					if err := pseg.getInternalMappingsLocked(); err != nil {
						return pstart, pseg.PrevGap(), err
					}
					// Copy contents.
					fr, err := mf.AllocateAndFill(uint64(copyAR.Length()), usage.Anonymous, &safemem.BlockSeqReader{mm.internalMappingsLocked(pseg, copyAR)})
					if _, ok := err.(safecopy.BusError); ok {
						// If we got SIGBUS during the copy, deliver SIGBUS to
						// userspace (instead of SIGSEGV) if we're breaking
						// copy-on-write due to application page fault.
						err = &memmap.BusError{err}
					}
					if fr.Length() == 0 {
						return pstart, pseg.PrevGap(), err
					}
					// Unmap all of maskAR, not just copyAR, to minimize host
					// syscalls. AddressSpace mappings must be removed before
					// mm.decPrivateRef().
					if !didUnmapAS {
						mm.unmapASLocked(maskAR)
						didUnmapAS = true
					}
					// Replace the pma with a copy in the part of the address
					// range where copying was successful. This doesn't change
					// RSS.
					copyAR.End = copyAR.Start + hostarch.Addr(fr.Length())
					if copyAR != pseg.Range() {
						pseg = mm.pmas.Isolate(pseg, copyAR)
						pstart = pmaIterator{} // iterators invalidated
					}
					oldpma = pseg.ValuePtr()
					if oldpma.private {
						mm.decPrivateRef(pseg.fileRange())
					}
					oldpma.file.DecRef(pseg.fileRange())
					mm.incPrivateRef(fr)
					mf.IncRef(fr)
					oldpma.file = mf
					oldpma.off = fr.Start
					oldpma.translatePerms = hostarch.AnyAccess
					oldpma.effectivePerms = vma.effectivePerms
					oldpma.maxPerms = vma.maxPerms
					oldpma.needCOW = false
					oldpma.private = true
					oldpma.internalMappings = safemem.BlockSeq{}
					// Try to merge the pma with its neighbors.
					if prev := pseg.PrevSegment(); prev.Ok() {
						if merged := mm.pmas.Merge(prev, pseg); merged.Ok() {
							pseg = merged
							pstart = pmaIterator{} // iterators invalidated
						}
					}
					if next := pseg.NextSegment(); next.Ok() {
						if merged := mm.pmas.Merge(pseg, next); merged.Ok() {
							pseg = merged
							pstart = pmaIterator{} // iterators invalidated
						}
					}
					// The error returned by AllocateAndFill is only
					// significant if it occurred before ar.End.
					if err != nil && pseg.End() < ar.End {
						return pstart, pseg.NextGap(), err
					}
					// Ensure pseg and pgap are correct for the next iteration
					// of the loop.
					pseg, pgap = pseg.NextNonEmpty()
				} else if !oldpma.translatePerms.SupersetOf(at) {
					// Get new pmas (with sufficient permissions) by calling
					// memmap.Mappable.Translate again.
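					//
					// Note (added commentary): a private pma is always
					// installed with translatePerms == hostarch.AnyAccess
					// (see isPMACopyOnWriteLocked), so it can never fail the
					// SupersetOf check above; the invariant check below
					// relies on this.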
					if checkInvariants {
						if oldpma.private {
							panic(fmt.Sprintf("private pma %v has non-maximal pma.translatePerms: %v", pseg.Range(), oldpma))
						}
					}

					// Allow the entire pma to be replaced.
					optAR := pseg.Range()
					optMR := vseg.mappableRangeOf(optAR)
					reqAR := optAR.Intersect(ar)
					reqMR := vseg.mappableRangeOf(reqAR)
					perms := oldpma.translatePerms.Union(at)
					ts, err := vma.mappable.Translate(ctx, reqMR, optMR, perms)
					if checkInvariants {
						if err := memmap.CheckTranslateResult(reqMR, optMR, perms, ts, err); err != nil {
							panic(fmt.Sprintf("Mappable(%T).Translate(%v, %v, %v): %v", vma.mappable, reqMR, optMR, perms, err))
						}
					}

					// Remove the part of the existing pma covered by new
					// Translations, then insert new pmas. This doesn't change
					// RSS. Note that we don't need to call unmapASLocked: any
					// existing AddressSpace mappings are still valid (though
					// less permissive than the new pmas indicate) until
					// Invalidate is called, and will be replaced by future
					// calls to mapASLocked.
					if len(ts) == 0 {
						return pstart, pseg.PrevGap(), err
					}
					transMR := memmap.MappableRange{ts[0].Source.Start, ts[len(ts)-1].Source.End}
					transAR := vseg.addrRangeOf(transMR)
					pseg = mm.pmas.Isolate(pseg, transAR)
					pseg.ValuePtr().file.DecRef(pseg.fileRange())
					pgap = mm.pmas.Remove(pseg)
					pstart = pmaIterator{} // iterators invalidated
					for _, t := range ts {
						newpmaAR := vseg.addrRangeOf(t.Source)
						newpma := pma{
							file:           t.File,
							off:            t.Offset,
							translatePerms: t.Perms,
							effectivePerms: vma.effectivePerms.Intersect(t.Perms),
							maxPerms:       vma.maxPerms.Intersect(t.Perms),
						}
						if vma.private {
							newpma.effectivePerms.Write = false
							newpma.maxPerms.Write = false
							newpma.needCOW = true
						}
						t.File.IncRef(t.FileRange())
						pseg = mm.pmas.Insert(pgap, newpmaAR, newpma)
						pgap = pseg.NextGap()
					}

					// The error returned by Translate is only significant if
					// it occurred before ar.End.
					if err != nil && pseg.End() < ar.End {
						return pstart, pgap, err
					}

					// Ensure pseg and pgap are correct for the next iteration
					// of the loop.
					if pgap.Range().Length() == 0 {
						pseg, pgap = pgap.NextSegment(), pmaGapIterator{}
					} else {
						pseg = pmaIterator{}
					}
				} else {
					// We have a usable pma; continue.
					pseg, pgap = pseg.NextNonEmpty()
				}

			default:
				break pmaLoop
			}
		}

		// Go to the next vma.
		if ar.End <= vseg.End() {
			if pgap.Ok() {
				return pstart, pgap, nil
			}
			return pstart, pseg.PrevGap(), nil
		}
		vseg = vseg.NextSegment()
	}
}

const (
	// When memory is allocated for a private pma, align the allocated address
	// range to a privateAllocUnit boundary when possible. Larger values of
	// privateAllocUnit may reduce page faults by allowing fewer, larger pmas
	// to be mapped, but may result in larger amounts of wasted memory in the
	// presence of fragmentation. privateAllocUnit must be a power-of-2
	// multiple of hostarch.PageSize.
	privateAllocUnit = hostarch.HugePageSize

	privateAllocMask = privateAllocUnit - 1
)

func privateAligned(ar hostarch.AddrRange) hostarch.AddrRange {
	aligned := hostarch.AddrRange{ar.Start &^ privateAllocMask, ar.End}
	if end := (ar.End + privateAllocMask) &^ privateAllocMask; end >= ar.End {
		aligned.End = end
	}
	if checkInvariants {
		if !aligned.IsSupersetOf(ar) {
			panic(fmt.Sprintf("aligned AddrRange %#v is not a superset of ar %#v", aligned, ar))
		}
	}
	return aligned
}
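
// Added commentary (illustrative only, assuming a 2 MB privateAllocUnit as on
// x86-64): privateAligned([0x201000, 0x203000)) = [0x200000, 0x400000).
// getPMAsInternalLocked further intersects this with the containing vma and
// the existing pma gap, so alignment can only widen private allocations, not
// map memory outside the vma.
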
// isPMACopyOnWriteLocked returns true if the contents of the pma represented
// by pseg must be copied to a new private pma to be written to.
//
// If the pma is a copy-on-write private pma, and holds the only reference on
// the memory it maps, isPMACopyOnWriteLocked will take ownership of the memory
// and update the pma to indicate that it does not require copy-on-write.
//
// Preconditions:
// * vseg.Range().IsSupersetOf(pseg.Range()).
// * mm.mappingMu must be locked.
// * mm.activeMu must be locked for writing.
func (mm *MemoryManager) isPMACopyOnWriteLocked(vseg vmaIterator, pseg pmaIterator) bool {
	pma := pseg.ValuePtr()
	if !pma.needCOW {
		return false
	}
	if !pma.private {
		return true
	}
	// If we have the only reference on private memory to be copied, just take
	// ownership of it instead of copying. If we do hold the only reference,
	// additional references can only be taken by mm.Fork(), which is excluded
	// by mm.activeMu, so this isn't racy.
	mm.privateRefs.mu.Lock()
	defer mm.privateRefs.mu.Unlock()
	fr := pseg.fileRange()
	// This check relies on mm.privateRefs.refs being kept fully merged.
	rseg := mm.privateRefs.refs.FindSegment(fr.Start)
	if rseg.Ok() && rseg.Value() == 1 && fr.End <= rseg.End() {
		pma.needCOW = false
		// pma.private => pma.translatePerms == hostarch.AnyAccess
		vma := vseg.ValuePtr()
		pma.effectivePerms = vma.effectivePerms
		pma.maxPerms = vma.maxPerms
		return false
	}
	return true
}

// Invalidate implements memmap.MappingSpace.Invalidate.
func (mm *MemoryManager) Invalidate(ar hostarch.AddrRange, opts memmap.InvalidateOpts) {
	if checkInvariants {
		if !ar.WellFormed() || ar.Length() == 0 || !ar.IsPageAligned() {
			panic(fmt.Sprintf("invalid ar: %v", ar))
		}
	}

	mm.activeMu.Lock()
	defer mm.activeMu.Unlock()
	if mm.captureInvalidations {
		mm.capturedInvalidations = append(mm.capturedInvalidations, invalidateArgs{ar, opts})
		return
	}
	mm.invalidateLocked(ar, opts.InvalidatePrivate, true)
}

// invalidateLocked removes pmas and AddressSpace mappings of those pmas for
// addresses in ar.
//
// Preconditions:
// * mm.activeMu must be locked for writing.
// * ar.Length() != 0.
// * ar must be page-aligned.
func (mm *MemoryManager) invalidateLocked(ar hostarch.AddrRange, invalidatePrivate, invalidateShared bool) {
	if checkInvariants {
		if !ar.WellFormed() || ar.Length() == 0 || !ar.IsPageAligned() {
			panic(fmt.Sprintf("invalid ar: %v", ar))
		}
	}

	var didUnmapAS bool
	pseg := mm.pmas.LowerBoundSegment(ar.Start)
	for pseg.Ok() && pseg.Start() < ar.End {
		pma := pseg.ValuePtr()
		if (invalidatePrivate && pma.private) || (invalidateShared && !pma.private) {
			pseg = mm.pmas.Isolate(pseg, ar)
			pma = pseg.ValuePtr()
			if !didUnmapAS {
				// Unmap all of ar, not just pseg.Range(), to minimize host
				// syscalls. AddressSpace mappings must be removed before
				// mm.decPrivateRef().
				mm.unmapASLocked(ar)
				didUnmapAS = true
			}
			if pma.private {
				mm.decPrivateRef(pseg.fileRange())
			}
			mm.removeRSSLocked(pseg.Range())
			pma.file.DecRef(pseg.fileRange())
			pseg = mm.pmas.Remove(pseg).NextSegment()
		} else {
			pseg = pseg.NextSegment()
		}
	}
}

// Pin returns the memmap.File ranges currently mapped by addresses in ar in
// mm, acquiring a reference on the returned ranges which the caller must
// release by calling Unpin. If not all addresses are mapped, Pin returns a
// non-nil error. Note that Pin may return both a non-empty slice of
// PinnedRanges and a non-nil error.
//
// Pin does not prevent mapped ranges from changing, making it unsuitable for
// most I/O. It should only be used in contexts that would use get_user_pages()
// in the Linux kernel.
//
// Preconditions:
// * ar.Length() != 0.
// * ar must be page-aligned.
func (mm *MemoryManager) Pin(ctx context.Context, ar hostarch.AddrRange, at hostarch.AccessType, ignorePermissions bool) ([]PinnedRange, error) {
	if checkInvariants {
		if !ar.WellFormed() || ar.Length() == 0 || !ar.IsPageAligned() {
			panic(fmt.Sprintf("invalid ar: %v", ar))
		}
	}

	// Ensure that we have usable vmas.
	mm.mappingMu.RLock()
	vseg, vend, verr := mm.getVMAsLocked(ctx, ar, at, ignorePermissions)
	if vendaddr := vend.Start(); vendaddr < ar.End {
		if vendaddr <= ar.Start {
			mm.mappingMu.RUnlock()
			return nil, verr
		}
		ar.End = vendaddr
	}

	// Ensure that we have usable pmas.
	mm.activeMu.Lock()
	pseg, pend, perr := mm.getPMAsLocked(ctx, vseg, ar, at)
	mm.mappingMu.RUnlock()
	if pendaddr := pend.Start(); pendaddr < ar.End {
		if pendaddr <= ar.Start {
			mm.activeMu.Unlock()
			return nil, perr
		}
		ar.End = pendaddr
	}

	// Gather pmas.
	var prs []PinnedRange
	for pseg.Ok() && pseg.Start() < ar.End {
		psar := pseg.Range().Intersect(ar)
		f := pseg.ValuePtr().file
		fr := pseg.fileRangeOf(psar)
		f.IncRef(fr)
		prs = append(prs, PinnedRange{
			Source: psar,
			File:   f,
			Offset: fr.Start,
		})
		pseg = pseg.NextSegment()
	}
	mm.activeMu.Unlock()

	// Return the first error in order of progress through ar.
	if perr != nil {
		return prs, perr
	}
	return prs, verr
}

// PinnedRanges are returned by MemoryManager.Pin.
type PinnedRange struct {
	// Source is the corresponding range of addresses.
	Source hostarch.AddrRange

	// File is the mapped file.
	File memmap.File

	// Offset is the offset into File at which this PinnedRange begins.
	Offset uint64
}

// FileRange returns the memmap.File offsets mapped by pr.
func (pr PinnedRange) FileRange() memmap.FileRange {
	return memmap.FileRange{pr.Offset, pr.Offset + uint64(pr.Source.Length())}
}

// Unpin releases the reference held by prs.
func Unpin(prs []PinnedRange) {
	for i := range prs {
		prs[i].File.DecRef(prs[i].FileRange())
	}
}

// movePMAsLocked moves all pmas in oldAR to newAR.
//
// Preconditions:
// * mm.activeMu must be locked for writing.
// * oldAR.Length() != 0.
// * oldAR.Length() <= newAR.Length().
// * !oldAR.Overlaps(newAR).
// * mm.pmas.IsEmptyRange(newAR).
// * oldAR and newAR must be page-aligned.
func (mm *MemoryManager) movePMAsLocked(oldAR, newAR hostarch.AddrRange) {
	if checkInvariants {
		if !oldAR.WellFormed() || oldAR.Length() == 0 || !oldAR.IsPageAligned() {
			panic(fmt.Sprintf("invalid oldAR: %v", oldAR))
		}
		if !newAR.WellFormed() || newAR.Length() == 0 || !newAR.IsPageAligned() {
			panic(fmt.Sprintf("invalid newAR: %v", newAR))
		}
		if oldAR.Length() > newAR.Length() {
			panic(fmt.Sprintf("old address range %v may contain pmas that will not fit in new address range %v", oldAR, newAR))
		}
		if oldAR.Overlaps(newAR) {
			panic(fmt.Sprintf("old and new address ranges overlap: %v, %v", oldAR, newAR))
		}
		// mm.pmas.IsEmptyRange is checked by mm.pmas.Insert.
	}

	type movedPMA struct {
		oldAR hostarch.AddrRange
		pma   pma
	}
	var movedPMAs []movedPMA
	pseg := mm.pmas.LowerBoundSegment(oldAR.Start)
	for pseg.Ok() && pseg.Start() < oldAR.End {
		pseg = mm.pmas.Isolate(pseg, oldAR)
		movedPMAs = append(movedPMAs, movedPMA{
			oldAR: pseg.Range(),
			pma:   pseg.Value(),
		})
		pseg = mm.pmas.Remove(pseg).NextSegment()
		// No RSS change is needed since we're re-inserting the same pmas
		// below.
	}
	off := newAR.Start - oldAR.Start
	pgap := mm.pmas.FindGap(newAR.Start)
	for i := range movedPMAs {
		mpma := &movedPMAs[i]
		pmaNewAR := hostarch.AddrRange{mpma.oldAR.Start + off, mpma.oldAR.End + off}
		pgap = mm.pmas.Insert(pgap, pmaNewAR, mpma.pma).NextGap()
	}

	mm.unmapASLocked(oldAR)
}

// getPMAInternalMappingsLocked ensures that pmas for all addresses in ar have
// cached internal mappings. It returns:
//
// - An iterator to the gap after the last pma with internal mappings
// containing an address in ar. If internal mappings exist for no addresses in
// ar, the iterator is to a gap that begins before ar.Start.
//
// - An error that is non-nil if internal mappings exist for only a subset of
// ar.
//
// Preconditions:
// * mm.activeMu must be locked for writing.
// * pseg.Range().Contains(ar.Start).
// * pmas must exist for all addresses in ar.
// * ar.Length() != 0.
//
// Postconditions: getPMAInternalMappingsLocked does not invalidate iterators
// into mm.pmas.
func (mm *MemoryManager) getPMAInternalMappingsLocked(pseg pmaIterator, ar hostarch.AddrRange) (pmaGapIterator, error) {
	if checkInvariants {
		if !ar.WellFormed() || ar.Length() == 0 {
			panic(fmt.Sprintf("invalid ar: %v", ar))
		}
		if !pseg.Range().Contains(ar.Start) {
			panic(fmt.Sprintf("initial pma %v does not cover start of ar %v", pseg.Range(), ar))
		}
	}

	for {
		if err := pseg.getInternalMappingsLocked(); err != nil {
			return pseg.PrevGap(), err
		}
		if ar.End <= pseg.End() {
			return pseg.NextGap(), nil
		}
		pseg, _ = pseg.NextNonEmpty()
	}
}

// getVecPMAInternalMappingsLocked ensures that pmas for all addresses in ars
// have cached internal mappings. It returns the subset of ars for which
// internal mappings exist. If this is not equal to ars, it returns a non-nil
// error explaining why.
//
// Preconditions:
// * mm.activeMu must be locked for writing.
// * pmas must exist for all addresses in ars.
//
// Postconditions: getVecPMAInternalMappingsLocked does not invalidate iterators
// into mm.pmas.
func (mm *MemoryManager) getVecPMAInternalMappingsLocked(ars hostarch.AddrRangeSeq) (hostarch.AddrRangeSeq, error) {
	for arsit := ars; !arsit.IsEmpty(); arsit = arsit.Tail() {
		ar := arsit.Head()
		if ar.Length() == 0 {
			continue
		}
		if pend, err := mm.getPMAInternalMappingsLocked(mm.pmas.FindSegment(ar.Start), ar); err != nil {
			return truncatedAddrRangeSeq(ars, arsit, pend.Start()), err
		}
	}
	return ars, nil
}

// internalMappingsLocked returns internal mappings for addresses in ar.
//
// Preconditions:
// * mm.activeMu must be locked.
// * Internal mappings must have been previously established for all addresses
//   in ar.
// * ar.Length() != 0.
// * pseg.Range().Contains(ar.Start).
func (mm *MemoryManager) internalMappingsLocked(pseg pmaIterator, ar hostarch.AddrRange) safemem.BlockSeq {
	if checkInvariants {
		if !ar.WellFormed() || ar.Length() == 0 {
			panic(fmt.Sprintf("invalid ar: %v", ar))
		}
		if !pseg.Range().Contains(ar.Start) {
			panic(fmt.Sprintf("initial pma %v does not cover start of ar %v", pseg.Range(), ar))
		}
	}

	if ar.End <= pseg.End() {
		// Since only one pma is involved, we can use pma.internalMappings
		// directly, avoiding a slice allocation.
		offset := uint64(ar.Start - pseg.Start())
		return pseg.ValuePtr().internalMappings.DropFirst64(offset).TakeFirst64(uint64(ar.Length()))
	}

	var ims []safemem.Block
	for {
		pr := pseg.Range().Intersect(ar)
		for pims := pseg.ValuePtr().internalMappings.DropFirst64(uint64(pr.Start - pseg.Start())).TakeFirst64(uint64(pr.Length())); !pims.IsEmpty(); pims = pims.Tail() {
			ims = append(ims, pims.Head())
		}
		if ar.End <= pseg.End() {
			break
		}
		pseg = pseg.NextSegment()
	}
	return safemem.BlockSeqFromSlice(ims)
}

// vecInternalMappingsLocked returns internal mappings for addresses in ars.
//
// Preconditions:
// * mm.activeMu must be locked.
// * Internal mappings must have been previously established for all addresses
//   in ars.
func (mm *MemoryManager) vecInternalMappingsLocked(ars hostarch.AddrRangeSeq) safemem.BlockSeq {
	var ims []safemem.Block
	for ; !ars.IsEmpty(); ars = ars.Tail() {
		ar := ars.Head()
		if ar.Length() == 0 {
			continue
		}
		for pims := mm.internalMappingsLocked(mm.pmas.FindSegment(ar.Start), ar); !pims.IsEmpty(); pims = pims.Tail() {
			ims = append(ims, pims.Head())
		}
	}
	return safemem.BlockSeqFromSlice(ims)
}

// incPrivateRef acquires a reference on private pages in fr.
func (mm *MemoryManager) incPrivateRef(fr memmap.FileRange) {
	mm.privateRefs.mu.Lock()
	defer mm.privateRefs.mu.Unlock()
	refSet := &mm.privateRefs.refs
	seg, gap := refSet.Find(fr.Start)
	for {
		switch {
		case seg.Ok() && seg.Start() < fr.End:
			seg = refSet.Isolate(seg, fr)
			seg.SetValue(seg.Value() + 1)
			seg, gap = seg.NextNonEmpty()
		case gap.Ok() && gap.Start() < fr.End:
			seg, gap = refSet.InsertWithoutMerging(gap, gap.Range().Intersect(fr), 1).NextNonEmpty()
		default:
			refSet.MergeAdjacent(fr)
			return
		}
	}
}

// decPrivateRef releases a reference on private pages in fr.
func (mm *MemoryManager) decPrivateRef(fr memmap.FileRange) {
	var freed []memmap.FileRange

	mm.privateRefs.mu.Lock()
	refSet := &mm.privateRefs.refs
	seg := refSet.LowerBoundSegment(fr.Start)
	for seg.Ok() && seg.Start() < fr.End {
		seg = refSet.Isolate(seg, fr)
		if old := seg.Value(); old == 1 {
			freed = append(freed, seg.Range())
			seg = refSet.Remove(seg).NextSegment()
		} else {
			seg.SetValue(old - 1)
			seg = seg.NextSegment()
		}
	}
	refSet.MergeAdjacent(fr)
	mm.privateRefs.mu.Unlock()

	mf := mm.mfp.MemoryFile()
	for _, fr := range freed {
		mf.DecRef(fr)
	}
}

// addRSSLocked updates the current and maximum resident set size of a
// MemoryManager to reflect the insertion of a pma at ar.
//
// Preconditions: mm.activeMu must be locked for writing.
func (mm *MemoryManager) addRSSLocked(ar hostarch.AddrRange) {
	mm.curRSS += uint64(ar.Length())
	if mm.curRSS > mm.maxRSS {
		mm.maxRSS = mm.curRSS
	}
}

// removeRSSLocked updates the current resident set size of a MemoryManager to
// reflect the removal of a pma at ar.
//
// Preconditions: mm.activeMu must be locked for writing.
func (mm *MemoryManager) removeRSSLocked(ar hostarch.AddrRange) {
	mm.curRSS -= uint64(ar.Length())
}

// pmaSetFunctions implements segment.Functions for pmaSet.
type pmaSetFunctions struct{}

func (pmaSetFunctions) MinKey() hostarch.Addr {
	return 0
}

func (pmaSetFunctions) MaxKey() hostarch.Addr {
	return ^hostarch.Addr(0)
}

func (pmaSetFunctions) ClearValue(pma *pma) {
	pma.file = nil
	pma.internalMappings = safemem.BlockSeq{}
}

func (pmaSetFunctions) Merge(ar1 hostarch.AddrRange, pma1 pma, ar2 hostarch.AddrRange, pma2 pma) (pma, bool) {
	if pma1.file != pma2.file ||
		pma1.off+uint64(ar1.Length()) != pma2.off ||
		pma1.translatePerms != pma2.translatePerms ||
		pma1.effectivePerms != pma2.effectivePerms ||
		pma1.maxPerms != pma2.maxPerms ||
		pma1.needCOW != pma2.needCOW ||
		pma1.private != pma2.private {
		return pma{}, false
	}

	// Discard internal mappings instead of trying to merge them, since merging
	// them requires an allocation and getting them again from the
	// memmap.File might not.
	pma1.internalMappings = safemem.BlockSeq{}
	return pma1, true
}

func (pmaSetFunctions) Split(ar hostarch.AddrRange, p pma, split hostarch.Addr) (pma, pma) {
	newlen1 := uint64(split - ar.Start)
	p2 := p
	p2.off += newlen1
	if !p.internalMappings.IsEmpty() {
		p.internalMappings = p.internalMappings.TakeFirst64(newlen1)
		p2.internalMappings = p2.internalMappings.DropFirst64(newlen1)
	}
	return p, p2
}

// findOrSeekPrevUpperBoundPMA returns mm.pmas.UpperBoundSegment(addr), but may
// do so by scanning linearly backward from pgap.
//
// Preconditions:
// * mm.activeMu must be locked.
// * addr <= pgap.Start().
func (mm *MemoryManager) findOrSeekPrevUpperBoundPMA(addr hostarch.Addr, pgap pmaGapIterator) pmaIterator {
	if checkInvariants {
		if !pgap.Ok() {
			panic("terminal pma iterator")
		}
		if addr > pgap.Start() {
			panic(fmt.Sprintf("can't seek backward to %#x from %#x", addr, pgap.Start()))
		}
	}
	// Optimistically check if pgap.PrevSegment() is the PMA we're looking for,
	// which is the case if findOrSeekPrevUpperBoundPMA is called to find the
	// start of a range containing only a single PMA.
	if pseg := pgap.PrevSegment(); pseg.Start() <= addr {
		return pseg
	}
	return mm.pmas.UpperBoundSegment(addr)
}

// getInternalMappingsLocked ensures that pseg.ValuePtr().internalMappings is
// non-empty.
//
// Preconditions: mm.activeMu must be locked for writing.
func (pseg pmaIterator) getInternalMappingsLocked() error {
	pma := pseg.ValuePtr()
	if pma.internalMappings.IsEmpty() {
		// This must use maxPerms (instead of perms) because some permission
		// constraints are only visible to vmas; for example, mappings of
		// read-only files have vma.maxPerms.Write unset, but this may not be
		// visible to the memmap.Mappable.
		perms := pma.maxPerms
		// We will never execute application code through an internal mapping.
		perms.Execute = false
		ims, err := pma.file.MapInternal(pseg.fileRange(), perms)
		if err != nil {
			return err
		}
		pma.internalMappings = ims
	}
	return nil
}

func (pseg pmaIterator) fileRange() memmap.FileRange {
	return pseg.fileRangeOf(pseg.Range())
}

// Preconditions:
// * pseg.Range().IsSupersetOf(ar).
// * ar.Length() != 0.
func (pseg pmaIterator) fileRangeOf(ar hostarch.AddrRange) memmap.FileRange {
	if checkInvariants {
		if !pseg.Ok() {
			panic("terminal pma iterator")
		}
		if !ar.WellFormed() || ar.Length() == 0 {
			panic(fmt.Sprintf("invalid ar: %v", ar))
		}
		if !pseg.Range().IsSupersetOf(ar) {
			panic(fmt.Sprintf("ar %v out of bounds %v", ar, pseg.Range()))
		}
	}

	pma := pseg.ValuePtr()
	pstart := pseg.Start()
	return memmap.FileRange{pma.off + uint64(ar.Start-pstart), pma.off + uint64(ar.End-pstart)}
}
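
// The following is an illustrative sketch rather than part of the original
// file: it demonstrates the Pin/Unpin contract documented above, namely that
// Pin may return both pinned ranges and a non-nil error, and that every
// returned PinnedRange holds a file reference that must be released exactly
// once via Unpin. The helper name withPinnedRanges and the callback-based
// shape are assumptions made for this example.
func withPinnedRanges(ctx context.Context, mm *MemoryManager, ar hostarch.AddrRange, at hostarch.AccessType, fn func([]PinnedRange)) error {
	prs, err := mm.Pin(ctx, ar, at, false /* ignorePermissions */)
	// Unpin must run even on error, since Pin may pin a prefix of ar before
	// failing, and each returned PinnedRange holds a file reference.
	defer Unpin(prs)
	if err != nil {
		return err
	}
	// The references are held while fn runs, but note that Pin does not
	// prevent the application's mappings from changing in the meantime.
	fn(prs)
	return nil
}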