Diffstat (limited to 'pkg/sentry/mm')
-rw-r--r-- | pkg/sentry/mm/address_space.go | 216
-rw-r--r-- | pkg/sentry/mm/aio_context.go | 387
-rw-r--r-- | pkg/sentry/mm/aio_context_state.go | 20
-rw-r--r-- | pkg/sentry/mm/debug.go | 98
-rwxr-xr-x | pkg/sentry/mm/file_refcount_set.go | 1274
-rw-r--r-- | pkg/sentry/mm/io.go | 639
-rwxr-xr-x | pkg/sentry/mm/io_list.go | 173
-rw-r--r-- | pkg/sentry/mm/lifecycle.go | 234
-rw-r--r-- | pkg/sentry/mm/metadata.go | 139
-rw-r--r-- | pkg/sentry/mm/mm.go | 456
-rwxr-xr-x | pkg/sentry/mm/mm_state_autogen.go | 380
-rw-r--r-- | pkg/sentry/mm/pma.go | 1036
-rwxr-xr-x | pkg/sentry/mm/pma_set.go | 1274
-rw-r--r-- | pkg/sentry/mm/procfs.go | 289
-rw-r--r-- | pkg/sentry/mm/save_restore.go | 57
-rw-r--r-- | pkg/sentry/mm/shm.go | 66
-rw-r--r-- | pkg/sentry/mm/special_mappable.go | 155
-rw-r--r-- | pkg/sentry/mm/syscalls.go | 1197
-rw-r--r-- | pkg/sentry/mm/vma.go | 564
-rwxr-xr-x | pkg/sentry/mm/vma_set.go | 1274
20 files changed, 9928 insertions, 0 deletions
diff --git a/pkg/sentry/mm/address_space.go b/pkg/sentry/mm/address_space.go new file mode 100644 index 000000000..06f587fde --- /dev/null +++ b/pkg/sentry/mm/address_space.go @@ -0,0 +1,216 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package mm + +import ( + "fmt" + "sync/atomic" + + "gvisor.googlesource.com/gvisor/pkg/atomicbitops" + "gvisor.googlesource.com/gvisor/pkg/sentry/platform" + "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" +) + +// AddressSpace returns the platform.AddressSpace bound to mm. +// +// Preconditions: The caller must have called mm.Activate(). +func (mm *MemoryManager) AddressSpace() platform.AddressSpace { + if atomic.LoadInt32(&mm.active) == 0 { + panic("trying to use inactive address space?") + } + return mm.as +} + +// Activate ensures this MemoryManager has a platform.AddressSpace. +// +// The caller must not hold any locks when calling Activate. +// +// When this MemoryManager is no longer needed by a task, it should call +// Deactivate to release the reference. +func (mm *MemoryManager) Activate() error { + // Fast path: the MemoryManager already has an active + // platform.AddressSpace, and we just need to indicate that we need it too. + if atomicbitops.IncUnlessZeroInt32(&mm.active) { + return nil + } + + for { + // Slow path: may need to synchronize with other goroutines changing + // mm.active to or from zero. + mm.activeMu.Lock() + // Inline Unlock instead of using a defer for performance since this + // method is commonly in the hot-path. + + // Check if we raced with another goroutine performing activation. + if atomic.LoadInt32(&mm.active) > 0 { + // This can't race; Deactivate can't decrease mm.active from 1 to 0 + // without holding activeMu. + atomic.AddInt32(&mm.active, 1) + mm.activeMu.Unlock() + return nil + } + + // Do we have a context? If so, then we never unmapped it. This can + // only be the case if !mm.p.CooperativelySchedulesAddressSpace(). + if mm.as != nil { + atomic.StoreInt32(&mm.active, 1) + mm.activeMu.Unlock() + return nil + } + + // Get a new address space. We must force unmapping by passing nil to + // NewAddressSpace if requested. (As in the nil interface object, not a + // typed nil.) + mappingsID := (interface{})(mm) + if mm.unmapAllOnActivate { + mappingsID = nil + } + as, c, err := mm.p.NewAddressSpace(mappingsID) + if err != nil { + mm.activeMu.Unlock() + return err + } + if as == nil { + // AddressSpace is unavailable, we must wait. + // + // activeMu must not be held while waiting, as the user + // of the address space we are waiting on may attempt + // to take activeMu. + // + // Don't call UninterruptibleSleepStart to register the + // wait to allow the watchdog stuck task to trigger in + // case a process is starved waiting for the address + // space. + mm.activeMu.Unlock() + <-c + continue + } + + // Okay, we could restore all mappings at this point. + // But forget that. Let's just let them fault in. + mm.as = as + + // Unmapping is done, if necessary. 
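The Activate fast path above hinges on atomicbitops.IncUnlessZeroInt32: mm.active is incremented only while it is already non-zero, so activeMu is needed only for the 0 <-> 1 transitions. A minimal standalone sketch of the same pattern, substituting a portable sync/atomic CAS loop for gVisor's optimized assembly helper (incUnlessZero, refCounted, and acquire are hypothetical names used only for illustration):

package main

import (
	"fmt"
	"sync"
	"sync/atomic"
)

// incUnlessZero increments *v and returns true, unless *v is zero, in
// which case it returns false. This is a CAS-loop stand-in for
// atomicbitops.IncUnlessZeroInt32.
func incUnlessZero(v *int32) bool {
	for {
		cur := atomic.LoadInt32(v)
		if cur == 0 {
			return false
		}
		if atomic.CompareAndSwapInt32(v, cur, cur+1) {
			return true
		}
	}
}

// refCounted mirrors the Activate/Deactivate shape: a lock-free fast
// path, and a mutex-guarded slow path for the 0 <-> 1 transition.
type refCounted struct {
	mu     sync.Mutex
	active int32
}

func (r *refCounted) acquire() {
	if incUnlessZero(&r.active) {
		return // fast path: already active
	}
	r.mu.Lock()
	defer r.mu.Unlock()
	if atomic.LoadInt32(&r.active) > 0 {
		// Raced with another activator; safe because the count cannot
		// drop from 1 to 0 without holding mu.
		atomic.AddInt32(&r.active, 1)
		return
	}
	// Expensive one-time setup (NewAddressSpace in Activate) goes here.
	atomic.StoreInt32(&r.active, 1)
}

func main() {
	var r refCounted
	r.acquire()
	r.acquire()
	fmt.Println(atomic.LoadInt32(&r.active)) // 2
}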
+ mm.unmapAllOnActivate = false + + // Now that m.as has been assigned, we can set m.active to a non-zero value + // to enable the fast path. + atomic.StoreInt32(&mm.active, 1) + + mm.activeMu.Unlock() + return nil + } +} + +// Deactivate releases a reference to the MemoryManager. +func (mm *MemoryManager) Deactivate() { + // Fast path: this is not the last goroutine to deactivate the + // MemoryManager. + if atomicbitops.DecUnlessOneInt32(&mm.active) { + return + } + + mm.activeMu.Lock() + // Same as Activate. + + // Still active? + if atomic.AddInt32(&mm.active, -1) > 0 { + mm.activeMu.Unlock() + return + } + + // Can we hold on to the address space? + if !mm.p.CooperativelySchedulesAddressSpace() { + mm.activeMu.Unlock() + return + } + + // Release the address space. + mm.as.Release() + + // Lost it. + mm.as = nil + mm.activeMu.Unlock() +} + +// mapASLocked maps addresses in ar into mm.as. If precommit is true, mappings +// for all addresses in ar should be precommitted. +// +// Preconditions: mm.activeMu must be locked. mm.as != nil. ar.Length() != 0. +// ar must be page-aligned. pseg == mm.pmas.LowerBoundSegment(ar.Start). +func (mm *MemoryManager) mapASLocked(pseg pmaIterator, ar usermem.AddrRange, precommit bool) error { + // By default, map entire pmas at a time, under the assumption that there + // is no cost to mapping more of a pma than necessary. + mapAR := usermem.AddrRange{0, ^usermem.Addr(usermem.PageSize - 1)} + if precommit { + // When explicitly precommitting, only map ar, since overmapping may + // incur unexpected resource usage. + mapAR = ar + } else if mapUnit := mm.p.MapUnit(); mapUnit != 0 { + // Limit the range we map to ar, aligned to mapUnit. + mapMask := usermem.Addr(mapUnit - 1) + mapAR.Start = ar.Start &^ mapMask + // If rounding ar.End up overflows, just keep the existing mapAR.End. + if end := (ar.End + mapMask) &^ mapMask; end >= ar.End { + mapAR.End = end + } + } + if checkInvariants { + if !mapAR.IsSupersetOf(ar) { + panic(fmt.Sprintf("mapAR %#v is not a superset of ar %#v", mapAR, ar)) + } + } + + // Since this checks ar.End and not mapAR.End, we will never map a pma that + // is not required. + for pseg.Ok() && pseg.Start() < ar.End { + pma := pseg.ValuePtr() + pmaAR := pseg.Range() + pmaMapAR := pmaAR.Intersect(mapAR) + perms := pma.effectivePerms + if pma.needCOW { + perms.Write = false + } + if err := mm.as.MapFile(pmaMapAR.Start, pma.file, pseg.fileRangeOf(pmaMapAR), perms, precommit); err != nil { + return err + } + pseg = pseg.NextSegment() + } + return nil +} + +// unmapASLocked removes all AddressSpace mappings for addresses in ar. +// +// Preconditions: mm.activeMu must be locked. +func (mm *MemoryManager) unmapASLocked(ar usermem.AddrRange) { + if mm.as == nil { + // No AddressSpace? Force all mappings to be unmapped on the next + // Activate. + mm.unmapAllOnActivate = true + return + } + + // unmapASLocked doesn't require vmas or pmas to exist for ar, so it can be + // passed ranges that include addresses that can't be mapped by the + // application. + ar = ar.Intersect(mm.applicationAddrRange()) + + // Note that this AddressSpace may or may not be active. If the + // platform does not require cooperative sharing of AddressSpaces, they + // are retained between Deactivate/Activate calls. Despite not being + // active, it is still valid to perform operations on these address + // spaces. 
+ mm.as.Unmap(ar.Start, uint64(ar.Length())) +} diff --git a/pkg/sentry/mm/aio_context.go b/pkg/sentry/mm/aio_context.go new file mode 100644 index 000000000..5c61acf36 --- /dev/null +++ b/pkg/sentry/mm/aio_context.go @@ -0,0 +1,387 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package mm + +import ( + "sync" + + "gvisor.googlesource.com/gvisor/pkg/abi/linux" + "gvisor.googlesource.com/gvisor/pkg/refs" + "gvisor.googlesource.com/gvisor/pkg/sentry/context" + "gvisor.googlesource.com/gvisor/pkg/sentry/memmap" + "gvisor.googlesource.com/gvisor/pkg/sentry/pgalloc" + "gvisor.googlesource.com/gvisor/pkg/sentry/platform" + "gvisor.googlesource.com/gvisor/pkg/sentry/usage" + "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" + "gvisor.googlesource.com/gvisor/pkg/syserror" +) + +// aioManager creates and manages asynchronous I/O contexts. +// +// +stateify savable +type aioManager struct { + // mu protects below. + mu sync.Mutex `state:"nosave"` + + // aioContexts is the set of asynchronous I/O contexts. + contexts map[uint64]*AIOContext +} + +func (a *aioManager) destroy() { + a.mu.Lock() + defer a.mu.Unlock() + + for _, ctx := range a.contexts { + ctx.destroy() + } +} + +// newAIOContext creates a new context for asynchronous I/O. +// +// Returns false if 'id' is currently in use. +func (a *aioManager) newAIOContext(events uint32, id uint64) bool { + a.mu.Lock() + defer a.mu.Unlock() + + if _, ok := a.contexts[id]; ok { + return false + } + + a.contexts[id] = &AIOContext{ + done: make(chan struct{}, 1), + maxOutstanding: events, + } + return true +} + +// destroyAIOContext destroys an asynchronous I/O context. +// +// False is returned if the context does not exist. +func (a *aioManager) destroyAIOContext(id uint64) bool { + a.mu.Lock() + defer a.mu.Unlock() + ctx, ok := a.contexts[id] + if !ok { + return false + } + delete(a.contexts, id) + ctx.destroy() + return true +} + +// lookupAIOContext looks up the given context. +// +// Returns false if context does not exist. +func (a *aioManager) lookupAIOContext(id uint64) (*AIOContext, bool) { + a.mu.Lock() + defer a.mu.Unlock() + ctx, ok := a.contexts[id] + return ctx, ok +} + +// ioResult is a completed I/O operation. +// +// +stateify savable +type ioResult struct { + data interface{} + ioEntry +} + +// AIOContext is a single asynchronous I/O context. +// +// +stateify savable +type AIOContext struct { + // done is the notification channel used for all requests. + done chan struct{} `state:"nosave"` + + // mu protects below. + mu sync.Mutex `state:"nosave"` + + // results is the set of completed requests. + results ioList + + // maxOutstanding is the maximum number of outstanding entries; this value + // is immutable. + maxOutstanding uint32 + + // outstanding is the number of requests outstanding; this will effectively + // be the number of entries in the result list or that are expected to be + // added to the result list. 
+ outstanding uint32 + + // dead is set when the context is destroyed. + dead bool `state:"zerovalue"` +} + +// destroy marks the context dead. +func (ctx *AIOContext) destroy() { + ctx.mu.Lock() + defer ctx.mu.Unlock() + ctx.dead = true + if ctx.outstanding == 0 { + close(ctx.done) + } +} + +// Prepare reserves space for a new request, returning true if available. +// Returns false if the context is busy. +func (ctx *AIOContext) Prepare() bool { + ctx.mu.Lock() + defer ctx.mu.Unlock() + if ctx.outstanding >= ctx.maxOutstanding { + return false + } + ctx.outstanding++ + return true +} + +// PopRequest pops a completed request if available, this function does not do +// any blocking. Returns false if no request is available. +func (ctx *AIOContext) PopRequest() (interface{}, bool) { + ctx.mu.Lock() + defer ctx.mu.Unlock() + + // Is there anything ready? + if e := ctx.results.Front(); e != nil { + ctx.results.Remove(e) + ctx.outstanding-- + if ctx.outstanding == 0 && ctx.dead { + close(ctx.done) + } + return e.data, true + } + return nil, false +} + +// FinishRequest finishes a pending request. It queues up the data +// and notifies listeners. +func (ctx *AIOContext) FinishRequest(data interface{}) { + ctx.mu.Lock() + defer ctx.mu.Unlock() + + // Push to the list and notify opportunistically. The channel notify + // here is guaranteed to be safe because outstanding must be non-zero. + // The done channel is only closed when outstanding reaches zero. + ctx.results.PushBack(&ioResult{data: data}) + + select { + case ctx.done <- struct{}{}: + default: + } +} + +// WaitChannel returns a channel that is notified when an AIO request is +// completed. +// +// The boolean return value indicates whether or not the context is active. +func (ctx *AIOContext) WaitChannel() (chan struct{}, bool) { + ctx.mu.Lock() + defer ctx.mu.Unlock() + if ctx.outstanding == 0 && ctx.dead { + return nil, false + } + return ctx.done, true +} + +// aioMappable implements memmap.MappingIdentity and memmap.Mappable for AIO +// ring buffers. +// +// +stateify savable +type aioMappable struct { + refs.AtomicRefCount + + mfp pgalloc.MemoryFileProvider + fr platform.FileRange +} + +var aioRingBufferSize = uint64(usermem.Addr(linux.AIORingSize).MustRoundUp()) + +func newAIOMappable(mfp pgalloc.MemoryFileProvider) (*aioMappable, error) { + fr, err := mfp.MemoryFile().Allocate(aioRingBufferSize, usage.Anonymous) + if err != nil { + return nil, err + } + return &aioMappable{mfp: mfp, fr: fr}, nil +} + +// DecRef implements refs.RefCounter.DecRef. +func (m *aioMappable) DecRef() { + m.AtomicRefCount.DecRefWithDestructor(func() { + m.mfp.MemoryFile().DecRef(m.fr) + }) +} + +// MappedName implements memmap.MappingIdentity.MappedName. +func (m *aioMappable) MappedName(ctx context.Context) string { + return "[aio]" +} + +// DeviceID implements memmap.MappingIdentity.DeviceID. +func (m *aioMappable) DeviceID() uint64 { + return 0 +} + +// InodeID implements memmap.MappingIdentity.InodeID. +func (m *aioMappable) InodeID() uint64 { + return 0 +} + +// Msync implements memmap.MappingIdentity.Msync. +func (m *aioMappable) Msync(ctx context.Context, mr memmap.MappableRange) error { + // Linux: aio_ring_fops.fsync == NULL + return syserror.EINVAL +} + +// AddMapping implements memmap.Mappable.AddMapping. +func (m *aioMappable) AddMapping(_ context.Context, _ memmap.MappingSpace, ar usermem.AddrRange, offset uint64, _ bool) error { + // Don't allow mappings to be expanded (in Linux, fs/aio.c:aio_ring_mmap() + // sets VM_DONTEXPAND). 
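FinishRequest above never blocks: done is a buffered channel of capacity 1, so a pending wakeup absorbs further notifications, and waiters drain completions via PopRequest after each wakeup. A minimal sketch of that opportunistic-notify idiom (notify is a hypothetical helper):

package main

import "fmt"

// notify performs the same opportunistic wakeup as
// AIOContext.FinishRequest: with a capacity-1 channel, a send succeeds
// only if no wakeup is already pending, so completions coalesce and the
// sender never blocks.
func notify(done chan struct{}) {
	select {
	case done <- struct{}{}:
	default:
	}
}

func main() {
	done := make(chan struct{}, 1)
	notify(done) // queues a wakeup
	notify(done) // coalesced: a wakeup is already pending
	<-done
	fmt.Println("one wakeup delivered for two completions")
}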
+ if offset != 0 || uint64(ar.Length()) != aioRingBufferSize { + return syserror.EFAULT + } + return nil +} + +// RemoveMapping implements memmap.Mappable.RemoveMapping. +func (m *aioMappable) RemoveMapping(context.Context, memmap.MappingSpace, usermem.AddrRange, uint64, bool) { +} + +// CopyMapping implements memmap.Mappable.CopyMapping. +func (m *aioMappable) CopyMapping(ctx context.Context, ms memmap.MappingSpace, srcAR, dstAR usermem.AddrRange, offset uint64, _ bool) error { + // Don't allow mappings to be expanded (in Linux, fs/aio.c:aio_ring_mmap() + // sets VM_DONTEXPAND). + if offset != 0 || uint64(dstAR.Length()) != aioRingBufferSize { + return syserror.EFAULT + } + // Require that the mapping correspond to a live AIOContext. Compare + // Linux's fs/aio.c:aio_ring_mremap(). + mm, ok := ms.(*MemoryManager) + if !ok { + return syserror.EINVAL + } + am := &mm.aioManager + am.mu.Lock() + defer am.mu.Unlock() + oldID := uint64(srcAR.Start) + aioCtx, ok := am.contexts[oldID] + if !ok { + return syserror.EINVAL + } + aioCtx.mu.Lock() + defer aioCtx.mu.Unlock() + if aioCtx.dead { + return syserror.EINVAL + } + // Use the new ID for the AIOContext. + am.contexts[uint64(dstAR.Start)] = aioCtx + delete(am.contexts, oldID) + return nil +} + +// Translate implements memmap.Mappable.Translate. +func (m *aioMappable) Translate(ctx context.Context, required, optional memmap.MappableRange, at usermem.AccessType) ([]memmap.Translation, error) { + var err error + if required.End > m.fr.Length() { + err = &memmap.BusError{syserror.EFAULT} + } + if source := optional.Intersect(memmap.MappableRange{0, m.fr.Length()}); source.Length() != 0 { + return []memmap.Translation{ + { + Source: source, + File: m.mfp.MemoryFile(), + Offset: m.fr.Start + source.Start, + Perms: usermem.AnyAccess, + }, + }, err + } + return nil, err +} + +// InvalidateUnsavable implements memmap.Mappable.InvalidateUnsavable. +func (m *aioMappable) InvalidateUnsavable(ctx context.Context) error { + return nil +} + +// NewAIOContext creates a new context for asynchronous I/O. +// +// NewAIOContext is analogous to Linux's fs/aio.c:ioctx_alloc(). +func (mm *MemoryManager) NewAIOContext(ctx context.Context, events uint32) (uint64, error) { + // libaio get_ioevents() expects context "handle" to be a valid address. + // libaio peeks inside looking for a magic number. This function allocates + // a page per context and keeps it set to zeroes to ensure it will not + // match AIO_RING_MAGIC and make libaio happy. + m, err := newAIOMappable(mm.mfp) + if err != nil { + return 0, err + } + defer m.DecRef() + addr, err := mm.MMap(ctx, memmap.MMapOpts{ + Length: aioRingBufferSize, + MappingIdentity: m, + Mappable: m, + // TODO(fvoznika): Linux does "do_mmap_pgoff(..., PROT_READ | + // PROT_WRITE, ...)" in fs/aio.c:aio_setup_ring(); why do we make this + // mapping read-only? + Perms: usermem.Read, + MaxPerms: usermem.Read, + }) + if err != nil { + return 0, err + } + id := uint64(addr) + if !mm.aioManager.newAIOContext(events, id) { + mm.MUnmap(ctx, addr, aioRingBufferSize) + return 0, syserror.EINVAL + } + return id, nil +} + +// DestroyAIOContext destroys an asynchronous I/O context. It returns false if +// the context does not exist. +func (mm *MemoryManager) DestroyAIOContext(ctx context.Context, id uint64) bool { + if _, ok := mm.LookupAIOContext(ctx, id); !ok { + return false + } + + // Only unmaps after it assured that the address is a valid aio context to + // prevent random memory from been unmapped. 
+ // + // Note: It's possible to unmap this address and map something else into + // the same address. Then it would be unmapping memory that it doesn't own. + // This is, however, the way Linux implements AIO. Keeps the same [weird] + // semantics in case anyone relies on it. + mm.MUnmap(ctx, usermem.Addr(id), aioRingBufferSize) + + return mm.aioManager.destroyAIOContext(id) +} + +// LookupAIOContext looks up the given context. It returns false if the context +// does not exist. +func (mm *MemoryManager) LookupAIOContext(ctx context.Context, id uint64) (*AIOContext, bool) { + aioCtx, ok := mm.aioManager.lookupAIOContext(id) + if !ok { + return nil, false + } + + // Protect against 'ids' that are inaccessible (Linux also reads 4 bytes + // from id). + var buf [4]byte + _, err := mm.CopyIn(ctx, usermem.Addr(id), buf[:], usermem.IOOpts{}) + if err != nil { + return nil, false + } + + return aioCtx, true +} diff --git a/pkg/sentry/mm/aio_context_state.go b/pkg/sentry/mm/aio_context_state.go new file mode 100644 index 000000000..c37fc9f7b --- /dev/null +++ b/pkg/sentry/mm/aio_context_state.go @@ -0,0 +1,20 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package mm + +// afterLoad is invoked by stateify. +func (a *AIOContext) afterLoad() { + a.done = make(chan struct{}, 1) +} diff --git a/pkg/sentry/mm/debug.go b/pkg/sentry/mm/debug.go new file mode 100644 index 000000000..fe58cfc4c --- /dev/null +++ b/pkg/sentry/mm/debug.go @@ -0,0 +1,98 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package mm + +import ( + "bytes" + "fmt" + + "gvisor.googlesource.com/gvisor/pkg/sentry/context" +) + +const ( + // If checkInvariants is true, perform runtime checks for invariants + // expected by the mm package. This is normally disabled since MM is a + // significant hot path in general, and some such checks (notably + // memmap.CheckTranslateResult) are very expensive. + checkInvariants = false + + // If logIOErrors is true, log I/O errors that originate from MM before + // converting them to EFAULT. + logIOErrors = false +) + +// String implements fmt.Stringer.String. +func (mm *MemoryManager) String() string { + return mm.DebugString(context.Background()) +} + +// DebugString returns a string containing information about mm for debugging. 
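The pma debug output below uses a /proc/maps-style permission column, with 'c' standing in for 'w' when a writable pma is still copy-on-write, followed by 'p' (private) or 's' (shared). A self-contained sketch of that flag encoding (permBits and its parameters are hypothetical):

package main

import "fmt"

// permBits renders rwxp flags the way debugStringEntryLocked does for
// pmas: 'c' marks a writable mapping that still needs copy-on-write,
// and the final column is 'p' for private or 's' for shared.
func permBits(read, write, exec, cow, private bool) string {
	b := []byte("----")
	if read {
		b[0] = 'r'
	}
	if write {
		if cow {
			b[1] = 'c'
		} else {
			b[1] = 'w'
		}
	}
	if exec {
		b[2] = 'x'
	}
	if private {
		b[3] = 'p'
	} else {
		b[3] = 's'
	}
	return string(b)
}

func main() {
	// A hypothetical private, readable, still-COW pma entry.
	fmt.Printf("%08x-%08x %s\n", 0x400000, 0x401000, permBits(true, true, false, true, true))
}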
+func (mm *MemoryManager) DebugString(ctx context.Context) string { + mm.mappingMu.RLock() + defer mm.mappingMu.RUnlock() + mm.activeMu.RLock() + defer mm.activeMu.RUnlock() + return mm.debugStringLocked(ctx) +} + +// Preconditions: mm.mappingMu and mm.activeMu must be locked. +func (mm *MemoryManager) debugStringLocked(ctx context.Context) string { + var b bytes.Buffer + b.WriteString("VMAs:\n") + for vseg := mm.vmas.FirstSegment(); vseg.Ok(); vseg = vseg.NextSegment() { + b.Write(mm.vmaMapsEntryLocked(ctx, vseg)) + } + b.WriteString("PMAs:\n") + for pseg := mm.pmas.FirstSegment(); pseg.Ok(); pseg = pseg.NextSegment() { + b.Write(pseg.debugStringEntryLocked()) + } + return string(b.Bytes()) +} + +// Preconditions: mm.activeMu must be locked. +func (pseg pmaIterator) debugStringEntryLocked() []byte { + var b bytes.Buffer + + fmt.Fprintf(&b, "%08x-%08x ", pseg.Start(), pseg.End()) + + pma := pseg.ValuePtr() + if pma.effectivePerms.Read { + b.WriteByte('r') + } else { + b.WriteByte('-') + } + if pma.effectivePerms.Write { + if pma.needCOW { + b.WriteByte('c') + } else { + b.WriteByte('w') + } + } else { + b.WriteByte('-') + } + if pma.effectivePerms.Execute { + b.WriteByte('x') + } else { + b.WriteByte('-') + } + if pma.private { + b.WriteByte('p') + } else { + b.WriteByte('s') + } + + fmt.Fprintf(&b, " %08x %T\n", pma.off, pma.file) + return b.Bytes() +} diff --git a/pkg/sentry/mm/file_refcount_set.go b/pkg/sentry/mm/file_refcount_set.go new file mode 100755 index 000000000..99c088c83 --- /dev/null +++ b/pkg/sentry/mm/file_refcount_set.go @@ -0,0 +1,1274 @@ +package mm + +import ( + __generics_imported0 "gvisor.googlesource.com/gvisor/pkg/sentry/platform" +) + +import ( + "bytes" + "fmt" +) + +const ( + // minDegree is the minimum degree of an internal node in a Set B-tree. + // + // - Any non-root node has at least minDegree-1 segments. + // + // - Any non-root internal (non-leaf) node has at least minDegree children. + // + // - The root node may have fewer than minDegree-1 segments, but it may + // only have 0 segments if the tree is empty. + // + // Our implementation requires minDegree >= 3. Higher values of minDegree + // usually improve performance, but increase memory usage for small sets. + fileRefcountminDegree = 3 + + fileRefcountmaxDegree = 2 * fileRefcountminDegree +) + +// A Set is a mapping of segments with non-overlapping Range keys. The zero +// value for a Set is an empty set. Set values are not safely movable nor +// copyable. Set is thread-compatible. +// +// +stateify savable +type fileRefcountSet struct { + root fileRefcountnode `state:".(*fileRefcountSegmentDataSlices)"` +} + +// IsEmpty returns true if the set contains no segments. +func (s *fileRefcountSet) IsEmpty() bool { + return s.root.nrSegments == 0 +} + +// IsEmptyRange returns true iff no segments in the set overlap the given +// range. This is semantically equivalent to s.SpanRange(r) == 0, but may be +// more efficient. +func (s *fileRefcountSet) IsEmptyRange(r __generics_imported0.FileRange) bool { + switch { + case r.Length() < 0: + panic(fmt.Sprintf("invalid range %v", r)) + case r.Length() == 0: + return true + } + _, gap := s.Find(r.Start) + if !gap.Ok() { + return false + } + return r.End <= gap.End() +} + +// Span returns the total size of all segments in the set. 
+func (s *fileRefcountSet) Span() uint64 { + var sz uint64 + for seg := s.FirstSegment(); seg.Ok(); seg = seg.NextSegment() { + sz += seg.Range().Length() + } + return sz +} + +// SpanRange returns the total size of the intersection of segments in the set +// with the given range. +func (s *fileRefcountSet) SpanRange(r __generics_imported0.FileRange) uint64 { + switch { + case r.Length() < 0: + panic(fmt.Sprintf("invalid range %v", r)) + case r.Length() == 0: + return 0 + } + var sz uint64 + for seg := s.LowerBoundSegment(r.Start); seg.Ok() && seg.Start() < r.End; seg = seg.NextSegment() { + sz += seg.Range().Intersect(r).Length() + } + return sz +} + +// FirstSegment returns the first segment in the set. If the set is empty, +// FirstSegment returns a terminal iterator. +func (s *fileRefcountSet) FirstSegment() fileRefcountIterator { + if s.root.nrSegments == 0 { + return fileRefcountIterator{} + } + return s.root.firstSegment() +} + +// LastSegment returns the last segment in the set. If the set is empty, +// LastSegment returns a terminal iterator. +func (s *fileRefcountSet) LastSegment() fileRefcountIterator { + if s.root.nrSegments == 0 { + return fileRefcountIterator{} + } + return s.root.lastSegment() +} + +// FirstGap returns the first gap in the set. +func (s *fileRefcountSet) FirstGap() fileRefcountGapIterator { + n := &s.root + for n.hasChildren { + n = n.children[0] + } + return fileRefcountGapIterator{n, 0} +} + +// LastGap returns the last gap in the set. +func (s *fileRefcountSet) LastGap() fileRefcountGapIterator { + n := &s.root + for n.hasChildren { + n = n.children[n.nrSegments] + } + return fileRefcountGapIterator{n, n.nrSegments} +} + +// Find returns the segment or gap whose range contains the given key. If a +// segment is found, the returned Iterator is non-terminal and the +// returned GapIterator is terminal. Otherwise, the returned Iterator is +// terminal and the returned GapIterator is non-terminal. +func (s *fileRefcountSet) Find(key uint64) (fileRefcountIterator, fileRefcountGapIterator) { + n := &s.root + for { + + lower := 0 + upper := n.nrSegments + for lower < upper { + i := lower + (upper-lower)/2 + if r := n.keys[i]; key < r.End { + if key >= r.Start { + return fileRefcountIterator{n, i}, fileRefcountGapIterator{} + } + upper = i + } else { + lower = i + 1 + } + } + i := lower + if !n.hasChildren { + return fileRefcountIterator{}, fileRefcountGapIterator{n, i} + } + n = n.children[i] + } +} + +// FindSegment returns the segment whose range contains the given key. If no +// such segment exists, FindSegment returns a terminal iterator. +func (s *fileRefcountSet) FindSegment(key uint64) fileRefcountIterator { + seg, _ := s.Find(key) + return seg +} + +// LowerBoundSegment returns the segment with the lowest range that contains a +// key greater than or equal to min. If no such segment exists, +// LowerBoundSegment returns a terminal iterator. +func (s *fileRefcountSet) LowerBoundSegment(min uint64) fileRefcountIterator { + seg, gap := s.Find(min) + if seg.Ok() { + return seg + } + return gap.NextSegment() +} + +// UpperBoundSegment returns the segment with the highest range that contains a +// key less than or equal to max. If no such segment exists, UpperBoundSegment +// returns a terminal iterator. +func (s *fileRefcountSet) UpperBoundSegment(max uint64) fileRefcountIterator { + seg, gap := s.Find(max) + if seg.Ok() { + return seg + } + return gap.PrevSegment() +} + +// FindGap returns the gap containing the given key. If no such gap exists +// (i.e. 
the set contains a segment containing that key), FindGap returns a +// terminal iterator. +func (s *fileRefcountSet) FindGap(key uint64) fileRefcountGapIterator { + _, gap := s.Find(key) + return gap +} + +// LowerBoundGap returns the gap with the lowest range that is greater than or +// equal to min. +func (s *fileRefcountSet) LowerBoundGap(min uint64) fileRefcountGapIterator { + seg, gap := s.Find(min) + if gap.Ok() { + return gap + } + return seg.NextGap() +} + +// UpperBoundGap returns the gap with the highest range that is less than or +// equal to max. +func (s *fileRefcountSet) UpperBoundGap(max uint64) fileRefcountGapIterator { + seg, gap := s.Find(max) + if gap.Ok() { + return gap + } + return seg.PrevGap() +} + +// Add inserts the given segment into the set and returns true. If the new +// segment can be merged with adjacent segments, Add will do so. If the new +// segment would overlap an existing segment, Add returns false. If Add +// succeeds, all existing iterators are invalidated. +func (s *fileRefcountSet) Add(r __generics_imported0.FileRange, val int32) bool { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + gap := s.FindGap(r.Start) + if !gap.Ok() { + return false + } + if r.End > gap.End() { + return false + } + s.Insert(gap, r, val) + return true +} + +// AddWithoutMerging inserts the given segment into the set and returns true. +// If it would overlap an existing segment, AddWithoutMerging does nothing and +// returns false. If AddWithoutMerging succeeds, all existing iterators are +// invalidated. +func (s *fileRefcountSet) AddWithoutMerging(r __generics_imported0.FileRange, val int32) bool { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + gap := s.FindGap(r.Start) + if !gap.Ok() { + return false + } + if r.End > gap.End() { + return false + } + s.InsertWithoutMergingUnchecked(gap, r, val) + return true +} + +// Insert inserts the given segment into the given gap. If the new segment can +// be merged with adjacent segments, Insert will do so. Insert returns an +// iterator to the segment containing the inserted value (which may have been +// merged with other values). All existing iterators (including gap, but not +// including the returned iterator) are invalidated. +// +// If the gap cannot accommodate the segment, or if r is invalid, Insert panics. +// +// Insert is semantically equivalent to a InsertWithoutMerging followed by a +// Merge, but may be more efficient. Note that there is no unchecked variant of +// Insert since Insert must retrieve and inspect gap's predecessor and +// successor segments regardless. 
+func (s *fileRefcountSet) Insert(gap fileRefcountGapIterator, r __generics_imported0.FileRange, val int32) fileRefcountIterator { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + prev, next := gap.PrevSegment(), gap.NextSegment() + if prev.Ok() && prev.End() > r.Start { + panic(fmt.Sprintf("new segment %v overlaps predecessor %v", r, prev.Range())) + } + if next.Ok() && next.Start() < r.End { + panic(fmt.Sprintf("new segment %v overlaps successor %v", r, next.Range())) + } + if prev.Ok() && prev.End() == r.Start { + if mval, ok := (fileRefcountSetFunctions{}).Merge(prev.Range(), prev.Value(), r, val); ok { + prev.SetEndUnchecked(r.End) + prev.SetValue(mval) + if next.Ok() && next.Start() == r.End { + val = mval + if mval, ok := (fileRefcountSetFunctions{}).Merge(prev.Range(), val, next.Range(), next.Value()); ok { + prev.SetEndUnchecked(next.End()) + prev.SetValue(mval) + return s.Remove(next).PrevSegment() + } + } + return prev + } + } + if next.Ok() && next.Start() == r.End { + if mval, ok := (fileRefcountSetFunctions{}).Merge(r, val, next.Range(), next.Value()); ok { + next.SetStartUnchecked(r.Start) + next.SetValue(mval) + return next + } + } + return s.InsertWithoutMergingUnchecked(gap, r, val) +} + +// InsertWithoutMerging inserts the given segment into the given gap and +// returns an iterator to the inserted segment. All existing iterators +// (including gap, but not including the returned iterator) are invalidated. +// +// If the gap cannot accommodate the segment, or if r is invalid, +// InsertWithoutMerging panics. +func (s *fileRefcountSet) InsertWithoutMerging(gap fileRefcountGapIterator, r __generics_imported0.FileRange, val int32) fileRefcountIterator { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + if gr := gap.Range(); !gr.IsSupersetOf(r) { + panic(fmt.Sprintf("cannot insert segment range %v into gap range %v", r, gr)) + } + return s.InsertWithoutMergingUnchecked(gap, r, val) +} + +// InsertWithoutMergingUnchecked inserts the given segment into the given gap +// and returns an iterator to the inserted segment. All existing iterators +// (including gap, but not including the returned iterator) are invalidated. +// +// Preconditions: r.Start >= gap.Start(); r.End <= gap.End(). +func (s *fileRefcountSet) InsertWithoutMergingUnchecked(gap fileRefcountGapIterator, r __generics_imported0.FileRange, val int32) fileRefcountIterator { + gap = gap.node.rebalanceBeforeInsert(gap) + copy(gap.node.keys[gap.index+1:], gap.node.keys[gap.index:gap.node.nrSegments]) + copy(gap.node.values[gap.index+1:], gap.node.values[gap.index:gap.node.nrSegments]) + gap.node.keys[gap.index] = r + gap.node.values[gap.index] = val + gap.node.nrSegments++ + return fileRefcountIterator{gap.node, gap.index} +} + +// Remove removes the given segment and returns an iterator to the vacated gap. +// All existing iterators (including seg, but not including the returned +// iterator) are invalidated. 
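Insert above only coalesces with a neighbor when the set's Merge callback succeeds; for this set, that means equal reference counts over adjacent file ranges. A simplified, slice-based sketch of the documented add-then-merge semantics, not the B-tree implementation itself (segment and addMerging are hypothetical):

package main

import "fmt"

type segment struct {
	start, end uint64
	val        int32
}

// addMerging mimics the documented Add semantics on a sorted,
// non-overlapping slice: reject any overlap, insert, then coalesce
// adjacent segments whose values merge (here: are equal, as for
// reference counts).
func addMerging(segs []segment, s segment) ([]segment, bool) {
	out := make([]segment, 0, len(segs)+1)
	i := 0
	for ; i < len(segs) && segs[i].end <= s.start; i++ {
		out = append(out, segs[i])
	}
	if i < len(segs) && segs[i].start < s.end {
		return segs, false // overlap: Add returns false
	}
	out = append(out, s)
	out = append(out, segs[i:]...)
	merged := []segment{out[0]}
	for _, seg := range out[1:] {
		last := &merged[len(merged)-1]
		if last.end == seg.start && last.val == seg.val {
			last.end = seg.end // merge with predecessor
		} else {
			merged = append(merged, seg)
		}
	}
	return merged, true
}

func main() {
	segs := []segment{{start: 0, end: 4096, val: 1}}
	segs, _ = addMerging(segs, segment{start: 4096, end: 8192, val: 1})
	fmt.Println(segs) // [{0 8192 1}]
}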
+func (s *fileRefcountSet) Remove(seg fileRefcountIterator) fileRefcountGapIterator { + + if seg.node.hasChildren { + + victim := seg.PrevSegment() + + seg.SetRangeUnchecked(victim.Range()) + seg.SetValue(victim.Value()) + return s.Remove(victim).NextGap() + } + copy(seg.node.keys[seg.index:], seg.node.keys[seg.index+1:seg.node.nrSegments]) + copy(seg.node.values[seg.index:], seg.node.values[seg.index+1:seg.node.nrSegments]) + fileRefcountSetFunctions{}.ClearValue(&seg.node.values[seg.node.nrSegments-1]) + seg.node.nrSegments-- + return seg.node.rebalanceAfterRemove(fileRefcountGapIterator{seg.node, seg.index}) +} + +// RemoveAll removes all segments from the set. All existing iterators are +// invalidated. +func (s *fileRefcountSet) RemoveAll() { + s.root = fileRefcountnode{} +} + +// RemoveRange removes all segments in the given range. An iterator to the +// newly formed gap is returned, and all existing iterators are invalidated. +func (s *fileRefcountSet) RemoveRange(r __generics_imported0.FileRange) fileRefcountGapIterator { + seg, gap := s.Find(r.Start) + if seg.Ok() { + seg = s.Isolate(seg, r) + gap = s.Remove(seg) + } + for seg = gap.NextSegment(); seg.Ok() && seg.Start() < r.End; seg = gap.NextSegment() { + seg = s.Isolate(seg, r) + gap = s.Remove(seg) + } + return gap +} + +// Merge attempts to merge two neighboring segments. If successful, Merge +// returns an iterator to the merged segment, and all existing iterators are +// invalidated. Otherwise, Merge returns a terminal iterator. +// +// If first is not the predecessor of second, Merge panics. +func (s *fileRefcountSet) Merge(first, second fileRefcountIterator) fileRefcountIterator { + if first.NextSegment() != second { + panic(fmt.Sprintf("attempt to merge non-neighboring segments %v, %v", first.Range(), second.Range())) + } + return s.MergeUnchecked(first, second) +} + +// MergeUnchecked attempts to merge two neighboring segments. If successful, +// MergeUnchecked returns an iterator to the merged segment, and all existing +// iterators are invalidated. Otherwise, MergeUnchecked returns a terminal +// iterator. +// +// Precondition: first is the predecessor of second: first.NextSegment() == +// second, first == second.PrevSegment(). +func (s *fileRefcountSet) MergeUnchecked(first, second fileRefcountIterator) fileRefcountIterator { + if first.End() == second.Start() { + if mval, ok := (fileRefcountSetFunctions{}).Merge(first.Range(), first.Value(), second.Range(), second.Value()); ok { + + first.SetEndUnchecked(second.End()) + first.SetValue(mval) + return s.Remove(second).PrevSegment() + } + } + return fileRefcountIterator{} +} + +// MergeAll attempts to merge all adjacent segments in the set. All existing +// iterators are invalidated. +func (s *fileRefcountSet) MergeAll() { + seg := s.FirstSegment() + if !seg.Ok() { + return + } + next := seg.NextSegment() + for next.Ok() { + if mseg := s.MergeUnchecked(seg, next); mseg.Ok() { + seg, next = mseg, mseg.NextSegment() + } else { + seg, next = next, next.NextSegment() + } + } +} + +// MergeRange attempts to merge all adjacent segments that contain a key in the +// specific range. All existing iterators are invalidated. 
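RemoveRange above works by isolating each overlapping segment to r and then removing it; Isolate (defined below) clamps a segment to r by splitting at r.Start and r.End when those keys fall strictly inside it. A sketch of just that clamping arithmetic (rng and isolate are hypothetical):

package main

import "fmt"

type rng struct{ start, end uint64 }

// isolate clamps seg so that it does not escape r, which is the effect
// Isolate achieves by splitting at r.start and r.end when those keys
// fall strictly inside seg.
func isolate(seg, r rng) rng {
	if r.start > seg.start && r.start < seg.end {
		seg.start = r.start // split at r.start, keep the right piece
	}
	if r.end > seg.start && r.end < seg.end {
		seg.end = r.end // split at r.end, keep the left piece
	}
	return seg
}

func main() {
	fmt.Println(isolate(rng{0, 100}, rng{25, 75})) // {25 75}
}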
+func (s *fileRefcountSet) MergeRange(r __generics_imported0.FileRange) { + seg := s.LowerBoundSegment(r.Start) + if !seg.Ok() { + return + } + next := seg.NextSegment() + for next.Ok() && next.Range().Start < r.End { + if mseg := s.MergeUnchecked(seg, next); mseg.Ok() { + seg, next = mseg, mseg.NextSegment() + } else { + seg, next = next, next.NextSegment() + } + } +} + +// MergeAdjacent attempts to merge the segment containing r.Start with its +// predecessor, and the segment containing r.End-1 with its successor. +func (s *fileRefcountSet) MergeAdjacent(r __generics_imported0.FileRange) { + first := s.FindSegment(r.Start) + if first.Ok() { + if prev := first.PrevSegment(); prev.Ok() { + s.Merge(prev, first) + } + } + last := s.FindSegment(r.End - 1) + if last.Ok() { + if next := last.NextSegment(); next.Ok() { + s.Merge(last, next) + } + } +} + +// Split splits the given segment at the given key and returns iterators to the +// two resulting segments. All existing iterators (including seg, but not +// including the returned iterators) are invalidated. +// +// If the segment cannot be split at split (because split is at the start or +// end of the segment's range, so splitting would produce a segment with zero +// length, or because split falls outside the segment's range altogether), +// Split panics. +func (s *fileRefcountSet) Split(seg fileRefcountIterator, split uint64) (fileRefcountIterator, fileRefcountIterator) { + if !seg.Range().CanSplitAt(split) { + panic(fmt.Sprintf("can't split %v at %v", seg.Range(), split)) + } + return s.SplitUnchecked(seg, split) +} + +// SplitUnchecked splits the given segment at the given key and returns +// iterators to the two resulting segments. All existing iterators (including +// seg, but not including the returned iterators) are invalidated. +// +// Preconditions: seg.Start() < key < seg.End(). +func (s *fileRefcountSet) SplitUnchecked(seg fileRefcountIterator, split uint64) (fileRefcountIterator, fileRefcountIterator) { + val1, val2 := (fileRefcountSetFunctions{}).Split(seg.Range(), seg.Value(), split) + end2 := seg.End() + seg.SetEndUnchecked(split) + seg.SetValue(val1) + seg2 := s.InsertWithoutMergingUnchecked(seg.NextGap(), __generics_imported0.FileRange{split, end2}, val2) + + return seg2.PrevSegment(), seg2 +} + +// SplitAt splits the segment straddling split, if one exists. SplitAt returns +// true if a segment was split and false otherwise. If SplitAt splits a +// segment, all existing iterators are invalidated. +func (s *fileRefcountSet) SplitAt(split uint64) bool { + if seg := s.FindSegment(split); seg.Ok() && seg.Range().CanSplitAt(split) { + s.SplitUnchecked(seg, split) + return true + } + return false +} + +// Isolate ensures that the given segment's range does not escape r by +// splitting at r.Start and r.End if necessary, and returns an updated iterator +// to the bounded segment. All existing iterators (including seg, but not +// including the returned iterators) are invalidated. +func (s *fileRefcountSet) Isolate(seg fileRefcountIterator, r __generics_imported0.FileRange) fileRefcountIterator { + if seg.Range().CanSplitAt(r.Start) { + _, seg = s.SplitUnchecked(seg, r.Start) + } + if seg.Range().CanSplitAt(r.End) { + seg, _ = s.SplitUnchecked(seg, r.End) + } + return seg +} + +// ApplyContiguous applies a function to a contiguous range of segments, +// splitting if necessary. The function is applied until the first gap is +// encountered, at which point the gap is returned. 
If the function is applied +// across the entire range, a terminal gap is returned. All existing iterators +// are invalidated. +// +// N.B. The Iterator must not be invalidated by the function. +func (s *fileRefcountSet) ApplyContiguous(r __generics_imported0.FileRange, fn func(seg fileRefcountIterator)) fileRefcountGapIterator { + seg, gap := s.Find(r.Start) + if !seg.Ok() { + return gap + } + for { + seg = s.Isolate(seg, r) + fn(seg) + if seg.End() >= r.End { + return fileRefcountGapIterator{} + } + gap = seg.NextGap() + if !gap.IsEmpty() { + return gap + } + seg = gap.NextSegment() + if !seg.Ok() { + + return fileRefcountGapIterator{} + } + } +} + +// +stateify savable +type fileRefcountnode struct { + // An internal binary tree node looks like: + // + // K + // / \ + // Cl Cr + // + // where all keys in the subtree rooted by Cl (the left subtree) are less + // than K (the key of the parent node), and all keys in the subtree rooted + // by Cr (the right subtree) are greater than K. + // + // An internal B-tree node's indexes work out to look like: + // + // K0 K1 K2 ... Kn-1 + // / \/ \/ \ ... / \ + // C0 C1 C2 C3 ... Cn-1 Cn + // + // where n is nrSegments. + nrSegments int + + // parent is a pointer to this node's parent. If this node is root, parent + // is nil. + parent *fileRefcountnode + + // parentIndex is the index of this node in parent.children. + parentIndex int + + // Flag for internal nodes that is technically redundant with "children[0] + // != nil", but is stored in the first cache line. "hasChildren" rather + // than "isLeaf" because false must be the correct value for an empty root. + hasChildren bool + + // Nodes store keys and values in separate arrays to maximize locality in + // the common case (scanning keys for lookup). + keys [fileRefcountmaxDegree - 1]__generics_imported0.FileRange + values [fileRefcountmaxDegree - 1]int32 + children [fileRefcountmaxDegree]*fileRefcountnode +} + +// firstSegment returns the first segment in the subtree rooted by n. +// +// Preconditions: n.nrSegments != 0. +func (n *fileRefcountnode) firstSegment() fileRefcountIterator { + for n.hasChildren { + n = n.children[0] + } + return fileRefcountIterator{n, 0} +} + +// lastSegment returns the last segment in the subtree rooted by n. +// +// Preconditions: n.nrSegments != 0. +func (n *fileRefcountnode) lastSegment() fileRefcountIterator { + for n.hasChildren { + n = n.children[n.nrSegments] + } + return fileRefcountIterator{n, n.nrSegments - 1} +} + +func (n *fileRefcountnode) prevSibling() *fileRefcountnode { + if n.parent == nil || n.parentIndex == 0 { + return nil + } + return n.parent.children[n.parentIndex-1] +} + +func (n *fileRefcountnode) nextSibling() *fileRefcountnode { + if n.parent == nil || n.parentIndex == n.parent.nrSegments { + return nil + } + return n.parent.children[n.parentIndex+1] +} + +// rebalanceBeforeInsert splits n and its ancestors if they are full, as +// required for insertion, and returns an updated iterator to the position +// represented by gap. 
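With fileRefcountminDegree = 3, a node is split once it holds fileRefcountmaxDegree-1 = 5 segments: the middle key is promoted and the remaining keys become two nodes of minDegree-1 = 2 keys each, which is the arithmetic rebalanceBeforeInsert performs below. A sketch of the root-split case (splitFull is hypothetical):

package main

import "fmt"

// splitFull mirrors the root-split arithmetic in rebalanceBeforeInsert
// for minDegree = 3: a full node's five keys become two two-key
// children with the middle key promoted into the new root.
func splitFull(keys []int) (left []int, mid int, right []int) {
	const minDegree = 3
	left = keys[:minDegree-1]
	mid = keys[minDegree-1]
	right = keys[minDegree:]
	return
}

func main() {
	l, m, r := splitFull([]int{10, 20, 30, 40, 50})
	fmt.Println(l, m, r) // [10 20] 30 [40 50]
}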
+func (n *fileRefcountnode) rebalanceBeforeInsert(gap fileRefcountGapIterator) fileRefcountGapIterator { + if n.parent != nil { + gap = n.parent.rebalanceBeforeInsert(gap) + } + if n.nrSegments < fileRefcountmaxDegree-1 { + return gap + } + if n.parent == nil { + + left := &fileRefcountnode{ + nrSegments: fileRefcountminDegree - 1, + parent: n, + parentIndex: 0, + hasChildren: n.hasChildren, + } + right := &fileRefcountnode{ + nrSegments: fileRefcountminDegree - 1, + parent: n, + parentIndex: 1, + hasChildren: n.hasChildren, + } + copy(left.keys[:fileRefcountminDegree-1], n.keys[:fileRefcountminDegree-1]) + copy(left.values[:fileRefcountminDegree-1], n.values[:fileRefcountminDegree-1]) + copy(right.keys[:fileRefcountminDegree-1], n.keys[fileRefcountminDegree:]) + copy(right.values[:fileRefcountminDegree-1], n.values[fileRefcountminDegree:]) + n.keys[0], n.values[0] = n.keys[fileRefcountminDegree-1], n.values[fileRefcountminDegree-1] + fileRefcountzeroValueSlice(n.values[1:]) + if n.hasChildren { + copy(left.children[:fileRefcountminDegree], n.children[:fileRefcountminDegree]) + copy(right.children[:fileRefcountminDegree], n.children[fileRefcountminDegree:]) + fileRefcountzeroNodeSlice(n.children[2:]) + for i := 0; i < fileRefcountminDegree; i++ { + left.children[i].parent = left + left.children[i].parentIndex = i + right.children[i].parent = right + right.children[i].parentIndex = i + } + } + n.nrSegments = 1 + n.hasChildren = true + n.children[0] = left + n.children[1] = right + if gap.node != n { + return gap + } + if gap.index < fileRefcountminDegree { + return fileRefcountGapIterator{left, gap.index} + } + return fileRefcountGapIterator{right, gap.index - fileRefcountminDegree} + } + + copy(n.parent.keys[n.parentIndex+1:], n.parent.keys[n.parentIndex:n.parent.nrSegments]) + copy(n.parent.values[n.parentIndex+1:], n.parent.values[n.parentIndex:n.parent.nrSegments]) + n.parent.keys[n.parentIndex], n.parent.values[n.parentIndex] = n.keys[fileRefcountminDegree-1], n.values[fileRefcountminDegree-1] + copy(n.parent.children[n.parentIndex+2:], n.parent.children[n.parentIndex+1:n.parent.nrSegments+1]) + for i := n.parentIndex + 2; i < n.parent.nrSegments+2; i++ { + n.parent.children[i].parentIndex = i + } + sibling := &fileRefcountnode{ + nrSegments: fileRefcountminDegree - 1, + parent: n.parent, + parentIndex: n.parentIndex + 1, + hasChildren: n.hasChildren, + } + n.parent.children[n.parentIndex+1] = sibling + n.parent.nrSegments++ + copy(sibling.keys[:fileRefcountminDegree-1], n.keys[fileRefcountminDegree:]) + copy(sibling.values[:fileRefcountminDegree-1], n.values[fileRefcountminDegree:]) + fileRefcountzeroValueSlice(n.values[fileRefcountminDegree-1:]) + if n.hasChildren { + copy(sibling.children[:fileRefcountminDegree], n.children[fileRefcountminDegree:]) + fileRefcountzeroNodeSlice(n.children[fileRefcountminDegree:]) + for i := 0; i < fileRefcountminDegree; i++ { + sibling.children[i].parent = sibling + sibling.children[i].parentIndex = i + } + } + n.nrSegments = fileRefcountminDegree - 1 + + if gap.node != n { + return gap + } + if gap.index < fileRefcountminDegree { + return gap + } + return fileRefcountGapIterator{sibling, gap.index - fileRefcountminDegree} +} + +// rebalanceAfterRemove "unsplits" n and its ancestors if they are deficient +// (contain fewer segments than required by B-tree invariants), as required for +// removal, and returns an updated iterator to the position represented by gap. 
+// +// Precondition: n is the only node in the tree that may currently violate a +// B-tree invariant. +func (n *fileRefcountnode) rebalanceAfterRemove(gap fileRefcountGapIterator) fileRefcountGapIterator { + for { + if n.nrSegments >= fileRefcountminDegree-1 { + return gap + } + if n.parent == nil { + + return gap + } + + if sibling := n.prevSibling(); sibling != nil && sibling.nrSegments >= fileRefcountminDegree { + copy(n.keys[1:], n.keys[:n.nrSegments]) + copy(n.values[1:], n.values[:n.nrSegments]) + n.keys[0] = n.parent.keys[n.parentIndex-1] + n.values[0] = n.parent.values[n.parentIndex-1] + n.parent.keys[n.parentIndex-1] = sibling.keys[sibling.nrSegments-1] + n.parent.values[n.parentIndex-1] = sibling.values[sibling.nrSegments-1] + fileRefcountSetFunctions{}.ClearValue(&sibling.values[sibling.nrSegments-1]) + if n.hasChildren { + copy(n.children[1:], n.children[:n.nrSegments+1]) + n.children[0] = sibling.children[sibling.nrSegments] + sibling.children[sibling.nrSegments] = nil + n.children[0].parent = n + n.children[0].parentIndex = 0 + for i := 1; i < n.nrSegments+2; i++ { + n.children[i].parentIndex = i + } + } + n.nrSegments++ + sibling.nrSegments-- + if gap.node == sibling && gap.index == sibling.nrSegments { + return fileRefcountGapIterator{n, 0} + } + if gap.node == n { + return fileRefcountGapIterator{n, gap.index + 1} + } + return gap + } + if sibling := n.nextSibling(); sibling != nil && sibling.nrSegments >= fileRefcountminDegree { + n.keys[n.nrSegments] = n.parent.keys[n.parentIndex] + n.values[n.nrSegments] = n.parent.values[n.parentIndex] + n.parent.keys[n.parentIndex] = sibling.keys[0] + n.parent.values[n.parentIndex] = sibling.values[0] + copy(sibling.keys[:sibling.nrSegments-1], sibling.keys[1:]) + copy(sibling.values[:sibling.nrSegments-1], sibling.values[1:]) + fileRefcountSetFunctions{}.ClearValue(&sibling.values[sibling.nrSegments-1]) + if n.hasChildren { + n.children[n.nrSegments+1] = sibling.children[0] + copy(sibling.children[:sibling.nrSegments], sibling.children[1:]) + sibling.children[sibling.nrSegments] = nil + n.children[n.nrSegments+1].parent = n + n.children[n.nrSegments+1].parentIndex = n.nrSegments + 1 + for i := 0; i < sibling.nrSegments; i++ { + sibling.children[i].parentIndex = i + } + } + n.nrSegments++ + sibling.nrSegments-- + if gap.node == sibling { + if gap.index == 0 { + return fileRefcountGapIterator{n, n.nrSegments} + } + return fileRefcountGapIterator{sibling, gap.index - 1} + } + return gap + } + + p := n.parent + if p.nrSegments == 1 { + + left, right := p.children[0], p.children[1] + p.nrSegments = left.nrSegments + right.nrSegments + 1 + p.hasChildren = left.hasChildren + p.keys[left.nrSegments] = p.keys[0] + p.values[left.nrSegments] = p.values[0] + copy(p.keys[:left.nrSegments], left.keys[:left.nrSegments]) + copy(p.values[:left.nrSegments], left.values[:left.nrSegments]) + copy(p.keys[left.nrSegments+1:], right.keys[:right.nrSegments]) + copy(p.values[left.nrSegments+1:], right.values[:right.nrSegments]) + if left.hasChildren { + copy(p.children[:left.nrSegments+1], left.children[:left.nrSegments+1]) + copy(p.children[left.nrSegments+1:], right.children[:right.nrSegments+1]) + for i := 0; i < p.nrSegments+1; i++ { + p.children[i].parent = p + p.children[i].parentIndex = i + } + } else { + p.children[0] = nil + p.children[1] = nil + } + if gap.node == left { + return fileRefcountGapIterator{p, gap.index} + } + if gap.node == right { + return fileRefcountGapIterator{p, gap.index + left.nrSegments + 1} + } + return gap + } + // Merge n 
and either sibling, along with the segment separating the + // two, into whichever of the two nodes comes first. This is the + // reverse of the non-root splitting case in + // node.rebalanceBeforeInsert. + var left, right *fileRefcountnode + if n.parentIndex > 0 { + left = n.prevSibling() + right = n + } else { + left = n + right = n.nextSibling() + } + + if gap.node == right { + gap = fileRefcountGapIterator{left, gap.index + left.nrSegments + 1} + } + left.keys[left.nrSegments] = p.keys[left.parentIndex] + left.values[left.nrSegments] = p.values[left.parentIndex] + copy(left.keys[left.nrSegments+1:], right.keys[:right.nrSegments]) + copy(left.values[left.nrSegments+1:], right.values[:right.nrSegments]) + if left.hasChildren { + copy(left.children[left.nrSegments+1:], right.children[:right.nrSegments+1]) + for i := left.nrSegments + 1; i < left.nrSegments+right.nrSegments+2; i++ { + left.children[i].parent = left + left.children[i].parentIndex = i + } + } + left.nrSegments += right.nrSegments + 1 + copy(p.keys[left.parentIndex:], p.keys[left.parentIndex+1:p.nrSegments]) + copy(p.values[left.parentIndex:], p.values[left.parentIndex+1:p.nrSegments]) + fileRefcountSetFunctions{}.ClearValue(&p.values[p.nrSegments-1]) + copy(p.children[left.parentIndex+1:], p.children[left.parentIndex+2:p.nrSegments+1]) + for i := 0; i < p.nrSegments; i++ { + p.children[i].parentIndex = i + } + p.children[p.nrSegments] = nil + p.nrSegments-- + + n = p + } +} + +// A Iterator is conceptually one of: +// +// - A pointer to a segment in a set; or +// +// - A terminal iterator, which is a sentinel indicating that the end of +// iteration has been reached. +// +// Iterators are copyable values and are meaningfully equality-comparable. The +// zero value of Iterator is a terminal iterator. +// +// Unless otherwise specified, any mutation of a set invalidates all existing +// iterators into the set. +type fileRefcountIterator struct { + // node is the node containing the iterated segment. If the iterator is + // terminal, node is nil. + node *fileRefcountnode + + // index is the index of the segment in node.keys/values. + index int +} + +// Ok returns true if the iterator is not terminal. All other methods are only +// valid for non-terminal iterators. +func (seg fileRefcountIterator) Ok() bool { + return seg.node != nil +} + +// Range returns the iterated segment's range key. +func (seg fileRefcountIterator) Range() __generics_imported0.FileRange { + return seg.node.keys[seg.index] +} + +// Start is equivalent to Range().Start, but should be preferred if only the +// start of the range is needed. +func (seg fileRefcountIterator) Start() uint64 { + return seg.node.keys[seg.index].Start +} + +// End is equivalent to Range().End, but should be preferred if only the end of +// the range is needed. +func (seg fileRefcountIterator) End() uint64 { + return seg.node.keys[seg.index].End +} + +// SetRangeUnchecked mutates the iterated segment's range key. This operation +// does not invalidate any iterators. +// +// Preconditions: +// +// - r.Length() > 0. +// +// - The new range must not overlap an existing one: If seg.NextSegment().Ok(), +// then r.end <= seg.NextSegment().Start(); if seg.PrevSegment().Ok(), then +// r.start >= seg.PrevSegment().End(). +func (seg fileRefcountIterator) SetRangeUnchecked(r __generics_imported0.FileRange) { + seg.node.keys[seg.index] = r +} + +// SetRange mutates the iterated segment's range key. 
If the new range would +// cause the iterated segment to overlap another segment, or if the new range +// is invalid, SetRange panics. This operation does not invalidate any +// iterators. +func (seg fileRefcountIterator) SetRange(r __generics_imported0.FileRange) { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + if prev := seg.PrevSegment(); prev.Ok() && r.Start < prev.End() { + panic(fmt.Sprintf("new segment range %v overlaps segment range %v", r, prev.Range())) + } + if next := seg.NextSegment(); next.Ok() && r.End > next.Start() { + panic(fmt.Sprintf("new segment range %v overlaps segment range %v", r, next.Range())) + } + seg.SetRangeUnchecked(r) +} + +// SetStartUnchecked mutates the iterated segment's start. This operation does +// not invalidate any iterators. +// +// Preconditions: The new start must be valid: start < seg.End(); if +// seg.PrevSegment().Ok(), then start >= seg.PrevSegment().End(). +func (seg fileRefcountIterator) SetStartUnchecked(start uint64) { + seg.node.keys[seg.index].Start = start +} + +// SetStart mutates the iterated segment's start. If the new start value would +// cause the iterated segment to overlap another segment, or would result in an +// invalid range, SetStart panics. This operation does not invalidate any +// iterators. +func (seg fileRefcountIterator) SetStart(start uint64) { + if start >= seg.End() { + panic(fmt.Sprintf("new start %v would invalidate segment range %v", start, seg.Range())) + } + if prev := seg.PrevSegment(); prev.Ok() && start < prev.End() { + panic(fmt.Sprintf("new start %v would cause segment range %v to overlap segment range %v", start, seg.Range(), prev.Range())) + } + seg.SetStartUnchecked(start) +} + +// SetEndUnchecked mutates the iterated segment's end. This operation does not +// invalidate any iterators. +// +// Preconditions: The new end must be valid: end > seg.Start(); if +// seg.NextSegment().Ok(), then end <= seg.NextSegment().Start(). +func (seg fileRefcountIterator) SetEndUnchecked(end uint64) { + seg.node.keys[seg.index].End = end +} + +// SetEnd mutates the iterated segment's end. If the new end value would cause +// the iterated segment to overlap another segment, or would result in an +// invalid range, SetEnd panics. This operation does not invalidate any +// iterators. +func (seg fileRefcountIterator) SetEnd(end uint64) { + if end <= seg.Start() { + panic(fmt.Sprintf("new end %v would invalidate segment range %v", end, seg.Range())) + } + if next := seg.NextSegment(); next.Ok() && end > next.Start() { + panic(fmt.Sprintf("new end %v would cause segment range %v to overlap segment range %v", end, seg.Range(), next.Range())) + } + seg.SetEndUnchecked(end) +} + +// Value returns a copy of the iterated segment's value. +func (seg fileRefcountIterator) Value() int32 { + return seg.node.values[seg.index] +} + +// ValuePtr returns a pointer to the iterated segment's value. The pointer is +// invalidated if the iterator is invalidated. This operation does not +// invalidate any iterators. +func (seg fileRefcountIterator) ValuePtr() *int32 { + return &seg.node.values[seg.index] +} + +// SetValue mutates the iterated segment's value. This operation does not +// invalidate any iterators. +func (seg fileRefcountIterator) SetValue(val int32) { + seg.node.values[seg.index] = val +} + +// PrevSegment returns the iterated segment's predecessor. If there is no +// preceding segment, PrevSegment returns a terminal iterator. 
+func (seg fileRefcountIterator) PrevSegment() fileRefcountIterator { + if seg.node.hasChildren { + return seg.node.children[seg.index].lastSegment() + } + if seg.index > 0 { + return fileRefcountIterator{seg.node, seg.index - 1} + } + if seg.node.parent == nil { + return fileRefcountIterator{} + } + return fileRefcountsegmentBeforePosition(seg.node.parent, seg.node.parentIndex) +} + +// NextSegment returns the iterated segment's successor. If there is no +// succeeding segment, NextSegment returns a terminal iterator. +func (seg fileRefcountIterator) NextSegment() fileRefcountIterator { + if seg.node.hasChildren { + return seg.node.children[seg.index+1].firstSegment() + } + if seg.index < seg.node.nrSegments-1 { + return fileRefcountIterator{seg.node, seg.index + 1} + } + if seg.node.parent == nil { + return fileRefcountIterator{} + } + return fileRefcountsegmentAfterPosition(seg.node.parent, seg.node.parentIndex) +} + +// PrevGap returns the gap immediately before the iterated segment. +func (seg fileRefcountIterator) PrevGap() fileRefcountGapIterator { + if seg.node.hasChildren { + + return seg.node.children[seg.index].lastSegment().NextGap() + } + return fileRefcountGapIterator{seg.node, seg.index} +} + +// NextGap returns the gap immediately after the iterated segment. +func (seg fileRefcountIterator) NextGap() fileRefcountGapIterator { + if seg.node.hasChildren { + return seg.node.children[seg.index+1].firstSegment().PrevGap() + } + return fileRefcountGapIterator{seg.node, seg.index + 1} +} + +// PrevNonEmpty returns the iterated segment's predecessor if it is adjacent, +// or the gap before the iterated segment otherwise. If seg.Start() == +// Functions.MinKey(), PrevNonEmpty will return two terminal iterators. +// Otherwise, exactly one of the iterators returned by PrevNonEmpty will be +// non-terminal. +func (seg fileRefcountIterator) PrevNonEmpty() (fileRefcountIterator, fileRefcountGapIterator) { + gap := seg.PrevGap() + if gap.Range().Length() != 0 { + return fileRefcountIterator{}, gap + } + return gap.PrevSegment(), fileRefcountGapIterator{} +} + +// NextNonEmpty returns the iterated segment's successor if it is adjacent, or +// the gap after the iterated segment otherwise. If seg.End() == +// Functions.MaxKey(), NextNonEmpty will return two terminal iterators. +// Otherwise, exactly one of the iterators returned by NextNonEmpty will be +// non-terminal. +func (seg fileRefcountIterator) NextNonEmpty() (fileRefcountIterator, fileRefcountGapIterator) { + gap := seg.NextGap() + if gap.Range().Length() != 0 { + return fileRefcountIterator{}, gap + } + return gap.NextSegment(), fileRefcountGapIterator{} +} + +// A GapIterator is conceptually one of: +// +// - A pointer to a position between two segments, before the first segment, or +// after the last segment in a set, called a *gap*; or +// +// - A terminal iterator, which is a sentinel indicating that the end of +// iteration has been reached. +// +// Note that the gap between two adjacent segments exists (iterators to it are +// non-terminal), but has a length of zero. GapIterator.IsEmpty returns true +// for such gaps. An empty set contains a single gap, spanning the entire range +// of the set's keys. +// +// GapIterators are copyable values and are meaningfully equality-comparable. +// The zero value of GapIterator is a terminal iterator. +// +// Unless otherwise specified, any mutation of a set invalidates all existing +// iterators into the set. 
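+//
+// Editor's note: an illustrative sketch, not part of the original diff.
+// Gap iteration is how callers locate unused key ranges; for example, to
+// find the first gap spanning at least n bytes in a set s (s and n are
+// hypothetical names):
+//
+//	for gap := s.FirstGap(); gap.Ok(); gap = gap.NextGap() {
+//		if gap.Range().Length() >= n {
+//			break // gap covers an unused range of at least n
+//		}
+//	}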
+type fileRefcountGapIterator struct { + // The representation of a GapIterator is identical to that of an Iterator, + // except that index corresponds to positions between segments in the same + // way as for node.children (see comment for node.nrSegments). + node *fileRefcountnode + index int +} + +// Ok returns true if the iterator is not terminal. All other methods are only +// valid for non-terminal iterators. +func (gap fileRefcountGapIterator) Ok() bool { + return gap.node != nil +} + +// Range returns the range spanned by the iterated gap. +func (gap fileRefcountGapIterator) Range() __generics_imported0.FileRange { + return __generics_imported0.FileRange{gap.Start(), gap.End()} +} + +// Start is equivalent to Range().Start, but should be preferred if only the +// start of the range is needed. +func (gap fileRefcountGapIterator) Start() uint64 { + if ps := gap.PrevSegment(); ps.Ok() { + return ps.End() + } + return fileRefcountSetFunctions{}.MinKey() +} + +// End is equivalent to Range().End, but should be preferred if only the end of +// the range is needed. +func (gap fileRefcountGapIterator) End() uint64 { + if ns := gap.NextSegment(); ns.Ok() { + return ns.Start() + } + return fileRefcountSetFunctions{}.MaxKey() +} + +// IsEmpty returns true if the iterated gap is empty (that is, the "gap" is +// between two adjacent segments.) +func (gap fileRefcountGapIterator) IsEmpty() bool { + return gap.Range().Length() == 0 +} + +// PrevSegment returns the segment immediately before the iterated gap. If no +// such segment exists, PrevSegment returns a terminal iterator. +func (gap fileRefcountGapIterator) PrevSegment() fileRefcountIterator { + return fileRefcountsegmentBeforePosition(gap.node, gap.index) +} + +// NextSegment returns the segment immediately after the iterated gap. If no +// such segment exists, NextSegment returns a terminal iterator. +func (gap fileRefcountGapIterator) NextSegment() fileRefcountIterator { + return fileRefcountsegmentAfterPosition(gap.node, gap.index) +} + +// PrevGap returns the iterated gap's predecessor. If no such gap exists, +// PrevGap returns a terminal iterator. +func (gap fileRefcountGapIterator) PrevGap() fileRefcountGapIterator { + seg := gap.PrevSegment() + if !seg.Ok() { + return fileRefcountGapIterator{} + } + return seg.PrevGap() +} + +// NextGap returns the iterated gap's successor. If no such gap exists, NextGap +// returns a terminal iterator. +func (gap fileRefcountGapIterator) NextGap() fileRefcountGapIterator { + seg := gap.NextSegment() + if !seg.Ok() { + return fileRefcountGapIterator{} + } + return seg.NextGap() +} + +// segmentBeforePosition returns the predecessor segment of the position given +// by n.children[i], which may or may not contain a child. If no such segment +// exists, segmentBeforePosition returns a terminal iterator. +func fileRefcountsegmentBeforePosition(n *fileRefcountnode, i int) fileRefcountIterator { + for i == 0 { + if n.parent == nil { + return fileRefcountIterator{} + } + n, i = n.parent, n.parentIndex + } + return fileRefcountIterator{n, i - 1} +} + +// segmentAfterPosition returns the successor segment of the position given by +// n.children[i], which may or may not contain a child. If no such segment +// exists, segmentAfterPosition returns a terminal iterator. 
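+//
+// Editor's note (not in the original diff): segmentBeforePosition and
+// segmentAfterPosition ascend the tree while the position sits at a node's
+// edge (i == 0 and i == n.nrSegments respectively). For instance, the
+// successor of the last position in the rightmost leaf never finds an
+// ancestor with a segment to its right, so it yields the terminal iterator.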
+func fileRefcountsegmentAfterPosition(n *fileRefcountnode, i int) fileRefcountIterator {
+ for i == n.nrSegments {
+ if n.parent == nil {
+ return fileRefcountIterator{}
+ }
+ n, i = n.parent, n.parentIndex
+ }
+ return fileRefcountIterator{n, i}
+}
+
+func fileRefcountzeroValueSlice(slice []int32) {
+ for i := range slice {
+ fileRefcountSetFunctions{}.ClearValue(&slice[i])
+ }
+}
+
+func fileRefcountzeroNodeSlice(slice []*fileRefcountnode) {
+ for i := range slice {
+ slice[i] = nil
+ }
+}
+
+// String stringifies a Set for debugging.
+func (s *fileRefcountSet) String() string {
+ return s.root.String()
+}
+
+// String stringifies a node (and all of its children) for debugging.
+func (n *fileRefcountnode) String() string {
+ var buf bytes.Buffer
+ n.writeDebugString(&buf, "")
+ return buf.String()
+}
+
+func (n *fileRefcountnode) writeDebugString(buf *bytes.Buffer, prefix string) {
+ if n.hasChildren != (n.nrSegments > 0 && n.children[0] != nil) {
+ buf.WriteString(prefix)
+ buf.WriteString(fmt.Sprintf("WARNING: inconsistent value of hasChildren: got %v, want %v\n", n.hasChildren, !n.hasChildren))
+ }
+ for i := 0; i < n.nrSegments; i++ {
+ if child := n.children[i]; child != nil {
+ cprefix := fmt.Sprintf("%s- % 3d ", prefix, i)
+ if child.parent != n || child.parentIndex != i {
+ buf.WriteString(cprefix)
+ buf.WriteString(fmt.Sprintf("WARNING: inconsistent linkage to parent: got (%p, %d), want (%p, %d)\n", child.parent, child.parentIndex, n, i))
+ }
+ child.writeDebugString(buf, fmt.Sprintf("%s- % 3d ", prefix, i))
+ }
+ buf.WriteString(prefix)
+ buf.WriteString(fmt.Sprintf("- % 3d: %v => %v\n", i, n.keys[i], n.values[i]))
+ }
+ if child := n.children[n.nrSegments]; child != nil {
+ child.writeDebugString(buf, fmt.Sprintf("%s- % 3d ", prefix, n.nrSegments))
+ }
+}
+
+// SegmentDataSlices represents segments from a set as slices of start, end, and
+// values. SegmentDataSlices is primarily used as an intermediate representation
+// for save/restore and the layout here is optimized for that.
+//
+// +stateify savable
+type fileRefcountSegmentDataSlices struct {
+ Start []uint64
+ End []uint64
+ Values []int32
+}
+
+// ExportSortedSlices returns a copy of all segments in the given set, in
+// ascending key order.
+func (s *fileRefcountSet) ExportSortedSlices() *fileRefcountSegmentDataSlices {
+ var sds fileRefcountSegmentDataSlices
+ for seg := s.FirstSegment(); seg.Ok(); seg = seg.NextSegment() {
+ sds.Start = append(sds.Start, seg.Start())
+ sds.End = append(sds.End, seg.End())
+ sds.Values = append(sds.Values, seg.Value())
+ }
+ sds.Start = sds.Start[:len(sds.Start):len(sds.Start)]
+ sds.End = sds.End[:len(sds.End):len(sds.End)]
+ sds.Values = sds.Values[:len(sds.Values):len(sds.Values)]
+ return &sds
+}
+
+// ImportSortedSlices initializes the given set from the given slices.
+//
+// Preconditions: s must be empty. sds must represent a valid set (the segments
+// in sds must have valid lengths that do not overlap). The segments in sds
+// must be sorted in ascending key order.
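+//
+// Editor's note: an illustrative sketch, not part of the original diff.
+// ExportSortedSlices and ImportSortedSlices are intended to round-trip, as
+// in the save/restore path below (src and dst are hypothetical names):
+//
+//	sds := src.ExportSortedSlices()
+//	var dst fileRefcountSet
+//	if err := dst.ImportSortedSlices(sds); err != nil {
+//		panic(err) // sds came from a valid set, so no overlap is possible
+//	}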
+func (s *fileRefcountSet) ImportSortedSlices(sds *fileRefcountSegmentDataSlices) error { + if !s.IsEmpty() { + return fmt.Errorf("cannot import into non-empty set %v", s) + } + gap := s.FirstGap() + for i := range sds.Start { + r := __generics_imported0.FileRange{sds.Start[i], sds.End[i]} + if !gap.Range().IsSupersetOf(r) { + return fmt.Errorf("segment overlaps a preceding segment or is incorrectly sorted: [%d, %d) => %v", sds.Start[i], sds.End[i], sds.Values[i]) + } + gap = s.InsertWithoutMerging(gap, r, sds.Values[i]).NextGap() + } + return nil +} +func (s *fileRefcountSet) saveRoot() *fileRefcountSegmentDataSlices { + return s.ExportSortedSlices() +} + +func (s *fileRefcountSet) loadRoot(sds *fileRefcountSegmentDataSlices) { + if err := s.ImportSortedSlices(sds); err != nil { + panic(err) + } +} diff --git a/pkg/sentry/mm/io.go b/pkg/sentry/mm/io.go new file mode 100644 index 000000000..e4c057d28 --- /dev/null +++ b/pkg/sentry/mm/io.go @@ -0,0 +1,639 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package mm + +import ( + "gvisor.googlesource.com/gvisor/pkg/sentry/context" + "gvisor.googlesource.com/gvisor/pkg/sentry/platform" + "gvisor.googlesource.com/gvisor/pkg/sentry/safemem" + "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" + "gvisor.googlesource.com/gvisor/pkg/syserror" +) + +// There are two supported ways to copy data to/from application virtual +// memory: +// +// 1. Internally-mapped copying: Determine the platform.File that backs the +// copied-to/from virtual address, obtain a mapping of its pages, and read or +// write to the mapping. +// +// 2. AddressSpace copying: If platform.Platform.SupportsAddressSpaceIO() is +// true, AddressSpace permissions are applicable, and an AddressSpace is +// available, copy directly through the AddressSpace, handling faults as +// needed. +// +// (Given that internally-mapped copying requires that backing memory is always +// implemented using a host file descriptor, we could also preadv/pwritev to it +// instead. But this would incur a host syscall for each use of the mapped +// page, whereas mmap is a one-time cost.) +// +// The fixed overhead of internally-mapped copying is expected to be higher +// than that of AddressSpace copying since the former always needs to translate +// addresses, whereas the latter only needs to do so when faults occur. +// However, the throughput of internally-mapped copying is expected to be +// somewhat higher than that of AddressSpace copying due to the high cost of +// page faults and because implementations of the latter usually rely on +// safecopy, which doesn't use AVX registers. So we prefer to use AddressSpace +// copying (when available) for smaller copies, and switch to internally-mapped +// copying once a size threshold is exceeded. +const ( + // copyMapMinBytes is the size threshold for switching to internally-mapped + // copying in CopyOut, CopyIn, and ZeroOut. 
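+ //
+ // Editor's note (not in the original diff): as an illustration, a 4 KB
+ // copy stays on the AddressSpace path, while a 1 MB copy crosses this
+ // threshold and pays the address-translation cost of internally-mapped
+ // copying, amortized over the larger transfer.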
+ copyMapMinBytes = 32 << 10 // 32 KB + + // rwMapMinBytes is the size threshold for switching to internally-mapped + // copying in CopyOutFrom and CopyInTo. It's lower than copyMapMinBytes + // since AddressSpace copying in this case requires additional buffering; + // see CopyOutFrom for details. + rwMapMinBytes = 512 +) + +// CheckIORange is similar to usermem.Addr.ToRange, but applies bounds checks +// consistent with Linux's arch/x86/include/asm/uaccess.h:access_ok(). +// +// Preconditions: length >= 0. +func (mm *MemoryManager) CheckIORange(addr usermem.Addr, length int64) (usermem.AddrRange, bool) { + // Note that access_ok() constrains end even if length == 0. + ar, ok := addr.ToRange(uint64(length)) + return ar, (ok && ar.End <= mm.layout.MaxAddr) +} + +// checkIOVec applies bound checks consistent with Linux's +// arch/x86/include/asm/uaccess.h:access_ok() to ars. +func (mm *MemoryManager) checkIOVec(ars usermem.AddrRangeSeq) bool { + for !ars.IsEmpty() { + ar := ars.Head() + if _, ok := mm.CheckIORange(ar.Start, int64(ar.Length())); !ok { + return false + } + ars = ars.Tail() + } + return true +} + +func (mm *MemoryManager) asioEnabled(opts usermem.IOOpts) bool { + return mm.haveASIO && !opts.IgnorePermissions && opts.AddressSpaceActive +} + +// translateIOError converts errors to EFAULT, as is usually reported for all +// I/O errors originating from MM in Linux. +func translateIOError(ctx context.Context, err error) error { + if err == nil { + return nil + } + if logIOErrors { + ctx.Debugf("MM I/O error: %v", err) + } + return syserror.EFAULT +} + +// CopyOut implements usermem.IO.CopyOut. +func (mm *MemoryManager) CopyOut(ctx context.Context, addr usermem.Addr, src []byte, opts usermem.IOOpts) (int, error) { + ar, ok := mm.CheckIORange(addr, int64(len(src))) + if !ok { + return 0, syserror.EFAULT + } + + if len(src) == 0 { + return 0, nil + } + + // Do AddressSpace IO if applicable. + if mm.asioEnabled(opts) && len(src) < copyMapMinBytes { + return mm.asCopyOut(ctx, addr, src) + } + + // Go through internal mappings. + n64, err := mm.withInternalMappings(ctx, ar, usermem.Write, opts.IgnorePermissions, func(ims safemem.BlockSeq) (uint64, error) { + n, err := safemem.CopySeq(ims, safemem.BlockSeqOf(safemem.BlockFromSafeSlice(src))) + return n, translateIOError(ctx, err) + }) + return int(n64), err +} + +func (mm *MemoryManager) asCopyOut(ctx context.Context, addr usermem.Addr, src []byte) (int, error) { + var done int + for { + n, err := mm.as.CopyOut(addr+usermem.Addr(done), src[done:]) + done += n + if err == nil { + return done, nil + } + if f, ok := err.(platform.SegmentationFault); ok { + ar, _ := addr.ToRange(uint64(len(src))) + if err := mm.handleASIOFault(ctx, f.Addr, ar, usermem.Write); err != nil { + return done, err + } + continue + } + return done, translateIOError(ctx, err) + } +} + +// CopyIn implements usermem.IO.CopyIn. +func (mm *MemoryManager) CopyIn(ctx context.Context, addr usermem.Addr, dst []byte, opts usermem.IOOpts) (int, error) { + ar, ok := mm.CheckIORange(addr, int64(len(dst))) + if !ok { + return 0, syserror.EFAULT + } + + if len(dst) == 0 { + return 0, nil + } + + // Do AddressSpace IO if applicable. + if mm.asioEnabled(opts) && len(dst) < copyMapMinBytes { + return mm.asCopyIn(ctx, addr, dst) + } + + // Go through internal mappings. 
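+ // Editor's note (not in the original diff): errors from the underlying
+ // copy are converted to EFAULT by translateIOError, matching Linux,
+ // while the returned byte count still reflects any partial progress.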
+ n64, err := mm.withInternalMappings(ctx, ar, usermem.Read, opts.IgnorePermissions, func(ims safemem.BlockSeq) (uint64, error) { + n, err := safemem.CopySeq(safemem.BlockSeqOf(safemem.BlockFromSafeSlice(dst)), ims) + return n, translateIOError(ctx, err) + }) + return int(n64), err +} + +func (mm *MemoryManager) asCopyIn(ctx context.Context, addr usermem.Addr, dst []byte) (int, error) { + var done int + for { + n, err := mm.as.CopyIn(addr+usermem.Addr(done), dst[done:]) + done += n + if err == nil { + return done, nil + } + if f, ok := err.(platform.SegmentationFault); ok { + ar, _ := addr.ToRange(uint64(len(dst))) + if err := mm.handleASIOFault(ctx, f.Addr, ar, usermem.Read); err != nil { + return done, err + } + continue + } + return done, translateIOError(ctx, err) + } +} + +// ZeroOut implements usermem.IO.ZeroOut. +func (mm *MemoryManager) ZeroOut(ctx context.Context, addr usermem.Addr, toZero int64, opts usermem.IOOpts) (int64, error) { + ar, ok := mm.CheckIORange(addr, toZero) + if !ok { + return 0, syserror.EFAULT + } + + if toZero == 0 { + return 0, nil + } + + // Do AddressSpace IO if applicable. + if mm.asioEnabled(opts) && toZero < copyMapMinBytes { + return mm.asZeroOut(ctx, addr, toZero) + } + + // Go through internal mappings. + return mm.withInternalMappings(ctx, ar, usermem.Write, opts.IgnorePermissions, func(dsts safemem.BlockSeq) (uint64, error) { + n, err := safemem.ZeroSeq(dsts) + return n, translateIOError(ctx, err) + }) +} + +func (mm *MemoryManager) asZeroOut(ctx context.Context, addr usermem.Addr, toZero int64) (int64, error) { + var done int64 + for { + n, err := mm.as.ZeroOut(addr+usermem.Addr(done), uintptr(toZero-done)) + done += int64(n) + if err == nil { + return done, nil + } + if f, ok := err.(platform.SegmentationFault); ok { + ar, _ := addr.ToRange(uint64(toZero)) + if err := mm.handleASIOFault(ctx, f.Addr, ar, usermem.Write); err != nil { + return done, err + } + continue + } + return done, translateIOError(ctx, err) + } +} + +// CopyOutFrom implements usermem.IO.CopyOutFrom. +func (mm *MemoryManager) CopyOutFrom(ctx context.Context, ars usermem.AddrRangeSeq, src safemem.Reader, opts usermem.IOOpts) (int64, error) { + if !mm.checkIOVec(ars) { + return 0, syserror.EFAULT + } + + if ars.NumBytes() == 0 { + return 0, nil + } + + // Do AddressSpace IO if applicable. + if mm.asioEnabled(opts) && ars.NumBytes() < rwMapMinBytes { + // We have to introduce a buffered copy, instead of just passing a + // safemem.BlockSeq representing addresses in the AddressSpace to src. + // This is because usermem.IO.CopyOutFrom() guarantees that it calls + // src.ReadToBlocks() at most once, which is incompatible with handling + // faults between calls. In the future, this is probably best resolved + // by introducing a CopyOutFrom variant or option that allows it to + // call src.ReadToBlocks() any number of times. + // + // This issue applies to CopyInTo as well. + buf := make([]byte, int(ars.NumBytes())) + bufN, bufErr := src.ReadToBlocks(safemem.BlockSeqOf(safemem.BlockFromSafeSlice(buf))) + var done int64 + for done < int64(bufN) { + ar := ars.Head() + cplen := int64(ar.Length()) + if cplen > int64(bufN)-done { + cplen = int64(bufN) - done + } + n, err := mm.asCopyOut(ctx, ar.Start, buf[int(done):int(done+cplen)]) + done += int64(n) + if err != nil { + return done, err + } + ars = ars.Tail() + } + // Do not convert errors returned by src to EFAULT. + return done, bufErr + } + + // Go through internal mappings. 
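+ // Editor's note (not in the original diff): unlike the buffered
+ // AddressSpace path above, the internal-mappings path can hand the
+ // target ranges to src.ReadToBlocks directly as a single BlockSeq, so
+ // ReadToBlocks is called exactly once and no staging buffer is needed.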
+ return mm.withVecInternalMappings(ctx, ars, usermem.Write, opts.IgnorePermissions, src.ReadToBlocks) +} + +// CopyInTo implements usermem.IO.CopyInTo. +func (mm *MemoryManager) CopyInTo(ctx context.Context, ars usermem.AddrRangeSeq, dst safemem.Writer, opts usermem.IOOpts) (int64, error) { + if !mm.checkIOVec(ars) { + return 0, syserror.EFAULT + } + + if ars.NumBytes() == 0 { + return 0, nil + } + + // Do AddressSpace IO if applicable. + if mm.asioEnabled(opts) && ars.NumBytes() < rwMapMinBytes { + buf := make([]byte, int(ars.NumBytes())) + var done int + var bufErr error + for !ars.IsEmpty() { + ar := ars.Head() + var n int + n, bufErr = mm.asCopyIn(ctx, ar.Start, buf[done:done+int(ar.Length())]) + done += n + if bufErr != nil { + break + } + ars = ars.Tail() + } + n, err := dst.WriteFromBlocks(safemem.BlockSeqOf(safemem.BlockFromSafeSlice(buf[:done]))) + if err != nil { + return int64(n), err + } + // Do not convert errors returned by dst to EFAULT. + return int64(n), bufErr + } + + // Go through internal mappings. + return mm.withVecInternalMappings(ctx, ars, usermem.Read, opts.IgnorePermissions, dst.WriteFromBlocks) +} + +// SwapUint32 implements usermem.IO.SwapUint32. +func (mm *MemoryManager) SwapUint32(ctx context.Context, addr usermem.Addr, new uint32, opts usermem.IOOpts) (uint32, error) { + ar, ok := mm.CheckIORange(addr, 4) + if !ok { + return 0, syserror.EFAULT + } + + // Do AddressSpace IO if applicable. + if mm.haveASIO && opts.AddressSpaceActive && !opts.IgnorePermissions { + for { + old, err := mm.as.SwapUint32(addr, new) + if err == nil { + return old, nil + } + if f, ok := err.(platform.SegmentationFault); ok { + if err := mm.handleASIOFault(ctx, f.Addr, ar, usermem.ReadWrite); err != nil { + return 0, err + } + continue + } + return 0, translateIOError(ctx, err) + } + } + + // Go through internal mappings. + var old uint32 + _, err := mm.withInternalMappings(ctx, ar, usermem.ReadWrite, opts.IgnorePermissions, func(ims safemem.BlockSeq) (uint64, error) { + if ims.NumBlocks() != 1 || ims.NumBytes() != 4 { + // Atomicity is unachievable across mappings. + return 0, syserror.EFAULT + } + im := ims.Head() + var err error + old, err = safemem.SwapUint32(im, new) + if err != nil { + return 0, translateIOError(ctx, err) + } + // Return the number of bytes read. + return 4, nil + }) + return old, err +} + +// CompareAndSwapUint32 implements usermem.IO.CompareAndSwapUint32. +func (mm *MemoryManager) CompareAndSwapUint32(ctx context.Context, addr usermem.Addr, old, new uint32, opts usermem.IOOpts) (uint32, error) { + ar, ok := mm.CheckIORange(addr, 4) + if !ok { + return 0, syserror.EFAULT + } + + // Do AddressSpace IO if applicable. + if mm.haveASIO && opts.AddressSpaceActive && !opts.IgnorePermissions { + for { + prev, err := mm.as.CompareAndSwapUint32(addr, old, new) + if err == nil { + return prev, nil + } + if f, ok := err.(platform.SegmentationFault); ok { + if err := mm.handleASIOFault(ctx, f.Addr, ar, usermem.ReadWrite); err != nil { + return 0, err + } + continue + } + return 0, translateIOError(ctx, err) + } + } + + // Go through internal mappings. + var prev uint32 + _, err := mm.withInternalMappings(ctx, ar, usermem.ReadWrite, opts.IgnorePermissions, func(ims safemem.BlockSeq) (uint64, error) { + if ims.NumBlocks() != 1 || ims.NumBytes() != 4 { + // Atomicity is unachievable across mappings. 
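+ // Editor's note (not in the original diff): a 4-byte value
+ // straddling two internal mappings cannot be updated by a single
+ // atomic instruction, so the operation fails with EFAULT rather
+ // than performing a non-atomic read-modify-write.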
+ return 0, syserror.EFAULT
+ }
+ im := ims.Head()
+ var err error
+ prev, err = safemem.CompareAndSwapUint32(im, old, new)
+ if err != nil {
+ return 0, translateIOError(ctx, err)
+ }
+ // Return the number of bytes read.
+ return 4, nil
+ })
+ return prev, err
+}
+
+// LoadUint32 implements usermem.IO.LoadUint32.
+func (mm *MemoryManager) LoadUint32(ctx context.Context, addr usermem.Addr, opts usermem.IOOpts) (uint32, error) {
+ ar, ok := mm.CheckIORange(addr, 4)
+ if !ok {
+ return 0, syserror.EFAULT
+ }
+
+ // Do AddressSpace IO if applicable.
+ if mm.haveASIO && opts.AddressSpaceActive && !opts.IgnorePermissions {
+ for {
+ val, err := mm.as.LoadUint32(addr)
+ if err == nil {
+ return val, nil
+ }
+ if f, ok := err.(platform.SegmentationFault); ok {
+ if err := mm.handleASIOFault(ctx, f.Addr, ar, usermem.Read); err != nil {
+ return 0, err
+ }
+ continue
+ }
+ return 0, translateIOError(ctx, err)
+ }
+ }
+
+ // Go through internal mappings.
+ var val uint32
+ _, err := mm.withInternalMappings(ctx, ar, usermem.Read, opts.IgnorePermissions, func(ims safemem.BlockSeq) (uint64, error) {
+ if ims.NumBlocks() != 1 || ims.NumBytes() != 4 {
+ // Atomicity is unachievable across mappings.
+ return 0, syserror.EFAULT
+ }
+ im := ims.Head()
+ var err error
+ val, err = safemem.LoadUint32(im)
+ if err != nil {
+ return 0, translateIOError(ctx, err)
+ }
+ // Return the number of bytes read.
+ return 4, nil
+ })
+ return val, err
+}
+
+// handleASIOFault handles a page fault at address addr for an AddressSpaceIO
+// operation spanning ioar.
+//
+// Preconditions: mm.as != nil. ioar.Length() != 0. ioar.Contains(addr).
+func (mm *MemoryManager) handleASIOFault(ctx context.Context, addr usermem.Addr, ioar usermem.AddrRange, at usermem.AccessType) error {
+ // Try to map all remaining pages in the I/O operation. This RoundUp can't
+ // overflow because otherwise it would have been caught by CheckIORange.
+ end, _ := ioar.End.RoundUp()
+ ar := usermem.AddrRange{addr.RoundDown(), end}
+
+ // Don't bother trying existingPMAsLocked; in most cases, if we did have
+ // existing pmas, we wouldn't have faulted.
+
+ // Ensure that we have usable vmas. Here and below, only return early if we
+ // can't map the first (faulting) page; failures to map later pages are
+ // silently ignored. This maximizes partial success.
+ mm.mappingMu.RLock()
+ vseg, vend, err := mm.getVMAsLocked(ctx, ar, at, false)
+ if vendaddr := vend.Start(); vendaddr < ar.End {
+ if vendaddr <= ar.Start {
+ mm.mappingMu.RUnlock()
+ return translateIOError(ctx, err)
+ }
+ ar.End = vendaddr
+ }
+
+ // Ensure that we have usable pmas.
+ mm.activeMu.Lock()
+ pseg, pend, err := mm.getPMAsLocked(ctx, vseg, ar, at)
+ mm.mappingMu.RUnlock()
+ if pendaddr := pend.Start(); pendaddr < ar.End {
+ if pendaddr <= ar.Start {
+ mm.activeMu.Unlock()
+ return translateIOError(ctx, err)
+ }
+ ar.End = pendaddr
+ }
+
+ // Downgrade to a read-lock on activeMu since we don't need to mutate pmas
+ // anymore.
+ mm.activeMu.DowngradeLock()
+
+ err = mm.mapASLocked(pseg, ar, false)
+ mm.activeMu.RUnlock()
+ return translateIOError(ctx, err)
+}
+
+// withInternalMappings ensures that pmas exist for all addresses in ar,
+// support access of type (at, ignorePermissions), and have internal mappings
+// cached. It then calls f with mm.activeMu locked for reading, passing
+// internal mappings for the subrange of ar for which this property holds.
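+//
+// Editor's note: an illustrative sketch, not part of the original diff. A
+// typical caller wraps a single safemem operation, as CopyOut does above:
+//
+//	mm.withInternalMappings(ctx, ar, usermem.Write, opts.IgnorePermissions,
+//		func(ims safemem.BlockSeq) (uint64, error) {
+//			// Error translation elided; see CopyOut for the full form.
+//			return safemem.CopySeq(ims, safemem.BlockSeqOf(safemem.BlockFromSafeSlice(src)))
+//		})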
+// +// withInternalMappings takes a function returning uint64 since many safemem +// functions have this property, but returns an int64 since this is usually +// more useful for usermem.IO methods. +// +// Preconditions: 0 < ar.Length() <= math.MaxInt64. +func (mm *MemoryManager) withInternalMappings(ctx context.Context, ar usermem.AddrRange, at usermem.AccessType, ignorePermissions bool, f func(safemem.BlockSeq) (uint64, error)) (int64, error) { + // If pmas are already available, we can do IO without touching mm.vmas or + // mm.mappingMu. + mm.activeMu.RLock() + if pseg := mm.existingPMAsLocked(ar, at, ignorePermissions, true /* needInternalMappings */); pseg.Ok() { + n, err := f(mm.internalMappingsLocked(pseg, ar)) + mm.activeMu.RUnlock() + // Do not convert errors returned by f to EFAULT. + return int64(n), err + } + mm.activeMu.RUnlock() + + // Ensure that we have usable vmas. + mm.mappingMu.RLock() + vseg, vend, verr := mm.getVMAsLocked(ctx, ar, at, ignorePermissions) + if vendaddr := vend.Start(); vendaddr < ar.End { + if vendaddr <= ar.Start { + mm.mappingMu.RUnlock() + return 0, translateIOError(ctx, verr) + } + ar.End = vendaddr + } + + // Ensure that we have usable pmas. + mm.activeMu.Lock() + pseg, pend, perr := mm.getPMAsLocked(ctx, vseg, ar, at) + mm.mappingMu.RUnlock() + if pendaddr := pend.Start(); pendaddr < ar.End { + if pendaddr <= ar.Start { + mm.activeMu.Unlock() + return 0, translateIOError(ctx, perr) + } + ar.End = pendaddr + } + imend, imerr := mm.getPMAInternalMappingsLocked(pseg, ar) + mm.activeMu.DowngradeLock() + if imendaddr := imend.Start(); imendaddr < ar.End { + if imendaddr <= ar.Start { + mm.activeMu.RUnlock() + return 0, translateIOError(ctx, imerr) + } + ar.End = imendaddr + } + + // Do I/O. + un, err := f(mm.internalMappingsLocked(pseg, ar)) + mm.activeMu.RUnlock() + n := int64(un) + + // Return the first error in order of progress through ar. + if err != nil { + // Do not convert errors returned by f to EFAULT. + return n, err + } + if imerr != nil { + return n, translateIOError(ctx, imerr) + } + if perr != nil { + return n, translateIOError(ctx, perr) + } + return n, translateIOError(ctx, verr) +} + +// withVecInternalMappings ensures that pmas exist for all addresses in ars, +// support access of type (at, ignorePermissions), and have internal mappings +// cached. It then calls f with mm.activeMu locked for reading, passing +// internal mappings for the subset of ars for which this property holds. +// +// Preconditions: !ars.IsEmpty(). +func (mm *MemoryManager) withVecInternalMappings(ctx context.Context, ars usermem.AddrRangeSeq, at usermem.AccessType, ignorePermissions bool, f func(safemem.BlockSeq) (uint64, error)) (int64, error) { + // withInternalMappings is faster than withVecInternalMappings because of + // iterator plumbing (this isn't generally practical in the vector case due + // to iterator invalidation between AddrRanges). Use it if possible. + if ars.NumRanges() == 1 { + return mm.withInternalMappings(ctx, ars.Head(), at, ignorePermissions, f) + } + + // If pmas are already available, we can do IO without touching mm.vmas or + // mm.mappingMu. + mm.activeMu.RLock() + if mm.existingVecPMAsLocked(ars, at, ignorePermissions, true /* needInternalMappings */) { + n, err := f(mm.vecInternalMappingsLocked(ars)) + mm.activeMu.RUnlock() + // Do not convert errors returned by f to EFAULT. + return int64(n), err + } + mm.activeMu.RUnlock() + + // Ensure that we have usable vmas. 
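+ // Editor's note (not in the original diff): the locking below mirrors
+ // withInternalMappings: mappingMu is read-locked just long enough to
+ // find vmas, activeMu is write-locked to materialize pmas and their
+ // internal mappings, then downgraded to a read lock for the I/O itself.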
+ mm.mappingMu.RLock() + vars, verr := mm.getVecVMAsLocked(ctx, ars, at, ignorePermissions) + if vars.NumBytes() == 0 { + mm.mappingMu.RUnlock() + return 0, translateIOError(ctx, verr) + } + + // Ensure that we have usable pmas. + mm.activeMu.Lock() + pars, perr := mm.getVecPMAsLocked(ctx, vars, at) + mm.mappingMu.RUnlock() + if pars.NumBytes() == 0 { + mm.activeMu.Unlock() + return 0, translateIOError(ctx, perr) + } + imars, imerr := mm.getVecPMAInternalMappingsLocked(pars) + mm.activeMu.DowngradeLock() + if imars.NumBytes() == 0 { + mm.activeMu.RUnlock() + return 0, translateIOError(ctx, imerr) + } + + // Do I/O. + un, err := f(mm.vecInternalMappingsLocked(imars)) + mm.activeMu.RUnlock() + n := int64(un) + + // Return the first error in order of progress through ars. + if err != nil { + // Do not convert errors from f to EFAULT. + return n, err + } + if imerr != nil { + return n, translateIOError(ctx, imerr) + } + if perr != nil { + return n, translateIOError(ctx, perr) + } + return n, translateIOError(ctx, verr) +} + +// truncatedAddrRangeSeq returns a copy of ars, but with the end truncated to +// at most address end on AddrRange arsit.Head(). It is used in vector I/O paths to +// truncate usermem.AddrRangeSeq when errors occur. +// +// Preconditions: !arsit.IsEmpty(). end <= arsit.Head().End. +func truncatedAddrRangeSeq(ars, arsit usermem.AddrRangeSeq, end usermem.Addr) usermem.AddrRangeSeq { + ar := arsit.Head() + if end <= ar.Start { + return ars.TakeFirst64(ars.NumBytes() - arsit.NumBytes()) + } + return ars.TakeFirst64(ars.NumBytes() - arsit.NumBytes() + int64(end-ar.Start)) +} diff --git a/pkg/sentry/mm/io_list.go b/pkg/sentry/mm/io_list.go new file mode 100755 index 000000000..99c83c4b9 --- /dev/null +++ b/pkg/sentry/mm/io_list.go @@ -0,0 +1,173 @@ +package mm + +// ElementMapper provides an identity mapping by default. +// +// This can be replaced to provide a struct that maps elements to linker +// objects, if they are not the same. An ElementMapper is not typically +// required if: Linker is left as is, Element is left as is, or Linker and +// Element are the same type. +type ioElementMapper struct{} + +// linkerFor maps an Element to a Linker. +// +// This default implementation should be inlined. +// +//go:nosplit +func (ioElementMapper) linkerFor(elem *ioResult) *ioResult { return elem } + +// List is an intrusive list. Entries can be added to or removed from the list +// in O(1) time and with no additional memory allocations. +// +// The zero value for List is an empty list ready to use. +// +// To iterate over a list (where l is a List): +// for e := l.Front(); e != nil; e = e.Next() { +// // do something with e. +// } +// +// +stateify savable +type ioList struct { + head *ioResult + tail *ioResult +} + +// Reset resets list l to the empty state. +func (l *ioList) Reset() { + l.head = nil + l.tail = nil +} + +// Empty returns true iff the list is empty. +func (l *ioList) Empty() bool { + return l.head == nil +} + +// Front returns the first element of list l or nil. +func (l *ioList) Front() *ioResult { + return l.head +} + +// Back returns the last element of list l or nil. +func (l *ioList) Back() *ioResult { + return l.tail +} + +// PushFront inserts the element e at the front of list l. 
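+//
+// Editor's note: an illustrative sketch, not part of the original diff
+// (e is a hypothetical *ioResult):
+//
+//	var l ioList
+//	l.PushFront(e) // e becomes l.Front(); on an empty list, also l.Back()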
+func (l *ioList) PushFront(e *ioResult) { + ioElementMapper{}.linkerFor(e).SetNext(l.head) + ioElementMapper{}.linkerFor(e).SetPrev(nil) + + if l.head != nil { + ioElementMapper{}.linkerFor(l.head).SetPrev(e) + } else { + l.tail = e + } + + l.head = e +} + +// PushBack inserts the element e at the back of list l. +func (l *ioList) PushBack(e *ioResult) { + ioElementMapper{}.linkerFor(e).SetNext(nil) + ioElementMapper{}.linkerFor(e).SetPrev(l.tail) + + if l.tail != nil { + ioElementMapper{}.linkerFor(l.tail).SetNext(e) + } else { + l.head = e + } + + l.tail = e +} + +// PushBackList inserts list m at the end of list l, emptying m. +func (l *ioList) PushBackList(m *ioList) { + if l.head == nil { + l.head = m.head + l.tail = m.tail + } else if m.head != nil { + ioElementMapper{}.linkerFor(l.tail).SetNext(m.head) + ioElementMapper{}.linkerFor(m.head).SetPrev(l.tail) + + l.tail = m.tail + } + + m.head = nil + m.tail = nil +} + +// InsertAfter inserts e after b. +func (l *ioList) InsertAfter(b, e *ioResult) { + a := ioElementMapper{}.linkerFor(b).Next() + ioElementMapper{}.linkerFor(e).SetNext(a) + ioElementMapper{}.linkerFor(e).SetPrev(b) + ioElementMapper{}.linkerFor(b).SetNext(e) + + if a != nil { + ioElementMapper{}.linkerFor(a).SetPrev(e) + } else { + l.tail = e + } +} + +// InsertBefore inserts e before a. +func (l *ioList) InsertBefore(a, e *ioResult) { + b := ioElementMapper{}.linkerFor(a).Prev() + ioElementMapper{}.linkerFor(e).SetNext(a) + ioElementMapper{}.linkerFor(e).SetPrev(b) + ioElementMapper{}.linkerFor(a).SetPrev(e) + + if b != nil { + ioElementMapper{}.linkerFor(b).SetNext(e) + } else { + l.head = e + } +} + +// Remove removes e from l. +func (l *ioList) Remove(e *ioResult) { + prev := ioElementMapper{}.linkerFor(e).Prev() + next := ioElementMapper{}.linkerFor(e).Next() + + if prev != nil { + ioElementMapper{}.linkerFor(prev).SetNext(next) + } else { + l.head = next + } + + if next != nil { + ioElementMapper{}.linkerFor(next).SetPrev(prev) + } else { + l.tail = prev + } +} + +// Entry is a default implementation of Linker. Users can add anonymous fields +// of this type to their structs to make them automatically implement the +// methods needed by List. +// +// +stateify savable +type ioEntry struct { + next *ioResult + prev *ioResult +} + +// Next returns the entry that follows e in the list. +func (e *ioEntry) Next() *ioResult { + return e.next +} + +// Prev returns the entry that precedes e in the list. +func (e *ioEntry) Prev() *ioResult { + return e.prev +} + +// SetNext assigns 'entry' as the entry that follows e in the list. +func (e *ioEntry) SetNext(elem *ioResult) { + e.next = elem +} + +// SetPrev assigns 'entry' as the entry that precedes e in the list. +func (e *ioEntry) SetPrev(elem *ioResult) { + e.prev = elem +} diff --git a/pkg/sentry/mm/lifecycle.go b/pkg/sentry/mm/lifecycle.go new file mode 100644 index 000000000..7a65a62a2 --- /dev/null +++ b/pkg/sentry/mm/lifecycle.go @@ -0,0 +1,234 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +package mm + +import ( + "fmt" + "sync/atomic" + + "gvisor.googlesource.com/gvisor/pkg/atomicbitops" + "gvisor.googlesource.com/gvisor/pkg/sentry/arch" + "gvisor.googlesource.com/gvisor/pkg/sentry/context" + "gvisor.googlesource.com/gvisor/pkg/sentry/limits" + "gvisor.googlesource.com/gvisor/pkg/sentry/memmap" + "gvisor.googlesource.com/gvisor/pkg/sentry/pgalloc" + "gvisor.googlesource.com/gvisor/pkg/sentry/platform" + "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" +) + +// NewMemoryManager returns a new MemoryManager with no mappings and 1 user. +func NewMemoryManager(p platform.Platform, mfp pgalloc.MemoryFileProvider) *MemoryManager { + return &MemoryManager{ + p: p, + mfp: mfp, + haveASIO: p.SupportsAddressSpaceIO(), + privateRefs: &privateRefs{}, + users: 1, + auxv: arch.Auxv{}, + aioManager: aioManager{contexts: make(map[uint64]*AIOContext)}, + } +} + +// SetMmapLayout initializes mm's layout from the given arch.Context. +// +// Preconditions: mm contains no mappings and is not used concurrently. +func (mm *MemoryManager) SetMmapLayout(ac arch.Context, r *limits.LimitSet) (arch.MmapLayout, error) { + layout, err := ac.NewMmapLayout(mm.p.MinUserAddress(), mm.p.MaxUserAddress(), r) + if err != nil { + return arch.MmapLayout{}, err + } + mm.layout = layout + return layout, nil +} + +// Fork creates a copy of mm with 1 user, as for Linux syscalls fork() or +// clone() (without CLONE_VM). +func (mm *MemoryManager) Fork(ctx context.Context) (*MemoryManager, error) { + mm.metadataMu.Lock() + defer mm.metadataMu.Unlock() + mm.mappingMu.RLock() + defer mm.mappingMu.RUnlock() + mm2 := &MemoryManager{ + p: mm.p, + mfp: mm.mfp, + haveASIO: mm.haveASIO, + layout: mm.layout, + privateRefs: mm.privateRefs, + users: 1, + brk: mm.brk, + usageAS: mm.usageAS, + dataAS: mm.dataAS, + // "The child does not inherit its parent's memory locks (mlock(2), + // mlockall(2))." - fork(2). So lockedAS is 0 and defMLockMode is + // MLockNone, both of which are zero values. vma.mlockMode is reset + // when copied below. + captureInvalidations: true, + argv: mm.argv, + envv: mm.envv, + auxv: append(arch.Auxv(nil), mm.auxv...), + // IncRef'd below, once we know that there isn't an error. + executable: mm.executable, + aioManager: aioManager{contexts: make(map[uint64]*AIOContext)}, + } + + // Copy vmas. + dstvgap := mm2.vmas.FirstGap() + for srcvseg := mm.vmas.FirstSegment(); srcvseg.Ok(); srcvseg = srcvseg.NextSegment() { + vma := srcvseg.Value() // makes a copy of the vma + vmaAR := srcvseg.Range() + // Inform the Mappable, if any, of the new mapping. + if vma.mappable != nil { + if err := vma.mappable.AddMapping(ctx, mm2, vmaAR, vma.off, vma.canWriteMappableLocked()); err != nil { + mm2.removeVMAsLocked(ctx, mm2.applicationAddrRange()) + return nil, err + } + } + if vma.id != nil { + vma.id.IncRef() + } + vma.mlockMode = memmap.MLockNone + dstvgap = mm2.vmas.Insert(dstvgap, vmaAR, vma).NextGap() + // We don't need to update mm2.usageAS since we copied it from mm + // above. + } + + // Copy pmas. We have to lock mm.activeMu for writing to make existing + // private pmas copy-on-write. We also have to lock mm2.activeMu since + // after copying vmas above, memmap.Mappables may call mm2.Invalidate. 
We + // only copy private pmas, since in the common case where fork(2) is + // immediately followed by execve(2), copying non-private pmas that can be + // regenerated by calling memmap.Mappable.Translate is a waste of time. + // (Linux does the same; compare kernel/fork.c:dup_mmap() => + // mm/memory.c:copy_page_range().) + mm2.activeMu.Lock() + defer mm2.activeMu.Unlock() + mm.activeMu.Lock() + defer mm.activeMu.Unlock() + dstpgap := mm2.pmas.FirstGap() + var unmapAR usermem.AddrRange + for srcpseg := mm.pmas.FirstSegment(); srcpseg.Ok(); srcpseg = srcpseg.NextSegment() { + pma := srcpseg.ValuePtr() + if !pma.private { + continue + } + if !pma.needCOW { + pma.needCOW = true + if pma.effectivePerms.Write { + // We don't want to unmap the whole address space, even though + // doing so would reduce calls to unmapASLocked(), because mm + // will most likely continue to be used after the fork, so + // unmapping pmas unnecessarily will result in extra page + // faults. But we do want to merge consecutive AddrRanges + // across pma boundaries. + if unmapAR.End == srcpseg.Start() { + unmapAR.End = srcpseg.End() + } else { + if unmapAR.Length() != 0 { + mm.unmapASLocked(unmapAR) + } + unmapAR = srcpseg.Range() + } + pma.effectivePerms.Write = false + } + pma.maxPerms.Write = false + } + fr := srcpseg.fileRange() + mm2.incPrivateRef(fr) + srcpseg.ValuePtr().file.IncRef(fr) + addrRange := srcpseg.Range() + mm2.addRSSLocked(addrRange) + dstpgap = mm2.pmas.Insert(dstpgap, addrRange, *pma).NextGap() + } + if unmapAR.Length() != 0 { + mm.unmapASLocked(unmapAR) + } + + // Between when we call memmap.Mappable.AddMapping while copying vmas and + // when we lock mm2.activeMu to copy pmas, calls to mm2.Invalidate() are + // ineffective because the pmas they invalidate haven't yet been copied, + // possibly allowing mm2 to get invalidated translations: + // + // Invalidating Mappable mm.Fork + // --------------------- ------- + // + // mm2.Invalidate() + // mm.activeMu.Lock() + // mm.Invalidate() /* blocks */ + // mm2.activeMu.Lock() + // (mm copies invalidated pma to mm2) + // + // This would technically be both safe (since we only copy private pmas, + // which will still hold a reference on their memory) and consistent with + // Linux, but we avoid it anyway by setting mm2.captureInvalidations during + // construction, causing calls to mm2.Invalidate() to be captured in + // mm2.capturedInvalidations, to be replayed after pmas are copied - i.e. + // here. + mm2.captureInvalidations = false + for _, invArgs := range mm2.capturedInvalidations { + mm2.invalidateLocked(invArgs.ar, invArgs.opts.InvalidatePrivate, true) + } + mm2.capturedInvalidations = nil + + if mm2.executable != nil { + mm2.executable.IncRef() + } + return mm2, nil +} + +// IncUsers increments mm's user count and returns true. If the user count is +// already 0, IncUsers does nothing and returns false. +func (mm *MemoryManager) IncUsers() bool { + return atomicbitops.IncUnlessZeroInt32(&mm.users) +} + +// DecUsers decrements mm's user count. If the user count reaches 0, all +// mappings in mm are unmapped. +func (mm *MemoryManager) DecUsers(ctx context.Context) { + if users := atomic.AddInt32(&mm.users, -1); users > 0 { + return + } else if users < 0 { + panic(fmt.Sprintf("Invalid MemoryManager.users: %d", users)) + } + + mm.aioManager.destroy() + + mm.metadataMu.Lock() + exe := mm.executable + mm.executable = nil + mm.metadataMu.Unlock() + if exe != nil { + exe.DecRef() + } + + mm.activeMu.Lock() + // Sanity check. 
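+ // Editor's note (not in the original diff): every context that keeps
+ // the address space active also holds a user reference, so mm.active
+ // must already be zero once users reaches zero; a non-zero value here
+ // indicates a missing Deactivate call.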
+ if atomic.LoadInt32(&mm.active) != 0 { + panic("active address space lost?") + } + // Make sure the AddressSpace is returned. + if mm.as != nil { + mm.as.Release() + mm.as = nil + } + mm.activeMu.Unlock() + + mm.mappingMu.Lock() + defer mm.mappingMu.Unlock() + // If mm is being dropped before mm.SetMmapLayout was called, + // mm.applicationAddrRange() will be empty. + if ar := mm.applicationAddrRange(); ar.Length() != 0 { + mm.unmapLocked(ctx, ar) + } +} diff --git a/pkg/sentry/mm/metadata.go b/pkg/sentry/mm/metadata.go new file mode 100644 index 000000000..9768e51f1 --- /dev/null +++ b/pkg/sentry/mm/metadata.go @@ -0,0 +1,139 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package mm + +import ( + "gvisor.googlesource.com/gvisor/pkg/sentry/arch" + "gvisor.googlesource.com/gvisor/pkg/sentry/fs" + "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" +) + +// ArgvStart returns the start of the application argument vector. +// +// There is no guarantee that this value is sensible w.r.t. ArgvEnd. +func (mm *MemoryManager) ArgvStart() usermem.Addr { + mm.metadataMu.Lock() + defer mm.metadataMu.Unlock() + return mm.argv.Start +} + +// SetArgvStart sets the start of the application argument vector. +func (mm *MemoryManager) SetArgvStart(a usermem.Addr) { + mm.metadataMu.Lock() + defer mm.metadataMu.Unlock() + mm.argv.Start = a +} + +// ArgvEnd returns the end of the application argument vector. +// +// There is no guarantee that this value is sensible w.r.t. ArgvStart. +func (mm *MemoryManager) ArgvEnd() usermem.Addr { + mm.metadataMu.Lock() + defer mm.metadataMu.Unlock() + return mm.argv.End +} + +// SetArgvEnd sets the end of the application argument vector. +func (mm *MemoryManager) SetArgvEnd(a usermem.Addr) { + mm.metadataMu.Lock() + defer mm.metadataMu.Unlock() + mm.argv.End = a +} + +// EnvvStart returns the start of the application environment vector. +// +// There is no guarantee that this value is sensible w.r.t. EnvvEnd. +func (mm *MemoryManager) EnvvStart() usermem.Addr { + mm.metadataMu.Lock() + defer mm.metadataMu.Unlock() + return mm.envv.Start +} + +// SetEnvvStart sets the start of the application environment vector. +func (mm *MemoryManager) SetEnvvStart(a usermem.Addr) { + mm.metadataMu.Lock() + defer mm.metadataMu.Unlock() + mm.envv.Start = a +} + +// EnvvEnd returns the end of the application environment vector. +// +// There is no guarantee that this value is sensible w.r.t. EnvvStart. +func (mm *MemoryManager) EnvvEnd() usermem.Addr { + mm.metadataMu.Lock() + defer mm.metadataMu.Unlock() + return mm.envv.End +} + +// SetEnvvEnd sets the end of the application environment vector. +func (mm *MemoryManager) SetEnvvEnd(a usermem.Addr) { + mm.metadataMu.Lock() + defer mm.metadataMu.Unlock() + mm.envv.End = a +} + +// Auxv returns the current map of auxiliary vectors. +func (mm *MemoryManager) Auxv() arch.Auxv { + mm.metadataMu.Lock() + defer mm.metadataMu.Unlock() + return append(arch.Auxv(nil), mm.auxv...) 
+} + +// SetAuxv sets the entire map of auxiliary vectors. +func (mm *MemoryManager) SetAuxv(auxv arch.Auxv) { + mm.metadataMu.Lock() + defer mm.metadataMu.Unlock() + mm.auxv = append(arch.Auxv(nil), auxv...) +} + +// Executable returns the executable, if available. +// +// An additional reference will be taken in the case of a non-nil executable, +// which must be released by the caller. +func (mm *MemoryManager) Executable() *fs.Dirent { + mm.metadataMu.Lock() + defer mm.metadataMu.Unlock() + + if mm.executable == nil { + return nil + } + + mm.executable.IncRef() + return mm.executable +} + +// SetExecutable sets the executable. +// +// This takes a reference on d. +func (mm *MemoryManager) SetExecutable(d *fs.Dirent) { + mm.metadataMu.Lock() + + // Grab a new reference. + d.IncRef() + + // Set the executable. + orig := mm.executable + mm.executable = d + + mm.metadataMu.Unlock() + + // Release the old reference. + // + // Do this without holding the lock, since it may wind up doing some + // I/O to sync the dirent, etc. + if orig != nil { + orig.DecRef() + } +} diff --git a/pkg/sentry/mm/mm.go b/pkg/sentry/mm/mm.go new file mode 100644 index 000000000..eb6defa2b --- /dev/null +++ b/pkg/sentry/mm/mm.go @@ -0,0 +1,456 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package mm provides a memory management subsystem. See README.md for a +// detailed overview. +// +// Lock order: +// +// fs locks, except for memmap.Mappable locks +// mm.MemoryManager.metadataMu +// mm.MemoryManager.mappingMu +// Locks taken by memmap.Mappable methods other than Translate +// mm.MemoryManager.activeMu +// Locks taken by memmap.Mappable.Translate +// mm.privateRefs.mu +// platform.AddressSpace locks +// platform.File locks +// mm.aioManager.mu +// mm.AIOContext.mu +// +// Only mm.MemoryManager.Fork is permitted to lock mm.MemoryManager.activeMu in +// multiple mm.MemoryManagers, as it does so in a well-defined order (forked +// child first). +package mm + +import ( + "sync" + + "gvisor.googlesource.com/gvisor/pkg/sentry/arch" + "gvisor.googlesource.com/gvisor/pkg/sentry/fs" + "gvisor.googlesource.com/gvisor/pkg/sentry/memmap" + "gvisor.googlesource.com/gvisor/pkg/sentry/pgalloc" + "gvisor.googlesource.com/gvisor/pkg/sentry/platform" + "gvisor.googlesource.com/gvisor/pkg/sentry/safemem" + "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" + "gvisor.googlesource.com/gvisor/third_party/gvsync" +) + +// MemoryManager implements a virtual address space. +// +// +stateify savable +type MemoryManager struct { + // p and mfp are immutable. + p platform.Platform + mfp pgalloc.MemoryFileProvider + + // haveASIO is the cached result of p.SupportsAddressSpaceIO(). Aside from + // eliminating an indirect call in the hot I/O path, this makes + // MemoryManager.asioEnabled() a leaf function, allowing it to be inlined. + // + // haveASIO is immutable. + haveASIO bool `state:"nosave"` + + // layout is the memory layout. 
+ // + // layout is set by the binary loader before the MemoryManager can be used. + layout arch.MmapLayout + + // privateRefs stores reference counts for private memory (memory whose + // ownership is shared by one or more pmas instead of being owned by a + // memmap.Mappable). + // + // privateRefs is immutable. + privateRefs *privateRefs + + // users is the number of dependences on the mappings in the MemoryManager. + // When the number of references in users reaches zero, all mappings are + // unmapped. + // + // users is accessed using atomic memory operations. + users int32 + + // mappingMu is analogous to Linux's struct mm_struct::mmap_sem. + mappingMu gvsync.DowngradableRWMutex `state:"nosave"` + + // vmas stores virtual memory areas. Since vmas are stored by value, + // clients should usually use vmaIterator.ValuePtr() instead of + // vmaIterator.Value() to get a pointer to the vma rather than a copy. + // + // Invariants: vmas are always page-aligned. + // + // vmas is protected by mappingMu. + vmas vmaSet + + // brk is the mm's brk, which is manipulated using the brk(2) system call. + // The brk is initially set up by the loader which maps an executable + // binary into the mm. + // + // brk is protected by mappingMu. + brk usermem.AddrRange + + // usageAS is vmas.Span(), cached to accelerate RLIMIT_AS checks. + // + // usageAS is protected by mappingMu. + usageAS uint64 + + // lockedAS is the combined size in bytes of all vmas with vma.mlockMode != + // memmap.MLockNone. + // + // lockedAS is protected by mappingMu. + lockedAS uint64 + + // dataAS is the size of private data segments, like mm_struct->data_vm. + // It means the vma which is private, writable, not stack. + // + // dataAS is protected by mappingMu. + dataAS uint64 + + // New VMAs created by MMap use whichever of memmap.MMapOpts.MLockMode or + // defMLockMode is greater. + // + // defMLockMode is protected by mappingMu. + defMLockMode memmap.MLockMode + + // activeMu is loosely analogous to Linux's struct + // mm_struct::page_table_lock. + activeMu gvsync.DowngradableRWMutex `state:"nosave"` + + // pmas stores platform mapping areas used to implement vmas. Since pmas + // are stored by value, clients should usually use pmaIterator.ValuePtr() + // instead of pmaIterator.Value() to get a pointer to the pma rather than + // a copy. + // + // Inserting or removing segments from pmas should happen along with a + // call to mm.insertRSS or mm.removeRSS. + // + // Invariants: pmas are always page-aligned. If a pma exists for a given + // address, a vma must also exist for that address. + // + // pmas is protected by activeMu. + pmas pmaSet + + // curRSS is pmas.Span(), cached to accelerate updates to maxRSS. It is + // reported as the MemoryManager's RSS. + // + // maxRSS should be modified only via insertRSS and removeRSS, not + // directly. + // + // maxRSS is protected by activeMu. + curRSS uint64 + + // maxRSS is the maximum resident set size in bytes of a MemoryManager. + // It is tracked as the application adds and removes mappings to pmas. + // + // maxRSS should be modified only via insertRSS, not directly. + // + // maxRSS is protected by activeMu. + maxRSS uint64 + + // as is the platform.AddressSpace that pmas are mapped into. active is the + // number of contexts that require as to be non-nil; if active == 0, as may + // be nil. + // + // as is protected by activeMu. active is manipulated with atomic memory + // operations; transitions to and from zero are additionally protected by + // activeMu. 
(This is because such transitions may need to be atomic with + // changes to as.) + as platform.AddressSpace `state:"nosave"` + active int32 `state:"zerovalue"` + + // unmapAllOnActivate indicates that the next Activate call should activate + // an empty AddressSpace. + // + // This is used to ensure that an AddressSpace cached in + // NewAddressSpace is not used after some change in the MemoryManager + // or VMAs has made that AddressSpace stale. + // + // unmapAllOnActivate is protected by activeMu. It must only be set when + // there is no active or cached AddressSpace. If as != nil, then + // invalidations should be propagated immediately. + unmapAllOnActivate bool `state:"nosave"` + + // If captureInvalidations is true, calls to MM.Invalidate() are recorded + // in capturedInvalidations rather than being applied immediately to pmas. + // This is to avoid a race condition in MM.Fork(); see that function for + // details. + // + // Both captureInvalidations and capturedInvalidations are protected by + // activeMu. Neither need to be saved since captureInvalidations is only + // enabled during MM.Fork(), during which saving can't occur. + captureInvalidations bool `state:"zerovalue"` + capturedInvalidations []invalidateArgs `state:"nosave"` + + metadataMu sync.Mutex `state:"nosave"` + + // argv is the application argv. This is set up by the loader and may be + // modified by prctl(PR_SET_MM_ARG_START/PR_SET_MM_ARG_END). No + // requirements apply to argv; we do not require that argv.WellFormed(). + // + // argv is protected by metadataMu. + argv usermem.AddrRange + + // envv is the application envv. This is set up by the loader and may be + // modified by prctl(PR_SET_MM_ENV_START/PR_SET_MM_ENV_END). No + // requirements apply to envv; we do not require that envv.WellFormed(). + // + // envv is protected by metadataMu. + envv usermem.AddrRange + + // auxv is the ELF's auxiliary vector. + // + // auxv is protected by metadataMu. + auxv arch.Auxv + + // executable is the executable for this MemoryManager. If executable + // is not nil, it holds a reference on the Dirent. + // + // executable is protected by metadataMu. + executable *fs.Dirent + + // aioManager keeps track of AIOContexts used for async IOs. AIOManager + // must be cloned when CLONE_VM is used. + aioManager aioManager +} + +// vma represents a virtual memory area. +// +// +stateify savable +type vma struct { + // mappable is the virtual memory object mapped by this vma. If mappable is + // nil, the vma represents a private anonymous mapping. + mappable memmap.Mappable + + // off is the offset into mappable at which this vma begins. If mappable is + // nil, off is meaningless. + off uint64 + + // To speedup VMA save/restore, we group and save the following booleans + // as a single integer. + + // realPerms are the memory permissions on this vma, as defined by the + // application. + realPerms usermem.AccessType `state:".(int)"` + + // effectivePerms are the memory permissions on this vma which are + // actually used to control access. + // + // Invariant: effectivePerms == realPerms.Effective(). + effectivePerms usermem.AccessType `state:"manual"` + + // maxPerms limits the set of permissions that may ever apply to this + // memory, as well as accesses for which usermem.IOOpts.IgnorePermissions + // is true (e.g. ptrace(PTRACE_POKEDATA)). + // + // Invariant: maxPerms == maxPerms.Effective(). 
+ maxPerms usermem.AccessType `state:"manual"` + + // private is true if this is a MAP_PRIVATE mapping, such that writes to + // the mapping are propagated to a copy. + private bool `state:"manual"` + + // growsDown is true if the mapping may be automatically extended downward + // under certain conditions. If growsDown is true, mappable must be nil. + // + // There is currently no corresponding growsUp flag; in Linux, the only + // architectures that can have VM_GROWSUP mappings are ia64, parisc, and + // metag, none of which we currently support. + growsDown bool `state:"manual"` + + mlockMode memmap.MLockMode + + // If id is not nil, it controls the lifecycle of mappable and provides vma + // metadata shown in /proc/[pid]/maps, and the vma holds a reference. + id memmap.MappingIdentity + + // If hint is non-empty, it is a description of the vma printed in + // /proc/[pid]/maps. hint takes priority over id.MappedName(). + hint string +} + +const ( + vmaRealPermsRead = 1 << iota + vmaRealPermsWrite + vmaRealPermsExecute + vmaEffectivePermsRead + vmaEffectivePermsWrite + vmaEffectivePermsExecute + vmaMaxPermsRead + vmaMaxPermsWrite + vmaMaxPermsExecute + vmaPrivate + vmaGrowsDown +) + +func (v *vma) saveRealPerms() int { + var b int + if v.realPerms.Read { + b |= vmaRealPermsRead + } + if v.realPerms.Write { + b |= vmaRealPermsWrite + } + if v.realPerms.Execute { + b |= vmaRealPermsExecute + } + if v.effectivePerms.Read { + b |= vmaEffectivePermsRead + } + if v.effectivePerms.Write { + b |= vmaEffectivePermsWrite + } + if v.effectivePerms.Execute { + b |= vmaEffectivePermsExecute + } + if v.maxPerms.Read { + b |= vmaMaxPermsRead + } + if v.maxPerms.Write { + b |= vmaMaxPermsWrite + } + if v.maxPerms.Execute { + b |= vmaMaxPermsExecute + } + if v.private { + b |= vmaPrivate + } + if v.growsDown { + b |= vmaGrowsDown + } + return b +} + +func (v *vma) loadRealPerms(b int) { + if b&vmaRealPermsRead > 0 { + v.realPerms.Read = true + } + if b&vmaRealPermsWrite > 0 { + v.realPerms.Write = true + } + if b&vmaRealPermsExecute > 0 { + v.realPerms.Execute = true + } + if b&vmaEffectivePermsRead > 0 { + v.effectivePerms.Read = true + } + if b&vmaEffectivePermsWrite > 0 { + v.effectivePerms.Write = true + } + if b&vmaEffectivePermsExecute > 0 { + v.effectivePerms.Execute = true + } + if b&vmaMaxPermsRead > 0 { + v.maxPerms.Read = true + } + if b&vmaMaxPermsWrite > 0 { + v.maxPerms.Write = true + } + if b&vmaMaxPermsExecute > 0 { + v.maxPerms.Execute = true + } + if b&vmaPrivate > 0 { + v.private = true + } + if b&vmaGrowsDown > 0 { + v.growsDown = true + } +} + +// pma represents a platform mapping area. +// +// +stateify savable +type pma struct { + // file is the file mapped by this pma. Only pmas for which file == + // MemoryManager.mfp.MemoryFile() may be saved. pmas hold a reference to + // the corresponding file range while they exist. + file platform.File `state:"nosave"` + + // off is the offset into file at which this pma begins. + // + // Note that pmas do *not* hold references on offsets in file! If private + // is true, MemoryManager.privateRefs holds the reference instead. If + // private is false, the corresponding memmap.Mappable holds the reference + // instead (per memmap.Mappable.Translate requirement). + off uint64 + + // translatePerms is the permissions returned by memmap.Mappable.Translate. + // If private is true, translatePerms is usermem.AnyAccess. + translatePerms usermem.AccessType + + // effectivePerms is the permissions allowed for non-ignorePermissions + // accesses. 
maxPerms is the permissions allowed for ignorePermissions + // accesses. These are vma.effectivePerms and vma.maxPerms respectively, + // masked by pma.translatePerms and with Write disallowed if pma.needCOW is + // true. + // + // These are stored in the pma so that the IO implementation can avoid + // iterating mm.vmas when pmas already exist. + effectivePerms usermem.AccessType + maxPerms usermem.AccessType + + // needCOW is true if writes to the mapping must be propagated to a copy. + needCOW bool + + // private is true if this pma represents private memory. + // + // If private is true, file must be MemoryManager.mfp.MemoryFile(), the pma + // holds a reference on the mapped memory that is tracked in privateRefs, + // and calls to Invalidate for which + // memmap.InvalidateOpts.InvalidatePrivate is false should ignore the pma. + // + // If private is false, this pma caches a translation from the + // corresponding vma's memmap.Mappable.Translate. + private bool + + // If internalMappings is not empty, it is the cached return value of + // file.MapInternal for the platform.FileRange mapped by this pma. + internalMappings safemem.BlockSeq `state:"nosave"` +} + +// +stateify savable +type privateRefs struct { + mu sync.Mutex `state:"nosave"` + + // refs maps offsets into MemoryManager.mfp.MemoryFile() to the number of + // pmas (or, equivalently, MemoryManagers) that share ownership of the + // memory at that offset. + refs fileRefcountSet +} + +type invalidateArgs struct { + ar usermem.AddrRange + opts memmap.InvalidateOpts +} + +// fileRefcountSetFunctions implements segment.Functions for fileRefcountSet. +type fileRefcountSetFunctions struct{} + +func (fileRefcountSetFunctions) MinKey() uint64 { + return 0 +} + +func (fileRefcountSetFunctions) MaxKey() uint64 { + return ^uint64(0) +} + +func (fileRefcountSetFunctions) ClearValue(_ *int32) { +} + +func (fileRefcountSetFunctions) Merge(_ platform.FileRange, rc1 int32, _ platform.FileRange, rc2 int32) (int32, bool) { + return rc1, rc1 == rc2 +} + +func (fileRefcountSetFunctions) Split(_ platform.FileRange, rc int32, _ uint64) (int32, int32) { + return rc, rc +} diff --git a/pkg/sentry/mm/mm_state_autogen.go b/pkg/sentry/mm/mm_state_autogen.go new file mode 100755 index 000000000..160f347f8 --- /dev/null +++ b/pkg/sentry/mm/mm_state_autogen.go @@ -0,0 +1,380 @@ +// automatically generated by stateify. 
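+//
+// The save/load pairs below all follow the same state.Map pattern: save
+// writes each field under a stable name, and load reads it back. As a
+// minimal sketch (the counter type here is hypothetical, not part of this
+// package), a new savable type would be wired up as:
+//
+//	type counter struct{ n int64 }
+//
+//	func (c *counter) beforeSave() {}
+//	func (c *counter) save(m state.Map) { m.Save("n", &c.n) }
+//	func (c *counter) afterLoad() {}
+//	func (c *counter) load(m state.Map) { m.Load("n", &c.n) }
+//
+// together with a state.Register call like those in init() at the bottom of
+// this file.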
+ +package mm + +import ( + "gvisor.googlesource.com/gvisor/pkg/state" +) + +func (x *aioManager) beforeSave() {} +func (x *aioManager) save(m state.Map) { + x.beforeSave() + m.Save("contexts", &x.contexts) +} + +func (x *aioManager) afterLoad() {} +func (x *aioManager) load(m state.Map) { + m.Load("contexts", &x.contexts) +} + +func (x *ioResult) beforeSave() {} +func (x *ioResult) save(m state.Map) { + x.beforeSave() + m.Save("data", &x.data) + m.Save("ioEntry", &x.ioEntry) +} + +func (x *ioResult) afterLoad() {} +func (x *ioResult) load(m state.Map) { + m.Load("data", &x.data) + m.Load("ioEntry", &x.ioEntry) +} + +func (x *AIOContext) beforeSave() {} +func (x *AIOContext) save(m state.Map) { + x.beforeSave() + if !state.IsZeroValue(x.dead) { m.Failf("dead is %v, expected zero", x.dead) } + m.Save("results", &x.results) + m.Save("maxOutstanding", &x.maxOutstanding) + m.Save("outstanding", &x.outstanding) +} + +func (x *AIOContext) load(m state.Map) { + m.Load("results", &x.results) + m.Load("maxOutstanding", &x.maxOutstanding) + m.Load("outstanding", &x.outstanding) + m.AfterLoad(x.afterLoad) +} + +func (x *aioMappable) beforeSave() {} +func (x *aioMappable) save(m state.Map) { + x.beforeSave() + m.Save("AtomicRefCount", &x.AtomicRefCount) + m.Save("mfp", &x.mfp) + m.Save("fr", &x.fr) +} + +func (x *aioMappable) afterLoad() {} +func (x *aioMappable) load(m state.Map) { + m.Load("AtomicRefCount", &x.AtomicRefCount) + m.Load("mfp", &x.mfp) + m.Load("fr", &x.fr) +} + +func (x *fileRefcountSet) beforeSave() {} +func (x *fileRefcountSet) save(m state.Map) { + x.beforeSave() + var root *fileRefcountSegmentDataSlices = x.saveRoot() + m.SaveValue("root", root) +} + +func (x *fileRefcountSet) afterLoad() {} +func (x *fileRefcountSet) load(m state.Map) { + m.LoadValue("root", new(*fileRefcountSegmentDataSlices), func(y interface{}) { x.loadRoot(y.(*fileRefcountSegmentDataSlices)) }) +} + +func (x *fileRefcountnode) beforeSave() {} +func (x *fileRefcountnode) save(m state.Map) { + x.beforeSave() + m.Save("nrSegments", &x.nrSegments) + m.Save("parent", &x.parent) + m.Save("parentIndex", &x.parentIndex) + m.Save("hasChildren", &x.hasChildren) + m.Save("keys", &x.keys) + m.Save("values", &x.values) + m.Save("children", &x.children) +} + +func (x *fileRefcountnode) afterLoad() {} +func (x *fileRefcountnode) load(m state.Map) { + m.Load("nrSegments", &x.nrSegments) + m.Load("parent", &x.parent) + m.Load("parentIndex", &x.parentIndex) + m.Load("hasChildren", &x.hasChildren) + m.Load("keys", &x.keys) + m.Load("values", &x.values) + m.Load("children", &x.children) +} + +func (x *fileRefcountSegmentDataSlices) beforeSave() {} +func (x *fileRefcountSegmentDataSlices) save(m state.Map) { + x.beforeSave() + m.Save("Start", &x.Start) + m.Save("End", &x.End) + m.Save("Values", &x.Values) +} + +func (x *fileRefcountSegmentDataSlices) afterLoad() {} +func (x *fileRefcountSegmentDataSlices) load(m state.Map) { + m.Load("Start", &x.Start) + m.Load("End", &x.End) + m.Load("Values", &x.Values) +} + +func (x *ioList) beforeSave() {} +func (x *ioList) save(m state.Map) { + x.beforeSave() + m.Save("head", &x.head) + m.Save("tail", &x.tail) +} + +func (x *ioList) afterLoad() {} +func (x *ioList) load(m state.Map) { + m.Load("head", &x.head) + m.Load("tail", &x.tail) +} + +func (x *ioEntry) beforeSave() {} +func (x *ioEntry) save(m state.Map) { + x.beforeSave() + m.Save("next", &x.next) + m.Save("prev", &x.prev) +} + +func (x *ioEntry) afterLoad() {} +func (x *ioEntry) load(m state.Map) { + m.Load("next", &x.next) + 
m.Load("prev", &x.prev) +} + +func (x *MemoryManager) save(m state.Map) { + x.beforeSave() + if !state.IsZeroValue(x.active) { m.Failf("active is %v, expected zero", x.active) } + if !state.IsZeroValue(x.captureInvalidations) { m.Failf("captureInvalidations is %v, expected zero", x.captureInvalidations) } + m.Save("p", &x.p) + m.Save("mfp", &x.mfp) + m.Save("layout", &x.layout) + m.Save("privateRefs", &x.privateRefs) + m.Save("users", &x.users) + m.Save("vmas", &x.vmas) + m.Save("brk", &x.brk) + m.Save("usageAS", &x.usageAS) + m.Save("lockedAS", &x.lockedAS) + m.Save("dataAS", &x.dataAS) + m.Save("defMLockMode", &x.defMLockMode) + m.Save("pmas", &x.pmas) + m.Save("curRSS", &x.curRSS) + m.Save("maxRSS", &x.maxRSS) + m.Save("argv", &x.argv) + m.Save("envv", &x.envv) + m.Save("auxv", &x.auxv) + m.Save("executable", &x.executable) + m.Save("aioManager", &x.aioManager) +} + +func (x *MemoryManager) load(m state.Map) { + m.Load("p", &x.p) + m.Load("mfp", &x.mfp) + m.Load("layout", &x.layout) + m.Load("privateRefs", &x.privateRefs) + m.Load("users", &x.users) + m.Load("vmas", &x.vmas) + m.Load("brk", &x.brk) + m.Load("usageAS", &x.usageAS) + m.Load("lockedAS", &x.lockedAS) + m.Load("dataAS", &x.dataAS) + m.Load("defMLockMode", &x.defMLockMode) + m.Load("pmas", &x.pmas) + m.Load("curRSS", &x.curRSS) + m.Load("maxRSS", &x.maxRSS) + m.Load("argv", &x.argv) + m.Load("envv", &x.envv) + m.Load("auxv", &x.auxv) + m.Load("executable", &x.executable) + m.Load("aioManager", &x.aioManager) + m.AfterLoad(x.afterLoad) +} + +func (x *vma) beforeSave() {} +func (x *vma) save(m state.Map) { + x.beforeSave() + var realPerms int = x.saveRealPerms() + m.SaveValue("realPerms", realPerms) + m.Save("mappable", &x.mappable) + m.Save("off", &x.off) + m.Save("mlockMode", &x.mlockMode) + m.Save("id", &x.id) + m.Save("hint", &x.hint) +} + +func (x *vma) afterLoad() {} +func (x *vma) load(m state.Map) { + m.Load("mappable", &x.mappable) + m.Load("off", &x.off) + m.Load("mlockMode", &x.mlockMode) + m.Load("id", &x.id) + m.Load("hint", &x.hint) + m.LoadValue("realPerms", new(int), func(y interface{}) { x.loadRealPerms(y.(int)) }) +} + +func (x *pma) beforeSave() {} +func (x *pma) save(m state.Map) { + x.beforeSave() + m.Save("off", &x.off) + m.Save("translatePerms", &x.translatePerms) + m.Save("effectivePerms", &x.effectivePerms) + m.Save("maxPerms", &x.maxPerms) + m.Save("needCOW", &x.needCOW) + m.Save("private", &x.private) +} + +func (x *pma) afterLoad() {} +func (x *pma) load(m state.Map) { + m.Load("off", &x.off) + m.Load("translatePerms", &x.translatePerms) + m.Load("effectivePerms", &x.effectivePerms) + m.Load("maxPerms", &x.maxPerms) + m.Load("needCOW", &x.needCOW) + m.Load("private", &x.private) +} + +func (x *privateRefs) beforeSave() {} +func (x *privateRefs) save(m state.Map) { + x.beforeSave() + m.Save("refs", &x.refs) +} + +func (x *privateRefs) afterLoad() {} +func (x *privateRefs) load(m state.Map) { + m.Load("refs", &x.refs) +} + +func (x *pmaSet) beforeSave() {} +func (x *pmaSet) save(m state.Map) { + x.beforeSave() + var root *pmaSegmentDataSlices = x.saveRoot() + m.SaveValue("root", root) +} + +func (x *pmaSet) afterLoad() {} +func (x *pmaSet) load(m state.Map) { + m.LoadValue("root", new(*pmaSegmentDataSlices), func(y interface{}) { x.loadRoot(y.(*pmaSegmentDataSlices)) }) +} + +func (x *pmanode) beforeSave() {} +func (x *pmanode) save(m state.Map) { + x.beforeSave() + m.Save("nrSegments", &x.nrSegments) + m.Save("parent", &x.parent) + m.Save("parentIndex", &x.parentIndex) + m.Save("hasChildren", 
&x.hasChildren) + m.Save("keys", &x.keys) + m.Save("values", &x.values) + m.Save("children", &x.children) +} + +func (x *pmanode) afterLoad() {} +func (x *pmanode) load(m state.Map) { + m.Load("nrSegments", &x.nrSegments) + m.Load("parent", &x.parent) + m.Load("parentIndex", &x.parentIndex) + m.Load("hasChildren", &x.hasChildren) + m.Load("keys", &x.keys) + m.Load("values", &x.values) + m.Load("children", &x.children) +} + +func (x *pmaSegmentDataSlices) beforeSave() {} +func (x *pmaSegmentDataSlices) save(m state.Map) { + x.beforeSave() + m.Save("Start", &x.Start) + m.Save("End", &x.End) + m.Save("Values", &x.Values) +} + +func (x *pmaSegmentDataSlices) afterLoad() {} +func (x *pmaSegmentDataSlices) load(m state.Map) { + m.Load("Start", &x.Start) + m.Load("End", &x.End) + m.Load("Values", &x.Values) +} + +func (x *SpecialMappable) beforeSave() {} +func (x *SpecialMappable) save(m state.Map) { + x.beforeSave() + m.Save("AtomicRefCount", &x.AtomicRefCount) + m.Save("mfp", &x.mfp) + m.Save("fr", &x.fr) + m.Save("name", &x.name) +} + +func (x *SpecialMappable) afterLoad() {} +func (x *SpecialMappable) load(m state.Map) { + m.Load("AtomicRefCount", &x.AtomicRefCount) + m.Load("mfp", &x.mfp) + m.Load("fr", &x.fr) + m.Load("name", &x.name) +} + +func (x *vmaSet) beforeSave() {} +func (x *vmaSet) save(m state.Map) { + x.beforeSave() + var root *vmaSegmentDataSlices = x.saveRoot() + m.SaveValue("root", root) +} + +func (x *vmaSet) afterLoad() {} +func (x *vmaSet) load(m state.Map) { + m.LoadValue("root", new(*vmaSegmentDataSlices), func(y interface{}) { x.loadRoot(y.(*vmaSegmentDataSlices)) }) +} + +func (x *vmanode) beforeSave() {} +func (x *vmanode) save(m state.Map) { + x.beforeSave() + m.Save("nrSegments", &x.nrSegments) + m.Save("parent", &x.parent) + m.Save("parentIndex", &x.parentIndex) + m.Save("hasChildren", &x.hasChildren) + m.Save("keys", &x.keys) + m.Save("values", &x.values) + m.Save("children", &x.children) +} + +func (x *vmanode) afterLoad() {} +func (x *vmanode) load(m state.Map) { + m.Load("nrSegments", &x.nrSegments) + m.Load("parent", &x.parent) + m.Load("parentIndex", &x.parentIndex) + m.Load("hasChildren", &x.hasChildren) + m.Load("keys", &x.keys) + m.Load("values", &x.values) + m.Load("children", &x.children) +} + +func (x *vmaSegmentDataSlices) beforeSave() {} +func (x *vmaSegmentDataSlices) save(m state.Map) { + x.beforeSave() + m.Save("Start", &x.Start) + m.Save("End", &x.End) + m.Save("Values", &x.Values) +} + +func (x *vmaSegmentDataSlices) afterLoad() {} +func (x *vmaSegmentDataSlices) load(m state.Map) { + m.Load("Start", &x.Start) + m.Load("End", &x.End) + m.Load("Values", &x.Values) +} + +func init() { + state.Register("mm.aioManager", (*aioManager)(nil), state.Fns{Save: (*aioManager).save, Load: (*aioManager).load}) + state.Register("mm.ioResult", (*ioResult)(nil), state.Fns{Save: (*ioResult).save, Load: (*ioResult).load}) + state.Register("mm.AIOContext", (*AIOContext)(nil), state.Fns{Save: (*AIOContext).save, Load: (*AIOContext).load}) + state.Register("mm.aioMappable", (*aioMappable)(nil), state.Fns{Save: (*aioMappable).save, Load: (*aioMappable).load}) + state.Register("mm.fileRefcountSet", (*fileRefcountSet)(nil), state.Fns{Save: (*fileRefcountSet).save, Load: (*fileRefcountSet).load}) + state.Register("mm.fileRefcountnode", (*fileRefcountnode)(nil), state.Fns{Save: (*fileRefcountnode).save, Load: (*fileRefcountnode).load}) + state.Register("mm.fileRefcountSegmentDataSlices", (*fileRefcountSegmentDataSlices)(nil), state.Fns{Save: 
(*fileRefcountSegmentDataSlices).save, Load: (*fileRefcountSegmentDataSlices).load}) + state.Register("mm.ioList", (*ioList)(nil), state.Fns{Save: (*ioList).save, Load: (*ioList).load}) + state.Register("mm.ioEntry", (*ioEntry)(nil), state.Fns{Save: (*ioEntry).save, Load: (*ioEntry).load}) + state.Register("mm.MemoryManager", (*MemoryManager)(nil), state.Fns{Save: (*MemoryManager).save, Load: (*MemoryManager).load}) + state.Register("mm.vma", (*vma)(nil), state.Fns{Save: (*vma).save, Load: (*vma).load}) + state.Register("mm.pma", (*pma)(nil), state.Fns{Save: (*pma).save, Load: (*pma).load}) + state.Register("mm.privateRefs", (*privateRefs)(nil), state.Fns{Save: (*privateRefs).save, Load: (*privateRefs).load}) + state.Register("mm.pmaSet", (*pmaSet)(nil), state.Fns{Save: (*pmaSet).save, Load: (*pmaSet).load}) + state.Register("mm.pmanode", (*pmanode)(nil), state.Fns{Save: (*pmanode).save, Load: (*pmanode).load}) + state.Register("mm.pmaSegmentDataSlices", (*pmaSegmentDataSlices)(nil), state.Fns{Save: (*pmaSegmentDataSlices).save, Load: (*pmaSegmentDataSlices).load}) + state.Register("mm.SpecialMappable", (*SpecialMappable)(nil), state.Fns{Save: (*SpecialMappable).save, Load: (*SpecialMappable).load}) + state.Register("mm.vmaSet", (*vmaSet)(nil), state.Fns{Save: (*vmaSet).save, Load: (*vmaSet).load}) + state.Register("mm.vmanode", (*vmanode)(nil), state.Fns{Save: (*vmanode).save, Load: (*vmanode).load}) + state.Register("mm.vmaSegmentDataSlices", (*vmaSegmentDataSlices)(nil), state.Fns{Save: (*vmaSegmentDataSlices).save, Load: (*vmaSegmentDataSlices).load}) +} diff --git a/pkg/sentry/mm/pma.go b/pkg/sentry/mm/pma.go new file mode 100644 index 000000000..ece561ff0 --- /dev/null +++ b/pkg/sentry/mm/pma.go @@ -0,0 +1,1036 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package mm + +import ( + "fmt" + + "gvisor.googlesource.com/gvisor/pkg/sentry/context" + "gvisor.googlesource.com/gvisor/pkg/sentry/memmap" + "gvisor.googlesource.com/gvisor/pkg/sentry/platform" + "gvisor.googlesource.com/gvisor/pkg/sentry/platform/safecopy" + "gvisor.googlesource.com/gvisor/pkg/sentry/safemem" + "gvisor.googlesource.com/gvisor/pkg/sentry/usage" + "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" + "gvisor.googlesource.com/gvisor/pkg/syserror" +) + +// existingPMAsLocked checks that pmas exist for all addresses in ar, and +// support access of type (at, ignorePermissions). If so, it returns an +// iterator to the pma containing ar.Start. Otherwise it returns a terminal +// iterator. +// +// Preconditions: mm.activeMu must be locked. ar.Length() != 0. 
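+//
+// As an illustrative sketch (not an actual call site in this file), a caller
+// probing for an already-usable read mapping might do:
+//
+//	mm.activeMu.RLock()
+//	if pseg := mm.existingPMAsLocked(ar, usermem.Read, false, false); pseg.Ok() {
+//		// pmas cover all of ar and permit reads; use pseg.
+//	}
+//	mm.activeMu.RUnlock()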
+func (mm *MemoryManager) existingPMAsLocked(ar usermem.AddrRange, at usermem.AccessType, ignorePermissions bool, needInternalMappings bool) pmaIterator { + if checkInvariants { + if !ar.WellFormed() || ar.Length() <= 0 { + panic(fmt.Sprintf("invalid ar: %v", ar)) + } + } + + first := mm.pmas.FindSegment(ar.Start) + pseg := first + for pseg.Ok() { + pma := pseg.ValuePtr() + perms := pma.effectivePerms + if ignorePermissions { + perms = pma.maxPerms + } + if !perms.SupersetOf(at) { + return pmaIterator{} + } + if needInternalMappings && pma.internalMappings.IsEmpty() { + return pmaIterator{} + } + + if ar.End <= pseg.End() { + return first + } + pseg, _ = pseg.NextNonEmpty() + } + + // Ran out of pmas before reaching ar.End. + return pmaIterator{} +} + +// existingVecPMAsLocked returns true if pmas exist for all addresses in ars, +// and support access of type (at, ignorePermissions). +// +// Preconditions: mm.activeMu must be locked. +func (mm *MemoryManager) existingVecPMAsLocked(ars usermem.AddrRangeSeq, at usermem.AccessType, ignorePermissions bool, needInternalMappings bool) bool { + for ; !ars.IsEmpty(); ars = ars.Tail() { + if ar := ars.Head(); ar.Length() != 0 && !mm.existingPMAsLocked(ar, at, ignorePermissions, needInternalMappings).Ok() { + return false + } + } + return true +} + +// getPMAsLocked ensures that pmas exist for all addresses in ar, and support +// access of type at. It returns: +// +// - An iterator to the pma containing ar.Start. If no pma contains ar.Start, +// the iterator is unspecified. +// +// - An iterator to the gap after the last pma containing an address in ar. If +// pmas exist for no addresses in ar, the iterator is to a gap that begins +// before ar.Start. +// +// - An error that is non-nil if pmas exist for only a subset of ar. +// +// Preconditions: mm.mappingMu must be locked. mm.activeMu must be locked for +// writing. ar.Length() != 0. vseg.Range().Contains(ar.Start). vmas must exist +// for all addresses in ar, and support accesses of type at (i.e. permission +// checks must have been performed against vmas). +func (mm *MemoryManager) getPMAsLocked(ctx context.Context, vseg vmaIterator, ar usermem.AddrRange, at usermem.AccessType) (pmaIterator, pmaGapIterator, error) { + if checkInvariants { + if !ar.WellFormed() || ar.Length() <= 0 { + panic(fmt.Sprintf("invalid ar: %v", ar)) + } + if !vseg.Ok() { + panic("terminal vma iterator") + } + if !vseg.Range().Contains(ar.Start) { + panic(fmt.Sprintf("initial vma %v does not cover start of ar %v", vseg.Range(), ar)) + } + } + + // Page-align ar so that all AddrRanges are aligned. + end, ok := ar.End.RoundUp() + var alignerr error + if !ok { + end = ar.End.RoundDown() + alignerr = syserror.EFAULT + } + ar = usermem.AddrRange{ar.Start.RoundDown(), end} + + pstart, pend, perr := mm.getPMAsInternalLocked(ctx, vseg, ar, at) + if pend.Start() <= ar.Start { + return pmaIterator{}, pend, perr + } + // getPMAsInternalLocked may not have returned pstart due to iterator + // invalidation. + if !pstart.Ok() { + pstart = mm.findOrSeekPrevUpperBoundPMA(ar.Start, pend) + } + if perr != nil { + return pstart, pend, perr + } + return pstart, pend, alignerr +} + +// getVecPMAsLocked ensures that pmas exist for all addresses in ars, and +// support access of type at. It returns the subset of ars for which pmas +// exist. If this is not equal to ars, it returns a non-nil error explaining +// why. +// +// Preconditions: mm.mappingMu must be locked. mm.activeMu must be locked for +// writing. 
vmas must exist for all addresses in ars, and support accesses of +// type at (i.e. permission checks must have been performed against vmas). +func (mm *MemoryManager) getVecPMAsLocked(ctx context.Context, ars usermem.AddrRangeSeq, at usermem.AccessType) (usermem.AddrRangeSeq, error) { + for arsit := ars; !arsit.IsEmpty(); arsit = arsit.Tail() { + ar := arsit.Head() + if ar.Length() == 0 { + continue + } + if checkInvariants { + if !ar.WellFormed() { + panic(fmt.Sprintf("invalid ar: %v", ar)) + } + } + + // Page-align ar so that all AddrRanges are aligned. + end, ok := ar.End.RoundUp() + var alignerr error + if !ok { + end = ar.End.RoundDown() + alignerr = syserror.EFAULT + } + ar = usermem.AddrRange{ar.Start.RoundDown(), end} + + _, pend, perr := mm.getPMAsInternalLocked(ctx, mm.vmas.FindSegment(ar.Start), ar, at) + if perr != nil { + return truncatedAddrRangeSeq(ars, arsit, pend.Start()), perr + } + if alignerr != nil { + return truncatedAddrRangeSeq(ars, arsit, pend.Start()), alignerr + } + } + + return ars, nil +} + +// getPMAsInternalLocked is equivalent to getPMAsLocked, with the following +// exceptions: +// +// - getPMAsInternalLocked returns a pmaIterator on a best-effort basis (that +// is, the returned iterator may be terminal, even if a pma that contains +// ar.Start exists). Returning this iterator on a best-effort basis allows +// callers that require it to use it when it's cheaply available, while also +// avoiding the overhead of retrieving it when it's not. +// +// - getPMAsInternalLocked additionally requires that ar is page-aligned. +// +// getPMAsInternalLocked is an implementation helper for getPMAsLocked and +// getVecPMAsLocked; other clients should call one of those instead. +func (mm *MemoryManager) getPMAsInternalLocked(ctx context.Context, vseg vmaIterator, ar usermem.AddrRange, at usermem.AccessType) (pmaIterator, pmaGapIterator, error) { + if checkInvariants { + if !ar.WellFormed() || ar.Length() <= 0 || !ar.IsPageAligned() { + panic(fmt.Sprintf("invalid ar: %v", ar)) + } + if !vseg.Ok() { + panic("terminal vma iterator") + } + if !vseg.Range().Contains(ar.Start) { + panic(fmt.Sprintf("initial vma %v does not cover start of ar %v", vseg.Range(), ar)) + } + } + + mf := mm.mfp.MemoryFile() + // Limit the range we allocate to ar, aligned to privateAllocUnit. + maskAR := privateAligned(ar) + didUnmapAS := false + // The range in which we iterate vmas and pmas is still limited to ar, to + // ensure that we don't allocate or COW-break a pma we don't need. + pseg, pgap := mm.pmas.Find(ar.Start) + pstart := pseg + for { + // Get pmas for this vma. + vsegAR := vseg.Range().Intersect(ar) + vma := vseg.ValuePtr() + pmaLoop: + for { + switch { + case pgap.Ok() && pgap.Start() < vsegAR.End: + // Need a pma here. + optAR := vseg.Range().Intersect(pgap.Range()) + if checkInvariants { + if optAR.Length() <= 0 { + panic(fmt.Sprintf("vseg %v and pgap %v do not overlap", vseg, pgap)) + } + } + if vma.mappable == nil { + // Private anonymous mappings get pmas by allocating. 
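+					// maskAR is ar expanded to privateAllocUnit boundaries, so
+					// one fault can populate neighboring pages with a single
+					// allocation. As a rough example, assuming the 2MiB
+					// HugePageSize used as privateAllocUnit on x86, a 4KiB
+					// fault at 0x201000 yields a maskAR starting at 0x200000,
+					// and allocAR below covers as much of that aligned run as
+					// the vma and the surrounding pma gap allow.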
+ allocAR := optAR.Intersect(maskAR) + fr, err := mf.Allocate(uint64(allocAR.Length()), usage.Anonymous) + if err != nil { + return pstart, pgap, err + } + if checkInvariants { + if !fr.WellFormed() || fr.Length() != uint64(allocAR.Length()) { + panic(fmt.Sprintf("Allocate(%v) returned invalid FileRange %v", allocAR.Length(), fr)) + } + } + mm.addRSSLocked(allocAR) + mm.incPrivateRef(fr) + mf.IncRef(fr) + pseg, pgap = mm.pmas.Insert(pgap, allocAR, pma{ + file: mf, + off: fr.Start, + translatePerms: usermem.AnyAccess, + effectivePerms: vma.effectivePerms, + maxPerms: vma.maxPerms, + // Since we just allocated this memory and have the + // only reference, the new pma does not need + // copy-on-write. + private: true, + }).NextNonEmpty() + pstart = pmaIterator{} // iterators invalidated + } else { + // Other mappings get pmas by translating. + optMR := vseg.mappableRangeOf(optAR) + reqAR := optAR.Intersect(ar) + reqMR := vseg.mappableRangeOf(reqAR) + perms := at + if vma.private { + // This pma will be copy-on-write; don't require write + // permission, but do require read permission to + // facilitate the copy. + // + // If at.Write is true, we will need to break + // copy-on-write immediately, which occurs after + // translation below. + perms.Read = true + perms.Write = false + } + ts, err := vma.mappable.Translate(ctx, reqMR, optMR, perms) + if checkInvariants { + if err := memmap.CheckTranslateResult(reqMR, optMR, perms, ts, err); err != nil { + panic(fmt.Sprintf("Mappable(%T).Translate(%v, %v, %v): %v", vma.mappable, reqMR, optMR, perms, err)) + } + } + // Install a pma for each translation. + if len(ts) == 0 { + return pstart, pgap, err + } + pstart = pmaIterator{} // iterators invalidated + for _, t := range ts { + newpmaAR := vseg.addrRangeOf(t.Source) + newpma := pma{ + file: t.File, + off: t.Offset, + translatePerms: t.Perms, + effectivePerms: vma.effectivePerms.Intersect(t.Perms), + maxPerms: vma.maxPerms.Intersect(t.Perms), + } + if vma.private { + newpma.effectivePerms.Write = false + newpma.maxPerms.Write = false + newpma.needCOW = true + } + mm.addRSSLocked(newpmaAR) + t.File.IncRef(t.FileRange()) + // This is valid because memmap.Mappable.Translate is + // required to return Translations in increasing + // Translation.Source order. + pseg = mm.pmas.Insert(pgap, newpmaAR, newpma) + pgap = pseg.NextGap() + } + // The error returned by Translate is only significant if + // it occurred before ar.End. + if err != nil && vseg.addrRangeOf(ts[len(ts)-1].Source).End < ar.End { + return pstart, pgap, err + } + // Rewind pseg to the first pma inserted and continue the + // loop to check if we need to break copy-on-write. + pseg, pgap = mm.findOrSeekPrevUpperBoundPMA(vseg.addrRangeOf(ts[0].Source).Start, pgap), pmaGapIterator{} + continue + } + + case pseg.Ok() && pseg.Start() < vsegAR.End: + oldpma := pseg.ValuePtr() + if at.Write && mm.isPMACopyOnWriteLocked(vseg, pseg) { + // Break copy-on-write by copying. + if checkInvariants { + if !oldpma.maxPerms.Read { + panic(fmt.Sprintf("pma %v needs to be copied for writing, but is not readable: %v", pseg.Range(), oldpma)) + } + } + // The majority of copy-on-write breaks on executable pages + // come from: + // + // - The ELF loader, which must zero out bytes on the last + // page of each segment after the end of the segment. + // + // - gdb's use of ptrace to insert breakpoints. 
+ // + // Neither of these cases has enough spatial locality to + // benefit from copying nearby pages, so if the vma is + // executable, only copy the pages required. + var copyAR usermem.AddrRange + if vseg.ValuePtr().effectivePerms.Execute { + copyAR = pseg.Range().Intersect(ar) + } else { + copyAR = pseg.Range().Intersect(maskAR) + } + // Get internal mappings from the pma to copy from. + if err := pseg.getInternalMappingsLocked(); err != nil { + return pstart, pseg.PrevGap(), err + } + // Copy contents. + fr, err := mf.AllocateAndFill(uint64(copyAR.Length()), usage.Anonymous, &safemem.BlockSeqReader{mm.internalMappingsLocked(pseg, copyAR)}) + if _, ok := err.(safecopy.BusError); ok { + // If we got SIGBUS during the copy, deliver SIGBUS to + // userspace (instead of SIGSEGV) if we're breaking + // copy-on-write due to application page fault. + err = &memmap.BusError{err} + } + if fr.Length() == 0 { + return pstart, pseg.PrevGap(), err + } + // Unmap all of maskAR, not just copyAR, to minimize host + // syscalls. AddressSpace mappings must be removed before + // mm.decPrivateRef(). + if !didUnmapAS { + mm.unmapASLocked(maskAR) + didUnmapAS = true + } + // Replace the pma with a copy in the part of the address + // range where copying was successful. This doesn't change + // RSS. + copyAR.End = copyAR.Start + usermem.Addr(fr.Length()) + if copyAR != pseg.Range() { + pseg = mm.pmas.Isolate(pseg, copyAR) + pstart = pmaIterator{} // iterators invalidated + } + oldpma = pseg.ValuePtr() + if oldpma.private { + mm.decPrivateRef(pseg.fileRange()) + } + oldpma.file.DecRef(pseg.fileRange()) + mm.incPrivateRef(fr) + mf.IncRef(fr) + oldpma.file = mf + oldpma.off = fr.Start + oldpma.translatePerms = usermem.AnyAccess + oldpma.effectivePerms = vma.effectivePerms + oldpma.maxPerms = vma.maxPerms + oldpma.needCOW = false + oldpma.private = true + oldpma.internalMappings = safemem.BlockSeq{} + // Try to merge the pma with its neighbors. + if prev := pseg.PrevSegment(); prev.Ok() { + if merged := mm.pmas.Merge(prev, pseg); merged.Ok() { + pseg = merged + pstart = pmaIterator{} // iterators invalidated + } + } + if next := pseg.NextSegment(); next.Ok() { + if merged := mm.pmas.Merge(pseg, next); merged.Ok() { + pseg = merged + pstart = pmaIterator{} // iterators invalidated + } + } + // The error returned by AllocateAndFill is only + // significant if it occurred before ar.End. + if err != nil && pseg.End() < ar.End { + return pstart, pseg.NextGap(), err + } + // Ensure pseg and pgap are correct for the next iteration + // of the loop. + pseg, pgap = pseg.NextNonEmpty() + } else if !oldpma.translatePerms.SupersetOf(at) { + // Get new pmas (with sufficient permissions) by calling + // memmap.Mappable.Translate again. + if checkInvariants { + if oldpma.private { + panic(fmt.Sprintf("private pma %v has non-maximal pma.translatePerms: %v", pseg.Range(), oldpma)) + } + } + // Allow the entire pma to be replaced. + optAR := pseg.Range() + optMR := vseg.mappableRangeOf(optAR) + reqAR := optAR.Intersect(ar) + reqMR := vseg.mappableRangeOf(reqAR) + perms := oldpma.translatePerms.Union(at) + ts, err := vma.mappable.Translate(ctx, reqMR, optMR, perms) + if checkInvariants { + if err := memmap.CheckTranslateResult(reqMR, optMR, perms, ts, err); err != nil { + panic(fmt.Sprintf("Mappable(%T).Translate(%v, %v, %v): %v", vma.mappable, reqMR, optMR, perms, err)) + } + } + // Remove the part of the existing pma covered by new + // Translations, then insert new pmas. This doesn't change + // RSS. 
Note that we don't need to call unmapASLocked: any + // existing AddressSpace mappings are still valid (though + // less permissive than the new pmas indicate) until + // Invalidate is called, and will be replaced by future + // calls to mapASLocked. + if len(ts) == 0 { + return pstart, pseg.PrevGap(), err + } + transMR := memmap.MappableRange{ts[0].Source.Start, ts[len(ts)-1].Source.End} + transAR := vseg.addrRangeOf(transMR) + pseg = mm.pmas.Isolate(pseg, transAR) + pseg.ValuePtr().file.DecRef(pseg.fileRange()) + pgap = mm.pmas.Remove(pseg) + pstart = pmaIterator{} // iterators invalidated + for _, t := range ts { + newpmaAR := vseg.addrRangeOf(t.Source) + newpma := pma{ + file: t.File, + off: t.Offset, + translatePerms: t.Perms, + effectivePerms: vma.effectivePerms.Intersect(t.Perms), + maxPerms: vma.maxPerms.Intersect(t.Perms), + } + if vma.private { + newpma.effectivePerms.Write = false + newpma.maxPerms.Write = false + newpma.needCOW = true + } + t.File.IncRef(t.FileRange()) + pseg = mm.pmas.Insert(pgap, newpmaAR, newpma) + pgap = pseg.NextGap() + } + // The error returned by Translate is only significant if + // it occurred before ar.End. + if err != nil && pseg.End() < ar.End { + return pstart, pgap, err + } + // Ensure pseg and pgap are correct for the next iteration + // of the loop. + if pgap.Range().Length() == 0 { + pseg, pgap = pgap.NextSegment(), pmaGapIterator{} + } else { + pseg = pmaIterator{} + } + } else { + // We have a usable pma; continue. + pseg, pgap = pseg.NextNonEmpty() + } + + default: + break pmaLoop + } + } + // Go to the next vma. + if ar.End <= vseg.End() { + if pgap.Ok() { + return pstart, pgap, nil + } + return pstart, pseg.PrevGap(), nil + } + vseg = vseg.NextSegment() + } +} + +const ( + // When memory is allocated for a private pma, align the allocated address + // range to a privateAllocUnit boundary when possible. Larger values of + // privateAllocUnit may reduce page faults by allowing fewer, larger pmas + // to be mapped, but may result in larger amounts of wasted memory in the + // presence of fragmentation. privateAllocUnit must be a power-of-2 + // multiple of usermem.PageSize. + privateAllocUnit = usermem.HugePageSize + + privateAllocMask = privateAllocUnit - 1 +) + +func privateAligned(ar usermem.AddrRange) usermem.AddrRange { + aligned := usermem.AddrRange{ar.Start &^ privateAllocMask, ar.End} + if end := (ar.End + privateAllocMask) &^ privateAllocMask; end >= ar.End { + aligned.End = end + } + if checkInvariants { + if !aligned.IsSupersetOf(ar) { + panic(fmt.Sprintf("aligned AddrRange %#v is not a superset of ar %#v", aligned, ar)) + } + } + return aligned +} + +// isPMACopyOnWriteLocked returns true if the contents of the pma represented +// by pseg must be copied to a new private pma to be written to. +// +// If the pma is a copy-on-write private pma, and holds the only reference on +// the memory it maps, isPMACopyOnWriteLocked will take ownership of the memory +// and update the pma to indicate that it does not require copy-on-write. +// +// Preconditions: vseg.Range().IsSupersetOf(pseg.Range()). mm.mappingMu must be +// locked. mm.activeMu must be locked for writing. +func (mm *MemoryManager) isPMACopyOnWriteLocked(vseg vmaIterator, pseg pmaIterator) bool { + pma := pseg.ValuePtr() + if !pma.needCOW { + return false + } + if !pma.private { + return true + } + // If we have the only reference on private memory to be copied, just take + // ownership of it instead of copying. 
If we do hold the only reference, + // additional references can only be taken by mm.Fork(), which is excluded + // by mm.activeMu, so this isn't racy. + mm.privateRefs.mu.Lock() + defer mm.privateRefs.mu.Unlock() + fr := pseg.fileRange() + // This check relies on mm.privateRefs.refs being kept fully merged. + rseg := mm.privateRefs.refs.FindSegment(fr.Start) + if rseg.Ok() && rseg.Value() == 1 && fr.End <= rseg.End() { + pma.needCOW = false + // pma.private => pma.translatePerms == usermem.AnyAccess + vma := vseg.ValuePtr() + pma.effectivePerms = vma.effectivePerms + pma.maxPerms = vma.maxPerms + return false + } + return true +} + +// Invalidate implements memmap.MappingSpace.Invalidate. +func (mm *MemoryManager) Invalidate(ar usermem.AddrRange, opts memmap.InvalidateOpts) { + if checkInvariants { + if !ar.WellFormed() || ar.Length() <= 0 || !ar.IsPageAligned() { + panic(fmt.Sprintf("invalid ar: %v", ar)) + } + } + + mm.activeMu.Lock() + defer mm.activeMu.Unlock() + if mm.captureInvalidations { + mm.capturedInvalidations = append(mm.capturedInvalidations, invalidateArgs{ar, opts}) + return + } + mm.invalidateLocked(ar, opts.InvalidatePrivate, true) +} + +// invalidateLocked removes pmas and AddressSpace mappings of those pmas for +// addresses in ar. +// +// Preconditions: mm.activeMu must be locked for writing. ar.Length() != 0. ar +// must be page-aligned. +func (mm *MemoryManager) invalidateLocked(ar usermem.AddrRange, invalidatePrivate, invalidateShared bool) { + if checkInvariants { + if !ar.WellFormed() || ar.Length() <= 0 || !ar.IsPageAligned() { + panic(fmt.Sprintf("invalid ar: %v", ar)) + } + } + + var didUnmapAS bool + pseg := mm.pmas.LowerBoundSegment(ar.Start) + for pseg.Ok() && pseg.Start() < ar.End { + pma := pseg.ValuePtr() + if (invalidatePrivate && pma.private) || (invalidateShared && !pma.private) { + pseg = mm.pmas.Isolate(pseg, ar) + pma = pseg.ValuePtr() + if !didUnmapAS { + // Unmap all of ar, not just pseg.Range(), to minimize host + // syscalls. AddressSpace mappings must be removed before + // mm.decPrivateRef(). + mm.unmapASLocked(ar) + didUnmapAS = true + } + if pma.private { + mm.decPrivateRef(pseg.fileRange()) + } + mm.removeRSSLocked(pseg.Range()) + pma.file.DecRef(pseg.fileRange()) + pseg = mm.pmas.Remove(pseg).NextSegment() + } else { + pseg = pseg.NextSegment() + } + } +} + +// Pin returns the platform.File ranges currently mapped by addresses in ar in +// mm, acquiring a reference on the returned ranges which the caller must +// release by calling Unpin. If not all addresses are mapped, Pin returns a +// non-nil error. Note that Pin may return both a non-empty slice of +// PinnedRanges and a non-nil error. +// +// Pin does not prevent mapped ranges from changing, making it unsuitable for +// most I/O. It should only be used in contexts that would use get_user_pages() +// in the Linux kernel. +// +// Preconditions: ar.Length() != 0. ar must be page-aligned. +func (mm *MemoryManager) Pin(ctx context.Context, ar usermem.AddrRange, at usermem.AccessType, ignorePermissions bool) ([]PinnedRange, error) { + if checkInvariants { + if !ar.WellFormed() || ar.Length() <= 0 || !ar.IsPageAligned() { + panic(fmt.Sprintf("invalid ar: %v", ar)) + } + } + + // Ensure that we have usable vmas. 
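+	//
+	// Lock ordering note: mappingMu is acquired before activeMu and is
+	// released as soon as pmas exist for ar; the pma walk below needs only
+	// activeMu, and each gathered range takes its own reference on the
+	// backing file so the pins outlive both locks.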
+ mm.mappingMu.RLock() + vseg, vend, verr := mm.getVMAsLocked(ctx, ar, at, ignorePermissions) + if vendaddr := vend.Start(); vendaddr < ar.End { + if vendaddr <= ar.Start { + mm.mappingMu.RUnlock() + return nil, verr + } + ar.End = vendaddr + } + + // Ensure that we have usable pmas. + mm.activeMu.Lock() + pseg, pend, perr := mm.getPMAsLocked(ctx, vseg, ar, at) + mm.mappingMu.RUnlock() + if pendaddr := pend.Start(); pendaddr < ar.End { + if pendaddr <= ar.Start { + mm.activeMu.Unlock() + return nil, perr + } + ar.End = pendaddr + } + + // Gather pmas. + var prs []PinnedRange + for pseg.Ok() && pseg.Start() < ar.End { + psar := pseg.Range().Intersect(ar) + f := pseg.ValuePtr().file + fr := pseg.fileRangeOf(psar) + f.IncRef(fr) + prs = append(prs, PinnedRange{ + Source: psar, + File: f, + Offset: fr.Start, + }) + pseg = pseg.NextSegment() + } + mm.activeMu.Unlock() + + // Return the first error in order of progress through ar. + if perr != nil { + return prs, perr + } + return prs, verr +} + +// PinnedRanges are returned by MemoryManager.Pin. +type PinnedRange struct { + // Source is the corresponding range of addresses. + Source usermem.AddrRange + + // File is the mapped file. + File platform.File + + // Offset is the offset into File at which this PinnedRange begins. + Offset uint64 +} + +// FileRange returns the platform.File offsets mapped by pr. +func (pr PinnedRange) FileRange() platform.FileRange { + return platform.FileRange{pr.Offset, pr.Offset + uint64(pr.Source.Length())} +} + +// Unpin releases the reference held by prs. +func Unpin(prs []PinnedRange) { + for i := range prs { + prs[i].File.DecRef(prs[i].FileRange()) + } +} + +// movePMAsLocked moves all pmas in oldAR to newAR. +// +// Preconditions: mm.activeMu must be locked for writing. oldAR.Length() != 0. +// oldAR.Length() <= newAR.Length(). !oldAR.Overlaps(newAR). +// mm.pmas.IsEmptyRange(newAR). oldAR and newAR must be page-aligned. +func (mm *MemoryManager) movePMAsLocked(oldAR, newAR usermem.AddrRange) { + if checkInvariants { + if !oldAR.WellFormed() || oldAR.Length() <= 0 || !oldAR.IsPageAligned() { + panic(fmt.Sprintf("invalid oldAR: %v", oldAR)) + } + if !newAR.WellFormed() || newAR.Length() <= 0 || !newAR.IsPageAligned() { + panic(fmt.Sprintf("invalid newAR: %v", newAR)) + } + if oldAR.Length() > newAR.Length() { + panic(fmt.Sprintf("old address range %v may contain pmas that will not fit in new address range %v", oldAR, newAR)) + } + if oldAR.Overlaps(newAR) { + panic(fmt.Sprintf("old and new address ranges overlap: %v, %v", oldAR, newAR)) + } + // mm.pmas.IsEmptyRange is checked by mm.pmas.Insert. + } + + type movedPMA struct { + oldAR usermem.AddrRange + pma pma + } + var movedPMAs []movedPMA + pseg := mm.pmas.LowerBoundSegment(oldAR.Start) + for pseg.Ok() && pseg.Start() < oldAR.End { + pseg = mm.pmas.Isolate(pseg, oldAR) + movedPMAs = append(movedPMAs, movedPMA{ + oldAR: pseg.Range(), + pma: pseg.Value(), + }) + pseg = mm.pmas.Remove(pseg).NextSegment() + // No RSS change is needed since we're re-inserting the same pmas + // below. + } + + off := newAR.Start - oldAR.Start + pgap := mm.pmas.FindGap(newAR.Start) + for i := range movedPMAs { + mpma := &movedPMAs[i] + pmaNewAR := usermem.AddrRange{mpma.oldAR.Start + off, mpma.oldAR.End + off} + pgap = mm.pmas.Insert(pgap, pmaNewAR, mpma.pma).NextGap() + } + + mm.unmapASLocked(oldAR) +} + +// getPMAInternalMappingsLocked ensures that pmas for all addresses in ar have +// cached internal mappings. 
It returns: +// +// - An iterator to the gap after the last pma with internal mappings +// containing an address in ar. If internal mappings exist for no addresses in +// ar, the iterator is to a gap that begins before ar.Start. +// +// - An error that is non-nil if internal mappings exist for only a subset of +// ar. +// +// Preconditions: mm.activeMu must be locked for writing. +// pseg.Range().Contains(ar.Start). pmas must exist for all addresses in ar. +// ar.Length() != 0. +// +// Postconditions: getPMAInternalMappingsLocked does not invalidate iterators +// into mm.pmas. +func (mm *MemoryManager) getPMAInternalMappingsLocked(pseg pmaIterator, ar usermem.AddrRange) (pmaGapIterator, error) { + if checkInvariants { + if !ar.WellFormed() || ar.Length() <= 0 { + panic(fmt.Sprintf("invalid ar: %v", ar)) + } + if !pseg.Range().Contains(ar.Start) { + panic(fmt.Sprintf("initial pma %v does not cover start of ar %v", pseg.Range(), ar)) + } + } + + for { + if err := pseg.getInternalMappingsLocked(); err != nil { + return pseg.PrevGap(), err + } + if ar.End <= pseg.End() { + return pseg.NextGap(), nil + } + pseg, _ = pseg.NextNonEmpty() + } +} + +// getVecPMAInternalMappingsLocked ensures that pmas for all addresses in ars +// have cached internal mappings. It returns the subset of ars for which +// internal mappings exist. If this is not equal to ars, it returns a non-nil +// error explaining why. +// +// Preconditions: mm.activeMu must be locked for writing. pmas must exist for +// all addresses in ar. +// +// Postconditions: getVecPMAInternalMappingsLocked does not invalidate iterators +// into mm.pmas. +func (mm *MemoryManager) getVecPMAInternalMappingsLocked(ars usermem.AddrRangeSeq) (usermem.AddrRangeSeq, error) { + for arsit := ars; !arsit.IsEmpty(); arsit = arsit.Tail() { + ar := arsit.Head() + if ar.Length() == 0 { + continue + } + if pend, err := mm.getPMAInternalMappingsLocked(mm.pmas.FindSegment(ar.Start), ar); err != nil { + return truncatedAddrRangeSeq(ars, arsit, pend.Start()), err + } + } + return ars, nil +} + +// internalMappingsLocked returns internal mappings for addresses in ar. +// +// Preconditions: mm.activeMu must be locked. Internal mappings must have been +// previously established for all addresses in ar. ar.Length() != 0. +// pseg.Range().Contains(ar.Start). +func (mm *MemoryManager) internalMappingsLocked(pseg pmaIterator, ar usermem.AddrRange) safemem.BlockSeq { + if checkInvariants { + if !ar.WellFormed() || ar.Length() <= 0 { + panic(fmt.Sprintf("invalid ar: %v", ar)) + } + if !pseg.Range().Contains(ar.Start) { + panic(fmt.Sprintf("initial pma %v does not cover start of ar %v", pseg.Range(), ar)) + } + } + + if ar.End <= pseg.End() { + // Since only one pma is involved, we can use pma.internalMappings + // directly, avoiding a slice allocation. + offset := uint64(ar.Start - pseg.Start()) + return pseg.ValuePtr().internalMappings.DropFirst64(offset).TakeFirst64(uint64(ar.Length())) + } + + var ims []safemem.Block + for { + pr := pseg.Range().Intersect(ar) + for pims := pseg.ValuePtr().internalMappings.DropFirst64(uint64(pr.Start - pseg.Start())).TakeFirst64(uint64(pr.Length())); !pims.IsEmpty(); pims = pims.Tail() { + ims = append(ims, pims.Head()) + } + if ar.End <= pseg.End() { + break + } + pseg = pseg.NextSegment() + } + return safemem.BlockSeqFromSlice(ims) +} + +// vecInternalMappingsLocked returns internal mappings for addresses in ars. +// +// Preconditions: mm.activeMu must be locked. 
Internal mappings must have been +// previously established for all addresses in ars. +func (mm *MemoryManager) vecInternalMappingsLocked(ars usermem.AddrRangeSeq) safemem.BlockSeq { + var ims []safemem.Block + for ; !ars.IsEmpty(); ars = ars.Tail() { + ar := ars.Head() + if ar.Length() == 0 { + continue + } + for pims := mm.internalMappingsLocked(mm.pmas.FindSegment(ar.Start), ar); !pims.IsEmpty(); pims = pims.Tail() { + ims = append(ims, pims.Head()) + } + } + return safemem.BlockSeqFromSlice(ims) +} + +// incPrivateRef acquires a reference on private pages in fr. +func (mm *MemoryManager) incPrivateRef(fr platform.FileRange) { + mm.privateRefs.mu.Lock() + defer mm.privateRefs.mu.Unlock() + refSet := &mm.privateRefs.refs + seg, gap := refSet.Find(fr.Start) + for { + switch { + case seg.Ok() && seg.Start() < fr.End: + seg = refSet.Isolate(seg, fr) + seg.SetValue(seg.Value() + 1) + seg, gap = seg.NextNonEmpty() + case gap.Ok() && gap.Start() < fr.End: + seg, gap = refSet.InsertWithoutMerging(gap, gap.Range().Intersect(fr), 1).NextNonEmpty() + default: + refSet.MergeAdjacent(fr) + return + } + } +} + +// decPrivateRef releases a reference on private pages in fr. +func (mm *MemoryManager) decPrivateRef(fr platform.FileRange) { + var freed []platform.FileRange + + mm.privateRefs.mu.Lock() + refSet := &mm.privateRefs.refs + seg := refSet.LowerBoundSegment(fr.Start) + for seg.Ok() && seg.Start() < fr.End { + seg = refSet.Isolate(seg, fr) + if old := seg.Value(); old == 1 { + freed = append(freed, seg.Range()) + seg = refSet.Remove(seg).NextSegment() + } else { + seg.SetValue(old - 1) + seg = seg.NextSegment() + } + } + refSet.MergeAdjacent(fr) + mm.privateRefs.mu.Unlock() + + mf := mm.mfp.MemoryFile() + for _, fr := range freed { + mf.DecRef(fr) + } +} + +// addRSSLocked updates the current and maximum resident set size of a +// MemoryManager to reflect the insertion of a pma at ar. +// +// Preconditions: mm.activeMu must be locked for writing. +func (mm *MemoryManager) addRSSLocked(ar usermem.AddrRange) { + mm.curRSS += uint64(ar.Length()) + if mm.curRSS > mm.maxRSS { + mm.maxRSS = mm.curRSS + } +} + +// removeRSSLocked updates the current resident set size of a MemoryManager to +// reflect the removal of a pma at ar. +// +// Preconditions: mm.activeMu must be locked for writing. +func (mm *MemoryManager) removeRSSLocked(ar usermem.AddrRange) { + mm.curRSS -= uint64(ar.Length()) +} + +// pmaSetFunctions implements segment.Functions for pmaSet. +type pmaSetFunctions struct{} + +func (pmaSetFunctions) MinKey() usermem.Addr { + return 0 +} + +func (pmaSetFunctions) MaxKey() usermem.Addr { + return ^usermem.Addr(0) +} + +func (pmaSetFunctions) ClearValue(pma *pma) { + pma.file = nil + pma.internalMappings = safemem.BlockSeq{} +} + +func (pmaSetFunctions) Merge(ar1 usermem.AddrRange, pma1 pma, ar2 usermem.AddrRange, pma2 pma) (pma, bool) { + if pma1.file != pma2.file || + pma1.off+uint64(ar1.Length()) != pma2.off || + pma1.translatePerms != pma2.translatePerms || + pma1.effectivePerms != pma2.effectivePerms || + pma1.maxPerms != pma2.maxPerms || + pma1.needCOW != pma2.needCOW || + pma1.private != pma2.private { + return pma{}, false + } + + // Discard internal mappings instead of trying to merge them, since merging + // them requires an allocation and getting them again from the + // platform.File might not. 
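+	//
+	// For example, two pmas backed by the same file at offsets [0, 4096) and
+	// [4096, 8192), with identical permission and COW state, merge into a
+	// single pma at offset 0; any mismatch is rejected by the checks above,
+	// which return (pma{}, false).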
+ pma1.internalMappings = safemem.BlockSeq{} + return pma1, true +} + +func (pmaSetFunctions) Split(ar usermem.AddrRange, p pma, split usermem.Addr) (pma, pma) { + newlen1 := uint64(split - ar.Start) + p2 := p + p2.off += newlen1 + if !p.internalMappings.IsEmpty() { + p.internalMappings = p.internalMappings.TakeFirst64(newlen1) + p2.internalMappings = p2.internalMappings.DropFirst64(newlen1) + } + return p, p2 +} + +// findOrSeekPrevUpperBoundPMA returns mm.pmas.UpperBoundSegment(addr), but may do +// so by scanning linearly backward from pgap. +// +// Preconditions: mm.activeMu must be locked. addr <= pgap.Start(). +func (mm *MemoryManager) findOrSeekPrevUpperBoundPMA(addr usermem.Addr, pgap pmaGapIterator) pmaIterator { + if checkInvariants { + if !pgap.Ok() { + panic("terminal pma iterator") + } + if addr > pgap.Start() { + panic(fmt.Sprintf("can't seek backward to %#x from %#x", addr, pgap.Start())) + } + } + // Optimistically check if pgap.PrevSegment() is the PMA we're looking for, + // which is the case if findOrSeekPrevUpperBoundPMA is called to find the + // start of a range containing only a single PMA. + if pseg := pgap.PrevSegment(); pseg.Start() <= addr { + return pseg + } + return mm.pmas.UpperBoundSegment(addr) +} + +// getInternalMappingsLocked ensures that pseg.ValuePtr().internalMappings is +// non-empty. +// +// Preconditions: mm.activeMu must be locked for writing. +func (pseg pmaIterator) getInternalMappingsLocked() error { + pma := pseg.ValuePtr() + if pma.internalMappings.IsEmpty() { + // This must use maxPerms (instead of perms) because some permission + // constraints are only visible to vmas; for example, mappings of + // read-only files have vma.maxPerms.Write unset, but this may not be + // visible to the memmap.Mappable. + perms := pma.maxPerms + // We will never execute application code through an internal mapping. + perms.Execute = false + ims, err := pma.file.MapInternal(pseg.fileRange(), perms) + if err != nil { + return err + } + pma.internalMappings = ims + } + return nil +} + +func (pseg pmaIterator) fileRange() platform.FileRange { + return pseg.fileRangeOf(pseg.Range()) +} + +// Preconditions: pseg.Range().IsSupersetOf(ar). ar.Length != 0. +func (pseg pmaIterator) fileRangeOf(ar usermem.AddrRange) platform.FileRange { + if checkInvariants { + if !pseg.Ok() { + panic("terminal pma iterator") + } + if !ar.WellFormed() || ar.Length() <= 0 { + panic(fmt.Sprintf("invalid ar: %v", ar)) + } + if !pseg.Range().IsSupersetOf(ar) { + panic(fmt.Sprintf("ar %v out of bounds %v", ar, pseg.Range())) + } + } + + pma := pseg.ValuePtr() + pstart := pseg.Start() + return platform.FileRange{pma.off + uint64(ar.Start-pstart), pma.off + uint64(ar.End-pstart)} +} diff --git a/pkg/sentry/mm/pma_set.go b/pkg/sentry/mm/pma_set.go new file mode 100755 index 000000000..6380d8619 --- /dev/null +++ b/pkg/sentry/mm/pma_set.go @@ -0,0 +1,1274 @@ +package mm + +import ( + __generics_imported0 "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" +) + +import ( + "bytes" + "fmt" +) + +const ( + // minDegree is the minimum degree of an internal node in a Set B-tree. + // + // - Any non-root node has at least minDegree-1 segments. + // + // - Any non-root internal (non-leaf) node has at least minDegree children. + // + // - The root node may have fewer than minDegree-1 segments, but it may + // only have 0 segments if the tree is empty. + // + // Our implementation requires minDegree >= 3. 
Higher values of minDegree + // usually improve performance, but increase memory usage for small sets. + pmaminDegree = 8 + + pmamaxDegree = 2 * pmaminDegree +) + +// A Set is a mapping of segments with non-overlapping Range keys. The zero +// value for a Set is an empty set. Set values are not safely movable nor +// copyable. Set is thread-compatible. +// +// +stateify savable +type pmaSet struct { + root pmanode `state:".(*pmaSegmentDataSlices)"` +} + +// IsEmpty returns true if the set contains no segments. +func (s *pmaSet) IsEmpty() bool { + return s.root.nrSegments == 0 +} + +// IsEmptyRange returns true iff no segments in the set overlap the given +// range. This is semantically equivalent to s.SpanRange(r) == 0, but may be +// more efficient. +func (s *pmaSet) IsEmptyRange(r __generics_imported0.AddrRange) bool { + switch { + case r.Length() < 0: + panic(fmt.Sprintf("invalid range %v", r)) + case r.Length() == 0: + return true + } + _, gap := s.Find(r.Start) + if !gap.Ok() { + return false + } + return r.End <= gap.End() +} + +// Span returns the total size of all segments in the set. +func (s *pmaSet) Span() __generics_imported0.Addr { + var sz __generics_imported0.Addr + for seg := s.FirstSegment(); seg.Ok(); seg = seg.NextSegment() { + sz += seg.Range().Length() + } + return sz +} + +// SpanRange returns the total size of the intersection of segments in the set +// with the given range. +func (s *pmaSet) SpanRange(r __generics_imported0.AddrRange) __generics_imported0.Addr { + switch { + case r.Length() < 0: + panic(fmt.Sprintf("invalid range %v", r)) + case r.Length() == 0: + return 0 + } + var sz __generics_imported0.Addr + for seg := s.LowerBoundSegment(r.Start); seg.Ok() && seg.Start() < r.End; seg = seg.NextSegment() { + sz += seg.Range().Intersect(r).Length() + } + return sz +} + +// FirstSegment returns the first segment in the set. If the set is empty, +// FirstSegment returns a terminal iterator. +func (s *pmaSet) FirstSegment() pmaIterator { + if s.root.nrSegments == 0 { + return pmaIterator{} + } + return s.root.firstSegment() +} + +// LastSegment returns the last segment in the set. If the set is empty, +// LastSegment returns a terminal iterator. +func (s *pmaSet) LastSegment() pmaIterator { + if s.root.nrSegments == 0 { + return pmaIterator{} + } + return s.root.lastSegment() +} + +// FirstGap returns the first gap in the set. +func (s *pmaSet) FirstGap() pmaGapIterator { + n := &s.root + for n.hasChildren { + n = n.children[0] + } + return pmaGapIterator{n, 0} +} + +// LastGap returns the last gap in the set. +func (s *pmaSet) LastGap() pmaGapIterator { + n := &s.root + for n.hasChildren { + n = n.children[n.nrSegments] + } + return pmaGapIterator{n, n.nrSegments} +} + +// Find returns the segment or gap whose range contains the given key. If a +// segment is found, the returned Iterator is non-terminal and the +// returned GapIterator is terminal. Otherwise, the returned Iterator is +// terminal and the returned GapIterator is non-terminal. 
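+//
+// An illustrative sketch: to test whether addr is currently covered by a
+// segment,
+//
+//	if seg, _ := s.Find(addr); seg.Ok() {
+//		// addr lies within seg.Range().
+//	}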
+func (s *pmaSet) Find(key __generics_imported0.Addr) (pmaIterator, pmaGapIterator) { + n := &s.root + for { + + lower := 0 + upper := n.nrSegments + for lower < upper { + i := lower + (upper-lower)/2 + if r := n.keys[i]; key < r.End { + if key >= r.Start { + return pmaIterator{n, i}, pmaGapIterator{} + } + upper = i + } else { + lower = i + 1 + } + } + i := lower + if !n.hasChildren { + return pmaIterator{}, pmaGapIterator{n, i} + } + n = n.children[i] + } +} + +// FindSegment returns the segment whose range contains the given key. If no +// such segment exists, FindSegment returns a terminal iterator. +func (s *pmaSet) FindSegment(key __generics_imported0.Addr) pmaIterator { + seg, _ := s.Find(key) + return seg +} + +// LowerBoundSegment returns the segment with the lowest range that contains a +// key greater than or equal to min. If no such segment exists, +// LowerBoundSegment returns a terminal iterator. +func (s *pmaSet) LowerBoundSegment(min __generics_imported0.Addr) pmaIterator { + seg, gap := s.Find(min) + if seg.Ok() { + return seg + } + return gap.NextSegment() +} + +// UpperBoundSegment returns the segment with the highest range that contains a +// key less than or equal to max. If no such segment exists, UpperBoundSegment +// returns a terminal iterator. +func (s *pmaSet) UpperBoundSegment(max __generics_imported0.Addr) pmaIterator { + seg, gap := s.Find(max) + if seg.Ok() { + return seg + } + return gap.PrevSegment() +} + +// FindGap returns the gap containing the given key. If no such gap exists +// (i.e. the set contains a segment containing that key), FindGap returns a +// terminal iterator. +func (s *pmaSet) FindGap(key __generics_imported0.Addr) pmaGapIterator { + _, gap := s.Find(key) + return gap +} + +// LowerBoundGap returns the gap with the lowest range that is greater than or +// equal to min. +func (s *pmaSet) LowerBoundGap(min __generics_imported0.Addr) pmaGapIterator { + seg, gap := s.Find(min) + if gap.Ok() { + return gap + } + return seg.NextGap() +} + +// UpperBoundGap returns the gap with the highest range that is less than or +// equal to max. +func (s *pmaSet) UpperBoundGap(max __generics_imported0.Addr) pmaGapIterator { + seg, gap := s.Find(max) + if gap.Ok() { + return gap + } + return seg.PrevGap() +} + +// Add inserts the given segment into the set and returns true. If the new +// segment can be merged with adjacent segments, Add will do so. If the new +// segment would overlap an existing segment, Add returns false. If Add +// succeeds, all existing iterators are invalidated. +func (s *pmaSet) Add(r __generics_imported0.AddrRange, val pma) bool { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + gap := s.FindGap(r.Start) + if !gap.Ok() { + return false + } + if r.End > gap.End() { + return false + } + s.Insert(gap, r, val) + return true +} + +// AddWithoutMerging inserts the given segment into the set and returns true. +// If it would overlap an existing segment, AddWithoutMerging does nothing and +// returns false. If AddWithoutMerging succeeds, all existing iterators are +// invalidated. +func (s *pmaSet) AddWithoutMerging(r __generics_imported0.AddrRange, val pma) bool { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + gap := s.FindGap(r.Start) + if !gap.Ok() { + return false + } + if r.End > gap.End() { + return false + } + s.InsertWithoutMergingUnchecked(gap, r, val) + return true +} + +// Insert inserts the given segment into the given gap. 
If the new segment can be merged with adjacent segments, Insert will do so.
+// Insert returns an iterator to the segment containing the inserted value
+// (which may have been merged with other values). All existing iterators
+// (including gap, but not including the returned iterator) are invalidated.
+//
+// If the gap cannot accommodate the segment, or if r is invalid, Insert panics.
+//
+// Insert is semantically equivalent to an InsertWithoutMerging followed by a
+// Merge, but may be more efficient. Note that there is no unchecked variant of
+// Insert since Insert must retrieve and inspect gap's predecessor and
+// successor segments regardless.
+func (s *pmaSet) Insert(gap pmaGapIterator, r __generics_imported0.AddrRange, val pma) pmaIterator {
+	if r.Length() <= 0 {
+		panic(fmt.Sprintf("invalid segment range %v", r))
+	}
+	prev, next := gap.PrevSegment(), gap.NextSegment()
+	if prev.Ok() && prev.End() > r.Start {
+		panic(fmt.Sprintf("new segment %v overlaps predecessor %v", r, prev.Range()))
+	}
+	if next.Ok() && next.Start() < r.End {
+		panic(fmt.Sprintf("new segment %v overlaps successor %v", r, next.Range()))
+	}
+	if prev.Ok() && prev.End() == r.Start {
+		if mval, ok := (pmaSetFunctions{}).Merge(prev.Range(), prev.Value(), r, val); ok {
+			prev.SetEndUnchecked(r.End)
+			prev.SetValue(mval)
+			if next.Ok() && next.Start() == r.End {
+				val = mval
+				if mval, ok := (pmaSetFunctions{}).Merge(prev.Range(), val, next.Range(), next.Value()); ok {
+					prev.SetEndUnchecked(next.End())
+					prev.SetValue(mval)
+					return s.Remove(next).PrevSegment()
+				}
+			}
+			return prev
+		}
+	}
+	if next.Ok() && next.Start() == r.End {
+		if mval, ok := (pmaSetFunctions{}).Merge(r, val, next.Range(), next.Value()); ok {
+			next.SetStartUnchecked(r.Start)
+			next.SetValue(mval)
+			return next
+		}
+	}
+	return s.InsertWithoutMergingUnchecked(gap, r, val)
+}
+
+// InsertWithoutMerging inserts the given segment into the given gap and
+// returns an iterator to the inserted segment. All existing iterators
+// (including gap, but not including the returned iterator) are invalidated.
+//
+// If the gap cannot accommodate the segment, or if r is invalid,
+// InsertWithoutMerging panics.
+func (s *pmaSet) InsertWithoutMerging(gap pmaGapIterator, r __generics_imported0.AddrRange, val pma) pmaIterator {
+	if r.Length() <= 0 {
+		panic(fmt.Sprintf("invalid segment range %v", r))
+	}
+	if gr := gap.Range(); !gr.IsSupersetOf(r) {
+		panic(fmt.Sprintf("cannot insert segment range %v into gap range %v", r, gr))
+	}
+	return s.InsertWithoutMergingUnchecked(gap, r, val)
+}
+
+// InsertWithoutMergingUnchecked inserts the given segment into the given gap
+// and returns an iterator to the inserted segment. All existing iterators
+// (including gap, but not including the returned iterator) are invalidated.
+//
+// Preconditions: r.Start >= gap.Start(); r.End <= gap.End().
+func (s *pmaSet) InsertWithoutMergingUnchecked(gap pmaGapIterator, r __generics_imported0.AddrRange, val pma) pmaIterator {
+	gap = gap.node.rebalanceBeforeInsert(gap)
+	copy(gap.node.keys[gap.index+1:], gap.node.keys[gap.index:gap.node.nrSegments])
+	copy(gap.node.values[gap.index+1:], gap.node.values[gap.index:gap.node.nrSegments])
+	gap.node.keys[gap.index] = r
+	gap.node.values[gap.index] = val
+	gap.node.nrSegments++
+	return pmaIterator{gap.node, gap.index}
+}
+
+// Remove removes the given segment and returns an iterator to the vacated gap.
+// All existing iterators (including seg, but not including the returned
+// iterator) are invalidated.
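+//
+// For illustration (a hypothetical caller, not part of this file): a caller
+// can replace a segment's value under a new range by removing it and reusing
+// the vacated gap, where addr, newRange, and newVal are assumed names:
+//
+//	if seg := s.FindSegment(addr); seg.Ok() {
+//		gap := s.Remove(seg)
+//		s.Insert(gap, newRange, newVal) // newRange must lie within gap
+//	}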
+func (s *pmaSet) Remove(seg pmaIterator) pmaGapIterator {
+
+	if seg.node.hasChildren {
+
+		victim := seg.PrevSegment()
+
+		seg.SetRangeUnchecked(victim.Range())
+		seg.SetValue(victim.Value())
+		return s.Remove(victim).NextGap()
+	}
+	copy(seg.node.keys[seg.index:], seg.node.keys[seg.index+1:seg.node.nrSegments])
+	copy(seg.node.values[seg.index:], seg.node.values[seg.index+1:seg.node.nrSegments])
+	pmaSetFunctions{}.ClearValue(&seg.node.values[seg.node.nrSegments-1])
+	seg.node.nrSegments--
+	return seg.node.rebalanceAfterRemove(pmaGapIterator{seg.node, seg.index})
+}
+
+// RemoveAll removes all segments from the set. All existing iterators are
+// invalidated.
+func (s *pmaSet) RemoveAll() {
+	s.root = pmanode{}
+}
+
+// RemoveRange removes all segments in the given range. An iterator to the
+// newly formed gap is returned, and all existing iterators are invalidated.
+func (s *pmaSet) RemoveRange(r __generics_imported0.AddrRange) pmaGapIterator {
+	seg, gap := s.Find(r.Start)
+	if seg.Ok() {
+		seg = s.Isolate(seg, r)
+		gap = s.Remove(seg)
+	}
+	for seg = gap.NextSegment(); seg.Ok() && seg.Start() < r.End; seg = gap.NextSegment() {
+		seg = s.Isolate(seg, r)
+		gap = s.Remove(seg)
+	}
+	return gap
+}
+
+// Merge attempts to merge two neighboring segments. If successful, Merge
+// returns an iterator to the merged segment, and all existing iterators are
+// invalidated. Otherwise, Merge returns a terminal iterator.
+//
+// If first is not the predecessor of second, Merge panics.
+func (s *pmaSet) Merge(first, second pmaIterator) pmaIterator {
+	if first.NextSegment() != second {
+		panic(fmt.Sprintf("attempt to merge non-neighboring segments %v, %v", first.Range(), second.Range()))
+	}
+	return s.MergeUnchecked(first, second)
+}
+
+// MergeUnchecked attempts to merge two neighboring segments. If successful,
+// MergeUnchecked returns an iterator to the merged segment, and all existing
+// iterators are invalidated. Otherwise, MergeUnchecked returns a terminal
+// iterator.
+//
+// Precondition: first is the predecessor of second: first.NextSegment() ==
+// second, first == second.PrevSegment().
+func (s *pmaSet) MergeUnchecked(first, second pmaIterator) pmaIterator {
+	if first.End() == second.Start() {
+		if mval, ok := (pmaSetFunctions{}).Merge(first.Range(), first.Value(), second.Range(), second.Value()); ok {
+
+			first.SetEndUnchecked(second.End())
+			first.SetValue(mval)
+			return s.Remove(second).PrevSegment()
+		}
+	}
+	return pmaIterator{}
+}
+
+// MergeAll attempts to merge all adjacent segments in the set. All existing
+// iterators are invalidated.
+func (s *pmaSet) MergeAll() {
+	seg := s.FirstSegment()
+	if !seg.Ok() {
+		return
+	}
+	next := seg.NextSegment()
+	for next.Ok() {
+		if mseg := s.MergeUnchecked(seg, next); mseg.Ok() {
+			seg, next = mseg, mseg.NextSegment()
+		} else {
+			seg, next = next, next.NextSegment()
+		}
+	}
+}
+
+// MergeRange attempts to merge all adjacent segments that contain a key in the
+// given range. All existing iterators are invalidated.
+func (s *pmaSet) MergeRange(r __generics_imported0.AddrRange) {
+	seg := s.LowerBoundSegment(r.Start)
+	if !seg.Ok() {
+		return
+	}
+	next := seg.NextSegment()
+	for next.Ok() && next.Range().Start < r.End {
+		if mseg := s.MergeUnchecked(seg, next); mseg.Ok() {
+			seg, next = mseg, mseg.NextSegment()
+		} else {
+			seg, next = next, next.NextSegment()
+		}
+	}
+}
+
+// MergeAdjacent attempts to merge the segment containing r.Start with its
+// predecessor, and the segment containing r.End-1 with its successor.
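+//
+// For illustration (a hypothetical caller, not part of this file): after
+// isolating and mutating the segments covering a range r, MergeAdjacent
+// restores merging at r's boundaries:
+//
+//	seg := s.Isolate(s.FindSegment(r.Start), r)
+//	seg.SetValue(newVal) // newVal is an assumed value
+//	s.MergeAdjacent(r)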
+func (s *pmaSet) MergeAdjacent(r __generics_imported0.AddrRange) {
+	first := s.FindSegment(r.Start)
+	if first.Ok() {
+		if prev := first.PrevSegment(); prev.Ok() {
+			s.Merge(prev, first)
+		}
+	}
+	last := s.FindSegment(r.End - 1)
+	if last.Ok() {
+		if next := last.NextSegment(); next.Ok() {
+			s.Merge(last, next)
+		}
+	}
+}
+
+// Split splits the given segment at the given key and returns iterators to the
+// two resulting segments. All existing iterators (including seg, but not
+// including the returned iterators) are invalidated.
+//
+// If the segment cannot be split at split (because split is at the start or
+// end of the segment's range, so splitting would produce a segment with zero
+// length, or because split falls outside the segment's range altogether),
+// Split panics.
+func (s *pmaSet) Split(seg pmaIterator, split __generics_imported0.Addr) (pmaIterator, pmaIterator) {
+	if !seg.Range().CanSplitAt(split) {
+		panic(fmt.Sprintf("can't split %v at %v", seg.Range(), split))
+	}
+	return s.SplitUnchecked(seg, split)
+}
+
+// SplitUnchecked splits the given segment at the given key and returns
+// iterators to the two resulting segments. All existing iterators (including
+// seg, but not including the returned iterators) are invalidated.
+//
+// Preconditions: seg.Start() < split < seg.End().
+func (s *pmaSet) SplitUnchecked(seg pmaIterator, split __generics_imported0.Addr) (pmaIterator, pmaIterator) {
+	val1, val2 := (pmaSetFunctions{}).Split(seg.Range(), seg.Value(), split)
+	end2 := seg.End()
+	seg.SetEndUnchecked(split)
+	seg.SetValue(val1)
+	seg2 := s.InsertWithoutMergingUnchecked(seg.NextGap(), __generics_imported0.AddrRange{split, end2}, val2)
+
+	return seg2.PrevSegment(), seg2
+}
+
+// SplitAt splits the segment straddling split, if one exists. SplitAt returns
+// true if a segment was split and false otherwise. If SplitAt splits a
+// segment, all existing iterators are invalidated.
+func (s *pmaSet) SplitAt(split __generics_imported0.Addr) bool {
+	if seg := s.FindSegment(split); seg.Ok() && seg.Range().CanSplitAt(split) {
+		s.SplitUnchecked(seg, split)
+		return true
+	}
+	return false
+}
+
+// Isolate ensures that the given segment's range does not escape r by
+// splitting at r.Start and r.End if necessary, and returns an updated iterator
+// to the bounded segment. All existing iterators (including seg, but not
+// including the returned iterator) are invalidated.
+func (s *pmaSet) Isolate(seg pmaIterator, r __generics_imported0.AddrRange) pmaIterator {
+	if seg.Range().CanSplitAt(r.Start) {
+		_, seg = s.SplitUnchecked(seg, r.Start)
+	}
+	if seg.Range().CanSplitAt(r.End) {
+		seg, _ = s.SplitUnchecked(seg, r.End)
+	}
+	return seg
+}
+
+// ApplyContiguous applies a function to a contiguous range of segments,
+// splitting if necessary. The function is applied until the first gap is
+// encountered, at which point the gap is returned. If the function is applied
+// across the entire range, a terminal gap is returned. All existing iterators
+// are invalidated.
+//
+// N.B. The Iterator must not be invalidated by the function.
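+//
+// For illustration (a hypothetical caller, not part of this file):
+//
+//	gap := s.ApplyContiguous(ar, func(seg pmaIterator) {
+//		// Mutate seg.ValuePtr() in place; seg has been isolated to ar.
+//	})
+//	if gap.Ok() {
+//		// ar was not fully covered by segments; gap is the first hole.
+//	}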
+func (s *pmaSet) ApplyContiguous(r __generics_imported0.AddrRange, fn func(seg pmaIterator)) pmaGapIterator { + seg, gap := s.Find(r.Start) + if !seg.Ok() { + return gap + } + for { + seg = s.Isolate(seg, r) + fn(seg) + if seg.End() >= r.End { + return pmaGapIterator{} + } + gap = seg.NextGap() + if !gap.IsEmpty() { + return gap + } + seg = gap.NextSegment() + if !seg.Ok() { + + return pmaGapIterator{} + } + } +} + +// +stateify savable +type pmanode struct { + // An internal binary tree node looks like: + // + // K + // / \ + // Cl Cr + // + // where all keys in the subtree rooted by Cl (the left subtree) are less + // than K (the key of the parent node), and all keys in the subtree rooted + // by Cr (the right subtree) are greater than K. + // + // An internal B-tree node's indexes work out to look like: + // + // K0 K1 K2 ... Kn-1 + // / \/ \/ \ ... / \ + // C0 C1 C2 C3 ... Cn-1 Cn + // + // where n is nrSegments. + nrSegments int + + // parent is a pointer to this node's parent. If this node is root, parent + // is nil. + parent *pmanode + + // parentIndex is the index of this node in parent.children. + parentIndex int + + // Flag for internal nodes that is technically redundant with "children[0] + // != nil", but is stored in the first cache line. "hasChildren" rather + // than "isLeaf" because false must be the correct value for an empty root. + hasChildren bool + + // Nodes store keys and values in separate arrays to maximize locality in + // the common case (scanning keys for lookup). + keys [pmamaxDegree - 1]__generics_imported0.AddrRange + values [pmamaxDegree - 1]pma + children [pmamaxDegree]*pmanode +} + +// firstSegment returns the first segment in the subtree rooted by n. +// +// Preconditions: n.nrSegments != 0. +func (n *pmanode) firstSegment() pmaIterator { + for n.hasChildren { + n = n.children[0] + } + return pmaIterator{n, 0} +} + +// lastSegment returns the last segment in the subtree rooted by n. +// +// Preconditions: n.nrSegments != 0. +func (n *pmanode) lastSegment() pmaIterator { + for n.hasChildren { + n = n.children[n.nrSegments] + } + return pmaIterator{n, n.nrSegments - 1} +} + +func (n *pmanode) prevSibling() *pmanode { + if n.parent == nil || n.parentIndex == 0 { + return nil + } + return n.parent.children[n.parentIndex-1] +} + +func (n *pmanode) nextSibling() *pmanode { + if n.parent == nil || n.parentIndex == n.parent.nrSegments { + return nil + } + return n.parent.children[n.parentIndex+1] +} + +// rebalanceBeforeInsert splits n and its ancestors if they are full, as +// required for insertion, and returns an updated iterator to the position +// represented by gap. 
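+//
+// For illustration (shapes only): with minimum degree d (pmaminDegree), a
+// full node holds 2d-1 keys K0 ... K2d-2. Splitting promotes the median key
+// Kd-1 into the parent and divides the remainder between two new nodes:
+//
+//	K0 ... Kd-2  Kd-1  Kd ... K2d-2   =>        Kd-1
+//	                                           /    \
+//	                                  K0 ... Kd-2    Kd ... K2d-2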
+func (n *pmanode) rebalanceBeforeInsert(gap pmaGapIterator) pmaGapIterator { + if n.parent != nil { + gap = n.parent.rebalanceBeforeInsert(gap) + } + if n.nrSegments < pmamaxDegree-1 { + return gap + } + if n.parent == nil { + + left := &pmanode{ + nrSegments: pmaminDegree - 1, + parent: n, + parentIndex: 0, + hasChildren: n.hasChildren, + } + right := &pmanode{ + nrSegments: pmaminDegree - 1, + parent: n, + parentIndex: 1, + hasChildren: n.hasChildren, + } + copy(left.keys[:pmaminDegree-1], n.keys[:pmaminDegree-1]) + copy(left.values[:pmaminDegree-1], n.values[:pmaminDegree-1]) + copy(right.keys[:pmaminDegree-1], n.keys[pmaminDegree:]) + copy(right.values[:pmaminDegree-1], n.values[pmaminDegree:]) + n.keys[0], n.values[0] = n.keys[pmaminDegree-1], n.values[pmaminDegree-1] + pmazeroValueSlice(n.values[1:]) + if n.hasChildren { + copy(left.children[:pmaminDegree], n.children[:pmaminDegree]) + copy(right.children[:pmaminDegree], n.children[pmaminDegree:]) + pmazeroNodeSlice(n.children[2:]) + for i := 0; i < pmaminDegree; i++ { + left.children[i].parent = left + left.children[i].parentIndex = i + right.children[i].parent = right + right.children[i].parentIndex = i + } + } + n.nrSegments = 1 + n.hasChildren = true + n.children[0] = left + n.children[1] = right + if gap.node != n { + return gap + } + if gap.index < pmaminDegree { + return pmaGapIterator{left, gap.index} + } + return pmaGapIterator{right, gap.index - pmaminDegree} + } + + copy(n.parent.keys[n.parentIndex+1:], n.parent.keys[n.parentIndex:n.parent.nrSegments]) + copy(n.parent.values[n.parentIndex+1:], n.parent.values[n.parentIndex:n.parent.nrSegments]) + n.parent.keys[n.parentIndex], n.parent.values[n.parentIndex] = n.keys[pmaminDegree-1], n.values[pmaminDegree-1] + copy(n.parent.children[n.parentIndex+2:], n.parent.children[n.parentIndex+1:n.parent.nrSegments+1]) + for i := n.parentIndex + 2; i < n.parent.nrSegments+2; i++ { + n.parent.children[i].parentIndex = i + } + sibling := &pmanode{ + nrSegments: pmaminDegree - 1, + parent: n.parent, + parentIndex: n.parentIndex + 1, + hasChildren: n.hasChildren, + } + n.parent.children[n.parentIndex+1] = sibling + n.parent.nrSegments++ + copy(sibling.keys[:pmaminDegree-1], n.keys[pmaminDegree:]) + copy(sibling.values[:pmaminDegree-1], n.values[pmaminDegree:]) + pmazeroValueSlice(n.values[pmaminDegree-1:]) + if n.hasChildren { + copy(sibling.children[:pmaminDegree], n.children[pmaminDegree:]) + pmazeroNodeSlice(n.children[pmaminDegree:]) + for i := 0; i < pmaminDegree; i++ { + sibling.children[i].parent = sibling + sibling.children[i].parentIndex = i + } + } + n.nrSegments = pmaminDegree - 1 + + if gap.node != n { + return gap + } + if gap.index < pmaminDegree { + return gap + } + return pmaGapIterator{sibling, gap.index - pmaminDegree} +} + +// rebalanceAfterRemove "unsplits" n and its ancestors if they are deficient +// (contain fewer segments than required by B-tree invariants), as required for +// removal, and returns an updated iterator to the position represented by gap. +// +// Precondition: n is the only node in the tree that may currently violate a +// B-tree invariant. 
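+//
+// Rebalancing first tries to borrow a segment from an adjacent sibling that
+// has segments to spare (a rotation through the parent); failing that, it
+// merges the deficient node with a sibling and the segment separating them,
+// then repeats one level up.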
+func (n *pmanode) rebalanceAfterRemove(gap pmaGapIterator) pmaGapIterator { + for { + if n.nrSegments >= pmaminDegree-1 { + return gap + } + if n.parent == nil { + + return gap + } + + if sibling := n.prevSibling(); sibling != nil && sibling.nrSegments >= pmaminDegree { + copy(n.keys[1:], n.keys[:n.nrSegments]) + copy(n.values[1:], n.values[:n.nrSegments]) + n.keys[0] = n.parent.keys[n.parentIndex-1] + n.values[0] = n.parent.values[n.parentIndex-1] + n.parent.keys[n.parentIndex-1] = sibling.keys[sibling.nrSegments-1] + n.parent.values[n.parentIndex-1] = sibling.values[sibling.nrSegments-1] + pmaSetFunctions{}.ClearValue(&sibling.values[sibling.nrSegments-1]) + if n.hasChildren { + copy(n.children[1:], n.children[:n.nrSegments+1]) + n.children[0] = sibling.children[sibling.nrSegments] + sibling.children[sibling.nrSegments] = nil + n.children[0].parent = n + n.children[0].parentIndex = 0 + for i := 1; i < n.nrSegments+2; i++ { + n.children[i].parentIndex = i + } + } + n.nrSegments++ + sibling.nrSegments-- + if gap.node == sibling && gap.index == sibling.nrSegments { + return pmaGapIterator{n, 0} + } + if gap.node == n { + return pmaGapIterator{n, gap.index + 1} + } + return gap + } + if sibling := n.nextSibling(); sibling != nil && sibling.nrSegments >= pmaminDegree { + n.keys[n.nrSegments] = n.parent.keys[n.parentIndex] + n.values[n.nrSegments] = n.parent.values[n.parentIndex] + n.parent.keys[n.parentIndex] = sibling.keys[0] + n.parent.values[n.parentIndex] = sibling.values[0] + copy(sibling.keys[:sibling.nrSegments-1], sibling.keys[1:]) + copy(sibling.values[:sibling.nrSegments-1], sibling.values[1:]) + pmaSetFunctions{}.ClearValue(&sibling.values[sibling.nrSegments-1]) + if n.hasChildren { + n.children[n.nrSegments+1] = sibling.children[0] + copy(sibling.children[:sibling.nrSegments], sibling.children[1:]) + sibling.children[sibling.nrSegments] = nil + n.children[n.nrSegments+1].parent = n + n.children[n.nrSegments+1].parentIndex = n.nrSegments + 1 + for i := 0; i < sibling.nrSegments; i++ { + sibling.children[i].parentIndex = i + } + } + n.nrSegments++ + sibling.nrSegments-- + if gap.node == sibling { + if gap.index == 0 { + return pmaGapIterator{n, n.nrSegments} + } + return pmaGapIterator{sibling, gap.index - 1} + } + return gap + } + + p := n.parent + if p.nrSegments == 1 { + + left, right := p.children[0], p.children[1] + p.nrSegments = left.nrSegments + right.nrSegments + 1 + p.hasChildren = left.hasChildren + p.keys[left.nrSegments] = p.keys[0] + p.values[left.nrSegments] = p.values[0] + copy(p.keys[:left.nrSegments], left.keys[:left.nrSegments]) + copy(p.values[:left.nrSegments], left.values[:left.nrSegments]) + copy(p.keys[left.nrSegments+1:], right.keys[:right.nrSegments]) + copy(p.values[left.nrSegments+1:], right.values[:right.nrSegments]) + if left.hasChildren { + copy(p.children[:left.nrSegments+1], left.children[:left.nrSegments+1]) + copy(p.children[left.nrSegments+1:], right.children[:right.nrSegments+1]) + for i := 0; i < p.nrSegments+1; i++ { + p.children[i].parent = p + p.children[i].parentIndex = i + } + } else { + p.children[0] = nil + p.children[1] = nil + } + if gap.node == left { + return pmaGapIterator{p, gap.index} + } + if gap.node == right { + return pmaGapIterator{p, gap.index + left.nrSegments + 1} + } + return gap + } + // Merge n and either sibling, along with the segment separating the + // two, into whichever of the two nodes comes first. This is the + // reverse of the non-root splitting case in + // node.rebalanceBeforeInsert. 
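+		//
+		// For illustration (shapes only), where S is the separating
+		// segment in the parent:
+		//
+		//	   ... S ...               ...
+		//	  /       \        =>       |
+		//	left     right       left + S + right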
+		var left, right *pmanode
+		if n.parentIndex > 0 {
+			left = n.prevSibling()
+			right = n
+		} else {
+			left = n
+			right = n.nextSibling()
+		}
+
+		if gap.node == right {
+			gap = pmaGapIterator{left, gap.index + left.nrSegments + 1}
+		}
+		left.keys[left.nrSegments] = p.keys[left.parentIndex]
+		left.values[left.nrSegments] = p.values[left.parentIndex]
+		copy(left.keys[left.nrSegments+1:], right.keys[:right.nrSegments])
+		copy(left.values[left.nrSegments+1:], right.values[:right.nrSegments])
+		if left.hasChildren {
+			copy(left.children[left.nrSegments+1:], right.children[:right.nrSegments+1])
+			for i := left.nrSegments + 1; i < left.nrSegments+right.nrSegments+2; i++ {
+				left.children[i].parent = left
+				left.children[i].parentIndex = i
+			}
+		}
+		left.nrSegments += right.nrSegments + 1
+		copy(p.keys[left.parentIndex:], p.keys[left.parentIndex+1:p.nrSegments])
+		copy(p.values[left.parentIndex:], p.values[left.parentIndex+1:p.nrSegments])
+		pmaSetFunctions{}.ClearValue(&p.values[p.nrSegments-1])
+		copy(p.children[left.parentIndex+1:], p.children[left.parentIndex+2:p.nrSegments+1])
+		for i := 0; i < p.nrSegments; i++ {
+			p.children[i].parentIndex = i
+		}
+		p.children[p.nrSegments] = nil
+		p.nrSegments--
+
+		n = p
+	}
+}
+
+// An Iterator is conceptually one of:
+//
+// - A pointer to a segment in a set; or
+//
+// - A terminal iterator, which is a sentinel indicating that the end of
+// iteration has been reached.
+//
+// Iterators are copyable values and are meaningfully equality-comparable. The
+// zero value of Iterator is a terminal iterator.
+//
+// Unless otherwise specified, any mutation of a set invalidates all existing
+// iterators into the set.
+type pmaIterator struct {
+	// node is the node containing the iterated segment. If the iterator is
+	// terminal, node is nil.
+	node *pmanode
+
+	// index is the index of the segment in node.keys/values.
+	index int
+}
+
+// Ok returns true if the iterator is not terminal. All other methods are only
+// valid for non-terminal iterators.
+func (seg pmaIterator) Ok() bool {
+	return seg.node != nil
+}
+
+// Range returns the iterated segment's range key.
+func (seg pmaIterator) Range() __generics_imported0.AddrRange {
+	return seg.node.keys[seg.index]
+}
+
+// Start is equivalent to Range().Start, but should be preferred if only the
+// start of the range is needed.
+func (seg pmaIterator) Start() __generics_imported0.Addr {
+	return seg.node.keys[seg.index].Start
+}
+
+// End is equivalent to Range().End, but should be preferred if only the end of
+// the range is needed.
+func (seg pmaIterator) End() __generics_imported0.Addr {
+	return seg.node.keys[seg.index].End
+}
+
+// SetRangeUnchecked mutates the iterated segment's range key. This operation
+// does not invalidate any iterators.
+//
+// Preconditions:
+//
+// - r.Length() > 0.
+//
+// - The new range must not overlap an existing one: if seg.NextSegment().Ok(),
+// then r.End <= seg.NextSegment().Start(); if seg.PrevSegment().Ok(), then
+// r.Start >= seg.PrevSegment().End().
+func (seg pmaIterator) SetRangeUnchecked(r __generics_imported0.AddrRange) {
+	seg.node.keys[seg.index] = r
+}
+
+// SetRange mutates the iterated segment's range key. If the new range would
+// cause the iterated segment to overlap another segment, or if the new range
+// is invalid, SetRange panics. This operation does not invalidate any
+// iterators.
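+//
+// For illustration (a hypothetical caller, not part of this file): growing a
+// segment by one page into a gap known to be free:
+//
+//	r := seg.Range()
+//	seg.SetRange(__generics_imported0.AddrRange{r.Start, r.End + 4096})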
+func (seg pmaIterator) SetRange(r __generics_imported0.AddrRange) { + if r.Length() <= 0 { + panic(fmt.Sprintf("invalid segment range %v", r)) + } + if prev := seg.PrevSegment(); prev.Ok() && r.Start < prev.End() { + panic(fmt.Sprintf("new segment range %v overlaps segment range %v", r, prev.Range())) + } + if next := seg.NextSegment(); next.Ok() && r.End > next.Start() { + panic(fmt.Sprintf("new segment range %v overlaps segment range %v", r, next.Range())) + } + seg.SetRangeUnchecked(r) +} + +// SetStartUnchecked mutates the iterated segment's start. This operation does +// not invalidate any iterators. +// +// Preconditions: The new start must be valid: start < seg.End(); if +// seg.PrevSegment().Ok(), then start >= seg.PrevSegment().End(). +func (seg pmaIterator) SetStartUnchecked(start __generics_imported0.Addr) { + seg.node.keys[seg.index].Start = start +} + +// SetStart mutates the iterated segment's start. If the new start value would +// cause the iterated segment to overlap another segment, or would result in an +// invalid range, SetStart panics. This operation does not invalidate any +// iterators. +func (seg pmaIterator) SetStart(start __generics_imported0.Addr) { + if start >= seg.End() { + panic(fmt.Sprintf("new start %v would invalidate segment range %v", start, seg.Range())) + } + if prev := seg.PrevSegment(); prev.Ok() && start < prev.End() { + panic(fmt.Sprintf("new start %v would cause segment range %v to overlap segment range %v", start, seg.Range(), prev.Range())) + } + seg.SetStartUnchecked(start) +} + +// SetEndUnchecked mutates the iterated segment's end. This operation does not +// invalidate any iterators. +// +// Preconditions: The new end must be valid: end > seg.Start(); if +// seg.NextSegment().Ok(), then end <= seg.NextSegment().Start(). +func (seg pmaIterator) SetEndUnchecked(end __generics_imported0.Addr) { + seg.node.keys[seg.index].End = end +} + +// SetEnd mutates the iterated segment's end. If the new end value would cause +// the iterated segment to overlap another segment, or would result in an +// invalid range, SetEnd panics. This operation does not invalidate any +// iterators. +func (seg pmaIterator) SetEnd(end __generics_imported0.Addr) { + if end <= seg.Start() { + panic(fmt.Sprintf("new end %v would invalidate segment range %v", end, seg.Range())) + } + if next := seg.NextSegment(); next.Ok() && end > next.Start() { + panic(fmt.Sprintf("new end %v would cause segment range %v to overlap segment range %v", end, seg.Range(), next.Range())) + } + seg.SetEndUnchecked(end) +} + +// Value returns a copy of the iterated segment's value. +func (seg pmaIterator) Value() pma { + return seg.node.values[seg.index] +} + +// ValuePtr returns a pointer to the iterated segment's value. The pointer is +// invalidated if the iterator is invalidated. This operation does not +// invalidate any iterators. +func (seg pmaIterator) ValuePtr() *pma { + return &seg.node.values[seg.index] +} + +// SetValue mutates the iterated segment's value. This operation does not +// invalidate any iterators. +func (seg pmaIterator) SetValue(val pma) { + seg.node.values[seg.index] = val +} + +// PrevSegment returns the iterated segment's predecessor. If there is no +// preceding segment, PrevSegment returns a terminal iterator. 
+func (seg pmaIterator) PrevSegment() pmaIterator { + if seg.node.hasChildren { + return seg.node.children[seg.index].lastSegment() + } + if seg.index > 0 { + return pmaIterator{seg.node, seg.index - 1} + } + if seg.node.parent == nil { + return pmaIterator{} + } + return pmasegmentBeforePosition(seg.node.parent, seg.node.parentIndex) +} + +// NextSegment returns the iterated segment's successor. If there is no +// succeeding segment, NextSegment returns a terminal iterator. +func (seg pmaIterator) NextSegment() pmaIterator { + if seg.node.hasChildren { + return seg.node.children[seg.index+1].firstSegment() + } + if seg.index < seg.node.nrSegments-1 { + return pmaIterator{seg.node, seg.index + 1} + } + if seg.node.parent == nil { + return pmaIterator{} + } + return pmasegmentAfterPosition(seg.node.parent, seg.node.parentIndex) +} + +// PrevGap returns the gap immediately before the iterated segment. +func (seg pmaIterator) PrevGap() pmaGapIterator { + if seg.node.hasChildren { + + return seg.node.children[seg.index].lastSegment().NextGap() + } + return pmaGapIterator{seg.node, seg.index} +} + +// NextGap returns the gap immediately after the iterated segment. +func (seg pmaIterator) NextGap() pmaGapIterator { + if seg.node.hasChildren { + return seg.node.children[seg.index+1].firstSegment().PrevGap() + } + return pmaGapIterator{seg.node, seg.index + 1} +} + +// PrevNonEmpty returns the iterated segment's predecessor if it is adjacent, +// or the gap before the iterated segment otherwise. If seg.Start() == +// Functions.MinKey(), PrevNonEmpty will return two terminal iterators. +// Otherwise, exactly one of the iterators returned by PrevNonEmpty will be +// non-terminal. +func (seg pmaIterator) PrevNonEmpty() (pmaIterator, pmaGapIterator) { + gap := seg.PrevGap() + if gap.Range().Length() != 0 { + return pmaIterator{}, gap + } + return gap.PrevSegment(), pmaGapIterator{} +} + +// NextNonEmpty returns the iterated segment's successor if it is adjacent, or +// the gap after the iterated segment otherwise. If seg.End() == +// Functions.MaxKey(), NextNonEmpty will return two terminal iterators. +// Otherwise, exactly one of the iterators returned by NextNonEmpty will be +// non-terminal. +func (seg pmaIterator) NextNonEmpty() (pmaIterator, pmaGapIterator) { + gap := seg.NextGap() + if gap.Range().Length() != 0 { + return pmaIterator{}, gap + } + return gap.NextSegment(), pmaGapIterator{} +} + +// A GapIterator is conceptually one of: +// +// - A pointer to a position between two segments, before the first segment, or +// after the last segment in a set, called a *gap*; or +// +// - A terminal iterator, which is a sentinel indicating that the end of +// iteration has been reached. +// +// Note that the gap between two adjacent segments exists (iterators to it are +// non-terminal), but has a length of zero. GapIterator.IsEmpty returns true +// for such gaps. An empty set contains a single gap, spanning the entire range +// of the set's keys. +// +// GapIterators are copyable values and are meaningfully equality-comparable. +// The zero value of GapIterator is a terminal iterator. +// +// Unless otherwise specified, any mutation of a set invalidates all existing +// iterators into the set. +type pmaGapIterator struct { + // The representation of a GapIterator is identical to that of an Iterator, + // except that index corresponds to positions between segments in the same + // way as for node.children (see comment for node.nrSegments). 
+	node  *pmanode
+	index int
+}
+
+// Ok returns true if the iterator is not terminal. All other methods are only
+// valid for non-terminal iterators.
+func (gap pmaGapIterator) Ok() bool {
+	return gap.node != nil
+}
+
+// Range returns the range spanned by the iterated gap.
+func (gap pmaGapIterator) Range() __generics_imported0.AddrRange {
+	return __generics_imported0.AddrRange{gap.Start(), gap.End()}
+}
+
+// Start is equivalent to Range().Start, but should be preferred if only the
+// start of the range is needed.
+func (gap pmaGapIterator) Start() __generics_imported0.Addr {
+	if ps := gap.PrevSegment(); ps.Ok() {
+		return ps.End()
+	}
+	return pmaSetFunctions{}.MinKey()
+}
+
+// End is equivalent to Range().End, but should be preferred if only the end of
+// the range is needed.
+func (gap pmaGapIterator) End() __generics_imported0.Addr {
+	if ns := gap.NextSegment(); ns.Ok() {
+		return ns.Start()
+	}
+	return pmaSetFunctions{}.MaxKey()
+}
+
+// IsEmpty returns true if the iterated gap is empty (that is, the "gap" is
+// between two adjacent segments).
+func (gap pmaGapIterator) IsEmpty() bool {
+	return gap.Range().Length() == 0
+}
+
+// PrevSegment returns the segment immediately before the iterated gap. If no
+// such segment exists, PrevSegment returns a terminal iterator.
+func (gap pmaGapIterator) PrevSegment() pmaIterator {
+	return pmasegmentBeforePosition(gap.node, gap.index)
+}
+
+// NextSegment returns the segment immediately after the iterated gap. If no
+// such segment exists, NextSegment returns a terminal iterator.
+func (gap pmaGapIterator) NextSegment() pmaIterator {
+	return pmasegmentAfterPosition(gap.node, gap.index)
+}
+
+// PrevGap returns the iterated gap's predecessor. If no such gap exists,
+// PrevGap returns a terminal iterator.
+func (gap pmaGapIterator) PrevGap() pmaGapIterator {
+	seg := gap.PrevSegment()
+	if !seg.Ok() {
+		return pmaGapIterator{}
+	}
+	return seg.PrevGap()
+}
+
+// NextGap returns the iterated gap's successor. If no such gap exists, NextGap
+// returns a terminal iterator.
+func (gap pmaGapIterator) NextGap() pmaGapIterator {
+	seg := gap.NextSegment()
+	if !seg.Ok() {
+		return pmaGapIterator{}
+	}
+	return seg.NextGap()
+}
+
+// segmentBeforePosition returns the predecessor segment of the position given
+// by n.children[i], which may or may not contain a child. If no such segment
+// exists, segmentBeforePosition returns a terminal iterator.
+func pmasegmentBeforePosition(n *pmanode, i int) pmaIterator {
+	for i == 0 {
+		if n.parent == nil {
+			return pmaIterator{}
+		}
+		n, i = n.parent, n.parentIndex
+	}
+	return pmaIterator{n, i - 1}
+}
+
+// segmentAfterPosition returns the successor segment of the position given by
+// n.children[i], which may or may not contain a child. If no such segment
+// exists, segmentAfterPosition returns a terminal iterator.
+func pmasegmentAfterPosition(n *pmanode, i int) pmaIterator {
+	for i == n.nrSegments {
+		if n.parent == nil {
+			return pmaIterator{}
+		}
+		n, i = n.parent, n.parentIndex
+	}
+	return pmaIterator{n, i}
+}
+
+func pmazeroValueSlice(slice []pma) {
+
+	for i := range slice {
+		pmaSetFunctions{}.ClearValue(&slice[i])
+	}
+}
+
+func pmazeroNodeSlice(slice []*pmanode) {
+	for i := range slice {
+		slice[i] = nil
+	}
+}
+
+// String stringifies a Set for debugging.
+func (s *pmaSet) String() string {
+	return s.root.String()
+}
+
+// String stringifies a node (and all of its children) for debugging.
+func (n *pmanode) String() string {
+	var buf bytes.Buffer
+	n.writeDebugString(&buf, "")
+	return buf.String()
+}
+
+func (n *pmanode) writeDebugString(buf *bytes.Buffer, prefix string) {
+	if n.hasChildren != (n.nrSegments > 0 && n.children[0] != nil) {
+		buf.WriteString(prefix)
+		buf.WriteString(fmt.Sprintf("WARNING: inconsistent value of hasChildren: got %v, want %v\n", n.hasChildren, !n.hasChildren))
+	}
+	for i := 0; i < n.nrSegments; i++ {
+		if child := n.children[i]; child != nil {
+			cprefix := fmt.Sprintf("%s- % 3d ", prefix, i)
+			if child.parent != n || child.parentIndex != i {
+				buf.WriteString(cprefix)
+				buf.WriteString(fmt.Sprintf("WARNING: inconsistent linkage to parent: got (%p, %d), want (%p, %d)\n", child.parent, child.parentIndex, n, i))
+			}
+			child.writeDebugString(buf, fmt.Sprintf("%s- % 3d ", prefix, i))
+		}
+		buf.WriteString(prefix)
+		buf.WriteString(fmt.Sprintf("- % 3d: %v => %v\n", i, n.keys[i], n.values[i]))
+	}
+	if child := n.children[n.nrSegments]; child != nil {
+		child.writeDebugString(buf, fmt.Sprintf("%s- % 3d ", prefix, n.nrSegments))
+	}
+}
+
+// SegmentDataSlices represents segments from a set as slices of start, end, and
+// values. SegmentDataSlices is primarily used as an intermediate representation
+// for save/restore and the layout here is optimized for that.
+//
+// +stateify savable
+type pmaSegmentDataSlices struct {
+	Start  []__generics_imported0.Addr
+	End    []__generics_imported0.Addr
+	Values []pma
+}
+
+// ExportSortedSlices returns a copy of all segments in the given set, in
+// ascending key order.
+func (s *pmaSet) ExportSortedSlices() *pmaSegmentDataSlices {
+	var sds pmaSegmentDataSlices
+	for seg := s.FirstSegment(); seg.Ok(); seg = seg.NextSegment() {
+		sds.Start = append(sds.Start, seg.Start())
+		sds.End = append(sds.End, seg.End())
+		sds.Values = append(sds.Values, seg.Value())
+	}
+	sds.Start = sds.Start[:len(sds.Start):len(sds.Start)]
+	sds.End = sds.End[:len(sds.End):len(sds.End)]
+	sds.Values = sds.Values[:len(sds.Values):len(sds.Values)]
+	return &sds
+}
+
+// ImportSortedSlices initializes the given set from the given slices.
+//
+// Preconditions: s must be empty. sds must represent a valid set (the segments
+// in sds must have valid lengths that do not overlap). The segments in sds
+// must be sorted in ascending key order.
+func (s *pmaSet) ImportSortedSlices(sds *pmaSegmentDataSlices) error {
+	if !s.IsEmpty() {
+		return fmt.Errorf("cannot import into non-empty set %v", s)
+	}
+	gap := s.FirstGap()
+	for i := range sds.Start {
+		r := __generics_imported0.AddrRange{sds.Start[i], sds.End[i]}
+		if !gap.Range().IsSupersetOf(r) {
+			return fmt.Errorf("segment overlaps a preceding segment or is incorrectly sorted: [%d, %d) => %v", sds.Start[i], sds.End[i], sds.Values[i])
+		}
+		gap = s.InsertWithoutMerging(gap, r, sds.Values[i]).NextGap()
+	}
+	return nil
+}
+func (s *pmaSet) saveRoot() *pmaSegmentDataSlices {
+	return s.ExportSortedSlices()
+}
+
+func (s *pmaSet) loadRoot(sds *pmaSegmentDataSlices) {
+	if err := s.ImportSortedSlices(sds); err != nil {
+		panic(err)
+	}
+}
diff --git a/pkg/sentry/mm/procfs.go b/pkg/sentry/mm/procfs.go
new file mode 100644
index 000000000..c8302a553
--- /dev/null
+++ b/pkg/sentry/mm/procfs.go
@@ -0,0 +1,289 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package mm
+
+import (
+	"bytes"
+	"fmt"
+	"strings"
+
+	"gvisor.googlesource.com/gvisor/pkg/sentry/context"
+	"gvisor.googlesource.com/gvisor/pkg/sentry/fs/proc/seqfile"
+	"gvisor.googlesource.com/gvisor/pkg/sentry/memmap"
+	"gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
+)
+
+const (
+	// devMinorBits is the number of minor bits in a device number. Linux:
+	// include/linux/kdev_t.h:MINORBITS
+	devMinorBits = 20
+
+	vsyscallEnd        = usermem.Addr(0xffffffffff601000)
+	vsyscallMapsEntry  = "ffffffffff600000-ffffffffff601000 r-xp 00000000 00:00 0 [vsyscall]\n"
+	vsyscallSmapsEntry = vsyscallMapsEntry +
+		"Size: 4 kB\n" +
+		"Rss: 0 kB\n" +
+		"Pss: 0 kB\n" +
+		"Shared_Clean: 0 kB\n" +
+		"Shared_Dirty: 0 kB\n" +
+		"Private_Clean: 0 kB\n" +
+		"Private_Dirty: 0 kB\n" +
+		"Referenced: 0 kB\n" +
+		"Anonymous: 0 kB\n" +
+		"AnonHugePages: 0 kB\n" +
+		"Shared_Hugetlb: 0 kB\n" +
+		"Private_Hugetlb: 0 kB\n" +
+		"Swap: 0 kB\n" +
+		"SwapPss: 0 kB\n" +
+		"KernelPageSize: 4 kB\n" +
+		"MMUPageSize: 4 kB\n" +
+		"Locked: 0 kB\n" +
+		"VmFlags: rd ex \n"
+)
+
+// NeedsUpdate implements seqfile.SeqSource.NeedsUpdate.
+func (mm *MemoryManager) NeedsUpdate(generation int64) bool {
+	return true
+}
+
+// ReadMapsSeqFileData is called by fs/proc.mapsData.ReadSeqFileData to
+// implement /proc/[pid]/maps.
+func (mm *MemoryManager) ReadMapsSeqFileData(ctx context.Context, handle seqfile.SeqHandle) ([]seqfile.SeqData, int64) {
+	mm.mappingMu.RLock()
+	defer mm.mappingMu.RUnlock()
+	var data []seqfile.SeqData
+	var start usermem.Addr
+	if handle != nil {
+		start = *handle.(*usermem.Addr)
+	}
+	for vseg := mm.vmas.LowerBoundSegment(start); vseg.Ok(); vseg = vseg.NextSegment() {
+		// FIXME(b/30793614): If we use a usermem.Addr for the handle, we get
+		// "panic: autosave error: type usermem.Addr is not registered".
+		vmaAddr := vseg.End()
+		data = append(data, seqfile.SeqData{
+			Buf:    mm.vmaMapsEntryLocked(ctx, vseg),
+			Handle: &vmaAddr,
+		})
+	}
+
+	// We always emulate vsyscall, so advertise it here. Everything about a
+	// vsyscall region is static, so just hard code the maps entry since we
+	// don't have a real vma backing it. The vsyscall region is at the end of
+	// the virtual address space, so nothing should be mapped after it (if
+	// something is really mapped in the tiny ~10 MiB segment afterwards, we'll
+	// get the sorting on the maps file wrong at worst; but that's not possible
+	// on any current platform).
+	//
+	// Artificially adjust the seqfile handle so that we only output the
+	// vsyscall entry once.
+	if start != vsyscallEnd {
+		// FIXME(b/30793614): Can't get a pointer to constant vsyscallEnd.
+		vmaAddr := vsyscallEnd
+		data = append(data, seqfile.SeqData{
+			Buf:    []byte(vsyscallMapsEntry),
+			Handle: &vmaAddr,
+		})
+	}
+	return data, 1
+}
+
+// vmaMapsEntryLocked returns a /proc/[pid]/maps entry for the vma iterated by
+// vseg, including the trailing newline.
+//
+// Preconditions: mm.mappingMu must be locked.
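+//
+// Each entry follows the format of Linux's fs/proc/task_mmu.c:show_map_vma();
+// for example (illustrative values only):
+//
+//	00400000-00452000 r-xp 00000000 08:02 173521 /usr/bin/foo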
+func (mm *MemoryManager) vmaMapsEntryLocked(ctx context.Context, vseg vmaIterator) []byte {
+	var b bytes.Buffer
+	mm.appendVMAMapsEntryLocked(ctx, vseg, &b)
+	return b.Bytes()
+}
+
+// Preconditions: mm.mappingMu must be locked.
+func (mm *MemoryManager) appendVMAMapsEntryLocked(ctx context.Context, vseg vmaIterator, b *bytes.Buffer) {
+	vma := vseg.ValuePtr()
+	private := "p"
+	if !vma.private {
+		private = "s"
+	}
+
+	var dev, ino uint64
+	if vma.id != nil {
+		dev = vma.id.DeviceID()
+		ino = vma.id.InodeID()
+	}
+	devMajor := uint32(dev >> devMinorBits)
+	devMinor := uint32(dev & ((1 << devMinorBits) - 1))
+
+	// Do not include the guard page: fs/proc/task_mmu.c:show_map_vma() =>
+	// stack_guard_page_start().
+	fmt.Fprintf(b, "%08x-%08x %s%s %08x %02x:%02x %d ",
+		vseg.Start(), vseg.End(), vma.realPerms, private, vma.off, devMajor, devMinor, ino)
+
+	// Figure out our filename or hint.
+	var s string
+	if vma.hint != "" {
+		s = vma.hint
+	} else if vma.id != nil {
+		// FIXME(jamieliu): We are holding mm.mappingMu here, which is
+		// consistent with Linux's holding mmap_sem in
+		// fs/proc/task_mmu.c:show_map_vma() => fs/seq_file.c:seq_file_path().
+		// However, it's not clear that fs.File.MappedName() is actually
+		// consistent with this lock order.
+		s = vma.id.MappedName(ctx)
+	}
+	if s != "" {
+		// Per Linux, we pad until the 74th character.
+		if pad := 73 - b.Len(); pad > 0 {
+			b.WriteString(strings.Repeat(" ", pad))
+		}
+		b.WriteString(s)
+	}
+	b.WriteString("\n")
+}
+
+// ReadSmapsSeqFileData is called by fs/proc.smapsData.ReadSeqFileData to
+// implement /proc/[pid]/smaps.
+func (mm *MemoryManager) ReadSmapsSeqFileData(ctx context.Context, handle seqfile.SeqHandle) ([]seqfile.SeqData, int64) {
+	mm.mappingMu.RLock()
+	defer mm.mappingMu.RUnlock()
+	var data []seqfile.SeqData
+	var start usermem.Addr
+	if handle != nil {
+		start = *handle.(*usermem.Addr)
+	}
+	for vseg := mm.vmas.LowerBoundSegment(start); vseg.Ok(); vseg = vseg.NextSegment() {
+		// FIXME(b/30793614): If we use a usermem.Addr for the handle, we get
+		// "panic: autosave error: type usermem.Addr is not registered".
+		vmaAddr := vseg.End()
+		data = append(data, seqfile.SeqData{
+			Buf:    mm.vmaSmapsEntryLocked(ctx, vseg),
+			Handle: &vmaAddr,
+		})
+	}
+
+	// We always emulate vsyscall, so advertise it here. See
+	// ReadMapsSeqFileData for additional commentary.
+	if start != vsyscallEnd {
+		// FIXME(b/30793614): Can't get a pointer to constant vsyscallEnd.
+		vmaAddr := vsyscallEnd
+		data = append(data, seqfile.SeqData{
+			Buf:    []byte(vsyscallSmapsEntry),
+			Handle: &vmaAddr,
+		})
+	}
+	return data, 1
+}
+
+// vmaSmapsEntryLocked returns a /proc/[pid]/smaps entry for the vma iterated
+// by vseg, including the trailing newline.
+//
+// Preconditions: mm.mappingMu must be locked.
+func (mm *MemoryManager) vmaSmapsEntryLocked(ctx context.Context, vseg vmaIterator) []byte {
+	var b bytes.Buffer
+	mm.appendVMAMapsEntryLocked(ctx, vseg, &b)
+	vma := vseg.ValuePtr()
+
+	// We take mm.activeMu here in each call to vmaSmapsEntryLocked, instead of
+	// requiring it to be locked as a precondition, to reduce the latency
+	// impact of reading /proc/[pid]/smaps on concurrent performance-sensitive
+	// operations requiring activeMu for writing, like faults.
+ mm.activeMu.RLock() + var rss uint64 + var anon uint64 + vsegAR := vseg.Range() + for pseg := mm.pmas.LowerBoundSegment(vsegAR.Start); pseg.Ok() && pseg.Start() < vsegAR.End; pseg = pseg.NextSegment() { + psegAR := pseg.Range().Intersect(vsegAR) + size := uint64(psegAR.Length()) + rss += size + if pseg.ValuePtr().private { + anon += size + } + } + mm.activeMu.RUnlock() + + fmt.Fprintf(&b, "Size: %8d kB\n", vseg.Range().Length()/1024) + fmt.Fprintf(&b, "Rss: %8d kB\n", rss/1024) + // Currently we report PSS = RSS, i.e. we pretend each page mapped by a pma + // is only mapped by that pma. This avoids having to query memmap.Mappables + // for reference count information on each page. As a corollary, all pages + // are accounted as "private" whether or not the vma is private; compare + // Linux's fs/proc/task_mmu.c:smaps_account(). + fmt.Fprintf(&b, "Pss: %8d kB\n", rss/1024) + fmt.Fprintf(&b, "Shared_Clean: %8d kB\n", 0) + fmt.Fprintf(&b, "Shared_Dirty: %8d kB\n", 0) + // Pretend that all pages are dirty if the vma is writable, and clean otherwise. + clean := rss + if vma.effectivePerms.Write { + clean = 0 + } + fmt.Fprintf(&b, "Private_Clean: %8d kB\n", clean/1024) + fmt.Fprintf(&b, "Private_Dirty: %8d kB\n", (rss-clean)/1024) + // Pretend that all pages are "referenced" (recently touched). + fmt.Fprintf(&b, "Referenced: %8d kB\n", rss/1024) + fmt.Fprintf(&b, "Anonymous: %8d kB\n", anon/1024) + // Hugepages (hugetlb and THP) are not implemented. + fmt.Fprintf(&b, "AnonHugePages: %8d kB\n", 0) + fmt.Fprintf(&b, "Shared_Hugetlb: %8d kB\n", 0) + fmt.Fprintf(&b, "Private_Hugetlb: %7d kB\n", 0) + // Swap is not implemented. + fmt.Fprintf(&b, "Swap: %8d kB\n", 0) + fmt.Fprintf(&b, "SwapPss: %8d kB\n", 0) + fmt.Fprintf(&b, "KernelPageSize: %8d kB\n", usermem.PageSize/1024) + fmt.Fprintf(&b, "MMUPageSize: %8d kB\n", usermem.PageSize/1024) + locked := rss + if vma.mlockMode == memmap.MLockNone { + locked = 0 + } + fmt.Fprintf(&b, "Locked: %8d kB\n", locked/1024) + + b.WriteString("VmFlags: ") + if vma.realPerms.Read { + b.WriteString("rd ") + } + if vma.realPerms.Write { + b.WriteString("wr ") + } + if vma.realPerms.Execute { + b.WriteString("ex ") + } + if vma.canWriteMappableLocked() { // VM_SHARED + b.WriteString("sh ") + } + if vma.maxPerms.Read { + b.WriteString("mr ") + } + if vma.maxPerms.Write { + b.WriteString("mw ") + } + if vma.maxPerms.Execute { + b.WriteString("me ") + } + if !vma.private { // VM_MAYSHARE + b.WriteString("ms ") + } + if vma.growsDown { + b.WriteString("gd ") + } + if vma.mlockMode != memmap.MLockNone { // VM_LOCKED + b.WriteString("lo ") + } + if vma.mlockMode == memmap.MLockLazy { // VM_LOCKONFAULT + b.WriteString("?? ") // no explicit encoding in fs/proc/task_mmu.c:show_smap_vma_flags() + } + if vma.private && vma.effectivePerms.Write { // VM_ACCOUNT + b.WriteString("ac ") + } + b.WriteString("\n") + + return b.Bytes() +} diff --git a/pkg/sentry/mm/save_restore.go b/pkg/sentry/mm/save_restore.go new file mode 100644 index 000000000..0385957bd --- /dev/null +++ b/pkg/sentry/mm/save_restore.go @@ -0,0 +1,57 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package mm + +import ( + "fmt" + + "gvisor.googlesource.com/gvisor/pkg/sentry/context" +) + +// InvalidateUnsavable invokes memmap.Mappable.InvalidateUnsavable on all +// Mappables mapped by mm. +func (mm *MemoryManager) InvalidateUnsavable(ctx context.Context) error { + mm.mappingMu.RLock() + defer mm.mappingMu.RUnlock() + for vseg := mm.vmas.FirstSegment(); vseg.Ok(); vseg = vseg.NextSegment() { + if vma := vseg.ValuePtr(); vma.mappable != nil { + if err := vma.mappable.InvalidateUnsavable(ctx); err != nil { + return err + } + } + } + return nil +} + +// beforeSave is invoked by stateify. +func (mm *MemoryManager) beforeSave() { + mf := mm.mfp.MemoryFile() + for pseg := mm.pmas.FirstSegment(); pseg.Ok(); pseg = pseg.NextSegment() { + if pma := pseg.ValuePtr(); pma.file != mf { + // InvalidateUnsavable should have caused all such pmas to be + // invalidated. + panic(fmt.Sprintf("Can't save pma %#v with non-MemoryFile of type %T:\n%s", pseg.Range(), pma.file, mm)) + } + } +} + +// afterLoad is invoked by stateify. +func (mm *MemoryManager) afterLoad() { + mm.haveASIO = mm.p.SupportsAddressSpaceIO() + mf := mm.mfp.MemoryFile() + for pseg := mm.pmas.FirstSegment(); pseg.Ok(); pseg = pseg.NextSegment() { + pseg.ValuePtr().file = mf + } +} diff --git a/pkg/sentry/mm/shm.go b/pkg/sentry/mm/shm.go new file mode 100644 index 000000000..12913007b --- /dev/null +++ b/pkg/sentry/mm/shm.go @@ -0,0 +1,66 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package mm + +import ( + "gvisor.googlesource.com/gvisor/pkg/sentry/context" + "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/shm" + "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" + "gvisor.googlesource.com/gvisor/pkg/syserror" +) + +// DetachShm unmaps a sysv shared memory segment. +func (mm *MemoryManager) DetachShm(ctx context.Context, addr usermem.Addr) error { + if addr != addr.RoundDown() { + // "... shmaddr is not aligned on a page boundary." - man shmdt(2) + return syserror.EINVAL + } + + var detached *shm.Shm + mm.mappingMu.Lock() + defer mm.mappingMu.Unlock() + + // Find and remove the first vma containing an address >= addr that maps a + // segment originally attached at addr. 
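+	// (mprotect or a partial munmap may have split the original attach into
+	// multiple vmas, so a vma from the attach is identified by its mappable
+	// and its offset relative to addr rather than by its exact bounds.)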
+ vseg := mm.vmas.LowerBoundSegment(addr) + for vseg.Ok() { + vma := vseg.ValuePtr() + if shm, ok := vma.mappable.(*shm.Shm); ok && vseg.Start() >= addr && uint64(vseg.Start()-addr) == vma.off { + detached = shm + vseg = mm.unmapLocked(ctx, vseg.Range()).NextSegment() + break + } else { + vseg = vseg.NextSegment() + } + } + + if detached == nil { + // There is no shared memory segment attached at addr. + return syserror.EINVAL + } + + // Remove all vmas that could have been created by the same attach. + end := addr + usermem.Addr(detached.EffectiveSize()) + for vseg.Ok() && vseg.End() <= end { + vma := vseg.ValuePtr() + if vma.mappable == detached && uint64(vseg.Start()-addr) == vma.off { + vseg = mm.unmapLocked(ctx, vseg.Range()).NextSegment() + } else { + vseg = vseg.NextSegment() + } + } + + return nil +} diff --git a/pkg/sentry/mm/special_mappable.go b/pkg/sentry/mm/special_mappable.go new file mode 100644 index 000000000..687959005 --- /dev/null +++ b/pkg/sentry/mm/special_mappable.go @@ -0,0 +1,155 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package mm + +import ( + "gvisor.googlesource.com/gvisor/pkg/refs" + "gvisor.googlesource.com/gvisor/pkg/sentry/context" + "gvisor.googlesource.com/gvisor/pkg/sentry/memmap" + "gvisor.googlesource.com/gvisor/pkg/sentry/pgalloc" + "gvisor.googlesource.com/gvisor/pkg/sentry/platform" + "gvisor.googlesource.com/gvisor/pkg/sentry/usage" + "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" + "gvisor.googlesource.com/gvisor/pkg/syserror" +) + +// SpecialMappable implements memmap.MappingIdentity and memmap.Mappable with +// semantics similar to Linux's mm/mmap.c:_install_special_mapping(), except +// that SpecialMappable takes ownership of the memory that it represents +// (_install_special_mapping() does not.) +// +// +stateify savable +type SpecialMappable struct { + refs.AtomicRefCount + + mfp pgalloc.MemoryFileProvider + fr platform.FileRange + name string +} + +// NewSpecialMappable returns a SpecialMappable that owns fr, which represents +// offsets in mfp.MemoryFile() that contain the SpecialMappable's data. The +// SpecialMappable will use the given name in /proc/[pid]/maps. +// +// Preconditions: fr.Length() != 0. +func NewSpecialMappable(name string, mfp pgalloc.MemoryFileProvider, fr platform.FileRange) *SpecialMappable { + return &SpecialMappable{mfp: mfp, fr: fr, name: name} +} + +// DecRef implements refs.RefCounter.DecRef. +func (m *SpecialMappable) DecRef() { + m.AtomicRefCount.DecRefWithDestructor(func() { + m.mfp.MemoryFile().DecRef(m.fr) + }) +} + +// MappedName implements memmap.MappingIdentity.MappedName. +func (m *SpecialMappable) MappedName(ctx context.Context) string { + return m.name +} + +// DeviceID implements memmap.MappingIdentity.DeviceID. +func (m *SpecialMappable) DeviceID() uint64 { + return 0 +} + +// InodeID implements memmap.MappingIdentity.InodeID. +func (m *SpecialMappable) InodeID() uint64 { + return 0 +} + +// Msync implements memmap.MappingIdentity.Msync. 
+func (m *SpecialMappable) Msync(ctx context.Context, mr memmap.MappableRange) error { + // Linux: vm_file is NULL, causing msync to skip it entirely. + return nil +} + +// AddMapping implements memmap.Mappable.AddMapping. +func (*SpecialMappable) AddMapping(context.Context, memmap.MappingSpace, usermem.AddrRange, uint64, bool) error { + return nil +} + +// RemoveMapping implements memmap.Mappable.RemoveMapping. +func (*SpecialMappable) RemoveMapping(context.Context, memmap.MappingSpace, usermem.AddrRange, uint64, bool) { +} + +// CopyMapping implements memmap.Mappable.CopyMapping. +func (*SpecialMappable) CopyMapping(context.Context, memmap.MappingSpace, usermem.AddrRange, usermem.AddrRange, uint64, bool) error { + return nil +} + +// Translate implements memmap.Mappable.Translate. +func (m *SpecialMappable) Translate(ctx context.Context, required, optional memmap.MappableRange, at usermem.AccessType) ([]memmap.Translation, error) { + var err error + if required.End > m.fr.Length() { + err = &memmap.BusError{syserror.EFAULT} + } + if source := optional.Intersect(memmap.MappableRange{0, m.fr.Length()}); source.Length() != 0 { + return []memmap.Translation{ + { + Source: source, + File: m.mfp.MemoryFile(), + Offset: m.fr.Start + source.Start, + Perms: usermem.AnyAccess, + }, + }, err + } + return nil, err +} + +// InvalidateUnsavable implements memmap.Mappable.InvalidateUnsavable. +func (m *SpecialMappable) InvalidateUnsavable(ctx context.Context) error { + // Since data is stored in pgalloc.MemoryFile, the contents of which are + // preserved across save/restore, we don't need to do anything. + return nil +} + +// MemoryFileProvider returns the MemoryFileProvider whose MemoryFile stores +// the SpecialMappable's contents. +func (m *SpecialMappable) MemoryFileProvider() pgalloc.MemoryFileProvider { + return m.mfp +} + +// FileRange returns the offsets into MemoryFileProvider().MemoryFile() that +// store the SpecialMappable's contents. +func (m *SpecialMappable) FileRange() platform.FileRange { + return m.fr +} + +// Length returns the length of the SpecialMappable. +func (m *SpecialMappable) Length() uint64 { + return m.fr.Length() +} + +// NewSharedAnonMappable returns a SpecialMappable that implements the +// semantics of mmap(MAP_SHARED|MAP_ANONYMOUS) and mappings of /dev/zero. +// +// TODO(jamieliu): The use of SpecialMappable is a lazy code reuse hack. Linux +// uses an ephemeral file created by mm/shmem.c:shmem_zero_setup(); we should +// do the same to get non-zero device and inode IDs. +func NewSharedAnonMappable(length uint64, mfp pgalloc.MemoryFileProvider) (*SpecialMappable, error) { + if length == 0 { + return nil, syserror.EINVAL + } + alignedLen, ok := usermem.Addr(length).RoundUp() + if !ok { + return nil, syserror.EINVAL + } + fr, err := mfp.MemoryFile().Allocate(uint64(alignedLen), usage.Anonymous) + if err != nil { + return nil, err + } + return NewSpecialMappable("/dev/zero (deleted)", mfp, fr), nil +} diff --git a/pkg/sentry/mm/syscalls.go b/pkg/sentry/mm/syscalls.go new file mode 100644 index 000000000..0368c6794 --- /dev/null +++ b/pkg/sentry/mm/syscalls.go @@ -0,0 +1,1197 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package mm + +import ( + "fmt" + mrand "math/rand" + + "gvisor.googlesource.com/gvisor/pkg/abi/linux" + "gvisor.googlesource.com/gvisor/pkg/sentry/context" + "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth" + "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/futex" + "gvisor.googlesource.com/gvisor/pkg/sentry/limits" + "gvisor.googlesource.com/gvisor/pkg/sentry/memmap" + "gvisor.googlesource.com/gvisor/pkg/sentry/pgalloc" + "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" + "gvisor.googlesource.com/gvisor/pkg/syserror" +) + +// HandleUserFault handles an application page fault. sp is the faulting +// application thread's stack pointer. +// +// Preconditions: mm.as != nil. +func (mm *MemoryManager) HandleUserFault(ctx context.Context, addr usermem.Addr, at usermem.AccessType, sp usermem.Addr) error { + ar, ok := addr.RoundDown().ToRange(usermem.PageSize) + if !ok { + return syserror.EFAULT + } + + // Don't bother trying existingPMAsLocked; in most cases, if we did have + // existing pmas, we wouldn't have faulted. + + // Ensure that we have a usable vma. Here and below, since we are only + // asking for a single page, there is no possibility of partial success, + // and any error is immediately fatal. + mm.mappingMu.RLock() + vseg, _, err := mm.getVMAsLocked(ctx, ar, at, false) + if err != nil { + mm.mappingMu.RUnlock() + return err + } + + // Ensure that we have a usable pma. + mm.activeMu.Lock() + pseg, _, err := mm.getPMAsLocked(ctx, vseg, ar, at) + mm.mappingMu.RUnlock() + if err != nil { + mm.activeMu.Unlock() + return err + } + + // Downgrade to a read-lock on activeMu since we don't need to mutate pmas + // anymore. + mm.activeMu.DowngradeLock() + + // Map the faulted page into the active AddressSpace. + err = mm.mapASLocked(pseg, ar, false) + mm.activeMu.RUnlock() + return err +} + +// MMap establishes a memory mapping. +func (mm *MemoryManager) MMap(ctx context.Context, opts memmap.MMapOpts) (usermem.Addr, error) { + if opts.Length == 0 { + return 0, syserror.EINVAL + } + length, ok := usermem.Addr(opts.Length).RoundUp() + if !ok { + return 0, syserror.ENOMEM + } + opts.Length = uint64(length) + + if opts.Mappable != nil { + // Offset must be aligned. + if usermem.Addr(opts.Offset).RoundDown() != usermem.Addr(opts.Offset) { + return 0, syserror.EINVAL + } + // Offset + length must not overflow. + if end := opts.Offset + opts.Length; end < opts.Offset { + return 0, syserror.ENOMEM + } + } else { + opts.Offset = 0 + if !opts.Private { + if opts.MappingIdentity != nil { + return 0, syserror.EINVAL + } + m, err := NewSharedAnonMappable(opts.Length, pgalloc.MemoryFileProviderFromContext(ctx)) + if err != nil { + return 0, err + } + defer m.DecRef() + opts.MappingIdentity = m + opts.Mappable = m + } + } + + if opts.Addr.RoundDown() != opts.Addr { + // MAP_FIXED requires addr to be page-aligned; non-fixed mappings + // don't. 
+ if opts.Fixed { + return 0, syserror.EINVAL + } + opts.Addr = opts.Addr.RoundDown() + } + + if !opts.MaxPerms.SupersetOf(opts.Perms) { + return 0, syserror.EACCES + } + if opts.Unmap && !opts.Fixed { + return 0, syserror.EINVAL + } + if opts.GrowsDown && opts.Mappable != nil { + return 0, syserror.EINVAL + } + + // Get the new vma. + mm.mappingMu.Lock() + if opts.MLockMode < mm.defMLockMode { + opts.MLockMode = mm.defMLockMode + } + vseg, ar, err := mm.createVMALocked(ctx, opts) + if err != nil { + mm.mappingMu.Unlock() + return 0, err + } + + // TODO(jamieliu): In Linux, VM_LOCKONFAULT (which may be set on the new + // vma by mlockall(MCL_FUTURE|MCL_ONFAULT) => mm_struct::def_flags) appears + // to effectively disable MAP_POPULATE by unsetting FOLL_POPULATE in + // mm/util.c:vm_mmap_pgoff() => mm/gup.c:__mm_populate() => + // populate_vma_page_range(). Confirm this behavior. + switch { + case opts.Precommit || opts.MLockMode == memmap.MLockEager: + // Get pmas and map with precommit as requested. + mm.populateVMAAndUnlock(ctx, vseg, ar, true) + + case opts.Mappable == nil && length <= privateAllocUnit: + // NOTE(b/63077076, b/63360184): Get pmas and map eagerly in the hope + // that doing so will save on future page faults. We only do this for + // anonymous mappings, since otherwise the cost of + // memmap.Mappable.Translate is unknown; and only for small mappings, + // to avoid needing to allocate large amounts of memory that we may + // subsequently need to checkpoint. + mm.populateVMAAndUnlock(ctx, vseg, ar, false) + + default: + mm.mappingMu.Unlock() + } + + return ar.Start, nil +} + +// populateVMA obtains pmas for addresses in ar in the given vma, and maps them +// into mm.as if it is active. +// +// Preconditions: mm.mappingMu must be locked. vseg.Range().IsSupersetOf(ar). +func (mm *MemoryManager) populateVMA(ctx context.Context, vseg vmaIterator, ar usermem.AddrRange, precommit bool) { + if !vseg.ValuePtr().effectivePerms.Any() { + // Linux doesn't populate inaccessible pages. See + // mm/gup.c:populate_vma_page_range. + return + } + + mm.activeMu.Lock() + // Can't defer mm.activeMu.Unlock(); see below. + + // Even if we get new pmas, we can't actually map them if we don't have an + // AddressSpace. + if mm.as == nil { + mm.activeMu.Unlock() + return + } + + // Ensure that we have usable pmas. + pseg, _, err := mm.getPMAsLocked(ctx, vseg, ar, usermem.NoAccess) + if err != nil { + // mm/util.c:vm_mmap_pgoff() ignores the error, if any, from + // mm/gup.c:mm_populate(). If it matters, we'll get it again when + // userspace actually tries to use the failing page. + mm.activeMu.Unlock() + return + } + + // Downgrade to a read-lock on activeMu since we don't need to mutate pmas + // anymore. + mm.activeMu.DowngradeLock() + + // As above, errors are silently ignored. + mm.mapASLocked(pseg, ar, precommit) + mm.activeMu.RUnlock() +} + +// populateVMAAndUnlock is equivalent to populateVMA, but also unconditionally +// unlocks mm.mappingMu. In cases where populateVMAAndUnlock is usable, it is +// preferable to populateVMA since it unlocks mm.mappingMu before performing +// expensive operations that don't require it to be locked. +// +// Preconditions: mm.mappingMu must be locked for writing. +// vseg.Range().IsSupersetOf(ar). +// +// Postconditions: mm.mappingMu will be unlocked. +func (mm *MemoryManager) populateVMAAndUnlock(ctx context.Context, vseg vmaIterator, ar usermem.AddrRange, precommit bool) { + // See populateVMA above for commentary. 
+	if !vseg.ValuePtr().effectivePerms.Any() {
+		mm.mappingMu.Unlock()
+		return
+	}
+
+	mm.activeMu.Lock()
+
+	if mm.as == nil {
+		mm.activeMu.Unlock()
+		mm.mappingMu.Unlock()
+		return
+	}
+
+	// mm.mappingMu doesn't need to be write-locked for getPMAsLocked, and it
+	// isn't needed at all for mapASLocked.
+	mm.mappingMu.DowngradeLock()
+	pseg, _, err := mm.getPMAsLocked(ctx, vseg, ar, usermem.NoAccess)
+	mm.mappingMu.RUnlock()
+	if err != nil {
+		mm.activeMu.Unlock()
+		return
+	}
+
+	mm.activeMu.DowngradeLock()
+	mm.mapASLocked(pseg, ar, precommit)
+	mm.activeMu.RUnlock()
+}
+
+// MapStack allocates the initial process stack.
+func (mm *MemoryManager) MapStack(ctx context.Context) (usermem.AddrRange, error) {
+	// maxStackSize is the maximum supported process stack size in bytes.
+	//
+	// This limit exists because stack growing isn't implemented, so the entire
+	// process stack must be mapped up-front.
+	const maxStackSize = 128 << 20
+
+	stackSize := limits.FromContext(ctx).Get(limits.Stack)
+	r, ok := usermem.Addr(stackSize.Cur).RoundUp()
+	sz := uint64(r)
+	if !ok {
+		// RLIM_INFINITY rounds up to 0.
+		sz = linux.DefaultStackSoftLimit
+	} else if sz > maxStackSize {
+		ctx.Warningf("Capping stack size from RLIMIT_STACK of %v down to %v.", sz, maxStackSize)
+		sz = maxStackSize
+	} else if sz == 0 {
+		return usermem.AddrRange{}, syserror.ENOMEM
+	}
+	szaddr := usermem.Addr(sz)
+	ctx.Debugf("Allocating stack with size of %v bytes", sz)
+
+	// Determine the stack's desired location. Unlike Linux, address
+	// randomization can't be disabled.
+	stackEnd := mm.layout.MaxAddr - usermem.Addr(mrand.Int63n(int64(mm.layout.MaxStackRand))).RoundDown()
+	if stackEnd < szaddr {
+		return usermem.AddrRange{}, syserror.ENOMEM
+	}
+	stackStart := stackEnd - szaddr
+	mm.mappingMu.Lock()
+	defer mm.mappingMu.Unlock()
+	_, ar, err := mm.createVMALocked(ctx, memmap.MMapOpts{
+		Length:    sz,
+		Addr:      stackStart,
+		Perms:     usermem.ReadWrite,
+		MaxPerms:  usermem.AnyAccess,
+		Private:   true,
+		GrowsDown: true,
+		MLockMode: mm.defMLockMode,
+		Hint:      "[stack]",
+	})
+	return ar, err
+}
+
+// MUnmap implements the semantics of Linux's munmap(2).
+func (mm *MemoryManager) MUnmap(ctx context.Context, addr usermem.Addr, length uint64) error {
+	if addr != addr.RoundDown() {
+		return syserror.EINVAL
+	}
+	if length == 0 {
+		return syserror.EINVAL
+	}
+	la, ok := usermem.Addr(length).RoundUp()
+	if !ok {
+		return syserror.EINVAL
+	}
+	ar, ok := addr.ToRange(uint64(la))
+	if !ok {
+		return syserror.EINVAL
+	}
+
+	mm.mappingMu.Lock()
+	defer mm.mappingMu.Unlock()
+	mm.unmapLocked(ctx, ar)
+	return nil
+}
+
+// MRemapOpts specifies options to MRemap.
+type MRemapOpts struct {
+	// Move controls whether MRemap moves the remapped mapping to a new address.
+	Move MRemapMoveMode
+
+	// NewAddr is the new address for the remapping. NewAddr is ignored unless
+	// Move is MRemapMustMove.
+	NewAddr usermem.Addr
+}
+
+// MRemapMoveMode controls MRemap's moving behavior.
+type MRemapMoveMode int
+
+const (
+	// MRemapNoMove prevents MRemap from moving the remapped mapping.
+	MRemapNoMove MRemapMoveMode = iota
+
+	// MRemapMayMove allows MRemap to move the remapped mapping.
+	MRemapMayMove
+
+	// MRemapMustMove requires MRemap to move the remapped mapping to
+	// MRemapOpts.NewAddr, replacing any existing mappings in the remapped
+	// range.
+	MRemapMustMove
+)
+
+// MRemap implements the semantics of Linux's mremap(2).
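+//
+// A minimal usage sketch (hypothetical caller; oldAddr, oldSize and newSize
+// would come from an application's mremap(2) arguments):
+//
+//	newAddr, err := mm.MRemap(ctx, oldAddr, oldSize, newSize, MRemapOpts{
+//		Move: MRemapMayMove, // corresponds to mremap(2)'s MREMAP_MAYMOVE
+//	})
+//	if err != nil {
+//		return 0, err // e.g. EFAULT if no vma exists at oldAddr
+//	}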
+func (mm *MemoryManager) MRemap(ctx context.Context, oldAddr usermem.Addr, oldSize uint64, newSize uint64, opts MRemapOpts) (usermem.Addr, error) { + // "Note that old_address has to be page aligned." - mremap(2) + if oldAddr.RoundDown() != oldAddr { + return 0, syserror.EINVAL + } + + // Linux treats an old_size that rounds up to 0 as 0, which is otherwise a + // valid size. However, new_size can't be 0 after rounding. + oldSizeAddr, _ := usermem.Addr(oldSize).RoundUp() + oldSize = uint64(oldSizeAddr) + newSizeAddr, ok := usermem.Addr(newSize).RoundUp() + if !ok || newSizeAddr == 0 { + return 0, syserror.EINVAL + } + newSize = uint64(newSizeAddr) + + oldEnd, ok := oldAddr.AddLength(oldSize) + if !ok { + return 0, syserror.EINVAL + } + + mm.mappingMu.Lock() + defer mm.mappingMu.Unlock() + + // All cases require that a vma exists at oldAddr. + vseg := mm.vmas.FindSegment(oldAddr) + if !vseg.Ok() { + return 0, syserror.EFAULT + } + + // Behavior matrix: + // + // Move | oldSize = 0 | oldSize < newSize | oldSize = newSize | oldSize > newSize + // ---------+-------------+-------------------+-------------------+------------------ + // NoMove | ENOMEM [1] | Grow in-place | No-op | Shrink in-place + // MayMove | Copy [1] | Grow in-place or | No-op | Shrink in-place + // | | move | | + // MustMove | Copy | Move and grow | Move | Shrink and move + // + // [1] In-place growth is impossible because the vma at oldAddr already + // occupies at least part of the destination. Thus the NoMove case always + // fails and the MayMove case always falls back to copying. + + if vma := vseg.ValuePtr(); newSize > oldSize && vma.mlockMode != memmap.MLockNone { + // Check against RLIMIT_MEMLOCK. Unlike mmap, mlock, and mlockall, + // mremap in Linux does not check mm/mlock.c:can_do_mlock() and + // therefore does not return EPERM if RLIMIT_MEMLOCK is 0 and + // !CAP_IPC_LOCK. + mlockLimit := limits.FromContext(ctx).Get(limits.MemoryLocked).Cur + if creds := auth.CredentialsFromContext(ctx); !creds.HasCapabilityIn(linux.CAP_IPC_LOCK, creds.UserNamespace.Root()) { + if newLockedAS := mm.lockedAS - oldSize + newSize; newLockedAS > mlockLimit { + return 0, syserror.EAGAIN + } + } + } + + if opts.Move != MRemapMustMove { + // Handle no-ops and in-place shrinking. These cases don't care if + // [oldAddr, oldEnd) maps to a single vma, or is even mapped at all + // (aside from oldAddr). + if newSize <= oldSize { + if newSize < oldSize { + // If oldAddr+oldSize didn't overflow, oldAddr+newSize can't + // either. + newEnd := oldAddr + usermem.Addr(newSize) + mm.unmapLocked(ctx, usermem.AddrRange{newEnd, oldEnd}) + } + return oldAddr, nil + } + + // Handle in-place growing. + + // Check that oldEnd maps to the same vma as oldAddr. + if vseg.End() < oldEnd { + return 0, syserror.EFAULT + } + // "Grow" the existing vma by creating a new mergeable one. + vma := vseg.ValuePtr() + var newOffset uint64 + if vma.mappable != nil { + newOffset = vseg.mappableRange().End + } + vseg, ar, err := mm.createVMALocked(ctx, memmap.MMapOpts{ + Length: newSize - oldSize, + MappingIdentity: vma.id, + Mappable: vma.mappable, + Offset: newOffset, + Addr: oldEnd, + Fixed: true, + Perms: vma.realPerms, + MaxPerms: vma.maxPerms, + Private: vma.private, + GrowsDown: vma.growsDown, + MLockMode: vma.mlockMode, + Hint: vma.hint, + }) + if err == nil { + if vma.mlockMode == memmap.MLockEager { + mm.populateVMA(ctx, vseg, ar, true) + } + return oldAddr, nil + } + // In-place growth failed. 
In the MRemapMayMove case, fall through to + // copying/moving below. + if opts.Move == MRemapNoMove { + return 0, err + } + } + + // Find a location for the new mapping. + var newAR usermem.AddrRange + switch opts.Move { + case MRemapMayMove: + newAddr, err := mm.findAvailableLocked(newSize, findAvailableOpts{}) + if err != nil { + return 0, err + } + newAR, _ = newAddr.ToRange(newSize) + + case MRemapMustMove: + newAddr := opts.NewAddr + if newAddr.RoundDown() != newAddr { + return 0, syserror.EINVAL + } + var ok bool + newAR, ok = newAddr.ToRange(newSize) + if !ok { + return 0, syserror.EINVAL + } + if (usermem.AddrRange{oldAddr, oldEnd}).Overlaps(newAR) { + return 0, syserror.EINVAL + } + + // Unmap any mappings at the destination. + mm.unmapLocked(ctx, newAR) + + // If the sizes specify shrinking, unmap everything between the new and + // old sizes at the source. Unmapping before the following checks is + // correct: compare Linux's mm/mremap.c:mremap_to() => do_munmap(), + // vma_to_resize(). + if newSize < oldSize { + oldNewEnd := oldAddr + usermem.Addr(newSize) + mm.unmapLocked(ctx, usermem.AddrRange{oldNewEnd, oldEnd}) + oldEnd = oldNewEnd + } + + // unmapLocked may have invalidated vseg; look it up again. + vseg = mm.vmas.FindSegment(oldAddr) + } + + oldAR := usermem.AddrRange{oldAddr, oldEnd} + + // Check that oldEnd maps to the same vma as oldAddr. + if vseg.End() < oldEnd { + return 0, syserror.EFAULT + } + + // Check against RLIMIT_AS. + newUsageAS := mm.usageAS - uint64(oldAR.Length()) + uint64(newAR.Length()) + if limitAS := limits.FromContext(ctx).Get(limits.AS).Cur; newUsageAS > limitAS { + return 0, syserror.ENOMEM + } + + if vma := vseg.ValuePtr(); vma.mappable != nil { + // Check that offset+length does not overflow. + if vma.off+uint64(newAR.Length()) < vma.off { + return 0, syserror.EINVAL + } + // Inform the Mappable, if any, of the new mapping. + if err := vma.mappable.CopyMapping(ctx, mm, oldAR, newAR, vseg.mappableOffsetAt(oldAR.Start), vma.canWriteMappableLocked()); err != nil { + return 0, err + } + } + + if oldSize == 0 { + // Handle copying. + // + // We can't use createVMALocked because it calls Mappable.AddMapping, + // whereas we've already called Mappable.CopyMapping (which is + // consistent with Linux). Call vseg.Value() (rather than + // vseg.ValuePtr()) to make a copy of the vma. + vma := vseg.Value() + if vma.mappable != nil { + vma.off = vseg.mappableOffsetAt(oldAR.Start) + } + if vma.id != nil { + vma.id.IncRef() + } + vseg := mm.vmas.Insert(mm.vmas.FindGap(newAR.Start), newAR, vma) + mm.usageAS += uint64(newAR.Length()) + if vma.isPrivateDataLocked() { + mm.dataAS += uint64(newAR.Length()) + } + if vma.mlockMode != memmap.MLockNone { + mm.lockedAS += uint64(newAR.Length()) + if vma.mlockMode == memmap.MLockEager { + mm.populateVMA(ctx, vseg, newAR, true) + } + } + return newAR.Start, nil + } + + // Handle moving. + // + // Remove the existing vma before inserting the new one to minimize + // iterator invalidation. We do this directly (instead of calling + // removeVMAsLocked) because: + // + // 1. We can't drop the reference on vma.id, which will be transferred to + // the new vma. + // + // 2. We can't call vma.mappable.RemoveMapping, because pmas are still at + // oldAR, so calling RemoveMapping could cause us to miss an invalidation + // overlapping oldAR. + // + // Call vseg.Value() (rather than vseg.ValuePtr()) to make a copy of the + // vma. 
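+	// (The copy matters because vmas.Remove clears the removed segment's
+	// value via vmaSetFunctions.ClearValue, which nils out vma.id and
+	// vma.mappable before we reinsert the vma at newAR.)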
+ vseg = mm.vmas.Isolate(vseg, oldAR) + vma := vseg.Value() + mm.vmas.Remove(vseg) + vseg = mm.vmas.Insert(mm.vmas.FindGap(newAR.Start), newAR, vma) + mm.usageAS = mm.usageAS - uint64(oldAR.Length()) + uint64(newAR.Length()) + if vma.isPrivateDataLocked() { + mm.dataAS = mm.dataAS - uint64(oldAR.Length()) + uint64(newAR.Length()) + } + if vma.mlockMode != memmap.MLockNone { + mm.lockedAS = mm.lockedAS - uint64(oldAR.Length()) + uint64(newAR.Length()) + } + + // Move pmas. This is technically optional for non-private pmas, which + // could just go through memmap.Mappable.Translate again, but it's required + // for private pmas. + mm.activeMu.Lock() + mm.movePMAsLocked(oldAR, newAR) + mm.activeMu.Unlock() + + // Now that pmas have been moved to newAR, we can notify vma.mappable that + // oldAR is no longer mapped. + if vma.mappable != nil { + vma.mappable.RemoveMapping(ctx, mm, oldAR, vma.off, vma.canWriteMappableLocked()) + } + + if vma.mlockMode == memmap.MLockEager { + mm.populateVMA(ctx, vseg, newAR, true) + } + + return newAR.Start, nil +} + +// MProtect implements the semantics of Linux's mprotect(2). +func (mm *MemoryManager) MProtect(addr usermem.Addr, length uint64, realPerms usermem.AccessType, growsDown bool) error { + if addr.RoundDown() != addr { + return syserror.EINVAL + } + if length == 0 { + return nil + } + rlength, ok := usermem.Addr(length).RoundUp() + if !ok { + return syserror.ENOMEM + } + ar, ok := addr.ToRange(uint64(rlength)) + if !ok { + return syserror.ENOMEM + } + effectivePerms := realPerms.Effective() + + mm.mappingMu.Lock() + defer mm.mappingMu.Unlock() + // Non-growsDown mprotect requires that all of ar is mapped, and stops at + // the first non-empty gap. growsDown mprotect requires that the first vma + // be growsDown, but does not require it to extend all the way to ar.Start; + // vmas after the first must be contiguous but need not be growsDown, like + // the non-growsDown case. + vseg := mm.vmas.LowerBoundSegment(ar.Start) + if !vseg.Ok() { + return syserror.ENOMEM + } + if growsDown { + if !vseg.ValuePtr().growsDown { + return syserror.EINVAL + } + if ar.End <= vseg.Start() { + return syserror.ENOMEM + } + ar.Start = vseg.Start() + } else { + if ar.Start < vseg.Start() { + return syserror.ENOMEM + } + } + + mm.activeMu.Lock() + defer mm.activeMu.Unlock() + defer func() { + mm.vmas.MergeRange(ar) + mm.vmas.MergeAdjacent(ar) + mm.pmas.MergeRange(ar) + mm.pmas.MergeAdjacent(ar) + }() + pseg := mm.pmas.LowerBoundSegment(ar.Start) + var didUnmapAS bool + for { + // Check for permission validity before splitting vmas, for consistency + // with Linux. + if !vseg.ValuePtr().maxPerms.SupersetOf(effectivePerms) { + return syserror.EACCES + } + vseg = mm.vmas.Isolate(vseg, ar) + + // Update vma permissions. + vma := vseg.ValuePtr() + vmaLength := vseg.Range().Length() + if vma.isPrivateDataLocked() { + mm.dataAS -= uint64(vmaLength) + } + + vma.realPerms = realPerms + vma.effectivePerms = effectivePerms + if vma.isPrivateDataLocked() { + mm.dataAS += uint64(vmaLength) + } + + // Propagate vma permission changes to pmas. + for pseg.Ok() && pseg.Start() < vseg.End() { + if pseg.Range().Overlaps(vseg.Range()) { + pseg = mm.pmas.Isolate(pseg, vseg.Range()) + pma := pseg.ValuePtr() + if !effectivePerms.SupersetOf(pma.effectivePerms) && !didUnmapAS { + // Unmap all of ar, not just vseg.Range(), to minimize host + // syscalls. 
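+					// (didUnmapAS limits this to one host unmap per MProtect
+					// call; later pmas in the loop rely on the same unmap.)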
+ mm.unmapASLocked(ar) + didUnmapAS = true + } + pma.effectivePerms = effectivePerms.Intersect(pma.translatePerms) + if pma.needCOW { + pma.effectivePerms.Write = false + } + } + pseg = pseg.NextSegment() + } + + // Continue to the next vma. + if ar.End <= vseg.End() { + return nil + } + vseg, _ = vseg.NextNonEmpty() + if !vseg.Ok() { + return syserror.ENOMEM + } + } +} + +// BrkSetup sets mm's brk address to addr and its brk size to 0. +func (mm *MemoryManager) BrkSetup(ctx context.Context, addr usermem.Addr) { + mm.mappingMu.Lock() + defer mm.mappingMu.Unlock() + // Unmap the existing brk. + if mm.brk.Length() != 0 { + mm.unmapLocked(ctx, mm.brk) + } + mm.brk = usermem.AddrRange{addr, addr} +} + +// Brk implements the semantics of Linux's brk(2), except that it returns an +// error on failure. +func (mm *MemoryManager) Brk(ctx context.Context, addr usermem.Addr) (usermem.Addr, error) { + mm.mappingMu.Lock() + // Can't defer mm.mappingMu.Unlock(); see below. + + if addr < mm.brk.Start { + addr = mm.brk.End + mm.mappingMu.Unlock() + return addr, syserror.EINVAL + } + + // TODO(gvisor.dev/issue/156): This enforces RLIMIT_DATA, but is + // slightly more permissive than the usual data limit. In particular, + // this only limits the size of the heap; a true RLIMIT_DATA limits the + // size of heap + data + bss. The segment sizes need to be plumbed from + // the loader package to fully enforce RLIMIT_DATA. + if uint64(addr-mm.brk.Start) > limits.FromContext(ctx).Get(limits.Data).Cur { + addr = mm.brk.End + mm.mappingMu.Unlock() + return addr, syserror.ENOMEM + } + + oldbrkpg, _ := mm.brk.End.RoundUp() + newbrkpg, ok := addr.RoundUp() + if !ok { + addr = mm.brk.End + mm.mappingMu.Unlock() + return addr, syserror.EFAULT + } + + switch { + case oldbrkpg < newbrkpg: + vseg, ar, err := mm.createVMALocked(ctx, memmap.MMapOpts{ + Length: uint64(newbrkpg - oldbrkpg), + Addr: oldbrkpg, + Fixed: true, + // Compare Linux's + // arch/x86/include/asm/page_types.h:VM_DATA_DEFAULT_FLAGS. + Perms: usermem.ReadWrite, + MaxPerms: usermem.AnyAccess, + Private: true, + // Linux: mm/mmap.c:sys_brk() => do_brk_flags() includes + // mm->def_flags. + MLockMode: mm.defMLockMode, + Hint: "[heap]", + }) + if err != nil { + addr = mm.brk.End + mm.mappingMu.Unlock() + return addr, err + } + mm.brk.End = addr + if mm.defMLockMode == memmap.MLockEager { + mm.populateVMAAndUnlock(ctx, vseg, ar, true) + } else { + mm.mappingMu.Unlock() + } + + case newbrkpg < oldbrkpg: + mm.unmapLocked(ctx, usermem.AddrRange{newbrkpg, oldbrkpg}) + fallthrough + + default: + mm.brk.End = addr + mm.mappingMu.Unlock() + } + + return addr, nil +} + +// MLock implements the semantics of Linux's mlock()/mlock2()/munlock(), +// depending on mode. +func (mm *MemoryManager) MLock(ctx context.Context, addr usermem.Addr, length uint64, mode memmap.MLockMode) error { + // Linux allows this to overflow. + la, _ := usermem.Addr(length + addr.PageOffset()).RoundUp() + ar, ok := addr.RoundDown().ToRange(uint64(la)) + if !ok { + return syserror.EINVAL + } + + mm.mappingMu.Lock() + // Can't defer mm.mappingMu.Unlock(); see below. + + if mode != memmap.MLockNone { + // Check against RLIMIT_MEMLOCK. 
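+		// (Worked example: with RLIMIT_MEMLOCK = 64 KiB, lockedAS = 48 KiB,
+		// and a 32 KiB request of which 16 KiB is already locked, newLockedAS
+		// = 48 + 32 - 16 = 64 KiB, which is just allowed; one more page would
+		// fail with ENOMEM.)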
+ if creds := auth.CredentialsFromContext(ctx); !creds.HasCapabilityIn(linux.CAP_IPC_LOCK, creds.UserNamespace.Root()) { + mlockLimit := limits.FromContext(ctx).Get(limits.MemoryLocked).Cur + if mlockLimit == 0 { + mm.mappingMu.Unlock() + return syserror.EPERM + } + if newLockedAS := mm.lockedAS + uint64(ar.Length()) - mm.mlockedBytesRangeLocked(ar); newLockedAS > mlockLimit { + mm.mappingMu.Unlock() + return syserror.ENOMEM + } + } + } + + // Check this after RLIMIT_MEMLOCK for consistency with Linux. + if ar.Length() == 0 { + mm.mappingMu.Unlock() + return nil + } + + // Apply the new mlock mode to vmas. + var unmapped bool + vseg := mm.vmas.FindSegment(ar.Start) + for { + if !vseg.Ok() { + unmapped = true + break + } + vseg = mm.vmas.Isolate(vseg, ar) + vma := vseg.ValuePtr() + prevMode := vma.mlockMode + vma.mlockMode = mode + if mode != memmap.MLockNone && prevMode == memmap.MLockNone { + mm.lockedAS += uint64(vseg.Range().Length()) + } else if mode == memmap.MLockNone && prevMode != memmap.MLockNone { + mm.lockedAS -= uint64(vseg.Range().Length()) + } + if ar.End <= vseg.End() { + break + } + vseg, _ = vseg.NextNonEmpty() + } + mm.vmas.MergeRange(ar) + mm.vmas.MergeAdjacent(ar) + if unmapped { + mm.mappingMu.Unlock() + return syserror.ENOMEM + } + + if mode == memmap.MLockEager { + // Ensure that we have usable pmas. Since we didn't return ENOMEM + // above, ar must be fully covered by vmas, so we can just use + // NextSegment below. + mm.activeMu.Lock() + mm.mappingMu.DowngradeLock() + for vseg := mm.vmas.FindSegment(ar.Start); vseg.Ok() && vseg.Start() < ar.End; vseg = vseg.NextSegment() { + if !vseg.ValuePtr().effectivePerms.Any() { + // Linux: mm/gup.c:__get_user_pages() returns EFAULT in this + // case, which is converted to ENOMEM by mlock. + mm.activeMu.Unlock() + mm.mappingMu.RUnlock() + return syserror.ENOMEM + } + _, _, err := mm.getPMAsLocked(ctx, vseg, vseg.Range().Intersect(ar), usermem.NoAccess) + if err != nil { + mm.activeMu.Unlock() + mm.mappingMu.RUnlock() + // Linux: mm/mlock.c:__mlock_posix_error_return() + if err == syserror.EFAULT { + return syserror.ENOMEM + } + if err == syserror.ENOMEM { + return syserror.EAGAIN + } + return err + } + } + + // Map pmas into the active AddressSpace, if we have one. + mm.mappingMu.RUnlock() + if mm.as != nil { + mm.activeMu.DowngradeLock() + err := mm.mapASLocked(mm.pmas.LowerBoundSegment(ar.Start), ar, true /* precommit */) + mm.activeMu.RUnlock() + if err != nil { + return err + } + } else { + mm.activeMu.Unlock() + } + } else { + mm.mappingMu.Unlock() + } + + return nil +} + +// MLockAllOpts holds options to MLockAll. +type MLockAllOpts struct { + // If Current is true, change the memory-locking behavior of all mappings + // to Mode. If Future is true, upgrade the memory-locking behavior of all + // future mappings to Mode. At least one of Current or Future must be true. + Current bool + Future bool + Mode memmap.MLockMode +} + +// MLockAll implements the semantics of Linux's mlockall()/munlockall(), +// depending on opts. +func (mm *MemoryManager) MLockAll(ctx context.Context, opts MLockAllOpts) error { + if !opts.Current && !opts.Future { + return syserror.EINVAL + } + + mm.mappingMu.Lock() + // Can't defer mm.mappingMu.Unlock(); see below. + + if opts.Current { + if opts.Mode != memmap.MLockNone { + // Check against RLIMIT_MEMLOCK. 
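+			// (Unlike MLock above, mlockall(MCL_CURRENT) applies to every
+			// existing mapping, so the check is simply against the total
+			// span of all vmas.)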
+ if creds := auth.CredentialsFromContext(ctx); !creds.HasCapabilityIn(linux.CAP_IPC_LOCK, creds.UserNamespace.Root()) { + mlockLimit := limits.FromContext(ctx).Get(limits.MemoryLocked).Cur + if mlockLimit == 0 { + mm.mappingMu.Unlock() + return syserror.EPERM + } + if uint64(mm.vmas.Span()) > mlockLimit { + mm.mappingMu.Unlock() + return syserror.ENOMEM + } + } + } + for vseg := mm.vmas.FirstSegment(); vseg.Ok(); vseg = vseg.NextSegment() { + vma := vseg.ValuePtr() + prevMode := vma.mlockMode + vma.mlockMode = opts.Mode + if opts.Mode != memmap.MLockNone && prevMode == memmap.MLockNone { + mm.lockedAS += uint64(vseg.Range().Length()) + } else if opts.Mode == memmap.MLockNone && prevMode != memmap.MLockNone { + mm.lockedAS -= uint64(vseg.Range().Length()) + } + } + } + + if opts.Future { + mm.defMLockMode = opts.Mode + } + + if opts.Current && opts.Mode == memmap.MLockEager { + // Linux: mm/mlock.c:sys_mlockall() => include/linux/mm.h:mm_populate() + // ignores the return value of __mm_populate(), so all errors below are + // ignored. + // + // Try to get usable pmas. + mm.activeMu.Lock() + mm.mappingMu.DowngradeLock() + for vseg := mm.vmas.FirstSegment(); vseg.Ok(); vseg = vseg.NextSegment() { + if vseg.ValuePtr().effectivePerms.Any() { + mm.getPMAsLocked(ctx, vseg, vseg.Range(), usermem.NoAccess) + } + } + + // Map all pmas into the active AddressSpace, if we have one. + mm.mappingMu.RUnlock() + if mm.as != nil { + mm.activeMu.DowngradeLock() + mm.mapASLocked(mm.pmas.FirstSegment(), mm.applicationAddrRange(), true /* precommit */) + mm.activeMu.RUnlock() + } else { + mm.activeMu.Unlock() + } + } else { + mm.mappingMu.Unlock() + } + return nil +} + +// Decommit implements the semantics of Linux's madvise(MADV_DONTNEED). +func (mm *MemoryManager) Decommit(addr usermem.Addr, length uint64) error { + ar, ok := addr.ToRange(length) + if !ok { + return syserror.EINVAL + } + + mm.mappingMu.RLock() + defer mm.mappingMu.RUnlock() + mm.activeMu.Lock() + defer mm.activeMu.Unlock() + + // Linux's mm/madvise.c:madvise_dontneed() => mm/memory.c:zap_page_range() + // is analogous to our mm.invalidateLocked(ar, true, true). We inline this + // here, with the special case that we synchronously decommit + // uniquely-owned (non-copy-on-write) pages for private anonymous vma, + // which is the common case for MADV_DONTNEED. Invalidating these pmas, and + // allowing them to be reallocated when touched again, increases pma + // fragmentation, which may significantly reduce performance for + // non-vectored I/O implementations. Also, decommitting synchronously + // ensures that Decommit immediately reduces host memory usage. + var didUnmapAS bool + pseg := mm.pmas.LowerBoundSegment(ar.Start) + mf := mm.mfp.MemoryFile() + for vseg := mm.vmas.LowerBoundSegment(ar.Start); vseg.Ok() && vseg.Start() < ar.End; vseg = vseg.NextSegment() { + vma := vseg.ValuePtr() + if vma.mlockMode != memmap.MLockNone { + return syserror.EINVAL + } + vsegAR := vseg.Range().Intersect(ar) + // pseg should already correspond to either this vma or a later one, + // since there can't be a pma without a corresponding vma. 
+ if checkInvariants { + if pseg.Ok() && pseg.End() <= vsegAR.Start { + panic(fmt.Sprintf("pma %v precedes vma %v", pseg.Range(), vsegAR)) + } + } + for pseg.Ok() && pseg.Start() < vsegAR.End { + pma := pseg.ValuePtr() + if pma.private && !mm.isPMACopyOnWriteLocked(vseg, pseg) { + psegAR := pseg.Range().Intersect(ar) + if vsegAR.IsSupersetOf(psegAR) && vma.mappable == nil { + if err := mf.Decommit(pseg.fileRangeOf(psegAR)); err == nil { + pseg = pseg.NextSegment() + continue + } + // If an error occurs, fall through to the general + // invalidation case below. + } + } + pseg = mm.pmas.Isolate(pseg, vsegAR) + pma = pseg.ValuePtr() + if !didUnmapAS { + // Unmap all of ar, not just pseg.Range(), to minimize host + // syscalls. AddressSpace mappings must be removed before + // mm.decPrivateRef(). + mm.unmapASLocked(ar) + didUnmapAS = true + } + if pma.private { + mm.decPrivateRef(pseg.fileRange()) + } + pma.file.DecRef(pseg.fileRange()) + mm.removeRSSLocked(pseg.Range()) + pseg = mm.pmas.Remove(pseg).NextSegment() + } + } + + // "If there are some parts of the specified address space that are not + // mapped, the Linux version of madvise() ignores them and applies the call + // to the rest (but returns ENOMEM from the system call, as it should)." - + // madvise(2) + if mm.vmas.SpanRange(ar) != ar.Length() { + return syserror.ENOMEM + } + return nil +} + +// MSyncOpts holds options to MSync. +type MSyncOpts struct { + // Sync has the semantics of MS_SYNC. + Sync bool + + // Invalidate has the semantics of MS_INVALIDATE. + Invalidate bool +} + +// MSync implements the semantics of Linux's msync(). +func (mm *MemoryManager) MSync(ctx context.Context, addr usermem.Addr, length uint64, opts MSyncOpts) error { + if addr != addr.RoundDown() { + return syserror.EINVAL + } + if length == 0 { + return nil + } + la, ok := usermem.Addr(length).RoundUp() + if !ok { + return syserror.ENOMEM + } + ar, ok := addr.ToRange(uint64(la)) + if !ok { + return syserror.ENOMEM + } + + mm.mappingMu.RLock() + // Can't defer mm.mappingMu.RUnlock(); see below. + vseg := mm.vmas.LowerBoundSegment(ar.Start) + if !vseg.Ok() { + mm.mappingMu.RUnlock() + return syserror.ENOMEM + } + var unmapped bool + lastEnd := ar.Start + for { + if !vseg.Ok() { + mm.mappingMu.RUnlock() + unmapped = true + break + } + if lastEnd < vseg.Start() { + unmapped = true + } + lastEnd = vseg.End() + vma := vseg.ValuePtr() + if opts.Invalidate && vma.mlockMode != memmap.MLockNone { + mm.mappingMu.RUnlock() + return syserror.EBUSY + } + // It's only possible to have dirtied the Mappable through a shared + // mapping. Don't check if the mapping is writable, because mprotect + // may have changed this, and also because Linux doesn't. + if id := vma.id; opts.Sync && id != nil && vma.mappable != nil && !vma.private { + // We can't call memmap.MappingIdentity.Msync while holding + // mm.mappingMu since it may take fs locks that precede it in the + // lock order. + id.IncRef() + mr := vseg.mappableRangeOf(vseg.Range().Intersect(ar)) + mm.mappingMu.RUnlock() + err := id.Msync(ctx, mr) + id.DecRef() + if err != nil { + return err + } + if lastEnd >= ar.End { + break + } + mm.mappingMu.RLock() + vseg = mm.vmas.LowerBoundSegment(lastEnd) + } else { + if lastEnd >= ar.End { + mm.mappingMu.RUnlock() + break + } + vseg = vseg.NextSegment() + } + } + + if unmapped { + return syserror.ENOMEM + } + return nil +} + +// GetSharedFutexKey is used by kernel.Task.GetSharedKey. 
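+//
+// For a private mapping, the returned key is the (mm, address) pair, so only
+// tasks sharing this MemoryManager can match it; for a shared mapping, it is
+// the (mappable, offset) pair, so tasks in different address spaces mapping
+// the same file contend on the same futex.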
+func (mm *MemoryManager) GetSharedFutexKey(ctx context.Context, addr usermem.Addr) (futex.Key, error) { + ar, ok := addr.ToRange(4) // sizeof(int32). + if !ok { + return futex.Key{}, syserror.EFAULT + } + + mm.mappingMu.RLock() + defer mm.mappingMu.RUnlock() + vseg, _, err := mm.getVMAsLocked(ctx, ar, usermem.Read, false) + if err != nil { + return futex.Key{}, err + } + vma := vseg.ValuePtr() + + if vma.private { + return futex.Key{ + Kind: futex.KindSharedPrivate, + Offset: uint64(addr), + }, nil + } + + if vma.id != nil { + vma.id.IncRef() + } + return futex.Key{ + Kind: futex.KindSharedMappable, + Mappable: vma.mappable, + MappingIdentity: vma.id, + Offset: vseg.mappableOffsetAt(addr), + }, nil +} + +// VirtualMemorySize returns the combined length in bytes of all mappings in +// mm. +func (mm *MemoryManager) VirtualMemorySize() uint64 { + mm.mappingMu.RLock() + defer mm.mappingMu.RUnlock() + return mm.usageAS +} + +// VirtualMemorySizeRange returns the combined length in bytes of all mappings +// in ar in mm. +func (mm *MemoryManager) VirtualMemorySizeRange(ar usermem.AddrRange) uint64 { + mm.mappingMu.RLock() + defer mm.mappingMu.RUnlock() + return uint64(mm.vmas.SpanRange(ar)) +} + +// ResidentSetSize returns the value advertised as mm's RSS in bytes. +func (mm *MemoryManager) ResidentSetSize() uint64 { + mm.activeMu.RLock() + defer mm.activeMu.RUnlock() + return mm.curRSS +} + +// MaxResidentSetSize returns the value advertised as mm's max RSS in bytes. +func (mm *MemoryManager) MaxResidentSetSize() uint64 { + mm.activeMu.RLock() + defer mm.activeMu.RUnlock() + return mm.maxRSS +} + +// VirtualDataSize returns the size of private data segments in mm. +func (mm *MemoryManager) VirtualDataSize() uint64 { + mm.mappingMu.RLock() + defer mm.mappingMu.RUnlock() + return mm.dataAS +} diff --git a/pkg/sentry/mm/vma.go b/pkg/sentry/mm/vma.go new file mode 100644 index 000000000..02203f79f --- /dev/null +++ b/pkg/sentry/mm/vma.go @@ -0,0 +1,564 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package mm + +import ( + "fmt" + + "gvisor.googlesource.com/gvisor/pkg/abi/linux" + "gvisor.googlesource.com/gvisor/pkg/sentry/arch" + "gvisor.googlesource.com/gvisor/pkg/sentry/context" + "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth" + "gvisor.googlesource.com/gvisor/pkg/sentry/limits" + "gvisor.googlesource.com/gvisor/pkg/sentry/memmap" + "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" + "gvisor.googlesource.com/gvisor/pkg/syserror" +) + +// Preconditions: mm.mappingMu must be locked for writing. opts must be valid +// as defined by the checks in MMap. +func (mm *MemoryManager) createVMALocked(ctx context.Context, opts memmap.MMapOpts) (vmaIterator, usermem.AddrRange, error) { + if opts.MaxPerms != opts.MaxPerms.Effective() { + panic(fmt.Sprintf("Non-effective MaxPerms %s cannot be enforced", opts.MaxPerms)) + } + + // Find a useable range. 
+ addr, err := mm.findAvailableLocked(opts.Length, findAvailableOpts{ + Addr: opts.Addr, + Fixed: opts.Fixed, + Unmap: opts.Unmap, + Map32Bit: opts.Map32Bit, + }) + if err != nil { + return vmaIterator{}, usermem.AddrRange{}, err + } + ar, _ := addr.ToRange(opts.Length) + + // Check against RLIMIT_AS. + newUsageAS := mm.usageAS + opts.Length + if opts.Unmap { + newUsageAS -= uint64(mm.vmas.SpanRange(ar)) + } + if limitAS := limits.FromContext(ctx).Get(limits.AS).Cur; newUsageAS > limitAS { + return vmaIterator{}, usermem.AddrRange{}, syserror.ENOMEM + } + + if opts.MLockMode != memmap.MLockNone { + // Check against RLIMIT_MEMLOCK. + if creds := auth.CredentialsFromContext(ctx); !creds.HasCapabilityIn(linux.CAP_IPC_LOCK, creds.UserNamespace.Root()) { + mlockLimit := limits.FromContext(ctx).Get(limits.MemoryLocked).Cur + if mlockLimit == 0 { + return vmaIterator{}, usermem.AddrRange{}, syserror.EPERM + } + newLockedAS := mm.lockedAS + opts.Length + if opts.Unmap { + newLockedAS -= mm.mlockedBytesRangeLocked(ar) + } + if newLockedAS > mlockLimit { + return vmaIterator{}, usermem.AddrRange{}, syserror.EAGAIN + } + } + } + + // Remove overwritten mappings. This ordering is consistent with Linux: + // compare Linux's mm/mmap.c:mmap_region() => do_munmap(), + // file->f_op->mmap(). + var vgap vmaGapIterator + if opts.Unmap { + vgap = mm.unmapLocked(ctx, ar) + } else { + vgap = mm.vmas.FindGap(ar.Start) + } + + // Inform the Mappable, if any, of the new mapping. + if opts.Mappable != nil { + // The expression for writable is vma.canWriteMappableLocked(), but we + // don't yet have a vma. + if err := opts.Mappable.AddMapping(ctx, mm, ar, opts.Offset, !opts.Private && opts.MaxPerms.Write); err != nil { + return vmaIterator{}, usermem.AddrRange{}, err + } + } + + // Take a reference on opts.MappingIdentity before inserting the vma since + // vma merging can drop the reference. + if opts.MappingIdentity != nil { + opts.MappingIdentity.IncRef() + } + + // Finally insert the vma. + v := vma{ + mappable: opts.Mappable, + off: opts.Offset, + realPerms: opts.Perms, + effectivePerms: opts.Perms.Effective(), + maxPerms: opts.MaxPerms, + private: opts.Private, + growsDown: opts.GrowsDown, + mlockMode: opts.MLockMode, + id: opts.MappingIdentity, + hint: opts.Hint, + } + + vseg := mm.vmas.Insert(vgap, ar, v) + mm.usageAS += opts.Length + if v.isPrivateDataLocked() { + mm.dataAS += opts.Length + } + if opts.MLockMode != memmap.MLockNone { + mm.lockedAS += opts.Length + } + + return vseg, ar, nil +} + +type findAvailableOpts struct { + // These fields are equivalent to those in memmap.MMapOpts, except that: + // + // - Addr must be page-aligned. + // + // - Unmap allows existing guard pages in the returned range. + + Addr usermem.Addr + Fixed bool + Unmap bool + Map32Bit bool +} + +// map32Start/End are the bounds to which MAP_32BIT mappings are constrained, +// and are equivalent to Linux's MAP32_BASE and MAP32_MAX respectively. +const ( + map32Start = 0x40000000 + map32End = 0x80000000 +) + +// findAvailableLocked finds an allocatable range. +// +// Preconditions: mm.mappingMu must be locked. +func (mm *MemoryManager) findAvailableLocked(length uint64, opts findAvailableOpts) (usermem.Addr, error) { + if opts.Fixed { + opts.Map32Bit = false + } + allowedAR := mm.applicationAddrRange() + if opts.Map32Bit { + allowedAR = allowedAR.Intersect(usermem.AddrRange{map32Start, map32End}) + } + + // Does the provided suggestion work? 
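+	// (opts.Addr is only a hint unless opts.Fixed is set: a hint that falls
+	// outside allowedAR or collides with an existing vma or guard page is
+	// simply ignored, and we fall through to the general search below.)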
+ if ar, ok := opts.Addr.ToRange(length); ok { + if allowedAR.IsSupersetOf(ar) { + if opts.Unmap { + return ar.Start, nil + } + // Check for the presence of an existing vma or guard page. + if vgap := mm.vmas.FindGap(ar.Start); vgap.Ok() && vgap.availableRange().IsSupersetOf(ar) { + return ar.Start, nil + } + } + } + + // Fixed mappings accept only the requested address. + if opts.Fixed { + return 0, syserror.ENOMEM + } + + // Prefer hugepage alignment if a hugepage or more is requested. + alignment := uint64(usermem.PageSize) + if length >= usermem.HugePageSize { + alignment = usermem.HugePageSize + } + + if opts.Map32Bit { + return mm.findLowestAvailableLocked(length, alignment, allowedAR) + } + if mm.layout.DefaultDirection == arch.MmapBottomUp { + return mm.findLowestAvailableLocked(length, alignment, usermem.AddrRange{mm.layout.BottomUpBase, mm.layout.MaxAddr}) + } + return mm.findHighestAvailableLocked(length, alignment, usermem.AddrRange{mm.layout.MinAddr, mm.layout.TopDownBase}) +} + +func (mm *MemoryManager) applicationAddrRange() usermem.AddrRange { + return usermem.AddrRange{mm.layout.MinAddr, mm.layout.MaxAddr} +} + +// Preconditions: mm.mappingMu must be locked. +func (mm *MemoryManager) findLowestAvailableLocked(length, alignment uint64, bounds usermem.AddrRange) (usermem.Addr, error) { + for gap := mm.vmas.LowerBoundGap(bounds.Start); gap.Ok() && gap.Start() < bounds.End; gap = gap.NextGap() { + if gr := gap.availableRange().Intersect(bounds); uint64(gr.Length()) >= length { + // Can we shift up to match the alignment? + if offset := uint64(gr.Start) % alignment; offset != 0 { + if uint64(gr.Length()) >= length+alignment-offset { + // Yes, we're aligned. + return gr.Start + usermem.Addr(alignment-offset), nil + } + } + + // Either aligned perfectly, or can't align it. + return gr.Start, nil + } + } + return 0, syserror.ENOMEM +} + +// Preconditions: mm.mappingMu must be locked. +func (mm *MemoryManager) findHighestAvailableLocked(length, alignment uint64, bounds usermem.AddrRange) (usermem.Addr, error) { + for gap := mm.vmas.UpperBoundGap(bounds.End); gap.Ok() && gap.End() > bounds.Start; gap = gap.PrevGap() { + if gr := gap.availableRange().Intersect(bounds); uint64(gr.Length()) >= length { + // Can we shift down to match the alignment? + start := gr.End - usermem.Addr(length) + if offset := uint64(start) % alignment; offset != 0 { + if gr.Start <= start-usermem.Addr(offset) { + // Yes, we're aligned. + return start - usermem.Addr(offset), nil + } + } + + // Either aligned perfectly, or can't align it. + return start, nil + } + } + return 0, syserror.ENOMEM +} + +// Preconditions: mm.mappingMu must be locked. +func (mm *MemoryManager) mlockedBytesRangeLocked(ar usermem.AddrRange) uint64 { + var total uint64 + for vseg := mm.vmas.LowerBoundSegment(ar.Start); vseg.Ok() && vseg.Start() < ar.End; vseg = vseg.NextSegment() { + if vseg.ValuePtr().mlockMode != memmap.MLockNone { + total += uint64(vseg.Range().Intersect(ar).Length()) + } + } + return total +} + +// getVMAsLocked ensures that vmas exist for all addresses in ar, and support +// access of type (at, ignorePermissions). It returns: +// +// - An iterator to the vma containing ar.Start. If no vma contains ar.Start, +// the iterator is unspecified. +// +// - An iterator to the gap after the last vma containing an address in ar. If +// vmas exist for no addresses in ar, the iterator is to a gap that begins +// before ar.Start. +// +// - An error that is non-nil if vmas exist for only a subset of ar. 
+//
+// Preconditions: mm.mappingMu must be locked for reading; it may be
+// temporarily unlocked. ar.Length() != 0.
+func (mm *MemoryManager) getVMAsLocked(ctx context.Context, ar usermem.AddrRange, at usermem.AccessType, ignorePermissions bool) (vmaIterator, vmaGapIterator, error) {
+	if checkInvariants {
+		if !ar.WellFormed() || ar.Length() <= 0 {
+			panic(fmt.Sprintf("invalid ar: %v", ar))
+		}
+	}
+
+	// Inline mm.vmas.LowerBoundSegment so that we have the preceding gap if
+	// !vbegin.Ok().
+	vbegin, vgap := mm.vmas.Find(ar.Start)
+	if !vbegin.Ok() {
+		vbegin = vgap.NextSegment()
+		// vseg.Ok() is checked before entering the following loop.
+	} else {
+		vgap = vbegin.PrevGap()
+	}
+
+	addr := ar.Start
+	vseg := vbegin
+	for vseg.Ok() {
+		// Loop invariants: vgap = vseg.PrevGap(); addr < vseg.End().
+		vma := vseg.ValuePtr()
+		if addr < vseg.Start() {
+			// TODO(jamieliu): Implement vma.growsDown here.
+			return vbegin, vgap, syserror.EFAULT
+		}
+
+		perms := vma.effectivePerms
+		if ignorePermissions {
+			perms = vma.maxPerms
+		}
+		if !perms.SupersetOf(at) {
+			return vbegin, vgap, syserror.EPERM
+		}
+
+		addr = vseg.End()
+		vgap = vseg.NextGap()
+		if addr >= ar.End {
+			return vbegin, vgap, nil
+		}
+		vseg = vgap.NextSegment()
+	}
+
+	// Ran out of vmas before ar.End.
+	return vbegin, vgap, syserror.EFAULT
+}
+
+// getVecVMAsLocked ensures that vmas exist for all addresses in ars, and
+// support access of type (at, ignorePermissions). It returns the subset of
+// ars for which vmas exist. If this is not equal to ars, it returns a non-nil
+// error explaining why.
+//
+// Preconditions: mm.mappingMu must be locked for reading; it may be
+// temporarily unlocked.
+//
+// Postconditions: ars is not mutated.
+func (mm *MemoryManager) getVecVMAsLocked(ctx context.Context, ars usermem.AddrRangeSeq, at usermem.AccessType, ignorePermissions bool) (usermem.AddrRangeSeq, error) {
+	for arsit := ars; !arsit.IsEmpty(); arsit = arsit.Tail() {
+		ar := arsit.Head()
+		if ar.Length() == 0 {
+			continue
+		}
+		if _, vend, err := mm.getVMAsLocked(ctx, ar, at, ignorePermissions); err != nil {
+			return truncatedAddrRangeSeq(ars, arsit, vend.Start()), err
+		}
+	}
+	return ars, nil
+}
+
+// vma extension will not shrink the number of unmapped bytes between the start
+// of a growsDown vma and the end of its predecessor non-growsDown vma below
+// guardBytes.
+//
+// guardBytes is equivalent to Linux's stack_guard_gap after upstream
+// 1be7107fbe18 "mm: larger stack guard gap, between vmas".
+const guardBytes = 256 * usermem.PageSize
+
+// unmapLocked unmaps all addresses in ar and returns the resulting gap in
+// mm.vmas.
+//
+// Preconditions: mm.mappingMu must be locked for writing. ar.Length() != 0.
+// ar must be page-aligned.
+func (mm *MemoryManager) unmapLocked(ctx context.Context, ar usermem.AddrRange) vmaGapIterator {
+	if checkInvariants {
+		if !ar.WellFormed() || ar.Length() <= 0 || !ar.IsPageAligned() {
+			panic(fmt.Sprintf("invalid ar: %v", ar))
+		}
+	}
+
+	// AddressSpace mappings and pmas must be invalidated before
+	// mm.removeVMAsLocked() => memmap.Mappable.RemoveMapping().
+	mm.Invalidate(ar, memmap.InvalidateOpts{InvalidatePrivate: true})
+	return mm.removeVMAsLocked(ctx, ar)
+}
+
+// removeVMAsLocked removes vmas for addresses in ar and returns the resulting
+// gap in mm.vmas. It does not remove pmas or AddressSpace mappings; clients
+// must do so before calling removeVMAsLocked.
+//
+// Preconditions: mm.mappingMu must be locked for writing. ar.Length() != 0. ar
+// must be page-aligned.
+func (mm *MemoryManager) removeVMAsLocked(ctx context.Context, ar usermem.AddrRange) vmaGapIterator {
+	if checkInvariants {
+		if !ar.WellFormed() || ar.Length() <= 0 || !ar.IsPageAligned() {
+			panic(fmt.Sprintf("invalid ar: %v", ar))
+		}
+	}
+
+	vseg, vgap := mm.vmas.Find(ar.Start)
+	if vgap.Ok() {
+		vseg = vgap.NextSegment()
+	}
+	for vseg.Ok() && vseg.Start() < ar.End {
+		vseg = mm.vmas.Isolate(vseg, ar)
+		vmaAR := vseg.Range()
+		vma := vseg.ValuePtr()
+		if vma.mappable != nil {
+			vma.mappable.RemoveMapping(ctx, mm, vmaAR, vma.off, vma.canWriteMappableLocked())
+		}
+		if vma.id != nil {
+			vma.id.DecRef()
+		}
+		mm.usageAS -= uint64(vmaAR.Length())
+		if vma.isPrivateDataLocked() {
+			mm.dataAS -= uint64(vmaAR.Length())
+		}
+		if vma.mlockMode != memmap.MLockNone {
+			mm.lockedAS -= uint64(vmaAR.Length())
+		}
+		vgap = mm.vmas.Remove(vseg)
+		vseg = vgap.NextSegment()
+	}
+	return vgap
+}
+
+// canWriteMappableLocked returns true if it is possible for vma.mappable to be
+// written to via this vma, i.e. if it is possible that
+// vma.mappable.Translate(at.Write=true) may be called as a result of this vma.
+// This includes via I/O with usermem.IOOpts.IgnorePermissions = true, such as
+// PTRACE_POKEDATA.
+//
+// canWriteMappableLocked is equivalent to Linux's VM_SHARED.
+//
+// Preconditions: mm.mappingMu must be locked.
+func (vma *vma) canWriteMappableLocked() bool {
+	return !vma.private && vma.maxPerms.Write
+}
+
+// isPrivateDataLocked identifies data segments: private, writable, and not
+// stack (growsDown).
+//
+// Preconditions: mm.mappingMu must be locked.
+func (vma *vma) isPrivateDataLocked() bool {
+	return vma.realPerms.Write && vma.private && !vma.growsDown
+}
+
+// vmaSetFunctions implements segment.Functions for vmaSet.
+type vmaSetFunctions struct{}
+
+func (vmaSetFunctions) MinKey() usermem.Addr {
+	return 0
+}
+
+func (vmaSetFunctions) MaxKey() usermem.Addr {
+	return ^usermem.Addr(0)
+}
+
+func (vmaSetFunctions) ClearValue(vma *vma) {
+	vma.mappable = nil
+	vma.id = nil
+	vma.hint = ""
+}
+
+func (vmaSetFunctions) Merge(ar1 usermem.AddrRange, vma1 vma, ar2 usermem.AddrRange, vma2 vma) (vma, bool) {
+	if vma1.mappable != vma2.mappable ||
+		(vma1.mappable != nil && vma1.off+uint64(ar1.Length()) != vma2.off) ||
+		vma1.realPerms != vma2.realPerms ||
+		vma1.maxPerms != vma2.maxPerms ||
+		vma1.private != vma2.private ||
+		vma1.growsDown != vma2.growsDown ||
+		vma1.mlockMode != vma2.mlockMode ||
+		vma1.id != vma2.id ||
+		vma1.hint != vma2.hint {
+		return vma{}, false
+	}
+
+	if vma2.id != nil {
+		vma2.id.DecRef()
+	}
+	return vma1, true
+}
+
+func (vmaSetFunctions) Split(ar usermem.AddrRange, v vma, split usermem.Addr) (vma, vma) {
+	v2 := v
+	if v2.mappable != nil {
+		v2.off += uint64(split - ar.Start)
+	}
+	if v2.id != nil {
+		v2.id.IncRef()
+	}
+	return v, v2
+}
+
+// Preconditions: vseg.ValuePtr().mappable != nil. vseg.Range().Contains(addr).
+func (vseg vmaIterator) mappableOffsetAt(addr usermem.Addr) uint64 {
+	if checkInvariants {
+		if !vseg.Ok() {
+			panic("terminal vma iterator")
+		}
+		if vseg.ValuePtr().mappable == nil {
+			panic("Mappable offset is meaningless for anonymous vma")
+		}
+		if !vseg.Range().Contains(addr) {
+			panic(fmt.Sprintf("addr %v out of bounds %v", addr, vseg.Range()))
+		}
+	}
+
+	vma := vseg.ValuePtr()
+	vstart := vseg.Start()
+	return vma.off + uint64(addr-vstart)
+}
+
+// Preconditions: vseg.ValuePtr().mappable != nil.
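+//
+// For example (illustrative values): a vma spanning addresses
+// [0x400000, 0x402000) with off = 0x1000 has mappableRange [0x1000, 0x3000).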
+func (vseg vmaIterator) mappableRange() memmap.MappableRange { + return vseg.mappableRangeOf(vseg.Range()) +} + +// Preconditions: vseg.ValuePtr().mappable != nil. +// vseg.Range().IsSupersetOf(ar). ar.Length() != 0. +func (vseg vmaIterator) mappableRangeOf(ar usermem.AddrRange) memmap.MappableRange { + if checkInvariants { + if !vseg.Ok() { + panic("terminal vma iterator") + } + if vseg.ValuePtr().mappable == nil { + panic("MappableRange is meaningless for anonymous vma") + } + if !ar.WellFormed() || ar.Length() <= 0 { + panic(fmt.Sprintf("invalid ar: %v", ar)) + } + if !vseg.Range().IsSupersetOf(ar) { + panic(fmt.Sprintf("ar %v out of bounds %v", ar, vseg.Range())) + } + } + + vma := vseg.ValuePtr() + vstart := vseg.Start() + return memmap.MappableRange{vma.off + uint64(ar.Start-vstart), vma.off + uint64(ar.End-vstart)} +} + +// Preconditions: vseg.ValuePtr().mappable != nil. +// vseg.mappableRange().IsSupersetOf(mr). mr.Length() != 0. +func (vseg vmaIterator) addrRangeOf(mr memmap.MappableRange) usermem.AddrRange { + if checkInvariants { + if !vseg.Ok() { + panic("terminal vma iterator") + } + if vseg.ValuePtr().mappable == nil { + panic("MappableRange is meaningless for anonymous vma") + } + if !mr.WellFormed() || mr.Length() <= 0 { + panic(fmt.Sprintf("invalid mr: %v", mr)) + } + if !vseg.mappableRange().IsSupersetOf(mr) { + panic(fmt.Sprintf("mr %v out of bounds %v", mr, vseg.mappableRange())) + } + } + + vma := vseg.ValuePtr() + vstart := vseg.Start() + return usermem.AddrRange{vstart + usermem.Addr(mr.Start-vma.off), vstart + usermem.Addr(mr.End-vma.off)} +} + +// seekNextLowerBound returns mm.vmas.LowerBoundSegment(addr), but does so by +// scanning linearly forward from vseg. +// +// Preconditions: mm.mappingMu must be locked. addr >= vseg.Start(). +func (vseg vmaIterator) seekNextLowerBound(addr usermem.Addr) vmaIterator { + if checkInvariants { + if !vseg.Ok() { + panic("terminal vma iterator") + } + if addr < vseg.Start() { + panic(fmt.Sprintf("can't seek forward to %#x from %#x", addr, vseg.Start())) + } + } + for vseg.Ok() && addr >= vseg.End() { + vseg = vseg.NextSegment() + } + return vseg +} + +// availableRange returns the subset of vgap.Range() in which new vmas may be +// created without MMapOpts.Unmap == true. +func (vgap vmaGapIterator) availableRange() usermem.AddrRange { + ar := vgap.Range() + next := vgap.NextSegment() + if !next.Ok() || !next.ValuePtr().growsDown { + return ar + } + // Exclude guard pages. + if ar.Length() < guardBytes { + return usermem.AddrRange{ar.Start, ar.Start} + } + ar.End -= guardBytes + return ar +} diff --git a/pkg/sentry/mm/vma_set.go b/pkg/sentry/mm/vma_set.go new file mode 100755 index 000000000..c042fe606 --- /dev/null +++ b/pkg/sentry/mm/vma_set.go @@ -0,0 +1,1274 @@ +package mm + +import ( + __generics_imported0 "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" +) + +import ( + "bytes" + "fmt" +) + +const ( + // minDegree is the minimum degree of an internal node in a Set B-tree. + // + // - Any non-root node has at least minDegree-1 segments. + // + // - Any non-root internal (non-leaf) node has at least minDegree children. + // + // - The root node may have fewer than minDegree-1 segments, but it may + // only have 0 segments if the tree is empty. + // + // Our implementation requires minDegree >= 3. Higher values of minDegree + // usually improve performance, but increase memory usage for small sets. 
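+	//
+	// With minDegree = 8 (and thus maxDegree = 16), each node holds between
+	// minDegree-1 = 7 and maxDegree-1 = 15 segments, except the root, which
+	// may hold fewer.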
+ vmaminDegree = 8 + + vmamaxDegree = 2 * vmaminDegree +) + +// A Set is a mapping of segments with non-overlapping Range keys. The zero +// value for a Set is an empty set. Set values are not safely movable nor +// copyable. Set is thread-compatible. +// +// +stateify savable +type vmaSet struct { + root vmanode `state:".(*vmaSegmentDataSlices)"` +} + +// IsEmpty returns true if the set contains no segments. +func (s *vmaSet) IsEmpty() bool { + return s.root.nrSegments == 0 +} + +// IsEmptyRange returns true iff no segments in the set overlap the given +// range. This is semantically equivalent to s.SpanRange(r) == 0, but may be +// more efficient. +func (s *vmaSet) IsEmptyRange(r __generics_imported0.AddrRange) bool { + switch { + case r.Length() < 0: + panic(fmt.Sprintf("invalid range %v", r)) + case r.Length() == 0: + return true + } + _, gap := s.Find(r.Start) + if !gap.Ok() { + return false + } + return r.End <= gap.End() +} + +// Span returns the total size of all segments in the set. +func (s *vmaSet) Span() __generics_imported0.Addr { + var sz __generics_imported0.Addr + for seg := s.FirstSegment(); seg.Ok(); seg = seg.NextSegment() { + sz += seg.Range().Length() + } + return sz +} + +// SpanRange returns the total size of the intersection of segments in the set +// with the given range. +func (s *vmaSet) SpanRange(r __generics_imported0.AddrRange) __generics_imported0.Addr { + switch { + case r.Length() < 0: + panic(fmt.Sprintf("invalid range %v", r)) + case r.Length() == 0: + return 0 + } + var sz __generics_imported0.Addr + for seg := s.LowerBoundSegment(r.Start); seg.Ok() && seg.Start() < r.End; seg = seg.NextSegment() { + sz += seg.Range().Intersect(r).Length() + } + return sz +} + +// FirstSegment returns the first segment in the set. If the set is empty, +// FirstSegment returns a terminal iterator. +func (s *vmaSet) FirstSegment() vmaIterator { + if s.root.nrSegments == 0 { + return vmaIterator{} + } + return s.root.firstSegment() +} + +// LastSegment returns the last segment in the set. If the set is empty, +// LastSegment returns a terminal iterator. +func (s *vmaSet) LastSegment() vmaIterator { + if s.root.nrSegments == 0 { + return vmaIterator{} + } + return s.root.lastSegment() +} + +// FirstGap returns the first gap in the set. +func (s *vmaSet) FirstGap() vmaGapIterator { + n := &s.root + for n.hasChildren { + n = n.children[0] + } + return vmaGapIterator{n, 0} +} + +// LastGap returns the last gap in the set. +func (s *vmaSet) LastGap() vmaGapIterator { + n := &s.root + for n.hasChildren { + n = n.children[n.nrSegments] + } + return vmaGapIterator{n, n.nrSegments} +} + +// Find returns the segment or gap whose range contains the given key. If a +// segment is found, the returned Iterator is non-terminal and the +// returned GapIterator is terminal. Otherwise, the returned Iterator is +// terminal and the returned GapIterator is non-terminal. +func (s *vmaSet) Find(key __generics_imported0.Addr) (vmaIterator, vmaGapIterator) { + n := &s.root + for { + + lower := 0 + upper := n.nrSegments + for lower < upper { + i := lower + (upper-lower)/2 + if r := n.keys[i]; key < r.End { + if key >= r.Start { + return vmaIterator{n, i}, vmaGapIterator{} + } + upper = i + } else { + lower = i + 1 + } + } + i := lower + if !n.hasChildren { + return vmaIterator{}, vmaGapIterator{n, i} + } + n = n.children[i] + } +} + +// FindSegment returns the segment whose range contains the given key. If no +// such segment exists, FindSegment returns a terminal iterator. 
+func (s *vmaSet) FindSegment(key __generics_imported0.Addr) vmaIterator {
+	seg, _ := s.Find(key)
+	return seg
+}
+
+// LowerBoundSegment returns the segment with the lowest range that contains a
+// key greater than or equal to min. If no such segment exists,
+// LowerBoundSegment returns a terminal iterator.
+func (s *vmaSet) LowerBoundSegment(min __generics_imported0.Addr) vmaIterator {
+	seg, gap := s.Find(min)
+	if seg.Ok() {
+		return seg
+	}
+	return gap.NextSegment()
+}
+
+// UpperBoundSegment returns the segment with the highest range that contains a
+// key less than or equal to max. If no such segment exists, UpperBoundSegment
+// returns a terminal iterator.
+func (s *vmaSet) UpperBoundSegment(max __generics_imported0.Addr) vmaIterator {
+	seg, gap := s.Find(max)
+	if seg.Ok() {
+		return seg
+	}
+	return gap.PrevSegment()
+}
+
+// FindGap returns the gap containing the given key. If no such gap exists
+// (i.e. the set contains a segment containing that key), FindGap returns a
+// terminal iterator.
+func (s *vmaSet) FindGap(key __generics_imported0.Addr) vmaGapIterator {
+	_, gap := s.Find(key)
+	return gap
+}
+
+// LowerBoundGap returns the gap with the lowest range that is greater than or
+// equal to min.
+func (s *vmaSet) LowerBoundGap(min __generics_imported0.Addr) vmaGapIterator {
+	seg, gap := s.Find(min)
+	if gap.Ok() {
+		return gap
+	}
+	return seg.NextGap()
+}
+
+// UpperBoundGap returns the gap with the highest range that is less than or
+// equal to max.
+func (s *vmaSet) UpperBoundGap(max __generics_imported0.Addr) vmaGapIterator {
+	seg, gap := s.Find(max)
+	if gap.Ok() {
+		return gap
+	}
+	return seg.PrevGap()
+}
+
+// Add inserts the given segment into the set and returns true. If the new
+// segment can be merged with adjacent segments, Add will do so. If the new
+// segment would overlap an existing segment, Add returns false. If Add
+// succeeds, all existing iterators are invalidated.
+func (s *vmaSet) Add(r __generics_imported0.AddrRange, val vma) bool {
+	if r.Length() <= 0 {
+		panic(fmt.Sprintf("invalid segment range %v", r))
+	}
+	gap := s.FindGap(r.Start)
+	if !gap.Ok() {
+		return false
+	}
+	if r.End > gap.End() {
+		return false
+	}
+	s.Insert(gap, r, val)
+	return true
+}
+
+// AddWithoutMerging inserts the given segment into the set and returns true.
+// If it would overlap an existing segment, AddWithoutMerging does nothing and
+// returns false. If AddWithoutMerging succeeds, all existing iterators are
+// invalidated.
+func (s *vmaSet) AddWithoutMerging(r __generics_imported0.AddrRange, val vma) bool {
+	if r.Length() <= 0 {
+		panic(fmt.Sprintf("invalid segment range %v", r))
+	}
+	gap := s.FindGap(r.Start)
+	if !gap.Ok() {
+		return false
+	}
+	if r.End > gap.End() {
+		return false
+	}
+	s.InsertWithoutMergingUnchecked(gap, r, val)
+	return true
+}
+
+// Insert inserts the given segment into the given gap. If the new segment can
+// be merged with adjacent segments, Insert will do so. Insert returns an
+// iterator to the segment containing the inserted value (which may have been
+// merged with other values). All existing iterators (including gap, but not
+// including the returned iterator) are invalidated.
+//
+// If the gap cannot accommodate the segment, or if r is invalid, Insert panics.
+//
+// Insert is semantically equivalent to an InsertWithoutMerging followed by a
+// Merge, but may be more efficient.
+// Note that there is no unchecked variant of Insert since Insert must
+// retrieve and inspect gap's predecessor and successor segments regardless.
+func (s *vmaSet) Insert(gap vmaGapIterator, r __generics_imported0.AddrRange, val vma) vmaIterator {
+	if r.Length() <= 0 {
+		panic(fmt.Sprintf("invalid segment range %v", r))
+	}
+	prev, next := gap.PrevSegment(), gap.NextSegment()
+	if prev.Ok() && prev.End() > r.Start {
+		panic(fmt.Sprintf("new segment %v overlaps predecessor %v", r, prev.Range()))
+	}
+	if next.Ok() && next.Start() < r.End {
+		panic(fmt.Sprintf("new segment %v overlaps successor %v", r, next.Range()))
+	}
+	if prev.Ok() && prev.End() == r.Start {
+		if mval, ok := (vmaSetFunctions{}).Merge(prev.Range(), prev.Value(), r, val); ok {
+			prev.SetEndUnchecked(r.End)
+			prev.SetValue(mval)
+			if next.Ok() && next.Start() == r.End {
+				val = mval
+				if mval, ok := (vmaSetFunctions{}).Merge(prev.Range(), val, next.Range(), next.Value()); ok {
+					prev.SetEndUnchecked(next.End())
+					prev.SetValue(mval)
+					return s.Remove(next).PrevSegment()
+				}
+			}
+			return prev
+		}
+	}
+	if next.Ok() && next.Start() == r.End {
+		if mval, ok := (vmaSetFunctions{}).Merge(r, val, next.Range(), next.Value()); ok {
+			next.SetStartUnchecked(r.Start)
+			next.SetValue(mval)
+			return next
+		}
+	}
+	return s.InsertWithoutMergingUnchecked(gap, r, val)
+}
+
+// InsertWithoutMerging inserts the given segment into the given gap and
+// returns an iterator to the inserted segment. All existing iterators
+// (including gap, but not including the returned iterator) are invalidated.
+//
+// If the gap cannot accommodate the segment, or if r is invalid,
+// InsertWithoutMerging panics.
+func (s *vmaSet) InsertWithoutMerging(gap vmaGapIterator, r __generics_imported0.AddrRange, val vma) vmaIterator {
+	if r.Length() <= 0 {
+		panic(fmt.Sprintf("invalid segment range %v", r))
+	}
+	if gr := gap.Range(); !gr.IsSupersetOf(r) {
+		panic(fmt.Sprintf("cannot insert segment range %v into gap range %v", r, gr))
+	}
+	return s.InsertWithoutMergingUnchecked(gap, r, val)
+}
+
+// InsertWithoutMergingUnchecked inserts the given segment into the given gap
+// and returns an iterator to the inserted segment. All existing iterators
+// (including gap, but not including the returned iterator) are invalidated.
+//
+// Preconditions: r.Start >= gap.Start(); r.End <= gap.End().
+func (s *vmaSet) InsertWithoutMergingUnchecked(gap vmaGapIterator, r __generics_imported0.AddrRange, val vma) vmaIterator {
+	gap = gap.node.rebalanceBeforeInsert(gap)
+	copy(gap.node.keys[gap.index+1:], gap.node.keys[gap.index:gap.node.nrSegments])
+	copy(gap.node.values[gap.index+1:], gap.node.values[gap.index:gap.node.nrSegments])
+	gap.node.keys[gap.index] = r
+	gap.node.values[gap.index] = val
+	gap.node.nrSegments++
+	return vmaIterator{gap.node, gap.index}
+}
+
+// Remove removes the given segment and returns an iterator to the vacated gap.
+// All existing iterators (including seg, but not including the returned
+// iterator) are invalidated.
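Insert's merge logic considers at most three combinations: predecessor + new segment, then (if that succeeded) the result + successor, and otherwise new segment + successor. A self-contained toy of the same decision tree, using value equality as a stand-in for vmaSetFunctions.Merge; every name here is hypothetical:

```go
package main

import "fmt"

type seg struct {
	start, end uint64
	val        string
}

// canMerge plays the role of Functions.Merge: adjacent segments merge only
// if their values agree.
func canMerge(a, b seg) bool { return a.end == b.start && a.val == b.val }

// insert places n between prev and next (either may be nil), merging where
// possible, and returns the resulting run of segments.
func insert(prev, next *seg, n seg) []seg {
	if prev != nil && canMerge(*prev, n) {
		n = seg{prev.start, n.end, n.val} // absorb the predecessor
		prev = nil
	}
	if next != nil && canMerge(n, *next) {
		n = seg{n.start, next.end, n.val} // absorb the successor
		next = nil
	}
	var out []seg
	if prev != nil {
		out = append(out, *prev)
	}
	out = append(out, n)
	if next != nil {
		out = append(out, *next)
	}
	return out
}

func main() {
	a, b := seg{0, 10, "rw"}, seg{20, 30, "rw"}
	fmt.Println(insert(&a, &b, seg{10, 20, "rw"})) // one merged segment [0, 30)
	fmt.Println(insert(&a, &b, seg{10, 20, "ro"})) // three segments
}
```

In the vma instantiation, Merge succeeds only when the two vmas' properties are compatible, which is what keeps adjacent, identical mappings represented as a single vma.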
+func (s *vmaSet) Remove(seg vmaIterator) vmaGapIterator {
+
+	if seg.node.hasChildren {
+
+		victim := seg.PrevSegment()
+
+		seg.SetRangeUnchecked(victim.Range())
+		seg.SetValue(victim.Value())
+		return s.Remove(victim).NextGap()
+	}
+	copy(seg.node.keys[seg.index:], seg.node.keys[seg.index+1:seg.node.nrSegments])
+	copy(seg.node.values[seg.index:], seg.node.values[seg.index+1:seg.node.nrSegments])
+	vmaSetFunctions{}.ClearValue(&seg.node.values[seg.node.nrSegments-1])
+	seg.node.nrSegments--
+	return seg.node.rebalanceAfterRemove(vmaGapIterator{seg.node, seg.index})
+}
+
+// RemoveAll removes all segments from the set. All existing iterators are
+// invalidated.
+func (s *vmaSet) RemoveAll() {
+	s.root = vmanode{}
+}
+
+// RemoveRange removes all segments in the given range. An iterator to the
+// newly formed gap is returned, and all existing iterators are invalidated.
+func (s *vmaSet) RemoveRange(r __generics_imported0.AddrRange) vmaGapIterator {
+	seg, gap := s.Find(r.Start)
+	if seg.Ok() {
+		seg = s.Isolate(seg, r)
+		gap = s.Remove(seg)
+	}
+	for seg = gap.NextSegment(); seg.Ok() && seg.Start() < r.End; seg = gap.NextSegment() {
+		seg = s.Isolate(seg, r)
+		gap = s.Remove(seg)
+	}
+	return gap
+}
+
+// Merge attempts to merge two neighboring segments. If successful, Merge
+// returns an iterator to the merged segment, and all existing iterators are
+// invalidated. Otherwise, Merge returns a terminal iterator.
+//
+// If first is not the predecessor of second, Merge panics.
+func (s *vmaSet) Merge(first, second vmaIterator) vmaIterator {
+	if first.NextSegment() != second {
+		panic(fmt.Sprintf("attempt to merge non-neighboring segments %v, %v", first.Range(), second.Range()))
+	}
+	return s.MergeUnchecked(first, second)
+}
+
+// MergeUnchecked attempts to merge two neighboring segments. If successful,
+// MergeUnchecked returns an iterator to the merged segment, and all existing
+// iterators are invalidated. Otherwise, MergeUnchecked returns a terminal
+// iterator.
+//
+// Precondition: first is the predecessor of second: first.NextSegment() ==
+// second, first == second.PrevSegment().
+func (s *vmaSet) MergeUnchecked(first, second vmaIterator) vmaIterator {
+	if first.End() == second.Start() {
+		if mval, ok := (vmaSetFunctions{}).Merge(first.Range(), first.Value(), second.Range(), second.Value()); ok {
+
+			first.SetEndUnchecked(second.End())
+			first.SetValue(mval)
+			return s.Remove(second).PrevSegment()
+		}
+	}
+	return vmaIterator{}
+}
+
+// MergeAll attempts to merge all adjacent segments in the set. All existing
+// iterators are invalidated.
+func (s *vmaSet) MergeAll() {
+	seg := s.FirstSegment()
+	if !seg.Ok() {
+		return
+	}
+	next := seg.NextSegment()
+	for next.Ok() {
+		if mseg := s.MergeUnchecked(seg, next); mseg.Ok() {
+			seg, next = mseg, mseg.NextSegment()
+		} else {
+			seg, next = next, next.NextSegment()
+		}
+	}
+}
+
+// MergeRange attempts to merge all adjacent segments that contain a key in
+// the specified range. All existing iterators are invalidated.
+func (s *vmaSet) MergeRange(r __generics_imported0.AddrRange) {
+	seg := s.LowerBoundSegment(r.Start)
+	if !seg.Ok() {
+		return
+	}
+	next := seg.NextSegment()
+	for next.Ok() && next.Range().Start < r.End {
+		if mseg := s.MergeUnchecked(seg, next); mseg.Ok() {
+			seg, next = mseg, mseg.NextSegment()
+		} else {
+			seg, next = next, next.NextSegment()
+		}
+	}
+}
+
+// MergeAdjacent attempts to merge the segment containing r.Start with its
+// predecessor, and the segment containing r.End-1 with its successor.
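MergeAll's traversal reduces to a single forward pass: try to merge the current pair, and continue from whichever segment survived. The same pass, linearized over a slice instead of in-place iterators (toy types, value-equality merge rule; nothing here is generated code):

```go
package main

import "fmt"

type seg struct {
	start, end uint64
	val        string
}

// mergeAll folds each segment into its predecessor when they touch and
// their values agree, mirroring MergeAll's "merge or advance" walk.
func mergeAll(segs []seg) []seg {
	var out []seg
	for _, s := range segs {
		if n := len(out); n > 0 && out[n-1].end == s.start && out[n-1].val == s.val {
			out[n-1].end = s.end // merged into the predecessor
			continue
		}
		out = append(out, s)
	}
	return out
}

func main() {
	fmt.Println(mergeAll([]seg{{0, 1, "a"}, {1, 2, "a"}, {2, 3, "b"}}))
	// Output: [{0 2 a} {2 3 b}]
}
```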
+func (s *vmaSet) MergeAdjacent(r __generics_imported0.AddrRange) {
+	first := s.FindSegment(r.Start)
+	if first.Ok() {
+		if prev := first.PrevSegment(); prev.Ok() {
+			s.Merge(prev, first)
+		}
+	}
+	last := s.FindSegment(r.End - 1)
+	if last.Ok() {
+		if next := last.NextSegment(); next.Ok() {
+			s.Merge(last, next)
+		}
+	}
+}
+
+// Split splits the given segment at the given key and returns iterators to the
+// two resulting segments. All existing iterators (including seg, but not
+// including the returned iterators) are invalidated.
+//
+// If the segment cannot be split at split (because split is at the start or
+// end of the segment's range, so splitting would produce a segment with zero
+// length, or because split falls outside the segment's range altogether),
+// Split panics.
+func (s *vmaSet) Split(seg vmaIterator, split __generics_imported0.Addr) (vmaIterator, vmaIterator) {
+	if !seg.Range().CanSplitAt(split) {
+		panic(fmt.Sprintf("can't split %v at %v", seg.Range(), split))
+	}
+	return s.SplitUnchecked(seg, split)
+}
+
+// SplitUnchecked splits the given segment at the given key and returns
+// iterators to the two resulting segments. All existing iterators (including
+// seg, but not including the returned iterators) are invalidated.
+//
+// Preconditions: seg.Start() < key < seg.End().
+func (s *vmaSet) SplitUnchecked(seg vmaIterator, split __generics_imported0.Addr) (vmaIterator, vmaIterator) {
+	val1, val2 := (vmaSetFunctions{}).Split(seg.Range(), seg.Value(), split)
+	end2 := seg.End()
+	seg.SetEndUnchecked(split)
+	seg.SetValue(val1)
+	seg2 := s.InsertWithoutMergingUnchecked(seg.NextGap(), __generics_imported0.AddrRange{split, end2}, val2)
+
+	return seg2.PrevSegment(), seg2
+}
+
+// SplitAt splits the segment straddling split, if one exists. SplitAt returns
+// true if a segment was split and false otherwise. If SplitAt splits a
+// segment, all existing iterators are invalidated.
+func (s *vmaSet) SplitAt(split __generics_imported0.Addr) bool {
+	if seg := s.FindSegment(split); seg.Ok() && seg.Range().CanSplitAt(split) {
+		s.SplitUnchecked(seg, split)
+		return true
+	}
+	return false
+}
+
+// Isolate ensures that the given segment's range does not escape r by
+// splitting at r.Start and r.End if necessary, and returns an updated iterator
+// to the bounded segment. All existing iterators (including seg, but not
+// including the returned iterators) are invalidated.
+func (s *vmaSet) Isolate(seg vmaIterator, r __generics_imported0.AddrRange) vmaIterator {
+	if seg.Range().CanSplitAt(r.Start) {
+		_, seg = s.SplitUnchecked(seg, r.Start)
+	}
+	if seg.Range().CanSplitAt(r.End) {
+		seg, _ = s.SplitUnchecked(seg, r.End)
+	}
+	return seg
+}
+
+// ApplyContiguous applies a function to a contiguous range of segments,
+// splitting if necessary. The function is applied until the first gap is
+// encountered, at which point the gap is returned. If the function is applied
+// across the entire range, a terminal gap is returned. All existing iterators
+// are invalidated.
+//
+// N.B. The Iterator must not be invalidated by the function.
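Isolate is the workhorse behind RemoveRange and ApplyContiguous: it clips one segment to a range with at most two splits. A self-contained sketch of the same clipping over a single range (all names hypothetical):

```go
package main

import "fmt"

type rng struct{ start, end uint64 }

// isolate returns the pieces of s after splitting at r.start and r.end where
// those keys fall strictly inside s, plus the index of the piece that lies
// within r — the piece Isolate would return an iterator to.
func isolate(s, r rng) (pieces []rng, inner int) {
	if s.start < r.start && r.start < s.end {
		pieces = append(pieces, rng{s.start, r.start})
		s.start = r.start
	}
	if s.start < r.end && r.end < s.end {
		pieces = append(pieces, rng{s.start, r.end})
		inner = len(pieces) - 1
		return append(pieces, rng{r.end, s.end}), inner
	}
	return append(pieces, s), len(pieces) - 1
}

func main() {
	fmt.Println(isolate(rng{0, 100}, rng{25, 75}))
	// Output: [{0 25} {25 75} {75 100}] 1
}
```

This is, for example, why an munmap of the middle of a large mapping leaves two smaller vmas behind.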
+func (s *vmaSet) ApplyContiguous(r __generics_imported0.AddrRange, fn func(seg vmaIterator)) vmaGapIterator {
+	seg, gap := s.Find(r.Start)
+	if !seg.Ok() {
+		return gap
+	}
+	for {
+		seg = s.Isolate(seg, r)
+		fn(seg)
+		if seg.End() >= r.End {
+			return vmaGapIterator{}
+		}
+		gap = seg.NextGap()
+		if !gap.IsEmpty() {
+			return gap
+		}
+		seg = gap.NextSegment()
+		if !seg.Ok() {
+
+			return vmaGapIterator{}
+		}
+	}
+}
+
+// +stateify savable
+type vmanode struct {
+	// An internal binary tree node looks like:
+	//
+	//   K
+	//  / \
+	// Cl Cr
+	//
+	// where all keys in the subtree rooted by Cl (the left subtree) are less
+	// than K (the key of the parent node), and all keys in the subtree rooted
+	// by Cr (the right subtree) are greater than K.
+	//
+	// An internal B-tree node's indexes work out to look like:
+	//
+	//   K0 K1 K2  ...   Kn-1
+	//  / \/ \/ \  ...  /  \
+	// C0 C1 C2 C3 ... Cn-1 Cn
+	//
+	// where n is nrSegments.
+	nrSegments int
+
+	// parent is a pointer to this node's parent. If this node is root, parent
+	// is nil.
+	parent *vmanode
+
+	// parentIndex is the index of this node in parent.children.
+	parentIndex int
+
+	// Flag for internal nodes that is technically redundant with "children[0]
+	// != nil", but is stored in the first cache line. "hasChildren" rather
+	// than "isLeaf" because false must be the correct value for an empty root.
+	hasChildren bool
+
+	// Nodes store keys and values in separate arrays to maximize locality in
+	// the common case (scanning keys for lookup).
+	keys     [vmamaxDegree - 1]__generics_imported0.AddrRange
+	values   [vmamaxDegree - 1]vma
+	children [vmamaxDegree]*vmanode
+}
+
+// firstSegment returns the first segment in the subtree rooted by n.
+//
+// Preconditions: n.nrSegments != 0.
+func (n *vmanode) firstSegment() vmaIterator {
+	for n.hasChildren {
+		n = n.children[0]
+	}
+	return vmaIterator{n, 0}
+}
+
+// lastSegment returns the last segment in the subtree rooted by n.
+//
+// Preconditions: n.nrSegments != 0.
+func (n *vmanode) lastSegment() vmaIterator {
+	for n.hasChildren {
+		n = n.children[n.nrSegments]
+	}
+	return vmaIterator{n, n.nrSegments - 1}
+}
+
+func (n *vmanode) prevSibling() *vmanode {
+	if n.parent == nil || n.parentIndex == 0 {
+		return nil
+	}
+	return n.parent.children[n.parentIndex-1]
+}
+
+func (n *vmanode) nextSibling() *vmanode {
+	if n.parent == nil || n.parentIndex == n.parent.nrSegments {
+		return nil
+	}
+	return n.parent.children[n.parentIndex+1]
+}
+
+// rebalanceBeforeInsert splits n and its ancestors if they are full, as
+// required for insertion, and returns an updated iterator to the position
+// represented by gap.
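The node-size invariants follow directly from the two constants at the top of the file; this snippet just evaluates the arithmetic (none of it is part of the generated code):

```go
package main

import "fmt"

func main() {
	const minDegree = 8             // vmaminDegree above
	const maxDegree = 2 * minDegree // vmamaxDegree = 16
	// Every node except the root holds between minDegree-1 and maxDegree-1
	// segments, and an internal node always has nrSegments+1 children.
	fmt.Printf("segments per node: %d..%d\n", minDegree-1, maxDegree-1) // 7..15
	fmt.Printf("children per internal node: %d..%d\n", minDegree, maxDegree)
}
```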
+func (n *vmanode) rebalanceBeforeInsert(gap vmaGapIterator) vmaGapIterator {
+	if n.parent != nil {
+		gap = n.parent.rebalanceBeforeInsert(gap)
+	}
+	if n.nrSegments < vmamaxDegree-1 {
+		return gap
+	}
+	if n.parent == nil {
+
+		left := &vmanode{
+			nrSegments:  vmaminDegree - 1,
+			parent:      n,
+			parentIndex: 0,
+			hasChildren: n.hasChildren,
+		}
+		right := &vmanode{
+			nrSegments:  vmaminDegree - 1,
+			parent:      n,
+			parentIndex: 1,
+			hasChildren: n.hasChildren,
+		}
+		copy(left.keys[:vmaminDegree-1], n.keys[:vmaminDegree-1])
+		copy(left.values[:vmaminDegree-1], n.values[:vmaminDegree-1])
+		copy(right.keys[:vmaminDegree-1], n.keys[vmaminDegree:])
+		copy(right.values[:vmaminDegree-1], n.values[vmaminDegree:])
+		n.keys[0], n.values[0] = n.keys[vmaminDegree-1], n.values[vmaminDegree-1]
+		vmazeroValueSlice(n.values[1:])
+		if n.hasChildren {
+			copy(left.children[:vmaminDegree], n.children[:vmaminDegree])
+			copy(right.children[:vmaminDegree], n.children[vmaminDegree:])
+			vmazeroNodeSlice(n.children[2:])
+			for i := 0; i < vmaminDegree; i++ {
+				left.children[i].parent = left
+				left.children[i].parentIndex = i
+				right.children[i].parent = right
+				right.children[i].parentIndex = i
+			}
+		}
+		n.nrSegments = 1
+		n.hasChildren = true
+		n.children[0] = left
+		n.children[1] = right
+		if gap.node != n {
+			return gap
+		}
+		if gap.index < vmaminDegree {
+			return vmaGapIterator{left, gap.index}
+		}
+		return vmaGapIterator{right, gap.index - vmaminDegree}
+	}
+
+	copy(n.parent.keys[n.parentIndex+1:], n.parent.keys[n.parentIndex:n.parent.nrSegments])
+	copy(n.parent.values[n.parentIndex+1:], n.parent.values[n.parentIndex:n.parent.nrSegments])
+	n.parent.keys[n.parentIndex], n.parent.values[n.parentIndex] = n.keys[vmaminDegree-1], n.values[vmaminDegree-1]
+	copy(n.parent.children[n.parentIndex+2:], n.parent.children[n.parentIndex+1:n.parent.nrSegments+1])
+	for i := n.parentIndex + 2; i < n.parent.nrSegments+2; i++ {
+		n.parent.children[i].parentIndex = i
+	}
+	sibling := &vmanode{
+		nrSegments:  vmaminDegree - 1,
+		parent:      n.parent,
+		parentIndex: n.parentIndex + 1,
+		hasChildren: n.hasChildren,
+	}
+	n.parent.children[n.parentIndex+1] = sibling
+	n.parent.nrSegments++
+	copy(sibling.keys[:vmaminDegree-1], n.keys[vmaminDegree:])
+	copy(sibling.values[:vmaminDegree-1], n.values[vmaminDegree:])
+	vmazeroValueSlice(n.values[vmaminDegree-1:])
+	if n.hasChildren {
+		copy(sibling.children[:vmaminDegree], n.children[vmaminDegree:])
+		vmazeroNodeSlice(n.children[vmaminDegree:])
+		for i := 0; i < vmaminDegree; i++ {
+			sibling.children[i].parent = sibling
+			sibling.children[i].parentIndex = i
+		}
+	}
+	n.nrSegments = vmaminDegree - 1
+
+	if gap.node != n {
+		return gap
+	}
+	if gap.index < vmaminDegree {
+		return gap
+	}
+	return vmaGapIterator{sibling, gap.index - vmaminDegree}
+}
+
+// rebalanceAfterRemove "unsplits" n and its ancestors if they are deficient
+// (contain fewer segments than required by B-tree invariants), as required for
+// removal, and returns an updated iterator to the position represented by gap.
+//
+// Precondition: n is the only node in the tree that may currently violate a
+// B-tree invariant.
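Both branches above split a full node's 15 segments as 7 to one side, 7 to the other, with the median promoted (to a fresh root or into the existing parent). Stripped of children and parent bookkeeping, the key movement is just the following; splitFull is a hypothetical helper, not generated code:

```go
package main

import "fmt"

// splitFull distributes a full node's keys: keys[:minDegree-1] go left,
// keys[minDegree:] go right, and keys[minDegree-1] is promoted.
func splitFull(keys []uint64, minDegree int) (left, right []uint64, median uint64) {
	left = append([]uint64(nil), keys[:minDegree-1]...)
	right = append([]uint64(nil), keys[minDegree:]...)
	return left, right, keys[minDegree-1]
}

func main() {
	keys := make([]uint64, 15) // a full node when maxDegree = 16
	for i := range keys {
		keys[i] = uint64(i)
	}
	l, r, m := splitFull(keys, 8)
	fmt.Println(len(l), m, len(r)) // Output: 7 7 7
}
```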
+func (n *vmanode) rebalanceAfterRemove(gap vmaGapIterator) vmaGapIterator {
+	for {
+		if n.nrSegments >= vmaminDegree-1 {
+			return gap
+		}
+		if n.parent == nil {
+
+			return gap
+		}
+
+		if sibling := n.prevSibling(); sibling != nil && sibling.nrSegments >= vmaminDegree {
+			copy(n.keys[1:], n.keys[:n.nrSegments])
+			copy(n.values[1:], n.values[:n.nrSegments])
+			n.keys[0] = n.parent.keys[n.parentIndex-1]
+			n.values[0] = n.parent.values[n.parentIndex-1]
+			n.parent.keys[n.parentIndex-1] = sibling.keys[sibling.nrSegments-1]
+			n.parent.values[n.parentIndex-1] = sibling.values[sibling.nrSegments-1]
+			vmaSetFunctions{}.ClearValue(&sibling.values[sibling.nrSegments-1])
+			if n.hasChildren {
+				copy(n.children[1:], n.children[:n.nrSegments+1])
+				n.children[0] = sibling.children[sibling.nrSegments]
+				sibling.children[sibling.nrSegments] = nil
+				n.children[0].parent = n
+				n.children[0].parentIndex = 0
+				for i := 1; i < n.nrSegments+2; i++ {
+					n.children[i].parentIndex = i
+				}
+			}
+			n.nrSegments++
+			sibling.nrSegments--
+			if gap.node == sibling && gap.index == sibling.nrSegments {
+				return vmaGapIterator{n, 0}
+			}
+			if gap.node == n {
+				return vmaGapIterator{n, gap.index + 1}
+			}
+			return gap
+		}
+		if sibling := n.nextSibling(); sibling != nil && sibling.nrSegments >= vmaminDegree {
+			n.keys[n.nrSegments] = n.parent.keys[n.parentIndex]
+			n.values[n.nrSegments] = n.parent.values[n.parentIndex]
+			n.parent.keys[n.parentIndex] = sibling.keys[0]
+			n.parent.values[n.parentIndex] = sibling.values[0]
+			copy(sibling.keys[:sibling.nrSegments-1], sibling.keys[1:])
+			copy(sibling.values[:sibling.nrSegments-1], sibling.values[1:])
+			vmaSetFunctions{}.ClearValue(&sibling.values[sibling.nrSegments-1])
+			if n.hasChildren {
+				n.children[n.nrSegments+1] = sibling.children[0]
+				copy(sibling.children[:sibling.nrSegments], sibling.children[1:])
+				sibling.children[sibling.nrSegments] = nil
+				n.children[n.nrSegments+1].parent = n
+				n.children[n.nrSegments+1].parentIndex = n.nrSegments + 1
+				for i := 0; i < sibling.nrSegments; i++ {
+					sibling.children[i].parentIndex = i
+				}
+			}
+			n.nrSegments++
+			sibling.nrSegments--
+			if gap.node == sibling {
+				if gap.index == 0 {
+					return vmaGapIterator{n, n.nrSegments}
+				}
+				return vmaGapIterator{sibling, gap.index - 1}
+			}
+			return gap
+		}
+
+		p := n.parent
+		if p.nrSegments == 1 {
+
+			left, right := p.children[0], p.children[1]
+			p.nrSegments = left.nrSegments + right.nrSegments + 1
+			p.hasChildren = left.hasChildren
+			p.keys[left.nrSegments] = p.keys[0]
+			p.values[left.nrSegments] = p.values[0]
+			copy(p.keys[:left.nrSegments], left.keys[:left.nrSegments])
+			copy(p.values[:left.nrSegments], left.values[:left.nrSegments])
+			copy(p.keys[left.nrSegments+1:], right.keys[:right.nrSegments])
+			copy(p.values[left.nrSegments+1:], right.values[:right.nrSegments])
+			if left.hasChildren {
+				copy(p.children[:left.nrSegments+1], left.children[:left.nrSegments+1])
+				copy(p.children[left.nrSegments+1:], right.children[:right.nrSegments+1])
+				for i := 0; i < p.nrSegments+1; i++ {
+					p.children[i].parent = p
+					p.children[i].parentIndex = i
+				}
+			} else {
+				p.children[0] = nil
+				p.children[1] = nil
+			}
+			if gap.node == left {
+				return vmaGapIterator{p, gap.index}
+			}
+			if gap.node == right {
+				return vmaGapIterator{p, gap.index + left.nrSegments + 1}
+			}
+			return gap
+		}
+		// Merge n and either sibling, along with the segment separating the
+		// two, into whichever of the two nodes comes first. This is the
+		// reverse of the non-root splitting case in
+		// node.rebalanceBeforeInsert.
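The borrow-from-predecessor case above is a rotation through the parent: the separator key drops into the deficient node and the sibling's last key replaces it as the separator. The same rotation on bare key slices — a sketch only; the real code also moves values and children:

```go
package main

import "fmt"

// borrowFromLeft rotates one key from a rich left sibling into a deficient
// node n, through the parent's separator key.
func borrowFromLeft(parentKey uint64, sibling, n []uint64) (newParentKey uint64, newSibling, newN []uint64) {
	newN = append([]uint64{parentKey}, n...) // separator moves down into n
	newParentKey = sibling[len(sibling)-1]   // sibling's last key moves up
	newSibling = sibling[:len(sibling)-1]
	return
}

func main() {
	pk, sib, n := borrowFromLeft(50, []uint64{10, 20, 30}, []uint64{60})
	fmt.Println(pk, sib, n) // Output: 30 [10 20] [50 60]
}
```

Borrowing is preferred over merging because it restores the invariant locally, without shrinking the parent and risking a cascade up the tree.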
+		var left, right *vmanode
+		if n.parentIndex > 0 {
+			left = n.prevSibling()
+			right = n
+		} else {
+			left = n
+			right = n.nextSibling()
+		}
+
+		if gap.node == right {
+			gap = vmaGapIterator{left, gap.index + left.nrSegments + 1}
+		}
+		left.keys[left.nrSegments] = p.keys[left.parentIndex]
+		left.values[left.nrSegments] = p.values[left.parentIndex]
+		copy(left.keys[left.nrSegments+1:], right.keys[:right.nrSegments])
+		copy(left.values[left.nrSegments+1:], right.values[:right.nrSegments])
+		if left.hasChildren {
+			copy(left.children[left.nrSegments+1:], right.children[:right.nrSegments+1])
+			for i := left.nrSegments + 1; i < left.nrSegments+right.nrSegments+2; i++ {
+				left.children[i].parent = left
+				left.children[i].parentIndex = i
+			}
+		}
+		left.nrSegments += right.nrSegments + 1
+		copy(p.keys[left.parentIndex:], p.keys[left.parentIndex+1:p.nrSegments])
+		copy(p.values[left.parentIndex:], p.values[left.parentIndex+1:p.nrSegments])
+		vmaSetFunctions{}.ClearValue(&p.values[p.nrSegments-1])
+		copy(p.children[left.parentIndex+1:], p.children[left.parentIndex+2:p.nrSegments+1])
+		for i := 0; i < p.nrSegments; i++ {
+			p.children[i].parentIndex = i
+		}
+		p.children[p.nrSegments] = nil
+		p.nrSegments--
+
+		n = p
+	}
+}
+
+// An Iterator is conceptually one of:
+//
+// - A pointer to a segment in a set; or
+//
+// - A terminal iterator, which is a sentinel indicating that the end of
+// iteration has been reached.
+//
+// Iterators are copyable values and are meaningfully equality-comparable. The
+// zero value of Iterator is a terminal iterator.
+//
+// Unless otherwise specified, any mutation of a set invalidates all existing
+// iterators into the set.
+type vmaIterator struct {
+	// node is the node containing the iterated segment. If the iterator is
+	// terminal, node is nil.
+	node *vmanode
+
+	// index is the index of the segment in node.keys/values.
+	index int
+}
+
+// Ok returns true if the iterator is not terminal. All other methods are only
+// valid for non-terminal iterators.
+func (seg vmaIterator) Ok() bool {
+	return seg.node != nil
+}
+
+// Range returns the iterated segment's range key.
+func (seg vmaIterator) Range() __generics_imported0.AddrRange {
+	return seg.node.keys[seg.index]
+}
+
+// Start is equivalent to Range().Start, but should be preferred if only the
+// start of the range is needed.
+func (seg vmaIterator) Start() __generics_imported0.Addr {
+	return seg.node.keys[seg.index].Start
+}
+
+// End is equivalent to Range().End, but should be preferred if only the end of
+// the range is needed.
+func (seg vmaIterator) End() __generics_imported0.Addr {
+	return seg.node.keys[seg.index].End
+}
+
+// SetRangeUnchecked mutates the iterated segment's range key. This operation
+// does not invalidate any iterators.
+//
+// Preconditions:
+//
+// - r.Length() > 0.
+//
+// - The new range must not overlap an existing one: If seg.NextSegment().Ok(),
+// then r.End <= seg.NextSegment().Start(); if seg.PrevSegment().Ok(), then
+// r.Start >= seg.PrevSegment().End().
+func (seg vmaIterator) SetRangeUnchecked(r __generics_imported0.AddrRange) {
+	seg.node.keys[seg.index] = r
+}
+
+// SetRange mutates the iterated segment's range key. If the new range would
+// cause the iterated segment to overlap another segment, or if the new range
+// is invalid, SetRange panics. This operation does not invalidate any
+// iterators.
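Since iterators are plain values, the idiomatic traversal is the three-clause for loop already used by Span above. A hypothetical helper in the same package (not part of the generated file), counting segments:

```go
// countSegments shows the standard iteration pattern over the documented
// API: advance a value-type iterator until it becomes terminal.
func countSegments(s *vmaSet) int {
	n := 0
	for seg := s.FirstSegment(); seg.Ok(); seg = seg.NextSegment() {
		n++
	}
	return n
}
```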
+func (seg vmaIterator) SetRange(r __generics_imported0.AddrRange) {
+	if r.Length() <= 0 {
+		panic(fmt.Sprintf("invalid segment range %v", r))
+	}
+	if prev := seg.PrevSegment(); prev.Ok() && r.Start < prev.End() {
+		panic(fmt.Sprintf("new segment range %v overlaps segment range %v", r, prev.Range()))
+	}
+	if next := seg.NextSegment(); next.Ok() && r.End > next.Start() {
+		panic(fmt.Sprintf("new segment range %v overlaps segment range %v", r, next.Range()))
+	}
+	seg.SetRangeUnchecked(r)
+}
+
+// SetStartUnchecked mutates the iterated segment's start. This operation does
+// not invalidate any iterators.
+//
+// Preconditions: The new start must be valid: start < seg.End(); if
+// seg.PrevSegment().Ok(), then start >= seg.PrevSegment().End().
+func (seg vmaIterator) SetStartUnchecked(start __generics_imported0.Addr) {
+	seg.node.keys[seg.index].Start = start
+}
+
+// SetStart mutates the iterated segment's start. If the new start value would
+// cause the iterated segment to overlap another segment, or would result in an
+// invalid range, SetStart panics. This operation does not invalidate any
+// iterators.
+func (seg vmaIterator) SetStart(start __generics_imported0.Addr) {
+	if start >= seg.End() {
+		panic(fmt.Sprintf("new start %v would invalidate segment range %v", start, seg.Range()))
+	}
+	if prev := seg.PrevSegment(); prev.Ok() && start < prev.End() {
+		panic(fmt.Sprintf("new start %v would cause segment range %v to overlap segment range %v", start, seg.Range(), prev.Range()))
+	}
+	seg.SetStartUnchecked(start)
+}
+
+// SetEndUnchecked mutates the iterated segment's end. This operation does not
+// invalidate any iterators.
+//
+// Preconditions: The new end must be valid: end > seg.Start(); if
+// seg.NextSegment().Ok(), then end <= seg.NextSegment().Start().
+func (seg vmaIterator) SetEndUnchecked(end __generics_imported0.Addr) {
+	seg.node.keys[seg.index].End = end
+}
+
+// SetEnd mutates the iterated segment's end. If the new end value would cause
+// the iterated segment to overlap another segment, or would result in an
+// invalid range, SetEnd panics. This operation does not invalidate any
+// iterators.
+func (seg vmaIterator) SetEnd(end __generics_imported0.Addr) {
+	if end <= seg.Start() {
+		panic(fmt.Sprintf("new end %v would invalidate segment range %v", end, seg.Range()))
+	}
+	if next := seg.NextSegment(); next.Ok() && end > next.Start() {
+		panic(fmt.Sprintf("new end %v would cause segment range %v to overlap segment range %v", end, seg.Range(), next.Range()))
+	}
+	seg.SetEndUnchecked(end)
+}
+
+// Value returns a copy of the iterated segment's value.
+func (seg vmaIterator) Value() vma {
+	return seg.node.values[seg.index]
+}
+
+// ValuePtr returns a pointer to the iterated segment's value. The pointer is
+// invalidated if the iterator is invalidated. This operation does not
+// invalidate any iterators.
+func (seg vmaIterator) ValuePtr() *vma {
+	return &seg.node.values[seg.index]
+}
+
+// SetValue mutates the iterated segment's value. This operation does not
+// invalidate any iterators.
+func (seg vmaIterator) SetValue(val vma) {
+	seg.node.values[seg.index] = val
+}
+
+// PrevSegment returns the iterated segment's predecessor. If there is no
+// preceding segment, PrevSegment returns a terminal iterator.
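The checked mutators validate exactly the neighbor the move could collide with: SetStart checks only the predecessor, SetEnd only the successor. A hypothetical in-package sketch that grows a segment leftward over free space, shown purely to illustrate the mutator contract (real vma code would also have to update the MemoryManager's usage accounting):

```go
// fillGapBefore extends seg to cover the gap before it, if that gap is
// non-empty. SetStartUnchecked's preconditions hold because gap.Start() is
// either the predecessor's End() or the minimum key.
func fillGapBefore(seg vmaIterator) {
	if gap := seg.PrevGap(); !gap.IsEmpty() {
		seg.SetStartUnchecked(gap.Start())
	}
}
```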
+func (seg vmaIterator) PrevSegment() vmaIterator {
+	if seg.node.hasChildren {
+		return seg.node.children[seg.index].lastSegment()
+	}
+	if seg.index > 0 {
+		return vmaIterator{seg.node, seg.index - 1}
+	}
+	if seg.node.parent == nil {
+		return vmaIterator{}
+	}
+	return vmasegmentBeforePosition(seg.node.parent, seg.node.parentIndex)
+}
+
+// NextSegment returns the iterated segment's successor. If there is no
+// succeeding segment, NextSegment returns a terminal iterator.
+func (seg vmaIterator) NextSegment() vmaIterator {
+	if seg.node.hasChildren {
+		return seg.node.children[seg.index+1].firstSegment()
+	}
+	if seg.index < seg.node.nrSegments-1 {
+		return vmaIterator{seg.node, seg.index + 1}
+	}
+	if seg.node.parent == nil {
+		return vmaIterator{}
+	}
+	return vmasegmentAfterPosition(seg.node.parent, seg.node.parentIndex)
+}
+
+// PrevGap returns the gap immediately before the iterated segment.
+func (seg vmaIterator) PrevGap() vmaGapIterator {
+	if seg.node.hasChildren {
+
+		return seg.node.children[seg.index].lastSegment().NextGap()
+	}
+	return vmaGapIterator{seg.node, seg.index}
+}
+
+// NextGap returns the gap immediately after the iterated segment.
+func (seg vmaIterator) NextGap() vmaGapIterator {
+	if seg.node.hasChildren {
+		return seg.node.children[seg.index+1].firstSegment().PrevGap()
+	}
+	return vmaGapIterator{seg.node, seg.index + 1}
+}
+
+// PrevNonEmpty returns the iterated segment's predecessor if it is adjacent,
+// or the gap before the iterated segment otherwise. If seg.Start() ==
+// Functions.MinKey(), PrevNonEmpty will return two terminal iterators.
+// Otherwise, exactly one of the iterators returned by PrevNonEmpty will be
+// non-terminal.
+func (seg vmaIterator) PrevNonEmpty() (vmaIterator, vmaGapIterator) {
+	gap := seg.PrevGap()
+	if gap.Range().Length() != 0 {
+		return vmaIterator{}, gap
+	}
+	return gap.PrevSegment(), vmaGapIterator{}
+}
+
+// NextNonEmpty returns the iterated segment's successor if it is adjacent, or
+// the gap after the iterated segment otherwise. If seg.End() ==
+// Functions.MaxKey(), NextNonEmpty will return two terminal iterators.
+// Otherwise, exactly one of the iterators returned by NextNonEmpty will be
+// non-terminal.
+func (seg vmaIterator) NextNonEmpty() (vmaIterator, vmaGapIterator) {
+	gap := seg.NextGap()
+	if gap.Range().Length() != 0 {
+		return vmaIterator{}, gap
+	}
+	return gap.NextSegment(), vmaGapIterator{}
+}
+
+// A GapIterator is conceptually one of:
+//
+// - A pointer to a position between two segments, before the first segment, or
+// after the last segment in a set, called a *gap*; or
+//
+// - A terminal iterator, which is a sentinel indicating that the end of
+// iteration has been reached.
+//
+// Note that the gap between two adjacent segments exists (iterators to it are
+// non-terminal), but has a length of zero. GapIterator.IsEmpty returns true
+// for such gaps. An empty set contains a single gap, spanning the entire range
+// of the set's keys.
+//
+// GapIterators are copyable values and are meaningfully equality-comparable.
+// The zero value of GapIterator is a terminal iterator.
+//
+// Unless otherwise specified, any mutation of a set invalidates all existing
+// iterators into the set.
+type vmaGapIterator struct {
+	// The representation of a GapIterator is identical to that of an Iterator,
+	// except that index corresponds to positions between segments in the same
+	// way as for node.children (see comment for node.nrSegments).
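NextNonEmpty encodes "exactly one of segment or gap comes next", so a caller can walk a maximal run of touching segments without comparing addresses itself. A hypothetical in-package helper (not generated code):

```go
// runEnd returns the end of the contiguous run of segments starting at seg.
func runEnd(seg vmaIterator) __generics_imported0.Addr {
	for {
		next, _ := seg.NextNonEmpty()
		if !next.Ok() {
			return seg.End() // the run is bounded by a gap (or by MaxKey)
		}
		seg = next
	}
}
```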
+	node  *vmanode
+	index int
+}
+
+// Ok returns true if the iterator is not terminal. All other methods are only
+// valid for non-terminal iterators.
+func (gap vmaGapIterator) Ok() bool {
+	return gap.node != nil
+}
+
+// Range returns the range spanned by the iterated gap.
+func (gap vmaGapIterator) Range() __generics_imported0.AddrRange {
+	return __generics_imported0.AddrRange{gap.Start(), gap.End()}
+}
+
+// Start is equivalent to Range().Start, but should be preferred if only the
+// start of the range is needed.
+func (gap vmaGapIterator) Start() __generics_imported0.Addr {
+	if ps := gap.PrevSegment(); ps.Ok() {
+		return ps.End()
+	}
+	return vmaSetFunctions{}.MinKey()
+}
+
+// End is equivalent to Range().End, but should be preferred if only the end of
+// the range is needed.
+func (gap vmaGapIterator) End() __generics_imported0.Addr {
+	if ns := gap.NextSegment(); ns.Ok() {
+		return ns.Start()
+	}
+	return vmaSetFunctions{}.MaxKey()
+}
+
+// IsEmpty returns true if the iterated gap is empty (that is, the "gap" is
+// between two adjacent segments).
+func (gap vmaGapIterator) IsEmpty() bool {
+	return gap.Range().Length() == 0
+}
+
+// PrevSegment returns the segment immediately before the iterated gap. If no
+// such segment exists, PrevSegment returns a terminal iterator.
+func (gap vmaGapIterator) PrevSegment() vmaIterator {
+	return vmasegmentBeforePosition(gap.node, gap.index)
+}
+
+// NextSegment returns the segment immediately after the iterated gap. If no
+// such segment exists, NextSegment returns a terminal iterator.
+func (gap vmaGapIterator) NextSegment() vmaIterator {
+	return vmasegmentAfterPosition(gap.node, gap.index)
+}
+
+// PrevGap returns the iterated gap's predecessor. If no such gap exists,
+// PrevGap returns a terminal iterator.
+func (gap vmaGapIterator) PrevGap() vmaGapIterator {
+	seg := gap.PrevSegment()
+	if !seg.Ok() {
+		return vmaGapIterator{}
+	}
+	return seg.PrevGap()
+}
+
+// NextGap returns the iterated gap's successor. If no such gap exists, NextGap
+// returns a terminal iterator.
+func (gap vmaGapIterator) NextGap() vmaGapIterator {
+	seg := gap.NextSegment()
+	if !seg.Ok() {
+		return vmaGapIterator{}
+	}
+	return seg.NextGap()
+}
+
+// segmentBeforePosition returns the predecessor segment of the position given
+// by n.children[i], which may or may not contain a child. If no such segment
+// exists, segmentBeforePosition returns a terminal iterator.
+func vmasegmentBeforePosition(n *vmanode, i int) vmaIterator {
+	for i == 0 {
+		if n.parent == nil {
+			return vmaIterator{}
+		}
+		n, i = n.parent, n.parentIndex
+	}
+	return vmaIterator{n, i - 1}
+}
+
+// segmentAfterPosition returns the successor segment of the position given by
+// n.children[i], which may or may not contain a child. If no such segment
+// exists, segmentAfterPosition returns a terminal iterator.
+func vmasegmentAfterPosition(n *vmanode, i int) vmaIterator {
+	for i == n.nrSegments {
+		if n.parent == nil {
+			return vmaIterator{}
+		}
+		n, i = n.parent, n.parentIndex
+	}
+	return vmaIterator{n, i}
+}
+
+func vmazeroValueSlice(slice []vma) {
+
+	for i := range slice {
+		vmaSetFunctions{}.ClearValue(&slice[i])
+	}
+}
+
+func vmazeroNodeSlice(slice []*vmanode) {
+	for i := range slice {
+		slice[i] = nil
+	}
+}
+
+// String stringifies a Set for debugging.
+func (s *vmaSet) String() string {
+	return s.root.String()
+}
+
+// String stringifies a node (and all of its children) for debugging.
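A gap's Range is computed on demand from its neighbors, so enumerating free address space is just a gap walk that skips the zero-length gaps between touching segments. A hypothetical in-package helper (not generated code):

```go
// forEachFreeRange visits every maximal unallocated range in the set,
// skipping the empty gaps between adjacent segments.
func forEachFreeRange(s *vmaSet, fn func(__generics_imported0.AddrRange)) {
	for gap := s.FirstGap(); gap.Ok(); gap = gap.NextGap() {
		if !gap.IsEmpty() {
			fn(gap.Range())
		}
	}
}
```

The loop terminates because NextGap on the last gap reaches a terminal segment iterator and therefore returns a terminal gap iterator.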
+func (n *vmanode) String() string {
+	var buf bytes.Buffer
+	n.writeDebugString(&buf, "")
+	return buf.String()
+}
+
+func (n *vmanode) writeDebugString(buf *bytes.Buffer, prefix string) {
+	if n.hasChildren != (n.nrSegments > 0 && n.children[0] != nil) {
+		buf.WriteString(prefix)
+		buf.WriteString(fmt.Sprintf("WARNING: inconsistent value of hasChildren: got %v, want %v\n", n.hasChildren, !n.hasChildren))
+	}
+	for i := 0; i < n.nrSegments; i++ {
+		if child := n.children[i]; child != nil {
+			cprefix := fmt.Sprintf("%s- % 3d ", prefix, i)
+			if child.parent != n || child.parentIndex != i {
+				buf.WriteString(cprefix)
+				buf.WriteString(fmt.Sprintf("WARNING: inconsistent linkage to parent: got (%p, %d), want (%p, %d)\n", child.parent, child.parentIndex, n, i))
+			}
+			child.writeDebugString(buf, fmt.Sprintf("%s- % 3d ", prefix, i))
+		}
+		buf.WriteString(prefix)
+		buf.WriteString(fmt.Sprintf("- % 3d: %v => %v\n", i, n.keys[i], n.values[i]))
+	}
+	if child := n.children[n.nrSegments]; child != nil {
+		child.writeDebugString(buf, fmt.Sprintf("%s- % 3d ", prefix, n.nrSegments))
+	}
+}
+
+// SegmentDataSlices represents segments from a set as slices of start, end, and
+// values. SegmentDataSlices is primarily used as an intermediate representation
+// for save/restore and the layout here is optimized for that.
+//
+// +stateify savable
+type vmaSegmentDataSlices struct {
+	Start  []__generics_imported0.Addr
+	End    []__generics_imported0.Addr
+	Values []vma
+}
+
+// ExportSortedSlices returns a copy of all segments in the given set, in
+// ascending key order.
+func (s *vmaSet) ExportSortedSlices() *vmaSegmentDataSlices {
+	var sds vmaSegmentDataSlices
+	for seg := s.FirstSegment(); seg.Ok(); seg = seg.NextSegment() {
+		sds.Start = append(sds.Start, seg.Start())
+		sds.End = append(sds.End, seg.End())
+		sds.Values = append(sds.Values, seg.Value())
+	}
+	sds.Start = sds.Start[:len(sds.Start):len(sds.Start)]
+	sds.End = sds.End[:len(sds.End):len(sds.End)]
+	sds.Values = sds.Values[:len(sds.Values):len(sds.Values)]
+	return &sds
+}
+
+// ImportSortedSlices initializes the given set from the given slices.
+//
+// Preconditions: s must be empty. sds must represent a valid set (the segments
+// in sds must have valid lengths that do not overlap). The segments in sds
+// must be sorted in ascending key order.
+func (s *vmaSet) ImportSortedSlices(sds *vmaSegmentDataSlices) error {
+	if !s.IsEmpty() {
+		return fmt.Errorf("cannot import into non-empty set %v", s)
+	}
+	gap := s.FirstGap()
+	for i := range sds.Start {
+		r := __generics_imported0.AddrRange{sds.Start[i], sds.End[i]}
+		if !gap.Range().IsSupersetOf(r) {
+			return fmt.Errorf("segment overlaps a preceding segment or is incorrectly sorted: [%d, %d) => %v", sds.Start[i], sds.End[i], sds.Values[i])
+		}
+		gap = s.InsertWithoutMerging(gap, r, sds.Values[i]).NextGap()
+	}
+	return nil
+}
+
+func (s *vmaSet) saveRoot() *vmaSegmentDataSlices {
+	return s.ExportSortedSlices()
+}
+
+func (s *vmaSet) loadRoot(sds *vmaSegmentDataSlices) {
+	if err := s.ImportSortedSlices(sds); err != nil {
+		panic(err)
+	}
+}
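The saveRoot/loadRoot hooks make save/restore an export/import round trip over the flat slice representation. The same round trip can be read as a structural clone; a hypothetical in-package sketch (note that for the vma instantiation the copied values would still share any underlying mappables, so this is illustrative only):

```go
// cloneSet rebuilds a set with the same segments and values as src.
func cloneSet(src *vmaSet) *vmaSet {
	var dst vmaSet
	if err := dst.ImportSortedSlices(src.ExportSortedSlices()); err != nil {
		panic(err) // unreachable: the export is sorted and non-overlapping
	}
	return &dst
}
```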