Merge 216da0b7 (automated)

author: gVisor bot <gvisor-bot@google.com> 2019-06-02 06:44:55 +0000
committer: gVisor bot <gvisor-bot@google.com> 2019-06-02 06:44:55 +0000
commit: ceb0d792f328d1fc0692197d8856a43c3936a571 (patch)
tree: 83155f302eff44a78bcc30a3a08f4efe59a79379 /pkg/sentry/fs/fsutil
parent: deb7ecf1e46862d54f4b102f2d163cfbcfc37f3b (diff)
parent: 216da0b733dbed9aad9b2ab92ac75bcb906fd7ee (diff)
15 files changed, 7047 insertions, 0 deletions
diff --git a/pkg/sentry/fs/fsutil/dirty_set.go b/pkg/sentry/fs/fsutil/dirty_set.go
new file mode 100644
index 000000000..f1451d77a
--- /dev/null
+++ b/pkg/sentry/fs/fsutil/dirty_set.go
@@ -0,0 +1,237 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package fsutil
+
+import (
+	"math"
+
+	"gvisor.googlesource.com/gvisor/pkg/sentry/context"
+	"gvisor.googlesource.com/gvisor/pkg/sentry/memmap"
+	"gvisor.googlesource.com/gvisor/pkg/sentry/platform"
+	"gvisor.googlesource.com/gvisor/pkg/sentry/safemem"
+	"gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
+)
+
+// DirtySet maps offsets into a memmap.Mappable to DirtyInfo. It is used to
+// implement Mappables that cache data from another source.
+//
+// type DirtySet <generated by go_generics>
+
+// DirtyInfo is the value type of DirtySet, and represents information about a
+// Mappable offset that is dirty (the cached data for that offset is newer than
+// its source).
+//
+// DirtyInfo values are compared with == by dirtySetFunctions.Merge, so
+// adjacent segments are only coalesced when their Keep flags agree.
+//
+// +stateify savable
+type DirtyInfo struct {
+	// Keep is true if the represented offset is concurrently writable, such
+	// that writing the data for that offset back to the source does not
+	// guarantee that the offset is clean (since it may be concurrently
+	// rewritten after the writeback).
+	//
+	// Segments with Keep set are skipped by MarkClean and are left in place
+	// by SyncDirty and SyncDirtyAll after their data has been written back.
+	Keep bool
+}
+
+// dirtySetFunctions implements segment.Functions for DirtySet. It carries no
+// state; its methods are invoked on a zero value (dirtySetFunctions{}).
+type dirtySetFunctions struct{}
+
+// MinKey implements segment.Functions.MinKey.
+func (dirtySetFunctions) MinKey() uint64 {
+	return 0
+}
+
+// MaxKey implements segment.Functions.MaxKey.
+func (dirtySetFunctions) MaxKey() uint64 {
+	return math.MaxUint64
+}
+
+// ClearValue implements segment.Functions.ClearValue.
+//
+// It is intentionally a no-op: the body leaves *val untouched.
+func (dirtySetFunctions) ClearValue(val *DirtyInfo) {
+}
+
+// Merge implements segment.Functions.Merge. Two values can be merged only
+// when they are identical; the ranges themselves are irrelevant. On failure
+// the zero DirtyInfo and false are returned.
+func (dirtySetFunctions) Merge(_ memmap.MappableRange, val1 DirtyInfo, _ memmap.MappableRange, val2 DirtyInfo) (DirtyInfo, bool) {
+	if val1 == val2 {
+		return val1, true
+	}
+	return DirtyInfo{}, false
+}
+
+// Split implements segment.Functions.Split. Both halves of a split segment
+// inherit the original value unchanged.
+func (dirtySetFunctions) Split(_ memmap.MappableRange, val DirtyInfo, _ uint64) (DirtyInfo, DirtyInfo) {
+	left, right := val, val
+	return left, right
+}
+
+// MarkClean removes the dirty marking from every offset in mr, leaving alone
+// any segment whose Keep flag is set (i.e. to which KeepDirty was applied).
+func (ds *DirtySet) MarkClean(mr memmap.MappableRange) {
+	for cur := ds.LowerBoundSegment(mr.Start); cur.Ok() && cur.Start() < mr.End; {
+		if !cur.Value().Keep {
+			// Trim the segment to mr, drop it, and resume after the
+			// vacated gap.
+			cur = ds.Isolate(cur, mr)
+			cur = ds.Remove(cur).NextSegment()
+		} else {
+			cur = cur.NextSegment()
+		}
+	}
+}
+
+// KeepClean unconditionally marks all offsets in mr as not dirty, including
+// offsets that KeepDirty had previously pinned as dirty.
+func (ds *DirtySet) KeepClean(mr memmap.MappableRange) {
+	// The iterator to the resulting gap is not needed here.
+	_ = ds.RemoveRange(mr)
+}
+
+// MarkDirty marks all offsets in mr as dirty.
+func (ds *DirtySet) MarkDirty(mr memmap.MappableRange) {
+	ds.setDirty(mr, false)
+}
+
+// KeepDirty marks all offsets in mr as dirty and prevents them from being
+// marked as clean by MarkClean.
+func (ds *DirtySet) KeepDirty(mr memmap.MappableRange) {
+	ds.setDirty(mr, true)
+}
+
+func (ds *DirtySet) setDirty(mr memmap.MappableRange, keep bool) {
+	var changedAny bool
+	defer func() {
+		if changedAny {
+			// Merge segments split by Isolate to reduce cost of iteration.
+			ds.MergeRange(mr)
+		}
+	}()
+	seg, gap := ds.Find(mr.Start)
+	for {
+		switch {
+		case seg.Ok() && seg.Start() < mr.End:
+			if keep && !seg.Value().Keep {
+				changedAny = true
+				seg = ds.Isolate(seg, mr)
+				seg.ValuePtr().Keep = true
+			}
+			seg, gap = seg.NextNonEmpty()
+
+		case gap.Ok() && gap.Start() < mr.End:
+			changedAny = true
+			seg = ds.Insert(gap, gap.Range().Intersect(mr), DirtyInfo{keep})
+			seg, gap = seg.NextNonEmpty()
+
+		default:
+			return
+		}
+	}
+}
+
+// AllowClean allows MarkClean to mark offsets in mr as not dirty, ending the
+// effect of a previous call to KeepDirty. (It does not itself mark those
+// offsets as not dirty.)
+func (ds *DirtySet) AllowClean(mr memmap.MappableRange) {
+	var changedAny bool
+	defer func() {
+		if changedAny {
+			// Merge segments split by Isolate to reduce cost of iteration.
+			ds.MergeRange(mr)
+		}
+	}()
+	for seg := ds.LowerBoundSegment(mr.Start); seg.Ok() && seg.Start() < mr.End; seg = seg.NextSegment() {
+		if seg.Value().Keep {
+			changedAny = true
+			seg = ds.Isolate(seg, mr)
+			seg.ValuePtr().Keep = false
+		}
+	}
+}
+
+// SyncDirty writes back, via writeAt, the pages in mr that are both present
+// in cache and marked dirty, and updates dirty to reflect successful writes.
+// Partial successful writes are retried until all bytes have been written.
+// max is the true size of the cached object; offsets beyond max are never
+// passed to writeAt, even if they are marked dirty.
+//
+// Segments with Keep set are written back but stay in dirty. On error,
+// segments that were already written back remain removed.
+func SyncDirty(ctx context.Context, mr memmap.MappableRange, cache *FileRangeSet, dirty *DirtySet, max uint64, mem platform.File, writeAt func(ctx context.Context, srcs safemem.BlockSeq, offset uint64) (uint64, error)) error {
+	isolated := false
+	defer func() {
+		if !isolated {
+			return
+		}
+		// Merge segments split by Isolate to reduce cost of iteration.
+		dirty.MergeRange(mr)
+	}()
+	for dseg := dirty.LowerBoundSegment(mr.Start); dseg.Ok() && dseg.Start() < mr.End; {
+		var dr memmap.MappableRange
+		if !dseg.Value().Keep {
+			// The segment will be removed, so it must be trimmed to mr.
+			isolated = true
+			dseg = dirty.Isolate(dseg, mr)
+			dr = dseg.Range()
+		} else {
+			dr = dseg.Range().Intersect(mr)
+		}
+		if err := syncDirtyRange(ctx, dr, cache, max, mem, writeAt); err != nil {
+			return err
+		}
+		if !dseg.Value().Keep {
+			dseg = dirty.Remove(dseg).NextSegment()
+			continue
+		}
+		dseg = dseg.NextSegment()
+	}
+	return nil
+}
+
+// SyncDirtyAll writes back, via writeAt, every page stored in cache that is
+// marked dirty, and updates dirty to reflect successful writes. Partial
+// successful writes are retried until all bytes have been written. max is the
+// true size of the cached object; offsets beyond max are never passed to
+// writeAt, even if they are marked dirty.
+func SyncDirtyAll(ctx context.Context, cache *FileRangeSet, dirty *DirtySet, max uint64, mem platform.File, writeAt func(ctx context.Context, srcs safemem.BlockSeq, offset uint64) (uint64, error)) error {
+	for dseg := dirty.FirstSegment(); dseg.Ok(); {
+		err := syncDirtyRange(ctx, dseg.Range(), cache, max, mem, writeAt)
+		if err != nil {
+			return err
+		}
+		if !dseg.Value().Keep {
+			// Written back and not pinned: the range is now clean.
+			dseg = dirty.Remove(dseg).NextSegment()
+			continue
+		}
+		dseg = dseg.NextSegment()
+	}
+	return nil
+}
+
+// syncDirtyRange writes the cached portions of mr, truncated at max, to
+// writeAt, retrying partial writes until each cached range is fully written.
+//
+// Preconditions: mr must be page-aligned.
+func syncDirtyRange(ctx context.Context, mr memmap.MappableRange, cache *FileRangeSet, max uint64, mem platform.File, writeAt func(ctx context.Context, srcs safemem.BlockSeq, offset uint64) (uint64, error)) error {
+	cseg := cache.LowerBoundSegment(mr.Start)
+	for ; cseg.Ok() && cseg.Start() < mr.End; cseg = cseg.NextSegment() {
+		wbr := cseg.Range().Intersect(mr)
+		if wbr.Start > max {
+			// Everything from here on lies past the end of the object.
+			return nil
+		}
+		ims, err := mem.MapInternal(cseg.FileRangeOf(wbr), usermem.Read)
+		if err != nil {
+			return err
+		}
+		if wbr.End > max {
+			ims = ims.TakeFirst64(max - wbr.Start)
+		}
+		for off := wbr.Start; !ims.IsEmpty(); {
+			done, err := writeAt(ctx, ims, off)
+			if err != nil {
+				return err
+			}
+			off += done
+			ims = ims.DropFirst64(done)
+		}
+	}
+	return nil
+}
diff --git a/pkg/sentry/fs/fsutil/dirty_set_impl.go b/pkg/sentry/fs/fsutil/dirty_set_impl.go
new file mode 100755
index 000000000..5f25068a1
--- /dev/null
+++ b/pkg/sentry/fs/fsutil/dirty_set_impl.go
@@ -0,0 +1,1274 @@
+package fsutil
+
+import (
+	__generics_imported0 "gvisor.googlesource.com/gvisor/pkg/sentry/memmap"
+)
+
+import (
+	"bytes"
+	"fmt"
+)
+
+const (
+	// minDegree is the minimum degree of an internal node in a Set B-tree.
+	//
+	// - Any non-root node has at least minDegree-1 segments.
+	//
+	// - Any non-root internal (non-leaf) node has at least minDegree children.
+	//
+	// - The root node may have fewer than minDegree-1 segments, but it may
+	// only have 0 segments if the tree is empty.
+	//
+	// Our implementation requires minDegree >= 3. Higher values of minDegree
+	// usually improve performance, but increase memory usage for small sets.
+	DirtyminDegree = 3
+
+	// DirtymaxDegree is the maximum number of children of a node; it sizes
+	// Dirtynode.children, while Dirtynode.keys and Dirtynode.values hold at
+	// most DirtymaxDegree-1 segments.
+	DirtymaxDegree = 2 * DirtyminDegree
+)
+
+// A Set is a mapping of segments with non-overlapping Range keys. The zero
+// value for a Set is an empty set. Set values are not safely movable nor
+// copyable. Set is thread-compatible.
+//
+// +stateify savable
+type DirtySet struct {
+	// root is the root node of the B-tree. It is stored by value, and other
+	// nodes hold pointers to it through their parent fields.
+	root Dirtynode `state:".(*DirtySegmentDataSlices)"`
+}
+
+// IsEmpty reports whether the set holds no segments at all. Only the root
+// needs to be inspected, since it has zero segments exactly when the tree is
+// empty.
+func (s *DirtySet) IsEmpty() bool {
+	segments := s.root.nrSegments
+	return segments == 0
+}
+
+// IsEmptyRange returns true iff no segments in the set overlap the given
+// range. This is semantically equivalent to s.SpanRange(r) == 0, but may be
+// more efficient.
+//
+// An empty range (r.Start == r.End) overlaps nothing, so the result is true.
+// IsEmptyRange panics if r is inverted (r.End < r.Start).
+func (s *DirtySet) IsEmptyRange(r __generics_imported0.MappableRange) bool {
+	switch {
+	// Range bounds are unsigned, so an inverted range has to be detected by
+	// comparing the bounds directly; its length would wrap around to a large
+	// positive value rather than become negative.
+	case r.End < r.Start:
+		panic(fmt.Sprintf("invalid range %v", r))
+	case r.End == r.Start:
+		return true
+	}
+	_, gap := s.Find(r.Start)
+	if !gap.Ok() {
+		// r.Start lies inside a segment.
+		return false
+	}
+	// r is empty iff it ends before the gap containing r.Start does.
+	return r.End <= gap.End()
+}
+
+// Span returns the total size of all segments in the set.
+func (s *DirtySet) Span() uint64 {
+	var sz uint64
+	for seg := s.FirstSegment(); seg.Ok(); seg = seg.NextSegment() {
+		sz += seg.Range().Length()
+	}
+	return sz
+}
+
+// SpanRange returns the total size of the intersection of segments in the set
+// with the given range.
+//
+// An empty range (r.Start == r.End) yields 0. SpanRange panics if r is
+// inverted (r.End < r.Start).
+func (s *DirtySet) SpanRange(r __generics_imported0.MappableRange) uint64 {
+	switch {
+	// Range bounds are unsigned, so an inverted range has to be detected by
+	// comparing the bounds directly; its length would wrap around to a large
+	// positive value rather than become negative.
+	case r.End < r.Start:
+		panic(fmt.Sprintf("invalid range %v", r))
+	case r.End == r.Start:
+		return 0
+	}
+	var sz uint64
+	for seg := s.LowerBoundSegment(r.Start); seg.Ok() && seg.Start() < r.End; seg = seg.NextSegment() {
+		// Only count the part of each segment that lies within r.
+		sz += seg.Range().Intersect(r).Length()
+	}
+	return sz
+}
+
+// FirstSegment returns an iterator to the lowest segment in the set, or a
+// terminal iterator when the set is empty.
+func (s *DirtySet) FirstSegment() DirtyIterator {
+	if s.root.nrSegments != 0 {
+		return s.root.firstSegment()
+	}
+	return DirtyIterator{}
+}
+
+// LastSegment returns an iterator to the highest segment in the set, or a
+// terminal iterator when the set is empty.
+func (s *DirtySet) LastSegment() DirtyIterator {
+	if s.root.nrSegments != 0 {
+		return s.root.lastSegment()
+	}
+	return DirtyIterator{}
+}
+
+// FirstGap returns the first gap in the set: the gap at index 0 of the
+// leftmost leaf.
+func (s *DirtySet) FirstGap() DirtyGapIterator {
+	leaf := &s.root
+	for ; leaf.hasChildren; leaf = leaf.children[0] {
+	}
+	return DirtyGapIterator{leaf, 0}
+}
+
+// LastGap returns the last gap in the set: the gap after the final segment
+// of the rightmost leaf.
+func (s *DirtySet) LastGap() DirtyGapIterator {
+	leaf := &s.root
+	for ; leaf.hasChildren; leaf = leaf.children[leaf.nrSegments] {
+	}
+	return DirtyGapIterator{leaf, leaf.nrSegments}
+}
+
+// Find returns the segment or gap whose range contains the given key. If a
+// segment is found, the returned Iterator is non-terminal and the
+// returned GapIterator is terminal. Otherwise, the returned Iterator is
+// terminal and the returned GapIterator is non-terminal.
+//
+// Find descends from the root, binary-searching the keys of each node it
+// visits. Gaps are only ever reported at leaf nodes.
+func (s *DirtySet) Find(key uint64) (DirtyIterator, DirtyGapIterator) {
+	n := &s.root
+	for {
+
+		// Binary search n.keys[lower:upper] for a range containing key.
+		lower := 0
+		upper := n.nrSegments
+		for lower < upper {
+			i := lower + (upper-lower)/2
+			if r := n.keys[i]; key < r.End {
+				if key >= r.Start {
+					// r.Start <= key < r.End: key is inside segment i.
+					return DirtyIterator{n, i}, DirtyGapIterator{}
+				}
+				// key precedes segment i.
+				upper = i
+			} else {
+				// key is at or past the end of segment i.
+				lower = i + 1
+			}
+		}
+		// No segment in n contains key; lower is the index of the gap (in
+		// a leaf) or child (in an internal node) that key falls into.
+		i := lower
+		if !n.hasChildren {
+			return DirtyIterator{}, DirtyGapIterator{n, i}
+		}
+		n = n.children[i]
+	}
+}
+
+// FindSegment looks up the segment whose range contains key. When key falls
+// in a gap instead, the returned iterator is terminal.
+func (s *DirtySet) FindSegment(key uint64) DirtyIterator {
+	found, _ := s.Find(key)
+	return found
+}
+
+// LowerBoundSegment returns the lowest segment that contains a key greater
+// than or equal to min: either the segment containing min itself, or the
+// first segment after the gap containing min. If there is no such segment,
+// the returned iterator is terminal.
+func (s *DirtySet) LowerBoundSegment(min uint64) DirtyIterator {
+	found, hole := s.Find(min)
+	if !found.Ok() {
+		return hole.NextSegment()
+	}
+	return found
+}
+
+// UpperBoundSegment returns the highest segment that contains a key less than
+// or equal to max: either the segment containing max itself, or the last
+// segment before the gap containing max. If there is no such segment, the
+// returned iterator is terminal.
+func (s *DirtySet) UpperBoundSegment(max uint64) DirtyIterator {
+	found, hole := s.Find(max)
+	if !found.Ok() {
+		return hole.PrevSegment()
+	}
+	return found
+}
+
+// FindGap looks up the gap containing key. When key lies inside a segment
+// instead, the returned iterator is terminal.
+func (s *DirtySet) FindGap(key uint64) DirtyGapIterator {
+	_, hole := s.Find(key)
+	return hole
+}
+
+// LowerBoundGap returns the gap with the lowest range that is greater than or
+// equal to min: the gap containing min, or else the gap following the
+// segment that contains min.
+func (s *DirtySet) LowerBoundGap(min uint64) DirtyGapIterator {
+	found, hole := s.Find(min)
+	if !hole.Ok() {
+		return found.NextGap()
+	}
+	return hole
+}
+
+// UpperBoundGap returns the gap with the highest range that is less than or
+// equal to max: the gap containing max, or else the gap preceding the
+// segment that contains max.
+func (s *DirtySet) UpperBoundGap(max uint64) DirtyGapIterator {
+	found, hole := s.Find(max)
+	if !hole.Ok() {
+		return found.PrevGap()
+	}
+	return hole
+}
+
+// Add inserts the given segment into the set and returns true. If the new
+// segment can be merged with adjacent segments, Add will do so. If the new
+// segment would overlap an existing segment, Add returns false. If Add
+// succeeds, all existing iterators are invalidated.
+//
+// Add panics if r is empty or inverted (r.End <= r.Start).
+func (s *DirtySet) Add(r __generics_imported0.MappableRange, val DirtyInfo) bool {
+	// Compare the bounds directly: they are unsigned, so the length of an
+	// inverted range wraps around to a large positive value and a check on
+	// the length alone would let a malformed segment into the tree.
+	if r.End <= r.Start {
+		panic(fmt.Sprintf("invalid segment range %v", r))
+	}
+	gap := s.FindGap(r.Start)
+	if !gap.Ok() {
+		// r.Start is inside an existing segment.
+		return false
+	}
+	if r.End > gap.End() {
+		// r runs into the segment following the gap.
+		return false
+	}
+	s.Insert(gap, r, val)
+	return true
+}
+
+// AddWithoutMerging inserts the given segment into the set and returns true.
+// If it would overlap an existing segment, AddWithoutMerging does nothing and
+// returns false. If AddWithoutMerging succeeds, all existing iterators are
+// invalidated.
+//
+// AddWithoutMerging panics if r is empty or inverted (r.End <= r.Start).
+func (s *DirtySet) AddWithoutMerging(r __generics_imported0.MappableRange, val DirtyInfo) bool {
+	// Compare the bounds directly: they are unsigned, so the length of an
+	// inverted range wraps around to a large positive value and a check on
+	// the length alone would let a malformed segment into the tree.
+	if r.End <= r.Start {
+		panic(fmt.Sprintf("invalid segment range %v", r))
+	}
+	gap := s.FindGap(r.Start)
+	if !gap.Ok() {
+		// r.Start is inside an existing segment.
+		return false
+	}
+	if r.End > gap.End() {
+		// r runs into the segment following the gap.
+		return false
+	}
+	s.InsertWithoutMergingUnchecked(gap, r, val)
+	return true
+}
+
+// Insert inserts the given segment into the given gap. If the new segment can
+// be merged with adjacent segments, Insert will do so. Insert returns an
+// iterator to the segment containing the inserted value (which may have been
+// merged with other values). All existing iterators (including gap, but not
+// including the returned iterator) are invalidated.
+//
+// If the gap cannot accommodate the segment, or if r is invalid (empty or
+// inverted), Insert panics.
+//
+// Insert is semantically equivalent to an InsertWithoutMerging followed by a
+// Merge, but may be more efficient. Note that there is no unchecked variant of
+// Insert since Insert must retrieve and inspect gap's predecessor and
+// successor segments regardless.
+func (s *DirtySet) Insert(gap DirtyGapIterator, r __generics_imported0.MappableRange, val DirtyInfo) DirtyIterator {
+	// Compare the bounds directly: they are unsigned, so the length of an
+	// inverted range wraps around to a large positive value and a check on
+	// the length alone would let a malformed segment into the tree.
+	if r.End <= r.Start {
+		panic(fmt.Sprintf("invalid segment range %v", r))
+	}
+	prev, next := gap.PrevSegment(), gap.NextSegment()
+	if prev.Ok() && prev.End() > r.Start {
+		panic(fmt.Sprintf("new segment %v overlaps predecessor %v", r, prev.Range()))
+	}
+	if next.Ok() && next.Start() < r.End {
+		panic(fmt.Sprintf("new segment %v overlaps successor %v", r, next.Range()))
+	}
+	// Try to extend the predecessor over r, and then over the successor too
+	// if r exactly fills the gap between them.
+	if prev.Ok() && prev.End() == r.Start {
+		if mval, ok := (dirtySetFunctions{}).Merge(prev.Range(), prev.Value(), r, val); ok {
+			prev.SetEndUnchecked(r.End)
+			prev.SetValue(mval)
+			if next.Ok() && next.Start() == r.End {
+				val = mval
+				if mval, ok := (dirtySetFunctions{}).Merge(prev.Range(), val, next.Range(), next.Value()); ok {
+					prev.SetEndUnchecked(next.End())
+					prev.SetValue(mval)
+					return s.Remove(next).PrevSegment()
+				}
+			}
+			return prev
+		}
+	}
+	// Otherwise try to extend the successor backwards over r.
+	if next.Ok() && next.Start() == r.End {
+		if mval, ok := (dirtySetFunctions{}).Merge(r, val, next.Range(), next.Value()); ok {
+			next.SetStartUnchecked(r.Start)
+			next.SetValue(mval)
+			return next
+		}
+	}
+	// No merge was possible; insert r as a segment of its own.
+	return s.InsertWithoutMergingUnchecked(gap, r, val)
+}
+
+// InsertWithoutMerging inserts the given segment into the given gap and
+// returns an iterator to the inserted segment. All existing iterators
+// (including gap, but not including the returned iterator) are invalidated.
+//
+// If the gap cannot accommodate the segment, or if r is invalid (empty or
+// inverted), InsertWithoutMerging panics.
+func (s *DirtySet) InsertWithoutMerging(gap DirtyGapIterator, r __generics_imported0.MappableRange, val DirtyInfo) DirtyIterator {
+	// Compare the bounds directly: they are unsigned, so the length of an
+	// inverted range wraps around to a large positive value and a check on
+	// the length alone would let a malformed segment into the tree.
+	if r.End <= r.Start {
+		panic(fmt.Sprintf("invalid segment range %v", r))
+	}
+	if gr := gap.Range(); !gr.IsSupersetOf(r) {
+		panic(fmt.Sprintf("cannot insert segment range %v into gap range %v", r, gr))
+	}
+	return s.InsertWithoutMergingUnchecked(gap, r, val)
+}
+
+// InsertWithoutMergingUnchecked places a new segment (r, val) into gap without
+// validating it and without attempting to merge it with its neighbours. The
+// returned iterator refers to the new segment; every other existing iterator,
+// gap included, is invalidated.
+//
+// Preconditions: r.Start >= gap.Start(); r.End <= gap.End().
+func (s *DirtySet) InsertWithoutMergingUnchecked(gap DirtyGapIterator, r __generics_imported0.MappableRange, val DirtyInfo) DirtyIterator {
+	// Make room first; splitting full nodes may relocate the gap.
+	gap = gap.node.rebalanceBeforeInsert(gap)
+	node, idx := gap.node, gap.index
+	// Shift the tail of the node right by one slot and fill the hole.
+	copy(node.keys[idx+1:], node.keys[idx:node.nrSegments])
+	copy(node.values[idx+1:], node.values[idx:node.nrSegments])
+	node.keys[idx], node.values[idx] = r, val
+	node.nrSegments++
+	return DirtyIterator{node, idx}
+}
+
+// Remove deletes the segment referred to by seg and returns an iterator to
+// the gap left behind. Every other existing iterator, seg included, is
+// invalidated.
+func (s *DirtySet) Remove(seg DirtyIterator) DirtyGapIterator {
+	if seg.node.hasChildren {
+		// Segments are only physically removed from leaves. Overwrite
+		// this internal segment with its predecessor and remove that
+		// predecessor instead.
+		victim := seg.PrevSegment()
+		seg.SetRangeUnchecked(victim.Range())
+		seg.SetValue(victim.Value())
+		return s.Remove(victim).NextGap()
+	}
+	node, idx := seg.node, seg.index
+	last := node.nrSegments - 1
+	// Close the hole by shifting the tail of the leaf left by one slot.
+	copy(node.keys[idx:], node.keys[idx+1:node.nrSegments])
+	copy(node.values[idx:], node.values[idx+1:node.nrSegments])
+	dirtySetFunctions{}.ClearValue(&node.values[last])
+	node.nrSegments = last
+	return node.rebalanceAfterRemove(DirtyGapIterator{node, idx})
+}
+
+// RemoveAll removes all segments from the set. All existing iterators are
+// invalidated.
+func (s *DirtySet) RemoveAll() {
+	s.root = Dirtynode{}
+}
+
+// RemoveRange deletes every part of every segment that lies within r,
+// splitting segments that straddle its bounds. It returns an iterator to the
+// resulting gap; all existing iterators are invalidated.
+func (s *DirtySet) RemoveRange(r __generics_imported0.MappableRange) DirtyGapIterator {
+	seg, gap := s.Find(r.Start)
+	if seg.Ok() {
+		// r.Start is inside a segment: cut it down to r and drop it.
+		gap = s.Remove(s.Isolate(seg, r))
+	}
+	for {
+		seg = gap.NextSegment()
+		if !seg.Ok() || seg.Start() >= r.End {
+			return gap
+		}
+		gap = s.Remove(s.Isolate(seg, r))
+	}
+}
+
+// Merge tries to combine two neighboring segments into one. On success it
+// returns an iterator to the merged segment and invalidates all existing
+// iterators; on failure it returns a terminal iterator.
+//
+// Merge panics if first is not the immediate predecessor of second.
+func (s *DirtySet) Merge(first, second DirtyIterator) DirtyIterator {
+	if first.NextSegment() == second {
+		return s.MergeUnchecked(first, second)
+	}
+	panic(fmt.Sprintf("attempt to merge non-neighboring segments %v, %v", first.Range(), second.Range()))
+}
+
+// MergeUnchecked tries to combine two neighboring segments into one without
+// verifying that they are in fact neighbors. On success it returns an iterator
+// to the merged segment and invalidates all existing iterators; on failure it
+// returns a terminal iterator.
+//
+// Precondition: first is the predecessor of second: first.NextSegment() ==
+// second, first == second.PrevSegment().
+func (s *DirtySet) MergeUnchecked(first, second DirtyIterator) DirtyIterator {
+	// Segments separated by a non-empty gap can never be merged.
+	if first.End() != second.Start() {
+		return DirtyIterator{}
+	}
+	merged, ok := (dirtySetFunctions{}).Merge(first.Range(), first.Value(), second.Range(), second.Value())
+	if !ok {
+		return DirtyIterator{}
+	}
+	// Grow first over second, then drop second.
+	first.SetEndUnchecked(second.End())
+	first.SetValue(merged)
+	return s.Remove(second).PrevSegment()
+}
+
+// MergeAll walks the whole set and tries to merge each pair of adjacent
+// segments. All existing iterators are invalidated.
+func (s *DirtySet) MergeAll() {
+	seg := s.FirstSegment()
+	if !seg.Ok() {
+		return
+	}
+	for next := seg.NextSegment(); next.Ok(); next = seg.NextSegment() {
+		// After a successful merge, keep going from the merged segment so
+		// that it can absorb further neighbours; otherwise step forward.
+		if merged := s.MergeUnchecked(seg, next); merged.Ok() {
+			seg = merged
+		} else {
+			seg = next
+		}
+	}
+}
+
+// MergeRange tries to merge each pair of adjacent segments that contain a key
+// in r. All existing iterators are invalidated.
+func (s *DirtySet) MergeRange(r __generics_imported0.MappableRange) {
+	seg := s.LowerBoundSegment(r.Start)
+	if !seg.Ok() {
+		return
+	}
+	for next := seg.NextSegment(); next.Ok() && next.Range().Start < r.End; next = seg.NextSegment() {
+		// After a successful merge, keep going from the merged segment so
+		// that it can absorb further neighbours; otherwise step forward.
+		if merged := s.MergeUnchecked(seg, next); merged.Ok() {
+			seg = merged
+		} else {
+			seg = next
+		}
+	}
+}
+
+// MergeAdjacent tries to merge the segment containing r.Start with the
+// segment before it, and the segment containing r.End-1 with the segment
+// after it. Either step is skipped when the relevant segment or neighbour
+// does not exist.
+func (s *DirtySet) MergeAdjacent(r __generics_imported0.MappableRange) {
+	if first := s.FindSegment(r.Start); first.Ok() {
+		if prev := first.PrevSegment(); prev.Ok() {
+			s.Merge(prev, first)
+		}
+	}
+	if last := s.FindSegment(r.End - 1); last.Ok() {
+		if next := last.NextSegment(); next.Ok() {
+			s.Merge(last, next)
+		}
+	}
+}
+
+// Split divides seg in two at the key split and returns iterators to the two
+// halves. Every other existing iterator, seg included, is invalidated.
+//
+// Split panics when seg cannot be split at split: either split coincides with
+// the start or end of seg's range (so one half would have zero length), or it
+// lies outside that range altogether.
+func (s *DirtySet) Split(seg DirtyIterator, split uint64) (DirtyIterator, DirtyIterator) {
+	if seg.Range().CanSplitAt(split) {
+		return s.SplitUnchecked(seg, split)
+	}
+	panic(fmt.Sprintf("can't split %v at %v", seg.Range(), split))
+}
+
+// SplitUnchecked divides seg in two at the key split, without validating
+// split, and returns iterators to the two halves. Every other existing
+// iterator, seg included, is invalidated.
+//
+// Preconditions: seg.Start() < key < seg.End().
+func (s *DirtySet) SplitUnchecked(seg DirtyIterator, split uint64) (DirtyIterator, DirtyIterator) {
+	left, right := (dirtySetFunctions{}).Split(seg.Range(), seg.Value(), split)
+	origEnd := seg.End()
+	// Shrink seg to the lower half, then insert the upper half right
+	// after it.
+	seg.SetEndUnchecked(split)
+	seg.SetValue(left)
+	upper := __generics_imported0.MappableRange{Start: split, End: origEnd}
+	second := s.InsertWithoutMergingUnchecked(seg.NextGap(), upper, right)
+	// The insertion may have moved the lower half; locate it again.
+	return second.PrevSegment(), second
+}
+
+// SplitAt splits the segment straddling split, if there is one, and reports
+// whether a split took place. When it does split a segment, all existing
+// iterators are invalidated.
+func (s *DirtySet) SplitAt(split uint64) bool {
+	seg := s.FindSegment(split)
+	if !seg.Ok() || !seg.Range().CanSplitAt(split) {
+		return false
+	}
+	s.SplitUnchecked(seg, split)
+	return true
+}
+
+// Isolate trims seg so that its range lies entirely within r, splitting it at
+// r.Start and/or r.End where needed, and returns an iterator to the trimmed
+// segment. Every other existing iterator, seg included, is invalidated.
+func (s *DirtySet) Isolate(seg DirtyIterator, r __generics_imported0.MappableRange) DirtyIterator {
+	if lo := r.Start; seg.Range().CanSplitAt(lo) {
+		// Keep the upper half.
+		_, seg = s.SplitUnchecked(seg, lo)
+	}
+	if hi := r.End; seg.Range().CanSplitAt(hi) {
+		// Keep the lower half.
+		seg, _ = s.SplitUnchecked(seg, hi)
+	}
+	return seg
+}
+
+// ApplyContiguous calls fn on each segment of a contiguous run of segments
+// covering r, splitting segments at the bounds of r where necessary. It stops
+// at the first gap it meets and returns that gap. If fn was applied across
+// the whole of r, a terminal gap is returned. All existing iterators are
+// invalidated.
+//
+// N.B. The Iterator must not be invalidated by the function.
+func (s *DirtySet) ApplyContiguous(r __generics_imported0.MappableRange, fn func(seg DirtyIterator)) DirtyGapIterator {
+	seg, gap := s.Find(r.Start)
+	if !seg.Ok() {
+		return gap
+	}
+	for seg.Ok() {
+		seg = s.Isolate(seg, r)
+		fn(seg)
+		if seg.End() >= r.End {
+			break
+		}
+		gap = seg.NextGap()
+		if !gap.IsEmpty() {
+			return gap
+		}
+		seg = gap.NextSegment()
+	}
+	return DirtyGapIterator{}
+}
+
+// Dirtynode is a single node of the B-tree backing DirtySet. The root node is
+// stored by value in DirtySet; other nodes are created when a full node is
+// split (see rebalanceBeforeInsert) and are reached through children.
+//
+// +stateify savable
+type Dirtynode struct {
+	// An internal binary tree node looks like:
+	//
+	//   K
+	//  / \
+	// Cl Cr
+	//
+	// where all keys in the subtree rooted by Cl (the left subtree) are less
+	// than K (the key of the parent node), and all keys in the subtree rooted
+	// by Cr (the right subtree) are greater than K.
+	//
+	// An internal B-tree node's indexes work out to look like:
+	//
+	//   K0 K1 K2  ...   Kn-1
+	//  / \/ \/ \  ...  /  \
+	// C0 C1 C2 C3 ... Cn-1 Cn
+	//
+	// where n is nrSegments.
+	nrSegments int
+
+	// parent is a pointer to this node's parent. If this node is root, parent
+	// is nil.
+	parent *Dirtynode
+
+	// parentIndex is the index of this node in parent.children.
+	parentIndex int
+
+	// Flag for internal nodes that is technically redundant with "children[0]
+	// != nil", but is stored in the first cache line. "hasChildren" rather
+	// than "isLeaf" because false must be the correct value for an empty root.
+	hasChildren bool
+
+	// Nodes store keys and values in separate arrays to maximize locality in
+	// the common case (scanning keys for lookup).
+	//
+	// Only the first nrSegments entries of keys and values, and (for nodes
+	// with hasChildren set) the first nrSegments+1 entries of children, are
+	// in use.
+	keys     [DirtymaxDegree - 1]__generics_imported0.MappableRange
+	values   [DirtymaxDegree - 1]DirtyInfo
+	children [DirtymaxDegree]*Dirtynode
+}
+
+// firstSegment returns an iterator to the lowest segment in the subtree
+// rooted at n, found by following the leftmost child down to a leaf.
+//
+// Preconditions: n.nrSegments != 0.
+func (n *Dirtynode) firstSegment() DirtyIterator {
+	leaf := n
+	for leaf.hasChildren {
+		leaf = leaf.children[0]
+	}
+	return DirtyIterator{leaf, 0}
+}
+
+// lastSegment returns an iterator to the highest segment in the subtree
+// rooted at n, found by following the rightmost child down to a leaf.
+//
+// Preconditions: n.nrSegments != 0.
+func (n *Dirtynode) lastSegment() DirtyIterator {
+	leaf := n
+	for leaf.hasChildren {
+		leaf = leaf.children[leaf.nrSegments]
+	}
+	return DirtyIterator{leaf, leaf.nrSegments - 1}
+}
+
+// prevSibling returns the node immediately to the left of n under the same
+// parent, or nil if n is the root or its parent's first child.
+func (n *Dirtynode) prevSibling() *Dirtynode {
+	p := n.parent
+	if p == nil || n.parentIndex == 0 {
+		return nil
+	}
+	return p.children[n.parentIndex-1]
+}
+
+// nextSibling returns the node immediately to the right of n under the same
+// parent, or nil if n is the root or its parent's last child.
+func (n *Dirtynode) nextSibling() *Dirtynode {
+	p := n.parent
+	if p == nil || n.parentIndex == p.nrSegments {
+		return nil
+	}
+	return p.children[n.parentIndex+1]
+}
+
+// rebalanceBeforeInsert splits n and its ancestors if they are full, as
+// required for insertion, and returns an updated iterator to the position
+// represented by gap.
+//
+// Ancestors are handled first (from the root down), so by the time n itself
+// is split its parent is guaranteed to have room for the promoted segment.
+func (n *Dirtynode) rebalanceBeforeInsert(gap DirtyGapIterator) DirtyGapIterator {
+	if n.parent != nil {
+		gap = n.parent.rebalanceBeforeInsert(gap)
+	}
+	// A node with fewer than DirtymaxDegree-1 segments has room; nothing to
+	// do.
+	if n.nrSegments < DirtymaxDegree-1 {
+		return gap
+	}
+	if n.parent == nil {
+
+		// n is a full root. The root node itself stays where it is; its
+		// contents are moved into two new children, and the median segment
+		// becomes the root's only segment.
+		left := &Dirtynode{
+			nrSegments:  DirtyminDegree - 1,
+			parent:      n,
+			parentIndex: 0,
+			hasChildren: n.hasChildren,
+		}
+		right := &Dirtynode{
+			nrSegments:  DirtyminDegree - 1,
+			parent:      n,
+			parentIndex: 1,
+			hasChildren: n.hasChildren,
+		}
+		// Segments below the median go left, segments above it go right.
+		copy(left.keys[:DirtyminDegree-1], n.keys[:DirtyminDegree-1])
+		copy(left.values[:DirtyminDegree-1], n.values[:DirtyminDegree-1])
+		copy(right.keys[:DirtyminDegree-1], n.keys[DirtyminDegree:])
+		copy(right.values[:DirtyminDegree-1], n.values[DirtyminDegree:])
+		n.keys[0], n.values[0] = n.keys[DirtyminDegree-1], n.values[DirtyminDegree-1]
+		DirtyzeroValueSlice(n.values[1:])
+		if n.hasChildren {
+			// Hand the old root's children over to the new nodes and
+			// repoint their parent links.
+			copy(left.children[:DirtyminDegree], n.children[:DirtyminDegree])
+			copy(right.children[:DirtyminDegree], n.children[DirtyminDegree:])
+			DirtyzeroNodeSlice(n.children[2:])
+			for i := 0; i < DirtyminDegree; i++ {
+				left.children[i].parent = left
+				left.children[i].parentIndex = i
+				right.children[i].parent = right
+				right.children[i].parentIndex = i
+			}
+		}
+		n.nrSegments = 1
+		n.hasChildren = true
+		n.children[0] = left
+		n.children[1] = right
+		// Only a gap that pointed into the old root needs to be remapped
+		// into whichever child now holds that position.
+		if gap.node != n {
+			return gap
+		}
+		if gap.index < DirtyminDegree {
+			return DirtyGapIterator{left, gap.index}
+		}
+		return DirtyGapIterator{right, gap.index - DirtyminDegree}
+	}
+
+	// n is a full non-root node. Open a slot in the parent at parentIndex,
+	// promote n's median segment into it, and move n's upper half into a new
+	// right-hand sibling.
+	copy(n.parent.keys[n.parentIndex+1:], n.parent.keys[n.parentIndex:n.parent.nrSegments])
+	copy(n.parent.values[n.parentIndex+1:], n.parent.values[n.parentIndex:n.parent.nrSegments])
+	n.parent.keys[n.parentIndex], n.parent.values[n.parentIndex] = n.keys[DirtyminDegree-1], n.values[DirtyminDegree-1]
+	copy(n.parent.children[n.parentIndex+2:], n.parent.children[n.parentIndex+1:n.parent.nrSegments+1])
+	// The children that were shifted right need their parentIndex updated.
+	for i := n.parentIndex + 2; i < n.parent.nrSegments+2; i++ {
+		n.parent.children[i].parentIndex = i
+	}
+	sibling := &Dirtynode{
+		nrSegments:  DirtyminDegree - 1,
+		parent:      n.parent,
+		parentIndex: n.parentIndex + 1,
+		hasChildren: n.hasChildren,
+	}
+	n.parent.children[n.parentIndex+1] = sibling
+	n.parent.nrSegments++
+	copy(sibling.keys[:DirtyminDegree-1], n.keys[DirtyminDegree:])
+	copy(sibling.values[:DirtyminDegree-1], n.values[DirtyminDegree:])
+	DirtyzeroValueSlice(n.values[DirtyminDegree-1:])
+	if n.hasChildren {
+		// Move the upper half of n's children to sibling and repoint them.
+		copy(sibling.children[:DirtyminDegree], n.children[DirtyminDegree:])
+		DirtyzeroNodeSlice(n.children[DirtyminDegree:])
+		for i := 0; i < DirtyminDegree; i++ {
+			sibling.children[i].parent = sibling
+			sibling.children[i].parentIndex = i
+		}
+	}
+	n.nrSegments = DirtyminDegree - 1
+
+	// Positions in the lower half of n are unchanged; positions in the
+	// upper half now live in sibling, shifted down by DirtyminDegree.
+	if gap.node != n {
+		return gap
+	}
+	if gap.index < DirtyminDegree {
+		return gap
+	}
+	return DirtyGapIterator{sibling, gap.index - DirtyminDegree}
+}
+
+// rebalanceAfterRemove "unsplits" n and its ancestors if they are deficient
+// (contain fewer segments than required by B-tree invariants), as required for
+// removal, and returns an updated iterator to the position represented by gap.
+//
+// Each iteration repairs n by, in order of preference: taking a segment from
+// the previous sibling, taking a segment from the next sibling, or merging n
+// with a sibling. A merge removes a segment from the parent, so the loop then
+// repeats with the parent as n.
+//
+// Precondition: n is the only node in the tree that may currently violate a
+// B-tree invariant.
+func (n *Dirtynode) rebalanceAfterRemove(gap DirtyGapIterator) DirtyGapIterator {
+	for {
+		// n holds at least the minimum number of segments: nothing (more) to
+		// repair.
+		if n.nrSegments >= DirtyminDegree-1 {
+			return gap
+		}
+		// n has no parent, and therefore no siblings to borrow from or merge
+		// with, so there is nothing further to do.
+		if n.parent == nil {
+
+			return gap
+		}
+
+		// The previous sibling has a segment to spare: the separating segment
+		// in the parent moves down to the front of n, and the sibling's last
+		// segment moves up to take its place in the parent.
+		if sibling := n.prevSibling(); sibling != nil && sibling.nrSegments >= DirtyminDegree {
+			copy(n.keys[1:], n.keys[:n.nrSegments])
+			copy(n.values[1:], n.values[:n.nrSegments])
+			n.keys[0] = n.parent.keys[n.parentIndex-1]
+			n.values[0] = n.parent.values[n.parentIndex-1]
+			n.parent.keys[n.parentIndex-1] = sibling.keys[sibling.nrSegments-1]
+			n.parent.values[n.parentIndex-1] = sibling.values[sibling.nrSegments-1]
+			dirtySetFunctions{}.ClearValue(&sibling.values[sibling.nrSegments-1])
+			if n.hasChildren {
+				// The sibling's last child moves along and becomes n's first
+				// child; n's existing children shift right by one.
+				copy(n.children[1:], n.children[:n.nrSegments+1])
+				n.children[0] = sibling.children[sibling.nrSegments]
+				sibling.children[sibling.nrSegments] = nil
+				n.children[0].parent = n
+				n.children[0].parentIndex = 0
+				for i := 1; i < n.nrSegments+2; i++ {
+					n.children[i].parentIndex = i
+				}
+			}
+			n.nrSegments++
+			sibling.nrSegments--
+			// Note that sibling.nrSegments has already been decremented when
+			// it is compared against gap.index here.
+			if gap.node == sibling && gap.index == sibling.nrSegments {
+				return DirtyGapIterator{n, 0}
+			}
+			// A segment was inserted at the front of n, so gaps in n shift
+			// right by one.
+			if gap.node == n {
+				return DirtyGapIterator{n, gap.index + 1}
+			}
+			return gap
+		}
+		// The next sibling has a segment to spare: the separating segment in
+		// the parent moves down to the end of n, and the sibling's first
+		// segment moves up to take its place in the parent.
+		if sibling := n.nextSibling(); sibling != nil && sibling.nrSegments >= DirtyminDegree {
+			n.keys[n.nrSegments] = n.parent.keys[n.parentIndex]
+			n.values[n.nrSegments] = n.parent.values[n.parentIndex]
+			n.parent.keys[n.parentIndex] = sibling.keys[0]
+			n.parent.values[n.parentIndex] = sibling.values[0]
+			copy(sibling.keys[:sibling.nrSegments-1], sibling.keys[1:])
+			copy(sibling.values[:sibling.nrSegments-1], sibling.values[1:])
+			dirtySetFunctions{}.ClearValue(&sibling.values[sibling.nrSegments-1])
+			if n.hasChildren {
+				// The sibling's first child moves along and becomes n's last
+				// child; the sibling's remaining children shift left by one.
+				n.children[n.nrSegments+1] = sibling.children[0]
+				copy(sibling.children[:sibling.nrSegments], sibling.children[1:])
+				sibling.children[sibling.nrSegments] = nil
+				n.children[n.nrSegments+1].parent = n
+				n.children[n.nrSegments+1].parentIndex = n.nrSegments + 1
+				for i := 0; i < sibling.nrSegments; i++ {
+					sibling.children[i].parentIndex = i
+				}
+			}
+			n.nrSegments++
+			sibling.nrSegments--
+			if gap.node == sibling {
+				// The sibling's first gap is now n's last gap; the sibling's
+				// other gaps shift left by one.
+				if gap.index == 0 {
+					return DirtyGapIterator{n, n.nrSegments}
+				}
+				return DirtyGapIterator{sibling, gap.index - 1}
+			}
+			return gap
+		}
+
+		// Neither sibling can spare a segment, so n has to be merged with one
+		// of them.
+		p := n.parent
+		// p holds a single segment, so children[0] and children[1] are the two
+		// nodes being merged. Fold both of them, together with p's segment,
+		// into p itself rather than leaving p with no segments.
+		if p.nrSegments == 1 {
+
+			left, right := p.children[0], p.children[1]
+			p.nrSegments = left.nrSegments + right.nrSegments + 1
+			p.hasChildren = left.hasChildren
+			p.keys[left.nrSegments] = p.keys[0]
+			p.values[left.nrSegments] = p.values[0]
+			copy(p.keys[:left.nrSegments], left.keys[:left.nrSegments])
+			copy(p.values[:left.nrSegments], left.values[:left.nrSegments])
+			copy(p.keys[left.nrSegments+1:], right.keys[:right.nrSegments])
+			copy(p.values[left.nrSegments+1:], right.values[:right.nrSegments])
+			if left.hasChildren {
+				// p adopts all of its grandchildren directly.
+				copy(p.children[:left.nrSegments+1], left.children[:left.nrSegments+1])
+				copy(p.children[left.nrSegments+1:], right.children[:right.nrSegments+1])
+				for i := 0; i < p.nrSegments+1; i++ {
+					p.children[i].parent = p
+					p.children[i].parentIndex = i
+				}
+			} else {
+				// p is now a leaf; drop the references to the absorbed nodes.
+				p.children[0] = nil
+				p.children[1] = nil
+			}
+			// Gaps that were in left keep their index in p; gaps that were in
+			// right are offset by left's segments plus the former separator.
+			if gap.node == left {
+				return DirtyGapIterator{p, gap.index}
+			}
+			if gap.node == right {
+				return DirtyGapIterator{p, gap.index + left.nrSegments + 1}
+			}
+			return gap
+		}
+		// Merge n and either sibling, along with the segment separating the
+		// two, into whichever of the two nodes comes first. This is the
+		// reverse of the non-root splitting case in
+		// node.rebalanceBeforeInsert.
+		var left, right *Dirtynode
+		if n.parentIndex > 0 {
+			left = n.prevSibling()
+			right = n
+		} else {
+			left = n
+			right = n.nextSibling()
+		}
+
+		// Remap the gap before left.nrSegments is updated below: gaps in right
+		// land after left's segments and the separator.
+		if gap.node == right {
+			gap = DirtyGapIterator{left, gap.index + left.nrSegments + 1}
+		}
+		left.keys[left.nrSegments] = p.keys[left.parentIndex]
+		left.values[left.nrSegments] = p.values[left.parentIndex]
+		copy(left.keys[left.nrSegments+1:], right.keys[:right.nrSegments])
+		copy(left.values[left.nrSegments+1:], right.values[:right.nrSegments])
+		if left.hasChildren {
+			copy(left.children[left.nrSegments+1:], right.children[:right.nrSegments+1])
+			for i := left.nrSegments + 1; i < left.nrSegments+right.nrSegments+2; i++ {
+				left.children[i].parent = left
+				left.children[i].parentIndex = i
+			}
+		}
+		left.nrSegments += right.nrSegments + 1
+		// Remove the separator and the slot that held right from p, closing
+		// the holes and renumbering the remaining children.
+		copy(p.keys[left.parentIndex:], p.keys[left.parentIndex+1:p.nrSegments])
+		copy(p.values[left.parentIndex:], p.values[left.parentIndex+1:p.nrSegments])
+		dirtySetFunctions{}.ClearValue(&p.values[p.nrSegments-1])
+		copy(p.children[left.parentIndex+1:], p.children[left.parentIndex+2:p.nrSegments+1])
+		for i := 0; i < p.nrSegments; i++ {
+			p.children[i].parentIndex = i
+		}
+		p.children[p.nrSegments] = nil
+		p.nrSegments--
+
+		// p has lost a segment and may now be deficient itself; check it on
+		// the next iteration.
+		n = p
+	}
+}
+
+// An Iterator is conceptually one of:
+//
+// - A pointer to a segment in a set; or
+//
+// - A terminal iterator, which is a sentinel indicating that the end of
+// iteration has been reached.
+//
+// Iterators are copyable values and are meaningfully equality-comparable. The
+// zero value of Iterator is a terminal iterator.
+//
+// Unless otherwise specified, any mutation of a set invalidates all existing
+// iterators into the set.
+type DirtyIterator struct {
+	// node is the node containing the iterated segment. If the iterator is
+	// terminal, node is nil.
+	node *Dirtynode
+
+	// index is the index of the segment in node.keys/values.
+	index int
+}
+
+// Ok returns true if the iterator is not terminal. All other methods are only
+// valid for non-terminal iterators.
+func (seg DirtyIterator) Ok() bool {
+	return seg.node != nil
+}
+
+// Range returns the iterated segment's range key.
+func (seg DirtyIterator) Range() __generics_imported0.MappableRange {
+	return seg.node.keys[seg.index]
+}
+
+// Start is equivalent to Range().Start, but should be preferred if only the
+// start of the range is needed.
+func (seg DirtyIterator) Start() uint64 {
+	return seg.node.keys[seg.index].Start
+}
+
+// End is equivalent to Range().End, but should be preferred if only the end of
+// the range is needed.
+func (seg DirtyIterator) End() uint64 {
+	return seg.node.keys[seg.index].End
+}
+
+// SetRangeUnchecked mutates the iterated segment's range key. This operation
+// does not invalidate any iterators.
+//
+// Preconditions:
+//
+// - r.Length() > 0.
+//
+// - The new range must not overlap an existing one: If seg.NextSegment().Ok(),
+// then r.End <= seg.NextSegment().Start(); if seg.PrevSegment().Ok(), then
+// r.Start >= seg.PrevSegment().End().
+func (seg DirtyIterator) SetRangeUnchecked(r __generics_imported0.MappableRange) {
+	seg.node.keys[seg.index] = r
+}
+
+// SetRange mutates the iterated segment's range key. If the new range would
+// cause the iterated segment to overlap another segment, or if the new range
+// is invalid, SetRange panics. This operation does not invalidate any
+// iterators.
+func (seg DirtyIterator) SetRange(r __generics_imported0.MappableRange) {
+	if r.Length() <= 0 {
+		panic(fmt.Sprintf("invalid segment range %v", r))
+	}
+	if prev := seg.PrevSegment(); prev.Ok() && r.Start < prev.End() {
+		panic(fmt.Sprintf("new segment range %v overlaps segment range %v", r, prev.Range()))
+	}
+	if next := seg.NextSegment(); next.Ok() && r.End > next.Start() {
+		panic(fmt.Sprintf("new segment range %v overlaps segment range %v", r, next.Range()))
+	}
+	seg.SetRangeUnchecked(r)
+}
+
+// SetStartUnchecked mutates the iterated segment's start. This operation does
+// not invalidate any iterators.
+//
+// Preconditions: The new start must be valid: start < seg.End(); if
+// seg.PrevSegment().Ok(), then start >= seg.PrevSegment().End().
+func (seg DirtyIterator) SetStartUnchecked(start uint64) {
+	seg.node.keys[seg.index].Start = start
+}
+
+// SetStart mutates the iterated segment's start. If the new start value would
+// cause the iterated segment to overlap another segment, or would result in an
+// invalid range, SetStart panics. This operation does not invalidate any
+// iterators.
+func (seg DirtyIterator) SetStart(start uint64) {
+	if start >= seg.End() {
+		panic(fmt.Sprintf("new start %v would invalidate segment range %v", start, seg.Range()))
+	}
+	if prev := seg.PrevSegment(); prev.Ok() && start < prev.End() {
+		panic(fmt.Sprintf("new start %v would cause segment range %v to overlap segment range %v", start, seg.Range(), prev.Range()))
+	}
+	seg.SetStartUnchecked(start)
+}
+
+// SetEndUnchecked mutates the iterated segment's end. This operation does not
+// invalidate any iterators.
+//
+// Preconditions: The new end must be valid: end > seg.Start(); if
+// seg.NextSegment().Ok(), then end <= seg.NextSegment().Start().
+func (seg DirtyIterator) SetEndUnchecked(end uint64) {
+	seg.node.keys[seg.index].End = end
+}
+
+// SetEnd mutates the iterated segment's end. If the new end value would cause
+// the iterated segment to overlap another segment, or would result in an
+// invalid range, SetEnd panics. This operation does not invalidate any
+// iterators.
+func (seg DirtyIterator) SetEnd(end uint64) {
+	if end <= seg.Start() {
+		panic(fmt.Sprintf("new end %v would invalidate segment range %v", end, seg.Range()))
+	}
+	if next := seg.NextSegment(); next.Ok() && end > next.Start() {
+		panic(fmt.Sprintf("new end %v would cause segment range %v to overlap segment range %v", end, seg.Range(), next.Range()))
+	}
+	seg.SetEndUnchecked(end)
+}
+
+// Value returns a copy of the iterated segment's value.
+func (seg DirtyIterator) Value() DirtyInfo {
+	return seg.node.values[seg.index]
+}
+
+// ValuePtr returns a pointer to the iterated segment's value. The pointer is
+// invalidated if the iterator is invalidated. This operation does not
+// invalidate any iterators.
+func (seg DirtyIterator) ValuePtr() *DirtyInfo {
+	return &seg.node.values[seg.index]
+}
+
+// SetValue mutates the iterated segment's value. This operation does not
+// invalidate any iterators.
+func (seg DirtyIterator) SetValue(val DirtyInfo) {
+	seg.node.values[seg.index] = val
+}
+
+// PrevSegment returns the iterated segment's predecessor. If there is no
+// preceding segment, PrevSegment returns a terminal iterator.
+func (seg DirtyIterator) PrevSegment() DirtyIterator {
+	// Interior node: children[seg.index] is the child positioned immediately
+	// before this segment, so the answer is that child's lastSegment().
+	if seg.node.hasChildren {
+		return seg.node.children[seg.index].lastSegment()
+	}
+	// Leaf node: the predecessor is the previous segment in the same node,
+	// when there is one.
+	if seg.index > 0 {
+		return DirtyIterator{seg.node, seg.index - 1}
+	}
+	// First segment of a leaf that has no parent: nothing precedes it.
+	if seg.node.parent == nil {
+		return DirtyIterator{}
+	}
+	// First segment of a leaf with a parent: continue the search upwards from
+	// this leaf's position in its parent.
+	return DirtysegmentBeforePosition(seg.node.parent, seg.node.parentIndex)
+}
+
+// NextSegment returns the iterated segment's successor. If there is no
+// succeeding segment, NextSegment returns a terminal iterator.
+func (seg DirtyIterator) NextSegment() DirtyIterator {
+	// Interior node: children[seg.index+1] is the child positioned immediately
+	// after this segment, so the answer is that child's firstSegment().
+	if seg.node.hasChildren {
+		return seg.node.children[seg.index+1].firstSegment()
+	}
+	// Leaf node: the successor is the next segment in the same node, when
+	// there is one.
+	if seg.index < seg.node.nrSegments-1 {
+		return DirtyIterator{seg.node, seg.index + 1}
+	}
+	// Last segment of a leaf that has no parent: nothing follows it.
+	if seg.node.parent == nil {
+		return DirtyIterator{}
+	}
+	// Last segment of a leaf with a parent: continue the search upwards from
+	// this leaf's position in its parent.
+	return DirtysegmentAfterPosition(seg.node.parent, seg.node.parentIndex)
+}
+
+// PrevGap returns the gap immediately before the iterated segment.
+func (seg DirtyIterator) PrevGap() DirtyGapIterator {
+	// In an interior node, the gap is found by descending: it is the gap that
+	// follows the last segment of the child positioned before this segment.
+	if seg.node.hasChildren {
+
+		return seg.node.children[seg.index].lastSegment().NextGap()
+	}
+	// In a leaf, gap i sits directly before segment i.
+	return DirtyGapIterator{seg.node, seg.index}
+}
+
+// NextGap returns the gap immediately after the iterated segment.
+func (seg DirtyIterator) NextGap() DirtyGapIterator {
+	// In an interior node, the gap is found by descending: it is the gap that
+	// precedes the first segment of the child positioned after this segment.
+	if seg.node.hasChildren {
+		return seg.node.children[seg.index+1].firstSegment().PrevGap()
+	}
+	// In a leaf, gap i+1 sits directly after segment i.
+	return DirtyGapIterator{seg.node, seg.index + 1}
+}
+
+// PrevNonEmpty looks at what lies directly before the iterated segment. When
+// the preceding gap has non-zero length, that gap is returned together with a
+// terminal segment iterator. When the gap is empty, the segment before it is
+// returned together with a terminal gap iterator; if seg.Start() ==
+// Functions.MinKey() there is no such segment and both returned iterators are
+// terminal.
+func (seg DirtyIterator) PrevNonEmpty() (DirtyIterator, DirtyGapIterator) {
+	before := seg.PrevGap()
+	if before.Range().Length() == 0 {
+		// Nothing separates seg from whatever precedes it.
+		return before.PrevSegment(), DirtyGapIterator{}
+	}
+	return DirtyIterator{}, before
+}
+
+// NextNonEmpty looks at what lies directly after the iterated segment. When
+// the following gap has non-zero length, that gap is returned together with a
+// terminal segment iterator. When the gap is empty, the segment after it is
+// returned together with a terminal gap iterator; if seg.End() ==
+// Functions.MaxKey() there is no such segment and both returned iterators are
+// terminal.
+func (seg DirtyIterator) NextNonEmpty() (DirtyIterator, DirtyGapIterator) {
+	after := seg.NextGap()
+	if after.Range().Length() == 0 {
+		// Nothing separates seg from whatever follows it.
+		return after.NextSegment(), DirtyGapIterator{}
+	}
+	return DirtyIterator{}, after
+}
+
+// A GapIterator is conceptually one of:
+//
+// - A pointer to a position between two segments, before the first segment, or
+// after the last segment in a set, called a *gap*; or
+//
+// - A terminal iterator, which is a sentinel indicating that the end of
+// iteration has been reached.
+//
+// Note that the gap between two adjacent segments exists (iterators to it are
+// non-terminal), but has a length of zero. GapIterator.IsEmpty returns true
+// for such gaps. An empty set contains a single gap, spanning the entire range
+// of the set's keys.
+//
+// GapIterators are copyable values and are meaningfully equality-comparable.
+// The zero value of GapIterator is a terminal iterator.
+//
+// Unless otherwise specified, any mutation of a set invalidates all existing
+// iterators into the set.
+type DirtyGapIterator struct {
+	// The representation of a GapIterator is identical to that of an Iterator,
+	// except that index corresponds to positions between segments in the same
+	// way as for node.children (see comment for node.nrSegments).
+	node  *Dirtynode
+	index int
+}
+
+// Ok returns true if the iterator is not terminal. All other methods are only
+// valid for non-terminal iterators.
+func (gap DirtyGapIterator) Ok() bool {
+	return gap.node != nil
+}
+
+// Range returns the range spanned by the iterated gap, running from
+// gap.Start() to gap.End().
+func (gap DirtyGapIterator) Range() __generics_imported0.MappableRange {
+	// Keyed fields keep this correct if MappableRange's field order ever
+	// changes, and satisfy go vet's composites check for imported structs.
+	return __generics_imported0.MappableRange{Start: gap.Start(), End: gap.End()}
+}
+
+// Start is equivalent to Range().Start, but should be preferred if only the
+// start of the range is needed.
+func (gap DirtyGapIterator) Start() uint64 {
+	if ps := gap.PrevSegment(); ps.Ok() {
+		return ps.End()
+	}
+	return dirtySetFunctions{}.MinKey()
+}
+
+// End is equivalent to Range().End, but should be preferred if only the end of
+// the range is needed.
+func (gap DirtyGapIterator) End() uint64 {
+	if ns := gap.NextSegment(); ns.Ok() {
+		return ns.Start()
+	}
+	return dirtySetFunctions{}.MaxKey()
+}
+
+// IsEmpty returns true if the iterated gap is empty (that is, the "gap" is
+// between two adjacent segments.)
+func (gap DirtyGapIterator) IsEmpty() bool {
+	return gap.Range().Length() == 0
+}
+
+// PrevSegment returns the segment immediately before the iterated gap. If no
+// such segment exists, PrevSegment returns a terminal iterator.
+func (gap DirtyGapIterator) PrevSegment() DirtyIterator {
+	return DirtysegmentBeforePosition(gap.node, gap.index)
+}
+
+// NextSegment returns the segment immediately after the iterated gap. If no
+// such segment exists, NextSegment returns a terminal iterator.
+func (gap DirtyGapIterator) NextSegment() DirtyIterator {
+	return DirtysegmentAfterPosition(gap.node, gap.index)
+}
+
+// PrevGap returns the gap that precedes the iterated gap, i.e. the gap on the
+// far side of the segment immediately before it. If there is no preceding
+// segment, there is no preceding gap either, and PrevGap returns a terminal
+// iterator.
+func (gap DirtyGapIterator) PrevGap() DirtyGapIterator {
+	if before := gap.PrevSegment(); before.Ok() {
+		return before.PrevGap()
+	}
+	return DirtyGapIterator{}
+}
+
+// NextGap returns the gap that follows the iterated gap, i.e. the gap on the
+// far side of the segment immediately after it. If there is no following
+// segment, there is no following gap either, and NextGap returns a terminal
+// iterator.
+func (gap DirtyGapIterator) NextGap() DirtyGapIterator {
+	if after := gap.NextSegment(); after.Ok() {
+		return after.NextGap()
+	}
+	return DirtyGapIterator{}
+}
+
+// DirtysegmentBeforePosition returns the predecessor segment of the position
+// given by n.children[i], which may or may not contain a child. If no such
+// segment exists, DirtysegmentBeforePosition returns a terminal iterator.
+func DirtysegmentBeforePosition(n *Dirtynode, i int) DirtyIterator {
+	// Position 0 has no segment to its left within n, so climb to n's own
+	// position in its parent until a position with a left neighbor is found
+	// or the top of the tree is reached.
+	for i == 0 {
+		if n.parent == nil {
+			return DirtyIterator{}
+		}
+		n, i = n.parent, n.parentIndex
+	}
+	// Segment i-1 sits directly before position i.
+	return DirtyIterator{n, i - 1}
+}
+
+// DirtysegmentAfterPosition returns the successor segment of the position
+// given by n.children[i], which may or may not contain a child. If no such
+// segment exists, DirtysegmentAfterPosition returns a terminal iterator.
+func DirtysegmentAfterPosition(n *Dirtynode, i int) DirtyIterator {
+	// Position n.nrSegments has no segment to its right within n, so climb to
+	// n's own position in its parent until a position with a right neighbor
+	// is found or the top of the tree is reached.
+	for i == n.nrSegments {
+		if n.parent == nil {
+			return DirtyIterator{}
+		}
+		n, i = n.parent, n.parentIndex
+	}
+	// Segment i sits directly after position i.
+	return DirtyIterator{n, i}
+}
+
+// DirtyzeroValueSlice calls dirtySetFunctions.ClearValue on every element of
+// slice, in place.
+func DirtyzeroValueSlice(slice []DirtyInfo) {
+
+	for i := range slice {
+		dirtySetFunctions{}.ClearValue(&slice[i])
+	}
+}
+
+// DirtyzeroNodeSlice sets every element of slice to nil, in place, so that the
+// slots no longer reference the nodes they pointed to.
+func DirtyzeroNodeSlice(slice []*Dirtynode) {
+	for i := range slice {
+		slice[i] = nil
+	}
+}
+
+// String stringifies a Set for debugging.
+func (s *DirtySet) String() string {
+	return s.root.String()
+}
+
+// String stringifies a node (and all of its children) for debugging.
+func (n *Dirtynode) String() string {
+	var buf bytes.Buffer
+	n.writeDebugString(&buf, "")
+	return buf.String()
+}
+
+// writeDebugString appends a textual dump of n and all of its descendants to
+// buf. Each segment of n is written as a line starting with prefix; each child
+// is written just before the segment that follows it, using a prefix extended
+// with the child's index. A WARNING line is emitted when hasChildren disagrees
+// with the contents of n.children, or when a child that precedes a segment
+// does not link back to n at the expected index (the final child's linkage is
+// not checked).
+func (n *Dirtynode) writeDebugString(buf *bytes.Buffer, prefix string) {
+	if wantChildren := n.nrSegments > 0 && n.children[0] != nil; n.hasChildren != wantChildren {
+		buf.WriteString(prefix)
+		fmt.Fprintf(buf, "WARNING: inconsistent value of hasChildren: got %v, want %v\n", n.hasChildren, wantChildren)
+	}
+	for i := 0; i < n.nrSegments; i++ {
+		child := n.children[i]
+		if child != nil {
+			childPrefix := fmt.Sprintf("%s- % 3d ", prefix, i)
+			if child.parent != n || child.parentIndex != i {
+				buf.WriteString(childPrefix)
+				fmt.Fprintf(buf, "WARNING: inconsistent linkage to parent: got (%p, %d), want (%p, %d)\n", child.parent, child.parentIndex, n, i)
+			}
+			child.writeDebugString(buf, childPrefix)
+		}
+		buf.WriteString(prefix)
+		fmt.Fprintf(buf, "- % 3d: %v => %v\n", i, n.keys[i], n.values[i])
+	}
+	// The child after the last segment, if any.
+	if last := n.children[n.nrSegments]; last != nil {
+		last.writeDebugString(buf, fmt.Sprintf("%s- % 3d ", prefix, n.nrSegments))
+	}
+}
+
+// SegmentDataSlices represents segments from a set as slices of start, end, and
+// values. SegmentDataSlices is primarily used as an intermediate representation
+// for save/restore and the layout here is optimized for that.
+//
+// +stateify savable
+type DirtySegmentDataSlices struct {
+	Start  []uint64
+	End    []uint64
+	Values []DirtyInfo
+}
+
+// ExportSortedSlices returns a copy of all segments in the given set, in
+// ascending key order. The i-th elements of the returned Start, End and Values
+// slices together describe the i-th segment.
+func (s *DirtySet) ExportSortedSlices() *DirtySegmentDataSlices {
+	var sds DirtySegmentDataSlices
+	for seg := s.FirstSegment(); seg.Ok(); seg = seg.NextSegment() {
+		sds.Start = append(sds.Start, seg.Start())
+		sds.End = append(sds.End, seg.End())
+		sds.Values = append(sds.Values, seg.Value())
+	}
+	// Clip each slice's capacity to its length so that the spare capacity left
+	// over from append's growth is not carried along with the result.
+	sds.Start = sds.Start[:len(sds.Start):len(sds.Start)]
+	sds.End = sds.End[:len(sds.End):len(sds.End)]
+	sds.Values = sds.Values[:len(sds.Values):len(sds.Values)]
+	return &sds
+}
+
+// ImportSortedSlices initializes the given set from the given slices.
+//
+// Preconditions: s must be empty. sds must represent a valid set (the segments
+// in sds must have valid lengths that do not overlap). The segments in sds
+// must be sorted in ascending key order.
+//
+// ImportSortedSlices returns an error if s is not empty, if the Start, End and
+// Values slices of sds differ in length, if a segment is empty or inverted
+// (Start >= End), or if a segment overlaps or sorts before an earlier one.
+// Apart from the first two checks, errors are detected while inserting, so
+// segments preceding the offending one remain in s.
+func (s *DirtySet) ImportSortedSlices(sds *DirtySegmentDataSlices) error {
+	if !s.IsEmpty() {
+		return fmt.Errorf("cannot import into non-empty set %v", s)
+	}
+	// The three slices are indexed in lockstep below; reject a malformed
+	// DirtySegmentDataSlices up front instead of panicking with an index out
+	// of range (or silently ignoring trailing entries).
+	if len(sds.End) != len(sds.Start) || len(sds.Values) != len(sds.Start) {
+		return fmt.Errorf("mismatched segment data lengths: %d starts, %d ends, %d values", len(sds.Start), len(sds.End), len(sds.Values))
+	}
+	gap := s.FirstGap()
+	for i := range sds.Start {
+		// An empty or inverted range is never a valid segment, and the
+		// superset test below would not reject it.
+		if sds.Start[i] >= sds.End[i] {
+			return fmt.Errorf("invalid segment range: [%d, %d) => %v", sds.Start[i], sds.End[i], sds.Values[i])
+		}
+		r := __generics_imported0.MappableRange{Start: sds.Start[i], End: sds.End[i]}
+		// gap is the gap after the previously inserted segment, so anything
+		// not fully inside it overlaps that segment or is out of order.
+		if !gap.Range().IsSupersetOf(r) {
+			return fmt.Errorf("segment overlaps a preceding segment or is incorrectly sorted: [%d, %d) => %v", sds.Start[i], sds.End[i], sds.Values[i])
+		}
+		gap = s.InsertWithoutMerging(gap, r, sds.Values[i]).NextGap()
+	}
+	return nil
+}
+// saveRoot returns the set's segments in the flat form produced by
+// ExportSortedSlices.
+//
+// NOTE(review): presumably the save half of the save/restore hooks for the
+// set's root field — confirm against the generated state code.
+func (s *DirtySet) saveRoot() *DirtySegmentDataSlices {
+	return s.ExportSortedSlices()
+}
+
+// loadRoot populates s from sds via ImportSortedSlices. It panics if the
+// import fails, i.e. if s is not empty or sds does not describe a valid set.
+//
+// NOTE(review): presumably the load half of the save/restore hooks for the
+// set's root field — confirm against the generated state code.
+func (s *DirtySet) loadRoot(sds *DirtySegmentDataSlices) {
+	if err := s.ImportSortedSlices(sds); err != nil {
+		panic(err)
+	}
+}
diff --git a/pkg/sentry/fs/fsutil/file.go b/pkg/sentry/fs/fsutil/file.go
new file mode 100644
index 000000000..9381963d0
--- /dev/null
+++ b/pkg/sentry/fs/fsutil/file.go
@@ -0,0 +1,394 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package fsutil
+
+import (
+	"gvisor.googlesource.com/gvisor/pkg/sentry/arch"
+	"gvisor.googlesource.com/gvisor/pkg/sentry/context"
+	"gvisor.googlesource.com/gvisor/pkg/sentry/fs"
+	"gvisor.googlesource.com/gvisor/pkg/sentry/memmap"
+	"gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
+	"gvisor.googlesource.com/gvisor/pkg/syserror"
+	"gvisor.googlesource.com/gvisor/pkg/waiter"
+)
+
+// FileNoopRelease implements fs.FileOperations.Release for files that have no
+// resources to release.
+type FileNoopRelease struct{}
+
+// Release is a no-op.
+func (FileNoopRelease) Release() {}
+
+// SeekWithDirCursor is used to implement fs.FileOperations.Seek. If dirCursor
+// is not nil and the seek was on a directory, the cursor will be updated.
+//
+// Currently only seeking to 0 on a directory is supported.
+//
+// On success it returns the new offset and a nil error. On failure it returns
+// the file's current offset together with ESPIPE (pipes and sockets), EINVAL
+// (unsupported inode type or whence, or an out-of-range offset), or the error
+// from Inode.UnstableAttr. Seeks on character devices are ignored and always
+// yield (0, nil). The file's offset is only read here, never written.
+//
+// FIXME(b/33075855): Lift directory seeking limitations.
+func SeekWithDirCursor(ctx context.Context, file *fs.File, whence fs.SeekWhence, offset int64, dirCursor *string) (int64, error) {
+	inode := file.Dirent.Inode
+	current := file.Offset()
+
+	// Does the Inode represent a non-seekable type?
+	if fs.IsPipe(inode.StableAttr) || fs.IsSocket(inode.StableAttr) {
+		return current, syserror.ESPIPE
+	}
+
+	// Does the Inode represent a character device?
+	if fs.IsCharDevice(inode.StableAttr) {
+		// Ignore seek requests.
+		//
+		// FIXME(b/34716638): This preserves existing
+		// behavior but is not universally correct.
+		return 0, nil
+	}
+
+	// Otherwise compute the new offset.
+	switch whence {
+	case fs.SeekSet:
+		switch inode.StableAttr.Type {
+		case fs.RegularFile, fs.SpecialFile, fs.BlockDevice:
+			if offset < 0 {
+				return current, syserror.EINVAL
+			}
+			return offset, nil
+		case fs.Directory, fs.SpecialDirectory:
+			if offset != 0 {
+				return current, syserror.EINVAL
+			}
+			// SEEK_SET to 0 moves the directory "cursor" to the beginning.
+			if dirCursor != nil {
+				*dirCursor = ""
+			}
+			return 0, nil
+		default:
+			return current, syserror.EINVAL
+		}
+	case fs.SeekCurrent:
+		switch inode.StableAttr.Type {
+		case fs.RegularFile, fs.SpecialFile, fs.BlockDevice:
+			if current+offset < 0 {
+				return current, syserror.EINVAL
+			}
+			return current + offset, nil
+		case fs.Directory, fs.SpecialDirectory:
+			// Only a no-op relative seek is accepted on directories.
+			if offset != 0 {
+				return current, syserror.EINVAL
+			}
+			return current, nil
+		default:
+			return current, syserror.EINVAL
+		}
+	case fs.SeekEnd:
+		switch inode.StableAttr.Type {
+		case fs.RegularFile, fs.BlockDevice:
+			// Allow the file to determine the end.
+			uattr, err := inode.UnstableAttr(ctx)
+			if err != nil {
+				return current, err
+			}
+			sz := uattr.Size
+			if sz+offset < 0 {
+				return current, syserror.EINVAL
+			}
+			return sz + offset, nil
+		// FIXME(b/34778850): This is not universally correct.
+		// Remove SpecialDirectory.
+		case fs.SpecialDirectory:
+			if offset != 0 {
+				return current, syserror.EINVAL
+			}
+			// SEEK_END to 0 moves the directory "cursor" to the end.
+			//
+			// FIXME(b/35442290): This ensures that after the seek,
+			// reading on the directory will get EOF. But it is not
+			// correct in general because the directory can grow in
+			// size; attempting to read those new entries will be
+			// futile (EOF will always be the result).
+			return fs.FileMaxOffset, nil
+		default:
+			return current, syserror.EINVAL
+		}
+	}
+
+	// Not a valid seek request.
+	return current, syserror.EINVAL
+}
+
+// FileGenericSeek implements fs.FileOperations.Seek for files that use a
+// generic seek implementation.
+type FileGenericSeek struct{}
+
+// Seek implements fs.FileOperations.Seek.
+func (FileGenericSeek) Seek(ctx context.Context, file *fs.File, whence fs.SeekWhence, offset int64) (int64, error) {
+	return SeekWithDirCursor(ctx, file, whence, offset, nil)
+}
+
+// FileZeroSeek implements fs.FileOperations.Seek for files that maintain a
+// constant zero-value offset and require a no-op Seek.
+type FileZeroSeek struct{}
+
+// Seek implements fs.FileOperations.Seek.
+func (FileZeroSeek) Seek(context.Context, *fs.File, fs.SeekWhence, int64) (int64, error) {
+	return 0, nil
+}
+
+// FileNoSeek implements fs.FileOperations.Seek to return EINVAL.
+type FileNoSeek struct{}
+
+// Seek implements fs.FileOperations.Seek.
+func (FileNoSeek) Seek(context.Context, *fs.File, fs.SeekWhence, int64) (int64, error) {
+	return 0, syserror.EINVAL
+}
+
+// FilePipeSeek implements fs.FileOperations.Seek and can be used for files
+// that behave like pipes (seeking is not supported).
+type FilePipeSeek struct{}
+
+// Seek implements fs.FileOperations.Seek.
+func (FilePipeSeek) Seek(context.Context, *fs.File, fs.SeekWhence, int64) (int64, error) {
+	return 0, syserror.ESPIPE
+}
+
+// FileNotDirReaddir implements fs.FileOperations.Readdir for non-directories.
+type FileNotDirReaddir struct{}
+
+// Readdir implements fs.FileOperations.Readdir.
+func (FileNotDirReaddir) Readdir(context.Context, *fs.File, fs.DentrySerializer) (int64, error) {
+	return 0, syserror.ENOTDIR
+}
+
+// FileNoFsync implements fs.FileOperations.Fsync for files that don't support
+// syncing.
+type FileNoFsync struct{}
+
+// Fsync implements fs.FileOperations.Fsync.
+func (FileNoFsync) Fsync(context.Context, *fs.File, int64, int64, fs.SyncType) error {
+	return syserror.EINVAL
+}
+
+// FileNoopFsync implements fs.FileOperations.Fsync for files that don't need
+// to be synced.
+type FileNoopFsync struct{}
+
+// Fsync implements fs.FileOperations.Fsync.
+func (FileNoopFsync) Fsync(context.Context, *fs.File, int64, int64, fs.SyncType) error {
+	return nil
+}
+
+// FileNoopFlush implements fs.FileOperations.Flush as a no-op.
+type FileNoopFlush struct{}
+
+// Flush implements fs.FileOperations.Flush.
+func (FileNoopFlush) Flush(context.Context, *fs.File) error {
+	return nil
+}
+
+// FileNoMMap implements fs.FileOperations.ConfigureMMap for files that cannot
+// be memory mapped.
+type FileNoMMap struct{}
+
+// ConfigureMMap implements fs.FileOperations.ConfigureMMap.
+func (FileNoMMap) ConfigureMMap(context.Context, *fs.File, *memmap.MMapOpts) error {
+	return syserror.ENODEV
+}
+
+// GenericConfigureMMap implements fs.FileOperations.ConfigureMMap for most
+// filesystems that support memory mapping.
+//
+// It sets opts.Mappable to m and opts.MappingIdentity to file, taking a
+// reference on file, and always returns nil.
+func GenericConfigureMMap(file *fs.File, m memmap.Mappable, opts *memmap.MMapOpts) error {
+	opts.Mappable = m
+	opts.MappingIdentity = file
+	// NOTE(review): this reference is presumably held on behalf of
+	// opts.MappingIdentity and dropped by whoever consumes opts — confirm.
+	file.IncRef()
+	return nil
+}
+
+// FileNoIoctl implements fs.FileOperations.Ioctl for files that don't
+// implement the ioctl syscall.
+type FileNoIoctl struct{}
+
+// Ioctl implements fs.FileOperations.Ioctl.
+func (FileNoIoctl) Ioctl(ctx context.Context, io usermem.IO, args arch.SyscallArguments) (uintptr, error) {
+	return 0, syserror.ENOTTY
+}
+
+// FileNoSplice implements fs.FileOperations.ReadFrom and
+// fs.FileOperations.WriteTo for files that don't support splice.
+type FileNoSplice struct{}
+
+// WriteTo implements fs.FileOperations.WriteTo.
+func (FileNoSplice) WriteTo(context.Context, *fs.File, *fs.File, fs.SpliceOpts) (int64, error) {
+	return 0, syserror.ENOSYS
+}
+
+// ReadFrom implements fs.FileOperations.ReadFrom.
+func (FileNoSplice) ReadFrom(context.Context, *fs.File, *fs.File, fs.SpliceOpts) (int64, error) {
+	return 0, syserror.ENOSYS
+}
+
+// DirFileOperations implements most of fs.FileOperations for directories,
+// except for Readdir and UnstableAttr which the embedding type must implement.
+type DirFileOperations struct {
+	waiter.AlwaysReady
+	FileGenericSeek
+	FileNoIoctl
+	FileNoMMap
+	FileNoopFlush
+	FileNoopFsync
+	FileNoopRelease
+	FileNoSplice
+}
+
+// Read implements fs.FileOperations.Read.
+func (*DirFileOperations) Read(context.Context, *fs.File, usermem.IOSequence, int64) (int64, error) {
+	return 0, syserror.EISDIR
+}
+
+// Write implements fs.FileOperations.Write.
+func (*DirFileOperations) Write(context.Context, *fs.File, usermem.IOSequence, int64) (int64, error) {
+	return 0, syserror.EISDIR
+}
+
+// StaticDirFileOperations implements fs.FileOperations for directories with
+// static children.
+//
+// +stateify savable
+type StaticDirFileOperations struct {
+	DirFileOperations        `state:"nosave"`
+	FileUseInodeUnstableAttr `state:"nosave"`
+
+	// dentryMap is a SortedDentryMap used to implement Readdir.
+	dentryMap *fs.SortedDentryMap
+
+	// dirCursor contains the name of the last directory entry that was
+	// serialized.
+	dirCursor string
+}
+
+// NewStaticDirFileOperations returns a new StaticDirFileOperations that will
+// iterate the given dentry map.
+func NewStaticDirFileOperations(dentries *fs.SortedDentryMap) *StaticDirFileOperations {
+	return &StaticDirFileOperations{
+		dentryMap: dentries,
+	}
+}
+
+// IterateDir implements DirIterator.IterateDir.
+func (sdfo *StaticDirFileOperations) IterateDir(ctx context.Context, dirCtx *fs.DirCtx, offset int) (int, error) {
+	n, err := fs.GenericReaddir(dirCtx, sdfo.dentryMap)
+	return offset + n, err
+}
+
+// Readdir implements fs.FileOperations.Readdir.
+//
+// It delegates to fs.DirentReaddir, passing sdfo as the directory iterator,
+// the file's current offset, and a DirCtx that carries the caller's serializer
+// and a pointer to sdfo.dirCursor.
+func (sdfo *StaticDirFileOperations) Readdir(ctx context.Context, file *fs.File, serializer fs.DentrySerializer) (int64, error) {
+	root := fs.RootFromContext(ctx)
+	// root may be nil. NOTE(review): when it is not, the deferred DecRef
+	// presumably balances a reference taken by RootFromContext — confirm.
+	if root != nil {
+		defer root.DecRef()
+	}
+	dirCtx := &fs.DirCtx{
+		Serializer: serializer,
+		DirCursor:  &sdfo.dirCursor,
+	}
+	return fs.DirentReaddir(ctx, file.Dirent, sdfo, root, dirCtx, file.Offset())
+}
+
+// NoReadWriteFile is a file that does not support reading or writing.
+//
+// +stateify savable
+type NoReadWriteFile struct {
+	waiter.AlwaysReady       `state:"nosave"`
+	FileGenericSeek          `state:"nosave"`
+	FileNoIoctl              `state:"nosave"`
+	FileNoMMap               `state:"nosave"`
+	FileNoopFsync            `state:"nosave"`
+	FileNoopFlush            `state:"nosave"`
+	FileNoopRelease          `state:"nosave"`
+	FileNoRead               `state:"nosave"`
+	FileNoWrite              `state:"nosave"`
+	FileNotDirReaddir        `state:"nosave"`
+	FileUseInodeUnstableAttr `state:"nosave"`
+	FileNoSplice             `state:"nosave"`
+}
+
+var _ fs.FileOperations = (*NoReadWriteFile)(nil)
+
+// FileStaticContentReader is a helper to implement fs.FileOperations.Read with
+// static content.
+//
+// +stateify savable
+type FileStaticContentReader struct {
+	// content is immutable.
+	content []byte
+}
+
+// NewFileStaticContentReader initializes a FileStaticContentReader with the
+// given content.
+func NewFileStaticContentReader(b []byte) FileStaticContentReader {
+	return FileStaticContentReader{
+		content: b,
+	}
+}
+
+// Read implements fs.FileOperations.Read.
+//
+// A negative offset yields EINVAL. An offset at or beyond the end of the
+// content yields (0, nil). Otherwise the content from offset onwards is copied
+// out to dst, and the number of bytes copied is returned along with any error
+// from the copy.
+func (scr *FileStaticContentReader) Read(ctx context.Context, _ *fs.File, dst usermem.IOSequence, offset int64) (int64, error) {
+	switch {
+	case offset < 0:
+		return 0, syserror.EINVAL
+	case offset >= int64(len(scr.content)):
+		return 0, nil
+	}
+	remaining := scr.content[offset:]
+	copied, err := dst.CopyOut(ctx, remaining)
+	return int64(copied), err
+}
+
+// FileNoopWrite implements fs.FileOperations.Write as a noop.
+type FileNoopWrite struct{}
+
+// Write implements fs.FileOperations.Write.
+func (FileNoopWrite) Write(_ context.Context, _ *fs.File, src usermem.IOSequence, _ int64) (int64, error) {
+	return src.NumBytes(), nil
+}
+
+// FileNoRead implements fs.FileOperations.Read to return EINVAL.
+type FileNoRead struct{}
+
+// Read implements fs.FileOperations.Read.
+func (FileNoRead) Read(context.Context, *fs.File, usermem.IOSequence, int64) (int64, error) {
+	return 0, syserror.EINVAL
+}
+
+// FileNoWrite implements fs.FileOperations.Write to return EINVAL.
+type FileNoWrite struct{}
+
+// Write implements fs.FileOperations.Write.
+func (FileNoWrite) Write(context.Context, *fs.File, usermem.IOSequence, int64) (int64, error) {
+	return 0, syserror.EINVAL
+}
+
+// FileNoopRead implements fs.FileOperations.Read as a noop.
+type FileNoopRead struct{}
+
+// Read implements fs.FileOperations.Read.
+func (FileNoopRead) Read(context.Context, *fs.File, usermem.IOSequence, int64) (int64, error) {
+	return 0, nil
+}
+
+// FileUseInodeUnstableAttr implements fs.FileOperations.UnstableAttr by calling
+// InodeOperations.UnstableAttr.
+type FileUseInodeUnstableAttr struct{}
+
+// UnstableAttr implements fs.FileOperations.UnstableAttr.
+func (FileUseInodeUnstableAttr) UnstableAttr(ctx context.Context, file *fs.File) (fs.UnstableAttr, error) {
+	return file.Dirent.Inode.UnstableAttr(ctx)
+}
diff --git a/pkg/sentry/fs/fsutil/file_range_set.go b/pkg/sentry/fs/fsutil/file_range_set.go
new file mode 100644
index 000000000..b5ac6c71c
--- /dev/null
+++ b/pkg/sentry/fs/fsutil/file_range_set.go
@@ -0,0 +1,209 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package fsutil
+
+import (
+	"fmt"
+	"io"
+	"math"
+
+	"gvisor.googlesource.com/gvisor/pkg/sentry/context"
+	"gvisor.googlesource.com/gvisor/pkg/sentry/memmap"
+	"gvisor.googlesource.com/gvisor/pkg/sentry/pgalloc"
+	"gvisor.googlesource.com/gvisor/pkg/sentry/platform"
+	"gvisor.googlesource.com/gvisor/pkg/sentry/safemem"
+	"gvisor.googlesource.com/gvisor/pkg/sentry/usage"
+	"gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
+)
+
+// FileRangeSet maps offsets into a memmap.Mappable to offsets into a
+// platform.File. It is used to implement Mappables that store data in
+// sparsely-allocated memory.
+//
+// type FileRangeSet <generated by go_generics>
+
+// fileRangeSetFunctions implements segment.Functions for FileRangeSet. It
+// carries no state.
+type fileRangeSetFunctions struct{}
+
+// MinKey implements segment.Functions.MinKey. The smallest key is 0.
+func (fileRangeSetFunctions) MinKey() uint64 {
+	const lowest uint64 = 0
+	return lowest
+}
+
+// MaxKey implements segment.Functions.MaxKey.
+func (fileRangeSetFunctions) MaxKey() uint64 {
+	return math.MaxUint64
+}
+
+// ClearValue implements segment.Functions.ClearValue. It intentionally does
+// nothing to the uint64 it is given.
+func (fileRangeSetFunctions) ClearValue(_ *uint64) {
+	// Deliberately empty.
+}
+
+// Merge implements segment.Functions.Merge. Two segments can be merged only
+// if the second segment's file offset begins exactly where the first
+// segment's file range ends; the merged value is then the first segment's
+// file offset.
+func (fileRangeSetFunctions) Merge(mr1 memmap.MappableRange, frstart1 uint64, _ memmap.MappableRange, frstart2 uint64) (uint64, bool) {
+	if frend1 := frstart1 + mr1.Length(); frend1 == frstart2 {
+		return frstart1, true
+	}
+	return 0, false
+}
+
+// Split implements segment.Functions.Split. The first half keeps frstart; the
+// second half's file offset is advanced by the distance from mr.Start to
+// split.
+func (fileRangeSetFunctions) Split(mr memmap.MappableRange, frstart uint64, split uint64) (uint64, uint64) {
+	delta := split - mr.Start
+	return frstart, frstart + delta
+}
+
+// FileRange returns the FileRange mapped by the whole of seg, i.e. the
+// FileRange corresponding to seg.Range().
+func (seg FileRangeIterator) FileRange() platform.FileRange {
+	whole := seg.Range()
+	return seg.FileRangeOf(whole)
+}
+
+// FileRangeOf returns the FileRange mapped by mr, computed by offsetting
+// seg's file offset by mr's distance from the start of seg.
+//
+// Preconditions: seg.Range().IsSupersetOf(mr). mr.Length() != 0.
+func (seg FileRangeIterator) FileRangeOf(mr memmap.MappableRange) platform.FileRange {
+	skipped := mr.Start - seg.Start()
+	begin := seg.Value() + skipped
+	end := begin + mr.Length()
+	return platform.FileRange{begin, end}
+}
+
+// Fill attempts to ensure that all memmap.Mappable offsets in required are
+// mapped to a platform.File offset, by allocating from mf with the given
+// memory usage kind and invoking readAt to store data into memory. (If readAt
+// returns a successful partial read, Fill will call it repeatedly until all
+// bytes have been read.) EOF is handled consistently with the requirements of
+// mmap(2): bytes after EOF on the same page are zeroed; pages after EOF are
+// invalid.
+//
+// Fill may read offsets outside of required, but will never read offsets
+// outside of optional. It returns a non-nil error if any error occurs, even
+// if the error only affects offsets in optional, but not in required. An EOF
+// from readAt is not reported as an error when rounding the bytes read up to
+// a page boundary covers the rest of the gap being filled.
+//
+// Preconditions: required.Length() > 0. optional.IsSupersetOf(required).
+// required and optional must be page-aligned.
+func (frs *FileRangeSet) Fill(ctx context.Context, required, optional memmap.MappableRange, mf *pgalloc.MemoryFile, kind usage.MemoryKind, readAt func(ctx context.Context, dsts safemem.BlockSeq, offset uint64) (uint64, error)) error {
+	gap := frs.LowerBoundGap(required.Start)
+	for gap.Ok() && gap.Start() < required.End {
+		if gap.Range().Length() == 0 {
+			gap = gap.NextGap()
+			continue
+		}
+		// Never read outside of optional.
+		gr := gap.Range().Intersect(optional)
+
+		// Read data into the gap.
+		fr, err := mf.AllocateAndFill(gr.Length(), kind, safemem.ReaderFunc(func(dsts safemem.BlockSeq) (uint64, error) {
+			var done uint64
+			for !dsts.IsEmpty() {
+				n, err := readAt(ctx, dsts, gr.Start+done)
+				done += n
+				dsts = dsts.DropFirst64(n)
+				if err != nil {
+					if err == io.EOF {
+						// MemoryFile.AllocateAndFill truncates down to a page
+						// boundary, but FileRangeSet.Fill is supposed to
+						// zero-fill to the end of the page in this case.
+						donepgaddr, ok := usermem.Addr(done).RoundUp()
+						if donepg := uint64(donepgaddr); ok && donepg != done {
+							// DropFirst64 returns the shortened sequence
+							// rather than modifying its receiver, so the
+							// result must be kept for the IsEmpty check
+							// below to see that the rest of the page has
+							// been accounted for.
+							dsts = dsts.DropFirst64(donepg - done)
+							done = donepg
+							if dsts.IsEmpty() {
+								// The partially-read page was the last one
+								// in the gap, so the gap is fully filled.
+								return done, nil
+							}
+						}
+					}
+					return done, err
+				}
+			}
+			return done, nil
+		}))
+
+		// Store anything we managed to read into the cache.
+		if done := fr.Length(); done != 0 {
+			gr.End = gr.Start + done
+			gap = frs.Insert(gap, gr, fr.Start).NextGap()
+		}
+
+		if err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+// Drop removes the parts of all segments that lie within mr, releasing a
+// reference on each corresponding platform.FileRange in mf.
+//
+// Preconditions: mr must be page-aligned.
+func (frs *FileRangeSet) Drop(mr memmap.MappableRange, mf *pgalloc.MemoryFile) {
+	for seg := frs.LowerBoundSegment(mr.Start); seg.Ok() && seg.Start() < mr.End; {
+		// Trim the segment so that only the part inside mr is dropped.
+		bounded := frs.Isolate(seg, mr)
+		mf.DecRef(bounded.FileRange())
+		vacated := frs.Remove(bounded)
+		seg = vacated.NextSegment()
+	}
+}
+
+// DropAll removes every segment in frs, releasing a reference on each
+// corresponding platform.FileRange in mf.
+func (frs *FileRangeSet) DropAll(mf *pgalloc.MemoryFile) {
+	seg := frs.FirstSegment()
+	for seg.Ok() {
+		mf.DecRef(seg.FileRange())
+		seg = seg.NextSegment()
+	}
+	// All references have been released; discard the segments in one step.
+	frs.RemoveAll()
+}
+
+// Truncate updates frs to reflect Mappable truncation to the given length:
+// pages that begin at or after the new EOF are freed, and the bytes that
+// follow the new EOF within its own page are zeroed.
+//
+// Truncate panics if the page containing the new EOF cannot be mapped or
+// zeroed.
+func (frs *FileRangeSet) Truncate(end uint64, mf *pgalloc.MemoryFile) {
+	if pgendaddr, ok := usermem.Addr(end).RoundUp(); ok {
+		pgend := uint64(pgendaddr)
+
+		// Free truncated pages: cut any segment straddling pgend, then
+		// release everything from pgend onwards.
+		frs.SplitAt(pgend)
+		for seg := frs.LowerBoundSegment(pgend); seg.Ok(); seg = frs.Remove(seg).NextSegment() {
+			mf.DecRef(seg.FileRange())
+		}
+
+		if end == pgend {
+			// The new EOF is page-aligned, so there is no partial page.
+			return
+		}
+	}
+
+	// Here we know end < end.RoundUp(). If the new EOF lands in the
+	// middle of a page that we have, zero out its contents beyond the new
+	// length.
+	seg := frs.FindSegment(end)
+	if !seg.Ok() {
+		return
+	}
+	fr := seg.FileRange()
+	fr.Start += end - seg.Start()
+	ims, err := mf.MapInternal(fr, usermem.Write)
+	if err != nil {
+		// There's no good recourse from here. This means that we can't keep
+		// cached memory consistent with the new end of file. The caller may
+		// have already updated the file size on their backing file system.
+		//
+		// We don't want to risk blindly continuing onward, so in the
+		// extremely rare cases this does happen, we abandon ship.
+		panic(fmt.Sprintf("Failed to map %v: %v", fr, err))
+	}
+	if _, err := safemem.ZeroSeq(ims); err != nil {
+		panic(fmt.Sprintf("Zeroing %v failed: %v", fr, err))
+	}
+}
diff --git a/pkg/sentry/fs/fsutil/file_range_set_impl.go b/pkg/sentry/fs/fsutil/file_range_set_impl.go
new file mode 100755
index 000000000..a0ab61628
--- /dev/null
+++ b/pkg/sentry/fs/fsutil/file_range_set_impl.go
@@ -0,0 +1,1274 @@
+package fsutil
+
+import (
+	__generics_imported0 "gvisor.googlesource.com/gvisor/pkg/sentry/memmap"
+)
+
+import (
+	"bytes"
+	"fmt"
+)
+
+const (
+	// minDegree is the minimum degree of an internal node in a Set B-tree.
+	//
+	// - Any non-root node has at least minDegree-1 segments.
+	//
+	// - Any non-root internal (non-leaf) node has at least minDegree children.
+	//
+	// - The root node may have fewer than minDegree-1 segments, but it may
+	// only have 0 segments if the tree is empty.
+	//
+	// Our implementation requires minDegree >= 3. Higher values of minDegree
+	// usually improve performance, but increase memory usage for small sets.
+	FileRangeminDegree = 3
+
+	// maxDegree is twice minDegree. It sizes each node's array of children;
+	// each node's key and value arrays hold maxDegree-1 entries.
+	FileRangemaxDegree = 2 * FileRangeminDegree
+)
+
+// A Set is a mapping of segments with non-overlapping Range keys. The zero
+// value for a Set is an empty set. Set values are not safely movable nor
+// copyable. Set is thread-compatible.
+//
+// +stateify savable
+type FileRangeSet struct {
+	// root is the root node of the B-tree, stored by value. A root with
+	// nrSegments == 0 represents the empty set.
+	//
+	// NOTE(review): the state tag presumably routes save/restore through a
+	// *FileRangeSegmentDataSlices representation; confirm against the
+	// generated stateify code.
+	root FileRangenode `state:".(*FileRangeSegmentDataSlices)"`
+}
+
+// IsEmpty reports whether the set holds no segments, which is the case
+// exactly when the root node has none.
+func (s *FileRangeSet) IsEmpty() bool {
+	count := s.root.nrSegments
+	return count == 0
+}
+
+// IsEmptyRange reports whether no segment in the set overlaps r. This is
+// semantically equivalent to s.SpanRange(r) == 0, but may be more efficient.
+//
+// A zero-length r is always empty. IsEmptyRange panics if r.Length() is
+// negative.
+func (s *FileRangeSet) IsEmptyRange(r __generics_imported0.MappableRange) bool {
+	if length := r.Length(); length < 0 {
+		panic(fmt.Sprintf("invalid range %v", r))
+	} else if length == 0 {
+		return true
+	}
+	// r is empty only if it starts inside a gap that also reaches r.End.
+	if _, gap := s.Find(r.Start); gap.Ok() {
+		return gap.End() >= r.End
+	}
+	return false
+}
+
+// Span returns the total size of all segments in the set.
+func (s *FileRangeSet) Span() uint64 {
+	var sz uint64
+	for seg := s.FirstSegment(); seg.Ok(); seg = seg.NextSegment() {
+		sz += seg.Range().Length()
+	}
+	return sz
+}
+
+// SpanRange returns the total size of the intersection of segments in the set
+// with the given range.
+func (s *FileRangeSet) SpanRange(r __generics_imported0.MappableRange) uint64 {
+	switch {
+	case r.Length() < 0:
+		panic(fmt.Sprintf("invalid range %v", r))
+	case r.Length() == 0:
+		return 0
+	}
+	var sz uint64
+	for seg := s.LowerBoundSegment(r.Start); seg.Ok() && seg.Start() < r.End; seg = seg.NextSegment() {
+		sz += seg.Range().Intersect(r).Length()
+	}
+	return sz
+}
+
+// FirstSegment returns an iterator to the first segment in the set, or a
+// terminal iterator if the set is empty.
+func (s *FileRangeSet) FirstSegment() FileRangeIterator {
+	if s.root.nrSegments != 0 {
+		return s.root.firstSegment()
+	}
+	return FileRangeIterator{}
+}
+
+// LastSegment returns an iterator to the last segment in the set, or a
+// terminal iterator if the set is empty.
+func (s *FileRangeSet) LastSegment() FileRangeIterator {
+	if s.root.nrSegments != 0 {
+		return s.root.lastSegment()
+	}
+	return FileRangeIterator{}
+}
+
+// FirstGap returns the first gap in the set: position 0 of the node reached
+// by following first children down from the root.
+func (s *FileRangeSet) FirstGap() FileRangeGapIterator {
+	leaf := &s.root
+	for leaf.hasChildren {
+		leaf = leaf.children[0]
+	}
+	return FileRangeGapIterator{node: leaf, index: 0}
+}
+
+// LastGap returns the last gap in the set: the position after the final
+// segment of the node reached by following last children down from the root.
+func (s *FileRangeSet) LastGap() FileRangeGapIterator {
+	leaf := &s.root
+	for leaf.hasChildren {
+		leaf = leaf.children[leaf.nrSegments]
+	}
+	return FileRangeGapIterator{node: leaf, index: leaf.nrSegments}
+}
+
+// Find returns the segment or gap whose range contains the given key. If a
+// segment is found, the returned Iterator is non-terminal and the
+// returned GapIterator is terminal. Otherwise, the returned Iterator is
+// terminal and the returned GapIterator is non-terminal.
+//
+// Find walks down from the root, binary-searching the keys of each node it
+// visits.
+func (s *FileRangeSet) Find(key uint64) (FileRangeIterator, FileRangeGapIterator) {
+	n := &s.root
+	for {
+		// Binary search n.keys[lower:upper] for a range containing key.
+		lower := 0
+		upper := n.nrSegments
+		for lower < upper {
+			i := lower + (upper-lower)/2
+			if r := n.keys[i]; key < r.End {
+				if key >= r.Start {
+					// r.Start <= key < r.End: segment i contains key.
+					return FileRangeIterator{n, i}, FileRangeGapIterator{}
+				}
+				// key precedes segment i; continue in the lower half.
+				upper = i
+			} else {
+				// key is at or past the end of segment i; continue in the
+				// upper half.
+				lower = i + 1
+			}
+		}
+		// No segment in n contains key; lower is the position between
+		// segments where key falls.
+		i := lower
+		if !n.hasChildren {
+			// n has no children to descend into, so key lies in gap i of n.
+			return FileRangeIterator{}, FileRangeGapIterator{n, i}
+		}
+		n = n.children[i]
+	}
+}
+
+// FindSegment returns an iterator to the segment whose range contains key, or
+// a terminal iterator if key lies in a gap.
+func (s *FileRangeSet) FindSegment(key uint64) FileRangeIterator {
+	found, _ := s.Find(key)
+	return found
+}
+
+// LowerBoundSegment returns the segment with the lowest range that contains a
+// key greater than or equal to min: the segment containing min if there is
+// one, otherwise the segment following the gap that contains min. If no such
+// segment exists, LowerBoundSegment returns a terminal iterator.
+func (s *FileRangeSet) LowerBoundSegment(min uint64) FileRangeIterator {
+	seg, gap := s.Find(min)
+	if !seg.Ok() {
+		return gap.NextSegment()
+	}
+	return seg
+}
+
+// UpperBoundSegment returns the segment with the highest range that contains
+// a key less than or equal to max: the segment containing max if there is
+// one, otherwise the segment preceding the gap that contains max. If no such
+// segment exists, UpperBoundSegment returns a terminal iterator.
+func (s *FileRangeSet) UpperBoundSegment(max uint64) FileRangeIterator {
+	seg, gap := s.Find(max)
+	if !seg.Ok() {
+		return gap.PrevSegment()
+	}
+	return seg
+}
+
+// FindGap returns an iterator to the gap containing key, or a terminal
+// iterator if key lies inside a segment.
+func (s *FileRangeSet) FindGap(key uint64) FileRangeGapIterator {
+	_, found := s.Find(key)
+	return found
+}
+
+// LowerBoundGap returns the gap with the lowest range that is greater than or
+// equal to min: the gap containing min if there is one, otherwise the gap
+// following the segment that contains min.
+func (s *FileRangeSet) LowerBoundGap(min uint64) FileRangeGapIterator {
+	seg, gap := s.Find(min)
+	if !gap.Ok() {
+		return seg.NextGap()
+	}
+	return gap
+}
+
+// UpperBoundGap returns the gap with the highest range that is less than or
+// equal to max: the gap containing max if there is one, otherwise the gap
+// preceding the segment that contains max.
+func (s *FileRangeSet) UpperBoundGap(max uint64) FileRangeGapIterator {
+	seg, gap := s.Find(max)
+	if !gap.Ok() {
+		return seg.PrevGap()
+	}
+	return gap
+}
+
+// Add inserts the segment (r, val) into the set and returns true, merging it
+// with adjacent segments where possible. If r would overlap an existing
+// segment, Add leaves the set unchanged and returns false. If Add succeeds,
+// all existing iterators are invalidated.
+//
+// Add panics if r.Length() <= 0.
+func (s *FileRangeSet) Add(r __generics_imported0.MappableRange, val uint64) bool {
+	if r.Length() <= 0 {
+		panic(fmt.Sprintf("invalid segment range %v", r))
+	}
+	// r fits only if it starts inside a gap that extends at least to r.End.
+	if gap := s.FindGap(r.Start); gap.Ok() && r.End <= gap.End() {
+		s.Insert(gap, r, val)
+		return true
+	}
+	return false
+}
+
+// AddWithoutMerging inserts the segment (r, val) into the set and returns
+// true, without attempting to merge it with its neighbors. If r would overlap
+// an existing segment, AddWithoutMerging does nothing and returns false. If
+// AddWithoutMerging succeeds, all existing iterators are invalidated.
+//
+// AddWithoutMerging panics if r.Length() <= 0.
+func (s *FileRangeSet) AddWithoutMerging(r __generics_imported0.MappableRange, val uint64) bool {
+	if r.Length() <= 0 {
+		panic(fmt.Sprintf("invalid segment range %v", r))
+	}
+	gap := s.FindGap(r.Start)
+	if !gap.Ok() || r.End > gap.End() {
+		// r starts inside a segment or runs past the end of its gap.
+		return false
+	}
+	s.InsertWithoutMergingUnchecked(gap, r, val)
+	return true
+}
+
+// Insert inserts the given segment into the given gap. If the new segment can
+// be merged with adjacent segments, Insert will do so. Insert returns an
+// iterator to the segment containing the inserted value (which may have been
+// merged with other values). All existing iterators (including gap, but not
+// including the returned iterator) are invalidated.
+//
+// If the gap cannot accommodate the segment, or if r is invalid, Insert panics.
+//
+// Insert is semantically equivalent to a InsertWithoutMerging followed by a
+// Merge, but may be more efficient. Note that there is no unchecked variant of
+// Insert since Insert must retrieve and inspect gap's predecessor and
+// successor segments regardless.
+func (s *FileRangeSet) Insert(gap FileRangeGapIterator, r __generics_imported0.MappableRange, val uint64) FileRangeIterator {
+	if r.Length() <= 0 {
+		panic(fmt.Sprintf("invalid segment range %v", r))
+	}
+	// r must not overlap either of the segments bordering gap.
+	prev, next := gap.PrevSegment(), gap.NextSegment()
+	if prev.Ok() && prev.End() > r.Start {
+		panic(fmt.Sprintf("new segment %v overlaps predecessor %v", r, prev.Range()))
+	}
+	if next.Ok() && next.Start() < r.End {
+		panic(fmt.Sprintf("new segment %v overlaps successor %v", r, next.Range()))
+	}
+	// Try to merge into the predecessor by extending it over r.
+	if prev.Ok() && prev.End() == r.Start {
+		if mval, ok := (fileRangeSetFunctions{}).Merge(prev.Range(), prev.Value(), r, val); ok {
+			prev.SetEndUnchecked(r.End)
+			prev.SetValue(mval)
+			// prev now covers r; if it also touches the successor, try to
+			// absorb the successor as well.
+			if next.Ok() && next.Start() == r.End {
+				val = mval
+				if mval, ok := (fileRangeSetFunctions{}).Merge(prev.Range(), val, next.Range(), next.Value()); ok {
+					prev.SetEndUnchecked(next.End())
+					prev.SetValue(mval)
+					// Remove invalidates prev, so the merged segment is
+					// re-obtained from the gap that Remove returns.
+					return s.Remove(next).PrevSegment()
+				}
+			}
+			return prev
+		}
+	}
+	// Otherwise try to merge into the successor by extending it back over r.
+	if next.Ok() && next.Start() == r.End {
+		if mval, ok := (fileRangeSetFunctions{}).Merge(r, val, next.Range(), next.Value()); ok {
+			next.SetStartUnchecked(r.Start)
+			next.SetValue(mval)
+			return next
+		}
+	}
+	// No merge was possible; insert r as a new segment.
+	return s.InsertWithoutMergingUnchecked(gap, r, val)
+}
+
+// InsertWithoutMerging inserts the segment (r, val) into gap, without
+// attempting to merge it with its neighbors, and returns an iterator to the
+// inserted segment. All existing iterators (including gap, but not including
+// the returned iterator) are invalidated.
+//
+// InsertWithoutMerging panics if r.Length() <= 0 or if gap's range is not a
+// superset of r.
+func (s *FileRangeSet) InsertWithoutMerging(gap FileRangeGapIterator, r __generics_imported0.MappableRange, val uint64) FileRangeIterator {
+	if r.Length() <= 0 {
+		panic(fmt.Sprintf("invalid segment range %v", r))
+	}
+	gr := gap.Range()
+	if gr.IsSupersetOf(r) {
+		return s.InsertWithoutMergingUnchecked(gap, r, val)
+	}
+	panic(fmt.Sprintf("cannot insert segment range %v into gap range %v", r, gr))
+}
+
+// InsertWithoutMergingUnchecked inserts the given segment into the given gap
+// and returns an iterator to the inserted segment. All existing iterators
+// (including gap, but not including the returned iterator) are invalidated.
+//
+// Preconditions: r.Start >= gap.Start(); r.End <= gap.End().
+func (s *FileRangeSet) InsertWithoutMergingUnchecked(gap FileRangeGapIterator, r __generics_imported0.MappableRange, val uint64) FileRangeIterator {
+	// Split full nodes first so that gap.node has room for one more
+	// segment; this may relocate gap to a different node.
+	gap = gap.node.rebalanceBeforeInsert(gap)
+	// Shift the keys and values at and after gap.index up by one slot, then
+	// store the new segment in the vacated slot.
+	copy(gap.node.keys[gap.index+1:], gap.node.keys[gap.index:gap.node.nrSegments])
+	copy(gap.node.values[gap.index+1:], gap.node.values[gap.index:gap.node.nrSegments])
+	gap.node.keys[gap.index] = r
+	gap.node.values[gap.index] = val
+	gap.node.nrSegments++
+	return FileRangeIterator{gap.node, gap.index}
+}
+
+// Remove removes the given segment and returns an iterator to the vacated gap.
+// All existing iterators (including seg, but not including the returned
+// iterator) are invalidated.
+func (s *FileRangeSet) Remove(seg FileRangeIterator) FileRangeGapIterator {
+	// A segment in a node that has children is not deleted in place.
+	// Instead, its predecessor is copied over it and the predecessor's
+	// original position is removed.
+	if seg.node.hasChildren {
+
+		victim := seg.PrevSegment()
+
+		seg.SetRangeUnchecked(victim.Range())
+		seg.SetValue(victim.Value())
+		// The gap vacated by victim sits just before seg's slot, which now
+		// holds the predecessor; the gap after that slot is the one vacated
+		// by the original seg.
+		return s.Remove(victim).NextGap()
+	}
+	// Shift the later keys and values down over seg's slot, clear the
+	// now-unused last value, and restore the B-tree invariants.
+	copy(seg.node.keys[seg.index:], seg.node.keys[seg.index+1:seg.node.nrSegments])
+	copy(seg.node.values[seg.index:], seg.node.values[seg.index+1:seg.node.nrSegments])
+	fileRangeSetFunctions{}.ClearValue(&seg.node.values[seg.node.nrSegments-1])
+	seg.node.nrSegments--
+	return seg.node.rebalanceAfterRemove(FileRangeGapIterator{seg.node, seg.index})
+}
+
+// RemoveAll removes all segments from the set. All existing iterators are
+// invalidated.
+func (s *FileRangeSet) RemoveAll() {
+	s.root = FileRangenode{}
+}
+
+// RemoveRange removes the parts of all segments that lie within r, splitting
+// segments that extend beyond r. It returns an iterator to the newly formed
+// gap, and all existing iterators are invalidated.
+func (s *FileRangeSet) RemoveRange(r __generics_imported0.MappableRange) FileRangeGapIterator {
+	seg, gap := s.Find(r.Start)
+	if seg.Ok() {
+		// r.Start falls inside a segment; remove the part of it within r.
+		gap = s.Remove(s.Isolate(seg, r))
+	}
+	for {
+		seg = gap.NextSegment()
+		if !seg.Ok() || seg.Start() >= r.End {
+			break
+		}
+		gap = s.Remove(s.Isolate(seg, r))
+	}
+	return gap
+}
+
+// Merge attempts to merge two neighboring segments. On success it returns an
+// iterator to the merged segment, and all existing iterators are invalidated.
+// On failure it returns a terminal iterator.
+//
+// Merge panics if second is not the segment immediately following first.
+func (s *FileRangeSet) Merge(first, second FileRangeIterator) FileRangeIterator {
+	if first.NextSegment() == second {
+		return s.MergeUnchecked(first, second)
+	}
+	panic(fmt.Sprintf("attempt to merge non-neighboring segments %v, %v", first.Range(), second.Range()))
+}
+
+// MergeUnchecked attempts to merge two neighboring segments. On success it
+// returns an iterator to the merged segment, and all existing iterators are
+// invalidated. On failure it returns a terminal iterator.
+//
+// Precondition: first is the predecessor of second: first.NextSegment() ==
+// second, first == second.PrevSegment().
+func (s *FileRangeSet) MergeUnchecked(first, second FileRangeIterator) FileRangeIterator {
+	// Segments separated by a non-empty gap can never be merged.
+	if first.End() != second.Start() {
+		return FileRangeIterator{}
+	}
+	merged, ok := (fileRangeSetFunctions{}).Merge(first.Range(), first.Value(), second.Range(), second.Value())
+	if !ok {
+		return FileRangeIterator{}
+	}
+	// Grow first over second's range, then drop second. Remove invalidates
+	// first, so the merged segment is re-obtained from the returned gap.
+	first.SetEndUnchecked(second.End())
+	first.SetValue(merged)
+	return s.Remove(second).PrevSegment()
+}
+
+// MergeAll walks the set from the first segment, attempting to merge each
+// segment with its successor. All existing iterators are invalidated.
+func (s *FileRangeSet) MergeAll() {
+	seg := s.FirstSegment()
+	if !seg.Ok() {
+		return
+	}
+	for next := seg.NextSegment(); next.Ok(); next = seg.NextSegment() {
+		if merged := s.MergeUnchecked(seg, next); merged.Ok() {
+			// Keep trying to grow the merged segment.
+			seg = merged
+		} else {
+			seg = next
+		}
+	}
+}
+
+// MergeRange attempts to merge all adjacent segments that contain a key in r,
+// walking forward from the first segment at or after r.Start. All existing
+// iterators are invalidated.
+func (s *FileRangeSet) MergeRange(r __generics_imported0.MappableRange) {
+	seg := s.LowerBoundSegment(r.Start)
+	if !seg.Ok() {
+		return
+	}
+	for next := seg.NextSegment(); next.Ok() && next.Range().Start < r.End; next = seg.NextSegment() {
+		if merged := s.MergeUnchecked(seg, next); merged.Ok() {
+			// Keep trying to grow the merged segment.
+			seg = merged
+		} else {
+			seg = next
+		}
+	}
+}
+
+// MergeAdjacent attempts to merge the segment containing r.Start with its
+// predecessor, and the segment containing r.End-1 with its successor. Either
+// step is skipped if the segment or its neighbor does not exist.
+func (s *FileRangeSet) MergeAdjacent(r __generics_imported0.MappableRange) {
+	if first := s.FindSegment(r.Start); first.Ok() {
+		if prev := first.PrevSegment(); prev.Ok() {
+			s.Merge(prev, first)
+		}
+	}
+	// Look the last segment up only after the first merge attempt, since a
+	// successful merge invalidates existing iterators.
+	if last := s.FindSegment(r.End - 1); last.Ok() {
+		if next := last.NextSegment(); next.Ok() {
+			s.Merge(last, next)
+		}
+	}
+}
+
+// Split splits seg at split and returns iterators to the two resulting
+// segments. All existing iterators (including seg, but not including the
+// returned iterators) are invalidated.
+//
+// Split panics if seg.Range().CanSplitAt(split) is false, i.e. if split is at
+// the start or end of seg's range (so that splitting would produce a segment
+// with zero length) or falls outside seg's range altogether.
+func (s *FileRangeSet) Split(seg FileRangeIterator, split uint64) (FileRangeIterator, FileRangeIterator) {
+	if seg.Range().CanSplitAt(split) {
+		return s.SplitUnchecked(seg, split)
+	}
+	panic(fmt.Sprintf("can't split %v at %v", seg.Range(), split))
+}
+
+// SplitUnchecked splits the given segment at the given key and returns
+// iterators to the two resulting segments. All existing iterators (including
+// seg, but not including the returned iterators) are invalidated.
+//
+// Preconditions: seg.Start() < split < seg.End().
+func (s *FileRangeSet) SplitUnchecked(seg FileRangeIterator, split uint64) (FileRangeIterator, FileRangeIterator) {
+	// Compute both halves' values from the original range before seg is
+	// shrunk.
+	val1, val2 := (fileRangeSetFunctions{}).Split(seg.Range(), seg.Value(), split)
+	end2 := seg.End()
+	// Shrink seg to [seg.Start(), split), then insert [split, end2) into the
+	// gap that follows it.
+	seg.SetEndUnchecked(split)
+	seg.SetValue(val1)
+	seg2 := s.InsertWithoutMergingUnchecked(seg.NextGap(), __generics_imported0.MappableRange{split, end2}, val2)
+	// The insertion invalidates seg, so the first half is re-obtained as
+	// seg2's predecessor.
+	return seg2.PrevSegment(), seg2
+}
+
+// SplitAt splits the segment straddling split, if one exists, and reports
+// whether a segment was split. If SplitAt splits a segment, all existing
+// iterators are invalidated.
+func (s *FileRangeSet) SplitAt(split uint64) bool {
+	seg := s.FindSegment(split)
+	if !seg.Ok() || !seg.Range().CanSplitAt(split) {
+		return false
+	}
+	s.SplitUnchecked(seg, split)
+	return true
+}
+
+// Isolate ensures that seg's range does not extend beyond r, splitting seg at
+// r.Start and/or r.End where necessary, and returns an updated iterator to
+// the bounded segment. All existing iterators (including seg, but not
+// including the returned iterator) are invalidated.
+func (s *FileRangeSet) Isolate(seg FileRangeIterator, r __generics_imported0.MappableRange) FileRangeIterator {
+	if cur := seg.Range(); cur.CanSplitAt(r.Start) {
+		// Keep the half that begins at r.Start.
+		_, seg = s.SplitUnchecked(seg, r.Start)
+	}
+	if cur := seg.Range(); cur.CanSplitAt(r.End) {
+		// Keep the half that ends at r.End.
+		seg, _ = s.SplitUnchecked(seg, r.End)
+	}
+	return seg
+}
+
+// ApplyContiguous applies a function to a contiguous range of segments,
+// splitting if necessary. The function is applied until the first gap is
+// encountered, at which point the gap is returned. If the function is applied
+// across the entire range, a terminal gap is returned. All existing iterators
+// are invalidated.
+//
+// N.B. The Iterator must not be invalidated by the function.
+func (s *FileRangeSet) ApplyContiguous(r __generics_imported0.MappableRange, fn func(seg FileRangeIterator)) FileRangeGapIterator {
+	seg, gap := s.Find(r.Start)
+	if !seg.Ok() {
+		// r.Start itself lies in a gap, so fn is never applied.
+		return gap
+	}
+	for {
+		// Restrict seg to r so that fn only sees the part inside r.
+		seg = s.Isolate(seg, r)
+		fn(seg)
+		if seg.End() >= r.End {
+			// fn has been applied across all of r.
+			return FileRangeGapIterator{}
+		}
+		gap = seg.NextGap()
+		if !gap.IsEmpty() {
+			// A non-empty gap interrupts the contiguous run.
+			return gap
+		}
+		seg = gap.NextSegment()
+		if !seg.Ok() {
+			// The gap after the previous segment is empty and nothing
+			// follows it; presumably that segment ran to the end of the key
+			// space, so there is nothing left to apply fn to.
+			return FileRangeGapIterator{}
+		}
+	}
+}
+
+// FileRangenode is a node of the B-tree that backs a FileRangeSet.
+//
+// +stateify savable
+type FileRangenode struct {
+	// An internal binary tree node looks like:
+	//
+	//   K
+	//  / \
+	// Cl Cr
+	//
+	// where all keys in the subtree rooted by Cl (the left subtree) are less
+	// than K (the key of the parent node), and all keys in the subtree rooted
+	// by Cr (the right subtree) are greater than K.
+	//
+	// An internal B-tree node's indexes work out to look like:
+	//
+	//   K0 K1 K2  ...   Kn-1
+	//  / \/ \/ \  ...  /  \
+	// C0 C1 C2 C3 ... Cn-1 Cn
+	//
+	// where n is nrSegments.
+	nrSegments int
+
+	// parent is a pointer to this node's parent. If this node is root, parent
+	// is nil.
+	parent *FileRangenode
+
+	// parentIndex is the index of this node in parent.children.
+	parentIndex int
+
+	// Flag for internal nodes that is technically redundant with "children[0]
+	// != nil", but is stored in the first cache line. "hasChildren" rather
+	// than "isLeaf" because false must be the correct value for an empty root.
+	hasChildren bool
+
+	// Nodes store keys and values in separate arrays to maximize locality in
+	// the common case (scanning keys for lookup).
+	//
+	// Only the first nrSegments entries of keys and values are in use. When
+	// hasChildren is true, children[0] through children[nrSegments] are in
+	// use.
+	keys     [FileRangemaxDegree - 1]__generics_imported0.MappableRange
+	values   [FileRangemaxDegree - 1]uint64
+	children [FileRangemaxDegree]*FileRangenode
+}
+
+// firstSegment returns an iterator to the first segment in the subtree rooted
+// by n, found by following first children until a node without children is
+// reached.
+//
+// Preconditions: n.nrSegments != 0.
+func (n *FileRangenode) firstSegment() FileRangeIterator {
+	leaf := n
+	for leaf.hasChildren {
+		leaf = leaf.children[0]
+	}
+	return FileRangeIterator{node: leaf, index: 0}
+}
+
+// lastSegment returns an iterator to the last segment in the subtree rooted
+// by n, found by following last children until a node without children is
+// reached.
+//
+// Preconditions: n.nrSegments != 0.
+func (n *FileRangenode) lastSegment() FileRangeIterator {
+	leaf := n
+	for leaf.hasChildren {
+		leaf = leaf.children[leaf.nrSegments]
+	}
+	return FileRangeIterator{node: leaf, index: leaf.nrSegments - 1}
+}
+
+// prevSibling returns the child of n's parent that immediately precedes n, or
+// nil if n is the root or is its parent's first child.
+func (n *FileRangenode) prevSibling() *FileRangenode {
+	if p := n.parent; p != nil && n.parentIndex != 0 {
+		return p.children[n.parentIndex-1]
+	}
+	return nil
+}
+
+// nextSibling returns the child of n's parent that immediately follows n, or
+// nil if n is the root or is its parent's last child.
+func (n *FileRangenode) nextSibling() *FileRangenode {
+	if p := n.parent; p != nil && n.parentIndex != p.nrSegments {
+		return p.children[n.parentIndex+1]
+	}
+	return nil
+}
+
+// rebalanceBeforeInsert splits n and its ancestors if they are full, as
+// required for insertion, and returns an updated iterator to the position
+// represented by gap.
+//
+// A node is full when it holds maxDegree-1 segments. Splitting moves the
+// node's median segment up one level and divides the remaining segments
+// between two nodes of minDegree-1 segments each.
+func (n *FileRangenode) rebalanceBeforeInsert(gap FileRangeGapIterator) FileRangeGapIterator {
+	// Handle ancestors first, so that n's parent has room for the median
+	// segment if n needs to be split below.
+	if n.parent != nil {
+		gap = n.parent.rebalanceBeforeInsert(gap)
+	}
+	if n.nrSegments < FileRangemaxDegree-1 {
+		return gap
+	}
+	if n.parent == nil {
+		// n is the root. Move the segments before and after n's median
+		// segment into two new child nodes; the median becomes the only
+		// segment left in the root.
+		left := &FileRangenode{
+			nrSegments:  FileRangeminDegree - 1,
+			parent:      n,
+			parentIndex: 0,
+			hasChildren: n.hasChildren,
+		}
+		right := &FileRangenode{
+			nrSegments:  FileRangeminDegree - 1,
+			parent:      n,
+			parentIndex: 1,
+			hasChildren: n.hasChildren,
+		}
+		copy(left.keys[:FileRangeminDegree-1], n.keys[:FileRangeminDegree-1])
+		copy(left.values[:FileRangeminDegree-1], n.values[:FileRangeminDegree-1])
+		copy(right.keys[:FileRangeminDegree-1], n.keys[FileRangeminDegree:])
+		copy(right.values[:FileRangeminDegree-1], n.values[FileRangeminDegree:])
+		n.keys[0], n.values[0] = n.keys[FileRangeminDegree-1], n.values[FileRangeminDegree-1]
+		FileRangezeroValueSlice(n.values[1:])
+		if n.hasChildren {
+			// Hand the root's former children to the new nodes and fix up
+			// their parent links.
+			copy(left.children[:FileRangeminDegree], n.children[:FileRangeminDegree])
+			copy(right.children[:FileRangeminDegree], n.children[FileRangeminDegree:])
+			FileRangezeroNodeSlice(n.children[2:])
+			for i := 0; i < FileRangeminDegree; i++ {
+				left.children[i].parent = left
+				left.children[i].parentIndex = i
+				right.children[i].parent = right
+				right.children[i].parentIndex = i
+			}
+		}
+		n.nrSegments = 1
+		n.hasChildren = true
+		n.children[0] = left
+		n.children[1] = right
+		// Translate gap if it referred to a position in the old root.
+		if gap.node != n {
+			return gap
+		}
+		if gap.index < FileRangeminDegree {
+			return FileRangeGapIterator{left, gap.index}
+		}
+		return FileRangeGapIterator{right, gap.index - FileRangeminDegree}
+	}
+	// n is not the root. Open a slot in the parent at n.parentIndex, move
+	// n's median segment into it, and shift the parent's later children up
+	// by one to make room for a new sibling at n.parentIndex+1.
+	copy(n.parent.keys[n.parentIndex+1:], n.parent.keys[n.parentIndex:n.parent.nrSegments])
+	copy(n.parent.values[n.parentIndex+1:], n.parent.values[n.parentIndex:n.parent.nrSegments])
+	n.parent.keys[n.parentIndex], n.parent.values[n.parentIndex] = n.keys[FileRangeminDegree-1], n.values[FileRangeminDegree-1]
+	copy(n.parent.children[n.parentIndex+2:], n.parent.children[n.parentIndex+1:n.parent.nrSegments+1])
+	for i := n.parentIndex + 2; i < n.parent.nrSegments+2; i++ {
+		n.parent.children[i].parentIndex = i
+	}
+	sibling := &FileRangenode{
+		nrSegments:  FileRangeminDegree - 1,
+		parent:      n.parent,
+		parentIndex: n.parentIndex + 1,
+		hasChildren: n.hasChildren,
+	}
+	n.parent.children[n.parentIndex+1] = sibling
+	n.parent.nrSegments++
+	// Move the segments (and children) after the median into the sibling.
+	copy(sibling.keys[:FileRangeminDegree-1], n.keys[FileRangeminDegree:])
+	copy(sibling.values[:FileRangeminDegree-1], n.values[FileRangeminDegree:])
+	FileRangezeroValueSlice(n.values[FileRangeminDegree-1:])
+	if n.hasChildren {
+		copy(sibling.children[:FileRangeminDegree], n.children[FileRangeminDegree:])
+		FileRangezeroNodeSlice(n.children[FileRangeminDegree:])
+		for i := 0; i < FileRangeminDegree; i++ {
+			sibling.children[i].parent = sibling
+			sibling.children[i].parentIndex = i
+		}
+	}
+	n.nrSegments = FileRangeminDegree - 1
+	// Translate gap if it referred to a position that moved to the sibling.
+	if gap.node != n {
+		return gap
+	}
+	if gap.index < FileRangeminDegree {
+		return gap
+	}
+	return FileRangeGapIterator{sibling, gap.index - FileRangeminDegree}
+}
+
+// rebalanceAfterRemove "unsplits" n and its ancestors if they are deficient
+// (contain fewer segments than required by B-tree invariants), as required for
+// removal, and returns an updated iterator to the position represented by gap.
+//
+// Precondition: n is the only node in the tree that may currently violate a
+// B-tree invariant.
+func (n *FileRangenode) rebalanceAfterRemove(gap FileRangeGapIterator) FileRangeGapIterator {
+	for {
+		// Stop as soon as n holds at least the minimum number of segments.
+		if n.nrSegments >= FileRangeminDegree-1 {
+			return gap
+		}
+		if n.parent == nil {
+			// n has no parent, hence no siblings to borrow from or merge with.
+			return gap
+		}
+		// First choice: borrow one segment from the previous sibling, rotating
+		// it through the parent's separating segment.
+		if sibling := n.prevSibling(); sibling != nil && sibling.nrSegments >= FileRangeminDegree {
+			// Shift n's segments right by one to open slot 0.
+			copy(n.keys[1:], n.keys[:n.nrSegments])
+			copy(n.values[1:], n.values[:n.nrSegments])
+			// The parent's separator moves down into n; the sibling's last
+			// segment moves up to replace it.
+			n.keys[0] = n.parent.keys[n.parentIndex-1]
+			n.values[0] = n.parent.values[n.parentIndex-1]
+			n.parent.keys[n.parentIndex-1] = sibling.keys[sibling.nrSegments-1]
+			n.parent.values[n.parentIndex-1] = sibling.values[sibling.nrSegments-1]
+			fileRangeSetFunctions{}.ClearValue(&sibling.values[sibling.nrSegments-1])
+			if n.hasChildren {
+				// The sibling's last child becomes n's first child; the
+				// children already in n shift right and are re-indexed.
+				copy(n.children[1:], n.children[:n.nrSegments+1])
+				n.children[0] = sibling.children[sibling.nrSegments]
+				sibling.children[sibling.nrSegments] = nil
+				n.children[0].parent = n
+				n.children[0].parentIndex = 0
+				for i := 1; i < n.nrSegments+2; i++ {
+					n.children[i].parentIndex = i
+				}
+			}
+			n.nrSegments++
+			sibling.nrSegments--
+			// Translate gap if it referred to a position affected by the
+			// rotation.
+			if gap.node == sibling && gap.index == sibling.nrSegments {
+				return FileRangeGapIterator{n, 0}
+			}
+			if gap.node == n {
+				// Everything in n moved one slot to the right.
+				return FileRangeGapIterator{n, gap.index + 1}
+			}
+			return gap
+		}
+		// Second choice: borrow one segment from the next sibling, again
+		// rotating through the parent's separating segment.
+		if sibling := n.nextSibling(); sibling != nil && sibling.nrSegments >= FileRangeminDegree {
+			// The parent's separator is appended to n; the sibling's first
+			// segment moves up to replace it.
+			n.keys[n.nrSegments] = n.parent.keys[n.parentIndex]
+			n.values[n.nrSegments] = n.parent.values[n.parentIndex]
+			n.parent.keys[n.parentIndex] = sibling.keys[0]
+			n.parent.values[n.parentIndex] = sibling.values[0]
+			// Close the hole at the front of the sibling.
+			copy(sibling.keys[:sibling.nrSegments-1], sibling.keys[1:])
+			copy(sibling.values[:sibling.nrSegments-1], sibling.values[1:])
+			fileRangeSetFunctions{}.ClearValue(&sibling.values[sibling.nrSegments-1])
+			if n.hasChildren {
+				// The sibling's first child becomes n's last child; the
+				// sibling's remaining children shift left and are re-indexed.
+				n.children[n.nrSegments+1] = sibling.children[0]
+				copy(sibling.children[:sibling.nrSegments], sibling.children[1:])
+				sibling.children[sibling.nrSegments] = nil
+				n.children[n.nrSegments+1].parent = n
+				n.children[n.nrSegments+1].parentIndex = n.nrSegments + 1
+				for i := 0; i < sibling.nrSegments; i++ {
+					sibling.children[i].parentIndex = i
+				}
+			}
+			n.nrSegments++
+			sibling.nrSegments--
+			if gap.node == sibling {
+				if gap.index == 0 {
+					// The sibling's first gap is now n's last gap.
+					return FileRangeGapIterator{n, n.nrSegments}
+				}
+				// Everything in the sibling moved one slot to the left.
+				return FileRangeGapIterator{sibling, gap.index - 1}
+			}
+			return gap
+		}
+		// Neither sibling can spare a segment, so nodes must be merged.
+		p := n.parent
+		if p.nrSegments == 1 {
+			// p has exactly two children; fold both of them, together with
+			// p's single segment, into p itself. NOTE(review): a node with
+			// one segment is presumably the root, given the stated B-tree
+			// invariants - confirm.
+			left, right := p.children[0], p.children[1]
+			p.nrSegments = left.nrSegments + right.nrSegments + 1
+			p.hasChildren = left.hasChildren
+			// Move p's own segment to its final slot before the slots below
+			// it are overwritten with left's segments.
+			p.keys[left.nrSegments] = p.keys[0]
+			p.values[left.nrSegments] = p.values[0]
+			copy(p.keys[:left.nrSegments], left.keys[:left.nrSegments])
+			copy(p.values[:left.nrSegments], left.values[:left.nrSegments])
+			copy(p.keys[left.nrSegments+1:], right.keys[:right.nrSegments])
+			copy(p.values[left.nrSegments+1:], right.values[:right.nrSegments])
+			if left.hasChildren {
+				// Adopt the grandchildren and repoint them at p.
+				copy(p.children[:left.nrSegments+1], left.children[:left.nrSegments+1])
+				copy(p.children[left.nrSegments+1:], right.children[:right.nrSegments+1])
+				for i := 0; i < p.nrSegments+1; i++ {
+					p.children[i].parent = p
+					p.children[i].parentIndex = i
+				}
+			} else {
+				// p is now a leaf; drop the references to the merged nodes.
+				p.children[0] = nil
+				p.children[1] = nil
+			}
+			// Gaps that were in left keep their index; gaps that were in
+			// right are offset by left's segments plus the old separator.
+			if gap.node == left {
+				return FileRangeGapIterator{p, gap.index}
+			}
+			if gap.node == right {
+				return FileRangeGapIterator{p, gap.index + left.nrSegments + 1}
+			}
+			return gap
+		}
+		// Merge n and either sibling, along with the segment separating the
+		// two, into whichever of the two nodes comes first. This is the
+		// reverse of the non-root splitting case in
+		// node.rebalanceBeforeInsert.
+		var left, right *FileRangenode
+		if n.parentIndex > 0 {
+			left = n.prevSibling()
+			right = n
+		} else {
+			left = n
+			right = n.nextSibling()
+		}
+		// Translate gap before left.nrSegments is changed below.
+		if gap.node == right {
+			gap = FileRangeGapIterator{left, gap.index + left.nrSegments + 1}
+		}
+		// Append the separator and then all of right's segments to left.
+		left.keys[left.nrSegments] = p.keys[left.parentIndex]
+		left.values[left.nrSegments] = p.values[left.parentIndex]
+		copy(left.keys[left.nrSegments+1:], right.keys[:right.nrSegments])
+		copy(left.values[left.nrSegments+1:], right.values[:right.nrSegments])
+		if left.hasChildren {
+			// Adopt right's children and repoint them at left.
+			copy(left.children[left.nrSegments+1:], right.children[:right.nrSegments+1])
+			for i := left.nrSegments + 1; i < left.nrSegments+right.nrSegments+2; i++ {
+				left.children[i].parent = left
+				left.children[i].parentIndex = i
+			}
+		}
+		left.nrSegments += right.nrSegments + 1
+		// Remove the separator and the reference to right from p, closing
+		// the holes and re-indexing the remaining children.
+		copy(p.keys[left.parentIndex:], p.keys[left.parentIndex+1:p.nrSegments])
+		copy(p.values[left.parentIndex:], p.values[left.parentIndex+1:p.nrSegments])
+		fileRangeSetFunctions{}.ClearValue(&p.values[p.nrSegments-1])
+		copy(p.children[left.parentIndex+1:], p.children[left.parentIndex+2:p.nrSegments+1])
+		for i := 0; i < p.nrSegments; i++ {
+			p.children[i].parentIndex = i
+		}
+		p.children[p.nrSegments] = nil
+		p.nrSegments--
+
+		// p lost a segment and may now be deficient itself; repeat one level
+		// up.
+		n = p
+	}
+}
+
+// An Iterator is conceptually one of:
+//
+// - A pointer to a segment in a set; or
+//
+// - A terminal iterator, which is a sentinel indicating that the end of
+// iteration has been reached.
+//
+// Iterators are copyable values and are meaningfully equality-comparable. The
+// zero value of Iterator is a terminal iterator.
+//
+// Unless otherwise specified, any mutation of a set invalidates all existing
+// iterators into the set.
+type FileRangeIterator struct {
+	// node is the node containing the iterated segment. If the iterator is
+	// terminal, node is nil.
+	node *FileRangenode
+
+	// index is the index of the segment in node.keys/values.
+	index int
+}
+
+// Ok reports whether seg is a non-terminal iterator, i.e. whether it refers to
+// a segment. Every other method may only be called on a non-terminal
+// iterator.
+func (seg FileRangeIterator) Ok() bool {
+	terminal := seg.node == nil
+	return !terminal
+}
+
+// Range returns a copy of the range key of the iterated segment.
+func (seg FileRangeIterator) Range() __generics_imported0.MappableRange {
+	key := seg.node.keys[seg.index]
+	return key
+}
+
+// Start returns the start of the iterated segment's range. It yields the same
+// value as Range().Start and should be preferred when only the start is
+// needed.
+func (seg FileRangeIterator) Start() uint64 {
+	key := &seg.node.keys[seg.index]
+	return key.Start
+}
+
+// End returns the end of the iterated segment's range. It yields the same
+// value as Range().End and should be preferred when only the end is needed.
+func (seg FileRangeIterator) End() uint64 {
+	key := &seg.node.keys[seg.index]
+	return key.End
+}
+
+// SetRangeUnchecked overwrites the iterated segment's range key with r
+// without validating it. No iterators are invalidated.
+//
+// Preconditions:
+//
+// - r.Length() > 0.
+//
+// - r must not overlap a neighboring segment: if seg.NextSegment().Ok(), then
+// r.End <= seg.NextSegment().Start(); if seg.PrevSegment().Ok(), then
+// r.Start >= seg.PrevSegment().End().
+func (seg FileRangeIterator) SetRangeUnchecked(r __generics_imported0.MappableRange) {
+	slot := &seg.node.keys[seg.index]
+	*slot = r
+}
+
+// SetRange replaces the iterated segment's range key with r after validating
+// it. It panics if r is invalid or if r would overlap the preceding or the
+// following segment. No iterators are invalidated.
+func (seg FileRangeIterator) SetRange(r __generics_imported0.MappableRange) {
+	if r.Length() <= 0 {
+		panic(fmt.Sprintf("invalid segment range %v", r))
+	}
+	// The new range may touch, but not cross into, either neighbor.
+	prev := seg.PrevSegment()
+	if prev.Ok() && prev.End() > r.Start {
+		panic(fmt.Sprintf("new segment range %v overlaps segment range %v", r, prev.Range()))
+	}
+	next := seg.NextSegment()
+	if next.Ok() && next.Start() < r.End {
+		panic(fmt.Sprintf("new segment range %v overlaps segment range %v", r, next.Range()))
+	}
+	seg.SetRangeUnchecked(r)
+}
+
+// SetStartUnchecked overwrites the start of the iterated segment's range
+// without validating it. No iterators are invalidated.
+//
+// Preconditions: start < seg.End(); if seg.PrevSegment().Ok(), then
+// start >= seg.PrevSegment().End().
+func (seg FileRangeIterator) SetStartUnchecked(start uint64) {
+	key := &seg.node.keys[seg.index]
+	key.Start = start
+}
+
+// SetStart changes the start of the iterated segment's range after validating
+// it. It panics if start is not below the segment's end or if the segment
+// would then overlap its predecessor. No iterators are invalidated.
+func (seg FileRangeIterator) SetStart(start uint64) {
+	if seg.End() <= start {
+		panic(fmt.Sprintf("new start %v would invalidate segment range %v", start, seg.Range()))
+	}
+	prev := seg.PrevSegment()
+	if prev.Ok() && prev.End() > start {
+		panic(fmt.Sprintf("new start %v would cause segment range %v to overlap segment range %v", start, seg.Range(), prev.Range()))
+	}
+	seg.SetStartUnchecked(start)
+}
+
+// SetEndUnchecked overwrites the end of the iterated segment's range without
+// validating it. No iterators are invalidated.
+//
+// Preconditions: end > seg.Start(); if seg.NextSegment().Ok(), then
+// end <= seg.NextSegment().Start().
+func (seg FileRangeIterator) SetEndUnchecked(end uint64) {
+	key := &seg.node.keys[seg.index]
+	key.End = end
+}
+
+// SetEnd changes the end of the iterated segment's range after validating it.
+// It panics if end is not above the segment's start or if the segment would
+// then overlap its successor. No iterators are invalidated.
+func (seg FileRangeIterator) SetEnd(end uint64) {
+	if seg.Start() >= end {
+		panic(fmt.Sprintf("new end %v would invalidate segment range %v", end, seg.Range()))
+	}
+	next := seg.NextSegment()
+	if next.Ok() && next.Start() < end {
+		panic(fmt.Sprintf("new end %v would cause segment range %v to overlap segment range %v", end, seg.Range(), next.Range()))
+	}
+	seg.SetEndUnchecked(end)
+}
+
+// Value returns the iterated segment's value by copy.
+func (seg FileRangeIterator) Value() uint64 {
+	val := seg.node.values[seg.index]
+	return val
+}
+
+// ValuePtr returns the address of the iterated segment's value inside its
+// node. The pointer is invalidated if the iterator is invalidated. Calling
+// ValuePtr does not itself invalidate any iterators.
+func (seg FileRangeIterator) ValuePtr() *uint64 {
+	n, i := seg.node, seg.index
+	return &n.values[i]
+}
+
+// SetValue overwrites the iterated segment's value with val. No iterators are
+// invalidated.
+func (seg FileRangeIterator) SetValue(val uint64) {
+	slot := &seg.node.values[seg.index]
+	*slot = val
+}
+
+// PrevSegment returns an iterator to the segment preceding the iterated one,
+// or a terminal iterator if the iterated segment is the first.
+func (seg FileRangeIterator) PrevSegment() FileRangeIterator {
+	n, i := seg.node, seg.index
+	switch {
+	case n.hasChildren:
+		// The predecessor is the last segment of the subtree to the left.
+		return n.children[i].lastSegment()
+	case i > 0:
+		return FileRangeIterator{n, i - 1}
+	case n.parent == nil:
+		return FileRangeIterator{}
+	default:
+		// First segment of a non-root leaf: continue in the ancestors.
+		return FileRangesegmentBeforePosition(n.parent, n.parentIndex)
+	}
+}
+
+// NextSegment returns an iterator to the segment following the iterated one,
+// or a terminal iterator if the iterated segment is the last.
+func (seg FileRangeIterator) NextSegment() FileRangeIterator {
+	n, i := seg.node, seg.index
+	switch {
+	case n.hasChildren:
+		// The successor is the first segment of the subtree to the right.
+		return n.children[i+1].firstSegment()
+	case i < n.nrSegments-1:
+		return FileRangeIterator{n, i + 1}
+	case n.parent == nil:
+		return FileRangeIterator{}
+	default:
+		// Last segment of a non-root leaf: continue in the ancestors.
+		return FileRangesegmentAfterPosition(n.parent, n.parentIndex)
+	}
+}
+
+// PrevGap returns an iterator to the gap directly preceding the iterated
+// segment.
+func (seg FileRangeIterator) PrevGap() FileRangeGapIterator {
+	n := seg.node
+	if !n.hasChildren {
+		return FileRangeGapIterator{n, seg.index}
+	}
+	// For a segment in an internal node, the wanted gap is the one after the
+	// last segment of the child subtree to its left.
+	return n.children[seg.index].lastSegment().NextGap()
+}
+
+// NextGap returns an iterator to the gap directly following the iterated
+// segment.
+func (seg FileRangeIterator) NextGap() FileRangeGapIterator {
+	n := seg.node
+	if !n.hasChildren {
+		return FileRangeGapIterator{n, seg.index + 1}
+	}
+	// For a segment in an internal node, the wanted gap is the one before the
+	// first segment of the child subtree to its right.
+	return n.children[seg.index+1].firstSegment().PrevGap()
+}
+
+// PrevNonEmpty returns the iterated segment's predecessor if it is adjacent,
+// or the gap before the iterated segment otherwise. If seg.Start() ==
+// Functions.MinKey(), PrevNonEmpty will return two terminal iterators.
+// Otherwise, exactly one of the iterators returned by PrevNonEmpty will be
+// non-terminal.
+func (seg FileRangeIterator) PrevNonEmpty() (FileRangeIterator, FileRangeGapIterator) {
+	gap := seg.PrevGap()
+	if gap.Range().Length() == 0 {
+		// Zero-length gap: report the adjacent predecessor instead (which is
+		// terminal if there is none).
+		return gap.PrevSegment(), FileRangeGapIterator{}
+	}
+	return FileRangeIterator{}, gap
+}
+
+// NextNonEmpty returns the iterated segment's successor if it is adjacent, or
+// the gap after the iterated segment otherwise. If seg.End() ==
+// Functions.MaxKey(), NextNonEmpty will return two terminal iterators.
+// Otherwise, exactly one of the iterators returned by NextNonEmpty will be
+// non-terminal.
+func (seg FileRangeIterator) NextNonEmpty() (FileRangeIterator, FileRangeGapIterator) {
+	gap := seg.NextGap()
+	if gap.Range().Length() == 0 {
+		// Zero-length gap: report the adjacent successor instead (which is
+		// terminal if there is none).
+		return gap.NextSegment(), FileRangeGapIterator{}
+	}
+	return FileRangeIterator{}, gap
+}
+
+// A GapIterator is conceptually one of:
+//
+// - A pointer to a position between two segments, before the first segment, or
+// after the last segment in a set, called a *gap*; or
+//
+// - A terminal iterator, which is a sentinel indicating that the end of
+// iteration has been reached.
+//
+// Note that the gap between two adjacent segments exists (iterators to it are
+// non-terminal), but has a length of zero. GapIterator.IsEmpty returns true
+// for such gaps. An empty set contains a single gap, spanning the entire range
+// of the set's keys.
+//
+// GapIterators are copyable values and are meaningfully equality-comparable.
+// The zero value of GapIterator is a terminal iterator.
+//
+// Unless otherwise specified, any mutation of a set invalidates all existing
+// iterators into the set.
+type FileRangeGapIterator struct {
+	// The representation of a GapIterator is identical to that of an Iterator,
+	// except that index corresponds to positions between segments in the same
+	// way as for node.children (see comment for node.nrSegments).
+
+	// node is the node containing the gap; it is nil for a terminal iterator
+	// (see Ok).
+	node  *FileRangenode
+	// index is the position of the gap within node.
+	index int
+}
+
+// Ok reports whether gap is a non-terminal iterator, i.e. whether it refers to
+// a gap. Every other method may only be called on a non-terminal iterator.
+func (gap FileRangeGapIterator) Ok() bool {
+	terminal := gap.node == nil
+	return !terminal
+}
+
+// Range returns the range spanned by the iterated gap, from the end of the
+// preceding segment (or the minimum key) to the start of the following
+// segment (or the maximum key).
+func (gap FileRangeGapIterator) Range() __generics_imported0.MappableRange {
+	// Keyed fields keep the literal correct if the imported struct ever gains
+	// or reorders fields, and satisfy go vet's composites check.
+	return __generics_imported0.MappableRange{Start: gap.Start(), End: gap.End()}
+}
+
+// Start returns the start of the iterated gap: the end of the preceding
+// segment, or the set's minimum key if the gap has no preceding segment. It
+// yields the same value as Range().Start and should be preferred when only
+// the start is needed.
+func (gap FileRangeGapIterator) Start() uint64 {
+	ps := gap.PrevSegment()
+	if !ps.Ok() {
+		return fileRangeSetFunctions{}.MinKey()
+	}
+	return ps.End()
+}
+
+// End returns the end of the iterated gap: the start of the following
+// segment, or the set's maximum key if the gap has no following segment. It
+// yields the same value as Range().End and should be preferred when only the
+// end is needed.
+func (gap FileRangeGapIterator) End() uint64 {
+	ns := gap.NextSegment()
+	if !ns.Ok() {
+		return fileRangeSetFunctions{}.MaxKey()
+	}
+	return ns.Start()
+}
+
+// IsEmpty reports whether the iterated gap has zero length, which is the case
+// for the "gap" between two adjacent segments.
+func (gap FileRangeGapIterator) IsEmpty() bool {
+	length := gap.Range().Length()
+	return length == 0
+}
+
+// PrevSegment returns an iterator to the segment directly preceding the
+// iterated gap, or a terminal iterator if there is none.
+func (gap FileRangeGapIterator) PrevSegment() FileRangeIterator {
+	n, pos := gap.node, gap.index
+	return FileRangesegmentBeforePosition(n, pos)
+}
+
+// NextSegment returns an iterator to the segment directly following the
+// iterated gap, or a terminal iterator if there is none.
+func (gap FileRangeGapIterator) NextSegment() FileRangeIterator {
+	n, pos := gap.node, gap.index
+	return FileRangesegmentAfterPosition(n, pos)
+}
+
+// PrevGap returns an iterator to the gap preceding the iterated one, or a
+// terminal iterator if the iterated gap is the first.
+func (gap FileRangeGapIterator) PrevGap() FileRangeGapIterator {
+	if seg := gap.PrevSegment(); seg.Ok() {
+		// The previous gap lies on the far side of the preceding segment.
+		return seg.PrevGap()
+	}
+	return FileRangeGapIterator{}
+}
+
+// NextGap returns an iterator to the gap following the iterated one, or a
+// terminal iterator if the iterated gap is the last.
+func (gap FileRangeGapIterator) NextGap() FileRangeGapIterator {
+	if seg := gap.NextSegment(); seg.Ok() {
+		// The next gap lies on the far side of the following segment.
+		return seg.NextGap()
+	}
+	return FileRangeGapIterator{}
+}
+
+// segmentBeforePosition returns the segment preceding the position
+// n.children[i], whether or not a child is present there. It returns a
+// terminal iterator if no segment precedes that position.
+func FileRangesegmentBeforePosition(n *FileRangenode, i int) FileRangeIterator {
+	// Position 0 has nothing to its left within n, so climb until the
+	// position is not the leftmost one of its node.
+	for ; i == 0; n, i = n.parent, n.parentIndex {
+		if n.parent == nil {
+			return FileRangeIterator{}
+		}
+	}
+	return FileRangeIterator{n, i - 1}
+}
+
+// segmentAfterPosition returns the segment following the position
+// n.children[i], whether or not a child is present there. It returns a
+// terminal iterator if no segment follows that position.
+func FileRangesegmentAfterPosition(n *FileRangenode, i int) FileRangeIterator {
+	// Position nrSegments has nothing to its right within n, so climb until
+	// the position is not the rightmost one of its node.
+	for ; i == n.nrSegments; n, i = n.parent, n.parentIndex {
+		if n.parent == nil {
+			return FileRangeIterator{}
+		}
+	}
+	return FileRangeIterator{n, i}
+}
+
+// zeroValueSlice calls ClearValue on every element of slice.
+func FileRangezeroValueSlice(slice []uint64) {
+	fns := fileRangeSetFunctions{}
+	for i := 0; i < len(slice); i++ {
+		fns.ClearValue(&slice[i])
+	}
+}
+
+// zeroNodeSlice sets every element of slice to nil.
+func FileRangezeroNodeSlice(slice []*FileRangenode) {
+	for i := 0; i < len(slice); i++ {
+		slice[i] = nil
+	}
+}
+
+// String returns a debugging representation of the set, which is the debug
+// string of its root node.
+func (s *FileRangeSet) String() string {
+	text := s.root.String()
+	return text
+}
+
+// String stringifes a node (and all of its children) for debugging.
+func (n *FileRangenode) String() string {
+	var buf bytes.Buffer
+	n.writeDebugString(&buf, "")
+	return buf.String()
+}
+
+// writeDebugString appends a human-readable dump of n and its descendants to
+// buf, in key order. Each line is prefixed with prefix; children are dumped
+// with a longer prefix that records their child index.
+//
+// Inconsistencies that are detected along the way (a hasChildren flag that
+// disagrees with the children actually present, or a child whose
+// parent/parentIndex do not point back at n) are reported as WARNING lines
+// instead of causing a failure. The parent linkage is checked for every
+// child, including the trailing one at index n.nrSegments.
+func (n *FileRangenode) writeDebugString(buf *bytes.Buffer, prefix string) {
+	if n.hasChildren != (n.nrSegments > 0 && n.children[0] != nil) {
+		buf.WriteString(prefix)
+		fmt.Fprintf(buf, "WARNING: inconsistent value of hasChildren: got %v, want %v\n", n.hasChildren, !n.hasChildren)
+	}
+	// Child i precedes segment i; the child at index nrSegments follows the
+	// last segment and has no segment of its own to print.
+	for i := 0; i <= n.nrSegments; i++ {
+		if child := n.children[i]; child != nil {
+			cprefix := fmt.Sprintf("%s- % 3d ", prefix, i)
+			if child.parent != n || child.parentIndex != i {
+				buf.WriteString(cprefix)
+				fmt.Fprintf(buf, "WARNING: inconsistent linkage to parent: got (%p, %d), want (%p, %d)\n", child.parent, child.parentIndex, n, i)
+			}
+			child.writeDebugString(buf, cprefix)
+		}
+		if i < n.nrSegments {
+			buf.WriteString(prefix)
+			fmt.Fprintf(buf, "- % 3d: %v => %v\n", i, n.keys[i], n.values[i])
+		}
+	}
+}
+
+// SegmentDataSlices represents segments from a set as slices of start, end, and
+// values. SegmentDataSlices is primarily used as an intermediate representation
+// for save/restore and the layout here is optimized for that.
+//
+// The three slices are parallel: element i of each slice describes the same
+// segment.
+//
+// +stateify savable
+type FileRangeSegmentDataSlices struct {
+	// Start holds the start of each segment's range.
+	Start  []uint64
+	// End holds the end of each segment's range.
+	End    []uint64
+	// Values holds each segment's value.
+	Values []uint64
+}
+
+// ExportSortedSlice returns a copy of all segments in the given set, in ascending
+// key order.
+func (s *FileRangeSet) ExportSortedSlices() *FileRangeSegmentDataSlices {
+	var sds FileRangeSegmentDataSlices
+	for seg := s.FirstSegment(); seg.Ok(); seg = seg.NextSegment() {
+		sds.Start = append(sds.Start, seg.Start())
+		sds.End = append(sds.End, seg.End())
+		sds.Values = append(sds.Values, seg.Value())
+	}
+	sds.Start = sds.Start[:len(sds.Start):len(sds.Start)]
+	sds.End = sds.End[:len(sds.End):len(sds.End)]
+	sds.Values = sds.Values[:len(sds.Values):len(sds.Values)]
+	return &sds
+}
+
+// ImportSortedSlices initializes the given set from the given slices.
+//
+// Preconditions: s must be empty. sds must represent a valid set (the segments
+// in sds must have valid lengths that do not overlap). The segments in sds
+// must be sorted in ascending key order.
+//
+// A violated precondition is reported as an error rather than a panic: a
+// non-empty s, slices of differing lengths, a segment whose start is not below
+// its end, and a segment that overlaps or precedes an earlier one. When an
+// error is returned part-way through, the segments imported before the
+// offending one remain in s.
+func (s *FileRangeSet) ImportSortedSlices(sds *FileRangeSegmentDataSlices) error {
+	if !s.IsEmpty() {
+		return fmt.Errorf("cannot import into non-empty set %v", s)
+	}
+	// The slices are indexed in lockstep below; reject mismatched lengths up
+	// front instead of failing with an index-out-of-range panic.
+	if len(sds.End) != len(sds.Start) || len(sds.Values) != len(sds.Start) {
+		return fmt.Errorf("mismatched segment data lengths: %d starts, %d ends, %d values", len(sds.Start), len(sds.End), len(sds.Values))
+	}
+	gap := s.FirstGap()
+	for i := range sds.Start {
+		// Keys are unsigned, so an inverted range cannot be detected through
+		// a negative length; compare the bounds directly.
+		if sds.Start[i] >= sds.End[i] {
+			return fmt.Errorf("invalid segment range: [%d, %d) => %v", sds.Start[i], sds.End[i], sds.Values[i])
+		}
+		r := __generics_imported0.MappableRange{Start: sds.Start[i], End: sds.End[i]}
+		if !gap.Range().IsSupersetOf(r) {
+			return fmt.Errorf("segment overlaps a preceding segment or is incorrectly sorted: [%d, %d) => %v", sds.Start[i], sds.End[i], sds.Values[i])
+		}
+		// Continue in the gap after the new segment; sorted input means later
+		// segments can only belong there.
+		gap = s.InsertWithoutMerging(gap, r, sds.Values[i]).NextGap()
+	}
+	return nil
+}
+// saveRoot returns the set's segments in the slice form produced by
+// ExportSortedSlices.
+// NOTE(review): presumably called by the generated save/restore code - confirm.
+func (s *FileRangeSet) saveRoot() *FileRangeSegmentDataSlices {
+	sds := s.ExportSortedSlices()
+	return sds
+}
+
+// loadRoot repopulates the set from sds via ImportSortedSlices and panics if
+// the import fails.
+// NOTE(review): presumably called by the generated save/restore code - confirm.
+func (s *FileRangeSet) loadRoot(sds *FileRangeSegmentDataSlices) {
+	err := s.ImportSortedSlices(sds)
+	if err == nil {
+		return
+	}
+	panic(err)
+}
diff --git a/pkg/sentry/fs/fsutil/frame_ref_set.go b/pkg/sentry/fs/fsutil/frame_ref_set.go
new file mode 100644
index 000000000..6565c28c8
--- /dev/null
+++ b/pkg/sentry/fs/fsutil/frame_ref_set.go
@@ -0,0 +1,50 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package fsutil
+
+import (
+	"math"
+
+	"gvisor.googlesource.com/gvisor/pkg/sentry/platform"
+)
+
+// frameRefSetFunctions is a stateless type whose methods implement the
+// segment.Functions operations (MinKey, MaxKey, ClearValue, Merge, Split) for
+// sets keyed by platform.FileRange with uint64 values.
+type frameRefSetFunctions struct{}
+
+// MinKey implements segment.Functions.MinKey.
+func (frameRefSetFunctions) MinKey() uint64 {
+	return 0
+}
+
+// MaxKey implements segment.Functions.MaxKey.
+func (frameRefSetFunctions) MaxKey() uint64 {
+	return math.MaxUint64
+}
+
+// ClearValue implements segment.Functions.ClearValue. It is a no-op: the
+// value pointed to by val is left unchanged.
+func (frameRefSetFunctions) ClearValue(val *uint64) {
+}
+
+// Merge implements segment.Functions.Merge. Two segments can be merged only
+// if their values are equal, in which case the merged segment keeps that
+// value; otherwise Merge returns (0, false). The ranges are ignored.
+func (frameRefSetFunctions) Merge(_ platform.FileRange, val1 uint64, _ platform.FileRange, val2 uint64) (uint64, bool) {
+	if val1 == val2 {
+		return val1, true
+	}
+	return 0, false
+}
+
+// Split implements segment.Functions.Split. Both halves of a split segment
+// receive the original value unchanged; the range and split point are
+// ignored.
+func (frameRefSetFunctions) Split(_ platform.FileRange, val uint64, _ uint64) (uint64, uint64) {
+	first, second := val, val
+	return first, second
+}
diff --git a/pkg/sentry/fs/fsutil/frame_ref_set_impl.go b/pkg/sentry/fs/fsutil/frame_ref_set_impl.go
new file mode 100755
index 000000000..2f858f419
--- /dev/null
+++ b/pkg/sentry/fs/fsutil/frame_ref_set_impl.go
@@ -0,0 +1,1274 @@
+package fsutil
+
+import (
+	__generics_imported0 "gvisor.googlesource.com/gvisor/pkg/sentry/platform"
+)
+
+import (
+	"bytes"
+	"fmt"
+)
+
+const (
+	// minDegree is the minimum degree of an internal node in a Set B-tree.
+	//
+	// - Any non-root node has at least minDegree-1 segments.
+	//
+	// - Any non-root internal (non-leaf) node has at least minDegree children.
+	//
+	// - The root node may have fewer than minDegree-1 segments, but it may
+	// only have 0 segments if the tree is empty.
+	//
+	// Our implementation requires minDegree >= 3. Higher values of minDegree
+	// usually improve performance, but increase memory usage for small sets.
+	frameRefminDegree = 3
+
+	// maxDegree is twice minDegree.
+	// NOTE(review): presumably the maximum number of children of a node -
+	// confirm against the node definition.
+	frameRefmaxDegree = 2 * frameRefminDegree
+)
+
+// A Set is a mapping of segments with non-overlapping Range keys. The zero
+// value for a Set is an empty set. Set values are not safely movable nor
+// copyable. Set is thread-compatible.
+//
+// +stateify savable
+type frameRefSet struct {
+	// root is the root node of the B-tree, embedded by value. The state tag
+	// names *frameRefSegmentDataSlices as its saved representation.
+	root frameRefnode `state:".(*frameRefSegmentDataSlices)"`
+}
+
+// IsEmpty reports whether the set contains no segments, which is the case
+// exactly when its root node holds none.
+func (s *frameRefSet) IsEmpty() bool {
+	count := s.root.nrSegments
+	return count == 0
+}
+
+// IsEmptyRange reports whether no segment in the set overlaps r. It is
+// semantically equivalent to s.SpanRange(r) == 0, but may be more efficient.
+// It panics if r is invalid.
+func (s *frameRefSet) IsEmptyRange(r __generics_imported0.FileRange) bool {
+	length := r.Length()
+	if length < 0 {
+		panic(fmt.Sprintf("invalid range %v", r))
+	}
+	if length == 0 {
+		return true
+	}
+	// r is free of segments iff it starts inside a gap and ends no later than
+	// that gap does.
+	_, gap := s.Find(r.Start)
+	return gap.Ok() && r.End <= gap.End()
+}
+
+// Span returns the total size of all segments in the set.
+func (s *frameRefSet) Span() uint64 {
+	var sz uint64
+	for seg := s.FirstSegment(); seg.Ok(); seg = seg.NextSegment() {
+		sz += seg.Range().Length()
+	}
+	return sz
+}
+
+// SpanRange returns the total size of the intersection of segments in the set
+// with the given range.
+func (s *frameRefSet) SpanRange(r __generics_imported0.FileRange) uint64 {
+	switch {
+	case r.Length() < 0:
+		panic(fmt.Sprintf("invalid range %v", r))
+	case r.Length() == 0:
+		return 0
+	}
+	var sz uint64
+	for seg := s.LowerBoundSegment(r.Start); seg.Ok() && seg.Start() < r.End; seg = seg.NextSegment() {
+		sz += seg.Range().Intersect(r).Length()
+	}
+	return sz
+}
+
+// FirstSegment returns an iterator to the first segment in the set, or a
+// terminal iterator if the set is empty.
+func (s *frameRefSet) FirstSegment() frameRefIterator {
+	root := &s.root
+	if root.nrSegments != 0 {
+		return root.firstSegment()
+	}
+	return frameRefIterator{}
+}
+
+// LastSegment returns an iterator to the last segment in the set, or a
+// terminal iterator if the set is empty.
+func (s *frameRefSet) LastSegment() frameRefIterator {
+	root := &s.root
+	if root.nrSegments != 0 {
+		return root.lastSegment()
+	}
+	return frameRefIterator{}
+}
+
+// FirstGap returns an iterator to the first gap in the set, found by
+// following the first child of each node down to a node without children.
+func (s *frameRefSet) FirstGap() frameRefGapIterator {
+	leaf := &s.root
+	for leaf.hasChildren {
+		leaf = leaf.children[0]
+	}
+	return frameRefGapIterator{leaf, 0}
+}
+
+// LastGap returns an iterator to the last gap in the set, found by following
+// the last child of each node down to a node without children.
+func (s *frameRefSet) LastGap() frameRefGapIterator {
+	leaf := &s.root
+	for leaf.hasChildren {
+		leaf = leaf.children[leaf.nrSegments]
+	}
+	return frameRefGapIterator{leaf, leaf.nrSegments}
+}
+
+// Find locates key in the set. If a segment contains key, Find returns a
+// non-terminal Iterator to it and a terminal GapIterator. Otherwise it
+// returns a terminal Iterator and a non-terminal GapIterator to the gap
+// containing key.
+func (s *frameRefSet) Find(key uint64) (frameRefIterator, frameRefGapIterator) {
+	n := &s.root
+	for {
+		// Binary search n's segments for one containing key. When none
+		// does, lo ends up as the index of the child/gap position that key
+		// falls into.
+		lo, hi := 0, n.nrSegments
+		for lo < hi {
+			mid := lo + (hi-lo)/2
+			r := n.keys[mid]
+			switch {
+			case key >= r.End:
+				lo = mid + 1
+			case key < r.Start:
+				hi = mid
+			default:
+				return frameRefIterator{n, mid}, frameRefGapIterator{}
+			}
+		}
+		if !n.hasChildren {
+			return frameRefIterator{}, frameRefGapIterator{n, lo}
+		}
+		n = n.children[lo]
+	}
+}
+
+// FindSegment returns an iterator to the segment whose range contains key,
+// or a terminal iterator if no segment contains it.
+func (s *frameRefSet) FindSegment(key uint64) frameRefIterator {
+	found, _ := s.Find(key)
+	return found
+}
+
+// LowerBoundSegment returns the segment with the lowest range that contains
+// a key greater than or equal to min, or a terminal iterator if there is no
+// such segment.
+func (s *frameRefSet) LowerBoundSegment(min uint64) frameRefIterator {
+	seg, gap := s.Find(min)
+	if !seg.Ok() {
+		// min lies in a gap; the wanted segment is the one after that gap.
+		return gap.NextSegment()
+	}
+	return seg
+}
+
+// UpperBoundSegment returns the segment with the highest range that contains
+// a key less than or equal to max, or a terminal iterator if there is no such
+// segment.
+func (s *frameRefSet) UpperBoundSegment(max uint64) frameRefIterator {
+	seg, gap := s.Find(max)
+	if !seg.Ok() {
+		// max lies in a gap; the wanted segment is the one before that gap.
+		return gap.PrevSegment()
+	}
+	return seg
+}
+
+// FindGap returns an iterator to the gap containing key. If key lies inside
+// a segment instead, FindGap returns a terminal iterator.
+func (s *frameRefSet) FindGap(key uint64) frameRefGapIterator {
+	_, found := s.Find(key)
+	return found
+}
+
+// LowerBoundGap returns the gap with the lowest range that is greater than or
+// equal to min.
+func (s *frameRefSet) LowerBoundGap(min uint64) frameRefGapIterator {
+	seg, gap := s.Find(min)
+	if !gap.Ok() {
+		// min lies in a segment; the wanted gap is the one after it.
+		return seg.NextGap()
+	}
+	return gap
+}
+
+// UpperBoundGap returns the gap with the highest range that is less than or
+// equal to max.
+func (s *frameRefSet) UpperBoundGap(max uint64) frameRefGapIterator {
+	seg, gap := s.Find(max)
+	if !gap.Ok() {
+		// max lies in a segment; the wanted gap is the one before it.
+		return seg.PrevGap()
+	}
+	return gap
+}
+
+// Add inserts the segment (r, val) into the set, merging it with adjacent
+// segments where possible, and returns true. If the new segment would overlap
+// an existing one, Add returns false without inserting. A successful Add
+// invalidates all existing iterators. Add panics if r is invalid.
+func (s *frameRefSet) Add(r __generics_imported0.FileRange, val uint64) bool {
+	if r.Length() <= 0 {
+		panic(fmt.Sprintf("invalid segment range %v", r))
+	}
+	// r fits only if it starts in a gap and does not extend past that gap.
+	gap := s.FindGap(r.Start)
+	if !gap.Ok() || r.End > gap.End() {
+		return false
+	}
+	s.Insert(gap, r, val)
+	return true
+}
+
+// AddWithoutMerging inserts the segment (r, val) into the set as a separate
+// segment and returns true. If the new segment would overlap an existing one,
+// AddWithoutMerging does nothing and returns false. A successful
+// AddWithoutMerging invalidates all existing iterators. AddWithoutMerging
+// panics if r is invalid.
+func (s *frameRefSet) AddWithoutMerging(r __generics_imported0.FileRange, val uint64) bool {
+	if r.Length() <= 0 {
+		panic(fmt.Sprintf("invalid segment range %v", r))
+	}
+	// r fits only if it starts in a gap and does not extend past that gap.
+	gap := s.FindGap(r.Start)
+	if !gap.Ok() || r.End > gap.End() {
+		return false
+	}
+	s.InsertWithoutMergingUnchecked(gap, r, val)
+	return true
+}
+
+// Insert inserts the given segment into the given gap. If the new segment can
+// be merged with adjacent segments, Insert will do so. Insert returns an
+// iterator to the segment containing the inserted value (which may have been
+// merged with other values). All existing iterators (including gap, but not
+// including the returned iterator) are invalidated.
+//
+// If the gap cannot accommodate the segment, or if r is invalid, Insert panics.
+//
+// Insert is semantically equivalent to a InsertWithoutMerging followed by a
+// Merge, but may be more efficient. Note that there is no unchecked variant of
+// Insert since Insert must retrieve and inspect gap's predecessor and
+// successor segments regardless.
+func (s *frameRefSet) Insert(gap frameRefGapIterator, r __generics_imported0.FileRange, val uint64) frameRefIterator {
+	if r.Length() <= 0 {
+		panic(fmt.Sprintf("invalid segment range %v", r))
+	}
+	// r must fit between the segments on either side of gap.
+	prev, next := gap.PrevSegment(), gap.NextSegment()
+	if prev.Ok() && prev.End() > r.Start {
+		panic(fmt.Sprintf("new segment %v overlaps predecessor %v", r, prev.Range()))
+	}
+	if next.Ok() && next.Start() < r.End {
+		panic(fmt.Sprintf("new segment %v overlaps successor %v", r, next.Range()))
+	}
+	// Case 1: r is adjacent to prev. If the values merge, extend prev over r
+	// in place instead of inserting a new segment.
+	if prev.Ok() && prev.End() == r.Start {
+		if mval, ok := (frameRefSetFunctions{}).Merge(prev.Range(), prev.Value(), r, val); ok {
+			prev.SetEndUnchecked(r.End)
+			prev.SetValue(mval)
+			// prev now covers r. If next is adjacent as well, try to merge
+			// it too; prev.Range() below already includes r.
+			if next.Ok() && next.Start() == r.End {
+				val = mval
+				if mval, ok := (frameRefSetFunctions{}).Merge(prev.Range(), val, next.Range(), next.Value()); ok {
+					prev.SetEndUnchecked(next.End())
+					prev.SetValue(mval)
+					// Remove invalidates prev, so the merged segment is
+					// reached again from the gap that Remove returns.
+					return s.Remove(next).PrevSegment()
+				}
+			}
+			return prev
+		}
+	}
+	// Case 2: r did not merge with prev but is adjacent to next. If the
+	// values merge, extend next backwards over r in place.
+	if next.Ok() && next.Start() == r.End {
+		if mval, ok := (frameRefSetFunctions{}).Merge(r, val, next.Range(), next.Value()); ok {
+			next.SetStartUnchecked(r.Start)
+			next.SetValue(mval)
+			return next
+		}
+	}
+	// Case 3: no merge was possible; insert r as a new segment. The overlap
+	// checks above already established that r fits into gap.
+	return s.InsertWithoutMergingUnchecked(gap, r, val)
+}
+
+// InsertWithoutMerging inserts the segment (r, val) into gap as a separate
+// segment and returns an iterator to it. All existing iterators (including
+// gap, but not including the returned iterator) are invalidated.
+//
+// InsertWithoutMerging panics if r is invalid or if r does not fit into the
+// range of gap.
+func (s *frameRefSet) InsertWithoutMerging(gap frameRefGapIterator, r __generics_imported0.FileRange, val uint64) frameRefIterator {
+	if r.Length() <= 0 {
+		panic(fmt.Sprintf("invalid segment range %v", r))
+	}
+	gr := gap.Range()
+	if !gr.IsSupersetOf(r) {
+		panic(fmt.Sprintf("cannot insert segment range %v into gap range %v", r, gr))
+	}
+	return s.InsertWithoutMergingUnchecked(gap, r, val)
+}
+
+// InsertWithoutMergingUnchecked places the segment (r, val) into gap without
+// validating it and returns an iterator to the new segment. Every existing
+// iterator, gap included, is invalidated; only the returned iterator remains
+// usable.
+//
+// Preconditions: r.Start >= gap.Start(); r.End <= gap.End().
+func (s *frameRefSet) InsertWithoutMergingUnchecked(gap frameRefGapIterator, r __generics_imported0.FileRange, val uint64) frameRefIterator {
+	// Make room first; the rebalance may relocate the insertion position.
+	pos := gap.node.rebalanceBeforeInsert(gap)
+	n, i := pos.node, pos.index
+	// Shift the tail of the node right by one slot and fill the hole.
+	copy(n.keys[i+1:], n.keys[i:n.nrSegments])
+	copy(n.values[i+1:], n.values[i:n.nrSegments])
+	n.keys[i], n.values[i] = r, val
+	n.nrSegments++
+	return frameRefIterator{node: n, index: i}
+}
+
+// Remove deletes the segment seg and returns an iterator to the gap it leaves
+// behind. Every existing iterator, seg included, is invalidated; only the
+// returned iterator remains usable.
+func (s *frameRefSet) Remove(seg frameRefIterator) frameRefGapIterator {
+	n, i := seg.node, seg.index
+	if !n.hasChildren {
+		// Leaf: close the hole, clear the vacated last slot and rebalance.
+		copy(n.keys[i:], n.keys[i+1:n.nrSegments])
+		copy(n.values[i:], n.values[i+1:n.nrSegments])
+		last := n.nrSegments - 1
+		frameRefSetFunctions{}.ClearValue(&n.values[last])
+		n.nrSegments = last
+		return n.rebalanceAfterRemove(frameRefGapIterator{node: n, index: i})
+	}
+	// Internal node: overwrite seg with its predecessor (found by descending
+	// to the last segment of the left subtree) and remove the predecessor
+	// instead. The gap after the overwritten segment is the one vacated by
+	// seg's original contents.
+	pred := seg.PrevSegment()
+	seg.SetRangeUnchecked(pred.Range())
+	seg.SetValue(pred.Value())
+	return s.Remove(pred).NextGap()
+}
+
+// RemoveAll removes all segments from the set. All existing iterators are
+// invalidated.
+func (s *frameRefSet) RemoveAll() {
+	s.root = frameRefnode{}
+}
+
+// RemoveRange deletes every segment, or part of a segment, that lies within
+// r. It returns an iterator to the resulting gap and invalidates all existing
+// iterators.
+func (s *frameRefSet) RemoveRange(r __generics_imported0.FileRange) frameRefGapIterator {
+	first, hole := s.Find(r.Start)
+	if first.Ok() {
+		// A segment contains r.Start: trim it to r and drop the trimmed part.
+		hole = s.Remove(s.Isolate(first, r))
+	}
+	// Keep consuming the segment after the hole while it starts inside r.
+	for {
+		victim := hole.NextSegment()
+		if !victim.Ok() || victim.Start() >= r.End {
+			return hole
+		}
+		hole = s.Remove(s.Isolate(victim, r))
+	}
+}
+
+// Merge tries to combine two neighboring segments into one. On success it
+// returns an iterator to the combined segment and invalidates all existing
+// iterators; on failure it returns a terminal iterator.
+//
+// Merge panics if second is not the immediate successor of first.
+func (s *frameRefSet) Merge(first, second frameRefIterator) frameRefIterator {
+	if succ := first.NextSegment(); succ == second {
+		return s.MergeUnchecked(first, second)
+	}
+	panic(fmt.Sprintf("attempt to merge non-neighboring segments %v, %v", first.Range(), second.Range()))
+}
+
+// MergeUnchecked tries to combine two neighboring segments into one without
+// verifying that they are neighbors. On success it returns an iterator to the
+// combined segment and invalidates all existing iterators; on failure it
+// returns a terminal iterator.
+//
+// Precondition: first is the predecessor of second: first.NextSegment() ==
+// second, first == second.PrevSegment().
+func (s *frameRefSet) MergeUnchecked(first, second frameRefIterator) frameRefIterator {
+	// Segments separated by a non-empty gap can never be merged.
+	if first.End() != second.Start() {
+		return frameRefIterator{}
+	}
+	merged, ok := (frameRefSetFunctions{}).Merge(first.Range(), first.Value(), second.Range(), second.Value())
+	if !ok {
+		return frameRefIterator{}
+	}
+	// Extend first over second, then drop second; the segment before the
+	// vacated gap is the merged one.
+	first.SetEndUnchecked(second.End())
+	first.SetValue(merged)
+	return s.Remove(second).PrevSegment()
+}
+
+// MergeAll walks the whole set and tries to merge every pair of adjacent
+// segments. All existing iterators are invalidated.
+func (s *frameRefSet) MergeAll() {
+	cur := s.FirstSegment()
+	if !cur.Ok() {
+		return
+	}
+	for succ := cur.NextSegment(); succ.Ok(); succ = cur.NextSegment() {
+		if merged := s.MergeUnchecked(cur, succ); merged.Ok() {
+			// Stay on the merged segment so it can absorb its next neighbor.
+			cur = merged
+			continue
+		}
+		cur = succ
+	}
+}
+
+// MergeRange tries to merge adjacent segments, starting from the segment
+// returned by LowerBoundSegment(r.Start) and continuing while the following
+// segment starts before r.End. All existing iterators are invalidated.
+func (s *frameRefSet) MergeRange(r __generics_imported0.FileRange) {
+	cur := s.LowerBoundSegment(r.Start)
+	if !cur.Ok() {
+		return
+	}
+	for succ := cur.NextSegment(); succ.Ok() && succ.Start() < r.End; succ = cur.NextSegment() {
+		if merged := s.MergeUnchecked(cur, succ); merged.Ok() {
+			// Stay on the merged segment so it can absorb its next neighbor.
+			cur = merged
+			continue
+		}
+		cur = succ
+	}
+}
+
+// MergeAdjacent tries to merge the segment containing r.Start with the
+// segment before it, and the segment containing r.End-1 with the segment
+// after it. The second lookup is performed after the first merge attempt.
+func (s *frameRefSet) MergeAdjacent(r __generics_imported0.FileRange) {
+	if head := s.FindSegment(r.Start); head.Ok() {
+		if before := head.PrevSegment(); before.Ok() {
+			s.Merge(before, head)
+		}
+	}
+	if tail := s.FindSegment(r.End - 1); tail.Ok() {
+		if after := tail.NextSegment(); after.Ok() {
+			s.Merge(tail, after)
+		}
+	}
+}
+
+// Split cuts seg in two at the key split and returns iterators to the lower
+// and upper halves, in that order. Every existing iterator, seg included, is
+// invalidated; only the returned iterators remain usable.
+//
+// Split panics if seg's range reports that it cannot be split at split, for
+// example because split is an endpoint of the range (which would leave a
+// zero-length half) or lies outside the range.
+func (s *frameRefSet) Split(seg frameRefIterator, split uint64) (frameRefIterator, frameRefIterator) {
+	if rng := seg.Range(); !rng.CanSplitAt(split) {
+		panic(fmt.Sprintf("can't split %v at %v", rng, split))
+	}
+	return s.SplitUnchecked(seg, split)
+}
+
+// SplitUnchecked cuts seg in two at the key split without validating split,
+// and returns iterators to the lower and upper halves, in that order. Every
+// existing iterator, seg included, is invalidated; only the returned
+// iterators remain usable.
+//
+// Preconditions: seg.Start() < split < seg.End().
+func (s *frameRefSet) SplitUnchecked(seg frameRefIterator, split uint64) (frameRefIterator, frameRefIterator) {
+	whole := seg.Range()
+	loVal, hiVal := (frameRefSetFunctions{}).Split(whole, seg.Value(), split)
+	// Shrink seg to the lower half, then insert the upper half right after.
+	seg.SetEndUnchecked(split)
+	seg.SetValue(loVal)
+	upper := __generics_imported0.FileRange{Start: split, End: whole.End}
+	hi := s.InsertWithoutMergingUnchecked(seg.NextGap(), upper, hiVal)
+	// The insertion invalidated seg, so re-derive the lower half from hi.
+	return hi.PrevSegment(), hi
+}
+
+// SplitAt cuts the segment that straddles split, if there is one, and reports
+// whether a cut was made. When it returns true, all existing iterators are
+// invalidated.
+func (s *frameRefSet) SplitAt(split uint64) bool {
+	seg := s.FindSegment(split)
+	if !seg.Ok() || !seg.Range().CanSplitAt(split) {
+		return false
+	}
+	s.SplitUnchecked(seg, split)
+	return true
+}
+
+// Isolate trims seg so that it does not extend beyond r, splitting it at
+// r.Start and/or r.End where its range allows, and returns an iterator to the
+// piece that lies within r. Every existing iterator, seg included, is
+// invalidated; only the returned iterator remains usable.
+func (s *frameRefSet) Isolate(seg frameRefIterator, r __generics_imported0.FileRange) frameRefIterator {
+	bounded := seg
+	if bounded.Range().CanSplitAt(r.Start) {
+		// Keep the upper piece, which begins at r.Start.
+		_, bounded = s.SplitUnchecked(bounded, r.Start)
+	}
+	if bounded.Range().CanSplitAt(r.End) {
+		// Keep the lower piece, which ends at r.End.
+		bounded, _ = s.SplitUnchecked(bounded, r.End)
+	}
+	return bounded
+}
+
+// ApplyContiguous applies a function to a contiguous range of segments,
+// splitting if necessary. The function is applied until the first gap is
+// encountered, at which point the gap is returned. If the function is applied
+// across the entire range, a terminal gap is returned. All existing iterators
+// are invalidated.
+//
+// N.B. The Iterator must not be invalidated by the function.
+func (s *frameRefSet) ApplyContiguous(r __generics_imported0.FileRange, fn func(seg frameRefIterator)) frameRefGapIterator {
+	seg, gap := s.Find(r.Start)
+	if !seg.Ok() {
+		return gap
+	}
+	for {
+		seg = s.Isolate(seg, r)
+		fn(seg)
+		if seg.End() >= r.End {
+			return frameRefGapIterator{}
+		}
+		gap = seg.NextGap()
+		if !gap.IsEmpty() {
+			return gap
+		}
+		seg = gap.NextSegment()
+		if !seg.Ok() {
+
+			return frameRefGapIterator{}
+		}
+	}
+}
+
+// +stateify savable
+type frameRefnode struct { // B-tree node; frameRefSet embeds one as its root
+	// An internal binary tree node looks like:
+	//
+	//   K
+	//  / \
+	// Cl Cr
+	//
+	// where all keys in the subtree rooted by Cl (the left subtree) are less
+	// than K (the key of the parent node), and all keys in the subtree rooted
+	// by Cr (the right subtree) are greater than K.
+	//
+	// An internal B-tree node's indexes work out to look like:
+	//
+	//   K0 K1 K2  ...   Kn-1
+	//  / \/ \/ \  ...  /  \
+	// C0 C1 C2 C3 ... Cn-1 Cn
+	//
+	// where n is nrSegments, the number of keys/values in use in this node.
+	nrSegments int
+
+	// parent is a pointer to this node's parent. If this node is root, parent
+	// is nil.
+	parent *frameRefnode
+
+	// parentIndex is the index of this node in parent.children.
+	parentIndex int
+
+	// Flag for internal nodes that is technically redundant with "children[0]
+	// != nil", but is stored in the first cache line. "hasChildren" rather
+	// than "isLeaf" because false must be the correct value for an empty root.
+	hasChildren bool
+
+	// Nodes store keys and values in separate arrays to maximize locality in
+	// the common case (scanning keys for lookup).
+	keys     [frameRefmaxDegree - 1]__generics_imported0.FileRange // keys[i] is the range of segment i
+	values   [frameRefmaxDegree - 1]uint64 // values[i] is the value of segment i
+	children [frameRefmaxDegree]*frameRefnode // children[i] sits left of keys[i]; unused in leaves
+}
+
+// firstSegment returns an iterator to the first segment of the subtree rooted
+// at n, found by following the leftmost child down to a leaf.
+//
+// Preconditions: n.nrSegments != 0.
+func (n *frameRefnode) firstSegment() frameRefIterator {
+	leaf := n
+	for leaf.hasChildren {
+		leaf = leaf.children[0]
+	}
+	return frameRefIterator{node: leaf, index: 0}
+}
+
+// lastSegment returns an iterator to the last segment of the subtree rooted
+// at n, found by following the rightmost child down to a leaf.
+//
+// Preconditions: n.nrSegments != 0.
+func (n *frameRefnode) lastSegment() frameRefIterator {
+	leaf := n
+	for leaf.hasChildren {
+		leaf = leaf.children[leaf.nrSegments]
+	}
+	return frameRefIterator{node: leaf, index: leaf.nrSegments - 1}
+}
+
+// prevSibling returns the child of n's parent immediately to the left of n,
+// or nil if n is the root or is its parent's first child.
+func (n *frameRefnode) prevSibling() *frameRefnode {
+	if p := n.parent; p != nil && n.parentIndex != 0 {
+		return p.children[n.parentIndex-1]
+	}
+	return nil
+}
+
+// nextSibling returns the child of n's parent immediately to the right of n,
+// or nil if n is the root or is its parent's last child.
+func (n *frameRefnode) nextSibling() *frameRefnode {
+	if p := n.parent; p != nil && n.parentIndex != p.nrSegments {
+		return p.children[n.parentIndex+1]
+	}
+	return nil
+}
+
+// rebalanceBeforeInsert splits n and its ancestors if they are full, as
+// required for insertion, and returns an updated iterator to the position
+// represented by gap. Ancestors are handled before n itself.
+func (n *frameRefnode) rebalanceBeforeInsert(gap frameRefGapIterator) frameRefGapIterator {
+	if n.parent != nil {
+		gap = n.parent.rebalanceBeforeInsert(gap) // split full ancestors first, top-down
+	}
+	if n.nrSegments < frameRefmaxDegree-1 {
+		return gap // n still has a free slot; nothing to split
+	}
+	if n.parent == nil {
+		// n is the root: move the segments on each side of the median into two new children and keep only the median in n.
+		left := &frameRefnode{
+			nrSegments:  frameRefminDegree - 1,
+			parent:      n,
+			parentIndex: 0,
+			hasChildren: n.hasChildren,
+		}
+		right := &frameRefnode{
+			nrSegments:  frameRefminDegree - 1,
+			parent:      n,
+			parentIndex: 1,
+			hasChildren: n.hasChildren,
+		}
+		copy(left.keys[:frameRefminDegree-1], n.keys[:frameRefminDegree-1])
+		copy(left.values[:frameRefminDegree-1], n.values[:frameRefminDegree-1])
+		copy(right.keys[:frameRefminDegree-1], n.keys[frameRefminDegree:])
+		copy(right.values[:frameRefminDegree-1], n.values[frameRefminDegree:])
+		n.keys[0], n.values[0] = n.keys[frameRefminDegree-1], n.values[frameRefminDegree-1] // median becomes the root's only segment
+		frameRefzeroValueSlice(n.values[1:]) // clear the value slots the root no longer uses
+		if n.hasChildren {
+			copy(left.children[:frameRefminDegree], n.children[:frameRefminDegree])
+			copy(right.children[:frameRefminDegree], n.children[frameRefminDegree:])
+			frameRefzeroNodeSlice(n.children[2:])
+			for i := 0; i < frameRefminDegree; i++ { // re-parent the grandchildren under the new nodes
+				left.children[i].parent = left
+				left.children[i].parentIndex = i
+				right.children[i].parent = right
+				right.children[i].parentIndex = i
+			}
+		}
+		n.nrSegments = 1
+		n.hasChildren = true
+		n.children[0] = left
+		n.children[1] = right
+		if gap.node != n {
+			return gap // gap was not positioned in the root itself
+		}
+		if gap.index < frameRefminDegree {
+			return frameRefGapIterator{left, gap.index} // position kept its index in the left half
+		}
+		return frameRefGapIterator{right, gap.index - frameRefminDegree} // position moved into the right half
+	}
+	// n is not the root: promote n's median segment into n.parent and move the segments after it into a new right sibling.
+	copy(n.parent.keys[n.parentIndex+1:], n.parent.keys[n.parentIndex:n.parent.nrSegments])
+	copy(n.parent.values[n.parentIndex+1:], n.parent.values[n.parentIndex:n.parent.nrSegments])
+	n.parent.keys[n.parentIndex], n.parent.values[n.parentIndex] = n.keys[frameRefminDegree-1], n.values[frameRefminDegree-1]
+	copy(n.parent.children[n.parentIndex+2:], n.parent.children[n.parentIndex+1:n.parent.nrSegments+1])
+	for i := n.parentIndex + 2; i < n.parent.nrSegments+2; i++ { // children shifted right need their new index
+		n.parent.children[i].parentIndex = i
+	}
+	sibling := &frameRefnode{
+		nrSegments:  frameRefminDegree - 1,
+		parent:      n.parent,
+		parentIndex: n.parentIndex + 1,
+		hasChildren: n.hasChildren,
+	}
+	n.parent.children[n.parentIndex+1] = sibling
+	n.parent.nrSegments++
+	copy(sibling.keys[:frameRefminDegree-1], n.keys[frameRefminDegree:])
+	copy(sibling.values[:frameRefminDegree-1], n.values[frameRefminDegree:])
+	frameRefzeroValueSlice(n.values[frameRefminDegree-1:]) // clear the median and everything that moved out of n
+	if n.hasChildren {
+		copy(sibling.children[:frameRefminDegree], n.children[frameRefminDegree:])
+		frameRefzeroNodeSlice(n.children[frameRefminDegree:])
+		for i := 0; i < frameRefminDegree; i++ {
+			sibling.children[i].parent = sibling
+			sibling.children[i].parentIndex = i
+		}
+	}
+	n.nrSegments = frameRefminDegree - 1
+	// NOTE(review): presumably gap always refers to a leaf, so it cannot be n.parent, whose indexes shifted above - confirm.
+	if gap.node != n {
+		return gap
+	}
+	if gap.index < frameRefminDegree {
+		return gap // position is still inside n, the left half
+	}
+	return frameRefGapIterator{sibling, gap.index - frameRefminDegree} // position moved into the new sibling
+}
+
+// rebalanceAfterRemove "unsplits" n and its ancestors if they are deficient
+// (contain fewer segments than required by B-tree invariants), as required for
+// removal, and returns an updated iterator to the position represented by gap.
+//
+// Precondition: n is the only node in the tree that may currently violate a
+// B-tree invariant.
+func (n *frameRefnode) rebalanceAfterRemove(gap frameRefGapIterator) frameRefGapIterator {
+	for {
+		if n.nrSegments >= frameRefminDegree-1 {
+			return gap // n holds enough segments; the tree is balanced
+		}
+		if n.parent == nil {
+			// n is the root, which has no sibling to borrow from or merge with; it is left as it is.
+			return gap
+		}
+		// First choice: rotate a segment in from the previous sibling if it has one to spare.
+		if sibling := n.prevSibling(); sibling != nil && sibling.nrSegments >= frameRefminDegree {
+			copy(n.keys[1:], n.keys[:n.nrSegments])
+			copy(n.values[1:], n.values[:n.nrSegments])
+			n.keys[0] = n.parent.keys[n.parentIndex-1] // separator moves down into n
+			n.values[0] = n.parent.values[n.parentIndex-1]
+			n.parent.keys[n.parentIndex-1] = sibling.keys[sibling.nrSegments-1] // sibling's last segment becomes the separator
+			n.parent.values[n.parentIndex-1] = sibling.values[sibling.nrSegments-1]
+			frameRefSetFunctions{}.ClearValue(&sibling.values[sibling.nrSegments-1])
+			if n.hasChildren {
+				copy(n.children[1:], n.children[:n.nrSegments+1])
+				n.children[0] = sibling.children[sibling.nrSegments] // sibling's last child follows its segment
+				sibling.children[sibling.nrSegments] = nil
+				n.children[0].parent = n
+				n.children[0].parentIndex = 0
+				for i := 1; i < n.nrSegments+2; i++ {
+					n.children[i].parentIndex = i
+				}
+			}
+			n.nrSegments++
+			sibling.nrSegments--
+			if gap.node == sibling && gap.index == sibling.nrSegments {
+				return frameRefGapIterator{n, 0}
+			}
+			if gap.node == n {
+				return frameRefGapIterator{n, gap.index + 1} // everything in n shifted right by one
+			}
+			return gap
+		}
+		if sibling := n.nextSibling(); sibling != nil && sibling.nrSegments >= frameRefminDegree { // second choice: borrow from the next sibling
+			n.keys[n.nrSegments] = n.parent.keys[n.parentIndex] // separator moves down to the end of n
+			n.values[n.nrSegments] = n.parent.values[n.parentIndex]
+			n.parent.keys[n.parentIndex] = sibling.keys[0] // sibling's first segment becomes the separator
+			n.parent.values[n.parentIndex] = sibling.values[0]
+			copy(sibling.keys[:sibling.nrSegments-1], sibling.keys[1:])
+			copy(sibling.values[:sibling.nrSegments-1], sibling.values[1:])
+			frameRefSetFunctions{}.ClearValue(&sibling.values[sibling.nrSegments-1])
+			if n.hasChildren {
+				n.children[n.nrSegments+1] = sibling.children[0]
+				copy(sibling.children[:sibling.nrSegments], sibling.children[1:])
+				sibling.children[sibling.nrSegments] = nil
+				n.children[n.nrSegments+1].parent = n
+				n.children[n.nrSegments+1].parentIndex = n.nrSegments + 1
+				for i := 0; i < sibling.nrSegments; i++ {
+					sibling.children[i].parentIndex = i
+				}
+			}
+			n.nrSegments++
+			sibling.nrSegments--
+			if gap.node == sibling {
+				if gap.index == 0 {
+					return frameRefGapIterator{n, n.nrSegments}
+				}
+				return frameRefGapIterator{sibling, gap.index - 1} // everything in sibling shifted left by one
+			}
+			return gap
+		}
+		// Neither sibling can spare a segment, so n has to be merged with a sibling.
+		p := n.parent
+		if p.nrSegments == 1 {
+			// p has exactly two children: fold both of them and the separating segment into p itself (the reverse of the root split in rebalanceBeforeInsert).
+			left, right := p.children[0], p.children[1]
+			p.nrSegments = left.nrSegments + right.nrSegments + 1
+			p.hasChildren = left.hasChildren
+			p.keys[left.nrSegments] = p.keys[0] // move the separator out of slot 0 before it is overwritten
+			p.values[left.nrSegments] = p.values[0]
+			copy(p.keys[:left.nrSegments], left.keys[:left.nrSegments])
+			copy(p.values[:left.nrSegments], left.values[:left.nrSegments])
+			copy(p.keys[left.nrSegments+1:], right.keys[:right.nrSegments])
+			copy(p.values[left.nrSegments+1:], right.values[:right.nrSegments])
+			if left.hasChildren {
+				copy(p.children[:left.nrSegments+1], left.children[:left.nrSegments+1])
+				copy(p.children[left.nrSegments+1:], right.children[:right.nrSegments+1])
+				for i := 0; i < p.nrSegments+1; i++ {
+					p.children[i].parent = p
+					p.children[i].parentIndex = i
+				}
+			} else {
+				p.children[0] = nil // p is a leaf now; drop the links to left and right
+				p.children[1] = nil
+			}
+			if gap.node == left {
+				return frameRefGapIterator{p, gap.index}
+			}
+			if gap.node == right {
+				return frameRefGapIterator{p, gap.index + left.nrSegments + 1}
+			}
+			return gap
+		}
+		// Merge n and either sibling, along with the segment separating the
+		// two, into whichever of the two nodes comes first. This is the
+		// reverse of the non-root splitting case in
+		// node.rebalanceBeforeInsert.
+		var left, right *frameRefnode
+		if n.parentIndex > 0 {
+			left = n.prevSibling()
+			right = n
+		} else {
+			left = n
+			right = n.nextSibling()
+		}
+		// Translate a gap in right to its position in the merged node while left.nrSegments still has its old value.
+		if gap.node == right {
+			gap = frameRefGapIterator{left, gap.index + left.nrSegments + 1}
+		}
+		left.keys[left.nrSegments] = p.keys[left.parentIndex] // separator moves down between the two halves
+		left.values[left.nrSegments] = p.values[left.parentIndex]
+		copy(left.keys[left.nrSegments+1:], right.keys[:right.nrSegments])
+		copy(left.values[left.nrSegments+1:], right.values[:right.nrSegments])
+		if left.hasChildren {
+			copy(left.children[left.nrSegments+1:], right.children[:right.nrSegments+1])
+			for i := left.nrSegments + 1; i < left.nrSegments+right.nrSegments+2; i++ {
+				left.children[i].parent = left
+				left.children[i].parentIndex = i
+			}
+		}
+		left.nrSegments += right.nrSegments + 1
+		copy(p.keys[left.parentIndex:], p.keys[left.parentIndex+1:p.nrSegments]) // close the hole left by the separator in p
+		copy(p.values[left.parentIndex:], p.values[left.parentIndex+1:p.nrSegments])
+		frameRefSetFunctions{}.ClearValue(&p.values[p.nrSegments-1])
+		copy(p.children[left.parentIndex+1:], p.children[left.parentIndex+2:p.nrSegments+1]) // drop right from p's children
+		for i := 0; i < p.nrSegments; i++ {
+			p.children[i].parentIndex = i
+		}
+		p.children[p.nrSegments] = nil
+		p.nrSegments--
+		// p gave up a segment and may now be deficient itself, so repeat the process for p.
+		n = p
+	}
+}
+
+// An Iterator is conceptually one of:
+//
+// - A pointer to a segment in a set; or
+//
+// - A terminal iterator, which is a sentinel indicating that the end of
+// iteration has been reached.
+//
+// Iterators are copyable values and are meaningfully equality-comparable. The
+// zero value of Iterator is a terminal iterator.
+//
+// Unless otherwise specified, any mutation of a set invalidates all existing
+// iterators into the set.
+type frameRefIterator struct {
+	// node is the node containing the iterated segment. If the iterator is
+	// terminal, node is nil.
+	node *frameRefnode
+
+	// index is the index of the segment in node.keys and node.values.
+	index int
+}
+
+// Ok reports whether the iterator refers to a segment, i.e. is not terminal.
+// No other method may be called on a terminal iterator.
+func (seg frameRefIterator) Ok() bool {
+	terminal := seg.node == nil
+	return !terminal
+}
+
+// Range returns a copy of the range key of the iterated segment.
+func (seg frameRefIterator) Range() __generics_imported0.FileRange {
+	n := seg.node
+	return n.keys[seg.index]
+}
+
+// Start returns the first key of the iterated segment. It yields the same
+// value as Range().Start and is preferable when the end is not needed.
+func (seg frameRefIterator) Start() uint64 {
+	key := &seg.node.keys[seg.index]
+	return key.Start
+}
+
+// End returns the end key of the iterated segment. It yields the same value
+// as Range().End and is preferable when the start is not needed.
+func (seg frameRefIterator) End() uint64 {
+	key := &seg.node.keys[seg.index]
+	return key.End
+}
+
+// SetRangeUnchecked overwrites the range key of the iterated segment without
+// any validation. No iterators are invalidated.
+//
+// Preconditions:
+//
+// - r.Length() > 0.
+//
+// - The new range must not overlap an existing one: If seg.NextSegment().Ok(),
+// then r.End <= seg.NextSegment().Start(); if seg.PrevSegment().Ok(), then
+// r.Start >= seg.PrevSegment().End().
+func (seg frameRefIterator) SetRangeUnchecked(r __generics_imported0.FileRange) {
+	slot := &seg.node.keys[seg.index]
+	*slot = r
+}
+
+// SetRange overwrites the range key of the iterated segment. It panics if r
+// is invalid or would make the segment overlap its predecessor or successor.
+// No iterators are invalidated.
+func (seg frameRefIterator) SetRange(r __generics_imported0.FileRange) {
+	if r.Length() <= 0 {
+		panic(fmt.Sprintf("invalid segment range %v", r))
+	}
+	before := seg.PrevSegment()
+	if before.Ok() && before.End() > r.Start {
+		panic(fmt.Sprintf("new segment range %v overlaps segment range %v", r, before.Range()))
+	}
+	after := seg.NextSegment()
+	if after.Ok() && after.Start() < r.End {
+		panic(fmt.Sprintf("new segment range %v overlaps segment range %v", r, after.Range()))
+	}
+	seg.node.keys[seg.index] = r
+}
+
+// SetStartUnchecked overwrites the start of the iterated segment without any
+// validation. No iterators are invalidated.
+//
+// Preconditions: The new start must be valid: start < seg.End(); if
+// seg.PrevSegment().Ok(), then start >= seg.PrevSegment().End().
+func (seg frameRefIterator) SetStartUnchecked(start uint64) {
+	key := &seg.node.keys[seg.index]
+	key.Start = start
+}
+
+// SetStart overwrites the start of the iterated segment. It panics if the
+// new start is not below the segment's end or would make the segment overlap
+// its predecessor. No iterators are invalidated.
+func (seg frameRefIterator) SetStart(start uint64) {
+	if seg.End() <= start {
+		panic(fmt.Sprintf("new start %v would invalidate segment range %v", start, seg.Range()))
+	}
+	before := seg.PrevSegment()
+	if before.Ok() && before.End() > start {
+		panic(fmt.Sprintf("new start %v would cause segment range %v to overlap segment range %v", start, seg.Range(), before.Range()))
+	}
+	seg.node.keys[seg.index].Start = start
+}
+
+// SetEndUnchecked overwrites the end of the iterated segment without any
+// validation. No iterators are invalidated.
+//
+// Preconditions: The new end must be valid: end > seg.Start(); if
+// seg.NextSegment().Ok(), then end <= seg.NextSegment().Start().
+func (seg frameRefIterator) SetEndUnchecked(end uint64) {
+	key := &seg.node.keys[seg.index]
+	key.End = end
+}
+
+// SetEnd overwrites the end of the iterated segment. It panics if the new end
+// is not above the segment's start or would make the segment overlap its
+// successor. No iterators are invalidated.
+func (seg frameRefIterator) SetEnd(end uint64) {
+	if seg.Start() >= end {
+		panic(fmt.Sprintf("new end %v would invalidate segment range %v", end, seg.Range()))
+	}
+	after := seg.NextSegment()
+	if after.Ok() && after.Start() < end {
+		panic(fmt.Sprintf("new end %v would cause segment range %v to overlap segment range %v", end, seg.Range(), after.Range()))
+	}
+	seg.node.keys[seg.index].End = end
+}
+
+// Value returns a copy of the value stored for the iterated segment.
+func (seg frameRefIterator) Value() uint64 {
+	n := seg.node
+	return n.values[seg.index]
+}
+
+// ValuePtr returns a pointer into the node's value storage for the iterated
+// segment. The pointer stops being valid when the iterator is invalidated.
+// No iterators are invalidated by this call.
+func (seg frameRefIterator) ValuePtr() *uint64 {
+	ptr := &seg.node.values[seg.index]
+	return ptr
+}
+
+// SetValue overwrites the value stored for the iterated segment. No iterators
+// are invalidated.
+func (seg frameRefIterator) SetValue(val uint64) {
+	slot := &seg.node.values[seg.index]
+	*slot = val
+}
+
+// PrevSegment returns an iterator to the segment preceding the iterated one,
+// or a terminal iterator if the iterated segment is the first in the set.
+func (seg frameRefIterator) PrevSegment() frameRefIterator {
+	n := seg.node
+	switch {
+	case n.hasChildren:
+		// The predecessor is the last segment of the left subtree.
+		return n.children[seg.index].lastSegment()
+	case seg.index > 0:
+		return frameRefIterator{node: n, index: seg.index - 1}
+	case n.parent == nil:
+		return frameRefIterator{}
+	default:
+		// First segment of a non-root leaf: look in the ancestors.
+		return frameRefsegmentBeforePosition(n.parent, n.parentIndex)
+	}
+}
+
+// NextSegment returns an iterator to the segment following the iterated one,
+// or a terminal iterator if the iterated segment is the last in the set.
+func (seg frameRefIterator) NextSegment() frameRefIterator {
+	n := seg.node
+	switch {
+	case n.hasChildren:
+		// The successor is the first segment of the right subtree.
+		return n.children[seg.index+1].firstSegment()
+	case seg.index < n.nrSegments-1:
+		return frameRefIterator{node: n, index: seg.index + 1}
+	case n.parent == nil:
+		return frameRefIterator{}
+	default:
+		// Last segment of a non-root leaf: look in the ancestors.
+		return frameRefsegmentAfterPosition(n.parent, n.parentIndex)
+	}
+}
+
+// PrevGap returns an iterator to the gap directly before the iterated
+// segment.
+func (seg frameRefIterator) PrevGap() frameRefGapIterator {
+	if !seg.node.hasChildren {
+		return frameRefGapIterator{node: seg.node, index: seg.index}
+	}
+	// In an internal node the gap is expressed relative to the last segment
+	// of the left subtree.
+	pred := seg.node.children[seg.index].lastSegment()
+	return pred.NextGap()
+}
+
+// NextGap returns an iterator to the gap directly after the iterated segment.
+func (seg frameRefIterator) NextGap() frameRefGapIterator {
+	if !seg.node.hasChildren {
+		return frameRefGapIterator{node: seg.node, index: seg.index + 1}
+	}
+	// In an internal node the gap is expressed relative to the first segment
+	// of the right subtree.
+	succ := seg.node.children[seg.index+1].firstSegment()
+	return succ.PrevGap()
+}
+
+// PrevNonEmpty returns the gap before the iterated segment if that gap has a
+// non-zero length, and otherwise the segment before that gap. If
+// seg.Start() == Functions.MinKey(), both returned iterators are terminal;
+// in every other case exactly one of them is non-terminal.
+func (seg frameRefIterator) PrevNonEmpty() (frameRefIterator, frameRefGapIterator) {
+	gap := seg.PrevGap()
+	if gap.IsEmpty() {
+		return gap.PrevSegment(), frameRefGapIterator{}
+	}
+	return frameRefIterator{}, gap
+}
+
+// NextNonEmpty returns the gap after the iterated segment if that gap has a
+// non-zero length, and otherwise the segment after that gap. If seg.End() ==
+// Functions.MaxKey(), both returned iterators are terminal; in every other
+// case exactly one of them is non-terminal.
+func (seg frameRefIterator) NextNonEmpty() (frameRefIterator, frameRefGapIterator) {
+	gap := seg.NextGap()
+	if gap.IsEmpty() {
+		return gap.NextSegment(), frameRefGapIterator{}
+	}
+	return frameRefIterator{}, gap
+}
+
+// A GapIterator is conceptually one of:
+//
+// - A pointer to a position between two segments, before the first segment, or
+// after the last segment in a set, called a *gap*; or
+//
+// - A terminal iterator, which is a sentinel indicating that the end of
+// iteration has been reached.
+//
+// Note that the gap between two adjacent segments exists (iterators to it are
+// non-terminal), but has a length of zero. GapIterator.IsEmpty returns true
+// for such gaps. An empty set contains a single gap, spanning the entire range
+// of the set's keys.
+//
+// GapIterators are copyable values and are meaningfully equality-comparable.
+// The zero value of GapIterator is a terminal iterator.
+//
+// Unless otherwise specified, any mutation of a set invalidates all existing
+// iterators into the set.
+type frameRefGapIterator struct {
+	// The representation of a GapIterator is identical to that of an Iterator,
+	// except that index corresponds to positions between segments in the same
+	// way as for node.children (see comment for node.nrSegments).
+	node  *frameRefnode // nil if the iterator is terminal
+	index int // gap index precedes the segment at keys[index]
+}
+
+// Ok reports whether the iterator refers to a gap, i.e. is not terminal. No
+// other method may be called on a terminal iterator.
+func (gap frameRefGapIterator) Ok() bool {
+	terminal := gap.node == nil
+	return !terminal
+}
+
+// Range returns the range of keys covered by the iterated gap.
+func (gap frameRefGapIterator) Range() __generics_imported0.FileRange {
+	lo := gap.Start()
+	hi := gap.End()
+	return __generics_imported0.FileRange{Start: lo, End: hi}
+}
+
+// Start returns the first key of the iterated gap: the end of the preceding
+// segment, or the minimum key if there is no preceding segment. It yields the
+// same value as Range().Start and is preferable when the end is not needed.
+func (gap frameRefGapIterator) Start() uint64 {
+	before := gap.PrevSegment()
+	if !before.Ok() {
+		return frameRefSetFunctions{}.MinKey()
+	}
+	return before.End()
+}
+
+// End returns the end key of the iterated gap: the start of the following
+// segment, or the maximum key if there is no following segment. It yields the
+// same value as Range().End and is preferable when the start is not needed.
+func (gap frameRefGapIterator) End() uint64 {
+	after := gap.NextSegment()
+	if !after.Ok() {
+		return frameRefSetFunctions{}.MaxKey()
+	}
+	return after.Start()
+}
+
+// IsEmpty reports whether the iterated gap has zero length, which is the case
+// for the "gap" between two adjacent segments.
+func (gap frameRefGapIterator) IsEmpty() bool {
+	length := gap.Range().Length()
+	return length == 0
+}
+
+// PrevSegment returns an iterator to the segment directly before the iterated
+// gap, or a terminal iterator if there is none.
+func (gap frameRefGapIterator) PrevSegment() frameRefIterator {
+	n, i := gap.node, gap.index
+	return frameRefsegmentBeforePosition(n, i)
+}
+
+// NextSegment returns an iterator to the segment directly after the iterated
+// gap, or a terminal iterator if there is none.
+func (gap frameRefGapIterator) NextSegment() frameRefIterator {
+	n, i := gap.node, gap.index
+	return frameRefsegmentAfterPosition(n, i)
+}
+
+// PrevGap returns an iterator to the gap preceding the iterated one, i.e. the
+// gap before the segment that precedes it. If there is no such segment,
+// PrevGap returns a terminal iterator.
+func (gap frameRefGapIterator) PrevGap() frameRefGapIterator {
+	if before := gap.PrevSegment(); before.Ok() {
+		return before.PrevGap()
+	}
+	return frameRefGapIterator{}
+}
+
+// NextGap returns an iterator to the gap following the iterated one, i.e. the
+// gap after the segment that follows it. If there is no such segment, NextGap
+// returns a terminal iterator.
+func (gap frameRefGapIterator) NextGap() frameRefGapIterator {
+	if after := gap.NextSegment(); after.Ok() {
+		return after.NextGap()
+	}
+	return frameRefGapIterator{}
+}
+
+// segmentBeforePosition returns the segment that precedes the position
+// n.children[i], whether or not a child is present there. While the position
+// is the leftmost one in its node, the search moves up to the node's own
+// position in its parent. If the root is reached that way, no predecessor
+// exists and a terminal iterator is returned.
+func frameRefsegmentBeforePosition(n *frameRefnode, i int) frameRefIterator {
+	node, pos := n, i
+	for ; pos == 0; node, pos = node.parent, node.parentIndex {
+		if node.parent == nil {
+			return frameRefIterator{}
+		}
+	}
+	return frameRefIterator{node: node, index: pos - 1}
+}
+
+// segmentAfterPosition returns the segment that follows the position
+// n.children[i], whether or not a child is present there. While the position
+// is the rightmost one in its node, the search moves up to the node's own
+// position in its parent. If the root is reached that way, no successor
+// exists and a terminal iterator is returned.
+func frameRefsegmentAfterPosition(n *frameRefnode, i int) frameRefIterator {
+	node, pos := n, i
+	for ; pos == node.nrSegments; node, pos = node.parent, node.parentIndex {
+		if node.parent == nil {
+			return frameRefIterator{}
+		}
+	}
+	return frameRefIterator{node: node, index: pos}
+}
+
+// frameRefzeroValueSlice passes every element of slice to
+// frameRefSetFunctions.ClearValue, in index order.
+func frameRefzeroValueSlice(slice []uint64) {
+	fns := frameRefSetFunctions{}
+	for idx := range slice {
+		fns.ClearValue(&slice[idx])
+	}
+}
+
+// frameRefzeroNodeSlice sets every element of slice to nil.
+func frameRefzeroNodeSlice(slice []*frameRefnode) {
+	for idx := range slice {
+		slice[idx] = nil
+	}
+}
+
+// String renders the set for debugging by stringifying its root node.
+func (s *frameRefSet) String() string {
+	root := &s.root
+	return root.String()
+}
+
+// String stringifes a node (and all of its children) for debugging.
+func (n *frameRefnode) String() string {
+	var buf bytes.Buffer
+	n.writeDebugString(&buf, "")
+	return buf.String()
+}
+
+// writeDebugString appends a textual dump of the subtree rooted at n to buf,
+// one segment per line in key order, with every line preceded by prefix.
+// Children are dumped with a prefix extended by their index in n.children.
+//
+// Inconsistencies found on the way are reported as WARNING lines: a
+// hasChildren flag that disagrees with the presence of children[0], and any
+// child, including the rightmost one, whose parent or parentIndex does not
+// point back at its slot in n.
+func (n *frameRefnode) writeDebugString(buf *bytes.Buffer, prefix string) {
+	if n.hasChildren != (n.nrSegments > 0 && n.children[0] != nil) {
+		buf.WriteString(prefix)
+		fmt.Fprintf(buf, "WARNING: inconsistent value of hasChildren: got %v, want %v\n", n.hasChildren, !n.hasChildren)
+	}
+	// Position i is child i followed by segment i; the final position
+	// (i == nrSegments) has a child slot but no segment.
+	for i := 0; i <= n.nrSegments; i++ {
+		if child := n.children[i]; child != nil {
+			cprefix := fmt.Sprintf("%s- % 3d ", prefix, i)
+			if child.parent != n || child.parentIndex != i {
+				buf.WriteString(cprefix)
+				fmt.Fprintf(buf, "WARNING: inconsistent linkage to parent: got (%p, %d), want (%p, %d)\n", child.parent, child.parentIndex, n, i)
+			}
+			child.writeDebugString(buf, cprefix)
+		}
+		if i < n.nrSegments {
+			buf.WriteString(prefix)
+			fmt.Fprintf(buf, "- % 3d: %v => %v\n", i, n.keys[i], n.values[i])
+		}
+	}
+}
+
+// SegmentDataSlices represents segments from a set as slices of start, end, and
+// values. SegmentDataSlices is primarily used as an intermediate representation
+// for save/restore and the layout here is optimized for that.
+//
+// +stateify savable
+type frameRefSegmentDataSlices struct {
+	Start  []uint64 // Start[i] is the start key of the i-th segment
+	End    []uint64 // End[i] is the end key of the i-th segment
+	Values []uint64 // Values[i] is the value of the i-th segment
+}
+
+// ExportSortedSlice returns a copy of all segments in the given set, in ascending
+// key order.
+func (s *frameRefSet) ExportSortedSlices() *frameRefSegmentDataSlices {
+	var sds frameRefSegmentDataSlices
+	for seg := s.FirstSegment(); seg.Ok(); seg = seg.NextSegment() {
+		sds.Start = append(sds.Start, seg.Start())
+		sds.End = append(sds.End, seg.End())
+		sds.Values = append(sds.Values, seg.Value())
+	}
+	sds.Start = sds.Start[:len(sds.Start):len(sds.Start)]
+	sds.End = sds.End[:len(sds.End):len(sds.End)]
+	sds.Values = sds.Values[:len(sds.Values):len(sds.Values)]
+	return &sds
+}
+
+// ImportSortedSlices initializes the given set from the given slices,
+// inserting the segments in order without merging them.
+//
+// Preconditions: s must be empty. sds must represent a valid set (the segments
+// in sds must have valid lengths that do not overlap). The segments in sds
+// must be sorted in ascending key order.
+//
+// A violated precondition that ImportSortedSlices can detect is reported as
+// an error rather than a panic: a non-empty s, slices of differing lengths, a
+// segment whose end is not above its start, and a segment that overlaps its
+// predecessor or is out of order. Segments preceding the offending one have
+// already been inserted when an error is returned.
+func (s *frameRefSet) ImportSortedSlices(sds *frameRefSegmentDataSlices) error {
+	if !s.IsEmpty() {
+		return fmt.Errorf("cannot import into non-empty set %v", s)
+	}
+	// The three slices are indexed in lockstep below, so they must agree in
+	// length; otherwise indexing would panic part-way through the import.
+	if n := len(sds.Start); len(sds.End) != n || len(sds.Values) != n {
+		return fmt.Errorf("mismatched segment data lengths: %d starts, %d ends, %d values", n, len(sds.End), len(sds.Values))
+	}
+	gap := s.FirstGap()
+	for i, start := range sds.Start {
+		end, val := sds.End[i], sds.Values[i]
+		// Reject empty and inverted ranges here; InsertWithoutMerging would
+		// panic on the former and is not guaranteed to catch the latter.
+		if end <= start {
+			return fmt.Errorf("segment has an invalid range: [%d, %d) => %v", start, end, val)
+		}
+		r := __generics_imported0.FileRange{Start: start, End: end}
+		if !gap.Range().IsSupersetOf(r) {
+			return fmt.Errorf("segment overlaps a preceding segment or is incorrectly sorted: [%d, %d) => %v", start, end, val)
+		}
+		// Continue from the gap after the new segment so that the next
+		// segment must lie beyond it.
+		gap = s.InsertWithoutMerging(gap, r, val).NextGap()
+	}
+	return nil
+}
+// saveRoot returns the set's segments in the slice form produced by
+// ExportSortedSlices.
+func (s *frameRefSet) saveRoot() *frameRefSegmentDataSlices {
+	sds := s.ExportSortedSlices()
+	return sds
+}
+
+// loadRoot fills the set from sds via ImportSortedSlices and panics with the
+// returned error if the import fails.
+func (s *frameRefSet) loadRoot(sds *frameRefSegmentDataSlices) {
+	err := s.ImportSortedSlices(sds)
+	if err == nil {
+		return
+	}
+	panic(err)
+}
diff --git a/pkg/sentry/fs/fsutil/fsutil.go b/pkg/sentry/fs/fsutil/fsutil.go
new file mode 100644
index 000000000..c9587b1d9
--- /dev/null
+++ b/pkg/sentry/fs/fsutil/fsutil.go
@@ -0,0 +1,24 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package fsutil provides utilities for implementing fs.InodeOperations
+// and fs.FileOperations:
+//
+// - For embeddable utilities, see inode.go and file.go.
+//
+// - For fs.Inodes that require a page cache to be memory mapped, see
+//   inode_cache.go.
+//
+// - For anon fs.Inodes, see anon.go.
+package fsutil
diff --git a/pkg/sentry/fs/fsutil/fsutil_state_autogen.go b/pkg/sentry/fs/fsutil/fsutil_state_autogen.go
new file mode 100755
index 000000000..5783b151d
--- /dev/null
+++ b/pkg/sentry/fs/fsutil/fsutil_state_autogen.go
@@ -0,0 +1,349 @@
+// automatically generated by stateify.
+
+package fsutil
+
+import (
+	"gvisor.googlesource.com/gvisor/pkg/state"
+)
+
+// beforeSave is the pre-save hook for DirtyInfo; it is a no-op.
+func (x *DirtyInfo) beforeSave() {}
+// save calls beforeSave and then passes the Keep field to m.Save.
+func (x *DirtyInfo) save(m state.Map) {
+	x.beforeSave()
+	m.Save("Keep", &x.Keep)
+}
+
+// afterLoad is the post-load hook for DirtyInfo; it is a no-op.
+func (x *DirtyInfo) afterLoad() {}
+// load passes the Keep field to m.Load.
+func (x *DirtyInfo) load(m state.Map) {
+	m.Load("Keep", &x.Keep)
+}
+
+// beforeSave is the pre-save hook for DirtySet; it is a no-op.
+func (x *DirtySet) beforeSave() {}
+// save calls beforeSave and then passes the result of saveRoot to
+// m.SaveValue under the key "root".
+func (x *DirtySet) save(m state.Map) {
+	x.beforeSave()
+	var root *DirtySegmentDataSlices = x.saveRoot()
+	m.SaveValue("root", root)
+}
+
+// afterLoad is the post-load hook for DirtySet; it is a no-op.
+func (x *DirtySet) afterLoad() {}
+// load passes m.LoadValue a callback that hands the value loaded for "root"
+// to loadRoot.
+func (x *DirtySet) load(m state.Map) {
+	m.LoadValue("root", new(*DirtySegmentDataSlices), func(y interface{}) { x.loadRoot(y.(*DirtySegmentDataSlices)) })
+}
+
+// beforeSave is the pre-save hook for Dirtynode; it is a no-op.
+func (x *Dirtynode) beforeSave() {}
+// save calls beforeSave and then passes each listed field to m.Save.
+func (x *Dirtynode) save(m state.Map) {
+	x.beforeSave()
+	m.Save("nrSegments", &x.nrSegments)
+	m.Save("parent", &x.parent)
+	m.Save("parentIndex", &x.parentIndex)
+	m.Save("hasChildren", &x.hasChildren)
+	m.Save("keys", &x.keys)
+	m.Save("values", &x.values)
+	m.Save("children", &x.children)
+}
+
+// afterLoad is the post-load hook for Dirtynode; it is a no-op.
+func (x *Dirtynode) afterLoad() {}
+// load passes each field written by save to m.Load.
+func (x *Dirtynode) load(m state.Map) {
+	m.Load("nrSegments", &x.nrSegments)
+	m.Load("parent", &x.parent)
+	m.Load("parentIndex", &x.parentIndex)
+	m.Load("hasChildren", &x.hasChildren)
+	m.Load("keys", &x.keys)
+	m.Load("values", &x.values)
+	m.Load("children", &x.children)
+}
+
+// beforeSave is the pre-save hook for DirtySegmentDataSlices; it is a no-op.
+func (x *DirtySegmentDataSlices) beforeSave() {}
+// save calls beforeSave and then passes Start, End and Values to m.Save.
+func (x *DirtySegmentDataSlices) save(m state.Map) {
+	x.beforeSave()
+	m.Save("Start", &x.Start)
+	m.Save("End", &x.End)
+	m.Save("Values", &x.Values)
+}
+
+// afterLoad is the post-load hook for DirtySegmentDataSlices; it is a no-op.
+func (x *DirtySegmentDataSlices) afterLoad() {}
+// load passes Start, End and Values to m.Load.
+func (x *DirtySegmentDataSlices) load(m state.Map) {
+	m.Load("Start", &x.Start)
+	m.Load("End", &x.End)
+	m.Load("Values", &x.Values)
+}
+
+// beforeSave is the pre-save hook for StaticDirFileOperations; it is a no-op.
+func (x *StaticDirFileOperations) beforeSave() {}
+// save calls beforeSave and then passes dentryMap and dirCursor to m.Save.
+func (x *StaticDirFileOperations) save(m state.Map) {
+	x.beforeSave()
+	m.Save("dentryMap", &x.dentryMap)
+	m.Save("dirCursor", &x.dirCursor)
+}
+
+// afterLoad is the post-load hook for StaticDirFileOperations; it is a no-op.
+func (x *StaticDirFileOperations) afterLoad() {}
+// load passes dentryMap and dirCursor to m.Load.
+func (x *StaticDirFileOperations) load(m state.Map) {
+	m.Load("dentryMap", &x.dentryMap)
+	m.Load("dirCursor", &x.dirCursor)
+}
+
+// beforeSave is the pre-save hook for NoReadWriteFile; it is a no-op.
+func (x *NoReadWriteFile) beforeSave() {}
+// save only calls beforeSave; no fields are passed to m.
+func (x *NoReadWriteFile) save(m state.Map) {
+	x.beforeSave()
+}
+
+// afterLoad is the post-load hook for NoReadWriteFile; it is a no-op.
+func (x *NoReadWriteFile) afterLoad() {}
+// load does nothing; save writes no fields.
+func (x *NoReadWriteFile) load(m state.Map) {
+}
+
+// beforeSave is the pre-save hook for FileStaticContentReader; it is a no-op.
+func (x *FileStaticContentReader) beforeSave() {}
+// save calls beforeSave and then passes the content field to m.Save.
+func (x *FileStaticContentReader) save(m state.Map) {
+	x.beforeSave()
+	m.Save("content", &x.content)
+}
+
+// afterLoad is the post-load hook for FileStaticContentReader; it is a no-op.
+func (x *FileStaticContentReader) afterLoad() {}
+// load passes the content field to m.Load.
+func (x *FileStaticContentReader) load(m state.Map) {
+	m.Load("content", &x.content)
+}
+
+// beforeSave is the pre-save hook for FileRangeSet; it is a no-op.
+func (x *FileRangeSet) beforeSave() {}
+// save calls beforeSave and then passes the result of saveRoot to
+// m.SaveValue under the key "root".
+func (x *FileRangeSet) save(m state.Map) {
+	x.beforeSave()
+	var root *FileRangeSegmentDataSlices = x.saveRoot()
+	m.SaveValue("root", root)
+}
+
+// afterLoad is the post-load hook for FileRangeSet; it is a no-op.
+func (x *FileRangeSet) afterLoad() {}
+// load passes m.LoadValue a callback that hands the value loaded for "root"
+// to loadRoot.
+func (x *FileRangeSet) load(m state.Map) {
+	m.LoadValue("root", new(*FileRangeSegmentDataSlices), func(y interface{}) { x.loadRoot(y.(*FileRangeSegmentDataSlices)) })
+}
+
+// beforeSave is the pre-save hook for FileRangenode; it is a no-op.
+func (x *FileRangenode) beforeSave() {}
+// save calls beforeSave and then passes each listed field to m.Save.
+func (x *FileRangenode) save(m state.Map) {
+	x.beforeSave()
+	m.Save("nrSegments", &x.nrSegments)
+	m.Save("parent", &x.parent)
+	m.Save("parentIndex", &x.parentIndex)
+	m.Save("hasChildren", &x.hasChildren)
+	m.Save("keys", &x.keys)
+	m.Save("values", &x.values)
+	m.Save("children", &x.children)
+}
+
+// afterLoad is the post-load hook for FileRangenode; it is a no-op.
+func (x *FileRangenode) afterLoad() {}
+// load passes each field written by save to m.Load.
+func (x *FileRangenode) load(m state.Map) {
+	m.Load("nrSegments", &x.nrSegments)
+	m.Load("parent", &x.parent)
+	m.Load("parentIndex", &x.parentIndex)
+	m.Load("hasChildren", &x.hasChildren)
+	m.Load("keys", &x.keys)
+	m.Load("values", &x.values)
+	m.Load("children", &x.children)
+}
+
+// beforeSave is the pre-save hook for FileRangeSegmentDataSlices; it is a
+// no-op.
+func (x *FileRangeSegmentDataSlices) beforeSave() {}
+// save calls beforeSave and then passes Start, End and Values to m.Save.
+func (x *FileRangeSegmentDataSlices) save(m state.Map) {
+	x.beforeSave()
+	m.Save("Start", &x.Start)
+	m.Save("End", &x.End)
+	m.Save("Values", &x.Values)
+}
+
+// afterLoad is the post-load hook for FileRangeSegmentDataSlices; it is a
+// no-op.
+func (x *FileRangeSegmentDataSlices) afterLoad() {}
+// load passes Start, End and Values to m.Load.
+func (x *FileRangeSegmentDataSlices) load(m state.Map) {
+	m.Load("Start", &x.Start)
+	m.Load("End", &x.End)
+	m.Load("Values", &x.Values)
+}
+
+// beforeSave is the pre-save hook for frameRefSet; it is a no-op.
+func (x *frameRefSet) beforeSave() {}
+// save calls beforeSave and then passes the result of saveRoot to
+// m.SaveValue under the key "root".
+func (x *frameRefSet) save(m state.Map) {
+	x.beforeSave()
+	var root *frameRefSegmentDataSlices = x.saveRoot()
+	m.SaveValue("root", root)
+}
+
+// afterLoad is the post-load hook for frameRefSet; it is a no-op.
+func (x *frameRefSet) afterLoad() {}
+// load passes m.LoadValue a callback that hands the value loaded for "root"
+// to loadRoot.
+func (x *frameRefSet) load(m state.Map) {
+	m.LoadValue("root", new(*frameRefSegmentDataSlices), func(y interface{}) { x.loadRoot(y.(*frameRefSegmentDataSlices)) })
+}
+
+// beforeSave is the pre-save hook for frameRefnode; it is a no-op.
+func (x *frameRefnode) beforeSave() {}
+// save calls beforeSave and then passes each listed field to m.Save.
+func (x *frameRefnode) save(m state.Map) {
+	x.beforeSave()
+	m.Save("nrSegments", &x.nrSegments)
+	m.Save("parent", &x.parent)
+	m.Save("parentIndex", &x.parentIndex)
+	m.Save("hasChildren", &x.hasChildren)
+	m.Save("keys", &x.keys)
+	m.Save("values", &x.values)
+	m.Save("children", &x.children)
+}
+
+// afterLoad is the post-load hook for frameRefnode; it is a no-op.
+func (x *frameRefnode) afterLoad() {}
+// load passes each field written by save to m.Load.
+func (x *frameRefnode) load(m state.Map) {
+	m.Load("nrSegments", &x.nrSegments)
+	m.Load("parent", &x.parent)
+	m.Load("parentIndex", &x.parentIndex)
+	m.Load("hasChildren", &x.hasChildren)
+	m.Load("keys", &x.keys)
+	m.Load("values", &x.values)
+	m.Load("children", &x.children)
+}
+
+// beforeSave is the pre-save hook for frameRefSegmentDataSlices; it is a
+// no-op.
+func (x *frameRefSegmentDataSlices) beforeSave() {}
+// save calls beforeSave and then passes Start, End and Values to m.Save.
+func (x *frameRefSegmentDataSlices) save(m state.Map) {
+	x.beforeSave()
+	m.Save("Start", &x.Start)
+	m.Save("End", &x.End)
+	m.Save("Values", &x.Values)
+}
+
+// afterLoad is the post-load hook for frameRefSegmentDataSlices; it is a
+// no-op.
+func (x *frameRefSegmentDataSlices) afterLoad() {}
+// load passes Start, End and Values to m.Load.
+func (x *frameRefSegmentDataSlices) load(m state.Map) {
+	m.Load("Start", &x.Start)
+	m.Load("End", &x.End)
+	m.Load("Values", &x.Values)
+}
+
+// beforeSave is the pre-save hook for HostFileMapper; it is a no-op.
+func (x *HostFileMapper) beforeSave() {}
+// save calls beforeSave and then passes the refs field to m.Save. No other
+// HostFileMapper field is passed to m.
+func (x *HostFileMapper) save(m state.Map) {
+	x.beforeSave()
+	m.Save("refs", &x.refs)
+}
+
+// load passes the refs field to m.Load and hands x.afterLoad to m.AfterLoad.
+// Unlike the other types in this file, no afterLoad stub is generated here.
+func (x *HostFileMapper) load(m state.Map) {
+	m.Load("refs", &x.refs)
+	m.AfterLoad(x.afterLoad)
+}
+
+// beforeSave is the pre-save hook for HostMappable; it is a no-op.
+func (x *HostMappable) beforeSave() {}
+// save calls beforeSave and then passes hostFileMapper, backingFile and
+// mappings to m.Save.
+func (x *HostMappable) save(m state.Map) {
+	x.beforeSave()
+	m.Save("hostFileMapper", &x.hostFileMapper)
+	m.Save("backingFile", &x.backingFile)
+	m.Save("mappings", &x.mappings)
+}
+
+// afterLoad is the post-load hook for HostMappable; it is a no-op.
+func (x *HostMappable) afterLoad() {}
+// load passes hostFileMapper, backingFile and mappings to m.Load.
+func (x *HostMappable) load(m state.Map) {
+	m.Load("hostFileMapper", &x.hostFileMapper)
+	m.Load("backingFile", &x.backingFile)
+	m.Load("mappings", &x.mappings)
+}
+
+// beforeSave is the pre-save hook for SimpleFileInode; it is a no-op.
+func (x *SimpleFileInode) beforeSave() {}
+// save calls beforeSave and then passes the embedded InodeSimpleAttributes to
+// m.Save.
+func (x *SimpleFileInode) save(m state.Map) {
+	x.beforeSave()
+	m.Save("InodeSimpleAttributes", &x.InodeSimpleAttributes)
+}
+
+// afterLoad is the post-load hook for SimpleFileInode; it is a no-op.
+func (x *SimpleFileInode) afterLoad() {}
+// load passes the embedded InodeSimpleAttributes to m.Load.
+func (x *SimpleFileInode) load(m state.Map) {
+	m.Load("InodeSimpleAttributes", &x.InodeSimpleAttributes)
+}
+
+// beforeSave is the pre-save hook for NoReadWriteFileInode; it is a no-op.
+func (x *NoReadWriteFileInode) beforeSave() {}
+// save calls beforeSave and then passes the InodeSimpleAttributes field to
+// m.Save.
+func (x *NoReadWriteFileInode) save(m state.Map) {
+	x.beforeSave()
+	m.Save("InodeSimpleAttributes", &x.InodeSimpleAttributes)
+}
+
+// afterLoad is the post-load hook for NoReadWriteFileInode; it is a no-op.
+func (x *NoReadWriteFileInode) afterLoad() {}
+// load passes the InodeSimpleAttributes field to m.Load.
+func (x *NoReadWriteFileInode) load(m state.Map) {
+	m.Load("InodeSimpleAttributes", &x.InodeSimpleAttributes)
+}
+
+// beforeSave is the pre-save hook for InodeSimpleAttributes; it is a no-op.
+func (x *InodeSimpleAttributes) beforeSave() {}
+// save calls beforeSave and then passes fsType and unstable to m.Save.
+func (x *InodeSimpleAttributes) save(m state.Map) {
+	x.beforeSave()
+	m.Save("fsType", &x.fsType)
+	m.Save("unstable", &x.unstable)
+}
+
+// afterLoad is the post-load hook for InodeSimpleAttributes; it is a no-op.
+func (x *InodeSimpleAttributes) afterLoad() {}
+// load passes fsType and unstable to m.Load.
+func (x *InodeSimpleAttributes) load(m state.Map) {
+	m.Load("fsType", &x.fsType)
+	m.Load("unstable", &x.unstable)
+}
+
+// beforeSave is the pre-save hook for InodeSimpleExtendedAttributes; it is a
+// no-op.
+func (x *InodeSimpleExtendedAttributes) beforeSave() {}
+// save calls beforeSave and then passes the xattrs field to m.Save.
+func (x *InodeSimpleExtendedAttributes) save(m state.Map) {
+	x.beforeSave()
+	m.Save("xattrs", &x.xattrs)
+}
+
+// afterLoad is the post-load hook for InodeSimpleExtendedAttributes; it is a
+// no-op.
+func (x *InodeSimpleExtendedAttributes) afterLoad() {}
+// load passes the xattrs field to m.Load.
+func (x *InodeSimpleExtendedAttributes) load(m state.Map) {
+	m.Load("xattrs", &x.xattrs)
+}
+
+// beforeSave is the pre-save hook for staticFile; it is a no-op.
+func (x *staticFile) beforeSave() {}
+// save calls beforeSave and then passes the FileStaticContentReader field to
+// m.Save.
+func (x *staticFile) save(m state.Map) {
+	x.beforeSave()
+	m.Save("FileStaticContentReader", &x.FileStaticContentReader)
+}
+
+// afterLoad is the post-load hook for staticFile; it is a no-op.
+func (x *staticFile) afterLoad() {}
+// load passes the FileStaticContentReader field to m.Load.
+func (x *staticFile) load(m state.Map) {
+	m.Load("FileStaticContentReader", &x.FileStaticContentReader)
+}
+
+// beforeSave is the pre-save hook for InodeStaticFileGetter; it is a no-op.
+func (x *InodeStaticFileGetter) beforeSave() {}
+// save calls beforeSave and then passes the Contents field to m.Save.
+func (x *InodeStaticFileGetter) save(m state.Map) {
+	x.beforeSave()
+	m.Save("Contents", &x.Contents)
+}
+
+// afterLoad is the post-load hook for InodeStaticFileGetter; it is a no-op.
+func (x *InodeStaticFileGetter) afterLoad() {}
+// load passes the Contents field to m.Load.
+func (x *InodeStaticFileGetter) load(m state.Map) {
+	m.Load("Contents", &x.Contents)
+}
+
+// beforeSave is the pre-save hook for CachingInodeOperations; it is a no-op.
+func (x *CachingInodeOperations) beforeSave() {}
+// save calls beforeSave and then passes each listed field to m.Save.
+func (x *CachingInodeOperations) save(m state.Map) {
+	x.beforeSave()
+	m.Save("backingFile", &x.backingFile)
+	m.Save("mfp", &x.mfp)
+	m.Save("forcePageCache", &x.forcePageCache)
+	m.Save("attr", &x.attr)
+	m.Save("dirtyAttr", &x.dirtyAttr)
+	m.Save("mappings", &x.mappings)
+	m.Save("cache", &x.cache)
+	m.Save("dirty", &x.dirty)
+	m.Save("hostFileMapper", &x.hostFileMapper)
+	m.Save("refs", &x.refs)
+}
+
+// afterLoad is the post-load hook for CachingInodeOperations; it is a no-op.
+func (x *CachingInodeOperations) afterLoad() {}
+// load passes each field written by save to m.Load.
+func (x *CachingInodeOperations) load(m state.Map) {
+	m.Load("backingFile", &x.backingFile)
+	m.Load("mfp", &x.mfp)
+	m.Load("forcePageCache", &x.forcePageCache)
+	m.Load("attr", &x.attr)
+	m.Load("dirtyAttr", &x.dirtyAttr)
+	m.Load("mappings", &x.mappings)
+	m.Load("cache", &x.cache)
+	m.Load("dirty", &x.dirty)
+	m.Load("hostFileMapper", &x.hostFileMapper)
+	m.Load("refs", &x.refs)
+}
+
+// init calls state.Register once for every savable type in this file, pairing
+// the type's "fsutil."-prefixed name with its save and load methods.
+func init() {
+	state.Register("fsutil.DirtyInfo", (*DirtyInfo)(nil), state.Fns{Save: (*DirtyInfo).save, Load: (*DirtyInfo).load})
+	state.Register("fsutil.DirtySet", (*DirtySet)(nil), state.Fns{Save: (*DirtySet).save, Load: (*DirtySet).load})
+	state.Register("fsutil.Dirtynode", (*Dirtynode)(nil), state.Fns{Save: (*Dirtynode).save, Load: (*Dirtynode).load})
+	state.Register("fsutil.DirtySegmentDataSlices", (*DirtySegmentDataSlices)(nil), state.Fns{Save: (*DirtySegmentDataSlices).save, Load: (*DirtySegmentDataSlices).load})
+	state.Register("fsutil.StaticDirFileOperations", (*StaticDirFileOperations)(nil), state.Fns{Save: (*StaticDirFileOperations).save, Load: (*StaticDirFileOperations).load})
+	state.Register("fsutil.NoReadWriteFile", (*NoReadWriteFile)(nil), state.Fns{Save: (*NoReadWriteFile).save, Load: (*NoReadWriteFile).load})
+	state.Register("fsutil.FileStaticContentReader", (*FileStaticContentReader)(nil), state.Fns{Save: (*FileStaticContentReader).save, Load: (*FileStaticContentReader).load})
+	state.Register("fsutil.FileRangeSet", (*FileRangeSet)(nil), state.Fns{Save: (*FileRangeSet).save, Load: (*FileRangeSet).load})
+	state.Register("fsutil.FileRangenode", (*FileRangenode)(nil), state.Fns{Save: (*FileRangenode).save, Load: (*FileRangenode).load})
+	state.Register("fsutil.FileRangeSegmentDataSlices", (*FileRangeSegmentDataSlices)(nil), state.Fns{Save: (*FileRangeSegmentDataSlices).save, Load: (*FileRangeSegmentDataSlices).load})
+	state.Register("fsutil.frameRefSet", (*frameRefSet)(nil), state.Fns{Save: (*frameRefSet).save, Load: (*frameRefSet).load})
+	state.Register("fsutil.frameRefnode", (*frameRefnode)(nil), state.Fns{Save: (*frameRefnode).save, Load: (*frameRefnode).load})
+	state.Register("fsutil.frameRefSegmentDataSlices", (*frameRefSegmentDataSlices)(nil), state.Fns{Save: (*frameRefSegmentDataSlices).save, Load: (*frameRefSegmentDataSlices).load})
+	state.Register("fsutil.HostFileMapper", (*HostFileMapper)(nil), state.Fns{Save: (*HostFileMapper).save, Load: (*HostFileMapper).load})
+	state.Register("fsutil.HostMappable", (*HostMappable)(nil), state.Fns{Save: (*HostMappable).save, Load: (*HostMappable).load})
+	state.Register("fsutil.SimpleFileInode", (*SimpleFileInode)(nil), state.Fns{Save: (*SimpleFileInode).save, Load: (*SimpleFileInode).load})
+	state.Register("fsutil.NoReadWriteFileInode", (*NoReadWriteFileInode)(nil), state.Fns{Save: (*NoReadWriteFileInode).save, Load: (*NoReadWriteFileInode).load})
+	state.Register("fsutil.InodeSimpleAttributes", (*InodeSimpleAttributes)(nil), state.Fns{Save: (*InodeSimpleAttributes).save, Load: (*InodeSimpleAttributes).load})
+	state.Register("fsutil.InodeSimpleExtendedAttributes", (*InodeSimpleExtendedAttributes)(nil), state.Fns{Save: (*InodeSimpleExtendedAttributes).save, Load: (*InodeSimpleExtendedAttributes).load})
+	state.Register("fsutil.staticFile", (*staticFile)(nil), state.Fns{Save: (*staticFile).save, Load: (*staticFile).load})
+	state.Register("fsutil.InodeStaticFileGetter", (*InodeStaticFileGetter)(nil), state.Fns{Save: (*InodeStaticFileGetter).save, Load: (*InodeStaticFileGetter).load})
+	state.Register("fsutil.CachingInodeOperations", (*CachingInodeOperations)(nil), state.Fns{Save: (*CachingInodeOperations).save, Load: (*CachingInodeOperations).load})
+}
diff --git a/pkg/sentry/fs/fsutil/host_file_mapper.go b/pkg/sentry/fs/fsutil/host_file_mapper.go
new file mode 100644
index 000000000..2bdfc0db6
--- /dev/null
+++ b/pkg/sentry/fs/fsutil/host_file_mapper.go
@@ -0,0 +1,211 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package fsutil
+
+import (
+	"fmt"
+	"sync"
+	"syscall"
+
+	"gvisor.googlesource.com/gvisor/pkg/log"
+	"gvisor.googlesource.com/gvisor/pkg/sentry/memmap"
+	"gvisor.googlesource.com/gvisor/pkg/sentry/platform"
+	"gvisor.googlesource.com/gvisor/pkg/sentry/safemem"
+	"gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
+)
+
+// HostFileMapper caches mappings of an arbitrary host file descriptor. It is
+// used by implementations of memmap.Mappable that represent a host file
+// descriptor.
+//
+// Only refs is saved; the mutexes and the mappings cache are tagged nosave.
+//
+// +stateify savable
+type HostFileMapper struct {
+	// HostFileMapper conceptually breaks the file into pieces called chunks,
+	// of size and alignment chunkSize, and caches mappings of the file on a
+	// chunk granularity.
+
+	refsMu sync.Mutex `state:"nosave"`
+
+	// refs maps chunk start offsets to the sum of reference counts for all
+	// pages in that chunk. refs is protected by refsMu.
+	refs map[uint64]int32
+
+	mapsMu sync.Mutex `state:"nosave"`
+
+	// mappings maps chunk start offsets to mappings of those chunks,
+	// obtained by calling syscall.Mmap. mappings is protected by
+	// mapsMu.
+	mappings map[uint64]mapping `state:"nosave"`
+}
+
+const (
+	// chunkShift is log2 of the chunk size; chunks are the size of a huge
+	// page (usermem.HugePageShift).
+	chunkShift = usermem.HugePageShift
+	// chunkSize is the size in bytes of one chunk.
+	chunkSize  = 1 << chunkShift
+	// chunkMask selects the offset-within-chunk bits of a file offset;
+	// clearing them (off &^ chunkMask) yields the chunk's start offset.
+	chunkMask  = chunkSize - 1
+)
+
+// pagesInChunk returns the length of the intersection of mr with the chunk
+// [chunkStart, chunkStart+chunkSize), divided by usermem.PageSize.
+func pagesInChunk(mr memmap.MappableRange, chunkStart uint64) int32 {
+	chunk := memmap.MappableRange{chunkStart, chunkStart + chunkSize}
+	overlap := mr.Intersect(chunk)
+	return int32(overlap.Length() / usermem.PageSize)
+}
+
+// mapping describes one cached chunk mapping held in HostFileMapper.mappings.
+type mapping struct {
+	// addr is the address returned by the mmap call that created the
+	// mapping.
+	addr     uintptr
+	// writable records whether the mapping was created with PROT_WRITE.
+	writable bool
+}
+
+// NewHostFileMapper allocates a HostFileMapper whose reference-count and
+// mapping tables are both empty.
+func NewHostFileMapper() *HostFileMapper {
+	f := new(HostFileMapper)
+	f.refs = make(map[uint64]int32)
+	f.mappings = make(map[uint64]mapping)
+	return f
+}
+
+// IncRefOn adds one reference to every page offset in mr. References are
+// accumulated per chunk, and IncRefOn panics if a chunk's int32 counter
+// would overflow.
+//
+// Preconditions: mr.Length() != 0. mr.Start and mr.End must be page-aligned.
+func (f *HostFileMapper) IncRefOn(mr memmap.MappableRange) {
+	f.refsMu.Lock()
+	defer f.refsMu.Unlock()
+	chunkStart := mr.Start &^ chunkMask
+	for chunkStart < mr.End {
+		cur := f.refs[chunkStart]
+		added := pagesInChunk(mr, chunkStart)
+		sum := cur + added
+		if sum < cur {
+			// The counter wrapped around.
+			panic(fmt.Sprintf("HostFileMapper.IncRefOn(%v): adding %d page references to chunk %#x, which has %d page references", mr, added, chunkStart, cur))
+		}
+		f.refs[chunkStart] = sum
+		chunkStart += chunkSize
+	}
+}
+
+// DecRefOn drops one reference from every page offset in mr. When a chunk's
+// count reaches zero its entry is removed from refs and any cached mapping
+// of the chunk is unmapped. DecRefOn panics if a chunk holds fewer
+// references than are being removed.
+//
+// Preconditions: mr.Length() != 0. mr.Start and mr.End must be page-aligned.
+func (f *HostFileMapper) DecRefOn(mr memmap.MappableRange) {
+	f.refsMu.Lock()
+	defer f.refsMu.Unlock()
+	for chunkStart := mr.Start &^ chunkMask; chunkStart < mr.End; chunkStart += chunkSize {
+		held := f.refs[chunkStart]
+		dropped := pagesInChunk(mr, chunkStart)
+		if held < dropped {
+			panic(fmt.Sprintf("HostFileMapper.DecRefOn(%v): removing %d page references from chunk %#x, which has %d page references", mr, dropped, chunkStart, held))
+		}
+		if held > dropped {
+			f.refs[chunkStart] = held - dropped
+			continue
+		}
+		// held == dropped: this was the last reference on the chunk.
+		f.mapsMu.Lock()
+		delete(f.refs, chunkStart)
+		if m, ok := f.mappings[chunkStart]; ok {
+			f.unmapAndRemoveLocked(chunkStart, m)
+		}
+		f.mapsMu.Unlock()
+	}
+}
+
+// MapInternal maps the offsets in fr from fd and returns them as a
+// safemem.BlockSeq. The result stays valid as long as at least one reference
+// is held on all offsets in fr or until the next call to UnmapAll.
+//
+// If mapping fails partway through, the error is returned together with the
+// blocks collected before the failure.
+//
+// Preconditions: The caller must hold a reference on all offsets in fr.
+func (f *HostFileMapper) MapInternal(fr platform.FileRange, fd int, write bool) (safemem.BlockSeq, error) {
+	firstChunk := fr.Start >> chunkShift
+	endChunk := (fr.End + chunkMask) >> chunkShift
+	nChunks := endChunk - firstChunk
+	f.mapsMu.Lock()
+	defer f.mapsMu.Unlock()
+	if nChunks != 1 {
+		blocks := make([]safemem.Block, 0, nChunks)
+		err := f.forEachMappingBlockLocked(fr, fd, write, func(b safemem.Block) {
+			blocks = append(blocks, b)
+		})
+		return safemem.BlockSeqFromSlice(blocks), err
+	}
+	// Exactly one chunk: skip the slice allocation.
+	var single safemem.BlockSeq
+	err := f.forEachMappingBlockLocked(fr, fd, write, func(b safemem.Block) {
+		single = safemem.BlockSeqOf(b)
+	})
+	return single, err
+}
+
+// forEachMappingBlockLocked calls fn once for each chunk that overlaps fr, in
+// ascending offset order, passing the part of that chunk's mapping that lies
+// within fr. Chunks with no cached mapping are mapped from fd and cached; a
+// cached mapping that is not writable is remapped as writable when write is
+// true.
+//
+// If an mmap call fails, its errno is returned immediately; fn has already
+// been called for the chunks before the failing one.
+//
+// Preconditions: f.mapsMu must be locked.
+func (f *HostFileMapper) forEachMappingBlockLocked(fr platform.FileRange, fd int, write bool, fn func(safemem.Block)) error {
+	prot := syscall.PROT_READ
+	if write {
+		prot |= syscall.PROT_WRITE
+	}
+	for chunkStart := fr.Start &^ chunkMask; chunkStart < fr.End; chunkStart += chunkSize {
+		m, ok := f.mappings[chunkStart]
+		if !ok {
+			// No cached mapping: map the whole chunk at a kernel-chosen
+			// address.
+			addr, _, errno := syscall.Syscall6(
+				syscall.SYS_MMAP,
+				0,
+				chunkSize,
+				uintptr(prot),
+				syscall.MAP_SHARED,
+				uintptr(fd),
+				uintptr(chunkStart))
+			if errno != 0 {
+				return errno
+			}
+			m = mapping{addr, write}
+			f.mappings[chunkStart] = m
+		} else if write && !m.writable {
+			// The cached mapping lacks write permission: map the chunk
+			// again with MAP_FIXED at the existing address.
+			addr, _, errno := syscall.Syscall6(
+				syscall.SYS_MMAP,
+				m.addr,
+				chunkSize,
+				uintptr(prot),
+				syscall.MAP_SHARED|syscall.MAP_FIXED,
+				uintptr(fd),
+				uintptr(chunkStart))
+			if errno != 0 {
+				return errno
+			}
+			m = mapping{addr, write}
+			f.mappings[chunkStart] = m
+		}
+		// startOff and endOff bound, relative to chunkStart, the part of
+		// this chunk that falls inside fr.
+		var startOff uint64
+		if chunkStart < fr.Start {
+			startOff = fr.Start - chunkStart
+		}
+		endOff := uint64(chunkSize)
+		if chunkStart+chunkSize > fr.End {
+			endOff = fr.End - chunkStart
+		}
+		fn(f.unsafeBlockFromChunkMapping(m.addr).TakeFirst64(endOff).DropFirst64(startOff))
+	}
+	return nil
+}
+
+// UnmapAll unmaps every cached chunk mapping and empties the cache. It does
+// not synchronize with users of blocks returned by earlier MapInternal calls;
+// callers must arrange that themselves.
+func (f *HostFileMapper) UnmapAll() {
+	f.mapsMu.Lock()
+	defer f.mapsMu.Unlock()
+	for off, cached := range f.mappings {
+		f.unmapAndRemoveLocked(off, cached)
+	}
+}
+
+// unmapAndRemoveLocked munmaps the chunk mapping m and deletes the entry for
+// chunkStart from f.mappings. A failed munmap is logged as a warning and the
+// entry is still deleted.
+//
+// Preconditions: f.mapsMu must be locked. f.mappings[chunkStart] == m.
+func (f *HostFileMapper) unmapAndRemoveLocked(chunkStart uint64, m mapping) {
+	_, _, errno := syscall.Syscall(syscall.SYS_MUNMAP, m.addr, chunkSize, 0)
+	if errno != 0 {
+		// Failing to unmap leaks address space and is unexpected, but is
+		// otherwise harmless, so complain instead of panicking.
+		log.Warningf("HostFileMapper: failed to unmap mapping %#x for chunk %#x: %v", m.addr, chunkStart, errno)
+	}
+	delete(f.mappings, chunkStart)
+}
diff --git a/pkg/sentry/fs/fsutil/host_file_mapper_state.go b/pkg/sentry/fs/fsutil/host_file_mapper_state.go
new file mode 100644
index 000000000..576d2a3df
--- /dev/null
+++ b/pkg/sentry/fs/fsutil/host_file_mapper_state.go
@@ -0,0 +1,20 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package fsutil
+
+// afterLoad is invoked by stateify. It gives f a new, empty mappings map.
+func (f *HostFileMapper) afterLoad() {
+	f.mappings = make(map[uint64]mapping)
+}
diff --git a/pkg/sentry/fs/fsutil/host_file_mapper_unsafe.go b/pkg/sentry/fs/fsutil/host_file_mapper_unsafe.go
new file mode 100644
index 000000000..7167be263
--- /dev/null
+++ b/pkg/sentry/fs/fsutil/host_file_mapper_unsafe.go
@@ -0,0 +1,27 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package fsutil
+
+import (
+	"unsafe"
+
+	"gvisor.googlesource.com/gvisor/pkg/sentry/safemem"
+)
+
+// unsafeBlockFromChunkMapping returns a safemem.Block of chunkSize bytes
+// starting at addr, built with safemem.BlockFromUnsafePointer.
+func (*HostFileMapper) unsafeBlockFromChunkMapping(addr uintptr) safemem.Block {
+	// We don't control the host file's length, so touching its mappings may
+	// raise SIGBUS. Thus accesses to it must use safecopy.
+	return safemem.BlockFromUnsafePointer((unsafe.Pointer)(addr), chunkSize)
+}
diff --git a/pkg/sentry/fs/fsutil/host_mappable.go b/pkg/sentry/fs/fsutil/host_mappable.go
new file mode 100644
index 000000000..ad0518b8f
--- /dev/null
+++ b/pkg/sentry/fs/fsutil/host_mappable.go
@@ -0,0 +1,197 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package fsutil
+
+import (
+	"math"
+	"sync"
+
+	"gvisor.googlesource.com/gvisor/pkg/sentry/context"
+	"gvisor.googlesource.com/gvisor/pkg/sentry/fs"
+	"gvisor.googlesource.com/gvisor/pkg/sentry/memmap"
+	"gvisor.googlesource.com/gvisor/pkg/sentry/platform"
+	"gvisor.googlesource.com/gvisor/pkg/sentry/safemem"
+	"gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
+)
+
+// HostMappable implements memmap.Mappable and platform.File over a
+// CachedFileObject.
+//
+// Lock order (compare the lock order model in mm/mm.go):
+//   truncateMu ("fs locks")
+//     mu ("memmap.Mappable locks not taken by Translate")
+//       ("platform.File locks")
+//         backingFile ("CachedFileObject locks")
+//
+// +stateify savable
+type HostMappable struct {
+	// hostFileMapper holds the per-chunk reference counts and the cached
+	// host mappings of the backing file's FD.
+	hostFileMapper *HostFileMapper
+
+	// backingFile is the cached file object that this mappable exposes.
+	backingFile CachedFileObject
+
+	mu sync.Mutex `state:"nosave"`
+
+	// mappings tracks mappings of the cached file object into
+	// memmap.MappingSpaces so it can be invalidated upon save. Protected by
+	// mu.
+	mappings memmap.MappingSet
+
+	// truncateMu protects writes and truncations. See Truncate() for details.
+	truncateMu sync.RWMutex `state:"nosave"`
+}
+
+// NewHostMappable returns a HostMappable that maps directly to the host FD of
+// backingFile, with a fresh HostFileMapper and no mappings.
+func NewHostMappable(backingFile CachedFileObject) *HostMappable {
+	h := new(HostMappable)
+	h.hostFileMapper = NewHostFileMapper()
+	h.backingFile = backingFile
+	return h
+}
+
+// AddMapping implements memmap.Mappable.AddMapping. It records the mapping
+// and takes a reference on each range returned by the mapping set. It always
+// returns nil.
+func (h *HostMappable) AddMapping(ctx context.Context, ms memmap.MappingSpace, ar usermem.AddrRange, offset uint64, writable bool) error {
+	// This is a hot path, so the mutex is released explicitly rather than
+	// with defer.
+	h.mu.Lock()
+	for _, mr := range h.mappings.AddMapping(ms, ar, offset, writable) {
+		h.hostFileMapper.IncRefOn(mr)
+	}
+	h.mu.Unlock()
+	return nil
+}
+
+// RemoveMapping implements memmap.Mappable.RemoveMapping. It removes the
+// mapping and drops a reference on each range returned by the mapping set.
+func (h *HostMappable) RemoveMapping(ctx context.Context, ms memmap.MappingSpace, ar usermem.AddrRange, offset uint64, writable bool) {
+	// This is a hot path, so the mutex is released explicitly rather than
+	// with defer.
+	h.mu.Lock()
+	for _, mr := range h.mappings.RemoveMapping(ms, ar, offset, writable) {
+		h.hostFileMapper.DecRefOn(mr)
+	}
+	h.mu.Unlock()
+}
+
+// CopyMapping implements memmap.Mappable.CopyMapping. It is equivalent to
+// calling AddMapping for dstAR; srcAR is not used.
+func (h *HostMappable) CopyMapping(ctx context.Context, ms memmap.MappingSpace, srcAR, dstAR usermem.AddrRange, offset uint64, writable bool) error {
+	return h.AddMapping(ctx, ms, dstAR, offset, writable)
+}
+
+// Translate implements memmap.Mappable.Translate. It returns a single
+// translation that covers the whole optional range, backed by h itself at the
+// same offsets, with usermem.AnyAccess permissions. It never returns an
+// error.
+func (h *HostMappable) Translate(ctx context.Context, required, optional memmap.MappableRange, at usermem.AccessType) ([]memmap.Translation, error) {
+	t := memmap.Translation{
+		Source: optional,
+		File:   h,
+		Offset: optional.Start,
+		Perms:  usermem.AnyAccess,
+	}
+	return []memmap.Translation{t}, nil
+}
+
+// InvalidateUnsavable implements memmap.Mappable.InvalidateUnsavable. It
+// invalidates all tracked mappings with default options while holding mu, and
+// always returns nil.
+func (h *HostMappable) InvalidateUnsavable(ctx context.Context) error {
+	h.mu.Lock()
+	h.mappings.InvalidateAll(memmap.InvalidateOpts{})
+	h.mu.Unlock()
+	return nil
+}
+
+// MapInternal implements platform.File.MapInternal by delegating to the
+// HostFileMapper with the backing file's FD. Only at.Write is consulted.
+func (h *HostMappable) MapInternal(fr platform.FileRange, at usermem.AccessType) (safemem.BlockSeq, error) {
+	seq, err := h.hostFileMapper.MapInternal(fr, h.backingFile.FD(), at.Write)
+	return seq, err
+}
+
+// FD implements platform.File.FD. It returns the FD reported by the backing
+// file.
+func (h *HostMappable) FD() int {
+	return h.backingFile.FD()
+}
+
+// IncRef implements platform.File.IncRef. It takes a reference in the
+// HostFileMapper on the offsets [fr.Start, fr.End).
+func (h *HostMappable) IncRef(fr platform.FileRange) {
+	h.hostFileMapper.IncRefOn(memmap.MappableRange{Start: fr.Start, End: fr.End})
+}
+
+// DecRef implements platform.File.DecRef. It drops a reference in the
+// HostFileMapper on the offsets [fr.Start, fr.End).
+func (h *HostMappable) DecRef(fr platform.FileRange) {
+	h.hostFileMapper.DecRefOn(memmap.MappableRange{Start: fr.Start, End: fr.End})
+}
+
+// Truncate truncates the file, invalidating any mapping that may have been
+// removed after the size change. If setting the new size on the backing file
+// fails, that error is returned and no mappings are invalidated.
+//
+// Truncation and writes are synchronized to prevent races where writes make the
+// file grow between truncation and invalidation below:
+//   T1: Calls SetMaskedAttributes and stalls
+//   T2: Appends to file causing it to grow
+//   T2: Writes to mapped pages and COW happens
+//   T1: Continues and wrongly invalidates the page mapped in step above.
+func (h *HostMappable) Truncate(ctx context.Context, newSize int64) error {
+	h.truncateMu.Lock()
+	defer h.truncateMu.Unlock()
+
+	mask := fs.AttrMask{Size: true}
+	attr := fs.UnstableAttr{Size: newSize}
+	if err := h.backingFile.SetMaskedAttributes(ctx, mask, attr); err != nil {
+		return err
+	}
+
+	// Invalidate COW mappings that may exist beyond the new size in case the file
+	// is being shrunk. Other mappings don't need to be invalidated because
+	// translate will just return identical mappings after invalidation anyway,
+	// and SIGBUS will be raised and handled when the mappings are touched.
+	//
+	// Compare Linux's mm/truncate.c:truncate_setsize() =>
+	// truncate_pagecache() =>
+	// mm/memory.c:unmap_mapping_range(evencows=1).
+	h.mu.Lock()
+	defer h.mu.Unlock()
+	mr := memmap.MappableRange{
+		Start: fs.OffsetPageEnd(newSize),
+		End:   fs.OffsetPageEnd(math.MaxInt64),
+	}
+	h.mappings.Invalidate(mr, memmap.InvalidateOpts{InvalidatePrivate: true})
+
+	return nil
+}
+
+// Allocate reserves space in the backing file. It holds truncateMu for
+// reading for the duration of the call, so it excludes a concurrent Truncate
+// but not concurrent Writes or Allocates.
+func (h *HostMappable) Allocate(ctx context.Context, offset int64, length int64) error {
+	h.truncateMu.RLock()
+	err := h.backingFile.Allocate(ctx, offset, length)
+	h.truncateMu.RUnlock()
+	return err
+}
+
+// Write copies data from src into the file backing this mappable, starting at
+// offset, and returns the values reported by src.CopyInTo. truncateMu is held
+// for reading during the copy, which excludes a concurrent Truncate.
+func (h *HostMappable) Write(ctx context.Context, src usermem.IOSequence, offset int64) (int64, error) {
+	dst := &writer{ctx: ctx, hostMappable: h, off: offset}
+	h.truncateMu.RLock()
+	n, err := src.CopyInTo(ctx, dst)
+	h.truncateMu.RUnlock()
+	return n, err
+}
+
+// writer writes blocks to a HostMappable's backing file at an offset that is
+// advanced by each write.
+type writer struct {
+	// ctx is passed to the backing file's WriteFromBlocksAt.
+	ctx          context.Context
+	// hostMappable is the mappable whose backing file is written.
+	hostMappable *HostMappable
+	// off is the file offset at which the next write starts.
+	off          int64
+}
+
+// WriteFromBlocks implements safemem.Writer.WriteFromBlocks. It writes src to
+// the backing file at the current offset and advances the offset by the count
+// the backing file reports, even when an error is also returned.
+func (w *writer) WriteFromBlocks(src safemem.BlockSeq) (uint64, error) {
+	file := w.hostMappable.backingFile
+	written, err := file.WriteFromBlocksAt(w.ctx, src, uint64(w.off))
+	w.off += int64(written)
+	return written, err
+}
diff --git a/pkg/sentry/fs/fsutil/inode.go b/pkg/sentry/fs/fsutil/inode.go
new file mode 100644
index 000000000..925887335
--- /dev/null
+++ b/pkg/sentry/fs/fsutil/inode.go
@@ -0,0 +1,503 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package fsutil
+
+import (
+	"sync"
+
+	"gvisor.googlesource.com/gvisor/pkg/sentry/context"
+	"gvisor.googlesource.com/gvisor/pkg/sentry/fs"
+	ktime "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time"
+	"gvisor.googlesource.com/gvisor/pkg/sentry/memmap"
+	"gvisor.googlesource.com/gvisor/pkg/sentry/socket/unix/transport"
+	"gvisor.googlesource.com/gvisor/pkg/syserror"
+	"gvisor.googlesource.com/gvisor/pkg/waiter"
+)
+
+// SimpleFileInode is a simple implementation of InodeOperations.
+//
+// It is assembled entirely from the mixins in this package: every operation
+// other than attribute handling is either a no-op or rejected. In particular
+// it embeds InodeNotOpenable, so GetFile always fails.
+//
+// +stateify savable
+type SimpleFileInode struct {
+	InodeGenericChecker       `state:"nosave"`
+	InodeNoExtendedAttributes `state:"nosave"`
+	InodeNoopRelease          `state:"nosave"`
+	InodeNoopWriteOut         `state:"nosave"`
+	InodeNotAllocatable       `state:"nosave"`
+	InodeNotDirectory         `state:"nosave"`
+	InodeNotMappable          `state:"nosave"`
+	InodeNotOpenable          `state:"nosave"`
+	InodeNotSocket            `state:"nosave"`
+	InodeNotSymlink           `state:"nosave"`
+	InodeNotTruncatable       `state:"nosave"`
+	InodeNotVirtual           `state:"nosave"`
+
+	// InodeSimpleAttributes holds the in-memory unstable attributes and is
+	// the only embedded type that carries saved state.
+	InodeSimpleAttributes
+}
+
+// NewSimpleFileInode returns a SimpleFileInode whose attributes are built by
+// NewInodeSimpleAttributes from owner, perms and the filesystem type typ.
+func NewSimpleFileInode(ctx context.Context, owner fs.FileOwner, perms fs.FilePermissions, typ uint64) *SimpleFileInode {
+	inode := new(SimpleFileInode)
+	inode.InodeSimpleAttributes = NewInodeSimpleAttributes(ctx, owner, perms, typ)
+	return inode
+}
+
+// NoReadWriteFileInode is an implementation of InodeOperations that supports
+// opening files that are not readable or writeable.
+//
+// It embeds the same mixins as SimpleFileInode except InodeNotOpenable;
+// GetFile is provided by a method on *NoReadWriteFileInode instead.
+//
+// +stateify savable
+type NoReadWriteFileInode struct {
+	InodeGenericChecker       `state:"nosave"`
+	InodeNoExtendedAttributes `state:"nosave"`
+	InodeNoopRelease          `state:"nosave"`
+	InodeNoopWriteOut         `state:"nosave"`
+	InodeNotAllocatable       `state:"nosave"`
+	InodeNotDirectory         `state:"nosave"`
+	InodeNotMappable          `state:"nosave"`
+	InodeNotSocket            `state:"nosave"`
+	InodeNotSymlink           `state:"nosave"`
+	InodeNotTruncatable       `state:"nosave"`
+	InodeNotVirtual           `state:"nosave"`
+
+	// InodeSimpleAttributes holds the in-memory unstable attributes.
+	InodeSimpleAttributes
+}
+
+// NewNoReadWriteFileInode returns a NoReadWriteFileInode whose attributes are
+// built by NewInodeSimpleAttributes from owner, perms and typ.
+func NewNoReadWriteFileInode(ctx context.Context, owner fs.FileOwner, perms fs.FilePermissions, typ uint64) *NoReadWriteFileInode {
+	inode := new(NoReadWriteFileInode)
+	inode.InodeSimpleAttributes = NewInodeSimpleAttributes(ctx, owner, perms, typ)
+	return inode
+}
+
+// GetFile implements fs.InodeOperations.GetFile. The returned file uses a
+// fresh NoReadWriteFile as its file operations; it never fails.
+func (*NoReadWriteFileInode) GetFile(ctx context.Context, dirent *fs.Dirent, flags fs.FileFlags) (*fs.File, error) {
+	fops := &NoReadWriteFile{}
+	return fs.NewFile(ctx, dirent, flags, fops), nil
+}
+
+// InodeSimpleAttributes implements methods for updating in-memory unstable
+// attributes.
+//
+// Nothing is written to any backing store: every setter mutates the
+// in-memory copy under mu.
+//
+// +stateify savable
+type InodeSimpleAttributes struct {
+	// fsType is the immutable filesystem type that will be returned by
+	// StatFS. A value of 0 makes StatFS fail with ENOSYS.
+	fsType uint64
+
+	// mu protects unstable.
+	mu       sync.RWMutex `state:"nosave"`
+	unstable fs.UnstableAttr
+}
+
+// NewInodeSimpleAttributes builds an InodeSimpleAttributes for the given owner
+// and permissions. The attributes are passed through fs.WithCurrentTime before
+// being stored, and typ becomes the filesystem type reported by StatFS.
+func NewInodeSimpleAttributes(ctx context.Context, owner fs.FileOwner, perms fs.FilePermissions, typ uint64) InodeSimpleAttributes {
+	uattr := fs.WithCurrentTime(ctx, fs.UnstableAttr{Owner: owner, Perms: perms})
+	return NewInodeSimpleAttributesWithUnstable(uattr, typ)
+}
+
+// NewInodeSimpleAttributesWithUnstable builds an InodeSimpleAttributes that
+// starts out with exactly the unstable attributes in uattr and reports typ as
+// its filesystem type.
+func NewInodeSimpleAttributesWithUnstable(uattr fs.UnstableAttr, typ uint64) InodeSimpleAttributes {
+	return InodeSimpleAttributes{unstable: uattr, fsType: typ}
+}
+
+// UnstableAttr implements fs.InodeOperations.UnstableAttr. It returns a copy
+// of the in-memory attributes taken under the read lock and never fails.
+func (i *InodeSimpleAttributes) UnstableAttr(ctx context.Context, _ *fs.Inode) (fs.UnstableAttr, error) {
+	i.mu.RLock()
+	defer i.mu.RUnlock()
+	return i.unstable, nil
+}
+
+// SetPermissions implements fs.InodeOperations.SetPermissions. The in-memory
+// attributes are updated under the write lock; the call always reports
+// success.
+func (i *InodeSimpleAttributes) SetPermissions(ctx context.Context, _ *fs.Inode, p fs.FilePermissions) bool {
+	i.mu.Lock()
+	defer i.mu.Unlock()
+	i.unstable.SetPermissions(ctx, p)
+	return true
+}
+
+// SetOwner implements fs.InodeOperations.SetOwner. The in-memory attributes
+// are updated under the write lock; the call never fails.
+func (i *InodeSimpleAttributes) SetOwner(ctx context.Context, _ *fs.Inode, owner fs.FileOwner) error {
+	i.mu.Lock()
+	defer i.mu.Unlock()
+	i.unstable.SetOwner(ctx, owner)
+	return nil
+}
+
+// SetTimestamps implements fs.InodeOperations.SetTimestamps. The in-memory
+// attributes are updated under the write lock; the call never fails.
+func (i *InodeSimpleAttributes) SetTimestamps(ctx context.Context, _ *fs.Inode, ts fs.TimeSpec) error {
+	i.mu.Lock()
+	defer i.mu.Unlock()
+	i.unstable.SetTimestamps(ctx, ts)
+	return nil
+}
+
+// AddLink implements fs.InodeOperations.AddLink by incrementing the in-memory
+// link count.
+func (i *InodeSimpleAttributes) AddLink() {
+	i.mu.Lock()
+	defer i.mu.Unlock()
+	i.unstable.Links += 1
+}
+
+// DropLink implements fs.InodeOperations.DropLink by decrementing the
+// in-memory link count. No check is made that the count is non-zero.
+func (i *InodeSimpleAttributes) DropLink() {
+	i.mu.Lock()
+	defer i.mu.Unlock()
+	i.unstable.Links -= 1
+}
+
+// StatFS implements fs.InodeOperations.StatFS. It reports the filesystem type
+// given at construction, or fails with ENOSYS when that type is 0.
+func (i *InodeSimpleAttributes) StatFS(context.Context) (fs.Info, error) {
+	if i.fsType != 0 {
+		return fs.Info{Type: i.fsType}, nil
+	}
+	return fs.Info{}, syserror.ENOSYS
+}
+
+// NotifyAccess sets the in-memory access time to the current time taken from
+// ctx.
+func (i *InodeSimpleAttributes) NotifyAccess(ctx context.Context) {
+	i.mu.Lock()
+	defer i.mu.Unlock()
+	i.unstable.AccessTime = ktime.NowFromContext(ctx)
+}
+
+// NotifyModification sets the in-memory modification time to the current time
+// taken from ctx.
+func (i *InodeSimpleAttributes) NotifyModification(ctx context.Context) {
+	i.mu.Lock()
+	defer i.mu.Unlock()
+	i.unstable.ModificationTime = ktime.NowFromContext(ctx)
+}
+
+// NotifyStatusChange sets the in-memory status change time to the current
+// time taken from ctx.
+func (i *InodeSimpleAttributes) NotifyStatusChange(ctx context.Context) {
+	i.mu.Lock()
+	defer i.mu.Unlock()
+	i.unstable.StatusChangeTime = ktime.NowFromContext(ctx)
+}
+
+// NotifyModificationAndStatusChange sets both the in-memory modification time
+// and status change time to a single reading of the current time, so the two
+// timestamps are always equal after the call.
+func (i *InodeSimpleAttributes) NotifyModificationAndStatusChange(ctx context.Context) {
+	i.mu.Lock()
+	defer i.mu.Unlock()
+	ts := ktime.NowFromContext(ctx)
+	i.unstable.StatusChangeTime = ts
+	i.unstable.ModificationTime = ts
+}
+
+// InodeSimpleExtendedAttributes implements
+// fs.InodeOperations.{Get,Set,List}xattr.
+//
+// Attributes are kept purely in memory. The zero value is ready to use.
+//
+// +stateify savable
+type InodeSimpleExtendedAttributes struct {
+	// mu protects xattrs.
+	mu     sync.RWMutex `state:"nosave"`
+	// xattrs maps attribute names to values. It stays nil until the first
+	// Setxattr call allocates it.
+	xattrs map[string]string
+}
+
+// Getxattr implements fs.InodeOperations.Getxattr. It returns the stored value
+// for name, or ENOATTR if no such attribute has been set.
+func (i *InodeSimpleExtendedAttributes) Getxattr(_ *fs.Inode, name string) (string, error) {
+	i.mu.RLock()
+	defer i.mu.RUnlock()
+	if value, ok := i.xattrs[name]; ok {
+		return value, nil
+	}
+	return "", syserror.ENOATTR
+}
+
+// Setxattr implements fs.InodeOperations.Setxattr. It stores value under name,
+// replacing any previous value, and never fails. The backing map is allocated
+// on first use.
+func (i *InodeSimpleExtendedAttributes) Setxattr(_ *fs.Inode, name, value string) error {
+	i.mu.Lock()
+	defer i.mu.Unlock()
+	if i.xattrs == nil {
+		i.xattrs = map[string]string{name: value}
+		return nil
+	}
+	i.xattrs[name] = value
+	return nil
+}
+
+// Listxattr implements fs.InodeOperations.Listxattr. It returns a newly
+// allocated set holding the name of every stored attribute; the set is empty
+// (not nil) when nothing has been stored.
+func (i *InodeSimpleExtendedAttributes) Listxattr(_ *fs.Inode) (map[string]struct{}, error) {
+	i.mu.RLock()
+	defer i.mu.RUnlock()
+	set := make(map[string]struct{}, len(i.xattrs))
+	for key := range i.xattrs {
+		set[key] = struct{}{}
+	}
+	return set, nil
+}
+
+// staticFile is a file with static contents. It is returned by
+// InodeStaticFileGetter.GetFile.
+//
+// All behavior comes from embedded mixins; the only stateful part is the
+// FileStaticContentReader that holds the contents.
+//
+// +stateify savable
+type staticFile struct {
+	FileGenericSeek          `state:"nosave"`
+	FileNoIoctl              `state:"nosave"`
+	FileNoMMap               `state:"nosave"`
+	FileNoSplice             `state:"nosave"`
+	FileNoopFsync            `state:"nosave"`
+	FileNoopFlush            `state:"nosave"`
+	FileNoopRelease          `state:"nosave"`
+	FileNoopWrite            `state:"nosave"`
+	FileNotDirReaddir        `state:"nosave"`
+	FileUseInodeUnstableAttr `state:"nosave"`
+	waiter.AlwaysReady       `state:"nosave"`
+
+	// FileStaticContentReader serves reads from the static contents.
+	FileStaticContentReader
+}
+
+// InodeNoStatFS implements StatFS by returning ENOSYS.
+type InodeNoStatFS struct{}
+
+// StatFS implements fs.InodeOperations.StatFS. It always fails with ENOSYS
+// and returns a zero fs.Info.
+func (InodeNoStatFS) StatFS(_ context.Context) (fs.Info, error) {
+	var info fs.Info
+	return info, syserror.ENOSYS
+}
+
+// InodeStaticFileGetter implements GetFile for a file with static contents.
+//
+// +stateify savable
+type InodeStaticFileGetter struct {
+	Contents []byte
+}
+
+// GetFile implements fs.InodeOperations.GetFile. Each call returns a new file
+// whose reads are served by a FileStaticContentReader built from i.Contents.
+func (i *InodeStaticFileGetter) GetFile(ctx context.Context, dirent *fs.Dirent, flags fs.FileFlags) (*fs.File, error) {
+	fops := &staticFile{
+		FileStaticContentReader: NewFileStaticContentReader(i.Contents),
+	}
+	return fs.NewFile(ctx, dirent, flags, fops), nil
+}
+
+// InodeNotMappable can be used by Inodes that cannot be memory mapped; its
+// Mappable method returns a nil memmap.Mappable.
+type InodeNotMappable struct{}
+
+// Mappable implements fs.InodeOperations.Mappable. It always returns nil.
+func (InodeNotMappable) Mappable(_ *fs.Inode) memmap.Mappable {
+	return nil
+}
+
+// InodeNoopWriteOut provides a WriteOut for fs.InodeOperations that does
+// nothing.
+type InodeNoopWriteOut struct{}
+
+// WriteOut implements fs.InodeOperations.WriteOut as a no-op that always
+// succeeds.
+func (InodeNoopWriteOut) WriteOut(_ context.Context, _ *fs.Inode) error {
+	return nil
+}
+
+// InodeNotDirectory can be used by Inodes that are not directories. Every
+// directory operation fails with ENOTDIR, except Rename, which fails with
+// EINVAL.
+type InodeNotDirectory struct{}
+
+// Lookup implements fs.InodeOperations.Lookup. It always fails with ENOTDIR.
+func (InodeNotDirectory) Lookup(_ context.Context, _ *fs.Inode, _ string) (*fs.Dirent, error) {
+	return nil, syserror.ENOTDIR
+}
+
+// Create implements fs.InodeOperations.Create. It always fails with ENOTDIR.
+func (InodeNotDirectory) Create(_ context.Context, _ *fs.Inode, _ string, _ fs.FileFlags, _ fs.FilePermissions) (*fs.File, error) {
+	return nil, syserror.ENOTDIR
+}
+
+// CreateLink implements fs.InodeOperations.CreateLink. It always fails with
+// ENOTDIR.
+func (InodeNotDirectory) CreateLink(_ context.Context, _ *fs.Inode, _ string, _ string) error {
+	return syserror.ENOTDIR
+}
+
+// CreateHardLink implements fs.InodeOperations.CreateHardLink. It always
+// fails with ENOTDIR.
+func (InodeNotDirectory) CreateHardLink(_ context.Context, _ *fs.Inode, _ *fs.Inode, _ string) error {
+	return syserror.ENOTDIR
+}
+
+// CreateDirectory implements fs.InodeOperations.CreateDirectory. It always
+// fails with ENOTDIR.
+func (InodeNotDirectory) CreateDirectory(_ context.Context, _ *fs.Inode, _ string, _ fs.FilePermissions) error {
+	return syserror.ENOTDIR
+}
+
+// Bind implements fs.InodeOperations.Bind. It always fails with ENOTDIR.
+func (InodeNotDirectory) Bind(_ context.Context, _ *fs.Inode, _ string, _ transport.BoundEndpoint, _ fs.FilePermissions) (*fs.Dirent, error) {
+	return nil, syserror.ENOTDIR
+}
+
+// CreateFifo implements fs.InodeOperations.CreateFifo. It always fails with
+// ENOTDIR.
+func (InodeNotDirectory) CreateFifo(_ context.Context, _ *fs.Inode, _ string, _ fs.FilePermissions) error {
+	return syserror.ENOTDIR
+}
+
+// Remove implements fs.InodeOperations.Remove. It always fails with ENOTDIR.
+func (InodeNotDirectory) Remove(_ context.Context, _ *fs.Inode, _ string) error {
+	return syserror.ENOTDIR
+}
+
+// RemoveDirectory implements fs.InodeOperations.RemoveDirectory. It always
+// fails with ENOTDIR.
+func (InodeNotDirectory) RemoveDirectory(_ context.Context, _ *fs.Inode, _ string) error {
+	return syserror.ENOTDIR
+}
+
+// Rename implements fs.InodeOperations.Rename. Unlike the other methods on
+// InodeNotDirectory it fails with EINVAL rather than ENOTDIR.
+func (InodeNotDirectory) Rename(_ context.Context, _ *fs.Inode, _ *fs.Inode, _ string, _ *fs.Inode, _ string, _ bool) error {
+	return syserror.EINVAL
+}
+
+// InodeNotSocket can be used by Inodes that are not sockets.
+type InodeNotSocket struct{}
+
+// BoundEndpoint implements fs.InodeOperations.BoundEndpoint. It always
+// returns nil.
+func (InodeNotSocket) BoundEndpoint(_ *fs.Inode, _ string) transport.BoundEndpoint {
+	return nil
+}
+
+// InodeNotTruncatable can be used by Inodes that cannot be truncated.
+type InodeNotTruncatable struct{}
+
+// Truncate implements fs.InodeOperations.Truncate. It always fails with
+// EINVAL.
+func (InodeNotTruncatable) Truncate(_ context.Context, _ *fs.Inode, _ int64) error {
+	return syserror.EINVAL
+}
+
+// InodeIsDirTruncate implements fs.InodeOperations.Truncate for directories.
+type InodeIsDirTruncate struct{}
+
+// Truncate implements fs.InodeOperations.Truncate. It always fails with
+// EISDIR.
+func (InodeIsDirTruncate) Truncate(_ context.Context, _ *fs.Inode, _ int64) error {
+	return syserror.EISDIR
+}
+
+// InodeNoopTruncate implements fs.InodeOperations.Truncate as a noop.
+type InodeNoopTruncate struct{}
+
+// Truncate implements fs.InodeOperations.Truncate. It does nothing and always
+// succeeds.
+func (InodeNoopTruncate) Truncate(_ context.Context, _ *fs.Inode, _ int64) error {
+	return nil
+}
+
+// InodeNotRenameable can be used by Inodes that cannot be renamed.
+type InodeNotRenameable struct{}
+
+// Rename implements fs.InodeOperations.Rename. It always fails with EINVAL.
+func (InodeNotRenameable) Rename(_ context.Context, _ *fs.Inode, _ *fs.Inode, _ string, _ *fs.Inode, _ string, _ bool) error {
+	return syserror.EINVAL
+}
+
+// InodeNotOpenable can be used by Inodes that cannot be opened.
+type InodeNotOpenable struct{}
+
+// GetFile implements fs.InodeOperations.GetFile. It always fails with EIO.
+func (InodeNotOpenable) GetFile(_ context.Context, _ *fs.Dirent, _ fs.FileFlags) (*fs.File, error) {
+	return nil, syserror.EIO
+}
+
+// InodeNotVirtual can be used by Inodes that are not virtual.
+type InodeNotVirtual struct{}
+
+// IsVirtual implements fs.InodeOperations.IsVirtual. It always reports false.
+func (InodeNotVirtual) IsVirtual() bool { return false }
+
+// InodeVirtual can be used by Inodes that are virtual.
+type InodeVirtual struct{}
+
+// IsVirtual implements fs.InodeOperations.IsVirtual. It always reports true.
+func (InodeVirtual) IsVirtual() bool { return true }
+
+// InodeNotSymlink can be used by Inodes that are not symlinks. Both link
+// operations fail with ENOLINK.
+type InodeNotSymlink struct{}
+
+// Readlink implements fs.InodeOperations.Readlink. It always fails with
+// ENOLINK.
+func (InodeNotSymlink) Readlink(_ context.Context, _ *fs.Inode) (string, error) {
+	return "", syserror.ENOLINK
+}
+
+// Getlink implements fs.InodeOperations.Getlink. It always fails with
+// ENOLINK.
+func (InodeNotSymlink) Getlink(_ context.Context, _ *fs.Inode) (*fs.Dirent, error) {
+	return nil, syserror.ENOLINK
+}
+
+// InodeNoExtendedAttributes can be used by Inodes that do not support
+// extended attributes. Every xattr operation fails with EOPNOTSUPP.
+type InodeNoExtendedAttributes struct{}
+
+// Getxattr implements fs.InodeOperations.Getxattr. It always fails with
+// EOPNOTSUPP.
+func (InodeNoExtendedAttributes) Getxattr(_ *fs.Inode, _ string) (string, error) {
+	return "", syserror.EOPNOTSUPP
+}
+
+// Setxattr implements fs.InodeOperations.Setxattr. It always fails with
+// EOPNOTSUPP.
+func (InodeNoExtendedAttributes) Setxattr(_ *fs.Inode, _ string, _ string) error {
+	return syserror.EOPNOTSUPP
+}
+
+// Listxattr implements fs.InodeOperations.Listxattr. It always fails with
+// EOPNOTSUPP.
+func (InodeNoExtendedAttributes) Listxattr(_ *fs.Inode) (map[string]struct{}, error) {
+	return nil, syserror.EOPNOTSUPP
+}
+
+// InodeNoopRelease implements fs.InodeOperations.Release as a noop.
+type InodeNoopRelease struct{}
+
+// Release implements fs.InodeOperations.Release. It does nothing.
+func (InodeNoopRelease) Release(_ context.Context) {}
+
+// InodeGenericChecker implements fs.InodeOperations.Check with a generic
+// implementation that defers entirely to fs.ContextCanAccessFile.
+type InodeGenericChecker struct{}
+
+// Check implements fs.InodeOperations.Check. It returns the result of
+// fs.ContextCanAccessFile for the given context, inode and permission mask.
+func (InodeGenericChecker) Check(ctx context.Context, inode *fs.Inode, p fs.PermMask) bool {
+	allowed := fs.ContextCanAccessFile(ctx, inode, p)
+	return allowed
+}
+
+// InodeDenyWriteChecker implements fs.InodeOperations.Check which denies all
+// write operations.
+type InodeDenyWriteChecker struct{}
+
+// Check implements fs.InodeOperations.Check. Any mask that requests write
+// access is refused without consulting fs.ContextCanAccessFile; every other
+// mask gets the result of fs.ContextCanAccessFile.
+func (InodeDenyWriteChecker) Check(ctx context.Context, inode *fs.Inode, p fs.PermMask) bool {
+	return !p.Write && fs.ContextCanAccessFile(ctx, inode, p)
+}
+
+// InodeNotAllocatable can be used by Inodes that do not support Allocate().
+type InodeNotAllocatable struct{}
+
+// Allocate implements fs.InodeOperations.Allocate. It always fails with
+// EOPNOTSUPP.
+func (InodeNotAllocatable) Allocate(_ context.Context, _ *fs.Inode, _, _ int64) error {
+	return syserror.EOPNOTSUPP
+}
+
+// InodeNoopAllocate implements fs.InodeOperations.Allocate as a noop.
+type InodeNoopAllocate struct{}
+
+// Allocate implements fs.InodeOperations.Allocate. It does nothing and always
+// succeeds.
+func (InodeNoopAllocate) Allocate(context.Context, *fs.Inode, int64, int64) error {
+	return nil
+}
+
+// InodeIsDirAllocate implements fs.InodeOperations.Allocate for directories.
+type InodeIsDirAllocate struct{}
+
+// Allocate implements fs.InodeOperations.Allocate. It always fails with
+// EISDIR.
+func (InodeIsDirAllocate) Allocate(context.Context, *fs.Inode, int64, int64) error {
+	return syserror.EISDIR
+}
diff --git a/pkg/sentry/fs/fsutil/inode_cached.go b/pkg/sentry/fs/fsutil/inode_cached.go
new file mode 100644
index 000000000..7bee2eb5f
--- /dev/null
+++ b/pkg/sentry/fs/fsutil/inode_cached.go
@@ -0,0 +1,1004 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package fsutil
+
+import (
+	"fmt"
+	"io"
+	"sync"
+
+	"gvisor.googlesource.com/gvisor/pkg/log"
+	"gvisor.googlesource.com/gvisor/pkg/sentry/context"
+	"gvisor.googlesource.com/gvisor/pkg/sentry/fs"
+	"gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time"
+	ktime "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time"
+	"gvisor.googlesource.com/gvisor/pkg/sentry/memmap"
+	"gvisor.googlesource.com/gvisor/pkg/sentry/pgalloc"
+	"gvisor.googlesource.com/gvisor/pkg/sentry/platform"
+	"gvisor.googlesource.com/gvisor/pkg/sentry/safemem"
+	"gvisor.googlesource.com/gvisor/pkg/sentry/usage"
+	"gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
+)
+
+// Lock order (compare the lock order model in mm/mm.go):
+//
+// CachingInodeOperations.attrMu ("fs locks")
+//   CachingInodeOperations.mapsMu ("memmap.Mappable locks not taken by Translate")
+//     CachingInodeOperations.dataMu ("memmap.Mappable locks taken by Translate")
+//       CachedFileObject locks
+
+// CachingInodeOperations caches the metadata and content of a CachedFileObject.
+// It implements a subset of InodeOperations. As a utility it can be used to
+// implement the full set of InodeOperations. Generally it should not be
+// embedded to avoid unexpected inherited behavior.
+//
+// CachingInodeOperations implements Mappable for the CachedFileObject:
+//
+// - If CachedFileObject.FD returns a value >= 0 then the file descriptor
+//   will be memory mapped on the host.
+//
+// - Otherwise, the contents of CachedFileObject are buffered into memory
+//   managed by the CachingInodeOperations.
+//
+// Implementations of FileOperations for a CachedFileObject must read and
+// write through CachingInodeOperations using Read and Write respectively.
+//
+// Implementations of InodeOperations.WriteOut must call Sync to write out
+// in-memory modifications of data and metadata to the CachedFileObject.
+//
+// +stateify savable
+type CachingInodeOperations struct {
+	// backingFile is a handle to a cached file object.
+	backingFile CachedFileObject
+
+	// mfp is used to allocate memory that caches backingFile's contents.
+	mfp pgalloc.MemoryFileProvider
+
+	// forcePageCache indicates the sentry page cache should be used regardless
+	// of whether the platform supports host mapped I/O or not. This must not be
+	// modified after inode creation.
+	forcePageCache bool
+
+	// attrMu protects attr and dirtyAttr. It is the outermost lock in the
+	// lock order documented above this type.
+	attrMu sync.Mutex `state:"nosave"`
+
+	// attr is unstable cached metadata.
+	//
+	// attr is protected by attrMu. attr.Size is protected by both attrMu and
+	// dataMu; reading it requires locking either mutex, while mutating it
+	// requires locking both.
+	attr fs.UnstableAttr
+
+	// dirtyAttr is metadata that was updated in-place but hasn't yet
+	// been successfully written out.
+	//
+	// dirtyAttr is protected by attrMu.
+	dirtyAttr fs.AttrMask
+
+	// mapsMu protects mappings. It is acquired after attrMu and before
+	// dataMu.
+	mapsMu sync.Mutex `state:"nosave"`
+
+	// mappings tracks mappings of the cached file object into
+	// memmap.MappingSpaces.
+	//
+	// mappings is protected by mapsMu.
+	mappings memmap.MappingSet
+
+	// dataMu protects cache, dirty and refs, and (together with attrMu)
+	// attr.Size.
+	dataMu sync.RWMutex `state:"nosave"`
+
+	// cache maps offsets into the cached file to offsets into
+	// mfp.MemoryFile() that store the file's data.
+	//
+	// cache is protected by dataMu.
+	cache FileRangeSet
+
+	// dirty tracks dirty segments in cache.
+	//
+	// dirty is protected by dataMu.
+	dirty DirtySet
+
+	// hostFileMapper caches internal mappings of backingFile.FD().
+	hostFileMapper *HostFileMapper
+
+	// refs tracks active references to data in the cache.
+	//
+	// refs is protected by dataMu.
+	refs frameRefSet
+}
+
+// CachedFileObject is a file that may require caching. It is the backing
+// store that CachingInodeOperations reads from and writes back to.
+type CachedFileObject interface {
+	// ReadToBlocksAt reads up to dsts.NumBytes() bytes from the file to dsts,
+	// starting at offset, and returns the number of bytes read. ReadToBlocksAt
+	// may return a partial read without an error.
+	ReadToBlocksAt(ctx context.Context, dsts safemem.BlockSeq, offset uint64) (uint64, error)
+
+	// WriteFromBlocksAt writes up to srcs.NumBytes() bytes from srcs to the
+	// file, starting at offset, and returns the number of bytes written.
+	// WriteFromBlocksAt may return a partial write without an error.
+	WriteFromBlocksAt(ctx context.Context, srcs safemem.BlockSeq, offset uint64) (uint64, error)
+
+	// SetMaskedAttributes sets the attributes in attr that are true in mask
+	// on the backing file.
+	//
+	// SetMaskedAttributes may be called at any point, regardless of whether
+	// the file was opened.
+	SetMaskedAttributes(ctx context.Context, mask fs.AttrMask, attr fs.UnstableAttr) error
+
+	// Allocate allows the caller to reserve disk space for the inode.
+	// It's equivalent to fallocate(2) with 'mode=0'.
+	Allocate(ctx context.Context, offset int64, length int64) error
+
+	// Sync instructs the remote filesystem to sync the file to stable storage.
+	// CachingInodeOperations.WriteOut calls it last, after dirty pages and
+	// dirty attributes have been written to the file.
+	Sync(ctx context.Context) error
+
+	// FD returns a host file descriptor. If it is possible for
+	// CachingInodeOperations.AddMapping to have ever been called with writable
+	// = true, the FD must have been opened O_RDWR; otherwise, it may have been
+	// opened O_RDONLY or O_RDWR. (mmap unconditionally requires that mapped
+	// files are readable.) If no host file descriptor is available, FD returns
+	// a negative number.
+	//
+	// For any given CachedFileObject, if FD() ever succeeds (returns a
+	// non-negative number), it must always succeed.
+	//
+	// FD is called iff the file has been memory mapped. This implies that
+	// the file was opened (see fs.InodeOperations.GetFile).
+	FD() int
+}
+
+// NewCachingInodeOperations builds a CachingInodeOperations that caches
+// backingFile, starting from the unstable attributes in uattr.
+//
+// The memory file provider is taken from ctx; the function panics if ctx does
+// not carry one.
+func NewCachingInodeOperations(ctx context.Context, backingFile CachedFileObject, uattr fs.UnstableAttr, forcePageCache bool) *CachingInodeOperations {
+	provider := pgalloc.MemoryFileProviderFromContext(ctx)
+	if provider == nil {
+		panic(fmt.Sprintf("context.Context %T lacks non-nil value for key %T", ctx, pgalloc.CtxMemoryFileProvider))
+	}
+	c := &CachingInodeOperations{
+		backingFile:    backingFile,
+		mfp:            provider,
+		forcePageCache: forcePageCache,
+		attr:           uattr,
+		hostFileMapper: NewHostFileMapper(),
+	}
+	return c
+}
+
+// Release implements fs.InodeOperations.Release.
+//
+// It writes all dirty cached data back to the backing file and then drops the
+// cache. It takes no context and performs the writeback with
+// context.Background(). Release panics if the inode still has mappings or if
+// the writeback fails.
+func (c *CachingInodeOperations) Release() {
+	c.mapsMu.Lock()
+	defer c.mapsMu.Unlock()
+	c.dataMu.Lock()
+	defer c.dataMu.Unlock()
+
+	// An inode must never be released while it is still memory-mapped.
+	if !c.mappings.IsEmpty() {
+		panic(fmt.Sprintf("Releasing CachingInodeOperations with mappings:\n%s", &c.mappings))
+	}
+
+	// Cached pages still queued for MemoryFile eviction are dropped here, so
+	// tell MemoryFile it no longer needs to evict them.
+	memFile := c.mfp.MemoryFile()
+	memFile.MarkAllUnevictable(c)
+	err := SyncDirtyAll(context.Background(), &c.cache, &c.dirty, uint64(c.attr.Size), memFile, c.backingFile.WriteFromBlocksAt)
+	if err != nil {
+		panic(fmt.Sprintf("Failed to writeback cached data: %v", err))
+	}
+	c.cache.DropAll(memFile)
+	c.dirty.RemoveAll()
+}
+
+// UnstableAttr implements fs.InodeOperations.UnstableAttr. It returns a copy
+// of the cached attributes taken under attrMu and never fails.
+func (c *CachingInodeOperations) UnstableAttr(ctx context.Context, inode *fs.Inode) (fs.UnstableAttr, error) {
+	c.attrMu.Lock()
+	defer c.attrMu.Unlock()
+	return c.attr, nil
+}
+
+// SetPermissions implements fs.InodeOperations.SetPermissions.
+//
+// The permissions are first pushed to the backing file. Only if that succeeds
+// are the cached permissions and status change time updated. It reports
+// whether the backing file accepted the change.
+func (c *CachingInodeOperations) SetPermissions(ctx context.Context, inode *fs.Inode, perms fs.FilePermissions) bool {
+	c.attrMu.Lock()
+	defer c.attrMu.Unlock()
+
+	now := ktime.NowFromContext(ctx)
+	err := c.backingFile.SetMaskedAttributes(ctx, fs.AttrMask{Perms: true}, fs.UnstableAttr{Perms: perms})
+	if err != nil {
+		return false
+	}
+	c.attr.Perms = perms
+	c.touchStatusChangeTimeLocked(now)
+	return true
+}
+
+// SetOwner implements fs.InodeOperations.SetOwner.
+//
+// Only the IDs in owner for which Ok() holds are changed; if neither does,
+// the call is a no-op. The change is pushed to the backing file first, and
+// the cached owner and status change time are updated only on success.
+func (c *CachingInodeOperations) SetOwner(ctx context.Context, inode *fs.Inode, owner fs.FileOwner) error {
+	setUID, setGID := owner.UID.Ok(), owner.GID.Ok()
+	if !setUID && !setGID {
+		return nil
+	}
+
+	c.attrMu.Lock()
+	defer c.attrMu.Unlock()
+
+	now := ktime.NowFromContext(ctx)
+	mask := fs.AttrMask{UID: setUID, GID: setGID}
+	if err := c.backingFile.SetMaskedAttributes(ctx, mask, fs.UnstableAttr{Owner: owner}); err != nil {
+		return err
+	}
+	if setUID {
+		c.attr.Owner.UID = owner.UID
+	}
+	if setGID {
+		c.attr.Owner.GID = owner.GID
+	}
+	c.touchStatusChangeTimeLocked(now)
+	return nil
+}
+
+// SetTimestamps implements fs.InodeOperations.SetTimestamps.
+//
+// If both timestamps are omitted the call is a no-op. Otherwise the requested
+// times are pushed to the backing file, and on success the cached access and
+// modification times (whichever were not omitted) and the status change time
+// are updated.
+func (c *CachingInodeOperations) SetTimestamps(ctx context.Context, inode *fs.Inode, ts fs.TimeSpec) error {
+	if ts.ATimeOmit && ts.MTimeOmit {
+		return nil
+	}
+
+	c.attrMu.Lock()
+	defer c.attrMu.Unlock()
+
+	// Resolve "use the system time" requests to a concrete time here, so the
+	// cached timestamps stay consistent with what is sent to the remote
+	// filesystem.
+	now := ktime.NowFromContext(ctx)
+	if ts.ATimeSetSystemTime {
+		ts.ATime = now
+	}
+	if ts.MTimeSetSystemTime {
+		ts.MTime = now
+	}
+
+	setATime, setMTime := !ts.ATimeOmit, !ts.MTimeOmit
+	mask := fs.AttrMask{AccessTime: setATime, ModificationTime: setMTime}
+	update := fs.UnstableAttr{AccessTime: ts.ATime, ModificationTime: ts.MTime}
+	if err := c.backingFile.SetMaskedAttributes(ctx, mask, update); err != nil {
+		return err
+	}
+	if setATime {
+		c.attr.AccessTime = ts.ATime
+	}
+	if setMTime {
+		c.attr.ModificationTime = ts.MTime
+	}
+	c.touchStatusChangeTimeLocked(now)
+	return nil
+}
+
+// Truncate implements fs.InodeOperations.Truncate.
+//
+// The new size is applied to the backing file first; if that fails the cached
+// state is left untouched and the error is returned. On success the cached
+// size and the modification and status change times are updated. When the
+// file shrinks, translations of the truncated pages are invalidated and the
+// truncated range is dropped from the cache without being written back.
+func (c *CachingInodeOperations) Truncate(ctx context.Context, inode *fs.Inode, size int64) error {
+	c.attrMu.Lock()
+	defer c.attrMu.Unlock()
+
+	// c.attr.Size is protected by both c.attrMu and c.dataMu.
+	c.dataMu.Lock()
+	now := ktime.NowFromContext(ctx)
+	masked := fs.AttrMask{Size: true}
+	attr := fs.UnstableAttr{Size: size}
+	if err := c.backingFile.SetMaskedAttributes(ctx, masked, attr); err != nil {
+		c.dataMu.Unlock()
+		return err
+	}
+	oldSize := c.attr.Size
+	c.attr.Size = size
+	c.touchModificationAndStatusChangeTimeLocked(now)
+
+	// We drop c.dataMu here so that we can lock c.mapsMu and invalidate
+	// mappings below. This allows concurrent calls to Read/Translate/etc.
+	// These functions synchronize with an in-progress Truncate by refusing to
+	// use cache contents beyond the new c.attr.Size. (We are still holding
+	// c.attrMu, so we can't race with Truncate/Write.)
+	c.dataMu.Unlock()
+
+	// Nothing left to do unless shrinking the file.
+	if size >= oldSize {
+		return nil
+	}
+
+	oldpgend := fs.OffsetPageEnd(oldSize)
+	newpgend := fs.OffsetPageEnd(size)
+
+	// Invalidate past translations of truncated pages.
+	if newpgend != oldpgend {
+		c.mapsMu.Lock()
+		c.mappings.Invalidate(memmap.MappableRange{Start: newpgend, End: oldpgend}, memmap.InvalidateOpts{
+			// Compare Linux's mm/truncate.c:truncate_setsize() =>
+			// truncate_pagecache() =>
+			// mm/memory.c:unmap_mapping_range(evencows=1).
+			InvalidatePrivate: true,
+		})
+		c.mapsMu.Unlock()
+	}
+
+	// We are now guaranteed that there are no translations of truncated pages,
+	// and can remove them from the cache. Since truncated pages have been
+	// removed from the backing file, they should be dropped without being
+	// written back.
+	c.dataMu.Lock()
+	defer c.dataMu.Unlock()
+	c.cache.Truncate(uint64(size), c.mfp.MemoryFile())
+	c.dirty.KeepClean(memmap.MappableRange{Start: uint64(size), End: oldpgend})
+
+	return nil
+}
+
+// Allocate implements fs.InodeOperations.Allocate.
+//
+// If offset+length does not extend past the cached size, nothing is done and
+// the backing file is not called. Otherwise the request is forwarded to the
+// backing file and, on success, the cached size is grown to offset+length and
+// the modification and status change times are updated.
+func (c *CachingInodeOperations) Allocate(ctx context.Context, offset, length int64) error {
+	// c.attr.Size is protected by both c.attrMu and c.dataMu.
+	c.attrMu.Lock()
+	defer c.attrMu.Unlock()
+	c.dataMu.Lock()
+	defer c.dataMu.Unlock()
+
+	end := offset + length
+	if end <= c.attr.Size {
+		return nil
+	}
+
+	now := ktime.NowFromContext(ctx)
+	err := c.backingFile.Allocate(ctx, offset, length)
+	if err != nil {
+		return err
+	}
+
+	c.attr.Size = end
+	c.touchModificationAndStatusChangeTimeLocked(now)
+	return nil
+}
+
+// WriteOut implements fs.InodeOperations.WriteOut.
+//
+// It proceeds in three steps, stopping at the first error: dirty cached pages
+// are written back, dirty cached attributes are written to the backing file,
+// and finally the backing file is synced. attrMu is released before the sync.
+func (c *CachingInodeOperations) WriteOut(ctx context.Context, inode *fs.Inode) error {
+	flush := func() error {
+		c.attrMu.Lock()
+		defer c.attrMu.Unlock()
+
+		// Write dirty pages back.
+		c.dataMu.Lock()
+		err := SyncDirtyAll(ctx, &c.cache, &c.dirty, uint64(c.attr.Size), c.mfp.MemoryFile(), c.backingFile.WriteFromBlocksAt)
+		c.dataMu.Unlock()
+		if err != nil {
+			return err
+		}
+
+		// SyncDirtyAll above would have grown the file if needed. On shrinks,
+		// the backing file is called directly, so size never needs to be
+		// written out here.
+		c.dirtyAttr.Size = false
+
+		// Write out cached attributes.
+		if err := c.backingFile.SetMaskedAttributes(ctx, c.dirtyAttr, c.attr); err != nil {
+			return err
+		}
+		c.dirtyAttr = fs.AttrMask{}
+		return nil
+	}
+	if err := flush(); err != nil {
+		return err
+	}
+
+	// Fsync the remote file.
+	return c.backingFile.Sync(ctx)
+}
+
+// IncLinks increments the cached link count and sets the cached modification
+// and status change times to the current time.
+func (c *CachingInodeOperations) IncLinks(ctx context.Context) {
+	c.attrMu.Lock()
+	defer c.attrMu.Unlock()
+	c.attr.Links += 1
+	now := ktime.NowFromContext(ctx)
+	c.touchModificationAndStatusChangeTimeLocked(now)
+}
+
+// DecLinks decrements the cached link count and sets the cached modification
+// and status change times to the current time.
+func (c *CachingInodeOperations) DecLinks(ctx context.Context) {
+	c.attrMu.Lock()
+	defer c.attrMu.Unlock()
+	c.attr.Links -= 1
+	now := ktime.NowFromContext(ctx)
+	c.touchModificationAndStatusChangeTimeLocked(now)
+}
+
+// TouchAccessTime sets the cached access time to the current time and marks
+// it dirty, unless the inode's mount has NoAtime set, in which case nothing
+// happens. The status change time is not touched. See
+// mm/filemap.c:do_generic_file_read -> include/linux/fs.h:file_accessed.
+func (c *CachingInodeOperations) TouchAccessTime(ctx context.Context, inode *fs.Inode) {
+	if inode.MountSource.Flags.NoAtime {
+		return
+	}
+
+	c.attrMu.Lock()
+	defer c.attrMu.Unlock()
+	now := ktime.NowFromContext(ctx)
+	c.touchAccessTimeLocked(now)
+}
+
+// touchAccessTimeLocked sets the cached access time to now and marks the
+// access time as dirty.
+//
+// Preconditions: c.attrMu is locked for writing.
+func (c *CachingInodeOperations) touchAccessTimeLocked(now time.Time) {
+	c.dirtyAttr.AccessTime = true
+	c.attr.AccessTime = now
+}
+
+// TouchModificationAndStatusChangeTime sets the cached modification and
+// status change times to the current time and marks both as dirty.
+func (c *CachingInodeOperations) TouchModificationAndStatusChangeTime(ctx context.Context) {
+	c.attrMu.Lock()
+	defer c.attrMu.Unlock()
+	now := ktime.NowFromContext(ctx)
+	c.touchModificationAndStatusChangeTimeLocked(now)
+}
+
+// touchModificationAndStatusChangeTimeLocked records now as both the cached
+// modification time and the cached status change time, and flags both
+// attributes as dirty.
+//
+// Preconditions: c.attrMu is locked for writing.
+func (c *CachingInodeOperations) touchModificationAndStatusChangeTimeLocked(now time.Time) {
+	c.attr.ModificationTime, c.attr.StatusChangeTime = now, now
+	c.dirtyAttr.ModificationTime, c.dirtyAttr.StatusChangeTime = true, true
+}
+
+// TouchStatusChangeTime sets the cached status change time to the current
+// time taken from ctx.
+func (c *CachingInodeOperations) TouchStatusChangeTime(ctx context.Context) {
+	c.attrMu.Lock()
+	defer c.attrMu.Unlock()
+
+	c.touchStatusChangeTimeLocked(ktime.NowFromContext(ctx))
+}
+
+// touchStatusChangeTimeLocked records now as the cached status change time
+// and flags the status change time as dirty.
+//
+// Preconditions: c.attrMu is locked for writing.
+func (c *CachingInodeOperations) touchStatusChangeTimeLocked(now time.Time) {
+	c.dirtyAttr.StatusChangeTime = true
+	c.attr.StatusChangeTime = now
+}
+
+// UpdateUnstable refreshes the cached unstable attributes from attr. An
+// attribute whose dirty bit is set holds a locally modified value and is
+// left as it is; every other attribute is overwritten with the value in attr.
+func (c *CachingInodeOperations) UpdateUnstable(attr fs.UnstableAttr) {
+	// attrMu guards every cached attribute.
+	c.attrMu.Lock()
+	defer c.attrMu.Unlock()
+
+	// Ownership and permissions.
+	if !c.dirtyAttr.Perms {
+		c.attr.Perms = attr.Perms
+	}
+	if !c.dirtyAttr.UID {
+		c.attr.Owner.UID = attr.Owner.UID
+	}
+	if !c.dirtyAttr.GID {
+		c.attr.Owner.GID = attr.Owner.GID
+	}
+
+	// Timestamps.
+	if !c.dirtyAttr.AccessTime {
+		c.attr.AccessTime = attr.AccessTime
+	}
+	if !c.dirtyAttr.ModificationTime {
+		c.attr.ModificationTime = attr.ModificationTime
+	}
+	if !c.dirtyAttr.StatusChangeTime {
+		c.attr.StatusChangeTime = attr.StatusChangeTime
+	}
+
+	// Usage and link count.
+	if !c.dirtyAttr.Usage {
+		c.attr.Usage = attr.Usage
+	}
+	if !c.dirtyAttr.Links {
+		c.attr.Links = attr.Links
+	}
+
+	// Size is the one attribute that needs dataMu in addition to attrMu.
+	c.dataMu.Lock()
+	if !c.dirtyAttr.Size {
+		c.attr.Size = attr.Size
+	}
+	c.dataMu.Unlock()
+}
+
+// Read copies file data into dst starting at offset, taking it from cached
+// frames where available and from the backing file otherwise. It stops when
+// dst is full, EOF is reached, or an error occurs.
+//
+// A nil error does not imply that dst was filled completely. After the copy
+// is attempted the cached access time is touched, regardless of its outcome.
+func (c *CachingInodeOperations) Read(ctx context.Context, file *fs.File, dst usermem.IOSequence, offset int64) (int64, error) {
+	if dst.NumBytes() == 0 {
+		return 0, nil
+	}
+
+	// Check for EOF up front. inodeReadWriter.ReadToBlocks repeats this check
+	// so that we need not hold c.attrMu (which would serialize reads) or
+	// c.dataMu (which would violate lock ordering) across the copy, but
+	// reading at EOF is common enough (a read syscall returning 0 is the only
+	// way to detect EOF) that it is worth testing before calling into MM.
+	//
+	// TODO(jamieliu): Separate out c.attr.Size and use atomics instead of
+	// c.dataMu.
+	c.dataMu.RLock()
+	atEOF := offset >= c.attr.Size
+	c.dataMu.RUnlock()
+	if atEOF {
+		return 0, io.EOF
+	}
+
+	rw := &inodeReadWriter{ctx, c, offset}
+	n, err := dst.CopyOutFrom(ctx, rw)
+	// Compare Linux's mm/filemap.c:do_generic_file_read() => file_accessed().
+	c.TouchAccessTime(ctx, file.Dirent.Inode)
+	return n, err
+}
+
+// Write copies data from src into the file starting at offset, storing it in
+// cached frames where available and in the backing file otherwise. It stops
+// when src is exhausted or an error occurs. The cached modification and
+// status change times are touched before the copy is attempted.
+//
+// If Write partially fills src, a non-nil error is returned.
+func (c *CachingInodeOperations) Write(ctx context.Context, src usermem.IOSequence, offset int64) (int64, error) {
+	// Hot path. Avoid defers.
+	if src.NumBytes() == 0 {
+		return 0, nil
+	}
+
+	rw := inodeReadWriter{ctx: ctx, c: c, offset: offset}
+
+	// attrMu stays held across the copy; WriteFromBlocks requires it.
+	c.attrMu.Lock()
+	// Compare Linux's mm/filemap.c:__generic_file_write_iter() => file_update_time().
+	c.touchModificationAndStatusChangeTimeLocked(ktime.NowFromContext(ctx))
+	n, err := src.CopyInTo(ctx, &rw)
+	c.attrMu.Unlock()
+	return n, err
+}
+
+// inodeReadWriter carries the state of a single Read or Write call on a
+// CachingInodeOperations. Its ReadToBlocks and WriteFromBlocks methods do the
+// actual data transfer.
+//
+// The field order matters: the struct is built with positional literals.
+type inodeReadWriter struct {
+	// ctx is the context of the originating Read or Write call.
+	ctx context.Context
+
+	// c is the inode being read from or written to.
+	c *CachingInodeOperations
+
+	// offset is the file offset of the next byte to transfer. It is advanced
+	// as bytes are copied.
+	offset int64
+}
+
+// ReadToBlocks implements safemem.Reader.ReadToBlocks.
+//
+// It copies file data starting at rw.offset into dsts, taking bytes from
+// cached segments where they exist and from the backing file for the gaps in
+// between, and advances rw.offset by the number of bytes copied. It returns
+// (0, io.EOF) if rw.offset is at or beyond the cached file size. A short read
+// from the backing file ends the read early and is reported together with
+// whatever error (possibly nil) the backing file returned.
+func (rw *inodeReadWriter) ReadToBlocks(dsts safemem.BlockSeq) (uint64, error) {
+	// Hot path. Avoid defers.
+	rw.c.dataMu.RLock()
+
+	// Compute the range to read.
+	if rw.offset >= rw.c.attr.Size {
+		rw.c.dataMu.RUnlock()
+		return 0, io.EOF
+	}
+	end := fs.ReadEndOffset(rw.offset, int64(dsts.NumBytes()), rw.c.attr.Size)
+	if end == rw.offset { // dsts.NumBytes() == 0?
+		rw.c.dataMu.RUnlock()
+		return 0, nil
+	}
+
+	mem := rw.c.mfp.MemoryFile()
+	var done uint64
+	seg, gap := rw.c.cache.Find(uint64(rw.offset))
+	for rw.offset < end {
+		mr := memmap.MappableRange{uint64(rw.offset), uint64(end)}
+		switch {
+		case seg.Ok():
+			// Get internal mappings from the cache.
+			ims, err := mem.MapInternal(seg.FileRangeOf(seg.Range().Intersect(mr)), usermem.Read)
+			if err != nil {
+				rw.c.dataMu.RUnlock()
+				return done, err
+			}
+
+			// Copy from internal mappings.
+			n, err := safemem.CopySeq(dsts, ims)
+			done += n
+			rw.offset += int64(n)
+			dsts = dsts.DropFirst64(n)
+			if err != nil {
+				rw.c.dataMu.RUnlock()
+				return done, err
+			}
+
+			// Continue.
+			seg, gap = seg.NextNonEmpty()
+
+		case gap.Ok():
+			// Read directly from the backing file.
+			gapmr := gap.Range().Intersect(mr)
+			dst := dsts.TakeFirst64(gapmr.Length())
+			n, err := rw.c.backingFile.ReadToBlocksAt(rw.ctx, dst, gapmr.Start)
+			done += n
+			rw.offset += int64(n)
+			dsts = dsts.DropFirst64(n)
+			// Partial reads are fine. But we must stop reading.
+			if n != dst.NumBytes() || err != nil {
+				rw.c.dataMu.RUnlock()
+				return done, err
+			}
+
+			// Continue.
+			seg, gap = gap.NextSegment(), FileRangeGapIterator{}
+
+		default:
+			// Neither iterator is valid, so there is nothing further to read
+			// from. A bare break here would only leave the switch and spin
+			// in the loop forever with dataMu held, so return explicitly.
+			rw.c.dataMu.RUnlock()
+			return done, nil
+		}
+	}
+	rw.c.dataMu.RUnlock()
+	return done, nil
+}
+
+// maybeGrowFile raises the cached file size, and the cached usage, to
+// rw.offset when data has been written past their previous values, marking
+// each attribute it changes as dirty.
+//
+// Preconditions: rw.c.attrMu and rw.c.dataMu must be locked.
+func (rw *inodeReadWriter) maybeGrowFile() {
+	c, off := rw.c, rw.offset
+
+	// A write that ends beyond the previous size makes the file grow.
+	if off > c.attr.Size {
+		c.attr.Size = off
+		c.dirtyAttr.Size = true
+	}
+
+	// This is incorrect if CachingInodeOperations is caching a sparse file.
+	// (In Linux, keeping inode::i_blocks up to date is the filesystem's
+	// responsibility.)
+	if off > c.attr.Usage {
+		c.attr.Usage = off
+		c.dirtyAttr.Usage = true
+	}
+}
+
+// WriteFromBlocks implements safemem.Writer.WriteFromBlocks.
+//
+// It copies data from srcs into the file starting at rw.offset, storing bytes
+// in cached segments where they exist (and marking those ranges dirty) and
+// writing directly to the backing file for the gaps in between. rw.offset is
+// advanced by the number of bytes written, and on every exit path after the
+// initial empty-write check the cached size and usage are grown to cover the
+// new offset. A short write to the backing file ends the write early and is
+// reported together with whatever error (possibly nil) the backing file
+// returned.
+//
+// Preconditions: rw.c.attrMu must be locked.
+func (rw *inodeReadWriter) WriteFromBlocks(srcs safemem.BlockSeq) (uint64, error) {
+	// Hot path. Avoid defers.
+	rw.c.dataMu.Lock()
+
+	// Compute the range to write.
+	end := fs.WriteEndOffset(rw.offset, int64(srcs.NumBytes()))
+	if end == rw.offset { // srcs.NumBytes() == 0?
+		rw.c.dataMu.Unlock()
+		return 0, nil
+	}
+
+	mf := rw.c.mfp.MemoryFile()
+	var done uint64
+	seg, gap := rw.c.cache.Find(uint64(rw.offset))
+	for rw.offset < end {
+		mr := memmap.MappableRange{uint64(rw.offset), uint64(end)}
+		switch {
+		case seg.Ok() && seg.Start() < mr.End:
+			// Get internal mappings from the cache.
+			segMR := seg.Range().Intersect(mr)
+			ims, err := mf.MapInternal(seg.FileRangeOf(segMR), usermem.Write)
+			if err != nil {
+				rw.maybeGrowFile()
+				rw.c.dataMu.Unlock()
+				return done, err
+			}
+
+			// Copy to internal mappings.
+			n, err := safemem.CopySeq(ims, srcs)
+			done += n
+			rw.offset += int64(n)
+			srcs = srcs.DropFirst64(n)
+			rw.c.dirty.MarkDirty(segMR)
+			if err != nil {
+				rw.maybeGrowFile()
+				rw.c.dataMu.Unlock()
+				return done, err
+			}
+
+			// Continue.
+			seg, gap = seg.NextNonEmpty()
+
+		case gap.Ok() && gap.Start() < mr.End:
+			// Write directly to the backing file.
+			gapmr := gap.Range().Intersect(mr)
+			src := srcs.TakeFirst64(gapmr.Length())
+			n, err := rw.c.backingFile.WriteFromBlocksAt(rw.ctx, src, gapmr.Start)
+			done += n
+			rw.offset += int64(n)
+			srcs = srcs.DropFirst64(n)
+			// Partial writes are fine. But we must stop writing.
+			if n != src.NumBytes() || err != nil {
+				rw.maybeGrowFile()
+				rw.c.dataMu.Unlock()
+				return done, err
+			}
+
+			// Continue.
+			seg, gap = gap.NextSegment(), FileRangeGapIterator{}
+
+		default:
+			// Neither a segment nor a gap overlaps the remaining range, so
+			// no further progress is possible. A bare break here would only
+			// leave the switch and spin in the loop forever with dataMu
+			// held, so finish up and return explicitly.
+			rw.maybeGrowFile()
+			rw.c.dataMu.Unlock()
+			return done, nil
+		}
+	}
+	rw.maybeGrowFile()
+	rw.c.dataMu.Unlock()
+	return done, nil
+}
+
+// useHostPageCache reports whether c performs all file I/O and memory
+// mappings through c.backingFile.FD(). A false result means c.cache may
+// contain data cached from c.backingFile.
+func (c *CachingInodeOperations) useHostPageCache() bool {
+	if c.forcePageCache {
+		return false
+	}
+	return c.backingFile.FD() >= 0
+}
+
+// AddMapping implements memmap.Mappable.AddMapping.
+//
+// It records the mapping in c.mappings and takes a host file mapper reference
+// on every newly mapped range. It always returns nil.
+func (c *CachingInodeOperations) AddMapping(ctx context.Context, ms memmap.MappingSpace, ar usermem.AddrRange, offset uint64, writable bool) error {
+	// Hot path. Avoid defers.
+	c.mapsMu.Lock()
+	mapped := c.mappings.AddMapping(ms, ar, offset, writable)
+
+	// Take the host file mapper references unconditionally, since whether we
+	// have c.backingFile.FD() >= 0 can change across save/restore.
+	for _, r := range mapped {
+		c.hostFileMapper.IncRefOn(r)
+	}
+
+	if c.useHostPageCache() {
+		if !usage.IncrementalMappedAccounting {
+			for _, r := range mapped {
+				usage.MemoryAccounting.Inc(r.Length(), usage.Mapped)
+			}
+		}
+	} else {
+		// c.Evict() will refuse to evict memory-mapped pages, so tell the
+		// MemoryFile to not bother trying.
+		mf := c.mfp.MemoryFile()
+		for _, r := range mapped {
+			mf.MarkUnevictable(c, pgalloc.EvictableRange{r.Start, r.End})
+		}
+	}
+
+	c.mapsMu.Unlock()
+	return nil
+}
+
+// RemoveMapping implements memmap.Mappable.RemoveMapping.
+//
+// It removes the mapping from c.mappings and drops the host file mapper
+// reference on every range that is no longer mapped.
+func (c *CachingInodeOperations) RemoveMapping(ctx context.Context, ms memmap.MappingSpace, ar usermem.AddrRange, offset uint64, writable bool) {
+	// Hot path. Avoid defers.
+	c.mapsMu.Lock()
+	unmapped := c.mappings.RemoveMapping(ms, ar, offset, writable)
+	for _, r := range unmapped {
+		c.hostFileMapper.DecRefOn(r)
+	}
+
+	if c.useHostPageCache() {
+		if !usage.IncrementalMappedAccounting {
+			for _, r := range unmapped {
+				usage.MemoryAccounting.Dec(r.Length(), usage.Mapped)
+			}
+		}
+	} else {
+		// Pages that no application memory mapping references any more are
+		// now considered unused; allow MemoryFile to evict them when
+		// necessary.
+		mf := c.mfp.MemoryFile()
+		c.dataMu.Lock()
+		for _, r := range unmapped {
+			// Since these pages are no longer mapped, they are no longer
+			// concurrently dirtyable by a writable memory mapping.
+			c.dirty.AllowClean(r)
+			mf.MarkEvictable(c, pgalloc.EvictableRange{r.Start, r.End})
+		}
+		c.dataMu.Unlock()
+	}
+
+	c.mapsMu.Unlock()
+}
+
+// CopyMapping implements memmap.Mappable.CopyMapping.
+//
+// The copy is handled exactly like a new mapping of dstAR; srcAR is not used.
+func (c *CachingInodeOperations) CopyMapping(ctx context.Context, ms memmap.MappingSpace, srcAR, dstAR usermem.AddrRange, offset uint64, writable bool) error {
+	err := c.AddMapping(ctx, ms, dstAR, offset, writable)
+	return err
+}
+
+// Translate implements memmap.Mappable.Translate.
+//
+// When the host page cache is in use, a single translation covering optional
+// and backed by c itself is returned. Otherwise the cache is filled for the
+// requested range (clamped to the page-rounded file size) and one translation
+// per cached segment overlapping required is returned, backed by the
+// MemoryFile. A *memmap.BusError is returned if required starts at or beyond
+// the page-rounded file size, if required extends beyond it, or if filling
+// the cache failed before all of required was translated.
+func (c *CachingInodeOperations) Translate(ctx context.Context, required, optional memmap.MappableRange, at usermem.AccessType) ([]memmap.Translation, error) {
+	// Hot path. Avoid defer.
+	if c.useHostPageCache() {
+		hostTranslation := memmap.Translation{
+			Source: optional,
+			File:   c,
+			Offset: optional.Start,
+			Perms:  usermem.AnyAccess,
+		}
+		return []memmap.Translation{hostTranslation}, nil
+	}
+
+	c.dataMu.Lock()
+
+	// Constrain translations to c.attr.Size (rounded up) to prevent
+	// translation to pages that may be concurrently truncated.
+	eofPage := fs.OffsetPageEnd(c.attr.Size)
+	beyondEOF := required.End > eofPage
+	if beyondEOF {
+		if required.Start >= eofPage {
+			c.dataMu.Unlock()
+			return nil, &memmap.BusError{io.EOF}
+		}
+		required.End = eofPage
+	}
+	if optional.End > eofPage {
+		optional.End = eofPage
+	}
+
+	mf := c.mfp.MemoryFile()
+	fillErr := c.cache.Fill(ctx, required, maxFillRange(required, optional), mf, usage.PageCache, c.backingFile.ReadToBlocksAt)
+
+	var (
+		translations  []memmap.Translation
+		translatedEnd uint64
+	)
+	seg := c.cache.FindSegment(required.Start)
+	for seg.Ok() && seg.Start() < required.End {
+		segMR := seg.Range().Intersect(optional)
+		// TODO(jamieliu): Make Translations writable even if writability is
+		// not required if already kept-dirty by another writable translation.
+		perms := usermem.AccessType{
+			Read:    true,
+			Write:   at.Write,
+			Execute: true,
+		}
+		if at.Write {
+			// From this point forward, this memory can be dirtied through the
+			// mapping at any time.
+			c.dirty.KeepDirty(segMR)
+		}
+		translations = append(translations, memmap.Translation{
+			Source: segMR,
+			File:   mf,
+			Offset: seg.FileRangeOf(segMR).Start,
+			Perms:  perms,
+		})
+		translatedEnd = segMR.End
+		seg, _ = seg.NextNonEmpty()
+	}
+
+	c.dataMu.Unlock()
+
+	// An error from c.cache.Fill only matters if it kept us from translating
+	// all of required; errors outside of required are not reported.
+	if fillErr != nil && translatedEnd < required.End {
+		return translations, &memmap.BusError{fillErr}
+	}
+	if beyondEOF {
+		return translations, &memmap.BusError{io.EOF}
+	}
+	return translations, nil
+}
+
+// maxFillRange picks the range that a cache fill may cover. required is
+// returned if it is already at least maxReadahead long, and optional is
+// returned if it is at most maxReadahead long. Otherwise the result starts at
+// required.Start and ends at optional.End, cut off at maxReadahead bytes if
+// it is longer than that.
+func maxFillRange(required, optional memmap.MappableRange) memmap.MappableRange {
+	const maxReadahead = 64 << 10 // 64 KB, chosen arbitrarily
+	switch {
+	case required.Length() >= maxReadahead:
+		return required
+	case optional.Length() <= maxReadahead:
+		return optional
+	}
+
+	// optional is too long as it stands: drop the part before required, and
+	// if that is not enough, truncate the tail as well.
+	fill := memmap.MappableRange{Start: required.Start, End: optional.End}
+	if fill.Length() > maxReadahead {
+		fill.End = fill.Start + maxReadahead
+	}
+	return fill
+}
+
+// InvalidateUnsavable implements memmap.Mappable.InvalidateUnsavable.
+//
+// It invalidates every existing translation, writes all dirty cached data
+// back to the backing file, and then discards the cache. If the writeback
+// fails, its error is returned and the cache is left in place.
+func (c *CachingInodeOperations) InvalidateUnsavable(ctx context.Context) error {
+	c.mapsMu.Lock()
+	defer c.mapsMu.Unlock()
+
+	// Whether we have a host fd (and consequently what platform.File is
+	// mapped) can change across save/restore, so invalidate all translations
+	// unconditionally.
+	c.mappings.InvalidateAll(memmap.InvalidateOpts{})
+
+	c.dataMu.Lock()
+	defer c.dataMu.Unlock()
+
+	// Sync the cache's contents so that if we have a host fd after restore,
+	// the remote file's contents are coherent.
+	mf := c.mfp.MemoryFile()
+	err := SyncDirtyAll(ctx, &c.cache, &c.dirty, uint64(c.attr.Size), mf, c.backingFile.WriteFromBlocksAt)
+	if err != nil {
+		return err
+	}
+
+	// Discard the cache so that it's not stored in saved state. This is safe
+	// because per InvalidateUnsavable invariants, no new translations can have
+	// been returned after we invalidated all existing translations above.
+	c.cache.DropAll(mf)
+	c.dirty.RemoveAll()
+	return nil
+}
+
+// Evict implements pgalloc.EvictableMemoryUser.Evict.
+//
+// For every part of er that is not currently memory-mapped, dirty cached data
+// is written back to the backing file and the cached pages are dropped. A
+// writeback failure is logged as a warning; the pages are dropped regardless.
+func (c *CachingInodeOperations) Evict(ctx context.Context, er pgalloc.EvictableRange) {
+	c.mapsMu.Lock()
+	defer c.mapsMu.Unlock()
+	c.dataMu.Lock()
+	defer c.dataMu.Unlock()
+
+	mf := c.mfp.MemoryFile()
+	mr := memmap.MappableRange{er.Start, er.End}
+
+	// Walk the gaps between mappings: only pages that are no longer
+	// memory-mapped may be evicted.
+	for mgap := c.mappings.LowerBoundGap(mr.Start); mgap.Ok() && mgap.Start() < mr.End; mgap = mgap.NextGap() {
+		unmapped := mgap.Range().Intersect(mr)
+		if unmapped.Length() != 0 {
+			if err := SyncDirty(ctx, unmapped, &c.cache, &c.dirty, uint64(c.attr.Size), mf, c.backingFile.WriteFromBlocksAt); err != nil {
+				log.Warningf("Failed to writeback cached data %v: %v", unmapped, err)
+			}
+			c.cache.Drop(unmapped, mf)
+			c.dirty.KeepClean(unmapped)
+		}
+	}
+}
+
+// IncRef implements platform.File.IncRef. This is used when we directly map an
+// underlying host fd and CachingInodeOperations is used as the platform.File
+// during translation.
+//
+// Every part of fr that already has a reference count has it incremented;
+// every part that has none is inserted with a count of one.
+func (c *CachingInodeOperations) IncRef(fr platform.FileRange) {
+	// Hot path. Avoid defers.
+	c.dataMu.Lock()
+	seg, gap := c.refs.Find(fr.Start)
+	for {
+		if seg.Ok() && seg.Start() < fr.End {
+			// Already referenced: bump the count on the part inside fr.
+			seg = c.refs.Isolate(seg, fr)
+			seg.SetValue(seg.Value() + 1)
+			seg, gap = seg.NextNonEmpty()
+			continue
+		}
+		if gap.Ok() && gap.Start() < fr.End {
+			// Not yet referenced: start a new count at one.
+			newRange := gap.Range().Intersect(fr)
+			if usage.IncrementalMappedAccounting {
+				usage.MemoryAccounting.Inc(newRange.Length(), usage.Mapped)
+			}
+			seg, gap = c.refs.InsertWithoutMerging(gap, newRange, 1).NextNonEmpty()
+			continue
+		}
+		break
+	}
+	c.refs.MergeAdjacent(fr)
+	c.dataMu.Unlock()
+}
+
+// DecRef implements platform.File.DecRef. This is used when we directly map an
+// underlying host fd and CachingInodeOperations is used as the platform.File
+// during translation.
+//
+// Every referenced part of fr has its count decremented; parts whose count
+// was one are removed from c.refs.
+func (c *CachingInodeOperations) DecRef(fr platform.FileRange) {
+	// Hot path. Avoid defers.
+	c.dataMu.Lock()
+	for seg := c.refs.FindSegment(fr.Start); seg.Ok() && seg.Start() < fr.End; {
+		seg = c.refs.Isolate(seg, fr)
+		count := seg.Value()
+		if count != 1 {
+			seg.SetValue(count - 1)
+			seg = seg.NextSegment()
+			continue
+		}
+		// This was the last reference on the range.
+		if usage.IncrementalMappedAccounting {
+			usage.MemoryAccounting.Dec(seg.Range().Length(), usage.Mapped)
+		}
+		seg = c.refs.Remove(seg).NextSegment()
+	}
+	c.refs.MergeAdjacent(fr)
+	c.dataMu.Unlock()
+}
+
+// MapInternal implements platform.File.MapInternal. This is used when we
+// directly map an underlying host fd and CachingInodeOperations is used as the
+// platform.File during translation.
+//
+// The request is forwarded to c.hostFileMapper together with the backing
+// file's fd; only at.Write is passed along from the requested access type.
+func (c *CachingInodeOperations) MapInternal(fr platform.FileRange, at usermem.AccessType) (safemem.BlockSeq, error) {
+	fd := c.backingFile.FD()
+	return c.hostFileMapper.MapInternal(fr, fd, at.Write)
+}
+
+// FD implements platform.File.FD. This is used when we directly map an
+// underlying host fd and CachingInodeOperations is used as the platform.File
+// during translation.
+//
+// The returned value is the backing file's fd.
+func (c *CachingInodeOperations) FD() int {
+	fd := c.backingFile.FD()
+	return fd
+}
author	gVisor bot <gvisor-bot@google.com>	2019-06-02 06:44:55 +0000
committer	gVisor bot <gvisor-bot@google.com>	2019-06-02 06:44:55 +0000
commit	ceb0d792f328d1fc0692197d8856a43c3936a571 (patch)
tree	83155f302eff44a78bcc30a3a08f4efe59a79379 /pkg/sentry/fs/fsutil
parent	deb7ecf1e46862d54f4b102f2d163cfbcfc37f3b (diff)
parent	216da0b733dbed9aad9b2ab92ac75bcb906fd7ee (diff)