author    gVisor bot <gvisor-bot@google.com>  2019-06-02 06:44:55 +0000
committer gVisor bot <gvisor-bot@google.com>  2019-06-02 06:44:55 +0000
commit    ceb0d792f328d1fc0692197d8856a43c3936a571 (patch)
tree      83155f302eff44a78bcc30a3a08f4efe59a79379 /pkg/sentry/fs/fsutil
parent    deb7ecf1e46862d54f4b102f2d163cfbcfc37f3b (diff)
parent    216da0b733dbed9aad9b2ab92ac75bcb906fd7ee (diff)
Merge 216da0b7 (automated)
Diffstat (limited to 'pkg/sentry/fs/fsutil')
-rw-r--r--  pkg/sentry/fs/fsutil/dirty_set.go                 237
-rwxr-xr-x  pkg/sentry/fs/fsutil/dirty_set_impl.go           1274
-rw-r--r--  pkg/sentry/fs/fsutil/file.go                      394
-rw-r--r--  pkg/sentry/fs/fsutil/file_range_set.go            209
-rwxr-xr-x  pkg/sentry/fs/fsutil/file_range_set_impl.go      1274
-rw-r--r--  pkg/sentry/fs/fsutil/frame_ref_set.go              50
-rwxr-xr-x  pkg/sentry/fs/fsutil/frame_ref_set_impl.go       1274
-rw-r--r--  pkg/sentry/fs/fsutil/fsutil.go                     24
-rwxr-xr-x  pkg/sentry/fs/fsutil/fsutil_state_autogen.go      349
-rw-r--r--  pkg/sentry/fs/fsutil/host_file_mapper.go          211
-rw-r--r--  pkg/sentry/fs/fsutil/host_file_mapper_state.go     20
-rw-r--r--  pkg/sentry/fs/fsutil/host_file_mapper_unsafe.go    27
-rw-r--r--  pkg/sentry/fs/fsutil/host_mappable.go             197
-rw-r--r--  pkg/sentry/fs/fsutil/inode.go                     503
-rw-r--r--  pkg/sentry/fs/fsutil/inode_cached.go             1004
15 files changed, 7047 insertions, 0 deletions
diff --git a/pkg/sentry/fs/fsutil/dirty_set.go b/pkg/sentry/fs/fsutil/dirty_set.go
new file mode 100644
index 000000000..f1451d77a
--- /dev/null
+++ b/pkg/sentry/fs/fsutil/dirty_set.go
@@ -0,0 +1,237 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package fsutil
+
+import (
+ "math"
+
+ "gvisor.googlesource.com/gvisor/pkg/sentry/context"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/memmap"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/platform"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/safemem"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
+)
+
+// DirtySet maps offsets into a memmap.Mappable to DirtyInfo. It is used to
+// implement Mappables that cache data from another source.
+//
+// type DirtySet <generated by go_generics>
+
+// DirtyInfo is the value type of DirtySet, and represents information about a
+// Mappable offset that is dirty (the cached data for that offset is newer than
+// its source).
+//
+// +stateify savable
+type DirtyInfo struct {
+ // Keep is true if the represented offset is concurrently writable, such
+ // that writing the data for that offset back to the source does not
+ // guarantee that the offset is clean (since it may be concurrently
+ // rewritten after the writeback).
+ Keep bool
+}
+
+// dirtySetFunctions implements segment.Functions for DirtySet.
+type dirtySetFunctions struct{}
+
+// MinKey implements segment.Functions.MinKey.
+func (dirtySetFunctions) MinKey() uint64 {
+ return 0
+}
+
+// MaxKey implements segment.Functions.MaxKey.
+func (dirtySetFunctions) MaxKey() uint64 {
+ return math.MaxUint64
+}
+
+// ClearValue implements segment.Functions.ClearValue.
+func (dirtySetFunctions) ClearValue(val *DirtyInfo) {
+}
+
+// Merge implements segment.Functions.Merge.
+func (dirtySetFunctions) Merge(_ memmap.MappableRange, val1 DirtyInfo, _ memmap.MappableRange, val2 DirtyInfo) (DirtyInfo, bool) {
+ if val1 != val2 {
+ return DirtyInfo{}, false
+ }
+ return val1, true
+}
+
+// Split implements segment.Functions.Split.
+func (dirtySetFunctions) Split(_ memmap.MappableRange, val DirtyInfo, _ uint64) (DirtyInfo, DirtyInfo) {
+ return val, val
+}
+
+// MarkClean marks all offsets in mr as not dirty, except for those to which
+// KeepDirty has been applied.
+func (ds *DirtySet) MarkClean(mr memmap.MappableRange) {
+ seg := ds.LowerBoundSegment(mr.Start)
+ for seg.Ok() && seg.Start() < mr.End {
+ if seg.Value().Keep {
+ seg = seg.NextSegment()
+ continue
+ }
+ seg = ds.Isolate(seg, mr)
+ seg = ds.Remove(seg).NextSegment()
+ }
+}
+
+// KeepClean marks all offsets in mr as not dirty, even those that were
+// previously kept dirty by KeepDirty.
+func (ds *DirtySet) KeepClean(mr memmap.MappableRange) {
+ ds.RemoveRange(mr)
+}
+
+// MarkDirty marks all offsets in mr as dirty.
+func (ds *DirtySet) MarkDirty(mr memmap.MappableRange) {
+ ds.setDirty(mr, false)
+}
+
+// KeepDirty marks all offsets in mr as dirty and prevents them from being
+// marked as clean by MarkClean.
+func (ds *DirtySet) KeepDirty(mr memmap.MappableRange) {
+ ds.setDirty(mr, true)
+}
+
+func (ds *DirtySet) setDirty(mr memmap.MappableRange, keep bool) {
+ var changedAny bool
+ defer func() {
+ if changedAny {
+ // Merge segments split by Isolate to reduce cost of iteration.
+ ds.MergeRange(mr)
+ }
+ }()
+ seg, gap := ds.Find(mr.Start)
+ for {
+ switch {
+ case seg.Ok() && seg.Start() < mr.End:
+ if keep && !seg.Value().Keep {
+ changedAny = true
+ seg = ds.Isolate(seg, mr)
+ seg.ValuePtr().Keep = true
+ }
+ seg, gap = seg.NextNonEmpty()
+
+ case gap.Ok() && gap.Start() < mr.End:
+ changedAny = true
+ seg = ds.Insert(gap, gap.Range().Intersect(mr), DirtyInfo{keep})
+ seg, gap = seg.NextNonEmpty()
+
+ default:
+ return
+ }
+ }
+}
+
+// AllowClean allows MarkClean to mark offsets in mr as not dirty, ending the
+// effect of a previous call to KeepDirty. (It does not itself mark those
+// offsets as not dirty.)
+func (ds *DirtySet) AllowClean(mr memmap.MappableRange) {
+ var changedAny bool
+ defer func() {
+ if changedAny {
+ // Merge segments split by Isolate to reduce cost of iteration.
+ ds.MergeRange(mr)
+ }
+ }()
+ for seg := ds.LowerBoundSegment(mr.Start); seg.Ok() && seg.Start() < mr.End; seg = seg.NextSegment() {
+ if seg.Value().Keep {
+ changedAny = true
+ seg = ds.Isolate(seg, mr)
+ seg.ValuePtr().Keep = false
+ }
+ }
+}
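+
+// Illustrative Keep lifecycle (hypothetical usage, not defined in this
+// file). While an offset is mapped writable, keep it dirty so a writeback
+// cannot race with concurrent writes; once the last writable mapping is
+// removed, let the next writeback clean it:
+//
+//	ds.KeepDirty(mr)  // writable mapping established
+//	ds.AllowClean(mr) // last writable mapping removed
+//	ds.MarkClean(mr)  // after the next successful writeback of mr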
+
+// SyncDirty passes pages in the range mr that are stored in cache and
+// identified as dirty to writeAt, updating dirty to reflect successful writes.
+// If writeAt returns a successful partial write, SyncDirty will call it
+// repeatedly until all bytes have been written. max is the true size of the
+// cached object; offsets beyond max will not be passed to writeAt, even if
+// they are marked dirty.
+func SyncDirty(ctx context.Context, mr memmap.MappableRange, cache *FileRangeSet, dirty *DirtySet, max uint64, mem platform.File, writeAt func(ctx context.Context, srcs safemem.BlockSeq, offset uint64) (uint64, error)) error {
+ var changedDirty bool
+ defer func() {
+ if changedDirty {
+ // Merge segments split by Isolate to reduce cost of iteration.
+ dirty.MergeRange(mr)
+ }
+ }()
+ dseg := dirty.LowerBoundSegment(mr.Start)
+ for dseg.Ok() && dseg.Start() < mr.End {
+ var dr memmap.MappableRange
+ if dseg.Value().Keep {
+ dr = dseg.Range().Intersect(mr)
+ } else {
+ changedDirty = true
+ dseg = dirty.Isolate(dseg, mr)
+ dr = dseg.Range()
+ }
+ if err := syncDirtyRange(ctx, dr, cache, max, mem, writeAt); err != nil {
+ return err
+ }
+ if dseg.Value().Keep {
+ dseg = dseg.NextSegment()
+ } else {
+ dseg = dirty.Remove(dseg).NextSegment()
+ }
+ }
+ return nil
+}
+
+// SyncDirtyAll passes all pages stored in cache identified as dirty to
+// writeAt, updating dirty to reflect successful writes. If writeAt returns a
+// successful partial write, SyncDirtyAll will call it repeatedly until all
+// bytes have been written. max is the true size of the cached object; offsets
+// beyond max will not be passed to writeAt, even if they are marked dirty.
+func SyncDirtyAll(ctx context.Context, cache *FileRangeSet, dirty *DirtySet, max uint64, mem platform.File, writeAt func(ctx context.Context, srcs safemem.BlockSeq, offset uint64) (uint64, error)) error {
+ dseg := dirty.FirstSegment()
+ for dseg.Ok() {
+ if err := syncDirtyRange(ctx, dseg.Range(), cache, max, mem, writeAt); err != nil {
+ return err
+ }
+ if dseg.Value().Keep {
+ dseg = dseg.NextSegment()
+ } else {
+ dseg = dirty.Remove(dseg).NextSegment()
+ }
+ }
+ return nil
+}
+
+// Preconditions: mr must be page-aligned.
+func syncDirtyRange(ctx context.Context, mr memmap.MappableRange, cache *FileRangeSet, max uint64, mem platform.File, writeAt func(ctx context.Context, srcs safemem.BlockSeq, offset uint64) (uint64, error)) error {
+ for cseg := cache.LowerBoundSegment(mr.Start); cseg.Ok() && cseg.Start() < mr.End; cseg = cseg.NextSegment() {
+ wbr := cseg.Range().Intersect(mr)
+ if max < wbr.Start {
+ break
+ }
+ ims, err := mem.MapInternal(cseg.FileRangeOf(wbr), usermem.Read)
+ if err != nil {
+ return err
+ }
+ if max < wbr.End {
+ ims = ims.TakeFirst64(max - wbr.Start)
+ }
+ offset := wbr.Start
+ for !ims.IsEmpty() {
+ n, err := writeAt(ctx, ims, offset)
+ if err != nil {
+ return err
+ }
+ offset += n
+ ims = ims.DropFirst64(n)
+ }
+ }
+ return nil
+}
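
A usage sketch for the helpers above (hypothetical: cachedFile and its fields are illustrative, and the sketch assumes it lives in package fsutil alongside these types):

	type cachedFile struct {
		cache FileRangeSet
		dirty DirtySet
		size  uint64
		mem   platform.File
	}

	// markWritten records that mr now holds data newer than the source.
	func (f *cachedFile) markWritten(mr memmap.MappableRange) {
		f.dirty.MarkDirty(mr)
	}

	// flush writes all dirty cached data back via writeAt. SyncDirty
	// retries partial writes and drops non-Keep entries on success.
	func (f *cachedFile) flush(ctx context.Context, writeAt func(ctx context.Context, srcs safemem.BlockSeq, offset uint64) (uint64, error)) error {
		return SyncDirty(ctx, memmap.MappableRange{Start: 0, End: f.size}, &f.cache, &f.dirty, f.size, f.mem, writeAt)
	}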
diff --git a/pkg/sentry/fs/fsutil/dirty_set_impl.go b/pkg/sentry/fs/fsutil/dirty_set_impl.go
new file mode 100755
index 000000000..5f25068a1
--- /dev/null
+++ b/pkg/sentry/fs/fsutil/dirty_set_impl.go
@@ -0,0 +1,1274 @@
+package fsutil
+
+import (
+ __generics_imported0 "gvisor.googlesource.com/gvisor/pkg/sentry/memmap"
+)
+
+import (
+ "bytes"
+ "fmt"
+)
+
+const (
+ // minDegree is the minimum degree of an internal node in a Set B-tree.
+ //
+ // - Any non-root node has at least minDegree-1 segments.
+ //
+ // - Any non-root internal (non-leaf) node has at least minDegree children.
+ //
+ // - The root node may have fewer than minDegree-1 segments, but it may
+ // only have 0 segments if the tree is empty.
+ //
+ // Our implementation requires minDegree >= 3. Higher values of minDegree
+ // usually improve performance, but increase memory usage for small sets.
+ DirtyminDegree = 3
+
+ DirtymaxDegree = 2 * DirtyminDegree
+)
+
+// A Set is a mapping of segments with non-overlapping Range keys. The zero
+// value for a Set is an empty set. Set values are not safely movable nor
+// copyable. Set is thread-compatible.
+//
+// +stateify savable
+type DirtySet struct {
+ root Dirtynode `state:".(*DirtySegmentDataSlices)"`
+}
+
+// IsEmpty returns true if the set contains no segments.
+func (s *DirtySet) IsEmpty() bool {
+ return s.root.nrSegments == 0
+}
+
+// IsEmptyRange returns true iff no segments in the set overlap the given
+// range. This is semantically equivalent to s.SpanRange(r) == 0, but may be
+// more efficient.
+func (s *DirtySet) IsEmptyRange(r __generics_imported0.MappableRange) bool {
+ switch {
+ case r.Length() < 0:
+ panic(fmt.Sprintf("invalid range %v", r))
+ case r.Length() == 0:
+ return true
+ }
+ _, gap := s.Find(r.Start)
+ if !gap.Ok() {
+ return false
+ }
+ return r.End <= gap.End()
+}
+
+// Span returns the total size of all segments in the set.
+func (s *DirtySet) Span() uint64 {
+ var sz uint64
+ for seg := s.FirstSegment(); seg.Ok(); seg = seg.NextSegment() {
+ sz += seg.Range().Length()
+ }
+ return sz
+}
+
+// SpanRange returns the total size of the intersection of segments in the set
+// with the given range.
+func (s *DirtySet) SpanRange(r __generics_imported0.MappableRange) uint64 {
+ switch {
+ case r.Length() < 0:
+ panic(fmt.Sprintf("invalid range %v", r))
+ case r.Length() == 0:
+ return 0
+ }
+ var sz uint64
+ for seg := s.LowerBoundSegment(r.Start); seg.Ok() && seg.Start() < r.End; seg = seg.NextSegment() {
+ sz += seg.Range().Intersect(r).Length()
+ }
+ return sz
+}
+
+// FirstSegment returns the first segment in the set. If the set is empty,
+// FirstSegment returns a terminal iterator.
+func (s *DirtySet) FirstSegment() DirtyIterator {
+ if s.root.nrSegments == 0 {
+ return DirtyIterator{}
+ }
+ return s.root.firstSegment()
+}
+
+// LastSegment returns the last segment in the set. If the set is empty,
+// LastSegment returns a terminal iterator.
+func (s *DirtySet) LastSegment() DirtyIterator {
+ if s.root.nrSegments == 0 {
+ return DirtyIterator{}
+ }
+ return s.root.lastSegment()
+}
+
+// FirstGap returns the first gap in the set.
+func (s *DirtySet) FirstGap() DirtyGapIterator {
+ n := &s.root
+ for n.hasChildren {
+ n = n.children[0]
+ }
+ return DirtyGapIterator{n, 0}
+}
+
+// LastGap returns the last gap in the set.
+func (s *DirtySet) LastGap() DirtyGapIterator {
+ n := &s.root
+ for n.hasChildren {
+ n = n.children[n.nrSegments]
+ }
+ return DirtyGapIterator{n, n.nrSegments}
+}
+
+// Find returns the segment or gap whose range contains the given key. If a
+// segment is found, the returned Iterator is non-terminal and the
+// returned GapIterator is terminal. Otherwise, the returned Iterator is
+// terminal and the returned GapIterator is non-terminal.
+func (s *DirtySet) Find(key uint64) (DirtyIterator, DirtyGapIterator) {
+ n := &s.root
+ for {
+
+ lower := 0
+ upper := n.nrSegments
+ for lower < upper {
+ i := lower + (upper-lower)/2
+ if r := n.keys[i]; key < r.End {
+ if key >= r.Start {
+ return DirtyIterator{n, i}, DirtyGapIterator{}
+ }
+ upper = i
+ } else {
+ lower = i + 1
+ }
+ }
+ i := lower
+ if !n.hasChildren {
+ return DirtyIterator{}, DirtyGapIterator{n, i}
+ }
+ n = n.children[i]
+ }
+}
+
+// FindSegment returns the segment whose range contains the given key. If no
+// such segment exists, FindSegment returns a terminal iterator.
+func (s *DirtySet) FindSegment(key uint64) DirtyIterator {
+ seg, _ := s.Find(key)
+ return seg
+}
+
+// LowerBoundSegment returns the segment with the lowest range that contains a
+// key greater than or equal to min. If no such segment exists,
+// LowerBoundSegment returns a terminal iterator.
+func (s *DirtySet) LowerBoundSegment(min uint64) DirtyIterator {
+ seg, gap := s.Find(min)
+ if seg.Ok() {
+ return seg
+ }
+ return gap.NextSegment()
+}
+
+// UpperBoundSegment returns the segment with the highest range that contains a
+// key less than or equal to max. If no such segment exists, UpperBoundSegment
+// returns a terminal iterator.
+func (s *DirtySet) UpperBoundSegment(max uint64) DirtyIterator {
+ seg, gap := s.Find(max)
+ if seg.Ok() {
+ return seg
+ }
+ return gap.PrevSegment()
+}
+
+// FindGap returns the gap containing the given key. If no such gap exists
+// (i.e. the set contains a segment containing that key), FindGap returns a
+// terminal iterator.
+func (s *DirtySet) FindGap(key uint64) DirtyGapIterator {
+ _, gap := s.Find(key)
+ return gap
+}
+
+// LowerBoundGap returns the gap with the lowest range that is greater than or
+// equal to min.
+func (s *DirtySet) LowerBoundGap(min uint64) DirtyGapIterator {
+ seg, gap := s.Find(min)
+ if gap.Ok() {
+ return gap
+ }
+ return seg.NextGap()
+}
+
+// UpperBoundGap returns the gap with the highest range that is less than or
+// equal to max.
+func (s *DirtySet) UpperBoundGap(max uint64) DirtyGapIterator {
+ seg, gap := s.Find(max)
+ if gap.Ok() {
+ return gap
+ }
+ return seg.PrevGap()
+}
+
+// Add inserts the given segment into the set and returns true. If the new
+// segment can be merged with adjacent segments, Add will do so. If the new
+// segment would overlap an existing segment, Add returns false. If Add
+// succeeds, all existing iterators are invalidated.
+func (s *DirtySet) Add(r __generics_imported0.MappableRange, val DirtyInfo) bool {
+ if r.Length() <= 0 {
+ panic(fmt.Sprintf("invalid segment range %v", r))
+ }
+ gap := s.FindGap(r.Start)
+ if !gap.Ok() {
+ return false
+ }
+ if r.End > gap.End() {
+ return false
+ }
+ s.Insert(gap, r, val)
+ return true
+}
+
+// AddWithoutMerging inserts the given segment into the set and returns true.
+// If it would overlap an existing segment, AddWithoutMerging does nothing and
+// returns false. If AddWithoutMerging succeeds, all existing iterators are
+// invalidated.
+func (s *DirtySet) AddWithoutMerging(r __generics_imported0.MappableRange, val DirtyInfo) bool {
+ if r.Length() <= 0 {
+ panic(fmt.Sprintf("invalid segment range %v", r))
+ }
+ gap := s.FindGap(r.Start)
+ if !gap.Ok() {
+ return false
+ }
+ if r.End > gap.End() {
+ return false
+ }
+ s.InsertWithoutMergingUnchecked(gap, r, val)
+ return true
+}
+
+// Insert inserts the given segment into the given gap. If the new segment can
+// be merged with adjacent segments, Insert will do so. Insert returns an
+// iterator to the segment containing the inserted value (which may have been
+// merged with other values). All existing iterators (including gap, but not
+// including the returned iterator) are invalidated.
+//
+// If the gap cannot accommodate the segment, or if r is invalid, Insert panics.
+//
+// Insert is semantically equivalent to an InsertWithoutMerging followed by a
+// Merge, but may be more efficient. Note that there is no unchecked variant of
+// Insert since Insert must retrieve and inspect gap's predecessor and
+// successor segments regardless.
+func (s *DirtySet) Insert(gap DirtyGapIterator, r __generics_imported0.MappableRange, val DirtyInfo) DirtyIterator {
+ if r.Length() <= 0 {
+ panic(fmt.Sprintf("invalid segment range %v", r))
+ }
+ prev, next := gap.PrevSegment(), gap.NextSegment()
+ if prev.Ok() && prev.End() > r.Start {
+ panic(fmt.Sprintf("new segment %v overlaps predecessor %v", r, prev.Range()))
+ }
+ if next.Ok() && next.Start() < r.End {
+ panic(fmt.Sprintf("new segment %v overlaps successor %v", r, next.Range()))
+ }
+ if prev.Ok() && prev.End() == r.Start {
+ if mval, ok := (dirtySetFunctions{}).Merge(prev.Range(), prev.Value(), r, val); ok {
+ prev.SetEndUnchecked(r.End)
+ prev.SetValue(mval)
+ if next.Ok() && next.Start() == r.End {
+ val = mval
+ if mval, ok := (dirtySetFunctions{}).Merge(prev.Range(), val, next.Range(), next.Value()); ok {
+ prev.SetEndUnchecked(next.End())
+ prev.SetValue(mval)
+ return s.Remove(next).PrevSegment()
+ }
+ }
+ return prev
+ }
+ }
+ if next.Ok() && next.Start() == r.End {
+ if mval, ok := (dirtySetFunctions{}).Merge(r, val, next.Range(), next.Value()); ok {
+ next.SetStartUnchecked(r.Start)
+ next.SetValue(mval)
+ return next
+ }
+ }
+ return s.InsertWithoutMergingUnchecked(gap, r, val)
+}
+
+// InsertWithoutMerging inserts the given segment into the given gap and
+// returns an iterator to the inserted segment. All existing iterators
+// (including gap, but not including the returned iterator) are invalidated.
+//
+// If the gap cannot accommodate the segment, or if r is invalid,
+// InsertWithoutMerging panics.
+func (s *DirtySet) InsertWithoutMerging(gap DirtyGapIterator, r __generics_imported0.MappableRange, val DirtyInfo) DirtyIterator {
+ if r.Length() <= 0 {
+ panic(fmt.Sprintf("invalid segment range %v", r))
+ }
+ if gr := gap.Range(); !gr.IsSupersetOf(r) {
+ panic(fmt.Sprintf("cannot insert segment range %v into gap range %v", r, gr))
+ }
+ return s.InsertWithoutMergingUnchecked(gap, r, val)
+}
+
+// InsertWithoutMergingUnchecked inserts the given segment into the given gap
+// and returns an iterator to the inserted segment. All existing iterators
+// (including gap, but not including the returned iterator) are invalidated.
+//
+// Preconditions: r.Start >= gap.Start(); r.End <= gap.End().
+func (s *DirtySet) InsertWithoutMergingUnchecked(gap DirtyGapIterator, r __generics_imported0.MappableRange, val DirtyInfo) DirtyIterator {
+ gap = gap.node.rebalanceBeforeInsert(gap)
+ copy(gap.node.keys[gap.index+1:], gap.node.keys[gap.index:gap.node.nrSegments])
+ copy(gap.node.values[gap.index+1:], gap.node.values[gap.index:gap.node.nrSegments])
+ gap.node.keys[gap.index] = r
+ gap.node.values[gap.index] = val
+ gap.node.nrSegments++
+ return DirtyIterator{gap.node, gap.index}
+}
+
+// Remove removes the given segment and returns an iterator to the vacated gap.
+// All existing iterators (including seg, but not including the returned
+// iterator) are invalidated.
+func (s *DirtySet) Remove(seg DirtyIterator) DirtyGapIterator {
+
+ if seg.node.hasChildren {
+
+ victim := seg.PrevSegment()
+
+ seg.SetRangeUnchecked(victim.Range())
+ seg.SetValue(victim.Value())
+ return s.Remove(victim).NextGap()
+ }
+ copy(seg.node.keys[seg.index:], seg.node.keys[seg.index+1:seg.node.nrSegments])
+ copy(seg.node.values[seg.index:], seg.node.values[seg.index+1:seg.node.nrSegments])
+ dirtySetFunctions{}.ClearValue(&seg.node.values[seg.node.nrSegments-1])
+ seg.node.nrSegments--
+ return seg.node.rebalanceAfterRemove(DirtyGapIterator{seg.node, seg.index})
+}
+
+// RemoveAll removes all segments from the set. All existing iterators are
+// invalidated.
+func (s *DirtySet) RemoveAll() {
+ s.root = Dirtynode{}
+}
+
+// RemoveRange removes all segments in the given range. An iterator to the
+// newly formed gap is returned, and all existing iterators are invalidated.
+func (s *DirtySet) RemoveRange(r __generics_imported0.MappableRange) DirtyGapIterator {
+ seg, gap := s.Find(r.Start)
+ if seg.Ok() {
+ seg = s.Isolate(seg, r)
+ gap = s.Remove(seg)
+ }
+ for seg = gap.NextSegment(); seg.Ok() && seg.Start() < r.End; seg = gap.NextSegment() {
+ seg = s.Isolate(seg, r)
+ gap = s.Remove(seg)
+ }
+ return gap
+}
+
+// Merge attempts to merge two neighboring segments. If successful, Merge
+// returns an iterator to the merged segment, and all existing iterators are
+// invalidated. Otherwise, Merge returns a terminal iterator.
+//
+// If first is not the predecessor of second, Merge panics.
+func (s *DirtySet) Merge(first, second DirtyIterator) DirtyIterator {
+ if first.NextSegment() != second {
+ panic(fmt.Sprintf("attempt to merge non-neighboring segments %v, %v", first.Range(), second.Range()))
+ }
+ return s.MergeUnchecked(first, second)
+}
+
+// MergeUnchecked attempts to merge two neighboring segments. If successful,
+// MergeUnchecked returns an iterator to the merged segment, and all existing
+// iterators are invalidated. Otherwise, MergeUnchecked returns a terminal
+// iterator.
+//
+// Precondition: first is the predecessor of second: first.NextSegment() ==
+// second, first == second.PrevSegment().
+func (s *DirtySet) MergeUnchecked(first, second DirtyIterator) DirtyIterator {
+ if first.End() == second.Start() {
+ if mval, ok := (dirtySetFunctions{}).Merge(first.Range(), first.Value(), second.Range(), second.Value()); ok {
+
+ first.SetEndUnchecked(second.End())
+ first.SetValue(mval)
+ return s.Remove(second).PrevSegment()
+ }
+ }
+ return DirtyIterator{}
+}
+
+// MergeAll attempts to merge all adjacent segments in the set. All existing
+// iterators are invalidated.
+func (s *DirtySet) MergeAll() {
+ seg := s.FirstSegment()
+ if !seg.Ok() {
+ return
+ }
+ next := seg.NextSegment()
+ for next.Ok() {
+ if mseg := s.MergeUnchecked(seg, next); mseg.Ok() {
+ seg, next = mseg, mseg.NextSegment()
+ } else {
+ seg, next = next, next.NextSegment()
+ }
+ }
+}
+
+// MergeRange attempts to merge all adjacent segments that contain a key in the
+// given range. All existing iterators are invalidated.
+func (s *DirtySet) MergeRange(r __generics_imported0.MappableRange) {
+ seg := s.LowerBoundSegment(r.Start)
+ if !seg.Ok() {
+ return
+ }
+ next := seg.NextSegment()
+ for next.Ok() && next.Range().Start < r.End {
+ if mseg := s.MergeUnchecked(seg, next); mseg.Ok() {
+ seg, next = mseg, mseg.NextSegment()
+ } else {
+ seg, next = next, next.NextSegment()
+ }
+ }
+}
+
+// MergeAdjacent attempts to merge the segment containing r.Start with its
+// predecessor, and the segment containing r.End-1 with its successor.
+func (s *DirtySet) MergeAdjacent(r __generics_imported0.MappableRange) {
+ first := s.FindSegment(r.Start)
+ if first.Ok() {
+ if prev := first.PrevSegment(); prev.Ok() {
+ s.Merge(prev, first)
+ }
+ }
+ last := s.FindSegment(r.End - 1)
+ if last.Ok() {
+ if next := last.NextSegment(); next.Ok() {
+ s.Merge(last, next)
+ }
+ }
+}
+
+// Split splits the given segment at the given key and returns iterators to the
+// two resulting segments. All existing iterators (including seg, but not
+// including the returned iterators) are invalidated.
+//
+// If the segment cannot be split at split (because split is at the start or
+// end of the segment's range, so splitting would produce a segment with zero
+// length, or because split falls outside the segment's range altogether),
+// Split panics.
+func (s *DirtySet) Split(seg DirtyIterator, split uint64) (DirtyIterator, DirtyIterator) {
+ if !seg.Range().CanSplitAt(split) {
+ panic(fmt.Sprintf("can't split %v at %v", seg.Range(), split))
+ }
+ return s.SplitUnchecked(seg, split)
+}
+
+// SplitUnchecked splits the given segment at the given key and returns
+// iterators to the two resulting segments. All existing iterators (including
+// seg, but not including the returned iterators) are invalidated.
+//
+// Preconditions: seg.Start() < split < seg.End().
+func (s *DirtySet) SplitUnchecked(seg DirtyIterator, split uint64) (DirtyIterator, DirtyIterator) {
+ val1, val2 := (dirtySetFunctions{}).Split(seg.Range(), seg.Value(), split)
+ end2 := seg.End()
+ seg.SetEndUnchecked(split)
+ seg.SetValue(val1)
+ seg2 := s.InsertWithoutMergingUnchecked(seg.NextGap(), __generics_imported0.MappableRange{split, end2}, val2)
+
+ return seg2.PrevSegment(), seg2
+}
+
+// SplitAt splits the segment straddling split, if one exists. SplitAt returns
+// true if a segment was split and false otherwise. If SplitAt splits a
+// segment, all existing iterators are invalidated.
+func (s *DirtySet) SplitAt(split uint64) bool {
+ if seg := s.FindSegment(split); seg.Ok() && seg.Range().CanSplitAt(split) {
+ s.SplitUnchecked(seg, split)
+ return true
+ }
+ return false
+}
+
+// Isolate ensures that the given segment's range does not escape r by
+// splitting at r.Start and r.End if necessary, and returns an updated iterator
+// to the bounded segment. All existing iterators (including seg, but not
+// including the returned iterator) are invalidated.
+func (s *DirtySet) Isolate(seg DirtyIterator, r __generics_imported0.MappableRange) DirtyIterator {
+ if seg.Range().CanSplitAt(r.Start) {
+ _, seg = s.SplitUnchecked(seg, r.Start)
+ }
+ if seg.Range().CanSplitAt(r.End) {
+ seg, _ = s.SplitUnchecked(seg, r.End)
+ }
+ return seg
+}
+
+// ApplyContiguous applies a function to a contiguous range of segments,
+// splitting if necessary. The function is applied until the first gap is
+// encountered, at which point the gap is returned. If the function is applied
+// across the entire range, a terminal gap is returned. All existing iterators
+// are invalidated.
+//
+// N.B. The Iterator must not be invalidated by the function.
+func (s *DirtySet) ApplyContiguous(r __generics_imported0.MappableRange, fn func(seg DirtyIterator)) DirtyGapIterator {
+ seg, gap := s.Find(r.Start)
+ if !seg.Ok() {
+ return gap
+ }
+ for {
+ seg = s.Isolate(seg, r)
+ fn(seg)
+ if seg.End() >= r.End {
+ return DirtyGapIterator{}
+ }
+ gap = seg.NextGap()
+ if !gap.IsEmpty() {
+ return gap
+ }
+ seg = gap.NextSegment()
+ if !seg.Ok() {
+
+ return DirtyGapIterator{}
+ }
+ }
+}
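+
+// Illustrative use (hypothetical, not part of this file): set Keep across
+// an existing contiguous run of segments, stopping at the first gap:
+//
+//	if gap := ds.ApplyContiguous(mr, func(seg DirtyIterator) {
+//		seg.ValuePtr().Keep = true
+//	}); gap.Ok() {
+//		// A gap was hit; mr is not fully covered by segments.
+//	}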
+
+// +stateify savable
+type Dirtynode struct {
+ // An internal binary tree node looks like:
+ //
+ // K
+ // / \
+ // Cl Cr
+ //
+ // where all keys in the subtree rooted by Cl (the left subtree) are less
+ // than K (the key of the parent node), and all keys in the subtree rooted
+ // by Cr (the right subtree) are greater than K.
+ //
+ // An internal B-tree node's indexes work out to look like:
+ //
+ // K0 K1 K2 ... Kn-1
+ // / \/ \/ \ ... / \
+ // C0 C1 C2 C3 ... Cn-1 Cn
+ //
+ // where n is nrSegments.
+ nrSegments int
+
+ // parent is a pointer to this node's parent. If this node is root, parent
+ // is nil.
+ parent *Dirtynode
+
+ // parentIndex is the index of this node in parent.children.
+ parentIndex int
+
+ // Flag for internal nodes that is technically redundant with "children[0]
+ // != nil", but is stored in the first cache line. "hasChildren" rather
+ // than "isLeaf" because false must be the correct value for an empty root.
+ hasChildren bool
+
+ // Nodes store keys and values in separate arrays to maximize locality in
+ // the common case (scanning keys for lookup).
+ keys [DirtymaxDegree - 1]__generics_imported0.MappableRange
+ values [DirtymaxDegree - 1]DirtyInfo
+ children [DirtymaxDegree]*Dirtynode
+}
+
+// firstSegment returns the first segment in the subtree rooted by n.
+//
+// Preconditions: n.nrSegments != 0.
+func (n *Dirtynode) firstSegment() DirtyIterator {
+ for n.hasChildren {
+ n = n.children[0]
+ }
+ return DirtyIterator{n, 0}
+}
+
+// lastSegment returns the last segment in the subtree rooted by n.
+//
+// Preconditions: n.nrSegments != 0.
+func (n *Dirtynode) lastSegment() DirtyIterator {
+ for n.hasChildren {
+ n = n.children[n.nrSegments]
+ }
+ return DirtyIterator{n, n.nrSegments - 1}
+}
+
+func (n *Dirtynode) prevSibling() *Dirtynode {
+ if n.parent == nil || n.parentIndex == 0 {
+ return nil
+ }
+ return n.parent.children[n.parentIndex-1]
+}
+
+func (n *Dirtynode) nextSibling() *Dirtynode {
+ if n.parent == nil || n.parentIndex == n.parent.nrSegments {
+ return nil
+ }
+ return n.parent.children[n.parentIndex+1]
+}
+
+// rebalanceBeforeInsert splits n and its ancestors if they are full, as
+// required for insertion, and returns an updated iterator to the position
+// represented by gap.
+func (n *Dirtynode) rebalanceBeforeInsert(gap DirtyGapIterator) DirtyGapIterator {
+ if n.parent != nil {
+ gap = n.parent.rebalanceBeforeInsert(gap)
+ }
+ if n.nrSegments < DirtymaxDegree-1 {
+ return gap
+ }
+ if n.parent == nil {
+
+ left := &Dirtynode{
+ nrSegments: DirtyminDegree - 1,
+ parent: n,
+ parentIndex: 0,
+ hasChildren: n.hasChildren,
+ }
+ right := &Dirtynode{
+ nrSegments: DirtyminDegree - 1,
+ parent: n,
+ parentIndex: 1,
+ hasChildren: n.hasChildren,
+ }
+ copy(left.keys[:DirtyminDegree-1], n.keys[:DirtyminDegree-1])
+ copy(left.values[:DirtyminDegree-1], n.values[:DirtyminDegree-1])
+ copy(right.keys[:DirtyminDegree-1], n.keys[DirtyminDegree:])
+ copy(right.values[:DirtyminDegree-1], n.values[DirtyminDegree:])
+ n.keys[0], n.values[0] = n.keys[DirtyminDegree-1], n.values[DirtyminDegree-1]
+ DirtyzeroValueSlice(n.values[1:])
+ if n.hasChildren {
+ copy(left.children[:DirtyminDegree], n.children[:DirtyminDegree])
+ copy(right.children[:DirtyminDegree], n.children[DirtyminDegree:])
+ DirtyzeroNodeSlice(n.children[2:])
+ for i := 0; i < DirtyminDegree; i++ {
+ left.children[i].parent = left
+ left.children[i].parentIndex = i
+ right.children[i].parent = right
+ right.children[i].parentIndex = i
+ }
+ }
+ n.nrSegments = 1
+ n.hasChildren = true
+ n.children[0] = left
+ n.children[1] = right
+ if gap.node != n {
+ return gap
+ }
+ if gap.index < DirtyminDegree {
+ return DirtyGapIterator{left, gap.index}
+ }
+ return DirtyGapIterator{right, gap.index - DirtyminDegree}
+ }
+
+ copy(n.parent.keys[n.parentIndex+1:], n.parent.keys[n.parentIndex:n.parent.nrSegments])
+ copy(n.parent.values[n.parentIndex+1:], n.parent.values[n.parentIndex:n.parent.nrSegments])
+ n.parent.keys[n.parentIndex], n.parent.values[n.parentIndex] = n.keys[DirtyminDegree-1], n.values[DirtyminDegree-1]
+ copy(n.parent.children[n.parentIndex+2:], n.parent.children[n.parentIndex+1:n.parent.nrSegments+1])
+ for i := n.parentIndex + 2; i < n.parent.nrSegments+2; i++ {
+ n.parent.children[i].parentIndex = i
+ }
+ sibling := &Dirtynode{
+ nrSegments: DirtyminDegree - 1,
+ parent: n.parent,
+ parentIndex: n.parentIndex + 1,
+ hasChildren: n.hasChildren,
+ }
+ n.parent.children[n.parentIndex+1] = sibling
+ n.parent.nrSegments++
+ copy(sibling.keys[:DirtyminDegree-1], n.keys[DirtyminDegree:])
+ copy(sibling.values[:DirtyminDegree-1], n.values[DirtyminDegree:])
+ DirtyzeroValueSlice(n.values[DirtyminDegree-1:])
+ if n.hasChildren {
+ copy(sibling.children[:DirtyminDegree], n.children[DirtyminDegree:])
+ DirtyzeroNodeSlice(n.children[DirtyminDegree:])
+ for i := 0; i < DirtyminDegree; i++ {
+ sibling.children[i].parent = sibling
+ sibling.children[i].parentIndex = i
+ }
+ }
+ n.nrSegments = DirtyminDegree - 1
+
+ if gap.node != n {
+ return gap
+ }
+ if gap.index < DirtyminDegree {
+ return gap
+ }
+ return DirtyGapIterator{sibling, gap.index - DirtyminDegree}
+}
+
+// rebalanceAfterRemove "unsplits" n and its ancestors if they are deficient
+// (contain fewer segments than required by B-tree invariants), as required for
+// removal, and returns an updated iterator to the position represented by gap.
+//
+// Precondition: n is the only node in the tree that may currently violate a
+// B-tree invariant.
+func (n *Dirtynode) rebalanceAfterRemove(gap DirtyGapIterator) DirtyGapIterator {
+ for {
+ if n.nrSegments >= DirtyminDegree-1 {
+ return gap
+ }
+ if n.parent == nil {
+
+ return gap
+ }
+
+ if sibling := n.prevSibling(); sibling != nil && sibling.nrSegments >= DirtyminDegree {
+ copy(n.keys[1:], n.keys[:n.nrSegments])
+ copy(n.values[1:], n.values[:n.nrSegments])
+ n.keys[0] = n.parent.keys[n.parentIndex-1]
+ n.values[0] = n.parent.values[n.parentIndex-1]
+ n.parent.keys[n.parentIndex-1] = sibling.keys[sibling.nrSegments-1]
+ n.parent.values[n.parentIndex-1] = sibling.values[sibling.nrSegments-1]
+ dirtySetFunctions{}.ClearValue(&sibling.values[sibling.nrSegments-1])
+ if n.hasChildren {
+ copy(n.children[1:], n.children[:n.nrSegments+1])
+ n.children[0] = sibling.children[sibling.nrSegments]
+ sibling.children[sibling.nrSegments] = nil
+ n.children[0].parent = n
+ n.children[0].parentIndex = 0
+ for i := 1; i < n.nrSegments+2; i++ {
+ n.children[i].parentIndex = i
+ }
+ }
+ n.nrSegments++
+ sibling.nrSegments--
+ if gap.node == sibling && gap.index == sibling.nrSegments {
+ return DirtyGapIterator{n, 0}
+ }
+ if gap.node == n {
+ return DirtyGapIterator{n, gap.index + 1}
+ }
+ return gap
+ }
+ if sibling := n.nextSibling(); sibling != nil && sibling.nrSegments >= DirtyminDegree {
+ n.keys[n.nrSegments] = n.parent.keys[n.parentIndex]
+ n.values[n.nrSegments] = n.parent.values[n.parentIndex]
+ n.parent.keys[n.parentIndex] = sibling.keys[0]
+ n.parent.values[n.parentIndex] = sibling.values[0]
+ copy(sibling.keys[:sibling.nrSegments-1], sibling.keys[1:])
+ copy(sibling.values[:sibling.nrSegments-1], sibling.values[1:])
+ dirtySetFunctions{}.ClearValue(&sibling.values[sibling.nrSegments-1])
+ if n.hasChildren {
+ n.children[n.nrSegments+1] = sibling.children[0]
+ copy(sibling.children[:sibling.nrSegments], sibling.children[1:])
+ sibling.children[sibling.nrSegments] = nil
+ n.children[n.nrSegments+1].parent = n
+ n.children[n.nrSegments+1].parentIndex = n.nrSegments + 1
+ for i := 0; i < sibling.nrSegments; i++ {
+ sibling.children[i].parentIndex = i
+ }
+ }
+ n.nrSegments++
+ sibling.nrSegments--
+ if gap.node == sibling {
+ if gap.index == 0 {
+ return DirtyGapIterator{n, n.nrSegments}
+ }
+ return DirtyGapIterator{sibling, gap.index - 1}
+ }
+ return gap
+ }
+
+ p := n.parent
+ if p.nrSegments == 1 {
+
+ left, right := p.children[0], p.children[1]
+ p.nrSegments = left.nrSegments + right.nrSegments + 1
+ p.hasChildren = left.hasChildren
+ p.keys[left.nrSegments] = p.keys[0]
+ p.values[left.nrSegments] = p.values[0]
+ copy(p.keys[:left.nrSegments], left.keys[:left.nrSegments])
+ copy(p.values[:left.nrSegments], left.values[:left.nrSegments])
+ copy(p.keys[left.nrSegments+1:], right.keys[:right.nrSegments])
+ copy(p.values[left.nrSegments+1:], right.values[:right.nrSegments])
+ if left.hasChildren {
+ copy(p.children[:left.nrSegments+1], left.children[:left.nrSegments+1])
+ copy(p.children[left.nrSegments+1:], right.children[:right.nrSegments+1])
+ for i := 0; i < p.nrSegments+1; i++ {
+ p.children[i].parent = p
+ p.children[i].parentIndex = i
+ }
+ } else {
+ p.children[0] = nil
+ p.children[1] = nil
+ }
+ if gap.node == left {
+ return DirtyGapIterator{p, gap.index}
+ }
+ if gap.node == right {
+ return DirtyGapIterator{p, gap.index + left.nrSegments + 1}
+ }
+ return gap
+ }
+ // Merge n and either sibling, along with the segment separating the
+ // two, into whichever of the two nodes comes first. This is the
+ // reverse of the non-root splitting case in
+ // node.rebalanceBeforeInsert.
+ var left, right *Dirtynode
+ if n.parentIndex > 0 {
+ left = n.prevSibling()
+ right = n
+ } else {
+ left = n
+ right = n.nextSibling()
+ }
+
+ if gap.node == right {
+ gap = DirtyGapIterator{left, gap.index + left.nrSegments + 1}
+ }
+ left.keys[left.nrSegments] = p.keys[left.parentIndex]
+ left.values[left.nrSegments] = p.values[left.parentIndex]
+ copy(left.keys[left.nrSegments+1:], right.keys[:right.nrSegments])
+ copy(left.values[left.nrSegments+1:], right.values[:right.nrSegments])
+ if left.hasChildren {
+ copy(left.children[left.nrSegments+1:], right.children[:right.nrSegments+1])
+ for i := left.nrSegments + 1; i < left.nrSegments+right.nrSegments+2; i++ {
+ left.children[i].parent = left
+ left.children[i].parentIndex = i
+ }
+ }
+ left.nrSegments += right.nrSegments + 1
+ copy(p.keys[left.parentIndex:], p.keys[left.parentIndex+1:p.nrSegments])
+ copy(p.values[left.parentIndex:], p.values[left.parentIndex+1:p.nrSegments])
+ dirtySetFunctions{}.ClearValue(&p.values[p.nrSegments-1])
+ copy(p.children[left.parentIndex+1:], p.children[left.parentIndex+2:p.nrSegments+1])
+ for i := 0; i < p.nrSegments; i++ {
+ p.children[i].parentIndex = i
+ }
+ p.children[p.nrSegments] = nil
+ p.nrSegments--
+
+ n = p
+ }
+}
+
+// An Iterator is conceptually one of:
+//
+// - A pointer to a segment in a set; or
+//
+// - A terminal iterator, which is a sentinel indicating that the end of
+// iteration has been reached.
+//
+// Iterators are copyable values and are meaningfully equality-comparable. The
+// zero value of Iterator is a terminal iterator.
+//
+// Unless otherwise specified, any mutation of a set invalidates all existing
+// iterators into the set.
+type DirtyIterator struct {
+ // node is the node containing the iterated segment. If the iterator is
+ // terminal, node is nil.
+ node *Dirtynode
+
+ // index is the index of the segment in node.keys/values.
+ index int
+}
+
+// Ok returns true if the iterator is not terminal. All other methods are only
+// valid for non-terminal iterators.
+func (seg DirtyIterator) Ok() bool {
+ return seg.node != nil
+}
+
+// Range returns the iterated segment's range key.
+func (seg DirtyIterator) Range() __generics_imported0.MappableRange {
+ return seg.node.keys[seg.index]
+}
+
+// Start is equivalent to Range().Start, but should be preferred if only the
+// start of the range is needed.
+func (seg DirtyIterator) Start() uint64 {
+ return seg.node.keys[seg.index].Start
+}
+
+// End is equivalent to Range().End, but should be preferred if only the end of
+// the range is needed.
+func (seg DirtyIterator) End() uint64 {
+ return seg.node.keys[seg.index].End
+}
+
+// SetRangeUnchecked mutates the iterated segment's range key. This operation
+// does not invalidate any iterators.
+//
+// Preconditions:
+//
+// - r.Length() > 0.
+//
+// - The new range must not overlap an existing one: If seg.NextSegment().Ok(),
+// then r.End <= seg.NextSegment().Start(); if seg.PrevSegment().Ok(), then
+// r.Start >= seg.PrevSegment().End().
+func (seg DirtyIterator) SetRangeUnchecked(r __generics_imported0.MappableRange) {
+ seg.node.keys[seg.index] = r
+}
+
+// SetRange mutates the iterated segment's range key. If the new range would
+// cause the iterated segment to overlap another segment, or if the new range
+// is invalid, SetRange panics. This operation does not invalidate any
+// iterators.
+func (seg DirtyIterator) SetRange(r __generics_imported0.MappableRange) {
+ if r.Length() <= 0 {
+ panic(fmt.Sprintf("invalid segment range %v", r))
+ }
+ if prev := seg.PrevSegment(); prev.Ok() && r.Start < prev.End() {
+ panic(fmt.Sprintf("new segment range %v overlaps segment range %v", r, prev.Range()))
+ }
+ if next := seg.NextSegment(); next.Ok() && r.End > next.Start() {
+ panic(fmt.Sprintf("new segment range %v overlaps segment range %v", r, next.Range()))
+ }
+ seg.SetRangeUnchecked(r)
+}
+
+// SetStartUnchecked mutates the iterated segment's start. This operation does
+// not invalidate any iterators.
+//
+// Preconditions: The new start must be valid: start < seg.End(); if
+// seg.PrevSegment().Ok(), then start >= seg.PrevSegment().End().
+func (seg DirtyIterator) SetStartUnchecked(start uint64) {
+ seg.node.keys[seg.index].Start = start
+}
+
+// SetStart mutates the iterated segment's start. If the new start value would
+// cause the iterated segment to overlap another segment, or would result in an
+// invalid range, SetStart panics. This operation does not invalidate any
+// iterators.
+func (seg DirtyIterator) SetStart(start uint64) {
+ if start >= seg.End() {
+ panic(fmt.Sprintf("new start %v would invalidate segment range %v", start, seg.Range()))
+ }
+ if prev := seg.PrevSegment(); prev.Ok() && start < prev.End() {
+ panic(fmt.Sprintf("new start %v would cause segment range %v to overlap segment range %v", start, seg.Range(), prev.Range()))
+ }
+ seg.SetStartUnchecked(start)
+}
+
+// SetEndUnchecked mutates the iterated segment's end. This operation does not
+// invalidate any iterators.
+//
+// Preconditions: The new end must be valid: end > seg.Start(); if
+// seg.NextSegment().Ok(), then end <= seg.NextSegment().Start().
+func (seg DirtyIterator) SetEndUnchecked(end uint64) {
+ seg.node.keys[seg.index].End = end
+}
+
+// SetEnd mutates the iterated segment's end. If the new end value would cause
+// the iterated segment to overlap another segment, or would result in an
+// invalid range, SetEnd panics. This operation does not invalidate any
+// iterators.
+func (seg DirtyIterator) SetEnd(end uint64) {
+ if end <= seg.Start() {
+ panic(fmt.Sprintf("new end %v would invalidate segment range %v", end, seg.Range()))
+ }
+ if next := seg.NextSegment(); next.Ok() && end > next.Start() {
+ panic(fmt.Sprintf("new end %v would cause segment range %v to overlap segment range %v", end, seg.Range(), next.Range()))
+ }
+ seg.SetEndUnchecked(end)
+}
+
+// Value returns a copy of the iterated segment's value.
+func (seg DirtyIterator) Value() DirtyInfo {
+ return seg.node.values[seg.index]
+}
+
+// ValuePtr returns a pointer to the iterated segment's value. The pointer is
+// invalidated if the iterator is invalidated. This operation does not
+// invalidate any iterators.
+func (seg DirtyIterator) ValuePtr() *DirtyInfo {
+ return &seg.node.values[seg.index]
+}
+
+// SetValue mutates the iterated segment's value. This operation does not
+// invalidate any iterators.
+func (seg DirtyIterator) SetValue(val DirtyInfo) {
+ seg.node.values[seg.index] = val
+}
+
+// PrevSegment returns the iterated segment's predecessor. If there is no
+// preceding segment, PrevSegment returns a terminal iterator.
+func (seg DirtyIterator) PrevSegment() DirtyIterator {
+ if seg.node.hasChildren {
+ return seg.node.children[seg.index].lastSegment()
+ }
+ if seg.index > 0 {
+ return DirtyIterator{seg.node, seg.index - 1}
+ }
+ if seg.node.parent == nil {
+ return DirtyIterator{}
+ }
+ return DirtysegmentBeforePosition(seg.node.parent, seg.node.parentIndex)
+}
+
+// NextSegment returns the iterated segment's successor. If there is no
+// succeeding segment, NextSegment returns a terminal iterator.
+func (seg DirtyIterator) NextSegment() DirtyIterator {
+ if seg.node.hasChildren {
+ return seg.node.children[seg.index+1].firstSegment()
+ }
+ if seg.index < seg.node.nrSegments-1 {
+ return DirtyIterator{seg.node, seg.index + 1}
+ }
+ if seg.node.parent == nil {
+ return DirtyIterator{}
+ }
+ return DirtysegmentAfterPosition(seg.node.parent, seg.node.parentIndex)
+}
+
+// PrevGap returns the gap immediately before the iterated segment.
+func (seg DirtyIterator) PrevGap() DirtyGapIterator {
+ if seg.node.hasChildren {
+
+ return seg.node.children[seg.index].lastSegment().NextGap()
+ }
+ return DirtyGapIterator{seg.node, seg.index}
+}
+
+// NextGap returns the gap immediately after the iterated segment.
+func (seg DirtyIterator) NextGap() DirtyGapIterator {
+ if seg.node.hasChildren {
+ return seg.node.children[seg.index+1].firstSegment().PrevGap()
+ }
+ return DirtyGapIterator{seg.node, seg.index + 1}
+}
+
+// PrevNonEmpty returns the iterated segment's predecessor if it is adjacent,
+// or the gap before the iterated segment otherwise. If seg.Start() ==
+// Functions.MinKey(), PrevNonEmpty will return two terminal iterators.
+// Otherwise, exactly one of the iterators returned by PrevNonEmpty will be
+// non-terminal.
+func (seg DirtyIterator) PrevNonEmpty() (DirtyIterator, DirtyGapIterator) {
+ gap := seg.PrevGap()
+ if gap.Range().Length() != 0 {
+ return DirtyIterator{}, gap
+ }
+ return gap.PrevSegment(), DirtyGapIterator{}
+}
+
+// NextNonEmpty returns the iterated segment's successor if it is adjacent, or
+// the gap after the iterated segment otherwise. If seg.End() ==
+// Functions.MaxKey(), NextNonEmpty will return two terminal iterators.
+// Otherwise, exactly one of the iterators returned by NextNonEmpty will be
+// non-terminal.
+func (seg DirtyIterator) NextNonEmpty() (DirtyIterator, DirtyGapIterator) {
+ gap := seg.NextGap()
+ if gap.Range().Length() != 0 {
+ return DirtyIterator{}, gap
+ }
+ return gap.NextSegment(), DirtyGapIterator{}
+}
+
+// A GapIterator is conceptually one of:
+//
+// - A pointer to a position between two segments, before the first segment, or
+// after the last segment in a set, called a *gap*; or
+//
+// - A terminal iterator, which is a sentinel indicating that the end of
+// iteration has been reached.
+//
+// Note that the gap between two adjacent segments exists (iterators to it are
+// non-terminal), but has a length of zero. GapIterator.IsEmpty returns true
+// for such gaps. An empty set contains a single gap, spanning the entire range
+// of the set's keys.
+//
+// GapIterators are copyable values and are meaningfully equality-comparable.
+// The zero value of GapIterator is a terminal iterator.
+//
+// Unless otherwise specified, any mutation of a set invalidates all existing
+// iterators into the set.
+type DirtyGapIterator struct {
+ // The representation of a GapIterator is identical to that of an Iterator,
+ // except that index corresponds to positions between segments in the same
+ // way as for node.children (see comment for node.nrSegments).
+ node *Dirtynode
+ index int
+}
+
+// Ok returns true if the iterator is not terminal. All other methods are only
+// valid for non-terminal iterators.
+func (gap DirtyGapIterator) Ok() bool {
+ return gap.node != nil
+}
+
+// Range returns the range spanned by the iterated gap.
+func (gap DirtyGapIterator) Range() __generics_imported0.MappableRange {
+ return __generics_imported0.MappableRange{gap.Start(), gap.End()}
+}
+
+// Start is equivalent to Range().Start, but should be preferred if only the
+// start of the range is needed.
+func (gap DirtyGapIterator) Start() uint64 {
+ if ps := gap.PrevSegment(); ps.Ok() {
+ return ps.End()
+ }
+ return dirtySetFunctions{}.MinKey()
+}
+
+// End is equivalent to Range().End, but should be preferred if only the end of
+// the range is needed.
+func (gap DirtyGapIterator) End() uint64 {
+ if ns := gap.NextSegment(); ns.Ok() {
+ return ns.Start()
+ }
+ return dirtySetFunctions{}.MaxKey()
+}
+
+// IsEmpty returns true if the iterated gap is empty (that is, the "gap" is
+// between two adjacent segments.)
+func (gap DirtyGapIterator) IsEmpty() bool {
+ return gap.Range().Length() == 0
+}
+
+// PrevSegment returns the segment immediately before the iterated gap. If no
+// such segment exists, PrevSegment returns a terminal iterator.
+func (gap DirtyGapIterator) PrevSegment() DirtyIterator {
+ return DirtysegmentBeforePosition(gap.node, gap.index)
+}
+
+// NextSegment returns the segment immediately after the iterated gap. If no
+// such segment exists, NextSegment returns a terminal iterator.
+func (gap DirtyGapIterator) NextSegment() DirtyIterator {
+ return DirtysegmentAfterPosition(gap.node, gap.index)
+}
+
+// PrevGap returns the iterated gap's predecessor. If no such gap exists,
+// PrevGap returns a terminal iterator.
+func (gap DirtyGapIterator) PrevGap() DirtyGapIterator {
+ seg := gap.PrevSegment()
+ if !seg.Ok() {
+ return DirtyGapIterator{}
+ }
+ return seg.PrevGap()
+}
+
+// NextGap returns the iterated gap's successor. If no such gap exists, NextGap
+// returns a terminal iterator.
+func (gap DirtyGapIterator) NextGap() DirtyGapIterator {
+ seg := gap.NextSegment()
+ if !seg.Ok() {
+ return DirtyGapIterator{}
+ }
+ return seg.NextGap()
+}
+
+// segmentBeforePosition returns the predecessor segment of the position given
+// by n.children[i], which may or may not contain a child. If no such segment
+// exists, segmentBeforePosition returns a terminal iterator.
+func DirtysegmentBeforePosition(n *Dirtynode, i int) DirtyIterator {
+ for i == 0 {
+ if n.parent == nil {
+ return DirtyIterator{}
+ }
+ n, i = n.parent, n.parentIndex
+ }
+ return DirtyIterator{n, i - 1}
+}
+
+// segmentAfterPosition returns the successor segment of the position given by
+// n.children[i], which may or may not contain a child. If no such segment
+// exists, segmentAfterPosition returns a terminal iterator.
+func DirtysegmentAfterPosition(n *Dirtynode, i int) DirtyIterator {
+ for i == n.nrSegments {
+ if n.parent == nil {
+ return DirtyIterator{}
+ }
+ n, i = n.parent, n.parentIndex
+ }
+ return DirtyIterator{n, i}
+}
+
+func DirtyzeroValueSlice(slice []DirtyInfo) {
+
+ for i := range slice {
+ dirtySetFunctions{}.ClearValue(&slice[i])
+ }
+}
+
+func DirtyzeroNodeSlice(slice []*Dirtynode) {
+ for i := range slice {
+ slice[i] = nil
+ }
+}
+
+// String stringifies a Set for debugging.
+func (s *DirtySet) String() string {
+ return s.root.String()
+}
+
+// String stringifies a node (and all of its children) for debugging.
+func (n *Dirtynode) String() string {
+ var buf bytes.Buffer
+ n.writeDebugString(&buf, "")
+ return buf.String()
+}
+
+func (n *Dirtynode) writeDebugString(buf *bytes.Buffer, prefix string) {
+ if n.hasChildren != (n.nrSegments > 0 && n.children[0] != nil) {
+ buf.WriteString(prefix)
+ buf.WriteString(fmt.Sprintf("WARNING: inconsistent value of hasChildren: got %v, want %v\n", n.hasChildren, !n.hasChildren))
+ }
+ for i := 0; i < n.nrSegments; i++ {
+ if child := n.children[i]; child != nil {
+ cprefix := fmt.Sprintf("%s- % 3d ", prefix, i)
+ if child.parent != n || child.parentIndex != i {
+ buf.WriteString(cprefix)
+ buf.WriteString(fmt.Sprintf("WARNING: inconsistent linkage to parent: got (%p, %d), want (%p, %d)\n", child.parent, child.parentIndex, n, i))
+ }
+ child.writeDebugString(buf, fmt.Sprintf("%s- % 3d ", prefix, i))
+ }
+ buf.WriteString(prefix)
+ buf.WriteString(fmt.Sprintf("- % 3d: %v => %v\n", i, n.keys[i], n.values[i]))
+ }
+ if child := n.children[n.nrSegments]; child != nil {
+ child.writeDebugString(buf, fmt.Sprintf("%s- % 3d ", prefix, n.nrSegments))
+ }
+}
+
+// SegmentDataSlices represents segments from a set as slices of start, end, and
+// values. SegmentDataSlices is primarily used as an intermediate representation
+// for save/restore and the layout here is optimized for that.
+//
+// +stateify savable
+type DirtySegmentDataSlices struct {
+ Start []uint64
+ End []uint64
+ Values []DirtyInfo
+}
+
+// ExportSortedSlices returns a copy of all segments in the given set, in ascending
+// key order.
+func (s *DirtySet) ExportSortedSlices() *DirtySegmentDataSlices {
+ var sds DirtySegmentDataSlices
+ for seg := s.FirstSegment(); seg.Ok(); seg = seg.NextSegment() {
+ sds.Start = append(sds.Start, seg.Start())
+ sds.End = append(sds.End, seg.End())
+ sds.Values = append(sds.Values, seg.Value())
+ }
+ sds.Start = sds.Start[:len(sds.Start):len(sds.Start)]
+ sds.End = sds.End[:len(sds.End):len(sds.End)]
+ sds.Values = sds.Values[:len(sds.Values):len(sds.Values)]
+ return &sds
+}
+
+// ImportSortedSlices initializes the given set from the given slice.
+//
+// Preconditions: s must be empty. sds must represent a valid set (the segments
+// in sds must have valid lengths that do not overlap). The segments in sds
+// must be sorted in ascending key order.
+func (s *DirtySet) ImportSortedSlices(sds *DirtySegmentDataSlices) error {
+ if !s.IsEmpty() {
+ return fmt.Errorf("cannot import into non-empty set %v", s)
+ }
+ gap := s.FirstGap()
+ for i := range sds.Start {
+ r := __generics_imported0.MappableRange{sds.Start[i], sds.End[i]}
+ if !gap.Range().IsSupersetOf(r) {
+ return fmt.Errorf("segment overlaps a preceding segment or is incorrectly sorted: [%d, %d) => %v", sds.Start[i], sds.End[i], sds.Values[i])
+ }
+ gap = s.InsertWithoutMerging(gap, r, sds.Values[i]).NextGap()
+ }
+ return nil
+}
+func (s *DirtySet) saveRoot() *DirtySegmentDataSlices {
+ return s.ExportSortedSlices()
+}
+
+func (s *DirtySet) loadRoot(sds *DirtySegmentDataSlices) {
+ if err := s.ImportSortedSlices(sds); err != nil {
+ panic(err)
+ }
+}
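
A brief sketch of the generated set's core API (hypothetical usage; the ranges and values below are arbitrary, chosen only for illustration):

	var ds DirtySet

	// Add rejects overlapping insertions rather than overwriting.
	if !ds.Add(memmap.MappableRange{Start: 0, End: 0x1000}, DirtyInfo{Keep: false}) {
		panic("overlap")
	}

	// Exactly one of the iterators returned by Find is non-terminal.
	if seg, gap := ds.Find(0x800); seg.Ok() {
		_ = seg.Value() // 0x800 falls inside a segment
	} else {
		_ = gap.Range() // 0x800 falls inside a gap
	}

	// Walk all segments in ascending key order.
	for seg := ds.FirstSegment(); seg.Ok(); seg = seg.NextSegment() {
		fmt.Printf("%v => %v\n", seg.Range(), seg.Value())
	}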
diff --git a/pkg/sentry/fs/fsutil/file.go b/pkg/sentry/fs/fsutil/file.go
new file mode 100644
index 000000000..9381963d0
--- /dev/null
+++ b/pkg/sentry/fs/fsutil/file.go
@@ -0,0 +1,394 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package fsutil
+
+import (
+ "gvisor.googlesource.com/gvisor/pkg/sentry/arch"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/context"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/memmap"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
+ "gvisor.googlesource.com/gvisor/pkg/syserror"
+ "gvisor.googlesource.com/gvisor/pkg/waiter"
+)
+
+// FileNoopRelease implements fs.FileOperations.Release for files that have no
+// resources to release.
+type FileNoopRelease struct{}
+
+// Release is a no-op.
+func (FileNoopRelease) Release() {}
+
+// SeekWithDirCursor is used to implement fs.FileOperations.Seek. If dirCursor
+// is not nil and the seek was on a directory, the cursor will be updated.
+//
+// Currently only seeking to 0 on a directory is supported.
+//
+// FIXME(b/33075855): Lift directory seeking limitations.
+func SeekWithDirCursor(ctx context.Context, file *fs.File, whence fs.SeekWhence, offset int64, dirCursor *string) (int64, error) {
+ inode := file.Dirent.Inode
+ current := file.Offset()
+
+ // Does the Inode represent a non-seekable type?
+ if fs.IsPipe(inode.StableAttr) || fs.IsSocket(inode.StableAttr) {
+ return current, syserror.ESPIPE
+ }
+
+ // Does the Inode represent a character device?
+ if fs.IsCharDevice(inode.StableAttr) {
+ // Ignore seek requests.
+ //
+ // FIXME(b/34716638): This preserves existing
+ // behavior but is not universally correct.
+ return 0, nil
+ }
+
+ // Otherwise compute the new offset.
+ switch whence {
+ case fs.SeekSet:
+ switch inode.StableAttr.Type {
+ case fs.RegularFile, fs.SpecialFile, fs.BlockDevice:
+ if offset < 0 {
+ return current, syserror.EINVAL
+ }
+ return offset, nil
+ case fs.Directory, fs.SpecialDirectory:
+ if offset != 0 {
+ return current, syserror.EINVAL
+ }
+ // SEEK_SET to 0 moves the directory "cursor" to the beginning.
+ if dirCursor != nil {
+ *dirCursor = ""
+ }
+ return 0, nil
+ default:
+ return current, syserror.EINVAL
+ }
+ case fs.SeekCurrent:
+ switch inode.StableAttr.Type {
+ case fs.RegularFile, fs.SpecialFile, fs.BlockDevice:
+ if current+offset < 0 {
+ return current, syserror.EINVAL
+ }
+ return current + offset, nil
+ case fs.Directory, fs.SpecialDirectory:
+ if offset != 0 {
+ return current, syserror.EINVAL
+ }
+ return current, nil
+ default:
+ return current, syserror.EINVAL
+ }
+ case fs.SeekEnd:
+ switch inode.StableAttr.Type {
+ case fs.RegularFile, fs.BlockDevice:
+ // Allow the file to determine the end.
+ uattr, err := inode.UnstableAttr(ctx)
+ if err != nil {
+ return current, err
+ }
+ sz := uattr.Size
+ if sz+offset < 0 {
+ return current, syserror.EINVAL
+ }
+ return sz + offset, nil
+ // FIXME(b/34778850): This is not universally correct.
+ // Remove SpecialDirectory.
+ case fs.SpecialDirectory:
+ if offset != 0 {
+ return current, syserror.EINVAL
+ }
+ // SEEK_END to 0 moves the directory "cursor" to the end.
+ //
+ // FIXME(b/35442290): This ensures that after the seek,
+ // reading on the directory will get EOF. But it is not
+ // correct in general because the directory can grow in
+ // size; attempting to read those new entries will be
+ // futile (EOF will always be the result).
+ return fs.FileMaxOffset, nil
+ default:
+ return current, syserror.EINVAL
+ }
+ }
+
+ // Not a valid seek request.
+ return current, syserror.EINVAL
+}
+
+// FileGenericSeek implements fs.FileOperations.Seek for files that use a
+// generic seek implementation.
+type FileGenericSeek struct{}
+
+// Seek implements fs.FileOperations.Seek.
+func (FileGenericSeek) Seek(ctx context.Context, file *fs.File, whence fs.SeekWhence, offset int64) (int64, error) {
+ return SeekWithDirCursor(ctx, file, whence, offset, nil)
+}
+
+// FileZeroSeek implements fs.FileOperations.Seek for files that maintain a
+// constant zero-value offset and require a no-op Seek.
+type FileZeroSeek struct{}
+
+// Seek implements fs.FileOperations.Seek.
+func (FileZeroSeek) Seek(context.Context, *fs.File, fs.SeekWhence, int64) (int64, error) {
+ return 0, nil
+}
+
+// FileNoSeek implements fs.FileOperations.Seek to return EINVAL.
+type FileNoSeek struct{}
+
+// Seek implements fs.FileOperations.Seek.
+func (FileNoSeek) Seek(context.Context, *fs.File, fs.SeekWhence, int64) (int64, error) {
+ return 0, syserror.EINVAL
+}
+
+// FilePipeSeek implements fs.FileOperations.Seek and can be used for files
+// that behave like pipes (seeking is not supported).
+type FilePipeSeek struct{}
+
+// Seek implements fs.FileOperations.Seek.
+func (FilePipeSeek) Seek(context.Context, *fs.File, fs.SeekWhence, int64) (int64, error) {
+ return 0, syserror.ESPIPE
+}
+
+// FileNotDirReaddir implements fs.FileOperations.Readdir for non-directories.
+type FileNotDirReaddir struct{}
+
+// Readdir implements fs.FileOperations.Readdir.
+func (FileNotDirReaddir) Readdir(context.Context, *fs.File, fs.DentrySerializer) (int64, error) {
+ return 0, syserror.ENOTDIR
+}
+
+// FileNoFsync implements fs.FileOperations.Fsync for files that don't support
+// syncing.
+type FileNoFsync struct{}
+
+// Fsync implements fs.FileOperations.Fsync.
+func (FileNoFsync) Fsync(context.Context, *fs.File, int64, int64, fs.SyncType) error {
+ return syserror.EINVAL
+}
+
+// FileNoopFsync implements fs.FileOperations.Fsync for files that don't need
+// to be synced.
+type FileNoopFsync struct{}
+
+// Fsync implements fs.FileOperations.Fsync.
+func (FileNoopFsync) Fsync(context.Context, *fs.File, int64, int64, fs.SyncType) error {
+ return nil
+}
+
+// FileNoopFlush implements fs.FileOperations.Flush as a no-op.
+type FileNoopFlush struct{}
+
+// Flush implements fs.FileOperations.Flush.
+func (FileNoopFlush) Flush(context.Context, *fs.File) error {
+ return nil
+}
+
+// FileNoMMap implements fs.FileOperations.Mappable for files that cannot
+// be memory mapped.
+type FileNoMMap struct{}
+
+// ConfigureMMap implements fs.FileOperations.ConfigureMMap.
+func (FileNoMMap) ConfigureMMap(context.Context, *fs.File, *memmap.MMapOpts) error {
+ return syserror.ENODEV
+}
+
+// GenericConfigureMMap implements fs.FileOperations.ConfigureMMap for most
+// filesystems that support memory mapping.
+func GenericConfigureMMap(file *fs.File, m memmap.Mappable, opts *memmap.MMapOpts) error {
+ opts.Mappable = m
+ opts.MappingIdentity = file
+ file.IncRef()
+ return nil
+}
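+
+// exampleMappableFileOperations is a hypothetical sketch (not part of this
+// package's API) showing the intended use of GenericConfigureMMap: a
+// FileOperations implementation backed by a memmap.Mappable simply forwards
+// to it. The type and its mappable field are illustrative only.
+type exampleMappableFileOperations struct {
+ mappable memmap.Mappable
+}
+
+// ConfigureMMap implements fs.FileOperations.ConfigureMMap.
+func (f *exampleMappableFileOperations) ConfigureMMap(ctx context.Context, file *fs.File, opts *memmap.MMapOpts) error {
+ return GenericConfigureMMap(file, f.mappable, opts)
+}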
+
+// FileNoIoctl implements fs.FileOperations.Ioctl for files that don't
+// implement the ioctl syscall.
+type FileNoIoctl struct{}
+
+// Ioctl implements fs.FileOperations.Ioctl.
+func (FileNoIoctl) Ioctl(ctx context.Context, io usermem.IO, args arch.SyscallArguments) (uintptr, error) {
+ return 0, syserror.ENOTTY
+}
+
+// FileNoSplice implements fs.FileOperations.ReadFrom and
+// fs.FileOperations.WriteTo for files that don't support splice.
+type FileNoSplice struct{}
+
+// WriteTo implements fs.FileOperations.WriteTo.
+func (FileNoSplice) WriteTo(context.Context, *fs.File, *fs.File, fs.SpliceOpts) (int64, error) {
+ return 0, syserror.ENOSYS
+}
+
+// ReadFrom implements fs.FileOperations.ReadFrom.
+func (FileNoSplice) ReadFrom(context.Context, *fs.File, *fs.File, fs.SpliceOpts) (int64, error) {
+ return 0, syserror.ENOSYS
+}
+
+// DirFileOperations implements most of fs.FileOperations for directories,
+// except for Readdir and UnstableAttr, which the embedding type must implement.
+type DirFileOperations struct {
+ waiter.AlwaysReady
+ FileGenericSeek
+ FileNoIoctl
+ FileNoMMap
+ FileNoopFlush
+ FileNoopFsync
+ FileNoopRelease
+ FileNoSplice
+}
+
+// Read implements fs.FileOperations.Read.
+func (*DirFileOperations) Read(context.Context, *fs.File, usermem.IOSequence, int64) (int64, error) {
+ return 0, syserror.EISDIR
+}
+
+// Write implements fs.FileOperations.Write.
+func (*DirFileOperations) Write(context.Context, *fs.File, usermem.IOSequence, int64) (int64, error) {
+ return 0, syserror.EISDIR
+}
+
+// StaticDirFileOperations implements fs.FileOperations for directories with
+// static children.
+//
+// +stateify savable
+type StaticDirFileOperations struct {
+ DirFileOperations `state:"nosave"`
+ FileUseInodeUnstableAttr `state:"nosave"`
+
+ // dentryMap is a SortedDentryMap used to implement Readdir.
+ dentryMap *fs.SortedDentryMap
+
+ // dirCursor contains the name of the last directory entry that was
+ // serialized.
+ dirCursor string
+}
+
+// NewStaticDirFileOperations returns a new StaticDirFileOperations that will
+// iterate the given dentry map.
+func NewStaticDirFileOperations(dentries *fs.SortedDentryMap) *StaticDirFileOperations {
+ return &StaticDirFileOperations{
+ dentryMap: dentries,
+ }
+}
+
+// IterateDir implements DirIterator.IterateDir.
+func (sdfo *StaticDirFileOperations) IterateDir(ctx context.Context, dirCtx *fs.DirCtx, offset int) (int, error) {
+ n, err := fs.GenericReaddir(dirCtx, sdfo.dentryMap)
+ return offset + n, err
+}
+
+// Readdir implements fs.FileOperations.Readdir.
+func (sdfo *StaticDirFileOperations) Readdir(ctx context.Context, file *fs.File, serializer fs.DentrySerializer) (int64, error) {
+ root := fs.RootFromContext(ctx)
+ if root != nil {
+ defer root.DecRef()
+ }
+ dirCtx := &fs.DirCtx{
+ Serializer: serializer,
+ DirCursor: &sdfo.dirCursor,
+ }
+ return fs.DirentReaddir(ctx, file.Dirent, sdfo, root, dirCtx, file.Offset())
+}
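+
+// newExampleStaticDir is a hypothetical sketch (not part of this package's
+// API) showing how a caller might construct a StaticDirFileOperations for a
+// directory with fixed children; the entry names, types, and inode IDs are
+// illustrative only.
+func newExampleStaticDir() *StaticDirFileOperations {
+ return NewStaticDirFileOperations(fs.NewSortedDentryMap(map[string]fs.DentAttr{
+ "README": {Type: fs.RegularFile, InodeID: 1},
+ "subdir": {Type: fs.Directory, InodeID: 2},
+ }))
+}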
+
+// NoReadWriteFile is a file that does not support reading or writing.
+//
+// +stateify savable
+type NoReadWriteFile struct {
+ waiter.AlwaysReady `state:"nosave"`
+ FileGenericSeek `state:"nosave"`
+ FileNoIoctl `state:"nosave"`
+ FileNoMMap `state:"nosave"`
+ FileNoopFsync `state:"nosave"`
+ FileNoopFlush `state:"nosave"`
+ FileNoopRelease `state:"nosave"`
+ FileNoRead `state:"nosave"`
+ FileNoWrite `state:"nosave"`
+ FileNotDirReaddir `state:"nosave"`
+ FileUseInodeUnstableAttr `state:"nosave"`
+ FileNoSplice `state:"nosave"`
+}
+
+var _ fs.FileOperations = (*NoReadWriteFile)(nil)
+
+// FileStaticContentReader is a helper to implement fs.FileOperations.Read with
+// static content.
+//
+// +stateify savable
+type FileStaticContentReader struct {
+ // content is immutable.
+ content []byte
+}
+
+// NewFileStaticContentReader initializes a FileStaticContentReader with the
+// given content.
+func NewFileStaticContentReader(b []byte) FileStaticContentReader {
+ return FileStaticContentReader{
+ content: b,
+ }
+}
+
+// Read implements fs.FileOperations.Read.
+func (scr *FileStaticContentReader) Read(ctx context.Context, _ *fs.File, dst usermem.IOSequence, offset int64) (int64, error) {
+ if offset < 0 {
+ return 0, syserror.EINVAL
+ }
+ if offset >= int64(len(scr.content)) {
+ return 0, nil
+ }
+ n, err := dst.CopyOut(ctx, scr.content[offset:])
+ return int64(n), err
+}
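+
+// exampleStaticFile is a hypothetical sketch (not part of this package's
+// API) composing the helpers in this file into a complete read-only
+// fs.FileOperations with fixed content.
+type exampleStaticFile struct {
+ waiter.AlwaysReady `state:"nosave"`
+ FileGenericSeek `state:"nosave"`
+ FileNoIoctl `state:"nosave"`
+ FileNoMMap `state:"nosave"`
+ FileNoopFsync `state:"nosave"`
+ FileNoopFlush `state:"nosave"`
+ FileNoopRelease `state:"nosave"`
+ FileNoWrite `state:"nosave"`
+ FileNotDirReaddir `state:"nosave"`
+ FileUseInodeUnstableAttr `state:"nosave"`
+ FileNoSplice `state:"nosave"`
+ FileStaticContentReader
+}
+
+var _ fs.FileOperations = (*exampleStaticFile)(nil)
+
+// newExampleStaticFile returns an exampleStaticFile serving the given bytes.
+func newExampleStaticFile(content []byte) *exampleStaticFile {
+ return &exampleStaticFile{FileStaticContentReader: NewFileStaticContentReader(content)}
+}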
+
+// FileNoopWrite implements fs.FileOperations.Write as a no-op.
+type FileNoopWrite struct{}
+
+// Write implements fs.FileOperations.Write.
+func (FileNoopWrite) Write(_ context.Context, _ *fs.File, src usermem.IOSequence, _ int64) (int64, error) {
+ return src.NumBytes(), nil
+}
+
+// FileNoRead implements fs.FileOperations.Read to return EINVAL.
+type FileNoRead struct{}
+
+// Read implements fs.FileOperations.Read.
+func (FileNoRead) Read(context.Context, *fs.File, usermem.IOSequence, int64) (int64, error) {
+ return 0, syserror.EINVAL
+}
+
+// FileNoWrite implements fs.FileOperations.Write to return EINVAL.
+type FileNoWrite struct{}
+
+// Write implements fs.FileOperations.Write.
+func (FileNoWrite) Write(context.Context, *fs.File, usermem.IOSequence, int64) (int64, error) {
+ return 0, syserror.EINVAL
+}
+
+// FileNoopRead implements fs.FileOperations.Read as a no-op.
+type FileNoopRead struct{}
+
+// Read implements fs.FileOperations.Read.
+func (FileNoopRead) Read(context.Context, *fs.File, usermem.IOSequence, int64) (int64, error) {
+ return 0, nil
+}
+
+// FileUseInodeUnstableAttr implements fs.FileOperations.UnstableAttr by calling
+// InodeOperations.UnstableAttr.
+type FileUseInodeUnstableAttr struct{}
+
+// UnstableAttr implements fs.FileOperations.UnstableAttr.
+func (FileUseInodeUnstableAttr) UnstableAttr(ctx context.Context, file *fs.File) (fs.UnstableAttr, error) {
+ return file.Dirent.Inode.UnstableAttr(ctx)
+}
diff --git a/pkg/sentry/fs/fsutil/file_range_set.go b/pkg/sentry/fs/fsutil/file_range_set.go
new file mode 100644
index 000000000..b5ac6c71c
--- /dev/null
+++ b/pkg/sentry/fs/fsutil/file_range_set.go
@@ -0,0 +1,209 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package fsutil
+
+import (
+ "fmt"
+ "io"
+ "math"
+
+ "gvisor.googlesource.com/gvisor/pkg/sentry/context"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/memmap"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/pgalloc"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/platform"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/safemem"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/usage"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
+)
+
+// FileRangeSet maps offsets into a memmap.Mappable to offsets into a
+// platform.File. It is used to implement Mappables that store data in
+// sparsely-allocated memory.
+//
+// type FileRangeSet <generated by go_generics>
+
+// fileRangeSetFunctions implements segment.Functions for FileRangeSet.
+type fileRangeSetFunctions struct{}
+
+// MinKey implements segment.Functions.MinKey.
+func (fileRangeSetFunctions) MinKey() uint64 {
+ return 0
+}
+
+// MaxKey implements segment.Functions.MaxKey.
+func (fileRangeSetFunctions) MaxKey() uint64 {
+ return math.MaxUint64
+}
+
+// ClearValue implements segment.Functions.ClearValue.
+func (fileRangeSetFunctions) ClearValue(_ *uint64) {
+}
+
+// Merge implements segment.Functions.Merge.
+func (fileRangeSetFunctions) Merge(mr1 memmap.MappableRange, frstart1 uint64, _ memmap.MappableRange, frstart2 uint64) (uint64, bool) {
+ if frstart1+mr1.Length() != frstart2 {
+ return 0, false
+ }
+ return frstart1, true
+}
+
+// Split implements segment.Functions.Split.
+func (fileRangeSetFunctions) Split(mr memmap.MappableRange, frstart uint64, split uint64) (uint64, uint64) {
+ return frstart, frstart + (split - mr.Start)
+}
+
+// FileRange returns the FileRange mapped by seg.
+func (seg FileRangeIterator) FileRange() platform.FileRange {
+ return seg.FileRangeOf(seg.Range())
+}
+
+// FileRangeOf returns the FileRange mapped by mr.
+//
+// Preconditions: seg.Range().IsSupersetOf(mr). mr.Length() != 0.
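+//
+// For example (illustrative): if seg spans mappable offsets [0x3000, 0x6000)
+// mapped to file offsets starting at 0x10000, then FileRangeOf with
+// mr = [0x4000, 0x5000) yields the file range [0x11000, 0x12000).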
+func (seg FileRangeIterator) FileRangeOf(mr memmap.MappableRange) platform.FileRange {
+ frstart := seg.Value() + (mr.Start - seg.Start())
+ return platform.FileRange{frstart, frstart + mr.Length()}
+}
+
+// Fill attempts to ensure that all memmap.Mappable offsets in required are
+// mapped to a platform.File offset, by allocating from mf with the given
+// memory usage kind and invoking readAt to store data into memory. (If readAt
+// returns a successful partial read, Fill will call it repeatedly until all
+// bytes have been read.) EOF is handled consistently with the requirements of
+// mmap(2): bytes after EOF on the same page are zeroed; pages after EOF are
+// invalid.
+//
+// Fill may read offsets outside of required, but will never read offsets
+// outside of optional. It returns a non-nil error if any error occurs, even
+// if the error only affects offsets in optional, but not in required.
+//
+// Preconditions: required.Length() > 0. optional.IsSupersetOf(required).
+// required and optional must be page-aligned.
+func (frs *FileRangeSet) Fill(ctx context.Context, required, optional memmap.MappableRange, mf *pgalloc.MemoryFile, kind usage.MemoryKind, readAt func(ctx context.Context, dsts safemem.BlockSeq, offset uint64) (uint64, error)) error {
+ gap := frs.LowerBoundGap(required.Start)
+ for gap.Ok() && gap.Start() < required.End {
+ if gap.Range().Length() == 0 {
+ gap = gap.NextGap()
+ continue
+ }
+ gr := gap.Range().Intersect(optional)
+
+ // Read data into the gap.
+ fr, err := mf.AllocateAndFill(gr.Length(), kind, safemem.ReaderFunc(func(dsts safemem.BlockSeq) (uint64, error) {
+ var done uint64
+ for !dsts.IsEmpty() {
+ n, err := readAt(ctx, dsts, gr.Start+done)
+ done += n
+ dsts = dsts.DropFirst64(n)
+ if err != nil {
+ if err == io.EOF {
+ // MemoryFile.AllocateAndFill truncates down to a page
+ // boundary, but FileRangeSet.Fill is supposed to
+ // zero-fill to the end of the page in this case.
+ donepgaddr, ok := usermem.Addr(done).RoundUp()
+ if donepg := uint64(donepgaddr); ok && donepg != done {
+ dsts = dsts.DropFirst64(donepg - done)
+ done = donepg
+ if dsts.IsEmpty() {
+ return done, nil
+ }
+ }
+ }
+ return done, err
+ }
+ }
+ return done, nil
+ }))
+
+ // Store anything we managed to read into the cache.
+ if done := fr.Length(); done != 0 {
+ gr.End = gr.Start + done
+ gap = frs.Insert(gap, gr, fr.Start).NextGap()
+ }
+
+ if err != nil {
+ return err
+ }
+ }
+ return nil
+}
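+
+// exampleFill is a hypothetical sketch (not part of this package's API) of a
+// Fill caller: it backs the page-aligned mappable range mr with pages read
+// from an io.ReaderAt. The helper name and the choice of usage.PageCache are
+// illustrative only.
+func exampleFill(ctx context.Context, frs *FileRangeSet, mr memmap.MappableRange, mf *pgalloc.MemoryFile, src io.ReaderAt) error {
+ return frs.Fill(ctx, mr, mr, mf, usage.PageCache, func(ctx context.Context, dsts safemem.BlockSeq, offset uint64) (uint64, error) {
+ // Read into dsts from src at the given mappable offset. Fill
+ // retries on successful partial reads and zero-fills to the
+ // end of the page at EOF.
+ rdr := io.NewSectionReader(src, int64(offset), int64(dsts.NumBytes()))
+ return safemem.FromIOReader{Reader: rdr}.ReadToBlocks(dsts)
+ })
+}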
+
+// Drop removes segments for memmap.Mappable offsets in mr, freeing the
+// corresponding platform.FileRanges.
+//
+// Preconditions: mr must be page-aligned.
+func (frs *FileRangeSet) Drop(mr memmap.MappableRange, mf *pgalloc.MemoryFile) {
+ seg := frs.LowerBoundSegment(mr.Start)
+ for seg.Ok() && seg.Start() < mr.End {
+ seg = frs.Isolate(seg, mr)
+ mf.DecRef(seg.FileRange())
+ seg = frs.Remove(seg).NextSegment()
+ }
+}
+
+// DropAll removes all segments in mr, freeing the corresponding
+// platform.FileRanges.
+func (frs *FileRangeSet) DropAll(mf *pgalloc.MemoryFile) {
+ for seg := frs.FirstSegment(); seg.Ok(); seg = seg.NextSegment() {
+ mf.DecRef(seg.FileRange())
+ }
+ frs.RemoveAll()
+}
+
+// Truncate updates frs to reflect Mappable truncation to the given length:
+// bytes after the new EOF on the same page are zeroed, and pages after the new
+// EOF are freed.
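+//
+// For example (illustrative): with 4 KB pages, Truncate(0x1800, mf) frees
+// cached pages at offsets >= 0x2000 and zeroes cached bytes in
+// [0x1800, 0x2000), so that reads of the page containing the new EOF see
+// zeroes past it.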
+func (frs *FileRangeSet) Truncate(end uint64, mf *pgalloc.MemoryFile) {
+ pgendaddr, ok := usermem.Addr(end).RoundUp()
+ if ok {
+ pgend := uint64(pgendaddr)
+
+ // Free truncated pages.
+ frs.SplitAt(pgend)
+ seg := frs.LowerBoundSegment(pgend)
+ for seg.Ok() {
+ mf.DecRef(seg.FileRange())
+ seg = frs.Remove(seg).NextSegment()
+ }
+
+ if end == pgend {
+ return
+ }
+ }
+
+ // Here we know end < end.RoundUp(). If the new EOF lands in the
+ // middle of a page that we have, zero out its contents beyond the new
+ // length.
+ seg := frs.FindSegment(end)
+ if seg.Ok() {
+ fr := seg.FileRange()
+ fr.Start += end - seg.Start()
+ ims, err := mf.MapInternal(fr, usermem.Write)
+ if err != nil {
+ // There's no good recourse from here. This means
+ // that we can't keep cached memory consistent with
+ // the new end of file. The caller may have already
+ // updated the file size on their backing file system.
+ //
+ // We don't want to risk blindly continuing onward,
+ // so in the extremely rare cases this does happen,
+ // we abandon ship.
+ panic(fmt.Sprintf("Failed to map %v: %v", fr, err))
+ }
+ if _, err := safemem.ZeroSeq(ims); err != nil {
+ panic(fmt.Sprintf("Zeroing %v failed: %v", fr, err))
+ }
+ }
+}
diff --git a/pkg/sentry/fs/fsutil/file_range_set_impl.go b/pkg/sentry/fs/fsutil/file_range_set_impl.go
new file mode 100755
index 000000000..a0ab61628
--- /dev/null
+++ b/pkg/sentry/fs/fsutil/file_range_set_impl.go
@@ -0,0 +1,1274 @@
+package fsutil
+
+import (
+ __generics_imported0 "gvisor.googlesource.com/gvisor/pkg/sentry/memmap"
+)
+
+import (
+ "bytes"
+ "fmt"
+)
+
+const (
+ // minDegree is the minimum degree of an internal node in a Set B-tree.
+ //
+ // - Any non-root node has at least minDegree-1 segments.
+ //
+ // - Any non-root internal (non-leaf) node has at least minDegree children.
+ //
+ // - The root node may have fewer than minDegree-1 segments, but it may
+ // only have 0 segments if the tree is empty.
+ //
+ // Our implementation requires minDegree >= 3. Higher values of minDegree
+ // usually improve performance, but increase memory usage for small sets.
+ FileRangeminDegree = 3
+
+ FileRangemaxDegree = 2 * FileRangeminDegree
+)
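+
+// With FileRangeminDegree = 3, for example, every non-root node holds
+// between 2 and FileRangemaxDegree-1 = 5 segments, and every non-root
+// internal node has between 3 and 6 children.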
+
+// A Set is a mapping of segments with non-overlapping Range keys. The zero
+// value for a Set is an empty set. Set values are not safely movable nor
+// copyable. Set is thread-compatible.
+//
+// +stateify savable
+type FileRangeSet struct {
+ root FileRangenode `state:".(*FileRangeSegmentDataSlices)"`
+}
+
+// IsEmpty returns true if the set contains no segments.
+func (s *FileRangeSet) IsEmpty() bool {
+ return s.root.nrSegments == 0
+}
+
+// IsEmptyRange returns true iff no segments in the set overlap the given
+// range. This is semantically equivalent to s.SpanRange(r) == 0, but may be
+// more efficient.
+func (s *FileRangeSet) IsEmptyRange(r __generics_imported0.MappableRange) bool {
+ switch {
+ case r.Length() < 0:
+ panic(fmt.Sprintf("invalid range %v", r))
+ case r.Length() == 0:
+ return true
+ }
+ _, gap := s.Find(r.Start)
+ if !gap.Ok() {
+ return false
+ }
+ return r.End <= gap.End()
+}
+
+// Span returns the total size of all segments in the set.
+func (s *FileRangeSet) Span() uint64 {
+ var sz uint64
+ for seg := s.FirstSegment(); seg.Ok(); seg = seg.NextSegment() {
+ sz += seg.Range().Length()
+ }
+ return sz
+}
+
+// SpanRange returns the total size of the intersection of segments in the set
+// with the given range.
+func (s *FileRangeSet) SpanRange(r __generics_imported0.MappableRange) uint64 {
+ switch {
+ case r.Length() < 0:
+ panic(fmt.Sprintf("invalid range %v", r))
+ case r.Length() == 0:
+ return 0
+ }
+ var sz uint64
+ for seg := s.LowerBoundSegment(r.Start); seg.Ok() && seg.Start() < r.End; seg = seg.NextSegment() {
+ sz += seg.Range().Intersect(r).Length()
+ }
+ return sz
+}
+
+// FirstSegment returns the first segment in the set. If the set is empty,
+// FirstSegment returns a terminal iterator.
+func (s *FileRangeSet) FirstSegment() FileRangeIterator {
+ if s.root.nrSegments == 0 {
+ return FileRangeIterator{}
+ }
+ return s.root.firstSegment()
+}
+
+// LastSegment returns the last segment in the set. If the set is empty,
+// LastSegment returns a terminal iterator.
+func (s *FileRangeSet) LastSegment() FileRangeIterator {
+ if s.root.nrSegments == 0 {
+ return FileRangeIterator{}
+ }
+ return s.root.lastSegment()
+}
+
+// FirstGap returns the first gap in the set.
+func (s *FileRangeSet) FirstGap() FileRangeGapIterator {
+ n := &s.root
+ for n.hasChildren {
+ n = n.children[0]
+ }
+ return FileRangeGapIterator{n, 0}
+}
+
+// LastGap returns the last gap in the set.
+func (s *FileRangeSet) LastGap() FileRangeGapIterator {
+ n := &s.root
+ for n.hasChildren {
+ n = n.children[n.nrSegments]
+ }
+ return FileRangeGapIterator{n, n.nrSegments}
+}
+
+// Find returns the segment or gap whose range contains the given key. If a
+// segment is found, the returned Iterator is non-terminal and the
+// returned GapIterator is terminal. Otherwise, the returned Iterator is
+// terminal and the returned GapIterator is non-terminal.
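+//
+// For example (illustrative): in a set whose only segment is
+// [0x1000, 0x2000), Find(0x1800) returns a non-terminal Iterator and a
+// terminal GapIterator, while Find(0x2000) returns a terminal Iterator and
+// the gap [0x2000, MaxUint64).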
+func (s *FileRangeSet) Find(key uint64) (FileRangeIterator, FileRangeGapIterator) {
+ n := &s.root
+ for {
+
+ lower := 0
+ upper := n.nrSegments
+ for lower < upper {
+ i := lower + (upper-lower)/2
+ if r := n.keys[i]; key < r.End {
+ if key >= r.Start {
+ return FileRangeIterator{n, i}, FileRangeGapIterator{}
+ }
+ upper = i
+ } else {
+ lower = i + 1
+ }
+ }
+ i := lower
+ if !n.hasChildren {
+ return FileRangeIterator{}, FileRangeGapIterator{n, i}
+ }
+ n = n.children[i]
+ }
+}
+
+// FindSegment returns the segment whose range contains the given key. If no
+// such segment exists, FindSegment returns a terminal iterator.
+func (s *FileRangeSet) FindSegment(key uint64) FileRangeIterator {
+ seg, _ := s.Find(key)
+ return seg
+}
+
+// LowerBoundSegment returns the segment with the lowest range that contains a
+// key greater than or equal to min. If no such segment exists,
+// LowerBoundSegment returns a terminal iterator.
+func (s *FileRangeSet) LowerBoundSegment(min uint64) FileRangeIterator {
+ seg, gap := s.Find(min)
+ if seg.Ok() {
+ return seg
+ }
+ return gap.NextSegment()
+}
+
+// UpperBoundSegment returns the segment with the highest range that contains a
+// key less than or equal to max. If no such segment exists, UpperBoundSegment
+// returns a terminal iterator.
+func (s *FileRangeSet) UpperBoundSegment(max uint64) FileRangeIterator {
+ seg, gap := s.Find(max)
+ if seg.Ok() {
+ return seg
+ }
+ return gap.PrevSegment()
+}
+
+// FindGap returns the gap containing the given key. If no such gap exists
+// (i.e. the set contains a segment containing that key), FindGap returns a
+// terminal iterator.
+func (s *FileRangeSet) FindGap(key uint64) FileRangeGapIterator {
+ _, gap := s.Find(key)
+ return gap
+}
+
+// LowerBoundGap returns the gap with the lowest range that is greater than or
+// equal to min.
+func (s *FileRangeSet) LowerBoundGap(min uint64) FileRangeGapIterator {
+ seg, gap := s.Find(min)
+ if gap.Ok() {
+ return gap
+ }
+ return seg.NextGap()
+}
+
+// UpperBoundGap returns the gap with the highest range that is less than or
+// equal to max.
+func (s *FileRangeSet) UpperBoundGap(max uint64) FileRangeGapIterator {
+ seg, gap := s.Find(max)
+ if gap.Ok() {
+ return gap
+ }
+ return seg.PrevGap()
+}
+
+// Add inserts the given segment into the set and returns true. If the new
+// segment can be merged with adjacent segments, Add will do so. If the new
+// segment would overlap an existing segment, Add returns false. If Add
+// succeeds, all existing iterators are invalidated.
+func (s *FileRangeSet) Add(r __generics_imported0.MappableRange, val uint64) bool {
+ if r.Length() <= 0 {
+ panic(fmt.Sprintf("invalid segment range %v", r))
+ }
+ gap := s.FindGap(r.Start)
+ if !gap.Ok() {
+ return false
+ }
+ if r.End > gap.End() {
+ return false
+ }
+ s.Insert(gap, r, val)
+ return true
+}
+
+// AddWithoutMerging inserts the given segment into the set and returns true.
+// If it would overlap an existing segment, AddWithoutMerging does nothing and
+// returns false. If AddWithoutMerging succeeds, all existing iterators are
+// invalidated.
+func (s *FileRangeSet) AddWithoutMerging(r __generics_imported0.MappableRange, val uint64) bool {
+ if r.Length() <= 0 {
+ panic(fmt.Sprintf("invalid segment range %v", r))
+ }
+ gap := s.FindGap(r.Start)
+ if !gap.Ok() {
+ return false
+ }
+ if r.End > gap.End() {
+ return false
+ }
+ s.InsertWithoutMergingUnchecked(gap, r, val)
+ return true
+}
+
+// Insert inserts the given segment into the given gap. If the new segment can
+// be merged with adjacent segments, Insert will do so. Insert returns an
+// iterator to the segment containing the inserted value (which may have been
+// merged with other values). All existing iterators (including gap, but not
+// including the returned iterator) are invalidated.
+//
+// If the gap cannot accommodate the segment, or if r is invalid, Insert panics.
+//
+// Insert is semantically equivalent to an InsertWithoutMerging followed by a
+// Merge, but may be more efficient. Note that there is no unchecked variant of
+// Insert since Insert must retrieve and inspect gap's predecessor and
+// successor segments regardless.
+func (s *FileRangeSet) Insert(gap FileRangeGapIterator, r __generics_imported0.MappableRange, val uint64) FileRangeIterator {
+ if r.Length() <= 0 {
+ panic(fmt.Sprintf("invalid segment range %v", r))
+ }
+ prev, next := gap.PrevSegment(), gap.NextSegment()
+ if prev.Ok() && prev.End() > r.Start {
+ panic(fmt.Sprintf("new segment %v overlaps predecessor %v", r, prev.Range()))
+ }
+ if next.Ok() && next.Start() < r.End {
+ panic(fmt.Sprintf("new segment %v overlaps successor %v", r, next.Range()))
+ }
+ if prev.Ok() && prev.End() == r.Start {
+ if mval, ok := (fileRangeSetFunctions{}).Merge(prev.Range(), prev.Value(), r, val); ok {
+ prev.SetEndUnchecked(r.End)
+ prev.SetValue(mval)
+ if next.Ok() && next.Start() == r.End {
+ val = mval
+ if mval, ok := (fileRangeSetFunctions{}).Merge(prev.Range(), val, next.Range(), next.Value()); ok {
+ prev.SetEndUnchecked(next.End())
+ prev.SetValue(mval)
+ return s.Remove(next).PrevSegment()
+ }
+ }
+ return prev
+ }
+ }
+ if next.Ok() && next.Start() == r.End {
+ if mval, ok := (fileRangeSetFunctions{}).Merge(r, val, next.Range(), next.Value()); ok {
+ next.SetStartUnchecked(r.Start)
+ next.SetValue(mval)
+ return next
+ }
+ }
+ return s.InsertWithoutMergingUnchecked(gap, r, val)
+}
+
+// InsertWithoutMerging inserts the given segment into the given gap and
+// returns an iterator to the inserted segment. All existing iterators
+// (including gap, but not including the returned iterator) are invalidated.
+//
+// If the gap cannot accommodate the segment, or if r is invalid,
+// InsertWithoutMerging panics.
+func (s *FileRangeSet) InsertWithoutMerging(gap FileRangeGapIterator, r __generics_imported0.MappableRange, val uint64) FileRangeIterator {
+ if r.Length() <= 0 {
+ panic(fmt.Sprintf("invalid segment range %v", r))
+ }
+ if gr := gap.Range(); !gr.IsSupersetOf(r) {
+ panic(fmt.Sprintf("cannot insert segment range %v into gap range %v", r, gr))
+ }
+ return s.InsertWithoutMergingUnchecked(gap, r, val)
+}
+
+// InsertWithoutMergingUnchecked inserts the given segment into the given gap
+// and returns an iterator to the inserted segment. All existing iterators
+// (including gap, but not including the returned iterator) are invalidated.
+//
+// Preconditions: r.Start >= gap.Start(); r.End <= gap.End().
+func (s *FileRangeSet) InsertWithoutMergingUnchecked(gap FileRangeGapIterator, r __generics_imported0.MappableRange, val uint64) FileRangeIterator {
+ gap = gap.node.rebalanceBeforeInsert(gap)
+ copy(gap.node.keys[gap.index+1:], gap.node.keys[gap.index:gap.node.nrSegments])
+ copy(gap.node.values[gap.index+1:], gap.node.values[gap.index:gap.node.nrSegments])
+ gap.node.keys[gap.index] = r
+ gap.node.values[gap.index] = val
+ gap.node.nrSegments++
+ return FileRangeIterator{gap.node, gap.index}
+}
+
+// Remove removes the given segment and returns an iterator to the vacated gap.
+// All existing iterators (including seg, but not including the returned
+// iterator) are invalidated.
+func (s *FileRangeSet) Remove(seg FileRangeIterator) FileRangeGapIterator {
+
+ if seg.node.hasChildren {
+
+ victim := seg.PrevSegment()
+
+ seg.SetRangeUnchecked(victim.Range())
+ seg.SetValue(victim.Value())
+ return s.Remove(victim).NextGap()
+ }
+ copy(seg.node.keys[seg.index:], seg.node.keys[seg.index+1:seg.node.nrSegments])
+ copy(seg.node.values[seg.index:], seg.node.values[seg.index+1:seg.node.nrSegments])
+ fileRangeSetFunctions{}.ClearValue(&seg.node.values[seg.node.nrSegments-1])
+ seg.node.nrSegments--
+ return seg.node.rebalanceAfterRemove(FileRangeGapIterator{seg.node, seg.index})
+}
+
+// RemoveAll removes all segments from the set. All existing iterators are
+// invalidated.
+func (s *FileRangeSet) RemoveAll() {
+ s.root = FileRangenode{}
+}
+
+// RemoveRange removes all segments in the given range. An iterator to the
+// newly formed gap is returned, and all existing iterators are invalidated.
+func (s *FileRangeSet) RemoveRange(r __generics_imported0.MappableRange) FileRangeGapIterator {
+ seg, gap := s.Find(r.Start)
+ if seg.Ok() {
+ seg = s.Isolate(seg, r)
+ gap = s.Remove(seg)
+ }
+ for seg = gap.NextSegment(); seg.Ok() && seg.Start() < r.End; seg = gap.NextSegment() {
+ seg = s.Isolate(seg, r)
+ gap = s.Remove(seg)
+ }
+ return gap
+}
+
+// Merge attempts to merge two neighboring segments. If successful, Merge
+// returns an iterator to the merged segment, and all existing iterators are
+// invalidated. Otherwise, Merge returns a terminal iterator.
+//
+// If first is not the predecessor of second, Merge panics.
+func (s *FileRangeSet) Merge(first, second FileRangeIterator) FileRangeIterator {
+ if first.NextSegment() != second {
+ panic(fmt.Sprintf("attempt to merge non-neighboring segments %v, %v", first.Range(), second.Range()))
+ }
+ return s.MergeUnchecked(first, second)
+}
+
+// MergeUnchecked attempts to merge two neighboring segments. If successful,
+// MergeUnchecked returns an iterator to the merged segment, and all existing
+// iterators are invalidated. Otherwise, MergeUnchecked returns a terminal
+// iterator.
+//
+// Precondition: first is the predecessor of second: first.NextSegment() ==
+// second, first == second.PrevSegment().
+func (s *FileRangeSet) MergeUnchecked(first, second FileRangeIterator) FileRangeIterator {
+ if first.End() == second.Start() {
+ if mval, ok := (fileRangeSetFunctions{}).Merge(first.Range(), first.Value(), second.Range(), second.Value()); ok {
+
+ first.SetEndUnchecked(second.End())
+ first.SetValue(mval)
+ return s.Remove(second).PrevSegment()
+ }
+ }
+ return FileRangeIterator{}
+}
+
+// MergeAll attempts to merge all adjacent segments in the set. All existing
+// iterators are invalidated.
+func (s *FileRangeSet) MergeAll() {
+ seg := s.FirstSegment()
+ if !seg.Ok() {
+ return
+ }
+ next := seg.NextSegment()
+ for next.Ok() {
+ if mseg := s.MergeUnchecked(seg, next); mseg.Ok() {
+ seg, next = mseg, mseg.NextSegment()
+ } else {
+ seg, next = next, next.NextSegment()
+ }
+ }
+}
+
+// MergeRange attempts to merge all adjacent segments that contain a key in the
+// specific range. All existing iterators are invalidated.
+func (s *FileRangeSet) MergeRange(r __generics_imported0.MappableRange) {
+ seg := s.LowerBoundSegment(r.Start)
+ if !seg.Ok() {
+ return
+ }
+ next := seg.NextSegment()
+ for next.Ok() && next.Range().Start < r.End {
+ if mseg := s.MergeUnchecked(seg, next); mseg.Ok() {
+ seg, next = mseg, mseg.NextSegment()
+ } else {
+ seg, next = next, next.NextSegment()
+ }
+ }
+}
+
+// MergeAdjacent attempts to merge the segment containing r.Start with its
+// predecessor, and the segment containing r.End-1 with its successor.
+func (s *FileRangeSet) MergeAdjacent(r __generics_imported0.MappableRange) {
+ first := s.FindSegment(r.Start)
+ if first.Ok() {
+ if prev := first.PrevSegment(); prev.Ok() {
+ s.Merge(prev, first)
+ }
+ }
+ last := s.FindSegment(r.End - 1)
+ if last.Ok() {
+ if next := last.NextSegment(); next.Ok() {
+ s.Merge(last, next)
+ }
+ }
+}
+
+// Split splits the given segment at the given key and returns iterators to the
+// two resulting segments. All existing iterators (including seg, but not
+// including the returned iterators) are invalidated.
+//
+// If the segment cannot be split at split (because split is at the start or
+// end of the segment's range, so splitting would produce a segment with zero
+// length, or because split falls outside the segment's range altogether),
+// Split panics.
+func (s *FileRangeSet) Split(seg FileRangeIterator, split uint64) (FileRangeIterator, FileRangeIterator) {
+ if !seg.Range().CanSplitAt(split) {
+ panic(fmt.Sprintf("can't split %v at %v", seg.Range(), split))
+ }
+ return s.SplitUnchecked(seg, split)
+}
+
+// SplitUnchecked splits the given segment at the given key and returns
+// iterators to the two resulting segments. All existing iterators (including
+// seg, but not including the returned iterators) are invalidated.
+//
+// Preconditions: seg.Start() < split < seg.End().
+func (s *FileRangeSet) SplitUnchecked(seg FileRangeIterator, split uint64) (FileRangeIterator, FileRangeIterator) {
+ val1, val2 := (fileRangeSetFunctions{}).Split(seg.Range(), seg.Value(), split)
+ end2 := seg.End()
+ seg.SetEndUnchecked(split)
+ seg.SetValue(val1)
+ seg2 := s.InsertWithoutMergingUnchecked(seg.NextGap(), __generics_imported0.MappableRange{split, end2}, val2)
+
+ return seg2.PrevSegment(), seg2
+}
+
+// SplitAt splits the segment straddling split, if one exists. SplitAt returns
+// true if a segment was split and false otherwise. If SplitAt splits a
+// segment, all existing iterators are invalidated.
+func (s *FileRangeSet) SplitAt(split uint64) bool {
+ if seg := s.FindSegment(split); seg.Ok() && seg.Range().CanSplitAt(split) {
+ s.SplitUnchecked(seg, split)
+ return true
+ }
+ return false
+}
+
+// Isolate ensures that the given segment's range does not escape r by
+// splitting at r.Start and r.End if necessary, and returns an updated iterator
+// to the bounded segment. All existing iterators (including seg, but not
+// including the returned iterators) are invalidated.
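+//
+// For example (illustrative): isolating the segment [0x0, 0x3000) with
+// r = [0x1000, 0x2000) splits it into [0x0, 0x1000), [0x1000, 0x2000), and
+// [0x2000, 0x3000), and returns an iterator to the middle segment.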
+func (s *FileRangeSet) Isolate(seg FileRangeIterator, r __generics_imported0.MappableRange) FileRangeIterator {
+ if seg.Range().CanSplitAt(r.Start) {
+ _, seg = s.SplitUnchecked(seg, r.Start)
+ }
+ if seg.Range().CanSplitAt(r.End) {
+ seg, _ = s.SplitUnchecked(seg, r.End)
+ }
+ return seg
+}
+
+// ApplyContiguous applies a function to a contiguous range of segments,
+// splitting if necessary. The function is applied until the first gap is
+// encountered, at which point the gap is returned. If the function is applied
+// across the entire range, a terminal gap is returned. All existing iterators
+// are invalidated.
+//
+// N.B. The Iterator must not be invalidated by the function.
+func (s *FileRangeSet) ApplyContiguous(r __generics_imported0.MappableRange, fn func(seg FileRangeIterator)) FileRangeGapIterator {
+ seg, gap := s.Find(r.Start)
+ if !seg.Ok() {
+ return gap
+ }
+ for {
+ seg = s.Isolate(seg, r)
+ fn(seg)
+ if seg.End() >= r.End {
+ return FileRangeGapIterator{}
+ }
+ gap = seg.NextGap()
+ if !gap.IsEmpty() {
+ return gap
+ }
+ seg = gap.NextSegment()
+ if !seg.Ok() {
+
+ return FileRangeGapIterator{}
+ }
+ }
+}
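+
+// exampleMarkRange is a hypothetical sketch (not part of this package's API)
+// of an ApplyContiguous caller: it rewrites the value of every segment
+// overlapping r and reports whether r was fully covered by segments.
+func exampleMarkRange(s *FileRangeSet, r __generics_imported0.MappableRange, newVal uint64) bool {
+ // ApplyContiguous returns a terminal gap iff fn was applied across all
+ // of r.
+ return !s.ApplyContiguous(r, func(seg FileRangeIterator) {
+ seg.SetValue(newVal)
+ }).Ok()
+}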
+
+// +stateify savable
+type FileRangenode struct {
+ // An internal binary tree node looks like:
+ //
+ // K
+ // / \
+ // Cl Cr
+ //
+ // where all keys in the subtree rooted by Cl (the left subtree) are less
+ // than K (the key of the parent node), and all keys in the subtree rooted
+ // by Cr (the right subtree) are greater than K.
+ //
+ // An internal B-tree node's indexes work out to look like:
+ //
+ // K0 K1 K2 ... Kn-1
+ // / \/ \/ \ ... / \
+ // C0 C1 C2 C3 ... Cn-1 Cn
+ //
+ // where n is nrSegments.
+ nrSegments int
+
+ // parent is a pointer to this node's parent. If this node is root, parent
+ // is nil.
+ parent *FileRangenode
+
+ // parentIndex is the index of this node in parent.children.
+ parentIndex int
+
+ // Flag for internal nodes that is technically redundant with "children[0]
+ // != nil", but is stored in the first cache line. "hasChildren" rather
+ // than "isLeaf" because false must be the correct value for an empty root.
+ hasChildren bool
+
+ // Nodes store keys and values in separate arrays to maximize locality in
+ // the common case (scanning keys for lookup).
+ keys [FileRangemaxDegree - 1]__generics_imported0.MappableRange
+ values [FileRangemaxDegree - 1]uint64
+ children [FileRangemaxDegree]*FileRangenode
+}
+
+// firstSegment returns the first segment in the subtree rooted by n.
+//
+// Preconditions: n.nrSegments != 0.
+func (n *FileRangenode) firstSegment() FileRangeIterator {
+ for n.hasChildren {
+ n = n.children[0]
+ }
+ return FileRangeIterator{n, 0}
+}
+
+// lastSegment returns the last segment in the subtree rooted by n.
+//
+// Preconditions: n.nrSegments != 0.
+func (n *FileRangenode) lastSegment() FileRangeIterator {
+ for n.hasChildren {
+ n = n.children[n.nrSegments]
+ }
+ return FileRangeIterator{n, n.nrSegments - 1}
+}
+
+func (n *FileRangenode) prevSibling() *FileRangenode {
+ if n.parent == nil || n.parentIndex == 0 {
+ return nil
+ }
+ return n.parent.children[n.parentIndex-1]
+}
+
+func (n *FileRangenode) nextSibling() *FileRangenode {
+ if n.parent == nil || n.parentIndex == n.parent.nrSegments {
+ return nil
+ }
+ return n.parent.children[n.parentIndex+1]
+}
+
+// rebalanceBeforeInsert splits n and its ancestors if they are full, as
+// required for insertion, and returns an updated iterator to the position
+// represented by gap.
+func (n *FileRangenode) rebalanceBeforeInsert(gap FileRangeGapIterator) FileRangeGapIterator {
+ if n.parent != nil {
+ gap = n.parent.rebalanceBeforeInsert(gap)
+ }
+ if n.nrSegments < FileRangemaxDegree-1 {
+ return gap
+ }
+ if n.parent == nil {
+
+ left := &FileRangenode{
+ nrSegments: FileRangeminDegree - 1,
+ parent: n,
+ parentIndex: 0,
+ hasChildren: n.hasChildren,
+ }
+ right := &FileRangenode{
+ nrSegments: FileRangeminDegree - 1,
+ parent: n,
+ parentIndex: 1,
+ hasChildren: n.hasChildren,
+ }
+ copy(left.keys[:FileRangeminDegree-1], n.keys[:FileRangeminDegree-1])
+ copy(left.values[:FileRangeminDegree-1], n.values[:FileRangeminDegree-1])
+ copy(right.keys[:FileRangeminDegree-1], n.keys[FileRangeminDegree:])
+ copy(right.values[:FileRangeminDegree-1], n.values[FileRangeminDegree:])
+ n.keys[0], n.values[0] = n.keys[FileRangeminDegree-1], n.values[FileRangeminDegree-1]
+ FileRangezeroValueSlice(n.values[1:])
+ if n.hasChildren {
+ copy(left.children[:FileRangeminDegree], n.children[:FileRangeminDegree])
+ copy(right.children[:FileRangeminDegree], n.children[FileRangeminDegree:])
+ FileRangezeroNodeSlice(n.children[2:])
+ for i := 0; i < FileRangeminDegree; i++ {
+ left.children[i].parent = left
+ left.children[i].parentIndex = i
+ right.children[i].parent = right
+ right.children[i].parentIndex = i
+ }
+ }
+ n.nrSegments = 1
+ n.hasChildren = true
+ n.children[0] = left
+ n.children[1] = right
+ if gap.node != n {
+ return gap
+ }
+ if gap.index < FileRangeminDegree {
+ return FileRangeGapIterator{left, gap.index}
+ }
+ return FileRangeGapIterator{right, gap.index - FileRangeminDegree}
+ }
+
+ copy(n.parent.keys[n.parentIndex+1:], n.parent.keys[n.parentIndex:n.parent.nrSegments])
+ copy(n.parent.values[n.parentIndex+1:], n.parent.values[n.parentIndex:n.parent.nrSegments])
+ n.parent.keys[n.parentIndex], n.parent.values[n.parentIndex] = n.keys[FileRangeminDegree-1], n.values[FileRangeminDegree-1]
+ copy(n.parent.children[n.parentIndex+2:], n.parent.children[n.parentIndex+1:n.parent.nrSegments+1])
+ for i := n.parentIndex + 2; i < n.parent.nrSegments+2; i++ {
+ n.parent.children[i].parentIndex = i
+ }
+ sibling := &FileRangenode{
+ nrSegments: FileRangeminDegree - 1,
+ parent: n.parent,
+ parentIndex: n.parentIndex + 1,
+ hasChildren: n.hasChildren,
+ }
+ n.parent.children[n.parentIndex+1] = sibling
+ n.parent.nrSegments++
+ copy(sibling.keys[:FileRangeminDegree-1], n.keys[FileRangeminDegree:])
+ copy(sibling.values[:FileRangeminDegree-1], n.values[FileRangeminDegree:])
+ FileRangezeroValueSlice(n.values[FileRangeminDegree-1:])
+ if n.hasChildren {
+ copy(sibling.children[:FileRangeminDegree], n.children[FileRangeminDegree:])
+ FileRangezeroNodeSlice(n.children[FileRangeminDegree:])
+ for i := 0; i < FileRangeminDegree; i++ {
+ sibling.children[i].parent = sibling
+ sibling.children[i].parentIndex = i
+ }
+ }
+ n.nrSegments = FileRangeminDegree - 1
+
+ if gap.node != n {
+ return gap
+ }
+ if gap.index < FileRangeminDegree {
+ return gap
+ }
+ return FileRangeGapIterator{sibling, gap.index - FileRangeminDegree}
+}
+
+// rebalanceAfterRemove "unsplits" n and its ancestors if they are deficient
+// (contain fewer segments than required by B-tree invariants), as required for
+// removal, and returns an updated iterator to the position represented by gap.
+//
+// Precondition: n is the only node in the tree that may currently violate a
+// B-tree invariant.
+func (n *FileRangenode) rebalanceAfterRemove(gap FileRangeGapIterator) FileRangeGapIterator {
+ for {
+ if n.nrSegments >= FileRangeminDegree-1 {
+ return gap
+ }
+ if n.parent == nil {
+
+ return gap
+ }
+
+ if sibling := n.prevSibling(); sibling != nil && sibling.nrSegments >= FileRangeminDegree {
+ copy(n.keys[1:], n.keys[:n.nrSegments])
+ copy(n.values[1:], n.values[:n.nrSegments])
+ n.keys[0] = n.parent.keys[n.parentIndex-1]
+ n.values[0] = n.parent.values[n.parentIndex-1]
+ n.parent.keys[n.parentIndex-1] = sibling.keys[sibling.nrSegments-1]
+ n.parent.values[n.parentIndex-1] = sibling.values[sibling.nrSegments-1]
+ fileRangeSetFunctions{}.ClearValue(&sibling.values[sibling.nrSegments-1])
+ if n.hasChildren {
+ copy(n.children[1:], n.children[:n.nrSegments+1])
+ n.children[0] = sibling.children[sibling.nrSegments]
+ sibling.children[sibling.nrSegments] = nil
+ n.children[0].parent = n
+ n.children[0].parentIndex = 0
+ for i := 1; i < n.nrSegments+2; i++ {
+ n.children[i].parentIndex = i
+ }
+ }
+ n.nrSegments++
+ sibling.nrSegments--
+ if gap.node == sibling && gap.index == sibling.nrSegments {
+ return FileRangeGapIterator{n, 0}
+ }
+ if gap.node == n {
+ return FileRangeGapIterator{n, gap.index + 1}
+ }
+ return gap
+ }
+ if sibling := n.nextSibling(); sibling != nil && sibling.nrSegments >= FileRangeminDegree {
+ n.keys[n.nrSegments] = n.parent.keys[n.parentIndex]
+ n.values[n.nrSegments] = n.parent.values[n.parentIndex]
+ n.parent.keys[n.parentIndex] = sibling.keys[0]
+ n.parent.values[n.parentIndex] = sibling.values[0]
+ copy(sibling.keys[:sibling.nrSegments-1], sibling.keys[1:])
+ copy(sibling.values[:sibling.nrSegments-1], sibling.values[1:])
+ fileRangeSetFunctions{}.ClearValue(&sibling.values[sibling.nrSegments-1])
+ if n.hasChildren {
+ n.children[n.nrSegments+1] = sibling.children[0]
+ copy(sibling.children[:sibling.nrSegments], sibling.children[1:])
+ sibling.children[sibling.nrSegments] = nil
+ n.children[n.nrSegments+1].parent = n
+ n.children[n.nrSegments+1].parentIndex = n.nrSegments + 1
+ for i := 0; i < sibling.nrSegments; i++ {
+ sibling.children[i].parentIndex = i
+ }
+ }
+ n.nrSegments++
+ sibling.nrSegments--
+ if gap.node == sibling {
+ if gap.index == 0 {
+ return FileRangeGapIterator{n, n.nrSegments}
+ }
+ return FileRangeGapIterator{sibling, gap.index - 1}
+ }
+ return gap
+ }
+
+ p := n.parent
+ if p.nrSegments == 1 {
+
+ left, right := p.children[0], p.children[1]
+ p.nrSegments = left.nrSegments + right.nrSegments + 1
+ p.hasChildren = left.hasChildren
+ p.keys[left.nrSegments] = p.keys[0]
+ p.values[left.nrSegments] = p.values[0]
+ copy(p.keys[:left.nrSegments], left.keys[:left.nrSegments])
+ copy(p.values[:left.nrSegments], left.values[:left.nrSegments])
+ copy(p.keys[left.nrSegments+1:], right.keys[:right.nrSegments])
+ copy(p.values[left.nrSegments+1:], right.values[:right.nrSegments])
+ if left.hasChildren {
+ copy(p.children[:left.nrSegments+1], left.children[:left.nrSegments+1])
+ copy(p.children[left.nrSegments+1:], right.children[:right.nrSegments+1])
+ for i := 0; i < p.nrSegments+1; i++ {
+ p.children[i].parent = p
+ p.children[i].parentIndex = i
+ }
+ } else {
+ p.children[0] = nil
+ p.children[1] = nil
+ }
+ if gap.node == left {
+ return FileRangeGapIterator{p, gap.index}
+ }
+ if gap.node == right {
+ return FileRangeGapIterator{p, gap.index + left.nrSegments + 1}
+ }
+ return gap
+ }
+ // Merge n and either sibling, along with the segment separating the
+ // two, into whichever of the two nodes comes first. This is the
+ // reverse of the non-root splitting case in
+ // node.rebalanceBeforeInsert.
+ var left, right *FileRangenode
+ if n.parentIndex > 0 {
+ left = n.prevSibling()
+ right = n
+ } else {
+ left = n
+ right = n.nextSibling()
+ }
+
+ if gap.node == right {
+ gap = FileRangeGapIterator{left, gap.index + left.nrSegments + 1}
+ }
+ left.keys[left.nrSegments] = p.keys[left.parentIndex]
+ left.values[left.nrSegments] = p.values[left.parentIndex]
+ copy(left.keys[left.nrSegments+1:], right.keys[:right.nrSegments])
+ copy(left.values[left.nrSegments+1:], right.values[:right.nrSegments])
+ if left.hasChildren {
+ copy(left.children[left.nrSegments+1:], right.children[:right.nrSegments+1])
+ for i := left.nrSegments + 1; i < left.nrSegments+right.nrSegments+2; i++ {
+ left.children[i].parent = left
+ left.children[i].parentIndex = i
+ }
+ }
+ left.nrSegments += right.nrSegments + 1
+ copy(p.keys[left.parentIndex:], p.keys[left.parentIndex+1:p.nrSegments])
+ copy(p.values[left.parentIndex:], p.values[left.parentIndex+1:p.nrSegments])
+ fileRangeSetFunctions{}.ClearValue(&p.values[p.nrSegments-1])
+ copy(p.children[left.parentIndex+1:], p.children[left.parentIndex+2:p.nrSegments+1])
+ for i := 0; i < p.nrSegments; i++ {
+ p.children[i].parentIndex = i
+ }
+ p.children[p.nrSegments] = nil
+ p.nrSegments--
+
+ n = p
+ }
+}
+
+// An Iterator is conceptually one of:
+//
+// - A pointer to a segment in a set; or
+//
+// - A terminal iterator, which is a sentinel indicating that the end of
+// iteration has been reached.
+//
+// Iterators are copyable values and are meaningfully equality-comparable. The
+// zero value of Iterator is a terminal iterator.
+//
+// Unless otherwise specified, any mutation of a set invalidates all existing
+// iterators into the set.
+type FileRangeIterator struct {
+ // node is the node containing the iterated segment. If the iterator is
+ // terminal, node is nil.
+ node *FileRangenode
+
+ // index is the index of the segment in node.keys/values.
+ index int
+}
+
+// Ok returns true if the iterator is not terminal. All other methods are only
+// valid for non-terminal iterators.
+func (seg FileRangeIterator) Ok() bool {
+ return seg.node != nil
+}
+
+// Range returns the iterated segment's range key.
+func (seg FileRangeIterator) Range() __generics_imported0.MappableRange {
+ return seg.node.keys[seg.index]
+}
+
+// Start is equivalent to Range().Start, but should be preferred if only the
+// start of the range is needed.
+func (seg FileRangeIterator) Start() uint64 {
+ return seg.node.keys[seg.index].Start
+}
+
+// End is equivalent to Range().End, but should be preferred if only the end of
+// the range is needed.
+func (seg FileRangeIterator) End() uint64 {
+ return seg.node.keys[seg.index].End
+}
+
+// SetRangeUnchecked mutates the iterated segment's range key. This operation
+// does not invalidate any iterators.
+//
+// Preconditions:
+//
+// - r.Length() > 0.
+//
+// - The new range must not overlap an existing one: If seg.NextSegment().Ok(),
+// then r.End <= seg.NextSegment().Start(); if seg.PrevSegment().Ok(), then
+// r.Start >= seg.PrevSegment().End().
+func (seg FileRangeIterator) SetRangeUnchecked(r __generics_imported0.MappableRange) {
+ seg.node.keys[seg.index] = r
+}
+
+// SetRange mutates the iterated segment's range key. If the new range would
+// cause the iterated segment to overlap another segment, or if the new range
+// is invalid, SetRange panics. This operation does not invalidate any
+// iterators.
+func (seg FileRangeIterator) SetRange(r __generics_imported0.MappableRange) {
+ if r.Length() <= 0 {
+ panic(fmt.Sprintf("invalid segment range %v", r))
+ }
+ if prev := seg.PrevSegment(); prev.Ok() && r.Start < prev.End() {
+ panic(fmt.Sprintf("new segment range %v overlaps segment range %v", r, prev.Range()))
+ }
+ if next := seg.NextSegment(); next.Ok() && r.End > next.Start() {
+ panic(fmt.Sprintf("new segment range %v overlaps segment range %v", r, next.Range()))
+ }
+ seg.SetRangeUnchecked(r)
+}
+
+// SetStartUnchecked mutates the iterated segment's start. This operation does
+// not invalidate any iterators.
+//
+// Preconditions: The new start must be valid: start < seg.End(); if
+// seg.PrevSegment().Ok(), then start >= seg.PrevSegment().End().
+func (seg FileRangeIterator) SetStartUnchecked(start uint64) {
+ seg.node.keys[seg.index].Start = start
+}
+
+// SetStart mutates the iterated segment's start. If the new start value would
+// cause the iterated segment to overlap another segment, or would result in an
+// invalid range, SetStart panics. This operation does not invalidate any
+// iterators.
+func (seg FileRangeIterator) SetStart(start uint64) {
+ if start >= seg.End() {
+ panic(fmt.Sprintf("new start %v would invalidate segment range %v", start, seg.Range()))
+ }
+ if prev := seg.PrevSegment(); prev.Ok() && start < prev.End() {
+ panic(fmt.Sprintf("new start %v would cause segment range %v to overlap segment range %v", start, seg.Range(), prev.Range()))
+ }
+ seg.SetStartUnchecked(start)
+}
+
+// SetEndUnchecked mutates the iterated segment's end. This operation does not
+// invalidate any iterators.
+//
+// Preconditions: The new end must be valid: end > seg.Start(); if
+// seg.NextSegment().Ok(), then end <= seg.NextSegment().Start().
+func (seg FileRangeIterator) SetEndUnchecked(end uint64) {
+ seg.node.keys[seg.index].End = end
+}
+
+// SetEnd mutates the iterated segment's end. If the new end value would cause
+// the iterated segment to overlap another segment, or would result in an
+// invalid range, SetEnd panics. This operation does not invalidate any
+// iterators.
+func (seg FileRangeIterator) SetEnd(end uint64) {
+ if end <= seg.Start() {
+ panic(fmt.Sprintf("new end %v would invalidate segment range %v", end, seg.Range()))
+ }
+ if next := seg.NextSegment(); next.Ok() && end > next.Start() {
+ panic(fmt.Sprintf("new end %v would cause segment range %v to overlap segment range %v", end, seg.Range(), next.Range()))
+ }
+ seg.SetEndUnchecked(end)
+}
+
+// Value returns a copy of the iterated segment's value.
+func (seg FileRangeIterator) Value() uint64 {
+ return seg.node.values[seg.index]
+}
+
+// ValuePtr returns a pointer to the iterated segment's value. The pointer is
+// invalidated if the iterator is invalidated. This operation does not
+// invalidate any iterators.
+func (seg FileRangeIterator) ValuePtr() *uint64 {
+ return &seg.node.values[seg.index]
+}
+
+// SetValue mutates the iterated segment's value. This operation does not
+// invalidate any iterators.
+func (seg FileRangeIterator) SetValue(val uint64) {
+ seg.node.values[seg.index] = val
+}
+
+// PrevSegment returns the iterated segment's predecessor. If there is no
+// preceding segment, PrevSegment returns a terminal iterator.
+func (seg FileRangeIterator) PrevSegment() FileRangeIterator {
+ if seg.node.hasChildren {
+ return seg.node.children[seg.index].lastSegment()
+ }
+ if seg.index > 0 {
+ return FileRangeIterator{seg.node, seg.index - 1}
+ }
+ if seg.node.parent == nil {
+ return FileRangeIterator{}
+ }
+ return FileRangesegmentBeforePosition(seg.node.parent, seg.node.parentIndex)
+}
+
+// NextSegment returns the iterated segment's successor. If there is no
+// succeeding segment, NextSegment returns a terminal iterator.
+func (seg FileRangeIterator) NextSegment() FileRangeIterator {
+ if seg.node.hasChildren {
+ return seg.node.children[seg.index+1].firstSegment()
+ }
+ if seg.index < seg.node.nrSegments-1 {
+ return FileRangeIterator{seg.node, seg.index + 1}
+ }
+ if seg.node.parent == nil {
+ return FileRangeIterator{}
+ }
+ return FileRangesegmentAfterPosition(seg.node.parent, seg.node.parentIndex)
+}
+
+// PrevGap returns the gap immediately before the iterated segment.
+func (seg FileRangeIterator) PrevGap() FileRangeGapIterator {
+ if seg.node.hasChildren {
+
+ return seg.node.children[seg.index].lastSegment().NextGap()
+ }
+ return FileRangeGapIterator{seg.node, seg.index}
+}
+
+// NextGap returns the gap immediately after the iterated segment.
+func (seg FileRangeIterator) NextGap() FileRangeGapIterator {
+ if seg.node.hasChildren {
+ return seg.node.children[seg.index+1].firstSegment().PrevGap()
+ }
+ return FileRangeGapIterator{seg.node, seg.index + 1}
+}
+
+// PrevNonEmpty returns the iterated segment's predecessor if it is adjacent,
+// or the gap before the iterated segment otherwise. If seg.Start() ==
+// Functions.MinKey(), PrevNonEmpty will return two terminal iterators.
+// Otherwise, exactly one of the iterators returned by PrevNonEmpty will be
+// non-terminal.
+func (seg FileRangeIterator) PrevNonEmpty() (FileRangeIterator, FileRangeGapIterator) {
+ gap := seg.PrevGap()
+ if gap.Range().Length() != 0 {
+ return FileRangeIterator{}, gap
+ }
+ return gap.PrevSegment(), FileRangeGapIterator{}
+}
+
+// NextNonEmpty returns the iterated segment's successor if it is adjacent, or
+// the gap after the iterated segment otherwise. If seg.End() ==
+// Functions.MaxKey(), NextNonEmpty will return two terminal iterators.
+// Otherwise, exactly one of the iterators returned by NextNonEmpty will be
+// non-terminal.
+func (seg FileRangeIterator) NextNonEmpty() (FileRangeIterator, FileRangeGapIterator) {
+ gap := seg.NextGap()
+ if gap.Range().Length() != 0 {
+ return FileRangeIterator{}, gap
+ }
+ return gap.NextSegment(), FileRangeGapIterator{}
+}
+
+// A GapIterator is conceptually one of:
+//
+// - A pointer to a position between two segments, before the first segment, or
+// after the last segment in a set, called a *gap*; or
+//
+// - A terminal iterator, which is a sentinel indicating that the end of
+// iteration has been reached.
+//
+// Note that the gap between two adjacent segments exists (iterators to it are
+// non-terminal), but has a length of zero. GapIterator.IsEmpty returns true
+// for such gaps. An empty set contains a single gap, spanning the entire range
+// of the set's keys.
+//
+// GapIterators are copyable values and are meaningfully equality-comparable.
+// The zero value of GapIterator is a terminal iterator.
+//
+// Unless otherwise specified, any mutation of a set invalidates all existing
+// iterators into the set.
+type FileRangeGapIterator struct {
+ // The representation of a GapIterator is identical to that of an Iterator,
+ // except that index corresponds to positions between segments in the same
+ // way as for node.children (see comment for node.nrSegments).
+ node *FileRangenode
+ index int
+}
+
+// Ok returns true if the iterator is not terminal. All other methods are only
+// valid for non-terminal iterators.
+func (gap FileRangeGapIterator) Ok() bool {
+ return gap.node != nil
+}
+
+// Range returns the range spanned by the iterated gap.
+func (gap FileRangeGapIterator) Range() __generics_imported0.MappableRange {
+ return __generics_imported0.MappableRange{gap.Start(), gap.End()}
+}
+
+// Start is equivalent to Range().Start, but should be preferred if only the
+// start of the range is needed.
+func (gap FileRangeGapIterator) Start() uint64 {
+ if ps := gap.PrevSegment(); ps.Ok() {
+ return ps.End()
+ }
+ return fileRangeSetFunctions{}.MinKey()
+}
+
+// End is equivalent to Range().End, but should be preferred if only the end of
+// the range is needed.
+func (gap FileRangeGapIterator) End() uint64 {
+ if ns := gap.NextSegment(); ns.Ok() {
+ return ns.Start()
+ }
+ return fileRangeSetFunctions{}.MaxKey()
+}
+
+// IsEmpty returns true if the iterated gap is empty (that is, the "gap" is
+// between two adjacent segments).
+func (gap FileRangeGapIterator) IsEmpty() bool {
+ return gap.Range().Length() == 0
+}
+
+// PrevSegment returns the segment immediately before the iterated gap. If no
+// such segment exists, PrevSegment returns a terminal iterator.
+func (gap FileRangeGapIterator) PrevSegment() FileRangeIterator {
+ return FileRangesegmentBeforePosition(gap.node, gap.index)
+}
+
+// NextSegment returns the segment immediately after the iterated gap. If no
+// such segment exists, NextSegment returns a terminal iterator.
+func (gap FileRangeGapIterator) NextSegment() FileRangeIterator {
+ return FileRangesegmentAfterPosition(gap.node, gap.index)
+}
+
+// PrevGap returns the iterated gap's predecessor. If no such gap exists,
+// PrevGap returns a terminal iterator.
+func (gap FileRangeGapIterator) PrevGap() FileRangeGapIterator {
+ seg := gap.PrevSegment()
+ if !seg.Ok() {
+ return FileRangeGapIterator{}
+ }
+ return seg.PrevGap()
+}
+
+// NextGap returns the iterated gap's successor. If no such gap exists, NextGap
+// returns a terminal iterator.
+func (gap FileRangeGapIterator) NextGap() FileRangeGapIterator {
+ seg := gap.NextSegment()
+ if !seg.Ok() {
+ return FileRangeGapIterator{}
+ }
+ return seg.NextGap()
+}
+
+// segmentBeforePosition returns the predecessor segment of the position given
+// by n.children[i], which may or may not contain a child. If no such segment
+// exists, segmentBeforePosition returns a terminal iterator.
+func FileRangesegmentBeforePosition(n *FileRangenode, i int) FileRangeIterator {
+ for i == 0 {
+ if n.parent == nil {
+ return FileRangeIterator{}
+ }
+ n, i = n.parent, n.parentIndex
+ }
+ return FileRangeIterator{n, i - 1}
+}
+
+// segmentAfterPosition returns the successor segment of the position given by
+// n.children[i], which may or may not contain a child. If no such segment
+// exists, segmentAfterPosition returns a terminal iterator.
+func FileRangesegmentAfterPosition(n *FileRangenode, i int) FileRangeIterator {
+ for i == n.nrSegments {
+ if n.parent == nil {
+ return FileRangeIterator{}
+ }
+ n, i = n.parent, n.parentIndex
+ }
+ return FileRangeIterator{n, i}
+}
+
+func FileRangezeroValueSlice(slice []uint64) {
+
+ for i := range slice {
+ fileRangeSetFunctions{}.ClearValue(&slice[i])
+ }
+}
+
+func FileRangezeroNodeSlice(slice []*FileRangenode) {
+ for i := range slice {
+ slice[i] = nil
+ }
+}
+
+// String stringifies a Set for debugging.
+func (s *FileRangeSet) String() string {
+ return s.root.String()
+}
+
+// String stringifies a node (and all of its children) for debugging.
+func (n *FileRangenode) String() string {
+ var buf bytes.Buffer
+ n.writeDebugString(&buf, "")
+ return buf.String()
+}
+
+func (n *FileRangenode) writeDebugString(buf *bytes.Buffer, prefix string) {
+ if n.hasChildren != (n.nrSegments > 0 && n.children[0] != nil) {
+ buf.WriteString(prefix)
+ buf.WriteString(fmt.Sprintf("WARNING: inconsistent value of hasChildren: got %v, want %v\n", n.hasChildren, !n.hasChildren))
+ }
+ for i := 0; i < n.nrSegments; i++ {
+ if child := n.children[i]; child != nil {
+ cprefix := fmt.Sprintf("%s- % 3d ", prefix, i)
+ if child.parent != n || child.parentIndex != i {
+ buf.WriteString(cprefix)
+ buf.WriteString(fmt.Sprintf("WARNING: inconsistent linkage to parent: got (%p, %d), want (%p, %d)\n", child.parent, child.parentIndex, n, i))
+ }
+ child.writeDebugString(buf, cprefix)
+ }
+ buf.WriteString(prefix)
+ buf.WriteString(fmt.Sprintf("- % 3d: %v => %v\n", i, n.keys[i], n.values[i]))
+ }
+ if child := n.children[n.nrSegments]; child != nil {
+ child.writeDebugString(buf, fmt.Sprintf("%s- % 3d ", prefix, n.nrSegments))
+ }
+}
+
+// SegmentDataSlices represents segments from a set as slices of start, end, and
+// values. SegmentDataSlices is primarily used as an intermediate representation
+// for save/restore and the layout here is optimized for that.
+//
+// +stateify savable
+type FileRangeSegmentDataSlices struct {
+ Start []uint64
+ End []uint64
+ Values []uint64
+}
+
+// ExportSortedSlices returns a copy of all segments in the given set, in ascending
+// key order.
+func (s *FileRangeSet) ExportSortedSlices() *FileRangeSegmentDataSlices {
+ var sds FileRangeSegmentDataSlices
+ for seg := s.FirstSegment(); seg.Ok(); seg = seg.NextSegment() {
+ sds.Start = append(sds.Start, seg.Start())
+ sds.End = append(sds.End, seg.End())
+ sds.Values = append(sds.Values, seg.Value())
+ }
+ sds.Start = sds.Start[:len(sds.Start):len(sds.Start)]
+ sds.End = sds.End[:len(sds.End):len(sds.End)]
+ sds.Values = sds.Values[:len(sds.Values):len(sds.Values)]
+ return &sds
+}
+
+// ImportSortedSlices initializes the given set from the given slices.
+//
+// Preconditions: s must be empty. sds must represent a valid set (the segments
+// in sds must have valid lengths that do not overlap). The segments in sds
+// must be sorted in ascending key order.
+func (s *FileRangeSet) ImportSortedSlices(sds *FileRangeSegmentDataSlices) error {
+ if !s.IsEmpty() {
+ return fmt.Errorf("cannot import into non-empty set %v", s)
+ }
+ gap := s.FirstGap()
+ for i := range sds.Start {
+ r := __generics_imported0.MappableRange{sds.Start[i], sds.End[i]}
+ if !gap.Range().IsSupersetOf(r) {
+ return fmt.Errorf("segment overlaps a preceding segment or is incorrectly sorted: [%d, %d) => %v", sds.Start[i], sds.End[i], sds.Values[i])
+ }
+ gap = s.InsertWithoutMerging(gap, r, sds.Values[i]).NextGap()
+ }
+ return nil
+}
+
+func (s *FileRangeSet) saveRoot() *FileRangeSegmentDataSlices {
+ return s.ExportSortedSlices()
+}
+
+func (s *FileRangeSet) loadRoot(sds *FileRangeSegmentDataSlices) {
+ if err := s.ImportSortedSlices(sds); err != nil {
+ panic(err)
+ }
+}
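+
+// Illustrative sketch (not part of the generated code; src and dst are
+// assumed names): the round trip that saveRoot and loadRoot perform during
+// save/restore, written out by hand:
+//
+//	sds := src.ExportSortedSlices() // flatten into parallel Start/End/Values slices
+//	var dst FileRangeSet
+//	if err := dst.ImportSortedSlices(sds); err != nil {
+//		panic(err) // cannot happen: sds came from a valid set
+//	}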
diff --git a/pkg/sentry/fs/fsutil/frame_ref_set.go b/pkg/sentry/fs/fsutil/frame_ref_set.go
new file mode 100644
index 000000000..6565c28c8
--- /dev/null
+++ b/pkg/sentry/fs/fsutil/frame_ref_set.go
@@ -0,0 +1,50 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package fsutil
+
+import (
+ "math"
+
+ "gvisor.googlesource.com/gvisor/pkg/sentry/platform"
+)
+
+type frameRefSetFunctions struct{}
+
+// MinKey implements segment.Functions.MinKey.
+func (frameRefSetFunctions) MinKey() uint64 {
+ return 0
+}
+
+// MaxKey implements segment.Functions.MaxKey.
+func (frameRefSetFunctions) MaxKey() uint64 {
+ return math.MaxUint64
+}
+
+// ClearValue implements segment.Functions.ClearValue.
+func (frameRefSetFunctions) ClearValue(val *uint64) {
+}
+
+// Merge implements segment.Functions.Merge.
+func (frameRefSetFunctions) Merge(_ platform.FileRange, val1 uint64, _ platform.FileRange, val2 uint64) (uint64, bool) {
+ if val1 != val2 {
+ return 0, false
+ }
+ return val1, true
+}
+
+// Split implements segment.Functions.Split.
+func (frameRefSetFunctions) Split(_ platform.FileRange, val uint64, _ uint64) (uint64, uint64) {
+ return val, val
+}
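+
+// Illustrative sketch (not part of this file): with these Functions, a
+// frameRefSet coalesces adjacent FileRanges that share a reference count,
+// while ranges with different counts stay separate:
+//
+//	var s frameRefSet
+//	s.Add(platform.FileRange{0, 0x2000}, 1)      // [0, 0x2000) => 1
+//	s.Add(platform.FileRange{0x2000, 0x3000}, 1) // merges into [0, 0x3000) => 1
+//	s.Add(platform.FileRange{0x3000, 0x4000}, 2) // stays separate: count differs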
diff --git a/pkg/sentry/fs/fsutil/frame_ref_set_impl.go b/pkg/sentry/fs/fsutil/frame_ref_set_impl.go
new file mode 100755
index 000000000..2f858f419
--- /dev/null
+++ b/pkg/sentry/fs/fsutil/frame_ref_set_impl.go
@@ -0,0 +1,1274 @@
+package fsutil
+
+import (
+ __generics_imported0 "gvisor.googlesource.com/gvisor/pkg/sentry/platform"
+)
+
+import (
+ "bytes"
+ "fmt"
+)
+
+const (
+ // minDegree is the minimum degree of an internal node in a Set B-tree.
+ //
+ // - Any non-root node has at least minDegree-1 segments.
+ //
+ // - Any non-root internal (non-leaf) node has at least minDegree children.
+ //
+ // - The root node may have fewer than minDegree-1 segments, but it may
+ // only have 0 segments if the tree is empty.
+ //
+ // Our implementation requires minDegree >= 3. Higher values of minDegree
+ // usually improve performance, but increase memory usage for small sets.
+ frameRefminDegree = 3
+
+ frameRefmaxDegree = 2 * frameRefminDegree
+)
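+
+// With frameRefminDegree = 3, frameRefmaxDegree = 6: each node holds at most
+// frameRefmaxDegree-1 = 5 segments and frameRefmaxDegree = 6 children, and
+// each non-root node holds at least frameRefminDegree-1 = 2 segments (see the
+// keys, values, and children arrays in frameRefnode below).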
+
+// A Set is a mapping of segments with non-overlapping Range keys. The zero
+// value for a Set is an empty set. Set values are not safely movable nor
+// copyable. Set is thread-compatible.
+//
+// +stateify savable
+type frameRefSet struct {
+ root frameRefnode `state:".(*frameRefSegmentDataSlices)"`
+}
+
+// IsEmpty returns true if the set contains no segments.
+func (s *frameRefSet) IsEmpty() bool {
+ return s.root.nrSegments == 0
+}
+
+// IsEmptyRange returns true iff no segments in the set overlap the given
+// range. This is semantically equivalent to s.SpanRange(r) == 0, but may be
+// more efficient.
+func (s *frameRefSet) IsEmptyRange(r __generics_imported0.FileRange) bool {
+ switch {
+ case r.Length() < 0:
+ panic(fmt.Sprintf("invalid range %v", r))
+ case r.Length() == 0:
+ return true
+ }
+ _, gap := s.Find(r.Start)
+ if !gap.Ok() {
+ return false
+ }
+ return r.End <= gap.End()
+}
+
+// Span returns the total size of all segments in the set.
+func (s *frameRefSet) Span() uint64 {
+ var sz uint64
+ for seg := s.FirstSegment(); seg.Ok(); seg = seg.NextSegment() {
+ sz += seg.Range().Length()
+ }
+ return sz
+}
+
+// SpanRange returns the total size of the intersection of segments in the set
+// with the given range.
+func (s *frameRefSet) SpanRange(r __generics_imported0.FileRange) uint64 {
+ switch {
+ case r.Length() < 0:
+ panic(fmt.Sprintf("invalid range %v", r))
+ case r.Length() == 0:
+ return 0
+ }
+ var sz uint64
+ for seg := s.LowerBoundSegment(r.Start); seg.Ok() && seg.Start() < r.End; seg = seg.NextSegment() {
+ sz += seg.Range().Intersect(r).Length()
+ }
+ return sz
+}
+
+// FirstSegment returns the first segment in the set. If the set is empty,
+// FirstSegment returns a terminal iterator.
+func (s *frameRefSet) FirstSegment() frameRefIterator {
+ if s.root.nrSegments == 0 {
+ return frameRefIterator{}
+ }
+ return s.root.firstSegment()
+}
+
+// LastSegment returns the last segment in the set. If the set is empty,
+// LastSegment returns a terminal iterator.
+func (s *frameRefSet) LastSegment() frameRefIterator {
+ if s.root.nrSegments == 0 {
+ return frameRefIterator{}
+ }
+ return s.root.lastSegment()
+}
+
+// FirstGap returns the first gap in the set.
+func (s *frameRefSet) FirstGap() frameRefGapIterator {
+ n := &s.root
+ for n.hasChildren {
+ n = n.children[0]
+ }
+ return frameRefGapIterator{n, 0}
+}
+
+// LastGap returns the last gap in the set.
+func (s *frameRefSet) LastGap() frameRefGapIterator {
+ n := &s.root
+ for n.hasChildren {
+ n = n.children[n.nrSegments]
+ }
+ return frameRefGapIterator{n, n.nrSegments}
+}
+
+// Find returns the segment or gap whose range contains the given key. If a
+// segment is found, the returned Iterator is non-terminal and the
+// returned GapIterator is terminal. Otherwise, the returned Iterator is
+// terminal and the returned GapIterator is non-terminal.
+func (s *frameRefSet) Find(key uint64) (frameRefIterator, frameRefGapIterator) {
+ n := &s.root
+ for {
+
+ lower := 0
+ upper := n.nrSegments
+ for lower < upper {
+ i := lower + (upper-lower)/2
+ if r := n.keys[i]; key < r.End {
+ if key >= r.Start {
+ return frameRefIterator{n, i}, frameRefGapIterator{}
+ }
+ upper = i
+ } else {
+ lower = i + 1
+ }
+ }
+ i := lower
+ if !n.hasChildren {
+ return frameRefIterator{}, frameRefGapIterator{n, i}
+ }
+ n = n.children[i]
+ }
+}
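+
+// Illustrative sketch (assumed names s and key): exactly one of the iterators
+// returned by Find is non-terminal, so callers typically branch on seg.Ok():
+//
+//	if seg, gap := s.Find(key); seg.Ok() {
+//		_ = seg.Value() // key lies in seg.Range()
+//	} else {
+//		_ = gap.Range() // key lies in an uncovered gap
+//	}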
+
+// FindSegment returns the segment whose range contains the given key. If no
+// such segment exists, FindSegment returns a terminal iterator.
+func (s *frameRefSet) FindSegment(key uint64) frameRefIterator {
+ seg, _ := s.Find(key)
+ return seg
+}
+
+// LowerBoundSegment returns the segment with the lowest range that contains a
+// key greater than or equal to min. If no such segment exists,
+// LowerBoundSegment returns a terminal iterator.
+func (s *frameRefSet) LowerBoundSegment(min uint64) frameRefIterator {
+ seg, gap := s.Find(min)
+ if seg.Ok() {
+ return seg
+ }
+ return gap.NextSegment()
+}
+
+// UpperBoundSegment returns the segment with the highest range that contains a
+// key less than or equal to max. If no such segment exists, UpperBoundSegment
+// returns a terminal iterator.
+func (s *frameRefSet) UpperBoundSegment(max uint64) frameRefIterator {
+ seg, gap := s.Find(max)
+ if seg.Ok() {
+ return seg
+ }
+ return gap.PrevSegment()
+}
+
+// FindGap returns the gap containing the given key. If no such gap exists
+// (i.e. the set contains a segment containing that key), FindGap returns a
+// terminal iterator.
+func (s *frameRefSet) FindGap(key uint64) frameRefGapIterator {
+ _, gap := s.Find(key)
+ return gap
+}
+
+// LowerBoundGap returns the gap with the lowest range that is greater than or
+// equal to min.
+func (s *frameRefSet) LowerBoundGap(min uint64) frameRefGapIterator {
+ seg, gap := s.Find(min)
+ if gap.Ok() {
+ return gap
+ }
+ return seg.NextGap()
+}
+
+// UpperBoundGap returns the gap with the highest range that is less than or
+// equal to max.
+func (s *frameRefSet) UpperBoundGap(max uint64) frameRefGapIterator {
+ seg, gap := s.Find(max)
+ if gap.Ok() {
+ return gap
+ }
+ return seg.PrevGap()
+}
+
+// Add inserts the given segment into the set and returns true. If the new
+// segment can be merged with adjacent segments, Add will do so. If the new
+// segment would overlap an existing segment, Add returns false. If Add
+// succeeds, all existing iterators are invalidated.
+func (s *frameRefSet) Add(r __generics_imported0.FileRange, val uint64) bool {
+ if r.Length() <= 0 {
+ panic(fmt.Sprintf("invalid segment range %v", r))
+ }
+ gap := s.FindGap(r.Start)
+ if !gap.Ok() {
+ return false
+ }
+ if r.End > gap.End() {
+ return false
+ }
+ s.Insert(gap, r, val)
+ return true
+}
+
+// AddWithoutMerging inserts the given segment into the set and returns true.
+// If it would overlap an existing segment, AddWithoutMerging does nothing and
+// returns false. If AddWithoutMerging succeeds, all existing iterators are
+// invalidated.
+func (s *frameRefSet) AddWithoutMerging(r __generics_imported0.FileRange, val uint64) bool {
+ if r.Length() <= 0 {
+ panic(fmt.Sprintf("invalid segment range %v", r))
+ }
+ gap := s.FindGap(r.Start)
+ if !gap.Ok() {
+ return false
+ }
+ if r.End > gap.End() {
+ return false
+ }
+ s.InsertWithoutMergingUnchecked(gap, r, val)
+ return true
+}
+
+// Insert inserts the given segment into the given gap. If the new segment can
+// be merged with adjacent segments, Insert will do so. Insert returns an
+// iterator to the segment containing the inserted value (which may have been
+// merged with other values). All existing iterators (including gap, but not
+// including the returned iterator) are invalidated.
+//
+// If the gap cannot accommodate the segment, or if r is invalid, Insert panics.
+//
+// Insert is semantically equivalent to an InsertWithoutMerging followed by a
+// Merge, but may be more efficient. Note that there is no unchecked variant of
+// Insert since Insert must retrieve and inspect gap's predecessor and
+// successor segments regardless.
+func (s *frameRefSet) Insert(gap frameRefGapIterator, r __generics_imported0.FileRange, val uint64) frameRefIterator {
+ if r.Length() <= 0 {
+ panic(fmt.Sprintf("invalid segment range %v", r))
+ }
+ prev, next := gap.PrevSegment(), gap.NextSegment()
+ if prev.Ok() && prev.End() > r.Start {
+ panic(fmt.Sprintf("new segment %v overlaps predecessor %v", r, prev.Range()))
+ }
+ if next.Ok() && next.Start() < r.End {
+ panic(fmt.Sprintf("new segment %v overlaps successor %v", r, next.Range()))
+ }
+ if prev.Ok() && prev.End() == r.Start {
+ if mval, ok := (frameRefSetFunctions{}).Merge(prev.Range(), prev.Value(), r, val); ok {
+ prev.SetEndUnchecked(r.End)
+ prev.SetValue(mval)
+ if next.Ok() && next.Start() == r.End {
+ val = mval
+ if mval, ok := (frameRefSetFunctions{}).Merge(prev.Range(), val, next.Range(), next.Value()); ok {
+ prev.SetEndUnchecked(next.End())
+ prev.SetValue(mval)
+ return s.Remove(next).PrevSegment()
+ }
+ }
+ return prev
+ }
+ }
+ if next.Ok() && next.Start() == r.End {
+ if mval, ok := (frameRefSetFunctions{}).Merge(r, val, next.Range(), next.Value()); ok {
+ next.SetStartUnchecked(r.Start)
+ next.SetValue(mval)
+ return next
+ }
+ }
+ return s.InsertWithoutMergingUnchecked(gap, r, val)
+}
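+
+// Illustrative sketch (assumed state): Insert merges with both neighbors when
+// Merge allows it, so filling the hole between two equal-valued segments
+// yields a single segment:
+//
+//	// s holds [0, 0x1000) => 1 and [0x2000, 0x3000) => 1.
+//	gap := s.FindGap(0x1800)
+//	s.Insert(gap, platform.FileRange{0x1000, 0x2000}, 1)
+//	// s now holds the single segment [0, 0x3000) => 1.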
+
+// InsertWithoutMerging inserts the given segment into the given gap and
+// returns an iterator to the inserted segment. All existing iterators
+// (including gap, but not including the returned iterator) are invalidated.
+//
+// If the gap cannot accommodate the segment, or if r is invalid,
+// InsertWithoutMerging panics.
+func (s *frameRefSet) InsertWithoutMerging(gap frameRefGapIterator, r __generics_imported0.FileRange, val uint64) frameRefIterator {
+ if r.Length() <= 0 {
+ panic(fmt.Sprintf("invalid segment range %v", r))
+ }
+ if gr := gap.Range(); !gr.IsSupersetOf(r) {
+ panic(fmt.Sprintf("cannot insert segment range %v into gap range %v", r, gr))
+ }
+ return s.InsertWithoutMergingUnchecked(gap, r, val)
+}
+
+// InsertWithoutMergingUnchecked inserts the given segment into the given gap
+// and returns an iterator to the inserted segment. All existing iterators
+// (including gap, but not including the returned iterator) are invalidated.
+//
+// Preconditions: r.Start >= gap.Start(); r.End <= gap.End().
+func (s *frameRefSet) InsertWithoutMergingUnchecked(gap frameRefGapIterator, r __generics_imported0.FileRange, val uint64) frameRefIterator {
+ gap = gap.node.rebalanceBeforeInsert(gap)
+ copy(gap.node.keys[gap.index+1:], gap.node.keys[gap.index:gap.node.nrSegments])
+ copy(gap.node.values[gap.index+1:], gap.node.values[gap.index:gap.node.nrSegments])
+ gap.node.keys[gap.index] = r
+ gap.node.values[gap.index] = val
+ gap.node.nrSegments++
+ return frameRefIterator{gap.node, gap.index}
+}
+
+// Remove removes the given segment and returns an iterator to the vacated gap.
+// All existing iterators (including seg, but not including the returned
+// iterator) are invalidated.
+func (s *frameRefSet) Remove(seg frameRefIterator) frameRefGapIterator {
+
+ if seg.node.hasChildren {
+
+ victim := seg.PrevSegment()
+
+ seg.SetRangeUnchecked(victim.Range())
+ seg.SetValue(victim.Value())
+ return s.Remove(victim).NextGap()
+ }
+ copy(seg.node.keys[seg.index:], seg.node.keys[seg.index+1:seg.node.nrSegments])
+ copy(seg.node.values[seg.index:], seg.node.values[seg.index+1:seg.node.nrSegments])
+ frameRefSetFunctions{}.ClearValue(&seg.node.values[seg.node.nrSegments-1])
+ seg.node.nrSegments--
+ return seg.node.rebalanceAfterRemove(frameRefGapIterator{seg.node, seg.index})
+}
+
+// RemoveAll removes all segments from the set. All existing iterators are
+// invalidated.
+func (s *frameRefSet) RemoveAll() {
+ s.root = frameRefnode{}
+}
+
+// RemoveRange removes all segments in the given range. An iterator to the
+// newly formed gap is returned, and all existing iterators are invalidated.
+func (s *frameRefSet) RemoveRange(r __generics_imported0.FileRange) frameRefGapIterator {
+ seg, gap := s.Find(r.Start)
+ if seg.Ok() {
+ seg = s.Isolate(seg, r)
+ gap = s.Remove(seg)
+ }
+ for seg = gap.NextSegment(); seg.Ok() && seg.Start() < r.End; seg = gap.NextSegment() {
+ seg = s.Isolate(seg, r)
+ gap = s.Remove(seg)
+ }
+ return gap
+}
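+
+// Illustrative sketch (assumed state): RemoveRange splits any segment that
+// straddles a boundary of r, so exactly the given range is removed:
+//
+//	// s holds [0, 0x3000) => 1.
+//	s.RemoveRange(platform.FileRange{0x1000, 0x2000})
+//	// s now holds [0, 0x1000) => 1 and [0x2000, 0x3000) => 1.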
+
+// Merge attempts to merge two neighboring segments. If successful, Merge
+// returns an iterator to the merged segment, and all existing iterators are
+// invalidated. Otherwise, Merge returns a terminal iterator.
+//
+// If first is not the predecessor of second, Merge panics.
+func (s *frameRefSet) Merge(first, second frameRefIterator) frameRefIterator {
+ if first.NextSegment() != second {
+ panic(fmt.Sprintf("attempt to merge non-neighboring segments %v, %v", first.Range(), second.Range()))
+ }
+ return s.MergeUnchecked(first, second)
+}
+
+// MergeUnchecked attempts to merge two neighboring segments. If successful,
+// MergeUnchecked returns an iterator to the merged segment, and all existing
+// iterators are invalidated. Otherwise, MergeUnchecked returns a terminal
+// iterator.
+//
+// Precondition: first is the predecessor of second: first.NextSegment() ==
+// second, first == second.PrevSegment().
+func (s *frameRefSet) MergeUnchecked(first, second frameRefIterator) frameRefIterator {
+ if first.End() == second.Start() {
+ if mval, ok := (frameRefSetFunctions{}).Merge(first.Range(), first.Value(), second.Range(), second.Value()); ok {
+
+ first.SetEndUnchecked(second.End())
+ first.SetValue(mval)
+ return s.Remove(second).PrevSegment()
+ }
+ }
+ return frameRefIterator{}
+}
+
+// MergeAll attempts to merge all adjacent segments in the set. All existing
+// iterators are invalidated.
+func (s *frameRefSet) MergeAll() {
+ seg := s.FirstSegment()
+ if !seg.Ok() {
+ return
+ }
+ next := seg.NextSegment()
+ for next.Ok() {
+ if mseg := s.MergeUnchecked(seg, next); mseg.Ok() {
+ seg, next = mseg, mseg.NextSegment()
+ } else {
+ seg, next = next, next.NextSegment()
+ }
+ }
+}
+
+// MergeRange attempts to merge all adjacent segments that contain a key in the
+// specified range. All existing iterators are invalidated.
+func (s *frameRefSet) MergeRange(r __generics_imported0.FileRange) {
+ seg := s.LowerBoundSegment(r.Start)
+ if !seg.Ok() {
+ return
+ }
+ next := seg.NextSegment()
+ for next.Ok() && next.Range().Start < r.End {
+ if mseg := s.MergeUnchecked(seg, next); mseg.Ok() {
+ seg, next = mseg, mseg.NextSegment()
+ } else {
+ seg, next = next, next.NextSegment()
+ }
+ }
+}
+
+// MergeAdjacent attempts to merge the segment containing r.Start with its
+// predecessor, and the segment containing r.End-1 with its successor.
+func (s *frameRefSet) MergeAdjacent(r __generics_imported0.FileRange) {
+ first := s.FindSegment(r.Start)
+ if first.Ok() {
+ if prev := first.PrevSegment(); prev.Ok() {
+ s.Merge(prev, first)
+ }
+ }
+ last := s.FindSegment(r.End - 1)
+ if last.Ok() {
+ if next := last.NextSegment(); next.Ok() {
+ s.Merge(last, next)
+ }
+ }
+}
+
+// Split splits the given segment at the given key and returns iterators to the
+// two resulting segments. All existing iterators (including seg, but not
+// including the returned iterators) are invalidated.
+//
+// If the segment cannot be split at split (because split is at the start or
+// end of the segment's range, so splitting would produce a segment with zero
+// length, or because split falls outside the segment's range altogether),
+// Split panics.
+func (s *frameRefSet) Split(seg frameRefIterator, split uint64) (frameRefIterator, frameRefIterator) {
+ if !seg.Range().CanSplitAt(split) {
+ panic(fmt.Sprintf("can't split %v at %v", seg.Range(), split))
+ }
+ return s.SplitUnchecked(seg, split)
+}
+
+// SplitUnchecked splits the given segment at the given key and returns
+// iterators to the two resulting segments. All existing iterators (including
+// seg, but not including the returned iterators) are invalidated.
+//
+// Preconditions: seg.Start() < key < seg.End().
+func (s *frameRefSet) SplitUnchecked(seg frameRefIterator, split uint64) (frameRefIterator, frameRefIterator) {
+ val1, val2 := (frameRefSetFunctions{}).Split(seg.Range(), seg.Value(), split)
+ end2 := seg.End()
+ seg.SetEndUnchecked(split)
+ seg.SetValue(val1)
+ seg2 := s.InsertWithoutMergingUnchecked(seg.NextGap(), __generics_imported0.FileRange{split, end2}, val2)
+
+ return seg2.PrevSegment(), seg2
+}
+
+// SplitAt splits the segment straddling split, if one exists. SplitAt returns
+// true if a segment was split and false otherwise. If SplitAt splits a
+// segment, all existing iterators are invalidated.
+func (s *frameRefSet) SplitAt(split uint64) bool {
+ if seg := s.FindSegment(split); seg.Ok() && seg.Range().CanSplitAt(split) {
+ s.SplitUnchecked(seg, split)
+ return true
+ }
+ return false
+}
+
+// Isolate ensures that the given segment's range does not escape r by
+// splitting at r.Start and r.End if necessary, and returns an updated iterator
+// to the bounded segment. All existing iterators (including seg, but not
+// including the returned iterator) are invalidated.
+func (s *frameRefSet) Isolate(seg frameRefIterator, r __generics_imported0.FileRange) frameRefIterator {
+ if seg.Range().CanSplitAt(r.Start) {
+ _, seg = s.SplitUnchecked(seg, r.Start)
+ }
+ if seg.Range().CanSplitAt(r.End) {
+ seg, _ = s.SplitUnchecked(seg, r.End)
+ }
+ return seg
+}
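+
+// Illustrative sketch (assumed state): Isolate trims the iterated segment to
+// r without discarding the parts outside r:
+//
+//	// seg spans [0, 0x3000) => 1; isolating it to [0x1000, 0x2000) splits it
+//	// into [0, 0x1000), [0x1000, 0x2000), and [0x2000, 0x3000), all => 1, and
+//	// returns an iterator to the middle segment.
+//	seg = s.Isolate(seg, platform.FileRange{0x1000, 0x2000})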
+
+// ApplyContiguous applies a function to a contiguous range of segments,
+// splitting if necessary. The function is applied until the first gap is
+// encountered, at which point the gap is returned. If the function is applied
+// across the entire range, a terminal gap is returned. All existing iterators
+// are invalidated.
+//
+// N.B. The Iterator must not be invalidated by the function.
+func (s *frameRefSet) ApplyContiguous(r __generics_imported0.FileRange, fn func(seg frameRefIterator)) frameRefGapIterator {
+ seg, gap := s.Find(r.Start)
+ if !seg.Ok() {
+ return gap
+ }
+ for {
+ seg = s.Isolate(seg, r)
+ fn(seg)
+ if seg.End() >= r.End {
+ return frameRefGapIterator{}
+ }
+ gap = seg.NextGap()
+ if !gap.IsEmpty() {
+ return gap
+ }
+ seg = gap.NextSegment()
+ if !seg.Ok() {
+
+ return frameRefGapIterator{}
+ }
+ }
+}
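+
+// Illustrative sketch (assumed usage; r is a platform.FileRange): a caller
+// can bump the reference count of every frame in r, provided r is fully
+// covered by existing segments:
+//
+//	if gap := s.ApplyContiguous(r, func(seg frameRefIterator) {
+//		(*seg.ValuePtr())++
+//	}); gap.Ok() {
+//		// r was not fully covered; segments before [gap.Start(), gap.End())
+//		// were already incremented, so the caller must handle or undo them.
+//	}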
+
+// +stateify savable
+type frameRefnode struct {
+ // An internal binary tree node looks like:
+ //
+ // K
+ // / \
+ // Cl Cr
+ //
+ // where all keys in the subtree rooted by Cl (the left subtree) are less
+ // than K (the key of the parent node), and all keys in the subtree rooted
+ // by Cr (the right subtree) are greater than K.
+ //
+ // An internal B-tree node's indexes work out to look like:
+ //
+ // K0 K1 K2 ... Kn-1
+ // / \/ \/ \ ... / \
+ // C0 C1 C2 C3 ... Cn-1 Cn
+ //
+ // where n is nrSegments.
+ nrSegments int
+
+ // parent is a pointer to this node's parent. If this node is root, parent
+ // is nil.
+ parent *frameRefnode
+
+ // parentIndex is the index of this node in parent.children.
+ parentIndex int
+
+ // Flag for internal nodes that is technically redundant with "children[0]
+ // != nil", but is stored in the first cache line. "hasChildren" rather
+ // than "isLeaf" because false must be the correct value for an empty root.
+ hasChildren bool
+
+ // Nodes store keys and values in separate arrays to maximize locality in
+ // the common case (scanning keys for lookup).
+ keys [frameRefmaxDegree - 1]__generics_imported0.FileRange
+ values [frameRefmaxDegree - 1]uint64
+ children [frameRefmaxDegree]*frameRefnode
+}
+
+// firstSegment returns the first segment in the subtree rooted by n.
+//
+// Preconditions: n.nrSegments != 0.
+func (n *frameRefnode) firstSegment() frameRefIterator {
+ for n.hasChildren {
+ n = n.children[0]
+ }
+ return frameRefIterator{n, 0}
+}
+
+// lastSegment returns the last segment in the subtree rooted by n.
+//
+// Preconditions: n.nrSegments != 0.
+func (n *frameRefnode) lastSegment() frameRefIterator {
+ for n.hasChildren {
+ n = n.children[n.nrSegments]
+ }
+ return frameRefIterator{n, n.nrSegments - 1}
+}
+
+func (n *frameRefnode) prevSibling() *frameRefnode {
+ if n.parent == nil || n.parentIndex == 0 {
+ return nil
+ }
+ return n.parent.children[n.parentIndex-1]
+}
+
+func (n *frameRefnode) nextSibling() *frameRefnode {
+ if n.parent == nil || n.parentIndex == n.parent.nrSegments {
+ return nil
+ }
+ return n.parent.children[n.parentIndex+1]
+}
+
+// rebalanceBeforeInsert splits n and its ancestors if they are full, as
+// required for insertion, and returns an updated iterator to the position
+// represented by gap.
+func (n *frameRefnode) rebalanceBeforeInsert(gap frameRefGapIterator) frameRefGapIterator {
+ if n.parent != nil {
+ gap = n.parent.rebalanceBeforeInsert(gap)
+ }
+ if n.nrSegments < frameRefmaxDegree-1 {
+ return gap
+ }
+ if n.parent == nil {
+
+ left := &frameRefnode{
+ nrSegments: frameRefminDegree - 1,
+ parent: n,
+ parentIndex: 0,
+ hasChildren: n.hasChildren,
+ }
+ right := &frameRefnode{
+ nrSegments: frameRefminDegree - 1,
+ parent: n,
+ parentIndex: 1,
+ hasChildren: n.hasChildren,
+ }
+ copy(left.keys[:frameRefminDegree-1], n.keys[:frameRefminDegree-1])
+ copy(left.values[:frameRefminDegree-1], n.values[:frameRefminDegree-1])
+ copy(right.keys[:frameRefminDegree-1], n.keys[frameRefminDegree:])
+ copy(right.values[:frameRefminDegree-1], n.values[frameRefminDegree:])
+ n.keys[0], n.values[0] = n.keys[frameRefminDegree-1], n.values[frameRefminDegree-1]
+ frameRefzeroValueSlice(n.values[1:])
+ if n.hasChildren {
+ copy(left.children[:frameRefminDegree], n.children[:frameRefminDegree])
+ copy(right.children[:frameRefminDegree], n.children[frameRefminDegree:])
+ frameRefzeroNodeSlice(n.children[2:])
+ for i := 0; i < frameRefminDegree; i++ {
+ left.children[i].parent = left
+ left.children[i].parentIndex = i
+ right.children[i].parent = right
+ right.children[i].parentIndex = i
+ }
+ }
+ n.nrSegments = 1
+ n.hasChildren = true
+ n.children[0] = left
+ n.children[1] = right
+ if gap.node != n {
+ return gap
+ }
+ if gap.index < frameRefminDegree {
+ return frameRefGapIterator{left, gap.index}
+ }
+ return frameRefGapIterator{right, gap.index - frameRefminDegree}
+ }
+
+ copy(n.parent.keys[n.parentIndex+1:], n.parent.keys[n.parentIndex:n.parent.nrSegments])
+ copy(n.parent.values[n.parentIndex+1:], n.parent.values[n.parentIndex:n.parent.nrSegments])
+ n.parent.keys[n.parentIndex], n.parent.values[n.parentIndex] = n.keys[frameRefminDegree-1], n.values[frameRefminDegree-1]
+ copy(n.parent.children[n.parentIndex+2:], n.parent.children[n.parentIndex+1:n.parent.nrSegments+1])
+ for i := n.parentIndex + 2; i < n.parent.nrSegments+2; i++ {
+ n.parent.children[i].parentIndex = i
+ }
+ sibling := &frameRefnode{
+ nrSegments: frameRefminDegree - 1,
+ parent: n.parent,
+ parentIndex: n.parentIndex + 1,
+ hasChildren: n.hasChildren,
+ }
+ n.parent.children[n.parentIndex+1] = sibling
+ n.parent.nrSegments++
+ copy(sibling.keys[:frameRefminDegree-1], n.keys[frameRefminDegree:])
+ copy(sibling.values[:frameRefminDegree-1], n.values[frameRefminDegree:])
+ frameRefzeroValueSlice(n.values[frameRefminDegree-1:])
+ if n.hasChildren {
+ copy(sibling.children[:frameRefminDegree], n.children[frameRefminDegree:])
+ frameRefzeroNodeSlice(n.children[frameRefminDegree:])
+ for i := 0; i < frameRefminDegree; i++ {
+ sibling.children[i].parent = sibling
+ sibling.children[i].parentIndex = i
+ }
+ }
+ n.nrSegments = frameRefminDegree - 1
+
+ if gap.node != n {
+ return gap
+ }
+ if gap.index < frameRefminDegree {
+ return gap
+ }
+ return frameRefGapIterator{sibling, gap.index - frameRefminDegree}
+}
+
+// rebalanceAfterRemove "unsplits" n and its ancestors if they are deficient
+// (contain fewer segments than required by B-tree invariants), as required for
+// removal, and returns an updated iterator to the position represented by gap.
+//
+// Precondition: n is the only node in the tree that may currently violate a
+// B-tree invariant.
+func (n *frameRefnode) rebalanceAfterRemove(gap frameRefGapIterator) frameRefGapIterator {
+ for {
+ if n.nrSegments >= frameRefminDegree-1 {
+ return gap
+ }
+ if n.parent == nil {
+
+ return gap
+ }
+
+ if sibling := n.prevSibling(); sibling != nil && sibling.nrSegments >= frameRefminDegree {
+ copy(n.keys[1:], n.keys[:n.nrSegments])
+ copy(n.values[1:], n.values[:n.nrSegments])
+ n.keys[0] = n.parent.keys[n.parentIndex-1]
+ n.values[0] = n.parent.values[n.parentIndex-1]
+ n.parent.keys[n.parentIndex-1] = sibling.keys[sibling.nrSegments-1]
+ n.parent.values[n.parentIndex-1] = sibling.values[sibling.nrSegments-1]
+ frameRefSetFunctions{}.ClearValue(&sibling.values[sibling.nrSegments-1])
+ if n.hasChildren {
+ copy(n.children[1:], n.children[:n.nrSegments+1])
+ n.children[0] = sibling.children[sibling.nrSegments]
+ sibling.children[sibling.nrSegments] = nil
+ n.children[0].parent = n
+ n.children[0].parentIndex = 0
+ for i := 1; i < n.nrSegments+2; i++ {
+ n.children[i].parentIndex = i
+ }
+ }
+ n.nrSegments++
+ sibling.nrSegments--
+ if gap.node == sibling && gap.index == sibling.nrSegments {
+ return frameRefGapIterator{n, 0}
+ }
+ if gap.node == n {
+ return frameRefGapIterator{n, gap.index + 1}
+ }
+ return gap
+ }
+ if sibling := n.nextSibling(); sibling != nil && sibling.nrSegments >= frameRefminDegree {
+ n.keys[n.nrSegments] = n.parent.keys[n.parentIndex]
+ n.values[n.nrSegments] = n.parent.values[n.parentIndex]
+ n.parent.keys[n.parentIndex] = sibling.keys[0]
+ n.parent.values[n.parentIndex] = sibling.values[0]
+ copy(sibling.keys[:sibling.nrSegments-1], sibling.keys[1:])
+ copy(sibling.values[:sibling.nrSegments-1], sibling.values[1:])
+ frameRefSetFunctions{}.ClearValue(&sibling.values[sibling.nrSegments-1])
+ if n.hasChildren {
+ n.children[n.nrSegments+1] = sibling.children[0]
+ copy(sibling.children[:sibling.nrSegments], sibling.children[1:])
+ sibling.children[sibling.nrSegments] = nil
+ n.children[n.nrSegments+1].parent = n
+ n.children[n.nrSegments+1].parentIndex = n.nrSegments + 1
+ for i := 0; i < sibling.nrSegments; i++ {
+ sibling.children[i].parentIndex = i
+ }
+ }
+ n.nrSegments++
+ sibling.nrSegments--
+ if gap.node == sibling {
+ if gap.index == 0 {
+ return frameRefGapIterator{n, n.nrSegments}
+ }
+ return frameRefGapIterator{sibling, gap.index - 1}
+ }
+ return gap
+ }
+
+ p := n.parent
+ if p.nrSegments == 1 {
+
+ left, right := p.children[0], p.children[1]
+ p.nrSegments = left.nrSegments + right.nrSegments + 1
+ p.hasChildren = left.hasChildren
+ p.keys[left.nrSegments] = p.keys[0]
+ p.values[left.nrSegments] = p.values[0]
+ copy(p.keys[:left.nrSegments], left.keys[:left.nrSegments])
+ copy(p.values[:left.nrSegments], left.values[:left.nrSegments])
+ copy(p.keys[left.nrSegments+1:], right.keys[:right.nrSegments])
+ copy(p.values[left.nrSegments+1:], right.values[:right.nrSegments])
+ if left.hasChildren {
+ copy(p.children[:left.nrSegments+1], left.children[:left.nrSegments+1])
+ copy(p.children[left.nrSegments+1:], right.children[:right.nrSegments+1])
+ for i := 0; i < p.nrSegments+1; i++ {
+ p.children[i].parent = p
+ p.children[i].parentIndex = i
+ }
+ } else {
+ p.children[0] = nil
+ p.children[1] = nil
+ }
+ if gap.node == left {
+ return frameRefGapIterator{p, gap.index}
+ }
+ if gap.node == right {
+ return frameRefGapIterator{p, gap.index + left.nrSegments + 1}
+ }
+ return gap
+ }
+ // Merge n and either sibling, along with the segment separating the
+ // two, into whichever of the two nodes comes first. This is the
+ // reverse of the non-root splitting case in
+ // node.rebalanceBeforeInsert.
+ var left, right *frameRefnode
+ if n.parentIndex > 0 {
+ left = n.prevSibling()
+ right = n
+ } else {
+ left = n
+ right = n.nextSibling()
+ }
+
+ if gap.node == right {
+ gap = frameRefGapIterator{left, gap.index + left.nrSegments + 1}
+ }
+ left.keys[left.nrSegments] = p.keys[left.parentIndex]
+ left.values[left.nrSegments] = p.values[left.parentIndex]
+ copy(left.keys[left.nrSegments+1:], right.keys[:right.nrSegments])
+ copy(left.values[left.nrSegments+1:], right.values[:right.nrSegments])
+ if left.hasChildren {
+ copy(left.children[left.nrSegments+1:], right.children[:right.nrSegments+1])
+ for i := left.nrSegments + 1; i < left.nrSegments+right.nrSegments+2; i++ {
+ left.children[i].parent = left
+ left.children[i].parentIndex = i
+ }
+ }
+ left.nrSegments += right.nrSegments + 1
+ copy(p.keys[left.parentIndex:], p.keys[left.parentIndex+1:p.nrSegments])
+ copy(p.values[left.parentIndex:], p.values[left.parentIndex+1:p.nrSegments])
+ frameRefSetFunctions{}.ClearValue(&p.values[p.nrSegments-1])
+ copy(p.children[left.parentIndex+1:], p.children[left.parentIndex+2:p.nrSegments+1])
+ for i := 0; i < p.nrSegments; i++ {
+ p.children[i].parentIndex = i
+ }
+ p.children[p.nrSegments] = nil
+ p.nrSegments--
+
+ n = p
+ }
+}
+
+// An Iterator is conceptually one of:
+//
+// - A pointer to a segment in a set; or
+//
+// - A terminal iterator, which is a sentinel indicating that the end of
+// iteration has been reached.
+//
+// Iterators are copyable values and are meaningfully equality-comparable. The
+// zero value of Iterator is a terminal iterator.
+//
+// Unless otherwise specified, any mutation of a set invalidates all existing
+// iterators into the set.
+type frameRefIterator struct {
+ // node is the node containing the iterated segment. If the iterator is
+ // terminal, node is nil.
+ node *frameRefnode
+
+ // index is the index of the segment in node.keys/values.
+ index int
+}
+
+// Ok returns true if the iterator is not terminal. All other methods are only
+// valid for non-terminal iterators.
+func (seg frameRefIterator) Ok() bool {
+ return seg.node != nil
+}
+
+// Range returns the iterated segment's range key.
+func (seg frameRefIterator) Range() __generics_imported0.FileRange {
+ return seg.node.keys[seg.index]
+}
+
+// Start is equivalent to Range().Start, but should be preferred if only the
+// start of the range is needed.
+func (seg frameRefIterator) Start() uint64 {
+ return seg.node.keys[seg.index].Start
+}
+
+// End is equivalent to Range().End, but should be preferred if only the end of
+// the range is needed.
+func (seg frameRefIterator) End() uint64 {
+ return seg.node.keys[seg.index].End
+}
+
+// SetRangeUnchecked mutates the iterated segment's range key. This operation
+// does not invalidate any iterators.
+//
+// Preconditions:
+//
+// - r.Length() > 0.
+//
+// - The new range must not overlap an existing one: if seg.NextSegment().Ok(),
+// then r.End <= seg.NextSegment().Start(); if seg.PrevSegment().Ok(), then
+// r.Start >= seg.PrevSegment().End().
+func (seg frameRefIterator) SetRangeUnchecked(r __generics_imported0.FileRange) {
+ seg.node.keys[seg.index] = r
+}
+
+// SetRange mutates the iterated segment's range key. If the new range would
+// cause the iterated segment to overlap another segment, or if the new range
+// is invalid, SetRange panics. This operation does not invalidate any
+// iterators.
+func (seg frameRefIterator) SetRange(r __generics_imported0.FileRange) {
+ if r.Length() <= 0 {
+ panic(fmt.Sprintf("invalid segment range %v", r))
+ }
+ if prev := seg.PrevSegment(); prev.Ok() && r.Start < prev.End() {
+ panic(fmt.Sprintf("new segment range %v overlaps segment range %v", r, prev.Range()))
+ }
+ if next := seg.NextSegment(); next.Ok() && r.End > next.Start() {
+ panic(fmt.Sprintf("new segment range %v overlaps segment range %v", r, next.Range()))
+ }
+ seg.SetRangeUnchecked(r)
+}
+
+// SetStartUnchecked mutates the iterated segment's start. This operation does
+// not invalidate any iterators.
+//
+// Preconditions: The new start must be valid: start < seg.End(); if
+// seg.PrevSegment().Ok(), then start >= seg.PrevSegment().End().
+func (seg frameRefIterator) SetStartUnchecked(start uint64) {
+ seg.node.keys[seg.index].Start = start
+}
+
+// SetStart mutates the iterated segment's start. If the new start value would
+// cause the iterated segment to overlap another segment, or would result in an
+// invalid range, SetStart panics. This operation does not invalidate any
+// iterators.
+func (seg frameRefIterator) SetStart(start uint64) {
+ if start >= seg.End() {
+ panic(fmt.Sprintf("new start %v would invalidate segment range %v", start, seg.Range()))
+ }
+ if prev := seg.PrevSegment(); prev.Ok() && start < prev.End() {
+ panic(fmt.Sprintf("new start %v would cause segment range %v to overlap segment range %v", start, seg.Range(), prev.Range()))
+ }
+ seg.SetStartUnchecked(start)
+}
+
+// SetEndUnchecked mutates the iterated segment's end. This operation does not
+// invalidate any iterators.
+//
+// Preconditions: The new end must be valid: end > seg.Start(); if
+// seg.NextSegment().Ok(), then end <= seg.NextSegment().Start().
+func (seg frameRefIterator) SetEndUnchecked(end uint64) {
+ seg.node.keys[seg.index].End = end
+}
+
+// SetEnd mutates the iterated segment's end. If the new end value would cause
+// the iterated segment to overlap another segment, or would result in an
+// invalid range, SetEnd panics. This operation does not invalidate any
+// iterators.
+func (seg frameRefIterator) SetEnd(end uint64) {
+ if end <= seg.Start() {
+ panic(fmt.Sprintf("new end %v would invalidate segment range %v", end, seg.Range()))
+ }
+ if next := seg.NextSegment(); next.Ok() && end > next.Start() {
+ panic(fmt.Sprintf("new end %v would cause segment range %v to overlap segment range %v", end, seg.Range(), next.Range()))
+ }
+ seg.SetEndUnchecked(end)
+}
+
+// Value returns a copy of the iterated segment's value.
+func (seg frameRefIterator) Value() uint64 {
+ return seg.node.values[seg.index]
+}
+
+// ValuePtr returns a pointer to the iterated segment's value. The pointer is
+// invalidated if the iterator is invalidated. This operation does not
+// invalidate any iterators.
+func (seg frameRefIterator) ValuePtr() *uint64 {
+ return &seg.node.values[seg.index]
+}
+
+// SetValue mutates the iterated segment's value. This operation does not
+// invalidate any iterators.
+func (seg frameRefIterator) SetValue(val uint64) {
+ seg.node.values[seg.index] = val
+}
+
+// PrevSegment returns the iterated segment's predecessor. If there is no
+// preceding segment, PrevSegment returns a terminal iterator.
+func (seg frameRefIterator) PrevSegment() frameRefIterator {
+ if seg.node.hasChildren {
+ return seg.node.children[seg.index].lastSegment()
+ }
+ if seg.index > 0 {
+ return frameRefIterator{seg.node, seg.index - 1}
+ }
+ if seg.node.parent == nil {
+ return frameRefIterator{}
+ }
+ return frameRefsegmentBeforePosition(seg.node.parent, seg.node.parentIndex)
+}
+
+// NextSegment returns the iterated segment's successor. If there is no
+// succeeding segment, NextSegment returns a terminal iterator.
+func (seg frameRefIterator) NextSegment() frameRefIterator {
+ if seg.node.hasChildren {
+ return seg.node.children[seg.index+1].firstSegment()
+ }
+ if seg.index < seg.node.nrSegments-1 {
+ return frameRefIterator{seg.node, seg.index + 1}
+ }
+ if seg.node.parent == nil {
+ return frameRefIterator{}
+ }
+ return frameRefsegmentAfterPosition(seg.node.parent, seg.node.parentIndex)
+}
+
+// PrevGap returns the gap immediately before the iterated segment.
+func (seg frameRefIterator) PrevGap() frameRefGapIterator {
+ if seg.node.hasChildren {
+
+ return seg.node.children[seg.index].lastSegment().NextGap()
+ }
+ return frameRefGapIterator{seg.node, seg.index}
+}
+
+// NextGap returns the gap immediately after the iterated segment.
+func (seg frameRefIterator) NextGap() frameRefGapIterator {
+ if seg.node.hasChildren {
+ return seg.node.children[seg.index+1].firstSegment().PrevGap()
+ }
+ return frameRefGapIterator{seg.node, seg.index + 1}
+}
+
+// PrevNonEmpty returns the iterated segment's predecessor if it is adjacent,
+// or the gap before the iterated segment otherwise. If seg.Start() ==
+// Functions.MinKey(), PrevNonEmpty will return two terminal iterators.
+// Otherwise, exactly one of the iterators returned by PrevNonEmpty will be
+// non-terminal.
+func (seg frameRefIterator) PrevNonEmpty() (frameRefIterator, frameRefGapIterator) {
+ gap := seg.PrevGap()
+ if gap.Range().Length() != 0 {
+ return frameRefIterator{}, gap
+ }
+ return gap.PrevSegment(), frameRefGapIterator{}
+}
+
+// NextNonEmpty returns the iterated segment's successor if it is adjacent, or
+// the gap after the iterated segment otherwise. If seg.End() ==
+// Functions.MaxKey(), NextNonEmpty will return two terminal iterators.
+// Otherwise, exactly one of the iterators returned by NextNonEmpty will be
+// non-terminal.
+func (seg frameRefIterator) NextNonEmpty() (frameRefIterator, frameRefGapIterator) {
+ gap := seg.NextGap()
+ if gap.Range().Length() != 0 {
+ return frameRefIterator{}, gap
+ }
+ return gap.NextSegment(), frameRefGapIterator{}
+}
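+
+// Illustrative sketch (assumed name s): NextNonEmpty lets a scan over
+// contiguous segments stop at the first hole in one step:
+//
+//	for seg := s.FirstSegment(); seg.Ok(); {
+//		// ... visit seg ...
+//		var gap frameRefGapIterator
+//		seg, gap = seg.NextNonEmpty()
+//		if gap.Ok() {
+//			break // hit the first hole after a visited segment
+//		}
+//	}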
+
+// A GapIterator is conceptually one of:
+//
+// - A pointer to a position between two segments, before the first segment, or
+// after the last segment in a set, called a *gap*; or
+//
+// - A terminal iterator, which is a sentinel indicating that the end of
+// iteration has been reached.
+//
+// Note that the gap between two adjacent segments exists (iterators to it are
+// non-terminal), but has a length of zero. GapIterator.IsEmpty returns true
+// for such gaps. An empty set contains a single gap, spanning the entire range
+// of the set's keys.
+//
+// GapIterators are copyable values and are meaningfully equality-comparable.
+// The zero value of GapIterator is a terminal iterator.
+//
+// Unless otherwise specified, any mutation of a set invalidates all existing
+// iterators into the set.
+type frameRefGapIterator struct {
+ // The representation of a GapIterator is identical to that of an Iterator,
+ // except that index corresponds to positions between segments in the same
+ // way as for node.children (see comment for node.nrSegments).
+ node *frameRefnode
+ index int
+}
+
+// Ok returns true if the iterator is not terminal. All other methods are only
+// valid for non-terminal iterators.
+func (gap frameRefGapIterator) Ok() bool {
+ return gap.node != nil
+}
+
+// Range returns the range spanned by the iterated gap.
+func (gap frameRefGapIterator) Range() __generics_imported0.FileRange {
+ return __generics_imported0.FileRange{gap.Start(), gap.End()}
+}
+
+// Start is equivalent to Range().Start, but should be preferred if only the
+// start of the range is needed.
+func (gap frameRefGapIterator) Start() uint64 {
+ if ps := gap.PrevSegment(); ps.Ok() {
+ return ps.End()
+ }
+ return frameRefSetFunctions{}.MinKey()
+}
+
+// End is equivalent to Range().End, but should be preferred if only the end of
+// the range is needed.
+func (gap frameRefGapIterator) End() uint64 {
+ if ns := gap.NextSegment(); ns.Ok() {
+ return ns.Start()
+ }
+ return frameRefSetFunctions{}.MaxKey()
+}
+
+// IsEmpty returns true if the iterated gap is empty (that is, the "gap" is
+// between two adjacent segments).
+func (gap frameRefGapIterator) IsEmpty() bool {
+ return gap.Range().Length() == 0
+}
+
+// PrevSegment returns the segment immediately before the iterated gap. If no
+// such segment exists, PrevSegment returns a terminal iterator.
+func (gap frameRefGapIterator) PrevSegment() frameRefIterator {
+ return frameRefsegmentBeforePosition(gap.node, gap.index)
+}
+
+// NextSegment returns the segment immediately after the iterated gap. If no
+// such segment exists, NextSegment returns a terminal iterator.
+func (gap frameRefGapIterator) NextSegment() frameRefIterator {
+ return frameRefsegmentAfterPosition(gap.node, gap.index)
+}
+
+// PrevGap returns the iterated gap's predecessor. If no such gap exists,
+// PrevGap returns a terminal iterator.
+func (gap frameRefGapIterator) PrevGap() frameRefGapIterator {
+ seg := gap.PrevSegment()
+ if !seg.Ok() {
+ return frameRefGapIterator{}
+ }
+ return seg.PrevGap()
+}
+
+// NextGap returns the iterated gap's successor. If no such gap exists, NextGap
+// returns a terminal iterator.
+func (gap frameRefGapIterator) NextGap() frameRefGapIterator {
+ seg := gap.NextSegment()
+ if !seg.Ok() {
+ return frameRefGapIterator{}
+ }
+ return seg.NextGap()
+}
+
+// segmentBeforePosition returns the predecessor segment of the position given
+// by n.children[i], which may or may not contain a child. If no such segment
+// exists, segmentBeforePosition returns a terminal iterator.
+func frameRefsegmentBeforePosition(n *frameRefnode, i int) frameRefIterator {
+ for i == 0 {
+ if n.parent == nil {
+ return frameRefIterator{}
+ }
+ n, i = n.parent, n.parentIndex
+ }
+ return frameRefIterator{n, i - 1}
+}
+
+// segmentAfterPosition returns the successor segment of the position given by
+// n.children[i], which may or may not contain a child. If no such segment
+// exists, segmentAfterPosition returns a terminal iterator.
+func frameRefsegmentAfterPosition(n *frameRefnode, i int) frameRefIterator {
+ for i == n.nrSegments {
+ if n.parent == nil {
+ return frameRefIterator{}
+ }
+ n, i = n.parent, n.parentIndex
+ }
+ return frameRefIterator{n, i}
+}
+
+func frameRefzeroValueSlice(slice []uint64) {
+
+ for i := range slice {
+ frameRefSetFunctions{}.ClearValue(&slice[i])
+ }
+}
+
+func frameRefzeroNodeSlice(slice []*frameRefnode) {
+ for i := range slice {
+ slice[i] = nil
+ }
+}
+
+// String stringifies a Set for debugging.
+func (s *frameRefSet) String() string {
+ return s.root.String()
+}
+
+// String stringifies a node (and all of its children) for debugging.
+func (n *frameRefnode) String() string {
+ var buf bytes.Buffer
+ n.writeDebugString(&buf, "")
+ return buf.String()
+}
+
+func (n *frameRefnode) writeDebugString(buf *bytes.Buffer, prefix string) {
+ if n.hasChildren != (n.nrSegments > 0 && n.children[0] != nil) {
+ buf.WriteString(prefix)
+ buf.WriteString(fmt.Sprintf("WARNING: inconsistent value of hasChildren: got %v, want %v\n", n.hasChildren, !n.hasChildren))
+ }
+ for i := 0; i < n.nrSegments; i++ {
+ if child := n.children[i]; child != nil {
+ cprefix := fmt.Sprintf("%s- % 3d ", prefix, i)
+ if child.parent != n || child.parentIndex != i {
+ buf.WriteString(cprefix)
+ buf.WriteString(fmt.Sprintf("WARNING: inconsistent linkage to parent: got (%p, %d), want (%p, %d)\n", child.parent, child.parentIndex, n, i))
+ }
+ child.writeDebugString(buf, cprefix)
+ }
+ buf.WriteString(prefix)
+ buf.WriteString(fmt.Sprintf("- % 3d: %v => %v\n", i, n.keys[i], n.values[i]))
+ }
+ if child := n.children[n.nrSegments]; child != nil {
+ child.writeDebugString(buf, fmt.Sprintf("%s- % 3d ", prefix, n.nrSegments))
+ }
+}
+
+// SegmentDataSlices represents segments from a set as slices of start, end, and
+// values. SegmentDataSlices is primarily used as an intermediate representation
+// for save/restore and the layout here is optimized for that.
+//
+// +stateify savable
+type frameRefSegmentDataSlices struct {
+ Start []uint64
+ End []uint64
+ Values []uint64
+}
+
+// ExportSortedSlices returns a copy of all segments in the given set, in ascending
+// key order.
+func (s *frameRefSet) ExportSortedSlices() *frameRefSegmentDataSlices {
+ var sds frameRefSegmentDataSlices
+ for seg := s.FirstSegment(); seg.Ok(); seg = seg.NextSegment() {
+ sds.Start = append(sds.Start, seg.Start())
+ sds.End = append(sds.End, seg.End())
+ sds.Values = append(sds.Values, seg.Value())
+ }
+ sds.Start = sds.Start[:len(sds.Start):len(sds.Start)]
+ sds.End = sds.End[:len(sds.End):len(sds.End)]
+ sds.Values = sds.Values[:len(sds.Values):len(sds.Values)]
+ return &sds
+}
+
+// ImportSortedSlices initializes the given set from the given slices.
+//
+// Preconditions: s must be empty. sds must represent a valid set (the segments
+// in sds must have valid lengths that do not overlap). The segments in sds
+// must be sorted in ascending key order.
+func (s *frameRefSet) ImportSortedSlices(sds *frameRefSegmentDataSlices) error {
+ if !s.IsEmpty() {
+ return fmt.Errorf("cannot import into non-empty set %v", s)
+ }
+ gap := s.FirstGap()
+ for i := range sds.Start {
+ r := __generics_imported0.FileRange{sds.Start[i], sds.End[i]}
+ if !gap.Range().IsSupersetOf(r) {
+ return fmt.Errorf("segment overlaps a preceding segment or is incorrectly sorted: [%d, %d) => %v", sds.Start[i], sds.End[i], sds.Values[i])
+ }
+ gap = s.InsertWithoutMerging(gap, r, sds.Values[i]).NextGap()
+ }
+ return nil
+}
+
+func (s *frameRefSet) saveRoot() *frameRefSegmentDataSlices {
+ return s.ExportSortedSlices()
+}
+
+func (s *frameRefSet) loadRoot(sds *frameRefSegmentDataSlices) {
+ if err := s.ImportSortedSlices(sds); err != nil {
+ panic(err)
+ }
+}
diff --git a/pkg/sentry/fs/fsutil/fsutil.go b/pkg/sentry/fs/fsutil/fsutil.go
new file mode 100644
index 000000000..c9587b1d9
--- /dev/null
+++ b/pkg/sentry/fs/fsutil/fsutil.go
@@ -0,0 +1,24 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package fsutil provides utilities for implementing fs.InodeOperations
+// and fs.FileOperations:
+//
+// - For embeddable utilities, see inode.go and file.go.
+//
+// - For fs.Inodes that require a page cache to be memory mapped, see
+// inode_cached.go.
+//
+// - For anon fs.Inodes, see anon.go.
+package fsutil
diff --git a/pkg/sentry/fs/fsutil/fsutil_state_autogen.go b/pkg/sentry/fs/fsutil/fsutil_state_autogen.go
new file mode 100755
index 000000000..5783b151d
--- /dev/null
+++ b/pkg/sentry/fs/fsutil/fsutil_state_autogen.go
@@ -0,0 +1,349 @@
+// automatically generated by stateify.
+
+package fsutil
+
+import (
+ "gvisor.googlesource.com/gvisor/pkg/state"
+)
+
+func (x *DirtyInfo) beforeSave() {}
+func (x *DirtyInfo) save(m state.Map) {
+ x.beforeSave()
+ m.Save("Keep", &x.Keep)
+}
+
+func (x *DirtyInfo) afterLoad() {}
+func (x *DirtyInfo) load(m state.Map) {
+ m.Load("Keep", &x.Keep)
+}
+
+func (x *DirtySet) beforeSave() {}
+func (x *DirtySet) save(m state.Map) {
+ x.beforeSave()
+ var root *DirtySegmentDataSlices = x.saveRoot()
+ m.SaveValue("root", root)
+}
+
+func (x *DirtySet) afterLoad() {}
+func (x *DirtySet) load(m state.Map) {
+ m.LoadValue("root", new(*DirtySegmentDataSlices), func(y interface{}) { x.loadRoot(y.(*DirtySegmentDataSlices)) })
+}
+
+func (x *Dirtynode) beforeSave() {}
+func (x *Dirtynode) save(m state.Map) {
+ x.beforeSave()
+ m.Save("nrSegments", &x.nrSegments)
+ m.Save("parent", &x.parent)
+ m.Save("parentIndex", &x.parentIndex)
+ m.Save("hasChildren", &x.hasChildren)
+ m.Save("keys", &x.keys)
+ m.Save("values", &x.values)
+ m.Save("children", &x.children)
+}
+
+func (x *Dirtynode) afterLoad() {}
+func (x *Dirtynode) load(m state.Map) {
+ m.Load("nrSegments", &x.nrSegments)
+ m.Load("parent", &x.parent)
+ m.Load("parentIndex", &x.parentIndex)
+ m.Load("hasChildren", &x.hasChildren)
+ m.Load("keys", &x.keys)
+ m.Load("values", &x.values)
+ m.Load("children", &x.children)
+}
+
+func (x *DirtySegmentDataSlices) beforeSave() {}
+func (x *DirtySegmentDataSlices) save(m state.Map) {
+ x.beforeSave()
+ m.Save("Start", &x.Start)
+ m.Save("End", &x.End)
+ m.Save("Values", &x.Values)
+}
+
+func (x *DirtySegmentDataSlices) afterLoad() {}
+func (x *DirtySegmentDataSlices) load(m state.Map) {
+ m.Load("Start", &x.Start)
+ m.Load("End", &x.End)
+ m.Load("Values", &x.Values)
+}
+
+func (x *StaticDirFileOperations) beforeSave() {}
+func (x *StaticDirFileOperations) save(m state.Map) {
+ x.beforeSave()
+ m.Save("dentryMap", &x.dentryMap)
+ m.Save("dirCursor", &x.dirCursor)
+}
+
+func (x *StaticDirFileOperations) afterLoad() {}
+func (x *StaticDirFileOperations) load(m state.Map) {
+ m.Load("dentryMap", &x.dentryMap)
+ m.Load("dirCursor", &x.dirCursor)
+}
+
+func (x *NoReadWriteFile) beforeSave() {}
+func (x *NoReadWriteFile) save(m state.Map) {
+ x.beforeSave()
+}
+
+func (x *NoReadWriteFile) afterLoad() {}
+func (x *NoReadWriteFile) load(m state.Map) {
+}
+
+func (x *FileStaticContentReader) beforeSave() {}
+func (x *FileStaticContentReader) save(m state.Map) {
+ x.beforeSave()
+ m.Save("content", &x.content)
+}
+
+func (x *FileStaticContentReader) afterLoad() {}
+func (x *FileStaticContentReader) load(m state.Map) {
+ m.Load("content", &x.content)
+}
+
+func (x *FileRangeSet) beforeSave() {}
+func (x *FileRangeSet) save(m state.Map) {
+ x.beforeSave()
+ var root *FileRangeSegmentDataSlices = x.saveRoot()
+ m.SaveValue("root", root)
+}
+
+func (x *FileRangeSet) afterLoad() {}
+func (x *FileRangeSet) load(m state.Map) {
+ m.LoadValue("root", new(*FileRangeSegmentDataSlices), func(y interface{}) { x.loadRoot(y.(*FileRangeSegmentDataSlices)) })
+}
+
+func (x *FileRangenode) beforeSave() {}
+func (x *FileRangenode) save(m state.Map) {
+ x.beforeSave()
+ m.Save("nrSegments", &x.nrSegments)
+ m.Save("parent", &x.parent)
+ m.Save("parentIndex", &x.parentIndex)
+ m.Save("hasChildren", &x.hasChildren)
+ m.Save("keys", &x.keys)
+ m.Save("values", &x.values)
+ m.Save("children", &x.children)
+}
+
+func (x *FileRangenode) afterLoad() {}
+func (x *FileRangenode) load(m state.Map) {
+ m.Load("nrSegments", &x.nrSegments)
+ m.Load("parent", &x.parent)
+ m.Load("parentIndex", &x.parentIndex)
+ m.Load("hasChildren", &x.hasChildren)
+ m.Load("keys", &x.keys)
+ m.Load("values", &x.values)
+ m.Load("children", &x.children)
+}
+
+func (x *FileRangeSegmentDataSlices) beforeSave() {}
+func (x *FileRangeSegmentDataSlices) save(m state.Map) {
+ x.beforeSave()
+ m.Save("Start", &x.Start)
+ m.Save("End", &x.End)
+ m.Save("Values", &x.Values)
+}
+
+func (x *FileRangeSegmentDataSlices) afterLoad() {}
+func (x *FileRangeSegmentDataSlices) load(m state.Map) {
+ m.Load("Start", &x.Start)
+ m.Load("End", &x.End)
+ m.Load("Values", &x.Values)
+}
+
+func (x *frameRefSet) beforeSave() {}
+func (x *frameRefSet) save(m state.Map) {
+ x.beforeSave()
+ var root *frameRefSegmentDataSlices = x.saveRoot()
+ m.SaveValue("root", root)
+}
+
+func (x *frameRefSet) afterLoad() {}
+func (x *frameRefSet) load(m state.Map) {
+ m.LoadValue("root", new(*frameRefSegmentDataSlices), func(y interface{}) { x.loadRoot(y.(*frameRefSegmentDataSlices)) })
+}
+
+func (x *frameRefnode) beforeSave() {}
+func (x *frameRefnode) save(m state.Map) {
+ x.beforeSave()
+ m.Save("nrSegments", &x.nrSegments)
+ m.Save("parent", &x.parent)
+ m.Save("parentIndex", &x.parentIndex)
+ m.Save("hasChildren", &x.hasChildren)
+ m.Save("keys", &x.keys)
+ m.Save("values", &x.values)
+ m.Save("children", &x.children)
+}
+
+func (x *frameRefnode) afterLoad() {}
+func (x *frameRefnode) load(m state.Map) {
+ m.Load("nrSegments", &x.nrSegments)
+ m.Load("parent", &x.parent)
+ m.Load("parentIndex", &x.parentIndex)
+ m.Load("hasChildren", &x.hasChildren)
+ m.Load("keys", &x.keys)
+ m.Load("values", &x.values)
+ m.Load("children", &x.children)
+}
+
+func (x *frameRefSegmentDataSlices) beforeSave() {}
+func (x *frameRefSegmentDataSlices) save(m state.Map) {
+ x.beforeSave()
+ m.Save("Start", &x.Start)
+ m.Save("End", &x.End)
+ m.Save("Values", &x.Values)
+}
+
+func (x *frameRefSegmentDataSlices) afterLoad() {}
+func (x *frameRefSegmentDataSlices) load(m state.Map) {
+ m.Load("Start", &x.Start)
+ m.Load("End", &x.End)
+ m.Load("Values", &x.Values)
+}
+
+func (x *HostFileMapper) beforeSave() {}
+func (x *HostFileMapper) save(m state.Map) {
+ x.beforeSave()
+ m.Save("refs", &x.refs)
+}
+
+func (x *HostFileMapper) load(m state.Map) {
+ m.Load("refs", &x.refs)
+ m.AfterLoad(x.afterLoad)
+}
+
+func (x *HostMappable) beforeSave() {}
+func (x *HostMappable) save(m state.Map) {
+ x.beforeSave()
+ m.Save("hostFileMapper", &x.hostFileMapper)
+ m.Save("backingFile", &x.backingFile)
+ m.Save("mappings", &x.mappings)
+}
+
+func (x *HostMappable) afterLoad() {}
+func (x *HostMappable) load(m state.Map) {
+ m.Load("hostFileMapper", &x.hostFileMapper)
+ m.Load("backingFile", &x.backingFile)
+ m.Load("mappings", &x.mappings)
+}
+
+func (x *SimpleFileInode) beforeSave() {}
+func (x *SimpleFileInode) save(m state.Map) {
+ x.beforeSave()
+ m.Save("InodeSimpleAttributes", &x.InodeSimpleAttributes)
+}
+
+func (x *SimpleFileInode) afterLoad() {}
+func (x *SimpleFileInode) load(m state.Map) {
+ m.Load("InodeSimpleAttributes", &x.InodeSimpleAttributes)
+}
+
+func (x *NoReadWriteFileInode) beforeSave() {}
+func (x *NoReadWriteFileInode) save(m state.Map) {
+ x.beforeSave()
+ m.Save("InodeSimpleAttributes", &x.InodeSimpleAttributes)
+}
+
+func (x *NoReadWriteFileInode) afterLoad() {}
+func (x *NoReadWriteFileInode) load(m state.Map) {
+ m.Load("InodeSimpleAttributes", &x.InodeSimpleAttributes)
+}
+
+func (x *InodeSimpleAttributes) beforeSave() {}
+func (x *InodeSimpleAttributes) save(m state.Map) {
+ x.beforeSave()
+ m.Save("fsType", &x.fsType)
+ m.Save("unstable", &x.unstable)
+}
+
+func (x *InodeSimpleAttributes) afterLoad() {}
+func (x *InodeSimpleAttributes) load(m state.Map) {
+ m.Load("fsType", &x.fsType)
+ m.Load("unstable", &x.unstable)
+}
+
+func (x *InodeSimpleExtendedAttributes) beforeSave() {}
+func (x *InodeSimpleExtendedAttributes) save(m state.Map) {
+ x.beforeSave()
+ m.Save("xattrs", &x.xattrs)
+}
+
+func (x *InodeSimpleExtendedAttributes) afterLoad() {}
+func (x *InodeSimpleExtendedAttributes) load(m state.Map) {
+ m.Load("xattrs", &x.xattrs)
+}
+
+func (x *staticFile) beforeSave() {}
+func (x *staticFile) save(m state.Map) {
+ x.beforeSave()
+ m.Save("FileStaticContentReader", &x.FileStaticContentReader)
+}
+
+func (x *staticFile) afterLoad() {}
+func (x *staticFile) load(m state.Map) {
+ m.Load("FileStaticContentReader", &x.FileStaticContentReader)
+}
+
+func (x *InodeStaticFileGetter) beforeSave() {}
+func (x *InodeStaticFileGetter) save(m state.Map) {
+ x.beforeSave()
+ m.Save("Contents", &x.Contents)
+}
+
+func (x *InodeStaticFileGetter) afterLoad() {}
+func (x *InodeStaticFileGetter) load(m state.Map) {
+ m.Load("Contents", &x.Contents)
+}
+
+func (x *CachingInodeOperations) beforeSave() {}
+func (x *CachingInodeOperations) save(m state.Map) {
+ x.beforeSave()
+ m.Save("backingFile", &x.backingFile)
+ m.Save("mfp", &x.mfp)
+ m.Save("forcePageCache", &x.forcePageCache)
+ m.Save("attr", &x.attr)
+ m.Save("dirtyAttr", &x.dirtyAttr)
+ m.Save("mappings", &x.mappings)
+ m.Save("cache", &x.cache)
+ m.Save("dirty", &x.dirty)
+ m.Save("hostFileMapper", &x.hostFileMapper)
+ m.Save("refs", &x.refs)
+}
+
+func (x *CachingInodeOperations) afterLoad() {}
+func (x *CachingInodeOperations) load(m state.Map) {
+ m.Load("backingFile", &x.backingFile)
+ m.Load("mfp", &x.mfp)
+ m.Load("forcePageCache", &x.forcePageCache)
+ m.Load("attr", &x.attr)
+ m.Load("dirtyAttr", &x.dirtyAttr)
+ m.Load("mappings", &x.mappings)
+ m.Load("cache", &x.cache)
+ m.Load("dirty", &x.dirty)
+ m.Load("hostFileMapper", &x.hostFileMapper)
+ m.Load("refs", &x.refs)
+}
+
+func init() {
+ state.Register("fsutil.DirtyInfo", (*DirtyInfo)(nil), state.Fns{Save: (*DirtyInfo).save, Load: (*DirtyInfo).load})
+ state.Register("fsutil.DirtySet", (*DirtySet)(nil), state.Fns{Save: (*DirtySet).save, Load: (*DirtySet).load})
+ state.Register("fsutil.Dirtynode", (*Dirtynode)(nil), state.Fns{Save: (*Dirtynode).save, Load: (*Dirtynode).load})
+ state.Register("fsutil.DirtySegmentDataSlices", (*DirtySegmentDataSlices)(nil), state.Fns{Save: (*DirtySegmentDataSlices).save, Load: (*DirtySegmentDataSlices).load})
+ state.Register("fsutil.StaticDirFileOperations", (*StaticDirFileOperations)(nil), state.Fns{Save: (*StaticDirFileOperations).save, Load: (*StaticDirFileOperations).load})
+ state.Register("fsutil.NoReadWriteFile", (*NoReadWriteFile)(nil), state.Fns{Save: (*NoReadWriteFile).save, Load: (*NoReadWriteFile).load})
+ state.Register("fsutil.FileStaticContentReader", (*FileStaticContentReader)(nil), state.Fns{Save: (*FileStaticContentReader).save, Load: (*FileStaticContentReader).load})
+ state.Register("fsutil.FileRangeSet", (*FileRangeSet)(nil), state.Fns{Save: (*FileRangeSet).save, Load: (*FileRangeSet).load})
+ state.Register("fsutil.FileRangenode", (*FileRangenode)(nil), state.Fns{Save: (*FileRangenode).save, Load: (*FileRangenode).load})
+ state.Register("fsutil.FileRangeSegmentDataSlices", (*FileRangeSegmentDataSlices)(nil), state.Fns{Save: (*FileRangeSegmentDataSlices).save, Load: (*FileRangeSegmentDataSlices).load})
+ state.Register("fsutil.frameRefSet", (*frameRefSet)(nil), state.Fns{Save: (*frameRefSet).save, Load: (*frameRefSet).load})
+ state.Register("fsutil.frameRefnode", (*frameRefnode)(nil), state.Fns{Save: (*frameRefnode).save, Load: (*frameRefnode).load})
+ state.Register("fsutil.frameRefSegmentDataSlices", (*frameRefSegmentDataSlices)(nil), state.Fns{Save: (*frameRefSegmentDataSlices).save, Load: (*frameRefSegmentDataSlices).load})
+ state.Register("fsutil.HostFileMapper", (*HostFileMapper)(nil), state.Fns{Save: (*HostFileMapper).save, Load: (*HostFileMapper).load})
+ state.Register("fsutil.HostMappable", (*HostMappable)(nil), state.Fns{Save: (*HostMappable).save, Load: (*HostMappable).load})
+ state.Register("fsutil.SimpleFileInode", (*SimpleFileInode)(nil), state.Fns{Save: (*SimpleFileInode).save, Load: (*SimpleFileInode).load})
+ state.Register("fsutil.NoReadWriteFileInode", (*NoReadWriteFileInode)(nil), state.Fns{Save: (*NoReadWriteFileInode).save, Load: (*NoReadWriteFileInode).load})
+ state.Register("fsutil.InodeSimpleAttributes", (*InodeSimpleAttributes)(nil), state.Fns{Save: (*InodeSimpleAttributes).save, Load: (*InodeSimpleAttributes).load})
+ state.Register("fsutil.InodeSimpleExtendedAttributes", (*InodeSimpleExtendedAttributes)(nil), state.Fns{Save: (*InodeSimpleExtendedAttributes).save, Load: (*InodeSimpleExtendedAttributes).load})
+ state.Register("fsutil.staticFile", (*staticFile)(nil), state.Fns{Save: (*staticFile).save, Load: (*staticFile).load})
+ state.Register("fsutil.InodeStaticFileGetter", (*InodeStaticFileGetter)(nil), state.Fns{Save: (*InodeStaticFileGetter).save, Load: (*InodeStaticFileGetter).load})
+ state.Register("fsutil.CachingInodeOperations", (*CachingInodeOperations)(nil), state.Fns{Save: (*CachingInodeOperations).save, Load: (*CachingInodeOperations).load})
+}
diff --git a/pkg/sentry/fs/fsutil/host_file_mapper.go b/pkg/sentry/fs/fsutil/host_file_mapper.go
new file mode 100644
index 000000000..2bdfc0db6
--- /dev/null
+++ b/pkg/sentry/fs/fsutil/host_file_mapper.go
@@ -0,0 +1,211 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package fsutil
+
+import (
+ "fmt"
+ "sync"
+ "syscall"
+
+ "gvisor.googlesource.com/gvisor/pkg/log"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/memmap"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/platform"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/safemem"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
+)
+
+// HostFileMapper caches mappings of an arbitrary host file descriptor. It is
+// used by implementations of memmap.Mappable that represent a host file
+// descriptor.
+//
+// +stateify savable
+type HostFileMapper struct {
+	// HostFileMapper conceptually breaks the file into pieces called chunks,
+	// of size and alignment chunkSize, and caches mappings of the file at
+	// chunk granularity.
+
+ refsMu sync.Mutex `state:"nosave"`
+
+ // refs maps chunk start offsets to the sum of reference counts for all
+ // pages in that chunk. refs is protected by refsMu.
+ refs map[uint64]int32
+
+ mapsMu sync.Mutex `state:"nosave"`
+
+ // mappings maps chunk start offsets to mappings of those chunks,
+ // obtained by calling syscall.Mmap. mappings is protected by
+ // mapsMu.
+ mappings map[uint64]mapping `state:"nosave"`
+}
+
+const (
+ chunkShift = usermem.HugePageShift
+ chunkSize = 1 << chunkShift
+ chunkMask = chunkSize - 1
+)
+
+func pagesInChunk(mr memmap.MappableRange, chunkStart uint64) int32 {
+ return int32(mr.Intersect(memmap.MappableRange{chunkStart, chunkStart + chunkSize}).Length() / usermem.PageSize)
+}
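+
+// Illustrative example (not part of the original change): assuming the usual
+// 2 MB huge-page chunkSize on amd64 and 4 KB pages, the mappable range
+// [0x1000, 0x3000) lies entirely within the chunk at offset 0 and spans two
+// pages:
+//
+//	mr := memmap.MappableRange{Start: 0x1000, End: 0x3000}
+//	pagesInChunk(mr, 0) // == 2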
+
+type mapping struct {
+ addr uintptr
+ writable bool
+}
+
+// NewHostFileMapper returns a HostFileMapper with no references or cached
+// mappings.
+func NewHostFileMapper() *HostFileMapper {
+ return &HostFileMapper{
+ refs: make(map[uint64]int32),
+ mappings: make(map[uint64]mapping),
+ }
+}
+
+// IncRefOn increments the reference count on all offsets in mr.
+//
+// Preconditions: mr.Length() != 0. mr.Start and mr.End must be page-aligned.
+func (f *HostFileMapper) IncRefOn(mr memmap.MappableRange) {
+ f.refsMu.Lock()
+ defer f.refsMu.Unlock()
+ for chunkStart := mr.Start &^ chunkMask; chunkStart < mr.End; chunkStart += chunkSize {
+ refs := f.refs[chunkStart]
+ pgs := pagesInChunk(mr, chunkStart)
+ if refs+pgs < refs {
+ // Would overflow.
+ panic(fmt.Sprintf("HostFileMapper.IncRefOn(%v): adding %d page references to chunk %#x, which has %d page references", mr, pgs, chunkStart, refs))
+ }
+ f.refs[chunkStart] = refs + pgs
+ }
+}
+
+// DecRefOn decrements the reference count on all offsets in mr.
+//
+// Preconditions: mr.Length() != 0. mr.Start and mr.End must be page-aligned.
+func (f *HostFileMapper) DecRefOn(mr memmap.MappableRange) {
+ f.refsMu.Lock()
+ defer f.refsMu.Unlock()
+ for chunkStart := mr.Start &^ chunkMask; chunkStart < mr.End; chunkStart += chunkSize {
+ refs := f.refs[chunkStart]
+ pgs := pagesInChunk(mr, chunkStart)
+ switch {
+ case refs > pgs:
+ f.refs[chunkStart] = refs - pgs
+ case refs == pgs:
+ f.mapsMu.Lock()
+ delete(f.refs, chunkStart)
+ if m, ok := f.mappings[chunkStart]; ok {
+ f.unmapAndRemoveLocked(chunkStart, m)
+ }
+ f.mapsMu.Unlock()
+ case refs < pgs:
+ panic(fmt.Sprintf("HostFileMapper.DecRefOn(%v): removing %d page references from chunk %#x, which has %d page references", mr, pgs, chunkStart, refs))
+ }
+ }
+}
+
+// MapInternal returns a mapping of offsets in fr from fd. The returned
+// safemem.BlockSeq is valid as long as at least one reference is held on all
+// offsets in fr or until the next call to UnmapAll.
+//
+// Preconditions: The caller must hold a reference on all offsets in fr.
+func (f *HostFileMapper) MapInternal(fr platform.FileRange, fd int, write bool) (safemem.BlockSeq, error) {
+ chunks := ((fr.End + chunkMask) >> chunkShift) - (fr.Start >> chunkShift)
+ f.mapsMu.Lock()
+ defer f.mapsMu.Unlock()
+ if chunks == 1 {
+ // Avoid an unnecessary slice allocation.
+ var seq safemem.BlockSeq
+ err := f.forEachMappingBlockLocked(fr, fd, write, func(b safemem.Block) {
+ seq = safemem.BlockSeqOf(b)
+ })
+ return seq, err
+ }
+ blocks := make([]safemem.Block, 0, chunks)
+ err := f.forEachMappingBlockLocked(fr, fd, write, func(b safemem.Block) {
+ blocks = append(blocks, b)
+ })
+ return safemem.BlockSeqFromSlice(blocks), err
+}
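+
+// Illustrative usage sketch (assumptions: fd is an open host file descriptor
+// and dsts is a destination safemem.BlockSeq; error handling elided). The
+// caller takes references first, maps, copies, then drops the references:
+//
+//	mr := memmap.MappableRange{Start: 0, End: usermem.PageSize}
+//	f.IncRefOn(mr)
+//	ims, _ := f.MapInternal(platform.FileRange{Start: 0, End: usermem.PageSize}, fd, false /* write */)
+//	safemem.CopySeq(dsts, ims) // read the first page of the file
+//	f.DecRefOn(mr)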
+
+// Preconditions: f.mapsMu must be locked.
+func (f *HostFileMapper) forEachMappingBlockLocked(fr platform.FileRange, fd int, write bool, fn func(safemem.Block)) error {
+ prot := syscall.PROT_READ
+ if write {
+ prot |= syscall.PROT_WRITE
+ }
+ for chunkStart := fr.Start &^ chunkMask; chunkStart < fr.End; chunkStart += chunkSize {
+ m, ok := f.mappings[chunkStart]
+ if !ok {
+ addr, _, errno := syscall.Syscall6(
+ syscall.SYS_MMAP,
+ 0,
+ chunkSize,
+ uintptr(prot),
+ syscall.MAP_SHARED,
+ uintptr(fd),
+ uintptr(chunkStart))
+ if errno != 0 {
+ return errno
+ }
+ m = mapping{addr, write}
+ f.mappings[chunkStart] = m
+ } else if write && !m.writable {
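+			// Remap the chunk in place (MAP_FIXED below) with PROT_WRITE
+			// added so that the cached mapping becomes writable.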
+ addr, _, errno := syscall.Syscall6(
+ syscall.SYS_MMAP,
+ m.addr,
+ chunkSize,
+ uintptr(prot),
+ syscall.MAP_SHARED|syscall.MAP_FIXED,
+ uintptr(fd),
+ uintptr(chunkStart))
+ if errno != 0 {
+ return errno
+ }
+ m = mapping{addr, write}
+ f.mappings[chunkStart] = m
+ }
+ var startOff uint64
+ if chunkStart < fr.Start {
+ startOff = fr.Start - chunkStart
+ }
+ endOff := uint64(chunkSize)
+ if chunkStart+chunkSize > fr.End {
+ endOff = fr.End - chunkStart
+ }
+ fn(f.unsafeBlockFromChunkMapping(m.addr).TakeFirst64(endOff).DropFirst64(startOff))
+ }
+ return nil
+}
+
+// UnmapAll unmaps all cached mappings. Callers are responsible for
+// synchronization with mappings returned by previous calls to MapInternal.
+func (f *HostFileMapper) UnmapAll() {
+ f.mapsMu.Lock()
+ defer f.mapsMu.Unlock()
+ for chunkStart, m := range f.mappings {
+ f.unmapAndRemoveLocked(chunkStart, m)
+ }
+}
+
+// Preconditions: f.mapsMu must be locked. f.mappings[chunkStart] == m.
+func (f *HostFileMapper) unmapAndRemoveLocked(chunkStart uint64, m mapping) {
+ if _, _, errno := syscall.Syscall(syscall.SYS_MUNMAP, m.addr, chunkSize, 0); errno != 0 {
+ // This leaks address space and is unexpected, but is otherwise
+ // harmless, so complain but don't panic.
+ log.Warningf("HostFileMapper: failed to unmap mapping %#x for chunk %#x: %v", m.addr, chunkStart, errno)
+ }
+ delete(f.mappings, chunkStart)
+}
diff --git a/pkg/sentry/fs/fsutil/host_file_mapper_state.go b/pkg/sentry/fs/fsutil/host_file_mapper_state.go
new file mode 100644
index 000000000..576d2a3df
--- /dev/null
+++ b/pkg/sentry/fs/fsutil/host_file_mapper_state.go
@@ -0,0 +1,20 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package fsutil
+
+// afterLoad is invoked by stateify.
+func (f *HostFileMapper) afterLoad() {
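+	// Host mappings cannot be saved (mappings is marked state:"nosave"), so
+	// the cache starts empty after restore; chunks are remapped lazily by
+	// the next call to MapInternal.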
+ f.mappings = make(map[uint64]mapping)
+}
diff --git a/pkg/sentry/fs/fsutil/host_file_mapper_unsafe.go b/pkg/sentry/fs/fsutil/host_file_mapper_unsafe.go
new file mode 100644
index 000000000..7167be263
--- /dev/null
+++ b/pkg/sentry/fs/fsutil/host_file_mapper_unsafe.go
@@ -0,0 +1,27 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package fsutil
+
+import (
+ "unsafe"
+
+ "gvisor.googlesource.com/gvisor/pkg/sentry/safemem"
+)
+
+func (*HostFileMapper) unsafeBlockFromChunkMapping(addr uintptr) safemem.Block {
+ // We don't control the host file's length, so touching its mappings may
+ // raise SIGBUS. Thus accesses to it must use safecopy.
+ return safemem.BlockFromUnsafePointer((unsafe.Pointer)(addr), chunkSize)
+}
diff --git a/pkg/sentry/fs/fsutil/host_mappable.go b/pkg/sentry/fs/fsutil/host_mappable.go
new file mode 100644
index 000000000..ad0518b8f
--- /dev/null
+++ b/pkg/sentry/fs/fsutil/host_mappable.go
@@ -0,0 +1,197 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package fsutil
+
+import (
+ "math"
+ "sync"
+
+ "gvisor.googlesource.com/gvisor/pkg/sentry/context"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/memmap"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/platform"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/safemem"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
+)
+
+// HostMappable implements memmap.Mappable and platform.File over a
+// CachedFileObject.
+//
+// Lock order (compare the lock order model in mm/mm.go):
+// truncateMu ("fs locks")
+// mu ("memmap.Mappable locks not taken by Translate")
+// ("platform.File locks")
+// backingFile ("CachedFileObject locks")
+//
+// +stateify savable
+type HostMappable struct {
+ hostFileMapper *HostFileMapper
+
+ backingFile CachedFileObject
+
+ mu sync.Mutex `state:"nosave"`
+
+ // mappings tracks mappings of the cached file object into
+	// memmap.MappingSpaces so it can be invalidated upon save. Protected by mu.
+ mappings memmap.MappingSet
+
+ // truncateMu protects writes and truncations. See Truncate() for details.
+ truncateMu sync.RWMutex `state:"nosave"`
+}
+
+// NewHostMappable creates a new mappable that maps directly to a host FD.
+func NewHostMappable(backingFile CachedFileObject) *HostMappable {
+ return &HostMappable{
+ hostFileMapper: NewHostFileMapper(),
+ backingFile: backingFile,
+ }
+}
+
+// AddMapping implements memmap.Mappable.AddMapping.
+func (h *HostMappable) AddMapping(ctx context.Context, ms memmap.MappingSpace, ar usermem.AddrRange, offset uint64, writable bool) error {
+ // Hot path. Avoid defers.
+ h.mu.Lock()
+ mapped := h.mappings.AddMapping(ms, ar, offset, writable)
+ for _, r := range mapped {
+ h.hostFileMapper.IncRefOn(r)
+ }
+ h.mu.Unlock()
+ return nil
+}
+
+// RemoveMapping implements memmap.Mappable.RemoveMapping.
+func (h *HostMappable) RemoveMapping(ctx context.Context, ms memmap.MappingSpace, ar usermem.AddrRange, offset uint64, writable bool) {
+ // Hot path. Avoid defers.
+ h.mu.Lock()
+ unmapped := h.mappings.RemoveMapping(ms, ar, offset, writable)
+ for _, r := range unmapped {
+ h.hostFileMapper.DecRefOn(r)
+ }
+ h.mu.Unlock()
+}
+
+// CopyMapping implements memmap.Mappable.CopyMapping.
+func (h *HostMappable) CopyMapping(ctx context.Context, ms memmap.MappingSpace, srcAR, dstAR usermem.AddrRange, offset uint64, writable bool) error {
+ return h.AddMapping(ctx, ms, dstAR, offset, writable)
+}
+
+// Translate implements memmap.Mappable.Translate.
+func (h *HostMappable) Translate(ctx context.Context, required, optional memmap.MappableRange, at usermem.AccessType) ([]memmap.Translation, error) {
+ return []memmap.Translation{
+ {
+ Source: optional,
+ File: h,
+ Offset: optional.Start,
+ Perms: usermem.AnyAccess,
+ },
+ }, nil
+}
+
+// InvalidateUnsavable implements memmap.Mappable.InvalidateUnsavable.
+func (h *HostMappable) InvalidateUnsavable(ctx context.Context) error {
+ h.mu.Lock()
+ h.mappings.InvalidateAll(memmap.InvalidateOpts{})
+ h.mu.Unlock()
+ return nil
+}
+
+// MapInternal implements platform.File.MapInternal.
+func (h *HostMappable) MapInternal(fr platform.FileRange, at usermem.AccessType) (safemem.BlockSeq, error) {
+ return h.hostFileMapper.MapInternal(fr, h.backingFile.FD(), at.Write)
+}
+
+// FD implements platform.File.FD.
+func (h *HostMappable) FD() int {
+ return h.backingFile.FD()
+}
+
+// IncRef implements platform.File.IncRef.
+func (h *HostMappable) IncRef(fr platform.FileRange) {
+ mr := memmap.MappableRange{Start: fr.Start, End: fr.End}
+ h.hostFileMapper.IncRefOn(mr)
+}
+
+// DecRef implements platform.File.DecRef.
+func (h *HostMappable) DecRef(fr platform.FileRange) {
+ mr := memmap.MappableRange{Start: fr.Start, End: fr.End}
+ h.hostFileMapper.DecRefOn(mr)
+}
+
+// Truncate truncates the file, invalidating any mappings that may exist
+// beyond the new size.
+//
+// Truncation and writes are synchronized to prevent races where writes make
+// the file grow between truncation and invalidation below:
+// T1: Calls SetMaskedAttributes and stalls
+// T2: Appends to the file, causing it to grow
+// T2: Writes to mapped pages, and COW happens
+// T1: Continues and wrongly invalidates the page mapped in the step above.
+func (h *HostMappable) Truncate(ctx context.Context, newSize int64) error {
+ h.truncateMu.Lock()
+ defer h.truncateMu.Unlock()
+
+ mask := fs.AttrMask{Size: true}
+ attr := fs.UnstableAttr{Size: newSize}
+ if err := h.backingFile.SetMaskedAttributes(ctx, mask, attr); err != nil {
+ return err
+ }
+
+ // Invalidate COW mappings that may exist beyond the new size in case the file
+ // is being shrunk. Other mappings don't need to be invalidated because
+	// Translate will just return identical mappings after invalidation anyway,
+ // and SIGBUS will be raised and handled when the mappings are touched.
+ //
+ // Compare Linux's mm/truncate.c:truncate_setsize() =>
+ // truncate_pagecache() =>
+ // mm/memory.c:unmap_mapping_range(evencows=1).
+ h.mu.Lock()
+ defer h.mu.Unlock()
+ mr := memmap.MappableRange{
+ Start: fs.OffsetPageEnd(newSize),
+ End: fs.OffsetPageEnd(math.MaxInt64),
+ }
+ h.mappings.Invalidate(mr, memmap.InvalidateOpts{InvalidatePrivate: true})
+
+ return nil
+}
+
+// Allocate reserves space in the backing file.
+func (h *HostMappable) Allocate(ctx context.Context, offset int64, length int64) error {
+ h.truncateMu.RLock()
+ err := h.backingFile.Allocate(ctx, offset, length)
+ h.truncateMu.RUnlock()
+ return err
+}
+
+// Write writes to the file backing this mappable.
+func (h *HostMappable) Write(ctx context.Context, src usermem.IOSequence, offset int64) (int64, error) {
+ h.truncateMu.RLock()
+ n, err := src.CopyInTo(ctx, &writer{ctx: ctx, hostMappable: h, off: offset})
+ h.truncateMu.RUnlock()
+ return n, err
+}
+
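+// writer adapts the backing file's WriteFromBlocksAt to safemem.Writer so
+// that usermem.IOSequence.CopyInTo can stream user memory directly into the
+// file, advancing off as bytes are written.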
+type writer struct {
+ ctx context.Context
+ hostMappable *HostMappable
+ off int64
+}
+
+// WriteFromBlocks implements safemem.Writer.WriteFromBlocks.
+func (w *writer) WriteFromBlocks(src safemem.BlockSeq) (uint64, error) {
+ n, err := w.hostMappable.backingFile.WriteFromBlocksAt(w.ctx, src, uint64(w.off))
+ w.off += int64(n)
+ return n, err
+}
diff --git a/pkg/sentry/fs/fsutil/inode.go b/pkg/sentry/fs/fsutil/inode.go
new file mode 100644
index 000000000..925887335
--- /dev/null
+++ b/pkg/sentry/fs/fsutil/inode.go
@@ -0,0 +1,503 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package fsutil
+
+import (
+ "sync"
+
+ "gvisor.googlesource.com/gvisor/pkg/sentry/context"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
+ ktime "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/memmap"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/socket/unix/transport"
+ "gvisor.googlesource.com/gvisor/pkg/syserror"
+ "gvisor.googlesource.com/gvisor/pkg/waiter"
+)
+
+// SimpleFileInode is a simple implementation of InodeOperations.
+//
+// +stateify savable
+type SimpleFileInode struct {
+ InodeGenericChecker `state:"nosave"`
+ InodeNoExtendedAttributes `state:"nosave"`
+ InodeNoopRelease `state:"nosave"`
+ InodeNoopWriteOut `state:"nosave"`
+ InodeNotAllocatable `state:"nosave"`
+ InodeNotDirectory `state:"nosave"`
+ InodeNotMappable `state:"nosave"`
+ InodeNotOpenable `state:"nosave"`
+ InodeNotSocket `state:"nosave"`
+ InodeNotSymlink `state:"nosave"`
+ InodeNotTruncatable `state:"nosave"`
+ InodeNotVirtual `state:"nosave"`
+
+ InodeSimpleAttributes
+}
+
+// NewSimpleFileInode returns a new SimpleFileInode.
+func NewSimpleFileInode(ctx context.Context, owner fs.FileOwner, perms fs.FilePermissions, typ uint64) *SimpleFileInode {
+ return &SimpleFileInode{
+ InodeSimpleAttributes: NewInodeSimpleAttributes(ctx, owner, perms, typ),
+ }
+}
+
+// NoReadWriteFileInode is an implementation of InodeOperations that supports
+// opening files that are not readable or writeable.
+//
+// +stateify savable
+type NoReadWriteFileInode struct {
+ InodeGenericChecker `state:"nosave"`
+ InodeNoExtendedAttributes `state:"nosave"`
+ InodeNoopRelease `state:"nosave"`
+ InodeNoopWriteOut `state:"nosave"`
+ InodeNotAllocatable `state:"nosave"`
+ InodeNotDirectory `state:"nosave"`
+ InodeNotMappable `state:"nosave"`
+ InodeNotSocket `state:"nosave"`
+ InodeNotSymlink `state:"nosave"`
+ InodeNotTruncatable `state:"nosave"`
+ InodeNotVirtual `state:"nosave"`
+
+ InodeSimpleAttributes
+}
+
+// NewNoReadWriteFileInode returns a new NoReadWriteFileInode.
+func NewNoReadWriteFileInode(ctx context.Context, owner fs.FileOwner, perms fs.FilePermissions, typ uint64) *NoReadWriteFileInode {
+ return &NoReadWriteFileInode{
+ InodeSimpleAttributes: NewInodeSimpleAttributes(ctx, owner, perms, typ),
+ }
+}
+
+// GetFile implements fs.InodeOperations.GetFile.
+func (*NoReadWriteFileInode) GetFile(ctx context.Context, dirent *fs.Dirent, flags fs.FileFlags) (*fs.File, error) {
+ return fs.NewFile(ctx, dirent, flags, &NoReadWriteFile{}), nil
+}
+
+// InodeSimpleAttributes implements methods for updating in-memory unstable
+// attributes.
+//
+// +stateify savable
+type InodeSimpleAttributes struct {
+ // fsType is the immutable filesystem type that will be returned by
+ // StatFS.
+ fsType uint64
+
+ // mu protects unstable.
+ mu sync.RWMutex `state:"nosave"`
+ unstable fs.UnstableAttr
+}
+
+// NewInodeSimpleAttributes returns a new InodeSimpleAttributes with the given
+// owner and permissions, and all timestamps set to the current time.
+func NewInodeSimpleAttributes(ctx context.Context, owner fs.FileOwner, perms fs.FilePermissions, typ uint64) InodeSimpleAttributes {
+ return NewInodeSimpleAttributesWithUnstable(fs.WithCurrentTime(ctx, fs.UnstableAttr{
+ Owner: owner,
+ Perms: perms,
+ }), typ)
+}
+
+// NewInodeSimpleAttributesWithUnstable returns a new InodeSimpleAttributes
+// with the given unstable attributes.
+func NewInodeSimpleAttributesWithUnstable(uattr fs.UnstableAttr, typ uint64) InodeSimpleAttributes {
+ return InodeSimpleAttributes{
+ fsType: typ,
+ unstable: uattr,
+ }
+}
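+
+// Illustrative sketch (not part of the original change): a custom inode
+// typically embeds InodeSimpleAttributes alongside the "InodeNot*" mixins
+// and initializes it from one of the constructors above, e.g.:
+//
+//	type myInode struct {
+//		InodeNotDirectory `state:"nosave"`
+//		InodeNotSymlink   `state:"nosave"`
+//		// ... remaining InodeOperations mixins ...
+//		InodeSimpleAttributes
+//	}
+//
+//	attrs := NewInodeSimpleAttributes(ctx, fs.RootOwner, fs.FilePermsFromMode(0644), 0)
+//
+// (fs.RootOwner and the 0644 mode are illustrative; a fsType of 0 makes
+// StatFS return ENOSYS, as documented below.)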
+
+// UnstableAttr implements fs.InodeOperations.UnstableAttr.
+func (i *InodeSimpleAttributes) UnstableAttr(ctx context.Context, _ *fs.Inode) (fs.UnstableAttr, error) {
+ i.mu.RLock()
+ u := i.unstable
+ i.mu.RUnlock()
+ return u, nil
+}
+
+// SetPermissions implements fs.InodeOperations.SetPermissions.
+func (i *InodeSimpleAttributes) SetPermissions(ctx context.Context, _ *fs.Inode, p fs.FilePermissions) bool {
+ i.mu.Lock()
+ i.unstable.SetPermissions(ctx, p)
+ i.mu.Unlock()
+ return true
+}
+
+// SetOwner implements fs.InodeOperations.SetOwner.
+func (i *InodeSimpleAttributes) SetOwner(ctx context.Context, _ *fs.Inode, owner fs.FileOwner) error {
+ i.mu.Lock()
+ i.unstable.SetOwner(ctx, owner)
+ i.mu.Unlock()
+ return nil
+}
+
+// SetTimestamps implements fs.InodeOperations.SetTimestamps.
+func (i *InodeSimpleAttributes) SetTimestamps(ctx context.Context, _ *fs.Inode, ts fs.TimeSpec) error {
+ i.mu.Lock()
+ i.unstable.SetTimestamps(ctx, ts)
+ i.mu.Unlock()
+ return nil
+}
+
+// AddLink implements fs.InodeOperations.AddLink.
+func (i *InodeSimpleAttributes) AddLink() {
+ i.mu.Lock()
+ i.unstable.Links++
+ i.mu.Unlock()
+}
+
+// DropLink implements fs.InodeOperations.DropLink.
+func (i *InodeSimpleAttributes) DropLink() {
+ i.mu.Lock()
+ i.unstable.Links--
+ i.mu.Unlock()
+}
+
+// StatFS implements fs.InodeOperations.StatFS.
+func (i *InodeSimpleAttributes) StatFS(context.Context) (fs.Info, error) {
+ if i.fsType == 0 {
+ return fs.Info{}, syserror.ENOSYS
+ }
+ return fs.Info{Type: i.fsType}, nil
+}
+
+// NotifyAccess updates the access time.
+func (i *InodeSimpleAttributes) NotifyAccess(ctx context.Context) {
+ i.mu.Lock()
+ i.unstable.AccessTime = ktime.NowFromContext(ctx)
+ i.mu.Unlock()
+}
+
+// NotifyModification updates the modification time.
+func (i *InodeSimpleAttributes) NotifyModification(ctx context.Context) {
+ i.mu.Lock()
+ i.unstable.ModificationTime = ktime.NowFromContext(ctx)
+ i.mu.Unlock()
+}
+
+// NotifyStatusChange updates the status change time.
+func (i *InodeSimpleAttributes) NotifyStatusChange(ctx context.Context) {
+ i.mu.Lock()
+ i.unstable.StatusChangeTime = ktime.NowFromContext(ctx)
+ i.mu.Unlock()
+}
+
+// NotifyModificationAndStatusChange updates the modification and status change
+// times.
+func (i *InodeSimpleAttributes) NotifyModificationAndStatusChange(ctx context.Context) {
+ i.mu.Lock()
+ now := ktime.NowFromContext(ctx)
+ i.unstable.ModificationTime = now
+ i.unstable.StatusChangeTime = now
+ i.mu.Unlock()
+}
+
+// InodeSimpleExtendedAttributes implements
+// fs.InodeOperations.{Get,Set,List}xattr.
+//
+// +stateify savable
+type InodeSimpleExtendedAttributes struct {
+ // mu protects xattrs.
+ mu sync.RWMutex `state:"nosave"`
+ xattrs map[string]string
+}
+
+// Getxattr implements fs.InodeOperations.Getxattr.
+func (i *InodeSimpleExtendedAttributes) Getxattr(_ *fs.Inode, name string) (string, error) {
+ i.mu.RLock()
+ value, ok := i.xattrs[name]
+ i.mu.RUnlock()
+ if !ok {
+ return "", syserror.ENOATTR
+ }
+ return value, nil
+}
+
+// Setxattr implements fs.InodeOperations.Setxattr.
+func (i *InodeSimpleExtendedAttributes) Setxattr(_ *fs.Inode, name, value string) error {
+ i.mu.Lock()
+ if i.xattrs == nil {
+ i.xattrs = make(map[string]string)
+ }
+ i.xattrs[name] = value
+ i.mu.Unlock()
+ return nil
+}
+
+// Listxattr implements fs.InodeOperations.Listxattr.
+func (i *InodeSimpleExtendedAttributes) Listxattr(_ *fs.Inode) (map[string]struct{}, error) {
+ i.mu.RLock()
+ names := make(map[string]struct{}, len(i.xattrs))
+ for name := range i.xattrs {
+ names[name] = struct{}{}
+ }
+ i.mu.RUnlock()
+ return names, nil
+}
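+
+// Illustrative usage sketch (assuming inode is the owning *fs.Inode):
+//
+//	var x InodeSimpleExtendedAttributes
+//	x.Setxattr(inode, "user.tag", "v1")
+//	v, _ := x.Getxattr(inode, "user.tag") // v == "v1"
+//	names, _ := x.Listxattr(inode)        // names contains "user.tag"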
+
+// staticFile is a file with static contents. It is returned by
+// InodeStaticFileGetter.GetFile.
+//
+// +stateify savable
+type staticFile struct {
+ FileGenericSeek `state:"nosave"`
+ FileNoIoctl `state:"nosave"`
+ FileNoMMap `state:"nosave"`
+ FileNoSplice `state:"nosave"`
+ FileNoopFsync `state:"nosave"`
+ FileNoopFlush `state:"nosave"`
+ FileNoopRelease `state:"nosave"`
+ FileNoopWrite `state:"nosave"`
+ FileNotDirReaddir `state:"nosave"`
+ FileUseInodeUnstableAttr `state:"nosave"`
+ waiter.AlwaysReady `state:"nosave"`
+
+ FileStaticContentReader
+}
+
+// InodeNoStatFS implements StatFS by returning ENOSYS.
+type InodeNoStatFS struct{}
+
+// StatFS implements fs.InodeOperations.StatFS.
+func (InodeNoStatFS) StatFS(context.Context) (fs.Info, error) {
+ return fs.Info{}, syserror.ENOSYS
+}
+
+// InodeStaticFileGetter implements GetFile for a file with static contents.
+//
+// +stateify savable
+type InodeStaticFileGetter struct {
+ Contents []byte
+}
+
+// GetFile implements fs.InodeOperations.GetFile.
+func (i *InodeStaticFileGetter) GetFile(ctx context.Context, dirent *fs.Dirent, flags fs.FileFlags) (*fs.File, error) {
+ return fs.NewFile(ctx, dirent, flags, &staticFile{
+ FileStaticContentReader: NewFileStaticContentReader(i.Contents),
+ }), nil
+}
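+
+// Illustrative sketch (assuming ctx and dirent are available from the
+// caller): an inode built on InodeStaticFileGetter always serves the same
+// bytes:
+//
+//	getter := InodeStaticFileGetter{Contents: []byte("hello\n")}
+//	file, _ := getter.GetFile(ctx, dirent, fs.FileFlags{Read: true})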
+
+// InodeNotMappable returns a nil memmap.Mappable.
+type InodeNotMappable struct{}
+
+// Mappable implements fs.InodeOperations.Mappable.
+func (InodeNotMappable) Mappable(*fs.Inode) memmap.Mappable {
+ return nil
+}
+
+// InodeNoopWriteOut is a no-op implementation of fs.InodeOperations.WriteOut.
+type InodeNoopWriteOut struct{}
+
+// WriteOut is a no-op.
+func (InodeNoopWriteOut) WriteOut(context.Context, *fs.Inode) error {
+ return nil
+}
+
+// InodeNotDirectory can be used by Inodes that are not directories.
+type InodeNotDirectory struct{}
+
+// Lookup implements fs.InodeOperations.Lookup.
+func (InodeNotDirectory) Lookup(context.Context, *fs.Inode, string) (*fs.Dirent, error) {
+ return nil, syserror.ENOTDIR
+}
+
+// Create implements fs.InodeOperations.Create.
+func (InodeNotDirectory) Create(context.Context, *fs.Inode, string, fs.FileFlags, fs.FilePermissions) (*fs.File, error) {
+ return nil, syserror.ENOTDIR
+}
+
+// CreateLink implements fs.InodeOperations.CreateLink.
+func (InodeNotDirectory) CreateLink(context.Context, *fs.Inode, string, string) error {
+ return syserror.ENOTDIR
+}
+
+// CreateHardLink implements fs.InodeOperations.CreateHardLink.
+func (InodeNotDirectory) CreateHardLink(context.Context, *fs.Inode, *fs.Inode, string) error {
+ return syserror.ENOTDIR
+}
+
+// CreateDirectory implements fs.InodeOperations.CreateDirectory.
+func (InodeNotDirectory) CreateDirectory(context.Context, *fs.Inode, string, fs.FilePermissions) error {
+ return syserror.ENOTDIR
+}
+
+// Bind implements fs.InodeOperations.Bind.
+func (InodeNotDirectory) Bind(context.Context, *fs.Inode, string, transport.BoundEndpoint, fs.FilePermissions) (*fs.Dirent, error) {
+ return nil, syserror.ENOTDIR
+}
+
+// CreateFifo implements fs.InodeOperations.CreateFifo.
+func (InodeNotDirectory) CreateFifo(context.Context, *fs.Inode, string, fs.FilePermissions) error {
+ return syserror.ENOTDIR
+}
+
+// Remove implements fs.InodeOperations.Remove.
+func (InodeNotDirectory) Remove(context.Context, *fs.Inode, string) error {
+ return syserror.ENOTDIR
+}
+
+// RemoveDirectory implements fs.InodeOperations.RemoveDirectory.
+func (InodeNotDirectory) RemoveDirectory(context.Context, *fs.Inode, string) error {
+ return syserror.ENOTDIR
+}
+
+// Rename implements fs.FileOperations.Rename.
+func (InodeNotDirectory) Rename(context.Context, *fs.Inode, *fs.Inode, string, *fs.Inode, string, bool) error {
+ return syserror.EINVAL
+}
+
+// InodeNotSocket can be used by Inodes that are not sockets.
+type InodeNotSocket struct{}
+
+// BoundEndpoint implements fs.InodeOperations.BoundEndpoint.
+func (InodeNotSocket) BoundEndpoint(*fs.Inode, string) transport.BoundEndpoint {
+ return nil
+}
+
+// InodeNotTruncatable can be used by Inodes that cannot be truncated.
+type InodeNotTruncatable struct{}
+
+// Truncate implements fs.InodeOperations.Truncate.
+func (InodeNotTruncatable) Truncate(context.Context, *fs.Inode, int64) error {
+ return syserror.EINVAL
+}
+
+// InodeIsDirTruncate implements fs.InodeOperations.Truncate for directories.
+type InodeIsDirTruncate struct{}
+
+// Truncate implements fs.InodeOperations.Truncate.
+func (InodeIsDirTruncate) Truncate(context.Context, *fs.Inode, int64) error {
+ return syserror.EISDIR
+}
+
+// InodeNoopTruncate implements fs.InodeOperations.Truncate as a noop.
+type InodeNoopTruncate struct{}
+
+// Truncate implements fs.InodeOperations.Truncate.
+func (InodeNoopTruncate) Truncate(context.Context, *fs.Inode, int64) error {
+ return nil
+}
+
+// InodeNotRenameable can be used by Inodes that cannot be renamed.
+type InodeNotRenameable struct{}
+
+// Rename implements fs.InodeOperations.Rename.
+func (InodeNotRenameable) Rename(context.Context, *fs.Inode, *fs.Inode, string, *fs.Inode, string, bool) error {
+ return syserror.EINVAL
+}
+
+// InodeNotOpenable can be used by Inodes that cannot be opened.
+type InodeNotOpenable struct{}
+
+// GetFile implements fs.InodeOperations.GetFile.
+func (InodeNotOpenable) GetFile(context.Context, *fs.Dirent, fs.FileFlags) (*fs.File, error) {
+ return nil, syserror.EIO
+}
+
+// InodeNotVirtual can be used by Inodes that are not virtual.
+type InodeNotVirtual struct{}
+
+// IsVirtual implements fs.InodeOperations.IsVirtual.
+func (InodeNotVirtual) IsVirtual() bool {
+ return false
+}
+
+// InodeVirtual can be used by Inodes that are virtual.
+type InodeVirtual struct{}
+
+// IsVirtual implements fs.InodeOperations.IsVirtual.
+func (InodeVirtual) IsVirtual() bool {
+ return true
+}
+
+// InodeNotSymlink can be used by Inodes that are not symlinks.
+type InodeNotSymlink struct{}
+
+// Readlink implements fs.InodeOperations.Readlink.
+func (InodeNotSymlink) Readlink(context.Context, *fs.Inode) (string, error) {
+ return "", syserror.ENOLINK
+}
+
+// Getlink implements fs.InodeOperations.Getlink.
+func (InodeNotSymlink) Getlink(context.Context, *fs.Inode) (*fs.Dirent, error) {
+ return nil, syserror.ENOLINK
+}
+
+// InodeNoExtendedAttributes can be used by Inodes that do not support
+// extended attributes.
+type InodeNoExtendedAttributes struct{}
+
+// Getxattr implements fs.InodeOperations.Getxattr.
+func (InodeNoExtendedAttributes) Getxattr(*fs.Inode, string) (string, error) {
+ return "", syserror.EOPNOTSUPP
+}
+
+// Setxattr implements fs.InodeOperations.Setxattr.
+func (InodeNoExtendedAttributes) Setxattr(*fs.Inode, string, string) error {
+ return syserror.EOPNOTSUPP
+}
+
+// Listxattr implements fs.InodeOperations.Listxattr.
+func (InodeNoExtendedAttributes) Listxattr(*fs.Inode) (map[string]struct{}, error) {
+ return nil, syserror.EOPNOTSUPP
+}
+
+// InodeNoopRelease implements fs.InodeOperations.Release as a noop.
+type InodeNoopRelease struct{}
+
+// Release implements fs.InodeOperations.Release.
+func (InodeNoopRelease) Release(context.Context) {}
+
+// InodeGenericChecker implements fs.InodeOperations.Check with a generic
+// implementation.
+type InodeGenericChecker struct{}
+
+// Check implements fs.InodeOperations.Check.
+func (InodeGenericChecker) Check(ctx context.Context, inode *fs.Inode, p fs.PermMask) bool {
+ return fs.ContextCanAccessFile(ctx, inode, p)
+}
+
+// InodeDenyWriteChecker implements fs.InodeOperations.Check which denies all
+// write operations.
+type InodeDenyWriteChecker struct{}
+
+// Check implements fs.InodeOperations.Check.
+func (InodeDenyWriteChecker) Check(ctx context.Context, inode *fs.Inode, p fs.PermMask) bool {
+ if p.Write {
+ return false
+ }
+ return fs.ContextCanAccessFile(ctx, inode, p)
+}
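+
+// Illustrative sketch (not part of the original change): a read-only inode
+// can swap InodeGenericChecker for InodeDenyWriteChecker while keeping the
+// rest of the mixin set, e.g.:
+//
+//	type readOnlyInode struct {
+//		InodeDenyWriteChecker     `state:"nosave"`
+//		InodeNoExtendedAttributes `state:"nosave"`
+//		InodeNotDirectory         `state:"nosave"`
+//		InodeNotSymlink           `state:"nosave"`
+//		// ... remaining mixins and attributes ...
+//	}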
+
+// InodeNotAllocatable can be used by Inodes that do not support Allocate().
+type InodeNotAllocatable struct{}
+
+// Allocate implements fs.InodeOperations.Allocate.
+func (InodeNotAllocatable) Allocate(_ context.Context, _ *fs.Inode, _, _ int64) error {
+ return syserror.EOPNOTSUPP
+}
+
+// InodeNoopAllocate implements fs.InodeOperations.Allocate as a noop.
+type InodeNoopAllocate struct{}
+
+// Allocate implements fs.InodeOperations.Allocate.
+func (InodeNoopAllocate) Allocate(_ context.Context, _ *fs.Inode, _, _ int64) error {
+ return nil
+}
+
+// InodeIsDirAllocate implements fs.InodeOperations.Allocate for directories.
+type InodeIsDirAllocate struct{}
+
+// Allocate implements fs.InodeOperations.Allocate.
+func (InodeIsDirAllocate) Allocate(_ context.Context, _ *fs.Inode, _, _ int64) error {
+ return syserror.EISDIR
+}
diff --git a/pkg/sentry/fs/fsutil/inode_cached.go b/pkg/sentry/fs/fsutil/inode_cached.go
new file mode 100644
index 000000000..7bee2eb5f
--- /dev/null
+++ b/pkg/sentry/fs/fsutil/inode_cached.go
@@ -0,0 +1,1004 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package fsutil
+
+import (
+ "fmt"
+ "io"
+ "sync"
+
+ "gvisor.googlesource.com/gvisor/pkg/log"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/context"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/fs"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time"
+ ktime "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/time"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/memmap"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/pgalloc"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/platform"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/safemem"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/usage"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
+)
+
+// Lock order (compare the lock order model in mm/mm.go):
+//
+// CachingInodeOperations.attrMu ("fs locks")
+// CachingInodeOperations.mapsMu ("memmap.Mappable locks not taken by Translate")
+// CachingInodeOperations.dataMu ("memmap.Mappable locks taken by Translate")
+// CachedFileObject locks
+
+// CachingInodeOperations caches the metadata and content of a CachedFileObject.
+// It implements a subset of InodeOperations. As a utility it can be used to
+// implement the full set of InodeOperations. Generally it should not be
+// embedded to avoid unexpected inherited behavior.
+//
+// CachingInodeOperations implements Mappable for the CachedFileObject:
+//
+// - If CachedFileObject.FD returns a value >= 0 then the file descriptor
+// will be memory mapped on the host.
+//
+// - Otherwise, the contents of CachedFileObject are buffered into memory
+// managed by the CachingInodeOperations.
+//
+// Implementations of FileOperations for a CachedFileObject must read and
+// write through CachingInodeOperations using Read and Write respectively.
+//
+// Implementations of InodeOperations.WriteOut must call Sync to write out
+// in-memory modifications of data and metadata to the CachedFileObject.
+//
+// +stateify savable
+type CachingInodeOperations struct {
+ // backingFile is a handle to a cached file object.
+ backingFile CachedFileObject
+
+ // mfp is used to allocate memory that caches backingFile's contents.
+ mfp pgalloc.MemoryFileProvider
+
+ // forcePageCache indicates the sentry page cache should be used regardless
+ // of whether the platform supports host mapped I/O or not. This must not be
+ // modified after inode creation.
+ forcePageCache bool
+
+ attrMu sync.Mutex `state:"nosave"`
+
+ // attr is unstable cached metadata.
+ //
+ // attr is protected by attrMu. attr.Size is protected by both attrMu and
+ // dataMu; reading it requires locking either mutex, while mutating it
+ // requires locking both.
+ attr fs.UnstableAttr
+
+ // dirtyAttr is metadata that was updated in-place but hasn't yet
+ // been successfully written out.
+ //
+ // dirtyAttr is protected by attrMu.
+ dirtyAttr fs.AttrMask
+
+ mapsMu sync.Mutex `state:"nosave"`
+
+ // mappings tracks mappings of the cached file object into
+ // memmap.MappingSpaces.
+ //
+ // mappings is protected by mapsMu.
+ mappings memmap.MappingSet
+
+ dataMu sync.RWMutex `state:"nosave"`
+
+ // cache maps offsets into the cached file to offsets into
+ // mfp.MemoryFile() that store the file's data.
+ //
+ // cache is protected by dataMu.
+ cache FileRangeSet
+
+ // dirty tracks dirty segments in cache.
+ //
+ // dirty is protected by dataMu.
+ dirty DirtySet
+
+ // hostFileMapper caches internal mappings of backingFile.FD().
+ hostFileMapper *HostFileMapper
+
+ // refs tracks active references to data in the cache.
+ //
+ // refs is protected by dataMu.
+ refs frameRefSet
+}
+
+// CachedFileObject is a file that may require caching.
+type CachedFileObject interface {
+ // ReadToBlocksAt reads up to dsts.NumBytes() bytes from the file to dsts,
+ // starting at offset, and returns the number of bytes read. ReadToBlocksAt
+ // may return a partial read without an error.
+ ReadToBlocksAt(ctx context.Context, dsts safemem.BlockSeq, offset uint64) (uint64, error)
+
+ // WriteFromBlocksAt writes up to srcs.NumBytes() bytes from srcs to the
+ // file, starting at offset, and returns the number of bytes written.
+ // WriteFromBlocksAt may return a partial write without an error.
+ WriteFromBlocksAt(ctx context.Context, srcs safemem.BlockSeq, offset uint64) (uint64, error)
+
+ // SetMaskedAttributes sets the attributes in attr that are true in mask
+ // on the backing file.
+ //
+ // SetMaskedAttributes may be called at any point, regardless of whether
+ // the file was opened.
+ SetMaskedAttributes(ctx context.Context, mask fs.AttrMask, attr fs.UnstableAttr) error
+
+ // Allocate allows the caller to reserve disk space for the inode.
+ // It's equivalent to fallocate(2) with 'mode=0'.
+ Allocate(ctx context.Context, offset int64, length int64) error
+
+ // Sync instructs the remote filesystem to sync the file to stable storage.
+ Sync(ctx context.Context) error
+
+ // FD returns a host file descriptor. If it is possible for
+ // CachingInodeOperations.AddMapping to have ever been called with writable
+ // = true, the FD must have been opened O_RDWR; otherwise, it may have been
+ // opened O_RDONLY or O_RDWR. (mmap unconditionally requires that mapped
+ // files are readable.) If no host file descriptor is available, FD returns
+ // a negative number.
+ //
+ // For any given CachedFileObject, if FD() ever succeeds (returns a
+ // non-negative number), it must always succeed.
+ //
+ // FD is called iff the file has been memory mapped. This implies that
+ // the file was opened (see fs.InodeOperations.GetFile).
+ FD() int
+}
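+
+// Illustrative sketch (hypothetical, not part of the original change): a
+// minimal CachedFileObject backed by a host file descriptor would delegate
+// I/O to pread/pwrite-style helpers and expose the descriptor via FD:
+//
+//	type hostFileObject struct{ fd int }
+//
+//	func (h *hostFileObject) FD() int                        { return h.fd }
+//	func (h *hostFileObject) Sync(ctx context.Context) error { return syscall.Fsync(h.fd) }
+//	// ReadToBlocksAt/WriteFromBlocksAt would wrap preadv/pwritev on h.fd,
+//	// and SetMaskedAttributes would map a Size change to ftruncate(h.fd).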
+
+// NewCachingInodeOperations returns a new CachingInodeOperations backed by
+// a CachedFileObject and its initial unstable attributes.
+func NewCachingInodeOperations(ctx context.Context, backingFile CachedFileObject, uattr fs.UnstableAttr, forcePageCache bool) *CachingInodeOperations {
+ mfp := pgalloc.MemoryFileProviderFromContext(ctx)
+ if mfp == nil {
+ panic(fmt.Sprintf("context.Context %T lacks non-nil value for key %T", ctx, pgalloc.CtxMemoryFileProvider))
+ }
+ return &CachingInodeOperations{
+ backingFile: backingFile,
+ mfp: mfp,
+ forcePageCache: forcePageCache,
+ attr: uattr,
+ hostFileMapper: NewHostFileMapper(),
+ }
+}
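+
+// Illustrative usage sketch (assuming backing implements CachedFileObject
+// and uattr holds its initial attributes):
+//
+//	iops := NewCachingInodeOperations(ctx, backing, uattr, false /* forcePageCache */)
+//	// File reads and writes then go through iops.Read/iops.Write, and
+//	// fs.InodeOperations.WriteOut flushes cached data and attributes back.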
+
+// Release implements fs.InodeOperations.Release.
+func (c *CachingInodeOperations) Release() {
+ c.mapsMu.Lock()
+ defer c.mapsMu.Unlock()
+ c.dataMu.Lock()
+ defer c.dataMu.Unlock()
+
+ // Something has gone terribly wrong if we're releasing an inode that is
+ // still memory-mapped.
+ if !c.mappings.IsEmpty() {
+ panic(fmt.Sprintf("Releasing CachingInodeOperations with mappings:\n%s", &c.mappings))
+ }
+
+ // Drop any cached pages that are still awaiting MemoryFile eviction. (This
+ // means that MemoryFile no longer needs to evict them.)
+ mf := c.mfp.MemoryFile()
+ mf.MarkAllUnevictable(c)
+ if err := SyncDirtyAll(context.Background(), &c.cache, &c.dirty, uint64(c.attr.Size), mf, c.backingFile.WriteFromBlocksAt); err != nil {
+ panic(fmt.Sprintf("Failed to writeback cached data: %v", err))
+ }
+ c.cache.DropAll(mf)
+ c.dirty.RemoveAll()
+}
+
+// UnstableAttr implements fs.InodeOperations.UnstableAttr.
+func (c *CachingInodeOperations) UnstableAttr(ctx context.Context, inode *fs.Inode) (fs.UnstableAttr, error) {
+ c.attrMu.Lock()
+ attr := c.attr
+ c.attrMu.Unlock()
+ return attr, nil
+}
+
+// SetPermissions implements fs.InodeOperations.SetPermissions.
+func (c *CachingInodeOperations) SetPermissions(ctx context.Context, inode *fs.Inode, perms fs.FilePermissions) bool {
+ c.attrMu.Lock()
+ defer c.attrMu.Unlock()
+
+ now := ktime.NowFromContext(ctx)
+ masked := fs.AttrMask{Perms: true}
+ if err := c.backingFile.SetMaskedAttributes(ctx, masked, fs.UnstableAttr{Perms: perms}); err != nil {
+ return false
+ }
+ c.attr.Perms = perms
+ c.touchStatusChangeTimeLocked(now)
+ return true
+}
+
+// SetOwner implements fs.InodeOperations.SetOwner.
+func (c *CachingInodeOperations) SetOwner(ctx context.Context, inode *fs.Inode, owner fs.FileOwner) error {
+ if !owner.UID.Ok() && !owner.GID.Ok() {
+ return nil
+ }
+
+ c.attrMu.Lock()
+ defer c.attrMu.Unlock()
+
+ now := ktime.NowFromContext(ctx)
+ masked := fs.AttrMask{
+ UID: owner.UID.Ok(),
+ GID: owner.GID.Ok(),
+ }
+ if err := c.backingFile.SetMaskedAttributes(ctx, masked, fs.UnstableAttr{Owner: owner}); err != nil {
+ return err
+ }
+ if owner.UID.Ok() {
+ c.attr.Owner.UID = owner.UID
+ }
+ if owner.GID.Ok() {
+ c.attr.Owner.GID = owner.GID
+ }
+ c.touchStatusChangeTimeLocked(now)
+ return nil
+}
+
+// SetTimestamps implements fs.InodeOperations.SetTimestamps.
+func (c *CachingInodeOperations) SetTimestamps(ctx context.Context, inode *fs.Inode, ts fs.TimeSpec) error {
+ if ts.ATimeOmit && ts.MTimeOmit {
+ return nil
+ }
+
+ c.attrMu.Lock()
+ defer c.attrMu.Unlock()
+
+ // Replace requests to use the "system time" with the current time to
+ // ensure that cached timestamps remain consistent with the remote
+ // filesystem.
+ now := ktime.NowFromContext(ctx)
+ if ts.ATimeSetSystemTime {
+ ts.ATime = now
+ }
+ if ts.MTimeSetSystemTime {
+ ts.MTime = now
+ }
+ masked := fs.AttrMask{
+ AccessTime: !ts.ATimeOmit,
+ ModificationTime: !ts.MTimeOmit,
+ }
+ if err := c.backingFile.SetMaskedAttributes(ctx, masked, fs.UnstableAttr{AccessTime: ts.ATime, ModificationTime: ts.MTime}); err != nil {
+ return err
+ }
+ if !ts.ATimeOmit {
+ c.attr.AccessTime = ts.ATime
+ }
+ if !ts.MTimeOmit {
+ c.attr.ModificationTime = ts.MTime
+ }
+ c.touchStatusChangeTimeLocked(now)
+ return nil
+}
+
+// Truncate implements fs.InodeOperations.Truncate.
+func (c *CachingInodeOperations) Truncate(ctx context.Context, inode *fs.Inode, size int64) error {
+ c.attrMu.Lock()
+ defer c.attrMu.Unlock()
+
+ // c.attr.Size is protected by both c.attrMu and c.dataMu.
+ c.dataMu.Lock()
+ now := ktime.NowFromContext(ctx)
+ masked := fs.AttrMask{Size: true}
+ attr := fs.UnstableAttr{Size: size}
+ if err := c.backingFile.SetMaskedAttributes(ctx, masked, attr); err != nil {
+ c.dataMu.Unlock()
+ return err
+ }
+ oldSize := c.attr.Size
+ c.attr.Size = size
+ c.touchModificationAndStatusChangeTimeLocked(now)
+
+ // We drop c.dataMu here so that we can lock c.mapsMu and invalidate
+ // mappings below. This allows concurrent calls to Read/Translate/etc.
+ // These functions synchronize with an in-progress Truncate by refusing to
+ // use cache contents beyond the new c.attr.Size. (We are still holding
+ // c.attrMu, so we can't race with Truncate/Write.)
+ c.dataMu.Unlock()
+
+ // Nothing left to do unless shrinking the file.
+ if size >= oldSize {
+ return nil
+ }
+
+ oldpgend := fs.OffsetPageEnd(oldSize)
+ newpgend := fs.OffsetPageEnd(size)
+
+ // Invalidate past translations of truncated pages.
+ if newpgend != oldpgend {
+ c.mapsMu.Lock()
+ c.mappings.Invalidate(memmap.MappableRange{newpgend, oldpgend}, memmap.InvalidateOpts{
+ // Compare Linux's mm/truncate.c:truncate_setsize() =>
+ // truncate_pagecache() =>
+ // mm/memory.c:unmap_mapping_range(evencows=1).
+ InvalidatePrivate: true,
+ })
+ c.mapsMu.Unlock()
+ }
+
+ // We are now guaranteed that there are no translations of truncated pages,
+ // and can remove them from the cache. Since truncated pages have been
+ // removed from the backing file, they should be dropped without being
+ // written back.
+ c.dataMu.Lock()
+ defer c.dataMu.Unlock()
+ c.cache.Truncate(uint64(size), c.mfp.MemoryFile())
+ c.dirty.KeepClean(memmap.MappableRange{uint64(size), oldpgend})
+
+ return nil
+}
+
+// Allocate implements fs.InodeOperations.Allocate.
+func (c *CachingInodeOperations) Allocate(ctx context.Context, offset, length int64) error {
+ newSize := offset + length
+
+ // c.attr.Size is protected by both c.attrMu and c.dataMu.
+ c.attrMu.Lock()
+ defer c.attrMu.Unlock()
+ c.dataMu.Lock()
+ defer c.dataMu.Unlock()
+
+ if newSize <= c.attr.Size {
+ return nil
+ }
+
+ now := ktime.NowFromContext(ctx)
+ if err := c.backingFile.Allocate(ctx, offset, length); err != nil {
+ return err
+ }
+
+ c.attr.Size = newSize
+ c.touchModificationAndStatusChangeTimeLocked(now)
+ return nil
+}
+
+// WriteOut implements fs.InodeOperations.WriteOut.
+func (c *CachingInodeOperations) WriteOut(ctx context.Context, inode *fs.Inode) error {
+ c.attrMu.Lock()
+
+ // Write dirty pages back.
+ c.dataMu.Lock()
+ err := SyncDirtyAll(ctx, &c.cache, &c.dirty, uint64(c.attr.Size), c.mfp.MemoryFile(), c.backingFile.WriteFromBlocksAt)
+ c.dataMu.Unlock()
+ if err != nil {
+ c.attrMu.Unlock()
+ return err
+ }
+
+	// SyncDirtyAll above would have grown the file if needed. On shrinks, the
+	// backing file is truncated directly, so the size never needs to be
+	// updated here.
+ c.dirtyAttr.Size = false
+
+ // Write out cached attributes.
+ if err := c.backingFile.SetMaskedAttributes(ctx, c.dirtyAttr, c.attr); err != nil {
+ c.attrMu.Unlock()
+ return err
+ }
+ c.dirtyAttr = fs.AttrMask{}
+
+ c.attrMu.Unlock()
+
+ // Fsync the remote file.
+ return c.backingFile.Sync(ctx)
+}
+
+// IncLinks increases the link count and updates cached modification time.
+func (c *CachingInodeOperations) IncLinks(ctx context.Context) {
+ c.attrMu.Lock()
+ c.attr.Links++
+ c.touchModificationAndStatusChangeTimeLocked(ktime.NowFromContext(ctx))
+ c.attrMu.Unlock()
+}
+
+// DecLinks decreases the link count and updates cached modification time.
+func (c *CachingInodeOperations) DecLinks(ctx context.Context) {
+ c.attrMu.Lock()
+ c.attr.Links--
+ c.touchModificationAndStatusChangeTimeLocked(ktime.NowFromContext(ctx))
+ c.attrMu.Unlock()
+}
+
+// TouchAccessTime updates the cached access time in-place to the
+// current time. It does not update status change time in-place. See
+// mm/filemap.c:do_generic_file_read -> include/linux/fs.h:file_accessed.
+func (c *CachingInodeOperations) TouchAccessTime(ctx context.Context, inode *fs.Inode) {
+ if inode.MountSource.Flags.NoAtime {
+ return
+ }
+
+ c.attrMu.Lock()
+ c.touchAccessTimeLocked(ktime.NowFromContext(ctx))
+ c.attrMu.Unlock()
+}
+
+// touchAccessTimeLocked updates the cached access time in-place to the current
+// time.
+//
+// Preconditions: c.attrMu is locked for writing.
+func (c *CachingInodeOperations) touchAccessTimeLocked(now time.Time) {
+ c.attr.AccessTime = now
+ c.dirtyAttr.AccessTime = true
+}
+
+// TouchModificationAndStatusChangeTime updates the cached modification and
+// status change times in-place to the current time.
+func (c *CachingInodeOperations) TouchModificationAndStatusChangeTime(ctx context.Context) {
+ c.attrMu.Lock()
+ c.touchModificationAndStatusChangeTimeLocked(ktime.NowFromContext(ctx))
+ c.attrMu.Unlock()
+}
+
+// touchModificationAndStatusChangeTimeLocked updates the cached modification
+// and status change times in-place to the current time.
+//
+// Preconditions: c.attrMu is locked for writing.
+func (c *CachingInodeOperations) touchModificationAndStatusChangeTimeLocked(now time.Time) {
+ c.attr.ModificationTime = now
+ c.dirtyAttr.ModificationTime = true
+ c.attr.StatusChangeTime = now
+ c.dirtyAttr.StatusChangeTime = true
+}
+
+// TouchStatusChangeTime updates the cached status change time in-place to the
+// current time.
+func (c *CachingInodeOperations) TouchStatusChangeTime(ctx context.Context) {
+ c.attrMu.Lock()
+ c.touchStatusChangeTimeLocked(ktime.NowFromContext(ctx))
+ c.attrMu.Unlock()
+}
+
+// touchStatusChangeTimeLocked updates the cached status change time
+// in-place to the current time.
+//
+// Preconditions: c.attrMu is locked for writing.
+func (c *CachingInodeOperations) touchStatusChangeTimeLocked(now time.Time) {
+ c.attr.StatusChangeTime = now
+ c.dirtyAttr.StatusChangeTime = true
+}
+
+// UpdateUnstable updates the cached unstable attributes. Only non-dirty
+// attributes are updated.
+func (c *CachingInodeOperations) UpdateUnstable(attr fs.UnstableAttr) {
+ // All attributes are protected by attrMu.
+ c.attrMu.Lock()
+
+ if !c.dirtyAttr.Usage {
+ c.attr.Usage = attr.Usage
+ }
+ if !c.dirtyAttr.Perms {
+ c.attr.Perms = attr.Perms
+ }
+ if !c.dirtyAttr.UID {
+ c.attr.Owner.UID = attr.Owner.UID
+ }
+ if !c.dirtyAttr.GID {
+ c.attr.Owner.GID = attr.Owner.GID
+ }
+ if !c.dirtyAttr.AccessTime {
+ c.attr.AccessTime = attr.AccessTime
+ }
+ if !c.dirtyAttr.ModificationTime {
+ c.attr.ModificationTime = attr.ModificationTime
+ }
+ if !c.dirtyAttr.StatusChangeTime {
+ c.attr.StatusChangeTime = attr.StatusChangeTime
+ }
+ if !c.dirtyAttr.Links {
+ c.attr.Links = attr.Links
+ }
+
+ // Size requires holding attrMu and dataMu.
+ c.dataMu.Lock()
+ if !c.dirtyAttr.Size {
+ c.attr.Size = attr.Size
+ }
+ c.dataMu.Unlock()
+
+ c.attrMu.Unlock()
+}
+
+// Read reads from frames and otherwise directly from the backing file
+// into dst starting at offset until dst is full, EOF is reached, or an
+// error is encountered.
+//
+// Read may partially fill dst and return a nil error.
+func (c *CachingInodeOperations) Read(ctx context.Context, file *fs.File, dst usermem.IOSequence, offset int64) (int64, error) {
+ if dst.NumBytes() == 0 {
+ return 0, nil
+ }
+
+ // Have we reached EOF? We check for this again in
+ // inodeReadWriter.ReadToBlocks to avoid holding c.attrMu (which would
+ // serialize reads) or c.dataMu (which would violate lock ordering), but
+ // check here first (before calling into MM) since reading at EOF is
+ // common: getting a return value of 0 from a read syscall is the only way
+ // to detect EOF.
+ //
+ // TODO(jamieliu): Separate out c.attr.Size and use atomics instead of
+ // c.dataMu.
+ c.dataMu.RLock()
+ size := c.attr.Size
+ c.dataMu.RUnlock()
+ if offset >= size {
+ return 0, io.EOF
+ }
+
+ n, err := dst.CopyOutFrom(ctx, &inodeReadWriter{ctx, c, offset})
+ // Compare Linux's mm/filemap.c:do_generic_file_read() => file_accessed().
+ c.TouchAccessTime(ctx, file.Dirent.Inode)
+ return n, err
+}
+
+// Write writes to frames and otherwise directly to the backing file
+// from src starting at offset until src is empty or an error is
+// encountered.
+//
+// If Write only partially consumes src, a non-nil error is returned.
+func (c *CachingInodeOperations) Write(ctx context.Context, src usermem.IOSequence, offset int64) (int64, error) {
+ // Hot path. Avoid defers.
+ if src.NumBytes() == 0 {
+ return 0, nil
+ }
+
+ c.attrMu.Lock()
+ // Compare Linux's mm/filemap.c:__generic_file_write_iter() => file_update_time().
+ c.touchModificationAndStatusChangeTimeLocked(ktime.NowFromContext(ctx))
+ n, err := src.CopyInTo(ctx, &inodeReadWriter{ctx, c, offset})
+ c.attrMu.Unlock()
+ return n, err
+}
+
+type inodeReadWriter struct {
+ ctx context.Context
+ c *CachingInodeOperations
+ offset int64
+}
+
+// ReadToBlocks implements safemem.Reader.ReadToBlocks.
+func (rw *inodeReadWriter) ReadToBlocks(dsts safemem.BlockSeq) (uint64, error) {
+ // Hot path. Avoid defers.
+ rw.c.dataMu.RLock()
+
+ // Compute the range to read.
+ if rw.offset >= rw.c.attr.Size {
+ rw.c.dataMu.RUnlock()
+ return 0, io.EOF
+ }
+ end := fs.ReadEndOffset(rw.offset, int64(dsts.NumBytes()), rw.c.attr.Size)
+ if end == rw.offset { // dsts.NumBytes() == 0?
+ rw.c.dataMu.RUnlock()
+ return 0, nil
+ }
+
+ mem := rw.c.mfp.MemoryFile()
+ var done uint64
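+	// Iterate over the cache in offset order: a segment covers data that is
+	// already cached in the MemoryFile, while a gap covers data that must be
+	// read through to the backing file.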
+ seg, gap := rw.c.cache.Find(uint64(rw.offset))
+ for rw.offset < end {
+ mr := memmap.MappableRange{uint64(rw.offset), uint64(end)}
+ switch {
+ case seg.Ok():
+ // Get internal mappings from the cache.
+ ims, err := mem.MapInternal(seg.FileRangeOf(seg.Range().Intersect(mr)), usermem.Read)
+ if err != nil {
+ rw.c.dataMu.RUnlock()
+ return done, err
+ }
+
+ // Copy from internal mappings.
+ n, err := safemem.CopySeq(dsts, ims)
+ done += n
+ rw.offset += int64(n)
+ dsts = dsts.DropFirst64(n)
+ if err != nil {
+ rw.c.dataMu.RUnlock()
+ return done, err
+ }
+
+ // Continue.
+ seg, gap = seg.NextNonEmpty()
+
+ case gap.Ok():
+ // Read directly from the backing file.
+ gapmr := gap.Range().Intersect(mr)
+ dst := dsts.TakeFirst64(gapmr.Length())
+ n, err := rw.c.backingFile.ReadToBlocksAt(rw.ctx, dst, gapmr.Start)
+ done += n
+ rw.offset += int64(n)
+ dsts = dsts.DropFirst64(n)
+ // Partial reads are fine. But we must stop reading.
+ if n != dst.NumBytes() || err != nil {
+ rw.c.dataMu.RUnlock()
+ return done, err
+ }
+
+ // Continue.
+ seg, gap = gap.NextSegment(), FileRangeGapIterator{}
+
+		default:
+			// Neither a cached segment nor a gap remains, so there is
+			// nothing left to read. (A bare "break" here would only exit
+			// the switch, not the loop.)
+			rw.c.dataMu.RUnlock()
+			return done, nil
+		}
+ }
+ rw.c.dataMu.RUnlock()
+ return done, nil
+}
+
+// maybeGrowFile grows the file's size if data has been written past the old
+// size.
+//
+// Preconditions: rw.c.attrMu and rw.c.dataMu must be locked.
+func (rw *inodeReadWriter) maybeGrowFile() {
+ // If the write ends beyond the file's previous size, it causes the
+ // file to grow.
+ if rw.offset > rw.c.attr.Size {
+ rw.c.attr.Size = rw.offset
+ rw.c.dirtyAttr.Size = true
+ }
+ if rw.offset > rw.c.attr.Usage {
+ // This is incorrect if CachingInodeOperations is caching a sparse
+ // file. (In Linux, keeping inode::i_blocks up to date is the
+ // filesystem's responsibility.)
+ rw.c.attr.Usage = rw.offset
+ rw.c.dirtyAttr.Usage = true
+ }
+}
+
+// WriteFromBlocks implements safemem.Writer.WriteFromBlocks.
+//
+// Preconditions: rw.c.attrMu must be locked.
+func (rw *inodeReadWriter) WriteFromBlocks(srcs safemem.BlockSeq) (uint64, error) {
+ // Hot path. Avoid defers.
+ rw.c.dataMu.Lock()
+
+ // Compute the range to write.
+ end := fs.WriteEndOffset(rw.offset, int64(srcs.NumBytes()))
+ if end == rw.offset { // srcs.NumBytes() == 0?
+ rw.c.dataMu.Unlock()
+ return 0, nil
+ }
+
+ mf := rw.c.mfp.MemoryFile()
+ var done uint64
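+	// As in ReadToBlocks, iterate over the cache in offset order: writes to
+	// cached segments go through the MemoryFile and are marked dirty, while
+	// writes covering gaps go directly to the backing file.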
+ seg, gap := rw.c.cache.Find(uint64(rw.offset))
+ for rw.offset < end {
+ mr := memmap.MappableRange{uint64(rw.offset), uint64(end)}
+ switch {
+ case seg.Ok() && seg.Start() < mr.End:
+ // Get internal mappings from the cache.
+ segMR := seg.Range().Intersect(mr)
+ ims, err := mf.MapInternal(seg.FileRangeOf(segMR), usermem.Write)
+ if err != nil {
+ rw.maybeGrowFile()
+ rw.c.dataMu.Unlock()
+ return done, err
+ }
+
+ // Copy to internal mappings.
+ n, err := safemem.CopySeq(ims, srcs)
+ done += n
+ rw.offset += int64(n)
+ srcs = srcs.DropFirst64(n)
+ rw.c.dirty.MarkDirty(segMR)
+ if err != nil {
+ rw.maybeGrowFile()
+ rw.c.dataMu.Unlock()
+ return done, err
+ }
+
+ // Continue.
+ seg, gap = seg.NextNonEmpty()
+
+ case gap.Ok() && gap.Start() < mr.End:
+ // Write directly to the backing file.
+ gapmr := gap.Range().Intersect(mr)
+ src := srcs.TakeFirst64(gapmr.Length())
+ n, err := rw.c.backingFile.WriteFromBlocksAt(rw.ctx, src, gapmr.Start)
+ done += n
+ rw.offset += int64(n)
+ srcs = srcs.DropFirst64(n)
+ // Partial writes are fine. But we must stop writing.
+ if n != src.NumBytes() || err != nil {
+ rw.maybeGrowFile()
+ rw.c.dataMu.Unlock()
+ return done, err
+ }
+
+ // Continue.
+ seg, gap = gap.NextSegment(), FileRangeGapIterator{}
+
+		default:
+			// Neither a cached segment nor a gap intersects the remaining
+			// range, so there is nothing left to write. (A bare "break"
+			// here would only exit the switch, not the loop.)
+			rw.maybeGrowFile()
+			rw.c.dataMu.Unlock()
+			return done, nil
+		}
+ }
+ rw.maybeGrowFile()
+ rw.c.dataMu.Unlock()
+ return done, nil
+}
+
+// useHostPageCache returns true if c uses c.backingFile.FD() for all file I/O
+// and memory mappings, and false if c.cache may contain data cached from
+// c.backingFile.
+func (c *CachingInodeOperations) useHostPageCache() bool {
+ return !c.forcePageCache && c.backingFile.FD() >= 0
+}
+
+// AddMapping implements memmap.Mappable.AddMapping.
+func (c *CachingInodeOperations) AddMapping(ctx context.Context, ms memmap.MappingSpace, ar usermem.AddrRange, offset uint64, writable bool) error {
+ // Hot path. Avoid defers.
+ c.mapsMu.Lock()
+ mapped := c.mappings.AddMapping(ms, ar, offset, writable)
+ // Do this unconditionally since whether we have c.backingFile.FD() >= 0
+ // can change across save/restore.
+ for _, r := range mapped {
+ c.hostFileMapper.IncRefOn(r)
+ }
+ if !c.useHostPageCache() {
+ // c.Evict() will refuse to evict memory-mapped pages, so tell the
+ // MemoryFile to not bother trying.
+ mf := c.mfp.MemoryFile()
+ for _, r := range mapped {
+ mf.MarkUnevictable(c, pgalloc.EvictableRange{r.Start, r.End})
+ }
+ }
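+	// When the host page cache is used, mapped pages are accounted to
+	// usage.Mapped either incrementally (per reference, in IncRef/DecRef) or
+	// in bulk here and in RemoveMapping.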
+ if c.useHostPageCache() && !usage.IncrementalMappedAccounting {
+ for _, r := range mapped {
+ usage.MemoryAccounting.Inc(r.Length(), usage.Mapped)
+ }
+ }
+ c.mapsMu.Unlock()
+ return nil
+}
+
+// RemoveMapping implements memmap.Mappable.RemoveMapping.
+func (c *CachingInodeOperations) RemoveMapping(ctx context.Context, ms memmap.MappingSpace, ar usermem.AddrRange, offset uint64, writable bool) {
+ // Hot path. Avoid defers.
+ c.mapsMu.Lock()
+ unmapped := c.mappings.RemoveMapping(ms, ar, offset, writable)
+ for _, r := range unmapped {
+ c.hostFileMapper.DecRefOn(r)
+ }
+ if c.useHostPageCache() {
+ if !usage.IncrementalMappedAccounting {
+ for _, r := range unmapped {
+ usage.MemoryAccounting.Dec(r.Length(), usage.Mapped)
+ }
+ }
+ c.mapsMu.Unlock()
+ return
+ }
+
+ // Pages that are no longer referenced by any application memory mappings
+ // are now considered unused; allow MemoryFile to evict them when
+ // necessary.
+ mf := c.mfp.MemoryFile()
+ c.dataMu.Lock()
+ for _, r := range unmapped {
+ // Since these pages are no longer mapped, they are no longer
+ // concurrently dirtyable by a writable memory mapping.
+ c.dirty.AllowClean(r)
+ mf.MarkEvictable(c, pgalloc.EvictableRange{r.Start, r.End})
+ }
+ c.dataMu.Unlock()
+ c.mapsMu.Unlock()
+}
+
+// CopyMapping implements memmap.Mappable.CopyMapping.
+func (c *CachingInodeOperations) CopyMapping(ctx context.Context, ms memmap.MappingSpace, srcAR, dstAR usermem.AddrRange, offset uint64, writable bool) error {
+ return c.AddMapping(ctx, ms, dstAR, offset, writable)
+}
+
+// Translate implements memmap.Mappable.Translate.
+func (c *CachingInodeOperations) Translate(ctx context.Context, required, optional memmap.MappableRange, at usermem.AccessType) ([]memmap.Translation, error) {
+ // Hot path. Avoid defer.
+ if c.useHostPageCache() {
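+		// With the host page cache in use, c itself serves as the
+		// platform.File backing the mapping (see IncRef, DecRef, MapInternal
+		// and FD below), so the translation is the identity over the
+		// requested range.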
+ return []memmap.Translation{
+ {
+ Source: optional,
+ File: c,
+ Offset: optional.Start,
+ Perms: usermem.AnyAccess,
+ },
+ }, nil
+ }
+
+ c.dataMu.Lock()
+
+ // Constrain translations to c.attr.Size (rounded up) to prevent
+ // translation to pages that may be concurrently truncated.
+ pgend := fs.OffsetPageEnd(c.attr.Size)
+ var beyondEOF bool
+ if required.End > pgend {
+ if required.Start >= pgend {
+ c.dataMu.Unlock()
+ return nil, &memmap.BusError{io.EOF}
+ }
+ beyondEOF = true
+ required.End = pgend
+ }
+ if optional.End > pgend {
+ optional.End = pgend
+ }
+
+ mf := c.mfp.MemoryFile()
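+	// Fill any missing ranges of the cache by reading from the backing file
+	// into newly allocated MemoryFile frames, with bounded readahead (see
+	// maxFillRange).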
+ cerr := c.cache.Fill(ctx, required, maxFillRange(required, optional), mf, usage.PageCache, c.backingFile.ReadToBlocksAt)
+
+ var ts []memmap.Translation
+ var translatedEnd uint64
+ for seg := c.cache.FindSegment(required.Start); seg.Ok() && seg.Start() < required.End; seg, _ = seg.NextNonEmpty() {
+ segMR := seg.Range().Intersect(optional)
+ // TODO(jamieliu): Make Translations writable even if writability is
+ // not required if already kept-dirty by another writable translation.
+ perms := usermem.AccessType{
+ Read: true,
+ Execute: true,
+ }
+ if at.Write {
+ // From this point forward, this memory can be dirtied through the
+ // mapping at any time.
+ c.dirty.KeepDirty(segMR)
+ perms.Write = true
+ }
+ ts = append(ts, memmap.Translation{
+ Source: segMR,
+ File: mf,
+ Offset: seg.FileRangeOf(segMR).Start,
+ Perms: perms,
+ })
+ translatedEnd = segMR.End
+ }
+
+ c.dataMu.Unlock()
+
+ // Don't return the error returned by c.cache.Fill if it occurred outside
+ // of required.
+ if translatedEnd < required.End && cerr != nil {
+ return ts, &memmap.BusError{cerr}
+ }
+ if beyondEOF {
+ return ts, &memmap.BusError{io.EOF}
+ }
+ return ts, nil
+}
+
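+// maxFillRange returns the range that Translate asks the cache to fill:
+// required, plus readahead taken from optional, with the result capped at
+// maxReadahead bytes unless required itself is larger.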
+func maxFillRange(required, optional memmap.MappableRange) memmap.MappableRange {
+ const maxReadahead = 64 << 10 // 64 KB, chosen arbitrarily
+ if required.Length() >= maxReadahead {
+ return required
+ }
+ if optional.Length() <= maxReadahead {
+ return optional
+ }
+ optional.Start = required.Start
+ if optional.Length() <= maxReadahead {
+ return optional
+ }
+ optional.End = optional.Start + maxReadahead
+ return optional
+}
+
+// InvalidateUnsavable implements memmap.Mappable.InvalidateUnsavable.
+func (c *CachingInodeOperations) InvalidateUnsavable(ctx context.Context) error {
+ // Whether we have a host fd (and consequently what platform.File is
+ // mapped) can change across save/restore, so invalidate all translations
+ // unconditionally.
+ c.mapsMu.Lock()
+ defer c.mapsMu.Unlock()
+ c.mappings.InvalidateAll(memmap.InvalidateOpts{})
+
+ // Sync the cache's contents so that if we have a host fd after restore,
+ // the remote file's contents are coherent.
+ mf := c.mfp.MemoryFile()
+ c.dataMu.Lock()
+ defer c.dataMu.Unlock()
+ if err := SyncDirtyAll(ctx, &c.cache, &c.dirty, uint64(c.attr.Size), mf, c.backingFile.WriteFromBlocksAt); err != nil {
+ return err
+ }
+
+ // Discard the cache so that it's not stored in saved state. This is safe
+ // because per InvalidateUnsavable invariants, no new translations can have
+ // been returned after we invalidated all existing translations above.
+ c.cache.DropAll(mf)
+ c.dirty.RemoveAll()
+
+ return nil
+}
+
+// Evict implements pgalloc.EvictableMemoryUser.Evict.
+func (c *CachingInodeOperations) Evict(ctx context.Context, er pgalloc.EvictableRange) {
+ c.mapsMu.Lock()
+ defer c.mapsMu.Unlock()
+ c.dataMu.Lock()
+ defer c.dataMu.Unlock()
+
+ mr := memmap.MappableRange{er.Start, er.End}
+ mf := c.mfp.MemoryFile()
+ // Only allow pages that are no longer memory-mapped to be evicted.
+ for mgap := c.mappings.LowerBoundGap(mr.Start); mgap.Ok() && mgap.Start() < mr.End; mgap = mgap.NextGap() {
+ mgapMR := mgap.Range().Intersect(mr)
+ if mgapMR.Length() == 0 {
+ continue
+ }
+ if err := SyncDirty(ctx, mgapMR, &c.cache, &c.dirty, uint64(c.attr.Size), mf, c.backingFile.WriteFromBlocksAt); err != nil {
+ log.Warningf("Failed to writeback cached data %v: %v", mgapMR, err)
+ }
+ c.cache.Drop(mgapMR, mf)
+ c.dirty.KeepClean(mgapMR)
+ }
+}
+
+// IncRef implements platform.File.IncRef. This is used when we directly map an
+// underlying host fd and CachingInodeOperations is used as the platform.File
+// during translation.
+func (c *CachingInodeOperations) IncRef(fr platform.FileRange) {
+ // Hot path. Avoid defers.
+ c.dataMu.Lock()
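+	// c.refs maps file ranges to the number of outstanding translations that
+	// reference them: overlapping segments are incremented, and gaps become
+	// new segments with a reference count of 1.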
+ seg, gap := c.refs.Find(fr.Start)
+ for {
+ switch {
+ case seg.Ok() && seg.Start() < fr.End:
+ seg = c.refs.Isolate(seg, fr)
+ seg.SetValue(seg.Value() + 1)
+ seg, gap = seg.NextNonEmpty()
+ case gap.Ok() && gap.Start() < fr.End:
+ newRange := gap.Range().Intersect(fr)
+ if usage.IncrementalMappedAccounting {
+ usage.MemoryAccounting.Inc(newRange.Length(), usage.Mapped)
+ }
+ seg, gap = c.refs.InsertWithoutMerging(gap, newRange, 1).NextNonEmpty()
+ default:
+ c.refs.MergeAdjacent(fr)
+ c.dataMu.Unlock()
+ return
+ }
+ }
+}
+
+// DecRef implements platform.File.DecRef. This is used when we directly map an
+// underlying host fd and CachingInodeOperations is used as the platform.File
+// during translation.
+func (c *CachingInodeOperations) DecRef(fr platform.FileRange) {
+ // Hot path. Avoid defers.
+ c.dataMu.Lock()
+ seg := c.refs.FindSegment(fr.Start)
+
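+	// Decrement every segment overlapping fr, removing segments whose
+	// reference count drops to zero (and un-accounting them if incremental
+	// accounting is enabled).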
+ for seg.Ok() && seg.Start() < fr.End {
+ seg = c.refs.Isolate(seg, fr)
+ if old := seg.Value(); old == 1 {
+ if usage.IncrementalMappedAccounting {
+ usage.MemoryAccounting.Dec(seg.Range().Length(), usage.Mapped)
+ }
+ seg = c.refs.Remove(seg).NextSegment()
+ } else {
+ seg.SetValue(old - 1)
+ seg = seg.NextSegment()
+ }
+ }
+ c.refs.MergeAdjacent(fr)
+	c.dataMu.Unlock()
+}
+
+// MapInternal implements platform.File.MapInternal. This is used when we
+// directly map an underlying host fd and CachingInodeOperations is used as the
+// platform.File during translation.
+func (c *CachingInodeOperations) MapInternal(fr platform.FileRange, at usermem.AccessType) (safemem.BlockSeq, error) {
+ return c.hostFileMapper.MapInternal(fr, c.backingFile.FD(), at.Write)
+}
+
+// FD implements platform.File.FD. This is used when we directly map an
+// underlying host fd and CachingInodeOperations is used as the platform.File
+// during translation.
+func (c *CachingInodeOperations) FD() int {
+ return c.backingFile.FD()
+}