diff options
Diffstat (limited to 'pkg/sentry/kernel')
43 files changed, 638 insertions, 6 deletions
diff --git a/pkg/sentry/kernel/auth/atomicptr_credentials_unsafe.go b/pkg/sentry/kernel/auth/atomicptr_credentials_unsafe.go index 4535c958f..4535c958f 100755..100644 --- a/pkg/sentry/kernel/auth/atomicptr_credentials_unsafe.go +++ b/pkg/sentry/kernel/auth/atomicptr_credentials_unsafe.go diff --git a/pkg/sentry/kernel/auth/auth_state_autogen.go b/pkg/sentry/kernel/auth/auth_state_autogen.go index 5b97d58a6..36e3576e8 100755..100644 --- a/pkg/sentry/kernel/auth/auth_state_autogen.go +++ b/pkg/sentry/kernel/auth/auth_state_autogen.go @@ -88,6 +88,7 @@ func (x *idMapnode) save(m state.Map) { m.Save("parent", &x.parent) m.Save("parentIndex", &x.parentIndex) m.Save("hasChildren", &x.hasChildren) + m.Save("maxGap", &x.maxGap) m.Save("keys", &x.keys) m.Save("values", &x.values) m.Save("children", &x.children) @@ -99,6 +100,7 @@ func (x *idMapnode) load(m state.Map) { m.Load("parent", &x.parent) m.Load("parentIndex", &x.parentIndex) m.Load("hasChildren", &x.hasChildren) + m.Load("maxGap", &x.maxGap) m.Load("keys", &x.keys) m.Load("values", &x.values) m.Load("children", &x.children) diff --git a/pkg/sentry/kernel/auth/auth_unsafe_state_autogen.go b/pkg/sentry/kernel/auth/auth_unsafe_state_autogen.go index 6589b612c..6589b612c 100755..100644 --- a/pkg/sentry/kernel/auth/auth_unsafe_state_autogen.go +++ b/pkg/sentry/kernel/auth/auth_unsafe_state_autogen.go diff --git a/pkg/sentry/kernel/auth/id_map_range.go b/pkg/sentry/kernel/auth/id_map_range.go index 833fa3518..833fa3518 100755..100644 --- a/pkg/sentry/kernel/auth/id_map_range.go +++ b/pkg/sentry/kernel/auth/id_map_range.go diff --git a/pkg/sentry/kernel/auth/id_map_set.go b/pkg/sentry/kernel/auth/id_map_set.go index 73a17f281..983a036c5 100755..100644 --- a/pkg/sentry/kernel/auth/id_map_set.go +++ b/pkg/sentry/kernel/auth/id_map_set.go @@ -5,6 +5,34 @@ import ( "fmt" ) +// trackGaps is an optional parameter. +// +// If trackGaps is 1, the Set will track maximum gap size recursively, +// enabling the GapIterator.{Prev,Next}LargeEnoughGap functions. In this +// case, Key must be an unsigned integer. +// +// trackGaps must be 0 or 1. +const idMaptrackGaps = 0 + +var _ = uint8(idMaptrackGaps << 7) // Will fail if not zero or one. + +// dynamicGap is a type that disappears if trackGaps is 0. +type idMapdynamicGap [idMaptrackGaps]uint32 + +// Get returns the value of the gap. +// +// Precondition: trackGaps must be non-zero. +func (d *idMapdynamicGap) Get() uint32 { + return d[:][0] +} + +// Set sets the value of the gap. +// +// Precondition: trackGaps must be non-zero. +func (d *idMapdynamicGap) Set(v uint32) { + d[:][0] = v +} + const ( // minDegree is the minimum degree of an internal node in a Set B-tree. // @@ -263,8 +291,12 @@ func (s *idMapSet) Insert(gap idMapGapIterator, r idMapRange, val uint32) idMapI } if prev.Ok() && prev.End() == r.Start { if mval, ok := (idMapFunctions{}).Merge(prev.Range(), prev.Value(), r, val); ok { + shrinkMaxGap := idMaptrackGaps != 0 && gap.Range().Length() == gap.node.maxGap.Get() prev.SetEndUnchecked(r.End) prev.SetValue(mval) + if shrinkMaxGap { + gap.node.updateMaxGapLeaf() + } if next.Ok() && next.Start() == r.End { val = mval if mval, ok := (idMapFunctions{}).Merge(prev.Range(), val, next.Range(), next.Value()); ok { @@ -278,11 +310,16 @@ func (s *idMapSet) Insert(gap idMapGapIterator, r idMapRange, val uint32) idMapI } if next.Ok() && next.Start() == r.End { if mval, ok := (idMapFunctions{}).Merge(r, val, next.Range(), next.Value()); ok { + shrinkMaxGap := idMaptrackGaps != 0 && gap.Range().Length() == gap.node.maxGap.Get() next.SetStartUnchecked(r.Start) next.SetValue(mval) + if shrinkMaxGap { + gap.node.updateMaxGapLeaf() + } return next } } + return s.InsertWithoutMergingUnchecked(gap, r, val) } @@ -309,11 +346,15 @@ func (s *idMapSet) InsertWithoutMerging(gap idMapGapIterator, r idMapRange, val // Preconditions: r.Start >= gap.Start(); r.End <= gap.End(). func (s *idMapSet) InsertWithoutMergingUnchecked(gap idMapGapIterator, r idMapRange, val uint32) idMapIterator { gap = gap.node.rebalanceBeforeInsert(gap) + splitMaxGap := idMaptrackGaps != 0 && (gap.node.nrSegments == 0 || gap.Range().Length() == gap.node.maxGap.Get()) copy(gap.node.keys[gap.index+1:], gap.node.keys[gap.index:gap.node.nrSegments]) copy(gap.node.values[gap.index+1:], gap.node.values[gap.index:gap.node.nrSegments]) gap.node.keys[gap.index] = r gap.node.values[gap.index] = val gap.node.nrSegments++ + if splitMaxGap { + gap.node.updateMaxGapLeaf() + } return idMapIterator{gap.node, gap.index} } @@ -328,12 +369,20 @@ func (s *idMapSet) Remove(seg idMapIterator) idMapGapIterator { seg.SetRangeUnchecked(victim.Range()) seg.SetValue(victim.Value()) + + nextAdjacentNode := seg.NextSegment().node + if idMaptrackGaps != 0 { + nextAdjacentNode.updateMaxGapLeaf() + } return s.Remove(victim).NextGap() } copy(seg.node.keys[seg.index:], seg.node.keys[seg.index+1:seg.node.nrSegments]) copy(seg.node.values[seg.index:], seg.node.values[seg.index+1:seg.node.nrSegments]) idMapFunctions{}.ClearValue(&seg.node.values[seg.node.nrSegments-1]) seg.node.nrSegments-- + if idMaptrackGaps != 0 { + seg.node.updateMaxGapLeaf() + } return seg.node.rebalanceAfterRemove(idMapGapIterator{seg.node, seg.index}) } @@ -383,6 +432,7 @@ func (s *idMapSet) MergeUnchecked(first, second idMapIterator) idMapIterator { first.SetEndUnchecked(second.End()) first.SetValue(mval) + return s.Remove(second).PrevSegment() } } @@ -558,6 +608,12 @@ type idMapnode struct { // than "isLeaf" because false must be the correct value for an empty root. hasChildren bool + // The longest gap within this node. If the node is a leaf, it's simply the + // maximum gap among all the (nrSegments+1) gaps formed by its nrSegments keys + // including the 0th and nrSegments-th gap possibly shared with its upper-level + // nodes; if it's a non-leaf node, it's the max of all children's maxGap. + maxGap idMapdynamicGap + // Nodes store keys and values in separate arrays to maximize locality in // the common case (scanning keys for lookup). keys [idMapmaxDegree - 1]idMapRange @@ -603,12 +659,12 @@ func (n *idMapnode) nextSibling() *idMapnode { // required for insertion, and returns an updated iterator to the position // represented by gap. func (n *idMapnode) rebalanceBeforeInsert(gap idMapGapIterator) idMapGapIterator { - if n.parent != nil { - gap = n.parent.rebalanceBeforeInsert(gap) - } if n.nrSegments < idMapmaxDegree-1 { return gap } + if n.parent != nil { + gap = n.parent.rebalanceBeforeInsert(gap) + } if n.parent == nil { left := &idMapnode{ @@ -644,6 +700,11 @@ func (n *idMapnode) rebalanceBeforeInsert(gap idMapGapIterator) idMapGapIterator n.hasChildren = true n.children[0] = left n.children[1] = right + + if idMaptrackGaps != 0 { + left.updateMaxGapLocal() + right.updateMaxGapLocal() + } if gap.node != n { return gap } @@ -681,6 +742,11 @@ func (n *idMapnode) rebalanceBeforeInsert(gap idMapGapIterator) idMapGapIterator } n.nrSegments = idMapminDegree - 1 + if idMaptrackGaps != 0 { + n.updateMaxGapLocal() + sibling.updateMaxGapLocal() + } + if gap.node != n { return gap } @@ -726,6 +792,11 @@ func (n *idMapnode) rebalanceAfterRemove(gap idMapGapIterator) idMapGapIterator } n.nrSegments++ sibling.nrSegments-- + + if idMaptrackGaps != 0 { + n.updateMaxGapLocal() + sibling.updateMaxGapLocal() + } if gap.node == sibling && gap.index == sibling.nrSegments { return idMapGapIterator{n, 0} } @@ -754,6 +825,11 @@ func (n *idMapnode) rebalanceAfterRemove(gap idMapGapIterator) idMapGapIterator } n.nrSegments++ sibling.nrSegments-- + + if idMaptrackGaps != 0 { + n.updateMaxGapLocal() + sibling.updateMaxGapLocal() + } if gap.node == sibling { if gap.index == 0 { return idMapGapIterator{n, n.nrSegments} @@ -786,6 +862,7 @@ func (n *idMapnode) rebalanceAfterRemove(gap idMapGapIterator) idMapGapIterator p.children[0] = nil p.children[1] = nil } + if gap.node == left { return idMapGapIterator{p, gap.index} } @@ -832,10 +909,146 @@ func (n *idMapnode) rebalanceAfterRemove(gap idMapGapIterator) idMapGapIterator p.children[p.nrSegments] = nil p.nrSegments-- + if idMaptrackGaps != 0 { + left.updateMaxGapLocal() + } + n = p } } +// updateMaxGapLeaf updates maxGap bottom-up from the calling leaf until no +// necessary update. +// +// Preconditions: n must be a leaf node, trackGaps must be 1. +func (n *idMapnode) updateMaxGapLeaf() { + if n.hasChildren { + panic(fmt.Sprintf("updateMaxGapLeaf should always be called on leaf node: %v", n)) + } + max := n.calculateMaxGapLeaf() + if max == n.maxGap.Get() { + + return + } + oldMax := n.maxGap.Get() + n.maxGap.Set(max) + if max > oldMax { + + for p := n.parent; p != nil; p = p.parent { + if p.maxGap.Get() >= max { + + break + } + + p.maxGap.Set(max) + } + return + } + + for p := n.parent; p != nil; p = p.parent { + if p.maxGap.Get() > oldMax { + + break + } + + parentNewMax := p.calculateMaxGapInternal() + if p.maxGap.Get() == parentNewMax { + + break + } + + p.maxGap.Set(parentNewMax) + } +} + +// updateMaxGapLocal updates maxGap of the calling node solely with no +// propagation to ancestor nodes. +// +// Precondition: trackGaps must be 1. +func (n *idMapnode) updateMaxGapLocal() { + if !n.hasChildren { + + n.maxGap.Set(n.calculateMaxGapLeaf()) + } else { + + n.maxGap.Set(n.calculateMaxGapInternal()) + } +} + +// calculateMaxGapLeaf iterates the gaps within a leaf node and calculate the +// max. +// +// Preconditions: n must be a leaf node. +func (n *idMapnode) calculateMaxGapLeaf() uint32 { + max := idMapGapIterator{n, 0}.Range().Length() + for i := 1; i <= n.nrSegments; i++ { + if current := (idMapGapIterator{n, i}).Range().Length(); current > max { + max = current + } + } + return max +} + +// calculateMaxGapInternal iterates children's maxGap within an internal node n +// and calculate the max. +// +// Preconditions: n must be a non-leaf node. +func (n *idMapnode) calculateMaxGapInternal() uint32 { + max := n.children[0].maxGap.Get() + for i := 1; i <= n.nrSegments; i++ { + if current := n.children[i].maxGap.Get(); current > max { + max = current + } + } + return max +} + +// searchFirstLargeEnoughGap returns the first gap having at least minSize length +// in the subtree rooted by n. If not found, return a terminal gap iterator. +func (n *idMapnode) searchFirstLargeEnoughGap(minSize uint32) idMapGapIterator { + if n.maxGap.Get() < minSize { + return idMapGapIterator{} + } + if n.hasChildren { + for i := 0; i <= n.nrSegments; i++ { + if largeEnoughGap := n.children[i].searchFirstLargeEnoughGap(minSize); largeEnoughGap.Ok() { + return largeEnoughGap + } + } + } else { + for i := 0; i <= n.nrSegments; i++ { + currentGap := idMapGapIterator{n, i} + if currentGap.Range().Length() >= minSize { + return currentGap + } + } + } + panic(fmt.Sprintf("invalid maxGap in %v", n)) +} + +// searchLastLargeEnoughGap returns the last gap having at least minSize length +// in the subtree rooted by n. If not found, return a terminal gap iterator. +func (n *idMapnode) searchLastLargeEnoughGap(minSize uint32) idMapGapIterator { + if n.maxGap.Get() < minSize { + return idMapGapIterator{} + } + if n.hasChildren { + for i := n.nrSegments; i >= 0; i-- { + if largeEnoughGap := n.children[i].searchLastLargeEnoughGap(minSize); largeEnoughGap.Ok() { + return largeEnoughGap + } + } + } else { + for i := n.nrSegments; i >= 0; i-- { + currentGap := idMapGapIterator{n, i} + if currentGap.Range().Length() >= minSize { + return currentGap + } + } + } + panic(fmt.Sprintf("invalid maxGap in %v", n)) +} + // A Iterator is conceptually one of: // // - A pointer to a segment in a set; or @@ -1141,6 +1354,114 @@ func (gap idMapGapIterator) NextGap() idMapGapIterator { return seg.NextGap() } +// NextLargeEnoughGap returns the iterated gap's first next gap with larger +// length than minSize. If not found, return a terminal gap iterator (does NOT +// include this gap itself). +// +// Precondition: trackGaps must be 1. +func (gap idMapGapIterator) NextLargeEnoughGap(minSize uint32) idMapGapIterator { + if idMaptrackGaps != 1 { + panic("set is not tracking gaps") + } + if gap.node != nil && gap.node.hasChildren && gap.index == gap.node.nrSegments { + + gap.node = gap.NextSegment().node + gap.index = 0 + return gap.nextLargeEnoughGapHelper(minSize) + } + return gap.nextLargeEnoughGapHelper(minSize) +} + +// nextLargeEnoughGapHelper is the helper function used by NextLargeEnoughGap +// to do the real recursions. +// +// Preconditions: gap is NOT the trailing gap of a non-leaf node. +func (gap idMapGapIterator) nextLargeEnoughGapHelper(minSize uint32) idMapGapIterator { + + for gap.node != nil && + (gap.node.maxGap.Get() < minSize || (!gap.node.hasChildren && gap.index == gap.node.nrSegments)) { + gap.node, gap.index = gap.node.parent, gap.node.parentIndex + } + + if gap.node == nil { + return idMapGapIterator{} + } + + gap.index++ + for gap.index <= gap.node.nrSegments { + if gap.node.hasChildren { + if largeEnoughGap := gap.node.children[gap.index].searchFirstLargeEnoughGap(minSize); largeEnoughGap.Ok() { + return largeEnoughGap + } + } else { + if gap.Range().Length() >= minSize { + return gap + } + } + gap.index++ + } + gap.node, gap.index = gap.node.parent, gap.node.parentIndex + if gap.node != nil && gap.index == gap.node.nrSegments { + + gap.node, gap.index = gap.node.parent, gap.node.parentIndex + } + return gap.nextLargeEnoughGapHelper(minSize) +} + +// PrevLargeEnoughGap returns the iterated gap's first prev gap with larger or +// equal length than minSize. If not found, return a terminal gap iterator +// (does NOT include this gap itself). +// +// Precondition: trackGaps must be 1. +func (gap idMapGapIterator) PrevLargeEnoughGap(minSize uint32) idMapGapIterator { + if idMaptrackGaps != 1 { + panic("set is not tracking gaps") + } + if gap.node != nil && gap.node.hasChildren && gap.index == 0 { + + gap.node = gap.PrevSegment().node + gap.index = gap.node.nrSegments + return gap.prevLargeEnoughGapHelper(minSize) + } + return gap.prevLargeEnoughGapHelper(minSize) +} + +// prevLargeEnoughGapHelper is the helper function used by PrevLargeEnoughGap +// to do the real recursions. +// +// Preconditions: gap is NOT the first gap of a non-leaf node. +func (gap idMapGapIterator) prevLargeEnoughGapHelper(minSize uint32) idMapGapIterator { + + for gap.node != nil && + (gap.node.maxGap.Get() < minSize || (!gap.node.hasChildren && gap.index == 0)) { + gap.node, gap.index = gap.node.parent, gap.node.parentIndex + } + + if gap.node == nil { + return idMapGapIterator{} + } + + gap.index-- + for gap.index >= 0 { + if gap.node.hasChildren { + if largeEnoughGap := gap.node.children[gap.index].searchLastLargeEnoughGap(minSize); largeEnoughGap.Ok() { + return largeEnoughGap + } + } else { + if gap.Range().Length() >= minSize { + return gap + } + } + gap.index-- + } + gap.node, gap.index = gap.node.parent, gap.node.parentIndex + if gap.node != nil && gap.index == 0 { + + gap.node, gap.index = gap.node.parent, gap.node.parentIndex + } + return gap.prevLargeEnoughGapHelper(minSize) +} + // segmentBeforePosition returns the predecessor segment of the position given // by n.children[i], which may or may not contain a child. If no such segment // exists, segmentBeforePosition returns a terminal iterator. @@ -1207,7 +1528,15 @@ func (n *idMapnode) writeDebugString(buf *bytes.Buffer, prefix string) { child.writeDebugString(buf, fmt.Sprintf("%s- % 3d ", prefix, i)) } buf.WriteString(prefix) - buf.WriteString(fmt.Sprintf("- % 3d: %v => %v\n", i, n.keys[i], n.values[i])) + if n.hasChildren { + if idMaptrackGaps != 0 { + buf.WriteString(fmt.Sprintf("- % 3d: %v => %v, maxGap: %d\n", i, n.keys[i], n.values[i], n.maxGap.Get())) + } else { + buf.WriteString(fmt.Sprintf("- % 3d: %v => %v\n", i, n.keys[i], n.values[i])) + } + } else { + buf.WriteString(fmt.Sprintf("- % 3d: %v => %v\n", i, n.keys[i], n.values[i])) + } } if child := n.children[n.nrSegments]; child != nil { child.writeDebugString(buf, fmt.Sprintf("%s- % 3d ", prefix, n.nrSegments)) @@ -1259,6 +1588,46 @@ func (s *idMapSet) ImportSortedSlices(sds *idMapSegmentDataSlices) error { } return nil } + +// segmentTestCheck returns an error if s is incorrectly sorted, does not +// contain exactly expectedSegments segments, or contains a segment which +// fails the passed check. +// +// This should be used only for testing, and has been added to this package for +// templating convenience. +func (s *idMapSet) segmentTestCheck(expectedSegments int, segFunc func(int, idMapRange, uint32) error) error { + havePrev := false + prev := uint32(0) + nrSegments := 0 + for seg := s.FirstSegment(); seg.Ok(); seg = seg.NextSegment() { + next := seg.Start() + if havePrev && prev >= next { + return fmt.Errorf("incorrect order: key %d (segment %d) >= key %d (segment %d)", prev, nrSegments-1, next, nrSegments) + } + if segFunc != nil { + if err := segFunc(nrSegments, seg.Range(), seg.Value()); err != nil { + return err + } + } + prev = next + havePrev = true + nrSegments++ + } + if nrSegments != expectedSegments { + return fmt.Errorf("incorrect number of segments: got %d, wanted %d", nrSegments, expectedSegments) + } + return nil +} + +// countSegments counts the number of segments in the set. +// +// Similar to Check, this should only be used for testing. +func (s *idMapSet) countSegments() (segments int) { + for seg := s.FirstSegment(); seg.Ok(); seg = seg.NextSegment() { + segments++ + } + return segments +} func (s *idMapSet) saveRoot() *idMapSegmentDataSlices { return s.ExportSortedSlices() } diff --git a/pkg/sentry/kernel/epoll/epoll_list.go b/pkg/sentry/kernel/epoll/epoll_list.go index a018f7b5c..a018f7b5c 100755..100644 --- a/pkg/sentry/kernel/epoll/epoll_list.go +++ b/pkg/sentry/kernel/epoll/epoll_list.go diff --git a/pkg/sentry/kernel/epoll/epoll_state_autogen.go b/pkg/sentry/kernel/epoll/epoll_state_autogen.go index da3150465..da3150465 100755..100644 --- a/pkg/sentry/kernel/epoll/epoll_state_autogen.go +++ b/pkg/sentry/kernel/epoll/epoll_state_autogen.go diff --git a/pkg/sentry/kernel/eventfd/eventfd_state_autogen.go b/pkg/sentry/kernel/eventfd/eventfd_state_autogen.go index 636d80ea9..636d80ea9 100755..100644 --- a/pkg/sentry/kernel/eventfd/eventfd_state_autogen.go +++ b/pkg/sentry/kernel/eventfd/eventfd_state_autogen.go diff --git a/pkg/sentry/kernel/fasync/fasync_state_autogen.go b/pkg/sentry/kernel/fasync/fasync_state_autogen.go index fdcd48f64..fdcd48f64 100755..100644 --- a/pkg/sentry/kernel/fasync/fasync_state_autogen.go +++ b/pkg/sentry/kernel/fasync/fasync_state_autogen.go diff --git a/pkg/sentry/kernel/futex/atomicptr_bucket_unsafe.go b/pkg/sentry/kernel/futex/atomicptr_bucket_unsafe.go index d3fdf09b0..d3fdf09b0 100755..100644 --- a/pkg/sentry/kernel/futex/atomicptr_bucket_unsafe.go +++ b/pkg/sentry/kernel/futex/atomicptr_bucket_unsafe.go diff --git a/pkg/sentry/kernel/futex/futex_state_autogen.go b/pkg/sentry/kernel/futex/futex_state_autogen.go index 12a0788ff..12a0788ff 100755..100644 --- a/pkg/sentry/kernel/futex/futex_state_autogen.go +++ b/pkg/sentry/kernel/futex/futex_state_autogen.go diff --git a/pkg/sentry/kernel/futex/futex_unsafe_state_autogen.go b/pkg/sentry/kernel/futex/futex_unsafe_state_autogen.go index 7fc038fce..7fc038fce 100755..100644 --- a/pkg/sentry/kernel/futex/futex_unsafe_state_autogen.go +++ b/pkg/sentry/kernel/futex/futex_unsafe_state_autogen.go diff --git a/pkg/sentry/kernel/futex/waiter_list.go b/pkg/sentry/kernel/futex/waiter_list.go index 1b7a92b62..1b7a92b62 100755..100644 --- a/pkg/sentry/kernel/futex/waiter_list.go +++ b/pkg/sentry/kernel/futex/waiter_list.go diff --git a/pkg/sentry/kernel/kernel_amd64_state_autogen.go b/pkg/sentry/kernel/kernel_amd64_state_autogen.go index 12de47ad0..12de47ad0 100755..100644 --- a/pkg/sentry/kernel/kernel_amd64_state_autogen.go +++ b/pkg/sentry/kernel/kernel_amd64_state_autogen.go diff --git a/pkg/sentry/kernel/kernel_arm64_state_autogen.go b/pkg/sentry/kernel/kernel_arm64_state_autogen.go index 3c040d283..3c040d283 100755..100644 --- a/pkg/sentry/kernel/kernel_arm64_state_autogen.go +++ b/pkg/sentry/kernel/kernel_arm64_state_autogen.go diff --git a/pkg/sentry/kernel/kernel_opts.go b/pkg/sentry/kernel/kernel_opts.go index 2e66ec587..2e66ec587 100755..100644 --- a/pkg/sentry/kernel/kernel_opts.go +++ b/pkg/sentry/kernel/kernel_opts.go diff --git a/pkg/sentry/kernel/kernel_opts_state_autogen.go b/pkg/sentry/kernel/kernel_opts_state_autogen.go index 9ed7e27c9..9ed7e27c9 100755..100644 --- a/pkg/sentry/kernel/kernel_opts_state_autogen.go +++ b/pkg/sentry/kernel/kernel_opts_state_autogen.go diff --git a/pkg/sentry/kernel/kernel_state_autogen.go b/pkg/sentry/kernel/kernel_state_autogen.go index 89ac06522..89ac06522 100755..100644 --- a/pkg/sentry/kernel/kernel_state_autogen.go +++ b/pkg/sentry/kernel/kernel_state_autogen.go diff --git a/pkg/sentry/kernel/kernel_unsafe_state_autogen.go b/pkg/sentry/kernel/kernel_unsafe_state_autogen.go index 12130bf74..12130bf74 100755..100644 --- a/pkg/sentry/kernel/kernel_unsafe_state_autogen.go +++ b/pkg/sentry/kernel/kernel_unsafe_state_autogen.go diff --git a/pkg/sentry/kernel/memevent/memevent_state_autogen.go b/pkg/sentry/kernel/memevent/memevent_state_autogen.go index 4a1679fa9..4a1679fa9 100755..100644 --- a/pkg/sentry/kernel/memevent/memevent_state_autogen.go +++ b/pkg/sentry/kernel/memevent/memevent_state_autogen.go diff --git a/pkg/sentry/kernel/memevent/memory_events.go b/pkg/sentry/kernel/memevent/memory_events.go index 200565bb8..200565bb8 100755..100644 --- a/pkg/sentry/kernel/memevent/memory_events.go +++ b/pkg/sentry/kernel/memevent/memory_events.go diff --git a/pkg/sentry/kernel/memevent/memory_events_go_proto/memory_events.pb.go b/pkg/sentry/kernel/memevent/memory_events_go_proto/memory_events.pb.go index f8b857fa9..f8b857fa9 100755..100644 --- a/pkg/sentry/kernel/memevent/memory_events_go_proto/memory_events.pb.go +++ b/pkg/sentry/kernel/memevent/memory_events_go_proto/memory_events.pb.go diff --git a/pkg/sentry/kernel/pending_signals_list.go b/pkg/sentry/kernel/pending_signals_list.go index 2685c631a..2685c631a 100755..100644 --- a/pkg/sentry/kernel/pending_signals_list.go +++ b/pkg/sentry/kernel/pending_signals_list.go diff --git a/pkg/sentry/kernel/pipe/pipe.go b/pkg/sentry/kernel/pipe/pipe.go index 62c8691f1..79645d7d2 100644 --- a/pkg/sentry/kernel/pipe/pipe.go +++ b/pkg/sentry/kernel/pipe/pipe.go @@ -207,7 +207,10 @@ func (p *Pipe) read(ctx context.Context, ops readOps) (int64, error) { p.mu.Lock() defer p.mu.Unlock() + return p.readLocked(ctx, ops) +} +func (p *Pipe) readLocked(ctx context.Context, ops readOps) (int64, error) { // Is the pipe empty? if p.view.Size() == 0 { if !p.HasWriters() { @@ -246,7 +249,10 @@ type writeOps struct { func (p *Pipe) write(ctx context.Context, ops writeOps) (int64, error) { p.mu.Lock() defer p.mu.Unlock() + return p.writeLocked(ctx, ops) +} +func (p *Pipe) writeLocked(ctx context.Context, ops writeOps) (int64, error) { // Can't write to a pipe with no readers. if !p.HasReaders() { return 0, syscall.EPIPE diff --git a/pkg/sentry/kernel/pipe/pipe_state_autogen.go b/pkg/sentry/kernel/pipe/pipe_state_autogen.go index b49ab46f9..b49ab46f9 100755..100644 --- a/pkg/sentry/kernel/pipe/pipe_state_autogen.go +++ b/pkg/sentry/kernel/pipe/pipe_state_autogen.go diff --git a/pkg/sentry/kernel/pipe/pipe_unsafe.go b/pkg/sentry/kernel/pipe/pipe_unsafe.go new file mode 100644 index 000000000..dd60cba24 --- /dev/null +++ b/pkg/sentry/kernel/pipe/pipe_unsafe.go @@ -0,0 +1,35 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package pipe + +import ( + "unsafe" +) + +// lockTwoPipes locks both x.mu and y.mu in an order that is guaranteed to be +// consistent for both lockTwoPipes(x, y) and lockTwoPipes(y, x), such that +// concurrent calls cannot deadlock. +// +// Preconditions: x != y. +func lockTwoPipes(x, y *Pipe) { + // Lock the two pipes in order of increasing address. + if uintptr(unsafe.Pointer(x)) < uintptr(unsafe.Pointer(y)) { + x.mu.Lock() + y.mu.Lock() + } else { + y.mu.Lock() + x.mu.Lock() + } +} diff --git a/pkg/sentry/kernel/pipe/pipe_unsafe_state_autogen.go b/pkg/sentry/kernel/pipe/pipe_unsafe_state_autogen.go new file mode 100644 index 000000000..d3b40feb4 --- /dev/null +++ b/pkg/sentry/kernel/pipe/pipe_unsafe_state_autogen.go @@ -0,0 +1,3 @@ +// automatically generated by stateify. + +package pipe diff --git a/pkg/sentry/kernel/pipe/pipe_util.go b/pkg/sentry/kernel/pipe/pipe_util.go index aacf28da2..aacf28da2 100755..100644 --- a/pkg/sentry/kernel/pipe/pipe_util.go +++ b/pkg/sentry/kernel/pipe/pipe_util.go diff --git a/pkg/sentry/kernel/pipe/vfs.go b/pkg/sentry/kernel/pipe/vfs.go index b54f08a30..2602bed72 100755..100644 --- a/pkg/sentry/kernel/pipe/vfs.go +++ b/pkg/sentry/kernel/pipe/vfs.go @@ -16,7 +16,9 @@ package pipe import ( "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/buffer" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/safemem" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/sync" @@ -150,7 +152,9 @@ func (vp *VFSPipe) newFD(mnt *vfs.Mount, vfsd *vfs.Dentry, statusFlags uint32) * return &fd.vfsfd } -// VFSPipeFD implements vfs.FileDescriptionImpl for pipes. +// VFSPipeFD implements vfs.FileDescriptionImpl for pipes. It also implements +// non-atomic usermem.IO methods, allowing it to be passed as usermem.IO to +// other FileDescriptions for splice(2) and tee(2). type VFSPipeFD struct { vfsfd vfs.FileDescription vfs.FileDescriptionDefaultImpl @@ -229,3 +233,216 @@ func (fd *VFSPipeFD) PipeSize() int64 { func (fd *VFSPipeFD) SetPipeSize(size int64) (int64, error) { return fd.pipe.SetFifoSize(size) } + +// IOSequence returns a useremm.IOSequence that reads up to count bytes from, +// or writes up to count bytes to, fd. +func (fd *VFSPipeFD) IOSequence(count int64) usermem.IOSequence { + return usermem.IOSequence{ + IO: fd, + Addrs: usermem.AddrRangeSeqOf(usermem.AddrRange{0, usermem.Addr(count)}), + } +} + +// CopyIn implements usermem.IO.CopyIn. +func (fd *VFSPipeFD) CopyIn(ctx context.Context, addr usermem.Addr, dst []byte, opts usermem.IOOpts) (int, error) { + origCount := int64(len(dst)) + n, err := fd.pipe.read(ctx, readOps{ + left: func() int64 { + return int64(len(dst)) + }, + limit: func(l int64) { + dst = dst[:l] + }, + read: func(view *buffer.View) (int64, error) { + n, err := view.ReadAt(dst, 0) + view.TrimFront(int64(n)) + return int64(n), err + }, + }) + if n > 0 { + fd.pipe.Notify(waiter.EventOut) + } + if err == nil && n != origCount { + return int(n), syserror.ErrWouldBlock + } + return int(n), err +} + +// CopyOut implements usermem.IO.CopyOut. +func (fd *VFSPipeFD) CopyOut(ctx context.Context, addr usermem.Addr, src []byte, opts usermem.IOOpts) (int, error) { + origCount := int64(len(src)) + n, err := fd.pipe.write(ctx, writeOps{ + left: func() int64 { + return int64(len(src)) + }, + limit: func(l int64) { + src = src[:l] + }, + write: func(view *buffer.View) (int64, error) { + view.Append(src) + return int64(len(src)), nil + }, + }) + if n > 0 { + fd.pipe.Notify(waiter.EventIn) + } + if err == nil && n != origCount { + return int(n), syserror.ErrWouldBlock + } + return int(n), err +} + +// ZeroOut implements usermem.IO.ZeroOut. +func (fd *VFSPipeFD) ZeroOut(ctx context.Context, addr usermem.Addr, toZero int64, opts usermem.IOOpts) (int64, error) { + origCount := toZero + n, err := fd.pipe.write(ctx, writeOps{ + left: func() int64 { + return toZero + }, + limit: func(l int64) { + toZero = l + }, + write: func(view *buffer.View) (int64, error) { + view.Grow(view.Size()+toZero, true /* zero */) + return toZero, nil + }, + }) + if n > 0 { + fd.pipe.Notify(waiter.EventIn) + } + if err == nil && n != origCount { + return n, syserror.ErrWouldBlock + } + return n, err +} + +// CopyInTo implements usermem.IO.CopyInTo. +func (fd *VFSPipeFD) CopyInTo(ctx context.Context, ars usermem.AddrRangeSeq, dst safemem.Writer, opts usermem.IOOpts) (int64, error) { + count := ars.NumBytes() + if count == 0 { + return 0, nil + } + origCount := count + n, err := fd.pipe.read(ctx, readOps{ + left: func() int64 { + return count + }, + limit: func(l int64) { + count = l + }, + read: func(view *buffer.View) (int64, error) { + n, err := view.ReadToSafememWriter(dst, uint64(count)) + view.TrimFront(int64(n)) + return int64(n), err + }, + }) + if n > 0 { + fd.pipe.Notify(waiter.EventOut) + } + if err == nil && n != origCount { + return n, syserror.ErrWouldBlock + } + return n, err +} + +// CopyOutFrom implements usermem.IO.CopyOutFrom. +func (fd *VFSPipeFD) CopyOutFrom(ctx context.Context, ars usermem.AddrRangeSeq, src safemem.Reader, opts usermem.IOOpts) (int64, error) { + count := ars.NumBytes() + if count == 0 { + return 0, nil + } + origCount := count + n, err := fd.pipe.write(ctx, writeOps{ + left: func() int64 { + return count + }, + limit: func(l int64) { + count = l + }, + write: func(view *buffer.View) (int64, error) { + n, err := view.WriteFromSafememReader(src, uint64(count)) + return int64(n), err + }, + }) + if n > 0 { + fd.pipe.Notify(waiter.EventIn) + } + if err == nil && n != origCount { + return n, syserror.ErrWouldBlock + } + return n, err +} + +// SwapUint32 implements usermem.IO.SwapUint32. +func (fd *VFSPipeFD) SwapUint32(ctx context.Context, addr usermem.Addr, new uint32, opts usermem.IOOpts) (uint32, error) { + // How did a pipe get passed as the virtual address space to futex(2)? + panic("VFSPipeFD.SwapUint32 called unexpectedly") +} + +// CompareAndSwapUint32 implements usermem.IO.CompareAndSwapUint32. +func (fd *VFSPipeFD) CompareAndSwapUint32(ctx context.Context, addr usermem.Addr, old, new uint32, opts usermem.IOOpts) (uint32, error) { + panic("VFSPipeFD.CompareAndSwapUint32 called unexpectedly") +} + +// LoadUint32 implements usermem.IO.LoadUint32. +func (fd *VFSPipeFD) LoadUint32(ctx context.Context, addr usermem.Addr, opts usermem.IOOpts) (uint32, error) { + panic("VFSPipeFD.LoadUint32 called unexpectedly") +} + +// Splice reads up to count bytes from src and writes them to dst. It returns +// the number of bytes moved. +// +// Preconditions: count > 0. +func Splice(ctx context.Context, dst, src *VFSPipeFD, count int64) (int64, error) { + return spliceOrTee(ctx, dst, src, count, true /* removeFromSrc */) +} + +// Tee reads up to count bytes from src and writes them to dst, without +// removing the read bytes from src. It returns the number of bytes copied. +// +// Preconditions: count > 0. +func Tee(ctx context.Context, dst, src *VFSPipeFD, count int64) (int64, error) { + return spliceOrTee(ctx, dst, src, count, false /* removeFromSrc */) +} + +// Preconditions: count > 0. +func spliceOrTee(ctx context.Context, dst, src *VFSPipeFD, count int64, removeFromSrc bool) (int64, error) { + if dst.pipe == src.pipe { + return 0, syserror.EINVAL + } + + lockTwoPipes(dst.pipe, src.pipe) + defer dst.pipe.mu.Unlock() + defer src.pipe.mu.Unlock() + + n, err := dst.pipe.writeLocked(ctx, writeOps{ + left: func() int64 { + return count + }, + limit: func(l int64) { + count = l + }, + write: func(dstView *buffer.View) (int64, error) { + return src.pipe.readLocked(ctx, readOps{ + left: func() int64 { + return count + }, + limit: func(l int64) { + count = l + }, + read: func(srcView *buffer.View) (int64, error) { + n, err := srcView.ReadToSafememWriter(dstView, uint64(count)) + if n > 0 && removeFromSrc { + srcView.TrimFront(int64(n)) + } + return int64(n), err + }, + }) + }, + }) + if n > 0 { + dst.pipe.Notify(waiter.EventIn) + src.pipe.Notify(waiter.EventOut) + } + return n, err +} diff --git a/pkg/sentry/kernel/process_group_list.go b/pkg/sentry/kernel/process_group_list.go index 3c5ea8aa7..3c5ea8aa7 100755..100644 --- a/pkg/sentry/kernel/process_group_list.go +++ b/pkg/sentry/kernel/process_group_list.go diff --git a/pkg/sentry/kernel/sched/sched_state_autogen.go b/pkg/sentry/kernel/sched/sched_state_autogen.go index 9705ca79d..9705ca79d 100755..100644 --- a/pkg/sentry/kernel/sched/sched_state_autogen.go +++ b/pkg/sentry/kernel/sched/sched_state_autogen.go diff --git a/pkg/sentry/kernel/semaphore/semaphore_state_autogen.go b/pkg/sentry/kernel/semaphore/semaphore_state_autogen.go index ba1d073d9..ba1d073d9 100755..100644 --- a/pkg/sentry/kernel/semaphore/semaphore_state_autogen.go +++ b/pkg/sentry/kernel/semaphore/semaphore_state_autogen.go diff --git a/pkg/sentry/kernel/semaphore/waiter_list.go b/pkg/sentry/kernel/semaphore/waiter_list.go index 4bfe5ff95..4bfe5ff95 100755..100644 --- a/pkg/sentry/kernel/semaphore/waiter_list.go +++ b/pkg/sentry/kernel/semaphore/waiter_list.go diff --git a/pkg/sentry/kernel/seqatomic_taskgoroutineschedinfo_unsafe.go b/pkg/sentry/kernel/seqatomic_taskgoroutineschedinfo_unsafe.go index b133423cc..950645965 100755..100644 --- a/pkg/sentry/kernel/seqatomic_taskgoroutineschedinfo_unsafe.go +++ b/pkg/sentry/kernel/seqatomic_taskgoroutineschedinfo_unsafe.go @@ -1,11 +1,11 @@ package kernel import ( + "fmt" "reflect" "strings" "unsafe" - "fmt" "gvisor.dev/gvisor/pkg/sync" ) diff --git a/pkg/sentry/kernel/session_list.go b/pkg/sentry/kernel/session_list.go index 768482ab6..768482ab6 100755..100644 --- a/pkg/sentry/kernel/session_list.go +++ b/pkg/sentry/kernel/session_list.go diff --git a/pkg/sentry/kernel/shm/shm_state_autogen.go b/pkg/sentry/kernel/shm/shm_state_autogen.go index fa8f896f7..fa8f896f7 100755..100644 --- a/pkg/sentry/kernel/shm/shm_state_autogen.go +++ b/pkg/sentry/kernel/shm/shm_state_autogen.go diff --git a/pkg/sentry/kernel/signalfd/signalfd.go b/pkg/sentry/kernel/signalfd/signalfd.go index 8243bb93e..8243bb93e 100755..100644 --- a/pkg/sentry/kernel/signalfd/signalfd.go +++ b/pkg/sentry/kernel/signalfd/signalfd.go diff --git a/pkg/sentry/kernel/signalfd/signalfd_state_autogen.go b/pkg/sentry/kernel/signalfd/signalfd_state_autogen.go index 2ab5b4702..2ab5b4702 100755..100644 --- a/pkg/sentry/kernel/signalfd/signalfd_state_autogen.go +++ b/pkg/sentry/kernel/signalfd/signalfd_state_autogen.go diff --git a/pkg/sentry/kernel/socket_list.go b/pkg/sentry/kernel/socket_list.go index 294aa99fe..294aa99fe 100755..100644 --- a/pkg/sentry/kernel/socket_list.go +++ b/pkg/sentry/kernel/socket_list.go diff --git a/pkg/sentry/kernel/task_list.go b/pkg/sentry/kernel/task_list.go index e7a3a3d20..e7a3a3d20 100755..100644 --- a/pkg/sentry/kernel/task_list.go +++ b/pkg/sentry/kernel/task_list.go diff --git a/pkg/sentry/kernel/time/time_state_autogen.go b/pkg/sentry/kernel/time/time_state_autogen.go index ab6c6633d..ab6c6633d 100755..100644 --- a/pkg/sentry/kernel/time/time_state_autogen.go +++ b/pkg/sentry/kernel/time/time_state_autogen.go diff --git a/pkg/sentry/kernel/tty.go b/pkg/sentry/kernel/tty.go index d0e0810e8..d0e0810e8 100755..100644 --- a/pkg/sentry/kernel/tty.go +++ b/pkg/sentry/kernel/tty.go diff --git a/pkg/sentry/kernel/uncaught_signal_go_proto/uncaught_signal.pb.go b/pkg/sentry/kernel/uncaught_signal_go_proto/uncaught_signal.pb.go index 822e549ab..822e549ab 100755..100644 --- a/pkg/sentry/kernel/uncaught_signal_go_proto/uncaught_signal.pb.go +++ b/pkg/sentry/kernel/uncaught_signal_go_proto/uncaught_signal.pb.go |