diff options
author | gVisor bot <gvisor-bot@google.com> | 2021-05-13 21:00:53 +0000 |
---|---|---|
committer | gVisor bot <gvisor-bot@google.com> | 2021-05-13 21:00:53 +0000 |
commit | e1cfd3185c285f4dc69804210cc0d77ec582beb5 (patch) | |
tree | 866c789fcc0f67ec0933e3cf88fed6628e85b6f6 /pkg/buffer | |
parent | f4d9f967005fdf7995439f56839cbb4a7589ff6c (diff) | |
parent | 84f04cc858644e9748a82f33b834a84c8b0fc934 (diff) |
Merge release-20210510.0-27-g84f04cc85 (automated)
Diffstat (limited to 'pkg/buffer')
-rw-r--r-- | pkg/buffer/buffer.go | 105 | ||||
-rw-r--r-- | pkg/buffer/buffer_list.go | 221 | ||||
-rw-r--r-- | pkg/buffer/buffer_state_autogen.go | 163 | ||||
-rw-r--r-- | pkg/buffer/buffer_unsafe_state_autogen.go | 3 | ||||
-rw-r--r-- | pkg/buffer/pool.go | 90 | ||||
-rw-r--r-- | pkg/buffer/safemem.go | 133 | ||||
-rw-r--r-- | pkg/buffer/view.go | 566 | ||||
-rw-r--r-- | pkg/buffer/view_unsafe.go | 25 |
8 files changed, 1306 insertions, 0 deletions
diff --git a/pkg/buffer/buffer.go b/pkg/buffer/buffer.go new file mode 100644 index 000000000..5b77a6a3f --- /dev/null +++ b/pkg/buffer/buffer.go @@ -0,0 +1,105 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package buffer provides the implementation of a buffer view. +// +// A view is an flexible buffer, supporting the safecopy operations natively as +// well as the ability to grow via either prepend or append, as well as shrink. +package buffer + +// buffer encapsulates a queueable byte buffer. +// +// +stateify savable +type buffer struct { + data []byte + read int + write int + bufferEntry +} + +// init performs in-place initialization for zero value. +func (b *buffer) init(size int) { + b.data = make([]byte, size) +} + +// initWithData initializes b with data, taking ownership. +func (b *buffer) initWithData(data []byte) { + b.data = data + b.read = 0 + b.write = len(data) +} + +// Reset resets read and write locations, effectively emptying the buffer. +func (b *buffer) Reset() { + b.read = 0 + b.write = 0 +} + +// Remove removes r from the unread portion. It returns false if r does not +// fully reside in b. +func (b *buffer) Remove(r Range) bool { + sz := b.ReadSize() + switch { + case r.Len() != r.Intersect(Range{end: sz}).Len(): + return false + case r.Len() == 0: + // Noop + case r.begin == 0: + b.read += r.end + case r.end == sz: + b.write -= r.Len() + default: + // Remove from the middle of b.data. + copy(b.data[b.read+r.begin:], b.data[b.read+r.end:b.write]) + b.write -= r.Len() + } + return true +} + +// Full indicates the buffer is full. +// +// This indicates there is no capacity left to write. +func (b *buffer) Full() bool { + return b.write == len(b.data) +} + +// ReadSize returns the number of bytes available for reading. +func (b *buffer) ReadSize() int { + return b.write - b.read +} + +// ReadMove advances the read index by the given amount. +func (b *buffer) ReadMove(n int) { + b.read += n +} + +// ReadSlice returns the read slice for this buffer. +func (b *buffer) ReadSlice() []byte { + return b.data[b.read:b.write] +} + +// WriteSize returns the number of bytes available for writing. +func (b *buffer) WriteSize() int { + return len(b.data) - b.write +} + +// WriteMove advances the write index by the given amount. +func (b *buffer) WriteMove(n int) { + b.write += n +} + +// WriteSlice returns the write slice for this buffer. +func (b *buffer) WriteSlice() []byte { + return b.data[b.write:] +} diff --git a/pkg/buffer/buffer_list.go b/pkg/buffer/buffer_list.go new file mode 100644 index 000000000..6b5bea3fc --- /dev/null +++ b/pkg/buffer/buffer_list.go @@ -0,0 +1,221 @@ +package buffer + +// ElementMapper provides an identity mapping by default. +// +// This can be replaced to provide a struct that maps elements to linker +// objects, if they are not the same. An ElementMapper is not typically +// required if: Linker is left as is, Element is left as is, or Linker and +// Element are the same type. +type bufferElementMapper struct{} + +// linkerFor maps an Element to a Linker. +// +// This default implementation should be inlined. +// +//go:nosplit +func (bufferElementMapper) linkerFor(elem *buffer) *buffer { return elem } + +// List is an intrusive list. Entries can be added to or removed from the list +// in O(1) time and with no additional memory allocations. +// +// The zero value for List is an empty list ready to use. +// +// To iterate over a list (where l is a List): +// for e := l.Front(); e != nil; e = e.Next() { +// // do something with e. +// } +// +// +stateify savable +type bufferList struct { + head *buffer + tail *buffer +} + +// Reset resets list l to the empty state. +func (l *bufferList) Reset() { + l.head = nil + l.tail = nil +} + +// Empty returns true iff the list is empty. +// +//go:nosplit +func (l *bufferList) Empty() bool { + return l.head == nil +} + +// Front returns the first element of list l or nil. +// +//go:nosplit +func (l *bufferList) Front() *buffer { + return l.head +} + +// Back returns the last element of list l or nil. +// +//go:nosplit +func (l *bufferList) Back() *buffer { + return l.tail +} + +// Len returns the number of elements in the list. +// +// NOTE: This is an O(n) operation. +// +//go:nosplit +func (l *bufferList) Len() (count int) { + for e := l.Front(); e != nil; e = (bufferElementMapper{}.linkerFor(e)).Next() { + count++ + } + return count +} + +// PushFront inserts the element e at the front of list l. +// +//go:nosplit +func (l *bufferList) PushFront(e *buffer) { + linker := bufferElementMapper{}.linkerFor(e) + linker.SetNext(l.head) + linker.SetPrev(nil) + if l.head != nil { + bufferElementMapper{}.linkerFor(l.head).SetPrev(e) + } else { + l.tail = e + } + + l.head = e +} + +// PushBack inserts the element e at the back of list l. +// +//go:nosplit +func (l *bufferList) PushBack(e *buffer) { + linker := bufferElementMapper{}.linkerFor(e) + linker.SetNext(nil) + linker.SetPrev(l.tail) + if l.tail != nil { + bufferElementMapper{}.linkerFor(l.tail).SetNext(e) + } else { + l.head = e + } + + l.tail = e +} + +// PushBackList inserts list m at the end of list l, emptying m. +// +//go:nosplit +func (l *bufferList) PushBackList(m *bufferList) { + if l.head == nil { + l.head = m.head + l.tail = m.tail + } else if m.head != nil { + bufferElementMapper{}.linkerFor(l.tail).SetNext(m.head) + bufferElementMapper{}.linkerFor(m.head).SetPrev(l.tail) + + l.tail = m.tail + } + m.head = nil + m.tail = nil +} + +// InsertAfter inserts e after b. +// +//go:nosplit +func (l *bufferList) InsertAfter(b, e *buffer) { + bLinker := bufferElementMapper{}.linkerFor(b) + eLinker := bufferElementMapper{}.linkerFor(e) + + a := bLinker.Next() + + eLinker.SetNext(a) + eLinker.SetPrev(b) + bLinker.SetNext(e) + + if a != nil { + bufferElementMapper{}.linkerFor(a).SetPrev(e) + } else { + l.tail = e + } +} + +// InsertBefore inserts e before a. +// +//go:nosplit +func (l *bufferList) InsertBefore(a, e *buffer) { + aLinker := bufferElementMapper{}.linkerFor(a) + eLinker := bufferElementMapper{}.linkerFor(e) + + b := aLinker.Prev() + eLinker.SetNext(a) + eLinker.SetPrev(b) + aLinker.SetPrev(e) + + if b != nil { + bufferElementMapper{}.linkerFor(b).SetNext(e) + } else { + l.head = e + } +} + +// Remove removes e from l. +// +//go:nosplit +func (l *bufferList) Remove(e *buffer) { + linker := bufferElementMapper{}.linkerFor(e) + prev := linker.Prev() + next := linker.Next() + + if prev != nil { + bufferElementMapper{}.linkerFor(prev).SetNext(next) + } else if l.head == e { + l.head = next + } + + if next != nil { + bufferElementMapper{}.linkerFor(next).SetPrev(prev) + } else if l.tail == e { + l.tail = prev + } + + linker.SetNext(nil) + linker.SetPrev(nil) +} + +// Entry is a default implementation of Linker. Users can add anonymous fields +// of this type to their structs to make them automatically implement the +// methods needed by List. +// +// +stateify savable +type bufferEntry struct { + next *buffer + prev *buffer +} + +// Next returns the entry that follows e in the list. +// +//go:nosplit +func (e *bufferEntry) Next() *buffer { + return e.next +} + +// Prev returns the entry that precedes e in the list. +// +//go:nosplit +func (e *bufferEntry) Prev() *buffer { + return e.prev +} + +// SetNext assigns 'entry' as the entry that follows e in the list. +// +//go:nosplit +func (e *bufferEntry) SetNext(elem *buffer) { + e.next = elem +} + +// SetPrev assigns 'entry' as the entry that precedes e in the list. +// +//go:nosplit +func (e *bufferEntry) SetPrev(elem *buffer) { + e.prev = elem +} diff --git a/pkg/buffer/buffer_state_autogen.go b/pkg/buffer/buffer_state_autogen.go new file mode 100644 index 000000000..aaa72b723 --- /dev/null +++ b/pkg/buffer/buffer_state_autogen.go @@ -0,0 +1,163 @@ +// automatically generated by stateify. + +package buffer + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (b *buffer) StateTypeName() string { + return "pkg/buffer.buffer" +} + +func (b *buffer) StateFields() []string { + return []string{ + "data", + "read", + "write", + "bufferEntry", + } +} + +func (b *buffer) beforeSave() {} + +// +checklocksignore +func (b *buffer) StateSave(stateSinkObject state.Sink) { + b.beforeSave() + stateSinkObject.Save(0, &b.data) + stateSinkObject.Save(1, &b.read) + stateSinkObject.Save(2, &b.write) + stateSinkObject.Save(3, &b.bufferEntry) +} + +func (b *buffer) afterLoad() {} + +// +checklocksignore +func (b *buffer) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &b.data) + stateSourceObject.Load(1, &b.read) + stateSourceObject.Load(2, &b.write) + stateSourceObject.Load(3, &b.bufferEntry) +} + +func (l *bufferList) StateTypeName() string { + return "pkg/buffer.bufferList" +} + +func (l *bufferList) StateFields() []string { + return []string{ + "head", + "tail", + } +} + +func (l *bufferList) beforeSave() {} + +// +checklocksignore +func (l *bufferList) StateSave(stateSinkObject state.Sink) { + l.beforeSave() + stateSinkObject.Save(0, &l.head) + stateSinkObject.Save(1, &l.tail) +} + +func (l *bufferList) afterLoad() {} + +// +checklocksignore +func (l *bufferList) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &l.head) + stateSourceObject.Load(1, &l.tail) +} + +func (e *bufferEntry) StateTypeName() string { + return "pkg/buffer.bufferEntry" +} + +func (e *bufferEntry) StateFields() []string { + return []string{ + "next", + "prev", + } +} + +func (e *bufferEntry) beforeSave() {} + +// +checklocksignore +func (e *bufferEntry) StateSave(stateSinkObject state.Sink) { + e.beforeSave() + stateSinkObject.Save(0, &e.next) + stateSinkObject.Save(1, &e.prev) +} + +func (e *bufferEntry) afterLoad() {} + +// +checklocksignore +func (e *bufferEntry) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &e.next) + stateSourceObject.Load(1, &e.prev) +} + +func (p *pool) StateTypeName() string { + return "pkg/buffer.pool" +} + +func (p *pool) StateFields() []string { + return []string{ + "bufferSize", + "embeddedStorage", + } +} + +func (p *pool) beforeSave() {} + +// +checklocksignore +func (p *pool) StateSave(stateSinkObject state.Sink) { + p.beforeSave() + stateSinkObject.Save(0, &p.bufferSize) + stateSinkObject.Save(1, &p.embeddedStorage) +} + +// +checklocksignore +func (p *pool) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &p.bufferSize) + stateSourceObject.LoadWait(1, &p.embeddedStorage) + stateSourceObject.AfterLoad(p.afterLoad) +} + +func (v *View) StateTypeName() string { + return "pkg/buffer.View" +} + +func (v *View) StateFields() []string { + return []string{ + "data", + "size", + "pool", + } +} + +func (v *View) beforeSave() {} + +// +checklocksignore +func (v *View) StateSave(stateSinkObject state.Sink) { + v.beforeSave() + stateSinkObject.Save(0, &v.data) + stateSinkObject.Save(1, &v.size) + stateSinkObject.Save(2, &v.pool) +} + +func (v *View) afterLoad() {} + +// +checklocksignore +func (v *View) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &v.data) + stateSourceObject.Load(1, &v.size) + stateSourceObject.Load(2, &v.pool) +} + +func init() { + state.Register((*buffer)(nil)) + state.Register((*bufferList)(nil)) + state.Register((*bufferEntry)(nil)) + state.Register((*pool)(nil)) + state.Register((*View)(nil)) +} diff --git a/pkg/buffer/buffer_unsafe_state_autogen.go b/pkg/buffer/buffer_unsafe_state_autogen.go new file mode 100644 index 000000000..5a5c40722 --- /dev/null +++ b/pkg/buffer/buffer_unsafe_state_autogen.go @@ -0,0 +1,3 @@ +// automatically generated by stateify. + +package buffer diff --git a/pkg/buffer/pool.go b/pkg/buffer/pool.go new file mode 100644 index 000000000..2ec41dd4f --- /dev/null +++ b/pkg/buffer/pool.go @@ -0,0 +1,90 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package buffer + +const ( + // embeddedCount is the number of buffer structures embedded in the pool. It + // is also the number for overflow allocations. + embeddedCount = 8 + + // defaultBufferSize is the default size for each underlying storage buffer. + // + // It is slightly less than two pages. This is done intentionally to ensure + // that the buffer object aligns with runtime internals. This two page size + // will effectively minimize internal fragmentation, but still have a large + // enough chunk to limit excessive segmentation. + defaultBufferSize = 8144 +) + +// pool allocates buffer. +// +// It contains an embedded buffer storage for fast path when the number of +// buffers needed is small. +// +// +stateify savable +type pool struct { + bufferSize int + avail []buffer `state:"nosave"` + embeddedStorage [embeddedCount]buffer `state:"wait"` +} + +// get gets a new buffer from p. +func (p *pool) get() *buffer { + buf := p.getNoInit() + buf.init(p.bufferSize) + return buf +} + +// get gets a new buffer from p without initializing it. +func (p *pool) getNoInit() *buffer { + if p.avail == nil { + p.avail = p.embeddedStorage[:] + } + if len(p.avail) == 0 { + p.avail = make([]buffer, embeddedCount) + } + if p.bufferSize <= 0 { + p.bufferSize = defaultBufferSize + } + buf := &p.avail[0] + p.avail = p.avail[1:] + return buf +} + +// put releases buf. +func (p *pool) put(buf *buffer) { + // Remove reference to the underlying storage, allowing it to be garbage + // collected. + buf.data = nil + buf.Reset() +} + +// setBufferSize sets the size of underlying storage buffer for future +// allocations. It can be called at any time. +func (p *pool) setBufferSize(size int) { + p.bufferSize = size +} + +// afterLoad is invoked by stateify. +func (p *pool) afterLoad() { + // S/R does not save subslice into embeddedStorage correctly. Restore + // available portion of embeddedStorage manually. Restore as nil if none used. + for i := len(p.embeddedStorage); i > 0; i-- { + if p.embeddedStorage[i-1].data != nil { + p.avail = p.embeddedStorage[i:] + break + } + } +} diff --git a/pkg/buffer/safemem.go b/pkg/buffer/safemem.go new file mode 100644 index 000000000..8b42575b4 --- /dev/null +++ b/pkg/buffer/safemem.go @@ -0,0 +1,133 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package buffer + +import ( + "gvisor.dev/gvisor/pkg/safemem" +) + +// WriteBlock returns this buffer as a write Block. +func (b *buffer) WriteBlock() safemem.Block { + return safemem.BlockFromSafeSlice(b.WriteSlice()) +} + +// ReadBlock returns this buffer as a read Block. +func (b *buffer) ReadBlock() safemem.Block { + return safemem.BlockFromSafeSlice(b.ReadSlice()) +} + +// WriteFromSafememReader writes up to count bytes from r to v and advances the +// write index by the number of bytes written. It calls r.ReadToBlocks() at +// most once. +func (v *View) WriteFromSafememReader(r safemem.Reader, count uint64) (uint64, error) { + if count == 0 { + return 0, nil + } + + var ( + dst safemem.BlockSeq + blocks []safemem.Block + ) + + // Need at least one buffer. + firstBuf := v.data.Back() + if firstBuf == nil { + firstBuf = v.pool.get() + v.data.PushBack(firstBuf) + } + + // Does the last block have sufficient capacity alone? + if l := uint64(firstBuf.WriteSize()); l >= count { + dst = safemem.BlockSeqOf(firstBuf.WriteBlock().TakeFirst64(count)) + } else { + // Append blocks until sufficient. + count -= l + blocks = append(blocks, firstBuf.WriteBlock()) + for count > 0 { + emptyBuf := v.pool.get() + v.data.PushBack(emptyBuf) + block := emptyBuf.WriteBlock().TakeFirst64(count) + count -= uint64(block.Len()) + blocks = append(blocks, block) + } + dst = safemem.BlockSeqFromSlice(blocks) + } + + // Perform I/O. + n, err := r.ReadToBlocks(dst) + v.size += int64(n) + + // Update all indices. + for left := n; left > 0; firstBuf = firstBuf.Next() { + if l := firstBuf.WriteSize(); left >= uint64(l) { + firstBuf.WriteMove(l) // Whole block. + left -= uint64(l) + } else { + firstBuf.WriteMove(int(left)) // Partial block. + left = 0 + } + } + + return n, err +} + +// WriteFromBlocks implements safemem.Writer.WriteFromBlocks. It advances the +// write index by the number of bytes written. +func (v *View) WriteFromBlocks(srcs safemem.BlockSeq) (uint64, error) { + return v.WriteFromSafememReader(&safemem.BlockSeqReader{srcs}, srcs.NumBytes()) +} + +// ReadToSafememWriter reads up to count bytes from v to w. It does not advance +// the read index. It calls w.WriteFromBlocks() at most once. +func (v *View) ReadToSafememWriter(w safemem.Writer, count uint64) (uint64, error) { + if count == 0 { + return 0, nil + } + + var ( + src safemem.BlockSeq + blocks []safemem.Block + ) + + firstBuf := v.data.Front() + if firstBuf == nil { + return 0, nil // No EOF. + } + + // Is all the data in a single block? + if l := uint64(firstBuf.ReadSize()); l >= count { + src = safemem.BlockSeqOf(firstBuf.ReadBlock().TakeFirst64(count)) + } else { + // Build a list of all the buffers. + count -= l + blocks = append(blocks, firstBuf.ReadBlock()) + for buf := firstBuf.Next(); buf != nil && count > 0; buf = buf.Next() { + block := buf.ReadBlock().TakeFirst64(count) + count -= uint64(block.Len()) + blocks = append(blocks, block) + } + src = safemem.BlockSeqFromSlice(blocks) + } + + // Perform I/O. As documented, we don't advance the read index. + return w.WriteFromBlocks(src) +} + +// ReadToBlocks implements safemem.Reader.ReadToBlocks. It does not advance the +// read index by the number of bytes read, such that it's only safe to call if +// the caller guarantees that ReadToBlocks will only be called once. +func (v *View) ReadToBlocks(dsts safemem.BlockSeq) (uint64, error) { + return v.ReadToSafememWriter(&safemem.BlockSeqWriter{dsts}, dsts.NumBytes()) +} diff --git a/pkg/buffer/view.go b/pkg/buffer/view.go new file mode 100644 index 000000000..7bcfcd543 --- /dev/null +++ b/pkg/buffer/view.go @@ -0,0 +1,566 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package buffer + +import ( + "fmt" + "io" +) + +// Buffer is an alias to View. +type Buffer = View + +// View is a non-linear buffer. +// +// All methods are thread compatible. +// +// +stateify savable +type View struct { + data bufferList + size int64 + pool pool +} + +// TrimFront removes the first count bytes from the buffer. +func (v *View) TrimFront(count int64) { + if count >= v.size { + v.advanceRead(v.size) + } else { + v.advanceRead(count) + } +} + +// Remove deletes data at specified location in v. It returns false if specified +// range does not fully reside in v. +func (v *View) Remove(offset, length int) bool { + if offset < 0 || length < 0 { + return false + } + tgt := Range{begin: offset, end: offset + length} + if tgt.Len() != tgt.Intersect(Range{end: int(v.size)}).Len() { + return false + } + + // Scan through each buffer and remove intersections. + var curr Range + for buf := v.data.Front(); buf != nil; { + origLen := buf.ReadSize() + curr.end = curr.begin + origLen + + if x := curr.Intersect(tgt); x.Len() > 0 { + if !buf.Remove(x.Offset(-curr.begin)) { + panic("buf.Remove() failed") + } + if buf.ReadSize() == 0 { + // buf fully removed, removing it from the list. + oldBuf := buf + buf = buf.Next() + v.data.Remove(oldBuf) + v.pool.put(oldBuf) + } else { + // Only partial data intersects, moving on to next one. + buf = buf.Next() + } + v.size -= int64(x.Len()) + } else { + // This buffer is not in range, moving on to next one. + buf = buf.Next() + } + + curr.begin += origLen + if curr.begin >= tgt.end { + break + } + } + return true +} + +// ReadAt implements io.ReaderAt.ReadAt. +func (v *View) ReadAt(p []byte, offset int64) (int, error) { + var ( + skipped int64 + done int64 + ) + for buf := v.data.Front(); buf != nil && done < int64(len(p)); buf = buf.Next() { + needToSkip := int(offset - skipped) + if sz := buf.ReadSize(); sz <= needToSkip { + skipped += int64(sz) + continue + } + + // Actually read data. + n := copy(p[done:], buf.ReadSlice()[needToSkip:]) + skipped += int64(needToSkip) + done += int64(n) + } + if int(done) < len(p) || offset+done == v.size { + return int(done), io.EOF + } + return int(done), nil +} + +// advanceRead advances the view's read index. +// +// Precondition: there must be sufficient bytes in the buffer. +func (v *View) advanceRead(count int64) { + for buf := v.data.Front(); buf != nil && count > 0; { + sz := int64(buf.ReadSize()) + if sz > count { + // There is still data for reading. + buf.ReadMove(int(count)) + v.size -= count + count = 0 + break + } + + // Consume the whole buffer. + oldBuf := buf + buf = buf.Next() // Iterate. + v.data.Remove(oldBuf) + v.pool.put(oldBuf) + + // Update counts. + count -= sz + v.size -= sz + } + if count > 0 { + panic(fmt.Sprintf("advanceRead still has %d bytes remaining", count)) + } +} + +// Truncate truncates the view to the given bytes. +// +// This will not grow the view, only shrink it. If a length is passed that is +// greater than the current size of the view, then nothing will happen. +// +// Precondition: length must be >= 0. +func (v *View) Truncate(length int64) { + if length < 0 { + panic("negative length provided") + } + if length >= v.size { + return // Nothing to do. + } + for buf := v.data.Back(); buf != nil && v.size > length; buf = v.data.Back() { + sz := int64(buf.ReadSize()) + if after := v.size - sz; after < length { + // Truncate the buffer locally. + left := (length - after) + buf.write = buf.read + int(left) + v.size = length + break + } + + // Drop the buffer completely; see above. + v.data.Remove(buf) + v.pool.put(buf) + v.size -= sz + } +} + +// Grow grows the given view to the number of bytes, which will be appended. If +// zero is true, all these bytes will be zero. If zero is false, then this is +// the caller's responsibility. +// +// Precondition: length must be >= 0. +func (v *View) Grow(length int64, zero bool) { + if length < 0 { + panic("negative length provided") + } + for v.size < length { + buf := v.data.Back() + + // Is there some space in the last buffer? + if buf == nil || buf.Full() { + buf = v.pool.get() + v.data.PushBack(buf) + } + + // Write up to length bytes. + sz := buf.WriteSize() + if int64(sz) > length-v.size { + sz = int(length - v.size) + } + + // Zero the written section; note that this pattern is + // specifically recognized and optimized by the compiler. + if zero { + for i := buf.write; i < buf.write+sz; i++ { + buf.data[i] = 0 + } + } + + // Advance the index. + buf.WriteMove(sz) + v.size += int64(sz) + } +} + +// Prepend prepends the given data. +func (v *View) Prepend(data []byte) { + // Is there any space in the first buffer? + if buf := v.data.Front(); buf != nil && buf.read > 0 { + // Fill up before the first write. + avail := buf.read + bStart := 0 + dStart := len(data) - avail + if avail > len(data) { + bStart = avail - len(data) + dStart = 0 + } + n := copy(buf.data[bStart:], data[dStart:]) + data = data[:dStart] + v.size += int64(n) + buf.read -= n + } + + for len(data) > 0 { + // Do we need an empty buffer? + buf := v.pool.get() + v.data.PushFront(buf) + + // The buffer is empty; copy last chunk. + avail := len(buf.data) + bStart := 0 + dStart := len(data) - avail + if avail > len(data) { + bStart = avail - len(data) + dStart = 0 + } + + // We have to put the data at the end of the current + // buffer in order to ensure that the next prepend will + // correctly fill up the beginning of this buffer. + n := copy(buf.data[bStart:], data[dStart:]) + data = data[:dStart] + v.size += int64(n) + buf.read = len(buf.data) - n + buf.write = len(buf.data) + } +} + +// Append appends the given data. +func (v *View) Append(data []byte) { + for done := 0; done < len(data); { + buf := v.data.Back() + + // Ensure there's a buffer with space. + if buf == nil || buf.Full() { + buf = v.pool.get() + v.data.PushBack(buf) + } + + // Copy in to the given buffer. + n := copy(buf.WriteSlice(), data[done:]) + done += n + buf.WriteMove(n) + v.size += int64(n) + } +} + +// AppendOwned takes ownership of data and appends it to v. +func (v *View) AppendOwned(data []byte) { + if len(data) > 0 { + buf := v.pool.getNoInit() + buf.initWithData(data) + v.data.PushBack(buf) + v.size += int64(len(data)) + } +} + +// PullUp makes the specified range contiguous and returns the backing memory. +func (v *View) PullUp(offset, length int) ([]byte, bool) { + if length == 0 { + return nil, true + } + tgt := Range{begin: offset, end: offset + length} + if tgt.Intersect(Range{end: int(v.size)}).Len() != length { + return nil, false + } + + curr := Range{} + buf := v.data.Front() + for ; buf != nil; buf = buf.Next() { + origLen := buf.ReadSize() + curr.end = curr.begin + origLen + + if x := curr.Intersect(tgt); x.Len() == tgt.Len() { + // buf covers the whole requested target range. + sub := x.Offset(-curr.begin) + return buf.ReadSlice()[sub.begin:sub.end], true + } else if x.Len() > 0 { + // buf is pointing at the starting buffer we want to merge. + break + } + + curr.begin += origLen + } + + // Calculate the total merged length. + totLen := 0 + for n := buf; n != nil; n = n.Next() { + totLen += n.ReadSize() + if curr.begin+totLen >= tgt.end { + break + } + } + + // Merge the buffers. + data := make([]byte, totLen) + off := 0 + for n := buf; n != nil && off < totLen; { + copy(data[off:], n.ReadSlice()) + off += n.ReadSize() + + // Remove buffers except for the first one, which will be reused. + if n == buf { + n = n.Next() + } else { + old := n + n = n.Next() + v.data.Remove(old) + v.pool.put(old) + } + } + + // Update the first buffer with merged data. + buf.initWithData(data) + + r := tgt.Offset(-curr.begin) + return buf.data[r.begin:r.end], true +} + +// Flatten returns a flattened copy of this data. +// +// This method should not be used in any performance-sensitive paths. It may +// allocate a fresh byte slice sufficiently large to contain all the data in +// the buffer. This is principally for debugging. +// +// N.B. Tee data still belongs to this view, as if there is a single buffer +// present, then it will be returned directly. This should be used for +// temporary use only, and a reference to the given slice should not be held. +func (v *View) Flatten() []byte { + if buf := v.data.Front(); buf == nil { + return nil // No data at all. + } else if buf.Next() == nil { + return buf.ReadSlice() // Only one buffer. + } + data := make([]byte, 0, v.size) // Need to flatten. + for buf := v.data.Front(); buf != nil; buf = buf.Next() { + // Copy to the allocated slice. + data = append(data, buf.ReadSlice()...) + } + return data +} + +// Size indicates the total amount of data available in this view. +func (v *View) Size() int64 { + return v.size +} + +// Copy makes a strict copy of this view. +func (v *View) Copy() (other View) { + for buf := v.data.Front(); buf != nil; buf = buf.Next() { + other.Append(buf.ReadSlice()) + } + return +} + +// Apply applies the given function across all valid data. +func (v *View) Apply(fn func([]byte)) { + for buf := v.data.Front(); buf != nil; buf = buf.Next() { + fn(buf.ReadSlice()) + } +} + +// SubApply applies fn to a given range of data in v. Any part of the range +// outside of v is ignored. +func (v *View) SubApply(offset, length int, fn func([]byte)) { + for buf := v.data.Front(); length > 0 && buf != nil; buf = buf.Next() { + d := buf.ReadSlice() + if offset >= len(d) { + offset -= len(d) + continue + } + if offset > 0 { + d = d[offset:] + offset = 0 + } + if length < len(d) { + d = d[:length] + } + fn(d) + length -= len(d) + } +} + +// Merge merges the provided View with this one. +// +// The other view will be appended to v, and other will be empty after this +// operation completes. +func (v *View) Merge(other *View) { + // Copy over all buffers. + for buf := other.data.Front(); buf != nil; buf = other.data.Front() { + other.data.Remove(buf) + v.data.PushBack(buf) + } + + // Adjust sizes. + v.size += other.size + other.size = 0 +} + +// WriteFromReader writes to the buffer from an io.Reader. +// +// A minimum read size equal to unsafe.Sizeof(unintptr) is enforced, +// provided that count is greater than or equal to unsafe.Sizeof(uintptr). +func (v *View) WriteFromReader(r io.Reader, count int64) (int64, error) { + var ( + done int64 + n int + err error + ) + for done < count { + buf := v.data.Back() + + // Ensure we have an empty buffer. + if buf == nil || buf.Full() { + buf = v.pool.get() + v.data.PushBack(buf) + } + + // Is this less than the minimum batch? + if buf.WriteSize() < minBatch && (count-done) >= int64(minBatch) { + tmp := make([]byte, minBatch) + n, err = r.Read(tmp) + v.Append(tmp[:n]) + done += int64(n) + if err != nil { + break + } + continue + } + + // Limit the read, if necessary. + sz := buf.WriteSize() + if left := count - done; int64(sz) > left { + sz = int(left) + } + + // Pass the relevant portion of the buffer. + n, err = r.Read(buf.WriteSlice()[:sz]) + buf.WriteMove(n) + done += int64(n) + v.size += int64(n) + if err == io.EOF { + err = nil // Short write allowed. + break + } else if err != nil { + break + } + } + return done, err +} + +// ReadToWriter reads from the buffer into an io.Writer. +// +// N.B. This does not consume the bytes read. TrimFront should +// be called appropriately after this call in order to do so. +// +// A minimum write size equal to unsafe.Sizeof(unintptr) is enforced, +// provided that count is greater than or equal to unsafe.Sizeof(uintptr). +func (v *View) ReadToWriter(w io.Writer, count int64) (int64, error) { + var ( + done int64 + n int + err error + ) + offset := 0 // Spill-over for batching. + for buf := v.data.Front(); buf != nil && done < count; buf = buf.Next() { + // Has this been consumed? Skip it. + sz := buf.ReadSize() + if sz <= offset { + offset -= sz + continue + } + sz -= offset + + // Is this less than the minimum batch? + left := count - done + if sz < minBatch && left >= int64(minBatch) && (v.size-done) >= int64(minBatch) { + tmp := make([]byte, minBatch) + n, err = v.ReadAt(tmp, done) + w.Write(tmp[:n]) + done += int64(n) + offset = n - sz // Reset below. + if err != nil { + break + } + continue + } + + // Limit the write if necessary. + if int64(sz) >= left { + sz = int(left) + } + + // Perform the actual write. + n, err = w.Write(buf.ReadSlice()[offset : offset+sz]) + done += int64(n) + if err != nil { + break + } + + // Reset spill-over. + offset = 0 + } + return done, err +} + +// A Range specifies a range of buffer. +type Range struct { + begin int + end int +} + +// Intersect returns the intersection of x and y. +func (x Range) Intersect(y Range) Range { + if x.begin < y.begin { + x.begin = y.begin + } + if x.end > y.end { + x.end = y.end + } + if x.begin >= x.end { + return Range{} + } + return x +} + +// Offset returns x offset by off. +func (x Range) Offset(off int) Range { + x.begin += off + x.end += off + return x +} + +// Len returns the length of x. +func (x Range) Len() int { + l := x.end - x.begin + if l < 0 { + l = 0 + } + return l +} diff --git a/pkg/buffer/view_unsafe.go b/pkg/buffer/view_unsafe.go new file mode 100644 index 000000000..d1ef39b26 --- /dev/null +++ b/pkg/buffer/view_unsafe.go @@ -0,0 +1,25 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package buffer + +import ( + "unsafe" +) + +// minBatch is the smallest Read or Write operation that the +// WriteFromReader and ReadToWriter functions will use. +// +// This is defined as the size of a native pointer. +const minBatch = int(unsafe.Sizeof(uintptr(0))) |