summaryrefslogtreecommitdiffhomepage
path: root/pkg/buffer
diff options
context:
space:
mode:
authorgVisor bot <gvisor-bot@google.com>2021-05-13 21:00:53 +0000
committergVisor bot <gvisor-bot@google.com>2021-05-13 21:00:53 +0000
commite1cfd3185c285f4dc69804210cc0d77ec582beb5 (patch)
tree866c789fcc0f67ec0933e3cf88fed6628e85b6f6 /pkg/buffer
parentf4d9f967005fdf7995439f56839cbb4a7589ff6c (diff)
parent84f04cc858644e9748a82f33b834a84c8b0fc934 (diff)
Merge release-20210510.0-27-g84f04cc85 (automated)
Diffstat (limited to 'pkg/buffer')
-rw-r--r--pkg/buffer/buffer.go105
-rw-r--r--pkg/buffer/buffer_list.go221
-rw-r--r--pkg/buffer/buffer_state_autogen.go163
-rw-r--r--pkg/buffer/buffer_unsafe_state_autogen.go3
-rw-r--r--pkg/buffer/pool.go90
-rw-r--r--pkg/buffer/safemem.go133
-rw-r--r--pkg/buffer/view.go566
-rw-r--r--pkg/buffer/view_unsafe.go25
8 files changed, 1306 insertions, 0 deletions
diff --git a/pkg/buffer/buffer.go b/pkg/buffer/buffer.go
new file mode 100644
index 000000000..5b77a6a3f
--- /dev/null
+++ b/pkg/buffer/buffer.go
@@ -0,0 +1,105 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package buffer provides the implementation of a buffer view.
+//
+// A view is an flexible buffer, supporting the safecopy operations natively as
+// well as the ability to grow via either prepend or append, as well as shrink.
+package buffer
+
+// buffer encapsulates a queueable byte buffer.
+//
+// +stateify savable
+type buffer struct {
+ data []byte
+ read int
+ write int
+ bufferEntry
+}
+
+// init performs in-place initialization for zero value.
+func (b *buffer) init(size int) {
+ b.data = make([]byte, size)
+}
+
+// initWithData initializes b with data, taking ownership.
+func (b *buffer) initWithData(data []byte) {
+ b.data = data
+ b.read = 0
+ b.write = len(data)
+}
+
+// Reset resets read and write locations, effectively emptying the buffer.
+func (b *buffer) Reset() {
+ b.read = 0
+ b.write = 0
+}
+
+// Remove removes r from the unread portion. It returns false if r does not
+// fully reside in b.
+func (b *buffer) Remove(r Range) bool {
+ sz := b.ReadSize()
+ switch {
+ case r.Len() != r.Intersect(Range{end: sz}).Len():
+ return false
+ case r.Len() == 0:
+ // Noop
+ case r.begin == 0:
+ b.read += r.end
+ case r.end == sz:
+ b.write -= r.Len()
+ default:
+ // Remove from the middle of b.data.
+ copy(b.data[b.read+r.begin:], b.data[b.read+r.end:b.write])
+ b.write -= r.Len()
+ }
+ return true
+}
+
+// Full indicates the buffer is full.
+//
+// This indicates there is no capacity left to write.
+func (b *buffer) Full() bool {
+ return b.write == len(b.data)
+}
+
+// ReadSize returns the number of bytes available for reading.
+func (b *buffer) ReadSize() int {
+ return b.write - b.read
+}
+
+// ReadMove advances the read index by the given amount.
+func (b *buffer) ReadMove(n int) {
+ b.read += n
+}
+
+// ReadSlice returns the read slice for this buffer.
+func (b *buffer) ReadSlice() []byte {
+ return b.data[b.read:b.write]
+}
+
+// WriteSize returns the number of bytes available for writing.
+func (b *buffer) WriteSize() int {
+ return len(b.data) - b.write
+}
+
+// WriteMove advances the write index by the given amount.
+func (b *buffer) WriteMove(n int) {
+ b.write += n
+}
+
+// WriteSlice returns the write slice for this buffer.
+func (b *buffer) WriteSlice() []byte {
+ return b.data[b.write:]
+}
diff --git a/pkg/buffer/buffer_list.go b/pkg/buffer/buffer_list.go
new file mode 100644
index 000000000..6b5bea3fc
--- /dev/null
+++ b/pkg/buffer/buffer_list.go
@@ -0,0 +1,221 @@
+package buffer
+
+// ElementMapper provides an identity mapping by default.
+//
+// This can be replaced to provide a struct that maps elements to linker
+// objects, if they are not the same. An ElementMapper is not typically
+// required if: Linker is left as is, Element is left as is, or Linker and
+// Element are the same type.
+type bufferElementMapper struct{}
+
+// linkerFor maps an Element to a Linker.
+//
+// This default implementation should be inlined.
+//
+//go:nosplit
+func (bufferElementMapper) linkerFor(elem *buffer) *buffer { return elem }
+
+// List is an intrusive list. Entries can be added to or removed from the list
+// in O(1) time and with no additional memory allocations.
+//
+// The zero value for List is an empty list ready to use.
+//
+// To iterate over a list (where l is a List):
+// for e := l.Front(); e != nil; e = e.Next() {
+// // do something with e.
+// }
+//
+// +stateify savable
+type bufferList struct {
+ head *buffer
+ tail *buffer
+}
+
+// Reset resets list l to the empty state.
+func (l *bufferList) Reset() {
+ l.head = nil
+ l.tail = nil
+}
+
+// Empty returns true iff the list is empty.
+//
+//go:nosplit
+func (l *bufferList) Empty() bool {
+ return l.head == nil
+}
+
+// Front returns the first element of list l or nil.
+//
+//go:nosplit
+func (l *bufferList) Front() *buffer {
+ return l.head
+}
+
+// Back returns the last element of list l or nil.
+//
+//go:nosplit
+func (l *bufferList) Back() *buffer {
+ return l.tail
+}
+
+// Len returns the number of elements in the list.
+//
+// NOTE: This is an O(n) operation.
+//
+//go:nosplit
+func (l *bufferList) Len() (count int) {
+ for e := l.Front(); e != nil; e = (bufferElementMapper{}.linkerFor(e)).Next() {
+ count++
+ }
+ return count
+}
+
+// PushFront inserts the element e at the front of list l.
+//
+//go:nosplit
+func (l *bufferList) PushFront(e *buffer) {
+ linker := bufferElementMapper{}.linkerFor(e)
+ linker.SetNext(l.head)
+ linker.SetPrev(nil)
+ if l.head != nil {
+ bufferElementMapper{}.linkerFor(l.head).SetPrev(e)
+ } else {
+ l.tail = e
+ }
+
+ l.head = e
+}
+
+// PushBack inserts the element e at the back of list l.
+//
+//go:nosplit
+func (l *bufferList) PushBack(e *buffer) {
+ linker := bufferElementMapper{}.linkerFor(e)
+ linker.SetNext(nil)
+ linker.SetPrev(l.tail)
+ if l.tail != nil {
+ bufferElementMapper{}.linkerFor(l.tail).SetNext(e)
+ } else {
+ l.head = e
+ }
+
+ l.tail = e
+}
+
+// PushBackList inserts list m at the end of list l, emptying m.
+//
+//go:nosplit
+func (l *bufferList) PushBackList(m *bufferList) {
+ if l.head == nil {
+ l.head = m.head
+ l.tail = m.tail
+ } else if m.head != nil {
+ bufferElementMapper{}.linkerFor(l.tail).SetNext(m.head)
+ bufferElementMapper{}.linkerFor(m.head).SetPrev(l.tail)
+
+ l.tail = m.tail
+ }
+ m.head = nil
+ m.tail = nil
+}
+
+// InsertAfter inserts e after b.
+//
+//go:nosplit
+func (l *bufferList) InsertAfter(b, e *buffer) {
+ bLinker := bufferElementMapper{}.linkerFor(b)
+ eLinker := bufferElementMapper{}.linkerFor(e)
+
+ a := bLinker.Next()
+
+ eLinker.SetNext(a)
+ eLinker.SetPrev(b)
+ bLinker.SetNext(e)
+
+ if a != nil {
+ bufferElementMapper{}.linkerFor(a).SetPrev(e)
+ } else {
+ l.tail = e
+ }
+}
+
+// InsertBefore inserts e before a.
+//
+//go:nosplit
+func (l *bufferList) InsertBefore(a, e *buffer) {
+ aLinker := bufferElementMapper{}.linkerFor(a)
+ eLinker := bufferElementMapper{}.linkerFor(e)
+
+ b := aLinker.Prev()
+ eLinker.SetNext(a)
+ eLinker.SetPrev(b)
+ aLinker.SetPrev(e)
+
+ if b != nil {
+ bufferElementMapper{}.linkerFor(b).SetNext(e)
+ } else {
+ l.head = e
+ }
+}
+
+// Remove removes e from l.
+//
+//go:nosplit
+func (l *bufferList) Remove(e *buffer) {
+ linker := bufferElementMapper{}.linkerFor(e)
+ prev := linker.Prev()
+ next := linker.Next()
+
+ if prev != nil {
+ bufferElementMapper{}.linkerFor(prev).SetNext(next)
+ } else if l.head == e {
+ l.head = next
+ }
+
+ if next != nil {
+ bufferElementMapper{}.linkerFor(next).SetPrev(prev)
+ } else if l.tail == e {
+ l.tail = prev
+ }
+
+ linker.SetNext(nil)
+ linker.SetPrev(nil)
+}
+
+// Entry is a default implementation of Linker. Users can add anonymous fields
+// of this type to their structs to make them automatically implement the
+// methods needed by List.
+//
+// +stateify savable
+type bufferEntry struct {
+ next *buffer
+ prev *buffer
+}
+
+// Next returns the entry that follows e in the list.
+//
+//go:nosplit
+func (e *bufferEntry) Next() *buffer {
+ return e.next
+}
+
+// Prev returns the entry that precedes e in the list.
+//
+//go:nosplit
+func (e *bufferEntry) Prev() *buffer {
+ return e.prev
+}
+
+// SetNext assigns 'entry' as the entry that follows e in the list.
+//
+//go:nosplit
+func (e *bufferEntry) SetNext(elem *buffer) {
+ e.next = elem
+}
+
+// SetPrev assigns 'entry' as the entry that precedes e in the list.
+//
+//go:nosplit
+func (e *bufferEntry) SetPrev(elem *buffer) {
+ e.prev = elem
+}
diff --git a/pkg/buffer/buffer_state_autogen.go b/pkg/buffer/buffer_state_autogen.go
new file mode 100644
index 000000000..aaa72b723
--- /dev/null
+++ b/pkg/buffer/buffer_state_autogen.go
@@ -0,0 +1,163 @@
+// automatically generated by stateify.
+
+package buffer
+
+import (
+ "gvisor.dev/gvisor/pkg/state"
+)
+
+func (b *buffer) StateTypeName() string {
+ return "pkg/buffer.buffer"
+}
+
+func (b *buffer) StateFields() []string {
+ return []string{
+ "data",
+ "read",
+ "write",
+ "bufferEntry",
+ }
+}
+
+func (b *buffer) beforeSave() {}
+
+// +checklocksignore
+func (b *buffer) StateSave(stateSinkObject state.Sink) {
+ b.beforeSave()
+ stateSinkObject.Save(0, &b.data)
+ stateSinkObject.Save(1, &b.read)
+ stateSinkObject.Save(2, &b.write)
+ stateSinkObject.Save(3, &b.bufferEntry)
+}
+
+func (b *buffer) afterLoad() {}
+
+// +checklocksignore
+func (b *buffer) StateLoad(stateSourceObject state.Source) {
+ stateSourceObject.Load(0, &b.data)
+ stateSourceObject.Load(1, &b.read)
+ stateSourceObject.Load(2, &b.write)
+ stateSourceObject.Load(3, &b.bufferEntry)
+}
+
+func (l *bufferList) StateTypeName() string {
+ return "pkg/buffer.bufferList"
+}
+
+func (l *bufferList) StateFields() []string {
+ return []string{
+ "head",
+ "tail",
+ }
+}
+
+func (l *bufferList) beforeSave() {}
+
+// +checklocksignore
+func (l *bufferList) StateSave(stateSinkObject state.Sink) {
+ l.beforeSave()
+ stateSinkObject.Save(0, &l.head)
+ stateSinkObject.Save(1, &l.tail)
+}
+
+func (l *bufferList) afterLoad() {}
+
+// +checklocksignore
+func (l *bufferList) StateLoad(stateSourceObject state.Source) {
+ stateSourceObject.Load(0, &l.head)
+ stateSourceObject.Load(1, &l.tail)
+}
+
+func (e *bufferEntry) StateTypeName() string {
+ return "pkg/buffer.bufferEntry"
+}
+
+func (e *bufferEntry) StateFields() []string {
+ return []string{
+ "next",
+ "prev",
+ }
+}
+
+func (e *bufferEntry) beforeSave() {}
+
+// +checklocksignore
+func (e *bufferEntry) StateSave(stateSinkObject state.Sink) {
+ e.beforeSave()
+ stateSinkObject.Save(0, &e.next)
+ stateSinkObject.Save(1, &e.prev)
+}
+
+func (e *bufferEntry) afterLoad() {}
+
+// +checklocksignore
+func (e *bufferEntry) StateLoad(stateSourceObject state.Source) {
+ stateSourceObject.Load(0, &e.next)
+ stateSourceObject.Load(1, &e.prev)
+}
+
+func (p *pool) StateTypeName() string {
+ return "pkg/buffer.pool"
+}
+
+func (p *pool) StateFields() []string {
+ return []string{
+ "bufferSize",
+ "embeddedStorage",
+ }
+}
+
+func (p *pool) beforeSave() {}
+
+// +checklocksignore
+func (p *pool) StateSave(stateSinkObject state.Sink) {
+ p.beforeSave()
+ stateSinkObject.Save(0, &p.bufferSize)
+ stateSinkObject.Save(1, &p.embeddedStorage)
+}
+
+// +checklocksignore
+func (p *pool) StateLoad(stateSourceObject state.Source) {
+ stateSourceObject.Load(0, &p.bufferSize)
+ stateSourceObject.LoadWait(1, &p.embeddedStorage)
+ stateSourceObject.AfterLoad(p.afterLoad)
+}
+
+func (v *View) StateTypeName() string {
+ return "pkg/buffer.View"
+}
+
+func (v *View) StateFields() []string {
+ return []string{
+ "data",
+ "size",
+ "pool",
+ }
+}
+
+func (v *View) beforeSave() {}
+
+// +checklocksignore
+func (v *View) StateSave(stateSinkObject state.Sink) {
+ v.beforeSave()
+ stateSinkObject.Save(0, &v.data)
+ stateSinkObject.Save(1, &v.size)
+ stateSinkObject.Save(2, &v.pool)
+}
+
+func (v *View) afterLoad() {}
+
+// +checklocksignore
+func (v *View) StateLoad(stateSourceObject state.Source) {
+ stateSourceObject.Load(0, &v.data)
+ stateSourceObject.Load(1, &v.size)
+ stateSourceObject.Load(2, &v.pool)
+}
+
+func init() {
+ state.Register((*buffer)(nil))
+ state.Register((*bufferList)(nil))
+ state.Register((*bufferEntry)(nil))
+ state.Register((*pool)(nil))
+ state.Register((*View)(nil))
+}
diff --git a/pkg/buffer/buffer_unsafe_state_autogen.go b/pkg/buffer/buffer_unsafe_state_autogen.go
new file mode 100644
index 000000000..5a5c40722
--- /dev/null
+++ b/pkg/buffer/buffer_unsafe_state_autogen.go
@@ -0,0 +1,3 @@
+// automatically generated by stateify.
+
+package buffer
diff --git a/pkg/buffer/pool.go b/pkg/buffer/pool.go
new file mode 100644
index 000000000..2ec41dd4f
--- /dev/null
+++ b/pkg/buffer/pool.go
@@ -0,0 +1,90 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package buffer
+
+const (
+ // embeddedCount is the number of buffer structures embedded in the pool. It
+ // is also the number for overflow allocations.
+ embeddedCount = 8
+
+ // defaultBufferSize is the default size for each underlying storage buffer.
+ //
+ // It is slightly less than two pages. This is done intentionally to ensure
+ // that the buffer object aligns with runtime internals. This two page size
+ // will effectively minimize internal fragmentation, but still have a large
+ // enough chunk to limit excessive segmentation.
+ defaultBufferSize = 8144
+)
+
+// pool allocates buffer.
+//
+// It contains an embedded buffer storage for fast path when the number of
+// buffers needed is small.
+//
+// +stateify savable
+type pool struct {
+ bufferSize int
+ avail []buffer `state:"nosave"`
+ embeddedStorage [embeddedCount]buffer `state:"wait"`
+}
+
+// get gets a new buffer from p.
+func (p *pool) get() *buffer {
+ buf := p.getNoInit()
+ buf.init(p.bufferSize)
+ return buf
+}
+
+// get gets a new buffer from p without initializing it.
+func (p *pool) getNoInit() *buffer {
+ if p.avail == nil {
+ p.avail = p.embeddedStorage[:]
+ }
+ if len(p.avail) == 0 {
+ p.avail = make([]buffer, embeddedCount)
+ }
+ if p.bufferSize <= 0 {
+ p.bufferSize = defaultBufferSize
+ }
+ buf := &p.avail[0]
+ p.avail = p.avail[1:]
+ return buf
+}
+
+// put releases buf.
+func (p *pool) put(buf *buffer) {
+ // Remove reference to the underlying storage, allowing it to be garbage
+ // collected.
+ buf.data = nil
+ buf.Reset()
+}
+
+// setBufferSize sets the size of underlying storage buffer for future
+// allocations. It can be called at any time.
+func (p *pool) setBufferSize(size int) {
+ p.bufferSize = size
+}
+
+// afterLoad is invoked by stateify.
+func (p *pool) afterLoad() {
+ // S/R does not save subslice into embeddedStorage correctly. Restore
+ // available portion of embeddedStorage manually. Restore as nil if none used.
+ for i := len(p.embeddedStorage); i > 0; i-- {
+ if p.embeddedStorage[i-1].data != nil {
+ p.avail = p.embeddedStorage[i:]
+ break
+ }
+ }
+}
diff --git a/pkg/buffer/safemem.go b/pkg/buffer/safemem.go
new file mode 100644
index 000000000..8b42575b4
--- /dev/null
+++ b/pkg/buffer/safemem.go
@@ -0,0 +1,133 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package buffer
+
+import (
+ "gvisor.dev/gvisor/pkg/safemem"
+)
+
+// WriteBlock returns this buffer as a write Block.
+func (b *buffer) WriteBlock() safemem.Block {
+ return safemem.BlockFromSafeSlice(b.WriteSlice())
+}
+
+// ReadBlock returns this buffer as a read Block.
+func (b *buffer) ReadBlock() safemem.Block {
+ return safemem.BlockFromSafeSlice(b.ReadSlice())
+}
+
+// WriteFromSafememReader writes up to count bytes from r to v and advances the
+// write index by the number of bytes written. It calls r.ReadToBlocks() at
+// most once.
+func (v *View) WriteFromSafememReader(r safemem.Reader, count uint64) (uint64, error) {
+ if count == 0 {
+ return 0, nil
+ }
+
+ var (
+ dst safemem.BlockSeq
+ blocks []safemem.Block
+ )
+
+ // Need at least one buffer.
+ firstBuf := v.data.Back()
+ if firstBuf == nil {
+ firstBuf = v.pool.get()
+ v.data.PushBack(firstBuf)
+ }
+
+ // Does the last block have sufficient capacity alone?
+ if l := uint64(firstBuf.WriteSize()); l >= count {
+ dst = safemem.BlockSeqOf(firstBuf.WriteBlock().TakeFirst64(count))
+ } else {
+ // Append blocks until sufficient.
+ count -= l
+ blocks = append(blocks, firstBuf.WriteBlock())
+ for count > 0 {
+ emptyBuf := v.pool.get()
+ v.data.PushBack(emptyBuf)
+ block := emptyBuf.WriteBlock().TakeFirst64(count)
+ count -= uint64(block.Len())
+ blocks = append(blocks, block)
+ }
+ dst = safemem.BlockSeqFromSlice(blocks)
+ }
+
+ // Perform I/O.
+ n, err := r.ReadToBlocks(dst)
+ v.size += int64(n)
+
+ // Update all indices.
+ for left := n; left > 0; firstBuf = firstBuf.Next() {
+ if l := firstBuf.WriteSize(); left >= uint64(l) {
+ firstBuf.WriteMove(l) // Whole block.
+ left -= uint64(l)
+ } else {
+ firstBuf.WriteMove(int(left)) // Partial block.
+ left = 0
+ }
+ }
+
+ return n, err
+}
+
+// WriteFromBlocks implements safemem.Writer.WriteFromBlocks. It advances the
+// write index by the number of bytes written.
+func (v *View) WriteFromBlocks(srcs safemem.BlockSeq) (uint64, error) {
+ return v.WriteFromSafememReader(&safemem.BlockSeqReader{srcs}, srcs.NumBytes())
+}
+
+// ReadToSafememWriter reads up to count bytes from v to w. It does not advance
+// the read index. It calls w.WriteFromBlocks() at most once.
+func (v *View) ReadToSafememWriter(w safemem.Writer, count uint64) (uint64, error) {
+ if count == 0 {
+ return 0, nil
+ }
+
+ var (
+ src safemem.BlockSeq
+ blocks []safemem.Block
+ )
+
+ firstBuf := v.data.Front()
+ if firstBuf == nil {
+ return 0, nil // No EOF.
+ }
+
+ // Is all the data in a single block?
+ if l := uint64(firstBuf.ReadSize()); l >= count {
+ src = safemem.BlockSeqOf(firstBuf.ReadBlock().TakeFirst64(count))
+ } else {
+ // Build a list of all the buffers.
+ count -= l
+ blocks = append(blocks, firstBuf.ReadBlock())
+ for buf := firstBuf.Next(); buf != nil && count > 0; buf = buf.Next() {
+ block := buf.ReadBlock().TakeFirst64(count)
+ count -= uint64(block.Len())
+ blocks = append(blocks, block)
+ }
+ src = safemem.BlockSeqFromSlice(blocks)
+ }
+
+ // Perform I/O. As documented, we don't advance the read index.
+ return w.WriteFromBlocks(src)
+}
+
+// ReadToBlocks implements safemem.Reader.ReadToBlocks. It does not advance the
+// read index by the number of bytes read, such that it's only safe to call if
+// the caller guarantees that ReadToBlocks will only be called once.
+func (v *View) ReadToBlocks(dsts safemem.BlockSeq) (uint64, error) {
+ return v.ReadToSafememWriter(&safemem.BlockSeqWriter{dsts}, dsts.NumBytes())
+}
diff --git a/pkg/buffer/view.go b/pkg/buffer/view.go
new file mode 100644
index 000000000..7bcfcd543
--- /dev/null
+++ b/pkg/buffer/view.go
@@ -0,0 +1,566 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package buffer
+
+import (
+ "fmt"
+ "io"
+)
+
+// Buffer is an alias to View.
+type Buffer = View
+
+// View is a non-linear buffer.
+//
+// All methods are thread compatible.
+//
+// +stateify savable
+type View struct {
+ data bufferList
+ size int64
+ pool pool
+}
+
+// TrimFront removes the first count bytes from the buffer.
+func (v *View) TrimFront(count int64) {
+ if count >= v.size {
+ v.advanceRead(v.size)
+ } else {
+ v.advanceRead(count)
+ }
+}
+
+// Remove deletes data at specified location in v. It returns false if specified
+// range does not fully reside in v.
+func (v *View) Remove(offset, length int) bool {
+ if offset < 0 || length < 0 {
+ return false
+ }
+ tgt := Range{begin: offset, end: offset + length}
+ if tgt.Len() != tgt.Intersect(Range{end: int(v.size)}).Len() {
+ return false
+ }
+
+ // Scan through each buffer and remove intersections.
+ var curr Range
+ for buf := v.data.Front(); buf != nil; {
+ origLen := buf.ReadSize()
+ curr.end = curr.begin + origLen
+
+ if x := curr.Intersect(tgt); x.Len() > 0 {
+ if !buf.Remove(x.Offset(-curr.begin)) {
+ panic("buf.Remove() failed")
+ }
+ if buf.ReadSize() == 0 {
+ // buf fully removed, removing it from the list.
+ oldBuf := buf
+ buf = buf.Next()
+ v.data.Remove(oldBuf)
+ v.pool.put(oldBuf)
+ } else {
+ // Only partial data intersects, moving on to next one.
+ buf = buf.Next()
+ }
+ v.size -= int64(x.Len())
+ } else {
+ // This buffer is not in range, moving on to next one.
+ buf = buf.Next()
+ }
+
+ curr.begin += origLen
+ if curr.begin >= tgt.end {
+ break
+ }
+ }
+ return true
+}
+
+// ReadAt implements io.ReaderAt.ReadAt.
+func (v *View) ReadAt(p []byte, offset int64) (int, error) {
+ var (
+ skipped int64
+ done int64
+ )
+ for buf := v.data.Front(); buf != nil && done < int64(len(p)); buf = buf.Next() {
+ needToSkip := int(offset - skipped)
+ if sz := buf.ReadSize(); sz <= needToSkip {
+ skipped += int64(sz)
+ continue
+ }
+
+ // Actually read data.
+ n := copy(p[done:], buf.ReadSlice()[needToSkip:])
+ skipped += int64(needToSkip)
+ done += int64(n)
+ }
+ if int(done) < len(p) || offset+done == v.size {
+ return int(done), io.EOF
+ }
+ return int(done), nil
+}
+
+// advanceRead advances the view's read index.
+//
+// Precondition: there must be sufficient bytes in the buffer.
+func (v *View) advanceRead(count int64) {
+ for buf := v.data.Front(); buf != nil && count > 0; {
+ sz := int64(buf.ReadSize())
+ if sz > count {
+ // There is still data for reading.
+ buf.ReadMove(int(count))
+ v.size -= count
+ count = 0
+ break
+ }
+
+ // Consume the whole buffer.
+ oldBuf := buf
+ buf = buf.Next() // Iterate.
+ v.data.Remove(oldBuf)
+ v.pool.put(oldBuf)
+
+ // Update counts.
+ count -= sz
+ v.size -= sz
+ }
+ if count > 0 {
+ panic(fmt.Sprintf("advanceRead still has %d bytes remaining", count))
+ }
+}
+
+// Truncate truncates the view to the given bytes.
+//
+// This will not grow the view, only shrink it. If a length is passed that is
+// greater than the current size of the view, then nothing will happen.
+//
+// Precondition: length must be >= 0.
+func (v *View) Truncate(length int64) {
+ if length < 0 {
+ panic("negative length provided")
+ }
+ if length >= v.size {
+ return // Nothing to do.
+ }
+ for buf := v.data.Back(); buf != nil && v.size > length; buf = v.data.Back() {
+ sz := int64(buf.ReadSize())
+ if after := v.size - sz; after < length {
+ // Truncate the buffer locally.
+ left := (length - after)
+ buf.write = buf.read + int(left)
+ v.size = length
+ break
+ }
+
+ // Drop the buffer completely; see above.
+ v.data.Remove(buf)
+ v.pool.put(buf)
+ v.size -= sz
+ }
+}
+
+// Grow grows the given view to the number of bytes, which will be appended. If
+// zero is true, all these bytes will be zero. If zero is false, then this is
+// the caller's responsibility.
+//
+// Precondition: length must be >= 0.
+func (v *View) Grow(length int64, zero bool) {
+ if length < 0 {
+ panic("negative length provided")
+ }
+ for v.size < length {
+ buf := v.data.Back()
+
+ // Is there some space in the last buffer?
+ if buf == nil || buf.Full() {
+ buf = v.pool.get()
+ v.data.PushBack(buf)
+ }
+
+ // Write up to length bytes.
+ sz := buf.WriteSize()
+ if int64(sz) > length-v.size {
+ sz = int(length - v.size)
+ }
+
+ // Zero the written section; note that this pattern is
+ // specifically recognized and optimized by the compiler.
+ if zero {
+ for i := buf.write; i < buf.write+sz; i++ {
+ buf.data[i] = 0
+ }
+ }
+
+ // Advance the index.
+ buf.WriteMove(sz)
+ v.size += int64(sz)
+ }
+}
+
+// Prepend prepends the given data.
+func (v *View) Prepend(data []byte) {
+ // Is there any space in the first buffer?
+ if buf := v.data.Front(); buf != nil && buf.read > 0 {
+ // Fill up before the first write.
+ avail := buf.read
+ bStart := 0
+ dStart := len(data) - avail
+ if avail > len(data) {
+ bStart = avail - len(data)
+ dStart = 0
+ }
+ n := copy(buf.data[bStart:], data[dStart:])
+ data = data[:dStart]
+ v.size += int64(n)
+ buf.read -= n
+ }
+
+ for len(data) > 0 {
+ // Do we need an empty buffer?
+ buf := v.pool.get()
+ v.data.PushFront(buf)
+
+ // The buffer is empty; copy last chunk.
+ avail := len(buf.data)
+ bStart := 0
+ dStart := len(data) - avail
+ if avail > len(data) {
+ bStart = avail - len(data)
+ dStart = 0
+ }
+
+ // We have to put the data at the end of the current
+ // buffer in order to ensure that the next prepend will
+ // correctly fill up the beginning of this buffer.
+ n := copy(buf.data[bStart:], data[dStart:])
+ data = data[:dStart]
+ v.size += int64(n)
+ buf.read = len(buf.data) - n
+ buf.write = len(buf.data)
+ }
+}
+
+// Append appends the given data.
+func (v *View) Append(data []byte) {
+ for done := 0; done < len(data); {
+ buf := v.data.Back()
+
+ // Ensure there's a buffer with space.
+ if buf == nil || buf.Full() {
+ buf = v.pool.get()
+ v.data.PushBack(buf)
+ }
+
+ // Copy in to the given buffer.
+ n := copy(buf.WriteSlice(), data[done:])
+ done += n
+ buf.WriteMove(n)
+ v.size += int64(n)
+ }
+}
+
+// AppendOwned takes ownership of data and appends it to v.
+func (v *View) AppendOwned(data []byte) {
+ if len(data) > 0 {
+ buf := v.pool.getNoInit()
+ buf.initWithData(data)
+ v.data.PushBack(buf)
+ v.size += int64(len(data))
+ }
+}
+
+// PullUp makes the specified range contiguous and returns the backing memory.
+func (v *View) PullUp(offset, length int) ([]byte, bool) {
+ if length == 0 {
+ return nil, true
+ }
+ tgt := Range{begin: offset, end: offset + length}
+ if tgt.Intersect(Range{end: int(v.size)}).Len() != length {
+ return nil, false
+ }
+
+ curr := Range{}
+ buf := v.data.Front()
+ for ; buf != nil; buf = buf.Next() {
+ origLen := buf.ReadSize()
+ curr.end = curr.begin + origLen
+
+ if x := curr.Intersect(tgt); x.Len() == tgt.Len() {
+ // buf covers the whole requested target range.
+ sub := x.Offset(-curr.begin)
+ return buf.ReadSlice()[sub.begin:sub.end], true
+ } else if x.Len() > 0 {
+ // buf is pointing at the starting buffer we want to merge.
+ break
+ }
+
+ curr.begin += origLen
+ }
+
+ // Calculate the total merged length.
+ totLen := 0
+ for n := buf; n != nil; n = n.Next() {
+ totLen += n.ReadSize()
+ if curr.begin+totLen >= tgt.end {
+ break
+ }
+ }
+
+ // Merge the buffers.
+ data := make([]byte, totLen)
+ off := 0
+ for n := buf; n != nil && off < totLen; {
+ copy(data[off:], n.ReadSlice())
+ off += n.ReadSize()
+
+ // Remove buffers except for the first one, which will be reused.
+ if n == buf {
+ n = n.Next()
+ } else {
+ old := n
+ n = n.Next()
+ v.data.Remove(old)
+ v.pool.put(old)
+ }
+ }
+
+ // Update the first buffer with merged data.
+ buf.initWithData(data)
+
+ r := tgt.Offset(-curr.begin)
+ return buf.data[r.begin:r.end], true
+}
+
+// Flatten returns a flattened copy of this data.
+//
+// This method should not be used in any performance-sensitive paths. It may
+// allocate a fresh byte slice sufficiently large to contain all the data in
+// the buffer. This is principally for debugging.
+//
+// N.B. Tee data still belongs to this view, as if there is a single buffer
+// present, then it will be returned directly. This should be used for
+// temporary use only, and a reference to the given slice should not be held.
+func (v *View) Flatten() []byte {
+ if buf := v.data.Front(); buf == nil {
+ return nil // No data at all.
+ } else if buf.Next() == nil {
+ return buf.ReadSlice() // Only one buffer.
+ }
+ data := make([]byte, 0, v.size) // Need to flatten.
+ for buf := v.data.Front(); buf != nil; buf = buf.Next() {
+ // Copy to the allocated slice.
+ data = append(data, buf.ReadSlice()...)
+ }
+ return data
+}
+
+// Size indicates the total amount of data available in this view.
+func (v *View) Size() int64 {
+ return v.size
+}
+
+// Copy makes a strict copy of this view.
+func (v *View) Copy() (other View) {
+ for buf := v.data.Front(); buf != nil; buf = buf.Next() {
+ other.Append(buf.ReadSlice())
+ }
+ return
+}
+
+// Apply applies the given function across all valid data.
+func (v *View) Apply(fn func([]byte)) {
+ for buf := v.data.Front(); buf != nil; buf = buf.Next() {
+ fn(buf.ReadSlice())
+ }
+}
+
+// SubApply applies fn to a given range of data in v. Any part of the range
+// outside of v is ignored.
+func (v *View) SubApply(offset, length int, fn func([]byte)) {
+ for buf := v.data.Front(); length > 0 && buf != nil; buf = buf.Next() {
+ d := buf.ReadSlice()
+ if offset >= len(d) {
+ offset -= len(d)
+ continue
+ }
+ if offset > 0 {
+ d = d[offset:]
+ offset = 0
+ }
+ if length < len(d) {
+ d = d[:length]
+ }
+ fn(d)
+ length -= len(d)
+ }
+}
+
+// Merge merges the provided View with this one.
+//
+// The other view will be appended to v, and other will be empty after this
+// operation completes.
+func (v *View) Merge(other *View) {
+ // Copy over all buffers.
+ for buf := other.data.Front(); buf != nil; buf = other.data.Front() {
+ other.data.Remove(buf)
+ v.data.PushBack(buf)
+ }
+
+ // Adjust sizes.
+ v.size += other.size
+ other.size = 0
+}
+
+// WriteFromReader writes to the buffer from an io.Reader.
+//
+// A minimum read size equal to unsafe.Sizeof(unintptr) is enforced,
+// provided that count is greater than or equal to unsafe.Sizeof(uintptr).
+func (v *View) WriteFromReader(r io.Reader, count int64) (int64, error) {
+ var (
+ done int64
+ n int
+ err error
+ )
+ for done < count {
+ buf := v.data.Back()
+
+ // Ensure we have an empty buffer.
+ if buf == nil || buf.Full() {
+ buf = v.pool.get()
+ v.data.PushBack(buf)
+ }
+
+ // Is this less than the minimum batch?
+ if buf.WriteSize() < minBatch && (count-done) >= int64(minBatch) {
+ tmp := make([]byte, minBatch)
+ n, err = r.Read(tmp)
+ v.Append(tmp[:n])
+ done += int64(n)
+ if err != nil {
+ break
+ }
+ continue
+ }
+
+ // Limit the read, if necessary.
+ sz := buf.WriteSize()
+ if left := count - done; int64(sz) > left {
+ sz = int(left)
+ }
+
+ // Pass the relevant portion of the buffer.
+ n, err = r.Read(buf.WriteSlice()[:sz])
+ buf.WriteMove(n)
+ done += int64(n)
+ v.size += int64(n)
+ if err == io.EOF {
+ err = nil // Short write allowed.
+ break
+ } else if err != nil {
+ break
+ }
+ }
+ return done, err
+}
+
+// ReadToWriter reads from the buffer into an io.Writer.
+//
+// N.B. This does not consume the bytes read. TrimFront should
+// be called appropriately after this call in order to do so.
+//
+// A minimum write size equal to unsafe.Sizeof(unintptr) is enforced,
+// provided that count is greater than or equal to unsafe.Sizeof(uintptr).
+func (v *View) ReadToWriter(w io.Writer, count int64) (int64, error) {
+ var (
+ done int64
+ n int
+ err error
+ )
+ offset := 0 // Spill-over for batching.
+ for buf := v.data.Front(); buf != nil && done < count; buf = buf.Next() {
+ // Has this been consumed? Skip it.
+ sz := buf.ReadSize()
+ if sz <= offset {
+ offset -= sz
+ continue
+ }
+ sz -= offset
+
+ // Is this less than the minimum batch?
+ left := count - done
+ if sz < minBatch && left >= int64(minBatch) && (v.size-done) >= int64(minBatch) {
+ tmp := make([]byte, minBatch)
+ n, err = v.ReadAt(tmp, done)
+ w.Write(tmp[:n])
+ done += int64(n)
+ offset = n - sz // Reset below.
+ if err != nil {
+ break
+ }
+ continue
+ }
+
+ // Limit the write if necessary.
+ if int64(sz) >= left {
+ sz = int(left)
+ }
+
+ // Perform the actual write.
+ n, err = w.Write(buf.ReadSlice()[offset : offset+sz])
+ done += int64(n)
+ if err != nil {
+ break
+ }
+
+ // Reset spill-over.
+ offset = 0
+ }
+ return done, err
+}
+
+// A Range specifies a range of buffer.
+type Range struct {
+ begin int
+ end int
+}
+
+// Intersect returns the intersection of x and y.
+func (x Range) Intersect(y Range) Range {
+ if x.begin < y.begin {
+ x.begin = y.begin
+ }
+ if x.end > y.end {
+ x.end = y.end
+ }
+ if x.begin >= x.end {
+ return Range{}
+ }
+ return x
+}
+
+// Offset returns x offset by off.
+func (x Range) Offset(off int) Range {
+ x.begin += off
+ x.end += off
+ return x
+}
+
+// Len returns the length of x.
+func (x Range) Len() int {
+ l := x.end - x.begin
+ if l < 0 {
+ l = 0
+ }
+ return l
+}
diff --git a/pkg/buffer/view_unsafe.go b/pkg/buffer/view_unsafe.go
new file mode 100644
index 000000000..d1ef39b26
--- /dev/null
+++ b/pkg/buffer/view_unsafe.go
@@ -0,0 +1,25 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package buffer
+
+import (
+ "unsafe"
+)
+
+// minBatch is the smallest Read or Write operation that the
+// WriteFromReader and ReadToWriter functions will use.
+//
+// This is defined as the size of a native pointer.
+const minBatch = int(unsafe.Sizeof(uintptr(0)))