summary | refs | log | tree | commit | diff | homepage
diff options
context:
space:
mode:
author gVisor bot <gvisor-bot@google.com> 2020-07-27 20:39:38 +0000
committer gVisor bot <gvisor-bot@google.com> 2020-07-27 20:39:38 +0000
commit c5d1303be1bbd95df2b1120f5bbb1ac0bd3ca6c6 (patch)
tree 1f780c30493506c89d652b1608df23b725b4a5c0
parent 6146e5a09884bd94eb605d74616c4cfe2a2bb4be (diff)
parent 112eb0c5b9e6d45b58470fb5536a9fd91fb4222b (diff)
Merge release-20200622.1-236-g112eb0c5b (automated)
-rw-r--r-- pkg/abi/linux/fuse.go 143
-rw-r--r-- pkg/abi/linux/linux_abi_autogen_unsafe.go 587
-rw-r--r-- pkg/abi/linux/linux_amd64_abi_autogen_unsafe.go 6
-rw-r--r-- pkg/abi/linux/linux_arm64_abi_autogen_unsafe.go 8
-rw-r--r-- pkg/sentry/fsimpl/fuse/connection.go 255
-rw-r--r-- pkg/sentry/fsimpl/fuse/dev.go 289
-rw-r--r-- pkg/sentry/fsimpl/fuse/fuse_state_autogen.go 157
-rw-r--r-- pkg/sentry/fsimpl/fuse/fusefs.go 52
-rw-r--r-- pkg/sentry/fsimpl/fuse/register.go 42
-rw-r--r-- pkg/sentry/fsimpl/fuse/request_list.go 193
10 files changed, 1695 insertions, 37 deletions
diff --git a/pkg/abi/linux/fuse.go b/pkg/abi/linux/fuse.go
new file mode 100644
index 000000000..d3ebbccc4
--- /dev/null
+++ b/pkg/abi/linux/fuse.go
@@ -0,0 +1,143 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package linux
+
+// +marshal
+type FUSEOpcode uint32
+
+// +marshal
+type FUSEOpID uint64
+
+// Opcodes for FUSE operations. Analogous to the opcodes in include/linux/fuse.h.
+const (
+ FUSE_LOOKUP FUSEOpcode = 1
+ FUSE_FORGET = 2 /* no reply */
+ FUSE_GETATTR = 3
+ FUSE_SETATTR = 4
+ FUSE_READLINK = 5
+ FUSE_SYMLINK = 6
+ _
+ FUSE_MKNOD = 8
+ FUSE_MKDIR = 9
+ FUSE_UNLINK = 10
+ FUSE_RMDIR = 11
+ FUSE_RENAME = 12
+ FUSE_LINK = 13
+ FUSE_OPEN = 14
+ FUSE_READ = 15
+ FUSE_WRITE = 16
+ FUSE_STATFS = 17
+ FUSE_RELEASE = 18
+ _
+ FUSE_FSYNC = 20
+ FUSE_SETXATTR = 21
+ FUSE_GETXATTR = 22
+ FUSE_LISTXATTR = 23
+ FUSE_REMOVEXATTR = 24
+ FUSE_FLUSH = 25
+ FUSE_INIT = 26
+ FUSE_OPENDIR = 27
+ FUSE_READDIR = 28
+ FUSE_RELEASEDIR = 29
+ FUSE_FSYNCDIR = 30
+ FUSE_GETLK = 31
+ FUSE_SETLK = 32
+ FUSE_SETLKW = 33
+ FUSE_ACCESS = 34
+ FUSE_CREATE = 35
+ FUSE_INTERRUPT = 36
+ FUSE_BMAP = 37
+ FUSE_DESTROY = 38
+ FUSE_IOCTL = 39
+ FUSE_POLL = 40
+ FUSE_NOTIFY_REPLY = 41
+ FUSE_BATCH_FORGET = 42
+)
+
+const (
+ // FUSE_MIN_READ_BUFFER is the minimum size of a read buffer for any FUSE filesystem.
+ // This is the minimum size Linux supports. See include/uapi/linux/fuse.h.
+ FUSE_MIN_READ_BUFFER uint32 = 8192
+)
+
+// FUSEHeaderIn is the header read by the daemon with each request.
+//
+// +marshal
+type FUSEHeaderIn struct {
+ // Len specifies the total length of the data, including this header.
+ Len uint32
+
+ // Opcode specifies the kind of operation of the request.
+ Opcode FUSEOpcode
+
+ // Unique specifies the unique identifier for this request.
+ Unique FUSEOpID
+
+ // NodeID is the ID of the filesystem object being operated on.
+ NodeID uint64
+
+ // UID is the UID of the requesting process.
+ UID uint32
+
+ // GID is the GID of the requesting process.
+ GID uint32
+
+ // PID is the PID of the requesting process.
+ PID uint32
+
+ _ uint32
+}
+
+// FUSEHeaderOut is the header written by the daemon when it processes
+// a request and wants to send a reply (almost all operations require a
+// reply; if they do not, this will be explicitly documented).
+//
+// +marshal
+type FUSEHeaderOut struct {
+ // Len specifies the total length of the data, including this header.
+ Len uint32
+
+ // Error specifies the error that occurred (0 if none).
+ Error int32
+
+ // Unique specifies the unique identifier of the corresponding request.
+ Unique FUSEOpID
+}
+
+// FUSEWriteIn is the header written by a daemon when it makes a
+// write request to the FUSE filesystem.
+//
+// +marshal
+type FUSEWriteIn struct {
+ // Fh specifies the file handle that is being written to.
+ Fh uint64
+
+ // Offset is the offset of the write.
+ Offset uint64
+
+ // Size is the size of data being written.
+ Size uint32
+
+ // WriteFlags is the flags used during the write.
+ WriteFlags uint32
+
+ // LockOwner is the ID of the lock owner.
+ LockOwner uint64
+
+ // Flags is the flags for the request.
+ Flags uint32
+
+ _ uint32
+}
diff --git a/pkg/abi/linux/linux_abi_autogen_unsafe.go b/pkg/abi/linux/linux_abi_autogen_unsafe.go
index 540981b56..414a8ec20 100644
--- a/pkg/abi/linux/linux_abi_autogen_unsafe.go
+++ b/pkg/abi/linux/linux_abi_autogen_unsafe.go
@@ -15,6 +15,11 @@ import (
// Marshallable types used by this file.
var _ marshal.Marshallable = (*ControlMessageCredentials)(nil)
+var _ marshal.Marshallable = (*FUSEHeaderIn)(nil)
+var _ marshal.Marshallable = (*FUSEHeaderOut)(nil)
+var _ marshal.Marshallable = (*FUSEOpID)(nil)
+var _ marshal.Marshallable = (*FUSEOpcode)(nil)
+var _ marshal.Marshallable = (*FUSEWriteIn)(nil)
var _ marshal.Marshallable = (*IFConf)(nil)
var _ marshal.Marshallable = (*IFReq)(nil)
var _ marshal.Marshallable = (*IPTEntry)(nil)
@@ -142,7 +147,7 @@ func (s *Statx) Packed() bool {
// MarshalUnsafe implements marshal.Marshallable.MarshalUnsafe.
func (s *Statx) MarshalUnsafe(dst []byte) {
- if s.Btime.Packed() && s.Ctime.Packed() && s.Mtime.Packed() && s.Atime.Packed() {
+ if s.Ctime.Packed() && s.Mtime.Packed() && s.Atime.Packed() && s.Btime.Packed() {
safecopy.CopyIn(dst, unsafe.Pointer(s))
} else {
// Type Statx doesn't have a packed layout in memory, fallback to MarshalBytes.
@@ -385,6 +390,584 @@ func (s *Statfs) WriteTo(w io.Writer) (int64, error) {
}
// SizeBytes implements marshal.Marshallable.SizeBytes.
+//go:nosplit
+func (f *FUSEOpcode) SizeBytes() int {
+ return 4
+}
+
+// MarshalBytes implements marshal.Marshallable.MarshalBytes.
+func (f *FUSEOpcode) MarshalBytes(dst []byte) {
+ usermem.ByteOrder.PutUint32(dst[:4], uint32(*f))
+}
+
+// UnmarshalBytes implements marshal.Marshallable.UnmarshalBytes.
+func (f *FUSEOpcode) UnmarshalBytes(src []byte) {
+ *f = FUSEOpcode(uint32(usermem.ByteOrder.Uint32(src[:4])))
+}
+
+// Packed implements marshal.Marshallable.Packed.
+//go:nosplit
+func (f *FUSEOpcode) Packed() bool {
+ // Scalar newtypes are always packed.
+ return true
+}
+
+// MarshalUnsafe implements marshal.Marshallable.MarshalUnsafe.
+func (f *FUSEOpcode) MarshalUnsafe(dst []byte) {
+ safecopy.CopyIn(dst, unsafe.Pointer(f))
+}
+
+// UnmarshalUnsafe implements marshal.Marshallable.UnmarshalUnsafe.
+func (f *FUSEOpcode) UnmarshalUnsafe(src []byte) {
+ safecopy.CopyOut(unsafe.Pointer(f), src)
+}
+
+// CopyOutN implements marshal.Marshallable.CopyOutN.
+//go:nosplit
+func (f *FUSEOpcode) CopyOutN(task marshal.Task, addr usermem.Addr, limit int) (int, error) {
+ // Construct a slice backed by dst's underlying memory.
+ var buf []byte
+ hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf))
+ hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(f)))
+ hdr.Len = f.SizeBytes()
+ hdr.Cap = f.SizeBytes()
+
+ length, err := task.CopyOutBytes(addr, buf[:limit]) // escapes: okay.
+ // Since we bypassed the compiler's escape analysis, indicate that f
+ // must live until the use above.
+ runtime.KeepAlive(f)
+ return length, err
+}
+
+// CopyOut implements marshal.Marshallable.CopyOut.
+//go:nosplit
+func (f *FUSEOpcode) CopyOut(task marshal.Task, addr usermem.Addr) (int, error) {
+ return f.CopyOutN(task, addr, f.SizeBytes())
+}
+
+// CopyIn implements marshal.Marshallable.CopyIn.
+//go:nosplit
+func (f *FUSEOpcode) CopyIn(task marshal.Task, addr usermem.Addr) (int, error) {
+ // Construct a slice backed by dst's underlying memory.
+ var buf []byte
+ hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf))
+ hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(f)))
+ hdr.Len = f.SizeBytes()
+ hdr.Cap = f.SizeBytes()
+
+ length, err := task.CopyInBytes(addr, buf) // escapes: okay.
+ // Since we bypassed the compiler's escape analysis, indicate that f
+ // must live until the use above.
+ runtime.KeepAlive(f)
+ return length, err
+}
+
+// WriteTo implements io.WriterTo.WriteTo.
+func (f *FUSEOpcode) WriteTo(w io.Writer) (int64, error) {
+ // Construct a slice backed by dst's underlying memory.
+ var buf []byte
+ hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf))
+ hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(f)))
+ hdr.Len = f.SizeBytes()
+ hdr.Cap = f.SizeBytes()
+
+ length, err := w.Write(buf)
+ // Since we bypassed the compiler's escape analysis, indicate that f
+ // must live until the use above.
+ runtime.KeepAlive(f)
+ return int64(length), err
+}
+
+// SizeBytes implements marshal.Marshallable.SizeBytes.
+//go:nosplit
+func (f *FUSEOpID) SizeBytes() int {
+ return 8
+}
+
+// MarshalBytes implements marshal.Marshallable.MarshalBytes.
+func (f *FUSEOpID) MarshalBytes(dst []byte) {
+ usermem.ByteOrder.PutUint64(dst[:8], uint64(*f))
+}
+
+// UnmarshalBytes implements marshal.Marshallable.UnmarshalBytes.
+func (f *FUSEOpID) UnmarshalBytes(src []byte) {
+ *f = FUSEOpID(uint64(usermem.ByteOrder.Uint64(src[:8])))
+}
+
+// Packed implements marshal.Marshallable.Packed.
+//go:nosplit
+func (f *FUSEOpID) Packed() bool {
+ // Scalar newtypes are always packed.
+ return true
+}
+
+// MarshalUnsafe implements marshal.Marshallable.MarshalUnsafe.
+func (f *FUSEOpID) MarshalUnsafe(dst []byte) {
+ safecopy.CopyIn(dst, unsafe.Pointer(f))
+}
+
+// UnmarshalUnsafe implements marshal.Marshallable.UnmarshalUnsafe.
+func (f *FUSEOpID) UnmarshalUnsafe(src []byte) {
+ safecopy.CopyOut(unsafe.Pointer(f), src)
+}
+
+// CopyOutN implements marshal.Marshallable.CopyOutN.
+//go:nosplit
+func (f *FUSEOpID) CopyOutN(task marshal.Task, addr usermem.Addr, limit int) (int, error) {
+ // Construct a slice backed by dst's underlying memory.
+ var buf []byte
+ hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf))
+ hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(f)))
+ hdr.Len = f.SizeBytes()
+ hdr.Cap = f.SizeBytes()
+
+ length, err := task.CopyOutBytes(addr, buf[:limit]) // escapes: okay.
+ // Since we bypassed the compiler's escape analysis, indicate that f
+ // must live until the use above.
+ runtime.KeepAlive(f)
+ return length, err
+}
+
+// CopyOut implements marshal.Marshallable.CopyOut.
+//go:nosplit
+func (f *FUSEOpID) CopyOut(task marshal.Task, addr usermem.Addr) (int, error) {
+ return f.CopyOutN(task, addr, f.SizeBytes())
+}
+
+// CopyIn implements marshal.Marshallable.CopyIn.
+//go:nosplit
+func (f *FUSEOpID) CopyIn(task marshal.Task, addr usermem.Addr) (int, error) {
+ // Construct a slice backed by dst's underlying memory.
+ var buf []byte
+ hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf))
+ hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(f)))
+ hdr.Len = f.SizeBytes()
+ hdr.Cap = f.SizeBytes()
+
+ length, err := task.CopyInBytes(addr, buf) // escapes: okay.
+ // Since we bypassed the compiler's escape analysis, indicate that f
+ // must live until the use above.
+ runtime.KeepAlive(f)
+ return length, err
+}
+
+// WriteTo implements io.WriterTo.WriteTo.
+func (f *FUSEOpID) WriteTo(w io.Writer) (int64, error) {
+ // Construct a slice backed by dst's underlying memory.
+ var buf []byte
+ hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf))
+ hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(f)))
+ hdr.Len = f.SizeBytes()
+ hdr.Cap = f.SizeBytes()
+
+ length, err := w.Write(buf)
+ // Since we bypassed the compiler's escape analysis, indicate that f
+ // must live until the use above.
+ runtime.KeepAlive(f)
+ return int64(length), err
+}
+
+// SizeBytes implements marshal.Marshallable.SizeBytes.
+func (f *FUSEHeaderIn) SizeBytes() int {
+ return 28 +
+ (*FUSEOpcode)(nil).SizeBytes() +
+ (*FUSEOpID)(nil).SizeBytes()
+}
+
+// MarshalBytes implements marshal.Marshallable.MarshalBytes.
+func (f *FUSEHeaderIn) MarshalBytes(dst []byte) {
+ usermem.ByteOrder.PutUint32(dst[:4], uint32(f.Len))
+ dst = dst[4:]
+ f.Opcode.MarshalBytes(dst[:f.Opcode.SizeBytes()])
+ dst = dst[f.Opcode.SizeBytes():]
+ f.Unique.MarshalBytes(dst[:f.Unique.SizeBytes()])
+ dst = dst[f.Unique.SizeBytes():]
+ usermem.ByteOrder.PutUint64(dst[:8], uint64(f.NodeID))
+ dst = dst[8:]
+ usermem.ByteOrder.PutUint32(dst[:4], uint32(f.UID))
+ dst = dst[4:]
+ usermem.ByteOrder.PutUint32(dst[:4], uint32(f.GID))
+ dst = dst[4:]
+ usermem.ByteOrder.PutUint32(dst[:4], uint32(f.PID))
+ dst = dst[4:]
+ // Padding: dst[:sizeof(uint32)] ~= uint32(0)
+ dst = dst[4:]
+}
+
+// UnmarshalBytes implements marshal.Marshallable.UnmarshalBytes.
+func (f *FUSEHeaderIn) UnmarshalBytes(src []byte) {
+ f.Len = uint32(usermem.ByteOrder.Uint32(src[:4]))
+ src = src[4:]
+ f.Opcode.UnmarshalBytes(src[:f.Opcode.SizeBytes()])
+ src = src[f.Opcode.SizeBytes():]
+ f.Unique.UnmarshalBytes(src[:f.Unique.SizeBytes()])
+ src = src[f.Unique.SizeBytes():]
+ f.NodeID = uint64(usermem.ByteOrder.Uint64(src[:8]))
+ src = src[8:]
+ f.UID = uint32(usermem.ByteOrder.Uint32(src[:4]))
+ src = src[4:]
+ f.GID = uint32(usermem.ByteOrder.Uint32(src[:4]))
+ src = src[4:]
+ f.PID = uint32(usermem.ByteOrder.Uint32(src[:4]))
+ src = src[4:]
+ // Padding: var _ uint32 ~= src[:sizeof(uint32)]
+ src = src[4:]
+}
+
+// Packed implements marshal.Marshallable.Packed.
+//go:nosplit
+func (f *FUSEHeaderIn) Packed() bool {
+ return f.Opcode.Packed() && f.Unique.Packed()
+}
+
+// MarshalUnsafe implements marshal.Marshallable.MarshalUnsafe.
+func (f *FUSEHeaderIn) MarshalUnsafe(dst []byte) {
+ if f.Opcode.Packed() && f.Unique.Packed() {
+ safecopy.CopyIn(dst, unsafe.Pointer(f))
+ } else {
+ // Type FUSEHeaderIn doesn't have a packed layout in memory, fallback to MarshalBytes.
+ f.MarshalBytes(dst)
+ }
+}
+
+// UnmarshalUnsafe implements marshal.Marshallable.UnmarshalUnsafe.
+func (f *FUSEHeaderIn) UnmarshalUnsafe(src []byte) {
+ if f.Opcode.Packed() && f.Unique.Packed() {
+ safecopy.CopyOut(unsafe.Pointer(f), src)
+ } else {
+ // Type FUSEHeaderIn doesn't have a packed layout in memory, fallback to UnmarshalBytes.
+ f.UnmarshalBytes(src)
+ }
+}
+
+// CopyOutN implements marshal.Marshallable.CopyOutN: it passes the first limit bytes of f's marshalled form to task.CopyOutBytes at addr.
+//go:nosplit
+func (f *FUSEHeaderIn) CopyOutN(task marshal.Task, addr usermem.Addr, limit int) (int, error) {
+	if !(f.Opcode.Packed() && f.Unique.Packed()) {
+		// At least one field isn't packed, so f can't be copied from memory directly; fall back to MarshalBytes.
+		buf := task.CopyScratchBuffer(f.SizeBytes()) // escapes: okay.
+		f.MarshalBytes(buf)                          // escapes: fallback.
+		return task.CopyOutBytes(addr, buf[:limit])  // escapes: okay.
+	}
+
+	// All fields are packed: construct a slice backed by f's underlying memory.
+	var buf []byte
+	hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf))
+	hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(f)))
+	hdr.Len = f.SizeBytes()
+	hdr.Cap = f.SizeBytes()
+
+	length, err := task.CopyOutBytes(addr, buf[:limit]) // escapes: okay.
+	// Since we bypassed the compiler's escape analysis, indicate that f
+	// must live until the use above.
+	runtime.KeepAlive(f)
+	return length, err
+}
+
+// CopyOut implements marshal.Marshallable.CopyOut.
+//go:nosplit
+func (f *FUSEHeaderIn) CopyOut(task marshal.Task, addr usermem.Addr) (int, error) {
+ return f.CopyOutN(task, addr, f.SizeBytes())
+}
+
+// CopyIn implements marshal.Marshallable.CopyIn: it fills f from the bytes task.CopyInBytes reads at addr.
+//go:nosplit
+func (f *FUSEHeaderIn) CopyIn(task marshal.Task, addr usermem.Addr) (int, error) {
+	if !(f.Opcode.Packed() && f.Unique.Packed()) {
+		// At least one field isn't packed, so f can't be filled in memory directly; fall back to UnmarshalBytes.
+		buf := task.CopyScratchBuffer(f.SizeBytes()) // escapes: okay.
+		length, err := task.CopyInBytes(addr, buf)   // escapes: okay.
+		// Unmarshal unconditionally. If we had a short copy-in, this results in a
+		// partially unmarshalled struct.
+		f.UnmarshalBytes(buf) // escapes: fallback.
+		return length, err
+	}
+
+	// All fields are packed: construct a slice backed by f's underlying memory.
+	var buf []byte
+	hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf))
+	hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(f)))
+	hdr.Len = f.SizeBytes()
+	hdr.Cap = f.SizeBytes()
+
+	length, err := task.CopyInBytes(addr, buf) // escapes: okay.
+	// Since we bypassed the compiler's escape analysis, indicate that f
+	// must live until the use above.
+	runtime.KeepAlive(f)
+	return length, err
+}
+
+// WriteTo implements io.WriterTo.WriteTo: it writes f's marshalled form to w and returns the byte count reported by w.Write.
+func (f *FUSEHeaderIn) WriteTo(w io.Writer) (int64, error) {
+	if !(f.Opcode.Packed() && f.Unique.Packed()) {
+		// At least one field isn't packed, so f can't be written from memory directly; fall back to MarshalBytes.
+		buf := make([]byte, f.SizeBytes())
+		f.MarshalBytes(buf)
+		length, err := w.Write(buf)
+		return int64(length), err
+	}
+
+	// All fields are packed: construct a slice backed by f's underlying memory.
+	var buf []byte
+	hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf))
+	hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(f)))
+	hdr.Len = f.SizeBytes()
+	hdr.Cap = f.SizeBytes()
+
+	length, err := w.Write(buf)
+	// Since we bypassed the compiler's escape analysis, indicate that f
+	// must live until the use above.
+	runtime.KeepAlive(f)
+	return int64(length), err
+}
+
+// SizeBytes implements marshal.Marshallable.SizeBytes.
+func (f *FUSEHeaderOut) SizeBytes() int {
+ return 8 +
+ (*FUSEOpID)(nil).SizeBytes()
+}
+
+// MarshalBytes implements marshal.Marshallable.MarshalBytes.
+func (f *FUSEHeaderOut) MarshalBytes(dst []byte) {
+ usermem.ByteOrder.PutUint32(dst[:4], uint32(f.Len))
+ dst = dst[4:]
+ usermem.ByteOrder.PutUint32(dst[:4], uint32(f.Error))
+ dst = dst[4:]
+ f.Unique.MarshalBytes(dst[:f.Unique.SizeBytes()])
+ dst = dst[f.Unique.SizeBytes():]
+}
+
+// UnmarshalBytes implements marshal.Marshallable.UnmarshalBytes.
+func (f *FUSEHeaderOut) UnmarshalBytes(src []byte) {
+ f.Len = uint32(usermem.ByteOrder.Uint32(src[:4]))
+ src = src[4:]
+ f.Error = int32(usermem.ByteOrder.Uint32(src[:4]))
+ src = src[4:]
+ f.Unique.UnmarshalBytes(src[:f.Unique.SizeBytes()])
+ src = src[f.Unique.SizeBytes():]
+}
+
+// Packed implements marshal.Marshallable.Packed.
+//go:nosplit
+func (f *FUSEHeaderOut) Packed() bool {
+ return f.Unique.Packed()
+}
+
+// MarshalUnsafe implements marshal.Marshallable.MarshalUnsafe.
+func (f *FUSEHeaderOut) MarshalUnsafe(dst []byte) {
+ if f.Unique.Packed() {
+ safecopy.CopyIn(dst, unsafe.Pointer(f))
+ } else {
+ // Type FUSEHeaderOut doesn't have a packed layout in memory, fallback to MarshalBytes.
+ f.MarshalBytes(dst)
+ }
+}
+
+// UnmarshalUnsafe implements marshal.Marshallable.UnmarshalUnsafe.
+func (f *FUSEHeaderOut) UnmarshalUnsafe(src []byte) {
+ if f.Unique.Packed() {
+ safecopy.CopyOut(unsafe.Pointer(f), src)
+ } else {
+ // Type FUSEHeaderOut doesn't have a packed layout in memory, fallback to UnmarshalBytes.
+ f.UnmarshalBytes(src)
+ }
+}
+
+// CopyOutN implements marshal.Marshallable.CopyOutN.
+//go:nosplit
+func (f *FUSEHeaderOut) CopyOutN(task marshal.Task, addr usermem.Addr, limit int) (int, error) {
+ if !f.Unique.Packed() {
+ // Type FUSEHeaderOut doesn't have a packed layout in memory, fall back to MarshalBytes.
+ buf := task.CopyScratchBuffer(f.SizeBytes()) // escapes: okay.
+ f.MarshalBytes(buf) // escapes: fallback.
+ return task.CopyOutBytes(addr, buf[:limit]) // escapes: okay.
+ }
+
+ // Construct a slice backed by dst's underlying memory.
+ var buf []byte
+ hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf))
+ hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(f)))
+ hdr.Len = f.SizeBytes()
+ hdr.Cap = f.SizeBytes()
+
+ length, err := task.CopyOutBytes(addr, buf[:limit]) // escapes: okay.
+ // Since we bypassed the compiler's escape analysis, indicate that f
+ // must live until the use above.
+ runtime.KeepAlive(f)
+ return length, err
+}
+
+// CopyOut implements marshal.Marshallable.CopyOut.
+//go:nosplit
+func (f *FUSEHeaderOut) CopyOut(task marshal.Task, addr usermem.Addr) (int, error) {
+ return f.CopyOutN(task, addr, f.SizeBytes())
+}
+
+// CopyIn implements marshal.Marshallable.CopyIn.
+//go:nosplit
+func (f *FUSEHeaderOut) CopyIn(task marshal.Task, addr usermem.Addr) (int, error) {
+ if !f.Unique.Packed() {
+ // Type FUSEHeaderOut doesn't have a packed layout in memory, fall back to UnmarshalBytes.
+ buf := task.CopyScratchBuffer(f.SizeBytes()) // escapes: okay.
+ length, err := task.CopyInBytes(addr, buf) // escapes: okay.
+ // Unmarshal unconditionally. If we had a short copy-in, this results in a
+ // partially unmarshalled struct.
+ f.UnmarshalBytes(buf) // escapes: fallback.
+ return length, err
+ }
+
+ // Construct a slice backed by dst's underlying memory.
+ var buf []byte
+ hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf))
+ hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(f)))
+ hdr.Len = f.SizeBytes()
+ hdr.Cap = f.SizeBytes()
+
+ length, err := task.CopyInBytes(addr, buf) // escapes: okay.
+ // Since we bypassed the compiler's escape analysis, indicate that f
+ // must live until the use above.
+ runtime.KeepAlive(f)
+ return length, err
+}
+
+// WriteTo implements io.WriterTo.WriteTo.
+func (f *FUSEHeaderOut) WriteTo(w io.Writer) (int64, error) {
+ if !f.Unique.Packed() {
+ // Type FUSEHeaderOut doesn't have a packed layout in memory, fall back to MarshalBytes.
+ buf := make([]byte, f.SizeBytes())
+ f.MarshalBytes(buf)
+ length, err := w.Write(buf)
+ return int64(length), err
+ }
+
+ // Construct a slice backed by dst's underlying memory.
+ var buf []byte
+ hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf))
+ hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(f)))
+ hdr.Len = f.SizeBytes()
+ hdr.Cap = f.SizeBytes()
+
+ length, err := w.Write(buf)
+ // Since we bypassed the compiler's escape analysis, indicate that f
+ // must live until the use above.
+ runtime.KeepAlive(f)
+ return int64(length), err
+}
+
+// SizeBytes implements marshal.Marshallable.SizeBytes.
+func (f *FUSEWriteIn) SizeBytes() int {
+ return 40
+}
+
+// MarshalBytes implements marshal.Marshallable.MarshalBytes.
+func (f *FUSEWriteIn) MarshalBytes(dst []byte) {
+ usermem.ByteOrder.PutUint64(dst[:8], uint64(f.Fh))
+ dst = dst[8:]
+ usermem.ByteOrder.PutUint64(dst[:8], uint64(f.Offset))
+ dst = dst[8:]
+ usermem.ByteOrder.PutUint32(dst[:4], uint32(f.Size))
+ dst = dst[4:]
+ usermem.ByteOrder.PutUint32(dst[:4], uint32(f.WriteFlags))
+ dst = dst[4:]
+ usermem.ByteOrder.PutUint64(dst[:8], uint64(f.LockOwner))
+ dst = dst[8:]
+ usermem.ByteOrder.PutUint32(dst[:4], uint32(f.Flags))
+ dst = dst[4:]
+ // Padding: dst[:sizeof(uint32)] ~= uint32(0)
+ dst = dst[4:]
+}
+
+// UnmarshalBytes implements marshal.Marshallable.UnmarshalBytes.
+func (f *FUSEWriteIn) UnmarshalBytes(src []byte) {
+ f.Fh = uint64(usermem.ByteOrder.Uint64(src[:8]))
+ src = src[8:]
+ f.Offset = uint64(usermem.ByteOrder.Uint64(src[:8]))
+ src = src[8:]
+ f.Size = uint32(usermem.ByteOrder.Uint32(src[:4]))
+ src = src[4:]
+ f.WriteFlags = uint32(usermem.ByteOrder.Uint32(src[:4]))
+ src = src[4:]
+ f.LockOwner = uint64(usermem.ByteOrder.Uint64(src[:8]))
+ src = src[8:]
+ f.Flags = uint32(usermem.ByteOrder.Uint32(src[:4]))
+ src = src[4:]
+ // Padding: var _ uint32 ~= src[:sizeof(uint32)]
+ src = src[4:]
+}
+
+// Packed implements marshal.Marshallable.Packed.
+//go:nosplit
+func (f *FUSEWriteIn) Packed() bool {
+ return true
+}
+
+// MarshalUnsafe implements marshal.Marshallable.MarshalUnsafe.
+func (f *FUSEWriteIn) MarshalUnsafe(dst []byte) {
+ safecopy.CopyIn(dst, unsafe.Pointer(f))
+}
+
+// UnmarshalUnsafe implements marshal.Marshallable.UnmarshalUnsafe.
+func (f *FUSEWriteIn) UnmarshalUnsafe(src []byte) {
+ safecopy.CopyOut(unsafe.Pointer(f), src)
+}
+
+// CopyOutN implements marshal.Marshallable.CopyOutN.
+//go:nosplit
+func (f *FUSEWriteIn) CopyOutN(task marshal.Task, addr usermem.Addr, limit int) (int, error) {
+ // Construct a slice backed by dst's underlying memory.
+ var buf []byte
+ hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf))
+ hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(f)))
+ hdr.Len = f.SizeBytes()
+ hdr.Cap = f.SizeBytes()
+
+ length, err := task.CopyOutBytes(addr, buf[:limit]) // escapes: okay.
+ // Since we bypassed the compiler's escape analysis, indicate that f
+ // must live until the use above.
+ runtime.KeepAlive(f)
+ return length, err
+}
+
+// CopyOut implements marshal.Marshallable.CopyOut.
+//go:nosplit
+func (f *FUSEWriteIn) CopyOut(task marshal.Task, addr usermem.Addr) (int, error) {
+ return f.CopyOutN(task, addr, f.SizeBytes())
+}
+
+// CopyIn implements marshal.Marshallable.CopyIn.
+//go:nosplit
+func (f *FUSEWriteIn) CopyIn(task marshal.Task, addr usermem.Addr) (int, error) {
+ // Construct a slice backed by dst's underlying memory.
+ var buf []byte
+ hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf))
+ hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(f)))
+ hdr.Len = f.SizeBytes()
+ hdr.Cap = f.SizeBytes()
+
+ length, err := task.CopyInBytes(addr, buf) // escapes: okay.
+ // Since we bypassed the compiler's escape analysis, indicate that f
+ // must live until the use above.
+ runtime.KeepAlive(f)
+ return length, err
+}
+
+// WriteTo implements io.WriterTo.WriteTo.
+func (f *FUSEWriteIn) WriteTo(w io.Writer) (int64, error) {
+ // Construct a slice backed by dst's underlying memory.
+ var buf []byte
+ hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf))
+ hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(f)))
+ hdr.Len = f.SizeBytes()
+ hdr.Cap = f.SizeBytes()
+
+ length, err := w.Write(buf)
+ // Since we bypassed the compiler's escape analysis, indicate that f
+ // must live until the use above.
+ runtime.KeepAlive(f)
+ return int64(length), err
+}
+
+// SizeBytes implements marshal.Marshallable.SizeBytes.
func (r *RobustListHead) SizeBytes() int {
return 24
}
@@ -818,7 +1401,7 @@ func (i *IPTEntry) Packed() bool {
// MarshalUnsafe implements marshal.Marshallable.MarshalUnsafe.
func (i *IPTEntry) MarshalUnsafe(dst []byte) {
- if i.Counters.Packed() && i.IP.Packed() {
+ if i.IP.Packed() && i.Counters.Packed() {
safecopy.CopyIn(dst, unsafe.Pointer(i))
} else {
// Type IPTEntry doesn't have a packed layout in memory, fallback to MarshalBytes.
diff --git a/pkg/abi/linux/linux_amd64_abi_autogen_unsafe.go b/pkg/abi/linux/linux_amd64_abi_autogen_unsafe.go
index 238af9fb4..42c9623af 100644
--- a/pkg/abi/linux/linux_amd64_abi_autogen_unsafe.go
+++ b/pkg/abi/linux/linux_amd64_abi_autogen_unsafe.go
@@ -288,7 +288,7 @@ func (s *Stat) UnmarshalBytes(src []byte) {
// Packed implements marshal.Marshallable.Packed.
//go:nosplit
func (s *Stat) Packed() bool {
- return s.MTime.Packed() && s.CTime.Packed() && s.ATime.Packed()
+ return s.ATime.Packed() && s.MTime.Packed() && s.CTime.Packed()
}
// MarshalUnsafe implements marshal.Marshallable.MarshalUnsafe.
@@ -344,7 +344,7 @@ func (s *Stat) CopyOut(task marshal.Task, addr usermem.Addr) (int, error) {
// CopyIn implements marshal.Marshallable.CopyIn.
//go:nosplit
func (s *Stat) CopyIn(task marshal.Task, addr usermem.Addr) (int, error) {
- if !s.ATime.Packed() && s.MTime.Packed() && s.CTime.Packed() {
+ if !s.MTime.Packed() && s.CTime.Packed() && s.ATime.Packed() {
// Type Stat doesn't have a packed layout in memory, fall back to UnmarshalBytes.
buf := task.CopyScratchBuffer(s.SizeBytes()) // escapes: okay.
length, err := task.CopyInBytes(addr, buf) // escapes: okay.
@@ -370,7 +370,7 @@ func (s *Stat) CopyIn(task marshal.Task, addr usermem.Addr) (int, error) {
// WriteTo implements io.WriterTo.WriteTo.
func (s *Stat) WriteTo(w io.Writer) (int64, error) {
- if !s.ATime.Packed() && s.MTime.Packed() && s.CTime.Packed() {
+ if !s.MTime.Packed() && s.CTime.Packed() && s.ATime.Packed() {
// Type Stat doesn't have a packed layout in memory, fall back to MarshalBytes.
buf := make([]byte, s.SizeBytes())
s.MarshalBytes(buf)
diff --git a/pkg/abi/linux/linux_arm64_abi_autogen_unsafe.go b/pkg/abi/linux/linux_arm64_abi_autogen_unsafe.go
index bdc129008..4d0ebca49 100644
--- a/pkg/abi/linux/linux_arm64_abi_autogen_unsafe.go
+++ b/pkg/abi/linux/linux_arm64_abi_autogen_unsafe.go
@@ -300,7 +300,7 @@ func (s *Stat) Packed() bool {
// MarshalUnsafe implements marshal.Marshallable.MarshalUnsafe.
func (s *Stat) MarshalUnsafe(dst []byte) {
- if s.CTime.Packed() && s.ATime.Packed() && s.MTime.Packed() {
+ if s.ATime.Packed() && s.MTime.Packed() && s.CTime.Packed() {
safecopy.CopyIn(dst, unsafe.Pointer(s))
} else {
// Type Stat doesn't have a packed layout in memory, fallback to MarshalBytes.
@@ -321,7 +321,7 @@ func (s *Stat) UnmarshalUnsafe(src []byte) {
// CopyOutN implements marshal.Marshallable.CopyOutN.
//go:nosplit
func (s *Stat) CopyOutN(task marshal.Task, addr usermem.Addr, limit int) (int, error) {
- if !s.CTime.Packed() && s.ATime.Packed() && s.MTime.Packed() {
+ if !s.ATime.Packed() && s.MTime.Packed() && s.CTime.Packed() {
// Type Stat doesn't have a packed layout in memory, fall back to MarshalBytes.
buf := task.CopyScratchBuffer(s.SizeBytes()) // escapes: okay.
s.MarshalBytes(buf) // escapes: fallback.
@@ -351,7 +351,7 @@ func (s *Stat) CopyOut(task marshal.Task, addr usermem.Addr) (int, error) {
// CopyIn implements marshal.Marshallable.CopyIn.
//go:nosplit
func (s *Stat) CopyIn(task marshal.Task, addr usermem.Addr) (int, error) {
- if !s.CTime.Packed() && s.ATime.Packed() && s.MTime.Packed() {
+ if !s.MTime.Packed() && s.CTime.Packed() && s.ATime.Packed() {
// Type Stat doesn't have a packed layout in memory, fall back to UnmarshalBytes.
buf := task.CopyScratchBuffer(s.SizeBytes()) // escapes: okay.
length, err := task.CopyInBytes(addr, buf) // escapes: okay.
@@ -377,7 +377,7 @@ func (s *Stat) CopyIn(task marshal.Task, addr usermem.Addr) (int, error) {
// WriteTo implements io.WriterTo.WriteTo.
func (s *Stat) WriteTo(w io.Writer) (int64, error) {
- if !s.ATime.Packed() && s.MTime.Packed() && s.CTime.Packed() {
+ if !s.CTime.Packed() && s.ATime.Packed() && s.MTime.Packed() {
// Type Stat doesn't have a packed layout in memory, fall back to MarshalBytes.
buf := make([]byte, s.SizeBytes())
s.MarshalBytes(buf)
diff --git a/pkg/sentry/fsimpl/fuse/connection.go b/pkg/sentry/fsimpl/fuse/connection.go
new file mode 100644
index 000000000..f330da0bd
--- /dev/null
+++ b/pkg/sentry/fsimpl/fuse/connection.go
@@ -0,0 +1,255 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package fuse
+
+import (
+ "errors"
+ "fmt"
+ "syscall"
+
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+ "gvisor.dev/gvisor/pkg/sentry/vfs"
+ "gvisor.dev/gvisor/pkg/waiter"
+ "gvisor.dev/gvisor/tools/go_marshal/marshal"
+)
+
+// MaxActiveRequestsDefault is the default setting controlling the upper bound
+// on the number of active requests at any given time.
+const MaxActiveRequestsDefault = 10000
+
+var (
+ // Ordinary requests have even IDs, while interrupt IDs are odd.
+ InitReqBit uint64 = 1
+ ReqIDStep uint64 = 2
+)
+
+// Request represents a FUSE operation request that hasn't been sent to the
+// server yet.
+//
+// +stateify savable
+type Request struct {
+ requestEntry
+
+ id linux.FUSEOpID
+ hdr *linux.FUSEHeaderIn
+ data []byte
+}
+
+// Response represents an actual response from the server, including the
+// response payload.
+//
+// +stateify savable
+type Response struct {
+ opcode linux.FUSEOpcode
+ hdr linux.FUSEHeaderOut
+ data []byte
+}
+
+// Connection is the struct by which the sentry communicates with the FUSE server daemon.
+type Connection struct {
+ fd *DeviceFD
+
+ // MaxWrite is the daemon's maximum size of a write buffer.
+ // This is negotiated during FUSE_INIT.
+ MaxWrite uint32
+}
+
+// NewFUSEConnection creates a FUSE connection to fd.
+//
+// fd must be a file description whose implementation is a *DeviceFD (an open
+// FUSE device). The descriptor is looked up on behalf of the task requesting
+// the mount, so it is validated here: a nil fd, or an fd backed by any other
+// kind of file, is rejected with an error instead of panicking. On success the
+// device is marked as mounted and its response-tracking state is
+// (re)initialized. maxInFlightRequests is the buffer size of the channel used
+// to wake up callers that are blocked on a full request queue.
+func NewFUSEConnection(_ context.Context, fd *vfs.FileDescription, maxInFlightRequests uint64) (*Connection, error) {
+	if fd == nil {
+		return nil, errors.New("fuse: no device file description provided")
+	}
+	fuseFD, ok := fd.Impl().(*DeviceFD)
+	if !ok {
+		return nil, fmt.Errorf("fuse: file description is not a FUSE device (got %T)", fd.Impl())
+	}
+
+	// Mark the device as ready so it can be used. /dev/fuse can only be used if the FD was used to
+	// mount a FUSE filesystem.
+	fuseFD.mounted = true
+
+	// Create the writeBuf for the header to be stored in.
+	hdrLen := uint32((*linux.FUSEHeaderOut)(nil).SizeBytes())
+	fuseFD.writeBuf = make([]byte, hdrLen)
+	fuseFD.completions = make(map[linux.FUSEOpID]*futureResponse)
+	fuseFD.fullQueueCh = make(chan struct{}, maxInFlightRequests)
+	fuseFD.writeCursor = 0
+
+	return &Connection{
+		fd: fuseFD,
+	}, nil
+}
+
+// NewRequest creates a new request that can be sent to the FUSE server.
+func (conn *Connection) NewRequest(creds *auth.Credentials, pid uint32, ino uint64, opcode linux.FUSEOpcode, payload marshal.Marshallable) (*Request, error) {
+ conn.fd.mu.Lock()
+ defer conn.fd.mu.Unlock()
+ conn.fd.nextOpID += linux.FUSEOpID(ReqIDStep)
+
+ hdrLen := (*linux.FUSEHeaderIn)(nil).SizeBytes()
+ hdr := linux.FUSEHeaderIn{
+ Len: uint32(hdrLen + payload.SizeBytes()),
+ Opcode: opcode,
+ Unique: conn.fd.nextOpID,
+ NodeID: ino,
+ UID: uint32(creds.EffectiveKUID),
+ GID: uint32(creds.EffectiveKGID),
+ PID: pid,
+ }
+
+ buf := make([]byte, hdr.Len)
+ hdr.MarshalUnsafe(buf[:hdrLen])
+ payload.MarshalUnsafe(buf[hdrLen:])
+
+ return &Request{
+ id: hdr.Unique,
+ hdr: &hdr,
+ data: buf,
+ }, nil
+}
+
+// Call makes a request to the server and blocks the invoking task until a
+// server responds with a response.
+// NOTE: If no task is provided then the Call will simply enqueue the request
+// and return a nil response. No blocking will happen in this case. Instead,
+// this is used to signify that the processing of this request will happen by
+// the kernel.Task that writes the response. See FUSE_INIT for such an
+// invocation.
+func (conn *Connection) Call(t *kernel.Task, r *Request) (*Response, error) {
+ fut, err := conn.callFuture(t, r)
+ if err != nil {
+ return nil, err
+ }
+
+ return fut.resolve(t)
+}
+
+// Error returns the error of the FUSE call.
+func (r *Response) Error() error {
+ errno := r.hdr.Error
+ if errno >= 0 {
+ return nil
+ }
+
+ sysErrNo := syscall.Errno(-errno)
+ return error(sysErrNo)
+}
+
+// UnmarshalPayload unmarshals the response data into m.
+//
+// It returns an error, without touching m, when the length advertised in the
+// response header is shorter than the header itself, when the advertised
+// payload is smaller than m.SizeBytes(), or when fewer bytes are actually
+// buffered in the response than the header plus payload require.
+func (r *Response) UnmarshalPayload(m marshal.Marshallable) error {
+	hdrLen := uint32(r.hdr.SizeBytes())
+	wantDataLen := uint32(m.SizeBytes())
+
+	// r.hdr.Len is supplied by the FUSE server. Compare it against hdrLen
+	// before subtracting: the arithmetic is unsigned, so a too-short length
+	// would otherwise wrap around and slip past the size check below.
+	if r.hdr.Len < hdrLen {
+		return fmt.Errorf("response too small. Minimum length required: %d, but got length %d", hdrLen, r.hdr.Len)
+	}
+
+	haveDataLen := r.hdr.Len - hdrLen
+	if haveDataLen < wantDataLen {
+		return fmt.Errorf("payload too small. Minimum data length required: %d, but got data length %d", wantDataLen, haveDataLen)
+	}
+
+	// The buffered data is indexed from the start of the response, header
+	// included. Make sure it really holds the bytes about to be sliced.
+	if uint64(len(r.data)) < uint64(hdrLen)+uint64(wantDataLen) {
+		return fmt.Errorf("response data truncated. Need at least %d bytes, but only %d bytes are buffered", uint64(hdrLen)+uint64(wantDataLen), len(r.data))
+	}
+
+	m.UnmarshalUnsafe(r.data[hdrLen:])
+	return nil
+}
+
+// callFuture makes a request to the server and returns a future response.
+// Call resolve() when the response needs to be fulfilled.
+func (conn *Connection) callFuture(t *kernel.Task, r *Request) (*futureResponse, error) {
+ conn.fd.mu.Lock()
+ defer conn.fd.mu.Unlock()
+
+ // Is the queue full?
+ //
+ // We must busy wait here until the request can be queued. We don't
+ // block on the fd.fullQueueCh with a lock - so after being signalled,
+ // before we acquire the lock, it is possible that a barging task enters
+ // and queues a request. As a result, upon acquiring the lock we must
+ // again check if the room is available.
+ //
+ // This can potentially starve a request forever but this can only happen
+ // if there are always too many ongoing requests all the time. The
+ // supported maxActiveRequests setting should be really high to avoid this.
+ for conn.fd.numActiveRequests == conn.fd.fs.opts.maxActiveRequests {
+ if t == nil {
+ // Since there is no task that is waiting, we must error out.
+ return nil, errors.New("FUSE request queue full")
+ }
+
+ log.Infof("Blocking request %v from being queued. Too many active requests: %v",
+ r.id, conn.fd.numActiveRequests)
+ conn.fd.mu.Unlock()
+ err := t.Block(conn.fd.fullQueueCh)
+ conn.fd.mu.Lock()
+ if err != nil {
+ return nil, err
+ }
+ }
+
+ return conn.callFutureLocked(t, r)
+}
+
+// callFutureLocked makes a request to the server and returns a future response.
+func (conn *Connection) callFutureLocked(t *kernel.Task, r *Request) (*futureResponse, error) {
+ conn.fd.queue.PushBack(r)
+ conn.fd.numActiveRequests += 1
+ fut := newFutureResponse(r.hdr.Opcode)
+ conn.fd.completions[r.id] = fut
+
+ // Signal the readers that there is something to read.
+ conn.fd.waitQueue.Notify(waiter.EventIn)
+
+ return fut, nil
+}
+
+// futureResponse represents an in-flight request, that may or may not have
+// completed yet. Convert it to a resolved Response by calling resolve, but note
+// that this may block.
+//
+// +stateify savable
+type futureResponse struct {
+ opcode linux.FUSEOpcode
+ ch chan struct{}
+ hdr *linux.FUSEHeaderOut
+ data []byte
+}
+
+// newFutureResponse creates a future response to a FUSE request.
+func newFutureResponse(opcode linux.FUSEOpcode) *futureResponse {
+ return &futureResponse{
+ opcode: opcode,
+ ch: make(chan struct{}),
+ }
+}
+
+// resolve blocks the task until the server responds to its corresponding request,
+// then returns a resolved response.
+func (f *futureResponse) resolve(t *kernel.Task) (*Response, error) {
+ // If there is no Task associated with this request - then we don't try to resolve
+ // the response. Instead, the task writing the response (proxy to the server) will
+ // process the response on our behalf.
+ if t == nil {
+ log.Infof("fuse.Response.resolve: Not waiting on a response from server.")
+ return nil, nil
+ }
+
+ if err := t.Block(f.ch); err != nil {
+ return nil, err
+ }
+
+ return f.getResponse(), nil
+}
+
+// getResponse creates a Response from the data the futureResponse has.
+func (f *futureResponse) getResponse() *Response {
+ return &Response{
+ opcode: f.opcode,
+ hdr: *f.hdr,
+ data: f.data,
+ }
+}
diff --git a/pkg/sentry/fsimpl/fuse/dev.go b/pkg/sentry/fsimpl/fuse/dev.go
index c9e12a94f..f3443ac71 100644
--- a/pkg/sentry/fsimpl/fuse/dev.go
+++ b/pkg/sentry/fsimpl/fuse/dev.go
@@ -15,13 +15,17 @@
package fuse
import (
+ "syscall"
+
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
- "gvisor.dev/gvisor/pkg/sentry/fsimpl/devtmpfs"
+ "gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sentry/vfs"
+ "gvisor.dev/gvisor/pkg/sync"
"gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/usermem"
+ "gvisor.dev/gvisor/pkg/waiter"
)
const fuseDevMinor = 229
@@ -54,9 +58,43 @@ type DeviceFD struct {
// mounted specifies whether a FUSE filesystem was mounted using the DeviceFD.
mounted bool
- // TODO(gvisor.dev/issue/2987): Add all the data structures needed to enqueue
- // and deque requests, control synchronization and establish communication
- // between the FUSE kernel module and the /dev/fuse character device.
+ // nextOpID is used to create new requests.
+ nextOpID linux.FUSEOpID
+
+ // queue is the list of requests that need to be processed by the FUSE server.
+ queue requestList
+
+ // numActiveRequests is the number of requests made by the Sentry that have
+ // yet to be responded to.
+ numActiveRequests uint64
+
+ // completions is used to map a request to its response. A Writer will use this
+ // to notify the caller of a completed response.
+ completions map[linux.FUSEOpID]*futureResponse
+
+ writeCursor uint32
+
+ // writeBuf is the memory buffer used to copy in the FUSE out header from
+ // userspace.
+ writeBuf []byte
+
+ // writeCursorFR is the futureResponse currently being copied in from the server.
+ writeCursorFR *futureResponse
+
+ // mu protects all the queues, maps, buffers and cursors and nextOpID.
+ mu sync.Mutex
+
+ // waitQueue is used to notify interested parties when the device becomes
+ // readable or writable.
+ waitQueue waiter.Queue
+
+ // fullQueueCh is a channel used to synchronize the readers with the writers.
+ // Writers (inbound requests to the filesystem) block if there are too many
+ // unprocessed in-flight requests.
+ fullQueueCh chan struct{}
+
+ // fs is the FUSE filesystem that this FD is being used for.
+ fs *filesystem
}
// Release implements vfs.FileDescriptionImpl.Release.
@@ -79,7 +117,75 @@ func (fd *DeviceFD) Read(ctx context.Context, dst usermem.IOSequence, opts vfs.R
return 0, syserror.EPERM
}
- return 0, syserror.ENOSYS
+ // We require that any Read done on this filesystem have a sane minimum
+ // read buffer. It must have the capacity for the fixed parts of any request
+ // header (Linux uses the request header and the FUSEWriteIn header for this
+ // calculation) + the negotiated MaxWrite room for the data.
+ minBuffSize := linux.FUSE_MIN_READ_BUFFER
+ inHdrLen := uint32((*linux.FUSEHeaderIn)(nil).SizeBytes())
+ writeHdrLen := uint32((*linux.FUSEWriteIn)(nil).SizeBytes())
+ negotiatedMinBuffSize := inHdrLen + writeHdrLen + fd.fs.conn.MaxWrite
+ if minBuffSize < negotiatedMinBuffSize {
+ minBuffSize = negotiatedMinBuffSize
+ }
+
+ // If the read buffer is too small, error out.
+ if dst.NumBytes() < int64(minBuffSize) {
+ return 0, syserror.EINVAL
+ }
+
+ fd.mu.Lock()
+ defer fd.mu.Unlock()
+ return fd.readLocked(ctx, dst, opts)
+}
+
+// readLocked implements the reading of the fuse device while locked with DeviceFD.mu.
+func (fd *DeviceFD) readLocked(ctx context.Context, dst usermem.IOSequence, opts vfs.ReadOptions) (int64, error) {
+ if fd.queue.Empty() {
+ return 0, syserror.ErrWouldBlock
+ }
+
+ var readCursor uint32
+ var bytesRead int64
+ for {
+ req := fd.queue.Front()
+ if dst.NumBytes() < int64(req.hdr.Len) {
+ // The request is too large. Cannot process it. All requests must be smaller than the
+ // negotiated size as specified by Connection.MaxWrite set as part of the FUSE_INIT
+ // handshake.
+ errno := -int32(syscall.EIO)
+ if req.hdr.Opcode == linux.FUSE_SETXATTR {
+ errno = -int32(syscall.E2BIG)
+ }
+
+ // Return the error to the calling task.
+ if err := fd.sendError(ctx, errno, req); err != nil {
+ return 0, err
+ }
+
+ // We're done with this request.
+ fd.queue.Remove(req)
+
+ // Restart the read as this request was invalid.
+ log.Warningf("fuse.DeviceFD.Read: request found was too large. Restarting read.")
+ return fd.readLocked(ctx, dst, opts)
+ }
+
+ n, err := dst.CopyOut(ctx, req.data[readCursor:])
+ if err != nil {
+ return 0, err
+ }
+ readCursor += uint32(n)
+ bytesRead += int64(n)
+
+ if readCursor >= req.hdr.Len {
+ // Fully done with this req, remove it from the queue.
+ fd.queue.Remove(req)
+ break
+ }
+ }
+
+ return bytesRead, nil
}
// PWrite implements vfs.FileDescriptionImpl.PWrite.
@@ -94,12 +200,128 @@ func (fd *DeviceFD) PWrite(ctx context.Context, src usermem.IOSequence, offset i
// Write implements vfs.FileDescriptionImpl.Write.
func (fd *DeviceFD) Write(ctx context.Context, src usermem.IOSequence, opts vfs.WriteOptions) (int64, error) {
+ fd.mu.Lock()
+ defer fd.mu.Unlock()
+ return fd.writeLocked(ctx, src, opts)
+}
+
+// writeLocked implements writing to the fuse device while locked with DeviceFD.mu.
+//
+// The server's response is consumed in two phases: first the common
+// FUSEHeaderOut is gathered into fd.writeBuf, then the remainder of the
+// response is gathered into the matching futureResponse. Either phase may be
+// spread over several writes; fd.writeCursor counts the bytes of the current
+// response received so far (header included). The waiting caller is only
+// signalled once the number of bytes announced in the header has arrived.
+//
+// It returns the number of bytes consumed from src, EPERM if no filesystem is
+// mounted on the device, and EINVAL if the header is malformed or does not
+// match a pending request.
+func (fd *DeviceFD) writeLocked(ctx context.Context, src usermem.IOSequence, opts vfs.WriteOptions) (int64, error) {
 	// Operations on /dev/fuse don't make sense until a FUSE filesystem is mounted.
 	if !fd.mounted {
 		return 0, syserror.EPERM
 	}
-	return 0, syserror.ENOSYS
+	var cn, n int64
+	hdrLen := uint32((*linux.FUSEHeaderOut)(nil).SizeBytes())
+
+	for src.NumBytes() > 0 {
+		if fd.writeCursorFR != nil {
+			// Already have common header, and we're now copying the payload.
+			wantBytes := fd.writeCursorFR.hdr.Len
+
+			// The FR data buffer is sized for the whole response (hdr.Len bytes) and
+			// is indexed by writeCursor, which already counts the header bytes; the
+			// header itself is not copied into it. Allocate it on first use.
+			if fd.writeCursorFR.data == nil {
+				fd.writeCursorFR.data = make([]byte, wantBytes)
+			}
+
+			bytesCopied, err := src.CopyIn(ctx, fd.writeCursorFR.data[fd.writeCursor:wantBytes])
+			if err != nil {
+				return 0, err
+			}
+			src = src.DropFirst(bytesCopied)
+
+			cn = int64(bytesCopied)
+			n += cn
+			fd.writeCursor += uint32(cn)
+			if fd.writeCursor == wantBytes {
+				// Done reading this full response. Clean up and unblock the
+				// initiator.
+				break
+			}
+
+			// Check if we have more data in src.
+			continue
+		}
+
+		// Assert that the header isn't read into the writeBuf yet.
+		if fd.writeCursor >= hdrLen {
+			return 0, syserror.EINVAL
+		}
+
+		// We don't have the full common response header yet. Fill the part of
+		// writeBuf that is still missing, i.e. [writeCursor, hdrLen). The upper
+		// bound must be hdrLen itself, not the number of missing bytes, otherwise
+		// a header that arrives in pieces is truncated or the slice is invalid.
+		bytesCopied, err := src.CopyIn(ctx, fd.writeBuf[fd.writeCursor:hdrLen])
+		if err != nil {
+			return 0, err
+		}
+		src = src.DropFirst(bytesCopied)
+
+		cn = int64(bytesCopied)
+		n += cn
+		fd.writeCursor += uint32(cn)
+		if fd.writeCursor == hdrLen {
+			// Have full header in the writeBuf. Use it to fetch the actual futureResponse
+			// from the device's completions map.
+			var hdr linux.FUSEHeaderOut
+			hdr.UnmarshalBytes(fd.writeBuf)
+
+			// We have the header now and so the writeBuf has served its purpose.
+			// It is not cleared here: once writeCursor goes back to 0 the next
+			// response simply overwrites what's in the buffer.
+
+			// hdr.Len is chosen by the server and is later used as a slice bound, so
+			// it must at least cover the header that was just read. Drop the bogus
+			// header so that the device stays usable for further writes.
+			if hdr.Len < hdrLen {
+				fd.writeCursor = 0
+				return 0, syserror.EINVAL
+			}
+
+			fut, ok := fd.completions[hdr.Unique]
+			if !ok {
+				// Server sent us a response for a request we never sent? Drop the
+				// header so that the device stays usable for further writes.
+				fd.writeCursor = 0
+				return 0, syserror.EINVAL
+			}
+
+			delete(fd.completions, hdr.Unique)
+
+			// Copy over the header into the future response. The rest of the payload
+			// will be copied over to the FR's data in the next iteration.
+			fut.hdr = &hdr
+			fd.writeCursorFR = fut
+
+			// Next iteration will now try to read the rest of the response, if src
+			// has any data remaining.
+		}
+	}
+
+	// Only hand the response over once every byte announced in its header has
+	// been received. If src ran dry earlier, keep the cursor state so that the
+	// next write continues where this one stopped.
+	if fd.writeCursorFR != nil && fd.writeCursor == fd.writeCursorFR.hdr.Len {
+		if err := fd.sendResponse(ctx, fd.writeCursorFR); err != nil {
+			return 0, err
+		}
+
+		// Ready the device for the next request.
+		fd.writeCursorFR = nil
+		fd.writeCursor = 0
+	}
+
+	return n, nil
+}
+
+// Readiness implements vfs.FileDescriptionImpl.Readiness.
+func (fd *DeviceFD) Readiness(mask waiter.EventMask) waiter.EventMask {
+ var ready waiter.EventMask
+ ready |= waiter.EventOut // FD is always writable
+ if !fd.queue.Empty() {
+ // Have reqs available, FD is readable.
+ ready |= waiter.EventIn
+ }
+
+ return ready & mask
+}
+
+// EventRegister implements waiter.Waitable.EventRegister.
+func (fd *DeviceFD) EventRegister(e *waiter.Entry, mask waiter.EventMask) {
+ fd.waitQueue.EventRegister(e, mask)
+}
+
+// EventUnregister implements waiter.Waitable.EventUnregister.
+func (fd *DeviceFD) EventUnregister(e *waiter.Entry) {
+ fd.waitQueue.EventUnregister(e)
}
// Seek implements vfs.FileDescriptionImpl.Seek.
@@ -112,22 +334,61 @@ func (fd *DeviceFD) Seek(ctx context.Context, offset int64, whence int32) (int64
return 0, syserror.ENOSYS
}
-// Register registers the FUSE device with vfsObj.
-func Register(vfsObj *vfs.VirtualFilesystem) error {
- if err := vfsObj.RegisterDevice(vfs.CharDevice, linux.MISC_MAJOR, fuseDevMinor, fuseDevice{}, &vfs.RegisterDeviceOptions{
- GroupName: "misc",
- }); err != nil {
+// sendResponse sends a response to the waiting task (if any).
+func (fd *DeviceFD) sendResponse(ctx context.Context, fut *futureResponse) error {
+ // See if the running task needs to perform some action before returning.
+ // Since we just finished writing the future, we can be sure that
+ // getResponse generates a populated response.
+ if err := fd.noReceiverAction(ctx, fut.getResponse()); err != nil {
return err
}
+ // Signal that the queue is no longer full.
+ select {
+ case fd.fullQueueCh <- struct{}{}:
+ default:
+ }
+ fd.numActiveRequests -= 1
+
+ // Signal the task waiting on a response.
+ close(fut.ch)
return nil
}
-// CreateDevtmpfsFile creates a device special file in devtmpfs.
-func CreateDevtmpfsFile(ctx context.Context, dev *devtmpfs.Accessor) error {
- if err := dev.CreateDeviceFile(ctx, "fuse", vfs.CharDevice, linux.MISC_MAJOR, fuseDevMinor, 0666 /* mode */); err != nil {
+// sendError sends an error response to the waiting task (if any).
+func (fd *DeviceFD) sendError(ctx context.Context, errno int32, req *Request) error {
+ // Return the error to the calling task.
+ outHdrLen := uint32((*linux.FUSEHeaderOut)(nil).SizeBytes())
+ respHdr := linux.FUSEHeaderOut{
+ Len: outHdrLen,
+ Error: errno,
+ Unique: req.hdr.Unique,
+ }
+
+ fut, ok := fd.completions[respHdr.Unique]
+ if !ok {
+ // No pending completion is registered for this request, so there is nobody to notify.
+ return syserror.EINVAL
+ }
+ delete(fd.completions, respHdr.Unique)
+
+ fut.hdr = &respHdr
+ if err := fd.sendResponse(ctx, fut); err != nil {
return err
}
return nil
}
+
+// noReceiverAction has the calling kernel.Task do some action if it's known that no
+// receiver is going to be waiting on the future channel. This is to be used by:
+// FUSE_INIT.
+func (fd *DeviceFD) noReceiverAction(ctx context.Context, r *Response) error {
+ if r.opcode == linux.FUSE_INIT {
+ // TODO: process init response here.
+ // Maybe get the creds from the context?
+ // creds := auth.CredentialsFromContext(ctx)
+ }
+
+ return nil
+}
diff --git a/pkg/sentry/fsimpl/fuse/fuse_state_autogen.go b/pkg/sentry/fsimpl/fuse/fuse_state_autogen.go
index 2b9c882fb..e4ce04322 100644
--- a/pkg/sentry/fsimpl/fuse/fuse_state_autogen.go
+++ b/pkg/sentry/fsimpl/fuse/fuse_state_autogen.go
@@ -1,3 +1,160 @@
// automatically generated by stateify.
package fuse
+
+import (
+ "gvisor.dev/gvisor/pkg/state"
+)
+
+func (x *Request) StateTypeName() string {
+ return "pkg/sentry/fsimpl/fuse.Request"
+}
+
+func (x *Request) StateFields() []string {
+ return []string{
+ "requestEntry",
+ "id",
+ "hdr",
+ "data",
+ }
+}
+
+func (x *Request) beforeSave() {}
+
+func (x *Request) StateSave(m state.Sink) {
+ x.beforeSave()
+ m.Save(0, &x.requestEntry)
+ m.Save(1, &x.id)
+ m.Save(2, &x.hdr)
+ m.Save(3, &x.data)
+}
+
+func (x *Request) afterLoad() {}
+
+func (x *Request) StateLoad(m state.Source) {
+ m.Load(0, &x.requestEntry)
+ m.Load(1, &x.id)
+ m.Load(2, &x.hdr)
+ m.Load(3, &x.data)
+}
+
+func (x *Response) StateTypeName() string {
+ return "pkg/sentry/fsimpl/fuse.Response"
+}
+
+func (x *Response) StateFields() []string {
+ return []string{
+ "opcode",
+ "hdr",
+ "data",
+ }
+}
+
+func (x *Response) beforeSave() {}
+
+func (x *Response) StateSave(m state.Sink) {
+ x.beforeSave()
+ m.Save(0, &x.opcode)
+ m.Save(1, &x.hdr)
+ m.Save(2, &x.data)
+}
+
+func (x *Response) afterLoad() {}
+
+func (x *Response) StateLoad(m state.Source) {
+ m.Load(0, &x.opcode)
+ m.Load(1, &x.hdr)
+ m.Load(2, &x.data)
+}
+
+func (x *futureResponse) StateTypeName() string {
+ return "pkg/sentry/fsimpl/fuse.futureResponse"
+}
+
+func (x *futureResponse) StateFields() []string {
+ return []string{
+ "opcode",
+ "ch",
+ "hdr",
+ "data",
+ }
+}
+
+func (x *futureResponse) beforeSave() {}
+
+func (x *futureResponse) StateSave(m state.Sink) {
+ x.beforeSave()
+ m.Save(0, &x.opcode)
+ m.Save(1, &x.ch)
+ m.Save(2, &x.hdr)
+ m.Save(3, &x.data)
+}
+
+func (x *futureResponse) afterLoad() {}
+
+func (x *futureResponse) StateLoad(m state.Source) {
+ m.Load(0, &x.opcode)
+ m.Load(1, &x.ch)
+ m.Load(2, &x.hdr)
+ m.Load(3, &x.data)
+}
+
+func (x *requestList) StateTypeName() string {
+ return "pkg/sentry/fsimpl/fuse.requestList"
+}
+
+func (x *requestList) StateFields() []string {
+ return []string{
+ "head",
+ "tail",
+ }
+}
+
+func (x *requestList) beforeSave() {}
+
+func (x *requestList) StateSave(m state.Sink) {
+ x.beforeSave()
+ m.Save(0, &x.head)
+ m.Save(1, &x.tail)
+}
+
+func (x *requestList) afterLoad() {}
+
+func (x *requestList) StateLoad(m state.Source) {
+ m.Load(0, &x.head)
+ m.Load(1, &x.tail)
+}
+
+func (x *requestEntry) StateTypeName() string {
+ return "pkg/sentry/fsimpl/fuse.requestEntry"
+}
+
+func (x *requestEntry) StateFields() []string {
+ return []string{
+ "next",
+ "prev",
+ }
+}
+
+func (x *requestEntry) beforeSave() {}
+
+func (x *requestEntry) StateSave(m state.Sink) {
+ x.beforeSave()
+ m.Save(0, &x.next)
+ m.Save(1, &x.prev)
+}
+
+func (x *requestEntry) afterLoad() {}
+
+func (x *requestEntry) StateLoad(m state.Source) {
+ m.Load(0, &x.next)
+ m.Load(1, &x.prev)
+}
+
+func init() {
+ state.Register((*Request)(nil))
+ state.Register((*Response)(nil))
+ state.Register((*futureResponse)(nil))
+ state.Register((*requestList)(nil))
+ state.Register((*requestEntry)(nil))
+}
diff --git a/pkg/sentry/fsimpl/fuse/fusefs.go b/pkg/sentry/fsimpl/fuse/fusefs.go
index f7775fb9b..911b6f7cb 100644
--- a/pkg/sentry/fsimpl/fuse/fusefs.go
+++ b/pkg/sentry/fsimpl/fuse/fusefs.go
@@ -51,6 +51,11 @@ type filesystemOptions struct {
// rootMode specifies the the file mode of the filesystem's root.
rootMode linux.FileMode
+
+ // maxActiveRequests specifies the maximum number of active requests that can
+ // exist at any time. Any further requests will block when trying to
+ // Call the server.
+ maxActiveRequests uint64
}
// filesystem implements vfs.FilesystemImpl.
@@ -58,12 +63,12 @@ type filesystem struct {
kernfs.Filesystem
devMinor uint32
- // fuseFD is the FD returned when opening /dev/fuse. It is used for communication
- // between the FUSE server daemon and the sentry fusefs.
- fuseFD *DeviceFD
+ // conn is used for communication between the FUSE server
+ // daemon and the sentry fusefs.
+ conn *Connection
// opts is the options the fusefs is initialized with.
- opts filesystemOptions
+ opts *filesystemOptions
}
// Name implements vfs.FilesystemType.Name.
@@ -100,7 +105,7 @@ func (fsType FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt
fuseFd := kernelTask.GetFileVFS2(int32(deviceDescriptor))
// Parse and set all the other supported FUSE mount options.
- // TODO: Expand the supported mount options.
+ // TODO(gVisor.dev/issue/3229): Expand the supported mount options.
if userIDStr, ok := mopts["user_id"]; ok {
delete(mopts, "user_id")
userID, err := strconv.ParseUint(userIDStr, 10, 32)
@@ -134,21 +139,20 @@ func (fsType FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt
}
fsopts.rootMode = rootMode
+ // Set the maxActiveRequests option.
+ fsopts.maxActiveRequests = MaxActiveRequestsDefault
+
// Check for unparsed options.
if len(mopts) != 0 {
log.Warningf("%s.GetFilesystem: unknown options: %v", fsType.Name(), mopts)
return nil, nil, syserror.EINVAL
}
- // Mark the device as ready so it can be used. /dev/fuse can only be used if the FD was used to
- // mount a FUSE filesystem.
- fuseFD := fuseFd.Impl().(*DeviceFD)
- fuseFD.mounted = true
-
- fs := &filesystem{
- devMinor: devMinor,
- fuseFD: fuseFD,
- opts: fsopts,
+ // Create a new FUSE filesystem.
+ fs, err := NewFUSEFilesystem(ctx, devMinor, &fsopts, fuseFd)
+ if err != nil {
+ log.Warningf("%s.NewFUSEFilesystem: failed with error: %v", fsType.Name(), err)
+ return nil, nil, err
}
fs.VFSFilesystem().Init(vfsObj, &fsType, fs)
@@ -162,6 +166,26 @@ func (fsType FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt
return fs.VFSFilesystem(), root.VFSDentry(), nil
}
+// NewFUSEFilesystem creates a new FUSE filesystem.
+func NewFUSEFilesystem(ctx context.Context, devMinor uint32, opts *filesystemOptions, device *vfs.FileDescription) (*filesystem, error) {
+ fs := &filesystem{
+ devMinor: devMinor,
+ opts: opts,
+ }
+
+ conn, err := NewFUSEConnection(ctx, device, opts.maxActiveRequests)
+ if err != nil {
+ log.Warningf("fuse.NewFUSEFilesystem: NewFUSEConnection failed with error: %v", err)
+ return nil, syserror.EINVAL
+ }
+
+ fs.conn = conn
+ fuseFD := device.Impl().(*DeviceFD)
+ fuseFD.fs = fs
+
+ return fs, nil
+}
+
// Release implements vfs.FilesystemImpl.Release.
func (fs *filesystem) Release() {
fs.Filesystem.VFSFilesystem().VirtualFilesystem().PutAnonBlockDevMinor(fs.devMinor)
diff --git a/pkg/sentry/fsimpl/fuse/register.go b/pkg/sentry/fsimpl/fuse/register.go
new file mode 100644
index 000000000..b5b581152
--- /dev/null
+++ b/pkg/sentry/fsimpl/fuse/register.go
@@ -0,0 +1,42 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package fuse
+
+import (
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/sentry/fsimpl/devtmpfs"
+ "gvisor.dev/gvisor/pkg/sentry/vfs"
+)
+
+// Register registers the FUSE device with vfsObj.
+func Register(vfsObj *vfs.VirtualFilesystem) error {
+ if err := vfsObj.RegisterDevice(vfs.CharDevice, linux.MISC_MAJOR, fuseDevMinor, fuseDevice{}, &vfs.RegisterDeviceOptions{
+ GroupName: "misc",
+ }); err != nil {
+ return err
+ }
+
+ return nil
+}
+
+// CreateDevtmpfsFile creates a device special file in devtmpfs.
+func CreateDevtmpfsFile(ctx context.Context, dev *devtmpfs.Accessor) error {
+ if err := dev.CreateDeviceFile(ctx, "fuse", vfs.CharDevice, linux.MISC_MAJOR, fuseDevMinor, 0666 /* mode */); err != nil {
+ return err
+ }
+
+ return nil
+}
diff --git a/pkg/sentry/fsimpl/fuse/request_list.go b/pkg/sentry/fsimpl/fuse/request_list.go
new file mode 100644
index 000000000..002262f23
--- /dev/null
+++ b/pkg/sentry/fsimpl/fuse/request_list.go
@@ -0,0 +1,193 @@
+package fuse
+
+// requestElementMapper maps a list element to its linker, the object that
+// holds the element's next/prev links.
+//
+// Elements and linkers are both *Request here, so the mapping is the
+// identity. A different mapper would only be needed if the linker were a
+// separate type from the element.
+type requestElementMapper struct{}
+
+// linkerFor returns the linker for elem, which is elem itself.
+//
+// This identity implementation is trivial and should be inlined.
+//
+//go:nosplit
+func (requestElementMapper) linkerFor(elem *Request) *Request { return elem }
+
+// requestList is an intrusive doubly-linked list of Requests. Entries can be
+// added or removed in O(1) time and with no additional memory allocations.
+//
+// The zero value for requestList is an empty list ready to use.
+//
+// To iterate over a list (where l is a requestList):
+//	for e := l.Front(); e != nil; e = e.Next() {
+//		// do something with e.
+//	}
+//
+// +stateify savable
+type requestList struct {
+ head *Request // first element; nil when the list is empty
+ tail *Request // last element; nil when the list is empty
+}
+
+// Reset returns list l to the empty state by dropping its head and tail
+// references. The links stored in the former elements are not modified.
+func (l *requestList) Reset() {
+	l.head, l.tail = nil, nil
+}
+
+// Empty reports whether list l has no elements.
+func (l *requestList) Empty() bool {
+ return l.head == nil
+}
+
+// Front returns the first element of list l, or nil if l is empty.
+func (l *requestList) Front() *Request {
+ return l.head
+}
+
+// Back returns the last element of list l, or nil if l is empty.
+func (l *requestList) Back() *Request {
+ return l.tail
+}
+
+// Len returns the number of elements in the list.
+//
+// NOTE: This is an O(n) operation.
+func (l *requestList) Len() (count int) {
+ for e := l.Front(); e != nil; e = (requestElementMapper{}.linkerFor(e)).Next() {
+ count++
+ }
+ return count
+}
+
+// PushFront inserts the element e at the front of list l.
+func (l *requestList) PushFront(e *Request) {
+ linker := requestElementMapper{}.linkerFor(e)
+ linker.SetNext(l.head)
+ linker.SetPrev(nil)
+ if l.head != nil {
+ requestElementMapper{}.linkerFor(l.head).SetPrev(e)
+ } else {
+ l.tail = e
+ }
+
+ l.head = e
+}
+
+// PushBack inserts the element e at the back of list l.
+func (l *requestList) PushBack(e *Request) {
+ linker := requestElementMapper{}.linkerFor(e)
+ linker.SetNext(nil)
+ linker.SetPrev(l.tail)
+ if l.tail != nil {
+ requestElementMapper{}.linkerFor(l.tail).SetNext(e)
+ } else {
+ l.head = e
+ }
+
+ l.tail = e
+}
+
+// PushBackList inserts list m at the end of list l, emptying m.
+func (l *requestList) PushBackList(m *requestList) {
+ if l.head == nil {
+ l.head = m.head
+ l.tail = m.tail
+ } else if m.head != nil {
+ requestElementMapper{}.linkerFor(l.tail).SetNext(m.head)
+ requestElementMapper{}.linkerFor(m.head).SetPrev(l.tail)
+
+ l.tail = m.tail
+ }
+ m.head = nil
+ m.tail = nil
+}
+
+// InsertAfter inserts e after b.
+func (l *requestList) InsertAfter(b, e *Request) {
+ bLinker := requestElementMapper{}.linkerFor(b)
+ eLinker := requestElementMapper{}.linkerFor(e)
+
+ a := bLinker.Next()
+
+ eLinker.SetNext(a)
+ eLinker.SetPrev(b)
+ bLinker.SetNext(e)
+
+ if a != nil {
+ requestElementMapper{}.linkerFor(a).SetPrev(e)
+ } else {
+ l.tail = e
+ }
+}
+
+// InsertBefore inserts e before a.
+func (l *requestList) InsertBefore(a, e *Request) {
+ aLinker := requestElementMapper{}.linkerFor(a)
+ eLinker := requestElementMapper{}.linkerFor(e)
+
+ b := aLinker.Prev()
+ eLinker.SetNext(a)
+ eLinker.SetPrev(b)
+ aLinker.SetPrev(e)
+
+ if b != nil {
+ requestElementMapper{}.linkerFor(b).SetNext(e)
+ } else {
+ l.head = e
+ }
+}
+
+// Remove removes e from l.
+func (l *requestList) Remove(e *Request) {
+ linker := requestElementMapper{}.linkerFor(e)
+ prev := linker.Prev()
+ next := linker.Next()
+
+ if prev != nil {
+ requestElementMapper{}.linkerFor(prev).SetNext(next)
+ } else if l.head == e {
+ l.head = next
+ }
+
+ if next != nil {
+ requestElementMapper{}.linkerFor(next).SetPrev(prev)
+ } else if l.tail == e {
+ l.tail = prev
+ }
+
+ linker.SetNext(nil)
+ linker.SetPrev(nil)
+}
+
+// requestEntry is a default implementation of the linker used by requestList.
+// Embedding it in a struct gives that struct the Next, Prev, SetNext and
+// SetPrev methods that requestList calls on its elements.
+//
+// +stateify savable
+type requestEntry struct {
+ next *Request
+ prev *Request
+}
+
+// Next returns the element stored as e's successor in the list.
+func (e *requestEntry) Next() *Request {
+ return e.next
+}
+
+// Prev returns the element stored as e's predecessor in the list.
+func (e *requestEntry) Prev() *Request {
+ return e.prev
+}
+
+// SetNext assigns elem as the element that follows e in the list.
+func (e *requestEntry) SetNext(elem *Request) {
+ e.next = elem
+}
+
+// SetPrev assigns elem as the element that precedes e in the list.
+func (e *requestEntry) SetPrev(elem *Request) {
+ e.prev = elem
+}