summaryrefslogtreecommitdiffhomepage
path: root/pkg/lisafs
diff options
context:
space:
mode:
Diffstat (limited to 'pkg/lisafs')
-rw-r--r--pkg/lisafs/channel.go190
-rw-r--r--pkg/lisafs/client.go377
-rw-r--r--pkg/lisafs/communicator.go80
-rw-r--r--pkg/lisafs/connection.go304
-rw-r--r--pkg/lisafs/control_fd_list.go221
-rw-r--r--pkg/lisafs/control_fd_refs.go140
-rw-r--r--pkg/lisafs/fd.go348
-rw-r--r--pkg/lisafs/handlers.go124
-rw-r--r--pkg/lisafs/lisafs.go18
-rw-r--r--pkg/lisafs/lisafs_abi_autogen_unsafe.go1534
-rw-r--r--pkg/lisafs/lisafs_state_autogen.go176
-rw-r--r--pkg/lisafs/message.go258
-rw-r--r--pkg/lisafs/open_fd_list.go221
-rw-r--r--pkg/lisafs/open_fd_refs.go140
-rw-r--r--pkg/lisafs/sample_message.go110
-rw-r--r--pkg/lisafs/server.go113
-rw-r--r--pkg/lisafs/sock.go208
17 files changed, 4562 insertions, 0 deletions
diff --git a/pkg/lisafs/channel.go b/pkg/lisafs/channel.go
new file mode 100644
index 000000000..301212e51
--- /dev/null
+++ b/pkg/lisafs/channel.go
@@ -0,0 +1,190 @@
+// Copyright 2021 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package lisafs
+
+import (
+ "math"
+ "runtime"
+
+ "golang.org/x/sys/unix"
+ "gvisor.dev/gvisor/pkg/fdchannel"
+ "gvisor.dev/gvisor/pkg/flipcall"
+ "gvisor.dev/gvisor/pkg/log"
+)
+
+var (
+ chanHeaderLen = uint32((*channelHeader)(nil).SizeBytes())
+)
+
+// maxChannels returns the number of channels a client can create.
+//
+// The server will reject channel creation requests beyond this (per client).
+// Note that we don't want the number of channels to be too large, because each
+// accounts for a large region of shared memory.
+// TODO(gvisor.dev/issue/6313): Tune the number of channels.
+func maxChannels() int {
+ maxChans := runtime.GOMAXPROCS(0)
+ if maxChans < 2 {
+ maxChans = 2
+ }
+ if maxChans > 4 {
+ maxChans = 4
+ }
+ return maxChans
+}
+
+// channel implements Communicator and represents the communication endpoint
+// for the client and server and is used to perform fast IPC. Apart from
+// communicating data, a channel is also capable of donating file descriptors.
+type channel struct {
+ // fdTracker accumulates the FDs received on, or donated through, this
+ // channel until ReleaseFDs is called.
+ fdTracker
+ // dead is set once the channel must not be reused: after SendRecvFast fails
+ // in SndRcvMessage, or when destroy is called.
+ dead bool
+ // data is the flipcall endpoint whose buffer holds the channelHeader
+ // followed by the message payload.
+ data flipcall.Endpoint
+ // fdChan is the endpoint over which file descriptors are sent and received.
+ fdChan fdchannel.Endpoint
+}
+
+// Compile-time check that *channel implements Communicator.
+var _ Communicator = (*channel)(nil)
+
+// PayloadBuf implements Communicator.PayloadBuf.
+//
+// The returned slice aliases the channel's data buffer: it covers the size
+// bytes that immediately follow the channel header.
+func (ch *channel) PayloadBuf(size uint32) []byte {
+	start := chanHeaderLen
+	end := start + size
+	return ch.data.Data()[start:end]
+}
+
+// SndRcvMessage implements Communicator.SndRcvMessage.
+//
+// It writes the header for m in front of the already-populated payload, hands
+// the buffer over to the peer and parses whatever comes back. A transport
+// failure marks the channel dead and is reported to the caller as EIO.
+func (ch *channel) SndRcvMessage(m MID, payloadLen uint32, wantFDs uint8) (MID, uint32, error) {
+	// Requests can not donate FDs, so the header always advertises zero.
+	ch.marshalHdr(m, 0 /* numFDs */)
+
+	// One-shot communication. RPCs are expected to be quick rather than block.
+	respLen, err := ch.data.SendRecvFast(chanHeaderLen + payloadLen)
+	if err == nil {
+		return ch.rcvMsg(respLen)
+	}
+
+	// The channel can not be used again. Callers only ever see EIO; the
+	// underlying transport error is preserved in the log.
+	ch.dead = true
+	log.Warningf("lisafs.sndRcvMessage: flipcall.Endpoint.SendRecv: %v", err)
+	return 0, 0, unix.EIO
+}
+
+// shutdown shuts down the data endpoint only. It neither marks the channel
+// dead nor touches the FD channel; destroy does that.
+func (ch *channel) shutdown() {
+ ch.data.Shutdown()
+}
+
+// destroy marks the channel dead and then destroys both of its endpoints: the
+// FD channel first, followed by the data endpoint.
+func (ch *channel) destroy() {
+ ch.dead = true
+ ch.fdChan.Destroy()
+ ch.data.Destroy()
+}
+
+// createChannel creates a server side channel. It returns a packet window
+// descriptor (for the data channel) and an open socket for the FD channel.
+func (c *Connection) createChannel(maxMessageSize uint32) (*channel, flipcall.PacketWindowDescriptor, int, error) {
+ c.channelsMu.Lock()
+ defer c.channelsMu.Unlock()
+ // If c.channels is nil, the connection has closed.
+ if c.channels == nil || len(c.channels) >= maxChannels() {
+ return nil, flipcall.PacketWindowDescriptor{}, -1, unix.ENOSYS
+ }
+ ch := &channel{}
+
+ // Set up data channel.
+ desc, err := c.channelAlloc.Allocate(flipcall.PacketHeaderBytes + int(chanHeaderLen+maxMessageSize))
+ if err != nil {
+ return nil, flipcall.PacketWindowDescriptor{}, -1, err
+ }
+ if err := ch.data.Init(flipcall.ServerSide, desc); err != nil {
+ return nil, flipcall.PacketWindowDescriptor{}, -1, err
+ }
+
+ // Set up FD channel.
+ fdSocks, err := fdchannel.NewConnectedSockets()
+ if err != nil {
+ ch.data.Destroy()
+ return nil, flipcall.PacketWindowDescriptor{}, -1, err
+ }
+ ch.fdChan.Init(fdSocks[0])
+ clientFDSock := fdSocks[1]
+
+ c.channels = append(c.channels, ch)
+ return ch, desc, clientFDSock, nil
+}
+
+// sendFDs donates fds over the FD channel, in order, and returns how many were
+// sent successfully.
+//
+// Donated FDs are supplementary to the RPC response, so a failure here never
+// fails the RPC itself. Because the order of donation matters, the first FD
+// that can not be sent stops the donation: no FD after it is attempted. If
+// there are more FDs than fit in a uint8, none of them are sent.
+func (ch *channel) sendFDs(fds []int) uint8 {
+	total := len(fds)
+	switch {
+	case total == 0:
+		return 0
+	case total > math.MaxUint8:
+		log.Warningf("dropping all FDs because too many FDs to donate: %v", total)
+		return 0
+	}
+
+	sent := 0
+	for ; sent < total; sent++ {
+		if err := ch.fdChan.SendFD(fds[sent]); err != nil {
+			log.Warningf("error occurred while sending (%d/%d)th FD on channel(%p): %v", sent+1, total, ch, err)
+			break
+		}
+	}
+	return uint8(sent)
+}
+
+// channelHeader is the header present in front of each message received on
+// flipcall endpoint when the protocol version being used is 1.
+//
+// +marshal
+type channelHeader struct {
+ // message identifies the request or response that follows the header.
+ message MID
+ // numFDs is the number of FDs donated along with this message. Requests
+ // always set it to 0 (see channel.SndRcvMessage).
+ numFDs uint8
+ _ uint8 // Need to make struct packed.
+}
+
+// marshalHdr writes a channelHeader describing message m and numFDs donated
+// FDs at the very start of the channel's data buffer.
+func (ch *channel) marshalHdr(m MID, numFDs uint8) {
+	hdr := channelHeader{message: m, numFDs: numFDs}
+	hdr.MarshalUnsafe(ch.data.Data())
+}
+
+// rcvMsg parses a message of dataLen bytes sitting in the channel's data
+// buffer. It returns the message ID from the header and the length of the
+// payload that follows it. Any FDs announced by the header are collected from
+// the FD channel and tracked on ch; a failure to receive one of them is logged
+// and ends the collection early, but does not fail the message.
+//
+// EIO is returned if dataLen is too small to even contain a header.
+func (ch *channel) rcvMsg(dataLen uint32) (MID, uint32, error) {
+	if dataLen < chanHeaderLen {
+		log.Warningf("received data has size smaller than header length: %d", dataLen)
+		return 0, 0, unix.EIO
+	}
+
+	// The header sits at the front of the buffer.
+	var hdr channelHeader
+	hdr.UnmarshalUnsafe(ch.data.Data())
+
+	// Pick up the donated FDs, if any.
+	announced := int(hdr.numFDs)
+	for got := 0; got < announced; got++ {
+		fd, err := ch.fdChan.RecvFDNonblock()
+		if err != nil {
+			log.Warningf("expected %d FDs, received %d successfully, got err after that: %v", hdr.numFDs, got, err)
+			break
+		}
+		ch.TrackFD(fd)
+	}
+
+	payloadLen := dataLen - chanHeaderLen
+	return hdr.message, payloadLen, nil
+}
diff --git a/pkg/lisafs/client.go b/pkg/lisafs/client.go
new file mode 100644
index 000000000..c99f8c73d
--- /dev/null
+++ b/pkg/lisafs/client.go
@@ -0,0 +1,377 @@
+// Copyright 2021 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package lisafs
+
+import (
+ "fmt"
+ "math"
+
+ "golang.org/x/sys/unix"
+ "gvisor.dev/gvisor/pkg/cleanup"
+ "gvisor.dev/gvisor/pkg/flipcall"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/sync"
+ "gvisor.dev/gvisor/pkg/unet"
+)
+
+// Client helps manage a connection to the lisafs server and pass messages
+// efficiently. There is a 1:1 mapping between a Connection and a Client.
+type Client struct {
+ // sockComm is the main socket by which this connection is established.
+ // Communication over the socket is synchronized by sockMu. It is the
+ // fallback communicator when no channel is available.
+ sockMu sync.Mutex
+ sockComm *sockCommunicator
+
+ // channelsMu protects channels and availableChannels.
+ channelsMu sync.Mutex
+ // channels tracks all the channels.
+ channels []*channel
+ // availableChannels is a LIFO (stack) of channels available to be used.
+ // It is set to nil once the watchdog starts shutting the client down.
+ availableChannels []*channel
+ // activeWg represents active channels. It is incremented in getChannel and
+ // decremented in releaseChannel.
+ activeWg sync.WaitGroup
+
+ // watchdogWg only holds the watchdog goroutine.
+ watchdogWg sync.WaitGroup
+
+ // supported caches information about which messages are supported. It is
+ // indexed by MID. An MID is supported if supported[MID] is true.
+ supported []bool
+
+ // maxMessageSize is the maximum payload length (in bytes) that can be sent.
+ // It is initialized on Mount and is immutable.
+ maxMessageSize uint32
+}
+
+// NewClient sets up a client on top of sock, which it takes ownership of. It
+// mounts mountPath on the server, records the server's capabilities and then
+// creates the channels used for fast IPC. On success it returns the ready
+// client together with the root Inode. On failure the partially constructed
+// client is closed before the error is returned.
+func NewClient(sock *unet.Socket, mountPath string) (*Client, *Inode, error) {
+	numChans := maxChannels()
+	c := &Client{
+		sockComm:          newSockComm(sock),
+		channels:          make([]*channel, 0, numChans),
+		availableChannels: make([]*channel, 0, numChans),
+		maxMessageSize:    1 << 20, // 1 MB for now.
+	}
+
+	// The watchdog goroutine monitors socket health and is also the one that
+	// performs client cleanup.
+	c.watchdogWg.Add(1)
+	go c.watchdog()
+
+	// Unless released below, tear everything down on the way out.
+	cu := cleanup.Make(c.Close)
+	defer cu.Clean()
+
+	// Mount comes first. Nothing is known about the server yet, so assume that
+	// Mount itself is supported in order to be able to issue the RPC.
+	c.supported = make([]bool, Mount+1)
+	c.supported[Mount] = true
+	mountReq := MountReq{MountPath: SizedString(mountPath)}
+	var mountResp MountResp
+	if err := c.SndRcvMessage(Mount, uint32(mountReq.SizeBytes()), mountReq.MarshalBytes, mountResp.UnmarshalBytes, nil); err != nil {
+		return nil, nil, err
+	}
+
+	// Adopt the limits and the message set advertised by the server.
+	c.maxMessageSize = uint32(mountResp.MaxMessageSize)
+	var highestMID MID
+	for _, mid := range mountResp.SupportedMs {
+		if mid > highestMID {
+			highestMID = mid
+		}
+	}
+	supported := make([]bool, highestMID+1)
+	for _, mid := range mountResp.SupportedMs {
+		supported[mid] = true
+	}
+	c.supported = supported
+
+	// Create the channels in parallel: already established channels can carry
+	// the requests that create further ones, and expensive steps such as
+	// flipcall.Endpoint.Connect overlap.
+	errs := make([]error, numChans)
+	var wg sync.WaitGroup
+	wg.Add(numChans)
+	for i := 0; i < numChans; i++ {
+		go func(slot int) {
+			defer wg.Done()
+			ch, err := c.createChannel()
+			if err != nil {
+				log.Warningf("channel creation failed: %v", err)
+				errs[slot] = err
+				return
+			}
+			c.channelsMu.Lock()
+			defer c.channelsMu.Unlock()
+			c.channels = append(c.channels, ch)
+			c.availableChannels = append(c.availableChannels, ch)
+		}(i)
+	}
+	wg.Wait()
+
+	// Report the first channel creation failure, if there was one.
+	for _, err := range errs {
+		if err != nil {
+			return nil, nil, err
+		}
+	}
+
+	cu.Release()
+	return c, &mountResp.Root, nil
+}
+
+// watchdog blocks until the main socket reports a hang-up (or polling it
+// fails for a reason other than EINTR/EAGAIN) and then tears the client down:
+// active channels are shut down and waited for, every channel is destroyed
+// and finally the main socket is destroyed. It signals watchdogWg when done.
+func (c *Client) watchdog() {
+	defer c.watchdogWg.Done()
+
+	pollFDs := []unix.PollFd{{
+		Fd:     int32(c.sockComm.FD()),
+		Events: unix.POLLHUP | unix.POLLRDHUP,
+	}}
+
+	// Block until there is a shutdown event. Interrupted polls are retried.
+	for {
+		n, err := unix.Ppoll(pollFDs, nil, nil)
+		if err == unix.EINTR || err == unix.EAGAIN {
+			continue
+		}
+		switch {
+		case err != nil:
+			log.Warningf("lisafs.Client.watch(): %v", err)
+		case n != 1:
+			log.Warningf("lisafs.Client.watch(): got %d events, wanted 1", n)
+		}
+		break
+	}
+
+	// Stop in-flight RPCs and wait until their channels have been released.
+	c.shutdownActiveChans()
+	c.activeWg.Wait()
+
+	// Every channel can be torn down now.
+	c.channelsMu.Lock()
+	for _, ch := range c.channels {
+		ch.destroy()
+	}
+	c.channelsMu.Unlock()
+
+	// The main socket goes last.
+	c.sockComm.destroy()
+}
+
+// shutdownActiveChans shuts down every channel that is currently in use and
+// clears the available stack so that no channel can be handed out again.
+func (c *Client) shutdownActiveChans() {
+	c.channelsMu.Lock()
+	defer c.channelsMu.Unlock()
+
+	// Anything that is not sitting on the available stack is in use.
+	idle := make(map[*channel]struct{})
+	for _, ch := range c.availableChannels {
+		idle[ch] = struct{}{}
+	}
+	for _, ch := range c.channels {
+		if _, isIdle := idle[ch]; isIdle {
+			continue
+		}
+		log.Debugf("shutting down active channel@%p...", ch)
+		ch.shutdown()
+	}
+
+	// A nil stack tells releaseChannel not to make channels available again.
+	c.availableChannels = nil
+}
+
+// Close shuts down the main socket and waits for the watchdog to clean up.
+// The actual teardown (channels, socket) is performed by the watchdog
+// goroutine; Close only triggers it and blocks until it has finished.
+func (c *Client) Close() {
+ // This shutdown has no effect if the watchdog has already fired and closed
+ // the main socket.
+ c.sockComm.shutdown()
+ c.watchdogWg.Wait()
+}
+
+// createChannel asks the server for a new channel and builds the client side
+// of it from the two FDs in the response: the first backs the data endpoint,
+// the second is the socket for the FD channel. The returned channel is
+// already connected. On failure every FD received is closed.
+func (c *Client) createChannel() (*channel, error) {
+	var (
+		resp ChannelResp
+		fds  [2]int
+	)
+	if err := c.SndRcvMessage(Channel, 0, NoopMarshal, resp.UnmarshalUnsafe, fds[:]); err != nil {
+		return nil, err
+	}
+	dataFD, fdSock := fds[0], fds[1]
+	if dataFD < 0 || fdSock < 0 {
+		closeFDs(fds[:])
+		return nil, fmt.Errorf("insufficient FDs provided in Channel response: %v", fds)
+	}
+
+	// The data FD is not needed once this function returns, whatever the
+	// outcome.
+	defer closeFDs(fds[:1])
+
+	newCh := new(channel)
+	desc := flipcall.PacketWindowDescriptor{
+		FD:     dataFD,
+		Offset: resp.dataOffset,
+		Length: int(resp.dataLength),
+	}
+	if err := newCh.data.Init(flipcall.ClientSide, desc); err != nil {
+		closeFDs(fds[1:])
+		return nil, err
+	}
+	newCh.fdChan.Init(fdSock) // fdChan now owns this FD.
+
+	// A channel is only usable once it is connected.
+	if err := newCh.data.Connect(); err != nil {
+		newCh.destroy()
+		return nil, err
+	}
+	return newCh, nil
+}
+
+// IsSupported reports whether message m can be sent on this connection. MIDs
+// beyond the end of the supported table are treated as unsupported.
+func (c *Client) IsSupported(m MID) bool {
+	if int(m) >= len(c.supported) {
+		return false
+	}
+	return c.supported[m]
+}
+
+// SndRcvMessage invokes reqMarshal to marshal the request onto the payload
+// buffer, wakes up the server to process the request, waits for the response
+// and invokes respUnmarshal with the response payload. respFDs is populated
+// with the received FDs, extra fields are set to -1.
+//
+// Note that the function arguments intentionally accept marshal.Marshallable
+// functions like Marshal{Bytes/Unsafe} and Unmarshal{Bytes/Unsafe} instead of
+// directly accepting the marshal.Marshallable interface. Even though just
+// accepting marshal.Marshallable is cleaner, it leads to a heap allocation
+// (even if that interface variable itself does not escape). In other words,
+// implicit conversion to an interface leads to an allocation.
+//
+// Errors:
+//   - EOPNOTSUPP if m is not supported on this connection.
+//   - EIO if payloadLen exceeds the client's maxMessageSize.
+//   - EINVAL if more than math.MaxUint8 FDs are requested, or if the server
+//     answers with a message other than m or Error.
+//   - The errno carried by the response if the server answers with Error.
+//   - Whatever error the communicator reports for the exchange itself.
+//
+// On every error that occurs after the exchange, the FDs placed in respFDs
+// are closed.
+//
+// Precondition: reqMarshal and respUnmarshal must be non-nil.
+func (c *Client) SndRcvMessage(m MID, payloadLen uint32, reqMarshal func(dst []byte), respUnmarshal func(src []byte), respFDs []int) error {
+	if !c.IsSupported(m) {
+		return unix.EOPNOTSUPP
+	}
+	if payloadLen > c.maxMessageSize {
+		log.Warningf("message %d has message size = %d which is larger than client.maxMessageSize = %d", m, payloadLen, c.maxMessageSize)
+		return unix.EIO
+	}
+	wantFDs := len(respFDs)
+	if wantFDs > math.MaxUint8 {
+		log.Warningf("want too many FDs: %d", wantFDs)
+		return unix.EINVAL
+	}
+
+	// Acquire a communicator.
+	comm := c.acquireCommunicator()
+	defer c.releaseCommunicator(comm)
+
+	// Marshal the request into comm's payload buffer and make the RPC.
+	reqMarshal(comm.PayloadBuf(payloadLen))
+	respM, respPayloadLen, err := comm.SndRcvMessage(m, payloadLen, uint8(wantFDs))
+
+	// Handle FD donation. This happens before the error checks so that FDs
+	// received along with a failed exchange are still collected and closed.
+	rcvFDs := comm.ReleaseFDs()
+	if numRcvFDs := len(rcvFDs); numRcvFDs+wantFDs > 0 {
+		// rcvFDs is memory owned by comm which can not be returned to caller.
+		// Copy it into the caller's buffer.
+		numFDCopied := copy(respFDs, rcvFDs)
+		if numFDCopied < numRcvFDs {
+			// The server donated more FDs than the caller has room for; the
+			// surplus is closed rather than leaked.
+			log.Warningf("%d unexpected FDs were donated by the server, wanted %d", numRcvFDs-numFDCopied, wantFDs)
+			closeFDs(rcvFDs[numFDCopied:])
+		}
+		// Mark the slots for which no FD was received.
+		for i := numFDCopied; i < wantFDs; i++ {
+			respFDs[i] = -1
+		}
+	}
+
+	// Error cases.
+	if err != nil {
+		closeFDs(respFDs)
+		return err
+	}
+	if respM == Error {
+		closeFDs(respFDs)
+		var resp ErrorResp
+		resp.UnmarshalUnsafe(comm.PayloadBuf(respPayloadLen))
+		return unix.Errno(resp.errno)
+	}
+	if respM != m {
+		closeFDs(respFDs)
+		log.Warningf("sent %d message but got %d in response", m, respM)
+		return unix.EINVAL
+	}
+
+	// Success. The payload must be unmarshalled *before* comm is released.
+	respUnmarshal(comm.PayloadBuf(respPayloadLen))
+	return nil
+}
+
+// acquireCommunicator picks the communicator for the next RPC: a free channel
+// if there is one, otherwise the main socket (with sockMu held).
+//
+// Channels are preferred over the socket because:
+//   - A channel passes messages through a shared memory region. IO from
+//     shared memory is faster and does not involve making a syscall.
+//   - No intermediate buffer needs to be allocated. The message can be pasted
+//     straight into the shared memory region.
+//
+// Postcondition: releaseCommunicator() must be called on the returned value.
+func (c *Client) acquireCommunicator() Communicator {
+	ch := c.getChannel()
+	if ch == nil {
+		// No channel is free; fall back to the socket.
+		c.sockMu.Lock()
+		return c.sockComm
+	}
+	return ch
+}
+
+// Precondition: comm must have been acquired via acquireCommunicator().
+func (c *Client) releaseCommunicator(comm Communicator) {
+ switch t := comm.(type) {
+ case *sockCommunicator:
+ c.sockMu.Unlock() // +checklocksforce: locked in acquireCommunicator().
+ case *channel:
+ c.releaseChannel(t)
+ default:
+ panic(fmt.Sprintf("unknown communicator type %T", t))
+ }
+}
+
+// getChannel takes the most recently released channel off the available stack
+// and counts it as active. It returns nil when no channel is available. The
+// caller must hand the channel back with releaseChannel after use.
+func (c *Client) getChannel() *channel {
+	c.channelsMu.Lock()
+	defer c.channelsMu.Unlock()
+
+	depth := len(c.availableChannels)
+	if depth == 0 {
+		return nil
+	}
+
+	top := c.availableChannels[depth-1]
+	c.availableChannels = c.availableChannels[:depth-1]
+	c.activeWg.Add(1)
+	return top
+}
+
+// releaseChannel ends ch's use by an RPC. The channel is pushed back onto the
+// available stack unless it is dead or the client is shutting down. In every
+// case it stops counting as active.
+func (c *Client) releaseChannel(ch *channel) {
+	c.channelsMu.Lock()
+	defer c.channelsMu.Unlock()
+	defer c.activeWg.Done()
+
+	// A nil availableChannels means the watchdog has fired and the client is
+	// shutting down, so nothing may become available again.
+	if ch.dead || c.availableChannels == nil {
+		return
+	}
+	c.availableChannels = append(c.availableChannels, ch)
+}
diff --git a/pkg/lisafs/communicator.go b/pkg/lisafs/communicator.go
new file mode 100644
index 000000000..ec2035158
--- /dev/null
+++ b/pkg/lisafs/communicator.go
@@ -0,0 +1,80 @@
+// Copyright 2021 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package lisafs
+
+import "golang.org/x/sys/unix"
+
+// Communicator represents the transport over which one end of a connection
+// exchanges messages with the other. In this package it is implemented by
+// channel and is used both by server side handlers and by
+// Client.SndRcvMessage.
+type Communicator interface {
+ // PayloadBuf returns a slice to the payload section of its internal buffer
+ // where the message can be marshalled. The handlers should use this to
+ // populate the payload buffer with the message.
+ //
+ // The payload buffer contents *should* be preserved across calls with
+ // different sizes. Note that this is not a guarantee, because a compromised
+ // owner of a "shared" payload buffer can tamper with its contents anytime,
+ // even when it's not its turn to do so.
+ PayloadBuf(size uint32) []byte
+
+ // SndRcvMessage sends message m. The caller must have populated PayloadBuf()
+ // with payloadLen bytes. The caller expects to receive wantFDs FDs.
+ // Any received FDs must be accessible via ReleaseFDs(). It returns the
+ // response message along with the response payload length.
+ SndRcvMessage(m MID, payloadLen uint32, wantFDs uint8) (MID, uint32, error)
+
+ // DonateFD makes fd non-blocking and starts tracking it. The next call to
+ // ReleaseFDs will include fd in the order it was added. Communicator takes
+ // ownership of fd. Server side should call this.
+ DonateFD(fd int) error
+
+ // TrackFD starts tracking fd. The next call to ReleaseFDs will include fd in
+ // the order it was added. Communicator takes ownership of fd. Client side
+ // should use this for accumulating received FDs.
+ TrackFD(fd int)
+
+ // ReleaseFDs returns the accumulated FDs and stops tracking them. The
+ // ownership of the FDs is transferred to the caller.
+ ReleaseFDs() []int
+}
+
+// fdTracker is a partial implementation of Communicator. It can be embedded in
+// Communicator implementations to keep track of FD donations. It provides
+// DonateFD, TrackFD and ReleaseFDs.
+type fdTracker struct {
+ // fds holds the tracked FDs in the order they were added.
+ fds []int
+}
+
+// DonateFD implements Communicator.DonateFD.
+//
+// If fd can not be made non-blocking it is closed and the error is returned;
+// otherwise fd is appended to the tracked FDs.
+func (d *fdTracker) DonateFD(fd int) error {
+	err := unix.SetNonblock(fd, true)
+	if err != nil {
+		unix.Close(fd)
+		return err
+	}
+	d.fds = append(d.fds, fd)
+	return nil
+}
+
+// TrackFD implements Communicator.TrackFD. It appends fd to the tracked FDs
+// without modifying the FD itself.
+func (d *fdTracker) TrackFD(fd int) {
+ d.fds = append(d.fds, fd)
+}
+
+// ReleaseFDs implements Communicator.ReleaseFDs.
+//
+// The tracker keeps its backing array for reuse, so the returned slice shares
+// memory with it: FDs tracked after this call overwrite the returned
+// elements. Callers that need the values for longer must copy them.
+func (d *fdTracker) ReleaseFDs() []int {
+	released := d.fds
+	d.fds = released[:0]
+	return released
+}
diff --git a/pkg/lisafs/connection.go b/pkg/lisafs/connection.go
new file mode 100644
index 000000000..8dba4805f
--- /dev/null
+++ b/pkg/lisafs/connection.go
@@ -0,0 +1,304 @@
+// Copyright 2021 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package lisafs
+
+import (
+ "golang.org/x/sys/unix"
+ "gvisor.dev/gvisor/pkg/flipcall"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/p9"
+ "gvisor.dev/gvisor/pkg/sync"
+ "gvisor.dev/gvisor/pkg/unet"
+)
+
+// Connection represents a connection between a mount point in the client and a
+// mount point in the server. It is owned by the server on which it was started
+// and facilitates communication with the client mount.
+//
+// Each connection is set up using a unix domain socket. One end is owned by
+// the server and the other end is owned by the client. The connection may
+// spawn additional communication channels for the same mount for increased
+// RPC concurrency.
+type Connection struct {
+ // server is the server on which this connection was created. It is immutably
+ // associated with it for its entire lifetime.
+ server *Server
+
+ // mounted is a one way flag indicating whether this connection has been
+ // mounted correctly and the server is initialized properly. Until it is set,
+ // handleMsg rejects every message other than Mount.
+ mounted bool
+
+ // readonly indicates if this connection is readonly. All write operations
+ // will fail with EROFS.
+ readonly bool
+
+ // sockComm is the main socket by which this connection is established.
+ sockComm *sockCommunicator
+
+ // channelsMu protects channels.
+ channelsMu sync.Mutex
+ // channels keeps track of all open channels. It is set to nil when the
+ // connection closes, which prevents further channel creation.
+ channels []*channel
+
+ // activeWg represents active channels.
+ activeWg sync.WaitGroup
+
+ // reqGate counts requests that are still being handled.
+ reqGate sync.Gate
+
+ // channelAlloc is used to allocate memory for channels.
+ channelAlloc *flipcall.PacketWindowAllocator
+
+ fdsMu sync.RWMutex
+ // fds keeps track of open FDs on this server. It is protected by fdsMu.
+ fds map[FDID]genericFD
+ // nextFDID is the next available FDID. It is protected by fdsMu.
+ nextFDID FDID
+}
+
+// CreateConnection initializes a new connection - creating a server if
+// required. The connection must be started separately.
+func (s *Server) CreateConnection(sock *unet.Socket, readonly bool) (*Connection, error) {
+ c := &Connection{
+ sockComm: newSockComm(sock),
+ server: s,
+ readonly: readonly,
+ channels: make([]*channel, 0, maxChannels()),
+ fds: make(map[FDID]genericFD),
+ nextFDID: InvalidFDID + 1,
+ }
+
+ alloc, err := flipcall.NewPacketWindowAllocator()
+ if err != nil {
+ return nil, err
+ }
+ c.channelAlloc = alloc
+ return c, nil
+}
+
+// Server returns the associated server, i.e. the one this connection was
+// created on.
+func (c *Connection) Server() *Server {
+ return c.server
+}
+
+// ServerImpl returns the associated server implementation, i.e. the impl
+// field of the server this connection was created on.
+func (c *Connection) ServerImpl() ServerImpl {
+ return c.server.impl
+}
+
+// Run defines the lifecycle of a connection: it serves requests arriving on
+// the main socket, one at a time, until reading a request or writing a
+// response fails. The connection is closed when Run returns.
+func (c *Connection) Run() {
+	defer c.close()
+
+	for {
+		reqM, reqLen, err := c.sockComm.rcvMsg(0 /* wantFDs */)
+		if err != nil {
+			log.Debugf("sock read failed, closing connection: %v", err)
+			return
+		}
+
+		respM, respLen, respFDs := c.handleMsg(c.sockComm, reqM, reqLen)
+		sndErr := c.sockComm.sndPrepopulatedMsg(respM, respLen, respFDs)
+		// Whether or not the send succeeded, our copies of the FDs are closed.
+		closeFDs(respFDs)
+		if sndErr != nil {
+			log.Debugf("sock write failed, closing connection: %v", sndErr)
+			return
+		}
+	}
+}
+
+// service handles requests arriving on ch until the channel is shut down,
+// which shows up as a received length of zero, or until the transport fails.
+// It blocks for the lifetime of the channel and hence must be called in a
+// separate goroutine.
+func (c *Connection) service(ch *channel) error {
+	dataLen, err := ch.data.RecvFirst()
+	if err != nil {
+		return err
+	}
+	for dataLen > 0 {
+		reqM, reqLen, rcvErr := ch.rcvMsg(dataLen)
+		if rcvErr != nil {
+			return rcvErr
+		}
+
+		respM, respLen, respFDs := c.handleMsg(ch, reqM, reqLen)
+		// Donate as many FDs as possible; our copies are closed either way.
+		sentFDs := ch.sendFDs(respFDs)
+		closeFDs(respFDs)
+
+		// Publish the response and wait for the next request.
+		ch.marshalHdr(respM, sentFDs)
+		if dataLen, err = ch.data.SendRecv(respLen + chanHeaderLen); err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+// respondError marshals an ErrorResp carrying err into comm's payload buffer
+// and returns the triple describing that response: the Error message ID, the
+// payload length and no FDs.
+func (c *Connection) respondError(comm Communicator, err unix.Errno) (MID, uint32, []int) {
+	resp := ErrorResp{errno: uint32(err)}
+	size := uint32(resp.SizeBytes())
+	resp.MarshalUnsafe(comm.PayloadBuf(size))
+	return Error, size, nil
+}
+
+// handleMsg dispatches request m, whose payload of payloadLen bytes sits in
+// comm's payload buffer, to the server's handler for it. It returns the
+// response message ID, the response payload length and the FDs to donate.
+//
+// Failures are turned into Error responses: ECONNRESET if the connection is
+// shutting down, EINVAL if anything but Mount arrives before the connection
+// is mounted, EOPNOTSUPP if the server has no handler for m, and otherwise
+// the errno extracted from the handler's error.
+func (c *Connection) handleMsg(comm Communicator, m MID, payloadLen uint32) (MID, uint32, []int) {
+	if !c.reqGate.Enter() {
+		// c.close() has been called; the connection is shutting down.
+		return c.respondError(comm, unix.ECONNRESET)
+	}
+	defer c.reqGate.Leave()
+
+	if m != Mount && !c.mounted {
+		log.Warningf("connection must first be mounted")
+		return c.respondError(comm, unix.EINVAL)
+	}
+
+	// For forward compatibility, unknown messages are refused gracefully.
+	handlers := c.server.handlers
+	if int(m) >= len(handlers) || handlers[m] == nil {
+		log.Warningf("received request which is not supported by the server, MID = %d", m)
+		return c.respondError(comm, unix.EOPNOTSUPP)
+	}
+
+	// Run the handler, then collect whatever FDs it donated.
+	respLen, err := handlers[m](c, comm, payloadLen)
+	donated := comm.ReleaseFDs()
+	if err == nil {
+		return m, respLen, donated
+	}
+
+	// A failed request donates nothing.
+	closeFDs(donated)
+	return c.respondError(comm, p9.ExtractErrno(err))
+}
+
+// close tears the connection down. In order: it closes the request gate,
+// shuts down and destroys all channels, frees the channel allocator, destroys
+// the main socket and finally drops the connection's reference on every FD it
+// still tracks.
+func (c *Connection) close() {
+ // Wait for completion of all inflight requests. This is mostly so that if
+ // a request is stuck, the sandbox supervisor has the opportunity to kill
+ // us with SIGABRT to get a stack dump of the offending handler.
+ c.reqGate.Close()
+
+ // Shutdown and clean up channels.
+ c.channelsMu.Lock()
+ for _, ch := range c.channels {
+ ch.shutdown()
+ }
+ // Channels are destroyed only after activeWg has drained.
+ c.activeWg.Wait()
+ for _, ch := range c.channels {
+ ch.destroy()
+ }
+ // This is to prevent additional channels from being created.
+ c.channels = nil
+ c.channelsMu.Unlock()
+
+ // Free the channel memory.
+ if c.channelAlloc != nil {
+ c.channelAlloc.Destroy()
+ }
+
+ // Ensure the connection is closed.
+ c.sockComm.destroy()
+
+ // Cleanup all FDs.
+ c.fdsMu.Lock()
+ for fdid := range c.fds {
+ fd := c.removeFDLocked(fdid)
+ fd.DecRef(nil) // Drop the ref held by c.
+ }
+ c.fdsMu.Unlock()
+}
+
+// lookupFD returns the FD tracked under id, or EBADF if there is none. On
+// success a reference is taken on the FD on behalf of the caller.
+func (c *Connection) lookupFD(id FDID) (genericFD, error) {
+	c.fdsMu.RLock()
+	defer c.fdsMu.RUnlock()
+
+	if fd, found := c.fds[id]; found {
+		fd.IncRef()
+		return fd, nil
+	}
+	return nil, unix.EBADF
+}
+
+// LookupControlFD returns the control FD tracked under id on this connection.
+// On success, the caller gains a ref on the FD. EBADF is returned if id is
+// not tracked and EINVAL if it refers to an FD that is not a control FD.
+func (c *Connection) LookupControlFD(id FDID) (*ControlFD, error) {
+	generic, err := c.lookupFD(id)
+	if err != nil {
+		return nil, err
+	}
+
+	if cfd, isControl := generic.(*ControlFD); isControl {
+		return cfd, nil
+	}
+	// Wrong kind of FD: give back the ref taken by lookupFD.
+	generic.DecRef(nil)
+	return nil, unix.EINVAL
+}
+
+// LookupOpenFD returns the open FD tracked under id on this connection. On
+// success, the caller gains a ref on the FD. EBADF is returned if id is not
+// tracked and EINVAL if it refers to an FD that is not an open FD.
+func (c *Connection) LookupOpenFD(id FDID) (*OpenFD, error) {
+	generic, err := c.lookupFD(id)
+	if err != nil {
+		return nil, err
+	}
+
+	if ofd, isOpen := generic.(*OpenFD); isOpen {
+		return ofd, nil
+	}
+	// Wrong kind of FD: give back the ref taken by lookupFD.
+	generic.DecRef(nil)
+	return nil, unix.EINVAL
+}
+
+// insertFD starts tracking fd on the connection and returns the FDID assigned
+// to it. The caller must hold a ref on fd; that ref is transferred to the
+// connection. It panics if the FDID space is exhausted.
+func (c *Connection) insertFD(fd genericFD) FDID {
+	c.fdsMu.Lock()
+	defer c.fdsMu.Unlock()
+
+	id := c.nextFDID
+	// The counter wrapping around means there is no FDID left to hand out.
+	if c.nextFDID++; c.nextFDID < id {
+		panic("ran out of FDIDs")
+	}
+	c.fds[id] = fd
+	return id
+}
+
+// RemoveFD stops tracking the FD registered under id and drops the
+// connection's ref on it. It does nothing beyond the warning logged by
+// removeFDLocked if id is not tracked.
+func (c *Connection) RemoveFD(id FDID) {
+	c.fdsMu.Lock()
+	fd := c.removeFDLocked(id)
+	c.fdsMu.Unlock()
+
+	if fd == nil {
+		return
+	}
+	// Dropping the connection's ref can take arbitrarily long, which is why
+	// it happens only after c.fdsMu has been released.
+	fd.DecRef(nil)
+}
+
+// removeFDLocked stops tracking the FD registered under id and returns it, or
+// logs a warning and returns nil if there is no such FD. The connection's ref
+// is not dropped here: the caller must do that on the returned FD (preferably
+// without holding c.fdsMu).
+//
+// Precondition: c.fdsMu is locked.
+func (c *Connection) removeFDLocked(id FDID) genericFD {
+	fd := c.fds[id]
+	if fd != nil {
+		delete(c.fds, id)
+		return fd
+	}
+	log.Warningf("removeFDLocked called on non-existent FDID %d", id)
+	return nil
+}
diff --git a/pkg/lisafs/control_fd_list.go b/pkg/lisafs/control_fd_list.go
new file mode 100644
index 000000000..684d9c265
--- /dev/null
+++ b/pkg/lisafs/control_fd_list.go
@@ -0,0 +1,221 @@
+package lisafs
+
+// controlFDElementMapper provides an identity mapping from list elements to
+// their linker objects.
+//
+// In the list template this type can be replaced by a struct that maps
+// elements to linker objects when the two differ. Here both are *ControlFD,
+// so no translation is needed.
+type controlFDElementMapper struct{}
+
+// linkerFor maps an element to its linker, which is the element itself.
+//
+// This default implementation should be inlined.
+//
+//go:nosplit
+func (controlFDElementMapper) linkerFor(elem *ControlFD) *ControlFD {
+	return elem
+}
+
+// controlFDList is an intrusive doubly-linked list of *ControlFD. Entries can
+// be added to or removed from the list in O(1) time and with no additional
+// memory allocations.
+//
+// The zero value for controlFDList is an empty list ready to use.
+//
+// To iterate over a list (where l is a controlFDList):
+//      for e := l.Front(); e != nil; e = e.Next() {
+// 		// do something with e.
+//      }
+//
+// +stateify savable
+type controlFDList struct {
+	head *ControlFD
+	tail *ControlFD
+}
+
+// Reset puts list l back into the empty state. The elements that were on the
+// list are not touched.
+func (l *controlFDList) Reset() {
+	l.head, l.tail = nil, nil
+}
+
+// Empty reports whether the list has no elements.
+//
+//go:nosplit
+func (l *controlFDList) Empty() bool {
+	return l.head == nil
+}
+
+// Front returns the first element of list l, or nil if l is empty.
+//
+//go:nosplit
+func (l *controlFDList) Front() *ControlFD {
+	return l.head
+}
+
+// Back returns the last element of list l, or nil if l is empty.
+//
+//go:nosplit
+func (l *controlFDList) Back() *ControlFD {
+	return l.tail
+}
+
+// Len counts the elements in the list by walking it from front to back.
+//
+// NOTE: This is an O(n) operation.
+//
+//go:nosplit
+func (l *controlFDList) Len() (count int) {
+	e := l.Front()
+	for e != nil {
+		count++
+		e = (controlFDElementMapper{}.linkerFor(e)).Next()
+	}
+	return count
+}
+
+// PushFront makes e the first element of list l.
+//
+//go:nosplit
+func (l *controlFDList) PushFront(e *ControlFD) {
+	oldHead := l.head
+	eLinker := controlFDElementMapper{}.linkerFor(e)
+	eLinker.SetNext(oldHead)
+	eLinker.SetPrev(nil)
+	if oldHead == nil {
+		// The list was empty, so e is also the last element.
+		l.tail = e
+	} else {
+		controlFDElementMapper{}.linkerFor(oldHead).SetPrev(e)
+	}
+
+	l.head = e
+}
+
+// PushBack makes e the last element of list l.
+//
+//go:nosplit
+func (l *controlFDList) PushBack(e *ControlFD) {
+	oldTail := l.tail
+	eLinker := controlFDElementMapper{}.linkerFor(e)
+	eLinker.SetNext(nil)
+	eLinker.SetPrev(oldTail)
+	if oldTail == nil {
+		// The list was empty, so e is also the first element.
+		l.head = e
+	} else {
+		controlFDElementMapper{}.linkerFor(oldTail).SetNext(e)
+	}
+
+	l.tail = e
+}
+
+// PushBackList appends all elements of list m to the end of list l and leaves
+// m empty.
+//
+//go:nosplit
+func (l *controlFDList) PushBackList(m *controlFDList) {
+	switch {
+	case l.head == nil:
+		// l is empty: simply adopt m's elements.
+		l.head = m.head
+		l.tail = m.tail
+	case m.head != nil:
+		// Both lists are non-empty: splice m after l's current tail.
+		controlFDElementMapper{}.linkerFor(l.tail).SetNext(m.head)
+		controlFDElementMapper{}.linkerFor(m.head).SetPrev(l.tail)
+
+		l.tail = m.tail
+	}
+	m.head = nil
+	m.tail = nil
+}
+
+// InsertAfter links e into the list immediately after b. If b was the last
+// element, e becomes the new tail.
+//
+//go:nosplit
+func (l *controlFDList) InsertAfter(b, e *ControlFD) {
+	before := controlFDElementMapper{}.linkerFor(b)
+	inserted := controlFDElementMapper{}.linkerFor(e)
+
+	after := before.Next()
+
+	inserted.SetNext(after)
+	inserted.SetPrev(b)
+	before.SetNext(e)
+
+	if after == nil {
+		l.tail = e
+		return
+	}
+	controlFDElementMapper{}.linkerFor(after).SetPrev(e)
+}
+
+// InsertBefore links e into the list immediately before a. If a was the first
+// element, e becomes the new head.
+//
+//go:nosplit
+func (l *controlFDList) InsertBefore(a, e *ControlFD) {
+	after := controlFDElementMapper{}.linkerFor(a)
+	inserted := controlFDElementMapper{}.linkerFor(e)
+
+	before := after.Prev()
+	inserted.SetNext(a)
+	inserted.SetPrev(before)
+	after.SetPrev(e)
+
+	if before == nil {
+		l.head = e
+		return
+	}
+	controlFDElementMapper{}.linkerFor(before).SetNext(e)
+}
+
+// Remove unlinks e from l and clears e's own next/prev links. The head and
+// tail of l are only updated when they actually point at e.
+//
+//go:nosplit
+func (l *controlFDList) Remove(e *ControlFD) {
+	eLinker := controlFDElementMapper{}.linkerFor(e)
+	before := eLinker.Prev()
+	after := eLinker.Next()
+
+	switch {
+	case before != nil:
+		controlFDElementMapper{}.linkerFor(before).SetNext(after)
+	case l.head == e:
+		l.head = after
+	}
+
+	switch {
+	case after != nil:
+		controlFDElementMapper{}.linkerFor(after).SetPrev(before)
+	case l.tail == e:
+		l.tail = before
+	}
+
+	eLinker.SetNext(nil)
+	eLinker.SetPrev(nil)
+}
+
+// controlFDEntry is the default linker implementation for controlFDList.
+// Structs embed it anonymously to obtain the Next/Prev/SetNext/SetPrev
+// methods that the list needs.
+//
+// +stateify savable
+type controlFDEntry struct {
+	next *ControlFD
+	prev *ControlFD
+}
+
+// Next returns the element that follows e in the list, or nil.
+//
+//go:nosplit
+func (e *controlFDEntry) Next() *ControlFD {
+	return e.next
+}
+
+// Prev returns the element that precedes e in the list, or nil.
+//
+//go:nosplit
+func (e *controlFDEntry) Prev() *ControlFD {
+	return e.prev
+}
+
+// SetNext records elem as the element that follows e in the list.
+//
+//go:nosplit
+func (e *controlFDEntry) SetNext(elem *ControlFD) {
+	e.next = elem
+}
+
+// SetPrev records elem as the element that precedes e in the list.
+//
+//go:nosplit
+func (e *controlFDEntry) SetPrev(elem *ControlFD) {
+	e.prev = elem
+}
diff --git a/pkg/lisafs/control_fd_refs.go b/pkg/lisafs/control_fd_refs.go
new file mode 100644
index 000000000..cc24833f2
--- /dev/null
+++ b/pkg/lisafs/control_fd_refs.go
@@ -0,0 +1,140 @@
+package lisafs
+
+import (
+ "fmt"
+ "sync/atomic"
+
+ "gvisor.dev/gvisor/pkg/refsvfs2"
+)
+
+// controlFDenableLogging indicates whether reference-related events should be logged (with
+// stack traces). This is false by default and should only be set to true for
+// debugging purposes, as it can generate an extremely large amount of output
+// and drastically degrade performance.
+const controlFDenableLogging = false
+
+// controlFDobj is used to customize logging (RefType formats its type with %T). Note that we use a pointer to ControlFD so that
+// we do not copy the entire object when passed as a format parameter.
+var controlFDobj *ControlFD
+
+// controlFDRefs implements refs.RefCounter. It keeps a reference count using atomic
+// operations and calls the destructor when the count reaches zero.
+//
+// NOTE: Do not introduce additional fields to the Refs struct. It is used by
+// many filesystem objects, and we want to keep it as small as possible (i.e.,
+// the same size as using an int64 directly) to avoid taking up extra cache
+// space. In general, this template should not be extended at the cost of
+// performance. If it does not offer enough flexibility for a particular object
+// (example: b/187877947), we should implement the RefCounter/CheckedObject
+// interfaces manually.
+//
+// +stateify savable
+type controlFDRefs struct {
+ // refCount is composed of two fields:
+ //
+ //	[32-bit speculative references]:[32-bit real references]
+ //
+ // Speculative references are used for TryIncRef, to avoid a CompareAndSwap
+ // loop. See IncRef, DecRef and TryIncRef for details of how these fields are
+ // used.
+ refCount int64
+}
+
+// InitRefs initializes r with one reference and, if enabled, activates leak
+// checking.
+func (r *controlFDRefs) InitRefs() {
+ atomic.StoreInt64(&r.refCount, 1)
+ refsvfs2.Register(r)
+}
+
+// RefType implements refsvfs2.CheckedObject.RefType.
+func (r *controlFDRefs) RefType() string {
+ return fmt.Sprintf("%T", controlFDobj)[1:]
+}
+
+// LeakMessage implements refsvfs2.CheckedObject.LeakMessage.
+func (r *controlFDRefs) LeakMessage() string {
+ return fmt.Sprintf("[%s %p] reference count of %d instead of 0", r.RefType(), r, r.ReadRefs())
+}
+
+// LogRefs implements refsvfs2.CheckedObject.LogRefs.
+func (r *controlFDRefs) LogRefs() bool {
+ return controlFDenableLogging
+}
+
+// ReadRefs returns the current number of references. The returned count is
+// inherently racy and is unsafe to use without external synchronization.
+func (r *controlFDRefs) ReadRefs() int64 {
+ return atomic.LoadInt64(&r.refCount)
+}
+
+// IncRef implements refs.RefCounter.IncRef. It panics if the count was not positive before the increment.
+//
+//go:nosplit
+func (r *controlFDRefs) IncRef() {
+ v := atomic.AddInt64(&r.refCount, 1)
+ if controlFDenableLogging {
+ refsvfs2.LogIncRef(r, v)
+ }
+ if v <= 1 { // The count before the increment was <= 0.
+ panic(fmt.Sprintf("Incrementing non-positive count %p on %s", r, r.RefType()))
+ }
+}
+
+// TryIncRef implements refs.TryRefCounter.TryIncRef. It returns false, leaving the count unchanged, if there are no real references.
+//
+// To do this safely without a loop, a speculative reference is first acquired
+// on the object. This allows multiple concurrent TryIncRef calls to distinguish
+// other TryIncRef calls from genuine references held.
+//
+//go:nosplit
+func (r *controlFDRefs) TryIncRef() bool {
+ const speculativeRef = 1 << 32 // One unit in the upper (speculative) 32 bits of refCount.
+ if v := atomic.AddInt64(&r.refCount, speculativeRef); int32(v) == 0 { // Lower 32 bits (real references) are zero.
+
+ atomic.AddInt64(&r.refCount, -speculativeRef) // Undo the speculative reference.
+ return false
+ }
+
+ v := atomic.AddInt64(&r.refCount, -speculativeRef+1) // Turn the speculative reference into a real one.
+ if controlFDenableLogging {
+ refsvfs2.LogTryIncRef(r, v)
+ }
+ return true
+}
+
+// DecRef implements refs.RefCounter.DecRef. destroy, if non-nil, is called when the count reaches zero.
+//
+// Note that speculative references are counted here. Since they were added
+// prior to real references reaching zero, they will successfully convert to
+// real references. In other words, we see speculative references only in the
+// following case:
+//
+//	A: TryIncRef [speculative increase => sees non-negative references]
+//	B: DecRef [real decrease]
+//	A: TryIncRef [transform speculative to real]
+//
+//go:nosplit
+func (r *controlFDRefs) DecRef(destroy func()) {
+ v := atomic.AddInt64(&r.refCount, -1)
+ if controlFDenableLogging {
+ refsvfs2.LogDecRef(r, v)
+ }
+ switch {
+ case v < 0: // More DecRef calls than references held.
+ panic(fmt.Sprintf("Decrementing non-positive ref count %p, owned by %s", r, r.RefType()))
+
+ case v == 0: // Last reference dropped.
+ refsvfs2.Unregister(r)
+
+ if destroy != nil {
+ destroy()
+ }
+ }
+}
+
+// afterLoad re-registers r with refsvfs2 if it still has references.
+// NOTE(review): presumably invoked by the stateify machinery after a restore;
+// the caller is not visible in this file.
+func (r *controlFDRefs) afterLoad() {
+	if r.ReadRefs() <= 0 {
+		return
+	}
+	refsvfs2.Register(r)
+}
diff --git a/pkg/lisafs/fd.go b/pkg/lisafs/fd.go
new file mode 100644
index 000000000..9dd8ba384
--- /dev/null
+++ b/pkg/lisafs/fd.go
@@ -0,0 +1,348 @@
+// Copyright 2021 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package lisafs
+
+import (
+ "golang.org/x/sys/unix"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/fspath"
+ "gvisor.dev/gvisor/pkg/refsvfs2"
+ "gvisor.dev/gvisor/pkg/sync"
+)
+
+// FDID (file descriptor identifier) is used to identify FDs on a connection.
+// Each connection has its own FDID namespace.
+//
+// +marshal slice:FDIDSlice
+type FDID uint32
+
+// InvalidFDID represents an invalid FDID. It is the zero value of FDID.
+const InvalidFDID FDID = 0
+
+// Ok returns true if f is a valid FDID, i.e. f is not InvalidFDID.
+func (f FDID) Ok() bool {
+ return f != InvalidFDID
+}
+
+// genericFD can represent a ControlFD or OpenFD. It only requires reference counting.
+type genericFD interface {
+ refsvfs2.RefCounter
+}
+
+// A ControlFD is the gateway to the backing filesystem tree node. It is an
+// unusual concept. This exists to provide a safe way to do path-based
+// operations on the file. It performs operations that can modify the
+// filesystem tree and synchronizes these operations. See ControlFDImpl for
+// supported operations.
+//
+// It is not an inode, because multiple control FDs are allowed to exist on the
+// same file. It is not a file descriptor because it is not tied to any access
+// mode, i.e. a control FD can change its access mode based on the operation
+// being performed.
+//
+// Reference Model:
+// * When a control FD is created, the connection takes a ref on it which
+//   represents the client's ref on the FD.
+// * The client can drop its ref via the Close RPC which will in turn make the
+//   connection drop its ref.
+// * Each control FD holds a ref on its parent for its entire lifetime.
+type ControlFD struct {
+ controlFDRefs
+ controlFDEntry
+
+ // parent is the parent directory FD containing the file this FD represents.
+ // A ControlFD holds a ref on parent for its entire lifetime. If this FD
+ // represents the root, then parent is nil. parent may be a control FD from
+ // another connection (another mount point). parent is protected by the
+ // backing server's rename mutex.
+ parent *ControlFD
+
+ // name is the file path's last component name. If this FD represents the
+ // root directory, then name is the mount path. name is protected by the
+ // backing server's rename mutex.
+ name string
+
+ // children is a linked list of all children control FDs. As per reference
+ // model, all children hold a ref on this FD.
+ // children is protected by childrenMu and server's rename mutex. To have
+ // mutual exclusion, it is sufficient to either:
+ // * Hold rename mutex for reading and lock childrenMu, OR
+ // * Hold rename mutex for writing.
+ childrenMu sync.Mutex
+ children controlFDList
+
+ // openFDs is a linked list of all FDs opened on this FD. As per reference
+ // model, all open FDs hold a ref on this FD. openFDs is protected by openFDsMu.
+ openFDsMu sync.RWMutex
+ openFDs openFDList
+
+ // All the following fields are immutable.
+
+ // id is the unique FD identifier which identifies this FD on its connection.
+ id FDID
+
+ // conn is the backing connection owning this FD.
+ conn *Connection
+
+ // ftype is the file type of the backing inode. ftype.FileType() == ftype.
+ ftype linux.FileMode
+
+ // impl is the control FD implementation which embeds this struct. It
+ // contains all the implementation specific details.
+ impl ControlFDImpl
+}
+
+var _ genericFD = (*ControlFD)(nil) // Compile-time interface check.
+
+// DecRef implements refsvfs2.RefCounter.DecRef. Note that the context
+// parameter should never be used. It exists solely to comply with the
+// refsvfs2.RefCounter interface.
+//
+// When the last ref is dropped, fd is unlinked from its parent's children
+// list, the ref it held on its parent is dropped and fd.impl is closed. In
+// that case DecRef acquires the server's rename mutex for reading, so callers
+// that already hold the rename mutex must use DecRefLocked instead.
+func (fd *ControlFD) DecRef(context.Context) {
+	fd.controlFDRefs.DecRef(func() {
+		// fd.parent is protected by the rename mutex, so it must only be
+		// inspected once the mutex is held. clearParentLocked performs the same
+		// unlinking as DecRefLocked, which keeps both destruction paths in sync.
+		fd.conn.server.RenameMu.RLock()
+		fd.clearParentLocked()
+		fd.conn.server.RenameMu.RUnlock()
+		fd.impl.Close(fd.conn)
+	})
+}
+
+// DecRefLocked drops a ref on fd like DecRef does, but expects the rename
+// mutex to already be held by the caller. When the last ref is dropped, fd is
+// detached from its parent and fd.impl is closed.
+//
+// Precondition: server's rename mutex must be at least read locked.
+func (fd *ControlFD) DecRefLocked() {
+	destroy := func() {
+		fd.clearParentLocked()
+		fd.impl.Close(fd.conn)
+	}
+	fd.controlFDRefs.DecRef(destroy)
+}
+
+// clearParentLocked removes fd from its parent's children list and drops the
+// ref fd held on the parent. It does nothing when fd has no parent. Note that
+// fd.parent itself is left pointing at the old parent.
+//
+// Precondition: server's rename mutex must be at least read locked.
+func (fd *ControlFD) clearParentLocked() {
+	if parent := fd.parent; parent != nil {
+		parent.childrenMu.Lock()
+		parent.children.Remove(fd)
+		parent.childrenMu.Unlock()
+		parent.DecRefLocked() // Drop the ref on the parent.
+	}
+}
+
+// Init must be called before first use of fd. It inserts fd into the
+// filesystem tree (as a child of parent, which may be nil for the root) and
+// registers it on connection c, which assigns fd its FDID.
+//
+// Precondition: server's rename mutex must be at least read locked.
+func (fd *ControlFD) Init(c *Connection, parent *ControlFD, name string, mode linux.FileMode, impl ControlFDImpl) {
+	// Initialize fd with 1 ref which is transferred to c via c.insertFD().
+	fd.controlFDRefs.InitRefs()
+	fd.conn = c
+	fd.name = name
+	fd.ftype = mode.FileType()
+	fd.impl = impl
+	fd.setParentLocked(parent)
+	// Make fd discoverable only after every other field has been set up.
+	// FDIDs are handed out sequentially, so a client could otherwise look up
+	// the new FDID from another channel and observe a half-initialized fd
+	// (e.g. a nil impl).
+	fd.id = c.insertFD(fd)
+}
+
+// setParentLocked records parent as fd's parent. For a non-nil parent it also
+// takes a ref on parent and appends fd to parent's children list.
+//
+// Precondition: server's rename mutex must be at least read locked.
+func (fd *ControlFD) setParentLocked(parent *ControlFD) {
+	fd.parent = parent
+	if parent == nil {
+		return
+	}
+	parent.IncRef() // Hold a ref on parent.
+	parent.childrenMu.Lock()
+	parent.children.PushBack(fd)
+	parent.childrenMu.Unlock()
+}
+
+// FileType returns the file mode of the backing file, containing only the
+// file type bits.
+func (fd *ControlFD) FileType() linux.FileMode {
+	return fd.ftype
+}
+
+// IsDir reports whether fd represents a directory.
+func (fd *ControlFD) IsDir() bool {
+	return fd.FileType() == unix.S_IFDIR
+}
+
+// IsRegular reports whether fd represents a regular file.
+func (fd *ControlFD) IsRegular() bool {
+	return fd.FileType() == unix.S_IFREG
+}
+
+// IsSymlink reports whether fd represents a symbolic link.
+func (fd *ControlFD) IsSymlink() bool {
+	return fd.FileType() == unix.S_IFLNK
+}
+
+// IsSocket reports whether fd represents a socket.
+func (fd *ControlFD) IsSocket() bool {
+	return fd.FileType() == unix.S_IFSOCK
+}
+
+// NameLocked returns the last path component of the backing file.
+//
+// Precondition: server's rename mutex must be at least read locked.
+func (fd *ControlFD) NameLocked() string {
+	return fd.name
+}
+
+// ParentLocked returns the implementation of the parent control FD, or nil if
+// fd has no parent. Note that the parent might be a control FD from another
+// connection on this server. So its ID must not be returned on this connection
+// because FDIDs are local to their connection.
+//
+// Precondition: server's rename mutex must be at least read locked.
+func (fd *ControlFD) ParentLocked() ControlFDImpl {
+	if parent := fd.parent; parent != nil {
+		return parent.impl
+	}
+	return nil
+}
+
+// ID returns the FDID that identifies fd on its connection.
+func (fd *ControlFD) ID() FDID {
+	return fd.id
+}
+
+// FilePath returns the absolute path of the file fd was opened on. This is
+// expensive and must not be called on hot paths. FilePath acquires the rename
+// mutex for reading so callers should not be holding it.
+func (fd *ControlFD) FilePath() string {
+	// Hold the rename mutex for reading so that the filesystem tree cannot
+	// change while it is traversed upwards.
+	renameMu := &fd.conn.server.RenameMu
+	renameMu.RLock()
+	defer renameMu.RUnlock()
+	return fd.FilePathLocked()
+}
+
+// FilePathLocked is the same as FilePath with the additional precondition.
+//
+// Precondition: server's rename mutex must be at least read locked.
+func (fd *ControlFD) FilePathLocked() string {
+	// Climb from fd to the root, prepending each name on the way.
+	var b fspath.Builder
+	for cur := fd; cur != nil; cur = cur.parent {
+		b.PrependComponent(cur.name)
+	}
+	return b.String()
+}
+
+// ForEachOpenFD calls fn with the implementation of every FD currently opened
+// on fd, in list order. fd.openFDsMu is held for reading for the whole
+// iteration, including while fn runs.
+func (fd *ControlFD) ForEachOpenFD(fn func(ofd OpenFDImpl)) {
+	fd.openFDsMu.RLock()
+	defer fd.openFDsMu.RUnlock()
+	cur := fd.openFDs.Front()
+	for cur != nil {
+		fn(cur.impl)
+		cur = cur.Next()
+	}
+}
+
+// OpenFD represents an open file descriptor on the protocol. It corresponds
+// closely to a Linux file descriptor. Its operations are limited to the
+// file. Its operations are not allowed to modify or traverse the filesystem
+// tree. See OpenFDImpl for the supported operations.
+//
+// Reference Model:
+// * An OpenFD takes a reference on the control FD it was opened on.
+type OpenFD struct {
+ openFDRefs
+ openFDEntry
+
+ // All the following fields are immutable.
+
+ // controlFD is the ControlFD on which this FD was opened. OpenFD holds a ref
+ // on controlFD for its entire lifetime.
+ controlFD *ControlFD
+
+ // id is the unique FD identifier which identifies this FD on its connection.
+ id FDID
+
+ // Access mode for this FD, derived from the open flags passed to Init.
+ readable bool
+ writable bool
+
+ // impl is the open FD implementation which embeds this struct. It
+ // contains all the implementation specific details.
+ impl OpenFDImpl
+}
+
+var _ genericFD = (*OpenFD)(nil) // Compile-time interface check.
+
+// ID returns the FDID that identifies fd on its connection.
+func (fd *OpenFD) ID() FDID {
+	return fd.id
+}
+
+// ControlFD returns the implementation of the control FD on which this FD was
+// opened.
+func (fd *OpenFD) ControlFD() ControlFDImpl {
+	cfd := fd.controlFD
+	return cfd.impl
+}
+
+// DecRef implements refsvfs2.RefCounter.DecRef. Note that the context
+// parameter should never be used. It exists solely to comply with the
+// refsvfs2.RefCounter interface.
+//
+// When the last ref is dropped, fd is removed from its control FD's list of
+// open FDs, the ref on the control FD is dropped and fd.impl is closed.
+func (fd *OpenFD) DecRef(context.Context) {
+	destroy := func() {
+		cfd := fd.controlFD
+		cfd.openFDsMu.Lock()
+		cfd.openFDs.Remove(fd)
+		cfd.openFDsMu.Unlock()
+		cfd.DecRef(nil) // Drop the ref on the control FD.
+		fd.impl.Close(cfd.conn)
+	}
+	fd.openFDRefs.DecRef(destroy)
+}
+
+// Init must be called before first use of fd. It records cfd as the control
+// FD that fd was opened on, derives the access mode from flags, links fd into
+// cfd's list of open FDs and registers fd on cfd's connection, which assigns
+// fd its FDID.
+func (fd *OpenFD) Init(cfd *ControlFD, flags uint32, impl OpenFDImpl) {
+	// Initialize fd with 1 ref which is transferred to c via c.insertFD().
+	fd.openFDRefs.InitRefs()
+	fd.controlFD = cfd
+	accessMode := flags & unix.O_ACCMODE
+	fd.readable = accessMode == unix.O_RDONLY || accessMode == unix.O_RDWR
+	fd.writable = accessMode == unix.O_WRONLY || accessMode == unix.O_RDWR
+	fd.impl = impl
+	cfd.IncRef() // Holds a ref on cfd for its lifetime.
+	cfd.openFDsMu.Lock()
+	cfd.openFDs.PushBack(fd)
+	cfd.openFDsMu.Unlock()
+	// Make fd discoverable only after it is fully set up. FDIDs are handed
+	// out sequentially, so a client could otherwise look up (or close) the
+	// new FDID from another channel while fd still has a nil impl or is not
+	// yet linked into cfd.openFDs.
+	fd.id = cfd.conn.insertFD(fd)
+}
+
+// ControlFDImpl contains implementation details for a ControlFD.
+// Implementations of ControlFDImpl should contain their associated ControlFD
+// by value as their first field.
+//
+// The operations that perform path traversal or any modification to the
+// filesystem tree must synchronize those modifications with the server's
+// rename mutex.
+type ControlFDImpl interface {
+ FD() *ControlFD // FD returns the associated ControlFD.
+ Close(c *Connection) // Close is called when the last ref on the ControlFD is dropped.
+}
+
+// OpenFDImpl contains implementation details for an OpenFD. Implementations of
+// OpenFDImpl should contain their associated OpenFD by value as their first
+// field.
+//
+// Since these operations do not perform any path traversal or any modification
+// to the filesystem tree, there is no need to synchronize with rename
+// operations.
+type OpenFDImpl interface {
+ FD() *OpenFD // FD returns the associated OpenFD.
+ Close(c *Connection) // Close is called when the last ref on the OpenFD is dropped.
+}
diff --git a/pkg/lisafs/handlers.go b/pkg/lisafs/handlers.go
new file mode 100644
index 000000000..9b8d8164a
--- /dev/null
+++ b/pkg/lisafs/handlers.go
@@ -0,0 +1,124 @@
+// Copyright 2021 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package lisafs
+
+import (
+ "path"
+ "path/filepath"
+
+ "golang.org/x/sys/unix"
+ "gvisor.dev/gvisor/pkg/flipcall"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/marshal/primitive"
+)
+
+// RPCHandler defines a handler that is invoked when the associated message is
+// received. The handler is responsible for:
+//
+// * Unmarshalling the request from the passed payload and interpreting it.
+// * Marshalling the response into the communicator's payload buffer.
+// * Returning the number of payload bytes written.
+// * Donating any FDs (if needed) to comm which will in turn donate them to the client.
+type RPCHandler func(c *Connection, comm Communicator, payloadLen uint32) (uint32, error)
+
+var handlers = [...]RPCHandler{ // Indexed by message ID.
+ Error: ErrorHandler,
+ Mount: MountHandler,
+ Channel: ChannelHandler,
+}
+
+// ErrorHandler handles the Error message. It always fails with unix.EINVAL.
+func ErrorHandler(c *Connection, comm Communicator, payloadLen uint32) (uint32, error) {
+ // Client should never send Error.
+ return 0, unix.EINVAL
+}
+
+// MountHandler handles the Mount RPC. Note that there can not be concurrent
+// executions of MountHandler on a connection because the connection enforces
+// that Mount is the first message on the connection. Only after the connection
+// has been successfully mounted can other channels be created.
+//
+// It fails with unix.EINVAL if the cleaned mount path is not absolute and
+// with unix.EBUSY if the connection has already been mounted. On success the
+// root control FD is registered as a mount point on the server and a
+// MountResp is marshalled into comm's payload buffer.
+func MountHandler(c *Connection, comm Communicator, payloadLen uint32) (uint32, error) {
+	var req MountReq
+	req.UnmarshalBytes(comm.PayloadBuf(payloadLen))
+
+	mountPath := path.Clean(string(req.MountPath))
+	if !filepath.IsAbs(mountPath) {
+		log.Warningf("mountPath %q is not absolute", mountPath)
+		return 0, unix.EINVAL
+	}
+
+	if c.mounted {
+		// mountPath is the path requested by this (rejected) call, not the
+		// path the connection is already mounted at, so word the message
+		// accordingly.
+		log.Warningf("connection is already mounted; rejecting mount request for %q", mountPath)
+		return 0, unix.EBUSY
+	}
+
+	rootFD, rootIno, err := c.ServerImpl().Mount(c, mountPath)
+	if err != nil {
+		return 0, err
+	}
+
+	c.server.addMountPoint(rootFD.FD())
+	c.mounted = true
+	resp := MountResp{
+		Root:           rootIno,
+		SupportedMs:    c.ServerImpl().SupportedMessages(),
+		MaxMessageSize: primitive.Uint32(c.ServerImpl().MaxMessageSize()),
+	}
+	respPayloadLen := uint32(resp.SizeBytes())
+	resp.MarshalBytes(comm.PayloadBuf(respPayloadLen))
+	return respPayloadLen, nil
+}
+
+// ChannelHandler handles the Channel RPC. It creates a channel, starts servicing it and donates the channel's data FD and FD socket to the client.
+func ChannelHandler(c *Connection, comm Communicator, payloadLen uint32) (uint32, error) {
+ ch, desc, fdSock, err := c.createChannel(c.ServerImpl().MaxMessageSize())
+ if err != nil {
+ return 0, err
+ }
+
+ // Start servicing the channel in a separate goroutine. c.activeWg tracks the goroutine's lifetime.
+ c.activeWg.Add(1)
+ go func() {
+ if err := c.service(ch); err != nil {
+ // Don't log shutdown error which is expected during server shutdown.
+ if _, ok := err.(flipcall.ShutdownError); !ok {
+ log.Warningf("lisafs.Connection.service(channel = @%p): %v", ch, err)
+ }
+ }
+ c.activeWg.Done()
+ }()
+
+ clientDataFD, err := unix.Dup(desc.FD) // The duplicate is what gets donated to the client.
+ if err != nil {
+ unix.Close(fdSock)
+ ch.shutdown()
+ return 0, err
+ }
+
+ // Respond to client with successful channel creation message.
+ if err := comm.DonateFD(clientDataFD); err != nil {
+ return 0, err // NOTE(review): fdSock is not closed and ch is not shut down on this path — confirm whether that is handled elsewhere.
+ }
+ if err := comm.DonateFD(fdSock); err != nil {
+ return 0, err // NOTE(review): ch is not shut down on this path; FD ownership after a failed DonateFD is not visible here — confirm.
+ }
+ resp := ChannelResp{
+ dataOffset: desc.Offset,
+ dataLength: uint64(desc.Length),
+ }
+ respLen := uint32(resp.SizeBytes())
+ resp.MarshalUnsafe(comm.PayloadBuf(respLen))
+ return respLen, nil
+}
diff --git a/pkg/lisafs/lisafs.go b/pkg/lisafs/lisafs.go
new file mode 100644
index 000000000..4d8e956ab
--- /dev/null
+++ b/pkg/lisafs/lisafs.go
@@ -0,0 +1,18 @@
+// Copyright 2021 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package lisafs (LInux SAndbox FileSystem) defines the protocol for
+// filesystem RPCs between an untrusted Sandbox (client) and a trusted
+// filesystem server.
+package lisafs
diff --git a/pkg/lisafs/lisafs_abi_autogen_unsafe.go b/pkg/lisafs/lisafs_abi_autogen_unsafe.go
new file mode 100644
index 000000000..ece422578
--- /dev/null
+++ b/pkg/lisafs/lisafs_abi_autogen_unsafe.go
@@ -0,0 +1,1534 @@
+// Automatically generated marshal implementation. See tools/go_marshal.
+
+package lisafs
+
+import (
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/gohacks"
+ "gvisor.dev/gvisor/pkg/hostarch"
+ "gvisor.dev/gvisor/pkg/marshal"
+ "io"
+ "reflect"
+ "runtime"
+ "unsafe"
+)
+
+// Compile-time assertions that each Marshallable type used by this file implements marshal.Marshallable.
+var _ marshal.Marshallable = (*ChannelResp)(nil)
+var _ marshal.Marshallable = (*ErrorResp)(nil)
+var _ marshal.Marshallable = (*FDID)(nil)
+var _ marshal.Marshallable = (*GID)(nil)
+var _ marshal.Marshallable = (*Inode)(nil)
+var _ marshal.Marshallable = (*MID)(nil)
+var _ marshal.Marshallable = (*MsgDynamic)(nil)
+var _ marshal.Marshallable = (*MsgSimple)(nil)
+var _ marshal.Marshallable = (*P9Version)(nil)
+var _ marshal.Marshallable = (*UID)(nil)
+var _ marshal.Marshallable = (*channelHeader)(nil)
+var _ marshal.Marshallable = (*linux.Statx)(nil)
+var _ marshal.Marshallable = (*sockHeader)(nil)
+
+// SizeBytes implements marshal.Marshallable.SizeBytes. The size is that of a MID plus 2 more bytes.
+func (c *channelHeader) SizeBytes() int {
+ return 2 +
+ (*MID)(nil).SizeBytes()
+}
+
+// MarshalBytes implements marshal.Marshallable.MarshalBytes. It writes message, then numFDs as one byte, then skips one padding byte.
+func (c *channelHeader) MarshalBytes(dst []byte) {
+ c.message.MarshalBytes(dst[:c.message.SizeBytes()])
+ dst = dst[c.message.SizeBytes():]
+ dst[0] = byte(c.numFDs)
+ dst = dst[1:]
+ // Padding: one uint8 is skipped here; nothing is written to it, so dst keeps whatever byte it already held.
+ dst = dst[1:]
+}
+
+// UnmarshalBytes implements marshal.Marshallable.UnmarshalBytes. It reads message, then numFDs from one byte, then skips one padding byte.
+func (c *channelHeader) UnmarshalBytes(src []byte) {
+ c.message.UnmarshalBytes(src[:c.message.SizeBytes()])
+ src = src[c.message.SizeBytes():]
+ c.numFDs = uint8(src[0])
+ src = src[1:]
+ // Padding: one uint8 of src is skipped and its value is ignored.
+ src = src[1:]
+}
+
+// Packed implements marshal.Marshallable.Packed. The result is delegated to the message field's Packed().
+//go:nosplit
+func (c *channelHeader) Packed() bool {
+ return c.message.Packed()
+}
+
+// MarshalUnsafe implements marshal.Marshallable.MarshalUnsafe. If message is packed, c's memory is copied into dst with one Memmove; &dst[0] is taken, so dst must not be empty.
+func (c *channelHeader) MarshalUnsafe(dst []byte) {
+ if c.message.Packed() {
+ gohacks.Memmove(unsafe.Pointer(&dst[0]), unsafe.Pointer(c), uintptr(c.SizeBytes()))
+ } else {
+ // channelHeader does not have a packed layout in memory here; fall back to field-by-field MarshalBytes.
+ c.MarshalBytes(dst)
+ }
+}
+
+// UnmarshalUnsafe implements marshal.Marshallable.UnmarshalUnsafe. If message is packed, SizeBytes() bytes of src are copied over c with one Memmove; &src[0] is taken, so src must not be empty.
+func (c *channelHeader) UnmarshalUnsafe(src []byte) {
+ if c.message.Packed() {
+ gohacks.Memmove(unsafe.Pointer(c), unsafe.Pointer(&src[0]), uintptr(c.SizeBytes()))
+ } else {
+ // channelHeader does not have a packed layout in memory here; fall back to field-by-field UnmarshalBytes.
+ c.UnmarshalBytes(src)
+ }
+}
+
+// CopyOutN implements marshal.Marshallable.CopyOutN. It copies the first limit bytes of c's encoding to addr through cc.
+//go:nosplit
+func (c *channelHeader) CopyOutN(cc marshal.CopyContext, addr hostarch.Addr, limit int) (int, error) {
+ if !c.message.Packed() {
+ // channelHeader does not have a packed layout in memory here; marshal into cc's scratch buffer and copy that out.
+ buf := cc.CopyScratchBuffer(c.SizeBytes()) // escapes: okay.
+ c.MarshalBytes(buf) // escapes: fallback.
+ return cc.CopyOutBytes(addr, buf[:limit]) // escapes: okay.
+ }
+
+ // Construct a byte slice backed by c's own memory (not a copy), SizeBytes() long.
+ var buf []byte
+ hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf))
+ hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(c)))
+ hdr.Len = c.SizeBytes()
+ hdr.Cap = c.SizeBytes()
+
+ length, err := cc.CopyOutBytes(addr, buf[:limit]) // escapes: okay.
+ // Since we bypassed the compiler's escape analysis, indicate that c
+ // must live until the use above.
+ runtime.KeepAlive(c) // escapes: replaced by intrinsic.
+ return length, err
+}
+
+// CopyOut implements marshal.Marshallable.CopyOut. It is CopyOutN with limit set to the full SizeBytes().
+//go:nosplit
+func (c *channelHeader) CopyOut(cc marshal.CopyContext, addr hostarch.Addr) (int, error) {
+ return c.CopyOutN(cc, addr, c.SizeBytes())
+}
+
+// CopyIn implements marshal.Marshallable.CopyIn. It reads up to SizeBytes() bytes from addr through cc into c and returns CopyInBytes's length and error.
+//go:nosplit
+func (c *channelHeader) CopyIn(cc marshal.CopyContext, addr hostarch.Addr) (int, error) {
+ if !c.message.Packed() {
+ // channelHeader does not have a packed layout in memory here; copy into cc's scratch buffer and decode with UnmarshalBytes.
+ buf := cc.CopyScratchBuffer(c.SizeBytes()) // escapes: okay.
+ length, err := cc.CopyInBytes(addr, buf) // escapes: okay.
+ // Unmarshal unconditionally. If we had a short copy-in, this results in a
+ // partially unmarshalled struct.
+ c.UnmarshalBytes(buf) // escapes: fallback.
+ return length, err
+ }
+
+ // Construct a byte slice backed by c's own memory, so CopyInBytes writes straight into c.
+ var buf []byte
+ hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf))
+ hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(c)))
+ hdr.Len = c.SizeBytes()
+ hdr.Cap = c.SizeBytes()
+
+ length, err := cc.CopyInBytes(addr, buf) // escapes: okay.
+ // Since we bypassed the compiler's escape analysis, indicate that c
+ // must live until the use above.
+ runtime.KeepAlive(c) // escapes: replaced by intrinsic.
+ return length, err
+}
+
+// WriteTo implements io.WriterTo.WriteTo. It passes c's SizeBytes()-long encoding to a single writer.Write call and returns that call's count and error.
+func (c *channelHeader) WriteTo(writer io.Writer) (int64, error) {
+ if !c.message.Packed() {
+ // channelHeader does not have a packed layout in memory here; marshal into a freshly allocated buffer and write that.
+ buf := make([]byte, c.SizeBytes())
+ c.MarshalBytes(buf)
+ length, err := writer.Write(buf)
+ return int64(length), err
+ }
+
+ // Construct a byte slice backed by c's own memory (not a copy), SizeBytes() long.
+ var buf []byte
+ hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf))
+ hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(c)))
+ hdr.Len = c.SizeBytes()
+ hdr.Cap = c.SizeBytes()
+
+ length, err := writer.Write(buf)
+ // Since we bypassed the compiler's escape analysis, indicate that c
+ // must live until the use above.
+ runtime.KeepAlive(c) // escapes: replaced by intrinsic.
+ return int64(length), err
+}
+
+// SizeBytes implements marshal.Marshallable.SizeBytes. An FDID always encodes to 4 bytes.
+//go:nosplit
+func (f *FDID) SizeBytes() int {
+ return 4
+}
+
+// MarshalBytes implements marshal.Marshallable.MarshalBytes. It stores *f as a uint32 in dst[:4] using hostarch.ByteOrder.
+func (f *FDID) MarshalBytes(dst []byte) {
+ hostarch.ByteOrder.PutUint32(dst[:4], uint32(*f))
+}
+
+// UnmarshalBytes implements marshal.Marshallable.UnmarshalBytes. It loads *f from the uint32 in src[:4] using hostarch.ByteOrder.
+func (f *FDID) UnmarshalBytes(src []byte) {
+ *f = FDID(uint32(hostarch.ByteOrder.Uint32(src[:4])))
+}
+
+// Packed implements marshal.Marshallable.Packed. It is unconditionally true for FDID.
+//go:nosplit
+func (f *FDID) Packed() bool {
+ // Scalar newtypes are always packed.
+ return true
+}
+
+// MarshalUnsafe implements marshal.Marshallable.MarshalUnsafe. It copies the SizeBytes() bytes of *f into dst with one Memmove; &dst[0] is taken, so dst must not be empty.
+func (f *FDID) MarshalUnsafe(dst []byte) {
+ gohacks.Memmove(unsafe.Pointer(&dst[0]), unsafe.Pointer(f), uintptr(f.SizeBytes()))
+}
+
+// UnmarshalUnsafe implements marshal.Marshallable.UnmarshalUnsafe. It copies SizeBytes() bytes from src over *f with one Memmove; &src[0] is taken, so src must not be empty.
+func (f *FDID) UnmarshalUnsafe(src []byte) {
+ gohacks.Memmove(unsafe.Pointer(f), unsafe.Pointer(&src[0]), uintptr(f.SizeBytes()))
+}
+
+// CopyOutN implements marshal.Marshallable.CopyOutN. It copies the first limit bytes of *f to addr through cc.
+//go:nosplit
+func (f *FDID) CopyOutN(cc marshal.CopyContext, addr hostarch.Addr, limit int) (int, error) {
+ // Construct a byte slice backed by f's own memory (not a copy), SizeBytes() long.
+ var buf []byte
+ hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf))
+ hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(f)))
+ hdr.Len = f.SizeBytes()
+ hdr.Cap = f.SizeBytes()
+
+ length, err := cc.CopyOutBytes(addr, buf[:limit]) // escapes: okay.
+ // Since we bypassed the compiler's escape analysis, indicate that f
+ // must live until the use above.
+ runtime.KeepAlive(f) // escapes: replaced by intrinsic.
+ return length, err
+}
+
+// CopyOut implements marshal.Marshallable.CopyOut. It is CopyOutN with limit set to the full SizeBytes().
+//go:nosplit
+func (f *FDID) CopyOut(cc marshal.CopyContext, addr hostarch.Addr) (int, error) {
+ return f.CopyOutN(cc, addr, f.SizeBytes())
+}
+
+// CopyIn implements marshal.Marshallable.CopyIn. It reads up to SizeBytes() bytes from addr through cc directly into *f.
+//go:nosplit
+func (f *FDID) CopyIn(cc marshal.CopyContext, addr hostarch.Addr) (int, error) {
+ // Construct a byte slice backed by f's own memory, so CopyInBytes writes straight into *f.
+ var buf []byte
+ hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf))
+ hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(f)))
+ hdr.Len = f.SizeBytes()
+ hdr.Cap = f.SizeBytes()
+
+ length, err := cc.CopyInBytes(addr, buf) // escapes: okay.
+ // Since we bypassed the compiler's escape analysis, indicate that f
+ // must live until the use above.
+ runtime.KeepAlive(f) // escapes: replaced by intrinsic.
+ return length, err
+}
+
+// WriteTo implements io.WriterTo.WriteTo. It passes the SizeBytes() bytes of *f to a single w.Write call and returns that call's count and error.
+func (f *FDID) WriteTo(w io.Writer) (int64, error) {
+ // Construct a byte slice backed by f's own memory (not a copy), SizeBytes() long.
+ var buf []byte
+ hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf))
+ hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(f)))
+ hdr.Len = f.SizeBytes()
+ hdr.Cap = f.SizeBytes()
+
+ length, err := w.Write(buf)
+ // Since we bypassed the compiler's escape analysis, indicate that f
+ // must live until the use above.
+ runtime.KeepAlive(f) // escapes: replaced by intrinsic.
+ return int64(length), err
+}
+
+// CopyFDIDSliceIn copies in a slice of FDID objects from the task's memory. An empty dst returns (0, nil) without calling cc.
+//go:nosplit
+func CopyFDIDSliceIn(cc marshal.CopyContext, addr hostarch.Addr, dst []FDID) (int, error) {
+ count := len(dst)
+ if count == 0 {
+ return 0, nil
+ }
+ size := (*FDID)(nil).SizeBytes()
+
+ ptr := unsafe.Pointer(&dst)
+ val := gohacks.Noescape(unsafe.Pointer((*reflect.SliceHeader)(ptr).Data))
+
+ // Construct a byte slice backed by dst's underlying array, size*count bytes long, so CopyInBytes fills dst directly.
+ var buf []byte
+ hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf))
+ hdr.Data = uintptr(val)
+ hdr.Len = size * count
+ hdr.Cap = size * count
+
+ length, err := cc.CopyInBytes(addr, buf) // escapes: okay.
+ // Since we bypassed the compiler's escape analysis, indicate that dst
+ // must live until the use above.
+ runtime.KeepAlive(dst) // escapes: replaced by intrinsic.
+ return length, err
+}
+
+// CopyFDIDSliceOut copies a slice of FDID objects to the task's memory. An empty src returns (0, nil) without calling cc.
+//go:nosplit
+func CopyFDIDSliceOut(cc marshal.CopyContext, addr hostarch.Addr, src []FDID) (int, error) {
+ count := len(src)
+ if count == 0 {
+ return 0, nil
+ }
+ size := (*FDID)(nil).SizeBytes()
+
+ ptr := unsafe.Pointer(&src)
+ val := gohacks.Noescape(unsafe.Pointer((*reflect.SliceHeader)(ptr).Data))
+
+ // Construct a byte slice backed by src's underlying array, size*count bytes long.
+ var buf []byte
+ hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf))
+ hdr.Data = uintptr(val)
+ hdr.Len = size * count
+ hdr.Cap = size * count
+
+ length, err := cc.CopyOutBytes(addr, buf) // escapes: okay.
+ // Since we bypassed the compiler's escape analysis, indicate that src
+ // must live until the use above.
+ runtime.KeepAlive(src) // escapes: replaced by intrinsic.
+ return length, err
+}
+
+// MarshalUnsafeFDIDSlice is like FDID.MarshalUnsafe, but for a []FDID. It returns size*count bytes written and a nil error; dst is resliced to that length, so it must be able to hold it.
+func MarshalUnsafeFDIDSlice(src []FDID, dst []byte) (int, error) {
+ count := len(src)
+ if count == 0 {
+ return 0, nil
+ }
+ size := (*FDID)(nil).SizeBytes()
+
+ dst = dst[:size*count]
+ gohacks.Memmove(unsafe.Pointer(&dst[0]), unsafe.Pointer(&src[0]), uintptr(len(dst)))
+ return size*count, nil
+}
+
+// UnmarshalUnsafeFDIDSlice is like FDID.UnmarshalUnsafe, but for a []FDID. It returns size*count bytes consumed and a nil error; src is resliced to that length, so it must be at least that long.
+func UnmarshalUnsafeFDIDSlice(dst []FDID, src []byte) (int, error) {
+ count := len(dst)
+ if count == 0 {
+ return 0, nil
+ }
+ size := (*FDID)(nil).SizeBytes()
+
+ src = src[:(size*count)]
+ gohacks.Memmove(unsafe.Pointer(&dst[0]), unsafe.Pointer(&src[0]), uintptr(len(src)))
+ return size*count, nil
+}
+
+// SizeBytes implements marshal.Marshallable.SizeBytes. A ChannelResp always encodes to 16 bytes (two 8-byte fields, per MarshalBytes).
+func (c *ChannelResp) SizeBytes() int {
+ return 16
+}
+
+// MarshalBytes implements marshal.Marshallable.MarshalBytes. It writes dataOffset then dataLength, each as a uint64 in hostarch.ByteOrder.
+func (c *ChannelResp) MarshalBytes(dst []byte) {
+ hostarch.ByteOrder.PutUint64(dst[:8], uint64(c.dataOffset))
+ dst = dst[8:]
+ hostarch.ByteOrder.PutUint64(dst[:8], uint64(c.dataLength))
+ dst = dst[8:]
+}
+
+// UnmarshalBytes implements marshal.Marshallable.UnmarshalBytes. It reads dataOffset (as int64) then dataLength (as uint64), 8 bytes each, in hostarch.ByteOrder.
+func (c *ChannelResp) UnmarshalBytes(src []byte) {
+ c.dataOffset = int64(hostarch.ByteOrder.Uint64(src[:8]))
+ src = src[8:]
+ c.dataLength = uint64(hostarch.ByteOrder.Uint64(src[:8]))
+ src = src[8:]
+}
+
+// Packed implements marshal.Marshallable.Packed. It is unconditionally true for ChannelResp.
+//go:nosplit
+func (c *ChannelResp) Packed() bool {
+ return true
+}
+
+// MarshalUnsafe implements marshal.Marshallable.MarshalUnsafe. It copies the SizeBytes() bytes of *c into dst with one Memmove; &dst[0] is taken, so dst must not be empty.
+func (c *ChannelResp) MarshalUnsafe(dst []byte) {
+ gohacks.Memmove(unsafe.Pointer(&dst[0]), unsafe.Pointer(c), uintptr(c.SizeBytes()))
+}
+
+// UnmarshalUnsafe implements marshal.Marshallable.UnmarshalUnsafe. It copies SizeBytes() bytes from src over *c with one Memmove; &src[0] is taken, so src must not be empty.
+func (c *ChannelResp) UnmarshalUnsafe(src []byte) {
+ gohacks.Memmove(unsafe.Pointer(c), unsafe.Pointer(&src[0]), uintptr(c.SizeBytes()))
+}
+
+// CopyOutN implements marshal.Marshallable.CopyOutN. It copies the first limit bytes of *c to addr through cc.
+//go:nosplit
+func (c *ChannelResp) CopyOutN(cc marshal.CopyContext, addr hostarch.Addr, limit int) (int, error) {
+ // Construct a byte slice backed by c's own memory (not a copy), SizeBytes() long.
+ var buf []byte
+ hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf))
+ hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(c)))
+ hdr.Len = c.SizeBytes()
+ hdr.Cap = c.SizeBytes()
+
+ length, err := cc.CopyOutBytes(addr, buf[:limit]) // escapes: okay.
+ // Since we bypassed the compiler's escape analysis, indicate that c
+ // must live until the use above.
+ runtime.KeepAlive(c) // escapes: replaced by intrinsic.
+ return length, err
+}
+
+// CopyOut implements marshal.Marshallable.CopyOut. It is CopyOutN with limit set to the full SizeBytes().
+//go:nosplit
+func (c *ChannelResp) CopyOut(cc marshal.CopyContext, addr hostarch.Addr) (int, error) {
+ return c.CopyOutN(cc, addr, c.SizeBytes())
+}
+
+// CopyIn implements marshal.Marshallable.CopyIn. It reads up to SizeBytes() bytes from addr through cc directly into *c.
+//go:nosplit
+func (c *ChannelResp) CopyIn(cc marshal.CopyContext, addr hostarch.Addr) (int, error) {
+ // Construct a byte slice backed by c's own memory, so CopyInBytes writes straight into *c.
+ var buf []byte
+ hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf))
+ hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(c)))
+ hdr.Len = c.SizeBytes()
+ hdr.Cap = c.SizeBytes()
+
+ length, err := cc.CopyInBytes(addr, buf) // escapes: okay.
+ // Since we bypassed the compiler's escape analysis, indicate that c
+ // must live until the use above.
+ runtime.KeepAlive(c) // escapes: replaced by intrinsic.
+ return length, err
+}
+
+// WriteTo implements io.WriterTo.WriteTo. It passes the SizeBytes() bytes of *c to a single writer.Write call and returns that call's count and error.
+func (c *ChannelResp) WriteTo(writer io.Writer) (int64, error) {
+ // Construct a byte slice backed by c's own memory (not a copy), SizeBytes() long.
+ var buf []byte
+ hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf))
+ hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(c)))
+ hdr.Len = c.SizeBytes()
+ hdr.Cap = c.SizeBytes()
+
+ length, err := writer.Write(buf)
+ // Since we bypassed the compiler's escape analysis, indicate that c
+ // must live until the use above.
+ runtime.KeepAlive(c) // escapes: replaced by intrinsic.
+ return int64(length), err
+}
+
+// SizeBytes implements marshal.Marshallable.SizeBytes. An ErrorResp always encodes to 4 bytes (the errno field, per MarshalBytes).
+func (e *ErrorResp) SizeBytes() int {
+ return 4
+}
+
+// MarshalBytes implements marshal.Marshallable.MarshalBytes. It stores errno as a uint32 in dst[:4] using hostarch.ByteOrder.
+func (e *ErrorResp) MarshalBytes(dst []byte) {
+ hostarch.ByteOrder.PutUint32(dst[:4], uint32(e.errno))
+ dst = dst[4:]
+}
+
+// UnmarshalBytes implements marshal.Marshallable.UnmarshalBytes. It loads errno from the uint32 in src[:4] using hostarch.ByteOrder.
+func (e *ErrorResp) UnmarshalBytes(src []byte) {
+ e.errno = uint32(hostarch.ByteOrder.Uint32(src[:4]))
+ src = src[4:]
+}
+
+// Packed implements marshal.Marshallable.Packed. It is unconditionally true for ErrorResp.
+//go:nosplit
+func (e *ErrorResp) Packed() bool {
+ return true
+}
+
+// MarshalUnsafe implements marshal.Marshallable.MarshalUnsafe. It copies the SizeBytes() bytes of *e into dst with one Memmove; &dst[0] is taken, so dst must not be empty.
+func (e *ErrorResp) MarshalUnsafe(dst []byte) {
+ gohacks.Memmove(unsafe.Pointer(&dst[0]), unsafe.Pointer(e), uintptr(e.SizeBytes()))
+}
+
+// UnmarshalUnsafe implements marshal.Marshallable.UnmarshalUnsafe. It copies SizeBytes() bytes from src over *e with one Memmove; &src[0] is taken, so src must not be empty.
+func (e *ErrorResp) UnmarshalUnsafe(src []byte) {
+ gohacks.Memmove(unsafe.Pointer(e), unsafe.Pointer(&src[0]), uintptr(e.SizeBytes()))
+}
+
+// CopyOutN implements marshal.Marshallable.CopyOutN. It copies the first limit bytes of *e to addr through cc.
+//go:nosplit
+func (e *ErrorResp) CopyOutN(cc marshal.CopyContext, addr hostarch.Addr, limit int) (int, error) {
+ // Construct a byte slice backed by e's own memory (not a copy), SizeBytes() long.
+ var buf []byte
+ hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf))
+ hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(e)))
+ hdr.Len = e.SizeBytes()
+ hdr.Cap = e.SizeBytes()
+
+ length, err := cc.CopyOutBytes(addr, buf[:limit]) // escapes: okay.
+ // Since we bypassed the compiler's escape analysis, indicate that e
+ // must live until the use above.
+ runtime.KeepAlive(e) // escapes: replaced by intrinsic.
+ return length, err
+}
+
+// CopyOut implements marshal.Marshallable.CopyOut. It is CopyOutN with limit set to the full SizeBytes().
+//go:nosplit
+func (e *ErrorResp) CopyOut(cc marshal.CopyContext, addr hostarch.Addr) (int, error) {
+ return e.CopyOutN(cc, addr, e.SizeBytes())
+}
+
+// CopyIn implements marshal.Marshallable.CopyIn. It reads up to SizeBytes() bytes from addr through cc directly into *e.
+//go:nosplit
+func (e *ErrorResp) CopyIn(cc marshal.CopyContext, addr hostarch.Addr) (int, error) {
+ // Construct a byte slice backed by e's own memory, so CopyInBytes writes straight into *e.
+ var buf []byte
+ hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf))
+ hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(e)))
+ hdr.Len = e.SizeBytes()
+ hdr.Cap = e.SizeBytes()
+
+ length, err := cc.CopyInBytes(addr, buf) // escapes: okay.
+ // Since we bypassed the compiler's escape analysis, indicate that e
+ // must live until the use above.
+ runtime.KeepAlive(e) // escapes: replaced by intrinsic.
+ return length, err
+}
+
+// WriteTo implements io.WriterTo.WriteTo. It passes the SizeBytes() bytes of *e to a single writer.Write call and returns that call's count and error.
+func (e *ErrorResp) WriteTo(writer io.Writer) (int64, error) {
+ // Construct a byte slice backed by e's own memory (not a copy), SizeBytes() long.
+ var buf []byte
+ hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf))
+ hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(e)))
+ hdr.Len = e.SizeBytes()
+ hdr.Cap = e.SizeBytes()
+
+ length, err := writer.Write(buf)
+ // Since we bypassed the compiler's escape analysis, indicate that e
+ // must live until the use above.
+ runtime.KeepAlive(e) // escapes: replaced by intrinsic.
+ return int64(length), err
+}
+
+// SizeBytes implements marshal.Marshallable.SizeBytes. A GID always encodes to 4 bytes.
+//go:nosplit
+func (gid *GID) SizeBytes() int {
+ return 4
+}
+
+// MarshalBytes implements marshal.Marshallable.MarshalBytes. It stores *gid as a uint32 in dst[:4] using hostarch.ByteOrder.
+func (gid *GID) MarshalBytes(dst []byte) {
+ hostarch.ByteOrder.PutUint32(dst[:4], uint32(*gid))
+}
+
+// UnmarshalBytes implements marshal.Marshallable.UnmarshalBytes. It loads *gid from the uint32 in src[:4] using hostarch.ByteOrder.
+func (gid *GID) UnmarshalBytes(src []byte) {
+ *gid = GID(uint32(hostarch.ByteOrder.Uint32(src[:4])))
+}
+
+// Packed implements marshal.Marshallable.Packed. It is unconditionally true for GID.
+//go:nosplit
+func (gid *GID) Packed() bool {
+ // Scalar newtypes are always packed.
+ return true
+}
+
+// MarshalUnsafe implements marshal.Marshallable.MarshalUnsafe. It copies the SizeBytes() bytes of *gid into dst with one Memmove; &dst[0] is taken, so dst must not be empty.
+func (gid *GID) MarshalUnsafe(dst []byte) {
+ gohacks.Memmove(unsafe.Pointer(&dst[0]), unsafe.Pointer(gid), uintptr(gid.SizeBytes()))
+}
+
+// UnmarshalUnsafe implements marshal.Marshallable.UnmarshalUnsafe. It copies SizeBytes() bytes from src over *gid with one Memmove; &src[0] is taken, so src must not be empty.
+func (gid *GID) UnmarshalUnsafe(src []byte) {
+ gohacks.Memmove(unsafe.Pointer(gid), unsafe.Pointer(&src[0]), uintptr(gid.SizeBytes()))
+}
+
+// CopyOutN implements marshal.Marshallable.CopyOutN. It copies the first limit bytes of *gid to addr through cc.
+//go:nosplit
+func (gid *GID) CopyOutN(cc marshal.CopyContext, addr hostarch.Addr, limit int) (int, error) {
+ // Construct a byte slice backed by gid's own memory (not a copy), SizeBytes() long.
+ var buf []byte
+ hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf))
+ hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(gid)))
+ hdr.Len = gid.SizeBytes()
+ hdr.Cap = gid.SizeBytes()
+
+ length, err := cc.CopyOutBytes(addr, buf[:limit]) // escapes: okay.
+ // Since we bypassed the compiler's escape analysis, indicate that gid
+ // must live until the use above.
+ runtime.KeepAlive(gid) // escapes: replaced by intrinsic.
+ return length, err
+}
+
+// CopyOut implements marshal.Marshallable.CopyOut. It is CopyOutN with limit set to the full SizeBytes().
+//go:nosplit
+func (gid *GID) CopyOut(cc marshal.CopyContext, addr hostarch.Addr) (int, error) {
+ return gid.CopyOutN(cc, addr, gid.SizeBytes())
+}
+
+// CopyIn implements marshal.Marshallable.CopyIn. It reads up to SizeBytes() bytes from addr through cc directly into *gid.
+//go:nosplit
+func (gid *GID) CopyIn(cc marshal.CopyContext, addr hostarch.Addr) (int, error) {
+ // Construct a byte slice backed by gid's own memory, so CopyInBytes writes straight into *gid.
+ var buf []byte
+ hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf))
+ hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(gid)))
+ hdr.Len = gid.SizeBytes()
+ hdr.Cap = gid.SizeBytes()
+
+ length, err := cc.CopyInBytes(addr, buf) // escapes: okay.
+ // Since we bypassed the compiler's escape analysis, indicate that gid
+ // must live until the use above.
+ runtime.KeepAlive(gid) // escapes: replaced by intrinsic.
+ return length, err
+}
+
+// WriteTo implements io.WriterTo.WriteTo. It passes the SizeBytes() bytes of *gid to a single w.Write call and returns that call's count and error.
+func (gid *GID) WriteTo(w io.Writer) (int64, error) {
+ // Construct a byte slice backed by gid's own memory (not a copy), SizeBytes() long.
+ var buf []byte
+ hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf))
+ hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(gid)))
+ hdr.Len = gid.SizeBytes()
+ hdr.Cap = gid.SizeBytes()
+
+ length, err := w.Write(buf)
+ // Since we bypassed the compiler's escape analysis, indicate that gid
+ // must live until the use above.
+ runtime.KeepAlive(gid) // escapes: replaced by intrinsic.
+ return int64(length), err
+}
+
+// SizeBytes implements marshal.Marshallable.SizeBytes. The size is 4 bytes (the padding skipped in MarshalBytes) plus the sizes of an FDID and a linux.Statx.
+func (i *Inode) SizeBytes() int {
+ return 4 +
+ (*FDID)(nil).SizeBytes() +
+ (*linux.Statx)(nil).SizeBytes()
+}
+
+// MarshalBytes implements marshal.Marshallable.MarshalBytes. It encodes ControlFD, skips 4 padding bytes, then encodes Stat.
+func (i *Inode) MarshalBytes(dst []byte) {
+ i.ControlFD.MarshalBytes(dst[:i.ControlFD.SizeBytes()])
+ dst = dst[i.ControlFD.SizeBytes():]
+ // Padding: four bytes (one uint32) are skipped here; nothing is written to them, so dst keeps whatever they already held.
+ dst = dst[4:]
+ i.Stat.MarshalBytes(dst[:i.Stat.SizeBytes()])
+ dst = dst[i.Stat.SizeBytes():]
+}
+
+// UnmarshalBytes implements marshal.Marshallable.UnmarshalBytes. It decodes ControlFD, skips 4 padding bytes, then decodes Stat.
+func (i *Inode) UnmarshalBytes(src []byte) {
+ i.ControlFD.UnmarshalBytes(src[:i.ControlFD.SizeBytes()])
+ src = src[i.ControlFD.SizeBytes():]
+ // Padding: four bytes (one uint32) of src are skipped and their values ignored.
+ src = src[4:]
+ i.Stat.UnmarshalBytes(src[:i.Stat.SizeBytes()])
+ src = src[i.Stat.SizeBytes():]
+}
+
+// Packed implements marshal.Marshallable.Packed. Inode is packed only when both ControlFD and Stat report packed.
+//go:nosplit
+func (i *Inode) Packed() bool {
+ return i.ControlFD.Packed() && i.Stat.Packed()
+}
+
+// MarshalUnsafe implements marshal.Marshallable.MarshalUnsafe. If both fields are packed, i's memory is copied into dst with one Memmove; &dst[0] is taken, so dst must not be empty.
+func (i *Inode) MarshalUnsafe(dst []byte) {
+ if i.ControlFD.Packed() && i.Stat.Packed() {
+ gohacks.Memmove(unsafe.Pointer(&dst[0]), unsafe.Pointer(i), uintptr(i.SizeBytes()))
+ } else {
+ // Inode does not have a packed layout in memory here; fall back to field-by-field MarshalBytes.
+ i.MarshalBytes(dst)
+ }
+}
+
+// UnmarshalUnsafe implements marshal.Marshallable.UnmarshalUnsafe. If both fields are packed, SizeBytes() bytes of src are copied over i with one Memmove; &src[0] is taken, so src must not be empty.
+func (i *Inode) UnmarshalUnsafe(src []byte) {
+ if i.ControlFD.Packed() && i.Stat.Packed() {
+ gohacks.Memmove(unsafe.Pointer(i), unsafe.Pointer(&src[0]), uintptr(i.SizeBytes()))
+ } else {
+ // Inode does not have a packed layout in memory here; fall back to field-by-field UnmarshalBytes.
+ i.UnmarshalBytes(src)
+ }
+}
+
+// CopyOutN implements marshal.Marshallable.CopyOutN. It copies the first limit bytes of i's encoding to addr through cc.
+//go:nosplit
+func (i *Inode) CopyOutN(cc marshal.CopyContext, addr hostarch.Addr, limit int) (int, error) {
+ if !(i.ControlFD.Packed() && i.Stat.Packed()) {
+ // Inode is packed only when both fields are (same condition as Packed); otherwise marshal into cc's scratch buffer and copy that out.
+ buf := cc.CopyScratchBuffer(i.SizeBytes()) // escapes: okay.
+ i.MarshalBytes(buf) // escapes: fallback.
+ return cc.CopyOutBytes(addr, buf[:limit]) // escapes: okay.
+ }
+
+ // Construct a byte slice backed by i's own memory (not a copy), SizeBytes() long.
+ var buf []byte
+ hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf))
+ hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(i)))
+ hdr.Len = i.SizeBytes()
+ hdr.Cap = i.SizeBytes()
+
+ length, err := cc.CopyOutBytes(addr, buf[:limit]) // escapes: okay.
+ // Since we bypassed the compiler's escape analysis, indicate that i
+ // must live until the use above.
+ runtime.KeepAlive(i) // escapes: replaced by intrinsic.
+ return length, err
+}
+
+// CopyOut implements marshal.Marshallable.CopyOut. It is CopyOutN with limit set to the full SizeBytes().
+//go:nosplit
+func (i *Inode) CopyOut(cc marshal.CopyContext, addr hostarch.Addr) (int, error) {
+ return i.CopyOutN(cc, addr, i.SizeBytes())
+}
+
+// CopyIn implements marshal.Marshallable.CopyIn. It reads up to SizeBytes() bytes from addr through cc into i and returns CopyInBytes's length and error.
+//go:nosplit
+func (i *Inode) CopyIn(cc marshal.CopyContext, addr hostarch.Addr) (int, error) {
+ if !(i.ControlFD.Packed() && i.Stat.Packed()) {
+ // Inode is packed only when both fields are (same condition as Packed); otherwise copy into cc's scratch buffer and decode with UnmarshalBytes.
+ buf := cc.CopyScratchBuffer(i.SizeBytes()) // escapes: okay.
+ length, err := cc.CopyInBytes(addr, buf) // escapes: okay.
+ // Unmarshal unconditionally. If we had a short copy-in, this results in a
+ // partially unmarshalled struct.
+ i.UnmarshalBytes(buf) // escapes: fallback.
+ return length, err
+ }
+
+ // Construct a byte slice backed by i's own memory, so CopyInBytes writes straight into i.
+ var buf []byte
+ hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf))
+ hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(i)))
+ hdr.Len = i.SizeBytes()
+ hdr.Cap = i.SizeBytes()
+
+ length, err := cc.CopyInBytes(addr, buf) // escapes: okay.
+ // Since we bypassed the compiler's escape analysis, indicate that i
+ // must live until the use above.
+ runtime.KeepAlive(i) // escapes: replaced by intrinsic.
+ return length, err
+}
+
+// WriteTo implements io.WriterTo.WriteTo. It passes i's SizeBytes()-long encoding to a single writer.Write call and returns that call's count and error.
+func (i *Inode) WriteTo(writer io.Writer) (int64, error) {
+ if !(i.ControlFD.Packed() && i.Stat.Packed()) {
+ // Inode is packed only when both fields are (same condition as Packed); otherwise marshal into a freshly allocated buffer and write that.
+ buf := make([]byte, i.SizeBytes())
+ i.MarshalBytes(buf)
+ length, err := writer.Write(buf)
+ return int64(length), err
+ }
+
+ // Construct a byte slice backed by i's own memory (not a copy), SizeBytes() long.
+ var buf []byte
+ hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf))
+ hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(i)))
+ hdr.Len = i.SizeBytes()
+ hdr.Cap = i.SizeBytes()
+
+ length, err := writer.Write(buf)
+ // Since we bypassed the compiler's escape analysis, indicate that i
+ // must live until the use above.
+ runtime.KeepAlive(i) // escapes: replaced by intrinsic.
+ return int64(length), err
+}
+
+// CopyInodeSliceIn copies in a slice of Inode objects from the task's memory. An empty dst returns (0, nil) without calling cc.
+func CopyInodeSliceIn(cc marshal.CopyContext, addr hostarch.Addr, dst []Inode) (int, error) {
+ count := len(dst)
+ if count == 0 {
+ return 0, nil
+ }
+ size := (*Inode)(nil).SizeBytes()
+
+ if !dst[0].Packed() {
+ // Inode does not have a packed layout in memory here; copy into cc's scratch buffer and decode element by element.
+ buf := cc.CopyScratchBuffer(size * count)
+ length, err := cc.CopyInBytes(addr, buf)
+
+ // Unmarshal as much as possible, even on error. First handle full objects.
+ limit := length/size
+ for idx := 0; idx < limit; idx++ {
+ dst[idx].UnmarshalBytes(buf[size*idx:size*(idx+1)])
+ }
+
+ // Handle any final partial object. buf is guaranteed to be long enough for the
+ // final element, but may not contain valid data for the entire range. This may
+ // result in unmarshalling zero values for some parts of the object.
+ if length%size != 0 {
+ idx := limit
+ dst[idx].UnmarshalBytes(buf[size*idx:size*(idx+1)])
+ }
+
+ return length, err
+ }
+
+ ptr := unsafe.Pointer(&dst)
+ val := gohacks.Noescape(unsafe.Pointer((*reflect.SliceHeader)(ptr).Data))
+
+ // Construct a byte slice backed by dst's underlying array, size*count bytes long, so CopyInBytes fills dst directly.
+ var buf []byte
+ hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf))
+ hdr.Data = uintptr(val)
+ hdr.Len = size * count
+ hdr.Cap = size * count
+
+ length, err := cc.CopyInBytes(addr, buf)
+ // Since we bypassed the compiler's escape analysis, indicate that dst
+ // must live until the use above.
+ runtime.KeepAlive(dst) // escapes: replaced by intrinsic.
+ return length, err
+}
+
+// CopyInodeSliceOut copies a slice of Inode objects to the task's memory. An empty src returns (0, nil) without calling cc.
+func CopyInodeSliceOut(cc marshal.CopyContext, addr hostarch.Addr, src []Inode) (int, error) {
+ count := len(src)
+ if count == 0 {
+ return 0, nil
+ }
+ size := (*Inode)(nil).SizeBytes()
+
+ if !src[0].Packed() {
+ // Inode does not have a packed layout in memory here; marshal each element into cc's scratch buffer and copy that out.
+ buf := cc.CopyScratchBuffer(size * count)
+ for idx := 0; idx < count; idx++ {
+ src[idx].MarshalBytes(buf[size*idx:size*(idx+1)])
+ }
+ return cc.CopyOutBytes(addr, buf)
+ }
+
+ ptr := unsafe.Pointer(&src)
+ val := gohacks.Noescape(unsafe.Pointer((*reflect.SliceHeader)(ptr).Data))
+
+ // Construct a byte slice backed by src's underlying array, size*count bytes long.
+ var buf []byte
+ hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf))
+ hdr.Data = uintptr(val)
+ hdr.Len = size * count
+ hdr.Cap = size * count
+
+ length, err := cc.CopyOutBytes(addr, buf)
+ // Since we bypassed the compiler's escape analysis, indicate that src
+ // must live until the use above.
+ runtime.KeepAlive(src) // escapes: replaced by intrinsic.
+ return length, err
+}
+
+// MarshalUnsafeInodeSlice is like Inode.MarshalUnsafe, but for a []Inode. It returns size*count bytes written and a nil error; an empty src returns (0, nil).
+func MarshalUnsafeInodeSlice(src []Inode, dst []byte) (int, error) {
+ count := len(src)
+ if count == 0 {
+ return 0, nil
+ }
+ size := (*Inode)(nil).SizeBytes()
+
+ if !src[0].Packed() {
+ // Inode does not have a packed layout in memory here; marshal each element into its own size-byte window of dst.
+ for idx := 0; idx < count; idx++ {
+ src[idx].MarshalBytes(dst[size*idx:(size)*(idx+1)])
+ }
+ return size * count, nil
+ }
+
+ dst = dst[:size*count]
+ gohacks.Memmove(unsafe.Pointer(&dst[0]), unsafe.Pointer(&src[0]), uintptr(len(dst)))
+ return size * count, nil
+}
+
+// UnmarshalUnsafeInodeSlice is like Inode.UnmarshalUnsafe, but for a []Inode. It returns size*count bytes consumed and a nil error; an empty dst returns (0, nil).
+func UnmarshalUnsafeInodeSlice(dst []Inode, src []byte) (int, error) {
+ count := len(dst)
+ if count == 0 {
+ return 0, nil
+ }
+ size := (*Inode)(nil).SizeBytes()
+
+ if !dst[0].Packed() {
+ // Inode does not have a packed layout in memory here; unmarshal each element from its own size-byte window of src.
+ for idx := 0; idx < count; idx++ {
+ dst[idx].UnmarshalBytes(src[size*idx:size*(idx+1)])
+ }
+ return size * count, nil
+ }
+
+ src = src[:(size*count)]
+ gohacks.Memmove(unsafe.Pointer(&dst[0]), unsafe.Pointer(&src[0]), uintptr(len(src)))
+ return count*size, nil
+}
+
+// SizeBytes implements marshal.Marshallable.SizeBytes. A MID always encodes to 2 bytes.
+//go:nosplit
+func (m *MID) SizeBytes() int {
+ return 2
+}
+
+// MarshalBytes implements marshal.Marshallable.MarshalBytes. It stores *m as a uint16 in dst[:2] using hostarch.ByteOrder.
+func (m *MID) MarshalBytes(dst []byte) {
+ hostarch.ByteOrder.PutUint16(dst[:2], uint16(*m))
+}
+
+// UnmarshalBytes implements marshal.Marshallable.UnmarshalBytes. It loads *m from the uint16 in src[:2] using hostarch.ByteOrder.
+func (m *MID) UnmarshalBytes(src []byte) {
+ *m = MID(uint16(hostarch.ByteOrder.Uint16(src[:2])))
+}
+
+// Packed implements marshal.Marshallable.Packed. It is unconditionally true for MID.
+//go:nosplit
+func (m *MID) Packed() bool {
+ // Scalar newtypes are always packed.
+ return true
+}
+
+// MarshalUnsafe implements marshal.Marshallable.MarshalUnsafe. It copies the SizeBytes() bytes of *m into dst with one Memmove; &dst[0] is taken, so dst must not be empty.
+func (m *MID) MarshalUnsafe(dst []byte) {
+ gohacks.Memmove(unsafe.Pointer(&dst[0]), unsafe.Pointer(m), uintptr(m.SizeBytes()))
+}
+
+// UnmarshalUnsafe implements marshal.Marshallable.UnmarshalUnsafe. It copies SizeBytes() bytes from src over *m with one Memmove; &src[0] is taken, so src must not be empty.
+func (m *MID) UnmarshalUnsafe(src []byte) {
+ gohacks.Memmove(unsafe.Pointer(m), unsafe.Pointer(&src[0]), uintptr(m.SizeBytes()))
+}
+
+// CopyOutN implements marshal.Marshallable.CopyOutN. It copies the first limit bytes of *m to addr through cc.
+//go:nosplit
+func (m *MID) CopyOutN(cc marshal.CopyContext, addr hostarch.Addr, limit int) (int, error) {
+ // Construct a byte slice backed by m's own memory (not a copy), SizeBytes() long.
+ var buf []byte
+ hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf))
+ hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(m)))
+ hdr.Len = m.SizeBytes()
+ hdr.Cap = m.SizeBytes()
+
+ length, err := cc.CopyOutBytes(addr, buf[:limit]) // escapes: okay.
+ // Since we bypassed the compiler's escape analysis, indicate that m
+ // must live until the use above.
+ runtime.KeepAlive(m) // escapes: replaced by intrinsic.
+ return length, err
+}
+
+// CopyOut implements marshal.Marshallable.CopyOut. It is CopyOutN with limit set to the full SizeBytes().
+//go:nosplit
+func (m *MID) CopyOut(cc marshal.CopyContext, addr hostarch.Addr) (int, error) {
+ return m.CopyOutN(cc, addr, m.SizeBytes())
+}
+
+// CopyIn implements marshal.Marshallable.CopyIn. It reads up to SizeBytes() bytes from addr through cc directly into *m.
+//go:nosplit
+func (m *MID) CopyIn(cc marshal.CopyContext, addr hostarch.Addr) (int, error) {
+ // Construct a byte slice backed by m's own memory, so CopyInBytes writes straight into *m.
+ var buf []byte
+ hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf))
+ hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(m)))
+ hdr.Len = m.SizeBytes()
+ hdr.Cap = m.SizeBytes()
+
+ length, err := cc.CopyInBytes(addr, buf) // escapes: okay.
+ // Since we bypassed the compiler's escape analysis, indicate that m
+ // must live until the use above.
+ runtime.KeepAlive(m) // escapes: replaced by intrinsic.
+ return length, err
+}
+
+// WriteTo implements io.WriterTo.WriteTo by passing a byte view of m's memory (m.SizeBytes() long) to w.Write and returning its count widened to int64.
+func (m *MID) WriteTo(w io.Writer) (int64, error) {
+ // Construct a slice backed by m's underlying memory.
+ var buf []byte
+ hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf))
+ hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(m)))
+ hdr.Len = m.SizeBytes()
+ hdr.Cap = m.SizeBytes()
+
+ length, err := w.Write(buf)
+ // Since we bypassed the compiler's escape analysis, indicate that m
+ // must live until the use above.
+ runtime.KeepAlive(m) // escapes: replaced by intrinsic.
+ return int64(length), err
+}
+
+// CopyMIDSliceIn copies in a slice of MID objects from the task's memory: cc.CopyInBytes fills dst's backing array with len(dst)*SizeBytes bytes read at addr; an empty dst returns (0, nil) without copying.
+//go:nosplit
+func CopyMIDSliceIn(cc marshal.CopyContext, addr hostarch.Addr, dst []MID) (int, error) {
+ count := len(dst)
+ if count == 0 {
+ return 0, nil
+ }
+ size := (*MID)(nil).SizeBytes()
+
+ ptr := unsafe.Pointer(&dst)
+ val := gohacks.Noescape(unsafe.Pointer((*reflect.SliceHeader)(ptr).Data))
+
+ // Construct a byte slice backed by dst's underlying array.
+ var buf []byte
+ hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf))
+ hdr.Data = uintptr(val)
+ hdr.Len = size * count
+ hdr.Cap = size * count
+
+ length, err := cc.CopyInBytes(addr, buf) // escapes: okay.
+ // Since we bypassed the compiler's escape analysis, indicate that dst
+ // must live until the use above.
+ runtime.KeepAlive(dst) // escapes: replaced by intrinsic.
+ return length, err
+}
+
+// CopyMIDSliceOut copies a slice of MID objects to the task's memory: the len(src)*SizeBytes bytes of src's backing array are passed to cc.CopyOutBytes at addr; an empty src returns (0, nil) without copying.
+//go:nosplit
+func CopyMIDSliceOut(cc marshal.CopyContext, addr hostarch.Addr, src []MID) (int, error) {
+ count := len(src)
+ if count == 0 {
+ return 0, nil
+ }
+ size := (*MID)(nil).SizeBytes()
+
+ ptr := unsafe.Pointer(&src)
+ val := gohacks.Noescape(unsafe.Pointer((*reflect.SliceHeader)(ptr).Data))
+
+ // Construct a byte slice backed by src's underlying array.
+ var buf []byte
+ hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf))
+ hdr.Data = uintptr(val)
+ hdr.Len = size * count
+ hdr.Cap = size * count
+
+ length, err := cc.CopyOutBytes(addr, buf) // escapes: okay.
+ // Since we bypassed the compiler's escape analysis, indicate that src
+ // must live until the use above.
+ runtime.KeepAlive(src) // escapes: replaced by intrinsic.
+ return length, err
+}
+
+// MarshalUnsafeMIDSlice is like MID.MarshalUnsafe, but for a []MID: it memmoves all of src into the front of dst and returns the byte count (size*count) with a nil error; an empty src returns (0, nil).
+func MarshalUnsafeMIDSlice(src []MID, dst []byte) (int, error) {
+ count := len(src)
+ if count == 0 {
+ return 0, nil
+ }
+ size := (*MID)(nil).SizeBytes()
+
+ dst = dst[:size*count] // Panics if dst's capacity is below size*count.
+ gohacks.Memmove(unsafe.Pointer(&dst[0]), unsafe.Pointer(&src[0]), uintptr(len(dst)))
+ return size*count, nil
+}
+
+// UnmarshalUnsafeMIDSlice is like MID.UnmarshalUnsafe, but for a []MID: it memmoves size*len(dst) bytes from the front of src into dst and returns that byte count with a nil error; an empty dst returns (0, nil).
+func UnmarshalUnsafeMIDSlice(dst []MID, src []byte) (int, error) {
+ count := len(dst)
+ if count == 0 {
+ return 0, nil
+ }
+ size := (*MID)(nil).SizeBytes()
+
+ src = src[:(size*count)] // Panics if src's capacity is below size*count.
+ gohacks.Memmove(unsafe.Pointer(&dst[0]), unsafe.Pointer(&src[0]), uintptr(len(src)))
+ return size*count, nil
+}
+
+// SizeBytes implements marshal.Marshallable.SizeBytes; a UID always marshals to 4 bytes (one uint32).
+//go:nosplit
+func (uid *UID) SizeBytes() int {
+ return 4
+}
+
+// MarshalBytes implements marshal.Marshallable.MarshalBytes by writing *uid as a uint32 into dst[:4] with hostarch.ByteOrder.
+func (uid *UID) MarshalBytes(dst []byte) {
+ hostarch.ByteOrder.PutUint32(dst[:4], uint32(*uid))
+}
+
+// UnmarshalBytes implements marshal.Marshallable.UnmarshalBytes by decoding a uint32 from src[:4] with hostarch.ByteOrder and storing it in *uid.
+func (uid *UID) UnmarshalBytes(src []byte) {
+ *uid = UID(uint32(hostarch.ByteOrder.Uint32(src[:4])))
+}
+
+// Packed implements marshal.Marshallable.Packed; for UID it unconditionally reports true.
+//go:nosplit
+func (uid *UID) Packed() bool {
+ // Scalar newtypes are always packed.
+ return true
+}
+
+// MarshalUnsafe implements marshal.Marshallable.MarshalUnsafe by memmoving uid.SizeBytes() raw bytes of uid to the start of dst; dst must be non-empty because &dst[0] is taken.
+func (uid *UID) MarshalUnsafe(dst []byte) {
+ gohacks.Memmove(unsafe.Pointer(&dst[0]), unsafe.Pointer(uid), uintptr(uid.SizeBytes()))
+}
+
+// UnmarshalUnsafe implements marshal.Marshallable.UnmarshalUnsafe by memmoving uid.SizeBytes() raw bytes from the start of src into uid; src must be non-empty because &src[0] is taken.
+func (uid *UID) UnmarshalUnsafe(src []byte) {
+ gohacks.Memmove(unsafe.Pointer(uid), unsafe.Pointer(&src[0]), uintptr(uid.SizeBytes()))
+}
+
+// CopyOutN implements marshal.Marshallable.CopyOutN by handing the first limit bytes of uid's own memory to cc.CopyOutBytes at addr; buf[:limit] panics if limit exceeds uid.SizeBytes().
+//go:nosplit
+func (uid *UID) CopyOutN(cc marshal.CopyContext, addr hostarch.Addr, limit int) (int, error) {
+ // Construct a slice backed by uid's underlying memory.
+ var buf []byte
+ hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf))
+ hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(uid)))
+ hdr.Len = uid.SizeBytes()
+ hdr.Cap = uid.SizeBytes()
+
+ length, err := cc.CopyOutBytes(addr, buf[:limit]) // escapes: okay.
+ // Since we bypassed the compiler's escape analysis, indicate that uid
+ // must live until the use above.
+ runtime.KeepAlive(uid) // escapes: replaced by intrinsic.
+ return length, err
+}
+
+// CopyOut implements marshal.Marshallable.CopyOut; it is CopyOutN with limit set to the full uid.SizeBytes().
+//go:nosplit
+func (uid *UID) CopyOut(cc marshal.CopyContext, addr hostarch.Addr) (int, error) {
+ return uid.CopyOutN(cc, addr, uid.SizeBytes())
+}
+
+// CopyIn implements marshal.Marshallable.CopyIn by letting cc.CopyInBytes write uid.SizeBytes() bytes from addr directly into uid's memory; it returns whatever CopyInBytes returns.
+//go:nosplit
+func (uid *UID) CopyIn(cc marshal.CopyContext, addr hostarch.Addr) (int, error) {
+ // Construct a slice backed by uid's underlying memory.
+ var buf []byte
+ hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf))
+ hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(uid)))
+ hdr.Len = uid.SizeBytes()
+ hdr.Cap = uid.SizeBytes()
+
+ length, err := cc.CopyInBytes(addr, buf) // escapes: okay.
+ // Since we bypassed the compiler's escape analysis, indicate that uid
+ // must live until the use above.
+ runtime.KeepAlive(uid) // escapes: replaced by intrinsic.
+ return length, err
+}
+
+// WriteTo implements io.WriterTo.WriteTo by passing a byte view of uid's memory (uid.SizeBytes() long) to w.Write and returning its count widened to int64.
+func (uid *UID) WriteTo(w io.Writer) (int64, error) {
+ // Construct a slice backed by uid's underlying memory.
+ var buf []byte
+ hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf))
+ hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(uid)))
+ hdr.Len = uid.SizeBytes()
+ hdr.Cap = uid.SizeBytes()
+
+ length, err := w.Write(buf)
+ // Since we bypassed the compiler's escape analysis, indicate that uid
+ // must live until the use above.
+ runtime.KeepAlive(uid) // escapes: replaced by intrinsic.
+ return int64(length), err
+}
+
+// Packed implements marshal.Marshallable.Packed; for MsgDynamic it unconditionally reports false.
+//go:nosplit
+func (m *MsgDynamic) Packed() bool {
+ // Type MsgDynamic is dynamic so it might have slice/string headers. Hence, it is not packed.
+ return false
+}
+
+// MarshalUnsafe implements marshal.Marshallable.MarshalUnsafe; for MsgDynamic it simply delegates to m.MarshalBytes(dst).
+func (m *MsgDynamic) MarshalUnsafe(dst []byte) {
+ // Type MsgDynamic doesn't have a packed layout in memory, fallback to MarshalBytes.
+ m.MarshalBytes(dst)
+}
+
+// UnmarshalUnsafe implements marshal.Marshallable.UnmarshalUnsafe; for MsgDynamic it simply delegates to m.UnmarshalBytes(src).
+func (m *MsgDynamic) UnmarshalUnsafe(src []byte) {
+ // Type MsgDynamic doesn't have a packed layout in memory, fallback to UnmarshalBytes.
+ m.UnmarshalBytes(src)
+}
+
+// CopyOutN implements marshal.Marshallable.CopyOutN: it marshals m into a cc scratch buffer of m.SizeBytes() bytes and passes the first limit bytes to cc.CopyOutBytes at addr.
+//go:nosplit
+func (m *MsgDynamic) CopyOutN(cc marshal.CopyContext, addr hostarch.Addr, limit int) (int, error) {
+ // Type MsgDynamic doesn't have a packed layout in memory, fall back to MarshalBytes.
+ buf := cc.CopyScratchBuffer(m.SizeBytes()) // escapes: okay.
+ m.MarshalBytes(buf) // escapes: fallback.
+ return cc.CopyOutBytes(addr, buf[:limit]) // escapes: okay.
+}
+
+// CopyOut implements marshal.Marshallable.CopyOut; it is CopyOutN with limit set to the full m.SizeBytes().
+//go:nosplit
+func (m *MsgDynamic) CopyOut(cc marshal.CopyContext, addr hostarch.Addr) (int, error) {
+ return m.CopyOutN(cc, addr, m.SizeBytes())
+}
+
+// CopyIn implements marshal.Marshallable.CopyIn: it reads m.SizeBytes() bytes (sized from m's current contents) from addr into a cc scratch buffer, then unmarshals that buffer into m.
+//go:nosplit
+func (m *MsgDynamic) CopyIn(cc marshal.CopyContext, addr hostarch.Addr) (int, error) {
+ // Type MsgDynamic doesn't have a packed layout in memory, fall back to UnmarshalBytes.
+ buf := cc.CopyScratchBuffer(m.SizeBytes()) // escapes: okay.
+ length, err := cc.CopyInBytes(addr, buf) // escapes: okay.
+ // Unmarshal unconditionally. If we had a short copy-in, this results in a
+ // partially unmarshalled struct.
+ m.UnmarshalBytes(buf) // escapes: fallback.
+ return length, err
+}
+
+// WriteTo implements io.WriterTo.WriteTo: it marshals m into a freshly allocated buffer of m.SizeBytes() bytes and writes that buffer to writer in a single Write call.
+func (m *MsgDynamic) WriteTo(writer io.Writer) (int64, error) {
+ // Type MsgDynamic doesn't have a packed layout in memory, fall back to MarshalBytes.
+ buf := make([]byte, m.SizeBytes())
+ m.MarshalBytes(buf)
+ length, err := writer.Write(buf)
+ return int64(length), err
+}
+
+// SizeBytes implements marshal.Marshallable.SizeBytes; the constant 16 matches the 2+2+4+8 bytes that MarshalBytes writes for fields A, B, C and D.
+func (m *MsgSimple) SizeBytes() int {
+ return 16
+}
+
+// MarshalBytes implements marshal.Marshallable.MarshalBytes by writing A (uint16), B (uint16), C (uint32) and D (uint64) back to back into dst with hostarch.ByteOrder; dst must hold at least 16 bytes.
+func (m *MsgSimple) MarshalBytes(dst []byte) {
+ hostarch.ByteOrder.PutUint16(dst[:2], uint16(m.A))
+ dst = dst[2:] // Advance past the field just written; repeated after every field.
+ hostarch.ByteOrder.PutUint16(dst[:2], uint16(m.B))
+ dst = dst[2:]
+ hostarch.ByteOrder.PutUint32(dst[:4], uint32(m.C))
+ dst = dst[4:]
+ hostarch.ByteOrder.PutUint64(dst[:8], uint64(m.D))
+ dst = dst[8:]
+}
+
+// UnmarshalBytes implements marshal.Marshallable.UnmarshalBytes by reading A (uint16), B (uint16), C (uint32) and D (uint64) back to back from src with hostarch.ByteOrder; src must hold at least 16 bytes.
+func (m *MsgSimple) UnmarshalBytes(src []byte) {
+ m.A = uint16(hostarch.ByteOrder.Uint16(src[:2]))
+ src = src[2:] // Advance past the field just read; repeated after every field.
+ m.B = uint16(hostarch.ByteOrder.Uint16(src[:2]))
+ src = src[2:]
+ m.C = uint32(hostarch.ByteOrder.Uint32(src[:4]))
+ src = src[4:]
+ m.D = uint64(hostarch.ByteOrder.Uint64(src[:8]))
+ src = src[8:]
+}
+
+// Packed implements marshal.Marshallable.Packed; for MsgSimple it unconditionally reports true.
+//go:nosplit
+func (m *MsgSimple) Packed() bool {
+ return true
+}
+
+// MarshalUnsafe implements marshal.Marshallable.MarshalUnsafe by memmoving m.SizeBytes() raw bytes of m to the start of dst; dst must be non-empty because &dst[0] is taken.
+func (m *MsgSimple) MarshalUnsafe(dst []byte) {
+ gohacks.Memmove(unsafe.Pointer(&dst[0]), unsafe.Pointer(m), uintptr(m.SizeBytes()))
+}
+
+// UnmarshalUnsafe implements marshal.Marshallable.UnmarshalUnsafe by memmoving m.SizeBytes() raw bytes from the start of src into m; src must be non-empty because &src[0] is taken.
+func (m *MsgSimple) UnmarshalUnsafe(src []byte) {
+ gohacks.Memmove(unsafe.Pointer(m), unsafe.Pointer(&src[0]), uintptr(m.SizeBytes()))
+}
+
+// CopyOutN implements marshal.Marshallable.CopyOutN by handing the first limit bytes of m's own memory to cc.CopyOutBytes at addr; buf[:limit] panics if limit exceeds m.SizeBytes().
+//go:nosplit
+func (m *MsgSimple) CopyOutN(cc marshal.CopyContext, addr hostarch.Addr, limit int) (int, error) {
+ // Construct a slice backed by m's underlying memory.
+ var buf []byte
+ hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf))
+ hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(m)))
+ hdr.Len = m.SizeBytes()
+ hdr.Cap = m.SizeBytes()
+
+ length, err := cc.CopyOutBytes(addr, buf[:limit]) // escapes: okay.
+ // Since we bypassed the compiler's escape analysis, indicate that m
+ // must live until the use above.
+ runtime.KeepAlive(m) // escapes: replaced by intrinsic.
+ return length, err
+}
+
+// CopyOut implements marshal.Marshallable.CopyOut; it is CopyOutN with limit set to the full m.SizeBytes().
+//go:nosplit
+func (m *MsgSimple) CopyOut(cc marshal.CopyContext, addr hostarch.Addr) (int, error) {
+ return m.CopyOutN(cc, addr, m.SizeBytes())
+}
+
+// CopyIn implements marshal.Marshallable.CopyIn by letting cc.CopyInBytes write m.SizeBytes() bytes from addr directly into m's memory; it returns whatever CopyInBytes returns.
+//go:nosplit
+func (m *MsgSimple) CopyIn(cc marshal.CopyContext, addr hostarch.Addr) (int, error) {
+ // Construct a slice backed by m's underlying memory.
+ var buf []byte
+ hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf))
+ hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(m)))
+ hdr.Len = m.SizeBytes()
+ hdr.Cap = m.SizeBytes()
+
+ length, err := cc.CopyInBytes(addr, buf) // escapes: okay.
+ // Since we bypassed the compiler's escape analysis, indicate that m
+ // must live until the use above.
+ runtime.KeepAlive(m) // escapes: replaced by intrinsic.
+ return length, err
+}
+
+// WriteTo implements io.WriterTo.WriteTo by passing a byte view of m's memory (m.SizeBytes() long) to writer.Write and returning its count widened to int64.
+func (m *MsgSimple) WriteTo(writer io.Writer) (int64, error) {
+ // Construct a slice backed by m's underlying memory.
+ var buf []byte
+ hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf))
+ hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(m)))
+ hdr.Len = m.SizeBytes()
+ hdr.Cap = m.SizeBytes()
+
+ length, err := writer.Write(buf)
+ // Since we bypassed the compiler's escape analysis, indicate that m
+ // must live until the use above.
+ runtime.KeepAlive(m) // escapes: replaced by intrinsic.
+ return int64(length), err
+}
+
+// CopyMsg1SliceIn copies in a slice of MsgSimple objects from the task's memory: cc.CopyInBytes fills dst's backing array with len(dst)*SizeBytes bytes read at addr; an empty dst returns (0, nil) without copying.
+func CopyMsg1SliceIn(cc marshal.CopyContext, addr hostarch.Addr, dst []MsgSimple) (int, error) {
+ count := len(dst)
+ if count == 0 {
+ return 0, nil
+ }
+ size := (*MsgSimple)(nil).SizeBytes()
+
+ ptr := unsafe.Pointer(&dst)
+ val := gohacks.Noescape(unsafe.Pointer((*reflect.SliceHeader)(ptr).Data))
+
+ // Construct a byte slice backed by dst's underlying array.
+ var buf []byte
+ hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf))
+ hdr.Data = uintptr(val)
+ hdr.Len = size * count
+ hdr.Cap = size * count
+
+ length, err := cc.CopyInBytes(addr, buf)
+ // Since we bypassed the compiler's escape analysis, indicate that dst
+ // must live until the use above.
+ runtime.KeepAlive(dst) // escapes: replaced by intrinsic.
+ return length, err
+}
+
+// CopyMsg1SliceOut copies a slice of MsgSimple objects to the task's memory: the len(src)*SizeBytes bytes of src's backing array are passed to cc.CopyOutBytes at addr; an empty src returns (0, nil) without copying.
+func CopyMsg1SliceOut(cc marshal.CopyContext, addr hostarch.Addr, src []MsgSimple) (int, error) {
+ count := len(src)
+ if count == 0 {
+ return 0, nil
+ }
+ size := (*MsgSimple)(nil).SizeBytes()
+
+ ptr := unsafe.Pointer(&src)
+ val := gohacks.Noescape(unsafe.Pointer((*reflect.SliceHeader)(ptr).Data))
+
+ // Construct a byte slice backed by src's underlying array.
+ var buf []byte
+ hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf))
+ hdr.Data = uintptr(val)
+ hdr.Len = size * count
+ hdr.Cap = size * count
+
+ length, err := cc.CopyOutBytes(addr, buf)
+ // Since we bypassed the compiler's escape analysis, indicate that src
+ // must live until the use above.
+ runtime.KeepAlive(src) // escapes: replaced by intrinsic.
+ return length, err
+}
+
+// MarshalUnsafeMsg1Slice is like MsgSimple.MarshalUnsafe, but for a []MsgSimple: it memmoves all of src into the front of dst and returns the byte count (size*count) with a nil error; an empty src returns (0, nil).
+func MarshalUnsafeMsg1Slice(src []MsgSimple, dst []byte) (int, error) {
+ count := len(src)
+ if count == 0 {
+ return 0, nil
+ }
+ size := (*MsgSimple)(nil).SizeBytes()
+
+ dst = dst[:size*count] // Panics if dst's capacity is below size*count.
+ gohacks.Memmove(unsafe.Pointer(&dst[0]), unsafe.Pointer(&src[0]), uintptr(len(dst)))
+ return size * count, nil
+}
+
+// UnmarshalUnsafeMsg1Slice is like MsgSimple.UnmarshalUnsafe, but for a []MsgSimple: it memmoves size*len(dst) bytes from the front of src into dst and returns that byte count with a nil error; an empty dst returns (0, nil).
+func UnmarshalUnsafeMsg1Slice(dst []MsgSimple, src []byte) (int, error) {
+ count := len(dst)
+ if count == 0 {
+ return 0, nil
+ }
+ size := (*MsgSimple)(nil).SizeBytes()
+
+ src = src[:(size*count)] // Panics if src's capacity is below size*count.
+ gohacks.Memmove(unsafe.Pointer(&dst[0]), unsafe.Pointer(&src[0]), uintptr(len(src)))
+ return count*size, nil
+}
+
+// Packed implements marshal.Marshallable.Packed; for P9Version it unconditionally reports false.
+//go:nosplit
+func (v *P9Version) Packed() bool {
+ // Type P9Version is dynamic so it might have slice/string headers. Hence, it is not packed.
+ return false
+}
+
+// MarshalUnsafe implements marshal.Marshallable.MarshalUnsafe; for P9Version it simply delegates to v.MarshalBytes(dst).
+func (v *P9Version) MarshalUnsafe(dst []byte) {
+ // Type P9Version doesn't have a packed layout in memory, fallback to MarshalBytes.
+ v.MarshalBytes(dst)
+}
+
+// UnmarshalUnsafe implements marshal.Marshallable.UnmarshalUnsafe; for P9Version it simply delegates to v.UnmarshalBytes(src).
+func (v *P9Version) UnmarshalUnsafe(src []byte) {
+ // Type P9Version doesn't have a packed layout in memory, fallback to UnmarshalBytes.
+ v.UnmarshalBytes(src)
+}
+
+// CopyOutN implements marshal.Marshallable.CopyOutN: it marshals v into a cc scratch buffer of v.SizeBytes() bytes and passes the first limit bytes to cc.CopyOutBytes at addr.
+//go:nosplit
+func (v *P9Version) CopyOutN(cc marshal.CopyContext, addr hostarch.Addr, limit int) (int, error) {
+ // Type P9Version doesn't have a packed layout in memory, fall back to MarshalBytes.
+ buf := cc.CopyScratchBuffer(v.SizeBytes()) // escapes: okay.
+ v.MarshalBytes(buf) // escapes: fallback.
+ return cc.CopyOutBytes(addr, buf[:limit]) // escapes: okay.
+}
+
+// CopyOut implements marshal.Marshallable.CopyOut; it is CopyOutN with limit set to the full v.SizeBytes().
+//go:nosplit
+func (v *P9Version) CopyOut(cc marshal.CopyContext, addr hostarch.Addr) (int, error) {
+ return v.CopyOutN(cc, addr, v.SizeBytes())
+}
+
+// CopyIn implements marshal.Marshallable.CopyIn: it reads v.SizeBytes() bytes (sized from v's current contents) from addr into a cc scratch buffer, then unmarshals that buffer into v.
+//go:nosplit
+func (v *P9Version) CopyIn(cc marshal.CopyContext, addr hostarch.Addr) (int, error) {
+ // Type P9Version doesn't have a packed layout in memory, fall back to UnmarshalBytes.
+ buf := cc.CopyScratchBuffer(v.SizeBytes()) // escapes: okay.
+ length, err := cc.CopyInBytes(addr, buf) // escapes: okay.
+ // Unmarshal unconditionally. If we had a short copy-in, this results in a
+ // partially unmarshalled struct.
+ v.UnmarshalBytes(buf) // escapes: fallback.
+ return length, err
+}
+
+// WriteTo implements io.WriterTo.WriteTo: it marshals v into a freshly allocated buffer of v.SizeBytes() bytes and writes that buffer to writer in a single Write call.
+func (v *P9Version) WriteTo(writer io.Writer) (int64, error) {
+ // Type P9Version doesn't have a packed layout in memory, fall back to MarshalBytes.
+ buf := make([]byte, v.SizeBytes())
+ v.MarshalBytes(buf)
+ length, err := writer.Write(buf)
+ return int64(length), err
+}
+
+// SizeBytes implements marshal.Marshallable.SizeBytes; the fixed 6 bytes cover the uint32 payloadLen plus the uint16 of padding that MarshalBytes skips, and the MID size is added on top.
+func (s *sockHeader) SizeBytes() int {
+ return 6 +
+ (*MID)(nil).SizeBytes()
+}
+
+// MarshalBytes implements marshal.Marshallable.MarshalBytes by writing payloadLen (uint32, hostarch.ByteOrder), then the message MID, then skipping 2 padding bytes which are left untouched in dst.
+func (s *sockHeader) MarshalBytes(dst []byte) {
+ hostarch.ByteOrder.PutUint32(dst[:4], uint32(s.payloadLen))
+ dst = dst[4:]
+ s.message.MarshalBytes(dst[:s.message.SizeBytes()])
+ dst = dst[s.message.SizeBytes():]
+ // Padding: dst[:sizeof(uint16)] ~= uint16(0)
+ dst = dst[2:]
+}
+
+// UnmarshalBytes implements marshal.Marshallable.UnmarshalBytes by reading payloadLen (uint32, hostarch.ByteOrder), then the message MID, then skipping the 2 trailing padding bytes in src.
+func (s *sockHeader) UnmarshalBytes(src []byte) {
+ s.payloadLen = uint32(hostarch.ByteOrder.Uint32(src[:4]))
+ src = src[4:]
+ s.message.UnmarshalBytes(src[:s.message.SizeBytes()])
+ src = src[s.message.SizeBytes():]
+ // Padding: var _ uint16 ~= src[:sizeof(uint16)]
+ src = src[2:]
+}
+
+// Packed implements marshal.Marshallable.Packed; sockHeader reports whatever its message field's Packed method reports.
+//go:nosplit
+func (s *sockHeader) Packed() bool {
+ return s.message.Packed()
+}
+
+// MarshalUnsafe implements marshal.Marshallable.MarshalUnsafe: when s.message is packed it memmoves s.SizeBytes() raw bytes of s to the start of dst, otherwise it delegates to MarshalBytes.
+func (s *sockHeader) MarshalUnsafe(dst []byte) {
+ if s.message.Packed() {
+ gohacks.Memmove(unsafe.Pointer(&dst[0]), unsafe.Pointer(s), uintptr(s.SizeBytes()))
+ } else {
+ // Type sockHeader doesn't have a packed layout in memory, fallback to MarshalBytes.
+ s.MarshalBytes(dst)
+ }
+}
+
+// UnmarshalUnsafe implements marshal.Marshallable.UnmarshalUnsafe: when s.message is packed it memmoves s.SizeBytes() raw bytes from the start of src into s, otherwise it delegates to UnmarshalBytes.
+func (s *sockHeader) UnmarshalUnsafe(src []byte) {
+ if s.message.Packed() {
+ gohacks.Memmove(unsafe.Pointer(s), unsafe.Pointer(&src[0]), uintptr(s.SizeBytes()))
+ } else {
+ // Type sockHeader doesn't have a packed layout in memory, fallback to UnmarshalBytes.
+ s.UnmarshalBytes(src)
+ }
+}
+
+// CopyOutN implements marshal.Marshallable.CopyOutN: it copies out the first limit bytes of s to addr, using s's own memory when s.message is packed and a marshalled scratch buffer otherwise.
+//go:nosplit
+func (s *sockHeader) CopyOutN(cc marshal.CopyContext, addr hostarch.Addr, limit int) (int, error) {
+ if !s.message.Packed() {
+ // Type sockHeader doesn't have a packed layout in memory, fall back to MarshalBytes.
+ buf := cc.CopyScratchBuffer(s.SizeBytes()) // escapes: okay.
+ s.MarshalBytes(buf) // escapes: fallback.
+ return cc.CopyOutBytes(addr, buf[:limit]) // escapes: okay.
+ }
+
+ // Construct a slice backed by s's underlying memory.
+ var buf []byte
+ hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf))
+ hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(s)))
+ hdr.Len = s.SizeBytes()
+ hdr.Cap = s.SizeBytes()
+
+ length, err := cc.CopyOutBytes(addr, buf[:limit]) // escapes: okay.
+ // Since we bypassed the compiler's escape analysis, indicate that s
+ // must live until the use above.
+ runtime.KeepAlive(s) // escapes: replaced by intrinsic.
+ return length, err
+}
+
+// CopyOut implements marshal.Marshallable.CopyOut; it is CopyOutN with limit set to the full s.SizeBytes().
+//go:nosplit
+func (s *sockHeader) CopyOut(cc marshal.CopyContext, addr hostarch.Addr) (int, error) {
+ return s.CopyOutN(cc, addr, s.SizeBytes())
+}
+
+// CopyIn implements marshal.Marshallable.CopyIn: it reads s.SizeBytes() bytes from addr, directly into s's memory when s.message is packed and via a scratch buffer plus UnmarshalBytes otherwise.
+//go:nosplit
+func (s *sockHeader) CopyIn(cc marshal.CopyContext, addr hostarch.Addr) (int, error) {
+ if !s.message.Packed() {
+ // Type sockHeader doesn't have a packed layout in memory, fall back to UnmarshalBytes.
+ buf := cc.CopyScratchBuffer(s.SizeBytes()) // escapes: okay.
+ length, err := cc.CopyInBytes(addr, buf) // escapes: okay.
+ // Unmarshal unconditionally. If we had a short copy-in, this results in a
+ // partially unmarshalled struct.
+ s.UnmarshalBytes(buf) // escapes: fallback.
+ return length, err
+ }
+
+ // Construct a slice backed by s's underlying memory.
+ var buf []byte
+ hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf))
+ hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(s)))
+ hdr.Len = s.SizeBytes()
+ hdr.Cap = s.SizeBytes()
+
+ length, err := cc.CopyInBytes(addr, buf) // escapes: okay.
+ // Since we bypassed the compiler's escape analysis, indicate that s
+ // must live until the use above.
+ runtime.KeepAlive(s) // escapes: replaced by intrinsic.
+ return length, err
+}
+
+// WriteTo implements io.WriterTo.WriteTo: it writes s.SizeBytes() bytes to writer, taken from s's own memory when s.message is packed and from a freshly marshalled buffer otherwise.
+func (s *sockHeader) WriteTo(writer io.Writer) (int64, error) {
+ if !s.message.Packed() {
+ // Type sockHeader doesn't have a packed layout in memory, fall back to MarshalBytes.
+ buf := make([]byte, s.SizeBytes())
+ s.MarshalBytes(buf)
+ length, err := writer.Write(buf)
+ return int64(length), err
+ }
+
+ // Construct a slice backed by s's underlying memory.
+ var buf []byte
+ hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf))
+ hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(s)))
+ hdr.Len = s.SizeBytes()
+ hdr.Cap = s.SizeBytes()
+
+ length, err := writer.Write(buf)
+ // Since we bypassed the compiler's escape analysis, indicate that s
+ // must live until the use above.
+ runtime.KeepAlive(s) // escapes: replaced by intrinsic.
+ return int64(length), err
+}
+
diff --git a/pkg/lisafs/lisafs_state_autogen.go b/pkg/lisafs/lisafs_state_autogen.go
new file mode 100644
index 000000000..fc032f947
--- /dev/null
+++ b/pkg/lisafs/lisafs_state_autogen.go
@@ -0,0 +1,176 @@
+// automatically generated by stateify.
+
+package lisafs
+
+import (
+ "gvisor.dev/gvisor/pkg/state"
+)
+
+func (l *controlFDList) StateTypeName() string { // StateTypeName returns the package-qualified type name of controlFDList.
+ return "pkg/lisafs.controlFDList"
+}
+
+func (l *controlFDList) StateFields() []string { // StateFields lists the saved field names; positions match the indices used by StateSave and StateLoad.
+ return []string{
+ "head",
+ "tail",
+ }
+}
+
+func (l *controlFDList) beforeSave() {} // No-op hook called at the start of StateSave.
+
+// +checklocksignore
+func (l *controlFDList) StateSave(stateSinkObject state.Sink) { // StateSave writes head and tail to the sink as fields 0 and 1.
+ l.beforeSave()
+ stateSinkObject.Save(0, &l.head)
+ stateSinkObject.Save(1, &l.tail)
+}
+
+func (l *controlFDList) afterLoad() {} // No-op; StateLoad below does not invoke it.
+
+// +checklocksignore
+func (l *controlFDList) StateLoad(stateSourceObject state.Source) { // StateLoad reads fields 0 and 1 from the source into head and tail.
+ stateSourceObject.Load(0, &l.head)
+ stateSourceObject.Load(1, &l.tail)
+}
+
+func (e *controlFDEntry) StateTypeName() string { // StateTypeName returns the package-qualified type name of controlFDEntry.
+ return "pkg/lisafs.controlFDEntry"
+}
+
+func (e *controlFDEntry) StateFields() []string { // StateFields lists the saved field names; positions match the indices used by StateSave and StateLoad.
+ return []string{
+ "next",
+ "prev",
+ }
+}
+
+func (e *controlFDEntry) beforeSave() {} // No-op hook called at the start of StateSave.
+
+// +checklocksignore
+func (e *controlFDEntry) StateSave(stateSinkObject state.Sink) { // StateSave writes next and prev to the sink as fields 0 and 1.
+ e.beforeSave()
+ stateSinkObject.Save(0, &e.next)
+ stateSinkObject.Save(1, &e.prev)
+}
+
+func (e *controlFDEntry) afterLoad() {} // No-op; StateLoad below does not invoke it.
+
+// +checklocksignore
+func (e *controlFDEntry) StateLoad(stateSourceObject state.Source) { // StateLoad reads fields 0 and 1 from the source into next and prev.
+ stateSourceObject.Load(0, &e.next)
+ stateSourceObject.Load(1, &e.prev)
+}
+
+func (r *controlFDRefs) StateTypeName() string { // StateTypeName returns the package-qualified type name of controlFDRefs.
+ return "pkg/lisafs.controlFDRefs"
+}
+
+func (r *controlFDRefs) StateFields() []string { // StateFields lists the single saved field, which is index 0 in StateSave and StateLoad.
+ return []string{
+ "refCount",
+ }
+}
+
+func (r *controlFDRefs) beforeSave() {} // No-op hook called at the start of StateSave.
+
+// +checklocksignore
+func (r *controlFDRefs) StateSave(stateSinkObject state.Sink) { // StateSave writes refCount to the sink as field 0.
+ r.beforeSave()
+ stateSinkObject.Save(0, &r.refCount)
+}
+
+// +checklocksignore
+func (r *controlFDRefs) StateLoad(stateSourceObject state.Source) { // StateLoad reads field 0 into refCount and hands r.afterLoad to the source.
+ stateSourceObject.Load(0, &r.refCount)
+ stateSourceObject.AfterLoad(r.afterLoad) // r.afterLoad is not defined in this file section.
+}
+
+func (l *openFDList) StateTypeName() string { // StateTypeName returns the package-qualified type name of openFDList.
+ return "pkg/lisafs.openFDList"
+}
+
+func (l *openFDList) StateFields() []string { // StateFields lists the saved field names; positions match the indices used by StateSave and StateLoad.
+ return []string{
+ "head",
+ "tail",
+ }
+}
+
+func (l *openFDList) beforeSave() {} // No-op hook called at the start of StateSave.
+
+// +checklocksignore
+func (l *openFDList) StateSave(stateSinkObject state.Sink) { // StateSave writes head and tail to the sink as fields 0 and 1.
+ l.beforeSave()
+ stateSinkObject.Save(0, &l.head)
+ stateSinkObject.Save(1, &l.tail)
+}
+
+func (l *openFDList) afterLoad() {} // No-op; StateLoad below does not invoke it.
+
+// +checklocksignore
+func (l *openFDList) StateLoad(stateSourceObject state.Source) { // StateLoad reads fields 0 and 1 from the source into head and tail.
+ stateSourceObject.Load(0, &l.head)
+ stateSourceObject.Load(1, &l.tail)
+}
+
+func (e *openFDEntry) StateTypeName() string { // StateTypeName returns the package-qualified type name of openFDEntry.
+ return "pkg/lisafs.openFDEntry"
+}
+
+func (e *openFDEntry) StateFields() []string { // StateFields lists the saved field names; positions match the indices used by StateSave and StateLoad.
+ return []string{
+ "next",
+ "prev",
+ }
+}
+
+func (e *openFDEntry) beforeSave() {} // No-op hook called at the start of StateSave.
+
+// +checklocksignore
+func (e *openFDEntry) StateSave(stateSinkObject state.Sink) { // StateSave writes next and prev to the sink as fields 0 and 1.
+ e.beforeSave()
+ stateSinkObject.Save(0, &e.next)
+ stateSinkObject.Save(1, &e.prev)
+}
+
+func (e *openFDEntry) afterLoad() {} // No-op; StateLoad below does not invoke it.
+
+// +checklocksignore
+func (e *openFDEntry) StateLoad(stateSourceObject state.Source) { // StateLoad reads fields 0 and 1 from the source into next and prev.
+ stateSourceObject.Load(0, &e.next)
+ stateSourceObject.Load(1, &e.prev)
+}
+
+func (r *openFDRefs) StateTypeName() string { // StateTypeName returns the package-qualified type name of openFDRefs.
+ return "pkg/lisafs.openFDRefs"
+}
+
+func (r *openFDRefs) StateFields() []string { // StateFields lists the single saved field, which is index 0 in StateSave and StateLoad.
+ return []string{
+ "refCount",
+ }
+}
+
+func (r *openFDRefs) beforeSave() {} // No-op hook called at the start of StateSave.
+
+// +checklocksignore
+func (r *openFDRefs) StateSave(stateSinkObject state.Sink) { // StateSave writes refCount to the sink as field 0.
+ r.beforeSave()
+ stateSinkObject.Save(0, &r.refCount)
+}
+
+// +checklocksignore
+func (r *openFDRefs) StateLoad(stateSourceObject state.Source) { // StateLoad reads field 0 into refCount and hands r.afterLoad to the source.
+ stateSourceObject.Load(0, &r.refCount)
+ stateSourceObject.AfterLoad(r.afterLoad) // r.afterLoad is not defined in this file section.
+}
+
+func init() { // Registers a nil pointer of each savable type above with the state package at package load.
+ state.Register((*controlFDList)(nil))
+ state.Register((*controlFDEntry)(nil))
+ state.Register((*controlFDRefs)(nil))
+ state.Register((*openFDList)(nil))
+ state.Register((*openFDEntry)(nil))
+ state.Register((*openFDRefs)(nil))
+}
diff --git a/pkg/lisafs/message.go b/pkg/lisafs/message.go
new file mode 100644
index 000000000..55fd2c0b1
--- /dev/null
+++ b/pkg/lisafs/message.go
@@ -0,0 +1,258 @@
+// Copyright 2021 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package lisafs
+
+import (
+ "math"
+ "os"
+
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/hostarch"
+ "gvisor.dev/gvisor/pkg/marshal/primitive"
+)
+
+// Messages have two parts:
+// * A transport header used to decipher received messages.
+// * A byte array referred to as "payload" which contains the actual message.
+//
+// "dataLen" refers to the size of both combined.
+
+// MID (message ID) is a 16-bit value used to identify which message to parse from a payload.
+//
+// +marshal slice:MIDSlice
+type MID uint16
+
+// These constants are the MID values that identify their corresponding message types.
+const (
+ // Error is only used in responses to pass errors to the client.
+ Error MID = 0
+
+ // Mount is used to establish connection between the client and server mount
+ // point. lisafs requires that the client makes a successful Mount RPC before
+ // making other RPCs.
+ Mount MID = 1
+
+ // Channel requests to start a new communication channel.
+ Channel MID = 2
+)
+
+const (
+ // NoUID is a sentinel (math.MaxUint32) used to indicate no valid UID.
+ NoUID UID = math.MaxUint32
+
+ // NoGID is a sentinel (math.MaxUint32) used to indicate no valid GID.
+ NoGID GID = math.MaxUint32
+)
+
+// MaxMessageSize reports the recommended upper bound, in bytes, on the size
+// of messages used by connections. Server implementations are free to pick a
+// different value.
+func MaxMessageSize() uint32 {
+	// Stay one page short of a huge page: when the flipcall packet window is
+	// created with MaxMessageSize() + flipcall header size + channel header
+	// size, HugePageSize is allocated and can be backed by a single huge page
+	// if the underlying memfd supports it.
+	pageSize := os.Getpagesize()
+	return uint32(hostarch.HugePageSize - pageSize)
+}
+
+// TODO(gvisor.dev/issue/6450): Once this is resolved:
+// * Update manual implementations and function signatures.
+// * Update RPC handlers and appropriate callers to handle errors correctly.
+// * Update manual implementations to get rid of buffer shifting.
+
+// UID represents a user ID as an unsigned 32-bit integer.
+//
+// +marshal
+type UID uint32
+
+// Ok reports whether uid is usable, i.e. anything other than the NoUID sentinel.
+func (uid UID) Ok() bool {
+ return uid != NoUID
+}
+
+// GID represents a group ID as an unsigned 32-bit integer.
+//
+// +marshal
+type GID uint32
+
+// Ok reports whether gid is usable, i.e. anything other than the NoGID sentinel.
+func (gid GID) Ok() bool {
+ return gid != NoGID
+}
+
+// NoopMarshal is a noop implementation of marshal.Marshallable.MarshalBytes; it ignores its argument and writes nothing.
+func NoopMarshal([]byte) {}
+
+// NoopUnmarshal is a noop implementation of marshal.Marshallable.UnmarshalBytes; it ignores its argument and reads nothing.
+func NoopUnmarshal([]byte) {}
+
+// SizedString represents a string in memory. On the wire, the string bytes
+// are preceded by a uint32 holding the string length in bytes.
+type SizedString string
+
+// SizeBytes implements marshal.Marshallable.SizeBytes.
+//
+// The result is the size of the uint32 length prefix plus the number of bytes
+// in the string itself.
+func (s *SizedString) SizeBytes() int {
+	prefixSize := (*primitive.Uint32)(nil).SizeBytes()
+	return prefixSize + len(*s)
+}
+
+// MarshalBytes implements marshal.Marshallable.MarshalBytes.
+//
+// dst receives the string length as a uint32, immediately followed by the
+// string bytes.
+func (s *SizedString) MarshalBytes(dst []byte) {
+	n := primitive.Uint32(len(*s))
+	n.MarshalUnsafe(dst)
+	body := dst[n.SizeBytes():]
+	// copy reads straight out of the string, so nothing is allocated here.
+	copy(body[:n], *s)
+}
+
+// UnmarshalBytes implements marshal.Marshallable.UnmarshalBytes.
+//
+// src must start with a uint32 length followed by at least that many string
+// bytes.
+func (s *SizedString) UnmarshalBytes(src []byte) {
+	var n primitive.Uint32
+	n.UnmarshalUnsafe(src)
+	body := src[n.SizeBytes():]
+	// Converting the bytes to a string allocates and copies; there is no way
+	// around that cost here.
+	*s = SizedString(body[:n])
+}
+
+// StringArray represents an array of strings, each marshalled as a SizedString.
+// The marshalled array data is preceded by a uint32 holding the element count.
+type StringArray []string
+
+// SizeBytes implements marshal.Marshallable.SizeBytes.
+//
+// The result is the size of the uint32 element count plus the marshalled
+// size of every element as a SizedString.
+func (s *StringArray) SizeBytes() int {
+	total := (*primitive.Uint32)(nil).SizeBytes()
+	for i := range *s {
+		elem := SizedString((*s)[i])
+		total += elem.SizeBytes()
+	}
+	return total
+}
+
+// MarshalBytes implements marshal.Marshallable.MarshalBytes.
+//
+// dst receives the element count as a uint32, followed by each element
+// marshalled as a SizedString in slice order.
+func (s *StringArray) MarshalBytes(dst []byte) {
+	count := primitive.Uint32(len(*s))
+	count.MarshalUnsafe(dst)
+	dst = dst[count.SizeBytes():]
+	for i := range *s {
+		elem := SizedString((*s)[i])
+		elem.MarshalBytes(dst)
+		// Step over the element just written.
+		dst = dst[elem.SizeBytes():]
+	}
+}
+
+// UnmarshalBytes implements marshal.Marshallable.UnmarshalBytes.
+//
+// src must start with a uint32 element count followed by that many
+// marshalled SizedStrings. The existing backing array of *s is reused when
+// its capacity is sufficient.
+func (s *StringArray) UnmarshalBytes(src []byte) {
+	var count primitive.Uint32
+	count.UnmarshalUnsafe(src)
+	src = src[count.SizeBytes():]
+
+	if int(count) <= cap(*s) {
+		*s = (*s)[:count]
+	} else {
+		*s = make([]string, count)
+	}
+
+	for i := range *s {
+		var elem SizedString
+		elem.UnmarshalBytes(src)
+		// Step over the element just read.
+		src = src[elem.SizeBytes():]
+		(*s)[i] = string(elem)
+	}
+}
+
+// Inode represents an inode on the remote filesystem: a control FD identifier paired with the inode's statx data.
+//
+// +marshal slice:InodeSlice
+type Inode struct {
+ ControlFD FDID
+ _ uint32 // Need to make struct packed.
+ Stat linux.Statx
+}
+
+// MountReq represents a Mount request; its only content is the mount path.
+type MountReq struct {
+ MountPath SizedString
+}
+
+// SizeBytes implements marshal.Marshallable.SizeBytes; it is the marshalled size of MountPath.
+func (m *MountReq) SizeBytes() int {
+ return m.MountPath.SizeBytes()
+}
+
+// MarshalBytes implements marshal.Marshallable.MarshalBytes by marshalling MountPath into dst.
+func (m *MountReq) MarshalBytes(dst []byte) {
+ m.MountPath.MarshalBytes(dst)
+}
+
+// UnmarshalBytes implements marshal.Marshallable.UnmarshalBytes by unmarshalling MountPath from src.
+func (m *MountReq) UnmarshalBytes(src []byte) {
+ m.MountPath.UnmarshalBytes(src)
+}
+
+// MountResp represents a Mount response.
+type MountResp struct {
+ Root Inode // Marshalled first, ahead of the other fields (see MarshalBytes).
+ // MaxMessageSize is the maximum size of messages communicated between the
+ // client and server in bytes. This includes the communication header.
+ MaxMessageSize primitive.Uint32
+ // SupportedMs holds all the supported messages; on the wire it is preceded by a uint16 count.
+ SupportedMs []MID
+}
+
+// SizeBytes implements marshal.Marshallable.SizeBytes.
+//
+// The total covers Root, MaxMessageSize, the uint16 count of supported
+// messages, and one MID per entry in SupportedMs.
+func (m *MountResp) SizeBytes() int {
+	countSize := (*primitive.Uint16)(nil).SizeBytes()
+	midsSize := len(m.SupportedMs) * (*MID)(nil).SizeBytes()
+	return m.Root.SizeBytes() + m.MaxMessageSize.SizeBytes() + countSize + midsSize
+}
+
+// MarshalBytes implements marshal.Marshallable.MarshalBytes.
+//
+// dst receives, in order: Root, MaxMessageSize, the number of supported
+// messages as a uint16, and then the SupportedMs entries themselves.
+func (m *MountResp) MarshalBytes(dst []byte) {
+	m.Root.MarshalUnsafe(dst)
+	dst = dst[m.Root.SizeBytes():]
+
+	m.MaxMessageSize.MarshalUnsafe(dst)
+	dst = dst[m.MaxMessageSize.SizeBytes():]
+
+	midCount := primitive.Uint16(len(m.SupportedMs))
+	midCount.MarshalBytes(dst)
+	MarshalUnsafeMIDSlice(m.SupportedMs, dst[midCount.SizeBytes():])
+}
+
+// UnmarshalBytes implements marshal.Marshallable.UnmarshalBytes.
+//
+// src is decoded in the order MarshalBytes writes it: Root, MaxMessageSize,
+// a uint16 count, then that many MIDs. src must be long enough to hold all
+// of them; the slicing below panics on a short src.
+func (m *MountResp) UnmarshalBytes(src []byte) {
+	m.Root.UnmarshalUnsafe(src)
+	src = src[m.Root.SizeBytes():]
+	m.MaxMessageSize.UnmarshalUnsafe(src)
+	src = src[m.MaxMessageSize.SizeBytes():]
+	var numSupported primitive.Uint16
+	numSupported.UnmarshalBytes(src)
+	src = src[numSupported.SizeBytes():]
+	// Reuse the existing backing array when it is large enough (the same
+	// policy StringArray.UnmarshalBytes follows) so that repeatedly
+	// unmarshalling into one MountResp does not allocate on every call.
+	if cap(m.SupportedMs) < int(numSupported) {
+		m.SupportedMs = make([]MID, numSupported)
+	} else {
+		m.SupportedMs = m.SupportedMs[:numSupported]
+	}
+	UnmarshalUnsafeMIDSlice(m.SupportedMs, src)
+}
+
+// ChannelResp is the response to the create channel request; it carries a data offset and a data length.
+//
+// +marshal
+type ChannelResp struct {
+ dataOffset int64
+ dataLength uint64
+}
+
+// ErrorResp is returned to represent an error while handling a request; it carries a single uint32 errno.
+//
+// +marshal
+type ErrorResp struct {
+ errno uint32
+}
diff --git a/pkg/lisafs/open_fd_list.go b/pkg/lisafs/open_fd_list.go
new file mode 100644
index 000000000..9a8b1e30b
--- /dev/null
+++ b/pkg/lisafs/open_fd_list.go
@@ -0,0 +1,221 @@
+package lisafs
+
+// openFDElementMapper provides an identity mapping by default.
+//
+// This can be replaced to provide a struct that maps elements to linker
+// objects, if they are not the same. A custom mapper is not typically
+// required if: Linker is left as is, Element is left as is, or Linker and
+// Element are the same type.
+type openFDElementMapper struct{}
+
+// linkerFor maps an element to its linker. Here the element is its own
+// linker, so elem is returned unchanged.
+//
+// This default implementation should be inlined.
+//
+//go:nosplit
+func (openFDElementMapper) linkerFor(elem *OpenFD) *OpenFD { return elem }
+
+// openFDList is an intrusive list of *OpenFD. Entries can be added to or
+// removed from the list in O(1) time and with no additional memory
+// allocations.
+//
+// The zero value for openFDList is an empty list ready to use.
+//
+// To iterate over a list (where l is an openFDList):
+//      for e := l.Front(); e != nil; e = e.Next() {
+//              // do something with e.
+//      }
+//
+// +stateify savable
+type openFDList struct {
+ head *OpenFD
+ tail *OpenFD
+}
+
+// Reset drops both end pointers of l, leaving it empty. The elements that
+// were linked are not modified.
+func (l *openFDList) Reset() {
+	l.head, l.tail = nil, nil
+}
+
+// Empty returns true iff the list is empty, i.e. it has no head element.
+//
+//go:nosplit
+func (l *openFDList) Empty() bool {
+ return l.head == nil
+}
+
+// Front returns the first element of list l, or nil if l is empty.
+//
+//go:nosplit
+func (l *openFDList) Front() *OpenFD {
+ return l.head
+}
+
+// Back returns the last element of list l, or nil if l is empty.
+//
+//go:nosplit
+func (l *openFDList) Back() *OpenFD {
+ return l.tail
+}
+
+// Len counts the elements of l by walking it from front to back.
+//
+// NOTE: This is an O(n) operation.
+//
+//go:nosplit
+func (l *openFDList) Len() (count int) {
+	cur := l.Front()
+	for cur != nil {
+		count++
+		cur = (openFDElementMapper{}.linkerFor(cur)).Next()
+	}
+	return count
+}
+
+// PushFront makes e the new first element of l.
+//
+//go:nosplit
+func (l *openFDList) PushFront(e *OpenFD) {
+	entry := openFDElementMapper{}.linkerFor(e)
+	entry.SetNext(l.head)
+	entry.SetPrev(nil)
+
+	if l.head == nil {
+		// The list was empty, so e is also the last element.
+		l.tail = e
+	} else {
+		openFDElementMapper{}.linkerFor(l.head).SetPrev(e)
+	}
+	l.head = e
+}
+
+// PushBack makes e the new last element of l.
+//
+//go:nosplit
+func (l *openFDList) PushBack(e *OpenFD) {
+	entry := openFDElementMapper{}.linkerFor(e)
+	entry.SetNext(nil)
+	entry.SetPrev(l.tail)
+
+	if l.tail == nil {
+		// The list was empty, so e is also the first element.
+		l.head = e
+	} else {
+		openFDElementMapper{}.linkerFor(l.tail).SetNext(e)
+	}
+	l.tail = e
+}
+
+// PushBackList appends every element of m to the end of l and leaves m
+// empty.
+//
+//go:nosplit
+func (l *openFDList) PushBackList(m *openFDList) {
+	switch {
+	case l.head == nil:
+		// l is empty: simply adopt m's ends.
+		l.head, l.tail = m.head, m.tail
+	case m.head != nil:
+		// Both lists are non-empty: splice m after l's current tail.
+		openFDElementMapper{}.linkerFor(l.tail).SetNext(m.head)
+		openFDElementMapper{}.linkerFor(m.head).SetPrev(l.tail)
+
+		l.tail = m.tail
+	}
+	m.head, m.tail = nil, nil
+}
+
+// InsertAfter links e into l immediately after b.
+//
+//go:nosplit
+func (l *openFDList) InsertAfter(b, e *OpenFD) {
+	before := openFDElementMapper{}.linkerFor(b)
+	entry := openFDElementMapper{}.linkerFor(e)
+
+	// following is the element that used to come right after b, if any.
+	following := before.Next()
+
+	entry.SetNext(following)
+	entry.SetPrev(b)
+	before.SetNext(e)
+
+	if following == nil {
+		// b was the last element, so e becomes the new tail.
+		l.tail = e
+	} else {
+		openFDElementMapper{}.linkerFor(following).SetPrev(e)
+	}
+}
+
+// InsertBefore links e into l immediately before a.
+//
+//go:nosplit
+func (l *openFDList) InsertBefore(a, e *OpenFD) {
+	after := openFDElementMapper{}.linkerFor(a)
+	entry := openFDElementMapper{}.linkerFor(e)
+
+	// preceding is the element that used to come right before a, if any.
+	preceding := after.Prev()
+	entry.SetNext(a)
+	entry.SetPrev(preceding)
+	after.SetPrev(e)
+
+	if preceding == nil {
+		// a was the first element, so e becomes the new head.
+		l.head = e
+	} else {
+		openFDElementMapper{}.linkerFor(preceding).SetNext(e)
+	}
+}
+
+// Remove unlinks e from l and clears e's own links.
+//
+// The head and tail pointers are only updated when they actually point at e.
+//
+//go:nosplit
+func (l *openFDList) Remove(e *OpenFD) {
+	entry := openFDElementMapper{}.linkerFor(e)
+	before, after := entry.Prev(), entry.Next()
+
+	// Fix up the forward link that used to lead to e.
+	switch {
+	case before != nil:
+		openFDElementMapper{}.linkerFor(before).SetNext(after)
+	case l.head == e:
+		l.head = after
+	}
+
+	// Fix up the backward link that used to lead to e.
+	switch {
+	case after != nil:
+		openFDElementMapper{}.linkerFor(after).SetPrev(before)
+	case l.tail == e:
+		l.tail = before
+	}
+
+	entry.SetNext(nil)
+	entry.SetPrev(nil)
+}
+
+// openFDEntry is a default implementation of Linker. Users can add anonymous
+// fields of this type to their structs to make them automatically implement
+// the methods needed by openFDList.
+//
+// +stateify savable
+type openFDEntry struct {
+ next *OpenFD
+ prev *OpenFD
+}
+
+// Next returns the entry that follows e in the list, or nil if none has been
+// set.
+//
+//go:nosplit
+func (e *openFDEntry) Next() *OpenFD {
+ return e.next
+}
+
+// Prev returns the entry that precedes e in the list, or nil if none has been
+// set.
+//
+//go:nosplit
+func (e *openFDEntry) Prev() *OpenFD {
+ return e.prev
+}
+
+// SetNext assigns elem as the entry that follows e in the list.
+//
+//go:nosplit
+func (e *openFDEntry) SetNext(elem *OpenFD) {
+ e.next = elem
+}
+
+// SetPrev assigns elem as the entry that precedes e in the list.
+//
+//go:nosplit
+func (e *openFDEntry) SetPrev(elem *OpenFD) {
+ e.prev = elem
+}
diff --git a/pkg/lisafs/open_fd_refs.go b/pkg/lisafs/open_fd_refs.go
new file mode 100644
index 000000000..f1a99f335
--- /dev/null
+++ b/pkg/lisafs/open_fd_refs.go
@@ -0,0 +1,140 @@
+package lisafs
+
+import (
+ "fmt"
+ "sync/atomic"
+
+ "gvisor.dev/gvisor/pkg/refsvfs2"
+)
+
+// openFDenableLogging indicates whether reference-related events should be
+// logged (with stack traces). This is false by default and should only be set
+// to true for debugging purposes, as it can generate an extremely large amount
+// of output and drastically degrade performance.
+const openFDenableLogging = false
+
+// openFDobj is used to customize logging; RefType derives the type name from
+// it. Note that we use a pointer to OpenFD so that we do not copy the entire
+// object when passed as a format parameter.
+var openFDobj *OpenFD
+
+// openFDRefs implements refs.RefCounter. It keeps a reference count using
+// atomic operations and calls the destructor when the count reaches zero.
+//
+// NOTE: Do not introduce additional fields to the Refs struct. It is used by
+// many filesystem objects, and we want to keep it as small as possible (i.e.,
+// the same size as using an int64 directly) to avoid taking up extra cache
+// space. In general, this template should not be extended at the cost of
+// performance. If it does not offer enough flexibility for a particular object
+// (example: b/187877947), we should implement the RefCounter/CheckedObject
+// interfaces manually.
+//
+// +stateify savable
+type openFDRefs struct {
+ // refCount is composed of two fields:
+ //
+ // [32-bit speculative references]:[32-bit real references]
+ //
+ // Speculative references are used for TryIncRef, to avoid a CompareAndSwap
+ // loop. See IncRef, DecRef and TryIncRef for details of how these fields are
+ // used.
+ refCount int64
+}
+
+// InitRefs initializes r with one reference and, if enabled, activates leak
+// checking.
+func (r *openFDRefs) InitRefs() {
+ // Store the count before registering so that r is never registered with a
+ // zero count.
+ atomic.StoreInt64(&r.refCount, 1)
+ refsvfs2.Register(r)
+}
+
+// RefType implements refsvfs2.CheckedObject.RefType.
+//
+// The name is the %T rendering of openFDobj with its first character (the
+// pointer marker) removed.
+func (r *openFDRefs) RefType() string {
+	ptrTypeName := fmt.Sprintf("%T", openFDobj)
+	return ptrTypeName[1:]
+}
+
+// LeakMessage implements refsvfs2.CheckedObject.LeakMessage.
+func (r *openFDRefs) LeakMessage() string {
+ return fmt.Sprintf("[%s %p] reference count of %d instead of 0", r.RefType(), r, r.ReadRefs())
+}
+
+// LogRefs implements refsvfs2.CheckedObject.LogRefs. It reports the
+// compile-time openFDenableLogging setting.
+func (r *openFDRefs) LogRefs() bool {
+ return openFDenableLogging
+}
+
+// ReadRefs returns the current value of the reference counter, loaded
+// atomically. The returned count is inherently racy and is unsafe to use
+// without external synchronization.
+func (r *openFDRefs) ReadRefs() int64 {
+ return atomic.LoadInt64(&r.refCount)
+}
+
+// IncRef implements refs.RefCounter.IncRef.
+//
+// It panics if the count was not already positive, since that means the
+// caller did not hold a reference to begin with.
+//
+//go:nosplit
+func (r *openFDRefs) IncRef() {
+	refs := atomic.AddInt64(&r.refCount, 1)
+	if openFDenableLogging {
+		refsvfs2.LogIncRef(r, refs)
+	}
+	if refs > 1 {
+		return
+	}
+	panic(fmt.Sprintf("Incrementing non-positive count %p on %s", r, r.RefType()))
+}
+
+// TryIncRef implements refs.TryRefCounter.TryIncRef.
+//
+// To do this safely without a loop, a speculative reference is first acquired
+// on the object. This allows multiple concurrent TryIncRef calls to distinguish
+// other TryIncRef calls from genuine references held.
+//
+//go:nosplit
+func (r *openFDRefs) TryIncRef() bool {
+	const speculativeRef = 1 << 32
+
+	// Take a speculative reference in the upper 32 bits and look at the real
+	// reference count held in the lower 32 bits.
+	refs := atomic.AddInt64(&r.refCount, speculativeRef)
+	if int32(refs) == 0 {
+		// No real references remain: undo the speculative reference and fail.
+		atomic.AddInt64(&r.refCount, -speculativeRef)
+		return false
+	}
+
+	// Convert the speculative reference into a real one in a single step.
+	refs = atomic.AddInt64(&r.refCount, -speculativeRef+1)
+	if openFDenableLogging {
+		refsvfs2.LogTryIncRef(r, refs)
+	}
+	return true
+}
+
+// DecRef implements refs.RefCounter.DecRef.
+//
+// Note that speculative references are counted here. Since they were added
+// prior to real references reaching zero, they will successfully convert to
+// real references. In other words, we see speculative references only in the
+// following case:
+//
+//      A: TryIncRef [speculative increase => sees non-negative references]
+//      B: DecRef [real decrease]
+//      A: TryIncRef [transform speculative to real]
+//
+// When the count reaches exactly zero, r is unregistered from leak checking
+// and destroy, if non-nil, is invoked.
+//
+//go:nosplit
+func (r *openFDRefs) DecRef(destroy func()) {
+	refs := atomic.AddInt64(&r.refCount, -1)
+	if openFDenableLogging {
+		refsvfs2.LogDecRef(r, refs)
+	}
+
+	if refs < 0 {
+		panic(fmt.Sprintf("Decrementing non-positive ref count %p, owned by %s", r, r.RefType()))
+	}
+	if refs != 0 {
+		// Other references (real or speculative) are still outstanding.
+		return
+	}
+
+	refsvfs2.Unregister(r)
+	if destroy != nil {
+		destroy()
+	}
+}
+
+// afterLoad re-registers r for leak checking if it still holds references.
+// NOTE(review): presumably invoked by the stateify restore path; confirm.
+func (r *openFDRefs) afterLoad() {
+	if r.ReadRefs() <= 0 {
+		return
+	}
+	refsvfs2.Register(r)
+}
diff --git a/pkg/lisafs/sample_message.go b/pkg/lisafs/sample_message.go
new file mode 100644
index 000000000..3868dfa08
--- /dev/null
+++ b/pkg/lisafs/sample_message.go
@@ -0,0 +1,110 @@
+// Copyright 2021 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package lisafs
+
+import (
+ "math/rand"
+
+ "gvisor.dev/gvisor/pkg/marshal/primitive"
+)
+
+// MsgSimple is a sample packed struct which can be used to test message passing.
+// Its fields total 16 bytes and are ordered so that no implicit padding is
+// needed.
+//
+// +marshal slice:Msg1Slice
+type MsgSimple struct {
+ A uint16
+ B uint16
+ C uint32
+ D uint64
+}
+
+// Randomize overwrites every field of m with a value drawn from math/rand's
+// global source. The draws happen in field order: A, B, C, then D.
+func (m *MsgSimple) Randomize() {
+	a := uint16(rand.Uint32())
+	b := uint16(rand.Uint32())
+	c := rand.Uint32()
+	d := rand.Uint64()
+	m.A, m.B, m.C, m.D = a, b, c, d
+}
+
+// MsgDynamic is a sample dynamic struct which can be used to test message passing.
+//
+// N is the element count written ahead of Arr on the wire. SizeBytes is
+// computed from N rather than len(Arr), so callers must keep the two in sync.
+//
+// +marshal dynamic
+type MsgDynamic struct {
+ N primitive.Uint32
+ Arr []MsgSimple
+}
+
+// SizeBytes implements marshal.Marshallable.SizeBytes.
+//
+// The size is that of the count field plus N encoded MsgSimple elements.
+func (m *MsgDynamic) SizeBytes() int {
+	countSize := m.N.SizeBytes()
+	elemSize := (*MsgSimple)(nil).SizeBytes()
+	return countSize + int(m.N)*elemSize
+}
+
+// MarshalBytes implements marshal.Marshallable.MarshalBytes.
+//
+// The count N is written first, immediately followed by the elements of Arr.
+func (m *MsgDynamic) MarshalBytes(dst []byte) {
+	m.N.MarshalUnsafe(dst)
+	elems := dst[m.N.SizeBytes():]
+	MarshalUnsafeMsg1Slice(m.Arr, elems)
+}
+
+// UnmarshalBytes implements marshal.Marshallable.UnmarshalBytes.
+//
+// Arr is reallocated to hold exactly N elements before they are decoded.
+func (m *MsgDynamic) UnmarshalBytes(src []byte) {
+	m.N.UnmarshalUnsafe(src)
+	elems := src[m.N.SizeBytes():]
+	m.Arr = make([]MsgSimple, m.N)
+	UnmarshalUnsafeMsg1Slice(m.Arr, elems)
+}
+
+// Randomize replaces Arr with arrLen freshly randomized elements and sets N
+// to match.
+func (m *MsgDynamic) Randomize(arrLen int) {
+	m.N = primitive.Uint32(arrLen)
+	m.Arr = make([]MsgSimple, arrLen)
+	for i := range m.Arr {
+		m.Arr[i].Randomize()
+	}
+}
+
+// P9Version mimics p9.TVersion and p9.Rversion.
+//
+// On the wire, Version is encoded as a 16-bit length followed by the string
+// bytes.
+//
+// +marshal dynamic
+type P9Version struct {
+ MSize primitive.Uint32
+ Version string
+}
+
+// SizeBytes implements marshal.Marshallable.SizeBytes.
+//
+// The size covers MSize, the 16-bit length prefix and the bytes of Version.
+func (v *P9Version) SizeBytes() int {
+	msize := (*primitive.Uint32)(nil).SizeBytes()
+	lenPrefix := (*primitive.Uint16)(nil).SizeBytes()
+	return msize + lenPrefix + len(v.Version)
+}
+
+// MarshalBytes implements marshal.Marshallable.MarshalBytes.
+//
+// Layout: MSize, then the length of Version as a uint16, then its bytes.
+func (v *P9Version) MarshalBytes(dst []byte) {
+	v.MSize.MarshalUnsafe(dst)
+	off := v.MSize.SizeBytes()
+
+	strLen := primitive.Uint16(len(v.Version))
+	strLen.MarshalUnsafe(dst[off:])
+	off += strLen.SizeBytes()
+
+	copy(dst[off:], v.Version)
+}
+
+// UnmarshalBytes implements marshal.Marshallable.UnmarshalBytes.
+//
+// Version is copied out of src, so it does not alias the input buffer.
+func (v *P9Version) UnmarshalBytes(src []byte) {
+	v.MSize.UnmarshalUnsafe(src)
+	off := v.MSize.SizeBytes()
+
+	var strLen primitive.Uint16
+	strLen.UnmarshalUnsafe(src[off:])
+	off += strLen.SizeBytes()
+
+	body := src[off:]
+	v.Version = string(body[:strLen])
+}
diff --git a/pkg/lisafs/server.go b/pkg/lisafs/server.go
new file mode 100644
index 000000000..7515355ec
--- /dev/null
+++ b/pkg/lisafs/server.go
@@ -0,0 +1,113 @@
+// Copyright 2021 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package lisafs
+
+import (
+ "gvisor.dev/gvisor/pkg/sync"
+)
+
+// Server serves a filesystem tree. Multiple connections on different mount
+// points can be started on a server. The server provides utilities to safely
+// modify the filesystem tree across its connections (mount points). Note that
+// it does not support synchronizing filesystem tree mutations across other
+// servers serving the same filesystem subtree. Server also manages the
+// lifecycle of all connections.
+type Server struct {
+ // connWg counts the number of active connections being tracked.
+ connWg sync.WaitGroup
+
+ // RenameMu synchronizes rename operations within this filesystem tree.
+ RenameMu sync.RWMutex
+
+ // handlers is a list of RPC handlers which can be indexed by the handler's
+ // corresponding MID.
+ handlers []RPCHandler
+
+ // mpMu protects mountPoints, which keeps track of all the mount points
+ // this server serves.
+ mpMu sync.RWMutex
+ mountPoints []*ControlFD
+
+ // impl is the server implementation which embeds this server.
+ impl ServerImpl
+}
+
+// Init must be called before first use of server. It records impl and
+// installs the package-level handlers table.
+func (s *Server) Init(impl ServerImpl) {
+	s.impl, s.handlers = impl, handlers[:]
+}
+
+// InitTestOnly behaves like Init but installs the caller-supplied handlers
+// instead of the package defaults. This is for test only.
+func (s *Server) InitTestOnly(impl ServerImpl, handlers []RPCHandler) {
+	s.impl, s.handlers = impl, handlers
+}
+
+// WithRenameReadLock invokes fn with the server's rename mutex locked for
+// reading and returns fn's result. Holding the read lock excludes writers of
+// RenameMu, ensuring that no rename operations occur concurrently with fn.
+//
+// The lock is released via defer so that it is dropped even if fn panics;
+// otherwise a panic recovered further up the stack would leave RenameMu
+// read-locked forever and block every subsequent writer.
+func (s *Server) WithRenameReadLock(fn func() error) error {
+	s.RenameMu.RLock()
+	defer s.RenameMu.RUnlock()
+	return fn()
+}
+
+// StartConnection starts the connection on a separate goroutine and tracks it
+// in connWg until c.Run() finishes, so that Wait() can block on it.
+func (s *Server) StartConnection(c *Connection) {
+	s.connWg.Add(1)
+	go func() {
+		// Deferred so the connection is untracked however the goroutine
+		// ends (for example via runtime.Goexit), otherwise Wait() would
+		// block forever.
+		defer s.connWg.Done()
+		c.Run()
+	}()
+}
+
+// Wait blocks until all connections started via StartConnection() have
+// terminated.
+func (s *Server) Wait() {
+ s.connWg.Wait()
+}
+
+// addMountPoint appends root to the list of mount points served by s. It
+// takes mpMu for writing.
+func (s *Server) addMountPoint(root *ControlFD) {
+ s.mpMu.Lock()
+ defer s.mpMu.Unlock()
+ s.mountPoints = append(s.mountPoints, root)
+}
+
+// forEachMountPoint calls fn once for every registered mount point, in
+// registration order, while holding mpMu for reading. fn must therefore not
+// call addMountPoint.
+func (s *Server) forEachMountPoint(fn func(root *ControlFD)) {
+	s.mpMu.RLock()
+	defer s.mpMu.RUnlock()
+
+	roots := s.mountPoints
+	for i := range roots {
+		fn(roots[i])
+	}
+}
+
+// ServerImpl contains the implementation details for a Server.
+// Implementations of ServerImpl should contain their associated Server by
+// value as their first field.
+type ServerImpl interface {
+ // Mount is called when a Mount RPC is made. It mounts the connection at
+ // mountPath.
+ //
+ // Precondition: mountPath == path.Clean(mountPath).
+ Mount(c *Connection, mountPath string) (ControlFDImpl, Inode, error)
+
+ // SupportedMessages returns a list of messages that the server
+ // implementation supports.
+ SupportedMessages() []MID
+
+ // MaxMessageSize is the maximum payload length (in bytes) that can be sent
+ // to this server implementation.
+ //
+ // NOTE(review): MountResp.MaxMessageSize is documented as including the
+ // communication header, whereas this is documented as a payload length;
+ // confirm which definition callers rely on.
+ MaxMessageSize() uint32
+}
diff --git a/pkg/lisafs/sock.go b/pkg/lisafs/sock.go
new file mode 100644
index 000000000..88210242f
--- /dev/null
+++ b/pkg/lisafs/sock.go
@@ -0,0 +1,208 @@
+// Copyright 2021 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package lisafs
+
+import (
+ "io"
+
+ "golang.org/x/sys/unix"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/unet"
+)
+
+var (
+ // sockHeaderLen is the encoded size of sockHeader in bytes. Payload data
+ // in a socket message starts at this offset.
+ sockHeaderLen = uint32((*sockHeader)(nil).SizeBytes())
+)
+
+// sockHeader is the header present in front of each message received on a UDS.
+//
+// +marshal
+type sockHeader struct {
+ // payloadLen is the number of payload bytes that follow the header.
+ payloadLen uint32
+ // message identifies the message carried in the payload.
+ message MID
+ _ uint16 // Need to make struct packed.
+}
+
+// sockCommunicator implements Communicator. This is not thread safe.
+type sockCommunicator struct {
+ fdTracker
+ // sock is the underlying unix domain socket.
+ sock *unet.Socket
+ // buf holds the message header followed by the payload. It is reused and
+ // grown across messages.
+ buf []byte
+}
+
+// Compile-time check that sockCommunicator satisfies Communicator.
+var _ Communicator = (*sockCommunicator)(nil)
+
+// newSockComm returns a communicator for sock whose buffer is initially just
+// large enough to hold a message header.
+func newSockComm(sock *unet.Socket) *sockCommunicator {
+	comm := new(sockCommunicator)
+	comm.sock = sock
+	comm.buf = make([]byte, sockHeaderLen)
+	return comm
+}
+
+// FD returns the file descriptor of the underlying socket.
+func (s *sockCommunicator) FD() int {
+ return s.sock.FD()
+}
+
+// destroy closes the underlying socket. Any error from Close is ignored.
+func (s *sockCommunicator) destroy() {
+ s.sock.Close()
+}
+
+// shutdown shuts down the underlying socket. Failure is logged as a warning
+// rather than returned.
+func (s *sockCommunicator) shutdown() {
+	err := s.sock.Shutdown()
+	if err == nil {
+		return
+	}
+	log.Warningf("Socket.Shutdown() failed (FD: %d): %v", s.sock.FD(), err)
+}
+
+// resizeBuf makes len(s.buf) equal to size. Existing capacity is reused when
+// sufficient; otherwise the buffer is extended to its full capacity and then
+// grown by the missing number of bytes, preserving its contents.
+func (s *sockCommunicator) resizeBuf(size uint32) {
+	want := int(size)
+	have := cap(s.buf)
+	if have >= want {
+		s.buf = s.buf[:size]
+		return
+	}
+	s.buf = append(s.buf[:have], make([]byte, want-have)...)
+}
+
+// PayloadBuf implements Communicator.PayloadBuf.
+//
+// The returned slice aliases s.buf and covers the size bytes that directly
+// follow the header region.
+func (s *sockCommunicator) PayloadBuf(size uint32) []byte {
+	end := sockHeaderLen + size
+	s.resizeBuf(end)
+	return s.buf[sockHeaderLen:end]
+}
+
+// SndRcvMessage implements Communicator.SndRcvMessage.
+//
+// It sends the already populated payload as message m without donating any
+// FDs, then waits for and returns the reply.
+func (s *sockCommunicator) SndRcvMessage(m MID, payloadLen uint32, wantFDs uint8) (MID, uint32, error) {
+	err := s.sndPrepopulatedMsg(m, payloadLen, nil)
+	if err != nil {
+		return 0, 0, err
+	}
+	return s.rcvMsg(wantFDs)
+}
+
+// sndPrepopulatedMsg writes the header for message m into the front of s.buf
+// and sends header plus payload over the socket, donating fds.
+//
+// It assumes that s.buf has already been populated with `payloadLen` bytes of
+// data after the header region.
+func (s *sockCommunicator) sndPrepopulatedMsg(m MID, payloadLen uint32, fds []int) error {
+	hdr := sockHeader{
+		payloadLen: payloadLen,
+		message:    m,
+	}
+	hdr.MarshalUnsafe(s.buf)
+
+	total := sockHeaderLen + payloadLen
+	msg := s.buf[:total]
+	return writeTo(s.sock, [][]byte{msg}, int(total), fds)
+}
+
+// writeTo writes the passed iovec to the UDS and donates any passed FDs.
+//
+// dataLen must be the total number of bytes in iovec. The function keeps
+// writing until dataLen bytes have been written or an error occurs. Note that
+// iovec is modified in place when a write is only partially completed.
+func writeTo(sock *unet.Socket, iovec [][]byte, dataLen int, fds []int) error {
+ w := sock.Writer(true)
+ if len(fds) > 0 {
+ w.PackFDs(fds...)
+ }
+
+ fdsUnpacked := false
+ for n := 0; n < dataLen; {
+ cur, err := w.WriteVec(iovec)
+ if err != nil {
+ return err
+ }
+ n += cur
+
+ // Fast common path.
+ if n >= dataLen {
+ break
+ }
+
+ // Consume iovecs: drop the buffers that were written in full and trim
+ // the first partially written one, so the next WriteVec resumes where
+ // this one stopped.
+ for consumed := 0; consumed < cur; {
+ if len(iovec[0]) <= cur-consumed {
+ consumed += len(iovec[0])
+ iovec = iovec[1:]
+ } else {
+ iovec[0] = iovec[0][cur-consumed:]
+ break
+ }
+ }
+
+ if n > 0 && !fdsUnpacked {
+ // Don't resend any control message.
+ fdsUnpacked = true
+ w.UnpackFDs()
+ }
+ }
+ return nil
+}
+
+// rcvMsg reads one message (header, then payload if any) from the UDS into
+// s.buf and returns the message ID and payload length. FDs donated alongside
+// the header are handed to TrackFD.
+func (s *sockCommunicator) rcvMsg(wantFDs uint8) (MID, uint32, error) {
+	// FDs are only requested while reading the header.
+	donated, err := readFrom(s.sock, s.buf[:sockHeaderLen], wantFDs)
+	if err != nil {
+		return 0, 0, err
+	}
+	for i := range donated {
+		s.TrackFD(donated[i])
+	}
+
+	var hdr sockHeader
+	hdr.UnmarshalUnsafe(s.buf)
+
+	// Only touch the socket again when the header announces a payload.
+	if hdr.payloadLen != 0 {
+		payload := s.PayloadBuf(hdr.payloadLen)
+		if _, err := readFrom(s.sock, payload, 0); err != nil {
+			return 0, 0, err
+		}
+	}
+	return hdr.message, hdr.payloadLen, nil
+}
+
+// readFrom fills the passed buffer with data from the socket, reading
+// repeatedly until len(buf) bytes have arrived. It also returns any donated
+// FDs, of which at most wantFDs are accepted.
+//
+// On error no FDs are returned; any FDs received before the failure are
+// closed so that they do not leak.
+func readFrom(sock *unet.Socket, buf []byte, wantFDs uint8) ([]int, error) {
+	r := sock.Reader(true)
+	r.EnableFDs(int(wantFDs))
+
+	var (
+		fds    []int
+		fdInit bool
+	)
+	n := len(buf)
+	for got := 0; got < n; {
+		cur, err := r.ReadVec([][]byte{buf[got:]})
+
+		// Ignore EOF if cur > 0.
+		if err != nil && (err != io.EOF || cur == 0) {
+			r.CloseFDs()
+			// FDs extracted on an earlier iteration are never handed to
+			// the caller on this path, so they must be closed here.
+			closeFDs(fds)
+			return nil, err
+		}
+
+		if !fdInit && cur > 0 {
+			// FDs are extracted once, after the first read that returned
+			// data.
+			fds, err = r.ExtractFDs()
+			if err != nil {
+				return nil, err
+			}
+
+			fdInit = true
+			// Do not accept further FDs on subsequent reads.
+			r.EnableFDs(0)
+		}
+
+		got += cur
+	}
+	return fds, nil
+}
+
+// closeFDs closes every non-negative descriptor in fds. Negative entries are
+// skipped and errors from close are ignored.
+func closeFDs(fds []int) {
+	for i := range fds {
+		if fds[i] < 0 {
+			continue
+		}
+		unix.Close(fds[i])
+	}
+}