diff options
29 files changed, 4748 insertions, 35 deletions
diff --git a/pkg/lisafs/channel.go b/pkg/lisafs/channel.go new file mode 100644 index 000000000..301212e51 --- /dev/null +++ b/pkg/lisafs/channel.go @@ -0,0 +1,190 @@ +// Copyright 2021 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package lisafs + +import ( + "math" + "runtime" + + "golang.org/x/sys/unix" + "gvisor.dev/gvisor/pkg/fdchannel" + "gvisor.dev/gvisor/pkg/flipcall" + "gvisor.dev/gvisor/pkg/log" +) + +var ( + chanHeaderLen = uint32((*channelHeader)(nil).SizeBytes()) +) + +// maxChannels returns the number of channels a client can create. +// +// The server will reject channel creation requests beyond this (per client). +// Note that we don't want the number of channels to be too large, because each +// accounts for a large region of shared memory. +// TODO(gvisor.dev/issue/6313): Tune the number of channels. +func maxChannels() int { + maxChans := runtime.GOMAXPROCS(0) + if maxChans < 2 { + maxChans = 2 + } + if maxChans > 4 { + maxChans = 4 + } + return maxChans +} + +// channel implements Communicator and represents the communication endpoint +// for the client and server and is used to perform fast IPC. Apart from +// communicating data, a channel is also capable of donating file descriptors. +type channel struct { + fdTracker + dead bool + data flipcall.Endpoint + fdChan fdchannel.Endpoint +} + +var _ Communicator = (*channel)(nil) + +// PayloadBuf implements Communicator.PayloadBuf. +func (ch *channel) PayloadBuf(size uint32) []byte { + return ch.data.Data()[chanHeaderLen : chanHeaderLen+size] +} + +// SndRcvMessage implements Communicator.SndRcvMessage. +func (ch *channel) SndRcvMessage(m MID, payloadLen uint32, wantFDs uint8) (MID, uint32, error) { + // Write header. Requests can not donate FDs. + ch.marshalHdr(m, 0 /* numFDs */) + + // One-shot communication. RPCs are expected to be quick rather than block. + rcvDataLen, err := ch.data.SendRecvFast(chanHeaderLen + payloadLen) + if err != nil { + // This channel is now unusable. + ch.dead = true + // Map the transport errors to EIO, but also log the real error. + log.Warningf("lisafs.sndRcvMessage: flipcall.Endpoint.SendRecv: %v", err) + return 0, 0, unix.EIO + } + + return ch.rcvMsg(rcvDataLen) +} + +func (ch *channel) shutdown() { + ch.data.Shutdown() +} + +func (ch *channel) destroy() { + ch.dead = true + ch.fdChan.Destroy() + ch.data.Destroy() +} + +// createChannel creates a server side channel. It returns a packet window +// descriptor (for the data channel) and an open socket for the FD channel. +func (c *Connection) createChannel(maxMessageSize uint32) (*channel, flipcall.PacketWindowDescriptor, int, error) { + c.channelsMu.Lock() + defer c.channelsMu.Unlock() + // If c.channels is nil, the connection has closed. + if c.channels == nil || len(c.channels) >= maxChannels() { + return nil, flipcall.PacketWindowDescriptor{}, -1, unix.ENOSYS + } + ch := &channel{} + + // Set up data channel. + desc, err := c.channelAlloc.Allocate(flipcall.PacketHeaderBytes + int(chanHeaderLen+maxMessageSize)) + if err != nil { + return nil, flipcall.PacketWindowDescriptor{}, -1, err + } + if err := ch.data.Init(flipcall.ServerSide, desc); err != nil { + return nil, flipcall.PacketWindowDescriptor{}, -1, err + } + + // Set up FD channel. + fdSocks, err := fdchannel.NewConnectedSockets() + if err != nil { + ch.data.Destroy() + return nil, flipcall.PacketWindowDescriptor{}, -1, err + } + ch.fdChan.Init(fdSocks[0]) + clientFDSock := fdSocks[1] + + c.channels = append(c.channels, ch) + return ch, desc, clientFDSock, nil +} + +// sendFDs sends as many FDs as it can. The failure to send an FD does not +// cause an error and fail the entire RPC. FDs are considered supplementary +// responses that are not critical to the RPC response itself. The failure to +// send the (i)th FD will cause all the following FDs to not be sent as well +// because the order in which FDs are donated is important. +func (ch *channel) sendFDs(fds []int) uint8 { + numFDs := len(fds) + if numFDs == 0 { + return 0 + } + + if numFDs > math.MaxUint8 { + log.Warningf("dropping all FDs because too many FDs to donate: %v", numFDs) + return 0 + } + + for i, fd := range fds { + if err := ch.fdChan.SendFD(fd); err != nil { + log.Warningf("error occurred while sending (%d/%d)th FD on channel(%p): %v", i+1, numFDs, ch, err) + return uint8(i) + } + } + return uint8(numFDs) +} + +// channelHeader is the header present in front of each message received on +// flipcall endpoint when the protocol version being used is 1. +// +// +marshal +type channelHeader struct { + message MID + numFDs uint8 + _ uint8 // Need to make struct packed. +} + +func (ch *channel) marshalHdr(m MID, numFDs uint8) { + header := &channelHeader{ + message: m, + numFDs: numFDs, + } + header.MarshalUnsafe(ch.data.Data()) +} + +func (ch *channel) rcvMsg(dataLen uint32) (MID, uint32, error) { + if dataLen < chanHeaderLen { + log.Warningf("received data has size smaller than header length: %d", dataLen) + return 0, 0, unix.EIO + } + + // Read header first. + var header channelHeader + header.UnmarshalUnsafe(ch.data.Data()) + + // Read any FDs. + for i := 0; i < int(header.numFDs); i++ { + fd, err := ch.fdChan.RecvFDNonblock() + if err != nil { + log.Warningf("expected %d FDs, received %d successfully, got err after that: %v", header.numFDs, i, err) + break + } + ch.TrackFD(fd) + } + + return header.message, dataLen - chanHeaderLen, nil +} diff --git a/pkg/lisafs/client.go b/pkg/lisafs/client.go new file mode 100644 index 000000000..c99f8c73d --- /dev/null +++ b/pkg/lisafs/client.go @@ -0,0 +1,377 @@ +// Copyright 2021 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package lisafs + +import ( + "fmt" + "math" + + "golang.org/x/sys/unix" + "gvisor.dev/gvisor/pkg/cleanup" + "gvisor.dev/gvisor/pkg/flipcall" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/sync" + "gvisor.dev/gvisor/pkg/unet" +) + +// Client helps manage a connection to the lisafs server and pass messages +// efficiently. There is a 1:1 mapping between a Connection and a Client. +type Client struct { + // sockComm is the main socket by which this connections is established. + // Communication over the socket is synchronized by sockMu. + sockMu sync.Mutex + sockComm *sockCommunicator + + // channelsMu protects channels and availableChannels. + channelsMu sync.Mutex + // channels tracks all the channels. + channels []*channel + // availableChannels is a LIFO (stack) of channels available to be used. + availableChannels []*channel + // activeWg represents active channels. + activeWg sync.WaitGroup + + // watchdogWg only holds the watchdog goroutine. + watchdogWg sync.WaitGroup + + // supported caches information about which messages are supported. It is + // indexed by MID. An MID is supported if supported[MID] is true. + supported []bool + + // maxMessageSize is the maximum payload length (in bytes) that can be sent. + // It is initialized on Mount and is immutable. + maxMessageSize uint32 +} + +// NewClient creates a new client for communication with the server. It mounts +// the server and creates channels for fast IPC. NewClient takes ownership over +// the passed socket. On success, it returns the initialized client along with +// the root Inode. +func NewClient(sock *unet.Socket, mountPath string) (*Client, *Inode, error) { + maxChans := maxChannels() + c := &Client{ + sockComm: newSockComm(sock), + channels: make([]*channel, 0, maxChans), + availableChannels: make([]*channel, 0, maxChans), + maxMessageSize: 1 << 20, // 1 MB for now. + } + + // Start a goroutine to check socket health. This goroutine is also + // responsible for client cleanup. + c.watchdogWg.Add(1) + go c.watchdog() + + // Clean everything up if anything fails. + cu := cleanup.Make(func() { + c.Close() + }) + defer cu.Clean() + + // Mount the server first. Assume Mount is supported so that we can make the + // Mount RPC below. + c.supported = make([]bool, Mount+1) + c.supported[Mount] = true + mountMsg := MountReq{ + MountPath: SizedString(mountPath), + } + var mountResp MountResp + if err := c.SndRcvMessage(Mount, uint32(mountMsg.SizeBytes()), mountMsg.MarshalBytes, mountResp.UnmarshalBytes, nil); err != nil { + return nil, nil, err + } + + // Initialize client. + c.maxMessageSize = uint32(mountResp.MaxMessageSize) + var maxSuppMID MID + for _, suppMID := range mountResp.SupportedMs { + if suppMID > maxSuppMID { + maxSuppMID = suppMID + } + } + c.supported = make([]bool, maxSuppMID+1) + for _, suppMID := range mountResp.SupportedMs { + c.supported[suppMID] = true + } + + // Create channels parallely so that channels can be used to create more + // channels and costly initialization like flipcall.Endpoint.Connect can + // proceed parallely. + var channelsWg sync.WaitGroup + channelErrs := make([]error, maxChans) + for i := 0; i < maxChans; i++ { + channelsWg.Add(1) + curChanID := i + go func() { + defer channelsWg.Done() + ch, err := c.createChannel() + if err != nil { + log.Warningf("channel creation failed: %v", err) + channelErrs[curChanID] = err + return + } + c.channelsMu.Lock() + c.channels = append(c.channels, ch) + c.availableChannels = append(c.availableChannels, ch) + c.channelsMu.Unlock() + }() + } + channelsWg.Wait() + + for _, channelErr := range channelErrs { + // Return the first non-nil channel creation error. + if channelErr != nil { + return nil, nil, channelErr + } + } + cu.Release() + + return c, &mountResp.Root, nil +} + +func (c *Client) watchdog() { + defer c.watchdogWg.Done() + + events := []unix.PollFd{ + { + Fd: int32(c.sockComm.FD()), + Events: unix.POLLHUP | unix.POLLRDHUP, + }, + } + + // Wait for a shutdown event. + for { + n, err := unix.Ppoll(events, nil, nil) + if err == unix.EINTR || err == unix.EAGAIN { + continue + } + if err != nil { + log.Warningf("lisafs.Client.watch(): %v", err) + } else if n != 1 { + log.Warningf("lisafs.Client.watch(): got %d events, wanted 1", n) + } + break + } + + // Shutdown all active channels and wait for them to complete. + c.shutdownActiveChans() + c.activeWg.Wait() + + // Close all channels. + c.channelsMu.Lock() + for _, ch := range c.channels { + ch.destroy() + } + c.channelsMu.Unlock() + + // Close main socket. + c.sockComm.destroy() +} + +func (c *Client) shutdownActiveChans() { + c.channelsMu.Lock() + defer c.channelsMu.Unlock() + + availableChans := make(map[*channel]bool) + for _, ch := range c.availableChannels { + availableChans[ch] = true + } + for _, ch := range c.channels { + // A channel that is not available is active. + if _, ok := availableChans[ch]; !ok { + log.Debugf("shutting down active channel@%p...", ch) + ch.shutdown() + } + } + + // Prevent channels from becoming available and serving new requests. + c.availableChannels = nil +} + +// Close shuts down the main socket and waits for the watchdog to clean up. +func (c *Client) Close() { + // This shutdown has no effect if the watchdog has already fired and closed + // the main socket. + c.sockComm.shutdown() + c.watchdogWg.Wait() +} + +func (c *Client) createChannel() (*channel, error) { + var chanResp ChannelResp + var fds [2]int + if err := c.SndRcvMessage(Channel, 0, NoopMarshal, chanResp.UnmarshalUnsafe, fds[:]); err != nil { + return nil, err + } + if fds[0] < 0 || fds[1] < 0 { + closeFDs(fds[:]) + return nil, fmt.Errorf("insufficient FDs provided in Channel response: %v", fds) + } + + // Lets create the channel. + defer closeFDs(fds[:1]) // The data FD is not needed after this. + desc := flipcall.PacketWindowDescriptor{ + FD: fds[0], + Offset: chanResp.dataOffset, + Length: int(chanResp.dataLength), + } + + ch := &channel{} + if err := ch.data.Init(flipcall.ClientSide, desc); err != nil { + closeFDs(fds[1:]) + return nil, err + } + ch.fdChan.Init(fds[1]) // fdChan now owns this FD. + + // Only a connected channel is usable. + if err := ch.data.Connect(); err != nil { + ch.destroy() + return nil, err + } + return ch, nil +} + +// IsSupported returns true if this connection supports the passed message. +func (c *Client) IsSupported(m MID) bool { + return int(m) < len(c.supported) && c.supported[m] +} + +// SndRcvMessage invokes reqMarshal to marshal the request onto the payload +// buffer, wakes up the server to process the request, waits for the response +// and invokes respUnmarshal with the response payload. respFDs is populated +// with the received FDs, extra fields are set to -1. +// +// Note that the function arguments intentionally accept marshal.Marshallable +// functions like Marshal{Bytes/Unsafe} and Unmarshal{Bytes/Unsafe} instead of +// directly accepting the marshal.Marshallable interface. Even though just +// accepting marshal.Marshallable is cleaner, it leads to a heap allocation +// (even if that interface variable itself does not escape). In other words, +// implicit conversion to an interface leads to an allocation. +// +// Precondition: reqMarshal and respUnmarshal must be non-nil. +func (c *Client) SndRcvMessage(m MID, payloadLen uint32, reqMarshal func(dst []byte), respUnmarshal func(src []byte), respFDs []int) error { + if !c.IsSupported(m) { + return unix.EOPNOTSUPP + } + if payloadLen > c.maxMessageSize { + log.Warningf("message %d has message size = %d which is larger than client.maxMessageSize = %d", m, payloadLen, c.maxMessageSize) + return unix.EIO + } + wantFDs := len(respFDs) + if wantFDs > math.MaxUint8 { + log.Warningf("want too many FDs: %d", wantFDs) + return unix.EINVAL + } + + // Acquire a communicator. + comm := c.acquireCommunicator() + defer c.releaseCommunicator(comm) + + // Marshal the request into comm's payload buffer and make the RPC. + reqMarshal(comm.PayloadBuf(payloadLen)) + respM, respPayloadLen, err := comm.SndRcvMessage(m, payloadLen, uint8(wantFDs)) + + // Handle FD donation. + rcvFDs := comm.ReleaseFDs() + if numRcvFDs := len(rcvFDs); numRcvFDs+wantFDs > 0 { + // releasedFDs is memory owned by comm which can not be returned to caller. + // Copy it into the caller's buffer. + numFDCopied := copy(respFDs, rcvFDs) + if numFDCopied < numRcvFDs { + log.Warningf("%d unexpected FDs were donated by the server, wanted", numRcvFDs-numFDCopied, wantFDs) + closeFDs(rcvFDs[numFDCopied:]) + } + if numFDCopied < wantFDs { + for i := numFDCopied; i < wantFDs; i++ { + respFDs[i] = -1 + } + } + } + + // Error cases. + if err != nil { + closeFDs(respFDs) + return err + } + if respM == Error { + closeFDs(respFDs) + var resp ErrorResp + resp.UnmarshalUnsafe(comm.PayloadBuf(respPayloadLen)) + return unix.Errno(resp.errno) + } + if respM != m { + closeFDs(respFDs) + log.Warningf("sent %d message but got %d in response", m, respM) + return unix.EINVAL + } + + // Success. The payload must be unmarshalled *before* comm is released. + respUnmarshal(comm.PayloadBuf(respPayloadLen)) + return nil +} + +// Postcondition: releaseCommunicator() must be called on the returned value. +func (c *Client) acquireCommunicator() Communicator { + // Prefer using channel over socket because: + // - Channel uses a shared memory region for passing messages. IO from shared + // memory is faster and does not involve making a syscall. + // - No intermediate buffer allocation needed. With a channel, the message + // can be directly pasted into the shared memory region. + if ch := c.getChannel(); ch != nil { + return ch + } + + c.sockMu.Lock() + return c.sockComm +} + +// Precondition: comm must have been acquired via acquireCommunicator(). +func (c *Client) releaseCommunicator(comm Communicator) { + switch t := comm.(type) { + case *sockCommunicator: + c.sockMu.Unlock() // +checklocksforce: locked in acquireCommunicator(). + case *channel: + c.releaseChannel(t) + default: + panic(fmt.Sprintf("unknown communicator type %T", t)) + } +} + +// getChannel pops a channel from the available channels stack. The caller must +// release the channel after use. +func (c *Client) getChannel() *channel { + c.channelsMu.Lock() + defer c.channelsMu.Unlock() + if len(c.availableChannels) == 0 { + return nil + } + + idx := len(c.availableChannels) - 1 + ch := c.availableChannels[idx] + c.availableChannels = c.availableChannels[:idx] + c.activeWg.Add(1) + return ch +} + +// releaseChannel pushes the passed channel onto the available channel stack if +// reinsert is true. +func (c *Client) releaseChannel(ch *channel) { + c.channelsMu.Lock() + defer c.channelsMu.Unlock() + + // If availableChannels is nil, then watchdog has fired and the client is + // shutting down. So don't make this channel available again. + if !ch.dead && c.availableChannels != nil { + c.availableChannels = append(c.availableChannels, ch) + } + c.activeWg.Done() +} diff --git a/pkg/lisafs/communicator.go b/pkg/lisafs/communicator.go new file mode 100644 index 000000000..ec2035158 --- /dev/null +++ b/pkg/lisafs/communicator.go @@ -0,0 +1,80 @@ +// Copyright 2021 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package lisafs + +import "golang.org/x/sys/unix" + +// Communicator is a server side utility which represents exactly how the +// server is communicating with the client. +type Communicator interface { + // PayloadBuf returns a slice to the payload section of its internal buffer + // where the message can be marshalled. The handlers should use this to + // populate the payload buffer with the message. + // + // The payload buffer contents *should* be preserved across calls with + // different sizes. Note that this is not a guarantee, because a compromised + // owner of a "shared" payload buffer can tamper with its contents anytime, + // even when it's not its turn to do so. + PayloadBuf(size uint32) []byte + + // SndRcvMessage sends message m. The caller must have populated PayloadBuf() + // with payloadLen bytes. The caller expects to receive wantFDs FDs. + // Any received FDs must be accessible via ReleaseFDs(). It returns the + // response message along with the response payload length. + SndRcvMessage(m MID, payloadLen uint32, wantFDs uint8) (MID, uint32, error) + + // DonateFD makes fd non-blocking and starts tracking it. The next call to + // ReleaseFDs will include fd in the order it was added. Communicator takes + // ownership of fd. Server side should call this. + DonateFD(fd int) error + + // Track starts tracking fd. The next call to ReleaseFDs will include fd in + // the order it was added. Communicator takes ownership of fd. Client side + // should use this for accumulating received FDs. + TrackFD(fd int) + + // ReleaseFDs returns the accumulated FDs and stops tracking them. The + // ownership of the FDs is transferred to the caller. + ReleaseFDs() []int +} + +// fdTracker is a partial implementation of Communicator. It can be embedded in +// Communicator implementations to keep track of FD donations. +type fdTracker struct { + fds []int +} + +// DonateFD implements Communicator.DonateFD. +func (d *fdTracker) DonateFD(fd int) error { + // Make sure the FD is non-blocking. + if err := unix.SetNonblock(fd, true); err != nil { + unix.Close(fd) + return err + } + d.TrackFD(fd) + return nil +} + +// TrackFD implements Communicator.TrackFD. +func (d *fdTracker) TrackFD(fd int) { + d.fds = append(d.fds, fd) +} + +// ReleaseFDs implements Communicator.ReleaseFDs. +func (d *fdTracker) ReleaseFDs() []int { + ret := d.fds + d.fds = d.fds[:0] + return ret +} diff --git a/pkg/lisafs/connection.go b/pkg/lisafs/connection.go new file mode 100644 index 000000000..8dba4805f --- /dev/null +++ b/pkg/lisafs/connection.go @@ -0,0 +1,304 @@ +// Copyright 2021 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package lisafs + +import ( + "golang.org/x/sys/unix" + "gvisor.dev/gvisor/pkg/flipcall" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/p9" + "gvisor.dev/gvisor/pkg/sync" + "gvisor.dev/gvisor/pkg/unet" +) + +// Connection represents a connection between a mount point in the client and a +// mount point in the server. It is owned by the server on which it was started +// and facilitates communication with the client mount. +// +// Each connection is set up using a unix domain socket. One end is owned by +// the server and the other end is owned by the client. The connection may +// spawn additional comunicational channels for the same mount for increased +// RPC concurrency. +type Connection struct { + // server is the server on which this connection was created. It is immutably + // associated with it for its entire lifetime. + server *Server + + // mounted is a one way flag indicating whether this connection has been + // mounted correctly and the server is initialized properly. + mounted bool + + // readonly indicates if this connection is readonly. All write operations + // will fail with EROFS. + readonly bool + + // sockComm is the main socket by which this connections is established. + sockComm *sockCommunicator + + // channelsMu protects channels. + channelsMu sync.Mutex + // channels keeps track of all open channels. + channels []*channel + + // activeWg represents active channels. + activeWg sync.WaitGroup + + // reqGate counts requests that are still being handled. + reqGate sync.Gate + + // channelAlloc is used to allocate memory for channels. + channelAlloc *flipcall.PacketWindowAllocator + + fdsMu sync.RWMutex + // fds keeps tracks of open FDs on this server. It is protected by fdsMu. + fds map[FDID]genericFD + // nextFDID is the next available FDID. It is protected by fdsMu. + nextFDID FDID +} + +// CreateConnection initializes a new connection - creating a server if +// required. The connection must be started separately. +func (s *Server) CreateConnection(sock *unet.Socket, readonly bool) (*Connection, error) { + c := &Connection{ + sockComm: newSockComm(sock), + server: s, + readonly: readonly, + channels: make([]*channel, 0, maxChannels()), + fds: make(map[FDID]genericFD), + nextFDID: InvalidFDID + 1, + } + + alloc, err := flipcall.NewPacketWindowAllocator() + if err != nil { + return nil, err + } + c.channelAlloc = alloc + return c, nil +} + +// Server returns the associated server. +func (c *Connection) Server() *Server { + return c.server +} + +// ServerImpl returns the associated server implementation. +func (c *Connection) ServerImpl() ServerImpl { + return c.server.impl +} + +// Run defines the lifecycle of a connection. +func (c *Connection) Run() { + defer c.close() + + // Start handling requests on this connection. + for { + m, payloadLen, err := c.sockComm.rcvMsg(0 /* wantFDs */) + if err != nil { + log.Debugf("sock read failed, closing connection: %v", err) + return + } + + respM, respPayloadLen, respFDs := c.handleMsg(c.sockComm, m, payloadLen) + err = c.sockComm.sndPrepopulatedMsg(respM, respPayloadLen, respFDs) + closeFDs(respFDs) + if err != nil { + log.Debugf("sock write failed, closing connection: %v", err) + return + } + } +} + +// service starts servicing the passed channel until the channel is shutdown. +// This is a blocking method and hence must be called in a separate goroutine. +func (c *Connection) service(ch *channel) error { + rcvDataLen, err := ch.data.RecvFirst() + if err != nil { + return err + } + for rcvDataLen > 0 { + m, payloadLen, err := ch.rcvMsg(rcvDataLen) + if err != nil { + return err + } + respM, respPayloadLen, respFDs := c.handleMsg(ch, m, payloadLen) + numFDs := ch.sendFDs(respFDs) + closeFDs(respFDs) + + ch.marshalHdr(respM, numFDs) + rcvDataLen, err = ch.data.SendRecv(respPayloadLen + chanHeaderLen) + if err != nil { + return err + } + } + return nil +} + +func (c *Connection) respondError(comm Communicator, err unix.Errno) (MID, uint32, []int) { + resp := &ErrorResp{errno: uint32(err)} + respLen := uint32(resp.SizeBytes()) + resp.MarshalUnsafe(comm.PayloadBuf(respLen)) + return Error, respLen, nil +} + +func (c *Connection) handleMsg(comm Communicator, m MID, payloadLen uint32) (MID, uint32, []int) { + if !c.reqGate.Enter() { + // c.close() has been called; the connection is shutting down. + return c.respondError(comm, unix.ECONNRESET) + } + defer c.reqGate.Leave() + + if !c.mounted && m != Mount { + log.Warningf("connection must first be mounted") + return c.respondError(comm, unix.EINVAL) + } + + // Check if the message is supported for forward compatibility. + if int(m) >= len(c.server.handlers) || c.server.handlers[m] == nil { + log.Warningf("received request which is not supported by the server, MID = %d", m) + return c.respondError(comm, unix.EOPNOTSUPP) + } + + // Try handling the request. + respPayloadLen, err := c.server.handlers[m](c, comm, payloadLen) + fds := comm.ReleaseFDs() + if err != nil { + closeFDs(fds) + return c.respondError(comm, p9.ExtractErrno(err)) + } + + return m, respPayloadLen, fds +} + +func (c *Connection) close() { + // Wait for completion of all inflight requests. This is mostly so that if + // a request is stuck, the sandbox supervisor has the opportunity to kill + // us with SIGABRT to get a stack dump of the offending handler. + c.reqGate.Close() + + // Shutdown and clean up channels. + c.channelsMu.Lock() + for _, ch := range c.channels { + ch.shutdown() + } + c.activeWg.Wait() + for _, ch := range c.channels { + ch.destroy() + } + // This is to prevent additional channels from being created. + c.channels = nil + c.channelsMu.Unlock() + + // Free the channel memory. + if c.channelAlloc != nil { + c.channelAlloc.Destroy() + } + + // Ensure the connection is closed. + c.sockComm.destroy() + + // Cleanup all FDs. + c.fdsMu.Lock() + for fdid := range c.fds { + fd := c.removeFDLocked(fdid) + fd.DecRef(nil) // Drop the ref held by c. + } + c.fdsMu.Unlock() +} + +// The caller gains a ref on the FD on success. +func (c *Connection) lookupFD(id FDID) (genericFD, error) { + c.fdsMu.RLock() + defer c.fdsMu.RUnlock() + + fd, ok := c.fds[id] + if !ok { + return nil, unix.EBADF + } + fd.IncRef() + return fd, nil +} + +// LookupControlFD retrieves the control FD identified by id on this +// connection. On success, the caller gains a ref on the FD. +func (c *Connection) LookupControlFD(id FDID) (*ControlFD, error) { + fd, err := c.lookupFD(id) + if err != nil { + return nil, err + } + + cfd, ok := fd.(*ControlFD) + if !ok { + fd.DecRef(nil) + return nil, unix.EINVAL + } + return cfd, nil +} + +// LookupOpenFD retrieves the open FD identified by id on this +// connection. On success, the caller gains a ref on the FD. +func (c *Connection) LookupOpenFD(id FDID) (*OpenFD, error) { + fd, err := c.lookupFD(id) + if err != nil { + return nil, err + } + + ofd, ok := fd.(*OpenFD) + if !ok { + fd.DecRef(nil) + return nil, unix.EINVAL + } + return ofd, nil +} + +// insertFD inserts the passed fd into the internal datastructure to track FDs. +// The caller must hold a ref on fd which is transferred to the connection. +func (c *Connection) insertFD(fd genericFD) FDID { + c.fdsMu.Lock() + defer c.fdsMu.Unlock() + + res := c.nextFDID + c.nextFDID++ + if c.nextFDID < res { + panic("ran out of FDIDs") + } + c.fds[res] = fd + return res +} + +// RemoveFD makes c stop tracking the passed FDID and drops its ref on it. +func (c *Connection) RemoveFD(id FDID) { + c.fdsMu.Lock() + fd := c.removeFDLocked(id) + c.fdsMu.Unlock() + if fd != nil { + // Drop the ref held by c. This can take arbitrarily long. So do not hold + // c.fdsMu while calling it. + fd.DecRef(nil) + } +} + +// removeFDLocked makes c stop tracking the passed FDID. Note that the caller +// must drop ref on the returned fd (preferably without holding c.fdsMu). +// +// Precondition: c.fdsMu is locked. +func (c *Connection) removeFDLocked(id FDID) genericFD { + fd := c.fds[id] + if fd == nil { + log.Warningf("removeFDLocked called on non-existent FDID %d", id) + return nil + } + delete(c.fds, id) + return fd +} diff --git a/pkg/lisafs/control_fd_list.go b/pkg/lisafs/control_fd_list.go new file mode 100644 index 000000000..684d9c265 --- /dev/null +++ b/pkg/lisafs/control_fd_list.go @@ -0,0 +1,221 @@ +package lisafs + +// ElementMapper provides an identity mapping by default. +// +// This can be replaced to provide a struct that maps elements to linker +// objects, if they are not the same. An ElementMapper is not typically +// required if: Linker is left as is, Element is left as is, or Linker and +// Element are the same type. +type controlFDElementMapper struct{} + +// linkerFor maps an Element to a Linker. +// +// This default implementation should be inlined. +// +//go:nosplit +func (controlFDElementMapper) linkerFor(elem *ControlFD) *ControlFD { return elem } + +// List is an intrusive list. Entries can be added to or removed from the list +// in O(1) time and with no additional memory allocations. +// +// The zero value for List is an empty list ready to use. +// +// To iterate over a list (where l is a List): +// for e := l.Front(); e != nil; e = e.Next() { +// // do something with e. +// } +// +// +stateify savable +type controlFDList struct { + head *ControlFD + tail *ControlFD +} + +// Reset resets list l to the empty state. +func (l *controlFDList) Reset() { + l.head = nil + l.tail = nil +} + +// Empty returns true iff the list is empty. +// +//go:nosplit +func (l *controlFDList) Empty() bool { + return l.head == nil +} + +// Front returns the first element of list l or nil. +// +//go:nosplit +func (l *controlFDList) Front() *ControlFD { + return l.head +} + +// Back returns the last element of list l or nil. +// +//go:nosplit +func (l *controlFDList) Back() *ControlFD { + return l.tail +} + +// Len returns the number of elements in the list. +// +// NOTE: This is an O(n) operation. +// +//go:nosplit +func (l *controlFDList) Len() (count int) { + for e := l.Front(); e != nil; e = (controlFDElementMapper{}.linkerFor(e)).Next() { + count++ + } + return count +} + +// PushFront inserts the element e at the front of list l. +// +//go:nosplit +func (l *controlFDList) PushFront(e *ControlFD) { + linker := controlFDElementMapper{}.linkerFor(e) + linker.SetNext(l.head) + linker.SetPrev(nil) + if l.head != nil { + controlFDElementMapper{}.linkerFor(l.head).SetPrev(e) + } else { + l.tail = e + } + + l.head = e +} + +// PushBack inserts the element e at the back of list l. +// +//go:nosplit +func (l *controlFDList) PushBack(e *ControlFD) { + linker := controlFDElementMapper{}.linkerFor(e) + linker.SetNext(nil) + linker.SetPrev(l.tail) + if l.tail != nil { + controlFDElementMapper{}.linkerFor(l.tail).SetNext(e) + } else { + l.head = e + } + + l.tail = e +} + +// PushBackList inserts list m at the end of list l, emptying m. +// +//go:nosplit +func (l *controlFDList) PushBackList(m *controlFDList) { + if l.head == nil { + l.head = m.head + l.tail = m.tail + } else if m.head != nil { + controlFDElementMapper{}.linkerFor(l.tail).SetNext(m.head) + controlFDElementMapper{}.linkerFor(m.head).SetPrev(l.tail) + + l.tail = m.tail + } + m.head = nil + m.tail = nil +} + +// InsertAfter inserts e after b. +// +//go:nosplit +func (l *controlFDList) InsertAfter(b, e *ControlFD) { + bLinker := controlFDElementMapper{}.linkerFor(b) + eLinker := controlFDElementMapper{}.linkerFor(e) + + a := bLinker.Next() + + eLinker.SetNext(a) + eLinker.SetPrev(b) + bLinker.SetNext(e) + + if a != nil { + controlFDElementMapper{}.linkerFor(a).SetPrev(e) + } else { + l.tail = e + } +} + +// InsertBefore inserts e before a. +// +//go:nosplit +func (l *controlFDList) InsertBefore(a, e *ControlFD) { + aLinker := controlFDElementMapper{}.linkerFor(a) + eLinker := controlFDElementMapper{}.linkerFor(e) + + b := aLinker.Prev() + eLinker.SetNext(a) + eLinker.SetPrev(b) + aLinker.SetPrev(e) + + if b != nil { + controlFDElementMapper{}.linkerFor(b).SetNext(e) + } else { + l.head = e + } +} + +// Remove removes e from l. +// +//go:nosplit +func (l *controlFDList) Remove(e *ControlFD) { + linker := controlFDElementMapper{}.linkerFor(e) + prev := linker.Prev() + next := linker.Next() + + if prev != nil { + controlFDElementMapper{}.linkerFor(prev).SetNext(next) + } else if l.head == e { + l.head = next + } + + if next != nil { + controlFDElementMapper{}.linkerFor(next).SetPrev(prev) + } else if l.tail == e { + l.tail = prev + } + + linker.SetNext(nil) + linker.SetPrev(nil) +} + +// Entry is a default implementation of Linker. Users can add anonymous fields +// of this type to their structs to make them automatically implement the +// methods needed by List. +// +// +stateify savable +type controlFDEntry struct { + next *ControlFD + prev *ControlFD +} + +// Next returns the entry that follows e in the list. +// +//go:nosplit +func (e *controlFDEntry) Next() *ControlFD { + return e.next +} + +// Prev returns the entry that precedes e in the list. +// +//go:nosplit +func (e *controlFDEntry) Prev() *ControlFD { + return e.prev +} + +// SetNext assigns 'entry' as the entry that follows e in the list. +// +//go:nosplit +func (e *controlFDEntry) SetNext(elem *ControlFD) { + e.next = elem +} + +// SetPrev assigns 'entry' as the entry that precedes e in the list. +// +//go:nosplit +func (e *controlFDEntry) SetPrev(elem *ControlFD) { + e.prev = elem +} diff --git a/pkg/lisafs/control_fd_refs.go b/pkg/lisafs/control_fd_refs.go new file mode 100644 index 000000000..cc24833f2 --- /dev/null +++ b/pkg/lisafs/control_fd_refs.go @@ -0,0 +1,140 @@ +package lisafs + +import ( + "fmt" + "sync/atomic" + + "gvisor.dev/gvisor/pkg/refsvfs2" +) + +// enableLogging indicates whether reference-related events should be logged (with +// stack traces). This is false by default and should only be set to true for +// debugging purposes, as it can generate an extremely large amount of output +// and drastically degrade performance. +const controlFDenableLogging = false + +// obj is used to customize logging. Note that we use a pointer to T so that +// we do not copy the entire object when passed as a format parameter. +var controlFDobj *ControlFD + +// Refs implements refs.RefCounter. It keeps a reference count using atomic +// operations and calls the destructor when the count reaches zero. +// +// NOTE: Do not introduce additional fields to the Refs struct. It is used by +// many filesystem objects, and we want to keep it as small as possible (i.e., +// the same size as using an int64 directly) to avoid taking up extra cache +// space. In general, this template should not be extended at the cost of +// performance. If it does not offer enough flexibility for a particular object +// (example: b/187877947), we should implement the RefCounter/CheckedObject +// interfaces manually. +// +// +stateify savable +type controlFDRefs struct { + // refCount is composed of two fields: + // + // [32-bit speculative references]:[32-bit real references] + // + // Speculative references are used for TryIncRef, to avoid a CompareAndSwap + // loop. See IncRef, DecRef and TryIncRef for details of how these fields are + // used. + refCount int64 +} + +// InitRefs initializes r with one reference and, if enabled, activates leak +// checking. +func (r *controlFDRefs) InitRefs() { + atomic.StoreInt64(&r.refCount, 1) + refsvfs2.Register(r) +} + +// RefType implements refsvfs2.CheckedObject.RefType. +func (r *controlFDRefs) RefType() string { + return fmt.Sprintf("%T", controlFDobj)[1:] +} + +// LeakMessage implements refsvfs2.CheckedObject.LeakMessage. +func (r *controlFDRefs) LeakMessage() string { + return fmt.Sprintf("[%s %p] reference count of %d instead of 0", r.RefType(), r, r.ReadRefs()) +} + +// LogRefs implements refsvfs2.CheckedObject.LogRefs. +func (r *controlFDRefs) LogRefs() bool { + return controlFDenableLogging +} + +// ReadRefs returns the current number of references. The returned count is +// inherently racy and is unsafe to use without external synchronization. +func (r *controlFDRefs) ReadRefs() int64 { + return atomic.LoadInt64(&r.refCount) +} + +// IncRef implements refs.RefCounter.IncRef. +// +//go:nosplit +func (r *controlFDRefs) IncRef() { + v := atomic.AddInt64(&r.refCount, 1) + if controlFDenableLogging { + refsvfs2.LogIncRef(r, v) + } + if v <= 1 { + panic(fmt.Sprintf("Incrementing non-positive count %p on %s", r, r.RefType())) + } +} + +// TryIncRef implements refs.TryRefCounter.TryIncRef. +// +// To do this safely without a loop, a speculative reference is first acquired +// on the object. This allows multiple concurrent TryIncRef calls to distinguish +// other TryIncRef calls from genuine references held. +// +//go:nosplit +func (r *controlFDRefs) TryIncRef() bool { + const speculativeRef = 1 << 32 + if v := atomic.AddInt64(&r.refCount, speculativeRef); int32(v) == 0 { + + atomic.AddInt64(&r.refCount, -speculativeRef) + return false + } + + v := atomic.AddInt64(&r.refCount, -speculativeRef+1) + if controlFDenableLogging { + refsvfs2.LogTryIncRef(r, v) + } + return true +} + +// DecRef implements refs.RefCounter.DecRef. +// +// Note that speculative references are counted here. Since they were added +// prior to real references reaching zero, they will successfully convert to +// real references. In other words, we see speculative references only in the +// following case: +// +// A: TryIncRef [speculative increase => sees non-negative references] +// B: DecRef [real decrease] +// A: TryIncRef [transform speculative to real] +// +//go:nosplit +func (r *controlFDRefs) DecRef(destroy func()) { + v := atomic.AddInt64(&r.refCount, -1) + if controlFDenableLogging { + refsvfs2.LogDecRef(r, v) + } + switch { + case v < 0: + panic(fmt.Sprintf("Decrementing non-positive ref count %p, owned by %s", r, r.RefType())) + + case v == 0: + refsvfs2.Unregister(r) + + if destroy != nil { + destroy() + } + } +} + +func (r *controlFDRefs) afterLoad() { + if r.ReadRefs() > 0 { + refsvfs2.Register(r) + } +} diff --git a/pkg/lisafs/fd.go b/pkg/lisafs/fd.go new file mode 100644 index 000000000..9dd8ba384 --- /dev/null +++ b/pkg/lisafs/fd.go @@ -0,0 +1,348 @@ +// Copyright 2021 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package lisafs + +import ( + "golang.org/x/sys/unix" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/fspath" + "gvisor.dev/gvisor/pkg/refsvfs2" + "gvisor.dev/gvisor/pkg/sync" +) + +// FDID (file descriptor identifier) is used to identify FDs on a connection. +// Each connection has its own FDID namespace. +// +// +marshal slice:FDIDSlice +type FDID uint32 + +// InvalidFDID represents an invalid FDID. +const InvalidFDID FDID = 0 + +// Ok returns true if f is a valid FDID. +func (f FDID) Ok() bool { + return f != InvalidFDID +} + +// genericFD can represent a ControlFD or OpenFD. +type genericFD interface { + refsvfs2.RefCounter +} + +// A ControlFD is the gateway to the backing filesystem tree node. It is an +// unusual concept. This exists to provide a safe way to do path-based +// operations on the file. It performs operations that can modify the +// filesystem tree and synchronizes these operations. See ControlFDImpl for +// supported operations. +// +// It is not an inode, because multiple control FDs are allowed to exist on the +// same file. It is not a file descriptor because it is not tied to any access +// mode, i.e. a control FD can change its access mode based on the operation +// being performed. +// +// Reference Model: +// * When a control FD is created, the connection takes a ref on it which +// represents the client's ref on the FD. +// * The client can drop its ref via the Close RPC which will in turn make the +// connection drop its ref. +// * Each control FD holds a ref on its parent for its entire life time. +type ControlFD struct { + controlFDRefs + controlFDEntry + + // parent is the parent directory FD containing the file this FD represents. + // A ControlFD holds a ref on parent for its entire lifetime. If this FD + // represents the root, then parent is nil. parent may be a control FD from + // another connection (another mount point). parent is protected by the + // backing server's rename mutex. + parent *ControlFD + + // name is the file path's last component name. If this FD represents the + // root directory, then name is the mount path. name is protected by the + // backing server's rename mutex. + name string + + // children is a linked list of all children control FDs. As per reference + // model, all children hold a ref on this FD. + // children is protected by childrenMu and server's rename mutex. To have + // mutual exclusion, it is sufficient to: + // * Hold rename mutex for reading and lock childrenMu. OR + // * Or hold rename mutex for writing. + childrenMu sync.Mutex + children controlFDList + + // openFDs is a linked list of all FDs opened on this FD. As per reference + // model, all open FDs hold a ref on this FD. + openFDsMu sync.RWMutex + openFDs openFDList + + // All the following fields are immutable. + + // id is the unique FD identifier which identifies this FD on its connection. + id FDID + + // conn is the backing connection owning this FD. + conn *Connection + + // ftype is the file type of the backing inode. ftype.FileType() == ftype. + ftype linux.FileMode + + // impl is the control FD implementation which embeds this struct. It + // contains all the implementation specific details. + impl ControlFDImpl +} + +var _ genericFD = (*ControlFD)(nil) + +// DecRef implements refsvfs2.RefCounter.DecRef. Note that the context +// parameter should never be used. It exists solely to comply with the +// refsvfs2.RefCounter interface. +func (fd *ControlFD) DecRef(context.Context) { + fd.controlFDRefs.DecRef(func() { + if fd.parent != nil { + fd.conn.server.RenameMu.RLock() + fd.parent.childrenMu.Lock() + fd.parent.children.Remove(fd) + fd.parent.childrenMu.Unlock() + fd.conn.server.RenameMu.RUnlock() + fd.parent.DecRef(nil) // Drop the ref on the parent. + } + fd.impl.Close(fd.conn) + }) +} + +// DecRefLocked is the same as DecRef except the added precondition. +// +// Precondition: server's rename mutex must be at least read locked. +func (fd *ControlFD) DecRefLocked() { + fd.controlFDRefs.DecRef(func() { + fd.clearParentLocked() + fd.impl.Close(fd.conn) + }) +} + +// Precondition: server's rename mutex must be at least read locked. +func (fd *ControlFD) clearParentLocked() { + if fd.parent == nil { + return + } + fd.parent.childrenMu.Lock() + fd.parent.children.Remove(fd) + fd.parent.childrenMu.Unlock() + fd.parent.DecRefLocked() // Drop the ref on the parent. +} + +// Init must be called before first use of fd. It inserts fd into the +// filesystem tree. +// +// Precondition: server's rename mutex must be at least read locked. +func (fd *ControlFD) Init(c *Connection, parent *ControlFD, name string, mode linux.FileMode, impl ControlFDImpl) { + // Initialize fd with 1 ref which is transferred to c via c.insertFD(). + fd.controlFDRefs.InitRefs() + fd.conn = c + fd.id = c.insertFD(fd) + fd.name = name + fd.ftype = mode.FileType() + fd.impl = impl + fd.setParentLocked(parent) +} + +// Precondition: server's rename mutex must be at least read locked. +func (fd *ControlFD) setParentLocked(parent *ControlFD) { + fd.parent = parent + if parent != nil { + parent.IncRef() // Hold a ref on parent. + parent.childrenMu.Lock() + parent.children.PushBack(fd) + parent.childrenMu.Unlock() + } +} + +// FileType returns the file mode only containing the file type bits. +func (fd *ControlFD) FileType() linux.FileMode { + return fd.ftype +} + +// IsDir indicates whether fd represents a directory. +func (fd *ControlFD) IsDir() bool { + return fd.ftype == unix.S_IFDIR +} + +// IsRegular indicates whether fd represents a regular file. +func (fd *ControlFD) IsRegular() bool { + return fd.ftype == unix.S_IFREG +} + +// IsSymlink indicates whether fd represents a symbolic link. +func (fd *ControlFD) IsSymlink() bool { + return fd.ftype == unix.S_IFLNK +} + +// IsSocket indicates whether fd represents a socket. +func (fd *ControlFD) IsSocket() bool { + return fd.ftype == unix.S_IFSOCK +} + +// NameLocked returns the backing file's last component name. +// +// Precondition: server's rename mutex must be at least read locked. +func (fd *ControlFD) NameLocked() string { + return fd.name +} + +// ParentLocked returns the parent control FD. Note that parent might be a +// control FD from another connection on this server. So its ID must not +// returned on this connection because FDIDs are local to their connection. +// +// Precondition: server's rename mutex must be at least read locked. +func (fd *ControlFD) ParentLocked() ControlFDImpl { + if fd.parent == nil { + return nil + } + return fd.parent.impl +} + +// ID returns fd's ID. +func (fd *ControlFD) ID() FDID { + return fd.id +} + +// FilePath returns the absolute path of the file fd was opened on. This is +// expensive and must not be called on hot paths. FilePath acquires the rename +// mutex for reading so callers should not be holding it. +func (fd *ControlFD) FilePath() string { + // Lock the rename mutex for reading to ensure that the filesystem tree is not + // changed while we traverse it upwards. + fd.conn.server.RenameMu.RLock() + defer fd.conn.server.RenameMu.RUnlock() + return fd.FilePathLocked() +} + +// FilePathLocked is the same as FilePath with the additonal precondition. +// +// Precondition: server's rename mutex must be at least read locked. +func (fd *ControlFD) FilePathLocked() string { + // Walk upwards and prepend name to res. + var res fspath.Builder + for fd != nil { + res.PrependComponent(fd.name) + fd = fd.parent + } + return res.String() +} + +// ForEachOpenFD executes fn on each FD opened on fd. +func (fd *ControlFD) ForEachOpenFD(fn func(ofd OpenFDImpl)) { + fd.openFDsMu.RLock() + defer fd.openFDsMu.RUnlock() + for ofd := fd.openFDs.Front(); ofd != nil; ofd = ofd.Next() { + fn(ofd.impl) + } +} + +// OpenFD represents an open file descriptor on the protocol. It resonates +// closely with a Linux file descriptor. Its operations are limited to the +// file. Its operations are not allowed to modify or traverse the filesystem +// tree. See OpenFDImpl for the supported operations. +// +// Reference Model: +// * An OpenFD takes a reference on the control FD it was opened on. +type OpenFD struct { + openFDRefs + openFDEntry + + // All the following fields are immutable. + + // controlFD is the ControlFD on which this FD was opened. OpenFD holds a ref + // on controlFD for its entire lifetime. + controlFD *ControlFD + + // id is the unique FD identifier which identifies this FD on its connection. + id FDID + + // Access mode for this FD. + readable bool + writable bool + + // impl is the open FD implementation which embeds this struct. It + // contains all the implementation specific details. + impl OpenFDImpl +} + +var _ genericFD = (*OpenFD)(nil) + +// ID returns fd's ID. +func (fd *OpenFD) ID() FDID { + return fd.id +} + +// ControlFD returns the control FD on which this FD was opened. +func (fd *OpenFD) ControlFD() ControlFDImpl { + return fd.controlFD.impl +} + +// DecRef implements refsvfs2.RefCounter.DecRef. Note that the context +// parameter should never be used. It exists solely to comply with the +// refsvfs2.RefCounter interface. +func (fd *OpenFD) DecRef(context.Context) { + fd.openFDRefs.DecRef(func() { + fd.controlFD.openFDsMu.Lock() + fd.controlFD.openFDs.Remove(fd) + fd.controlFD.openFDsMu.Unlock() + fd.controlFD.DecRef(nil) // Drop the ref on the control FD. + fd.impl.Close(fd.controlFD.conn) + }) +} + +// Init must be called before first use of fd. +func (fd *OpenFD) Init(cfd *ControlFD, flags uint32, impl OpenFDImpl) { + // Initialize fd with 1 ref which is transferred to c via c.insertFD(). + fd.openFDRefs.InitRefs() + fd.controlFD = cfd + fd.id = cfd.conn.insertFD(fd) + accessMode := flags & unix.O_ACCMODE + fd.readable = accessMode == unix.O_RDONLY || accessMode == unix.O_RDWR + fd.writable = accessMode == unix.O_WRONLY || accessMode == unix.O_RDWR + fd.impl = impl + cfd.IncRef() // Holds a ref on cfd for its lifetime. + cfd.openFDsMu.Lock() + cfd.openFDs.PushBack(fd) + cfd.openFDsMu.Unlock() +} + +// ControlFDImpl contains implementation details for a ControlFD. +// Implementations of ControlFDImpl should contain their associated ControlFD +// by value as their first field. +// +// The operations that perform path traversal or any modification to the +// filesystem tree must synchronize those modifications with the server's +// rename mutex. +type ControlFDImpl interface { + FD() *ControlFD + Close(c *Connection) +} + +// OpenFDImpl contains implementation details for a OpenFD. Implementations of +// OpenFDImpl should contain their associated OpenFD by value as their first +// field. +// +// Since these operations do not perform any path traversal or any modification +// to the filesystem tree, there is no need to synchronize with rename +// operations. +type OpenFDImpl interface { + FD() *OpenFD + Close(c *Connection) +} diff --git a/pkg/lisafs/handlers.go b/pkg/lisafs/handlers.go new file mode 100644 index 000000000..9b8d8164a --- /dev/null +++ b/pkg/lisafs/handlers.go @@ -0,0 +1,124 @@ +// Copyright 2021 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package lisafs + +import ( + "path" + "path/filepath" + + "golang.org/x/sys/unix" + "gvisor.dev/gvisor/pkg/flipcall" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/marshal/primitive" +) + +// RPCHandler defines a handler that is invoked when the associated message is +// received. The handler is responsible for: +// +// * Unmarshalling the request from the passed payload and interpreting it. +// * Marshalling the response into the communicator's payload buffer. +// * Return the number of payload bytes written. +// * Donate any FDs (if needed) to comm which will in turn donate it to client. +type RPCHandler func(c *Connection, comm Communicator, payloadLen uint32) (uint32, error) + +var handlers = [...]RPCHandler{ + Error: ErrorHandler, + Mount: MountHandler, + Channel: ChannelHandler, +} + +// ErrorHandler handles Error message. +func ErrorHandler(c *Connection, comm Communicator, payloadLen uint32) (uint32, error) { + // Client should never send Error. + return 0, unix.EINVAL +} + +// MountHandler handles the Mount RPC. Note that there can not be concurrent +// executions of MountHandler on a connection because the connection enforces +// that Mount is the first message on the connection. Only after the connection +// has been successfully mounted can other channels be created. +func MountHandler(c *Connection, comm Communicator, payloadLen uint32) (uint32, error) { + var req MountReq + req.UnmarshalBytes(comm.PayloadBuf(payloadLen)) + + mountPath := path.Clean(string(req.MountPath)) + if !filepath.IsAbs(mountPath) { + log.Warningf("mountPath %q is not absolute", mountPath) + return 0, unix.EINVAL + } + + if c.mounted { + log.Warningf("connection has already been mounted at %q", mountPath) + return 0, unix.EBUSY + } + + rootFD, rootIno, err := c.ServerImpl().Mount(c, mountPath) + if err != nil { + return 0, err + } + + c.server.addMountPoint(rootFD.FD()) + c.mounted = true + resp := MountResp{ + Root: rootIno, + SupportedMs: c.ServerImpl().SupportedMessages(), + MaxMessageSize: primitive.Uint32(c.ServerImpl().MaxMessageSize()), + } + respPayloadLen := uint32(resp.SizeBytes()) + resp.MarshalBytes(comm.PayloadBuf(respPayloadLen)) + return respPayloadLen, nil +} + +// ChannelHandler handles the Channel RPC. +func ChannelHandler(c *Connection, comm Communicator, payloadLen uint32) (uint32, error) { + ch, desc, fdSock, err := c.createChannel(c.ServerImpl().MaxMessageSize()) + if err != nil { + return 0, err + } + + // Start servicing the channel in a separate goroutine. + c.activeWg.Add(1) + go func() { + if err := c.service(ch); err != nil { + // Don't log shutdown error which is expected during server shutdown. + if _, ok := err.(flipcall.ShutdownError); !ok { + log.Warningf("lisafs.Connection.service(channel = @%p): %v", ch, err) + } + } + c.activeWg.Done() + }() + + clientDataFD, err := unix.Dup(desc.FD) + if err != nil { + unix.Close(fdSock) + ch.shutdown() + return 0, err + } + + // Respond to client with successful channel creation message. + if err := comm.DonateFD(clientDataFD); err != nil { + return 0, err + } + if err := comm.DonateFD(fdSock); err != nil { + return 0, err + } + resp := ChannelResp{ + dataOffset: desc.Offset, + dataLength: uint64(desc.Length), + } + respLen := uint32(resp.SizeBytes()) + resp.MarshalUnsafe(comm.PayloadBuf(respLen)) + return respLen, nil +} diff --git a/pkg/lisafs/lisafs.go b/pkg/lisafs/lisafs.go new file mode 100644 index 000000000..4d8e956ab --- /dev/null +++ b/pkg/lisafs/lisafs.go @@ -0,0 +1,18 @@ +// Copyright 2021 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package lisafs (LInux SAndbox FileSystem) defines the protocol for +// filesystem RPCs between an untrusted Sandbox (client) and a trusted +// filesystem server. +package lisafs diff --git a/pkg/lisafs/lisafs_abi_autogen_unsafe.go b/pkg/lisafs/lisafs_abi_autogen_unsafe.go new file mode 100644 index 000000000..ece422578 --- /dev/null +++ b/pkg/lisafs/lisafs_abi_autogen_unsafe.go @@ -0,0 +1,1534 @@ +// Automatically generated marshal implementation. See tools/go_marshal. + +package lisafs + +import ( + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/gohacks" + "gvisor.dev/gvisor/pkg/hostarch" + "gvisor.dev/gvisor/pkg/marshal" + "io" + "reflect" + "runtime" + "unsafe" +) + +// Marshallable types used by this file. +var _ marshal.Marshallable = (*ChannelResp)(nil) +var _ marshal.Marshallable = (*ErrorResp)(nil) +var _ marshal.Marshallable = (*FDID)(nil) +var _ marshal.Marshallable = (*GID)(nil) +var _ marshal.Marshallable = (*Inode)(nil) +var _ marshal.Marshallable = (*MID)(nil) +var _ marshal.Marshallable = (*MsgDynamic)(nil) +var _ marshal.Marshallable = (*MsgSimple)(nil) +var _ marshal.Marshallable = (*P9Version)(nil) +var _ marshal.Marshallable = (*UID)(nil) +var _ marshal.Marshallable = (*channelHeader)(nil) +var _ marshal.Marshallable = (*linux.Statx)(nil) +var _ marshal.Marshallable = (*sockHeader)(nil) + +// SizeBytes implements marshal.Marshallable.SizeBytes. +func (c *channelHeader) SizeBytes() int { + return 2 + + (*MID)(nil).SizeBytes() +} + +// MarshalBytes implements marshal.Marshallable.MarshalBytes. +func (c *channelHeader) MarshalBytes(dst []byte) { + c.message.MarshalBytes(dst[:c.message.SizeBytes()]) + dst = dst[c.message.SizeBytes():] + dst[0] = byte(c.numFDs) + dst = dst[1:] + // Padding: dst[:sizeof(uint8)] ~= uint8(0) + dst = dst[1:] +} + +// UnmarshalBytes implements marshal.Marshallable.UnmarshalBytes. +func (c *channelHeader) UnmarshalBytes(src []byte) { + c.message.UnmarshalBytes(src[:c.message.SizeBytes()]) + src = src[c.message.SizeBytes():] + c.numFDs = uint8(src[0]) + src = src[1:] + // Padding: var _ uint8 ~= src[:sizeof(uint8)] + src = src[1:] +} + +// Packed implements marshal.Marshallable.Packed. +//go:nosplit +func (c *channelHeader) Packed() bool { + return c.message.Packed() +} + +// MarshalUnsafe implements marshal.Marshallable.MarshalUnsafe. +func (c *channelHeader) MarshalUnsafe(dst []byte) { + if c.message.Packed() { + gohacks.Memmove(unsafe.Pointer(&dst[0]), unsafe.Pointer(c), uintptr(c.SizeBytes())) + } else { + // Type channelHeader doesn't have a packed layout in memory, fallback to MarshalBytes. + c.MarshalBytes(dst) + } +} + +// UnmarshalUnsafe implements marshal.Marshallable.UnmarshalUnsafe. +func (c *channelHeader) UnmarshalUnsafe(src []byte) { + if c.message.Packed() { + gohacks.Memmove(unsafe.Pointer(c), unsafe.Pointer(&src[0]), uintptr(c.SizeBytes())) + } else { + // Type channelHeader doesn't have a packed layout in memory, fallback to UnmarshalBytes. + c.UnmarshalBytes(src) + } +} + +// CopyOutN implements marshal.Marshallable.CopyOutN. +//go:nosplit +func (c *channelHeader) CopyOutN(cc marshal.CopyContext, addr hostarch.Addr, limit int) (int, error) { + if !c.message.Packed() { + // Type channelHeader doesn't have a packed layout in memory, fall back to MarshalBytes. + buf := cc.CopyScratchBuffer(c.SizeBytes()) // escapes: okay. + c.MarshalBytes(buf) // escapes: fallback. + return cc.CopyOutBytes(addr, buf[:limit]) // escapes: okay. + } + + // Construct a slice backed by dst's underlying memory. + var buf []byte + hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf)) + hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(c))) + hdr.Len = c.SizeBytes() + hdr.Cap = c.SizeBytes() + + length, err := cc.CopyOutBytes(addr, buf[:limit]) // escapes: okay. + // Since we bypassed the compiler's escape analysis, indicate that c + // must live until the use above. + runtime.KeepAlive(c) // escapes: replaced by intrinsic. + return length, err +} + +// CopyOut implements marshal.Marshallable.CopyOut. +//go:nosplit +func (c *channelHeader) CopyOut(cc marshal.CopyContext, addr hostarch.Addr) (int, error) { + return c.CopyOutN(cc, addr, c.SizeBytes()) +} + +// CopyIn implements marshal.Marshallable.CopyIn. +//go:nosplit +func (c *channelHeader) CopyIn(cc marshal.CopyContext, addr hostarch.Addr) (int, error) { + if !c.message.Packed() { + // Type channelHeader doesn't have a packed layout in memory, fall back to UnmarshalBytes. + buf := cc.CopyScratchBuffer(c.SizeBytes()) // escapes: okay. + length, err := cc.CopyInBytes(addr, buf) // escapes: okay. + // Unmarshal unconditionally. If we had a short copy-in, this results in a + // partially unmarshalled struct. + c.UnmarshalBytes(buf) // escapes: fallback. + return length, err + } + + // Construct a slice backed by dst's underlying memory. + var buf []byte + hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf)) + hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(c))) + hdr.Len = c.SizeBytes() + hdr.Cap = c.SizeBytes() + + length, err := cc.CopyInBytes(addr, buf) // escapes: okay. + // Since we bypassed the compiler's escape analysis, indicate that c + // must live until the use above. + runtime.KeepAlive(c) // escapes: replaced by intrinsic. + return length, err +} + +// WriteTo implements io.WriterTo.WriteTo. +func (c *channelHeader) WriteTo(writer io.Writer) (int64, error) { + if !c.message.Packed() { + // Type channelHeader doesn't have a packed layout in memory, fall back to MarshalBytes. + buf := make([]byte, c.SizeBytes()) + c.MarshalBytes(buf) + length, err := writer.Write(buf) + return int64(length), err + } + + // Construct a slice backed by dst's underlying memory. + var buf []byte + hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf)) + hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(c))) + hdr.Len = c.SizeBytes() + hdr.Cap = c.SizeBytes() + + length, err := writer.Write(buf) + // Since we bypassed the compiler's escape analysis, indicate that c + // must live until the use above. + runtime.KeepAlive(c) // escapes: replaced by intrinsic. + return int64(length), err +} + +// SizeBytes implements marshal.Marshallable.SizeBytes. +//go:nosplit +func (f *FDID) SizeBytes() int { + return 4 +} + +// MarshalBytes implements marshal.Marshallable.MarshalBytes. +func (f *FDID) MarshalBytes(dst []byte) { + hostarch.ByteOrder.PutUint32(dst[:4], uint32(*f)) +} + +// UnmarshalBytes implements marshal.Marshallable.UnmarshalBytes. +func (f *FDID) UnmarshalBytes(src []byte) { + *f = FDID(uint32(hostarch.ByteOrder.Uint32(src[:4]))) +} + +// Packed implements marshal.Marshallable.Packed. +//go:nosplit +func (f *FDID) Packed() bool { + // Scalar newtypes are always packed. + return true +} + +// MarshalUnsafe implements marshal.Marshallable.MarshalUnsafe. +func (f *FDID) MarshalUnsafe(dst []byte) { + gohacks.Memmove(unsafe.Pointer(&dst[0]), unsafe.Pointer(f), uintptr(f.SizeBytes())) +} + +// UnmarshalUnsafe implements marshal.Marshallable.UnmarshalUnsafe. +func (f *FDID) UnmarshalUnsafe(src []byte) { + gohacks.Memmove(unsafe.Pointer(f), unsafe.Pointer(&src[0]), uintptr(f.SizeBytes())) +} + +// CopyOutN implements marshal.Marshallable.CopyOutN. +//go:nosplit +func (f *FDID) CopyOutN(cc marshal.CopyContext, addr hostarch.Addr, limit int) (int, error) { + // Construct a slice backed by dst's underlying memory. + var buf []byte + hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf)) + hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(f))) + hdr.Len = f.SizeBytes() + hdr.Cap = f.SizeBytes() + + length, err := cc.CopyOutBytes(addr, buf[:limit]) // escapes: okay. + // Since we bypassed the compiler's escape analysis, indicate that f + // must live until the use above. + runtime.KeepAlive(f) // escapes: replaced by intrinsic. + return length, err +} + +// CopyOut implements marshal.Marshallable.CopyOut. +//go:nosplit +func (f *FDID) CopyOut(cc marshal.CopyContext, addr hostarch.Addr) (int, error) { + return f.CopyOutN(cc, addr, f.SizeBytes()) +} + +// CopyIn implements marshal.Marshallable.CopyIn. +//go:nosplit +func (f *FDID) CopyIn(cc marshal.CopyContext, addr hostarch.Addr) (int, error) { + // Construct a slice backed by dst's underlying memory. + var buf []byte + hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf)) + hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(f))) + hdr.Len = f.SizeBytes() + hdr.Cap = f.SizeBytes() + + length, err := cc.CopyInBytes(addr, buf) // escapes: okay. + // Since we bypassed the compiler's escape analysis, indicate that f + // must live until the use above. + runtime.KeepAlive(f) // escapes: replaced by intrinsic. + return length, err +} + +// WriteTo implements io.WriterTo.WriteTo. +func (f *FDID) WriteTo(w io.Writer) (int64, error) { + // Construct a slice backed by dst's underlying memory. + var buf []byte + hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf)) + hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(f))) + hdr.Len = f.SizeBytes() + hdr.Cap = f.SizeBytes() + + length, err := w.Write(buf) + // Since we bypassed the compiler's escape analysis, indicate that f + // must live until the use above. + runtime.KeepAlive(f) // escapes: replaced by intrinsic. + return int64(length), err +} + +// CopyFDIDSliceIn copies in a slice of FDID objects from the task's memory. +//go:nosplit +func CopyFDIDSliceIn(cc marshal.CopyContext, addr hostarch.Addr, dst []FDID) (int, error) { + count := len(dst) + if count == 0 { + return 0, nil + } + size := (*FDID)(nil).SizeBytes() + + ptr := unsafe.Pointer(&dst) + val := gohacks.Noescape(unsafe.Pointer((*reflect.SliceHeader)(ptr).Data)) + + // Construct a slice backed by dst's underlying memory. + var buf []byte + hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf)) + hdr.Data = uintptr(val) + hdr.Len = size * count + hdr.Cap = size * count + + length, err := cc.CopyInBytes(addr, buf) // escapes: okay. + // Since we bypassed the compiler's escape analysis, indicate that dst + // must live until the use above. + runtime.KeepAlive(dst) // escapes: replaced by intrinsic. + return length, err +} + +// CopyFDIDSliceOut copies a slice of FDID objects to the task's memory. +//go:nosplit +func CopyFDIDSliceOut(cc marshal.CopyContext, addr hostarch.Addr, src []FDID) (int, error) { + count := len(src) + if count == 0 { + return 0, nil + } + size := (*FDID)(nil).SizeBytes() + + ptr := unsafe.Pointer(&src) + val := gohacks.Noescape(unsafe.Pointer((*reflect.SliceHeader)(ptr).Data)) + + // Construct a slice backed by dst's underlying memory. + var buf []byte + hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf)) + hdr.Data = uintptr(val) + hdr.Len = size * count + hdr.Cap = size * count + + length, err := cc.CopyOutBytes(addr, buf) // escapes: okay. + // Since we bypassed the compiler's escape analysis, indicate that src + // must live until the use above. + runtime.KeepAlive(src) // escapes: replaced by intrinsic. + return length, err +} + +// MarshalUnsafeFDIDSlice is like FDID.MarshalUnsafe, but for a []FDID. +func MarshalUnsafeFDIDSlice(src []FDID, dst []byte) (int, error) { + count := len(src) + if count == 0 { + return 0, nil + } + size := (*FDID)(nil).SizeBytes() + + dst = dst[:size*count] + gohacks.Memmove(unsafe.Pointer(&dst[0]), unsafe.Pointer(&src[0]), uintptr(len(dst))) + return size*count, nil +} + +// UnmarshalUnsafeFDIDSlice is like FDID.UnmarshalUnsafe, but for a []FDID. +func UnmarshalUnsafeFDIDSlice(dst []FDID, src []byte) (int, error) { + count := len(dst) + if count == 0 { + return 0, nil + } + size := (*FDID)(nil).SizeBytes() + + src = src[:(size*count)] + gohacks.Memmove(unsafe.Pointer(&dst[0]), unsafe.Pointer(&src[0]), uintptr(len(src))) + return size*count, nil +} + +// SizeBytes implements marshal.Marshallable.SizeBytes. +func (c *ChannelResp) SizeBytes() int { + return 16 +} + +// MarshalBytes implements marshal.Marshallable.MarshalBytes. +func (c *ChannelResp) MarshalBytes(dst []byte) { + hostarch.ByteOrder.PutUint64(dst[:8], uint64(c.dataOffset)) + dst = dst[8:] + hostarch.ByteOrder.PutUint64(dst[:8], uint64(c.dataLength)) + dst = dst[8:] +} + +// UnmarshalBytes implements marshal.Marshallable.UnmarshalBytes. +func (c *ChannelResp) UnmarshalBytes(src []byte) { + c.dataOffset = int64(hostarch.ByteOrder.Uint64(src[:8])) + src = src[8:] + c.dataLength = uint64(hostarch.ByteOrder.Uint64(src[:8])) + src = src[8:] +} + +// Packed implements marshal.Marshallable.Packed. +//go:nosplit +func (c *ChannelResp) Packed() bool { + return true +} + +// MarshalUnsafe implements marshal.Marshallable.MarshalUnsafe. +func (c *ChannelResp) MarshalUnsafe(dst []byte) { + gohacks.Memmove(unsafe.Pointer(&dst[0]), unsafe.Pointer(c), uintptr(c.SizeBytes())) +} + +// UnmarshalUnsafe implements marshal.Marshallable.UnmarshalUnsafe. +func (c *ChannelResp) UnmarshalUnsafe(src []byte) { + gohacks.Memmove(unsafe.Pointer(c), unsafe.Pointer(&src[0]), uintptr(c.SizeBytes())) +} + +// CopyOutN implements marshal.Marshallable.CopyOutN. +//go:nosplit +func (c *ChannelResp) CopyOutN(cc marshal.CopyContext, addr hostarch.Addr, limit int) (int, error) { + // Construct a slice backed by dst's underlying memory. + var buf []byte + hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf)) + hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(c))) + hdr.Len = c.SizeBytes() + hdr.Cap = c.SizeBytes() + + length, err := cc.CopyOutBytes(addr, buf[:limit]) // escapes: okay. + // Since we bypassed the compiler's escape analysis, indicate that c + // must live until the use above. + runtime.KeepAlive(c) // escapes: replaced by intrinsic. + return length, err +} + +// CopyOut implements marshal.Marshallable.CopyOut. +//go:nosplit +func (c *ChannelResp) CopyOut(cc marshal.CopyContext, addr hostarch.Addr) (int, error) { + return c.CopyOutN(cc, addr, c.SizeBytes()) +} + +// CopyIn implements marshal.Marshallable.CopyIn. +//go:nosplit +func (c *ChannelResp) CopyIn(cc marshal.CopyContext, addr hostarch.Addr) (int, error) { + // Construct a slice backed by dst's underlying memory. + var buf []byte + hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf)) + hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(c))) + hdr.Len = c.SizeBytes() + hdr.Cap = c.SizeBytes() + + length, err := cc.CopyInBytes(addr, buf) // escapes: okay. + // Since we bypassed the compiler's escape analysis, indicate that c + // must live until the use above. + runtime.KeepAlive(c) // escapes: replaced by intrinsic. + return length, err +} + +// WriteTo implements io.WriterTo.WriteTo. +func (c *ChannelResp) WriteTo(writer io.Writer) (int64, error) { + // Construct a slice backed by dst's underlying memory. + var buf []byte + hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf)) + hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(c))) + hdr.Len = c.SizeBytes() + hdr.Cap = c.SizeBytes() + + length, err := writer.Write(buf) + // Since we bypassed the compiler's escape analysis, indicate that c + // must live until the use above. + runtime.KeepAlive(c) // escapes: replaced by intrinsic. + return int64(length), err +} + +// SizeBytes implements marshal.Marshallable.SizeBytes. +func (e *ErrorResp) SizeBytes() int { + return 4 +} + +// MarshalBytes implements marshal.Marshallable.MarshalBytes. +func (e *ErrorResp) MarshalBytes(dst []byte) { + hostarch.ByteOrder.PutUint32(dst[:4], uint32(e.errno)) + dst = dst[4:] +} + +// UnmarshalBytes implements marshal.Marshallable.UnmarshalBytes. +func (e *ErrorResp) UnmarshalBytes(src []byte) { + e.errno = uint32(hostarch.ByteOrder.Uint32(src[:4])) + src = src[4:] +} + +// Packed implements marshal.Marshallable.Packed. +//go:nosplit +func (e *ErrorResp) Packed() bool { + return true +} + +// MarshalUnsafe implements marshal.Marshallable.MarshalUnsafe. +func (e *ErrorResp) MarshalUnsafe(dst []byte) { + gohacks.Memmove(unsafe.Pointer(&dst[0]), unsafe.Pointer(e), uintptr(e.SizeBytes())) +} + +// UnmarshalUnsafe implements marshal.Marshallable.UnmarshalUnsafe. +func (e *ErrorResp) UnmarshalUnsafe(src []byte) { + gohacks.Memmove(unsafe.Pointer(e), unsafe.Pointer(&src[0]), uintptr(e.SizeBytes())) +} + +// CopyOutN implements marshal.Marshallable.CopyOutN. +//go:nosplit +func (e *ErrorResp) CopyOutN(cc marshal.CopyContext, addr hostarch.Addr, limit int) (int, error) { + // Construct a slice backed by dst's underlying memory. + var buf []byte + hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf)) + hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(e))) + hdr.Len = e.SizeBytes() + hdr.Cap = e.SizeBytes() + + length, err := cc.CopyOutBytes(addr, buf[:limit]) // escapes: okay. + // Since we bypassed the compiler's escape analysis, indicate that e + // must live until the use above. + runtime.KeepAlive(e) // escapes: replaced by intrinsic. + return length, err +} + +// CopyOut implements marshal.Marshallable.CopyOut. +//go:nosplit +func (e *ErrorResp) CopyOut(cc marshal.CopyContext, addr hostarch.Addr) (int, error) { + return e.CopyOutN(cc, addr, e.SizeBytes()) +} + +// CopyIn implements marshal.Marshallable.CopyIn. +//go:nosplit +func (e *ErrorResp) CopyIn(cc marshal.CopyContext, addr hostarch.Addr) (int, error) { + // Construct a slice backed by dst's underlying memory. + var buf []byte + hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf)) + hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(e))) + hdr.Len = e.SizeBytes() + hdr.Cap = e.SizeBytes() + + length, err := cc.CopyInBytes(addr, buf) // escapes: okay. + // Since we bypassed the compiler's escape analysis, indicate that e + // must live until the use above. + runtime.KeepAlive(e) // escapes: replaced by intrinsic. + return length, err +} + +// WriteTo implements io.WriterTo.WriteTo. +func (e *ErrorResp) WriteTo(writer io.Writer) (int64, error) { + // Construct a slice backed by dst's underlying memory. + var buf []byte + hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf)) + hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(e))) + hdr.Len = e.SizeBytes() + hdr.Cap = e.SizeBytes() + + length, err := writer.Write(buf) + // Since we bypassed the compiler's escape analysis, indicate that e + // must live until the use above. + runtime.KeepAlive(e) // escapes: replaced by intrinsic. + return int64(length), err +} + +// SizeBytes implements marshal.Marshallable.SizeBytes. +//go:nosplit +func (gid *GID) SizeBytes() int { + return 4 +} + +// MarshalBytes implements marshal.Marshallable.MarshalBytes. +func (gid *GID) MarshalBytes(dst []byte) { + hostarch.ByteOrder.PutUint32(dst[:4], uint32(*gid)) +} + +// UnmarshalBytes implements marshal.Marshallable.UnmarshalBytes. +func (gid *GID) UnmarshalBytes(src []byte) { + *gid = GID(uint32(hostarch.ByteOrder.Uint32(src[:4]))) +} + +// Packed implements marshal.Marshallable.Packed. +//go:nosplit +func (gid *GID) Packed() bool { + // Scalar newtypes are always packed. + return true +} + +// MarshalUnsafe implements marshal.Marshallable.MarshalUnsafe. +func (gid *GID) MarshalUnsafe(dst []byte) { + gohacks.Memmove(unsafe.Pointer(&dst[0]), unsafe.Pointer(gid), uintptr(gid.SizeBytes())) +} + +// UnmarshalUnsafe implements marshal.Marshallable.UnmarshalUnsafe. +func (gid *GID) UnmarshalUnsafe(src []byte) { + gohacks.Memmove(unsafe.Pointer(gid), unsafe.Pointer(&src[0]), uintptr(gid.SizeBytes())) +} + +// CopyOutN implements marshal.Marshallable.CopyOutN. +//go:nosplit +func (gid *GID) CopyOutN(cc marshal.CopyContext, addr hostarch.Addr, limit int) (int, error) { + // Construct a slice backed by dst's underlying memory. + var buf []byte + hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf)) + hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(gid))) + hdr.Len = gid.SizeBytes() + hdr.Cap = gid.SizeBytes() + + length, err := cc.CopyOutBytes(addr, buf[:limit]) // escapes: okay. + // Since we bypassed the compiler's escape analysis, indicate that gid + // must live until the use above. + runtime.KeepAlive(gid) // escapes: replaced by intrinsic. + return length, err +} + +// CopyOut implements marshal.Marshallable.CopyOut. +//go:nosplit +func (gid *GID) CopyOut(cc marshal.CopyContext, addr hostarch.Addr) (int, error) { + return gid.CopyOutN(cc, addr, gid.SizeBytes()) +} + +// CopyIn implements marshal.Marshallable.CopyIn. +//go:nosplit +func (gid *GID) CopyIn(cc marshal.CopyContext, addr hostarch.Addr) (int, error) { + // Construct a slice backed by dst's underlying memory. + var buf []byte + hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf)) + hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(gid))) + hdr.Len = gid.SizeBytes() + hdr.Cap = gid.SizeBytes() + + length, err := cc.CopyInBytes(addr, buf) // escapes: okay. + // Since we bypassed the compiler's escape analysis, indicate that gid + // must live until the use above. + runtime.KeepAlive(gid) // escapes: replaced by intrinsic. + return length, err +} + +// WriteTo implements io.WriterTo.WriteTo. +func (gid *GID) WriteTo(w io.Writer) (int64, error) { + // Construct a slice backed by dst's underlying memory. + var buf []byte + hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf)) + hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(gid))) + hdr.Len = gid.SizeBytes() + hdr.Cap = gid.SizeBytes() + + length, err := w.Write(buf) + // Since we bypassed the compiler's escape analysis, indicate that gid + // must live until the use above. + runtime.KeepAlive(gid) // escapes: replaced by intrinsic. + return int64(length), err +} + +// SizeBytes implements marshal.Marshallable.SizeBytes. +func (i *Inode) SizeBytes() int { + return 4 + + (*FDID)(nil).SizeBytes() + + (*linux.Statx)(nil).SizeBytes() +} + +// MarshalBytes implements marshal.Marshallable.MarshalBytes. +func (i *Inode) MarshalBytes(dst []byte) { + i.ControlFD.MarshalBytes(dst[:i.ControlFD.SizeBytes()]) + dst = dst[i.ControlFD.SizeBytes():] + // Padding: dst[:sizeof(uint32)] ~= uint32(0) + dst = dst[4:] + i.Stat.MarshalBytes(dst[:i.Stat.SizeBytes()]) + dst = dst[i.Stat.SizeBytes():] +} + +// UnmarshalBytes implements marshal.Marshallable.UnmarshalBytes. +func (i *Inode) UnmarshalBytes(src []byte) { + i.ControlFD.UnmarshalBytes(src[:i.ControlFD.SizeBytes()]) + src = src[i.ControlFD.SizeBytes():] + // Padding: var _ uint32 ~= src[:sizeof(uint32)] + src = src[4:] + i.Stat.UnmarshalBytes(src[:i.Stat.SizeBytes()]) + src = src[i.Stat.SizeBytes():] +} + +// Packed implements marshal.Marshallable.Packed. +//go:nosplit +func (i *Inode) Packed() bool { + return i.ControlFD.Packed() && i.Stat.Packed() +} + +// MarshalUnsafe implements marshal.Marshallable.MarshalUnsafe. +func (i *Inode) MarshalUnsafe(dst []byte) { + if i.ControlFD.Packed() && i.Stat.Packed() { + gohacks.Memmove(unsafe.Pointer(&dst[0]), unsafe.Pointer(i), uintptr(i.SizeBytes())) + } else { + // Type Inode doesn't have a packed layout in memory, fallback to MarshalBytes. + i.MarshalBytes(dst) + } +} + +// UnmarshalUnsafe implements marshal.Marshallable.UnmarshalUnsafe. +func (i *Inode) UnmarshalUnsafe(src []byte) { + if i.ControlFD.Packed() && i.Stat.Packed() { + gohacks.Memmove(unsafe.Pointer(i), unsafe.Pointer(&src[0]), uintptr(i.SizeBytes())) + } else { + // Type Inode doesn't have a packed layout in memory, fallback to UnmarshalBytes. + i.UnmarshalBytes(src) + } +} + +// CopyOutN implements marshal.Marshallable.CopyOutN. +//go:nosplit +func (i *Inode) CopyOutN(cc marshal.CopyContext, addr hostarch.Addr, limit int) (int, error) { + if !i.ControlFD.Packed() && i.Stat.Packed() { + // Type Inode doesn't have a packed layout in memory, fall back to MarshalBytes. + buf := cc.CopyScratchBuffer(i.SizeBytes()) // escapes: okay. + i.MarshalBytes(buf) // escapes: fallback. + return cc.CopyOutBytes(addr, buf[:limit]) // escapes: okay. + } + + // Construct a slice backed by dst's underlying memory. + var buf []byte + hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf)) + hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(i))) + hdr.Len = i.SizeBytes() + hdr.Cap = i.SizeBytes() + + length, err := cc.CopyOutBytes(addr, buf[:limit]) // escapes: okay. + // Since we bypassed the compiler's escape analysis, indicate that i + // must live until the use above. + runtime.KeepAlive(i) // escapes: replaced by intrinsic. + return length, err +} + +// CopyOut implements marshal.Marshallable.CopyOut. +//go:nosplit +func (i *Inode) CopyOut(cc marshal.CopyContext, addr hostarch.Addr) (int, error) { + return i.CopyOutN(cc, addr, i.SizeBytes()) +} + +// CopyIn implements marshal.Marshallable.CopyIn. +//go:nosplit +func (i *Inode) CopyIn(cc marshal.CopyContext, addr hostarch.Addr) (int, error) { + if !i.ControlFD.Packed() && i.Stat.Packed() { + // Type Inode doesn't have a packed layout in memory, fall back to UnmarshalBytes. + buf := cc.CopyScratchBuffer(i.SizeBytes()) // escapes: okay. + length, err := cc.CopyInBytes(addr, buf) // escapes: okay. + // Unmarshal unconditionally. If we had a short copy-in, this results in a + // partially unmarshalled struct. + i.UnmarshalBytes(buf) // escapes: fallback. + return length, err + } + + // Construct a slice backed by dst's underlying memory. + var buf []byte + hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf)) + hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(i))) + hdr.Len = i.SizeBytes() + hdr.Cap = i.SizeBytes() + + length, err := cc.CopyInBytes(addr, buf) // escapes: okay. + // Since we bypassed the compiler's escape analysis, indicate that i + // must live until the use above. + runtime.KeepAlive(i) // escapes: replaced by intrinsic. + return length, err +} + +// WriteTo implements io.WriterTo.WriteTo. +func (i *Inode) WriteTo(writer io.Writer) (int64, error) { + if !i.ControlFD.Packed() && i.Stat.Packed() { + // Type Inode doesn't have a packed layout in memory, fall back to MarshalBytes. + buf := make([]byte, i.SizeBytes()) + i.MarshalBytes(buf) + length, err := writer.Write(buf) + return int64(length), err + } + + // Construct a slice backed by dst's underlying memory. + var buf []byte + hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf)) + hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(i))) + hdr.Len = i.SizeBytes() + hdr.Cap = i.SizeBytes() + + length, err := writer.Write(buf) + // Since we bypassed the compiler's escape analysis, indicate that i + // must live until the use above. + runtime.KeepAlive(i) // escapes: replaced by intrinsic. + return int64(length), err +} + +// CopyInodeSliceIn copies in a slice of Inode objects from the task's memory. +func CopyInodeSliceIn(cc marshal.CopyContext, addr hostarch.Addr, dst []Inode) (int, error) { + count := len(dst) + if count == 0 { + return 0, nil + } + size := (*Inode)(nil).SizeBytes() + + if !dst[0].Packed() { + // Type Inode doesn't have a packed layout in memory, fall back to UnmarshalBytes. + buf := cc.CopyScratchBuffer(size * count) + length, err := cc.CopyInBytes(addr, buf) + + // Unmarshal as much as possible, even on error. First handle full objects. + limit := length/size + for idx := 0; idx < limit; idx++ { + dst[idx].UnmarshalBytes(buf[size*idx:size*(idx+1)]) + } + + // Handle any final partial object. buf is guaranteed to be long enough for the + // final element, but may not contain valid data for the entire range. This may + // result in unmarshalling zero values for some parts of the object. + if length%size != 0 { + idx := limit + dst[idx].UnmarshalBytes(buf[size*idx:size*(idx+1)]) + } + + return length, err + } + + ptr := unsafe.Pointer(&dst) + val := gohacks.Noescape(unsafe.Pointer((*reflect.SliceHeader)(ptr).Data)) + + // Construct a slice backed by dst's underlying memory. + var buf []byte + hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf)) + hdr.Data = uintptr(val) + hdr.Len = size * count + hdr.Cap = size * count + + length, err := cc.CopyInBytes(addr, buf) + // Since we bypassed the compiler's escape analysis, indicate that dst + // must live until the use above. + runtime.KeepAlive(dst) // escapes: replaced by intrinsic. + return length, err +} + +// CopyInodeSliceOut copies a slice of Inode objects to the task's memory. +func CopyInodeSliceOut(cc marshal.CopyContext, addr hostarch.Addr, src []Inode) (int, error) { + count := len(src) + if count == 0 { + return 0, nil + } + size := (*Inode)(nil).SizeBytes() + + if !src[0].Packed() { + // Type Inode doesn't have a packed layout in memory, fall back to MarshalBytes. + buf := cc.CopyScratchBuffer(size * count) + for idx := 0; idx < count; idx++ { + src[idx].MarshalBytes(buf[size*idx:size*(idx+1)]) + } + return cc.CopyOutBytes(addr, buf) + } + + ptr := unsafe.Pointer(&src) + val := gohacks.Noescape(unsafe.Pointer((*reflect.SliceHeader)(ptr).Data)) + + // Construct a slice backed by dst's underlying memory. + var buf []byte + hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf)) + hdr.Data = uintptr(val) + hdr.Len = size * count + hdr.Cap = size * count + + length, err := cc.CopyOutBytes(addr, buf) + // Since we bypassed the compiler's escape analysis, indicate that src + // must live until the use above. + runtime.KeepAlive(src) // escapes: replaced by intrinsic. + return length, err +} + +// MarshalUnsafeInodeSlice is like Inode.MarshalUnsafe, but for a []Inode. +func MarshalUnsafeInodeSlice(src []Inode, dst []byte) (int, error) { + count := len(src) + if count == 0 { + return 0, nil + } + size := (*Inode)(nil).SizeBytes() + + if !src[0].Packed() { + // Type Inode doesn't have a packed layout in memory, fall back to MarshalBytes. + for idx := 0; idx < count; idx++ { + src[idx].MarshalBytes(dst[size*idx:(size)*(idx+1)]) + } + return size * count, nil + } + + dst = dst[:size*count] + gohacks.Memmove(unsafe.Pointer(&dst[0]), unsafe.Pointer(&src[0]), uintptr(len(dst))) + return size * count, nil +} + +// UnmarshalUnsafeInodeSlice is like Inode.UnmarshalUnsafe, but for a []Inode. +func UnmarshalUnsafeInodeSlice(dst []Inode, src []byte) (int, error) { + count := len(dst) + if count == 0 { + return 0, nil + } + size := (*Inode)(nil).SizeBytes() + + if !dst[0].Packed() { + // Type Inode doesn't have a packed layout in memory, fall back to UnmarshalBytes. + for idx := 0; idx < count; idx++ { + dst[idx].UnmarshalBytes(src[size*idx:size*(idx+1)]) + } + return size * count, nil + } + + src = src[:(size*count)] + gohacks.Memmove(unsafe.Pointer(&dst[0]), unsafe.Pointer(&src[0]), uintptr(len(src))) + return count*size, nil +} + +// SizeBytes implements marshal.Marshallable.SizeBytes. +//go:nosplit +func (m *MID) SizeBytes() int { + return 2 +} + +// MarshalBytes implements marshal.Marshallable.MarshalBytes. +func (m *MID) MarshalBytes(dst []byte) { + hostarch.ByteOrder.PutUint16(dst[:2], uint16(*m)) +} + +// UnmarshalBytes implements marshal.Marshallable.UnmarshalBytes. +func (m *MID) UnmarshalBytes(src []byte) { + *m = MID(uint16(hostarch.ByteOrder.Uint16(src[:2]))) +} + +// Packed implements marshal.Marshallable.Packed. +//go:nosplit +func (m *MID) Packed() bool { + // Scalar newtypes are always packed. + return true +} + +// MarshalUnsafe implements marshal.Marshallable.MarshalUnsafe. +func (m *MID) MarshalUnsafe(dst []byte) { + gohacks.Memmove(unsafe.Pointer(&dst[0]), unsafe.Pointer(m), uintptr(m.SizeBytes())) +} + +// UnmarshalUnsafe implements marshal.Marshallable.UnmarshalUnsafe. +func (m *MID) UnmarshalUnsafe(src []byte) { + gohacks.Memmove(unsafe.Pointer(m), unsafe.Pointer(&src[0]), uintptr(m.SizeBytes())) +} + +// CopyOutN implements marshal.Marshallable.CopyOutN. +//go:nosplit +func (m *MID) CopyOutN(cc marshal.CopyContext, addr hostarch.Addr, limit int) (int, error) { + // Construct a slice backed by dst's underlying memory. + var buf []byte + hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf)) + hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(m))) + hdr.Len = m.SizeBytes() + hdr.Cap = m.SizeBytes() + + length, err := cc.CopyOutBytes(addr, buf[:limit]) // escapes: okay. + // Since we bypassed the compiler's escape analysis, indicate that m + // must live until the use above. + runtime.KeepAlive(m) // escapes: replaced by intrinsic. + return length, err +} + +// CopyOut implements marshal.Marshallable.CopyOut. +//go:nosplit +func (m *MID) CopyOut(cc marshal.CopyContext, addr hostarch.Addr) (int, error) { + return m.CopyOutN(cc, addr, m.SizeBytes()) +} + +// CopyIn implements marshal.Marshallable.CopyIn. +//go:nosplit +func (m *MID) CopyIn(cc marshal.CopyContext, addr hostarch.Addr) (int, error) { + // Construct a slice backed by dst's underlying memory. + var buf []byte + hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf)) + hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(m))) + hdr.Len = m.SizeBytes() + hdr.Cap = m.SizeBytes() + + length, err := cc.CopyInBytes(addr, buf) // escapes: okay. + // Since we bypassed the compiler's escape analysis, indicate that m + // must live until the use above. + runtime.KeepAlive(m) // escapes: replaced by intrinsic. + return length, err +} + +// WriteTo implements io.WriterTo.WriteTo. +func (m *MID) WriteTo(w io.Writer) (int64, error) { + // Construct a slice backed by dst's underlying memory. + var buf []byte + hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf)) + hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(m))) + hdr.Len = m.SizeBytes() + hdr.Cap = m.SizeBytes() + + length, err := w.Write(buf) + // Since we bypassed the compiler's escape analysis, indicate that m + // must live until the use above. + runtime.KeepAlive(m) // escapes: replaced by intrinsic. + return int64(length), err +} + +// CopyMIDSliceIn copies in a slice of MID objects from the task's memory. +//go:nosplit +func CopyMIDSliceIn(cc marshal.CopyContext, addr hostarch.Addr, dst []MID) (int, error) { + count := len(dst) + if count == 0 { + return 0, nil + } + size := (*MID)(nil).SizeBytes() + + ptr := unsafe.Pointer(&dst) + val := gohacks.Noescape(unsafe.Pointer((*reflect.SliceHeader)(ptr).Data)) + + // Construct a slice backed by dst's underlying memory. + var buf []byte + hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf)) + hdr.Data = uintptr(val) + hdr.Len = size * count + hdr.Cap = size * count + + length, err := cc.CopyInBytes(addr, buf) // escapes: okay. + // Since we bypassed the compiler's escape analysis, indicate that dst + // must live until the use above. + runtime.KeepAlive(dst) // escapes: replaced by intrinsic. + return length, err +} + +// CopyMIDSliceOut copies a slice of MID objects to the task's memory. +//go:nosplit +func CopyMIDSliceOut(cc marshal.CopyContext, addr hostarch.Addr, src []MID) (int, error) { + count := len(src) + if count == 0 { + return 0, nil + } + size := (*MID)(nil).SizeBytes() + + ptr := unsafe.Pointer(&src) + val := gohacks.Noescape(unsafe.Pointer((*reflect.SliceHeader)(ptr).Data)) + + // Construct a slice backed by dst's underlying memory. + var buf []byte + hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf)) + hdr.Data = uintptr(val) + hdr.Len = size * count + hdr.Cap = size * count + + length, err := cc.CopyOutBytes(addr, buf) // escapes: okay. + // Since we bypassed the compiler's escape analysis, indicate that src + // must live until the use above. + runtime.KeepAlive(src) // escapes: replaced by intrinsic. + return length, err +} + +// MarshalUnsafeMIDSlice is like MID.MarshalUnsafe, but for a []MID. +func MarshalUnsafeMIDSlice(src []MID, dst []byte) (int, error) { + count := len(src) + if count == 0 { + return 0, nil + } + size := (*MID)(nil).SizeBytes() + + dst = dst[:size*count] + gohacks.Memmove(unsafe.Pointer(&dst[0]), unsafe.Pointer(&src[0]), uintptr(len(dst))) + return size*count, nil +} + +// UnmarshalUnsafeMIDSlice is like MID.UnmarshalUnsafe, but for a []MID. +func UnmarshalUnsafeMIDSlice(dst []MID, src []byte) (int, error) { + count := len(dst) + if count == 0 { + return 0, nil + } + size := (*MID)(nil).SizeBytes() + + src = src[:(size*count)] + gohacks.Memmove(unsafe.Pointer(&dst[0]), unsafe.Pointer(&src[0]), uintptr(len(src))) + return size*count, nil +} + +// SizeBytes implements marshal.Marshallable.SizeBytes. +//go:nosplit +func (uid *UID) SizeBytes() int { + return 4 +} + +// MarshalBytes implements marshal.Marshallable.MarshalBytes. +func (uid *UID) MarshalBytes(dst []byte) { + hostarch.ByteOrder.PutUint32(dst[:4], uint32(*uid)) +} + +// UnmarshalBytes implements marshal.Marshallable.UnmarshalBytes. +func (uid *UID) UnmarshalBytes(src []byte) { + *uid = UID(uint32(hostarch.ByteOrder.Uint32(src[:4]))) +} + +// Packed implements marshal.Marshallable.Packed. +//go:nosplit +func (uid *UID) Packed() bool { + // Scalar newtypes are always packed. + return true +} + +// MarshalUnsafe implements marshal.Marshallable.MarshalUnsafe. +func (uid *UID) MarshalUnsafe(dst []byte) { + gohacks.Memmove(unsafe.Pointer(&dst[0]), unsafe.Pointer(uid), uintptr(uid.SizeBytes())) +} + +// UnmarshalUnsafe implements marshal.Marshallable.UnmarshalUnsafe. +func (uid *UID) UnmarshalUnsafe(src []byte) { + gohacks.Memmove(unsafe.Pointer(uid), unsafe.Pointer(&src[0]), uintptr(uid.SizeBytes())) +} + +// CopyOutN implements marshal.Marshallable.CopyOutN. +//go:nosplit +func (uid *UID) CopyOutN(cc marshal.CopyContext, addr hostarch.Addr, limit int) (int, error) { + // Construct a slice backed by dst's underlying memory. + var buf []byte + hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf)) + hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(uid))) + hdr.Len = uid.SizeBytes() + hdr.Cap = uid.SizeBytes() + + length, err := cc.CopyOutBytes(addr, buf[:limit]) // escapes: okay. + // Since we bypassed the compiler's escape analysis, indicate that uid + // must live until the use above. + runtime.KeepAlive(uid) // escapes: replaced by intrinsic. + return length, err +} + +// CopyOut implements marshal.Marshallable.CopyOut. +//go:nosplit +func (uid *UID) CopyOut(cc marshal.CopyContext, addr hostarch.Addr) (int, error) { + return uid.CopyOutN(cc, addr, uid.SizeBytes()) +} + +// CopyIn implements marshal.Marshallable.CopyIn. +//go:nosplit +func (uid *UID) CopyIn(cc marshal.CopyContext, addr hostarch.Addr) (int, error) { + // Construct a slice backed by dst's underlying memory. + var buf []byte + hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf)) + hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(uid))) + hdr.Len = uid.SizeBytes() + hdr.Cap = uid.SizeBytes() + + length, err := cc.CopyInBytes(addr, buf) // escapes: okay. + // Since we bypassed the compiler's escape analysis, indicate that uid + // must live until the use above. + runtime.KeepAlive(uid) // escapes: replaced by intrinsic. + return length, err +} + +// WriteTo implements io.WriterTo.WriteTo. +func (uid *UID) WriteTo(w io.Writer) (int64, error) { + // Construct a slice backed by dst's underlying memory. + var buf []byte + hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf)) + hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(uid))) + hdr.Len = uid.SizeBytes() + hdr.Cap = uid.SizeBytes() + + length, err := w.Write(buf) + // Since we bypassed the compiler's escape analysis, indicate that uid + // must live until the use above. + runtime.KeepAlive(uid) // escapes: replaced by intrinsic. + return int64(length), err +} + +// Packed implements marshal.Marshallable.Packed. +//go:nosplit +func (m *MsgDynamic) Packed() bool { + // Type MsgDynamic is dynamic so it might have slice/string headers. Hence, it is not packed. + return false +} + +// MarshalUnsafe implements marshal.Marshallable.MarshalUnsafe. +func (m *MsgDynamic) MarshalUnsafe(dst []byte) { + // Type MsgDynamic doesn't have a packed layout in memory, fallback to MarshalBytes. + m.MarshalBytes(dst) +} + +// UnmarshalUnsafe implements marshal.Marshallable.UnmarshalUnsafe. +func (m *MsgDynamic) UnmarshalUnsafe(src []byte) { + // Type MsgDynamic doesn't have a packed layout in memory, fallback to UnmarshalBytes. + m.UnmarshalBytes(src) +} + +// CopyOutN implements marshal.Marshallable.CopyOutN. +//go:nosplit +func (m *MsgDynamic) CopyOutN(cc marshal.CopyContext, addr hostarch.Addr, limit int) (int, error) { + // Type MsgDynamic doesn't have a packed layout in memory, fall back to MarshalBytes. + buf := cc.CopyScratchBuffer(m.SizeBytes()) // escapes: okay. + m.MarshalBytes(buf) // escapes: fallback. + return cc.CopyOutBytes(addr, buf[:limit]) // escapes: okay. +} + +// CopyOut implements marshal.Marshallable.CopyOut. +//go:nosplit +func (m *MsgDynamic) CopyOut(cc marshal.CopyContext, addr hostarch.Addr) (int, error) { + return m.CopyOutN(cc, addr, m.SizeBytes()) +} + +// CopyIn implements marshal.Marshallable.CopyIn. +//go:nosplit +func (m *MsgDynamic) CopyIn(cc marshal.CopyContext, addr hostarch.Addr) (int, error) { + // Type MsgDynamic doesn't have a packed layout in memory, fall back to UnmarshalBytes. + buf := cc.CopyScratchBuffer(m.SizeBytes()) // escapes: okay. + length, err := cc.CopyInBytes(addr, buf) // escapes: okay. + // Unmarshal unconditionally. If we had a short copy-in, this results in a + // partially unmarshalled struct. + m.UnmarshalBytes(buf) // escapes: fallback. + return length, err +} + +// WriteTo implements io.WriterTo.WriteTo. +func (m *MsgDynamic) WriteTo(writer io.Writer) (int64, error) { + // Type MsgDynamic doesn't have a packed layout in memory, fall back to MarshalBytes. + buf := make([]byte, m.SizeBytes()) + m.MarshalBytes(buf) + length, err := writer.Write(buf) + return int64(length), err +} + +// SizeBytes implements marshal.Marshallable.SizeBytes. +func (m *MsgSimple) SizeBytes() int { + return 16 +} + +// MarshalBytes implements marshal.Marshallable.MarshalBytes. +func (m *MsgSimple) MarshalBytes(dst []byte) { + hostarch.ByteOrder.PutUint16(dst[:2], uint16(m.A)) + dst = dst[2:] + hostarch.ByteOrder.PutUint16(dst[:2], uint16(m.B)) + dst = dst[2:] + hostarch.ByteOrder.PutUint32(dst[:4], uint32(m.C)) + dst = dst[4:] + hostarch.ByteOrder.PutUint64(dst[:8], uint64(m.D)) + dst = dst[8:] +} + +// UnmarshalBytes implements marshal.Marshallable.UnmarshalBytes. +func (m *MsgSimple) UnmarshalBytes(src []byte) { + m.A = uint16(hostarch.ByteOrder.Uint16(src[:2])) + src = src[2:] + m.B = uint16(hostarch.ByteOrder.Uint16(src[:2])) + src = src[2:] + m.C = uint32(hostarch.ByteOrder.Uint32(src[:4])) + src = src[4:] + m.D = uint64(hostarch.ByteOrder.Uint64(src[:8])) + src = src[8:] +} + +// Packed implements marshal.Marshallable.Packed. +//go:nosplit +func (m *MsgSimple) Packed() bool { + return true +} + +// MarshalUnsafe implements marshal.Marshallable.MarshalUnsafe. +func (m *MsgSimple) MarshalUnsafe(dst []byte) { + gohacks.Memmove(unsafe.Pointer(&dst[0]), unsafe.Pointer(m), uintptr(m.SizeBytes())) +} + +// UnmarshalUnsafe implements marshal.Marshallable.UnmarshalUnsafe. +func (m *MsgSimple) UnmarshalUnsafe(src []byte) { + gohacks.Memmove(unsafe.Pointer(m), unsafe.Pointer(&src[0]), uintptr(m.SizeBytes())) +} + +// CopyOutN implements marshal.Marshallable.CopyOutN. +//go:nosplit +func (m *MsgSimple) CopyOutN(cc marshal.CopyContext, addr hostarch.Addr, limit int) (int, error) { + // Construct a slice backed by dst's underlying memory. + var buf []byte + hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf)) + hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(m))) + hdr.Len = m.SizeBytes() + hdr.Cap = m.SizeBytes() + + length, err := cc.CopyOutBytes(addr, buf[:limit]) // escapes: okay. + // Since we bypassed the compiler's escape analysis, indicate that m + // must live until the use above. + runtime.KeepAlive(m) // escapes: replaced by intrinsic. + return length, err +} + +// CopyOut implements marshal.Marshallable.CopyOut. +//go:nosplit +func (m *MsgSimple) CopyOut(cc marshal.CopyContext, addr hostarch.Addr) (int, error) { + return m.CopyOutN(cc, addr, m.SizeBytes()) +} + +// CopyIn implements marshal.Marshallable.CopyIn. +//go:nosplit +func (m *MsgSimple) CopyIn(cc marshal.CopyContext, addr hostarch.Addr) (int, error) { + // Construct a slice backed by dst's underlying memory. + var buf []byte + hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf)) + hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(m))) + hdr.Len = m.SizeBytes() + hdr.Cap = m.SizeBytes() + + length, err := cc.CopyInBytes(addr, buf) // escapes: okay. + // Since we bypassed the compiler's escape analysis, indicate that m + // must live until the use above. + runtime.KeepAlive(m) // escapes: replaced by intrinsic. + return length, err +} + +// WriteTo implements io.WriterTo.WriteTo. +func (m *MsgSimple) WriteTo(writer io.Writer) (int64, error) { + // Construct a slice backed by dst's underlying memory. + var buf []byte + hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf)) + hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(m))) + hdr.Len = m.SizeBytes() + hdr.Cap = m.SizeBytes() + + length, err := writer.Write(buf) + // Since we bypassed the compiler's escape analysis, indicate that m + // must live until the use above. + runtime.KeepAlive(m) // escapes: replaced by intrinsic. + return int64(length), err +} + +// CopyMsg1SliceIn copies in a slice of MsgSimple objects from the task's memory. +func CopyMsg1SliceIn(cc marshal.CopyContext, addr hostarch.Addr, dst []MsgSimple) (int, error) { + count := len(dst) + if count == 0 { + return 0, nil + } + size := (*MsgSimple)(nil).SizeBytes() + + ptr := unsafe.Pointer(&dst) + val := gohacks.Noescape(unsafe.Pointer((*reflect.SliceHeader)(ptr).Data)) + + // Construct a slice backed by dst's underlying memory. + var buf []byte + hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf)) + hdr.Data = uintptr(val) + hdr.Len = size * count + hdr.Cap = size * count + + length, err := cc.CopyInBytes(addr, buf) + // Since we bypassed the compiler's escape analysis, indicate that dst + // must live until the use above. + runtime.KeepAlive(dst) // escapes: replaced by intrinsic. + return length, err +} + +// CopyMsg1SliceOut copies a slice of MsgSimple objects to the task's memory. +func CopyMsg1SliceOut(cc marshal.CopyContext, addr hostarch.Addr, src []MsgSimple) (int, error) { + count := len(src) + if count == 0 { + return 0, nil + } + size := (*MsgSimple)(nil).SizeBytes() + + ptr := unsafe.Pointer(&src) + val := gohacks.Noescape(unsafe.Pointer((*reflect.SliceHeader)(ptr).Data)) + + // Construct a slice backed by dst's underlying memory. + var buf []byte + hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf)) + hdr.Data = uintptr(val) + hdr.Len = size * count + hdr.Cap = size * count + + length, err := cc.CopyOutBytes(addr, buf) + // Since we bypassed the compiler's escape analysis, indicate that src + // must live until the use above. + runtime.KeepAlive(src) // escapes: replaced by intrinsic. + return length, err +} + +// MarshalUnsafeMsg1Slice is like MsgSimple.MarshalUnsafe, but for a []MsgSimple. +func MarshalUnsafeMsg1Slice(src []MsgSimple, dst []byte) (int, error) { + count := len(src) + if count == 0 { + return 0, nil + } + size := (*MsgSimple)(nil).SizeBytes() + + dst = dst[:size*count] + gohacks.Memmove(unsafe.Pointer(&dst[0]), unsafe.Pointer(&src[0]), uintptr(len(dst))) + return size * count, nil +} + +// UnmarshalUnsafeMsg1Slice is like MsgSimple.UnmarshalUnsafe, but for a []MsgSimple. +func UnmarshalUnsafeMsg1Slice(dst []MsgSimple, src []byte) (int, error) { + count := len(dst) + if count == 0 { + return 0, nil + } + size := (*MsgSimple)(nil).SizeBytes() + + src = src[:(size*count)] + gohacks.Memmove(unsafe.Pointer(&dst[0]), unsafe.Pointer(&src[0]), uintptr(len(src))) + return count*size, nil +} + +// Packed implements marshal.Marshallable.Packed. +//go:nosplit +func (v *P9Version) Packed() bool { + // Type P9Version is dynamic so it might have slice/string headers. Hence, it is not packed. + return false +} + +// MarshalUnsafe implements marshal.Marshallable.MarshalUnsafe. +func (v *P9Version) MarshalUnsafe(dst []byte) { + // Type P9Version doesn't have a packed layout in memory, fallback to MarshalBytes. + v.MarshalBytes(dst) +} + +// UnmarshalUnsafe implements marshal.Marshallable.UnmarshalUnsafe. +func (v *P9Version) UnmarshalUnsafe(src []byte) { + // Type P9Version doesn't have a packed layout in memory, fallback to UnmarshalBytes. + v.UnmarshalBytes(src) +} + +// CopyOutN implements marshal.Marshallable.CopyOutN. +//go:nosplit +func (v *P9Version) CopyOutN(cc marshal.CopyContext, addr hostarch.Addr, limit int) (int, error) { + // Type P9Version doesn't have a packed layout in memory, fall back to MarshalBytes. + buf := cc.CopyScratchBuffer(v.SizeBytes()) // escapes: okay. + v.MarshalBytes(buf) // escapes: fallback. + return cc.CopyOutBytes(addr, buf[:limit]) // escapes: okay. +} + +// CopyOut implements marshal.Marshallable.CopyOut. +//go:nosplit +func (v *P9Version) CopyOut(cc marshal.CopyContext, addr hostarch.Addr) (int, error) { + return v.CopyOutN(cc, addr, v.SizeBytes()) +} + +// CopyIn implements marshal.Marshallable.CopyIn. +//go:nosplit +func (v *P9Version) CopyIn(cc marshal.CopyContext, addr hostarch.Addr) (int, error) { + // Type P9Version doesn't have a packed layout in memory, fall back to UnmarshalBytes. + buf := cc.CopyScratchBuffer(v.SizeBytes()) // escapes: okay. + length, err := cc.CopyInBytes(addr, buf) // escapes: okay. + // Unmarshal unconditionally. If we had a short copy-in, this results in a + // partially unmarshalled struct. + v.UnmarshalBytes(buf) // escapes: fallback. + return length, err +} + +// WriteTo implements io.WriterTo.WriteTo. +func (v *P9Version) WriteTo(writer io.Writer) (int64, error) { + // Type P9Version doesn't have a packed layout in memory, fall back to MarshalBytes. + buf := make([]byte, v.SizeBytes()) + v.MarshalBytes(buf) + length, err := writer.Write(buf) + return int64(length), err +} + +// SizeBytes implements marshal.Marshallable.SizeBytes. +func (s *sockHeader) SizeBytes() int { + return 6 + + (*MID)(nil).SizeBytes() +} + +// MarshalBytes implements marshal.Marshallable.MarshalBytes. +func (s *sockHeader) MarshalBytes(dst []byte) { + hostarch.ByteOrder.PutUint32(dst[:4], uint32(s.payloadLen)) + dst = dst[4:] + s.message.MarshalBytes(dst[:s.message.SizeBytes()]) + dst = dst[s.message.SizeBytes():] + // Padding: dst[:sizeof(uint16)] ~= uint16(0) + dst = dst[2:] +} + +// UnmarshalBytes implements marshal.Marshallable.UnmarshalBytes. +func (s *sockHeader) UnmarshalBytes(src []byte) { + s.payloadLen = uint32(hostarch.ByteOrder.Uint32(src[:4])) + src = src[4:] + s.message.UnmarshalBytes(src[:s.message.SizeBytes()]) + src = src[s.message.SizeBytes():] + // Padding: var _ uint16 ~= src[:sizeof(uint16)] + src = src[2:] +} + +// Packed implements marshal.Marshallable.Packed. +//go:nosplit +func (s *sockHeader) Packed() bool { + return s.message.Packed() +} + +// MarshalUnsafe implements marshal.Marshallable.MarshalUnsafe. +func (s *sockHeader) MarshalUnsafe(dst []byte) { + if s.message.Packed() { + gohacks.Memmove(unsafe.Pointer(&dst[0]), unsafe.Pointer(s), uintptr(s.SizeBytes())) + } else { + // Type sockHeader doesn't have a packed layout in memory, fallback to MarshalBytes. + s.MarshalBytes(dst) + } +} + +// UnmarshalUnsafe implements marshal.Marshallable.UnmarshalUnsafe. +func (s *sockHeader) UnmarshalUnsafe(src []byte) { + if s.message.Packed() { + gohacks.Memmove(unsafe.Pointer(s), unsafe.Pointer(&src[0]), uintptr(s.SizeBytes())) + } else { + // Type sockHeader doesn't have a packed layout in memory, fallback to UnmarshalBytes. + s.UnmarshalBytes(src) + } +} + +// CopyOutN implements marshal.Marshallable.CopyOutN. +//go:nosplit +func (s *sockHeader) CopyOutN(cc marshal.CopyContext, addr hostarch.Addr, limit int) (int, error) { + if !s.message.Packed() { + // Type sockHeader doesn't have a packed layout in memory, fall back to MarshalBytes. + buf := cc.CopyScratchBuffer(s.SizeBytes()) // escapes: okay. + s.MarshalBytes(buf) // escapes: fallback. + return cc.CopyOutBytes(addr, buf[:limit]) // escapes: okay. + } + + // Construct a slice backed by dst's underlying memory. + var buf []byte + hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf)) + hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(s))) + hdr.Len = s.SizeBytes() + hdr.Cap = s.SizeBytes() + + length, err := cc.CopyOutBytes(addr, buf[:limit]) // escapes: okay. + // Since we bypassed the compiler's escape analysis, indicate that s + // must live until the use above. + runtime.KeepAlive(s) // escapes: replaced by intrinsic. + return length, err +} + +// CopyOut implements marshal.Marshallable.CopyOut. +//go:nosplit +func (s *sockHeader) CopyOut(cc marshal.CopyContext, addr hostarch.Addr) (int, error) { + return s.CopyOutN(cc, addr, s.SizeBytes()) +} + +// CopyIn implements marshal.Marshallable.CopyIn. +//go:nosplit +func (s *sockHeader) CopyIn(cc marshal.CopyContext, addr hostarch.Addr) (int, error) { + if !s.message.Packed() { + // Type sockHeader doesn't have a packed layout in memory, fall back to UnmarshalBytes. + buf := cc.CopyScratchBuffer(s.SizeBytes()) // escapes: okay. + length, err := cc.CopyInBytes(addr, buf) // escapes: okay. + // Unmarshal unconditionally. If we had a short copy-in, this results in a + // partially unmarshalled struct. + s.UnmarshalBytes(buf) // escapes: fallback. + return length, err + } + + // Construct a slice backed by dst's underlying memory. + var buf []byte + hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf)) + hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(s))) + hdr.Len = s.SizeBytes() + hdr.Cap = s.SizeBytes() + + length, err := cc.CopyInBytes(addr, buf) // escapes: okay. + // Since we bypassed the compiler's escape analysis, indicate that s + // must live until the use above. + runtime.KeepAlive(s) // escapes: replaced by intrinsic. + return length, err +} + +// WriteTo implements io.WriterTo.WriteTo. +func (s *sockHeader) WriteTo(writer io.Writer) (int64, error) { + if !s.message.Packed() { + // Type sockHeader doesn't have a packed layout in memory, fall back to MarshalBytes. + buf := make([]byte, s.SizeBytes()) + s.MarshalBytes(buf) + length, err := writer.Write(buf) + return int64(length), err + } + + // Construct a slice backed by dst's underlying memory. + var buf []byte + hdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf)) + hdr.Data = uintptr(gohacks.Noescape(unsafe.Pointer(s))) + hdr.Len = s.SizeBytes() + hdr.Cap = s.SizeBytes() + + length, err := writer.Write(buf) + // Since we bypassed the compiler's escape analysis, indicate that s + // must live until the use above. + runtime.KeepAlive(s) // escapes: replaced by intrinsic. + return int64(length), err +} + diff --git a/pkg/lisafs/lisafs_state_autogen.go b/pkg/lisafs/lisafs_state_autogen.go new file mode 100644 index 000000000..fc032f947 --- /dev/null +++ b/pkg/lisafs/lisafs_state_autogen.go @@ -0,0 +1,176 @@ +// automatically generated by stateify. + +package lisafs + +import ( + "gvisor.dev/gvisor/pkg/state" +) + +func (l *controlFDList) StateTypeName() string { + return "pkg/lisafs.controlFDList" +} + +func (l *controlFDList) StateFields() []string { + return []string{ + "head", + "tail", + } +} + +func (l *controlFDList) beforeSave() {} + +// +checklocksignore +func (l *controlFDList) StateSave(stateSinkObject state.Sink) { + l.beforeSave() + stateSinkObject.Save(0, &l.head) + stateSinkObject.Save(1, &l.tail) +} + +func (l *controlFDList) afterLoad() {} + +// +checklocksignore +func (l *controlFDList) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &l.head) + stateSourceObject.Load(1, &l.tail) +} + +func (e *controlFDEntry) StateTypeName() string { + return "pkg/lisafs.controlFDEntry" +} + +func (e *controlFDEntry) StateFields() []string { + return []string{ + "next", + "prev", + } +} + +func (e *controlFDEntry) beforeSave() {} + +// +checklocksignore +func (e *controlFDEntry) StateSave(stateSinkObject state.Sink) { + e.beforeSave() + stateSinkObject.Save(0, &e.next) + stateSinkObject.Save(1, &e.prev) +} + +func (e *controlFDEntry) afterLoad() {} + +// +checklocksignore +func (e *controlFDEntry) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &e.next) + stateSourceObject.Load(1, &e.prev) +} + +func (r *controlFDRefs) StateTypeName() string { + return "pkg/lisafs.controlFDRefs" +} + +func (r *controlFDRefs) StateFields() []string { + return []string{ + "refCount", + } +} + +func (r *controlFDRefs) beforeSave() {} + +// +checklocksignore +func (r *controlFDRefs) StateSave(stateSinkObject state.Sink) { + r.beforeSave() + stateSinkObject.Save(0, &r.refCount) +} + +// +checklocksignore +func (r *controlFDRefs) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &r.refCount) + stateSourceObject.AfterLoad(r.afterLoad) +} + +func (l *openFDList) StateTypeName() string { + return "pkg/lisafs.openFDList" +} + +func (l *openFDList) StateFields() []string { + return []string{ + "head", + "tail", + } +} + +func (l *openFDList) beforeSave() {} + +// +checklocksignore +func (l *openFDList) StateSave(stateSinkObject state.Sink) { + l.beforeSave() + stateSinkObject.Save(0, &l.head) + stateSinkObject.Save(1, &l.tail) +} + +func (l *openFDList) afterLoad() {} + +// +checklocksignore +func (l *openFDList) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &l.head) + stateSourceObject.Load(1, &l.tail) +} + +func (e *openFDEntry) StateTypeName() string { + return "pkg/lisafs.openFDEntry" +} + +func (e *openFDEntry) StateFields() []string { + return []string{ + "next", + "prev", + } +} + +func (e *openFDEntry) beforeSave() {} + +// +checklocksignore +func (e *openFDEntry) StateSave(stateSinkObject state.Sink) { + e.beforeSave() + stateSinkObject.Save(0, &e.next) + stateSinkObject.Save(1, &e.prev) +} + +func (e *openFDEntry) afterLoad() {} + +// +checklocksignore +func (e *openFDEntry) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &e.next) + stateSourceObject.Load(1, &e.prev) +} + +func (r *openFDRefs) StateTypeName() string { + return "pkg/lisafs.openFDRefs" +} + +func (r *openFDRefs) StateFields() []string { + return []string{ + "refCount", + } +} + +func (r *openFDRefs) beforeSave() {} + +// +checklocksignore +func (r *openFDRefs) StateSave(stateSinkObject state.Sink) { + r.beforeSave() + stateSinkObject.Save(0, &r.refCount) +} + +// +checklocksignore +func (r *openFDRefs) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &r.refCount) + stateSourceObject.AfterLoad(r.afterLoad) +} + +func init() { + state.Register((*controlFDList)(nil)) + state.Register((*controlFDEntry)(nil)) + state.Register((*controlFDRefs)(nil)) + state.Register((*openFDList)(nil)) + state.Register((*openFDEntry)(nil)) + state.Register((*openFDRefs)(nil)) +} diff --git a/pkg/lisafs/message.go b/pkg/lisafs/message.go new file mode 100644 index 000000000..55fd2c0b1 --- /dev/null +++ b/pkg/lisafs/message.go @@ -0,0 +1,258 @@ +// Copyright 2021 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package lisafs + +import ( + "math" + "os" + + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/hostarch" + "gvisor.dev/gvisor/pkg/marshal/primitive" +) + +// Messages have two parts: +// * A transport header used to decipher received messages. +// * A byte array referred to as "payload" which contains the actual message. +// +// "dataLen" refers to the size of both combined. + +// MID (message ID) is used to identify messages to parse from payload. +// +// +marshal slice:MIDSlice +type MID uint16 + +// These constants are used to identify their corresponding message types. +const ( + // Error is only used in responses to pass errors to client. + Error MID = 0 + + // Mount is used to establish connection between the client and server mount + // point. lisafs requires that the client makes a successful Mount RPC before + // making other RPCs. + Mount MID = 1 + + // Channel requests to start a new communicational channel. + Channel MID = 2 +) + +const ( + // NoUID is a sentinel used to indicate no valid UID. + NoUID UID = math.MaxUint32 + + // NoGID is a sentinel used to indicate no valid GID. + NoGID GID = math.MaxUint32 +) + +// MaxMessageSize is the recommended max message size that can be used by +// connections. Server implementations may choose to use other values. +func MaxMessageSize() uint32 { + // Return HugePageSize - PageSize so that when flipcall packet window is + // created with MaxMessageSize() + flipcall header size + channel header + // size, HugePageSize is allocated and can be backed by a single huge page + // if supported by the underlying memfd. + return uint32(hostarch.HugePageSize - os.Getpagesize()) +} + +// TODO(gvisor.dev/issue/6450): Once this is resolved: +// * Update manual implementations and function signatures. +// * Update RPC handlers and appropriate callers to handle errors correctly. +// * Update manual implementations to get rid of buffer shifting. + +// UID represents a user ID. +// +// +marshal +type UID uint32 + +// Ok returns true if uid is not NoUID. +func (uid UID) Ok() bool { + return uid != NoUID +} + +// GID represents a group ID. +// +// +marshal +type GID uint32 + +// Ok returns true if gid is not NoGID. +func (gid GID) Ok() bool { + return gid != NoGID +} + +// NoopMarshal is a noop implementation of marshal.Marshallable.MarshalBytes. +func NoopMarshal([]byte) {} + +// NoopUnmarshal is a noop implementation of marshal.Marshallable.UnmarshalBytes. +func NoopUnmarshal([]byte) {} + +// SizedString represents a string in memory. The marshalled string bytes are +// preceded by a uint32 signifying the string length. +type SizedString string + +// SizeBytes implements marshal.Marshallable.SizeBytes. +func (s *SizedString) SizeBytes() int { + return (*primitive.Uint32)(nil).SizeBytes() + len(*s) +} + +// MarshalBytes implements marshal.Marshallable.MarshalBytes. +func (s *SizedString) MarshalBytes(dst []byte) { + strLen := primitive.Uint32(len(*s)) + strLen.MarshalUnsafe(dst) + dst = dst[strLen.SizeBytes():] + // Copy without any allocation. + copy(dst[:strLen], *s) +} + +// UnmarshalBytes implements marshal.Marshallable.UnmarshalBytes. +func (s *SizedString) UnmarshalBytes(src []byte) { + var strLen primitive.Uint32 + strLen.UnmarshalUnsafe(src) + src = src[strLen.SizeBytes():] + // Take the hit, this leads to an allocation + memcpy. No way around it. + *s = SizedString(src[:strLen]) +} + +// StringArray represents an array of SizedStrings in memory. The marshalled +// array data is preceded by a uint32 signifying the array length. +type StringArray []string + +// SizeBytes implements marshal.Marshallable.SizeBytes. +func (s *StringArray) SizeBytes() int { + size := (*primitive.Uint32)(nil).SizeBytes() + for _, str := range *s { + sstr := SizedString(str) + size += sstr.SizeBytes() + } + return size +} + +// MarshalBytes implements marshal.Marshallable.MarshalBytes. +func (s *StringArray) MarshalBytes(dst []byte) { + arrLen := primitive.Uint32(len(*s)) + arrLen.MarshalUnsafe(dst) + dst = dst[arrLen.SizeBytes():] + for _, str := range *s { + sstr := SizedString(str) + sstr.MarshalBytes(dst) + dst = dst[sstr.SizeBytes():] + } +} + +// UnmarshalBytes implements marshal.Marshallable.UnmarshalBytes. +func (s *StringArray) UnmarshalBytes(src []byte) { + var arrLen primitive.Uint32 + arrLen.UnmarshalUnsafe(src) + src = src[arrLen.SizeBytes():] + + if cap(*s) < int(arrLen) { + *s = make([]string, arrLen) + } else { + *s = (*s)[:arrLen] + } + + for i := primitive.Uint32(0); i < arrLen; i++ { + var sstr SizedString + sstr.UnmarshalBytes(src) + src = src[sstr.SizeBytes():] + (*s)[i] = string(sstr) + } +} + +// Inode represents an inode on the remote filesystem. +// +// +marshal slice:InodeSlice +type Inode struct { + ControlFD FDID + _ uint32 // Need to make struct packed. + Stat linux.Statx +} + +// MountReq represents a Mount request. +type MountReq struct { + MountPath SizedString +} + +// SizeBytes implements marshal.Marshallable.SizeBytes. +func (m *MountReq) SizeBytes() int { + return m.MountPath.SizeBytes() +} + +// MarshalBytes implements marshal.Marshallable.MarshalBytes. +func (m *MountReq) MarshalBytes(dst []byte) { + m.MountPath.MarshalBytes(dst) +} + +// UnmarshalBytes implements marshal.Marshallable.UnmarshalBytes. +func (m *MountReq) UnmarshalBytes(src []byte) { + m.MountPath.UnmarshalBytes(src) +} + +// MountResp represents a Mount response. +type MountResp struct { + Root Inode + // MaxMessageSize is the maximum size of messages communicated between the + // client and server in bytes. This includes the communication header. + MaxMessageSize primitive.Uint32 + // SupportedMs holds all the supported messages. + SupportedMs []MID +} + +// SizeBytes implements marshal.Marshallable.SizeBytes. +func (m *MountResp) SizeBytes() int { + return m.Root.SizeBytes() + + m.MaxMessageSize.SizeBytes() + + (*primitive.Uint16)(nil).SizeBytes() + + (len(m.SupportedMs) * (*MID)(nil).SizeBytes()) +} + +// MarshalBytes implements marshal.Marshallable.MarshalBytes. +func (m *MountResp) MarshalBytes(dst []byte) { + m.Root.MarshalUnsafe(dst) + dst = dst[m.Root.SizeBytes():] + m.MaxMessageSize.MarshalUnsafe(dst) + dst = dst[m.MaxMessageSize.SizeBytes():] + numSupported := primitive.Uint16(len(m.SupportedMs)) + numSupported.MarshalBytes(dst) + dst = dst[numSupported.SizeBytes():] + MarshalUnsafeMIDSlice(m.SupportedMs, dst) +} + +// UnmarshalBytes implements marshal.Marshallable.UnmarshalBytes. +func (m *MountResp) UnmarshalBytes(src []byte) { + m.Root.UnmarshalUnsafe(src) + src = src[m.Root.SizeBytes():] + m.MaxMessageSize.UnmarshalUnsafe(src) + src = src[m.MaxMessageSize.SizeBytes():] + var numSupported primitive.Uint16 + numSupported.UnmarshalBytes(src) + src = src[numSupported.SizeBytes():] + m.SupportedMs = make([]MID, numSupported) + UnmarshalUnsafeMIDSlice(m.SupportedMs, src) +} + +// ChannelResp is the response to the create channel request. +// +// +marshal +type ChannelResp struct { + dataOffset int64 + dataLength uint64 +} + +// ErrorResp is returned to represent an error while handling a request. +// +// +marshal +type ErrorResp struct { + errno uint32 +} diff --git a/pkg/lisafs/open_fd_list.go b/pkg/lisafs/open_fd_list.go new file mode 100644 index 000000000..9a8b1e30b --- /dev/null +++ b/pkg/lisafs/open_fd_list.go @@ -0,0 +1,221 @@ +package lisafs + +// ElementMapper provides an identity mapping by default. +// +// This can be replaced to provide a struct that maps elements to linker +// objects, if they are not the same. An ElementMapper is not typically +// required if: Linker is left as is, Element is left as is, or Linker and +// Element are the same type. +type openFDElementMapper struct{} + +// linkerFor maps an Element to a Linker. +// +// This default implementation should be inlined. +// +//go:nosplit +func (openFDElementMapper) linkerFor(elem *OpenFD) *OpenFD { return elem } + +// List is an intrusive list. Entries can be added to or removed from the list +// in O(1) time and with no additional memory allocations. +// +// The zero value for List is an empty list ready to use. +// +// To iterate over a list (where l is a List): +// for e := l.Front(); e != nil; e = e.Next() { +// // do something with e. +// } +// +// +stateify savable +type openFDList struct { + head *OpenFD + tail *OpenFD +} + +// Reset resets list l to the empty state. +func (l *openFDList) Reset() { + l.head = nil + l.tail = nil +} + +// Empty returns true iff the list is empty. +// +//go:nosplit +func (l *openFDList) Empty() bool { + return l.head == nil +} + +// Front returns the first element of list l or nil. +// +//go:nosplit +func (l *openFDList) Front() *OpenFD { + return l.head +} + +// Back returns the last element of list l or nil. +// +//go:nosplit +func (l *openFDList) Back() *OpenFD { + return l.tail +} + +// Len returns the number of elements in the list. +// +// NOTE: This is an O(n) operation. +// +//go:nosplit +func (l *openFDList) Len() (count int) { + for e := l.Front(); e != nil; e = (openFDElementMapper{}.linkerFor(e)).Next() { + count++ + } + return count +} + +// PushFront inserts the element e at the front of list l. +// +//go:nosplit +func (l *openFDList) PushFront(e *OpenFD) { + linker := openFDElementMapper{}.linkerFor(e) + linker.SetNext(l.head) + linker.SetPrev(nil) + if l.head != nil { + openFDElementMapper{}.linkerFor(l.head).SetPrev(e) + } else { + l.tail = e + } + + l.head = e +} + +// PushBack inserts the element e at the back of list l. +// +//go:nosplit +func (l *openFDList) PushBack(e *OpenFD) { + linker := openFDElementMapper{}.linkerFor(e) + linker.SetNext(nil) + linker.SetPrev(l.tail) + if l.tail != nil { + openFDElementMapper{}.linkerFor(l.tail).SetNext(e) + } else { + l.head = e + } + + l.tail = e +} + +// PushBackList inserts list m at the end of list l, emptying m. +// +//go:nosplit +func (l *openFDList) PushBackList(m *openFDList) { + if l.head == nil { + l.head = m.head + l.tail = m.tail + } else if m.head != nil { + openFDElementMapper{}.linkerFor(l.tail).SetNext(m.head) + openFDElementMapper{}.linkerFor(m.head).SetPrev(l.tail) + + l.tail = m.tail + } + m.head = nil + m.tail = nil +} + +// InsertAfter inserts e after b. +// +//go:nosplit +func (l *openFDList) InsertAfter(b, e *OpenFD) { + bLinker := openFDElementMapper{}.linkerFor(b) + eLinker := openFDElementMapper{}.linkerFor(e) + + a := bLinker.Next() + + eLinker.SetNext(a) + eLinker.SetPrev(b) + bLinker.SetNext(e) + + if a != nil { + openFDElementMapper{}.linkerFor(a).SetPrev(e) + } else { + l.tail = e + } +} + +// InsertBefore inserts e before a. +// +//go:nosplit +func (l *openFDList) InsertBefore(a, e *OpenFD) { + aLinker := openFDElementMapper{}.linkerFor(a) + eLinker := openFDElementMapper{}.linkerFor(e) + + b := aLinker.Prev() + eLinker.SetNext(a) + eLinker.SetPrev(b) + aLinker.SetPrev(e) + + if b != nil { + openFDElementMapper{}.linkerFor(b).SetNext(e) + } else { + l.head = e + } +} + +// Remove removes e from l. +// +//go:nosplit +func (l *openFDList) Remove(e *OpenFD) { + linker := openFDElementMapper{}.linkerFor(e) + prev := linker.Prev() + next := linker.Next() + + if prev != nil { + openFDElementMapper{}.linkerFor(prev).SetNext(next) + } else if l.head == e { + l.head = next + } + + if next != nil { + openFDElementMapper{}.linkerFor(next).SetPrev(prev) + } else if l.tail == e { + l.tail = prev + } + + linker.SetNext(nil) + linker.SetPrev(nil) +} + +// Entry is a default implementation of Linker. Users can add anonymous fields +// of this type to their structs to make them automatically implement the +// methods needed by List. +// +// +stateify savable +type openFDEntry struct { + next *OpenFD + prev *OpenFD +} + +// Next returns the entry that follows e in the list. +// +//go:nosplit +func (e *openFDEntry) Next() *OpenFD { + return e.next +} + +// Prev returns the entry that precedes e in the list. +// +//go:nosplit +func (e *openFDEntry) Prev() *OpenFD { + return e.prev +} + +// SetNext assigns 'entry' as the entry that follows e in the list. +// +//go:nosplit +func (e *openFDEntry) SetNext(elem *OpenFD) { + e.next = elem +} + +// SetPrev assigns 'entry' as the entry that precedes e in the list. +// +//go:nosplit +func (e *openFDEntry) SetPrev(elem *OpenFD) { + e.prev = elem +} diff --git a/pkg/lisafs/open_fd_refs.go b/pkg/lisafs/open_fd_refs.go new file mode 100644 index 000000000..f1a99f335 --- /dev/null +++ b/pkg/lisafs/open_fd_refs.go @@ -0,0 +1,140 @@ +package lisafs + +import ( + "fmt" + "sync/atomic" + + "gvisor.dev/gvisor/pkg/refsvfs2" +) + +// enableLogging indicates whether reference-related events should be logged (with +// stack traces). This is false by default and should only be set to true for +// debugging purposes, as it can generate an extremely large amount of output +// and drastically degrade performance. +const openFDenableLogging = false + +// obj is used to customize logging. Note that we use a pointer to T so that +// we do not copy the entire object when passed as a format parameter. +var openFDobj *OpenFD + +// Refs implements refs.RefCounter. It keeps a reference count using atomic +// operations and calls the destructor when the count reaches zero. +// +// NOTE: Do not introduce additional fields to the Refs struct. It is used by +// many filesystem objects, and we want to keep it as small as possible (i.e., +// the same size as using an int64 directly) to avoid taking up extra cache +// space. In general, this template should not be extended at the cost of +// performance. If it does not offer enough flexibility for a particular object +// (example: b/187877947), we should implement the RefCounter/CheckedObject +// interfaces manually. +// +// +stateify savable +type openFDRefs struct { + // refCount is composed of two fields: + // + // [32-bit speculative references]:[32-bit real references] + // + // Speculative references are used for TryIncRef, to avoid a CompareAndSwap + // loop. See IncRef, DecRef and TryIncRef for details of how these fields are + // used. + refCount int64 +} + +// InitRefs initializes r with one reference and, if enabled, activates leak +// checking. +func (r *openFDRefs) InitRefs() { + atomic.StoreInt64(&r.refCount, 1) + refsvfs2.Register(r) +} + +// RefType implements refsvfs2.CheckedObject.RefType. +func (r *openFDRefs) RefType() string { + return fmt.Sprintf("%T", openFDobj)[1:] +} + +// LeakMessage implements refsvfs2.CheckedObject.LeakMessage. +func (r *openFDRefs) LeakMessage() string { + return fmt.Sprintf("[%s %p] reference count of %d instead of 0", r.RefType(), r, r.ReadRefs()) +} + +// LogRefs implements refsvfs2.CheckedObject.LogRefs. +func (r *openFDRefs) LogRefs() bool { + return openFDenableLogging +} + +// ReadRefs returns the current number of references. The returned count is +// inherently racy and is unsafe to use without external synchronization. +func (r *openFDRefs) ReadRefs() int64 { + return atomic.LoadInt64(&r.refCount) +} + +// IncRef implements refs.RefCounter.IncRef. +// +//go:nosplit +func (r *openFDRefs) IncRef() { + v := atomic.AddInt64(&r.refCount, 1) + if openFDenableLogging { + refsvfs2.LogIncRef(r, v) + } + if v <= 1 { + panic(fmt.Sprintf("Incrementing non-positive count %p on %s", r, r.RefType())) + } +} + +// TryIncRef implements refs.TryRefCounter.TryIncRef. +// +// To do this safely without a loop, a speculative reference is first acquired +// on the object. This allows multiple concurrent TryIncRef calls to distinguish +// other TryIncRef calls from genuine references held. +// +//go:nosplit +func (r *openFDRefs) TryIncRef() bool { + const speculativeRef = 1 << 32 + if v := atomic.AddInt64(&r.refCount, speculativeRef); int32(v) == 0 { + + atomic.AddInt64(&r.refCount, -speculativeRef) + return false + } + + v := atomic.AddInt64(&r.refCount, -speculativeRef+1) + if openFDenableLogging { + refsvfs2.LogTryIncRef(r, v) + } + return true +} + +// DecRef implements refs.RefCounter.DecRef. +// +// Note that speculative references are counted here. Since they were added +// prior to real references reaching zero, they will successfully convert to +// real references. In other words, we see speculative references only in the +// following case: +// +// A: TryIncRef [speculative increase => sees non-negative references] +// B: DecRef [real decrease] +// A: TryIncRef [transform speculative to real] +// +//go:nosplit +func (r *openFDRefs) DecRef(destroy func()) { + v := atomic.AddInt64(&r.refCount, -1) + if openFDenableLogging { + refsvfs2.LogDecRef(r, v) + } + switch { + case v < 0: + panic(fmt.Sprintf("Decrementing non-positive ref count %p, owned by %s", r, r.RefType())) + + case v == 0: + refsvfs2.Unregister(r) + + if destroy != nil { + destroy() + } + } +} + +func (r *openFDRefs) afterLoad() { + if r.ReadRefs() > 0 { + refsvfs2.Register(r) + } +} diff --git a/pkg/lisafs/sample_message.go b/pkg/lisafs/sample_message.go new file mode 100644 index 000000000..3868dfa08 --- /dev/null +++ b/pkg/lisafs/sample_message.go @@ -0,0 +1,110 @@ +// Copyright 2021 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package lisafs + +import ( + "math/rand" + + "gvisor.dev/gvisor/pkg/marshal/primitive" +) + +// MsgSimple is a sample packed struct which can be used to test message passing. +// +// +marshal slice:Msg1Slice +type MsgSimple struct { + A uint16 + B uint16 + C uint32 + D uint64 +} + +// Randomize randomizes the contents of m. +func (m *MsgSimple) Randomize() { + m.A = uint16(rand.Uint32()) + m.B = uint16(rand.Uint32()) + m.C = rand.Uint32() + m.D = rand.Uint64() +} + +// MsgDynamic is a sample dynamic struct which can be used to test message passing. +// +// +marshal dynamic +type MsgDynamic struct { + N primitive.Uint32 + Arr []MsgSimple +} + +// SizeBytes implements marshal.Marshallable.SizeBytes. +func (m *MsgDynamic) SizeBytes() int { + return m.N.SizeBytes() + + (int(m.N) * (*MsgSimple)(nil).SizeBytes()) +} + +// MarshalBytes implements marshal.Marshallable.MarshalBytes. +func (m *MsgDynamic) MarshalBytes(dst []byte) { + m.N.MarshalUnsafe(dst) + dst = dst[m.N.SizeBytes():] + MarshalUnsafeMsg1Slice(m.Arr, dst) +} + +// UnmarshalBytes implements marshal.Marshallable.UnmarshalBytes. +func (m *MsgDynamic) UnmarshalBytes(src []byte) { + m.N.UnmarshalUnsafe(src) + src = src[m.N.SizeBytes():] + m.Arr = make([]MsgSimple, m.N) + UnmarshalUnsafeMsg1Slice(m.Arr, src) +} + +// Randomize randomizes the contents of m. +func (m *MsgDynamic) Randomize(arrLen int) { + m.N = primitive.Uint32(arrLen) + m.Arr = make([]MsgSimple, arrLen) + for i := 0; i < arrLen; i++ { + m.Arr[i].Randomize() + } +} + +// P9Version mimics p9.TVersion and p9.Rversion. +// +// +marshal dynamic +type P9Version struct { + MSize primitive.Uint32 + Version string +} + +// SizeBytes implements marshal.Marshallable.SizeBytes. +func (v *P9Version) SizeBytes() int { + return (*primitive.Uint32)(nil).SizeBytes() + (*primitive.Uint16)(nil).SizeBytes() + len(v.Version) +} + +// MarshalBytes implements marshal.Marshallable.MarshalBytes. +func (v *P9Version) MarshalBytes(dst []byte) { + v.MSize.MarshalUnsafe(dst) + dst = dst[v.MSize.SizeBytes():] + versionLen := primitive.Uint16(len(v.Version)) + versionLen.MarshalUnsafe(dst) + dst = dst[versionLen.SizeBytes():] + copy(dst, v.Version) +} + +// UnmarshalBytes implements marshal.Marshallable.UnmarshalBytes. +func (v *P9Version) UnmarshalBytes(src []byte) { + v.MSize.UnmarshalUnsafe(src) + src = src[v.MSize.SizeBytes():] + var versionLen primitive.Uint16 + versionLen.UnmarshalUnsafe(src) + src = src[versionLen.SizeBytes():] + v.Version = string(src[:versionLen]) +} diff --git a/pkg/lisafs/server.go b/pkg/lisafs/server.go new file mode 100644 index 000000000..7515355ec --- /dev/null +++ b/pkg/lisafs/server.go @@ -0,0 +1,113 @@ +// Copyright 2021 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package lisafs + +import ( + "gvisor.dev/gvisor/pkg/sync" +) + +// Server serves a filesystem tree. Multiple connections on different mount +// points can be started on a server. The server provides utilities to safely +// modify the filesystem tree across its connections (mount points). Note that +// it does not support synchronizing filesystem tree mutations across other +// servers serving the same filesystem subtree. Server also manages the +// lifecycle of all connections. +type Server struct { + // connWg counts the number of active connections being tracked. + connWg sync.WaitGroup + + // RenameMu synchronizes rename operations within this filesystem tree. + RenameMu sync.RWMutex + + // handlers is a list of RPC handlers which can be indexed by the handler's + // corresponding MID. + handlers []RPCHandler + + // mountPoints keeps track of all the mount points this server serves. + mpMu sync.RWMutex + mountPoints []*ControlFD + + // impl is the server implementation which embeds this server. + impl ServerImpl +} + +// Init must be called before first use of server. +func (s *Server) Init(impl ServerImpl) { + s.impl = impl + s.handlers = handlers[:] +} + +// InitTestOnly is the same as Init except that it allows to swap out the +// underlying handlers with something custom. This is for test only. +func (s *Server) InitTestOnly(impl ServerImpl, handlers []RPCHandler) { + s.impl = impl + s.handlers = handlers +} + +// WithRenameReadLock invokes fn with the server's rename mutex locked for +// reading. This ensures that no rename operations occur concurrently. +func (s *Server) WithRenameReadLock(fn func() error) error { + s.RenameMu.RLock() + err := fn() + s.RenameMu.RUnlock() + return err +} + +// StartConnection starts the connection on a separate goroutine and tracks it. +func (s *Server) StartConnection(c *Connection) { + s.connWg.Add(1) + go func() { + c.Run() + s.connWg.Done() + }() +} + +// Wait waits for all connections started via StartConnection() to terminate. +func (s *Server) Wait() { + s.connWg.Wait() +} + +func (s *Server) addMountPoint(root *ControlFD) { + s.mpMu.Lock() + defer s.mpMu.Unlock() + s.mountPoints = append(s.mountPoints, root) +} + +func (s *Server) forEachMountPoint(fn func(root *ControlFD)) { + s.mpMu.RLock() + defer s.mpMu.RUnlock() + for _, mp := range s.mountPoints { + fn(mp) + } +} + +// ServerImpl contains the implementation details for a Server. +// Implementations of ServerImpl should contain their associated Server by +// value as their first field. +type ServerImpl interface { + // Mount is called when a Mount RPC is made. It mounts the connection at + // mountPath. + // + // Precondition: mountPath == path.Clean(mountPath). + Mount(c *Connection, mountPath string) (ControlFDImpl, Inode, error) + + // SupportedMessages returns a list of messages that the server + // implementation supports. + SupportedMessages() []MID + + // MaxMessageSize is the maximum payload length (in bytes) that can be sent + // to this server implementation. + MaxMessageSize() uint32 +} diff --git a/pkg/lisafs/sock.go b/pkg/lisafs/sock.go new file mode 100644 index 000000000..88210242f --- /dev/null +++ b/pkg/lisafs/sock.go @@ -0,0 +1,208 @@ +// Copyright 2021 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package lisafs + +import ( + "io" + + "golang.org/x/sys/unix" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/unet" +) + +var ( + sockHeaderLen = uint32((*sockHeader)(nil).SizeBytes()) +) + +// sockHeader is the header present in front of each message received on a UDS. +// +// +marshal +type sockHeader struct { + payloadLen uint32 + message MID + _ uint16 // Need to make struct packed. +} + +// sockCommunicator implements Communicator. This is not thread safe. +type sockCommunicator struct { + fdTracker + sock *unet.Socket + buf []byte +} + +var _ Communicator = (*sockCommunicator)(nil) + +func newSockComm(sock *unet.Socket) *sockCommunicator { + return &sockCommunicator{ + sock: sock, + buf: make([]byte, sockHeaderLen), + } +} + +func (s *sockCommunicator) FD() int { + return s.sock.FD() +} + +func (s *sockCommunicator) destroy() { + s.sock.Close() +} + +func (s *sockCommunicator) shutdown() { + if err := s.sock.Shutdown(); err != nil { + log.Warningf("Socket.Shutdown() failed (FD: %d): %v", s.sock.FD(), err) + } +} + +func (s *sockCommunicator) resizeBuf(size uint32) { + if cap(s.buf) < int(size) { + s.buf = s.buf[:cap(s.buf)] + s.buf = append(s.buf, make([]byte, int(size)-cap(s.buf))...) + } else { + s.buf = s.buf[:size] + } +} + +// PayloadBuf implements Communicator.PayloadBuf. +func (s *sockCommunicator) PayloadBuf(size uint32) []byte { + s.resizeBuf(sockHeaderLen + size) + return s.buf[sockHeaderLen : sockHeaderLen+size] +} + +// SndRcvMessage implements Communicator.SndRcvMessage. +func (s *sockCommunicator) SndRcvMessage(m MID, payloadLen uint32, wantFDs uint8) (MID, uint32, error) { + if err := s.sndPrepopulatedMsg(m, payloadLen, nil); err != nil { + return 0, 0, err + } + + return s.rcvMsg(wantFDs) +} + +// sndPrepopulatedMsg assumes that s.buf has already been populated with +// `payloadLen` bytes of data. +func (s *sockCommunicator) sndPrepopulatedMsg(m MID, payloadLen uint32, fds []int) error { + header := sockHeader{payloadLen: payloadLen, message: m} + header.MarshalUnsafe(s.buf) + dataLen := sockHeaderLen + payloadLen + return writeTo(s.sock, [][]byte{s.buf[:dataLen]}, int(dataLen), fds) +} + +// writeTo writes the passed iovec to the UDS and donates any passed FDs. +func writeTo(sock *unet.Socket, iovec [][]byte, dataLen int, fds []int) error { + w := sock.Writer(true) + if len(fds) > 0 { + w.PackFDs(fds...) + } + + fdsUnpacked := false + for n := 0; n < dataLen; { + cur, err := w.WriteVec(iovec) + if err != nil { + return err + } + n += cur + + // Fast common path. + if n >= dataLen { + break + } + + // Consume iovecs. + for consumed := 0; consumed < cur; { + if len(iovec[0]) <= cur-consumed { + consumed += len(iovec[0]) + iovec = iovec[1:] + } else { + iovec[0] = iovec[0][cur-consumed:] + break + } + } + + if n > 0 && !fdsUnpacked { + // Don't resend any control message. + fdsUnpacked = true + w.UnpackFDs() + } + } + return nil +} + +// rcvMsg reads the message header and payload from the UDS. It also populates +// fds with any donated FDs. +func (s *sockCommunicator) rcvMsg(wantFDs uint8) (MID, uint32, error) { + fds, err := readFrom(s.sock, s.buf[:sockHeaderLen], wantFDs) + if err != nil { + return 0, 0, err + } + for _, fd := range fds { + s.TrackFD(fd) + } + + var header sockHeader + header.UnmarshalUnsafe(s.buf) + + // No payload? We are done. + if header.payloadLen == 0 { + return header.message, 0, nil + } + + if _, err := readFrom(s.sock, s.PayloadBuf(header.payloadLen), 0); err != nil { + return 0, 0, err + } + + return header.message, header.payloadLen, nil +} + +// readFrom fills the passed buffer with data from the socket. It also returns +// any donated FDs. +func readFrom(sock *unet.Socket, buf []byte, wantFDs uint8) ([]int, error) { + r := sock.Reader(true) + r.EnableFDs(int(wantFDs)) + + var ( + fds []int + fdInit bool + ) + n := len(buf) + for got := 0; got < n; { + cur, err := r.ReadVec([][]byte{buf[got:]}) + + // Ignore EOF if cur > 0. + if err != nil && (err != io.EOF || cur == 0) { + r.CloseFDs() + return nil, err + } + + if !fdInit && cur > 0 { + fds, err = r.ExtractFDs() + if err != nil { + return nil, err + } + + fdInit = true + r.EnableFDs(0) + } + + got += cur + } + return fds, nil +} + +func closeFDs(fds []int) { + for _, fd := range fds { + if fd >= 0 { + unix.Close(fd) + } + } +} diff --git a/pkg/sentry/fsimpl/gofer/filesystem.go b/pkg/sentry/fsimpl/gofer/filesystem.go index 00228c469..9d943bd4a 100644 --- a/pkg/sentry/fsimpl/gofer/filesystem.go +++ b/pkg/sentry/fsimpl/gofer/filesystem.go @@ -1612,6 +1612,9 @@ func (fs *filesystem) MountOptions() string { if fs.opts.overlayfsStaleRead { optsKV = append(optsKV, mopt{moptOverlayfsStaleRead, nil}) } + if fs.opts.lisaEnabled { + optsKV = append(optsKV, mopt{moptLisafs, nil}) + } opts := make([]string, 0, len(optsKV)) for _, opt := range optsKV { diff --git a/pkg/sentry/fsimpl/gofer/gofer.go b/pkg/sentry/fsimpl/gofer/gofer.go index 43440ec19..13971d086 100644 --- a/pkg/sentry/fsimpl/gofer/gofer.go +++ b/pkg/sentry/fsimpl/gofer/gofer.go @@ -83,6 +83,7 @@ const ( moptForcePageCache = "force_page_cache" moptLimitHostFDTranslation = "limit_host_fd_translation" moptOverlayfsStaleRead = "overlayfs_stale_read" + moptLisafs = "lisafs" ) // Valid values for the "cache" mount option. @@ -214,6 +215,10 @@ type filesystemOptions struct { // way that application FDs representing "special files" such as sockets // do. Note that this disables client caching and mmap for regular files. regularFilesUseSpecialFileFD bool + + // lisaEnabled indicates whether the client will use lisafs protocol to + // communicate with the server instead of 9P. + lisaEnabled bool } // InteropMode controls the client's interaction with other remote filesystem @@ -427,6 +432,14 @@ func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt delete(mopts, moptOverlayfsStaleRead) fsopts.overlayfsStaleRead = true } + if lisafs, ok := mopts[moptLisafs]; ok { + delete(mopts, moptLisafs) + fsopts.lisaEnabled, err = strconv.ParseBool(lisafs) + if err != nil { + ctx.Warningf("gofer.FilesystemType.GetFilesystem: invalid lisafs option: %s", lisafs) + return nil, nil, linuxerr.EINVAL + } + } // fsopts.regularFilesUseSpecialFileFD can only be enabled by specifying // "cache=none". diff --git a/pkg/sentry/fsimpl/gofer/gofer_state_autogen.go b/pkg/sentry/fsimpl/gofer/gofer_state_autogen.go index 756def82a..deed65b60 100644 --- a/pkg/sentry/fsimpl/gofer/gofer_state_autogen.go +++ b/pkg/sentry/fsimpl/gofer/gofer_state_autogen.go @@ -199,6 +199,7 @@ func (f *filesystemOptions) StateFields() []string { "limitHostFDTranslation", "overlayfsStaleRead", "regularFilesUseSpecialFileFD", + "lisaEnabled", } } @@ -219,6 +220,7 @@ func (f *filesystemOptions) StateSave(stateSinkObject state.Sink) { stateSinkObject.Save(9, &f.limitHostFDTranslation) stateSinkObject.Save(10, &f.overlayfsStaleRead) stateSinkObject.Save(11, &f.regularFilesUseSpecialFileFD) + stateSinkObject.Save(12, &f.lisaEnabled) } func (f *filesystemOptions) afterLoad() {} @@ -237,6 +239,7 @@ func (f *filesystemOptions) StateLoad(stateSourceObject state.Source) { stateSourceObject.Load(9, &f.limitHostFDTranslation) stateSourceObject.Load(10, &f.overlayfsStaleRead) stateSourceObject.Load(11, &f.regularFilesUseSpecialFileFD) + stateSourceObject.Load(12, &f.lisaEnabled) } func (i *InteropMode) StateTypeName() string { diff --git a/runsc/boot/fs.go b/runsc/boot/fs.go index 40cf2a3df..ba5460f1c 100644 --- a/runsc/boot/fs.go +++ b/runsc/boot/fs.go @@ -188,8 +188,8 @@ func compileMounts(spec *specs.Spec, conf *config.Config, vfs2Enabled bool) []sp return mounts } -// p9MountData creates a slice of p9 mount data. -func p9MountData(fd int, fa config.FileAccessType, vfs2 bool) []string { +// goferMountData creates a slice of gofer mount data. +func goferMountData(fd int, fa config.FileAccessType, attachPath string, vfs2 bool, lisafs bool) []string { opts := []string{ "trans=fd", "rfdno=" + strconv.Itoa(fd), @@ -203,6 +203,10 @@ func p9MountData(fd int, fa config.FileAccessType, vfs2 bool) []string { if fa == config.FileAccessShared { opts = append(opts, "cache=remote_revalidating") } + if vfs2 && lisafs { + opts = append(opts, "lisafs=true") + opts = append(opts, "aname="+attachPath) + } return opts } @@ -761,7 +765,7 @@ func (c *containerMounter) createRootMount(ctx context.Context, conf *config.Con fd := c.fds.remove() log.Infof("Mounting root over 9P, ioFD: %d", fd) p9FS := mustFindFilesystem("9p") - opts := p9MountData(fd, conf.FileAccess, false /* vfs2 */) + opts := goferMountData(fd, conf.FileAccess, "/", false /* vfs2 */, false /* lisafs */) // We can't check for overlayfs here because sandbox is chroot'ed and gofer // can only send mount options for specs.Mounts (specs.Root is missing @@ -822,7 +826,7 @@ func (c *containerMounter) getMountNameAndOptions(conf *config.Config, m *specs. case bind: fd := c.fds.remove() fsName = gofervfs2.Name - opts = p9MountData(fd, c.getMountAccessType(conf, m), conf.VFS2) + opts = goferMountData(fd, c.getMountAccessType(conf, m), m.Destination, conf.VFS2, conf.Lisafs) // If configured, add overlay to all writable mounts. useOverlay = conf.Overlay && !mountFlags(m.Options).ReadOnly case cgroupfs.Name: @@ -980,7 +984,7 @@ func (c *containerMounter) createRestoreEnvironment(conf *config.Config) (*fs.Re // Add root mount. fd := c.fds.remove() - opts := p9MountData(fd, conf.FileAccess, false /* vfs2 */) + opts := goferMountData(fd, conf.FileAccess, "/", false /* vfs2 */, false /* lisafs */) mf := fs.MountSourceFlags{} if c.root.Readonly || conf.Overlay { diff --git a/runsc/boot/vfs.go b/runsc/boot/vfs.go index 346796d9c..2f1332566 100644 --- a/runsc/boot/vfs.go +++ b/runsc/boot/vfs.go @@ -208,7 +208,7 @@ func (c *containerMounter) mountAll(conf *config.Config, procArgs *kernel.Create // createMountNamespaceVFS2 creates the container's root mount and namespace. func (c *containerMounter) createMountNamespaceVFS2(ctx context.Context, conf *config.Config, creds *auth.Credentials) (*vfs.MountNamespace, error) { fd := c.fds.remove() - data := p9MountData(fd, conf.FileAccess, true /* vfs2 */) + data := goferMountData(fd, conf.FileAccess, "/", true /* vfs2 */, conf.Lisafs) // We can't check for overlayfs here because sandbox is chroot'ed and gofer // can only send mount options for specs.Mounts (specs.Root is missing @@ -515,7 +515,7 @@ func (c *containerMounter) getMountNameAndOptionsVFS2(conf *config.Config, m *mo // but unlikely to be correct in this context. return "", nil, false, fmt.Errorf("9P mount requires a connection FD") } - data = p9MountData(m.fd, c.getMountAccessType(conf, m.mount), true /* vfs2 */) + data = goferMountData(m.fd, c.getMountAccessType(conf, m.mount), m.mount.Destination, true /* vfs2 */, conf.Lisafs) internalData = gofer.InternalFilesystemOptions{ UniqueID: m.mount.Destination, } diff --git a/runsc/cli/main.go b/runsc/cli/main.go index 3556d7665..058ab8232 100644 --- a/runsc/cli/main.go +++ b/runsc/cli/main.go @@ -228,7 +228,7 @@ func Main(version string) { log.Infof("\t\tFileAccess: %v, overlay: %t", conf.FileAccess, conf.Overlay) log.Infof("\t\tNetwork: %v, logging: %t", conf.Network, conf.LogPackets) log.Infof("\t\tStrace: %t, max size: %d, syscalls: %s", conf.Strace, conf.StraceLogSize, conf.StraceSyscalls) - log.Infof("\t\tVFS2 enabled: %v", conf.VFS2) + log.Infof("\t\tVFS2 enabled: %t, LISAFS: %t", conf.VFS2, conf.Lisafs) log.Infof("***************************") if conf.TestOnlyAllowRunAsCurrentUserWithoutChroot { diff --git a/runsc/cmd/gofer.go b/runsc/cmd/gofer.go index 2193e9040..c65e0267a 100644 --- a/runsc/cmd/gofer.go +++ b/runsc/cmd/gofer.go @@ -71,8 +71,8 @@ func (*Gofer) Name() string { } // Synopsis implements subcommands.Command. -func (*Gofer) Synopsis() string { - return "launch a gofer process that serves files over 9P protocol (internal use only)" +func (g *Gofer) Synopsis() string { + return fmt.Sprintf("launch a gofer process that serves files over the protocol (9P or lisafs) defined in the config (internal use only)") } // Usage implements subcommands.Command. @@ -83,7 +83,7 @@ func (*Gofer) Usage() string { // SetFlags implements subcommands.Command. func (g *Gofer) SetFlags(f *flag.FlagSet) { f.StringVar(&g.bundleDir, "bundle", "", "path to the root of the bundle directory, defaults to the current directory") - f.Var(&g.ioFDs, "io-fds", "list of FDs to connect 9P servers. They must follow this order: root first, then mounts as defined in the spec") + f.Var(&g.ioFDs, "io-fds", "list of FDs to connect gofer servers. They must follow this order: root first, then mounts as defined in the spec") f.BoolVar(&g.applyCaps, "apply-caps", true, "if true, apply capabilities to restrict what the Gofer process can do") f.BoolVar(&g.setUpRoot, "setup-root", true, "if true, set up an empty root for the process") f.IntVar(&g.specFD, "spec-fd", -1, "required fd with the container spec") @@ -160,10 +160,98 @@ func (g *Gofer) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) } log.Infof("Process chroot'd to %q", root) + // Initialize filters. + if conf.FSGoferHostUDS { + filter.InstallUDSFilters() + } + + if conf.Verity { + filter.InstallXattrFilters() + } + + if err := filter.Install(); err != nil { + Fatalf("installing seccomp filters: %v", err) + } + + if conf.Lisafs { + return g.serveLisafs(spec, conf, root) + } + return g.serve9P(spec, conf, root) +} + +func newSocket(ioFD int) *unet.Socket { + socket, err := unet.NewSocket(ioFD) + if err != nil { + Fatalf("creating server on FD %d: %v", ioFD, err) + } + return socket +} + +func (g *Gofer) serveLisafs(spec *specs.Spec, conf *config.Config, root string) subcommands.ExitStatus { + type connectionConfig struct { + sock *unet.Socket + readonly bool + } + cfgs := make([]connectionConfig, 0, len(spec.Mounts)+1) + server := fsgofer.NewLisafsServer(fsgofer.Config{ + // These are global options. Ignore readonly configuration, that is set on + // a per connection basis. + HostUDS: conf.FSGoferHostUDS, + EnableVerityXattr: conf.Verity, + }) + + // Start with root mount, then add any other additional mount as needed. + cfgs = append(cfgs, connectionConfig{ + sock: newSocket(g.ioFDs[0]), + readonly: spec.Root.Readonly || conf.Overlay, + }) + log.Infof("Serving %q mapped to %q on FD %d (ro: %t)", "/", root, g.ioFDs[0], cfgs[0].readonly) + + mountIdx := 1 // first one is the root + for _, m := range spec.Mounts { + if !specutils.IsGoferMount(m, conf.VFS2) { + continue + } + + if !filepath.IsAbs(m.Destination) { + Fatalf("mount destination must be absolute: %q", m.Destination) + } + if mountIdx >= len(g.ioFDs) { + Fatalf("no FD found for mount. Did you forget --io-fd? FDs: %d, Mount: %+v", len(g.ioFDs), m) + } + + cfgs = append(cfgs, connectionConfig{ + sock: newSocket(g.ioFDs[mountIdx]), + readonly: isReadonlyMount(m.Options) || conf.Overlay, + }) + + log.Infof("Serving %q mapped on FD %d (ro: %t)", m.Destination, g.ioFDs[mountIdx], cfgs[mountIdx].readonly) + mountIdx++ + } + + if mountIdx != len(g.ioFDs) { + Fatalf("too many FDs passed for mounts. mounts: %d, FDs: %d", mountIdx, len(g.ioFDs)) + } + cfgs = cfgs[:mountIdx] + + for _, cfg := range cfgs { + conn, err := server.CreateConnection(cfg.sock, cfg.readonly) + if err != nil { + Fatalf("starting connection on FD %d for gofer mount failed: %v", cfg.sock.FD(), err) + } + server.StartConnection(conn) + } + server.Wait() + log.Infof("All lisafs servers exited.") + return subcommands.ExitSuccess +} + +func (g *Gofer) serve9P(spec *specs.Spec, conf *config.Config, root string) subcommands.ExitStatus { // Start with root mount, then add any other additional mount as needed. ats := make([]p9.Attacher, 0, len(spec.Mounts)+1) ap, err := fsgofer.NewAttachPoint("/", fsgofer.Config{ ROMount: spec.Root.Readonly || conf.Overlay, + HostUDS: conf.FSGoferHostUDS, EnableVerityXattr: conf.Verity, }) if err != nil { @@ -174,7 +262,7 @@ func (g *Gofer) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) mountIdx := 1 // first one is the root for _, m := range spec.Mounts { - if specutils.Is9PMount(m, conf.VFS2) { + if specutils.IsGoferMount(m, conf.VFS2) { cfg := fsgofer.Config{ ROMount: isReadonlyMount(m.Options) || conf.Overlay, HostUDS: conf.FSGoferHostUDS, @@ -197,26 +285,9 @@ func (g *Gofer) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) Fatalf("too many FDs passed for mounts. mounts: %d, FDs: %d", mountIdx, len(g.ioFDs)) } - if conf.FSGoferHostUDS { - filter.InstallUDSFilters() - } - - if conf.Verity { - filter.InstallXattrFilters() - } - - if err := filter.Install(); err != nil { - Fatalf("installing seccomp filters: %v", err) - } - - runServers(ats, g.ioFDs) - return subcommands.ExitSuccess -} - -func runServers(ats []p9.Attacher, ioFDs []int) { // Run the loops and wait for all to exit. var wg sync.WaitGroup - for i, ioFD := range ioFDs { + for i, ioFD := range g.ioFDs { wg.Add(1) go func(ioFD int, at p9.Attacher) { socket, err := unet.NewSocket(ioFD) @@ -232,6 +303,7 @@ func runServers(ats []p9.Attacher, ioFDs []int) { } wg.Wait() log.Infof("All 9P servers exited.") + return subcommands.ExitSuccess } func (g *Gofer) writeMounts(mounts []specs.Mount) error { @@ -362,7 +434,7 @@ func setupRootFS(spec *specs.Spec, conf *config.Config) error { // creates directories as needed. func setupMounts(conf *config.Config, mounts []specs.Mount, root, procPath string) error { for _, m := range mounts { - if !specutils.Is9PMount(m, conf.VFS2) { + if !specutils.IsGoferMount(m, conf.VFS2) { continue } @@ -402,7 +474,7 @@ func setupMounts(conf *config.Config, mounts []specs.Mount, root, procPath strin func resolveMounts(conf *config.Config, mounts []specs.Mount, root string) ([]specs.Mount, error) { cleanMounts := make([]specs.Mount, 0, len(mounts)) for _, m := range mounts { - if !specutils.Is9PMount(m, conf.VFS2) { + if !specutils.IsGoferMount(m, conf.VFS2) { cleanMounts = append(cleanMounts, m) continue } diff --git a/runsc/config/config.go b/runsc/config/config.go index a562f7bf4..91142888f 100644 --- a/runsc/config/config.go +++ b/runsc/config/config.go @@ -193,6 +193,9 @@ type Config struct { // Enables VFS2. VFS2 bool `flag:"vfs2"` + // Enable lisafs. + Lisafs bool `flag:"lisafs"` + // Enables FUSE usage. FUSE bool `flag:"fuse"` diff --git a/runsc/config/flags.go b/runsc/config/flags.go index 1bf23951a..11cea71b8 100644 --- a/runsc/config/flags.go +++ b/runsc/config/flags.go @@ -82,6 +82,7 @@ func RegisterFlags() { flag.Bool("fsgofer-host-uds", false, "allow the gofer to mount Unix Domain Sockets.") flag.Bool("vfs2", false, "enables VFSv2. This uses the new VFS layer that is faster than the previous one.") flag.Bool("fuse", false, "TEST ONLY; use while FUSE in VFSv2 is landing. This allows the use of the new experimental FUSE filesystem.") + flag.Bool("lisafs", false, "Enables lisafs protocol instead of 9P. This is only effective with VFS2.") flag.Bool("cgroupfs", false, "Automatically mount cgroupfs.") // Flags that control sandbox runtime behavior: network related. diff --git a/runsc/container/container.go b/runsc/container/container.go index 50b0dd5e7..9c0004753 100644 --- a/runsc/container/container.go +++ b/runsc/container/container.go @@ -917,7 +917,7 @@ func (c *Container) createGoferProcess(spec *specs.Spec, conf *config.Config, bu // Add root mount and then add any other additional mounts. mountCount := 1 for _, m := range spec.Mounts { - if specutils.Is9PMount(m, conf.VFS2) { + if specutils.IsGoferMount(m, conf.VFS2) { mountCount++ } } diff --git a/runsc/fsgofer/lisafs.go b/runsc/fsgofer/lisafs.go new file mode 100644 index 000000000..9d745f461 --- /dev/null +++ b/runsc/fsgofer/lisafs.go @@ -0,0 +1,52 @@ +// Copyright 2021 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package fsgofer + +import ( + "gvisor.dev/gvisor/pkg/lisafs" +) + +// LisafsServer implements lisafs.ServerImpl for fsgofer. +type LisafsServer struct { + lisafs.Server + config Config +} + +var _ lisafs.ServerImpl = (*LisafsServer)(nil) + +// NewLisafsServer initializes a new lisafs server for fsgofer. +func NewLisafsServer(config Config) *LisafsServer { + s := &LisafsServer{config: config} + s.Server.Init(s) + return s +} + +// Mount implements lisafs.ServerImpl.Mount. +func (s *LisafsServer) Mount(c *lisafs.Connection, mountPath string) (lisafs.ControlFDImpl, lisafs.Inode, error) { + panic("unimplemented") +} + +// MaxMessageSize implements lisafs.ServerImpl.MaxMessageSize. +func (s *LisafsServer) MaxMessageSize() uint32 { + return lisafs.MaxMessageSize() +} + +// SupportedMessages implements lisafs.ServerImpl.SupportedMessages. +func (s *LisafsServer) SupportedMessages() []lisafs.MID { + return []lisafs.MID{ + lisafs.Mount, + lisafs.Channel, + } +} diff --git a/runsc/specutils/specutils.go b/runsc/specutils/specutils.go index 5365b5b1b..9d3b97277 100644 --- a/runsc/specutils/specutils.go +++ b/runsc/specutils/specutils.go @@ -332,9 +332,9 @@ func capsFromNames(names []string, skipSet map[linux.Capability]struct{}) (auth. return auth.CapabilitySetOfMany(caps), nil } -// Is9PMount returns true if the given mount can be mounted as an external +// IsGoferMount returns true if the given mount can be mounted as an external // gofer. -func Is9PMount(m specs.Mount, vfs2Enabled bool) bool { +func IsGoferMount(m specs.Mount, vfs2Enabled bool) bool { MaybeConvertToBindMount(&m) return m.Type == "bind" && m.Source != "" && IsSupportedDevMount(m, vfs2Enabled) } |