summaryrefslogtreecommitdiffhomepage
path: root/pkg/sentry
diff options
context:
space:
mode:
authorgVisor bot <gvisor-bot@google.com>2020-07-27 20:39:38 +0000
committergVisor bot <gvisor-bot@google.com>2020-07-27 20:39:38 +0000
commitc5d1303be1bbd95df2b1120f5bbb1ac0bd3ca6c6 (patch)
tree1f780c30493506c89d652b1608df23b725b4a5c0 /pkg/sentry
parent6146e5a09884bd94eb605d74616c4cfe2a2bb4be (diff)
parent112eb0c5b9e6d45b58470fb5536a9fd91fb4222b (diff)
Merge release-20200622.1-236-g112eb0c5b (automated)
Diffstat (limited to 'pkg/sentry')
-rw-r--r--pkg/sentry/fsimpl/fuse/connection.go255
-rw-r--r--pkg/sentry/fsimpl/fuse/dev.go289
-rw-r--r--pkg/sentry/fsimpl/fuse/fuse_state_autogen.go157
-rw-r--r--pkg/sentry/fsimpl/fuse/fusefs.go52
-rw-r--r--pkg/sentry/fsimpl/fuse/register.go42
-rw-r--r--pkg/sentry/fsimpl/fuse/request_list.go193
6 files changed, 960 insertions, 28 deletions
diff --git a/pkg/sentry/fsimpl/fuse/connection.go b/pkg/sentry/fsimpl/fuse/connection.go
new file mode 100644
index 000000000..f330da0bd
--- /dev/null
+++ b/pkg/sentry/fsimpl/fuse/connection.go
@@ -0,0 +1,255 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package fuse
+
+import (
+ "errors"
+ "fmt"
+ "syscall"
+
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+ "gvisor.dev/gvisor/pkg/sentry/vfs"
+ "gvisor.dev/gvisor/pkg/waiter"
+ "gvisor.dev/gvisor/tools/go_marshal/marshal"
+)
+
+// MaxActiveRequestsDefault is the default setting controlling the upper bound
+// on the number of active requests at any given time.
+const MaxActiveRequestsDefault = 10000
+
+var (
+ // Ordinary requests have even IDs, while interrupts IDs are odd.
+ InitReqBit uint64 = 1
+ ReqIDStep uint64 = 2
+)
+
+// Request represents a FUSE operation request that hasn't been sent to the
+// server yet.
+//
+// +stateify savable
+type Request struct {
+ requestEntry
+
+ id linux.FUSEOpID
+ hdr *linux.FUSEHeaderIn
+ data []byte
+}
+
+// Response represents an actual response from the server, including the
+// response payload.
+//
+// +stateify savable
+type Response struct {
+ opcode linux.FUSEOpcode
+ hdr linux.FUSEHeaderOut
+ data []byte
+}
+
+// Connection is the struct by which the sentry communicates with the FUSE server daemon.
+type Connection struct {
+ fd *DeviceFD
+
+ // MaxWrite is the daemon's maximum size of a write buffer.
+ // This is negotiated during FUSE_INIT.
+ MaxWrite uint32
+}
+
+// NewFUSEConnection creates a FUSE connection to fd
+func NewFUSEConnection(_ context.Context, fd *vfs.FileDescription, maxInFlightRequests uint64) (*Connection, error) {
+ // Mark the device as ready so it can be used. /dev/fuse can only be used if the FD was used to
+ // mount a FUSE filesystem.
+ fuseFD := fd.Impl().(*DeviceFD)
+ fuseFD.mounted = true
+
+ // Create the writeBuf for the header to be stored in.
+ hdrLen := uint32((*linux.FUSEHeaderOut)(nil).SizeBytes())
+ fuseFD.writeBuf = make([]byte, hdrLen)
+ fuseFD.completions = make(map[linux.FUSEOpID]*futureResponse)
+ fuseFD.fullQueueCh = make(chan struct{}, maxInFlightRequests)
+ fuseFD.writeCursor = 0
+
+ return &Connection{
+ fd: fuseFD,
+ }, nil
+}
+
+// NewRequest creates a new request that can be sent to the FUSE server.
+func (conn *Connection) NewRequest(creds *auth.Credentials, pid uint32, ino uint64, opcode linux.FUSEOpcode, payload marshal.Marshallable) (*Request, error) {
+ conn.fd.mu.Lock()
+ defer conn.fd.mu.Unlock()
+ conn.fd.nextOpID += linux.FUSEOpID(ReqIDStep)
+
+ hdrLen := (*linux.FUSEHeaderIn)(nil).SizeBytes()
+ hdr := linux.FUSEHeaderIn{
+ Len: uint32(hdrLen + payload.SizeBytes()),
+ Opcode: opcode,
+ Unique: conn.fd.nextOpID,
+ NodeID: ino,
+ UID: uint32(creds.EffectiveKUID),
+ GID: uint32(creds.EffectiveKGID),
+ PID: pid,
+ }
+
+ buf := make([]byte, hdr.Len)
+ hdr.MarshalUnsafe(buf[:hdrLen])
+ payload.MarshalUnsafe(buf[hdrLen:])
+
+ return &Request{
+ id: hdr.Unique,
+ hdr: &hdr,
+ data: buf,
+ }, nil
+}
+
+// Call makes a request to the server and blocks the invoking task until a
+// server responds with a response.
+// NOTE: If no task is provided then the Call will simply enqueue the request
+// and return a nil response. No blocking will happen in this case. Instead,
+// this is used to signify that the processing of this request will happen by
+// the kernel.Task that writes the response. See FUSE_INIT for such an
+// invocation.
+func (conn *Connection) Call(t *kernel.Task, r *Request) (*Response, error) {
+ fut, err := conn.callFuture(t, r)
+ if err != nil {
+ return nil, err
+ }
+
+ return fut.resolve(t)
+}
+
+// Error returns the error of the FUSE call.
+func (r *Response) Error() error {
+ errno := r.hdr.Error
+ if errno >= 0 {
+ return nil
+ }
+
+ sysErrNo := syscall.Errno(-errno)
+ return error(sysErrNo)
+}
+
+// UnmarshalPayload unmarshals the response data into m.
+func (r *Response) UnmarshalPayload(m marshal.Marshallable) error {
+ hdrLen := r.hdr.SizeBytes()
+ haveDataLen := r.hdr.Len - uint32(hdrLen)
+ wantDataLen := uint32(m.SizeBytes())
+
+ if haveDataLen < wantDataLen {
+ return fmt.Errorf("payload too small. Minimum data lenth required: %d, but got data length %d", wantDataLen, haveDataLen)
+ }
+
+ m.UnmarshalUnsafe(r.data[hdrLen:])
+ return nil
+}
+
+// callFuture makes a request to the server and returns a future response.
+// Call resolve() when the response needs to be fulfilled.
+func (conn *Connection) callFuture(t *kernel.Task, r *Request) (*futureResponse, error) {
+ conn.fd.mu.Lock()
+ defer conn.fd.mu.Unlock()
+
+ // Is the queue full?
+ //
+ // We must busy wait here until the request can be queued. We don't
+ // block on the fd.fullQueueCh with a lock - so after being signalled,
+ // before we acquire the lock, it is possible that a barging task enters
+ // and queues a request. As a result, upon acquiring the lock we must
+ // again check if the room is available.
+ //
+ // This can potentially starve a request forever but this can only happen
+ // if there are always too many ongoing requests all the time. The
+ // supported maxActiveRequests setting should be really high to avoid this.
+ for conn.fd.numActiveRequests == conn.fd.fs.opts.maxActiveRequests {
+ if t == nil {
+ // Since there is no task that is waiting. We must error out.
+ return nil, errors.New("FUSE request queue full")
+ }
+
+ log.Infof("Blocking request %v from being queued. Too many active requests: %v",
+ r.id, conn.fd.numActiveRequests)
+ conn.fd.mu.Unlock()
+ err := t.Block(conn.fd.fullQueueCh)
+ conn.fd.mu.Lock()
+ if err != nil {
+ return nil, err
+ }
+ }
+
+ return conn.callFutureLocked(t, r)
+}
+
+// callFutureLocked makes a request to the server and returns a future response.
+func (conn *Connection) callFutureLocked(t *kernel.Task, r *Request) (*futureResponse, error) {
+ conn.fd.queue.PushBack(r)
+ conn.fd.numActiveRequests += 1
+ fut := newFutureResponse(r.hdr.Opcode)
+ conn.fd.completions[r.id] = fut
+
+ // Signal the readers that there is something to read.
+ conn.fd.waitQueue.Notify(waiter.EventIn)
+
+ return fut, nil
+}
+
+// futureResponse represents an in-flight request, that may or may not have
+// completed yet. Convert it to a resolved Response by calling Resolve, but note
+// that this may block.
+//
+// +stateify savable
+type futureResponse struct {
+ opcode linux.FUSEOpcode
+ ch chan struct{}
+ hdr *linux.FUSEHeaderOut
+ data []byte
+}
+
+// newFutureResponse creates a future response to a FUSE request.
+func newFutureResponse(opcode linux.FUSEOpcode) *futureResponse {
+ return &futureResponse{
+ opcode: opcode,
+ ch: make(chan struct{}),
+ }
+}
+
+// resolve blocks the task until the server responds to its corresponding request,
+// then returns a resolved response.
+func (f *futureResponse) resolve(t *kernel.Task) (*Response, error) {
+ // If there is no Task associated with this request - then we don't try to resolve
+ // the response. Instead, the task writing the response (proxy to the server) will
+ // process the response on our behalf.
+ if t == nil {
+ log.Infof("fuse.Response.resolve: Not waiting on a response from server.")
+ return nil, nil
+ }
+
+ if err := t.Block(f.ch); err != nil {
+ return nil, err
+ }
+
+ return f.getResponse(), nil
+}
+
+// getResponse creates a Response from the data the futureResponse has.
+func (f *futureResponse) getResponse() *Response {
+ return &Response{
+ opcode: f.opcode,
+ hdr: *f.hdr,
+ data: f.data,
+ }
+}
diff --git a/pkg/sentry/fsimpl/fuse/dev.go b/pkg/sentry/fsimpl/fuse/dev.go
index c9e12a94f..f3443ac71 100644
--- a/pkg/sentry/fsimpl/fuse/dev.go
+++ b/pkg/sentry/fsimpl/fuse/dev.go
@@ -15,13 +15,17 @@
package fuse
import (
+ "syscall"
+
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
- "gvisor.dev/gvisor/pkg/sentry/fsimpl/devtmpfs"
+ "gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sentry/vfs"
+ "gvisor.dev/gvisor/pkg/sync"
"gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/usermem"
+ "gvisor.dev/gvisor/pkg/waiter"
)
const fuseDevMinor = 229
@@ -54,9 +58,43 @@ type DeviceFD struct {
// mounted specifies whether a FUSE filesystem was mounted using the DeviceFD.
mounted bool
- // TODO(gvisor.dev/issue/2987): Add all the data structures needed to enqueue
- // and deque requests, control synchronization and establish communication
- // between the FUSE kernel module and the /dev/fuse character device.
+ // nextOpID is used to create new requests.
+ nextOpID linux.FUSEOpID
+
+ // queue is the list of requests that need to be processed by the FUSE server.
+ queue requestList
+
+ // numActiveRequests is the number of requests made by the Sentry that has
+ // yet to be responded to.
+ numActiveRequests uint64
+
+ // completions is used to map a request to its response. A Writer will use this
+ // to notify the caller of a completed response.
+ completions map[linux.FUSEOpID]*futureResponse
+
+ writeCursor uint32
+
+ // writeBuf is the memory buffer used to copy in the FUSE out header from
+ // userspace.
+ writeBuf []byte
+
+ // writeCursorFR current FR being copied from server.
+ writeCursorFR *futureResponse
+
+ // mu protects all the queues, maps, buffers and cursors and nextOpID.
+ mu sync.Mutex
+
+ // waitQueue is used to notify interested parties when the device becomes
+ // readable or writable.
+ waitQueue waiter.Queue
+
+ // fullQueueCh is a channel used to synchronize the readers with the writers.
+ // Writers (inbound requests to the filesystem) block if there are too many
+ // unprocessed in-flight requests.
+ fullQueueCh chan struct{}
+
+ // fs is the FUSE filesystem that this FD is being used for.
+ fs *filesystem
}
// Release implements vfs.FileDescriptionImpl.Release.
@@ -79,7 +117,75 @@ func (fd *DeviceFD) Read(ctx context.Context, dst usermem.IOSequence, opts vfs.R
return 0, syserror.EPERM
}
- return 0, syserror.ENOSYS
+ // We require that any Read done on this filesystem have a sane minimum
+ // read buffer. It must have the capacity for the fixed parts of any request
+ // header (Linux uses the request header and the FUSEWriteIn header for this
+ // calculation) + the negotiated MaxWrite room for the data.
+ minBuffSize := linux.FUSE_MIN_READ_BUFFER
+ inHdrLen := uint32((*linux.FUSEHeaderIn)(nil).SizeBytes())
+ writeHdrLen := uint32((*linux.FUSEWriteIn)(nil).SizeBytes())
+ negotiatedMinBuffSize := inHdrLen + writeHdrLen + fd.fs.conn.MaxWrite
+ if minBuffSize < negotiatedMinBuffSize {
+ minBuffSize = negotiatedMinBuffSize
+ }
+
+ // If the read buffer is too small, error out.
+ if dst.NumBytes() < int64(minBuffSize) {
+ return 0, syserror.EINVAL
+ }
+
+ fd.mu.Lock()
+ defer fd.mu.Unlock()
+ return fd.readLocked(ctx, dst, opts)
+}
+
+// readLocked implements the reading of the fuse device while locked with DeviceFD.mu.
+func (fd *DeviceFD) readLocked(ctx context.Context, dst usermem.IOSequence, opts vfs.ReadOptions) (int64, error) {
+ if fd.queue.Empty() {
+ return 0, syserror.ErrWouldBlock
+ }
+
+ var readCursor uint32
+ var bytesRead int64
+ for {
+ req := fd.queue.Front()
+ if dst.NumBytes() < int64(req.hdr.Len) {
+ // The request is too large. Cannot process it. All requests must be smaller than the
+ // negotiated size as specified by Connection.MaxWrite set as part of the FUSE_INIT
+ // handshake.
+ errno := -int32(syscall.EIO)
+ if req.hdr.Opcode == linux.FUSE_SETXATTR {
+ errno = -int32(syscall.E2BIG)
+ }
+
+ // Return the error to the calling task.
+ if err := fd.sendError(ctx, errno, req); err != nil {
+ return 0, err
+ }
+
+ // We're done with this request.
+ fd.queue.Remove(req)
+
+ // Restart the read as this request was invalid.
+ log.Warningf("fuse.DeviceFD.Read: request found was too large. Restarting read.")
+ return fd.readLocked(ctx, dst, opts)
+ }
+
+ n, err := dst.CopyOut(ctx, req.data[readCursor:])
+ if err != nil {
+ return 0, err
+ }
+ readCursor += uint32(n)
+ bytesRead += int64(n)
+
+ if readCursor >= req.hdr.Len {
+ // Fully done with this req, remove it from the queue.
+ fd.queue.Remove(req)
+ break
+ }
+ }
+
+ return bytesRead, nil
}
// PWrite implements vfs.FileDescriptionImpl.PWrite.
@@ -94,12 +200,128 @@ func (fd *DeviceFD) PWrite(ctx context.Context, src usermem.IOSequence, offset i
// Write implements vfs.FileDescriptionImpl.Write.
func (fd *DeviceFD) Write(ctx context.Context, src usermem.IOSequence, opts vfs.WriteOptions) (int64, error) {
+ fd.mu.Lock()
+ defer fd.mu.Unlock()
+ return fd.writeLocked(ctx, src, opts)
+}
+
+// writeLocked implements writing to the fuse device while locked with DeviceFD.mu.
+func (fd *DeviceFD) writeLocked(ctx context.Context, src usermem.IOSequence, opts vfs.WriteOptions) (int64, error) {
// Operations on /dev/fuse don't make sense until a FUSE filesystem is mounted.
if !fd.mounted {
return 0, syserror.EPERM
}
- return 0, syserror.ENOSYS
+ var cn, n int64
+ hdrLen := uint32((*linux.FUSEHeaderOut)(nil).SizeBytes())
+
+ for src.NumBytes() > 0 {
+ if fd.writeCursorFR != nil {
+ // Already have common header, and we're now copying the payload.
+ wantBytes := fd.writeCursorFR.hdr.Len
+
+ // Note that the FR data doesn't have the header. Copy it over if its necessary.
+ if fd.writeCursorFR.data == nil {
+ fd.writeCursorFR.data = make([]byte, wantBytes)
+ }
+
+ bytesCopied, err := src.CopyIn(ctx, fd.writeCursorFR.data[fd.writeCursor:wantBytes])
+ if err != nil {
+ return 0, err
+ }
+ src = src.DropFirst(bytesCopied)
+
+ cn = int64(bytesCopied)
+ n += cn
+ fd.writeCursor += uint32(cn)
+ if fd.writeCursor == wantBytes {
+ // Done reading this full response. Clean up and unblock the
+ // initiator.
+ break
+ }
+
+ // Check if we have more data in src.
+ continue
+ }
+
+ // Assert that the header isn't read into the writeBuf yet.
+ if fd.writeCursor >= hdrLen {
+ return 0, syserror.EINVAL
+ }
+
+ // We don't have the full common response header yet.
+ wantBytes := hdrLen - fd.writeCursor
+ bytesCopied, err := src.CopyIn(ctx, fd.writeBuf[fd.writeCursor:wantBytes])
+ if err != nil {
+ return 0, err
+ }
+ src = src.DropFirst(bytesCopied)
+
+ cn = int64(bytesCopied)
+ n += cn
+ fd.writeCursor += uint32(cn)
+ if fd.writeCursor == hdrLen {
+ // Have full header in the writeBuf. Use it to fetch the actual futureResponse
+ // from the device's completions map.
+ var hdr linux.FUSEHeaderOut
+ hdr.UnmarshalBytes(fd.writeBuf)
+
+ // We have the header now and so the writeBuf has served its purpose.
+ // We could reset it manually here but instead of doing that, at the
+ // end of the write, the writeCursor will be set to 0 thereby allowing
+ // the next request to overwrite whats in the buffer,
+
+ fut, ok := fd.completions[hdr.Unique]
+ if !ok {
+ // Server sent us a response for a request we never sent?
+ return 0, syserror.EINVAL
+ }
+
+ delete(fd.completions, hdr.Unique)
+
+ // Copy over the header into the future response. The rest of the payload
+ // will be copied over to the FR's data in the next iteration.
+ fut.hdr = &hdr
+ fd.writeCursorFR = fut
+
+ // Next iteration will now try read the complete request, if src has
+ // any data remaining. Otherwise we're done.
+ }
+ }
+
+ if fd.writeCursorFR != nil {
+ if err := fd.sendResponse(ctx, fd.writeCursorFR); err != nil {
+ return 0, err
+ }
+
+ // Ready the device for the next request.
+ fd.writeCursorFR = nil
+ fd.writeCursor = 0
+ }
+
+ return n, nil
+}
+
+// Readiness implements vfs.FileDescriptionImpl.Readiness.
+func (fd *DeviceFD) Readiness(mask waiter.EventMask) waiter.EventMask {
+ var ready waiter.EventMask
+ ready |= waiter.EventOut // FD is always writable
+ if !fd.queue.Empty() {
+ // Have reqs available, FD is readable.
+ ready |= waiter.EventIn
+ }
+
+ return ready & mask
+}
+
+// EventRegister implements waiter.Waitable.EventRegister.
+func (fd *DeviceFD) EventRegister(e *waiter.Entry, mask waiter.EventMask) {
+ fd.waitQueue.EventRegister(e, mask)
+}
+
+// EventUnregister implements waiter.Waitable.EventUnregister.
+func (fd *DeviceFD) EventUnregister(e *waiter.Entry) {
+ fd.waitQueue.EventUnregister(e)
}
// Seek implements vfs.FileDescriptionImpl.Seek.
@@ -112,22 +334,61 @@ func (fd *DeviceFD) Seek(ctx context.Context, offset int64, whence int32) (int64
return 0, syserror.ENOSYS
}
-// Register registers the FUSE device with vfsObj.
-func Register(vfsObj *vfs.VirtualFilesystem) error {
- if err := vfsObj.RegisterDevice(vfs.CharDevice, linux.MISC_MAJOR, fuseDevMinor, fuseDevice{}, &vfs.RegisterDeviceOptions{
- GroupName: "misc",
- }); err != nil {
+// sendResponse sends a response to the waiting task (if any).
+func (fd *DeviceFD) sendResponse(ctx context.Context, fut *futureResponse) error {
+ // See if the running task need to perform some action before returning.
+ // Since we just finished writing the future, we can be sure that
+ // getResponse generates a populated response.
+ if err := fd.noReceiverAction(ctx, fut.getResponse()); err != nil {
return err
}
+ // Signal that the queue is no longer full.
+ select {
+ case fd.fullQueueCh <- struct{}{}:
+ default:
+ }
+ fd.numActiveRequests -= 1
+
+ // Signal the task waiting on a response.
+ close(fut.ch)
return nil
}
-// CreateDevtmpfsFile creates a device special file in devtmpfs.
-func CreateDevtmpfsFile(ctx context.Context, dev *devtmpfs.Accessor) error {
- if err := dev.CreateDeviceFile(ctx, "fuse", vfs.CharDevice, linux.MISC_MAJOR, fuseDevMinor, 0666 /* mode */); err != nil {
+// sendError sends an error response to the waiting task (if any).
+func (fd *DeviceFD) sendError(ctx context.Context, errno int32, req *Request) error {
+ // Return the error to the calling task.
+ outHdrLen := uint32((*linux.FUSEHeaderOut)(nil).SizeBytes())
+ respHdr := linux.FUSEHeaderOut{
+ Len: outHdrLen,
+ Error: errno,
+ Unique: req.hdr.Unique,
+ }
+
+ fut, ok := fd.completions[respHdr.Unique]
+ if !ok {
+ // Server sent us a response for a request we never sent?
+ return syserror.EINVAL
+ }
+ delete(fd.completions, respHdr.Unique)
+
+ fut.hdr = &respHdr
+ if err := fd.sendResponse(ctx, fut); err != nil {
return err
}
return nil
}
+
+// noReceiverAction has the calling kernel.Task do some action if its known that no
+// receiver is going to be waiting on the future channel. This is to be used by:
+// FUSE_INIT.
+func (fd *DeviceFD) noReceiverAction(ctx context.Context, r *Response) error {
+ if r.opcode == linux.FUSE_INIT {
+ // TODO: process init response here.
+ // Maybe get the creds from the context?
+ // creds := auth.CredentialsFromContext(ctx)
+ }
+
+ return nil
+}
diff --git a/pkg/sentry/fsimpl/fuse/fuse_state_autogen.go b/pkg/sentry/fsimpl/fuse/fuse_state_autogen.go
index 2b9c882fb..e4ce04322 100644
--- a/pkg/sentry/fsimpl/fuse/fuse_state_autogen.go
+++ b/pkg/sentry/fsimpl/fuse/fuse_state_autogen.go
@@ -1,3 +1,160 @@
// automatically generated by stateify.
package fuse
+
+import (
+ "gvisor.dev/gvisor/pkg/state"
+)
+
+func (x *Request) StateTypeName() string {
+ return "pkg/sentry/fsimpl/fuse.Request"
+}
+
+func (x *Request) StateFields() []string {
+ return []string{
+ "requestEntry",
+ "id",
+ "hdr",
+ "data",
+ }
+}
+
+func (x *Request) beforeSave() {}
+
+func (x *Request) StateSave(m state.Sink) {
+ x.beforeSave()
+ m.Save(0, &x.requestEntry)
+ m.Save(1, &x.id)
+ m.Save(2, &x.hdr)
+ m.Save(3, &x.data)
+}
+
+func (x *Request) afterLoad() {}
+
+func (x *Request) StateLoad(m state.Source) {
+ m.Load(0, &x.requestEntry)
+ m.Load(1, &x.id)
+ m.Load(2, &x.hdr)
+ m.Load(3, &x.data)
+}
+
+func (x *Response) StateTypeName() string {
+ return "pkg/sentry/fsimpl/fuse.Response"
+}
+
+func (x *Response) StateFields() []string {
+ return []string{
+ "opcode",
+ "hdr",
+ "data",
+ }
+}
+
+func (x *Response) beforeSave() {}
+
+func (x *Response) StateSave(m state.Sink) {
+ x.beforeSave()
+ m.Save(0, &x.opcode)
+ m.Save(1, &x.hdr)
+ m.Save(2, &x.data)
+}
+
+func (x *Response) afterLoad() {}
+
+func (x *Response) StateLoad(m state.Source) {
+ m.Load(0, &x.opcode)
+ m.Load(1, &x.hdr)
+ m.Load(2, &x.data)
+}
+
+func (x *futureResponse) StateTypeName() string {
+ return "pkg/sentry/fsimpl/fuse.futureResponse"
+}
+
+func (x *futureResponse) StateFields() []string {
+ return []string{
+ "opcode",
+ "ch",
+ "hdr",
+ "data",
+ }
+}
+
+func (x *futureResponse) beforeSave() {}
+
+func (x *futureResponse) StateSave(m state.Sink) {
+ x.beforeSave()
+ m.Save(0, &x.opcode)
+ m.Save(1, &x.ch)
+ m.Save(2, &x.hdr)
+ m.Save(3, &x.data)
+}
+
+func (x *futureResponse) afterLoad() {}
+
+func (x *futureResponse) StateLoad(m state.Source) {
+ m.Load(0, &x.opcode)
+ m.Load(1, &x.ch)
+ m.Load(2, &x.hdr)
+ m.Load(3, &x.data)
+}
+
+func (x *requestList) StateTypeName() string {
+ return "pkg/sentry/fsimpl/fuse.requestList"
+}
+
+func (x *requestList) StateFields() []string {
+ return []string{
+ "head",
+ "tail",
+ }
+}
+
+func (x *requestList) beforeSave() {}
+
+func (x *requestList) StateSave(m state.Sink) {
+ x.beforeSave()
+ m.Save(0, &x.head)
+ m.Save(1, &x.tail)
+}
+
+func (x *requestList) afterLoad() {}
+
+func (x *requestList) StateLoad(m state.Source) {
+ m.Load(0, &x.head)
+ m.Load(1, &x.tail)
+}
+
+func (x *requestEntry) StateTypeName() string {
+ return "pkg/sentry/fsimpl/fuse.requestEntry"
+}
+
+func (x *requestEntry) StateFields() []string {
+ return []string{
+ "next",
+ "prev",
+ }
+}
+
+func (x *requestEntry) beforeSave() {}
+
+func (x *requestEntry) StateSave(m state.Sink) {
+ x.beforeSave()
+ m.Save(0, &x.next)
+ m.Save(1, &x.prev)
+}
+
+func (x *requestEntry) afterLoad() {}
+
+func (x *requestEntry) StateLoad(m state.Source) {
+ m.Load(0, &x.next)
+ m.Load(1, &x.prev)
+}
+
+func init() {
+ state.Register((*Request)(nil))
+ state.Register((*Response)(nil))
+ state.Register((*futureResponse)(nil))
+ state.Register((*requestList)(nil))
+ state.Register((*requestEntry)(nil))
+}
diff --git a/pkg/sentry/fsimpl/fuse/fusefs.go b/pkg/sentry/fsimpl/fuse/fusefs.go
index f7775fb9b..911b6f7cb 100644
--- a/pkg/sentry/fsimpl/fuse/fusefs.go
+++ b/pkg/sentry/fsimpl/fuse/fusefs.go
@@ -51,6 +51,11 @@ type filesystemOptions struct {
// rootMode specifies the the file mode of the filesystem's root.
rootMode linux.FileMode
+
+ // maxActiveRequests specifies the maximum number of active requests that can
+ // exist at any time. Any further requests will block when trying to
+ // Call the server.
+ maxActiveRequests uint64
}
// filesystem implements vfs.FilesystemImpl.
@@ -58,12 +63,12 @@ type filesystem struct {
kernfs.Filesystem
devMinor uint32
- // fuseFD is the FD returned when opening /dev/fuse. It is used for communication
- // between the FUSE server daemon and the sentry fusefs.
- fuseFD *DeviceFD
+ // conn is used for communication between the FUSE server
+ // daemon and the sentry fusefs.
+ conn *Connection
// opts is the options the fusefs is initialized with.
- opts filesystemOptions
+ opts *filesystemOptions
}
// Name implements vfs.FilesystemType.Name.
@@ -100,7 +105,7 @@ func (fsType FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt
fuseFd := kernelTask.GetFileVFS2(int32(deviceDescriptor))
// Parse and set all the other supported FUSE mount options.
- // TODO: Expand the supported mount options.
+ // TODO(gVisor.dev/issue/3229): Expand the supported mount options.
if userIDStr, ok := mopts["user_id"]; ok {
delete(mopts, "user_id")
userID, err := strconv.ParseUint(userIDStr, 10, 32)
@@ -134,21 +139,20 @@ func (fsType FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt
}
fsopts.rootMode = rootMode
+ // Set the maxInFlightRequests option.
+ fsopts.maxActiveRequests = MaxActiveRequestsDefault
+
// Check for unparsed options.
if len(mopts) != 0 {
log.Warningf("%s.GetFilesystem: unknown options: %v", fsType.Name(), mopts)
return nil, nil, syserror.EINVAL
}
- // Mark the device as ready so it can be used. /dev/fuse can only be used if the FD was used to
- // mount a FUSE filesystem.
- fuseFD := fuseFd.Impl().(*DeviceFD)
- fuseFD.mounted = true
-
- fs := &filesystem{
- devMinor: devMinor,
- fuseFD: fuseFD,
- opts: fsopts,
+ // Create a new FUSE filesystem.
+ fs, err := NewFUSEFilesystem(ctx, devMinor, &fsopts, fuseFd)
+ if err != nil {
+ log.Warningf("%s.NewFUSEFilesystem: failed with error: %v", fsType.Name(), err)
+ return nil, nil, err
}
fs.VFSFilesystem().Init(vfsObj, &fsType, fs)
@@ -162,6 +166,26 @@ func (fsType FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt
return fs.VFSFilesystem(), root.VFSDentry(), nil
}
+// NewFUSEFilesystem creates a new FUSE filesystem.
+func NewFUSEFilesystem(ctx context.Context, devMinor uint32, opts *filesystemOptions, device *vfs.FileDescription) (*filesystem, error) {
+ fs := &filesystem{
+ devMinor: devMinor,
+ opts: opts,
+ }
+
+ conn, err := NewFUSEConnection(ctx, device, opts.maxActiveRequests)
+ if err != nil {
+ log.Warningf("fuse.NewFUSEFilesystem: NewFUSEConnection failed with error: %v", err)
+ return nil, syserror.EINVAL
+ }
+
+ fs.conn = conn
+ fuseFD := device.Impl().(*DeviceFD)
+ fuseFD.fs = fs
+
+ return fs, nil
+}
+
// Release implements vfs.FilesystemImpl.Release.
func (fs *filesystem) Release() {
fs.Filesystem.VFSFilesystem().VirtualFilesystem().PutAnonBlockDevMinor(fs.devMinor)
diff --git a/pkg/sentry/fsimpl/fuse/register.go b/pkg/sentry/fsimpl/fuse/register.go
new file mode 100644
index 000000000..b5b581152
--- /dev/null
+++ b/pkg/sentry/fsimpl/fuse/register.go
@@ -0,0 +1,42 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package fuse
+
+import (
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/sentry/fsimpl/devtmpfs"
+ "gvisor.dev/gvisor/pkg/sentry/vfs"
+)
+
+// Register registers the FUSE device with vfsObj.
+func Register(vfsObj *vfs.VirtualFilesystem) error {
+ if err := vfsObj.RegisterDevice(vfs.CharDevice, linux.MISC_MAJOR, fuseDevMinor, fuseDevice{}, &vfs.RegisterDeviceOptions{
+ GroupName: "misc",
+ }); err != nil {
+ return err
+ }
+
+ return nil
+}
+
+// CreateDevtmpfsFile creates a device special file in devtmpfs.
+func CreateDevtmpfsFile(ctx context.Context, dev *devtmpfs.Accessor) error {
+ if err := dev.CreateDeviceFile(ctx, "fuse", vfs.CharDevice, linux.MISC_MAJOR, fuseDevMinor, 0666 /* mode */); err != nil {
+ return err
+ }
+
+ return nil
+}
diff --git a/pkg/sentry/fsimpl/fuse/request_list.go b/pkg/sentry/fsimpl/fuse/request_list.go
new file mode 100644
index 000000000..002262f23
--- /dev/null
+++ b/pkg/sentry/fsimpl/fuse/request_list.go
@@ -0,0 +1,193 @@
+package fuse
+
+// ElementMapper provides an identity mapping by default.
+//
+// This can be replaced to provide a struct that maps elements to linker
+// objects, if they are not the same. An ElementMapper is not typically
+// required if: Linker is left as is, Element is left as is, or Linker and
+// Element are the same type.
+type requestElementMapper struct{}
+
+// linkerFor maps an Element to a Linker.
+//
+// This default implementation should be inlined.
+//
+//go:nosplit
+func (requestElementMapper) linkerFor(elem *Request) *Request { return elem }
+
+// List is an intrusive list. Entries can be added to or removed from the list
+// in O(1) time and with no additional memory allocations.
+//
+// The zero value for List is an empty list ready to use.
+//
+// To iterate over a list (where l is a List):
+// for e := l.Front(); e != nil; e = e.Next() {
+// // do something with e.
+// }
+//
+// +stateify savable
+type requestList struct {
+ head *Request
+ tail *Request
+}
+
+// Reset resets list l to the empty state.
+func (l *requestList) Reset() {
+ l.head = nil
+ l.tail = nil
+}
+
+// Empty returns true iff the list is empty.
+func (l *requestList) Empty() bool {
+ return l.head == nil
+}
+
+// Front returns the first element of list l or nil.
+func (l *requestList) Front() *Request {
+ return l.head
+}
+
+// Back returns the last element of list l or nil.
+func (l *requestList) Back() *Request {
+ return l.tail
+}
+
+// Len returns the number of elements in the list.
+//
+// NOTE: This is an O(n) operation.
+func (l *requestList) Len() (count int) {
+ for e := l.Front(); e != nil; e = (requestElementMapper{}.linkerFor(e)).Next() {
+ count++
+ }
+ return count
+}
+
+// PushFront inserts the element e at the front of list l.
+func (l *requestList) PushFront(e *Request) {
+ linker := requestElementMapper{}.linkerFor(e)
+ linker.SetNext(l.head)
+ linker.SetPrev(nil)
+ if l.head != nil {
+ requestElementMapper{}.linkerFor(l.head).SetPrev(e)
+ } else {
+ l.tail = e
+ }
+
+ l.head = e
+}
+
+// PushBack inserts the element e at the back of list l.
+func (l *requestList) PushBack(e *Request) {
+ linker := requestElementMapper{}.linkerFor(e)
+ linker.SetNext(nil)
+ linker.SetPrev(l.tail)
+ if l.tail != nil {
+ requestElementMapper{}.linkerFor(l.tail).SetNext(e)
+ } else {
+ l.head = e
+ }
+
+ l.tail = e
+}
+
+// PushBackList inserts list m at the end of list l, emptying m.
+func (l *requestList) PushBackList(m *requestList) {
+ if l.head == nil {
+ l.head = m.head
+ l.tail = m.tail
+ } else if m.head != nil {
+ requestElementMapper{}.linkerFor(l.tail).SetNext(m.head)
+ requestElementMapper{}.linkerFor(m.head).SetPrev(l.tail)
+
+ l.tail = m.tail
+ }
+ m.head = nil
+ m.tail = nil
+}
+
+// InsertAfter inserts e after b.
+func (l *requestList) InsertAfter(b, e *Request) {
+ bLinker := requestElementMapper{}.linkerFor(b)
+ eLinker := requestElementMapper{}.linkerFor(e)
+
+ a := bLinker.Next()
+
+ eLinker.SetNext(a)
+ eLinker.SetPrev(b)
+ bLinker.SetNext(e)
+
+ if a != nil {
+ requestElementMapper{}.linkerFor(a).SetPrev(e)
+ } else {
+ l.tail = e
+ }
+}
+
+// InsertBefore inserts e before a.
+func (l *requestList) InsertBefore(a, e *Request) {
+ aLinker := requestElementMapper{}.linkerFor(a)
+ eLinker := requestElementMapper{}.linkerFor(e)
+
+ b := aLinker.Prev()
+ eLinker.SetNext(a)
+ eLinker.SetPrev(b)
+ aLinker.SetPrev(e)
+
+ if b != nil {
+ requestElementMapper{}.linkerFor(b).SetNext(e)
+ } else {
+ l.head = e
+ }
+}
+
+// Remove removes e from l.
+func (l *requestList) Remove(e *Request) {
+ linker := requestElementMapper{}.linkerFor(e)
+ prev := linker.Prev()
+ next := linker.Next()
+
+ if prev != nil {
+ requestElementMapper{}.linkerFor(prev).SetNext(next)
+ } else if l.head == e {
+ l.head = next
+ }
+
+ if next != nil {
+ requestElementMapper{}.linkerFor(next).SetPrev(prev)
+ } else if l.tail == e {
+ l.tail = prev
+ }
+
+ linker.SetNext(nil)
+ linker.SetPrev(nil)
+}
+
+// Entry is a default implementation of Linker. Users can add anonymous fields
+// of this type to their structs to make them automatically implement the
+// methods needed by List.
+//
+// +stateify savable
+type requestEntry struct {
+ next *Request
+ prev *Request
+}
+
+// Next returns the entry that follows e in the list.
+func (e *requestEntry) Next() *Request {
+ return e.next
+}
+
+// Prev returns the entry that precedes e in the list.
+func (e *requestEntry) Prev() *Request {
+ return e.prev
+}
+
+// SetNext assigns 'entry' as the entry that follows e in the list.
+func (e *requestEntry) SetNext(elem *Request) {
+ e.next = elem
+}
+
+// SetPrev assigns 'entry' as the entry that precedes e in the list.
+func (e *requestEntry) SetPrev(elem *Request) {
+ e.prev = elem
+}