summaryrefslogtreecommitdiffhomepage
path: root/pkg
diff options
context:
space:
mode:
authorgVisor bot <gvisor-bot@google.com>2020-07-30 15:53:40 -0700
committergVisor bot <gvisor-bot@google.com>2020-07-30 15:53:40 -0700
commit103a178026dc64986ff8329aaeafcd6c1fd34b91 (patch)
tree5df567939c9e2797a3e34a20cae83f0061e80c19 /pkg
parent1b11326ecdd788fccc8e396b7467bbbb591d563f (diff)
parent2e19a8b951e9402b28b4e601e65c51e69c815db1 (diff)
Merge pull request #3179 from jinmouil:fuse_init
PiperOrigin-RevId: 324100220
Diffstat (limited to 'pkg')
-rw-r--r--pkg/abi/linux/fuse.go105
-rw-r--r--pkg/sentry/fsimpl/fuse/BUILD4
-rw-r--r--pkg/sentry/fsimpl/fuse/connection.go234
-rw-r--r--pkg/sentry/fsimpl/fuse/dev.go13
-rw-r--r--pkg/sentry/fsimpl/fuse/dev_test.go17
-rw-r--r--pkg/sentry/fsimpl/fuse/fusefs.go22
-rw-r--r--pkg/sentry/fsimpl/fuse/init.go166
-rw-r--r--pkg/syserror/syserror.go1
8 files changed, 511 insertions, 51 deletions
diff --git a/pkg/abi/linux/fuse.go b/pkg/abi/linux/fuse.go
index d3ebbccc4..5c6ffe4a3 100644
--- a/pkg/abi/linux/fuse.go
+++ b/pkg/abi/linux/fuse.go
@@ -141,3 +141,108 @@ type FUSEWriteIn struct {
_ uint32
}
+
+// FUSE_INIT flags, consistent with the ones in include/uapi/linux/fuse.h.
+const (
+ FUSE_ASYNC_READ = 1 << 0
+ FUSE_POSIX_LOCKS = 1 << 1
+ FUSE_FILE_OPS = 1 << 2
+ FUSE_ATOMIC_O_TRUNC = 1 << 3
+ FUSE_EXPORT_SUPPORT = 1 << 4
+ FUSE_BIG_WRITES = 1 << 5
+ FUSE_DONT_MASK = 1 << 6
+ FUSE_SPLICE_WRITE = 1 << 7
+ FUSE_SPLICE_MOVE = 1 << 8
+ FUSE_SPLICE_READ = 1 << 9
+ FUSE_FLOCK_LOCKS = 1 << 10
+ FUSE_HAS_IOCTL_DIR = 1 << 11
+ FUSE_AUTO_INVAL_DATA = 1 << 12
+ FUSE_DO_READDIRPLUS = 1 << 13
+ FUSE_READDIRPLUS_AUTO = 1 << 14
+ FUSE_ASYNC_DIO = 1 << 15
+ FUSE_WRITEBACK_CACHE = 1 << 16
+ FUSE_NO_OPEN_SUPPORT = 1 << 17
+ FUSE_PARALLEL_DIROPS = 1 << 18
+ FUSE_HANDLE_KILLPRIV = 1 << 19
+ FUSE_POSIX_ACL = 1 << 20
+ FUSE_ABORT_ERROR = 1 << 21
+ FUSE_MAX_PAGES = 1 << 22
+ FUSE_CACHE_SYMLINKS = 1 << 23
+ FUSE_NO_OPENDIR_SUPPORT = 1 << 24
+ FUSE_EXPLICIT_INVAL_DATA = 1 << 25
+ FUSE_MAP_ALIGNMENT = 1 << 26
+)
+
+// currently supported FUSE protocol version numbers.
+const (
+ FUSE_KERNEL_VERSION = 7
+ FUSE_KERNEL_MINOR_VERSION = 31
+)
+
+// FUSEInitIn is the request sent by the kernel to the daemon,
+// to negotiate the version and flags.
+//
+// +marshal
+type FUSEInitIn struct {
+ // Major version supported by kernel.
+ Major uint32
+
+ // Minor version supported by the kernel.
+ Minor uint32
+
+ // MaxReadahead is the maximum number of bytes to read-ahead
+ // decided by the kernel.
+ MaxReadahead uint32
+
+ // Flags of this init request.
+ Flags uint32
+}
+
+// FUSEInitOut is the reply sent by the daemon to the kernel
+// for FUSEInitIn.
+//
+// +marshal
+type FUSEInitOut struct {
+ // Major version supported by daemon.
+ Major uint32
+
+ // Minor version supported by daemon.
+ Minor uint32
+
+ // MaxReadahead is the maximum number of bytes to read-ahead.
+ // Decided by the daemon, after receiving the value from kernel.
+ MaxReadahead uint32
+
+ // Flags of this init reply.
+ Flags uint32
+
+ // MaxBackground is the maximum number of pending background requests
+ // that the daemon wants.
+ MaxBackground uint16
+
+ // CongestionThreshold is the daemon-decided threshold for
+ // the number of the pending background requests.
+ CongestionThreshold uint16
+
+ // MaxWrite is the daemon's maximum size of a write buffer.
+ // Kernel adjusts it to the minimum (fuse/init.go:fuseMinMaxWrite).
+ // if the value from daemon is too small.
+ MaxWrite uint32
+
+ // TimeGran is the daemon's time granularity for mtime and ctime metadata.
+ // The unit is nanosecond.
+ // Value should be power of 10.
+ // 1 indicates full nanosecond granularity support.
+ TimeGran uint32
+
+ // MaxPages is the daemon's maximum number of pages for one write operation.
+ // Kernel adjusts it to the maximum (fuse/init.go:FUSE_MAX_MAX_PAGES).
+ // if the value from daemon is too large.
+ MaxPages uint16
+
+ // MapAlignment is an unknown field and not used by this package at this moment.
+ // Use as a placeholder to be consistent with the FUSE protocol.
+ MapAlignment uint16
+
+ _ [8]uint32
+}
diff --git a/pkg/sentry/fsimpl/fuse/BUILD b/pkg/sentry/fsimpl/fuse/BUILD
index 67649e811..999111deb 100644
--- a/pkg/sentry/fsimpl/fuse/BUILD
+++ b/pkg/sentry/fsimpl/fuse/BUILD
@@ -21,6 +21,7 @@ go_library(
"connection.go",
"dev.go",
"fusefs.go",
+ "init.go",
"register.go",
"request_list.go",
],
@@ -44,14 +45,13 @@ go_library(
)
go_test(
- name = "dev_test",
+ name = "fuse_test",
size = "small",
srcs = ["dev_test.go"],
library = ":fuse",
deps = [
"//pkg/abi/linux",
"//pkg/sentry/fsimpl/testutil",
- "//pkg/sentry/fsimpl/tmpfs",
"//pkg/sentry/kernel",
"//pkg/sentry/kernel/auth",
"//pkg/sentry/vfs",
diff --git a/pkg/sentry/fsimpl/fuse/connection.go b/pkg/sentry/fsimpl/fuse/connection.go
index f330da0bd..6df2728ab 100644
--- a/pkg/sentry/fsimpl/fuse/connection.go
+++ b/pkg/sentry/fsimpl/fuse/connection.go
@@ -17,6 +17,8 @@ package fuse
import (
"errors"
"fmt"
+ "sync"
+ "sync/atomic"
"syscall"
"gvisor.dev/gvisor/pkg/abi/linux"
@@ -25,18 +27,29 @@ import (
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/sentry/vfs"
+ "gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/waiter"
"gvisor.dev/gvisor/tools/go_marshal/marshal"
)
-// MaxActiveRequestsDefault is the default setting controlling the upper bound
+// maxActiveRequestsDefault is the default setting controlling the upper bound
// on the number of active requests at any given time.
-const MaxActiveRequestsDefault = 10000
+const maxActiveRequestsDefault = 10000
-var (
- // Ordinary requests have even IDs, while interrupts IDs are odd.
- InitReqBit uint64 = 1
- ReqIDStep uint64 = 2
+// Ordinary requests have even IDs, while interrupts IDs are odd.
+// Used to increment the unique ID for each FUSE request.
+var reqIDStep uint64 = 2
+
+const (
+ // fuseDefaultMaxBackground is the default value for MaxBackground.
+ fuseDefaultMaxBackground = 12
+
+ // fuseDefaultCongestionThreshold is the default value for CongestionThreshold,
+ // and is 75% of the default maximum of MaxGround.
+ fuseDefaultCongestionThreshold = (fuseDefaultMaxBackground * 3 / 4)
+
+ // fuseDefaultMaxPagesPerReq is the default value for MaxPagesPerReq.
+ fuseDefaultMaxPagesPerReq = 32
)
// Request represents a FUSE operation request that hasn't been sent to the
@@ -61,17 +74,125 @@ type Response struct {
data []byte
}
-// Connection is the struct by which the sentry communicates with the FUSE server daemon.
-type Connection struct {
+// connection is the struct by which the sentry communicates with the FUSE server daemon.
+type connection struct {
fd *DeviceFD
- // MaxWrite is the daemon's maximum size of a write buffer.
- // This is negotiated during FUSE_INIT.
- MaxWrite uint32
+ // The following FUSE_INIT flags are currently unsupported by this implementation:
+ // - FUSE_ATOMIC_O_TRUNC: requires open(..., O_TRUNC)
+ // - FUSE_EXPORT_SUPPORT
+ // - FUSE_HANDLE_KILLPRIV
+ // - FUSE_POSIX_LOCKS: requires POSIX locks
+ // - FUSE_FLOCK_LOCKS: requires POSIX locks
+ // - FUSE_AUTO_INVAL_DATA: requires page caching eviction
+ // - FUSE_EXPLICIT_INVAL_DATA: requires page caching eviction
+ // - FUSE_DO_READDIRPLUS/FUSE_READDIRPLUS_AUTO: requires FUSE_READDIRPLUS implementation
+ // - FUSE_ASYNC_DIO
+ // - FUSE_POSIX_ACL: affects defaultPermissions, posixACL, xattr handler
+
+ // initialized after receiving FUSE_INIT reply.
+ // Until it's set, suspend sending FUSE requests.
+ // Use SetInitialized() and IsInitialized() for atomic access.
+ initialized int32
+
+ // initializedChan is used to block requests before initialization.
+ initializedChan chan struct{}
+
+ // blocked when there are too many outstading backgrounds requests (NumBackground == MaxBackground).
+ // TODO(gvisor.dev/issue/3185): update the numBackground accordingly; use a channel to block.
+ blocked bool
+
+ // connected (connection established) when a new FUSE file system is created.
+ // Set to false when:
+ // umount,
+ // connection abort,
+ // device release.
+ connected bool
+
+ // aborted via sysfs.
+ // TODO(gvisor.dev/issue/3185): abort all queued requests.
+ aborted bool
+
+ // connInitError if FUSE_INIT encountered error (major version mismatch).
+ // Only set in INIT.
+ connInitError bool
+
+ // connInitSuccess if FUSE_INIT is successful.
+ // Only set in INIT.
+ // Used for destory.
+ connInitSuccess bool
+
+ // TODO(gvisor.dev/issue/3185): All the queue logic are working in progress.
+
+ // NumberBackground is the number of requests in the background.
+ numBackground uint16
+
+ // congestionThreshold for NumBackground.
+ // Negotiated in FUSE_INIT.
+ congestionThreshold uint16
+
+ // maxBackground is the maximum number of NumBackground.
+ // Block connection when it is reached.
+ // Negotiated in FUSE_INIT.
+ maxBackground uint16
+
+ // numActiveBackground is the number of requests in background and has being marked as active.
+ numActiveBackground uint16
+
+ // numWating is the number of requests waiting for completion.
+ numWaiting uint32
+
+ // TODO(gvisor.dev/issue/3185): BgQueue
+ // some queue for background queued requests.
+
+ // bgLock protects:
+ // MaxBackground, CongestionThreshold, NumBackground,
+ // NumActiveBackground, BgQueue, Blocked.
+ bgLock sync.Mutex
+
+ // maxRead is the maximum size of a read buffer in in bytes.
+ maxRead uint32
+
+ // maxWrite is the maximum size of a write buffer in bytes.
+ // Negotiated in FUSE_INIT.
+ maxWrite uint32
+
+ // maxPages is the maximum number of pages for a single request to use.
+ // Negotiated in FUSE_INIT.
+ maxPages uint16
+
+ // minor version of the FUSE protocol.
+ // Negotiated and only set in INIT.
+ minor uint32
+
+ // asyncRead if read pages asynchronously.
+ // Negotiated and only set in INIT.
+ asyncRead bool
+
+ // abortErr is true if kernel need to return an unique read error after abort.
+ // Negotiated and only set in INIT.
+ abortErr bool
+
+ // writebackCache is true for write-back cache policy,
+ // false for write-through policy.
+ // Negotiated and only set in INIT.
+ writebackCache bool
+
+ // cacheSymlinks if filesystem needs to cache READLINK responses in page cache.
+ // Negotiated and only set in INIT.
+ cacheSymlinks bool
+
+ // bigWrites if doing multi-page cached writes.
+ // Negotiated and only set in INIT.
+ bigWrites bool
+
+ // dontMask if filestestem does not apply umask to creation modes.
+ // Negotiated in INIT.
+ dontMask bool
}
-// NewFUSEConnection creates a FUSE connection to fd
-func NewFUSEConnection(_ context.Context, fd *vfs.FileDescription, maxInFlightRequests uint64) (*Connection, error) {
+// newFUSEConnection creates a FUSE connection to fd.
+func newFUSEConnection(_ context.Context, fd *vfs.FileDescription, maxInFlightRequests uint64) (*connection, error) {
// Mark the device as ready so it can be used. /dev/fuse can only be used if the FD was used to
// mount a FUSE filesystem.
fuseFD := fd.Impl().(*DeviceFD)
@@ -84,16 +205,41 @@ func NewFUSEConnection(_ context.Context, fd *vfs.FileDescription, maxInFlightRe
fuseFD.fullQueueCh = make(chan struct{}, maxInFlightRequests)
fuseFD.writeCursor = 0
- return &Connection{
- fd: fuseFD,
+ return &connection{
+ fd: fuseFD,
+ maxBackground: fuseDefaultMaxBackground,
+ congestionThreshold: fuseDefaultCongestionThreshold,
+ maxPages: fuseDefaultMaxPagesPerReq,
+ initializedChan: make(chan struct{}),
+ connected: true,
}, nil
}
+// SetInitialized atomically sets the connection as initialized.
+func (conn *connection) SetInitialized() {
+ // Unblock the requests sent before INIT.
+ close(conn.initializedChan)
+
+ // Close the channel first to avoid the non-atomic situation
+ // where conn.initialized is true but there are
+ // tasks being blocked on the channel.
+ // And it prevents the newer tasks from gaining
+ // unnecessary higher chance to be issued before the blocked one.
+
+ atomic.StoreInt32(&(conn.initialized), int32(1))
+}
+
+// IsInitialized atomically check if the connection is initialized.
+// pairs with SetInitialized().
+func (conn *connection) Initialized() bool {
+ return atomic.LoadInt32(&(conn.initialized)) != 0
+}
+
// NewRequest creates a new request that can be sent to the FUSE server.
-func (conn *Connection) NewRequest(creds *auth.Credentials, pid uint32, ino uint64, opcode linux.FUSEOpcode, payload marshal.Marshallable) (*Request, error) {
+func (conn *connection) NewRequest(creds *auth.Credentials, pid uint32, ino uint64, opcode linux.FUSEOpcode, payload marshal.Marshallable) (*Request, error) {
conn.fd.mu.Lock()
defer conn.fd.mu.Unlock()
- conn.fd.nextOpID += linux.FUSEOpID(ReqIDStep)
+ conn.fd.nextOpID += linux.FUSEOpID(reqIDStep)
hdrLen := (*linux.FUSEHeaderIn)(nil).SizeBytes()
hdr := linux.FUSEHeaderIn{
@@ -118,13 +264,49 @@ func (conn *Connection) NewRequest(creds *auth.Credentials, pid uint32, ino uint
}
// Call makes a request to the server and blocks the invoking task until a
-// server responds with a response.
-// NOTE: If no task is provided then the Call will simply enqueue the request
-// and return a nil response. No blocking will happen in this case. Instead,
-// this is used to signify that the processing of this request will happen by
-// the kernel.Task that writes the response. See FUSE_INIT for such an
-// invocation.
-func (conn *Connection) Call(t *kernel.Task, r *Request) (*Response, error) {
+// server responds with a response. Task should never be nil.
+// Requests will not be sent before the connection is initialized.
+// For async tasks, use CallAsync().
+func (conn *connection) Call(t *kernel.Task, r *Request) (*Response, error) {
+ // Block requests sent before connection is initalized.
+ if !conn.Initialized() {
+ if err := t.Block(conn.initializedChan); err != nil {
+ return nil, err
+ }
+ }
+
+ return conn.call(t, r)
+}
+
+// CallAsync makes an async (aka background) request.
+// Those requests either do not expect a response (e.g. release) or
+// the response should be handled by others (e.g. init).
+// Return immediately unless the connection is blocked (before initialization).
+// Async call example: init, release, forget, aio, interrupt.
+// When the Request is FUSE_INIT, it will not be blocked before initialization.
+func (conn *connection) CallAsync(t *kernel.Task, r *Request) error {
+ // Block requests sent before connection is initalized.
+ if !conn.Initialized() && r.hdr.Opcode != linux.FUSE_INIT {
+ if err := t.Block(conn.initializedChan); err != nil {
+ return err
+ }
+ }
+
+ // This should be the only place that invokes call() with a nil task.
+ _, err := conn.call(nil, r)
+ return err
+}
+
+// call makes a call without blocking checks.
+func (conn *connection) call(t *kernel.Task, r *Request) (*Response, error) {
+ if !conn.connected {
+ return nil, syserror.ENOTCONN
+ }
+
+ if conn.connInitError {
+ return nil, syserror.ECONNREFUSED
+ }
+
fut, err := conn.callFuture(t, r)
if err != nil {
return nil, err
@@ -160,7 +342,7 @@ func (r *Response) UnmarshalPayload(m marshal.Marshallable) error {
// callFuture makes a request to the server and returns a future response.
// Call resolve() when the response needs to be fulfilled.
-func (conn *Connection) callFuture(t *kernel.Task, r *Request) (*futureResponse, error) {
+func (conn *connection) callFuture(t *kernel.Task, r *Request) (*futureResponse, error) {
conn.fd.mu.Lock()
defer conn.fd.mu.Unlock()
@@ -195,7 +377,7 @@ func (conn *Connection) callFuture(t *kernel.Task, r *Request) (*futureResponse,
}
// callFutureLocked makes a request to the server and returns a future response.
-func (conn *Connection) callFutureLocked(t *kernel.Task, r *Request) (*futureResponse, error) {
+func (conn *connection) callFutureLocked(t *kernel.Task, r *Request) (*futureResponse, error) {
conn.fd.queue.PushBack(r)
conn.fd.numActiveRequests += 1
fut := newFutureResponse(r.hdr.Opcode)
diff --git a/pkg/sentry/fsimpl/fuse/dev.go b/pkg/sentry/fsimpl/fuse/dev.go
index f3443ac71..2225076bc 100644
--- a/pkg/sentry/fsimpl/fuse/dev.go
+++ b/pkg/sentry/fsimpl/fuse/dev.go
@@ -21,6 +21,7 @@ import (
"gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/sentry/vfs"
"gvisor.dev/gvisor/pkg/sync"
"gvisor.dev/gvisor/pkg/syserror"
@@ -98,7 +99,9 @@ type DeviceFD struct {
}
// Release implements vfs.FileDescriptionImpl.Release.
-func (fd *DeviceFD) Release() {}
+func (fd *DeviceFD) Release() {
+ fd.fs.conn.connected = false
+}
// PRead implements vfs.FileDescriptionImpl.PRead.
func (fd *DeviceFD) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts vfs.ReadOptions) (int64, error) {
@@ -124,7 +127,7 @@ func (fd *DeviceFD) Read(ctx context.Context, dst usermem.IOSequence, opts vfs.R
minBuffSize := linux.FUSE_MIN_READ_BUFFER
inHdrLen := uint32((*linux.FUSEHeaderIn)(nil).SizeBytes())
writeHdrLen := uint32((*linux.FUSEWriteIn)(nil).SizeBytes())
- negotiatedMinBuffSize := inHdrLen + writeHdrLen + fd.fs.conn.MaxWrite
+ negotiatedMinBuffSize := inHdrLen + writeHdrLen + fd.fs.conn.maxWrite
if minBuffSize < negotiatedMinBuffSize {
minBuffSize = negotiatedMinBuffSize
}
@@ -385,9 +388,9 @@ func (fd *DeviceFD) sendError(ctx context.Context, errno int32, req *Request) er
// FUSE_INIT.
func (fd *DeviceFD) noReceiverAction(ctx context.Context, r *Response) error {
if r.opcode == linux.FUSE_INIT {
- // TODO: process init response here.
- // Maybe get the creds from the context?
- // creds := auth.CredentialsFromContext(ctx)
+ creds := auth.CredentialsFromContext(ctx)
+ rootUserNs := kernel.KernelFromContext(ctx).RootUserNamespace()
+ return fd.fs.conn.InitRecv(r, creds.HasCapabilityIn(linux.CAP_SYS_ADMIN, rootUserNs))
}
return nil
diff --git a/pkg/sentry/fsimpl/fuse/dev_test.go b/pkg/sentry/fsimpl/fuse/dev_test.go
index fcd77832a..84c222ad6 100644
--- a/pkg/sentry/fsimpl/fuse/dev_test.go
+++ b/pkg/sentry/fsimpl/fuse/dev_test.go
@@ -22,7 +22,6 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/sentry/fsimpl/testutil"
- "gvisor.dev/gvisor/pkg/sentry/fsimpl/tmpfs"
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/sentry/vfs"
@@ -60,25 +59,25 @@ func TestFUSECommunication(t *testing.T) {
Name: "SingleClientSingleServer",
NumClients: 1,
NumServers: 1,
- MaxActiveRequests: MaxActiveRequestsDefault,
+ MaxActiveRequests: maxActiveRequestsDefault,
},
{
Name: "SingleClientMultipleServers",
NumClients: 1,
NumServers: 10,
- MaxActiveRequests: MaxActiveRequestsDefault,
+ MaxActiveRequests: maxActiveRequestsDefault,
},
{
Name: "MultipleClientsSingleServer",
NumClients: 10,
NumServers: 1,
- MaxActiveRequests: MaxActiveRequestsDefault,
+ MaxActiveRequests: maxActiveRequestsDefault,
},
{
Name: "MultipleClientsMultipleServers",
NumClients: 10,
NumServers: 10,
- MaxActiveRequests: MaxActiveRequestsDefault,
+ MaxActiveRequests: maxActiveRequestsDefault,
},
{
Name: "RequestCapacityFull",
@@ -145,7 +144,7 @@ func TestFUSECommunication(t *testing.T) {
// CallTest makes a request to the server and blocks the invoking
// goroutine until a server responds with a response. Doesn't block
// a kernel.Task. Analogous to Connection.Call but used for testing.
-func CallTest(conn *Connection, t *kernel.Task, r *Request, i uint32) (*Response, error) {
+func CallTest(conn *connection, t *kernel.Task, r *Request, i uint32) (*Response, error) {
conn.fd.mu.Lock()
// Wait until we're certain that a new request can be processed.
@@ -214,7 +213,7 @@ func ReadTest(serverTask *kernel.Task, fd *vfs.FileDescription, inIOseq usermem.
// fuseClientRun emulates all the actions of a normal FUSE request. It creates
// a header, a payload, calls the server, waits for the response, and processes
// the response.
-func fuseClientRun(t *testing.T, s *testutil.System, k *kernel.Kernel, conn *Connection, creds *auth.Credentials, pid uint32, inode uint64, clientDone chan struct{}) {
+func fuseClientRun(t *testing.T, s *testutil.System, k *kernel.Kernel, conn *connection, creds *auth.Credentials, pid uint32, inode uint64, clientDone chan struct{}) {
defer func() { clientDone <- struct{}{} }()
tc := k.NewThreadGroup(nil, k.RootPIDNamespace(), kernel.NewSignalHandlers(), linux.SIGCHLD, k.GlobalInit().Limits())
@@ -343,7 +342,7 @@ func setup(t *testing.T) *testutil.System {
AllowUserMount: true,
})
- mntns, err := k.VFS().NewMountNamespace(ctx, creds, "", tmpfs.Name, &vfs.GetFilesystemOptions{})
+ mntns, err := k.VFS().NewMountNamespace(ctx, creds, "", "tmpfs", &vfs.GetFilesystemOptions{})
if err != nil {
t.Fatalf("NewMountNamespace(): %v", err)
}
@@ -353,7 +352,7 @@ func setup(t *testing.T) *testutil.System {
// newTestConnection creates a fuse connection that the sentry can communicate with
// and the FD for the server to communicate with.
-func newTestConnection(system *testutil.System, k *kernel.Kernel, maxActiveRequests uint64) (*Connection, *vfs.FileDescription, error) {
+func newTestConnection(system *testutil.System, k *kernel.Kernel, maxActiveRequests uint64) (*connection, *vfs.FileDescription, error) {
vfsObj := &vfs.VirtualFilesystem{}
fuseDev := &DeviceFD{}
diff --git a/pkg/sentry/fsimpl/fuse/fusefs.go b/pkg/sentry/fsimpl/fuse/fusefs.go
index 911b6f7cb..200a93bbf 100644
--- a/pkg/sentry/fsimpl/fuse/fusefs.go
+++ b/pkg/sentry/fsimpl/fuse/fusefs.go
@@ -65,7 +65,7 @@ type filesystem struct {
// conn is used for communication between the FUSE server
// daemon and the sentry fusefs.
- conn *Connection
+ conn *connection
// opts is the options the fusefs is initialized with.
opts *filesystemOptions
@@ -140,7 +140,7 @@ func (fsType FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt
fsopts.rootMode = rootMode
// Set the maxInFlightRequests option.
- fsopts.maxActiveRequests = MaxActiveRequestsDefault
+ fsopts.maxActiveRequests = maxActiveRequestsDefault
// Check for unparsed options.
if len(mopts) != 0 {
@@ -157,8 +157,12 @@ func (fsType FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt
fs.VFSFilesystem().Init(vfsObj, &fsType, fs)
- // TODO: dispatch a FUSE_INIT request to the FUSE daemon server before
- // returning. Mount will not block on this dispatched request.
+ // Send a FUSE_INIT request to the FUSE daemon server before returning.
+ // This call is not blocking.
+ if err := fs.conn.InitSend(creds, uint32(kernelTask.ThreadID())); err != nil {
+ log.Warningf("%s.InitSend: failed with error: %v", fsType.Name(), err)
+ return nil, nil, err
+ }
// root is the fusefs root directory.
root := fs.newInode(creds, fsopts.rootMode)
@@ -173,7 +177,7 @@ func NewFUSEFilesystem(ctx context.Context, devMinor uint32, opts *filesystemOpt
opts: opts,
}
- conn, err := NewFUSEConnection(ctx, device, opts.maxActiveRequests)
+ conn, err := newFUSEConnection(ctx, device, opts.maxActiveRequests)
if err != nil {
log.Warningf("fuse.NewFUSEFilesystem: NewFUSEConnection failed with error: %v", err)
return nil, syserror.EINVAL
@@ -192,8 +196,8 @@ func (fs *filesystem) Release() {
fs.Filesystem.Release()
}
-// Inode implements kernfs.Inode.
-type Inode struct {
+// inode implements kernfs.Inode.
+type inode struct {
kernfs.InodeAttrs
kernfs.InodeNoDynamicLookup
kernfs.InodeNotSymlink
@@ -206,7 +210,7 @@ type Inode struct {
}
func (fs *filesystem) newInode(creds *auth.Credentials, mode linux.FileMode) *kernfs.Dentry {
- i := &Inode{}
+ i := &inode{}
i.InodeAttrs.Init(creds, linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), linux.ModeDirectory|0755)
i.OrderedChildren.Init(kernfs.OrderedChildrenOptions{})
i.dentry.Init(i)
@@ -215,7 +219,7 @@ func (fs *filesystem) newInode(creds *auth.Credentials, mode linux.FileMode) *ke
}
// Open implements kernfs.Inode.Open.
-func (i *Inode) Open(ctx context.Context, rp *vfs.ResolvingPath, vfsd *vfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) {
+func (i *inode) Open(ctx context.Context, rp *vfs.ResolvingPath, vfsd *vfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) {
fd, err := kernfs.NewGenericDirectoryFD(rp.Mount(), vfsd, &i.OrderedChildren, &i.locks, &opts)
if err != nil {
return nil, err
diff --git a/pkg/sentry/fsimpl/fuse/init.go b/pkg/sentry/fsimpl/fuse/init.go
new file mode 100644
index 000000000..779c2bd3f
--- /dev/null
+++ b/pkg/sentry/fsimpl/fuse/init.go
@@ -0,0 +1,166 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package fuse
+
+import (
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+)
+
+// consts used by FUSE_INIT negotiation.
+const (
+ // fuseMaxMaxPages is the maximum value for MaxPages received in InitOut.
+ // Follow the same behavior as unix fuse implementation.
+ fuseMaxMaxPages = 256
+
+ // Maximum value for the time granularity for file time stamps, 1s.
+ // Follow the same behavior as unix fuse implementation.
+ fuseMaxTimeGranNs = 1000000000
+
+ // Minimum value for MaxWrite.
+ // Follow the same behavior as unix fuse implementation.
+ fuseMinMaxWrite = 4096
+
+ // Temporary default value for max readahead, 128kb.
+ fuseDefaultMaxReadahead = 131072
+
+ // The FUSE_INIT_IN flags sent to the daemon.
+ // TODO(gvisor.dev/issue/3199): complete the flags.
+ fuseDefaultInitFlags = linux.FUSE_MAX_PAGES
+)
+
+// Adjustable maximums for Connection's cogestion control parameters.
+// Used as the upperbound of the config values.
+// Currently we do not support adjustment to them.
+var (
+ MaxUserBackgroundRequest uint16 = fuseDefaultMaxBackground
+ MaxUserCongestionThreshold uint16 = fuseDefaultCongestionThreshold
+)
+
+// InitSend sends a FUSE_INIT request.
+func (conn *connection) InitSend(creds *auth.Credentials, pid uint32) error {
+ in := linux.FUSEInitIn{
+ Major: linux.FUSE_KERNEL_VERSION,
+ Minor: linux.FUSE_KERNEL_MINOR_VERSION,
+ // TODO(gvisor.dev/issue/3196): find appropriate way to calculate this
+ MaxReadahead: fuseDefaultMaxReadahead,
+ Flags: fuseDefaultInitFlags,
+ }
+
+ req, err := conn.NewRequest(creds, pid, 0, linux.FUSE_INIT, &in)
+ if err != nil {
+ return err
+ }
+
+ // Since there is no task to block on and FUSE_INIT is the request
+ // to unblock other requests, use nil.
+ return conn.CallAsync(nil, req)
+}
+
+// InitRecv receives a FUSE_INIT reply and process it.
+func (conn *connection) InitRecv(res *Response, hasSysAdminCap bool) error {
+ if err := res.Error(); err != nil {
+ return err
+ }
+
+ var out linux.FUSEInitOut
+ if err := res.UnmarshalPayload(&out); err != nil {
+ return err
+ }
+
+ return conn.initProcessReply(&out, hasSysAdminCap)
+}
+
+// Process the FUSE_INIT reply from the FUSE server.
+func (conn *connection) initProcessReply(out *linux.FUSEInitOut, hasSysAdminCap bool) error {
+ // No support for old major fuse versions.
+ if out.Major != linux.FUSE_KERNEL_VERSION {
+ conn.connInitError = true
+
+ // Set the connection as initialized and unblock the blocked requests
+ // (i.e. return error for them).
+ conn.SetInitialized()
+
+ return nil
+ }
+
+ // Start processing the reply.
+ conn.connInitSuccess = true
+ conn.minor = out.Minor
+
+ // No support for limits before minor version 13.
+ if out.Minor >= 13 {
+ conn.bgLock.Lock()
+
+ if out.MaxBackground > 0 {
+ conn.maxBackground = out.MaxBackground
+
+ if !hasSysAdminCap &&
+ conn.maxBackground > MaxUserBackgroundRequest {
+ conn.maxBackground = MaxUserBackgroundRequest
+ }
+ }
+
+ if out.CongestionThreshold > 0 {
+ conn.congestionThreshold = out.CongestionThreshold
+
+ if !hasSysAdminCap &&
+ conn.congestionThreshold > MaxUserCongestionThreshold {
+ conn.congestionThreshold = MaxUserCongestionThreshold
+ }
+ }
+
+ conn.bgLock.Unlock()
+ }
+
+ // No support for the following flags before minor version 6.
+ if out.Minor >= 6 {
+ conn.asyncRead = out.Flags&linux.FUSE_ASYNC_READ != 0
+ conn.bigWrites = out.Flags&linux.FUSE_BIG_WRITES != 0
+ conn.dontMask = out.Flags&linux.FUSE_DONT_MASK != 0
+ conn.writebackCache = out.Flags&linux.FUSE_WRITEBACK_CACHE != 0
+ conn.cacheSymlinks = out.Flags&linux.FUSE_CACHE_SYMLINKS != 0
+ conn.abortErr = out.Flags&linux.FUSE_ABORT_ERROR != 0
+
+ // TODO(gvisor.dev/issue/3195): figure out how to use TimeGran (0 < TimeGran <= fuseMaxTimeGranNs).
+
+ if out.Flags&linux.FUSE_MAX_PAGES != 0 {
+ maxPages := out.MaxPages
+ if maxPages < 1 {
+ maxPages = 1
+ }
+ if maxPages > fuseMaxMaxPages {
+ maxPages = fuseMaxMaxPages
+ }
+ conn.maxPages = maxPages
+ }
+ }
+
+ // No support for negotiating MaxWrite before minor version 5.
+ if out.Minor >= 5 {
+ conn.maxWrite = out.MaxWrite
+ } else {
+ conn.maxWrite = fuseMinMaxWrite
+ }
+ if conn.maxWrite < fuseMinMaxWrite {
+ conn.maxWrite = fuseMinMaxWrite
+ }
+
+ // Set connection as initialized and unblock the requests
+ // issued before init.
+ conn.SetInitialized()
+
+ return nil
+}
diff --git a/pkg/syserror/syserror.go b/pkg/syserror/syserror.go
index c73072c42..798e07b01 100644
--- a/pkg/syserror/syserror.go
+++ b/pkg/syserror/syserror.go
@@ -61,6 +61,7 @@ var (
ENOMEM = error(syscall.ENOMEM)
ENOSPC = error(syscall.ENOSPC)
ENOSYS = error(syscall.ENOSYS)
+ ENOTCONN = error(syscall.ENOTCONN)
ENOTDIR = error(syscall.ENOTDIR)
ENOTEMPTY = error(syscall.ENOTEMPTY)
ENOTSOCK = error(syscall.ENOTSOCK)