diff options
Diffstat (limited to 'pkg/sentry')
25 files changed, 625 insertions, 79 deletions
diff --git a/pkg/sentry/fs/proc/sys_net.go b/pkg/sentry/fs/proc/sys_net.go index 52061175f..bbe282c03 100644 --- a/pkg/sentry/fs/proc/sys_net.go +++ b/pkg/sentry/fs/proc/sys_net.go @@ -17,6 +17,7 @@ package proc import ( "fmt" "io" + "math" "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" @@ -26,6 +27,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/fs/ramfs" "gvisor.dev/gvisor/pkg/sentry/inet" "gvisor.dev/gvisor/pkg/sync" + "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/tcpip/network/ipv4" "gvisor.dev/gvisor/pkg/usermem" "gvisor.dev/gvisor/pkg/waiter" @@ -498,6 +500,120 @@ func (f *ipForwardingFile) Write(ctx context.Context, _ *fs.File, src usermem.IO return n, f.stack.SetForwarding(ipv4.ProtocolNumber, *f.ipf.enabled) } +// portRangeInode implements fs.InodeOperations. It provides and allows +// modification of the range of ephemeral ports that IPv4 and IPv6 sockets +// choose from. +// +// +stateify savable +type portRangeInode struct { + fsutil.SimpleFileInode + + stack inet.Stack `state:"wait"` + + // start and end store the port range. We must save/restore this here, + // since a netstack instance is created on restore. + start *uint16 + end *uint16 +} + +func newPortRangeInode(ctx context.Context, msrc *fs.MountSource, s inet.Stack) *fs.Inode { + ipf := &portRangeInode{ + SimpleFileInode: *fsutil.NewSimpleFileInode(ctx, fs.RootOwner, fs.FilePermsFromMode(0644), linux.PROC_SUPER_MAGIC), + stack: s, + } + sattr := fs.StableAttr{ + DeviceID: device.ProcDevice.DeviceID(), + InodeID: device.ProcDevice.NextIno(), + BlockSize: usermem.PageSize, + Type: fs.SpecialFile, + } + return fs.NewInode(ctx, ipf, msrc, sattr) +} + +// Truncate implements fs.InodeOperations.Truncate. Truncate is called when +// O_TRUNC is specified for any kind of existing Dirent but is not called via +// (f)truncate for proc files. +func (*portRangeInode) Truncate(context.Context, *fs.Inode, int64) error { + return nil +} + +// +stateify savable +type portRangeFile struct { + fsutil.FileGenericSeek `state:"nosave"` + fsutil.FileNoIoctl `state:"nosave"` + fsutil.FileNoMMap `state:"nosave"` + fsutil.FileNoSplice `state:"nosave"` + fsutil.FileNoopFlush `state:"nosave"` + fsutil.FileNoopFsync `state:"nosave"` + fsutil.FileNoopRelease `state:"nosave"` + fsutil.FileNotDirReaddir `state:"nosave"` + fsutil.FileUseInodeUnstableAttr `state:"nosave"` + waiter.AlwaysReady `state:"nosave"` + + inode *portRangeInode +} + +// GetFile implements fs.InodeOperations.GetFile. +func (in *portRangeInode) GetFile(ctx context.Context, dirent *fs.Dirent, flags fs.FileFlags) (*fs.File, error) { + flags.Pread = true + flags.Pwrite = true + return fs.NewFile(ctx, dirent, flags, &portRangeFile{ + inode: in, + }), nil +} + +// Read implements fs.FileOperations.Read. +func (pf *portRangeFile) Read(ctx context.Context, _ *fs.File, dst usermem.IOSequence, offset int64) (int64, error) { + if offset != 0 { + return 0, io.EOF + } + + if pf.inode.start == nil { + start, end := pf.inode.stack.PortRange() + pf.inode.start = &start + pf.inode.end = &end + } + + contents := fmt.Sprintf("%d %d\n", *pf.inode.start, *pf.inode.end) + n, err := dst.CopyOut(ctx, []byte(contents)) + return int64(n), err +} + +// Write implements fs.FileOperations.Write. +// +// Offset is ignored, multiple writes are not supported. +func (pf *portRangeFile) Write(ctx context.Context, _ *fs.File, src usermem.IOSequence, offset int64) (int64, error) { + if src.NumBytes() == 0 { + return 0, nil + } + + // Only consider size of one memory page for input for performance + // reasons. + src = src.TakeFirst(usermem.PageSize - 1) + + ports := make([]int32, 2) + n, err := usermem.CopyInt32StringsInVec(ctx, src.IO, src.Addrs, ports, src.Opts) + if err != nil { + return 0, err + } + + // Port numbers must be uint16s. + if ports[0] < 0 || ports[1] < 0 || ports[0] > math.MaxUint16 || ports[1] > math.MaxUint16 { + return 0, syserror.EINVAL + } + + if err := pf.inode.stack.SetPortRange(uint16(ports[0]), uint16(ports[1])); err != nil { + return 0, err + } + if pf.inode.start == nil { + pf.inode.start = new(uint16) + pf.inode.end = new(uint16) + } + *pf.inode.start = uint16(ports[0]) + *pf.inode.end = uint16(ports[1]) + return n, nil +} + func (p *proc) newSysNetIPv4Dir(ctx context.Context, msrc *fs.MountSource, s inet.Stack) *fs.Inode { contents := map[string]*fs.Inode{ // Add tcp_sack. @@ -506,12 +622,15 @@ func (p *proc) newSysNetIPv4Dir(ctx context.Context, msrc *fs.MountSource, s ine // Add ip_forward. "ip_forward": newIPForwardingInode(ctx, msrc, s), + // Allow for configurable ephemeral port ranges. Note that this + // controls ports for both IPv4 and IPv6 sockets. + "ip_local_port_range": newPortRangeInode(ctx, msrc, s), + // The following files are simple stubs until they are // implemented in netstack, most of these files are // configuration related. We use the value closest to the // actual netstack behavior or any empty file, all of these // files will have mode 0444 (read-only for all users). - "ip_local_port_range": newStaticProcInode(ctx, msrc, []byte("16000 65535")), "ip_local_reserved_ports": newStaticProcInode(ctx, msrc, []byte("")), "ipfrag_time": newStaticProcInode(ctx, msrc, []byte("30")), "ip_nonlocal_bind": newStaticProcInode(ctx, msrc, []byte("0")), diff --git a/pkg/sentry/fsimpl/devpts/devpts.go b/pkg/sentry/fsimpl/devpts/devpts.go index d8c237753..e75954105 100644 --- a/pkg/sentry/fsimpl/devpts/devpts.go +++ b/pkg/sentry/fsimpl/devpts/devpts.go @@ -137,6 +137,11 @@ func (fs *filesystem) Release(ctx context.Context) { fs.Filesystem.Release(ctx) } +// MountOptions implements vfs.FilesystemImpl.MountOptions. +func (fs *filesystem) MountOptions() string { + return "" +} + // rootInode is the root directory inode for the devpts mounts. // // +stateify savable diff --git a/pkg/sentry/fsimpl/ext/filesystem.go b/pkg/sentry/fsimpl/ext/filesystem.go index 917f1873d..d4fc484a2 100644 --- a/pkg/sentry/fsimpl/ext/filesystem.go +++ b/pkg/sentry/fsimpl/ext/filesystem.go @@ -548,3 +548,8 @@ func (fs *filesystem) PrependPath(ctx context.Context, vfsroot, vd vfs.VirtualDe defer fs.mu.RUnlock() return genericPrependPath(vfsroot, vd.Mount(), vd.Dentry().Impl().(*dentry), b) } + +// MountOptions implements vfs.FilesystemImpl.MountOptions. +func (fs *filesystem) MountOptions() string { + return "" +} diff --git a/pkg/sentry/fsimpl/fuse/fusefs.go b/pkg/sentry/fsimpl/fuse/fusefs.go index 204d8d143..fef857afb 100644 --- a/pkg/sentry/fsimpl/fuse/fusefs.go +++ b/pkg/sentry/fsimpl/fuse/fusefs.go @@ -47,19 +47,14 @@ type FilesystemType struct{} // +stateify savable type filesystemOptions struct { - // userID specifies the numeric uid of the mount owner. - // This option should not be specified by the filesystem owner. - // It is set by libfuse (or, if libfuse is not used, must be set - // by the filesystem itself). For more information, see man page - // for fuse(8) - userID uint32 - - // groupID specifies the numeric gid of the mount owner. - // This option should not be specified by the filesystem owner. - // It is set by libfuse (or, if libfuse is not used, must be set - // by the filesystem itself). For more information, see man page - // for fuse(8) - groupID uint32 + // mopts contains the raw, unparsed mount options passed to this filesystem. + mopts string + + // uid of the mount owner. + uid auth.KUID + + // gid of the mount owner. + gid auth.KGID // rootMode specifies the the file mode of the filesystem's root. rootMode linux.FileMode @@ -73,6 +68,19 @@ type filesystemOptions struct { // specified as "max_read" in fs parameters. // If not specified by user, use math.MaxUint32 as default value. maxRead uint32 + + // defaultPermissions is the default_permissions mount option. It instructs + // the kernel to perform a standard unix permission checks based on + // ownership and mode bits, instead of deferring the check to the server. + // + // Immutable after mount. + defaultPermissions bool + + // allowOther is the allow_other mount option. It allows processes that + // don't own the FUSE mount to call into it. + // + // Immutable after mount. + allowOther bool } // filesystem implements vfs.FilesystemImpl. @@ -108,18 +116,18 @@ func (fsType FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt return nil, nil, err } - var fsopts filesystemOptions + fsopts := filesystemOptions{mopts: opts.Data} mopts := vfs.GenericParseMountOptions(opts.Data) deviceDescriptorStr, ok := mopts["fd"] if !ok { - log.Warningf("%s.GetFilesystem: communication file descriptor N (obtained by opening /dev/fuse) must be specified as 'fd=N'", fsType.Name()) + ctx.Warningf("fusefs.FilesystemType.GetFilesystem: mandatory mount option fd missing") return nil, nil, syserror.EINVAL } delete(mopts, "fd") deviceDescriptor, err := strconv.ParseInt(deviceDescriptorStr, 10 /* base */, 32 /* bitSize */) if err != nil { - log.Debugf("%s.GetFilesystem: device FD '%v' not parsable: %v", fsType.Name(), deviceDescriptorStr, err) + ctx.Debugf("fusefs.FilesystemType.GetFilesystem: invalid fd: %q (%v)", deviceDescriptorStr, err) return nil, nil, syserror.EINVAL } @@ -141,38 +149,54 @@ func (fsType FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt // Parse and set all the other supported FUSE mount options. // TODO(gVisor.dev/issue/3229): Expand the supported mount options. - if userIDStr, ok := mopts["user_id"]; ok { + if uidStr, ok := mopts["user_id"]; ok { delete(mopts, "user_id") - userID, err := strconv.ParseUint(userIDStr, 10, 32) + uid, err := strconv.ParseUint(uidStr, 10, 32) if err != nil { - log.Warningf("%s.GetFilesystem: invalid user_id: user_id=%s", fsType.Name(), userIDStr) + log.Warningf("%s.GetFilesystem: invalid user_id: user_id=%s", fsType.Name(), uidStr) return nil, nil, syserror.EINVAL } - fsopts.userID = uint32(userID) + kuid := creds.UserNamespace.MapToKUID(auth.UID(uid)) + if !kuid.Ok() { + ctx.Warningf("fusefs.FilesystemType.GetFilesystem: unmapped uid: %d", uid) + return nil, nil, syserror.EINVAL + } + fsopts.uid = kuid + } else { + ctx.Warningf("fusefs.FilesystemType.GetFilesystem: mandatory mount option user_id missing") + return nil, nil, syserror.EINVAL } - if groupIDStr, ok := mopts["group_id"]; ok { + if gidStr, ok := mopts["group_id"]; ok { delete(mopts, "group_id") - groupID, err := strconv.ParseUint(groupIDStr, 10, 32) + gid, err := strconv.ParseUint(gidStr, 10, 32) if err != nil { - log.Warningf("%s.GetFilesystem: invalid group_id: group_id=%s", fsType.Name(), groupIDStr) + log.Warningf("%s.GetFilesystem: invalid group_id: group_id=%s", fsType.Name(), gidStr) + return nil, nil, syserror.EINVAL + } + kgid := creds.UserNamespace.MapToKGID(auth.GID(gid)) + if !kgid.Ok() { + ctx.Warningf("fusefs.FilesystemType.GetFilesystem: unmapped gid: %d", gid) return nil, nil, syserror.EINVAL } - fsopts.groupID = uint32(groupID) + fsopts.gid = kgid + } else { + ctx.Warningf("fusefs.FilesystemType.GetFilesystem: mandatory mount option group_id missing") + return nil, nil, syserror.EINVAL } - rootMode := linux.FileMode(0777) - modeStr, ok := mopts["rootmode"] - if ok { + if modeStr, ok := mopts["rootmode"]; ok { delete(mopts, "rootmode") mode, err := strconv.ParseUint(modeStr, 8, 32) if err != nil { log.Warningf("%s.GetFilesystem: invalid mode: %q", fsType.Name(), modeStr) return nil, nil, syserror.EINVAL } - rootMode = linux.FileMode(mode) + fsopts.rootMode = linux.FileMode(mode) + } else { + ctx.Warningf("fusefs.FilesystemType.GetFilesystem: mandatory mount option rootmode missing") + return nil, nil, syserror.EINVAL } - fsopts.rootMode = rootMode // Set the maxInFlightRequests option. fsopts.maxActiveRequests = maxActiveRequestsDefault @@ -192,6 +216,16 @@ func (fsType FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt fsopts.maxRead = math.MaxUint32 } + if _, ok := mopts["default_permissions"]; ok { + delete(mopts, "default_permissions") + fsopts.defaultPermissions = true + } + + if _, ok := mopts["allow_other"]; ok { + delete(mopts, "allow_other") + fsopts.allowOther = true + } + // Check for unparsed options. if len(mopts) != 0 { log.Warningf("%s.GetFilesystem: unsupported or unknown options: %v", fsType.Name(), mopts) @@ -260,6 +294,11 @@ func (fs *filesystem) Release(ctx context.Context) { fs.Filesystem.Release(ctx) } +// MountOptions implements vfs.FilesystemImpl.MountOptions. +func (fs *filesystem) MountOptions() string { + return fs.opts.mopts +} + // inode implements kernfs.Inode. // // +stateify savable @@ -318,6 +357,37 @@ func (fs *filesystem) newInode(ctx context.Context, nodeID uint64, attr linux.FU return i } +// CheckPermissions implements kernfs.Inode.CheckPermissions. +func (i *inode) CheckPermissions(ctx context.Context, creds *auth.Credentials, ats vfs.AccessTypes) error { + // Since FUSE operations are ultimately backed by a userspace process (the + // fuse daemon), allowing a process to call into fusefs grants the daemon + // ptrace-like capabilities over the calling process. Because of this, by + // default FUSE only allows the mount owner to interact with the + // filesystem. This explicitly excludes setuid/setgid processes. + // + // This behaviour can be overriden with the 'allow_other' mount option. + // + // See fs/fuse/dir.c:fuse_allow_current_process() in Linux. + if !i.fs.opts.allowOther { + if creds.RealKUID != i.fs.opts.uid || + creds.EffectiveKUID != i.fs.opts.uid || + creds.SavedKUID != i.fs.opts.uid || + creds.RealKGID != i.fs.opts.gid || + creds.EffectiveKGID != i.fs.opts.gid || + creds.SavedKGID != i.fs.opts.gid { + return syserror.EACCES + } + } + + // By default, fusefs delegates all permission checks to the server. + // However, standard unix permission checks can be enabled with the + // default_permissions mount option. + if i.fs.opts.defaultPermissions { + return i.InodeAttrs.CheckPermissions(ctx, creds, ats) + } + return nil +} + // Open implements kernfs.Inode.Open. func (i *inode) Open(ctx context.Context, rp *vfs.ResolvingPath, d *kernfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) { isDir := i.InodeAttrs.Mode().IsDir() diff --git a/pkg/sentry/fsimpl/gofer/filesystem.go b/pkg/sentry/fsimpl/gofer/filesystem.go index 8f95473b6..c34451269 100644 --- a/pkg/sentry/fsimpl/gofer/filesystem.go +++ b/pkg/sentry/fsimpl/gofer/filesystem.go @@ -15,7 +15,9 @@ package gofer import ( + "fmt" "math" + "strings" "sync" "sync/atomic" @@ -1608,3 +1610,58 @@ func (fs *filesystem) PrependPath(ctx context.Context, vfsroot, vd vfs.VirtualDe defer fs.renameMu.RUnlock() return genericPrependPath(vfsroot, vd.Mount(), vd.Dentry().Impl().(*dentry), b) } + +type mopt struct { + key string + value interface{} +} + +func (m mopt) String() string { + if m.value == nil { + return fmt.Sprintf("%s", m.key) + } + return fmt.Sprintf("%s=%v", m.key, m.value) +} + +// MountOptions implements vfs.FilesystemImpl.MountOptions. +func (fs *filesystem) MountOptions() string { + optsKV := []mopt{ + {moptTransport, transportModeFD}, // Only valid value, currently. + {moptReadFD, fs.opts.fd}, // Currently, read and write FD are the same. + {moptWriteFD, fs.opts.fd}, // Currently, read and write FD are the same. + {moptAname, fs.opts.aname}, + {moptDfltUID, fs.opts.dfltuid}, + {moptDfltGID, fs.opts.dfltgid}, + {moptMsize, fs.opts.msize}, + {moptVersion, fs.opts.version}, + {moptDentryCacheLimit, fs.opts.maxCachedDentries}, + } + + switch fs.opts.interop { + case InteropModeExclusive: + optsKV = append(optsKV, mopt{moptCache, cacheFSCache}) + case InteropModeWritethrough: + optsKV = append(optsKV, mopt{moptCache, cacheFSCacheWritethrough}) + case InteropModeShared: + if fs.opts.regularFilesUseSpecialFileFD { + optsKV = append(optsKV, mopt{moptCache, cacheNone}) + } else { + optsKV = append(optsKV, mopt{moptCache, cacheRemoteRevalidating}) + } + } + if fs.opts.forcePageCache { + optsKV = append(optsKV, mopt{moptForcePageCache, nil}) + } + if fs.opts.limitHostFDTranslation { + optsKV = append(optsKV, mopt{moptLimitHostFDTranslation, nil}) + } + if fs.opts.overlayfsStaleRead { + optsKV = append(optsKV, mopt{moptOverlayfsStaleRead, nil}) + } + + opts := make([]string, 0, len(optsKV)) + for _, opt := range optsKV { + opts = append(opts, opt.String()) + } + return strings.Join(opts, ",") +} diff --git a/pkg/sentry/fsimpl/gofer/gofer.go b/pkg/sentry/fsimpl/gofer/gofer.go index 1508cbdf1..71569dc65 100644 --- a/pkg/sentry/fsimpl/gofer/gofer.go +++ b/pkg/sentry/fsimpl/gofer/gofer.go @@ -66,6 +66,34 @@ import ( // Name is the default filesystem name. const Name = "9p" +// Mount option names for goferfs. +const ( + moptTransport = "trans" + moptReadFD = "rfdno" + moptWriteFD = "wfdno" + moptAname = "aname" + moptDfltUID = "dfltuid" + moptDfltGID = "dfltgid" + moptMsize = "msize" + moptVersion = "version" + moptDentryCacheLimit = "dentry_cache_limit" + moptCache = "cache" + moptForcePageCache = "force_page_cache" + moptLimitHostFDTranslation = "limit_host_fd_translation" + moptOverlayfsStaleRead = "overlayfs_stale_read" +) + +// Valid values for the "cache" mount option. +const ( + cacheNone = "none" + cacheFSCache = "fscache" + cacheFSCacheWritethrough = "fscache_writethrough" + cacheRemoteRevalidating = "remote_revalidating" +) + +// Valid values for "trans" mount option. +const transportModeFD = "fd" + // FilesystemType implements vfs.FilesystemType. // // +stateify savable @@ -301,39 +329,39 @@ func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt // Get the attach name. fsopts.aname = "/" - if aname, ok := mopts["aname"]; ok { - delete(mopts, "aname") + if aname, ok := mopts[moptAname]; ok { + delete(mopts, moptAname) fsopts.aname = aname } // Parse the cache policy. For historical reasons, this defaults to the // least generally-applicable option, InteropModeExclusive. fsopts.interop = InteropModeExclusive - if cache, ok := mopts["cache"]; ok { - delete(mopts, "cache") + if cache, ok := mopts[moptCache]; ok { + delete(mopts, moptCache) switch cache { - case "fscache": + case cacheFSCache: fsopts.interop = InteropModeExclusive - case "fscache_writethrough": + case cacheFSCacheWritethrough: fsopts.interop = InteropModeWritethrough - case "none": + case cacheNone: fsopts.regularFilesUseSpecialFileFD = true fallthrough - case "remote_revalidating": + case cacheRemoteRevalidating: fsopts.interop = InteropModeShared default: - ctx.Warningf("gofer.FilesystemType.GetFilesystem: invalid cache policy: cache=%s", cache) + ctx.Warningf("gofer.FilesystemType.GetFilesystem: invalid cache policy: %s=%s", moptCache, cache) return nil, nil, syserror.EINVAL } } // Parse the default UID and GID. fsopts.dfltuid = _V9FS_DEFUID - if dfltuidstr, ok := mopts["dfltuid"]; ok { - delete(mopts, "dfltuid") + if dfltuidstr, ok := mopts[moptDfltUID]; ok { + delete(mopts, moptDfltUID) dfltuid, err := strconv.ParseUint(dfltuidstr, 10, 32) if err != nil { - ctx.Warningf("gofer.FilesystemType.GetFilesystem: invalid default UID: dfltuid=%s", dfltuidstr) + ctx.Warningf("gofer.FilesystemType.GetFilesystem: invalid default UID: %s=%s", moptDfltUID, dfltuidstr) return nil, nil, syserror.EINVAL } // In Linux, dfltuid is interpreted as a UID and is converted to a KUID @@ -342,11 +370,11 @@ func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt fsopts.dfltuid = auth.KUID(dfltuid) } fsopts.dfltgid = _V9FS_DEFGID - if dfltgidstr, ok := mopts["dfltgid"]; ok { - delete(mopts, "dfltgid") + if dfltgidstr, ok := mopts[moptDfltGID]; ok { + delete(mopts, moptDfltGID) dfltgid, err := strconv.ParseUint(dfltgidstr, 10, 32) if err != nil { - ctx.Warningf("gofer.FilesystemType.GetFilesystem: invalid default UID: dfltgid=%s", dfltgidstr) + ctx.Warningf("gofer.FilesystemType.GetFilesystem: invalid default UID: %s=%s", moptDfltGID, dfltgidstr) return nil, nil, syserror.EINVAL } fsopts.dfltgid = auth.KGID(dfltgid) @@ -354,11 +382,11 @@ func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt // Parse the 9P message size. fsopts.msize = 1024 * 1024 // 1M, tested to give good enough performance up to 64M - if msizestr, ok := mopts["msize"]; ok { - delete(mopts, "msize") + if msizestr, ok := mopts[moptMsize]; ok { + delete(mopts, moptMsize) msize, err := strconv.ParseUint(msizestr, 10, 32) if err != nil { - ctx.Warningf("gofer.FilesystemType.GetFilesystem: invalid message size: msize=%s", msizestr) + ctx.Warningf("gofer.FilesystemType.GetFilesystem: invalid message size: %s=%s", moptMsize, msizestr) return nil, nil, syserror.EINVAL } fsopts.msize = uint32(msize) @@ -366,34 +394,34 @@ func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt // Parse the 9P protocol version. fsopts.version = p9.HighestVersionString() - if version, ok := mopts["version"]; ok { - delete(mopts, "version") + if version, ok := mopts[moptVersion]; ok { + delete(mopts, moptVersion) fsopts.version = version } // Parse the dentry cache limit. fsopts.maxCachedDentries = 1000 - if str, ok := mopts["dentry_cache_limit"]; ok { - delete(mopts, "dentry_cache_limit") + if str, ok := mopts[moptDentryCacheLimit]; ok { + delete(mopts, moptDentryCacheLimit) maxCachedDentries, err := strconv.ParseUint(str, 10, 64) if err != nil { - ctx.Warningf("gofer.FilesystemType.GetFilesystem: invalid dentry cache limit: dentry_cache_limit=%s", str) + ctx.Warningf("gofer.FilesystemType.GetFilesystem: invalid dentry cache limit: %s=%s", moptDentryCacheLimit, str) return nil, nil, syserror.EINVAL } fsopts.maxCachedDentries = maxCachedDentries } // Handle simple flags. - if _, ok := mopts["force_page_cache"]; ok { - delete(mopts, "force_page_cache") + if _, ok := mopts[moptForcePageCache]; ok { + delete(mopts, moptForcePageCache) fsopts.forcePageCache = true } - if _, ok := mopts["limit_host_fd_translation"]; ok { - delete(mopts, "limit_host_fd_translation") + if _, ok := mopts[moptLimitHostFDTranslation]; ok { + delete(mopts, moptLimitHostFDTranslation) fsopts.limitHostFDTranslation = true } - if _, ok := mopts["overlayfs_stale_read"]; ok { - delete(mopts, "overlayfs_stale_read") + if _, ok := mopts[moptOverlayfsStaleRead]; ok { + delete(mopts, moptOverlayfsStaleRead) fsopts.overlayfsStaleRead = true } // fsopts.regularFilesUseSpecialFileFD can only be enabled by specifying @@ -469,34 +497,34 @@ func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt func getFDFromMountOptionsMap(ctx context.Context, mopts map[string]string) (int, error) { // Check that the transport is "fd". - trans, ok := mopts["trans"] - if !ok || trans != "fd" { - ctx.Warningf("gofer.getFDFromMountOptionsMap: transport must be specified as 'trans=fd'") + trans, ok := mopts[moptTransport] + if !ok || trans != transportModeFD { + ctx.Warningf("gofer.getFDFromMountOptionsMap: transport must be specified as '%s=%s'", moptTransport, transportModeFD) return -1, syserror.EINVAL } - delete(mopts, "trans") + delete(mopts, moptTransport) // Check that read and write FDs are provided and identical. - rfdstr, ok := mopts["rfdno"] + rfdstr, ok := mopts[moptReadFD] if !ok { - ctx.Warningf("gofer.getFDFromMountOptionsMap: read FD must be specified as 'rfdno=<file descriptor>'") + ctx.Warningf("gofer.getFDFromMountOptionsMap: read FD must be specified as '%s=<file descriptor>'", moptReadFD) return -1, syserror.EINVAL } - delete(mopts, "rfdno") + delete(mopts, moptReadFD) rfd, err := strconv.Atoi(rfdstr) if err != nil { - ctx.Warningf("gofer.getFDFromMountOptionsMap: invalid read FD: rfdno=%s", rfdstr) + ctx.Warningf("gofer.getFDFromMountOptionsMap: invalid read FD: %s=%s", moptReadFD, rfdstr) return -1, syserror.EINVAL } - wfdstr, ok := mopts["wfdno"] + wfdstr, ok := mopts[moptWriteFD] if !ok { - ctx.Warningf("gofer.getFDFromMountOptionsMap: write FD must be specified as 'wfdno=<file descriptor>'") + ctx.Warningf("gofer.getFDFromMountOptionsMap: write FD must be specified as '%s=<file descriptor>'", moptWriteFD) return -1, syserror.EINVAL } - delete(mopts, "wfdno") + delete(mopts, moptWriteFD) wfd, err := strconv.Atoi(wfdstr) if err != nil { - ctx.Warningf("gofer.getFDFromMountOptionsMap: invalid write FD: wfdno=%s", wfdstr) + ctx.Warningf("gofer.getFDFromMountOptionsMap: invalid write FD: %s=%s", moptWriteFD, wfdstr) return -1, syserror.EINVAL } if rfd != wfd { diff --git a/pkg/sentry/fsimpl/host/host.go b/pkg/sentry/fsimpl/host/host.go index ad5de80dc..b9cce4181 100644 --- a/pkg/sentry/fsimpl/host/host.go +++ b/pkg/sentry/fsimpl/host/host.go @@ -260,6 +260,11 @@ func (fs *filesystem) PrependPath(ctx context.Context, vfsroot, vd vfs.VirtualDe return vfs.PrependPathSyntheticError{} } +// MountOptions implements vfs.FilesystemImpl.MountOptions. +func (fs *filesystem) MountOptions() string { + return "" +} + // CheckPermissions implements kernfs.Inode.CheckPermissions. func (i *inode) CheckPermissions(ctx context.Context, creds *auth.Credentials, ats vfs.AccessTypes) error { var s unix.Stat_t diff --git a/pkg/sentry/fsimpl/kernfs/kernfs_test.go b/pkg/sentry/fsimpl/kernfs/kernfs_test.go index e63588e33..1cd3137e6 100644 --- a/pkg/sentry/fsimpl/kernfs/kernfs_test.go +++ b/pkg/sentry/fsimpl/kernfs/kernfs_test.go @@ -67,6 +67,11 @@ type filesystem struct { kernfs.Filesystem } +// MountOptions implements vfs.FilesystemImpl.MountOptions. +func (fs *filesystem) MountOptions() string { + return "" +} + type file struct { kernfs.DynamicBytesFile content string diff --git a/pkg/sentry/fsimpl/overlay/filesystem.go b/pkg/sentry/fsimpl/overlay/filesystem.go index 917709d75..84e37f793 100644 --- a/pkg/sentry/fsimpl/overlay/filesystem.go +++ b/pkg/sentry/fsimpl/overlay/filesystem.go @@ -1764,3 +1764,15 @@ func (fs *filesystem) PrependPath(ctx context.Context, vfsroot, vd vfs.VirtualDe defer fs.renameMu.RUnlock() return genericPrependPath(vfsroot, vd.Mount(), vd.Dentry().Impl().(*dentry), b) } + +// MountOptions implements vfs.FilesystemImpl.MountOptions. +func (fs *filesystem) MountOptions() string { + // Return the mount options from the topmost layer. + var vd vfs.VirtualDentry + if fs.opts.UpperRoot.Ok() { + vd = fs.opts.UpperRoot + } else { + vd = fs.opts.LowerRoots[0] + } + return vd.Mount().Filesystem().Impl().MountOptions() +} diff --git a/pkg/sentry/fsimpl/pipefs/pipefs.go b/pkg/sentry/fsimpl/pipefs/pipefs.go index 429733c10..3f05e444e 100644 --- a/pkg/sentry/fsimpl/pipefs/pipefs.go +++ b/pkg/sentry/fsimpl/pipefs/pipefs.go @@ -80,6 +80,11 @@ func (fs *filesystem) PrependPath(ctx context.Context, vfsroot, vd vfs.VirtualDe return vfs.PrependPathSyntheticError{} } +// MountOptions implements vfs.FilesystemImpl.MountOptions. +func (fs *filesystem) MountOptions() string { + return "" +} + // inode implements kernfs.Inode. // // +stateify savable diff --git a/pkg/sentry/fsimpl/proc/filesystem.go b/pkg/sentry/fsimpl/proc/filesystem.go index 8716d0a3c..254a8b062 100644 --- a/pkg/sentry/fsimpl/proc/filesystem.go +++ b/pkg/sentry/fsimpl/proc/filesystem.go @@ -104,6 +104,11 @@ func (fs *filesystem) Release(ctx context.Context) { fs.Filesystem.Release(ctx) } +// MountOptions implements vfs.FilesystemImpl.MountOptions. +func (fs *filesystem) MountOptions() string { + return fmt.Sprintf("dentry_cache_limit=%d", fs.MaxCachedDentries) +} + // dynamicInode is an overfitted interface for common Inodes with // dynamicByteSource types used in procfs. // diff --git a/pkg/sentry/fsimpl/proc/tasks_sys.go b/pkg/sentry/fsimpl/proc/tasks_sys.go index fd7823daa..fb274b78e 100644 --- a/pkg/sentry/fsimpl/proc/tasks_sys.go +++ b/pkg/sentry/fsimpl/proc/tasks_sys.go @@ -17,6 +17,7 @@ package proc import ( "bytes" "fmt" + "math" "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" @@ -69,17 +70,17 @@ func (fs *filesystem) newSysNetDir(ctx context.Context, root *auth.Credentials, if stack := k.RootNetworkNamespace().Stack(); stack != nil { contents = map[string]kernfs.Inode{ "ipv4": fs.newStaticDir(ctx, root, map[string]kernfs.Inode{ - "tcp_recovery": fs.newInode(ctx, root, 0644, &tcpRecoveryData{stack: stack}), - "tcp_rmem": fs.newInode(ctx, root, 0644, &tcpMemData{stack: stack, dir: tcpRMem}), - "tcp_sack": fs.newInode(ctx, root, 0644, &tcpSackData{stack: stack}), - "tcp_wmem": fs.newInode(ctx, root, 0644, &tcpMemData{stack: stack, dir: tcpWMem}), - "ip_forward": fs.newInode(ctx, root, 0444, &ipForwarding{stack: stack}), + "ip_forward": fs.newInode(ctx, root, 0444, &ipForwarding{stack: stack}), + "ip_local_port_range": fs.newInode(ctx, root, 0644, &portRange{stack: stack}), + "tcp_recovery": fs.newInode(ctx, root, 0644, &tcpRecoveryData{stack: stack}), + "tcp_rmem": fs.newInode(ctx, root, 0644, &tcpMemData{stack: stack, dir: tcpRMem}), + "tcp_sack": fs.newInode(ctx, root, 0644, &tcpSackData{stack: stack}), + "tcp_wmem": fs.newInode(ctx, root, 0644, &tcpMemData{stack: stack, dir: tcpWMem}), // The following files are simple stubs until they are implemented in // netstack, most of these files are configuration related. We use the // value closest to the actual netstack behavior or any empty file, all // of these files will have mode 0444 (read-only for all users). - "ip_local_port_range": fs.newInode(ctx, root, 0444, newStaticFile("16000 65535")), "ip_local_reserved_ports": fs.newInode(ctx, root, 0444, newStaticFile("")), "ipfrag_time": fs.newInode(ctx, root, 0444, newStaticFile("30")), "ip_nonlocal_bind": fs.newInode(ctx, root, 0444, newStaticFile("0")), @@ -421,3 +422,68 @@ func (ipf *ipForwarding) Write(ctx context.Context, src usermem.IOSequence, offs } return n, nil } + +// portRange implements vfs.WritableDynamicBytesSource for +// /proc/sys/net/ipv4/ip_local_port_range. +// +// +stateify savable +type portRange struct { + kernfs.DynamicBytesFile + + stack inet.Stack `state:"wait"` + + // start and end store the port range. We must save/restore this here, + // since a netstack instance is created on restore. + start *uint16 + end *uint16 +} + +var _ vfs.WritableDynamicBytesSource = (*portRange)(nil) + +// Generate implements vfs.DynamicBytesSource.Generate. +func (pr *portRange) Generate(ctx context.Context, buf *bytes.Buffer) error { + if pr.start == nil { + start, end := pr.stack.PortRange() + pr.start = &start + pr.end = &end + } + _, err := fmt.Fprintf(buf, "%d %d\n", *pr.start, *pr.end) + return err +} + +// Write implements vfs.WritableDynamicBytesSource.Write. +func (pr *portRange) Write(ctx context.Context, src usermem.IOSequence, offset int64) (int64, error) { + if offset != 0 { + // No need to handle partial writes thus far. + return 0, syserror.EINVAL + } + if src.NumBytes() == 0 { + return 0, nil + } + + // Limit input size so as not to impact performance if input size is + // large. + src = src.TakeFirst(usermem.PageSize - 1) + + ports := make([]int32, 2) + n, err := usermem.CopyInt32StringsInVec(ctx, src.IO, src.Addrs, ports, src.Opts) + if err != nil { + return 0, err + } + + // Port numbers must be uint16s. + if ports[0] < 0 || ports[1] < 0 || ports[0] > math.MaxUint16 || ports[1] > math.MaxUint16 { + return 0, syserror.EINVAL + } + + if err := pr.stack.SetPortRange(uint16(ports[0]), uint16(ports[1])); err != nil { + return 0, err + } + if pr.start == nil { + pr.start = new(uint16) + pr.end = new(uint16) + } + *pr.start = uint16(ports[0]) + *pr.end = uint16(ports[1]) + return n, nil +} diff --git a/pkg/sentry/fsimpl/sockfs/sockfs.go b/pkg/sentry/fsimpl/sockfs/sockfs.go index fda1fa942..735756280 100644 --- a/pkg/sentry/fsimpl/sockfs/sockfs.go +++ b/pkg/sentry/fsimpl/sockfs/sockfs.go @@ -85,6 +85,11 @@ func (fs *filesystem) PrependPath(ctx context.Context, vfsroot, vd vfs.VirtualDe return vfs.PrependPathSyntheticError{} } +// MountOptions implements vfs.FilesystemImpl.MountOptions. +func (fs *filesystem) MountOptions() string { + return "" +} + // inode implements kernfs.Inode. // // +stateify savable diff --git a/pkg/sentry/fsimpl/sys/sys.go b/pkg/sentry/fsimpl/sys/sys.go index dbd9ebdda..1d9280dae 100644 --- a/pkg/sentry/fsimpl/sys/sys.go +++ b/pkg/sentry/fsimpl/sys/sys.go @@ -143,6 +143,11 @@ func (fs *filesystem) Release(ctx context.Context) { fs.Filesystem.Release(ctx) } +// MountOptions implements vfs.FilesystemImpl.MountOptions. +func (fs *filesystem) MountOptions() string { + return fmt.Sprintf("dentry_cache_limit=%d", fs.MaxCachedDentries) +} + // dir implements kernfs.Inode. // // +stateify savable diff --git a/pkg/sentry/fsimpl/tmpfs/filesystem.go b/pkg/sentry/fsimpl/tmpfs/filesystem.go index 4f675c21e..5fdca1d46 100644 --- a/pkg/sentry/fsimpl/tmpfs/filesystem.go +++ b/pkg/sentry/fsimpl/tmpfs/filesystem.go @@ -898,3 +898,8 @@ func (fs *filesystem) PrependPath(ctx context.Context, vfsroot, vd vfs.VirtualDe d = d.parent } } + +// MountOptions implements vfs.FilesystemImpl.MountOptions. +func (fs *filesystem) MountOptions() string { + return fs.mopts +} diff --git a/pkg/sentry/fsimpl/tmpfs/tmpfs.go b/pkg/sentry/fsimpl/tmpfs/tmpfs.go index a01e413e0..8df81f589 100644 --- a/pkg/sentry/fsimpl/tmpfs/tmpfs.go +++ b/pkg/sentry/fsimpl/tmpfs/tmpfs.go @@ -70,6 +70,10 @@ type filesystem struct { // devMinor is the filesystem's minor device number. devMinor is immutable. devMinor uint32 + // mopts contains the tmpfs-specific mount options passed to this + // filesystem. Immutable. + mopts string + // mu serializes changes to the Dentry tree. mu sync.RWMutex `state:"nosave"` @@ -184,6 +188,7 @@ func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt mfp: mfp, clock: clock, devMinor: devMinor, + mopts: opts.Data, } fs.vfsfs.Init(vfsObj, newFSType, &fs) diff --git a/pkg/sentry/fsimpl/verity/filesystem.go b/pkg/sentry/fsimpl/verity/filesystem.go index 9057d2b4e..6cb1a23e0 100644 --- a/pkg/sentry/fsimpl/verity/filesystem.go +++ b/pkg/sentry/fsimpl/verity/filesystem.go @@ -590,6 +590,23 @@ func (fs *filesystem) lookupAndVerifyLocked(ctx context.Context, parent *dentry, return nil, err } + // Clear the Merkle tree file if they are to be generated at runtime. + // TODO(b/182315468): Optimize the Merkle tree generate process to + // allow only updating certain files/directories. + if fs.allowRuntimeEnable { + childMerkleFD, err := vfsObj.OpenAt(ctx, fs.creds, &vfs.PathOperation{ + Root: childMerkleVD, + Start: childMerkleVD, + }, &vfs.OpenOptions{ + Flags: linux.O_RDWR | linux.O_TRUNC, + Mode: 0644, + }) + if err != nil { + return nil, err + } + childMerkleFD.DecRef(ctx) + } + // The dentry needs to be cleaned up if any error occurs. IncRef will be // called if a verity child dentry is successfully created. defer childMerkleVD.DecRef(ctx) diff --git a/pkg/sentry/fsimpl/verity/verity.go b/pkg/sentry/fsimpl/verity/verity.go index 374f71568..0d9b0ee2c 100644 --- a/pkg/sentry/fsimpl/verity/verity.go +++ b/pkg/sentry/fsimpl/verity/verity.go @@ -38,6 +38,7 @@ import ( "fmt" "math" "strconv" + "strings" "sync/atomic" "gvisor.dev/gvisor/pkg/abi/linux" @@ -310,6 +311,24 @@ func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt d.DecRef(ctx) return nil, nil, alertIntegrityViolation("Failed to find root Merkle file") } + + // Clear the Merkle tree file if they are to be generated at runtime. + // TODO(b/182315468): Optimize the Merkle tree generate process to + // allow only updating certain files/directories. + if fs.allowRuntimeEnable { + lowerMerkleFD, err := vfsObj.OpenAt(ctx, fs.creds, &vfs.PathOperation{ + Root: lowerMerkleVD, + Start: lowerMerkleVD, + }, &vfs.OpenOptions{ + Flags: linux.O_RDWR | linux.O_TRUNC, + Mode: 0644, + }) + if err != nil { + return nil, nil, err + } + lowerMerkleFD.DecRef(ctx) + } + d.lowerMerkleVD = lowerMerkleVD // Get metadata from the underlying file system. @@ -418,6 +437,11 @@ func (fs *filesystem) Release(ctx context.Context) { fs.lowerMount.DecRef(ctx) } +// MountOptions implements vfs.FilesystemImpl.MountOptions. +func (fs *filesystem) MountOptions() string { + return "" +} + // dentry implements vfs.DentryImpl. // // +stateify savable @@ -750,6 +774,50 @@ func (fd *fileDescription) SetStat(ctx context.Context, opts vfs.SetStatOptions) return syserror.EPERM } +// IterDirents implements vfs.FileDescriptionImpl.IterDirents. +func (fd *fileDescription) IterDirents(ctx context.Context, cb vfs.IterDirentsCallback) error { + if !fd.d.isDir() { + return syserror.ENOTDIR + } + fd.mu.Lock() + defer fd.mu.Unlock() + + var ds []vfs.Dirent + err := fd.lowerFD.IterDirents(ctx, vfs.IterDirentsCallbackFunc(func(dirent vfs.Dirent) error { + // Do not include the Merkle tree files. + if strings.Contains(dirent.Name, merklePrefix) || strings.Contains(dirent.Name, merkleRootPrefix) { + return nil + } + if fd.d.verityEnabled() { + // Verify that the child is expected. + if dirent.Name != "." && dirent.Name != ".." { + if _, ok := fd.d.childrenNames[dirent.Name]; !ok { + return alertIntegrityViolation(fmt.Sprintf("Unexpected children %s", dirent.Name)) + } + } + } + ds = append(ds, dirent) + return nil + })) + + if err != nil { + return err + } + + // The result should contain all children plus "." and "..". + if fd.d.verityEnabled() && len(ds) != len(fd.d.childrenNames)+2 { + return alertIntegrityViolation(fmt.Sprintf("Unexpected children number %d", len(ds))) + } + + for fd.off < int64(len(ds)) { + if err := cb.Handle(ds[fd.off]); err != nil { + return err + } + fd.off++ + } + return nil +} + // Seek implements vfs.FileDescriptionImpl.Seek. func (fd *fileDescription) Seek(ctx context.Context, offset int64, whence int32) (int64, error) { fd.mu.Lock() diff --git a/pkg/sentry/inet/inet.go b/pkg/sentry/inet/inet.go index f31277d30..6b71bd3a9 100644 --- a/pkg/sentry/inet/inet.go +++ b/pkg/sentry/inet/inet.go @@ -93,6 +93,14 @@ type Stack interface { // SetForwarding enables or disables packet forwarding between NICs. SetForwarding(protocol tcpip.NetworkProtocolNumber, enable bool) error + + // PortRange returns the UDP and TCP inclusive range of ephemeral ports + // used in both IPv4 and IPv6. + PortRange() (uint16, uint16) + + // SetPortRange sets the UDP and TCP IPv4 and IPv6 ephemeral port range + // (inclusive). + SetPortRange(start uint16, end uint16) error } // Interface contains information about a network interface. diff --git a/pkg/sentry/inet/test_stack.go b/pkg/sentry/inet/test_stack.go index 9ebeba8a3..03e2608c2 100644 --- a/pkg/sentry/inet/test_stack.go +++ b/pkg/sentry/inet/test_stack.go @@ -164,3 +164,15 @@ func (s *TestStack) SetForwarding(protocol tcpip.NetworkProtocolNumber, enable b s.IPForwarding = enable return nil } + +// PortRange implements inet.Stack.PortRange. +func (*TestStack) PortRange() (uint16, uint16) { + // Use the default Linux values per net/ipv4/af_inet.c:inet_init_net(). + return 32768, 28232 +} + +// SetPortRange implements inet.Stack.SetPortRange. +func (*TestStack) SetPortRange(start uint16, end uint16) error { + // No-op. + return nil +} diff --git a/pkg/sentry/socket/hostinet/stack.go b/pkg/sentry/socket/hostinet/stack.go index e6323244c..5bcf92e14 100644 --- a/pkg/sentry/socket/hostinet/stack.go +++ b/pkg/sentry/socket/hostinet/stack.go @@ -504,3 +504,14 @@ func (s *Stack) Forwarding(protocol tcpip.NetworkProtocolNumber) bool { func (s *Stack) SetForwarding(tcpip.NetworkProtocolNumber, bool) error { return syserror.EACCES } + +// PortRange implements inet.Stack.PortRange. +func (*Stack) PortRange() (uint16, uint16) { + // Use the default Linux values per net/ipv4/af_inet.c:inet_init_net(). + return 32768, 28232 +} + +// SetPortRange implements inet.Stack.SetPortRange. +func (*Stack) SetPortRange(start uint16, end uint16) error { + return syserror.EACCES +} diff --git a/pkg/sentry/socket/netstack/stack.go b/pkg/sentry/socket/netstack/stack.go index 71c3bc034..b215067cf 100644 --- a/pkg/sentry/socket/netstack/stack.go +++ b/pkg/sentry/socket/netstack/stack.go @@ -478,3 +478,13 @@ func (s *Stack) SetForwarding(protocol tcpip.NetworkProtocolNumber, enable bool) } return nil } + +// PortRange implements inet.Stack.PortRange. +func (s *Stack) PortRange() (uint16, uint16) { + return s.Stack.PortRange() +} + +// SetPortRange implements inet.Stack.SetPortRange. +func (s *Stack) SetPortRange(start uint16, end uint16) error { + return syserr.TranslateNetstackError(s.Stack.SetPortRange(start, end)).ToError() +} diff --git a/pkg/sentry/vfs/anonfs.go b/pkg/sentry/vfs/anonfs.go index 7ad0eaf86..3caf417ca 100644 --- a/pkg/sentry/vfs/anonfs.go +++ b/pkg/sentry/vfs/anonfs.go @@ -291,6 +291,11 @@ func (fs *anonFilesystem) PrependPath(ctx context.Context, vfsroot, vd VirtualDe return PrependPathSyntheticError{} } +// MountOptions implements FilesystemImpl.MountOptions. +func (fs *anonFilesystem) MountOptions() string { + return "" +} + // IncRef implements DentryImpl.IncRef. func (d *anonDentry) IncRef() { // no-op diff --git a/pkg/sentry/vfs/filesystem.go b/pkg/sentry/vfs/filesystem.go index 2c4b81e78..059939010 100644 --- a/pkg/sentry/vfs/filesystem.go +++ b/pkg/sentry/vfs/filesystem.go @@ -502,6 +502,15 @@ type FilesystemImpl interface { // // Preconditions: vd.Mount().Filesystem().Impl() == this FilesystemImpl. PrependPath(ctx context.Context, vfsroot, vd VirtualDentry, b *fspath.Builder) error + + // MountOptions returns mount options for the current filesystem. This + // should only return options specific to the filesystem (i.e. don't return + // "ro", "rw", etc). Options should be returned as a comma-separated string, + // similar to the input to the 5th argument to mount. + // + // If the implementation has no filesystem-specific options, it should + // return the empty string. + MountOptions() string } // PrependPathAtVFSRootError is returned by implementations of diff --git a/pkg/sentry/vfs/mount.go b/pkg/sentry/vfs/mount.go index bac9eb905..922f9e697 100644 --- a/pkg/sentry/vfs/mount.go +++ b/pkg/sentry/vfs/mount.go @@ -959,13 +959,17 @@ func manglePath(p string) string { // superBlockOpts returns the super block options string for the the mount at // the given path. func superBlockOpts(mountPath string, mnt *Mount) string { - // gVisor doesn't (yet) have a concept of super block options, so we - // use the ro/rw bit from the mount flag. + // Compose super block options by combining global mount flags with + // FS-specific mount options. opts := "rw" if mnt.ReadOnly() { opts = "ro" } + if mopts := mnt.fs.Impl().MountOptions(); mopts != "" { + opts += "," + mopts + } + // NOTE(b/147673608): If the mount is a cgroup, we also need to include // the cgroup name in the options. For now we just read that from the // path. |