diff options
Diffstat (limited to 'runsc/boot')
-rw-r--r-- | runsc/boot/compat.go | 8 | ||||
-rw-r--r-- | runsc/boot/compat_amd64.go | 22 | ||||
-rw-r--r-- | runsc/boot/compat_arm64.go | 22 | ||||
-rw-r--r-- | runsc/boot/config.go | 6 | ||||
-rw-r--r-- | runsc/boot/fs.go | 89 | ||||
-rw-r--r-- | runsc/boot/vfs.go | 187 |
6 files changed, 147 insertions, 187 deletions
diff --git a/runsc/boot/compat.go b/runsc/boot/compat.go index b7cfb35bf..84c67cbc2 100644 --- a/runsc/boot/compat.go +++ b/runsc/boot/compat.go @@ -119,7 +119,13 @@ func (c *compatEmitter) emitUnimplementedSyscall(us *spb.UnimplementedSyscall) { } if tr.shouldReport(regs) { - c.sink.Infof("Unsupported syscall: %s, regs: %+v", c.nameMap.Name(uintptr(sysnr)), regs) + name := c.nameMap.Name(uintptr(sysnr)) + c.sink.Infof("Unsupported syscall %s(%#x,%#x,%#x,%#x,%#x,%#x). It is "+ + "likely that you can safely ignore this message and that this is not "+ + "the cause of any error. Please, refer to %s/%s for more information.", + name, argVal(0, regs), argVal(1, regs), argVal(2, regs), argVal(3, regs), + argVal(4, regs), argVal(5, regs), syscallLink, name) + tr.onReported(regs) } } diff --git a/runsc/boot/compat_amd64.go b/runsc/boot/compat_amd64.go index 42b0ca8b0..8eb76b2ba 100644 --- a/runsc/boot/compat_amd64.go +++ b/runsc/boot/compat_amd64.go @@ -24,8 +24,12 @@ import ( "gvisor.dev/gvisor/pkg/sentry/strace" ) -// reportLimit is the max number of events that should be reported per tracker. -const reportLimit = 100 +const ( + // reportLimit is the max number of events that should be reported per + // tracker. + reportLimit = 100 + syscallLink = "https://gvisor.dev/c/linux/amd64" +) // newRegs create a empty Registers instance. func newRegs() *rpb.Registers { @@ -36,22 +40,22 @@ func newRegs() *rpb.Registers { } } -func argVal(argIdx int, regs *rpb.Registers) uint32 { +func argVal(argIdx int, regs *rpb.Registers) uint64 { amd64Regs := regs.GetArch().(*rpb.Registers_Amd64).Amd64 switch argIdx { case 0: - return uint32(amd64Regs.Rdi) + return amd64Regs.Rdi case 1: - return uint32(amd64Regs.Rsi) + return amd64Regs.Rsi case 2: - return uint32(amd64Regs.Rdx) + return amd64Regs.Rdx case 3: - return uint32(amd64Regs.R10) + return amd64Regs.R10 case 4: - return uint32(amd64Regs.R8) + return amd64Regs.R8 case 5: - return uint32(amd64Regs.R9) + return amd64Regs.R9 } panic(fmt.Sprintf("invalid syscall argument index %d", argIdx)) } diff --git a/runsc/boot/compat_arm64.go b/runsc/boot/compat_arm64.go index f784cd237..bce9d95b3 100644 --- a/runsc/boot/compat_arm64.go +++ b/runsc/boot/compat_arm64.go @@ -23,8 +23,12 @@ import ( "gvisor.dev/gvisor/pkg/sentry/strace" ) -// reportLimit is the max number of events that should be reported per tracker. -const reportLimit = 100 +const ( + // reportLimit is the max number of events that should be reported per + // tracker. + reportLimit = 100 + syscallLink = "https://gvisor.dev/c/linux/arm64" +) // newRegs create a empty Registers instance. func newRegs() *rpb.Registers { @@ -35,22 +39,22 @@ func newRegs() *rpb.Registers { } } -func argVal(argIdx int, regs *rpb.Registers) uint32 { +func argVal(argIdx int, regs *rpb.Registers) uint64 { arm64Regs := regs.GetArch().(*rpb.Registers_Arm64).Arm64 switch argIdx { case 0: - return uint32(arm64Regs.R0) + return arm64Regs.R0 case 1: - return uint32(arm64Regs.R1) + return arm64Regs.R1 case 2: - return uint32(arm64Regs.R2) + return arm64Regs.R2 case 3: - return uint32(arm64Regs.R3) + return arm64Regs.R3 case 4: - return uint32(arm64Regs.R4) + return arm64Regs.R4 case 5: - return uint32(arm64Regs.R5) + return arm64Regs.R5 } panic(fmt.Sprintf("invalid syscall argument index %d", argIdx)) } diff --git a/runsc/boot/config.go b/runsc/boot/config.go index 6d6a705f8..bcec7e4db 100644 --- a/runsc/boot/config.go +++ b/runsc/boot/config.go @@ -241,8 +241,10 @@ type Config struct { // ReferenceLeakMode sets reference leak check mode ReferenceLeakMode refs.LeakMode - // OverlayfsStaleRead causes cached FDs to reopen after a file is opened for - // write to workaround overlayfs limitation on kernels before 4.19. + // OverlayfsStaleRead instructs the sandbox to assume that the root mount + // is on a Linux overlayfs mount, which does not necessarily preserve + // coherence between read-only and subsequent writable file descriptors + // representing the "same" file. OverlayfsStaleRead bool // TestOnlyAllowRunAsCurrentUserWithoutChroot should only be used in diff --git a/runsc/boot/fs.go b/runsc/boot/fs.go index 4875452e2..52f8344ca 100644 --- a/runsc/boot/fs.go +++ b/runsc/boot/fs.go @@ -37,6 +37,13 @@ import ( "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/gofer" "gvisor.dev/gvisor/pkg/sentry/fs/ramfs" + "gvisor.dev/gvisor/pkg/sentry/fs/user" + "gvisor.dev/gvisor/pkg/sentry/fsimpl/devpts" + "gvisor.dev/gvisor/pkg/sentry/fsimpl/devtmpfs" + gofervfs2 "gvisor.dev/gvisor/pkg/sentry/fsimpl/gofer" + procvfs2 "gvisor.dev/gvisor/pkg/sentry/fsimpl/proc" + sysvfs2 "gvisor.dev/gvisor/pkg/sentry/fsimpl/sys" + tmpfsvfs2 "gvisor.dev/gvisor/pkg/sentry/fsimpl/tmpfs" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/syserror" @@ -44,23 +51,15 @@ import ( ) const ( - // Filesystem name for 9p gofer mounts. - rootFsName = "9p" - // Device name for root mount. rootDevice = "9pfs-/" // MountPrefix is the annotation prefix for mount hints. MountPrefix = "dev.gvisor.spec.mount." - // Filesystems that runsc supports. - bind = "bind" - devpts = "devpts" - devtmpfs = "devtmpfs" - proc = "proc" - sysfs = "sysfs" - tmpfs = "tmpfs" - nonefs = "none" + // Supported filesystems that map to different internal filesystem. + bind = "bind" + nonefs = "none" ) // tmpfs has some extra supported options that we must pass through. @@ -108,12 +107,12 @@ func compileMounts(spec *specs.Spec) []specs.Mount { // Always mount /dev. mounts = append(mounts, specs.Mount{ - Type: devtmpfs, + Type: devtmpfs.Name, Destination: "/dev", }) mounts = append(mounts, specs.Mount{ - Type: devpts, + Type: devpts.Name, Destination: "/dev/pts", }) @@ -137,13 +136,13 @@ func compileMounts(spec *specs.Spec) []specs.Mount { var mandatoryMounts []specs.Mount if !procMounted { mandatoryMounts = append(mandatoryMounts, specs.Mount{ - Type: proc, + Type: procvfs2.Name, Destination: "/proc", }) } if !sysMounted { mandatoryMounts = append(mandatoryMounts, specs.Mount{ - Type: sysfs, + Type: sysvfs2.Name, Destination: "/sys", }) } @@ -156,12 +155,16 @@ func compileMounts(spec *specs.Spec) []specs.Mount { } // p9MountOptions creates a slice of options for a p9 mount. -func p9MountOptions(fd int, fa FileAccessType) []string { +func p9MountOptions(fd int, fa FileAccessType, vfs2 bool) []string { opts := []string{ "trans=fd", "rfdno=" + strconv.Itoa(fd), "wfdno=" + strconv.Itoa(fd), - "privateunixsocket=true", + } + if !vfs2 { + // privateunixsocket is always enabled in VFS2. VFS1 requires explicit + // enablement. + opts = append(opts, "privateunixsocket=true") } if fa == FileAccessShared { opts = append(opts, "cache=remote_revalidating") @@ -219,9 +222,6 @@ func mountFlags(opts []string) fs.MountSourceFlags { mf.NoAtime = true case "noexec": mf.NoExec = true - case "bind", "rbind": - // When options include either "bind" or "rbind", - // it's converted to a 9P mount. default: log.Warningf("ignoring unknown mount option %q", o) } @@ -234,7 +234,7 @@ func isSupportedMountFlag(fstype, opt string) bool { case "rw", "ro", "noatime", "noexec": return true } - if fstype == tmpfs { + if fstype == tmpfsvfs2.Name { ok, err := parseMountOption(opt, tmpfsAllowedOptions...) return ok && err == nil } @@ -292,17 +292,10 @@ func setupContainerFS(ctx context.Context, conf *Config, mntr *containerMounter, // Set namespace here so that it can be found in ctx. procArgs.MountNamespace = mns - return setExecutablePath(ctx, procArgs) -} - -// setExecutablePath sets the procArgs.Filename by searching the PATH for an -// executable matching the procArgs.Argv[0]. -func setExecutablePath(ctx context.Context, procArgs *kernel.CreateProcessArgs) error { - paths := fs.GetPath(procArgs.Envv) - exe := procArgs.Argv[0] - f, err := procArgs.MountNamespace.ResolveExecutablePath(ctx, procArgs.WorkingDirectory, exe, paths) + // Resolve the executable path from working dir and environment. + f, err := user.ResolveExecutablePath(ctx, procArgs.Credentials, procArgs.MountNamespace, procArgs.Envv, procArgs.WorkingDirectory, procArgs.Argv[0]) if err != nil { - return fmt.Errorf("searching for executable %q, cwd: %q, $PATH=%q: %v", exe, procArgs.WorkingDirectory, strings.Join(paths, ":"), err) + return fmt.Errorf("searching for executable %q, cwd: %q, envv: %q: %v", procArgs.Argv[0], procArgs.WorkingDirectory, procArgs.Envv, err) } procArgs.Filename = f return nil @@ -444,7 +437,7 @@ func (m *mountHint) setOptions(val string) error { } func (m *mountHint) isSupported() bool { - return m.mount.Type == tmpfs && m.share == pod + return m.mount.Type == tmpfsvfs2.Name && m.share == pod } // checkCompatible verifies that shared mount is compatible with master. @@ -586,7 +579,7 @@ func (c *containerMounter) processHints(conf *Config) error { for _, hint := range c.hints.mounts { // TODO(b/142076984): Only support tmpfs for now. Bind mounts require a // common gofer to mount all shared volumes. - if hint.mount.Type != tmpfs { + if hint.mount.Type != tmpfsvfs2.Name { continue } log.Infof("Mounting master of shared mount %q from %q type %q", hint.name, hint.mount.Source, hint.mount.Type) @@ -723,7 +716,7 @@ func (c *containerMounter) createRootMount(ctx context.Context, conf *Config) (* fd := c.fds.remove() log.Infof("Mounting root over 9P, ioFD: %d", fd) p9FS := mustFindFilesystem("9p") - opts := p9MountOptions(fd, conf.FileAccess) + opts := p9MountOptions(fd, conf.FileAccess, false /* vfs2 */) if conf.OverlayfsStaleRead { // We can't check for overlayfs here because sandbox is chroot'ed and gofer @@ -768,22 +761,12 @@ func (c *containerMounter) getMountNameAndOptions(conf *Config, m specs.Mount) ( useOverlay bool ) - for _, opt := range m.Options { - // When options include either "bind" or "rbind", this behaves as - // bind mount even if the mount type is equal to a filesystem supported - // on runsc. - if opt == "bind" || opt == "rbind" { - m.Type = bind - break - } - } - switch m.Type { - case devpts, devtmpfs, proc, sysfs: + case devpts.Name, devtmpfs.Name, procvfs2.Name, sysvfs2.Name: fsName = m.Type case nonefs: - fsName = sysfs - case tmpfs: + fsName = sysvfs2.Name + case tmpfsvfs2.Name: fsName = m.Type var err error @@ -794,8 +777,8 @@ func (c *containerMounter) getMountNameAndOptions(conf *Config, m specs.Mount) ( case bind: fd := c.fds.remove() - fsName = "9p" - opts = p9MountOptions(fd, c.getMountAccessType(m)) + fsName = gofervfs2.Name + opts = p9MountOptions(fd, c.getMountAccessType(m), conf.VFS2) // If configured, add overlay to all writable mounts. useOverlay = conf.Overlay && !mountFlags(m.Options).ReadOnly @@ -948,7 +931,7 @@ func (c *containerMounter) createRestoreEnvironment(conf *Config) (*fs.RestoreEn // Add root mount. fd := c.fds.remove() - opts := p9MountOptions(fd, conf.FileAccess) + opts := p9MountOptions(fd, conf.FileAccess, false /* vfs2 */) mf := fs.MountSourceFlags{} if c.root.Readonly || conf.Overlay { @@ -960,7 +943,7 @@ func (c *containerMounter) createRestoreEnvironment(conf *Config) (*fs.RestoreEn Flags: mf, DataString: strings.Join(opts, ","), } - renv.MountSources[rootFsName] = append(renv.MountSources[rootFsName], rootMount) + renv.MountSources[gofervfs2.Name] = append(renv.MountSources[gofervfs2.Name], rootMount) // Add submounts. var tmpMounted bool @@ -976,7 +959,7 @@ func (c *containerMounter) createRestoreEnvironment(conf *Config) (*fs.RestoreEn // TODO(b/67958150): handle '/tmp' properly (see mountTmp()). if !tmpMounted { tmpMount := specs.Mount{ - Type: tmpfs, + Type: tmpfsvfs2.Name, Destination: "/tmp", } if err := c.addRestoreMount(conf, renv, tmpMount); err != nil { @@ -1032,7 +1015,7 @@ func (c *containerMounter) mountTmp(ctx context.Context, conf *Config, mns *fs.M // No '/tmp' found (or fallthrough from above). Safe to mount internal // tmpfs. tmpMount := specs.Mount{ - Type: tmpfs, + Type: tmpfsvfs2.Name, Destination: "/tmp", // Sticky bit is added to prevent accidental deletion of files from // another user. This is normally done for /tmp. diff --git a/runsc/boot/vfs.go b/runsc/boot/vfs.go index ef95d0c65..f48e6b0f1 100644 --- a/runsc/boot/vfs.go +++ b/runsc/boot/vfs.go @@ -18,52 +18,50 @@ import ( "fmt" "path" "sort" - "strconv" "strings" specs "github.com/opencontainers/runtime-spec/specs-go" "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/fspath" - "gvisor.dev/gvisor/pkg/sentry/devices/memdev" - "gvisor.dev/gvisor/pkg/sentry/fs" - devpts2 "gvisor.dev/gvisor/pkg/sentry/fsimpl/devpts" - devtmpfsimpl "gvisor.dev/gvisor/pkg/sentry/fsimpl/devtmpfs" - goferimpl "gvisor.dev/gvisor/pkg/sentry/fsimpl/gofer" - procimpl "gvisor.dev/gvisor/pkg/sentry/fsimpl/proc" - sysimpl "gvisor.dev/gvisor/pkg/sentry/fsimpl/sys" - tmpfsimpl "gvisor.dev/gvisor/pkg/sentry/fsimpl/tmpfs" - "gvisor.dev/gvisor/pkg/syserror" - "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/fspath" "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/sentry/devices/memdev" + "gvisor.dev/gvisor/pkg/sentry/fs/user" + "gvisor.dev/gvisor/pkg/sentry/fsimpl/devpts" + "gvisor.dev/gvisor/pkg/sentry/fsimpl/devtmpfs" + "gvisor.dev/gvisor/pkg/sentry/fsimpl/gofer" + "gvisor.dev/gvisor/pkg/sentry/fsimpl/proc" + "gvisor.dev/gvisor/pkg/sentry/fsimpl/sys" + "gvisor.dev/gvisor/pkg/sentry/fsimpl/tmpfs" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/vfs" + "gvisor.dev/gvisor/pkg/syserror" ) func registerFilesystems(ctx context.Context, vfsObj *vfs.VirtualFilesystem, creds *auth.Credentials) error { - vfsObj.MustRegisterFilesystemType(devpts2.Name, &devpts2.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{ + vfsObj.MustRegisterFilesystemType(devpts.Name, &devpts.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{ AllowUserList: true, // TODO(b/29356795): Users may mount this once the terminals are in a // usable state. AllowUserMount: false, }) - vfsObj.MustRegisterFilesystemType(devtmpfsimpl.Name, &devtmpfsimpl.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{ + vfsObj.MustRegisterFilesystemType(devtmpfs.Name, &devtmpfs.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{ AllowUserMount: true, AllowUserList: true, }) - vfsObj.MustRegisterFilesystemType(goferimpl.Name, &goferimpl.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{ + vfsObj.MustRegisterFilesystemType(gofer.Name, &gofer.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{ AllowUserList: true, }) - vfsObj.MustRegisterFilesystemType(procimpl.Name, &procimpl.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{ + vfsObj.MustRegisterFilesystemType(proc.Name, &proc.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{ AllowUserMount: true, AllowUserList: true, }) - vfsObj.MustRegisterFilesystemType(sysimpl.Name, &sysimpl.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{ + vfsObj.MustRegisterFilesystemType(sys.Name, &sys.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{ AllowUserMount: true, AllowUserList: true, }) - vfsObj.MustRegisterFilesystemType(tmpfsimpl.Name, &tmpfsimpl.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{ + vfsObj.MustRegisterFilesystemType(tmpfs.Name, &tmpfs.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{ AllowUserMount: true, AllowUserList: true, }) @@ -72,7 +70,7 @@ func registerFilesystems(ctx context.Context, vfsObj *vfs.VirtualFilesystem, cre if err := memdev.Register(vfsObj); err != nil { return fmt.Errorf("registering memdev: %w", err) } - a, err := devtmpfsimpl.NewAccessor(ctx, vfsObj, creds, devtmpfsimpl.Name) + a, err := devtmpfs.NewAccessor(ctx, vfsObj, creds, devtmpfs.Name) if err != nil { return fmt.Errorf("creating devtmpfs accessor: %w", err) } @@ -96,69 +94,14 @@ func setupContainerVFS2(ctx context.Context, conf *Config, mntr *containerMounte return fmt.Errorf("failed to setupFS: %w", err) } procArgs.MountNamespaceVFS2 = mns - return setExecutablePathVFS2(ctx, procArgs) -} - -func setExecutablePathVFS2(ctx context.Context, procArgs *kernel.CreateProcessArgs) error { - exe := procArgs.Argv[0] - - // Absolute paths can be used directly. - if path.IsAbs(exe) { - procArgs.Filename = exe - return nil - } - // Paths with '/' in them should be joined to the working directory, or - // to the root if working directory is not set. - if strings.IndexByte(exe, '/') > 0 { - if !path.IsAbs(procArgs.WorkingDirectory) { - return fmt.Errorf("working directory %q must be absolute", procArgs.WorkingDirectory) - } - procArgs.Filename = path.Join(procArgs.WorkingDirectory, exe) - return nil - } - - // Paths with a '/' are relative to the CWD. - if strings.IndexByte(exe, '/') > 0 { - procArgs.Filename = path.Join(procArgs.WorkingDirectory, exe) - return nil - } - - // Otherwise, We must lookup the name in the paths, starting from the - // root directory. - root := procArgs.MountNamespaceVFS2.Root() - defer root.DecRef() - - paths := fs.GetPath(procArgs.Envv) - creds := procArgs.Credentials - - for _, p := range paths { - binPath := path.Join(p, exe) - pop := &vfs.PathOperation{ - Root: root, - Start: root, - Path: fspath.Parse(binPath), - FollowFinalSymlink: true, - } - opts := &vfs.OpenOptions{ - FileExec: true, - Flags: linux.O_RDONLY, - } - dentry, err := root.Mount().Filesystem().VirtualFilesystem().OpenAt(ctx, creds, pop, opts) - if err == syserror.ENOENT || err == syserror.EACCES { - // Didn't find it here. - continue - } - if err != nil { - return err - } - dentry.DecRef() - - procArgs.Filename = binPath - return nil + // Resolve the executable path from working dir and environment. + f, err := user.ResolveExecutablePathVFS2(ctx, procArgs.Credentials, procArgs.MountNamespaceVFS2, procArgs.Envv, procArgs.WorkingDirectory, procArgs.Argv[0]) + if err != nil { + return fmt.Errorf("searching for executable %q, cwd: %q, envv: %q: %v", procArgs.Argv[0], procArgs.WorkingDirectory, procArgs.Envv, err) } - - return fmt.Errorf("executable %q not found in $PATH=%q", exe, strings.Join(paths, ":")) + procArgs.Filename = f + return nil } func (c *containerMounter) setupVFS2(ctx context.Context, conf *Config, procArgs *kernel.CreateProcessArgs) (*vfs.MountNamespace, error) { @@ -193,10 +136,10 @@ func (c *containerMounter) setupVFS2(ctx context.Context, conf *Config, procArgs func (c *containerMounter) createMountNamespaceVFS2(ctx context.Context, conf *Config, creds *auth.Credentials) (*vfs.MountNamespace, error) { fd := c.fds.remove() - opts := strings.Join(p9MountOptionsVFS2(fd, conf.FileAccess), ",") + opts := strings.Join(p9MountOptions(fd, conf.FileAccess, true /* vfs2 */), ",") log.Infof("Mounting root over 9P, ioFD: %d", fd) - mns, err := c.k.VFS().NewMountNamespace(ctx, creds, "", rootFsName, &vfs.GetFilesystemOptions{Data: opts}) + mns, err := c.k.VFS().NewMountNamespace(ctx, creds, "", gofer.Name, &vfs.GetFilesystemOptions{Data: opts}) if err != nil { return nil, fmt.Errorf("setting up mount namespace: %w", err) } @@ -204,27 +147,61 @@ func (c *containerMounter) createMountNamespaceVFS2(ctx context.Context, conf *C } func (c *containerMounter) mountSubmountsVFS2(ctx context.Context, conf *Config, mns *vfs.MountNamespace, creds *auth.Credentials) error { - c.prepareMountsVFS2() + mounts, err := c.prepareMountsVFS2() + if err != nil { + return err + } - for _, submount := range c.mounts { + for i := range mounts { + submount := &mounts[i] log.Debugf("Mounting %q to %q, type: %s, options: %s", submount.Source, submount.Destination, submount.Type, submount.Options) - if err := c.mountSubmountVFS2(ctx, conf, mns, creds, &submount); err != nil { + if err := c.mountSubmountVFS2(ctx, conf, mns, creds, submount); err != nil { return err } } // TODO(gvisor.dev/issue/1487): implement mountTmp from fs.go. - return c.checkDispenser() + return nil +} + +type mountAndFD struct { + specs.Mount + fd int } -func (c *containerMounter) prepareMountsVFS2() { +func (c *containerMounter) prepareMountsVFS2() ([]mountAndFD, error) { + // Associate bind mounts with their FDs before sorting since there is an + // undocumented assumption that FDs are dispensed in the order in which + // they are required by mounts. + var mounts []mountAndFD + for _, m := range c.mounts { + fd := -1 + // Only bind mounts use host FDs; see + // containerMounter.getMountNameAndOptionsVFS2. + if m.Type == bind { + fd = c.fds.remove() + } + mounts = append(mounts, mountAndFD{ + Mount: m, + fd: fd, + }) + } + if err := c.checkDispenser(); err != nil { + return nil, err + } + // Sort the mounts so that we don't place children before parents. - sort.Slice(c.mounts, func(i, j int) bool { return len(c.mounts[i].Destination) < len(c.mounts[j].Destination) }) + sort.Slice(mounts, func(i, j int) bool { + return len(mounts[i].Destination) < len(mounts[j].Destination) + }) + + return mounts, nil } -// TODO(gvisor.dev/issue/1487): Implement submount options similar to the VFS1 version. -func (c *containerMounter) mountSubmountVFS2(ctx context.Context, conf *Config, mns *vfs.MountNamespace, creds *auth.Credentials, submount *specs.Mount) error { +// TODO(gvisor.dev/issue/1487): Implement submount options similar to the VFS1 +// version. +func (c *containerMounter) mountSubmountVFS2(ctx context.Context, conf *Config, mns *vfs.MountNamespace, creds *auth.Credentials, submount *mountAndFD) error { root := mns.Root() defer root.DecRef() target := &vfs.PathOperation{ @@ -233,7 +210,7 @@ func (c *containerMounter) mountSubmountVFS2(ctx context.Context, conf *Config, Path: fspath.Parse(submount.Destination), } - fsName, options, useOverlay, err := c.getMountNameAndOptionsVFS2(conf, *submount) + fsName, options, useOverlay, err := c.getMountNameAndOptionsVFS2(conf, submount) if err != nil { return fmt.Errorf("mountOptions failed: %w", err) } @@ -265,7 +242,7 @@ func (c *containerMounter) mountSubmountVFS2(ctx context.Context, conf *Config, // getMountNameAndOptionsVFS2 retrieves the fsName, opts, and useOverlay values // used for mounts. -func (c *containerMounter) getMountNameAndOptionsVFS2(conf *Config, m specs.Mount) (string, []string, bool, error) { +func (c *containerMounter) getMountNameAndOptionsVFS2(conf *Config, m *mountAndFD) (string, []string, bool, error) { var ( fsName string opts []string @@ -273,11 +250,11 @@ func (c *containerMounter) getMountNameAndOptionsVFS2(conf *Config, m specs.Moun ) switch m.Type { - case devpts, devtmpfs, proc, sysfs: + case devpts.Name, devtmpfs.Name, proc.Name, sys.Name: fsName = m.Type case nonefs: - fsName = sysfs - case tmpfs: + fsName = sys.Name + case tmpfs.Name: fsName = m.Type var err error @@ -287,9 +264,8 @@ func (c *containerMounter) getMountNameAndOptionsVFS2(conf *Config, m specs.Moun } case bind: - fd := c.fds.remove() - fsName = "9p" - opts = p9MountOptionsVFS2(fd, c.getMountAccessType(m)) + fsName = gofer.Name + opts = p9MountOptions(m.fd, c.getMountAccessType(m.Mount), true /* vfs2 */) // If configured, add overlay to all writable mounts. useOverlay = conf.Overlay && !mountFlags(m.Options).ReadOnly @@ -299,21 +275,6 @@ func (c *containerMounter) getMountNameAndOptionsVFS2(conf *Config, m specs.Moun return fsName, opts, useOverlay, nil } -// p9MountOptions creates a slice of options for a p9 mount. -// TODO(gvisor.dev/issue/1624): Remove this version once privateunixsocket is -// deleted, along with the rest of VFS1. -func p9MountOptionsVFS2(fd int, fa FileAccessType) []string { - opts := []string{ - "trans=fd", - "rfdno=" + strconv.Itoa(fd), - "wfdno=" + strconv.Itoa(fd), - } - if fa == FileAccessShared { - opts = append(opts, "cache=remote_revalidating") - } - return opts -} - func (c *containerMounter) makeSyntheticMount(ctx context.Context, currentPath string, root vfs.VirtualDentry, creds *auth.Credentials) error { target := &vfs.PathOperation{ Root: root, |