summaryrefslogtreecommitdiffhomepage
path: root/runsc/boot
diff options
context:
space:
mode:
Diffstat (limited to 'runsc/boot')
-rw-r--r--runsc/boot/compat.go8
-rw-r--r--runsc/boot/compat_amd64.go22
-rw-r--r--runsc/boot/compat_arm64.go22
-rw-r--r--runsc/boot/config.go6
-rw-r--r--runsc/boot/fs.go89
-rw-r--r--runsc/boot/vfs.go187
6 files changed, 147 insertions, 187 deletions
diff --git a/runsc/boot/compat.go b/runsc/boot/compat.go
index b7cfb35bf..84c67cbc2 100644
--- a/runsc/boot/compat.go
+++ b/runsc/boot/compat.go
@@ -119,7 +119,13 @@ func (c *compatEmitter) emitUnimplementedSyscall(us *spb.UnimplementedSyscall) {
}
if tr.shouldReport(regs) {
- c.sink.Infof("Unsupported syscall: %s, regs: %+v", c.nameMap.Name(uintptr(sysnr)), regs)
+ name := c.nameMap.Name(uintptr(sysnr))
+ c.sink.Infof("Unsupported syscall %s(%#x,%#x,%#x,%#x,%#x,%#x). It is "+
+ "likely that you can safely ignore this message and that this is not "+
+ "the cause of any error. Please, refer to %s/%s for more information.",
+ name, argVal(0, regs), argVal(1, regs), argVal(2, regs), argVal(3, regs),
+ argVal(4, regs), argVal(5, regs), syscallLink, name)
+
tr.onReported(regs)
}
}
diff --git a/runsc/boot/compat_amd64.go b/runsc/boot/compat_amd64.go
index 42b0ca8b0..8eb76b2ba 100644
--- a/runsc/boot/compat_amd64.go
+++ b/runsc/boot/compat_amd64.go
@@ -24,8 +24,12 @@ import (
"gvisor.dev/gvisor/pkg/sentry/strace"
)
-// reportLimit is the max number of events that should be reported per tracker.
-const reportLimit = 100
+const (
+ // reportLimit is the max number of events that should be reported per
+ // tracker.
+ reportLimit = 100
+ syscallLink = "https://gvisor.dev/c/linux/amd64"
+)
// newRegs create a empty Registers instance.
func newRegs() *rpb.Registers {
@@ -36,22 +40,22 @@ func newRegs() *rpb.Registers {
}
}
-func argVal(argIdx int, regs *rpb.Registers) uint32 {
+func argVal(argIdx int, regs *rpb.Registers) uint64 {
amd64Regs := regs.GetArch().(*rpb.Registers_Amd64).Amd64
switch argIdx {
case 0:
- return uint32(amd64Regs.Rdi)
+ return amd64Regs.Rdi
case 1:
- return uint32(amd64Regs.Rsi)
+ return amd64Regs.Rsi
case 2:
- return uint32(amd64Regs.Rdx)
+ return amd64Regs.Rdx
case 3:
- return uint32(amd64Regs.R10)
+ return amd64Regs.R10
case 4:
- return uint32(amd64Regs.R8)
+ return amd64Regs.R8
case 5:
- return uint32(amd64Regs.R9)
+ return amd64Regs.R9
}
panic(fmt.Sprintf("invalid syscall argument index %d", argIdx))
}
diff --git a/runsc/boot/compat_arm64.go b/runsc/boot/compat_arm64.go
index f784cd237..bce9d95b3 100644
--- a/runsc/boot/compat_arm64.go
+++ b/runsc/boot/compat_arm64.go
@@ -23,8 +23,12 @@ import (
"gvisor.dev/gvisor/pkg/sentry/strace"
)
-// reportLimit is the max number of events that should be reported per tracker.
-const reportLimit = 100
+const (
+ // reportLimit is the max number of events that should be reported per
+ // tracker.
+ reportLimit = 100
+ syscallLink = "https://gvisor.dev/c/linux/arm64"
+)
// newRegs create a empty Registers instance.
func newRegs() *rpb.Registers {
@@ -35,22 +39,22 @@ func newRegs() *rpb.Registers {
}
}
-func argVal(argIdx int, regs *rpb.Registers) uint32 {
+func argVal(argIdx int, regs *rpb.Registers) uint64 {
arm64Regs := regs.GetArch().(*rpb.Registers_Arm64).Arm64
switch argIdx {
case 0:
- return uint32(arm64Regs.R0)
+ return arm64Regs.R0
case 1:
- return uint32(arm64Regs.R1)
+ return arm64Regs.R1
case 2:
- return uint32(arm64Regs.R2)
+ return arm64Regs.R2
case 3:
- return uint32(arm64Regs.R3)
+ return arm64Regs.R3
case 4:
- return uint32(arm64Regs.R4)
+ return arm64Regs.R4
case 5:
- return uint32(arm64Regs.R5)
+ return arm64Regs.R5
}
panic(fmt.Sprintf("invalid syscall argument index %d", argIdx))
}
diff --git a/runsc/boot/config.go b/runsc/boot/config.go
index 6d6a705f8..bcec7e4db 100644
--- a/runsc/boot/config.go
+++ b/runsc/boot/config.go
@@ -241,8 +241,10 @@ type Config struct {
// ReferenceLeakMode sets reference leak check mode
ReferenceLeakMode refs.LeakMode
- // OverlayfsStaleRead causes cached FDs to reopen after a file is opened for
- // write to workaround overlayfs limitation on kernels before 4.19.
+ // OverlayfsStaleRead instructs the sandbox to assume that the root mount
+ // is on a Linux overlayfs mount, which does not necessarily preserve
+ // coherence between read-only and subsequent writable file descriptors
+ // representing the "same" file.
OverlayfsStaleRead bool
// TestOnlyAllowRunAsCurrentUserWithoutChroot should only be used in
diff --git a/runsc/boot/fs.go b/runsc/boot/fs.go
index 4875452e2..52f8344ca 100644
--- a/runsc/boot/fs.go
+++ b/runsc/boot/fs.go
@@ -37,6 +37,13 @@ import (
"gvisor.dev/gvisor/pkg/sentry/fs"
"gvisor.dev/gvisor/pkg/sentry/fs/gofer"
"gvisor.dev/gvisor/pkg/sentry/fs/ramfs"
+ "gvisor.dev/gvisor/pkg/sentry/fs/user"
+ "gvisor.dev/gvisor/pkg/sentry/fsimpl/devpts"
+ "gvisor.dev/gvisor/pkg/sentry/fsimpl/devtmpfs"
+ gofervfs2 "gvisor.dev/gvisor/pkg/sentry/fsimpl/gofer"
+ procvfs2 "gvisor.dev/gvisor/pkg/sentry/fsimpl/proc"
+ sysvfs2 "gvisor.dev/gvisor/pkg/sentry/fsimpl/sys"
+ tmpfsvfs2 "gvisor.dev/gvisor/pkg/sentry/fsimpl/tmpfs"
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/syserror"
@@ -44,23 +51,15 @@ import (
)
const (
- // Filesystem name for 9p gofer mounts.
- rootFsName = "9p"
-
// Device name for root mount.
rootDevice = "9pfs-/"
// MountPrefix is the annotation prefix for mount hints.
MountPrefix = "dev.gvisor.spec.mount."
- // Filesystems that runsc supports.
- bind = "bind"
- devpts = "devpts"
- devtmpfs = "devtmpfs"
- proc = "proc"
- sysfs = "sysfs"
- tmpfs = "tmpfs"
- nonefs = "none"
+ // Supported filesystems that map to different internal filesystem.
+ bind = "bind"
+ nonefs = "none"
)
// tmpfs has some extra supported options that we must pass through.
@@ -108,12 +107,12 @@ func compileMounts(spec *specs.Spec) []specs.Mount {
// Always mount /dev.
mounts = append(mounts, specs.Mount{
- Type: devtmpfs,
+ Type: devtmpfs.Name,
Destination: "/dev",
})
mounts = append(mounts, specs.Mount{
- Type: devpts,
+ Type: devpts.Name,
Destination: "/dev/pts",
})
@@ -137,13 +136,13 @@ func compileMounts(spec *specs.Spec) []specs.Mount {
var mandatoryMounts []specs.Mount
if !procMounted {
mandatoryMounts = append(mandatoryMounts, specs.Mount{
- Type: proc,
+ Type: procvfs2.Name,
Destination: "/proc",
})
}
if !sysMounted {
mandatoryMounts = append(mandatoryMounts, specs.Mount{
- Type: sysfs,
+ Type: sysvfs2.Name,
Destination: "/sys",
})
}
@@ -156,12 +155,16 @@ func compileMounts(spec *specs.Spec) []specs.Mount {
}
// p9MountOptions creates a slice of options for a p9 mount.
-func p9MountOptions(fd int, fa FileAccessType) []string {
+func p9MountOptions(fd int, fa FileAccessType, vfs2 bool) []string {
opts := []string{
"trans=fd",
"rfdno=" + strconv.Itoa(fd),
"wfdno=" + strconv.Itoa(fd),
- "privateunixsocket=true",
+ }
+ if !vfs2 {
+ // privateunixsocket is always enabled in VFS2. VFS1 requires explicit
+ // enablement.
+ opts = append(opts, "privateunixsocket=true")
}
if fa == FileAccessShared {
opts = append(opts, "cache=remote_revalidating")
@@ -219,9 +222,6 @@ func mountFlags(opts []string) fs.MountSourceFlags {
mf.NoAtime = true
case "noexec":
mf.NoExec = true
- case "bind", "rbind":
- // When options include either "bind" or "rbind",
- // it's converted to a 9P mount.
default:
log.Warningf("ignoring unknown mount option %q", o)
}
@@ -234,7 +234,7 @@ func isSupportedMountFlag(fstype, opt string) bool {
case "rw", "ro", "noatime", "noexec":
return true
}
- if fstype == tmpfs {
+ if fstype == tmpfsvfs2.Name {
ok, err := parseMountOption(opt, tmpfsAllowedOptions...)
return ok && err == nil
}
@@ -292,17 +292,10 @@ func setupContainerFS(ctx context.Context, conf *Config, mntr *containerMounter,
// Set namespace here so that it can be found in ctx.
procArgs.MountNamespace = mns
- return setExecutablePath(ctx, procArgs)
-}
-
-// setExecutablePath sets the procArgs.Filename by searching the PATH for an
-// executable matching the procArgs.Argv[0].
-func setExecutablePath(ctx context.Context, procArgs *kernel.CreateProcessArgs) error {
- paths := fs.GetPath(procArgs.Envv)
- exe := procArgs.Argv[0]
- f, err := procArgs.MountNamespace.ResolveExecutablePath(ctx, procArgs.WorkingDirectory, exe, paths)
+ // Resolve the executable path from working dir and environment.
+ f, err := user.ResolveExecutablePath(ctx, procArgs.Credentials, procArgs.MountNamespace, procArgs.Envv, procArgs.WorkingDirectory, procArgs.Argv[0])
if err != nil {
- return fmt.Errorf("searching for executable %q, cwd: %q, $PATH=%q: %v", exe, procArgs.WorkingDirectory, strings.Join(paths, ":"), err)
+ return fmt.Errorf("searching for executable %q, cwd: %q, envv: %q: %v", procArgs.Argv[0], procArgs.WorkingDirectory, procArgs.Envv, err)
}
procArgs.Filename = f
return nil
@@ -444,7 +437,7 @@ func (m *mountHint) setOptions(val string) error {
}
func (m *mountHint) isSupported() bool {
- return m.mount.Type == tmpfs && m.share == pod
+ return m.mount.Type == tmpfsvfs2.Name && m.share == pod
}
// checkCompatible verifies that shared mount is compatible with master.
@@ -586,7 +579,7 @@ func (c *containerMounter) processHints(conf *Config) error {
for _, hint := range c.hints.mounts {
// TODO(b/142076984): Only support tmpfs for now. Bind mounts require a
// common gofer to mount all shared volumes.
- if hint.mount.Type != tmpfs {
+ if hint.mount.Type != tmpfsvfs2.Name {
continue
}
log.Infof("Mounting master of shared mount %q from %q type %q", hint.name, hint.mount.Source, hint.mount.Type)
@@ -723,7 +716,7 @@ func (c *containerMounter) createRootMount(ctx context.Context, conf *Config) (*
fd := c.fds.remove()
log.Infof("Mounting root over 9P, ioFD: %d", fd)
p9FS := mustFindFilesystem("9p")
- opts := p9MountOptions(fd, conf.FileAccess)
+ opts := p9MountOptions(fd, conf.FileAccess, false /* vfs2 */)
if conf.OverlayfsStaleRead {
// We can't check for overlayfs here because sandbox is chroot'ed and gofer
@@ -768,22 +761,12 @@ func (c *containerMounter) getMountNameAndOptions(conf *Config, m specs.Mount) (
useOverlay bool
)
- for _, opt := range m.Options {
- // When options include either "bind" or "rbind", this behaves as
- // bind mount even if the mount type is equal to a filesystem supported
- // on runsc.
- if opt == "bind" || opt == "rbind" {
- m.Type = bind
- break
- }
- }
-
switch m.Type {
- case devpts, devtmpfs, proc, sysfs:
+ case devpts.Name, devtmpfs.Name, procvfs2.Name, sysvfs2.Name:
fsName = m.Type
case nonefs:
- fsName = sysfs
- case tmpfs:
+ fsName = sysvfs2.Name
+ case tmpfsvfs2.Name:
fsName = m.Type
var err error
@@ -794,8 +777,8 @@ func (c *containerMounter) getMountNameAndOptions(conf *Config, m specs.Mount) (
case bind:
fd := c.fds.remove()
- fsName = "9p"
- opts = p9MountOptions(fd, c.getMountAccessType(m))
+ fsName = gofervfs2.Name
+ opts = p9MountOptions(fd, c.getMountAccessType(m), conf.VFS2)
// If configured, add overlay to all writable mounts.
useOverlay = conf.Overlay && !mountFlags(m.Options).ReadOnly
@@ -948,7 +931,7 @@ func (c *containerMounter) createRestoreEnvironment(conf *Config) (*fs.RestoreEn
// Add root mount.
fd := c.fds.remove()
- opts := p9MountOptions(fd, conf.FileAccess)
+ opts := p9MountOptions(fd, conf.FileAccess, false /* vfs2 */)
mf := fs.MountSourceFlags{}
if c.root.Readonly || conf.Overlay {
@@ -960,7 +943,7 @@ func (c *containerMounter) createRestoreEnvironment(conf *Config) (*fs.RestoreEn
Flags: mf,
DataString: strings.Join(opts, ","),
}
- renv.MountSources[rootFsName] = append(renv.MountSources[rootFsName], rootMount)
+ renv.MountSources[gofervfs2.Name] = append(renv.MountSources[gofervfs2.Name], rootMount)
// Add submounts.
var tmpMounted bool
@@ -976,7 +959,7 @@ func (c *containerMounter) createRestoreEnvironment(conf *Config) (*fs.RestoreEn
// TODO(b/67958150): handle '/tmp' properly (see mountTmp()).
if !tmpMounted {
tmpMount := specs.Mount{
- Type: tmpfs,
+ Type: tmpfsvfs2.Name,
Destination: "/tmp",
}
if err := c.addRestoreMount(conf, renv, tmpMount); err != nil {
@@ -1032,7 +1015,7 @@ func (c *containerMounter) mountTmp(ctx context.Context, conf *Config, mns *fs.M
// No '/tmp' found (or fallthrough from above). Safe to mount internal
// tmpfs.
tmpMount := specs.Mount{
- Type: tmpfs,
+ Type: tmpfsvfs2.Name,
Destination: "/tmp",
// Sticky bit is added to prevent accidental deletion of files from
// another user. This is normally done for /tmp.
diff --git a/runsc/boot/vfs.go b/runsc/boot/vfs.go
index ef95d0c65..f48e6b0f1 100644
--- a/runsc/boot/vfs.go
+++ b/runsc/boot/vfs.go
@@ -18,52 +18,50 @@ import (
"fmt"
"path"
"sort"
- "strconv"
"strings"
specs "github.com/opencontainers/runtime-spec/specs-go"
"gvisor.dev/gvisor/pkg/abi/linux"
- "gvisor.dev/gvisor/pkg/fspath"
- "gvisor.dev/gvisor/pkg/sentry/devices/memdev"
- "gvisor.dev/gvisor/pkg/sentry/fs"
- devpts2 "gvisor.dev/gvisor/pkg/sentry/fsimpl/devpts"
- devtmpfsimpl "gvisor.dev/gvisor/pkg/sentry/fsimpl/devtmpfs"
- goferimpl "gvisor.dev/gvisor/pkg/sentry/fsimpl/gofer"
- procimpl "gvisor.dev/gvisor/pkg/sentry/fsimpl/proc"
- sysimpl "gvisor.dev/gvisor/pkg/sentry/fsimpl/sys"
- tmpfsimpl "gvisor.dev/gvisor/pkg/sentry/fsimpl/tmpfs"
- "gvisor.dev/gvisor/pkg/syserror"
-
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/fspath"
"gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/sentry/devices/memdev"
+ "gvisor.dev/gvisor/pkg/sentry/fs/user"
+ "gvisor.dev/gvisor/pkg/sentry/fsimpl/devpts"
+ "gvisor.dev/gvisor/pkg/sentry/fsimpl/devtmpfs"
+ "gvisor.dev/gvisor/pkg/sentry/fsimpl/gofer"
+ "gvisor.dev/gvisor/pkg/sentry/fsimpl/proc"
+ "gvisor.dev/gvisor/pkg/sentry/fsimpl/sys"
+ "gvisor.dev/gvisor/pkg/sentry/fsimpl/tmpfs"
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/sentry/vfs"
+ "gvisor.dev/gvisor/pkg/syserror"
)
func registerFilesystems(ctx context.Context, vfsObj *vfs.VirtualFilesystem, creds *auth.Credentials) error {
- vfsObj.MustRegisterFilesystemType(devpts2.Name, &devpts2.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{
+ vfsObj.MustRegisterFilesystemType(devpts.Name, &devpts.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{
AllowUserList: true,
// TODO(b/29356795): Users may mount this once the terminals are in a
// usable state.
AllowUserMount: false,
})
- vfsObj.MustRegisterFilesystemType(devtmpfsimpl.Name, &devtmpfsimpl.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{
+ vfsObj.MustRegisterFilesystemType(devtmpfs.Name, &devtmpfs.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{
AllowUserMount: true,
AllowUserList: true,
})
- vfsObj.MustRegisterFilesystemType(goferimpl.Name, &goferimpl.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{
+ vfsObj.MustRegisterFilesystemType(gofer.Name, &gofer.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{
AllowUserList: true,
})
- vfsObj.MustRegisterFilesystemType(procimpl.Name, &procimpl.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{
+ vfsObj.MustRegisterFilesystemType(proc.Name, &proc.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{
AllowUserMount: true,
AllowUserList: true,
})
- vfsObj.MustRegisterFilesystemType(sysimpl.Name, &sysimpl.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{
+ vfsObj.MustRegisterFilesystemType(sys.Name, &sys.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{
AllowUserMount: true,
AllowUserList: true,
})
- vfsObj.MustRegisterFilesystemType(tmpfsimpl.Name, &tmpfsimpl.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{
+ vfsObj.MustRegisterFilesystemType(tmpfs.Name, &tmpfs.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{
AllowUserMount: true,
AllowUserList: true,
})
@@ -72,7 +70,7 @@ func registerFilesystems(ctx context.Context, vfsObj *vfs.VirtualFilesystem, cre
if err := memdev.Register(vfsObj); err != nil {
return fmt.Errorf("registering memdev: %w", err)
}
- a, err := devtmpfsimpl.NewAccessor(ctx, vfsObj, creds, devtmpfsimpl.Name)
+ a, err := devtmpfs.NewAccessor(ctx, vfsObj, creds, devtmpfs.Name)
if err != nil {
return fmt.Errorf("creating devtmpfs accessor: %w", err)
}
@@ -96,69 +94,14 @@ func setupContainerVFS2(ctx context.Context, conf *Config, mntr *containerMounte
return fmt.Errorf("failed to setupFS: %w", err)
}
procArgs.MountNamespaceVFS2 = mns
- return setExecutablePathVFS2(ctx, procArgs)
-}
-
-func setExecutablePathVFS2(ctx context.Context, procArgs *kernel.CreateProcessArgs) error {
- exe := procArgs.Argv[0]
-
- // Absolute paths can be used directly.
- if path.IsAbs(exe) {
- procArgs.Filename = exe
- return nil
- }
- // Paths with '/' in them should be joined to the working directory, or
- // to the root if working directory is not set.
- if strings.IndexByte(exe, '/') > 0 {
- if !path.IsAbs(procArgs.WorkingDirectory) {
- return fmt.Errorf("working directory %q must be absolute", procArgs.WorkingDirectory)
- }
- procArgs.Filename = path.Join(procArgs.WorkingDirectory, exe)
- return nil
- }
-
- // Paths with a '/' are relative to the CWD.
- if strings.IndexByte(exe, '/') > 0 {
- procArgs.Filename = path.Join(procArgs.WorkingDirectory, exe)
- return nil
- }
-
- // Otherwise, We must lookup the name in the paths, starting from the
- // root directory.
- root := procArgs.MountNamespaceVFS2.Root()
- defer root.DecRef()
-
- paths := fs.GetPath(procArgs.Envv)
- creds := procArgs.Credentials
-
- for _, p := range paths {
- binPath := path.Join(p, exe)
- pop := &vfs.PathOperation{
- Root: root,
- Start: root,
- Path: fspath.Parse(binPath),
- FollowFinalSymlink: true,
- }
- opts := &vfs.OpenOptions{
- FileExec: true,
- Flags: linux.O_RDONLY,
- }
- dentry, err := root.Mount().Filesystem().VirtualFilesystem().OpenAt(ctx, creds, pop, opts)
- if err == syserror.ENOENT || err == syserror.EACCES {
- // Didn't find it here.
- continue
- }
- if err != nil {
- return err
- }
- dentry.DecRef()
-
- procArgs.Filename = binPath
- return nil
+ // Resolve the executable path from working dir and environment.
+ f, err := user.ResolveExecutablePathVFS2(ctx, procArgs.Credentials, procArgs.MountNamespaceVFS2, procArgs.Envv, procArgs.WorkingDirectory, procArgs.Argv[0])
+ if err != nil {
+ return fmt.Errorf("searching for executable %q, cwd: %q, envv: %q: %v", procArgs.Argv[0], procArgs.WorkingDirectory, procArgs.Envv, err)
}
-
- return fmt.Errorf("executable %q not found in $PATH=%q", exe, strings.Join(paths, ":"))
+ procArgs.Filename = f
+ return nil
}
func (c *containerMounter) setupVFS2(ctx context.Context, conf *Config, procArgs *kernel.CreateProcessArgs) (*vfs.MountNamespace, error) {
@@ -193,10 +136,10 @@ func (c *containerMounter) setupVFS2(ctx context.Context, conf *Config, procArgs
func (c *containerMounter) createMountNamespaceVFS2(ctx context.Context, conf *Config, creds *auth.Credentials) (*vfs.MountNamespace, error) {
fd := c.fds.remove()
- opts := strings.Join(p9MountOptionsVFS2(fd, conf.FileAccess), ",")
+ opts := strings.Join(p9MountOptions(fd, conf.FileAccess, true /* vfs2 */), ",")
log.Infof("Mounting root over 9P, ioFD: %d", fd)
- mns, err := c.k.VFS().NewMountNamespace(ctx, creds, "", rootFsName, &vfs.GetFilesystemOptions{Data: opts})
+ mns, err := c.k.VFS().NewMountNamespace(ctx, creds, "", gofer.Name, &vfs.GetFilesystemOptions{Data: opts})
if err != nil {
return nil, fmt.Errorf("setting up mount namespace: %w", err)
}
@@ -204,27 +147,61 @@ func (c *containerMounter) createMountNamespaceVFS2(ctx context.Context, conf *C
}
func (c *containerMounter) mountSubmountsVFS2(ctx context.Context, conf *Config, mns *vfs.MountNamespace, creds *auth.Credentials) error {
- c.prepareMountsVFS2()
+ mounts, err := c.prepareMountsVFS2()
+ if err != nil {
+ return err
+ }
- for _, submount := range c.mounts {
+ for i := range mounts {
+ submount := &mounts[i]
log.Debugf("Mounting %q to %q, type: %s, options: %s", submount.Source, submount.Destination, submount.Type, submount.Options)
- if err := c.mountSubmountVFS2(ctx, conf, mns, creds, &submount); err != nil {
+ if err := c.mountSubmountVFS2(ctx, conf, mns, creds, submount); err != nil {
return err
}
}
// TODO(gvisor.dev/issue/1487): implement mountTmp from fs.go.
- return c.checkDispenser()
+ return nil
+}
+
+type mountAndFD struct {
+ specs.Mount
+ fd int
}
-func (c *containerMounter) prepareMountsVFS2() {
+func (c *containerMounter) prepareMountsVFS2() ([]mountAndFD, error) {
+ // Associate bind mounts with their FDs before sorting since there is an
+ // undocumented assumption that FDs are dispensed in the order in which
+ // they are required by mounts.
+ var mounts []mountAndFD
+ for _, m := range c.mounts {
+ fd := -1
+ // Only bind mounts use host FDs; see
+ // containerMounter.getMountNameAndOptionsVFS2.
+ if m.Type == bind {
+ fd = c.fds.remove()
+ }
+ mounts = append(mounts, mountAndFD{
+ Mount: m,
+ fd: fd,
+ })
+ }
+ if err := c.checkDispenser(); err != nil {
+ return nil, err
+ }
+
// Sort the mounts so that we don't place children before parents.
- sort.Slice(c.mounts, func(i, j int) bool { return len(c.mounts[i].Destination) < len(c.mounts[j].Destination) })
+ sort.Slice(mounts, func(i, j int) bool {
+ return len(mounts[i].Destination) < len(mounts[j].Destination)
+ })
+
+ return mounts, nil
}
-// TODO(gvisor.dev/issue/1487): Implement submount options similar to the VFS1 version.
-func (c *containerMounter) mountSubmountVFS2(ctx context.Context, conf *Config, mns *vfs.MountNamespace, creds *auth.Credentials, submount *specs.Mount) error {
+// TODO(gvisor.dev/issue/1487): Implement submount options similar to the VFS1
+// version.
+func (c *containerMounter) mountSubmountVFS2(ctx context.Context, conf *Config, mns *vfs.MountNamespace, creds *auth.Credentials, submount *mountAndFD) error {
root := mns.Root()
defer root.DecRef()
target := &vfs.PathOperation{
@@ -233,7 +210,7 @@ func (c *containerMounter) mountSubmountVFS2(ctx context.Context, conf *Config,
Path: fspath.Parse(submount.Destination),
}
- fsName, options, useOverlay, err := c.getMountNameAndOptionsVFS2(conf, *submount)
+ fsName, options, useOverlay, err := c.getMountNameAndOptionsVFS2(conf, submount)
if err != nil {
return fmt.Errorf("mountOptions failed: %w", err)
}
@@ -265,7 +242,7 @@ func (c *containerMounter) mountSubmountVFS2(ctx context.Context, conf *Config,
// getMountNameAndOptionsVFS2 retrieves the fsName, opts, and useOverlay values
// used for mounts.
-func (c *containerMounter) getMountNameAndOptionsVFS2(conf *Config, m specs.Mount) (string, []string, bool, error) {
+func (c *containerMounter) getMountNameAndOptionsVFS2(conf *Config, m *mountAndFD) (string, []string, bool, error) {
var (
fsName string
opts []string
@@ -273,11 +250,11 @@ func (c *containerMounter) getMountNameAndOptionsVFS2(conf *Config, m specs.Moun
)
switch m.Type {
- case devpts, devtmpfs, proc, sysfs:
+ case devpts.Name, devtmpfs.Name, proc.Name, sys.Name:
fsName = m.Type
case nonefs:
- fsName = sysfs
- case tmpfs:
+ fsName = sys.Name
+ case tmpfs.Name:
fsName = m.Type
var err error
@@ -287,9 +264,8 @@ func (c *containerMounter) getMountNameAndOptionsVFS2(conf *Config, m specs.Moun
}
case bind:
- fd := c.fds.remove()
- fsName = "9p"
- opts = p9MountOptionsVFS2(fd, c.getMountAccessType(m))
+ fsName = gofer.Name
+ opts = p9MountOptions(m.fd, c.getMountAccessType(m.Mount), true /* vfs2 */)
// If configured, add overlay to all writable mounts.
useOverlay = conf.Overlay && !mountFlags(m.Options).ReadOnly
@@ -299,21 +275,6 @@ func (c *containerMounter) getMountNameAndOptionsVFS2(conf *Config, m specs.Moun
return fsName, opts, useOverlay, nil
}
-// p9MountOptions creates a slice of options for a p9 mount.
-// TODO(gvisor.dev/issue/1624): Remove this version once privateunixsocket is
-// deleted, along with the rest of VFS1.
-func p9MountOptionsVFS2(fd int, fa FileAccessType) []string {
- opts := []string{
- "trans=fd",
- "rfdno=" + strconv.Itoa(fd),
- "wfdno=" + strconv.Itoa(fd),
- }
- if fa == FileAccessShared {
- opts = append(opts, "cache=remote_revalidating")
- }
- return opts
-}
-
func (c *containerMounter) makeSyntheticMount(ctx context.Context, currentPath string, root vfs.VirtualDentry, creds *auth.Credentials) error {
target := &vfs.PathOperation{
Root: root,