diff options
Diffstat (limited to 'runsc/boot/fs.go')
-rw-r--r-- | runsc/boot/fs.go | 110 |
1 files changed, 61 insertions, 49 deletions
diff --git a/runsc/boot/fs.go b/runsc/boot/fs.go index 0f62842ea..b98a1eb50 100644 --- a/runsc/boot/fs.go +++ b/runsc/boot/fs.go @@ -37,6 +37,13 @@ import ( "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/gofer" "gvisor.dev/gvisor/pkg/sentry/fs/ramfs" + "gvisor.dev/gvisor/pkg/sentry/fs/user" + "gvisor.dev/gvisor/pkg/sentry/fsimpl/devpts" + "gvisor.dev/gvisor/pkg/sentry/fsimpl/devtmpfs" + gofervfs2 "gvisor.dev/gvisor/pkg/sentry/fsimpl/gofer" + procvfs2 "gvisor.dev/gvisor/pkg/sentry/fsimpl/proc" + sysvfs2 "gvisor.dev/gvisor/pkg/sentry/fsimpl/sys" + tmpfsvfs2 "gvisor.dev/gvisor/pkg/sentry/fsimpl/tmpfs" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/syserror" @@ -44,27 +51,19 @@ import ( ) const ( - // Filesystem name for 9p gofer mounts. - rootFsName = "9p" - // Device name for root mount. rootDevice = "9pfs-/" // MountPrefix is the annotation prefix for mount hints. MountPrefix = "dev.gvisor.spec.mount." - // Filesystems that runsc supports. - bind = "bind" - devpts = "devpts" - devtmpfs = "devtmpfs" - proc = "proc" - sysfs = "sysfs" - tmpfs = "tmpfs" - nonefs = "none" + // Supported filesystems that map to different internal filesystem. + bind = "bind" + nonefs = "none" ) // tmpfs has some extra supported options that we must pass through. -var tmpfsAllowedOptions = []string{"mode", "uid", "gid"} +var tmpfsAllowedData = []string{"mode", "uid", "gid"} func addOverlay(ctx context.Context, conf *Config, lower *fs.Inode, name string, lowerFlags fs.MountSourceFlags) (*fs.Inode, error) { // Upper layer uses the same flags as lower, but it must be read-write. @@ -108,12 +107,12 @@ func compileMounts(spec *specs.Spec) []specs.Mount { // Always mount /dev. mounts = append(mounts, specs.Mount{ - Type: devtmpfs, + Type: devtmpfs.Name, Destination: "/dev", }) mounts = append(mounts, specs.Mount{ - Type: devpts, + Type: devpts.Name, Destination: "/dev/pts", }) @@ -137,13 +136,13 @@ func compileMounts(spec *specs.Spec) []specs.Mount { var mandatoryMounts []specs.Mount if !procMounted { mandatoryMounts = append(mandatoryMounts, specs.Mount{ - Type: proc, + Type: procvfs2.Name, Destination: "/proc", }) } if !sysMounted { mandatoryMounts = append(mandatoryMounts, specs.Mount{ - Type: sysfs, + Type: sysvfs2.Name, Destination: "/sys", }) } @@ -155,13 +154,17 @@ func compileMounts(spec *specs.Spec) []specs.Mount { return mounts } -// p9MountOptions creates a slice of options for a p9 mount. -func p9MountOptions(fd int, fa FileAccessType) []string { +// p9MountData creates a slice of p9 mount data. +func p9MountData(fd int, fa FileAccessType, vfs2 bool) []string { opts := []string{ "trans=fd", "rfdno=" + strconv.Itoa(fd), "wfdno=" + strconv.Itoa(fd), - "privateunixsocket=true", + } + if !vfs2 { + // privateunixsocket is always enabled in VFS2. VFS1 requires explicit + // enablement. + opts = append(opts, "privateunixsocket=true") } if fa == FileAccessShared { opts = append(opts, "cache=remote_revalidating") @@ -231,8 +234,8 @@ func isSupportedMountFlag(fstype, opt string) bool { case "rw", "ro", "noatime", "noexec": return true } - if fstype == tmpfs { - ok, err := parseMountOption(opt, tmpfsAllowedOptions...) + if fstype == tmpfsvfs2.Name { + ok, err := parseMountOption(opt, tmpfsAllowedData...) return ok && err == nil } return false @@ -278,6 +281,9 @@ func subtargets(root string, mnts []specs.Mount) []string { } func setupContainerFS(ctx context.Context, conf *Config, mntr *containerMounter, procArgs *kernel.CreateProcessArgs) error { + if conf.VFS2 { + return setupContainerVFS2(ctx, conf, mntr, procArgs) + } mns, err := mntr.setupFS(conf, procArgs) if err != nil { return err @@ -286,17 +292,10 @@ func setupContainerFS(ctx context.Context, conf *Config, mntr *containerMounter, // Set namespace here so that it can be found in ctx. procArgs.MountNamespace = mns - return setExecutablePath(ctx, procArgs) -} - -// setExecutablePath sets the procArgs.Filename by searching the PATH for an -// executable matching the procArgs.Argv[0]. -func setExecutablePath(ctx context.Context, procArgs *kernel.CreateProcessArgs) error { - paths := fs.GetPath(procArgs.Envv) - exe := procArgs.Argv[0] - f, err := procArgs.MountNamespace.ResolveExecutablePath(ctx, procArgs.WorkingDirectory, exe, paths) + // Resolve the executable path from working dir and environment. + f, err := user.ResolveExecutablePath(ctx, procArgs.Credentials, procArgs.MountNamespace, procArgs.Envv, procArgs.WorkingDirectory, procArgs.Argv[0]) if err != nil { - return fmt.Errorf("searching for executable %q, cwd: %q, $PATH=%q: %v", exe, procArgs.WorkingDirectory, strings.Join(paths, ":"), err) + return fmt.Errorf("searching for executable %q, cwd: %q, envv: %q: %v", procArgs.Argv[0], procArgs.WorkingDirectory, procArgs.Envv, err) } procArgs.Filename = f return nil @@ -438,7 +437,7 @@ func (m *mountHint) setOptions(val string) error { } func (m *mountHint) isSupported() bool { - return m.mount.Type == tmpfs && m.share == pod + return m.mount.Type == tmpfsvfs2.Name && m.share == pod } // checkCompatible verifies that shared mount is compatible with master. @@ -573,11 +572,14 @@ func newContainerMounter(spec *specs.Spec, goferFDs []int, k *kernel.Kernel, hin // should be mounted (e.g. a volume shared between containers). It must be // called for the root container only. func (c *containerMounter) processHints(conf *Config) error { + if conf.VFS2 { + return nil + } ctx := c.k.SupervisorContext() for _, hint := range c.hints.mounts { // TODO(b/142076984): Only support tmpfs for now. Bind mounts require a // common gofer to mount all shared volumes. - if hint.mount.Type != tmpfs { + if hint.mount.Type != tmpfsvfs2.Name { continue } log.Infof("Mounting master of shared mount %q from %q type %q", hint.name, hint.mount.Source, hint.mount.Type) @@ -714,7 +716,7 @@ func (c *containerMounter) createRootMount(ctx context.Context, conf *Config) (* fd := c.fds.remove() log.Infof("Mounting root over 9P, ioFD: %d", fd) p9FS := mustFindFilesystem("9p") - opts := p9MountOptions(fd, conf.FileAccess) + opts := p9MountData(fd, conf.FileAccess, false /* vfs2 */) if conf.OverlayfsStaleRead { // We can't check for overlayfs here because sandbox is chroot'ed and gofer @@ -760,30 +762,27 @@ func (c *containerMounter) getMountNameAndOptions(conf *Config, m specs.Mount) ( ) switch m.Type { - case devpts, devtmpfs, proc, sysfs: + case devpts.Name, devtmpfs.Name, procvfs2.Name, sysvfs2.Name: fsName = m.Type case nonefs: - fsName = sysfs - case tmpfs: + fsName = sysvfs2.Name + case tmpfsvfs2.Name: fsName = m.Type var err error - opts, err = parseAndFilterOptions(m.Options, tmpfsAllowedOptions...) + opts, err = parseAndFilterOptions(m.Options, tmpfsAllowedData...) if err != nil { return "", nil, false, err } case bind: fd := c.fds.remove() - fsName = "9p" - opts = p9MountOptions(fd, c.getMountAccessType(m)) + fsName = gofervfs2.Name + opts = p9MountData(fd, c.getMountAccessType(m), conf.VFS2) // If configured, add overlay to all writable mounts. useOverlay = conf.Overlay && !mountFlags(m.Options).ReadOnly default: - // TODO(nlacasse): Support all the mount types and make this a fatal error. - // Most applications will "just work" without them, so this is a warning - // for now. log.Warningf("ignoring unknown filesystem type %q", m.Type) } return fsName, opts, useOverlay, nil @@ -824,7 +823,20 @@ func (c *containerMounter) mountSubmount(ctx context.Context, conf *Config, mns inode, err := filesystem.Mount(ctx, mountDevice(m), mf, strings.Join(opts, ","), nil) if err != nil { - return fmt.Errorf("creating mount with source %q: %v", m.Source, err) + err := fmt.Errorf("creating mount with source %q: %v", m.Source, err) + // Check to see if this is a common error due to a Linux bug. + // This error is generated here in order to cause it to be + // printed to the user using Docker via 'runsc create' etc. rather + // than simply printed to the logs for the 'runsc boot' command. + // + // We check the error message string rather than type because the + // actual error types (syscall.EIO, syscall.EPIPE) are lost by file system + // implementation (e.g. p9). + // TODO(gvisor.dev/issue/1765): Remove message when bug is resolved. + if strings.Contains(err.Error(), syscall.EIO.Error()) || strings.Contains(err.Error(), syscall.EPIPE.Error()) { + return fmt.Errorf("%v: %s", err, specutils.FaqErrorMsg("memlock", "you may be encountering a Linux kernel bug")) + } + return err } // If there are submounts, we need to overlay the mount on top of a ramfs @@ -919,7 +931,7 @@ func (c *containerMounter) createRestoreEnvironment(conf *Config) (*fs.RestoreEn // Add root mount. fd := c.fds.remove() - opts := p9MountOptions(fd, conf.FileAccess) + opts := p9MountData(fd, conf.FileAccess, false /* vfs2 */) mf := fs.MountSourceFlags{} if c.root.Readonly || conf.Overlay { @@ -931,7 +943,7 @@ func (c *containerMounter) createRestoreEnvironment(conf *Config) (*fs.RestoreEn Flags: mf, DataString: strings.Join(opts, ","), } - renv.MountSources[rootFsName] = append(renv.MountSources[rootFsName], rootMount) + renv.MountSources[gofervfs2.Name] = append(renv.MountSources[gofervfs2.Name], rootMount) // Add submounts. var tmpMounted bool @@ -947,7 +959,7 @@ func (c *containerMounter) createRestoreEnvironment(conf *Config) (*fs.RestoreEn // TODO(b/67958150): handle '/tmp' properly (see mountTmp()). if !tmpMounted { tmpMount := specs.Mount{ - Type: tmpfs, + Type: tmpfsvfs2.Name, Destination: "/tmp", } if err := c.addRestoreMount(conf, renv, tmpMount); err != nil { @@ -1003,11 +1015,11 @@ func (c *containerMounter) mountTmp(ctx context.Context, conf *Config, mns *fs.M // No '/tmp' found (or fallthrough from above). Safe to mount internal // tmpfs. tmpMount := specs.Mount{ - Type: tmpfs, + Type: tmpfsvfs2.Name, Destination: "/tmp", // Sticky bit is added to prevent accidental deletion of files from // another user. This is normally done for /tmp. - Options: []string{"mode=1777"}, + Options: []string{"mode=01777"}, } return c.mountSubmount(ctx, conf, mns, root, tmpMount) |