diff options
Diffstat (limited to 'runsc')
-rw-r--r-- | runsc/boot/compat.go | 8 | ||||
-rw-r--r-- | runsc/boot/compat_amd64.go | 22 | ||||
-rw-r--r-- | runsc/boot/compat_arm64.go | 22 | ||||
-rw-r--r-- | runsc/boot/config.go | 6 | ||||
-rw-r--r-- | runsc/boot/fs.go | 89 | ||||
-rw-r--r-- | runsc/boot/vfs.go | 187 | ||||
-rw-r--r-- | runsc/cgroup/BUILD | 2 | ||||
-rw-r--r-- | runsc/cgroup/cgroup.go | 54 | ||||
-rw-r--r-- | runsc/cmd/gofer.go | 4 | ||||
-rw-r--r-- | runsc/cmd/help.go | 12 | ||||
-rw-r--r-- | runsc/cmd/spec.go | 18 | ||||
-rw-r--r-- | runsc/container/BUILD | 2 | ||||
-rw-r--r-- | runsc/container/container.go | 7 | ||||
-rw-r--r-- | runsc/container/container_test.go | 61 | ||||
-rw-r--r-- | runsc/container/multi_container_test.go | 28 | ||||
-rw-r--r-- | runsc/fsgofer/BUILD | 2 | ||||
-rw-r--r-- | runsc/fsgofer/fsgofer.go | 8 | ||||
-rw-r--r-- | runsc/main.go | 2 | ||||
-rw-r--r-- | runsc/sandbox/BUILD | 1 | ||||
-rw-r--r-- | runsc/sandbox/sandbox.go | 3 | ||||
-rw-r--r-- | runsc/specutils/specutils.go | 44 |
21 files changed, 227 insertions, 355 deletions
diff --git a/runsc/boot/compat.go b/runsc/boot/compat.go index b7cfb35bf..84c67cbc2 100644 --- a/runsc/boot/compat.go +++ b/runsc/boot/compat.go @@ -119,7 +119,13 @@ func (c *compatEmitter) emitUnimplementedSyscall(us *spb.UnimplementedSyscall) { } if tr.shouldReport(regs) { - c.sink.Infof("Unsupported syscall: %s, regs: %+v", c.nameMap.Name(uintptr(sysnr)), regs) + name := c.nameMap.Name(uintptr(sysnr)) + c.sink.Infof("Unsupported syscall %s(%#x,%#x,%#x,%#x,%#x,%#x). It is "+ + "likely that you can safely ignore this message and that this is not "+ + "the cause of any error. Please, refer to %s/%s for more information.", + name, argVal(0, regs), argVal(1, regs), argVal(2, regs), argVal(3, regs), + argVal(4, regs), argVal(5, regs), syscallLink, name) + tr.onReported(regs) } } diff --git a/runsc/boot/compat_amd64.go b/runsc/boot/compat_amd64.go index 42b0ca8b0..8eb76b2ba 100644 --- a/runsc/boot/compat_amd64.go +++ b/runsc/boot/compat_amd64.go @@ -24,8 +24,12 @@ import ( "gvisor.dev/gvisor/pkg/sentry/strace" ) -// reportLimit is the max number of events that should be reported per tracker. -const reportLimit = 100 +const ( + // reportLimit is the max number of events that should be reported per + // tracker. + reportLimit = 100 + syscallLink = "https://gvisor.dev/c/linux/amd64" +) // newRegs create a empty Registers instance. func newRegs() *rpb.Registers { @@ -36,22 +40,22 @@ func newRegs() *rpb.Registers { } } -func argVal(argIdx int, regs *rpb.Registers) uint32 { +func argVal(argIdx int, regs *rpb.Registers) uint64 { amd64Regs := regs.GetArch().(*rpb.Registers_Amd64).Amd64 switch argIdx { case 0: - return uint32(amd64Regs.Rdi) + return amd64Regs.Rdi case 1: - return uint32(amd64Regs.Rsi) + return amd64Regs.Rsi case 2: - return uint32(amd64Regs.Rdx) + return amd64Regs.Rdx case 3: - return uint32(amd64Regs.R10) + return amd64Regs.R10 case 4: - return uint32(amd64Regs.R8) + return amd64Regs.R8 case 5: - return uint32(amd64Regs.R9) + return amd64Regs.R9 } panic(fmt.Sprintf("invalid syscall argument index %d", argIdx)) } diff --git a/runsc/boot/compat_arm64.go b/runsc/boot/compat_arm64.go index f784cd237..bce9d95b3 100644 --- a/runsc/boot/compat_arm64.go +++ b/runsc/boot/compat_arm64.go @@ -23,8 +23,12 @@ import ( "gvisor.dev/gvisor/pkg/sentry/strace" ) -// reportLimit is the max number of events that should be reported per tracker. -const reportLimit = 100 +const ( + // reportLimit is the max number of events that should be reported per + // tracker. + reportLimit = 100 + syscallLink = "https://gvisor.dev/c/linux/arm64" +) // newRegs create a empty Registers instance. func newRegs() *rpb.Registers { @@ -35,22 +39,22 @@ func newRegs() *rpb.Registers { } } -func argVal(argIdx int, regs *rpb.Registers) uint32 { +func argVal(argIdx int, regs *rpb.Registers) uint64 { arm64Regs := regs.GetArch().(*rpb.Registers_Arm64).Arm64 switch argIdx { case 0: - return uint32(arm64Regs.R0) + return arm64Regs.R0 case 1: - return uint32(arm64Regs.R1) + return arm64Regs.R1 case 2: - return uint32(arm64Regs.R2) + return arm64Regs.R2 case 3: - return uint32(arm64Regs.R3) + return arm64Regs.R3 case 4: - return uint32(arm64Regs.R4) + return arm64Regs.R4 case 5: - return uint32(arm64Regs.R5) + return arm64Regs.R5 } panic(fmt.Sprintf("invalid syscall argument index %d", argIdx)) } diff --git a/runsc/boot/config.go b/runsc/boot/config.go index 6d6a705f8..bcec7e4db 100644 --- a/runsc/boot/config.go +++ b/runsc/boot/config.go @@ -241,8 +241,10 @@ type Config struct { // ReferenceLeakMode sets reference leak check mode ReferenceLeakMode refs.LeakMode - // OverlayfsStaleRead causes cached FDs to reopen after a file is opened for - // write to workaround overlayfs limitation on kernels before 4.19. + // OverlayfsStaleRead instructs the sandbox to assume that the root mount + // is on a Linux overlayfs mount, which does not necessarily preserve + // coherence between read-only and subsequent writable file descriptors + // representing the "same" file. OverlayfsStaleRead bool // TestOnlyAllowRunAsCurrentUserWithoutChroot should only be used in diff --git a/runsc/boot/fs.go b/runsc/boot/fs.go index 4875452e2..52f8344ca 100644 --- a/runsc/boot/fs.go +++ b/runsc/boot/fs.go @@ -37,6 +37,13 @@ import ( "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/gofer" "gvisor.dev/gvisor/pkg/sentry/fs/ramfs" + "gvisor.dev/gvisor/pkg/sentry/fs/user" + "gvisor.dev/gvisor/pkg/sentry/fsimpl/devpts" + "gvisor.dev/gvisor/pkg/sentry/fsimpl/devtmpfs" + gofervfs2 "gvisor.dev/gvisor/pkg/sentry/fsimpl/gofer" + procvfs2 "gvisor.dev/gvisor/pkg/sentry/fsimpl/proc" + sysvfs2 "gvisor.dev/gvisor/pkg/sentry/fsimpl/sys" + tmpfsvfs2 "gvisor.dev/gvisor/pkg/sentry/fsimpl/tmpfs" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/syserror" @@ -44,23 +51,15 @@ import ( ) const ( - // Filesystem name for 9p gofer mounts. - rootFsName = "9p" - // Device name for root mount. rootDevice = "9pfs-/" // MountPrefix is the annotation prefix for mount hints. MountPrefix = "dev.gvisor.spec.mount." - // Filesystems that runsc supports. - bind = "bind" - devpts = "devpts" - devtmpfs = "devtmpfs" - proc = "proc" - sysfs = "sysfs" - tmpfs = "tmpfs" - nonefs = "none" + // Supported filesystems that map to different internal filesystem. + bind = "bind" + nonefs = "none" ) // tmpfs has some extra supported options that we must pass through. @@ -108,12 +107,12 @@ func compileMounts(spec *specs.Spec) []specs.Mount { // Always mount /dev. mounts = append(mounts, specs.Mount{ - Type: devtmpfs, + Type: devtmpfs.Name, Destination: "/dev", }) mounts = append(mounts, specs.Mount{ - Type: devpts, + Type: devpts.Name, Destination: "/dev/pts", }) @@ -137,13 +136,13 @@ func compileMounts(spec *specs.Spec) []specs.Mount { var mandatoryMounts []specs.Mount if !procMounted { mandatoryMounts = append(mandatoryMounts, specs.Mount{ - Type: proc, + Type: procvfs2.Name, Destination: "/proc", }) } if !sysMounted { mandatoryMounts = append(mandatoryMounts, specs.Mount{ - Type: sysfs, + Type: sysvfs2.Name, Destination: "/sys", }) } @@ -156,12 +155,16 @@ func compileMounts(spec *specs.Spec) []specs.Mount { } // p9MountOptions creates a slice of options for a p9 mount. -func p9MountOptions(fd int, fa FileAccessType) []string { +func p9MountOptions(fd int, fa FileAccessType, vfs2 bool) []string { opts := []string{ "trans=fd", "rfdno=" + strconv.Itoa(fd), "wfdno=" + strconv.Itoa(fd), - "privateunixsocket=true", + } + if !vfs2 { + // privateunixsocket is always enabled in VFS2. VFS1 requires explicit + // enablement. + opts = append(opts, "privateunixsocket=true") } if fa == FileAccessShared { opts = append(opts, "cache=remote_revalidating") @@ -219,9 +222,6 @@ func mountFlags(opts []string) fs.MountSourceFlags { mf.NoAtime = true case "noexec": mf.NoExec = true - case "bind", "rbind": - // When options include either "bind" or "rbind", - // it's converted to a 9P mount. default: log.Warningf("ignoring unknown mount option %q", o) } @@ -234,7 +234,7 @@ func isSupportedMountFlag(fstype, opt string) bool { case "rw", "ro", "noatime", "noexec": return true } - if fstype == tmpfs { + if fstype == tmpfsvfs2.Name { ok, err := parseMountOption(opt, tmpfsAllowedOptions...) return ok && err == nil } @@ -292,17 +292,10 @@ func setupContainerFS(ctx context.Context, conf *Config, mntr *containerMounter, // Set namespace here so that it can be found in ctx. procArgs.MountNamespace = mns - return setExecutablePath(ctx, procArgs) -} - -// setExecutablePath sets the procArgs.Filename by searching the PATH for an -// executable matching the procArgs.Argv[0]. -func setExecutablePath(ctx context.Context, procArgs *kernel.CreateProcessArgs) error { - paths := fs.GetPath(procArgs.Envv) - exe := procArgs.Argv[0] - f, err := procArgs.MountNamespace.ResolveExecutablePath(ctx, procArgs.WorkingDirectory, exe, paths) + // Resolve the executable path from working dir and environment. + f, err := user.ResolveExecutablePath(ctx, procArgs.Credentials, procArgs.MountNamespace, procArgs.Envv, procArgs.WorkingDirectory, procArgs.Argv[0]) if err != nil { - return fmt.Errorf("searching for executable %q, cwd: %q, $PATH=%q: %v", exe, procArgs.WorkingDirectory, strings.Join(paths, ":"), err) + return fmt.Errorf("searching for executable %q, cwd: %q, envv: %q: %v", procArgs.Argv[0], procArgs.WorkingDirectory, procArgs.Envv, err) } procArgs.Filename = f return nil @@ -444,7 +437,7 @@ func (m *mountHint) setOptions(val string) error { } func (m *mountHint) isSupported() bool { - return m.mount.Type == tmpfs && m.share == pod + return m.mount.Type == tmpfsvfs2.Name && m.share == pod } // checkCompatible verifies that shared mount is compatible with master. @@ -586,7 +579,7 @@ func (c *containerMounter) processHints(conf *Config) error { for _, hint := range c.hints.mounts { // TODO(b/142076984): Only support tmpfs for now. Bind mounts require a // common gofer to mount all shared volumes. - if hint.mount.Type != tmpfs { + if hint.mount.Type != tmpfsvfs2.Name { continue } log.Infof("Mounting master of shared mount %q from %q type %q", hint.name, hint.mount.Source, hint.mount.Type) @@ -723,7 +716,7 @@ func (c *containerMounter) createRootMount(ctx context.Context, conf *Config) (* fd := c.fds.remove() log.Infof("Mounting root over 9P, ioFD: %d", fd) p9FS := mustFindFilesystem("9p") - opts := p9MountOptions(fd, conf.FileAccess) + opts := p9MountOptions(fd, conf.FileAccess, false /* vfs2 */) if conf.OverlayfsStaleRead { // We can't check for overlayfs here because sandbox is chroot'ed and gofer @@ -768,22 +761,12 @@ func (c *containerMounter) getMountNameAndOptions(conf *Config, m specs.Mount) ( useOverlay bool ) - for _, opt := range m.Options { - // When options include either "bind" or "rbind", this behaves as - // bind mount even if the mount type is equal to a filesystem supported - // on runsc. - if opt == "bind" || opt == "rbind" { - m.Type = bind - break - } - } - switch m.Type { - case devpts, devtmpfs, proc, sysfs: + case devpts.Name, devtmpfs.Name, procvfs2.Name, sysvfs2.Name: fsName = m.Type case nonefs: - fsName = sysfs - case tmpfs: + fsName = sysvfs2.Name + case tmpfsvfs2.Name: fsName = m.Type var err error @@ -794,8 +777,8 @@ func (c *containerMounter) getMountNameAndOptions(conf *Config, m specs.Mount) ( case bind: fd := c.fds.remove() - fsName = "9p" - opts = p9MountOptions(fd, c.getMountAccessType(m)) + fsName = gofervfs2.Name + opts = p9MountOptions(fd, c.getMountAccessType(m), conf.VFS2) // If configured, add overlay to all writable mounts. useOverlay = conf.Overlay && !mountFlags(m.Options).ReadOnly @@ -948,7 +931,7 @@ func (c *containerMounter) createRestoreEnvironment(conf *Config) (*fs.RestoreEn // Add root mount. fd := c.fds.remove() - opts := p9MountOptions(fd, conf.FileAccess) + opts := p9MountOptions(fd, conf.FileAccess, false /* vfs2 */) mf := fs.MountSourceFlags{} if c.root.Readonly || conf.Overlay { @@ -960,7 +943,7 @@ func (c *containerMounter) createRestoreEnvironment(conf *Config) (*fs.RestoreEn Flags: mf, DataString: strings.Join(opts, ","), } - renv.MountSources[rootFsName] = append(renv.MountSources[rootFsName], rootMount) + renv.MountSources[gofervfs2.Name] = append(renv.MountSources[gofervfs2.Name], rootMount) // Add submounts. var tmpMounted bool @@ -976,7 +959,7 @@ func (c *containerMounter) createRestoreEnvironment(conf *Config) (*fs.RestoreEn // TODO(b/67958150): handle '/tmp' properly (see mountTmp()). if !tmpMounted { tmpMount := specs.Mount{ - Type: tmpfs, + Type: tmpfsvfs2.Name, Destination: "/tmp", } if err := c.addRestoreMount(conf, renv, tmpMount); err != nil { @@ -1032,7 +1015,7 @@ func (c *containerMounter) mountTmp(ctx context.Context, conf *Config, mns *fs.M // No '/tmp' found (or fallthrough from above). Safe to mount internal // tmpfs. tmpMount := specs.Mount{ - Type: tmpfs, + Type: tmpfsvfs2.Name, Destination: "/tmp", // Sticky bit is added to prevent accidental deletion of files from // another user. This is normally done for /tmp. diff --git a/runsc/boot/vfs.go b/runsc/boot/vfs.go index ef95d0c65..f48e6b0f1 100644 --- a/runsc/boot/vfs.go +++ b/runsc/boot/vfs.go @@ -18,52 +18,50 @@ import ( "fmt" "path" "sort" - "strconv" "strings" specs "github.com/opencontainers/runtime-spec/specs-go" "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/fspath" - "gvisor.dev/gvisor/pkg/sentry/devices/memdev" - "gvisor.dev/gvisor/pkg/sentry/fs" - devpts2 "gvisor.dev/gvisor/pkg/sentry/fsimpl/devpts" - devtmpfsimpl "gvisor.dev/gvisor/pkg/sentry/fsimpl/devtmpfs" - goferimpl "gvisor.dev/gvisor/pkg/sentry/fsimpl/gofer" - procimpl "gvisor.dev/gvisor/pkg/sentry/fsimpl/proc" - sysimpl "gvisor.dev/gvisor/pkg/sentry/fsimpl/sys" - tmpfsimpl "gvisor.dev/gvisor/pkg/sentry/fsimpl/tmpfs" - "gvisor.dev/gvisor/pkg/syserror" - "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/fspath" "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/sentry/devices/memdev" + "gvisor.dev/gvisor/pkg/sentry/fs/user" + "gvisor.dev/gvisor/pkg/sentry/fsimpl/devpts" + "gvisor.dev/gvisor/pkg/sentry/fsimpl/devtmpfs" + "gvisor.dev/gvisor/pkg/sentry/fsimpl/gofer" + "gvisor.dev/gvisor/pkg/sentry/fsimpl/proc" + "gvisor.dev/gvisor/pkg/sentry/fsimpl/sys" + "gvisor.dev/gvisor/pkg/sentry/fsimpl/tmpfs" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/vfs" + "gvisor.dev/gvisor/pkg/syserror" ) func registerFilesystems(ctx context.Context, vfsObj *vfs.VirtualFilesystem, creds *auth.Credentials) error { - vfsObj.MustRegisterFilesystemType(devpts2.Name, &devpts2.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{ + vfsObj.MustRegisterFilesystemType(devpts.Name, &devpts.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{ AllowUserList: true, // TODO(b/29356795): Users may mount this once the terminals are in a // usable state. AllowUserMount: false, }) - vfsObj.MustRegisterFilesystemType(devtmpfsimpl.Name, &devtmpfsimpl.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{ + vfsObj.MustRegisterFilesystemType(devtmpfs.Name, &devtmpfs.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{ AllowUserMount: true, AllowUserList: true, }) - vfsObj.MustRegisterFilesystemType(goferimpl.Name, &goferimpl.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{ + vfsObj.MustRegisterFilesystemType(gofer.Name, &gofer.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{ AllowUserList: true, }) - vfsObj.MustRegisterFilesystemType(procimpl.Name, &procimpl.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{ + vfsObj.MustRegisterFilesystemType(proc.Name, &proc.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{ AllowUserMount: true, AllowUserList: true, }) - vfsObj.MustRegisterFilesystemType(sysimpl.Name, &sysimpl.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{ + vfsObj.MustRegisterFilesystemType(sys.Name, &sys.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{ AllowUserMount: true, AllowUserList: true, }) - vfsObj.MustRegisterFilesystemType(tmpfsimpl.Name, &tmpfsimpl.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{ + vfsObj.MustRegisterFilesystemType(tmpfs.Name, &tmpfs.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{ AllowUserMount: true, AllowUserList: true, }) @@ -72,7 +70,7 @@ func registerFilesystems(ctx context.Context, vfsObj *vfs.VirtualFilesystem, cre if err := memdev.Register(vfsObj); err != nil { return fmt.Errorf("registering memdev: %w", err) } - a, err := devtmpfsimpl.NewAccessor(ctx, vfsObj, creds, devtmpfsimpl.Name) + a, err := devtmpfs.NewAccessor(ctx, vfsObj, creds, devtmpfs.Name) if err != nil { return fmt.Errorf("creating devtmpfs accessor: %w", err) } @@ -96,69 +94,14 @@ func setupContainerVFS2(ctx context.Context, conf *Config, mntr *containerMounte return fmt.Errorf("failed to setupFS: %w", err) } procArgs.MountNamespaceVFS2 = mns - return setExecutablePathVFS2(ctx, procArgs) -} - -func setExecutablePathVFS2(ctx context.Context, procArgs *kernel.CreateProcessArgs) error { - exe := procArgs.Argv[0] - - // Absolute paths can be used directly. - if path.IsAbs(exe) { - procArgs.Filename = exe - return nil - } - // Paths with '/' in them should be joined to the working directory, or - // to the root if working directory is not set. - if strings.IndexByte(exe, '/') > 0 { - if !path.IsAbs(procArgs.WorkingDirectory) { - return fmt.Errorf("working directory %q must be absolute", procArgs.WorkingDirectory) - } - procArgs.Filename = path.Join(procArgs.WorkingDirectory, exe) - return nil - } - - // Paths with a '/' are relative to the CWD. - if strings.IndexByte(exe, '/') > 0 { - procArgs.Filename = path.Join(procArgs.WorkingDirectory, exe) - return nil - } - - // Otherwise, We must lookup the name in the paths, starting from the - // root directory. - root := procArgs.MountNamespaceVFS2.Root() - defer root.DecRef() - - paths := fs.GetPath(procArgs.Envv) - creds := procArgs.Credentials - - for _, p := range paths { - binPath := path.Join(p, exe) - pop := &vfs.PathOperation{ - Root: root, - Start: root, - Path: fspath.Parse(binPath), - FollowFinalSymlink: true, - } - opts := &vfs.OpenOptions{ - FileExec: true, - Flags: linux.O_RDONLY, - } - dentry, err := root.Mount().Filesystem().VirtualFilesystem().OpenAt(ctx, creds, pop, opts) - if err == syserror.ENOENT || err == syserror.EACCES { - // Didn't find it here. - continue - } - if err != nil { - return err - } - dentry.DecRef() - - procArgs.Filename = binPath - return nil + // Resolve the executable path from working dir and environment. + f, err := user.ResolveExecutablePathVFS2(ctx, procArgs.Credentials, procArgs.MountNamespaceVFS2, procArgs.Envv, procArgs.WorkingDirectory, procArgs.Argv[0]) + if err != nil { + return fmt.Errorf("searching for executable %q, cwd: %q, envv: %q: %v", procArgs.Argv[0], procArgs.WorkingDirectory, procArgs.Envv, err) } - - return fmt.Errorf("executable %q not found in $PATH=%q", exe, strings.Join(paths, ":")) + procArgs.Filename = f + return nil } func (c *containerMounter) setupVFS2(ctx context.Context, conf *Config, procArgs *kernel.CreateProcessArgs) (*vfs.MountNamespace, error) { @@ -193,10 +136,10 @@ func (c *containerMounter) setupVFS2(ctx context.Context, conf *Config, procArgs func (c *containerMounter) createMountNamespaceVFS2(ctx context.Context, conf *Config, creds *auth.Credentials) (*vfs.MountNamespace, error) { fd := c.fds.remove() - opts := strings.Join(p9MountOptionsVFS2(fd, conf.FileAccess), ",") + opts := strings.Join(p9MountOptions(fd, conf.FileAccess, true /* vfs2 */), ",") log.Infof("Mounting root over 9P, ioFD: %d", fd) - mns, err := c.k.VFS().NewMountNamespace(ctx, creds, "", rootFsName, &vfs.GetFilesystemOptions{Data: opts}) + mns, err := c.k.VFS().NewMountNamespace(ctx, creds, "", gofer.Name, &vfs.GetFilesystemOptions{Data: opts}) if err != nil { return nil, fmt.Errorf("setting up mount namespace: %w", err) } @@ -204,27 +147,61 @@ func (c *containerMounter) createMountNamespaceVFS2(ctx context.Context, conf *C } func (c *containerMounter) mountSubmountsVFS2(ctx context.Context, conf *Config, mns *vfs.MountNamespace, creds *auth.Credentials) error { - c.prepareMountsVFS2() + mounts, err := c.prepareMountsVFS2() + if err != nil { + return err + } - for _, submount := range c.mounts { + for i := range mounts { + submount := &mounts[i] log.Debugf("Mounting %q to %q, type: %s, options: %s", submount.Source, submount.Destination, submount.Type, submount.Options) - if err := c.mountSubmountVFS2(ctx, conf, mns, creds, &submount); err != nil { + if err := c.mountSubmountVFS2(ctx, conf, mns, creds, submount); err != nil { return err } } // TODO(gvisor.dev/issue/1487): implement mountTmp from fs.go. - return c.checkDispenser() + return nil +} + +type mountAndFD struct { + specs.Mount + fd int } -func (c *containerMounter) prepareMountsVFS2() { +func (c *containerMounter) prepareMountsVFS2() ([]mountAndFD, error) { + // Associate bind mounts with their FDs before sorting since there is an + // undocumented assumption that FDs are dispensed in the order in which + // they are required by mounts. + var mounts []mountAndFD + for _, m := range c.mounts { + fd := -1 + // Only bind mounts use host FDs; see + // containerMounter.getMountNameAndOptionsVFS2. + if m.Type == bind { + fd = c.fds.remove() + } + mounts = append(mounts, mountAndFD{ + Mount: m, + fd: fd, + }) + } + if err := c.checkDispenser(); err != nil { + return nil, err + } + // Sort the mounts so that we don't place children before parents. - sort.Slice(c.mounts, func(i, j int) bool { return len(c.mounts[i].Destination) < len(c.mounts[j].Destination) }) + sort.Slice(mounts, func(i, j int) bool { + return len(mounts[i].Destination) < len(mounts[j].Destination) + }) + + return mounts, nil } -// TODO(gvisor.dev/issue/1487): Implement submount options similar to the VFS1 version. -func (c *containerMounter) mountSubmountVFS2(ctx context.Context, conf *Config, mns *vfs.MountNamespace, creds *auth.Credentials, submount *specs.Mount) error { +// TODO(gvisor.dev/issue/1487): Implement submount options similar to the VFS1 +// version. +func (c *containerMounter) mountSubmountVFS2(ctx context.Context, conf *Config, mns *vfs.MountNamespace, creds *auth.Credentials, submount *mountAndFD) error { root := mns.Root() defer root.DecRef() target := &vfs.PathOperation{ @@ -233,7 +210,7 @@ func (c *containerMounter) mountSubmountVFS2(ctx context.Context, conf *Config, Path: fspath.Parse(submount.Destination), } - fsName, options, useOverlay, err := c.getMountNameAndOptionsVFS2(conf, *submount) + fsName, options, useOverlay, err := c.getMountNameAndOptionsVFS2(conf, submount) if err != nil { return fmt.Errorf("mountOptions failed: %w", err) } @@ -265,7 +242,7 @@ func (c *containerMounter) mountSubmountVFS2(ctx context.Context, conf *Config, // getMountNameAndOptionsVFS2 retrieves the fsName, opts, and useOverlay values // used for mounts. -func (c *containerMounter) getMountNameAndOptionsVFS2(conf *Config, m specs.Mount) (string, []string, bool, error) { +func (c *containerMounter) getMountNameAndOptionsVFS2(conf *Config, m *mountAndFD) (string, []string, bool, error) { var ( fsName string opts []string @@ -273,11 +250,11 @@ func (c *containerMounter) getMountNameAndOptionsVFS2(conf *Config, m specs.Moun ) switch m.Type { - case devpts, devtmpfs, proc, sysfs: + case devpts.Name, devtmpfs.Name, proc.Name, sys.Name: fsName = m.Type case nonefs: - fsName = sysfs - case tmpfs: + fsName = sys.Name + case tmpfs.Name: fsName = m.Type var err error @@ -287,9 +264,8 @@ func (c *containerMounter) getMountNameAndOptionsVFS2(conf *Config, m specs.Moun } case bind: - fd := c.fds.remove() - fsName = "9p" - opts = p9MountOptionsVFS2(fd, c.getMountAccessType(m)) + fsName = gofer.Name + opts = p9MountOptions(m.fd, c.getMountAccessType(m.Mount), true /* vfs2 */) // If configured, add overlay to all writable mounts. useOverlay = conf.Overlay && !mountFlags(m.Options).ReadOnly @@ -299,21 +275,6 @@ func (c *containerMounter) getMountNameAndOptionsVFS2(conf *Config, m specs.Moun return fsName, opts, useOverlay, nil } -// p9MountOptions creates a slice of options for a p9 mount. -// TODO(gvisor.dev/issue/1624): Remove this version once privateunixsocket is -// deleted, along with the rest of VFS1. -func p9MountOptionsVFS2(fd int, fa FileAccessType) []string { - opts := []string{ - "trans=fd", - "rfdno=" + strconv.Itoa(fd), - "wfdno=" + strconv.Itoa(fd), - } - if fa == FileAccessShared { - opts = append(opts, "cache=remote_revalidating") - } - return opts -} - func (c *containerMounter) makeSyntheticMount(ctx context.Context, currentPath string, root vfs.VirtualDentry, creds *auth.Credentials) error { target := &vfs.PathOperation{ Root: root, diff --git a/runsc/cgroup/BUILD b/runsc/cgroup/BUILD index d4c7bdfbb..c087e1a3c 100644 --- a/runsc/cgroup/BUILD +++ b/runsc/cgroup/BUILD @@ -7,8 +7,8 @@ go_library( srcs = ["cgroup.go"], visibility = ["//:sandbox"], deps = [ + "//pkg/cleanup", "//pkg/log", - "//runsc/specutils", "@com_github_cenkalti_backoff//:go_default_library", "@com_github_opencontainers_runtime-spec//specs-go:go_default_library", ], diff --git a/runsc/cgroup/cgroup.go b/runsc/cgroup/cgroup.go index fa40ee509..ef01820ef 100644 --- a/runsc/cgroup/cgroup.go +++ b/runsc/cgroup/cgroup.go @@ -19,6 +19,7 @@ package cgroup import ( "bufio" "context" + "errors" "fmt" "io/ioutil" "os" @@ -30,29 +31,31 @@ import ( "github.com/cenkalti/backoff" specs "github.com/opencontainers/runtime-spec/specs-go" + "gvisor.dev/gvisor/pkg/cleanup" "gvisor.dev/gvisor/pkg/log" - "gvisor.dev/gvisor/runsc/specutils" ) const ( cgroupRoot = "/sys/fs/cgroup" ) -var controllers = map[string]controller{ - "blkio": &blockIO{}, - "cpu": &cpu{}, - "cpuset": &cpuSet{}, - "memory": &memory{}, - "net_cls": &networkClass{}, - "net_prio": &networkPrio{}, - "pids": &pids{}, +var controllers = map[string]config{ + "blkio": config{ctrlr: &blockIO{}}, + "cpu": config{ctrlr: &cpu{}}, + "cpuset": config{ctrlr: &cpuSet{}}, + "memory": config{ctrlr: &memory{}}, + "net_cls": config{ctrlr: &networkClass{}}, + "net_prio": config{ctrlr: &networkPrio{}}, + "pids": config{ctrlr: &pids{}}, // These controllers either don't have anything in the OCI spec or is // irrelevant for a sandbox. - "devices": &noop{}, - "freezer": &noop{}, - "perf_event": &noop{}, - "systemd": &noop{}, + "devices": config{ctrlr: &noop{}}, + "freezer": config{ctrlr: &noop{}}, + "hugetlb": config{ctrlr: &noop{}, optional: true}, + "perf_event": config{ctrlr: &noop{}}, + "rdma": config{ctrlr: &noop{}, optional: true}, + "systemd": config{ctrlr: &noop{}}, } func setOptionalValueInt(path, name string, val *int64) error { @@ -196,8 +199,9 @@ func LoadPaths(pid string) (map[string]string, error) { return paths, nil } -// Cgroup represents a group inside all controllers. For example: Name='/foo/bar' -// maps to /sys/fs/cgroup/<controller>/foo/bar on all controllers. +// Cgroup represents a group inside all controllers. For example: +// Name='/foo/bar' maps to /sys/fs/cgroup/<controller>/foo/bar on +// all controllers. type Cgroup struct { Name string `json:"name"` Parents map[string]string `json:"parents"` @@ -242,16 +246,20 @@ func (c *Cgroup) Install(res *specs.LinuxResources) error { // The Cleanup object cleans up partially created cgroups when an error occurs. // Errors occuring during cleanup itself are ignored. - clean := specutils.MakeCleanup(func() { _ = c.Uninstall() }) + clean := cleanup.Make(func() { _ = c.Uninstall() }) defer clean.Clean() - for key, ctrl := range controllers { + for key, cfg := range controllers { path := c.makePath(key) if err := os.MkdirAll(path, 0755); err != nil { + if cfg.optional && errors.Is(err, syscall.EROFS) { + log.Infof("Skipping cgroup %q", key) + continue + } return err } if res != nil { - if err := ctrl.set(res, path); err != nil { + if err := cfg.ctrlr.set(res, path); err != nil { return err } } @@ -321,10 +329,13 @@ func (c *Cgroup) Join() (func(), error) { } // Now join the cgroups. - for key := range controllers { + for key, cfg := range controllers { path := c.makePath(key) log.Debugf("Joining cgroup %q", path) if err := setValue(path, "cgroup.procs", "0"); err != nil { + if cfg.optional && os.IsNotExist(err) { + continue + } return undo, err } } @@ -375,6 +386,11 @@ func (c *Cgroup) makePath(controllerName string) string { return filepath.Join(cgroupRoot, controllerName, path) } +type config struct { + ctrlr controller + optional bool +} + type controller interface { set(*specs.LinuxResources, string) error } diff --git a/runsc/cmd/gofer.go b/runsc/cmd/gofer.go index 28f0d54b9..10448a759 100644 --- a/runsc/cmd/gofer.go +++ b/runsc/cmd/gofer.go @@ -168,7 +168,7 @@ func (g *Gofer) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) // Start with root mount, then add any other additional mount as needed. ats := make([]p9.Attacher, 0, len(spec.Mounts)+1) ap, err := fsgofer.NewAttachPoint("/", fsgofer.Config{ - ROMount: spec.Root.Readonly, + ROMount: spec.Root.Readonly || conf.Overlay, PanicOnWrite: g.panicOnWrite, }) if err != nil { @@ -181,7 +181,7 @@ func (g *Gofer) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) for _, m := range spec.Mounts { if specutils.Is9PMount(m) { cfg := fsgofer.Config{ - ROMount: isReadonlyMount(m.Options), + ROMount: isReadonlyMount(m.Options) || conf.Overlay, PanicOnWrite: g.panicOnWrite, HostUDS: conf.FSGoferHostUDS, } diff --git a/runsc/cmd/help.go b/runsc/cmd/help.go index c7d210140..cd85dabbb 100644 --- a/runsc/cmd/help.go +++ b/runsc/cmd/help.go @@ -65,16 +65,10 @@ func (h *Help) Execute(ctx context.Context, f *flag.FlagSet, args ...interface{} switch f.NArg() { case 0: fmt.Fprintf(h.cdr.Output, "Usage: %s <flags> <subcommand> <subcommand args>\n\n", h.cdr.Name()) - fmt.Fprintf(h.cdr.Output, `runsc is a command line client for running applications packaged in the Open -Container Initiative (OCI) format. Applications run by runsc are run in an -isolated gVisor sandbox that emulates a Linux environment. + fmt.Fprintf(h.cdr.Output, `runsc is the gVisor container runtime. -gVisor is a user-space kernel, written in Go, that implements a substantial -portion of the Linux system call interface. It provides an additional layer -of isolation between running applications and the host operating system. - -Functionality is provided by subcommands. For additonal help on individual -subcommands use "%s %s <subcommand>". +Functionality is provided by subcommands. For help with a specific subcommand, +use "%s %s <subcommand>". `, h.cdr.Name(), h.Name()) h.cdr.VisitGroups(func(g *subcommands.CommandGroup) { diff --git a/runsc/cmd/spec.go b/runsc/cmd/spec.go index 8e2b36e85..a2b0a4b14 100644 --- a/runsc/cmd/spec.go +++ b/runsc/cmd/spec.go @@ -16,6 +16,7 @@ package cmd import ( "context" + "fmt" "io/ioutil" "os" "path/filepath" @@ -24,7 +25,8 @@ import ( "gvisor.dev/gvisor/runsc/flag" ) -var specTemplate = []byte(`{ +func genSpec(cwd string) []byte { + var template = fmt.Sprintf(`{ "ociVersion": "1.0.0", "process": { "terminal": true, @@ -39,7 +41,7 @@ var specTemplate = []byte(`{ "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", "TERM=xterm" ], - "cwd": "/", + "cwd": "%s", "capabilities": { "bounding": [ "CAP_AUDIT_WRITE", @@ -123,11 +125,15 @@ var specTemplate = []byte(`{ } ] } -}`) +}`, cwd) + + return []byte(template) +} // Spec implements subcommands.Command for the "spec" command. type Spec struct { bundle string + cwd string } // Name implements subcommands.Command.Name. @@ -165,6 +171,8 @@ EXAMPLE: // SetFlags implements subcommands.Command.SetFlags. func (s *Spec) SetFlags(f *flag.FlagSet) { f.StringVar(&s.bundle, "bundle", ".", "path to the root of the OCI bundle") + f.StringVar(&s.cwd, "cwd", "/", "working directory that will be set for the executable, "+ + "this value MUST be an absolute path") } // Execute implements subcommands.Command.Execute. @@ -174,7 +182,9 @@ func (s *Spec) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) Fatalf("file %q already exists", confPath) } - if err := ioutil.WriteFile(confPath, specTemplate, 0664); err != nil { + var spec = genSpec(s.cwd) + + if err := ioutil.WriteFile(confPath, spec, 0664); err != nil { Fatalf("writing to %q: %v", confPath, err) } diff --git a/runsc/container/BUILD b/runsc/container/BUILD index 46154df60..9a856d65c 100644 --- a/runsc/container/BUILD +++ b/runsc/container/BUILD @@ -16,6 +16,7 @@ go_library( ], deps = [ "//pkg/abi/linux", + "//pkg/cleanup", "//pkg/log", "//pkg/sentry/control", "//pkg/sentry/sighandling", @@ -53,6 +54,7 @@ go_test( deps = [ "//pkg/abi/linux", "//pkg/bits", + "//pkg/cleanup", "//pkg/log", "//pkg/sentry/control", "//pkg/sentry/kernel", diff --git a/runsc/container/container.go b/runsc/container/container.go index 8539f252d..6d297d0df 100644 --- a/runsc/container/container.go +++ b/runsc/container/container.go @@ -31,6 +31,7 @@ import ( "github.com/cenkalti/backoff" specs "github.com/opencontainers/runtime-spec/specs-go" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/cleanup" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/sentry/control" "gvisor.dev/gvisor/pkg/sentry/sighandling" @@ -293,7 +294,7 @@ func New(conf *boot.Config, args Args) (*Container, error) { } // The Cleanup object cleans up partially created containers when an error // occurs. Any errors occurring during cleanup itself are ignored. - cu := specutils.MakeCleanup(func() { _ = c.Destroy() }) + cu := cleanup.Make(func() { _ = c.Destroy() }) defer cu.Clean() // Lock the container metadata file to prevent concurrent creations of @@ -402,7 +403,7 @@ func (c *Container) Start(conf *boot.Config) error { if err := c.Saver.lock(); err != nil { return err } - unlock := specutils.MakeCleanup(func() { c.Saver.unlock() }) + unlock := cleanup.Make(func() { c.Saver.unlock() }) defer unlock.Clean() if err := c.requireStatus("start", Created); err != nil { @@ -506,7 +507,7 @@ func Run(conf *boot.Config, args Args) (syscall.WaitStatus, error) { } // Clean up partially created container if an error occurs. // Any errors returned by Destroy() itself are ignored. - cu := specutils.MakeCleanup(func() { + cu := cleanup.Make(func() { c.Destroy() }) defer cu.Clean() diff --git a/runsc/container/container_test.go b/runsc/container/container_test.go index acf988aa0..d59a1d97e 100644 --- a/runsc/container/container_test.go +++ b/runsc/container/container_test.go @@ -1537,28 +1537,6 @@ func TestReadonlyMount(t *testing.T) { } } -func TestBindMountByOption(t *testing.T) { - for _, conf := range configs(t, overlay) { - t.Logf("Running test with conf: %+v", conf) - - dir, err := ioutil.TempDir(testutil.TmpDir(), "bind-mount") - spec := testutil.NewSpecWithArgs("/bin/touch", path.Join(dir, "file")) - if err != nil { - t.Fatalf("ioutil.TempDir() failed: %v", err) - } - spec.Mounts = append(spec.Mounts, specs.Mount{ - Destination: dir, - Source: dir, - Type: "none", - Options: []string{"rw", "bind"}, - }) - - if err := run(spec, conf); err != nil { - t.Fatalf("error running sandbox: %v", err) - } - } -} - // TestAbbreviatedIDs checks that runsc supports using abbreviated container // IDs in place of full IDs. func TestAbbreviatedIDs(t *testing.T) { @@ -1760,7 +1738,7 @@ func TestUserLog(t *testing.T) { if err != nil { t.Fatalf("error opening user log file %q: %v", userLog, err) } - if want := "Unsupported syscall: sched_rr_get_interval"; !strings.Contains(string(out), want) { + if want := "Unsupported syscall sched_rr_get_interval("; !strings.Contains(string(out), want) { t.Errorf("user log file doesn't contain %q, out: %s", want, string(out)) } } @@ -2126,43 +2104,6 @@ func TestNetRaw(t *testing.T) { } } -// TestOverlayfsStaleRead most basic test that '--overlayfs-stale-read' works. -func TestOverlayfsStaleRead(t *testing.T) { - conf := testutil.TestConfig(t) - conf.OverlayfsStaleRead = true - - in, err := ioutil.TempFile(testutil.TmpDir(), "stale-read.in") - if err != nil { - t.Fatalf("ioutil.TempFile() failed: %v", err) - } - defer in.Close() - if _, err := in.WriteString("stale data"); err != nil { - t.Fatalf("in.Write() failed: %v", err) - } - - out, err := ioutil.TempFile(testutil.TmpDir(), "stale-read.out") - if err != nil { - t.Fatalf("ioutil.TempFile() failed: %v", err) - } - defer out.Close() - - const want = "foobar" - cmd := fmt.Sprintf("cat %q >&2 && echo %q> %q && cp %q %q", in.Name(), want, in.Name(), in.Name(), out.Name()) - spec := testutil.NewSpecWithArgs("/bin/bash", "-c", cmd) - if err := run(spec, conf); err != nil { - t.Fatalf("Error running container: %v", err) - } - - gotBytes, err := ioutil.ReadAll(out) - if err != nil { - t.Fatalf("out.Read() failed: %v", err) - } - got := strings.TrimSpace(string(gotBytes)) - if want != got { - t.Errorf("Wrong content in out file, got: %q. want: %q", got, want) - } -} - // TestTTYField checks TTY field returned by container.Processes(). func TestTTYField(t *testing.T) { stop := testutil.StartReaper() diff --git a/runsc/container/multi_container_test.go b/runsc/container/multi_container_test.go index f6861b1dd..dc825abd9 100644 --- a/runsc/container/multi_container_test.go +++ b/runsc/container/multi_container_test.go @@ -27,6 +27,7 @@ import ( "time" specs "github.com/opencontainers/runtime-spec/specs-go" + "gvisor.dev/gvisor/pkg/cleanup" "gvisor.dev/gvisor/pkg/sentry/control" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sync" @@ -64,29 +65,16 @@ func startContainers(conf *boot.Config, specs []*specs.Spec, ids []string) ([]*C panic("conf.RootDir not set. Call testutil.SetupRootDir() to set.") } - var ( - containers []*Container - cleanups []func() - ) - cleanups = append(cleanups, func() { - for _, c := range containers { - c.Destroy() - } - }) - cleanupAll := func() { - for _, c := range cleanups { - c() - } - } - localClean := specutils.MakeCleanup(cleanupAll) - defer localClean.Clean() + cu := cleanup.Cleanup{} + defer cu.Clean() + var containers []*Container for i, spec := range specs { bundleDir, cleanup, err := testutil.SetupBundleDir(spec) if err != nil { return nil, nil, fmt.Errorf("error setting up container: %v", err) } - cleanups = append(cleanups, cleanup) + cu.Add(cleanup) args := Args{ ID: ids[i], @@ -97,6 +85,7 @@ func startContainers(conf *boot.Config, specs []*specs.Spec, ids []string) ([]*C if err != nil { return nil, nil, fmt.Errorf("error creating container: %v", err) } + cu.Add(func() { cont.Destroy() }) containers = append(containers, cont) if err := cont.Start(conf); err != nil { @@ -104,8 +93,7 @@ func startContainers(conf *boot.Config, specs []*specs.Spec, ids []string) ([]*C } } - localClean.Release() - return containers, cleanupAll, nil + return containers, cu.Release(), nil } type execDesc struct { @@ -1394,7 +1382,7 @@ func TestMultiContainerSharedMountUnsupportedOptions(t *testing.T) { Destination: "/mydir/test", Source: "/some/dir", Type: "tmpfs", - Options: []string{"rw", "relatime"}, + Options: []string{"rw", "rbind", "relatime"}, } podSpec[0].Mounts = append(podSpec[0].Mounts, mnt0) diff --git a/runsc/fsgofer/BUILD b/runsc/fsgofer/BUILD index 64a406ae2..1036b0630 100644 --- a/runsc/fsgofer/BUILD +++ b/runsc/fsgofer/BUILD @@ -13,12 +13,12 @@ go_library( visibility = ["//runsc:__subpackages__"], deps = [ "//pkg/abi/linux", + "//pkg/cleanup", "//pkg/fd", "//pkg/log", "//pkg/p9", "//pkg/sync", "//pkg/syserr", - "//runsc/specutils", "@org_golang_x_sys//unix:go_default_library", ], ) diff --git a/runsc/fsgofer/fsgofer.go b/runsc/fsgofer/fsgofer.go index 1942f50d7..edc239013 100644 --- a/runsc/fsgofer/fsgofer.go +++ b/runsc/fsgofer/fsgofer.go @@ -33,11 +33,11 @@ import ( "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/cleanup" "gvisor.dev/gvisor/pkg/fd" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/p9" "gvisor.dev/gvisor/pkg/sync" - "gvisor.dev/gvisor/runsc/specutils" ) const ( @@ -439,7 +439,7 @@ func (l *localFile) Create(name string, mode p9.OpenFlags, perm p9.FileMode, uid if err != nil { return nil, nil, p9.QID{}, 0, extractErrno(err) } - cu := specutils.MakeCleanup(func() { + cu := cleanup.Make(func() { child.Close() // Best effort attempt to remove the file in case of failure. if err := syscall.Unlinkat(l.file.FD(), name); err != nil { @@ -480,7 +480,7 @@ func (l *localFile) Mkdir(name string, perm p9.FileMode, uid p9.UID, gid p9.GID) if err := syscall.Mkdirat(l.file.FD(), name, uint32(perm.Permissions())); err != nil { return p9.QID{}, extractErrno(err) } - cu := specutils.MakeCleanup(func() { + cu := cleanup.Make(func() { // Best effort attempt to remove the dir in case of failure. if err := unix.Unlinkat(l.file.FD(), name, unix.AT_REMOVEDIR); err != nil { log.Warningf("error unlinking dir %q after failure: %v", path.Join(l.hostPath, name), err) @@ -864,7 +864,7 @@ func (l *localFile) Symlink(target, newName string, uid p9.UID, gid p9.GID) (p9. if err := unix.Symlinkat(target, l.file.FD(), newName); err != nil { return p9.QID{}, extractErrno(err) } - cu := specutils.MakeCleanup(func() { + cu := cleanup.Make(func() { // Best effort attempt to remove the symlink in case of failure. if err := syscall.Unlinkat(l.file.FD(), newName); err != nil { log.Warningf("error unlinking file %q after failure: %v", path.Join(l.hostPath, newName), err) diff --git a/runsc/main.go b/runsc/main.go index 0625a06e0..920ed84a5 100644 --- a/runsc/main.go +++ b/runsc/main.go @@ -76,7 +76,7 @@ var ( fileAccess = flag.String("file-access", "exclusive", "specifies which filesystem to use for the root mount: exclusive (default), shared. Volume mounts are always shared.") fsGoferHostUDS = flag.Bool("fsgofer-host-uds", false, "allow the gofer to mount Unix Domain Sockets.") overlay = flag.Bool("overlay", false, "wrap filesystem mounts with writable overlay. All modifications are stored in memory inside the sandbox.") - overlayfsStaleRead = flag.Bool("overlayfs-stale-read", false, "reopen cached FDs after a file is opened for write to workaround overlayfs limitation on kernels before 4.19.") + overlayfsStaleRead = flag.Bool("overlayfs-stale-read", true, "assume root mount is an overlay filesystem") watchdogAction = flag.String("watchdog-action", "log", "sets what action the watchdog takes when triggered: log (default), panic.") panicSignal = flag.Int("panic-signal", -1, "register signal handling that panics. Usually set to SIGUSR2(12) to troubleshoot hangs. -1 disables it.") profile = flag.Bool("profile", false, "prepares the sandbox to use Golang profiler. Note that enabling profiler loosens the seccomp protection added to the sandbox (DO NOT USE IN PRODUCTION).") diff --git a/runsc/sandbox/BUILD b/runsc/sandbox/BUILD index c95d50294..035dcd3e3 100644 --- a/runsc/sandbox/BUILD +++ b/runsc/sandbox/BUILD @@ -13,6 +13,7 @@ go_library( "//runsc:__subpackages__", ], deps = [ + "//pkg/cleanup", "//pkg/control/client", "//pkg/control/server", "//pkg/log", diff --git a/runsc/sandbox/sandbox.go b/runsc/sandbox/sandbox.go index e4ec16e2f..6e1a2af25 100644 --- a/runsc/sandbox/sandbox.go +++ b/runsc/sandbox/sandbox.go @@ -30,6 +30,7 @@ import ( "github.com/cenkalti/backoff" specs "github.com/opencontainers/runtime-spec/specs-go" "github.com/syndtr/gocapability/capability" + "gvisor.dev/gvisor/pkg/cleanup" "gvisor.dev/gvisor/pkg/control/client" "gvisor.dev/gvisor/pkg/control/server" "gvisor.dev/gvisor/pkg/log" @@ -119,7 +120,7 @@ func New(conf *boot.Config, args *Args) (*Sandbox, error) { s := &Sandbox{ID: args.ID, Cgroup: args.Cgroup} // The Cleanup object cleans up partially created sandboxes when an error // occurs. Any errors occurring during cleanup itself are ignored. - c := specutils.MakeCleanup(func() { + c := cleanup.Make(func() { err := s.destroy() log.Warningf("error destroying sandbox: %v", err) }) diff --git a/runsc/specutils/specutils.go b/runsc/specutils/specutils.go index 202518b58..f1fa573c5 100644 --- a/runsc/specutils/specutils.go +++ b/runsc/specutils/specutils.go @@ -311,19 +311,7 @@ func capsFromNames(names []string, skipSet map[linux.Capability]struct{}) (auth. // Is9PMount returns true if the given mount can be mounted as an external gofer. func Is9PMount(m specs.Mount) bool { - var isBind bool - switch m.Type { - case "bind": - isBind = true - default: - for _, opt := range m.Options { - if opt == "bind" || opt == "rbind" { - isBind = true - break - } - } - } - return isBind && m.Source != "" && IsSupportedDevMount(m) + return m.Type == "bind" && m.Source != "" && IsSupportedDevMount(m) } // IsSupportedDevMount returns true if the mount is a supported /dev mount. @@ -456,36 +444,6 @@ func ContainsStr(strs []string, str string) bool { return false } -// Cleanup allows defers to be aborted when cleanup needs to happen -// conditionally. Usage: -// c := MakeCleanup(func() { f.Close() }) -// defer c.Clean() // any failure before release is called will close the file. -// ... -// c.Release() // on success, aborts closing the file and return it. -// return f -type Cleanup struct { - clean func() -} - -// MakeCleanup creates a new Cleanup object. -func MakeCleanup(f func()) Cleanup { - return Cleanup{clean: f} -} - -// Clean calls the cleanup function. -func (c *Cleanup) Clean() { - if c.clean != nil { - c.clean() - c.clean = nil - } -} - -// Release releases the cleanup from its duties, i.e. cleanup function is not -// called after this point. -func (c *Cleanup) Release() { - c.clean = nil -} - // RetryEintr retries the function until an error different than EINTR is // returned. func RetryEintr(f func() (uintptr, uintptr, error)) (uintptr, uintptr, error) { |