diff options
Diffstat (limited to 'runsc')
-rw-r--r-- | runsc/boot/filter/config.go | 8 | ||||
-rw-r--r-- | runsc/boot/fs.go | 51 | ||||
-rw-r--r-- | runsc/boot/loader.go | 2 | ||||
-rw-r--r-- | runsc/boot/vfs.go | 212 | ||||
-rw-r--r-- | runsc/cgroup/BUILD | 2 | ||||
-rw-r--r-- | runsc/cgroup/cgroup.go | 4 | ||||
-rw-r--r-- | runsc/cmd/boot.go | 2 | ||||
-rw-r--r-- | runsc/cmd/gofer.go | 4 | ||||
-rw-r--r-- | runsc/cmd/spec.go | 18 | ||||
-rw-r--r-- | runsc/container/BUILD | 4 | ||||
-rw-r--r-- | runsc/container/console_test.go | 2 | ||||
-rw-r--r-- | runsc/container/container.go | 7 | ||||
-rw-r--r-- | runsc/container/container_test.go | 47 | ||||
-rw-r--r-- | runsc/container/multi_container_test.go | 30 | ||||
-rw-r--r-- | runsc/fsgofer/BUILD | 2 | ||||
-rw-r--r-- | runsc/fsgofer/fsgofer.go | 8 | ||||
-rw-r--r-- | runsc/sandbox/BUILD | 1 | ||||
-rw-r--r-- | runsc/sandbox/sandbox.go | 3 | ||||
-rw-r--r-- | runsc/specutils/namespace.go | 16 | ||||
-rw-r--r-- | runsc/specutils/specutils.go | 44 |
20 files changed, 209 insertions, 258 deletions
diff --git a/runsc/boot/filter/config.go b/runsc/boot/filter/config.go index 98cdd90dd..60e33425f 100644 --- a/runsc/boot/filter/config.go +++ b/runsc/boot/filter/config.go @@ -288,6 +288,14 @@ var allowedSyscalls = seccomp.SyscallRules{ syscall.SYS_SIGALTSTACK: {}, unix.SYS_STATX: {}, syscall.SYS_SYNC_FILE_RANGE: {}, + syscall.SYS_TEE: []seccomp.Rule{ + { + seccomp.AllowAny{}, + seccomp.AllowAny{}, + seccomp.AllowValue(1), /* len */ + seccomp.AllowValue(unix.SPLICE_F_NONBLOCK), /* flags */ + }, + }, syscall.SYS_TGKILL: []seccomp.Rule{ { seccomp.AllowValue(uint64(os.Getpid())), diff --git a/runsc/boot/fs.go b/runsc/boot/fs.go index e1181271a..b98a1eb50 100644 --- a/runsc/boot/fs.go +++ b/runsc/boot/fs.go @@ -37,6 +37,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/gofer" "gvisor.dev/gvisor/pkg/sentry/fs/ramfs" + "gvisor.dev/gvisor/pkg/sentry/fs/user" "gvisor.dev/gvisor/pkg/sentry/fsimpl/devpts" "gvisor.dev/gvisor/pkg/sentry/fsimpl/devtmpfs" gofervfs2 "gvisor.dev/gvisor/pkg/sentry/fsimpl/gofer" @@ -62,7 +63,7 @@ const ( ) // tmpfs has some extra supported options that we must pass through. -var tmpfsAllowedOptions = []string{"mode", "uid", "gid"} +var tmpfsAllowedData = []string{"mode", "uid", "gid"} func addOverlay(ctx context.Context, conf *Config, lower *fs.Inode, name string, lowerFlags fs.MountSourceFlags) (*fs.Inode, error) { // Upper layer uses the same flags as lower, but it must be read-write. @@ -153,8 +154,8 @@ func compileMounts(spec *specs.Spec) []specs.Mount { return mounts } -// p9MountOptions creates a slice of options for a p9 mount. -func p9MountOptions(fd int, fa FileAccessType, vfs2 bool) []string { +// p9MountData creates a slice of p9 mount data. +func p9MountData(fd int, fa FileAccessType, vfs2 bool) []string { opts := []string{ "trans=fd", "rfdno=" + strconv.Itoa(fd), @@ -221,9 +222,6 @@ func mountFlags(opts []string) fs.MountSourceFlags { mf.NoAtime = true case "noexec": mf.NoExec = true - case "bind", "rbind": - // When options include either "bind" or "rbind", - // it's converted to a 9P mount. default: log.Warningf("ignoring unknown mount option %q", o) } @@ -237,7 +235,7 @@ func isSupportedMountFlag(fstype, opt string) bool { return true } if fstype == tmpfsvfs2.Name { - ok, err := parseMountOption(opt, tmpfsAllowedOptions...) + ok, err := parseMountOption(opt, tmpfsAllowedData...) return ok && err == nil } return false @@ -294,17 +292,10 @@ func setupContainerFS(ctx context.Context, conf *Config, mntr *containerMounter, // Set namespace here so that it can be found in ctx. procArgs.MountNamespace = mns - return setExecutablePath(ctx, procArgs) -} - -// setExecutablePath sets the procArgs.Filename by searching the PATH for an -// executable matching the procArgs.Argv[0]. -func setExecutablePath(ctx context.Context, procArgs *kernel.CreateProcessArgs) error { - paths := fs.GetPath(procArgs.Envv) - exe := procArgs.Argv[0] - f, err := procArgs.MountNamespace.ResolveExecutablePath(ctx, procArgs.WorkingDirectory, exe, paths) + // Resolve the executable path from working dir and environment. + f, err := user.ResolveExecutablePath(ctx, procArgs.Credentials, procArgs.MountNamespace, procArgs.Envv, procArgs.WorkingDirectory, procArgs.Argv[0]) if err != nil { - return fmt.Errorf("searching for executable %q, cwd: %q, $PATH=%q: %v", exe, procArgs.WorkingDirectory, strings.Join(paths, ":"), err) + return fmt.Errorf("searching for executable %q, cwd: %q, envv: %q: %v", procArgs.Argv[0], procArgs.WorkingDirectory, procArgs.Envv, err) } procArgs.Filename = f return nil @@ -725,7 +716,7 @@ func (c *containerMounter) createRootMount(ctx context.Context, conf *Config) (* fd := c.fds.remove() log.Infof("Mounting root over 9P, ioFD: %d", fd) p9FS := mustFindFilesystem("9p") - opts := p9MountOptions(fd, conf.FileAccess, false /* vfs2 */) + opts := p9MountData(fd, conf.FileAccess, false /* vfs2 */) if conf.OverlayfsStaleRead { // We can't check for overlayfs here because sandbox is chroot'ed and gofer @@ -770,10 +761,6 @@ func (c *containerMounter) getMountNameAndOptions(conf *Config, m specs.Mount) ( useOverlay bool ) - if isBindMount(m) { - m.Type = bind - } - switch m.Type { case devpts.Name, devtmpfs.Name, procvfs2.Name, sysvfs2.Name: fsName = m.Type @@ -783,7 +770,7 @@ func (c *containerMounter) getMountNameAndOptions(conf *Config, m specs.Mount) ( fsName = m.Type var err error - opts, err = parseAndFilterOptions(m.Options, tmpfsAllowedOptions...) + opts, err = parseAndFilterOptions(m.Options, tmpfsAllowedData...) if err != nil { return "", nil, false, err } @@ -791,7 +778,7 @@ func (c *containerMounter) getMountNameAndOptions(conf *Config, m specs.Mount) ( case bind: fd := c.fds.remove() fsName = gofervfs2.Name - opts = p9MountOptions(fd, c.getMountAccessType(m), conf.VFS2) + opts = p9MountData(fd, c.getMountAccessType(m), conf.VFS2) // If configured, add overlay to all writable mounts. useOverlay = conf.Overlay && !mountFlags(m.Options).ReadOnly @@ -801,18 +788,6 @@ func (c *containerMounter) getMountNameAndOptions(conf *Config, m specs.Mount) ( return fsName, opts, useOverlay, nil } -func isBindMount(m specs.Mount) bool { - for _, opt := range m.Options { - // When options include either "bind" or "rbind", this behaves as - // bind mount even if the mount type is equal to a filesystem supported - // on runsc. - if opt == "bind" || opt == "rbind" { - return true - } - } - return false -} - func (c *containerMounter) getMountAccessType(mount specs.Mount) FileAccessType { if hint := c.hints.findMount(mount); hint != nil { return hint.fileAccessType() @@ -956,7 +931,7 @@ func (c *containerMounter) createRestoreEnvironment(conf *Config) (*fs.RestoreEn // Add root mount. fd := c.fds.remove() - opts := p9MountOptions(fd, conf.FileAccess, false /* vfs2 */) + opts := p9MountData(fd, conf.FileAccess, false /* vfs2 */) mf := fs.MountSourceFlags{} if c.root.Readonly || conf.Overlay { @@ -1044,7 +1019,7 @@ func (c *containerMounter) mountTmp(ctx context.Context, conf *Config, mns *fs.M Destination: "/tmp", // Sticky bit is added to prevent accidental deletion of files from // another user. This is normally done for /tmp. - Options: []string{"mode=1777"}, + Options: []string{"mode=01777"}, } return c.mountSubmount(ctx, conf, mns, root, tmpMount) diff --git a/runsc/boot/loader.go b/runsc/boot/loader.go index f802bc9fb..002479612 100644 --- a/runsc/boot/loader.go +++ b/runsc/boot/loader.go @@ -1056,7 +1056,7 @@ func newEmptySandboxNetworkStack(clock tcpip.Clock, uniqueID stack.UniqueID) (in return nil, fmt.Errorf("SetTransportProtocolOption failed: %v", err) } - s.FillDefaultIPTables() + s.FillIPTablesMetadata() return &s, nil } diff --git a/runsc/boot/vfs.go b/runsc/boot/vfs.go index 147c901c4..6c84f0794 100644 --- a/runsc/boot/vfs.go +++ b/runsc/boot/vfs.go @@ -22,22 +22,21 @@ import ( specs "github.com/opencontainers/runtime-spec/specs-go" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/fspath" + "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/sentry/devices/memdev" - "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/fs/user" "gvisor.dev/gvisor/pkg/sentry/fsimpl/devpts" "gvisor.dev/gvisor/pkg/sentry/fsimpl/devtmpfs" "gvisor.dev/gvisor/pkg/sentry/fsimpl/gofer" "gvisor.dev/gvisor/pkg/sentry/fsimpl/proc" "gvisor.dev/gvisor/pkg/sentry/fsimpl/sys" "gvisor.dev/gvisor/pkg/sentry/fsimpl/tmpfs" - "gvisor.dev/gvisor/pkg/syserror" - - "gvisor.dev/gvisor/pkg/context" - "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/vfs" + "gvisor.dev/gvisor/pkg/syserror" ) func registerFilesystems(ctx context.Context, vfsObj *vfs.VirtualFilesystem, creds *auth.Credentials) error { @@ -95,69 +94,14 @@ func setupContainerVFS2(ctx context.Context, conf *Config, mntr *containerMounte return fmt.Errorf("failed to setupFS: %w", err) } procArgs.MountNamespaceVFS2 = mns - return setExecutablePathVFS2(ctx, procArgs) -} - -func setExecutablePathVFS2(ctx context.Context, procArgs *kernel.CreateProcessArgs) error { - exe := procArgs.Argv[0] - - // Absolute paths can be used directly. - if path.IsAbs(exe) { - procArgs.Filename = exe - return nil - } - - // Paths with '/' in them should be joined to the working directory, or - // to the root if working directory is not set. - if strings.IndexByte(exe, '/') > 0 { - if !path.IsAbs(procArgs.WorkingDirectory) { - return fmt.Errorf("working directory %q must be absolute", procArgs.WorkingDirectory) - } - procArgs.Filename = path.Join(procArgs.WorkingDirectory, exe) - return nil - } - - // Paths with a '/' are relative to the CWD. - if strings.IndexByte(exe, '/') > 0 { - procArgs.Filename = path.Join(procArgs.WorkingDirectory, exe) - return nil - } - // Otherwise, We must lookup the name in the paths, starting from the - // root directory. - root := procArgs.MountNamespaceVFS2.Root() - defer root.DecRef() - - paths := fs.GetPath(procArgs.Envv) - creds := procArgs.Credentials - - for _, p := range paths { - binPath := path.Join(p, exe) - pop := &vfs.PathOperation{ - Root: root, - Start: root, - Path: fspath.Parse(binPath), - FollowFinalSymlink: true, - } - opts := &vfs.OpenOptions{ - FileExec: true, - Flags: linux.O_RDONLY, - } - dentry, err := root.Mount().Filesystem().VirtualFilesystem().OpenAt(ctx, creds, pop, opts) - if err == syserror.ENOENT || err == syserror.EACCES { - // Didn't find it here. - continue - } - if err != nil { - return err - } - dentry.DecRef() - - procArgs.Filename = binPath - return nil + // Resolve the executable path from working dir and environment. + f, err := user.ResolveExecutablePathVFS2(ctx, procArgs.Credentials, procArgs.MountNamespaceVFS2, procArgs.Envv, procArgs.WorkingDirectory, procArgs.Argv[0]) + if err != nil { + return fmt.Errorf("searching for executable %q, cwd: %q, envv: %q: %v", procArgs.Argv[0], procArgs.WorkingDirectory, procArgs.Envv, err) } - - return fmt.Errorf("executable %q not found in $PATH=%q", exe, strings.Join(paths, ":")) + procArgs.Filename = f + return nil } func (c *containerMounter) setupVFS2(ctx context.Context, conf *Config, procArgs *kernel.CreateProcessArgs) (*vfs.MountNamespace, error) { @@ -192,7 +136,7 @@ func (c *containerMounter) setupVFS2(ctx context.Context, conf *Config, procArgs func (c *containerMounter) createMountNamespaceVFS2(ctx context.Context, conf *Config, creds *auth.Credentials) (*vfs.MountNamespace, error) { fd := c.fds.remove() - opts := strings.Join(p9MountOptions(fd, conf.FileAccess, true /* vfs2 */), ",") + opts := strings.Join(p9MountData(fd, conf.FileAccess, true /* vfs2 */), ",") log.Infof("Mounting root over 9P, ioFD: %d", fd) mns, err := c.k.VFS().NewMountNamespace(ctx, creds, "", gofer.Name, &vfs.GetFilesystemOptions{Data: opts}) @@ -216,8 +160,9 @@ func (c *containerMounter) mountSubmountsVFS2(ctx context.Context, conf *Config, } } - // TODO(gvisor.dev/issue/1487): implement mountTmp from fs.go. - + if err := c.mountTmpVFS2(ctx, conf, creds, mns); err != nil { + return fmt.Errorf(`mount submount "\tmp": %w`, err) + } return nil } @@ -235,7 +180,7 @@ func (c *containerMounter) prepareMountsVFS2() ([]mountAndFD, error) { fd := -1 // Only bind mounts use host FDs; see // containerMounter.getMountNameAndOptionsVFS2. - if m.Type == bind || isBindMount(m) { + if m.Type == bind { fd = c.fds.remove() } mounts = append(mounts, mountAndFD{ @@ -255,8 +200,6 @@ func (c *containerMounter) prepareMountsVFS2() ([]mountAndFD, error) { return mounts, nil } -// TODO(gvisor.dev/issue/1487): Implement submount options similar to the VFS1 -// version. func (c *containerMounter) mountSubmountVFS2(ctx context.Context, conf *Config, mns *vfs.MountNamespace, creds *auth.Credentials, submount *mountAndFD) error { root := mns.Root() defer root.DecRef() @@ -265,12 +208,11 @@ func (c *containerMounter) mountSubmountVFS2(ctx context.Context, conf *Config, Start: root, Path: fspath.Parse(submount.Destination), } - - fsName, options, useOverlay, err := c.getMountNameAndOptionsVFS2(conf, submount) + fsName, opts, err := c.getMountNameAndOptionsVFS2(conf, submount) if err != nil { return fmt.Errorf("mountOptions failed: %w", err) } - if fsName == "" { + if len(fsName) == 0 { // Filesystem is not supported (e.g. cgroup), just skip it. return nil } @@ -278,17 +220,6 @@ func (c *containerMounter) mountSubmountVFS2(ctx context.Context, conf *Config, if err := c.makeSyntheticMount(ctx, submount.Destination, root, creds); err != nil { return err } - - opts := &vfs.MountOptions{ - GetFilesystemOptions: vfs.GetFilesystemOptions{ - Data: strings.Join(options, ","), - }, - InternalMount: true, - } - - // All writes go to upper, be paranoid and make lower readonly. - opts.ReadOnly = useOverlay - if err := c.k.VFS().MountAt(ctx, creds, "", target, fsName, opts); err != nil { return fmt.Errorf("failed to mount %q (type: %s): %w, opts: %v", submount.Destination, submount.Type, err, opts) } @@ -298,17 +229,13 @@ func (c *containerMounter) mountSubmountVFS2(ctx context.Context, conf *Config, // getMountNameAndOptionsVFS2 retrieves the fsName, opts, and useOverlay values // used for mounts. -func (c *containerMounter) getMountNameAndOptionsVFS2(conf *Config, m *mountAndFD) (string, []string, bool, error) { +func (c *containerMounter) getMountNameAndOptionsVFS2(conf *Config, m *mountAndFD) (string, *vfs.MountOptions, error) { var ( - fsName string - opts []string - useOverlay bool + fsName string + data []string ) - if isBindMount(m.Mount) { - m.Type = bind - } - + // Find filesystem name and FS specific data field. switch m.Type { case devpts.Name, devtmpfs.Name, proc.Name, sys.Name: fsName = m.Type @@ -318,21 +245,46 @@ func (c *containerMounter) getMountNameAndOptionsVFS2(conf *Config, m *mountAndF fsName = m.Type var err error - opts, err = parseAndFilterOptions(m.Options, tmpfsAllowedOptions...) + data, err = parseAndFilterOptions(m.Options, tmpfsAllowedData...) if err != nil { - return "", nil, false, err + return "", nil, err } case bind: fsName = gofer.Name - opts = p9MountOptions(m.fd, c.getMountAccessType(m.Mount), true /* vfs2 */) - // If configured, add overlay to all writable mounts. - useOverlay = conf.Overlay && !mountFlags(m.Options).ReadOnly + data = p9MountData(m.fd, c.getMountAccessType(m.Mount), true /* vfs2 */) default: log.Warningf("ignoring unknown filesystem type %q", m.Type) } - return fsName, opts, useOverlay, nil + + opts := &vfs.MountOptions{ + GetFilesystemOptions: vfs.GetFilesystemOptions{ + Data: strings.Join(data, ","), + }, + InternalMount: true, + } + + for _, o := range m.Options { + switch o { + case "rw": + opts.ReadOnly = false + case "ro": + opts.ReadOnly = true + case "noatime": + // TODO(gvisor.dev/issue/1193): Implement MS_NOATIME. + case "noexec": + opts.Flags.NoExec = true + default: + log.Warningf("ignoring unknown mount option %q", o) + } + } + + if conf.Overlay { + // All writes go to upper, be paranoid and make lower readonly. + opts.ReadOnly = true + } + return fsName, opts, nil } func (c *containerMounter) makeSyntheticMount(ctx context.Context, currentPath string, root vfs.VirtualDentry, creds *auth.Credentials) error { @@ -361,3 +313,63 @@ func (c *containerMounter) makeSyntheticMount(ctx context.Context, currentPath s } return nil } + +// mountTmpVFS2 mounts an internal tmpfs at '/tmp' if it's safe to do so. +// Technically we don't have to mount tmpfs at /tmp, as we could just rely on +// the host /tmp, but this is a nice optimization, and fixes some apps that call +// mknod in /tmp. It's unsafe to mount tmpfs if: +// 1. /tmp is mounted explicitly: we should not override user's wish +// 2. /tmp is not empty: mounting tmpfs would hide existing files in /tmp +// +// Note that when there are submounts inside of '/tmp', directories for the +// mount points must be present, making '/tmp' not empty anymore. +func (c *containerMounter) mountTmpVFS2(ctx context.Context, conf *Config, creds *auth.Credentials, mns *vfs.MountNamespace) error { + for _, m := range c.mounts { + // m.Destination has been cleaned, so it's to use equality here. + if m.Destination == "/tmp" { + log.Debugf(`Explict "/tmp" mount found, skipping internal tmpfs, mount: %+v`, m) + return nil + } + } + + root := mns.Root() + defer root.DecRef() + pop := vfs.PathOperation{ + Root: root, + Start: root, + Path: fspath.Parse("/tmp"), + } + // TODO(gvisor.dev/issue/2782): Use O_PATH when available. + statx, err := c.k.VFS().StatAt(ctx, creds, &pop, &vfs.StatOptions{}) + switch err { + case nil: + // Found '/tmp' in filesystem, check if it's empty. + if linux.FileMode(statx.Mode).FileType() != linux.ModeDirectory { + // Not a dir?! Leave it be. + return nil + } + if statx.Nlink > 2 { + // If more than "." and ".." is found, skip internal tmpfs to prevent + // hiding existing files. + log.Infof(`Skipping internal tmpfs mount for "/tmp" because it's not empty`) + return nil + } + log.Infof(`Mounting internal tmpfs on top of empty "/tmp"`) + fallthrough + + case syserror.ENOENT: + // No '/tmp' found (or fallthrough from above). It's safe to mount internal + // tmpfs. + tmpMount := specs.Mount{ + Type: tmpfs.Name, + Destination: "/tmp", + // Sticky bit is added to prevent accidental deletion of files from + // another user. This is normally done for /tmp. + Options: []string{"mode=01777"}, + } + return c.mountSubmountVFS2(ctx, conf, mns, creds, &mountAndFD{Mount: tmpMount}) + + default: + return fmt.Errorf(`stating "/tmp" inside container: %w`, err) + } +} diff --git a/runsc/cgroup/BUILD b/runsc/cgroup/BUILD index d4c7bdfbb..c087e1a3c 100644 --- a/runsc/cgroup/BUILD +++ b/runsc/cgroup/BUILD @@ -7,8 +7,8 @@ go_library( srcs = ["cgroup.go"], visibility = ["//:sandbox"], deps = [ + "//pkg/cleanup", "//pkg/log", - "//runsc/specutils", "@com_github_cenkalti_backoff//:go_default_library", "@com_github_opencontainers_runtime-spec//specs-go:go_default_library", ], diff --git a/runsc/cgroup/cgroup.go b/runsc/cgroup/cgroup.go index 19c8b0db6..ef01820ef 100644 --- a/runsc/cgroup/cgroup.go +++ b/runsc/cgroup/cgroup.go @@ -31,8 +31,8 @@ import ( "github.com/cenkalti/backoff" specs "github.com/opencontainers/runtime-spec/specs-go" + "gvisor.dev/gvisor/pkg/cleanup" "gvisor.dev/gvisor/pkg/log" - "gvisor.dev/gvisor/runsc/specutils" ) const ( @@ -246,7 +246,7 @@ func (c *Cgroup) Install(res *specs.LinuxResources) error { // The Cleanup object cleans up partially created cgroups when an error occurs. // Errors occuring during cleanup itself are ignored. - clean := specutils.MakeCleanup(func() { _ = c.Uninstall() }) + clean := cleanup.Make(func() { _ = c.Uninstall() }) defer clean.Clean() for key, cfg := range controllers { diff --git a/runsc/cmd/boot.go b/runsc/cmd/boot.go index 4c2ac6ff0..01204ab4d 100644 --- a/runsc/cmd/boot.go +++ b/runsc/cmd/boot.go @@ -136,7 +136,7 @@ func (b *Boot) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) } // Ensure that if there is a panic, all goroutine stacks are printed. - debug.SetTraceback("all") + debug.SetTraceback("system") conf := args[0].(*boot.Config) diff --git a/runsc/cmd/gofer.go b/runsc/cmd/gofer.go index 28f0d54b9..10448a759 100644 --- a/runsc/cmd/gofer.go +++ b/runsc/cmd/gofer.go @@ -168,7 +168,7 @@ func (g *Gofer) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) // Start with root mount, then add any other additional mount as needed. ats := make([]p9.Attacher, 0, len(spec.Mounts)+1) ap, err := fsgofer.NewAttachPoint("/", fsgofer.Config{ - ROMount: spec.Root.Readonly, + ROMount: spec.Root.Readonly || conf.Overlay, PanicOnWrite: g.panicOnWrite, }) if err != nil { @@ -181,7 +181,7 @@ func (g *Gofer) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) for _, m := range spec.Mounts { if specutils.Is9PMount(m) { cfg := fsgofer.Config{ - ROMount: isReadonlyMount(m.Options), + ROMount: isReadonlyMount(m.Options) || conf.Overlay, PanicOnWrite: g.panicOnWrite, HostUDS: conf.FSGoferHostUDS, } diff --git a/runsc/cmd/spec.go b/runsc/cmd/spec.go index 8e2b36e85..a2b0a4b14 100644 --- a/runsc/cmd/spec.go +++ b/runsc/cmd/spec.go @@ -16,6 +16,7 @@ package cmd import ( "context" + "fmt" "io/ioutil" "os" "path/filepath" @@ -24,7 +25,8 @@ import ( "gvisor.dev/gvisor/runsc/flag" ) -var specTemplate = []byte(`{ +func genSpec(cwd string) []byte { + var template = fmt.Sprintf(`{ "ociVersion": "1.0.0", "process": { "terminal": true, @@ -39,7 +41,7 @@ var specTemplate = []byte(`{ "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", "TERM=xterm" ], - "cwd": "/", + "cwd": "%s", "capabilities": { "bounding": [ "CAP_AUDIT_WRITE", @@ -123,11 +125,15 @@ var specTemplate = []byte(`{ } ] } -}`) +}`, cwd) + + return []byte(template) +} // Spec implements subcommands.Command for the "spec" command. type Spec struct { bundle string + cwd string } // Name implements subcommands.Command.Name. @@ -165,6 +171,8 @@ EXAMPLE: // SetFlags implements subcommands.Command.SetFlags. func (s *Spec) SetFlags(f *flag.FlagSet) { f.StringVar(&s.bundle, "bundle", ".", "path to the root of the OCI bundle") + f.StringVar(&s.cwd, "cwd", "/", "working directory that will be set for the executable, "+ + "this value MUST be an absolute path") } // Execute implements subcommands.Command.Execute. @@ -174,7 +182,9 @@ func (s *Spec) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) Fatalf("file %q already exists", confPath) } - if err := ioutil.WriteFile(confPath, specTemplate, 0664); err != nil { + var spec = genSpec(s.cwd) + + if err := ioutil.WriteFile(confPath, spec, 0664); err != nil { Fatalf("writing to %q: %v", confPath, err) } diff --git a/runsc/container/BUILD b/runsc/container/BUILD index 46154df60..49cfb0837 100644 --- a/runsc/container/BUILD +++ b/runsc/container/BUILD @@ -16,6 +16,7 @@ go_library( ], deps = [ "//pkg/abi/linux", + "//pkg/cleanup", "//pkg/log", "//pkg/sentry/control", "//pkg/sentry/sighandling", @@ -46,13 +47,14 @@ go_test( "//test/cmd/test_app", ], library = ":container", - shard_count = 5, + shard_count = 10, tags = [ "requires-kvm", ], deps = [ "//pkg/abi/linux", "//pkg/bits", + "//pkg/cleanup", "//pkg/log", "//pkg/sentry/control", "//pkg/sentry/kernel", diff --git a/runsc/container/console_test.go b/runsc/container/console_test.go index 294dca5e7..3813c6b93 100644 --- a/runsc/container/console_test.go +++ b/runsc/container/console_test.go @@ -119,7 +119,7 @@ func receiveConsolePTY(srv *unet.ServerSocket) (*os.File, error) { // Test that an pty FD is sent over the console socket if one is provided. func TestConsoleSocket(t *testing.T) { - for name, conf := range configs(t, all...) { + for name, conf := range configsWithVFS2(t, all...) { t.Run(name, func(t *testing.T) { spec := testutil.NewSpecWithArgs("true") _, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf) diff --git a/runsc/container/container.go b/runsc/container/container.go index 8539f252d..6d297d0df 100644 --- a/runsc/container/container.go +++ b/runsc/container/container.go @@ -31,6 +31,7 @@ import ( "github.com/cenkalti/backoff" specs "github.com/opencontainers/runtime-spec/specs-go" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/cleanup" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/sentry/control" "gvisor.dev/gvisor/pkg/sentry/sighandling" @@ -293,7 +294,7 @@ func New(conf *boot.Config, args Args) (*Container, error) { } // The Cleanup object cleans up partially created containers when an error // occurs. Any errors occurring during cleanup itself are ignored. - cu := specutils.MakeCleanup(func() { _ = c.Destroy() }) + cu := cleanup.Make(func() { _ = c.Destroy() }) defer cu.Clean() // Lock the container metadata file to prevent concurrent creations of @@ -402,7 +403,7 @@ func (c *Container) Start(conf *boot.Config) error { if err := c.Saver.lock(); err != nil { return err } - unlock := specutils.MakeCleanup(func() { c.Saver.unlock() }) + unlock := cleanup.Make(func() { c.Saver.unlock() }) defer unlock.Clean() if err := c.requireStatus("start", Created); err != nil { @@ -506,7 +507,7 @@ func Run(conf *boot.Config, args Args) (syscall.WaitStatus, error) { } // Clean up partially created container if an error occurs. // Any errors returned by Destroy() itself are ignored. - cu := specutils.MakeCleanup(func() { + cu := cleanup.Make(func() { c.Destroy() }) defer cu.Clean() diff --git a/runsc/container/container_test.go b/runsc/container/container_test.go index 1a6d50d0d..e7715b6f7 100644 --- a/runsc/container/container_test.go +++ b/runsc/container/container_test.go @@ -256,8 +256,6 @@ var ( func configs(t *testing.T, opts ...configOption) map[string]*boot.Config { // Always load the default config. cs := make(map[string]*boot.Config) - cs["default"] = testutil.TestConfig(t) - for _, o := range opts { switch o { case overlay: @@ -285,9 +283,16 @@ func configs(t *testing.T, opts ...configOption) map[string]*boot.Config { func configsWithVFS2(t *testing.T, opts ...configOption) map[string]*boot.Config { vfs1 := configs(t, opts...) - vfs2 := configs(t, opts...) - for key, value := range vfs2 { + var optsVFS2 []configOption + for _, opt := range opts { + // TODO(gvisor.dev/issue/1487): Enable overlay tests. + if opt != overlay { + optsVFS2 = append(optsVFS2, opt) + } + } + + for key, value := range configs(t, optsVFS2...) { value.VFS2 = true vfs1[key+"VFS2"] = value } @@ -603,7 +608,7 @@ func doAppExitStatus(t *testing.T, vfs2 bool) { // TestExec verifies that a container can exec a new program. func TestExec(t *testing.T) { - for name, conf := range configs(t, overlay) { + for name, conf := range configsWithVFS2(t, overlay) { t.Run(name, func(t *testing.T) { const uid = 343 spec := testutil.NewSpecWithArgs("sleep", "100") @@ -695,7 +700,7 @@ func TestExec(t *testing.T) { // TestKillPid verifies that we can signal individual exec'd processes. func TestKillPid(t *testing.T) { - for name, conf := range configs(t, overlay) { + for name, conf := range configsWithVFS2(t, overlay) { t.Run(name, func(t *testing.T) { app, err := testutil.FindFile("test/cmd/test_app/test_app") if err != nil { @@ -1211,7 +1216,7 @@ func TestCapabilities(t *testing.T) { uid := auth.KUID(os.Getuid() + 1) gid := auth.KGID(os.Getgid() + 1) - for name, conf := range configs(t, all...) { + for name, conf := range configsWithVFS2(t, all...) { t.Run(name, func(t *testing.T) { spec := testutil.NewSpecWithArgs("sleep", "100") rootDir, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf) @@ -1409,7 +1414,7 @@ func TestReadonlyRoot(t *testing.T) { } func TestUIDMap(t *testing.T) { - for name, conf := range configs(t, noOverlay...) { + for name, conf := range configsWithVFS2(t, noOverlay...) { t.Run(name, func(t *testing.T) { testDir, err := ioutil.TempDir(testutil.TmpDir(), "test-mount") if err != nil { @@ -1537,28 +1542,6 @@ func TestReadonlyMount(t *testing.T) { } } -func TestBindMountByOption(t *testing.T) { - for _, conf := range configs(t, overlay) { - t.Logf("Running test with conf: %+v", conf) - - dir, err := ioutil.TempDir(testutil.TmpDir(), "bind-mount") - spec := testutil.NewSpecWithArgs("/bin/touch", path.Join(dir, "file")) - if err != nil { - t.Fatalf("ioutil.TempDir() failed: %v", err) - } - spec.Mounts = append(spec.Mounts, specs.Mount{ - Destination: dir, - Source: dir, - Type: "none", - Options: []string{"rw", "bind"}, - }) - - if err := run(spec, conf); err != nil { - t.Fatalf("error running sandbox: %v", err) - } - } -} - // TestAbbreviatedIDs checks that runsc supports using abbreviated container // IDs in place of full IDs. func TestAbbreviatedIDs(t *testing.T) { @@ -1908,7 +1891,7 @@ func doDestroyStartingTest(t *testing.T, vfs2 bool) { } func TestCreateWorkingDir(t *testing.T) { - for name, conf := range configs(t, overlay) { + for name, conf := range configsWithVFS2(t, overlay) { t.Run(name, func(t *testing.T) { tmpDir, err := ioutil.TempDir(testutil.TmpDir(), "cwd-create") if err != nil { @@ -2031,7 +2014,7 @@ func TestMountPropagation(t *testing.T) { } func TestMountSymlink(t *testing.T) { - for name, conf := range configs(t, overlay) { + for name, conf := range configsWithVFS2(t, overlay) { t.Run(name, func(t *testing.T) { dir, err := ioutil.TempDir(testutil.TmpDir(), "mount-symlink") if err != nil { diff --git a/runsc/container/multi_container_test.go b/runsc/container/multi_container_test.go index f6861b1dd..207206dd2 100644 --- a/runsc/container/multi_container_test.go +++ b/runsc/container/multi_container_test.go @@ -27,6 +27,7 @@ import ( "time" specs "github.com/opencontainers/runtime-spec/specs-go" + "gvisor.dev/gvisor/pkg/cleanup" "gvisor.dev/gvisor/pkg/sentry/control" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sync" @@ -64,29 +65,16 @@ func startContainers(conf *boot.Config, specs []*specs.Spec, ids []string) ([]*C panic("conf.RootDir not set. Call testutil.SetupRootDir() to set.") } - var ( - containers []*Container - cleanups []func() - ) - cleanups = append(cleanups, func() { - for _, c := range containers { - c.Destroy() - } - }) - cleanupAll := func() { - for _, c := range cleanups { - c() - } - } - localClean := specutils.MakeCleanup(cleanupAll) - defer localClean.Clean() + cu := cleanup.Cleanup{} + defer cu.Clean() + var containers []*Container for i, spec := range specs { bundleDir, cleanup, err := testutil.SetupBundleDir(spec) if err != nil { return nil, nil, fmt.Errorf("error setting up container: %v", err) } - cleanups = append(cleanups, cleanup) + cu.Add(cleanup) args := Args{ ID: ids[i], @@ -97,6 +85,7 @@ func startContainers(conf *boot.Config, specs []*specs.Spec, ids []string) ([]*C if err != nil { return nil, nil, fmt.Errorf("error creating container: %v", err) } + cu.Add(func() { cont.Destroy() }) containers = append(containers, cont) if err := cont.Start(conf); err != nil { @@ -104,8 +93,7 @@ func startContainers(conf *boot.Config, specs []*specs.Spec, ids []string) ([]*C } } - localClean.Release() - return containers, cleanupAll, nil + return containers, cu.Release(), nil } type execDesc struct { @@ -141,7 +129,7 @@ func createSharedMount(mount specs.Mount, name string, pod ...*specs.Spec) { // TestMultiContainerSanity checks that it is possible to run 2 dead-simple // containers in the same sandbox. func TestMultiContainerSanity(t *testing.T) { - for name, conf := range configs(t, all...) { + for name, conf := range configsWithVFS2(t, all...) { t.Run(name, func(t *testing.T) { rootDir, cleanup, err := testutil.SetupRootDir() if err != nil { @@ -1394,7 +1382,7 @@ func TestMultiContainerSharedMountUnsupportedOptions(t *testing.T) { Destination: "/mydir/test", Source: "/some/dir", Type: "tmpfs", - Options: []string{"rw", "relatime"}, + Options: []string{"rw", "rbind", "relatime"}, } podSpec[0].Mounts = append(podSpec[0].Mounts, mnt0) diff --git a/runsc/fsgofer/BUILD b/runsc/fsgofer/BUILD index 64a406ae2..1036b0630 100644 --- a/runsc/fsgofer/BUILD +++ b/runsc/fsgofer/BUILD @@ -13,12 +13,12 @@ go_library( visibility = ["//runsc:__subpackages__"], deps = [ "//pkg/abi/linux", + "//pkg/cleanup", "//pkg/fd", "//pkg/log", "//pkg/p9", "//pkg/sync", "//pkg/syserr", - "//runsc/specutils", "@org_golang_x_sys//unix:go_default_library", ], ) diff --git a/runsc/fsgofer/fsgofer.go b/runsc/fsgofer/fsgofer.go index 1942f50d7..edc239013 100644 --- a/runsc/fsgofer/fsgofer.go +++ b/runsc/fsgofer/fsgofer.go @@ -33,11 +33,11 @@ import ( "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/cleanup" "gvisor.dev/gvisor/pkg/fd" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/p9" "gvisor.dev/gvisor/pkg/sync" - "gvisor.dev/gvisor/runsc/specutils" ) const ( @@ -439,7 +439,7 @@ func (l *localFile) Create(name string, mode p9.OpenFlags, perm p9.FileMode, uid if err != nil { return nil, nil, p9.QID{}, 0, extractErrno(err) } - cu := specutils.MakeCleanup(func() { + cu := cleanup.Make(func() { child.Close() // Best effort attempt to remove the file in case of failure. if err := syscall.Unlinkat(l.file.FD(), name); err != nil { @@ -480,7 +480,7 @@ func (l *localFile) Mkdir(name string, perm p9.FileMode, uid p9.UID, gid p9.GID) if err := syscall.Mkdirat(l.file.FD(), name, uint32(perm.Permissions())); err != nil { return p9.QID{}, extractErrno(err) } - cu := specutils.MakeCleanup(func() { + cu := cleanup.Make(func() { // Best effort attempt to remove the dir in case of failure. if err := unix.Unlinkat(l.file.FD(), name, unix.AT_REMOVEDIR); err != nil { log.Warningf("error unlinking dir %q after failure: %v", path.Join(l.hostPath, name), err) @@ -864,7 +864,7 @@ func (l *localFile) Symlink(target, newName string, uid p9.UID, gid p9.GID) (p9. if err := unix.Symlinkat(target, l.file.FD(), newName); err != nil { return p9.QID{}, extractErrno(err) } - cu := specutils.MakeCleanup(func() { + cu := cleanup.Make(func() { // Best effort attempt to remove the symlink in case of failure. if err := syscall.Unlinkat(l.file.FD(), newName); err != nil { log.Warningf("error unlinking file %q after failure: %v", path.Join(l.hostPath, newName), err) diff --git a/runsc/sandbox/BUILD b/runsc/sandbox/BUILD index c95d50294..035dcd3e3 100644 --- a/runsc/sandbox/BUILD +++ b/runsc/sandbox/BUILD @@ -13,6 +13,7 @@ go_library( "//runsc:__subpackages__", ], deps = [ + "//pkg/cleanup", "//pkg/control/client", "//pkg/control/server", "//pkg/log", diff --git a/runsc/sandbox/sandbox.go b/runsc/sandbox/sandbox.go index e4ec16e2f..6e1a2af25 100644 --- a/runsc/sandbox/sandbox.go +++ b/runsc/sandbox/sandbox.go @@ -30,6 +30,7 @@ import ( "github.com/cenkalti/backoff" specs "github.com/opencontainers/runtime-spec/specs-go" "github.com/syndtr/gocapability/capability" + "gvisor.dev/gvisor/pkg/cleanup" "gvisor.dev/gvisor/pkg/control/client" "gvisor.dev/gvisor/pkg/control/server" "gvisor.dev/gvisor/pkg/log" @@ -119,7 +120,7 @@ func New(conf *boot.Config, args *Args) (*Sandbox, error) { s := &Sandbox{ID: args.ID, Cgroup: args.Cgroup} // The Cleanup object cleans up partially created sandboxes when an error // occurs. Any errors occurring during cleanup itself are ignored. - c := specutils.MakeCleanup(func() { + c := cleanup.Make(func() { err := s.destroy() log.Warningf("error destroying sandbox: %v", err) }) diff --git a/runsc/specutils/namespace.go b/runsc/specutils/namespace.go index 60bb7b7ee..23001d67c 100644 --- a/runsc/specutils/namespace.go +++ b/runsc/specutils/namespace.go @@ -18,6 +18,7 @@ import ( "fmt" "os" "os/exec" + "os/signal" "path/filepath" "runtime" "syscall" @@ -261,7 +262,18 @@ func MaybeRunAsRoot() error { cmd.Stdin = os.Stdin cmd.Stdout = os.Stdout cmd.Stderr = os.Stderr - if err := cmd.Run(); err != nil { + if err := cmd.Start(); err != nil { + return fmt.Errorf("re-executing self: %w", err) + } + ch := make(chan os.Signal, 1) + signal.Notify(ch) + go func() { + for { + // Forward all signals to child process. + cmd.Process.Signal(<-ch) + } + }() + if err := cmd.Wait(); err != nil { if exit, ok := err.(*exec.ExitError); ok { if ws, ok := exit.Sys().(syscall.WaitStatus); ok { os.Exit(ws.ExitStatus()) @@ -269,7 +281,7 @@ func MaybeRunAsRoot() error { log.Warningf("No wait status provided, exiting with -1: %v", err) os.Exit(-1) } - return fmt.Errorf("re-executing self: %v", err) + return err } // Child completed with success. os.Exit(0) diff --git a/runsc/specutils/specutils.go b/runsc/specutils/specutils.go index 202518b58..f1fa573c5 100644 --- a/runsc/specutils/specutils.go +++ b/runsc/specutils/specutils.go @@ -311,19 +311,7 @@ func capsFromNames(names []string, skipSet map[linux.Capability]struct{}) (auth. // Is9PMount returns true if the given mount can be mounted as an external gofer. func Is9PMount(m specs.Mount) bool { - var isBind bool - switch m.Type { - case "bind": - isBind = true - default: - for _, opt := range m.Options { - if opt == "bind" || opt == "rbind" { - isBind = true - break - } - } - } - return isBind && m.Source != "" && IsSupportedDevMount(m) + return m.Type == "bind" && m.Source != "" && IsSupportedDevMount(m) } // IsSupportedDevMount returns true if the mount is a supported /dev mount. @@ -456,36 +444,6 @@ func ContainsStr(strs []string, str string) bool { return false } -// Cleanup allows defers to be aborted when cleanup needs to happen -// conditionally. Usage: -// c := MakeCleanup(func() { f.Close() }) -// defer c.Clean() // any failure before release is called will close the file. -// ... -// c.Release() // on success, aborts closing the file and return it. -// return f -type Cleanup struct { - clean func() -} - -// MakeCleanup creates a new Cleanup object. -func MakeCleanup(f func()) Cleanup { - return Cleanup{clean: f} -} - -// Clean calls the cleanup function. -func (c *Cleanup) Clean() { - if c.clean != nil { - c.clean() - c.clean = nil - } -} - -// Release releases the cleanup from its duties, i.e. cleanup function is not -// called after this point. -func (c *Cleanup) Release() { - c.clean = nil -} - // RetryEintr retries the function until an error different than EINTR is // returned. func RetryEintr(f func() (uintptr, uintptr, error)) (uintptr, uintptr, error) { |