summaryrefslogtreecommitdiffhomepage
path: root/runsc
diff options
context:
space:
mode:
Diffstat (limited to 'runsc')
-rw-r--r--runsc/boot/filter/config.go8
-rw-r--r--runsc/boot/fs.go51
-rw-r--r--runsc/boot/loader.go2
-rw-r--r--runsc/boot/vfs.go212
-rw-r--r--runsc/cgroup/BUILD2
-rw-r--r--runsc/cgroup/cgroup.go4
-rw-r--r--runsc/cmd/boot.go2
-rw-r--r--runsc/cmd/gofer.go4
-rw-r--r--runsc/cmd/spec.go18
-rw-r--r--runsc/container/BUILD4
-rw-r--r--runsc/container/console_test.go2
-rw-r--r--runsc/container/container.go7
-rw-r--r--runsc/container/container_test.go47
-rw-r--r--runsc/container/multi_container_test.go30
-rw-r--r--runsc/fsgofer/BUILD2
-rw-r--r--runsc/fsgofer/fsgofer.go8
-rw-r--r--runsc/sandbox/BUILD1
-rw-r--r--runsc/sandbox/sandbox.go3
-rw-r--r--runsc/specutils/namespace.go16
-rw-r--r--runsc/specutils/specutils.go44
20 files changed, 209 insertions, 258 deletions
diff --git a/runsc/boot/filter/config.go b/runsc/boot/filter/config.go
index 98cdd90dd..60e33425f 100644
--- a/runsc/boot/filter/config.go
+++ b/runsc/boot/filter/config.go
@@ -288,6 +288,14 @@ var allowedSyscalls = seccomp.SyscallRules{
syscall.SYS_SIGALTSTACK: {},
unix.SYS_STATX: {},
syscall.SYS_SYNC_FILE_RANGE: {},
+ syscall.SYS_TEE: []seccomp.Rule{
+ {
+ seccomp.AllowAny{},
+ seccomp.AllowAny{},
+ seccomp.AllowValue(1), /* len */
+ seccomp.AllowValue(unix.SPLICE_F_NONBLOCK), /* flags */
+ },
+ },
syscall.SYS_TGKILL: []seccomp.Rule{
{
seccomp.AllowValue(uint64(os.Getpid())),
diff --git a/runsc/boot/fs.go b/runsc/boot/fs.go
index e1181271a..b98a1eb50 100644
--- a/runsc/boot/fs.go
+++ b/runsc/boot/fs.go
@@ -37,6 +37,7 @@ import (
"gvisor.dev/gvisor/pkg/sentry/fs"
"gvisor.dev/gvisor/pkg/sentry/fs/gofer"
"gvisor.dev/gvisor/pkg/sentry/fs/ramfs"
+ "gvisor.dev/gvisor/pkg/sentry/fs/user"
"gvisor.dev/gvisor/pkg/sentry/fsimpl/devpts"
"gvisor.dev/gvisor/pkg/sentry/fsimpl/devtmpfs"
gofervfs2 "gvisor.dev/gvisor/pkg/sentry/fsimpl/gofer"
@@ -62,7 +63,7 @@ const (
)
// tmpfs has some extra supported options that we must pass through.
-var tmpfsAllowedOptions = []string{"mode", "uid", "gid"}
+var tmpfsAllowedData = []string{"mode", "uid", "gid"}
func addOverlay(ctx context.Context, conf *Config, lower *fs.Inode, name string, lowerFlags fs.MountSourceFlags) (*fs.Inode, error) {
// Upper layer uses the same flags as lower, but it must be read-write.
@@ -153,8 +154,8 @@ func compileMounts(spec *specs.Spec) []specs.Mount {
return mounts
}
-// p9MountOptions creates a slice of options for a p9 mount.
-func p9MountOptions(fd int, fa FileAccessType, vfs2 bool) []string {
+// p9MountData creates a slice of p9 mount data.
+func p9MountData(fd int, fa FileAccessType, vfs2 bool) []string {
opts := []string{
"trans=fd",
"rfdno=" + strconv.Itoa(fd),
@@ -221,9 +222,6 @@ func mountFlags(opts []string) fs.MountSourceFlags {
mf.NoAtime = true
case "noexec":
mf.NoExec = true
- case "bind", "rbind":
- // When options include either "bind" or "rbind",
- // it's converted to a 9P mount.
default:
log.Warningf("ignoring unknown mount option %q", o)
}
@@ -237,7 +235,7 @@ func isSupportedMountFlag(fstype, opt string) bool {
return true
}
if fstype == tmpfsvfs2.Name {
- ok, err := parseMountOption(opt, tmpfsAllowedOptions...)
+ ok, err := parseMountOption(opt, tmpfsAllowedData...)
return ok && err == nil
}
return false
@@ -294,17 +292,10 @@ func setupContainerFS(ctx context.Context, conf *Config, mntr *containerMounter,
// Set namespace here so that it can be found in ctx.
procArgs.MountNamespace = mns
- return setExecutablePath(ctx, procArgs)
-}
-
-// setExecutablePath sets the procArgs.Filename by searching the PATH for an
-// executable matching the procArgs.Argv[0].
-func setExecutablePath(ctx context.Context, procArgs *kernel.CreateProcessArgs) error {
- paths := fs.GetPath(procArgs.Envv)
- exe := procArgs.Argv[0]
- f, err := procArgs.MountNamespace.ResolveExecutablePath(ctx, procArgs.WorkingDirectory, exe, paths)
+ // Resolve the executable path from working dir and environment.
+ f, err := user.ResolveExecutablePath(ctx, procArgs.Credentials, procArgs.MountNamespace, procArgs.Envv, procArgs.WorkingDirectory, procArgs.Argv[0])
if err != nil {
- return fmt.Errorf("searching for executable %q, cwd: %q, $PATH=%q: %v", exe, procArgs.WorkingDirectory, strings.Join(paths, ":"), err)
+ return fmt.Errorf("searching for executable %q, cwd: %q, envv: %q: %v", procArgs.Argv[0], procArgs.WorkingDirectory, procArgs.Envv, err)
}
procArgs.Filename = f
return nil
@@ -725,7 +716,7 @@ func (c *containerMounter) createRootMount(ctx context.Context, conf *Config) (*
fd := c.fds.remove()
log.Infof("Mounting root over 9P, ioFD: %d", fd)
p9FS := mustFindFilesystem("9p")
- opts := p9MountOptions(fd, conf.FileAccess, false /* vfs2 */)
+ opts := p9MountData(fd, conf.FileAccess, false /* vfs2 */)
if conf.OverlayfsStaleRead {
// We can't check for overlayfs here because sandbox is chroot'ed and gofer
@@ -770,10 +761,6 @@ func (c *containerMounter) getMountNameAndOptions(conf *Config, m specs.Mount) (
useOverlay bool
)
- if isBindMount(m) {
- m.Type = bind
- }
-
switch m.Type {
case devpts.Name, devtmpfs.Name, procvfs2.Name, sysvfs2.Name:
fsName = m.Type
@@ -783,7 +770,7 @@ func (c *containerMounter) getMountNameAndOptions(conf *Config, m specs.Mount) (
fsName = m.Type
var err error
- opts, err = parseAndFilterOptions(m.Options, tmpfsAllowedOptions...)
+ opts, err = parseAndFilterOptions(m.Options, tmpfsAllowedData...)
if err != nil {
return "", nil, false, err
}
@@ -791,7 +778,7 @@ func (c *containerMounter) getMountNameAndOptions(conf *Config, m specs.Mount) (
case bind:
fd := c.fds.remove()
fsName = gofervfs2.Name
- opts = p9MountOptions(fd, c.getMountAccessType(m), conf.VFS2)
+ opts = p9MountData(fd, c.getMountAccessType(m), conf.VFS2)
// If configured, add overlay to all writable mounts.
useOverlay = conf.Overlay && !mountFlags(m.Options).ReadOnly
@@ -801,18 +788,6 @@ func (c *containerMounter) getMountNameAndOptions(conf *Config, m specs.Mount) (
return fsName, opts, useOverlay, nil
}
-func isBindMount(m specs.Mount) bool {
- for _, opt := range m.Options {
- // When options include either "bind" or "rbind", this behaves as
- // bind mount even if the mount type is equal to a filesystem supported
- // on runsc.
- if opt == "bind" || opt == "rbind" {
- return true
- }
- }
- return false
-}
-
func (c *containerMounter) getMountAccessType(mount specs.Mount) FileAccessType {
if hint := c.hints.findMount(mount); hint != nil {
return hint.fileAccessType()
@@ -956,7 +931,7 @@ func (c *containerMounter) createRestoreEnvironment(conf *Config) (*fs.RestoreEn
// Add root mount.
fd := c.fds.remove()
- opts := p9MountOptions(fd, conf.FileAccess, false /* vfs2 */)
+ opts := p9MountData(fd, conf.FileAccess, false /* vfs2 */)
mf := fs.MountSourceFlags{}
if c.root.Readonly || conf.Overlay {
@@ -1044,7 +1019,7 @@ func (c *containerMounter) mountTmp(ctx context.Context, conf *Config, mns *fs.M
Destination: "/tmp",
// Sticky bit is added to prevent accidental deletion of files from
// another user. This is normally done for /tmp.
- Options: []string{"mode=1777"},
+ Options: []string{"mode=01777"},
}
return c.mountSubmount(ctx, conf, mns, root, tmpMount)
diff --git a/runsc/boot/loader.go b/runsc/boot/loader.go
index f802bc9fb..002479612 100644
--- a/runsc/boot/loader.go
+++ b/runsc/boot/loader.go
@@ -1056,7 +1056,7 @@ func newEmptySandboxNetworkStack(clock tcpip.Clock, uniqueID stack.UniqueID) (in
return nil, fmt.Errorf("SetTransportProtocolOption failed: %v", err)
}
- s.FillDefaultIPTables()
+ s.FillIPTablesMetadata()
return &s, nil
}
diff --git a/runsc/boot/vfs.go b/runsc/boot/vfs.go
index 147c901c4..6c84f0794 100644
--- a/runsc/boot/vfs.go
+++ b/runsc/boot/vfs.go
@@ -22,22 +22,21 @@ import (
specs "github.com/opencontainers/runtime-spec/specs-go"
"gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/fspath"
+ "gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/pkg/sentry/devices/memdev"
- "gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/fs/user"
"gvisor.dev/gvisor/pkg/sentry/fsimpl/devpts"
"gvisor.dev/gvisor/pkg/sentry/fsimpl/devtmpfs"
"gvisor.dev/gvisor/pkg/sentry/fsimpl/gofer"
"gvisor.dev/gvisor/pkg/sentry/fsimpl/proc"
"gvisor.dev/gvisor/pkg/sentry/fsimpl/sys"
"gvisor.dev/gvisor/pkg/sentry/fsimpl/tmpfs"
- "gvisor.dev/gvisor/pkg/syserror"
-
- "gvisor.dev/gvisor/pkg/context"
- "gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/sentry/vfs"
+ "gvisor.dev/gvisor/pkg/syserror"
)
func registerFilesystems(ctx context.Context, vfsObj *vfs.VirtualFilesystem, creds *auth.Credentials) error {
@@ -95,69 +94,14 @@ func setupContainerVFS2(ctx context.Context, conf *Config, mntr *containerMounte
return fmt.Errorf("failed to setupFS: %w", err)
}
procArgs.MountNamespaceVFS2 = mns
- return setExecutablePathVFS2(ctx, procArgs)
-}
-
-func setExecutablePathVFS2(ctx context.Context, procArgs *kernel.CreateProcessArgs) error {
- exe := procArgs.Argv[0]
-
- // Absolute paths can be used directly.
- if path.IsAbs(exe) {
- procArgs.Filename = exe
- return nil
- }
-
- // Paths with '/' in them should be joined to the working directory, or
- // to the root if working directory is not set.
- if strings.IndexByte(exe, '/') > 0 {
- if !path.IsAbs(procArgs.WorkingDirectory) {
- return fmt.Errorf("working directory %q must be absolute", procArgs.WorkingDirectory)
- }
- procArgs.Filename = path.Join(procArgs.WorkingDirectory, exe)
- return nil
- }
-
- // Paths with a '/' are relative to the CWD.
- if strings.IndexByte(exe, '/') > 0 {
- procArgs.Filename = path.Join(procArgs.WorkingDirectory, exe)
- return nil
- }
- // Otherwise, We must lookup the name in the paths, starting from the
- // root directory.
- root := procArgs.MountNamespaceVFS2.Root()
- defer root.DecRef()
-
- paths := fs.GetPath(procArgs.Envv)
- creds := procArgs.Credentials
-
- for _, p := range paths {
- binPath := path.Join(p, exe)
- pop := &vfs.PathOperation{
- Root: root,
- Start: root,
- Path: fspath.Parse(binPath),
- FollowFinalSymlink: true,
- }
- opts := &vfs.OpenOptions{
- FileExec: true,
- Flags: linux.O_RDONLY,
- }
- dentry, err := root.Mount().Filesystem().VirtualFilesystem().OpenAt(ctx, creds, pop, opts)
- if err == syserror.ENOENT || err == syserror.EACCES {
- // Didn't find it here.
- continue
- }
- if err != nil {
- return err
- }
- dentry.DecRef()
-
- procArgs.Filename = binPath
- return nil
+ // Resolve the executable path from working dir and environment.
+ f, err := user.ResolveExecutablePathVFS2(ctx, procArgs.Credentials, procArgs.MountNamespaceVFS2, procArgs.Envv, procArgs.WorkingDirectory, procArgs.Argv[0])
+ if err != nil {
+ return fmt.Errorf("searching for executable %q, cwd: %q, envv: %q: %v", procArgs.Argv[0], procArgs.WorkingDirectory, procArgs.Envv, err)
}
-
- return fmt.Errorf("executable %q not found in $PATH=%q", exe, strings.Join(paths, ":"))
+ procArgs.Filename = f
+ return nil
}
func (c *containerMounter) setupVFS2(ctx context.Context, conf *Config, procArgs *kernel.CreateProcessArgs) (*vfs.MountNamespace, error) {
@@ -192,7 +136,7 @@ func (c *containerMounter) setupVFS2(ctx context.Context, conf *Config, procArgs
func (c *containerMounter) createMountNamespaceVFS2(ctx context.Context, conf *Config, creds *auth.Credentials) (*vfs.MountNamespace, error) {
fd := c.fds.remove()
- opts := strings.Join(p9MountOptions(fd, conf.FileAccess, true /* vfs2 */), ",")
+ opts := strings.Join(p9MountData(fd, conf.FileAccess, true /* vfs2 */), ",")
log.Infof("Mounting root over 9P, ioFD: %d", fd)
mns, err := c.k.VFS().NewMountNamespace(ctx, creds, "", gofer.Name, &vfs.GetFilesystemOptions{Data: opts})
@@ -216,8 +160,9 @@ func (c *containerMounter) mountSubmountsVFS2(ctx context.Context, conf *Config,
}
}
- // TODO(gvisor.dev/issue/1487): implement mountTmp from fs.go.
-
+ if err := c.mountTmpVFS2(ctx, conf, creds, mns); err != nil {
+ return fmt.Errorf(`mount submount "\tmp": %w`, err)
+ }
return nil
}
@@ -235,7 +180,7 @@ func (c *containerMounter) prepareMountsVFS2() ([]mountAndFD, error) {
fd := -1
// Only bind mounts use host FDs; see
// containerMounter.getMountNameAndOptionsVFS2.
- if m.Type == bind || isBindMount(m) {
+ if m.Type == bind {
fd = c.fds.remove()
}
mounts = append(mounts, mountAndFD{
@@ -255,8 +200,6 @@ func (c *containerMounter) prepareMountsVFS2() ([]mountAndFD, error) {
return mounts, nil
}
-// TODO(gvisor.dev/issue/1487): Implement submount options similar to the VFS1
-// version.
func (c *containerMounter) mountSubmountVFS2(ctx context.Context, conf *Config, mns *vfs.MountNamespace, creds *auth.Credentials, submount *mountAndFD) error {
root := mns.Root()
defer root.DecRef()
@@ -265,12 +208,11 @@ func (c *containerMounter) mountSubmountVFS2(ctx context.Context, conf *Config,
Start: root,
Path: fspath.Parse(submount.Destination),
}
-
- fsName, options, useOverlay, err := c.getMountNameAndOptionsVFS2(conf, submount)
+ fsName, opts, err := c.getMountNameAndOptionsVFS2(conf, submount)
if err != nil {
return fmt.Errorf("mountOptions failed: %w", err)
}
- if fsName == "" {
+ if len(fsName) == 0 {
// Filesystem is not supported (e.g. cgroup), just skip it.
return nil
}
@@ -278,17 +220,6 @@ func (c *containerMounter) mountSubmountVFS2(ctx context.Context, conf *Config,
if err := c.makeSyntheticMount(ctx, submount.Destination, root, creds); err != nil {
return err
}
-
- opts := &vfs.MountOptions{
- GetFilesystemOptions: vfs.GetFilesystemOptions{
- Data: strings.Join(options, ","),
- },
- InternalMount: true,
- }
-
- // All writes go to upper, be paranoid and make lower readonly.
- opts.ReadOnly = useOverlay
-
if err := c.k.VFS().MountAt(ctx, creds, "", target, fsName, opts); err != nil {
return fmt.Errorf("failed to mount %q (type: %s): %w, opts: %v", submount.Destination, submount.Type, err, opts)
}
@@ -298,17 +229,13 @@ func (c *containerMounter) mountSubmountVFS2(ctx context.Context, conf *Config,
// getMountNameAndOptionsVFS2 retrieves the fsName, opts, and useOverlay values
// used for mounts.
-func (c *containerMounter) getMountNameAndOptionsVFS2(conf *Config, m *mountAndFD) (string, []string, bool, error) {
+func (c *containerMounter) getMountNameAndOptionsVFS2(conf *Config, m *mountAndFD) (string, *vfs.MountOptions, error) {
var (
- fsName string
- opts []string
- useOverlay bool
+ fsName string
+ data []string
)
- if isBindMount(m.Mount) {
- m.Type = bind
- }
-
+ // Find filesystem name and FS specific data field.
switch m.Type {
case devpts.Name, devtmpfs.Name, proc.Name, sys.Name:
fsName = m.Type
@@ -318,21 +245,46 @@ func (c *containerMounter) getMountNameAndOptionsVFS2(conf *Config, m *mountAndF
fsName = m.Type
var err error
- opts, err = parseAndFilterOptions(m.Options, tmpfsAllowedOptions...)
+ data, err = parseAndFilterOptions(m.Options, tmpfsAllowedData...)
if err != nil {
- return "", nil, false, err
+ return "", nil, err
}
case bind:
fsName = gofer.Name
- opts = p9MountOptions(m.fd, c.getMountAccessType(m.Mount), true /* vfs2 */)
- // If configured, add overlay to all writable mounts.
- useOverlay = conf.Overlay && !mountFlags(m.Options).ReadOnly
+ data = p9MountData(m.fd, c.getMountAccessType(m.Mount), true /* vfs2 */)
default:
log.Warningf("ignoring unknown filesystem type %q", m.Type)
}
- return fsName, opts, useOverlay, nil
+
+ opts := &vfs.MountOptions{
+ GetFilesystemOptions: vfs.GetFilesystemOptions{
+ Data: strings.Join(data, ","),
+ },
+ InternalMount: true,
+ }
+
+ for _, o := range m.Options {
+ switch o {
+ case "rw":
+ opts.ReadOnly = false
+ case "ro":
+ opts.ReadOnly = true
+ case "noatime":
+ // TODO(gvisor.dev/issue/1193): Implement MS_NOATIME.
+ case "noexec":
+ opts.Flags.NoExec = true
+ default:
+ log.Warningf("ignoring unknown mount option %q", o)
+ }
+ }
+
+ if conf.Overlay {
+ // All writes go to upper, be paranoid and make lower readonly.
+ opts.ReadOnly = true
+ }
+ return fsName, opts, nil
}
func (c *containerMounter) makeSyntheticMount(ctx context.Context, currentPath string, root vfs.VirtualDentry, creds *auth.Credentials) error {
@@ -361,3 +313,63 @@ func (c *containerMounter) makeSyntheticMount(ctx context.Context, currentPath s
}
return nil
}
+
+// mountTmpVFS2 mounts an internal tmpfs at '/tmp' if it's safe to do so.
+// Technically we don't have to mount tmpfs at /tmp, as we could just rely on
+// the host /tmp, but this is a nice optimization, and fixes some apps that call
+// mknod in /tmp. It's unsafe to mount tmpfs if:
+// 1. /tmp is mounted explicitly: we should not override user's wish
+// 2. /tmp is not empty: mounting tmpfs would hide existing files in /tmp
+//
+// Note that when there are submounts inside of '/tmp', directories for the
+// mount points must be present, making '/tmp' not empty anymore.
+func (c *containerMounter) mountTmpVFS2(ctx context.Context, conf *Config, creds *auth.Credentials, mns *vfs.MountNamespace) error {
+ for _, m := range c.mounts {
+ // m.Destination has been cleaned, so it's to use equality here.
+ if m.Destination == "/tmp" {
+ log.Debugf(`Explict "/tmp" mount found, skipping internal tmpfs, mount: %+v`, m)
+ return nil
+ }
+ }
+
+ root := mns.Root()
+ defer root.DecRef()
+ pop := vfs.PathOperation{
+ Root: root,
+ Start: root,
+ Path: fspath.Parse("/tmp"),
+ }
+ // TODO(gvisor.dev/issue/2782): Use O_PATH when available.
+ statx, err := c.k.VFS().StatAt(ctx, creds, &pop, &vfs.StatOptions{})
+ switch err {
+ case nil:
+ // Found '/tmp' in filesystem, check if it's empty.
+ if linux.FileMode(statx.Mode).FileType() != linux.ModeDirectory {
+ // Not a dir?! Leave it be.
+ return nil
+ }
+ if statx.Nlink > 2 {
+ // If more than "." and ".." is found, skip internal tmpfs to prevent
+ // hiding existing files.
+ log.Infof(`Skipping internal tmpfs mount for "/tmp" because it's not empty`)
+ return nil
+ }
+ log.Infof(`Mounting internal tmpfs on top of empty "/tmp"`)
+ fallthrough
+
+ case syserror.ENOENT:
+ // No '/tmp' found (or fallthrough from above). It's safe to mount internal
+ // tmpfs.
+ tmpMount := specs.Mount{
+ Type: tmpfs.Name,
+ Destination: "/tmp",
+ // Sticky bit is added to prevent accidental deletion of files from
+ // another user. This is normally done for /tmp.
+ Options: []string{"mode=01777"},
+ }
+ return c.mountSubmountVFS2(ctx, conf, mns, creds, &mountAndFD{Mount: tmpMount})
+
+ default:
+ return fmt.Errorf(`stating "/tmp" inside container: %w`, err)
+ }
+}
diff --git a/runsc/cgroup/BUILD b/runsc/cgroup/BUILD
index d4c7bdfbb..c087e1a3c 100644
--- a/runsc/cgroup/BUILD
+++ b/runsc/cgroup/BUILD
@@ -7,8 +7,8 @@ go_library(
srcs = ["cgroup.go"],
visibility = ["//:sandbox"],
deps = [
+ "//pkg/cleanup",
"//pkg/log",
- "//runsc/specutils",
"@com_github_cenkalti_backoff//:go_default_library",
"@com_github_opencontainers_runtime-spec//specs-go:go_default_library",
],
diff --git a/runsc/cgroup/cgroup.go b/runsc/cgroup/cgroup.go
index 19c8b0db6..ef01820ef 100644
--- a/runsc/cgroup/cgroup.go
+++ b/runsc/cgroup/cgroup.go
@@ -31,8 +31,8 @@ import (
"github.com/cenkalti/backoff"
specs "github.com/opencontainers/runtime-spec/specs-go"
+ "gvisor.dev/gvisor/pkg/cleanup"
"gvisor.dev/gvisor/pkg/log"
- "gvisor.dev/gvisor/runsc/specutils"
)
const (
@@ -246,7 +246,7 @@ func (c *Cgroup) Install(res *specs.LinuxResources) error {
// The Cleanup object cleans up partially created cgroups when an error occurs.
// Errors occuring during cleanup itself are ignored.
- clean := specutils.MakeCleanup(func() { _ = c.Uninstall() })
+ clean := cleanup.Make(func() { _ = c.Uninstall() })
defer clean.Clean()
for key, cfg := range controllers {
diff --git a/runsc/cmd/boot.go b/runsc/cmd/boot.go
index 4c2ac6ff0..01204ab4d 100644
--- a/runsc/cmd/boot.go
+++ b/runsc/cmd/boot.go
@@ -136,7 +136,7 @@ func (b *Boot) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
}
// Ensure that if there is a panic, all goroutine stacks are printed.
- debug.SetTraceback("all")
+ debug.SetTraceback("system")
conf := args[0].(*boot.Config)
diff --git a/runsc/cmd/gofer.go b/runsc/cmd/gofer.go
index 28f0d54b9..10448a759 100644
--- a/runsc/cmd/gofer.go
+++ b/runsc/cmd/gofer.go
@@ -168,7 +168,7 @@ func (g *Gofer) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
// Start with root mount, then add any other additional mount as needed.
ats := make([]p9.Attacher, 0, len(spec.Mounts)+1)
ap, err := fsgofer.NewAttachPoint("/", fsgofer.Config{
- ROMount: spec.Root.Readonly,
+ ROMount: spec.Root.Readonly || conf.Overlay,
PanicOnWrite: g.panicOnWrite,
})
if err != nil {
@@ -181,7 +181,7 @@ func (g *Gofer) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
for _, m := range spec.Mounts {
if specutils.Is9PMount(m) {
cfg := fsgofer.Config{
- ROMount: isReadonlyMount(m.Options),
+ ROMount: isReadonlyMount(m.Options) || conf.Overlay,
PanicOnWrite: g.panicOnWrite,
HostUDS: conf.FSGoferHostUDS,
}
diff --git a/runsc/cmd/spec.go b/runsc/cmd/spec.go
index 8e2b36e85..a2b0a4b14 100644
--- a/runsc/cmd/spec.go
+++ b/runsc/cmd/spec.go
@@ -16,6 +16,7 @@ package cmd
import (
"context"
+ "fmt"
"io/ioutil"
"os"
"path/filepath"
@@ -24,7 +25,8 @@ import (
"gvisor.dev/gvisor/runsc/flag"
)
-var specTemplate = []byte(`{
+func genSpec(cwd string) []byte {
+ var template = fmt.Sprintf(`{
"ociVersion": "1.0.0",
"process": {
"terminal": true,
@@ -39,7 +41,7 @@ var specTemplate = []byte(`{
"PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin",
"TERM=xterm"
],
- "cwd": "/",
+ "cwd": "%s",
"capabilities": {
"bounding": [
"CAP_AUDIT_WRITE",
@@ -123,11 +125,15 @@ var specTemplate = []byte(`{
}
]
}
-}`)
+}`, cwd)
+
+ return []byte(template)
+}
// Spec implements subcommands.Command for the "spec" command.
type Spec struct {
bundle string
+ cwd string
}
// Name implements subcommands.Command.Name.
@@ -165,6 +171,8 @@ EXAMPLE:
// SetFlags implements subcommands.Command.SetFlags.
func (s *Spec) SetFlags(f *flag.FlagSet) {
f.StringVar(&s.bundle, "bundle", ".", "path to the root of the OCI bundle")
+ f.StringVar(&s.cwd, "cwd", "/", "working directory that will be set for the executable, "+
+ "this value MUST be an absolute path")
}
// Execute implements subcommands.Command.Execute.
@@ -174,7 +182,9 @@ func (s *Spec) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
Fatalf("file %q already exists", confPath)
}
- if err := ioutil.WriteFile(confPath, specTemplate, 0664); err != nil {
+ var spec = genSpec(s.cwd)
+
+ if err := ioutil.WriteFile(confPath, spec, 0664); err != nil {
Fatalf("writing to %q: %v", confPath, err)
}
diff --git a/runsc/container/BUILD b/runsc/container/BUILD
index 46154df60..49cfb0837 100644
--- a/runsc/container/BUILD
+++ b/runsc/container/BUILD
@@ -16,6 +16,7 @@ go_library(
],
deps = [
"//pkg/abi/linux",
+ "//pkg/cleanup",
"//pkg/log",
"//pkg/sentry/control",
"//pkg/sentry/sighandling",
@@ -46,13 +47,14 @@ go_test(
"//test/cmd/test_app",
],
library = ":container",
- shard_count = 5,
+ shard_count = 10,
tags = [
"requires-kvm",
],
deps = [
"//pkg/abi/linux",
"//pkg/bits",
+ "//pkg/cleanup",
"//pkg/log",
"//pkg/sentry/control",
"//pkg/sentry/kernel",
diff --git a/runsc/container/console_test.go b/runsc/container/console_test.go
index 294dca5e7..3813c6b93 100644
--- a/runsc/container/console_test.go
+++ b/runsc/container/console_test.go
@@ -119,7 +119,7 @@ func receiveConsolePTY(srv *unet.ServerSocket) (*os.File, error) {
// Test that an pty FD is sent over the console socket if one is provided.
func TestConsoleSocket(t *testing.T) {
- for name, conf := range configs(t, all...) {
+ for name, conf := range configsWithVFS2(t, all...) {
t.Run(name, func(t *testing.T) {
spec := testutil.NewSpecWithArgs("true")
_, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf)
diff --git a/runsc/container/container.go b/runsc/container/container.go
index 8539f252d..6d297d0df 100644
--- a/runsc/container/container.go
+++ b/runsc/container/container.go
@@ -31,6 +31,7 @@ import (
"github.com/cenkalti/backoff"
specs "github.com/opencontainers/runtime-spec/specs-go"
"gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/cleanup"
"gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/pkg/sentry/control"
"gvisor.dev/gvisor/pkg/sentry/sighandling"
@@ -293,7 +294,7 @@ func New(conf *boot.Config, args Args) (*Container, error) {
}
// The Cleanup object cleans up partially created containers when an error
// occurs. Any errors occurring during cleanup itself are ignored.
- cu := specutils.MakeCleanup(func() { _ = c.Destroy() })
+ cu := cleanup.Make(func() { _ = c.Destroy() })
defer cu.Clean()
// Lock the container metadata file to prevent concurrent creations of
@@ -402,7 +403,7 @@ func (c *Container) Start(conf *boot.Config) error {
if err := c.Saver.lock(); err != nil {
return err
}
- unlock := specutils.MakeCleanup(func() { c.Saver.unlock() })
+ unlock := cleanup.Make(func() { c.Saver.unlock() })
defer unlock.Clean()
if err := c.requireStatus("start", Created); err != nil {
@@ -506,7 +507,7 @@ func Run(conf *boot.Config, args Args) (syscall.WaitStatus, error) {
}
// Clean up partially created container if an error occurs.
// Any errors returned by Destroy() itself are ignored.
- cu := specutils.MakeCleanup(func() {
+ cu := cleanup.Make(func() {
c.Destroy()
})
defer cu.Clean()
diff --git a/runsc/container/container_test.go b/runsc/container/container_test.go
index 1a6d50d0d..e7715b6f7 100644
--- a/runsc/container/container_test.go
+++ b/runsc/container/container_test.go
@@ -256,8 +256,6 @@ var (
func configs(t *testing.T, opts ...configOption) map[string]*boot.Config {
// Always load the default config.
cs := make(map[string]*boot.Config)
- cs["default"] = testutil.TestConfig(t)
-
for _, o := range opts {
switch o {
case overlay:
@@ -285,9 +283,16 @@ func configs(t *testing.T, opts ...configOption) map[string]*boot.Config {
func configsWithVFS2(t *testing.T, opts ...configOption) map[string]*boot.Config {
vfs1 := configs(t, opts...)
- vfs2 := configs(t, opts...)
- for key, value := range vfs2 {
+ var optsVFS2 []configOption
+ for _, opt := range opts {
+ // TODO(gvisor.dev/issue/1487): Enable overlay tests.
+ if opt != overlay {
+ optsVFS2 = append(optsVFS2, opt)
+ }
+ }
+
+ for key, value := range configs(t, optsVFS2...) {
value.VFS2 = true
vfs1[key+"VFS2"] = value
}
@@ -603,7 +608,7 @@ func doAppExitStatus(t *testing.T, vfs2 bool) {
// TestExec verifies that a container can exec a new program.
func TestExec(t *testing.T) {
- for name, conf := range configs(t, overlay) {
+ for name, conf := range configsWithVFS2(t, overlay) {
t.Run(name, func(t *testing.T) {
const uid = 343
spec := testutil.NewSpecWithArgs("sleep", "100")
@@ -695,7 +700,7 @@ func TestExec(t *testing.T) {
// TestKillPid verifies that we can signal individual exec'd processes.
func TestKillPid(t *testing.T) {
- for name, conf := range configs(t, overlay) {
+ for name, conf := range configsWithVFS2(t, overlay) {
t.Run(name, func(t *testing.T) {
app, err := testutil.FindFile("test/cmd/test_app/test_app")
if err != nil {
@@ -1211,7 +1216,7 @@ func TestCapabilities(t *testing.T) {
uid := auth.KUID(os.Getuid() + 1)
gid := auth.KGID(os.Getgid() + 1)
- for name, conf := range configs(t, all...) {
+ for name, conf := range configsWithVFS2(t, all...) {
t.Run(name, func(t *testing.T) {
spec := testutil.NewSpecWithArgs("sleep", "100")
rootDir, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf)
@@ -1409,7 +1414,7 @@ func TestReadonlyRoot(t *testing.T) {
}
func TestUIDMap(t *testing.T) {
- for name, conf := range configs(t, noOverlay...) {
+ for name, conf := range configsWithVFS2(t, noOverlay...) {
t.Run(name, func(t *testing.T) {
testDir, err := ioutil.TempDir(testutil.TmpDir(), "test-mount")
if err != nil {
@@ -1537,28 +1542,6 @@ func TestReadonlyMount(t *testing.T) {
}
}
-func TestBindMountByOption(t *testing.T) {
- for _, conf := range configs(t, overlay) {
- t.Logf("Running test with conf: %+v", conf)
-
- dir, err := ioutil.TempDir(testutil.TmpDir(), "bind-mount")
- spec := testutil.NewSpecWithArgs("/bin/touch", path.Join(dir, "file"))
- if err != nil {
- t.Fatalf("ioutil.TempDir() failed: %v", err)
- }
- spec.Mounts = append(spec.Mounts, specs.Mount{
- Destination: dir,
- Source: dir,
- Type: "none",
- Options: []string{"rw", "bind"},
- })
-
- if err := run(spec, conf); err != nil {
- t.Fatalf("error running sandbox: %v", err)
- }
- }
-}
-
// TestAbbreviatedIDs checks that runsc supports using abbreviated container
// IDs in place of full IDs.
func TestAbbreviatedIDs(t *testing.T) {
@@ -1908,7 +1891,7 @@ func doDestroyStartingTest(t *testing.T, vfs2 bool) {
}
func TestCreateWorkingDir(t *testing.T) {
- for name, conf := range configs(t, overlay) {
+ for name, conf := range configsWithVFS2(t, overlay) {
t.Run(name, func(t *testing.T) {
tmpDir, err := ioutil.TempDir(testutil.TmpDir(), "cwd-create")
if err != nil {
@@ -2031,7 +2014,7 @@ func TestMountPropagation(t *testing.T) {
}
func TestMountSymlink(t *testing.T) {
- for name, conf := range configs(t, overlay) {
+ for name, conf := range configsWithVFS2(t, overlay) {
t.Run(name, func(t *testing.T) {
dir, err := ioutil.TempDir(testutil.TmpDir(), "mount-symlink")
if err != nil {
diff --git a/runsc/container/multi_container_test.go b/runsc/container/multi_container_test.go
index f6861b1dd..207206dd2 100644
--- a/runsc/container/multi_container_test.go
+++ b/runsc/container/multi_container_test.go
@@ -27,6 +27,7 @@ import (
"time"
specs "github.com/opencontainers/runtime-spec/specs-go"
+ "gvisor.dev/gvisor/pkg/cleanup"
"gvisor.dev/gvisor/pkg/sentry/control"
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sync"
@@ -64,29 +65,16 @@ func startContainers(conf *boot.Config, specs []*specs.Spec, ids []string) ([]*C
panic("conf.RootDir not set. Call testutil.SetupRootDir() to set.")
}
- var (
- containers []*Container
- cleanups []func()
- )
- cleanups = append(cleanups, func() {
- for _, c := range containers {
- c.Destroy()
- }
- })
- cleanupAll := func() {
- for _, c := range cleanups {
- c()
- }
- }
- localClean := specutils.MakeCleanup(cleanupAll)
- defer localClean.Clean()
+ cu := cleanup.Cleanup{}
+ defer cu.Clean()
+ var containers []*Container
for i, spec := range specs {
bundleDir, cleanup, err := testutil.SetupBundleDir(spec)
if err != nil {
return nil, nil, fmt.Errorf("error setting up container: %v", err)
}
- cleanups = append(cleanups, cleanup)
+ cu.Add(cleanup)
args := Args{
ID: ids[i],
@@ -97,6 +85,7 @@ func startContainers(conf *boot.Config, specs []*specs.Spec, ids []string) ([]*C
if err != nil {
return nil, nil, fmt.Errorf("error creating container: %v", err)
}
+ cu.Add(func() { cont.Destroy() })
containers = append(containers, cont)
if err := cont.Start(conf); err != nil {
@@ -104,8 +93,7 @@ func startContainers(conf *boot.Config, specs []*specs.Spec, ids []string) ([]*C
}
}
- localClean.Release()
- return containers, cleanupAll, nil
+ return containers, cu.Release(), nil
}
type execDesc struct {
@@ -141,7 +129,7 @@ func createSharedMount(mount specs.Mount, name string, pod ...*specs.Spec) {
// TestMultiContainerSanity checks that it is possible to run 2 dead-simple
// containers in the same sandbox.
func TestMultiContainerSanity(t *testing.T) {
- for name, conf := range configs(t, all...) {
+ for name, conf := range configsWithVFS2(t, all...) {
t.Run(name, func(t *testing.T) {
rootDir, cleanup, err := testutil.SetupRootDir()
if err != nil {
@@ -1394,7 +1382,7 @@ func TestMultiContainerSharedMountUnsupportedOptions(t *testing.T) {
Destination: "/mydir/test",
Source: "/some/dir",
Type: "tmpfs",
- Options: []string{"rw", "relatime"},
+ Options: []string{"rw", "rbind", "relatime"},
}
podSpec[0].Mounts = append(podSpec[0].Mounts, mnt0)
diff --git a/runsc/fsgofer/BUILD b/runsc/fsgofer/BUILD
index 64a406ae2..1036b0630 100644
--- a/runsc/fsgofer/BUILD
+++ b/runsc/fsgofer/BUILD
@@ -13,12 +13,12 @@ go_library(
visibility = ["//runsc:__subpackages__"],
deps = [
"//pkg/abi/linux",
+ "//pkg/cleanup",
"//pkg/fd",
"//pkg/log",
"//pkg/p9",
"//pkg/sync",
"//pkg/syserr",
- "//runsc/specutils",
"@org_golang_x_sys//unix:go_default_library",
],
)
diff --git a/runsc/fsgofer/fsgofer.go b/runsc/fsgofer/fsgofer.go
index 1942f50d7..edc239013 100644
--- a/runsc/fsgofer/fsgofer.go
+++ b/runsc/fsgofer/fsgofer.go
@@ -33,11 +33,11 @@ import (
"golang.org/x/sys/unix"
"gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/cleanup"
"gvisor.dev/gvisor/pkg/fd"
"gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/pkg/p9"
"gvisor.dev/gvisor/pkg/sync"
- "gvisor.dev/gvisor/runsc/specutils"
)
const (
@@ -439,7 +439,7 @@ func (l *localFile) Create(name string, mode p9.OpenFlags, perm p9.FileMode, uid
if err != nil {
return nil, nil, p9.QID{}, 0, extractErrno(err)
}
- cu := specutils.MakeCleanup(func() {
+ cu := cleanup.Make(func() {
child.Close()
// Best effort attempt to remove the file in case of failure.
if err := syscall.Unlinkat(l.file.FD(), name); err != nil {
@@ -480,7 +480,7 @@ func (l *localFile) Mkdir(name string, perm p9.FileMode, uid p9.UID, gid p9.GID)
if err := syscall.Mkdirat(l.file.FD(), name, uint32(perm.Permissions())); err != nil {
return p9.QID{}, extractErrno(err)
}
- cu := specutils.MakeCleanup(func() {
+ cu := cleanup.Make(func() {
// Best effort attempt to remove the dir in case of failure.
if err := unix.Unlinkat(l.file.FD(), name, unix.AT_REMOVEDIR); err != nil {
log.Warningf("error unlinking dir %q after failure: %v", path.Join(l.hostPath, name), err)
@@ -864,7 +864,7 @@ func (l *localFile) Symlink(target, newName string, uid p9.UID, gid p9.GID) (p9.
if err := unix.Symlinkat(target, l.file.FD(), newName); err != nil {
return p9.QID{}, extractErrno(err)
}
- cu := specutils.MakeCleanup(func() {
+ cu := cleanup.Make(func() {
// Best effort attempt to remove the symlink in case of failure.
if err := syscall.Unlinkat(l.file.FD(), newName); err != nil {
log.Warningf("error unlinking file %q after failure: %v", path.Join(l.hostPath, newName), err)
diff --git a/runsc/sandbox/BUILD b/runsc/sandbox/BUILD
index c95d50294..035dcd3e3 100644
--- a/runsc/sandbox/BUILD
+++ b/runsc/sandbox/BUILD
@@ -13,6 +13,7 @@ go_library(
"//runsc:__subpackages__",
],
deps = [
+ "//pkg/cleanup",
"//pkg/control/client",
"//pkg/control/server",
"//pkg/log",
diff --git a/runsc/sandbox/sandbox.go b/runsc/sandbox/sandbox.go
index e4ec16e2f..6e1a2af25 100644
--- a/runsc/sandbox/sandbox.go
+++ b/runsc/sandbox/sandbox.go
@@ -30,6 +30,7 @@ import (
"github.com/cenkalti/backoff"
specs "github.com/opencontainers/runtime-spec/specs-go"
"github.com/syndtr/gocapability/capability"
+ "gvisor.dev/gvisor/pkg/cleanup"
"gvisor.dev/gvisor/pkg/control/client"
"gvisor.dev/gvisor/pkg/control/server"
"gvisor.dev/gvisor/pkg/log"
@@ -119,7 +120,7 @@ func New(conf *boot.Config, args *Args) (*Sandbox, error) {
s := &Sandbox{ID: args.ID, Cgroup: args.Cgroup}
// The Cleanup object cleans up partially created sandboxes when an error
// occurs. Any errors occurring during cleanup itself are ignored.
- c := specutils.MakeCleanup(func() {
+ c := cleanup.Make(func() {
err := s.destroy()
log.Warningf("error destroying sandbox: %v", err)
})
diff --git a/runsc/specutils/namespace.go b/runsc/specutils/namespace.go
index 60bb7b7ee..23001d67c 100644
--- a/runsc/specutils/namespace.go
+++ b/runsc/specutils/namespace.go
@@ -18,6 +18,7 @@ import (
"fmt"
"os"
"os/exec"
+ "os/signal"
"path/filepath"
"runtime"
"syscall"
@@ -261,7 +262,18 @@ func MaybeRunAsRoot() error {
cmd.Stdin = os.Stdin
cmd.Stdout = os.Stdout
cmd.Stderr = os.Stderr
- if err := cmd.Run(); err != nil {
+ if err := cmd.Start(); err != nil {
+ return fmt.Errorf("re-executing self: %w", err)
+ }
+ ch := make(chan os.Signal, 1)
+ signal.Notify(ch)
+ go func() {
+ for {
+ // Forward all signals to child process.
+ cmd.Process.Signal(<-ch)
+ }
+ }()
+ if err := cmd.Wait(); err != nil {
if exit, ok := err.(*exec.ExitError); ok {
if ws, ok := exit.Sys().(syscall.WaitStatus); ok {
os.Exit(ws.ExitStatus())
@@ -269,7 +281,7 @@ func MaybeRunAsRoot() error {
log.Warningf("No wait status provided, exiting with -1: %v", err)
os.Exit(-1)
}
- return fmt.Errorf("re-executing self: %v", err)
+ return err
}
// Child completed with success.
os.Exit(0)
diff --git a/runsc/specutils/specutils.go b/runsc/specutils/specutils.go
index 202518b58..f1fa573c5 100644
--- a/runsc/specutils/specutils.go
+++ b/runsc/specutils/specutils.go
@@ -311,19 +311,7 @@ func capsFromNames(names []string, skipSet map[linux.Capability]struct{}) (auth.
// Is9PMount returns true if the given mount can be mounted as an external gofer.
func Is9PMount(m specs.Mount) bool {
- var isBind bool
- switch m.Type {
- case "bind":
- isBind = true
- default:
- for _, opt := range m.Options {
- if opt == "bind" || opt == "rbind" {
- isBind = true
- break
- }
- }
- }
- return isBind && m.Source != "" && IsSupportedDevMount(m)
+ return m.Type == "bind" && m.Source != "" && IsSupportedDevMount(m)
}
// IsSupportedDevMount returns true if the mount is a supported /dev mount.
@@ -456,36 +444,6 @@ func ContainsStr(strs []string, str string) bool {
return false
}
-// Cleanup allows defers to be aborted when cleanup needs to happen
-// conditionally. Usage:
-// c := MakeCleanup(func() { f.Close() })
-// defer c.Clean() // any failure before release is called will close the file.
-// ...
-// c.Release() // on success, aborts closing the file and return it.
-// return f
-type Cleanup struct {
- clean func()
-}
-
-// MakeCleanup creates a new Cleanup object.
-func MakeCleanup(f func()) Cleanup {
- return Cleanup{clean: f}
-}
-
-// Clean calls the cleanup function.
-func (c *Cleanup) Clean() {
- if c.clean != nil {
- c.clean()
- c.clean = nil
- }
-}
-
-// Release releases the cleanup from its duties, i.e. cleanup function is not
-// called after this point.
-func (c *Cleanup) Release() {
- c.clean = nil
-}
-
// RetryEintr retries the function until an error different than EINTR is
// returned.
func RetryEintr(f func() (uintptr, uintptr, error)) (uintptr, uintptr, error) {