diff options
-rw-r--r-- | pkg/sentry/fsimpl/gofer/directory.go | 1 | ||||
-rw-r--r-- | runsc/boot/BUILD | 2 | ||||
-rw-r--r-- | runsc/boot/loader.go | 13 | ||||
-rw-r--r-- | runsc/boot/loader_test.go | 152 | ||||
-rw-r--r-- | runsc/boot/vfs.go | 78 | ||||
-rwxr-xr-x | scripts/docker_tests.sh | 3 |
6 files changed, 183 insertions, 66 deletions
diff --git a/pkg/sentry/fsimpl/gofer/directory.go b/pkg/sentry/fsimpl/gofer/directory.go index c67766ab2..55f9ed911 100644 --- a/pkg/sentry/fsimpl/gofer/directory.go +++ b/pkg/sentry/fsimpl/gofer/directory.go @@ -75,6 +75,7 @@ func (d *dentry) createSyntheticDirectoryLocked(name string, mode linux.FileMode handle: handle{ fd: -1, }, + nlink: uint32(2), } d2.pf.dentry = d2 d2.vfsd.Init(d2) diff --git a/runsc/boot/BUILD b/runsc/boot/BUILD index 69dcc74f2..ed3c8f546 100644 --- a/runsc/boot/BUILD +++ b/runsc/boot/BUILD @@ -119,11 +119,13 @@ go_test( library = ":boot", deps = [ "//pkg/control/server", + "//pkg/fspath", "//pkg/log", "//pkg/p9", "//pkg/sentry/contexttest", "//pkg/sentry/fs", "//pkg/sentry/kernel", + "//pkg/sentry/vfs", "//pkg/sync", "//pkg/unet", "//runsc/fsgofer", diff --git a/runsc/boot/loader.go b/runsc/boot/loader.go index 3f41d8357..f6ea4c102 100644 --- a/runsc/boot/loader.go +++ b/runsc/boot/loader.go @@ -625,11 +625,14 @@ func (l *Loader) run() error { // l.stdioFDs are derived from dup() in boot.New() and they are now dup()ed again // either in createFDTable() during initial start or in descriptor.initAfterLoad() - // during restore, we can release l.stdioFDs now. - for _, fd := range l.stdioFDs { - err := syscall.Close(fd) - if err != nil { - return fmt.Errorf("close dup()ed stdioFDs: %v", err) + // during restore, we can release l.stdioFDs now. VFS2 takes ownership of the + // passed FDs, so only close for VFS1. + if !kernel.VFS2Enabled { + for _, fd := range l.stdioFDs { + err := syscall.Close(fd) + if err != nil { + return fmt.Errorf("close dup()ed stdioFDs: %v", err) + } } } diff --git a/runsc/boot/loader_test.go b/runsc/boot/loader_test.go index e7c71734f..55d27a632 100644 --- a/runsc/boot/loader_test.go +++ b/runsc/boot/loader_test.go @@ -26,11 +26,13 @@ import ( specs "github.com/opencontainers/runtime-spec/specs-go" "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/control/server" + "gvisor.dev/gvisor/pkg/fspath" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/p9" "gvisor.dev/gvisor/pkg/sentry/contexttest" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/unet" "gvisor.dev/gvisor/runsc/fsgofer" @@ -107,14 +109,12 @@ func startGofer(root string) (int, func(), error) { return sandboxEnd, cleanup, nil } -func createLoader(vfsEnabled bool) (*Loader, func(), error) { +func createLoader(vfsEnabled bool, spec *specs.Spec) (*Loader, func(), error) { fd, err := server.CreateSocket(ControlSocketAddr(fmt.Sprintf("%010d", rand.Int())[:10])) if err != nil { return nil, nil, err } conf := testConfig() - spec := testSpec() - conf.VFS2 = vfsEnabled sandEnd, cleanup, err := startGofer(spec.Root.Path) @@ -161,7 +161,7 @@ func TestRunVFS2(t *testing.T) { } func doRun(t *testing.T, vfsEnabled bool) { - l, cleanup, err := createLoader(vfsEnabled) + l, cleanup, err := createLoader(vfsEnabled, testSpec()) if err != nil { t.Fatalf("error creating loader: %v", err) } @@ -210,7 +210,7 @@ func TestStartSignalVFS2(t *testing.T) { } func doStartSignal(t *testing.T, vfsEnabled bool) { - l, cleanup, err := createLoader(vfsEnabled) + l, cleanup, err := createLoader(vfsEnabled, testSpec()) if err != nil { t.Fatalf("error creating loader: %v", err) } @@ -258,18 +258,19 @@ func doStartSignal(t *testing.T, vfsEnabled bool) { } -// Test that MountNamespace can be created with various specs. -func TestCreateMountNamespace(t *testing.T) { - testCases := []struct { - name string - // Spec that will be used to create the mount manager. Note - // that we can't mount procfs without a kernel, so each spec - // MUST contain something other than procfs mounted at /proc. - spec specs.Spec - // Paths that are expected to exist in the resulting fs. - expectedPaths []string - }{ - { +type CreateMountTestcase struct { + name string + // Spec that will be used to create the mount manager. Note + // that we can't mount procfs without a kernel, so each spec + // MUST contain something other than procfs mounted at /proc. + spec specs.Spec + // Paths that are expected to exist in the resulting fs. + expectedPaths []string +} + +func createMountTestcases(vfs2 bool) []*CreateMountTestcase { + testCases := []*CreateMountTestcase{ + &CreateMountTestcase{ // Only proc. name: "only proc mount", spec: specs.Spec{ @@ -311,7 +312,7 @@ func TestCreateMountNamespace(t *testing.T) { // /dev, and /sys. expectedPaths: []string{"/some/very/very/deep/path", "/proc", "/dev", "/sys"}, }, - { + &CreateMountTestcase{ // Mounts are nested inside each other. name: "nested mounts", spec: specs.Spec{ @@ -355,7 +356,7 @@ func TestCreateMountNamespace(t *testing.T) { expectedPaths: []string{"/foo", "/foo/bar", "/foo/bar/baz", "/foo/qux", "/foo/qux-quz", "/foo/some/very/very/deep/path", "/proc", "/dev", "/sys"}, }, - { + &CreateMountTestcase{ name: "mount inside /dev", spec: specs.Spec{ Root: &specs.Root{ @@ -398,40 +399,47 @@ func TestCreateMountNamespace(t *testing.T) { }, expectedPaths: []string{"/proc", "/dev", "/dev/fd-foo", "/dev/foo", "/dev/bar", "/sys"}, }, - { - name: "mounts inside mandatory mounts", - spec: specs.Spec{ - Root: &specs.Root{ - Path: os.TempDir(), - Readonly: true, + } + + vfsCase := &CreateMountTestcase{ + name: "mounts inside mandatory mounts", + spec: specs.Spec{ + Root: &specs.Root{ + Path: os.TempDir(), + Readonly: true, + }, + Mounts: []specs.Mount{ + { + Destination: "/proc", + Type: "tmpfs", }, - Mounts: []specs.Mount{ - { - Destination: "/proc", - Type: "tmpfs", - }, - // We don't include /sys, and /tmp in - // the spec, since they will be added - // automatically. - // - // Instead, add submounts inside these - // directories and make sure they are - // visible under the mandatory mounts. - { - Destination: "/sys/bar", - Type: "tmpfs", - }, - { - Destination: "/tmp/baz", - Type: "tmpfs", - }, + // TODO (gvisor.dev/issue/1487): Re-add this case when sysfs supports + // MkDirAt in VFS2 (and remove the reduntant append). + // { + // Destination: "/sys/bar", + // Type: "tmpfs", + // }, + // + { + Destination: "/tmp/baz", + Type: "tmpfs", }, }, - expectedPaths: []string{"/proc", "/sys", "/sys/bar", "/tmp", "/tmp/baz"}, }, + expectedPaths: []string{"/proc", "/sys" /* "/sys/bar" ,*/, "/tmp", "/tmp/baz"}, } - for _, tc := range testCases { + if !vfs2 { + vfsCase.spec.Mounts = append(vfsCase.spec.Mounts, specs.Mount{Destination: "/sys/bar", Type: "tmpfs"}) + vfsCase.expectedPaths = append(vfsCase.expectedPaths, "/sys/bar") + } + return append(testCases, vfsCase) +} + +// Test that MountNamespace can be created with various specs. +func TestCreateMountNamespace(t *testing.T) { + + for _, tc := range createMountTestcases(false /* vfs2 */) { t.Run(tc.name, func(t *testing.T) { conf := testConfig() ctx := contexttest.Context(t) @@ -466,6 +474,56 @@ func TestCreateMountNamespace(t *testing.T) { } } +// Test that MountNamespace can be created with various specs. +func TestCreateMountNamespaceVFS2(t *testing.T) { + + for _, tc := range createMountTestcases(true /* vfs2 */) { + t.Run(tc.name, func(t *testing.T) { + defer resetSyscallTable() + + spec := testSpec() + spec.Mounts = tc.spec.Mounts + spec.Root = tc.spec.Root + + l, loaderCleanup, err := createLoader(true /* VFS2 Enabled */, spec) + if err != nil { + t.Fatalf("failed to create loader: %v", err) + } + defer l.Destroy() + defer loaderCleanup() + + mntr := newContainerMounter(l.spec, l.goferFDs, l.k, l.mountHints) + if err := mntr.processHints(l.conf); err != nil { + t.Fatalf("failed process hints: %v", err) + } + + ctx := l.rootProcArgs.NewContext(l.k) + mns, err := mntr.setupVFS2(ctx, l.conf, &l.rootProcArgs) + if err != nil { + t.Fatalf("failed to setupVFS2: %v", err) + } + + root := mns.Root() + defer root.DecRef() + for _, p := range tc.expectedPaths { + + target := &vfs.PathOperation{ + Root: root, + Start: root, + Path: fspath.Parse(p), + } + + if d, err := l.k.VFS().GetDentryAt(ctx, l.rootProcArgs.Credentials, target, &vfs.GetDentryOptions{}); err != nil { + t.Errorf("expected path %v to exist with spec %v, but got error %v", p, tc.spec, err) + } else { + d.DecRef() + } + + } + }) + } +} + // TestRestoreEnvironment tests that the correct mounts are collected from the spec and config // in order to build the environment for restoring. func TestRestoreEnvironment(t *testing.T) { diff --git a/runsc/boot/vfs.go b/runsc/boot/vfs.go index bce3a3593..0b9b0b436 100644 --- a/runsc/boot/vfs.go +++ b/runsc/boot/vfs.go @@ -17,6 +17,7 @@ package boot import ( "fmt" "path" + "sort" "strconv" "strings" @@ -192,14 +193,9 @@ func (c *containerMounter) setupVFS2(ctx context.Context, conf *Config, procArgs return nil, fmt.Errorf("register filesystems: %w", err) } - fd := c.fds.remove() - - opts := strings.Join(p9MountOptionsVFS2(fd, conf.FileAccess), ",") - - log.Infof("Mounting root over 9P, ioFD: %d", fd) - mns, err := c.k.VFS().NewMountNamespace(ctx, creds, "", rootFsName, &vfs.GetFilesystemOptions{Data: opts}) + mns, err := c.createMountNamespaceVFS2(ctx, conf, creds) if err != nil { - return nil, fmt.Errorf("setting up mountnamespace: %w", err) + return nil, fmt.Errorf("creating mount namespace: %w", err) } rootProcArgs.MountNamespaceVFS2 = mns @@ -212,8 +208,23 @@ func (c *containerMounter) setupVFS2(ctx context.Context, conf *Config, procArgs return mns, nil } +func (c *containerMounter) createMountNamespaceVFS2(ctx context.Context, conf *Config, creds *auth.Credentials) (*vfs.MountNamespace, error) { + + fd := c.fds.remove() + opts := strings.Join(p9MountOptionsVFS2(fd, conf.FileAccess), ",") + + log.Infof("Mounting root over 9P, ioFD: %d", fd) + mns, err := c.k.VFS().NewMountNamespace(ctx, creds, "", rootFsName, &vfs.GetFilesystemOptions{Data: opts}) + if err != nil { + return nil, fmt.Errorf("setting up mount namespace: %w", err) + } + return mns, nil +} + func (c *containerMounter) mountSubmountsVFS2(ctx context.Context, conf *Config, mns *vfs.MountNamespace, creds *auth.Credentials) error { + c.prepareMountsVFS2() + for _, submount := range c.mounts { log.Debugf("Mounting %q to %q, type: %s, options: %s", submount.Source, submount.Destination, submount.Type, submount.Options) if err := c.mountSubmountVFS2(ctx, conf, mns, creds, &submount); err != nil { @@ -226,6 +237,11 @@ func (c *containerMounter) mountSubmountsVFS2(ctx context.Context, conf *Config, return c.checkDispenser() } +func (c *containerMounter) prepareMountsVFS2() { + // Sort the mounts so that we don't place children before parents. + sort.Slice(c.mounts, func(i, j int) bool { return len(c.mounts[i].Destination) < len(c.mounts[j].Destination) }) +} + // TODO(gvisor.dev/issue/1487): Implement submount options similar to the VFS1 version. func (c *containerMounter) mountSubmountVFS2(ctx context.Context, conf *Config, mns *vfs.MountNamespace, creds *auth.Credentials, submount *specs.Mount) error { root := mns.Root() @@ -236,11 +252,21 @@ func (c *containerMounter) mountSubmountVFS2(ctx context.Context, conf *Config, Path: fspath.Parse(submount.Destination), } - _, options, useOverlay, err := c.getMountNameAndOptionsVFS2(conf, *submount) + fsName, options, useOverlay, err := c.getMountNameAndOptionsVFS2(conf, *submount) if err != nil { return fmt.Errorf("mountOptions failed: %w", err) } + if fsName == "" { + // Filesystem is not supported (e.g. cgroup), just skip it. + return nil + } + + if err := c.makeSyntheticMount(ctx, submount.Destination, root, creds); err != nil { + return err + } + log.Debugf("directory exists or made directory for submount: %s", submount.Destination) + opts := &vfs.MountOptions{ GetFilesystemOptions: vfs.GetFilesystemOptions{ Data: strings.Join(options, ","), @@ -251,12 +277,6 @@ func (c *containerMounter) mountSubmountVFS2(ctx context.Context, conf *Config, // All writes go to upper, be paranoid and make lower readonly. opts.ReadOnly = useOverlay - if err := c.k.VFS().MkdirAt(ctx, creds, target, &vfs.MkdirOptions{ - ForSyntheticMountpoint: true, - }); err != nil && err != syserror.EEXIST { - // Log a warning, but attempt the mount anyway. - log.Warningf("Failed to create mount point at %q: %v", submount.Destination, err) - } if err := c.k.VFS().MountAt(ctx, creds, "", target, submount.Type, opts); err != nil { return fmt.Errorf("failed to mount %q (type: %s): %w, opts: %v", submount.Destination, submount.Type, err, opts) } @@ -314,3 +334,33 @@ func p9MountOptionsVFS2(fd int, fa FileAccessType) []string { } return opts } + +func (c *containerMounter) makeSyntheticMount(ctx context.Context, currentPath string, root vfs.VirtualDentry, creds *auth.Credentials) error { + + target := &vfs.PathOperation{ + Root: root, + Start: root, + Path: fspath.Parse(currentPath), + } + + _, err := c.k.VFS().StatAt(ctx, creds, target, &vfs.StatOptions{}) + switch { + + case err == syserror.ENOENT: + if err := c.makeSyntheticMount(ctx, path.Dir(currentPath), root, creds); err != nil { + return err + } + + mkdirOpts := &vfs.MkdirOptions{Mode: 0777, ForSyntheticMountpoint: true} + if err := c.k.VFS().MkdirAt(ctx, creds, target, mkdirOpts); err != nil { + return fmt.Errorf("failed to makedir for mount %+v: %w", target, err) + } + return nil + + case err != nil: + return fmt.Errorf("stat failed for mount %+v: %w", target, err) + + default: + return nil + } +} diff --git a/scripts/docker_tests.sh b/scripts/docker_tests.sh index 931ce1aa4..dce0a4085 100755 --- a/scripts/docker_tests.sh +++ b/scripts/docker_tests.sh @@ -20,3 +20,6 @@ make load-all-images install_runsc_for_test docker test_runsc //test/image:image_test //test/e2e:integration_test + +install_runsc_for_test docker --vfs2 +test_runsc //test/image:image_test --test_filter=.*TestHelloWorld |