From 95df852bf283bf5eb173cc92b14d487b2367a8a7 Mon Sep 17 00:00:00 2001 From: Fabricio Voznika Date: Tue, 4 May 2021 14:33:53 -0700 Subject: Make Mount.Type optional for bind mounts According to the OCI spec Mount.Type is an optional field and it defaults to "bind" when any of "bind" or "rbind" is included in Mount.Options. Also fix the shim to remove bind/rbind from options when mount is converted from bind to tmpfs inside the Sentry. Fixes #2330 Fixes #3274 PiperOrigin-RevId: 371996891 --- pkg/shim/utils/volumes.go | 46 +++++---- pkg/shim/utils/volumes_test.go | 160 ++++++++++++++++++++------------ runsc/boot/controller.go | 2 +- runsc/boot/fs.go | 39 ++++---- runsc/boot/fs_test.go | 2 +- runsc/boot/vfs.go | 96 ++++++++++--------- runsc/container/container_test.go | 21 +++++ runsc/container/multi_container_test.go | 2 +- runsc/specutils/specutils.go | 18 ++++ 9 files changed, 245 insertions(+), 141 deletions(-) diff --git a/pkg/shim/utils/volumes.go b/pkg/shim/utils/volumes.go index 52a428179..cdcb88229 100644 --- a/pkg/shim/utils/volumes.go +++ b/pkg/shim/utils/volumes.go @@ -91,11 +91,9 @@ func isVolumePath(volume, path string) (bool, error) { // UpdateVolumeAnnotations add necessary OCI annotations for gvisor // volume optimization. func UpdateVolumeAnnotations(bundle string, s *specs.Spec) error { - var ( - uid string - err error - ) + var uid string if IsSandbox(s) { + var err error uid, err = podUID(s) if err != nil { // Skip if we can't get pod UID, because this doesn't work @@ -123,21 +121,18 @@ func UpdateVolumeAnnotations(bundle string, s *specs.Spec) error { } else { // This is a container. for i := range s.Mounts { - // An error is returned for sandbox if source - // annotation is not successfully applied, so - // it is guaranteed that the source annotation - // for sandbox has already been successfully - // applied at this point. + // An error is returned for sandbox if source annotation is not + // successfully applied, so it is guaranteed that the source annotation + // for sandbox has already been successfully applied at this point. // - // The volume name is unique inside a pod, so - // matching without podUID is fine here. + // The volume name is unique inside a pod, so matching without podUID + // is fine here. // - // TODO: Pass podUID down to shim for containers to do - // more accurate matching. + // TODO: Pass podUID down to shim for containers to do more accurate + // matching. if yes, _ := isVolumePath(volume, s.Mounts[i].Source); yes { - // gVisor requires the container mount type to match - // sandbox mount type. - s.Mounts[i].Type = v + // Container mount type must match the sandbox's mount type. + changeMountType(&s.Mounts[i], v) updated = true } } @@ -153,3 +148,22 @@ func UpdateVolumeAnnotations(bundle string, s *specs.Spec) error { } return ioutil.WriteFile(filepath.Join(bundle, "config.json"), b, 0666) } + +func changeMountType(m *specs.Mount, newType string) { + m.Type = newType + + // OCI spec allows bind mounts to be specified in options only. So if new type + // is not bind, remove bind/rbind from options. + // + // "For bind mounts (when options include either bind or rbind), the type is + // a dummy, often "none" (not listed in /proc/filesystems)." + if newType != "bind" { + newOpts := make([]string, 0, len(m.Options)) + for _, opt := range m.Options { + if opt != "rbind" && opt != "bind" { + newOpts = append(newOpts, opt) + } + } + m.Options = newOpts + } +} diff --git a/pkg/shim/utils/volumes_test.go b/pkg/shim/utils/volumes_test.go index 3e02c6151..b25c53c73 100644 --- a/pkg/shim/utils/volumes_test.go +++ b/pkg/shim/utils/volumes_test.go @@ -47,60 +47,60 @@ func TestUpdateVolumeAnnotations(t *testing.T) { } for _, test := range []struct { - desc string + name string spec *specs.Spec expected *specs.Spec expectErr bool expectUpdate bool }{ { - desc: "volume annotations for sandbox", + name: "volume annotations for sandbox", spec: &specs.Spec{ Annotations: map[string]string{ - sandboxLogDirAnnotation: testLogDirPath, - containerTypeAnnotation: containerTypeSandbox, - "dev.gvisor.spec.mount." + testVolumeName + ".share": "pod", - "dev.gvisor.spec.mount." + testVolumeName + ".type": "tmpfs", - "dev.gvisor.spec.mount." + testVolumeName + ".options": "ro", + sandboxLogDirAnnotation: testLogDirPath, + containerTypeAnnotation: containerTypeSandbox, + volumeKeyPrefix + testVolumeName + ".share": "pod", + volumeKeyPrefix + testVolumeName + ".type": "tmpfs", + volumeKeyPrefix + testVolumeName + ".options": "ro", }, }, expected: &specs.Spec{ Annotations: map[string]string{ - sandboxLogDirAnnotation: testLogDirPath, - containerTypeAnnotation: containerTypeSandbox, - "dev.gvisor.spec.mount." + testVolumeName + ".share": "pod", - "dev.gvisor.spec.mount." + testVolumeName + ".type": "tmpfs", - "dev.gvisor.spec.mount." + testVolumeName + ".options": "ro", - "dev.gvisor.spec.mount." + testVolumeName + ".source": testVolumePath, + sandboxLogDirAnnotation: testLogDirPath, + containerTypeAnnotation: containerTypeSandbox, + volumeKeyPrefix + testVolumeName + ".share": "pod", + volumeKeyPrefix + testVolumeName + ".type": "tmpfs", + volumeKeyPrefix + testVolumeName + ".options": "ro", + volumeKeyPrefix + testVolumeName + ".source": testVolumePath, }, }, expectUpdate: true, }, { - desc: "volume annotations for sandbox with legacy log path", + name: "volume annotations for sandbox with legacy log path", spec: &specs.Spec{ Annotations: map[string]string{ - sandboxLogDirAnnotation: testLegacyLogDirPath, - containerTypeAnnotation: containerTypeSandbox, - "dev.gvisor.spec.mount." + testVolumeName + ".share": "pod", - "dev.gvisor.spec.mount." + testVolumeName + ".type": "tmpfs", - "dev.gvisor.spec.mount." + testVolumeName + ".options": "ro", + sandboxLogDirAnnotation: testLegacyLogDirPath, + containerTypeAnnotation: containerTypeSandbox, + volumeKeyPrefix + testVolumeName + ".share": "pod", + volumeKeyPrefix + testVolumeName + ".type": "tmpfs", + volumeKeyPrefix + testVolumeName + ".options": "ro", }, }, expected: &specs.Spec{ Annotations: map[string]string{ - sandboxLogDirAnnotation: testLegacyLogDirPath, - containerTypeAnnotation: containerTypeSandbox, - "dev.gvisor.spec.mount." + testVolumeName + ".share": "pod", - "dev.gvisor.spec.mount." + testVolumeName + ".type": "tmpfs", - "dev.gvisor.spec.mount." + testVolumeName + ".options": "ro", - "dev.gvisor.spec.mount." + testVolumeName + ".source": testVolumePath, + sandboxLogDirAnnotation: testLegacyLogDirPath, + containerTypeAnnotation: containerTypeSandbox, + volumeKeyPrefix + testVolumeName + ".share": "pod", + volumeKeyPrefix + testVolumeName + ".type": "tmpfs", + volumeKeyPrefix + testVolumeName + ".options": "ro", + volumeKeyPrefix + testVolumeName + ".source": testVolumePath, }, }, expectUpdate: true, }, { - desc: "tmpfs: volume annotations for container", + name: "tmpfs: volume annotations for container", spec: &specs.Spec{ Mounts: []specs.Mount{ { @@ -117,10 +117,10 @@ func TestUpdateVolumeAnnotations(t *testing.T) { }, }, Annotations: map[string]string{ - containerTypeAnnotation: containerTypeContainer, - "dev.gvisor.spec.mount." + testVolumeName + ".share": "pod", - "dev.gvisor.spec.mount." + testVolumeName + ".type": "tmpfs", - "dev.gvisor.spec.mount." + testVolumeName + ".options": "ro", + containerTypeAnnotation: containerTypeContainer, + volumeKeyPrefix + testVolumeName + ".share": "pod", + volumeKeyPrefix + testVolumeName + ".type": "tmpfs", + volumeKeyPrefix + testVolumeName + ".options": "ro", }, }, expected: &specs.Spec{ @@ -139,16 +139,16 @@ func TestUpdateVolumeAnnotations(t *testing.T) { }, }, Annotations: map[string]string{ - containerTypeAnnotation: containerTypeContainer, - "dev.gvisor.spec.mount." + testVolumeName + ".share": "pod", - "dev.gvisor.spec.mount." + testVolumeName + ".type": "tmpfs", - "dev.gvisor.spec.mount." + testVolumeName + ".options": "ro", + containerTypeAnnotation: containerTypeContainer, + volumeKeyPrefix + testVolumeName + ".share": "pod", + volumeKeyPrefix + testVolumeName + ".type": "tmpfs", + volumeKeyPrefix + testVolumeName + ".options": "ro", }, }, expectUpdate: true, }, { - desc: "bind: volume annotations for container", + name: "bind: volume annotations for container", spec: &specs.Spec{ Mounts: []specs.Mount{ { @@ -159,10 +159,10 @@ func TestUpdateVolumeAnnotations(t *testing.T) { }, }, Annotations: map[string]string{ - containerTypeAnnotation: containerTypeContainer, - "dev.gvisor.spec.mount." + testVolumeName + ".share": "container", - "dev.gvisor.spec.mount." + testVolumeName + ".type": "bind", - "dev.gvisor.spec.mount." + testVolumeName + ".options": "ro", + containerTypeAnnotation: containerTypeContainer, + volumeKeyPrefix + testVolumeName + ".share": "container", + volumeKeyPrefix + testVolumeName + ".type": "bind", + volumeKeyPrefix + testVolumeName + ".options": "ro", }, }, expected: &specs.Spec{ @@ -175,48 +175,48 @@ func TestUpdateVolumeAnnotations(t *testing.T) { }, }, Annotations: map[string]string{ - containerTypeAnnotation: containerTypeContainer, - "dev.gvisor.spec.mount." + testVolumeName + ".share": "container", - "dev.gvisor.spec.mount." + testVolumeName + ".type": "bind", - "dev.gvisor.spec.mount." + testVolumeName + ".options": "ro", + containerTypeAnnotation: containerTypeContainer, + volumeKeyPrefix + testVolumeName + ".share": "container", + volumeKeyPrefix + testVolumeName + ".type": "bind", + volumeKeyPrefix + testVolumeName + ".options": "ro", }, }, expectUpdate: true, }, { - desc: "should not return error without pod log directory", + name: "should not return error without pod log directory", spec: &specs.Spec{ Annotations: map[string]string{ - containerTypeAnnotation: containerTypeSandbox, - "dev.gvisor.spec.mount." + testVolumeName + ".share": "pod", - "dev.gvisor.spec.mount." + testVolumeName + ".type": "tmpfs", - "dev.gvisor.spec.mount." + testVolumeName + ".options": "ro", + containerTypeAnnotation: containerTypeSandbox, + volumeKeyPrefix + testVolumeName + ".share": "pod", + volumeKeyPrefix + testVolumeName + ".type": "tmpfs", + volumeKeyPrefix + testVolumeName + ".options": "ro", }, }, expected: &specs.Spec{ Annotations: map[string]string{ - containerTypeAnnotation: containerTypeSandbox, - "dev.gvisor.spec.mount." + testVolumeName + ".share": "pod", - "dev.gvisor.spec.mount." + testVolumeName + ".type": "tmpfs", - "dev.gvisor.spec.mount." + testVolumeName + ".options": "ro", + containerTypeAnnotation: containerTypeSandbox, + volumeKeyPrefix + testVolumeName + ".share": "pod", + volumeKeyPrefix + testVolumeName + ".type": "tmpfs", + volumeKeyPrefix + testVolumeName + ".options": "ro", }, }, }, { - desc: "should return error if volume path does not exist", + name: "should return error if volume path does not exist", spec: &specs.Spec{ Annotations: map[string]string{ - sandboxLogDirAnnotation: testLogDirPath, - containerTypeAnnotation: containerTypeSandbox, - "dev.gvisor.spec.mount.notexist.share": "pod", - "dev.gvisor.spec.mount.notexist.type": "tmpfs", - "dev.gvisor.spec.mount.notexist.options": "ro", + sandboxLogDirAnnotation: testLogDirPath, + containerTypeAnnotation: containerTypeSandbox, + volumeKeyPrefix + "notexist.share": "pod", + volumeKeyPrefix + "notexist.type": "tmpfs", + volumeKeyPrefix + "notexist.options": "ro", }, }, expectErr: true, }, { - desc: "no volume annotations for sandbox", + name: "no volume annotations for sandbox", spec: &specs.Spec{ Annotations: map[string]string{ sandboxLogDirAnnotation: testLogDirPath, @@ -231,7 +231,7 @@ func TestUpdateVolumeAnnotations(t *testing.T) { }, }, { - desc: "no volume annotations for container", + name: "no volume annotations for container", spec: &specs.Spec{ Mounts: []specs.Mount{ { @@ -271,8 +271,46 @@ func TestUpdateVolumeAnnotations(t *testing.T) { }, }, }, + { + name: "bind options removed", + spec: &specs.Spec{ + Annotations: map[string]string{ + containerTypeAnnotation: containerTypeContainer, + volumeKeyPrefix + testVolumeName + ".share": "pod", + volumeKeyPrefix + testVolumeName + ".type": "tmpfs", + volumeKeyPrefix + testVolumeName + ".options": "ro", + volumeKeyPrefix + testVolumeName + ".source": testVolumePath, + }, + Mounts: []specs.Mount{ + { + Destination: "/dst", + Type: "bind", + Source: testVolumePath, + Options: []string{"ro", "bind", "rbind"}, + }, + }, + }, + expected: &specs.Spec{ + Annotations: map[string]string{ + containerTypeAnnotation: containerTypeContainer, + volumeKeyPrefix + testVolumeName + ".share": "pod", + volumeKeyPrefix + testVolumeName + ".type": "tmpfs", + volumeKeyPrefix + testVolumeName + ".options": "ro", + volumeKeyPrefix + testVolumeName + ".source": testVolumePath, + }, + Mounts: []specs.Mount{ + { + Destination: "/dst", + Type: "tmpfs", + Source: testVolumePath, + Options: []string{"ro"}, + }, + }, + }, + expectUpdate: true, + }, } { - t.Run(test.desc, func(t *testing.T) { + t.Run(test.name, func(t *testing.T) { bundle, err := ioutil.TempDir(dir, "test-bundle") if err != nil { t.Fatalf("Create test bundle: %v", err) diff --git a/runsc/boot/controller.go b/runsc/boot/controller.go index 05b721b28..9b270cbf2 100644 --- a/runsc/boot/controller.go +++ b/runsc/boot/controller.go @@ -402,7 +402,7 @@ func (cm *containerManager) Restore(o *RestoreOpts, _ *struct{}) error { ctx := k.SupervisorContext() mntr := newContainerMounter(&cm.l.root, cm.l.k, cm.l.mountHints, kernel.VFS2Enabled) if kernel.VFS2Enabled { - ctx, err = mntr.configureRestore(ctx, cm.l.root.conf) + ctx, err = mntr.configureRestore(ctx) if err != nil { return fmt.Errorf("configuring filesystem restore: %v", err) } diff --git a/runsc/boot/fs.go b/runsc/boot/fs.go index 3c0cef6db..bf4a41f77 100644 --- a/runsc/boot/fs.go +++ b/runsc/boot/fs.go @@ -232,7 +232,7 @@ func parseMountOption(opt string, allowedKeys ...string) (bool, error) { // mountDevice returns a device string based on the fs type and target // of the mount. -func mountDevice(m specs.Mount) string { +func mountDevice(m *specs.Mount) string { if m.Type == bind { // Make a device string that includes the target, which is consistent across // S/R and uniquely identifies the connection. @@ -256,6 +256,8 @@ func mountFlags(opts []string) fs.MountSourceFlags { mf.NoAtime = true case "noexec": mf.NoExec = true + case "bind", "rbind": + // These are the same as a mount with type="bind". default: log.Warningf("ignoring unknown mount option %q", o) } @@ -486,9 +488,9 @@ func (m *mountHint) isSupported() bool { // For now enforce that all options are the same. Once bind mount is properly // supported, then we should ensure the master is less restrictive than the // container, e.g. master can be 'rw' while container mounts as 'ro'. -func (m *mountHint) checkCompatible(mount specs.Mount) error { +func (m *mountHint) checkCompatible(mount *specs.Mount) error { // Remove options that don't affect to mount's behavior. - masterOpts := filterUnsupportedOptions(m.mount) + masterOpts := filterUnsupportedOptions(&m.mount) replicaOpts := filterUnsupportedOptions(mount) if len(masterOpts) != len(replicaOpts) { @@ -512,7 +514,7 @@ func (m *mountHint) fileAccessType() config.FileAccessType { return config.FileAccessShared } -func filterUnsupportedOptions(mount specs.Mount) []string { +func filterUnsupportedOptions(mount *specs.Mount) []string { rv := make([]string, 0, len(mount.Options)) for _, o := range mount.Options { if isSupportedMountFlag(mount.Type, o) { @@ -576,7 +578,7 @@ func newPodMountHints(spec *specs.Spec) (*podMountHints, error) { return &podMountHints{mounts: mnts}, nil } -func (p *podMountHints) findMount(mount specs.Mount) *mountHint { +func (p *podMountHints) findMount(mount *specs.Mount) *mountHint { for _, m := range p.mounts { if m.mount.Source == mount.Source { return m @@ -679,7 +681,8 @@ func (c *containerMounter) mountSubmounts(ctx context.Context, conf *config.Conf root := mns.Root() defer root.DecRef(ctx) - for _, m := range c.mounts { + for i := range c.mounts { + m := &c.mounts[i] log.Debugf("Mounting %q to %q, type: %s, options: %s", m.Source, m.Destination, m.Type, m.Options) if hint := c.hints.findMount(m); hint != nil && hint.isSupported() { if err := c.mountSharedSubmount(ctx, mns, root, m, hint); err != nil { @@ -714,7 +717,7 @@ func (c *containerMounter) checkDispenser() error { func (c *containerMounter) mountSharedMaster(ctx context.Context, conf *config.Config, hint *mountHint) (*fs.Inode, error) { // Map mount type to filesystem name, and parse out the options that we are // capable of dealing with. - fsName, opts, useOverlay, err := c.getMountNameAndOptions(conf, hint.mount) + fsName, opts, useOverlay, err := c.getMountNameAndOptions(conf, &hint.mount) if err != nil { return nil, err } @@ -734,7 +737,7 @@ func (c *containerMounter) mountSharedMaster(ctx context.Context, conf *config.C mf.ReadOnly = true } - inode, err := filesystem.Mount(ctx, mountDevice(hint.mount), mf, strings.Join(opts, ","), nil) + inode, err := filesystem.Mount(ctx, mountDevice(&hint.mount), mf, strings.Join(opts, ","), nil) if err != nil { return nil, fmt.Errorf("creating mount %q: %v", hint.name, err) } @@ -796,13 +799,14 @@ func (c *containerMounter) createRootMount(ctx context.Context, conf *config.Con // getMountNameAndOptions retrieves the fsName, opts, and useOverlay values // used for mounts. -func (c *containerMounter) getMountNameAndOptions(conf *config.Config, m specs.Mount) (string, []string, bool, error) { +func (c *containerMounter) getMountNameAndOptions(conf *config.Config, m *specs.Mount) (string, []string, bool, error) { + specutils.MaybeConvertToBindMount(m) + var ( fsName string opts []string useOverlay bool ) - switch m.Type { case devpts.Name, devtmpfs.Name, procvfs2.Name, sysvfs2.Name: fsName = m.Type @@ -836,7 +840,7 @@ func (c *containerMounter) getMountNameAndOptions(conf *config.Config, m specs.M return fsName, opts, useOverlay, nil } -func (c *containerMounter) getMountAccessType(conf *config.Config, mount specs.Mount) config.FileAccessType { +func (c *containerMounter) getMountAccessType(conf *config.Config, mount *specs.Mount) config.FileAccessType { if hint := c.hints.findMount(mount); hint != nil { return hint.fileAccessType() } @@ -847,7 +851,7 @@ func (c *containerMounter) getMountAccessType(conf *config.Config, mount specs.M // be readonly, a lower ramfs overlay is added to create the mount point dir. // Another overlay is added with tmpfs on top if Config.Overlay is true. // 'm.Destination' must be an absolute path with '..' and symlinks resolved. -func (c *containerMounter) mountSubmount(ctx context.Context, conf *config.Config, mns *fs.MountNamespace, root *fs.Dirent, m specs.Mount) error { +func (c *containerMounter) mountSubmount(ctx context.Context, conf *config.Config, mns *fs.MountNamespace, root *fs.Dirent, m *specs.Mount) error { // Map mount type to filesystem name, and parse out the options that we are // capable of dealing with. fsName, opts, useOverlay, err := c.getMountNameAndOptions(conf, m) @@ -921,7 +925,7 @@ func (c *containerMounter) mountSubmount(ctx context.Context, conf *config.Confi // mountSharedSubmount binds mount to a previously mounted volume that is shared // among containers in the same pod. -func (c *containerMounter) mountSharedSubmount(ctx context.Context, mns *fs.MountNamespace, root *fs.Dirent, mount specs.Mount, source *mountHint) error { +func (c *containerMounter) mountSharedSubmount(ctx context.Context, mns *fs.MountNamespace, root *fs.Dirent, mount *specs.Mount, source *mountHint) error { if err := source.checkCompatible(mount); err != nil { return err } @@ -946,7 +950,7 @@ func (c *containerMounter) mountSharedSubmount(ctx context.Context, mns *fs.Moun // addRestoreMount adds a mount to the MountSources map used for restoring a // checkpointed container. -func (c *containerMounter) addRestoreMount(conf *config.Config, renv *fs.RestoreEnvironment, m specs.Mount) error { +func (c *containerMounter) addRestoreMount(conf *config.Config, renv *fs.RestoreEnvironment, m *specs.Mount) error { fsName, opts, useOverlay, err := c.getMountNameAndOptions(conf, m) if err != nil { return err @@ -994,7 +998,8 @@ func (c *containerMounter) createRestoreEnvironment(conf *config.Config) (*fs.Re // Add submounts. var tmpMounted bool - for _, m := range c.mounts { + for i := range c.mounts { + m := &c.mounts[i] if err := c.addRestoreMount(conf, renv, m); err != nil { return nil, err } @@ -1009,7 +1014,7 @@ func (c *containerMounter) createRestoreEnvironment(conf *config.Config) (*fs.Re Type: tmpfsvfs2.Name, Destination: "/tmp", } - if err := c.addRestoreMount(conf, renv, tmpMount); err != nil { + if err := c.addRestoreMount(conf, renv, &tmpMount); err != nil { return nil, err } } @@ -1068,7 +1073,7 @@ func (c *containerMounter) mountTmp(ctx context.Context, conf *config.Config, mn // another user. This is normally done for /tmp. Options: []string{"mode=01777"}, } - return c.mountSubmount(ctx, conf, mns, root, tmpMount) + return c.mountSubmount(ctx, conf, mns, root, &tmpMount) default: return err diff --git a/runsc/boot/fs_test.go b/runsc/boot/fs_test.go index b4f12d034..09ffda628 100644 --- a/runsc/boot/fs_test.go +++ b/runsc/boot/fs_test.go @@ -244,7 +244,7 @@ func TestGetMountAccessType(t *testing.T) { } mounter := containerMounter{hints: podHints} conf := &config.Config{FileAccessMounts: config.FileAccessShared} - if got := mounter.getMountAccessType(conf, specs.Mount{Source: source}); got != tst.want { + if got := mounter.getMountAccessType(conf, &specs.Mount{Source: source}); got != tst.want { t.Errorf("getMountAccessType(), want: %v, got: %v", tst.want, got) } }) diff --git a/runsc/boot/vfs.go b/runsc/boot/vfs.go index 7d8fd0483..c1828bd3d 100644 --- a/runsc/boot/vfs.go +++ b/runsc/boot/vfs.go @@ -46,6 +46,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/runsc/config" + "gvisor.dev/gvisor/runsc/specutils" ) func registerFilesystems(k *kernel.Kernel) error { @@ -362,33 +363,33 @@ func (c *containerMounter) mountSubmountsVFS2(ctx context.Context, conf *config. for i := range mounts { submount := &mounts[i] - log.Debugf("Mounting %q to %q, type: %s, options: %s", submount.Source, submount.Destination, submount.Type, submount.Options) + log.Debugf("Mounting %q to %q, type: %s, options: %s", submount.mount.Source, submount.mount.Destination, submount.mount.Type, submount.mount.Options) var ( mnt *vfs.Mount err error ) - if hint := c.hints.findMount(submount.Mount); hint != nil && hint.isSupported() { - mnt, err = c.mountSharedSubmountVFS2(ctx, conf, mns, creds, submount.Mount, hint) + if hint := c.hints.findMount(submount.mount); hint != nil && hint.isSupported() { + mnt, err = c.mountSharedSubmountVFS2(ctx, conf, mns, creds, submount.mount, hint) if err != nil { - return fmt.Errorf("mount shared mount %q to %q: %v", hint.name, submount.Destination, err) + return fmt.Errorf("mount shared mount %q to %q: %v", hint.name, submount.mount.Destination, err) } } else { mnt, err = c.mountSubmountVFS2(ctx, conf, mns, creds, submount) if err != nil { - return fmt.Errorf("mount submount %q: %w", submount.Destination, err) + return fmt.Errorf("mount submount %q: %w", submount.mount.Destination, err) } } if mnt != nil && mnt.ReadOnly() { // Switch to ReadWrite while we setup submounts. if err := c.k.VFS().SetMountReadOnly(mnt, false); err != nil { - return fmt.Errorf("failed to set mount at %q readwrite: %w", submount.Destination, err) + return fmt.Errorf("failed to set mount at %q readwrite: %w", submount.mount.Destination, err) } // Restore back to ReadOnly at the end. defer func() { if err := c.k.VFS().SetMountReadOnly(mnt, true); err != nil { - panic(fmt.Sprintf("failed to restore mount at %q back to readonly: %v", submount.Destination, err)) + panic(fmt.Sprintf("failed to restore mount at %q back to readonly: %v", submount.mount.Destination, err)) } }() } @@ -401,8 +402,8 @@ func (c *containerMounter) mountSubmountsVFS2(ctx context.Context, conf *config. } type mountAndFD struct { - specs.Mount - fd int + mount *specs.Mount + fd int } func (c *containerMounter) prepareMountsVFS2() ([]mountAndFD, error) { @@ -410,15 +411,18 @@ func (c *containerMounter) prepareMountsVFS2() ([]mountAndFD, error) { // undocumented assumption that FDs are dispensed in the order in which // they are required by mounts. var mounts []mountAndFD - for _, m := range c.mounts { - fd := -1 + for i := range c.mounts { + m := &c.mounts[i] + specutils.MaybeConvertToBindMount(m) + // Only bind mounts use host FDs; see // containerMounter.getMountNameAndOptionsVFS2. + fd := -1 if m.Type == bind { fd = c.fds.remove() } mounts = append(mounts, mountAndFD{ - Mount: m, + mount: m, fd: fd, }) } @@ -428,7 +432,7 @@ func (c *containerMounter) prepareMountsVFS2() ([]mountAndFD, error) { // Sort the mounts so that we don't place children before parents. sort.Slice(mounts, func(i, j int) bool { - return len(mounts[i].Destination) < len(mounts[j].Destination) + return len(mounts[i].mount.Destination) < len(mounts[j].mount.Destination) }) return mounts, nil @@ -444,16 +448,16 @@ func (c *containerMounter) mountSubmountVFS2(ctx context.Context, conf *config.C return nil, nil } - if err := c.makeMountPoint(ctx, creds, mns, submount.Destination); err != nil { - return nil, fmt.Errorf("creating mount point %q: %w", submount.Destination, err) + if err := c.makeMountPoint(ctx, creds, mns, submount.mount.Destination); err != nil { + return nil, fmt.Errorf("creating mount point %q: %w", submount.mount.Destination, err) } if useOverlay { - log.Infof("Adding overlay on top of mount %q", submount.Destination) + log.Infof("Adding overlay on top of mount %q", submount.mount.Destination) var cleanup func() opts, cleanup, err = c.configureOverlay(ctx, creds, opts, fsName) if err != nil { - return nil, fmt.Errorf("mounting volume with overlay at %q: %w", submount.Destination, err) + return nil, fmt.Errorf("mounting volume with overlay at %q: %w", submount.mount.Destination, err) } defer cleanup() fsName = overlay.Name @@ -465,32 +469,34 @@ func (c *containerMounter) mountSubmountVFS2(ctx context.Context, conf *config.C target := &vfs.PathOperation{ Root: root, Start: root, - Path: fspath.Parse(submount.Destination), + Path: fspath.Parse(submount.mount.Destination), } mnt, err := c.k.VFS().MountAt(ctx, creds, "", target, fsName, opts) if err != nil { - return nil, fmt.Errorf("failed to mount %q (type: %s): %w, opts: %v", submount.Destination, submount.Type, err, opts) + return nil, fmt.Errorf("failed to mount %q (type: %s): %w, opts: %v", submount.mount.Destination, submount.mount.Type, err, opts) } - log.Infof("Mounted %q to %q type: %s, internal-options: %q", submount.Source, submount.Destination, submount.Type, opts.GetFilesystemOptions.Data) + log.Infof("Mounted %q to %q type: %s, internal-options: %q", submount.mount.Source, submount.mount.Destination, submount.mount.Type, opts.GetFilesystemOptions.Data) return mnt, nil } // getMountNameAndOptionsVFS2 retrieves the fsName, opts, and useOverlay values // used for mounts. func (c *containerMounter) getMountNameAndOptionsVFS2(conf *config.Config, m *mountAndFD) (string, *vfs.MountOptions, bool, error) { - fsName := m.Type + fsName := m.mount.Type useOverlay := false - var data []string - var iopts interface{} + var ( + data []string + internalData interface{} + ) - verityData, verityOpts, verityRequested, remainingMOpts, err := parseVerityMountOptions(m.Options) + verityData, verityOpts, verityRequested, remainingMOpts, err := parseVerityMountOptions(m.mount.Options) if err != nil { return "", nil, false, err } - m.Options = remainingMOpts + m.mount.Options = remainingMOpts // Find filesystem name and FS specific data field. - switch m.Type { + switch m.mount.Type { case devpts.Name, devtmpfs.Name, proc.Name, sys.Name: // Nothing to do. @@ -499,7 +505,7 @@ func (c *containerMounter) getMountNameAndOptionsVFS2(conf *config.Config, m *mo case tmpfs.Name: var err error - data, err = parseAndFilterOptions(m.Options, tmpfsAllowedData...) + data, err = parseAndFilterOptions(m.mount.Options, tmpfsAllowedData...) if err != nil { return "", nil, false, err } @@ -511,35 +517,35 @@ func (c *containerMounter) getMountNameAndOptionsVFS2(conf *config.Config, m *mo // but unlikely to be correct in this context. return "", nil, false, fmt.Errorf("9P mount requires a connection FD") } - data = p9MountData(m.fd, c.getMountAccessType(conf, m.Mount), true /* vfs2 */) - iopts = gofer.InternalFilesystemOptions{ - UniqueID: m.Destination, + data = p9MountData(m.fd, c.getMountAccessType(conf, m.mount), true /* vfs2 */) + internalData = gofer.InternalFilesystemOptions{ + UniqueID: m.mount.Destination, } // If configured, add overlay to all writable mounts. - useOverlay = conf.Overlay && !mountFlags(m.Options).ReadOnly + useOverlay = conf.Overlay && !mountFlags(m.mount.Options).ReadOnly case cgroupfs.Name: var err error - data, err = parseAndFilterOptions(m.Options, cgroupfs.SupportedMountOptions...) + data, err = parseAndFilterOptions(m.mount.Options, cgroupfs.SupportedMountOptions...) if err != nil { return "", nil, false, err } default: - log.Warningf("ignoring unknown filesystem type %q", m.Type) + log.Warningf("ignoring unknown filesystem type %q", m.mount.Type) return "", nil, false, nil } opts := &vfs.MountOptions{ GetFilesystemOptions: vfs.GetFilesystemOptions{ Data: strings.Join(data, ","), - InternalData: iopts, + InternalData: internalData, }, InternalMount: true, } - for _, o := range m.Options { + for _, o := range m.mount.Options { switch o { case "rw": opts.ReadOnly = false @@ -549,13 +555,15 @@ func (c *containerMounter) getMountNameAndOptionsVFS2(conf *config.Config, m *mo opts.Flags.NoATime = true case "noexec": opts.Flags.NoExec = true + case "bind", "rbind": + // These are the same as a mount with type="bind". default: log.Warningf("ignoring unknown mount option %q", o) } } if verityRequested { - verityData = verityData + "root_name=" + path.Base(m.Mount.Destination) + verityData = verityData + "root_name=" + path.Base(m.mount.Destination) verityOpts.LowerName = fsName verityOpts.LowerGetFSOptions = opts.GetFilesystemOptions fsName = verity.Name @@ -684,7 +692,7 @@ func (c *containerMounter) mountTmpVFS2(ctx context.Context, conf *config.Config // another user. This is normally done for /tmp. Options: []string{"mode=01777"}, } - _, err := c.mountSubmountVFS2(ctx, conf, mns, creds, &mountAndFD{Mount: tmpMount}) + _, err := c.mountSubmountVFS2(ctx, conf, mns, creds, &mountAndFD{mount: &tmpMount}) return err case syserror.ENOTDIR: @@ -723,7 +731,7 @@ func (c *containerMounter) processHintsVFS2(conf *config.Config, creds *auth.Cre func (c *containerMounter) mountSharedMasterVFS2(ctx context.Context, conf *config.Config, hint *mountHint, creds *auth.Credentials) (*vfs.Mount, error) { // Map mount type to filesystem name, and parse out the options that we are // capable of dealing with. - mntFD := &mountAndFD{Mount: hint.mount} + mntFD := &mountAndFD{mount: &hint.mount} fsName, opts, useOverlay, err := c.getMountNameAndOptionsVFS2(conf, mntFD) if err != nil { return nil, err @@ -733,11 +741,11 @@ func (c *containerMounter) mountSharedMasterVFS2(ctx context.Context, conf *conf } if useOverlay { - log.Infof("Adding overlay on top of shared mount %q", mntFD.Destination) + log.Infof("Adding overlay on top of shared mount %q", mntFD.mount.Destination) var cleanup func() opts, cleanup, err = c.configureOverlay(ctx, creds, opts, fsName) if err != nil { - return nil, fmt.Errorf("mounting shared volume with overlay at %q: %w", mntFD.Destination, err) + return nil, fmt.Errorf("mounting shared volume with overlay at %q: %w", mntFD.mount.Destination, err) } defer cleanup() fsName = overlay.Name @@ -748,14 +756,14 @@ func (c *containerMounter) mountSharedMasterVFS2(ctx context.Context, conf *conf // mountSharedSubmount binds mount to a previously mounted volume that is shared // among containers in the same pod. -func (c *containerMounter) mountSharedSubmountVFS2(ctx context.Context, conf *config.Config, mns *vfs.MountNamespace, creds *auth.Credentials, mount specs.Mount, source *mountHint) (*vfs.Mount, error) { +func (c *containerMounter) mountSharedSubmountVFS2(ctx context.Context, conf *config.Config, mns *vfs.MountNamespace, creds *auth.Credentials, mount *specs.Mount, source *mountHint) (*vfs.Mount, error) { if err := source.checkCompatible(mount); err != nil { return nil, err } // Ignore data and useOverlay because these were already applied to // the master mount. - _, opts, _, err := c.getMountNameAndOptionsVFS2(conf, &mountAndFD{Mount: mount}) + _, opts, _, err := c.getMountNameAndOptionsVFS2(conf, &mountAndFD{mount: mount}) if err != nil { return nil, err } @@ -808,7 +816,7 @@ func (c *containerMounter) makeMountPoint(ctx context.Context, creds *auth.Crede // configureRestore returns an updated context.Context including filesystem // state used by restore defined by conf. -func (c *containerMounter) configureRestore(ctx context.Context, conf *config.Config) (context.Context, error) { +func (c *containerMounter) configureRestore(ctx context.Context) (context.Context, error) { fdmap := make(map[string]int) fdmap["/"] = c.fds.remove() mounts, err := c.prepareMountsVFS2() @@ -818,7 +826,7 @@ func (c *containerMounter) configureRestore(ctx context.Context, conf *config.Co for i := range c.mounts { submount := &mounts[i] if submount.fd >= 0 { - fdmap[submount.Destination] = submount.fd + fdmap[submount.mount.Destination] = submount.fd } } return context.WithValue(ctx, gofer.CtxRestoreServerFDMap, fdmap), nil diff --git a/runsc/container/container_test.go b/runsc/container/container_test.go index 5a0c468a4..0e79877b7 100644 --- a/runsc/container/container_test.go +++ b/runsc/container/container_test.go @@ -2449,6 +2449,27 @@ func TestCreateWithCorruptedStateFile(t *testing.T) { } } +func TestBindMountByOption(t *testing.T) { + for name, conf := range configs(t, all...) { + t.Run(name, func(t *testing.T) { + dir, err := ioutil.TempDir(testutil.TmpDir(), "bind-mount") + spec := testutil.NewSpecWithArgs("/bin/touch", path.Join(dir, "file")) + if err != nil { + t.Fatalf("ioutil.TempDir(): %v", err) + } + spec.Mounts = append(spec.Mounts, specs.Mount{ + Destination: dir, + Source: dir, + Type: "none", + Options: []string{"rw", "bind"}, + }) + if err := run(spec, conf); err != nil { + t.Fatalf("error running sandbox: %v", err) + } + }) + } +} + func execute(cont *Container, name string, arg ...string) (unix.WaitStatus, error) { args := &control.ExecArgs{ Filename: name, diff --git a/runsc/container/multi_container_test.go b/runsc/container/multi_container_test.go index 0f0a223ce..37ad7d2e1 100644 --- a/runsc/container/multi_container_test.go +++ b/runsc/container/multi_container_test.go @@ -1510,7 +1510,7 @@ func TestMultiContainerSharedMountUnsupportedOptions(t *testing.T) { Destination: "/mydir/test", Source: "/some/dir", Type: "tmpfs", - Options: []string{"rw", "rbind", "relatime"}, + Options: []string{"rw", "relatime"}, } podSpec[0].Mounts = append(podSpec[0].Mounts, mnt0) diff --git a/runsc/specutils/specutils.go b/runsc/specutils/specutils.go index e5e66546c..11b476690 100644 --- a/runsc/specutils/specutils.go +++ b/runsc/specutils/specutils.go @@ -335,9 +335,27 @@ func capsFromNames(names []string, skipSet map[linux.Capability]struct{}) (auth. // Is9PMount returns true if the given mount can be mounted as an external // gofer. func Is9PMount(m specs.Mount, vfs2Enabled bool) bool { + MaybeConvertToBindMount(&m) return m.Type == "bind" && m.Source != "" && IsSupportedDevMount(m, vfs2Enabled) } +// MaybeConvertToBindMount converts mount type to "bind" in case any of the +// mount options are either "bind" or "rbind" as required by the OCI spec. +// +// "For bind mounts (when options include either bind or rbind), the type is a +// dummy, often "none" (not listed in /proc/filesystems)." +func MaybeConvertToBindMount(m *specs.Mount) { + if m.Type == "bind" { + return + } + for _, opt := range m.Options { + if opt == "bind" || opt == "rbind" { + m.Type = "bind" + return + } + } +} + // IsSupportedDevMount returns true if m.Destination does not specify a // path that is hardcoded by VFS1's implementation of /dev. func IsSupportedDevMount(m specs.Mount, vfs2Enabled bool) bool { -- cgit v1.2.3