diff options
-rw-r--r-- | pkg/shim/utils/volumes.go | 46 | ||||
-rw-r--r-- | pkg/shim/utils/volumes_test.go | 160 | ||||
-rw-r--r-- | runsc/boot/controller.go | 2 | ||||
-rw-r--r-- | runsc/boot/fs.go | 39 | ||||
-rw-r--r-- | runsc/boot/fs_test.go | 2 | ||||
-rw-r--r-- | runsc/boot/vfs.go | 96 | ||||
-rw-r--r-- | runsc/container/container_test.go | 21 | ||||
-rw-r--r-- | runsc/container/multi_container_test.go | 2 | ||||
-rw-r--r-- | runsc/specutils/specutils.go | 18 |
9 files changed, 245 insertions, 141 deletions
diff --git a/pkg/shim/utils/volumes.go b/pkg/shim/utils/volumes.go index 52a428179..cdcb88229 100644 --- a/pkg/shim/utils/volumes.go +++ b/pkg/shim/utils/volumes.go @@ -91,11 +91,9 @@ func isVolumePath(volume, path string) (bool, error) { // UpdateVolumeAnnotations add necessary OCI annotations for gvisor // volume optimization. func UpdateVolumeAnnotations(bundle string, s *specs.Spec) error { - var ( - uid string - err error - ) + var uid string if IsSandbox(s) { + var err error uid, err = podUID(s) if err != nil { // Skip if we can't get pod UID, because this doesn't work @@ -123,21 +121,18 @@ func UpdateVolumeAnnotations(bundle string, s *specs.Spec) error { } else { // This is a container. for i := range s.Mounts { - // An error is returned for sandbox if source - // annotation is not successfully applied, so - // it is guaranteed that the source annotation - // for sandbox has already been successfully - // applied at this point. + // An error is returned for sandbox if source annotation is not + // successfully applied, so it is guaranteed that the source annotation + // for sandbox has already been successfully applied at this point. // - // The volume name is unique inside a pod, so - // matching without podUID is fine here. + // The volume name is unique inside a pod, so matching without podUID + // is fine here. // - // TODO: Pass podUID down to shim for containers to do - // more accurate matching. + // TODO: Pass podUID down to shim for containers to do more accurate + // matching. if yes, _ := isVolumePath(volume, s.Mounts[i].Source); yes { - // gVisor requires the container mount type to match - // sandbox mount type. - s.Mounts[i].Type = v + // Container mount type must match the sandbox's mount type. + changeMountType(&s.Mounts[i], v) updated = true } } @@ -153,3 +148,22 @@ func UpdateVolumeAnnotations(bundle string, s *specs.Spec) error { } return ioutil.WriteFile(filepath.Join(bundle, "config.json"), b, 0666) } + +func changeMountType(m *specs.Mount, newType string) { + m.Type = newType + + // OCI spec allows bind mounts to be specified in options only. So if new type + // is not bind, remove bind/rbind from options. + // + // "For bind mounts (when options include either bind or rbind), the type is + // a dummy, often "none" (not listed in /proc/filesystems)." + if newType != "bind" { + newOpts := make([]string, 0, len(m.Options)) + for _, opt := range m.Options { + if opt != "rbind" && opt != "bind" { + newOpts = append(newOpts, opt) + } + } + m.Options = newOpts + } +} diff --git a/pkg/shim/utils/volumes_test.go b/pkg/shim/utils/volumes_test.go index 3e02c6151..b25c53c73 100644 --- a/pkg/shim/utils/volumes_test.go +++ b/pkg/shim/utils/volumes_test.go @@ -47,60 +47,60 @@ func TestUpdateVolumeAnnotations(t *testing.T) { } for _, test := range []struct { - desc string + name string spec *specs.Spec expected *specs.Spec expectErr bool expectUpdate bool }{ { - desc: "volume annotations for sandbox", + name: "volume annotations for sandbox", spec: &specs.Spec{ Annotations: map[string]string{ - sandboxLogDirAnnotation: testLogDirPath, - containerTypeAnnotation: containerTypeSandbox, - "dev.gvisor.spec.mount." + testVolumeName + ".share": "pod", - "dev.gvisor.spec.mount." + testVolumeName + ".type": "tmpfs", - "dev.gvisor.spec.mount." + testVolumeName + ".options": "ro", + sandboxLogDirAnnotation: testLogDirPath, + containerTypeAnnotation: containerTypeSandbox, + volumeKeyPrefix + testVolumeName + ".share": "pod", + volumeKeyPrefix + testVolumeName + ".type": "tmpfs", + volumeKeyPrefix + testVolumeName + ".options": "ro", }, }, expected: &specs.Spec{ Annotations: map[string]string{ - sandboxLogDirAnnotation: testLogDirPath, - containerTypeAnnotation: containerTypeSandbox, - "dev.gvisor.spec.mount." + testVolumeName + ".share": "pod", - "dev.gvisor.spec.mount." + testVolumeName + ".type": "tmpfs", - "dev.gvisor.spec.mount." + testVolumeName + ".options": "ro", - "dev.gvisor.spec.mount." + testVolumeName + ".source": testVolumePath, + sandboxLogDirAnnotation: testLogDirPath, + containerTypeAnnotation: containerTypeSandbox, + volumeKeyPrefix + testVolumeName + ".share": "pod", + volumeKeyPrefix + testVolumeName + ".type": "tmpfs", + volumeKeyPrefix + testVolumeName + ".options": "ro", + volumeKeyPrefix + testVolumeName + ".source": testVolumePath, }, }, expectUpdate: true, }, { - desc: "volume annotations for sandbox with legacy log path", + name: "volume annotations for sandbox with legacy log path", spec: &specs.Spec{ Annotations: map[string]string{ - sandboxLogDirAnnotation: testLegacyLogDirPath, - containerTypeAnnotation: containerTypeSandbox, - "dev.gvisor.spec.mount." + testVolumeName + ".share": "pod", - "dev.gvisor.spec.mount." + testVolumeName + ".type": "tmpfs", - "dev.gvisor.spec.mount." + testVolumeName + ".options": "ro", + sandboxLogDirAnnotation: testLegacyLogDirPath, + containerTypeAnnotation: containerTypeSandbox, + volumeKeyPrefix + testVolumeName + ".share": "pod", + volumeKeyPrefix + testVolumeName + ".type": "tmpfs", + volumeKeyPrefix + testVolumeName + ".options": "ro", }, }, expected: &specs.Spec{ Annotations: map[string]string{ - sandboxLogDirAnnotation: testLegacyLogDirPath, - containerTypeAnnotation: containerTypeSandbox, - "dev.gvisor.spec.mount." + testVolumeName + ".share": "pod", - "dev.gvisor.spec.mount." + testVolumeName + ".type": "tmpfs", - "dev.gvisor.spec.mount." + testVolumeName + ".options": "ro", - "dev.gvisor.spec.mount." + testVolumeName + ".source": testVolumePath, + sandboxLogDirAnnotation: testLegacyLogDirPath, + containerTypeAnnotation: containerTypeSandbox, + volumeKeyPrefix + testVolumeName + ".share": "pod", + volumeKeyPrefix + testVolumeName + ".type": "tmpfs", + volumeKeyPrefix + testVolumeName + ".options": "ro", + volumeKeyPrefix + testVolumeName + ".source": testVolumePath, }, }, expectUpdate: true, }, { - desc: "tmpfs: volume annotations for container", + name: "tmpfs: volume annotations for container", spec: &specs.Spec{ Mounts: []specs.Mount{ { @@ -117,10 +117,10 @@ func TestUpdateVolumeAnnotations(t *testing.T) { }, }, Annotations: map[string]string{ - containerTypeAnnotation: containerTypeContainer, - "dev.gvisor.spec.mount." + testVolumeName + ".share": "pod", - "dev.gvisor.spec.mount." + testVolumeName + ".type": "tmpfs", - "dev.gvisor.spec.mount." + testVolumeName + ".options": "ro", + containerTypeAnnotation: containerTypeContainer, + volumeKeyPrefix + testVolumeName + ".share": "pod", + volumeKeyPrefix + testVolumeName + ".type": "tmpfs", + volumeKeyPrefix + testVolumeName + ".options": "ro", }, }, expected: &specs.Spec{ @@ -139,16 +139,16 @@ func TestUpdateVolumeAnnotations(t *testing.T) { }, }, Annotations: map[string]string{ - containerTypeAnnotation: containerTypeContainer, - "dev.gvisor.spec.mount." + testVolumeName + ".share": "pod", - "dev.gvisor.spec.mount." + testVolumeName + ".type": "tmpfs", - "dev.gvisor.spec.mount." + testVolumeName + ".options": "ro", + containerTypeAnnotation: containerTypeContainer, + volumeKeyPrefix + testVolumeName + ".share": "pod", + volumeKeyPrefix + testVolumeName + ".type": "tmpfs", + volumeKeyPrefix + testVolumeName + ".options": "ro", }, }, expectUpdate: true, }, { - desc: "bind: volume annotations for container", + name: "bind: volume annotations for container", spec: &specs.Spec{ Mounts: []specs.Mount{ { @@ -159,10 +159,10 @@ func TestUpdateVolumeAnnotations(t *testing.T) { }, }, Annotations: map[string]string{ - containerTypeAnnotation: containerTypeContainer, - "dev.gvisor.spec.mount." + testVolumeName + ".share": "container", - "dev.gvisor.spec.mount." + testVolumeName + ".type": "bind", - "dev.gvisor.spec.mount." + testVolumeName + ".options": "ro", + containerTypeAnnotation: containerTypeContainer, + volumeKeyPrefix + testVolumeName + ".share": "container", + volumeKeyPrefix + testVolumeName + ".type": "bind", + volumeKeyPrefix + testVolumeName + ".options": "ro", }, }, expected: &specs.Spec{ @@ -175,48 +175,48 @@ func TestUpdateVolumeAnnotations(t *testing.T) { }, }, Annotations: map[string]string{ - containerTypeAnnotation: containerTypeContainer, - "dev.gvisor.spec.mount." + testVolumeName + ".share": "container", - "dev.gvisor.spec.mount." + testVolumeName + ".type": "bind", - "dev.gvisor.spec.mount." + testVolumeName + ".options": "ro", + containerTypeAnnotation: containerTypeContainer, + volumeKeyPrefix + testVolumeName + ".share": "container", + volumeKeyPrefix + testVolumeName + ".type": "bind", + volumeKeyPrefix + testVolumeName + ".options": "ro", }, }, expectUpdate: true, }, { - desc: "should not return error without pod log directory", + name: "should not return error without pod log directory", spec: &specs.Spec{ Annotations: map[string]string{ - containerTypeAnnotation: containerTypeSandbox, - "dev.gvisor.spec.mount." + testVolumeName + ".share": "pod", - "dev.gvisor.spec.mount." + testVolumeName + ".type": "tmpfs", - "dev.gvisor.spec.mount." + testVolumeName + ".options": "ro", + containerTypeAnnotation: containerTypeSandbox, + volumeKeyPrefix + testVolumeName + ".share": "pod", + volumeKeyPrefix + testVolumeName + ".type": "tmpfs", + volumeKeyPrefix + testVolumeName + ".options": "ro", }, }, expected: &specs.Spec{ Annotations: map[string]string{ - containerTypeAnnotation: containerTypeSandbox, - "dev.gvisor.spec.mount." + testVolumeName + ".share": "pod", - "dev.gvisor.spec.mount." + testVolumeName + ".type": "tmpfs", - "dev.gvisor.spec.mount." + testVolumeName + ".options": "ro", + containerTypeAnnotation: containerTypeSandbox, + volumeKeyPrefix + testVolumeName + ".share": "pod", + volumeKeyPrefix + testVolumeName + ".type": "tmpfs", + volumeKeyPrefix + testVolumeName + ".options": "ro", }, }, }, { - desc: "should return error if volume path does not exist", + name: "should return error if volume path does not exist", spec: &specs.Spec{ Annotations: map[string]string{ - sandboxLogDirAnnotation: testLogDirPath, - containerTypeAnnotation: containerTypeSandbox, - "dev.gvisor.spec.mount.notexist.share": "pod", - "dev.gvisor.spec.mount.notexist.type": "tmpfs", - "dev.gvisor.spec.mount.notexist.options": "ro", + sandboxLogDirAnnotation: testLogDirPath, + containerTypeAnnotation: containerTypeSandbox, + volumeKeyPrefix + "notexist.share": "pod", + volumeKeyPrefix + "notexist.type": "tmpfs", + volumeKeyPrefix + "notexist.options": "ro", }, }, expectErr: true, }, { - desc: "no volume annotations for sandbox", + name: "no volume annotations for sandbox", spec: &specs.Spec{ Annotations: map[string]string{ sandboxLogDirAnnotation: testLogDirPath, @@ -231,7 +231,7 @@ func TestUpdateVolumeAnnotations(t *testing.T) { }, }, { - desc: "no volume annotations for container", + name: "no volume annotations for container", spec: &specs.Spec{ Mounts: []specs.Mount{ { @@ -271,8 +271,46 @@ func TestUpdateVolumeAnnotations(t *testing.T) { }, }, }, + { + name: "bind options removed", + spec: &specs.Spec{ + Annotations: map[string]string{ + containerTypeAnnotation: containerTypeContainer, + volumeKeyPrefix + testVolumeName + ".share": "pod", + volumeKeyPrefix + testVolumeName + ".type": "tmpfs", + volumeKeyPrefix + testVolumeName + ".options": "ro", + volumeKeyPrefix + testVolumeName + ".source": testVolumePath, + }, + Mounts: []specs.Mount{ + { + Destination: "/dst", + Type: "bind", + Source: testVolumePath, + Options: []string{"ro", "bind", "rbind"}, + }, + }, + }, + expected: &specs.Spec{ + Annotations: map[string]string{ + containerTypeAnnotation: containerTypeContainer, + volumeKeyPrefix + testVolumeName + ".share": "pod", + volumeKeyPrefix + testVolumeName + ".type": "tmpfs", + volumeKeyPrefix + testVolumeName + ".options": "ro", + volumeKeyPrefix + testVolumeName + ".source": testVolumePath, + }, + Mounts: []specs.Mount{ + { + Destination: "/dst", + Type: "tmpfs", + Source: testVolumePath, + Options: []string{"ro"}, + }, + }, + }, + expectUpdate: true, + }, } { - t.Run(test.desc, func(t *testing.T) { + t.Run(test.name, func(t *testing.T) { bundle, err := ioutil.TempDir(dir, "test-bundle") if err != nil { t.Fatalf("Create test bundle: %v", err) diff --git a/runsc/boot/controller.go b/runsc/boot/controller.go index 05b721b28..9b270cbf2 100644 --- a/runsc/boot/controller.go +++ b/runsc/boot/controller.go @@ -402,7 +402,7 @@ func (cm *containerManager) Restore(o *RestoreOpts, _ *struct{}) error { ctx := k.SupervisorContext() mntr := newContainerMounter(&cm.l.root, cm.l.k, cm.l.mountHints, kernel.VFS2Enabled) if kernel.VFS2Enabled { - ctx, err = mntr.configureRestore(ctx, cm.l.root.conf) + ctx, err = mntr.configureRestore(ctx) if err != nil { return fmt.Errorf("configuring filesystem restore: %v", err) } diff --git a/runsc/boot/fs.go b/runsc/boot/fs.go index 3c0cef6db..bf4a41f77 100644 --- a/runsc/boot/fs.go +++ b/runsc/boot/fs.go @@ -232,7 +232,7 @@ func parseMountOption(opt string, allowedKeys ...string) (bool, error) { // mountDevice returns a device string based on the fs type and target // of the mount. -func mountDevice(m specs.Mount) string { +func mountDevice(m *specs.Mount) string { if m.Type == bind { // Make a device string that includes the target, which is consistent across // S/R and uniquely identifies the connection. @@ -256,6 +256,8 @@ func mountFlags(opts []string) fs.MountSourceFlags { mf.NoAtime = true case "noexec": mf.NoExec = true + case "bind", "rbind": + // These are the same as a mount with type="bind". default: log.Warningf("ignoring unknown mount option %q", o) } @@ -486,9 +488,9 @@ func (m *mountHint) isSupported() bool { // For now enforce that all options are the same. Once bind mount is properly // supported, then we should ensure the master is less restrictive than the // container, e.g. master can be 'rw' while container mounts as 'ro'. -func (m *mountHint) checkCompatible(mount specs.Mount) error { +func (m *mountHint) checkCompatible(mount *specs.Mount) error { // Remove options that don't affect to mount's behavior. - masterOpts := filterUnsupportedOptions(m.mount) + masterOpts := filterUnsupportedOptions(&m.mount) replicaOpts := filterUnsupportedOptions(mount) if len(masterOpts) != len(replicaOpts) { @@ -512,7 +514,7 @@ func (m *mountHint) fileAccessType() config.FileAccessType { return config.FileAccessShared } -func filterUnsupportedOptions(mount specs.Mount) []string { +func filterUnsupportedOptions(mount *specs.Mount) []string { rv := make([]string, 0, len(mount.Options)) for _, o := range mount.Options { if isSupportedMountFlag(mount.Type, o) { @@ -576,7 +578,7 @@ func newPodMountHints(spec *specs.Spec) (*podMountHints, error) { return &podMountHints{mounts: mnts}, nil } -func (p *podMountHints) findMount(mount specs.Mount) *mountHint { +func (p *podMountHints) findMount(mount *specs.Mount) *mountHint { for _, m := range p.mounts { if m.mount.Source == mount.Source { return m @@ -679,7 +681,8 @@ func (c *containerMounter) mountSubmounts(ctx context.Context, conf *config.Conf root := mns.Root() defer root.DecRef(ctx) - for _, m := range c.mounts { + for i := range c.mounts { + m := &c.mounts[i] log.Debugf("Mounting %q to %q, type: %s, options: %s", m.Source, m.Destination, m.Type, m.Options) if hint := c.hints.findMount(m); hint != nil && hint.isSupported() { if err := c.mountSharedSubmount(ctx, mns, root, m, hint); err != nil { @@ -714,7 +717,7 @@ func (c *containerMounter) checkDispenser() error { func (c *containerMounter) mountSharedMaster(ctx context.Context, conf *config.Config, hint *mountHint) (*fs.Inode, error) { // Map mount type to filesystem name, and parse out the options that we are // capable of dealing with. - fsName, opts, useOverlay, err := c.getMountNameAndOptions(conf, hint.mount) + fsName, opts, useOverlay, err := c.getMountNameAndOptions(conf, &hint.mount) if err != nil { return nil, err } @@ -734,7 +737,7 @@ func (c *containerMounter) mountSharedMaster(ctx context.Context, conf *config.C mf.ReadOnly = true } - inode, err := filesystem.Mount(ctx, mountDevice(hint.mount), mf, strings.Join(opts, ","), nil) + inode, err := filesystem.Mount(ctx, mountDevice(&hint.mount), mf, strings.Join(opts, ","), nil) if err != nil { return nil, fmt.Errorf("creating mount %q: %v", hint.name, err) } @@ -796,13 +799,14 @@ func (c *containerMounter) createRootMount(ctx context.Context, conf *config.Con // getMountNameAndOptions retrieves the fsName, opts, and useOverlay values // used for mounts. -func (c *containerMounter) getMountNameAndOptions(conf *config.Config, m specs.Mount) (string, []string, bool, error) { +func (c *containerMounter) getMountNameAndOptions(conf *config.Config, m *specs.Mount) (string, []string, bool, error) { + specutils.MaybeConvertToBindMount(m) + var ( fsName string opts []string useOverlay bool ) - switch m.Type { case devpts.Name, devtmpfs.Name, procvfs2.Name, sysvfs2.Name: fsName = m.Type @@ -836,7 +840,7 @@ func (c *containerMounter) getMountNameAndOptions(conf *config.Config, m specs.M return fsName, opts, useOverlay, nil } -func (c *containerMounter) getMountAccessType(conf *config.Config, mount specs.Mount) config.FileAccessType { +func (c *containerMounter) getMountAccessType(conf *config.Config, mount *specs.Mount) config.FileAccessType { if hint := c.hints.findMount(mount); hint != nil { return hint.fileAccessType() } @@ -847,7 +851,7 @@ func (c *containerMounter) getMountAccessType(conf *config.Config, mount specs.M // be readonly, a lower ramfs overlay is added to create the mount point dir. // Another overlay is added with tmpfs on top if Config.Overlay is true. // 'm.Destination' must be an absolute path with '..' and symlinks resolved. -func (c *containerMounter) mountSubmount(ctx context.Context, conf *config.Config, mns *fs.MountNamespace, root *fs.Dirent, m specs.Mount) error { +func (c *containerMounter) mountSubmount(ctx context.Context, conf *config.Config, mns *fs.MountNamespace, root *fs.Dirent, m *specs.Mount) error { // Map mount type to filesystem name, and parse out the options that we are // capable of dealing with. fsName, opts, useOverlay, err := c.getMountNameAndOptions(conf, m) @@ -921,7 +925,7 @@ func (c *containerMounter) mountSubmount(ctx context.Context, conf *config.Confi // mountSharedSubmount binds mount to a previously mounted volume that is shared // among containers in the same pod. -func (c *containerMounter) mountSharedSubmount(ctx context.Context, mns *fs.MountNamespace, root *fs.Dirent, mount specs.Mount, source *mountHint) error { +func (c *containerMounter) mountSharedSubmount(ctx context.Context, mns *fs.MountNamespace, root *fs.Dirent, mount *specs.Mount, source *mountHint) error { if err := source.checkCompatible(mount); err != nil { return err } @@ -946,7 +950,7 @@ func (c *containerMounter) mountSharedSubmount(ctx context.Context, mns *fs.Moun // addRestoreMount adds a mount to the MountSources map used for restoring a // checkpointed container. -func (c *containerMounter) addRestoreMount(conf *config.Config, renv *fs.RestoreEnvironment, m specs.Mount) error { +func (c *containerMounter) addRestoreMount(conf *config.Config, renv *fs.RestoreEnvironment, m *specs.Mount) error { fsName, opts, useOverlay, err := c.getMountNameAndOptions(conf, m) if err != nil { return err @@ -994,7 +998,8 @@ func (c *containerMounter) createRestoreEnvironment(conf *config.Config) (*fs.Re // Add submounts. var tmpMounted bool - for _, m := range c.mounts { + for i := range c.mounts { + m := &c.mounts[i] if err := c.addRestoreMount(conf, renv, m); err != nil { return nil, err } @@ -1009,7 +1014,7 @@ func (c *containerMounter) createRestoreEnvironment(conf *config.Config) (*fs.Re Type: tmpfsvfs2.Name, Destination: "/tmp", } - if err := c.addRestoreMount(conf, renv, tmpMount); err != nil { + if err := c.addRestoreMount(conf, renv, &tmpMount); err != nil { return nil, err } } @@ -1068,7 +1073,7 @@ func (c *containerMounter) mountTmp(ctx context.Context, conf *config.Config, mn // another user. This is normally done for /tmp. Options: []string{"mode=01777"}, } - return c.mountSubmount(ctx, conf, mns, root, tmpMount) + return c.mountSubmount(ctx, conf, mns, root, &tmpMount) default: return err diff --git a/runsc/boot/fs_test.go b/runsc/boot/fs_test.go index b4f12d034..09ffda628 100644 --- a/runsc/boot/fs_test.go +++ b/runsc/boot/fs_test.go @@ -244,7 +244,7 @@ func TestGetMountAccessType(t *testing.T) { } mounter := containerMounter{hints: podHints} conf := &config.Config{FileAccessMounts: config.FileAccessShared} - if got := mounter.getMountAccessType(conf, specs.Mount{Source: source}); got != tst.want { + if got := mounter.getMountAccessType(conf, &specs.Mount{Source: source}); got != tst.want { t.Errorf("getMountAccessType(), want: %v, got: %v", tst.want, got) } }) diff --git a/runsc/boot/vfs.go b/runsc/boot/vfs.go index 7d8fd0483..c1828bd3d 100644 --- a/runsc/boot/vfs.go +++ b/runsc/boot/vfs.go @@ -46,6 +46,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/runsc/config" + "gvisor.dev/gvisor/runsc/specutils" ) func registerFilesystems(k *kernel.Kernel) error { @@ -362,33 +363,33 @@ func (c *containerMounter) mountSubmountsVFS2(ctx context.Context, conf *config. for i := range mounts { submount := &mounts[i] - log.Debugf("Mounting %q to %q, type: %s, options: %s", submount.Source, submount.Destination, submount.Type, submount.Options) + log.Debugf("Mounting %q to %q, type: %s, options: %s", submount.mount.Source, submount.mount.Destination, submount.mount.Type, submount.mount.Options) var ( mnt *vfs.Mount err error ) - if hint := c.hints.findMount(submount.Mount); hint != nil && hint.isSupported() { - mnt, err = c.mountSharedSubmountVFS2(ctx, conf, mns, creds, submount.Mount, hint) + if hint := c.hints.findMount(submount.mount); hint != nil && hint.isSupported() { + mnt, err = c.mountSharedSubmountVFS2(ctx, conf, mns, creds, submount.mount, hint) if err != nil { - return fmt.Errorf("mount shared mount %q to %q: %v", hint.name, submount.Destination, err) + return fmt.Errorf("mount shared mount %q to %q: %v", hint.name, submount.mount.Destination, err) } } else { mnt, err = c.mountSubmountVFS2(ctx, conf, mns, creds, submount) if err != nil { - return fmt.Errorf("mount submount %q: %w", submount.Destination, err) + return fmt.Errorf("mount submount %q: %w", submount.mount.Destination, err) } } if mnt != nil && mnt.ReadOnly() { // Switch to ReadWrite while we setup submounts. if err := c.k.VFS().SetMountReadOnly(mnt, false); err != nil { - return fmt.Errorf("failed to set mount at %q readwrite: %w", submount.Destination, err) + return fmt.Errorf("failed to set mount at %q readwrite: %w", submount.mount.Destination, err) } // Restore back to ReadOnly at the end. defer func() { if err := c.k.VFS().SetMountReadOnly(mnt, true); err != nil { - panic(fmt.Sprintf("failed to restore mount at %q back to readonly: %v", submount.Destination, err)) + panic(fmt.Sprintf("failed to restore mount at %q back to readonly: %v", submount.mount.Destination, err)) } }() } @@ -401,8 +402,8 @@ func (c *containerMounter) mountSubmountsVFS2(ctx context.Context, conf *config. } type mountAndFD struct { - specs.Mount - fd int + mount *specs.Mount + fd int } func (c *containerMounter) prepareMountsVFS2() ([]mountAndFD, error) { @@ -410,15 +411,18 @@ func (c *containerMounter) prepareMountsVFS2() ([]mountAndFD, error) { // undocumented assumption that FDs are dispensed in the order in which // they are required by mounts. var mounts []mountAndFD - for _, m := range c.mounts { - fd := -1 + for i := range c.mounts { + m := &c.mounts[i] + specutils.MaybeConvertToBindMount(m) + // Only bind mounts use host FDs; see // containerMounter.getMountNameAndOptionsVFS2. + fd := -1 if m.Type == bind { fd = c.fds.remove() } mounts = append(mounts, mountAndFD{ - Mount: m, + mount: m, fd: fd, }) } @@ -428,7 +432,7 @@ func (c *containerMounter) prepareMountsVFS2() ([]mountAndFD, error) { // Sort the mounts so that we don't place children before parents. sort.Slice(mounts, func(i, j int) bool { - return len(mounts[i].Destination) < len(mounts[j].Destination) + return len(mounts[i].mount.Destination) < len(mounts[j].mount.Destination) }) return mounts, nil @@ -444,16 +448,16 @@ func (c *containerMounter) mountSubmountVFS2(ctx context.Context, conf *config.C return nil, nil } - if err := c.makeMountPoint(ctx, creds, mns, submount.Destination); err != nil { - return nil, fmt.Errorf("creating mount point %q: %w", submount.Destination, err) + if err := c.makeMountPoint(ctx, creds, mns, submount.mount.Destination); err != nil { + return nil, fmt.Errorf("creating mount point %q: %w", submount.mount.Destination, err) } if useOverlay { - log.Infof("Adding overlay on top of mount %q", submount.Destination) + log.Infof("Adding overlay on top of mount %q", submount.mount.Destination) var cleanup func() opts, cleanup, err = c.configureOverlay(ctx, creds, opts, fsName) if err != nil { - return nil, fmt.Errorf("mounting volume with overlay at %q: %w", submount.Destination, err) + return nil, fmt.Errorf("mounting volume with overlay at %q: %w", submount.mount.Destination, err) } defer cleanup() fsName = overlay.Name @@ -465,32 +469,34 @@ func (c *containerMounter) mountSubmountVFS2(ctx context.Context, conf *config.C target := &vfs.PathOperation{ Root: root, Start: root, - Path: fspath.Parse(submount.Destination), + Path: fspath.Parse(submount.mount.Destination), } mnt, err := c.k.VFS().MountAt(ctx, creds, "", target, fsName, opts) if err != nil { - return nil, fmt.Errorf("failed to mount %q (type: %s): %w, opts: %v", submount.Destination, submount.Type, err, opts) + return nil, fmt.Errorf("failed to mount %q (type: %s): %w, opts: %v", submount.mount.Destination, submount.mount.Type, err, opts) } - log.Infof("Mounted %q to %q type: %s, internal-options: %q", submount.Source, submount.Destination, submount.Type, opts.GetFilesystemOptions.Data) + log.Infof("Mounted %q to %q type: %s, internal-options: %q", submount.mount.Source, submount.mount.Destination, submount.mount.Type, opts.GetFilesystemOptions.Data) return mnt, nil } // getMountNameAndOptionsVFS2 retrieves the fsName, opts, and useOverlay values // used for mounts. func (c *containerMounter) getMountNameAndOptionsVFS2(conf *config.Config, m *mountAndFD) (string, *vfs.MountOptions, bool, error) { - fsName := m.Type + fsName := m.mount.Type useOverlay := false - var data []string - var iopts interface{} + var ( + data []string + internalData interface{} + ) - verityData, verityOpts, verityRequested, remainingMOpts, err := parseVerityMountOptions(m.Options) + verityData, verityOpts, verityRequested, remainingMOpts, err := parseVerityMountOptions(m.mount.Options) if err != nil { return "", nil, false, err } - m.Options = remainingMOpts + m.mount.Options = remainingMOpts // Find filesystem name and FS specific data field. - switch m.Type { + switch m.mount.Type { case devpts.Name, devtmpfs.Name, proc.Name, sys.Name: // Nothing to do. @@ -499,7 +505,7 @@ func (c *containerMounter) getMountNameAndOptionsVFS2(conf *config.Config, m *mo case tmpfs.Name: var err error - data, err = parseAndFilterOptions(m.Options, tmpfsAllowedData...) + data, err = parseAndFilterOptions(m.mount.Options, tmpfsAllowedData...) if err != nil { return "", nil, false, err } @@ -511,35 +517,35 @@ func (c *containerMounter) getMountNameAndOptionsVFS2(conf *config.Config, m *mo // but unlikely to be correct in this context. return "", nil, false, fmt.Errorf("9P mount requires a connection FD") } - data = p9MountData(m.fd, c.getMountAccessType(conf, m.Mount), true /* vfs2 */) - iopts = gofer.InternalFilesystemOptions{ - UniqueID: m.Destination, + data = p9MountData(m.fd, c.getMountAccessType(conf, m.mount), true /* vfs2 */) + internalData = gofer.InternalFilesystemOptions{ + UniqueID: m.mount.Destination, } // If configured, add overlay to all writable mounts. - useOverlay = conf.Overlay && !mountFlags(m.Options).ReadOnly + useOverlay = conf.Overlay && !mountFlags(m.mount.Options).ReadOnly case cgroupfs.Name: var err error - data, err = parseAndFilterOptions(m.Options, cgroupfs.SupportedMountOptions...) + data, err = parseAndFilterOptions(m.mount.Options, cgroupfs.SupportedMountOptions...) if err != nil { return "", nil, false, err } default: - log.Warningf("ignoring unknown filesystem type %q", m.Type) + log.Warningf("ignoring unknown filesystem type %q", m.mount.Type) return "", nil, false, nil } opts := &vfs.MountOptions{ GetFilesystemOptions: vfs.GetFilesystemOptions{ Data: strings.Join(data, ","), - InternalData: iopts, + InternalData: internalData, }, InternalMount: true, } - for _, o := range m.Options { + for _, o := range m.mount.Options { switch o { case "rw": opts.ReadOnly = false @@ -549,13 +555,15 @@ func (c *containerMounter) getMountNameAndOptionsVFS2(conf *config.Config, m *mo opts.Flags.NoATime = true case "noexec": opts.Flags.NoExec = true + case "bind", "rbind": + // These are the same as a mount with type="bind". default: log.Warningf("ignoring unknown mount option %q", o) } } if verityRequested { - verityData = verityData + "root_name=" + path.Base(m.Mount.Destination) + verityData = verityData + "root_name=" + path.Base(m.mount.Destination) verityOpts.LowerName = fsName verityOpts.LowerGetFSOptions = opts.GetFilesystemOptions fsName = verity.Name @@ -684,7 +692,7 @@ func (c *containerMounter) mountTmpVFS2(ctx context.Context, conf *config.Config // another user. This is normally done for /tmp. Options: []string{"mode=01777"}, } - _, err := c.mountSubmountVFS2(ctx, conf, mns, creds, &mountAndFD{Mount: tmpMount}) + _, err := c.mountSubmountVFS2(ctx, conf, mns, creds, &mountAndFD{mount: &tmpMount}) return err case syserror.ENOTDIR: @@ -723,7 +731,7 @@ func (c *containerMounter) processHintsVFS2(conf *config.Config, creds *auth.Cre func (c *containerMounter) mountSharedMasterVFS2(ctx context.Context, conf *config.Config, hint *mountHint, creds *auth.Credentials) (*vfs.Mount, error) { // Map mount type to filesystem name, and parse out the options that we are // capable of dealing with. - mntFD := &mountAndFD{Mount: hint.mount} + mntFD := &mountAndFD{mount: &hint.mount} fsName, opts, useOverlay, err := c.getMountNameAndOptionsVFS2(conf, mntFD) if err != nil { return nil, err @@ -733,11 +741,11 @@ func (c *containerMounter) mountSharedMasterVFS2(ctx context.Context, conf *conf } if useOverlay { - log.Infof("Adding overlay on top of shared mount %q", mntFD.Destination) + log.Infof("Adding overlay on top of shared mount %q", mntFD.mount.Destination) var cleanup func() opts, cleanup, err = c.configureOverlay(ctx, creds, opts, fsName) if err != nil { - return nil, fmt.Errorf("mounting shared volume with overlay at %q: %w", mntFD.Destination, err) + return nil, fmt.Errorf("mounting shared volume with overlay at %q: %w", mntFD.mount.Destination, err) } defer cleanup() fsName = overlay.Name @@ -748,14 +756,14 @@ func (c *containerMounter) mountSharedMasterVFS2(ctx context.Context, conf *conf // mountSharedSubmount binds mount to a previously mounted volume that is shared // among containers in the same pod. -func (c *containerMounter) mountSharedSubmountVFS2(ctx context.Context, conf *config.Config, mns *vfs.MountNamespace, creds *auth.Credentials, mount specs.Mount, source *mountHint) (*vfs.Mount, error) { +func (c *containerMounter) mountSharedSubmountVFS2(ctx context.Context, conf *config.Config, mns *vfs.MountNamespace, creds *auth.Credentials, mount *specs.Mount, source *mountHint) (*vfs.Mount, error) { if err := source.checkCompatible(mount); err != nil { return nil, err } // Ignore data and useOverlay because these were already applied to // the master mount. - _, opts, _, err := c.getMountNameAndOptionsVFS2(conf, &mountAndFD{Mount: mount}) + _, opts, _, err := c.getMountNameAndOptionsVFS2(conf, &mountAndFD{mount: mount}) if err != nil { return nil, err } @@ -808,7 +816,7 @@ func (c *containerMounter) makeMountPoint(ctx context.Context, creds *auth.Crede // configureRestore returns an updated context.Context including filesystem // state used by restore defined by conf. -func (c *containerMounter) configureRestore(ctx context.Context, conf *config.Config) (context.Context, error) { +func (c *containerMounter) configureRestore(ctx context.Context) (context.Context, error) { fdmap := make(map[string]int) fdmap["/"] = c.fds.remove() mounts, err := c.prepareMountsVFS2() @@ -818,7 +826,7 @@ func (c *containerMounter) configureRestore(ctx context.Context, conf *config.Co for i := range c.mounts { submount := &mounts[i] if submount.fd >= 0 { - fdmap[submount.Destination] = submount.fd + fdmap[submount.mount.Destination] = submount.fd } } return context.WithValue(ctx, gofer.CtxRestoreServerFDMap, fdmap), nil diff --git a/runsc/container/container_test.go b/runsc/container/container_test.go index 5a0c468a4..0e79877b7 100644 --- a/runsc/container/container_test.go +++ b/runsc/container/container_test.go @@ -2449,6 +2449,27 @@ func TestCreateWithCorruptedStateFile(t *testing.T) { } } +func TestBindMountByOption(t *testing.T) { + for name, conf := range configs(t, all...) { + t.Run(name, func(t *testing.T) { + dir, err := ioutil.TempDir(testutil.TmpDir(), "bind-mount") + spec := testutil.NewSpecWithArgs("/bin/touch", path.Join(dir, "file")) + if err != nil { + t.Fatalf("ioutil.TempDir(): %v", err) + } + spec.Mounts = append(spec.Mounts, specs.Mount{ + Destination: dir, + Source: dir, + Type: "none", + Options: []string{"rw", "bind"}, + }) + if err := run(spec, conf); err != nil { + t.Fatalf("error running sandbox: %v", err) + } + }) + } +} + func execute(cont *Container, name string, arg ...string) (unix.WaitStatus, error) { args := &control.ExecArgs{ Filename: name, diff --git a/runsc/container/multi_container_test.go b/runsc/container/multi_container_test.go index 0f0a223ce..37ad7d2e1 100644 --- a/runsc/container/multi_container_test.go +++ b/runsc/container/multi_container_test.go @@ -1510,7 +1510,7 @@ func TestMultiContainerSharedMountUnsupportedOptions(t *testing.T) { Destination: "/mydir/test", Source: "/some/dir", Type: "tmpfs", - Options: []string{"rw", "rbind", "relatime"}, + Options: []string{"rw", "relatime"}, } podSpec[0].Mounts = append(podSpec[0].Mounts, mnt0) diff --git a/runsc/specutils/specutils.go b/runsc/specutils/specutils.go index e5e66546c..11b476690 100644 --- a/runsc/specutils/specutils.go +++ b/runsc/specutils/specutils.go @@ -335,9 +335,27 @@ func capsFromNames(names []string, skipSet map[linux.Capability]struct{}) (auth. // Is9PMount returns true if the given mount can be mounted as an external // gofer. func Is9PMount(m specs.Mount, vfs2Enabled bool) bool { + MaybeConvertToBindMount(&m) return m.Type == "bind" && m.Source != "" && IsSupportedDevMount(m, vfs2Enabled) } +// MaybeConvertToBindMount converts mount type to "bind" in case any of the +// mount options are either "bind" or "rbind" as required by the OCI spec. +// +// "For bind mounts (when options include either bind or rbind), the type is a +// dummy, often "none" (not listed in /proc/filesystems)." +func MaybeConvertToBindMount(m *specs.Mount) { + if m.Type == "bind" { + return + } + for _, opt := range m.Options { + if opt == "bind" || opt == "rbind" { + m.Type = "bind" + return + } + } +} + // IsSupportedDevMount returns true if m.Destination does not specify a // path that is hardcoded by VFS1's implementation of /dev. func IsSupportedDevMount(m specs.Mount, vfs2Enabled bool) bool { |