summaryrefslogtreecommitdiffhomepage
path: root/runsc
diff options
context:
space:
mode:
Diffstat (limited to 'runsc')
-rw-r--r--runsc/boot/controller.go2
-rw-r--r--runsc/boot/fs.go39
-rw-r--r--runsc/boot/fs_test.go2
-rw-r--r--runsc/boot/loader.go62
-rw-r--r--runsc/boot/vfs.go97
-rw-r--r--runsc/cgroup/cgroup.go287
-rw-r--r--runsc/cgroup/cgroup_test.go116
-rw-r--r--runsc/cmd/mitigate.go6
-rw-r--r--runsc/cmd/mitigate_test.go2
-rw-r--r--runsc/container/container.go15
-rw-r--r--runsc/container/container_test.go21
-rw-r--r--runsc/container/multi_container_test.go60
-rw-r--r--runsc/fsgofer/fsgofer.go61
-rw-r--r--runsc/fsgofer/fsgofer_test.go10
-rw-r--r--runsc/mitigate/mitigate_test.go2
-rw-r--r--runsc/sandbox/sandbox.go22
-rw-r--r--runsc/specutils/seccomp/BUILD4
-rw-r--r--runsc/specutils/seccomp/seccomp_test.go29
-rw-r--r--runsc/specutils/specutils.go18
19 files changed, 558 insertions, 297 deletions
diff --git a/runsc/boot/controller.go b/runsc/boot/controller.go
index 05b721b28..9b270cbf2 100644
--- a/runsc/boot/controller.go
+++ b/runsc/boot/controller.go
@@ -402,7 +402,7 @@ func (cm *containerManager) Restore(o *RestoreOpts, _ *struct{}) error {
ctx := k.SupervisorContext()
mntr := newContainerMounter(&cm.l.root, cm.l.k, cm.l.mountHints, kernel.VFS2Enabled)
if kernel.VFS2Enabled {
- ctx, err = mntr.configureRestore(ctx, cm.l.root.conf)
+ ctx, err = mntr.configureRestore(ctx)
if err != nil {
return fmt.Errorf("configuring filesystem restore: %v", err)
}
diff --git a/runsc/boot/fs.go b/runsc/boot/fs.go
index 3c0cef6db..bf4a41f77 100644
--- a/runsc/boot/fs.go
+++ b/runsc/boot/fs.go
@@ -232,7 +232,7 @@ func parseMountOption(opt string, allowedKeys ...string) (bool, error) {
// mountDevice returns a device string based on the fs type and target
// of the mount.
-func mountDevice(m specs.Mount) string {
+func mountDevice(m *specs.Mount) string {
if m.Type == bind {
// Make a device string that includes the target, which is consistent across
// S/R and uniquely identifies the connection.
@@ -256,6 +256,8 @@ func mountFlags(opts []string) fs.MountSourceFlags {
mf.NoAtime = true
case "noexec":
mf.NoExec = true
+ case "bind", "rbind":
+ // These are the same as a mount with type="bind".
default:
log.Warningf("ignoring unknown mount option %q", o)
}
@@ -486,9 +488,9 @@ func (m *mountHint) isSupported() bool {
// For now enforce that all options are the same. Once bind mount is properly
// supported, then we should ensure the master is less restrictive than the
// container, e.g. master can be 'rw' while container mounts as 'ro'.
-func (m *mountHint) checkCompatible(mount specs.Mount) error {
+func (m *mountHint) checkCompatible(mount *specs.Mount) error {
// Remove options that don't affect to mount's behavior.
- masterOpts := filterUnsupportedOptions(m.mount)
+ masterOpts := filterUnsupportedOptions(&m.mount)
replicaOpts := filterUnsupportedOptions(mount)
if len(masterOpts) != len(replicaOpts) {
@@ -512,7 +514,7 @@ func (m *mountHint) fileAccessType() config.FileAccessType {
return config.FileAccessShared
}
-func filterUnsupportedOptions(mount specs.Mount) []string {
+func filterUnsupportedOptions(mount *specs.Mount) []string {
rv := make([]string, 0, len(mount.Options))
for _, o := range mount.Options {
if isSupportedMountFlag(mount.Type, o) {
@@ -576,7 +578,7 @@ func newPodMountHints(spec *specs.Spec) (*podMountHints, error) {
return &podMountHints{mounts: mnts}, nil
}
-func (p *podMountHints) findMount(mount specs.Mount) *mountHint {
+func (p *podMountHints) findMount(mount *specs.Mount) *mountHint {
for _, m := range p.mounts {
if m.mount.Source == mount.Source {
return m
@@ -679,7 +681,8 @@ func (c *containerMounter) mountSubmounts(ctx context.Context, conf *config.Conf
root := mns.Root()
defer root.DecRef(ctx)
- for _, m := range c.mounts {
+ for i := range c.mounts {
+ m := &c.mounts[i]
log.Debugf("Mounting %q to %q, type: %s, options: %s", m.Source, m.Destination, m.Type, m.Options)
if hint := c.hints.findMount(m); hint != nil && hint.isSupported() {
if err := c.mountSharedSubmount(ctx, mns, root, m, hint); err != nil {
@@ -714,7 +717,7 @@ func (c *containerMounter) checkDispenser() error {
func (c *containerMounter) mountSharedMaster(ctx context.Context, conf *config.Config, hint *mountHint) (*fs.Inode, error) {
// Map mount type to filesystem name, and parse out the options that we are
// capable of dealing with.
- fsName, opts, useOverlay, err := c.getMountNameAndOptions(conf, hint.mount)
+ fsName, opts, useOverlay, err := c.getMountNameAndOptions(conf, &hint.mount)
if err != nil {
return nil, err
}
@@ -734,7 +737,7 @@ func (c *containerMounter) mountSharedMaster(ctx context.Context, conf *config.C
mf.ReadOnly = true
}
- inode, err := filesystem.Mount(ctx, mountDevice(hint.mount), mf, strings.Join(opts, ","), nil)
+ inode, err := filesystem.Mount(ctx, mountDevice(&hint.mount), mf, strings.Join(opts, ","), nil)
if err != nil {
return nil, fmt.Errorf("creating mount %q: %v", hint.name, err)
}
@@ -796,13 +799,14 @@ func (c *containerMounter) createRootMount(ctx context.Context, conf *config.Con
// getMountNameAndOptions retrieves the fsName, opts, and useOverlay values
// used for mounts.
-func (c *containerMounter) getMountNameAndOptions(conf *config.Config, m specs.Mount) (string, []string, bool, error) {
+func (c *containerMounter) getMountNameAndOptions(conf *config.Config, m *specs.Mount) (string, []string, bool, error) {
+ specutils.MaybeConvertToBindMount(m)
+
var (
fsName string
opts []string
useOverlay bool
)
-
switch m.Type {
case devpts.Name, devtmpfs.Name, procvfs2.Name, sysvfs2.Name:
fsName = m.Type
@@ -836,7 +840,7 @@ func (c *containerMounter) getMountNameAndOptions(conf *config.Config, m specs.M
return fsName, opts, useOverlay, nil
}
-func (c *containerMounter) getMountAccessType(conf *config.Config, mount specs.Mount) config.FileAccessType {
+func (c *containerMounter) getMountAccessType(conf *config.Config, mount *specs.Mount) config.FileAccessType {
if hint := c.hints.findMount(mount); hint != nil {
return hint.fileAccessType()
}
@@ -847,7 +851,7 @@ func (c *containerMounter) getMountAccessType(conf *config.Config, mount specs.M
// be readonly, a lower ramfs overlay is added to create the mount point dir.
// Another overlay is added with tmpfs on top if Config.Overlay is true.
// 'm.Destination' must be an absolute path with '..' and symlinks resolved.
-func (c *containerMounter) mountSubmount(ctx context.Context, conf *config.Config, mns *fs.MountNamespace, root *fs.Dirent, m specs.Mount) error {
+func (c *containerMounter) mountSubmount(ctx context.Context, conf *config.Config, mns *fs.MountNamespace, root *fs.Dirent, m *specs.Mount) error {
// Map mount type to filesystem name, and parse out the options that we are
// capable of dealing with.
fsName, opts, useOverlay, err := c.getMountNameAndOptions(conf, m)
@@ -921,7 +925,7 @@ func (c *containerMounter) mountSubmount(ctx context.Context, conf *config.Confi
// mountSharedSubmount binds mount to a previously mounted volume that is shared
// among containers in the same pod.
-func (c *containerMounter) mountSharedSubmount(ctx context.Context, mns *fs.MountNamespace, root *fs.Dirent, mount specs.Mount, source *mountHint) error {
+func (c *containerMounter) mountSharedSubmount(ctx context.Context, mns *fs.MountNamespace, root *fs.Dirent, mount *specs.Mount, source *mountHint) error {
if err := source.checkCompatible(mount); err != nil {
return err
}
@@ -946,7 +950,7 @@ func (c *containerMounter) mountSharedSubmount(ctx context.Context, mns *fs.Moun
// addRestoreMount adds a mount to the MountSources map used for restoring a
// checkpointed container.
-func (c *containerMounter) addRestoreMount(conf *config.Config, renv *fs.RestoreEnvironment, m specs.Mount) error {
+func (c *containerMounter) addRestoreMount(conf *config.Config, renv *fs.RestoreEnvironment, m *specs.Mount) error {
fsName, opts, useOverlay, err := c.getMountNameAndOptions(conf, m)
if err != nil {
return err
@@ -994,7 +998,8 @@ func (c *containerMounter) createRestoreEnvironment(conf *config.Config) (*fs.Re
// Add submounts.
var tmpMounted bool
- for _, m := range c.mounts {
+ for i := range c.mounts {
+ m := &c.mounts[i]
if err := c.addRestoreMount(conf, renv, m); err != nil {
return nil, err
}
@@ -1009,7 +1014,7 @@ func (c *containerMounter) createRestoreEnvironment(conf *config.Config) (*fs.Re
Type: tmpfsvfs2.Name,
Destination: "/tmp",
}
- if err := c.addRestoreMount(conf, renv, tmpMount); err != nil {
+ if err := c.addRestoreMount(conf, renv, &tmpMount); err != nil {
return nil, err
}
}
@@ -1068,7 +1073,7 @@ func (c *containerMounter) mountTmp(ctx context.Context, conf *config.Config, mn
// another user. This is normally done for /tmp.
Options: []string{"mode=01777"},
}
- return c.mountSubmount(ctx, conf, mns, root, tmpMount)
+ return c.mountSubmount(ctx, conf, mns, root, &tmpMount)
default:
return err
diff --git a/runsc/boot/fs_test.go b/runsc/boot/fs_test.go
index b4f12d034..09ffda628 100644
--- a/runsc/boot/fs_test.go
+++ b/runsc/boot/fs_test.go
@@ -244,7 +244,7 @@ func TestGetMountAccessType(t *testing.T) {
}
mounter := containerMounter{hints: podHints}
conf := &config.Config{FileAccessMounts: config.FileAccessShared}
- if got := mounter.getMountAccessType(conf, specs.Mount{Source: source}); got != tst.want {
+ if got := mounter.getMountAccessType(conf, &specs.Mount{Source: source}); got != tst.want {
t.Errorf("getMountAccessType(), want: %v, got: %v", tst.want, got)
}
})
diff --git a/runsc/boot/loader.go b/runsc/boot/loader.go
index 25f06165f..10f2d3d35 100644
--- a/runsc/boot/loader.go
+++ b/runsc/boot/loader.go
@@ -230,6 +230,33 @@ func New(args Args) (*Loader, error) {
vfs2.Override()
}
+ // Make host FDs stable between invocations. Host FDs must map to the exact
+ // same number when the sandbox is restored. Otherwise the wrong FD will be
+ // used.
+ info := containerInfo{}
+ newfd := startingStdioFD
+
+ for _, stdioFD := range args.StdioFDs {
+ // Check that newfd is unused to avoid clobbering over it.
+ if _, err := unix.FcntlInt(uintptr(newfd), unix.F_GETFD, 0); !errors.Is(err, unix.EBADF) {
+ if err != nil {
+ return nil, fmt.Errorf("error checking for FD (%d) conflict: %w", newfd, err)
+ }
+ return nil, fmt.Errorf("unable to remap stdios, FD %d is already in use", newfd)
+ }
+
+ err := unix.Dup3(stdioFD, newfd, unix.O_CLOEXEC)
+ if err != nil {
+ return nil, fmt.Errorf("dup3 of stdios failed: %w", err)
+ }
+ info.stdioFDs = append(info.stdioFDs, fd.New(newfd))
+ _ = unix.Close(stdioFD)
+ newfd++
+ }
+ for _, goferFD := range args.GoferFDs {
+ info.goferFDs = append(info.goferFDs, fd.New(goferFD))
+ }
+
// Create kernel and platform.
p, err := createPlatform(args.Conf, args.Device)
if err != nil {
@@ -349,6 +376,7 @@ func New(args Args) (*Loader, error) {
if err != nil {
return nil, fmt.Errorf("creating init process for root container: %v", err)
}
+ info.procArgs = procArgs
if err := initCompatLogs(args.UserLogFD); err != nil {
return nil, fmt.Errorf("initializing compat logs: %v", err)
@@ -359,6 +387,9 @@ func New(args Args) (*Loader, error) {
return nil, fmt.Errorf("creating pod mount hints: %v", err)
}
+ info.conf = args.Conf
+ info.spec = args.Spec
+
if kernel.VFS2Enabled {
// Set up host mount that will be used for imported fds.
hostFilesystem, err := hostvfs2.NewFilesystem(k.VFS())
@@ -373,37 +404,6 @@ func New(args Args) (*Loader, error) {
k.SetHostMount(hostMount)
}
- info := containerInfo{
- conf: args.Conf,
- spec: args.Spec,
- procArgs: procArgs,
- }
-
- // Make host FDs stable between invocations. Host FDs must map to the exact
- // same number when the sandbox is restored. Otherwise the wrong FD will be
- // used.
- newfd := startingStdioFD
- for _, stdioFD := range args.StdioFDs {
- // Check that newfd is unused to avoid clobbering over it.
- if _, err := unix.FcntlInt(uintptr(newfd), unix.F_GETFD, 0); !errors.Is(err, unix.EBADF) {
- if err != nil {
- return nil, fmt.Errorf("error checking for FD (%d) conflict: %w", newfd, err)
- }
- return nil, fmt.Errorf("unable to remap stdios, FD %d is already in use", newfd)
- }
-
- err := unix.Dup3(stdioFD, newfd, unix.O_CLOEXEC)
- if err != nil {
- return nil, fmt.Errorf("dup3 of stdios failed: %w", err)
- }
- info.stdioFDs = append(info.stdioFDs, fd.New(newfd))
- _ = unix.Close(stdioFD)
- newfd++
- }
- for _, goferFD := range args.GoferFDs {
- info.goferFDs = append(info.goferFDs, fd.New(goferFD))
- }
-
eid := execID{cid: args.ID}
l := &Loader{
k: k,
diff --git a/runsc/boot/vfs.go b/runsc/boot/vfs.go
index 7d8fd0483..7be5176b0 100644
--- a/runsc/boot/vfs.go
+++ b/runsc/boot/vfs.go
@@ -46,6 +46,7 @@ import (
"gvisor.dev/gvisor/pkg/sentry/vfs"
"gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/runsc/config"
+ "gvisor.dev/gvisor/runsc/specutils"
)
func registerFilesystems(k *kernel.Kernel) error {
@@ -362,33 +363,33 @@ func (c *containerMounter) mountSubmountsVFS2(ctx context.Context, conf *config.
for i := range mounts {
submount := &mounts[i]
- log.Debugf("Mounting %q to %q, type: %s, options: %s", submount.Source, submount.Destination, submount.Type, submount.Options)
+ log.Debugf("Mounting %q to %q, type: %s, options: %s", submount.mount.Source, submount.mount.Destination, submount.mount.Type, submount.mount.Options)
var (
mnt *vfs.Mount
err error
)
- if hint := c.hints.findMount(submount.Mount); hint != nil && hint.isSupported() {
- mnt, err = c.mountSharedSubmountVFS2(ctx, conf, mns, creds, submount.Mount, hint)
+ if hint := c.hints.findMount(submount.mount); hint != nil && hint.isSupported() {
+ mnt, err = c.mountSharedSubmountVFS2(ctx, conf, mns, creds, submount.mount, hint)
if err != nil {
- return fmt.Errorf("mount shared mount %q to %q: %v", hint.name, submount.Destination, err)
+ return fmt.Errorf("mount shared mount %q to %q: %v", hint.name, submount.mount.Destination, err)
}
} else {
mnt, err = c.mountSubmountVFS2(ctx, conf, mns, creds, submount)
if err != nil {
- return fmt.Errorf("mount submount %q: %w", submount.Destination, err)
+ return fmt.Errorf("mount submount %q: %w", submount.mount.Destination, err)
}
}
if mnt != nil && mnt.ReadOnly() {
// Switch to ReadWrite while we setup submounts.
if err := c.k.VFS().SetMountReadOnly(mnt, false); err != nil {
- return fmt.Errorf("failed to set mount at %q readwrite: %w", submount.Destination, err)
+ return fmt.Errorf("failed to set mount at %q readwrite: %w", submount.mount.Destination, err)
}
// Restore back to ReadOnly at the end.
defer func() {
if err := c.k.VFS().SetMountReadOnly(mnt, true); err != nil {
- panic(fmt.Sprintf("failed to restore mount at %q back to readonly: %v", submount.Destination, err))
+ panic(fmt.Sprintf("failed to restore mount at %q back to readonly: %v", submount.mount.Destination, err))
}
}()
}
@@ -401,8 +402,8 @@ func (c *containerMounter) mountSubmountsVFS2(ctx context.Context, conf *config.
}
type mountAndFD struct {
- specs.Mount
- fd int
+ mount *specs.Mount
+ fd int
}
func (c *containerMounter) prepareMountsVFS2() ([]mountAndFD, error) {
@@ -410,15 +411,18 @@ func (c *containerMounter) prepareMountsVFS2() ([]mountAndFD, error) {
// undocumented assumption that FDs are dispensed in the order in which
// they are required by mounts.
var mounts []mountAndFD
- for _, m := range c.mounts {
- fd := -1
+ for i := range c.mounts {
+ m := &c.mounts[i]
+ specutils.MaybeConvertToBindMount(m)
+
// Only bind mounts use host FDs; see
// containerMounter.getMountNameAndOptionsVFS2.
+ fd := -1
if m.Type == bind {
fd = c.fds.remove()
}
mounts = append(mounts, mountAndFD{
- Mount: m,
+ mount: m,
fd: fd,
})
}
@@ -428,7 +432,7 @@ func (c *containerMounter) prepareMountsVFS2() ([]mountAndFD, error) {
// Sort the mounts so that we don't place children before parents.
sort.Slice(mounts, func(i, j int) bool {
- return len(mounts[i].Destination) < len(mounts[j].Destination)
+ return len(mounts[i].mount.Destination) < len(mounts[j].mount.Destination)
})
return mounts, nil
@@ -444,16 +448,16 @@ func (c *containerMounter) mountSubmountVFS2(ctx context.Context, conf *config.C
return nil, nil
}
- if err := c.makeMountPoint(ctx, creds, mns, submount.Destination); err != nil {
- return nil, fmt.Errorf("creating mount point %q: %w", submount.Destination, err)
+ if err := c.makeMountPoint(ctx, creds, mns, submount.mount.Destination); err != nil {
+ return nil, fmt.Errorf("creating mount point %q: %w", submount.mount.Destination, err)
}
if useOverlay {
- log.Infof("Adding overlay on top of mount %q", submount.Destination)
+ log.Infof("Adding overlay on top of mount %q", submount.mount.Destination)
var cleanup func()
opts, cleanup, err = c.configureOverlay(ctx, creds, opts, fsName)
if err != nil {
- return nil, fmt.Errorf("mounting volume with overlay at %q: %w", submount.Destination, err)
+ return nil, fmt.Errorf("mounting volume with overlay at %q: %w", submount.mount.Destination, err)
}
defer cleanup()
fsName = overlay.Name
@@ -465,32 +469,34 @@ func (c *containerMounter) mountSubmountVFS2(ctx context.Context, conf *config.C
target := &vfs.PathOperation{
Root: root,
Start: root,
- Path: fspath.Parse(submount.Destination),
+ Path: fspath.Parse(submount.mount.Destination),
}
mnt, err := c.k.VFS().MountAt(ctx, creds, "", target, fsName, opts)
if err != nil {
- return nil, fmt.Errorf("failed to mount %q (type: %s): %w, opts: %v", submount.Destination, submount.Type, err, opts)
+ return nil, fmt.Errorf("failed to mount %q (type: %s): %w, opts: %v", submount.mount.Destination, submount.mount.Type, err, opts)
}
- log.Infof("Mounted %q to %q type: %s, internal-options: %q", submount.Source, submount.Destination, submount.Type, opts.GetFilesystemOptions.Data)
+ log.Infof("Mounted %q to %q type: %s, internal-options: %q", submount.mount.Source, submount.mount.Destination, submount.mount.Type, opts.GetFilesystemOptions.Data)
return mnt, nil
}
// getMountNameAndOptionsVFS2 retrieves the fsName, opts, and useOverlay values
// used for mounts.
func (c *containerMounter) getMountNameAndOptionsVFS2(conf *config.Config, m *mountAndFD) (string, *vfs.MountOptions, bool, error) {
- fsName := m.Type
+ fsName := m.mount.Type
useOverlay := false
- var data []string
- var iopts interface{}
+ var (
+ data []string
+ internalData interface{}
+ )
- verityData, verityOpts, verityRequested, remainingMOpts, err := parseVerityMountOptions(m.Options)
+ verityData, verityOpts, verityRequested, remainingMOpts, err := parseVerityMountOptions(m.mount.Options)
if err != nil {
return "", nil, false, err
}
- m.Options = remainingMOpts
+ m.mount.Options = remainingMOpts
// Find filesystem name and FS specific data field.
- switch m.Type {
+ switch m.mount.Type {
case devpts.Name, devtmpfs.Name, proc.Name, sys.Name:
// Nothing to do.
@@ -499,7 +505,7 @@ func (c *containerMounter) getMountNameAndOptionsVFS2(conf *config.Config, m *mo
case tmpfs.Name:
var err error
- data, err = parseAndFilterOptions(m.Options, tmpfsAllowedData...)
+ data, err = parseAndFilterOptions(m.mount.Options, tmpfsAllowedData...)
if err != nil {
return "", nil, false, err
}
@@ -511,35 +517,35 @@ func (c *containerMounter) getMountNameAndOptionsVFS2(conf *config.Config, m *mo
// but unlikely to be correct in this context.
return "", nil, false, fmt.Errorf("9P mount requires a connection FD")
}
- data = p9MountData(m.fd, c.getMountAccessType(conf, m.Mount), true /* vfs2 */)
- iopts = gofer.InternalFilesystemOptions{
- UniqueID: m.Destination,
+ data = p9MountData(m.fd, c.getMountAccessType(conf, m.mount), true /* vfs2 */)
+ internalData = gofer.InternalFilesystemOptions{
+ UniqueID: m.mount.Destination,
}
// If configured, add overlay to all writable mounts.
- useOverlay = conf.Overlay && !mountFlags(m.Options).ReadOnly
+ useOverlay = conf.Overlay && !mountFlags(m.mount.Options).ReadOnly
case cgroupfs.Name:
var err error
- data, err = parseAndFilterOptions(m.Options, cgroupfs.SupportedMountOptions...)
+ data, err = parseAndFilterOptions(m.mount.Options, cgroupfs.SupportedMountOptions...)
if err != nil {
return "", nil, false, err
}
default:
- log.Warningf("ignoring unknown filesystem type %q", m.Type)
+ log.Warningf("ignoring unknown filesystem type %q", m.mount.Type)
return "", nil, false, nil
}
opts := &vfs.MountOptions{
GetFilesystemOptions: vfs.GetFilesystemOptions{
Data: strings.Join(data, ","),
- InternalData: iopts,
+ InternalData: internalData,
},
InternalMount: true,
}
- for _, o := range m.Options {
+ for _, o := range m.mount.Options {
switch o {
case "rw":
opts.ReadOnly = false
@@ -549,13 +555,15 @@ func (c *containerMounter) getMountNameAndOptionsVFS2(conf *config.Config, m *mo
opts.Flags.NoATime = true
case "noexec":
opts.Flags.NoExec = true
+ case "bind", "rbind":
+ // These are the same as a mount with type="bind".
default:
log.Warningf("ignoring unknown mount option %q", o)
}
}
if verityRequested {
- verityData = verityData + "root_name=" + path.Base(m.Mount.Destination)
+ verityData = verityData + "root_name=" + path.Base(m.mount.Destination)
verityOpts.LowerName = fsName
verityOpts.LowerGetFSOptions = opts.GetFilesystemOptions
fsName = verity.Name
@@ -649,7 +657,6 @@ func (c *containerMounter) mountTmpVFS2(ctx context.Context, conf *config.Config
Start: root,
Path: fspath.Parse("/tmp"),
}
- // TODO(gvisor.dev/issue/2782): Use O_PATH when available.
fd, err := c.k.VFS().OpenAt(ctx, creds, &pop, &vfs.OpenOptions{Flags: linux.O_RDONLY | linux.O_DIRECTORY})
switch err {
case nil:
@@ -684,7 +691,7 @@ func (c *containerMounter) mountTmpVFS2(ctx context.Context, conf *config.Config
// another user. This is normally done for /tmp.
Options: []string{"mode=01777"},
}
- _, err := c.mountSubmountVFS2(ctx, conf, mns, creds, &mountAndFD{Mount: tmpMount})
+ _, err := c.mountSubmountVFS2(ctx, conf, mns, creds, &mountAndFD{mount: &tmpMount})
return err
case syserror.ENOTDIR:
@@ -723,7 +730,7 @@ func (c *containerMounter) processHintsVFS2(conf *config.Config, creds *auth.Cre
func (c *containerMounter) mountSharedMasterVFS2(ctx context.Context, conf *config.Config, hint *mountHint, creds *auth.Credentials) (*vfs.Mount, error) {
// Map mount type to filesystem name, and parse out the options that we are
// capable of dealing with.
- mntFD := &mountAndFD{Mount: hint.mount}
+ mntFD := &mountAndFD{mount: &hint.mount}
fsName, opts, useOverlay, err := c.getMountNameAndOptionsVFS2(conf, mntFD)
if err != nil {
return nil, err
@@ -733,11 +740,11 @@ func (c *containerMounter) mountSharedMasterVFS2(ctx context.Context, conf *conf
}
if useOverlay {
- log.Infof("Adding overlay on top of shared mount %q", mntFD.Destination)
+ log.Infof("Adding overlay on top of shared mount %q", mntFD.mount.Destination)
var cleanup func()
opts, cleanup, err = c.configureOverlay(ctx, creds, opts, fsName)
if err != nil {
- return nil, fmt.Errorf("mounting shared volume with overlay at %q: %w", mntFD.Destination, err)
+ return nil, fmt.Errorf("mounting shared volume with overlay at %q: %w", mntFD.mount.Destination, err)
}
defer cleanup()
fsName = overlay.Name
@@ -748,14 +755,14 @@ func (c *containerMounter) mountSharedMasterVFS2(ctx context.Context, conf *conf
// mountSharedSubmount binds mount to a previously mounted volume that is shared
// among containers in the same pod.
-func (c *containerMounter) mountSharedSubmountVFS2(ctx context.Context, conf *config.Config, mns *vfs.MountNamespace, creds *auth.Credentials, mount specs.Mount, source *mountHint) (*vfs.Mount, error) {
+func (c *containerMounter) mountSharedSubmountVFS2(ctx context.Context, conf *config.Config, mns *vfs.MountNamespace, creds *auth.Credentials, mount *specs.Mount, source *mountHint) (*vfs.Mount, error) {
if err := source.checkCompatible(mount); err != nil {
return nil, err
}
// Ignore data and useOverlay because these were already applied to
// the master mount.
- _, opts, _, err := c.getMountNameAndOptionsVFS2(conf, &mountAndFD{Mount: mount})
+ _, opts, _, err := c.getMountNameAndOptionsVFS2(conf, &mountAndFD{mount: mount})
if err != nil {
return nil, err
}
@@ -808,7 +815,7 @@ func (c *containerMounter) makeMountPoint(ctx context.Context, creds *auth.Crede
// configureRestore returns an updated context.Context including filesystem
// state used by restore defined by conf.
-func (c *containerMounter) configureRestore(ctx context.Context, conf *config.Config) (context.Context, error) {
+func (c *containerMounter) configureRestore(ctx context.Context) (context.Context, error) {
fdmap := make(map[string]int)
fdmap["/"] = c.fds.remove()
mounts, err := c.prepareMountsVFS2()
@@ -818,7 +825,7 @@ func (c *containerMounter) configureRestore(ctx context.Context, conf *config.Co
for i := range c.mounts {
submount := &mounts[i]
if submount.fd >= 0 {
- fdmap[submount.Destination] = submount.fd
+ fdmap[submount.mount.Destination] = submount.fd
}
}
return context.WithValue(ctx, gofer.CtxRestoreServerFDMap, fdmap), nil
diff --git a/runsc/cgroup/cgroup.go b/runsc/cgroup/cgroup.go
index 438b7ef3e..66a6a0f68 100644
--- a/runsc/cgroup/cgroup.go
+++ b/runsc/cgroup/cgroup.go
@@ -40,23 +40,24 @@ const (
cgroupRoot = "/sys/fs/cgroup"
)
-var controllers = map[string]config{
- "blkio": {ctrlr: &blockIO{}},
- "cpu": {ctrlr: &cpu{}},
- "cpuset": {ctrlr: &cpuSet{}},
- "hugetlb": {ctrlr: &hugeTLB{}, optional: true},
- "memory": {ctrlr: &memory{}},
- "net_cls": {ctrlr: &networkClass{}},
- "net_prio": {ctrlr: &networkPrio{}},
- "pids": {ctrlr: &pids{}},
+var controllers = map[string]controller{
+ "blkio": &blockIO{},
+ "cpu": &cpu{},
+ "cpuset": &cpuSet{},
+ "hugetlb": &hugeTLB{},
+ "memory": &memory{},
+ "net_cls": &networkClass{},
+ "net_prio": &networkPrio{},
+ "pids": &pids{},
// These controllers either don't have anything in the OCI spec or is
// irrelevant for a sandbox.
- "devices": {ctrlr: &noop{}},
- "freezer": {ctrlr: &noop{}},
- "perf_event": {ctrlr: &noop{}},
- "rdma": {ctrlr: &noop{}, optional: true},
- "systemd": {ctrlr: &noop{}},
+ "cpuacct": &noop{},
+ "devices": &noop{},
+ "freezer": &noop{},
+ "perf_event": &noop{},
+ "rdma": &noop{isOptional: true},
+ "systemd": &noop{},
}
// IsOnlyV2 checks whether cgroups V2 is enabled and V1 is not.
@@ -201,31 +202,26 @@ func countCpuset(cpuset string) (int, error) {
return count, nil
}
-// LoadPaths loads cgroup paths for given 'pid', may be set to 'self'.
-func LoadPaths(pid string) (map[string]string, error) {
- f, err := os.Open(filepath.Join("/proc", pid, "cgroup"))
+// loadPaths loads cgroup paths for given 'pid', may be set to 'self'.
+func loadPaths(pid string) (map[string]string, error) {
+ procCgroup, err := os.Open(filepath.Join("/proc", pid, "cgroup"))
if err != nil {
return nil, err
}
- defer f.Close()
+ defer procCgroup.Close()
- return loadPathsHelper(f)
-}
-
-func loadPathsHelper(cgroup io.Reader) (map[string]string, error) {
- // For nested containers, in /proc/self/cgroup we see paths from host,
- // which don't exist in container, so recover the container paths here by
- // double-checking with /proc/pid/mountinfo
- mountinfo, err := os.Open("/proc/self/mountinfo")
+ // Load mountinfo for the current process, because it's where cgroups is
+ // being accessed from.
+ mountinfo, err := os.Open(filepath.Join("/proc/self/mountinfo"))
if err != nil {
return nil, err
}
defer mountinfo.Close()
- return loadPathsHelperWithMountinfo(cgroup, mountinfo)
+ return loadPathsHelper(procCgroup, mountinfo)
}
-func loadPathsHelperWithMountinfo(cgroup, mountinfo io.Reader) (map[string]string, error) {
+func loadPathsHelper(cgroup, mountinfo io.Reader) (map[string]string, error) {
paths := make(map[string]string)
scanner := bufio.NewScanner(cgroup)
@@ -242,34 +238,51 @@ func loadPathsHelperWithMountinfo(cgroup, mountinfo io.Reader) (map[string]strin
for _, ctrlr := range strings.Split(tokens[1], ",") {
// Remove prefix for cgroups with no controller, eg. systemd.
ctrlr = strings.TrimPrefix(ctrlr, "name=")
- paths[ctrlr] = tokens[2]
+ // Discard unknown controllers.
+ if _, ok := controllers[ctrlr]; ok {
+ paths[ctrlr] = tokens[2]
+ }
}
}
if err := scanner.Err(); err != nil {
return nil, err
}
- mfScanner := bufio.NewScanner(mountinfo)
- for mfScanner.Scan() {
- txt := mfScanner.Text()
- fields := strings.Fields(txt)
+ // For nested containers, in /proc/[pid]/cgroup we see paths from host,
+ // which don't exist in container, so recover the container paths here by
+ // double-checking with /proc/[pid]/mountinfo
+ mountScanner := bufio.NewScanner(mountinfo)
+ for mountScanner.Scan() {
+ // Format: ID parent major:minor root mount-point options opt-fields - fs-type source super-options
+ // Example: 39 32 0:34 / /sys/fs/cgroup/devices rw,noexec shared:18 - cgroup cgroup rw,devices
+ fields := strings.Fields(mountScanner.Text())
if len(fields) < 9 || fields[len(fields)-3] != "cgroup" {
+ // Skip mounts that are not cgroup mounts.
continue
}
- for _, opt := range strings.Split(fields[len(fields)-1], ",") {
+ // Cgroup controller type is in the super-options field.
+ superOptions := strings.Split(fields[len(fields)-1], ",")
+ for _, opt := range superOptions {
// Remove prefix for cgroups with no controller, eg. systemd.
opt = strings.TrimPrefix(opt, "name=")
+
+ // Only considers cgroup controllers that are registered, and skip other
+ // irrelevant options, e.g. rw.
if cgroupPath, ok := paths[opt]; ok {
- root := fields[3]
- relCgroupPath, err := filepath.Rel(root, cgroupPath)
- if err != nil {
- return nil, err
+ rootDir := fields[3]
+ if rootDir != "/" {
+ // When cgroup is in submount, remove repeated path components from
+ // cgroup path to avoid duplicating them.
+ relCgroupPath, err := filepath.Rel(rootDir, cgroupPath)
+ if err != nil {
+ return nil, err
+ }
+ paths[opt] = relCgroupPath
}
- paths[opt] = relCgroupPath
}
}
}
- if err := mfScanner.Err(); err != nil {
+ if err := mountScanner.Err(); err != nil {
return nil, err
}
@@ -279,70 +292,95 @@ func loadPathsHelperWithMountinfo(cgroup, mountinfo io.Reader) (map[string]strin
// Cgroup represents a group inside all controllers. For example:
// Name='/foo/bar' maps to /sys/fs/cgroup/<controller>/foo/bar on
// all controllers.
+//
+// If Name is relative, it uses the parent cgroup path to determine the
+// location. For example:
+// Name='foo/bar' and Parent[ctrl]="/user.slice", then it will map to
+// /sys/fs/cgroup/<ctrl>/user.slice/foo/bar
type Cgroup struct {
Name string `json:"name"`
Parents map[string]string `json:"parents"`
Own map[string]bool `json:"own"`
}
-// New creates a new Cgroup instance if the spec includes a cgroup path.
-// Returns nil otherwise.
-func New(spec *specs.Spec) (*Cgroup, error) {
+// NewFromSpec creates a new Cgroup instance if the spec includes a cgroup path.
+// Returns nil otherwise. Cgroup paths are loaded based on the current process.
+func NewFromSpec(spec *specs.Spec) (*Cgroup, error) {
if spec.Linux == nil || spec.Linux.CgroupsPath == "" {
return nil, nil
}
- return NewFromPath(spec.Linux.CgroupsPath)
+ return new("self", spec.Linux.CgroupsPath)
}
-// NewFromPath creates a new Cgroup instance.
-func NewFromPath(cgroupsPath string) (*Cgroup, error) {
+// NewFromPid loads cgroup for the given process.
+func NewFromPid(pid int) (*Cgroup, error) {
+ return new(strconv.Itoa(pid), "")
+}
+
+func new(pid, cgroupsPath string) (*Cgroup, error) {
var parents map[string]string
+
+ // If path is relative, load cgroup paths for the process to build the
+ // relative paths.
if !filepath.IsAbs(cgroupsPath) {
var err error
- parents, err = LoadPaths("self")
+ parents, err = loadPaths(pid)
if err != nil {
return nil, fmt.Errorf("finding current cgroups: %w", err)
}
}
- own := make(map[string]bool)
- return &Cgroup{
+ cg := &Cgroup{
Name: cgroupsPath,
Parents: parents,
- Own: own,
- }, nil
+ Own: make(map[string]bool),
+ }
+ log.Debugf("New cgroup for pid: %s, %+v", pid, cg)
+ return cg, nil
}
// Install creates and configures cgroups according to 'res'. If cgroup path
// already exists, it means that the caller has already provided a
// pre-configured cgroups, and 'res' is ignored.
func (c *Cgroup) Install(res *specs.LinuxResources) error {
- log.Debugf("Creating cgroup %q", c.Name)
+ log.Debugf("Installing cgroup path %q", c.Name)
- // The Cleanup object cleans up partially created cgroups when an error occurs.
- // Errors occuring during cleanup itself are ignored.
+ // Clean up partially created cgroups on error. Errors during cleanup itself
+ // are ignored.
clean := cleanup.Make(func() { _ = c.Uninstall() })
defer clean.Clean()
- for key, cfg := range controllers {
- path := c.makePath(key)
- if _, err := os.Stat(path); err == nil {
- // If cgroup has already been created; it has been setup by caller. Don't
- // make any changes to configuration, just join when sandbox/gofer starts.
- log.Debugf("Using pre-created cgroup %q", path)
- continue
+ // Controllers can be symlinks to a group of controllers (e.g. cpu,cpuacct).
+ // So first check what directories need to be created. Otherwise, when
+ // the directory for one of the controllers in a group is created, it will
+ // make it seem like the directory already existed and it's not owned by the
+ // other controllers in the group.
+ var missing []string
+ for key := range controllers {
+ path := c.MakePath(key)
+ if _, err := os.Stat(path); err != nil {
+ missing = append(missing, key)
+ } else {
+ log.Debugf("Using pre-created cgroup %q: %q", key, path)
}
-
- // Mark that cgroup resources are owned by me.
- c.Own[key] = true
-
+ }
+ for _, key := range missing {
+ ctrlr := controllers[key]
+ path := c.MakePath(key)
+ log.Debugf("Creating cgroup %q: %q", key, path)
if err := os.MkdirAll(path, 0755); err != nil {
- if cfg.optional && errors.Is(err, unix.EROFS) {
+ if ctrlr.optional() && errors.Is(err, unix.EROFS) {
+ if err := ctrlr.skip(res); err != nil {
+ return err
+ }
log.Infof("Skipping cgroup %q", key)
continue
}
return err
}
- if err := cfg.ctrlr.set(res, path); err != nil {
+
+ // Only set controllers that were created by me.
+ c.Own[key] = true
+ if err := ctrlr.set(res, path); err != nil {
return err
}
}
@@ -359,7 +397,7 @@ func (c *Cgroup) Uninstall() error {
// cgroup is managed by caller, don't touch it.
continue
}
- path := c.makePath(key)
+ path := c.MakePath(key)
log.Debugf("Removing cgroup controller for key=%q path=%q", key, path)
// If we try to remove the cgroup too soon after killing the
@@ -387,7 +425,7 @@ func (c *Cgroup) Uninstall() error {
func (c *Cgroup) Join() (func(), error) {
// First save the current state so it can be restored.
undo := func() {}
- paths, err := LoadPaths("self")
+ paths, err := loadPaths("self")
if err != nil {
return undo, err
}
@@ -414,14 +452,13 @@ func (c *Cgroup) Join() (func(), error) {
}
// Now join the cgroups.
- for key, cfg := range controllers {
- path := c.makePath(key)
+ for key, ctrlr := range controllers {
+ path := c.MakePath(key)
log.Debugf("Joining cgroup %q", path)
- // Writing the value 0 to a cgroup.procs file causes the
- // writing process to be moved to the corresponding cgroup.
- // - cgroups(7).
+ // Writing the value 0 to a cgroup.procs file causes the writing process to
+ // be moved to the corresponding cgroup - cgroups(7).
if err := setValue(path, "cgroup.procs", "0"); err != nil {
- if cfg.optional && os.IsNotExist(err) {
+ if ctrlr.optional() && os.IsNotExist(err) {
continue
}
return undo, err
@@ -432,7 +469,7 @@ func (c *Cgroup) Join() (func(), error) {
// CPUQuota returns the CFS CPU quota.
func (c *Cgroup) CPUQuota() (float64, error) {
- path := c.makePath("cpu")
+ path := c.MakePath("cpu")
quota, err := getInt(path, "cpu.cfs_quota_us")
if err != nil {
return -1, err
@@ -449,7 +486,7 @@ func (c *Cgroup) CPUQuota() (float64, error) {
// CPUUsage returns the total CPU usage of the cgroup.
func (c *Cgroup) CPUUsage() (uint64, error) {
- path := c.makePath("cpuacct")
+ path := c.MakePath("cpuacct")
usage, err := getValue(path, "cpuacct.usage")
if err != nil {
return 0, err
@@ -459,7 +496,7 @@ func (c *Cgroup) CPUUsage() (uint64, error) {
// NumCPU returns the number of CPUs configured in 'cpuset/cpuset.cpus'.
func (c *Cgroup) NumCPU() (int, error) {
- path := c.makePath("cpuset")
+ path := c.MakePath("cpuset")
cpuset, err := getValue(path, "cpuset.cpus")
if err != nil {
return 0, err
@@ -469,7 +506,7 @@ func (c *Cgroup) NumCPU() (int, error) {
// MemoryLimit returns the memory limit.
func (c *Cgroup) MemoryLimit() (uint64, error) {
- path := c.makePath("memory")
+ path := c.MakePath("memory")
limStr, err := getValue(path, "memory.limit_in_bytes")
if err != nil {
return 0, err
@@ -477,7 +514,8 @@ func (c *Cgroup) MemoryLimit() (uint64, error) {
return strconv.ParseUint(strings.TrimSpace(limStr), 10, 64)
}
-func (c *Cgroup) makePath(controllerName string) string {
+// MakePath builds a path to the given controller.
+func (c *Cgroup) MakePath(controllerName string) string {
path := c.Name
if parent, ok := c.Parents[controllerName]; ok {
path = filepath.Join(parent, c.Name)
@@ -485,22 +523,48 @@ func (c *Cgroup) makePath(controllerName string) string {
return filepath.Join(cgroupRoot, controllerName, path)
}
-type config struct {
- ctrlr controller
- optional bool
-}
-
type controller interface {
+ // optional controllers don't fail if not found.
+ optional() bool
+ // set applies resource limits to controller.
set(*specs.LinuxResources, string) error
+ // skip is called when controller is not found to check if it can be safely
+ // skipped or not based on the spec.
+ skip(*specs.LinuxResources) error
}
-type noop struct{}
+type noop struct {
+ isOptional bool
+}
+
+func (n *noop) optional() bool {
+ return n.isOptional
+}
func (*noop) set(*specs.LinuxResources, string) error {
return nil
}
-type memory struct{}
+func (n *noop) skip(*specs.LinuxResources) error {
+ if !n.isOptional {
+ panic("cgroup controller is not optional")
+ }
+ return nil
+}
+
+type mandatory struct{}
+
+func (*mandatory) optional() bool {
+ return false
+}
+
+func (*mandatory) skip(*specs.LinuxResources) error {
+ panic("cgroup controller is not optional")
+}
+
+type memory struct {
+ mandatory
+}
func (*memory) set(spec *specs.LinuxResources, path string) error {
if spec == nil || spec.Memory == nil {
@@ -533,7 +597,9 @@ func (*memory) set(spec *specs.LinuxResources, path string) error {
return nil
}
-type cpu struct{}
+type cpu struct {
+ mandatory
+}
func (*cpu) set(spec *specs.LinuxResources, path string) error {
if spec == nil || spec.CPU == nil {
@@ -554,7 +620,9 @@ func (*cpu) set(spec *specs.LinuxResources, path string) error {
return setOptionalValueInt(path, "cpu.rt_runtime_us", spec.CPU.RealtimeRuntime)
}
-type cpuSet struct{}
+type cpuSet struct {
+ mandatory
+}
func (*cpuSet) set(spec *specs.LinuxResources, path string) error {
// cpuset.cpus and mems are required fields, but are not set on a new cgroup.
@@ -576,7 +644,9 @@ func (*cpuSet) set(spec *specs.LinuxResources, path string) error {
return setValue(path, "cpuset.mems", spec.CPU.Mems)
}
-type blockIO struct{}
+type blockIO struct {
+ mandatory
+}
func (*blockIO) set(spec *specs.LinuxResources, path string) error {
if spec == nil || spec.BlockIO == nil {
@@ -628,6 +698,10 @@ func setThrottle(path, name string, devs []specs.LinuxThrottleDevice) error {
type networkClass struct{}
+func (*networkClass) optional() bool {
+ return true
+}
+
func (*networkClass) set(spec *specs.LinuxResources, path string) error {
if spec == nil || spec.Network == nil {
return nil
@@ -635,8 +709,19 @@ func (*networkClass) set(spec *specs.LinuxResources, path string) error {
return setOptionalValueUint32(path, "net_cls.classid", spec.Network.ClassID)
}
+func (*networkClass) skip(spec *specs.LinuxResources) error {
+ if spec != nil && spec.Network != nil && spec.Network.ClassID != nil {
+ return fmt.Errorf("Network.ClassID set but net_cls cgroup controller not found")
+ }
+ return nil
+}
+
type networkPrio struct{}
+func (*networkPrio) optional() bool {
+ return true
+}
+
func (*networkPrio) set(spec *specs.LinuxResources, path string) error {
if spec == nil || spec.Network == nil {
return nil
@@ -650,7 +735,16 @@ func (*networkPrio) set(spec *specs.LinuxResources, path string) error {
return nil
}
-type pids struct{}
+func (*networkPrio) skip(spec *specs.LinuxResources) error {
+ if spec != nil && spec.Network != nil && len(spec.Network.Priorities) > 0 {
+ return fmt.Errorf("Network.Priorities set but net_prio cgroup controller not found")
+ }
+ return nil
+}
+
+type pids struct {
+ mandatory
+}
func (*pids) set(spec *specs.LinuxResources, path string) error {
if spec == nil || spec.Pids == nil || spec.Pids.Limit <= 0 {
@@ -662,6 +756,17 @@ func (*pids) set(spec *specs.LinuxResources, path string) error {
type hugeTLB struct{}
+func (*hugeTLB) optional() bool {
+ return true
+}
+
+func (*hugeTLB) skip(spec *specs.LinuxResources) error {
+ if spec != nil && len(spec.HugepageLimits) > 0 {
+ return fmt.Errorf("HugepageLimits set but hugetlb cgroup controller not found")
+ }
+ return nil
+}
+
func (*hugeTLB) set(spec *specs.LinuxResources, path string) error {
if spec == nil {
return nil
diff --git a/runsc/cgroup/cgroup_test.go b/runsc/cgroup/cgroup_test.go
index 48d71cfa6..eba40621e 100644
--- a/runsc/cgroup/cgroup_test.go
+++ b/runsc/cgroup/cgroup_test.go
@@ -43,27 +43,27 @@ var debianMountinfo = `
`
var dindMountinfo = `
-1305 1304 0:64 / /sys/fs/cgroup rw - tmpfs tmpfs rw,mode=755
-1306 1305 0:32 /docker/136 /sys/fs/cgroup/systemd ro master:11 - cgroup cgroup rw,xattr,name=systemd
-1307 1305 0:36 /docker/136 /sys/fs/cgroup/cpu,cpuacct ro master:16 - cgroup cgroup rw,cpu,cpuacct
-1308 1305 0:37 /docker/136 /sys/fs/cgroup/freezer ro master:17 - cgroup cgroup rw,freezer
-1309 1305 0:38 /docker/136 /sys/fs/cgroup/hugetlb ro master:18 - cgroup cgroup rw,hugetlb
-1310 1305 0:39 /docker/136 /sys/fs/cgroup/cpuset ro master:19 - cgroup cgroup rw,cpuset
-1311 1305 0:40 /docker/136 /sys/fs/cgroup/net_cls,net_prio ro master:20 - cgroup cgroup rw,net_cls,net_prio
-1312 1305 0:41 /docker/136 /sys/fs/cgroup/pids ro master:21 - cgroup cgroup rw,pids
-1313 1305 0:42 /docker/136 /sys/fs/cgroup/perf_event ro master:22 - cgroup cgroup rw,perf_event
-1314 1305 0:43 /docker/136 /sys/fs/cgroup/memory ro master:23 - cgroup cgroup rw,memory
-1316 1305 0:44 /docker/136 /sys/fs/cgroup/blkio ro master:24 - cgroup cgroup rw,blkio
-1317 1305 0:45 /docker/136 /sys/fs/cgroup/devices ro master:25 - cgroup cgroup rw,devices
-1318 1305 0:46 / /sys/fs/cgroup/rdma ro master:26 - cgroup cgroup rw,rdma
+05 04 0:64 / /sys/fs/cgroup rw - tmpfs tmpfs rw,mode=755
+06 05 0:32 /docker/136 /sys/fs/cgroup/systemd ro master:11 - cgroup cgroup rw,xattr,name=systemd
+07 05 0:36 /docker/136 /sys/fs/cgroup/cpu,cpuacct ro master:16 - cgroup cgroup rw,cpu,cpuacct
+08 05 0:37 /docker/136 /sys/fs/cgroup/freezer ro master:17 - cgroup cgroup rw,freezer
+09 05 0:38 /docker/136 /sys/fs/cgroup/hugetlb ro master:18 - cgroup cgroup rw,hugetlb
+10 05 0:39 /docker/136 /sys/fs/cgroup/cpuset ro master:19 - cgroup cgroup rw,cpuset
+11 05 0:40 /docker/136 /sys/fs/cgroup/net_cls,net_prio ro master:20 - cgroup cgroup rw,net_cls,net_prio
+12 05 0:41 /docker/136 /sys/fs/cgroup/pids ro master:21 - cgroup cgroup rw,pids
+13 05 0:42 /docker/136 /sys/fs/cgroup/perf_event ro master:22 - cgroup cgroup rw,perf_event
+14 05 0:43 /docker/136 /sys/fs/cgroup/memory ro master:23 - cgroup cgroup rw,memory
+16 05 0:44 /docker/136 /sys/fs/cgroup/blkio ro master:24 - cgroup cgroup rw,blkio
+17 05 0:45 /docker/136 /sys/fs/cgroup/devices ro master:25 - cgroup cgroup rw,devices
+18 05 0:46 / /sys/fs/cgroup/rdma ro master:26 - cgroup cgroup rw,rdma
`
func TestUninstallEnoent(t *testing.T) {
c := Cgroup{
- // set a non-existent name
+ // Use a non-existent name.
Name: "runsc-test-uninstall-656e6f656e740a",
+ Own: make(map[string]bool),
}
- c.Own = make(map[string]bool)
for key := range controllers {
c.Own[key] = true
}
@@ -693,36 +693,42 @@ func TestLoadPaths(t *testing.T) {
err string
}{
{
- name: "abs-path-unknown-controller",
- cgroups: "0:ctr:/path",
+ name: "empty",
mountinfo: debianMountinfo,
- want: map[string]string{"ctr": "/path"},
+ },
+ {
+ name: "abs-path",
+ cgroups: "0:cpu:/path",
+ mountinfo: debianMountinfo,
+ want: map[string]string{"cpu": "/path"},
},
{
name: "rel-path",
- cgroups: "0:ctr:rel-path",
+ cgroups: "0:cpu:rel-path",
mountinfo: debianMountinfo,
- want: map[string]string{"ctr": "rel-path"},
+ want: map[string]string{"cpu": "rel-path"},
},
{
name: "non-controller",
cgroups: "0:name=systemd:/path",
mountinfo: debianMountinfo,
- want: map[string]string{"systemd": "path"},
+ want: map[string]string{"systemd": "/path"},
},
{
- name: "empty",
+ name: "unknown-controller",
+ cgroups: "0:ctr:/path",
mountinfo: debianMountinfo,
+ want: map[string]string{},
},
{
name: "multiple",
- cgroups: "0:ctr0:/path0\n" +
- "1:ctr1:/path1\n" +
+ cgroups: "0:cpu:/path0\n" +
+ "1:memory:/path1\n" +
"2::/empty\n",
mountinfo: debianMountinfo,
want: map[string]string{
- "ctr0": "/path0",
- "ctr1": "/path1",
+ "cpu": "/path0",
+ "memory": "/path1",
},
},
{
@@ -747,10 +753,10 @@ func TestLoadPaths(t *testing.T) {
},
{
name: "nested-cgroup",
- cgroups: `9:memory:/docker/136
-2:cpu,cpuacct:/docker/136
-1:name=systemd:/docker/136
-0::/system.slice/containerd.service`,
+ cgroups: "9:memory:/docker/136\n" +
+ "2:cpu,cpuacct:/docker/136\n" +
+ "1:name=systemd:/docker/136\n" +
+ "0::/system.slice/containerd.service\n",
mountinfo: dindMountinfo,
// we want relative path to /sys/fs/cgroup inside the nested container.
// Subcroup inside the container will be created at /sys/fs/cgroup/cpu
@@ -781,15 +787,15 @@ func TestLoadPaths(t *testing.T) {
},
{
name: "invalid-rel-path-in-proc-cgroup",
- cgroups: "9:memory:./invalid",
+ cgroups: "9:memory:invalid",
mountinfo: dindMountinfo,
- err: "can't make ./invalid relative to /docker/136",
+ err: "can't make invalid relative to /docker/136",
},
} {
t.Run(tc.name, func(t *testing.T) {
r := strings.NewReader(tc.cgroups)
mountinfo := strings.NewReader(tc.mountinfo)
- got, err := loadPathsHelperWithMountinfo(r, mountinfo)
+ got, err := loadPathsHelper(r, mountinfo)
if len(tc.err) == 0 {
if err != nil {
t.Fatalf("Unexpected error: %v", err)
@@ -813,3 +819,47 @@ func TestLoadPaths(t *testing.T) {
})
}
}
+
+func TestOptional(t *testing.T) {
+ for _, tc := range []struct {
+ name string
+ ctrlr controller
+ spec *specs.LinuxResources
+ err string
+ }{
+ {
+ name: "net-cls",
+ ctrlr: &networkClass{},
+ spec: &specs.LinuxResources{Network: &specs.LinuxNetwork{ClassID: uint32Ptr(1)}},
+ err: "Network.ClassID set but net_cls cgroup controller not found",
+ },
+ {
+ name: "net-prio",
+ ctrlr: &networkPrio{},
+ spec: &specs.LinuxResources{Network: &specs.LinuxNetwork{
+ Priorities: []specs.LinuxInterfacePriority{
+ {Name: "foo", Priority: 1},
+ },
+ }},
+ err: "Network.Priorities set but net_prio cgroup controller not found",
+ },
+ {
+ name: "hugetlb",
+ ctrlr: &hugeTLB{},
+ spec: &specs.LinuxResources{HugepageLimits: []specs.LinuxHugepageLimit{
+ {Pagesize: "1", Limit: 2},
+ }},
+ err: "HugepageLimits set but hugetlb cgroup controller not found",
+ },
+ } {
+ t.Run(tc.name, func(t *testing.T) {
+ err := tc.ctrlr.skip(tc.spec)
+ if err == nil {
+ t.Fatalf("ctrlr.skip() didn't fail")
+ }
+ if !strings.Contains(err.Error(), tc.err) {
+ t.Errorf("ctrlr.skip() want: *%s*, got: %q", tc.err, err)
+ }
+ })
+ }
+}
diff --git a/runsc/cmd/mitigate.go b/runsc/cmd/mitigate.go
index d37ab80ba..f4e65adb8 100644
--- a/runsc/cmd/mitigate.go
+++ b/runsc/cmd/mitigate.go
@@ -18,6 +18,7 @@ import (
"context"
"fmt"
"io/ioutil"
+ "runtime"
"github.com/google/subcommands"
"gvisor.dev/gvisor/pkg/log"
@@ -72,6 +73,11 @@ func (m *Mitigate) SetFlags(f *flag.FlagSet) {
// Execute implements subcommands.Command.Execute.
func (m *Mitigate) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) subcommands.ExitStatus {
+ if runtime.GOARCH == "arm64" || runtime.GOARCH == "arm" {
+ log.Warningf("As ARM is not affected by MDS, mitigate does not support")
+ return subcommands.ExitFailure
+ }
+
if f.NArg() != 0 {
f.Usage()
return subcommands.ExitUsageError
diff --git a/runsc/cmd/mitigate_test.go b/runsc/cmd/mitigate_test.go
index 5a76667e3..2d3fef7c1 100644
--- a/runsc/cmd/mitigate_test.go
+++ b/runsc/cmd/mitigate_test.go
@@ -12,6 +12,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.
+// +build amd64
+
package cmd
import (
diff --git a/runsc/container/container.go b/runsc/container/container.go
index e72ada311..0820edaec 100644
--- a/runsc/container/container.go
+++ b/runsc/container/container.go
@@ -233,7 +233,7 @@ func New(conf *config.Config, args Args) (*Container, error) {
}
// Create and join cgroup before processes are created to ensure they are
// part of the cgroup from the start (and all their children processes).
- cg, err := cgroup.New(args.Spec)
+ cg, err := cgroup.NewFromSpec(args.Spec)
if err != nil {
return nil, err
}
@@ -1132,7 +1132,7 @@ func (c *Container) populateStats(event *boot.EventOut) {
// account for the full cgroup CPU usage. We split cgroup usage
// proportionally according to the sentry-internal usage measurements,
// only counting Running containers.
- log.Warningf("event.ContainerUsage: %v", event.ContainerUsage)
+ log.Debugf("event.ContainerUsage: %v", event.ContainerUsage)
var containerUsage uint64
var allContainersUsage uint64
for ID, usage := range event.ContainerUsage {
@@ -1142,7 +1142,7 @@ func (c *Container) populateStats(event *boot.EventOut) {
}
}
- cgroup, err := c.Sandbox.FindCgroup()
+ cgroup, err := c.Sandbox.NewCGroup()
if err != nil {
// No cgroup, so rely purely on the sentry's accounting.
log.Warningf("events: no cgroups")
@@ -1159,17 +1159,18 @@ func (c *Container) populateStats(event *boot.EventOut) {
return
}
- // If the sentry reports no memory usage, fall back on cgroups and
- // split usage equally across containers.
+ // If the sentry reports no CPU usage, fall back on cgroups and split usage
+ // equally across containers.
if allContainersUsage == 0 {
log.Warningf("events: no sentry CPU usage reported")
allContainersUsage = cgroupsUsage
containerUsage = cgroupsUsage / uint64(len(event.ContainerUsage))
}
- log.Warningf("%f, %f, %f", containerUsage, cgroupsUsage, allContainersUsage)
// Scaling can easily overflow a uint64 (e.g. a containerUsage and
// cgroupsUsage of 16 seconds each will overflow), so use floats.
- event.Event.Data.CPU.Usage.Total = uint64(float64(containerUsage) * (float64(cgroupsUsage) / float64(allContainersUsage)))
+ total := float64(containerUsage) * (float64(cgroupsUsage) / float64(allContainersUsage))
+ log.Debugf("Usage, container: %d, cgroups: %d, all: %d, total: %.0f", containerUsage, cgroupsUsage, allContainersUsage, total)
+ event.Event.Data.CPU.Usage.Total = uint64(total)
return
}
diff --git a/runsc/container/container_test.go b/runsc/container/container_test.go
index 5a0c468a4..0e79877b7 100644
--- a/runsc/container/container_test.go
+++ b/runsc/container/container_test.go
@@ -2449,6 +2449,27 @@ func TestCreateWithCorruptedStateFile(t *testing.T) {
}
}
+func TestBindMountByOption(t *testing.T) {
+ for name, conf := range configs(t, all...) {
+ t.Run(name, func(t *testing.T) {
+ dir, err := ioutil.TempDir(testutil.TmpDir(), "bind-mount")
+ spec := testutil.NewSpecWithArgs("/bin/touch", path.Join(dir, "file"))
+ if err != nil {
+ t.Fatalf("ioutil.TempDir(): %v", err)
+ }
+ spec.Mounts = append(spec.Mounts, specs.Mount{
+ Destination: dir,
+ Source: dir,
+ Type: "none",
+ Options: []string{"rw", "bind"},
+ })
+ if err := run(spec, conf); err != nil {
+ t.Fatalf("error running sandbox: %v", err)
+ }
+ })
+ }
+}
+
func execute(cont *Container, name string, arg ...string) (unix.WaitStatus, error) {
args := &control.ExecArgs{
Filename: name,
diff --git a/runsc/container/multi_container_test.go b/runsc/container/multi_container_test.go
index 0f0a223ce..0dbe1e323 100644
--- a/runsc/container/multi_container_test.go
+++ b/runsc/container/multi_container_test.go
@@ -25,6 +25,7 @@ import (
"testing"
"time"
+ "github.com/cenkalti/backoff"
specs "github.com/opencontainers/runtime-spec/specs-go"
"golang.org/x/sys/unix"
"gvisor.dev/gvisor/pkg/cleanup"
@@ -1510,7 +1511,7 @@ func TestMultiContainerSharedMountUnsupportedOptions(t *testing.T) {
Destination: "/mydir/test",
Source: "/some/dir",
Type: "tmpfs",
- Options: []string{"rw", "rbind", "relatime"},
+ Options: []string{"rw", "relatime"},
}
podSpec[0].Mounts = append(podSpec[0].Mounts, mnt0)
@@ -1917,9 +1918,9 @@ func TestMultiContainerEvent(t *testing.T) {
}
defer cleanup()
- for _, cont := range containers {
- t.Logf("Running containerd %s", cont.ID)
- }
+ t.Logf("Running container sleep %s", containers[0].ID)
+ t.Logf("Running container busy %s", containers[1].ID)
+ t.Logf("Running container quick %s", containers[2].ID)
// Wait for last container to stabilize the process count that is
// checked further below.
@@ -1940,50 +1941,61 @@ func TestMultiContainerEvent(t *testing.T) {
}
// Check events for running containers.
- var prevUsage uint64
for _, cont := range containers[:2] {
ret, err := cont.Event()
if err != nil {
- t.Errorf("Container.Events(): %v", err)
+ t.Errorf("Container.Event(%q): %v", cont.ID, err)
}
evt := ret.Event
if want := "stats"; evt.Type != want {
- t.Errorf("Wrong event type, want: %s, got: %s", want, evt.Type)
+ t.Errorf("Wrong event type, cid: %q, want: %s, got: %s", cont.ID, want, evt.Type)
}
if cont.ID != evt.ID {
t.Errorf("Wrong container ID, want: %s, got: %s", cont.ID, evt.ID)
}
// One process per remaining container.
if got, want := evt.Data.Pids.Current, uint64(2); got != want {
- t.Errorf("Wrong number of PIDs, want: %d, got: %d", want, got)
+ t.Errorf("Wrong number of PIDs, cid: %q, want: %d, got: %d", cont.ID, want, got)
}
- // Both remaining containers should have nonzero usage, and
- // 'busy' should have higher usage than 'sleep'.
- usage := evt.Data.CPU.Usage.Total
- if usage == 0 {
- t.Errorf("Running container should report nonzero CPU usage, but got %d", usage)
+ // The exited container should always have a usage of zero.
+ if exited := ret.ContainerUsage[containers[2].ID]; exited != 0 {
+ t.Errorf("Exited container should report 0 CPU usage, got: %d", exited)
+ }
+ }
+
+ // Check that CPU reported by busy container is higher than sleep.
+ cb := func() error {
+ sleepEvt, err := containers[0].Event()
+ if err != nil {
+ return &backoff.PermanentError{Err: err}
}
- if usage <= prevUsage {
- t.Errorf("Expected container %s to use more than %d ns of CPU, but used %d", cont.ID, prevUsage, usage)
+ sleepUsage := sleepEvt.Event.Data.CPU.Usage.Total
+
+ busyEvt, err := containers[1].Event()
+ if err != nil {
+ return &backoff.PermanentError{Err: err}
}
- t.Logf("Container %s usage: %d", cont.ID, usage)
- prevUsage = usage
+ busyUsage := busyEvt.Event.Data.CPU.Usage.Total
- // The exited container should have a usage of zero.
- if exited := ret.ContainerUsage[containers[2].ID]; exited != 0 {
- t.Errorf("Exited container should report 0 CPU usage, but got %d", exited)
+ if busyUsage <= sleepUsage {
+ t.Logf("Busy container usage lower than sleep (busy: %d, sleep: %d), retrying...", busyUsage, sleepUsage)
+ return fmt.Errorf("Busy container should have higher usage than sleep, busy: %d, sleep: %d", busyUsage, sleepUsage)
}
+ return nil
+ }
+ // Give time for busy container to run and use more CPU than sleep.
+ if err := testutil.Poll(cb, 10*time.Second); err != nil {
+ t.Fatal(err)
}
- // Check that stop and destroyed containers return error.
+ // Check that stopped and destroyed containers return error.
if err := containers[1].Destroy(); err != nil {
t.Fatalf("container.Destroy: %v", err)
}
for _, cont := range containers[1:] {
- _, err := cont.Event()
- if err == nil {
- t.Errorf("Container.Events() should have failed, cid:%s, state: %v", cont.ID, cont.Status)
+ if _, err := cont.Event(); err == nil {
+ t.Errorf("Container.Event() should have failed, cid: %q, state: %v", cont.ID, cont.Status)
}
}
}
diff --git a/runsc/fsgofer/fsgofer.go b/runsc/fsgofer/fsgofer.go
index e04ddda47..3f362b25e 100644
--- a/runsc/fsgofer/fsgofer.go
+++ b/runsc/fsgofer/fsgofer.go
@@ -21,6 +21,7 @@
package fsgofer
import (
+ "errors"
"fmt"
"io"
"math"
@@ -58,9 +59,6 @@ var verityXattrs = map[string]struct{}{
// join is equivalent to path.Join() but skips path.Clean() which is expensive.
func join(parent, child string) string {
- if child == "." || child == ".." {
- panic(fmt.Sprintf("invalid child path %q", child))
- }
return parent + "/" + child
}
@@ -1226,3 +1224,60 @@ func (l *localFile) checkROMount() error {
}
return nil
}
+
+func (l *localFile) MultiGetAttr(names []string) ([]p9.FullStat, error) {
+ stats := make([]p9.FullStat, 0, len(names))
+
+ if len(names) > 0 && names[0] == "" {
+ qid, valid, attr, err := l.GetAttr(p9.AttrMask{})
+ if err != nil {
+ return nil, err
+ }
+ stats = append(stats, p9.FullStat{
+ QID: qid,
+ Valid: valid,
+ Attr: attr,
+ })
+ names = names[1:]
+ }
+
+ parent := l.file.FD()
+ for _, name := range names {
+ child, err := unix.Openat(parent, name, openFlags|unix.O_PATH, 0)
+ if parent != l.file.FD() {
+ // Parent is no longer needed.
+ _ = unix.Close(parent)
+ parent = -1
+ }
+ if err != nil {
+ if errors.Is(err, unix.ENOENT) {
+ // No pont in continuing any further.
+ break
+ }
+ return nil, err
+ }
+
+ var stat unix.Stat_t
+ if err := unix.Fstat(child, &stat); err != nil {
+ _ = unix.Close(child)
+ return nil, err
+ }
+ valid, attr := l.fillAttr(&stat)
+ stats = append(stats, p9.FullStat{
+ QID: l.attachPoint.makeQID(&stat),
+ Valid: valid,
+ Attr: attr,
+ })
+ if (stat.Mode & unix.S_IFMT) != unix.S_IFDIR {
+ // Doesn't need to continue if entry is not a dir. Including symlinks
+ // that cannot be followed.
+ _ = unix.Close(child)
+ break
+ }
+ parent = child
+ }
+ if parent != -1 && parent != l.file.FD() {
+ _ = unix.Close(parent)
+ }
+ return stats, nil
+}
diff --git a/runsc/fsgofer/fsgofer_test.go b/runsc/fsgofer/fsgofer_test.go
index d7e141476..77723827a 100644
--- a/runsc/fsgofer/fsgofer_test.go
+++ b/runsc/fsgofer/fsgofer_test.go
@@ -703,16 +703,6 @@ func TestWalkNotFound(t *testing.T) {
})
}
-func TestWalkPanic(t *testing.T) {
- runCustom(t, []uint32{unix.S_IFDIR}, allConfs, func(t *testing.T, s state) {
- for _, name := range []string{".", ".."} {
- assertPanic(t, func() {
- s.file.Walk([]string{name})
- })
- }
- })
-}
-
func TestWalkDup(t *testing.T) {
runAll(t, func(t *testing.T, s state) {
_, dup, err := s.file.Walk([]string{})
diff --git a/runsc/mitigate/mitigate_test.go b/runsc/mitigate/mitigate_test.go
index 3bf9ef547..890c65f05 100644
--- a/runsc/mitigate/mitigate_test.go
+++ b/runsc/mitigate/mitigate_test.go
@@ -12,6 +12,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.
+// +build amd64
+
package mitigate
import (
diff --git a/runsc/sandbox/sandbox.go b/runsc/sandbox/sandbox.go
index f3f60f116..29e202b7d 100644
--- a/runsc/sandbox/sandbox.go
+++ b/runsc/sandbox/sandbox.go
@@ -131,8 +131,9 @@ func New(conf *config.Config, args *Args) (*Sandbox, error) {
// The Cleanup object cleans up partially created sandboxes when an error
// occurs. Any errors occurring during cleanup itself are ignored.
c := cleanup.Make(func() {
- err := s.destroy()
- log.Warningf("error destroying sandbox: %v", err)
+ if err := s.destroy(); err != nil {
+ log.Warningf("error destroying sandbox: %v", err)
+ }
})
defer c.Clean()
@@ -310,20 +311,9 @@ func (s *Sandbox) Processes(cid string) ([]*control.Process, error) {
return pl, nil
}
-// FindCgroup returns the sandbox's Cgroup, or an error if it does not have one.
-func (s *Sandbox) FindCgroup() (*cgroup.Cgroup, error) {
- paths, err := cgroup.LoadPaths(strconv.Itoa(s.Pid))
- if err != nil {
- return nil, err
- }
- // runsc places sandboxes in the same cgroup for each controller, so we
- // pick an arbitrary controller here to get the cgroup path.
- const controller = "cpuacct"
- controllerPath, ok := paths[controller]
- if !ok {
- return nil, fmt.Errorf("no %q controller found", controller)
- }
- return cgroup.NewFromPath(controllerPath)
+// NewCGroup returns the sandbox's Cgroup, or an error if it does not have one.
+func (s *Sandbox) NewCGroup() (*cgroup.Cgroup, error) {
+ return cgroup.NewFromPid(s.Pid)
}
// Execute runs the specified command in the container. It returns the PID of
diff --git a/runsc/specutils/seccomp/BUILD b/runsc/specutils/seccomp/BUILD
index e9e647d82..c5f5b863e 100644
--- a/runsc/specutils/seccomp/BUILD
+++ b/runsc/specutils/seccomp/BUILD
@@ -28,8 +28,10 @@ go_test(
srcs = ["seccomp_test.go"],
library = ":seccomp",
deps = [
- "//pkg/binary",
+ "//pkg/abi/linux",
"//pkg/bpf",
+ "//pkg/hostarch",
+ "//pkg/marshal",
"@com_github_opencontainers_runtime_spec//specs-go:go_default_library",
"@org_golang_x_sys//unix:go_default_library",
],
diff --git a/runsc/specutils/seccomp/seccomp_test.go b/runsc/specutils/seccomp/seccomp_test.go
index 11a6c8daa..20796bf14 100644
--- a/runsc/specutils/seccomp/seccomp_test.go
+++ b/runsc/specutils/seccomp/seccomp_test.go
@@ -20,20 +20,15 @@ import (
specs "github.com/opencontainers/runtime-spec/specs-go"
"golang.org/x/sys/unix"
- "gvisor.dev/gvisor/pkg/binary"
+ "gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/bpf"
+ "gvisor.dev/gvisor/pkg/hostarch"
+ "gvisor.dev/gvisor/pkg/marshal"
)
-type seccompData struct {
- nr uint32
- arch uint32
- instructionPointer uint64
- args [6]uint64
-}
-
-// asInput converts a seccompData to a bpf.Input.
-func asInput(d seccompData) bpf.Input {
- return bpf.InputBytes{binary.Marshal(nil, binary.LittleEndian, d), binary.LittleEndian}
+// asInput converts a linux.SeccompData to a bpf.Input.
+func asInput(d *linux.SeccompData) bpf.Input {
+ return bpf.InputBytes{marshal.Marshal(d), hostarch.ByteOrder}
}
// testInput creates an Input struct with given seccomp input values.
@@ -49,13 +44,13 @@ func testInput(arch uint32, syscallName string, args *[6]uint64) bpf.Input {
args = &argArray
}
- data := seccompData{
- nr: syscallNo,
- arch: arch,
- args: *args,
+ data := linux.SeccompData{
+ Nr: int32(syscallNo),
+ Arch: arch,
+ Args: *args,
}
- return asInput(data)
+ return asInput(&data)
}
// testCase holds a seccomp test case.
@@ -100,7 +95,7 @@ var (
},
// Syscall matches but the arch is AUDIT_ARCH_X86 so the return
// value is the bad arch action.
- input: asInput(seccompData{nr: 183, arch: 0x40000003}), //
+ input: asInput(&linux.SeccompData{Nr: 183, Arch: 0x40000003}), //
expected: uint32(killThreadAction),
},
{
diff --git a/runsc/specutils/specutils.go b/runsc/specutils/specutils.go
index e5e66546c..11b476690 100644
--- a/runsc/specutils/specutils.go
+++ b/runsc/specutils/specutils.go
@@ -335,9 +335,27 @@ func capsFromNames(names []string, skipSet map[linux.Capability]struct{}) (auth.
// Is9PMount returns true if the given mount can be mounted as an external
// gofer.
func Is9PMount(m specs.Mount, vfs2Enabled bool) bool {
+ MaybeConvertToBindMount(&m)
return m.Type == "bind" && m.Source != "" && IsSupportedDevMount(m, vfs2Enabled)
}
+// MaybeConvertToBindMount converts mount type to "bind" in case any of the
+// mount options are either "bind" or "rbind" as required by the OCI spec.
+//
+// "For bind mounts (when options include either bind or rbind), the type is a
+// dummy, often "none" (not listed in /proc/filesystems)."
+func MaybeConvertToBindMount(m *specs.Mount) {
+ if m.Type == "bind" {
+ return
+ }
+ for _, opt := range m.Options {
+ if opt == "bind" || opt == "rbind" {
+ m.Type = "bind"
+ return
+ }
+ }
+}
+
// IsSupportedDevMount returns true if m.Destination does not specify a
// path that is hardcoded by VFS1's implementation of /dev.
func IsSupportedDevMount(m specs.Mount, vfs2Enabled bool) bool {