diff options
Diffstat (limited to 'runsc/boot')
-rw-r--r-- | runsc/boot/BUILD | 3 | ||||
-rw-r--r-- | runsc/boot/controller.go | 2 | ||||
-rw-r--r-- | runsc/boot/fs.go | 48 | ||||
-rw-r--r-- | runsc/boot/loader.go | 16 | ||||
-rw-r--r-- | runsc/boot/loader_test.go | 17 | ||||
-rw-r--r-- | runsc/boot/vfs.go | 94 |
6 files changed, 162 insertions, 18 deletions
diff --git a/runsc/boot/BUILD b/runsc/boot/BUILD index 67307ab3c..a79afbdc4 100644 --- a/runsc/boot/BUILD +++ b/runsc/boot/BUILD @@ -30,6 +30,7 @@ go_library( "//pkg/cleanup", "//pkg/context", "//pkg/control/server", + "//pkg/coverage", "//pkg/cpuid", "//pkg/eventchannel", "//pkg/fd", @@ -57,6 +58,7 @@ go_library( "//pkg/sentry/fs/tmpfs", "//pkg/sentry/fs/tty", "//pkg/sentry/fs/user", + "//pkg/sentry/fsimpl/cgroupfs", "//pkg/sentry/fsimpl/devpts", "//pkg/sentry/fsimpl/devtmpfs", "//pkg/sentry/fsimpl/fuse", @@ -66,6 +68,7 @@ go_library( "//pkg/sentry/fsimpl/proc", "//pkg/sentry/fsimpl/sys", "//pkg/sentry/fsimpl/tmpfs", + "//pkg/sentry/fsimpl/verity", "//pkg/sentry/inet", "//pkg/sentry/kernel", "//pkg/sentry/kernel:uncaught_signal_go_proto", diff --git a/runsc/boot/controller.go b/runsc/boot/controller.go index 1ae76d7d7..05b721b28 100644 --- a/runsc/boot/controller.go +++ b/runsc/boot/controller.go @@ -400,7 +400,7 @@ func (cm *containerManager) Restore(o *RestoreOpts, _ *struct{}) error { // Set up the restore environment. 
ctx := k.SupervisorContext() - mntr := newContainerMounter(cm.l.root.spec, cm.l.root.goferFDs, cm.l.k, cm.l.mountHints, kernel.VFS2Enabled) + mntr := newContainerMounter(&cm.l.root, cm.l.k, cm.l.mountHints, kernel.VFS2Enabled) if kernel.VFS2Enabled { ctx, err = mntr.configureRestore(ctx, cm.l.root.conf) if err != nil { diff --git a/runsc/boot/fs.go b/runsc/boot/fs.go index 32adde643..3c0cef6db 100644 --- a/runsc/boot/fs.go +++ b/runsc/boot/fs.go @@ -31,6 +31,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/fs/gofer" "gvisor.dev/gvisor/pkg/sentry/fs/ramfs" "gvisor.dev/gvisor/pkg/sentry/fs/user" + "gvisor.dev/gvisor/pkg/sentry/fsimpl/cgroupfs" "gvisor.dev/gvisor/pkg/sentry/fsimpl/devpts" "gvisor.dev/gvisor/pkg/sentry/fsimpl/devtmpfs" gofervfs2 "gvisor.dev/gvisor/pkg/sentry/fsimpl/gofer" @@ -103,17 +104,22 @@ func addOverlay(ctx context.Context, conf *config.Config, lower *fs.Inode, name // compileMounts returns the supported mounts from the mount spec, adding any // mandatory mounts that are required by the OCI specification. -func compileMounts(spec *specs.Spec, vfs2Enabled bool) []specs.Mount { +func compileMounts(spec *specs.Spec, conf *config.Config, vfs2Enabled bool) []specs.Mount { // Keep track of whether proc and sys were mounted. var procMounted, sysMounted, devMounted, devptsMounted bool var mounts []specs.Mount // Mount all submounts from the spec. for _, m := range spec.Mounts { - if !vfs2Enabled && !specutils.IsVFS1SupportedDevMount(m) { + if !specutils.IsSupportedDevMount(m, vfs2Enabled) { log.Warningf("ignoring dev mount at %q", m.Destination) continue } + // Unconditionally drop any cgroupfs mounts. If requested, we'll add our + // own below. + if m.Type == cgroupfs.Name { + continue + } switch filepath.Clean(m.Destination) { case "/proc": procMounted = true @@ -132,6 +138,24 @@ func compileMounts(spec *specs.Spec, vfs2Enabled bool) []specs.Mount { // Mount proc and sys even if the user did not ask for it, as the spec // says we SHOULD. 
var mandatoryMounts []specs.Mount + + if conf.Cgroupfs { + mandatoryMounts = append(mandatoryMounts, specs.Mount{ + Type: tmpfsvfs2.Name, + Destination: "/sys/fs/cgroup", + }) + mandatoryMounts = append(mandatoryMounts, specs.Mount{ + Type: cgroupfs.Name, + Destination: "/sys/fs/cgroup/memory", + Options: []string{"memory"}, + }) + mandatoryMounts = append(mandatoryMounts, specs.Mount{ + Type: cgroupfs.Name, + Destination: "/sys/fs/cgroup/cpu", + Options: []string{"cpu"}, + }) + } + if !procMounted { mandatoryMounts = append(mandatoryMounts, specs.Mount{ Type: procvfs2.Name, @@ -248,6 +272,10 @@ func isSupportedMountFlag(fstype, opt string) bool { ok, err := parseMountOption(opt, tmpfsAllowedData...) return ok && err == nil } + if fstype == cgroupfs.Name { + ok, err := parseMountOption(opt, cgroupfs.SupportedMountOptions...) + return ok && err == nil + } return false } @@ -572,11 +600,11 @@ type containerMounter struct { hints *podMountHints } -func newContainerMounter(spec *specs.Spec, goferFDs []*fd.FD, k *kernel.Kernel, hints *podMountHints, vfs2Enabled bool) *containerMounter { +func newContainerMounter(info *containerInfo, k *kernel.Kernel, hints *podMountHints, vfs2Enabled bool) *containerMounter { return &containerMounter{ - root: spec.Root, - mounts: compileMounts(spec, vfs2Enabled), - fds: fdDispenser{fds: goferFDs}, + root: info.spec.Root, + mounts: compileMounts(info.spec, info.conf, vfs2Enabled), + fds: fdDispenser{fds: info.goferFDs}, k: k, hints: hints, } @@ -795,7 +823,13 @@ func (c *containerMounter) getMountNameAndOptions(conf *config.Config, m specs.M opts = p9MountData(fd, c.getMountAccessType(conf, m), conf.VFS2) // If configured, add overlay to all writable mounts. useOverlay = conf.Overlay && !mountFlags(m.Options).ReadOnly - + case cgroupfs.Name: + fsName = m.Type + var err error + opts, err = parseAndFilterOptions(m.Options, cgroupfs.SupportedMountOptions...) 
+ if err != nil { + return "", nil, false, err + } default: log.Warningf("ignoring unknown filesystem type %q", m.Type) } diff --git a/runsc/boot/loader.go b/runsc/boot/loader.go index 774621970..798c1a7a7 100644 --- a/runsc/boot/loader.go +++ b/runsc/boot/loader.go @@ -29,6 +29,7 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/bpf" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/coverage" "gvisor.dev/gvisor/pkg/cpuid" "gvisor.dev/gvisor/pkg/fd" "gvisor.dev/gvisor/pkg/log" @@ -491,10 +492,6 @@ func (l *Loader) Destroy() { // save/restore. l.k.Release() - // All sentry-created resources should have been released at this point; - // check for reference leaks. - refsvfs2.DoLeakCheck() - // In the success case, stdioFDs and goferFDs will only contain // released/closed FDs that ownership has been passed over to host FDs and // gofer sessions. Close them here in case of failure. @@ -752,7 +749,7 @@ func (l *Loader) createContainerProcess(root bool, cid string, info *containerIn // Setup the child container file system. l.startGoferMonitor(cid, info.goferFDs) - mntr := newContainerMounter(info.spec, info.goferFDs, l.k, l.mountHints, kernel.VFS2Enabled) + mntr := newContainerMounter(info, l.k, l.mountHints, kernel.VFS2Enabled) if root { if err := mntr.processHints(info.conf, info.procArgs.Credentials); err != nil { return nil, nil, nil, err @@ -1000,6 +997,15 @@ func (l *Loader) waitContainer(cid string, waitStatus *uint32) error { // consider the container exited. ws := l.wait(tg) *waitStatus = ws + + // Check for leaks and write coverage report after the root container has + // exited. This guarantees that the report is written in cases where the + // sandbox is killed by a signal after the ContainerWait request is completed. + if l.root.procArgs.ContainerID == cid { + // All sentry-created resources should have been released at this point. 
+ refsvfs2.DoLeakCheck() + coverage.Report() + } return nil } diff --git a/runsc/boot/loader_test.go b/runsc/boot/loader_test.go index 8b39bc59a..93c476971 100644 --- a/runsc/boot/loader_test.go +++ b/runsc/boot/loader_test.go @@ -439,7 +439,13 @@ func TestCreateMountNamespace(t *testing.T) { } defer cleanup() - mntr := newContainerMounter(&tc.spec, []*fd.FD{fd.New(sandEnd)}, nil, &podMountHints{}, false /* vfs2Enabled */) + info := containerInfo{ + conf: conf, + spec: &tc.spec, + goferFDs: []*fd.FD{fd.New(sandEnd)}, + } + + mntr := newContainerMounter(&info, nil, &podMountHints{}, false /* vfs2Enabled */) mns, err := mntr.createMountNamespace(ctx, conf) if err != nil { t.Fatalf("failed to create mount namespace: %v", err) @@ -479,7 +485,7 @@ func TestCreateMountNamespaceVFS2(t *testing.T) { defer l.Destroy() defer loaderCleanup() - mntr := newContainerMounter(l.root.spec, l.root.goferFDs, l.k, l.mountHints, true /* vfs2Enabled */) + mntr := newContainerMounter(&l.root, l.k, l.mountHints, true /* vfs2Enabled */) if err := mntr.processHints(l.root.conf, l.root.procArgs.Credentials); err != nil { t.Fatalf("failed process hints: %v", err) } @@ -702,7 +708,12 @@ func TestRestoreEnvironment(t *testing.T) { for _, ioFD := range tc.ioFDs { ioFDs = append(ioFDs, fd.New(ioFD)) } - mntr := newContainerMounter(tc.spec, ioFDs, nil, &podMountHints{}, false /* vfs2Enabled */) + info := containerInfo{ + conf: conf, + spec: tc.spec, + goferFDs: ioFDs, + } + mntr := newContainerMounter(&info, nil, &podMountHints{}, false /* vfs2Enabled */) actualRenv, err := mntr.createRestoreEnvironment(conf) if !tc.errorExpected && err != nil { t.Fatalf("could not create restore environment for test:%s", tc.name) diff --git a/runsc/boot/vfs.go b/runsc/boot/vfs.go index 9b3dacf46..7d8fd0483 100644 --- a/runsc/boot/vfs.go +++ b/runsc/boot/vfs.go @@ -16,6 +16,7 @@ package boot import ( "fmt" + "path" "sort" "strings" @@ -29,6 +30,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/devices/ttydev" 
"gvisor.dev/gvisor/pkg/sentry/devices/tundev" "gvisor.dev/gvisor/pkg/sentry/fs/user" + "gvisor.dev/gvisor/pkg/sentry/fsimpl/cgroupfs" "gvisor.dev/gvisor/pkg/sentry/fsimpl/devpts" "gvisor.dev/gvisor/pkg/sentry/fsimpl/devtmpfs" "gvisor.dev/gvisor/pkg/sentry/fsimpl/fuse" @@ -37,6 +39,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/fsimpl/proc" "gvisor.dev/gvisor/pkg/sentry/fsimpl/sys" "gvisor.dev/gvisor/pkg/sentry/fsimpl/tmpfs" + "gvisor.dev/gvisor/pkg/sentry/fsimpl/verity" "gvisor.dev/gvisor/pkg/sentry/inet" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" @@ -50,6 +53,10 @@ func registerFilesystems(k *kernel.Kernel) error { creds := auth.NewRootCredentials(k.RootUserNamespace()) vfsObj := k.VFS() + vfsObj.MustRegisterFilesystemType(cgroupfs.Name, &cgroupfs.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{ + AllowUserMount: true, + AllowUserList: true, + }) vfsObj.MustRegisterFilesystemType(devpts.Name, &devpts.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{ AllowUserList: true, // TODO(b/29356795): Users may mount this once the terminals are in a @@ -60,6 +67,10 @@ func registerFilesystems(k *kernel.Kernel) error { AllowUserMount: true, AllowUserList: true, }) + vfsObj.MustRegisterFilesystemType(fuse.Name, &fuse.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{ + AllowUserMount: true, + AllowUserList: true, + }) vfsObj.MustRegisterFilesystemType(gofer.Name, &gofer.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{ AllowUserList: true, }) @@ -79,9 +90,9 @@ func registerFilesystems(k *kernel.Kernel) error { AllowUserMount: true, AllowUserList: true, }) - vfsObj.MustRegisterFilesystemType(fuse.Name, &fuse.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{ - AllowUserMount: true, + vfsObj.MustRegisterFilesystemType(verity.Name, &verity.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{ AllowUserList: true, + AllowUserMount: true, }) // Setup files in devtmpfs. 
@@ -472,6 +483,12 @@ func (c *containerMounter) getMountNameAndOptionsVFS2(conf *config.Config, m *mo var data []string var iopts interface{} + verityData, verityOpts, verityRequested, remainingMOpts, err := parseVerityMountOptions(m.Options) + if err != nil { + return "", nil, false, err + } + m.Options = remainingMOpts + // Find filesystem name and FS specific data field. switch m.Type { case devpts.Name, devtmpfs.Name, proc.Name, sys.Name: @@ -502,6 +519,13 @@ func (c *containerMounter) getMountNameAndOptionsVFS2(conf *config.Config, m *mo // If configured, add overlay to all writable mounts. useOverlay = conf.Overlay && !mountFlags(m.Options).ReadOnly + case cgroupfs.Name: + var err error + data, err = parseAndFilterOptions(m.Options, cgroupfs.SupportedMountOptions...) + if err != nil { + return "", nil, false, err + } + default: log.Warningf("ignoring unknown filesystem type %q", m.Type) return "", nil, false, nil @@ -530,9 +554,75 @@ func (c *containerMounter) getMountNameAndOptionsVFS2(conf *config.Config, m *mo } } + if verityRequested { + verityData = verityData + "root_name=" + path.Base(m.Mount.Destination) + verityOpts.LowerName = fsName + verityOpts.LowerGetFSOptions = opts.GetFilesystemOptions + fsName = verity.Name + opts = &vfs.MountOptions{ + GetFilesystemOptions: vfs.GetFilesystemOptions{ + Data: verityData, + InternalData: verityOpts, + }, + InternalMount: true, + } + } + return fsName, opts, useOverlay, nil } +func parseKeyValue(s string) (string, string, bool) { + tokens := strings.SplitN(s, "=", 2) + if len(tokens) < 2 { + return "", "", false + } + return strings.TrimSpace(tokens[0]), strings.TrimSpace(tokens[1]), true +} + +// parseVerityMountOptions scans the provided mount options for verity-related +// mount options. It returns the parsed set of verity mount options, as well as +// the filtered set of mount options unrelated to verity. 
+func parseVerityMountOptions(mopts []string) (string, verity.InternalFilesystemOptions, bool, []string, error) { + nonVerity := []string{} + found := false + var rootHash string + verityOpts := verity.InternalFilesystemOptions{ + Action: verity.PanicOnViolation, + } + for _, o := range mopts { + if !strings.HasPrefix(o, "verity.") { + nonVerity = append(nonVerity, o) + continue + } + + k, v, ok := parseKeyValue(o) + if !ok { + return "", verityOpts, found, nonVerity, fmt.Errorf("invalid verity mount option with no value: %q", o) + } + + found = true + switch k { + case "verity.roothash": + rootHash = v + case "verity.action": + switch v { + case "error": + verityOpts.Action = verity.ErrorOnViolation + case "panic": + verityOpts.Action = verity.PanicOnViolation + default: + log.Warningf("Invalid verity action %q", v) + verityOpts.Action = verity.PanicOnViolation + } + default: + return "", verityOpts, found, nonVerity, fmt.Errorf("unknown verity mount option: %q", k) + } + } + verityOpts.AllowRuntimeEnable = len(rootHash) == 0 + verityData := "root_hash=" + rootHash + "," + return verityData, verityOpts, found, nonVerity, nil +} + // mountTmpVFS2 mounts an internal tmpfs at '/tmp' if it's safe to do so. // Technically we don't have to mount tmpfs at /tmp, as we could just rely on // the host /tmp, but this is a nice optimization, and fixes some apps that call |