diff options
Diffstat (limited to 'runsc')
-rw-r--r-- | runsc/boot/controller.go | 2 | ||||
-rw-r--r-- | runsc/boot/fs.go | 15 | ||||
-rw-r--r-- | runsc/boot/fs_test.go | 3 | ||||
-rw-r--r-- | runsc/boot/loader.go | 2 | ||||
-rw-r--r-- | runsc/boot/loader_test.go | 6 | ||||
-rw-r--r-- | runsc/boot/vfs.go | 2 | ||||
-rw-r--r-- | runsc/cmd/do.go | 15 | ||||
-rw-r--r-- | runsc/cmd/gofer.go | 18 | ||||
-rw-r--r-- | runsc/config/config.go | 21 | ||||
-rw-r--r-- | runsc/config/flags.go | 3 | ||||
-rw-r--r-- | runsc/fsgofer/filter/config.go | 5 | ||||
-rw-r--r-- | runsc/fsgofer/filter/filter.go | 6 | ||||
-rw-r--r-- | runsc/fsgofer/fsgofer.go | 23 | ||||
-rw-r--r-- | runsc/fsgofer/fsgofer_test.go | 26 | ||||
-rw-r--r-- | runsc/specutils/specutils.go | 11 |
15 files changed, 101 insertions, 57 deletions
diff --git a/runsc/boot/controller.go b/runsc/boot/controller.go index 1cd5fba5c..1ae76d7d7 100644 --- a/runsc/boot/controller.go +++ b/runsc/boot/controller.go @@ -400,7 +400,7 @@ func (cm *containerManager) Restore(o *RestoreOpts, _ *struct{}) error { // Set up the restore environment. ctx := k.SupervisorContext() - mntr := newContainerMounter(cm.l.root.spec, cm.l.root.goferFDs, cm.l.k, cm.l.mountHints) + mntr := newContainerMounter(cm.l.root.spec, cm.l.root.goferFDs, cm.l.k, cm.l.mountHints, kernel.VFS2Enabled) if kernel.VFS2Enabled { ctx, err = mntr.configureRestore(ctx, cm.l.root.conf) if err != nil { diff --git a/runsc/boot/fs.go b/runsc/boot/fs.go index 77f632bb9..32adde643 100644 --- a/runsc/boot/fs.go +++ b/runsc/boot/fs.go @@ -103,14 +103,14 @@ func addOverlay(ctx context.Context, conf *config.Config, lower *fs.Inode, name // compileMounts returns the supported mounts from the mount spec, adding any // mandatory mounts that are required by the OCI specification. -func compileMounts(spec *specs.Spec) []specs.Mount { +func compileMounts(spec *specs.Spec, vfs2Enabled bool) []specs.Mount { // Keep track of whether proc and sys were mounted. var procMounted, sysMounted, devMounted, devptsMounted bool var mounts []specs.Mount // Mount all submounts from the spec. for _, m := range spec.Mounts { - if !specutils.IsSupportedDevMount(m) { + if !vfs2Enabled && !specutils.IsVFS1SupportedDevMount(m) { log.Warningf("ignoring dev mount at %q", m.Destination) continue } @@ -572,10 +572,10 @@ type containerMounter struct { hints *podMountHints } -func newContainerMounter(spec *specs.Spec, goferFDs []*fd.FD, k *kernel.Kernel, hints *podMountHints) *containerMounter { +func newContainerMounter(spec *specs.Spec, goferFDs []*fd.FD, k *kernel.Kernel, hints *podMountHints, vfs2Enabled bool) *containerMounter { return &containerMounter{ root: spec.Root, - mounts: compileMounts(spec), + mounts: compileMounts(spec, vfs2Enabled), fds: fdDispenser{fds: goferFDs}, k: k, hints: hints, @@ -792,7 +792,7 @@ func (c *containerMounter) getMountNameAndOptions(conf *config.Config, m specs.M case bind: fd := c.fds.remove() fsName = gofervfs2.Name - opts = p9MountData(fd, c.getMountAccessType(m), conf.VFS2) + opts = p9MountData(fd, c.getMountAccessType(conf, m), conf.VFS2) // If configured, add overlay to all writable mounts. useOverlay = conf.Overlay && !mountFlags(m.Options).ReadOnly @@ -802,12 +802,11 @@ func (c *containerMounter) getMountNameAndOptions(conf *config.Config, m specs.M return fsName, opts, useOverlay, nil } -func (c *containerMounter) getMountAccessType(mount specs.Mount) config.FileAccessType { +func (c *containerMounter) getMountAccessType(conf *config.Config, mount specs.Mount) config.FileAccessType { if hint := c.hints.findMount(mount); hint != nil { return hint.fileAccessType() } - // Non-root bind mounts are always shared if no hints were provided. - return config.FileAccessShared + return conf.FileAccessMounts } // mountSubmount mounts volumes inside the container's root. Because mounts may diff --git a/runsc/boot/fs_test.go b/runsc/boot/fs_test.go index e986231e5..b4f12d034 100644 --- a/runsc/boot/fs_test.go +++ b/runsc/boot/fs_test.go @@ -243,7 +243,8 @@ func TestGetMountAccessType(t *testing.T) { t.Fatalf("newPodMountHints failed: %v", err) } mounter := containerMounter{hints: podHints} - if got := mounter.getMountAccessType(specs.Mount{Source: source}); got != tst.want { + conf := &config.Config{FileAccessMounts: config.FileAccessShared} + if got := mounter.getMountAccessType(conf, specs.Mount{Source: source}); got != tst.want { t.Errorf("getMountAccessType(), want: %v, got: %v", tst.want, got) } }) diff --git a/runsc/boot/loader.go b/runsc/boot/loader.go index 5afce232d..774621970 100644 --- a/runsc/boot/loader.go +++ b/runsc/boot/loader.go @@ -752,7 +752,7 @@ func (l *Loader) createContainerProcess(root bool, cid string, info *containerIn // Setup the child container file system. l.startGoferMonitor(cid, info.goferFDs) - mntr := newContainerMounter(info.spec, info.goferFDs, l.k, l.mountHints) + mntr := newContainerMounter(info.spec, info.goferFDs, l.k, l.mountHints, kernel.VFS2Enabled) if root { if err := mntr.processHints(info.conf, info.procArgs.Credentials); err != nil { return nil, nil, nil, err diff --git a/runsc/boot/loader_test.go b/runsc/boot/loader_test.go index 3121ca6eb..8b39bc59a 100644 --- a/runsc/boot/loader_test.go +++ b/runsc/boot/loader_test.go @@ -439,7 +439,7 @@ func TestCreateMountNamespace(t *testing.T) { } defer cleanup() - mntr := newContainerMounter(&tc.spec, []*fd.FD{fd.New(sandEnd)}, nil, &podMountHints{}) + mntr := newContainerMounter(&tc.spec, []*fd.FD{fd.New(sandEnd)}, nil, &podMountHints{}, false /* vfs2Enabled */) mns, err := mntr.createMountNamespace(ctx, conf) if err != nil { t.Fatalf("failed to create mount namespace: %v", err) @@ -479,7 +479,7 @@ func TestCreateMountNamespaceVFS2(t *testing.T) { defer l.Destroy() defer loaderCleanup() - mntr := newContainerMounter(l.root.spec, l.root.goferFDs, l.k, l.mountHints) + mntr := newContainerMounter(l.root.spec, l.root.goferFDs, l.k, l.mountHints, true /* vfs2Enabled */) if err := mntr.processHints(l.root.conf, l.root.procArgs.Credentials); err != nil { t.Fatalf("failed process hints: %v", err) } @@ -702,7 +702,7 @@ func TestRestoreEnvironment(t *testing.T) { for _, ioFD := range tc.ioFDs { ioFDs = append(ioFDs, fd.New(ioFD)) } - mntr := newContainerMounter(tc.spec, ioFDs, nil, &podMountHints{}) + mntr := newContainerMounter(tc.spec, ioFDs, nil, &podMountHints{}, false /* vfs2Enabled */) actualRenv, err := mntr.createRestoreEnvironment(conf) if !tc.errorExpected && err != nil { t.Fatalf("could not create restore environment for test:%s", tc.name) diff --git a/runsc/boot/vfs.go b/runsc/boot/vfs.go index 3fd28e516..9b3dacf46 100644 --- a/runsc/boot/vfs.go +++ b/runsc/boot/vfs.go @@ -494,7 +494,7 @@ func (c *containerMounter) getMountNameAndOptionsVFS2(conf *config.Config, m *mo // but unlikely to be correct in this context. return "", nil, false, fmt.Errorf("9P mount requires a connection FD") } - data = p9MountData(m.fd, c.getMountAccessType(m.Mount), true /* vfs2 */) + data = p9MountData(m.fd, c.getMountAccessType(conf, m.Mount), true /* vfs2 */) iopts = gofer.InternalFilesystemOptions{ UniqueID: m.Destination, } diff --git a/runsc/cmd/do.go b/runsc/cmd/do.go index 22c1dfeb8..455c57692 100644 --- a/runsc/cmd/do.go +++ b/runsc/cmd/do.go @@ -42,10 +42,11 @@ var errNoDefaultInterface = errors.New("no default interface found") // Do implements subcommands.Command for the "do" command. It sets up a simple // sandbox and executes the command inside it. See Usage() for more details. type Do struct { - root string - cwd string - ip string - quiet bool + root string + cwd string + ip string + quiet bool + overlay bool } // Name implements subcommands.Command.Name. @@ -76,6 +77,7 @@ func (c *Do) SetFlags(f *flag.FlagSet) { f.StringVar(&c.cwd, "cwd", ".", "path to the current directory, defaults to the current directory") f.StringVar(&c.ip, "ip", "192.168.10.2", "IPv4 address for the sandbox") f.BoolVar(&c.quiet, "quiet", false, "suppress runsc messages to stdout. Application output is still sent to stdout and stderr") + f.BoolVar(&c.overlay, "force-overlay", true, "use an overlay. WARNING: disabling gives the command write access to the host") } // Execute implements subcommands.Command.Execute. @@ -100,9 +102,8 @@ func (c *Do) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) su return Errorf("Error to retrieve hostname: %v", err) } - // Map the entire host file system, but make it readonly with a writable - // overlay on top (ignore --overlay option). - conf.Overlay = true + // Map the entire host file system, optionally using an overlay. + conf.Overlay = c.overlay absRoot, err := resolvePath(c.root) if err != nil { return Errorf("Error resolving root: %v", err) diff --git a/runsc/cmd/gofer.go b/runsc/cmd/gofer.go index 639b2219c..4cb0164dd 100644 --- a/runsc/cmd/gofer.go +++ b/runsc/cmd/gofer.go @@ -165,8 +165,8 @@ func (g *Gofer) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) // Start with root mount, then add any other additional mount as needed. ats := make([]p9.Attacher, 0, len(spec.Mounts)+1) ap, err := fsgofer.NewAttachPoint("/", fsgofer.Config{ - ROMount: spec.Root.Readonly || conf.Overlay, - EnableXattr: conf.Verity, + ROMount: spec.Root.Readonly || conf.Overlay, + EnableVerityXattr: conf.Verity, }) if err != nil { Fatalf("creating attach point: %v", err) @@ -178,9 +178,9 @@ func (g *Gofer) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) for _, m := range spec.Mounts { if specutils.Is9PMount(m) { cfg := fsgofer.Config{ - ROMount: isReadonlyMount(m.Options) || conf.Overlay, - HostUDS: conf.FSGoferHostUDS, - EnableXattr: conf.Verity, + ROMount: isReadonlyMount(m.Options) || conf.Overlay, + HostUDS: conf.FSGoferHostUDS, + EnableVerityXattr: conf.Verity, } ap, err := fsgofer.NewAttachPoint(m.Destination, cfg) if err != nil { @@ -203,6 +203,10 @@ func (g *Gofer) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) filter.InstallUDSFilters() } + if conf.Verity { + filter.InstallXattrFilters() + } + if err := filter.Install(); err != nil { Fatalf("installing seccomp filters: %v", err) } @@ -346,7 +350,7 @@ func setupRootFS(spec *specs.Spec, conf *config.Config) error { // creates directories as needed. func setupMounts(conf *config.Config, mounts []specs.Mount, root string) error { for _, m := range mounts { - if m.Type != "bind" || !specutils.IsSupportedDevMount(m) { + if m.Type != "bind" || !specutils.IsVFS1SupportedDevMount(m) { continue } @@ -386,7 +390,7 @@ func setupMounts(conf *config.Config, mounts []specs.Mount, root string) error { func resolveMounts(conf *config.Config, mounts []specs.Mount, root string) ([]specs.Mount, error) { cleanMounts := make([]specs.Mount, 0, len(mounts)) for _, m := range mounts { - if m.Type != "bind" || !specutils.IsSupportedDevMount(m) { + if m.Type != "bind" || !specutils.IsVFS1SupportedDevMount(m) { cleanMounts = append(cleanMounts, m) continue } diff --git a/runsc/config/config.go b/runsc/config/config.go index 34ef48825..1e5858837 100644 --- a/runsc/config/config.go +++ b/runsc/config/config.go @@ -58,9 +58,12 @@ type Config struct { // DebugLogFormat is the log format for debug. DebugLogFormat string `flag:"debug-log-format"` - // FileAccess indicates how the filesystem is accessed. + // FileAccess indicates how the root filesystem is accessed. FileAccess FileAccessType `flag:"file-access"` + // FileAccessMounts indicates how non-root volumes are accessed. + FileAccessMounts FileAccessType `flag:"file-access-mounts"` + // Overlay is whether to wrap the root filesystem in an overlay. Overlay bool `flag:"overlay"` @@ -197,13 +200,19 @@ func (c *Config) validate() error { type FileAccessType int const ( - // FileAccessExclusive is the same as FileAccessShared, but enables - // extra caching for improved performance. It should only be used if - // the sandbox has exclusive access to the filesystem. + // FileAccessExclusive gives the sandbox exclusive access over files and + // directories in the filesystem. No external modifications are permitted and + // can lead to undefined behavior. + // + // Exclusive filesystem access enables more aggressive caching and offers + // significantly better performance. This is the default mode for the root + // volume. FileAccessExclusive FileAccessType = iota - // FileAccessShared sends IO requests to a Gofer process that validates the - // requests and forwards them to the host. + // FileAccessShared is used for volumes that can have external changes. It + // requires revalidation on every filesystem access to detect external + // changes, and reduces the amount of caching that can be done. This is the + // default mode for non-root volumes. FileAccessShared ) diff --git a/runsc/config/flags.go b/runsc/config/flags.go index adbee506c..1d996c841 100644 --- a/runsc/config/flags.go +++ b/runsc/config/flags.go @@ -67,7 +67,8 @@ func RegisterFlags() { flag.Bool("oci-seccomp", false, "Enables loading OCI seccomp filters inside the sandbox.") // Flags that control sandbox runtime behavior: FS related. - flag.Var(fileAccessTypePtr(FileAccessExclusive), "file-access", "specifies which filesystem to use for the root mount: exclusive (default), shared. Volume mounts are always shared.") + flag.Var(fileAccessTypePtr(FileAccessExclusive), "file-access", "specifies which filesystem validation to use for the root mount: exclusive (default), shared.") + flag.Var(fileAccessTypePtr(FileAccessShared), "file-access-mounts", "specifies which filesystem validation to use for volumes other than the root mount: shared (default), exclusive.") flag.Bool("overlay", false, "wrap filesystem mounts with writable overlay. All modifications are stored in memory inside the sandbox.") flag.Bool("verity", false, "specifies whether a verity file system will be mounted.") flag.Bool("overlayfs-stale-read", true, "assume root mount is an overlay filesystem") diff --git a/runsc/fsgofer/filter/config.go b/runsc/fsgofer/filter/config.go index fd72414ce..246b7ed3c 100644 --- a/runsc/fsgofer/filter/config.go +++ b/runsc/fsgofer/filter/config.go @@ -247,3 +247,8 @@ var udsSyscalls = seccomp.SyscallRules{ }, }, } + +var xattrSyscalls = seccomp.SyscallRules{ + unix.SYS_FGETXATTR: {}, + unix.SYS_FSETXATTR: {}, +} diff --git a/runsc/fsgofer/filter/filter.go b/runsc/fsgofer/filter/filter.go index 289886720..6c67ee288 100644 --- a/runsc/fsgofer/filter/filter.go +++ b/runsc/fsgofer/filter/filter.go @@ -36,3 +36,9 @@ func InstallUDSFilters() { // Add additional filters required for connecting to the host's sockets. allowedSyscalls.Merge(udsSyscalls) } + +// InstallXattrFilters extends the allowed syscalls to include xattr calls that +// are necessary for Verity enabled file systems. +func InstallXattrFilters() { + allowedSyscalls.Merge(xattrSyscalls) +} diff --git a/runsc/fsgofer/fsgofer.go b/runsc/fsgofer/fsgofer.go index 1e80a634d..e04ddda47 100644 --- a/runsc/fsgofer/fsgofer.go +++ b/runsc/fsgofer/fsgofer.go @@ -48,6 +48,14 @@ const ( allowedOpenFlags = unix.O_TRUNC ) +// verityXattrs are the extended attributes used by verity file system. +var verityXattrs = map[string]struct{}{ + "user.merkle.offset": struct{}{}, + "user.merkle.size": struct{}{}, + "user.merkle.childrenOffset": struct{}{}, + "user.merkle.childrenSize": struct{}{}, +} + // join is equivalent to path.Join() but skips path.Clean() which is expensive. func join(parent, child string) string { if child == "." || child == ".." { @@ -67,8 +75,9 @@ type Config struct { // HostUDS signals whether the gofer can mount a host's UDS. HostUDS bool - // enableXattr allows Get/SetXattr for the mounted file systems. - EnableXattr bool + // EnableVerityXattr allows access to extended attributes used by the + // verity file system. + EnableVerityXattr bool } type attachPoint struct { @@ -799,7 +808,10 @@ func (l *localFile) SetAttr(valid p9.SetAttrMask, attr p9.SetAttr) error { } func (l *localFile) GetXattr(name string, size uint64) (string, error) { - if !l.attachPoint.conf.EnableXattr { + if !l.attachPoint.conf.EnableVerityXattr { + return "", unix.EOPNOTSUPP + } + if _, ok := verityXattrs[name]; !ok { return "", unix.EOPNOTSUPP } buffer := make([]byte, size) @@ -810,7 +822,10 @@ func (l *localFile) GetXattr(name string, size uint64) (string, error) { } func (l *localFile) SetXattr(name string, value string, flags uint32) error { - if !l.attachPoint.conf.EnableXattr { + if !l.attachPoint.conf.EnableVerityXattr { + return unix.EOPNOTSUPP + } + if _, ok := verityXattrs[name]; !ok { return unix.EOPNOTSUPP } return unix.Fsetxattr(l.file.FD(), name, []byte(value), int(flags)) diff --git a/runsc/fsgofer/fsgofer_test.go b/runsc/fsgofer/fsgofer_test.go index a5f09f88f..d7e141476 100644 --- a/runsc/fsgofer/fsgofer_test.go +++ b/runsc/fsgofer/fsgofer_test.go @@ -579,20 +579,24 @@ func SetGetXattr(l *localFile, name string, value string) error { return nil } +func TestSetGetDisabledXattr(t *testing.T) { + runCustom(t, []uint32{unix.S_IFREG}, rwConfs, func(t *testing.T, s state) { + name := "user.merkle.offset" + value := "tmp" + err := SetGetXattr(s.file, name, value) + if err == nil { + t.Fatalf("%v: SetGetXattr should have failed", s) + } + }) +} + func TestSetGetXattr(t *testing.T) { - xattrConfs := []Config{{ROMount: false, EnableXattr: false}, {ROMount: false, EnableXattr: true}} - runCustom(t, []uint32{unix.S_IFREG}, xattrConfs, func(t *testing.T, s state) { - name := "user.test" + runCustom(t, []uint32{unix.S_IFREG}, []Config{{ROMount: false, EnableVerityXattr: true}}, func(t *testing.T, s state) { + name := "user.merkle.offset" value := "tmp" err := SetGetXattr(s.file, name, value) - if s.conf.EnableXattr { - if err != nil { - t.Fatalf("%v: SetGetXattr failed, err: %v", s, err) - } - } else { - if err == nil { - t.Fatalf("%v: SetGetXattr should have failed", s) - } + if err != nil { + t.Fatalf("%v: SetGetXattr failed, err: %v", s, err) } }) } diff --git a/runsc/specutils/specutils.go b/runsc/specutils/specutils.go index 5ba38bfe4..45856fd58 100644 --- a/runsc/specutils/specutils.go +++ b/runsc/specutils/specutils.go @@ -334,14 +334,13 @@ func capsFromNames(names []string, skipSet map[linux.Capability]struct{}) (auth. // Is9PMount returns true if the given mount can be mounted as an external gofer. func Is9PMount(m specs.Mount) bool { - return m.Type == "bind" && m.Source != "" && IsSupportedDevMount(m) + return m.Type == "bind" && m.Source != "" && IsVFS1SupportedDevMount(m) } -// IsSupportedDevMount returns true if the mount is a supported /dev mount. -// Only mount that does not conflict with runsc default /dev mount is -// supported. -func IsSupportedDevMount(m specs.Mount) bool { - // These are devices exist inside sentry. See pkg/sentry/fs/dev/dev.go +// IsVFS1SupportedDevMount returns true if m.Destination does not specify a +// path that is hardcoded by VFS1's implementation of /dev. +func IsVFS1SupportedDevMount(m specs.Mount) bool { + // See pkg/sentry/fs/dev/dev.go. var existingDevices = []string{ "/dev/fd", "/dev/stdin", "/dev/stdout", "/dev/stderr", "/dev/null", "/dev/zero", "/dev/full", "/dev/random", |