summaryrefslogtreecommitdiffhomepage
path: root/runsc
diff options
context:
space:
mode:
Diffstat (limited to 'runsc')
-rw-r--r--runsc/boot/controller.go2
-rw-r--r--runsc/boot/fs.go15
-rw-r--r--runsc/boot/fs_test.go3
-rw-r--r--runsc/boot/loader.go2
-rw-r--r--runsc/boot/loader_test.go6
-rw-r--r--runsc/boot/vfs.go2
-rw-r--r--runsc/cmd/do.go15
-rw-r--r--runsc/cmd/gofer.go18
-rw-r--r--runsc/config/config.go21
-rw-r--r--runsc/config/flags.go3
-rw-r--r--runsc/fsgofer/filter/config.go5
-rw-r--r--runsc/fsgofer/filter/filter.go6
-rw-r--r--runsc/fsgofer/fsgofer.go23
-rw-r--r--runsc/fsgofer/fsgofer_test.go26
-rw-r--r--runsc/specutils/specutils.go11
15 files changed, 101 insertions, 57 deletions
diff --git a/runsc/boot/controller.go b/runsc/boot/controller.go
index 1cd5fba5c..1ae76d7d7 100644
--- a/runsc/boot/controller.go
+++ b/runsc/boot/controller.go
@@ -400,7 +400,7 @@ func (cm *containerManager) Restore(o *RestoreOpts, _ *struct{}) error {
// Set up the restore environment.
ctx := k.SupervisorContext()
- mntr := newContainerMounter(cm.l.root.spec, cm.l.root.goferFDs, cm.l.k, cm.l.mountHints)
+ mntr := newContainerMounter(cm.l.root.spec, cm.l.root.goferFDs, cm.l.k, cm.l.mountHints, kernel.VFS2Enabled)
if kernel.VFS2Enabled {
ctx, err = mntr.configureRestore(ctx, cm.l.root.conf)
if err != nil {
diff --git a/runsc/boot/fs.go b/runsc/boot/fs.go
index 77f632bb9..32adde643 100644
--- a/runsc/boot/fs.go
+++ b/runsc/boot/fs.go
@@ -103,14 +103,14 @@ func addOverlay(ctx context.Context, conf *config.Config, lower *fs.Inode, name
// compileMounts returns the supported mounts from the mount spec, adding any
// mandatory mounts that are required by the OCI specification.
-func compileMounts(spec *specs.Spec) []specs.Mount {
+func compileMounts(spec *specs.Spec, vfs2Enabled bool) []specs.Mount {
// Keep track of whether proc and sys were mounted.
var procMounted, sysMounted, devMounted, devptsMounted bool
var mounts []specs.Mount
// Mount all submounts from the spec.
for _, m := range spec.Mounts {
- if !specutils.IsSupportedDevMount(m) {
+ if !vfs2Enabled && !specutils.IsVFS1SupportedDevMount(m) {
log.Warningf("ignoring dev mount at %q", m.Destination)
continue
}
@@ -572,10 +572,10 @@ type containerMounter struct {
hints *podMountHints
}
-func newContainerMounter(spec *specs.Spec, goferFDs []*fd.FD, k *kernel.Kernel, hints *podMountHints) *containerMounter {
+func newContainerMounter(spec *specs.Spec, goferFDs []*fd.FD, k *kernel.Kernel, hints *podMountHints, vfs2Enabled bool) *containerMounter {
return &containerMounter{
root: spec.Root,
- mounts: compileMounts(spec),
+ mounts: compileMounts(spec, vfs2Enabled),
fds: fdDispenser{fds: goferFDs},
k: k,
hints: hints,
@@ -792,7 +792,7 @@ func (c *containerMounter) getMountNameAndOptions(conf *config.Config, m specs.M
case bind:
fd := c.fds.remove()
fsName = gofervfs2.Name
- opts = p9MountData(fd, c.getMountAccessType(m), conf.VFS2)
+ opts = p9MountData(fd, c.getMountAccessType(conf, m), conf.VFS2)
// If configured, add overlay to all writable mounts.
useOverlay = conf.Overlay && !mountFlags(m.Options).ReadOnly
@@ -802,12 +802,11 @@ func (c *containerMounter) getMountNameAndOptions(conf *config.Config, m specs.M
return fsName, opts, useOverlay, nil
}
-func (c *containerMounter) getMountAccessType(mount specs.Mount) config.FileAccessType {
+func (c *containerMounter) getMountAccessType(conf *config.Config, mount specs.Mount) config.FileAccessType {
if hint := c.hints.findMount(mount); hint != nil {
return hint.fileAccessType()
}
- // Non-root bind mounts are always shared if no hints were provided.
- return config.FileAccessShared
+ return conf.FileAccessMounts
}
// mountSubmount mounts volumes inside the container's root. Because mounts may
diff --git a/runsc/boot/fs_test.go b/runsc/boot/fs_test.go
index e986231e5..b4f12d034 100644
--- a/runsc/boot/fs_test.go
+++ b/runsc/boot/fs_test.go
@@ -243,7 +243,8 @@ func TestGetMountAccessType(t *testing.T) {
t.Fatalf("newPodMountHints failed: %v", err)
}
mounter := containerMounter{hints: podHints}
- if got := mounter.getMountAccessType(specs.Mount{Source: source}); got != tst.want {
+ conf := &config.Config{FileAccessMounts: config.FileAccessShared}
+ if got := mounter.getMountAccessType(conf, specs.Mount{Source: source}); got != tst.want {
t.Errorf("getMountAccessType(), want: %v, got: %v", tst.want, got)
}
})
diff --git a/runsc/boot/loader.go b/runsc/boot/loader.go
index 5afce232d..774621970 100644
--- a/runsc/boot/loader.go
+++ b/runsc/boot/loader.go
@@ -752,7 +752,7 @@ func (l *Loader) createContainerProcess(root bool, cid string, info *containerIn
// Setup the child container file system.
l.startGoferMonitor(cid, info.goferFDs)
- mntr := newContainerMounter(info.spec, info.goferFDs, l.k, l.mountHints)
+ mntr := newContainerMounter(info.spec, info.goferFDs, l.k, l.mountHints, kernel.VFS2Enabled)
if root {
if err := mntr.processHints(info.conf, info.procArgs.Credentials); err != nil {
return nil, nil, nil, err
diff --git a/runsc/boot/loader_test.go b/runsc/boot/loader_test.go
index 3121ca6eb..8b39bc59a 100644
--- a/runsc/boot/loader_test.go
+++ b/runsc/boot/loader_test.go
@@ -439,7 +439,7 @@ func TestCreateMountNamespace(t *testing.T) {
}
defer cleanup()
- mntr := newContainerMounter(&tc.spec, []*fd.FD{fd.New(sandEnd)}, nil, &podMountHints{})
+ mntr := newContainerMounter(&tc.spec, []*fd.FD{fd.New(sandEnd)}, nil, &podMountHints{}, false /* vfs2Enabled */)
mns, err := mntr.createMountNamespace(ctx, conf)
if err != nil {
t.Fatalf("failed to create mount namespace: %v", err)
@@ -479,7 +479,7 @@ func TestCreateMountNamespaceVFS2(t *testing.T) {
defer l.Destroy()
defer loaderCleanup()
- mntr := newContainerMounter(l.root.spec, l.root.goferFDs, l.k, l.mountHints)
+ mntr := newContainerMounter(l.root.spec, l.root.goferFDs, l.k, l.mountHints, true /* vfs2Enabled */)
if err := mntr.processHints(l.root.conf, l.root.procArgs.Credentials); err != nil {
t.Fatalf("failed process hints: %v", err)
}
@@ -702,7 +702,7 @@ func TestRestoreEnvironment(t *testing.T) {
for _, ioFD := range tc.ioFDs {
ioFDs = append(ioFDs, fd.New(ioFD))
}
- mntr := newContainerMounter(tc.spec, ioFDs, nil, &podMountHints{})
+ mntr := newContainerMounter(tc.spec, ioFDs, nil, &podMountHints{}, false /* vfs2Enabled */)
actualRenv, err := mntr.createRestoreEnvironment(conf)
if !tc.errorExpected && err != nil {
t.Fatalf("could not create restore environment for test:%s", tc.name)
diff --git a/runsc/boot/vfs.go b/runsc/boot/vfs.go
index 3fd28e516..9b3dacf46 100644
--- a/runsc/boot/vfs.go
+++ b/runsc/boot/vfs.go
@@ -494,7 +494,7 @@ func (c *containerMounter) getMountNameAndOptionsVFS2(conf *config.Config, m *mo
// but unlikely to be correct in this context.
return "", nil, false, fmt.Errorf("9P mount requires a connection FD")
}
- data = p9MountData(m.fd, c.getMountAccessType(m.Mount), true /* vfs2 */)
+ data = p9MountData(m.fd, c.getMountAccessType(conf, m.Mount), true /* vfs2 */)
iopts = gofer.InternalFilesystemOptions{
UniqueID: m.Destination,
}
diff --git a/runsc/cmd/do.go b/runsc/cmd/do.go
index 22c1dfeb8..455c57692 100644
--- a/runsc/cmd/do.go
+++ b/runsc/cmd/do.go
@@ -42,10 +42,11 @@ var errNoDefaultInterface = errors.New("no default interface found")
// Do implements subcommands.Command for the "do" command. It sets up a simple
// sandbox and executes the command inside it. See Usage() for more details.
type Do struct {
- root string
- cwd string
- ip string
- quiet bool
+ root string
+ cwd string
+ ip string
+ quiet bool
+ overlay bool
}
// Name implements subcommands.Command.Name.
@@ -76,6 +77,7 @@ func (c *Do) SetFlags(f *flag.FlagSet) {
f.StringVar(&c.cwd, "cwd", ".", "path to the current directory, defaults to the current directory")
f.StringVar(&c.ip, "ip", "192.168.10.2", "IPv4 address for the sandbox")
f.BoolVar(&c.quiet, "quiet", false, "suppress runsc messages to stdout. Application output is still sent to stdout and stderr")
+ f.BoolVar(&c.overlay, "force-overlay", true, "use an overlay. WARNING: disabling gives the command write access to the host")
}
// Execute implements subcommands.Command.Execute.
@@ -100,9 +102,8 @@ func (c *Do) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) su
return Errorf("Error to retrieve hostname: %v", err)
}
- // Map the entire host file system, but make it readonly with a writable
- // overlay on top (ignore --overlay option).
- conf.Overlay = true
+ // Map the entire host file system, optionally using an overlay.
+ conf.Overlay = c.overlay
absRoot, err := resolvePath(c.root)
if err != nil {
return Errorf("Error resolving root: %v", err)
diff --git a/runsc/cmd/gofer.go b/runsc/cmd/gofer.go
index 639b2219c..4cb0164dd 100644
--- a/runsc/cmd/gofer.go
+++ b/runsc/cmd/gofer.go
@@ -165,8 +165,8 @@ func (g *Gofer) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
// Start with root mount, then add any other additional mount as needed.
ats := make([]p9.Attacher, 0, len(spec.Mounts)+1)
ap, err := fsgofer.NewAttachPoint("/", fsgofer.Config{
- ROMount: spec.Root.Readonly || conf.Overlay,
- EnableXattr: conf.Verity,
+ ROMount: spec.Root.Readonly || conf.Overlay,
+ EnableVerityXattr: conf.Verity,
})
if err != nil {
Fatalf("creating attach point: %v", err)
@@ -178,9 +178,9 @@ func (g *Gofer) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
for _, m := range spec.Mounts {
if specutils.Is9PMount(m) {
cfg := fsgofer.Config{
- ROMount: isReadonlyMount(m.Options) || conf.Overlay,
- HostUDS: conf.FSGoferHostUDS,
- EnableXattr: conf.Verity,
+ ROMount: isReadonlyMount(m.Options) || conf.Overlay,
+ HostUDS: conf.FSGoferHostUDS,
+ EnableVerityXattr: conf.Verity,
}
ap, err := fsgofer.NewAttachPoint(m.Destination, cfg)
if err != nil {
@@ -203,6 +203,10 @@ func (g *Gofer) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
filter.InstallUDSFilters()
}
+ if conf.Verity {
+ filter.InstallXattrFilters()
+ }
+
if err := filter.Install(); err != nil {
Fatalf("installing seccomp filters: %v", err)
}
@@ -346,7 +350,7 @@ func setupRootFS(spec *specs.Spec, conf *config.Config) error {
// creates directories as needed.
func setupMounts(conf *config.Config, mounts []specs.Mount, root string) error {
for _, m := range mounts {
- if m.Type != "bind" || !specutils.IsSupportedDevMount(m) {
+ if m.Type != "bind" || !specutils.IsVFS1SupportedDevMount(m) {
continue
}
@@ -386,7 +390,7 @@ func setupMounts(conf *config.Config, mounts []specs.Mount, root string) error {
func resolveMounts(conf *config.Config, mounts []specs.Mount, root string) ([]specs.Mount, error) {
cleanMounts := make([]specs.Mount, 0, len(mounts))
for _, m := range mounts {
- if m.Type != "bind" || !specutils.IsSupportedDevMount(m) {
+ if m.Type != "bind" || !specutils.IsVFS1SupportedDevMount(m) {
cleanMounts = append(cleanMounts, m)
continue
}
diff --git a/runsc/config/config.go b/runsc/config/config.go
index 34ef48825..1e5858837 100644
--- a/runsc/config/config.go
+++ b/runsc/config/config.go
@@ -58,9 +58,12 @@ type Config struct {
// DebugLogFormat is the log format for debug.
DebugLogFormat string `flag:"debug-log-format"`
- // FileAccess indicates how the filesystem is accessed.
+ // FileAccess indicates how the root filesystem is accessed.
FileAccess FileAccessType `flag:"file-access"`
+ // FileAccessMounts indicates how non-root volumes are accessed.
+ FileAccessMounts FileAccessType `flag:"file-access-mounts"`
+
// Overlay is whether to wrap the root filesystem in an overlay.
Overlay bool `flag:"overlay"`
@@ -197,13 +200,19 @@ func (c *Config) validate() error {
type FileAccessType int
const (
- // FileAccessExclusive is the same as FileAccessShared, but enables
- // extra caching for improved performance. It should only be used if
- // the sandbox has exclusive access to the filesystem.
+ // FileAccessExclusive gives the sandbox exclusive access over files and
+ // directories in the filesystem. No external modifications are permitted and
+ // can lead to undefined behavior.
+ //
+ // Exclusive filesystem access enables more aggressive caching and offers
+ // significantly better performance. This is the default mode for the root
+ // volume.
FileAccessExclusive FileAccessType = iota
- // FileAccessShared sends IO requests to a Gofer process that validates the
- // requests and forwards them to the host.
+ // FileAccessShared is used for volumes that can have external changes. It
+ // requires revalidation on every filesystem access to detect external
+ // changes, and reduces the amount of caching that can be done. This is the
+ // default mode for non-root volumes.
FileAccessShared
)
diff --git a/runsc/config/flags.go b/runsc/config/flags.go
index adbee506c..1d996c841 100644
--- a/runsc/config/flags.go
+++ b/runsc/config/flags.go
@@ -67,7 +67,8 @@ func RegisterFlags() {
flag.Bool("oci-seccomp", false, "Enables loading OCI seccomp filters inside the sandbox.")
// Flags that control sandbox runtime behavior: FS related.
- flag.Var(fileAccessTypePtr(FileAccessExclusive), "file-access", "specifies which filesystem to use for the root mount: exclusive (default), shared. Volume mounts are always shared.")
+ flag.Var(fileAccessTypePtr(FileAccessExclusive), "file-access", "specifies which filesystem validation to use for the root mount: exclusive (default), shared.")
+ flag.Var(fileAccessTypePtr(FileAccessShared), "file-access-mounts", "specifies which filesystem validation to use for volumes other than the root mount: shared (default), exclusive.")
flag.Bool("overlay", false, "wrap filesystem mounts with writable overlay. All modifications are stored in memory inside the sandbox.")
flag.Bool("verity", false, "specifies whether a verity file system will be mounted.")
flag.Bool("overlayfs-stale-read", true, "assume root mount is an overlay filesystem")
diff --git a/runsc/fsgofer/filter/config.go b/runsc/fsgofer/filter/config.go
index fd72414ce..246b7ed3c 100644
--- a/runsc/fsgofer/filter/config.go
+++ b/runsc/fsgofer/filter/config.go
@@ -247,3 +247,8 @@ var udsSyscalls = seccomp.SyscallRules{
},
},
}
+
+var xattrSyscalls = seccomp.SyscallRules{
+ unix.SYS_FGETXATTR: {},
+ unix.SYS_FSETXATTR: {},
+}
diff --git a/runsc/fsgofer/filter/filter.go b/runsc/fsgofer/filter/filter.go
index 289886720..6c67ee288 100644
--- a/runsc/fsgofer/filter/filter.go
+++ b/runsc/fsgofer/filter/filter.go
@@ -36,3 +36,9 @@ func InstallUDSFilters() {
// Add additional filters required for connecting to the host's sockets.
allowedSyscalls.Merge(udsSyscalls)
}
+
+// InstallXattrFilters extends the allowed syscalls to include xattr calls that
+// are necessary for Verity enabled file systems.
+func InstallXattrFilters() {
+ allowedSyscalls.Merge(xattrSyscalls)
+}
diff --git a/runsc/fsgofer/fsgofer.go b/runsc/fsgofer/fsgofer.go
index 1e80a634d..e04ddda47 100644
--- a/runsc/fsgofer/fsgofer.go
+++ b/runsc/fsgofer/fsgofer.go
@@ -48,6 +48,14 @@ const (
allowedOpenFlags = unix.O_TRUNC
)
+// verityXattrs are the extended attributes used by verity file system.
+var verityXattrs = map[string]struct{}{
+ "user.merkle.offset": struct{}{},
+ "user.merkle.size": struct{}{},
+ "user.merkle.childrenOffset": struct{}{},
+ "user.merkle.childrenSize": struct{}{},
+}
+
// join is equivalent to path.Join() but skips path.Clean() which is expensive.
func join(parent, child string) string {
if child == "." || child == ".." {
@@ -67,8 +75,9 @@ type Config struct {
// HostUDS signals whether the gofer can mount a host's UDS.
HostUDS bool
- // enableXattr allows Get/SetXattr for the mounted file systems.
- EnableXattr bool
+ // EnableVerityXattr allows access to extended attributes used by the
+ // verity file system.
+ EnableVerityXattr bool
}
type attachPoint struct {
@@ -799,7 +808,10 @@ func (l *localFile) SetAttr(valid p9.SetAttrMask, attr p9.SetAttr) error {
}
func (l *localFile) GetXattr(name string, size uint64) (string, error) {
- if !l.attachPoint.conf.EnableXattr {
+ if !l.attachPoint.conf.EnableVerityXattr {
+ return "", unix.EOPNOTSUPP
+ }
+ if _, ok := verityXattrs[name]; !ok {
return "", unix.EOPNOTSUPP
}
buffer := make([]byte, size)
@@ -810,7 +822,10 @@ func (l *localFile) GetXattr(name string, size uint64) (string, error) {
}
func (l *localFile) SetXattr(name string, value string, flags uint32) error {
- if !l.attachPoint.conf.EnableXattr {
+ if !l.attachPoint.conf.EnableVerityXattr {
+ return unix.EOPNOTSUPP
+ }
+ if _, ok := verityXattrs[name]; !ok {
return unix.EOPNOTSUPP
}
return unix.Fsetxattr(l.file.FD(), name, []byte(value), int(flags))
diff --git a/runsc/fsgofer/fsgofer_test.go b/runsc/fsgofer/fsgofer_test.go
index a5f09f88f..d7e141476 100644
--- a/runsc/fsgofer/fsgofer_test.go
+++ b/runsc/fsgofer/fsgofer_test.go
@@ -579,20 +579,24 @@ func SetGetXattr(l *localFile, name string, value string) error {
return nil
}
+func TestSetGetDisabledXattr(t *testing.T) {
+ runCustom(t, []uint32{unix.S_IFREG}, rwConfs, func(t *testing.T, s state) {
+ name := "user.merkle.offset"
+ value := "tmp"
+ err := SetGetXattr(s.file, name, value)
+ if err == nil {
+ t.Fatalf("%v: SetGetXattr should have failed", s)
+ }
+ })
+}
+
func TestSetGetXattr(t *testing.T) {
- xattrConfs := []Config{{ROMount: false, EnableXattr: false}, {ROMount: false, EnableXattr: true}}
- runCustom(t, []uint32{unix.S_IFREG}, xattrConfs, func(t *testing.T, s state) {
- name := "user.test"
+ runCustom(t, []uint32{unix.S_IFREG}, []Config{{ROMount: false, EnableVerityXattr: true}}, func(t *testing.T, s state) {
+ name := "user.merkle.offset"
value := "tmp"
err := SetGetXattr(s.file, name, value)
- if s.conf.EnableXattr {
- if err != nil {
- t.Fatalf("%v: SetGetXattr failed, err: %v", s, err)
- }
- } else {
- if err == nil {
- t.Fatalf("%v: SetGetXattr should have failed", s)
- }
+ if err != nil {
+ t.Fatalf("%v: SetGetXattr failed, err: %v", s, err)
}
})
}
diff --git a/runsc/specutils/specutils.go b/runsc/specutils/specutils.go
index 5ba38bfe4..45856fd58 100644
--- a/runsc/specutils/specutils.go
+++ b/runsc/specutils/specutils.go
@@ -334,14 +334,13 @@ func capsFromNames(names []string, skipSet map[linux.Capability]struct{}) (auth.
// Is9PMount returns true if the given mount can be mounted as an external gofer.
func Is9PMount(m specs.Mount) bool {
- return m.Type == "bind" && m.Source != "" && IsSupportedDevMount(m)
+ return m.Type == "bind" && m.Source != "" && IsVFS1SupportedDevMount(m)
}
-// IsSupportedDevMount returns true if the mount is a supported /dev mount.
-// Only mount that does not conflict with runsc default /dev mount is
-// supported.
-func IsSupportedDevMount(m specs.Mount) bool {
- // These are devices exist inside sentry. See pkg/sentry/fs/dev/dev.go
+// IsVFS1SupportedDevMount returns true if m.Destination does not specify a
+// path that is hardcoded by VFS1's implementation of /dev.
+func IsVFS1SupportedDevMount(m specs.Mount) bool {
+ // See pkg/sentry/fs/dev/dev.go.
var existingDevices = []string{
"/dev/fd", "/dev/stdin", "/dev/stdout", "/dev/stderr",
"/dev/null", "/dev/zero", "/dev/full", "/dev/random",