summaryrefslogtreecommitdiffhomepage
path: root/runsc/boot
diff options
context:
space:
mode:
authorNicolas Lacasse <nlacasse@google.com>2018-08-14 16:24:46 -0700
committerShentubot <shentubot@google.com>2018-08-14 16:25:58 -0700
commite8a4f2e133c3a7fb4a2dceb6675ebc57ea4f7350 (patch)
treec95b1a34bbf725905ea6afa5a74e52982abaff28 /runsc/boot
parentd4939f6dc22e5607cf2ff8d2a9eb1178e47b0a22 (diff)
runsc: Change cache policy for root fs and volume mounts.
Previously, gofer filesystems were configured with the default "fscache" policy, which caches filesystem metadata and contents aggressively. While this setting is best for performance, it means that changes from inside the sandbox may not be immediately propagated outside the sandbox, and vice-versa. This CL changes volumes and the root fs configuration to use a new "remote-revalidate" cache policy which tries to retain as much caching as possible while still making fs changes visible across the sandbox boundary. This cache policy is enabled by default for the root filesystem. The default value for the "--file-access" flag is still "proxy", but the behavior is changed to use the new cache policy. A new value for the "--file-access" flag is added, called "proxy-exclusive", which turns on the previous aggressive caching behavior. As the name implies, this flag should be used when the sandbox has "exclusive" access to the filesystem. All volume mounts are configured to use the new cache policy, since it is safest and most likely to be correct. There is not currently a way to change this behavior, but it's possible to add such a mechanism in the future. The configurability is a smaller issue for volumes, since most of the expensive application fs operations (walking + stating files) will likely served by the root fs. PiperOrigin-RevId: 208735037 Change-Id: Ife048fab1948205f6665df8563434dbc6ca8cfc9
Diffstat (limited to 'runsc/boot')
-rw-r--r--runsc/boot/config.go9
-rw-r--r--runsc/boot/fs.go62
-rw-r--r--runsc/boot/loader_test.go36
3 files changed, 68 insertions, 39 deletions
diff --git a/runsc/boot/config.go b/runsc/boot/config.go
index 074cd6a63..6c69a7c38 100644
--- a/runsc/boot/config.go
+++ b/runsc/boot/config.go
@@ -64,6 +64,11 @@ const (
// requests and forwards them to the host.
FileAccessProxy FileAccessType = iota
+ // FileAccessProxyExclusive is the same as FileAccessProxy, but enables
+ // extra caching for improved performance. It should only be used if
+ // the sandbox has exclusive access to the filesystem.
+ FileAccessProxyExclusive
+
// FileAccessDirect connects the sandbox directly to the host filesystem.
FileAccessDirect
)
@@ -73,6 +78,8 @@ func MakeFileAccessType(s string) (FileAccessType, error) {
switch s {
case "proxy":
return FileAccessProxy, nil
+ case "proxy-exclusive":
+ return FileAccessProxyExclusive, nil
case "direct":
return FileAccessDirect, nil
default:
@@ -84,6 +91,8 @@ func (f FileAccessType) String() string {
switch f {
case FileAccessProxy:
return "proxy"
+ case FileAccessProxyExclusive:
+ return "proxy-exclusive"
case FileAccessDirect:
return "direct"
default:
diff --git a/runsc/boot/fs.go b/runsc/boot/fs.go
index e596c739f..eea2ec1f5 100644
--- a/runsc/boot/fs.go
+++ b/runsc/boot/fs.go
@@ -17,6 +17,7 @@ package boot
import (
"fmt"
"path/filepath"
+ "strconv"
"strings"
// Include filesystem types that OCI spec might mount.
@@ -54,6 +55,9 @@ type fdDispenser struct {
}
func (f *fdDispenser) remove() int {
+ if f.empty() {
+ panic("fdDispenser out of fds")
+ }
rv := f.fds[0]
f.fds = f.fds[1:]
return rv
@@ -160,8 +164,6 @@ func compileMounts(spec *specs.Spec) []specs.Mount {
// setMounts iterates over mounts and mounts them in the specified
// mount namespace.
func setMounts(ctx context.Context, conf *Config, mns *fs.MountNamespace, fds *fdDispenser, mounts []specs.Mount) error {
-
- // Mount all submounts from mounts.
for _, m := range mounts {
if err := mountSubmount(ctx, conf, mns, fds, m, mounts); err != nil {
return err
@@ -181,11 +183,12 @@ func createRootMount(ctx context.Context, spec *specs.Spec, conf *Config, fds *f
)
switch conf.FileAccess {
- case FileAccessProxy:
+ case FileAccessProxy, FileAccessProxyExclusive:
fd := fds.remove()
log.Infof("Mounting root over 9P, ioFD: %d", fd)
hostFS := mustFindFilesystem("9p")
- rootInode, err = hostFS.Mount(ctx, rootDevice, mf, fmt.Sprintf("trans=fd,rfdno=%d,wfdno=%d,privateunixsocket=true", fd, fd))
+ opts := p9MountOptions(conf, fd)
+ rootInode, err = hostFS.Mount(ctx, rootDevice, mf, strings.Join(opts, ","))
if err != nil {
return nil, fmt.Errorf("failed to generate root mount point: %v", err)
}
@@ -242,13 +245,16 @@ func addOverlay(ctx context.Context, conf *Config, lower *fs.Inode, name string,
return fs.NewOverlayRoot(ctx, upper, lower, lowerFlags)
}
-// getMountNameAndOptions retrieves the fsName, data, and useOverlay values
+// getMountNameAndOptions retrieves the fsName, opts, and useOverlay values
// used for mounts.
func getMountNameAndOptions(conf *Config, m specs.Mount, fds *fdDispenser) (string, []string, bool, error) {
- var fsName string
- var data []string
- var useOverlay bool
- var err error
+ var (
+ fsName string
+ opts []string
+ useOverlay bool
+ err error
+ )
+
switch m.Type {
case "devpts", "devtmpfs", "proc", "sysfs":
fsName = m.Type
@@ -258,17 +264,17 @@ func getMountNameAndOptions(conf *Config, m specs.Mount, fds *fdDispenser) (stri
fsName = m.Type
// tmpfs has some extra supported options that we must pass through.
- data, err = parseAndFilterOptions(m.Options, "mode", "uid", "gid")
+ opts, err = parseAndFilterOptions(m.Options, "mode", "uid", "gid")
case "bind":
switch conf.FileAccess {
- case FileAccessProxy:
+ case FileAccessProxy, FileAccessProxyExclusive:
fd := fds.remove()
fsName = "9p"
- data = []string{"trans=fd", fmt.Sprintf("rfdno=%d", fd), fmt.Sprintf("wfdno=%d", fd), "privateunixsocket=true"}
+ opts = p9MountOptions(conf, fd)
case FileAccessDirect:
fsName = "whitelistfs"
- data = []string{"root=" + m.Source, "dont_translate_ownership=true"}
+ opts = []string{"root=" + m.Source, "dont_translate_ownership=true"}
default:
err = fmt.Errorf("invalid file access type: %v", conf.FileAccess)
}
@@ -282,13 +288,13 @@ func getMountNameAndOptions(conf *Config, m specs.Mount, fds *fdDispenser) (stri
// we do not support.
log.Warningf("ignoring unknown filesystem type %q", m.Type)
}
- return fsName, data, useOverlay, err
+ return fsName, opts, useOverlay, err
}
func mountSubmount(ctx context.Context, conf *Config, mns *fs.MountNamespace, fds *fdDispenser, m specs.Mount, mounts []specs.Mount) error {
// Map mount type to filesystem name, and parse out the options that we are
// capable of dealing with.
- fsName, data, useOverlay, err := getMountNameAndOptions(conf, m, fds)
+ fsName, opts, useOverlay, err := getMountNameAndOptions(conf, m, fds)
// Return the error or nil that corresponds to the default case in getMountNameAndOptions.
if err != nil {
@@ -307,7 +313,7 @@ func mountSubmount(ctx context.Context, conf *Config, mns *fs.MountNamespace, fd
mf.ReadOnly = true
}
- inode, err := filesystem.Mount(ctx, mountDevice(m), mf, strings.Join(data, ","))
+ inode, err := filesystem.Mount(ctx, mountDevice(m), mf, strings.Join(opts, ","))
if err != nil {
return fmt.Errorf("failed to create mount with source %q: %v", m.Source, err)
}
@@ -387,6 +393,20 @@ func mkdirAll(ctx context.Context, mns *fs.MountNamespace, path string) error {
return nil
}
+// p9MountOptions creates a slice of options for a p9 mount.
+func p9MountOptions(conf *Config, fd int) []string {
+ opts := []string{
+ "trans=fd",
+ "rfdno=" + strconv.Itoa(fd),
+ "wfdno=" + strconv.Itoa(fd),
+ "privateunixsocket=true",
+ }
+ if conf.FileAccess == FileAccessProxy {
+ opts = append(opts, "cache=remote_revalidating")
+ }
+ return opts
+}
+
// parseAndFilterOptions parses a MountOptions slice and filters by the allowed
// keys.
func parseAndFilterOptions(opts []string, allowedKeys ...string) ([]string, error) {
@@ -436,8 +456,7 @@ func mountDevice(m specs.Mount) string {
// addRestoreMount adds a mount to the MountSources map used for restoring a
// checkpointed container.
func addRestoreMount(conf *Config, renv *fs.RestoreEnvironment, m specs.Mount, fds *fdDispenser) error {
- fsName, data, _, err := getMountNameAndOptions(conf, m, fds)
- dataString := strings.Join(data, ",")
+ fsName, opts, _, err := getMountNameAndOptions(conf, m, fds)
// Return the error or nil that corresponds to the default case in getMountNameAndOptions.
if err != nil {
@@ -452,7 +471,7 @@ func addRestoreMount(conf *Config, renv *fs.RestoreEnvironment, m specs.Mount, f
newMount := fs.MountArgs{
Dev: mountDevice(m),
Flags: mountFlags(m.Options),
- Data: dataString,
+ Data: strings.Join(opts, ","),
}
renv.MountSources[fsName] = append(renv.MountSources[fsName], newMount)
log.Infof("Added mount at %q: %+v", fsName, newMount)
@@ -473,7 +492,8 @@ func createRestoreEnvironment(spec *specs.Spec, conf *Config, fds *fdDispenser)
// Add root mount.
fd := fds.remove()
- dataString := strings.Join([]string{"trans=fd", fmt.Sprintf("rfdno=%d", fd), fmt.Sprintf("wfdno=%d", fd), "privateunixsocket=true"}, ",")
+ opts := p9MountOptions(conf, fd)
+
mf := fs.MountSourceFlags{}
if spec.Root.Readonly {
mf.ReadOnly = true
@@ -482,7 +502,7 @@ func createRestoreEnvironment(spec *specs.Spec, conf *Config, fds *fdDispenser)
rootMount := fs.MountArgs{
Dev: rootDevice,
Flags: mf,
- Data: dataString,
+ Data: strings.Join(opts, ","),
}
renv.MountSources[rootFsName] = append(renv.MountSources[rootFsName], rootMount)
diff --git a/runsc/boot/loader_test.go b/runsc/boot/loader_test.go
index 7ea2e1ee5..f2f690b5d 100644
--- a/runsc/boot/loader_test.go
+++ b/runsc/boot/loader_test.go
@@ -398,7 +398,7 @@ func TestRestoreEnvironment(t *testing.T) {
{
Dev: "9pfs-/",
Flags: fs.MountSourceFlags{ReadOnly: true},
- Data: "trans=fd,rfdno=0,wfdno=0,privateunixsocket=true",
+ Data: "trans=fd,rfdno=0,wfdno=0,privateunixsocket=true,cache=remote_revalidating",
},
},
"tmpfs": {
@@ -458,11 +458,11 @@ func TestRestoreEnvironment(t *testing.T) {
{
Dev: "9pfs-/",
Flags: fs.MountSourceFlags{ReadOnly: true},
- Data: "trans=fd,rfdno=0,wfdno=0,privateunixsocket=true",
+ Data: "trans=fd,rfdno=0,wfdno=0,privateunixsocket=true,cache=remote_revalidating",
},
{
Dev: "9pfs-/dev/fd-foo",
- Data: "trans=fd,rfdno=1,wfdno=1,privateunixsocket=true",
+ Data: "trans=fd,rfdno=1,wfdno=1,privateunixsocket=true,cache=remote_revalidating",
},
},
"tmpfs": {
@@ -522,7 +522,7 @@ func TestRestoreEnvironment(t *testing.T) {
{
Dev: "9pfs-/",
Flags: fs.MountSourceFlags{ReadOnly: true},
- Data: "trans=fd,rfdno=0,wfdno=0,privateunixsocket=true",
+ Data: "trans=fd,rfdno=0,wfdno=0,privateunixsocket=true,cache=remote_revalidating",
},
},
"tmpfs": {
@@ -606,21 +606,21 @@ func TestRestoreEnvironment(t *testing.T) {
errorExpected: true,
},
}
-
for _, tc := range testCases {
- fds := &fdDispenser{fds: tc.ioFDs}
-
- actualRenv, err := createRestoreEnvironment(tc.spec, tc.conf, fds)
- if !tc.errorExpected && err != nil {
- t.Fatalf("could not create restore environment for test:%s", tc.name)
- } else if tc.errorExpected {
- if err == nil {
- t.Fatalf("expected an error, but no error occurred.")
+ t.Run(tc.name, func(t *testing.T) {
+ fds := &fdDispenser{fds: tc.ioFDs}
+ actualRenv, err := createRestoreEnvironment(tc.spec, tc.conf, fds)
+ if !tc.errorExpected && err != nil {
+ t.Fatalf("could not create restore environment for test:%s", tc.name)
+ } else if tc.errorExpected {
+ if err == nil {
+ t.Errorf("expected an error, but no error occurred.")
+ }
+ } else {
+ if !reflect.DeepEqual(*actualRenv, tc.expectedRenv) {
+ t.Errorf("restore environments did not match for test:%s\ngot:%+v\nwant:%+v\n", tc.name, *actualRenv, tc.expectedRenv)
+ }
}
- } else {
- if !reflect.DeepEqual(*actualRenv, tc.expectedRenv) {
- t.Fatalf("restore environments did not match for test:%s\ngot:%+v\nwant:%+v\n", tc.name, *actualRenv, tc.expectedRenv)
- }
- }
+ })
}
}