diff options
author | Fabricio Voznika <fvoznika@google.com> | 2019-10-16 14:33:23 -0700 |
---|---|---|
committer | gVisor bot <gvisor-bot@google.com> | 2019-10-16 15:06:24 -0700 |
commit | 9fb562234e7858dbc60e8771f851629464edf205 (patch) | |
tree | 81249d5366078261dd759a34b9d42dd22ab6f140 /runsc/boot | |
parent | fd4e43600242c4929269afd529a8c1de4681e62e (diff) |
Fix problem with open FD when copy up is triggered in overlayfs
Linux kernel before 4.19 doesn't implement a feature that updates
open FD after a file is open for write (and is copied to the upper
layer). Already open FD will continue to read the old file content
until they are reopened. This is especially problematic for gVisor
because it caches open files.
Flag was added to force readonly files to be reopenned when the
same file is open for write. This is only needed if using kernels
prior to 4.19.
Closes #1006
It's difficult to really test this because we never run on tests
on older kernels. I'm adding a test in GKE which uses kernels
with the overlayfs problem for 1.14 and lower.
PiperOrigin-RevId: 275115289
Diffstat (limited to 'runsc/boot')
-rw-r--r-- | runsc/boot/config.go | 5 | ||||
-rw-r--r-- | runsc/boot/controller.go | 4 | ||||
-rw-r--r-- | runsc/boot/filter/config.go | 1 | ||||
-rw-r--r-- | runsc/boot/fs.go | 16 |
4 files changed, 24 insertions, 2 deletions
diff --git a/runsc/boot/config.go b/runsc/boot/config.go index 38278d0a2..01a29e8d5 100644 --- a/runsc/boot/config.go +++ b/runsc/boot/config.go @@ -231,6 +231,10 @@ type Config struct { // ReferenceLeakMode sets reference leak check mode ReferenceLeakMode refs.LeakMode + // OverlayfsStaleRead causes cached FDs to reopen after a file is opened for + // write to workaround overlayfs limitation on kernels before 4.19. + OverlayfsStaleRead bool + // TestOnlyAllowRunAsCurrentUserWithoutChroot should only be used in // tests. It allows runsc to start the sandbox process as the current // user, and without chrooting the sandbox process. This can be @@ -271,6 +275,7 @@ func (c *Config) ToFlags() []string { "--rootless=" + strconv.FormatBool(c.Rootless), "--alsologtostderr=" + strconv.FormatBool(c.AlsoLogToStderr), "--ref-leak-mode=" + refsLeakModeToString(c.ReferenceLeakMode), + "--overlayfs-stale-read=" + strconv.FormatBool(c.OverlayfsStaleRead), } // Only include these if set since it is never to be used by users. if c.TestOnlyAllowRunAsCurrentUserWithoutChroot { diff --git a/runsc/boot/controller.go b/runsc/boot/controller.go index a73c593ea..5f644b57e 100644 --- a/runsc/boot/controller.go +++ b/runsc/boot/controller.go @@ -32,6 +32,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/watchdog" "gvisor.dev/gvisor/pkg/tcpip/stack" "gvisor.dev/gvisor/pkg/urpc" + "gvisor.dev/gvisor/runsc/specutils" ) const ( @@ -237,6 +238,9 @@ func (cm *containerManager) Start(args *StartArgs, _ *struct{}) error { return fmt.Errorf("start arguments must contain stdin, stderr, and stdout followed by at least one file for the container root gofer") } + // All validation passed, logs the spec for debugging. + specutils.LogSpec(args.Spec) + err := cm.l.startContainer(args.Spec, args.Conf, args.CID, args.FilePayload.Files) if err != nil { log.Debugf("containerManager.Start failed %q: %+v: %v", args.CID, args, err) diff --git a/runsc/boot/filter/config.go b/runsc/boot/filter/config.go index a2ecc6bcb..efbf1fd4a 100644 --- a/runsc/boot/filter/config.go +++ b/runsc/boot/filter/config.go @@ -44,6 +44,7 @@ var allowedSyscalls = seccomp.SyscallRules{ }, syscall.SYS_CLOSE: {}, syscall.SYS_DUP: {}, + syscall.SYS_DUP2: {}, syscall.SYS_EPOLL_CREATE1: {}, syscall.SYS_EPOLL_CTL: {}, syscall.SYS_EPOLL_PWAIT: []seccomp.Rule{ diff --git a/runsc/boot/fs.go b/runsc/boot/fs.go index 393c2a88b..76036c147 100644 --- a/runsc/boot/fs.go +++ b/runsc/boot/fs.go @@ -703,6 +703,14 @@ func (c *containerMounter) createRootMount(ctx context.Context, conf *Config) (* log.Infof("Mounting root over 9P, ioFD: %d", fd) p9FS := mustFindFilesystem("9p") opts := p9MountOptions(fd, conf.FileAccess) + + if conf.OverlayfsStaleRead { + // We can't check for overlayfs here because sandbox is chroot'ed and gofer + // can only send mount options for specs.Mounts (specs.Root is missing + // Options field). So assume root is always on top of overlayfs. + opts = append(opts, "overlayfs_stale_read") + } + rootInode, err := p9FS.Mount(ctx, rootDevice, mf, strings.Join(opts, ","), nil) if err != nil { return nil, fmt.Errorf("creating root mount point: %v", err) @@ -737,7 +745,6 @@ func (c *containerMounter) getMountNameAndOptions(conf *Config, m specs.Mount) ( fsName string opts []string useOverlay bool - err error ) switch m.Type { @@ -747,7 +754,12 @@ func (c *containerMounter) getMountNameAndOptions(conf *Config, m specs.Mount) ( fsName = sysfs case tmpfs: fsName = m.Type + + var err error opts, err = parseAndFilterOptions(m.Options, tmpfsAllowedOptions...) + if err != nil { + return "", nil, false, err + } case bind: fd := c.fds.remove() @@ -763,7 +775,7 @@ func (c *containerMounter) getMountNameAndOptions(conf *Config, m specs.Mount) ( // for now. log.Warningf("ignoring unknown filesystem type %q", m.Type) } - return fsName, opts, useOverlay, err + return fsName, opts, useOverlay, nil } // mountSubmount mounts volumes inside the container's root. Because mounts may |