diff options
-rw-r--r-- | images/hostoverlaytest/Dockerfile | 7 | ||||
-rw-r--r-- | images/hostoverlaytest/test.c | 88 | ||||
-rw-r--r-- | images/hostoverlaytest/testfile.txt | 1 | ||||
-rw-r--r-- | pkg/sentry/fs/gofer/fs.go | 3 | ||||
-rw-r--r-- | pkg/sentry/fsimpl/gofer/gofer.go | 9 | ||||
-rw-r--r-- | runsc/boot/config.go | 6 | ||||
-rw-r--r-- | runsc/container/container_test.go | 37 | ||||
-rw-r--r-- | runsc/main.go | 2 | ||||
-rw-r--r-- | test/e2e/integration_test.go | 15 |
9 files changed, 123 insertions, 45 deletions
diff --git a/images/hostoverlaytest/Dockerfile b/images/hostoverlaytest/Dockerfile new file mode 100644 index 000000000..d83439e9c --- /dev/null +++ b/images/hostoverlaytest/Dockerfile @@ -0,0 +1,7 @@ +FROM ubuntu:bionic + +WORKDIR /root +COPY . . + +RUN apt-get update && apt-get install -y gcc +RUN gcc -O2 -o test test.c diff --git a/images/hostoverlaytest/test.c b/images/hostoverlaytest/test.c new file mode 100644 index 000000000..088f90746 --- /dev/null +++ b/images/hostoverlaytest/test.c @@ -0,0 +1,88 @@ +#include <err.h> +#include <fcntl.h> +#include <stdio.h> +#include <string.h> +#include <sys/mman.h> +#include <unistd.h> + +int main(int argc, char** argv) { + const char kTestFilePath[] = "testfile.txt"; + const char kOldFileData[] = "old data\n"; + const char kNewFileData[] = "new data\n"; + const size_t kPageSize = sysconf(_SC_PAGE_SIZE); + + // Open a file that already exists in a host overlayfs lower layer. + const int fd_rdonly = open(kTestFilePath, O_RDONLY); + if (fd_rdonly < 0) { + err(1, "open(%s, O_RDONLY)", kTestFilePath); + } + + // Check that the file's initial contents are what we expect when read via + // syscall. + char oldbuf[sizeof(kOldFileData)] = {}; + ssize_t n = pread(fd_rdonly, oldbuf, sizeof(oldbuf), 0); + if (n < 0) { + err(1, "initial pread"); + } + if (n != strlen(kOldFileData)) { + errx(1, "short initial pread (%ld/%lu bytes)", n, strlen(kOldFileData)); + } + if (strcmp(oldbuf, kOldFileData) != 0) { + errx(1, "initial pread returned wrong data: %s", oldbuf); + } + + // Check that the file's initial contents are what we expect when read via + // memory mapping. + void* page = mmap(NULL, kPageSize, PROT_READ, MAP_SHARED, fd_rdonly, 0); + if (page == MAP_FAILED) { + err(1, "mmap"); + } + if (strcmp(page, kOldFileData) != 0) { + errx(1, "mapping contains wrong initial data: %s", (const char*)page); + } + + // Open the same file writably, causing host overlayfs to copy it up, and + // replace its contents. + const int fd_rdwr = open(kTestFilePath, O_RDWR); + if (fd_rdwr < 0) { + err(1, "open(%s, O_RDWR)", kTestFilePath); + } + n = write(fd_rdwr, kNewFileData, strlen(kNewFileData)); + if (n < 0) { + err(1, "write"); + } + if (n != strlen(kNewFileData)) { + errx(1, "short write (%ld/%lu bytes)", n, strlen(kNewFileData)); + } + if (ftruncate(fd_rdwr, strlen(kNewFileData)) < 0) { + err(1, "truncate"); + } + + int failed = 0; + + // Check that syscalls on the old FD return updated contents. (Before Linux + // 4.18, this requires that runsc use a post-copy-up FD to service the read.) + char newbuf[sizeof(kNewFileData)] = {}; + n = pread(fd_rdonly, newbuf, sizeof(newbuf), 0); + if (n < 0) { + err(1, "final pread"); + } + if (n != strlen(kNewFileData)) { + warnx("short final pread (%ld/%lu bytes)", n, strlen(kNewFileData)); + failed = 1; + } else if (strcmp(newbuf, kNewFileData) != 0) { + warnx("final pread returned wrong data: %s", newbuf); + failed = 1; + } + + // Check that the memory mapping of the old FD has been updated. (Linux + // overlayfs does not do this, so regardless of kernel version this requires + // that runsc replace existing memory mappings with mappings of a + // post-copy-up FD.) + if (strcmp(page, kNewFileData) != 0) { + warnx("mapping contains wrong final data: %s", (const char*)page); + failed = 1; + } + + return failed; +} diff --git a/images/hostoverlaytest/testfile.txt b/images/hostoverlaytest/testfile.txt new file mode 100644 index 000000000..e4188c841 --- /dev/null +++ b/images/hostoverlaytest/testfile.txt @@ -0,0 +1 @@ +old data diff --git a/pkg/sentry/fs/gofer/fs.go b/pkg/sentry/fs/gofer/fs.go index 9d41fcbdb..8ae2d78d7 100644 --- a/pkg/sentry/fs/gofer/fs.go +++ b/pkg/sentry/fs/gofer/fs.go @@ -60,8 +60,7 @@ const ( limitHostFDTranslationKey = "limit_host_fd_translation" // overlayfsStaleRead if present closes cached readonly file after the first - // write. This is done to workaround a limitation of overlayfs in kernels - // before 4.19 where open FDs are not updated after the file is copied up. + // write. This is done to workaround a limitation of Linux overlayfs. overlayfsStaleRead = "overlayfs_stale_read" ) diff --git a/pkg/sentry/fsimpl/gofer/gofer.go b/pkg/sentry/fsimpl/gofer/gofer.go index 1da8d5d82..353e2cf5b 100644 --- a/pkg/sentry/fsimpl/gofer/gofer.go +++ b/pkg/sentry/fsimpl/gofer/gofer.go @@ -143,9 +143,12 @@ type filesystemOptions struct { // If overlayfsStaleRead is true, O_RDONLY host FDs provided by the remote // filesystem may not be coherent with writable host FDs opened later, so - // mappings of the former must be replaced by mappings of the latter. This - // is usually only the case when the remote filesystem is an overlayfs - // mount on Linux < 4.19. + // all uses of the former must be replaced by uses of the latter. This is + // usually only the case when the remote filesystem is a Linux overlayfs + // mount. (Prior to Linux 4.18, patch series centered on commit + // d1d04ef8572b "ovl: stack file ops", both I/O and memory mappings were + // incoherent between pre-copy-up and post-copy-up FDs; after that patch + // series, only memory mappings are incoherent.) overlayfsStaleRead bool // If regularFilesUseSpecialFileFD is true, application FDs representing diff --git a/runsc/boot/config.go b/runsc/boot/config.go index 6d6a705f8..bcec7e4db 100644 --- a/runsc/boot/config.go +++ b/runsc/boot/config.go @@ -241,8 +241,10 @@ type Config struct { // ReferenceLeakMode sets reference leak check mode ReferenceLeakMode refs.LeakMode - // OverlayfsStaleRead causes cached FDs to reopen after a file is opened for - // write to workaround overlayfs limitation on kernels before 4.19. + // OverlayfsStaleRead instructs the sandbox to assume that the root mount + // is on a Linux overlayfs mount, which does not necessarily preserve + // coherence between read-only and subsequent writable file descriptors + // representing the "same" file. OverlayfsStaleRead bool // TestOnlyAllowRunAsCurrentUserWithoutChroot should only be used in diff --git a/runsc/container/container_test.go b/runsc/container/container_test.go index acf988aa0..7ba301331 100644 --- a/runsc/container/container_test.go +++ b/runsc/container/container_test.go @@ -2126,43 +2126,6 @@ func TestNetRaw(t *testing.T) { } } -// TestOverlayfsStaleRead most basic test that '--overlayfs-stale-read' works. -func TestOverlayfsStaleRead(t *testing.T) { - conf := testutil.TestConfig(t) - conf.OverlayfsStaleRead = true - - in, err := ioutil.TempFile(testutil.TmpDir(), "stale-read.in") - if err != nil { - t.Fatalf("ioutil.TempFile() failed: %v", err) - } - defer in.Close() - if _, err := in.WriteString("stale data"); err != nil { - t.Fatalf("in.Write() failed: %v", err) - } - - out, err := ioutil.TempFile(testutil.TmpDir(), "stale-read.out") - if err != nil { - t.Fatalf("ioutil.TempFile() failed: %v", err) - } - defer out.Close() - - const want = "foobar" - cmd := fmt.Sprintf("cat %q >&2 && echo %q> %q && cp %q %q", in.Name(), want, in.Name(), in.Name(), out.Name()) - spec := testutil.NewSpecWithArgs("/bin/bash", "-c", cmd) - if err := run(spec, conf); err != nil { - t.Fatalf("Error running container: %v", err) - } - - gotBytes, err := ioutil.ReadAll(out) - if err != nil { - t.Fatalf("out.Read() failed: %v", err) - } - got := strings.TrimSpace(string(gotBytes)) - if want != got { - t.Errorf("Wrong content in out file, got: %q. want: %q", got, want) - } -} - // TestTTYField checks TTY field returned by container.Processes(). func TestTTYField(t *testing.T) { stop := testutil.StartReaper() diff --git a/runsc/main.go b/runsc/main.go index 0625a06e0..920ed84a5 100644 --- a/runsc/main.go +++ b/runsc/main.go @@ -76,7 +76,7 @@ var ( fileAccess = flag.String("file-access", "exclusive", "specifies which filesystem to use for the root mount: exclusive (default), shared. Volume mounts are always shared.") fsGoferHostUDS = flag.Bool("fsgofer-host-uds", false, "allow the gofer to mount Unix Domain Sockets.") overlay = flag.Bool("overlay", false, "wrap filesystem mounts with writable overlay. All modifications are stored in memory inside the sandbox.") - overlayfsStaleRead = flag.Bool("overlayfs-stale-read", false, "reopen cached FDs after a file is opened for write to workaround overlayfs limitation on kernels before 4.19.") + overlayfsStaleRead = flag.Bool("overlayfs-stale-read", true, "assume root mount is an overlay filesystem") watchdogAction = flag.String("watchdog-action", "log", "sets what action the watchdog takes when triggered: log (default), panic.") panicSignal = flag.Int("panic-signal", -1, "register signal handling that panics. Usually set to SIGUSR2(12) to troubleshoot hangs. -1 disables it.") profile = flag.Bool("profile", false, "prepares the sandbox to use Golang profiler. Note that enabling profiler loosens the seccomp protection added to the sandbox (DO NOT USE IN PRODUCTION).") diff --git a/test/e2e/integration_test.go b/test/e2e/integration_test.go index 404e37689..ff856883a 100644 --- a/test/e2e/integration_test.go +++ b/test/e2e/integration_test.go @@ -361,6 +361,21 @@ func TestTmpFile(t *testing.T) { } } +// TestHostOverlayfsCopyUp tests that the --overlayfs-stale-read option causes +// runsc to hide the incoherence of FDs opened before and after overlayfs +// copy-up on the host. +func TestHostOverlayfsCopyUp(t *testing.T) { + d := dockerutil.MakeDocker(t) + defer d.CleanUp() + + if _, err := d.Run(dockerutil.RunOpts{ + Image: "hostoverlaytest", + WorkDir: "/root", + }, "./test"); err != nil { + t.Fatalf("docker run failed: %v", err) + } +} + func TestMain(m *testing.M) { dockerutil.EnsureSupportedDockerVersion() flag.Parse() |