summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorJamie Liu <jamieliu@google.com>2020-05-13 10:51:50 -0700
committergVisor bot <gvisor-bot@google.com>2020-05-13 10:53:37 -0700
commitd84607762889d999f93b89e64e09d2a4dda63bf7 (patch)
treedd1b4d4736021073dfae875bdb3c89300a1ba154
parent18cb3d24cb3b59da11ebeac444021346495c95e4 (diff)
Enable overlayfs_stale_read by default for runsc.
Linux 4.18 and later make reads and writes coherent between pre-copy-up and post-copy-up FDs representing the same file on an overlay filesystem. However, memory mappings remain incoherent: - Documentation/filesystems/overlayfs.rst, "Non-standard behavior": "If a file residing on a lower layer is opened for read-only and then memory mapped with MAP_SHARED, then subsequent changes to the file are not reflected in the memory mapping." - fs/overlay/file.c:ovl_mmap() passes through to the underlying FD without any management of coherence in the overlay. - Experimentally on Linux 5.2: ``` $ cat mmap_cat_page.c #include <err.h> #include <fcntl.h> #include <stdio.h> #include <string.h> #include <sys/mman.h> #include <unistd.h> int main(int argc, char **argv) { if (argc < 2) { errx(1, "syntax: %s [FILE]", argv[0]); } const int fd = open(argv[1], O_RDONLY); if (fd < 0) { err(1, "open(%s)", argv[1]); } const size_t page_size = sysconf(_SC_PAGE_SIZE); void* page = mmap(NULL, page_size, PROT_READ, MAP_SHARED, fd, 0); if (page == MAP_FAILED) { err(1, "mmap"); } for (;;) { write(1, page, strnlen(page, page_size)); if (getc(stdin) == EOF) { break; } } return 0; } $ gcc -O2 -o mmap_cat_page mmap_cat_page.c $ mkdir lowerdir upperdir workdir overlaydir $ echo old > lowerdir/file $ sudo mount -t overlay -o "lowerdir=lowerdir,upperdir=upperdir,workdir=workdir" none overlaydir $ ./mmap_cat_page overlaydir/file old ^Z [1]+ Stopped ./mmap_cat_page overlaydir/file $ echo new > overlaydir/file $ cat overlaydir/file new $ fg ./mmap_cat_page overlaydir/file old ``` Therefore, while the VFS1 gofer client's behavior of reopening read FDs is only necessary pre-4.18, replacing existing memory mappings (in both sentry and application address spaces) with mappings of the new FD is required regardless of kernel version, and this latter behavior is common to both VFS1 and VFS2. Re-document accordingly, and change the runsc flag to enabled by default. New test: - Before this CL: https://source.cloud.google.com/results/invocations/5b222d2c-e918-4bae-afc4-407f5bac509b - After this CL: https://source.cloud.google.com/results/invocations/f28c747e-d89c-4d8c-a461-602b33e71aab PiperOrigin-RevId: 311361267
-rw-r--r--images/hostoverlaytest/Dockerfile7
-rw-r--r--images/hostoverlaytest/test.c88
-rw-r--r--images/hostoverlaytest/testfile.txt1
-rw-r--r--pkg/sentry/fs/gofer/fs.go3
-rw-r--r--pkg/sentry/fsimpl/gofer/gofer.go9
-rw-r--r--runsc/boot/config.go6
-rw-r--r--runsc/container/container_test.go37
-rw-r--r--runsc/main.go2
-rw-r--r--test/e2e/integration_test.go15
9 files changed, 123 insertions, 45 deletions
diff --git a/images/hostoverlaytest/Dockerfile b/images/hostoverlaytest/Dockerfile
new file mode 100644
index 000000000..d83439e9c
--- /dev/null
+++ b/images/hostoverlaytest/Dockerfile
@@ -0,0 +1,7 @@
+FROM ubuntu:bionic
+
+WORKDIR /root
+COPY . .
+
+RUN apt-get update && apt-get install -y gcc
+RUN gcc -O2 -o test test.c
diff --git a/images/hostoverlaytest/test.c b/images/hostoverlaytest/test.c
new file mode 100644
index 000000000..088f90746
--- /dev/null
+++ b/images/hostoverlaytest/test.c
@@ -0,0 +1,88 @@
+#include <err.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <unistd.h>
+
+int main(int argc, char** argv) {
+ const char kTestFilePath[] = "testfile.txt";
+ const char kOldFileData[] = "old data\n";
+ const char kNewFileData[] = "new data\n";
+ const size_t kPageSize = sysconf(_SC_PAGE_SIZE);
+
+ // Open a file that already exists in a host overlayfs lower layer.
+ const int fd_rdonly = open(kTestFilePath, O_RDONLY);
+ if (fd_rdonly < 0) {
+ err(1, "open(%s, O_RDONLY)", kTestFilePath);
+ }
+
+ // Check that the file's initial contents are what we expect when read via
+ // syscall.
+ char oldbuf[sizeof(kOldFileData)] = {};
+ ssize_t n = pread(fd_rdonly, oldbuf, sizeof(oldbuf), 0);
+ if (n < 0) {
+ err(1, "initial pread");
+ }
+ if (n != strlen(kOldFileData)) {
+ errx(1, "short initial pread (%ld/%lu bytes)", n, strlen(kOldFileData));
+ }
+ if (strcmp(oldbuf, kOldFileData) != 0) {
+ errx(1, "initial pread returned wrong data: %s", oldbuf);
+ }
+
+ // Check that the file's initial contents are what we expect when read via
+ // memory mapping.
+ void* page = mmap(NULL, kPageSize, PROT_READ, MAP_SHARED, fd_rdonly, 0);
+ if (page == MAP_FAILED) {
+ err(1, "mmap");
+ }
+ if (strcmp(page, kOldFileData) != 0) {
+ errx(1, "mapping contains wrong initial data: %s", (const char*)page);
+ }
+
+ // Open the same file writably, causing host overlayfs to copy it up, and
+ // replace its contents.
+ const int fd_rdwr = open(kTestFilePath, O_RDWR);
+ if (fd_rdwr < 0) {
+ err(1, "open(%s, O_RDWR)", kTestFilePath);
+ }
+ n = write(fd_rdwr, kNewFileData, strlen(kNewFileData));
+ if (n < 0) {
+ err(1, "write");
+ }
+ if (n != strlen(kNewFileData)) {
+ errx(1, "short write (%ld/%lu bytes)", n, strlen(kNewFileData));
+ }
+ if (ftruncate(fd_rdwr, strlen(kNewFileData)) < 0) {
+ err(1, "truncate");
+ }
+
+ int failed = 0;
+
+ // Check that syscalls on the old FD return updated contents. (Before Linux
+ // 4.18, this requires that runsc use a post-copy-up FD to service the read.)
+ char newbuf[sizeof(kNewFileData)] = {};
+ n = pread(fd_rdonly, newbuf, sizeof(newbuf), 0);
+ if (n < 0) {
+ err(1, "final pread");
+ }
+ if (n != strlen(kNewFileData)) {
+ warnx("short final pread (%ld/%lu bytes)", n, strlen(kNewFileData));
+ failed = 1;
+ } else if (strcmp(newbuf, kNewFileData) != 0) {
+ warnx("final pread returned wrong data: %s", newbuf);
+ failed = 1;
+ }
+
+ // Check that the memory mapping of the old FD has been updated. (Linux
+ // overlayfs does not do this, so regardless of kernel version this requires
+ // that runsc replace existing memory mappings with mappings of a
+ // post-copy-up FD.)
+ if (strcmp(page, kNewFileData) != 0) {
+ warnx("mapping contains wrong final data: %s", (const char*)page);
+ failed = 1;
+ }
+
+ return failed;
+}
diff --git a/images/hostoverlaytest/testfile.txt b/images/hostoverlaytest/testfile.txt
new file mode 100644
index 000000000..e4188c841
--- /dev/null
+++ b/images/hostoverlaytest/testfile.txt
@@ -0,0 +1 @@
+old data
diff --git a/pkg/sentry/fs/gofer/fs.go b/pkg/sentry/fs/gofer/fs.go
index 9d41fcbdb..8ae2d78d7 100644
--- a/pkg/sentry/fs/gofer/fs.go
+++ b/pkg/sentry/fs/gofer/fs.go
@@ -60,8 +60,7 @@ const (
limitHostFDTranslationKey = "limit_host_fd_translation"
// overlayfsStaleRead if present closes cached readonly file after the first
- // write. This is done to workaround a limitation of overlayfs in kernels
- // before 4.19 where open FDs are not updated after the file is copied up.
+ // write. This is done to workaround a limitation of Linux overlayfs.
overlayfsStaleRead = "overlayfs_stale_read"
)
diff --git a/pkg/sentry/fsimpl/gofer/gofer.go b/pkg/sentry/fsimpl/gofer/gofer.go
index 1da8d5d82..353e2cf5b 100644
--- a/pkg/sentry/fsimpl/gofer/gofer.go
+++ b/pkg/sentry/fsimpl/gofer/gofer.go
@@ -143,9 +143,12 @@ type filesystemOptions struct {
// If overlayfsStaleRead is true, O_RDONLY host FDs provided by the remote
// filesystem may not be coherent with writable host FDs opened later, so
- // mappings of the former must be replaced by mappings of the latter. This
- // is usually only the case when the remote filesystem is an overlayfs
- // mount on Linux < 4.19.
+ // all uses of the former must be replaced by uses of the latter. This is
+ // usually only the case when the remote filesystem is a Linux overlayfs
+ // mount. (Prior to Linux 4.18, patch series centered on commit
+ // d1d04ef8572b "ovl: stack file ops", both I/O and memory mappings were
+ // incoherent between pre-copy-up and post-copy-up FDs; after that patch
+ // series, only memory mappings are incoherent.)
overlayfsStaleRead bool
// If regularFilesUseSpecialFileFD is true, application FDs representing
diff --git a/runsc/boot/config.go b/runsc/boot/config.go
index 6d6a705f8..bcec7e4db 100644
--- a/runsc/boot/config.go
+++ b/runsc/boot/config.go
@@ -241,8 +241,10 @@ type Config struct {
// ReferenceLeakMode sets reference leak check mode
ReferenceLeakMode refs.LeakMode
- // OverlayfsStaleRead causes cached FDs to reopen after a file is opened for
- // write to workaround overlayfs limitation on kernels before 4.19.
+ // OverlayfsStaleRead instructs the sandbox to assume that the root mount
+ // is on a Linux overlayfs mount, which does not necessarily preserve
+ // coherence between read-only and subsequent writable file descriptors
+ // representing the "same" file.
OverlayfsStaleRead bool
// TestOnlyAllowRunAsCurrentUserWithoutChroot should only be used in
diff --git a/runsc/container/container_test.go b/runsc/container/container_test.go
index acf988aa0..7ba301331 100644
--- a/runsc/container/container_test.go
+++ b/runsc/container/container_test.go
@@ -2126,43 +2126,6 @@ func TestNetRaw(t *testing.T) {
}
}
-// TestOverlayfsStaleRead most basic test that '--overlayfs-stale-read' works.
-func TestOverlayfsStaleRead(t *testing.T) {
- conf := testutil.TestConfig(t)
- conf.OverlayfsStaleRead = true
-
- in, err := ioutil.TempFile(testutil.TmpDir(), "stale-read.in")
- if err != nil {
- t.Fatalf("ioutil.TempFile() failed: %v", err)
- }
- defer in.Close()
- if _, err := in.WriteString("stale data"); err != nil {
- t.Fatalf("in.Write() failed: %v", err)
- }
-
- out, err := ioutil.TempFile(testutil.TmpDir(), "stale-read.out")
- if err != nil {
- t.Fatalf("ioutil.TempFile() failed: %v", err)
- }
- defer out.Close()
-
- const want = "foobar"
- cmd := fmt.Sprintf("cat %q >&2 && echo %q> %q && cp %q %q", in.Name(), want, in.Name(), in.Name(), out.Name())
- spec := testutil.NewSpecWithArgs("/bin/bash", "-c", cmd)
- if err := run(spec, conf); err != nil {
- t.Fatalf("Error running container: %v", err)
- }
-
- gotBytes, err := ioutil.ReadAll(out)
- if err != nil {
- t.Fatalf("out.Read() failed: %v", err)
- }
- got := strings.TrimSpace(string(gotBytes))
- if want != got {
- t.Errorf("Wrong content in out file, got: %q. want: %q", got, want)
- }
-}
-
// TestTTYField checks TTY field returned by container.Processes().
func TestTTYField(t *testing.T) {
stop := testutil.StartReaper()
diff --git a/runsc/main.go b/runsc/main.go
index 0625a06e0..920ed84a5 100644
--- a/runsc/main.go
+++ b/runsc/main.go
@@ -76,7 +76,7 @@ var (
fileAccess = flag.String("file-access", "exclusive", "specifies which filesystem to use for the root mount: exclusive (default), shared. Volume mounts are always shared.")
fsGoferHostUDS = flag.Bool("fsgofer-host-uds", false, "allow the gofer to mount Unix Domain Sockets.")
overlay = flag.Bool("overlay", false, "wrap filesystem mounts with writable overlay. All modifications are stored in memory inside the sandbox.")
- overlayfsStaleRead = flag.Bool("overlayfs-stale-read", false, "reopen cached FDs after a file is opened for write to workaround overlayfs limitation on kernels before 4.19.")
+ overlayfsStaleRead = flag.Bool("overlayfs-stale-read", true, "assume root mount is an overlay filesystem")
watchdogAction = flag.String("watchdog-action", "log", "sets what action the watchdog takes when triggered: log (default), panic.")
panicSignal = flag.Int("panic-signal", -1, "register signal handling that panics. Usually set to SIGUSR2(12) to troubleshoot hangs. -1 disables it.")
profile = flag.Bool("profile", false, "prepares the sandbox to use Golang profiler. Note that enabling profiler loosens the seccomp protection added to the sandbox (DO NOT USE IN PRODUCTION).")
diff --git a/test/e2e/integration_test.go b/test/e2e/integration_test.go
index 404e37689..ff856883a 100644
--- a/test/e2e/integration_test.go
+++ b/test/e2e/integration_test.go
@@ -361,6 +361,21 @@ func TestTmpFile(t *testing.T) {
}
}
+// TestHostOverlayfsCopyUp tests that the --overlayfs-stale-read option causes
+// runsc to hide the incoherence of FDs opened before and after overlayfs
+// copy-up on the host.
+func TestHostOverlayfsCopyUp(t *testing.T) {
+ d := dockerutil.MakeDocker(t)
+ defer d.CleanUp()
+
+ if _, err := d.Run(dockerutil.RunOpts{
+ Image: "hostoverlaytest",
+ WorkDir: "/root",
+ }, "./test"); err != nil {
+ t.Fatalf("docker run failed: %v", err)
+ }
+}
+
func TestMain(m *testing.M) {
dockerutil.EnsureSupportedDockerVersion()
flag.Parse()