From 51f3ab85e024fcd74c49d273ce5202a207577d31 Mon Sep 17 00:00:00 2001
From: Nicolas Lacasse <nlacasse@google.com>
Date: Mon, 6 Jan 2020 12:51:35 -0800
Subject: Convert memfs into proto-tmpfs.

- Renamed memfs to tmpfs.
- Copied fileRangeSet bits from fs/fsutil/ to fsimpl/tmpfs/.
- Changed tmpfs to be backed by filemem instead of a byte slice.
- regularFileReadWriter uses a sync.Pool, similar to gofer client.

PiperOrigin-RevId: 288356380
---
 pkg/sentry/fs/fsutil/BUILD                   |   2 +-
 pkg/sentry/fs/fsutil/file_range_set.go       |  14 +-
 pkg/sentry/fsimpl/memfs/BUILD                |  80 ---
 pkg/sentry/fsimpl/memfs/benchmark_test.go    | 487 -------------------
 pkg/sentry/fsimpl/memfs/directory.go         | 187 -------
 pkg/sentry/fsimpl/memfs/filesystem.go        | 698 ---------------------------
 pkg/sentry/fsimpl/memfs/memfs.go             | 293 -----------
 pkg/sentry/fsimpl/memfs/named_pipe.go        |  60 ---
 pkg/sentry/fsimpl/memfs/pipe_test.go         | 235 ---------
 pkg/sentry/fsimpl/memfs/regular_file.go      | 154 ------
 pkg/sentry/fsimpl/memfs/symlink.go           |  36 --
 pkg/sentry/fsimpl/tmpfs/BUILD                |  92 ++++
 pkg/sentry/fsimpl/tmpfs/benchmark_test.go    | 487 +++++++++++++++++++
 pkg/sentry/fsimpl/tmpfs/directory.go         | 187 +++++++
 pkg/sentry/fsimpl/tmpfs/filesystem.go        | 698 +++++++++++++++++++++++++++
 pkg/sentry/fsimpl/tmpfs/named_pipe.go        |  60 +++
 pkg/sentry/fsimpl/tmpfs/pipe_test.go         | 235 +++++++++
 pkg/sentry/fsimpl/tmpfs/regular_file.go      | 357 ++++++++++++++
 pkg/sentry/fsimpl/tmpfs/regular_file_test.go | 224 +++++++++
 pkg/sentry/fsimpl/tmpfs/symlink.go           |  36 ++
 pkg/sentry/fsimpl/tmpfs/tmpfs.go             | 299 ++++++++++++
 21 files changed, 2683 insertions(+), 2238 deletions(-)
 delete mode 100644 pkg/sentry/fsimpl/memfs/BUILD
 delete mode 100644 pkg/sentry/fsimpl/memfs/benchmark_test.go
 delete mode 100644 pkg/sentry/fsimpl/memfs/directory.go
 delete mode 100644 pkg/sentry/fsimpl/memfs/filesystem.go
 delete mode 100644 pkg/sentry/fsimpl/memfs/memfs.go
 delete mode 100644 pkg/sentry/fsimpl/memfs/named_pipe.go
 delete mode 100644 pkg/sentry/fsimpl/memfs/pipe_test.go
 delete mode 100644 pkg/sentry/fsimpl/memfs/regular_file.go
 delete mode 100644 pkg/sentry/fsimpl/memfs/symlink.go
 create mode 100644 pkg/sentry/fsimpl/tmpfs/BUILD
 create mode 100644 pkg/sentry/fsimpl/tmpfs/benchmark_test.go
 create mode 100644 pkg/sentry/fsimpl/tmpfs/directory.go
 create mode 100644 pkg/sentry/fsimpl/tmpfs/filesystem.go
 create mode 100644 pkg/sentry/fsimpl/tmpfs/named_pipe.go
 create mode 100644 pkg/sentry/fsimpl/tmpfs/pipe_test.go
 create mode 100644 pkg/sentry/fsimpl/tmpfs/regular_file.go
 create mode 100644 pkg/sentry/fsimpl/tmpfs/regular_file_test.go
 create mode 100644 pkg/sentry/fsimpl/tmpfs/symlink.go
 create mode 100644 pkg/sentry/fsimpl/tmpfs/tmpfs.go

(limited to 'pkg/sentry')
diff --git a/pkg/sentry/fs/fsutil/BUILD b/pkg/sentry/fs/fsutil/BUILD
index b2e8d9c77..9ca695a95 100644
--- a/pkg/sentry/fs/fsutil/BUILD
+++ b/pkg/sentry/fs/fsutil/BUILD
@@ -53,7 +53,7 @@ go_template_instance(
         "Key": "uint64",
         "Range": "memmap.MappableRange",
         "Value": "uint64",
-        "Functions": "fileRangeSetFunctions",
+        "Functions": "FileRangeSetFunctions",
     },
 )
 
diff --git a/pkg/sentry/fs/fsutil/file_range_set.go b/pkg/sentry/fs/fsutil/file_range_set.go
index 0a5466b0a..f52d712e3 100644
--- a/pkg/sentry/fs/fsutil/file_range_set.go
+++ b/pkg/sentry/fs/fsutil/file_range_set.go
@@ -34,25 +34,25 @@ import (
 //
 // type FileRangeSet <generated by go_generics>
 
-// fileRangeSetFunctions implements segment.Functions for FileRangeSet.
-type fileRangeSetFunctions struct{}
+// FileRangeSetFunctions implements segment.Functions for FileRangeSet.
+type FileRangeSetFunctions struct{}
 
 // MinKey implements segment.Functions.MinKey.
-func (fileRangeSetFunctions) MinKey() uint64 {
+func (FileRangeSetFunctions) MinKey() uint64 {
 	return 0
 }
 
 // MaxKey implements segment.Functions.MaxKey.
-func (fileRangeSetFunctions) MaxKey() uint64 {
+func (FileRangeSetFunctions) MaxKey() uint64 {
 	return math.MaxUint64
 }
 
 // ClearValue implements segment.Functions.ClearValue.
-func (fileRangeSetFunctions) ClearValue(_ *uint64) {
+func (FileRangeSetFunctions) ClearValue(_ *uint64) {
 }
 
 // Merge implements segment.Functions.Merge.
-func (fileRangeSetFunctions) Merge(mr1 memmap.MappableRange, frstart1 uint64, _ memmap.MappableRange, frstart2 uint64) (uint64, bool) {
+func (FileRangeSetFunctions) Merge(mr1 memmap.MappableRange, frstart1 uint64, _ memmap.MappableRange, frstart2 uint64) (uint64, bool) {
 	if frstart1+mr1.Length() != frstart2 {
 		return 0, false
 	}
@@ -60,7 +60,7 @@ func (fileRangeSetFunctions) Merge(mr1 memmap.MappableRange, frstart1 uint64, _
 }
 
 // Split implements segment.Functions.Split.
-func (fileRangeSetFunctions) Split(mr memmap.MappableRange, frstart uint64, split uint64) (uint64, uint64) {
+func (FileRangeSetFunctions) Split(mr memmap.MappableRange, frstart uint64, split uint64) (uint64, uint64) {
 	return frstart, frstart + (split - mr.Start)
 }
 
diff --git a/pkg/sentry/fsimpl/memfs/BUILD b/pkg/sentry/fsimpl/memfs/BUILD
deleted file mode 100644
index 5689bed3b..000000000
--- a/pkg/sentry/fsimpl/memfs/BUILD
+++ /dev/null
@@ -1,80 +0,0 @@
-load("//tools/go_stateify:defs.bzl", "go_library")
-load("@io_bazel_rules_go//go:def.bzl", "go_test")
-
-package(licenses = ["notice"])
-
-load("//tools/go_generics:defs.bzl", "go_template_instance")
-
-go_template_instance(
-    name = "dentry_list",
-    out = "dentry_list.go",
-    package = "memfs",
-    prefix = "dentry",
-    template = "//pkg/ilist:generic_list",
-    types = {
-        "Element": "*dentry",
-        "Linker": "*dentry",
-    },
-)
-
-go_library(
-    name = "memfs",
-    srcs = [
-        "dentry_list.go",
-        "directory.go",
-        "filesystem.go",
-        "memfs.go",
-        "named_pipe.go",
-        "regular_file.go",
-        "symlink.go",
-    ],
-    importpath = "gvisor.dev/gvisor/pkg/sentry/fsimpl/memfs",
-    deps = [
-        "//pkg/abi/linux",
-        "//pkg/amutex",
-        "//pkg/fspath",
-        "//pkg/sentry/arch",
-        "//pkg/sentry/context",
-        "//pkg/sentry/kernel/auth",
-        "//pkg/sentry/kernel/pipe",
-        "//pkg/sentry/usermem",
-        "//pkg/sentry/vfs",
-        "//pkg/syserror",
-    ],
-)
-
-go_test(
-    name = "benchmark_test",
-    size = "small",
-    srcs = ["benchmark_test.go"],
-    deps = [
-        ":memfs",
-        "//pkg/abi/linux",
-        "//pkg/fspath",
-        "//pkg/refs",
-        "//pkg/sentry/context",
-        "//pkg/sentry/context/contexttest",
-        "//pkg/sentry/fs",
-        "//pkg/sentry/fs/tmpfs",
-        "//pkg/sentry/kernel/auth",
-        "//pkg/sentry/vfs",
-        "//pkg/syserror",
-    ],
-)
-
-go_test(
-    name = "memfs_test",
-    size = "small",
-    srcs = ["pipe_test.go"],
-    embed = [":memfs"],
-    deps = [
-        "//pkg/abi/linux",
-        "//pkg/fspath",
-        "//pkg/sentry/context",
-        "//pkg/sentry/context/contexttest",
-        "//pkg/sentry/kernel/auth",
-        "//pkg/sentry/usermem",
-        "//pkg/sentry/vfs",
-        "//pkg/syserror",
-    ],
-)
diff --git a/pkg/sentry/fsimpl/memfs/benchmark_test.go b/pkg/sentry/fsimpl/memfs/benchmark_test.go
deleted file mode 100644
index a27876a4e..000000000
--- a/pkg/sentry/fsimpl/memfs/benchmark_test.go
+++ /dev/null
@@ -1,487 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package benchmark_test
-
-import (
-	"fmt"
-	"runtime"
-	"strings"
-	"testing"
-
-	"gvisor.dev/gvisor/pkg/abi/linux"
-	"gvisor.dev/gvisor/pkg/fspath"
-	"gvisor.dev/gvisor/pkg/refs"
-	"gvisor.dev/gvisor/pkg/sentry/context"
-	"gvisor.dev/gvisor/pkg/sentry/context/contexttest"
-	"gvisor.dev/gvisor/pkg/sentry/fs"
-	_ "gvisor.dev/gvisor/pkg/sentry/fs/tmpfs"
-	"gvisor.dev/gvisor/pkg/sentry/fsimpl/memfs"
-	"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
-	"gvisor.dev/gvisor/pkg/sentry/vfs"
-	"gvisor.dev/gvisor/pkg/syserror"
-)
-
-// Differences from stat_benchmark:
-//
-// - Syscall interception, CopyInPath, copyOutStat, and overlayfs overheads are
-// not included.
-//
-// - *MountStat benchmarks use a tmpfs root mount and a tmpfs submount at /tmp.
-// Non-MountStat benchmarks use a tmpfs root mount and no submounts.
-// stat_benchmark uses a varying root mount, a tmpfs submount at /tmp, and a
-// subdirectory /tmp/<top_dir> (assuming TEST_TMPDIR == "/tmp"). Thus
-// stat_benchmark at depth 1 does a comparable amount of work to *MountStat
-// benchmarks at depth 2, and non-MountStat benchmarks at depth 3.
-var depths = []int{1, 2, 3, 8, 64, 100}
-
-const (
-	mountPointName = "tmp"
-	filename       = "gvisor_test_temp_0_1557494568"
-)
-
-// This is copied from syscalls/linux/sys_file.go, with the dependency on
-// kernel.Task stripped out.
-func fileOpOn(ctx context.Context, mntns *fs.MountNamespace, root, wd *fs.Dirent, dirFD int32, path string, resolve bool, fn func(root *fs.Dirent, d *fs.Dirent) error) error {
-	var (
-		d   *fs.Dirent // The file.
-		rel *fs.Dirent // The relative directory for search (if required.)
-		err error
-	)
-
-	// Extract the working directory (maybe).
-	if len(path) > 0 && path[0] == '/' {
-		// Absolute path; rel can be nil.
-	} else if dirFD == linux.AT_FDCWD {
-		// Need to reference the working directory.
-		rel = wd
-	} else {
-		// Need to extract the given FD.
-		return syserror.EBADF
-	}
-
-	// Lookup the node.
-	remainingTraversals := uint(linux.MaxSymlinkTraversals)
-	if resolve {
-		d, err = mntns.FindInode(ctx, root, rel, path, &remainingTraversals)
-	} else {
-		d, err = mntns.FindLink(ctx, root, rel, path, &remainingTraversals)
-	}
-	if err != nil {
-		return err
-	}
-
-	err = fn(root, d)
-	d.DecRef()
-	return err
-}
-
-func BenchmarkVFS1TmpfsStat(b *testing.B) {
-	for _, depth := range depths {
-		b.Run(fmt.Sprintf("%d", depth), func(b *testing.B) {
-			ctx := contexttest.Context(b)
-
-			// Create VFS.
-			tmpfsFS, ok := fs.FindFilesystem("tmpfs")
-			if !ok {
-				b.Fatalf("failed to find tmpfs filesystem type")
-			}
-			rootInode, err := tmpfsFS.Mount(ctx, "tmpfs", fs.MountSourceFlags{}, "", nil)
-			if err != nil {
-				b.Fatalf("failed to create tmpfs root mount: %v", err)
-			}
-			mntns, err := fs.NewMountNamespace(ctx, rootInode)
-			if err != nil {
-				b.Fatalf("failed to create mount namespace: %v", err)
-			}
-			defer mntns.DecRef()
-
-			var filePathBuilder strings.Builder
-			filePathBuilder.WriteByte('/')
-
-			// Create nested directories with given depth.
-			root := mntns.Root()
-			defer root.DecRef()
-			d := root
-			d.IncRef()
-			defer d.DecRef()
-			for i := depth; i > 0; i-- {
-				name := fmt.Sprintf("%d", i)
-				if err := d.Inode.CreateDirectory(ctx, d, name, fs.FilePermsFromMode(0755)); err != nil {
-					b.Fatalf("failed to create directory %q: %v", name, err)
-				}
-				next, err := d.Walk(ctx, root, name)
-				if err != nil {
-					b.Fatalf("failed to walk to directory %q: %v", name, err)
-				}
-				d.DecRef()
-				d = next
-				filePathBuilder.WriteString(name)
-				filePathBuilder.WriteByte('/')
-			}
-
-			// Create the file that will be stat'd.
-			file, err := d.Inode.Create(ctx, d, filename, fs.FileFlags{Read: true, Write: true}, fs.FilePermsFromMode(0644))
-			if err != nil {
-				b.Fatalf("failed to create file %q: %v", filename, err)
-			}
-			file.DecRef()
-			filePathBuilder.WriteString(filename)
-			filePath := filePathBuilder.String()
-
-			dirPath := false
-			runtime.GC()
-			b.ResetTimer()
-			for i := 0; i < b.N; i++ {
-				err := fileOpOn(ctx, mntns, root, root, linux.AT_FDCWD, filePath, true /* resolve */, func(root *fs.Dirent, d *fs.Dirent) error {
-					if dirPath && !fs.IsDir(d.Inode.StableAttr) {
-						return syserror.ENOTDIR
-					}
-					uattr, err := d.Inode.UnstableAttr(ctx)
-					if err != nil {
-						return err
-					}
-					// Sanity check.
-					if uattr.Perms.User.Execute {
-						b.Fatalf("got wrong permissions (%0o)", uattr.Perms.LinuxMode())
-					}
-					return nil
-				})
-				if err != nil {
-					b.Fatalf("stat(%q) failed: %v", filePath, err)
-				}
-			}
-			// Don't include deferred cleanup in benchmark time.
-			b.StopTimer()
-		})
-	}
-}
-
-func BenchmarkVFS2MemfsStat(b *testing.B) {
-	for _, depth := range depths {
-		b.Run(fmt.Sprintf("%d", depth), func(b *testing.B) {
-			ctx := contexttest.Context(b)
-			creds := auth.CredentialsFromContext(ctx)
-
-			// Create VFS.
-			vfsObj := vfs.New()
-			vfsObj.MustRegisterFilesystemType("memfs", memfs.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{
-				AllowUserMount: true,
-			})
-			mntns, err := vfsObj.NewMountNamespace(ctx, creds, "", "memfs", &vfs.GetFilesystemOptions{})
-			if err != nil {
-				b.Fatalf("failed to create tmpfs root mount: %v", err)
-			}
-			defer mntns.DecRef(vfsObj)
-
-			var filePathBuilder strings.Builder
-			filePathBuilder.WriteByte('/')
-
-			// Create nested directories with given depth.
-			root := mntns.Root()
-			defer root.DecRef()
-			vd := root
-			vd.IncRef()
-			for i := depth; i > 0; i-- {
-				name := fmt.Sprintf("%d", i)
-				pop := vfs.PathOperation{
-					Root:  root,
-					Start: vd,
-					Path:  fspath.Parse(name),
-				}
-				if err := vfsObj.MkdirAt(ctx, creds, &pop, &vfs.MkdirOptions{
-					Mode: 0755,
-				}); err != nil {
-					b.Fatalf("failed to create directory %q: %v", name, err)
-				}
-				nextVD, err := vfsObj.GetDentryAt(ctx, creds, &pop, &vfs.GetDentryOptions{})
-				if err != nil {
-					b.Fatalf("failed to walk to directory %q: %v", name, err)
-				}
-				vd.DecRef()
-				vd = nextVD
-				filePathBuilder.WriteString(name)
-				filePathBuilder.WriteByte('/')
-			}
-
-			// Create the file that will be stat'd.
-			fd, err := vfsObj.OpenAt(ctx, creds, &vfs.PathOperation{
-				Root:               root,
-				Start:              vd,
-				Path:               fspath.Parse(filename),
-				FollowFinalSymlink: true,
-			}, &vfs.OpenOptions{
-				Flags: linux.O_RDWR | linux.O_CREAT | linux.O_EXCL,
-				Mode:  0644,
-			})
-			vd.DecRef()
-			vd = vfs.VirtualDentry{}
-			if err != nil {
-				b.Fatalf("failed to create file %q: %v", filename, err)
-			}
-			defer fd.DecRef()
-			filePathBuilder.WriteString(filename)
-			filePath := filePathBuilder.String()
-
-			runtime.GC()
-			b.ResetTimer()
-			for i := 0; i < b.N; i++ {
-				stat, err := vfsObj.StatAt(ctx, creds, &vfs.PathOperation{
-					Root:               root,
-					Start:              root,
-					Path:               fspath.Parse(filePath),
-					FollowFinalSymlink: true,
-				}, &vfs.StatOptions{})
-				if err != nil {
-					b.Fatalf("stat(%q) failed: %v", filePath, err)
-				}
-				// Sanity check.
-				if stat.Mode&^linux.S_IFMT != 0644 {
-					b.Fatalf("got wrong permissions (%0o)", stat.Mode)
-				}
-			}
-			// Don't include deferred cleanup in benchmark time.
-			b.StopTimer()
-		})
-	}
-}
-
-func BenchmarkVFS1TmpfsMountStat(b *testing.B) {
-	for _, depth := range depths {
-		b.Run(fmt.Sprintf("%d", depth), func(b *testing.B) {
-			ctx := contexttest.Context(b)
-
-			// Create VFS.
-			tmpfsFS, ok := fs.FindFilesystem("tmpfs")
-			if !ok {
-				b.Fatalf("failed to find tmpfs filesystem type")
-			}
-			rootInode, err := tmpfsFS.Mount(ctx, "tmpfs", fs.MountSourceFlags{}, "", nil)
-			if err != nil {
-				b.Fatalf("failed to create tmpfs root mount: %v", err)
-			}
-			mntns, err := fs.NewMountNamespace(ctx, rootInode)
-			if err != nil {
-				b.Fatalf("failed to create mount namespace: %v", err)
-			}
-			defer mntns.DecRef()
-
-			var filePathBuilder strings.Builder
-			filePathBuilder.WriteByte('/')
-
-			// Create and mount the submount.
-			root := mntns.Root()
-			defer root.DecRef()
-			if err := root.Inode.CreateDirectory(ctx, root, mountPointName, fs.FilePermsFromMode(0755)); err != nil {
-				b.Fatalf("failed to create mount point: %v", err)
-			}
-			mountPoint, err := root.Walk(ctx, root, mountPointName)
-			if err != nil {
-				b.Fatalf("failed to walk to mount point: %v", err)
-			}
-			defer mountPoint.DecRef()
-			submountInode, err := tmpfsFS.Mount(ctx, "tmpfs", fs.MountSourceFlags{}, "", nil)
-			if err != nil {
-				b.Fatalf("failed to create tmpfs submount: %v", err)
-			}
-			if err := mntns.Mount(ctx, mountPoint, submountInode); err != nil {
-				b.Fatalf("failed to mount tmpfs submount: %v", err)
-			}
-			filePathBuilder.WriteString(mountPointName)
-			filePathBuilder.WriteByte('/')
-
-			// Create nested directories with given depth.
-			d, err := root.Walk(ctx, root, mountPointName)
-			if err != nil {
-				b.Fatalf("failed to walk to mount root: %v", err)
-			}
-			defer d.DecRef()
-			for i := depth; i > 0; i-- {
-				name := fmt.Sprintf("%d", i)
-				if err := d.Inode.CreateDirectory(ctx, d, name, fs.FilePermsFromMode(0755)); err != nil {
-					b.Fatalf("failed to create directory %q: %v", name, err)
-				}
-				next, err := d.Walk(ctx, root, name)
-				if err != nil {
-					b.Fatalf("failed to walk to directory %q: %v", name, err)
-				}
-				d.DecRef()
-				d = next
-				filePathBuilder.WriteString(name)
-				filePathBuilder.WriteByte('/')
-			}
-
-			// Create the file that will be stat'd.
-			file, err := d.Inode.Create(ctx, d, filename, fs.FileFlags{Read: true, Write: true}, fs.FilePermsFromMode(0644))
-			if err != nil {
-				b.Fatalf("failed to create file %q: %v", filename, err)
-			}
-			file.DecRef()
-			filePathBuilder.WriteString(filename)
-			filePath := filePathBuilder.String()
-
-			dirPath := false
-			runtime.GC()
-			b.ResetTimer()
-			for i := 0; i < b.N; i++ {
-				err := fileOpOn(ctx, mntns, root, root, linux.AT_FDCWD, filePath, true /* resolve */, func(root *fs.Dirent, d *fs.Dirent) error {
-					if dirPath && !fs.IsDir(d.Inode.StableAttr) {
-						return syserror.ENOTDIR
-					}
-					uattr, err := d.Inode.UnstableAttr(ctx)
-					if err != nil {
-						return err
-					}
-					// Sanity check.
-					if uattr.Perms.User.Execute {
-						b.Fatalf("got wrong permissions (%0o)", uattr.Perms.LinuxMode())
-					}
-					return nil
-				})
-				if err != nil {
-					b.Fatalf("stat(%q) failed: %v", filePath, err)
-				}
-			}
-			// Don't include deferred cleanup in benchmark time.
-			b.StopTimer()
-		})
-	}
-}
-
-func BenchmarkVFS2MemfsMountStat(b *testing.B) {
-	for _, depth := range depths {
-		b.Run(fmt.Sprintf("%d", depth), func(b *testing.B) {
-			ctx := contexttest.Context(b)
-			creds := auth.CredentialsFromContext(ctx)
-
-			// Create VFS.
-			vfsObj := vfs.New()
-			vfsObj.MustRegisterFilesystemType("memfs", memfs.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{
-				AllowUserMount: true,
-			})
-			mntns, err := vfsObj.NewMountNamespace(ctx, creds, "", "memfs", &vfs.GetFilesystemOptions{})
-			if err != nil {
-				b.Fatalf("failed to create tmpfs root mount: %v", err)
-			}
-			defer mntns.DecRef(vfsObj)
-
-			var filePathBuilder strings.Builder
-			filePathBuilder.WriteByte('/')
-
-			// Create the mount point.
-			root := mntns.Root()
-			defer root.DecRef()
-			pop := vfs.PathOperation{
-				Root:  root,
-				Start: root,
-				Path:  fspath.Parse(mountPointName),
-			}
-			if err := vfsObj.MkdirAt(ctx, creds, &pop, &vfs.MkdirOptions{
-				Mode: 0755,
-			}); err != nil {
-				b.Fatalf("failed to create mount point: %v", err)
-			}
-			// Save the mount point for later use.
-			mountPoint, err := vfsObj.GetDentryAt(ctx, creds, &pop, &vfs.GetDentryOptions{})
-			if err != nil {
-				b.Fatalf("failed to walk to mount point: %v", err)
-			}
-			defer mountPoint.DecRef()
-			// Create and mount the submount.
-			if err := vfsObj.MountAt(ctx, creds, "", &pop, "memfs", &vfs.MountOptions{}); err != nil {
-				b.Fatalf("failed to mount tmpfs submount: %v", err)
-			}
-			filePathBuilder.WriteString(mountPointName)
-			filePathBuilder.WriteByte('/')
-
-			// Create nested directories with given depth.
-			vd, err := vfsObj.GetDentryAt(ctx, creds, &pop, &vfs.GetDentryOptions{})
-			if err != nil {
-				b.Fatalf("failed to walk to mount root: %v", err)
-			}
-			for i := depth; i > 0; i-- {
-				name := fmt.Sprintf("%d", i)
-				pop := vfs.PathOperation{
-					Root:  root,
-					Start: vd,
-					Path:  fspath.Parse(name),
-				}
-				if err := vfsObj.MkdirAt(ctx, creds, &pop, &vfs.MkdirOptions{
-					Mode: 0755,
-				}); err != nil {
-					b.Fatalf("failed to create directory %q: %v", name, err)
-				}
-				nextVD, err := vfsObj.GetDentryAt(ctx, creds, &pop, &vfs.GetDentryOptions{})
-				if err != nil {
-					b.Fatalf("failed to walk to directory %q: %v", name, err)
-				}
-				vd.DecRef()
-				vd = nextVD
-				filePathBuilder.WriteString(name)
-				filePathBuilder.WriteByte('/')
-			}
-
-			// Verify that we didn't create any directories under the mount
-			// point (i.e. they were all created on the submount).
-			firstDirName := fmt.Sprintf("%d", depth)
-			if child := mountPoint.Dentry().Child(firstDirName); child != nil {
-				b.Fatalf("created directory %q under root mount, not submount", firstDirName)
-			}
-
-			// Create the file that will be stat'd.
-			fd, err := vfsObj.OpenAt(ctx, creds, &vfs.PathOperation{
-				Root:               root,
-				Start:              vd,
-				Path:               fspath.Parse(filename),
-				FollowFinalSymlink: true,
-			}, &vfs.OpenOptions{
-				Flags: linux.O_RDWR | linux.O_CREAT | linux.O_EXCL,
-				Mode:  0644,
-			})
-			vd.DecRef()
-			if err != nil {
-				b.Fatalf("failed to create file %q: %v", filename, err)
-			}
-			fd.DecRef()
-			filePathBuilder.WriteString(filename)
-			filePath := filePathBuilder.String()
-
-			runtime.GC()
-			b.ResetTimer()
-			for i := 0; i < b.N; i++ {
-				stat, err := vfsObj.StatAt(ctx, creds, &vfs.PathOperation{
-					Root:               root,
-					Start:              root,
-					Path:               fspath.Parse(filePath),
-					FollowFinalSymlink: true,
-				}, &vfs.StatOptions{})
-				if err != nil {
-					b.Fatalf("stat(%q) failed: %v", filePath, err)
-				}
-				// Sanity check.
-				if stat.Mode&^linux.S_IFMT != 0644 {
-					b.Fatalf("got wrong permissions (%0o)", stat.Mode)
-				}
-			}
-			// Don't include deferred cleanup in benchmark time.
-			b.StopTimer()
-		})
-	}
-}
-
-func init() {
-	// Turn off reference leak checking for a fair comparison between vfs1 and
-	// vfs2.
-	refs.SetLeakMode(refs.NoLeakChecking)
-}
diff --git a/pkg/sentry/fsimpl/memfs/directory.go b/pkg/sentry/fsimpl/memfs/directory.go
deleted file mode 100644
index 0bd82e480..000000000
--- a/pkg/sentry/fsimpl/memfs/directory.go
+++ /dev/null
@@ -1,187 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package memfs
-
-import (
-	"gvisor.dev/gvisor/pkg/abi/linux"
-	"gvisor.dev/gvisor/pkg/sentry/context"
-	"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
-	"gvisor.dev/gvisor/pkg/sentry/vfs"
-	"gvisor.dev/gvisor/pkg/syserror"
-)
-
-type directory struct {
-	inode inode
-
-	// childList is a list containing (1) child Dentries and (2) fake Dentries
-	// (with inode == nil) that represent the iteration position of
-	// directoryFDs. childList is used to support directoryFD.IterDirents()
-	// efficiently. childList is protected by filesystem.mu.
-	childList dentryList
-}
-
-func (fs *filesystem) newDirectory(creds *auth.Credentials, mode linux.FileMode) *inode {
-	dir := &directory{}
-	dir.inode.init(dir, fs, creds, mode)
-	dir.inode.nlink = 2 // from "." and parent directory or ".." for root
-	return &dir.inode
-}
-
-func (i *inode) isDir() bool {
-	_, ok := i.impl.(*directory)
-	return ok
-}
-
-type directoryFD struct {
-	fileDescription
-	vfs.DirectoryFileDescriptionDefaultImpl
-
-	// Protected by filesystem.mu.
-	iter *dentry
-	off  int64
-}
-
-// Release implements vfs.FileDescriptionImpl.Release.
-func (fd *directoryFD) Release() {
-	if fd.iter != nil {
-		fs := fd.filesystem()
-		dir := fd.inode().impl.(*directory)
-		fs.mu.Lock()
-		dir.childList.Remove(fd.iter)
-		fs.mu.Unlock()
-		fd.iter = nil
-	}
-}
-
-// IterDirents implements vfs.FileDescriptionImpl.IterDirents.
-func (fd *directoryFD) IterDirents(ctx context.Context, cb vfs.IterDirentsCallback) error {
-	fs := fd.filesystem()
-	vfsd := fd.vfsfd.VirtualDentry().Dentry()
-
-	fs.mu.Lock()
-	defer fs.mu.Unlock()
-
-	if fd.off == 0 {
-		if !cb.Handle(vfs.Dirent{
-			Name:    ".",
-			Type:    linux.DT_DIR,
-			Ino:     vfsd.Impl().(*dentry).inode.ino,
-			NextOff: 1,
-		}) {
-			return nil
-		}
-		fd.off++
-	}
-	if fd.off == 1 {
-		parentInode := vfsd.ParentOrSelf().Impl().(*dentry).inode
-		if !cb.Handle(vfs.Dirent{
-			Name:    "..",
-			Type:    parentInode.direntType(),
-			Ino:     parentInode.ino,
-			NextOff: 2,
-		}) {
-			return nil
-		}
-		fd.off++
-	}
-
-	dir := vfsd.Impl().(*dentry).inode.impl.(*directory)
-	var child *dentry
-	if fd.iter == nil {
-		// Start iteration at the beginning of dir.
-		child = dir.childList.Front()
-		fd.iter = &dentry{}
-	} else {
-		// Continue iteration from where we left off.
-		child = fd.iter.Next()
-		dir.childList.Remove(fd.iter)
-	}
-	for child != nil {
-		// Skip other directoryFD iterators.
-		if child.inode != nil {
-			if !cb.Handle(vfs.Dirent{
-				Name:    child.vfsd.Name(),
-				Type:    child.inode.direntType(),
-				Ino:     child.inode.ino,
-				NextOff: fd.off + 1,
-			}) {
-				dir.childList.InsertBefore(child, fd.iter)
-				return nil
-			}
-			fd.off++
-		}
-		child = child.Next()
-	}
-	dir.childList.PushBack(fd.iter)
-	return nil
-}
-
-// Seek implements vfs.FileDescriptionImpl.Seek.
-func (fd *directoryFD) Seek(ctx context.Context, offset int64, whence int32) (int64, error) {
-	fs := fd.filesystem()
-	fs.mu.Lock()
-	defer fs.mu.Unlock()
-
-	switch whence {
-	case linux.SEEK_SET:
-		// Use offset as given.
-	case linux.SEEK_CUR:
-		offset += fd.off
-	default:
-		return 0, syserror.EINVAL
-	}
-	if offset < 0 {
-		return 0, syserror.EINVAL
-	}
-
-	// If the offset isn't changing (e.g. due to lseek(0, SEEK_CUR)), don't
-	// seek even if doing so might reposition the iterator due to concurrent
-	// mutation of the directory. Compare fs/libfs.c:dcache_dir_lseek().
-	if fd.off == offset {
-		return offset, nil
-	}
-
-	fd.off = offset
-	// Compensate for "." and "..".
-	remChildren := int64(0)
-	if offset >= 2 {
-		remChildren = offset - 2
-	}
-
-	dir := fd.inode().impl.(*directory)
-
-	// Ensure that fd.iter exists and is not linked into dir.childList.
-	if fd.iter == nil {
-		fd.iter = &dentry{}
-	} else {
-		dir.childList.Remove(fd.iter)
-	}
-	// Insert fd.iter before the remChildren'th child, or at the end of the
-	// list if remChildren >= number of children.
-	child := dir.childList.Front()
-	for child != nil {
-		// Skip other directoryFD iterators.
-		if child.inode != nil {
-			if remChildren == 0 {
-				dir.childList.InsertBefore(child, fd.iter)
-				return offset, nil
-			}
-			remChildren--
-		}
-		child = child.Next()
-	}
-	dir.childList.PushBack(fd.iter)
-	return offset, nil
-}
diff --git a/pkg/sentry/fsimpl/memfs/filesystem.go b/pkg/sentry/fsimpl/memfs/filesystem.go
deleted file mode 100644
index b063e09a3..000000000
--- a/pkg/sentry/fsimpl/memfs/filesystem.go
+++ /dev/null
@@ -1,698 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package memfs
-
-import (
-	"fmt"
-	"sync/atomic"
-
-	"gvisor.dev/gvisor/pkg/abi/linux"
-	"gvisor.dev/gvisor/pkg/fspath"
-	"gvisor.dev/gvisor/pkg/sentry/context"
-	"gvisor.dev/gvisor/pkg/sentry/vfs"
-	"gvisor.dev/gvisor/pkg/syserror"
-)
-
-// Sync implements vfs.FilesystemImpl.Sync.
-func (fs *filesystem) Sync(ctx context.Context) error {
-	// All filesystem state is in-memory.
-	return nil
-}
-
-// stepLocked resolves rp.Component() to an existing file, starting from the
-// given directory.
-//
-// stepLocked is loosely analogous to fs/namei.c:walk_component().
-//
-// Preconditions: filesystem.mu must be locked. !rp.Done().
-func stepLocked(rp *vfs.ResolvingPath, d *dentry) (*dentry, error) {
-	if !d.inode.isDir() {
-		return nil, syserror.ENOTDIR
-	}
-	if err := d.inode.checkPermissions(rp.Credentials(), vfs.MayExec, true); err != nil {
-		return nil, err
-	}
-afterSymlink:
-	nextVFSD, err := rp.ResolveComponent(&d.vfsd)
-	if err != nil {
-		return nil, err
-	}
-	if nextVFSD == nil {
-		// Since the Dentry tree is the sole source of truth for memfs, if it's
-		// not in the Dentry tree, it doesn't exist.
-		return nil, syserror.ENOENT
-	}
-	next := nextVFSD.Impl().(*dentry)
-	if symlink, ok := next.inode.impl.(*symlink); ok && rp.ShouldFollowSymlink() {
-		// TODO: symlink traversals update access time
-		if err := rp.HandleSymlink(symlink.target); err != nil {
-			return nil, err
-		}
-		goto afterSymlink // don't check the current directory again
-	}
-	rp.Advance()
-	return next, nil
-}
-
-// walkParentDirLocked resolves all but the last path component of rp to an
-// existing directory, starting from the given directory (which is usually
-// rp.Start().Impl().(*dentry)). It does not check that the returned directory
-// is searchable by the provider of rp.
-//
-// walkParentDirLocked is loosely analogous to Linux's
-// fs/namei.c:path_parentat().
-//
-// Preconditions: filesystem.mu must be locked. !rp.Done().
-func walkParentDirLocked(rp *vfs.ResolvingPath, d *dentry) (*dentry, error) {
-	for !rp.Final() {
-		next, err := stepLocked(rp, d)
-		if err != nil {
-			return nil, err
-		}
-		d = next
-	}
-	if !d.inode.isDir() {
-		return nil, syserror.ENOTDIR
-	}
-	return d, nil
-}
-
-// resolveLocked resolves rp to an existing file.
-//
-// resolveLocked is loosely analogous to Linux's fs/namei.c:path_lookupat().
-//
-// Preconditions: filesystem.mu must be locked.
-func resolveLocked(rp *vfs.ResolvingPath) (*dentry, error) {
-	d := rp.Start().Impl().(*dentry)
-	for !rp.Done() {
-		next, err := stepLocked(rp, d)
-		if err != nil {
-			return nil, err
-		}
-		d = next
-	}
-	if rp.MustBeDir() && !d.inode.isDir() {
-		return nil, syserror.ENOTDIR
-	}
-	return d, nil
-}
-
-// doCreateAt checks that creating a file at rp is permitted, then invokes
-// create to do so.
-//
-// doCreateAt is loosely analogous to a conjunction of Linux's
-// fs/namei.c:filename_create() and done_path_create().
-//
-// Preconditions: !rp.Done(). For the final path component in rp,
-// !rp.ShouldFollowSymlink().
-func (fs *filesystem) doCreateAt(rp *vfs.ResolvingPath, dir bool, create func(parent *dentry, name string) error) error {
-	fs.mu.Lock()
-	defer fs.mu.Unlock()
-	parent, err := walkParentDirLocked(rp, rp.Start().Impl().(*dentry))
-	if err != nil {
-		return err
-	}
-	if err := parent.inode.checkPermissions(rp.Credentials(), vfs.MayWrite|vfs.MayExec, true /* isDir */); err != nil {
-		return err
-	}
-	name := rp.Component()
-	if name == "." || name == ".." {
-		return syserror.EEXIST
-	}
-	// Call parent.vfsd.Child() instead of stepLocked() or rp.ResolveChild(),
-	// because if the child exists we want to return EEXIST immediately instead
-	// of attempting symlink/mount traversal.
-	if parent.vfsd.Child(name) != nil {
-		return syserror.EEXIST
-	}
-	if !dir && rp.MustBeDir() {
-		return syserror.ENOENT
-	}
-	// In memfs, the only way to cause a dentry to be disowned is by removing
-	// it from the filesystem, so this check is equivalent to checking if
-	// parent has been removed.
-	if parent.vfsd.IsDisowned() {
-		return syserror.ENOENT
-	}
-	mnt := rp.Mount()
-	if err := mnt.CheckBeginWrite(); err != nil {
-		return err
-	}
-	defer mnt.EndWrite()
-	return create(parent, name)
-}
-
-// GetDentryAt implements vfs.FilesystemImpl.GetDentryAt.
-func (fs *filesystem) GetDentryAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.GetDentryOptions) (*vfs.Dentry, error) {
-	fs.mu.RLock()
-	defer fs.mu.RUnlock()
-	d, err := resolveLocked(rp)
-	if err != nil {
-		return nil, err
-	}
-	if opts.CheckSearchable {
-		if !d.inode.isDir() {
-			return nil, syserror.ENOTDIR
-		}
-		if err := d.inode.checkPermissions(rp.Credentials(), vfs.MayExec, true /* isDir */); err != nil {
-			return nil, err
-		}
-	}
-	d.IncRef()
-	return &d.vfsd, nil
-}
-
-// GetParentDentryAt implements vfs.FilesystemImpl.GetParentDentryAt.
-func (fs *filesystem) GetParentDentryAt(ctx context.Context, rp *vfs.ResolvingPath) (*vfs.Dentry, error) {
-	fs.mu.RLock()
-	defer fs.mu.RUnlock()
-	d, err := walkParentDirLocked(rp, rp.Start().Impl().(*dentry))
-	if err != nil {
-		return nil, err
-	}
-	d.IncRef()
-	return &d.vfsd, nil
-}
-
-// LinkAt implements vfs.FilesystemImpl.LinkAt.
-func (fs *filesystem) LinkAt(ctx context.Context, rp *vfs.ResolvingPath, vd vfs.VirtualDentry) error {
-	return fs.doCreateAt(rp, false /* dir */, func(parent *dentry, name string) error {
-		if rp.Mount() != vd.Mount() {
-			return syserror.EXDEV
-		}
-		d := vd.Dentry().Impl().(*dentry)
-		if d.inode.isDir() {
-			return syserror.EPERM
-		}
-		if d.inode.nlink == 0 {
-			return syserror.ENOENT
-		}
-		if d.inode.nlink == maxLinks {
-			return syserror.EMLINK
-		}
-		d.inode.incLinksLocked()
-		child := fs.newDentry(d.inode)
-		parent.vfsd.InsertChild(&child.vfsd, name)
-		parent.inode.impl.(*directory).childList.PushBack(child)
-		return nil
-	})
-}
-
-// MkdirAt implements vfs.FilesystemImpl.MkdirAt.
-func (fs *filesystem) MkdirAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.MkdirOptions) error {
-	return fs.doCreateAt(rp, true /* dir */, func(parent *dentry, name string) error {
-		if parent.inode.nlink == maxLinks {
-			return syserror.EMLINK
-		}
-		parent.inode.incLinksLocked() // from child's ".."
-		child := fs.newDentry(fs.newDirectory(rp.Credentials(), opts.Mode))
-		parent.vfsd.InsertChild(&child.vfsd, name)
-		parent.inode.impl.(*directory).childList.PushBack(child)
-		return nil
-	})
-}
-
-// MknodAt implements vfs.FilesystemImpl.MknodAt.
-func (fs *filesystem) MknodAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.MknodOptions) error {
-	return fs.doCreateAt(rp, false /* dir */, func(parent *dentry, name string) error {
-		switch opts.Mode.FileType() {
-		case 0, linux.S_IFREG:
-			child := fs.newDentry(fs.newRegularFile(rp.Credentials(), opts.Mode))
-			parent.vfsd.InsertChild(&child.vfsd, name)
-			parent.inode.impl.(*directory).childList.PushBack(child)
-			return nil
-		case linux.S_IFIFO:
-			child := fs.newDentry(fs.newNamedPipe(rp.Credentials(), opts.Mode))
-			parent.vfsd.InsertChild(&child.vfsd, name)
-			parent.inode.impl.(*directory).childList.PushBack(child)
-			return nil
-		case linux.S_IFBLK, linux.S_IFCHR, linux.S_IFSOCK:
-			// Not yet supported.
-			return syserror.EPERM
-		default:
-			return syserror.EINVAL
-		}
-	})
-}
-
-// OpenAt implements vfs.FilesystemImpl.OpenAt.
-func (fs *filesystem) OpenAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.OpenOptions) (*vfs.FileDescription, error) {
-	if opts.Flags&linux.O_TMPFILE != 0 {
-		// Not yet supported.
-		return nil, syserror.EOPNOTSUPP
-	}
-
-	// Handle O_CREAT and !O_CREAT separately, since in the latter case we
-	// don't need fs.mu for writing.
-	if opts.Flags&linux.O_CREAT == 0 {
-		fs.mu.RLock()
-		defer fs.mu.RUnlock()
-		d, err := resolveLocked(rp)
-		if err != nil {
-			return nil, err
-		}
-		return d.open(ctx, rp, opts.Flags, false /* afterCreate */)
-	}
-
-	mustCreate := opts.Flags&linux.O_EXCL != 0
-	start := rp.Start().Impl().(*dentry)
-	fs.mu.Lock()
-	defer fs.mu.Unlock()
-	if rp.Done() {
-		// Reject attempts to open directories with O_CREAT.
-		if rp.MustBeDir() {
-			return nil, syserror.EISDIR
-		}
-		if mustCreate {
-			return nil, syserror.EEXIST
-		}
-		return start.open(ctx, rp, opts.Flags, false /* afterCreate */)
-	}
-afterTrailingSymlink:
-	parent, err := walkParentDirLocked(rp, start)
-	if err != nil {
-		return nil, err
-	}
-	// Check for search permission in the parent directory.
-	if err := parent.inode.checkPermissions(rp.Credentials(), vfs.MayExec, true); err != nil {
-		return nil, err
-	}
-	// Reject attempts to open directories with O_CREAT.
-	if rp.MustBeDir() {
-		return nil, syserror.EISDIR
-	}
-	name := rp.Component()
-	if name == "." || name == ".." {
-		return nil, syserror.EISDIR
-	}
-	// Determine whether or not we need to create a file.
-	child, err := stepLocked(rp, parent)
-	if err == syserror.ENOENT {
-		// Already checked for searchability above; now check for writability.
-		if err := parent.inode.checkPermissions(rp.Credentials(), vfs.MayWrite, true); err != nil {
-			return nil, err
-		}
-		if err := rp.Mount().CheckBeginWrite(); err != nil {
-			return nil, err
-		}
-		defer rp.Mount().EndWrite()
-		// Create and open the child.
-		child := fs.newDentry(fs.newRegularFile(rp.Credentials(), opts.Mode))
-		parent.vfsd.InsertChild(&child.vfsd, name)
-		parent.inode.impl.(*directory).childList.PushBack(child)
-		return child.open(ctx, rp, opts.Flags, true)
-	}
-	if err != nil {
-		return nil, err
-	}
-	// Do we need to resolve a trailing symlink?
-	if !rp.Done() {
-		start = parent
-		goto afterTrailingSymlink
-	}
-	// Open existing file.
-	if mustCreate {
-		return nil, syserror.EEXIST
-	}
-	return child.open(ctx, rp, opts.Flags, false)
-}
-
-func (d *dentry) open(ctx context.Context, rp *vfs.ResolvingPath, flags uint32, afterCreate bool) (*vfs.FileDescription, error) {
-	ats := vfs.AccessTypesForOpenFlags(flags)
-	if !afterCreate {
-		if err := d.inode.checkPermissions(rp.Credentials(), ats, d.inode.isDir()); err != nil {
-			return nil, err
-		}
-	}
-	mnt := rp.Mount()
-	switch impl := d.inode.impl.(type) {
-	case *regularFile:
-		var fd regularFileFD
-		fd.readable = vfs.MayReadFileWithOpenFlags(flags)
-		fd.writable = vfs.MayWriteFileWithOpenFlags(flags)
-		if fd.writable {
-			if err := mnt.CheckBeginWrite(); err != nil {
-				return nil, err
-			}
-			// mnt.EndWrite() is called by regularFileFD.Release().
-		}
-		fd.vfsfd.Init(&fd, flags, mnt, &d.vfsd, &vfs.FileDescriptionOptions{})
-		if flags&linux.O_TRUNC != 0 {
-			impl.mu.Lock()
-			impl.data = impl.data[:0]
-			atomic.StoreInt64(&impl.dataLen, 0)
-			impl.mu.Unlock()
-		}
-		return &fd.vfsfd, nil
-	case *directory:
-		// Can't open directories writably.
-		if ats&vfs.MayWrite != 0 {
-			return nil, syserror.EISDIR
-		}
-		var fd directoryFD
-		fd.vfsfd.Init(&fd, flags, mnt, &d.vfsd, &vfs.FileDescriptionOptions{})
-		return &fd.vfsfd, nil
-	case *symlink:
-		// Can't open symlinks without O_PATH (which is unimplemented).
-		return nil, syserror.ELOOP
-	case *namedPipe:
-		return newNamedPipeFD(ctx, impl, rp, &d.vfsd, flags)
-	default:
-		panic(fmt.Sprintf("unknown inode type: %T", d.inode.impl))
-	}
-}
-
-// ReadlinkAt implements vfs.FilesystemImpl.ReadlinkAt.
-func (fs *filesystem) ReadlinkAt(ctx context.Context, rp *vfs.ResolvingPath) (string, error) {
-	fs.mu.RLock()
-	defer fs.mu.RUnlock()
-	d, err := resolveLocked(rp)
-	if err != nil {
-		return "", err
-	}
-	symlink, ok := d.inode.impl.(*symlink)
-	if !ok {
-		return "", syserror.EINVAL
-	}
-	return symlink.target, nil
-}
-
-// RenameAt implements vfs.FilesystemImpl.RenameAt.
-func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldParentVD vfs.VirtualDentry, oldName string, opts vfs.RenameOptions) error {
-	if opts.Flags != 0 {
-		// TODO(b/145974740): Support renameat2 flags.
-		return syserror.EINVAL
-	}
-
-	// Resolve newParent first to verify that it's on this Mount.
-	fs.mu.Lock()
-	defer fs.mu.Unlock()
-	newParent, err := walkParentDirLocked(rp, rp.Start().Impl().(*dentry))
-	if err != nil {
-		return err
-	}
-	newName := rp.Component()
-	if newName == "." || newName == ".." {
-		return syserror.EBUSY
-	}
-	mnt := rp.Mount()
-	if mnt != oldParentVD.Mount() {
-		return syserror.EXDEV
-	}
-	if err := mnt.CheckBeginWrite(); err != nil {
-		return err
-	}
-	defer mnt.EndWrite()
-
-	oldParent := oldParentVD.Dentry().Impl().(*dentry)
-	if err := oldParent.inode.checkPermissions(rp.Credentials(), vfs.MayWrite|vfs.MayExec, true /* isDir */); err != nil {
-		return err
-	}
-	// Call vfs.Dentry.Child() instead of stepLocked() or rp.ResolveChild(),
-	// because if the existing child is a symlink or mount point then we want
-	// to rename over it rather than follow it.
-	renamedVFSD := oldParent.vfsd.Child(oldName)
-	if renamedVFSD == nil {
-		return syserror.ENOENT
-	}
-	renamed := renamedVFSD.Impl().(*dentry)
-	if renamed.inode.isDir() {
-		if renamed == newParent || renamedVFSD.IsAncestorOf(&newParent.vfsd) {
-			return syserror.EINVAL
-		}
-		if oldParent != newParent {
-			// Writability is needed to change renamed's "..".
-			if err := renamed.inode.checkPermissions(rp.Credentials(), vfs.MayWrite, true /* isDir */); err != nil {
-				return err
-			}
-		}
-	} else {
-		if opts.MustBeDir || rp.MustBeDir() {
-			return syserror.ENOTDIR
-		}
-	}
-
-	if err := newParent.inode.checkPermissions(rp.Credentials(), vfs.MayWrite|vfs.MayExec, true /* isDir */); err != nil {
-		return err
-	}
-	replacedVFSD := newParent.vfsd.Child(newName)
-	var replaced *dentry
-	if replacedVFSD != nil {
-		replaced = replacedVFSD.Impl().(*dentry)
-		if replaced.inode.isDir() {
-			if !renamed.inode.isDir() {
-				return syserror.EISDIR
-			}
-			if replaced.vfsd.HasChildren() {
-				return syserror.ENOTEMPTY
-			}
-		} else {
-			if rp.MustBeDir() {
-				return syserror.ENOTDIR
-			}
-			if renamed.inode.isDir() {
-				return syserror.ENOTDIR
-			}
-		}
-	} else {
-		if renamed.inode.isDir() && newParent.inode.nlink == maxLinks {
-			return syserror.EMLINK
-		}
-	}
-	if newParent.vfsd.IsDisowned() {
-		return syserror.ENOENT
-	}
-
-	// Linux places this check before some of those above; we do it here for
-	// simplicity, under the assumption that applications are not intentionally
-	// doing noop renames expecting them to succeed where non-noop renames
-	// would fail.
-	if renamedVFSD == replacedVFSD {
-		return nil
-	}
-	vfsObj := rp.VirtualFilesystem()
-	oldParentDir := oldParent.inode.impl.(*directory)
-	newParentDir := newParent.inode.impl.(*directory)
-	if err := vfsObj.PrepareRenameDentry(vfs.MountNamespaceFromContext(ctx), renamedVFSD, replacedVFSD); err != nil {
-		return err
-	}
-	if replaced != nil {
-		newParentDir.childList.Remove(replaced)
-		if replaced.inode.isDir() {
-			newParent.inode.decLinksLocked() // from replaced's ".."
-		}
-		replaced.inode.decLinksLocked()
-	}
-	oldParentDir.childList.Remove(renamed)
-	newParentDir.childList.PushBack(renamed)
-	if renamed.inode.isDir() {
-		oldParent.inode.decLinksLocked()
-		newParent.inode.incLinksLocked()
-	}
-	// TODO: update timestamps and parent directory sizes
-	vfsObj.CommitRenameReplaceDentry(renamedVFSD, &newParent.vfsd, newName, replacedVFSD)
-	return nil
-}
-
-// RmdirAt implements vfs.FilesystemImpl.RmdirAt.
-func (fs *filesystem) RmdirAt(ctx context.Context, rp *vfs.ResolvingPath) error {
-	fs.mu.Lock()
-	defer fs.mu.Unlock()
-	parent, err := walkParentDirLocked(rp, rp.Start().Impl().(*dentry))
-	if err != nil {
-		return err
-	}
-	if err := parent.inode.checkPermissions(rp.Credentials(), vfs.MayWrite|vfs.MayExec, true /* isDir */); err != nil {
-		return err
-	}
-	name := rp.Component()
-	if name == "." {
-		return syserror.EINVAL
-	}
-	if name == ".." {
-		return syserror.ENOTEMPTY
-	}
-	childVFSD := parent.vfsd.Child(name)
-	if childVFSD == nil {
-		return syserror.ENOENT
-	}
-	child := childVFSD.Impl().(*dentry)
-	if !child.inode.isDir() {
-		return syserror.ENOTDIR
-	}
-	if childVFSD.HasChildren() {
-		return syserror.ENOTEMPTY
-	}
-	mnt := rp.Mount()
-	if err := mnt.CheckBeginWrite(); err != nil {
-		return err
-	}
-	defer mnt.EndWrite()
-	vfsObj := rp.VirtualFilesystem()
-	if err := vfsObj.PrepareDeleteDentry(vfs.MountNamespaceFromContext(ctx), childVFSD); err != nil {
-		return err
-	}
-	parent.inode.impl.(*directory).childList.Remove(child)
-	parent.inode.decLinksLocked() // from child's ".."
-	child.inode.decLinksLocked()
-	vfsObj.CommitDeleteDentry(childVFSD)
-	return nil
-}
-
-// SetStatAt implements vfs.FilesystemImpl.SetStatAt.
-func (fs *filesystem) SetStatAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.SetStatOptions) error {
-	fs.mu.RLock()
-	defer fs.mu.RUnlock()
-	_, err := resolveLocked(rp)
-	if err != nil {
-		return err
-	}
-	if opts.Stat.Mask == 0 {
-		return nil
-	}
-	// TODO: implement inode.setStat
-	return syserror.EPERM
-}
-
-// StatAt implements vfs.FilesystemImpl.StatAt.
-func (fs *filesystem) StatAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.StatOptions) (linux.Statx, error) {
-	fs.mu.RLock()
-	defer fs.mu.RUnlock()
-	d, err := resolveLocked(rp)
-	if err != nil {
-		return linux.Statx{}, err
-	}
-	var stat linux.Statx
-	d.inode.statTo(&stat)
-	return stat, nil
-}
-
-// StatFSAt implements vfs.FilesystemImpl.StatFSAt.
-func (fs *filesystem) StatFSAt(ctx context.Context, rp *vfs.ResolvingPath) (linux.Statfs, error) {
-	fs.mu.RLock()
-	defer fs.mu.RUnlock()
-	_, err := resolveLocked(rp)
-	if err != nil {
-		return linux.Statfs{}, err
-	}
-	// TODO: actually implement statfs
-	return linux.Statfs{}, syserror.ENOSYS
-}
-
-// SymlinkAt implements vfs.FilesystemImpl.SymlinkAt.
-func (fs *filesystem) SymlinkAt(ctx context.Context, rp *vfs.ResolvingPath, target string) error {
-	return fs.doCreateAt(rp, false /* dir */, func(parent *dentry, name string) error {
-		child := fs.newDentry(fs.newSymlink(rp.Credentials(), target))
-		parent.vfsd.InsertChild(&child.vfsd, name)
-		parent.inode.impl.(*directory).childList.PushBack(child)
-		return nil
-	})
-}
-
-// UnlinkAt implements vfs.FilesystemImpl.UnlinkAt.
-func (fs *filesystem) UnlinkAt(ctx context.Context, rp *vfs.ResolvingPath) error {
-	fs.mu.Lock()
-	defer fs.mu.Unlock()
-	parent, err := walkParentDirLocked(rp, rp.Start().Impl().(*dentry))
-	if err != nil {
-		return err
-	}
-	if err := parent.inode.checkPermissions(rp.Credentials(), vfs.MayWrite|vfs.MayExec, true /* isDir */); err != nil {
-		return err
-	}
-	name := rp.Component()
-	if name == "." || name == ".." {
-		return syserror.EISDIR
-	}
-	childVFSD := parent.vfsd.Child(name)
-	if childVFSD == nil {
-		return syserror.ENOENT
-	}
-	child := childVFSD.Impl().(*dentry)
-	if child.inode.isDir() {
-		return syserror.EISDIR
-	}
-	if !rp.MustBeDir() {
-		return syserror.ENOTDIR
-	}
-	mnt := rp.Mount()
-	if err := mnt.CheckBeginWrite(); err != nil {
-		return err
-	}
-	defer mnt.EndWrite()
-	vfsObj := rp.VirtualFilesystem()
-	if err := vfsObj.PrepareDeleteDentry(vfs.MountNamespaceFromContext(ctx), childVFSD); err != nil {
-		return err
-	}
-	parent.inode.impl.(*directory).childList.Remove(child)
-	child.inode.decLinksLocked()
-	vfsObj.CommitDeleteDentry(childVFSD)
-	return nil
-}
-
-// ListxattrAt implements vfs.FilesystemImpl.ListxattrAt.
-func (fs *filesystem) ListxattrAt(ctx context.Context, rp *vfs.ResolvingPath) ([]string, error) {
-	fs.mu.RLock()
-	defer fs.mu.RUnlock()
-	_, err := resolveLocked(rp)
-	if err != nil {
-		return nil, err
-	}
-	// TODO(b/127675828): support extended attributes
-	return nil, syserror.ENOTSUP
-}
-
-// GetxattrAt implements vfs.FilesystemImpl.GetxattrAt.
-func (fs *filesystem) GetxattrAt(ctx context.Context, rp *vfs.ResolvingPath, name string) (string, error) {
-	fs.mu.RLock()
-	defer fs.mu.RUnlock()
-	_, err := resolveLocked(rp)
-	if err != nil {
-		return "", err
-	}
-	// TODO(b/127675828): support extended attributes
-	return "", syserror.ENOTSUP
-}
-
-// SetxattrAt implements vfs.FilesystemImpl.SetxattrAt.
-func (fs *filesystem) SetxattrAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.SetxattrOptions) error {
-	fs.mu.RLock()
-	defer fs.mu.RUnlock()
-	_, err := resolveLocked(rp)
-	if err != nil {
-		return err
-	}
-	// TODO(b/127675828): support extended attributes
-	return syserror.ENOTSUP
-}
-
-// RemovexattrAt implements vfs.FilesystemImpl.RemovexattrAt.
-func (fs *filesystem) RemovexattrAt(ctx context.Context, rp *vfs.ResolvingPath, name string) error {
-	fs.mu.RLock()
-	defer fs.mu.RUnlock()
-	_, err := resolveLocked(rp)
-	if err != nil {
-		return err
-	}
-	// TODO(b/127675828): support extended attributes
-	return syserror.ENOTSUP
-}
-
-// PrependPath implements vfs.FilesystemImpl.PrependPath.
-func (fs *filesystem) PrependPath(ctx context.Context, vfsroot, vd vfs.VirtualDentry, b *fspath.Builder) error {
-	fs.mu.RLock()
-	defer fs.mu.RUnlock()
-	return vfs.GenericPrependPath(vfsroot, vd, b)
-}
diff --git a/pkg/sentry/fsimpl/memfs/memfs.go b/pkg/sentry/fsimpl/memfs/memfs.go
deleted file mode 100644
index 8d0167c93..000000000
--- a/pkg/sentry/fsimpl/memfs/memfs.go
+++ /dev/null
@@ -1,293 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// Package memfs provides a filesystem implementation that behaves like tmpfs:
-// the Dentry tree is the sole source of truth for the state of the filesystem.
-//
-// memfs is intended primarily to demonstrate filesystem implementation
-// patterns. Real uses cases for an in-memory filesystem should use tmpfs
-// instead.
-//
-// Lock order:
-//
-// filesystem.mu
-//   regularFileFD.offMu
-//     regularFile.mu
-//   inode.mu
-package memfs
-
-import (
-	"fmt"
-	"math"
-	"sync"
-	"sync/atomic"
-
-	"gvisor.dev/gvisor/pkg/abi/linux"
-	"gvisor.dev/gvisor/pkg/sentry/context"
-	"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
-	"gvisor.dev/gvisor/pkg/sentry/vfs"
-	"gvisor.dev/gvisor/pkg/syserror"
-)
-
-// FilesystemType implements vfs.FilesystemType.
-type FilesystemType struct{}
-
-// filesystem implements vfs.FilesystemImpl.
-type filesystem struct {
-	vfsfs vfs.Filesystem
-
-	// mu serializes changes to the Dentry tree.
-	mu sync.RWMutex
-
-	nextInoMinusOne uint64 // accessed using atomic memory operations
-}
-
-// GetFilesystem implements vfs.FilesystemType.GetFilesystem.
-func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.VirtualFilesystem, creds *auth.Credentials, source string, opts vfs.GetFilesystemOptions) (*vfs.Filesystem, *vfs.Dentry, error) {
-	var fs filesystem
-	fs.vfsfs.Init(vfsObj, &fs)
-	root := fs.newDentry(fs.newDirectory(creds, 01777))
-	return &fs.vfsfs, &root.vfsd, nil
-}
-
-// Release implements vfs.FilesystemImpl.Release.
-func (fs *filesystem) Release() {
-}
-
-// dentry implements vfs.DentryImpl.
-type dentry struct {
-	vfsd vfs.Dentry
-
-	// inode is the inode represented by this dentry. Multiple Dentries may
-	// share a single non-directory inode (with hard links). inode is
-	// immutable.
-	inode *inode
-
-	// memfs doesn't count references on dentries; because the dentry tree is
-	// the sole source of truth, it is by definition always consistent with the
-	// state of the filesystem. However, it does count references on inodes,
-	// because inode resources are released when all references are dropped.
-	// (memfs doesn't really have resources to release, but we implement
-	// reference counting because tmpfs regular files will.)
-
-	// dentryEntry (ugh) links dentries into their parent directory.childList.
-	dentryEntry
-}
-
-func (fs *filesystem) newDentry(inode *inode) *dentry {
-	d := &dentry{
-		inode: inode,
-	}
-	d.vfsd.Init(d)
-	return d
-}
-
-// IncRef implements vfs.DentryImpl.IncRef.
-func (d *dentry) IncRef() {
-	d.inode.incRef()
-}
-
-// TryIncRef implements vfs.DentryImpl.TryIncRef.
-func (d *dentry) TryIncRef() bool {
-	return d.inode.tryIncRef()
-}
-
-// DecRef implements vfs.DentryImpl.DecRef.
-func (d *dentry) DecRef() {
-	d.inode.decRef()
-}
-
-// inode represents a filesystem object.
-type inode struct {
-	// refs is a reference count. refs is accessed using atomic memory
-	// operations.
-	//
-	// A reference is held on all inodes that are reachable in the filesystem
-	// tree. For non-directories (which may have multiple hard links), this
-	// means that a reference is dropped when nlink reaches 0. For directories,
-	// nlink never reaches 0 due to the "." entry; instead,
-	// filesystem.RmdirAt() drops the reference.
-	refs int64
-
-	// Inode metadata; protected by mu and accessed using atomic memory
-	// operations unless otherwise specified.
-	mu    sync.RWMutex
-	mode  uint32 // excluding file type bits, which are based on impl
-	nlink uint32 // protected by filesystem.mu instead of inode.mu
-	uid   uint32 // auth.KUID, but stored as raw uint32 for sync/atomic
-	gid   uint32 // auth.KGID, but ...
-	ino   uint64 // immutable
-
-	impl interface{} // immutable
-}
-
-const maxLinks = math.MaxUint32
-
-func (i *inode) init(impl interface{}, fs *filesystem, creds *auth.Credentials, mode linux.FileMode) {
-	i.refs = 1
-	i.mode = uint32(mode)
-	i.uid = uint32(creds.EffectiveKUID)
-	i.gid = uint32(creds.EffectiveKGID)
-	i.ino = atomic.AddUint64(&fs.nextInoMinusOne, 1)
-	// i.nlink initialized by caller
-	i.impl = impl
-}
-
-// incLinksLocked increments i's link count.
-//
-// Preconditions: filesystem.mu must be locked for writing. i.nlink != 0.
-// i.nlink < maxLinks.
-func (i *inode) incLinksLocked() {
-	if i.nlink == 0 {
-		panic("memfs.inode.incLinksLocked() called with no existing links")
-	}
-	if i.nlink == maxLinks {
-		panic("memfs.inode.incLinksLocked() called with maximum link count")
-	}
-	atomic.AddUint32(&i.nlink, 1)
-}
-
-// decLinksLocked decrements i's link count.
-//
-// Preconditions: filesystem.mu must be locked for writing. i.nlink != 0.
-func (i *inode) decLinksLocked() {
-	if i.nlink == 0 {
-		panic("memfs.inode.decLinksLocked() called with no existing links")
-	}
-	atomic.AddUint32(&i.nlink, ^uint32(0))
-}
-
-func (i *inode) incRef() {
-	if atomic.AddInt64(&i.refs, 1) <= 1 {
-		panic("memfs.inode.incRef() called without holding a reference")
-	}
-}
-
-func (i *inode) tryIncRef() bool {
-	for {
-		refs := atomic.LoadInt64(&i.refs)
-		if refs == 0 {
-			return false
-		}
-		if atomic.CompareAndSwapInt64(&i.refs, refs, refs+1) {
-			return true
-		}
-	}
-}
-
-func (i *inode) decRef() {
-	if refs := atomic.AddInt64(&i.refs, -1); refs == 0 {
-		// This is unnecessary; it's mostly to simulate what tmpfs would do.
-		if regfile, ok := i.impl.(*regularFile); ok {
-			regfile.mu.Lock()
-			regfile.data = nil
-			atomic.StoreInt64(&regfile.dataLen, 0)
-			regfile.mu.Unlock()
-		}
-	} else if refs < 0 {
-		panic("memfs.inode.decRef() called without holding a reference")
-	}
-}
-
-func (i *inode) checkPermissions(creds *auth.Credentials, ats vfs.AccessTypes, isDir bool) error {
-	return vfs.GenericCheckPermissions(creds, ats, isDir, uint16(atomic.LoadUint32(&i.mode)), auth.KUID(atomic.LoadUint32(&i.uid)), auth.KGID(atomic.LoadUint32(&i.gid)))
-}
-
-// Go won't inline this function, and returning linux.Statx (which is quite
-// big) means spending a lot of time in runtime.duffcopy(), so instead it's an
-// output parameter.
-func (i *inode) statTo(stat *linux.Statx) {
-	stat.Mask = linux.STATX_TYPE | linux.STATX_MODE | linux.STATX_NLINK | linux.STATX_UID | linux.STATX_GID | linux.STATX_INO
-	stat.Blksize = 1 // usermem.PageSize in tmpfs
-	stat.Nlink = atomic.LoadUint32(&i.nlink)
-	stat.UID = atomic.LoadUint32(&i.uid)
-	stat.GID = atomic.LoadUint32(&i.gid)
-	stat.Mode = uint16(atomic.LoadUint32(&i.mode))
-	stat.Ino = i.ino
-	// TODO: device number
-	switch impl := i.impl.(type) {
-	case *regularFile:
-		stat.Mode |= linux.S_IFREG
-		stat.Mask |= linux.STATX_SIZE | linux.STATX_BLOCKS
-		stat.Size = uint64(atomic.LoadInt64(&impl.dataLen))
-		// In tmpfs, this will be FileRangeSet.Span() / 512 (but also cached in
-		// a uint64 accessed using atomic memory operations to avoid taking
-		// locks).
-		stat.Blocks = allocatedBlocksForSize(stat.Size)
-	case *directory:
-		stat.Mode |= linux.S_IFDIR
-	case *symlink:
-		stat.Mode |= linux.S_IFLNK
-		stat.Mask |= linux.STATX_SIZE | linux.STATX_BLOCKS
-		stat.Size = uint64(len(impl.target))
-		stat.Blocks = allocatedBlocksForSize(stat.Size)
-	case *namedPipe:
-		stat.Mode |= linux.S_IFIFO
-	default:
-		panic(fmt.Sprintf("unknown inode type: %T", i.impl))
-	}
-}
-
-// allocatedBlocksForSize returns the number of 512B blocks needed to
-// accommodate the given size in bytes, as appropriate for struct
-// stat::st_blocks and struct statx::stx_blocks. (Note that this 512B block
-// size is independent of the "preferred block size for I/O", struct
-// stat::st_blksize and struct statx::stx_blksize.)
-func allocatedBlocksForSize(size uint64) uint64 {
-	return (size + 511) / 512
-}
-
-func (i *inode) direntType() uint8 {
-	switch i.impl.(type) {
-	case *regularFile:
-		return linux.DT_REG
-	case *directory:
-		return linux.DT_DIR
-	case *symlink:
-		return linux.DT_LNK
-	default:
-		panic(fmt.Sprintf("unknown inode type: %T", i.impl))
-	}
-}
-
-// fileDescription is embedded by memfs implementations of
-// vfs.FileDescriptionImpl.
-type fileDescription struct {
-	vfsfd vfs.FileDescription
-	vfs.FileDescriptionDefaultImpl
-}
-
-func (fd *fileDescription) filesystem() *filesystem {
-	return fd.vfsfd.Mount().Filesystem().Impl().(*filesystem)
-}
-
-func (fd *fileDescription) inode() *inode {
-	return fd.vfsfd.Dentry().Impl().(*dentry).inode
-}
-
-// Stat implements vfs.FileDescriptionImpl.Stat.
-func (fd *fileDescription) Stat(ctx context.Context, opts vfs.StatOptions) (linux.Statx, error) {
-	var stat linux.Statx
-	fd.inode().statTo(&stat)
-	return stat, nil
-}
-
-// SetStat implements vfs.FileDescriptionImpl.SetStat.
-func (fd *fileDescription) SetStat(ctx context.Context, opts vfs.SetStatOptions) error {
-	if opts.Stat.Mask == 0 {
-		return nil
-	}
-	// TODO: implement inode.setStat
-	return syserror.EPERM
-}
diff --git a/pkg/sentry/fsimpl/memfs/named_pipe.go b/pkg/sentry/fsimpl/memfs/named_pipe.go
deleted file mode 100644
index b5a204438..000000000
--- a/pkg/sentry/fsimpl/memfs/named_pipe.go
+++ /dev/null
@@ -1,60 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package memfs
-
-import (
-	"gvisor.dev/gvisor/pkg/abi/linux"
-	"gvisor.dev/gvisor/pkg/sentry/context"
-	"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
-	"gvisor.dev/gvisor/pkg/sentry/kernel/pipe"
-	"gvisor.dev/gvisor/pkg/sentry/usermem"
-	"gvisor.dev/gvisor/pkg/sentry/vfs"
-)
-
-type namedPipe struct {
-	inode inode
-
-	pipe *pipe.VFSPipe
-}
-
-// Preconditions:
-//   * fs.mu must be locked.
-//   * rp.Mount().CheckBeginWrite() has been called successfully.
-func (fs *filesystem) newNamedPipe(creds *auth.Credentials, mode linux.FileMode) *inode {
-	file := &namedPipe{pipe: pipe.NewVFSPipe(pipe.DefaultPipeSize, usermem.PageSize)}
-	file.inode.init(file, fs, creds, mode)
-	file.inode.nlink = 1 // Only the parent has a link.
-	return &file.inode
-}
-
-// namedPipeFD implements vfs.FileDescriptionImpl. Methods are implemented
-// entirely via struct embedding.
-type namedPipeFD struct {
-	fileDescription
-
-	*pipe.VFSPipeFD
-}
-
-func newNamedPipeFD(ctx context.Context, np *namedPipe, rp *vfs.ResolvingPath, vfsd *vfs.Dentry, flags uint32) (*vfs.FileDescription, error) {
-	var err error
-	var fd namedPipeFD
-	fd.VFSPipeFD, err = np.pipe.NewVFSPipeFD(ctx, rp, vfsd, &fd.vfsfd, flags)
-	if err != nil {
-		return nil, err
-	}
-	mnt := rp.Mount()
-	fd.vfsfd.Init(&fd, flags, mnt, vfsd, &vfs.FileDescriptionOptions{})
-	return &fd.vfsfd, nil
-}
diff --git a/pkg/sentry/fsimpl/memfs/pipe_test.go b/pkg/sentry/fsimpl/memfs/pipe_test.go
deleted file mode 100644
index 807c1af7a..000000000
--- a/pkg/sentry/fsimpl/memfs/pipe_test.go
+++ /dev/null
@@ -1,235 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package memfs
-
-import (
-	"bytes"
-	"testing"
-
-	"gvisor.dev/gvisor/pkg/abi/linux"
-	"gvisor.dev/gvisor/pkg/fspath"
-	"gvisor.dev/gvisor/pkg/sentry/context"
-	"gvisor.dev/gvisor/pkg/sentry/context/contexttest"
-	"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
-	"gvisor.dev/gvisor/pkg/sentry/usermem"
-	"gvisor.dev/gvisor/pkg/sentry/vfs"
-	"gvisor.dev/gvisor/pkg/syserror"
-)
-
-const fileName = "mypipe"
-
-func TestSeparateFDs(t *testing.T) {
-	ctx, creds, vfsObj, root := setup(t)
-	defer root.DecRef()
-
-	// Open the read side. This is done in a concurrently because opening
-	// One end the pipe blocks until the other end is opened.
-	pop := vfs.PathOperation{
-		Root:               root,
-		Start:              root,
-		Path:               fspath.Parse(fileName),
-		FollowFinalSymlink: true,
-	}
-	rfdchan := make(chan *vfs.FileDescription)
-	go func() {
-		openOpts := vfs.OpenOptions{Flags: linux.O_RDONLY}
-		rfd, _ := vfsObj.OpenAt(ctx, creds, &pop, &openOpts)
-		rfdchan <- rfd
-	}()
-
-	// Open the write side.
-	openOpts := vfs.OpenOptions{Flags: linux.O_WRONLY}
-	wfd, err := vfsObj.OpenAt(ctx, creds, &pop, &openOpts)
-	if err != nil {
-		t.Fatalf("failed to open pipe for writing %q: %v", fileName, err)
-	}
-	defer wfd.DecRef()
-
-	rfd, ok := <-rfdchan
-	if !ok {
-		t.Fatalf("failed to open pipe for reading %q", fileName)
-	}
-	defer rfd.DecRef()
-
-	const msg = "vamos azul"
-	checkEmpty(ctx, t, rfd)
-	checkWrite(ctx, t, wfd, msg)
-	checkRead(ctx, t, rfd, msg)
-}
-
-func TestNonblockingRead(t *testing.T) {
-	ctx, creds, vfsObj, root := setup(t)
-	defer root.DecRef()
-
-	// Open the read side as nonblocking.
-	pop := vfs.PathOperation{
-		Root:               root,
-		Start:              root,
-		Path:               fspath.Parse(fileName),
-		FollowFinalSymlink: true,
-	}
-	openOpts := vfs.OpenOptions{Flags: linux.O_RDONLY | linux.O_NONBLOCK}
-	rfd, err := vfsObj.OpenAt(ctx, creds, &pop, &openOpts)
-	if err != nil {
-		t.Fatalf("failed to open pipe for reading %q: %v", fileName, err)
-	}
-	defer rfd.DecRef()
-
-	// Open the write side.
-	openOpts = vfs.OpenOptions{Flags: linux.O_WRONLY}
-	wfd, err := vfsObj.OpenAt(ctx, creds, &pop, &openOpts)
-	if err != nil {
-		t.Fatalf("failed to open pipe for writing %q: %v", fileName, err)
-	}
-	defer wfd.DecRef()
-
-	const msg = "geh blau"
-	checkEmpty(ctx, t, rfd)
-	checkWrite(ctx, t, wfd, msg)
-	checkRead(ctx, t, rfd, msg)
-}
-
-func TestNonblockingWriteError(t *testing.T) {
-	ctx, creds, vfsObj, root := setup(t)
-	defer root.DecRef()
-
-	// Open the write side as nonblocking, which should return ENXIO.
-	pop := vfs.PathOperation{
-		Root:               root,
-		Start:              root,
-		Path:               fspath.Parse(fileName),
-		FollowFinalSymlink: true,
-	}
-	openOpts := vfs.OpenOptions{Flags: linux.O_WRONLY | linux.O_NONBLOCK}
-	_, err := vfsObj.OpenAt(ctx, creds, &pop, &openOpts)
-	if err != syserror.ENXIO {
-		t.Fatalf("expected ENXIO, but got error: %v", err)
-	}
-}
-
-func TestSingleFD(t *testing.T) {
-	ctx, creds, vfsObj, root := setup(t)
-	defer root.DecRef()
-
-	// Open the pipe as readable and writable.
-	pop := vfs.PathOperation{
-		Root:               root,
-		Start:              root,
-		Path:               fspath.Parse(fileName),
-		FollowFinalSymlink: true,
-	}
-	openOpts := vfs.OpenOptions{Flags: linux.O_RDWR}
-	fd, err := vfsObj.OpenAt(ctx, creds, &pop, &openOpts)
-	if err != nil {
-		t.Fatalf("failed to open pipe for writing %q: %v", fileName, err)
-	}
-	defer fd.DecRef()
-
-	const msg = "forza blu"
-	checkEmpty(ctx, t, fd)
-	checkWrite(ctx, t, fd, msg)
-	checkRead(ctx, t, fd, msg)
-}
-
-// setup creates a VFS with a pipe in the root directory at path fileName. The
-// returned VirtualDentry must be DecRef()'d be the caller. It calls t.Fatal
-// upon failure.
-func setup(t *testing.T) (context.Context, *auth.Credentials, *vfs.VirtualFilesystem, vfs.VirtualDentry) {
-	ctx := contexttest.Context(t)
-	creds := auth.CredentialsFromContext(ctx)
-
-	// Create VFS.
-	vfsObj := vfs.New()
-	vfsObj.MustRegisterFilesystemType("memfs", FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{
-		AllowUserMount: true,
-	})
-	mntns, err := vfsObj.NewMountNamespace(ctx, creds, "", "memfs", &vfs.GetFilesystemOptions{})
-	if err != nil {
-		t.Fatalf("failed to create tmpfs root mount: %v", err)
-	}
-
-	// Create the pipe.
-	root := mntns.Root()
-	pop := vfs.PathOperation{
-		Root:  root,
-		Start: root,
-		Path:  fspath.Parse(fileName),
-	}
-	mknodOpts := vfs.MknodOptions{Mode: linux.ModeNamedPipe | 0644}
-	if err := vfsObj.MknodAt(ctx, creds, &pop, &mknodOpts); err != nil {
-		t.Fatalf("failed to create file %q: %v", fileName, err)
-	}
-
-	// Sanity check: the file pipe exists and has the correct mode.
-	stat, err := vfsObj.StatAt(ctx, creds, &vfs.PathOperation{
-		Root:               root,
-		Start:              root,
-		Path:               fspath.Parse(fileName),
-		FollowFinalSymlink: true,
-	}, &vfs.StatOptions{})
-	if err != nil {
-		t.Fatalf("stat(%q) failed: %v", fileName, err)
-	}
-	if stat.Mode&^linux.S_IFMT != 0644 {
-		t.Errorf("got wrong permissions (%0o)", stat.Mode)
-	}
-	if stat.Mode&linux.S_IFMT != linux.ModeNamedPipe {
-		t.Errorf("got wrong file type (%0o)", stat.Mode)
-	}
-
-	return ctx, creds, vfsObj, root
-}
-
-// checkEmpty calls t.Fatal if the pipe in fd is not empty.
-func checkEmpty(ctx context.Context, t *testing.T, fd *vfs.FileDescription) {
-	readData := make([]byte, 1)
-	dst := usermem.BytesIOSequence(readData)
-	bytesRead, err := fd.Read(ctx, dst, vfs.ReadOptions{})
-	if err != syserror.ErrWouldBlock {
-		t.Fatalf("expected ErrWouldBlock reading from empty pipe %q, but got: %v", fileName, err)
-	}
-	if bytesRead != 0 {
-		t.Fatalf("expected to read 0 bytes, but got %d", bytesRead)
-	}
-}
-
-// checkWrite calls t.Fatal if it fails to write all of msg to fd.
-func checkWrite(ctx context.Context, t *testing.T, fd *vfs.FileDescription, msg string) {
-	writeData := []byte(msg)
-	src := usermem.BytesIOSequence(writeData)
-	bytesWritten, err := fd.Write(ctx, src, vfs.WriteOptions{})
-	if err != nil {
-		t.Fatalf("error writing to pipe %q: %v", fileName, err)
-	}
-	if bytesWritten != int64(len(writeData)) {
-		t.Fatalf("expected to write %d bytes, but wrote %d", len(writeData), bytesWritten)
-	}
-}
-
-// checkRead calls t.Fatal if it fails to read msg from fd.
-func checkRead(ctx context.Context, t *testing.T, fd *vfs.FileDescription, msg string) {
-	readData := make([]byte, len(msg))
-	dst := usermem.BytesIOSequence(readData)
-	bytesRead, err := fd.Read(ctx, dst, vfs.ReadOptions{})
-	if err != nil {
-		t.Fatalf("error reading from pipe %q: %v", fileName, err)
-	}
-	if bytesRead != int64(len(msg)) {
-		t.Fatalf("expected to read %d bytes, but got %d", len(msg), bytesRead)
-	}
-	if !bytes.Equal(readData, []byte(msg)) {
-		t.Fatalf("expected to read %q from pipe, but got %q", msg, string(readData))
-	}
-}
diff --git a/pkg/sentry/fsimpl/memfs/regular_file.go b/pkg/sentry/fsimpl/memfs/regular_file.go
deleted file mode 100644
index b7f4853b3..000000000
--- a/pkg/sentry/fsimpl/memfs/regular_file.go
+++ /dev/null
@@ -1,154 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package memfs
-
-import (
-	"io"
-	"sync"
-	"sync/atomic"
-
-	"gvisor.dev/gvisor/pkg/abi/linux"
-	"gvisor.dev/gvisor/pkg/sentry/context"
-	"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
-	"gvisor.dev/gvisor/pkg/sentry/usermem"
-	"gvisor.dev/gvisor/pkg/sentry/vfs"
-	"gvisor.dev/gvisor/pkg/syserror"
-)
-
-type regularFile struct {
-	inode inode
-
-	mu   sync.RWMutex
-	data []byte
-	// dataLen is len(data), but accessed using atomic memory operations to
-	// avoid locking in inode.stat().
-	dataLen int64
-}
-
-func (fs *filesystem) newRegularFile(creds *auth.Credentials, mode linux.FileMode) *inode {
-	file := &regularFile{}
-	file.inode.init(file, fs, creds, mode)
-	file.inode.nlink = 1 // from parent directory
-	return &file.inode
-}
-
-type regularFileFD struct {
-	fileDescription
-
-	// These are immutable.
-	readable bool
-	writable bool
-
-	// off is the file offset. off is accessed using atomic memory operations.
-	// offMu serializes operations that may mutate off.
-	off   int64
-	offMu sync.Mutex
-}
-
-// Release implements vfs.FileDescriptionImpl.Release.
-func (fd *regularFileFD) Release() {
-	if fd.writable {
-		fd.vfsfd.VirtualDentry().Mount().EndWrite()
-	}
-}
-
-// PRead implements vfs.FileDescriptionImpl.PRead.
-func (fd *regularFileFD) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts vfs.ReadOptions) (int64, error) {
-	if !fd.readable {
-		return 0, syserror.EINVAL
-	}
-	f := fd.inode().impl.(*regularFile)
-	f.mu.RLock()
-	if offset >= int64(len(f.data)) {
-		f.mu.RUnlock()
-		return 0, io.EOF
-	}
-	n, err := dst.CopyOut(ctx, f.data[offset:])
-	f.mu.RUnlock()
-	return int64(n), err
-}
-
-// Read implements vfs.FileDescriptionImpl.Read.
-func (fd *regularFileFD) Read(ctx context.Context, dst usermem.IOSequence, opts vfs.ReadOptions) (int64, error) {
-	fd.offMu.Lock()
-	n, err := fd.PRead(ctx, dst, fd.off, opts)
-	fd.off += n
-	fd.offMu.Unlock()
-	return n, err
-}
-
-// PWrite implements vfs.FileDescriptionImpl.PWrite.
-func (fd *regularFileFD) PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (int64, error) {
-	if !fd.writable {
-		return 0, syserror.EINVAL
-	}
-	if offset < 0 {
-		return 0, syserror.EINVAL
-	}
-	srclen := src.NumBytes()
-	if srclen == 0 {
-		return 0, nil
-	}
-	f := fd.inode().impl.(*regularFile)
-	f.mu.Lock()
-	end := offset + srclen
-	if end < offset {
-		// Overflow.
-		f.mu.Unlock()
-		return 0, syserror.EFBIG
-	}
-	if end > f.dataLen {
-		f.data = append(f.data, make([]byte, end-f.dataLen)...)
-		atomic.StoreInt64(&f.dataLen, end)
-	}
-	n, err := src.CopyIn(ctx, f.data[offset:end])
-	f.mu.Unlock()
-	return int64(n), err
-}
-
-// Write implements vfs.FileDescriptionImpl.Write.
-func (fd *regularFileFD) Write(ctx context.Context, src usermem.IOSequence, opts vfs.WriteOptions) (int64, error) {
-	fd.offMu.Lock()
-	n, err := fd.PWrite(ctx, src, fd.off, opts)
-	fd.off += n
-	fd.offMu.Unlock()
-	return n, err
-}
-
-// Seek implements vfs.FileDescriptionImpl.Seek.
-func (fd *regularFileFD) Seek(ctx context.Context, offset int64, whence int32) (int64, error) {
-	fd.offMu.Lock()
-	defer fd.offMu.Unlock()
-	switch whence {
-	case linux.SEEK_SET:
-		// use offset as specified
-	case linux.SEEK_CUR:
-		offset += fd.off
-	case linux.SEEK_END:
-		offset += atomic.LoadInt64(&fd.inode().impl.(*regularFile).dataLen)
-	default:
-		return 0, syserror.EINVAL
-	}
-	if offset < 0 {
-		return 0, syserror.EINVAL
-	}
-	fd.off = offset
-	return offset, nil
-}
-
-// Sync implements vfs.FileDescriptionImpl.Sync.
-func (fd *regularFileFD) Sync(ctx context.Context) error {
-	return nil
-}
diff --git a/pkg/sentry/fsimpl/memfs/symlink.go b/pkg/sentry/fsimpl/memfs/symlink.go
deleted file mode 100644
index b2ac2cbeb..000000000
--- a/pkg/sentry/fsimpl/memfs/symlink.go
+++ /dev/null
@@ -1,36 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package memfs
-
-import (
-	"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
-)
-
-type symlink struct {
-	inode  inode
-	target string // immutable
-}
-
-func (fs *filesystem) newSymlink(creds *auth.Credentials, target string) *inode {
-	link := &symlink{
-		target: target,
-	}
-	link.inode.init(link, fs, creds, 0777)
-	link.inode.nlink = 1 // from parent directory
-	return &link.inode
-}
-
-// O_PATH is unimplemented, so there's no way to get a FileDescription
-// representing a symlink yet.
diff --git a/pkg/sentry/fsimpl/tmpfs/BUILD b/pkg/sentry/fsimpl/tmpfs/BUILD
new file mode 100644
index 000000000..a5b285987
--- /dev/null
+++ b/pkg/sentry/fsimpl/tmpfs/BUILD
@@ -0,0 +1,92 @@
+load("//tools/go_stateify:defs.bzl", "go_library")
+load("@io_bazel_rules_go//go:def.bzl", "go_test")
+load("//tools/go_generics:defs.bzl", "go_template_instance")
+
+package(licenses = ["notice"])
+
+go_template_instance(
+    name = "dentry_list",
+    out = "dentry_list.go",
+    package = "tmpfs",
+    prefix = "dentry",
+    template = "//pkg/ilist:generic_list",
+    types = {
+        "Element": "*dentry",
+        "Linker": "*dentry",
+    },
+)
+
+go_library(
+    name = "tmpfs",
+    srcs = [
+        "dentry_list.go",
+        "directory.go",
+        "filesystem.go",
+        "named_pipe.go",
+        "regular_file.go",
+        "symlink.go",
+        "tmpfs.go",
+    ],
+    importpath = "gvisor.dev/gvisor/pkg/sentry/fsimpl/tmpfs",
+    deps = [
+        "//pkg/abi/linux",
+        "//pkg/amutex",
+        "//pkg/fspath",
+        "//pkg/log",
+        "//pkg/sentry/arch",
+        "//pkg/sentry/context",
+        "//pkg/sentry/fs",
+        "//pkg/sentry/fs/fsutil",
+        "//pkg/sentry/kernel",
+        "//pkg/sentry/kernel/auth",
+        "//pkg/sentry/kernel/pipe",
+        "//pkg/sentry/memmap",
+        "//pkg/sentry/pgalloc",
+        "//pkg/sentry/platform",
+        "//pkg/sentry/safemem",
+        "//pkg/sentry/usage",
+        "//pkg/sentry/usermem",
+        "//pkg/sentry/vfs",
+        "//pkg/syserror",
+    ],
+)
+
+go_test(
+    name = "benchmark_test",
+    size = "small",
+    srcs = ["benchmark_test.go"],
+    deps = [
+        ":tmpfs",
+        "//pkg/abi/linux",
+        "//pkg/fspath",
+        "//pkg/refs",
+        "//pkg/sentry/context",
+        "//pkg/sentry/context/contexttest",
+        "//pkg/sentry/fs",
+        "//pkg/sentry/fs/tmpfs",
+        "//pkg/sentry/kernel/auth",
+        "//pkg/sentry/vfs",
+        "//pkg/syserror",
+    ],
+)
+
+go_test(
+    name = "tmpfs_test",
+    size = "small",
+    srcs = [
+        "pipe_test.go",
+        "regular_file_test.go",
+    ],
+    embed = [":tmpfs"],
+    deps = [
+        "//pkg/abi/linux",
+        "//pkg/fspath",
+        "//pkg/sentry/context",
+        "//pkg/sentry/context/contexttest",
+        "//pkg/sentry/kernel/auth",
+        "//pkg/sentry/kernel/contexttest",
+        "//pkg/sentry/usermem",
+        "//pkg/sentry/vfs",
+        "//pkg/syserror",
+    ],
+)
diff --git a/pkg/sentry/fsimpl/tmpfs/benchmark_test.go b/pkg/sentry/fsimpl/tmpfs/benchmark_test.go
new file mode 100644
index 000000000..d88c83499
--- /dev/null
+++ b/pkg/sentry/fsimpl/tmpfs/benchmark_test.go
@@ -0,0 +1,487 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package benchmark_test
+
+import (
+	"fmt"
+	"runtime"
+	"strings"
+	"testing"
+
+	"gvisor.dev/gvisor/pkg/abi/linux"
+	"gvisor.dev/gvisor/pkg/fspath"
+	"gvisor.dev/gvisor/pkg/refs"
+	"gvisor.dev/gvisor/pkg/sentry/context"
+	"gvisor.dev/gvisor/pkg/sentry/context/contexttest"
+	"gvisor.dev/gvisor/pkg/sentry/fs"
+	_ "gvisor.dev/gvisor/pkg/sentry/fs/tmpfs"
+	"gvisor.dev/gvisor/pkg/sentry/fsimpl/tmpfs"
+	"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+	"gvisor.dev/gvisor/pkg/sentry/vfs"
+	"gvisor.dev/gvisor/pkg/syserror"
+)
+
+// Differences from stat_benchmark:
+//
+// - Syscall interception, CopyInPath, copyOutStat, and overlayfs overheads are
+// not included.
+//
+// - *MountStat benchmarks use a tmpfs root mount and a tmpfs submount at /tmp.
+// Non-MountStat benchmarks use a tmpfs root mount and no submounts.
+// stat_benchmark uses a varying root mount, a tmpfs submount at /tmp, and a
+// subdirectory /tmp/<top_dir> (assuming TEST_TMPDIR == "/tmp"). Thus
+// stat_benchmark at depth 1 does a comparable amount of work to *MountStat
+// benchmarks at depth 2, and non-MountStat benchmarks at depth 3.
+var depths = []int{1, 2, 3, 8, 64, 100}
+
+const (
+	mountPointName = "tmp"
+	filename       = "gvisor_test_temp_0_1557494568"
+)
+
+// This is copied from syscalls/linux/sys_file.go, with the dependency on
+// kernel.Task stripped out.
+func fileOpOn(ctx context.Context, mntns *fs.MountNamespace, root, wd *fs.Dirent, dirFD int32, path string, resolve bool, fn func(root *fs.Dirent, d *fs.Dirent) error) error {
+	var (
+		d   *fs.Dirent // The file.
+		rel *fs.Dirent // The relative directory for search (if required.)
+		err error
+	)
+
+	// Extract the working directory (maybe).
+	if len(path) > 0 && path[0] == '/' {
+		// Absolute path; rel can be nil.
+	} else if dirFD == linux.AT_FDCWD {
+		// Need to reference the working directory.
+		rel = wd
+	} else {
+		// Need to extract the given FD.
+		return syserror.EBADF
+	}
+
+	// Lookup the node.
+	remainingTraversals := uint(linux.MaxSymlinkTraversals)
+	if resolve {
+		d, err = mntns.FindInode(ctx, root, rel, path, &remainingTraversals)
+	} else {
+		d, err = mntns.FindLink(ctx, root, rel, path, &remainingTraversals)
+	}
+	if err != nil {
+		return err
+	}
+
+	err = fn(root, d)
+	d.DecRef()
+	return err
+}
+
+func BenchmarkVFS1TmpfsStat(b *testing.B) {
+	for _, depth := range depths {
+		b.Run(fmt.Sprintf("%d", depth), func(b *testing.B) {
+			ctx := contexttest.Context(b)
+
+			// Create VFS.
+			tmpfsFS, ok := fs.FindFilesystem("tmpfs")
+			if !ok {
+				b.Fatalf("failed to find tmpfs filesystem type")
+			}
+			rootInode, err := tmpfsFS.Mount(ctx, "tmpfs", fs.MountSourceFlags{}, "", nil)
+			if err != nil {
+				b.Fatalf("failed to create tmpfs root mount: %v", err)
+			}
+			mntns, err := fs.NewMountNamespace(ctx, rootInode)
+			if err != nil {
+				b.Fatalf("failed to create mount namespace: %v", err)
+			}
+			defer mntns.DecRef()
+
+			var filePathBuilder strings.Builder
+			filePathBuilder.WriteByte('/')
+
+			// Create nested directories with given depth.
+			root := mntns.Root()
+			defer root.DecRef()
+			d := root
+			d.IncRef()
+			defer func() { d.DecRef() }() // d is reassigned below; release its final value, not root.
+			for i := depth; i > 0; i-- {
+				name := fmt.Sprintf("%d", i)
+				if err := d.Inode.CreateDirectory(ctx, d, name, fs.FilePermsFromMode(0755)); err != nil {
+					b.Fatalf("failed to create directory %q: %v", name, err)
+				}
+				next, err := d.Walk(ctx, root, name)
+				if err != nil {
+					b.Fatalf("failed to walk to directory %q: %v", name, err)
+				}
+				d.DecRef()
+				d = next
+				filePathBuilder.WriteString(name)
+				filePathBuilder.WriteByte('/')
+			}
+
+			// Create the file that will be stat'd.
+			file, err := d.Inode.Create(ctx, d, filename, fs.FileFlags{Read: true, Write: true}, fs.FilePermsFromMode(0644))
+			if err != nil {
+				b.Fatalf("failed to create file %q: %v", filename, err)
+			}
+			file.DecRef()
+			filePathBuilder.WriteString(filename)
+			filePath := filePathBuilder.String()
+
+			dirPath := false
+			runtime.GC()
+			b.ResetTimer()
+			for i := 0; i < b.N; i++ {
+				err := fileOpOn(ctx, mntns, root, root, linux.AT_FDCWD, filePath, true /* resolve */, func(root *fs.Dirent, d *fs.Dirent) error {
+					if dirPath && !fs.IsDir(d.Inode.StableAttr) {
+						return syserror.ENOTDIR
+					}
+					uattr, err := d.Inode.UnstableAttr(ctx)
+					if err != nil {
+						return err
+					}
+					// Sanity check.
+					if uattr.Perms.User.Execute {
+						b.Fatalf("got wrong permissions (%0o)", uattr.Perms.LinuxMode())
+					}
+					return nil
+				})
+				if err != nil {
+					b.Fatalf("stat(%q) failed: %v", filePath, err)
+				}
+			}
+			// Don't include deferred cleanup in benchmark time.
+			b.StopTimer()
+		})
+	}
+}
+
+func BenchmarkVFS2MemfsStat(b *testing.B) {
+	for _, depth := range depths {
+		b.Run(fmt.Sprintf("%d", depth), func(b *testing.B) {
+			ctx := contexttest.Context(b)
+			creds := auth.CredentialsFromContext(ctx)
+
+			// Create VFS.
+			vfsObj := vfs.New()
+			vfsObj.MustRegisterFilesystemType("tmpfs", tmpfs.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{
+				AllowUserMount: true,
+			})
+			mntns, err := vfsObj.NewMountNamespace(ctx, creds, "", "tmpfs", &vfs.GetFilesystemOptions{})
+			if err != nil {
+				b.Fatalf("failed to create tmpfs root mount: %v", err)
+			}
+			defer mntns.DecRef(vfsObj)
+
+			var filePathBuilder strings.Builder
+			filePathBuilder.WriteByte('/')
+
+			// Create nested directories with given depth.
+			root := mntns.Root()
+			defer root.DecRef()
+			vd := root
+			vd.IncRef()
+			for i := depth; i > 0; i-- {
+				name := fmt.Sprintf("%d", i)
+				pop := vfs.PathOperation{
+					Root:  root,
+					Start: vd,
+					Path:  fspath.Parse(name),
+				}
+				if err := vfsObj.MkdirAt(ctx, creds, &pop, &vfs.MkdirOptions{
+					Mode: 0755,
+				}); err != nil {
+					b.Fatalf("failed to create directory %q: %v", name, err)
+				}
+				nextVD, err := vfsObj.GetDentryAt(ctx, creds, &pop, &vfs.GetDentryOptions{})
+				if err != nil {
+					b.Fatalf("failed to walk to directory %q: %v", name, err)
+				}
+				vd.DecRef()
+				vd = nextVD
+				filePathBuilder.WriteString(name)
+				filePathBuilder.WriteByte('/')
+			}
+
+			// Create the file that will be stat'd.
+			fd, err := vfsObj.OpenAt(ctx, creds, &vfs.PathOperation{
+				Root:               root,
+				Start:              vd,
+				Path:               fspath.Parse(filename),
+				FollowFinalSymlink: true,
+			}, &vfs.OpenOptions{
+				Flags: linux.O_RDWR | linux.O_CREAT | linux.O_EXCL,
+				Mode:  0644,
+			})
+			vd.DecRef()
+			vd = vfs.VirtualDentry{}
+			if err != nil {
+				b.Fatalf("failed to create file %q: %v", filename, err)
+			}
+			defer fd.DecRef()
+			filePathBuilder.WriteString(filename)
+			filePath := filePathBuilder.String()
+
+			runtime.GC()
+			b.ResetTimer()
+			for i := 0; i < b.N; i++ {
+				stat, err := vfsObj.StatAt(ctx, creds, &vfs.PathOperation{
+					Root:               root,
+					Start:              root,
+					Path:               fspath.Parse(filePath),
+					FollowFinalSymlink: true,
+				}, &vfs.StatOptions{})
+				if err != nil {
+					b.Fatalf("stat(%q) failed: %v", filePath, err)
+				}
+				// Sanity check.
+				if stat.Mode&^linux.S_IFMT != 0644 {
+					b.Fatalf("got wrong permissions (%0o)", stat.Mode)
+				}
+			}
+			// Don't include deferred cleanup in benchmark time.
+			b.StopTimer()
+		})
+	}
+}
+
+func BenchmarkVFS1TmpfsMountStat(b *testing.B) {
+	for _, depth := range depths {
+		b.Run(fmt.Sprintf("%d", depth), func(b *testing.B) {
+			ctx := contexttest.Context(b)
+
+			// Create VFS.
+			tmpfsFS, ok := fs.FindFilesystem("tmpfs")
+			if !ok {
+				b.Fatalf("failed to find tmpfs filesystem type")
+			}
+			rootInode, err := tmpfsFS.Mount(ctx, "tmpfs", fs.MountSourceFlags{}, "", nil)
+			if err != nil {
+				b.Fatalf("failed to create tmpfs root mount: %v", err)
+			}
+			mntns, err := fs.NewMountNamespace(ctx, rootInode)
+			if err != nil {
+				b.Fatalf("failed to create mount namespace: %v", err)
+			}
+			defer mntns.DecRef()
+
+			var filePathBuilder strings.Builder
+			filePathBuilder.WriteByte('/')
+
+			// Create and mount the submount.
+			root := mntns.Root()
+			defer root.DecRef()
+			if err := root.Inode.CreateDirectory(ctx, root, mountPointName, fs.FilePermsFromMode(0755)); err != nil {
+				b.Fatalf("failed to create mount point: %v", err)
+			}
+			mountPoint, err := root.Walk(ctx, root, mountPointName)
+			if err != nil {
+				b.Fatalf("failed to walk to mount point: %v", err)
+			}
+			defer mountPoint.DecRef()
+			submountInode, err := tmpfsFS.Mount(ctx, "tmpfs", fs.MountSourceFlags{}, "", nil)
+			if err != nil {
+				b.Fatalf("failed to create tmpfs submount: %v", err)
+			}
+			if err := mntns.Mount(ctx, mountPoint, submountInode); err != nil {
+				b.Fatalf("failed to mount tmpfs submount: %v", err)
+			}
+			filePathBuilder.WriteString(mountPointName)
+			filePathBuilder.WriteByte('/')
+
+			// Create nested directories with given depth.
+			d, err := root.Walk(ctx, root, mountPointName)
+			if err != nil {
+				b.Fatalf("failed to walk to mount root: %v", err)
+			}
+			defer func() { d.DecRef() }() // d is reassigned below; release its final value, not the mount root.
+			for i := depth; i > 0; i-- {
+				name := fmt.Sprintf("%d", i)
+				if err := d.Inode.CreateDirectory(ctx, d, name, fs.FilePermsFromMode(0755)); err != nil {
+					b.Fatalf("failed to create directory %q: %v", name, err)
+				}
+				next, err := d.Walk(ctx, root, name)
+				if err != nil {
+					b.Fatalf("failed to walk to directory %q: %v", name, err)
+				}
+				d.DecRef()
+				d = next
+				filePathBuilder.WriteString(name)
+				filePathBuilder.WriteByte('/')
+			}
+
+			// Create the file that will be stat'd.
+			file, err := d.Inode.Create(ctx, d, filename, fs.FileFlags{Read: true, Write: true}, fs.FilePermsFromMode(0644))
+			if err != nil {
+				b.Fatalf("failed to create file %q: %v", filename, err)
+			}
+			file.DecRef()
+			filePathBuilder.WriteString(filename)
+			filePath := filePathBuilder.String()
+
+			dirPath := false
+			runtime.GC()
+			b.ResetTimer()
+			for i := 0; i < b.N; i++ {
+				err := fileOpOn(ctx, mntns, root, root, linux.AT_FDCWD, filePath, true /* resolve */, func(root *fs.Dirent, d *fs.Dirent) error {
+					if dirPath && !fs.IsDir(d.Inode.StableAttr) {
+						return syserror.ENOTDIR
+					}
+					uattr, err := d.Inode.UnstableAttr(ctx)
+					if err != nil {
+						return err
+					}
+					// Sanity check.
+					if uattr.Perms.User.Execute {
+						b.Fatalf("got wrong permissions (%0o)", uattr.Perms.LinuxMode())
+					}
+					return nil
+				})
+				if err != nil {
+					b.Fatalf("stat(%q) failed: %v", filePath, err)
+				}
+			}
+			// Don't include deferred cleanup in benchmark time.
+			b.StopTimer()
+		})
+	}
+}
+
+func BenchmarkVFS2MemfsMountStat(b *testing.B) {
+	for _, depth := range depths {
+		b.Run(fmt.Sprintf("%d", depth), func(b *testing.B) {
+			ctx := contexttest.Context(b)
+			creds := auth.CredentialsFromContext(ctx)
+
+			// Create VFS.
+			vfsObj := vfs.New()
+			vfsObj.MustRegisterFilesystemType("tmpfs", tmpfs.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{
+				AllowUserMount: true,
+			})
+			mntns, err := vfsObj.NewMountNamespace(ctx, creds, "", "tmpfs", &vfs.GetFilesystemOptions{})
+			if err != nil {
+				b.Fatalf("failed to create tmpfs root mount: %v", err)
+			}
+			defer mntns.DecRef(vfsObj)
+
+			var filePathBuilder strings.Builder
+			filePathBuilder.WriteByte('/')
+
+			// Create the mount point.
+			root := mntns.Root()
+			defer root.DecRef()
+			pop := vfs.PathOperation{
+				Root:  root,
+				Start: root,
+				Path:  fspath.Parse(mountPointName),
+			}
+			if err := vfsObj.MkdirAt(ctx, creds, &pop, &vfs.MkdirOptions{
+				Mode: 0755,
+			}); err != nil {
+				b.Fatalf("failed to create mount point: %v", err)
+			}
+			// Save the mount point for later use.
+			mountPoint, err := vfsObj.GetDentryAt(ctx, creds, &pop, &vfs.GetDentryOptions{})
+			if err != nil {
+				b.Fatalf("failed to walk to mount point: %v", err)
+			}
+			defer mountPoint.DecRef()
+			// Create and mount the submount.
+			if err := vfsObj.MountAt(ctx, creds, "", &pop, "tmpfs", &vfs.MountOptions{}); err != nil {
+				b.Fatalf("failed to mount tmpfs submount: %v", err)
+			}
+			filePathBuilder.WriteString(mountPointName)
+			filePathBuilder.WriteByte('/')
+
+			// Create nested directories with given depth.
+			vd, err := vfsObj.GetDentryAt(ctx, creds, &pop, &vfs.GetDentryOptions{})
+			if err != nil {
+				b.Fatalf("failed to walk to mount root: %v", err)
+			}
+			for i := depth; i > 0; i-- {
+				name := fmt.Sprintf("%d", i)
+				pop := vfs.PathOperation{
+					Root:  root,
+					Start: vd,
+					Path:  fspath.Parse(name),
+				}
+				if err := vfsObj.MkdirAt(ctx, creds, &pop, &vfs.MkdirOptions{
+					Mode: 0755,
+				}); err != nil {
+					b.Fatalf("failed to create directory %q: %v", name, err)
+				}
+				nextVD, err := vfsObj.GetDentryAt(ctx, creds, &pop, &vfs.GetDentryOptions{})
+				if err != nil {
+					b.Fatalf("failed to walk to directory %q: %v", name, err)
+				}
+				vd.DecRef()
+				vd = nextVD
+				filePathBuilder.WriteString(name)
+				filePathBuilder.WriteByte('/')
+			}
+
+			// Verify that we didn't create any directories under the mount
+			// point (i.e. they were all created on the submount).
+			firstDirName := fmt.Sprintf("%d", depth)
+			if child := mountPoint.Dentry().Child(firstDirName); child != nil {
+				b.Fatalf("created directory %q under root mount, not submount", firstDirName)
+			}
+
+			// Create the file that will be stat'd.
+			fd, err := vfsObj.OpenAt(ctx, creds, &vfs.PathOperation{
+				Root:               root,
+				Start:              vd,
+				Path:               fspath.Parse(filename),
+				FollowFinalSymlink: true,
+			}, &vfs.OpenOptions{
+				Flags: linux.O_RDWR | linux.O_CREAT | linux.O_EXCL,
+				Mode:  0644,
+			})
+			vd.DecRef()
+			if err != nil {
+				b.Fatalf("failed to create file %q: %v", filename, err)
+			}
+			fd.DecRef()
+			filePathBuilder.WriteString(filename)
+			filePath := filePathBuilder.String()
+
+			runtime.GC()
+			b.ResetTimer()
+			for i := 0; i < b.N; i++ {
+				stat, err := vfsObj.StatAt(ctx, creds, &vfs.PathOperation{
+					Root:               root,
+					Start:              root,
+					Path:               fspath.Parse(filePath),
+					FollowFinalSymlink: true,
+				}, &vfs.StatOptions{})
+				if err != nil {
+					b.Fatalf("stat(%q) failed: %v", filePath, err)
+				}
+				// Sanity check.
+				if stat.Mode&^linux.S_IFMT != 0644 {
+					b.Fatalf("got wrong permissions (%0o)", stat.Mode)
+				}
+			}
+			// Don't include deferred cleanup in benchmark time.
+			b.StopTimer()
+		})
+	}
+}
+
+func init() {
+	// Turn off reference leak checking for a fair comparison between vfs1 and
+	// vfs2.
+	refs.SetLeakMode(refs.NoLeakChecking)
+}
diff --git a/pkg/sentry/fsimpl/tmpfs/directory.go b/pkg/sentry/fsimpl/tmpfs/directory.go
new file mode 100644
index 000000000..887ca2619
--- /dev/null
+++ b/pkg/sentry/fsimpl/tmpfs/directory.go
@@ -0,0 +1,187 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package tmpfs
+
+import (
+	"gvisor.dev/gvisor/pkg/abi/linux"
+	"gvisor.dev/gvisor/pkg/sentry/context"
+	"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+	"gvisor.dev/gvisor/pkg/sentry/vfs"
+	"gvisor.dev/gvisor/pkg/syserror"
+)
+
+type directory struct {
+	inode inode
+
+	// childList is a list containing (1) child Dentries and (2) fake Dentries
+	// (with inode == nil) that represent the iteration position of
+	// directoryFDs. childList is used to support directoryFD.IterDirents()
+	// efficiently. childList is protected by filesystem.mu.
+	childList dentryList
+}
+
+func (fs *filesystem) newDirectory(creds *auth.Credentials, mode linux.FileMode) *inode {
+	dir := &directory{}
+	dir.inode.init(dir, fs, creds, mode)
+	dir.inode.nlink = 2 // from "." and parent directory or ".." for root
+	return &dir.inode
+}
+
+func (i *inode) isDir() bool {
+	_, ok := i.impl.(*directory)
+	return ok
+}
+
+type directoryFD struct {
+	fileDescription
+	vfs.DirectoryFileDescriptionDefaultImpl
+
+	// Protected by filesystem.mu.
+	iter *dentry // fake dentry (inode == nil) marking this FD's position in the directory's childList
+	off  int64   // offset of the next dirent to return; 0 is "." and 1 is ".."
+}
+
+// Release implements vfs.FileDescriptionImpl.Release.
+func (fd *directoryFD) Release() {
+	if fd.iter != nil {
+		fs := fd.filesystem()
+		dir := fd.inode().impl.(*directory)
+		fs.mu.Lock()
+		dir.childList.Remove(fd.iter)
+		fs.mu.Unlock()
+		fd.iter = nil
+	}
+}
+
+// IterDirents implements vfs.FileDescriptionImpl.IterDirents.
+func (fd *directoryFD) IterDirents(ctx context.Context, cb vfs.IterDirentsCallback) error {
+	fs := fd.filesystem()
+	vfsd := fd.vfsfd.VirtualDentry().Dentry()
+
+	fs.mu.Lock()
+	defer fs.mu.Unlock()
+
+	if fd.off == 0 {
+		if !cb.Handle(vfs.Dirent{
+			Name:    ".",
+			Type:    linux.DT_DIR,
+			Ino:     vfsd.Impl().(*dentry).inode.ino,
+			NextOff: 1,
+		}) {
+			return nil
+		}
+		fd.off++
+	}
+	if fd.off == 1 {
+		parentInode := vfsd.ParentOrSelf().Impl().(*dentry).inode
+		if !cb.Handle(vfs.Dirent{
+			Name:    "..",
+			Type:    parentInode.direntType(),
+			Ino:     parentInode.ino,
+			NextOff: 2,
+		}) {
+			return nil
+		}
+		fd.off++
+	}
+
+	dir := vfsd.Impl().(*dentry).inode.impl.(*directory)
+	var child *dentry
+	if fd.iter == nil {
+		// Start iteration at the beginning of dir.
+		child = dir.childList.Front()
+		fd.iter = &dentry{}
+	} else {
+		// Continue iteration from where we left off.
+		child = fd.iter.Next()
+		dir.childList.Remove(fd.iter)
+	}
+	for child != nil {
+		// Skip other directoryFD iterators.
+		if child.inode != nil {
+			if !cb.Handle(vfs.Dirent{
+				Name:    child.vfsd.Name(),
+				Type:    child.inode.direntType(),
+				Ino:     child.inode.ino,
+				NextOff: fd.off + 1,
+			}) {
+				dir.childList.InsertBefore(child, fd.iter)
+				return nil
+			}
+			fd.off++
+		}
+		child = child.Next()
+	}
+	dir.childList.PushBack(fd.iter)
+	return nil
+}
+
+// Seek implements vfs.FileDescriptionImpl.Seek.
+func (fd *directoryFD) Seek(ctx context.Context, offset int64, whence int32) (int64, error) {
+	fs := fd.filesystem()
+	fs.mu.Lock()
+	defer fs.mu.Unlock()
+
+	switch whence {
+	case linux.SEEK_SET:
+		// Use offset as given.
+	case linux.SEEK_CUR:
+		offset += fd.off
+	default:
+		return 0, syserror.EINVAL
+	}
+	if offset < 0 {
+		return 0, syserror.EINVAL
+	}
+
+	// If the offset isn't changing (e.g. due to lseek(0, SEEK_CUR)), don't
+	// seek even if doing so might reposition the iterator due to concurrent
+	// mutation of the directory. Compare fs/libfs.c:dcache_dir_lseek().
+	if fd.off == offset {
+		return offset, nil
+	}
+
+	fd.off = offset
+	// Compensate for "." and "..".
+	remChildren := int64(0)
+	if offset >= 2 {
+		remChildren = offset - 2
+	}
+
+	dir := fd.inode().impl.(*directory)
+
+	// Ensure that fd.iter exists and is not linked into dir.childList.
+	if fd.iter == nil {
+		fd.iter = &dentry{}
+	} else {
+		dir.childList.Remove(fd.iter)
+	}
+	// Insert fd.iter before the remChildren'th child, or at the end of the
+	// list if remChildren >= number of children.
+	child := dir.childList.Front()
+	for child != nil {
+		// Skip other directoryFD iterators.
+		if child.inode != nil {
+			if remChildren == 0 {
+				dir.childList.InsertBefore(child, fd.iter)
+				return offset, nil
+			}
+			remChildren--
+		}
+		child = child.Next()
+	}
+	dir.childList.PushBack(fd.iter)
+	return offset, nil
+}
diff --git a/pkg/sentry/fsimpl/tmpfs/filesystem.go b/pkg/sentry/fsimpl/tmpfs/filesystem.go
new file mode 100644
index 000000000..26979729e
--- /dev/null
+++ b/pkg/sentry/fsimpl/tmpfs/filesystem.go
@@ -0,0 +1,698 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package tmpfs
+
+import (
+	"fmt"
+	"sync/atomic"
+
+	"gvisor.dev/gvisor/pkg/abi/linux"
+	"gvisor.dev/gvisor/pkg/fspath"
+	"gvisor.dev/gvisor/pkg/sentry/context"
+	"gvisor.dev/gvisor/pkg/sentry/vfs"
+	"gvisor.dev/gvisor/pkg/syserror"
+)
+
+// Sync implements vfs.FilesystemImpl.Sync.
+func (fs *filesystem) Sync(ctx context.Context) error {
+	// All filesystem state is in-memory.
+	return nil
+}
+
+// stepLocked resolves rp.Component() to an existing file, starting from the
+// given directory.
+//
+// stepLocked is loosely analogous to fs/namei.c:walk_component().
+//
+// Preconditions: filesystem.mu must be locked. !rp.Done().
+func stepLocked(rp *vfs.ResolvingPath, d *dentry) (*dentry, error) {
+	if !d.inode.isDir() {
+		return nil, syserror.ENOTDIR
+	}
+	if err := d.inode.checkPermissions(rp.Credentials(), vfs.MayExec, true); err != nil {
+		return nil, err
+	}
+afterSymlink:
+	nextVFSD, err := rp.ResolveComponent(&d.vfsd)
+	if err != nil {
+		return nil, err
+	}
+	if nextVFSD == nil {
+		// Since the Dentry tree is the sole source of truth for tmpfs, if it's
+		// not in the Dentry tree, it doesn't exist.
+		return nil, syserror.ENOENT
+	}
+	next := nextVFSD.Impl().(*dentry)
+	if symlink, ok := next.inode.impl.(*symlink); ok && rp.ShouldFollowSymlink() {
+		// TODO: symlink traversals update access time
+		if err := rp.HandleSymlink(symlink.target); err != nil {
+			return nil, err
+		}
+		goto afterSymlink // don't check the current directory again
+	}
+	rp.Advance()
+	return next, nil
+}
+
+// walkParentDirLocked resolves all but the last path component of rp to an
+// existing directory, starting from the given directory (which is usually
+// rp.Start().Impl().(*dentry)). It does not check that the returned directory
+// is searchable by the provider of rp.
+//
+// walkParentDirLocked is loosely analogous to Linux's
+// fs/namei.c:path_parentat().
+//
+// Preconditions: filesystem.mu must be locked. !rp.Done().
+func walkParentDirLocked(rp *vfs.ResolvingPath, d *dentry) (*dentry, error) {
+	for !rp.Final() {
+		next, err := stepLocked(rp, d)
+		if err != nil {
+			return nil, err
+		}
+		d = next
+	}
+	if !d.inode.isDir() {
+		return nil, syserror.ENOTDIR
+	}
+	return d, nil
+}
+
+// resolveLocked resolves rp to an existing file.
+//
+// resolveLocked is loosely analogous to Linux's fs/namei.c:path_lookupat().
+//
+// Preconditions: filesystem.mu must be locked.
+func resolveLocked(rp *vfs.ResolvingPath) (*dentry, error) {
+	d := rp.Start().Impl().(*dentry)
+	for !rp.Done() {
+		next, err := stepLocked(rp, d)
+		if err != nil {
+			return nil, err
+		}
+		d = next
+	}
+	if rp.MustBeDir() && !d.inode.isDir() {
+		return nil, syserror.ENOTDIR
+	}
+	return d, nil
+}
+
+// doCreateAt checks that creating a file at rp is permitted, then invokes
+// create to do so. dir is true if the file being created is a directory.
+//
+// doCreateAt is loosely analogous to a conjunction of Linux's
+// fs/namei.c:filename_create() and done_path_create().
+//
+// Preconditions: !rp.Done(). For the final path component in rp,
+// !rp.ShouldFollowSymlink().
+func (fs *filesystem) doCreateAt(rp *vfs.ResolvingPath, dir bool, create func(parent *dentry, name string) error) error {
+	fs.mu.Lock()
+	defer fs.mu.Unlock()
+	parent, err := walkParentDirLocked(rp, rp.Start().Impl().(*dentry))
+	if err != nil {
+		return err
+	}
+	if err := parent.inode.checkPermissions(rp.Credentials(), vfs.MayWrite|vfs.MayExec, true /* isDir */); err != nil {
+		return err
+	}
+	name := rp.Component()
+	if name == "." || name == ".." {
+		return syserror.EEXIST
+	}
+	// Call parent.vfsd.Child() instead of stepLocked() or rp.ResolveChild(),
+	// because if the child exists we want to return EEXIST immediately instead
+	// of attempting symlink/mount traversal.
+	if parent.vfsd.Child(name) != nil {
+		return syserror.EEXIST
+	}
+	if !dir && rp.MustBeDir() {
+		return syserror.ENOENT
+	}
+	// In tmpfs, the only way to cause a dentry to be disowned is by removing
+	// it from the filesystem, so this check is equivalent to checking if
+	// parent has been removed.
+	if parent.vfsd.IsDisowned() {
+		return syserror.ENOENT
+	}
+	mnt := rp.Mount()
+	if err := mnt.CheckBeginWrite(); err != nil {
+		return err
+	}
+	defer mnt.EndWrite()
+	return create(parent, name)
+}
+
+// GetDentryAt implements vfs.FilesystemImpl.GetDentryAt.
+func (fs *filesystem) GetDentryAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.GetDentryOptions) (*vfs.Dentry, error) {
+	fs.mu.RLock()
+	defer fs.mu.RUnlock()
+	d, err := resolveLocked(rp)
+	if err != nil {
+		return nil, err
+	}
+	if opts.CheckSearchable {
+		if !d.inode.isDir() {
+			return nil, syserror.ENOTDIR
+		}
+		if err := d.inode.checkPermissions(rp.Credentials(), vfs.MayExec, true /* isDir */); err != nil {
+			return nil, err
+		}
+	}
+	d.IncRef()
+	return &d.vfsd, nil
+}
+
+// GetParentDentryAt implements vfs.FilesystemImpl.GetParentDentryAt.
+func (fs *filesystem) GetParentDentryAt(ctx context.Context, rp *vfs.ResolvingPath) (*vfs.Dentry, error) {
+	fs.mu.RLock()
+	defer fs.mu.RUnlock()
+	d, err := walkParentDirLocked(rp, rp.Start().Impl().(*dentry))
+	if err != nil {
+		return nil, err
+	}
+	d.IncRef()
+	return &d.vfsd, nil
+}
+
+// LinkAt implements vfs.FilesystemImpl.LinkAt.
+func (fs *filesystem) LinkAt(ctx context.Context, rp *vfs.ResolvingPath, vd vfs.VirtualDentry) error {
+	return fs.doCreateAt(rp, false /* dir */, func(parent *dentry, name string) error {
+		if rp.Mount() != vd.Mount() {
+			return syserror.EXDEV
+		}
+		d := vd.Dentry().Impl().(*dentry)
+		if d.inode.isDir() {
+			return syserror.EPERM
+		}
+		if d.inode.nlink == 0 {
+			return syserror.ENOENT
+		}
+		if d.inode.nlink == maxLinks {
+			return syserror.EMLINK
+		}
+		d.inode.incLinksLocked()
+		child := fs.newDentry(d.inode)
+		parent.vfsd.InsertChild(&child.vfsd, name)
+		parent.inode.impl.(*directory).childList.PushBack(child)
+		return nil
+	})
+}
+
+// MkdirAt implements vfs.FilesystemImpl.MkdirAt.
+func (fs *filesystem) MkdirAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.MkdirOptions) error {
+	return fs.doCreateAt(rp, true /* dir */, func(parent *dentry, name string) error {
+		if parent.inode.nlink == maxLinks {
+			return syserror.EMLINK
+		}
+		parent.inode.incLinksLocked() // from child's ".."
+		child := fs.newDentry(fs.newDirectory(rp.Credentials(), opts.Mode))
+		parent.vfsd.InsertChild(&child.vfsd, name)
+		parent.inode.impl.(*directory).childList.PushBack(child)
+		return nil
+	})
+}
+
+// MknodAt implements vfs.FilesystemImpl.MknodAt.
+func (fs *filesystem) MknodAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.MknodOptions) error {
+	return fs.doCreateAt(rp, false /* dir */, func(parent *dentry, name string) error {
+		switch opts.Mode.FileType() {
+		case 0, linux.S_IFREG:
+			child := fs.newDentry(fs.newRegularFile(rp.Credentials(), opts.Mode))
+			parent.vfsd.InsertChild(&child.vfsd, name)
+			parent.inode.impl.(*directory).childList.PushBack(child)
+			return nil
+		case linux.S_IFIFO:
+			child := fs.newDentry(fs.newNamedPipe(rp.Credentials(), opts.Mode))
+			parent.vfsd.InsertChild(&child.vfsd, name)
+			parent.inode.impl.(*directory).childList.PushBack(child)
+			return nil
+		case linux.S_IFBLK, linux.S_IFCHR, linux.S_IFSOCK:
+			// Not yet supported.
+			return syserror.EPERM
+		default:
+			return syserror.EINVAL
+		}
+	})
+}
+
+// OpenAt implements vfs.FilesystemImpl.OpenAt.
+func (fs *filesystem) OpenAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.OpenOptions) (*vfs.FileDescription, error) {
+	if opts.Flags&linux.O_TMPFILE != 0 {
+		// Not yet supported.
+		return nil, syserror.EOPNOTSUPP
+	}
+
+	// Handle O_CREAT and !O_CREAT separately, since in the latter case we
+	// don't need fs.mu for writing.
+	if opts.Flags&linux.O_CREAT == 0 {
+		fs.mu.RLock()
+		defer fs.mu.RUnlock()
+		d, err := resolveLocked(rp)
+		if err != nil {
+			return nil, err
+		}
+		return d.open(ctx, rp, opts.Flags, false /* afterCreate */)
+	}
+
+	mustCreate := opts.Flags&linux.O_EXCL != 0
+	start := rp.Start().Impl().(*dentry)
+	fs.mu.Lock()
+	defer fs.mu.Unlock()
+	if rp.Done() {
+		// Reject attempts to open directories with O_CREAT.
+		if rp.MustBeDir() {
+			return nil, syserror.EISDIR
+		}
+		if mustCreate {
+			return nil, syserror.EEXIST
+		}
+		return start.open(ctx, rp, opts.Flags, false /* afterCreate */)
+	}
+afterTrailingSymlink:
+	parent, err := walkParentDirLocked(rp, start)
+	if err != nil {
+		return nil, err
+	}
+	// Check for search permission in the parent directory.
+	if err := parent.inode.checkPermissions(rp.Credentials(), vfs.MayExec, true); err != nil {
+		return nil, err
+	}
+	// Reject attempts to open directories with O_CREAT.
+	if rp.MustBeDir() {
+		return nil, syserror.EISDIR
+	}
+	name := rp.Component()
+	if name == "." || name == ".." {
+		return nil, syserror.EISDIR
+	}
+	// Determine whether or not we need to create a file.
+	child, err := stepLocked(rp, parent)
+	if err == syserror.ENOENT {
+		// Already checked for searchability above; now check for writability.
+		if err := parent.inode.checkPermissions(rp.Credentials(), vfs.MayWrite, true); err != nil {
+			return nil, err
+		}
+		if err := rp.Mount().CheckBeginWrite(); err != nil {
+			return nil, err
+		}
+		defer rp.Mount().EndWrite()
+		// Create and open the child.
+		child := fs.newDentry(fs.newRegularFile(rp.Credentials(), opts.Mode))
+		parent.vfsd.InsertChild(&child.vfsd, name)
+		parent.inode.impl.(*directory).childList.PushBack(child)
+		return child.open(ctx, rp, opts.Flags, true)
+	}
+	if err != nil {
+		return nil, err
+	}
+	// Do we need to resolve a trailing symlink?
+	if !rp.Done() {
+		start = parent
+		goto afterTrailingSymlink
+	}
+	// Open existing file.
+	if mustCreate {
+		return nil, syserror.EEXIST
+	}
+	return child.open(ctx, rp, opts.Flags, false)
+}
+
+func (d *dentry) open(ctx context.Context, rp *vfs.ResolvingPath, flags uint32, afterCreate bool) (*vfs.FileDescription, error) {
+	ats := vfs.AccessTypesForOpenFlags(flags)
+	if !afterCreate {
+		if err := d.inode.checkPermissions(rp.Credentials(), ats, d.inode.isDir()); err != nil {
+			return nil, err
+		}
+	}
+	mnt := rp.Mount()
+	switch impl := d.inode.impl.(type) {
+	case *regularFile:
+		var fd regularFileFD
+		fd.readable = vfs.MayReadFileWithOpenFlags(flags)
+		fd.writable = vfs.MayWriteFileWithOpenFlags(flags)
+		if fd.writable {
+			if err := mnt.CheckBeginWrite(); err != nil {
+				return nil, err
+			}
+			// mnt.EndWrite() is called by regularFileFD.Release().
+		}
+		fd.vfsfd.Init(&fd, flags, mnt, &d.vfsd, &vfs.FileDescriptionOptions{})
+		if flags&linux.O_TRUNC != 0 {
+			impl.mu.Lock()
+			impl.data.Truncate(0, impl.memFile)
+			atomic.StoreUint64(&impl.size, 0)
+			impl.mu.Unlock()
+		}
+		return &fd.vfsfd, nil
+	case *directory:
+		// Can't open directories writably.
+		if ats&vfs.MayWrite != 0 {
+			return nil, syserror.EISDIR
+		}
+		var fd directoryFD
+		fd.vfsfd.Init(&fd, flags, mnt, &d.vfsd, &vfs.FileDescriptionOptions{})
+		return &fd.vfsfd, nil
+	case *symlink:
+		// Can't open symlinks without O_PATH (which is unimplemented).
+		return nil, syserror.ELOOP
+	case *namedPipe:
+		return newNamedPipeFD(ctx, impl, rp, &d.vfsd, flags)
+	default:
+		panic(fmt.Sprintf("unknown inode type: %T", d.inode.impl))
+	}
+}
+
+// ReadlinkAt implements vfs.FilesystemImpl.ReadlinkAt.
+func (fs *filesystem) ReadlinkAt(ctx context.Context, rp *vfs.ResolvingPath) (string, error) {
+	fs.mu.RLock()
+	defer fs.mu.RUnlock()
+	d, err := resolveLocked(rp)
+	if err != nil {
+		return "", err
+	}
+	symlink, ok := d.inode.impl.(*symlink)
+	if !ok {
+		return "", syserror.EINVAL
+	}
+	return symlink.target, nil
+}
+
+// RenameAt implements vfs.FilesystemImpl.RenameAt.
+func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldParentVD vfs.VirtualDentry, oldName string, opts vfs.RenameOptions) error {
+	if opts.Flags != 0 {
+		// TODO(b/145974740): Support renameat2 flags.
+		return syserror.EINVAL
+	}
+
+	// Resolve newParent first to verify that it's on this Mount.
+	fs.mu.Lock()
+	defer fs.mu.Unlock()
+	newParent, err := walkParentDirLocked(rp, rp.Start().Impl().(*dentry))
+	if err != nil {
+		return err
+	}
+	newName := rp.Component()
+	if newName == "." || newName == ".." {
+		return syserror.EBUSY
+	}
+	mnt := rp.Mount()
+	if mnt != oldParentVD.Mount() {
+		return syserror.EXDEV
+	}
+	if err := mnt.CheckBeginWrite(); err != nil {
+		return err
+	}
+	defer mnt.EndWrite()
+
+	oldParent := oldParentVD.Dentry().Impl().(*dentry)
+	if err := oldParent.inode.checkPermissions(rp.Credentials(), vfs.MayWrite|vfs.MayExec, true /* isDir */); err != nil {
+		return err
+	}
+	// Call vfs.Dentry.Child() instead of stepLocked() or rp.ResolveChild(),
+	// because if the existing child is a symlink or mount point then we want
+	// to rename over it rather than follow it.
+	renamedVFSD := oldParent.vfsd.Child(oldName)
+	if renamedVFSD == nil {
+		return syserror.ENOENT
+	}
+	renamed := renamedVFSD.Impl().(*dentry)
+	if renamed.inode.isDir() {
+		if renamed == newParent || renamedVFSD.IsAncestorOf(&newParent.vfsd) {
+			return syserror.EINVAL
+		}
+		if oldParent != newParent {
+			// Writability is needed to change renamed's "..".
+			if err := renamed.inode.checkPermissions(rp.Credentials(), vfs.MayWrite, true /* isDir */); err != nil {
+				return err
+			}
+		}
+	} else {
+		if opts.MustBeDir || rp.MustBeDir() {
+			return syserror.ENOTDIR
+		}
+	}
+
+	if err := newParent.inode.checkPermissions(rp.Credentials(), vfs.MayWrite|vfs.MayExec, true /* isDir */); err != nil {
+		return err
+	}
+	replacedVFSD := newParent.vfsd.Child(newName)
+	var replaced *dentry
+	if replacedVFSD != nil {
+		replaced = replacedVFSD.Impl().(*dentry)
+		if replaced.inode.isDir() {
+			if !renamed.inode.isDir() {
+				return syserror.EISDIR
+			}
+			if replaced.vfsd.HasChildren() {
+				return syserror.ENOTEMPTY
+			}
+		} else {
+			if rp.MustBeDir() {
+				return syserror.ENOTDIR
+			}
+			if renamed.inode.isDir() {
+				return syserror.ENOTDIR
+			}
+		}
+	} else {
+		if renamed.inode.isDir() && newParent.inode.nlink == maxLinks {
+			return syserror.EMLINK
+		}
+	}
+	if newParent.vfsd.IsDisowned() {
+		return syserror.ENOENT
+	}
+
+	// Linux places this check before some of those above; we do it here for
+	// simplicity, under the assumption that applications are not intentionally
+	// doing noop renames expecting them to succeed where non-noop renames
+	// would fail.
+	if renamedVFSD == replacedVFSD {
+		return nil
+	}
+	vfsObj := rp.VirtualFilesystem()
+	oldParentDir := oldParent.inode.impl.(*directory)
+	newParentDir := newParent.inode.impl.(*directory)
+	if err := vfsObj.PrepareRenameDentry(vfs.MountNamespaceFromContext(ctx), renamedVFSD, replacedVFSD); err != nil {
+		return err
+	}
+	if replaced != nil {
+		newParentDir.childList.Remove(replaced)
+		if replaced.inode.isDir() {
+			newParent.inode.decLinksLocked() // from replaced's ".."
+		}
+		replaced.inode.decLinksLocked()
+	}
+	oldParentDir.childList.Remove(renamed)
+	newParentDir.childList.PushBack(renamed)
+	if renamed.inode.isDir() {
+		oldParent.inode.decLinksLocked()
+		newParent.inode.incLinksLocked()
+	}
+	// TODO: update timestamps and parent directory sizes
+	vfsObj.CommitRenameReplaceDentry(renamedVFSD, &newParent.vfsd, newName, replacedVFSD)
+	return nil
+}
+
+// RmdirAt implements vfs.FilesystemImpl.RmdirAt.
+func (fs *filesystem) RmdirAt(ctx context.Context, rp *vfs.ResolvingPath) error {
+	fs.mu.Lock()
+	defer fs.mu.Unlock()
+	parent, err := walkParentDirLocked(rp, rp.Start().Impl().(*dentry))
+	if err != nil {
+		return err
+	}
+	if err := parent.inode.checkPermissions(rp.Credentials(), vfs.MayWrite|vfs.MayExec, true /* isDir */); err != nil {
+		return err
+	}
+	name := rp.Component()
+	if name == "." {
+		return syserror.EINVAL
+	}
+	if name == ".." {
+		return syserror.ENOTEMPTY
+	}
+	childVFSD := parent.vfsd.Child(name)
+	if childVFSD == nil {
+		return syserror.ENOENT
+	}
+	child := childVFSD.Impl().(*dentry)
+	if !child.inode.isDir() {
+		return syserror.ENOTDIR
+	}
+	if childVFSD.HasChildren() {
+		return syserror.ENOTEMPTY
+	}
+	mnt := rp.Mount()
+	if err := mnt.CheckBeginWrite(); err != nil {
+		return err
+	}
+	defer mnt.EndWrite()
+	vfsObj := rp.VirtualFilesystem()
+	if err := vfsObj.PrepareDeleteDentry(vfs.MountNamespaceFromContext(ctx), childVFSD); err != nil {
+		return err
+	}
+	parent.inode.impl.(*directory).childList.Remove(child)
+	parent.inode.decLinksLocked() // from child's ".."
+	child.inode.decLinksLocked()
+	vfsObj.CommitDeleteDentry(childVFSD)
+	return nil
+}
+
+// SetStatAt implements vfs.FilesystemImpl.SetStatAt.
+func (fs *filesystem) SetStatAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.SetStatOptions) error {
+	fs.mu.RLock()
+	defer fs.mu.RUnlock()
+	_, err := resolveLocked(rp)
+	if err != nil {
+		return err
+	}
+	if opts.Stat.Mask == 0 {
+		return nil
+	}
+	// TODO: implement inode.setStat
+	return syserror.EPERM
+}
+
+// StatAt implements vfs.FilesystemImpl.StatAt.
+func (fs *filesystem) StatAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.StatOptions) (linux.Statx, error) {
+	fs.mu.RLock()
+	defer fs.mu.RUnlock()
+	d, err := resolveLocked(rp)
+	if err != nil {
+		return linux.Statx{}, err
+	}
+	var stat linux.Statx
+	d.inode.statTo(&stat)
+	return stat, nil
+}
+
+// StatFSAt implements vfs.FilesystemImpl.StatFSAt.
+func (fs *filesystem) StatFSAt(ctx context.Context, rp *vfs.ResolvingPath) (linux.Statfs, error) {
+	fs.mu.RLock()
+	defer fs.mu.RUnlock()
+	_, err := resolveLocked(rp)
+	if err != nil {
+		return linux.Statfs{}, err
+	}
+	// TODO: actually implement statfs
+	return linux.Statfs{}, syserror.ENOSYS
+}
+
+// SymlinkAt implements vfs.FilesystemImpl.SymlinkAt.
+func (fs *filesystem) SymlinkAt(ctx context.Context, rp *vfs.ResolvingPath, target string) error {
+	return fs.doCreateAt(rp, false /* dir */, func(parent *dentry, name string) error {
+		child := fs.newDentry(fs.newSymlink(rp.Credentials(), target))
+		parent.vfsd.InsertChild(&child.vfsd, name)
+		parent.inode.impl.(*directory).childList.PushBack(child)
+		return nil
+	})
+}
+
+// UnlinkAt implements vfs.FilesystemImpl.UnlinkAt.
+func (fs *filesystem) UnlinkAt(ctx context.Context, rp *vfs.ResolvingPath) error {
+	fs.mu.Lock()
+	defer fs.mu.Unlock()
+	parent, err := walkParentDirLocked(rp, rp.Start().Impl().(*dentry))
+	if err != nil {
+		return err
+	}
+	if err := parent.inode.checkPermissions(rp.Credentials(), vfs.MayWrite|vfs.MayExec, true /* isDir */); err != nil {
+		return err
+	}
+	name := rp.Component()
+	if name == "." || name == ".." { // these always name directories
+		return syserror.EISDIR
+	}
+	childVFSD := parent.vfsd.Child(name) // no symlink/mount traversal: unlink the entry itself
+	if childVFSD == nil {
+		return syserror.ENOENT
+	}
+	child := childVFSD.Impl().(*dentry)
+	if child.inode.isDir() { // directories are removed via RmdirAt
+		return syserror.EISDIR
+	}
+	if rp.MustBeDir() { // path demands a directory, but child is not one
+		return syserror.ENOTDIR
+	}
+	mnt := rp.Mount()
+	if err := mnt.CheckBeginWrite(); err != nil {
+		return err
+	}
+	defer mnt.EndWrite()
+	vfsObj := rp.VirtualFilesystem()
+	if err := vfsObj.PrepareDeleteDentry(vfs.MountNamespaceFromContext(ctx), childVFSD); err != nil {
+		return err
+	}
+	parent.inode.impl.(*directory).childList.Remove(child) // keep the IterDirents list in sync
+	child.inode.decLinksLocked()
+	vfsObj.CommitDeleteDentry(childVFSD)
+	return nil
+}
+
+// ListxattrAt implements vfs.FilesystemImpl.ListxattrAt.
+func (fs *filesystem) ListxattrAt(ctx context.Context, rp *vfs.ResolvingPath) ([]string, error) {
+	fs.mu.RLock()
+	defer fs.mu.RUnlock()
+	_, err := resolveLocked(rp)
+	if err != nil {
+		return nil, err
+	}
+	// TODO(b/127675828): support extended attributes
+	return nil, syserror.ENOTSUP
+}
+
+// GetxattrAt implements vfs.FilesystemImpl.GetxattrAt.
+func (fs *filesystem) GetxattrAt(ctx context.Context, rp *vfs.ResolvingPath, name string) (string, error) {
+	fs.mu.RLock()
+	defer fs.mu.RUnlock()
+	_, err := resolveLocked(rp)
+	if err != nil {
+		return "", err
+	}
+	// TODO(b/127675828): support extended attributes
+	return "", syserror.ENOTSUP
+}
+
+// SetxattrAt implements vfs.FilesystemImpl.SetxattrAt.
+func (fs *filesystem) SetxattrAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.SetxattrOptions) error {
+	fs.mu.RLock()
+	defer fs.mu.RUnlock()
+	_, err := resolveLocked(rp)
+	if err != nil {
+		return err
+	}
+	// TODO(b/127675828): support extended attributes
+	return syserror.ENOTSUP
+}
+
+// RemovexattrAt implements vfs.FilesystemImpl.RemovexattrAt.
+func (fs *filesystem) RemovexattrAt(ctx context.Context, rp *vfs.ResolvingPath, name string) error {
+	fs.mu.RLock()
+	defer fs.mu.RUnlock()
+	_, err := resolveLocked(rp)
+	if err != nil {
+		return err
+	}
+	// TODO(b/127675828): support extended attributes
+	return syserror.ENOTSUP
+}
+
+// PrependPath implements vfs.FilesystemImpl.PrependPath.
+func (fs *filesystem) PrependPath(ctx context.Context, vfsroot, vd vfs.VirtualDentry, b *fspath.Builder) error {
+	fs.mu.RLock()
+	defer fs.mu.RUnlock()
+	return vfs.GenericPrependPath(vfsroot, vd, b)
+}
diff --git a/pkg/sentry/fsimpl/tmpfs/named_pipe.go b/pkg/sentry/fsimpl/tmpfs/named_pipe.go
new file mode 100644
index 000000000..40bde54de
--- /dev/null
+++ b/pkg/sentry/fsimpl/tmpfs/named_pipe.go
@@ -0,0 +1,60 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package tmpfs
+
+import (
+	"gvisor.dev/gvisor/pkg/abi/linux"
+	"gvisor.dev/gvisor/pkg/sentry/context"
+	"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+	"gvisor.dev/gvisor/pkg/sentry/kernel/pipe"
+	"gvisor.dev/gvisor/pkg/sentry/usermem"
+	"gvisor.dev/gvisor/pkg/sentry/vfs"
+)
+
+type namedPipe struct {
+	inode inode
+
+	pipe *pipe.VFSPipe
+}
+
+// Preconditions:
+//   * fs.mu must be locked.
+//   * rp.Mount().CheckBeginWrite() has been called successfully.
+func (fs *filesystem) newNamedPipe(creds *auth.Credentials, mode linux.FileMode) *inode {
+	file := &namedPipe{pipe: pipe.NewVFSPipe(pipe.DefaultPipeSize, usermem.PageSize)}
+	file.inode.init(file, fs, creds, mode)
+	file.inode.nlink = 1 // Only the parent has a link.
+	return &file.inode
+}
+
+// namedPipeFD implements vfs.FileDescriptionImpl. Methods are implemented
+// entirely via struct embedding.
+type namedPipeFD struct {
+	fileDescription
+
+	*pipe.VFSPipeFD
+}
+
+func newNamedPipeFD(ctx context.Context, np *namedPipe, rp *vfs.ResolvingPath, vfsd *vfs.Dentry, flags uint32) (*vfs.FileDescription, error) {
+	var err error
+	var fd namedPipeFD
+	fd.VFSPipeFD, err = np.pipe.NewVFSPipeFD(ctx, rp, vfsd, &fd.vfsfd, flags)
+	if err != nil {
+		return nil, err
+	}
+	mnt := rp.Mount()
+	fd.vfsfd.Init(&fd, flags, mnt, vfsd, &vfs.FileDescriptionOptions{})
+	return &fd.vfsfd, nil
+}
diff --git a/pkg/sentry/fsimpl/tmpfs/pipe_test.go b/pkg/sentry/fsimpl/tmpfs/pipe_test.go
new file mode 100644
index 000000000..70b42a6ec
--- /dev/null
+++ b/pkg/sentry/fsimpl/tmpfs/pipe_test.go
@@ -0,0 +1,235 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package tmpfs
+
+import (
+	"bytes"
+	"testing"
+
+	"gvisor.dev/gvisor/pkg/abi/linux"
+	"gvisor.dev/gvisor/pkg/fspath"
+	"gvisor.dev/gvisor/pkg/sentry/context"
+	"gvisor.dev/gvisor/pkg/sentry/context/contexttest"
+	"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+	"gvisor.dev/gvisor/pkg/sentry/usermem"
+	"gvisor.dev/gvisor/pkg/sentry/vfs"
+	"gvisor.dev/gvisor/pkg/syserror"
+)
+
+const fileName = "mypipe"
+
+func TestSeparateFDs(t *testing.T) {
+	ctx, creds, vfsObj, root := setup(t)
+	defer root.DecRef()
+
+	// Open the read side. This is done concurrently because opening one
+	// end of the pipe blocks until the other end is opened.
+	pop := vfs.PathOperation{
+		Root:               root,
+		Start:              root,
+		Path:               fspath.Parse(fileName),
+		FollowFinalSymlink: true,
+	}
+	rfdchan := make(chan *vfs.FileDescription)
+	go func() {
+		openOpts := vfs.OpenOptions{Flags: linux.O_RDONLY}
+		rfd, _ := vfsObj.OpenAt(ctx, creds, &pop, &openOpts) // failure is seen as a nil FD by the receiver
+		rfdchan <- rfd
+	}()
+
+	// Open the write side.
+	openOpts := vfs.OpenOptions{Flags: linux.O_WRONLY}
+	wfd, err := vfsObj.OpenAt(ctx, creds, &pop, &openOpts)
+	if err != nil {
+		t.Fatalf("failed to open pipe for writing %q: %v", fileName, err)
+	}
+	defer wfd.DecRef()
+
+	rfd := <-rfdchan
+	if rfd == nil {
+		t.Fatalf("failed to open pipe for reading %q", fileName)
+	}
+	defer rfd.DecRef()
+
+	const msg = "vamos azul"
+	checkEmpty(ctx, t, rfd)
+	checkWrite(ctx, t, wfd, msg)
+	checkRead(ctx, t, rfd, msg)
+}
+
+func TestNonblockingRead(t *testing.T) {
+	ctx, creds, vfsObj, root := setup(t)
+	defer root.DecRef()
+
+	// Open the read side first, as nonblocking, before any writer exists.
+	pop := vfs.PathOperation{
+		Root:               root,
+		Start:              root,
+		Path:               fspath.Parse(fileName),
+		FollowFinalSymlink: true,
+	}
+	readOpts := vfs.OpenOptions{Flags: linux.O_RDONLY | linux.O_NONBLOCK}
+	rfd, err := vfsObj.OpenAt(ctx, creds, &pop, &readOpts)
+	if err != nil {
+		t.Fatalf("failed to open pipe for reading %q: %v", fileName, err)
+	}
+	defer rfd.DecRef()
+
+	// Now open the (blocking) write side.
+	writeOpts := vfs.OpenOptions{Flags: linux.O_WRONLY}
+	wfd, err := vfsObj.OpenAt(ctx, creds, &pop, &writeOpts)
+	if err != nil {
+		t.Fatalf("failed to open pipe for writing %q: %v", fileName, err)
+	}
+	defer wfd.DecRef()
+
+	const msg = "geh blau"
+	checkEmpty(ctx, t, rfd)
+	checkWrite(ctx, t, wfd, msg)
+	checkRead(ctx, t, rfd, msg)
+}
+
+func TestNonblockingWriteError(t *testing.T) {
+	ctx, creds, vfsObj, root := setup(t)
+	defer root.DecRef()
+
+	// Nothing has the pipe open for reading, so opening the write side as
+	// nonblocking should return ENXIO.
+	pop := vfs.PathOperation{
+		Root:               root,
+		Start:              root,
+		Path:               fspath.Parse(fileName),
+		FollowFinalSymlink: true,
+	}
+	wopts := vfs.OpenOptions{Flags: linux.O_WRONLY | linux.O_NONBLOCK}
+	if _, err := vfsObj.OpenAt(ctx, creds, &pop, &wopts); err != syserror.ENXIO {
+		t.Fatalf("expected ENXIO, but got error: %v", err)
+	}
+}
+
+func TestSingleFD(t *testing.T) {
+	ctx, creds, vfsObj, root := setup(t)
+	defer root.DecRef()
+
+	// Open the pipe as readable and writable through a single FD.
+	pop := vfs.PathOperation{
+		Root:               root,
+		Start:              root,
+		Path:               fspath.Parse(fileName),
+		FollowFinalSymlink: true,
+	}
+	openOpts := vfs.OpenOptions{Flags: linux.O_RDWR}
+	fd, err := vfsObj.OpenAt(ctx, creds, &pop, &openOpts)
+	if err != nil {
+		t.Fatalf("failed to open pipe for reading and writing %q: %v", fileName, err)
+	}
+	defer fd.DecRef()
+
+	const msg = "forza blu"
+	checkEmpty(ctx, t, fd)
+	checkWrite(ctx, t, fd, msg)
+	checkRead(ctx, t, fd, msg)
+}
+
+// setup creates a VFS with a pipe in the root directory at path fileName. The
+// returned VirtualDentry must be DecRef()'d by the caller. It calls t.Fatal
+// upon failure.
+func setup(t *testing.T) (context.Context, *auth.Credentials, *vfs.VirtualFilesystem, vfs.VirtualDentry) {
+	ctx := contexttest.Context(t)
+	creds := auth.CredentialsFromContext(ctx)
+
+	// Create VFS.
+	vfsObj := vfs.New()
+	vfsObj.MustRegisterFilesystemType("tmpfs", FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{
+		AllowUserMount: true,
+	})
+	mntns, err := vfsObj.NewMountNamespace(ctx, creds, "", "tmpfs", &vfs.GetFilesystemOptions{})
+	if err != nil {
+		t.Fatalf("failed to create tmpfs root mount: %v", err)
+	}
+
+	// Create the pipe.
+	root := mntns.Root()
+	pop := vfs.PathOperation{
+		Root:  root,
+		Start: root,
+		Path:  fspath.Parse(fileName),
+	}
+	mknodOpts := vfs.MknodOptions{Mode: linux.ModeNamedPipe | 0644}
+	if err := vfsObj.MknodAt(ctx, creds, &pop, &mknodOpts); err != nil {
+		t.Fatalf("failed to create file %q: %v", fileName, err)
+	}
+
+	// Sanity check: the pipe exists and has the correct mode and file type.
+	stat, err := vfsObj.StatAt(ctx, creds, &vfs.PathOperation{
+		Root:               root,
+		Start:              root,
+		Path:               fspath.Parse(fileName),
+		FollowFinalSymlink: true,
+	}, &vfs.StatOptions{})
+	if err != nil {
+		t.Fatalf("stat(%q) failed: %v", fileName, err)
+	}
+	if stat.Mode&^linux.S_IFMT != 0644 {
+		t.Errorf("got wrong permissions (%0o)", stat.Mode)
+	}
+	if stat.Mode&linux.S_IFMT != linux.ModeNamedPipe {
+		t.Errorf("got wrong file type (%0o)", stat.Mode)
+	}
+
+	return ctx, creds, vfsObj, root
+}
+
+// checkEmpty calls t.Fatal if the pipe in fd is not empty.
+func checkEmpty(ctx context.Context, t *testing.T, fd *vfs.FileDescription) {
+	t.Helper() // attribute failures to the calling test's line
+	dst := usermem.BytesIOSequence(make([]byte, 1))
+	bytesRead, err := fd.Read(ctx, dst, vfs.ReadOptions{})
+	if err != syserror.ErrWouldBlock {
+		t.Fatalf("expected ErrWouldBlock reading from empty pipe %q, but got: %v", fileName, err)
+	}
+	if bytesRead != 0 {
+		t.Fatalf("expected to read 0 bytes, but got %d", bytesRead)
+	}
+}
+
+// checkWrite calls t.Fatal if it fails to write all of msg to fd.
+func checkWrite(ctx context.Context, t *testing.T, fd *vfs.FileDescription, msg string) {
+	t.Helper() // attribute failures to the calling test's line
+	src := usermem.BytesIOSequence([]byte(msg))
+	bytesWritten, err := fd.Write(ctx, src, vfs.WriteOptions{})
+	if err != nil {
+		t.Fatalf("error writing to pipe %q: %v", fileName, err)
+	}
+	if bytesWritten != int64(len(msg)) {
+		t.Fatalf("expected to write %d bytes, but wrote %d", len(msg), bytesWritten)
+	}
+}
+
+// checkRead calls t.Fatal if it fails to read msg from fd.
+func checkRead(ctx context.Context, t *testing.T, fd *vfs.FileDescription, msg string) {
+	t.Helper() // attribute failures to the calling test's line
+	readData := make([]byte, len(msg))
+	bytesRead, err := fd.Read(ctx, usermem.BytesIOSequence(readData), vfs.ReadOptions{})
+	if err != nil {
+		t.Fatalf("error reading from pipe %q: %v", fileName, err)
+	}
+	if bytesRead != int64(len(msg)) {
+		t.Fatalf("expected to read %d bytes, but got %d", len(msg), bytesRead)
+	}
+	if !bytes.Equal(readData, []byte(msg)) {
+		t.Fatalf("expected to read %q from pipe, but got %q", msg, string(readData))
+	}
+}
diff --git a/pkg/sentry/fsimpl/tmpfs/regular_file.go b/pkg/sentry/fsimpl/tmpfs/regular_file.go
new file mode 100644
index 000000000..f51e247a7
--- /dev/null
+++ b/pkg/sentry/fsimpl/tmpfs/regular_file.go
@@ -0,0 +1,357 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package tmpfs
+
+import (
+	"io"
+	"math"
+	"sync"
+	"sync/atomic"
+
+	"gvisor.dev/gvisor/pkg/abi/linux"
+	"gvisor.dev/gvisor/pkg/sentry/context"
+	"gvisor.dev/gvisor/pkg/sentry/fs/fsutil"
+	"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+	"gvisor.dev/gvisor/pkg/sentry/memmap"
+	"gvisor.dev/gvisor/pkg/sentry/pgalloc"
+	"gvisor.dev/gvisor/pkg/sentry/safemem"
+	"gvisor.dev/gvisor/pkg/sentry/usage"
+	"gvisor.dev/gvisor/pkg/sentry/usermem"
+	"gvisor.dev/gvisor/pkg/sentry/vfs"
+	"gvisor.dev/gvisor/pkg/syserror"
+)
+
+// regularFile is the inode implementation for regular files.
+type regularFile struct {
+	inode inode
+	// memFile is the pgalloc.MemoryFile this file's pages are allocated from.
+	memFile *pgalloc.MemoryFile
+
+	// mu protects the fields below.
+	mu sync.RWMutex
+
+	// data maps offsets into the file to offsets into memFile that store
+	// the file's data.
+	data fsutil.FileRangeSet
+
+	// size is the size of data. Writers store it atomically while holding
+	// mu so that lock-free readers (Seek, inode.stat()) can load it atomically.
+	size uint64
+
+	// seals holds the linux.F_SEAL_* bits checked by WriteFromBlocks.
+	seals uint32
+}
+
+// newRegularFile returns the inode of a new regular file that uses fs.memFile.
+func (fs *filesystem) newRegularFile(creds *auth.Credentials, mode linux.FileMode) *inode {
+	f := new(regularFile)
+	f.memFile = fs.memFile
+	f.inode.init(f, fs, creds, mode)
+	f.inode.nlink = 1 // from parent directory
+	return &f.inode
+}
+
+type regularFileFD struct {
+	fileDescription
+
+	// These are immutable.
+	readable bool
+	writable bool
+
+	// off is the file offset. offMu serializes every operation that reads or
+	// updates off (Read, Write and Seek); off is only accessed with offMu held.
+	off   int64
+	offMu sync.Mutex
+}
+
+// Release implements vfs.FileDescriptionImpl.Release.
+func (fd *regularFileFD) Release() {
+	if fd.writable {
+		fd.vfsfd.VirtualDentry().Mount().EndWrite() // NOTE(review): presumably pairs with a CheckBeginWrite at open - confirm
+	}
+}
+
+// PRead implements vfs.FileDescriptionImpl.PRead.
+func (fd *regularFileFD) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts vfs.ReadOptions) (int64, error) {
+	// FDs not opened for reading and negative offsets are both EINVAL.
+	if !fd.readable || offset < 0 {
+		return 0, syserror.EINVAL
+	}
+	if dst.NumBytes() == 0 {
+		return 0, nil
+	}
+	// Borrow a pooled reader positioned at offset; it goes back to the pool
+	// once the copy is done.
+	file := fd.inode().impl.(*regularFile)
+	rw := getRegularFileReadWriter(file, offset)
+	n, err := dst.CopyOutFrom(ctx, rw)
+	putRegularFileReadWriter(rw)
+	return int64(n), err
+}
+
+// Read implements vfs.FileDescriptionImpl.Read.
+func (fd *regularFileFD) Read(ctx context.Context, dst usermem.IOSequence, opts vfs.ReadOptions) (int64, error) {
+	fd.offMu.Lock()
+	defer fd.offMu.Unlock()
+	nread, err := fd.PRead(ctx, dst, fd.off, opts)
+	fd.off += nread // advance past whatever was read, even on error
+	return nread, err
+}
+
+// PWrite implements vfs.FileDescriptionImpl.PWrite.
+func (fd *regularFileFD) PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (int64, error) {
+	// FDs not opened for writing and negative offsets are both EINVAL.
+	if !fd.writable || offset < 0 {
+		return 0, syserror.EINVAL
+	}
+	srclen := src.NumBytes()
+	if srclen == 0 {
+		return 0, nil
+	}
+	// A sum that wraps below offset means the end of the write overflowed.
+	if end := offset + srclen; end < offset {
+		return 0, syserror.EFBIG
+	}
+
+	// Borrow a pooled writer positioned at offset; it goes back to the pool
+	// once the copy is done.
+	file := fd.inode().impl.(*regularFile)
+	rw := getRegularFileReadWriter(file, offset)
+	n, err := src.CopyInTo(ctx, rw)
+	putRegularFileReadWriter(rw)
+	return n, err
+}
+
+// Write implements vfs.FileDescriptionImpl.Write.
+func (fd *regularFileFD) Write(ctx context.Context, src usermem.IOSequence, opts vfs.WriteOptions) (int64, error) {
+	fd.offMu.Lock()
+	defer fd.offMu.Unlock()
+	nwritten, err := fd.PWrite(ctx, src, fd.off, opts)
+	fd.off += nwritten // advance past whatever was written, even on error
+	return nwritten, err
+}
+
+// Seek implements vfs.FileDescriptionImpl.Seek.
+func (fd *regularFileFD) Seek(ctx context.Context, offset int64, whence int32) (int64, error) {
+	fd.offMu.Lock()
+	defer fd.offMu.Unlock()
+	switch whence {
+	case linux.SEEK_SET:
+		// Absolute position: use offset as specified.
+	case linux.SEEK_CUR:
+		offset += fd.off
+	case linux.SEEK_END:
+		offset += int64(atomic.LoadUint64(&fd.inode().impl.(*regularFile).size))
+	default:
+		return 0, syserror.EINVAL
+	}
+	if offset < 0 {
+		return 0, syserror.EINVAL
+	}
+	fd.off = offset
+	return offset, nil
+}
+
+// Sync implements vfs.FileDescriptionImpl.Sync.
+func (fd *regularFileFD) Sync(ctx context.Context) error {
+	return nil // no-op: always reports success
+}
+
+// regularFileReadWriter implements safemem.Reader and safemem.Writer.
+type regularFileReadWriter struct {
+	file *regularFile
+
+	// Offset into the file to read/write at. Note that this may be
+	// different from the FD offset if PRead/PWrite is used.
+	off uint64
+}
+
+// regularFileReadWriterPool recycles regularFileReadWriter objects; see
+// getRegularFileReadWriter and putRegularFileReadWriter.
+var regularFileReadWriterPool = sync.Pool{
+	New: func() interface{} { return &regularFileReadWriter{} },
+}
+
+// getRegularFileReadWriter takes a pooled reader/writer and aims it at offset in file.
+func getRegularFileReadWriter(file *regularFile, offset int64) *regularFileReadWriter {
+	rw := regularFileReadWriterPool.Get().(*regularFileReadWriter)
+	rw.file, rw.off = file, uint64(offset)
+	return rw
+}
+
+func putRegularFileReadWriter(rw *regularFileReadWriter) {
+	rw.file = nil // drop the file reference before the object goes back to the pool
+	regularFileReadWriterPool.Put(rw)
+}
+
+// ReadToBlocks implements safemem.Reader.ReadToBlocks. It copies file data
+// starting at rw.off into dsts, advancing rw.off by the number of bytes read,
+// and returns io.EOF if rw.off is at or beyond the end of the file.
+func (rw *regularFileReadWriter) ReadToBlocks(dsts safemem.BlockSeq) (uint64, error) {
+	rw.file.mu.RLock()
+	defer rw.file.mu.RUnlock()
+
+	// Compute the range to read. end is clamped to the file size; rend is
+	// ignored if rw.off + dsts.NumBytes() wrapped around (rend <= rw.off).
+	if rw.off >= rw.file.size {
+		return 0, io.EOF
+	}
+	end := rw.file.size
+	if rend := rw.off + dsts.NumBytes(); rend > rw.off && rend < end {
+		end = rend
+	}
+
+	// Walk the segments and gaps of rw.file.data that cover [rw.off, end).
+	var done uint64
+	seg, gap := rw.file.data.Find(rw.off)
+	for rw.off < end {
+		want := memmap.MappableRange{rw.off, end}
+		switch {
+		case seg.Ok():
+			// Get internal mappings.
+			ims, err := rw.file.memFile.MapInternal(seg.FileRangeOf(seg.Range().Intersect(want)), usermem.Read)
+			if err != nil {
+				return done, err
+			}
+
+			// Copy from internal mappings.
+			n, err := safemem.CopySeq(dsts, ims)
+			done += n
+			rw.off += n
+			dsts = dsts.DropFirst64(n)
+			if err != nil {
+				return done, err
+			}
+
+			// Continue.
+			seg, gap = seg.NextNonEmpty()
+
+		case gap.Ok():
+			// Tmpfs holes are zero-filled.
+			hole := gap.Range().Intersect(want)
+			zeroDst := dsts.TakeFirst64(hole.Length())
+			n, err := safemem.ZeroSeq(zeroDst)
+			done += n
+			rw.off += n
+			dsts = dsts.DropFirst64(n)
+			if err != nil {
+				return done, err
+			}
+
+			// Continue.
+			seg, gap = gap.NextSegment(), fsutil.FileRangeGapIterator{}
+		}
+	}
+	return done, nil
+}
+
+// WriteFromBlocks implements safemem.Writer.WriteFromBlocks.
+func (rw *regularFileReadWriter) WriteFromBlocks(srcs safemem.BlockSeq) (uint64, error) {
+	rw.file.mu.Lock()
+
+	// Compute the range to write (overflow-checked).
+	end := rw.off + srcs.NumBytes()
+	if end <= rw.off {
+		end = math.MaxInt64
+	}
+
+	// Check if seals prevent either file growth or all writes.
+	switch {
+	case rw.file.seals&linux.F_SEAL_WRITE != 0: // Write sealed
+		rw.file.mu.Unlock()
+		return 0, syserror.EPERM
+	case end > rw.file.size && rw.file.seals&linux.F_SEAL_GROW != 0: // Grow sealed
+		// When growth is sealed, Linux effectively allows writes which would
+		// normally grow the file to partially succeed up to the current EOF,
+		// rounded down to the page boundary before the EOF.
+		//
+		// This happens because writes (and thus the growth check) for tmpfs
+		// files proceed page-by-page on Linux, and the final write to the page
+		// containing EOF fails, resulting in a partial write up to the start of
+		// that page.
+		//
+		// To emulate this behaviour, artificially truncate the write to the
+		// start of the page containing the current EOF.
+		//
+		// See Linux, mm/filemap.c:generic_perform_write() and
+		// mm/shmem.c:shmem_write_begin().
+		if pgstart := uint64(usermem.Addr(rw.file.size).RoundDown()); end > pgstart {
+			end = pgstart
+		}
+		if end <= rw.off {
+			// Truncation would result in no data being written.
+			rw.file.mu.Unlock()
+			return 0, syserror.EPERM
+		}
+	}
+
+	// Page-aligned mr for when we need to allocate memory. RoundUp can't
+	// overflow since end is an int64.
+	pgstartaddr := usermem.Addr(rw.off).RoundDown()
+	pgendaddr, _ := usermem.Addr(end).RoundUp()
+	pgMR := memmap.MappableRange{uint64(pgstartaddr), uint64(pgendaddr)}
+
+	var (
+		done   uint64
+		retErr error
+	)
+	seg, gap := rw.file.data.Find(uint64(rw.off))
+	for rw.off < end {
+		mr := memmap.MappableRange{uint64(rw.off), uint64(end)}
+		switch {
+		case seg.Ok():
+			// Get internal mappings.
+			ims, err := rw.file.memFile.MapInternal(seg.FileRangeOf(seg.Range().Intersect(mr)), usermem.Write)
+			if err != nil {
+				retErr = err
+				goto exitLoop
+			}
+
+			// Copy to internal mappings.
+			n, err := safemem.CopySeq(ims, srcs)
+			done += n
+			rw.off += uint64(n)
+			srcs = srcs.DropFirst64(n)
+			if err != nil {
+				retErr = err
+				goto exitLoop
+			}
+
+			// Continue.
+			seg, gap = seg.NextNonEmpty()
+
+		case gap.Ok():
+			// Allocate memory for the write.
+			gapMR := gap.Range().Intersect(pgMR)
+			fr, err := rw.file.memFile.Allocate(gapMR.Length(), usage.Tmpfs)
+			if err != nil {
+				retErr = err
+				goto exitLoop
+			}
+
+			// Write to that memory as usual.
+			seg, gap = rw.file.data.Insert(gap, gapMR, fr.Start), fsutil.FileRangeGapIterator{}
+		}
+	}
+exitLoop:
+	// If the write ends beyond the file's previous size, it causes the
+	// file to grow.
+	if rw.off > rw.file.size {
+		atomic.StoreUint64(&rw.file.size, rw.off)
+	}
+
+	rw.file.mu.Unlock()
+	return done, retErr
+}
diff --git a/pkg/sentry/fsimpl/tmpfs/regular_file_test.go b/pkg/sentry/fsimpl/tmpfs/regular_file_test.go
new file mode 100644
index 000000000..3731c5b6f
--- /dev/null
+++ b/pkg/sentry/fsimpl/tmpfs/regular_file_test.go
@@ -0,0 +1,224 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package tmpfs
+
+import (
+	"bytes"
+	"fmt"
+	"io"
+	"testing"
+
+	"gvisor.dev/gvisor/pkg/abi/linux"
+	"gvisor.dev/gvisor/pkg/fspath"
+	"gvisor.dev/gvisor/pkg/sentry/context"
+	"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+	"gvisor.dev/gvisor/pkg/sentry/kernel/contexttest"
+	"gvisor.dev/gvisor/pkg/sentry/usermem"
+	"gvisor.dev/gvisor/pkg/sentry/vfs"
+)
+
+// newFileFD creates a new file in a new tmpfs mount, and returns the FD. If
+// the returned err is nil, then cleanup should be called when the FD is no
+// longer needed.
+func newFileFD(ctx context.Context, filename string) (*vfs.FileDescription, func(), error) {
+	creds := auth.CredentialsFromContext(ctx)
+
+	vfsObj := vfs.New()
+	vfsObj.MustRegisterFilesystemType("tmpfs", FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{
+		AllowUserMount: true,
+	})
+	mntns, err := vfsObj.NewMountNamespace(ctx, creds, "", "tmpfs", &vfs.GetFilesystemOptions{})
+	if err != nil {
+		return nil, nil, fmt.Errorf("failed to create tmpfs root mount: %v", err)
+	}
+	root := mntns.Root()
+
+	// Create the file that will be written and read.
+	fd, err := vfsObj.OpenAt(ctx, creds, &vfs.PathOperation{
+		Root:               root,
+		Start:              root,
+		Path:               fspath.Parse(filename),
+		FollowFinalSymlink: true,
+	}, &vfs.OpenOptions{
+		Flags: linux.O_RDWR | linux.O_CREAT | linux.O_EXCL,
+		Mode:  0644,
+	})
+	if err != nil {
+		root.DecRef()
+		mntns.DecRef(vfsObj)
+		return nil, nil, fmt.Errorf("failed to create file %q: %v", filename, err)
+	}
+
+	return fd, func() {
+		root.DecRef()
+		mntns.DecRef(vfsObj)
+	}, nil
+}
+
+// Test that we can write some data to a file and read it back.
+func TestSimpleWriteRead(t *testing.T) {
+	ctx := contexttest.Context(t)
+	fd, cleanup, err := newFileFD(ctx, "simpleReadWrite")
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer cleanup()
+
+	// Write.
+	data := []byte("foobarbaz")
+	n, err := fd.Write(ctx, usermem.BytesIOSequence(data), vfs.WriteOptions{})
+	if err != nil {
+		t.Fatalf("fd.Write failed: %v", err)
+	}
+	if n != int64(len(data)) {
+		t.Errorf("fd.Write got short write length %d, want %d", n, len(data))
+	}
+	if got, want := fd.Impl().(*regularFileFD).off, int64(len(data)); got != want {
+		t.Errorf("fd.Write left offset at %d, want %d", got, want)
+	}
+
+	// Seek back to beginning.
+	if _, err := fd.Seek(ctx, 0, linux.SEEK_SET); err != nil {
+		t.Fatalf("fd.Seek failed: %v", err)
+	}
+	if got, want := fd.Impl().(*regularFileFD).off, int64(0); got != want {
+		t.Errorf("fd.Seek(0) left offset at %d, want %d", got, want)
+	}
+
+	// Read. io.EOF is tolerated because the read ends exactly at end of file.
+	buf := make([]byte, len(data))
+	n, err = fd.Read(ctx, usermem.BytesIOSequence(buf), vfs.ReadOptions{})
+	if err != nil && err != io.EOF {
+		t.Fatalf("fd.Read failed: %v", err)
+	}
+	if n != int64(len(data)) {
+		t.Errorf("fd.Read got short read length %d, want %d", n, len(data))
+	}
+	if got, want := string(buf), string(data); got != want {
+		t.Errorf("Read got %q want %q", got, want)
+	}
+	if got, want := fd.Impl().(*regularFileFD).off, int64(len(data)); got != want {
+		t.Errorf("fd.Read left offset at %d, want %d", got, want)
+	}
+}
+
+func TestPWrite(t *testing.T) {
+	ctx := contexttest.Context(t)
+	fd, cleanup, err := newFileFD(ctx, "PWrite")
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer cleanup()
+
+	// Fill file with 1k 'a's.
+	data := bytes.Repeat([]byte{'a'}, 1000)
+	n, err := fd.Write(ctx, usermem.BytesIOSequence(data), vfs.WriteOptions{})
+	if err != nil {
+		t.Fatalf("fd.Write failed: %v", err)
+	}
+	if n != int64(len(data)) {
+		t.Errorf("fd.Write got short write length %d, want %d", n, len(data))
+	}
+
+	// Write "gVisor is awesome" at various offsets.
+	buf := []byte("gVisor is awesome")
+	offsets := []int{0, 1, 2, 10, 20, 50, 100, len(data) - 100, len(data) - 1, len(data), len(data) + 1}
+	for _, offset := range offsets {
+		name := fmt.Sprintf("PWrite offset=%d", offset)
+		t.Run(name, func(t *testing.T) {
+			n, err := fd.PWrite(ctx, usermem.BytesIOSequence(buf), int64(offset), vfs.WriteOptions{})
+			if err != nil {
+				t.Errorf("fd.PWrite got err %v want nil", err)
+			}
+			if n != int64(len(buf)) {
+				t.Errorf("fd.PWrite got %d bytes want %d", n, len(buf))
+			}
+
+			// Update data to reflect expected file contents.
+			if len(data) < offset+len(buf) {
+				data = append(data, make([]byte, (offset+len(buf))-len(data))...)
+			}
+			copy(data[offset:], buf)
+
+			// Read the whole file and compare with data.
+			readBuf := make([]byte, len(data))
+			n, err = fd.PRead(ctx, usermem.BytesIOSequence(readBuf), 0, vfs.ReadOptions{})
+			if err != nil {
+				t.Fatalf("fd.PRead failed: %v", err)
+			}
+			if n != int64(len(data)) {
+				t.Errorf("fd.PRead got short read length %d, want %d", n, len(data))
+			}
+			if got, want := string(readBuf), string(data); got != want {
+				t.Errorf("PRead got %q want %q", got, want)
+			}
+
+		})
+	}
+}
+
+func TestPRead(t *testing.T) {
+	ctx := contexttest.Context(t)
+	fd, cleanup, err := newFileFD(ctx, "PRead")
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer cleanup()
+
+	// Write 100 sequences of 'gVisor is awesome'.
+	data := bytes.Repeat([]byte("gVisor is awesome"), 100)
+	n, err := fd.Write(ctx, usermem.BytesIOSequence(data), vfs.WriteOptions{})
+	if err != nil {
+		t.Fatalf("fd.Write failed: %v", err)
+	}
+	if n != int64(len(data)) {
+		t.Errorf("fd.Write got short write length %d, want %d", n, len(data))
+	}
+
+	// Read various sizes from various offsets.
+	sizes := []int{0, 1, 2, 10, 20, 50, 100, 1000}
+	offsets := []int{0, 1, 2, 10, 20, 50, 100, 1000, len(data) - 100, len(data) - 1, len(data), len(data) + 1}
+
+	for _, size := range sizes {
+		for _, offset := range offsets {
+			name := fmt.Sprintf("PRead offset=%d size=%d", offset, size)
+			t.Run(name, func(t *testing.T) {
+				var (
+					wantRead []byte
+					wantErr  error
+				)
+				if offset < len(data) {
+					wantRead = data[offset:]
+				} else if size > 0 {
+					wantErr = io.EOF
+				}
+				if offset+size < len(data) {
+					wantRead = wantRead[:size]
+				}
+				buf := make([]byte, size)
+				n, err := fd.PRead(ctx, usermem.BytesIOSequence(buf), int64(offset), vfs.ReadOptions{})
+				if err != wantErr {
+					t.Errorf("fd.PRead got err %v want %v", err, wantErr)
+				}
+				if n != int64(len(wantRead)) {
+					t.Errorf("fd.PRead got %d bytes want %d", n, len(wantRead))
+				}
+				if got := string(buf[:n]); got != string(wantRead) {
+					t.Errorf("fd.PRead got %q want %q", got, string(wantRead))
+				}
+			})
+		}
+	}
+}
diff --git a/pkg/sentry/fsimpl/tmpfs/symlink.go b/pkg/sentry/fsimpl/tmpfs/symlink.go
new file mode 100644
index 000000000..5246aca84
--- /dev/null
+++ b/pkg/sentry/fsimpl/tmpfs/symlink.go
@@ -0,0 +1,36 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package tmpfs
+
+import (
+	"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+)
+
+type symlink struct {
+	inode  inode
+	target string // immutable; set by newSymlink
+}
+
+// newSymlink returns the inode of a new symlink whose target is target.
+func (fs *filesystem) newSymlink(creds *auth.Credentials, target string) *inode {
+	link := new(symlink)
+	link.target = target
+	link.inode.init(link, fs, creds, 0777)
+	link.inode.nlink = 1 // from parent directory
+	return &link.inode
+}
+
+// O_PATH is unimplemented, so there's no way to get a FileDescription
+// representing a symlink yet.
diff --git a/pkg/sentry/fsimpl/tmpfs/tmpfs.go b/pkg/sentry/fsimpl/tmpfs/tmpfs.go
new file mode 100644
index 000000000..7be6faa5b
--- /dev/null
+++ b/pkg/sentry/fsimpl/tmpfs/tmpfs.go
@@ -0,0 +1,299 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package tmpfs provides a filesystem implementation that behaves like tmpfs:
+// the Dentry tree is the sole source of truth for the state of the filesystem.
+//
+// Lock order:
+//
+// filesystem.mu
+//   regularFileFD.offMu
+//     regularFile.mu
+//   inode.mu
+package tmpfs
+
+import (
+	"fmt"
+	"math"
+	"sync"
+	"sync/atomic"
+
+	"gvisor.dev/gvisor/pkg/abi/linux"
+	"gvisor.dev/gvisor/pkg/sentry/context"
+	"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+	"gvisor.dev/gvisor/pkg/sentry/pgalloc"
+	"gvisor.dev/gvisor/pkg/sentry/vfs"
+	"gvisor.dev/gvisor/pkg/syserror"
+)
+
+// FilesystemType implements vfs.FilesystemType. It holds no state.
+type FilesystemType struct{}
+
+// filesystem implements vfs.FilesystemImpl.
+type filesystem struct {
+	vfsfs vfs.Filesystem
+
+	// memFile is used to allocate pages for regular files.
+	memFile *pgalloc.MemoryFile
+
+	// mu serializes changes to the Dentry tree.
+	mu sync.RWMutex
+
+	nextInoMinusOne uint64 // last inode number handed out; accessed atomically
+}
+
+// GetFilesystem implements vfs.FilesystemType.GetFilesystem.
+func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.VirtualFilesystem, creds *auth.Credentials, source string, opts vfs.GetFilesystemOptions) (*vfs.Filesystem, *vfs.Dentry, error) {
+	// The filesystem's memFile comes from the provider attached to ctx.
+	provider := pgalloc.MemoryFileProviderFromContext(ctx)
+	if provider == nil {
+		panic("MemoryFileProviderFromContext returned nil")
+	}
+	tfs := &filesystem{memFile: provider.MemoryFile()}
+	tfs.vfsfs.Init(vfsObj, tfs)
+	// The root is a directory with mode 01777 (sticky, rwx for everyone).
+	root := tfs.newDentry(tfs.newDirectory(creds, 01777))
+	return &tfs.vfsfs, &root.vfsd, nil
+}
+
+// Release implements vfs.FilesystemImpl.Release. It currently does nothing.
+func (fs *filesystem) Release() {
+}
+
+// dentry implements vfs.DentryImpl.
+type dentry struct {
+	vfsd vfs.Dentry
+
+	// inode is the inode represented by this dentry. Multiple Dentries may
+	// share a single non-directory inode (with hard links). inode is
+	// immutable.
+	inode *inode
+
+	// tmpfs doesn't count references on dentries; because the dentry tree is
+	// the sole source of truth, it is by definition always consistent with the
+	// state of the filesystem. However, it does count references on inodes,
+	// because inode resources are released when all references are dropped.
+	// (For regular files, inode.decRef drops the file's data once the last
+	// reference goes away.)
+
+	// dentryEntry (ugh) links dentries into their parent directory.childList.
+	dentryEntry
+}
+
+// newDentry returns a new dentry that represents inode. It does not itself
+// take a reference on inode.
+func (fs *filesystem) newDentry(inode *inode) *dentry {
+	dent := &dentry{inode: inode}
+	dent.vfsd.Init(dent)
+	return dent
+}
+
+// IncRef implements vfs.DentryImpl.IncRef; the reference is taken on d.inode.
+func (d *dentry) IncRef() {
+	d.inode.incRef()
+}
+
+// TryIncRef implements vfs.DentryImpl.TryIncRef, delegating to d.inode.
+func (d *dentry) TryIncRef() bool {
+	return d.inode.tryIncRef()
+}
+
+// DecRef implements vfs.DentryImpl.DecRef; the reference is dropped on d.inode.
+func (d *dentry) DecRef() {
+	d.inode.decRef()
+}
+
+// inode represents a filesystem object; impl determines which kind it is.
+type inode struct {
+	// refs is a reference count. refs is accessed using atomic memory
+	// operations.
+	//
+	// A reference is held on all inodes that are reachable in the filesystem
+	// tree. For non-directories (which may have multiple hard links), this
+	// means that a reference is dropped when nlink reaches 0. For directories,
+	// nlink never reaches 0 due to the "." entry; instead,
+	// filesystem.RmdirAt() drops the reference.
+	refs int64
+
+	// Inode metadata; protected by mu and accessed using atomic memory
+	// operations unless otherwise specified.
+	mu    sync.RWMutex
+	mode  uint32 // excluding file type bits, which are based on impl
+	nlink uint32 // protected by filesystem.mu instead of inode.mu
+	uid   uint32 // auth.KUID, but stored as raw uint32 for sync/atomic
+	gid   uint32 // auth.KGID, but stored as raw uint32 for sync/atomic
+	ino   uint64 // immutable; assigned by init
+
+	impl interface{} // immutable; *regularFile, *directory, *symlink or *namedPipe
+}
+
+const maxLinks = math.MaxUint32 // largest value inode.nlink (a uint32) can hold
+
+func (i *inode) init(impl interface{}, fs *filesystem, creds *auth.Credentials, mode linux.FileMode) {
+	// nlink is deliberately not set here; the caller assigns it afterwards.
+	i.impl = impl
+	i.refs = 1
+	i.mode = uint32(mode)
+	i.uid = uint32(creds.EffectiveKUID)
+	i.gid = uint32(creds.EffectiveKGID)
+	i.ino = atomic.AddUint64(&fs.nextInoMinusOne, 1)
+}
+
+// incLinksLocked increments i's link count, panicking if i has no links or
+// already has maxLinks of them.
+//
+// Preconditions: filesystem.mu must be locked for writing.
+func (i *inode) incLinksLocked() {
+	if i.nlink == 0 {
+		panic("tmpfs.inode.incLinksLocked() called with no existing links")
+	}
+	if i.nlink == maxLinks {
+		panic("tmpfs.inode.incLinksLocked() called with maximum link count")
+	}
+	atomic.AddUint32(&i.nlink, 1)
+}
+
+// decLinksLocked decrements i's link count; adding ^uint32(0) subtracts one.
+//
+// Preconditions: filesystem.mu must be locked for writing. i.nlink != 0.
+func (i *inode) decLinksLocked() {
+	if i.nlink == 0 {
+		panic("tmpfs.inode.decLinksLocked() called with no existing links")
+	}
+	atomic.AddUint32(&i.nlink, ^uint32(0))
+}
+
+func (i *inode) incRef() {
+	if now := atomic.AddInt64(&i.refs, 1); now <= 1 {
+		panic("tmpfs.inode.incRef() called without holding a reference")
+	}
+}
+
+// tryIncRef takes a reference on i unless its count has already dropped to
+// zero, in which case it returns false and leaves the count untouched.
+func (i *inode) tryIncRef() bool {
+	for cur := atomic.LoadInt64(&i.refs); cur != 0; cur = atomic.LoadInt64(&i.refs) {
+		if atomic.CompareAndSwapInt64(&i.refs, cur, cur+1) {
+			return true
+		}
+	}
+	return false
+}
+
+func (i *inode) decRef() {
+	refs := atomic.AddInt64(&i.refs, -1)
+	if refs < 0 {
+		panic("tmpfs.inode.decRef() called without holding a reference")
+	}
+	// Last reference gone: drop a regular file's data and reset its size.
+	if rf, ok := i.impl.(*regularFile); ok && refs == 0 {
+		rf.mu.Lock()
+		rf.data.DropAll(rf.memFile)
+		atomic.StoreUint64(&rf.size, 0)
+		rf.mu.Unlock()
+	}
+}
+
+func (i *inode) checkPermissions(creds *auth.Credentials, ats vfs.AccessTypes, isDir bool) error {
+	return vfs.GenericCheckPermissions(creds, ats, isDir, uint16(atomic.LoadUint32(&i.mode)), auth.KUID(atomic.LoadUint32(&i.uid)), auth.KGID(atomic.LoadUint32(&i.gid)))
+}
+
+// statTo fills *stat with i's metadata. Go won't inline this function, and
+// returning linux.Statx (which is quite big) means spending a lot of time in
+// runtime.duffcopy(), so the result is written through an output parameter.
+func (i *inode) statTo(stat *linux.Statx) {
+	stat.Mask = linux.STATX_TYPE | linux.STATX_MODE | linux.STATX_NLINK | linux.STATX_UID | linux.STATX_GID | linux.STATX_INO
+	stat.Ino = i.ino
+	stat.Nlink = atomic.LoadUint32(&i.nlink)
+	stat.UID = atomic.LoadUint32(&i.uid)
+	stat.GID = atomic.LoadUint32(&i.gid)
+	stat.Mode = uint16(atomic.LoadUint32(&i.mode))
+	stat.Blksize = 1 // NOTE(review): placeholder; presumably should be the page size
+	// TODO: device number
+	switch t := i.impl.(type) {
+	case *directory:
+		stat.Mode |= linux.S_IFDIR
+	case *namedPipe:
+		stat.Mode |= linux.S_IFIFO
+	case *symlink:
+		stat.Mode |= linux.S_IFLNK
+		stat.Mask |= linux.STATX_SIZE | linux.STATX_BLOCKS
+		stat.Size = uint64(len(t.target))
+		stat.Blocks = allocatedBlocksForSize(stat.Size)
+	case *regularFile:
+		stat.Mode |= linux.S_IFREG
+		stat.Mask |= linux.STATX_SIZE | linux.STATX_BLOCKS
+		stat.Size = atomic.LoadUint64(&t.size)
+		// Blocks is derived from the logical size, not from how much of the
+		// file is actually backed by allocated pages.
+		// TODO: report the allocated span instead.
+		stat.Blocks = allocatedBlocksForSize(stat.Size)
+	default:
+		panic(fmt.Sprintf("unknown inode type: %T", i.impl))
+	}
+}
+
+// allocatedBlocksForSize returns the number of 512B blocks needed to
+// accommodate the given size in bytes, as appropriate for struct
+// stat::st_blocks and struct statx::stx_blocks (this 512B unit is independent
+// of st_blksize/stx_blksize). The quotient and remainder are rounded up
+// separately so that sizes near math.MaxUint64 do not wrap around.
+func allocatedBlocksForSize(size uint64) uint64 {
+	return size/512 + (size%512+511)/512
+}
+
+func (i *inode) direntType() uint8 {
+	switch i.impl.(type) {
+	case *regularFile:
+		return linux.DT_REG
+	case *directory:
+		return linux.DT_DIR
+	case *symlink:
+		return linux.DT_LNK
+	case *namedPipe: // statTo reports these as S_IFIFO; don't panic on them here
+		return linux.DT_FIFO
+	}
+	panic(fmt.Sprintf("unknown inode type: %T", i.impl))
+}
+// fileDescription is embedded by tmpfs implementations of
+// vfs.FileDescriptionImpl; it supplies the Stat and SetStat methods they share.
+type fileDescription struct {
+	vfsfd vfs.FileDescription
+	vfs.FileDescriptionDefaultImpl
+}
+
+func (fd *fileDescription) filesystem() *filesystem {
+	return fd.vfsfd.Mount().Filesystem().Impl().(*filesystem)
+}
+
+func (fd *fileDescription) inode() *inode {
+	return fd.vfsfd.Dentry().Impl().(*dentry).inode
+}
+
+// Stat implements vfs.FileDescriptionImpl.Stat. opts is currently ignored.
+func (fd *fileDescription) Stat(ctx context.Context, opts vfs.StatOptions) (linux.Statx, error) {
+	var stat linux.Statx
+	fd.inode().statTo(&stat)
+	return stat, nil
+}
+
+// SetStat implements vfs.FileDescriptionImpl.SetStat. An empty mask succeeds.
+func (fd *fileDescription) SetStat(ctx context.Context, opts vfs.SetStatOptions) error {
+	if opts.Stat.Mask == 0 {
+		return nil
+	}
+	// TODO: implement inode.setStat; until then any actual change gets EPERM.
+	return syserror.EPERM
+}
-- 
cgit v1.2.3