48 files changed, 2438 insertions, 451 deletions
diff --git a/pkg/sentry/fs/attr.go b/pkg/sentry/fs/attr.go index 9fc6a5bc2..4f3d6410e 100644 --- a/pkg/sentry/fs/attr.go +++ b/pkg/sentry/fs/attr.go @@ -111,6 +111,50 @@ func (n InodeType) LinuxType() uint32 { } } +// ToDirentType converts an InodeType to a linux dirent type field. +func ToDirentType(nodeType InodeType) uint8 { + switch nodeType { + case RegularFile, SpecialFile: + return linux.DT_REG + case Symlink: + return linux.DT_LNK + case Directory, SpecialDirectory: + return linux.DT_DIR + case Pipe: + return linux.DT_FIFO + case CharacterDevice: + return linux.DT_CHR + case BlockDevice: + return linux.DT_BLK + case Socket: + return linux.DT_SOCK + default: + return linux.DT_UNKNOWN + } +} + +// ToInodeType coverts a linux file type to InodeType. +func ToInodeType(linuxFileType linux.FileMode) InodeType { + switch linuxFileType { + case linux.ModeRegular: + return RegularFile + case linux.ModeDirectory: + return Directory + case linux.ModeSymlink: + return Symlink + case linux.ModeNamedPipe: + return Pipe + case linux.ModeCharacterDevice: + return CharacterDevice + case linux.ModeBlockDevice: + return BlockDevice + case linux.ModeSocket: + return Socket + default: + panic(fmt.Sprintf("unknown file mode: %d", linuxFileType)) + } +} + // StableAttr contains Inode attributes that will be stable throughout the // lifetime of the Inode. // diff --git a/pkg/sentry/fs/ext/BUILD b/pkg/sentry/fs/ext/BUILD index e3d617576..a3b1e4bd6 100644 --- a/pkg/sentry/fs/ext/BUILD +++ b/pkg/sentry/fs/ext/BUILD @@ -4,14 +4,14 @@ load("//tools/go_stateify:defs.bzl", "go_library", "go_test") load("//tools/go_generics:defs.bzl", "go_template_instance") go_template_instance( - name = "dentry_list", - out = "dentry_list.go", + name = "dirent_list", + out = "dirent_list.go", package = "ext", - prefix = "dentry", + prefix = "dirent", template = "//pkg/ilist:generic_list", types = { - "Element": "*dentry", - "Linker": "*dentry", + "Element": "*dirent", + "Linker": "*dirent", }, ) @@ -20,14 +20,13 @@ go_library( srcs = [ "block_map_file.go", "dentry.go", - "dentry_list.go", "directory.go", + "dirent_list.go", "ext.go", "extent_file.go", + "file_description.go", "filesystem.go", - "inline_file.go", "inode.go", - "named_pipe.go", "regular_file.go", "symlink.go", "utils.go", @@ -38,15 +37,19 @@ go_library( "//pkg/abi/linux", "//pkg/binary", "//pkg/fd", + "//pkg/log", + "//pkg/sentry/arch", "//pkg/sentry/context", "//pkg/sentry/fs", "//pkg/sentry/fs/ext/disklayout", "//pkg/sentry/kernel/auth", - "//pkg/sentry/kernel/pipe", + "//pkg/sentry/memmap", "//pkg/sentry/safemem", + "//pkg/sentry/syscalls/linux", "//pkg/sentry/usermem", "//pkg/sentry/vfs", "//pkg/syserror", + "//pkg/waiter", ], ) @@ -73,7 +76,9 @@ go_test( "//pkg/sentry/context/contexttest", "//pkg/sentry/fs/ext/disklayout", "//pkg/sentry/kernel/auth", + "//pkg/sentry/usermem", "//pkg/sentry/vfs", + "//pkg/syserror", "//runsc/test/testutil", "@com_github_google_go-cmp//cmp:go_default_library", "@com_github_google_go-cmp//cmp/cmpopts:go_default_library", diff --git a/pkg/sentry/fs/ext/README.md b/pkg/sentry/fs/ext/README.md new file mode 100644 index 000000000..e212717aa --- /dev/null +++ b/pkg/sentry/fs/ext/README.md @@ -0,0 +1,117 @@ +## EXT(2/3/4) File System + +This is a filesystem driver which supports ext2, ext3 and ext4 filesystems. +Linux has specialized drivers for each variant but none which supports all. 
This +library takes advantage of ext's backward compatibility and understands the +internal organization of on-disk structures to support all variants. + +This driver implementation diverges from the Linux implementations in being more +forgiving about versioning. For instance, if a filesystem contains both extent +based inodes and classical block map based inodes, this driver will not complain +and will interpret both correctly, while in Linux this would be an issue. This +blurs the line between the three ext fs variants. + +Ext2 is considered deprecated as of Red Hat Enterprise Linux 7, and ext3 has +been superseded by ext4, which brings large performance gains. Thus it is recommended to +upgrade older filesystem images to ext4 using e2fsprogs for better performance. + +### Read Only + +This driver currently supports only read-only operations. A lot of the design +decisions are based on this constraint. There are plans to implement write support (the +process for which is documented in the future work section). + +### Performance + +One of the biggest wins of this driver is that it talks directly to the +underlying block device (or whatever persistent storage is being used), instead +of making expensive RPCs to a gofer. + +Another advantage is that ext fs supports fast concurrent reads. Currently the +device is represented using an `io.ReaderAt`, which allows for concurrent reads. +All reads are passed directly to the device driver, which intelligently serves +the read requests in the optimal order. There is no congestion due to locking +while reading at the filesystem level. + +Reads are optimized further in the way file data is transferred over to user +memory. Ext fs copies file data from disk directly into user memory with no +additional allocations on the way. We can only get faster by preloading file +data into memory (see future work section). + +The internal structures used to represent files, inodes and file descriptors use +a lot of inheritance. With the level of indirection that an interface adds with +an internal pointer, it can quickly fragment a structure across memory. As this +runs alongside a full-blown kernel (which is memory intensive), having a +fragmented struct might hurt performance. Hence these internal structures, +though interfaced, are tightly packed in memory using the same inheritance +pattern that pkg/sentry/vfs uses. The pkg/sentry/fs/ext/disklayout package makes +an exception to this pattern for reasons documented in the package. + +### Security + +This driver also intends to help sandbox the container better by reducing the +surface of the host kernel that the application touches. It prevents the +application from exploiting vulnerabilities in the host filesystem driver. All +`io.ReaderAt.ReadAt()` calls are translated to `pread(2)` calls, which are passed directly +to the device driver in the kernel. Hence this reduces the surface available for +attack. + +The application cannot affect any host filesystem other than the one passed in +as a block device by the user. + +### Future Work + +#### Write + +To support write operations we would need to modify the underlying block device. +Currently, the driver does not modify the device at all, not even to update +access times for reads. Modifying the filesystem incorrectly can corrupt it +and render it unreadable for other correct ext(x) drivers. Hence caution must be +exercised while modifying metadata structures.  + +Ext4 in particular is built for performance and adds a lot of complexity to +how metadata structures are modified. 
For instance, files are organized +via an extent tree, which must be kept balanced, and file data blocks must be placed in +the same extent as much as possible to increase locality. Such properties must +be maintained while modifying the tree. + +Ext filesystems place great emphasis on locality, which plays a big role in their +performance. The block allocation algorithm in Linux does a good job of keeping +related data together. This behavior must be maintained as much as possible, +otherwise we might end up degrading filesystem performance over time. + +Ext4 also supports a wide variety of features which are specialized for varying +use cases. Implementing all of them can get difficult very quickly. + +Ext(x) checksums all its metadata structures to check for corruption, so any +modification of a metadata struct must be accompanied by re-checksumming it. Linux filesystem drivers also order on-disk updates intelligently so as not to +corrupt the filesystem while remaining performant. The in-memory metadata +structures must be kept in sync with what is on disk. + +Some important structures are also replicated across the filesystem. +All replicas must be updated when the original copy is updated. There is also +provisioning for snapshotting, which must be kept in mind, although it should not +affect this implementation unless we allow users to create filesystem snapshots. + +Ext4 also introduced journaling (jbd2). The journal must be updated +appropriately. + +#### Performance + +To improve performance we should implement a buffer cache and, optionally, read +ahead for small files. While doing so we must also keep memory usage in mind +and place a reasonable cap on how much file data we hold in memory. + +#### Features + +Our current implementation will work with most ext4 filesystems for read-only +purposes. However, the following features are not supported yet: + +- Journal +- Snapshotting +- Extended Attributes +- Hash Tree Directories +- Meta Block Groups +- Multiple Mount Protection +- Bigalloc diff --git a/pkg/sentry/fs/ext/block_map_file.go b/pkg/sentry/fs/ext/block_map_file.go index f30c3a174..cea89bcd9 100644 --- a/pkg/sentry/fs/ext/block_map_file.go +++ b/pkg/sentry/fs/ext/block_map_file.go @@ -85,7 +85,8 @@ func (f *blockMapFile) ReadAt(dst []byte, off int64) (int, error) { } offset := uint64(off) - if offset >= f.regFile.inode.diskInode.Size() { + size := f.regFile.inode.diskInode.Size() + if offset >= size { return 0, io.EOF } @@ -104,6 +105,9 @@ func (f *blockMapFile) ReadAt(dst []byte, off int64) (int, error) { read := 0 toRead := len(dst) + if uint64(toRead)+offset > size { + toRead = int(size - offset) + } for read < toRead { var err error var curR int @@ -131,6 +135,9 @@ func (f *blockMapFile) ReadAt(dst []byte, off int64) (int, error) { } } + if read < len(dst) { + return read, io.EOF + } return read, nil } diff --git a/pkg/sentry/fs/ext/dentry.go b/pkg/sentry/fs/ext/dentry.go index 19c9b3b2d..054fb42b6 100644 --- a/pkg/sentry/fs/ext/dentry.go +++ b/pkg/sentry/fs/ext/dentry.go @@ -26,8 +26,6 @@ type dentry struct { // share a single non-directory Inode (with hard links). inode is // immutable. inode *inode - // dentryEntry links Dentries into their parent directory.childList. - dentryEntry } // Compiles only if dentry implements vfs.DentryImpl. 
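The ReadAt changes above (and the matching change to extentFile.ReadAt later in this diff) bring the ext file readers in line with the io.ReaderAt contract: when fewer than len(dst) bytes are available, the bytes that were read are returned together with io.EOF. The sketch below only illustrates that caller-visible contract using the standard library's strings.Reader; it is not code from this change, and readTail is a hypothetical helper.

```go
package main

// Sketch: a caller relying on the io.ReaderAt convention that a short read at
// end-of-file returns the bytes read together with io.EOF -- the same
// convention the ext ReadAt implementations in this diff now follow.
import (
	"fmt"
	"io"
	"strings"
)

func readTail(r io.ReaderAt, off int64, n int) ([]byte, error) {
	buf := make([]byte, n)
	read, err := r.ReadAt(buf, off)
	if err == io.EOF {
		// A short read at EOF is expected; keep whatever was read.
		return buf[:read], nil
	}
	if err != nil {
		return nil, err
	}
	return buf, nil
}

func main() {
	// strings.Reader implements io.ReaderAt with the same EOF semantics.
	r := strings.NewReader("hello world")
	tail, _ := readTail(r, 6, 16) // asks for more bytes than remain
	fmt.Printf("%q\n", tail)      // prints "world"
}
```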
diff --git a/pkg/sentry/fs/ext/directory.go b/pkg/sentry/fs/ext/directory.go index ab2b59e44..1ba8bf54c 100644 --- a/pkg/sentry/fs/ext/directory.go +++ b/pkg/sentry/fs/ext/directory.go @@ -14,23 +14,295 @@ package ext +import ( + "sync" + + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/binary" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/fs/ext/disklayout" + "gvisor.dev/gvisor/pkg/sentry/memmap" + "gvisor.dev/gvisor/pkg/sentry/vfs" + "gvisor.dev/gvisor/pkg/syserror" +) + // directory represents a directory inode. It holds the childList in memory. type directory struct { inode inode - // childList is a list containing (1) child Dentries and (2) fake Dentries - // (with inode == nil) that represent the iteration position of + // mu serializes the changes to childList. + // Lock Order (outermost locks must be taken first): + // directory.mu + // filesystem.mu + mu sync.Mutex + + // childList is a list containing (1) child dirents and (2) fake dirents + // (with diskDirent == nil) that represent the iteration position of // directoryFDs. childList is used to support directoryFD.IterDirents() - // efficiently. childList is immutable. - childList dentryList + // efficiently. childList is protected by mu. + childList direntList - // TODO(b/134676337): Add directory navigators. + // childMap maps the child's filename to the dirent structure stored in + // childList. This adds some data replication but helps in faster path + // traversal. For consistency, key == childMap[key].diskDirent.FileName(). + // Immutable. + childMap map[string]*dirent } // newDirectroy is the directory constructor. -func newDirectroy(inode inode) *directory { - // TODO(b/134676337): initialize childList. - file := &directory{inode: inode} +func newDirectroy(inode inode, newDirent bool) (*directory, error) { + file := &directory{inode: inode, childMap: make(map[string]*dirent)} file.inode.impl = file - return file + + // Initialize childList by reading dirents from the underlying file. + if inode.diskInode.Flags().Index { + // TODO(b/134676337): Support hash tree directories. Currently only the '.' + // and '..' entries are read in. + + // Users cannot navigate this hash tree directory yet. + log.Warningf("hash tree directory being used which is unsupported") + return file, nil + } + + // The dirents are organized in a linear array in the file data. + // Extract the file data and decode the dirents. + regFile, err := newRegularFile(inode) + if err != nil { + return nil, err + } + + // buf is used as scratch space for reading in dirents from disk and + // unmarshalling them into dirent structs. + buf := make([]byte, disklayout.DirentSize) + size := inode.diskInode.Size() + for off, inc := uint64(0), uint64(0); off < size; off += inc { + toRead := size - off + if toRead > disklayout.DirentSize { + toRead = disklayout.DirentSize + } + if n, err := regFile.impl.ReadAt(buf[:toRead], int64(off)); uint64(n) < toRead { + return nil, err + } + + var curDirent dirent + if newDirent { + curDirent.diskDirent = &disklayout.DirentNew{} + } else { + curDirent.diskDirent = &disklayout.DirentOld{} + } + binary.Unmarshal(buf, binary.LittleEndian, curDirent.diskDirent) + + if curDirent.diskDirent.Inode() != 0 && len(curDirent.diskDirent.FileName()) != 0 { + // Inode number and name length fields being set to 0 is used to indicate + // an unused dirent. 
+ file.childList.PushBack(&curDirent) + file.childMap[curDirent.diskDirent.FileName()] = &curDirent + } + + // The next dirent is placed exactly after this dirent record on disk. + inc = uint64(curDirent.diskDirent.RecordSize()) + } + + return file, nil +} + +func (i *inode) isDir() bool { + _, ok := i.impl.(*directory) + return ok +} + +// dirent is the directory.childList node. +type dirent struct { + diskDirent disklayout.Dirent + + // direntEntry links dirents into their parent directory.childList. + direntEntry +} + +// directoryFD represents a directory file description. It implements +// vfs.FileDescriptionImpl. +type directoryFD struct { + fileDescription + vfs.DirectoryFileDescriptionDefaultImpl + + // Protected by directory.mu. + iter *dirent + off int64 +} + +// Compiles only if directoryFD implements vfs.FileDescriptionImpl. +var _ vfs.FileDescriptionImpl = (*directoryFD)(nil) + +// Release implements vfs.FileDescriptionImpl.Release. +func (fd *directoryFD) Release() { + if fd.iter == nil { + return + } + + dir := fd.inode().impl.(*directory) + dir.mu.Lock() + dir.childList.Remove(fd.iter) + dir.mu.Unlock() + fd.iter = nil +} + +// IterDirents implements vfs.FileDescriptionImpl.IterDirents. +func (fd *directoryFD) IterDirents(ctx context.Context, cb vfs.IterDirentsCallback) error { + extfs := fd.filesystem() + dir := fd.inode().impl.(*directory) + + dir.mu.Lock() + defer dir.mu.Unlock() + + // Ensure that fd.iter exists and is not linked into dir.childList. + var child *dirent + if fd.iter == nil { + // Start iteration at the beginning of dir. + child = dir.childList.Front() + fd.iter = &dirent{} + } else { + // Continue iteration from where we left off. + child = fd.iter.Next() + dir.childList.Remove(fd.iter) + } + for ; child != nil; child = child.Next() { + // Skip other directoryFD iterators. + if child.diskDirent != nil { + childType, ok := child.diskDirent.FileType() + if !ok { + // We will need to read the inode off disk. Do not increment + // ref count here because this inode is not being added to the + // dentry tree. + extfs.mu.Lock() + childInode, err := extfs.getOrCreateInodeLocked(child.diskDirent.Inode()) + extfs.mu.Unlock() + if err != nil { + // Usage of the file description after the error is + // undefined. This implementation would continue reading + // from the next dirent. + fd.off++ + dir.childList.InsertAfter(child, fd.iter) + return err + } + childType = fs.ToInodeType(childInode.diskInode.Mode().FileType()) + } + + if !cb.Handle(vfs.Dirent{ + Name: child.diskDirent.FileName(), + Type: fs.ToDirentType(childType), + Ino: uint64(child.diskDirent.Inode()), + Off: fd.off, + }) { + dir.childList.InsertBefore(child, fd.iter) + return nil + } + fd.off++ + } + } + dir.childList.PushBack(fd.iter) + return nil +} + +// Seek implements vfs.FileDescriptionImpl.Seek. +func (fd *directoryFD) Seek(ctx context.Context, offset int64, whence int32) (int64, error) { + if whence != linux.SEEK_SET && whence != linux.SEEK_CUR { + return 0, syserror.EINVAL + } + + dir := fd.inode().impl.(*directory) + + dir.mu.Lock() + defer dir.mu.Unlock() + + // Find resulting offset. + if whence == linux.SEEK_CUR { + offset += fd.off + } + + if offset < 0 { + // lseek(2) specifies that EINVAL should be returned if the resulting offset + // is negative. 
+ return 0, syserror.EINVAL + } + + n := int64(len(dir.childMap)) + realWantOff := offset + if realWantOff > n { + realWantOff = n + } + realCurOff := fd.off + if realCurOff > n { + realCurOff = n + } + + // Ensure that fd.iter exists and is linked into dir.childList so we can + // intelligently seek from the optimal position. + if fd.iter == nil { + fd.iter = &dirent{} + dir.childList.PushFront(fd.iter) + } + + // Guess that iterating from the current position is optimal. + child := fd.iter + diff := realWantOff - realCurOff // Shows direction and magnitude of travel. + + // See if starting from the beginning or end is better. + abDiff := diff + if diff < 0 { + abDiff = -diff + } + if abDiff > realWantOff { + // Starting from the beginning is best. + child = dir.childList.Front() + diff = realWantOff + } else if abDiff > (n - realWantOff) { + // Starting from the end is best. + child = dir.childList.Back() + // (n - 1) because the last non-nil dirent represents the (n-1)th offset. + diff = realWantOff - (n - 1) + } + + for child != nil { + // Skip other directoryFD iterators. + if child.diskDirent != nil { + if diff == 0 { + if child != fd.iter { + dir.childList.Remove(fd.iter) + dir.childList.InsertBefore(child, fd.iter) + } + + fd.off = offset + return offset, nil + } + + if diff < 0 { + diff++ + child = child.Prev() + } else { + diff-- + child = child.Next() + } + continue + } + + if diff < 0 { + child = child.Prev() + } else { + child = child.Next() + } + } + + // Reaching here indicates that the offset is beyond the end of the childList. + dir.childList.Remove(fd.iter) + dir.childList.PushBack(fd.iter) + fd.off = offset + return offset, nil +} + +// IterDirents implements vfs.FileDescriptionImpl.IterDirents. +func (fd *directoryFD) ConfigureMMap(ctx context.Context, opts memmap.MMapOpts) error { + // mmap(2) specifies that EACCESS should be returned for non-regular file fds. + return syserror.EACCES } diff --git a/pkg/sentry/fs/ext/disklayout/dirent.go b/pkg/sentry/fs/ext/disklayout/dirent.go index 685bf57b8..417b6cf65 100644 --- a/pkg/sentry/fs/ext/disklayout/dirent.go +++ b/pkg/sentry/fs/ext/disklayout/dirent.go @@ -21,6 +21,9 @@ import ( const ( // MaxFileName is the maximum length of an ext fs file's name. MaxFileName = 255 + + // DirentSize is the size of ext dirent structures. + DirentSize = 263 ) var ( diff --git a/pkg/sentry/fs/ext/disklayout/dirent_test.go b/pkg/sentry/fs/ext/disklayout/dirent_test.go index cc6dff2c9..934919f8a 100644 --- a/pkg/sentry/fs/ext/disklayout/dirent_test.go +++ b/pkg/sentry/fs/ext/disklayout/dirent_test.go @@ -21,8 +21,6 @@ import ( // TestDirentSize tests that the dirent structs are of the correct // size. func TestDirentSize(t *testing.T) { - want := uintptr(263) - - assertSize(t, DirentOld{}, want) - assertSize(t, DirentNew{}, want) + assertSize(t, DirentOld{}, uintptr(DirentSize)) + assertSize(t, DirentNew{}, uintptr(DirentSize)) } diff --git a/pkg/sentry/fs/ext/disklayout/superblock.go b/pkg/sentry/fs/ext/disklayout/superblock.go index 7a337a5e0..8bb327006 100644 --- a/pkg/sentry/fs/ext/disklayout/superblock.go +++ b/pkg/sentry/fs/ext/disklayout/superblock.go @@ -221,7 +221,7 @@ func CompatFeaturesFromInt(f uint32) CompatFeatures { // This is not exhaustive, unused features are not listed. const ( // SbDirentFileType indicates that directory entries record the file type. - // We should use struct ext4_dir_entry_2 for dirents then. + // We should use struct DirentNew for dirents then. 
SbDirentFileType = 0x2 // SbRecovery indicates that the filesystem needs recovery. diff --git a/pkg/sentry/fs/ext/ext.go b/pkg/sentry/fs/ext/ext.go index d303dd122..c3e2c9efb 100644 --- a/pkg/sentry/fs/ext/ext.go +++ b/pkg/sentry/fs/ext/ext.go @@ -22,6 +22,7 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/fd" + "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/sentry/context" "gvisor.dev/gvisor/pkg/sentry/fs/ext/disklayout" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" @@ -62,8 +63,40 @@ func getDeviceFd(source string, opts vfs.NewFilesystemOptions) (io.ReaderAt, err return fd.NewReadWriter(devFd), nil } +// isCompatible checks if the superblock has feature sets which are compatible. +// We only need to check the superblock incompatible feature set since we are +// mounting readonly. We will also need to check readonly compatible feature +// set when mounting for read/write. +func isCompatible(sb disklayout.SuperBlock) bool { + // Please note that what is being checked is limited based on the fact that we + // are mounting readonly and that we are not journaling. When mounting + // read/write or with a journal, this must be reevaluated. + incompatFeatures := sb.IncompatibleFeatures() + if incompatFeatures.MetaBG { + log.Warningf("ext fs: meta block groups are not supported") + return false + } + if incompatFeatures.MMP { + log.Warningf("ext fs: multiple mount protection is not supported") + return false + } + if incompatFeatures.Encrypted { + log.Warningf("ext fs: encrypted inodes not supported") + return false + } + if incompatFeatures.InlineData { + log.Warningf("ext fs: inline files not supported") + return false + } + return true +} + // NewFilesystem implements vfs.FilesystemType.NewFilesystem. func (fstype filesystemType) NewFilesystem(ctx context.Context, creds *auth.Credentials, source string, opts vfs.NewFilesystemOptions) (*vfs.Filesystem, *vfs.Dentry, error) { + // TODO(b/134676337): Ensure that the user is mounting readonly. If not, + // EACCESS should be returned according to mount(2). Filesystem independent + // flags (like readonly) are currently not available in pkg/sentry/vfs. + dev, err := getDeviceFd(source, opts) if err != nil { return nil, nil, err @@ -82,15 +115,21 @@ func (fstype filesystemType) NewFilesystem(ctx context.Context, creds *auth.Cred return nil, nil, syserror.EINVAL } + // Refuse to mount if the filesystem is incompatible. 
+ if !isCompatible(fs.sb) { + return nil, nil, syserror.EINVAL + } + fs.bgs, err = readBlockGroups(dev, fs.sb) if err != nil { return nil, nil, err } - rootInode, err := fs.getOrCreateInode(ctx, disklayout.RootDirInode) + rootInode, err := fs.getOrCreateInodeLocked(disklayout.RootDirInode) if err != nil { return nil, nil, err } + rootInode.incRef() return &fs.vfsfs, &newDentry(rootInode).vfsd, nil } diff --git a/pkg/sentry/fs/ext/ext_test.go b/pkg/sentry/fs/ext/ext_test.go index 6396886cc..270f38074 100644 --- a/pkg/sentry/fs/ext/ext_test.go +++ b/pkg/sentry/fs/ext/ext_test.go @@ -16,17 +16,22 @@ package ext import ( "fmt" + "io" "os" "path" + "sort" "testing" "github.com/google/go-cmp/cmp" + "github.com/google/go-cmp/cmp/cmpopts" "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/sentry/context" "gvisor.dev/gvisor/pkg/sentry/context/contexttest" "gvisor.dev/gvisor/pkg/sentry/fs/ext/disklayout" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" + "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/sentry/vfs" + "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/runsc/test/testutil" ) @@ -44,7 +49,7 @@ var ( // setUp opens imagePath as an ext Filesystem and returns all necessary // elements required to run tests. If error is non-nil, it also returns a tear // down function which must be called after the test is run for clean up. -func setUp(t *testing.T, imagePath string) (context.Context, *vfs.Filesystem, *vfs.Dentry, func(), error) { +func setUp(t *testing.T, imagePath string) (context.Context, *vfs.VirtualFilesystem, *vfs.VirtualDentry, func(), error) { localImagePath, err := testutil.FindFile(imagePath) if err != nil { return nil, nil, nil, nil, fmt.Errorf("failed to open local image at path %s: %v", imagePath, err) @@ -55,20 +60,537 @@ func setUp(t *testing.T, imagePath string) (context.Context, *vfs.Filesystem, *v return nil, nil, nil, nil, err } - // Mount the ext4 fs and retrieve the inode structure for the file. - mockCtx := contexttest.Context(t) - fs, d, err := filesystemType{}.NewFilesystem(mockCtx, nil, localImagePath, vfs.NewFilesystemOptions{InternalData: int(f.Fd())}) + ctx := contexttest.Context(t) + creds := auth.CredentialsFromContext(ctx) + + // Create VFS. + vfsObj := vfs.New() + vfsObj.MustRegisterFilesystemType("extfs", filesystemType{}) + mntns, err := vfsObj.NewMountNamespace(ctx, creds, localImagePath, "extfs", &vfs.NewFilesystemOptions{InternalData: int(f.Fd())}) if err != nil { f.Close() return nil, nil, nil, nil, err } + root := mntns.Root() + tearDown := func() { + root.DecRef() + if err := f.Close(); err != nil { t.Fatalf("tearDown failed: %v", err) } } - return mockCtx, fs, d, tearDown, nil + return ctx, vfsObj, &root, tearDown, nil +} + +// TODO(b/134676337): Test vfs.FilesystemImpl.ReadlinkAt and +// vfs.FilesystemImpl.StatFSAt which are not implemented in +// vfs.VirtualFilesystem yet. + +// TestSeek tests vfs.FileDescriptionImpl.Seek functionality. 
+func TestSeek(t *testing.T) { + type seekTest struct { + name string + image string + path string + } + + tests := []seekTest{ + { + name: "ext4 root dir seek", + image: ext4ImagePath, + path: "/", + }, + { + name: "ext3 root dir seek", + image: ext3ImagePath, + path: "/", + }, + { + name: "ext2 root dir seek", + image: ext2ImagePath, + path: "/", + }, + { + name: "ext4 reg file seek", + image: ext4ImagePath, + path: "/file.txt", + }, + { + name: "ext3 reg file seek", + image: ext3ImagePath, + path: "/file.txt", + }, + { + name: "ext2 reg file seek", + image: ext2ImagePath, + path: "/file.txt", + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + ctx, vfsfs, root, tearDown, err := setUp(t, test.image) + if err != nil { + t.Fatalf("setUp failed: %v", err) + } + defer tearDown() + + fd, err := vfsfs.OpenAt( + ctx, + auth.CredentialsFromContext(ctx), + &vfs.PathOperation{Root: *root, Start: *root, Pathname: test.path}, + &vfs.OpenOptions{}, + ) + if err != nil { + t.Fatalf("vfsfs.OpenAt failed: %v", err) + } + + if n, err := fd.Impl().Seek(ctx, 0, linux.SEEK_SET); n != 0 || err != nil { + t.Errorf("expected seek position 0, got %d and error %v", n, err) + } + + stat, err := fd.Impl().Stat(ctx, vfs.StatOptions{}) + if err != nil { + t.Errorf("fd.stat failed for file %s in image %s: %v", test.path, test.image, err) + } + + // We should be able to seek beyond the end of file. + size := int64(stat.Size) + if n, err := fd.Impl().Seek(ctx, size, linux.SEEK_SET); n != size || err != nil { + t.Errorf("expected seek position %d, got %d and error %v", size, n, err) + } + + // EINVAL should be returned if the resulting offset is negative. + if _, err := fd.Impl().Seek(ctx, -1, linux.SEEK_SET); err != syserror.EINVAL { + t.Errorf("expected error EINVAL but got %v", err) + } + + if n, err := fd.Impl().Seek(ctx, 3, linux.SEEK_CUR); n != size+3 || err != nil { + t.Errorf("expected seek position %d, got %d and error %v", size+3, n, err) + } + + // Make sure negative offsets work with SEEK_CUR. + if n, err := fd.Impl().Seek(ctx, -2, linux.SEEK_CUR); n != size+1 || err != nil { + t.Errorf("expected seek position %d, got %d and error %v", size+1, n, err) + } + + // EINVAL should be returned if the resulting offset is negative. + if _, err := fd.Impl().Seek(ctx, -(size + 2), linux.SEEK_CUR); err != syserror.EINVAL { + t.Errorf("expected error EINVAL but got %v", err) + } + + // Make sure SEEK_END works with regular files. + switch fd.Impl().(type) { + case *regularFileFD: + // Seek back to 0. + if n, err := fd.Impl().Seek(ctx, -size, linux.SEEK_END); n != 0 || err != nil { + t.Errorf("expected seek position %d, got %d and error %v", 0, n, err) + } + + // Seek forward beyond EOF. + if n, err := fd.Impl().Seek(ctx, 1, linux.SEEK_END); n != size+1 || err != nil { + t.Errorf("expected seek position %d, got %d and error %v", size+1, n, err) + } + + // EINVAL should be returned if the resulting offset is negative. + if _, err := fd.Impl().Seek(ctx, -(size + 1), linux.SEEK_END); err != syserror.EINVAL { + t.Errorf("expected error EINVAL but got %v", err) + } + } + }) + } +} + +// TestStatAt tests filesystem.StatAt functionality. 
+func TestStatAt(t *testing.T) { + type statAtTest struct { + name string + image string + path string + want linux.Statx + } + + tests := []statAtTest{ + { + name: "ext4 statx small file", + image: ext4ImagePath, + path: "/file.txt", + want: linux.Statx{ + Blksize: 0x400, + Nlink: 1, + UID: 0, + GID: 0, + Mode: 0644 | linux.ModeRegular, + Size: 13, + }, + }, + { + name: "ext3 statx small file", + image: ext3ImagePath, + path: "/file.txt", + want: linux.Statx{ + Blksize: 0x400, + Nlink: 1, + UID: 0, + GID: 0, + Mode: 0644 | linux.ModeRegular, + Size: 13, + }, + }, + { + name: "ext2 statx small file", + image: ext2ImagePath, + path: "/file.txt", + want: linux.Statx{ + Blksize: 0x400, + Nlink: 1, + UID: 0, + GID: 0, + Mode: 0644 | linux.ModeRegular, + Size: 13, + }, + }, + { + name: "ext4 statx big file", + image: ext4ImagePath, + path: "/bigfile.txt", + want: linux.Statx{ + Blksize: 0x400, + Nlink: 1, + UID: 0, + GID: 0, + Mode: 0644 | linux.ModeRegular, + Size: 13042, + }, + }, + { + name: "ext3 statx big file", + image: ext3ImagePath, + path: "/bigfile.txt", + want: linux.Statx{ + Blksize: 0x400, + Nlink: 1, + UID: 0, + GID: 0, + Mode: 0644 | linux.ModeRegular, + Size: 13042, + }, + }, + { + name: "ext2 statx big file", + image: ext2ImagePath, + path: "/bigfile.txt", + want: linux.Statx{ + Blksize: 0x400, + Nlink: 1, + UID: 0, + GID: 0, + Mode: 0644 | linux.ModeRegular, + Size: 13042, + }, + }, + { + name: "ext4 statx symlink file", + image: ext4ImagePath, + path: "/symlink.txt", + want: linux.Statx{ + Blksize: 0x400, + Nlink: 1, + UID: 0, + GID: 0, + Mode: 0777 | linux.ModeSymlink, + Size: 8, + }, + }, + { + name: "ext3 statx symlink file", + image: ext3ImagePath, + path: "/symlink.txt", + want: linux.Statx{ + Blksize: 0x400, + Nlink: 1, + UID: 0, + GID: 0, + Mode: 0777 | linux.ModeSymlink, + Size: 8, + }, + }, + { + name: "ext2 statx symlink file", + image: ext2ImagePath, + path: "/symlink.txt", + want: linux.Statx{ + Blksize: 0x400, + Nlink: 1, + UID: 0, + GID: 0, + Mode: 0777 | linux.ModeSymlink, + Size: 8, + }, + }, + } + + // Ignore the fields that are not supported by filesystem.StatAt yet and + // those which are likely to change as the image does. + ignoredFields := map[string]bool{ + "Attributes": true, + "AttributesMask": true, + "Atime": true, + "Blocks": true, + "Btime": true, + "Ctime": true, + "DevMajor": true, + "DevMinor": true, + "Ino": true, + "Mask": true, + "Mtime": true, + "RdevMajor": true, + "RdevMinor": true, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + ctx, vfsfs, root, tearDown, err := setUp(t, test.image) + if err != nil { + t.Fatalf("setUp failed: %v", err) + } + defer tearDown() + + got, err := vfsfs.StatAt(ctx, + auth.CredentialsFromContext(ctx), + &vfs.PathOperation{Root: *root, Start: *root, Pathname: test.path}, + &vfs.StatOptions{}, + ) + if err != nil { + t.Fatalf("vfsfs.StatAt failed for file %s in image %s: %v", test.path, test.image, err) + } + + cmpIgnoreFields := cmp.FilterPath(func(p cmp.Path) bool { + _, ok := ignoredFields[p.String()] + return ok + }, cmp.Ignore()) + if diff := cmp.Diff(got, test.want, cmpIgnoreFields, cmpopts.IgnoreUnexported(linux.Statx{})); diff != "" { + t.Errorf("stat mismatch (-want +got):\n%s", diff) + } + }) + } +} + +// TestRead tests the read functionality for vfs file descriptions. 
+func TestRead(t *testing.T) { + type readTest struct { + name string + image string + absPath string + } + + tests := []readTest{ + { + name: "ext4 read small file", + image: ext4ImagePath, + absPath: "/file.txt", + }, + { + name: "ext3 read small file", + image: ext3ImagePath, + absPath: "/file.txt", + }, + { + name: "ext2 read small file", + image: ext2ImagePath, + absPath: "/file.txt", + }, + { + name: "ext4 read big file", + image: ext4ImagePath, + absPath: "/bigfile.txt", + }, + { + name: "ext3 read big file", + image: ext3ImagePath, + absPath: "/bigfile.txt", + }, + { + name: "ext2 read big file", + image: ext2ImagePath, + absPath: "/bigfile.txt", + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + ctx, vfsfs, root, tearDown, err := setUp(t, test.image) + if err != nil { + t.Fatalf("setUp failed: %v", err) + } + defer tearDown() + + fd, err := vfsfs.OpenAt( + ctx, + auth.CredentialsFromContext(ctx), + &vfs.PathOperation{Root: *root, Start: *root, Pathname: test.absPath}, + &vfs.OpenOptions{}, + ) + if err != nil { + t.Fatalf("vfsfs.OpenAt failed: %v", err) + } + + // Get a local file descriptor and compare its functionality with a vfs file + // description for the same file. + localFile, err := testutil.FindFile(path.Join(assetsDir, test.absPath)) + if err != nil { + t.Fatalf("testutil.FindFile failed for %s: %v", test.absPath, err) + } + + f, err := os.Open(localFile) + if err != nil { + t.Fatalf("os.Open failed for %s: %v", localFile, err) + } + defer f.Close() + + // Read the entire file by reading one byte repeatedly. Doing this stress + // tests the underlying file reader implementation. + got := make([]byte, 1) + want := make([]byte, 1) + for { + n, err := f.Read(want) + fd.Impl().Read(ctx, usermem.BytesIOSequence(got), vfs.ReadOptions{}) + + if diff := cmp.Diff(got, want); diff != "" { + t.Errorf("file data mismatch (-want +got):\n%s", diff) + } + + // Make sure there is no more file data left after getting EOF. + if n == 0 || err == io.EOF { + if n, _ := fd.Impl().Read(ctx, usermem.BytesIOSequence(got), vfs.ReadOptions{}); n != 0 { + t.Errorf("extra unexpected file data in file %s in image %s", test.absPath, test.image) + } + + break + } + + if err != nil { + t.Fatalf("read failed: %v", err) + } + } + }) + } +} + +// iterDirentsCb is a simple callback which just keeps adding the dirents to an +// internal list. Implements vfs.IterDirentsCallback. +type iterDirentsCb struct { + dirents []vfs.Dirent +} + +// Compiles only if iterDirentCb implements vfs.IterDirentsCallback. +var _ vfs.IterDirentsCallback = (*iterDirentsCb)(nil) + +// newIterDirentsCb is the iterDirent +func newIterDirentCb() *iterDirentsCb { + return &iterDirentsCb{dirents: make([]vfs.Dirent, 0)} +} + +// Handle implements vfs.IterDirentsCallback.Handle. +func (cb *iterDirentsCb) Handle(dirent vfs.Dirent) bool { + cb.dirents = append(cb.dirents, dirent) + return true +} + +// TestIterDirents tests the FileDescriptionImpl.IterDirents functionality. 
+func TestIterDirents(t *testing.T) { + type iterDirentTest struct { + name string + image string + path string + want []vfs.Dirent + } + + wantDirents := []vfs.Dirent{ + vfs.Dirent{ + Name: ".", + Type: linux.DT_DIR, + }, + vfs.Dirent{ + Name: "..", + Type: linux.DT_DIR, + }, + vfs.Dirent{ + Name: "lost+found", + Type: linux.DT_DIR, + }, + vfs.Dirent{ + Name: "file.txt", + Type: linux.DT_REG, + }, + vfs.Dirent{ + Name: "bigfile.txt", + Type: linux.DT_REG, + }, + vfs.Dirent{ + Name: "symlink.txt", + Type: linux.DT_LNK, + }, + } + tests := []iterDirentTest{ + { + name: "ext4 root dir iteration", + image: ext4ImagePath, + path: "/", + want: wantDirents, + }, + { + name: "ext3 root dir iteration", + image: ext3ImagePath, + path: "/", + want: wantDirents, + }, + { + name: "ext2 root dir iteration", + image: ext2ImagePath, + path: "/", + want: wantDirents, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + ctx, vfsfs, root, tearDown, err := setUp(t, test.image) + if err != nil { + t.Fatalf("setUp failed: %v", err) + } + defer tearDown() + + fd, err := vfsfs.OpenAt( + ctx, + auth.CredentialsFromContext(ctx), + &vfs.PathOperation{Root: *root, Start: *root, Pathname: test.path}, + &vfs.OpenOptions{}, + ) + if err != nil { + t.Fatalf("vfsfs.OpenAt failed: %v", err) + } + + cb := &iterDirentsCb{} + if err = fd.Impl().IterDirents(ctx, cb); err != nil { + t.Fatalf("dir fd.IterDirents() failed: %v", err) + } + + sort.Slice(cb.dirents, func(i int, j int) bool { return cb.dirents[i].Name < cb.dirents[j].Name }) + sort.Slice(test.want, func(i int, j int) bool { return test.want[i].Name < test.want[j].Name }) + + // Ignore the inode number and offset of dirents because those are likely to + // change as the underlying image changes. + cmpIgnoreFields := cmp.FilterPath(func(p cmp.Path) bool { + return p.String() == "Ino" || p.String() == "Off" + }, cmp.Ignore()) + if diff := cmp.Diff(cb.dirents, test.want, cmpIgnoreFields); diff != "" { + t.Errorf("dirents mismatch (-want +got):\n%s", diff) + } + }) + } } // TestRootDir tests that the root directory inode is correctly initialized and @@ -126,15 +648,15 @@ func TestRootDir(t *testing.T) { for _, test := range tests { t.Run(test.name, func(t *testing.T) { - _, _, vfsd, tearDown, err := setUp(t, test.image) + _, _, vd, tearDown, err := setUp(t, test.image) if err != nil { t.Fatalf("setUp failed: %v", err) } defer tearDown() - d, ok := vfsd.Impl().(*dentry) + d, ok := vd.Dentry().Impl().(*dentry) if !ok { - t.Fatalf("ext dentry of incorrect type: %T", vfsd.Impl()) + t.Fatalf("ext dentry of incorrect type: %T", vd.Dentry().Impl()) } // Offload inode contents into local structs for comparison. 
@@ -329,15 +851,15 @@ func TestFilesystemInit(t *testing.T) { for _, test := range tests { t.Run(test.name, func(t *testing.T) { - _, vfsfs, _, tearDown, err := setUp(t, test.image) + _, _, vd, tearDown, err := setUp(t, test.image) if err != nil { t.Fatalf("setUp failed: %v", err) } defer tearDown() - fs, ok := vfsfs.Impl().(*filesystem) + fs, ok := vd.Mount().Filesystem().Impl().(*filesystem) if !ok { - t.Fatalf("ext filesystem of incorrect type: %T", vfsfs.Impl()) + t.Fatalf("ext filesystem of incorrect type: %T", vd.Mount().Filesystem().Impl()) } // Offload superblock and block group descriptors contents into diff --git a/pkg/sentry/fs/ext/extent_file.go b/pkg/sentry/fs/ext/extent_file.go index 44fb9c01f..1b9bf449b 100644 --- a/pkg/sentry/fs/ext/extent_file.go +++ b/pkg/sentry/fs/ext/extent_file.go @@ -150,7 +150,11 @@ func (f *extentFile) ReadAt(dst []byte, off int64) (int, error) { return 0, io.EOF } - return f.read(&f.root, uint64(off), dst) + n, err := f.read(&f.root, uint64(off), dst) + if n < len(dst) && err == nil { + err = io.EOF + } + return n, err } // read is the recursive step of extentFile.ReadAt which traverses the extent diff --git a/pkg/sentry/fs/ext/file_description.go b/pkg/sentry/fs/ext/file_description.go new file mode 100644 index 000000000..d244cf1e7 --- /dev/null +++ b/pkg/sentry/fs/ext/file_description.go @@ -0,0 +1,110 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package ext + +import ( + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/sentry/vfs" + "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/waiter" +) + +// fileDescription is embedded by ext implementations of +// vfs.FileDescriptionImpl. +type fileDescription struct { + vfsfd vfs.FileDescription + + // flags is the same as vfs.OpenOptions.Flags which are passed to + // vfs.FilesystemImpl.OpenAt. + // TODO(b/134676337): syscalls like read(2), write(2), fchmod(2), fchown(2), + // fgetxattr(2), ioctl(2), mmap(2) should fail with EBADF if O_PATH is set. + // Only close(2), fstat(2), fstatfs(2) should work. + flags uint32 +} + +func (fd *fileDescription) filesystem() *filesystem { + return fd.vfsfd.VirtualDentry().Mount().Filesystem().Impl().(*filesystem) +} + +func (fd *fileDescription) inode() *inode { + return fd.vfsfd.VirtualDentry().Dentry().Impl().(*dentry).inode +} + +// OnClose implements vfs.FileDescriptionImpl.OnClose. +func (fd *fileDescription) OnClose() error { return nil } + +// StatusFlags implements vfs.FileDescriptionImpl.StatusFlags. +func (fd *fileDescription) StatusFlags(ctx context.Context) (uint32, error) { + return fd.flags, nil +} + +// SetStatusFlags implements vfs.FileDescriptionImpl.SetStatusFlags. +func (fd *fileDescription) SetStatusFlags(ctx context.Context, flags uint32) error { + // None of the flags settable by fcntl(F_SETFL) are supported, so this is a + // no-op. 
+ return nil +} + +// Stat implements vfs.FileDescriptionImpl.Stat. +func (fd *fileDescription) Stat(ctx context.Context, opts vfs.StatOptions) (linux.Statx, error) { + var stat linux.Statx + fd.inode().statTo(&stat) + return stat, nil +} + +// SetStat implements vfs.FileDescriptionImpl.SetStat. +func (fd *fileDescription) SetStat(ctx context.Context, opts vfs.SetStatOptions) error { + if opts.Stat.Mask == 0 { + return nil + } + return syserror.EPERM +} + +// SetStat implements vfs.FileDescriptionImpl.StatFS. +func (fd *fileDescription) StatFS(ctx context.Context) (linux.Statfs, error) { + var stat linux.Statfs + fd.filesystem().statTo(&stat) + return stat, nil +} + +// Readiness implements waiter.Waitable.Readiness analogously to +// file_operations::poll == NULL in Linux. +func (fd *fileDescription) Readiness(mask waiter.EventMask) waiter.EventMask { + // include/linux/poll.h:vfs_poll() => DEFAULT_POLLMASK + return waiter.EventIn | waiter.EventOut +} + +// EventRegister implements waiter.Waitable.EventRegister analogously to +// file_operations::poll == NULL in Linux. +func (fd *fileDescription) EventRegister(e *waiter.Entry, mask waiter.EventMask) {} + +// EventUnregister implements waiter.Waitable.EventUnregister analogously to +// file_operations::poll == NULL in Linux. +func (fd *fileDescription) EventUnregister(e *waiter.Entry) {} + +// Sync implements vfs.FileDescriptionImpl.Sync. +func (fd *fileDescription) Sync(ctx context.Context) error { + return nil +} + +// Ioctl implements vfs.FileDescriptionImpl.Ioctl. +func (fd *fileDescription) Ioctl(ctx context.Context, uio usermem.IO, args arch.SyscallArguments) (uintptr, error) { + // ioctl(2) specifies that ENOTTY must be returned if the file descriptor is + // not associated with a character special device (which is unimplemented). + return 0, syserror.ENOTTY +} diff --git a/pkg/sentry/fs/ext/filesystem.go b/pkg/sentry/fs/ext/filesystem.go index 45b43b9e2..e08839f48 100644 --- a/pkg/sentry/fs/ext/filesystem.go +++ b/pkg/sentry/fs/ext/filesystem.go @@ -15,20 +15,27 @@ package ext import ( + "errors" "io" "sync" + "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/sentry/context" "gvisor.dev/gvisor/pkg/sentry/fs/ext/disklayout" "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/syserror" ) +var ( + // errResolveDirent indicates that the vfs.ResolvingPath.Component() does + // not exist on the dentry tree but does exist on disk. So it has to be read in + // using the in-memory dirent and added to the dentry tree. Usually indicates + // the need to lock filesystem.mu for writing. + errResolveDirent = errors.New("resolve path component using dirent") +) + // filesystem implements vfs.FilesystemImpl. type filesystem struct { - // TODO(b/134676337): Remove when all methods have been implemented. - vfs.FilesystemImpl - vfsfs vfs.Filesystem // mu serializes changes to the Dentry tree. @@ -44,8 +51,8 @@ type filesystem struct { // inodeCache maps absolute inode numbers to the corresponding Inode struct. // Inodes should be removed from this once their reference count hits 0. // - // Protected by mu because every addition and removal from this corresponds to - // a change in the dentry tree. + // Protected by mu because most additions (see IterDirents) and all removals + // from this corresponds to a change in the dentry tree. inodeCache map[uint32]*inode // sb represents the filesystem superblock. Immutable after initialization. 
@@ -59,16 +66,172 @@ type filesystem struct { // Compiles only if filesystem implements vfs.FilesystemImpl. var _ vfs.FilesystemImpl = (*filesystem)(nil) -// getOrCreateInode gets the inode corresponding to the inode number passed in. +// stepLocked resolves rp.Component() in parent directory vfsd. The write +// parameter passed tells if the caller has acquired filesystem.mu for writing +// or not. If set to true, an existing inode on disk can be added to the dentry +// tree if not present already. +// +// stepLocked is loosely analogous to fs/namei.c:walk_component(). +// +// Preconditions: +// - filesystem.mu must be locked (for writing if write param is true). +// - !rp.Done(). +// - inode == vfsd.Impl().(*Dentry).inode. +func stepLocked(rp *vfs.ResolvingPath, vfsd *vfs.Dentry, inode *inode, write bool) (*vfs.Dentry, *inode, error) { + if !inode.isDir() { + return nil, nil, syserror.ENOTDIR + } + if err := inode.checkPermissions(rp.Credentials(), vfs.MayExec); err != nil { + return nil, nil, err + } + + for { + nextVFSD, err := rp.ResolveComponent(vfsd) + if err != nil { + return nil, nil, err + } + if nextVFSD == nil { + // Since the Dentry tree is not the sole source of truth for extfs, if it's + // not in the Dentry tree, it might need to be pulled from disk. + childDirent, ok := inode.impl.(*directory).childMap[rp.Component()] + if !ok { + // The underlying inode does not exist on disk. + return nil, nil, syserror.ENOENT + } + + if !write { + // filesystem.mu must be held for writing to add to the dentry tree. + return nil, nil, errResolveDirent + } + + // Create and add the component's dirent to the dentry tree. + fs := rp.Mount().Filesystem().Impl().(*filesystem) + childInode, err := fs.getOrCreateInodeLocked(childDirent.diskDirent.Inode()) + if err != nil { + return nil, nil, err + } + // incRef because this is being added to the dentry tree. + childInode.incRef() + child := newDentry(childInode) + vfsd.InsertChild(&child.vfsd, rp.Component()) + + // Continue as usual now that nextVFSD is not nil. + nextVFSD = &child.vfsd + } + nextInode := nextVFSD.Impl().(*dentry).inode + if nextInode.isSymlink() && rp.ShouldFollowSymlink() { + if err := rp.HandleSymlink(inode.impl.(*symlink).target); err != nil { + return nil, nil, err + } + continue + } + rp.Advance() + return nextVFSD, nextInode, nil + } +} + +// walkLocked resolves rp to an existing file. The write parameter +// passed tells if the caller has acquired filesystem.mu for writing or not. +// If set to true, additions can be made to the dentry tree while walking. +// If errResolveDirent is returned, the walk needs to be continued with an +// upgraded filesystem.mu. +// +// walkLocked is loosely analogous to Linux's fs/namei.c:path_lookupat(). +// +// Preconditions: +// - filesystem.mu must be locked (for writing if write param is true). +func walkLocked(rp *vfs.ResolvingPath, write bool) (*vfs.Dentry, *inode, error) { + vfsd := rp.Start() + inode := vfsd.Impl().(*dentry).inode + for !rp.Done() { + var err error + vfsd, inode, err = stepLocked(rp, vfsd, inode, write) + if err != nil { + return nil, nil, err + } + } + if rp.MustBeDir() && !inode.isDir() { + return nil, nil, syserror.ENOTDIR + } + return vfsd, inode, nil +} + +// walkParentLocked resolves all but the last path component of rp to an +// existing directory. It does not check that the returned directory is +// searchable by the provider of rp. The write parameter passed tells if the +// caller has acquired filesystem.mu for writing or not. 
If set to true, +// additions can be made to the dentry tree while walking. +// If errResolveDirent is returned, the walk needs to be continued with an +// upgraded filesystem.mu. +// +// walkParentLocked is loosely analogous to Linux's fs/namei.c:path_parentat(). +// +// Preconditions: +// - filesystem.mu must be locked (for writing if write param is true). +// - !rp.Done(). +func walkParentLocked(rp *vfs.ResolvingPath, write bool) (*vfs.Dentry, *inode, error) { + vfsd := rp.Start() + inode := vfsd.Impl().(*dentry).inode + for !rp.Final() { + var err error + vfsd, inode, err = stepLocked(rp, vfsd, inode, write) + if err != nil { + return nil, nil, err + } + } + if !inode.isDir() { + return nil, nil, syserror.ENOTDIR + } + return vfsd, inode, nil +} + +// walk resolves rp to an existing file. If parent is set to true, it resolves +// the rp till the parent of the last component which should be an existing +// directory. If parent is false then resolves rp entirely. Attemps to resolve +// the path as far as it can with a read lock and upgrades the lock if needed. +func (fs *filesystem) walk(rp *vfs.ResolvingPath, parent bool) (*vfs.Dentry, *inode, error) { + var ( + vfsd *vfs.Dentry + inode *inode + err error + ) + + // Try walking with the hopes that all dentries have already been pulled out + // of disk. This reduces congestion (allows concurrent walks). + fs.mu.RLock() + if parent { + vfsd, inode, err = walkParentLocked(rp, false) + } else { + vfsd, inode, err = walkLocked(rp, false) + } + fs.mu.RUnlock() + + if err == errResolveDirent { + // Upgrade lock and continue walking. Lock upgrading in the middle of the + // walk is fine as this is a read only filesystem. + fs.mu.Lock() + if parent { + vfsd, inode, err = walkParentLocked(rp, true) + } else { + vfsd, inode, err = walkLocked(rp, true) + } + fs.mu.Unlock() + } + + return vfsd, inode, err +} + +// getOrCreateInodeLocked gets the inode corresponding to the inode number passed in. // It creates a new one with the given inode number if one does not exist. +// The caller must increment the ref count if adding this to the dentry tree. // -// Precondition: must be holding fs.mu. -func (fs *filesystem) getOrCreateInode(ctx context.Context, inodeNum uint32) (*inode, error) { +// Precondition: must be holding fs.mu for writing. +func (fs *filesystem) getOrCreateInodeLocked(inodeNum uint32) (*inode, error) { if in, ok := fs.inodeCache[inodeNum]; ok { return in, nil } - in, err := newInode(ctx, fs, inodeNum) + in, err := newInode(fs, inodeNum) if err != nil { return nil, err } @@ -77,10 +240,92 @@ func (fs *filesystem) getOrCreateInode(ctx context.Context, inodeNum uint32) (*i return in, nil } -// Release implements vfs.FilesystemImpl.Release. -func (fs *filesystem) Release() { +// statTo writes the statfs fields to the output parameter. +func (fs *filesystem) statTo(stat *linux.Statfs) { + stat.Type = uint64(fs.sb.Magic()) + stat.BlockSize = int64(fs.sb.BlockSize()) + stat.Blocks = fs.sb.BlocksCount() + stat.BlocksFree = fs.sb.FreeBlocksCount() + stat.BlocksAvailable = fs.sb.FreeBlocksCount() + stat.Files = uint64(fs.sb.InodesCount()) + stat.FilesFree = uint64(fs.sb.FreeInodesCount()) + stat.NameLength = disklayout.MaxFileName + stat.FragmentSize = int64(fs.sb.BlockSize()) + // TODO(b/134676337): Set Statfs.Flags and Statfs.FSID. +} + +// GetDentryAt implements vfs.FilesystemImpl.GetDentryAt. 
+func (fs *filesystem) GetDentryAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.GetDentryOptions) (*vfs.Dentry, error) { + vfsd, inode, err := fs.walk(rp, false) + if err != nil { + return nil, err + } + + if opts.CheckSearchable { + if !inode.isDir() { + return nil, syserror.ENOTDIR + } + if err := inode.checkPermissions(rp.Credentials(), vfs.MayExec); err != nil { + return nil, err + } + } + + inode.incRef() + return vfsd, nil +} + +// OpenAt implements vfs.FilesystemImpl.OpenAt. +func (fs *filesystem) OpenAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.OpenOptions) (*vfs.FileDescription, error) { + vfsd, inode, err := fs.walk(rp, false) + if err != nil { + return nil, err + } + + // EROFS is returned if write access is needed. + if vfs.MayWriteFileWithOpenFlags(opts.Flags) || opts.Flags&(linux.O_CREAT|linux.O_EXCL|linux.O_TMPFILE) != 0 { + return nil, syserror.EROFS + } + return inode.open(rp, vfsd, opts.Flags) +} + +// ReadlinkAt implements vfs.FilesystemImpl.ReadlinkAt. +func (fs *filesystem) ReadlinkAt(ctx context.Context, rp *vfs.ResolvingPath) (string, error) { + _, inode, err := fs.walk(rp, false) + if err != nil { + return "", err + } + symlink, ok := inode.impl.(*symlink) + if !ok { + return "", syserror.EINVAL + } + return symlink.target, nil +} + +// StatAt implements vfs.FilesystemImpl.StatAt. +func (fs *filesystem) StatAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.StatOptions) (linux.Statx, error) { + _, inode, err := fs.walk(rp, false) + if err != nil { + return linux.Statx{}, err + } + var stat linux.Statx + inode.statTo(&stat) + return stat, nil } +// StatFSAt implements vfs.FilesystemImpl.StatFSAt. +func (fs *filesystem) StatFSAt(ctx context.Context, rp *vfs.ResolvingPath) (linux.Statfs, error) { + if _, _, err := fs.walk(rp, false); err != nil { + return linux.Statfs{}, err + } + + var stat linux.Statfs + fs.statTo(&stat) + return stat, nil +} + +// Release implements vfs.FilesystemImpl.Release. +func (fs *filesystem) Release() {} + // Sync implements vfs.FilesystemImpl.Sync. func (fs *filesystem) Sync(ctx context.Context) error { // This is a readonly filesystem for now. @@ -89,42 +334,110 @@ func (fs *filesystem) Sync(ctx context.Context) error { // The vfs.FilesystemImpl functions below return EROFS because their respective // man pages say that EROFS must be returned if the path resolves to a file on -// a read-only filesystem. +// this read-only filesystem. -// TODO(b/134676337): Implement path traversal and return EROFS only if the -// path resolves to a Dentry within ext fs. +// LinkAt implements vfs.FilesystemImpl.LinkAt. +func (fs *filesystem) LinkAt(ctx context.Context, rp *vfs.ResolvingPath, vd vfs.VirtualDentry) error { + if rp.Done() { + return syserror.EEXIST + } + + if _, _, err := fs.walk(rp, true); err != nil { + return err + } + + return syserror.EROFS +} // MkdirAt implements vfs.FilesystemImpl.MkdirAt. func (fs *filesystem) MkdirAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.MkdirOptions) error { + if rp.Done() { + return syserror.EEXIST + } + + if _, _, err := fs.walk(rp, true); err != nil { + return err + } + return syserror.EROFS } // MknodAt implements vfs.FilesystemImpl.MknodAt. func (fs *filesystem) MknodAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.MknodOptions) error { + if rp.Done() { + return syserror.EEXIST + } + + _, _, err := fs.walk(rp, true) + if err != nil { + return err + } + return syserror.EROFS } // RenameAt implements vfs.FilesystemImpl.RenameAt. 
func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, vd vfs.VirtualDentry, opts vfs.RenameOptions) error { + if rp.Done() { + return syserror.ENOENT + } + + _, _, err := fs.walk(rp, false) + if err != nil { + return err + } + return syserror.EROFS } // RmdirAt implements vfs.FilesystemImpl.RmdirAt. func (fs *filesystem) RmdirAt(ctx context.Context, rp *vfs.ResolvingPath) error { + _, inode, err := fs.walk(rp, false) + if err != nil { + return err + } + + if !inode.isDir() { + return syserror.ENOTDIR + } + return syserror.EROFS } // SetStatAt implements vfs.FilesystemImpl.SetStatAt. func (fs *filesystem) SetStatAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.SetStatOptions) error { + _, _, err := fs.walk(rp, false) + if err != nil { + return err + } + return syserror.EROFS } // SymlinkAt implements vfs.FilesystemImpl.SymlinkAt. func (fs *filesystem) SymlinkAt(ctx context.Context, rp *vfs.ResolvingPath, target string) error { + if rp.Done() { + return syserror.EEXIST + } + + _, _, err := fs.walk(rp, true) + if err != nil { + return err + } + return syserror.EROFS } // UnlinkAt implements vfs.FilesystemImpl.UnlinkAt. func (fs *filesystem) UnlinkAt(ctx context.Context, rp *vfs.ResolvingPath) error { + _, inode, err := fs.walk(rp, false) + if err != nil { + return err + } + + if inode.isDir() { + return syserror.EISDIR + } + return syserror.EROFS } diff --git a/pkg/sentry/fs/ext/inline_file.go b/pkg/sentry/fs/ext/inline_file.go deleted file mode 100644 index 67a538ba0..000000000 --- a/pkg/sentry/fs/ext/inline_file.go +++ /dev/null @@ -1,55 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package ext - -import ( - "io" -) - -// inlineFile is a type of regular file. All the data here is stored in the -// inode.Data() array. -type inlineFile struct { - regFile regularFile -} - -// Compiles only if inlineFile implements io.ReaderAt. -var _ io.ReaderAt = (*inlineFile)(nil) - -// newInlineFile is the inlineFile constructor. -func newInlineFile(regFile regularFile) *inlineFile { - file := &inlineFile{regFile: regFile} - file.regFile.impl = file - return file -} - -// ReadAt implements io.ReaderAt.ReadAt. 
-func (f *inlineFile) ReadAt(dst []byte, off int64) (int, error) { - if len(dst) == 0 { - return 0, nil - } - - size := f.regFile.inode.diskInode.Size() - if uint64(off) >= size { - return 0, io.EOF - } - - to := uint64(off) + uint64(len(dst)) - if to > size { - to = size - } - - n := copy(dst, f.regFile.inode.diskInode.Data()[off:to]) - return n, nil -} diff --git a/pkg/sentry/fs/ext/inode.go b/pkg/sentry/fs/ext/inode.go index 364980e4c..178bd6376 100644 --- a/pkg/sentry/fs/ext/inode.go +++ b/pkg/sentry/fs/ext/inode.go @@ -15,12 +15,14 @@ package ext import ( + "fmt" "io" "sync/atomic" "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/sentry/context" "gvisor.dev/gvisor/pkg/sentry/fs/ext/disklayout" + "gvisor.dev/gvisor/pkg/sentry/kernel/auth" + "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/syserror" ) @@ -31,13 +33,11 @@ import ( // // Implementations: // inode -- -// |-- pipe // |-- dir // |-- symlink // |-- regular-- // |-- extent file // |-- block map file -// |-- inline file type inode struct { // refs is a reference count. refs is accessed using atomic memory operations. refs int64 @@ -92,7 +92,7 @@ func (in *inode) decRef(fs *filesystem) { // newInode is the inode constructor. Reads the inode off disk. Identifies // inodes based on the absolute inode number on disk. -func newInode(ctx context.Context, fs *filesystem, inodeNum uint32) (*inode, error) { +func newInode(fs *filesystem, inodeNum uint32) (*inode, error) { if inodeNum == 0 { panic("inode number 0 on ext filesystems is not possible") } @@ -117,7 +117,6 @@ func newInode(ctx context.Context, fs *filesystem, inodeNum uint32) (*inode, err // Build the inode based on its type. inode := inode{ - refs: 1, inodeNum: inodeNum, dev: fs.dev, blkSize: blkSize, @@ -138,15 +137,76 @@ func newInode(ctx context.Context, fs *filesystem, inodeNum uint32) (*inode, err } return &f.inode, nil case linux.ModeDirectory: - return &newDirectroy(inode).inode, nil - case linux.ModeNamedPipe: - return &newNamedPipe(ctx, inode).inode, nil + f, err := newDirectroy(inode, fs.sb.IncompatibleFeatures().DirentFileType) + if err != nil { + return nil, err + } + return &f.inode, nil default: - // TODO(b/134676337): Return appropriate errors for sockets and devices. + // TODO(b/134676337): Return appropriate errors for sockets, pipes and devices. return nil, syserror.EINVAL } } +// open creates and returns a file description for the dentry passed in. +func (in *inode) open(rp *vfs.ResolvingPath, vfsd *vfs.Dentry, flags uint32) (*vfs.FileDescription, error) { + ats := vfs.AccessTypesForOpenFlags(flags) + if err := in.checkPermissions(rp.Credentials(), ats); err != nil { + return nil, err + } + switch in.impl.(type) { + case *regularFile: + var fd regularFileFD + fd.flags = flags + fd.vfsfd.Init(&fd, rp.Mount(), vfsd) + return &fd.vfsfd, nil + case *directory: + // Can't open directories writably. This check is not necessary for a read + // only filesystem but will be required when write is implemented. + if ats&vfs.MayWrite != 0 { + return nil, syserror.EISDIR + } + var fd directoryFD + fd.vfsfd.Init(&fd, rp.Mount(), vfsd) + fd.flags = flags + return &fd.vfsfd, nil + case *symlink: + if flags&linux.O_PATH == 0 { + // Can't open symlinks without O_PATH. 
+ return nil, syserror.ELOOP + } + var fd symlinkFD + fd.flags = flags + fd.vfsfd.Init(&fd, rp.Mount(), vfsd) + return &fd.vfsfd, nil + default: + panic(fmt.Sprintf("unknown inode type: %T", in.impl)) + } +} + +func (in *inode) checkPermissions(creds *auth.Credentials, ats vfs.AccessTypes) error { + return vfs.GenericCheckPermissions(creds, ats, in.isDir(), uint16(in.diskInode.Mode()), in.diskInode.UID(), in.diskInode.GID()) +} + +// statTo writes the statx fields to the output parameter. +func (in *inode) statTo(stat *linux.Statx) { + stat.Mask = linux.STATX_TYPE | linux.STATX_MODE | linux.STATX_NLINK | + linux.STATX_UID | linux.STATX_GID | linux.STATX_INO | linux.STATX_SIZE | + linux.STATX_ATIME | linux.STATX_CTIME | linux.STATX_MTIME + stat.Blksize = uint32(in.blkSize) + stat.Mode = uint16(in.diskInode.Mode()) + stat.Nlink = uint32(in.diskInode.LinksCount()) + stat.UID = uint32(in.diskInode.UID()) + stat.GID = uint32(in.diskInode.GID()) + stat.Ino = uint64(in.inodeNum) + stat.Size = in.diskInode.Size() + stat.Atime = in.diskInode.AccessTime().StatxTimestamp() + stat.Ctime = in.diskInode.ChangeTime().StatxTimestamp() + stat.Mtime = in.diskInode.ModificationTime().StatxTimestamp() + // TODO(b/134676337): Set stat.Blocks which is the number of 512 byte blocks + // (including metadata blocks) required to represent this file. +} + // getBGNum returns the block group number that a given inode belongs to. func getBGNum(inodeNum uint32, inodesPerGrp uint32) uint32 { return (inodeNum - 1) / inodesPerGrp diff --git a/pkg/sentry/fs/ext/named_pipe.go b/pkg/sentry/fs/ext/named_pipe.go deleted file mode 100644 index 0f3af1b53..000000000 --- a/pkg/sentry/fs/ext/named_pipe.go +++ /dev/null @@ -1,40 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package ext - -import ( - "gvisor.dev/gvisor/pkg/sentry/context" - "gvisor.dev/gvisor/pkg/sentry/fs" - "gvisor.dev/gvisor/pkg/sentry/kernel/pipe" - "gvisor.dev/gvisor/pkg/sentry/usermem" -) - -// namedPipe represents a named pipe inode. It is currently just a wrapper -// around pkg/sentry/kernel/pipe. -type namedPipe struct { - inode inode - - p *pipe.Pipe - inodeOps fs.InodeOperations -} - -// newNamedPipe is the namedPipe constructor. 
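For context on the symlink branch of open above: on Linux a symlink itself can only be opened with O_PATH, which is exactly what the ELOOP return and the EBADF-everywhere symlinkFD mirror. A small host-side illustration of that kernel behavior follows; the temporary directory setup is purely for the demo and does not involve the ext driver.

```go
package main

import (
	"fmt"
	"os"
	"path/filepath"

	"golang.org/x/sys/unix"
)

func main() {
	dir, err := os.MkdirTemp("", "symlink-demo")
	if err != nil {
		panic(err)
	}
	defer os.RemoveAll(dir)

	target := filepath.Join(dir, "target")
	link := filepath.Join(dir, "link")
	if err := os.WriteFile(target, []byte("hello"), 0644); err != nil {
		panic(err)
	}
	if err := os.Symlink(target, link); err != nil {
		panic(err)
	}

	// Without O_PATH, opening the link itself (O_NOFOLLOW) fails with ELOOP.
	_, err = unix.Open(link, unix.O_RDONLY|unix.O_NOFOLLOW, 0)
	fmt.Println(err == unix.ELOOP) // true

	// With O_PATH the open succeeds, but the descriptor cannot be read:
	// read(2) on an O_PATH fd fails with EBADF, just like symlinkFD.Read.
	fd, err := unix.Open(link, unix.O_PATH|unix.O_NOFOLLOW, 0)
	if err != nil {
		panic(err)
	}
	defer unix.Close(fd)
	buf := make([]byte, 8)
	_, err = unix.Read(fd, buf)
	fmt.Println(err == unix.EBADF) // true
}
```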
-func newNamedPipe(ctx context.Context, inode inode) *namedPipe { - file := &namedPipe{inode: inode} - file.inode.impl = file - file.p = pipe.NewPipe(ctx, true /* isNamed */, pipe.DefaultPipeSize, usermem.PageSize) - file.inodeOps = pipe.NewInodeOperations(ctx, fs.FilePermsFromMode(file.inode.diskInode.Mode()), file.p) - return file -} diff --git a/pkg/sentry/fs/ext/regular_file.go b/pkg/sentry/fs/ext/regular_file.go index fb1bd38ef..ffc76ba5b 100644 --- a/pkg/sentry/fs/ext/regular_file.go +++ b/pkg/sentry/fs/ext/regular_file.go @@ -16,6 +16,15 @@ package ext import ( "io" + "sync" + + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/memmap" + "gvisor.dev/gvisor/pkg/sentry/safemem" + "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/sentry/vfs" + "gvisor.dev/gvisor/pkg/syserror" ) // regularFile represents a regular file's inode. This too follows the @@ -26,6 +35,9 @@ type regularFile struct { // This is immutable. The first field of fileReader implementations must be // regularFile to ensure temporality. + // io.ReaderAt is more strict than io.Reader in the sense that a partial read + // is always accompanied by an error. If a read spans past the end of file, a + // partial read (within file range) is done and io.EOF is returned. impl io.ReaderAt } @@ -48,16 +60,6 @@ func newRegularFile(inode inode) (*regularFile, error) { return &file.regFile, nil } - if inodeFlags.Inline { - if inode.diskInode.Size() > 60 { - panic("ext fs: inline file larger than 60 bytes") - } - - file := newInlineFile(regFile) - file.regFile.inode.impl = &file.regFile - return &file.regFile, nil - } - file, err := newBlockMapFile(regFile) if err != nil { return nil, err @@ -66,6 +68,92 @@ func newRegularFile(inode inode) (*regularFile, error) { return &file.regFile, nil } -func (f *regularFile) blksUsed(blkSize uint64) uint64 { - return (f.inode.diskInode.Size() + blkSize - 1) / blkSize +func (in *inode) isRegular() bool { + _, ok := in.impl.(*regularFile) + return ok +} + +// directoryFD represents a directory file description. It implements +// vfs.FileDescriptionImpl. +type regularFileFD struct { + fileDescription + + // off is the file offset. off is accessed using atomic memory operations. + off int64 + + // offMu serializes operations that may mutate off. + offMu sync.Mutex +} + +// Release implements vfs.FileDescriptionImpl.Release. +func (fd *regularFileFD) Release() {} + +// PRead implements vfs.FileDescriptionImpl.PRead. +func (fd *regularFileFD) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts vfs.ReadOptions) (int64, error) { + safeReader := safemem.FromIOReaderAt{ + ReaderAt: fd.inode().impl.(*regularFile).impl, + Offset: offset, + } + + // Copies data from disk directly into usermem without any intermediate + // allocations (if dst is converted into BlockSeq such that it does not need + // safe copying). + return dst.CopyOutFrom(ctx, safeReader) +} + +// Read implements vfs.FileDescriptionImpl.Read. +func (fd *regularFileFD) Read(ctx context.Context, dst usermem.IOSequence, opts vfs.ReadOptions) (int64, error) { + n, err := fd.PRead(ctx, dst, fd.off, opts) + fd.offMu.Lock() + fd.off += n + fd.offMu.Unlock() + return n, err +} + +// PWrite implements vfs.FileDescriptionImpl.PWrite. +func (fd *regularFileFD) PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (int64, error) { + // write(2) specifies that EBADF must be returned if the fd is not open for + // writing. 
+ return 0, syserror.EBADF +} + +// Write implements vfs.FileDescriptionImpl.Write. +func (fd *regularFileFD) Write(ctx context.Context, src usermem.IOSequence, opts vfs.WriteOptions) (int64, error) { + n, err := fd.PWrite(ctx, src, fd.off, opts) + fd.offMu.Lock() + fd.off += n + fd.offMu.Unlock() + return n, err +} + +// IterDirents implements vfs.FileDescriptionImpl.IterDirents. +func (fd *regularFileFD) IterDirents(ctx context.Context, cb vfs.IterDirentsCallback) error { + return syserror.ENOTDIR +} + +// Seek implements vfs.FileDescriptionImpl.Seek. +func (fd *regularFileFD) Seek(ctx context.Context, offset int64, whence int32) (int64, error) { + fd.offMu.Lock() + defer fd.offMu.Unlock() + switch whence { + case linux.SEEK_SET: + // Use offset as specified. + case linux.SEEK_CUR: + offset += fd.off + case linux.SEEK_END: + offset += int64(fd.inode().diskInode.Size()) + default: + return 0, syserror.EINVAL + } + if offset < 0 { + return 0, syserror.EINVAL + } + fd.off = offset + return offset, nil +} + +// IterDirents implements vfs.FileDescriptionImpl.IterDirents. +func (fd *regularFileFD) ConfigureMMap(ctx context.Context, opts memmap.MMapOpts) error { + // TODO(b/134676337): Implement mmap(2). + return syserror.ENODEV } diff --git a/pkg/sentry/fs/ext/symlink.go b/pkg/sentry/fs/ext/symlink.go index 9f498d989..e06548a98 100644 --- a/pkg/sentry/fs/ext/symlink.go +++ b/pkg/sentry/fs/ext/symlink.go @@ -15,6 +15,10 @@ package ext import ( + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/memmap" + "gvisor.dev/gvisor/pkg/sentry/usermem" + "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/syserror" ) @@ -43,8 +47,8 @@ func newSymlink(inode inode) (*symlink, error) { } link = make([]byte, size) - if n, _ := regFile.impl.ReadAt(link, 0); uint64(n) < size { - return nil, syserror.EIO + if n, err := regFile.impl.ReadAt(link, 0); uint64(n) < size { + return nil, err } } @@ -52,3 +56,56 @@ func newSymlink(inode inode) (*symlink, error) { file.inode.impl = file return file, nil } + +func (in *inode) isSymlink() bool { + _, ok := in.impl.(*symlink) + return ok +} + +// symlinkFD represents a symlink file description and implements implements +// vfs.FileDescriptionImpl. which may only be used if open options contains +// O_PATH. For this reason most of the functions return EBADF. +type symlinkFD struct { + fileDescription +} + +// Compiles only if symlinkFD implements vfs.FileDescriptionImpl. +var _ vfs.FileDescriptionImpl = (*symlinkFD)(nil) + +// Release implements vfs.FileDescriptionImpl.Release. +func (fd *symlinkFD) Release() {} + +// PRead implements vfs.FileDescriptionImpl.PRead. +func (fd *symlinkFD) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts vfs.ReadOptions) (int64, error) { + return 0, syserror.EBADF +} + +// Read implements vfs.FileDescriptionImpl.Read. +func (fd *symlinkFD) Read(ctx context.Context, dst usermem.IOSequence, opts vfs.ReadOptions) (int64, error) { + return 0, syserror.EBADF +} + +// PWrite implements vfs.FileDescriptionImpl.PWrite. +func (fd *symlinkFD) PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (int64, error) { + return 0, syserror.EBADF +} + +// Write implements vfs.FileDescriptionImpl.Write. +func (fd *symlinkFD) Write(ctx context.Context, src usermem.IOSequence, opts vfs.WriteOptions) (int64, error) { + return 0, syserror.EBADF +} + +// IterDirents implements vfs.FileDescriptionImpl.IterDirents. 
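The io.ReaderAt contract noted on regularFile.impl, and relied on by both regularFileFD.PRead and the newSymlink change above, is that a short read is always accompanied by an error. A quick standard-library demonstration using strings.Reader, which implements io.ReaderAt:

```go
package main

import (
	"fmt"
	"io"
	"strings"
)

func main() {
	r := strings.NewReader("symlink target") // 14 bytes, implements io.ReaderAt

	// A read fully inside the data returns len(dst) bytes and a nil error.
	dst := make([]byte, 7)
	n, err := r.ReadAt(dst, 0)
	fmt.Println(n, err) // 7 <nil>

	// A read spanning past the end returns the partial count *and* io.EOF in
	// the same call; a ReaderAt never returns a short read with a nil error.
	dst = make([]byte, 10)
	n, err = r.ReadAt(dst, 8)
	fmt.Println(n, err == io.EOF) // 6 true

	// A read starting at or beyond the end returns 0 and io.EOF.
	n, err = r.ReadAt(dst, 14)
	fmt.Println(n, err == io.EOF) // 0 true
}
```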
+func (fd *symlinkFD) IterDirents(ctx context.Context, cb vfs.IterDirentsCallback) error { + return syserror.ENOTDIR +} + +// Seek implements vfs.FileDescriptionImpl.Seek. +func (fd *symlinkFD) Seek(ctx context.Context, offset int64, whence int32) (int64, error) { + return 0, syserror.EBADF +} + +// IterDirents implements vfs.FileDescriptionImpl.IterDirents. +func (fd *symlinkFD) ConfigureMMap(ctx context.Context, opts memmap.MMapOpts) error { + return syserror.EBADF +} diff --git a/pkg/sentry/fsimpl/memfs/BUILD b/pkg/sentry/fsimpl/memfs/BUILD index d5d4f68df..d2450e810 100644 --- a/pkg/sentry/fsimpl/memfs/BUILD +++ b/pkg/sentry/fsimpl/memfs/BUILD @@ -11,8 +11,8 @@ go_template_instance( prefix = "dentry", template = "//pkg/ilist:generic_list", types = { - "Element": "*Dentry", - "Linker": "*Dentry", + "Element": "*dentry", + "Linker": "*dentry", }, ) diff --git a/pkg/sentry/fsimpl/memfs/directory.go b/pkg/sentry/fsimpl/memfs/directory.go index b0c3ea39a..c52dc781c 100644 --- a/pkg/sentry/fsimpl/memfs/directory.go +++ b/pkg/sentry/fsimpl/memfs/directory.go @@ -23,23 +23,23 @@ import ( ) type directory struct { - inode Inode + inode inode // childList is a list containing (1) child Dentries and (2) fake Dentries // (with inode == nil) that represent the iteration position of // directoryFDs. childList is used to support directoryFD.IterDirents() - // efficiently. childList is protected by Filesystem.mu. + // efficiently. childList is protected by filesystem.mu. childList dentryList } -func (fs *Filesystem) newDirectory(creds *auth.Credentials, mode uint16) *Inode { +func (fs *filesystem) newDirectory(creds *auth.Credentials, mode uint16) *inode { dir := &directory{} dir.inode.init(dir, fs, creds, mode) dir.inode.nlink = 2 // from "." and parent directory or ".." for root return &dir.inode } -func (i *Inode) isDir() bool { +func (i *inode) isDir() bool { _, ok := i.impl.(*directory) return ok } @@ -48,8 +48,8 @@ type directoryFD struct { fileDescription vfs.DirectoryFileDescriptionDefaultImpl - // Protected by Filesystem.mu. - iter *Dentry + // Protected by filesystem.mu. + iter *dentry off int64 } @@ -68,7 +68,7 @@ func (fd *directoryFD) Release() { // IterDirents implements vfs.FileDescriptionImpl.IterDirents. func (fd *directoryFD) IterDirents(ctx context.Context, cb vfs.IterDirentsCallback) error { fs := fd.filesystem() - d := fd.vfsfd.VirtualDentry().Dentry() + vfsd := fd.vfsfd.VirtualDentry().Dentry() fs.mu.Lock() defer fs.mu.Unlock() @@ -77,7 +77,7 @@ func (fd *directoryFD) IterDirents(ctx context.Context, cb vfs.IterDirentsCallba if !cb.Handle(vfs.Dirent{ Name: ".", Type: linux.DT_DIR, - Ino: d.Impl().(*Dentry).inode.ino, + Ino: vfsd.Impl().(*dentry).inode.ino, Off: 0, }) { return nil @@ -85,7 +85,7 @@ func (fd *directoryFD) IterDirents(ctx context.Context, cb vfs.IterDirentsCallba fd.off++ } if fd.off == 1 { - parentInode := d.ParentOrSelf().Impl().(*Dentry).inode + parentInode := vfsd.ParentOrSelf().Impl().(*dentry).inode if !cb.Handle(vfs.Dirent{ Name: "..", Type: parentInode.direntType(), @@ -97,12 +97,12 @@ func (fd *directoryFD) IterDirents(ctx context.Context, cb vfs.IterDirentsCallba fd.off++ } - dir := d.Impl().(*Dentry).inode.impl.(*directory) - var child *Dentry + dir := vfsd.Impl().(*dentry).inode.impl.(*directory) + var child *dentry if fd.iter == nil { // Start iteration at the beginning of dir. child = dir.childList.Front() - fd.iter = &Dentry{} + fd.iter = &dentry{} } else { // Continue iteration from where we left off. 
child = fd.iter.Next() @@ -130,32 +130,41 @@ func (fd *directoryFD) IterDirents(ctx context.Context, cb vfs.IterDirentsCallba // Seek implements vfs.FileDescriptionImpl.Seek. func (fd *directoryFD) Seek(ctx context.Context, offset int64, whence int32) (int64, error) { - if whence != linux.SEEK_SET { - // TODO: Linux also allows SEEK_CUR. + fs := fd.filesystem() + fs.mu.Lock() + defer fs.mu.Unlock() + + switch whence { + case linux.SEEK_SET: + // Use offset as given. + case linux.SEEK_CUR: + offset += fd.off + default: return 0, syserror.EINVAL } if offset < 0 { return 0, syserror.EINVAL } + // If the offset isn't changing (e.g. due to lseek(0, SEEK_CUR)), don't + // seek even if doing so might reposition the iterator due to concurrent + // mutation of the directory. Compare fs/libfs.c:dcache_dir_lseek(). + if fd.off == offset { + return offset, nil + } + fd.off = offset // Compensate for "." and "..". - var remChildren int64 - if offset < 2 { - remChildren = 0 - } else { + remChildren := int64(0) + if offset >= 2 { remChildren = offset - 2 } - fs := fd.filesystem() dir := fd.inode().impl.(*directory) - fs.mu.Lock() - defer fs.mu.Unlock() - // Ensure that fd.iter exists and is not linked into dir.childList. if fd.iter == nil { - fd.iter = &Dentry{} + fd.iter = &dentry{} } else { dir.childList.Remove(fd.iter) } diff --git a/pkg/sentry/fsimpl/memfs/filesystem.go b/pkg/sentry/fsimpl/memfs/filesystem.go index 4d989eeaf..f79e2d9c8 100644 --- a/pkg/sentry/fsimpl/memfs/filesystem.go +++ b/pkg/sentry/fsimpl/memfs/filesystem.go @@ -28,9 +28,9 @@ import ( // // stepLocked is loosely analogous to fs/namei.c:walk_component(). // -// Preconditions: Filesystem.mu must be locked. !rp.Done(). inode == -// vfsd.Impl().(*Dentry).inode. -func stepLocked(rp *vfs.ResolvingPath, vfsd *vfs.Dentry, inode *Inode) (*vfs.Dentry, *Inode, error) { +// Preconditions: filesystem.mu must be locked. !rp.Done(). inode == +// vfsd.Impl().(*dentry).inode. +func stepLocked(rp *vfs.ResolvingPath, vfsd *vfs.Dentry, inode *inode) (*vfs.Dentry, *inode, error) { if !inode.isDir() { return nil, nil, syserror.ENOTDIR } @@ -47,7 +47,7 @@ afterSymlink: // not in the Dentry tree, it doesn't exist. return nil, nil, syserror.ENOENT } - nextInode := nextVFSD.Impl().(*Dentry).inode + nextInode := nextVFSD.Impl().(*dentry).inode if symlink, ok := nextInode.impl.(*symlink); ok && rp.ShouldFollowSymlink() { // TODO: symlink traversals update access time if err := rp.HandleSymlink(symlink.target); err != nil { @@ -64,10 +64,10 @@ afterSymlink: // walkExistingLocked is loosely analogous to Linux's // fs/namei.c:path_lookupat(). // -// Preconditions: Filesystem.mu must be locked. -func walkExistingLocked(rp *vfs.ResolvingPath) (*vfs.Dentry, *Inode, error) { +// Preconditions: filesystem.mu must be locked. +func walkExistingLocked(rp *vfs.ResolvingPath) (*vfs.Dentry, *inode, error) { vfsd := rp.Start() - inode := vfsd.Impl().(*Dentry).inode + inode := vfsd.Impl().(*dentry).inode for !rp.Done() { var err error vfsd, inode, err = stepLocked(rp, vfsd, inode) @@ -88,10 +88,10 @@ func walkExistingLocked(rp *vfs.ResolvingPath) (*vfs.Dentry, *Inode, error) { // walkParentDirLocked is loosely analogous to Linux's // fs/namei.c:path_parentat(). // -// Preconditions: Filesystem.mu must be locked. !rp.Done(). -func walkParentDirLocked(rp *vfs.ResolvingPath) (*vfs.Dentry, *Inode, error) { +// Preconditions: filesystem.mu must be locked. !rp.Done(). 
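The fake iterator dentry used by directoryFD above is a small but useful trick: instead of remembering a numeric position, which concurrent creates and unlinks would invalidate, the FD links a placeholder element into the directory's child list and resumes from it. Below is a minimal sketch of the same idea using container/list; the names Dir, cursor and next are made up for illustration, and a real implementation would hold the filesystem lock around every list operation, as memfs does with filesystem.mu.

```go
package main

import (
	"container/list"
	"fmt"
)

// Dir holds child names in insertion order.
type Dir struct {
	children *list.List // of string
}

// cursor is a placeholder element, analogous to the fake dentry with a nil
// inode: it marks where an in-progress readdir left off.
type cursor struct{}

// next returns up to max names after cur, re-linking the cursor so that later
// calls resume correctly even if entries were added or removed in between.
func (d *Dir) next(cur *list.Element, max int) ([]string, *list.Element) {
	var out []string
	e := d.children.Front()
	if cur != nil {
		e = cur.Next()
		d.children.Remove(cur)
	}
	for ; e != nil && len(out) < max; e = e.Next() {
		if _, isCursor := e.Value.(cursor); isCursor {
			continue // skip other FDs' cursors
		}
		out = append(out, e.Value.(string))
	}
	// Re-insert the cursor just before the first unread entry, or at the back.
	if e != nil {
		return out, d.children.InsertBefore(cursor{}, e)
	}
	return out, d.children.PushBack(cursor{})
}

func main() {
	d := &Dir{children: list.New()}
	for _, name := range []string{"a", "b", "c", "d"} {
		d.children.PushBack(name)
	}
	names, cur := d.next(nil, 2)
	fmt.Println(names)       // [a b]
	d.children.PushBack("e") // mutation between readdir calls
	names, _ = d.next(cur, 10)
	fmt.Println(names) // [c d e]
}
```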
+func walkParentDirLocked(rp *vfs.ResolvingPath) (*vfs.Dentry, *inode, error) { vfsd := rp.Start() - inode := vfsd.Impl().(*Dentry).inode + inode := vfsd.Impl().(*dentry).inode for !rp.Final() { var err error vfsd, inode, err = stepLocked(rp, vfsd, inode) @@ -108,9 +108,9 @@ func walkParentDirLocked(rp *vfs.ResolvingPath) (*vfs.Dentry, *Inode, error) { // checkCreateLocked checks that a file named rp.Component() may be created in // directory parentVFSD, then returns rp.Component(). // -// Preconditions: Filesystem.mu must be locked. parentInode == -// parentVFSD.Impl().(*Dentry).inode. parentInode.isDir() == true. -func checkCreateLocked(rp *vfs.ResolvingPath, parentVFSD *vfs.Dentry, parentInode *Inode) (string, error) { +// Preconditions: filesystem.mu must be locked. parentInode == +// parentVFSD.Impl().(*dentry).inode. parentInode.isDir() == true. +func checkCreateLocked(rp *vfs.ResolvingPath, parentVFSD *vfs.Dentry, parentInode *inode) (string, error) { if err := parentInode.checkPermissions(rp.Credentials(), vfs.MayWrite|vfs.MayExec, true); err != nil { return "", err } @@ -144,7 +144,7 @@ func checkDeleteLocked(vfsd *vfs.Dentry) error { } // GetDentryAt implements vfs.FilesystemImpl.GetDentryAt. -func (fs *Filesystem) GetDentryAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.GetDentryOptions) (*vfs.Dentry, error) { +func (fs *filesystem) GetDentryAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.GetDentryOptions) (*vfs.Dentry, error) { fs.mu.RLock() defer fs.mu.RUnlock() vfsd, inode, err := walkExistingLocked(rp) @@ -164,7 +164,7 @@ func (fs *Filesystem) GetDentryAt(ctx context.Context, rp *vfs.ResolvingPath, op } // LinkAt implements vfs.FilesystemImpl.LinkAt. -func (fs *Filesystem) LinkAt(ctx context.Context, rp *vfs.ResolvingPath, vd vfs.VirtualDentry) error { +func (fs *filesystem) LinkAt(ctx context.Context, rp *vfs.ResolvingPath, vd vfs.VirtualDentry) error { if rp.Done() { return syserror.EEXIST } @@ -185,7 +185,7 @@ func (fs *Filesystem) LinkAt(ctx context.Context, rp *vfs.ResolvingPath, vd vfs. return err } defer rp.Mount().EndWrite() - d := vd.Dentry().Impl().(*Dentry) + d := vd.Dentry().Impl().(*dentry) if d.inode.isDir() { return syserror.EPERM } @@ -197,7 +197,7 @@ func (fs *Filesystem) LinkAt(ctx context.Context, rp *vfs.ResolvingPath, vd vfs. } // MkdirAt implements vfs.FilesystemImpl.MkdirAt. -func (fs *Filesystem) MkdirAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.MkdirOptions) error { +func (fs *filesystem) MkdirAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.MkdirOptions) error { if rp.Done() { return syserror.EEXIST } @@ -223,7 +223,7 @@ func (fs *Filesystem) MkdirAt(ctx context.Context, rp *vfs.ResolvingPath, opts v } // MknodAt implements vfs.FilesystemImpl.MknodAt. -func (fs *Filesystem) MknodAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.MknodOptions) error { +func (fs *filesystem) MknodAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.MknodOptions) error { if rp.Done() { return syserror.EEXIST } @@ -246,7 +246,7 @@ func (fs *Filesystem) MknodAt(ctx context.Context, rp *vfs.ResolvingPath, opts v } // OpenAt implements vfs.FilesystemImpl.OpenAt. -func (fs *Filesystem) OpenAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.OpenOptions) (*vfs.FileDescription, error) { +func (fs *filesystem) OpenAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.OpenOptions) (*vfs.FileDescription, error) { // Filter out flags that are not supported by memfs. 
O_DIRECTORY and // O_NOFOLLOW have no effect here (they're handled by VFS by setting // appropriate bits in rp), but are returned by @@ -265,11 +265,10 @@ func (fs *Filesystem) OpenAt(ctx context.Context, rp *vfs.ResolvingPath, opts vf mustCreate := opts.Flags&linux.O_EXCL != 0 vfsd := rp.Start() - inode := vfsd.Impl().(*Dentry).inode + inode := vfsd.Impl().(*dentry).inode fs.mu.Lock() defer fs.mu.Unlock() if rp.Done() { - // FIXME: ??? if rp.MustBeDir() { return nil, syserror.EISDIR } @@ -327,7 +326,7 @@ afterTrailingSymlink: if mustCreate { return nil, syserror.EEXIST } - childInode := childVFSD.Impl().(*Dentry).inode + childInode := childVFSD.Impl().(*dentry).inode if symlink, ok := childInode.impl.(*symlink); ok && rp.ShouldFollowSymlink() { // TODO: symlink traversals update access time if err := rp.HandleSymlink(symlink.target); err != nil { @@ -340,7 +339,7 @@ afterTrailingSymlink: return childInode.open(rp, childVFSD, opts.Flags, false) } -func (i *Inode) open(rp *vfs.ResolvingPath, vfsd *vfs.Dentry, flags uint32, afterCreate bool) (*vfs.FileDescription, error) { +func (i *inode) open(rp *vfs.ResolvingPath, vfsd *vfs.Dentry, flags uint32, afterCreate bool) (*vfs.FileDescription, error) { ats := vfs.AccessTypesForOpenFlags(flags) if !afterCreate { if err := i.checkPermissions(rp.Credentials(), ats, i.isDir()); err != nil { @@ -385,7 +384,7 @@ func (i *Inode) open(rp *vfs.ResolvingPath, vfsd *vfs.Dentry, flags uint32, afte } // ReadlinkAt implements vfs.FilesystemImpl.ReadlinkAt. -func (fs *Filesystem) ReadlinkAt(ctx context.Context, rp *vfs.ResolvingPath) (string, error) { +func (fs *filesystem) ReadlinkAt(ctx context.Context, rp *vfs.ResolvingPath) (string, error) { fs.mu.RLock() _, inode, err := walkExistingLocked(rp) fs.mu.RUnlock() @@ -400,9 +399,8 @@ func (fs *Filesystem) ReadlinkAt(ctx context.Context, rp *vfs.ResolvingPath) (st } // RenameAt implements vfs.FilesystemImpl.RenameAt. -func (fs *Filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, vd vfs.VirtualDentry, opts vfs.RenameOptions) error { +func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, vd vfs.VirtualDentry, opts vfs.RenameOptions) error { if rp.Done() { - // FIXME return syserror.ENOENT } fs.mu.Lock() @@ -424,7 +422,7 @@ func (fs *Filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, vd vf } // RmdirAt implements vfs.FilesystemImpl.RmdirAt. -func (fs *Filesystem) RmdirAt(ctx context.Context, rp *vfs.ResolvingPath) error { +func (fs *filesystem) RmdirAt(ctx context.Context, rp *vfs.ResolvingPath) error { fs.mu.Lock() defer fs.mu.Unlock() vfsd, inode, err := walkExistingLocked(rp) @@ -447,12 +445,14 @@ func (fs *Filesystem) RmdirAt(ctx context.Context, rp *vfs.ResolvingPath) error if err := rp.VirtualFilesystem().DeleteDentry(vfs.MountNamespaceFromContext(ctx), vfsd); err != nil { return err } + // Remove from parent directory's childList. + vfsd.Parent().Impl().(*dentry).inode.impl.(*directory).childList.Remove(vfsd.Impl().(*dentry)) inode.decRef() return nil } // SetStatAt implements vfs.FilesystemImpl.SetStatAt. 
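The mustCreate handling in memfs OpenAt above follows open(2) semantics: with O_CREAT|O_EXCL, an existing path must fail with EEXIST rather than be opened. A quick host-side illustration using only the standard os package (the temporary file is for the demo only):

```go
package main

import (
	"errors"
	"fmt"
	"os"
	"path/filepath"
)

func main() {
	dir, err := os.MkdirTemp("", "excl-demo")
	if err != nil {
		panic(err)
	}
	defer os.RemoveAll(dir)
	path := filepath.Join(dir, "file")

	// The first O_CREAT|O_EXCL open creates the file.
	f, err := os.OpenFile(path, os.O_RDWR|os.O_CREATE|os.O_EXCL, 0644)
	if err != nil {
		panic(err)
	}
	f.Close()

	// The second one must fail with EEXIST, which os reports as ErrExist.
	_, err = os.OpenFile(path, os.O_RDWR|os.O_CREATE|os.O_EXCL, 0644)
	fmt.Println(errors.Is(err, os.ErrExist)) // true
}
```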
-func (fs *Filesystem) SetStatAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.SetStatOptions) error { +func (fs *filesystem) SetStatAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.SetStatOptions) error { fs.mu.RLock() _, _, err := walkExistingLocked(rp) fs.mu.RUnlock() @@ -462,12 +462,12 @@ func (fs *Filesystem) SetStatAt(ctx context.Context, rp *vfs.ResolvingPath, opts if opts.Stat.Mask == 0 { return nil } - // TODO: implement Inode.setStat + // TODO: implement inode.setStat return syserror.EPERM } // StatAt implements vfs.FilesystemImpl.StatAt. -func (fs *Filesystem) StatAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.StatOptions) (linux.Statx, error) { +func (fs *filesystem) StatAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.StatOptions) (linux.Statx, error) { fs.mu.RLock() _, inode, err := walkExistingLocked(rp) fs.mu.RUnlock() @@ -480,7 +480,7 @@ func (fs *Filesystem) StatAt(ctx context.Context, rp *vfs.ResolvingPath, opts vf } // StatFSAt implements vfs.FilesystemImpl.StatFSAt. -func (fs *Filesystem) StatFSAt(ctx context.Context, rp *vfs.ResolvingPath) (linux.Statfs, error) { +func (fs *filesystem) StatFSAt(ctx context.Context, rp *vfs.ResolvingPath) (linux.Statfs, error) { fs.mu.RLock() _, _, err := walkExistingLocked(rp) fs.mu.RUnlock() @@ -492,7 +492,7 @@ func (fs *Filesystem) StatFSAt(ctx context.Context, rp *vfs.ResolvingPath) (linu } // SymlinkAt implements vfs.FilesystemImpl.SymlinkAt. -func (fs *Filesystem) SymlinkAt(ctx context.Context, rp *vfs.ResolvingPath, target string) error { +func (fs *filesystem) SymlinkAt(ctx context.Context, rp *vfs.ResolvingPath, target string) error { if rp.Done() { return syserror.EEXIST } @@ -517,7 +517,7 @@ func (fs *Filesystem) SymlinkAt(ctx context.Context, rp *vfs.ResolvingPath, targ } // UnlinkAt implements vfs.FilesystemImpl.UnlinkAt. -func (fs *Filesystem) UnlinkAt(ctx context.Context, rp *vfs.ResolvingPath) error { +func (fs *filesystem) UnlinkAt(ctx context.Context, rp *vfs.ResolvingPath) error { fs.mu.Lock() defer fs.mu.Unlock() vfsd, inode, err := walkExistingLocked(rp) @@ -537,6 +537,8 @@ func (fs *Filesystem) UnlinkAt(ctx context.Context, rp *vfs.ResolvingPath) error if err := rp.VirtualFilesystem().DeleteDentry(vfs.MountNamespaceFromContext(ctx), vfsd); err != nil { return err } + // Remove from parent directory's childList. + vfsd.Parent().Impl().(*dentry).inode.impl.(*directory).childList.Remove(vfsd.Impl().(*dentry)) inode.decLinksLocked() return nil } diff --git a/pkg/sentry/fsimpl/memfs/memfs.go b/pkg/sentry/fsimpl/memfs/memfs.go index f381e1a88..59612da14 100644 --- a/pkg/sentry/fsimpl/memfs/memfs.go +++ b/pkg/sentry/fsimpl/memfs/memfs.go @@ -21,10 +21,10 @@ // // Lock order: // -// Filesystem.mu +// filesystem.mu // regularFileFD.offMu // regularFile.mu -// Inode.mu +// inode.mu package memfs import ( @@ -42,8 +42,8 @@ import ( // FilesystemType implements vfs.FilesystemType. type FilesystemType struct{} -// Filesystem implements vfs.FilesystemImpl. -type Filesystem struct { +// filesystem implements vfs.FilesystemImpl. +type filesystem struct { vfsfs vfs.Filesystem // mu serializes changes to the Dentry tree. @@ -54,44 +54,44 @@ type Filesystem struct { // NewFilesystem implements vfs.FilesystemType.NewFilesystem. 
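The renamed lock-order comment in the memfs package doc above (filesystem.mu before regularFileFD.offMu before regularFile.mu before inode.mu) is worth a tiny standalone illustration of why such an order is documented at all: as long as every path that needs two locks acquires them in the same order, no pair of paths can deadlock. The names fsMu, inodeMu and withBoth below are invented for the sketch.

```go
package main

import (
	"fmt"
	"sync"
)

var (
	fsMu    sync.Mutex // outer lock: always taken first
	inodeMu sync.Mutex // inner lock: only taken while fsMu is held, or alone
)

// withBoth acquires the two locks in the documented order. Because every
// caller goes through the same order, concurrent callers serialize cleanly
// instead of deadlocking against each other.
func withBoth(f func()) {
	fsMu.Lock()
	defer fsMu.Unlock()
	inodeMu.Lock()
	defer inodeMu.Unlock()
	f()
}

func main() {
	var wg sync.WaitGroup
	count := 0
	for i := 0; i < 100; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			withBoth(func() { count++ })
		}()
	}
	wg.Wait()
	fmt.Println(count) // 100
}
```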
func (fstype FilesystemType) NewFilesystem(ctx context.Context, creds *auth.Credentials, source string, opts vfs.NewFilesystemOptions) (*vfs.Filesystem, *vfs.Dentry, error) { - var fs Filesystem + var fs filesystem fs.vfsfs.Init(&fs) root := fs.newDentry(fs.newDirectory(creds, 01777)) return &fs.vfsfs, &root.vfsd, nil } // Release implements vfs.FilesystemImpl.Release. -func (fs *Filesystem) Release() { +func (fs *filesystem) Release() { } // Sync implements vfs.FilesystemImpl.Sync. -func (fs *Filesystem) Sync(ctx context.Context) error { +func (fs *filesystem) Sync(ctx context.Context) error { // All filesystem state is in-memory. return nil } -// Dentry implements vfs.DentryImpl. -type Dentry struct { +// dentry implements vfs.DentryImpl. +type dentry struct { vfsd vfs.Dentry - // inode is the inode represented by this Dentry. Multiple Dentries may - // share a single non-directory Inode (with hard links). inode is + // inode is the inode represented by this dentry. Multiple Dentries may + // share a single non-directory inode (with hard links). inode is // immutable. - inode *Inode + inode *inode - // memfs doesn't count references on Dentries; because the Dentry tree is + // memfs doesn't count references on dentries; because the dentry tree is // the sole source of truth, it is by definition always consistent with the - // state of the filesystem. However, it does count references on Inodes, - // because Inode resources are released when all references are dropped. + // state of the filesystem. However, it does count references on inodes, + // because inode resources are released when all references are dropped. // (memfs doesn't really have resources to release, but we implement // reference counting because tmpfs regular files will.) - // dentryEntry (ugh) links Dentries into their parent directory.childList. + // dentryEntry (ugh) links dentries into their parent directory.childList. dentryEntry } -func (fs *Filesystem) newDentry(inode *Inode) *Dentry { - d := &Dentry{ +func (fs *filesystem) newDentry(inode *inode) *dentry { + d := &dentry{ inode: inode, } d.vfsd.Init(d) @@ -99,37 +99,37 @@ func (fs *Filesystem) newDentry(inode *Inode) *Dentry { } // IncRef implements vfs.DentryImpl.IncRef. -func (d *Dentry) IncRef(vfsfs *vfs.Filesystem) { +func (d *dentry) IncRef(vfsfs *vfs.Filesystem) { d.inode.incRef() } // TryIncRef implements vfs.DentryImpl.TryIncRef. -func (d *Dentry) TryIncRef(vfsfs *vfs.Filesystem) bool { +func (d *dentry) TryIncRef(vfsfs *vfs.Filesystem) bool { return d.inode.tryIncRef() } // DecRef implements vfs.DentryImpl.DecRef. -func (d *Dentry) DecRef(vfsfs *vfs.Filesystem) { +func (d *dentry) DecRef(vfsfs *vfs.Filesystem) { d.inode.decRef() } -// Inode represents a filesystem object. -type Inode struct { +// inode represents a filesystem object. +type inode struct { // refs is a reference count. refs is accessed using atomic memory // operations. // - // A reference is held on all Inodes that are reachable in the filesystem + // A reference is held on all inodes that are reachable in the filesystem // tree. For non-directories (which may have multiple hard links), this // means that a reference is dropped when nlink reaches 0. For directories, // nlink never reaches 0 due to the "." entry; instead, - // Filesystem.RmdirAt() drops the reference. + // filesystem.RmdirAt() drops the reference. refs int64 // Inode metadata; protected by mu and accessed using atomic memory // operations unless otherwise specified. 
mu sync.RWMutex mode uint32 // excluding file type bits, which are based on impl - nlink uint32 // protected by Filesystem.mu instead of Inode.mu + nlink uint32 // protected by filesystem.mu instead of inode.mu uid uint32 // auth.KUID, but stored as raw uint32 for sync/atomic gid uint32 // auth.KGID, but ... ino uint64 // immutable @@ -137,7 +137,7 @@ type Inode struct { impl interface{} // immutable } -func (i *Inode) init(impl interface{}, fs *Filesystem, creds *auth.Credentials, mode uint16) { +func (i *inode) init(impl interface{}, fs *filesystem, creds *auth.Credentials, mode uint16) { i.refs = 1 i.mode = uint32(mode) i.uid = uint32(creds.EffectiveKUID) @@ -147,29 +147,29 @@ func (i *Inode) init(impl interface{}, fs *Filesystem, creds *auth.Credentials, i.impl = impl } -// Preconditions: Filesystem.mu must be locked for writing. -func (i *Inode) incLinksLocked() { +// Preconditions: filesystem.mu must be locked for writing. +func (i *inode) incLinksLocked() { if atomic.AddUint32(&i.nlink, 1) <= 1 { - panic("memfs.Inode.incLinksLocked() called with no existing links") + panic("memfs.inode.incLinksLocked() called with no existing links") } } -// Preconditions: Filesystem.mu must be locked for writing. -func (i *Inode) decLinksLocked() { +// Preconditions: filesystem.mu must be locked for writing. +func (i *inode) decLinksLocked() { if nlink := atomic.AddUint32(&i.nlink, ^uint32(0)); nlink == 0 { i.decRef() } else if nlink == ^uint32(0) { // negative overflow - panic("memfs.Inode.decLinksLocked() called with no existing links") + panic("memfs.inode.decLinksLocked() called with no existing links") } } -func (i *Inode) incRef() { +func (i *inode) incRef() { if atomic.AddInt64(&i.refs, 1) <= 1 { - panic("memfs.Inode.incRef() called without holding a reference") + panic("memfs.inode.incRef() called without holding a reference") } } -func (i *Inode) tryIncRef() bool { +func (i *inode) tryIncRef() bool { for { refs := atomic.LoadInt64(&i.refs) if refs == 0 { @@ -181,7 +181,7 @@ func (i *Inode) tryIncRef() bool { } } -func (i *Inode) decRef() { +func (i *inode) decRef() { if refs := atomic.AddInt64(&i.refs, -1); refs == 0 { // This is unnecessary; it's mostly to simulate what tmpfs would do. if regfile, ok := i.impl.(*regularFile); ok { @@ -191,18 +191,18 @@ func (i *Inode) decRef() { regfile.mu.Unlock() } } else if refs < 0 { - panic("memfs.Inode.decRef() called without holding a reference") + panic("memfs.inode.decRef() called without holding a reference") } } -func (i *Inode) checkPermissions(creds *auth.Credentials, ats vfs.AccessTypes, isDir bool) error { +func (i *inode) checkPermissions(creds *auth.Credentials, ats vfs.AccessTypes, isDir bool) error { return vfs.GenericCheckPermissions(creds, ats, isDir, uint16(atomic.LoadUint32(&i.mode)), auth.KUID(atomic.LoadUint32(&i.uid)), auth.KGID(atomic.LoadUint32(&i.gid))) } // Go won't inline this function, and returning linux.Statx (which is quite // big) means spending a lot of time in runtime.duffcopy(), so instead it's an // output parameter. 
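tryIncRef above is the usual compare-and-swap loop: it only takes a new reference while the count is still nonzero, so an object on which the final decRef has run can never be resurrected. A standalone sketch of the same loop with sync/atomic; the type name refCounted is invented for illustration.

```go
package main

import (
	"fmt"
	"sync/atomic"
)

type refCounted struct {
	refs int64 // accessed only with atomic operations
}

// tryIncRef takes a reference only if at least one is already held; it
// returns false once the count has reached zero, meaning the object is being
// destroyed and must not be revived.
func (r *refCounted) tryIncRef() bool {
	for {
		refs := atomic.LoadInt64(&r.refs)
		if refs == 0 {
			return false
		}
		if atomic.CompareAndSwapInt64(&r.refs, refs, refs+1) {
			return true
		}
		// Lost a race with another incRef/decRef; reload and retry.
	}
}

func (r *refCounted) decRef() {
	if refs := atomic.AddInt64(&r.refs, -1); refs < 0 {
		panic("decRef called without holding a reference")
	}
}

func main() {
	r := &refCounted{refs: 1}
	fmt.Println(r.tryIncRef()) // true, refs is now 2
	r.decRef()
	r.decRef()
	fmt.Println(r.tryIncRef()) // false, the last reference is gone
}
```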
-func (i *Inode) statTo(stat *linux.Statx) { +func (i *inode) statTo(stat *linux.Statx) { stat.Mask = linux.STATX_TYPE | linux.STATX_MODE | linux.STATX_NLINK | linux.STATX_UID | linux.STATX_GID | linux.STATX_INO stat.Blksize = 1 // usermem.PageSize in tmpfs stat.Nlink = atomic.LoadUint32(&i.nlink) @@ -241,7 +241,7 @@ func allocatedBlocksForSize(size uint64) uint64 { return (size + 511) / 512 } -func (i *Inode) direntType() uint8 { +func (i *inode) direntType() uint8 { switch i.impl.(type) { case *regularFile: return linux.DT_REG @@ -262,12 +262,12 @@ type fileDescription struct { flags uint32 // status flags; immutable } -func (fd *fileDescription) filesystem() *Filesystem { - return fd.vfsfd.VirtualDentry().Mount().Filesystem().Impl().(*Filesystem) +func (fd *fileDescription) filesystem() *filesystem { + return fd.vfsfd.VirtualDentry().Mount().Filesystem().Impl().(*filesystem) } -func (fd *fileDescription) inode() *Inode { - return fd.vfsfd.VirtualDentry().Dentry().Impl().(*Dentry).inode +func (fd *fileDescription) inode() *inode { + return fd.vfsfd.VirtualDentry().Dentry().Impl().(*dentry).inode } // StatusFlags implements vfs.FileDescriptionImpl.StatusFlags. @@ -294,6 +294,6 @@ func (fd *fileDescription) SetStat(ctx context.Context, opts vfs.SetStatOptions) if opts.Stat.Mask == 0 { return nil } - // TODO: implement Inode.setStat + // TODO: implement inode.setStat return syserror.EPERM } diff --git a/pkg/sentry/fsimpl/memfs/regular_file.go b/pkg/sentry/fsimpl/memfs/regular_file.go index 4a3603cc8..7a16d5719 100644 --- a/pkg/sentry/fsimpl/memfs/regular_file.go +++ b/pkg/sentry/fsimpl/memfs/regular_file.go @@ -28,16 +28,16 @@ import ( ) type regularFile struct { - inode Inode + inode inode mu sync.RWMutex data []byte // dataLen is len(data), but accessed using atomic memory operations to - // avoid locking in Inode.stat(). + // avoid locking in inode.stat(). dataLen int64 } -func (fs *Filesystem) newRegularFile(creds *auth.Credentials, mode uint16) *Inode { +func (fs *filesystem) newRegularFile(creds *auth.Credentials, mode uint16) *inode { file := ®ularFile{} file.inode.init(file, fs, creds, mode) file.inode.nlink = 1 // from parent directory diff --git a/pkg/sentry/fsimpl/memfs/symlink.go b/pkg/sentry/fsimpl/memfs/symlink.go index e002d1727..b2ac2cbeb 100644 --- a/pkg/sentry/fsimpl/memfs/symlink.go +++ b/pkg/sentry/fsimpl/memfs/symlink.go @@ -19,11 +19,11 @@ import ( ) type symlink struct { - inode Inode + inode inode target string // immutable } -func (fs *Filesystem) newSymlink(creds *auth.Credentials, target string) *Inode { +func (fs *filesystem) newSymlink(creds *auth.Credentials, target string) *inode { link := &symlink{ target: target, } diff --git a/pkg/sentry/safemem/io.go b/pkg/sentry/safemem/io.go index 5c3d73eb7..f039a5c34 100644 --- a/pkg/sentry/safemem/io.go +++ b/pkg/sentry/safemem/io.go @@ -157,7 +157,8 @@ func (w ToIOWriter) Write(src []byte) (int, error) { } // FromIOReader implements Reader for an io.Reader by repeatedly invoking -// io.Reader.Read until it returns an error or partial read. +// io.Reader.Read until it returns an error or partial read. This is not +// thread-safe. // // FromIOReader will return a successful partial read iff Reader.Read does so. type FromIOReader struct { @@ -206,6 +207,58 @@ func (r FromIOReader) readToBlock(dst Block, buf []byte) (int, []byte, error) { return wbn, buf, rerr } +// FromIOReaderAt implements Reader for an io.ReaderAt. Does not repeatedly +// invoke io.ReaderAt.ReadAt because ReadAt is more strict than Read. 
A partial +// read indicates an error. This is not thread-safe. +type FromIOReaderAt struct { + ReaderAt io.ReaderAt + Offset int64 +} + +// ReadToBlocks implements Reader.ReadToBlocks. +func (r FromIOReaderAt) ReadToBlocks(dsts BlockSeq) (uint64, error) { + var buf []byte + var done uint64 + for !dsts.IsEmpty() { + dst := dsts.Head() + var n int + var err error + n, buf, err = r.readToBlock(dst, buf) + done += uint64(n) + if n != dst.Len() { + return done, err + } + dsts = dsts.Tail() + if err != nil { + if dsts.IsEmpty() && err == io.EOF { + return done, nil + } + return done, err + } + } + return done, nil +} + +func (r FromIOReaderAt) readToBlock(dst Block, buf []byte) (int, []byte, error) { + // io.Reader isn't safecopy-aware, so we have to buffer Blocks that require + // safecopy. + if !dst.NeedSafecopy() { + n, err := r.ReaderAt.ReadAt(dst.ToSlice(), r.Offset) + r.Offset += int64(n) + return n, buf, err + } + if len(buf) < dst.Len() { + buf = make([]byte, dst.Len()) + } + rn, rerr := r.ReaderAt.ReadAt(buf[:dst.Len()], r.Offset) + r.Offset += int64(rn) + wbn, wberr := Copy(dst, BlockFromSafeSlice(buf[:rn])) + if wberr != nil { + return wbn, buf, wberr + } + return wbn, buf, rerr +} + // FromIOWriter implements Writer for an io.Writer by repeatedly invoking // io.Writer.Write until it returns an error or partial write. // diff --git a/pkg/sentry/socket/rpcinet/notifier/BUILD b/pkg/sentry/socket/rpcinet/notifier/BUILD index e43775898..a3585e10d 100644 --- a/pkg/sentry/socket/rpcinet/notifier/BUILD +++ b/pkg/sentry/socket/rpcinet/notifier/BUILD @@ -6,7 +6,7 @@ go_library( name = "notifier", srcs = ["notifier.go"], importpath = "gvisor.dev/gvisor/pkg/sentry/socket/rpcinet/notifier", - visibility = ["//pkg/sentry:internal"], + visibility = ["//:sandbox"], deps = [ "//pkg/sentry/socket/rpcinet:syscall_rpc_go_proto", "//pkg/sentry/socket/rpcinet/conn", diff --git a/pkg/sentry/syscalls/linux/sys_getdents.go b/pkg/sentry/syscalls/linux/sys_getdents.go index 63e2c5a5d..912cbe4ff 100644 --- a/pkg/sentry/syscalls/linux/sys_getdents.go +++ b/pkg/sentry/syscalls/linux/sys_getdents.go @@ -120,7 +120,7 @@ func newDirent(width uint, name string, attr fs.DentAttr, offset uint64) *dirent Ino: attr.InodeID, Off: offset, }, - Typ: toType(attr.Type), + Typ: fs.ToDirentType(attr.Type), }, Name: []byte(name), } @@ -142,28 +142,6 @@ func smallestDirent64(a arch.Context) uint { return uint(binary.Size(d.Hdr)) + a.Width() } -// toType converts an fs.InodeOperationsInfo to a linux dirent typ field. -func toType(nodeType fs.InodeType) uint8 { - switch nodeType { - case fs.RegularFile, fs.SpecialFile: - return linux.DT_REG - case fs.Symlink: - return linux.DT_LNK - case fs.Directory, fs.SpecialDirectory: - return linux.DT_DIR - case fs.Pipe: - return linux.DT_FIFO - case fs.CharacterDevice: - return linux.DT_CHR - case fs.BlockDevice: - return linux.DT_BLK - case fs.Socket: - return linux.DT_SOCK - default: - return linux.DT_UNKNOWN - } -} - // padRec pads the name field until the rec length is a multiple of the width, // which must be a power of 2. It returns the padded rec length. 
func (d *dirent) padRec(width int) uint16 { diff --git a/pkg/sentry/syscalls/linux/sys_splice.go b/pkg/sentry/syscalls/linux/sys_splice.go index a7c98efcb..17e3dde1f 100644 --- a/pkg/sentry/syscalls/linux/sys_splice.go +++ b/pkg/sentry/syscalls/linux/sys_splice.go @@ -207,6 +207,10 @@ func Splice(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal return 0, nil, syserror.ESPIPE } if outOffset != 0 { + if !outFile.Flags().Pwrite { + return 0, nil, syserror.EINVAL + } + var offset int64 if _, err := t.CopyIn(outOffset, &offset); err != nil { return 0, nil, err @@ -220,6 +224,10 @@ func Splice(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal return 0, nil, syserror.ESPIPE } if inOffset != 0 { + if !inFile.Flags().Pread { + return 0, nil, syserror.EINVAL + } + var offset int64 if _, err := t.CopyIn(inOffset, &offset); err != nil { return 0, nil, err diff --git a/pkg/tcpip/network/arp/arp.go b/pkg/tcpip/network/arp/arp.go index 60070874d..ea7296e6a 100644 --- a/pkg/tcpip/network/arp/arp.go +++ b/pkg/tcpip/network/arp/arp.go @@ -109,6 +109,7 @@ func (e *endpoint) HandlePacket(r *stack.Route, vv buffer.VectorisedView) { pkt.SetOp(header.ARPReply) copy(pkt.HardwareAddressSender(), r.LocalLinkAddress[:]) copy(pkt.ProtocolAddressSender(), h.ProtocolAddressTarget()) + copy(pkt.HardwareAddressTarget(), h.HardwareAddressSender()) copy(pkt.ProtocolAddressTarget(), h.ProtocolAddressSender()) e.linkEP.WritePacket(r, nil /* gso */, hdr, buffer.VectorisedView{}, ProtocolNumber) fallthrough // also fill the cache from requests diff --git a/pkg/tcpip/network/arp/arp_test.go b/pkg/tcpip/network/arp/arp_test.go index 66c55821b..e477046db 100644 --- a/pkg/tcpip/network/arp/arp_test.go +++ b/pkg/tcpip/network/arp/arp_test.go @@ -15,6 +15,7 @@ package arp_test import ( + "strconv" "testing" "time" @@ -101,40 +102,30 @@ func TestDirectRequest(t *testing.T) { c.linkEP.Inject(arp.ProtocolNumber, v.ToVectorisedView()) } - inject(stackAddr1) - { - pkt := <-c.linkEP.C - if pkt.Proto != arp.ProtocolNumber { - t.Fatalf("stackAddr1: expected ARP response, got network protocol number %v", pkt.Proto) - } - rep := header.ARP(pkt.Header) - if !rep.IsValid() { - t.Fatalf("stackAddr1: invalid ARP response len(pkt.Header)=%d", len(pkt.Header)) - } - if tcpip.Address(rep.ProtocolAddressSender()) != stackAddr1 { - t.Errorf("stackAddr1: expected sender to be set") - } - if got := tcpip.LinkAddress(rep.HardwareAddressSender()); got != stackLinkAddr { - t.Errorf("stackAddr1: expected sender to be stackLinkAddr, got %q", got) - } - } - - inject(stackAddr2) - { - pkt := <-c.linkEP.C - if pkt.Proto != arp.ProtocolNumber { - t.Fatalf("stackAddr2: expected ARP response, got network protocol number %v", pkt.Proto) - } - rep := header.ARP(pkt.Header) - if !rep.IsValid() { - t.Fatalf("stackAddr2: invalid ARP response len(pkt.Header)=%d", len(pkt.Header)) - } - if tcpip.Address(rep.ProtocolAddressSender()) != stackAddr2 { - t.Errorf("stackAddr2: expected sender to be set") - } - if got := tcpip.LinkAddress(rep.HardwareAddressSender()); got != stackLinkAddr { - t.Errorf("stackAddr2: expected sender to be stackLinkAddr, got %q", got) - } + for i, address := range []tcpip.Address{stackAddr1, stackAddr2} { + t.Run(strconv.Itoa(i), func(t *testing.T) { + inject(address) + pkt := <-c.linkEP.C + if pkt.Proto != arp.ProtocolNumber { + t.Fatalf("expected ARP response, got network protocol number %d", pkt.Proto) + } + rep := header.ARP(pkt.Header) + if !rep.IsValid() { + t.Fatalf("invalid ARP response 
len(pkt.Header)=%d", len(pkt.Header)) + } + if got, want := tcpip.LinkAddress(rep.HardwareAddressSender()), stackLinkAddr; got != want { + t.Errorf("got HardwareAddressSender = %s, want = %s", got, want) + } + if got, want := tcpip.Address(rep.ProtocolAddressSender()), tcpip.Address(h.ProtocolAddressTarget()); got != want { + t.Errorf("got ProtocolAddressSender = %s, want = %s", got, want) + } + if got, want := tcpip.LinkAddress(rep.HardwareAddressTarget()), tcpip.LinkAddress(h.HardwareAddressSender()); got != want { + t.Errorf("got HardwareAddressTarget = %s, want = %s", got, want) + } + if got, want := tcpip.Address(rep.ProtocolAddressTarget()), tcpip.Address(h.ProtocolAddressSender()); got != want { + t.Errorf("got ProtocolAddressTarget = %s, want = %s", got, want) + } + }) } inject(stackAddrBad) diff --git a/pkg/tcpip/transport/tcp/accept.go b/pkg/tcpip/transport/tcp/accept.go index 52fd1bfa3..e9c5099ea 100644 --- a/pkg/tcpip/transport/tcp/accept.go +++ b/pkg/tcpip/transport/tcp/accept.go @@ -96,6 +96,17 @@ type listenContext struct { hasher hash.Hash v6only bool netProto tcpip.NetworkProtocolNumber + // pendingMu protects pendingEndpoints. This should only be accessed + // by the listening endpoint's worker goroutine. + // + // Lock Ordering: listenEP.workerMu -> pendingMu + pendingMu sync.Mutex + // pending is used to wait for all pendingEndpoints to finish when + // a socket is closed. + pending sync.WaitGroup + // pendingEndpoints is a map of all endpoints for which a handshake is + // in progress. + pendingEndpoints map[stack.TransportEndpointID]*endpoint } // timeStamp returns an 8-bit timestamp with a granularity of 64 seconds. @@ -133,14 +144,15 @@ func decSynRcvdCount() { } // newListenContext creates a new listen context. -func newListenContext(stack *stack.Stack, listenEP *endpoint, rcvWnd seqnum.Size, v6only bool, netProto tcpip.NetworkProtocolNumber) *listenContext { +func newListenContext(stk *stack.Stack, listenEP *endpoint, rcvWnd seqnum.Size, v6only bool, netProto tcpip.NetworkProtocolNumber) *listenContext { l := &listenContext{ - stack: stack, - rcvWnd: rcvWnd, - hasher: sha1.New(), - v6only: v6only, - netProto: netProto, - listenEP: listenEP, + stack: stk, + rcvWnd: rcvWnd, + hasher: sha1.New(), + v6only: v6only, + netProto: netProto, + listenEP: listenEP, + pendingEndpoints: make(map[stack.TransportEndpointID]*endpoint), } rand.Read(l.nonce[0][:]) @@ -253,6 +265,17 @@ func (l *listenContext) createEndpointAndPerformHandshake(s *segment, opts *head return nil, err } + // listenEP is nil when listenContext is used by tcp.Forwarder. + if l.listenEP != nil { + l.listenEP.mu.Lock() + if l.listenEP.state != StateListen { + l.listenEP.mu.Unlock() + return nil, tcpip.ErrConnectionAborted + } + l.addPendingEndpoint(ep) + l.listenEP.mu.Unlock() + } + // Perform the 3-way handshake. 
h := newHandshake(ep, seqnum.Size(ep.initialReceiveWindow())) @@ -260,6 +283,9 @@ func (l *listenContext) createEndpointAndPerformHandshake(s *segment, opts *head if err := h.execute(); err != nil { ep.stack.Stats().TCP.FailedConnectionAttempts.Increment() ep.Close() + if l.listenEP != nil { + l.removePendingEndpoint(ep) + } return nil, err } ep.mu.Lock() @@ -274,15 +300,41 @@ func (l *listenContext) createEndpointAndPerformHandshake(s *segment, opts *head return ep, nil } +func (l *listenContext) addPendingEndpoint(n *endpoint) { + l.pendingMu.Lock() + l.pendingEndpoints[n.id] = n + l.pending.Add(1) + l.pendingMu.Unlock() +} + +func (l *listenContext) removePendingEndpoint(n *endpoint) { + l.pendingMu.Lock() + delete(l.pendingEndpoints, n.id) + l.pending.Done() + l.pendingMu.Unlock() +} + +func (l *listenContext) closeAllPendingEndpoints() { + l.pendingMu.Lock() + for _, n := range l.pendingEndpoints { + n.notifyProtocolGoroutine(notifyClose) + } + l.pendingMu.Unlock() + l.pending.Wait() +} + // deliverAccepted delivers the newly-accepted endpoint to the listener. If the // endpoint has transitioned out of the listen state, the new endpoint is closed // instead. func (e *endpoint) deliverAccepted(n *endpoint) { - e.mu.RLock() + e.mu.Lock() state := e.state - e.mu.RUnlock() + e.pendingAccepted.Add(1) + defer e.pendingAccepted.Done() + acceptedChan := e.acceptedChan + e.mu.Unlock() if state == StateListen { - e.acceptedChan <- n + acceptedChan <- n e.waiterQueue.Notify(waiter.EventIn) } else { n.Close() @@ -304,7 +356,7 @@ func (e *endpoint) handleSynSegment(ctx *listenContext, s *segment, opts *header e.stack.Stats().TCP.FailedConnectionAttempts.Increment() return } - + ctx.removePendingEndpoint(n) e.deliverAccepted(n) } @@ -451,6 +503,11 @@ func (e *endpoint) handleListenSegment(ctx *listenContext, s *segment) { // protocolListenLoop is the main loop of a listening TCP endpoint. It runs in // its own goroutine and is responsible for handling connection requests. func (e *endpoint) protocolListenLoop(rcvWnd seqnum.Size) *tcpip.Error { + e.mu.Lock() + v6only := e.v6only + e.mu.Unlock() + ctx := newListenContext(e.stack, e, rcvWnd, v6only, e.netProto) + defer func() { // Mark endpoint as closed. This will prevent goroutines running // handleSynSegment() from attempting to queue new connections @@ -458,6 +515,9 @@ func (e *endpoint) protocolListenLoop(rcvWnd seqnum.Size) *tcpip.Error { e.mu.Lock() e.state = StateClose + // close any endpoints in SYN-RCVD state. + ctx.closeAllPendingEndpoints() + // Do cleanup if needed. e.completeWorkerLocked() @@ -470,12 +530,6 @@ func (e *endpoint) protocolListenLoop(rcvWnd seqnum.Size) *tcpip.Error { e.waiterQueue.Notify(waiter.EventIn | waiter.EventOut) }() - e.mu.Lock() - v6only := e.v6only - e.mu.Unlock() - - ctx := newListenContext(e.stack, e, rcvWnd, v6only, e.netProto) - s := sleep.Sleeper{} s.AddWaker(&e.notificationWaker, wakerForNotification) s.AddWaker(&e.newSegmentWaker, wakerForNewSegment) @@ -492,7 +546,6 @@ func (e *endpoint) protocolListenLoop(rcvWnd seqnum.Size) *tcpip.Error { e.handleListenSegment(ctx, s) s.decRef() } - synRcvdCount.pending.Wait() close(e.drainDone) <-e.undrain } diff --git a/pkg/tcpip/transport/tcp/dual_stack_test.go b/pkg/tcpip/transport/tcp/dual_stack_test.go index d9f79e8c5..c54610a87 100644 --- a/pkg/tcpip/transport/tcp/dual_stack_test.go +++ b/pkg/tcpip/transport/tcp/dual_stack_test.go @@ -570,3 +570,89 @@ func TestV4AcceptOnV4(t *testing.T) { // Test acceptance. 
testV4Accept(t, c) } + +func testV4ListenClose(t *testing.T, c *context.Context) { + // Set the SynRcvd threshold to zero to force a syn cookie based accept + // to happen. + saved := tcp.SynRcvdCountThreshold + defer func() { + tcp.SynRcvdCountThreshold = saved + }() + tcp.SynRcvdCountThreshold = 0 + const n = uint16(32) + + // Start listening. + if err := c.EP.Listen(int(tcp.SynRcvdCountThreshold + 1)); err != nil { + t.Fatalf("Listen failed: %v", err) + } + + irs := seqnum.Value(789) + for i := uint16(0); i < n; i++ { + // Send a SYN request. + c.SendPacket(nil, &context.Headers{ + SrcPort: context.TestPort + i, + DstPort: context.StackPort, + Flags: header.TCPFlagSyn, + SeqNum: irs, + RcvWnd: 30000, + }) + } + + // Each of these ACK's will cause a syn-cookie based connection to be + // accepted and delivered to the listening endpoint. + for i := uint16(0); i < n; i++ { + b := c.GetPacket() + tcp := header.TCP(header.IPv4(b).Payload()) + iss := seqnum.Value(tcp.SequenceNumber()) + // Send ACK. + c.SendPacket(nil, &context.Headers{ + SrcPort: tcp.DestinationPort(), + DstPort: context.StackPort, + Flags: header.TCPFlagAck, + SeqNum: irs + 1, + AckNum: iss + 1, + RcvWnd: 30000, + }) + } + + // Try to accept the connection. + we, ch := waiter.NewChannelEntry(nil) + c.WQ.EventRegister(&we, waiter.EventIn) + defer c.WQ.EventUnregister(&we) + nep, _, err := c.EP.Accept() + if err == tcpip.ErrWouldBlock { + // Wait for connection to be established. + select { + case <-ch: + nep, _, err = c.EP.Accept() + if err != nil { + t.Fatalf("Accept failed: %v", err) + } + + case <-time.After(10 * time.Second): + t.Fatalf("Timed out waiting for accept") + } + } + nep.Close() + c.EP.Close() +} + +func TestV4ListenCloseOnV4(t *testing.T) { + c := context.New(t, defaultMTU) + defer c.Cleanup() + + // Create TCP endpoint. + var err *tcpip.Error + c.EP, err = c.Stack().NewEndpoint(tcp.ProtocolNumber, ipv4.ProtocolNumber, &c.WQ) + if err != nil { + t.Fatalf("NewEndpoint failed: %v", err) + } + + // Bind to wildcard. + if err := c.EP.Bind(tcpip.FullAddress{Port: context.StackPort}); err != nil { + t.Fatalf("Bind failed: %v", err) + } + + // Test acceptance. + testV4ListenClose(t, c) +} diff --git a/pkg/tcpip/transport/tcp/endpoint.go b/pkg/tcpip/transport/tcp/endpoint.go index 353e2efaf..0e16877e7 100644 --- a/pkg/tcpip/transport/tcp/endpoint.go +++ b/pkg/tcpip/transport/tcp/endpoint.go @@ -362,6 +362,12 @@ type endpoint struct { // without hearing a response, the connection is closed. keepalive keepalive + // pendingAccepted is a synchronization primitive used to track number + // of connections that are queued up to be delivered to the accepted + // channel. We use this to ensure that all goroutines blocked on writing + // to the acceptedChan below terminate before we close acceptedChan. + pendingAccepted sync.WaitGroup `state:"nosave"` + // acceptedChan is used by a listening endpoint protocol goroutine to // send newly accepted connections to the endpoint so that they can be // read by Accept() calls. @@ -375,7 +381,11 @@ type endpoint struct { // The goroutine drain completion notification channel. drainDone chan struct{} `state:"nosave"` - // The goroutine undrain notification channel. + // The goroutine undrain notification channel. This is currently used as + // a way to block the worker goroutines. Today nothing closes/writes + // this channel and this causes any goroutines waiting on this to just + // block. 
This is used during save/restore to prevent worker goroutines + // from mutating state as it's being saved. undrain chan struct{} `state:"nosave"` // probe if not nil is invoked on every received segment. It is passed @@ -575,6 +585,34 @@ func (e *endpoint) Close() { e.mu.Unlock() } +// closePendingAcceptableConnections closes all connections that have completed +// handshake but not yet been delivered to the application. +func (e *endpoint) closePendingAcceptableConnectionsLocked() { + done := make(chan struct{}) + // Spin a goroutine up as ranging on e.acceptedChan will just block when + // there are no more connections in the channel. Using a non-blocking + // select does not work as it can potentially select the default case + // even when there are pending writes but that are not yet written to + // the channel. + go func() { + defer close(done) + for n := range e.acceptedChan { + n.mu.Lock() + n.resetConnectionLocked(tcpip.ErrConnectionAborted) + n.mu.Unlock() + n.Close() + } + }() + // pendingAccepted(see endpoint.deliverAccepted) tracks the number of + // endpoints which have completed handshake but are not yet written to + // the e.acceptedChan. We wait here till the goroutine above can drain + // all such connections from e.acceptedChan. + e.pendingAccepted.Wait() + close(e.acceptedChan) + <-done + e.acceptedChan = nil +} + // cleanupLocked frees all resources associated with the endpoint. It is called // after Close() is called and the worker goroutine (if any) is done with its // work. @@ -582,14 +620,7 @@ func (e *endpoint) cleanupLocked() { // Close all endpoints that might have been accepted by TCP but not by // the client. if e.acceptedChan != nil { - close(e.acceptedChan) - for n := range e.acceptedChan { - n.mu.Lock() - n.resetConnectionLocked(tcpip.ErrConnectionAborted) - n.mu.Unlock() - n.Close() - } - e.acceptedChan = nil + e.closePendingAcceptableConnectionsLocked() } e.workerCleanup = false diff --git a/runsc/container/container.go b/runsc/container/container.go index 27e9c2e0f..bbb364214 100644 --- a/runsc/container/container.go +++ b/runsc/container/container.go @@ -138,6 +138,34 @@ type Container struct { RootContainerDir string } +// loadSandbox loads all containers that belong to the sandbox with the given +// ID. +func loadSandbox(rootDir, id string) ([]*Container, error) { + cids, err := List(rootDir) + if err != nil { + return nil, err + } + + // Load the container metadata. + var containers []*Container + for _, cid := range cids { + container, err := Load(rootDir, cid) + if err != nil { + // Container file may not exist if it raced with creation/deletion or + // directory was left behind. Load provides a snapshot in time, so it's + // fine to skip it. + if os.IsNotExist(err) { + continue + } + return nil, fmt.Errorf("loading container %q: %v", id, err) + } + if container.Sandbox.ID == id { + containers = append(containers, container) + } + } + return containers, nil +} + // Load loads a container with the given id from a metadata file. id may be an // abbreviation of the full container id, in which case Load loads the // container to which id unambiguously refers to. @@ -180,7 +208,7 @@ func Load(rootDir, id string) (*Container, error) { // If the status is "Running" or "Created", check that the sandbox // process still exists, and set it to Stopped if it does not. // - // This is inherently racey. + // This is inherently racy. if c.Status == Running || c.Status == Created { // Check if the sandbox process is still running. 
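closePendingAcceptableConnectionsLocked above is an instance of a general drain-then-close pattern: writers register with a WaitGroup before they may send, a drainer goroutine keeps consuming the channel so no writer blocks forever, Wait guarantees no registered writer remains, and only then is the channel closed. The sketch below shows that shape with invented names (listener, deliver, close); the real code also resets and closes each drained endpoint, and the sleep in main exists only so the demo has something to drain.

```go
package main

import (
	"fmt"
	"sync"
	"time"
)

type listener struct {
	mu       sync.Mutex
	open     bool
	pending  sync.WaitGroup // writers in flight toward accepted
	accepted chan int
}

// deliver registers with pending before sending, but only while the listener
// is still open, so close() can account for every possible sender.
func (l *listener) deliver(v int) {
	l.mu.Lock()
	if !l.open {
		l.mu.Unlock()
		return // listener is closing; drop the connection
	}
	l.pending.Add(1)
	l.mu.Unlock()
	defer l.pending.Done()
	l.accepted <- v
}

// close drains and then closes the channel without racing any deliver().
func (l *listener) close() {
	l.mu.Lock()
	l.open = false // no new writers may register after this point
	l.mu.Unlock()

	done := make(chan struct{})
	go func() { // drainer: keeps blocked writers moving while we wait
		defer close(done)
		for v := range l.accepted {
			fmt.Println("dropping pending connection", v)
		}
	}()
	l.pending.Wait()  // every registered writer has finished its send
	close(l.accepted) // now safe to close
	<-done
}

func main() {
	l := &listener{open: true, accepted: make(chan int)}
	for i := 0; i < 3; i++ {
		go l.deliver(i)
	}
	time.Sleep(50 * time.Millisecond) // demo only: let some deliveries start
	l.close()
}
```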
if !c.isSandboxRunning() { @@ -237,7 +265,13 @@ func List(rootDir string) ([]string, error) { } var out []string for _, f := range fs { - out = append(out, f.Name()) + // Filter out directories that do not belong to a container. + cid := f.Name() + if validateID(cid) == nil { + if _, err := os.Stat(filepath.Join(rootDir, cid, metadataFilename)); err == nil { + out = append(out, f.Name()) + } + } } return out, nil } @@ -1108,6 +1142,10 @@ func runInCgroup(cg *cgroup.Cgroup, fn func() error) error { // adjustOOMScoreAdj sets the oom_score_adj for the sandbox and all gofers. // oom_score_adj is set to the lowest oom_score_adj among the containers // running in the sandbox. +// +// TODO(gvisor.dev/issue/512): This call could race with other containers being +// created at the same time and end up setting the wrong oom_score_adj to the +// sandbox. func (c *Container) adjustOOMScoreAdj(conf *boot.Config) error { // If this container's OOMScoreAdj is nil then we can exit early as no // change should be made to oom_score_adj for the sandbox. @@ -1115,21 +1153,9 @@ func (c *Container) adjustOOMScoreAdj(conf *boot.Config) error { return nil } - ids, err := List(conf.RootDir) + containers, err := loadSandbox(conf.RootDir, c.Sandbox.ID) if err != nil { - return err - } - - // Load the container metadata. - var containers []*Container - for _, id := range ids { - container, err := Load(conf.RootDir, id) - if err != nil { - return fmt.Errorf("loading container %q: %v", id, err) - } - if container.Sandbox.ID == c.Sandbox.ID { - containers = append(containers, container) - } + return fmt.Errorf("loading sandbox containers: %v", err) } // Get the lowest score for all containers. @@ -1150,30 +1176,16 @@ func (c *Container) adjustOOMScoreAdj(conf *boot.Config) error { return nil } - // Set oom_score_adj for the sandbox. + // Set the sandbox's oom_score_adj to the lowest value among all its containers. if err := setOOMScoreAdj(c.Sandbox.Pid, lowScore); err != nil { return fmt.Errorf("setting oom_score_adj for sandbox %q: %v", c.Sandbox.ID, err) } - // Set the gofer's oom_score_adj to the minimum of -500 and the - // sandbox's oom_score_adj to better ensure that the sandbox is killed - // before the gofer. - // - // TODO(gvisor.dev/issue/601) Set oom_score_adj for the gofer to - // the same oom_score_adj as the sandbox. - goferScoreAdj := -500 - if lowScore < goferScoreAdj { - goferScoreAdj = lowScore - } - - // Set oom_score_adj for gofers for all containers in the sandbox. - for _, container := range containers { - err := setOOMScoreAdj(container.GoferPid, goferScoreAdj) - if err != nil { - return fmt.Errorf("setting oom_score_adj for container %q: %v", container.ID, err) - } + // Set the gofer's oom_score_adj to the container's own value, since the gofer + // is dedicated to this container, in case the gofer uses up too much memory.
+ if err := setOOMScoreAdj(c.GoferPid, *c.Spec.Process.OOMScoreAdj); err != nil { + return fmt.Errorf("setting gofer oom_score_adj for container %q: %v", c.ID, err) } - return nil } diff --git a/runsc/container/multi_container_test.go b/runsc/container/multi_container_test.go index 2ef065a15..2d51fecc6 100644 --- a/runsc/container/multi_container_test.go +++ b/runsc/container/multi_container_test.go @@ -60,11 +60,14 @@ func createSpecs(cmds ...[]string) ([]*specs.Spec, []string) { } func startContainers(conf *boot.Config, specs []*specs.Spec, ids []string) ([]*Container, func(), error) { - rootDir, err := testutil.SetupRootDir() - if err != nil { - return nil, nil, fmt.Errorf("error creating root dir: %v", err) + // Setup root dir if one hasn't been provided. + if len(conf.RootDir) == 0 { + rootDir, err := testutil.SetupRootDir() + if err != nil { + return nil, nil, fmt.Errorf("error creating root dir: %v", err) + } + conf.RootDir = rootDir } - conf.RootDir = rootDir var containers []*Container var bundles []string @@ -75,7 +78,7 @@ func startContainers(conf *boot.Config, specs []*specs.Spec, ids []string) ([]*C for _, b := range bundles { os.RemoveAll(b) } - os.RemoveAll(rootDir) + os.RemoveAll(conf.RootDir) } for i, spec := range specs { bundleDir, err := testutil.SetupBundleDir(spec) @@ -1423,3 +1426,62 @@ func TestMultiContainerGoferKilled(t *testing.T) { } } } + +func TestMultiContainerLoadSandbox(t *testing.T) { + sleep := []string{"sleep", "100"} + specs, ids := createSpecs(sleep, sleep, sleep) + conf := testutil.TestConfig() + + // Create containers for the sandbox. + wants, cleanup, err := startContainers(conf, specs, ids) + if err != nil { + t.Fatalf("error starting containers: %v", err) + } + defer cleanup() + + // Then create unrelated containers. + for i := 0; i < 3; i++ { + specs, ids = createSpecs(sleep, sleep, sleep) + _, cleanup, err = startContainers(conf, specs, ids) + if err != nil { + t.Fatalf("error starting containers: %v", err) + } + defer cleanup() + } + + // Create an unrelated directory under root. + dir := filepath.Join(conf.RootDir, "not-a-container") + if err := os.MkdirAll(dir, 0755); err != nil { + t.Fatalf("os.MkdirAll(%q)=%v", dir, err) + } + + // Create a valid but empty container directory. + randomCID := testutil.UniqueContainerID() + dir = filepath.Join(conf.RootDir, randomCID) + if err := os.MkdirAll(dir, 0755); err != nil { + t.Fatalf("os.MkdirAll(%q)=%v", dir, err) + } + + // Load the sandbox and check that the correct containers were returned. + id := wants[0].Sandbox.ID + gots, err := loadSandbox(conf.RootDir, id) + if err != nil { + t.Fatalf("loadSandbox()=%v", err) + } + wantIDs := make(map[string]struct{}) + for _, want := range wants { + wantIDs[want.ID] = struct{}{} + } + for _, got := range gots { + if got.Sandbox.ID != id { + t.Errorf("wrong sandbox ID, got: %v, want: %v", got.Sandbox.ID, id) + } + if _, ok := wantIDs[got.ID]; !ok { + t.Errorf("wrong container ID, got: %v, wants: %v", got.ID, wantIDs) + } + delete(wantIDs, got.ID) + } + if len(wantIDs) != 0 { + t.Errorf("containers not found: %v", wantIDs) + } +} diff --git a/runsc/test/testutil/testutil.go b/runsc/test/testutil/testutil.go index 406f6d08a..e288c7758 100644 --- a/runsc/test/testutil/testutil.go +++ b/runsc/test/testutil/testutil.go @@ -189,11 +189,14 @@ func SetupRootDir() (string, error) { // SetupContainer creates a bundle and root dir for the container, generates a // test config, and writes the spec to config.json in the bundle dir. 
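adjustOOMScoreAdj above now loads only the containers that share the sandbox and applies the lowest oom_score_adj among them to the sandbox process, while each gofer simply inherits its own container's value. A hedged sketch of that "lowest score wins" selection, with a stand-in container type rather than the real one:

```go
package main

import "fmt"

// container is a stand-in for the real Container type; only the field the
// computation needs is modeled here.
type container struct {
	ID          string
	OOMScoreAdj *int // nil means the container expressed no preference.
}

// lowestOOMScoreAdj returns the smallest oom_score_adj requested by any
// container in the sandbox, and false if none of them set one.
func lowestOOMScoreAdj(containers []*container) (int, bool) {
	low, found := 0, false
	for _, c := range containers {
		if c.OOMScoreAdj == nil {
			continue
		}
		if !found || *c.OOMScoreAdj < low {
			low, found = *c.OOMScoreAdj, true
		}
	}
	return low, found
}

func main() {
	ten, minusFive := 10, -5
	sandbox := []*container{
		{ID: "a", OOMScoreAdj: &ten},
		{ID: "b", OOMScoreAdj: &minusFive},
		{ID: "c"}, // no preference
	}
	if low, ok := lowestOOMScoreAdj(sandbox); ok {
		fmt.Printf("sandbox oom_score_adj would be set to %d\n", low)
	}
}
```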
func SetupContainer(spec *specs.Spec, conf *boot.Config) (rootDir, bundleDir string, err error) { - rootDir, err = SetupRootDir() - if err != nil { - return "", "", err + // Setup root dir if one hasn't been provided. + if len(conf.RootDir) == 0 { + rootDir, err = SetupRootDir() + if err != nil { + return "", "", err + } + conf.RootDir = rootDir } - conf.RootDir = rootDir bundleDir, err = SetupBundleDir(spec) return rootDir, bundleDir, err } diff --git a/test/runtimes/README.md b/test/runtimes/README.md index 4e5a950bc..34d3507be 100644 --- a/test/runtimes/README.md +++ b/test/runtimes/README.md @@ -16,7 +16,7 @@ The following runtimes are currently supported: 1) [Install and configure Docker](https://docs.docker.com/install/) -2) Build each Docker container: +2) Build each Docker container from the runtimes directory: ```bash $ docker build -f $LANG/Dockerfile [-t $NAME] . diff --git a/test/runtimes/go/Dockerfile b/test/runtimes/go/Dockerfile index cd55608cd..1d5202b70 100644 --- a/test/runtimes/go/Dockerfile +++ b/test/runtimes/go/Dockerfile @@ -23,9 +23,12 @@ ENV LANG_DIR=${GOROOT} WORKDIR ${LANG_DIR}/src RUN ./make.bash +# Pre-compile the tests for faster execution +RUN ["/root/go/bin/go", "tool", "dist", "test", "-compile-only"] WORKDIR ${LANG_DIR} -COPY proctor-go.go ${LANG_DIR} +COPY common /root/go/src/gvisor.dev/gvisor/test/runtimes/common/common +COPY go/proctor-go.go ${LANG_DIR} ENTRYPOINT ["/root/go/bin/go", "run", "proctor-go.go"] diff --git a/test/runtimes/java/Dockerfile b/test/runtimes/java/Dockerfile index e162d7218..b9132b575 100644 --- a/test/runtimes/java/Dockerfile +++ b/test/runtimes/java/Dockerfile @@ -1,25 +1,16 @@ FROM ubuntu:bionic # This hash is associated with a specific JDK release and needed for ensuring # the same version is downloaded every time. -ENV LANG_HASH=af47e0398606 -ENV LANG_VER=11u-dev +ENV LANG_HASH=76072a077ee1 +ENV LANG_VER=11 ENV LANG_NAME=Java RUN apt-get update && apt-get install -y \ autoconf \ build-essential \ - curl\ - file \ - libasound2-dev \ - libcups2-dev \ - libfontconfig1-dev \ - libx11-dev \ - libxext-dev \ - libxrandr-dev \ - libxrender-dev \ - libxt-dev \ - libxtst-dev \ + curl \ make \ + openjdk-${LANG_VER}-jdk \ unzip \ zip @@ -27,26 +18,18 @@ WORKDIR /root RUN curl -o go.tar.gz https://dl.google.com/go/go1.12.6.linux-amd64.tar.gz RUN tar -zxf go.tar.gz -# Use curl instead of ADD to prevent redownload every time. -RUN curl -o jdk.tar.gz http://hg.openjdk.java.net/jdk-updates/jdk${LANG_VER}/archive/${LANG_HASH}.tar.gz -# Download Java version N-1 to be used as the Boot JDK to build Java version N. -RUN curl -o bootjdk.tar.gz https://download.java.net/openjdk/jdk10/ri/openjdk-10+44_linux-x64_bin_ri.tar.gz - -RUN tar -zxf jdk.tar.gz -RUN tar -zxf bootjdk.tar.gz - -# Specify the JDK to be used by jtreg. -ENV JT_JAVA=/root/jdk${LANG_VER}-${LANG_HASH}/build/linux-x86_64-normal-server-release/jdk -ENV LANG_DIR=/root/jdk${LANG_VER}-${LANG_HASH} - -WORKDIR ${LANG_DIR} +# Download the JDK test library. 
+RUN set -ex \ + && curl -fsSL --retry 10 -o /tmp/jdktests.tar.gz http://hg.openjdk.java.net/jdk/jdk${LANG_VER}/archive/${LANG_HASH}.tar.gz/test \ + && tar -xzf /tmp/jdktests.tar.gz -C /root \ + && rm -f /tmp/jdktests.tar.gz RUN curl -o jtreg.tar.gz https://ci.adoptopenjdk.net/view/Dependencies/job/jtreg/lastSuccessfulBuild/artifact/jtreg-4.2.0-tip.tar.gz RUN tar -xzf jtreg.tar.gz -RUN bash configure --with-boot-jdk=/root/jdk-10 --with-jtreg=${LANG_DIR}/jtreg -RUN make clean -RUN make images -COPY proctor-java.go ${LANG_DIR} +ENV LANG_DIR=/root + +COPY common /root/go/src/gvisor.dev/gvisor/test/runtimes/common/common +COPY java/proctor-java.go ${LANG_DIR} ENTRYPOINT ["/root/go/bin/go", "run", "proctor-java.go"] diff --git a/test/runtimes/java/proctor-java.go b/test/runtimes/java/proctor-java.go index c1e611b4b..7f6a66f4f 100644 --- a/test/runtimes/java/proctor-java.go +++ b/test/runtimes/java/proctor-java.go @@ -29,6 +29,7 @@ import ( var ( dir = os.Getenv("LANG_DIR") + hash = os.Getenv("LANG_HASH") jtreg = filepath.Join(dir, "jtreg/bin/jtreg") exclDirs = regexp.MustCompile(`(^(sun\/security)|(java\/util\/stream)|(java\/time)| )`) ) @@ -44,7 +45,7 @@ func main() { func (j javaRunner) ListTests() ([]string, error) { args := []string{ - "-dir:test/jdk", + "-dir:/root/jdk11-" + hash + "/test/jdk", "-ignore:quiet", "-a", "-listtests", @@ -69,7 +70,7 @@ func (j javaRunner) ListTests() ([]string, error) { } func (j javaRunner) RunTest(test string) error { - args := []string{"-dir:test/jdk/", test} + args := []string{"-noreport", "-dir:/root/jdk11-" + hash + "/test/jdk", test} cmd := exec.Command(jtreg, args...) cmd.Stdout, cmd.Stderr = os.Stdout, os.Stderr if err := cmd.Run(); err != nil { diff --git a/test/runtimes/nodejs/Dockerfile b/test/runtimes/nodejs/Dockerfile index b2416cce8..aba30d2ee 100644 --- a/test/runtimes/nodejs/Dockerfile +++ b/test/runtimes/nodejs/Dockerfile @@ -22,7 +22,8 @@ RUN ./configure RUN make RUN make test-build -COPY proctor-nodejs.go ${LANG_DIR} +COPY common /root/go/src/gvisor.dev/gvisor/test/runtimes/common/common +COPY nodejs/proctor-nodejs.go ${LANG_DIR} # Including dumb-init emulates the Linux "init" process, preventing the failure # of tests involving worker processes. diff --git a/test/runtimes/php/Dockerfile b/test/runtimes/php/Dockerfile index 1f8959b50..491ab902d 100644 --- a/test/runtimes/php/Dockerfile +++ b/test/runtimes/php/Dockerfile @@ -24,6 +24,7 @@ WORKDIR ${LANG_DIR} RUN ./configure RUN make -COPY proctor-php.go ${LANG_DIR} +COPY common /root/go/src/gvisor.dev/gvisor/test/runtimes/common/common +COPY php/proctor-php.go ${LANG_DIR} ENTRYPOINT ["/root/go/bin/go", "run", "proctor-php.go"] diff --git a/test/runtimes/python/Dockerfile b/test/runtimes/python/Dockerfile index 811f48f8a..710daee43 100644 --- a/test/runtimes/python/Dockerfile +++ b/test/runtimes/python/Dockerfile @@ -26,6 +26,7 @@ WORKDIR ${LANG_DIR} RUN ./configure --with-pydebug RUN make -s -j2 -COPY proctor-python.go ${LANG_DIR} +COPY common /root/go/src/gvisor.dev/gvisor/test/runtimes/common/common +COPY python/proctor-python.go ${LANG_DIR} ENTRYPOINT ["/root/go/bin/go", "run", "proctor-python.go"] diff --git a/test/runtimes/runtimes_test.go b/test/runtimes/runtimes_test.go index 6bf954e78..43dd6f5b7 100644 --- a/test/runtimes/runtimes_test.go +++ b/test/runtimes/runtimes_test.go @@ -22,8 +22,16 @@ import ( "gvisor.dev/gvisor/runsc/test/testutil" ) -func TestNodeJS(t *testing.T) { - const img = "gcr.io/gvisor-proctor/nodejs" +// Wait time for each test to run. 
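The runtime images above now copy the shared test/runtimes/common package in next to each proctor, and proctor-java.go exposes ListTests and RunTest methods. A plausible shape for the contract those proctors implement is sketched below; the interface name, the runAll driver, and fakeRunner are assumptions for illustration, not the actual common package API:

```go
package main

import (
	"fmt"
	"log"
)

// TestRunner is a guess at the contract each per-language proctor implements:
// enumerate the upstream language tests, then run them one at a time.
type TestRunner interface {
	ListTests() ([]string, error)
	RunTest(test string) error
}

// runAll drives a runner the way the proctor binaries appear to: list once,
// then execute each test and report failures individually.
func runAll(r TestRunner) error {
	tests, err := r.ListTests()
	if err != nil {
		return fmt.Errorf("listing tests: %v", err)
	}
	for _, t := range tests {
		if err := r.RunTest(t); err != nil {
			return fmt.Errorf("test %q failed: %v", t, err)
		}
	}
	return nil
}

// fakeRunner stands in for a real language runner so the sketch is runnable.
type fakeRunner struct{}

func (fakeRunner) ListTests() ([]string, error) { return []string{"t1", "t2"}, nil }
func (fakeRunner) RunTest(test string) error    { return nil }

func main() {
	if err := runAll(fakeRunner{}); err != nil {
		log.Fatal(err)
	}
	fmt.Println("all tests passed")
}
```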
+const timeout = 180 * time.Second + +// testLang executes the docker container associated with the given +// language. It captures the output of the list function and executes +// each test individually, reporting any errors received. +func testLang(t *testing.T, lang string) { + t.Helper() + + img := "gcr.io/gvisor-presubmit/" + lang if err := testutil.Pull(img); err != nil { t.Fatalf("docker pull failed: %v", err) } @@ -49,7 +57,7 @@ func TestNodeJS(t *testing.T) { } defer d.CleanUp() - status, err := d.Wait(60 * time.Second) + status, err := d.Wait(timeout) if err != nil { t.Fatalf("docker test %q failed to wait: %v", tc, err) } @@ -65,3 +73,23 @@ func TestNodeJS(t *testing.T) { }) } } + +func TestGo(t *testing.T) { + testLang(t, "go") +} + +func TestJava(t *testing.T) { + testLang(t, "java") +} + +func TestNodejs(t *testing.T) { + testLang(t, "nodejs") +} + +func TestPhp(t *testing.T) { + testLang(t, "php") +} + +func TestPython(t *testing.T) { + testLang(t, "python") +} diff --git a/test/syscalls/BUILD b/test/syscalls/BUILD index a3e43cad2..aa1e33fb4 100644 --- a/test/syscalls/BUILD +++ b/test/syscalls/BUILD @@ -440,7 +440,9 @@ syscall_test( ) syscall_test( - size = "medium", + size = "large", + parallel = False, + shard_count = 10, test = "//test/syscalls/linux:socket_inet_loopback_test", ) diff --git a/test/syscalls/linux/socket_inet_loopback.cc b/test/syscalls/linux/socket_inet_loopback.cc index df31d25b5..322ee07ad 100644 --- a/test/syscalls/linux/socket_inet_loopback.cc +++ b/test/syscalls/linux/socket_inet_loopback.cc @@ -145,6 +145,67 @@ TEST_P(SocketInetLoopbackTest, TCP) { ASSERT_THAT(shutdown(conn_fd.get(), SHUT_RDWR), SyscallSucceeds()); } +TEST_P(SocketInetLoopbackTest, TCPListenClose) { + auto const& param = GetParam(); + + TestAddress const& listener = param.listener; + TestAddress const& connector = param.connector; + + // Create the listening socket. + FileDescriptor listen_fd = ASSERT_NO_ERRNO_AND_VALUE( + Socket(listener.family(), SOCK_STREAM, IPPROTO_TCP)); + sockaddr_storage listen_addr = listener.addr; + ASSERT_THAT(bind(listen_fd.get(), reinterpret_cast<sockaddr*>(&listen_addr), + listener.addr_len), + SyscallSucceeds()); + ASSERT_THAT(listen(listen_fd.get(), 1001), SyscallSucceeds()); + + // Get the port bound by the listening socket. + socklen_t addrlen = listener.addr_len; + ASSERT_THAT(getsockname(listen_fd.get(), + reinterpret_cast<sockaddr*>(&listen_addr), &addrlen), + SyscallSucceeds()); + uint16_t const port = + ASSERT_NO_ERRNO_AND_VALUE(AddrPort(listener.family(), listen_addr)); + + DisableSave ds; // Too many system calls.
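The TCPListenClose case that begins above floods a listener with connections and then closes it while most of them are still sitting in the accept queue, which is the scenario the acceptedChan changes earlier in this diff guard against. A rough userspace analogue in Go, purely illustrative and not part of any test suite:

```go
package main

import (
	"fmt"
	"net"
	"sync"
	"time"
)

func main() {
	// Listen on an ephemeral loopback port.
	ln, err := net.Listen("tcp", "127.0.0.1:0")
	if err != nil {
		panic(err)
	}
	addr := ln.Addr().String()

	// Dial a burst of connections; most complete their handshake and sit in
	// the listener's accept queue without ever being Accept()ed.
	const dialers = 64
	var wg sync.WaitGroup
	for i := 0; i < dialers; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			c, err := net.DialTimeout("tcp", addr, 2*time.Second)
			if err != nil {
				return // refused/reset once the listener goes away is expected
			}
			defer c.Close()
			time.Sleep(200 * time.Millisecond)
		}()
	}

	// Accept only a few, then close the listener while connections are still
	// queued -- the situation the endpoint cleanup must handle safely.
	for i := 0; i < 4; i++ {
		c, err := ln.Accept()
		if err != nil {
			break
		}
		c.Close()
	}
	ln.Close()
	wg.Wait()
	fmt.Println("listener closed with handshaked connections still pending")
}
```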
+ sockaddr_storage conn_addr = connector.addr; + ASSERT_NO_ERRNO(SetAddrPort(connector.family(), &conn_addr, port)); + constexpr int kFDs = 2048; + constexpr int kThreadCount = 4; + constexpr int kFDsPerThread = kFDs / kThreadCount; + FileDescriptor clients[kFDs]; + std::unique_ptr<ScopedThread> threads[kThreadCount]; + for (int i = 0; i < kFDs; i++) { + clients[i] = ASSERT_NO_ERRNO_AND_VALUE( + Socket(connector.family(), SOCK_STREAM | SOCK_NONBLOCK, IPPROTO_TCP)); + } + for (int i = 0; i < kThreadCount; i++) { + threads[i] = absl::make_unique<ScopedThread>([&connector, &conn_addr, + &clients, i]() { + for (int j = 0; j < kFDsPerThread; j++) { + int k = i * kFDsPerThread + j; + int ret = + connect(clients[k].get(), reinterpret_cast<sockaddr*>(&conn_addr), + connector.addr_len); + if (ret != 0) { + EXPECT_THAT(ret, SyscallFailsWithErrno(EINPROGRESS)); + } + } + }); + } + for (int i = 0; i < kThreadCount; i++) { + threads[i]->Join(); + } + for (int i = 0; i < 32; i++) { + auto accepted = + ASSERT_NO_ERRNO_AND_VALUE(Accept(listen_fd.get(), nullptr, nullptr)); + } + // TODO(b/138400178): Fix cooperative S/R failure when ds.reset() is invoked + // before function end. + // ds.reset() +} + TEST_P(SocketInetLoopbackTest, TCPbacklog) { auto const& param = GetParam(); diff --git a/test/syscalls/linux/splice.cc b/test/syscalls/linux/splice.cc index 1875f4533..e25f264f6 100644 --- a/test/syscalls/linux/splice.cc +++ b/test/syscalls/linux/splice.cc @@ -13,6 +13,7 @@ // limitations under the License. #include <fcntl.h> +#include <sys/eventfd.h> #include <sys/sendfile.h> #include <unistd.h> @@ -135,6 +136,80 @@ TEST(SpliceTest, PipeOffsets) { SyscallFailsWithErrno(ESPIPE)); } +// Event FDs may be used with splice without an offset. +TEST(SpliceTest, FromEventFD) { + // Open the input eventfd with an initial value so that it is readable. + constexpr uint64_t kEventFDValue = 1; + int efd; + ASSERT_THAT(efd = eventfd(kEventFDValue, 0), SyscallSucceeds()); + const FileDescriptor inf(efd); + + // Create a new pipe. + int fds[2]; + ASSERT_THAT(pipe(fds), SyscallSucceeds()); + const FileDescriptor rfd(fds[0]); + const FileDescriptor wfd(fds[1]); + + // Splice 8-byte eventfd value to pipe. + constexpr int kEventFDSize = 8; + EXPECT_THAT(splice(inf.get(), nullptr, wfd.get(), nullptr, kEventFDSize, 0), + SyscallSucceedsWithValue(kEventFDSize)); + + // Contents should be equal. + std::vector<char> rbuf(kEventFDSize); + ASSERT_THAT(read(rfd.get(), rbuf.data(), rbuf.size()), + SyscallSucceedsWithValue(kEventFDSize)); + EXPECT_EQ(memcmp(rbuf.data(), &kEventFDValue, rbuf.size()), 0); +} + +// Event FDs may not be used with splice with an offset. +TEST(SpliceTest, FromEventFDOffset) { + int efd; + ASSERT_THAT(efd = eventfd(0, 0), SyscallSucceeds()); + const FileDescriptor inf(efd); + + // Create a new pipe. + int fds[2]; + ASSERT_THAT(pipe(fds), SyscallSucceeds()); + const FileDescriptor rfd(fds[0]); + const FileDescriptor wfd(fds[1]); + + // Attempt to splice 8-byte eventfd value to pipe with offset. + // + // This is not allowed because eventfd doesn't support pread. + constexpr int kEventFDSize = 8; + loff_t in_off = 0; + EXPECT_THAT(splice(inf.get(), &in_off, wfd.get(), nullptr, kEventFDSize, 0), + SyscallFailsWithErrno(EINVAL)); +} + +// Event FDs may not be used with splice with an offset. +TEST(SpliceTest, ToEventFDOffset) { + // Create a new pipe. + int fds[2]; + ASSERT_THAT(pipe(fds), SyscallSucceeds()); + const FileDescriptor rfd(fds[0]); + const FileDescriptor wfd(fds[1]); + + // Fill with a value. 
+ constexpr int kEventFDSize = 8; + std::vector<char> buf(kEventFDSize); + buf[0] = 1; + ASSERT_THAT(write(wfd.get(), buf.data(), buf.size()), + SyscallSucceedsWithValue(kEventFDSize)); + + int efd; + ASSERT_THAT(efd = eventfd(0, 0), SyscallSucceeds()); + const FileDescriptor outf(efd); + + // Attempt to splice the 8-byte value from the pipe into the eventfd with an offset. + // + // This is not allowed because eventfd doesn't support pwrite. + loff_t out_off = 0; + EXPECT_THAT(splice(rfd.get(), nullptr, outf.get(), &out_off, kEventFDSize, 0), + SyscallFailsWithErrno(EINVAL)); +} + TEST(SpliceTest, ToPipe) { // Open the input file. const TempPath in_file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile());
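The three splice/eventfd tests above assert that an eventfd can be spliced only without an offset, since eventfd supports neither pread nor pwrite. The same behaviour can be reproduced from Go with golang.org/x/sys/unix; the sketch below is Linux-only and illustrative, mirroring the FromEventFD and FromEventFDOffset cases rather than reimplementing them:

```go
package main

import (
	"encoding/binary"
	"fmt"

	"golang.org/x/sys/unix"
)

// Demonstrates the splice/eventfd semantics the tests assert: splicing from an
// eventfd works without an offset, but supplying an offset fails with EINVAL
// because eventfd has no notion of a file position.
func main() {
	efd, err := unix.Eventfd(1, 0) // readable immediately: counter starts at 1
	if err != nil {
		panic(err)
	}
	defer unix.Close(efd)

	var p [2]int
	if err := unix.Pipe(p[:]); err != nil {
		panic(err)
	}
	rfd, wfd := p[0], p[1]
	defer unix.Close(rfd)
	defer unix.Close(wfd)

	// No offset: the 8-byte eventfd counter moves into the pipe.
	n, err := unix.Splice(efd, nil, wfd, nil, 8, 0)
	fmt.Printf("splice without offset: n=%d err=%v\n", n, err)

	buf := make([]byte, 8)
	if _, err := unix.Read(rfd, buf); err != nil {
		panic(err)
	}
	fmt.Println("pipe now holds counter value:", binary.LittleEndian.Uint64(buf))

	// With an offset: eventfd does not support pread, so splice fails early.
	var off int64
	_, err = unix.Splice(efd, &off, wfd, nil, 8, 0)
	fmt.Printf("splice with offset: err=%v (expect %v)\n", err, unix.EINVAL)
}
```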