6 files changed, 398 insertions, 100 deletions
diff --git a/pkg/sentry/fsimpl/gofer/handle.go b/pkg/sentry/fsimpl/gofer/handle.go
index 5c57f6fea..02540a754 100644
--- a/pkg/sentry/fsimpl/gofer/handle.go
+++ b/pkg/sentry/fsimpl/gofer/handle.go
@@ -20,6 +20,7 @@ import (
 	"gvisor.dev/gvisor/pkg/p9"
 	"gvisor.dev/gvisor/pkg/safemem"
 	"gvisor.dev/gvisor/pkg/sentry/hostfd"
+	"gvisor.dev/gvisor/pkg/sync"
 )
 
 // handle represents a remote "open file descriptor", consisting of an opened
@@ -130,3 +131,43 @@ func (h *handle) writeFromBlocksAt(ctx context.Context, srcs safemem.BlockSeq, o
 	}
 	return uint64(n), cperr
 }
+
+type handleReadWriter struct {
+	ctx context.Context
+	h   *handle
+	off uint64
+}
+
+var handleReadWriterPool = sync.Pool{
+	New: func() interface{} {
+		return &handleReadWriter{}
+	},
+}
+
+func getHandleReadWriter(ctx context.Context, h *handle, offset int64) *handleReadWriter {
+	rw := handleReadWriterPool.Get().(*handleReadWriter)
+	rw.ctx = ctx
+	rw.h = h
+	rw.off = uint64(offset)
+	return rw
+}
+
+func putHandleReadWriter(rw *handleReadWriter) {
+	rw.ctx = nil
+	rw.h = nil
+	handleReadWriterPool.Put(rw)
+}
+
+// ReadToBlocks implements safemem.Reader.ReadToBlocks.
+func (rw *handleReadWriter) ReadToBlocks(dsts safemem.BlockSeq) (uint64, error) {
+	n, err := rw.h.readToBlocksAt(rw.ctx, dsts, rw.off)
+	rw.off += n
+	return n, err
+}
+
+// WriteFromBlocks implements safemem.Writer.WriteFromBlocks.
+func (rw *handleReadWriter) WriteFromBlocks(srcs safemem.BlockSeq) (uint64, error) {
+	n, err := rw.h.writeFromBlocksAt(rw.ctx, srcs, rw.off)
+	rw.off += n
+	return n, err
+}
diff --git a/pkg/sentry/fsimpl/gofer/save_restore.go b/pkg/sentry/fsimpl/gofer/save_restore.go
index e67422a2f..8dcbc61ed 100644
--- a/pkg/sentry/fsimpl/gofer/save_restore.go
+++ b/pkg/sentry/fsimpl/gofer/save_restore.go
@@ -158,6 +158,10 @@ func (d *dentryPlatformFile) afterLoad() {
 // afterLoad is invoked by stateify.
 func (fd *specialFileFD) afterLoad() {
 	fd.handle.fd = -1
+	if fd.hostFileMapper.IsInited() {
+		// Ensure that we don't call fd.hostFileMapper.Init() again.
+		fd.hostFileMapperInitOnce.Do(func() {})
+	}
 }
 
 // CompleteRestore implements
diff --git a/pkg/sentry/fsimpl/gofer/special_file.go b/pkg/sentry/fsimpl/gofer/special_file.go
index 144a1045e..a8d47b65b 100644
--- a/pkg/sentry/fsimpl/gofer/special_file.go
+++ b/pkg/sentry/fsimpl/gofer/special_file.go
@@ -22,10 +22,13 @@ import (
 	"gvisor.dev/gvisor/pkg/context"
 	"gvisor.dev/gvisor/pkg/errors/linuxerr"
 	"gvisor.dev/gvisor/pkg/fdnotifier"
+	"gvisor.dev/gvisor/pkg/hostarch"
 	"gvisor.dev/gvisor/pkg/metric"
 	"gvisor.dev/gvisor/pkg/p9"
 	"gvisor.dev/gvisor/pkg/safemem"
+	"gvisor.dev/gvisor/pkg/sentry/fs/fsutil"
 	"gvisor.dev/gvisor/pkg/sentry/fsmetric"
+	"gvisor.dev/gvisor/pkg/sentry/memmap"
 	"gvisor.dev/gvisor/pkg/sentry/vfs"
 	"gvisor.dev/gvisor/pkg/sync"
 	"gvisor.dev/gvisor/pkg/usermem"
@@ -75,6 +78,16 @@ type specialFileFD struct {
 	bufMu   sync.Mutex `state:"nosave"`
 	haveBuf uint32
 	buf     []byte
+
+	// If handle.fd >= 0, hostFileMapper caches mappings of handle.fd, and
+	// hostFileMapperInitOnce is used to initialize it on first use.
+	hostFileMapperInitOnce sync.Once `state:"nosave"`
+	hostFileMapper         fsutil.HostFileMapper
+
+	// If handle.fd >= 0, fileRefs counts references on memmap.File offsets.
+	// fileRefs is protected by fileRefsMu.
+	fileRefsMu sync.Mutex `state:"nosave"`
+	fileRefs   fsutil.FrameRefSet
 }
 
 func newSpecialFileFD(h handle, mnt *vfs.Mount, d *dentry, flags uint32) (*specialFileFD, error) {
@@ -229,23 +242,13 @@ func (fd *specialFileFD) PRead(ctx context.Context, dst usermem.IOSequence, offs
 		}
 	}
 
-	// Going through dst.CopyOutFrom() would hold MM locks around file
-	// operations of unknown duration. For regularFileFD, doing so is necessary
-	// to support mmap due to lock ordering; MM locks precede dentry.dataMu.
-	// That doesn't hold here since specialFileFD doesn't client-cache data.
-	// Just buffer the read instead.
-	buf := make([]byte, dst.NumBytes())
-	n, err := fd.handle.readToBlocksAt(ctx, safemem.BlockSeqOf(safemem.BlockFromSafeSlice(buf)), uint64(offset))
+	rw := getHandleReadWriter(ctx, &fd.handle, offset)
+	n, err := dst.CopyOutFrom(ctx, rw)
+	putHandleReadWriter(rw)
 	if linuxerr.Equals(linuxerr.EAGAIN, err) {
 		err = linuxerr.ErrWouldBlock
 	}
-	if n == 0 {
-		return bufN, err
-	}
-	if cp, cperr := dst.CopyOut(ctx, buf[:n]); cperr != nil {
-		return bufN + int64(cp), cperr
-	}
-	return bufN + int64(n), err
+	return bufN + n, err
 }
 
 // Read implements vfs.FileDescriptionImpl.Read.
@@ -316,20 +319,15 @@ func (fd *specialFileFD) pwrite(ctx context.Context, src usermem.IOSequence, off
 		}
 	}
 
-	// Do a buffered write. See rationale in PRead.
-	buf := make([]byte, src.NumBytes())
-	copied, copyErr := src.CopyIn(ctx, buf)
-	if copied == 0 && copyErr != nil {
-		// Only return the error if we didn't get any data.
-		return 0, offset, copyErr
-	}
-	n, err := fd.handle.writeFromBlocksAt(ctx, safemem.BlockSeqOf(safemem.BlockFromSafeSlice(buf[:copied])), uint64(offset))
+	rw := getHandleReadWriter(ctx, &fd.handle, offset)
+	n, err := src.CopyInTo(ctx, rw)
+	putHandleReadWriter(rw)
 	if linuxerr.Equals(linuxerr.EAGAIN, err) {
 		err = linuxerr.ErrWouldBlock
 	}
 	// Update offset if the offset is valid.
 	if offset >= 0 {
-		offset += int64(n)
+		offset += n
 	}
 	// Update file size for regular files.
 	if fd.isRegularFile {
@@ -340,10 +338,7 @@ func (fd *specialFileFD) pwrite(ctx context.Context, src usermem.IOSequence, off
 			atomic.StoreUint64(&d.size, uint64(offset))
 		}
 	}
-	if err != nil {
-		return int64(n), offset, err
-	}
-	return int64(n), offset, copyErr
+	return int64(n), offset, err
 }
 
 // Write implements vfs.FileDescriptionImpl.Write.
@@ -411,3 +406,85 @@ func (fd *specialFileFD) sync(ctx context.Context, forFilesystemSync bool) error
 	}
 	return nil
 }
+
+// ConfigureMMap implements vfs.FileDescriptionImpl.ConfigureMMap.
+func (fd *specialFileFD) ConfigureMMap(ctx context.Context, opts *memmap.MMapOpts) error {
+	if fd.handle.fd < 0 || fd.filesystem().opts.forcePageCache {
+		return linuxerr.ENODEV
+	}
+	// After this point, fd may be used as a memmap.Mappable and memmap.File.
+	fd.hostFileMapperInitOnce.Do(fd.hostFileMapper.Init)
+	return vfs.GenericConfigureMMap(&fd.vfsfd, fd, opts)
+}
+
+// AddMapping implements memmap.Mappable.AddMapping.
+func (fd *specialFileFD) AddMapping(ctx context.Context, ms memmap.MappingSpace, ar hostarch.AddrRange, offset uint64, writable bool) error {
+	fd.hostFileMapper.IncRefOn(memmap.MappableRange{offset, offset + uint64(ar.Length())})
+	return nil
+}
+
+// RemoveMapping implements memmap.Mappable.RemoveMapping.
+func (fd *specialFileFD) RemoveMapping(ctx context.Context, ms memmap.MappingSpace, ar hostarch.AddrRange, offset uint64, writable bool) {
+	fd.hostFileMapper.DecRefOn(memmap.MappableRange{offset, offset + uint64(ar.Length())})
+}
+
+// CopyMapping implements memmap.Mappable.CopyMapping.
+func (fd *specialFileFD) CopyMapping(ctx context.Context, ms memmap.MappingSpace, srcAR, dstAR hostarch.AddrRange, offset uint64, writable bool) error {
+	return fd.AddMapping(ctx, ms, dstAR, offset, writable)
+}
+
+// Translate implements memmap.Mappable.Translate.
+func (fd *specialFileFD) Translate(ctx context.Context, required, optional memmap.MappableRange, at hostarch.AccessType) ([]memmap.Translation, error) {
+	mr := optional
+	if fd.filesystem().opts.limitHostFDTranslation {
+		mr = maxFillRange(required, optional)
+	}
+	return []memmap.Translation{
+		{
+			Source: mr,
+			File:   fd,
+			Offset: mr.Start,
+			Perms:  hostarch.AnyAccess,
+		},
+	}, nil
+}
+
+// InvalidateUnsavable implements memmap.Mappable.InvalidateUnsavable.
+func (fd *specialFileFD) InvalidateUnsavable(ctx context.Context) error {
+	return nil
+}
+
+// IncRef implements memmap.File.IncRef.
+func (fd *specialFileFD) IncRef(fr memmap.FileRange) {
+	fd.fileRefsMu.Lock()
+	defer fd.fileRefsMu.Unlock()
+	fd.fileRefs.IncRefAndAccount(fr)
+}
+
+// DecRef implements memmap.File.DecRef.
+func (fd *specialFileFD) DecRef(fr memmap.FileRange) {
+	fd.fileRefsMu.Lock()
+	defer fd.fileRefsMu.Unlock()
+	fd.fileRefs.DecRefAndAccount(fr)
+}
+
+// MapInternal implements memmap.File.MapInternal.
+func (fd *specialFileFD) MapInternal(fr memmap.FileRange, at hostarch.AccessType) (safemem.BlockSeq, error) {
+	fd.requireHostFD()
+	return fd.hostFileMapper.MapInternal(fr, int(fd.handle.fd), at.Write)
+}
+
+// FD implements memmap.File.FD.
+func (fd *specialFileFD) FD() int {
+	fd.requireHostFD()
+	return int(fd.handle.fd)
+}
+
+func (fd *specialFileFD) requireHostFD() {
+	if fd.handle.fd < 0 {
+		// This is possible if fd was successfully mmapped before saving, then
+		// was restored without a host FD. This is unrecoverable: without a
+		// host FD, we can't mmap this file post-restore.
+		panic("gofer.specialFileFD can no longer be memory-mapped without a host FD")
+	}
+}
diff --git a/pkg/sentry/fsimpl/verity/BUILD b/pkg/sentry/fsimpl/verity/BUILD
index 5955948f0..c12abdf33 100644
--- a/pkg/sentry/fsimpl/verity/BUILD
+++ b/pkg/sentry/fsimpl/verity/BUILD
@@ -1,10 +1,24 @@
 load("//tools:defs.bzl", "go_library", "go_test")
+load("//tools/go_generics:defs.bzl", "go_template_instance")
 
 licenses(["notice"])
 
+go_template_instance(
+    name = "dentry_list",
+    out = "dentry_list.go",
+    package = "verity",
+    prefix = "dentry",
+    template = "//pkg/ilist:generic_list",
+    types = {
+        "Element": "*dentry",
+        "Linker": "*dentry",
+    },
+)
+
 go_library(
     name = "verity",
     srcs = [
+        "dentry_list.go",
         "filesystem.go",
         "save_restore.go",
         "verity.go",
diff --git a/pkg/sentry/fsimpl/verity/filesystem.go b/pkg/sentry/fsimpl/verity/filesystem.go
index e147d6b07..52d47994d 100644
--- a/pkg/sentry/fsimpl/verity/filesystem.go
+++ b/pkg/sentry/fsimpl/verity/filesystem.go
@@ -66,40 +66,23 @@ func putDentrySlice(ds *[]*dentry) {
 	dentrySlicePool.Put(ds)
 }
 
-// renameMuRUnlockAndCheckDrop calls fs.renameMu.RUnlock(), then calls
-// dentry.checkDropLocked on all dentries in *ds with fs.renameMu locked for
+// renameMuRUnlockAndCheckCaching calls fs.renameMu.RUnlock(), then calls
+// dentry.checkCachingLocked on all dentries in *ds with fs.renameMu locked for
 // writing.
 //
 // ds is a pointer-to-pointer since defer evaluates its arguments immediately,
 // but dentry slices are allocated lazily, and it's much easier to say "defer
-// fs.renameMuRUnlockAndCheckDrop(&ds)" than "defer func() {
-// fs.renameMuRUnlockAndCheckDrop(ds) }()" to work around this.
+// fs.renameMuRUnlockAndCheckCaching(&ds)" than "defer func() {
+// fs.renameMuRUnlockAndCheckCaching(ds) }()" to work around this.
 // +checklocksrelease:fs.renameMu
-func (fs *filesystem) renameMuRUnlockAndCheckDrop(ctx context.Context, ds **[]*dentry) {
+func (fs *filesystem) renameMuRUnlockAndCheckCaching(ctx context.Context, ds **[]*dentry) {
 	fs.renameMu.RUnlock()
 	if *ds == nil {
 		return
 	}
-	if len(**ds) != 0 {
-		fs.renameMu.Lock()
-		for _, d := range **ds {
-			d.checkDropLocked(ctx)
-		}
-		fs.renameMu.Unlock()
-	}
-	putDentrySlice(*ds)
-}
-
-// +checklocksrelease:fs.renameMu
-func (fs *filesystem) renameMuUnlockAndCheckDrop(ctx context.Context, ds **[]*dentry) {
-	if *ds == nil {
-		fs.renameMu.Unlock()
-		return
-	}
 	for _, d := range **ds {
-		d.checkDropLocked(ctx)
+		d.checkCachingLocked(ctx, false /* renameMuWriteLocked */)
 	}
-	fs.renameMu.Unlock()
 	putDentrySlice(*ds)
 }
 
@@ -700,7 +683,7 @@ func (fs *filesystem) AccessAt(ctx context.Context, rp *vfs.ResolvingPath, creds
 	}
 	var ds *[]*dentry
 	fs.renameMu.RLock()
-	defer fs.renameMuRUnlockAndCheckDrop(ctx, &ds)
+	defer fs.renameMuRUnlockAndCheckCaching(ctx, &ds)
 	d, err := fs.resolveLocked(ctx, rp, &ds)
 	if err != nil {
 		return err
@@ -712,7 +695,7 @@ func (fs *filesystem) AccessAt(ctx context.Context, rp *vfs.ResolvingPath, creds
 func (fs *filesystem) GetDentryAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.GetDentryOptions) (*vfs.Dentry, error) {
 	var ds *[]*dentry
 	fs.renameMu.RLock()
-	defer fs.renameMuRUnlockAndCheckDrop(ctx, &ds)
+	defer fs.renameMuRUnlockAndCheckCaching(ctx, &ds)
 	d, err := fs.resolveLocked(ctx, rp, &ds)
 	if err != nil {
 		return nil, err
@@ -733,7 +716,7 @@ func (fs *filesystem) GetDentryAt(ctx context.Context, rp *vfs.ResolvingPath, op
 func (fs *filesystem) GetParentDentryAt(ctx context.Context, rp *vfs.ResolvingPath) (*vfs.Dentry, error) {
 	var ds *[]*dentry
 	fs.renameMu.RLock()
-	defer fs.renameMuRUnlockAndCheckDrop(ctx, &ds)
+	defer fs.renameMuRUnlockAndCheckCaching(ctx, &ds)
 	start := rp.Start().Impl().(*dentry)
 	d, err := fs.walkParentDirLocked(ctx, rp, start, &ds)
 	if err != nil {
@@ -770,7 +753,7 @@ func (fs *filesystem) OpenAt(ctx context.Context, rp *vfs.ResolvingPath, opts vf
 
 	var ds *[]*dentry
 	fs.renameMu.RLock()
-	defer fs.renameMuRUnlockAndCheckDrop(ctx, &ds)
+	defer fs.renameMuRUnlockAndCheckCaching(ctx, &ds)
 
 	start := rp.Start().Impl().(*dentry)
 	if rp.Done() {
@@ -952,7 +935,7 @@ func (d *dentry) openLocked(ctx context.Context, rp *vfs.ResolvingPath, opts *vf
 func (fs *filesystem) ReadlinkAt(ctx context.Context, rp *vfs.ResolvingPath) (string, error) {
 	var ds *[]*dentry
 	fs.renameMu.RLock()
-	defer fs.renameMuRUnlockAndCheckDrop(ctx, &ds)
+	defer fs.renameMuRUnlockAndCheckCaching(ctx, &ds)
 	d, err := fs.resolveLocked(ctx, rp, &ds)
 	if err != nil {
 		return "", err
@@ -982,7 +965,7 @@ func (fs *filesystem) SetStatAt(ctx context.Context, rp *vfs.ResolvingPath, opts
 func (fs *filesystem) StatAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.StatOptions) (linux.Statx, error) {
 	var ds *[]*dentry
 	fs.renameMu.RLock()
-	defer fs.renameMuRUnlockAndCheckDrop(ctx, &ds)
+	defer fs.renameMuRUnlockAndCheckCaching(ctx, &ds)
 	d, err := fs.resolveLocked(ctx, rp, &ds)
 	if err != nil {
 		return linux.Statx{}, err
@@ -1028,7 +1011,7 @@ func (fs *filesystem) UnlinkAt(ctx context.Context, rp *vfs.ResolvingPath) error
 func (fs *filesystem) BoundEndpointAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.BoundEndpointOptions) (transport.BoundEndpoint, error) {
 	var ds *[]*dentry
 	fs.renameMu.RLock()
-	defer fs.renameMuRUnlockAndCheckDrop(ctx, &ds)
+	defer fs.renameMuRUnlockAndCheckCaching(ctx, &ds)
 	if _, err := fs.resolveLocked(ctx, rp, &ds); err != nil {
 		return nil, err
 	}
@@ -1039,7 +1022,7 @@ func (fs *filesystem) BoundEndpointAt(ctx context.Context, rp *vfs.ResolvingPath
 func (fs *filesystem) ListXattrAt(ctx context.Context, rp *vfs.ResolvingPath, size uint64) ([]string, error) {
 	var ds *[]*dentry
 	fs.renameMu.RLock()
-	defer fs.renameMuRUnlockAndCheckDrop(ctx, &ds)
+	defer fs.renameMuRUnlockAndCheckCaching(ctx, &ds)
 	d, err := fs.resolveLocked(ctx, rp, &ds)
 	if err != nil {
 		return nil, err
@@ -1055,7 +1038,7 @@ func (fs *filesystem) ListXattrAt(ctx context.Context, rp *vfs.ResolvingPath, si
 func (fs *filesystem) GetXattrAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.GetXattrOptions) (string, error) {
 	var ds *[]*dentry
 	fs.renameMu.RLock()
-	defer fs.renameMuRUnlockAndCheckDrop(ctx, &ds)
+	defer fs.renameMuRUnlockAndCheckCaching(ctx, &ds)
 	d, err := fs.resolveLocked(ctx, rp, &ds)
 	if err != nil {
 		return "", err
diff --git a/pkg/sentry/fsimpl/verity/verity.go b/pkg/sentry/fsimpl/verity/verity.go
index 23841ecf7..d2526263c 100644
--- a/pkg/sentry/fsimpl/verity/verity.go
+++ b/pkg/sentry/fsimpl/verity/verity.go
@@ -23,10 +23,12 @@
 // Lock order:
 //
 // filesystem.renameMu
-//   dentry.dirMu
-//     fileDescription.mu
-//       filesystem.verityMu
-//         dentry.hashMu
+//   dentry.cachingMu
+//     filesystem.cacheMu
+//       dentry.dirMu
+//         fileDescription.mu
+//           filesystem.verityMu
+//             dentry.hashMu
 //
 // Locking dentry.dirMu in multiple dentries requires that parent dentries are
 // locked before child dentries, and that filesystem.renameMu is locked to
@@ -96,6 +98,9 @@ const (
 	// sizeOfStringInt32 is the size for a 32 bit integer stored as string in
 	// extended attributes. The maximum value of a 32 bit integer has 10 digits.
 	sizeOfStringInt32 = 10
+
+	// defaultMaxCachedDentries is the default limit of dentry cache.
+	defaultMaxCachedDentries = uint64(1000)
 )
 
 var (
@@ -106,9 +111,10 @@ var (
 
 // Mount option names for verityfs.
 const (
-	moptLowerPath = "lower_path"
-	moptRootHash  = "root_hash"
-	moptRootName  = "root_name"
+	moptLowerPath        = "lower_path"
+	moptRootHash         = "root_hash"
+	moptRootName         = "root_name"
+	moptDentryCacheLimit = "dentry_cache_limit"
 )
 
 // HashAlgorithm is a type specifying the algorithm used to hash the file
@@ -188,6 +194,17 @@ type filesystem struct {
 	// dentries.
 	renameMu sync.RWMutex `state:"nosave"`
 
+	// cachedDentries contains all dentries with 0 references. (Due to race
+	// conditions, it may also contain dentries with non-zero references.)
+	// cachedDentriesLen is the number of dentries in cachedDentries. These
+	// fields are protected by cacheMu.
+	cacheMu           sync.Mutex `state:"nosave"`
+	cachedDentries    dentryList
+	cachedDentriesLen uint64
+
+	// maxCachedDentries is the maximum size of filesystem.cachedDentries.
+	maxCachedDentries uint64
+
 	// verityMu synchronizes enabling verity files, protects files or
 	// directories from being enabled by different threads simultaneously.
 	// It also ensures that verity does not access files that are being
@@ -198,6 +215,10 @@ type filesystem struct {
 	// is for the whole file system to ensure that no more than one file is
 	// enabled the same time.
 	verityMu sync.RWMutex `state:"nosave"`
+
+	// released is nonzero once filesystem.Release has been called. It is accessed
+	// with atomic memory operations.
+	released int32
 }
 
 // InternalFilesystemOptions may be passed as
@@ -266,6 +287,16 @@ func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt
 		delete(mopts, moptRootName)
 		rootName = root
 	}
+	maxCachedDentries := defaultMaxCachedDentries
+	if str, ok := mopts[moptDentryCacheLimit]; ok {
+		delete(mopts, moptDentryCacheLimit)
+		maxCD, err := strconv.ParseUint(str, 10, 64)
+		if err != nil {
+			ctx.Warningf("verity.FilesystemType.GetFilesystem: invalid dentry cache limit: %s=%s", moptDentryCacheLimit, str)
+			return nil, nil, linuxerr.EINVAL
+		}
+		maxCachedDentries = maxCD
+	}
 
 	// Check for unparsed options.
 	if len(mopts) != 0 {
@@ -339,12 +370,16 @@ func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt
 		action:             iopts.Action,
 		opts:               opts.Data,
 		allowRuntimeEnable: iopts.AllowRuntimeEnable,
+		maxCachedDentries:  maxCachedDentries,
 	}
 	fs.vfsfs.Init(vfsObj, &fstype, fs)
 
 	// Construct the root dentry.
 	d := fs.newDentry()
-	d.refs = 1
+	// Set the root's reference count to 2. One reference is returned to
+	// the caller, and the other is held by fs to prevent the root from
+	// being "cached" and subsequently evicted.
+	d.refs = 2
 	lowerVD := vfs.MakeVirtualDentry(lowerMount, lowerMount.Root())
 	lowerVD.IncRef()
 	d.lowerVD = lowerVD
@@ -519,7 +554,16 @@ func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt
 
 // Release implements vfs.FilesystemImpl.Release.
 func (fs *filesystem) Release(ctx context.Context) {
+	atomic.StoreInt32(&fs.released, 1)
 	fs.lowerMount.DecRef(ctx)
+
+	fs.renameMu.Lock()
+	fs.evictAllCachedDentriesLocked(ctx)
+	fs.renameMu.Unlock()
+
+	// An extra reference was held by the filesystem on the root to prevent
+	// it from being cached/evicted.
+	fs.rootDentry.DecRef(ctx)
 }
 
 // MountOptions implements vfs.FilesystemImpl.MountOptions.
@@ -533,6 +577,11 @@ func (fs *filesystem) MountOptions() string {
 type dentry struct {
 	vfsd vfs.Dentry
 
+	// refs is the reference count. Each dentry holds a reference on its
+	// parent, even if disowned. When refs reaches 0, the dentry may be
+	// added to the cache or destroyed. If refs == -1, the dentry has
+	// already been destroyed. refs is accessed using atomic memory
+	// operations.
 	refs int64
 
 	// fs is the owning filesystem. fs is immutable.
@@ -587,13 +636,23 @@ type dentry struct {
 	// is protected by hashMu.
 	hashMu sync.RWMutex `state:"nosave"`
 	hash   []byte
+
+	// cachingMu is used to synchronize concurrent dentry caching attempts on
+	// this dentry.
+	cachingMu sync.Mutex `state:"nosave"`
+
+	// If cached is true, dentryEntry links dentry into
+	// filesystem.cachedDentries. cached and dentryEntry are protected by
+	// cachingMu.
+	cached bool
+	dentryEntry
 }
 
 // newDentry creates a new dentry representing the given verity file. The
-// dentry initially has no references; it is the caller's responsibility to set
-// the dentry's reference count and/or call dentry.destroy() as appropriate.
-// The dentry is initially invalid in that it contains no underlying dentry;
-// the caller is responsible for setting them.
+// dentry initially has no references, but is not cached; it is the caller's
+// responsibility to set the dentry's reference count and/or call
+// dentry.destroy() as appropriate. The dentry is initially invalid in that it
+// contains no underlying dentry; the caller is responsible for setting them.
 func (fs *filesystem) newDentry() *dentry {
 	d := &dentry{
 		fs: fs,
@@ -629,42 +688,23 @@ func (d *dentry) TryIncRef() bool {
 
 // DecRef implements vfs.DentryImpl.DecRef.
 func (d *dentry) DecRef(ctx context.Context) {
-	r := atomic.AddInt64(&d.refs, -1)
-	if d.LogRefs() {
-		refsvfs2.LogDecRef(d, r)
-	}
-	if r == 0 {
-		d.fs.renameMu.Lock()
-		d.checkDropLocked(ctx)
-		d.fs.renameMu.Unlock()
-	} else if r < 0 {
-		panic("verity.dentry.DecRef() called without holding a reference")
+	if d.decRefNoCaching() == 0 {
+		d.checkCachingLocked(ctx, false /* renameMuWriteLocked */)
 	}
 }
 
-func (d *dentry) decRefLocked(ctx context.Context) {
+// decRefNoCaching decrements d's reference count without calling
+// d.checkCachingLocked, even if d's reference count reaches 0; callers are
+// responsible for ensuring that d.checkCachingLocked will be called later.
+func (d *dentry) decRefNoCaching() int64 {
 	r := atomic.AddInt64(&d.refs, -1)
 	if d.LogRefs() {
 		refsvfs2.LogDecRef(d, r)
 	}
-	if r == 0 {
-		d.checkDropLocked(ctx)
-	} else if r < 0 {
-		panic("verity.dentry.decRefLocked() called without holding a reference")
+	if r < 0 {
+		panic("verity.dentry.decRefNoCaching() called without holding a reference")
 	}
-}
-
-// checkDropLocked should be called after d's reference count becomes 0 or it
-// becomes deleted.
-func (d *dentry) checkDropLocked(ctx context.Context) {
-	// Dentries with a positive reference count must be retained. Dentries
-	// with a negative reference count have already been destroyed.
-	if atomic.LoadInt64(&d.refs) != 0 {
-		return
-	}
-	// Refs is still zero; destroy it.
-	d.destroyLocked(ctx)
-	return
+	return r
 }
 
 // destroyLocked destroys the dentry.
@@ -683,6 +723,12 @@ func (d *dentry) destroyLocked(ctx context.Context) {
 		panic("verity.dentry.destroyLocked() called with references on the dentry")
 	}
 
+	// Drop the reference held by d on its parent without recursively
+	// locking d.fs.renameMu.
+	if d.parent != nil && d.parent.decRefNoCaching() == 0 {
+		d.parent.checkCachingLocked(ctx, true /* renameMuWriteLocked */)
+	}
+
 	if d.lowerVD.Ok() {
 		d.lowerVD.DecRef(ctx)
 	}
@@ -695,7 +741,6 @@ func (d *dentry) destroyLocked(ctx context.Context) {
 			delete(d.parent.children, d.name)
 		}
 		d.parent.dirMu.Unlock()
-		d.parent.decRefLocked(ctx)
 	}
 	refsvfs2.Unregister(d)
 }
@@ -734,6 +779,140 @@ func (d *dentry) OnZeroWatches(context.Context) {
 	//TODO(b/159261227): Implement OnZeroWatches.
 }
 
+// checkCachingLocked should be called after d's reference count becomes 0 or
+// it becomes disowned.
+//
+// For performance, checkCachingLocked can also be called after d's reference
+// count becomes non-zero, so that d can be removed from the LRU cache. This
+// may help in reducing the size of the cache and hence reduce evictions. Note
+// that this is not necessary for correctness.
+//
+// It may be called on a destroyed dentry. For example,
+// renameMu[R]UnlockAndCheckCaching may call checkCachingLocked multiple times
+// for the same dentry when the dentry is visited more than once in the same
+// operation. One of the calls may destroy the dentry, so subsequent calls will
+// do nothing.
+//
+// Preconditions: d.fs.renameMu must be locked for writing if
+// renameMuWriteLocked is true; it may be temporarily unlocked.
+func (d *dentry) checkCachingLocked(ctx context.Context, renameMuWriteLocked bool) {
+	d.cachingMu.Lock()
+	refs := atomic.LoadInt64(&d.refs)
+	if refs == -1 {
+		// Dentry has already been destroyed.
+		d.cachingMu.Unlock()
+		return
+	}
+	if refs > 0 {
+		// fs.cachedDentries is permitted to contain dentries with non-zero refs,
+		// which are skipped by fs.evictCachedDentryLocked() upon reaching the end
+		// of the LRU. But it is still beneficial to remove d from the cache as we
+		// are already holding d.cachingMu. Keeping a cleaner cache also reduces
+		// the number of evictions (which is expensive as it acquires fs.renameMu).
+		d.removeFromCacheLocked()
+		d.cachingMu.Unlock()
+		return
+	}
+
+	if atomic.LoadInt32(&d.fs.released) != 0 {
+		d.cachingMu.Unlock()
+		if !renameMuWriteLocked {
+			// Need to lock d.fs.renameMu to access d.parent. Lock it for writing as
+			// needed by d.destroyLocked() later.
+			d.fs.renameMu.Lock()
+			defer d.fs.renameMu.Unlock()
+		}
+		if d.parent != nil {
+			d.parent.dirMu.Lock()
+			delete(d.parent.children, d.name)
+			d.parent.dirMu.Unlock()
+		}
+		d.destroyLocked(ctx) // +checklocksforce: see above.
+		return
+	}
+
+	d.fs.cacheMu.Lock()
+	// If d is already cached, just move it to the front of the LRU.
+	if d.cached {
+		d.fs.cachedDentries.Remove(d)
+		d.fs.cachedDentries.PushFront(d)
+		d.fs.cacheMu.Unlock()
+		d.cachingMu.Unlock()
+		return
+	}
+	// Cache the dentry, then evict the least recently used cached dentry if
+	// the cache becomes over-full.
+	d.fs.cachedDentries.PushFront(d)
+	d.fs.cachedDentriesLen++
+	d.cached = true
+	shouldEvict := d.fs.cachedDentriesLen > d.fs.maxCachedDentries
+	d.fs.cacheMu.Unlock()
+	d.cachingMu.Unlock()
+
+	if shouldEvict {
+		if !renameMuWriteLocked {
+			// Need to lock d.fs.renameMu for writing as needed by
+			// d.evictCachedDentryLocked().
+			d.fs.renameMu.Lock()
+			defer d.fs.renameMu.Unlock()
+		}
+		d.fs.evictCachedDentryLocked(ctx) // +checklocksforce: see above.
+	}
+}
+
+// Preconditions: d.cachingMu must be locked.
+func (d *dentry) removeFromCacheLocked() {
+	if d.cached {
+		d.fs.cacheMu.Lock()
+		d.fs.cachedDentries.Remove(d)
+		d.fs.cachedDentriesLen--
+		d.fs.cacheMu.Unlock()
+		d.cached = false
+	}
+}
+
+// Precondition: fs.renameMu must be locked for writing; it may be temporarily
+// unlocked.
+// +checklocks:fs.renameMu
+func (fs *filesystem) evictAllCachedDentriesLocked(ctx context.Context) {
+	for fs.cachedDentriesLen != 0 {
+		fs.evictCachedDentryLocked(ctx)
+	}
+}
+
+// Preconditions:
+// * fs.renameMu must be locked for writing; it may be temporarily unlocked.
+// +checklocks:fs.renameMu
+func (fs *filesystem) evictCachedDentryLocked(ctx context.Context) {
+	fs.cacheMu.Lock()
+	victim := fs.cachedDentries.Back()
+	fs.cacheMu.Unlock()
+	if victim == nil {
+		// fs.cachedDentries may have become empty between when it was
+		// checked and when we locked fs.cacheMu.
+		return
+	}
+
+	victim.cachingMu.Lock()
+	victim.removeFromCacheLocked()
+	// victim.refs may have become non-zero from an earlier path resolution
+	// since it was inserted into fs.cachedDentries.
+	if atomic.LoadInt64(&victim.refs) != 0 {
+		victim.cachingMu.Unlock()
+		return
+	}
+	if victim.parent != nil {
+		victim.parent.dirMu.Lock()
+		// Note that victim can't be a mount point (in any mount
+		// namespace), since VFS holds references on mount points.
+		fs.vfsfs.VirtualFilesystem().InvalidateDentry(ctx, &victim.vfsd)
+		delete(victim.parent.children, victim.name)
+		victim.parent.dirMu.Unlock()
+	}
+	victim.cachingMu.Unlock()
+	victim.destroyLocked(ctx) // +checklocksforce: owned as precondition, victim.fs == fs.
+}
+
 func (d *dentry) isSymlink() bool {
 	return atomic.LoadUint32(&d.mode)&linux.S_IFMT == linux.S_IFLNK
 }