26 files changed, 854 insertions, 508 deletions
diff --git a/pkg/sentry/vfs/BUILD b/pkg/sentry/vfs/BUILD
index 642769e7c..c855608db 100644
--- a/pkg/sentry/vfs/BUILD
+++ b/pkg/sentry/vfs/BUILD
@@ -27,6 +27,39 @@ go_template_instance(
     },
 )
 
+go_template_instance(
+    name = "file_description_refs",
+    out = "file_description_refs.go",
+    package = "vfs",
+    prefix = "FileDescription",
+    template = "//pkg/refs_vfs2:refs_template",
+    types = {
+        "T": "FileDescription",
+    },
+)
+
+go_template_instance(
+    name = "mount_namespace_refs",
+    out = "mount_namespace_refs.go",
+    package = "vfs",
+    prefix = "MountNamespace",
+    template = "//pkg/refs_vfs2:refs_template",
+    types = {
+        "T": "MountNamespace",
+    },
+)
+
+go_template_instance(
+    name = "filesystem_refs",
+    out = "filesystem_refs.go",
+    package = "vfs",
+    prefix = "Filesystem",
+    template = "//pkg/refs_vfs2:refs_template",
+    types = {
+        "T": "Filesystem",
+    },
+)
+
 go_library(
     name = "vfs",
     srcs = [
@@ -40,12 +73,15 @@ go_library(
         "event_list.go",
         "file_description.go",
         "file_description_impl_util.go",
+        "file_description_refs.go",
         "filesystem.go",
         "filesystem_impl_util.go",
+        "filesystem_refs.go",
         "filesystem_type.go",
         "inotify.go",
         "lock.go",
         "mount.go",
+        "mount_namespace_refs.go",
         "mount_unsafe.go",
         "options.go",
         "pathname.go",
@@ -56,13 +92,13 @@ go_library(
     visibility = ["//pkg/sentry:internal"],
     deps = [
         "//pkg/abi/linux",
-        "//pkg/binary",
         "//pkg/context",
         "//pkg/fd",
         "//pkg/fdnotifier",
         "//pkg/fspath",
         "//pkg/gohacks",
         "//pkg/log",
+        "//pkg/refs",
         "//pkg/safemem",
         "//pkg/sentry/arch",
         "//pkg/sentry/fs",
diff --git a/pkg/sentry/vfs/README.md b/pkg/sentry/vfs/README.md
index 4b9faf2ea..5aad31b78 100644
--- a/pkg/sentry/vfs/README.md
+++ b/pkg/sentry/vfs/README.md
@@ -184,12 +184,3 @@ This construction, which is essentially a type-safe analogue to Linux's
     -   File locking
 
     -   `O_ASYNC`
-
--   Reference counts in the `vfs` package do not use the `refs` package since
-    `refs.AtomicRefCount` adds 64 bytes of overhead to each 8-byte reference
-    count, resulting in considerable cache bloat. 24 bytes of this overhead is
-    for weak reference support, which have poor performance and will not be used
-    by VFS2. The remaining 40 bytes is to store a descriptive string and stack
-    trace for reference leak checking; we can support reference leak checking
-    without incurring this space overhead by including the applicable
-    information directly in finalizers for applicable types.
diff --git a/pkg/sentry/vfs/anonfs.go b/pkg/sentry/vfs/anonfs.go
index 641e3e502..bdfd3ca8f 100644
--- a/pkg/sentry/vfs/anonfs.go
+++ b/pkg/sentry/vfs/anonfs.go
@@ -52,6 +52,8 @@ const (
 )
 
 // anonFilesystemType implements FilesystemType.
+//
+// +stateify savable
 type anonFilesystemType struct{}
 
 // GetFilesystem implements FilesystemType.GetFilesystem.
@@ -69,12 +71,15 @@ func (anonFilesystemType) Name() string {
 //
 // Since all Dentries in anonFilesystem are non-directories, all FilesystemImpl
 // methods that would require an anonDentry to be a directory return ENOTDIR.
+//
+// +stateify savable
 type anonFilesystem struct {
 	vfsfs Filesystem
 
 	devMinor uint32
 }
 
+// +stateify savable
 type anonDentry struct {
 	vfsd Dentry
 
@@ -82,7 +87,7 @@ type anonDentry struct {
 }
 
 // Release implements FilesystemImpl.Release.
-func (fs *anonFilesystem) Release() {
+func (fs *anonFilesystem) Release(ctx context.Context) {
 }
 
 // Sync implements FilesystemImpl.Sync.
@@ -245,32 +250,32 @@ func (fs *anonFilesystem) BoundEndpointAt(ctx context.Context, rp *ResolvingPath
 	return nil, syserror.ECONNREFUSED
 }
 
-// ListxattrAt implements FilesystemImpl.ListxattrAt.
-func (fs *anonFilesystem) ListxattrAt(ctx context.Context, rp *ResolvingPath, size uint64) ([]string, error) {
+// ListXattrAt implements FilesystemImpl.ListXattrAt.
+func (fs *anonFilesystem) ListXattrAt(ctx context.Context, rp *ResolvingPath, size uint64) ([]string, error) {
 	if !rp.Done() {
 		return nil, syserror.ENOTDIR
 	}
 	return nil, nil
 }
 
-// GetxattrAt implements FilesystemImpl.GetxattrAt.
-func (fs *anonFilesystem) GetxattrAt(ctx context.Context, rp *ResolvingPath, opts GetxattrOptions) (string, error) {
+// GetXattrAt implements FilesystemImpl.GetXattrAt.
+func (fs *anonFilesystem) GetXattrAt(ctx context.Context, rp *ResolvingPath, opts GetXattrOptions) (string, error) {
 	if !rp.Done() {
 		return "", syserror.ENOTDIR
 	}
 	return "", syserror.ENOTSUP
 }
 
-// SetxattrAt implements FilesystemImpl.SetxattrAt.
-func (fs *anonFilesystem) SetxattrAt(ctx context.Context, rp *ResolvingPath, opts SetxattrOptions) error {
+// SetXattrAt implements FilesystemImpl.SetXattrAt.
+func (fs *anonFilesystem) SetXattrAt(ctx context.Context, rp *ResolvingPath, opts SetXattrOptions) error {
 	if !rp.Done() {
 		return syserror.ENOTDIR
 	}
 	return syserror.EPERM
 }
 
-// RemovexattrAt implements FilesystemImpl.RemovexattrAt.
-func (fs *anonFilesystem) RemovexattrAt(ctx context.Context, rp *ResolvingPath, name string) error {
+// RemoveXattrAt implements FilesystemImpl.RemoveXattrAt.
+func (fs *anonFilesystem) RemoveXattrAt(ctx context.Context, rp *ResolvingPath, name string) error {
 	if !rp.Done() {
 		return syserror.ENOTDIR
 	}
@@ -294,7 +299,7 @@ func (d *anonDentry) TryIncRef() bool {
 }
 
 // DecRef implements DentryImpl.DecRef.
-func (d *anonDentry) DecRef() {
+func (d *anonDentry) DecRef(ctx context.Context) {
 	// no-op
 }
 
@@ -303,7 +308,7 @@ func (d *anonDentry) DecRef() {
 // Although Linux technically supports inotify on pseudo filesystems (inotify
 // is implemented at the vfs layer), it is not particularly useful. It is left
 // unimplemented until someone actually needs it.
-func (d *anonDentry) InotifyWithParent(events, cookie uint32, et EventType) {}
+func (d *anonDentry) InotifyWithParent(ctx context.Context, events, cookie uint32, et EventType) {}
 
 // Watches implements DentryImpl.Watches.
 func (d *anonDentry) Watches() *Watches {
@@ -311,4 +316,4 @@ func (d *anonDentry) Watches() *Watches {
 }
 
 // OnZeroWatches implements Dentry.OnZeroWatches.
-func (d *anonDentry) OnZeroWatches() {}
+func (d *anonDentry) OnZeroWatches(context.Context) {}
diff --git a/pkg/sentry/vfs/context.go b/pkg/sentry/vfs/context.go
index c9e724fef..97018651f 100644
--- a/pkg/sentry/vfs/context.go
+++ b/pkg/sentry/vfs/context.go
@@ -40,6 +40,30 @@ func MountNamespaceFromContext(ctx context.Context) *MountNamespace {
 	return nil
 }
 
+type mountNamespaceContext struct {
+	context.Context
+	mntns *MountNamespace
+}
+
+// Value implements Context.Value.
+func (mc mountNamespaceContext) Value(key interface{}) interface{} {
+	switch key {
+	case CtxMountNamespace:
+		mc.mntns.IncRef()
+		return mc.mntns
+	default:
+		return mc.Context.Value(key)
+	}
+}
+
+// WithMountNamespace returns a copy of ctx with the given MountNamespace.
+func WithMountNamespace(ctx context.Context, mntns *MountNamespace) context.Context {
+	return &mountNamespaceContext{
+		Context: ctx,
+		mntns:   mntns,
+	}
+}
+
 // RootFromContext returns the VFS root used by ctx. It takes a reference on
 // the returned VirtualDentry. If ctx does not have a specific VFS root,
 // RootFromContext returns a zero-value VirtualDentry.
diff --git a/pkg/sentry/vfs/dentry.go b/pkg/sentry/vfs/dentry.go
index cea3e6955..320ab7ce1 100644
--- a/pkg/sentry/vfs/dentry.go
+++ b/pkg/sentry/vfs/dentry.go
@@ -17,6 +17,7 @@ package vfs
 import (
 	"sync/atomic"
 
+	"gvisor.dev/gvisor/pkg/context"
 	"gvisor.dev/gvisor/pkg/sync"
 	"gvisor.dev/gvisor/pkg/syserror"
 )
@@ -88,6 +89,8 @@ func (d *Dentry) Impl() DentryImpl {
 // DentryImpl contains implementation details for a Dentry. Implementations of
 // DentryImpl should contain their associated Dentry by value as their first
 // field.
+//
+// +stateify savable
 type DentryImpl interface {
 	// IncRef increments the Dentry's reference count. A Dentry with a non-zero
 	// reference count must remain coherent with the state of the filesystem.
@@ -102,7 +105,7 @@ type DentryImpl interface {
 	TryIncRef() bool
 
 	// DecRef decrements the Dentry's reference count.
-	DecRef()
+	DecRef(ctx context.Context)
 
 	// InotifyWithParent notifies all watches on the targets represented by this
 	// dentry and its parent. The parent's watches are notified first, followed
@@ -113,7 +116,7 @@ type DentryImpl interface {
 	//
 	// Note that the events may not actually propagate up to the user, depending
 	// on the event masks.
-	InotifyWithParent(events, cookie uint32, et EventType)
+	InotifyWithParent(ctx context.Context, events, cookie uint32, et EventType)
 
 	// Watches returns the set of inotify watches for the file corresponding to
 	// the Dentry. Dentries that are hard links to the same underlying file
@@ -135,7 +138,7 @@ type DentryImpl interface {
 	// The caller does not need to hold a reference on the dentry. OnZeroWatches
 	// may acquire inotify locks, so to prevent deadlock, no inotify locks should
 	// be held by the caller.
-	OnZeroWatches()
+	OnZeroWatches(ctx context.Context)
 }
 
 // IncRef increments d's reference count.
@@ -150,8 +153,8 @@ func (d *Dentry) TryIncRef() bool {
 }
 
 // DecRef decrements d's reference count.
-func (d *Dentry) DecRef() {
-	d.impl.DecRef()
+func (d *Dentry) DecRef(ctx context.Context) {
+	d.impl.DecRef(ctx)
 }
 
 // IsDead returns true if d has been deleted or invalidated by its owning
@@ -168,8 +171,8 @@ func (d *Dentry) isMounted() bool {
 
 // InotifyWithParent notifies all watches on the targets represented by d and
 // its parent of events.
-func (d *Dentry) InotifyWithParent(events, cookie uint32, et EventType) {
-	d.impl.InotifyWithParent(events, cookie, et)
+func (d *Dentry) InotifyWithParent(ctx context.Context, events, cookie uint32, et EventType) {
+	d.impl.InotifyWithParent(ctx, events, cookie, et)
 }
 
 // Watches returns the set of inotify watches associated with d.
@@ -182,8 +185,8 @@ func (d *Dentry) Watches() *Watches {
 
 // OnZeroWatches performs cleanup tasks whenever the number of watches on a
 // dentry drops to zero.
-func (d *Dentry) OnZeroWatches() {
-	d.impl.OnZeroWatches()
+func (d *Dentry) OnZeroWatches(ctx context.Context) {
+	d.impl.OnZeroWatches(ctx)
 }
 
 // The following functions are exported so that filesystem implementations can
@@ -214,11 +217,11 @@ func (vfs *VirtualFilesystem) AbortDeleteDentry(d *Dentry) {
 
 // CommitDeleteDentry must be called after PrepareDeleteDentry if the deletion
 // succeeds.
-func (vfs *VirtualFilesystem) CommitDeleteDentry(d *Dentry) {
+func (vfs *VirtualFilesystem) CommitDeleteDentry(ctx context.Context, d *Dentry) {
 	d.dead = true
 	d.mu.Unlock()
 	if d.isMounted() {
-		vfs.forgetDeadMountpoint(d)
+		vfs.forgetDeadMountpoint(ctx, d)
 	}
 }
 
@@ -226,12 +229,12 @@ func (vfs *VirtualFilesystem) CommitDeleteDentry(d *Dentry) {
 // did for reasons outside of VFS' control (e.g. d represents the local state
 // of a file on a remote filesystem on which the file has already been
 // deleted).
-func (vfs *VirtualFilesystem) InvalidateDentry(d *Dentry) {
+func (vfs *VirtualFilesystem) InvalidateDentry(ctx context.Context, d *Dentry) {
 	d.mu.Lock()
 	d.dead = true
 	d.mu.Unlock()
 	if d.isMounted() {
-		vfs.forgetDeadMountpoint(d)
+		vfs.forgetDeadMountpoint(ctx, d)
 	}
 }
 
@@ -241,8 +244,9 @@ func (vfs *VirtualFilesystem) InvalidateDentry(d *Dentry) {
 // caller must call AbortRenameDentry, CommitRenameReplaceDentry, or
 // CommitRenameExchangeDentry depending on the rename's outcome.
 //
-// Preconditions: If to is not nil, it must be a child Dentry from the same
-// Filesystem. from != to.
+// Preconditions:
+// * If to is not nil, it must be a child Dentry from the same Filesystem.
+// * from != to.
 func (vfs *VirtualFilesystem) PrepareRenameDentry(mntns *MountNamespace, from, to *Dentry) error {
 	vfs.mountMu.Lock()
 	if mntns.mountpoints[from] != 0 {
@@ -278,13 +282,13 @@ func (vfs *VirtualFilesystem) AbortRenameDentry(from, to *Dentry) {
 // that was replaced by from.
 //
 // Preconditions: PrepareRenameDentry was previously called on from and to.
-func (vfs *VirtualFilesystem) CommitRenameReplaceDentry(from, to *Dentry) {
+func (vfs *VirtualFilesystem) CommitRenameReplaceDentry(ctx context.Context, from, to *Dentry) {
 	from.mu.Unlock()
 	if to != nil {
 		to.dead = true
 		to.mu.Unlock()
 		if to.isMounted() {
-			vfs.forgetDeadMountpoint(to)
+			vfs.forgetDeadMountpoint(ctx, to)
 		}
 	}
 }
@@ -303,7 +307,7 @@ func (vfs *VirtualFilesystem) CommitRenameExchangeDentry(from, to *Dentry) {
 //
 // forgetDeadMountpoint is analogous to Linux's
 // fs/namespace.c:__detach_mounts().
-func (vfs *VirtualFilesystem) forgetDeadMountpoint(d *Dentry) {
+func (vfs *VirtualFilesystem) forgetDeadMountpoint(ctx context.Context, d *Dentry) {
 	var (
 		vdsToDecRef    []VirtualDentry
 		mountsToDecRef []*Mount
@@ -316,9 +320,9 @@ func (vfs *VirtualFilesystem) forgetDeadMountpoint(d *Dentry) {
 	vfs.mounts.seq.EndWrite()
 	vfs.mountMu.Unlock()
 	for _, vd := range vdsToDecRef {
-		vd.DecRef()
+		vd.DecRef(ctx)
 	}
 	for _, mnt := range mountsToDecRef {
-		mnt.DecRef()
+		mnt.DecRef(ctx)
 	}
 }
diff --git a/pkg/sentry/vfs/device.go b/pkg/sentry/vfs/device.go
index 1e9dffc8f..dde2ad79b 100644
--- a/pkg/sentry/vfs/device.go
+++ b/pkg/sentry/vfs/device.go
@@ -22,6 +22,8 @@ import (
 )
 
 // DeviceKind indicates whether a device is a block or character device.
+//
+// +stateify savable
 type DeviceKind uint32
 
 const (
@@ -44,6 +46,7 @@ func (kind DeviceKind) String() string {
 	}
 }
 
+// +stateify savable
 type devTuple struct {
 	kind  DeviceKind
 	major uint32
diff --git a/pkg/sentry/vfs/epoll.go b/pkg/sentry/vfs/epoll.go
index 599c3131c..8f36c3e3b 100644
--- a/pkg/sentry/vfs/epoll.go
+++ b/pkg/sentry/vfs/epoll.go
@@ -27,6 +27,8 @@ import (
 var epollCycleMu sync.Mutex
 
 // EpollInstance represents an epoll instance, as described by epoll(7).
+//
+// +stateify savable
 type EpollInstance struct {
 	vfsfd FileDescription
 	FileDescriptionDefaultImpl
@@ -38,11 +40,11 @@ type EpollInstance struct {
 
 	// interest is the set of file descriptors that are registered with the
 	// EpollInstance for monitoring. interest is protected by interestMu.
-	interestMu sync.Mutex
+	interestMu sync.Mutex `state:"nosave"`
 	interest   map[epollInterestKey]*epollInterest
 
 	// mu protects fields in registered epollInterests.
-	mu sync.Mutex
+	mu sync.Mutex `state:"nosave"`
 
 	// ready is the set of file descriptors that may be "ready" for I/O. Note
 	// that this must be an ordered list, not a map: "If more than maxevents
@@ -55,6 +57,7 @@ type EpollInstance struct {
 	ready epollInterestList
 }
 
+// +stateify savable
 type epollInterestKey struct {
 	// file is the registered FileDescription. No reference is held on file;
 	// instead, when the last reference is dropped, FileDescription.DecRef()
@@ -67,6 +70,8 @@ type epollInterestKey struct {
 }
 
 // epollInterest represents an EpollInstance's interest in a file descriptor.
+//
+// +stateify savable
 type epollInterest struct {
 	// epoll is the owning EpollInstance. epoll is immutable.
 	epoll *EpollInstance
@@ -93,9 +98,9 @@ type epollInterest struct {
 
 // NewEpollInstanceFD returns a FileDescription representing a new epoll
 // instance. A reference is taken on the returned FileDescription.
-func (vfs *VirtualFilesystem) NewEpollInstanceFD() (*FileDescription, error) {
+func (vfs *VirtualFilesystem) NewEpollInstanceFD(ctx context.Context) (*FileDescription, error) {
 	vd := vfs.NewAnonVirtualDentry("[eventpoll]")
-	defer vd.DecRef()
+	defer vd.DecRef(ctx)
 	ep := &EpollInstance{
 		interest: make(map[epollInterestKey]*epollInterest),
 	}
@@ -110,7 +115,7 @@ func (vfs *VirtualFilesystem) NewEpollInstanceFD() (*FileDescription, error) {
 }
 
 // Release implements FileDescriptionImpl.Release.
-func (ep *EpollInstance) Release() {
+func (ep *EpollInstance) Release(ctx context.Context) {
 	// Unregister all polled fds.
 	ep.interestMu.Lock()
 	defer ep.interestMu.Unlock()
@@ -186,7 +191,7 @@ func (ep *EpollInstance) AddInterest(file *FileDescription, num int32, event lin
 	}
 
 	// Register interest in file.
-	mask := event.Events | linux.EPOLLERR | linux.EPOLLRDHUP
+	mask := event.Events | linux.EPOLLERR | linux.EPOLLHUP
 	epi := &epollInterest{
 		epoll:    ep,
 		key:      key,
@@ -257,7 +262,7 @@ func (ep *EpollInstance) ModifyInterest(file *FileDescription, num int32, event
 	}
 
 	// Update epi for the next call to ep.ReadEvents().
-	mask := event.Events | linux.EPOLLERR | linux.EPOLLRDHUP
+	mask := event.Events | linux.EPOLLERR | linux.EPOLLHUP
 	ep.mu.Lock()
 	epi.mask = mask
 	epi.userData = event.Data
@@ -331,11 +336,9 @@ func (ep *EpollInstance) removeLocked(epi *epollInterest) {
 	ep.mu.Unlock()
 }
 
-// ReadEvents reads up to len(events) ready events into events and returns the
-// number of events read.
-//
-// Preconditions: len(events) != 0.
-func (ep *EpollInstance) ReadEvents(events []linux.EpollEvent) int {
+// ReadEvents appends up to maxEvents ready events to events and returns the
+// updated slice.
+func (ep *EpollInstance) ReadEvents(events []linux.EpollEvent, maxEvents int) []linux.EpollEvent {
 	i := 0
 	// Hot path: avoid defer.
 	ep.mu.Lock()
@@ -368,16 +371,16 @@ func (ep *EpollInstance) ReadEvents(events []linux.EpollEvent) int {
 			requeue.PushBack(epi)
 		}
 		// Report ievents.
-		events[i] = linux.EpollEvent{
+		events = append(events, linux.EpollEvent{
 			Events: ievents.ToLinux(),
 			Data:   epi.userData,
-		}
+		})
 		i++
-		if i == len(events) {
+		if i == maxEvents {
 			break
 		}
 	}
 	ep.ready.PushBackList(&requeue)
 	ep.mu.Unlock()
-	return i
+	return events
 }
diff --git a/pkg/sentry/vfs/file_description.go b/pkg/sentry/vfs/file_description.go
index 0c42574db..1eba0270f 100644
--- a/pkg/sentry/vfs/file_description.go
+++ b/pkg/sentry/vfs/file_description.go
@@ -37,13 +37,13 @@ import (
 // FileDescription methods require that a reference is held.
 //
 // FileDescription is analogous to Linux's struct file.
+//
+// +stateify savable
 type FileDescription struct {
-	// refs is the reference count. refs is accessed using atomic memory
-	// operations.
-	refs int64
+	FileDescriptionRefs
 
 	// flagsMu protects statusFlags and asyncHandler below.
-	flagsMu sync.Mutex
+	flagsMu sync.Mutex `state:"nosave"`
 
 	// statusFlags contains status flags, "initialized by open(2) and possibly
 	// modified by fcntl()" - fcntl(2). statusFlags can be read using atomic
@@ -58,7 +58,7 @@ type FileDescription struct {
 
 	// epolls is the set of epollInterests registered for this FileDescription.
 	// epolls is protected by epollMu.
-	epollMu sync.Mutex
+	epollMu sync.Mutex `state:"nosave"`
 	epolls  map[*epollInterest]struct{}
 
 	// vd is the filesystem location at which this FileDescription was opened.
@@ -90,6 +90,8 @@ type FileDescription struct {
 }
 
 // FileDescriptionOptions contains options to FileDescription.Init().
+//
+// +stateify savable
 type FileDescriptionOptions struct {
 	// If AllowDirectIO is true, allow O_DIRECT to be set on the file.
 	AllowDirectIO bool
@@ -103,7 +105,7 @@ type FileDescriptionOptions struct {
 
 	// If UseDentryMetadata is true, calls to FileDescription methods that
 	// interact with file and filesystem metadata (Stat, SetStat, StatFS,
-	// Listxattr, Getxattr, Setxattr, Removexattr) are implemented by calling
+	// ListXattr, GetXattr, SetXattr, RemoveXattr) are implemented by calling
 	// the corresponding FilesystemImpl methods instead of the corresponding
 	// FileDescriptionImpl methods.
 	//
@@ -131,7 +133,7 @@ func (fd *FileDescription) Init(impl FileDescriptionImpl, flags uint32, mnt *Mou
 		}
 	}
 
-	fd.refs = 1
+	fd.EnableLeakCheck()
 
 	// Remove "file creation flags" to mirror the behavior from file.f_flags in
 	// fs/open.c:do_dentry_open.
@@ -149,30 +151,9 @@ func (fd *FileDescription) Init(impl FileDescriptionImpl, flags uint32, mnt *Mou
 	return nil
 }
 
-// IncRef increments fd's reference count.
-func (fd *FileDescription) IncRef() {
-	atomic.AddInt64(&fd.refs, 1)
-}
-
-// TryIncRef increments fd's reference count and returns true. If fd's
-// reference count is already zero, TryIncRef does nothing and returns false.
-//
-// TryIncRef does not require that a reference is held on fd.
-func (fd *FileDescription) TryIncRef() bool {
-	for {
-		refs := atomic.LoadInt64(&fd.refs)
-		if refs <= 0 {
-			return false
-		}
-		if atomic.CompareAndSwapInt64(&fd.refs, refs, refs+1) {
-			return true
-		}
-	}
-}
-
 // DecRef decrements fd's reference count.
-func (fd *FileDescription) DecRef() {
-	if refs := atomic.AddInt64(&fd.refs, -1); refs == 0 {
+func (fd *FileDescription) DecRef(ctx context.Context) {
+	fd.FileDescriptionRefs.DecRef(func() {
 		// Unregister fd from all epoll instances.
 		fd.epollMu.Lock()
 		epolls := fd.epolls
@@ -196,11 +177,11 @@ func (fd *FileDescription) DecRef() {
 		}
 
 		// Release implementation resources.
-		fd.impl.Release()
+		fd.impl.Release(ctx)
 		if fd.writable {
 			fd.vd.mount.EndWrite()
 		}
-		fd.vd.DecRef()
+		fd.vd.DecRef(ctx)
 		fd.flagsMu.Lock()
 		// TODO(gvisor.dev/issue/1663): We may need to unregister during save, as we do in VFS1.
 		if fd.statusFlags&linux.O_ASYNC != 0 && fd.asyncHandler != nil {
@@ -208,15 +189,7 @@ func (fd *FileDescription) DecRef() {
 		}
 		fd.asyncHandler = nil
 		fd.flagsMu.Unlock()
-	} else if refs < 0 {
-		panic("FileDescription.DecRef() called without holding a reference")
-	}
-}
-
-// Refs returns the current number of references. The returned count
-// is inherently racy and is unsafe to use without external synchronization.
-func (fd *FileDescription) Refs() int64 {
-	return atomic.LoadInt64(&fd.refs)
+	})
 }
 
 // Mount returns the mount on which fd was opened. It does not take a reference
@@ -289,7 +262,7 @@ func (fd *FileDescription) SetStatusFlags(ctx context.Context, creds *auth.Crede
 	if flags&linux.O_DIRECT != 0 && !fd.opts.AllowDirectIO {
 		return syserror.EINVAL
 	}
-	// TODO(jamieliu): FileDescriptionImpl.SetOAsync()?
+	// TODO(gvisor.dev/issue/1035): FileDescriptionImpl.SetOAsync()?
 	const settableFlags = linux.O_APPEND | linux.O_ASYNC | linux.O_DIRECT | linux.O_NOATIME | linux.O_NONBLOCK
 	fd.flagsMu.Lock()
 	if fd.asyncHandler != nil {
@@ -301,7 +274,7 @@ func (fd *FileDescription) SetStatusFlags(ctx context.Context, creds *auth.Crede
 			fd.asyncHandler.Unregister(fd)
 		}
 	}
-	fd.statusFlags = (oldFlags &^ settableFlags) | (flags & settableFlags)
+	atomic.StoreUint32(&fd.statusFlags, (oldFlags&^settableFlags)|(flags&settableFlags))
 	fd.flagsMu.Unlock()
 	return nil
 }
@@ -335,7 +308,7 @@ func (fd *FileDescription) Impl() FileDescriptionImpl {
 type FileDescriptionImpl interface {
 	// Release is called when the associated FileDescription reaches zero
 	// references.
-	Release()
+	Release(ctx context.Context)
 
 	// OnClose is called when a file descriptor representing the
 	// FileDescription is closed. Note that returning a non-nil error does not
@@ -354,8 +327,13 @@ type FileDescriptionImpl interface {
 	// represented by the FileDescription.
 	StatFS(ctx context.Context) (linux.Statfs, error)
 
-	// Allocate grows file represented by FileDescription to offset + length bytes.
+	// Allocate grows the file to offset + length bytes.
 	// Only mode == 0 is supported currently.
+	//
+	// Allocate should return EISDIR on directories, ESPIPE on pipes, and ENODEV on
+	// other files where it is not supported.
+	//
+	// Preconditions: The FileDescription was opened for writing.
 	Allocate(ctx context.Context, mode, offset, length uint64) error
 
 	// waiter.Waitable methods may be used to poll for I/O events.
@@ -369,8 +347,9 @@ type FileDescriptionImpl interface {
 	//
 	// - If opts.Flags specifies unsupported options, PRead returns EOPNOTSUPP.
 	//
-	// Preconditions: The FileDescription was opened for reading.
-	// FileDescriptionOptions.DenyPRead == false.
+	// Preconditions:
+	// * The FileDescription was opened for reading.
+	// * FileDescriptionOptions.DenyPRead == false.
 	PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts ReadOptions) (int64, error)
 
 	// Read is similar to PRead, but does not specify an offset.
@@ -401,8 +380,9 @@ type FileDescriptionImpl interface {
 	// - If opts.Flags specifies unsupported options, PWrite returns
 	// EOPNOTSUPP.
 	//
-	// Preconditions: The FileDescription was opened for writing.
-	// FileDescriptionOptions.DenyPWrite == false.
+	// Preconditions:
+	// * The FileDescription was opened for writing.
+	// * FileDescriptionOptions.DenyPWrite == false.
 	PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts WriteOptions) (int64, error)
 
 	// Write is similar to PWrite, but does not specify an offset, which is
@@ -447,19 +427,19 @@ type FileDescriptionImpl interface {
 	// Ioctl implements the ioctl(2) syscall.
 	Ioctl(ctx context.Context, uio usermem.IO, args arch.SyscallArguments) (uintptr, error)
 
-	// Listxattr returns all extended attribute names for the file.
-	Listxattr(ctx context.Context, size uint64) ([]string, error)
+	// ListXattr returns all extended attribute names for the file.
+	ListXattr(ctx context.Context, size uint64) ([]string, error)
 
-	// Getxattr returns the value associated with the given extended attribute
+	// GetXattr returns the value associated with the given extended attribute
 	// for the file.
-	Getxattr(ctx context.Context, opts GetxattrOptions) (string, error)
+	GetXattr(ctx context.Context, opts GetXattrOptions) (string, error)
 
-	// Setxattr changes the value associated with the given extended attribute
+	// SetXattr changes the value associated with the given extended attribute
 	// for the file.
-	Setxattr(ctx context.Context, opts SetxattrOptions) error
+	SetXattr(ctx context.Context, opts SetXattrOptions) error
 
-	// Removexattr removes the given extended attribute from the file.
-	Removexattr(ctx context.Context, name string) error
+	// RemoveXattr removes the given extended attribute from the file.
+	RemoveXattr(ctx context.Context, name string) error
 
 	// LockBSD tries to acquire a BSD-style advisory file lock.
 	LockBSD(ctx context.Context, uid lock.UniqueID, t lock.LockType, block lock.Blocker) error
@@ -475,6 +455,8 @@ type FileDescriptionImpl interface {
 }
 
 // Dirent holds the information contained in struct linux_dirent64.
+//
+// +stateify savable
 type Dirent struct {
 	// Name is the filename.
 	Name string
@@ -526,7 +508,7 @@ func (fd *FileDescription) Stat(ctx context.Context, opts StatOptions) (linux.St
 			Start: fd.vd,
 		})
 		stat, err := fd.vd.mount.fs.impl.StatAt(ctx, rp, opts)
-		vfsObj.putResolvingPath(rp)
+		vfsObj.putResolvingPath(ctx, rp)
 		return stat, err
 	}
 	return fd.impl.Stat(ctx, opts)
@@ -541,7 +523,7 @@ func (fd *FileDescription) SetStat(ctx context.Context, opts SetStatOptions) err
 			Start: fd.vd,
 		})
 		err := fd.vd.mount.fs.impl.SetStatAt(ctx, rp, opts)
-		vfsObj.putResolvingPath(rp)
+		vfsObj.putResolvingPath(ctx, rp)
 		return err
 	}
 	return fd.impl.SetStat(ctx, opts)
@@ -557,12 +539,20 @@ func (fd *FileDescription) StatFS(ctx context.Context) (linux.Statfs, error) {
 			Start: fd.vd,
 		})
 		statfs, err := fd.vd.mount.fs.impl.StatFSAt(ctx, rp)
-		vfsObj.putResolvingPath(rp)
+		vfsObj.putResolvingPath(ctx, rp)
 		return statfs, err
 	}
 	return fd.impl.StatFS(ctx)
 }
 
+// Allocate grows the file represented by fd to offset + length bytes.
+func (fd *FileDescription) Allocate(ctx context.Context, mode, offset, length uint64) error {
+	if !fd.IsWritable() {
+		return syserror.EBADF
+	}
+	return fd.impl.Allocate(ctx, mode, offset, length)
+}
+
 // Readiness implements waiter.Waitable.Readiness.
 //
 // It returns fd's I/O readiness.
@@ -654,25 +644,25 @@ func (fd *FileDescription) Ioctl(ctx context.Context, uio usermem.IO, args arch.
 	return fd.impl.Ioctl(ctx, uio, args)
 }
 
-// Listxattr returns all extended attribute names for the file represented by
+// ListXattr returns all extended attribute names for the file represented by
 // fd.
 //
 // If the size of the list (including a NUL terminating byte after every entry)
 // would exceed size, ERANGE may be returned. Note that implementations
 // are free to ignore size entirely and return without error). In all cases,
 // if size is 0, the list should be returned without error, regardless of size.
-func (fd *FileDescription) Listxattr(ctx context.Context, size uint64) ([]string, error) {
+func (fd *FileDescription) ListXattr(ctx context.Context, size uint64) ([]string, error) {
 	if fd.opts.UseDentryMetadata {
 		vfsObj := fd.vd.mount.vfs
 		rp := vfsObj.getResolvingPath(auth.CredentialsFromContext(ctx), &PathOperation{
 			Root:  fd.vd,
 			Start: fd.vd,
 		})
-		names, err := fd.vd.mount.fs.impl.ListxattrAt(ctx, rp, size)
-		vfsObj.putResolvingPath(rp)
+		names, err := fd.vd.mount.fs.impl.ListXattrAt(ctx, rp, size)
+		vfsObj.putResolvingPath(ctx, rp)
 		return names, err
 	}
-	names, err := fd.impl.Listxattr(ctx, size)
+	names, err := fd.impl.ListXattr(ctx, size)
 	if err == syserror.ENOTSUP {
 		// Linux doesn't actually return ENOTSUP in this case; instead,
 		// fs/xattr.c:vfs_listxattr() falls back to allowing the security
@@ -683,57 +673,57 @@ func (fd *FileDescription) Listxattr(ctx context.Context, size uint64) ([]string
 	return names, err
 }
 
-// Getxattr returns the value associated with the given extended attribute for
+// GetXattr returns the value associated with the given extended attribute for
 // the file represented by fd.
 //
 // If the size of the return value exceeds opts.Size, ERANGE may be returned
 // (note that implementations are free to ignore opts.Size entirely and return
 // without error). In all cases, if opts.Size is 0, the value should be
 // returned without error, regardless of size.
-func (fd *FileDescription) Getxattr(ctx context.Context, opts *GetxattrOptions) (string, error) {
+func (fd *FileDescription) GetXattr(ctx context.Context, opts *GetXattrOptions) (string, error) {
 	if fd.opts.UseDentryMetadata {
 		vfsObj := fd.vd.mount.vfs
 		rp := vfsObj.getResolvingPath(auth.CredentialsFromContext(ctx), &PathOperation{
 			Root:  fd.vd,
 			Start: fd.vd,
 		})
-		val, err := fd.vd.mount.fs.impl.GetxattrAt(ctx, rp, *opts)
-		vfsObj.putResolvingPath(rp)
+		val, err := fd.vd.mount.fs.impl.GetXattrAt(ctx, rp, *opts)
+		vfsObj.putResolvingPath(ctx, rp)
 		return val, err
 	}
-	return fd.impl.Getxattr(ctx, *opts)
+	return fd.impl.GetXattr(ctx, *opts)
 }
 
-// Setxattr changes the value associated with the given extended attribute for
+// SetXattr changes the value associated with the given extended attribute for
 // the file represented by fd.
-func (fd *FileDescription) Setxattr(ctx context.Context, opts *SetxattrOptions) error {
+func (fd *FileDescription) SetXattr(ctx context.Context, opts *SetXattrOptions) error {
 	if fd.opts.UseDentryMetadata {
 		vfsObj := fd.vd.mount.vfs
 		rp := vfsObj.getResolvingPath(auth.CredentialsFromContext(ctx), &PathOperation{
 			Root:  fd.vd,
 			Start: fd.vd,
 		})
-		err := fd.vd.mount.fs.impl.SetxattrAt(ctx, rp, *opts)
-		vfsObj.putResolvingPath(rp)
+		err := fd.vd.mount.fs.impl.SetXattrAt(ctx, rp, *opts)
+		vfsObj.putResolvingPath(ctx, rp)
 		return err
 	}
-	return fd.impl.Setxattr(ctx, *opts)
+	return fd.impl.SetXattr(ctx, *opts)
 }
 
-// Removexattr removes the given extended attribute from the file represented
+// RemoveXattr removes the given extended attribute from the file represented
 // by fd.
-func (fd *FileDescription) Removexattr(ctx context.Context, name string) error {
+func (fd *FileDescription) RemoveXattr(ctx context.Context, name string) error {
 	if fd.opts.UseDentryMetadata {
 		vfsObj := fd.vd.mount.vfs
 		rp := vfsObj.getResolvingPath(auth.CredentialsFromContext(ctx), &PathOperation{
 			Root:  fd.vd,
 			Start: fd.vd,
 		})
-		err := fd.vd.mount.fs.impl.RemovexattrAt(ctx, rp, name)
-		vfsObj.putResolvingPath(rp)
+		err := fd.vd.mount.fs.impl.RemoveXattrAt(ctx, rp, name)
+		vfsObj.putResolvingPath(ctx, rp)
 		return err
 	}
-	return fd.impl.Removexattr(ctx, name)
+	return fd.impl.RemoveXattr(ctx, name)
 }
 
 // SyncFS instructs the filesystem containing fd to execute the semantics of
@@ -747,7 +737,7 @@ func (fd *FileDescription) MappedName(ctx context.Context) string {
 	vfsroot := RootFromContext(ctx)
 	s, _ := fd.vd.mount.vfs.PathnameWithDeleted(ctx, vfsroot, fd.vd)
 	if vfsroot.Ok() {
-		vfsroot.DecRef()
+		vfsroot.DecRef(ctx)
 	}
 	return s
 }
@@ -835,3 +825,31 @@ func (fd *FileDescription) SetAsyncHandler(newHandler func() FileAsync) FileAsyn
 	}
 	return fd.asyncHandler
 }
+
+// FileReadWriteSeeker is a helper struct to pass a FileDescription as
+// io.Reader/io.Writer/io.ReadSeeker/etc.
+type FileReadWriteSeeker struct {
+	FD    *FileDescription
+	Ctx   context.Context
+	ROpts ReadOptions
+	WOpts WriteOptions
+}
+
+// Read implements io.ReadWriteSeeker.Read.
+func (f *FileReadWriteSeeker) Read(p []byte) (int, error) {
+	dst := usermem.BytesIOSequence(p)
+	ret, err := f.FD.Read(f.Ctx, dst, f.ROpts)
+	return int(ret), err
+}
+
+// Seek implements io.ReadWriteSeeker.Seek.
+func (f *FileReadWriteSeeker) Seek(offset int64, whence int) (int64, error) {
+	return f.FD.Seek(f.Ctx, offset, int32(whence))
+}
+
+// Write implements io.ReadWriteSeeker.Write.
+func (f *FileReadWriteSeeker) Write(p []byte) (int, error) {
+	buf := usermem.BytesIOSequence(p)
+	ret, err := f.FD.Write(f.Ctx, buf, f.WOpts)
+	return int(ret), err
+}
diff --git a/pkg/sentry/vfs/file_description_impl_util.go b/pkg/sentry/vfs/file_description_impl_util.go
index 6b8b4ad49..48ca9de44 100644
--- a/pkg/sentry/vfs/file_description_impl_util.go
+++ b/pkg/sentry/vfs/file_description_impl_util.go
@@ -42,6 +42,8 @@ import (
 // FileDescriptionDefaultImpl may be embedded by implementations of
 // FileDescriptionImpl to obtain implementations of many FileDescriptionImpl
 // methods with default behavior analogous to Linux's.
+//
+// +stateify savable
 type FileDescriptionDefaultImpl struct{}
 
 // OnClose implements FileDescriptionImpl.OnClose analogously to
@@ -57,7 +59,11 @@ func (FileDescriptionDefaultImpl) StatFS(ctx context.Context) (linux.Statfs, err
 }
 
 // Allocate implements FileDescriptionImpl.Allocate analogously to
-// fallocate called on regular file, directory or FIFO in Linux.
+// fallocate called on an invalid type of file in Linux.
+//
+// Note that directories can rely on this implementation even though they
+// should technically return EISDIR. Allocate should never be called for a
+// directory, because it requires a writable fd.
 func (FileDescriptionDefaultImpl) Allocate(ctx context.Context, mode, offset, length uint64) error {
 	return syserror.ENODEV
 }
@@ -134,34 +140,36 @@ func (FileDescriptionDefaultImpl) Ioctl(ctx context.Context, uio usermem.IO, arg
 	return 0, syserror.ENOTTY
 }
 
-// Listxattr implements FileDescriptionImpl.Listxattr analogously to
+// ListXattr implements FileDescriptionImpl.ListXattr analogously to
 // inode_operations::listxattr == NULL in Linux.
-func (FileDescriptionDefaultImpl) Listxattr(ctx context.Context, size uint64) ([]string, error) {
-	// This isn't exactly accurate; see FileDescription.Listxattr.
+func (FileDescriptionDefaultImpl) ListXattr(ctx context.Context, size uint64) ([]string, error) {
+	// This isn't exactly accurate; see FileDescription.ListXattr.
 	return nil, syserror.ENOTSUP
 }
 
-// Getxattr implements FileDescriptionImpl.Getxattr analogously to
+// GetXattr implements FileDescriptionImpl.GetXattr analogously to
 // inode::i_opflags & IOP_XATTR == 0 in Linux.
-func (FileDescriptionDefaultImpl) Getxattr(ctx context.Context, opts GetxattrOptions) (string, error) {
+func (FileDescriptionDefaultImpl) GetXattr(ctx context.Context, opts GetXattrOptions) (string, error) {
 	return "", syserror.ENOTSUP
 }
 
-// Setxattr implements FileDescriptionImpl.Setxattr analogously to
+// SetXattr implements FileDescriptionImpl.SetXattr analogously to
 // inode::i_opflags & IOP_XATTR == 0 in Linux.
-func (FileDescriptionDefaultImpl) Setxattr(ctx context.Context, opts SetxattrOptions) error {
+func (FileDescriptionDefaultImpl) SetXattr(ctx context.Context, opts SetXattrOptions) error {
 	return syserror.ENOTSUP
 }
 
-// Removexattr implements FileDescriptionImpl.Removexattr analogously to
+// RemoveXattr implements FileDescriptionImpl.RemoveXattr analogously to
 // inode::i_opflags & IOP_XATTR == 0 in Linux.
-func (FileDescriptionDefaultImpl) Removexattr(ctx context.Context, name string) error {
+func (FileDescriptionDefaultImpl) RemoveXattr(ctx context.Context, name string) error {
 	return syserror.ENOTSUP
 }
 
 // DirectoryFileDescriptionDefaultImpl may be embedded by implementations of
 // FileDescriptionImpl that always represent directories to obtain
 // implementations of non-directory I/O methods that return EISDIR.
+//
+// +stateify savable
 type DirectoryFileDescriptionDefaultImpl struct{}
 
 // Allocate implements DirectoryFileDescriptionDefaultImpl.Allocate.
@@ -192,6 +200,8 @@ func (DirectoryFileDescriptionDefaultImpl) Write(ctx context.Context, src userme
 // DentryMetadataFileDescriptionImpl may be embedded by implementations of
 // FileDescriptionImpl for which FileDescriptionOptions.UseDentryMetadata is
 // true to obtain implementations of Stat and SetStat that panic.
+//
+// +stateify savable
 type DentryMetadataFileDescriptionImpl struct{}
 
 // Stat implements FileDescriptionImpl.Stat.
@@ -206,12 +216,16 @@ func (DentryMetadataFileDescriptionImpl) SetStat(ctx context.Context, opts SetSt
 
 // DynamicBytesSource represents a data source for a
 // DynamicBytesFileDescriptionImpl.
+//
+// +stateify savable
 type DynamicBytesSource interface {
 	// Generate writes the file's contents to buf.
 	Generate(ctx context.Context, buf *bytes.Buffer) error
 }
 
 // StaticData implements DynamicBytesSource over a static string.
+//
+// +stateify savable
 type StaticData struct {
 	Data string
 }
@@ -238,14 +252,24 @@ type WritableDynamicBytesSource interface {
 //
 // DynamicBytesFileDescriptionImpl.SetDataSource() must be called before first
 // use.
+//
+// +stateify savable
 type DynamicBytesFileDescriptionImpl struct {
 	data     DynamicBytesSource // immutable
-	mu       sync.Mutex         // protects the following fields
-	buf      bytes.Buffer
+	mu       sync.Mutex         `state:"nosave"` // protects the following fields
+	buf      bytes.Buffer       `state:".([]byte)"`
 	off      int64
 	lastRead int64 // offset at which the last Read, PRead, or Seek ended
 }
 
+func (fd *DynamicBytesFileDescriptionImpl) saveBuf() []byte {
+	return fd.buf.Bytes()
+}
+
+func (fd *DynamicBytesFileDescriptionImpl) loadBuf(p []byte) {
+	fd.buf.Write(p)
+}
+
 // SetDataSource must be called exactly once on fd before first use.
 func (fd *DynamicBytesFileDescriptionImpl) SetDataSource(data DynamicBytesSource) {
 	fd.data = data
@@ -378,6 +402,8 @@ func GenericConfigureMMap(fd *FileDescription, m memmap.Mappable, opts *memmap.M
 
 // LockFD may be used by most implementations of FileDescriptionImpl.Lock*
 // functions. Caller must call Init().
+//
+// +stateify savable
 type LockFD struct {
 	locks *FileLocks
 }
@@ -405,6 +431,8 @@ func (fd *LockFD) UnlockBSD(ctx context.Context, uid fslock.UniqueID) error {
 
 // NoLockFD implements Lock*/Unlock* portion of FileDescriptionImpl interface
 // returning ENOLCK.
+//
+// +stateify savable
 type NoLockFD struct{}
 
 // LockBSD implements vfs.FileDescriptionImpl.LockBSD.
diff --git a/pkg/sentry/vfs/file_description_impl_util_test.go b/pkg/sentry/vfs/file_description_impl_util_test.go
index 3b7e1c273..1cd607c0a 100644
--- a/pkg/sentry/vfs/file_description_impl_util_test.go
+++ b/pkg/sentry/vfs/file_description_impl_util_test.go
@@ -80,9 +80,9 @@ type testFD struct {
 	data DynamicBytesSource
 }
 
-func newTestFD(vfsObj *VirtualFilesystem, statusFlags uint32, data DynamicBytesSource) *FileDescription {
+func newTestFD(ctx context.Context, vfsObj *VirtualFilesystem, statusFlags uint32, data DynamicBytesSource) *FileDescription {
 	vd := vfsObj.NewAnonVirtualDentry("genCountFD")
-	defer vd.DecRef()
+	defer vd.DecRef(ctx)
 	var fd testFD
 	fd.vfsfd.Init(&fd, statusFlags, vd.Mount(), vd.Dentry(), &FileDescriptionOptions{})
 	fd.DynamicBytesFileDescriptionImpl.SetDataSource(data)
@@ -90,7 +90,7 @@ func newTestFD(vfsObj *VirtualFilesystem, statusFlags uint32, data DynamicBytesS
 }
 
 // Release implements FileDescriptionImpl.Release.
-func (fd *testFD) Release() {
+func (fd *testFD) Release(context.Context) {
 }
 
 // SetStatusFlags implements FileDescriptionImpl.SetStatusFlags.
@@ -109,11 +109,11 @@ func TestGenCountFD(t *testing.T) {
 	ctx := contexttest.Context(t)
 
 	vfsObj := &VirtualFilesystem{}
-	if err := vfsObj.Init(); err != nil {
+	if err := vfsObj.Init(ctx); err != nil {
 		t.Fatalf("VFS init: %v", err)
 	}
-	fd := newTestFD(vfsObj, linux.O_RDWR, &genCount{})
-	defer fd.DecRef()
+	fd := newTestFD(ctx, vfsObj, linux.O_RDWR, &genCount{})
+	defer fd.DecRef(ctx)
 
 	// The first read causes Generate to be called to fill the FD's buffer.
 	buf := make([]byte, 2)
@@ -167,11 +167,11 @@ func TestWritable(t *testing.T) {
 	ctx := contexttest.Context(t)
 
 	vfsObj := &VirtualFilesystem{}
-	if err := vfsObj.Init(); err != nil {
+	if err := vfsObj.Init(ctx); err != nil {
 		t.Fatalf("VFS init: %v", err)
 	}
-	fd := newTestFD(vfsObj, linux.O_RDWR, &storeData{data: "init"})
-	defer fd.DecRef()
+	fd := newTestFD(ctx, vfsObj, linux.O_RDWR, &storeData{data: "init"})
+	defer fd.DecRef(ctx)
 
 	buf := make([]byte, 10)
 	ioseq := usermem.BytesIOSequence(buf)
diff --git a/pkg/sentry/vfs/filesystem.go b/pkg/sentry/vfs/filesystem.go
index 6bb9ca180..c93d94634 100644
--- a/pkg/sentry/vfs/filesystem.go
+++ b/pkg/sentry/vfs/filesystem.go
@@ -15,8 +15,6 @@
 package vfs
 
 import (
-	"sync/atomic"
-
 	"gvisor.dev/gvisor/pkg/abi/linux"
 	"gvisor.dev/gvisor/pkg/context"
 	"gvisor.dev/gvisor/pkg/fspath"
@@ -34,9 +32,7 @@ import (
 //
 // +stateify savable
 type Filesystem struct {
-	// refs is the reference count. refs is accessed using atomic memory
-	// operations.
-	refs int64
+	FilesystemRefs
 
 	// vfs is the VirtualFilesystem that uses this Filesystem. vfs is
 	// immutable.
@@ -52,7 +48,7 @@ type Filesystem struct {
 
 // Init must be called before first use of fs.
 func (fs *Filesystem) Init(vfsObj *VirtualFilesystem, fsType FilesystemType, impl FilesystemImpl) {
-	fs.refs = 1
+	fs.EnableLeakCheck()
 	fs.vfs = vfsObj
 	fs.fsType = fsType
 	fs.impl = impl
@@ -76,39 +72,14 @@ func (fs *Filesystem) Impl() FilesystemImpl {
 	return fs.impl
 }
 
-// IncRef increments fs' reference count.
-func (fs *Filesystem) IncRef() {
-	if atomic.AddInt64(&fs.refs, 1) <= 1 {
-		panic("Filesystem.IncRef() called without holding a reference")
-	}
-}
-
-// TryIncRef increments fs' reference count and returns true. If fs' reference
-// count is zero, TryIncRef does nothing and returns false.
-//
-// TryIncRef does not require that a reference is held on fs.
-func (fs *Filesystem) TryIncRef() bool {
-	for {
-		refs := atomic.LoadInt64(&fs.refs)
-		if refs <= 0 {
-			return false
-		}
-		if atomic.CompareAndSwapInt64(&fs.refs, refs, refs+1) {
-			return true
-		}
-	}
-}
-
 // DecRef decrements fs' reference count.
-func (fs *Filesystem) DecRef() {
-	if refs := atomic.AddInt64(&fs.refs, -1); refs == 0 {
+func (fs *Filesystem) DecRef(ctx context.Context) {
+	fs.FilesystemRefs.DecRef(func() {
 		fs.vfs.filesystemsMu.Lock()
 		delete(fs.vfs.filesystems, fs)
 		fs.vfs.filesystemsMu.Unlock()
-		fs.impl.Release()
-	} else if refs < 0 {
-		panic("Filesystem.decRef() called without holding a reference")
-	}
+		fs.impl.Release(ctx)
+	})
 }
 
 // FilesystemImpl contains implementation details for a Filesystem.
@@ -149,7 +120,7 @@ func (fs *Filesystem) DecRef() {
 type FilesystemImpl interface {
 	// Release is called when the associated Filesystem reaches zero
 	// references.
-	Release()
+	Release(ctx context.Context)
 
 	// Sync "causes all pending modifications to filesystem metadata and cached
 	// file data to be written to the underlying [filesystem]", as by syncfs(2).
@@ -212,8 +183,9 @@ type FilesystemImpl interface {
 	// ENOENT. Equivalently, if vd represents a file with a link count of 0 not
 	// created by open(O_TMPFILE) without O_EXCL, LinkAt returns ENOENT.
 	//
-	// Preconditions: !rp.Done(). For the final path component in rp,
-	// !rp.ShouldFollowSymlink().
+	// Preconditions:
+	// * !rp.Done().
+	// * For the final path component in rp, !rp.ShouldFollowSymlink().
 	//
 	// Postconditions: If LinkAt returns an error returned by
 	// ResolvingPath.Resolve*(), then !rp.Done().
@@ -231,8 +203,9 @@ type FilesystemImpl interface {
 	// - If the directory in which the new directory would be created has been
 	// removed by RmdirAt or RenameAt, MkdirAt returns ENOENT.
 	//
-	// Preconditions: !rp.Done(). For the final path component in rp,
-	// !rp.ShouldFollowSymlink().
+	// Preconditions:
+	// * !rp.Done().
+	// * For the final path component in rp, !rp.ShouldFollowSymlink().
 	//
 	// Postconditions: If MkdirAt returns an error returned by
 	// ResolvingPath.Resolve*(), then !rp.Done().
@@ -253,8 +226,9 @@ type FilesystemImpl interface {
 	// - If the directory in which the file would be created has been removed
 	// by RmdirAt or RenameAt, MknodAt returns ENOENT.
 	//
-	// Preconditions: !rp.Done(). For the final path component in rp,
-	// !rp.ShouldFollowSymlink().
+	// Preconditions:
+	// * !rp.Done().
+	// * For the final path component in rp, !rp.ShouldFollowSymlink().
 	//
 	// Postconditions: If MknodAt returns an error returned by
 	// ResolvingPath.Resolve*(), then !rp.Done().
@@ -345,11 +319,12 @@ type FilesystemImpl interface {
 	// - If renaming would replace a non-empty directory, RenameAt returns
 	// ENOTEMPTY.
 	//
-	// Preconditions: !rp.Done(). For the final path component in rp,
-	// !rp.ShouldFollowSymlink(). oldParentVD.Dentry() was obtained from a
-	// previous call to
-	// oldParentVD.Mount().Filesystem().Impl().GetParentDentryAt(). oldName is
-	// not "." or "..".
+	// Preconditions:
+	// * !rp.Done().
+	// * For the final path component in rp, !rp.ShouldFollowSymlink().
+	// * oldParentVD.Dentry() was obtained from a previous call to
+	//   oldParentVD.Mount().Filesystem().Impl().GetParentDentryAt().
+	// * oldName is not "." or "..".
 	//
 	// Postconditions: If RenameAt returns an error returned by
 	// ResolvingPath.Resolve*(), then !rp.Done().
@@ -372,8 +347,9 @@ type FilesystemImpl interface {
 	// - If the file at rp exists but is not a directory, RmdirAt returns
 	// ENOTDIR.
 	//
-	// Preconditions: !rp.Done(). For the final path component in rp,
-	// !rp.ShouldFollowSymlink().
+	// Preconditions:
+	// * !rp.Done().
+	// * For the final path component in rp, !rp.ShouldFollowSymlink().
 	//
 	// Postconditions: If RmdirAt returns an error returned by
 	// ResolvingPath.Resolve*(), then !rp.Done().
@@ -410,8 +386,9 @@ type FilesystemImpl interface {
 	// - If the directory in which the symbolic link would be created has been
 	// removed by RmdirAt or RenameAt, SymlinkAt returns ENOENT.
 	//
-	// Preconditions: !rp.Done(). For the final path component in rp,
-	// !rp.ShouldFollowSymlink().
+	// Preconditions:
+	// * !rp.Done().
+	// * For the final path component in rp, !rp.ShouldFollowSymlink().
 	//
 	// Postconditions: If SymlinkAt returns an error returned by
 	// ResolvingPath.Resolve*(), then !rp.Done().
@@ -431,33 +408,34 @@ type FilesystemImpl interface {
 	//
 	// - If the file at rp exists but is a directory, UnlinkAt returns EISDIR.
 	//
-	// Preconditions: !rp.Done(). For the final path component in rp,
-	// !rp.ShouldFollowSymlink().
+	// Preconditions:
+	// * !rp.Done().
+	// * For the final path component in rp, !rp.ShouldFollowSymlink().
 	//
 	// Postconditions: If UnlinkAt returns an error returned by
 	// ResolvingPath.Resolve*(), then !rp.Done().
 	UnlinkAt(ctx context.Context, rp *ResolvingPath) error
 
-	// ListxattrAt returns all extended attribute names for the file at rp.
+	// ListXattrAt returns all extended attribute names for the file at rp.
 	//
 	// Errors:
 	//
 	// - If extended attributes are not supported by the filesystem,
-	// ListxattrAt returns ENOTSUP.
+	// ListXattrAt returns ENOTSUP.
 	//
 	// - If the size of the list (including a NUL terminating byte after every
 	// entry) would exceed size, ERANGE may be returned. Note that
 	// implementations are free to ignore size entirely and return without
 	// error). In all cases, if size is 0, the list should be returned without
 	// error, regardless of size.
-	ListxattrAt(ctx context.Context, rp *ResolvingPath, size uint64) ([]string, error)
+	ListXattrAt(ctx context.Context, rp *ResolvingPath, size uint64) ([]string, error)
 
-	// GetxattrAt returns the value associated with the given extended
+	// GetXattrAt returns the value associated with the given extended
 	// attribute for the file at rp.
 	//
 	// Errors:
 	//
-	// - If extended attributes are not supported by the filesystem, GetxattrAt
+	// - If extended attributes are not supported by the filesystem, GetXattrAt
 	// returns ENOTSUP.
 	//
 	// - If an extended attribute named opts.Name does not exist, ENODATA is
@@ -467,30 +445,30 @@ type FilesystemImpl interface {
 	// returned (note that implementations are free to ignore opts.Size entirely
 	// and return without error). In all cases, if opts.Size is 0, the value
 	// should be returned without error, regardless of size.
-	GetxattrAt(ctx context.Context, rp *ResolvingPath, opts GetxattrOptions) (string, error)
+	GetXattrAt(ctx context.Context, rp *ResolvingPath, opts GetXattrOptions) (string, error)
 
-	// SetxattrAt changes the value associated with the given extended
+	// SetXattrAt changes the value associated with the given extended
 	// attribute for the file at rp.
 	//
 	// Errors:
 	//
-	// - If extended attributes are not supported by the filesystem, SetxattrAt
+	// - If extended attributes are not supported by the filesystem, SetXattrAt
 	// returns ENOTSUP.
 	//
 	// - If XATTR_CREATE is set in opts.Flag and opts.Name already exists,
 	// EEXIST is returned. If XATTR_REPLACE is set and opts.Name does not exist,
 	// ENODATA is returned.
-	SetxattrAt(ctx context.Context, rp *ResolvingPath, opts SetxattrOptions) error
+	SetXattrAt(ctx context.Context, rp *ResolvingPath, opts SetXattrOptions) error
 
-	// RemovexattrAt removes the given extended attribute from the file at rp.
+	// RemoveXattrAt removes the given extended attribute from the file at rp.
 	//
 	// Errors:
 	//
 	// - If extended attributes are not supported by the filesystem,
-	// RemovexattrAt returns ENOTSUP.
+	// RemoveXattrAt returns ENOTSUP.
 	//
 	// - If name does not exist, ENODATA is returned.
-	RemovexattrAt(ctx context.Context, rp *ResolvingPath, name string) error
+	RemoveXattrAt(ctx context.Context, rp *ResolvingPath, name string) error
 
 	// BoundEndpointAt returns the Unix socket endpoint bound at the path rp.
 	//
@@ -528,6 +506,8 @@ type FilesystemImpl interface {
 
 // PrependPathAtVFSRootError is returned by implementations of
 // FilesystemImpl.PrependPath() when they encounter the contextual VFS root.
+//
+// +stateify savable
 type PrependPathAtVFSRootError struct{}
 
 // Error implements error.Error.
@@ -538,6 +518,8 @@ func (PrependPathAtVFSRootError) Error() string {
 // PrependPathAtNonMountRootError is returned by implementations of
 // FilesystemImpl.PrependPath() when they encounter an independent ancestor
 // Dentry that is not the Mount root.
+//
+// +stateify savable
 type PrependPathAtNonMountRootError struct{}
 
 // Error implements error.Error.
@@ -548,6 +530,8 @@ func (PrependPathAtNonMountRootError) Error() string {
 // PrependPathSyntheticError is returned by implementations of
 // FilesystemImpl.PrependPath() for which prepended names do not represent real
 // paths.
+//
+// +stateify savable
 type PrependPathSyntheticError struct{}
 
 // Error implements error.Error.
diff --git a/pkg/sentry/vfs/filesystem_impl_util.go b/pkg/sentry/vfs/filesystem_impl_util.go
index 465e610e0..2620cf975 100644
--- a/pkg/sentry/vfs/filesystem_impl_util.go
+++ b/pkg/sentry/vfs/filesystem_impl_util.go
@@ -16,6 +16,9 @@ package vfs
 
 import (
 	"strings"
+
+	"gvisor.dev/gvisor/pkg/abi/linux"
+	"gvisor.dev/gvisor/pkg/usermem"
 )
 
 // GenericParseMountOptions parses a comma-separated list of options of the
@@ -41,3 +44,13 @@ func GenericParseMountOptions(str string) map[string]string {
 	}
 	return m
 }
+
+// GenericStatFS returns a statfs struct filled with the common fields for a
+// general filesystem. This is analogous to Linux's fs/libfs.c:simple_statfs().
+func GenericStatFS(fsMagic uint64) linux.Statfs {
+	return linux.Statfs{
+		Type:       fsMagic,
+		BlockSize:  usermem.PageSize,
+		NameLength: linux.NAME_MAX,
+	}
+}
diff --git a/pkg/sentry/vfs/filesystem_type.go b/pkg/sentry/vfs/filesystem_type.go
index f2298f7f6..bc19db1d5 100644
--- a/pkg/sentry/vfs/filesystem_type.go
+++ b/pkg/sentry/vfs/filesystem_type.go
@@ -55,10 +55,13 @@ type registeredFilesystemType struct {
 
 // RegisterFilesystemTypeOptions contains options to
 // VirtualFilesystem.RegisterFilesystem().
+//
+// +stateify savable
 type RegisterFilesystemTypeOptions struct {
-	// If AllowUserMount is true, allow calls to VirtualFilesystem.MountAt()
-	// for which MountOptions.InternalMount == false to use this filesystem
-	// type.
+	// AllowUserMount determines whether users are allowed to mount a filesystem
+	// of this type, i.e. through mount(2). If AllowUserMount is true, allow calls
+	// to VirtualFilesystem.MountAt() for which MountOptions.InternalMount == false
+	// to use this filesystem type.
 	AllowUserMount bool
 
 	// If AllowUserList is true, make this filesystem type visible in
diff --git a/pkg/sentry/vfs/g3doc/inotify.md b/pkg/sentry/vfs/g3doc/inotify.md
index e7da49faa..833db213f 100644
--- a/pkg/sentry/vfs/g3doc/inotify.md
+++ b/pkg/sentry/vfs/g3doc/inotify.md
@@ -28,9 +28,9 @@ The set of all watches held on a single file (i.e., the watch target) is stored
 in vfs.Watches. Each watch will belong to a different inotify instance (an
 instance can only have one watch on any watch target). The watches are stored in
 a map indexed by their vfs.Inotify owner’s id. Hard links and file descriptions
-to a single file will all share the same vfs.Watches. Activity on the target
-causes its vfs.Watches to generate notifications on its watches’ inotify
-instances.
+to a single file will all share the same vfs.Watches (with the exception of the
+gofer filesystem, described in a later section). Activity on the target causes
+its vfs.Watches to generate notifications on its watches’ inotify instances.
 
 ### vfs.Watch
 
@@ -103,12 +103,12 @@ inotify:
     unopened p9 file (and possibly an open FID), through which the Sentry
     interacts with the gofer.
     *   *Solution:* Because there is no inode structure stored in the sandbox,
-        inotify watches must be held on the dentry. This would be an issue in
-        the presence of hard links, where multiple dentries would need to share
-        the same set of watches, but in VFS2, we do not support the internal
-        creation of hard links on gofer fs. As a result, we make the assumption
-        that every dentry corresponds to a unique inode. However, the next point
-        raises an issue with this assumption:
+        inotify watches must be held on the dentry. For the purposes of inotify,
+        we assume that every dentry corresponds to a unique inode, which may
+        cause unexpected behavior in the presence of hard links, where multiple
+        dentries should share the same set of watches. Indeed, it is impossible
+        for us to be absolutely sure whether dentries correspond to the same
+        file or not, due to the following point:
 *   **The Sentry cannot always be aware of hard links on the remote
     filesystem.** There is no way for us to confirm whether two files on the
     remote filesystem are actually links to the same inode. QIDs and inodes are
diff --git a/pkg/sentry/vfs/genericfstree/genericfstree.go b/pkg/sentry/vfs/genericfstree/genericfstree.go
index 8882fa84a..2d27d9d35 100644
--- a/pkg/sentry/vfs/genericfstree/genericfstree.go
+++ b/pkg/sentry/vfs/genericfstree/genericfstree.go
@@ -27,6 +27,8 @@ import (
 )
 
 // Dentry is a required type parameter that is a struct with the given fields.
+//
+// +stateify savable
 type Dentry struct {
 	// vfsd is the embedded vfs.Dentry corresponding to this vfs.DentryImpl.
 	vfsd vfs.Dentry
diff --git a/pkg/sentry/vfs/inotify.go b/pkg/sentry/vfs/inotify.go
index c2e21ac5f..3f0b8f45b 100644
--- a/pkg/sentry/vfs/inotify.go
+++ b/pkg/sentry/vfs/inotify.go
@@ -37,6 +37,8 @@ const inotifyEventBaseSize = 16
 //
 // The way events are labelled appears somewhat arbitrary, but they must match
 // Linux so that IN_EXCL_UNLINK behaves as it does in Linux.
+//
+// +stateify savable
 type EventType uint8
 
 // PathEvent and InodeEvent correspond to FSNOTIFY_EVENT_PATH and
@@ -100,7 +102,7 @@ func NewInotifyFD(ctx context.Context, vfsObj *VirtualFilesystem, flags uint32)
 
 	id := uniqueid.GlobalFromContext(ctx)
 	vd := vfsObj.NewAnonVirtualDentry(fmt.Sprintf("[inotifyfd:%d]", id))
-	defer vd.DecRef()
+	defer vd.DecRef(ctx)
 	fd := &Inotify{
 		id:      id,
 		scratch: make([]byte, inotifyEventBaseSize),
@@ -118,7 +120,7 @@ func NewInotifyFD(ctx context.Context, vfsObj *VirtualFilesystem, flags uint32)
 
 // Release implements FileDescriptionImpl.Release. Release removes all
 // watches and frees all resources for an inotify instance.
-func (i *Inotify) Release() {
+func (i *Inotify) Release(ctx context.Context) {
 	var ds []*Dentry
 
 	// We need to hold i.mu to avoid a race with concurrent calls to
@@ -144,7 +146,7 @@ func (i *Inotify) Release() {
 	i.mu.Unlock()
 
 	for _, d := range ds {
-		d.OnZeroWatches()
+		d.OnZeroWatches(ctx)
 	}
 }
 
@@ -179,12 +181,12 @@ func (i *Inotify) Readiness(mask waiter.EventMask) waiter.EventMask {
 	return mask & ready
 }
 
-// PRead implements FileDescriptionImpl.
+// PRead implements FileDescriptionImpl.PRead.
 func (*Inotify) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts ReadOptions) (int64, error) {
 	return 0, syserror.ESPIPE
 }
 
-// PWrite implements FileDescriptionImpl.
+// PWrite implements FileDescriptionImpl.PWrite.
 func (*Inotify) PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts WriteOptions) (int64, error) {
 	return 0, syserror.ESPIPE
 }
@@ -243,7 +245,7 @@ func (i *Inotify) Read(ctx context.Context, dst usermem.IOSequence, opts ReadOpt
 	return writeLen, nil
 }
 
-// Ioctl implements fs.FileOperations.Ioctl.
+// Ioctl implements FileDescriptionImpl.Ioctl.
 func (i *Inotify) Ioctl(ctx context.Context, uio usermem.IO, args arch.SyscallArguments) (uintptr, error) {
 	switch args[1].Int() {
 	case linux.FIONREAD:
@@ -350,7 +352,7 @@ func (i *Inotify) AddWatch(target *Dentry, mask uint32) (int32, error) {
 
 // RmWatch looks up an inotify watch for the given 'wd' and configures the
 // target to stop sending events to this inotify instance.
-func (i *Inotify) RmWatch(wd int32) error {
+func (i *Inotify) RmWatch(ctx context.Context, wd int32) error {
 	i.mu.Lock()
 
 	// Find the watch we were asked to removed.
@@ -374,7 +376,7 @@ func (i *Inotify) RmWatch(wd int32) error {
 	i.mu.Unlock()
 
 	if remaining == 0 {
-		w.target.OnZeroWatches()
+		w.target.OnZeroWatches(ctx)
 	}
 
 	// Generate the event for the removal.
@@ -462,7 +464,7 @@ func (w *Watches) Remove(id uint64) {
 // Notify queues a new event with watches in this set. Watches with
 // IN_EXCL_UNLINK are skipped if the event is coming from a child that has been
 // unlinked.
-func (w *Watches) Notify(name string, events, cookie uint32, et EventType, unlinked bool) {
+func (w *Watches) Notify(ctx context.Context, name string, events, cookie uint32, et EventType, unlinked bool) {
 	var hasExpired bool
 	w.mu.RLock()
 	for _, watch := range w.ws {
@@ -476,13 +478,13 @@ func (w *Watches) Notify(name string, events, cookie uint32, et EventType, unlin
 	w.mu.RUnlock()
 
 	if hasExpired {
-		w.cleanupExpiredWatches()
+		w.cleanupExpiredWatches(ctx)
 	}
 }
 
 // This function is relatively expensive and should only be called where there
 // are expired watches.
-func (w *Watches) cleanupExpiredWatches() {
+func (w *Watches) cleanupExpiredWatches(ctx context.Context) {
 	// Because of lock ordering, we cannot acquire Inotify.mu for each watch
 	// owner while holding w.mu. As a result, store expired watches locally
 	// before removing.
@@ -495,15 +497,15 @@ func (w *Watches) cleanupExpiredWatches() {
 	}
 	w.mu.RUnlock()
 	for _, watch := range toRemove {
-		watch.owner.RmWatch(watch.wd)
+		watch.owner.RmWatch(ctx, watch.wd)
 	}
 }
 
 // HandleDeletion is called when the watch target is destroyed. Clear the
 // watch set, detach watches from the inotify instances they belong to, and
 // generate the appropriate events.
-func (w *Watches) HandleDeletion() {
-	w.Notify("", linux.IN_DELETE_SELF, 0, InodeEvent, true /* unlinked */)
+func (w *Watches) HandleDeletion(ctx context.Context) {
+	w.Notify(ctx, "", linux.IN_DELETE_SELF, 0, InodeEvent, true /* unlinked */)
 
 	// As in Watches.Notify, we can't hold w.mu while acquiring Inotify.mu for
 	// the owner of each watch being deleted. Instead, atomically store the
@@ -744,12 +746,12 @@ func InotifyEventFromStatMask(mask uint32) uint32 {
 // InotifyRemoveChild sends the appriopriate notifications to the watch sets of
 // the child being removed and its parent. Note that unlike most pairs of
 // parent/child notifications, the child is notified first in this case.
-func InotifyRemoveChild(self, parent *Watches, name string) {
+func InotifyRemoveChild(ctx context.Context, self, parent *Watches, name string) {
 	if self != nil {
-		self.Notify("", linux.IN_ATTRIB, 0, InodeEvent, true /* unlinked */)
+		self.Notify(ctx, "", linux.IN_ATTRIB, 0, InodeEvent, true /* unlinked */)
 	}
 	if parent != nil {
-		parent.Notify(name, linux.IN_DELETE, 0, InodeEvent, true /* unlinked */)
+		parent.Notify(ctx, name, linux.IN_DELETE, 0, InodeEvent, true /* unlinked */)
 	}
 }
 
@@ -762,13 +764,13 @@ func InotifyRename(ctx context.Context, renamed, oldParent, newParent *Watches,
 	}
 	cookie := uniqueid.InotifyCookie(ctx)
 	if oldParent != nil {
-		oldParent.Notify(oldName, dirEv|linux.IN_MOVED_FROM, cookie, InodeEvent, false /* unlinked */)
+		oldParent.Notify(ctx, oldName, dirEv|linux.IN_MOVED_FROM, cookie, InodeEvent, false /* unlinked */)
 	}
 	if newParent != nil {
-		newParent.Notify(newName, dirEv|linux.IN_MOVED_TO, cookie, InodeEvent, false /* unlinked */)
+		newParent.Notify(ctx, newName, dirEv|linux.IN_MOVED_TO, cookie, InodeEvent, false /* unlinked */)
 	}
 	// Somewhat surprisingly, self move events do not have a cookie.
 	if renamed != nil {
-		renamed.Notify("", linux.IN_MOVE_SELF, 0, InodeEvent, false /* unlinked */)
+		renamed.Notify(ctx, "", linux.IN_MOVE_SELF, 0, InodeEvent, false /* unlinked */)
 	}
 }
diff --git a/pkg/sentry/vfs/lock.go b/pkg/sentry/vfs/lock.go
index 6c7583a81..55783d4eb 100644
--- a/pkg/sentry/vfs/lock.go
+++ b/pkg/sentry/vfs/lock.go
@@ -33,6 +33,8 @@ import (
 // Note that in Linux these two types of locks are _not_ cooperative, because
 // race and deadlock conditions make merging them prohibitive. We do the same
 // and keep them oblivious to each other.
+//
+// +stateify savable
 type FileLocks struct {
 	// bsd is a set of BSD-style advisory file wide locks, see flock(2).
 	bsd fslock.Locks
@@ -46,7 +48,13 @@ func (fl *FileLocks) LockBSD(uid fslock.UniqueID, t fslock.LockType, block fsloc
 	if fl.bsd.LockRegion(uid, t, fslock.LockRange{0, fslock.LockEOF}, block) {
 		return nil
 	}
-	return syserror.ErrWouldBlock
+
+	// Return an appropriate error for the unsuccessful lock attempt, depending on
+	// whether this is a blocking or non-blocking operation.
+	if block == nil {
+		return syserror.ErrWouldBlock
+	}
+	return syserror.ERESTARTSYS
 }
 
 // UnlockBSD releases a BSD-style lock on the entire file.
@@ -66,7 +74,13 @@ func (fl *FileLocks) LockPOSIX(ctx context.Context, fd *FileDescription, uid fsl
 	if fl.posix.LockRegion(uid, t, rng, block) {
 		return nil
 	}
-	return syserror.ErrWouldBlock
+
+	// Return an appropriate error for the unsuccessful lock attempt, depending on
+	// whether this is a blocking or non-blocking operation.
+	if block == nil {
+		return syserror.ErrWouldBlock
+	}
+	return syserror.ERESTARTSYS
 }
 
 // UnlockPOSIX releases a POSIX-style lock on a file region.
diff --git a/pkg/sentry/vfs/memxattr/xattr.go b/pkg/sentry/vfs/memxattr/xattr.go
index cc1e7d764..638b5d830 100644
--- a/pkg/sentry/vfs/memxattr/xattr.go
+++ b/pkg/sentry/vfs/memxattr/xattr.go
@@ -33,8 +33,8 @@ type SimpleExtendedAttributes struct {
 	xattrs map[string]string
 }
 
-// Getxattr returns the value at 'name'.
-func (x *SimpleExtendedAttributes) Getxattr(opts *vfs.GetxattrOptions) (string, error) {
+// GetXattr returns the value at 'name'.
+func (x *SimpleExtendedAttributes) GetXattr(opts *vfs.GetXattrOptions) (string, error) {
 	x.mu.RLock()
 	value, ok := x.xattrs[opts.Name]
 	x.mu.RUnlock()
@@ -49,8 +49,8 @@ func (x *SimpleExtendedAttributes) Getxattr(opts *vfs.GetxattrOptions) (string,
 	return value, nil
 }
 
-// Setxattr sets 'value' at 'name'.
-func (x *SimpleExtendedAttributes) Setxattr(opts *vfs.SetxattrOptions) error {
+// SetXattr sets 'value' at 'name'.
+func (x *SimpleExtendedAttributes) SetXattr(opts *vfs.SetXattrOptions) error {
 	x.mu.Lock()
 	defer x.mu.Unlock()
 	if x.xattrs == nil {
@@ -72,8 +72,8 @@ func (x *SimpleExtendedAttributes) Setxattr(opts *vfs.SetxattrOptions) error {
 	return nil
 }
 
-// Listxattr returns all names in xattrs.
-func (x *SimpleExtendedAttributes) Listxattr(size uint64) ([]string, error) {
+// ListXattr returns all names in xattrs.
+func (x *SimpleExtendedAttributes) ListXattr(size uint64) ([]string, error) {
 	// Keep track of the size of the buffer needed in listxattr(2) for the list.
 	listSize := 0
 	x.mu.RLock()
@@ -90,8 +90,8 @@ func (x *SimpleExtendedAttributes) Listxattr(size uint64) ([]string, error) {
 	return names, nil
 }
 
-// Removexattr removes the xattr at 'name'.
-func (x *SimpleExtendedAttributes) Removexattr(name string) error {
+// RemoveXattr removes the xattr at 'name'.
+func (x *SimpleExtendedAttributes) RemoveXattr(name string) error {
 	x.mu.Lock()
 	defer x.mu.Unlock()
 	if _, ok := x.xattrs[name]; !ok {
diff --git a/pkg/sentry/vfs/mount.go b/pkg/sentry/vfs/mount.go
index 32f901bd8..dfc3ae6c0 100644
--- a/pkg/sentry/vfs/mount.go
+++ b/pkg/sentry/vfs/mount.go
@@ -65,7 +65,7 @@ type Mount struct {
 	//
 	// Invariant: key.parent != nil iff key.point != nil. key.point belongs to
 	// key.parent.fs.
-	key mountKey
+	key mountKey `state:".(VirtualDentry)"`
 
 	// ns is the namespace in which this Mount was mounted. ns is protected by
 	// VirtualFilesystem.mountMu.
@@ -114,7 +114,7 @@ func (mnt *Mount) Options() MountOptions {
 	defer mnt.vfs.mountMu.Unlock()
 	return MountOptions{
 		Flags:    mnt.Flags,
-		ReadOnly: mnt.readOnly(),
+		ReadOnly: mnt.ReadOnly(),
 	}
 }
 
@@ -126,16 +126,14 @@ func (mnt *Mount) Options() MountOptions {
 //
 // +stateify savable
 type MountNamespace struct {
+	MountNamespaceRefs
+
 	// Owner is the usernamespace that owns this mount namespace.
 	Owner *auth.UserNamespace
 
 	// root is the MountNamespace's root mount. root is immutable.
 	root *Mount
 
-	// refs is the reference count. refs is accessed using atomic memory
-	// operations.
-	refs int64
-
 	// mountpoints maps all Dentries which are mount points in this namespace
 	// to the number of Mounts for which they are mount points. mountpoints is
 	// protected by VirtualFilesystem.mountMu.
@@ -154,22 +152,22 @@ type MountNamespace struct {
 // NewMountNamespace returns a new mount namespace with a root filesystem
 // configured by the given arguments. A reference is taken on the returned
 // MountNamespace.
-func (vfs *VirtualFilesystem) NewMountNamespace(ctx context.Context, creds *auth.Credentials, source, fsTypeName string, opts *GetFilesystemOptions) (*MountNamespace, error) {
+func (vfs *VirtualFilesystem) NewMountNamespace(ctx context.Context, creds *auth.Credentials, source, fsTypeName string, opts *MountOptions) (*MountNamespace, error) {
 	rft := vfs.getFilesystemType(fsTypeName)
 	if rft == nil {
 		ctx.Warningf("Unknown filesystem type: %s", fsTypeName)
 		return nil, syserror.ENODEV
 	}
-	fs, root, err := rft.fsType.GetFilesystem(ctx, vfs, creds, source, *opts)
+	fs, root, err := rft.fsType.GetFilesystem(ctx, vfs, creds, source, opts.GetFilesystemOptions)
 	if err != nil {
 		return nil, err
 	}
 	mntns := &MountNamespace{
 		Owner:       creds.UserNamespace,
-		refs:        1,
 		mountpoints: make(map[*Dentry]uint32),
 	}
-	mntns.root = newMount(vfs, fs, root, mntns, &MountOptions{})
+	mntns.EnableLeakCheck()
+	mntns.root = newMount(vfs, fs, root, mntns, opts)
 	return mntns, nil
 }
 
@@ -200,8 +198,8 @@ func (vfs *VirtualFilesystem) MountDisconnected(ctx context.Context, creds *auth
 	if err != nil {
 		return nil, err
 	}
-	defer root.DecRef()
-	defer fs.DecRef()
+	defer root.DecRef(ctx)
+	defer fs.DecRef(ctx)
 	return vfs.NewDisconnectedMount(fs, root, opts)
 }
 
@@ -221,7 +219,7 @@ func (vfs *VirtualFilesystem) ConnectMountAt(ctx context.Context, creds *auth.Cr
 		if vd.dentry.dead {
 			vd.dentry.mu.Unlock()
 			vfs.mountMu.Unlock()
-			vd.DecRef()
+			vd.DecRef(ctx)
 			return syserror.ENOENT
 		}
 		// vd might have been mounted over between vfs.GetDentryAt() and
@@ -243,7 +241,7 @@ func (vfs *VirtualFilesystem) ConnectMountAt(ctx context.Context, creds *auth.Cr
 		// This can't fail since we're holding vfs.mountMu.
 		nextmnt.root.IncRef()
 		vd.dentry.mu.Unlock()
-		vd.DecRef()
+		vd.DecRef(ctx)
 		vd = VirtualDentry{
 			mount:  nextmnt,
 			dentry: nextmnt.root,
@@ -263,16 +261,20 @@ func (vfs *VirtualFilesystem) ConnectMountAt(ctx context.Context, creds *auth.Cr
 }
 
 // MountAt creates and mounts a Filesystem configured by the given arguments.
-func (vfs *VirtualFilesystem) MountAt(ctx context.Context, creds *auth.Credentials, source string, target *PathOperation, fsTypeName string, opts *MountOptions) error {
+// The VirtualFilesystem will hold a reference to the Mount until it is unmounted.
+//
+// This method returns the mounted Mount without a reference, for convenience
+// during VFS setup when there is no chance of racing with unmount.
+func (vfs *VirtualFilesystem) MountAt(ctx context.Context, creds *auth.Credentials, source string, target *PathOperation, fsTypeName string, opts *MountOptions) (*Mount, error) {
 	mnt, err := vfs.MountDisconnected(ctx, creds, source, fsTypeName, opts)
 	if err != nil {
-		return err
+		return nil, err
 	}
-	defer mnt.DecRef()
+	defer mnt.DecRef(ctx)
 	if err := vfs.ConnectMountAt(ctx, creds, mnt, target); err != nil {
-		return err
+		return nil, err
 	}
-	return nil
+	return mnt, nil
 }
 
 // UmountAt removes the Mount at the given path.
@@ -293,13 +295,13 @@ func (vfs *VirtualFilesystem) UmountAt(ctx context.Context, creds *auth.Credenti
 	if err != nil {
 		return err
 	}
-	defer vd.DecRef()
+	defer vd.DecRef(ctx)
 	if vd.dentry != vd.mount.root {
 		return syserror.EINVAL
 	}
 	vfs.mountMu.Lock()
 	if mntns := MountNamespaceFromContext(ctx); mntns != nil {
-		defer mntns.DecRef()
+		defer mntns.DecRef(ctx)
 		if mntns != vd.mount.ns {
 			vfs.mountMu.Unlock()
 			return syserror.EINVAL
@@ -335,14 +337,15 @@ func (vfs *VirtualFilesystem) UmountAt(ctx context.Context, creds *auth.Credenti
 	vfs.mounts.seq.EndWrite()
 	vfs.mountMu.Unlock()
 	for _, vd := range vdsToDecRef {
-		vd.DecRef()
+		vd.DecRef(ctx)
 	}
 	for _, mnt := range mountsToDecRef {
-		mnt.DecRef()
+		mnt.DecRef(ctx)
 	}
 	return nil
 }
 
+// +stateify savable
 type umountRecursiveOptions struct {
 	// If eager is true, ensure that future calls to Mount.tryIncMountedRef()
 	// on umounted mounts fail.
@@ -369,8 +372,9 @@ type umountRecursiveOptions struct {
 //
 // umountRecursiveLocked is analogous to Linux's fs/namespace.c:umount_tree().
 //
-// Preconditions: vfs.mountMu must be locked. vfs.mounts.seq must be in a
-// writer critical section.
+// Preconditions:
+// * vfs.mountMu must be locked.
+// * vfs.mounts.seq must be in a writer critical section.
 func (vfs *VirtualFilesystem) umountRecursiveLocked(mnt *Mount, opts *umountRecursiveOptions, vdsToDecRef []VirtualDentry, mountsToDecRef []*Mount) ([]VirtualDentry, []*Mount) {
 	if !mnt.umounted {
 		mnt.umounted = true
@@ -399,9 +403,11 @@ func (vfs *VirtualFilesystem) umountRecursiveLocked(mnt *Mount, opts *umountRecu
 // connectLocked makes vd the mount parent/point for mnt. It consumes
 // references held by vd.
 //
-// Preconditions: vfs.mountMu must be locked. vfs.mounts.seq must be in a
-// writer critical section. d.mu must be locked. mnt.parent() == nil, i.e. mnt
-// must not already be connected.
+// Preconditions:
+// * vfs.mountMu must be locked.
+// * vfs.mounts.seq must be in a writer critical section.
+// * d.mu must be locked.
+// * mnt.parent() == nil, i.e. mnt must not already be connected.
 func (vfs *VirtualFilesystem) connectLocked(mnt *Mount, vd VirtualDentry, mntns *MountNamespace) {
 	if checkInvariants {
 		if mnt.parent() != nil {
@@ -409,7 +415,7 @@ func (vfs *VirtualFilesystem) connectLocked(mnt *Mount, vd VirtualDentry, mntns
 		}
 	}
 	mnt.IncRef() // dropped by callers of umountRecursiveLocked
-	mnt.storeKey(vd)
+	mnt.setKey(vd)
 	if vd.mount.children == nil {
 		vd.mount.children = make(map[*Mount]struct{})
 	}
@@ -429,16 +435,18 @@ func (vfs *VirtualFilesystem) connectLocked(mnt *Mount, vd VirtualDentry, mntns
 // disconnectLocked makes vd have no mount parent/point and returns its old
 // mount parent/point with a reference held.
 //
-// Preconditions: vfs.mountMu must be locked. vfs.mounts.seq must be in a
-// writer critical section. mnt.parent() != nil.
+// Preconditions:
+// * vfs.mountMu must be locked.
+// * vfs.mounts.seq must be in a writer critical section.
+// * mnt.parent() != nil.
 func (vfs *VirtualFilesystem) disconnectLocked(mnt *Mount) VirtualDentry {
-	vd := mnt.loadKey()
+	vd := mnt.getKey()
 	if checkInvariants {
 		if vd.mount != nil {
 			panic("VFS.disconnectLocked called on disconnected mount")
 		}
 	}
-	mnt.storeKey(VirtualDentry{})
+	mnt.loadKey(VirtualDentry{})
 	delete(vd.mount.children, mnt)
 	atomic.AddUint32(&vd.dentry.mounts, math.MaxUint32) // -1
 	mnt.ns.mountpoints[vd.dentry]--
@@ -479,7 +487,7 @@ func (mnt *Mount) IncRef() {
 }
 
 // DecRef decrements mnt's reference count.
-func (mnt *Mount) DecRef() {
+func (mnt *Mount) DecRef(ctx context.Context) {
 	refs := atomic.AddInt64(&mnt.refs, -1)
 	if refs&^math.MinInt64 == 0 { // mask out MSB
 		var vd VirtualDentry
@@ -490,25 +498,18 @@ func (mnt *Mount) DecRef() {
 			mnt.vfs.mounts.seq.EndWrite()
 			mnt.vfs.mountMu.Unlock()
 		}
-		mnt.root.DecRef()
-		mnt.fs.DecRef()
+		mnt.root.DecRef(ctx)
+		mnt.fs.DecRef(ctx)
 		if vd.Ok() {
-			vd.DecRef()
+			vd.DecRef(ctx)
 		}
 	}
 }
 
-// IncRef increments mntns' reference count.
-func (mntns *MountNamespace) IncRef() {
-	if atomic.AddInt64(&mntns.refs, 1) <= 1 {
-		panic("MountNamespace.IncRef() called without holding a reference")
-	}
-}
-
 // DecRef decrements mntns' reference count.
-func (mntns *MountNamespace) DecRef() {
+func (mntns *MountNamespace) DecRef(ctx context.Context) {
 	vfs := mntns.root.fs.VirtualFilesystem()
-	if refs := atomic.AddInt64(&mntns.refs, -1); refs == 0 {
+	mntns.MountNamespaceRefs.DecRef(func() {
 		vfs.mountMu.Lock()
 		vfs.mounts.seq.BeginWrite()
 		vdsToDecRef, mountsToDecRef := vfs.umountRecursiveLocked(mntns.root, &umountRecursiveOptions{
@@ -517,14 +518,12 @@ func (mntns *MountNamespace) DecRef() {
 		vfs.mounts.seq.EndWrite()
 		vfs.mountMu.Unlock()
 		for _, vd := range vdsToDecRef {
-			vd.DecRef()
+			vd.DecRef(ctx)
 		}
 		for _, mnt := range mountsToDecRef {
-			mnt.DecRef()
+			mnt.DecRef(ctx)
 		}
-	} else if refs < 0 {
-		panic("MountNamespace.DecRef() called without holding a reference")
-	}
+	})
 }
 
 // getMountAt returns the last Mount in the stack mounted at (mnt, d). It takes
@@ -534,7 +533,7 @@ func (mntns *MountNamespace) DecRef() {
 // getMountAt is analogous to Linux's fs/namei.c:follow_mount().
 //
 // Preconditions: References are held on mnt and d.
-func (vfs *VirtualFilesystem) getMountAt(mnt *Mount, d *Dentry) *Mount {
+func (vfs *VirtualFilesystem) getMountAt(ctx context.Context, mnt *Mount, d *Dentry) *Mount {
 	// The first mount is special-cased:
 	//
 	// - The caller is assumed to have checked d.isMounted() already. (This
@@ -565,7 +564,7 @@ retryFirst:
 			// Raced with umount.
 			continue
 		}
-		mnt.DecRef()
+		mnt.DecRef(ctx)
 		mnt = next
 		d = next.root
 	}
@@ -576,9 +575,10 @@ retryFirst:
 // mnt. It takes a reference on the returned VirtualDentry. If no such mount
 // point exists (i.e. mnt is a root mount), getMountpointAt returns (nil, nil).
 //
-// Preconditions: References are held on mnt and root. vfsroot is not (mnt,
-// mnt.root).
-func (vfs *VirtualFilesystem) getMountpointAt(mnt *Mount, vfsroot VirtualDentry) VirtualDentry {
+// Preconditions:
+// * References are held on mnt and root.
+// * vfsroot is not (mnt, mnt.root).
+func (vfs *VirtualFilesystem) getMountpointAt(ctx context.Context, mnt *Mount, vfsroot VirtualDentry) VirtualDentry {
 	// The first mount is special-cased:
 	//
 	// - The caller must have already checked mnt against vfsroot.
@@ -602,12 +602,12 @@ retryFirst:
 	if !point.TryIncRef() {
 		// Since Mount holds a reference on Mount.key.point, this can only
 		// happen due to a racing change to Mount.key.
-		parent.DecRef()
+		parent.DecRef(ctx)
 		goto retryFirst
 	}
 	if !vfs.mounts.seq.ReadOk(epoch) {
-		point.DecRef()
-		parent.DecRef()
+		point.DecRef(ctx)
+		parent.DecRef(ctx)
 		goto retryFirst
 	}
 	mnt = parent
@@ -635,22 +635,29 @@ retryFirst:
 		if !point.TryIncRef() {
 			// Since Mount holds a reference on Mount.key.point, this can
 			// only happen due to a racing change to Mount.key.
-			parent.DecRef()
+			parent.DecRef(ctx)
 			goto retryNotFirst
 		}
 		if !vfs.mounts.seq.ReadOk(epoch) {
-			point.DecRef()
-			parent.DecRef()
+			point.DecRef(ctx)
+			parent.DecRef(ctx)
 			goto retryNotFirst
 		}
-		d.DecRef()
-		mnt.DecRef()
+		d.DecRef(ctx)
+		mnt.DecRef(ctx)
 		mnt = parent
 		d = point
 	}
 	return VirtualDentry{mnt, d}
 }
 
+// SetMountReadOnly applies ro to mnt via setReadOnlyLocked with vfs.mountMu held.
+func (vfs *VirtualFilesystem) SetMountReadOnly(mnt *Mount, ro bool) error {
+	vfs.mountMu.Lock()
+	defer vfs.mountMu.Unlock()
+	return mnt.setReadOnlyLocked(ro)
+}
+
 // CheckBeginWrite increments the counter of in-progress write operations on
 // mnt. If mnt is mounted MS_RDONLY, CheckBeginWrite does nothing and returns
 // EROFS.
@@ -688,7 +695,8 @@ func (mnt *Mount) setReadOnlyLocked(ro bool) error {
 	return nil
 }
 
-func (mnt *Mount) readOnly() bool {
+// ReadOnly returns true if mnt is read-only, i.e. if mnt.writers is negative.
+func (mnt *Mount) ReadOnly() bool {
 	return atomic.LoadInt64(&mnt.writers) < 0
 }
 
@@ -731,11 +739,23 @@ func (mntns *MountNamespace) Root() VirtualDentry {
 //
 // Preconditions: taskRootDir.Ok().
 func (vfs *VirtualFilesystem) GenerateProcMounts(ctx context.Context, taskRootDir VirtualDentry, buf *bytes.Buffer) {
-	vfs.mountMu.Lock()
-	defer vfs.mountMu.Unlock()
 	rootMnt := taskRootDir.mount
+
+	vfs.mountMu.Lock()
 	mounts := rootMnt.submountsLocked()
+	// Take a reference on mounts since we need to drop vfs.mountMu before
+	// calling vfs.PathnameReachable() (=> FilesystemImpl.PrependPath()).
+	for _, mnt := range mounts {
+		mnt.IncRef()
+	}
+	vfs.mountMu.Unlock()
+	defer func() {
+		for _, mnt := range mounts {
+			mnt.DecRef(ctx)
+		}
+	}()
 	sort.Slice(mounts, func(i, j int) bool { return mounts[i].ID < mounts[j].ID })
+
 	for _, mnt := range mounts {
 		// Get the path to this mount relative to task root.
 		mntRootVD := VirtualDentry{
@@ -746,7 +766,7 @@ func (vfs *VirtualFilesystem) GenerateProcMounts(ctx context.Context, taskRootDi
 		if err != nil {
 			// For some reason we didn't get a path. Log a warning
 			// and run with empty path.
-			ctx.Warningf("Error getting pathname for mount root %+v: %v", mnt.root, err)
+			ctx.Warningf("VFS.GenerateProcMounts: error getting pathname for mount root %+v: %v", mnt.root, err)
 			path = ""
 		}
 		if path == "" {
@@ -756,7 +776,7 @@ func (vfs *VirtualFilesystem) GenerateProcMounts(ctx context.Context, taskRootDi
 		}
 
 		opts := "rw"
-		if mnt.readOnly() {
+		if mnt.ReadOnly() {
 			opts = "ro"
 		}
 		if mnt.Flags.NoATime {
@@ -780,11 +800,25 @@ func (vfs *VirtualFilesystem) GenerateProcMounts(ctx context.Context, taskRootDi
 //
 // Preconditions: taskRootDir.Ok().
 func (vfs *VirtualFilesystem) GenerateProcMountInfo(ctx context.Context, taskRootDir VirtualDentry, buf *bytes.Buffer) {
-	vfs.mountMu.Lock()
-	defer vfs.mountMu.Unlock()
 	rootMnt := taskRootDir.mount
+
+	vfs.mountMu.Lock()
 	mounts := rootMnt.submountsLocked()
+	// Take a reference on mounts since we need to drop vfs.mountMu before
+	// calling vfs.PathnameReachable() (=> FilesystemImpl.PrependPath()) or
+	// vfs.StatAt() (=> FilesystemImpl.StatAt()).
+	for _, mnt := range mounts {
+		mnt.IncRef()
+	}
+	vfs.mountMu.Unlock()
+	defer func() {
+		for _, mnt := range mounts {
+			mnt.DecRef(ctx)
+		}
+	}()
 	sort.Slice(mounts, func(i, j int) bool { return mounts[i].ID < mounts[j].ID })
+
+	creds := auth.CredentialsFromContext(ctx)
 	for _, mnt := range mounts {
 		// Get the path to this mount relative to task root.
 		mntRootVD := VirtualDentry{
@@ -795,7 +829,7 @@ func (vfs *VirtualFilesystem) GenerateProcMountInfo(ctx context.Context, taskRoo
 		if err != nil {
 			// For some reason we didn't get a path. Log a warning
 			// and run with empty path.
-			ctx.Warningf("Error getting pathname for mount root %+v: %v", mnt.root, err)
+			ctx.Warningf("VFS.GenerateProcMountInfo: error getting pathname for mount root %+v: %v", mnt.root, err)
 			path = ""
 		}
 		if path == "" {
@@ -808,9 +842,10 @@ func (vfs *VirtualFilesystem) GenerateProcMountInfo(ctx context.Context, taskRoo
 			Root:  mntRootVD,
 			Start: mntRootVD,
 		}
-		statx, err := vfs.StatAt(ctx, auth.NewAnonymousCredentials(), pop, &StatOptions{})
+		statx, err := vfs.StatAt(ctx, creds, pop, &StatOptions{})
 		if err != nil {
 			// Well that's not good. Ignore this mount.
-			break
+			ctx.Warningf("VFS.GenerateProcMountInfo: failed to stat mount root %+v: %v", mnt.root, err)
+			continue
 		}
 
@@ -822,6 +857,9 @@ func (vfs *VirtualFilesystem) GenerateProcMountInfo(ctx context.Context, taskRoo
 		fmt.Fprintf(buf, "%d ", mnt.ID)
 
 		// (2)  Parent ID (or this ID if there is no parent).
+		// Note that even if the call to mnt.parent() races with Mount
+		// destruction (which is possible since we're not holding vfs.mountMu),
+		// its Mount.ID will still be valid.
 		pID := mnt.ID
 		if p := mnt.parent(); p != nil {
 			pID = p.ID
@@ -844,7 +882,7 @@ func (vfs *VirtualFilesystem) GenerateProcMountInfo(ctx context.Context, taskRoo
 
 		// (6) Mount options.
 		opts := "rw"
-		if mnt.readOnly() {
+		if mnt.ReadOnly() {
 			opts = "ro"
 		}
 		if mnt.Flags.NoATime {
@@ -883,7 +921,7 @@ func superBlockOpts(mountPath string, mnt *Mount) string {
 	// gVisor doesn't (yet) have a concept of super block options, so we
 	// use the ro/rw bit from the mount flag.
 	opts := "rw"
-	if mnt.readOnly() {
+	if mnt.ReadOnly() {
 		opts = "ro"
 	}
 
diff --git a/pkg/sentry/vfs/mount_test.go b/pkg/sentry/vfs/mount_test.go
index 3335e4057..cb8c56bd3 100644
--- a/pkg/sentry/vfs/mount_test.go
+++ b/pkg/sentry/vfs/mount_test.go
@@ -38,7 +38,7 @@ func TestMountTableInsertLookup(t *testing.T) {
 	mt.Init()
 
 	mount := &Mount{}
-	mount.storeKey(VirtualDentry{&Mount{}, &Dentry{}})
+	mount.setKey(VirtualDentry{&Mount{}, &Dentry{}})
 	mt.Insert(mount)
 
 	if m := mt.Lookup(mount.parent(), mount.point()); m != mount {
@@ -79,7 +79,7 @@ const enableComparativeBenchmarks = false
 
 func newBenchMount() *Mount {
 	mount := &Mount{}
-	mount.storeKey(VirtualDentry{&Mount{}, &Dentry{}})
+	mount.setKey(VirtualDentry{&Mount{}, &Dentry{}})
 	return mount
 }
 
@@ -94,7 +94,7 @@ func BenchmarkMountTableParallelLookup(b *testing.B) {
 				for i := 0; i < numMounts; i++ {
 					mount := newBenchMount()
 					mt.Insert(mount)
-					keys = append(keys, mount.loadKey())
+					keys = append(keys, mount.saveKey())
 				}
 
 				var ready sync.WaitGroup
@@ -146,7 +146,7 @@ func BenchmarkMountMapParallelLookup(b *testing.B) {
 				keys := make([]VirtualDentry, 0, numMounts)
 				for i := 0; i < numMounts; i++ {
 					mount := newBenchMount()
-					key := mount.loadKey()
+					key := mount.saveKey()
 					ms[key] = mount
 					keys = append(keys, key)
 				}
@@ -201,7 +201,7 @@ func BenchmarkMountSyncMapParallelLookup(b *testing.B) {
 				keys := make([]VirtualDentry, 0, numMounts)
 				for i := 0; i < numMounts; i++ {
 					mount := newBenchMount()
-					key := mount.loadKey()
+					key := mount.getKey()
 					ms.Store(key, mount)
 					keys = append(keys, key)
 				}
@@ -283,7 +283,7 @@ func BenchmarkMountMapNegativeLookup(b *testing.B) {
 			ms := make(map[VirtualDentry]*Mount)
 			for i := 0; i < numMounts; i++ {
 				mount := newBenchMount()
-				ms[mount.loadKey()] = mount
+				ms[mount.getKey()] = mount
 			}
 			negkeys := make([]VirtualDentry, 0, numMounts)
 			for i := 0; i < numMounts; i++ {
@@ -318,7 +318,7 @@ func BenchmarkMountSyncMapNegativeLookup(b *testing.B) {
 			var ms sync.Map
 			for i := 0; i < numMounts; i++ {
 				mount := newBenchMount()
-				ms.Store(mount.loadKey(), mount)
+				ms.Store(mount.saveKey(), mount)
 			}
 			negkeys := make([]VirtualDentry, 0, numMounts)
 			for i := 0; i < numMounts; i++ {
@@ -372,7 +372,7 @@ func BenchmarkMountMapInsert(b *testing.B) {
 	b.ResetTimer()
 	for i := range mounts {
 		mount := mounts[i]
-		ms[mount.loadKey()] = mount
+		ms[mount.saveKey()] = mount
 	}
 }
 
@@ -392,7 +392,7 @@ func BenchmarkMountSyncMapInsert(b *testing.B) {
 	b.ResetTimer()
 	for i := range mounts {
 		mount := mounts[i]
-		ms.Store(mount.loadKey(), mount)
+		ms.Store(mount.saveKey(), mount)
 	}
 }
 
@@ -425,13 +425,13 @@ func BenchmarkMountMapRemove(b *testing.B) {
 	ms := make(map[VirtualDentry]*Mount)
 	for i := range mounts {
 		mount := mounts[i]
-		ms[mount.loadKey()] = mount
+		ms[mount.saveKey()] = mount
 	}
 
 	b.ResetTimer()
 	for i := range mounts {
 		mount := mounts[i]
-		delete(ms, mount.loadKey())
+		delete(ms, mount.saveKey())
 	}
 }
 
@@ -447,12 +447,12 @@ func BenchmarkMountSyncMapRemove(b *testing.B) {
 	var ms sync.Map
 	for i := range mounts {
 		mount := mounts[i]
-		ms.Store(mount.loadKey(), mount)
+		ms.Store(mount.saveKey(), mount)
 	}
 
 	b.ResetTimer()
 	for i := range mounts {
 		mount := mounts[i]
-		ms.Delete(mount.loadKey())
+		ms.Delete(mount.saveKey())
 	}
 }
diff --git a/pkg/sentry/vfs/mount_unsafe.go b/pkg/sentry/vfs/mount_unsafe.go
index 70f850ca4..b7d122d22 100644
--- a/pkg/sentry/vfs/mount_unsafe.go
+++ b/pkg/sentry/vfs/mount_unsafe.go
@@ -13,7 +13,7 @@
 // limitations under the License.
 
 // +build go1.12
-// +build !go1.16
+// +build !go1.17
 
 // Check go:linkname function signatures when updating Go version.
 
@@ -34,6 +34,8 @@ import (
 // structurally identical to VirtualDentry, but stores its fields as
 // unsafe.Pointer since mutators synchronize with VFS path traversal using
 // seqcounts.
+//
+// This is explicitly not savable.
 type mountKey struct {
 	parent unsafe.Pointer // *Mount
 	point  unsafe.Pointer // *Dentry
@@ -47,19 +49,23 @@ func (mnt *Mount) point() *Dentry {
 	return (*Dentry)(atomic.LoadPointer(&mnt.key.point))
 }
 
-func (mnt *Mount) loadKey() VirtualDentry {
+func (mnt *Mount) getKey() VirtualDentry {
 	return VirtualDentry{
 		mount:  mnt.parent(),
 		dentry: mnt.point(),
 	}
 }
 
+func (mnt *Mount) saveKey() VirtualDentry { return mnt.getKey() }
+
 // Invariant: mnt.key.parent == nil. vd.Ok().
-func (mnt *Mount) storeKey(vd VirtualDentry) {
+func (mnt *Mount) setKey(vd VirtualDentry) {
 	atomic.StorePointer(&mnt.key.parent, unsafe.Pointer(vd.mount))
 	atomic.StorePointer(&mnt.key.point, unsafe.Pointer(vd.dentry))
 }
 
+func (mnt *Mount) loadKey(vd VirtualDentry) { mnt.setKey(vd) }
+
 // mountTable maps (mount parent, mount point) pairs to mounts. It supports
 // efficient concurrent lookup, even in the presence of concurrent mutators
 // (provided mutation is sufficiently uncommon).
@@ -92,6 +98,7 @@ type mountTable struct {
 	// length and cap in separate uint32s) for ~free.
 	size uint64
 
+	// FIXME(gvisor.dev/issue/1663): Slots need to be saved.
 	slots unsafe.Pointer `state:"nosave"` // []mountSlot; never nil after Init
 }
 
@@ -217,8 +224,9 @@ func (mt *mountTable) Insert(mount *Mount) {
 
 // insertSeqed inserts the given mount into mt.
 //
-// Preconditions: mt.seq must be in a writer critical section. mt must not
-// already contain a Mount with the same mount point and parent.
+// Preconditions:
+// * mt.seq must be in a writer critical section.
+// * mt must not already contain a Mount with the same mount point and parent.
 func (mt *mountTable) insertSeqed(mount *Mount) {
 	hash := memhash(unsafe.Pointer(&mount.key), uintptr(mt.seed), mountKeyBytes)
 
@@ -269,9 +277,11 @@ func (mt *mountTable) insertSeqed(mount *Mount) {
 	atomic.StorePointer(&mt.slots, newSlots)
 }
 
-// Preconditions: There are no concurrent mutators of the table (slots, cap).
-// If the table is visible to readers, then mt.seq must be in a writer critical
-// section. cap must be a power of 2.
+// Preconditions:
+// * There are no concurrent mutators of the table (slots, cap).
+// * If the table is visible to readers, then mt.seq must be in a writer
+//   critical section.
+// * cap must be a power of 2.
 func mtInsertLocked(slots unsafe.Pointer, cap uintptr, value unsafe.Pointer, hash uintptr) {
 	mask := cap - 1
 	off := (hash & mask) * mountSlotBytes
@@ -313,8 +323,9 @@ func (mt *mountTable) Remove(mount *Mount) {
 
 // removeSeqed removes the given mount from mt.
 //
-// Preconditions: mt.seq must be in a writer critical section. mt must contain
-// mount.
+// Preconditions:
+// * mt.seq must be in a writer critical section.
+// * mt must contain mount.
 func (mt *mountTable) removeSeqed(mount *Mount) {
 	hash := memhash(unsafe.Pointer(&mount.key), uintptr(mt.seed), mountKeyBytes)
 	tcap := uintptr(1) << (mt.size & mtSizeOrderMask)
diff --git a/pkg/sentry/vfs/options.go b/pkg/sentry/vfs/options.go
index f223aeda8..bc79e5ecc 100644
--- a/pkg/sentry/vfs/options.go
+++ b/pkg/sentry/vfs/options.go
@@ -21,6 +21,8 @@ import (
 
 // GetDentryOptions contains options to VirtualFilesystem.GetDentryAt() and
 // FilesystemImpl.GetDentryAt().
+//
+// +stateify savable
 type GetDentryOptions struct {
 	// If CheckSearchable is true, FilesystemImpl.GetDentryAt() must check that
 	// the returned Dentry is a directory for which creds has search
@@ -30,6 +32,8 @@ type GetDentryOptions struct {
 
 // MkdirOptions contains options to VirtualFilesystem.MkdirAt() and
 // FilesystemImpl.MkdirAt().
+//
+// +stateify savable
 type MkdirOptions struct {
 	// Mode is the file mode bits for the created directory.
 	Mode linux.FileMode
@@ -56,6 +60,8 @@ type MkdirOptions struct {
 
 // MknodOptions contains options to VirtualFilesystem.MknodAt() and
 // FilesystemImpl.MknodAt().
+//
+// +stateify savable
 type MknodOptions struct {
 	// Mode is the file type and mode bits for the created file.
 	Mode linux.FileMode
@@ -72,6 +78,8 @@ type MknodOptions struct {
 
 // MountFlags contains flags as specified for mount(2), e.g. MS_NOEXEC.
 // MS_RDONLY is not part of MountFlags because it's tracked in Mount.writers.
+//
+// +stateify savable
 type MountFlags struct {
 	// NoExec is equivalent to MS_NOEXEC.
 	NoExec bool
@@ -79,9 +87,22 @@ type MountFlags struct {
 	// NoATime is equivalent to MS_NOATIME and indicates that the
 	// filesystem should not update access time in-place.
 	NoATime bool
+
+	// NoDev is equivalent to MS_NODEV and indicates that the
+	// filesystem should not allow access to devices (special files).
+	// TODO(gVisor.dev/issue/3186): respect this flag in non FUSE
+	// filesystems.
+	NoDev bool
+
+	// NoSUID is equivalent to MS_NOSUID and indicates that the
+	// filesystem should not honor set-user-ID and set-group-ID bits or
+	// file capabilities when executing programs.
+	NoSUID bool
 }
 
 // MountOptions contains options to VirtualFilesystem.MountAt().
+//
+// +stateify savable
 type MountOptions struct {
 	// Flags contains flags as specified for mount(2), e.g. MS_NOEXEC.
 	Flags MountFlags
@@ -92,13 +113,17 @@ type MountOptions struct {
 	// GetFilesystemOptions contains options to FilesystemType.GetFilesystem().
 	GetFilesystemOptions GetFilesystemOptions
 
-	// If InternalMount is true, allow the use of filesystem types for which
-	// RegisterFilesystemTypeOptions.AllowUserMount == false.
+	// InternalMount indicates whether the mount operation is internal, i.e. not
+	// coming from the application through mount(2). If InternalMount is true,
+	// allow the use of filesystem types for which
+	// RegisterFilesystemTypeOptions.AllowUserMount == false.
 	InternalMount bool
 }
 
 // OpenOptions contains options to VirtualFilesystem.OpenAt() and
 // FilesystemImpl.OpenAt().
+//
+// +stateify savable
 type OpenOptions struct {
 	// Flags contains access mode and flags as specified for open(2).
 	//
@@ -124,6 +149,8 @@ type OpenOptions struct {
 // ReadOptions contains options to FileDescription.PRead(),
 // FileDescriptionImpl.PRead(), FileDescription.Read(), and
 // FileDescriptionImpl.Read().
+//
+// +stateify savable
 type ReadOptions struct {
 	// Flags contains flags as specified for preadv2(2).
 	Flags uint32
@@ -131,6 +158,8 @@ type ReadOptions struct {
 
 // RenameOptions contains options to VirtualFilesystem.RenameAt() and
 // FilesystemImpl.RenameAt().
+//
+// +stateify savable
 type RenameOptions struct {
 	// Flags contains flags as specified for renameat2(2).
 	Flags uint32
@@ -142,6 +171,8 @@ type RenameOptions struct {
 // SetStatOptions contains options to VirtualFilesystem.SetStatAt(),
 // FilesystemImpl.SetStatAt(), FileDescription.SetStat(), and
 // FileDescriptionImpl.SetStat().
+//
+// +stateify savable
 type SetStatOptions struct {
 	// Stat is the metadata that should be set. Only fields indicated by
 	// Stat.Mask should be set.
@@ -153,10 +184,18 @@ type SetStatOptions struct {
 	// == UTIME_OMIT (VFS users must unset the corresponding bit in Stat.Mask
 	// instead).
 	Stat linux.Statx
+
+	// NeedWritePerm indicates that write permission on the file is needed for
+	// this operation. This is needed for truncate(2) (note that ftruncate(2)
+	// does not require the same check--instead, it checks that the fd is
+	// writable).
+	NeedWritePerm bool
 }
 
 // BoundEndpointOptions contains options to VirtualFilesystem.BoundEndpointAt()
 // and FilesystemImpl.BoundEndpointAt().
+//
+// +stateify savable
 type BoundEndpointOptions struct {
 	// Addr is the path of the file whose socket endpoint is being retrieved.
 	// It is generally irrelevant: most endpoints are stored at a dentry that
@@ -173,10 +212,12 @@ type BoundEndpointOptions struct {
 	Addr string
 }
 
-// GetxattrOptions contains options to VirtualFilesystem.GetxattrAt(),
-// FilesystemImpl.GetxattrAt(), FileDescription.Getxattr(), and
-// FileDescriptionImpl.Getxattr().
-type GetxattrOptions struct {
+// GetXattrOptions contains options to VirtualFilesystem.GetXattrAt(),
+// FilesystemImpl.GetXattrAt(), FileDescription.GetXattr(), and
+// FileDescriptionImpl.GetXattr().
+//
+// +stateify savable
+type GetXattrOptions struct {
 	// Name is the name of the extended attribute to retrieve.
 	Name string
 
@@ -187,10 +228,12 @@ type GetxattrOptions struct {
 	Size uint64
 }
 
-// SetxattrOptions contains options to VirtualFilesystem.SetxattrAt(),
-// FilesystemImpl.SetxattrAt(), FileDescription.Setxattr(), and
-// FileDescriptionImpl.Setxattr().
-type SetxattrOptions struct {
+// SetXattrOptions contains options to VirtualFilesystem.SetXattrAt(),
+// FilesystemImpl.SetXattrAt(), FileDescription.SetXattr(), and
+// FileDescriptionImpl.SetXattr().
+//
+// +stateify savable
+type SetXattrOptions struct {
 	// Name is the name of the extended attribute being mutated.
 	Name string
 
@@ -204,6 +247,8 @@ type SetxattrOptions struct {
 // StatOptions contains options to VirtualFilesystem.StatAt(),
 // FilesystemImpl.StatAt(), FileDescription.Stat(), and
 // FileDescriptionImpl.Stat().
+//
+// +stateify savable
 type StatOptions struct {
 	// Mask is the set of fields in the returned Statx that the FilesystemImpl
 	// or FileDescriptionImpl should provide. Bits are as in linux.Statx.Mask.
@@ -221,6 +266,8 @@ type StatOptions struct {
 }
 
 // UmountOptions contains options to VirtualFilesystem.UmountAt().
+//
+// +stateify savable
 type UmountOptions struct {
 	// Flags contains flags as specified for umount2(2).
 	Flags uint32
@@ -229,6 +276,8 @@ type UmountOptions struct {
 // WriteOptions contains options to FileDescription.PWrite(),
 // FileDescriptionImpl.PWrite(), FileDescription.Write(), and
 // FileDescriptionImpl.Write().
+//
+// +stateify savable
 type WriteOptions struct {
 	// Flags contains flags as specified for pwritev2(2).
 	Flags uint32
diff --git a/pkg/sentry/vfs/pathname.go b/pkg/sentry/vfs/pathname.go
index cd78d66bc..e4da15009 100644
--- a/pkg/sentry/vfs/pathname.go
+++ b/pkg/sentry/vfs/pathname.go
@@ -47,7 +47,7 @@ func (vfs *VirtualFilesystem) PathnameWithDeleted(ctx context.Context, vfsroot,
 	haveRef := false
 	defer func() {
 		if haveRef {
-			vd.DecRef()
+			vd.DecRef(ctx)
 		}
 	}()
 
@@ -64,12 +64,12 @@ loop:
 				// of FilesystemImpl.PrependPath() may return nil instead.
 				break loop
 			}
-			nextVD := vfs.getMountpointAt(vd.mount, vfsroot)
+			nextVD := vfs.getMountpointAt(ctx, vd.mount, vfsroot)
 			if !nextVD.Ok() {
 				break loop
 			}
 			if haveRef {
-				vd.DecRef()
+				vd.DecRef(ctx)
 			}
 			vd = nextVD
 			haveRef = true
@@ -101,7 +101,7 @@ func (vfs *VirtualFilesystem) PathnameReachable(ctx context.Context, vfsroot, vd
 	haveRef := false
 	defer func() {
 		if haveRef {
-			vd.DecRef()
+			vd.DecRef(ctx)
 		}
 	}()
 loop:
@@ -112,12 +112,12 @@ loop:
 			if vd.mount == vfsroot.mount && vd.mount.root == vfsroot.dentry {
 				break loop
 			}
-			nextVD := vfs.getMountpointAt(vd.mount, vfsroot)
+			nextVD := vfs.getMountpointAt(ctx, vd.mount, vfsroot)
 			if !nextVD.Ok() {
 				return "", nil
 			}
 			if haveRef {
-				vd.DecRef()
+				vd.DecRef(ctx)
 			}
 			vd = nextVD
 			haveRef = true
@@ -145,7 +145,7 @@ func (vfs *VirtualFilesystem) PathnameForGetcwd(ctx context.Context, vfsroot, vd
 	haveRef := false
 	defer func() {
 		if haveRef {
-			vd.DecRef()
+			vd.DecRef(ctx)
 		}
 	}()
 	unreachable := false
@@ -157,13 +157,13 @@ loop:
 			if vd.mount == vfsroot.mount && vd.mount.root == vfsroot.dentry {
 				break loop
 			}
-			nextVD := vfs.getMountpointAt(vd.mount, vfsroot)
+			nextVD := vfs.getMountpointAt(ctx, vd.mount, vfsroot)
 			if !nextVD.Ok() {
 				unreachable = true
 				break loop
 			}
 			if haveRef {
-				vd.DecRef()
+				vd.DecRef(ctx)
 			}
 			vd = nextVD
 			haveRef = true
diff --git a/pkg/sentry/vfs/permissions.go b/pkg/sentry/vfs/permissions.go
index 9cb050597..d48520d58 100644
--- a/pkg/sentry/vfs/permissions.go
+++ b/pkg/sentry/vfs/permissions.go
@@ -16,6 +16,7 @@ package vfs
 
 import (
 	"math"
+	"strings"
 
 	"gvisor.dev/gvisor/pkg/abi/linux"
 	"gvisor.dev/gvisor/pkg/context"
@@ -25,6 +26,8 @@ import (
 )
 
 // AccessTypes is a bitmask of Unix file permissions.
+//
+// +stateify savable
 type AccessTypes uint16
 
 // Bits in AccessTypes.
@@ -183,7 +186,8 @@ func MayWriteFileWithOpenFlags(flags uint32) bool {
 // CheckSetStat checks that creds has permission to change the metadata of a
 // file with the given permissions, UID, and GID as specified by stat, subject
 // to the rules of Linux's fs/attr.c:setattr_prepare().
-func CheckSetStat(ctx context.Context, creds *auth.Credentials, stat *linux.Statx, mode linux.FileMode, kuid auth.KUID, kgid auth.KGID) error {
+func CheckSetStat(ctx context.Context, creds *auth.Credentials, opts *SetStatOptions, mode linux.FileMode, kuid auth.KUID, kgid auth.KGID) error {
+	stat := &opts.Stat
 	if stat.Mask&linux.STATX_SIZE != 0 {
 		limit, err := CheckLimit(ctx, 0, int64(stat.Size))
 		if err != nil {
@@ -215,6 +219,11 @@ func CheckSetStat(ctx context.Context, creds *auth.Credentials, stat *linux.Stat
 			return syserror.EPERM
 		}
 	}
+	if opts.NeedWritePerm && !creds.HasCapability(linux.CAP_DAC_OVERRIDE) {
+		if err := GenericCheckPermissions(creds, MayWrite, mode, kuid, kgid); err != nil {
+			return err
+		}
+	}
 	if stat.Mask&(linux.STATX_ATIME|linux.STATX_MTIME|linux.STATX_CTIME) != 0 {
 		if !CanActAsOwner(creds, kuid) {
 			if (stat.Mask&linux.STATX_ATIME != 0 && stat.Atime.Nsec != linux.UTIME_NOW) ||
@@ -265,7 +274,7 @@ func HasCapabilityOnFile(creds *auth.Credentials, cp linux.Capability, kuid auth
 // operation must not proceed. Otherwise it returns the max length allowed to
 // without violating the limit.
 func CheckLimit(ctx context.Context, offset, size int64) (int64, error) {
-	fileSizeLimit := limits.FromContext(ctx).Get(limits.FileSize).Cur
+	fileSizeLimit := limits.FromContextOrDie(ctx).Get(limits.FileSize).Cur
 	if fileSizeLimit > math.MaxInt64 {
 		return size, nil
 	}
@@ -278,3 +287,40 @@ func CheckLimit(ctx context.Context, offset, size int64) (int64, error) {
 	}
 	return size, nil
 }
+
+// CheckXattrPermissions checks permissions for extended attribute access.
+// This is analogous to fs/xattr.c:xattr_permission(). Some key differences:
+// * Does not check for read-only filesystem property.
+// * Does not check inode immutability or append only mode. In both cases EPERM
+//   must be returned by filesystem implementations.
+// * Does not do inode permission checks. Filesystem implementations should
+//   handle inode permission checks as they may differ across implementations.
+func CheckXattrPermissions(creds *auth.Credentials, ats AccessTypes, mode linux.FileMode, kuid auth.KUID, name string) error {
+	switch {
+	case strings.HasPrefix(name, linux.XATTR_TRUSTED_PREFIX):
+		// The trusted.* namespace can only be accessed by privileged
+		// users.
+		if creds.HasCapability(linux.CAP_SYS_ADMIN) {
+			return nil
+		}
+		if ats.MayWrite() {
+			return syserror.EPERM
+		}
+		return syserror.ENODATA
+	case strings.HasPrefix(name, linux.XATTR_USER_PREFIX):
+		// In the user.* namespace, only regular files and directories can have
+		// extended attributes. For sticky directories, only the owner and
+		// privileged users can write attributes.
+		filetype := mode.FileType()
+		if filetype != linux.ModeRegular && filetype != linux.ModeDirectory {
+			if ats.MayWrite() {
+				return syserror.EPERM
+			}
+			return syserror.ENODATA
+		}
+		if filetype == linux.ModeDirectory && mode&linux.ModeSticky != 0 && ats.MayWrite() && !CanActAsOwner(creds, kuid) {
+			return syserror.EPERM
+		}
+	}
+	return nil
+}
diff --git a/pkg/sentry/vfs/resolving_path.go b/pkg/sentry/vfs/resolving_path.go
index 9d047ff88..e4fd55012 100644
--- a/pkg/sentry/vfs/resolving_path.go
+++ b/pkg/sentry/vfs/resolving_path.go
@@ -18,6 +18,7 @@ import (
 	"fmt"
 
 	"gvisor.dev/gvisor/pkg/abi/linux"
+	"gvisor.dev/gvisor/pkg/context"
 	"gvisor.dev/gvisor/pkg/fspath"
 	"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
 	"gvisor.dev/gvisor/pkg/sync"
@@ -34,6 +35,8 @@ import (
 // FilesystemImpl methods.
 //
 // ResolvingPath is loosely analogous to Linux's struct nameidata.
+//
+// +stateify savable
 type ResolvingPath struct {
 	vfs   *VirtualFilesystem
 	root  VirtualDentry // refs borrowed from PathOperation
@@ -87,6 +90,7 @@ func init() {
 // so error "constants" are really mutable vars, necessitating somewhat
 // expensive interface object comparisons.
 
+// +stateify savable
 type resolveMountRootOrJumpError struct{}
 
 // Error implements error.Error.
@@ -94,6 +98,7 @@ func (resolveMountRootOrJumpError) Error() string {
 	return "resolving mount root or jump"
 }
 
+// +stateify savable
 type resolveMountPointError struct{}
 
 // Error implements error.Error.
@@ -101,6 +106,7 @@ func (resolveMountPointError) Error() string {
 	return "resolving mount point"
 }
 
+// +stateify savable
 type resolveAbsSymlinkError struct{}
 
 // Error implements error.Error.
@@ -136,31 +142,31 @@ func (vfs *VirtualFilesystem) getResolvingPath(creds *auth.Credentials, pop *Pat
 	return rp
 }
 
-func (vfs *VirtualFilesystem) putResolvingPath(rp *ResolvingPath) {
+func (vfs *VirtualFilesystem) putResolvingPath(ctx context.Context, rp *ResolvingPath) {
 	rp.root = VirtualDentry{}
-	rp.decRefStartAndMount()
+	rp.decRefStartAndMount(ctx)
 	rp.mount = nil
 	rp.start = nil
-	rp.releaseErrorState()
+	rp.releaseErrorState(ctx)
 	resolvingPathPool.Put(rp)
 }
 
-func (rp *ResolvingPath) decRefStartAndMount() {
+func (rp *ResolvingPath) decRefStartAndMount(ctx context.Context) {
 	if rp.flags&rpflagsHaveStartRef != 0 {
-		rp.start.DecRef()
+		rp.start.DecRef(ctx)
 	}
 	if rp.flags&rpflagsHaveMountRef != 0 {
-		rp.mount.DecRef()
+		rp.mount.DecRef(ctx)
 	}
 }
 
-func (rp *ResolvingPath) releaseErrorState() {
+func (rp *ResolvingPath) releaseErrorState(ctx context.Context) {
 	if rp.nextStart != nil {
-		rp.nextStart.DecRef()
+		rp.nextStart.DecRef(ctx)
 		rp.nextStart = nil
 	}
 	if rp.nextMount != nil {
-		rp.nextMount.DecRef()
+		rp.nextMount.DecRef(ctx)
 		rp.nextMount = nil
 	}
 }
@@ -236,13 +242,13 @@ func (rp *ResolvingPath) Advance() {
 
 // Restart resets the stream of path components represented by rp to its state
 // on entry to the current FilesystemImpl method.
-func (rp *ResolvingPath) Restart() {
+func (rp *ResolvingPath) Restart(ctx context.Context) {
 	rp.pit = rp.origParts[rp.numOrigParts-1]
 	rp.mustBeDir = rp.mustBeDirOrig
 	rp.symlinks = rp.symlinksOrig
 	rp.curPart = rp.numOrigParts - 1
 	copy(rp.parts[:], rp.origParts[:rp.numOrigParts])
-	rp.releaseErrorState()
+	rp.releaseErrorState(ctx)
 }
 
 func (rp *ResolvingPath) relpathCommit() {
@@ -260,13 +266,13 @@ func (rp *ResolvingPath) relpathCommit() {
 // Mount, CheckRoot returns (unspecified, non-nil error). Otherwise, path
 // resolution should resolve d's parent normally, and CheckRoot returns (false,
 // nil).
-func (rp *ResolvingPath) CheckRoot(d *Dentry) (bool, error) {
+func (rp *ResolvingPath) CheckRoot(ctx context.Context, d *Dentry) (bool, error) {
 	if d == rp.root.dentry && rp.mount == rp.root.mount {
 		// At contextual VFS root (due to e.g. chroot(2)).
 		return true, nil
 	} else if d == rp.mount.root {
 		// At mount root ...
-		vd := rp.vfs.getMountpointAt(rp.mount, rp.root)
+		vd := rp.vfs.getMountpointAt(ctx, rp.mount, rp.root)
 		if vd.Ok() {
 			// ... of non-root mount.
 			rp.nextMount = vd.mount
@@ -283,11 +289,11 @@ func (rp *ResolvingPath) CheckRoot(d *Dentry) (bool, error) {
 // to d. If d is a mount point, such that path resolution should switch to
 // another Mount, CheckMount returns a non-nil error. Otherwise, CheckMount
 // returns nil.
-func (rp *ResolvingPath) CheckMount(d *Dentry) error {
+func (rp *ResolvingPath) CheckMount(ctx context.Context, d *Dentry) error {
 	if !d.isMounted() {
 		return nil
 	}
-	if mnt := rp.vfs.getMountAt(rp.mount, d); mnt != nil {
+	if mnt := rp.vfs.getMountAt(ctx, rp.mount, d); mnt != nil {
 		rp.nextMount = mnt
 		return resolveMountPointError{}
 	}
@@ -389,11 +395,11 @@ func (rp *ResolvingPath) HandleJump(target VirtualDentry) error {
 	return resolveMountRootOrJumpError{}
 }
 
-func (rp *ResolvingPath) handleError(err error) bool {
+func (rp *ResolvingPath) handleError(ctx context.Context, err error) bool {
 	switch err.(type) {
 	case resolveMountRootOrJumpError:
 		// Switch to the new Mount. We hold references on the Mount and Dentry.
-		rp.decRefStartAndMount()
+		rp.decRefStartAndMount(ctx)
 		rp.mount = rp.nextMount
 		rp.start = rp.nextStart
 		rp.flags |= rpflagsHaveMountRef | rpflagsHaveStartRef
@@ -412,7 +418,7 @@ func (rp *ResolvingPath) handleError(err error) bool {
 	case resolveMountPointError:
 		// Switch to the new Mount. We hold a reference on the Mount, but
 		// borrow the reference on the mount root from the Mount.
-		rp.decRefStartAndMount()
+		rp.decRefStartAndMount(ctx)
 		rp.mount = rp.nextMount
 		rp.start = rp.nextMount.root
 		rp.flags = rp.flags&^rpflagsHaveStartRef | rpflagsHaveMountRef
@@ -423,12 +429,12 @@ func (rp *ResolvingPath) handleError(err error) bool {
 		// path.
 		rp.relpathCommit()
 		// Restart path resolution on the new Mount.
-		rp.releaseErrorState()
+		rp.releaseErrorState(ctx)
 		return true
 
 	case resolveAbsSymlinkError:
 		// Switch to the new Mount. References are borrowed from rp.root.
-		rp.decRefStartAndMount()
+		rp.decRefStartAndMount(ctx)
 		rp.mount = rp.root.mount
 		rp.start = rp.root.dentry
 		rp.flags &^= rpflagsHaveMountRef | rpflagsHaveStartRef
@@ -440,7 +446,7 @@ func (rp *ResolvingPath) handleError(err error) bool {
 		// path, including the symlink target we just prepended.
 		rp.relpathCommit()
 		// Restart path resolution on the new Mount.
-		rp.releaseErrorState()
+		rp.releaseErrorState(ctx)
 		return true
 
 	default:
diff --git a/pkg/sentry/vfs/vfs.go b/pkg/sentry/vfs/vfs.go
index 58c7ad778..5bd756ea5 100644
--- a/pkg/sentry/vfs/vfs.go
+++ b/pkg/sentry/vfs/vfs.go
@@ -24,9 +24,9 @@
 //           Locks acquired by FilesystemImpls between Prepare{Delete,Rename}Dentry and Commit{Delete,Rename*}Dentry
 //         VirtualFilesystem.filesystemsMu
 //       EpollInstance.mu
-//		   Inotify.mu
-// 		     Watches.mu
-//  		     Inotify.evMu
+//       Inotify.mu
+//         Watches.mu
+//           Inotify.evMu
 // VirtualFilesystem.fsTypesMu
 //
 // Locking Dentry.mu in multiple Dentries requires holding
@@ -36,6 +36,7 @@ package vfs
 
 import (
 	"fmt"
+	"path"
 
 	"gvisor.dev/gvisor/pkg/abi/linux"
 	"gvisor.dev/gvisor/pkg/context"
@@ -122,7 +123,10 @@ type VirtualFilesystem struct {
 }
 
 // Init initializes a new VirtualFilesystem with no mounts or FilesystemTypes.
-func (vfs *VirtualFilesystem) Init() error {
+func (vfs *VirtualFilesystem) Init(ctx context.Context) error {
+	if vfs.mountpoints != nil {
+		panic("VFS already initialized")
+	}
 	vfs.mountpoints = make(map[*Dentry]map[*Mount]struct{})
 	vfs.devices = make(map[devTuple]*registeredDevice)
 	vfs.anonBlockDevMinorNext = 1
@@ -142,7 +146,7 @@ func (vfs *VirtualFilesystem) Init() error {
 		devMinor: anonfsDevMinor,
 	}
 	anonfs.vfsfs.Init(vfs, &anonFilesystemType{}, &anonfs)
-	defer anonfs.vfsfs.DecRef()
+	defer anonfs.vfsfs.DecRef(ctx)
 	anonMount, err := vfs.NewDisconnectedMount(&anonfs.vfsfs, nil, &MountOptions{})
 	if err != nil {
 		// We should not be passing any MountOptions that would cause
@@ -159,6 +163,8 @@ func (vfs *VirtualFilesystem) Init() error {
 // PathOperation is passed to VFS methods by pointer to reduce memory copying:
 // it's somewhat large and should never escape. (Options structs are passed by
 // pointer to VFS and FileDescription methods for the same reason.)
+//
+// +stateify savable
 type PathOperation struct {
 	// Root is the VFS root. References on Root are borrowed from the provider
 	// of the PathOperation.
@@ -189,11 +195,11 @@ func (vfs *VirtualFilesystem) AccessAt(ctx context.Context, creds *auth.Credenti
 	for {
 		err := rp.mount.fs.impl.AccessAt(ctx, rp, creds, ats)
 		if err == nil {
-			vfs.putResolvingPath(rp)
+			vfs.putResolvingPath(ctx, rp)
 			return nil
 		}
-		if !rp.handleError(err) {
-			vfs.putResolvingPath(rp)
+		if !rp.handleError(ctx, err) {
+			vfs.putResolvingPath(ctx, rp)
 			return err
 		}
 	}
@@ -211,11 +217,11 @@ func (vfs *VirtualFilesystem) GetDentryAt(ctx context.Context, creds *auth.Crede
 				dentry: d,
 			}
 			rp.mount.IncRef()
-			vfs.putResolvingPath(rp)
+			vfs.putResolvingPath(ctx, rp)
 			return vd, nil
 		}
-		if !rp.handleError(err) {
-			vfs.putResolvingPath(rp)
+		if !rp.handleError(ctx, err) {
+			vfs.putResolvingPath(ctx, rp)
 			return VirtualDentry{}, err
 		}
 	}
@@ -233,7 +239,7 @@ func (vfs *VirtualFilesystem) getParentDirAndName(ctx context.Context, creds *au
 			}
 			rp.mount.IncRef()
 			name := rp.Component()
-			vfs.putResolvingPath(rp)
+			vfs.putResolvingPath(ctx, rp)
 			return parentVD, name, nil
 		}
 		if checkInvariants {
@@ -241,8 +247,8 @@ func (vfs *VirtualFilesystem) getParentDirAndName(ctx context.Context, creds *au
 				panic(fmt.Sprintf("%T.GetParentDentryAt() consumed all path components and returned %v", rp.mount.fs.impl, err))
 			}
 		}
-		if !rp.handleError(err) {
-			vfs.putResolvingPath(rp)
+		if !rp.handleError(ctx, err) {
+			vfs.putResolvingPath(ctx, rp)
 			return VirtualDentry{}, "", err
 		}
 	}
@@ -257,14 +263,14 @@ func (vfs *VirtualFilesystem) LinkAt(ctx context.Context, creds *auth.Credential
 	}
 
 	if !newpop.Path.Begin.Ok() {
-		oldVD.DecRef()
+		oldVD.DecRef(ctx)
 		if newpop.Path.Absolute {
 			return syserror.EEXIST
 		}
 		return syserror.ENOENT
 	}
 	if newpop.FollowFinalSymlink {
-		oldVD.DecRef()
+		oldVD.DecRef(ctx)
 		ctx.Warningf("VirtualFilesystem.LinkAt: file creation paths can't follow final symlink")
 		return syserror.EINVAL
 	}
@@ -273,8 +279,8 @@ func (vfs *VirtualFilesystem) LinkAt(ctx context.Context, creds *auth.Credential
 	for {
 		err := rp.mount.fs.impl.LinkAt(ctx, rp, oldVD)
 		if err == nil {
-			vfs.putResolvingPath(rp)
-			oldVD.DecRef()
+			vfs.putResolvingPath(ctx, rp)
+			oldVD.DecRef(ctx)
 			return nil
 		}
 		if checkInvariants {
@@ -282,9 +288,9 @@ func (vfs *VirtualFilesystem) LinkAt(ctx context.Context, creds *auth.Credential
 				panic(fmt.Sprintf("%T.LinkAt() consumed all path components and returned %v", rp.mount.fs.impl, err))
 			}
 		}
-		if !rp.handleError(err) {
-			vfs.putResolvingPath(rp)
-			oldVD.DecRef()
+		if !rp.handleError(ctx, err) {
+			vfs.putResolvingPath(ctx, rp)
+			oldVD.DecRef(ctx)
 			return err
 		}
 	}
@@ -293,6 +299,8 @@ func (vfs *VirtualFilesystem) LinkAt(ctx context.Context, creds *auth.Credential
 // MkdirAt creates a directory at the given path.
 func (vfs *VirtualFilesystem) MkdirAt(ctx context.Context, creds *auth.Credentials, pop *PathOperation, opts *MkdirOptions) error {
 	if !pop.Path.Begin.Ok() {
+		// pop.Path should not be empty in operations that create/delete files.
+		// This is consistent with mkdirat(dirfd, "", mode).
 		if pop.Path.Absolute {
 			return syserror.EEXIST
 		}
@@ -310,7 +318,7 @@ func (vfs *VirtualFilesystem) MkdirAt(ctx context.Context, creds *auth.Credentia
 	for {
 		err := rp.mount.fs.impl.MkdirAt(ctx, rp, *opts)
 		if err == nil {
-			vfs.putResolvingPath(rp)
+			vfs.putResolvingPath(ctx, rp)
 			return nil
 		}
 		if checkInvariants {
@@ -318,8 +326,8 @@ func (vfs *VirtualFilesystem) MkdirAt(ctx context.Context, creds *auth.Credentia
 				panic(fmt.Sprintf("%T.MkdirAt() consumed all path components and returned %v", rp.mount.fs.impl, err))
 			}
 		}
-		if !rp.handleError(err) {
-			vfs.putResolvingPath(rp)
+		if !rp.handleError(ctx, err) {
+			vfs.putResolvingPath(ctx, rp)
 			return err
 		}
 	}
@@ -329,6 +337,8 @@ func (vfs *VirtualFilesystem) MkdirAt(ctx context.Context, creds *auth.Credentia
 // error from the syserror package.
 func (vfs *VirtualFilesystem) MknodAt(ctx context.Context, creds *auth.Credentials, pop *PathOperation, opts *MknodOptions) error {
 	if !pop.Path.Begin.Ok() {
+		// pop.Path should not be empty in operations that create/delete files.
+		// This is consistent with mknodat(dirfd, "", mode, dev).
 		if pop.Path.Absolute {
 			return syserror.EEXIST
 		}
@@ -343,7 +353,7 @@ func (vfs *VirtualFilesystem) MknodAt(ctx context.Context, creds *auth.Credentia
 	for {
 		err := rp.mount.fs.impl.MknodAt(ctx, rp, *opts)
 		if err == nil {
-			vfs.putResolvingPath(rp)
+			vfs.putResolvingPath(ctx, rp)
 			return nil
 		}
 		if checkInvariants {
@@ -351,8 +361,8 @@ func (vfs *VirtualFilesystem) MknodAt(ctx context.Context, creds *auth.Credentia
 				panic(fmt.Sprintf("%T.MknodAt() consumed all path components and returned %v", rp.mount.fs.impl, err))
 			}
 		}
-		if !rp.handleError(err) {
-			vfs.putResolvingPath(rp)
+		if !rp.handleError(ctx, err) {
+			vfs.putResolvingPath(ctx, rp)
 			return err
 		}
 	}
@@ -405,31 +415,31 @@ func (vfs *VirtualFilesystem) OpenAt(ctx context.Context, creds *auth.Credential
 	for {
 		fd, err := rp.mount.fs.impl.OpenAt(ctx, rp, *opts)
 		if err == nil {
-			vfs.putResolvingPath(rp)
+			vfs.putResolvingPath(ctx, rp)
 
 			if opts.FileExec {
 				if fd.Mount().Flags.NoExec {
-					fd.DecRef()
+					fd.DecRef(ctx)
 					return nil, syserror.EACCES
 				}
 
 				// Only a regular file can be executed.
 				stat, err := fd.Stat(ctx, StatOptions{Mask: linux.STATX_TYPE})
 				if err != nil {
-					fd.DecRef()
+					fd.DecRef(ctx)
 					return nil, err
 				}
 				if stat.Mask&linux.STATX_TYPE == 0 || stat.Mode&linux.S_IFMT != linux.S_IFREG {
-					fd.DecRef()
+					fd.DecRef(ctx)
 					return nil, syserror.EACCES
 				}
 			}
 
-			fd.Dentry().InotifyWithParent(linux.IN_OPEN, 0, PathEvent)
+			fd.Dentry().InotifyWithParent(ctx, linux.IN_OPEN, 0, PathEvent)
 			return fd, nil
 		}
-		if !rp.handleError(err) {
-			vfs.putResolvingPath(rp)
+		if !rp.handleError(ctx, err) {
+			vfs.putResolvingPath(ctx, rp)
 			return nil, err
 		}
 	}
@@ -441,11 +451,11 @@ func (vfs *VirtualFilesystem) ReadlinkAt(ctx context.Context, creds *auth.Creden
 	for {
 		target, err := rp.mount.fs.impl.ReadlinkAt(ctx, rp)
 		if err == nil {
-			vfs.putResolvingPath(rp)
+			vfs.putResolvingPath(ctx, rp)
 			return target, nil
 		}
-		if !rp.handleError(err) {
-			vfs.putResolvingPath(rp)
+		if !rp.handleError(ctx, err) {
+			vfs.putResolvingPath(ctx, rp)
 			return "", err
 		}
 	}
@@ -469,19 +479,19 @@ func (vfs *VirtualFilesystem) RenameAt(ctx context.Context, creds *auth.Credenti
 		return err
 	}
 	if oldName == "." || oldName == ".." {
-		oldParentVD.DecRef()
+		oldParentVD.DecRef(ctx)
 		return syserror.EBUSY
 	}
 
 	if !newpop.Path.Begin.Ok() {
-		oldParentVD.DecRef()
+		oldParentVD.DecRef(ctx)
 		if newpop.Path.Absolute {
 			return syserror.EBUSY
 		}
 		return syserror.ENOENT
 	}
 	if newpop.FollowFinalSymlink {
-		oldParentVD.DecRef()
+		oldParentVD.DecRef(ctx)
 		ctx.Warningf("VirtualFilesystem.RenameAt: destination path can't follow final symlink")
 		return syserror.EINVAL
 	}
@@ -494,8 +504,8 @@ func (vfs *VirtualFilesystem) RenameAt(ctx context.Context, creds *auth.Credenti
 	for {
 		err := rp.mount.fs.impl.RenameAt(ctx, rp, oldParentVD, oldName, renameOpts)
 		if err == nil {
-			vfs.putResolvingPath(rp)
-			oldParentVD.DecRef()
+			vfs.putResolvingPath(ctx, rp)
+			oldParentVD.DecRef(ctx)
 			return nil
 		}
 		if checkInvariants {
@@ -503,9 +513,9 @@ func (vfs *VirtualFilesystem) RenameAt(ctx context.Context, creds *auth.Credenti
 				panic(fmt.Sprintf("%T.RenameAt() consumed all path components and returned %v", rp.mount.fs.impl, err))
 			}
 		}
-		if !rp.handleError(err) {
-			vfs.putResolvingPath(rp)
-			oldParentVD.DecRef()
+		if !rp.handleError(ctx, err) {
+			vfs.putResolvingPath(ctx, rp)
+			oldParentVD.DecRef(ctx)
 			return err
 		}
 	}
@@ -514,6 +524,8 @@ func (vfs *VirtualFilesystem) RenameAt(ctx context.Context, creds *auth.Credenti
 // RmdirAt removes the directory at the given path.
 func (vfs *VirtualFilesystem) RmdirAt(ctx context.Context, creds *auth.Credentials, pop *PathOperation) error {
 	if !pop.Path.Begin.Ok() {
+		// pop.Path should not be empty in operations that create/delete files.
+		// This is consistent with unlinkat(dirfd, "", AT_REMOVEDIR).
 		if pop.Path.Absolute {
 			return syserror.EBUSY
 		}
@@ -528,7 +540,7 @@ func (vfs *VirtualFilesystem) RmdirAt(ctx context.Context, creds *auth.Credentia
 	for {
 		err := rp.mount.fs.impl.RmdirAt(ctx, rp)
 		if err == nil {
-			vfs.putResolvingPath(rp)
+			vfs.putResolvingPath(ctx, rp)
 			return nil
 		}
 		if checkInvariants {
@@ -536,8 +548,8 @@ func (vfs *VirtualFilesystem) RmdirAt(ctx context.Context, creds *auth.Credentia
 				panic(fmt.Sprintf("%T.RmdirAt() consumed all path components and returned %v", rp.mount.fs.impl, err))
 			}
 		}
-		if !rp.handleError(err) {
-			vfs.putResolvingPath(rp)
+		if !rp.handleError(ctx, err) {
+			vfs.putResolvingPath(ctx, rp)
 			return err
 		}
 	}
@@ -549,11 +561,11 @@ func (vfs *VirtualFilesystem) SetStatAt(ctx context.Context, creds *auth.Credent
 	for {
 		err := rp.mount.fs.impl.SetStatAt(ctx, rp, *opts)
 		if err == nil {
-			vfs.putResolvingPath(rp)
+			vfs.putResolvingPath(ctx, rp)
 			return nil
 		}
-		if !rp.handleError(err) {
-			vfs.putResolvingPath(rp)
+		if !rp.handleError(ctx, err) {
+			vfs.putResolvingPath(ctx, rp)
 			return err
 		}
 	}
@@ -565,11 +577,11 @@ func (vfs *VirtualFilesystem) StatAt(ctx context.Context, creds *auth.Credential
 	for {
 		stat, err := rp.mount.fs.impl.StatAt(ctx, rp, *opts)
 		if err == nil {
-			vfs.putResolvingPath(rp)
+			vfs.putResolvingPath(ctx, rp)
 			return stat, nil
 		}
-		if !rp.handleError(err) {
-			vfs.putResolvingPath(rp)
+		if !rp.handleError(ctx, err) {
+			vfs.putResolvingPath(ctx, rp)
 			return linux.Statx{}, err
 		}
 	}
@@ -582,11 +594,11 @@ func (vfs *VirtualFilesystem) StatFSAt(ctx context.Context, creds *auth.Credenti
 	for {
 		statfs, err := rp.mount.fs.impl.StatFSAt(ctx, rp)
 		if err == nil {
-			vfs.putResolvingPath(rp)
+			vfs.putResolvingPath(ctx, rp)
 			return statfs, nil
 		}
-		if !rp.handleError(err) {
-			vfs.putResolvingPath(rp)
+		if !rp.handleError(ctx, err) {
+			vfs.putResolvingPath(ctx, rp)
 			return linux.Statfs{}, err
 		}
 	}
@@ -595,6 +607,8 @@ func (vfs *VirtualFilesystem) StatFSAt(ctx context.Context, creds *auth.Credenti
 // SymlinkAt creates a symbolic link at the given path with the given target.
 func (vfs *VirtualFilesystem) SymlinkAt(ctx context.Context, creds *auth.Credentials, pop *PathOperation, target string) error {
 	if !pop.Path.Begin.Ok() {
+		// pop.Path should not be empty in operations that create/delete files.
+		// This is consistent with symlinkat(oldpath, newdirfd, "").
 		if pop.Path.Absolute {
 			return syserror.EEXIST
 		}
@@ -609,7 +623,7 @@ func (vfs *VirtualFilesystem) SymlinkAt(ctx context.Context, creds *auth.Credent
 	for {
 		err := rp.mount.fs.impl.SymlinkAt(ctx, rp, target)
 		if err == nil {
-			vfs.putResolvingPath(rp)
+			vfs.putResolvingPath(ctx, rp)
 			return nil
 		}
 		if checkInvariants {
@@ -617,8 +631,8 @@ func (vfs *VirtualFilesystem) SymlinkAt(ctx context.Context, creds *auth.Credent
 				panic(fmt.Sprintf("%T.SymlinkAt() consumed all path components and returned %v", rp.mount.fs.impl, err))
 			}
 		}
-		if !rp.handleError(err) {
-			vfs.putResolvingPath(rp)
+		if !rp.handleError(ctx, err) {
+			vfs.putResolvingPath(ctx, rp)
 			return err
 		}
 	}
@@ -627,6 +641,8 @@ func (vfs *VirtualFilesystem) SymlinkAt(ctx context.Context, creds *auth.Credent
 // UnlinkAt deletes the non-directory file at the given path.
 func (vfs *VirtualFilesystem) UnlinkAt(ctx context.Context, creds *auth.Credentials, pop *PathOperation) error {
 	if !pop.Path.Begin.Ok() {
+		// pop.Path should not be empty in operations that create/delete files.
+		// This is consistent with unlinkat(dirfd, "", 0).
 		if pop.Path.Absolute {
 			return syserror.EBUSY
 		}
@@ -641,7 +657,7 @@ func (vfs *VirtualFilesystem) UnlinkAt(ctx context.Context, creds *auth.Credenti
 	for {
 		err := rp.mount.fs.impl.UnlinkAt(ctx, rp)
 		if err == nil {
-			vfs.putResolvingPath(rp)
+			vfs.putResolvingPath(ctx, rp)
 			return nil
 		}
 		if checkInvariants {
@@ -649,8 +665,8 @@ func (vfs *VirtualFilesystem) UnlinkAt(ctx context.Context, creds *auth.Credenti
 				panic(fmt.Sprintf("%T.UnlinkAt() consumed all path components and returned %v", rp.mount.fs.impl, err))
 			}
 		}
-		if !rp.handleError(err) {
-			vfs.putResolvingPath(rp)
+		if !rp.handleError(ctx, err) {
+			vfs.putResolvingPath(ctx, rp)
 			return err
 		}
 	}
@@ -658,17 +674,11 @@ func (vfs *VirtualFilesystem) UnlinkAt(ctx context.Context, creds *auth.Credenti
 
 // BoundEndpointAt gets the bound endpoint at the given path, if one exists.
 func (vfs *VirtualFilesystem) BoundEndpointAt(ctx context.Context, creds *auth.Credentials, pop *PathOperation, opts *BoundEndpointOptions) (transport.BoundEndpoint, error) {
-	if !pop.Path.Begin.Ok() {
-		if pop.Path.Absolute {
-			return nil, syserror.ECONNREFUSED
-		}
-		return nil, syserror.ENOENT
-	}
 	rp := vfs.getResolvingPath(creds, pop)
 	for {
 		bep, err := rp.mount.fs.impl.BoundEndpointAt(ctx, rp, *opts)
 		if err == nil {
-			vfs.putResolvingPath(rp)
+			vfs.putResolvingPath(ctx, rp)
 			return bep, nil
 		}
 		if checkInvariants {
@@ -676,21 +686,21 @@ func (vfs *VirtualFilesystem) BoundEndpointAt(ctx context.Context, creds *auth.C
 				panic(fmt.Sprintf("%T.BoundEndpointAt() consumed all path components and returned %v", rp.mount.fs.impl, err))
 			}
 		}
-		if !rp.handleError(err) {
-			vfs.putResolvingPath(rp)
+		if !rp.handleError(ctx, err) {
+			vfs.putResolvingPath(ctx, rp)
 			return nil, err
 		}
 	}
 }
 
-// ListxattrAt returns all extended attribute names for the file at the given
+// ListXattrAt returns all extended attribute names for the file at the given
 // path.
-func (vfs *VirtualFilesystem) ListxattrAt(ctx context.Context, creds *auth.Credentials, pop *PathOperation, size uint64) ([]string, error) {
+func (vfs *VirtualFilesystem) ListXattrAt(ctx context.Context, creds *auth.Credentials, pop *PathOperation, size uint64) ([]string, error) {
 	rp := vfs.getResolvingPath(creds, pop)
 	for {
-		names, err := rp.mount.fs.impl.ListxattrAt(ctx, rp, size)
+		names, err := rp.mount.fs.impl.ListXattrAt(ctx, rp, size)
 		if err == nil {
-			vfs.putResolvingPath(rp)
+			vfs.putResolvingPath(ctx, rp)
 			return names, nil
 		}
 		if err == syserror.ENOTSUP {
@@ -698,61 +708,61 @@ func (vfs *VirtualFilesystem) ListxattrAt(ctx context.Context, creds *auth.Crede
 			// fs/xattr.c:vfs_listxattr() falls back to allowing the security
 			// subsystem to return security extended attributes, which by
 			// default don't exist.
-			vfs.putResolvingPath(rp)
+			vfs.putResolvingPath(ctx, rp)
 			return nil, nil
 		}
-		if !rp.handleError(err) {
-			vfs.putResolvingPath(rp)
+		if !rp.handleError(ctx, err) {
+			vfs.putResolvingPath(ctx, rp)
 			return nil, err
 		}
 	}
 }
 
-// GetxattrAt returns the value associated with the given extended attribute
+// GetXattrAt returns the value associated with the given extended attribute
 // for the file at the given path.
-func (vfs *VirtualFilesystem) GetxattrAt(ctx context.Context, creds *auth.Credentials, pop *PathOperation, opts *GetxattrOptions) (string, error) {
+func (vfs *VirtualFilesystem) GetXattrAt(ctx context.Context, creds *auth.Credentials, pop *PathOperation, opts *GetXattrOptions) (string, error) {
 	rp := vfs.getResolvingPath(creds, pop)
 	for {
-		val, err := rp.mount.fs.impl.GetxattrAt(ctx, rp, *opts)
+		val, err := rp.mount.fs.impl.GetXattrAt(ctx, rp, *opts)
 		if err == nil {
-			vfs.putResolvingPath(rp)
+			vfs.putResolvingPath(ctx, rp)
 			return val, nil
 		}
-		if !rp.handleError(err) {
-			vfs.putResolvingPath(rp)
+		if !rp.handleError(ctx, err) {
+			vfs.putResolvingPath(ctx, rp)
 			return "", err
 		}
 	}
 }
 
-// SetxattrAt changes the value associated with the given extended attribute
+// SetXattrAt changes the value associated with the given extended attribute
 // for the file at the given path.
-func (vfs *VirtualFilesystem) SetxattrAt(ctx context.Context, creds *auth.Credentials, pop *PathOperation, opts *SetxattrOptions) error {
+func (vfs *VirtualFilesystem) SetXattrAt(ctx context.Context, creds *auth.Credentials, pop *PathOperation, opts *SetXattrOptions) error {
 	rp := vfs.getResolvingPath(creds, pop)
 	for {
-		err := rp.mount.fs.impl.SetxattrAt(ctx, rp, *opts)
+		err := rp.mount.fs.impl.SetXattrAt(ctx, rp, *opts)
 		if err == nil {
-			vfs.putResolvingPath(rp)
+			vfs.putResolvingPath(ctx, rp)
 			return nil
 		}
-		if !rp.handleError(err) {
-			vfs.putResolvingPath(rp)
+		if !rp.handleError(ctx, err) {
+			vfs.putResolvingPath(ctx, rp)
 			return err
 		}
 	}
 }
 
-// RemovexattrAt removes the given extended attribute from the file at rp.
-func (vfs *VirtualFilesystem) RemovexattrAt(ctx context.Context, creds *auth.Credentials, pop *PathOperation, name string) error {
+// RemoveXattrAt removes the given extended attribute from the file at the given path.
+func (vfs *VirtualFilesystem) RemoveXattrAt(ctx context.Context, creds *auth.Credentials, pop *PathOperation, name string) error {
 	rp := vfs.getResolvingPath(creds, pop)
 	for {
-		err := rp.mount.fs.impl.RemovexattrAt(ctx, rp, name)
+		err := rp.mount.fs.impl.RemoveXattrAt(ctx, rp, name)
 		if err == nil {
-			vfs.putResolvingPath(rp)
+			vfs.putResolvingPath(ctx, rp)
 			return nil
 		}
-		if !rp.handleError(err) {
-			vfs.putResolvingPath(rp)
+		if !rp.handleError(ctx, err) {
+			vfs.putResolvingPath(ctx, rp)
 			return err
 		}
 	}
@@ -774,11 +784,67 @@ func (vfs *VirtualFilesystem) SyncAllFilesystems(ctx context.Context) error {
 		if err := fs.impl.Sync(ctx); err != nil && retErr == nil {
 			retErr = err
 		}
-		fs.DecRef()
+		fs.DecRef(ctx)
 	}
 	return retErr
 }
 
+// MkdirAllAt recursively creates non-existent directories on the given path
+// (including the last component).
+func (vfs *VirtualFilesystem) MkdirAllAt(ctx context.Context, currentPath string, root VirtualDentry, creds *auth.Credentials, mkdirOpts *MkdirOptions) error {
+	pop := &PathOperation{
+		Root:  root,
+		Start: root,
+		Path:  fspath.Parse(currentPath),
+	}
+	stat, err := vfs.StatAt(ctx, creds, pop, &StatOptions{Mask: linux.STATX_TYPE})
+	switch err {
+	case nil:
+		if stat.Mask&linux.STATX_TYPE == 0 || stat.Mode&linux.FileTypeMask != linux.ModeDirectory {
+			return syserror.ENOTDIR
+		}
+		// Directory already exists.
+		return nil
+	case syserror.ENOENT:
+		// Expected, we will create the dir.
+	default:
+		return fmt.Errorf("stat failed for %q during directory creation: %w", currentPath, err)
+	}
+
+	// Recurse to ensure parent is created and then create the final directory.
+	if err := vfs.MkdirAllAt(ctx, path.Dir(currentPath), root, creds, mkdirOpts); err != nil {
+		return err
+	}
+	if err := vfs.MkdirAt(ctx, creds, pop, mkdirOpts); err != nil {
+		return fmt.Errorf("failed to create directory %q: %w", currentPath, err)
+	}
+	return nil
+}
+
+// MakeSyntheticMountpoint creates parent directories of target if they do not
+// exist and attempts to create a directory for the mountpoint. If a
+// non-directory file already exists there then we allow it.
+func (vfs *VirtualFilesystem) MakeSyntheticMountpoint(ctx context.Context, target string, root VirtualDentry, creds *auth.Credentials) error {
+	mkdirOpts := &MkdirOptions{Mode: 0777, ForSyntheticMountpoint: true}
+
+	// Make sure the parent directory of target exists.
+	if err := vfs.MkdirAllAt(ctx, path.Dir(target), root, creds, mkdirOpts); err != nil {
+		return fmt.Errorf("failed to create parent directory of mountpoint %q: %w", target, err)
+	}
+
+	// Attempt to mkdir the final component. If a file (of any type) exists
+	// then we allow mounting on top of that because we do not require the
+	// target to be an existing directory, unlike Linux mount(2).
+	if err := vfs.MkdirAt(ctx, creds, &PathOperation{
+		Root:  root,
+		Start: root,
+		Path:  fspath.Parse(target),
+	}, mkdirOpts); err != nil && err != syserror.EEXIST {
+		return fmt.Errorf("failed to create mountpoint %q: %w", target, err)
+	}
+	return nil
+}
+
 // A VirtualDentry represents a node in a VFS tree, by combining a Dentry
 // (which represents a node in a Filesystem's tree) and a Mount (which
 // represents the Filesystem's position in a VFS mount tree).
@@ -828,9 +894,9 @@ func (vd VirtualDentry) IncRef() {
 
 // DecRef decrements the reference counts on the Mount and Dentry represented
 // by vd.
-func (vd VirtualDentry) DecRef() {
-	vd.dentry.DecRef()
-	vd.mount.DecRef()
+func (vd VirtualDentry) DecRef(ctx context.Context) {
+	vd.dentry.DecRef(ctx)
+	vd.mount.DecRef(ctx)
 }
 
 // Mount returns the Mount associated with vd. It does not take a reference on