Diffstat (limited to 'pkg')
32 files changed, 450 insertions, 325 deletions
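Most of the hunks below follow one pattern: closure-returning lock helpers are split into explicit lock/unlock pairs so the checklocks analyzer can verify callers, with +checklocksforce and +checklocksignore covering paths it cannot follow. The following is a minimal, hedged sketch of that annotated pair, not code from this change; the type and field names are hypothetical and it uses the standard library sync package rather than gVisor's pkg/sync wrappers.

package main

import "sync"

// overrides is a stand-in for types such as overrideMaps or Dirent that
// gain explicit lock/unlock methods in this change.
type overrides struct {
	mu sync.Mutex
	n  int
}

// lock acquires o.mu; the annotation tells checklocks that callers hold
// the mutex once this returns.
// +checklocksacquire:o.mu
func (o *overrides) lock() {
	o.mu.Lock()
}

// unlock releases o.mu.
// +checklocksrelease:o.mu
func (o *overrides) unlock() {
	o.mu.Unlock()
}

// bump requires o.mu to be held; checklocks verifies every caller.
// +checklocks:o.mu
func (o *overrides) bump() {
	o.n++
}

func main() {
	o := &overrides{}
	o.lock()
	defer o.unlock()
	o.bump()
}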
diff --git a/pkg/sentry/fs/dirent.go b/pkg/sentry/fs/dirent.go index 3a45e9041..8d7660e79 100644 --- a/pkg/sentry/fs/dirent.go +++ b/pkg/sentry/fs/dirent.go @@ -488,11 +488,11 @@ func (d *Dirent) walk(ctx context.Context, root *Dirent, name string, walkMayUnl // Slow path: load the InodeOperations into memory. Since this is a hot path and the lookup may be // expensive, if possible release the lock and re-acquire it. if walkMayUnlock { - d.mu.Unlock() + d.mu.Unlock() // +checklocksforce: results in an inconsistent block. } c, err := d.Inode.Lookup(ctx, name) if walkMayUnlock { - d.mu.Lock() + d.mu.Lock() // +checklocksforce: see above. } // No dice. if err != nil { @@ -594,21 +594,27 @@ func (d *Dirent) exists(ctx context.Context, root *Dirent, name string) bool { // lockDirectory should be called for any operation that changes this `d`s // children (creating or removing them). -func (d *Dirent) lockDirectory() func() { +// +checklocksacquire:d.dirMu +// +checklocksacquire:d.mu +func (d *Dirent) lockDirectory() { renameMu.RLock() d.dirMu.Lock() d.mu.Lock() - return func() { - d.mu.Unlock() - d.dirMu.Unlock() - renameMu.RUnlock() - } +} + +// unlockDirectory is the reverse of lockDirectory. +// +checklocksrelease:d.dirMu +// +checklocksrelease:d.mu +func (d *Dirent) unlockDirectory() { + d.mu.Unlock() + d.dirMu.Unlock() + renameMu.RUnlock() // +checklocksforce: see lockDirectory. } // Create creates a new regular file in this directory. func (d *Dirent) Create(ctx context.Context, root *Dirent, name string, flags FileFlags, perms FilePermissions) (*File, error) { - unlock := d.lockDirectory() - defer unlock() + d.lockDirectory() + defer d.unlockDirectory() // Does something already exist? if d.exists(ctx, root, name) { @@ -670,8 +676,8 @@ func (d *Dirent) finishCreate(ctx context.Context, child *Dirent, name string) { // genericCreate executes create if name does not exist. Removes a negative Dirent at name if // create succeeds. func (d *Dirent) genericCreate(ctx context.Context, root *Dirent, name string, create func() error) error { - unlock := d.lockDirectory() - defer unlock() + d.lockDirectory() + defer d.unlockDirectory() // Does something already exist? if d.exists(ctx, root, name) { @@ -1021,8 +1027,8 @@ func (d *Dirent) Remove(ctx context.Context, root *Dirent, name string, dirPath panic("Dirent.Remove: root must not be nil") } - unlock := d.lockDirectory() - defer unlock() + d.lockDirectory() + defer d.unlockDirectory() // Try to walk to the node. child, err := d.walk(ctx, root, name, false /* may unlock */) @@ -1082,8 +1088,8 @@ func (d *Dirent) RemoveDirectory(ctx context.Context, root *Dirent, name string) panic("Dirent.Remove: root must not be nil") } - unlock := d.lockDirectory() - defer unlock() + d.lockDirectory() + defer d.unlockDirectory() // Check for dots. if name == "." { @@ -1259,17 +1265,15 @@ func (d *Dirent) dropExtendedReference() { d.Inode.MountSource.fscache.Remove(d) } -// lockForRename takes locks on oldParent and newParent as required by Rename -// and returns a function that will unlock the locks taken. The returned -// function must be called even if a non-nil error is returned. -func lockForRename(oldParent *Dirent, oldName string, newParent *Dirent, newName string) (func(), error) { +// lockForRename takes locks on oldParent and newParent as required by Rename. +// On return, unlockForRename must always be called, even with an error. 
+// +checklocksacquire:oldParent.mu +// +checklocksacquire:newParent.mu +func lockForRename(oldParent *Dirent, oldName string, newParent *Dirent, newName string) error { renameMu.Lock() if oldParent == newParent { oldParent.mu.Lock() - return func() { - oldParent.mu.Unlock() - renameMu.Unlock() - }, nil + return nil // +checklocksforce: only one lock exists. } // Renaming between directories is a bit subtle: @@ -1297,11 +1301,7 @@ func lockForRename(oldParent *Dirent, oldName string, newParent *Dirent, newName // itself. err = unix.EINVAL } - return func() { - newParent.mu.Unlock() - oldParent.mu.Unlock() - renameMu.Unlock() - }, err + return err } child = p } @@ -1310,11 +1310,21 @@ func lockForRename(oldParent *Dirent, oldName string, newParent *Dirent, newName // have no relationship; in either case we can do this: newParent.mu.Lock() oldParent.mu.Lock() - return func() { + return nil +} + +// unlockForRename is the opposite of lockForRename. +// +checklocksrelease:oldParent.mu +// +checklocksrelease:newParent.mu +func unlockForRename(oldParent, newParent *Dirent) { + if oldParent == newParent { oldParent.mu.Unlock() - newParent.mu.Unlock() - renameMu.Unlock() - }, nil + renameMu.Unlock() // +checklocksforce: only one lock exists. + return + } + newParent.mu.Unlock() + oldParent.mu.Unlock() + renameMu.Unlock() // +checklocksforce: not tracked. } func (d *Dirent) checkSticky(ctx context.Context, victim *Dirent) error { @@ -1353,8 +1363,8 @@ func (d *Dirent) MayDelete(ctx context.Context, root *Dirent, name string) error return err } - unlock := d.lockDirectory() - defer unlock() + d.lockDirectory() + defer d.unlockDirectory() victim, err := d.walk(ctx, root, name, true /* may unlock */) if err != nil { @@ -1392,8 +1402,8 @@ func Rename(ctx context.Context, root *Dirent, oldParent *Dirent, oldName string } // Acquire global renameMu lock, and mu locks on oldParent/newParent. - unlock, err := lockForRename(oldParent, oldName, newParent, newName) - defer unlock() + err := lockForRename(oldParent, oldName, newParent, newName) + defer unlockForRename(oldParent, newParent) if err != nil { return err } diff --git a/pkg/sentry/fs/fs.go b/pkg/sentry/fs/fs.go index 44587bb37..a346c316b 100644 --- a/pkg/sentry/fs/fs.go +++ b/pkg/sentry/fs/fs.go @@ -80,23 +80,33 @@ func AsyncBarrier() { // Async executes a function asynchronously. // // Async must not be called recursively. +// +checklocksignore func Async(f func()) { workMu.RLock() - go func() { // S/R-SAFE: AsyncBarrier must be called. - defer workMu.RUnlock() // Ensure RUnlock in case of panic. - f() - }() + go asyncWork(f) // S/R-SAFE: AsyncBarrier must be called. +} + +// +checklocksignore +func asyncWork(f func()) { + // Ensure RUnlock in case of panic. + defer workMu.RUnlock() + f() } // AsyncWithContext is just like Async, except that it calls the asynchronous // function with the given context as argument. This function exists to avoid // needing to allocate an extra function on the heap in a hot path. +// +checklocksignore func AsyncWithContext(ctx context.Context, f func(context.Context)) { workMu.RLock() - go func() { // S/R-SAFE: AsyncBarrier must be called. - defer workMu.RUnlock() // Ensure RUnlock in case of panic. - f(ctx) - }() + go asyncWorkWithContext(ctx, f) +} + +// +checklocksignore +func asyncWorkWithContext(ctx context.Context, f func(context.Context)) { + // Ensure RUnlock in case of panic. 
+ defer workMu.RUnlock() + f(ctx) } // AsyncErrorBarrier waits for all outstanding asynchronous work to complete, or diff --git a/pkg/sentry/fs/gofer/inode_state.go b/pkg/sentry/fs/gofer/inode_state.go index e2af1d2ae..19f91f010 100644 --- a/pkg/sentry/fs/gofer/inode_state.go +++ b/pkg/sentry/fs/gofer/inode_state.go @@ -112,13 +112,6 @@ func (i *inodeFileState) loadLoading(_ struct{}) { // +checklocks:i.loading func (i *inodeFileState) afterLoad() { load := func() (err error) { - // See comment on i.loading(). - defer func() { - if err == nil { - i.loading.Unlock() - } - }() - // Manually restore the p9.File. name, ok := i.s.inodeMappings[i.sattr.InodeID] if !ok { @@ -167,6 +160,9 @@ func (i *inodeFileState) afterLoad() { i.savedUAttr = nil } + // See comment on i.loading(). This only unlocks on the + // non-error path. + i.loading.Unlock() // +checklocksforce: per comment. return nil } diff --git a/pkg/sentry/fs/gofer/path.go b/pkg/sentry/fs/gofer/path.go index aa2405f68..958f46bd6 100644 --- a/pkg/sentry/fs/gofer/path.go +++ b/pkg/sentry/fs/gofer/path.go @@ -47,7 +47,8 @@ func (i *inodeOperations) Lookup(ctx context.Context, dir *fs.Inode, name string return nil, linuxerr.ENAMETOOLONG } - cp := i.session().cachePolicy + s := i.session() + cp := s.cachePolicy if cp.cacheReaddir() { // Check to see if we have readdirCache that indicates the // child does not exist. Avoid holding readdirMu longer than @@ -78,7 +79,7 @@ func (i *inodeOperations) Lookup(ctx context.Context, dir *fs.Inode, name string return nil, err } - if i.session().overrides != nil { + if s.overrides != nil { // Check if file belongs to a internal named pipe. Note that it doesn't need // to check for sockets because it's done in newInodeOperations below. deviceKey := device.MultiDeviceKey{ @@ -86,13 +87,13 @@ func (i *inodeOperations) Lookup(ctx context.Context, dir *fs.Inode, name string SecondaryDevice: i.session().connID, Inode: qids[0].Path, } - unlock := i.session().overrides.lock() - if pipeInode := i.session().overrides.getPipe(deviceKey); pipeInode != nil { - unlock() + s.overrides.lock() + if pipeInode := s.overrides.getPipe(deviceKey); pipeInode != nil { + s.overrides.unlock() pipeInode.IncRef() return fs.NewDirent(ctx, pipeInode, name), nil } - unlock() + s.overrides.unlock() } // Construct the Inode operations. @@ -221,17 +222,20 @@ func (i *inodeOperations) CreateHardLink(ctx context.Context, inode *fs.Inode, t if err := i.fileState.file.link(ctx, &targetOpts.fileState.file, newName); err != nil { return err } - if i.session().cachePolicy.cacheUAttrs(inode) { + + s := i.session() + if s.cachePolicy.cacheUAttrs(inode) { // Increase link count. targetOpts.cachingInodeOps.IncLinks(ctx) } + i.touchModificationAndStatusChangeTime(ctx, inode) return nil } // CreateDirectory uses Create to create a directory named s under inodeOperations. 
-func (i *inodeOperations) CreateDirectory(ctx context.Context, dir *fs.Inode, s string, perm fs.FilePermissions) error { - if len(s) > maxFilenameLen { +func (i *inodeOperations) CreateDirectory(ctx context.Context, dir *fs.Inode, name string, perm fs.FilePermissions) error { + if len(name) > maxFilenameLen { return linuxerr.ENAMETOOLONG } @@ -247,16 +251,18 @@ func (i *inodeOperations) CreateDirectory(ctx context.Context, dir *fs.Inode, s perm.SetGID = true } - if _, err := i.fileState.file.mkdir(ctx, s, p9.FileMode(perm.LinuxMode()), p9.UID(owner.UID), p9.GID(owner.GID)); err != nil { + if _, err := i.fileState.file.mkdir(ctx, name, p9.FileMode(perm.LinuxMode()), p9.UID(owner.UID), p9.GID(owner.GID)); err != nil { return err } - if i.session().cachePolicy.cacheUAttrs(dir) { + + s := i.session() + if s.cachePolicy.cacheUAttrs(dir) { // Increase link count. // // N.B. This will update the modification time. i.cachingInodeOps.IncLinks(ctx) } - if i.session().cachePolicy.cacheReaddir() { + if s.cachePolicy.cacheReaddir() { // Invalidate readdir cache. i.markDirectoryDirty() } @@ -269,13 +275,14 @@ func (i *inodeOperations) Bind(ctx context.Context, dir *fs.Inode, name string, return nil, linuxerr.ENAMETOOLONG } - if i.session().overrides == nil { + s := i.session() + if s.overrides == nil { return nil, syserror.EOPNOTSUPP } // Stabilize the override map while creation is in progress. - unlock := i.session().overrides.lock() - defer unlock() + s.overrides.lock() + defer s.overrides.unlock() sattr, iops, err := i.createEndpointFile(ctx, dir, name, perm, p9.ModeSocket) if err != nil { @@ -284,7 +291,7 @@ func (i *inodeOperations) Bind(ctx context.Context, dir *fs.Inode, name string, // Construct the positive Dirent. childDir := fs.NewDirent(ctx, fs.NewInode(ctx, iops, dir.MountSource, sattr), name) - i.session().overrides.addBoundEndpoint(iops.fileState.key, childDir, ep) + s.overrides.addBoundEndpoint(iops.fileState.key, childDir, ep) return childDir, nil } @@ -298,8 +305,9 @@ func (i *inodeOperations) CreateFifo(ctx context.Context, dir *fs.Inode, name st mode := p9.FileMode(perm.LinuxMode()) | p9.ModeNamedPipe // N.B. FIFOs use major/minor numbers 0. + s := i.session() if _, err := i.fileState.file.mknod(ctx, name, mode, 0, 0, p9.UID(owner.UID), p9.GID(owner.GID)); err != nil { - if i.session().overrides == nil || !linuxerr.Equals(linuxerr.EPERM, err) { + if s.overrides == nil || !linuxerr.Equals(linuxerr.EPERM, err) { return err } // If gofer doesn't support mknod, check if we can create an internal fifo. @@ -311,13 +319,14 @@ func (i *inodeOperations) CreateFifo(ctx context.Context, dir *fs.Inode, name st } func (i *inodeOperations) createInternalFifo(ctx context.Context, dir *fs.Inode, name string, owner fs.FileOwner, perm fs.FilePermissions) error { - if i.session().overrides == nil { + s := i.session() + if s.overrides == nil { return linuxerr.EPERM } // Stabilize the override map while creation is in progress. - unlock := i.session().overrides.lock() - defer unlock() + s.overrides.lock() + defer s.overrides.unlock() sattr, fileOps, err := i.createEndpointFile(ctx, dir, name, perm, p9.ModeNamedPipe) if err != nil { @@ -336,7 +345,7 @@ func (i *inodeOperations) createInternalFifo(ctx context.Context, dir *fs.Inode, // Construct the positive Dirent. 
childDir := fs.NewDirent(ctx, fs.NewInode(ctx, iops, dir.MountSource, sattr), name) - i.session().overrides.addPipe(fileOps.fileState.key, childDir, inode) + s.overrides.addPipe(fileOps.fileState.key, childDir, inode) return nil } @@ -386,8 +395,9 @@ func (i *inodeOperations) Remove(ctx context.Context, dir *fs.Inode, name string return linuxerr.ENAMETOOLONG } + s := i.session() var key *device.MultiDeviceKey - if i.session().overrides != nil { + if s.overrides != nil { // Find out if file being deleted is a socket or pipe that needs to be // removed from endpoint map. if d, err := i.Lookup(ctx, dir, name); err == nil { @@ -402,8 +412,8 @@ func (i *inodeOperations) Remove(ctx context.Context, dir *fs.Inode, name string } // Stabilize the override map while deletion is in progress. - unlock := i.session().overrides.lock() - defer unlock() + s.overrides.lock() + defer s.overrides.unlock() } } } @@ -412,7 +422,7 @@ func (i *inodeOperations) Remove(ctx context.Context, dir *fs.Inode, name string return err } if key != nil { - i.session().overrides.remove(ctx, *key) + s.overrides.remove(ctx, *key) } i.touchModificationAndStatusChangeTime(ctx, dir) @@ -429,11 +439,13 @@ func (i *inodeOperations) RemoveDirectory(ctx context.Context, dir *fs.Inode, na if err := i.fileState.file.unlinkAt(ctx, name, 0x200); err != nil { return err } - if i.session().cachePolicy.cacheUAttrs(dir) { + + s := i.session() + if s.cachePolicy.cacheUAttrs(dir) { // Decrease link count and updates atime. i.cachingInodeOps.DecLinks(ctx) } - if i.session().cachePolicy.cacheReaddir() { + if s.cachePolicy.cacheReaddir() { // Invalidate readdir cache. i.markDirectoryDirty() } @@ -463,12 +475,13 @@ func (i *inodeOperations) Rename(ctx context.Context, inode *fs.Inode, oldParent } // Is the renamed entity a directory? Fix link counts. + s := i.session() if fs.IsDir(i.fileState.sattr) { // Update cached state. - if i.session().cachePolicy.cacheUAttrs(oldParent) { + if s.cachePolicy.cacheUAttrs(oldParent) { oldParentInodeOperations.cachingInodeOps.DecLinks(ctx) } - if i.session().cachePolicy.cacheUAttrs(newParent) { + if s.cachePolicy.cacheUAttrs(newParent) { // Only IncLinks if there is a new addition to // newParent. If this is replacement, then the total // count remains the same. @@ -477,7 +490,7 @@ func (i *inodeOperations) Rename(ctx context.Context, inode *fs.Inode, oldParent } } } - if i.session().cachePolicy.cacheReaddir() { + if s.cachePolicy.cacheReaddir() { // Mark old directory dirty. oldParentInodeOperations.markDirectoryDirty() if oldParent != newParent { @@ -487,17 +500,18 @@ func (i *inodeOperations) Rename(ctx context.Context, inode *fs.Inode, oldParent } // Rename always updates ctime. - if i.session().cachePolicy.cacheUAttrs(inode) { + if s.cachePolicy.cacheUAttrs(inode) { i.cachingInodeOps.TouchStatusChangeTime(ctx) } return nil } func (i *inodeOperations) touchModificationAndStatusChangeTime(ctx context.Context, inode *fs.Inode) { - if i.session().cachePolicy.cacheUAttrs(inode) { + s := i.session() + if s.cachePolicy.cacheUAttrs(inode) { i.cachingInodeOps.TouchModificationAndStatusChangeTime(ctx) } - if i.session().cachePolicy.cacheReaddir() { + if s.cachePolicy.cacheReaddir() { // Invalidate readdir cache. 
i.markDirectoryDirty() } diff --git a/pkg/sentry/fs/gofer/session.go b/pkg/sentry/fs/gofer/session.go index 7cf3522ff..b7debeecb 100644 --- a/pkg/sentry/fs/gofer/session.go +++ b/pkg/sentry/fs/gofer/session.go @@ -98,9 +98,14 @@ func (e *overrideMaps) remove(ctx context.Context, key device.MultiDeviceKey) { // lock blocks other addition and removal operations from happening while // the backing file is being created or deleted. Returns a function that unlocks // the endpoint map. -func (e *overrideMaps) lock() func() { +// +checklocksacquire:e.mu +func (e *overrideMaps) lock() { e.mu.Lock() - return func() { e.mu.Unlock() } +} + +// +checklocksrelease:e.mu +func (e *overrideMaps) unlock() { + e.mu.Unlock() } // getBoundEndpoint returns the bound endpoint mapped to the given key. @@ -366,8 +371,8 @@ func newOverrideMaps() *overrideMaps { // fillKeyMap populates key and dirent maps upon restore from saved pathmap. func (s *session) fillKeyMap(ctx context.Context) error { - unlock := s.overrides.lock() - defer unlock() + s.overrides.lock() + defer s.overrides.unlock() for ep, dirPath := range s.overrides.pathMap { _, file, err := s.attach.walk(ctx, splitAbsolutePath(dirPath)) @@ -394,8 +399,8 @@ func (s *session) fillKeyMap(ctx context.Context) error { // fillPathMap populates paths for overrides from dirents in direntMap // before save. func (s *session) fillPathMap(ctx context.Context) error { - unlock := s.overrides.lock() - defer unlock() + s.overrides.lock() + defer s.overrides.unlock() for _, endpoint := range s.overrides.keyMap { mountRoot := endpoint.dirent.MountRoot() diff --git a/pkg/sentry/fs/gofer/socket.go b/pkg/sentry/fs/gofer/socket.go index 8a1c69ac2..1fd8a0910 100644 --- a/pkg/sentry/fs/gofer/socket.go +++ b/pkg/sentry/fs/gofer/socket.go @@ -32,10 +32,11 @@ func (i *inodeOperations) BoundEndpoint(inode *fs.Inode, path string) transport. return nil } - if i.session().overrides != nil { - unlock := i.session().overrides.lock() - defer unlock() - ep := i.session().overrides.getBoundEndpoint(i.fileState.key) + s := i.session() + if s.overrides != nil { + s.overrides.lock() + defer s.overrides.unlock() + ep := s.overrides.getBoundEndpoint(i.fileState.key) if ep != nil { return ep } diff --git a/pkg/sentry/fsimpl/gofer/filesystem.go b/pkg/sentry/fsimpl/gofer/filesystem.go index 237d17921..652e5fe77 100644 --- a/pkg/sentry/fsimpl/gofer/filesystem.go +++ b/pkg/sentry/fsimpl/gofer/filesystem.go @@ -147,6 +147,7 @@ func putDentrySlice(ds *[]*dentry) { // but dentry slices are allocated lazily, and it's much easier to say "defer // fs.renameMuRUnlockAndCheckCaching(&ds)" than "defer func() { // fs.renameMuRUnlockAndCheckCaching(ds) }()" to work around this. +// +checklocksrelease:fs.renameMu func (fs *filesystem) renameMuRUnlockAndCheckCaching(ctx context.Context, dsp **[]*dentry) { fs.renameMu.RUnlock() if *dsp == nil { @@ -159,6 +160,7 @@ func (fs *filesystem) renameMuRUnlockAndCheckCaching(ctx context.Context, dsp ** putDentrySlice(*dsp) } +// +checklocksrelease:fs.renameMu func (fs *filesystem) renameMuUnlockAndCheckCaching(ctx context.Context, ds **[]*dentry) { if *ds == nil { fs.renameMu.Unlock() @@ -540,7 +542,7 @@ func (fs *filesystem) unlinkAt(ctx context.Context, rp *vfs.ResolvingPath, dir b if child.syntheticChildren != 0 { // This is definitely not an empty directory, irrespective of // fs.opts.interop. - vfsObj.AbortDeleteDentry(&child.vfsd) + vfsObj.AbortDeleteDentry(&child.vfsd) // +checklocksforce: PrepareDeleteDentry called if child != nil. 
return linuxerr.ENOTEMPTY } // If InteropModeShared is in effect and the first call to @@ -550,12 +552,12 @@ func (fs *filesystem) unlinkAt(ctx context.Context, rp *vfs.ResolvingPath, dir b // still exist) would be a waste of time. if child.cachedMetadataAuthoritative() { if !child.isDir() { - vfsObj.AbortDeleteDentry(&child.vfsd) + vfsObj.AbortDeleteDentry(&child.vfsd) // +checklocksforce: see above. return syserror.ENOTDIR } for _, grandchild := range child.children { if grandchild != nil { - vfsObj.AbortDeleteDentry(&child.vfsd) + vfsObj.AbortDeleteDentry(&child.vfsd) // +checklocksforce: see above. return linuxerr.ENOTEMPTY } } @@ -565,12 +567,12 @@ func (fs *filesystem) unlinkAt(ctx context.Context, rp *vfs.ResolvingPath, dir b } else { // child must be a non-directory file. if child != nil && child.isDir() { - vfsObj.AbortDeleteDentry(&child.vfsd) + vfsObj.AbortDeleteDentry(&child.vfsd) // +checklocksforce: see above. return syserror.EISDIR } if rp.MustBeDir() { if child != nil { - vfsObj.AbortDeleteDentry(&child.vfsd) + vfsObj.AbortDeleteDentry(&child.vfsd) // +checklocksforce: see above. } return syserror.ENOTDIR } @@ -583,7 +585,7 @@ func (fs *filesystem) unlinkAt(ctx context.Context, rp *vfs.ResolvingPath, dir b err = parent.file.unlinkAt(ctx, name, flags) if err != nil { if child != nil { - vfsObj.AbortDeleteDentry(&child.vfsd) + vfsObj.AbortDeleteDentry(&child.vfsd) // +checklocksforce: see above. } return err } @@ -601,7 +603,7 @@ func (fs *filesystem) unlinkAt(ctx context.Context, rp *vfs.ResolvingPath, dir b } if child != nil { - vfsObj.CommitDeleteDentry(ctx, &child.vfsd) + vfsObj.CommitDeleteDentry(ctx, &child.vfsd) // +checklocksforce: see above. child.setDeleted() if child.isSynthetic() { parent.syntheticChildren-- diff --git a/pkg/sentry/fsimpl/gofer/gofer.go b/pkg/sentry/fsimpl/gofer/gofer.go index fe4c2e0e1..2f85215d9 100644 --- a/pkg/sentry/fsimpl/gofer/gofer.go +++ b/pkg/sentry/fsimpl/gofer/gofer.go @@ -947,10 +947,10 @@ func (d *dentry) cachedMetadataAuthoritative() bool { // updateFromP9Attrs is called to update d's metadata after an update from the // remote filesystem. // Precondition: d.metadataMu must be locked. +// +checklocks:d.metadataMu func (d *dentry) updateFromP9AttrsLocked(mask p9.AttrMask, attr *p9.Attr) { if mask.Mode { if got, want := uint32(attr.Mode.FileType()), d.fileType(); got != want { - d.metadataMu.Unlock() panic(fmt.Sprintf("gofer.dentry file type changed from %#o to %#o", want, got)) } atomic.StoreUint32(&d.mode, uint32(attr.Mode)) @@ -989,6 +989,7 @@ func (d *dentry) updateFromP9AttrsLocked(mask p9.AttrMask, attr *p9.Attr) { // Preconditions: !d.isSynthetic(). // Preconditions: d.metadataMu is locked. +// +checklocks:d.metadataMu func (d *dentry) refreshSizeLocked(ctx context.Context) error { d.handleMu.RLock() @@ -1020,6 +1021,7 @@ func (d *dentry) updateFromGetattr(ctx context.Context) error { // Preconditions: // * !d.isSynthetic(). // * d.metadataMu is locked. +// +checklocks:d.metadataMu func (d *dentry) updateFromGetattrLocked(ctx context.Context) error { // Use d.readFile or d.writeFile, which represent 9P FIDs that have been // opened, in preference to d.file, which represents a 9P fid that has not. @@ -1044,7 +1046,8 @@ func (d *dentry) updateFromGetattrLocked(ctx context.Context) error { _, attrMask, attr, err := file.getAttr(ctx, dentryAttrMask()) if handleMuRLocked { - d.handleMu.RUnlock() // must be released before updateFromP9AttrsLocked() + // handleMu must be released before updateFromP9AttrsLocked(). 
+ d.handleMu.RUnlock() // +checklocksforce: complex case. } if err != nil { return err @@ -1470,7 +1473,7 @@ func (d *dentry) checkCachingLocked(ctx context.Context, renameMuWriteLocked boo if d.isDeleted() { d.watches.HandleDeletion(ctx) } - d.destroyLocked(ctx) + d.destroyLocked(ctx) // +checklocksforce: renameMu must be acquired at this point. return } // If d still has inotify watches and it is not deleted or invalidated, it @@ -1498,7 +1501,7 @@ func (d *dentry) checkCachingLocked(ctx context.Context, renameMuWriteLocked boo delete(d.parent.children, d.name) d.parent.dirMu.Unlock() } - d.destroyLocked(ctx) + d.destroyLocked(ctx) // +checklocksforce: see above. return } @@ -1527,7 +1530,7 @@ func (d *dentry) checkCachingLocked(ctx context.Context, renameMuWriteLocked boo d.fs.renameMu.Lock() defer d.fs.renameMu.Unlock() } - d.fs.evictCachedDentryLocked(ctx) + d.fs.evictCachedDentryLocked(ctx) // +checklocksforce: see above. } } @@ -1544,6 +1547,7 @@ func (d *dentry) removeFromCacheLocked() { // Precondition: fs.renameMu must be locked for writing; it may be temporarily // unlocked. +// +checklocks:fs.renameMu func (fs *filesystem) evictAllCachedDentriesLocked(ctx context.Context) { for fs.cachedDentriesLen != 0 { fs.evictCachedDentryLocked(ctx) @@ -1552,6 +1556,7 @@ func (fs *filesystem) evictAllCachedDentriesLocked(ctx context.Context) { // Preconditions: // * fs.renameMu must be locked for writing; it may be temporarily unlocked. +// +checklocks:fs.renameMu func (fs *filesystem) evictCachedDentryLocked(ctx context.Context) { fs.cacheMu.Lock() victim := fs.cachedDentries.Back() @@ -1588,7 +1593,7 @@ func (fs *filesystem) evictCachedDentryLocked(ctx context.Context) { // will try to acquire fs.renameMu (which we have already acquired). Hence, // fs.renameMu will synchronize the destroy attempts. victim.cachingMu.Unlock() - victim.destroyLocked(ctx) + victim.destroyLocked(ctx) // +checklocksforce: owned as precondition, victim.fs == fs. } // destroyLocked destroys the dentry. @@ -1598,6 +1603,7 @@ func (fs *filesystem) evictCachedDentryLocked(ctx context.Context) { // * d.refs == 0. // * d.parent.children[d.name] != d, i.e. d is not reachable by path traversal // from its former parent dentry. +// +checklocks:d.fs.renameMu func (d *dentry) destroyLocked(ctx context.Context) { switch atomic.LoadInt64(&d.refs) { case 0: diff --git a/pkg/sentry/fsimpl/gofer/revalidate.go b/pkg/sentry/fsimpl/gofer/revalidate.go index 8f81f0822..226790a11 100644 --- a/pkg/sentry/fsimpl/gofer/revalidate.go +++ b/pkg/sentry/fsimpl/gofer/revalidate.go @@ -247,16 +247,16 @@ func (fs *filesystem) revalidateHelper(ctx context.Context, vfsObj *vfs.VirtualF if found && !d.isSynthetic() { // First dentry is where the search is starting, just update attributes // since it cannot be replaced. - d.updateFromP9AttrsLocked(stats[i].Valid, &stats[i].Attr) + d.updateFromP9AttrsLocked(stats[i].Valid, &stats[i].Attr) // +checklocksforce: acquired by lockAllMetadata. } - d.metadataMu.Unlock() + d.metadataMu.Unlock() // +checklocksforce: see above. continue } // Note that synthetic dentries will always fails the comparison check // below. if !found || d.qidPath != stats[i].QID.Path { - d.metadataMu.Unlock() + d.metadataMu.Unlock() // +checklocksforce: see above. if !found && d.isSynthetic() { // We have a synthetic file, and no remote file has arisen to replace // it. @@ -298,7 +298,7 @@ func (fs *filesystem) revalidateHelper(ctx context.Context, vfsObj *vfs.VirtualF } // The file at this path hasn't changed. 
Just update cached metadata. - d.updateFromP9AttrsLocked(stats[i].Valid, &stats[i].Attr) + d.updateFromP9AttrsLocked(stats[i].Valid, &stats[i].Attr) // +checklocksforce: see above. d.metadataMu.Unlock() } @@ -354,6 +354,7 @@ func (r *revalidateState) add(name string, d *dentry) { r.dentries = append(r.dentries, d) } +// +checklocksignore func (r *revalidateState) lockAllMetadata() { for _, d := range r.dentries { d.metadataMu.Lock() @@ -372,6 +373,7 @@ func (r *revalidateState) popFront() *dentry { // reset releases all metadata locks and resets all fields to allow this // instance to be reused. +// +checklocksignore func (r *revalidateState) reset() { if r.locked { // Unlock any remaining dentries. diff --git a/pkg/sentry/fsimpl/gofer/symlink.go b/pkg/sentry/fsimpl/gofer/symlink.go index 2ec819f86..dbd834c67 100644 --- a/pkg/sentry/fsimpl/gofer/symlink.go +++ b/pkg/sentry/fsimpl/gofer/symlink.go @@ -41,7 +41,7 @@ func (d *dentry) readlink(ctx context.Context, mnt *vfs.Mount) (string, error) { d.haveTarget = true d.target = target } - d.dataMu.Unlock() + d.dataMu.Unlock() // +checklocksforce: guaranteed locked from above. } return target, err } diff --git a/pkg/sentry/fsimpl/kernfs/filesystem.go b/pkg/sentry/fsimpl/kernfs/filesystem.go index 38c2b6df1..20d2526ad 100644 --- a/pkg/sentry/fsimpl/kernfs/filesystem.go +++ b/pkg/sentry/fsimpl/kernfs/filesystem.go @@ -752,7 +752,7 @@ func (fs *Filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldPa fs.deferDecRef(replaced) replaceVFSD = replaced.VFSDentry() } - virtfs.CommitRenameReplaceDentry(ctx, srcVFSD, replaceVFSD) + virtfs.CommitRenameReplaceDentry(ctx, srcVFSD, replaceVFSD) // +checklocksforce: to may be nil, that's okay. return nil } @@ -788,7 +788,7 @@ func (fs *Filesystem) RmdirAt(ctx context.Context, rp *vfs.ResolvingPath) error defer mntns.DecRef(ctx) vfsd := d.VFSDentry() if err := virtfs.PrepareDeleteDentry(mntns, vfsd); err != nil { - return err + return err // +checklocksforce: vfsd is not locked. } if err := parentDentry.inode.RmDir(ctx, d.name, d.inode); err != nil { diff --git a/pkg/sentry/fsimpl/overlay/filesystem.go b/pkg/sentry/fsimpl/overlay/filesystem.go index 41207211a..77f9affc1 100644 --- a/pkg/sentry/fsimpl/overlay/filesystem.go +++ b/pkg/sentry/fsimpl/overlay/filesystem.go @@ -87,7 +87,7 @@ func putDentrySlice(ds *[]*dentry) { // fs.renameMuRUnlockAndCheckDrop(&ds)" than "defer func() { // fs.renameMuRUnlockAndCheckDrop(ds) }()" to work around this. // -// +checklocks:fs.renameMu +// +checklocksrelease:fs.renameMu func (fs *filesystem) renameMuRUnlockAndCheckDrop(ctx context.Context, dsp **[]*dentry) { fs.renameMu.RUnlock() if *dsp == nil { @@ -113,7 +113,7 @@ func (fs *filesystem) renameMuRUnlockAndCheckDrop(ctx context.Context, dsp **[]* putDentrySlice(*dsp) } -// +checklocks:fs.renameMu +// +checklocksrelease:fs.renameMu func (fs *filesystem) renameMuUnlockAndCheckDrop(ctx context.Context, ds **[]*dentry) { if *ds == nil { fs.renameMu.Unlock() diff --git a/pkg/sentry/fsimpl/verity/filesystem.go b/pkg/sentry/fsimpl/verity/filesystem.go index e4bfbd3c9..358a66072 100644 --- a/pkg/sentry/fsimpl/verity/filesystem.go +++ b/pkg/sentry/fsimpl/verity/filesystem.go @@ -75,6 +75,7 @@ func putDentrySlice(ds *[]*dentry) { // but dentry slices are allocated lazily, and it's much easier to say "defer // fs.renameMuRUnlockAndCheckDrop(&ds)" than "defer func() { // fs.renameMuRUnlockAndCheckDrop(ds) }()" to work around this. 
+// +checklocksrelease:fs.renameMu func (fs *filesystem) renameMuRUnlockAndCheckDrop(ctx context.Context, ds **[]*dentry) { fs.renameMu.RUnlock() if *ds == nil { @@ -90,6 +91,7 @@ func (fs *filesystem) renameMuRUnlockAndCheckDrop(ctx context.Context, ds **[]*d putDentrySlice(*ds) } +// +checklocksrelease:fs.renameMu func (fs *filesystem) renameMuUnlockAndCheckDrop(ctx context.Context, ds **[]*dentry) { if *ds == nil { fs.renameMu.Unlock() diff --git a/pkg/sentry/kernel/futex/futex.go b/pkg/sentry/kernel/futex/futex.go index 6377abb94..f5c364c96 100644 --- a/pkg/sentry/kernel/futex/futex.go +++ b/pkg/sentry/kernel/futex/futex.go @@ -398,8 +398,8 @@ func (m *Manager) Fork() *Manager { } // lockBucket returns a locked bucket for the given key. -func (m *Manager) lockBucket(k *Key) *bucket { - var b *bucket +// +checklocksacquire:b.mu +func (m *Manager) lockBucket(k *Key) (b *bucket) { if k.Kind == KindSharedMappable { b = m.sharedBucket } else { @@ -410,7 +410,9 @@ func (m *Manager) lockBucket(k *Key) *bucket { } // lockBuckets returns locked buckets for the given keys. -func (m *Manager) lockBuckets(k1, k2 *Key) (*bucket, *bucket) { +// +checklocksacquire:b1.mu +// +checklocksacquire:b2.mu +func (m *Manager) lockBuckets(k1, k2 *Key) (b1 *bucket, b2 *bucket) { // Buckets must be consistently ordered to avoid circular lock // dependencies. We order buckets in m.privateBuckets by index (lowest // index first), and all buckets in m.privateBuckets precede @@ -420,8 +422,8 @@ func (m *Manager) lockBuckets(k1, k2 *Key) (*bucket, *bucket) { if k1.Kind != KindSharedMappable && k2.Kind != KindSharedMappable { i1 := bucketIndexForAddr(k1.addr()) i2 := bucketIndexForAddr(k2.addr()) - b1 := &m.privateBuckets[i1] - b2 := &m.privateBuckets[i2] + b1 = &m.privateBuckets[i1] + b2 = &m.privateBuckets[i2] switch { case i1 < i2: b1.mu.Lock() @@ -432,19 +434,30 @@ func (m *Manager) lockBuckets(k1, k2 *Key) (*bucket, *bucket) { default: b1.mu.Lock() } - return b1, b2 + return b1, b2 // +checklocksforce } // At least one of b1 or b2 should be m.sharedBucket. - b1 := m.sharedBucket - b2 := m.sharedBucket + b1 = m.sharedBucket + b2 = m.sharedBucket if k1.Kind != KindSharedMappable { b1 = m.lockBucket(k1) } else if k2.Kind != KindSharedMappable { b2 = m.lockBucket(k2) } m.sharedBucket.mu.Lock() - return b1, b2 + return b1, b2 // +checklocksforce +} + +// unlockBuckets unlocks two buckets. +// +checklocksrelease:b1.mu +// +checklocksrelease:b2.mu +func (m *Manager) unlockBuckets(b1, b2 *bucket) { + b1.mu.Unlock() + if b1 != b2 { + b2.mu.Unlock() + } + return // +checklocksforce } // Wake wakes up to n waiters matching the bitmask on the given addr. 
@@ -477,10 +490,7 @@ func (m *Manager) doRequeue(t Target, addr, naddr hostarch.Addr, private bool, c defer k2.release(t) b1, b2 := m.lockBuckets(&k1, &k2) - defer b1.mu.Unlock() - if b2 != b1 { - defer b2.mu.Unlock() - } + defer m.unlockBuckets(b1, b2) if checkval { if err := check(t, addr, val); err != nil { @@ -527,10 +537,7 @@ func (m *Manager) WakeOp(t Target, addr1, addr2 hostarch.Addr, private bool, nwa defer k2.release(t) b1, b2 := m.lockBuckets(&k1, &k2) - defer b1.mu.Unlock() - if b2 != b1 { - defer b2.mu.Unlock() - } + defer m.unlockBuckets(b1, b2) done := 0 cond, err := atomicOp(t, addr2, op) diff --git a/pkg/sentry/kernel/pipe/pipe_unsafe.go b/pkg/sentry/kernel/pipe/pipe_unsafe.go index dd60cba24..077c5d596 100644 --- a/pkg/sentry/kernel/pipe/pipe_unsafe.go +++ b/pkg/sentry/kernel/pipe/pipe_unsafe.go @@ -23,6 +23,8 @@ import ( // concurrent calls cannot deadlock. // // Preconditions: x != y. +// +checklocksacquire:x.mu +// +checklocksacquire:y.mu func lockTwoPipes(x, y *Pipe) { // Lock the two pipes in order of increasing address. if uintptr(unsafe.Pointer(x)) < uintptr(unsafe.Pointer(y)) { diff --git a/pkg/sentry/kernel/pipe/pipe_util.go b/pkg/sentry/kernel/pipe/pipe_util.go index 84f9f6234..c883a9014 100644 --- a/pkg/sentry/kernel/pipe/pipe_util.go +++ b/pkg/sentry/kernel/pipe/pipe_util.go @@ -157,6 +157,7 @@ func (p *Pipe) Ioctl(ctx context.Context, io usermem.IO, args arch.SyscallArgume // // mu must be held by the caller. waitFor returns with mu held, but it will // drop mu before blocking for any reader/writers. +// +checklocks:mu func waitFor(mu *sync.Mutex, wakeupChan *chan struct{}, sleeper amutex.Sleeper) bool { // Ideally this function would simply use a condition variable. However, the // wait needs to be interruptible via 'sleeper', so we must sychronize via a diff --git a/pkg/sentry/kernel/ptrace.go b/pkg/sentry/kernel/ptrace.go index cdaee5d7f..161140980 100644 --- a/pkg/sentry/kernel/ptrace.go +++ b/pkg/sentry/kernel/ptrace.go @@ -652,6 +652,7 @@ func (t *Task) forgetTracerLocked() { // Preconditions: // * The signal mutex must be locked. // * The caller must be running on the task goroutine. +// +checklocks:t.tg.signalHandlers.mu func (t *Task) ptraceSignalLocked(info *linux.SignalInfo) bool { if linux.Signal(info.Signo) == linux.SIGKILL { return false diff --git a/pkg/sentry/kernel/sessions.go b/pkg/sentry/kernel/sessions.go index c0c1f1f13..ae21a55da 100644 --- a/pkg/sentry/kernel/sessions.go +++ b/pkg/sentry/kernel/sessions.go @@ -121,8 +121,9 @@ func (pg *ProcessGroup) Originator() *ThreadGroup { // IsOrphan returns true if this process group is an orphan. func (pg *ProcessGroup) IsOrphan() bool { - pg.originator.TaskSet().mu.RLock() - defer pg.originator.TaskSet().mu.RUnlock() + ts := pg.originator.TaskSet() + ts.mu.RLock() + defer ts.mu.RUnlock() return pg.ancestors == 0 } diff --git a/pkg/sentry/mm/syscalls.go b/pkg/sentry/mm/syscalls.go index 125fd855b..b51ec6aa7 100644 --- a/pkg/sentry/mm/syscalls.go +++ b/pkg/sentry/mm/syscalls.go @@ -204,6 +204,7 @@ func (mm *MemoryManager) populateVMA(ctx context.Context, vseg vmaIterator, ar h // * vseg.Range().IsSupersetOf(ar). // // Postconditions: mm.mappingMu will be unlocked. +// +checklocksrelease:mm.mappingMu func (mm *MemoryManager) populateVMAAndUnlock(ctx context.Context, vseg vmaIterator, ar hostarch.AddrRange, precommit bool) { // See populateVMA above for commentary. 
if !vseg.ValuePtr().effectivePerms.Any() { diff --git a/pkg/sentry/pgalloc/pgalloc.go b/pkg/sentry/pgalloc/pgalloc.go index f7d5a1800..0c8542485 100644 --- a/pkg/sentry/pgalloc/pgalloc.go +++ b/pkg/sentry/pgalloc/pgalloc.go @@ -945,7 +945,7 @@ func (f *MemoryFile) updateUsageLocked(currentUsage uint64, checkCommitted func( // NOTE(b/165896008): mincore (which is passed as checkCommitted) // by f.UpdateUsage() might take a really long time. So unlock f.mu // while checkCommitted runs. - f.mu.Unlock() + f.mu.Unlock() // +checklocksforce err := checkCommitted(s, buf) f.mu.Lock() if err != nil { diff --git a/pkg/sentry/vfs/dentry.go b/pkg/sentry/vfs/dentry.go index 242eb5ecb..cb92b6eee 100644 --- a/pkg/sentry/vfs/dentry.go +++ b/pkg/sentry/vfs/dentry.go @@ -196,11 +196,12 @@ func (d *Dentry) OnZeroWatches(ctx context.Context) { // PrepareDeleteDentry must be called before attempting to delete the file // represented by d. If PrepareDeleteDentry succeeds, the caller must call // AbortDeleteDentry or CommitDeleteDentry depending on the deletion's outcome. +// +checklocksacquire:d.mu func (vfs *VirtualFilesystem) PrepareDeleteDentry(mntns *MountNamespace, d *Dentry) error { vfs.mountMu.Lock() if mntns.mountpoints[d] != 0 { vfs.mountMu.Unlock() - return linuxerr.EBUSY + return linuxerr.EBUSY // +checklocksforce: inconsistent return. } d.mu.Lock() vfs.mountMu.Unlock() @@ -211,14 +212,14 @@ func (vfs *VirtualFilesystem) PrepareDeleteDentry(mntns *MountNamespace, d *Dent // AbortDeleteDentry must be called after PrepareDeleteDentry if the deletion // fails. -// +checklocks:d.mu +// +checklocksrelease:d.mu func (vfs *VirtualFilesystem) AbortDeleteDentry(d *Dentry) { d.mu.Unlock() } // CommitDeleteDentry must be called after PrepareDeleteDentry if the deletion // succeeds. -// +checklocks:d.mu +// +checklocksrelease:d.mu func (vfs *VirtualFilesystem) CommitDeleteDentry(ctx context.Context, d *Dentry) { d.dead = true d.mu.Unlock() @@ -249,16 +250,18 @@ func (vfs *VirtualFilesystem) InvalidateDentry(ctx context.Context, d *Dentry) { // Preconditions: // * If to is not nil, it must be a child Dentry from the same Filesystem. // * from != to. +// +checklocksacquire:from.mu +// +checklocksacquire:to.mu func (vfs *VirtualFilesystem) PrepareRenameDentry(mntns *MountNamespace, from, to *Dentry) error { vfs.mountMu.Lock() if mntns.mountpoints[from] != 0 { vfs.mountMu.Unlock() - return linuxerr.EBUSY + return linuxerr.EBUSY // +checklocksforce: no locks acquired. } if to != nil { if mntns.mountpoints[to] != 0 { vfs.mountMu.Unlock() - return linuxerr.EBUSY + return linuxerr.EBUSY // +checklocksforce: no locks acquired. } to.mu.Lock() } @@ -267,13 +270,13 @@ func (vfs *VirtualFilesystem) PrepareRenameDentry(mntns *MountNamespace, from, t // Return with from.mu and to.mu locked, which will be unlocked by // AbortRenameDentry, CommitRenameReplaceDentry, or // CommitRenameExchangeDentry. - return nil + return nil // +checklocksforce: to may not be acquired. } // AbortRenameDentry must be called after PrepareRenameDentry if the rename // fails. -// +checklocks:from.mu -// +checklocks:to.mu +// +checklocksrelease:from.mu +// +checklocksrelease:to.mu func (vfs *VirtualFilesystem) AbortRenameDentry(from, to *Dentry) { from.mu.Unlock() if to != nil { @@ -286,8 +289,8 @@ func (vfs *VirtualFilesystem) AbortRenameDentry(from, to *Dentry) { // that was replaced by from. // // Preconditions: PrepareRenameDentry was previously called on from and to. 
-// +checklocks:from.mu -// +checklocks:to.mu +// +checklocksrelease:from.mu +// +checklocksrelease:to.mu func (vfs *VirtualFilesystem) CommitRenameReplaceDentry(ctx context.Context, from, to *Dentry) { from.mu.Unlock() if to != nil { @@ -303,8 +306,8 @@ func (vfs *VirtualFilesystem) CommitRenameReplaceDentry(ctx context.Context, fro // from and to are exchanged by rename(RENAME_EXCHANGE). // // Preconditions: PrepareRenameDentry was previously called on from and to. -// +checklocks:from.mu -// +checklocks:to.mu +// +checklocksrelease:from.mu +// +checklocksrelease:to.mu func (vfs *VirtualFilesystem) CommitRenameExchangeDentry(from, to *Dentry) { from.mu.Unlock() to.mu.Unlock() diff --git a/pkg/sync/mutex_unsafe.go b/pkg/sync/mutex_unsafe.go index 411a80a8a..b829765d9 100644 --- a/pkg/sync/mutex_unsafe.go +++ b/pkg/sync/mutex_unsafe.go @@ -32,6 +32,18 @@ func (m *CrossGoroutineMutex) state() *int32 { return &(*syncMutex)(unsafe.Pointer(&m.Mutex)).state } +// Lock locks the underlying Mutex. +// +checklocksignore +func (m *CrossGoroutineMutex) Lock() { + m.Mutex.Lock() +} + +// Unlock unlocks the underlying Mutex. +// +checklocksignore +func (m *CrossGoroutineMutex) Unlock() { + m.Mutex.Unlock() +} + const ( mutexUnlocked = 0 mutexLocked = 1 @@ -62,6 +74,7 @@ type Mutex struct { // Lock locks m. If the lock is already in use, the calling goroutine blocks // until the mutex is available. +// +checklocksignore func (m *Mutex) Lock() { noteLock(unsafe.Pointer(m)) m.m.Lock() @@ -80,6 +93,7 @@ func (m *Mutex) Unlock() { // TryLock tries to acquire the mutex. It returns true if it succeeds and false // otherwise. TryLock does not block. +// +checklocksignore func (m *Mutex) TryLock() bool { // Note lock first to enforce proper locking even if unsuccessful. noteLock(unsafe.Pointer(m)) diff --git a/pkg/sync/rwmutex_unsafe.go b/pkg/sync/rwmutex_unsafe.go index 892d3e641..7829b06db 100644 --- a/pkg/sync/rwmutex_unsafe.go +++ b/pkg/sync/rwmutex_unsafe.go @@ -37,6 +37,7 @@ const rwmutexMaxReaders = 1 << 30 // TryRLock locks rw for reading. It returns true if it succeeds and false // otherwise. It does not block. +// +checklocksignore func (rw *CrossGoroutineRWMutex) TryRLock() bool { if RaceEnabled { RaceDisable() @@ -65,6 +66,7 @@ func (rw *CrossGoroutineRWMutex) TryRLock() bool { // It should not be used for recursive read locking; a blocked Lock call // excludes new readers from acquiring the lock. See the documentation on the // RWMutex type. +// +checklocksignore func (rw *CrossGoroutineRWMutex) RLock() { if RaceEnabled { RaceDisable() @@ -83,6 +85,7 @@ func (rw *CrossGoroutineRWMutex) RLock() { // // Preconditions: // * rw is locked for reading. +// +checklocksignore func (rw *CrossGoroutineRWMutex) RUnlock() { if RaceEnabled { RaceReleaseMerge(unsafe.Pointer(&rw.writerSem)) @@ -134,6 +137,7 @@ func (rw *CrossGoroutineRWMutex) TryLock() bool { // Lock locks rw for writing. If the lock is already locked for reading or // writing, Lock blocks until the lock is available. +// +checklocksignore func (rw *CrossGoroutineRWMutex) Lock() { if RaceEnabled { RaceDisable() @@ -228,6 +232,7 @@ type RWMutex struct { // TryRLock locks rw for reading. It returns true if it succeeds and false // otherwise. It does not block. +// +checklocksignore func (rw *RWMutex) TryRLock() bool { // Note lock first to enforce proper locking even if unsuccessful. 
noteLock(unsafe.Pointer(rw)) @@ -243,6 +248,7 @@ func (rw *RWMutex) TryRLock() bool { // It should not be used for recursive read locking; a blocked Lock call // excludes new readers from acquiring the lock. See the documentation on the // RWMutex type. +// +checklocksignore func (rw *RWMutex) RLock() { noteLock(unsafe.Pointer(rw)) rw.m.RLock() @@ -261,6 +267,7 @@ func (rw *RWMutex) RUnlock() { // TryLock locks rw for writing. It returns true if it succeeds and false // otherwise. It does not block. +// +checklocksignore func (rw *RWMutex) TryLock() bool { // Note lock first to enforce proper locking even if unsuccessful. noteLock(unsafe.Pointer(rw)) @@ -273,6 +280,7 @@ func (rw *RWMutex) TryLock() bool { // Lock locks rw for writing. If the lock is already locked for reading or // writing, Lock blocks until the lock is available. +// +checklocksignore func (rw *RWMutex) Lock() { noteLock(unsafe.Pointer(rw)) rw.m.Lock() diff --git a/pkg/tcpip/stack/addressable_endpoint_state.go b/pkg/tcpip/stack/addressable_endpoint_state.go index ce9cebdaa..ae0bb4ace 100644 --- a/pkg/tcpip/stack/addressable_endpoint_state.go +++ b/pkg/tcpip/stack/addressable_endpoint_state.go @@ -249,7 +249,7 @@ func (a *AddressableEndpointState) addAndAcquireAddressLocked(addr tcpip.Address // or we are adding a new temporary or permanent address. // // The address MUST be write locked at this point. - defer addrState.mu.Unlock() + defer addrState.mu.Unlock() // +checklocksforce if permanent { if addrState.mu.kind.IsPermanent() { diff --git a/pkg/tcpip/stack/conntrack.go b/pkg/tcpip/stack/conntrack.go index 782e74b24..068dab7ce 100644 --- a/pkg/tcpip/stack/conntrack.go +++ b/pkg/tcpip/stack/conntrack.go @@ -363,7 +363,7 @@ func (ct *ConnTrack) insertConn(conn *conn) { // Unlocking can happen in any order. ct.buckets[tupleBucket].mu.Unlock() if tupleBucket != replyBucket { - ct.buckets[replyBucket].mu.Unlock() + ct.buckets[replyBucket].mu.Unlock() // +checklocksforce } } @@ -626,7 +626,7 @@ func (ct *ConnTrack) reapTupleLocked(tuple *tuple, bucket int, now time.Time) bo // Don't re-unlock if both tuples are in the same bucket. if differentBuckets { - ct.buckets[replyBucket].mu.Unlock() + ct.buckets[replyBucket].mu.Unlock() // +checklocksforce } return true diff --git a/pkg/tcpip/transport/icmp/endpoint.go b/pkg/tcpip/transport/icmp/endpoint.go index cb316d27a..f9a15efb2 100644 --- a/pkg/tcpip/transport/icmp/endpoint.go +++ b/pkg/tcpip/transport/icmp/endpoint.go @@ -213,6 +213,7 @@ func (e *endpoint) Read(dst io.Writer, opts tcpip.ReadOptions) (tcpip.ReadResult // reacquire the mutex in exclusive mode. // // Returns true for retry if preparation should be retried. +// +checklocks:e.mu func (e *endpoint) prepareForWrite(to *tcpip.FullAddress) (retry bool, err tcpip.Error) { switch e.state { case stateInitial: @@ -229,10 +230,8 @@ func (e *endpoint) prepareForWrite(to *tcpip.FullAddress) (retry bool, err tcpip } e.mu.RUnlock() - defer e.mu.RLock() - e.mu.Lock() - defer e.mu.Unlock() + defer e.mu.DowngradeLock() // The state changed when we released the shared locked and re-acquired // it in exclusive mode. Try again. diff --git a/pkg/tcpip/transport/tcp/accept.go b/pkg/tcpip/transport/tcp/accept.go index d807b13b7..aa413ad05 100644 --- a/pkg/tcpip/transport/tcp/accept.go +++ b/pkg/tcpip/transport/tcp/accept.go @@ -330,7 +330,9 @@ func (l *listenContext) performHandshake(s *segment, opts *header.TCPSynOptions, } ep := h.ep - if err := h.complete(); err != nil { + // N.B. 
the endpoint is generated above by startHandshake, and will be + // returned locked. This first call is forced. + if err := h.complete(); err != nil { // +checklocksforce ep.stack.Stats().TCP.FailedConnectionAttempts.Increment() ep.stats.FailedConnectionAttempts.Increment() l.cleanupFailedHandshake(h) @@ -364,6 +366,7 @@ func (l *listenContext) closeAllPendingEndpoints() { } // Precondition: h.ep.mu must be held. +// +checklocks:h.ep.mu func (l *listenContext) cleanupFailedHandshake(h *handshake) { e := h.ep e.mu.Unlock() @@ -504,7 +507,9 @@ func (e *endpoint) handleSynSegment(ctx *listenContext, s *segment, opts *header } go func() { - if err := h.complete(); err != nil { + // Note that startHandshake returns a locked endpoint. The + // force call here just makes it so. + if err := h.complete(); err != nil { // +checklocksforce e.stack.Stats().TCP.FailedConnectionAttempts.Increment() e.stats.FailedConnectionAttempts.Increment() ctx.cleanupFailedHandshake(h) diff --git a/pkg/tcpip/transport/tcp/connect.go b/pkg/tcpip/transport/tcp/connect.go index e39d1623d..93ed161f9 100644 --- a/pkg/tcpip/transport/tcp/connect.go +++ b/pkg/tcpip/transport/tcp/connect.go @@ -511,6 +511,7 @@ func (h *handshake) start() { } // complete completes the TCP 3-way handshake initiated by h.start(). +// +checklocks:h.ep.mu func (h *handshake) complete() tcpip.Error { // Set up the wakers. var s sleep.Sleeper @@ -1283,42 +1284,45 @@ func (e *endpoint) disableKeepaliveTimer() { e.keepalive.Unlock() } -// protocolMainLoop is the main loop of the TCP protocol. It runs in its own -// goroutine and is responsible for sending segments and handling received -// segments. -func (e *endpoint) protocolMainLoop(handshake bool, wakerInitDone chan<- struct{}) tcpip.Error { - e.mu.Lock() - var closeTimer tcpip.Timer - var closeWaker sleep.Waker - - epilogue := func() { - // e.mu is expected to be hold upon entering this section. - if e.snd != nil { - e.snd.resendTimer.cleanup() - e.snd.probeTimer.cleanup() - e.snd.reorderTimer.cleanup() - } +// protocolMainLoopDone is called at the end of protocolMainLoop. +// +checklocksrelease:e.mu +func (e *endpoint) protocolMainLoopDone(closeTimer tcpip.Timer, closeWaker *sleep.Waker) { + if e.snd != nil { + e.snd.resendTimer.cleanup() + e.snd.probeTimer.cleanup() + e.snd.reorderTimer.cleanup() + } - if closeTimer != nil { - closeTimer.Stop() - } + if closeTimer != nil { + closeTimer.Stop() + } - e.completeWorkerLocked() + e.completeWorkerLocked() - if e.drainDone != nil { - close(e.drainDone) - } + if e.drainDone != nil { + close(e.drainDone) + } - e.mu.Unlock() + e.mu.Unlock() - e.drainClosingSegmentQueue() + e.drainClosingSegmentQueue() - // When the protocol loop exits we should wake up our waiters. - e.waiterQueue.Notify(waiter.EventHUp | waiter.EventErr | waiter.ReadableEvents | waiter.WritableEvents) - } + // When the protocol loop exits we should wake up our waiters. + e.waiterQueue.Notify(waiter.EventHUp | waiter.EventErr | waiter.ReadableEvents | waiter.WritableEvents) +} +// protocolMainLoop is the main loop of the TCP protocol. It runs in its own +// goroutine and is responsible for sending segments and handling received +// segments. 
+func (e *endpoint) protocolMainLoop(handshake bool, wakerInitDone chan<- struct{}) tcpip.Error { + var ( + closeTimer tcpip.Timer + closeWaker sleep.Waker + ) + + e.mu.Lock() if handshake { - if err := e.h.complete(); err != nil { + if err := e.h.complete(); err != nil { // +checklocksforce e.lastErrorMu.Lock() e.lastError = err e.lastErrorMu.Unlock() @@ -1327,8 +1331,7 @@ func (e *endpoint) protocolMainLoop(handshake bool, wakerInitDone chan<- struct{ e.hardError = err e.workerCleanup = true - // Lock released below. - epilogue() + e.protocolMainLoopDone(closeTimer, &closeWaker) return err } } @@ -1472,7 +1475,7 @@ func (e *endpoint) protocolMainLoop(handshake bool, wakerInitDone chan<- struct{ // Only block the worker if the endpoint // is not in closed state or error state. close(e.drainDone) - e.mu.Unlock() + e.mu.Unlock() // +checklocksforce <-e.undrain e.mu.Lock() } @@ -1533,8 +1536,6 @@ func (e *endpoint) protocolMainLoop(handshake bool, wakerInitDone chan<- struct{ if err != nil { e.resetConnectionLocked(err) } - // Lock released below. - epilogue() } loop: @@ -1558,6 +1559,7 @@ loop: // just want to terminate the loop and cleanup the // endpoint. cleanupOnError(nil) + e.protocolMainLoopDone(closeTimer, &closeWaker) return nil case StateTimeWait: fallthrough @@ -1566,6 +1568,7 @@ loop: default: if err := funcs[v].f(); err != nil { cleanupOnError(err) + e.protocolMainLoopDone(closeTimer, &closeWaker) return nil } } @@ -1589,13 +1592,13 @@ loop: // Handle any StateError transition from StateTimeWait. if e.EndpointState() == StateError { cleanupOnError(nil) + e.protocolMainLoopDone(closeTimer, &closeWaker) return nil } e.transitionToStateCloseLocked() - // Lock released below. - epilogue() + e.protocolMainLoopDone(closeTimer, &closeWaker) // A new SYN was received during TIME_WAIT and we need to abort // the timewait and redirect the segment to the listener queue @@ -1665,6 +1668,7 @@ func (e *endpoint) handleTimeWaitSegments() (extendTimeWait bool, reuseTW func() // should be executed after releasing the endpoint registrations. This is // done in cases where a new SYN is received during TIME_WAIT that carries // a sequence number larger than one see on the connection. +// +checklocks:e.mu func (e *endpoint) doTimeWait() (twReuse func()) { // Trigger a 2 * MSL time wait state. During this period // we will drop all incoming segments. diff --git a/pkg/tcpip/transport/tcp/dispatcher.go b/pkg/tcpip/transport/tcp/dispatcher.go index dff7cb89c..7d110516b 100644 --- a/pkg/tcpip/transport/tcp/dispatcher.go +++ b/pkg/tcpip/transport/tcp/dispatcher.go @@ -127,7 +127,7 @@ func (p *processor) start(wg *sync.WaitGroup) { case !ep.segmentQueue.empty(): p.epQ.enqueue(ep) } - ep.mu.Unlock() + ep.mu.Unlock() // +checklocksforce } else { ep.newSegmentWaker.Assert() } diff --git a/pkg/tcpip/transport/tcp/endpoint.go b/pkg/tcpip/transport/tcp/endpoint.go index 4acddc959..1ed4ba419 100644 --- a/pkg/tcpip/transport/tcp/endpoint.go +++ b/pkg/tcpip/transport/tcp/endpoint.go @@ -664,6 +664,7 @@ func calculateAdvertisedMSS(userMSS uint16, r *stack.Route) uint16 { // The assumption behind spinning here being that background packet processing // should not be holding the lock for long and spinning reduces latency as we // avoid an expensive sleep/wakeup of of the syscall goroutine). 
+// +checklocksacquire:e.mu func (e *endpoint) LockUser() { for { // Try first if the sock is locked then check if it's owned @@ -683,7 +684,7 @@ func (e *endpoint) LockUser() { continue } atomic.StoreUint32(&e.ownedByUser, 1) - return + return // +checklocksforce } } @@ -700,7 +701,7 @@ func (e *endpoint) LockUser() { // protocol goroutine altogether. // // Precondition: e.LockUser() must have been called before calling e.UnlockUser() -// +checklocks:e.mu +// +checklocksrelease:e.mu func (e *endpoint) UnlockUser() { // Lock segment queue before checking so that we avoid a race where // segments can be queued between the time we check if queue is empty @@ -736,12 +737,13 @@ func (e *endpoint) UnlockUser() { } // StopWork halts packet processing. Only to be used in tests. +// +checklocksacquire:e.mu func (e *endpoint) StopWork() { e.mu.Lock() } // ResumeWork resumes packet processing. Only to be used in tests. -// +checklocks:e.mu +// +checklocksrelease:e.mu func (e *endpoint) ResumeWork() { e.mu.Unlock() } @@ -1480,86 +1482,95 @@ func (e *endpoint) isEndpointWritableLocked() (int, tcpip.Error) { return avail, nil } -// Write writes data to the endpoint's peer. -func (e *endpoint) Write(p tcpip.Payloader, opts tcpip.WriteOptions) (int64, tcpip.Error) { - // Linux completely ignores any address passed to sendto(2) for TCP sockets - // (without the MSG_FASTOPEN flag). Corking is unimplemented, so opts.More - // and opts.EndOfRecord are also ignored. +// readFromPayloader reads a slice from the Payloader. +// +checklocks:e.mu +// +checklocks:e.sndQueueInfo.sndQueueMu +func (e *endpoint) readFromPayloader(p tcpip.Payloader, opts tcpip.WriteOptions, avail int) ([]byte, tcpip.Error) { + // We can release locks while copying data. + // + // This is not possible if atomic is set, because we can't allow the + // available buffer space to be consumed by some other caller while we + // are copying data in. + if !opts.Atomic { + e.sndQueueInfo.sndQueueMu.Unlock() + defer e.sndQueueInfo.sndQueueMu.Lock() - e.LockUser() - defer e.UnlockUser() + e.UnlockUser() + defer e.LockUser() + } - nextSeg, n, err := func() (*segment, int, tcpip.Error) { - e.sndQueueInfo.sndQueueMu.Lock() - defer e.sndQueueInfo.sndQueueMu.Unlock() + // Fetch data. + if l := p.Len(); l < avail { + avail = l + } + if avail == 0 { + return nil, nil + } + v := make([]byte, avail) + n, err := p.Read(v) + if err != nil && err != io.EOF { + return nil, &tcpip.ErrBadBuffer{} + } + return v[:n], nil +} + +// queueSegment reads data from the payloader and returns a segment to be sent. +// +checklocks:e.mu +func (e *endpoint) queueSegment(p tcpip.Payloader, opts tcpip.WriteOptions) (*segment, int, tcpip.Error) { + e.sndQueueInfo.sndQueueMu.Lock() + defer e.sndQueueInfo.sndQueueMu.Unlock() + + avail, err := e.isEndpointWritableLocked() + if err != nil { + e.stats.WriteErrors.WriteClosed.Increment() + return nil, 0, err + } + v, err := e.readFromPayloader(p, opts, avail) + if err != nil { + return nil, 0, err + } + if !opts.Atomic { + // Since we released locks in between it's possible that the + // endpoint transitioned to a CLOSED/ERROR states so make + // sure endpoint is still writable before trying to write. avail, err := e.isEndpointWritableLocked() if err != nil { e.stats.WriteErrors.WriteClosed.Increment() return nil, 0, err } - v, err := func() ([]byte, tcpip.Error) { - // We can release locks while copying data. 
- // - // This is not possible if atomic is set, because we can't allow the - // available buffer space to be consumed by some other caller while we - // are copying data in. - if !opts.Atomic { - e.sndQueueInfo.sndQueueMu.Unlock() - defer e.sndQueueInfo.sndQueueMu.Lock() - - e.UnlockUser() - defer e.LockUser() - } - - // Fetch data. - if l := p.Len(); l < avail { - avail = l - } - if avail == 0 { - return nil, nil - } - v := make([]byte, avail) - n, err := p.Read(v) - if err != nil && err != io.EOF { - return nil, &tcpip.ErrBadBuffer{} - } - return v[:n], nil - }() - if len(v) == 0 || err != nil { - return nil, 0, err + // Discard any excess data copied in due to avail being reduced due + // to a simultaneous write call to the socket. + if avail < len(v) { + v = v[:avail] } + } - if !opts.Atomic { - // Since we released locks in between it's possible that the - // endpoint transitioned to a CLOSED/ERROR states so make - // sure endpoint is still writable before trying to write. - avail, err := e.isEndpointWritableLocked() - if err != nil { - e.stats.WriteErrors.WriteClosed.Increment() - return nil, 0, err - } + // Add data to the send queue. + s := newOutgoingSegment(e.TransportEndpointInfo.ID, e.stack.Clock(), v) + e.sndQueueInfo.SndBufUsed += len(v) + e.snd.writeList.PushBack(s) - // Discard any excess data copied in due to avail being reduced due - // to a simultaneous write call to the socket. - if avail < len(v) { - v = v[:avail] - } - } + return s, len(v), nil +} - // Add data to the send queue. - s := newOutgoingSegment(e.TransportEndpointInfo.ID, e.stack.Clock(), v) - e.sndQueueInfo.SndBufUsed += len(v) - e.snd.writeList.PushBack(s) +// Write writes data to the endpoint's peer. +func (e *endpoint) Write(p tcpip.Payloader, opts tcpip.WriteOptions) (int64, tcpip.Error) { + // Linux completely ignores any address passed to sendto(2) for TCP sockets + // (without the MSG_FASTOPEN flag). Corking is unimplemented, so opts.More + // and opts.EndOfRecord are also ignored. + + e.LockUser() + defer e.UnlockUser() - return s, len(v), nil - }() // Return if either we didn't queue anything or if an error occurred while // attempting to queue data. + nextSeg, n, err := e.queueSegment(p, opts) if n == 0 || err != nil { return 0, err } + e.sendData(nextSeg) return int64(n), nil } @@ -2504,6 +2515,7 @@ func (e *endpoint) listen(backlog int) tcpip.Error { // startAcceptedLoop sets up required state and starts a goroutine with the // main loop for accepted connections. +// +checklocksrelease:e.mu func (e *endpoint) startAcceptedLoop() { e.workerRunning = true e.mu.Unlock() diff --git a/pkg/tcpip/transport/tcp/forwarder.go b/pkg/tcpip/transport/tcp/forwarder.go index 65c86823a..2e709ed78 100644 --- a/pkg/tcpip/transport/tcp/forwarder.go +++ b/pkg/tcpip/transport/tcp/forwarder.go @@ -164,8 +164,9 @@ func (r *ForwarderRequest) CreateEndpoint(queue *waiter.Queue) (tcpip.Endpoint, return nil, err } - // Start the protocol goroutine. - ep.startAcceptedLoop() + // Start the protocol goroutine. Note that the endpoint is returned + // from performHandshake locked. + ep.startAcceptedLoop() // +checklocksforce return ep, nil } diff --git a/pkg/tcpip/transport/udp/endpoint.go b/pkg/tcpip/transport/udp/endpoint.go index def9d7186..82a3f2287 100644 --- a/pkg/tcpip/transport/udp/endpoint.go +++ b/pkg/tcpip/transport/udp/endpoint.go @@ -364,6 +364,7 @@ func (e *endpoint) Read(dst io.Writer, opts tcpip.ReadOptions) (tcpip.ReadResult // reacquire the mutex in exclusive mode. 
diff --git a/pkg/tcpip/transport/udp/endpoint.go b/pkg/tcpip/transport/udp/endpoint.go
index def9d7186..82a3f2287 100644
--- a/pkg/tcpip/transport/udp/endpoint.go
+++ b/pkg/tcpip/transport/udp/endpoint.go
@@ -364,6 +364,7 @@ func (e *endpoint) Read(dst io.Writer, opts tcpip.ReadOptions) (tcpip.ReadResult
 // reacquire the mutex in exclusive mode.
 //
 // Returns true for retry if preparation should be retried.
+// +checklocks:e.mu
 func (e *endpoint) prepareForWrite(to *tcpip.FullAddress) (retry bool, err tcpip.Error) {
     switch e.EndpointState() {
     case StateInitial:
@@ -380,10 +381,8 @@ func (e *endpoint) prepareForWrite(to *tcpip.FullAddress) (retry bool, err tcpip
     }
 
     e.mu.RUnlock()
-    defer e.mu.RLock()
-
     e.mu.Lock()
-    defer e.mu.Unlock()
+    defer e.mu.DowngradeLock()
 
     // The state changed when we released the shared locked and re-acquired
     // it in exclusive mode. Try again.
@@ -449,37 +448,20 @@ func (e *endpoint) Write(p tcpip.Payloader, opts tcpip.WriteOptions) (int64, tcp
     return n, err
 }
 
-func (e *endpoint) write(p tcpip.Payloader, opts tcpip.WriteOptions) (int64, tcpip.Error) {
-    if err := e.LastError(); err != nil {
-        return 0, err
-    }
-
-    // MSG_MORE is unimplemented. (This also means that MSG_EOR is a no-op.)
-    if opts.More {
-        return 0, &tcpip.ErrInvalidOptionValue{}
-    }
-
-    to := opts.To
-
+func (e *endpoint) buildUDPPacketInfo(p tcpip.Payloader, opts tcpip.WriteOptions) (udpPacketInfo, tcpip.Error) {
     e.mu.RLock()
-    lockReleased := false
-    defer func() {
-        if lockReleased {
-            return
-        }
-        e.mu.RUnlock()
-    }()
+    defer e.mu.RUnlock()
 
     // If we've shutdown with SHUT_WR we are in an invalid state for sending.
     if e.shutdownFlags&tcpip.ShutdownWrite != 0 {
-        return 0, &tcpip.ErrClosedForSend{}
+        return udpPacketInfo{}, &tcpip.ErrClosedForSend{}
     }
 
     // Prepare for write.
     for {
-        retry, err := e.prepareForWrite(to)
+        retry, err := e.prepareForWrite(opts.To)
         if err != nil {
-            return 0, err
+            return udpPacketInfo{}, err
         }
 
         if !retry {
@@ -489,34 +471,34 @@ func (e *endpoint) write(p tcpip.Payloader, opts tcpip.WriteOptions) (int64, tcp
 
     route := e.route
     dstPort := e.dstPort
-    if to != nil {
+    if opts.To != nil {
         // Reject destination address if it goes through a different
         // NIC than the endpoint was bound to.
-        nicID := to.NIC
+        nicID := opts.To.NIC
         if nicID == 0 {
             nicID = tcpip.NICID(e.ops.GetBindToDevice())
         }
 
         if e.BindNICID != 0 {
             if nicID != 0 && nicID != e.BindNICID {
-                return 0, &tcpip.ErrNoRoute{}
+                return udpPacketInfo{}, &tcpip.ErrNoRoute{}
             }
 
             nicID = e.BindNICID
         }
 
-        if to.Port == 0 {
+        if opts.To.Port == 0 {
             // Port 0 is an invalid port to send to.
-            return 0, &tcpip.ErrInvalidEndpointState{}
+            return udpPacketInfo{}, &tcpip.ErrInvalidEndpointState{}
         }
 
-        dst, netProto, err := e.checkV4MappedLocked(*to)
+        dst, netProto, err := e.checkV4MappedLocked(*opts.To)
         if err != nil {
-            return 0, err
+            return udpPacketInfo{}, err
         }
 
         r, _, err := e.connectRoute(nicID, dst, netProto)
         if err != nil {
-            return 0, err
+            return udpPacketInfo{}, err
         }
         defer r.Release()
@@ -525,12 +507,12 @@ func (e *endpoint) write(p tcpip.Payloader, opts tcpip.WriteOptions) (int64, tcp
     }
 
     if !e.ops.GetBroadcast() && route.IsOutboundBroadcast() {
-        return 0, &tcpip.ErrBroadcastDisabled{}
+        return udpPacketInfo{}, &tcpip.ErrBroadcastDisabled{}
     }
 
     v := make([]byte, p.Len())
     if _, err := io.ReadFull(p, v); err != nil {
-        return 0, &tcpip.ErrBadBuffer{}
+        return udpPacketInfo{}, &tcpip.ErrBadBuffer{}
     }
     if len(v) > header.UDPMaximumPacketSize {
         // Payload can't possibly fit in a packet.
@@ -548,24 +530,39 @@ func (e *endpoint) write(p tcpip.Payloader, opts tcpip.WriteOptions) (int64, tcp
                 v,
             )
         }
-        return 0, &tcpip.ErrMessageTooLong{}
+        return udpPacketInfo{}, &tcpip.ErrMessageTooLong{}
     }
 
     ttl := e.ttl
     useDefaultTTL := ttl == 0
-
     if header.IsV4MulticastAddress(route.RemoteAddress()) || header.IsV6MulticastAddress(route.RemoteAddress()) {
         ttl = e.multicastTTL
         // Multicast allows a 0 TTL.
         useDefaultTTL = false
     }
 
-    localPort := e.ID.LocalPort
-    sendTOS := e.sendTOS
-    owner := e.owner
-    noChecksum := e.SocketOptions().GetNoChecksum()
-    lockReleased = true
-    e.mu.RUnlock()
+    return udpPacketInfo{
+        route:         route,
+        data:          buffer.View(v),
+        localPort:     e.ID.LocalPort,
+        remotePort:    dstPort,
+        ttl:           ttl,
+        useDefaultTTL: useDefaultTTL,
+        tos:           e.sendTOS,
+        owner:         e.owner,
+        noChecksum:    e.SocketOptions().GetNoChecksum(),
+    }, nil
+}
+
+func (e *endpoint) write(p tcpip.Payloader, opts tcpip.WriteOptions) (int64, tcpip.Error) {
+    if err := e.LastError(); err != nil {
+        return 0, err
+    }
+
+    // MSG_MORE is unimplemented. (This also means that MSG_EOR is a no-op.)
+    if opts.More {
+        return 0, &tcpip.ErrInvalidOptionValue{}
+    }
 
     // Do not hold lock when sending as loopback is synchronous and if the UDP
     // datagram ends up generating an ICMP response then it can result in a
@@ -577,10 +574,15 @@ func (e *endpoint) write(p tcpip.Payloader, opts tcpip.WriteOptions) (int64, tcp
     //
     // See: https://golang.org/pkg/sync/#RWMutex for details on why recursive read
     // locking is prohibited.
-    if err := sendUDP(route, buffer.View(v).ToVectorisedView(), localPort, dstPort, ttl, useDefaultTTL, sendTOS, owner, noChecksum); err != nil {
+    u, err := e.buildUDPPacketInfo(p, opts)
+    if err != nil {
         return 0, err
     }
-    return int64(len(v)), nil
+    n, err := u.send()
+    if err != nil {
+        return 0, err
+    }
+    return int64(n), nil
 }
 
 // OnReuseAddressSet implements tcpip.SocketOptionsHandler.
@@ -817,14 +819,30 @@ func (e *endpoint) GetSockOpt(opt tcpip.GettableSocketOption) tcpip.Error {
     return nil
 }
 
-// sendUDP sends a UDP segment via the provided network endpoint and under the
-// provided identity.
-func sendUDP(r *stack.Route, data buffer.VectorisedView, localPort, remotePort uint16, ttl uint8, useDefaultTTL bool, tos uint8, owner tcpip.PacketOwner, noChecksum bool) tcpip.Error {
+// udpPacketInfo contains all information required to send a UDP packet.
+//
+// This should be used as a value-only type, which exists in order to simplify
+// return value syntax. It should not be exported or extended.
+type udpPacketInfo struct {
+    route         *stack.Route
+    data          buffer.View
+    localPort     uint16
+    remotePort    uint16
+    ttl           uint8
+    useDefaultTTL bool
+    tos           uint8
+    owner         tcpip.PacketOwner
+    noChecksum    bool
+}
+
+// send sends the given packet.
+func (u *udpPacketInfo) send() (int, tcpip.Error) {
+    vv := u.data.ToVectorisedView()
     pkt := stack.NewPacketBuffer(stack.PacketBufferOptions{
-        ReserveHeaderBytes: header.UDPMinimumSize + int(r.MaxHeaderLength()),
-        Data:               data,
+        ReserveHeaderBytes: header.UDPMinimumSize + int(u.route.MaxHeaderLength()),
+        Data:               vv,
     })
-    pkt.Owner = owner
+    pkt.Owner = u.owner
 
     // Initialize the UDP header.
     udp := header.UDP(pkt.TransportHeader().Push(header.UDPMinimumSize))
@@ -832,8 +850,8 @@ func sendUDP(r *stack.Route, data buffer.VectorisedView, localPort, remotePort u
     length := uint16(pkt.Size())
     udp.Encode(&header.UDPFields{
-        SrcPort: localPort,
-        DstPort: remotePort,
+        SrcPort: u.localPort,
+        DstPort: u.remotePort,
         Length:  length,
     })
 
@@ -841,30 +859,30 @@ func sendUDP(r *stack.Route, data buffer.VectorisedView, localPort, remotePort u
     // On IPv4, UDP checksum is optional, and a zero value indicates the
     // transmitter skipped the checksum generation (RFC768).
     // On IPv6, UDP checksum is not optional (RFC2460 Section 8.1).
-    if r.RequiresTXTransportChecksum() &&
-        (!noChecksum || r.NetProto() == header.IPv6ProtocolNumber) {
-        xsum := r.PseudoHeaderChecksum(ProtocolNumber, length)
-        for _, v := range data.Views() {
+    if u.route.RequiresTXTransportChecksum() &&
+        (!u.noChecksum || u.route.NetProto() == header.IPv6ProtocolNumber) {
+        xsum := u.route.PseudoHeaderChecksum(ProtocolNumber, length)
+        for _, v := range vv.Views() {
             xsum = header.Checksum(v, xsum)
         }
         udp.SetChecksum(^udp.CalculateChecksum(xsum))
     }
 
-    if useDefaultTTL {
-        ttl = r.DefaultTTL()
+    if u.useDefaultTTL {
+        u.ttl = u.route.DefaultTTL()
     }
 
-    if err := r.WritePacket(stack.NetworkHeaderParams{
+    if err := u.route.WritePacket(stack.NetworkHeaderParams{
         Protocol: ProtocolNumber,
-        TTL:      ttl,
-        TOS:      tos,
+        TTL:      u.ttl,
+        TOS:      u.tos,
     }, pkt); err != nil {
-        r.Stats().UDP.PacketSendErrors.Increment()
-        return err
+        u.route.Stats().UDP.PacketSendErrors.Increment()
+        return 0, err
     }
 
     // Track count of packets sent.
-    r.Stats().UDP.PacketsSent.Increment()
-    return nil
+    u.route.Stats().UDP.PacketsSent.Increment()
+    return len(u.data), nil
 }
 
 // checkV4MappedLocked determines the effective network protocol and converts
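The udpPacketInfo type introduced above captures, while e.mu is still held, everything sendUDP previously took as parameters, so the transmit itself runs after the read lock is dropped and the synchronous loopback/ICMP path cannot re-enter the locked endpoint. Below is a rough sketch of the same shape using only the standard library; the endpoint and packetInfo types are invented for illustration (this is not gVisor's API), and conn is assumed to be an unconnected socket from net.ListenUDP.

package example

import (
	"net"
	"sync"
)

// endpoint is a hypothetical stand-in for a UDP endpoint guarded by an RWMutex.
type endpoint struct {
	mu   sync.RWMutex
	conn *net.UDPConn
	dst  *net.UDPAddr
}

// packetInfo is an immutable snapshot of everything send needs.
type packetInfo struct {
	conn *net.UDPConn
	dst  *net.UDPAddr
	data []byte
}

// buildPacketInfo copies the payload and endpoint state under the read lock.
func (e *endpoint) buildPacketInfo(payload []byte) packetInfo {
	e.mu.RLock()
	defer e.mu.RUnlock()
	data := append([]byte(nil), payload...) // copy while the lock is held
	return packetInfo{conn: e.conn, dst: e.dst, data: data}
}

// send transmits the snapshot; no endpoint lock is held here.
func (p packetInfo) send() (int, error) {
	return p.conn.WriteToUDP(p.data, p.dst)
}

// Write mirrors the split above: build under the lock, send outside it.
func (e *endpoint) Write(payload []byte) (int, error) {
	p := e.buildPacketInfo(payload)
	return p.send()
}

Because packetInfo only holds copies or owned references, nothing send touches can race with writers that take e.mu after buildPacketInfo returns.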