summaryrefslogtreecommitdiffhomepage
path: root/pkg
diff options
context:
space:
mode:
Diffstat (limited to 'pkg')
-rw-r--r--pkg/sentry/fs/dirent.go84
-rw-r--r--pkg/sentry/fs/fs.go26
-rw-r--r--pkg/sentry/fs/gofer/inode_state.go10
-rw-r--r--pkg/sentry/fs/gofer/path.go80
-rw-r--r--pkg/sentry/fs/gofer/session.go17
-rw-r--r--pkg/sentry/fs/gofer/socket.go9
-rw-r--r--pkg/sentry/fsimpl/gofer/filesystem.go16
-rw-r--r--pkg/sentry/fsimpl/gofer/gofer.go18
-rw-r--r--pkg/sentry/fsimpl/gofer/revalidate.go10
-rw-r--r--pkg/sentry/fsimpl/gofer/symlink.go2
-rw-r--r--pkg/sentry/fsimpl/kernfs/filesystem.go4
-rw-r--r--pkg/sentry/fsimpl/overlay/filesystem.go4
-rw-r--r--pkg/sentry/fsimpl/verity/filesystem.go2
-rw-r--r--pkg/sentry/kernel/futex/futex.go41
-rw-r--r--pkg/sentry/kernel/pipe/pipe_unsafe.go2
-rw-r--r--pkg/sentry/kernel/pipe/pipe_util.go1
-rw-r--r--pkg/sentry/kernel/ptrace.go1
-rw-r--r--pkg/sentry/kernel/sessions.go5
-rw-r--r--pkg/sentry/mm/syscalls.go1
-rw-r--r--pkg/sentry/pgalloc/pgalloc.go2
-rw-r--r--pkg/sentry/vfs/dentry.go27
-rw-r--r--pkg/sync/mutex_unsafe.go14
-rw-r--r--pkg/sync/rwmutex_unsafe.go8
-rw-r--r--pkg/tcpip/stack/addressable_endpoint_state.go2
-rw-r--r--pkg/tcpip/stack/conntrack.go4
-rw-r--r--pkg/tcpip/transport/icmp/endpoint.go5
-rw-r--r--pkg/tcpip/transport/tcp/accept.go9
-rw-r--r--pkg/tcpip/transport/tcp/connect.go74
-rw-r--r--pkg/tcpip/transport/tcp/dispatcher.go2
-rw-r--r--pkg/tcpip/transport/tcp/endpoint.go140
-rw-r--r--pkg/tcpip/transport/tcp/forwarder.go5
-rw-r--r--pkg/tcpip/transport/udp/endpoint.go150
32 files changed, 450 insertions, 325 deletions
diff --git a/pkg/sentry/fs/dirent.go b/pkg/sentry/fs/dirent.go
index 3a45e9041..8d7660e79 100644
--- a/pkg/sentry/fs/dirent.go
+++ b/pkg/sentry/fs/dirent.go
@@ -488,11 +488,11 @@ func (d *Dirent) walk(ctx context.Context, root *Dirent, name string, walkMayUnl
// Slow path: load the InodeOperations into memory. Since this is a hot path and the lookup may be
// expensive, if possible release the lock and re-acquire it.
if walkMayUnlock {
- d.mu.Unlock()
+ d.mu.Unlock() // +checklocksforce: results in an inconsistent block.
}
c, err := d.Inode.Lookup(ctx, name)
if walkMayUnlock {
- d.mu.Lock()
+ d.mu.Lock() // +checklocksforce: see above.
}
// No dice.
if err != nil {
@@ -594,21 +594,27 @@ func (d *Dirent) exists(ctx context.Context, root *Dirent, name string) bool {
// lockDirectory should be called for any operation that changes this `d`s
// children (creating or removing them).
-func (d *Dirent) lockDirectory() func() {
+// +checklocksacquire:d.dirMu
+// +checklocksacquire:d.mu
+func (d *Dirent) lockDirectory() {
renameMu.RLock()
d.dirMu.Lock()
d.mu.Lock()
- return func() {
- d.mu.Unlock()
- d.dirMu.Unlock()
- renameMu.RUnlock()
- }
+}
+
+// unlockDirectory is the reverse of lockDirectory.
+// +checklocksrelease:d.dirMu
+// +checklocksrelease:d.mu
+func (d *Dirent) unlockDirectory() {
+ d.mu.Unlock()
+ d.dirMu.Unlock()
+ renameMu.RUnlock() // +checklocksforce: see lockDirectory.
}
// Create creates a new regular file in this directory.
func (d *Dirent) Create(ctx context.Context, root *Dirent, name string, flags FileFlags, perms FilePermissions) (*File, error) {
- unlock := d.lockDirectory()
- defer unlock()
+ d.lockDirectory()
+ defer d.unlockDirectory()
// Does something already exist?
if d.exists(ctx, root, name) {
@@ -670,8 +676,8 @@ func (d *Dirent) finishCreate(ctx context.Context, child *Dirent, name string) {
// genericCreate executes create if name does not exist. Removes a negative Dirent at name if
// create succeeds.
func (d *Dirent) genericCreate(ctx context.Context, root *Dirent, name string, create func() error) error {
- unlock := d.lockDirectory()
- defer unlock()
+ d.lockDirectory()
+ defer d.unlockDirectory()
// Does something already exist?
if d.exists(ctx, root, name) {
@@ -1021,8 +1027,8 @@ func (d *Dirent) Remove(ctx context.Context, root *Dirent, name string, dirPath
panic("Dirent.Remove: root must not be nil")
}
- unlock := d.lockDirectory()
- defer unlock()
+ d.lockDirectory()
+ defer d.unlockDirectory()
// Try to walk to the node.
child, err := d.walk(ctx, root, name, false /* may unlock */)
@@ -1082,8 +1088,8 @@ func (d *Dirent) RemoveDirectory(ctx context.Context, root *Dirent, name string)
panic("Dirent.Remove: root must not be nil")
}
- unlock := d.lockDirectory()
- defer unlock()
+ d.lockDirectory()
+ defer d.unlockDirectory()
// Check for dots.
if name == "." {
@@ -1259,17 +1265,15 @@ func (d *Dirent) dropExtendedReference() {
d.Inode.MountSource.fscache.Remove(d)
}
-// lockForRename takes locks on oldParent and newParent as required by Rename
-// and returns a function that will unlock the locks taken. The returned
-// function must be called even if a non-nil error is returned.
-func lockForRename(oldParent *Dirent, oldName string, newParent *Dirent, newName string) (func(), error) {
+// lockForRename takes locks on oldParent and newParent as required by Rename.
+// On return, unlockForRename must always be called, even with an error.
+// +checklocksacquire:oldParent.mu
+// +checklocksacquire:newParent.mu
+func lockForRename(oldParent *Dirent, oldName string, newParent *Dirent, newName string) error {
renameMu.Lock()
if oldParent == newParent {
oldParent.mu.Lock()
- return func() {
- oldParent.mu.Unlock()
- renameMu.Unlock()
- }, nil
+ return nil // +checklocksforce: only one lock exists.
}
// Renaming between directories is a bit subtle:
@@ -1297,11 +1301,7 @@ func lockForRename(oldParent *Dirent, oldName string, newParent *Dirent, newName
// itself.
err = unix.EINVAL
}
- return func() {
- newParent.mu.Unlock()
- oldParent.mu.Unlock()
- renameMu.Unlock()
- }, err
+ return err
}
child = p
}
@@ -1310,11 +1310,21 @@ func lockForRename(oldParent *Dirent, oldName string, newParent *Dirent, newName
// have no relationship; in either case we can do this:
newParent.mu.Lock()
oldParent.mu.Lock()
- return func() {
+ return nil
+}
+
+// unlockForRename is the opposite of lockForRename.
+// +checklocksrelease:oldParent.mu
+// +checklocksrelease:newParent.mu
+func unlockForRename(oldParent, newParent *Dirent) {
+ if oldParent == newParent {
oldParent.mu.Unlock()
- newParent.mu.Unlock()
- renameMu.Unlock()
- }, nil
+ renameMu.Unlock() // +checklocksforce: only one lock exists.
+ return
+ }
+ newParent.mu.Unlock()
+ oldParent.mu.Unlock()
+ renameMu.Unlock() // +checklocksforce: not tracked.
}
func (d *Dirent) checkSticky(ctx context.Context, victim *Dirent) error {
@@ -1353,8 +1363,8 @@ func (d *Dirent) MayDelete(ctx context.Context, root *Dirent, name string) error
return err
}
- unlock := d.lockDirectory()
- defer unlock()
+ d.lockDirectory()
+ defer d.unlockDirectory()
victim, err := d.walk(ctx, root, name, true /* may unlock */)
if err != nil {
@@ -1392,8 +1402,8 @@ func Rename(ctx context.Context, root *Dirent, oldParent *Dirent, oldName string
}
// Acquire global renameMu lock, and mu locks on oldParent/newParent.
- unlock, err := lockForRename(oldParent, oldName, newParent, newName)
- defer unlock()
+ err := lockForRename(oldParent, oldName, newParent, newName)
+ defer unlockForRename(oldParent, newParent)
if err != nil {
return err
}
diff --git a/pkg/sentry/fs/fs.go b/pkg/sentry/fs/fs.go
index 44587bb37..a346c316b 100644
--- a/pkg/sentry/fs/fs.go
+++ b/pkg/sentry/fs/fs.go
@@ -80,23 +80,33 @@ func AsyncBarrier() {
// Async executes a function asynchronously.
//
// Async must not be called recursively.
+// +checklocksignore
func Async(f func()) {
workMu.RLock()
- go func() { // S/R-SAFE: AsyncBarrier must be called.
- defer workMu.RUnlock() // Ensure RUnlock in case of panic.
- f()
- }()
+ go asyncWork(f) // S/R-SAFE: AsyncBarrier must be called.
+}
+
+// +checklocksignore
+func asyncWork(f func()) {
+ // Ensure RUnlock in case of panic.
+ defer workMu.RUnlock()
+ f()
}
// AsyncWithContext is just like Async, except that it calls the asynchronous
// function with the given context as argument. This function exists to avoid
// needing to allocate an extra function on the heap in a hot path.
+// +checklocksignore
func AsyncWithContext(ctx context.Context, f func(context.Context)) {
workMu.RLock()
- go func() { // S/R-SAFE: AsyncBarrier must be called.
- defer workMu.RUnlock() // Ensure RUnlock in case of panic.
- f(ctx)
- }()
+ go asyncWorkWithContext(ctx, f)
+}
+
+// +checklocksignore
+func asyncWorkWithContext(ctx context.Context, f func(context.Context)) {
+ // Ensure RUnlock in case of panic.
+ defer workMu.RUnlock()
+ f(ctx)
}
// AsyncErrorBarrier waits for all outstanding asynchronous work to complete, or
diff --git a/pkg/sentry/fs/gofer/inode_state.go b/pkg/sentry/fs/gofer/inode_state.go
index e2af1d2ae..19f91f010 100644
--- a/pkg/sentry/fs/gofer/inode_state.go
+++ b/pkg/sentry/fs/gofer/inode_state.go
@@ -112,13 +112,6 @@ func (i *inodeFileState) loadLoading(_ struct{}) {
// +checklocks:i.loading
func (i *inodeFileState) afterLoad() {
load := func() (err error) {
- // See comment on i.loading().
- defer func() {
- if err == nil {
- i.loading.Unlock()
- }
- }()
-
// Manually restore the p9.File.
name, ok := i.s.inodeMappings[i.sattr.InodeID]
if !ok {
@@ -167,6 +160,9 @@ func (i *inodeFileState) afterLoad() {
i.savedUAttr = nil
}
+ // See comment on i.loading(). This only unlocks on the
+ // non-error path.
+ i.loading.Unlock() // +checklocksforce: per comment.
return nil
}
diff --git a/pkg/sentry/fs/gofer/path.go b/pkg/sentry/fs/gofer/path.go
index aa2405f68..958f46bd6 100644
--- a/pkg/sentry/fs/gofer/path.go
+++ b/pkg/sentry/fs/gofer/path.go
@@ -47,7 +47,8 @@ func (i *inodeOperations) Lookup(ctx context.Context, dir *fs.Inode, name string
return nil, linuxerr.ENAMETOOLONG
}
- cp := i.session().cachePolicy
+ s := i.session()
+ cp := s.cachePolicy
if cp.cacheReaddir() {
// Check to see if we have readdirCache that indicates the
// child does not exist. Avoid holding readdirMu longer than
@@ -78,7 +79,7 @@ func (i *inodeOperations) Lookup(ctx context.Context, dir *fs.Inode, name string
return nil, err
}
- if i.session().overrides != nil {
+ if s.overrides != nil {
// Check if file belongs to a internal named pipe. Note that it doesn't need
// to check for sockets because it's done in newInodeOperations below.
deviceKey := device.MultiDeviceKey{
@@ -86,13 +87,13 @@ func (i *inodeOperations) Lookup(ctx context.Context, dir *fs.Inode, name string
SecondaryDevice: i.session().connID,
Inode: qids[0].Path,
}
- unlock := i.session().overrides.lock()
- if pipeInode := i.session().overrides.getPipe(deviceKey); pipeInode != nil {
- unlock()
+ s.overrides.lock()
+ if pipeInode := s.overrides.getPipe(deviceKey); pipeInode != nil {
+ s.overrides.unlock()
pipeInode.IncRef()
return fs.NewDirent(ctx, pipeInode, name), nil
}
- unlock()
+ s.overrides.unlock()
}
// Construct the Inode operations.
@@ -221,17 +222,20 @@ func (i *inodeOperations) CreateHardLink(ctx context.Context, inode *fs.Inode, t
if err := i.fileState.file.link(ctx, &targetOpts.fileState.file, newName); err != nil {
return err
}
- if i.session().cachePolicy.cacheUAttrs(inode) {
+
+ s := i.session()
+ if s.cachePolicy.cacheUAttrs(inode) {
// Increase link count.
targetOpts.cachingInodeOps.IncLinks(ctx)
}
+
i.touchModificationAndStatusChangeTime(ctx, inode)
return nil
}
// CreateDirectory uses Create to create a directory named s under inodeOperations.
-func (i *inodeOperations) CreateDirectory(ctx context.Context, dir *fs.Inode, s string, perm fs.FilePermissions) error {
- if len(s) > maxFilenameLen {
+func (i *inodeOperations) CreateDirectory(ctx context.Context, dir *fs.Inode, name string, perm fs.FilePermissions) error {
+ if len(name) > maxFilenameLen {
return linuxerr.ENAMETOOLONG
}
@@ -247,16 +251,18 @@ func (i *inodeOperations) CreateDirectory(ctx context.Context, dir *fs.Inode, s
perm.SetGID = true
}
- if _, err := i.fileState.file.mkdir(ctx, s, p9.FileMode(perm.LinuxMode()), p9.UID(owner.UID), p9.GID(owner.GID)); err != nil {
+ if _, err := i.fileState.file.mkdir(ctx, name, p9.FileMode(perm.LinuxMode()), p9.UID(owner.UID), p9.GID(owner.GID)); err != nil {
return err
}
- if i.session().cachePolicy.cacheUAttrs(dir) {
+
+ s := i.session()
+ if s.cachePolicy.cacheUAttrs(dir) {
// Increase link count.
//
// N.B. This will update the modification time.
i.cachingInodeOps.IncLinks(ctx)
}
- if i.session().cachePolicy.cacheReaddir() {
+ if s.cachePolicy.cacheReaddir() {
// Invalidate readdir cache.
i.markDirectoryDirty()
}
@@ -269,13 +275,14 @@ func (i *inodeOperations) Bind(ctx context.Context, dir *fs.Inode, name string,
return nil, linuxerr.ENAMETOOLONG
}
- if i.session().overrides == nil {
+ s := i.session()
+ if s.overrides == nil {
return nil, syserror.EOPNOTSUPP
}
// Stabilize the override map while creation is in progress.
- unlock := i.session().overrides.lock()
- defer unlock()
+ s.overrides.lock()
+ defer s.overrides.unlock()
sattr, iops, err := i.createEndpointFile(ctx, dir, name, perm, p9.ModeSocket)
if err != nil {
@@ -284,7 +291,7 @@ func (i *inodeOperations) Bind(ctx context.Context, dir *fs.Inode, name string,
// Construct the positive Dirent.
childDir := fs.NewDirent(ctx, fs.NewInode(ctx, iops, dir.MountSource, sattr), name)
- i.session().overrides.addBoundEndpoint(iops.fileState.key, childDir, ep)
+ s.overrides.addBoundEndpoint(iops.fileState.key, childDir, ep)
return childDir, nil
}
@@ -298,8 +305,9 @@ func (i *inodeOperations) CreateFifo(ctx context.Context, dir *fs.Inode, name st
mode := p9.FileMode(perm.LinuxMode()) | p9.ModeNamedPipe
// N.B. FIFOs use major/minor numbers 0.
+ s := i.session()
if _, err := i.fileState.file.mknod(ctx, name, mode, 0, 0, p9.UID(owner.UID), p9.GID(owner.GID)); err != nil {
- if i.session().overrides == nil || !linuxerr.Equals(linuxerr.EPERM, err) {
+ if s.overrides == nil || !linuxerr.Equals(linuxerr.EPERM, err) {
return err
}
// If gofer doesn't support mknod, check if we can create an internal fifo.
@@ -311,13 +319,14 @@ func (i *inodeOperations) CreateFifo(ctx context.Context, dir *fs.Inode, name st
}
func (i *inodeOperations) createInternalFifo(ctx context.Context, dir *fs.Inode, name string, owner fs.FileOwner, perm fs.FilePermissions) error {
- if i.session().overrides == nil {
+ s := i.session()
+ if s.overrides == nil {
return linuxerr.EPERM
}
// Stabilize the override map while creation is in progress.
- unlock := i.session().overrides.lock()
- defer unlock()
+ s.overrides.lock()
+ defer s.overrides.unlock()
sattr, fileOps, err := i.createEndpointFile(ctx, dir, name, perm, p9.ModeNamedPipe)
if err != nil {
@@ -336,7 +345,7 @@ func (i *inodeOperations) createInternalFifo(ctx context.Context, dir *fs.Inode,
// Construct the positive Dirent.
childDir := fs.NewDirent(ctx, fs.NewInode(ctx, iops, dir.MountSource, sattr), name)
- i.session().overrides.addPipe(fileOps.fileState.key, childDir, inode)
+ s.overrides.addPipe(fileOps.fileState.key, childDir, inode)
return nil
}
@@ -386,8 +395,9 @@ func (i *inodeOperations) Remove(ctx context.Context, dir *fs.Inode, name string
return linuxerr.ENAMETOOLONG
}
+ s := i.session()
var key *device.MultiDeviceKey
- if i.session().overrides != nil {
+ if s.overrides != nil {
// Find out if file being deleted is a socket or pipe that needs to be
// removed from endpoint map.
if d, err := i.Lookup(ctx, dir, name); err == nil {
@@ -402,8 +412,8 @@ func (i *inodeOperations) Remove(ctx context.Context, dir *fs.Inode, name string
}
// Stabilize the override map while deletion is in progress.
- unlock := i.session().overrides.lock()
- defer unlock()
+ s.overrides.lock()
+ defer s.overrides.unlock()
}
}
}
@@ -412,7 +422,7 @@ func (i *inodeOperations) Remove(ctx context.Context, dir *fs.Inode, name string
return err
}
if key != nil {
- i.session().overrides.remove(ctx, *key)
+ s.overrides.remove(ctx, *key)
}
i.touchModificationAndStatusChangeTime(ctx, dir)
@@ -429,11 +439,13 @@ func (i *inodeOperations) RemoveDirectory(ctx context.Context, dir *fs.Inode, na
if err := i.fileState.file.unlinkAt(ctx, name, 0x200); err != nil {
return err
}
- if i.session().cachePolicy.cacheUAttrs(dir) {
+
+ s := i.session()
+ if s.cachePolicy.cacheUAttrs(dir) {
// Decrease link count and updates atime.
i.cachingInodeOps.DecLinks(ctx)
}
- if i.session().cachePolicy.cacheReaddir() {
+ if s.cachePolicy.cacheReaddir() {
// Invalidate readdir cache.
i.markDirectoryDirty()
}
@@ -463,12 +475,13 @@ func (i *inodeOperations) Rename(ctx context.Context, inode *fs.Inode, oldParent
}
// Is the renamed entity a directory? Fix link counts.
+ s := i.session()
if fs.IsDir(i.fileState.sattr) {
// Update cached state.
- if i.session().cachePolicy.cacheUAttrs(oldParent) {
+ if s.cachePolicy.cacheUAttrs(oldParent) {
oldParentInodeOperations.cachingInodeOps.DecLinks(ctx)
}
- if i.session().cachePolicy.cacheUAttrs(newParent) {
+ if s.cachePolicy.cacheUAttrs(newParent) {
// Only IncLinks if there is a new addition to
// newParent. If this is replacement, then the total
// count remains the same.
@@ -477,7 +490,7 @@ func (i *inodeOperations) Rename(ctx context.Context, inode *fs.Inode, oldParent
}
}
}
- if i.session().cachePolicy.cacheReaddir() {
+ if s.cachePolicy.cacheReaddir() {
// Mark old directory dirty.
oldParentInodeOperations.markDirectoryDirty()
if oldParent != newParent {
@@ -487,17 +500,18 @@ func (i *inodeOperations) Rename(ctx context.Context, inode *fs.Inode, oldParent
}
// Rename always updates ctime.
- if i.session().cachePolicy.cacheUAttrs(inode) {
+ if s.cachePolicy.cacheUAttrs(inode) {
i.cachingInodeOps.TouchStatusChangeTime(ctx)
}
return nil
}
func (i *inodeOperations) touchModificationAndStatusChangeTime(ctx context.Context, inode *fs.Inode) {
- if i.session().cachePolicy.cacheUAttrs(inode) {
+ s := i.session()
+ if s.cachePolicy.cacheUAttrs(inode) {
i.cachingInodeOps.TouchModificationAndStatusChangeTime(ctx)
}
- if i.session().cachePolicy.cacheReaddir() {
+ if s.cachePolicy.cacheReaddir() {
// Invalidate readdir cache.
i.markDirectoryDirty()
}
diff --git a/pkg/sentry/fs/gofer/session.go b/pkg/sentry/fs/gofer/session.go
index 7cf3522ff..b7debeecb 100644
--- a/pkg/sentry/fs/gofer/session.go
+++ b/pkg/sentry/fs/gofer/session.go
@@ -98,9 +98,14 @@ func (e *overrideMaps) remove(ctx context.Context, key device.MultiDeviceKey) {
// lock blocks other addition and removal operations from happening while
// the backing file is being created or deleted. Returns a function that unlocks
// the endpoint map.
-func (e *overrideMaps) lock() func() {
+// +checklocksacquire:e.mu
+func (e *overrideMaps) lock() {
e.mu.Lock()
- return func() { e.mu.Unlock() }
+}
+
+// +checklocksrelease:e.mu
+func (e *overrideMaps) unlock() {
+ e.mu.Unlock()
}
// getBoundEndpoint returns the bound endpoint mapped to the given key.
@@ -366,8 +371,8 @@ func newOverrideMaps() *overrideMaps {
// fillKeyMap populates key and dirent maps upon restore from saved pathmap.
func (s *session) fillKeyMap(ctx context.Context) error {
- unlock := s.overrides.lock()
- defer unlock()
+ s.overrides.lock()
+ defer s.overrides.unlock()
for ep, dirPath := range s.overrides.pathMap {
_, file, err := s.attach.walk(ctx, splitAbsolutePath(dirPath))
@@ -394,8 +399,8 @@ func (s *session) fillKeyMap(ctx context.Context) error {
// fillPathMap populates paths for overrides from dirents in direntMap
// before save.
func (s *session) fillPathMap(ctx context.Context) error {
- unlock := s.overrides.lock()
- defer unlock()
+ s.overrides.lock()
+ defer s.overrides.unlock()
for _, endpoint := range s.overrides.keyMap {
mountRoot := endpoint.dirent.MountRoot()
diff --git a/pkg/sentry/fs/gofer/socket.go b/pkg/sentry/fs/gofer/socket.go
index 8a1c69ac2..1fd8a0910 100644
--- a/pkg/sentry/fs/gofer/socket.go
+++ b/pkg/sentry/fs/gofer/socket.go
@@ -32,10 +32,11 @@ func (i *inodeOperations) BoundEndpoint(inode *fs.Inode, path string) transport.
return nil
}
- if i.session().overrides != nil {
- unlock := i.session().overrides.lock()
- defer unlock()
- ep := i.session().overrides.getBoundEndpoint(i.fileState.key)
+ s := i.session()
+ if s.overrides != nil {
+ s.overrides.lock()
+ defer s.overrides.unlock()
+ ep := s.overrides.getBoundEndpoint(i.fileState.key)
if ep != nil {
return ep
}
diff --git a/pkg/sentry/fsimpl/gofer/filesystem.go b/pkg/sentry/fsimpl/gofer/filesystem.go
index 237d17921..652e5fe77 100644
--- a/pkg/sentry/fsimpl/gofer/filesystem.go
+++ b/pkg/sentry/fsimpl/gofer/filesystem.go
@@ -147,6 +147,7 @@ func putDentrySlice(ds *[]*dentry) {
// but dentry slices are allocated lazily, and it's much easier to say "defer
// fs.renameMuRUnlockAndCheckCaching(&ds)" than "defer func() {
// fs.renameMuRUnlockAndCheckCaching(ds) }()" to work around this.
+// +checklocksrelease:fs.renameMu
func (fs *filesystem) renameMuRUnlockAndCheckCaching(ctx context.Context, dsp **[]*dentry) {
fs.renameMu.RUnlock()
if *dsp == nil {
@@ -159,6 +160,7 @@ func (fs *filesystem) renameMuRUnlockAndCheckCaching(ctx context.Context, dsp **
putDentrySlice(*dsp)
}
+// +checklocksrelease:fs.renameMu
func (fs *filesystem) renameMuUnlockAndCheckCaching(ctx context.Context, ds **[]*dentry) {
if *ds == nil {
fs.renameMu.Unlock()
@@ -540,7 +542,7 @@ func (fs *filesystem) unlinkAt(ctx context.Context, rp *vfs.ResolvingPath, dir b
if child.syntheticChildren != 0 {
// This is definitely not an empty directory, irrespective of
// fs.opts.interop.
- vfsObj.AbortDeleteDentry(&child.vfsd)
+ vfsObj.AbortDeleteDentry(&child.vfsd) // +checklocksforce: PrepareDeleteDentry called if child != nil.
return linuxerr.ENOTEMPTY
}
// If InteropModeShared is in effect and the first call to
@@ -550,12 +552,12 @@ func (fs *filesystem) unlinkAt(ctx context.Context, rp *vfs.ResolvingPath, dir b
// still exist) would be a waste of time.
if child.cachedMetadataAuthoritative() {
if !child.isDir() {
- vfsObj.AbortDeleteDentry(&child.vfsd)
+ vfsObj.AbortDeleteDentry(&child.vfsd) // +checklocksforce: see above.
return syserror.ENOTDIR
}
for _, grandchild := range child.children {
if grandchild != nil {
- vfsObj.AbortDeleteDentry(&child.vfsd)
+ vfsObj.AbortDeleteDentry(&child.vfsd) // +checklocksforce: see above.
return linuxerr.ENOTEMPTY
}
}
@@ -565,12 +567,12 @@ func (fs *filesystem) unlinkAt(ctx context.Context, rp *vfs.ResolvingPath, dir b
} else {
// child must be a non-directory file.
if child != nil && child.isDir() {
- vfsObj.AbortDeleteDentry(&child.vfsd)
+ vfsObj.AbortDeleteDentry(&child.vfsd) // +checklocksforce: see above.
return syserror.EISDIR
}
if rp.MustBeDir() {
if child != nil {
- vfsObj.AbortDeleteDentry(&child.vfsd)
+ vfsObj.AbortDeleteDentry(&child.vfsd) // +checklocksforce: see above.
}
return syserror.ENOTDIR
}
@@ -583,7 +585,7 @@ func (fs *filesystem) unlinkAt(ctx context.Context, rp *vfs.ResolvingPath, dir b
err = parent.file.unlinkAt(ctx, name, flags)
if err != nil {
if child != nil {
- vfsObj.AbortDeleteDentry(&child.vfsd)
+ vfsObj.AbortDeleteDentry(&child.vfsd) // +checklocksforce: see above.
}
return err
}
@@ -601,7 +603,7 @@ func (fs *filesystem) unlinkAt(ctx context.Context, rp *vfs.ResolvingPath, dir b
}
if child != nil {
- vfsObj.CommitDeleteDentry(ctx, &child.vfsd)
+ vfsObj.CommitDeleteDentry(ctx, &child.vfsd) // +checklocksforce: see above.
child.setDeleted()
if child.isSynthetic() {
parent.syntheticChildren--
diff --git a/pkg/sentry/fsimpl/gofer/gofer.go b/pkg/sentry/fsimpl/gofer/gofer.go
index fe4c2e0e1..2f85215d9 100644
--- a/pkg/sentry/fsimpl/gofer/gofer.go
+++ b/pkg/sentry/fsimpl/gofer/gofer.go
@@ -947,10 +947,10 @@ func (d *dentry) cachedMetadataAuthoritative() bool {
// updateFromP9Attrs is called to update d's metadata after an update from the
// remote filesystem.
// Precondition: d.metadataMu must be locked.
+// +checklocks:d.metadataMu
func (d *dentry) updateFromP9AttrsLocked(mask p9.AttrMask, attr *p9.Attr) {
if mask.Mode {
if got, want := uint32(attr.Mode.FileType()), d.fileType(); got != want {
- d.metadataMu.Unlock()
panic(fmt.Sprintf("gofer.dentry file type changed from %#o to %#o", want, got))
}
atomic.StoreUint32(&d.mode, uint32(attr.Mode))
@@ -989,6 +989,7 @@ func (d *dentry) updateFromP9AttrsLocked(mask p9.AttrMask, attr *p9.Attr) {
// Preconditions: !d.isSynthetic().
// Preconditions: d.metadataMu is locked.
+// +checklocks:d.metadataMu
func (d *dentry) refreshSizeLocked(ctx context.Context) error {
d.handleMu.RLock()
@@ -1020,6 +1021,7 @@ func (d *dentry) updateFromGetattr(ctx context.Context) error {
// Preconditions:
// * !d.isSynthetic().
// * d.metadataMu is locked.
+// +checklocks:d.metadataMu
func (d *dentry) updateFromGetattrLocked(ctx context.Context) error {
// Use d.readFile or d.writeFile, which represent 9P FIDs that have been
// opened, in preference to d.file, which represents a 9P fid that has not.
@@ -1044,7 +1046,8 @@ func (d *dentry) updateFromGetattrLocked(ctx context.Context) error {
_, attrMask, attr, err := file.getAttr(ctx, dentryAttrMask())
if handleMuRLocked {
- d.handleMu.RUnlock() // must be released before updateFromP9AttrsLocked()
+ // handleMu must be released before updateFromP9AttrsLocked().
+ d.handleMu.RUnlock() // +checklocksforce: complex case.
}
if err != nil {
return err
@@ -1470,7 +1473,7 @@ func (d *dentry) checkCachingLocked(ctx context.Context, renameMuWriteLocked boo
if d.isDeleted() {
d.watches.HandleDeletion(ctx)
}
- d.destroyLocked(ctx)
+ d.destroyLocked(ctx) // +checklocksforce: renameMu must be acquired at this point.
return
}
// If d still has inotify watches and it is not deleted or invalidated, it
@@ -1498,7 +1501,7 @@ func (d *dentry) checkCachingLocked(ctx context.Context, renameMuWriteLocked boo
delete(d.parent.children, d.name)
d.parent.dirMu.Unlock()
}
- d.destroyLocked(ctx)
+ d.destroyLocked(ctx) // +checklocksforce: see above.
return
}
@@ -1527,7 +1530,7 @@ func (d *dentry) checkCachingLocked(ctx context.Context, renameMuWriteLocked boo
d.fs.renameMu.Lock()
defer d.fs.renameMu.Unlock()
}
- d.fs.evictCachedDentryLocked(ctx)
+ d.fs.evictCachedDentryLocked(ctx) // +checklocksforce: see above.
}
}
@@ -1544,6 +1547,7 @@ func (d *dentry) removeFromCacheLocked() {
// Precondition: fs.renameMu must be locked for writing; it may be temporarily
// unlocked.
+// +checklocks:fs.renameMu
func (fs *filesystem) evictAllCachedDentriesLocked(ctx context.Context) {
for fs.cachedDentriesLen != 0 {
fs.evictCachedDentryLocked(ctx)
@@ -1552,6 +1556,7 @@ func (fs *filesystem) evictAllCachedDentriesLocked(ctx context.Context) {
// Preconditions:
// * fs.renameMu must be locked for writing; it may be temporarily unlocked.
+// +checklocks:fs.renameMu
func (fs *filesystem) evictCachedDentryLocked(ctx context.Context) {
fs.cacheMu.Lock()
victim := fs.cachedDentries.Back()
@@ -1588,7 +1593,7 @@ func (fs *filesystem) evictCachedDentryLocked(ctx context.Context) {
// will try to acquire fs.renameMu (which we have already acquired). Hence,
// fs.renameMu will synchronize the destroy attempts.
victim.cachingMu.Unlock()
- victim.destroyLocked(ctx)
+ victim.destroyLocked(ctx) // +checklocksforce: owned as precondition, victim.fs == fs.
}
// destroyLocked destroys the dentry.
@@ -1598,6 +1603,7 @@ func (fs *filesystem) evictCachedDentryLocked(ctx context.Context) {
// * d.refs == 0.
// * d.parent.children[d.name] != d, i.e. d is not reachable by path traversal
// from its former parent dentry.
+// +checklocks:d.fs.renameMu
func (d *dentry) destroyLocked(ctx context.Context) {
switch atomic.LoadInt64(&d.refs) {
case 0:
diff --git a/pkg/sentry/fsimpl/gofer/revalidate.go b/pkg/sentry/fsimpl/gofer/revalidate.go
index 8f81f0822..226790a11 100644
--- a/pkg/sentry/fsimpl/gofer/revalidate.go
+++ b/pkg/sentry/fsimpl/gofer/revalidate.go
@@ -247,16 +247,16 @@ func (fs *filesystem) revalidateHelper(ctx context.Context, vfsObj *vfs.VirtualF
if found && !d.isSynthetic() {
// First dentry is where the search is starting, just update attributes
// since it cannot be replaced.
- d.updateFromP9AttrsLocked(stats[i].Valid, &stats[i].Attr)
+ d.updateFromP9AttrsLocked(stats[i].Valid, &stats[i].Attr) // +checklocksforce: acquired by lockAllMetadata.
}
- d.metadataMu.Unlock()
+ d.metadataMu.Unlock() // +checklocksforce: see above.
continue
}
// Note that synthetic dentries will always fails the comparison check
// below.
if !found || d.qidPath != stats[i].QID.Path {
- d.metadataMu.Unlock()
+ d.metadataMu.Unlock() // +checklocksforce: see above.
if !found && d.isSynthetic() {
// We have a synthetic file, and no remote file has arisen to replace
// it.
@@ -298,7 +298,7 @@ func (fs *filesystem) revalidateHelper(ctx context.Context, vfsObj *vfs.VirtualF
}
// The file at this path hasn't changed. Just update cached metadata.
- d.updateFromP9AttrsLocked(stats[i].Valid, &stats[i].Attr)
+ d.updateFromP9AttrsLocked(stats[i].Valid, &stats[i].Attr) // +checklocksforce: see above.
d.metadataMu.Unlock()
}
@@ -354,6 +354,7 @@ func (r *revalidateState) add(name string, d *dentry) {
r.dentries = append(r.dentries, d)
}
+// +checklocksignore
func (r *revalidateState) lockAllMetadata() {
for _, d := range r.dentries {
d.metadataMu.Lock()
@@ -372,6 +373,7 @@ func (r *revalidateState) popFront() *dentry {
// reset releases all metadata locks and resets all fields to allow this
// instance to be reused.
+// +checklocksignore
func (r *revalidateState) reset() {
if r.locked {
// Unlock any remaining dentries.
diff --git a/pkg/sentry/fsimpl/gofer/symlink.go b/pkg/sentry/fsimpl/gofer/symlink.go
index 2ec819f86..dbd834c67 100644
--- a/pkg/sentry/fsimpl/gofer/symlink.go
+++ b/pkg/sentry/fsimpl/gofer/symlink.go
@@ -41,7 +41,7 @@ func (d *dentry) readlink(ctx context.Context, mnt *vfs.Mount) (string, error) {
d.haveTarget = true
d.target = target
}
- d.dataMu.Unlock()
+ d.dataMu.Unlock() // +checklocksforce: guaranteed locked from above.
}
return target, err
}
diff --git a/pkg/sentry/fsimpl/kernfs/filesystem.go b/pkg/sentry/fsimpl/kernfs/filesystem.go
index 38c2b6df1..20d2526ad 100644
--- a/pkg/sentry/fsimpl/kernfs/filesystem.go
+++ b/pkg/sentry/fsimpl/kernfs/filesystem.go
@@ -752,7 +752,7 @@ func (fs *Filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldPa
fs.deferDecRef(replaced)
replaceVFSD = replaced.VFSDentry()
}
- virtfs.CommitRenameReplaceDentry(ctx, srcVFSD, replaceVFSD)
+ virtfs.CommitRenameReplaceDentry(ctx, srcVFSD, replaceVFSD) // +checklocksforce: to may be nil, that's okay.
return nil
}
@@ -788,7 +788,7 @@ func (fs *Filesystem) RmdirAt(ctx context.Context, rp *vfs.ResolvingPath) error
defer mntns.DecRef(ctx)
vfsd := d.VFSDentry()
if err := virtfs.PrepareDeleteDentry(mntns, vfsd); err != nil {
- return err
+ return err // +checklocksforce: vfsd is not locked.
}
if err := parentDentry.inode.RmDir(ctx, d.name, d.inode); err != nil {
diff --git a/pkg/sentry/fsimpl/overlay/filesystem.go b/pkg/sentry/fsimpl/overlay/filesystem.go
index 41207211a..77f9affc1 100644
--- a/pkg/sentry/fsimpl/overlay/filesystem.go
+++ b/pkg/sentry/fsimpl/overlay/filesystem.go
@@ -87,7 +87,7 @@ func putDentrySlice(ds *[]*dentry) {
// fs.renameMuRUnlockAndCheckDrop(&ds)" than "defer func() {
// fs.renameMuRUnlockAndCheckDrop(ds) }()" to work around this.
//
-// +checklocks:fs.renameMu
+// +checklocksrelease:fs.renameMu
func (fs *filesystem) renameMuRUnlockAndCheckDrop(ctx context.Context, dsp **[]*dentry) {
fs.renameMu.RUnlock()
if *dsp == nil {
@@ -113,7 +113,7 @@ func (fs *filesystem) renameMuRUnlockAndCheckDrop(ctx context.Context, dsp **[]*
putDentrySlice(*dsp)
}
-// +checklocks:fs.renameMu
+// +checklocksrelease:fs.renameMu
func (fs *filesystem) renameMuUnlockAndCheckDrop(ctx context.Context, ds **[]*dentry) {
if *ds == nil {
fs.renameMu.Unlock()
diff --git a/pkg/sentry/fsimpl/verity/filesystem.go b/pkg/sentry/fsimpl/verity/filesystem.go
index e4bfbd3c9..358a66072 100644
--- a/pkg/sentry/fsimpl/verity/filesystem.go
+++ b/pkg/sentry/fsimpl/verity/filesystem.go
@@ -75,6 +75,7 @@ func putDentrySlice(ds *[]*dentry) {
// but dentry slices are allocated lazily, and it's much easier to say "defer
// fs.renameMuRUnlockAndCheckDrop(&ds)" than "defer func() {
// fs.renameMuRUnlockAndCheckDrop(ds) }()" to work around this.
+// +checklocksrelease:fs.renameMu
func (fs *filesystem) renameMuRUnlockAndCheckDrop(ctx context.Context, ds **[]*dentry) {
fs.renameMu.RUnlock()
if *ds == nil {
@@ -90,6 +91,7 @@ func (fs *filesystem) renameMuRUnlockAndCheckDrop(ctx context.Context, ds **[]*d
putDentrySlice(*ds)
}
+// +checklocksrelease:fs.renameMu
func (fs *filesystem) renameMuUnlockAndCheckDrop(ctx context.Context, ds **[]*dentry) {
if *ds == nil {
fs.renameMu.Unlock()
diff --git a/pkg/sentry/kernel/futex/futex.go b/pkg/sentry/kernel/futex/futex.go
index 6377abb94..f5c364c96 100644
--- a/pkg/sentry/kernel/futex/futex.go
+++ b/pkg/sentry/kernel/futex/futex.go
@@ -398,8 +398,8 @@ func (m *Manager) Fork() *Manager {
}
// lockBucket returns a locked bucket for the given key.
-func (m *Manager) lockBucket(k *Key) *bucket {
- var b *bucket
+// +checklocksacquire:b.mu
+func (m *Manager) lockBucket(k *Key) (b *bucket) {
if k.Kind == KindSharedMappable {
b = m.sharedBucket
} else {
@@ -410,7 +410,9 @@ func (m *Manager) lockBucket(k *Key) *bucket {
}
// lockBuckets returns locked buckets for the given keys.
-func (m *Manager) lockBuckets(k1, k2 *Key) (*bucket, *bucket) {
+// +checklocksacquire:b1.mu
+// +checklocksacquire:b2.mu
+func (m *Manager) lockBuckets(k1, k2 *Key) (b1 *bucket, b2 *bucket) {
// Buckets must be consistently ordered to avoid circular lock
// dependencies. We order buckets in m.privateBuckets by index (lowest
// index first), and all buckets in m.privateBuckets precede
@@ -420,8 +422,8 @@ func (m *Manager) lockBuckets(k1, k2 *Key) (*bucket, *bucket) {
if k1.Kind != KindSharedMappable && k2.Kind != KindSharedMappable {
i1 := bucketIndexForAddr(k1.addr())
i2 := bucketIndexForAddr(k2.addr())
- b1 := &m.privateBuckets[i1]
- b2 := &m.privateBuckets[i2]
+ b1 = &m.privateBuckets[i1]
+ b2 = &m.privateBuckets[i2]
switch {
case i1 < i2:
b1.mu.Lock()
@@ -432,19 +434,30 @@ func (m *Manager) lockBuckets(k1, k2 *Key) (*bucket, *bucket) {
default:
b1.mu.Lock()
}
- return b1, b2
+ return b1, b2 // +checklocksforce
}
// At least one of b1 or b2 should be m.sharedBucket.
- b1 := m.sharedBucket
- b2 := m.sharedBucket
+ b1 = m.sharedBucket
+ b2 = m.sharedBucket
if k1.Kind != KindSharedMappable {
b1 = m.lockBucket(k1)
} else if k2.Kind != KindSharedMappable {
b2 = m.lockBucket(k2)
}
m.sharedBucket.mu.Lock()
- return b1, b2
+ return b1, b2 // +checklocksforce
+}
+
+// unlockBuckets unlocks two buckets.
+// +checklocksrelease:b1.mu
+// +checklocksrelease:b2.mu
+func (m *Manager) unlockBuckets(b1, b2 *bucket) {
+ b1.mu.Unlock()
+ if b1 != b2 {
+ b2.mu.Unlock()
+ }
+ return // +checklocksforce
}
// Wake wakes up to n waiters matching the bitmask on the given addr.
@@ -477,10 +490,7 @@ func (m *Manager) doRequeue(t Target, addr, naddr hostarch.Addr, private bool, c
defer k2.release(t)
b1, b2 := m.lockBuckets(&k1, &k2)
- defer b1.mu.Unlock()
- if b2 != b1 {
- defer b2.mu.Unlock()
- }
+ defer m.unlockBuckets(b1, b2)
if checkval {
if err := check(t, addr, val); err != nil {
@@ -527,10 +537,7 @@ func (m *Manager) WakeOp(t Target, addr1, addr2 hostarch.Addr, private bool, nwa
defer k2.release(t)
b1, b2 := m.lockBuckets(&k1, &k2)
- defer b1.mu.Unlock()
- if b2 != b1 {
- defer b2.mu.Unlock()
- }
+ defer m.unlockBuckets(b1, b2)
done := 0
cond, err := atomicOp(t, addr2, op)
diff --git a/pkg/sentry/kernel/pipe/pipe_unsafe.go b/pkg/sentry/kernel/pipe/pipe_unsafe.go
index dd60cba24..077c5d596 100644
--- a/pkg/sentry/kernel/pipe/pipe_unsafe.go
+++ b/pkg/sentry/kernel/pipe/pipe_unsafe.go
@@ -23,6 +23,8 @@ import (
// concurrent calls cannot deadlock.
//
// Preconditions: x != y.
+// +checklocksacquire:x.mu
+// +checklocksacquire:y.mu
func lockTwoPipes(x, y *Pipe) {
// Lock the two pipes in order of increasing address.
if uintptr(unsafe.Pointer(x)) < uintptr(unsafe.Pointer(y)) {
diff --git a/pkg/sentry/kernel/pipe/pipe_util.go b/pkg/sentry/kernel/pipe/pipe_util.go
index 84f9f6234..c883a9014 100644
--- a/pkg/sentry/kernel/pipe/pipe_util.go
+++ b/pkg/sentry/kernel/pipe/pipe_util.go
@@ -157,6 +157,7 @@ func (p *Pipe) Ioctl(ctx context.Context, io usermem.IO, args arch.SyscallArgume
//
// mu must be held by the caller. waitFor returns with mu held, but it will
// drop mu before blocking for any reader/writers.
+// +checklocks:mu
func waitFor(mu *sync.Mutex, wakeupChan *chan struct{}, sleeper amutex.Sleeper) bool {
// Ideally this function would simply use a condition variable. However, the
// wait needs to be interruptible via 'sleeper', so we must sychronize via a
diff --git a/pkg/sentry/kernel/ptrace.go b/pkg/sentry/kernel/ptrace.go
index cdaee5d7f..161140980 100644
--- a/pkg/sentry/kernel/ptrace.go
+++ b/pkg/sentry/kernel/ptrace.go
@@ -652,6 +652,7 @@ func (t *Task) forgetTracerLocked() {
// Preconditions:
// * The signal mutex must be locked.
// * The caller must be running on the task goroutine.
+// +checklocks:t.tg.signalHandlers.mu
func (t *Task) ptraceSignalLocked(info *linux.SignalInfo) bool {
if linux.Signal(info.Signo) == linux.SIGKILL {
return false
diff --git a/pkg/sentry/kernel/sessions.go b/pkg/sentry/kernel/sessions.go
index c0c1f1f13..ae21a55da 100644
--- a/pkg/sentry/kernel/sessions.go
+++ b/pkg/sentry/kernel/sessions.go
@@ -121,8 +121,9 @@ func (pg *ProcessGroup) Originator() *ThreadGroup {
// IsOrphan returns true if this process group is an orphan.
func (pg *ProcessGroup) IsOrphan() bool {
- pg.originator.TaskSet().mu.RLock()
- defer pg.originator.TaskSet().mu.RUnlock()
+ ts := pg.originator.TaskSet()
+ ts.mu.RLock()
+ defer ts.mu.RUnlock()
return pg.ancestors == 0
}
diff --git a/pkg/sentry/mm/syscalls.go b/pkg/sentry/mm/syscalls.go
index 125fd855b..b51ec6aa7 100644
--- a/pkg/sentry/mm/syscalls.go
+++ b/pkg/sentry/mm/syscalls.go
@@ -204,6 +204,7 @@ func (mm *MemoryManager) populateVMA(ctx context.Context, vseg vmaIterator, ar h
// * vseg.Range().IsSupersetOf(ar).
//
// Postconditions: mm.mappingMu will be unlocked.
+// +checklocksrelease:mm.mappingMu
func (mm *MemoryManager) populateVMAAndUnlock(ctx context.Context, vseg vmaIterator, ar hostarch.AddrRange, precommit bool) {
// See populateVMA above for commentary.
if !vseg.ValuePtr().effectivePerms.Any() {
diff --git a/pkg/sentry/pgalloc/pgalloc.go b/pkg/sentry/pgalloc/pgalloc.go
index f7d5a1800..0c8542485 100644
--- a/pkg/sentry/pgalloc/pgalloc.go
+++ b/pkg/sentry/pgalloc/pgalloc.go
@@ -945,7 +945,7 @@ func (f *MemoryFile) updateUsageLocked(currentUsage uint64, checkCommitted func(
// NOTE(b/165896008): mincore (which is passed as checkCommitted)
// by f.UpdateUsage() might take a really long time. So unlock f.mu
// while checkCommitted runs.
- f.mu.Unlock()
+ f.mu.Unlock() // +checklocksforce
err := checkCommitted(s, buf)
f.mu.Lock()
if err != nil {
diff --git a/pkg/sentry/vfs/dentry.go b/pkg/sentry/vfs/dentry.go
index 242eb5ecb..cb92b6eee 100644
--- a/pkg/sentry/vfs/dentry.go
+++ b/pkg/sentry/vfs/dentry.go
@@ -196,11 +196,12 @@ func (d *Dentry) OnZeroWatches(ctx context.Context) {
// PrepareDeleteDentry must be called before attempting to delete the file
// represented by d. If PrepareDeleteDentry succeeds, the caller must call
// AbortDeleteDentry or CommitDeleteDentry depending on the deletion's outcome.
+// +checklocksacquire:d.mu
func (vfs *VirtualFilesystem) PrepareDeleteDentry(mntns *MountNamespace, d *Dentry) error {
vfs.mountMu.Lock()
if mntns.mountpoints[d] != 0 {
vfs.mountMu.Unlock()
- return linuxerr.EBUSY
+ return linuxerr.EBUSY // +checklocksforce: inconsistent return.
}
d.mu.Lock()
vfs.mountMu.Unlock()
@@ -211,14 +212,14 @@ func (vfs *VirtualFilesystem) PrepareDeleteDentry(mntns *MountNamespace, d *Dent
// AbortDeleteDentry must be called after PrepareDeleteDentry if the deletion
// fails.
-// +checklocks:d.mu
+// +checklocksrelease:d.mu
func (vfs *VirtualFilesystem) AbortDeleteDentry(d *Dentry) {
d.mu.Unlock()
}
// CommitDeleteDentry must be called after PrepareDeleteDentry if the deletion
// succeeds.
-// +checklocks:d.mu
+// +checklocksrelease:d.mu
func (vfs *VirtualFilesystem) CommitDeleteDentry(ctx context.Context, d *Dentry) {
d.dead = true
d.mu.Unlock()
@@ -249,16 +250,18 @@ func (vfs *VirtualFilesystem) InvalidateDentry(ctx context.Context, d *Dentry) {
// Preconditions:
// * If to is not nil, it must be a child Dentry from the same Filesystem.
// * from != to.
+// +checklocksacquire:from.mu
+// +checklocksacquire:to.mu
func (vfs *VirtualFilesystem) PrepareRenameDentry(mntns *MountNamespace, from, to *Dentry) error {
vfs.mountMu.Lock()
if mntns.mountpoints[from] != 0 {
vfs.mountMu.Unlock()
- return linuxerr.EBUSY
+ return linuxerr.EBUSY // +checklocksforce: no locks acquired.
}
if to != nil {
if mntns.mountpoints[to] != 0 {
vfs.mountMu.Unlock()
- return linuxerr.EBUSY
+ return linuxerr.EBUSY // +checklocksforce: no locks acquired.
}
to.mu.Lock()
}
@@ -267,13 +270,13 @@ func (vfs *VirtualFilesystem) PrepareRenameDentry(mntns *MountNamespace, from, t
// Return with from.mu and to.mu locked, which will be unlocked by
// AbortRenameDentry, CommitRenameReplaceDentry, or
// CommitRenameExchangeDentry.
- return nil
+ return nil // +checklocksforce: to may not be acquired.
}
// AbortRenameDentry must be called after PrepareRenameDentry if the rename
// fails.
-// +checklocks:from.mu
-// +checklocks:to.mu
+// +checklocksrelease:from.mu
+// +checklocksrelease:to.mu
func (vfs *VirtualFilesystem) AbortRenameDentry(from, to *Dentry) {
from.mu.Unlock()
if to != nil {
@@ -286,8 +289,8 @@ func (vfs *VirtualFilesystem) AbortRenameDentry(from, to *Dentry) {
// that was replaced by from.
//
// Preconditions: PrepareRenameDentry was previously called on from and to.
-// +checklocks:from.mu
-// +checklocks:to.mu
+// +checklocksrelease:from.mu
+// +checklocksrelease:to.mu
func (vfs *VirtualFilesystem) CommitRenameReplaceDentry(ctx context.Context, from, to *Dentry) {
from.mu.Unlock()
if to != nil {
@@ -303,8 +306,8 @@ func (vfs *VirtualFilesystem) CommitRenameReplaceDentry(ctx context.Context, fro
// from and to are exchanged by rename(RENAME_EXCHANGE).
//
// Preconditions: PrepareRenameDentry was previously called on from and to.
-// +checklocks:from.mu
-// +checklocks:to.mu
+// +checklocksrelease:from.mu
+// +checklocksrelease:to.mu
func (vfs *VirtualFilesystem) CommitRenameExchangeDentry(from, to *Dentry) {
from.mu.Unlock()
to.mu.Unlock()
diff --git a/pkg/sync/mutex_unsafe.go b/pkg/sync/mutex_unsafe.go
index 411a80a8a..b829765d9 100644
--- a/pkg/sync/mutex_unsafe.go
+++ b/pkg/sync/mutex_unsafe.go
@@ -32,6 +32,18 @@ func (m *CrossGoroutineMutex) state() *int32 {
return &(*syncMutex)(unsafe.Pointer(&m.Mutex)).state
}
+// Lock locks the underlying Mutex.
+// +checklocksignore
+func (m *CrossGoroutineMutex) Lock() {
+ m.Mutex.Lock()
+}
+
+// Unlock unlocks the underlying Mutex.
+// +checklocksignore
+func (m *CrossGoroutineMutex) Unlock() {
+ m.Mutex.Unlock()
+}
+
const (
mutexUnlocked = 0
mutexLocked = 1
@@ -62,6 +74,7 @@ type Mutex struct {
// Lock locks m. If the lock is already in use, the calling goroutine blocks
// until the mutex is available.
+// +checklocksignore
func (m *Mutex) Lock() {
noteLock(unsafe.Pointer(m))
m.m.Lock()
@@ -80,6 +93,7 @@ func (m *Mutex) Unlock() {
// TryLock tries to acquire the mutex. It returns true if it succeeds and false
// otherwise. TryLock does not block.
+// +checklocksignore
func (m *Mutex) TryLock() bool {
// Note lock first to enforce proper locking even if unsuccessful.
noteLock(unsafe.Pointer(m))
diff --git a/pkg/sync/rwmutex_unsafe.go b/pkg/sync/rwmutex_unsafe.go
index 892d3e641..7829b06db 100644
--- a/pkg/sync/rwmutex_unsafe.go
+++ b/pkg/sync/rwmutex_unsafe.go
@@ -37,6 +37,7 @@ const rwmutexMaxReaders = 1 << 30
// TryRLock locks rw for reading. It returns true if it succeeds and false
// otherwise. It does not block.
+// +checklocksignore
func (rw *CrossGoroutineRWMutex) TryRLock() bool {
if RaceEnabled {
RaceDisable()
@@ -65,6 +66,7 @@ func (rw *CrossGoroutineRWMutex) TryRLock() bool {
// It should not be used for recursive read locking; a blocked Lock call
// excludes new readers from acquiring the lock. See the documentation on the
// RWMutex type.
+// +checklocksignore
func (rw *CrossGoroutineRWMutex) RLock() {
if RaceEnabled {
RaceDisable()
@@ -83,6 +85,7 @@ func (rw *CrossGoroutineRWMutex) RLock() {
//
// Preconditions:
// * rw is locked for reading.
+// +checklocksignore
func (rw *CrossGoroutineRWMutex) RUnlock() {
if RaceEnabled {
RaceReleaseMerge(unsafe.Pointer(&rw.writerSem))
@@ -134,6 +137,7 @@ func (rw *CrossGoroutineRWMutex) TryLock() bool {
// Lock locks rw for writing. If the lock is already locked for reading or
// writing, Lock blocks until the lock is available.
+// +checklocksignore
func (rw *CrossGoroutineRWMutex) Lock() {
if RaceEnabled {
RaceDisable()
@@ -228,6 +232,7 @@ type RWMutex struct {
// TryRLock locks rw for reading. It returns true if it succeeds and false
// otherwise. It does not block.
+// +checklocksignore
func (rw *RWMutex) TryRLock() bool {
// Note lock first to enforce proper locking even if unsuccessful.
noteLock(unsafe.Pointer(rw))
@@ -243,6 +248,7 @@ func (rw *RWMutex) TryRLock() bool {
// It should not be used for recursive read locking; a blocked Lock call
// excludes new readers from acquiring the lock. See the documentation on the
// RWMutex type.
+// +checklocksignore
func (rw *RWMutex) RLock() {
noteLock(unsafe.Pointer(rw))
rw.m.RLock()
@@ -261,6 +267,7 @@ func (rw *RWMutex) RUnlock() {
// TryLock locks rw for writing. It returns true if it succeeds and false
// otherwise. It does not block.
+// +checklocksignore
func (rw *RWMutex) TryLock() bool {
// Note lock first to enforce proper locking even if unsuccessful.
noteLock(unsafe.Pointer(rw))
@@ -273,6 +280,7 @@ func (rw *RWMutex) TryLock() bool {
// Lock locks rw for writing. If the lock is already locked for reading or
// writing, Lock blocks until the lock is available.
+// +checklocksignore
func (rw *RWMutex) Lock() {
noteLock(unsafe.Pointer(rw))
rw.m.Lock()
diff --git a/pkg/tcpip/stack/addressable_endpoint_state.go b/pkg/tcpip/stack/addressable_endpoint_state.go
index ce9cebdaa..ae0bb4ace 100644
--- a/pkg/tcpip/stack/addressable_endpoint_state.go
+++ b/pkg/tcpip/stack/addressable_endpoint_state.go
@@ -249,7 +249,7 @@ func (a *AddressableEndpointState) addAndAcquireAddressLocked(addr tcpip.Address
// or we are adding a new temporary or permanent address.
//
// The address MUST be write locked at this point.
- defer addrState.mu.Unlock()
+ defer addrState.mu.Unlock() // +checklocksforce
if permanent {
if addrState.mu.kind.IsPermanent() {
diff --git a/pkg/tcpip/stack/conntrack.go b/pkg/tcpip/stack/conntrack.go
index 782e74b24..068dab7ce 100644
--- a/pkg/tcpip/stack/conntrack.go
+++ b/pkg/tcpip/stack/conntrack.go
@@ -363,7 +363,7 @@ func (ct *ConnTrack) insertConn(conn *conn) {
// Unlocking can happen in any order.
ct.buckets[tupleBucket].mu.Unlock()
if tupleBucket != replyBucket {
- ct.buckets[replyBucket].mu.Unlock()
+ ct.buckets[replyBucket].mu.Unlock() // +checklocksforce
}
}
@@ -626,7 +626,7 @@ func (ct *ConnTrack) reapTupleLocked(tuple *tuple, bucket int, now time.Time) bo
// Don't re-unlock if both tuples are in the same bucket.
if differentBuckets {
- ct.buckets[replyBucket].mu.Unlock()
+ ct.buckets[replyBucket].mu.Unlock() // +checklocksforce
}
return true
diff --git a/pkg/tcpip/transport/icmp/endpoint.go b/pkg/tcpip/transport/icmp/endpoint.go
index cb316d27a..f9a15efb2 100644
--- a/pkg/tcpip/transport/icmp/endpoint.go
+++ b/pkg/tcpip/transport/icmp/endpoint.go
@@ -213,6 +213,7 @@ func (e *endpoint) Read(dst io.Writer, opts tcpip.ReadOptions) (tcpip.ReadResult
// reacquire the mutex in exclusive mode.
//
// Returns true for retry if preparation should be retried.
+// +checklocks:e.mu
func (e *endpoint) prepareForWrite(to *tcpip.FullAddress) (retry bool, err tcpip.Error) {
switch e.state {
case stateInitial:
@@ -229,10 +230,8 @@ func (e *endpoint) prepareForWrite(to *tcpip.FullAddress) (retry bool, err tcpip
}
e.mu.RUnlock()
- defer e.mu.RLock()
-
e.mu.Lock()
- defer e.mu.Unlock()
+ defer e.mu.DowngradeLock()
// The state changed when we released the shared locked and re-acquired
// it in exclusive mode. Try again.
diff --git a/pkg/tcpip/transport/tcp/accept.go b/pkg/tcpip/transport/tcp/accept.go
index d807b13b7..aa413ad05 100644
--- a/pkg/tcpip/transport/tcp/accept.go
+++ b/pkg/tcpip/transport/tcp/accept.go
@@ -330,7 +330,9 @@ func (l *listenContext) performHandshake(s *segment, opts *header.TCPSynOptions,
}
ep := h.ep
- if err := h.complete(); err != nil {
+ // N.B. the endpoint is generated above by startHandshake, and will be
+ // returned locked. This first call is forced.
+ if err := h.complete(); err != nil { // +checklocksforce
ep.stack.Stats().TCP.FailedConnectionAttempts.Increment()
ep.stats.FailedConnectionAttempts.Increment()
l.cleanupFailedHandshake(h)
@@ -364,6 +366,7 @@ func (l *listenContext) closeAllPendingEndpoints() {
}
// Precondition: h.ep.mu must be held.
+// +checklocks:h.ep.mu
func (l *listenContext) cleanupFailedHandshake(h *handshake) {
e := h.ep
e.mu.Unlock()
@@ -504,7 +507,9 @@ func (e *endpoint) handleSynSegment(ctx *listenContext, s *segment, opts *header
}
go func() {
- if err := h.complete(); err != nil {
+ // Note that startHandshake returns a locked endpoint. The
+ // force call here just makes it so.
+ if err := h.complete(); err != nil { // +checklocksforce
e.stack.Stats().TCP.FailedConnectionAttempts.Increment()
e.stats.FailedConnectionAttempts.Increment()
ctx.cleanupFailedHandshake(h)
diff --git a/pkg/tcpip/transport/tcp/connect.go b/pkg/tcpip/transport/tcp/connect.go
index e39d1623d..93ed161f9 100644
--- a/pkg/tcpip/transport/tcp/connect.go
+++ b/pkg/tcpip/transport/tcp/connect.go
@@ -511,6 +511,7 @@ func (h *handshake) start() {
}
// complete completes the TCP 3-way handshake initiated by h.start().
+// +checklocks:h.ep.mu
func (h *handshake) complete() tcpip.Error {
// Set up the wakers.
var s sleep.Sleeper
@@ -1283,42 +1284,45 @@ func (e *endpoint) disableKeepaliveTimer() {
e.keepalive.Unlock()
}
-// protocolMainLoop is the main loop of the TCP protocol. It runs in its own
-// goroutine and is responsible for sending segments and handling received
-// segments.
-func (e *endpoint) protocolMainLoop(handshake bool, wakerInitDone chan<- struct{}) tcpip.Error {
- e.mu.Lock()
- var closeTimer tcpip.Timer
- var closeWaker sleep.Waker
-
- epilogue := func() {
- // e.mu is expected to be hold upon entering this section.
- if e.snd != nil {
- e.snd.resendTimer.cleanup()
- e.snd.probeTimer.cleanup()
- e.snd.reorderTimer.cleanup()
- }
+// protocolMainLoopDone is called at the end of protocolMainLoop.
+// +checklocksrelease:e.mu
+func (e *endpoint) protocolMainLoopDone(closeTimer tcpip.Timer, closeWaker *sleep.Waker) {
+ if e.snd != nil {
+ e.snd.resendTimer.cleanup()
+ e.snd.probeTimer.cleanup()
+ e.snd.reorderTimer.cleanup()
+ }
- if closeTimer != nil {
- closeTimer.Stop()
- }
+ if closeTimer != nil {
+ closeTimer.Stop()
+ }
- e.completeWorkerLocked()
+ e.completeWorkerLocked()
- if e.drainDone != nil {
- close(e.drainDone)
- }
+ if e.drainDone != nil {
+ close(e.drainDone)
+ }
- e.mu.Unlock()
+ e.mu.Unlock()
- e.drainClosingSegmentQueue()
+ e.drainClosingSegmentQueue()
- // When the protocol loop exits we should wake up our waiters.
- e.waiterQueue.Notify(waiter.EventHUp | waiter.EventErr | waiter.ReadableEvents | waiter.WritableEvents)
- }
+ // When the protocol loop exits we should wake up our waiters.
+ e.waiterQueue.Notify(waiter.EventHUp | waiter.EventErr | waiter.ReadableEvents | waiter.WritableEvents)
+}
+// protocolMainLoop is the main loop of the TCP protocol. It runs in its own
+// goroutine and is responsible for sending segments and handling received
+// segments.
+func (e *endpoint) protocolMainLoop(handshake bool, wakerInitDone chan<- struct{}) tcpip.Error {
+ var (
+ closeTimer tcpip.Timer
+ closeWaker sleep.Waker
+ )
+
+ e.mu.Lock()
if handshake {
- if err := e.h.complete(); err != nil {
+ if err := e.h.complete(); err != nil { // +checklocksforce
e.lastErrorMu.Lock()
e.lastError = err
e.lastErrorMu.Unlock()
@@ -1327,8 +1331,7 @@ func (e *endpoint) protocolMainLoop(handshake bool, wakerInitDone chan<- struct{
e.hardError = err
e.workerCleanup = true
- // Lock released below.
- epilogue()
+ e.protocolMainLoopDone(closeTimer, &closeWaker)
return err
}
}
@@ -1472,7 +1475,7 @@ func (e *endpoint) protocolMainLoop(handshake bool, wakerInitDone chan<- struct{
// Only block the worker if the endpoint
// is not in closed state or error state.
close(e.drainDone)
- e.mu.Unlock()
+ e.mu.Unlock() // +checklocksforce
<-e.undrain
e.mu.Lock()
}
@@ -1533,8 +1536,6 @@ func (e *endpoint) protocolMainLoop(handshake bool, wakerInitDone chan<- struct{
if err != nil {
e.resetConnectionLocked(err)
}
- // Lock released below.
- epilogue()
}
loop:
@@ -1558,6 +1559,7 @@ loop:
// just want to terminate the loop and cleanup the
// endpoint.
cleanupOnError(nil)
+ e.protocolMainLoopDone(closeTimer, &closeWaker)
return nil
case StateTimeWait:
fallthrough
@@ -1566,6 +1568,7 @@ loop:
default:
if err := funcs[v].f(); err != nil {
cleanupOnError(err)
+ e.protocolMainLoopDone(closeTimer, &closeWaker)
return nil
}
}
@@ -1589,13 +1592,13 @@ loop:
// Handle any StateError transition from StateTimeWait.
if e.EndpointState() == StateError {
cleanupOnError(nil)
+ e.protocolMainLoopDone(closeTimer, &closeWaker)
return nil
}
e.transitionToStateCloseLocked()
- // Lock released below.
- epilogue()
+ e.protocolMainLoopDone(closeTimer, &closeWaker)
// A new SYN was received during TIME_WAIT and we need to abort
// the timewait and redirect the segment to the listener queue
@@ -1665,6 +1668,7 @@ func (e *endpoint) handleTimeWaitSegments() (extendTimeWait bool, reuseTW func()
// should be executed after releasing the endpoint registrations. This is
// done in cases where a new SYN is received during TIME_WAIT that carries
// a sequence number larger than one see on the connection.
+// +checklocks:e.mu
func (e *endpoint) doTimeWait() (twReuse func()) {
// Trigger a 2 * MSL time wait state. During this period
// we will drop all incoming segments.
diff --git a/pkg/tcpip/transport/tcp/dispatcher.go b/pkg/tcpip/transport/tcp/dispatcher.go
index dff7cb89c..7d110516b 100644
--- a/pkg/tcpip/transport/tcp/dispatcher.go
+++ b/pkg/tcpip/transport/tcp/dispatcher.go
@@ -127,7 +127,7 @@ func (p *processor) start(wg *sync.WaitGroup) {
case !ep.segmentQueue.empty():
p.epQ.enqueue(ep)
}
- ep.mu.Unlock()
+ ep.mu.Unlock() // +checklocksforce
} else {
ep.newSegmentWaker.Assert()
}
diff --git a/pkg/tcpip/transport/tcp/endpoint.go b/pkg/tcpip/transport/tcp/endpoint.go
index 4acddc959..1ed4ba419 100644
--- a/pkg/tcpip/transport/tcp/endpoint.go
+++ b/pkg/tcpip/transport/tcp/endpoint.go
@@ -664,6 +664,7 @@ func calculateAdvertisedMSS(userMSS uint16, r *stack.Route) uint16 {
// The assumption behind spinning here being that background packet processing
// should not be holding the lock for long and spinning reduces latency as we
// avoid an expensive sleep/wakeup of of the syscall goroutine).
+// +checklocksacquire:e.mu
func (e *endpoint) LockUser() {
for {
// Try first if the sock is locked then check if it's owned
@@ -683,7 +684,7 @@ func (e *endpoint) LockUser() {
continue
}
atomic.StoreUint32(&e.ownedByUser, 1)
- return
+ return // +checklocksforce
}
}
@@ -700,7 +701,7 @@ func (e *endpoint) LockUser() {
// protocol goroutine altogether.
//
// Precondition: e.LockUser() must have been called before calling e.UnlockUser()
-// +checklocks:e.mu
+// +checklocksrelease:e.mu
func (e *endpoint) UnlockUser() {
// Lock segment queue before checking so that we avoid a race where
// segments can be queued between the time we check if queue is empty
@@ -736,12 +737,13 @@ func (e *endpoint) UnlockUser() {
}
// StopWork halts packet processing. Only to be used in tests.
+// +checklocksacquire:e.mu
func (e *endpoint) StopWork() {
e.mu.Lock()
}
// ResumeWork resumes packet processing. Only to be used in tests.
-// +checklocks:e.mu
+// +checklocksrelease:e.mu
func (e *endpoint) ResumeWork() {
e.mu.Unlock()
}
@@ -1480,86 +1482,95 @@ func (e *endpoint) isEndpointWritableLocked() (int, tcpip.Error) {
return avail, nil
}
-// Write writes data to the endpoint's peer.
-func (e *endpoint) Write(p tcpip.Payloader, opts tcpip.WriteOptions) (int64, tcpip.Error) {
- // Linux completely ignores any address passed to sendto(2) for TCP sockets
- // (without the MSG_FASTOPEN flag). Corking is unimplemented, so opts.More
- // and opts.EndOfRecord are also ignored.
+// readFromPayloader reads a slice from the Payloader.
+// +checklocks:e.mu
+// +checklocks:e.sndQueueInfo.sndQueueMu
+func (e *endpoint) readFromPayloader(p tcpip.Payloader, opts tcpip.WriteOptions, avail int) ([]byte, tcpip.Error) {
+ // We can release locks while copying data.
+ //
+ // This is not possible if atomic is set, because we can't allow the
+ // available buffer space to be consumed by some other caller while we
+ // are copying data in.
+ if !opts.Atomic {
+ e.sndQueueInfo.sndQueueMu.Unlock()
+ defer e.sndQueueInfo.sndQueueMu.Lock()
- e.LockUser()
- defer e.UnlockUser()
+ e.UnlockUser()
+ defer e.LockUser()
+ }
- nextSeg, n, err := func() (*segment, int, tcpip.Error) {
- e.sndQueueInfo.sndQueueMu.Lock()
- defer e.sndQueueInfo.sndQueueMu.Unlock()
+ // Fetch data.
+ if l := p.Len(); l < avail {
+ avail = l
+ }
+ if avail == 0 {
+ return nil, nil
+ }
+ v := make([]byte, avail)
+ n, err := p.Read(v)
+ if err != nil && err != io.EOF {
+ return nil, &tcpip.ErrBadBuffer{}
+ }
+ return v[:n], nil
+}
+
+// queueSegment reads data from the payloader and returns a segment to be sent.
+// +checklocks:e.mu
+func (e *endpoint) queueSegment(p tcpip.Payloader, opts tcpip.WriteOptions) (*segment, int, tcpip.Error) {
+ e.sndQueueInfo.sndQueueMu.Lock()
+ defer e.sndQueueInfo.sndQueueMu.Unlock()
+
+ avail, err := e.isEndpointWritableLocked()
+ if err != nil {
+ e.stats.WriteErrors.WriteClosed.Increment()
+ return nil, 0, err
+ }
+ v, err := e.readFromPayloader(p, opts, avail)
+ if err != nil {
+ return nil, 0, err
+ }
+ if !opts.Atomic {
+ // Since we released locks in between it's possible that the
+ // endpoint transitioned to a CLOSED/ERROR states so make
+ // sure endpoint is still writable before trying to write.
avail, err := e.isEndpointWritableLocked()
if err != nil {
e.stats.WriteErrors.WriteClosed.Increment()
return nil, 0, err
}
- v, err := func() ([]byte, tcpip.Error) {
- // We can release locks while copying data.
- //
- // This is not possible if atomic is set, because we can't allow the
- // available buffer space to be consumed by some other caller while we
- // are copying data in.
- if !opts.Atomic {
- e.sndQueueInfo.sndQueueMu.Unlock()
- defer e.sndQueueInfo.sndQueueMu.Lock()
-
- e.UnlockUser()
- defer e.LockUser()
- }
-
- // Fetch data.
- if l := p.Len(); l < avail {
- avail = l
- }
- if avail == 0 {
- return nil, nil
- }
- v := make([]byte, avail)
- n, err := p.Read(v)
- if err != nil && err != io.EOF {
- return nil, &tcpip.ErrBadBuffer{}
- }
- return v[:n], nil
- }()
- if len(v) == 0 || err != nil {
- return nil, 0, err
+ // Discard any excess data copied in due to avail being reduced due
+ // to a simultaneous write call to the socket.
+ if avail < len(v) {
+ v = v[:avail]
}
+ }
- if !opts.Atomic {
- // Since we released locks in between it's possible that the
- // endpoint transitioned to a CLOSED/ERROR states so make
- // sure endpoint is still writable before trying to write.
- avail, err := e.isEndpointWritableLocked()
- if err != nil {
- e.stats.WriteErrors.WriteClosed.Increment()
- return nil, 0, err
- }
+ // Add data to the send queue.
+ s := newOutgoingSegment(e.TransportEndpointInfo.ID, e.stack.Clock(), v)
+ e.sndQueueInfo.SndBufUsed += len(v)
+ e.snd.writeList.PushBack(s)
- // Discard any excess data copied in due to avail being reduced due
- // to a simultaneous write call to the socket.
- if avail < len(v) {
- v = v[:avail]
- }
- }
+ return s, len(v), nil
+}
- // Add data to the send queue.
- s := newOutgoingSegment(e.TransportEndpointInfo.ID, e.stack.Clock(), v)
- e.sndQueueInfo.SndBufUsed += len(v)
- e.snd.writeList.PushBack(s)
+// Write writes data to the endpoint's peer.
+func (e *endpoint) Write(p tcpip.Payloader, opts tcpip.WriteOptions) (int64, tcpip.Error) {
+ // Linux completely ignores any address passed to sendto(2) for TCP sockets
+ // (without the MSG_FASTOPEN flag). Corking is unimplemented, so opts.More
+ // and opts.EndOfRecord are also ignored.
+
+ e.LockUser()
+ defer e.UnlockUser()
- return s, len(v), nil
- }()
// Return if either we didn't queue anything or if an error occurred while
// attempting to queue data.
+ nextSeg, n, err := e.queueSegment(p, opts)
if n == 0 || err != nil {
return 0, err
}
+
e.sendData(nextSeg)
return int64(n), nil
}
@@ -2504,6 +2515,7 @@ func (e *endpoint) listen(backlog int) tcpip.Error {
// startAcceptedLoop sets up required state and starts a goroutine with the
// main loop for accepted connections.
+// +checklocksrelease:e.mu
func (e *endpoint) startAcceptedLoop() {
e.workerRunning = true
e.mu.Unlock()
diff --git a/pkg/tcpip/transport/tcp/forwarder.go b/pkg/tcpip/transport/tcp/forwarder.go
index 65c86823a..2e709ed78 100644
--- a/pkg/tcpip/transport/tcp/forwarder.go
+++ b/pkg/tcpip/transport/tcp/forwarder.go
@@ -164,8 +164,9 @@ func (r *ForwarderRequest) CreateEndpoint(queue *waiter.Queue) (tcpip.Endpoint,
return nil, err
}
- // Start the protocol goroutine.
- ep.startAcceptedLoop()
+ // Start the protocol goroutine. Note that the endpoint is returned
+ // from performHandshake locked.
+ ep.startAcceptedLoop() // +checklocksforce
return ep, nil
}
diff --git a/pkg/tcpip/transport/udp/endpoint.go b/pkg/tcpip/transport/udp/endpoint.go
index def9d7186..82a3f2287 100644
--- a/pkg/tcpip/transport/udp/endpoint.go
+++ b/pkg/tcpip/transport/udp/endpoint.go
@@ -364,6 +364,7 @@ func (e *endpoint) Read(dst io.Writer, opts tcpip.ReadOptions) (tcpip.ReadResult
// reacquire the mutex in exclusive mode.
//
// Returns true for retry if preparation should be retried.
+// +checklocks:e.mu
func (e *endpoint) prepareForWrite(to *tcpip.FullAddress) (retry bool, err tcpip.Error) {
switch e.EndpointState() {
case StateInitial:
@@ -380,10 +381,8 @@ func (e *endpoint) prepareForWrite(to *tcpip.FullAddress) (retry bool, err tcpip
}
e.mu.RUnlock()
- defer e.mu.RLock()
-
e.mu.Lock()
- defer e.mu.Unlock()
+ defer e.mu.DowngradeLock()
// The state changed when we released the shared locked and re-acquired
// it in exclusive mode. Try again.
@@ -449,37 +448,20 @@ func (e *endpoint) Write(p tcpip.Payloader, opts tcpip.WriteOptions) (int64, tcp
return n, err
}
-func (e *endpoint) write(p tcpip.Payloader, opts tcpip.WriteOptions) (int64, tcpip.Error) {
- if err := e.LastError(); err != nil {
- return 0, err
- }
-
- // MSG_MORE is unimplemented. (This also means that MSG_EOR is a no-op.)
- if opts.More {
- return 0, &tcpip.ErrInvalidOptionValue{}
- }
-
- to := opts.To
-
+func (e *endpoint) buildUDPPacketInfo(p tcpip.Payloader, opts tcpip.WriteOptions) (udpPacketInfo, tcpip.Error) {
e.mu.RLock()
- lockReleased := false
- defer func() {
- if lockReleased {
- return
- }
- e.mu.RUnlock()
- }()
+ defer e.mu.RUnlock()
// If we've shutdown with SHUT_WR we are in an invalid state for sending.
if e.shutdownFlags&tcpip.ShutdownWrite != 0 {
- return 0, &tcpip.ErrClosedForSend{}
+ return udpPacketInfo{}, &tcpip.ErrClosedForSend{}
}
// Prepare for write.
for {
- retry, err := e.prepareForWrite(to)
+ retry, err := e.prepareForWrite(opts.To)
if err != nil {
- return 0, err
+ return udpPacketInfo{}, err
}
if !retry {
@@ -489,34 +471,34 @@ func (e *endpoint) write(p tcpip.Payloader, opts tcpip.WriteOptions) (int64, tcp
route := e.route
dstPort := e.dstPort
- if to != nil {
+ if opts.To != nil {
// Reject destination address if it goes through a different
// NIC than the endpoint was bound to.
- nicID := to.NIC
+ nicID := opts.To.NIC
if nicID == 0 {
nicID = tcpip.NICID(e.ops.GetBindToDevice())
}
if e.BindNICID != 0 {
if nicID != 0 && nicID != e.BindNICID {
- return 0, &tcpip.ErrNoRoute{}
+ return udpPacketInfo{}, &tcpip.ErrNoRoute{}
}
nicID = e.BindNICID
}
- if to.Port == 0 {
+ if opts.To.Port == 0 {
// Port 0 is an invalid port to send to.
- return 0, &tcpip.ErrInvalidEndpointState{}
+ return udpPacketInfo{}, &tcpip.ErrInvalidEndpointState{}
}
- dst, netProto, err := e.checkV4MappedLocked(*to)
+ dst, netProto, err := e.checkV4MappedLocked(*opts.To)
if err != nil {
- return 0, err
+ return udpPacketInfo{}, err
}
r, _, err := e.connectRoute(nicID, dst, netProto)
if err != nil {
- return 0, err
+ return udpPacketInfo{}, err
}
defer r.Release()
@@ -525,12 +507,12 @@ func (e *endpoint) write(p tcpip.Payloader, opts tcpip.WriteOptions) (int64, tcp
}
if !e.ops.GetBroadcast() && route.IsOutboundBroadcast() {
- return 0, &tcpip.ErrBroadcastDisabled{}
+ return udpPacketInfo{}, &tcpip.ErrBroadcastDisabled{}
}
v := make([]byte, p.Len())
if _, err := io.ReadFull(p, v); err != nil {
- return 0, &tcpip.ErrBadBuffer{}
+ return udpPacketInfo{}, &tcpip.ErrBadBuffer{}
}
if len(v) > header.UDPMaximumPacketSize {
// Payload can't possibly fit in a packet.
@@ -548,24 +530,39 @@ func (e *endpoint) write(p tcpip.Payloader, opts tcpip.WriteOptions) (int64, tcp
v,
)
}
- return 0, &tcpip.ErrMessageTooLong{}
+ return udpPacketInfo{}, &tcpip.ErrMessageTooLong{}
}
ttl := e.ttl
useDefaultTTL := ttl == 0
-
if header.IsV4MulticastAddress(route.RemoteAddress()) || header.IsV6MulticastAddress(route.RemoteAddress()) {
ttl = e.multicastTTL
// Multicast allows a 0 TTL.
useDefaultTTL = false
}
- localPort := e.ID.LocalPort
- sendTOS := e.sendTOS
- owner := e.owner
- noChecksum := e.SocketOptions().GetNoChecksum()
- lockReleased = true
- e.mu.RUnlock()
+ return udpPacketInfo{
+ route: route,
+ data: buffer.View(v),
+ localPort: e.ID.LocalPort,
+ remotePort: dstPort,
+ ttl: ttl,
+ useDefaultTTL: useDefaultTTL,
+ tos: e.sendTOS,
+ owner: e.owner,
+ noChecksum: e.SocketOptions().GetNoChecksum(),
+ }, nil
+}
+
+func (e *endpoint) write(p tcpip.Payloader, opts tcpip.WriteOptions) (int64, tcpip.Error) {
+ if err := e.LastError(); err != nil {
+ return 0, err
+ }
+
+ // MSG_MORE is unimplemented. (This also means that MSG_EOR is a no-op.)
+ if opts.More {
+ return 0, &tcpip.ErrInvalidOptionValue{}
+ }
// Do not hold lock when sending as loopback is synchronous and if the UDP
// datagram ends up generating an ICMP response then it can result in a
@@ -577,10 +574,15 @@ func (e *endpoint) write(p tcpip.Payloader, opts tcpip.WriteOptions) (int64, tcp
//
// See: https://golang.org/pkg/sync/#RWMutex for details on why recursive read
// locking is prohibited.
- if err := sendUDP(route, buffer.View(v).ToVectorisedView(), localPort, dstPort, ttl, useDefaultTTL, sendTOS, owner, noChecksum); err != nil {
+ u, err := e.buildUDPPacketInfo(p, opts)
+ if err != nil {
return 0, err
}
- return int64(len(v)), nil
+ n, err := u.send()
+ if err != nil {
+ return 0, err
+ }
+ return int64(n), nil
}
// OnReuseAddressSet implements tcpip.SocketOptionsHandler.
@@ -817,14 +819,30 @@ func (e *endpoint) GetSockOpt(opt tcpip.GettableSocketOption) tcpip.Error {
return nil
}
-// sendUDP sends a UDP segment via the provided network endpoint and under the
-// provided identity.
-func sendUDP(r *stack.Route, data buffer.VectorisedView, localPort, remotePort uint16, ttl uint8, useDefaultTTL bool, tos uint8, owner tcpip.PacketOwner, noChecksum bool) tcpip.Error {
+// udpPacketInfo contains all information required to send a UDP packet.
+//
+// This should be used as a value-only type, which exists in order to simplify
+// return value syntax. It should not be exported or extended.
+type udpPacketInfo struct {
+ route *stack.Route
+ data buffer.View
+ localPort uint16
+ remotePort uint16
+ ttl uint8
+ useDefaultTTL bool
+ tos uint8
+ owner tcpip.PacketOwner
+ noChecksum bool
+}
+
+// send sends the given packet.
+func (u *udpPacketInfo) send() (int, tcpip.Error) {
+ vv := u.data.ToVectorisedView()
pkt := stack.NewPacketBuffer(stack.PacketBufferOptions{
- ReserveHeaderBytes: header.UDPMinimumSize + int(r.MaxHeaderLength()),
- Data: data,
+ ReserveHeaderBytes: header.UDPMinimumSize + int(u.route.MaxHeaderLength()),
+ Data: vv,
})
- pkt.Owner = owner
+ pkt.Owner = u.owner
// Initialize the UDP header.
udp := header.UDP(pkt.TransportHeader().Push(header.UDPMinimumSize))
@@ -832,8 +850,8 @@ func sendUDP(r *stack.Route, data buffer.VectorisedView, localPort, remotePort u
length := uint16(pkt.Size())
udp.Encode(&header.UDPFields{
- SrcPort: localPort,
- DstPort: remotePort,
+ SrcPort: u.localPort,
+ DstPort: u.remotePort,
Length: length,
})
@@ -841,30 +859,30 @@ func sendUDP(r *stack.Route, data buffer.VectorisedView, localPort, remotePort u
// On IPv4, UDP checksum is optional, and a zero value indicates the
// transmitter skipped the checksum generation (RFC768).
// On IPv6, UDP checksum is not optional (RFC2460 Section 8.1).
- if r.RequiresTXTransportChecksum() &&
- (!noChecksum || r.NetProto() == header.IPv6ProtocolNumber) {
- xsum := r.PseudoHeaderChecksum(ProtocolNumber, length)
- for _, v := range data.Views() {
+ if u.route.RequiresTXTransportChecksum() &&
+ (!u.noChecksum || u.route.NetProto() == header.IPv6ProtocolNumber) {
+ xsum := u.route.PseudoHeaderChecksum(ProtocolNumber, length)
+ for _, v := range vv.Views() {
xsum = header.Checksum(v, xsum)
}
udp.SetChecksum(^udp.CalculateChecksum(xsum))
}
- if useDefaultTTL {
- ttl = r.DefaultTTL()
+ if u.useDefaultTTL {
+ u.ttl = u.route.DefaultTTL()
}
- if err := r.WritePacket(stack.NetworkHeaderParams{
+ if err := u.route.WritePacket(stack.NetworkHeaderParams{
Protocol: ProtocolNumber,
- TTL: ttl,
- TOS: tos,
+ TTL: u.ttl,
+ TOS: u.tos,
}, pkt); err != nil {
- r.Stats().UDP.PacketSendErrors.Increment()
- return err
+ u.route.Stats().UDP.PacketSendErrors.Increment()
+ return 0, err
}
// Track count of packets sent.
- r.Stats().UDP.PacketsSent.Increment()
- return nil
+ u.route.Stats().UDP.PacketsSent.Increment()
+ return len(u.data), nil
}
// checkV4MappedLocked determines the effective network protocol and converts