59 files changed, 1230 insertions, 855 deletions
diff --git a/pkg/sentry/kernel/BUILD b/pkg/sentry/kernel/BUILD
index 10563e3d7..e4e0dc04f 100644
--- a/pkg/sentry/kernel/BUILD
+++ b/pkg/sentry/kernel/BUILD
@@ -227,6 +227,7 @@ go_library(
         "//pkg/context",
         "//pkg/coverage",
         "//pkg/cpuid",
+        "//pkg/errors",
         "//pkg/errors/linuxerr",
         "//pkg/eventchannel",
         "//pkg/fspath",
@@ -256,6 +257,7 @@ go_library(
         "//pkg/sentry/kernel/auth",
         "//pkg/sentry/kernel/epoll",
         "//pkg/sentry/kernel/futex",
+        "//pkg/sentry/kernel/msgqueue",
         "//pkg/sentry/kernel/sched",
         "//pkg/sentry/kernel/semaphore",
         "//pkg/sentry/kernel/shm",
@@ -301,6 +303,7 @@ go_test(
     deps = [
         "//pkg/abi",
         "//pkg/context",
+        "//pkg/errors/linuxerr",
         "//pkg/hostarch",
         "//pkg/sentry/arch",
         "//pkg/sentry/contexttest",
@@ -312,6 +315,5 @@ go_test(
         "//pkg/sentry/time",
         "//pkg/sentry/usage",
         "//pkg/sync",
-        "//pkg/syserror",
     ],
 )
diff --git a/pkg/sentry/kernel/abstract_socket_namespace.go b/pkg/sentry/kernel/abstract_socket_namespace.go
index d100e58d7..5d86a04f3 100644
--- a/pkg/sentry/kernel/abstract_socket_namespace.go
+++ b/pkg/sentry/kernel/abstract_socket_namespace.go
@@ -27,7 +27,7 @@ import (
 // +stateify savable
 type abstractEndpoint struct {
 	ep     transport.BoundEndpoint
-	socket refsvfs2.RefCounter
+	socket refsvfs2.TryRefCounter
 	name   string
 	ns     *AbstractSocketNamespace
 }
@@ -57,7 +57,7 @@ func NewAbstractSocketNamespace() *AbstractSocketNamespace {
 // its backing socket.
 type boundEndpoint struct {
 	transport.BoundEndpoint
-	socket refsvfs2.RefCounter
+	socket refsvfs2.TryRefCounter
 }
 
 // Release implements transport.BoundEndpoint.Release.
@@ -89,7 +89,7 @@ func (a *AbstractSocketNamespace) BoundEndpoint(name string) transport.BoundEndp
 //
 // When the last reference managed by socket is dropped, ep may be removed from the
 // namespace.
-func (a *AbstractSocketNamespace) Bind(ctx context.Context, name string, ep transport.BoundEndpoint, socket refsvfs2.RefCounter) error {
+func (a *AbstractSocketNamespace) Bind(ctx context.Context, name string, ep transport.BoundEndpoint, socket refsvfs2.TryRefCounter) error {
 	a.mu.Lock()
 	defer a.mu.Unlock()
 
@@ -109,7 +109,7 @@ func (a *AbstractSocketNamespace) Bind(ctx context.Context, name string, ep tran
 
 // Remove removes the specified socket at name from the abstract socket
 // namespace, if it has not yet been replaced.
-func (a *AbstractSocketNamespace) Remove(name string, socket refsvfs2.RefCounter) {
+func (a *AbstractSocketNamespace) Remove(name string, socket refsvfs2.TryRefCounter) {
 	a.mu.Lock()
 	defer a.mu.Unlock()
 
diff --git a/pkg/sentry/kernel/auth/BUILD b/pkg/sentry/kernel/auth/BUILD
index 12180351d..7a1a36454 100644
--- a/pkg/sentry/kernel/auth/BUILD
+++ b/pkg/sentry/kernel/auth/BUILD
@@ -63,6 +63,7 @@ go_library(
         "//pkg/abi/linux",
         "//pkg/bits",
         "//pkg/context",
+        "//pkg/errors/linuxerr",
         "//pkg/log",
         "//pkg/sync",
         "//pkg/syserror",
diff --git a/pkg/sentry/kernel/auth/credentials.go b/pkg/sentry/kernel/auth/credentials.go
index 3325fedcb..fc245c54b 100644
--- a/pkg/sentry/kernel/auth/credentials.go
+++ b/pkg/sentry/kernel/auth/credentials.go
@@ -16,7 +16,7 @@ package auth
 
 import (
 	"gvisor.dev/gvisor/pkg/abi/linux"
-	"gvisor.dev/gvisor/pkg/syserror"
+	"gvisor.dev/gvisor/pkg/errors/linuxerr"
 )
 
 // Credentials contains information required to authorize privileged operations
@@ -203,7 +203,7 @@ func (c *Credentials) UseUID(uid UID) (KUID, error) {
 	// uid must be mapped.
 	kuid := c.UserNamespace.MapToKUID(uid)
 	if !kuid.Ok() {
-		return NoID, syserror.EINVAL
+		return NoID, linuxerr.EINVAL
 	}
 	// If c has CAP_SETUID, then it can use any UID in its user namespace.
 	if c.HasCapability(linux.CAP_SETUID) {
@@ -214,7 +214,7 @@ func (c *Credentials) UseUID(uid UID) (KUID, error) {
 	if kuid == c.RealKUID || kuid == c.EffectiveKUID || kuid == c.SavedKUID {
 		return kuid, nil
 	}
-	return NoID, syserror.EPERM
+	return NoID, linuxerr.EPERM
 }
 
 // UseGID checks that c can use gid in its user namespace, then translates it
@@ -222,7 +222,7 @@ func (c *Credentials) UseUID(uid UID) (KUID, error) {
 func (c *Credentials) UseGID(gid GID) (KGID, error) {
 	kgid := c.UserNamespace.MapToKGID(gid)
 	if !kgid.Ok() {
-		return NoID, syserror.EINVAL
+		return NoID, linuxerr.EINVAL
 	}
 	if c.HasCapability(linux.CAP_SETGID) {
 		return kgid, nil
@@ -230,7 +230,7 @@ func (c *Credentials) UseGID(gid GID) (KGID, error) {
 	if kgid == c.RealKGID || kgid == c.EffectiveKGID || kgid == c.SavedKGID {
 		return kgid, nil
 	}
-	return NoID, syserror.EPERM
+	return NoID, linuxerr.EPERM
 }
 
 // SetUID translates the provided uid to the root user namespace and updates c's
@@ -239,7 +239,7 @@ func (c *Credentials) UseGID(gid GID) (KGID, error) {
 func (c *Credentials) SetUID(uid UID) error {
 	kuid := c.UserNamespace.MapToKUID(uid)
 	if !kuid.Ok() {
-		return syserror.EINVAL
+		return linuxerr.EINVAL
 	}
 	c.RealKUID = kuid
 	c.EffectiveKUID = kuid
@@ -253,7 +253,7 @@ func (c *Credentials) SetUID(uid UID) error {
 func (c *Credentials) SetGID(gid GID) error {
 	kgid := c.UserNamespace.MapToKGID(gid)
 	if !kgid.Ok() {
-		return syserror.EINVAL
+		return linuxerr.EINVAL
 	}
 	c.RealKGID = kgid
 	c.EffectiveKGID = kgid
diff --git a/pkg/sentry/kernel/auth/id_map.go b/pkg/sentry/kernel/auth/id_map.go
index 28cbe159d..f06a374a0 100644
--- a/pkg/sentry/kernel/auth/id_map.go
+++ b/pkg/sentry/kernel/auth/id_map.go
@@ -17,7 +17,7 @@ package auth
 import (
 	"gvisor.dev/gvisor/pkg/abi/linux"
 	"gvisor.dev/gvisor/pkg/context"
-	"gvisor.dev/gvisor/pkg/syserror"
+	"gvisor.dev/gvisor/pkg/errors/linuxerr"
 )
 
 // MapFromKUID translates kuid, a UID in the root namespace, to a UID in ns.
@@ -106,11 +106,11 @@ func (ns *UserNamespace) SetUIDMap(ctx context.Context, entries []IDMapEntry) er
 	// than once to a uid_map file in a user namespace fails with the error
 	// EPERM. Similar rules apply for gid_map files." - user_namespaces(7)
 	if !ns.uidMapFromParent.IsEmpty() {
-		return syserror.EPERM
+		return linuxerr.EPERM
 	}
 	// "At least one line must be written to the file."
 	if len(entries) == 0 {
-		return syserror.EINVAL
+		return linuxerr.EINVAL
 	}
 	// """
 	// In order for a process to write to the /proc/[pid]/uid_map
@@ -121,12 +121,12 @@ func (ns *UserNamespace) SetUIDMap(ctx context.Context, entries []IDMapEntry) er
 	// in the user namespace of the process pid.
 	// """
 	if !c.HasCapabilityIn(linux.CAP_SETUID, ns) {
-		return syserror.EPERM
+		return linuxerr.EPERM
 	}
 	// "2. The writing process must either be in the user namespace of the process
 	// pid or be in the parent user namespace of the process pid."
 	if c.UserNamespace != ns && c.UserNamespace != ns.parent {
-		return syserror.EPERM
+		return linuxerr.EPERM
 	}
 	// """
 	// 3. (see trySetUIDMap)
@@ -145,14 +145,14 @@ func (ns *UserNamespace) SetUIDMap(ctx context.Context, entries []IDMapEntry) er
 		//   parent user namespace to a user ID (group ID) in the user namespace.
 		// """
 		if len(entries) != 1 || ns.parent.MapToKUID(UID(entries[0].FirstParentID)) != c.EffectiveKUID || entries[0].Length != 1 {
-			return syserror.EPERM
+			return linuxerr.EPERM
 		}
 		// """
 		//   + The writing process must have the same effective user ID as the
 		//   process that created the user namespace.
 		// """
 		if c.EffectiveKUID != ns.owner {
-			return syserror.EPERM
+			return linuxerr.EPERM
 		}
 	}
 	// trySetUIDMap leaves data in maps if it fails.
@@ -170,11 +170,11 @@ func (ns *UserNamespace) trySetUIDMap(entries []IDMapEntry) error {
 		// checks for NoID.
 		lastID := e.FirstID + e.Length
 		if lastID <= e.FirstID {
-			return syserror.EINVAL
+			return linuxerr.EINVAL
 		}
 		lastParentID := e.FirstParentID + e.Length
 		if lastParentID <= e.FirstParentID {
-			return syserror.EINVAL
+			return linuxerr.EINVAL
 		}
 		// "3. The mapped user IDs (group IDs) must in turn have a mapping in
 		// the parent user namespace."
@@ -182,14 +182,14 @@ func (ns *UserNamespace) trySetUIDMap(entries []IDMapEntry) error {
 		// mappings when it's created, so SetUIDMap would have returned EPERM
 		// without reaching this point if ns is root.
 		if !ns.parent.allIDsMapped(&ns.parent.uidMapToParent, e.FirstParentID, lastParentID) {
-			return syserror.EPERM
+			return linuxerr.EPERM
 		}
 		// If either of these Adds fail, we have an overlapping range.
 		if !ns.uidMapFromParent.Add(idMapRange{e.FirstParentID, lastParentID}, e.FirstID) {
-			return syserror.EINVAL
+			return linuxerr.EINVAL
 		}
 		if !ns.uidMapToParent.Add(idMapRange{e.FirstID, lastID}, e.FirstParentID) {
-			return syserror.EINVAL
+			return linuxerr.EINVAL
 		}
 	}
 	return nil
@@ -202,24 +202,24 @@ func (ns *UserNamespace) SetGIDMap(ctx context.Context, entries []IDMapEntry) er
 	ns.mu.Lock()
 	defer ns.mu.Unlock()
 	if !ns.gidMapFromParent.IsEmpty() {
-		return syserror.EPERM
+		return linuxerr.EPERM
 	}
 	if len(entries) == 0 {
-		return syserror.EINVAL
+		return linuxerr.EINVAL
 	}
 	if !c.HasCapabilityIn(linux.CAP_SETGID, ns) {
-		return syserror.EPERM
+		return linuxerr.EPERM
 	}
 	if c.UserNamespace != ns && c.UserNamespace != ns.parent {
-		return syserror.EPERM
+		return linuxerr.EPERM
 	}
 	if !c.HasCapabilityIn(linux.CAP_SETGID, ns.parent) {
 		if len(entries) != 1 || ns.parent.MapToKGID(GID(entries[0].FirstParentID)) != c.EffectiveKGID || entries[0].Length != 1 {
-			return syserror.EPERM
+			return linuxerr.EPERM
 		}
 		// It's correct for this to still be UID.
 		if c.EffectiveKUID != ns.owner {
-			return syserror.EPERM
+			return linuxerr.EPERM
 		}
 		// "In the case of gid_map, use of the setgroups(2) system call must
 		// first be denied by writing "deny" to the /proc/[pid]/setgroups file
@@ -239,20 +239,20 @@ func (ns *UserNamespace) trySetGIDMap(entries []IDMapEntry) error {
 	for _, e := range entries {
 		lastID := e.FirstID + e.Length
 		if lastID <= e.FirstID {
-			return syserror.EINVAL
+			return linuxerr.EINVAL
 		}
 		lastParentID := e.FirstParentID + e.Length
 		if lastParentID <= e.FirstParentID {
-			return syserror.EINVAL
+			return linuxerr.EINVAL
 		}
 		if !ns.parent.allIDsMapped(&ns.parent.gidMapToParent, e.FirstParentID, lastParentID) {
-			return syserror.EPERM
+			return linuxerr.EPERM
 		}
 		if !ns.gidMapFromParent.Add(idMapRange{e.FirstParentID, lastParentID}, e.FirstID) {
-			return syserror.EINVAL
+			return linuxerr.EINVAL
 		}
 		if !ns.gidMapToParent.Add(idMapRange{e.FirstID, lastID}, e.FirstParentID) {
-			return syserror.EINVAL
+			return linuxerr.EINVAL
 		}
 	}
 	return nil
diff --git a/pkg/sentry/kernel/auth/user_namespace.go b/pkg/sentry/kernel/auth/user_namespace.go
index 9dd52c860..40a406f9d 100644
--- a/pkg/sentry/kernel/auth/user_namespace.go
+++ b/pkg/sentry/kernel/auth/user_namespace.go
@@ -17,8 +17,8 @@ package auth
 import (
 	"math"
 
+	"gvisor.dev/gvisor/pkg/errors/linuxerr"
 	"gvisor.dev/gvisor/pkg/sync"
-	"gvisor.dev/gvisor/pkg/syserror"
 )
 
 // A UserNamespace represents a user namespace. See user_namespaces(7) for
@@ -105,7 +105,7 @@ func (c *Credentials) NewChildUserNamespace() (*UserNamespace, error) {
 	if c.UserNamespace.depth() >= maxUserNamespaceDepth {
 		// "... Calls to unshare(2) or clone(2) that would cause this limit to
 		// be exceeded fail with the error EUSERS." - user_namespaces(7)
-		return nil, syserror.EUSERS
+		return nil, linuxerr.EUSERS
 	}
 	// "EPERM: CLONE_NEWUSER was specified in flags, but either the effective
 	// user ID or the effective group ID of the caller does not have a mapping
@@ -114,10 +114,10 @@ func (c *Credentials) NewChildUserNamespace() (*UserNamespace, error) {
 	// process are mapped to user IDs and group IDs in the user namespace of
 	// the calling process at the time of the call." - unshare(2)
 	if !c.EffectiveKUID.In(c.UserNamespace).Ok() {
-		return nil, syserror.EPERM
+		return nil, linuxerr.EPERM
 	}
 	if !c.EffectiveKGID.In(c.UserNamespace).Ok() {
-		return nil, syserror.EPERM
+		return nil, linuxerr.EPERM
 	}
 	return &UserNamespace{
 		parent: c.UserNamespace,
diff --git a/pkg/sentry/kernel/fasync/BUILD b/pkg/sentry/kernel/fasync/BUILD
index 6224a0cbd..6b2dd09da 100644
--- a/pkg/sentry/kernel/fasync/BUILD
+++ b/pkg/sentry/kernel/fasync/BUILD
@@ -8,12 +8,12 @@ go_library(
     visibility = ["//:sandbox"],
     deps = [
         "//pkg/abi/linux",
+        "//pkg/errors/linuxerr",
         "//pkg/sentry/fs",
         "//pkg/sentry/kernel",
         "//pkg/sentry/kernel/auth",
         "//pkg/sentry/vfs",
         "//pkg/sync",
-        "//pkg/syserror",
         "//pkg/waiter",
     ],
 )
diff --git a/pkg/sentry/kernel/fasync/fasync.go b/pkg/sentry/kernel/fasync/fasync.go
index 5d584dc45..473987a79 100644
--- a/pkg/sentry/kernel/fasync/fasync.go
+++ b/pkg/sentry/kernel/fasync/fasync.go
@@ -17,12 +17,12 @@ package fasync
 
 import (
 	"gvisor.dev/gvisor/pkg/abi/linux"
+	"gvisor.dev/gvisor/pkg/errors/linuxerr"
 	"gvisor.dev/gvisor/pkg/sentry/fs"
 	"gvisor.dev/gvisor/pkg/sentry/kernel"
 	"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
 	"gvisor.dev/gvisor/pkg/sentry/vfs"
 	"gvisor.dev/gvisor/pkg/sync"
-	"gvisor.dev/gvisor/pkg/syserror"
 	"gvisor.dev/gvisor/pkg/waiter"
 )
 
@@ -248,7 +248,7 @@ func (a *FileAsync) Signal() linux.Signal {
 // to send SIGIO.
 func (a *FileAsync) SetSignal(signal linux.Signal) error {
 	if signal != 0 && !signal.IsValid() {
-		return syserror.EINVAL
+		return linuxerr.EINVAL
 	}
 	a.mu.Lock()
 	defer a.mu.Unlock()
diff --git a/pkg/sentry/kernel/fd_table.go b/pkg/sentry/kernel/fd_table.go
index 9f7702fcc..eff556a0c 100644
--- a/pkg/sentry/kernel/fd_table.go
+++ b/pkg/sentry/kernel/fd_table.go
@@ -108,7 +108,7 @@ func (f *FDTable) saveDescriptorTable() map[int32]descriptor {
 func (f *FDTable) loadDescriptorTable(m map[int32]descriptor) {
 	ctx := context.Background()
 	f.initNoLeakCheck() // Initialize table.
-	f.fdBitmap = bitmap.BitmapWithSize(uint32(math.MaxUint16))
+	f.fdBitmap = bitmap.New(uint32(math.MaxUint16))
 	for fd, d := range m {
 		if fd < 0 {
 			panic(fmt.Sprintf("FD is not supposed to be negative. FD: %d", fd))
diff --git a/pkg/sentry/kernel/fd_table_unsafe.go b/pkg/sentry/kernel/fd_table_unsafe.go
index c4cac6b99..2b3e6ef71 100644
--- a/pkg/sentry/kernel/fd_table_unsafe.go
+++ b/pkg/sentry/kernel/fd_table_unsafe.go
@@ -46,7 +46,7 @@ func (f *FDTable) initNoLeakCheck() {
 func (f *FDTable) init() {
 	f.initNoLeakCheck()
 	f.InitRefs()
-	f.fdBitmap = bitmap.BitmapWithSize(uint32(math.MaxUint16))
+	f.fdBitmap = bitmap.New(uint32(math.MaxUint16))
 }
 
 // get gets a file entry.
diff --git a/pkg/sentry/kernel/futex/BUILD b/pkg/sentry/kernel/futex/BUILD
index 6c31e082c..cfdea5cf7 100644
--- a/pkg/sentry/kernel/futex/BUILD
+++ b/pkg/sentry/kernel/futex/BUILD
@@ -37,6 +37,7 @@ go_library(
     deps = [
         "//pkg/abi/linux",
         "//pkg/context",
+        "//pkg/errors/linuxerr",
         "//pkg/hostarch",
         "//pkg/log",
         "//pkg/sentry/memmap",
@@ -53,8 +54,8 @@ go_test(
     library = ":futex",
     deps = [
         "//pkg/context",
+        "//pkg/errors/linuxerr",
         "//pkg/hostarch",
         "//pkg/sync",
-        "@org_golang_x_sys//unix:go_default_library",
     ],
 )
diff --git a/pkg/sentry/kernel/futex/futex.go b/pkg/sentry/kernel/futex/futex.go
index 0427cf3f4..f5c364c96 100644
--- a/pkg/sentry/kernel/futex/futex.go
+++ b/pkg/sentry/kernel/futex/futex.go
@@ -20,6 +20,7 @@ package futex
 import (
 	"gvisor.dev/gvisor/pkg/abi/linux"
 	"gvisor.dev/gvisor/pkg/context"
+	"gvisor.dev/gvisor/pkg/errors/linuxerr"
 	"gvisor.dev/gvisor/pkg/hostarch"
 	"gvisor.dev/gvisor/pkg/sentry/memmap"
 	"gvisor.dev/gvisor/pkg/sync"
@@ -122,7 +123,7 @@ func check(t Target, addr hostarch.Addr, val uint32) error {
 		return err
 	}
 	if cur != val {
-		return syserror.EAGAIN
+		return linuxerr.EAGAIN
 	}
 	return nil
 }
@@ -332,7 +333,7 @@ func getKey(t Target, addr hostarch.Addr, private bool) (Key, error) {
 	// Ensure the address is aligned.
 	// It must be a DWORD boundary.
 	if addr&0x3 != 0 {
-		return Key{}, syserror.EINVAL
+		return Key{}, linuxerr.EINVAL
 	}
 	if private {
 		return Key{Kind: KindPrivate, Offset: uint64(addr)}, nil
@@ -397,8 +398,8 @@ func (m *Manager) Fork() *Manager {
 }
 
 // lockBucket returns a locked bucket for the given key.
-func (m *Manager) lockBucket(k *Key) *bucket {
-	var b *bucket
+// +checklocksacquire:b.mu
+func (m *Manager) lockBucket(k *Key) (b *bucket) {
 	if k.Kind == KindSharedMappable {
 		b = m.sharedBucket
 	} else {
@@ -409,7 +410,9 @@ func (m *Manager) lockBucket(k *Key) *bucket {
 }
 
 // lockBuckets returns locked buckets for the given keys.
-func (m *Manager) lockBuckets(k1, k2 *Key) (*bucket, *bucket) {
+// +checklocksacquire:b1.mu
+// +checklocksacquire:b2.mu
+func (m *Manager) lockBuckets(k1, k2 *Key) (b1 *bucket, b2 *bucket) {
 	// Buckets must be consistently ordered to avoid circular lock
 	// dependencies. We order buckets in m.privateBuckets by index (lowest
 	// index first), and all buckets in m.privateBuckets precede
@@ -419,8 +422,8 @@ func (m *Manager) lockBuckets(k1, k2 *Key) (*bucket, *bucket) {
 	if k1.Kind != KindSharedMappable && k2.Kind != KindSharedMappable {
 		i1 := bucketIndexForAddr(k1.addr())
 		i2 := bucketIndexForAddr(k2.addr())
-		b1 := &m.privateBuckets[i1]
-		b2 := &m.privateBuckets[i2]
+		b1 = &m.privateBuckets[i1]
+		b2 = &m.privateBuckets[i2]
 		switch {
 		case i1 < i2:
 			b1.mu.Lock()
@@ -431,19 +434,30 @@ func (m *Manager) lockBuckets(k1, k2 *Key) (*bucket, *bucket) {
 		default:
 			b1.mu.Lock()
 		}
-		return b1, b2
+		return b1, b2 // +checklocksforce
 	}
 
 	// At least one of b1 or b2 should be m.sharedBucket.
-	b1 := m.sharedBucket
-	b2 := m.sharedBucket
+	b1 = m.sharedBucket
+	b2 = m.sharedBucket
 	if k1.Kind != KindSharedMappable {
 		b1 = m.lockBucket(k1)
 	} else if k2.Kind != KindSharedMappable {
 		b2 = m.lockBucket(k2)
 	}
 	m.sharedBucket.mu.Lock()
-	return b1, b2
+	return b1, b2 // +checklocksforce
+}
+
+// unlockBuckets unlocks two buckets.
+// +checklocksrelease:b1.mu
+// +checklocksrelease:b2.mu
+func (m *Manager) unlockBuckets(b1, b2 *bucket) {
+	b1.mu.Unlock()
+	if b1 != b2 {
+		b2.mu.Unlock()
+	}
+	return // +checklocksforce
 }
 
 // Wake wakes up to n waiters matching the bitmask on the given addr.
@@ -476,10 +490,7 @@ func (m *Manager) doRequeue(t Target, addr, naddr hostarch.Addr, private bool, c
 	defer k2.release(t)
 
 	b1, b2 := m.lockBuckets(&k1, &k2)
-	defer b1.mu.Unlock()
-	if b2 != b1 {
-		defer b2.mu.Unlock()
-	}
+	defer m.unlockBuckets(b1, b2)
 
 	if checkval {
 		if err := check(t, addr, val); err != nil {
@@ -526,10 +537,7 @@ func (m *Manager) WakeOp(t Target, addr1, addr2 hostarch.Addr, private bool, nwa
 	defer k2.release(t)
 
 	b1, b2 := m.lockBuckets(&k1, &k2)
-	defer b1.mu.Unlock()
-	if b2 != b1 {
-		defer b2.mu.Unlock()
-	}
+	defer m.unlockBuckets(b1, b2)
 
 	done := 0
 	cond, err := atomicOp(t, addr2, op)
@@ -670,7 +678,7 @@ func (m *Manager) lockPILocked(w *Waiter, t Target, addr hostarch.Addr, tid uint
 			return false, err
 		}
 		if (cur & linux.FUTEX_TID_MASK) == tid {
-			return false, syserror.EDEADLK
+			return false, linuxerr.EDEADLK
 		}
 
 		if (cur & linux.FUTEX_TID_MASK) == 0 {
@@ -745,7 +753,7 @@ func (m *Manager) unlockPILocked(t Target, addr hostarch.Addr, tid uint32, b *bu
 	}
 
 	if (cur & linux.FUTEX_TID_MASK) != tid {
-		return syserror.EPERM
+		return linuxerr.EPERM
 	}
 
 	var next *Waiter  // Who's the next owner?
@@ -773,7 +781,7 @@ func (m *Manager) unlockPILocked(t Target, addr hostarch.Addr, tid uint32, b *bu
 		if prev != cur {
 			// Let user mode handle CAS races. This is different than lock, which
 			// retries when CAS fails.
-			return syserror.EAGAIN
+			return linuxerr.EAGAIN
 		}
 		return nil
 	}
@@ -790,7 +798,7 @@ func (m *Manager) unlockPILocked(t Target, addr hostarch.Addr, tid uint32, b *bu
 		return err
 	}
 	if prev != cur {
-		return syserror.EINVAL
+		return linuxerr.EINVAL
 	}
 
 	b.wakeWaiterLocked(next)
diff --git a/pkg/sentry/kernel/futex/futex_test.go b/pkg/sentry/kernel/futex/futex_test.go
index deba44e5c..04c136f87 100644
--- a/pkg/sentry/kernel/futex/futex_test.go
+++ b/pkg/sentry/kernel/futex/futex_test.go
@@ -21,8 +21,8 @@ import (
 	"testing"
 	"unsafe"
 
-	"golang.org/x/sys/unix"
 	"gvisor.dev/gvisor/pkg/context"
+	"gvisor.dev/gvisor/pkg/errors/linuxerr"
 	"gvisor.dev/gvisor/pkg/hostarch"
 	"gvisor.dev/gvisor/pkg/sync"
 )
@@ -488,7 +488,7 @@ func (t *testMutex) Lock() {
 		// Wait for it to be "not locked".
 		w := NewWaiter()
 		err := t.m.WaitPrepare(w, t.d, t.a, true, testMutexLocked, ^uint32(0))
-		if err == unix.EAGAIN {
+		if linuxerr.Equals(linuxerr.EAGAIN, err) {
 			continue
 		}
 		if err != nil {
diff --git a/pkg/sentry/kernel/ipc/BUILD b/pkg/sentry/kernel/ipc/BUILD
new file mode 100644
index 000000000..e42a94e15
--- /dev/null
+++ b/pkg/sentry/kernel/ipc/BUILD
@@ -0,0 +1,20 @@
+load("//tools:defs.bzl", "go_library")
+
+package(licenses = ["notice"])
+
+go_library(
+    name = "ipc",
+    srcs = [
+        "object.go",
+        "registry.go",
+    ],
+    visibility = ["//pkg/sentry:internal"],
+    deps = [
+        "//pkg/abi/linux",
+        "//pkg/context",
+        "//pkg/errors/linuxerr",
+        "//pkg/log",
+        "//pkg/sentry/fs",
+        "//pkg/sentry/kernel/auth",
+    ],
+)
diff --git a/pkg/sentry/kernel/ipc/object.go b/pkg/sentry/kernel/ipc/object.go
new file mode 100644
index 000000000..387b35e7e
--- /dev/null
+++ b/pkg/sentry/kernel/ipc/object.go
@@ -0,0 +1,115 @@
+// Copyright 2021 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package ipc defines functionality and utilities common to sysvipc mechanisms.
+//
+// Lock ordering: [shm/semaphore/msgqueue].Registry.mu -> Mechanism
+package ipc
+
+import (
+	"gvisor.dev/gvisor/pkg/abi/linux"
+	"gvisor.dev/gvisor/pkg/sentry/fs"
+	"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+)
+
+// Key is a user-provided identifier for IPC objects.
+type Key int32
+
+// ID is a kernel identifier for IPC objects.
+type ID int32
+
+// Object represents an abstract IPC object with fields common to all IPC
+// mechanisms.
+//
+// +stateify savable
+type Object struct {
+	// User namespace which owns the IPC namespace which owns the IPC object.
+	// Immutable.
+	UserNS *auth.UserNamespace
+
+	// ID is a kernel identifier for the IPC object. Immutable.
+	ID ID
+
+	// Key is a user-provided identifier for the IPC object. Immutable.
+	Key Key
+
+	// Creator is the user who created the IPC object. Immutable.
+	Creator fs.FileOwner
+
+	// Owner is the current owner of the IPC object.
+	Owner fs.FileOwner
+
+	// Perms is the access permissions of the IPC object.
+	Perms fs.FilePermissions
+}
+
+// Mechanism represents a SysV mechanism that holds an IPC object. It can also
+// be looked at as a container for an ipc.Object, which is by definition a fully
+// functional SysV object.
+type Mechanism interface {
+	// Lock behaves the same as Mutex.Lock on the mechanism.
+	Lock()
+
+	// Unlock behaves the same as Mutex.Unlock on the mechanism.
+	Unlock()
+
+	// Object returns a pointer to the mechanism's ipc.Object. Mechanism.Lock
+	// and Mechanism.Unlock should be used when the object is used.
+	Object() *Object
+
+	// Destroy destroys the mechanism.
+	Destroy()
+}
+
+// NewObject returns a new, initialized ipc.Object. The newly returned object
+// doesn't have a valid ID. When the object is registered, the registry assigns
+// it a new unique ID.
+func NewObject(un *auth.UserNamespace, key Key, creator, owner fs.FileOwner, perms fs.FilePermissions) *Object {
+	return &Object{
+		UserNS:  un,
+		Key:     key,
+		Creator: creator,
+		Owner:   owner,
+		Perms:   perms,
+	}
+}
+
+// CheckOwnership verifies whether an IPC object may be accessed using creds as
+// an owner. See ipc/util.c:ipcctl_obtain_check() in Linux.
+func (o *Object) CheckOwnership(creds *auth.Credentials) bool {
+	if o.Owner.UID == creds.EffectiveKUID || o.Creator.UID == creds.EffectiveKUID {
+		return true
+	}
+
+	// Tasks with CAP_SYS_ADMIN may bypass ownership checks. Strangely, Linux
+	// doesn't use CAP_IPC_OWNER for this despite CAP_IPC_OWNER being documented
+	// for use to "override IPC ownership checks".
+	return creds.HasCapabilityIn(linux.CAP_SYS_ADMIN, o.UserNS)
+}
+
+// CheckPermissions verifies whether an IPC object is accessible using creds for
+// access described by req. See ipc/util.c:ipcperms() in Linux.
+func (o *Object) CheckPermissions(creds *auth.Credentials, req fs.PermMask) bool {
+	p := o.Perms.Other
+	if o.Owner.UID == creds.EffectiveKUID {
+		p = o.Perms.User
+	} else if creds.InGroup(o.Owner.GID) {
+		p = o.Perms.Group
+	}
+
+	if p.SupersetOf(req) {
+		return true
+	}
+	return creds.HasCapabilityIn(linux.CAP_IPC_OWNER, o.UserNS)
+}
diff --git a/pkg/sentry/kernel/ipc/registry.go b/pkg/sentry/kernel/ipc/registry.go
new file mode 100644
index 000000000..91de19070
--- /dev/null
+++ b/pkg/sentry/kernel/ipc/registry.go
@@ -0,0 +1,196 @@
+// Copyright 2021 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package ipc
+
+import (
+	"gvisor.dev/gvisor/pkg/abi/linux"
+	"gvisor.dev/gvisor/pkg/context"
+	"gvisor.dev/gvisor/pkg/errors/linuxerr"
+	"gvisor.dev/gvisor/pkg/log"
+	"gvisor.dev/gvisor/pkg/sentry/fs"
+	"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+)
+
+// Registry is similar to Object, but for registries. It represents an abstract
+// SysV IPC registry with fields common to all SysV registries. Registry is not
+// thread-safe, and should be protected using a mutex.
+//
+// +stateify savable
+type Registry struct {
+	// UserNS owning the IPC namespace this registry belongs to. Immutable.
+	UserNS *auth.UserNamespace
+
+	// objects is a map of IDs to IPC mechanisms.
+	objects map[ID]Mechanism
+
+	// keysToIDs maps a lookup key to an ID.
+	keysToIDs map[Key]ID
+
+	// lastIDUsed is used to find the next available ID for object creation.
+	lastIDUsed ID
+}
+
+// NewRegistry returns a new, initialized ipc.Registry.
+func NewRegistry(userNS *auth.UserNamespace) *Registry {
+	return &Registry{
+		UserNS:    userNS,
+		objects:   make(map[ID]Mechanism),
+		keysToIDs: make(map[Key]ID),
+	}
+}
+
+// Find uses key to search for and return a SysV mechanism. Find returns an
+// error if an object is found but shouldn't be, or if the user doesn't have
+// permission to use the object. If no object is found, Find checks the create
+// flag, and returns an error only if it's false.
+func (r *Registry) Find(ctx context.Context, key Key, mode linux.FileMode, create, exclusive bool) (Mechanism, error) {
+	if id, ok := r.keysToIDs[key]; ok {
+		mech := r.objects[id]
+		mech.Lock()
+		defer mech.Unlock()
+
+		obj := mech.Object()
+		creds := auth.CredentialsFromContext(ctx)
+		if !obj.CheckPermissions(creds, fs.PermsFromMode(mode)) {
+			// The [calling process / user] does not have permission to access
+			// the set, and does not have the CAP_IPC_OWNER capability in the
+			// user namespace that governs its IPC namespace.
+			return nil, linuxerr.EACCES
+		}
+
+		if create && exclusive {
+			// IPC_CREAT and IPC_EXCL were specified, but an object already
+			// exists for key.
+			return nil, linuxerr.EEXIST
+		}
+		return mech, nil
+	}
+
+	if !create {
+		// No object exists for key and msgflg did not specify IPC_CREAT.
+		return nil, linuxerr.ENOENT
+	}
+
+	return nil, nil
+}
+
+// Register adds the given object into Registry.objects, and assigns it a new
+// ID. It returns an error if all IDs are exhausted.
+func (r *Registry) Register(m Mechanism) error {
+	id, err := r.newID()
+	if err != nil {
+		return err
+	}
+
+	obj := m.Object()
+	obj.ID = id
+
+	r.objects[id] = m
+	r.keysToIDs[obj.Key] = id
+
+	return nil
+}
+
+// newID finds the first unused ID in the registry, and returns an error if
+// none is found.
+func (r *Registry) newID() (ID, error) {
+	// Find the next available ID.
+	for id := r.lastIDUsed + 1; id != r.lastIDUsed; id++ {
+		// Handle wrap around.
+		if id < 0 {
+			id = 0
+			continue
+		}
+		if r.objects[id] == nil {
+			r.lastIDUsed = id
+			return id, nil
+		}
+	}
+
+	log.Warningf("ids exhausted, they may be leaking")
+
+	// The man pages for shmget(2) mention that ENOSPC should be used if "All
+	// possible shared memory IDs have been taken (SHMMNI)". Other SysV
+	// mechanisms don't have a specific errno for running out of IDs, but they
+	// return ENOSPC if the max number of objects is exceeded, so we assume that
+	// it's the same case.
+	return 0, linuxerr.ENOSPC
+}
+
+// Remove removes the mechanism with the given id from the registry, and calls
+// mechanism.Destroy to perform mechanism-specific removal.
+func (r *Registry) Remove(id ID, creds *auth.Credentials) error {
+	mech := r.objects[id]
+	if mech == nil {
+		return linuxerr.EINVAL
+	}
+
+	mech.Lock()
+	defer mech.Unlock()
+
+	obj := mech.Object()
+
+	// The effective user ID of the calling process must match the creator or
+	// owner of the [mechanism], or the caller must be privileged.
+	if !obj.CheckOwnership(creds) {
+		return linuxerr.EPERM
+	}
+
+	delete(r.objects, obj.ID)
+	delete(r.keysToIDs, obj.Key)
+	mech.Destroy()
+
+	return nil
+}
+
+// ForAllObjects executes a given function for all registered objects.
+func (r *Registry) ForAllObjects(f func(o Mechanism)) {
+	for _, o := range r.objects {
+		f(o)
+	}
+}
+
+// FindByID returns the mechanism with the given ID, or nil if none exists.
+func (r *Registry) FindByID(id ID) Mechanism {
+	return r.objects[id]
+}
+
+// DissociateKey removes the association between a mechanism and its key
+// (deletes it from r.keysToIDs), preventing it from being discovered by any new
+// process, but not necessarily destroying it. If the given key doesn't exist,
+// nothing is changed.
+func (r *Registry) DissociateKey(key Key) {
+	delete(r.keysToIDs, key)
+}
+
+// DissociateID removes the association between a mechanism and its ID (deletes
+// it from r.objects). An ID can't be removed unless the associated key is
+// removed already; this is done to prevent users from acquiring a nil
+// Mechanism.
+//
+// Precondition: must be preceded by a call to r.DissociateKey.
+func (r *Registry) DissociateID(id ID) {
+	delete(r.objects, id)
+}
+
+// ObjectCount returns the number of registered objects.
+func (r *Registry) ObjectCount() int {
+	return len(r.objects)
+}
+
+// LastIDUsed returns the last used ID.
+func (r *Registry) LastIDUsed() ID {
+	return r.lastIDUsed
+}
diff --git a/pkg/sentry/kernel/ipc_namespace.go b/pkg/sentry/kernel/ipc_namespace.go
index 9545bb5ef..0b101b1bb 100644
--- a/pkg/sentry/kernel/ipc_namespace.go
+++ b/pkg/sentry/kernel/ipc_namespace.go
@@ -17,6 +17,7 @@ package kernel
 import (
 	"gvisor.dev/gvisor/pkg/context"
 	"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+	"gvisor.dev/gvisor/pkg/sentry/kernel/msgqueue"
 	"gvisor.dev/gvisor/pkg/sentry/kernel/semaphore"
 	"gvisor.dev/gvisor/pkg/sentry/kernel/shm"
 )
@@ -30,6 +31,7 @@ type IPCNamespace struct {
 	// User namespace which owns this IPC namespace. Immutable.
 	userNS *auth.UserNamespace
 
+	queues     *msgqueue.Registry
 	semaphores *semaphore.Registry
 	shms       *shm.Registry
 }
@@ -38,6 +40,7 @@ type IPCNamespace struct {
 func NewIPCNamespace(userNS *auth.UserNamespace) *IPCNamespace {
 	ns := &IPCNamespace{
 		userNS:     userNS,
+		queues:     msgqueue.NewRegistry(userNS),
 		semaphores: semaphore.NewRegistry(userNS),
 		shms:       shm.NewRegistry(userNS),
 	}
@@ -45,6 +48,11 @@ func NewIPCNamespace(userNS *auth.UserNamespace) *IPCNamespace {
 	return ns
 }
 
+// MsgqueueRegistry returns the SysV message queue registry of this namespace.
+func (i *IPCNamespace) MsgqueueRegistry() *msgqueue.Registry {
+	return i.queues
+}
+
 // SemaphoreRegistry returns the semaphore set registry for this namespace.
 func (i *IPCNamespace) SemaphoreRegistry() *semaphore.Registry {
 	return i.semaphores
diff --git a/pkg/sentry/kernel/kcov.go b/pkg/sentry/kernel/kcov.go
index 4b943106b..e8a71bec1 100644
--- a/pkg/sentry/kernel/kcov.go
+++ b/pkg/sentry/kernel/kcov.go
@@ -22,13 +22,13 @@ import (
 	"gvisor.dev/gvisor/pkg/abi/linux"
 	"gvisor.dev/gvisor/pkg/context"
 	"gvisor.dev/gvisor/pkg/coverage"
+	"gvisor.dev/gvisor/pkg/errors/linuxerr"
 	"gvisor.dev/gvisor/pkg/hostarch"
 	"gvisor.dev/gvisor/pkg/safemem"
 	"gvisor.dev/gvisor/pkg/sentry/memmap"
 	"gvisor.dev/gvisor/pkg/sentry/mm"
 	"gvisor.dev/gvisor/pkg/sentry/pgalloc"
 	"gvisor.dev/gvisor/pkg/sentry/usage"
-	"gvisor.dev/gvisor/pkg/syserror"
 )
 
 // kcovAreaSizeMax is the maximum number of uint64 entries allowed in the kcov
@@ -125,19 +125,19 @@ func (kcov *Kcov) InitTrace(size uint64) error {
 	defer kcov.mu.Unlock()
 
 	if kcov.mode != linux.KCOV_MODE_DISABLED {
-		return syserror.EBUSY
+		return linuxerr.EBUSY
 	}
 
 	// To simplify all the logic around mapping, we require that the length of the
 	// shared region is a multiple of the system page size.
 	if (8*size)&(hostarch.PageSize-1) != 0 {
-		return syserror.EINVAL
+		return linuxerr.EINVAL
 	}
 
 	// We need space for at least two uint64s to hold current position and a
 	// single PC.
 	if size < 2 || size > kcovAreaSizeMax {
-		return syserror.EINVAL
+		return linuxerr.EINVAL
 	}
 
 	kcov.size = size
@@ -157,7 +157,7 @@ func (kcov *Kcov) EnableTrace(ctx context.Context, traceKind uint8) error {
 
 	// KCOV_ENABLE must be preceded by KCOV_INIT_TRACE and an mmap call.
 	if kcov.mode != linux.KCOV_MODE_INIT || kcov.mappable == nil {
-		return syserror.EINVAL
+		return linuxerr.EINVAL
 	}
 
 	switch traceKind {
@@ -165,13 +165,13 @@ func (kcov *Kcov) EnableTrace(ctx context.Context, traceKind uint8) error {
 		kcov.mode = linux.KCOV_MODE_TRACE_PC
 	case linux.KCOV_TRACE_CMP:
 		// We do not support KCOV_MODE_TRACE_CMP.
-		return syserror.ENOTSUP
+		return linuxerr.ENOTSUP
 	default:
-		return syserror.EINVAL
+		return linuxerr.EINVAL
 	}
 
 	if kcov.owningTask != nil && kcov.owningTask != t {
-		return syserror.EBUSY
+		return linuxerr.EBUSY
 	}
 
 	kcov.owningTask = t
@@ -195,7 +195,7 @@ func (kcov *Kcov) DisableTrace(ctx context.Context) error {
 	}
 
 	if t != kcov.owningTask {
-		return syserror.EINVAL
+		return linuxerr.EINVAL
 	}
 	kcov.mode = linux.KCOV_MODE_INIT
 	kcov.owningTask = nil
@@ -237,7 +237,7 @@ func (kcov *Kcov) ConfigureMMap(ctx context.Context, opts *memmap.MMapOpts) erro
 	defer kcov.mu.Unlock()
 
 	if kcov.mode != linux.KCOV_MODE_INIT {
-		return syserror.EINVAL
+		return linuxerr.EINVAL
 	}
 
 	if kcov.mappable == nil {
diff --git a/pkg/sentry/kernel/kernel.go b/pkg/sentry/kernel/kernel.go
index 352c36ba9..df5160b67 100644
--- a/pkg/sentry/kernel/kernel.go
+++ b/pkg/sentry/kernel/kernel.go
@@ -1299,11 +1299,11 @@ func (k *Kernel) WaitExited() {
 }
 
 // Kill requests that all tasks in k immediately exit as if group exiting with
-// status es. Kill does not wait for tasks to exit.
-func (k *Kernel) Kill(es ExitStatus) {
+// status ws. Kill does not wait for tasks to exit.
+func (k *Kernel) Kill(ws linux.WaitStatus) {
 	k.extMu.Lock()
 	defer k.extMu.Unlock()
-	k.tasks.Kill(es)
+	k.tasks.Kill(ws)
 }
 
 // Pause requests that all tasks in k temporarily stop executing, and blocks
diff --git a/pkg/sentry/kernel/kernel_opts.go b/pkg/sentry/kernel/kernel_opts.go
index 2e66ec587..5ffafb0d1 100644
--- a/pkg/sentry/kernel/kernel_opts.go
+++ b/pkg/sentry/kernel/kernel_opts.go
@@ -12,6 +12,9 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
+//go:build go1.1
+// +build go1.1
+
 package kernel
 
 // SpecialOpts contains non-standard options for the kernel.
diff --git a/pkg/sentry/kernel/msgqueue/BUILD b/pkg/sentry/kernel/msgqueue/BUILD
new file mode 100644
index 000000000..5ec11e1f6
--- /dev/null
+++ b/pkg/sentry/kernel/msgqueue/BUILD
@@ -0,0 +1,36 @@
+load("//tools:defs.bzl", "go_library")
+load("//tools/go_generics:defs.bzl", "go_template_instance")
+
+package(licenses = ["notice"])
+
+go_template_instance(
+    name = "message_list",
+    out = "message_list.go",
+    package = "msgqueue",
+    prefix = "msg",
+    template = "//pkg/ilist:generic_list",
+    types = {
+        "Element": "*Message",
+        "Linker": "*Message",
+    },
+)
+
+go_library(
+    name = "msgqueue",
+    srcs = [
+        "message_list.go",
+        "msgqueue.go",
+    ],
+    visibility = ["//pkg/sentry:internal"],
+    deps = [
+        "//pkg/abi/linux",
+        "//pkg/context",
+        "//pkg/errors/linuxerr",
+        "//pkg/sentry/fs",
+        "//pkg/sentry/kernel/auth",
+        "//pkg/sentry/kernel/ipc",
+        "//pkg/sentry/kernel/time",
+        "//pkg/sync",
+        "//pkg/waiter",
+    ],
+)
diff --git a/pkg/sentry/kernel/msgqueue/msgqueue.go b/pkg/sentry/kernel/msgqueue/msgqueue.go
new file mode 100644
index 000000000..3ce926950
--- /dev/null
+++ b/pkg/sentry/kernel/msgqueue/msgqueue.go
@@ -0,0 +1,220 @@
+// Copyright 2021 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package msgqueue implements System V message queues.
+package msgqueue
+
+import (
+	"gvisor.dev/gvisor/pkg/abi/linux"
+	"gvisor.dev/gvisor/pkg/context"
+	"gvisor.dev/gvisor/pkg/errors/linuxerr"
+	"gvisor.dev/gvisor/pkg/sentry/fs"
+	"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+	"gvisor.dev/gvisor/pkg/sentry/kernel/ipc"
+	ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time"
+	"gvisor.dev/gvisor/pkg/sync"
+	"gvisor.dev/gvisor/pkg/waiter"
+)
+
+const (
+	// maxQueues is the system-wide limit for the maximum number of queues.
+	maxQueues = linux.MSGMNI
+
+	// maxQueueBytes is the maximum size of a queue in bytes.
+	maxQueueBytes = linux.MSGMNB
+
+	// maxMessageBytes is the maximum size of a message in bytes.
+	maxMessageBytes = linux.MSGMAX
+)
+
+// Registry contains a set of message queues that can be referenced using keys
+// or IDs.
+//
+// +stateify savable
+type Registry struct {
+	// mu protects all the fields below. It is tagged nosave for stateify.
+	mu sync.Mutex `state:"nosave"`
+
+	// reg defines basic fields and operations needed for all SysV registries.
+	reg *ipc.Registry
+}
+
+// NewRegistry returns a new Registry backed by a new ipc.Registry for userNS.
+func NewRegistry(userNS *auth.UserNamespace) *Registry {
+	return &Registry{
+		reg: ipc.NewRegistry(userNS),
+	}
+}
+
+// Queue represents a SysV message queue, described by sysvipc(7).
+//
+// +stateify savable
+type Queue struct {
+	// registry is the registry owning this queue. Immutable.
+	registry *Registry
+
+	// mu protects all the fields below.
+	mu sync.Mutex `state:"nosave"`
+
+	// dead is set to true (by Destroy) when a queue is removed from the
+	// registry and should not be used. Operations on the queue should check
+	// dead, and return EIDRM if set to true.
+	dead bool
+
+	// obj defines basic fields that should be included in all SysV IPC objects.
+	obj *ipc.Object
+
+	// senders holds a queue of blocked message senders. Senders are notified
+	// when enough space is available in the queue to insert their message.
+	senders waiter.Queue
+
+	// receivers holds a queue of blocked receivers. Receivers are notified
+	// when a new message is inserted into the queue and can be received.
+	receivers waiter.Queue
+
+	// messages is a list of sent messages.
+	messages msgList
+
+	// sendTime is the last time a msgsnd was performed.
+	sendTime ktime.Time
+
+	// receiveTime is the last time a msgrcv was performed.
+	receiveTime ktime.Time
+
+	// changeTime is the time of creation or of the last change made via msgctl.
+	changeTime ktime.Time
+
+	// byteCount is the current number of message bytes in the queue.
+	byteCount uint64
+
+	// messageCount is the current number of messages in the queue.
+	messageCount uint64
+
+	// maxBytes is the maximum allowed number of bytes in the queue, and is also
+	// used as a limit for the number of total possible messages.
+	maxBytes uint64
+
+	// sendPID is the PID of the process that performed the last msgsnd.
+	sendPID int32
+
+	// receivePID is the PID of the process that performed the last msgrcv.
+	receivePID int32
+}
+
+// Message represents a message exchanged through a Queue via msgsnd(2) and
+// msgrcv(2).
+//
+// +stateify savable
+type Message struct {
+	msgEntry // Linkage used by msgList (presumably generated; see BUILD).
+
+	// mType is an integer representing the type of the sent message.
+	mType int64
+
+	// mText is the message payload, an untyped block of memory.
+	mText []byte
+
+	// mSize is the size of mText.
+	mSize uint64
+}
+
+// FindOrCreate returns the queue registered under key or, failing that (or
+// when private is set), registers and returns a new one. See msgget(2).
+func (r *Registry) FindOrCreate(ctx context.Context, key ipc.Key, mode linux.FileMode, private, create, exclusive bool) (*Queue, error) {
+	r.mu.Lock()
+	defer r.mu.Unlock()
+
+	if !private {
+		mech, err := r.reg.Find(ctx, key, mode, create, exclusive)
+		switch {
+		case err != nil:
+			return nil, err
+		case mech != nil:
+			return mech.(*Queue), nil
+		}
+	}
+
+	// Enforce the system-wide cap on the number of queues.
+	if r.reg.ObjectCount() >= maxQueues {
+		return nil, linuxerr.ENOSPC
+	}
+
+	owner, perms := fs.FileOwnerFromContext(ctx), fs.FilePermsFromMode(mode)
+	return r.newQueueLocked(ctx, key, owner, perms)
+}
+
+// newQueueLocked builds a queue from the given fields and registers it with
+// r.reg. An error is returned if there're no more available identifiers.
+//
+// Precondition: r.mu must be held.
+func (r *Registry) newQueueLocked(ctx context.Context, key ipc.Key, creator fs.FileOwner, perms fs.FilePermissions) (*Queue, error) {
+	queue := &Queue{
+		registry:    r,
+		obj:         ipc.NewObject(r.reg.UserNS, key, creator, creator, perms),
+		sendTime:    ktime.ZeroTime,
+		receiveTime: ktime.ZeroTime,
+		changeTime:  ktime.NowFromContext(ctx),
+		maxBytes:    maxQueueBytes,
+	}
+
+	// A queue that fails to register is dropped rather than returned.
+	if err := r.reg.Register(queue); err != nil {
+		return nil, err
+	}
+	return queue, nil
+}
+
+// Remove removes the queue with the specified ID. All waiters (readers and
+// writers) will be awakened and fail. Remove returns an error if the ID is
+// invalid, or if the caller doesn't have the privileges to remove the queue.
+func (r *Registry) Remove(id ipc.ID, creds *auth.Credentials) error {
+	r.mu.Lock()
+	defer r.mu.Unlock()
+
+	// Propagate the registry's result instead of always reporting success.
+	return r.reg.Remove(id, creds)
+}
+
+// Lock implements ipc.Mechanism.Lock by acquiring q.mu.
+func (q *Queue) Lock() {
+	q.mu.Lock()
+}
+
+// Unlock implements ipc.Mechanism.Unlock by releasing q.mu.
+//
+// +checklocksignore
+func (q *Queue) Unlock() {
+	q.mu.Unlock()
+}
+
+// Object implements ipc.Mechanism.Object by returning q.obj.
+func (q *Queue) Object() *ipc.Object {
+	return q.obj
+}
+
+// Destroy implements ipc.Mechanism.Destroy. It marks q dead and wakes waiters.
+func (q *Queue) Destroy() {
+	q.dead = true
+
+	// Notify waiters only after dead is set. Senders and receivers will try
+	// to run, and return an error (EIDRM). Waiters should remove themselves
+	// from the queue after waking up.
+	q.senders.Notify(waiter.EventOut)
+	q.receivers.Notify(waiter.EventIn)
+}
+
+// ID returns the queue's ID, as stored in q.obj.
+func (q *Queue) ID() ipc.ID {
+	return q.obj.ID
+}
diff --git a/pkg/sentry/kernel/pipe/BUILD b/pkg/sentry/kernel/pipe/BUILD
index af46b3e08..94ebac7c5 100644
--- a/pkg/sentry/kernel/pipe/BUILD
+++ b/pkg/sentry/kernel/pipe/BUILD
@@ -21,6 +21,7 @@ go_library(
         "//pkg/abi/linux",
         "//pkg/amutex",
         "//pkg/context",
+        "//pkg/errors/linuxerr",
         "//pkg/hostarch",
         "//pkg/marshal/primitive",
         "//pkg/safemem",
diff --git a/pkg/sentry/kernel/pipe/node.go b/pkg/sentry/kernel/pipe/node.go
index 6497dc4ba..08786d704 100644
--- a/pkg/sentry/kernel/pipe/node.go
+++ b/pkg/sentry/kernel/pipe/node.go
@@ -17,6 +17,7 @@ package pipe
 import (
 	"gvisor.dev/gvisor/pkg/abi/linux"
 	"gvisor.dev/gvisor/pkg/context"
+	"gvisor.dev/gvisor/pkg/errors/linuxerr"
 	"gvisor.dev/gvisor/pkg/sentry/fs"
 	"gvisor.dev/gvisor/pkg/sentry/fs/fsutil"
 	"gvisor.dev/gvisor/pkg/sync"
@@ -112,7 +113,7 @@ func (i *inodeOperations) GetFile(ctx context.Context, d *fs.Dirent, flags fs.Fi
 			// read side isn't open yet.
 			if flags.NonBlocking {
 				w.DecRef(ctx)
-				return nil, syserror.ENXIO
+				return nil, linuxerr.ENXIO
 			}
 
 			if !waitFor(&i.mu, &i.rWakeup, ctx) {
@@ -130,10 +131,10 @@ func (i *inodeOperations) GetFile(ctx context.Context, d *fs.Dirent, flags fs.Fi
 		return rw, nil
 
 	default:
-		return nil, syserror.EINVAL
+		return nil, linuxerr.EINVAL
 	}
 }
 
 func (*inodeOperations) Allocate(_ context.Context, _ *fs.Inode, _, _ int64) error {
-	return syserror.EPIPE
+	return linuxerr.EPIPE
 }
diff --git a/pkg/sentry/kernel/pipe/pipe.go b/pkg/sentry/kernel/pipe/pipe.go
index 06769931a..85e3ce9f4 100644
--- a/pkg/sentry/kernel/pipe/pipe.go
+++ b/pkg/sentry/kernel/pipe/pipe.go
@@ -22,6 +22,7 @@ import (
 
 	"golang.org/x/sys/unix"
 	"gvisor.dev/gvisor/pkg/context"
+	"gvisor.dev/gvisor/pkg/errors/linuxerr"
 	"gvisor.dev/gvisor/pkg/hostarch"
 	"gvisor.dev/gvisor/pkg/safemem"
 	"gvisor.dev/gvisor/pkg/sentry/fs"
@@ -428,18 +429,18 @@ func (p *Pipe) FifoSize(context.Context, *fs.File) (int64, error) {
 // SetFifoSize implements fs.FifoSizer.SetFifoSize.
 func (p *Pipe) SetFifoSize(size int64) (int64, error) {
 	if size < 0 {
-		return 0, syserror.EINVAL
+		return 0, linuxerr.EINVAL
 	}
 	if size < MinimumPipeSize {
 		size = MinimumPipeSize // Per spec.
 	}
 	if size > MaximumPipeSize {
-		return 0, syserror.EPERM
+		return 0, linuxerr.EPERM
 	}
 	p.mu.Lock()
 	defer p.mu.Unlock()
 	if size < p.size {
-		return 0, syserror.EBUSY
+		return 0, linuxerr.EBUSY
 	}
 	p.max = size
 	return size, nil
diff --git a/pkg/sentry/kernel/pipe/pipe_unsafe.go b/pkg/sentry/kernel/pipe/pipe_unsafe.go
index dd60cba24..077c5d596 100644
--- a/pkg/sentry/kernel/pipe/pipe_unsafe.go
+++ b/pkg/sentry/kernel/pipe/pipe_unsafe.go
@@ -23,6 +23,8 @@ import (
 // concurrent calls cannot deadlock.
 //
 // Preconditions: x != y.
+// +checklocksacquire:x.mu
+// +checklocksacquire:y.mu
 func lockTwoPipes(x, y *Pipe) {
 	// Lock the two pipes in order of increasing address.
 	if uintptr(unsafe.Pointer(x)) < uintptr(unsafe.Pointer(y)) {
diff --git a/pkg/sentry/kernel/pipe/pipe_util.go b/pkg/sentry/kernel/pipe/pipe_util.go
index 3fa5d1d2f..c883a9014 100644
--- a/pkg/sentry/kernel/pipe/pipe_util.go
+++ b/pkg/sentry/kernel/pipe/pipe_util.go
@@ -22,6 +22,7 @@ import (
 	"gvisor.dev/gvisor/pkg/abi/linux"
 	"gvisor.dev/gvisor/pkg/amutex"
 	"gvisor.dev/gvisor/pkg/context"
+	"gvisor.dev/gvisor/pkg/errors/linuxerr"
 	"gvisor.dev/gvisor/pkg/marshal/primitive"
 	"gvisor.dev/gvisor/pkg/safemem"
 	"gvisor.dev/gvisor/pkg/sentry/arch"
@@ -86,7 +87,7 @@ func (p *Pipe) Write(ctx context.Context, src usermem.IOSequence) (int64, error)
 	if n > 0 {
 		p.Notify(waiter.ReadableEvents)
 	}
-	if err == unix.EPIPE {
+	if linuxerr.Equals(linuxerr.EPIPE, err) {
 		// If we are returning EPIPE send SIGPIPE to the task.
 		if sendSig := linux.SignalNoInfoFuncFromContext(ctx); sendSig != nil {
 			sendSig(linux.SIGPIPE)
@@ -156,6 +157,7 @@ func (p *Pipe) Ioctl(ctx context.Context, io usermem.IO, args arch.SyscallArgume
 //
 // mu must be held by the caller. waitFor returns with mu held, but it will
 // drop mu before blocking for any reader/writers.
+// +checklocks:mu
 func waitFor(mu *sync.Mutex, wakeupChan *chan struct{}, sleeper amutex.Sleeper) bool {
 	// Ideally this function would simply use a condition variable. However, the
 	// wait needs to be interruptible via 'sleeper', so we must sychronize via a
diff --git a/pkg/sentry/kernel/pipe/vfs.go b/pkg/sentry/kernel/pipe/vfs.go
index 95b948edb..077d5fd7f 100644
--- a/pkg/sentry/kernel/pipe/vfs.go
+++ b/pkg/sentry/kernel/pipe/vfs.go
@@ -17,6 +17,7 @@ package pipe
 import (
 	"gvisor.dev/gvisor/pkg/abi/linux"
 	"gvisor.dev/gvisor/pkg/context"
+	"gvisor.dev/gvisor/pkg/errors/linuxerr"
 	"gvisor.dev/gvisor/pkg/hostarch"
 	"gvisor.dev/gvisor/pkg/safemem"
 	"gvisor.dev/gvisor/pkg/sentry/arch"
@@ -79,7 +80,7 @@ func (vp *VFSPipe) ReaderWriterPair(ctx context.Context, mnt *vfs.Mount, vfsd *v
 
 // Allocate implements vfs.FileDescriptionImpl.Allocate.
 func (*VFSPipe) Allocate(context.Context, uint64, uint64, uint64) error {
-	return syserror.ESPIPE
+	return linuxerr.ESPIPE
 }
 
 // Open opens the pipe represented by vp.
@@ -90,7 +91,7 @@ func (vp *VFSPipe) Open(ctx context.Context, mnt *vfs.Mount, vfsd *vfs.Dentry, s
 	readable := vfs.MayReadFileWithOpenFlags(statusFlags)
 	writable := vfs.MayWriteFileWithOpenFlags(statusFlags)
 	if !readable && !writable {
-		return nil, syserror.EINVAL
+		return nil, linuxerr.EINVAL
 	}
 
 	fd, err := vp.newFD(mnt, vfsd, statusFlags, locks)
@@ -131,7 +132,7 @@ func (vp *VFSPipe) Open(ctx context.Context, mnt *vfs.Mount, vfsd *vfs.Dentry, s
 			// side isn't open yet.
 			if statusFlags&linux.O_NONBLOCK != 0 {
 				fd.DecRef(ctx)
-				return nil, syserror.ENXIO
+				return nil, linuxerr.ENXIO
 			}
 			// Wait for a reader to open the other end.
 			if !waitFor(&vp.mu, &vp.rWakeup, ctx) {
@@ -224,7 +225,7 @@ func (fd *VFSPipeFD) Readiness(mask waiter.EventMask) waiter.EventMask {
 
 // Allocate implements vfs.FileDescriptionImpl.Allocate.
 func (fd *VFSPipeFD) Allocate(ctx context.Context, mode, offset, length uint64) error {
-	return syserror.ESPIPE
+	return linuxerr.ESPIPE
 }
 
 // EventRegister implements waiter.Waitable.EventRegister.
@@ -415,7 +416,7 @@ func Tee(ctx context.Context, dst, src *VFSPipeFD, count int64) (int64, error) {
 // Preconditions: count > 0.
 func spliceOrTee(ctx context.Context, dst, src *VFSPipeFD, count int64, removeFromSrc bool) (int64, error) {
 	if dst.pipe == src.pipe {
-		return 0, syserror.EINVAL
+		return 0, linuxerr.EINVAL
 	}
 
 	lockTwoPipes(dst.pipe, src.pipe)
diff --git a/pkg/sentry/kernel/posixtimer.go b/pkg/sentry/kernel/posixtimer.go
index d801a3d83..319754a42 100644
--- a/pkg/sentry/kernel/posixtimer.go
+++ b/pkg/sentry/kernel/posixtimer.go
@@ -18,8 +18,8 @@ import (
 	"math"
 
 	"gvisor.dev/gvisor/pkg/abi/linux"
+	"gvisor.dev/gvisor/pkg/errors/linuxerr"
 	ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time"
-	"gvisor.dev/gvisor/pkg/syserror"
 )
 
 // IntervalTimer represents a POSIX interval timer as described by
@@ -175,7 +175,7 @@ func (t *Task) IntervalTimerCreate(c ktime.Clock, sigev *linux.Sigevent) (linux.
 			break
 		}
 		if t.tg.nextTimerID == end {
-			return 0, syserror.EAGAIN
+			return 0, linuxerr.EAGAIN
 		}
 	}
 
@@ -214,16 +214,16 @@ func (t *Task) IntervalTimerCreate(c ktime.Clock, sigev *linux.Sigevent) (linux.
 		target, ok := t.tg.pidns.tasks[ThreadID(sigev.Tid)]
 		t.tg.pidns.owner.mu.RUnlock()
 		if !ok || target.tg != t.tg {
-			return 0, syserror.EINVAL
+			return 0, linuxerr.EINVAL
 		}
 		it.target = target
 	default:
-		return 0, syserror.EINVAL
+		return 0, linuxerr.EINVAL
 	}
 	if sigev.Notify != linux.SIGEV_NONE {
 		it.signo = linux.Signal(sigev.Signo)
 		if !it.signo.IsValid() {
-			return 0, syserror.EINVAL
+			return 0, linuxerr.EINVAL
 		}
 	}
 	it.timer = ktime.NewTimer(c, it)
@@ -238,7 +238,7 @@ func (t *Task) IntervalTimerDelete(id linux.TimerID) error {
 	defer t.tg.timerMu.Unlock()
 	it := t.tg.timers[id]
 	if it == nil {
-		return syserror.EINVAL
+		return linuxerr.EINVAL
 	}
 	delete(t.tg.timers, id)
 	it.DestroyTimer()
@@ -251,7 +251,7 @@ func (t *Task) IntervalTimerSettime(id linux.TimerID, its linux.Itimerspec, abs
 	defer t.tg.timerMu.Unlock()
 	it := t.tg.timers[id]
 	if it == nil {
-		return linux.Itimerspec{}, syserror.EINVAL
+		return linux.Itimerspec{}, linuxerr.EINVAL
 	}
 
 	newS, err := ktime.SettingFromItimerspec(its, abs, it.timer.Clock())
@@ -269,7 +269,7 @@ func (t *Task) IntervalTimerGettime(id linux.TimerID) (linux.Itimerspec, error)
 	defer t.tg.timerMu.Unlock()
 	it := t.tg.timers[id]
 	if it == nil {
-		return linux.Itimerspec{}, syserror.EINVAL
+		return linux.Itimerspec{}, linuxerr.EINVAL
 	}
 
 	tm, s := it.timer.Get()
@@ -285,7 +285,7 @@ func (t *Task) IntervalTimerGetoverrun(id linux.TimerID) (int32, error) {
 	defer t.tg.timerMu.Unlock()
 	it := t.tg.timers[id]
 	if it == nil {
-		return 0, syserror.EINVAL
+		return 0, linuxerr.EINVAL
 	}
 	// By timer_create(2) invariant, either it.target == nil (in which case
 	// it.overrunLast is immutably 0) or t.tg == it.target.tg; and the fact
diff --git a/pkg/sentry/kernel/ptrace.go b/pkg/sentry/kernel/ptrace.go
index 20563f02a..079294f81 100644
--- a/pkg/sentry/kernel/ptrace.go
+++ b/pkg/sentry/kernel/ptrace.go
@@ -19,6 +19,7 @@ import (
 	"sync/atomic"
 
 	"gvisor.dev/gvisor/pkg/abi/linux"
+	"gvisor.dev/gvisor/pkg/errors/linuxerr"
 	"gvisor.dev/gvisor/pkg/hostarch"
 	"gvisor.dev/gvisor/pkg/marshal/primitive"
 	"gvisor.dev/gvisor/pkg/sentry/mm"
@@ -481,7 +482,7 @@ func (t *Task) ptraceTraceme() error {
 	t.tg.pidns.owner.mu.Lock()
 	defer t.tg.pidns.owner.mu.Unlock()
 	if t.hasTracer() {
-		return syserror.EPERM
+		return linuxerr.EPERM
 	}
 	if t.parent == nil {
 		// In Linux, only init can not have a parent, and init is assumed never
@@ -497,7 +498,7 @@ func (t *Task) ptraceTraceme() error {
 		return nil
 	}
 	if !t.parent.canTraceLocked(t, true) {
-		return syserror.EPERM
+		return linuxerr.EPERM
 	}
 	if t.parent.exitState != TaskExitNone {
 		// Fail silently, as if we were successfully attached but then
@@ -513,21 +514,21 @@ func (t *Task) ptraceTraceme() error {
 // ptrace(PTRACE_SEIZE, target, 0, opts) if seize is true. t is the caller.
 func (t *Task) ptraceAttach(target *Task, seize bool, opts uintptr) error {
 	if t.tg == target.tg {
-		return syserror.EPERM
+		return linuxerr.EPERM
 	}
 	t.tg.pidns.owner.mu.Lock()
 	defer t.tg.pidns.owner.mu.Unlock()
 	if !t.canTraceLocked(target, true) {
-		return syserror.EPERM
+		return linuxerr.EPERM
 	}
 	if target.hasTracer() {
-		return syserror.EPERM
+		return linuxerr.EPERM
 	}
 	// Attaching to zombies and dead tasks is not permitted; the exit
 	// notification logic relies on this. Linux allows attaching to PF_EXITING
 	// tasks, though.
 	if target.exitState >= TaskExitZombie {
-		return syserror.EPERM
+		return linuxerr.EPERM
 	}
 	if seize {
 		if err := target.ptraceSetOptionsLocked(opts); err != nil {
@@ -651,6 +652,7 @@ func (t *Task) forgetTracerLocked() {
 // Preconditions:
 // * The signal mutex must be locked.
 // * The caller must be running on the task goroutine.
+// +checklocks:t.tg.signalHandlers.mu
 func (t *Task) ptraceSignalLocked(info *linux.SignalInfo) bool {
 	if linux.Signal(info.Signo) == linux.SIGKILL {
 		return false
@@ -766,14 +768,14 @@ const (
 // ptraceClone is called at the end of a clone or fork syscall to check if t
 // should enter PTRACE_EVENT_CLONE, PTRACE_EVENT_FORK, or PTRACE_EVENT_VFORK
 // stop. child is the new task.
-func (t *Task) ptraceClone(kind ptraceCloneKind, child *Task, opts *CloneOptions) bool {
+func (t *Task) ptraceClone(kind ptraceCloneKind, child *Task, args *linux.CloneArgs) bool {
 	if !t.hasTracer() {
 		return false
 	}
 	t.tg.pidns.owner.mu.Lock()
 	defer t.tg.pidns.owner.mu.Unlock()
 	event := false
-	if !opts.Untraced {
+	if args.Flags&linux.CLONE_UNTRACED == 0 {
 		switch kind {
 		case ptraceCloneKindClone:
 			if t.ptraceOpts.TraceClone {
@@ -808,7 +810,7 @@ func (t *Task) ptraceClone(kind ptraceCloneKind, child *Task, opts *CloneOptions
 	// clone(2)'s documentation of CLONE_UNTRACED and CLONE_PTRACE is
 	// confusingly wrong; see kernel/fork.c:_do_fork() => copy_process() =>
 	// include/linux/ptrace.h:ptrace_init_task().
-	if event || opts.InheritTracer {
+	if event || args.Flags&linux.CLONE_PTRACE != 0 {
 		tracer := t.Tracer()
 		if tracer != nil {
 			child.ptraceTracer.Store(tracer)
@@ -910,7 +912,7 @@ func (t *Task) ptraceExit() {
 		return
 	}
 	t.tg.signalHandlers.mu.Lock()
-	status := t.exitStatus.Status()
+	status := t.exitStatus
 	t.tg.signalHandlers.mu.Unlock()
 	t.Debugf("Entering PTRACE_EVENT_EXIT stop")
 	t.ptraceEventLocked(linux.PTRACE_EVENT_EXIT, uint64(status))
@@ -938,7 +940,7 @@ func (t *Task) ptraceKill(target *Task) error {
 	t.tg.pidns.owner.mu.Lock()
 	defer t.tg.pidns.owner.mu.Unlock()
 	if target.Tracer() != t {
-		return syserror.ESRCH
+		return linuxerr.ESRCH
 	}
 	target.tg.signalHandlers.mu.Lock()
 	defer target.tg.signalHandlers.mu.Unlock()
@@ -962,7 +964,7 @@ func (t *Task) ptraceInterrupt(target *Task) error {
 	t.tg.pidns.owner.mu.Lock()
 	defer t.tg.pidns.owner.mu.Unlock()
 	if target.Tracer() != t {
-		return syserror.ESRCH
+		return linuxerr.ESRCH
 	}
 	if !target.ptraceSeized {
 		return syserror.EIO
@@ -994,7 +996,7 @@ func (t *Task) ptraceSetOptionsLocked(opts uintptr) error {
 		linux.PTRACE_O_TRACEVFORK |
 		linux.PTRACE_O_TRACEVFORKDONE)
 	if opts&^valid != 0 {
-		return syserror.EINVAL
+		return linuxerr.EINVAL
 	}
 	t.ptraceOpts = ptraceOptions{
 		ExitKill:       opts&linux.PTRACE_O_EXITKILL != 0,
@@ -1020,7 +1022,7 @@ func (t *Task) Ptrace(req int64, pid ThreadID, addr, data hostarch.Addr) error {
 	// specified by pid.
 	target := t.tg.pidns.TaskWithID(pid)
 	if target == nil {
-		return syserror.ESRCH
+		return linuxerr.ESRCH
 	}
 
 	// PTRACE_ATTACH and PTRACE_SEIZE do not require that target is not already
@@ -1045,7 +1047,7 @@ func (t *Task) Ptrace(req int64, pid ThreadID, addr, data hostarch.Addr) error {
 	t.tg.pidns.owner.mu.RLock()
 	if target.Tracer() != t {
 		t.tg.pidns.owner.mu.RUnlock()
-		return syserror.ESRCH
+		return linuxerr.ESRCH
 	}
 	if !target.ptraceFreeze() {
 		t.tg.pidns.owner.mu.RUnlock()
@@ -1053,7 +1055,7 @@ func (t *Task) Ptrace(req int64, pid ThreadID, addr, data hostarch.Addr) error {
 		// PTRACE_TRACEME, PTRACE_INTERRUPT, and PTRACE_KILL) require the
 		// tracee to be in a ptrace-stop, otherwise they fail with ESRCH." -
 		// ptrace(2)
-		return syserror.ESRCH
+		return linuxerr.ESRCH
 	}
 	t.tg.pidns.owner.mu.RUnlock()
 	// Even if the target has a ptrace-stop active, the tracee's task goroutine
@@ -1221,7 +1223,7 @@ func (t *Task) Ptrace(req int64, pid ThreadID, addr, data hostarch.Addr) error {
 		t.tg.pidns.owner.mu.RLock()
 		defer t.tg.pidns.owner.mu.RUnlock()
 		if target.ptraceSiginfo == nil {
-			return syserror.EINVAL
+			return linuxerr.EINVAL
 		}
 		_, err := target.ptraceSiginfo.CopyOut(t, data)
 		return err
@@ -1234,14 +1236,14 @@ func (t *Task) Ptrace(req int64, pid ThreadID, addr, data hostarch.Addr) error {
 		t.tg.pidns.owner.mu.RLock()
 		defer t.tg.pidns.owner.mu.RUnlock()
 		if target.ptraceSiginfo == nil {
-			return syserror.EINVAL
+			return linuxerr.EINVAL
 		}
 		target.ptraceSiginfo = &info
 		return nil
 
 	case linux.PTRACE_GETSIGMASK:
 		if addr != linux.SignalSetSize {
-			return syserror.EINVAL
+			return linuxerr.EINVAL
 		}
 		mask := target.SignalMask()
 		_, err := mask.CopyOut(t, data)
@@ -1249,7 +1251,7 @@ func (t *Task) Ptrace(req int64, pid ThreadID, addr, data hostarch.Addr) error {
 
 	case linux.PTRACE_SETSIGMASK:
 		if addr != linux.SignalSetSize {
-			return syserror.EINVAL
+			return linuxerr.EINVAL
 		}
 		var mask linux.SignalSet
 		if _, err := mask.CopyIn(t, data); err != nil {
diff --git a/pkg/sentry/kernel/ptrace_amd64.go b/pkg/sentry/kernel/ptrace_amd64.go
index 5ae05b5c3..63422e155 100644
--- a/pkg/sentry/kernel/ptrace_amd64.go
+++ b/pkg/sentry/kernel/ptrace_amd64.go
@@ -12,6 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
+//go:build amd64
 // +build amd64
 
 package kernel
diff --git a/pkg/sentry/kernel/ptrace_arm64.go b/pkg/sentry/kernel/ptrace_arm64.go
index 46dd84cbc..27514d67b 100644
--- a/pkg/sentry/kernel/ptrace_arm64.go
+++ b/pkg/sentry/kernel/ptrace_arm64.go
@@ -12,6 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
+//go:build arm64
 // +build arm64
 
 package kernel
diff --git a/pkg/sentry/kernel/rseq.go b/pkg/sentry/kernel/rseq.go
index 4bc5bca44..de352f4f2 100644
--- a/pkg/sentry/kernel/rseq.go
+++ b/pkg/sentry/kernel/rseq.go
@@ -18,9 +18,9 @@ import (
 	"fmt"
 
 	"gvisor.dev/gvisor/pkg/abi/linux"
+	"gvisor.dev/gvisor/pkg/errors/linuxerr"
 	"gvisor.dev/gvisor/pkg/hostarch"
 	"gvisor.dev/gvisor/pkg/sentry/hostcpu"
-	"gvisor.dev/gvisor/pkg/syserror"
 	"gvisor.dev/gvisor/pkg/usermem"
 )
 
@@ -59,23 +59,23 @@ func (t *Task) RSeqAvailable() bool {
 func (t *Task) SetRSeq(addr hostarch.Addr, length, signature uint32) error {
 	if t.rseqAddr != 0 {
 		if t.rseqAddr != addr {
-			return syserror.EINVAL
+			return linuxerr.EINVAL
 		}
 		if t.rseqSignature != signature {
-			return syserror.EINVAL
+			return linuxerr.EINVAL
 		}
-		return syserror.EBUSY
+		return linuxerr.EBUSY
 	}
 
 	// rseq must be aligned and correctly sized.
 	if addr&(linux.AlignOfRSeq-1) != 0 {
-		return syserror.EINVAL
+		return linuxerr.EINVAL
 	}
 	if length != linux.SizeOfRSeq {
-		return syserror.EINVAL
+		return linuxerr.EINVAL
 	}
 	if _, ok := t.MemoryManager().CheckIORange(addr, linux.SizeOfRSeq); !ok {
-		return syserror.EFAULT
+		return linuxerr.EFAULT
 	}
 
 	t.rseqAddr = addr
@@ -92,7 +92,7 @@ func (t *Task) SetRSeq(addr hostarch.Addr, length, signature uint32) error {
 		t.Debugf("Failed to copy CPU to %#x for rseq: %v", t.rseqAddr, err)
 		t.forceSignal(linux.SIGSEGV, false /* unconditional */)
 		t.SendSignal(SignalInfoPriv(linux.SIGSEGV))
-		return syserror.EFAULT
+		return linuxerr.EFAULT
 	}
 
 	return nil
@@ -103,16 +103,16 @@ func (t *Task) SetRSeq(addr hostarch.Addr, length, signature uint32) error {
 // Preconditions: The caller must be running on the task goroutine.
 func (t *Task) ClearRSeq(addr hostarch.Addr, length, signature uint32) error {
 	if t.rseqAddr == 0 {
-		return syserror.EINVAL
+		return linuxerr.EINVAL
 	}
 	if t.rseqAddr != addr {
-		return syserror.EINVAL
+		return linuxerr.EINVAL
 	}
 	if length != linux.SizeOfRSeq {
-		return syserror.EINVAL
+		return linuxerr.EINVAL
 	}
 	if t.rseqSignature != signature {
-		return syserror.EPERM
+		return linuxerr.EPERM
 	}
 
 	if err := t.rseqClearCPU(); err != nil {
@@ -152,10 +152,10 @@ func (t *Task) SetOldRSeqCriticalRegion(r OldRSeqCriticalRegion) error {
 		return nil
 	}
 	if r.CriticalSection.Start >= r.CriticalSection.End {
-		return syserror.EINVAL
+		return linuxerr.EINVAL
 	}
 	if r.CriticalSection.Contains(r.Restart) {
-		return syserror.EINVAL
+		return linuxerr.EINVAL
 	}
 	// TODO(jamieliu): check that r.CriticalSection and r.Restart are in
 	// the application address range, for consistency with Linux.
@@ -187,7 +187,7 @@ func (t *Task) SetOldRSeqCPUAddr(addr hostarch.Addr) error {
 	// unfortunate, but unlikely in a correct program.
 	if err := t.rseqUpdateCPU(); err != nil {
 		t.oldRSeqCPUAddr = 0
-		return syserror.EINVAL // yes, EINVAL, not err or EFAULT
+		return linuxerr.EINVAL // yes, EINVAL, not err or EFAULT
 	}
 	return nil
 }
diff --git a/pkg/sentry/kernel/semaphore/BUILD b/pkg/sentry/kernel/semaphore/BUILD
index 65e5427c1..2ae08ed12 100644
--- a/pkg/sentry/kernel/semaphore/BUILD
+++ b/pkg/sentry/kernel/semaphore/BUILD
@@ -25,9 +25,10 @@ go_library(
     deps = [
         "//pkg/abi/linux",
         "//pkg/context",
-        "//pkg/log",
+        "//pkg/errors/linuxerr",
         "//pkg/sentry/fs",
         "//pkg/sentry/kernel/auth",
+        "//pkg/sentry/kernel/ipc",
         "//pkg/sentry/kernel/time",
         "//pkg/sync",
         "//pkg/syserror",
@@ -40,10 +41,11 @@ go_test(
     srcs = ["semaphore_test.go"],
     library = ":semaphore",
     deps = [
-        "//pkg/abi/linux",
-        "//pkg/context",
-        "//pkg/sentry/contexttest",
-        "//pkg/sentry/kernel/auth",
-        "//pkg/syserror",
+        "//pkg/abi/linux",  # keep
+        "//pkg/context",  # keep
+        "//pkg/sentry/contexttest",  # keep
+        "//pkg/sentry/kernel/auth",  # keep
+        "//pkg/sentry/kernel/ipc",  # keep
+        "//pkg/syserror",  # keep
     ],
 )
diff --git a/pkg/sentry/kernel/semaphore/semaphore.go b/pkg/sentry/kernel/semaphore/semaphore.go
index 47bb66b42..8610d3fc1 100644
--- a/pkg/sentry/kernel/semaphore/semaphore.go
+++ b/pkg/sentry/kernel/semaphore/semaphore.go
@@ -20,9 +20,10 @@ import (
 
 	"gvisor.dev/gvisor/pkg/abi/linux"
 	"gvisor.dev/gvisor/pkg/context"
-	"gvisor.dev/gvisor/pkg/log"
+	"gvisor.dev/gvisor/pkg/errors/linuxerr"
 	"gvisor.dev/gvisor/pkg/sentry/fs"
 	"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+	"gvisor.dev/gvisor/pkg/sentry/kernel/ipc"
 	ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time"
 	"gvisor.dev/gvisor/pkg/sync"
 	"gvisor.dev/gvisor/pkg/syserror"
@@ -46,15 +47,15 @@ const (
 //
 // +stateify savable
 type Registry struct {
-	// userNS owning the ipc name this registry belongs to. Immutable.
-	userNS *auth.UserNamespace
 	// mu protects all fields below.
-	mu         sync.Mutex `state:"nosave"`
-	semaphores map[int32]*Set
-	lastIDUsed int32
+	mu sync.Mutex `state:"nosave"`
+
+	// reg defines basic fields and operations needed for all SysV registries.
+	reg *ipc.Registry
+
 	// indexes maintains a mapping between a set's index in virtual array and
 	// its identifier.
-	indexes map[int32]int32
+	indexes map[int32]ipc.ID
 }
 
 // Set represents a set of semaphores that can be operated atomically.
@@ -64,19 +65,11 @@ type Set struct {
 	// registry owning this sem set. Immutable.
 	registry *Registry
 
-	// Id is a handle that identifies the set.
-	ID int32
-
-	// key is an user provided key that can be shared between processes.
-	key int32
+	// mu protects all fields below.
+	mu sync.Mutex `state:"nosave"`
 
-	// creator is the user that created the set. Immutable.
-	creator fs.FileOwner
+	obj *ipc.Object
 
-	// mu protects all fields below.
-	mu         sync.Mutex `state:"nosave"`
-	owner      fs.FileOwner
-	perms      fs.FilePermissions
 	opTime     ktime.Time
 	changeTime ktime.Time
 
@@ -114,9 +107,8 @@ type waiter struct {
 // NewRegistry creates a new semaphore set registry.
 func NewRegistry(userNS *auth.UserNamespace) *Registry {
 	return &Registry{
-		userNS:     userNS,
-		semaphores: make(map[int32]*Set),
-		indexes:    make(map[int32]int32),
+		reg:     ipc.NewRegistry(userNS),
+		indexes: make(map[int32]ipc.ID),
 	}
 }
 
@@ -125,52 +117,40 @@ func NewRegistry(userNS *auth.UserNamespace) *Registry {
 // a new set is always created. If create is false, it fails if a set cannot
 // be found. If exclusive is true, it fails if a set with the same key already
 // exists.
-func (r *Registry) FindOrCreate(ctx context.Context, key, nsems int32, mode linux.FileMode, private, create, exclusive bool) (*Set, error) {
+func (r *Registry) FindOrCreate(ctx context.Context, key ipc.Key, nsems int32, mode linux.FileMode, private, create, exclusive bool) (*Set, error) {
 	if nsems < 0 || nsems > semsMax {
-		return nil, syserror.EINVAL
+		return nil, linuxerr.EINVAL
 	}
 
 	r.mu.Lock()
 	defer r.mu.Unlock()
 
 	if !private {
-		// Look up an existing semaphore.
-		if set := r.findByKey(key); set != nil {
-			set.mu.Lock()
-			defer set.mu.Unlock()
-
-			// Check that caller can access semaphore set.
-			creds := auth.CredentialsFromContext(ctx)
-			if !set.checkPerms(creds, fs.PermsFromMode(mode)) {
-				return nil, syserror.EACCES
-			}
+		set, err := r.reg.Find(ctx, key, mode, create, exclusive)
+		if err != nil {
+			return nil, err
+		}
 
-			// Validate parameters.
+		// Validate semaphore-specific parameters.
+		if set != nil {
+			set := set.(*Set)
 			if nsems > int32(set.Size()) {
-				return nil, syserror.EINVAL
-			}
-			if create && exclusive {
-				return nil, syserror.EEXIST
+				return nil, linuxerr.EINVAL
 			}
 			return set, nil
 		}
-
-		if !create {
-			// Semaphore not found and should not be created.
-			return nil, syserror.ENOENT
-		}
 	}
 
 	// Zero is only valid if an existing set is found.
 	if nsems == 0 {
-		return nil, syserror.EINVAL
+		return nil, linuxerr.EINVAL
 	}
 
 	// Apply system limits.
 	//
-	// Map semaphores and map indexes in a registry are of the same size,
-	// check map semaphores only here for the system limit.
-	if len(r.semaphores) >= setsMax {
+	// Map reg.objects and map indexes in a registry are of the same size,
+	// check map reg.objects only here for the system limit.
+	if r.reg.ObjectCount() >= setsMax {
 		return nil, syserror.ENOSPC
 	}
 	if r.totalSems() > int(semsTotalMax-nsems) {
@@ -178,9 +158,7 @@ func (r *Registry) FindOrCreate(ctx context.Context, key, nsems int32, mode linu
 	}
 
 	// Finally create a new set.
-	owner := fs.FileOwnerFromContext(ctx)
-	perms := fs.FilePermsFromMode(mode)
-	return r.newSet(ctx, key, owner, owner, perms, nsems)
+	return r.newSetLocked(ctx, key, fs.FileOwnerFromContext(ctx), fs.FilePermsFromMode(mode), nsems)
 }
 
 // IPCInfo returns information about system-wide semaphore limits and parameters.
@@ -207,7 +185,7 @@ func (r *Registry) SemInfo() *linux.SemInfo {
 	defer r.mu.Unlock()
 
 	info := r.IPCInfo()
-	info.SemUsz = uint32(len(r.semaphores))
+	info.SemUsz = uint32(r.reg.ObjectCount())
 	info.SemAem = uint32(r.totalSems())
 
 	return info
@@ -230,77 +208,59 @@ func (r *Registry) HighestIndex() int32 {
 	return highestIndex
 }
 
-// RemoveID removes set with give 'id' from the registry and marks the set as
-// dead. All waiters will be awakened and fail.
-func (r *Registry) RemoveID(id int32, creds *auth.Credentials) error {
+// Remove removes the set with the given 'id' from the registry and marks it
+// dead, waking all waiters so they fail. Its index is freed only on success.
+func (r *Registry) Remove(id ipc.ID, creds *auth.Credentials) error {
 	r.mu.Lock()
 	defer r.mu.Unlock()
 
-	set := r.semaphores[id]
-	if set == nil {
-		return syserror.EINVAL
-	}
 	index, found := r.findIndexByID(id)
 	if !found {
-		// Inconsistent state.
-		panic(fmt.Sprintf("unable to find an index for ID: %d", id))
+		return linuxerr.EINVAL
 	}
-
-	set.mu.Lock()
-	defer set.mu.Unlock()
-
-	// "The effective user ID of the calling process must match the creator or
-	// owner of the semaphore set, or the caller must be privileged."
-	if !set.checkCredentials(creds) && !set.checkCapability(creds) {
-		return syserror.EACCES
-	}
-
-	delete(r.semaphores, set.ID)
+	if err := r.reg.Remove(id, creds); err != nil {
+		return err
+	}
 	delete(r.indexes, index)
-	set.destroy()
 	return nil
 }
 
-func (r *Registry) newSet(ctx context.Context, key int32, owner, creator fs.FileOwner, perms fs.FilePermissions, nsems int32) (*Set, error) {
+// newSetLocked creates a new Set from the given fields. It fails if no index
+// is available or if the set cannot be registered.
+//
+// Precondition: r.mu must be held.
+func (r *Registry) newSetLocked(ctx context.Context, key ipc.Key, creator fs.FileOwner, perms fs.FilePermissions, nsems int32) (*Set, error) {
 	set := &Set{
 		registry:   r,
-		key:        key,
-		owner:      owner,
-		creator:    owner,
-		perms:      perms,
+		obj:        ipc.NewObject(r.reg.UserNS, key, creator, creator, perms),
 		changeTime: ktime.NowFromContext(ctx),
 		sems:       make([]sem, nsems),
 	}
 
-	// Find the next available ID.
-	for id := r.lastIDUsed + 1; id != r.lastIDUsed; id++ {
-		// Handle wrap around.
-		if id < 0 {
-			id = 0
-			continue
-		}
-		if r.semaphores[id] == nil {
-			index, found := r.findFirstAvailableIndex()
-			if !found {
-				panic("unable to find an available index")
-			}
-			r.indexes[index] = id
-			r.lastIDUsed = id
-			r.semaphores[id] = set
-			set.ID = id
-			return set, nil
-		}
+	// Look up a free index before registering, so that a failure here cannot
+	// leave behind a registered set that has no index.
+	index, found := r.findFirstAvailableIndex()
+	if !found {
+		// See linux, ipc/sem.c:newary().
+		return nil, linuxerr.ENOSPC
+	}
+	if err := r.reg.Register(set); err != nil {
+		return nil, err
 	}
+	r.indexes[index] = set.obj.ID
 
-	log.Warningf("Semaphore map is full, they must be leaking")
-	return nil, syserror.ENOMEM
+	return set, nil
 }
 
 // FindByID looks up a set given an ID.
-func (r *Registry) FindByID(id int32) *Set {
+func (r *Registry) FindByID(id ipc.ID) *Set {
 	r.mu.Lock()
 	defer r.mu.Unlock()
-	return r.semaphores[id]
+	mech := r.reg.FindByID(id)
+	if mech == nil {
+		return nil
+	}
+	return mech.(*Set)
 }
 
 // FindByIndex looks up a set given an index.
@@ -312,19 +272,10 @@ func (r *Registry) FindByIndex(index int32) *Set {
 	if !present {
 		return nil
 	}
-	return r.semaphores[id]
+	return r.reg.FindByID(id).(*Set)
 }
 
-func (r *Registry) findByKey(key int32) *Set {
-	for _, v := range r.semaphores {
-		if v.key == key {
-			return v
-		}
-	}
-	return nil
-}
-
-func (r *Registry) findIndexByID(id int32) (int32, bool) {
+func (r *Registry) findIndexByID(id ipc.ID) (int32, bool) {
 	for k, v := range r.indexes {
 		if v == id {
 			return k, true
@@ -344,12 +295,36 @@ func (r *Registry) findFirstAvailableIndex() (int32, bool) {
 
 func (r *Registry) totalSems() int {
 	totalSems := 0
-	for _, v := range r.semaphores {
-		totalSems += v.Size()
-	}
+	r.reg.ForAllObjects(
+		func(o ipc.Mechanism) {
+			totalSems += o.(*Set).Size()
+		},
+	)
 	return totalSems
 }
 
+// ID returns the semaphore set's ID.
+func (s *Set) ID() ipc.ID {
+	return s.obj.ID
+}
+
+// Object implements ipc.Mechanism.Object.
+func (s *Set) Object() *ipc.Object {
+	return s.obj
+}
+
+// Lock implements ipc.Mechanism.Lock.
+func (s *Set) Lock() {
+	s.mu.Lock()
+}
+
+// Unlock implements ipc.Mechanism.Unlock.
+//
+// +checklocksignore
+func (s *Set) Unlock() {
+	s.mu.Unlock()
+}
+
 func (s *Set) findSem(num int32) *sem {
 	if num < 0 || int(num) >= s.Size() {
 		return nil
@@ -369,12 +344,12 @@ func (s *Set) Change(ctx context.Context, creds *auth.Credentials, owner fs.File
 
 	// "The effective UID of the calling process must match the owner or creator
 	// of the semaphore set, or the caller must be privileged."
-	if !s.checkCredentials(creds) && !s.checkCapability(creds) {
-		return syserror.EACCES
+	if !s.obj.CheckOwnership(creds) {
+		return linuxerr.EACCES
 	}
 
-	s.owner = owner
-	s.perms = perms
+	s.obj.Owner = owner
+	s.obj.Perms = perms
 	s.changeTime = ktime.NowFromContext(ctx)
 	return nil
 }
@@ -394,18 +369,18 @@ func (s *Set) semStat(creds *auth.Credentials, permMask fs.PermMask) (*linux.Sem
 	s.mu.Lock()
 	defer s.mu.Unlock()
 
-	if !s.checkPerms(creds, permMask) {
-		return nil, syserror.EACCES
+	if !s.obj.CheckPermissions(creds, permMask) {
+		return nil, linuxerr.EACCES
 	}
 
 	return &linux.SemidDS{
 		SemPerm: linux.IPCPerm{
-			Key:  uint32(s.key),
-			UID:  uint32(creds.UserNamespace.MapFromKUID(s.owner.UID)),
-			GID:  uint32(creds.UserNamespace.MapFromKGID(s.owner.GID)),
-			CUID: uint32(creds.UserNamespace.MapFromKUID(s.creator.UID)),
-			CGID: uint32(creds.UserNamespace.MapFromKGID(s.creator.GID)),
-			Mode: uint16(s.perms.LinuxMode()),
+			Key:  uint32(s.obj.Key),
+			UID:  uint32(creds.UserNamespace.MapFromKUID(s.obj.Owner.UID)),
+			GID:  uint32(creds.UserNamespace.MapFromKGID(s.obj.Owner.GID)),
+			CUID: uint32(creds.UserNamespace.MapFromKUID(s.obj.Creator.UID)),
+			CGID: uint32(creds.UserNamespace.MapFromKGID(s.obj.Creator.GID)),
+			Mode: uint16(s.obj.Perms.LinuxMode()),
 			Seq:  0, // IPC sequence not supported.
 		},
 		SemOTime: s.opTime.TimeT(),
@@ -417,20 +392,20 @@ func (s *Set) semStat(creds *auth.Credentials, permMask fs.PermMask) (*linux.Sem
 // SetVal overrides a semaphore value, waking up waiters as needed.
 func (s *Set) SetVal(ctx context.Context, num int32, val int16, creds *auth.Credentials, pid int32) error {
 	if val < 0 || val > valueMax {
-		return syserror.ERANGE
+		return linuxerr.ERANGE
 	}
 
 	s.mu.Lock()
 	defer s.mu.Unlock()
 
 	// "The calling process must have alter permission on the semaphore set."
-	if !s.checkPerms(creds, fs.PermMask{Write: true}) {
-		return syserror.EACCES
+	if !s.obj.CheckPermissions(creds, fs.PermMask{Write: true}) {
+		return linuxerr.EACCES
 	}
 
 	sem := s.findSem(num)
 	if sem == nil {
-		return syserror.ERANGE
+		return linuxerr.ERANGE
 	}
 
 	// TODO(gvisor.dev/issue/137): Clear undo entries in all processes.
@@ -452,7 +427,7 @@ func (s *Set) SetValAll(ctx context.Context, vals []uint16, creds *auth.Credenti
 
 	for _, val := range vals {
 		if val > valueMax {
-			return syserror.ERANGE
+			return linuxerr.ERANGE
 		}
 	}
 
@@ -460,8 +435,8 @@ func (s *Set) SetValAll(ctx context.Context, vals []uint16, creds *auth.Credenti
 	defer s.mu.Unlock()
 
 	// "The calling process must have alter permission on the semaphore set."
-	if !s.checkPerms(creds, fs.PermMask{Write: true}) {
-		return syserror.EACCES
+	if !s.obj.CheckPermissions(creds, fs.PermMask{Write: true}) {
+		return linuxerr.EACCES
 	}
 
 	for i, val := range vals {
@@ -482,13 +457,13 @@ func (s *Set) GetVal(num int32, creds *auth.Credentials) (int16, error) {
 	defer s.mu.Unlock()
 
 	// "The calling process must have read permission on the semaphore set."
-	if !s.checkPerms(creds, fs.PermMask{Read: true}) {
-		return 0, syserror.EACCES
+	if !s.obj.CheckPermissions(creds, fs.PermMask{Read: true}) {
+		return 0, linuxerr.EACCES
 	}
 
 	sem := s.findSem(num)
 	if sem == nil {
-		return 0, syserror.ERANGE
+		return 0, linuxerr.ERANGE
 	}
 	return sem.value, nil
 }
@@ -499,8 +474,8 @@ func (s *Set) GetValAll(creds *auth.Credentials) ([]uint16, error) {
 	defer s.mu.Unlock()
 
 	// "The calling process must have read permission on the semaphore set."
-	if !s.checkPerms(creds, fs.PermMask{Read: true}) {
-		return nil, syserror.EACCES
+	if !s.obj.CheckPermissions(creds, fs.PermMask{Read: true}) {
+		return nil, linuxerr.EACCES
 	}
 
 	vals := make([]uint16, s.Size())
@@ -516,13 +491,13 @@ func (s *Set) GetPID(num int32, creds *auth.Credentials) (int32, error) {
 	defer s.mu.Unlock()
 
 	// "The calling process must have read permission on the semaphore set."
-	if !s.checkPerms(creds, fs.PermMask{Read: true}) {
-		return 0, syserror.EACCES
+	if !s.obj.CheckPermissions(creds, fs.PermMask{Read: true}) {
+		return 0, linuxerr.EACCES
 	}
 
 	sem := s.findSem(num)
 	if sem == nil {
-		return 0, syserror.ERANGE
+		return 0, linuxerr.ERANGE
 	}
 	return sem.pid, nil
 }
@@ -532,13 +507,13 @@ func (s *Set) countWaiters(num int32, creds *auth.Credentials, pred func(w *wait
 	defer s.mu.Unlock()
 
 	// The calling process must have read permission on the semaphore set.
-	if !s.checkPerms(creds, fs.PermMask{Read: true}) {
-		return 0, syserror.EACCES
+	if !s.obj.CheckPermissions(creds, fs.PermMask{Read: true}) {
+		return 0, linuxerr.EACCES
 	}
 
 	sem := s.findSem(num)
 	if sem == nil {
-		return 0, syserror.ERANGE
+		return 0, linuxerr.ERANGE
 	}
 	var cnt uint16
 	for w := sem.waiters.Front(); w != nil; w = w.Next() {
@@ -581,15 +556,15 @@ func (s *Set) ExecuteOps(ctx context.Context, ops []linux.Sembuf, creds *auth.Cr
 	readOnly := true
 	for _, op := range ops {
 		if s.findSem(int32(op.SemNum)) == nil {
-			return nil, 0, syserror.EFBIG
+			return nil, 0, linuxerr.EFBIG
 		}
 		if op.SemOp != 0 {
 			readOnly = false
 		}
 	}
 
-	if !s.checkPerms(creds, fs.PermMask{Read: readOnly, Write: !readOnly}) {
-		return nil, 0, syserror.EACCES
+	if !s.obj.CheckPermissions(creds, fs.PermMask{Read: readOnly, Write: !readOnly}) {
+		return nil, 0, linuxerr.EACCES
 	}
 
 	ch, num, err := s.executeOps(ctx, ops, pid)
@@ -624,7 +599,7 @@ func (s *Set) executeOps(ctx context.Context, ops []linux.Sembuf, pid int32) (ch
 			if op.SemOp < 0 {
 				// Handle 'wait' operation.
 				if -op.SemOp > valueMax {
-					return nil, 0, syserror.ERANGE
+					return nil, 0, linuxerr.ERANGE
 				}
 				if -op.SemOp > tmpVals[op.SemNum] {
 					// Not enough resources, must wait.
@@ -639,7 +614,7 @@ func (s *Set) executeOps(ctx context.Context, ops []linux.Sembuf, pid int32) (ch
 			} else {
 				// op.SemOp > 0: Handle 'signal' operation.
 				if tmpVals[op.SemNum] > valueMax-op.SemOp {
-					return nil, 0, syserror.ERANGE
+					return nil, 0, linuxerr.ERANGE
 				}
 			}
 
@@ -674,38 +649,10 @@ func (s *Set) AbortWait(num int32, ch chan struct{}) {
 	// Waiter may not be found in case it raced with wakeWaiters().
 }
 
-func (s *Set) checkCredentials(creds *auth.Credentials) bool {
-	return s.owner.UID == creds.EffectiveKUID ||
-		s.owner.GID == creds.EffectiveKGID ||
-		s.creator.UID == creds.EffectiveKUID ||
-		s.creator.GID == creds.EffectiveKGID
-}
-
-func (s *Set) checkCapability(creds *auth.Credentials) bool {
-	return creds.HasCapabilityIn(linux.CAP_IPC_OWNER, s.registry.userNS) && creds.UserNamespace.MapFromKUID(s.owner.UID).Ok()
-}
-
-func (s *Set) checkPerms(creds *auth.Credentials, reqPerms fs.PermMask) bool {
-	// Are we owner, or in group, or other?
-	p := s.perms.Other
-	if s.owner.UID == creds.EffectiveKUID {
-		p = s.perms.User
-	} else if creds.InGroup(s.owner.GID) {
-		p = s.perms.Group
-	}
-
-	// Are permissions satisfied without capability checks?
-	if p.SupersetOf(reqPerms) {
-		return true
-	}
-
-	return s.checkCapability(creds)
-}
-
-// destroy destroys the set.
+// Destroy implements ipc.Mechanism.Destroy.
 //
 // Preconditions: Caller must hold 's.mu'.
-func (s *Set) destroy() {
+func (s *Set) Destroy() {
 	// Notify all waiters. They will fail on the next attempt to execute
 	// operations and return error.
 	s.dead = true
diff --git a/pkg/sentry/kernel/semaphore/semaphore_test.go b/pkg/sentry/kernel/semaphore/semaphore_test.go
index e47acefdf..2e4ab8121 100644
--- a/pkg/sentry/kernel/semaphore/semaphore_test.go
+++ b/pkg/sentry/kernel/semaphore/semaphore_test.go
@@ -21,6 +21,7 @@ import (
 	"gvisor.dev/gvisor/pkg/context"
 	"gvisor.dev/gvisor/pkg/sentry/contexttest"
 	"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+	"gvisor.dev/gvisor/pkg/sentry/kernel/ipc"
 	"gvisor.dev/gvisor/pkg/syserror"
 )
 
@@ -55,7 +56,7 @@ func signalled(ch chan struct{}) bool {
 
 func TestBasic(t *testing.T) {
 	ctx := contexttest.Context(t)
-	set := &Set{ID: 123, sems: make([]sem, 1)}
+	set := &Set{obj: &ipc.Object{ID: 123}, sems: make([]sem, 1)}
 	ops := []linux.Sembuf{
 		{SemOp: 1},
 	}
@@ -76,7 +77,7 @@ func TestBasic(t *testing.T) {
 
 func TestWaitForZero(t *testing.T) {
 	ctx := contexttest.Context(t)
-	set := &Set{ID: 123, sems: make([]sem, 1)}
+	set := &Set{obj: &ipc.Object{ID: 123}, sems: make([]sem, 1)}
 	ops := []linux.Sembuf{
 		{SemOp: 0},
 	}
@@ -115,7 +116,7 @@ func TestWaitForZero(t *testing.T) {
 
 func TestNoWait(t *testing.T) {
 	ctx := contexttest.Context(t)
-	set := &Set{ID: 123, sems: make([]sem, 1)}
+	set := &Set{obj: &ipc.Object{ID: 123}, sems: make([]sem, 1)}
 	ops := []linux.Sembuf{
 		{SemOp: 1},
 	}
@@ -138,11 +139,12 @@ func TestUnregister(t *testing.T) {
 	ctx := contexttest.Context(t)
 	r := NewRegistry(auth.NewRootUserNamespace())
 	set, err := r.FindOrCreate(ctx, 123, 2, linux.FileMode(0x600), true, true, true)
+
 	if err != nil {
 		t.Fatalf("FindOrCreate() failed, err: %v", err)
 	}
-	if got := r.FindByID(set.ID); got.ID != set.ID {
-		t.Fatalf("FindById(%d) failed, got: %+v, expected: %+v", set.ID, got, set)
+	if got := r.FindByID(set.obj.ID); got.obj.ID != set.obj.ID {
+		t.Fatalf("FindById(%d) failed, got: %+v, expected: %+v", set.obj.ID, got, set)
 	}
 
 	ops := []linux.Sembuf{
@@ -155,14 +157,14 @@ func TestUnregister(t *testing.T) {
 	}
 
 	creds := auth.CredentialsFromContext(ctx)
-	if err := r.RemoveID(set.ID, creds); err != nil {
-		t.Fatalf("RemoveID(%d) failed, err: %v", set.ID, err)
+	if err := r.Remove(set.obj.ID, creds); err != nil {
+		t.Fatalf("Remove(%d) failed, err: %v", set.obj.ID, err)
 	}
 	if !set.dead {
 		t.Fatalf("set is not dead: %+v", set)
 	}
-	if got := r.FindByID(set.ID); got != nil {
-		t.Fatalf("FindById(%d) failed, got: %+v, expected: nil", set.ID, got)
+	if got := r.FindByID(set.obj.ID); got != nil {
+		t.Fatalf("FindById(%d) failed, got: %+v, expected: nil", set.obj.ID, got)
 	}
 	for i, ch := range chs {
 		if !signalled(ch) {
diff --git a/pkg/sentry/kernel/sessions.go b/pkg/sentry/kernel/sessions.go
index 973d708a3..f9f872522 100644
--- a/pkg/sentry/kernel/sessions.go
+++ b/pkg/sentry/kernel/sessions.go
@@ -16,7 +16,7 @@ package kernel
 
 import (
 	"gvisor.dev/gvisor/pkg/abi/linux"
-	"gvisor.dev/gvisor/pkg/syserror"
+	"gvisor.dev/gvisor/pkg/errors/linuxerr"
 )
 
 // SessionID is the public identifier.
@@ -120,8 +120,9 @@ func (pg *ProcessGroup) Originator() *ThreadGroup {
 
 // IsOrphan returns true if this process group is an orphan.
 func (pg *ProcessGroup) IsOrphan() bool {
-	pg.originator.TaskSet().mu.RLock()
-	defer pg.originator.TaskSet().mu.RUnlock()
+	ts := pg.originator.TaskSet()
+	ts.mu.RLock()
+	defer ts.mu.RUnlock()
 	return pg.ancestors == 0
 }
 
@@ -277,14 +278,14 @@ func (tg *ThreadGroup) createSession() error {
 			continue
 		}
 		if s.leader == tg {
-			return syserror.EPERM
+			return linuxerr.EPERM
 		}
 		if s.id == SessionID(id) {
-			return syserror.EPERM
+			return linuxerr.EPERM
 		}
 		for pg := s.processGroups.Front(); pg != nil; pg = pg.Next() {
 			if pg.id == ProcessGroupID(id) {
-				return syserror.EPERM
+				return linuxerr.EPERM
 			}
 		}
 	}
@@ -371,7 +372,7 @@ func (tg *ThreadGroup) CreateProcessGroup() error {
 
 	// Check whether a process still exists or not.
 	if id == 0 {
-		return syserror.ESRCH
+		return linuxerr.ESRCH
 	}
 
 	// Per above, check for a Session leader or existing group.
@@ -380,11 +381,11 @@ func (tg *ThreadGroup) CreateProcessGroup() error {
 			continue
 		}
 		if s.leader == tg {
-			return syserror.EPERM
+			return linuxerr.EPERM
 		}
 		for pg := s.processGroups.Front(); pg != nil; pg = pg.Next() {
 			if pg.id == ProcessGroupID(id) {
-				return syserror.EPERM
+				return linuxerr.EPERM
 			}
 		}
 	}
@@ -442,17 +443,17 @@ func (tg *ThreadGroup) JoinProcessGroup(pidns *PIDNamespace, pgid ProcessGroupID
 	// Lookup the ProcessGroup.
 	pg := pidns.processGroups[pgid]
 	if pg == nil {
-		return syserror.EPERM
+		return linuxerr.EPERM
 	}
 
 	// Disallow the join if an execve has performed, per POSIX.
 	if checkExec && tg.execed {
-		return syserror.EACCES
+		return linuxerr.EACCES
 	}
 
 	// See if it's in the same session as ours.
 	if pg.session != tg.processGroup.session {
-		return syserror.EPERM
+		return linuxerr.EPERM
 	}
 
 	// Join the group; adjust children.
diff --git a/pkg/sentry/kernel/shm/BUILD b/pkg/sentry/kernel/shm/BUILD
index 1c3c0794f..4e8deac4c 100644
--- a/pkg/sentry/kernel/shm/BUILD
+++ b/pkg/sentry/kernel/shm/BUILD
@@ -28,6 +28,7 @@ go_library(
     deps = [
         "//pkg/abi/linux",
         "//pkg/context",
+        "//pkg/errors/linuxerr",
         "//pkg/hostarch",
         "//pkg/log",
         "//pkg/refs",
@@ -35,6 +36,7 @@ go_library(
         "//pkg/sentry/device",
         "//pkg/sentry/fs",
         "//pkg/sentry/kernel/auth",
+        "//pkg/sentry/kernel/ipc",
         "//pkg/sentry/kernel/time",
         "//pkg/sentry/memmap",
         "//pkg/sentry/pgalloc",
diff --git a/pkg/sentry/kernel/shm/shm.go b/pkg/sentry/kernel/shm/shm.go
index a73f1bdca..2abf467d7 100644
--- a/pkg/sentry/kernel/shm/shm.go
+++ b/pkg/sentry/kernel/shm/shm.go
@@ -38,10 +38,12 @@ import (
 
 	"gvisor.dev/gvisor/pkg/abi/linux"
 	"gvisor.dev/gvisor/pkg/context"
+	"gvisor.dev/gvisor/pkg/errors/linuxerr"
 	"gvisor.dev/gvisor/pkg/hostarch"
 	"gvisor.dev/gvisor/pkg/log"
 	"gvisor.dev/gvisor/pkg/sentry/fs"
 	"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+	"gvisor.dev/gvisor/pkg/sentry/kernel/ipc"
 	ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time"
 	"gvisor.dev/gvisor/pkg/sentry/memmap"
 	"gvisor.dev/gvisor/pkg/sentry/pgalloc"
@@ -50,12 +52,6 @@ import (
 	"gvisor.dev/gvisor/pkg/syserror"
 )
 
-// Key represents a shm segment key. Analogous to a file name.
-type Key int32
-
-// ID represents the opaque handle for a shm segment. Analogous to an fd.
-type ID int32
-
 // Registry tracks all shared memory segments in an IPC namespace. The registry
 // provides the mechanisms for creating and finding segments, and reporting
 // global shm parameters.
@@ -68,50 +64,51 @@ type Registry struct {
 	// mu protects all fields below.
 	mu sync.Mutex `state:"nosave"`
 
-	// shms maps segment ids to segments.
+	// reg defines basic fields and operations needed for all SysV registries.
 	//
-	// shms holds all referenced segments, which are removed on the last
+	// Within reg, there are two maps, objects and keysToIDs.
+	//
+	// reg.objects holds all referenced segments, which are removed on the last
 	// DecRef. Thus, it cannot itself hold a reference on the Shm.
 	//
 	// Since removal only occurs after the last (unlocked) DecRef, there
 	// exists a short window during which a Shm still exists in Shm, but is
 	// unreferenced. Users must use TryIncRef to determine if the Shm is
 	// still valid.
-	shms map[ID]*Shm
-
-	// keysToShms maps segment keys to segments.
 	//
-	// Shms in keysToShms are guaranteed to be referenced, as they are
+	// keysToIDs maps segment keys to IDs.
+	//
+	// Shms in keysToIDs are guaranteed to be referenced, as they are
 	// removed by disassociateKey before the last DecRef.
-	keysToShms map[Key]*Shm
+	reg *ipc.Registry
 
 	// Sum of the sizes of all existing segments rounded up to page size, in
 	// units of page size.
 	totalPages uint64
-
-	// ID assigned to the last created segment. Used to quickly find the next
-	// unused ID.
-	lastIDUsed ID
 }
 
 // NewRegistry creates a new shm registry.
 func NewRegistry(userNS *auth.UserNamespace) *Registry {
 	return &Registry{
-		userNS:     userNS,
-		shms:       make(map[ID]*Shm),
-		keysToShms: make(map[Key]*Shm),
+		userNS: userNS,
+		reg:    ipc.NewRegistry(userNS),
 	}
 }
 
 // FindByID looks up a segment given an ID.
 //
 // FindByID returns a reference on Shm.
-func (r *Registry) FindByID(id ID) *Shm {
+func (r *Registry) FindByID(id ipc.ID) *Shm {
 	r.mu.Lock()
 	defer r.mu.Unlock()
-	s := r.shms[id]
+	mech := r.reg.FindByID(id)
+	if mech == nil {
+		return nil
+	}
+	s := mech.(*Shm)
+
 	// Take a reference on s. If TryIncRef fails, s has reached the last
-	// DecRef, but hasn't quite been removed from r.shms yet.
+	// DecRef, but hasn't quite been removed from r.reg.objects yet.
 	if s != nil && s.TryIncRef() {
 		return s
 	}
@@ -128,9 +125,9 @@ func (r *Registry) dissociateKey(s *Shm) {
 	defer r.mu.Unlock()
 	s.mu.Lock()
 	defer s.mu.Unlock()
-	if s.key != linux.IPC_PRIVATE {
-		delete(r.keysToShms, s.key)
-		s.key = linux.IPC_PRIVATE
+	if s.obj.Key != linux.IPC_PRIVATE {
+		r.reg.DissociateKey(s.obj.Key)
+		s.obj.Key = linux.IPC_PRIVATE
 	}
 }
 
@@ -138,69 +135,49 @@ func (r *Registry) dissociateKey(s *Shm) {
 // analogous to open(2).
 //
 // FindOrCreate returns a reference on Shm.
-func (r *Registry) FindOrCreate(ctx context.Context, pid int32, key Key, size uint64, mode linux.FileMode, private, create, exclusive bool) (*Shm, error) {
+func (r *Registry) FindOrCreate(ctx context.Context, pid int32, key ipc.Key, size uint64, mode linux.FileMode, private, create, exclusive bool) (*Shm, error) {
 	if (create || private) && (size < linux.SHMMIN || size > linux.SHMMAX) {
 		// "A new segment was to be created and size is less than SHMMIN or
 		// greater than SHMMAX." - man shmget(2)
 		//
 		// Note that 'private' always implies the creation of a new segment
 		// whether IPC_CREAT is specified or not.
-		return nil, syserror.EINVAL
+		return nil, linuxerr.EINVAL
 	}
 
 	r.mu.Lock()
 	defer r.mu.Unlock()
 
-	if len(r.shms) >= linux.SHMMNI {
+	if r.reg.ObjectCount() >= linux.SHMMNI {
 		// "All possible shared memory IDs have been taken (SHMMNI) ..."
 		//   - man shmget(2)
 		return nil, syserror.ENOSPC
 	}
 
 	if !private {
-		// Look up an existing segment.
-		if shm := r.keysToShms[key]; shm != nil {
-			shm.mu.Lock()
-			defer shm.mu.Unlock()
-
-			// Check that caller can access the segment.
-			if !shm.checkPermissions(ctx, fs.PermsFromMode(mode)) {
-				// "The user does not have permission to access the shared
-				// memory segment, and does not have the CAP_IPC_OWNER
-				// capability in the user namespace that governs its IPC
-				// namespace." - man shmget(2)
-				return nil, syserror.EACCES
-			}
+		shm, err := r.reg.Find(ctx, key, mode, create, exclusive)
+		if err != nil {
+			return nil, err
+		}
 
+		// Validate shm-specific parameters.
+		if shm != nil {
+			shm := shm.(*Shm)
 			if size > shm.size {
 				// "A segment for the given key exists, but size is greater than
 				// the size of that segment." - man shmget(2)
-				return nil, syserror.EINVAL
-			}
-
-			if create && exclusive {
-				// "IPC_CREAT and IPC_EXCL were specified in shmflg, but a
-				// shared memory segment already exists for key."
-				//  - man shmget(2)
-				return nil, syserror.EEXIST
+				return nil, linuxerr.EINVAL
 			}
-
 			shm.IncRef()
 			return shm, nil
 		}
-
-		if !create {
-			// "No segment exists for the given key, and IPC_CREAT was not
-			// specified." - man shmget(2)
-			return nil, syserror.ENOENT
-		}
 	}
 
 	var sizeAligned uint64
 	if val, ok := hostarch.Addr(size).RoundUp(); ok {
 		sizeAligned = uint64(val)
 	} else {
-		return nil, syserror.EINVAL
+		return nil, linuxerr.EINVAL
 	}
 
 	if numPages := sizeAligned / hostarch.PageSize; r.totalPages+numPages > linux.SHMALL {
@@ -211,9 +188,7 @@ func (r *Registry) FindOrCreate(ctx context.Context, pid int32, key Key, size ui
 	}
 
 	// Need to create a new segment.
-	creator := fs.FileOwnerFromContext(ctx)
-	perms := fs.FilePermsFromMode(mode)
-	s, err := r.newShm(ctx, pid, key, creator, perms, size)
+	s, err := r.newShmLocked(ctx, pid, key, fs.FileOwnerFromContext(ctx), fs.FilePermsFromMode(mode), size)
 	if err != nil {
 		return nil, err
 	}
@@ -223,10 +198,10 @@ func (r *Registry) FindOrCreate(ctx context.Context, pid int32, key Key, size ui
 	return s, nil
 }
 
-// newShm creates a new segment in the registry.
+// newShmLocked creates a new segment in the registry.
 //
 // Precondition: Caller must hold r.mu.
-func (r *Registry) newShm(ctx context.Context, pid int32, key Key, creator fs.FileOwner, perms fs.FilePermissions, size uint64) (*Shm, error) {
+func (r *Registry) newShmLocked(ctx context.Context, pid int32, key ipc.Key, creator fs.FileOwner, perms fs.FilePermissions, size uint64) (*Shm, error) {
 	mfp := pgalloc.MemoryFileProviderFromContext(ctx)
 	if mfp == nil {
 		panic(fmt.Sprintf("context.Context %T lacks non-nil value for key %T", ctx, pgalloc.CtxMemoryFileProvider))
@@ -241,40 +216,21 @@ func (r *Registry) newShm(ctx context.Context, pid int32, key Key, creator fs.Fi
 	shm := &Shm{
 		mfp:           mfp,
 		registry:      r,
-		creator:       creator,
 		size:          size,
 		effectiveSize: effectiveSize,
+		obj:           ipc.NewObject(r.reg.UserNS, ipc.Key(key), creator, creator, perms),
 		fr:            fr,
-		key:           key,
-		perms:         perms,
-		owner:         creator,
 		creatorPID:    pid,
 		changeTime:    ktime.NowFromContext(ctx),
 	}
 	shm.InitRefs()
 
-	// Find the next available ID.
-	for id := r.lastIDUsed + 1; id != r.lastIDUsed; id++ {
-		// Handle wrap around.
-		if id < 0 {
-			id = 0
-			continue
-		}
-		if r.shms[id] == nil {
-			r.lastIDUsed = id
-
-			shm.ID = id
-			r.shms[id] = shm
-			r.keysToShms[key] = shm
-
-			r.totalPages += effectiveSize / hostarch.PageSize
-
-			return shm, nil
-		}
+	if err := r.reg.Register(shm); err != nil {
+		return nil, err
 	}
+	r.totalPages += effectiveSize / hostarch.PageSize
 
-	log.Warningf("Shm ids exhuasted, they may be leaking")
-	return nil, syserror.ENOSPC
+	return shm, nil
 }
 
 // IPCInfo reports global parameters for sysv shared memory segments on this
@@ -296,7 +252,7 @@ func (r *Registry) ShmInfo() *linux.ShmInfo {
 	defer r.mu.Unlock()
 
 	return &linux.ShmInfo{
-		UsedIDs: int32(r.lastIDUsed),
+		UsedIDs: int32(r.reg.LastIDUsed()),
 		ShmTot:  r.totalPages,
 		ShmRss:  r.totalPages, // We could probably get a better estimate from memory accounting.
 		ShmSwp:  0,            // No reclaim at the moment.
@@ -313,11 +269,11 @@ func (r *Registry) remove(s *Shm) {
 	s.mu.Lock()
 	defer s.mu.Unlock()
 
-	if s.key != linux.IPC_PRIVATE {
+	if s.obj.Key != linux.IPC_PRIVATE {
 		panic(fmt.Sprintf("Attempted to remove %s from the registry whose key is still associated", s.debugLocked()))
 	}
 
-	delete(r.shms, s.ID)
+	r.reg.DissociateID(s.obj.ID)
 	r.totalPages -= s.effectiveSize / hostarch.PageSize
 }
 
@@ -329,13 +285,16 @@ func (r *Registry) Release(ctx context.Context) {
 	// the IPC namespace containing it has no more references.
 	toRelease := make([]*Shm, 0)
 	r.mu.Lock()
-	for _, s := range r.keysToShms {
-		s.mu.Lock()
-		if !s.pendingDestruction {
-			toRelease = append(toRelease, s)
-		}
-		s.mu.Unlock()
-	}
+	r.reg.ForAllObjects(
+		func(o ipc.Mechanism) {
+			s := o.(*Shm)
+			s.mu.Lock()
+			if !s.pendingDestruction {
+				toRelease = append(toRelease, s)
+			}
+			s.mu.Unlock()
+		},
+	)
 	r.mu.Unlock()
 
 	for _, s := range toRelease {
@@ -373,12 +332,6 @@ type Shm struct {
 	// registry points to the shm registry containing this segment. Immutable.
 	registry *Registry
 
-	// ID is the kernel identifier for this segment. Immutable.
-	ID ID
-
-	// creator is the user that created the segment. Immutable.
-	creator fs.FileOwner
-
 	// size is the requested size of the segment at creation, in
 	// bytes. Immutable.
 	size uint64
@@ -396,14 +349,8 @@ type Shm struct {
 	// mu protects all fields below.
 	mu sync.Mutex `state:"nosave"`
 
-	// key is the public identifier for this segment.
-	key Key
-
-	// perms is the access permissions for the segment.
-	perms fs.FilePermissions
+	obj *ipc.Object
 
-	// owner of this segment.
-	owner fs.FileOwner
 	// attachTime is updated on every successful shmat.
 	attachTime ktime.Time
 	// detachTime is updated on every successful shmdt.
@@ -425,17 +372,44 @@ type Shm struct {
 	pendingDestruction bool
 }
 
+// ID returns the ID of s's underlying IPC object.
+func (s *Shm) ID() ipc.ID {
+	return s.obj.ID
+}
+
+// Object implements ipc.Mechanism.Object.
+func (s *Shm) Object() *ipc.Object {
+	return s.obj
+}
+
+// Destroy implements ipc.Mechanism.Destroy. No work is performed on shm.Destroy
+// because a different removal mechanism is used in shm. See Shm.MarkDestroyed.
+func (s *Shm) Destroy() {
+}
+
+// Lock implements ipc.Mechanism.Lock.
+func (s *Shm) Lock() {
+	s.mu.Lock()
+}
+
+// Unlock implements ipc.Mechanism.Unlock.
+//
+// +checklocksignore
+func (s *Shm) Unlock() {
+	s.mu.Unlock()
+}
+
 // Precondition: Caller must hold s.mu.
 func (s *Shm) debugLocked() string {
 	return fmt.Sprintf("Shm{id: %d, key: %d, size: %d bytes, refs: %d, destroyed: %v}",
-		s.ID, s.key, s.size, s.ReadRefs(), s.pendingDestruction)
+		s.obj.ID, s.obj.Key, s.size, s.ReadRefs(), s.pendingDestruction)
 }
 
 // MappedName implements memmap.MappingIdentity.MappedName.
 func (s *Shm) MappedName(ctx context.Context) string {
 	s.mu.Lock()
 	defer s.mu.Unlock()
-	return fmt.Sprintf("SYSV%08d", s.key)
+	return fmt.Sprintf("SYSV%08d", s.obj.Key)
 }
 
 // DeviceID implements memmap.MappingIdentity.DeviceID.
@@ -447,7 +421,7 @@ func (s *Shm) DeviceID() uint64 {
 func (s *Shm) InodeID() uint64 {
 	// "shmid gets reported as "inode#" in /proc/pid/maps. proc-ps tools use
 	// this. Changing this will break them." -- Linux, ipc/shm.c:newseg()
-	return uint64(s.ID)
+	return uint64(s.obj.ID)
 }
 
 // DecRef drops a reference on s.
@@ -511,7 +485,7 @@ func (*Shm) CopyMapping(context.Context, memmap.MappingSpace, hostarch.AddrRange
 func (s *Shm) Translate(ctx context.Context, required, optional memmap.MappableRange, at hostarch.AccessType) ([]memmap.Translation, error) {
 	var err error
 	if required.End > s.fr.Length() {
-		err = &memmap.BusError{syserror.EFAULT}
+		err = &memmap.BusError{linuxerr.EFAULT}
 	}
 	if source := optional.Intersect(memmap.MappableRange{0, s.fr.Length()}); source.Length() != 0 {
 		return []memmap.Translation{
@@ -550,7 +524,8 @@ func (s *Shm) ConfigureAttach(ctx context.Context, addr hostarch.Addr, opts Atta
 		return memmap.MMapOpts{}, syserror.EIDRM
 	}
 
-	if !s.checkPermissions(ctx, fs.PermMask{
+	creds := auth.CredentialsFromContext(ctx)
+	if !s.obj.CheckPermissions(creds, fs.PermMask{
 		Read:    true,
 		Write:   !opts.Readonly,
 		Execute: opts.Execute,
@@ -558,7 +533,7 @@ func (s *Shm) ConfigureAttach(ctx context.Context, addr hostarch.Addr, opts Atta
 		// "The calling process does not have the required permissions for the
 		// requested attach type, and does not have the CAP_IPC_OWNER capability
 		// in the user namespace that governs its IPC namespace." - man shmat(2)
-		return memmap.MMapOpts{}, syserror.EACCES
+		return memmap.MMapOpts{}, linuxerr.EACCES
 	}
 	return memmap.MMapOpts{
 		Length: s.size,
@@ -590,19 +565,19 @@ func (s *Shm) IPCStat(ctx context.Context) (*linux.ShmidDS, error) {
 
 	// "The caller must have read permission on the shared memory segment."
 	//   - man shmctl(2)
-	if !s.checkPermissions(ctx, fs.PermMask{Read: true}) {
+	creds := auth.CredentialsFromContext(ctx)
+	if !s.obj.CheckPermissions(creds, fs.PermMask{Read: true}) {
 		// "IPC_STAT or SHM_STAT is requested and shm_perm.mode does not allow
 		// read access for shmid, and the calling process does not have the
 		// CAP_IPC_OWNER capability in the user namespace that governs its IPC
 		// namespace." - man shmctl(2)
-		return nil, syserror.EACCES
+		return nil, linuxerr.EACCES
 	}
 
 	var mode uint16
 	if s.pendingDestruction {
 		mode |= linux.SHM_DEST
 	}
-	creds := auth.CredentialsFromContext(ctx)
 
 	// Use the reference count as a rudimentary count of the number of
 	// attaches. We exclude:
@@ -619,12 +594,12 @@ func (s *Shm) IPCStat(ctx context.Context) (*linux.ShmidDS, error) {
 
 	ds := &linux.ShmidDS{
 		ShmPerm: linux.IPCPerm{
-			Key:  uint32(s.key),
-			UID:  uint32(creds.UserNamespace.MapFromKUID(s.owner.UID)),
-			GID:  uint32(creds.UserNamespace.MapFromKGID(s.owner.GID)),
-			CUID: uint32(creds.UserNamespace.MapFromKUID(s.creator.UID)),
-			CGID: uint32(creds.UserNamespace.MapFromKGID(s.creator.GID)),
-			Mode: mode | uint16(s.perms.LinuxMode()),
+			Key:  uint32(s.obj.Key),
+			UID:  uint32(creds.UserNamespace.MapFromKUID(s.obj.Owner.UID)),
+			GID:  uint32(creds.UserNamespace.MapFromKGID(s.obj.Owner.GID)),
+			CUID: uint32(creds.UserNamespace.MapFromKUID(s.obj.Creator.UID)),
+			CGID: uint32(creds.UserNamespace.MapFromKGID(s.obj.Creator.GID)),
+			Mode: mode | uint16(s.obj.Perms.LinuxMode()),
 			Seq:  0, // IPC sequences not supported.
 		},
 		ShmSegsz:   s.size,
@@ -644,24 +619,24 @@ func (s *Shm) Set(ctx context.Context, ds *linux.ShmidDS) error {
 	s.mu.Lock()
 	defer s.mu.Unlock()
 
-	if !s.checkOwnership(ctx) {
-		return syserror.EPERM
+	creds := auth.CredentialsFromContext(ctx)
+	if !s.obj.CheckOwnership(creds) {
+		return linuxerr.EPERM
 	}
 
-	creds := auth.CredentialsFromContext(ctx)
 	uid := creds.UserNamespace.MapToKUID(auth.UID(ds.ShmPerm.UID))
 	gid := creds.UserNamespace.MapToKGID(auth.GID(ds.ShmPerm.GID))
 	if !uid.Ok() || !gid.Ok() {
-		return syserror.EINVAL
+		return linuxerr.EINVAL
 	}
 
 	// User may only modify the lower 9 bits of the mode. All the other bits are
 	// always 0 for the underlying inode.
 	mode := linux.FileMode(ds.ShmPerm.Mode & 0x1ff)
-	s.perms = fs.FilePermsFromMode(mode)
+	s.obj.Perms = fs.FilePermsFromMode(mode)
 
-	s.owner.UID = uid
-	s.owner.GID = gid
+	s.obj.Owner.UID = uid
+	s.obj.Owner.GID = gid
 
 	s.changeTime = ktime.NowFromContext(ctx)
 	return nil
@@ -690,40 +665,3 @@ func (s *Shm) MarkDestroyed(ctx context.Context) {
 	s.DecRef(ctx)
 	return
 }
-
-// checkOwnership verifies whether a segment may be accessed by ctx as an
-// owner. See ipc/util.c:ipcctl_pre_down_nolock() in Linux.
-//
-// Precondition: Caller must hold s.mu.
-func (s *Shm) checkOwnership(ctx context.Context) bool {
-	creds := auth.CredentialsFromContext(ctx)
-	if s.owner.UID == creds.EffectiveKUID || s.creator.UID == creds.EffectiveKUID {
-		return true
-	}
-
-	// Tasks with CAP_SYS_ADMIN may bypass ownership checks. Strangely, Linux
-	// doesn't use CAP_IPC_OWNER for this despite CAP_IPC_OWNER being documented
-	// for use to "override IPC ownership checks".
-	return creds.HasCapabilityIn(linux.CAP_SYS_ADMIN, s.registry.userNS)
-}
-
-// checkPermissions verifies whether a segment is accessible by ctx for access
-// described by req. See ipc/util.c:ipcperms() in Linux.
-//
-// Precondition: Caller must hold s.mu.
-func (s *Shm) checkPermissions(ctx context.Context, req fs.PermMask) bool {
-	creds := auth.CredentialsFromContext(ctx)
-
-	p := s.perms.Other
-	if s.owner.UID == creds.EffectiveKUID {
-		p = s.perms.User
-	} else if creds.InGroup(s.owner.GID) {
-		p = s.perms.Group
-	}
-	if p.SupersetOf(req) {
-		return true
-	}
-
-	// Tasks with CAP_IPC_OWNER may bypass permission checks.
-	return creds.HasCapabilityIn(linux.CAP_IPC_OWNER, s.registry.userNS)
-}
diff --git a/pkg/sentry/kernel/signalfd/BUILD b/pkg/sentry/kernel/signalfd/BUILD
index 76d472292..1110ecca5 100644
--- a/pkg/sentry/kernel/signalfd/BUILD
+++ b/pkg/sentry/kernel/signalfd/BUILD
@@ -9,6 +9,7 @@ go_library(
     deps = [
         "//pkg/abi/linux",
         "//pkg/context",
+        "//pkg/errors/linuxerr",
         "//pkg/sentry/fs",
         "//pkg/sentry/fs/anon",
         "//pkg/sentry/fs/fsutil",
diff --git a/pkg/sentry/kernel/signalfd/signalfd.go b/pkg/sentry/kernel/signalfd/signalfd.go
index f58ec4194..47958e2d4 100644
--- a/pkg/sentry/kernel/signalfd/signalfd.go
+++ b/pkg/sentry/kernel/signalfd/signalfd.go
@@ -18,6 +18,7 @@ package signalfd
 import (
 	"gvisor.dev/gvisor/pkg/abi/linux"
 	"gvisor.dev/gvisor/pkg/context"
+	"gvisor.dev/gvisor/pkg/errors/linuxerr"
 	"gvisor.dev/gvisor/pkg/sentry/fs"
 	"gvisor.dev/gvisor/pkg/sentry/fs/anon"
 	"gvisor.dev/gvisor/pkg/sentry/fs/fsutil"
@@ -64,7 +65,7 @@ func New(ctx context.Context, mask linux.SignalSet) (*fs.File, error) {
 	t := kernel.TaskFromContext(ctx)
 	if t == nil {
 		// No task context? Not valid.
-		return nil, syserror.EINVAL
+		return nil, linuxerr.EINVAL
 	}
 	// name matches fs/signalfd.c:signalfd4.
 	dirent := fs.NewDirent(ctx, anon.NewInode(ctx), "anon_inode:[signalfd]")
diff --git a/pkg/sentry/kernel/task.go b/pkg/sentry/kernel/task.go
index 2e3b4488a..59eeb253d 100644
--- a/pkg/sentry/kernel/task.go
+++ b/pkg/sentry/kernel/task.go
@@ -21,6 +21,7 @@ import (
 
 	"gvisor.dev/gvisor/pkg/abi/linux"
 	"gvisor.dev/gvisor/pkg/bpf"
+	"gvisor.dev/gvisor/pkg/errors/linuxerr"
 	"gvisor.dev/gvisor/pkg/hostarch"
 	"gvisor.dev/gvisor/pkg/sentry/fs"
 	"gvisor.dev/gvisor/pkg/sentry/inet"
@@ -32,7 +33,6 @@ import (
 	"gvisor.dev/gvisor/pkg/sentry/usage"
 	"gvisor.dev/gvisor/pkg/sentry/vfs"
 	"gvisor.dev/gvisor/pkg/sync"
-	"gvisor.dev/gvisor/pkg/syserror"
 	"gvisor.dev/gvisor/pkg/waiter"
 )
 
@@ -232,7 +232,7 @@ type Task struct {
 	// exitStatus is the task's exit status.
 	//
 	// exitStatus is protected by the signal mutex.
-	exitStatus ExitStatus
+	exitStatus linux.WaitStatus
 
 	// syscallRestartBlock represents a custom restart function to run in
 	// restart_syscall(2) to resume an interrupted syscall.
@@ -846,7 +846,7 @@ func (t *Task) OOMScoreAdj() int32 {
 // value should be between -1000 and 1000 inclusive.
 func (t *Task) SetOOMScoreAdj(adj int32) error {
 	if adj > 1000 || adj < -1000 {
-		return syserror.EINVAL
+		return linuxerr.EINVAL
 	}
 	atomic.StoreInt32(&t.tg.oomScoreAdj, adj)
 	return nil
diff --git a/pkg/sentry/kernel/task_acct.go b/pkg/sentry/kernel/task_acct.go
index e574997f7..dd364ae50 100644
--- a/pkg/sentry/kernel/task_acct.go
+++ b/pkg/sentry/kernel/task_acct.go
@@ -18,10 +18,10 @@ package kernel
 
 import (
 	"gvisor.dev/gvisor/pkg/abi/linux"
+	"gvisor.dev/gvisor/pkg/errors/linuxerr"
 	ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time"
 	"gvisor.dev/gvisor/pkg/sentry/limits"
 	"gvisor.dev/gvisor/pkg/sentry/usage"
-	"gvisor.dev/gvisor/pkg/syserror"
 )
 
 // Getitimer implements getitimer(2).
@@ -44,7 +44,7 @@ func (t *Task) Getitimer(id int32) (linux.ItimerVal, error) {
 		s, _ = t.tg.itimerProfSetting.At(tm)
 		t.tg.signalHandlers.mu.Unlock()
 	default:
-		return linux.ItimerVal{}, syserror.EINVAL
+		return linux.ItimerVal{}, linuxerr.EINVAL
 	}
 	val, iv := ktime.SpecFromSetting(tm, s)
 	return linux.ItimerVal{
@@ -105,7 +105,7 @@ func (t *Task) Setitimer(id int32, newitv linux.ItimerVal) (linux.ItimerVal, err
 			return linux.ItimerVal{}, err
 		}
 	default:
-		return linux.ItimerVal{}, syserror.EINVAL
+		return linux.ItimerVal{}, linuxerr.EINVAL
 	}
 	oldval, oldiv := ktime.SpecFromSetting(tm, olds)
 	return linux.ItimerVal{
diff --git a/pkg/sentry/kernel/task_block.go b/pkg/sentry/kernel/task_block.go
index 07533d982..b2520eecf 100644
--- a/pkg/sentry/kernel/task_block.go
+++ b/pkg/sentry/kernel/task_block.go
@@ -163,7 +163,7 @@ func (t *Task) block(C <-chan struct{}, timerChan <-chan struct{}) error {
 		region.End()
 		t.SleepFinish(true)
 		// We've timed out.
-		return syserror.ETIMEDOUT
+		return linuxerr.ETIMEDOUT
 	}
 }
 
diff --git a/pkg/sentry/kernel/task_cgroup.go b/pkg/sentry/kernel/task_cgroup.go
index 7c138e80f..828b90014 100644
--- a/pkg/sentry/kernel/task_cgroup.go
+++ b/pkg/sentry/kernel/task_cgroup.go
@@ -20,15 +20,13 @@ import (
 	"sort"
 	"strings"
 
+	"gvisor.dev/gvisor/pkg/errors/linuxerr"
 	"gvisor.dev/gvisor/pkg/log"
-	"gvisor.dev/gvisor/pkg/syserror"
 )
 
 // EnterInitialCgroups moves t into an initial set of cgroups.
 //
 // Precondition: t isn't in any cgroups yet, t.cgs is empty.
-//
-// +checklocksignore parent.mu is conditionally acquired.
 func (t *Task) EnterInitialCgroups(parent *Task) {
 	var inherit map[Cgroup]struct{}
 	if parent != nil {
@@ -67,7 +65,7 @@ func (t *Task) EnterCgroup(c Cgroup) error {
 				//
 				// TODO(b/183137098): Implement cgroup migration.
 				log.Warningf("Cgroup migration is not implemented")
-				return syserror.EBUSY
+				return linuxerr.EBUSY
 			}
 		}
 	}
diff --git a/pkg/sentry/kernel/task_clone.go b/pkg/sentry/kernel/task_clone.go
index 405771f3f..da4b77ca2 100644
--- a/pkg/sentry/kernel/task_clone.go
+++ b/pkg/sentry/kernel/task_clone.go
@@ -20,147 +20,46 @@ import (
 	"gvisor.dev/gvisor/pkg/abi/linux"
 	"gvisor.dev/gvisor/pkg/bpf"
 	"gvisor.dev/gvisor/pkg/cleanup"
+	"gvisor.dev/gvisor/pkg/errors/linuxerr"
 	"gvisor.dev/gvisor/pkg/hostarch"
 	"gvisor.dev/gvisor/pkg/sentry/inet"
-	"gvisor.dev/gvisor/pkg/syserror"
 	"gvisor.dev/gvisor/pkg/usermem"
 )
 
-// SharingOptions controls what resources are shared by a new task created by
-// Task.Clone, or an existing task affected by Task.Unshare.
-type SharingOptions struct {
-	// If NewAddressSpace is true, the task should have an independent virtual
-	// address space.
-	NewAddressSpace bool
-
-	// If NewSignalHandlers is true, the task should use an independent set of
-	// signal handlers.
-	NewSignalHandlers bool
-
-	// If NewThreadGroup is true, the task should be the leader of its own
-	// thread group. TerminationSignal is the signal that the thread group
-	// will send to its parent when it exits. If NewThreadGroup is false,
-	// TerminationSignal is ignored.
-	NewThreadGroup    bool
-	TerminationSignal linux.Signal
-
-	// If NewPIDNamespace is true:
-	//
-	// - In the context of Task.Clone, the new task should be the init task
-	// (TID 1) in a new PID namespace.
-	//
-	// - In the context of Task.Unshare, the task should create a new PID
-	// namespace, and all subsequent clones of the task should be members of
-	// the new PID namespace.
-	NewPIDNamespace bool
-
-	// If NewUserNamespace is true, the task should have an independent user
-	// namespace.
-	NewUserNamespace bool
-
-	// If NewNetworkNamespace is true, the task should have an independent
-	// network namespace.
-	NewNetworkNamespace bool
-
-	// If NewFiles is true, the task should use an independent file descriptor
-	// table.
-	NewFiles bool
-
-	// If NewFSContext is true, the task should have an independent FSContext.
-	NewFSContext bool
-
-	// If NewUTSNamespace is true, the task should have an independent UTS
-	// namespace.
-	NewUTSNamespace bool
-
-	// If NewIPCNamespace is true, the task should have an independent IPC
-	// namespace.
-	NewIPCNamespace bool
-}
-
-// CloneOptions controls the behavior of Task.Clone.
-type CloneOptions struct {
-	// SharingOptions defines the set of resources that the new task will share
-	// with its parent.
-	SharingOptions
-
-	// Stack is the initial stack pointer of the new task. If Stack is 0, the
-	// new task will start with the same stack pointer as its parent.
-	Stack hostarch.Addr
-
-	// If SetTLS is true, set the new task's TLS (thread-local storage)
-	// descriptor to TLS. If SetTLS is false, TLS is ignored.
-	SetTLS bool
-	TLS    hostarch.Addr
-
-	// If ChildClearTID is true, when the child exits, 0 is written to the
-	// address ChildTID in the child's memory, and if the write is successful a
-	// futex wake on the same address is performed.
-	//
-	// If ChildSetTID is true, the child's thread ID (in the child's PID
-	// namespace) is written to address ChildTID in the child's memory. (As in
-	// Linux, failed writes are silently ignored.)
-	ChildClearTID bool
-	ChildSetTID   bool
-	ChildTID      hostarch.Addr
-
-	// If ParentSetTID is true, the child's thread ID (in the parent's PID
-	// namespace) is written to address ParentTID in the parent's memory. (As
-	// in Linux, failed writes are silently ignored.)
-	//
-	// Older versions of the clone(2) man page state that CLONE_PARENT_SETTID
-	// causes the child's thread ID to be written to ptid in both the parent
-	// and child's memory, but this is a documentation error fixed by
-	// 87ab04792ced ("clone.2: Fix description of CLONE_PARENT_SETTID").
-	ParentSetTID bool
-	ParentTID    hostarch.Addr
-
-	// If Vfork is true, place the parent in vforkStop until the cloned task
-	// releases its TaskImage.
-	Vfork bool
-
-	// If Untraced is true, do not report PTRACE_EVENT_CLONE/FORK/VFORK for
-	// this clone(), and do not ptrace-attach the caller's tracer to the new
-	// task. (PTRACE_EVENT_VFORK_DONE will still be reported if appropriate).
-	Untraced bool
-
-	// If InheritTracer is true, ptrace-attach the caller's tracer to the new
-	// task, even if no PTRACE_EVENT_CLONE/FORK/VFORK event would be reported
-	// for it. If both Untraced and InheritTracer are true, no event will be
-	// reported, but tracer inheritance will still occur.
-	InheritTracer bool
-}
-
 // Clone implements the clone(2) syscall and returns the thread ID of the new
 // task in t's PID namespace. Clone may return both a non-zero thread ID and a
 // non-nil error.
 //
 // Preconditions: The caller must be running Task.doSyscallInvoke on the task
 // goroutine.
-func (t *Task) Clone(opts *CloneOptions) (ThreadID, *SyscallControl, error) {
+func (t *Task) Clone(args *linux.CloneArgs) (ThreadID, *SyscallControl, error) {
 	// Since signal actions may refer to application signal handlers by virtual
 	// address, any set of signal handlers must refer to the same address
 	// space.
-	if !opts.NewSignalHandlers && opts.NewAddressSpace {
-		return 0, nil, syserror.EINVAL
+	if args.Flags&(linux.CLONE_SIGHAND|linux.CLONE_VM) == linux.CLONE_SIGHAND {
+		return 0, nil, linuxerr.EINVAL
 	}
 	// In order for the behavior of thread-group-directed signals to be sane,
 	// all tasks in a thread group must share signal handlers.
-	if !opts.NewThreadGroup && opts.NewSignalHandlers {
-		return 0, nil, syserror.EINVAL
+	if args.Flags&(linux.CLONE_THREAD|linux.CLONE_SIGHAND) == linux.CLONE_THREAD {
+		return 0, nil, linuxerr.EINVAL
 	}
 	// All tasks in a thread group must be in the same PID namespace.
-	if !opts.NewThreadGroup && (opts.NewPIDNamespace || t.childPIDNamespace != nil) {
-		return 0, nil, syserror.EINVAL
+	if (args.Flags&linux.CLONE_THREAD != 0) && (args.Flags&linux.CLONE_NEWPID != 0 || t.childPIDNamespace != nil) {
+		return 0, nil, linuxerr.EINVAL
 	}
 	// The two different ways of specifying a new PID namespace are
 	// incompatible.
-	if opts.NewPIDNamespace && t.childPIDNamespace != nil {
-		return 0, nil, syserror.EINVAL
+	if args.Flags&linux.CLONE_NEWPID != 0 && t.childPIDNamespace != nil {
+		return 0, nil, linuxerr.EINVAL
 	}
 	// Thread groups and FS contexts cannot span user namespaces.
-	if opts.NewUserNamespace && (!opts.NewThreadGroup || !opts.NewFSContext) {
-		return 0, nil, syserror.EINVAL
+	if args.Flags&linux.CLONE_NEWUSER != 0 && args.Flags&(linux.CLONE_THREAD|linux.CLONE_FS) != 0 {
+		return 0, nil, linuxerr.EINVAL
+	}
+	// args.ExitSignal must be a valid signal.
+	if args.ExitSignal != 0 && !linux.Signal(args.ExitSignal).IsValid() {
+		return 0, nil, linuxerr.EINVAL
 	}
 
 	// Pull task registers and FPU state, a cloned task will inherit the
@@ -174,7 +73,7 @@ func (t *Task) Clone(opts *CloneOptions) (ThreadID, *SyscallControl, error) {
 	// user_namespaces(7)
 	creds := t.Credentials()
 	userns := creds.UserNamespace
-	if opts.NewUserNamespace {
+	if args.Flags&linux.CLONE_NEWUSER != 0 {
 		var err error
 		// "EPERM (since Linux 3.9): CLONE_NEWUSER was specified in flags and
 		// the caller is in a chroot environment (i.e., the caller's root
@@ -182,28 +81,26 @@ func (t *Task) Clone(opts *CloneOptions) (ThreadID, *SyscallControl, error) {
 		// in which it resides)." - clone(2). Neither chroot(2) nor
 		// user_namespaces(7) document this.
 		if t.IsChrooted() {
-			return 0, nil, syserror.EPERM
+			return 0, nil, linuxerr.EPERM
 		}
 		userns, err = creds.NewChildUserNamespace()
 		if err != nil {
 			return 0, nil, err
 		}
 	}
-	if (opts.NewPIDNamespace || opts.NewNetworkNamespace || opts.NewUTSNamespace) && !creds.HasCapabilityIn(linux.CAP_SYS_ADMIN, userns) {
-		return 0, nil, syserror.EPERM
+	if args.Flags&(linux.CLONE_NEWPID|linux.CLONE_NEWNET|linux.CLONE_NEWUTS|linux.CLONE_NEWIPC) != 0 && !creds.HasCapabilityIn(linux.CAP_SYS_ADMIN, userns) {
+		return 0, nil, linuxerr.EPERM
 	}
 
 	utsns := t.UTSNamespace()
-	if opts.NewUTSNamespace {
+	if args.Flags&linux.CLONE_NEWUTS != 0 {
 		// Note that this must happen after NewUserNamespace so we get
 		// the new userns if there is one.
 		utsns = t.UTSNamespace().Clone(userns)
 	}
 
 	ipcns := t.IPCNamespace()
-	if opts.NewIPCNamespace {
-		// Note that "If CLONE_NEWIPC is set, then create the process in a new IPC
-		// namespace"
+	if args.Flags&linux.CLONE_NEWIPC != 0 {
 		ipcns = NewIPCNamespace(userns)
 	} else {
 		ipcns.IncRef()
@@ -214,7 +111,7 @@ func (t *Task) Clone(opts *CloneOptions) (ThreadID, *SyscallControl, error) {
 	defer cu.Clean()
 
 	netns := t.NetworkNamespace()
-	if opts.NewNetworkNamespace {
+	if args.Flags&linux.CLONE_NEWNET != 0 {
 		netns = inet.NewNamespace(netns)
 	}
 
@@ -227,7 +124,7 @@ func (t *Task) Clone(opts *CloneOptions) (ThreadID, *SyscallControl, error) {
 		})
 	}
 
-	image, err := t.image.Fork(t, t.k, !opts.NewAddressSpace)
+	image, err := t.image.Fork(t, t.k, args.Flags&linux.CLONE_VM != 0)
 	if err != nil {
 		return 0, nil, err
 	}
@@ -236,17 +133,17 @@ func (t *Task) Clone(opts *CloneOptions) (ThreadID, *SyscallControl, error) {
 	})
 	// clone() returns 0 in the child.
 	image.Arch.SetReturn(0)
-	if opts.Stack != 0 {
-		image.Arch.SetStack(uintptr(opts.Stack))
+	if args.Stack != 0 {
+		image.Arch.SetStack(uintptr(args.Stack))
 	}
-	if opts.SetTLS {
-		if !image.Arch.SetTLS(uintptr(opts.TLS)) {
-			return 0, nil, syserror.EPERM
+	if args.Flags&linux.CLONE_SETTLS != 0 {
+		if !image.Arch.SetTLS(uintptr(args.TLS)) {
+			return 0, nil, linuxerr.EPERM
 		}
 	}
 
 	var fsContext *FSContext
-	if opts.NewFSContext {
+	if args.Flags&linux.CLONE_FS == 0 {
 		fsContext = t.fsContext.Fork()
 	} else {
 		fsContext = t.fsContext
@@ -254,7 +151,7 @@ func (t *Task) Clone(opts *CloneOptions) (ThreadID, *SyscallControl, error) {
 	}
 
 	var fdTable *FDTable
-	if opts.NewFiles {
+	if args.Flags&linux.CLONE_FILES == 0 {
 		fdTable = t.fdTable.Fork(t)
 	} else {
 		fdTable = t.fdTable
@@ -264,22 +161,22 @@ func (t *Task) Clone(opts *CloneOptions) (ThreadID, *SyscallControl, error) {
 	pidns := t.tg.pidns
 	if t.childPIDNamespace != nil {
 		pidns = t.childPIDNamespace
-	} else if opts.NewPIDNamespace {
+	} else if args.Flags&linux.CLONE_NEWPID != 0 {
 		pidns = pidns.NewChild(userns)
 	}
 
 	tg := t.tg
 	rseqAddr := hostarch.Addr(0)
 	rseqSignature := uint32(0)
-	if opts.NewThreadGroup {
+	if args.Flags&linux.CLONE_THREAD == 0 {
 		if tg.mounts != nil {
 			tg.mounts.IncRef()
 		}
 		sh := t.tg.signalHandlers
-		if opts.NewSignalHandlers {
+		if args.Flags&linux.CLONE_SIGHAND == 0 {
 			sh = sh.Fork()
 		}
-		tg = t.k.NewThreadGroup(tg.mounts, pidns, sh, opts.TerminationSignal, tg.limits.GetCopy())
+		tg = t.k.NewThreadGroup(tg.mounts, pidns, sh, linux.Signal(args.ExitSignal), tg.limits.GetCopy())
 		tg.oomScoreAdj = atomic.LoadInt32(&t.tg.oomScoreAdj)
 		rseqAddr = t.rseqAddr
 		rseqSignature = t.rseqSignature
@@ -304,7 +201,7 @@ func (t *Task) Clone(opts *CloneOptions) (ThreadID, *SyscallControl, error) {
 		RSeqSignature:           rseqSignature,
 		ContainerID:             t.ContainerID(),
 	}
-	if opts.NewThreadGroup {
+	if args.Flags&linux.CLONE_THREAD == 0 {
 		cfg.Parent = t
 	} else {
 		cfg.InheritParent = t
@@ -322,7 +219,7 @@ func (t *Task) Clone(opts *CloneOptions) (ThreadID, *SyscallControl, error) {
 	//
 	// However kernel/fork.c:copy_process() adds a limitation to this:
 	// "sigaltstack should be cleared when sharing the same VM".
-	if opts.NewAddressSpace || opts.Vfork {
+	if args.Flags&linux.CLONE_VM == 0 || args.Flags&linux.CLONE_VFORK != 0 {
 		nt.SetSignalStack(t.SignalStack())
 	}
 
@@ -347,35 +244,35 @@ func (t *Task) Clone(opts *CloneOptions) (ThreadID, *SyscallControl, error) {
 		copiedFilters := append([]bpf.Program(nil), f.([]bpf.Program)...)
 		nt.syscallFilters.Store(copiedFilters)
 	}
-	if opts.Vfork {
+	if args.Flags&linux.CLONE_VFORK != 0 {
 		nt.vforkParent = t
 	}
 
-	if opts.ChildClearTID {
-		nt.SetClearTID(opts.ChildTID)
+	if args.Flags&linux.CLONE_CHILD_CLEARTID != 0 {
+		nt.SetClearTID(hostarch.Addr(args.ChildTID))
 	}
-	if opts.ChildSetTID {
+	if args.Flags&linux.CLONE_CHILD_SETTID != 0 {
 		ctid := nt.ThreadID()
-		ctid.CopyOut(nt.CopyContext(t, usermem.IOOpts{AddressSpaceActive: false}), opts.ChildTID)
+		ctid.CopyOut(nt.CopyContext(t, usermem.IOOpts{AddressSpaceActive: false}), hostarch.Addr(args.ChildTID))
 	}
 	ntid := t.tg.pidns.IDOfTask(nt)
-	if opts.ParentSetTID {
-		ntid.CopyOut(t, opts.ParentTID)
+	if args.Flags&linux.CLONE_PARENT_SETTID != 0 {
+		ntid.CopyOut(t, hostarch.Addr(args.ParentTID))
 	}
 
 	kind := ptraceCloneKindClone
-	if opts.Vfork {
+	if args.Flags&linux.CLONE_VFORK != 0 {
 		kind = ptraceCloneKindVfork
-	} else if opts.TerminationSignal == linux.SIGCHLD {
+	} else if linux.Signal(args.ExitSignal) == linux.SIGCHLD {
 		kind = ptraceCloneKindFork
 	}
-	if t.ptraceClone(kind, nt, opts) {
-		if opts.Vfork {
+	if t.ptraceClone(kind, nt, args) {
+		if args.Flags&linux.CLONE_VFORK != 0 {
 			return ntid, &SyscallControl{next: &runSyscallAfterPtraceEventClone{vforkChild: nt, vforkChildTID: ntid}}, nil
 		}
 		return ntid, &SyscallControl{next: &runSyscallAfterPtraceEventClone{}}, nil
 	}
-	if opts.Vfork {
+	if args.Flags&linux.CLONE_VFORK != 0 {
 		t.maybeBeginVforkStop(nt)
 		return ntid, &SyscallControl{next: &runSyscallAfterVforkStop{childTID: ntid}}, nil
 	}
@@ -446,39 +343,47 @@ func (r *runSyscallAfterVforkStop) execute(t *Task) taskRunState {
 }
 
 // Unshare changes the set of resources t shares with other tasks, as specified
-// by opts.
+// by flags.
 //
 // Preconditions: The caller must be running on the task goroutine.
-func (t *Task) Unshare(opts *SharingOptions) error {
-	// In Linux unshare(2), NewThreadGroup implies NewSignalHandlers and
-	// NewSignalHandlers implies NewAddressSpace. All three flags are no-ops if
-	// t is the only task using its MM, which due to clone(2)'s rules imply
-	// that it is also the only task using its signal handlers / in its thread
-	// group, and cause EINVAL to be returned otherwise.
+func (t *Task) Unshare(flags int32) error {
+	// "CLONE_THREAD, CLONE_SIGHAND, and CLONE_VM can be specified in flags if
+	// the caller is single threaded (i.e., it is not sharing its address space
+	// with another process or thread). In this case, these flags have no
+	// effect. (Note also that specifying CLONE_THREAD automatically implies
+	// CLONE_VM, and specifying CLONE_VM automatically implies CLONE_SIGHAND.)
+	// If the process is multithreaded, then the use of these flags results in
+	// an error." - unshare(2). This is incorrect (cf.
+	// kernel/fork.c:ksys_unshare()):
+	//
+	// - CLONE_THREAD does not imply CLONE_VM.
+	//
+	// - CLONE_SIGHAND implies CLONE_THREAD.
+	//
+	// - Only CLONE_VM requires that the caller is not sharing its address
+	// space with another thread. CLONE_SIGHAND requires that the caller is not
+	// sharing its signal handlers, and CLONE_THREAD requires that the caller
+	// is the only thread in its thread group.
 	//
 	// Since we don't count the number of tasks using each address space or set
-	// of signal handlers, we reject NewSignalHandlers and NewAddressSpace
-	// altogether, and interpret NewThreadGroup as requiring that t be the only
-	// member of its thread group. This seems to be logically coherent, in the
-	// sense that clone(2) allows a task to share signal handlers and address
-	// spaces with tasks in other thread groups.
-	if opts.NewAddressSpace || opts.NewSignalHandlers {
-		return syserror.EINVAL
+	// of signal handlers, we reject CLONE_VM and CLONE_SIGHAND altogether.
+	if flags&(linux.CLONE_VM|linux.CLONE_SIGHAND) != 0 {
+		return linuxerr.EINVAL
 	}
 	creds := t.Credentials()
-	if opts.NewThreadGroup {
+	if flags&linux.CLONE_THREAD != 0 {
 		t.tg.signalHandlers.mu.Lock()
 		if t.tg.tasksCount != 1 {
 			t.tg.signalHandlers.mu.Unlock()
-			return syserror.EINVAL
+			return linuxerr.EINVAL
 		}
 		t.tg.signalHandlers.mu.Unlock()
 		// This isn't racy because we're the only living task, and therefore
 		// the only task capable of creating new ones, in our thread group.
 	}
-	if opts.NewUserNamespace {
+	if flags&linux.CLONE_NEWUSER != 0 {
 		if t.IsChrooted() {
-			return syserror.EPERM
+			return linuxerr.EPERM
 		}
 		newUserNS, err := creds.NewChildUserNamespace()
 		if err != nil {
@@ -492,34 +397,34 @@ func (t *Task) Unshare(opts *SharingOptions) error {
 		creds = t.Credentials()
 	}
 	haveCapSysAdmin := t.HasCapability(linux.CAP_SYS_ADMIN)
-	if opts.NewPIDNamespace {
+	if flags&linux.CLONE_NEWPID != 0 {
 		if !haveCapSysAdmin {
-			return syserror.EPERM
+			return linuxerr.EPERM
 		}
 		t.childPIDNamespace = t.tg.pidns.NewChild(t.UserNamespace())
 	}
 	t.mu.Lock()
 	// Can't defer unlock: DecRefs must occur without holding t.mu.
-	if opts.NewNetworkNamespace {
+	if flags&linux.CLONE_NEWNET != 0 {
 		if !haveCapSysAdmin {
 			t.mu.Unlock()
-			return syserror.EPERM
+			return linuxerr.EPERM
 		}
 		t.netns = inet.NewNamespace(t.netns)
 	}
-	if opts.NewUTSNamespace {
+	if flags&linux.CLONE_NEWUTS != 0 {
 		if !haveCapSysAdmin {
 			t.mu.Unlock()
-			return syserror.EPERM
+			return linuxerr.EPERM
 		}
 		// Note that this must happen after NewUserNamespace, so the
 		// new user namespace is used if there is one.
 		t.utsns = t.utsns.Clone(creds.UserNamespace)
 	}
-	if opts.NewIPCNamespace {
+	if flags&linux.CLONE_NEWIPC != 0 {
 		if !haveCapSysAdmin {
 			t.mu.Unlock()
-			return syserror.EPERM
+			return linuxerr.EPERM
 		}
 		// Note that "If CLONE_NEWIPC is set, then create the process in a new IPC
 		// namespace"
@@ -527,12 +432,12 @@ func (t *Task) Unshare(opts *SharingOptions) error {
 		t.ipcns = NewIPCNamespace(creds.UserNamespace)
 	}
 	var oldFDTable *FDTable
-	if opts.NewFiles {
+	if flags&linux.CLONE_FILES != 0 {
 		oldFDTable = t.fdTable
 		t.fdTable = oldFDTable.Fork(t)
 	}
 	var oldFSContext *FSContext
-	if opts.NewFSContext {
+	if flags&linux.CLONE_FS != 0 {
 		oldFSContext = t.fsContext
 		t.fsContext = oldFSContext.Fork()
 	}
diff --git a/pkg/sentry/kernel/task_exit.go b/pkg/sentry/kernel/task_exit.go
index d115b8783..fbfcc19e5 100644
--- a/pkg/sentry/kernel/task_exit.go
+++ b/pkg/sentry/kernel/task_exit.go
@@ -28,66 +28,14 @@ import (
 	"errors"
 	"fmt"
 	"strconv"
-	"strings"
 
 	"gvisor.dev/gvisor/pkg/abi/linux"
+	"gvisor.dev/gvisor/pkg/errors/linuxerr"
 	"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
 	"gvisor.dev/gvisor/pkg/syserror"
 	"gvisor.dev/gvisor/pkg/waiter"
 )
 
-// An ExitStatus is a value communicated from an exiting task or thread group
-// to the party that reaps it.
-//
-// +stateify savable
-type ExitStatus struct {
-	// Code is the numeric value passed to the call to exit or exit_group that
-	// caused the exit. If the exit was not caused by such a call, Code is 0.
-	Code int
-
-	// Signo is the signal that caused the exit. If the exit was not caused by
-	// a signal, Signo is 0.
-	Signo int
-}
-
-func (es ExitStatus) String() string {
-	var b strings.Builder
-	if code := es.Code; code != 0 {
-		if b.Len() != 0 {
-			b.WriteByte(' ')
-		}
-		_, _ = fmt.Fprintf(&b, "Code=%d", code)
-	}
-	if signal := es.Signo; signal != 0 {
-		if b.Len() != 0 {
-			b.WriteByte(' ')
-		}
-		_, _ = fmt.Fprintf(&b, "Signal=%d", signal)
-	}
-	return b.String()
-}
-
-// Signaled returns true if the ExitStatus indicates that the exiting task or
-// thread group was killed by a signal.
-func (es ExitStatus) Signaled() bool {
-	return es.Signo != 0
-}
-
-// Status returns the numeric representation of the ExitStatus returned by e.g.
-// the wait4() system call.
-func (es ExitStatus) Status() uint32 {
-	return ((uint32(es.Code) & 0xff) << 8) | (uint32(es.Signo) & 0xff)
-}
-
-// ShellExitCode returns the numeric exit code that Bash would return for an
-// exit status of es.
-func (es ExitStatus) ShellExitCode() int {
-	if es.Signaled() {
-		return 128 + es.Signo
-	}
-	return es.Code
-}
-
 // TaskExitState represents a step in the task exit path.
 //
 // "Exiting" and "exited" are often ambiguous; prefer to name specific states.
@@ -163,13 +111,13 @@ func (t *Task) killedLocked() bool {
 	return t.pendingSignals.pendingSet&linux.SignalSetOf(linux.SIGKILL) != 0
 }
 
-// PrepareExit indicates an exit with status es.
+// PrepareExit indicates an exit with the given status.
 //
 // Preconditions: The caller must be running on the task goroutine.
-func (t *Task) PrepareExit(es ExitStatus) {
+func (t *Task) PrepareExit(ws linux.WaitStatus) {
 	t.tg.signalHandlers.mu.Lock()
 	defer t.tg.signalHandlers.mu.Unlock()
-	t.exitStatus = es
+	t.exitStatus = ws
 }
 
 // PrepareGroupExit indicates a group exit with status es to t's thread group.
@@ -180,7 +128,7 @@ func (t *Task) PrepareExit(es ExitStatus) {
 // ptrace.)
 //
 // Preconditions: The caller must be running on the task goroutine.
-func (t *Task) PrepareGroupExit(es ExitStatus) {
+func (t *Task) PrepareGroupExit(ws linux.WaitStatus) {
 	t.tg.signalHandlers.mu.Lock()
 	defer t.tg.signalHandlers.mu.Unlock()
 	if t.tg.exiting || t.tg.execing != nil {
@@ -198,8 +146,8 @@ func (t *Task) PrepareGroupExit(es ExitStatus) {
 		return
 	}
 	t.tg.exiting = true
-	t.tg.exitStatus = es
-	t.exitStatus = es
+	t.tg.exitStatus = ws
+	t.exitStatus = ws
 	for sibling := t.tg.tasks.Front(); sibling != nil; sibling = sibling.Next() {
 		if sibling != t {
 			sibling.killLocked()
@@ -207,11 +155,11 @@ func (t *Task) PrepareGroupExit(es ExitStatus) {
 	}
 }
 
-// Kill requests that all tasks in ts exit as if group exiting with status es.
+// Kill requests that all tasks in ts exit as if group exiting with status ws.
 // Kill does not wait for tasks to exit.
 //
 // Kill has no analogue in Linux; it's provided for save/restore only.
-func (ts *TaskSet) Kill(es ExitStatus) {
+func (ts *TaskSet) Kill(ws linux.WaitStatus) {
 	ts.mu.Lock()
 	defer ts.mu.Unlock()
 	ts.Root.exiting = true
@@ -219,7 +167,7 @@ func (ts *TaskSet) Kill(es ExitStatus) {
 		t.tg.signalHandlers.mu.Lock()
 		if !t.tg.exiting {
 			t.tg.exiting = true
-			t.tg.exitStatus = es
+			t.tg.exitStatus = ws
 		}
 		t.killLocked()
 		t.tg.signalHandlers.mu.Unlock()
@@ -730,10 +678,10 @@ func (t *Task) exitNotificationSignal(sig linux.Signal, receiver *Task) *linux.S
 	info.SetUID(int32(t.Credentials().RealKUID.In(receiver.UserNamespace()).OrOverflow()))
 	if t.exitStatus.Signaled() {
 		info.Code = linux.CLD_KILLED
-		info.SetStatus(int32(t.exitStatus.Signo))
+		info.SetStatus(int32(t.exitStatus.TerminationSignal()))
 	} else {
 		info.Code = linux.CLD_EXITED
-		info.SetStatus(int32(t.exitStatus.Code))
+		info.SetStatus(int32(t.exitStatus.ExitStatus()))
 	}
 	// TODO(b/72102453): Set utime, stime.
 	return info
@@ -741,7 +689,7 @@ func (t *Task) exitNotificationSignal(sig linux.Signal, receiver *Task) *linux.S
 
 // ExitStatus returns t's exit status, which is only guaranteed to be
 // meaningful if t.ExitState() != TaskExitNone.
-func (t *Task) ExitStatus() ExitStatus {
+func (t *Task) ExitStatus() linux.WaitStatus {
 	t.tg.pidns.owner.mu.RLock()
 	defer t.tg.pidns.owner.mu.RUnlock()
 	t.tg.signalHandlers.mu.Lock()
@@ -751,7 +699,7 @@ func (t *Task) ExitStatus() ExitStatus {
 
 // ExitStatus returns the exit status that would be returned by a consuming
 // wait*() on tg.
-func (tg *ThreadGroup) ExitStatus() ExitStatus {
+func (tg *ThreadGroup) ExitStatus() linux.WaitStatus {
 	tg.pidns.owner.mu.RLock()
 	defer tg.pidns.owner.mu.RUnlock()
 	tg.signalHandlers.mu.Lock()
@@ -762,7 +710,9 @@ func (tg *ThreadGroup) ExitStatus() ExitStatus {
 	return tg.leader.exitStatus
 }
 
-// TerminationSignal returns the thread group's termination signal.
+// TerminationSignal returns the thread group's termination signal, which is
+// the signal that will be sent to its leader's parent when all threads have
+// exited.
 func (tg *ThreadGroup) TerminationSignal() linux.Signal {
 	tg.pidns.owner.mu.RLock()
 	defer tg.pidns.owner.mu.RUnlock()
@@ -888,8 +838,8 @@ type WaitResult struct {
 	// Event is exactly one of the events defined above.
 	Event waiter.EventMask
 
-	// Status is the numeric status associated with the event.
-	Status uint32
+	// Status is the wait status associated with the event.
+	Status linux.WaitStatus
 }
 
 // Wait waits for an event from a thread group that is a child of t's thread
@@ -942,7 +892,7 @@ func (t *Task) waitOnce(opts *WaitOptions) (*WaitResult, error) {
 	if anyWaitableTasks {
 		return nil, ErrNoWaitableEvent
 	}
-	return nil, syserror.ECHILD
+	return nil, linuxerr.ECHILD
 }
 
 // Preconditions: The TaskSet mutex must be locked for writing.
@@ -1042,7 +992,7 @@ func (t *Task) waitCollectZombieLocked(target *Task, opts *WaitOptions, asPtrace
 	}
 	pid := t.tg.pidns.tids[target]
 	uid := target.Credentials().RealKUID.In(t.UserNamespace()).OrOverflow()
-	status := target.exitStatus.Status()
+	status := target.exitStatus
 	if !opts.ConsumeEvent {
 		return &WaitResult{
 			Task:   target,
@@ -1056,7 +1006,7 @@ func (t *Task) waitCollectZombieLocked(target *Task, opts *WaitOptions, asPtrace
 	// differ from that reported by a consuming wait; the latter will return
 	// the group exit code if one is available.
 	if target.tg.exiting {
-		status = target.tg.exitStatus.Status()
+		status = target.tg.exitStatus
 	}
 	// t may be (in the thread group of) target's parent, tracer, or both. We
 	// don't need to check for !exitTracerAcked because tracees are detached
@@ -1122,12 +1072,11 @@ func (t *Task) waitCollectChildGroupStopLocked(target *Task, opts *WaitOptions)
 		target.tg.groupStopWaitable = false
 	}
 	return &WaitResult{
-		Task:  target,
-		TID:   pid,
-		UID:   uid,
-		Event: EventChildGroupStop,
-		// There is no name for these status constants.
-		Status: (uint32(sig)&0xff)<<8 | 0x7f,
+		Task:   target,
+		TID:    pid,
+		UID:    uid,
+		Event:  EventChildGroupStop,
+		Status: linux.WaitStatusStopped(uint32(sig)),
 	}
 }
 
@@ -1148,7 +1097,7 @@ func (t *Task) waitCollectGroupContinueLocked(target *Task, opts *WaitOptions) *
 		TID:    pid,
 		UID:    uid,
 		Event:  EventGroupContinue,
-		Status: 0xffff,
+		Status: linux.WaitStatusContinued(),
 	}
 }
 
@@ -1176,7 +1125,7 @@ func (t *Task) waitCollectTraceeStopLocked(target *Task, opts *WaitOptions) *Wai
 		TID:    pid,
 		UID:    uid,
 		Event:  EventTraceeStop,
-		Status: uint32(code)<<8 | 0x7f,
+		Status: linux.WaitStatusStopped(uint32(code)),
 	}
 }
 
diff --git a/pkg/sentry/kernel/task_identity.go b/pkg/sentry/kernel/task_identity.go
index 0325967e4..a9067b682 100644
--- a/pkg/sentry/kernel/task_identity.go
+++ b/pkg/sentry/kernel/task_identity.go
@@ -16,9 +16,9 @@ package kernel
 
 import (
 	"gvisor.dev/gvisor/pkg/abi/linux"
+	"gvisor.dev/gvisor/pkg/errors/linuxerr"
 	"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
 	"gvisor.dev/gvisor/pkg/sentry/mm"
-	"gvisor.dev/gvisor/pkg/syserror"
 )
 
 // Credentials returns t's credentials.
@@ -47,7 +47,7 @@ func (t *Task) HasCapability(cp linux.Capability) bool {
 func (t *Task) SetUID(uid auth.UID) error {
 	// setuid considers -1 to be invalid.
 	if !uid.Ok() {
-		return syserror.EINVAL
+		return linuxerr.EINVAL
 	}
 
 	t.mu.Lock()
@@ -56,7 +56,7 @@ func (t *Task) SetUID(uid auth.UID) error {
 	creds := t.Credentials()
 	kuid := creds.UserNamespace.MapToKUID(uid)
 	if !kuid.Ok() {
-		return syserror.EINVAL
+		return linuxerr.EINVAL
 	}
 	// "setuid() sets the effective user ID of the calling process. If the
 	// effective UID of the caller is root (more precisely: if the caller has
@@ -70,7 +70,7 @@ func (t *Task) SetUID(uid auth.UID) error {
 	// capability) and uid does not match the real UID or saved set-user-ID of
 	// the calling process."
 	if kuid != creds.RealKUID && kuid != creds.SavedKUID {
-		return syserror.EPERM
+		return linuxerr.EPERM
 	}
 	t.setKUIDsUncheckedLocked(creds.RealKUID, kuid, creds.SavedKUID)
 	return nil
@@ -87,26 +87,26 @@ func (t *Task) SetREUID(r, e auth.UID) error {
 	if r.Ok() {
 		newR = creds.UserNamespace.MapToKUID(r)
 		if !newR.Ok() {
-			return syserror.EINVAL
+			return linuxerr.EINVAL
 		}
 	}
 	newE := creds.EffectiveKUID
 	if e.Ok() {
 		newE = creds.UserNamespace.MapToKUID(e)
 		if !newE.Ok() {
-			return syserror.EINVAL
+			return linuxerr.EINVAL
 		}
 	}
 	if !creds.HasCapability(linux.CAP_SETUID) {
 		// "Unprivileged processes may only set the effective user ID to the
 		// real user ID, the effective user ID, or the saved set-user-ID."
 		if newE != creds.RealKUID && newE != creds.EffectiveKUID && newE != creds.SavedKUID {
-			return syserror.EPERM
+			return linuxerr.EPERM
 		}
 		// "Unprivileged users may only set the real user ID to the real user
 		// ID or the effective user ID."
 		if newR != creds.RealKUID && newR != creds.EffectiveKUID {
-			return syserror.EPERM
+			return linuxerr.EPERM
 		}
 	}
 	// "If the real user ID is set (i.e., ruid is not -1) or the effective user
@@ -223,7 +223,7 @@ func (t *Task) setKUIDsUncheckedLocked(newR, newE, newS auth.KUID) {
 // SetGID implements the semantics of setgid(2).
 func (t *Task) SetGID(gid auth.GID) error {
 	if !gid.Ok() {
-		return syserror.EINVAL
+		return linuxerr.EINVAL
 	}
 
 	t.mu.Lock()
@@ -232,14 +232,14 @@ func (t *Task) SetGID(gid auth.GID) error {
 	creds := t.Credentials()
 	kgid := creds.UserNamespace.MapToKGID(gid)
 	if !kgid.Ok() {
-		return syserror.EINVAL
+		return linuxerr.EINVAL
 	}
 	if creds.HasCapability(linux.CAP_SETGID) {
 		t.setKGIDsUncheckedLocked(kgid, kgid, kgid)
 		return nil
 	}
 	if kgid != creds.RealKGID && kgid != creds.SavedKGID {
-		return syserror.EPERM
+		return linuxerr.EPERM
 	}
 	t.setKGIDsUncheckedLocked(creds.RealKGID, kgid, creds.SavedKGID)
 	return nil
@@ -255,22 +255,22 @@ func (t *Task) SetREGID(r, e auth.GID) error {
 	if r.Ok() {
 		newR = creds.UserNamespace.MapToKGID(r)
 		if !newR.Ok() {
-			return syserror.EINVAL
+			return linuxerr.EINVAL
 		}
 	}
 	newE := creds.EffectiveKGID
 	if e.Ok() {
 		newE = creds.UserNamespace.MapToKGID(e)
 		if !newE.Ok() {
-			return syserror.EINVAL
+			return linuxerr.EINVAL
 		}
 	}
 	if !creds.HasCapability(linux.CAP_SETGID) {
 		if newE != creds.RealKGID && newE != creds.EffectiveKGID && newE != creds.SavedKGID {
-			return syserror.EPERM
+			return linuxerr.EPERM
 		}
 		if newR != creds.RealKGID && newR != creds.EffectiveKGID {
-			return syserror.EPERM
+			return linuxerr.EPERM
 		}
 	}
 	newS := creds.SavedKGID
@@ -343,13 +343,13 @@ func (t *Task) SetExtraGIDs(gids []auth.GID) error {
 	defer t.mu.Unlock()
 	creds := t.Credentials()
 	if !creds.HasCapability(linux.CAP_SETGID) {
-		return syserror.EPERM
+		return linuxerr.EPERM
 	}
 	kgids := make([]auth.KGID, len(gids))
 	for i, gid := range gids {
 		kgid := creds.UserNamespace.MapToKGID(gid)
 		if !kgid.Ok() {
-			return syserror.EINVAL
+			return linuxerr.EINVAL
 		}
 		kgids[i] = kgid
 	}
@@ -367,25 +367,25 @@ func (t *Task) SetCapabilitySets(permitted, inheritable, effective auth.Capabili
 	// "Permitted: This is a limiting superset for the effective capabilities
 	// that the thread may assume." - capabilities(7)
 	if effective & ^permitted != 0 {
-		return syserror.EPERM
+		return linuxerr.EPERM
 	}
 	creds := t.Credentials()
 	// "It is also a limiting superset for the capabilities that may be added
 	// to the inheritable set by a thread that does not have the CAP_SETPCAP
 	// capability in its effective set."
 	if !creds.HasCapability(linux.CAP_SETPCAP) && (inheritable & ^(creds.InheritableCaps|creds.PermittedCaps) != 0) {
-		return syserror.EPERM
+		return linuxerr.EPERM
 	}
 	// "If a thread drops a capability from its permitted set, it can never
 	// reacquire that capability (unless it execve(2)s ..."
 	if permitted & ^creds.PermittedCaps != 0 {
-		return syserror.EPERM
+		return linuxerr.EPERM
 	}
 	// "... if a capability is not in the bounding set, then a thread can't add
 	// this capability to its inheritable set, even if it was in its permitted
 	// capabilities ..."
 	if inheritable & ^(creds.InheritableCaps|creds.BoundingCaps) != 0 {
-		return syserror.EPERM
+		return linuxerr.EPERM
 	}
 	creds = creds.Fork() // The credentials object is immutable. See doc for creds.
 	creds.PermittedCaps = permitted
@@ -402,7 +402,7 @@ func (t *Task) DropBoundingCapability(cp linux.Capability) error {
 	defer t.mu.Unlock()
 	creds := t.Credentials()
 	if !creds.HasCapability(linux.CAP_SETPCAP) {
-		return syserror.EPERM
+		return linuxerr.EPERM
 	}
 	creds = creds.Fork() // The credentials object is immutable. See doc for creds.
 	creds.BoundingCaps &^= auth.CapabilitySetOf(cp)
@@ -422,7 +422,7 @@ func (t *Task) SetUserNamespace(ns *auth.UserNamespace) error {
 	// If t just created ns, then t.creds is guaranteed to have CAP_SYS_ADMIN
 	// in ns (by rule 3 in auth.Credentials.HasCapability).
 	if !creds.HasCapabilityIn(linux.CAP_SYS_ADMIN, ns) {
-		return syserror.EPERM
+		return linuxerr.EPERM
 	}
 
 	creds = creds.Fork() // The credentials object is immutable. See doc for creds.
diff --git a/pkg/sentry/kernel/task_log.go b/pkg/sentry/kernel/task_log.go
index 72b9a0384..8de08151a 100644
--- a/pkg/sentry/kernel/task_log.go
+++ b/pkg/sentry/kernel/task_log.go
@@ -235,7 +235,7 @@ func (t *Task) traceExitEvent() {
 	if !trace.IsEnabled() {
 		return
 	}
-	trace.Logf(t.traceContext, traceCategory, "exit status: 0x%x", t.exitStatus.Status())
+	trace.Logf(t.traceContext, traceCategory, "exit status: %s", t.exitStatus)
 }
 
 // traceExecEvent is called when a task calls exec.
diff --git a/pkg/sentry/kernel/task_run.go b/pkg/sentry/kernel/task_run.go
index 068f25af1..054ff212f 100644
--- a/pkg/sentry/kernel/task_run.go
+++ b/pkg/sentry/kernel/task_run.go
@@ -377,7 +377,7 @@ func (app *runApp) execute(t *Task) taskRunState {
 	default:
 		// What happened? Can't continue.
 		t.Warningf("Unexpected SwitchToApp error: %v", err)
-		t.PrepareExit(ExitStatus{Code: ExtractErrno(err, -1)})
+		t.PrepareExit(linux.WaitStatusExit(int32(ExtractErrno(err, -1))))
 		return (*runExit)(nil)
 	}
 }
diff --git a/pkg/sentry/kernel/task_sched.go b/pkg/sentry/kernel/task_sched.go
index f142feab4..9d9fa76a6 100644
--- a/pkg/sentry/kernel/task_sched.go
+++ b/pkg/sentry/kernel/task_sched.go
@@ -23,12 +23,12 @@ import (
 	"time"
 
 	"gvisor.dev/gvisor/pkg/abi/linux"
+	"gvisor.dev/gvisor/pkg/errors/linuxerr"
 	"gvisor.dev/gvisor/pkg/sentry/hostcpu"
 	"gvisor.dev/gvisor/pkg/sentry/kernel/sched"
 	ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time"
 	"gvisor.dev/gvisor/pkg/sentry/limits"
 	"gvisor.dev/gvisor/pkg/sentry/usage"
-	"gvisor.dev/gvisor/pkg/syserror"
 )
 
 // TaskGoroutineState is a coarse representation of the current execution
@@ -601,7 +601,7 @@ func (t *Task) SetCPUMask(mask sched.CPUSet) error {
 
 	// Ensure that at least 1 CPU is still allowed.
 	if mask.NumCPUs() == 0 {
-		return syserror.EINVAL
+		return linuxerr.EINVAL
 	}
 
 	if t.k.useHostCores {
diff --git a/pkg/sentry/kernel/task_signals.go b/pkg/sentry/kernel/task_signals.go
index 8ca61ed48..7065ac79c 100644
--- a/pkg/sentry/kernel/task_signals.go
+++ b/pkg/sentry/kernel/task_signals.go
@@ -22,6 +22,7 @@ import (
 	"time"
 
 	"gvisor.dev/gvisor/pkg/abi/linux"
+	"gvisor.dev/gvisor/pkg/errors/linuxerr"
 	"gvisor.dev/gvisor/pkg/eventchannel"
 	"gvisor.dev/gvisor/pkg/hostarch"
 	"gvisor.dev/gvisor/pkg/sentry/arch"
@@ -156,7 +157,8 @@ func (t *Task) PendingSignals() linux.SignalSet {
 
 // deliverSignal delivers the given signal and returns the following run state.
 func (t *Task) deliverSignal(info *linux.SignalInfo, act linux.SigAction) taskRunState {
-	sigact := computeAction(linux.Signal(info.Signo), act)
+	sig := linux.Signal(info.Signo)
+	sigact := computeAction(sig, act)
 
 	if t.haveSyscallReturn {
 		if sre, ok := syserror.SyscallRestartErrnoFromReturn(t.Arch().Return()); ok {
@@ -197,14 +199,14 @@ func (t *Task) deliverSignal(info *linux.SignalInfo, act linux.SigAction) taskRu
 		}
 
 		// Attach an fault address if appropriate.
-		switch linux.Signal(info.Signo) {
+		switch sig {
 		case linux.SIGSEGV, linux.SIGFPE, linux.SIGILL, linux.SIGTRAP, linux.SIGBUS:
 			ucs.FaultAddr = info.Addr()
 		}
 
 		eventchannel.Emit(ucs)
 
-		t.PrepareGroupExit(ExitStatus{Signo: int(info.Signo)})
+		t.PrepareGroupExit(linux.WaitStatusTerminationSignal(sig))
 		return (*runExit)(nil)
 
 	case SignalActionStop:
@@ -224,12 +226,12 @@ func (t *Task) deliverSignal(info *linux.SignalInfo, act linux.SigAction) taskRu
 
 			// Send a forced SIGSEGV. If the signal that couldn't be delivered
 			// was a SIGSEGV, force the handler to SIG_DFL.
-			t.forceSignal(linux.SIGSEGV, linux.Signal(info.Signo) == linux.SIGSEGV /* unconditional */)
+			t.forceSignal(linux.SIGSEGV, sig == linux.SIGSEGV /* unconditional */)
 			t.SendSignal(SignalInfoPriv(linux.SIGSEGV))
 		}
 
 	default:
-		panic(fmt.Sprintf("Unknown signal action %+v, %d?", info, computeAction(linux.Signal(info.Signo), act)))
+		panic(fmt.Sprintf("Unknown signal action %+v, %d?", info, computeAction(sig, act)))
 	}
 	return (*runInterrupt)(nil)
 }
@@ -338,7 +340,7 @@ func (t *Task) Sigtimedwait(set linux.SignalSet, timeout time.Duration) (*linux.
 	}
 
 	if timeout == 0 {
-		return nil, syserror.EAGAIN
+		return nil, linuxerr.EAGAIN
 	}
 
 	// Unblock signals we're waiting for. Remember the original signal mask so
@@ -359,8 +361,8 @@ func (t *Task) Sigtimedwait(set linux.SignalSet, timeout time.Duration) (*linux.
 	if info := t.dequeueSignalLocked(mask); info != nil {
 		return info, nil
 	}
-	if err == syserror.ETIMEDOUT {
-		return nil, syserror.EAGAIN
+	if err == linuxerr.ETIMEDOUT {
+		return nil, linuxerr.EAGAIN
 	}
 	return nil, err
 }
@@ -369,9 +371,9 @@ func (t *Task) Sigtimedwait(set linux.SignalSet, timeout time.Duration) (*linux.
 //
 // The following errors may be returned:
 //
-//	syserror.ESRCH - The task has exited.
-//	syserror.EINVAL - The signal is not valid.
-//	syserror.EAGAIN - THe signal is realtime, and cannot be queued.
+//	linuxerr.ESRCH - The task has exited.
+//	linuxerr.EINVAL - The signal is not valid.
+//	linuxerr.EAGAIN - The signal is realtime, and cannot be queued.
 //
 func (t *Task) SendSignal(info *linux.SignalInfo) error {
 	t.tg.pidns.owner.mu.RLock()
@@ -406,14 +408,14 @@ func (t *Task) sendSignalLocked(info *linux.SignalInfo, group bool) error {
 
 func (t *Task) sendSignalTimerLocked(info *linux.SignalInfo, group bool, timer *IntervalTimer) error {
 	if t.exitState == TaskExitDead {
-		return syserror.ESRCH
+		return linuxerr.ESRCH
 	}
 	sig := linux.Signal(info.Signo)
 	if sig == 0 {
 		return nil
 	}
 	if !sig.IsValid() {
-		return syserror.EINVAL
+		return linuxerr.EINVAL
 	}
 
 	// Signal side effects apply even if the signal is ultimately discarded.
@@ -450,7 +452,7 @@ func (t *Task) sendSignalTimerLocked(info *linux.SignalInfo, group bool, timer *
 	}
 	if !q.enqueue(info, timer) {
 		if sig.IsRealtime() {
-			return syserror.EAGAIN
+			return linuxerr.EAGAIN
 		}
 		t.Debugf("Discarding duplicate signal %d", sig)
 		if timer != nil {
@@ -505,7 +507,7 @@ func (tg *ThreadGroup) applySignalSideEffectsLocked(sig linux.Signal) {
 		// ignores tg.execing.
 		if !tg.exiting {
 			tg.exiting = true
-			tg.exitStatus = ExitStatus{Signo: int(linux.SIGKILL)}
+			tg.exitStatus = linux.WaitStatusTerminationSignal(linux.SIGKILL)
 		}
 		for t := tg.tasks.Front(); t != nil; t = t.Next() {
 			t.killLocked()
@@ -684,7 +686,7 @@ func (t *Task) SetSignalStack(alt linux.SignalStack) bool {
 // to *actptr (if actptr is not nil) and returns the old signal action.
 func (tg *ThreadGroup) SetSigAction(sig linux.Signal, actptr *linux.SigAction) (linux.SigAction, error) {
 	if !sig.IsValid() {
-		return linux.SigAction{}, syserror.EINVAL
+		return linux.SigAction{}, linuxerr.EINVAL
 	}
 
 	tg.pidns.owner.mu.RLock()
@@ -695,7 +697,7 @@ func (tg *ThreadGroup) SetSigAction(sig linux.Signal, actptr *linux.SigAction) (
 	oldact := sh.actions[sig]
 	if actptr != nil {
 		if sig == linux.SIGKILL || sig == linux.SIGSTOP {
-			return oldact, syserror.EINVAL
+			return oldact, linuxerr.EINVAL
 		}
 
 		act := *actptr
diff --git a/pkg/sentry/kernel/task_start.go b/pkg/sentry/kernel/task_start.go
index 41fd2d471..0565059c1 100644
--- a/pkg/sentry/kernel/task_start.go
+++ b/pkg/sentry/kernel/task_start.go
@@ -17,6 +17,7 @@ package kernel
 import (
 	"gvisor.dev/gvisor/pkg/abi/linux"
 	"gvisor.dev/gvisor/pkg/context"
+	"gvisor.dev/gvisor/pkg/errors/linuxerr"
 	"gvisor.dev/gvisor/pkg/hostarch"
 	"gvisor.dev/gvisor/pkg/sentry/inet"
 	"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
@@ -299,7 +300,7 @@ func (ns *PIDNamespace) allocateTID() (ThreadID, error) {
 		// Did we do a full cycle?
 		if tid == ns.last {
 			// No tid available.
-			return 0, syserror.EAGAIN
+			return 0, linuxerr.EAGAIN
 		}
 	}
 }
diff --git a/pkg/sentry/kernel/task_syscall.go b/pkg/sentry/kernel/task_syscall.go
index 1874f74e5..0586c9def 100644
--- a/pkg/sentry/kernel/task_syscall.go
+++ b/pkg/sentry/kernel/task_syscall.go
@@ -22,6 +22,7 @@ import (
 	"golang.org/x/sys/unix"
 	"gvisor.dev/gvisor/pkg/abi/linux"
 	"gvisor.dev/gvisor/pkg/bits"
+	"gvisor.dev/gvisor/pkg/errors"
 	"gvisor.dev/gvisor/pkg/errors/linuxerr"
 	"gvisor.dev/gvisor/pkg/hostarch"
 	"gvisor.dev/gvisor/pkg/marshal"
@@ -160,7 +161,7 @@ func (t *Task) doSyscall() taskRunState {
 			// ok
 		case linux.SECCOMP_RET_KILL_THREAD:
 			t.Debugf("Syscall %d: killed by seccomp", sysno)
-			t.PrepareExit(ExitStatus{Signo: int(linux.SIGSYS)})
+			t.PrepareExit(linux.WaitStatusTerminationSignal(linux.SIGSYS))
 			return (*runExit)(nil)
 		case linux.SECCOMP_RET_TRACE:
 			t.Debugf("Syscall %d: stopping for PTRACE_EVENT_SECCOMP", sysno)
@@ -310,7 +311,7 @@ func (t *Task) doVsyscall(addr hostarch.Addr, sysno uintptr) taskRunState {
 			return &runVsyscallAfterPtraceEventSeccomp{addr, sysno, caller}
 		case linux.SECCOMP_RET_KILL_THREAD:
 			t.Debugf("vsyscall %d: killed by seccomp", sysno)
-			t.PrepareExit(ExitStatus{Signo: int(linux.SIGSYS)})
+			t.PrepareExit(linux.WaitStatusTerminationSignal(linux.SIGSYS))
 			return (*runExit)(nil)
 		default:
 			panic(fmt.Sprintf("Unknown seccomp result %d", r))
@@ -337,7 +338,7 @@ func (r *runVsyscallAfterPtraceEventSeccomp) execute(t *Task) taskRunState {
 	// Documentation/prctl/seccomp_filter.txt. On Linux, changing orig_ax or ip
 	// causes do_exit(SIGSYS), and changing sp is ignored.
 	if (sysno != ^uintptr(0) && sysno != r.sysno) || hostarch.Addr(t.Arch().IP()) != r.addr {
-		t.PrepareExit(ExitStatus{Signo: int(linux.SIGSYS)})
+		t.PrepareExit(linux.WaitStatusTerminationSignal(linux.SIGSYS))
 		return (*runExit)(nil)
 	}
 	if sysno == ^uintptr(0) {
@@ -380,6 +381,8 @@ func ExtractErrno(err error, sysno int) int {
 		return 0
 	case unix.Errno:
 		return int(err)
+	case *errors.Error:
+		return int(err.Errno())
 	case syserror.SyscallRestartErrno:
 		return int(err)
 	case *memmap.BusError:
diff --git a/pkg/sentry/kernel/task_usermem.go b/pkg/sentry/kernel/task_usermem.go
index fc6d9438a..8e2c36598 100644
--- a/pkg/sentry/kernel/task_usermem.go
+++ b/pkg/sentry/kernel/task_usermem.go
@@ -19,6 +19,7 @@ import (
 
 	"gvisor.dev/gvisor/pkg/abi/linux"
 	"gvisor.dev/gvisor/pkg/context"
+	"gvisor.dev/gvisor/pkg/errors/linuxerr"
 	"gvisor.dev/gvisor/pkg/hostarch"
 	"gvisor.dev/gvisor/pkg/sentry/mm"
 	"gvisor.dev/gvisor/pkg/syserror"
@@ -132,7 +133,7 @@ func (t *Task) CopyOutIovecs(addr hostarch.Addr, src hostarch.AddrRangeSeq) erro
 	case 8:
 		const itemLen = 16
 		if _, ok := addr.AddLength(uint64(src.NumRanges()) * itemLen); !ok {
-			return syserror.EFAULT
+			return linuxerr.EFAULT
 		}
 
 		b := t.CopyScratchBuffer(itemLen)
@@ -190,7 +191,7 @@ func (t *Task) CopyInIovecs(addr hostarch.Addr, numIovecs int) (hostarch.AddrRan
 	case 8:
 		const itemLen = 16
 		if _, ok := addr.AddLength(uint64(numIovecs) * itemLen); !ok {
-			return hostarch.AddrRangeSeq{}, syserror.EFAULT
+			return hostarch.AddrRangeSeq{}, linuxerr.EFAULT
 		}
 
 		b := t.CopyScratchBuffer(itemLen)
@@ -202,11 +203,11 @@ func (t *Task) CopyInIovecs(addr hostarch.Addr, numIovecs int) (hostarch.AddrRan
 			base := hostarch.Addr(hostarch.ByteOrder.Uint64(b[0:8]))
 			length := hostarch.ByteOrder.Uint64(b[8:16])
 			if length > math.MaxInt64 {
-				return hostarch.AddrRangeSeq{}, syserror.EINVAL
+				return hostarch.AddrRangeSeq{}, linuxerr.EINVAL
 			}
 			ar, ok := t.MemoryManager().CheckIORange(base, int64(length))
 			if !ok {
-				return hostarch.AddrRangeSeq{}, syserror.EFAULT
+				return hostarch.AddrRangeSeq{}, linuxerr.EFAULT
 			}
 
 			if numIovecs == 1 {
@@ -252,7 +253,7 @@ func (t *Task) SingleIOSequence(addr hostarch.Addr, length int, opts usermem.IOO
 	}
 	ar, ok := t.MemoryManager().CheckIORange(addr, int64(length))
 	if !ok {
-		return usermem.IOSequence{}, syserror.EFAULT
+		return usermem.IOSequence{}, linuxerr.EFAULT
 	}
 	return usermem.IOSequence{
 		IO:    t.MemoryManager(),
@@ -270,7 +271,7 @@ func (t *Task) SingleIOSequence(addr hostarch.Addr, length int, opts usermem.IOO
 // Preconditions: Same as Task.CopyInIovecs.
 func (t *Task) IovecsIOSequence(addr hostarch.Addr, iovcnt int, opts usermem.IOOpts) (usermem.IOSequence, error) {
 	if iovcnt < 0 || iovcnt > linux.UIO_MAXIOV {
-		return usermem.IOSequence{}, syserror.EINVAL
+		return usermem.IOSequence{}, linuxerr.EINVAL
 	}
 	ars, err := t.CopyInIovecs(addr, iovcnt)
 	if err != nil {
@@ -312,7 +313,7 @@ func (cc *taskCopyContext) getMemoryManager() (*mm.MemoryManager, error) {
 	tmm := cc.t.MemoryManager()
 	cc.t.mu.Unlock()
 	if !tmm.IncUsers() {
-		return nil, syserror.EFAULT
+		return nil, linuxerr.EFAULT
 	}
 	return tmm, nil
 }
diff --git a/pkg/sentry/kernel/thread_group.go b/pkg/sentry/kernel/thread_group.go
index 4566e4c7c..2eda15303 100644
--- a/pkg/sentry/kernel/thread_group.go
+++ b/pkg/sentry/kernel/thread_group.go
@@ -19,13 +19,13 @@ import (
 
 	"gvisor.dev/gvisor/pkg/abi/linux"
 	"gvisor.dev/gvisor/pkg/context"
+	"gvisor.dev/gvisor/pkg/errors/linuxerr"
 	"gvisor.dev/gvisor/pkg/sentry/fs"
 	"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
 	ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time"
 	"gvisor.dev/gvisor/pkg/sentry/limits"
 	"gvisor.dev/gvisor/pkg/sentry/usage"
 	"gvisor.dev/gvisor/pkg/sync"
-	"gvisor.dev/gvisor/pkg/syserror"
 )
 
 // A ThreadGroup is a logical grouping of tasks that has widespread
@@ -143,7 +143,7 @@ type ThreadGroup struct {
 	//
 	// While exiting is false, exitStatus is protected by the signal mutex.
 	// When exiting becomes true, exitStatus becomes immutable.
-	exitStatus ExitStatus
+	exitStatus linux.WaitStatus
 
 	// terminationSignal is the signal that this thread group's leader will
 	// send to its parent when it exits.
@@ -357,7 +357,7 @@ func (tg *ThreadGroup) SetControllingTTY(tty *TTY, steal bool, isReadable bool)
 	// "The calling process must be a session leader and not have a
 	// controlling terminal already." - tty_ioctl(4)
 	if tg.processGroup.session.leader != tg || tg.tty != nil {
-		return syserror.EINVAL
+		return linuxerr.EINVAL
 	}
 
 	creds := auth.CredentialsFromContext(tg.leader)
@@ -371,7 +371,7 @@ func (tg *ThreadGroup) SetControllingTTY(tty *TTY, steal bool, isReadable bool)
 	if tty.tg != nil && tg.processGroup.session != tty.tg.processGroup.session {
 		// Stealing requires CAP_SYS_ADMIN in the root user namespace.
 		if !hasAdmin || !steal {
-			return syserror.EPERM
+			return linuxerr.EPERM
 		}
 		// Steal the TTY away. Unlike TIOCNOTTY, don't send signals.
 		for othertg := range tg.pidns.owner.Root.tgids {
@@ -391,7 +391,7 @@ func (tg *ThreadGroup) SetControllingTTY(tty *TTY, steal bool, isReadable bool)
 	}
 
 	if !isReadable && !hasAdmin {
-		return syserror.EPERM
+		return linuxerr.EPERM
 	}
 
 	// Set the controlling terminal and foreground process group.
@@ -419,7 +419,7 @@ func (tg *ThreadGroup) ReleaseControllingTTY(tty *TTY) error {
 
 	if tg.tty == nil || tg.tty != tty {
 		tg.signalHandlers.mu.Unlock()
-		return syserror.ENOTTY
+		return linuxerr.ENOTTY
 	}
 
 	// "If the process was session leader, then send SIGHUP and SIGCONT to
@@ -473,7 +473,7 @@ func (tg *ThreadGroup) ForegroundProcessGroup(tty *TTY) (int32, error) {
 	// "When fd does not refer to the controlling terminal of the calling
 	// process, -1 is returned" - tcgetpgrp(3)
 	if tg.tty != tty {
-		return -1, syserror.ENOTTY
+		return -1, linuxerr.ENOTTY
 	}
 
 	return int32(tg.processGroup.session.foreground.id), nil
@@ -496,24 +496,24 @@ func (tg *ThreadGroup) SetForegroundProcessGroup(tty *TTY, pgid ProcessGroupID)
 
 	// tty must be the controlling terminal.
 	if tg.tty != tty {
-		return -1, syserror.ENOTTY
+		return -1, linuxerr.ENOTTY
 	}
 
 	// pgid must be positive.
 	if pgid < 0 {
-		return -1, syserror.EINVAL
+		return -1, linuxerr.EINVAL
 	}
 
 	// pg must not be empty. Empty process groups are removed from their
 	// pid namespaces.
 	pg, ok := tg.pidns.processGroups[pgid]
 	if !ok {
-		return -1, syserror.ESRCH
+		return -1, linuxerr.ESRCH
 	}
 
 	// pg must be part of this process's session.
 	if tg.processGroup.session != pg.session {
-		return -1, syserror.EPERM
+		return -1, linuxerr.EPERM
 	}
 
 	tg.processGroup.session.foreground.id = pgid
diff --git a/pkg/sentry/kernel/time/BUILD b/pkg/sentry/kernel/time/BUILD
index 2817aa3ba..e293d9a0f 100644
--- a/pkg/sentry/kernel/time/BUILD
+++ b/pkg/sentry/kernel/time/BUILD
@@ -13,8 +13,8 @@ go_library(
     deps = [
         "//pkg/abi/linux",
         "//pkg/context",
+        "//pkg/errors/linuxerr",
         "//pkg/sync",
-        "//pkg/syserror",
         "//pkg/waiter",
     ],
 )
diff --git a/pkg/sentry/kernel/time/time.go b/pkg/sentry/kernel/time/time.go
index 26aa34aa6..191b92811 100644
--- a/pkg/sentry/kernel/time/time.go
+++ b/pkg/sentry/kernel/time/time.go
@@ -22,8 +22,8 @@ import (
 	"time"
 
 	"gvisor.dev/gvisor/pkg/abi/linux"
+	"gvisor.dev/gvisor/pkg/errors/linuxerr"
 	"gvisor.dev/gvisor/pkg/sync"
-	"gvisor.dev/gvisor/pkg/syserror"
 	"gvisor.dev/gvisor/pkg/waiter"
 )
 
@@ -322,7 +322,7 @@ func SettingFromSpec(value time.Duration, interval time.Duration, c Clock) (Sett
 // interpreted as a time relative to now.
 func SettingFromSpecAt(value time.Duration, interval time.Duration, now Time) (Setting, error) {
 	if value < 0 {
-		return Setting{}, syserror.EINVAL
+		return Setting{}, linuxerr.EINVAL
 	}
 	if value == 0 {
 		return Setting{Period: interval}, nil
@@ -338,7 +338,7 @@ func SettingFromSpecAt(value time.Duration, interval time.Duration, now Time) (S
 // interpreted as an absolute time.
 func SettingFromAbsSpec(value Time, interval time.Duration) (Setting, error) {
 	if value.Before(ZeroTime) {
-		return Setting{}, syserror.EINVAL
+		return Setting{}, linuxerr.EINVAL
 	}
 	if value.IsZero() {
 		return Setting{Period: interval}, nil
diff --git a/pkg/sentry/kernel/timekeeper_test.go b/pkg/sentry/kernel/timekeeper_test.go
index dfc3c0719..b6039505a 100644
--- a/pkg/sentry/kernel/timekeeper_test.go
+++ b/pkg/sentry/kernel/timekeeper_test.go
@@ -17,12 +17,12 @@ package kernel
 import (
 	"testing"
 
+	"gvisor.dev/gvisor/pkg/errors/linuxerr"
 	"gvisor.dev/gvisor/pkg/hostarch"
 	"gvisor.dev/gvisor/pkg/sentry/contexttest"
 	"gvisor.dev/gvisor/pkg/sentry/pgalloc"
 	sentrytime "gvisor.dev/gvisor/pkg/sentry/time"
 	"gvisor.dev/gvisor/pkg/sentry/usage"
-	"gvisor.dev/gvisor/pkg/syserror"
 )
 
 // mockClocks is a sentrytime.Clocks that simply returns the times in the
@@ -45,7 +45,7 @@ func (c *mockClocks) GetTime(id sentrytime.ClockID) (int64, error) {
 	case sentrytime.Realtime:
 		return c.realtime, nil
 	default:
-		return 0, syserror.EINVAL
+		return 0, linuxerr.EINVAL
 	}
 }