21 files changed, 672 insertions, 236 deletions
diff --git a/pkg/sentry/control/BUILD b/pkg/sentry/control/BUILD
index a4934a565..cfb33a398 100644
--- a/pkg/sentry/control/BUILD
+++ b/pkg/sentry/control/BUILD
@@ -1,7 +1,13 @@
-load("//tools:defs.bzl", "go_library", "go_test")
+load("//tools:defs.bzl", "go_library", "go_test", "proto_library")
 
 package(licenses = ["notice"])
 
+proto_library(
+    name = "control",
+    srcs = ["control.proto"],
+    visibility = ["//visibility:public"],
+)
+
 go_library(
     name = "control",
     srcs = [
diff --git a/pkg/sentry/control/control.proto b/pkg/sentry/control/control.proto
new file mode 100644
index 000000000..72dda3fbc
--- /dev/null
+++ b/pkg/sentry/control/control.proto
@@ -0,0 +1,40 @@
+// Copyright 2021 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+syntax = "proto3";
+
+package gvisor;
+
+// ControlConfig configures which control endpoints are permitted.
+message ControlConfig {
+  // Names for individual control URPC service objects.
+  // Any new service object that should be given conditional access should be
+  // named here and conditionally added based on presence in allowed_controls.
+  enum Endpoint {
+    UNKNOWN = 0;
+    EVENTS = 1;
+    FS = 2;
+    LIFECYCLE = 3;
+    LOGGING = 4;
+    PROFILE = 5;
+    USAGE = 6;
+    PROC = 7;
+    STATE = 8;
+    DEBUG = 9;
+  }
+
+  // allowed_controls represents which endpoints may be registered to the
+  // server.
+  repeated Endpoint allowed_controls = 1;
+}
diff --git a/pkg/sentry/devices/quotedev/BUILD b/pkg/sentry/devices/quotedev/BUILD
deleted file mode 100644
index ee946610a..000000000
--- a/pkg/sentry/devices/quotedev/BUILD
+++ /dev/null
@@ -1,16 +0,0 @@
-load("//tools:defs.bzl", "go_library")
-
-licenses(["notice"])
-
-go_library(
-    name = "quotedev",
-    srcs = ["quotedev.go"],
-    visibility = ["//pkg/sentry:internal"],
-    deps = [
-        "//pkg/abi/linux",
-        "//pkg/context",
-        "//pkg/errors/linuxerr",
-        "//pkg/sentry/fsimpl/devtmpfs",
-        "//pkg/sentry/vfs",
-    ],
-)
diff --git a/pkg/sentry/devices/quotedev/quotedev.go b/pkg/sentry/devices/quotedev/quotedev.go
deleted file mode 100644
index 140856a4a..000000000
--- a/pkg/sentry/devices/quotedev/quotedev.go
+++ /dev/null
@@ -1,52 +0,0 @@
-// Copyright 2021 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// Package quotedev implements a vfs.Device for /dev/gvisor_quote.
-package quotedev
-
-import (
-	"gvisor.dev/gvisor/pkg/abi/linux"
-	"gvisor.dev/gvisor/pkg/context"
-	"gvisor.dev/gvisor/pkg/errors/linuxerr"
-	"gvisor.dev/gvisor/pkg/sentry/fsimpl/devtmpfs"
-	"gvisor.dev/gvisor/pkg/sentry/vfs"
-)
-
-const (
-	quoteDevMinor = 0
-)
-
-// quoteDevice implements vfs.Device for /dev/gvisor_quote
-//
-// +stateify savable
-type quoteDevice struct{}
-
-// Open implements vfs.Device.Open.
-// TODO(b/157161182): Add support for attestation ioctls.
-func (quoteDevice) Open(ctx context.Context, mnt *vfs.Mount, vfsd *vfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) {
-	return nil, linuxerr.EIO
-}
-
-// Register registers all devices implemented by this package in vfsObj.
-func Register(vfsObj *vfs.VirtualFilesystem) error {
-	return vfsObj.RegisterDevice(vfs.CharDevice, linux.UNNAMED_MAJOR, quoteDevMinor, quoteDevice{}, &vfs.RegisterDeviceOptions{
-		GroupName: "gvisor_quote",
-	})
-}
-
-// CreateDevtmpfsFiles creates device special files in dev representing all
-// devices implemented by this package.
-func CreateDevtmpfsFiles(ctx context.Context, dev *devtmpfs.Accessor) error {
-	return dev.CreateDeviceFile(ctx, "gvisor_quote", vfs.CharDevice, linux.UNNAMED_MAJOR, quoteDevMinor, 0666 /* mode */)
-}
diff --git a/pkg/sentry/fsimpl/gofer/handle.go b/pkg/sentry/fsimpl/gofer/handle.go
index 5c57f6fea..02540a754 100644
--- a/pkg/sentry/fsimpl/gofer/handle.go
+++ b/pkg/sentry/fsimpl/gofer/handle.go
@@ -20,6 +20,7 @@ import (
 	"gvisor.dev/gvisor/pkg/p9"
 	"gvisor.dev/gvisor/pkg/safemem"
 	"gvisor.dev/gvisor/pkg/sentry/hostfd"
+	"gvisor.dev/gvisor/pkg/sync"
 )
 
 // handle represents a remote "open file descriptor", consisting of an opened
@@ -130,3 +131,43 @@ func (h *handle) writeFromBlocksAt(ctx context.Context, srcs safemem.BlockSeq, o
 	}
 	return uint64(n), cperr
 }
+
+type handleReadWriter struct {
+	ctx context.Context
+	h   *handle
+	off uint64
+}
+
+var handleReadWriterPool = sync.Pool{
+	New: func() interface{} {
+		return &handleReadWriter{}
+	},
+}
+
+func getHandleReadWriter(ctx context.Context, h *handle, offset int64) *handleReadWriter {
+	rw := handleReadWriterPool.Get().(*handleReadWriter)
+	rw.ctx = ctx
+	rw.h = h
+	rw.off = uint64(offset)
+	return rw
+}
+
+func putHandleReadWriter(rw *handleReadWriter) {
+	rw.ctx = nil
+	rw.h = nil
+	handleReadWriterPool.Put(rw)
+}
+
+// ReadToBlocks implements safemem.Reader.ReadToBlocks.
+func (rw *handleReadWriter) ReadToBlocks(dsts safemem.BlockSeq) (uint64, error) {
+	n, err := rw.h.readToBlocksAt(rw.ctx, dsts, rw.off)
+	rw.off += n
+	return n, err
+}
+
+// WriteFromBlocks implements safemem.Writer.WriteFromBlocks.
+func (rw *handleReadWriter) WriteFromBlocks(srcs safemem.BlockSeq) (uint64, error) {
+	n, err := rw.h.writeFromBlocksAt(rw.ctx, srcs, rw.off)
+	rw.off += n
+	return n, err
+}
diff --git a/pkg/sentry/fsimpl/gofer/save_restore.go b/pkg/sentry/fsimpl/gofer/save_restore.go
index e67422a2f..8dcbc61ed 100644
--- a/pkg/sentry/fsimpl/gofer/save_restore.go
+++ b/pkg/sentry/fsimpl/gofer/save_restore.go
@@ -158,6 +158,10 @@ func (d *dentryPlatformFile) afterLoad() {
 // afterLoad is invoked by stateify.
 func (fd *specialFileFD) afterLoad() {
 	fd.handle.fd = -1
+	if fd.hostFileMapper.IsInited() {
+		// Ensure that we don't call fd.hostFileMapper.Init() again.
+		fd.hostFileMapperInitOnce.Do(func() {})
+	}
 }
 
 // CompleteRestore implements
diff --git a/pkg/sentry/fsimpl/gofer/special_file.go b/pkg/sentry/fsimpl/gofer/special_file.go
index 144a1045e..a8d47b65b 100644
--- a/pkg/sentry/fsimpl/gofer/special_file.go
+++ b/pkg/sentry/fsimpl/gofer/special_file.go
@@ -22,10 +22,13 @@ import (
 	"gvisor.dev/gvisor/pkg/context"
 	"gvisor.dev/gvisor/pkg/errors/linuxerr"
 	"gvisor.dev/gvisor/pkg/fdnotifier"
+	"gvisor.dev/gvisor/pkg/hostarch"
 	"gvisor.dev/gvisor/pkg/metric"
 	"gvisor.dev/gvisor/pkg/p9"
 	"gvisor.dev/gvisor/pkg/safemem"
+	"gvisor.dev/gvisor/pkg/sentry/fs/fsutil"
 	"gvisor.dev/gvisor/pkg/sentry/fsmetric"
+	"gvisor.dev/gvisor/pkg/sentry/memmap"
 	"gvisor.dev/gvisor/pkg/sentry/vfs"
 	"gvisor.dev/gvisor/pkg/sync"
 	"gvisor.dev/gvisor/pkg/usermem"
@@ -75,6 +78,16 @@ type specialFileFD struct {
 	bufMu   sync.Mutex `state:"nosave"`
 	haveBuf uint32
 	buf     []byte
+
+	// If handle.fd >= 0, hostFileMapper caches mappings of handle.fd, and
+	// hostFileMapperInitOnce is used to initialize it on first use.
+	hostFileMapperInitOnce sync.Once `state:"nosave"`
+	hostFileMapper         fsutil.HostFileMapper
+
+	// If handle.fd >= 0, fileRefs counts references on memmap.File offsets.
+	// fileRefs is protected by fileRefsMu.
+	fileRefsMu sync.Mutex `state:"nosave"`
+	fileRefs   fsutil.FrameRefSet
 }
 
 func newSpecialFileFD(h handle, mnt *vfs.Mount, d *dentry, flags uint32) (*specialFileFD, error) {
@@ -229,23 +242,13 @@ func (fd *specialFileFD) PRead(ctx context.Context, dst usermem.IOSequence, offs
 		}
 	}
 
-	// Going through dst.CopyOutFrom() would hold MM locks around file
-	// operations of unknown duration. For regularFileFD, doing so is necessary
-	// to support mmap due to lock ordering; MM locks precede dentry.dataMu.
-	// That doesn't hold here since specialFileFD doesn't client-cache data.
-	// Just buffer the read instead.
-	buf := make([]byte, dst.NumBytes())
-	n, err := fd.handle.readToBlocksAt(ctx, safemem.BlockSeqOf(safemem.BlockFromSafeSlice(buf)), uint64(offset))
+	rw := getHandleReadWriter(ctx, &fd.handle, offset)
+	n, err := dst.CopyOutFrom(ctx, rw)
+	putHandleReadWriter(rw)
 	if linuxerr.Equals(linuxerr.EAGAIN, err) {
 		err = linuxerr.ErrWouldBlock
 	}
-	if n == 0 {
-		return bufN, err
-	}
-	if cp, cperr := dst.CopyOut(ctx, buf[:n]); cperr != nil {
-		return bufN + int64(cp), cperr
-	}
-	return bufN + int64(n), err
+	return bufN + n, err
 }
 
 // Read implements vfs.FileDescriptionImpl.Read.
@@ -316,20 +319,15 @@ func (fd *specialFileFD) pwrite(ctx context.Context, src usermem.IOSequence, off
 		}
 	}
 
-	// Do a buffered write. See rationale in PRead.
-	buf := make([]byte, src.NumBytes())
-	copied, copyErr := src.CopyIn(ctx, buf)
-	if copied == 0 && copyErr != nil {
-		// Only return the error if we didn't get any data.
-		return 0, offset, copyErr
-	}
-	n, err := fd.handle.writeFromBlocksAt(ctx, safemem.BlockSeqOf(safemem.BlockFromSafeSlice(buf[:copied])), uint64(offset))
+	rw := getHandleReadWriter(ctx, &fd.handle, offset)
+	n, err := src.CopyInTo(ctx, rw)
+	putHandleReadWriter(rw)
 	if linuxerr.Equals(linuxerr.EAGAIN, err) {
 		err = linuxerr.ErrWouldBlock
 	}
 	// Update offset if the offset is valid.
 	if offset >= 0 {
-		offset += int64(n)
+		offset += n
 	}
 	// Update file size for regular files.
 	if fd.isRegularFile {
@@ -340,10 +338,7 @@ func (fd *specialFileFD) pwrite(ctx context.Context, src usermem.IOSequence, off
 			atomic.StoreUint64(&d.size, uint64(offset))
 		}
 	}
-	if err != nil {
-		return int64(n), offset, err
-	}
-	return int64(n), offset, copyErr
+	return int64(n), offset, err
 }
 
 // Write implements vfs.FileDescriptionImpl.Write.
@@ -411,3 +406,85 @@ func (fd *specialFileFD) sync(ctx context.Context, forFilesystemSync bool) error
 	}
 	return nil
 }
+
+// ConfigureMMap implements vfs.FileDescriptionImpl.ConfigureMMap.
+func (fd *specialFileFD) ConfigureMMap(ctx context.Context, opts *memmap.MMapOpts) error {
+	if fd.handle.fd < 0 || fd.filesystem().opts.forcePageCache {
+		return linuxerr.ENODEV
+	}
+	// After this point, fd may be used as a memmap.Mappable and memmap.File.
+	fd.hostFileMapperInitOnce.Do(fd.hostFileMapper.Init)
+	return vfs.GenericConfigureMMap(&fd.vfsfd, fd, opts)
+}
+
+// AddMapping implements memmap.Mappable.AddMapping.
+func (fd *specialFileFD) AddMapping(ctx context.Context, ms memmap.MappingSpace, ar hostarch.AddrRange, offset uint64, writable bool) error {
+	fd.hostFileMapper.IncRefOn(memmap.MappableRange{offset, offset + uint64(ar.Length())})
+	return nil
+}
+
+// RemoveMapping implements memmap.Mappable.RemoveMapping.
+func (fd *specialFileFD) RemoveMapping(ctx context.Context, ms memmap.MappingSpace, ar hostarch.AddrRange, offset uint64, writable bool) {
+	fd.hostFileMapper.DecRefOn(memmap.MappableRange{offset, offset + uint64(ar.Length())})
+}
+
+// CopyMapping implements memmap.Mappable.CopyMapping.
+func (fd *specialFileFD) CopyMapping(ctx context.Context, ms memmap.MappingSpace, srcAR, dstAR hostarch.AddrRange, offset uint64, writable bool) error {
+	return fd.AddMapping(ctx, ms, dstAR, offset, writable)
+}
+
+// Translate implements memmap.Mappable.Translate.
+func (fd *specialFileFD) Translate(ctx context.Context, required, optional memmap.MappableRange, at hostarch.AccessType) ([]memmap.Translation, error) {
+	mr := optional
+	if fd.filesystem().opts.limitHostFDTranslation {
+		mr = maxFillRange(required, optional)
+	}
+	return []memmap.Translation{
+		{
+			Source: mr,
+			File:   fd,
+			Offset: mr.Start,
+			Perms:  hostarch.AnyAccess,
+		},
+	}, nil
+}
+
+// InvalidateUnsavable implements memmap.Mappable.InvalidateUnsavable.
+func (fd *specialFileFD) InvalidateUnsavable(ctx context.Context) error {
+	return nil
+}
+
+// IncRef implements memmap.File.IncRef.
+func (fd *specialFileFD) IncRef(fr memmap.FileRange) {
+	fd.fileRefsMu.Lock()
+	defer fd.fileRefsMu.Unlock()
+	fd.fileRefs.IncRefAndAccount(fr)
+}
+
+// DecRef implements memmap.File.DecRef.
+func (fd *specialFileFD) DecRef(fr memmap.FileRange) {
+	fd.fileRefsMu.Lock()
+	defer fd.fileRefsMu.Unlock()
+	fd.fileRefs.DecRefAndAccount(fr)
+}
+
+// MapInternal implements memmap.File.MapInternal.
+func (fd *specialFileFD) MapInternal(fr memmap.FileRange, at hostarch.AccessType) (safemem.BlockSeq, error) {
+	fd.requireHostFD()
+	return fd.hostFileMapper.MapInternal(fr, int(fd.handle.fd), at.Write)
+}
+
+// FD implements memmap.File.FD.
+func (fd *specialFileFD) FD() int {
+	fd.requireHostFD()
+	return int(fd.handle.fd)
+}
+
+func (fd *specialFileFD) requireHostFD() {
+	if fd.handle.fd < 0 {
+		// This is possible if fd was successfully mmapped before saving, then
+		// was restored without a host FD. This is unrecoverable: without a
+		// host FD, we can't mmap this file post-restore.
+		panic("gofer.specialFileFD can no longer be memory-mapped without a host FD")
+	}
+}
diff --git a/pkg/sentry/fsimpl/verity/BUILD b/pkg/sentry/fsimpl/verity/BUILD
index 5955948f0..c12abdf33 100644
--- a/pkg/sentry/fsimpl/verity/BUILD
+++ b/pkg/sentry/fsimpl/verity/BUILD
@@ -1,10 +1,24 @@
 load("//tools:defs.bzl", "go_library", "go_test")
+load("//tools/go_generics:defs.bzl", "go_template_instance")
 
 licenses(["notice"])
 
+go_template_instance(
+    name = "dentry_list",
+    out = "dentry_list.go",
+    package = "verity",
+    prefix = "dentry",
+    template = "//pkg/ilist:generic_list",
+    types = {
+        "Element": "*dentry",
+        "Linker": "*dentry",
+    },
+)
+
 go_library(
     name = "verity",
     srcs = [
+        "dentry_list.go",
         "filesystem.go",
         "save_restore.go",
         "verity.go",
diff --git a/pkg/sentry/fsimpl/verity/filesystem.go b/pkg/sentry/fsimpl/verity/filesystem.go
index e147d6b07..52d47994d 100644
--- a/pkg/sentry/fsimpl/verity/filesystem.go
+++ b/pkg/sentry/fsimpl/verity/filesystem.go
@@ -66,40 +66,23 @@ func putDentrySlice(ds *[]*dentry) {
 	dentrySlicePool.Put(ds)
 }
 
-// renameMuRUnlockAndCheckDrop calls fs.renameMu.RUnlock(), then calls
-// dentry.checkDropLocked on all dentries in *ds with fs.renameMu locked for
+// renameMuRUnlockAndCheckCaching calls fs.renameMu.RUnlock(), then calls
+// dentry.checkCachingLocked on all dentries in *ds with fs.renameMu locked for
 // writing.
 //
 // ds is a pointer-to-pointer since defer evaluates its arguments immediately,
 // but dentry slices are allocated lazily, and it's much easier to say "defer
-// fs.renameMuRUnlockAndCheckDrop(&ds)" than "defer func() {
-// fs.renameMuRUnlockAndCheckDrop(ds) }()" to work around this.
+// fs.renameMuRUnlockAndCheckCaching(&ds)" than "defer func() {
+// fs.renameMuRUnlockAndCheckCaching(ds) }()" to work around this.
 // +checklocksrelease:fs.renameMu
-func (fs *filesystem) renameMuRUnlockAndCheckDrop(ctx context.Context, ds **[]*dentry) {
+func (fs *filesystem) renameMuRUnlockAndCheckCaching(ctx context.Context, ds **[]*dentry) {
 	fs.renameMu.RUnlock()
 	if *ds == nil {
 		return
 	}
-	if len(**ds) != 0 {
-		fs.renameMu.Lock()
-		for _, d := range **ds {
-			d.checkDropLocked(ctx)
-		}
-		fs.renameMu.Unlock()
-	}
-	putDentrySlice(*ds)
-}
-
-// +checklocksrelease:fs.renameMu
-func (fs *filesystem) renameMuUnlockAndCheckDrop(ctx context.Context, ds **[]*dentry) {
-	if *ds == nil {
-		fs.renameMu.Unlock()
-		return
-	}
 	for _, d := range **ds {
-		d.checkDropLocked(ctx)
+		d.checkCachingLocked(ctx, false /* renameMuWriteLocked */)
 	}
-	fs.renameMu.Unlock()
 	putDentrySlice(*ds)
 }
 
@@ -700,7 +683,7 @@ func (fs *filesystem) AccessAt(ctx context.Context, rp *vfs.ResolvingPath, creds
 	}
 	var ds *[]*dentry
 	fs.renameMu.RLock()
-	defer fs.renameMuRUnlockAndCheckDrop(ctx, &ds)
+	defer fs.renameMuRUnlockAndCheckCaching(ctx, &ds)
 	d, err := fs.resolveLocked(ctx, rp, &ds)
 	if err != nil {
 		return err
@@ -712,7 +695,7 @@ func (fs *filesystem) AccessAt(ctx context.Context, rp *vfs.ResolvingPath, creds
 func (fs *filesystem) GetDentryAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.GetDentryOptions) (*vfs.Dentry, error) {
 	var ds *[]*dentry
 	fs.renameMu.RLock()
-	defer fs.renameMuRUnlockAndCheckDrop(ctx, &ds)
+	defer fs.renameMuRUnlockAndCheckCaching(ctx, &ds)
 	d, err := fs.resolveLocked(ctx, rp, &ds)
 	if err != nil {
 		return nil, err
@@ -733,7 +716,7 @@ func (fs *filesystem) GetDentryAt(ctx context.Context, rp *vfs.ResolvingPath, op
 func (fs *filesystem) GetParentDentryAt(ctx context.Context, rp *vfs.ResolvingPath) (*vfs.Dentry, error) {
 	var ds *[]*dentry
 	fs.renameMu.RLock()
-	defer fs.renameMuRUnlockAndCheckDrop(ctx, &ds)
+	defer fs.renameMuRUnlockAndCheckCaching(ctx, &ds)
 	start := rp.Start().Impl().(*dentry)
 	d, err := fs.walkParentDirLocked(ctx, rp, start, &ds)
 	if err != nil {
@@ -770,7 +753,7 @@ func (fs *filesystem) OpenAt(ctx context.Context, rp *vfs.ResolvingPath, opts vf
 
 	var ds *[]*dentry
 	fs.renameMu.RLock()
-	defer fs.renameMuRUnlockAndCheckDrop(ctx, &ds)
+	defer fs.renameMuRUnlockAndCheckCaching(ctx, &ds)
 
 	start := rp.Start().Impl().(*dentry)
 	if rp.Done() {
@@ -952,7 +935,7 @@ func (d *dentry) openLocked(ctx context.Context, rp *vfs.ResolvingPath, opts *vf
 func (fs *filesystem) ReadlinkAt(ctx context.Context, rp *vfs.ResolvingPath) (string, error) {
 	var ds *[]*dentry
 	fs.renameMu.RLock()
-	defer fs.renameMuRUnlockAndCheckDrop(ctx, &ds)
+	defer fs.renameMuRUnlockAndCheckCaching(ctx, &ds)
 	d, err := fs.resolveLocked(ctx, rp, &ds)
 	if err != nil {
 		return "", err
@@ -982,7 +965,7 @@ func (fs *filesystem) SetStatAt(ctx context.Context, rp *vfs.ResolvingPath, opts
 func (fs *filesystem) StatAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.StatOptions) (linux.Statx, error) {
 	var ds *[]*dentry
 	fs.renameMu.RLock()
-	defer fs.renameMuRUnlockAndCheckDrop(ctx, &ds)
+	defer fs.renameMuRUnlockAndCheckCaching(ctx, &ds)
 	d, err := fs.resolveLocked(ctx, rp, &ds)
 	if err != nil {
 		return linux.Statx{}, err
@@ -1028,7 +1011,7 @@ func (fs *filesystem) UnlinkAt(ctx context.Context, rp *vfs.ResolvingPath) error
 func (fs *filesystem) BoundEndpointAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.BoundEndpointOptions) (transport.BoundEndpoint, error) {
 	var ds *[]*dentry
 	fs.renameMu.RLock()
-	defer fs.renameMuRUnlockAndCheckDrop(ctx, &ds)
+	defer fs.renameMuRUnlockAndCheckCaching(ctx, &ds)
 	if _, err := fs.resolveLocked(ctx, rp, &ds); err != nil {
 		return nil, err
 	}
@@ -1039,7 +1022,7 @@ func (fs *filesystem) BoundEndpointAt(ctx context.Context, rp *vfs.ResolvingPath
 func (fs *filesystem) ListXattrAt(ctx context.Context, rp *vfs.ResolvingPath, size uint64) ([]string, error) {
 	var ds *[]*dentry
 	fs.renameMu.RLock()
-	defer fs.renameMuRUnlockAndCheckDrop(ctx, &ds)
+	defer fs.renameMuRUnlockAndCheckCaching(ctx, &ds)
 	d, err := fs.resolveLocked(ctx, rp, &ds)
 	if err != nil {
 		return nil, err
@@ -1055,7 +1038,7 @@ func (fs *filesystem) ListXattrAt(ctx context.Context, rp *vfs.ResolvingPath, si
 func (fs *filesystem) GetXattrAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.GetXattrOptions) (string, error) {
 	var ds *[]*dentry
 	fs.renameMu.RLock()
-	defer fs.renameMuRUnlockAndCheckDrop(ctx, &ds)
+	defer fs.renameMuRUnlockAndCheckCaching(ctx, &ds)
 	d, err := fs.resolveLocked(ctx, rp, &ds)
 	if err != nil {
 		return "", err
diff --git a/pkg/sentry/fsimpl/verity/verity.go b/pkg/sentry/fsimpl/verity/verity.go
index 23841ecf7..d2526263c 100644
--- a/pkg/sentry/fsimpl/verity/verity.go
+++ b/pkg/sentry/fsimpl/verity/verity.go
@@ -23,10 +23,12 @@
 // Lock order:
 //
 // filesystem.renameMu
-//   dentry.dirMu
-//     fileDescription.mu
-//       filesystem.verityMu
-//         dentry.hashMu
+//   dentry.cachingMu
+//     filesystem.cacheMu
+//       dentry.dirMu
+//         fileDescription.mu
+//           filesystem.verityMu
+//             dentry.hashMu
 //
 // Locking dentry.dirMu in multiple dentries requires that parent dentries are
 // locked before child dentries, and that filesystem.renameMu is locked to
@@ -96,6 +98,9 @@ const (
 	// sizeOfStringInt32 is the size for a 32 bit integer stored as string in
 	// extended attributes. The maximum value of a 32 bit integer has 10 digits.
 	sizeOfStringInt32 = 10
+
+	// defaultMaxCachedDentries is the default size limit of the dentry cache.
+	defaultMaxCachedDentries = uint64(1000)
 )
 
 var (
@@ -106,9 +111,10 @@ var (
 
 // Mount option names for verityfs.
 const (
-	moptLowerPath = "lower_path"
-	moptRootHash  = "root_hash"
-	moptRootName  = "root_name"
+	moptLowerPath        = "lower_path"
+	moptRootHash         = "root_hash"
+	moptRootName         = "root_name"
+	moptDentryCacheLimit = "dentry_cache_limit"
 )
 
 // HashAlgorithm is a type specifying the algorithm used to hash the file
@@ -188,6 +194,17 @@ type filesystem struct {
 	// dentries.
 	renameMu sync.RWMutex `state:"nosave"`
 
+	// cachedDentries contains all dentries with 0 references. (Due to race
+	// conditions, it may also contain dentries with non-zero references.)
+	// cachedDentriesLen is the number of dentries in cachedDentries. These
+	// fields are protected by cacheMu.
+	cacheMu           sync.Mutex `state:"nosave"`
+	cachedDentries    dentryList
+	cachedDentriesLen uint64
+
+	// maxCachedDentries is the maximum size of filesystem.cachedDentries.
+	maxCachedDentries uint64
+
 	// verityMu synchronizes enabling verity files, protects files or
 	// directories from being enabled by different threads simultaneously.
 	// It also ensures that verity does not access files that are being
@@ -198,6 +215,10 @@ type filesystem struct {
 	// is for the whole file system to ensure that no more than one file is
 	// enabled the same time.
 	verityMu sync.RWMutex `state:"nosave"`
+
+	// released is nonzero once filesystem.Release has been called. It is accessed
+	// with atomic memory operations.
+	released int32
 }
 
 // InternalFilesystemOptions may be passed as
@@ -266,6 +287,16 @@ func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt
 		delete(mopts, moptRootName)
 		rootName = root
 	}
+	maxCachedDentries := defaultMaxCachedDentries
+	if str, ok := mopts[moptDentryCacheLimit]; ok {
+		delete(mopts, moptDentryCacheLimit)
+		maxCD, err := strconv.ParseUint(str, 10, 64)
+		if err != nil {
+			ctx.Warningf("verity.FilesystemType.GetFilesystem: invalid dentry cache limit: %s=%s", moptDentryCacheLimit, str)
+			return nil, nil, linuxerr.EINVAL
+		}
+		maxCachedDentries = maxCD
+	}
 
 	// Check for unparsed options.
 	if len(mopts) != 0 {
@@ -339,12 +370,16 @@ func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt
 		action:             iopts.Action,
 		opts:               opts.Data,
 		allowRuntimeEnable: iopts.AllowRuntimeEnable,
+		maxCachedDentries:  maxCachedDentries,
 	}
 	fs.vfsfs.Init(vfsObj, &fstype, fs)
 
 	// Construct the root dentry.
 	d := fs.newDentry()
-	d.refs = 1
+	// Set the root's reference count to 2. One reference is returned to
+	// the caller, and the other is held by fs to prevent the root from
+	// being "cached" and subsequently evicted.
+	d.refs = 2
 	lowerVD := vfs.MakeVirtualDentry(lowerMount, lowerMount.Root())
 	lowerVD.IncRef()
 	d.lowerVD = lowerVD
@@ -519,7 +554,16 @@ func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt
 
 // Release implements vfs.FilesystemImpl.Release.
 func (fs *filesystem) Release(ctx context.Context) {
+	atomic.StoreInt32(&fs.released, 1)
 	fs.lowerMount.DecRef(ctx)
+
+	fs.renameMu.Lock()
+	fs.evictAllCachedDentriesLocked(ctx)
+	fs.renameMu.Unlock()
+
+	// An extra reference was held by the filesystem on the root to prevent
+	// it from being cached/evicted.
+	fs.rootDentry.DecRef(ctx)
 }
 
 // MountOptions implements vfs.FilesystemImpl.MountOptions.
@@ -533,6 +577,11 @@ func (fs *filesystem) MountOptions() string {
 type dentry struct {
 	vfsd vfs.Dentry
 
+	// refs is the reference count. Each dentry holds a reference on its
+	// parent, even if disowned. When refs reaches 0, the dentry may be
+	// added to the cache or destroyed. If refs == -1, the dentry has
+	// already been destroyed. refs is accessed using atomic memory
+	// operations.
 	refs int64
 
 	// fs is the owning filesystem. fs is immutable.
@@ -587,13 +636,23 @@ type dentry struct {
 	// is protected by hashMu.
 	hashMu sync.RWMutex `state:"nosave"`
 	hash   []byte
+
+	// cachingMu is used to synchronize concurrent dentry caching attempts on
+	// this dentry.
+	cachingMu sync.Mutex `state:"nosave"`
+
+	// If cached is true, dentryEntry links dentry into
+	// filesystem.cachedDentries. cached and dentryEntry are protected by
+	// cachingMu.
+	cached bool
+	dentryEntry
 }
 
 // newDentry creates a new dentry representing the given verity file. The
-// dentry initially has no references; it is the caller's responsibility to set
-// the dentry's reference count and/or call dentry.destroy() as appropriate.
-// The dentry is initially invalid in that it contains no underlying dentry;
-// the caller is responsible for setting them.
+// dentry initially has no references, but is not cached; it is the caller's
+// responsibility to set the dentry's reference count and/or call
+// dentry.destroy() as appropriate. The dentry is initially invalid in that it
+// contains no underlying dentry; the caller is responsible for setting them.
 func (fs *filesystem) newDentry() *dentry {
 	d := &dentry{
 		fs: fs,
@@ -629,42 +688,23 @@ func (d *dentry) TryIncRef() bool {
 
 // DecRef implements vfs.DentryImpl.DecRef.
 func (d *dentry) DecRef(ctx context.Context) {
-	r := atomic.AddInt64(&d.refs, -1)
-	if d.LogRefs() {
-		refsvfs2.LogDecRef(d, r)
-	}
-	if r == 0 {
-		d.fs.renameMu.Lock()
-		d.checkDropLocked(ctx)
-		d.fs.renameMu.Unlock()
-	} else if r < 0 {
-		panic("verity.dentry.DecRef() called without holding a reference")
+	if d.decRefNoCaching() == 0 {
+		d.checkCachingLocked(ctx, false /* renameMuWriteLocked */)
 	}
 }
 
-func (d *dentry) decRefLocked(ctx context.Context) {
+// decRefNoCaching decrements d's reference count without calling
+// d.checkCachingLocked, even if d's reference count reaches 0; callers are
+// responsible for ensuring that d.checkCachingLocked will be called later.
+func (d *dentry) decRefNoCaching() int64 {
 	r := atomic.AddInt64(&d.refs, -1)
 	if d.LogRefs() {
 		refsvfs2.LogDecRef(d, r)
 	}
-	if r == 0 {
-		d.checkDropLocked(ctx)
-	} else if r < 0 {
-		panic("verity.dentry.decRefLocked() called without holding a reference")
+	if r < 0 {
+		panic("verity.dentry.decRefNoCaching() called without holding a reference")
 	}
-}
-
-// checkDropLocked should be called after d's reference count becomes 0 or it
-// becomes deleted.
-func (d *dentry) checkDropLocked(ctx context.Context) {
-	// Dentries with a positive reference count must be retained. Dentries
-	// with a negative reference count have already been destroyed.
-	if atomic.LoadInt64(&d.refs) != 0 {
-		return
-	}
-	// Refs is still zero; destroy it.
-	d.destroyLocked(ctx)
-	return
+	return r
 }
 
 // destroyLocked destroys the dentry.
@@ -683,6 +723,12 @@ func (d *dentry) destroyLocked(ctx context.Context) {
 		panic("verity.dentry.destroyLocked() called with references on the dentry")
 	}
 
+	// Drop the reference held by d on its parent without recursively
+	// locking d.fs.renameMu.
+	if d.parent != nil && d.parent.decRefNoCaching() == 0 {
+		d.parent.checkCachingLocked(ctx, true /* renameMuWriteLocked */)
+	}
+
 	if d.lowerVD.Ok() {
 		d.lowerVD.DecRef(ctx)
 	}
@@ -695,7 +741,6 @@ func (d *dentry) destroyLocked(ctx context.Context) {
 			delete(d.parent.children, d.name)
 		}
 		d.parent.dirMu.Unlock()
-		d.parent.decRefLocked(ctx)
 	}
 	refsvfs2.Unregister(d)
 }
@@ -734,6 +779,140 @@ func (d *dentry) OnZeroWatches(context.Context) {
 	//TODO(b/159261227): Implement OnZeroWatches.
 }
 
+// checkCachingLocked should be called after d's reference count becomes 0 or
+// it becomes disowned.
+//
+// For performance, checkCachingLocked can also be called after d's reference
+// count becomes non-zero, so that d can be removed from the LRU cache. This
+// may help in reducing the size of the cache and hence reduce evictions. Note
+// that this is not necessary for correctness.
+//
+// It may be called on a destroyed dentry. For example,
+// renameMuRUnlockAndCheckCaching may call checkCachingLocked multiple times
+// for the same dentry when the dentry is visited more than once in the same
+// operation. One of the calls may destroy the dentry, so subsequent calls will
+// do nothing.
+//
+// Preconditions: d.fs.renameMu must be locked for writing if
+// renameMuWriteLocked is true; it may be temporarily unlocked.
+func (d *dentry) checkCachingLocked(ctx context.Context, renameMuWriteLocked bool) {
+	d.cachingMu.Lock()
+	refs := atomic.LoadInt64(&d.refs)
+	if refs == -1 {
+		// Dentry has already been destroyed.
+		d.cachingMu.Unlock()
+		return
+	}
+	if refs > 0 {
+		// fs.cachedDentries is permitted to contain dentries with non-zero refs,
+		// which are skipped by fs.evictCachedDentryLocked() upon reaching the end
+		// of the LRU. But it is still beneficial to remove d from the cache as we
+		// are already holding d.cachingMu. Keeping a cleaner cache also reduces
+		// the number of evictions (which is expensive as it acquires fs.renameMu).
+		d.removeFromCacheLocked()
+		d.cachingMu.Unlock()
+		return
+	}
+
+	if atomic.LoadInt32(&d.fs.released) != 0 {
+		d.cachingMu.Unlock()
+		if !renameMuWriteLocked {
+			// Need to lock d.fs.renameMu to access d.parent. Lock it for writing as
+			// needed by d.destroyLocked() later.
+			d.fs.renameMu.Lock()
+			defer d.fs.renameMu.Unlock()
+		}
+		if d.parent != nil {
+			d.parent.dirMu.Lock()
+			delete(d.parent.children, d.name)
+			d.parent.dirMu.Unlock()
+		}
+		d.destroyLocked(ctx) // +checklocksforce: see above.
+		return
+	}
+
+	d.fs.cacheMu.Lock()
+	// If d is already cached, just move it to the front of the LRU.
+	if d.cached {
+		d.fs.cachedDentries.Remove(d)
+		d.fs.cachedDentries.PushFront(d)
+		d.fs.cacheMu.Unlock()
+		d.cachingMu.Unlock()
+		return
+	}
+	// Cache the dentry, then evict the least recently used cached dentry if
+	// the cache becomes over-full.
+	d.fs.cachedDentries.PushFront(d)
+	d.fs.cachedDentriesLen++
+	d.cached = true
+	shouldEvict := d.fs.cachedDentriesLen > d.fs.maxCachedDentries
+	d.fs.cacheMu.Unlock()
+	d.cachingMu.Unlock()
+
+	if shouldEvict {
+		if !renameMuWriteLocked {
+			// Need to lock d.fs.renameMu for writing as needed by
+			// d.fs.evictCachedDentryLocked().
+			d.fs.renameMu.Lock()
+			defer d.fs.renameMu.Unlock()
+		}
+		d.fs.evictCachedDentryLocked(ctx) // +checklocksforce: see above.
+	}
+}
+
+// Preconditions: d.cachingMu must be locked.
+func (d *dentry) removeFromCacheLocked() {
+	if d.cached {
+		d.fs.cacheMu.Lock()
+		d.fs.cachedDentries.Remove(d)
+		d.fs.cachedDentriesLen--
+		d.fs.cacheMu.Unlock()
+		d.cached = false
+	}
+}
+
+// Precondition: fs.renameMu must be locked for writing; it may be temporarily
+// unlocked.
+// +checklocks:fs.renameMu
+func (fs *filesystem) evictAllCachedDentriesLocked(ctx context.Context) {
+	for fs.cachedDentriesLen != 0 {
+		fs.evictCachedDentryLocked(ctx)
+	}
+}
+
+// Preconditions:
+// * fs.renameMu must be locked for writing; it may be temporarily unlocked.
+// +checklocks:fs.renameMu
+func (fs *filesystem) evictCachedDentryLocked(ctx context.Context) {
+	fs.cacheMu.Lock()
+	victim := fs.cachedDentries.Back()
+	fs.cacheMu.Unlock()
+	if victim == nil {
+		// fs.cachedDentries may have become empty between when it was
+		// checked and when we locked fs.cacheMu.
+		return
+	}
+
+	victim.cachingMu.Lock()
+	victim.removeFromCacheLocked()
+	// victim.refs may have become non-zero from an earlier path resolution
+	// since it was inserted into fs.cachedDentries.
+	if atomic.LoadInt64(&victim.refs) != 0 {
+		victim.cachingMu.Unlock()
+		return
+	}
+	if victim.parent != nil {
+		victim.parent.dirMu.Lock()
+		// Note that victim can't be a mount point (in any mount
+		// namespace), since VFS holds references on mount points.
+		fs.vfsfs.VirtualFilesystem().InvalidateDentry(ctx, &victim.vfsd)
+		delete(victim.parent.children, victim.name)
+		victim.parent.dirMu.Unlock()
+	}
+	victim.cachingMu.Unlock()
+	victim.destroyLocked(ctx) // +checklocksforce: owned as precondition, victim.fs == fs.
+}
+
 func (d *dentry) isSymlink() bool {
 	return atomic.LoadUint32(&d.mode)&linux.S_IFMT == linux.S_IFLNK
 }
diff --git a/pkg/sentry/kernel/ipc/object.go b/pkg/sentry/kernel/ipc/object.go
index 387b35e7e..facd157c7 100644
--- a/pkg/sentry/kernel/ipc/object.go
+++ b/pkg/sentry/kernel/ipc/object.go
@@ -19,6 +19,8 @@ package ipc
 
 import (
 	"gvisor.dev/gvisor/pkg/abi/linux"
+	"gvisor.dev/gvisor/pkg/context"
+	"gvisor.dev/gvisor/pkg/errors/linuxerr"
 	"gvisor.dev/gvisor/pkg/sentry/fs"
 	"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
 )
@@ -113,3 +115,36 @@ func (o *Object) CheckPermissions(creds *auth.Credentials, req fs.PermMask) bool
 	}
 	return creds.HasCapabilityIn(linux.CAP_IPC_OWNER, o.UserNS)
 }
+
+// Set modifies attributes for an IPC object. See *ctl(IPC_SET).
+//
+// Precondition: Mechanism.mu must be held.
+func (o *Object) Set(ctx context.Context, perm *linux.IPCPerm) error {
+	creds := auth.CredentialsFromContext(ctx)
+	// Ownership is checked before perm is validated, as Linux does, so that
+	// callers without ownership get EPERM even when perm is invalid.
+	if !o.CheckOwnership(creds) {
+		// "The argument cmd has the value IPC_SET or IPC_RMID, but the
+		//  effective user ID of the calling process is not the creator (as
+		//  found in msg_perm.cuid) or the owner (as found in msg_perm.uid)
+		//  of the message queue, and the caller is not privileged (Linux:
+		//  does not have the CAP_SYS_ADMIN capability)."
+		return linuxerr.EPERM
+	}
+
+	uid := creds.UserNamespace.MapToKUID(auth.UID(perm.UID))
+	gid := creds.UserNamespace.MapToKGID(auth.GID(perm.GID))
+	if !uid.Ok() || !gid.Ok() {
+		// The man pages don't specify an errno for invalid uid/gid, but EINVAL
+		// is generally used for invalid arguments.
+		return linuxerr.EINVAL
+	}
+
+	// User may only modify the lower 9 bits of the mode. All the other bits are
+	// always 0 for the underlying inode.
+	mode := linux.FileMode(perm.Mode & 0x1ff)
+	o.Perms = fs.FilePermsFromMode(mode)
+	o.Owner.UID = uid
+	o.Owner.GID = gid
+	return nil
+}
diff --git a/pkg/sentry/kernel/msgqueue/msgqueue.go b/pkg/sentry/kernel/msgqueue/msgqueue.go
index fab396d7c..7c459d076 100644
--- a/pkg/sentry/kernel/msgqueue/msgqueue.go
+++ b/pkg/sentry/kernel/msgqueue/msgqueue.go
@@ -206,6 +206,48 @@ func (r *Registry) FindByID(id ipc.ID) (*Queue, error) {
 	return mech.(*Queue), nil
 }
 
+// IPCInfo reports global parameters for message queues. See msgctl(IPC_INFO).
+func (r *Registry) IPCInfo(ctx context.Context) *linux.MsgInfo {
+	return &linux.MsgInfo{
+		MsgPool: linux.MSGPOOL,
+		MsgMap:  linux.MSGMAP,
+		MsgMax:  linux.MSGMAX,
+		MsgMnb:  linux.MSGMNB,
+		MsgMni:  linux.MSGMNI,
+		MsgSsz:  linux.MSGSSZ,
+		MsgTql:  linux.MSGTQL,
+		MsgSeg:  linux.MSGSEG,
+	}
+}
+
+// MsgInfo reports system-wide resource usage by message queues. See
+// msgctl(MSG_INFO).
+func (r *Registry) MsgInfo(ctx context.Context) *linux.MsgInfo {
+	r.mu.Lock()
+	defer r.mu.Unlock()
+
+	var msgTotal, byteTotal uint64
+	r.reg.ForAllObjects(func(m ipc.Mechanism) {
+		queue := m.(*Queue)
+		queue.mu.Lock()
+		msgTotal += queue.messageCount
+		byteTotal += queue.byteCount
+		queue.mu.Unlock()
+	})
+
+	// MsgPool, MsgMap and MsgTql are repurposed to hold usage totals.
+	return &linux.MsgInfo{
+		MsgPool: int32(r.reg.ObjectCount()),
+		MsgMap:  int32(msgTotal),
+		MsgTql:  int32(byteTotal),
+		MsgMax:  linux.MSGMAX,
+		MsgMnb:  linux.MSGMNB,
+		MsgMni:  linux.MSGMNI,
+		MsgSsz:  linux.MSGSSZ,
+		MsgSeg:  linux.MSGSEG,
+	}
+}
+
 // Send appends a message to the message queue, and returns an error if sending
 // fails. See msgsnd(2).
 func (q *Queue) Send(ctx context.Context, m Message, b Blocker, wait bool, pid int32) error {
@@ -465,6 +507,73 @@ func (q *Queue) msgAtIndex(mType int64) *Message {
 	return msg
 }
 
+// Set modifies some values of the queue. See msgctl(IPC_SET).
+func (q *Queue) Set(ctx context.Context, ds *linux.MsqidDS) error {
+	q.mu.Lock()
+	defer q.mu.Unlock()
+
+	creds := auth.CredentialsFromContext(ctx)
+	if ds.MsgQbytes > maxQueueBytes && !creds.HasCapabilityIn(linux.CAP_SYS_RESOURCE, q.obj.UserNS) {
+		// "An attempt (IPC_SET) was made to increase msg_qbytes beyond the
+		// system parameter MSGMNB, but the caller is not privileged (Linux:
+		// does not have the CAP_SYS_RESOURCE capability)."
+		return linuxerr.EPERM
+	}
+
+	if err := q.obj.Set(ctx, &ds.MsgPerm); err != nil {
+		return err
+	}
+
+	q.maxBytes = ds.MsgQbytes
+	q.changeTime = ktime.NowFromContext(ctx)
+	return nil
+}
+
+// Stat returns a MsqidDS object filled with information about the queue. See
+// msgctl(IPC_STAT) and msgctl(MSG_STAT).
+func (q *Queue) Stat(ctx context.Context) (*linux.MsqidDS, error) {
+	return q.stat(ctx, fs.PermMask{Read: true})
+}
+
+// StatAny is similar to Queue.Stat, but doesn't require read permission. See
+// msgctl(MSG_STAT_ANY).
+func (q *Queue) StatAny(ctx context.Context) (*linux.MsqidDS, error) {
+	return q.stat(ctx, fs.PermMask{})
+}
+
+// stat returns a MsqidDS object filled with information about the queue. An
+// error is returned if the user doesn't have the specified permissions.
+func (q *Queue) stat(ctx context.Context, mask fs.PermMask) (*linux.MsqidDS, error) {
+	q.mu.Lock()
+	defer q.mu.Unlock()
+
+	creds := auth.CredentialsFromContext(ctx)
+	if !q.obj.CheckPermissions(creds, mask) {
+		// "The caller must have read permission on the message queue."
+		return nil, linuxerr.EACCES
+	}
+
+	return &linux.MsqidDS{
+		MsgPerm: linux.IPCPerm{
+			Key:  uint32(q.obj.Key),
+			UID:  uint32(creds.UserNamespace.MapFromKUID(q.obj.Owner.UID)),
+			GID:  uint32(creds.UserNamespace.MapFromKGID(q.obj.Owner.GID)),
+			CUID: uint32(creds.UserNamespace.MapFromKUID(q.obj.Creator.UID)),
+			CGID: uint32(creds.UserNamespace.MapFromKGID(q.obj.Creator.GID)),
+			Mode: uint16(q.obj.Perms.LinuxMode()),
+			Seq:  0, // IPC sequences not supported.
+		},
+		MsgStime:  q.sendTime.TimeT(),
+		MsgRtime:  q.receiveTime.TimeT(),
+		MsgCtime:  q.changeTime.TimeT(),
+		MsgCbytes: q.byteCount,
+		MsgQnum:   q.messageCount,
+		MsgQbytes: q.maxBytes,
+		MsgLspid:  q.sendPID,
+		MsgLrpid:  q.receivePID,
+	}, nil
+}
+
 // Lock implements ipc.Mechanism.Lock.
 func (q *Queue) Lock() {
 	q.mu.Lock()
diff --git a/pkg/sentry/kernel/semaphore/semaphore.go b/pkg/sentry/kernel/semaphore/semaphore.go
index 8a5c81a68..28e466948 100644
--- a/pkg/sentry/kernel/semaphore/semaphore.go
+++ b/pkg/sentry/kernel/semaphore/semaphore.go
@@ -336,19 +336,15 @@ func (s *Set) Size() int {
 	return len(s.sems)
 }
 
-// Change changes some fields from the set atomically.
-func (s *Set) Change(ctx context.Context, creds *auth.Credentials, owner fs.FileOwner, perms fs.FilePermissions) error {
+// Set modifies attributes for a semaphore set. See semctl(IPC_SET).
+func (s *Set) Set(ctx context.Context, ds *linux.SemidDS) error {
 	s.mu.Lock()
 	defer s.mu.Unlock()
 
-	// "The effective UID of the calling process must match the owner or creator
-	// of the semaphore set, or the caller must be privileged."
-	if !s.obj.CheckOwnership(creds) {
-		return linuxerr.EACCES
+	if err := s.obj.Set(ctx, &ds.SemPerm); err != nil {
+		return err
 	}
 
-	s.obj.Owner = owner
-	s.obj.Perms = perms
 	s.changeTime = ktime.NowFromContext(ctx)
 	return nil
 }
diff --git a/pkg/sentry/kernel/shm/shm.go b/pkg/sentry/kernel/shm/shm.go
index b8da0c76c..ab938fa3c 100644
--- a/pkg/sentry/kernel/shm/shm.go
+++ b/pkg/sentry/kernel/shm/shm.go
@@ -618,25 +618,10 @@ func (s *Shm) Set(ctx context.Context, ds *linux.ShmidDS) error {
 	s.mu.Lock()
 	defer s.mu.Unlock()
 
-	creds := auth.CredentialsFromContext(ctx)
-	if !s.obj.CheckOwnership(creds) {
-		return linuxerr.EPERM
-	}
-
-	uid := creds.UserNamespace.MapToKUID(auth.UID(ds.ShmPerm.UID))
-	gid := creds.UserNamespace.MapToKGID(auth.GID(ds.ShmPerm.GID))
-	if !uid.Ok() || !gid.Ok() {
-		return linuxerr.EINVAL
+	if err := s.obj.Set(ctx, &ds.ShmPerm); err != nil {
+		return err
 	}
 
-	// User may only modify the lower 9 bits of the mode. All the other bits are
-	// always 0 for the underlying inode.
-	mode := linux.FileMode(ds.ShmPerm.Mode & 0x1ff)
-	s.obj.Perms = fs.FilePermsFromMode(mode)
-
-	s.obj.Owner.UID = uid
-	s.obj.Owner.GID = gid
-
 	s.changeTime = ktime.NowFromContext(ctx)
 	return nil
 }
diff --git a/pkg/sentry/kernel/thread_group.go b/pkg/sentry/kernel/thread_group.go
index 2eda15303..5814a4eca 100644
--- a/pkg/sentry/kernel/thread_group.go
+++ b/pkg/sentry/kernel/thread_group.go
@@ -489,11 +489,6 @@ func (tg *ThreadGroup) SetForegroundProcessGroup(tty *TTY, pgid ProcessGroupID)
 	tg.signalHandlers.mu.Lock()
 	defer tg.signalHandlers.mu.Unlock()
 
-	// TODO(gvisor.dev/issue/6148): "If tcsetpgrp() is called by a member of a
-	// background process group in its session, and the calling process is not
-	// blocking or ignoring SIGTTOU, a SIGTTOU signal is sent to all members of
-	// this background process group."
-
 	// tty must be the controlling terminal.
 	if tg.tty != tty {
 		return -1, linuxerr.ENOTTY
@@ -516,6 +511,16 @@ func (tg *ThreadGroup) SetForegroundProcessGroup(tty *TTY, pgid ProcessGroupID)
 		return -1, linuxerr.EPERM
 	}
 
+	signalAction := tg.signalHandlers.actions[linux.SIGTTOU]
+	// If the calling process is a member of a background group, a SIGTTOU
+	// signal is sent unless it is ignoring or blocking SIGTTOU. Test only the
+	// SIGTTOU bit of the mask, since the mask may also block other signals.
+	ignored := signalAction.Handler == linux.SIG_IGN
+	blocked := tg.leader.signalMask&linux.SignalSetOf(linux.SIGTTOU) != 0
+	if tg.processGroup.id != tg.processGroup.session.foreground.id && !ignored && !blocked {
+		tg.leader.sendSignalLocked(SignalInfoPriv(linux.SIGTTOU), true)
+	}
+
 	tg.processGroup.session.foreground.id = pgid
 	return 0, nil
 }
diff --git a/pkg/sentry/mm/aio_context.go b/pkg/sentry/mm/aio_context.go
index b7f765cd7..d71d64580 100644
--- a/pkg/sentry/mm/aio_context.go
+++ b/pkg/sentry/mm/aio_context.go
@@ -77,15 +77,6 @@ func (mm *MemoryManager) destroyAIOContextLocked(ctx context.Context, id uint64)
 		return nil
 	}
 
-	// Only unmaps after it assured that the address is a valid aio context to
-	// prevent random memory from been unmapped.
-	//
-	// Note: It's possible to unmap this address and map something else into
-	// the same address. Then it would be unmapping memory that it doesn't own.
-	// This is, however, the way Linux implements AIO. Keeps the same [weird]
-	// semantics in case anyone relies on it.
-	mm.MUnmap(ctx, hostarch.Addr(id), aioRingBufferSize)
-
 	delete(mm.aioManager.contexts, id)
 	aioCtx.destroy()
 	return aioCtx
@@ -411,6 +402,15 @@ func (mm *MemoryManager) DestroyAIOContext(ctx context.Context, id uint64) *AIOC
 		return nil
 	}
 
+	// Only unmap after it is assured that the address is a valid aio context,
+	// to prevent random memory from being unmapped.
+	//
+	// Note: It's possible to unmap this address and map something else into
+	// the same address. Then it would be unmapping memory that it doesn't own.
+	// This is, however, the way Linux implements AIO. Keep the same [weird]
+	// semantics in case anyone relies on it.
+	mm.MUnmap(ctx, hostarch.Addr(id), aioRingBufferSize)
+
 	mm.aioManager.mu.Lock()
 	defer mm.aioManager.mu.Unlock()
 	return mm.destroyAIOContextLocked(ctx, id)
diff --git a/pkg/sentry/platform/kvm/bluepill_fault.go b/pkg/sentry/platform/kvm/bluepill_fault.go
index 8fd8287b3..7a3c97c5a 100644
--- a/pkg/sentry/platform/kvm/bluepill_fault.go
+++ b/pkg/sentry/platform/kvm/bluepill_fault.go
@@ -55,11 +55,7 @@ func calculateBluepillFault(physical uintptr, phyRegions []physicalRegion) (virt
 		}
 
 		// Adjust the block to match our size.
-		physicalStart = alignedPhysical & faultBlockMask
-		if physicalStart < pr.physical {
-			// Bound the starting point to the start of the region.
-			physicalStart = pr.physical
-		}
+		physicalStart = pr.physical + (alignedPhysical-pr.physical)&faultBlockMask
 		virtualStart = pr.virtual + (physicalStart - pr.physical)
 		physicalEnd := physicalStart + faultBlockSize
 		if physicalEnd > end {
diff --git a/pkg/sentry/syscalls/linux/linux64.go b/pkg/sentry/syscalls/linux/linux64.go
index 56f90d952..2046a48b9 100644
--- a/pkg/sentry/syscalls/linux/linux64.go
+++ b/pkg/sentry/syscalls/linux/linux64.go
@@ -123,7 +123,7 @@ var AMD64 = &kernel.SyscallTable{
 		68:  syscalls.Supported("msgget", Msgget),
 		69:  syscalls.Supported("msgsnd", Msgsnd),
 		70:  syscalls.Supported("msgrcv", Msgrcv),
-		71:  syscalls.PartiallySupported("msgctl", Msgctl, "Only supports IPC_RMID option.", []string{"gvisor.dev/issue/135"}),
+		71:  syscalls.Supported("msgctl", Msgctl),
 		72:  syscalls.PartiallySupported("fcntl", Fcntl, "Not all options are supported.", nil),
 		73:  syscalls.PartiallySupported("flock", Flock, "Locks are held within the sandbox only.", nil),
 		74:  syscalls.PartiallySupported("fsync", Fsync, "Full data flush is not guaranteed at this time.", nil),
@@ -616,7 +616,7 @@ var ARM64 = &kernel.SyscallTable{
 		184: syscalls.ErrorWithEvent("mq_notify", linuxerr.ENOSYS, "", []string{"gvisor.dev/issue/136"}),       // TODO(b/29354921)
 		185: syscalls.ErrorWithEvent("mq_getsetattr", linuxerr.ENOSYS, "", []string{"gvisor.dev/issue/136"}),   // TODO(b/29354921)
 		186: syscalls.Supported("msgget", Msgget),
-		187: syscalls.PartiallySupported("msgctl", Msgctl, "Only supports IPC_RMID option.", []string{"gvisor.dev/issue/135"}),
+		187: syscalls.Supported("msgctl", Msgctl),
 		188: syscalls.Supported("msgrcv", Msgrcv),
 		189: syscalls.Supported("msgsnd", Msgsnd),
 		190: syscalls.Supported("semget", Semget),
diff --git a/pkg/sentry/syscalls/linux/sys_msgqueue.go b/pkg/sentry/syscalls/linux/sys_msgqueue.go
index 5259ade90..60b989ee7 100644
--- a/pkg/sentry/syscalls/linux/sys_msgqueue.go
+++ b/pkg/sentry/syscalls/linux/sys_msgqueue.go
@@ -130,12 +130,63 @@ func receive(t *kernel.Task, id ipc.ID, mType int64, maxSize int64, msgCopy, wai
 func Msgctl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
 	id := ipc.ID(args[0].Int())
 	cmd := args[1].Int()
+	buf := args[2].Pointer()
 
 	creds := auth.CredentialsFromContext(t)
 
+	r := t.IPCNamespace().MsgqueueRegistry()
+
 	switch cmd {
+	case linux.IPC_INFO:
+		info := r.IPCInfo(t)
+		_, err := info.CopyOut(t, buf)
+		return 0, nil, err
+	case linux.MSG_INFO:
+		msgInfo := r.MsgInfo(t)
+		_, err := msgInfo.CopyOut(t, buf)
+		return 0, nil, err
 	case linux.IPC_RMID:
-		return 0, nil, t.IPCNamespace().MsgqueueRegistry().Remove(id, creds)
+		return 0, nil, r.Remove(id, creds)
+	}
+
+	// Remaining commands use a queue.
+	queue, err := r.FindByID(id)
+	if err != nil {
+		return 0, nil, err
+	}
+
+	switch cmd {
+	case linux.MSG_STAT:
+		// Technically, we should be treating id as "an index into the kernel's
+		// internal array that maintains information about all message queues
+		// on the system". Since we don't track queues in an array, we'll just
+		// pretend the msqid is the index and do the same thing as IPC_STAT.
+		// Linux also uses the index as the msqid.
+		fallthrough
+	case linux.IPC_STAT:
+		stat, err := queue.Stat(t)
+		if err != nil {
+			return 0, nil, err
+		}
+		_, err = stat.CopyOut(t, buf)
+		return 0, nil, err
+
+	case linux.MSG_STAT_ANY:
+		stat, err := queue.StatAny(t)
+		if err != nil {
+			return 0, nil, err
+		}
+		_, err = stat.CopyOut(t, buf)
+		return 0, nil, err
+
+	case linux.IPC_SET:
+		var ds linux.MsqidDS
+		if _, err := ds.CopyIn(t, buf); err != nil {
+			// Propagate the copy error (EFAULT for a bad buf), not EINVAL.
+			return 0, nil, err
+		}
+		return 0, nil, queue.Set(t, &ds)
+
 	default:
 		return 0, nil, linuxerr.EINVAL
 	}
diff --git a/pkg/sentry/syscalls/linux/sys_sem.go b/pkg/sentry/syscalls/linux/sys_sem.go
index f61cc466c..5a119b21c 100644
--- a/pkg/sentry/syscalls/linux/sys_sem.go
+++ b/pkg/sentry/syscalls/linux/sys_sem.go
@@ -23,7 +23,6 @@ import (
 	"gvisor.dev/gvisor/pkg/hostarch"
 	"gvisor.dev/gvisor/pkg/marshal/primitive"
 	"gvisor.dev/gvisor/pkg/sentry/arch"
-	"gvisor.dev/gvisor/pkg/sentry/fs"
 	"gvisor.dev/gvisor/pkg/sentry/kernel"
 	"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
 	"gvisor.dev/gvisor/pkg/sentry/kernel/ipc"
@@ -166,8 +165,7 @@ func Semctl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal
 			return 0, nil, err
 		}
 
-		perms := fs.FilePermsFromMode(linux.FileMode(s.SemPerm.Mode & 0777))
-		return 0, nil, ipcSet(t, id, auth.UID(s.SemPerm.UID), auth.GID(s.SemPerm.GID), perms)
+		return 0, nil, ipcSet(t, id, &s)
 
 	case linux.GETPID:
 		v, err := getPID(t, id, num)
@@ -243,24 +241,13 @@ func remove(t *kernel.Task, id ipc.ID) error {
 	return r.Remove(id, creds)
 }
 
-func ipcSet(t *kernel.Task, id ipc.ID, uid auth.UID, gid auth.GID, perms fs.FilePermissions) error {
+func ipcSet(t *kernel.Task, id ipc.ID, ds *linux.SemidDS) error {
 	r := t.IPCNamespace().SemaphoreRegistry()
 	set := r.FindByID(id)
 	if set == nil {
 		return linuxerr.EINVAL
 	}
-
-	creds := auth.CredentialsFromContext(t)
-	kuid := creds.UserNamespace.MapToKUID(uid)
-	if !kuid.Ok() {
-		return linuxerr.EINVAL
-	}
-	kgid := creds.UserNamespace.MapToKGID(gid)
-	if !kgid.Ok() {
-		return linuxerr.EINVAL
-	}
-	owner := fs.FileOwner{UID: kuid, GID: kgid}
-	return set.Change(t, creds, owner, perms)
+	return set.Set(t, ds)
 }
 
 func ipcStat(t *kernel.Task, id ipc.ID) (*linux.SemidDS, error) {
diff --git a/pkg/sentry/vfs/README.md b/pkg/sentry/vfs/README.md
index 5aad31b78..82ee2c521 100644
--- a/pkg/sentry/vfs/README.md
+++ b/pkg/sentry/vfs/README.md
@@ -1,9 +1,5 @@
 # The gVisor Virtual Filesystem
 
-THIS PACKAGE IS CURRENTLY EXPERIMENTAL AND NOT READY OR ENABLED FOR PRODUCTION
-USE. For the filesystem implementation currently used by gVisor, see the `fs`
-package.
-
 ## Implementation Notes
 
 ### Reference Counting