54 files changed, 830 insertions, 456 deletions
diff --git a/benchmarks/workloads/httpd/BUILD b/benchmarks/workloads/httpd/BUILD index a70873065..83450d190 100644 --- a/benchmarks/workloads/httpd/BUILD +++ b/benchmarks/workloads/httpd/BUILD @@ -9,5 +9,6 @@ pkg_tar( name = "tar", srcs = [ "Dockerfile", + "apache2-tmpdir.conf", ], ) diff --git a/benchmarks/workloads/httpd/Dockerfile b/benchmarks/workloads/httpd/Dockerfile index 5259c8f4f..52a550678 100644 --- a/benchmarks/workloads/httpd/Dockerfile +++ b/benchmarks/workloads/httpd/Dockerfile @@ -6,16 +6,16 @@ RUN set -x \ apache2 \ && rm -rf /var/lib/apt/lists/* -# Link the htdoc directory to tmp. -RUN mkdir -p /usr/local/apache2/htdocs && \ - cd /usr/local/apache2 && ln -s /tmp htdocs - # Generate a bunch of relevant files. RUN mkdir -p /local && \ for size in 1 10 100 1000 1024 10240; do \ dd if=/dev/zero of=/local/latin${size}k.txt count=${size} bs=1024; \ done +# Rewrite DocumentRoot to point to /tmp/html instead of the default path. +RUN sed -i 's/DocumentRoot.*\/var\/www\/html$/DocumentRoot \/tmp\/html/' /etc/apache2/sites-enabled/000-default.conf +COPY ./apache2-tmpdir.conf /etc/apache2/sites-enabled/apache2-tmpdir.conf + # Standard settings. ENV APACHE_RUN_DIR /tmp ENV APACHE_RUN_USER nobody @@ -24,4 +24,4 @@ ENV APACHE_LOG_DIR /tmp ENV APACHE_PID_FILE /tmp/apache.pid # Copy on start-up; serve everything from /tmp (including the configuration). -CMD ["sh", "-c", "cp -a /local/* /tmp && apache2 -c \"ServerName localhost\" -c \"DocumentRoot /tmp\" -X"] +CMD ["sh", "-c", "mkdir -p /tmp/html && cp -a /local/* /tmp/html && apache2 -X"] diff --git a/benchmarks/workloads/httpd/apache2-tmpdir.conf b/benchmarks/workloads/httpd/apache2-tmpdir.conf new file mode 100644 index 000000000..e33f8d9bb --- /dev/null +++ b/benchmarks/workloads/httpd/apache2-tmpdir.conf @@ -0,0 +1,5 @@ +<Directory /tmp/html/> + Options Indexes FollowSymLinks + AllowOverride None + Require all granted +</Directory>
\ No newline at end of file diff --git a/g3doc/BUILD b/g3doc/BUILD index dbbf96204..c315d38be 100644 --- a/g3doc/BUILD +++ b/g3doc/BUILD @@ -33,3 +33,12 @@ doc( subcategory = "Community", weight = "95", ) + +doc( + name = "style", + src = "style.md", + category = "Project", + permalink = "/community/style_guide/", + subcategory = "Community", + weight = "10", +) diff --git a/g3doc/user_guide/BUILD b/g3doc/user_guide/BUILD index 5568e1ba4..b69aee12c 100644 --- a/g3doc/user_guide/BUILD +++ b/g3doc/user_guide/BUILD @@ -33,7 +33,7 @@ doc( name = "FAQ", src = "FAQ.md", category = "User Guide", - permalink = "/docs/user_guide/FAQ/", + permalink = "/docs/user_guide/faq/", weight = "90", ) diff --git a/pkg/abi/linux/file.go b/pkg/abi/linux/file.go index 055ac1d7c..e11ca2d62 100644 --- a/pkg/abi/linux/file.go +++ b/pkg/abi/linux/file.go @@ -191,8 +191,9 @@ var DirentType = abi.ValueSet{ // Values for preadv2/pwritev2. const ( - // Note: gVisor does not implement the RWF_HIPRI feature, but the flag is - // accepted as a valid flag argument for preadv2/pwritev2. + // NOTE(b/120162627): gVisor does not implement the RWF_HIPRI feature, but + // the flag is accepted as a valid flag argument for preadv2/pwritev2 and + // silently ignored. RWF_HIPRI = 0x00000001 RWF_DSYNC = 0x00000002 RWF_SYNC = 0x00000004 diff --git a/pkg/p9/p9.go b/pkg/p9/p9.go index 28d851ff5..122c457d2 100644 --- a/pkg/p9/p9.go +++ b/pkg/p9/p9.go @@ -1091,6 +1091,19 @@ type AllocateMode struct { Unshare bool } +// ToAllocateMode returns an AllocateMode from a fallocate(2) mode. +func ToAllocateMode(mode uint64) AllocateMode { + return AllocateMode{ + KeepSize: mode&unix.FALLOC_FL_KEEP_SIZE != 0, + PunchHole: mode&unix.FALLOC_FL_PUNCH_HOLE != 0, + NoHideStale: mode&unix.FALLOC_FL_NO_HIDE_STALE != 0, + CollapseRange: mode&unix.FALLOC_FL_COLLAPSE_RANGE != 0, + ZeroRange: mode&unix.FALLOC_FL_ZERO_RANGE != 0, + InsertRange: mode&unix.FALLOC_FL_INSERT_RANGE != 0, + Unshare: mode&unix.FALLOC_FL_UNSHARE_RANGE != 0, + } +} + // ToLinux converts to a value compatible with fallocate(2)'s mode. func (a *AllocateMode) ToLinux() uint32 { rv := uint32(0) diff --git a/pkg/sentry/fsimpl/gofer/filesystem.go b/pkg/sentry/fsimpl/gofer/filesystem.go index 51082359d..7bcc99b29 100644 --- a/pkg/sentry/fsimpl/gofer/filesystem.go +++ b/pkg/sentry/fsimpl/gofer/filesystem.go @@ -869,11 +869,22 @@ func (d *dentry) openLocked(ctx context.Context, rp *vfs.ResolvingPath, opts *vf if err := d.checkPermissions(rp.Credentials(), ats); err != nil { return nil, err } + + trunc := opts.Flags&linux.O_TRUNC != 0 && d.fileType() == linux.S_IFREG + if trunc { + // Lock metadataMu *while* we open a regular file with O_TRUNC because + // open(2) will change the file size on server. + d.metadataMu.Lock() + defer d.metadataMu.Unlock() + } + + var vfd *vfs.FileDescription + var err error mnt := rp.Mount() switch d.fileType() { case linux.S_IFREG: if !d.fs.opts.regularFilesUseSpecialFileFD { - if err := d.ensureSharedHandle(ctx, ats&vfs.MayRead != 0, ats&vfs.MayWrite != 0, opts.Flags&linux.O_TRUNC != 0); err != nil { + if err := d.ensureSharedHandle(ctx, ats&vfs.MayRead != 0, ats&vfs.MayWrite != 0, trunc); err != nil { return nil, err } fd := ®ularFileFD{} @@ -883,7 +894,7 @@ func (d *dentry) openLocked(ctx context.Context, rp *vfs.ResolvingPath, opts *vf }); err != nil { return nil, err } - return &fd.vfsfd, nil + vfd = &fd.vfsfd } case linux.S_IFDIR: // Can't open directories with O_CREAT. 
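// For illustration only: a minimal, self-contained sketch of the p9.ToAllocateMode
// helper introduced above, assuming the gvisor.dev/gvisor/pkg/p9 and
// golang.org/x/sys/unix import paths. Each FALLOC_FL_* bit of a fallocate(2) mode
// simply toggles the matching AllocateMode field; mode == 0 (the only mode the new
// vfs2 fallocate syscall accepts) leaves every field false.
package main

import (
	"fmt"

	"golang.org/x/sys/unix"
	"gvisor.dev/gvisor/pkg/p9"
)

func main() {
	// Plain allocation: every field is false.
	fmt.Printf("%+v\n", p9.ToAllocateMode(0))

	// Punching a hole without changing the file size sets exactly two fields.
	mode := uint64(unix.FALLOC_FL_PUNCH_HOLE | unix.FALLOC_FL_KEEP_SIZE)
	fmt.Printf("%+v\n", p9.ToAllocateMode(mode))
}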
@@ -923,7 +934,25 @@ func (d *dentry) openLocked(ctx context.Context, rp *vfs.ResolvingPath, opts *vf return d.pipe.Open(ctx, mnt, &d.vfsd, opts.Flags, &d.locks) } } - return d.openSpecialFileLocked(ctx, mnt, opts) + + if vfd == nil { + if vfd, err = d.openSpecialFileLocked(ctx, mnt, opts); err != nil { + return nil, err + } + } + + if trunc { + // If no errors occured so far then update file size in memory. This + // step is required even if !d.cachedMetadataAuthoritative() because + // d.mappings has to be updated. + // d.metadataMu has already been acquired if trunc == true. + d.updateFileSizeLocked(0) + + if d.cachedMetadataAuthoritative() { + d.touchCMtimeLocked() + } + } + return vfd, err } func (d *dentry) connectSocketLocked(ctx context.Context, opts *vfs.OpenOptions) (*vfs.FileDescription, error) { diff --git a/pkg/sentry/fsimpl/gofer/gofer.go b/pkg/sentry/fsimpl/gofer/gofer.go index 71c8d3ae1..8e74e60a5 100644 --- a/pkg/sentry/fsimpl/gofer/gofer.go +++ b/pkg/sentry/fsimpl/gofer/gofer.go @@ -794,9 +794,7 @@ func (d *dentry) updateFromP9Attrs(mask p9.AttrMask, attr *p9.Attr) { atomic.StoreUint32(&d.nlink, uint32(attr.NLink)) } if mask.Size { - d.dataMu.Lock() - atomic.StoreUint64(&d.size, attr.Size) - d.dataMu.Unlock() + d.updateFileSizeLocked(attr.Size) } d.metadataMu.Unlock() } @@ -902,6 +900,12 @@ func (d *dentry) setStat(ctx context.Context, creds *auth.Credentials, stat *lin } d.metadataMu.Lock() defer d.metadataMu.Unlock() + if stat.Mask&linux.STATX_SIZE != 0 { + // The size needs to be changed even when + // !d.cachedMetadataAuthoritative() because d.mappings has to be + // updated. + d.updateFileSizeLocked(stat.Size) + } if !d.isSynthetic() { if stat.Mask != 0 { if err := d.file.setAttr(ctx, p9.SetAttrMask{ @@ -963,40 +967,42 @@ func (d *dentry) setStat(ctx context.Context, creds *auth.Credentials, stat *lin stat.Mask |= linux.STATX_MTIME } atomic.StoreInt64(&d.ctime, now) - if stat.Mask&linux.STATX_SIZE != 0 { + return nil +} + +// Preconditions: d.metadataMu must be locked. +func (d *dentry) updateFileSizeLocked(newSize uint64) { + d.dataMu.Lock() + oldSize := d.size + d.size = newSize + // d.dataMu must be unlocked to lock d.mapsMu and invalidate mappings + // below. This allows concurrent calls to Read/Translate/etc. These + // functions synchronize with truncation by refusing to use cache + // contents beyond the new d.size. (We are still holding d.metadataMu, + // so we can't race with Write or another truncate.) + d.dataMu.Unlock() + if d.size < oldSize { + oldpgend, _ := usermem.PageRoundUp(oldSize) + newpgend, _ := usermem.PageRoundUp(d.size) + if oldpgend != newpgend { + d.mapsMu.Lock() + d.mappings.Invalidate(memmap.MappableRange{newpgend, oldpgend}, memmap.InvalidateOpts{ + // Compare Linux's mm/truncate.c:truncate_setsize() => + // truncate_pagecache() => + // mm/memory.c:unmap_mapping_range(evencows=1). + InvalidatePrivate: true, + }) + d.mapsMu.Unlock() + } + // We are now guaranteed that there are no translations of + // truncated pages, and can remove them from the cache. Since + // truncated pages have been removed from the remote file, they + // should be dropped without being written back. d.dataMu.Lock() - oldSize := d.size - d.size = stat.Size - // d.dataMu must be unlocked to lock d.mapsMu and invalidate mappings - // below. This allows concurrent calls to Read/Translate/etc. These - // functions synchronize with truncation by refusing to use cache - // contents beyond the new d.size. 
(We are still holding d.metadataMu, - // so we can't race with Write or another truncate.) + d.cache.Truncate(d.size, d.fs.mfp.MemoryFile()) + d.dirty.KeepClean(memmap.MappableRange{d.size, oldpgend}) d.dataMu.Unlock() - if d.size < oldSize { - oldpgend, _ := usermem.PageRoundUp(oldSize) - newpgend, _ := usermem.PageRoundUp(d.size) - if oldpgend != newpgend { - d.mapsMu.Lock() - d.mappings.Invalidate(memmap.MappableRange{newpgend, oldpgend}, memmap.InvalidateOpts{ - // Compare Linux's mm/truncate.c:truncate_setsize() => - // truncate_pagecache() => - // mm/memory.c:unmap_mapping_range(evencows=1). - InvalidatePrivate: true, - }) - d.mapsMu.Unlock() - } - // We are now guaranteed that there are no translations of - // truncated pages, and can remove them from the cache. Since - // truncated pages have been removed from the remote file, they - // should be dropped without being written back. - d.dataMu.Lock() - d.cache.Truncate(d.size, d.fs.mfp.MemoryFile()) - d.dirty.KeepClean(memmap.MappableRange{d.size, oldpgend}) - d.dataMu.Unlock() - } } - return nil } func (d *dentry) checkPermissions(creds *auth.Credentials, ats vfs.AccessTypes) error { diff --git a/pkg/sentry/fsimpl/gofer/regular_file.go b/pkg/sentry/fsimpl/gofer/regular_file.go index 404a452c4..3d2d3530a 100644 --- a/pkg/sentry/fsimpl/gofer/regular_file.go +++ b/pkg/sentry/fsimpl/gofer/regular_file.go @@ -24,6 +24,7 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/p9" "gvisor.dev/gvisor/pkg/safemem" "gvisor.dev/gvisor/pkg/sentry/fs/fsutil" "gvisor.dev/gvisor/pkg/sentry/memmap" @@ -67,13 +68,43 @@ func (fd *regularFileFD) OnClose(ctx context.Context) error { return d.handle.file.flush(ctx) } +// Allocate implements vfs.FileDescriptionImpl.Allocate. +func (fd *regularFileFD) Allocate(ctx context.Context, mode, offset, length uint64) error { + + d := fd.dentry() + d.metadataMu.Lock() + defer d.metadataMu.Unlock() + + size := offset + length + + // Allocating a smaller size is a noop. + if size <= d.size { + return nil + } + + d.handleMu.Lock() + defer d.handleMu.Unlock() + + err := d.handle.file.allocate(ctx, p9.ToAllocateMode(mode), offset, length) + if err != nil { + return err + } + d.size = size + if !d.cachedMetadataAuthoritative() { + d.touchCMtimeLocked() + } + return nil +} + // PRead implements vfs.FileDescriptionImpl.PRead. func (fd *regularFileFD) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts vfs.ReadOptions) (int64, error) { if offset < 0 { return 0, syserror.EINVAL } - // Check that flags are supported. Silently ignore RWF_HIPRI. + // Check that flags are supported. + // + // TODO(gvisor.dev/issue/2601): Support select preadv2 flags. if opts.Flags&^linux.RWF_HIPRI != 0 { return 0, syserror.EOPNOTSUPP } @@ -126,7 +157,9 @@ func (fd *regularFileFD) PWrite(ctx context.Context, src usermem.IOSequence, off return 0, syserror.EINVAL } - // Check that flags are supported. Silently ignore RWF_HIPRI. + // Check that flags are supported. + // + // TODO(gvisor.dev/issue/2601): Support select pwritev2 flags. 
if opts.Flags&^linux.RWF_HIPRI != 0 { return 0, syserror.EOPNOTSUPP } diff --git a/pkg/sentry/fsimpl/gofer/special_file.go b/pkg/sentry/fsimpl/gofer/special_file.go index a016cbae1..3c4e7e2e4 100644 --- a/pkg/sentry/fsimpl/gofer/special_file.go +++ b/pkg/sentry/fsimpl/gofer/special_file.go @@ -130,7 +130,9 @@ func (fd *specialFileFD) PRead(ctx context.Context, dst usermem.IOSequence, offs return 0, syserror.EINVAL } - // Check that flags are supported. Silently ignore RWF_HIPRI. + // Check that flags are supported. + // + // TODO(gvisor.dev/issue/2601): Support select preadv2 flags. if opts.Flags&^linux.RWF_HIPRI != 0 { return 0, syserror.EOPNOTSUPP } @@ -176,7 +178,9 @@ func (fd *specialFileFD) PWrite(ctx context.Context, src usermem.IOSequence, off return 0, syserror.EINVAL } - // Check that flags are supported. Silently ignore RWF_HIPRI. + // Check that flags are supported. + // + // TODO(gvisor.dev/issue/2601): Support select pwritev2 flags. if opts.Flags&^linux.RWF_HIPRI != 0 { return 0, syserror.EOPNOTSUPP } @@ -235,8 +239,5 @@ func (fd *specialFileFD) Seek(ctx context.Context, offset int64, whence int32) ( // Sync implements vfs.FileDescriptionImpl.Sync. func (fd *specialFileFD) Sync(ctx context.Context) error { - if !fd.vfsfd.IsWritable() { - return nil - } - return fd.handle.sync(ctx) + return fd.dentry().syncSharedHandle(ctx) } diff --git a/pkg/sentry/fsimpl/gofer/time.go b/pkg/sentry/fsimpl/gofer/time.go index 1d5aa82dc..0eef4e16e 100644 --- a/pkg/sentry/fsimpl/gofer/time.go +++ b/pkg/sentry/fsimpl/gofer/time.go @@ -36,7 +36,7 @@ func statxTimestampFromDentry(ns int64) linux.StatxTimestamp { } } -// Preconditions: fs.interop != InteropModeShared. +// Preconditions: d.cachedMetadataAuthoritative() == true. func (d *dentry) touchAtime(mnt *vfs.Mount) { if mnt.Flags.NoATime { return @@ -51,8 +51,8 @@ func (d *dentry) touchAtime(mnt *vfs.Mount) { mnt.EndWrite() } -// Preconditions: fs.interop != InteropModeShared. The caller has successfully -// called vfs.Mount.CheckBeginWrite(). +// Preconditions: d.cachedMetadataAuthoritative() == true. The caller has +// successfully called vfs.Mount.CheckBeginWrite(). func (d *dentry) touchCtime() { now := d.fs.clock.Now().Nanoseconds() d.metadataMu.Lock() @@ -60,8 +60,8 @@ func (d *dentry) touchCtime() { d.metadataMu.Unlock() } -// Preconditions: fs.interop != InteropModeShared. The caller has successfully -// called vfs.Mount.CheckBeginWrite(). +// Preconditions: d.cachedMetadataAuthoritative() == true. The caller has +// successfully called vfs.Mount.CheckBeginWrite(). func (d *dentry) touchCMtime() { now := d.fs.clock.Now().Nanoseconds() d.metadataMu.Lock() @@ -70,6 +70,8 @@ func (d *dentry) touchCMtime() { d.metadataMu.Unlock() } +// Preconditions: d.cachedMetadataAuthoritative() == true. The caller has +// locked d.metadataMu. func (d *dentry) touchCMtimeLocked() { now := d.fs.clock.Now().Nanoseconds() atomic.StoreInt64(&d.mtime, now) diff --git a/pkg/sentry/fsimpl/host/host.go b/pkg/sentry/fsimpl/host/host.go index d5e73ddae..1cd2982cb 100644 --- a/pkg/sentry/fsimpl/host/host.go +++ b/pkg/sentry/fsimpl/host/host.go @@ -543,6 +543,16 @@ func (f *fileDescription) Release() { // noop } +// Allocate implements vfs.FileDescriptionImpl. +func (f *fileDescription) Allocate(ctx context.Context, mode, offset, length uint64) error { + if !f.inode.seekable { + return syserror.ESPIPE + } + + // TODO(gvisor.dev/issue/2923): Implement Allocate for non-pipe hostfds. + return syserror.EOPNOTSUPP +} + // PRead implements FileDescriptionImpl. 
func (f *fileDescription) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts vfs.ReadOptions) (int64, error) { i := f.inode @@ -578,8 +588,10 @@ func (f *fileDescription) Read(ctx context.Context, dst usermem.IOSequence, opts } func readFromHostFD(ctx context.Context, hostFD int, dst usermem.IOSequence, offset int64, flags uint32) (int64, error) { + // Check that flags are supported. + // // TODO(gvisor.dev/issue/2601): Support select preadv2 flags. - if flags != 0 { + if flags&^linux.RWF_HIPRI != 0 { return 0, syserror.EOPNOTSUPP } reader := hostfd.GetReadWriterAt(int32(hostFD), offset, flags) diff --git a/pkg/sentry/fsimpl/host/socket.go b/pkg/sentry/fsimpl/host/socket.go index 38f1fbfba..fd16bd92d 100644 --- a/pkg/sentry/fsimpl/host/socket.go +++ b/pkg/sentry/fsimpl/host/socket.go @@ -47,11 +47,6 @@ func newEndpoint(ctx context.Context, hostFD int, queue *waiter.Queue) (transpor return ep, nil } -// maxSendBufferSize is the maximum host send buffer size allowed for endpoint. -// -// N.B. 8MB is the default maximum on Linux (2 * sysctl_wmem_max). -const maxSendBufferSize = 8 << 20 - // ConnectedEndpoint is an implementation of transport.ConnectedEndpoint and // transport.Receiver. It is backed by a host fd that was imported at sentry // startup. This fd is shared with a hostfs inode, which retains ownership of @@ -114,10 +109,6 @@ func (c *ConnectedEndpoint) init() *syserr.Error { if err != nil { return syserr.FromError(err) } - if sndbuf > maxSendBufferSize { - log.Warningf("Socket send buffer too large: %d", sndbuf) - return syserr.ErrInvalidEndpointState - } c.stype = linux.SockType(stype) c.sndbuf = int64(sndbuf) diff --git a/pkg/sentry/fsimpl/kernfs/fd_impl_util.go b/pkg/sentry/fsimpl/kernfs/fd_impl_util.go index 5f7853a2a..ca8b8c63b 100644 --- a/pkg/sentry/fsimpl/kernfs/fd_impl_util.go +++ b/pkg/sentry/fsimpl/kernfs/fd_impl_util.go @@ -236,6 +236,11 @@ func (fd *GenericDirectoryFD) SetStat(ctx context.Context, opts vfs.SetStatOptio return inode.SetStat(ctx, fd.filesystem(), creds, opts) } +// Allocate implements vfs.FileDescriptionImpl.Allocate. +func (fd *GenericDirectoryFD) Allocate(ctx context.Context, mode, offset, length uint64) error { + return fd.DirectoryFileDescriptionDefaultImpl.Allocate(ctx, mode, offset, length) +} + // LockPOSIX implements vfs.FileDescriptionImpl.LockPOSIX. func (fd *GenericDirectoryFD) LockPOSIX(ctx context.Context, uid fslock.UniqueID, t fslock.LockType, start, length uint64, whence int16, block fslock.Blocker) error { return fd.Locks().LockPOSIX(ctx, &fd.vfsfd, uid, t, start, length, whence, block) diff --git a/pkg/sentry/fsimpl/proc/task_files.go b/pkg/sentry/fsimpl/proc/task_files.go index 286c23f01..9af43b859 100644 --- a/pkg/sentry/fsimpl/proc/task_files.go +++ b/pkg/sentry/fsimpl/proc/task_files.go @@ -231,8 +231,9 @@ func (d *cmdlineData) Generate(ctx context.Context, buf *bytes.Buffer) error { // Linux will return envp up to and including the first NULL character, // so find it. - if end := bytes.IndexByte(buf.Bytes()[ar.Length():], 0); end != -1 { - buf.Truncate(end) + envStart := int(ar.Length()) + if nullIdx := bytes.IndexByte(buf.Bytes()[envStart:], 0); nullIdx != -1 { + buf.Truncate(envStart + nullIdx) } } @@ -300,7 +301,7 @@ type idMapData struct { var _ dynamicInode = (*idMapData)(nil) -// Generate implements vfs.DynamicBytesSource.Generate. +// Generate implements vfs.WritableDynamicBytesSource.Generate. 
func (d *idMapData) Generate(ctx context.Context, buf *bytes.Buffer) error { var entries []auth.IDMapEntry if d.gids { @@ -314,6 +315,7 @@ func (d *idMapData) Generate(ctx context.Context, buf *bytes.Buffer) error { return nil } +// Write implements vfs.WritableDynamicBytesSource.Write. func (d *idMapData) Write(ctx context.Context, src usermem.IOSequence, offset int64) (int64, error) { // "In addition, the number of bytes written to the file must be less than // the system page size, and the write must be performed at the start of diff --git a/pkg/sentry/fsimpl/sys/sys.go b/pkg/sentry/fsimpl/sys/sys.go index fe02f7ee9..01ce30a4d 100644 --- a/pkg/sentry/fsimpl/sys/sys.go +++ b/pkg/sentry/fsimpl/sys/sys.go @@ -138,7 +138,7 @@ type cpuFile struct { // Generate implements vfs.DynamicBytesSource.Generate. func (c *cpuFile) Generate(ctx context.Context, buf *bytes.Buffer) error { - fmt.Fprintf(buf, "0-%d", c.maxCores-1) + fmt.Fprintf(buf, "0-%d\n", c.maxCores-1) return nil } diff --git a/pkg/sentry/fsimpl/sys/sys_test.go b/pkg/sentry/fsimpl/sys/sys_test.go index 4b3602d47..242d5fd12 100644 --- a/pkg/sentry/fsimpl/sys/sys_test.go +++ b/pkg/sentry/fsimpl/sys/sys_test.go @@ -51,7 +51,7 @@ func TestReadCPUFile(t *testing.T) { k := kernel.KernelFromContext(s.Ctx) maxCPUCores := k.ApplicationCores() - expected := fmt.Sprintf("0-%d", maxCPUCores-1) + expected := fmt.Sprintf("0-%d\n", maxCPUCores-1) for _, fname := range []string{"online", "possible", "present"} { pop := s.PathOpAtRoot(fmt.Sprintf("devices/system/cpu/%s", fname)) diff --git a/pkg/sentry/fsimpl/tmpfs/filesystem.go b/pkg/sentry/fsimpl/tmpfs/filesystem.go index 71ac7b8e6..ed40f6b52 100644 --- a/pkg/sentry/fsimpl/tmpfs/filesystem.go +++ b/pkg/sentry/fsimpl/tmpfs/filesystem.go @@ -407,10 +407,10 @@ func (d *dentry) open(ctx context.Context, rp *vfs.ResolvingPath, opts *vfs.Open case *regularFile: var fd regularFileFD fd.LockFD.Init(&d.inode.locks) - if err := fd.vfsfd.Init(&fd, opts.Flags, rp.Mount(), &d.vfsd, &vfs.FileDescriptionOptions{}); err != nil { + if err := fd.vfsfd.Init(&fd, opts.Flags, rp.Mount(), &d.vfsd, &vfs.FileDescriptionOptions{AllowDirectIO: true}); err != nil { return nil, err } - if opts.Flags&linux.O_TRUNC != 0 { + if !afterCreate && opts.Flags&linux.O_TRUNC != 0 { if _, err := impl.truncate(0); err != nil { return nil, err } @@ -423,7 +423,7 @@ func (d *dentry) open(ctx context.Context, rp *vfs.ResolvingPath, opts *vfs.Open } var fd directoryFD fd.LockFD.Init(&d.inode.locks) - if err := fd.vfsfd.Init(&fd, opts.Flags, rp.Mount(), &d.vfsd, &vfs.FileDescriptionOptions{}); err != nil { + if err := fd.vfsfd.Init(&fd, opts.Flags, rp.Mount(), &d.vfsd, &vfs.FileDescriptionOptions{AllowDirectIO: true}); err != nil { return nil, err } return &fd.vfsfd, nil diff --git a/pkg/sentry/fsimpl/tmpfs/regular_file.go b/pkg/sentry/fsimpl/tmpfs/regular_file.go index b805aadd0..1cdb46e6f 100644 --- a/pkg/sentry/fsimpl/tmpfs/regular_file.go +++ b/pkg/sentry/fsimpl/tmpfs/regular_file.go @@ -274,14 +274,32 @@ func (fd *regularFileFD) Release() { // noop } +// Allocate implements vfs.FileDescriptionImpl.Allocate. +func (fd *regularFileFD) Allocate(ctx context.Context, mode, offset, length uint64) error { + f := fd.inode().impl.(*regularFile) + + f.inode.mu.Lock() + defer f.inode.mu.Unlock() + oldSize := f.size + size := offset + length + if oldSize >= size { + return nil + } + _, err := f.truncateLocked(size) + return err +} + // PRead implements vfs.FileDescriptionImpl.PRead. 
func (fd *regularFileFD) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts vfs.ReadOptions) (int64, error) { if offset < 0 { return 0, syserror.EINVAL } - // Check that flags are supported. Silently ignore RWF_HIPRI. - if opts.Flags&^linux.RWF_HIPRI != 0 { + // Check that flags are supported. RWF_DSYNC/RWF_SYNC can be ignored since + // all state is in-memory. + // + // TODO(gvisor.dev/issue/2601): Support select preadv2 flags. + if opts.Flags&^(linux.RWF_HIPRI|linux.RWF_DSYNC|linux.RWF_SYNC) != 0 { return 0, syserror.EOPNOTSUPP } @@ -311,8 +329,11 @@ func (fd *regularFileFD) PWrite(ctx context.Context, src usermem.IOSequence, off return 0, syserror.EINVAL } - // Check that flags are supported. Silently ignore RWF_HIPRI. - if opts.Flags&^linux.RWF_HIPRI != 0 { + // Check that flags are supported. RWF_DSYNC/RWF_SYNC can be ignored since + // all state is in-memory. + // + // TODO(gvisor.dev/issue/2601): Support select preadv2 flags. + if opts.Flags&^(linux.RWF_HIPRI|linux.RWF_DSYNC|linux.RWF_SYNC) != 0 { return 0, syserror.EOPNOTSUPP } diff --git a/pkg/sentry/kernel/fasync/fasync.go b/pkg/sentry/kernel/fasync/fasync.go index 323f1dfa5..153d2cd9b 100644 --- a/pkg/sentry/kernel/fasync/fasync.go +++ b/pkg/sentry/kernel/fasync/fasync.go @@ -176,3 +176,13 @@ func (a *FileAsync) SetOwnerProcessGroup(requester *kernel.Task, recipient *kern a.recipientTG = nil a.recipientPG = recipient } + +// ClearOwner unsets the current signal recipient. +func (a *FileAsync) ClearOwner() { + a.mu.Lock() + defer a.mu.Unlock() + a.requester = nil + a.recipientT = nil + a.recipientTG = nil + a.recipientPG = nil +} diff --git a/pkg/sentry/kernel/pipe/vfs.go b/pkg/sentry/kernel/pipe/vfs.go index a4519363f..45d4c5fc1 100644 --- a/pkg/sentry/kernel/pipe/vfs.go +++ b/pkg/sentry/kernel/pipe/vfs.go @@ -200,6 +200,11 @@ func (fd *VFSPipeFD) Readiness(mask waiter.EventMask) waiter.EventMask { } } +// Allocate implements vfs.FileDescriptionImpl.Allocate. +func (fd *VFSPipeFD) Allocate(ctx context.Context, mode, offset, length uint64) error { + return syserror.ESPIPE +} + // EventRegister implements waiter.Waitable.EventRegister. func (fd *VFSPipeFD) EventRegister(e *waiter.Entry, mask waiter.EventMask) { fd.pipe.EventRegister(e, mask) diff --git a/pkg/sentry/socket/hostinet/socket_vfs2.go b/pkg/sentry/socket/hostinet/socket_vfs2.go index ad5f64799..8f192c62f 100644 --- a/pkg/sentry/socket/hostinet/socket_vfs2.go +++ b/pkg/sentry/socket/hostinet/socket_vfs2.go @@ -96,7 +96,12 @@ func (s *socketVFS2) Ioctl(ctx context.Context, uio usermem.IO, args arch.Syscal return ioctl(ctx, s.fd, uio, args) } -// PRead implements vfs.FileDescriptionImpl. +// Allocate implements vfs.FileDescriptionImpl.Allocate. +func (s *socketVFS2) Allocate(ctx context.Context, mode, offset, length uint64) error { + return syserror.ENODEV +} + +// PRead implements vfs.FileDescriptionImpl.PRead. 
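// (The ENODEV returned for sockets above, and the ESPIPE returned by
// VFSPipeFD.Allocate earlier, mirror Linux's fs/open.c:vfs_fallocate():
// FIFOs get ESPIPE, and any other non-regular, non-directory file gets ENODEV.)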
func (s *socketVFS2) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts vfs.ReadOptions) (int64, error) { return 0, syserror.ESPIPE } diff --git a/pkg/sentry/syscalls/linux/vfs2/fd.go b/pkg/sentry/syscalls/linux/vfs2/fd.go index 7e4c6a56e..517394ba9 100644 --- a/pkg/sentry/syscalls/linux/vfs2/fd.go +++ b/pkg/sentry/syscalls/linux/vfs2/fd.go @@ -156,11 +156,10 @@ func Fcntl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall } return uintptr(n), nil, nil case linux.F_GETOWN: - a := file.AsyncHandler() - if a == nil { + owner, hasOwner := getAsyncOwner(t, file) + if !hasOwner { return 0, nil, nil } - owner := getAsyncOwner(t, a.(*fasync.FileAsync)) if owner.Type == linux.F_OWNER_PGRP { return uintptr(-owner.PID), nil, nil } @@ -176,26 +175,21 @@ func Fcntl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall ownerType = linux.F_OWNER_PGRP who = -who } - a := file.SetAsyncHandler(fasync.NewVFS2).(*fasync.FileAsync) - return 0, nil, setAsyncOwner(t, a, ownerType, who) + return 0, nil, setAsyncOwner(t, file, ownerType, who) case linux.F_GETOWN_EX: - a := file.AsyncHandler() - if a == nil { + owner, hasOwner := getAsyncOwner(t, file) + if !hasOwner { return 0, nil, nil } - addr := args[2].Pointer() - owner := getAsyncOwner(t, a.(*fasync.FileAsync)) - _, err := t.CopyOut(addr, &owner) + _, err := t.CopyOut(args[2].Pointer(), &owner) return 0, nil, err case linux.F_SETOWN_EX: - addr := args[2].Pointer() var owner linux.FOwnerEx - n, err := t.CopyIn(addr, &owner) + n, err := t.CopyIn(args[2].Pointer(), &owner) if err != nil { return 0, nil, err } - a := file.SetAsyncHandler(fasync.NewVFS2).(*fasync.FileAsync) - return uintptr(n), nil, setAsyncOwner(t, a, owner.Type, owner.PID) + return uintptr(n), nil, setAsyncOwner(t, file, owner.Type, owner.PID) case linux.F_GETPIPE_SZ: pipefile, ok := file.Impl().(*pipe.VFSPipeFD) if !ok { @@ -219,30 +213,48 @@ func Fcntl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall } } -func getAsyncOwner(t *kernel.Task, a *fasync.FileAsync) linux.FOwnerEx { - ot, otg, opg := a.Owner() +func getAsyncOwner(t *kernel.Task, fd *vfs.FileDescription) (ownerEx linux.FOwnerEx, hasOwner bool) { + a := fd.AsyncHandler() + if a == nil { + return linux.FOwnerEx{}, false + } + + ot, otg, opg := a.(*fasync.FileAsync).Owner() switch { case ot != nil: return linux.FOwnerEx{ Type: linux.F_OWNER_TID, PID: int32(t.PIDNamespace().IDOfTask(ot)), - } + }, true case otg != nil: return linux.FOwnerEx{ Type: linux.F_OWNER_PID, PID: int32(t.PIDNamespace().IDOfThreadGroup(otg)), - } + }, true case opg != nil: return linux.FOwnerEx{ Type: linux.F_OWNER_PGRP, PID: int32(t.PIDNamespace().IDOfProcessGroup(opg)), - } + }, true default: - return linux.FOwnerEx{} + return linux.FOwnerEx{}, true } } -func setAsyncOwner(t *kernel.Task, a *fasync.FileAsync, ownerType, pid int32) error { +func setAsyncOwner(t *kernel.Task, fd *vfs.FileDescription, ownerType, pid int32) error { + switch ownerType { + case linux.F_OWNER_TID, linux.F_OWNER_PID, linux.F_OWNER_PGRP: + // Acceptable type. 
+ default: + return syserror.EINVAL + } + + a := fd.SetAsyncHandler(fasync.NewVFS2).(*fasync.FileAsync) + if pid == 0 { + a.ClearOwner() + return nil + } + switch ownerType { case linux.F_OWNER_TID: task := t.PIDNamespace().TaskWithID(kernel.ThreadID(pid)) diff --git a/pkg/sentry/syscalls/linux/vfs2/filesystem.go b/pkg/sentry/syscalls/linux/vfs2/filesystem.go index 5dac77e4d..b12b5967b 100644 --- a/pkg/sentry/syscalls/linux/vfs2/filesystem.go +++ b/pkg/sentry/syscalls/linux/vfs2/filesystem.go @@ -18,6 +18,7 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/limits" "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" @@ -239,6 +240,55 @@ func renameat(t *kernel.Task, olddirfd int32, oldpathAddr usermem.Addr, newdirfd }) } +// Fallocate implements linux system call fallocate(2). +func Fallocate(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { + fd := args[0].Int() + mode := args[1].Uint64() + offset := args[2].Int64() + length := args[3].Int64() + + file := t.GetFileVFS2(fd) + + if file == nil { + return 0, nil, syserror.EBADF + } + defer file.DecRef() + + if !file.IsWritable() { + return 0, nil, syserror.EBADF + } + + if mode != 0 { + return 0, nil, syserror.ENOTSUP + } + + if offset < 0 || length <= 0 { + return 0, nil, syserror.EINVAL + } + + size := offset + length + + if size < 0 { + return 0, nil, syserror.EFBIG + } + + limit := limits.FromContext(t).Get(limits.FileSize).Cur + + if uint64(size) >= limit { + t.SendSignal(&arch.SignalInfo{ + Signo: int32(linux.SIGXFSZ), + Code: arch.SignalInfoUser, + }) + return 0, nil, syserror.EFBIG + } + + return 0, nil, file.Impl().Allocate(t, mode, uint64(offset), uint64(length)) + + // File length modified, generate notification. + // TODO(gvisor.dev/issue/1479): Reenable when Inotify is ported. + // file.Dirent.InotifyEvent(linux.IN_MODIFY, 0) +} + // Rmdir implements Linux syscall rmdir(2). func Rmdir(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { pathAddr := args[0].Pointer() diff --git a/pkg/sentry/syscalls/linux/vfs2/ioctl.go b/pkg/sentry/syscalls/linux/vfs2/ioctl.go index 0399c0db4..fd6ab94b2 100644 --- a/pkg/sentry/syscalls/linux/vfs2/ioctl.go +++ b/pkg/sentry/syscalls/linux/vfs2/ioctl.go @@ -57,6 +57,49 @@ func Ioctl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall flags &^= linux.O_NONBLOCK } return 0, nil, file.SetStatusFlags(t, t.Credentials(), flags) + + case linux.FIOASYNC: + var set int32 + if _, err := t.CopyIn(args[2].Pointer(), &set); err != nil { + return 0, nil, err + } + flags := file.StatusFlags() + if set != 0 { + flags |= linux.O_ASYNC + } else { + flags &^= linux.O_ASYNC + } + file.SetStatusFlags(t, t.Credentials(), flags) + return 0, nil, nil + + case linux.FIOGETOWN, linux.SIOCGPGRP: + var who int32 + owner, hasOwner := getAsyncOwner(t, file) + if hasOwner { + if owner.Type == linux.F_OWNER_PGRP { + who = -owner.PID + } else { + who = owner.PID + } + } + _, err := t.CopyOut(args[2].Pointer(), &who) + return 0, nil, err + + case linux.FIOSETOWN, linux.SIOCSPGRP: + var who int32 + if _, err := t.CopyIn(args[2].Pointer(), &who); err != nil { + return 0, nil, err + } + ownerType := int32(linux.F_OWNER_PID) + if who < 0 { + // Check for overflow before flipping the sign. 
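	// (For an int32, negating math.MinInt32 wraps back to math.MinInt32, so a
	// plain `who = -who` would leave a negative process-group ID. `who-1 > who`
	// is true only for that single wraparound value, which is rejected here,
	// matching Linux's f_setown().)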
+ if who-1 > who { + return 0, nil, syserror.EINVAL + } + ownerType = linux.F_OWNER_PGRP + who = -who + } + return 0, nil, setAsyncOwner(t, file, ownerType, who) } ret, err := file.Ioctl(t, t.MemoryManager(), args) diff --git a/pkg/sentry/syscalls/linux/vfs2/sync.go b/pkg/sentry/syscalls/linux/vfs2/sync.go index 365250b0b..0d0ebf46a 100644 --- a/pkg/sentry/syscalls/linux/vfs2/sync.go +++ b/pkg/sentry/syscalls/linux/vfs2/sync.go @@ -65,10 +65,8 @@ func SyncFileRange(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel nbytes := args[2].Int64() flags := args[3].Uint() - if offset < 0 { - return 0, nil, syserror.EINVAL - } - if nbytes < 0 { + // Check for negative values and overflow. + if offset < 0 || offset+nbytes < 0 { return 0, nil, syserror.EINVAL } if flags&^(linux.SYNC_FILE_RANGE_WAIT_BEFORE|linux.SYNC_FILE_RANGE_WRITE|linux.SYNC_FILE_RANGE_WAIT_AFTER) != 0 { @@ -81,7 +79,37 @@ func SyncFileRange(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel } defer file.DecRef() - // TODO(gvisor.dev/issue/1897): Avoid writeback of data ranges outside of - // [offset, offset+nbytes). - return 0, nil, file.Sync(t) + // TODO(gvisor.dev/issue/1897): Currently, the only file syncing we support + // is a full-file sync, i.e. fsync(2). As a result, there are severe + // limitations on how much we support sync_file_range: + // - In Linux, sync_file_range(2) doesn't write out the file's metadata, even + // if the file size is changed. We do. + // - We always sync the entire file instead of [offset, offset+nbytes). + // - We do not support the use of WAIT_BEFORE without WAIT_AFTER. For + // correctness, we would have to perform a write-out every time WAIT_BEFORE + // was used, but this would be much more expensive than expected if there + // were no write-out operations in progress. + // - Whenever WAIT_AFTER is used, we sync the file. + // - Ignore WRITE. If this flag is used with WAIT_AFTER, then the file will + // be synced anyway. If this flag is used without WAIT_AFTER, then it is + // safe (and less expensive) to do nothing, because the syscall will not + // wait for the write-out to complete--we only need to make sure that the + // next time WAIT_BEFORE or WAIT_AFTER are used, the write-out completes. + // - According to fs/sync.c, WAIT_BEFORE|WAIT_AFTER "will detect any I/O + // errors or ENOSPC conditions and will return those to the caller, after + // clearing the EIO and ENOSPC flags in the address_space." We don't do + // this. 
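// For illustration only: a userspace sketch of the flag combinations accepted by
// the checks that follow, using golang.org/x/sys/unix and an assumed scratch file
// path. WRITE alone is accepted but treated as a no-op, any use of WAIT_AFTER
// triggers a full-file sync, and WAIT_BEFORE without WAIT_AFTER is reported as
// unimplemented (ENOSYS).
package main

import (
	"log"
	"os"

	"golang.org/x/sys/unix"
)

func main() {
	f, err := os.OpenFile("/tmp/data", os.O_RDWR|os.O_CREATE, 0644)
	if err != nil {
		log.Fatal(err)
	}
	defer f.Close()
	fd := int(f.Fd())

	// Asynchronous write-out request: accepted, but a no-op here.
	if err := unix.SyncFileRange(fd, 0, 4096, unix.SYNC_FILE_RANGE_WRITE); err != nil {
		log.Printf("WRITE: %v", err)
	}

	// WAIT_AFTER (with or without WRITE): the whole file is synced.
	if err := unix.SyncFileRange(fd, 0, 4096,
		unix.SYNC_FILE_RANGE_WRITE|unix.SYNC_FILE_RANGE_WAIT_AFTER); err != nil {
		log.Printf("WRITE|WAIT_AFTER: %v", err)
	}

	// WAIT_BEFORE without WAIT_AFTER is the one rejected combination (ENOSYS).
	err = unix.SyncFileRange(fd, 0, 4096, unix.SYNC_FILE_RANGE_WAIT_BEFORE)
	log.Printf("WAIT_BEFORE alone: %v", err)
}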
+ + if flags&linux.SYNC_FILE_RANGE_WAIT_BEFORE != 0 && + flags&linux.SYNC_FILE_RANGE_WAIT_AFTER == 0 { + t.Kernel().EmitUnimplementedEvent(t) + return 0, nil, syserror.ENOSYS + } + + if flags&linux.SYNC_FILE_RANGE_WAIT_AFTER != 0 { + if err := file.Sync(t); err != nil { + return 0, nil, syserror.ConvertIntr(err, kernel.ERESTARTSYS) + } + } + return 0, nil, nil } diff --git a/pkg/sentry/syscalls/linux/vfs2/vfs2.go b/pkg/sentry/syscalls/linux/vfs2/vfs2.go index 9e60c4a1c..8f497ecc7 100644 --- a/pkg/sentry/syscalls/linux/vfs2/vfs2.go +++ b/pkg/sentry/syscalls/linux/vfs2/vfs2.go @@ -138,7 +138,7 @@ func Override() { s.Table[282] = syscalls.Supported("signalfd", Signalfd) s.Table[283] = syscalls.Supported("timerfd_create", TimerfdCreate) s.Table[284] = syscalls.Supported("eventfd", Eventfd) - delete(s.Table, 285) // fallocate + s.Table[285] = syscalls.PartiallySupported("fallocate", Fallocate, "Not all options are supported.", nil) s.Table[286] = syscalls.Supported("timerfd_settime", TimerfdSettime) s.Table[287] = syscalls.Supported("timerfd_gettime", TimerfdGettime) s.Table[288] = syscalls.Supported("accept4", Accept4) diff --git a/pkg/sentry/vfs/file_description.go b/pkg/sentry/vfs/file_description.go index cd1db14ac..0c42574db 100644 --- a/pkg/sentry/vfs/file_description.go +++ b/pkg/sentry/vfs/file_description.go @@ -91,8 +91,7 @@ type FileDescription struct { // FileDescriptionOptions contains options to FileDescription.Init(). type FileDescriptionOptions struct { - // If AllowDirectIO is true, allow O_DIRECT to be set on the file. This is - // usually only the case if O_DIRECT would actually have an effect. + // If AllowDirectIO is true, allow O_DIRECT to be set on the file. AllowDirectIO bool // If DenyPRead is true, calls to FileDescription.PRead() return ESPIPE. @@ -355,6 +354,10 @@ type FileDescriptionImpl interface { // represented by the FileDescription. StatFS(ctx context.Context) (linux.Statfs, error) + // Allocate grows file represented by FileDescription to offset + length bytes. + // Only mode == 0 is supported currently. + Allocate(ctx context.Context, mode, offset, length uint64) error + // waiter.Waitable methods may be used to poll for I/O events. waiter.Waitable diff --git a/pkg/sentry/vfs/file_description_impl_util.go b/pkg/sentry/vfs/file_description_impl_util.go index 3fec0d6d6..6b8b4ad49 100644 --- a/pkg/sentry/vfs/file_description_impl_util.go +++ b/pkg/sentry/vfs/file_description_impl_util.go @@ -56,6 +56,12 @@ func (FileDescriptionDefaultImpl) StatFS(ctx context.Context) (linux.Statfs, err return linux.Statfs{}, syserror.ENOSYS } +// Allocate implements FileDescriptionImpl.Allocate analogously to +// fallocate called on regular file, directory or FIFO in Linux. +func (FileDescriptionDefaultImpl) Allocate(ctx context.Context, mode, offset, length uint64) error { + return syserror.ENODEV +} + // Readiness implements waiter.Waitable.Readiness analogously to // file_operations::poll == NULL in Linux. func (FileDescriptionDefaultImpl) Readiness(mask waiter.EventMask) waiter.EventMask { @@ -158,6 +164,11 @@ func (FileDescriptionDefaultImpl) Removexattr(ctx context.Context, name string) // implementations of non-directory I/O methods that return EISDIR. type DirectoryFileDescriptionDefaultImpl struct{} +// Allocate implements DirectoryFileDescriptionDefaultImpl.Allocate. +func (DirectoryFileDescriptionDefaultImpl) Allocate(ctx context.Context, mode, offset, length uint64) error { + return syserror.EISDIR +} + // PRead implements FileDescriptionImpl.PRead. 
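// For illustration only: a minimal userspace sketch of the fallocate(2) semantics
// wired up above (the vfs2 Fallocate syscall plus the FileDescriptionImpl.Allocate
// defaults), assuming golang.org/x/sys/unix and a writable /tmp. Only mode == 0 is
// accepted, an allocation that would not grow the file is a no-op, pipes fail with
// ESPIPE, and the resulting size is still subject to RLIMIT_FSIZE (SIGXFSZ/EFBIG).
package main

import (
	"fmt"
	"os"

	"golang.org/x/sys/unix"
)

func main() {
	f, err := os.Create("/tmp/fallocate-demo")
	if err != nil {
		panic(err)
	}
	defer f.Close()

	// Plain allocation (mode == 0) grows the file to offset+length bytes.
	fmt.Println(unix.Fallocate(int(f.Fd()), 0, 0, 1<<20)) // <nil>

	// Any non-zero mode (e.g. FALLOC_FL_KEEP_SIZE) is rejected with ENOTSUP by
	// the implementation above (Linux itself would accept KEEP_SIZE).
	fmt.Println(unix.Fallocate(int(f.Fd()), unix.FALLOC_FL_KEEP_SIZE, 0, 4096))

	// fallocate(2) on a pipe fails with ESPIPE.
	r, w, _ := os.Pipe()
	defer r.Close()
	defer w.Close()
	fmt.Println(unix.Fallocate(int(w.Fd()), 0, 0, 4096))
}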
func (DirectoryFileDescriptionDefaultImpl) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts ReadOptions) (int64, error) { return 0, syserror.EISDIR diff --git a/pkg/sentry/vfs/inotify.go b/pkg/sentry/vfs/inotify.go index 509034531..c2e21ac5f 100644 --- a/pkg/sentry/vfs/inotify.go +++ b/pkg/sentry/vfs/inotify.go @@ -148,6 +148,11 @@ func (i *Inotify) Release() { } } +// Allocate implements FileDescription.Allocate. +func (i *Inotify) Allocate(ctx context.Context, mode, offset, length uint64) error { + panic("Allocate should not be called on read-only inotify fds") +} + // EventRegister implements waiter.Waitable. func (i *Inotify) EventRegister(e *waiter.Entry, mask waiter.EventMask) { i.queue.EventRegister(e, mask) diff --git a/pkg/tcpip/transport/tcp/connect.go b/pkg/tcpip/transport/tcp/connect.go index 9d4dce826..81b740115 100644 --- a/pkg/tcpip/transport/tcp/connect.go +++ b/pkg/tcpip/transport/tcp/connect.go @@ -509,9 +509,7 @@ func (h *handshake) execute() *tcpip.Error { // Initialize the resend timer. resendWaker := sleep.Waker{} timeOut := time.Duration(time.Second) - rt := time.AfterFunc(timeOut, func() { - resendWaker.Assert() - }) + rt := time.AfterFunc(timeOut, resendWaker.Assert) defer rt.Stop() // Set up the wakers. @@ -1050,8 +1048,8 @@ func (e *endpoint) tryDeliverSegmentFromClosedEndpoint(s *segment) { panic("current endpoint not removed from demuxer, enqueing segments to itself") } - if ep.(*endpoint).enqueueSegment(s) { - ep.(*endpoint).newSegmentWaker.Assert() + if ep := ep.(*endpoint); ep.enqueueSegment(s) { + ep.newSegmentWaker.Assert() } } @@ -1120,7 +1118,7 @@ func (e *endpoint) handleReset(s *segment) (ok bool, err *tcpip.Error) { func (e *endpoint) handleSegments(fastPath bool) *tcpip.Error { checkRequeue := true for i := 0; i < maxSegmentsPerWake; i++ { - if e.EndpointState() == StateClose || e.EndpointState() == StateError { + if e.EndpointState().closed() { return nil } s := e.segmentQueue.dequeue() @@ -1440,9 +1438,7 @@ func (e *endpoint) protocolMainLoop(handshake bool, wakerInitDone chan<- struct{ if e.EndpointState() == StateFinWait2 && e.closed { // The socket has been closed and we are in FIN_WAIT2 // so start the FIN_WAIT2 timer. - closeTimer = time.AfterFunc(e.tcpLingerTimeout, func() { - closeWaker.Assert() - }) + closeTimer = time.AfterFunc(e.tcpLingerTimeout, closeWaker.Assert) e.waiterQueue.Notify(waiter.EventHUp | waiter.EventErr | waiter.EventIn | waiter.EventOut) } } @@ -1460,7 +1456,7 @@ func (e *endpoint) protocolMainLoop(handshake bool, wakerInitDone chan<- struct{ return err } } - if e.EndpointState() != StateClose && e.EndpointState() != StateError { + if !e.EndpointState().closed() { // Only block the worker if the endpoint // is not in closed state or error state. 
close(e.drainDone) @@ -1526,7 +1522,12 @@ func (e *endpoint) protocolMainLoop(handshake bool, wakerInitDone chan<- struct{ } loop: - for e.EndpointState() != StateTimeWait && e.EndpointState() != StateClose && e.EndpointState() != StateError { + for { + switch e.EndpointState() { + case StateTimeWait, StateClose, StateError: + break loop + } + e.mu.Unlock() v, _ := s.Fetch(true) e.mu.Lock() diff --git a/pkg/tcpip/transport/tcp/dispatcher.go b/pkg/tcpip/transport/tcp/dispatcher.go index 047704c80..98aecab9e 100644 --- a/pkg/tcpip/transport/tcp/dispatcher.go +++ b/pkg/tcpip/transport/tcp/dispatcher.go @@ -15,6 +15,8 @@ package tcp import ( + "encoding/binary" + "gvisor.dev/gvisor/pkg/rand" "gvisor.dev/gvisor/pkg/sleep" "gvisor.dev/gvisor/pkg/sync" @@ -66,89 +68,68 @@ func (q *epQueue) empty() bool { // processor is responsible for processing packets queued to a tcp endpoint. type processor struct { epQ epQueue + sleeper sleep.Sleeper newEndpointWaker sleep.Waker closeWaker sleep.Waker - id int - wg sync.WaitGroup -} - -func newProcessor(id int) *processor { - p := &processor{ - id: id, - } - p.wg.Add(1) - go p.handleSegments() - return p } func (p *processor) close() { p.closeWaker.Assert() } -func (p *processor) wait() { - p.wg.Wait() -} - func (p *processor) queueEndpoint(ep *endpoint) { // Queue an endpoint for processing by the processor goroutine. p.epQ.enqueue(ep) p.newEndpointWaker.Assert() } -func (p *processor) handleSegments() { - const newEndpointWaker = 1 - const closeWaker = 2 - s := sleep.Sleeper{} - s.AddWaker(&p.newEndpointWaker, newEndpointWaker) - s.AddWaker(&p.closeWaker, closeWaker) - defer s.Done() +const ( + newEndpointWaker = 1 + closeWaker = 2 +) + +func (p *processor) start(wg *sync.WaitGroup) { + defer wg.Done() + defer p.sleeper.Done() + for { - id, ok := s.Fetch(true) - if ok && id == closeWaker { - p.wg.Done() - return + if id, _ := p.sleeper.Fetch(true); id == closeWaker { + break } - for ep := p.epQ.dequeue(); ep != nil; ep = p.epQ.dequeue() { + for { + ep := p.epQ.dequeue() + if ep == nil { + break + } if ep.segmentQueue.empty() { continue } - // If socket has transitioned out of connected state - // then just let the worker handle the packet. + // If socket has transitioned out of connected state then just let the + // worker handle the packet. // - // NOTE: We read this outside of e.mu lock which means - // that by the time we get to handleSegments the - // endpoint may not be in ESTABLISHED. But this should - // be fine as all normal shutdown states are handled by - // handleSegments and if the endpoint moves to a - // CLOSED/ERROR state then handleSegments is a noop. - if ep.EndpointState() != StateEstablished { - ep.newSegmentWaker.Assert() - continue - } - - if !ep.mu.TryLock() { - ep.newSegmentWaker.Assert() - continue - } - // If the endpoint is in a connected state then we do - // direct delivery to ensure low latency and avoid - // scheduler interactions. - if err := ep.handleSegments(true /* fastPath */); err != nil || ep.EndpointState() == StateClose { - // Send any active resets if required. - if err != nil { + // NOTE: We read this outside of e.mu lock which means that by the time + // we get to handleSegments the endpoint may not be in ESTABLISHED. But + // this should be fine as all normal shutdown states are handled by + // handleSegments and if the endpoint moves to a CLOSED/ERROR state + // then handleSegments is a noop. 
+ if ep.EndpointState() == StateEstablished && ep.mu.TryLock() { + // If the endpoint is in a connected state then we do direct delivery + // to ensure low latency and avoid scheduler interactions. + switch err := ep.handleSegments(true /* fastPath */); { + case err != nil: + // Send any active resets if required. ep.resetConnectionLocked(err) + fallthrough + case ep.EndpointState() == StateClose: + ep.notifyProtocolGoroutine(notifyTickleWorker) + case !ep.segmentQueue.empty(): + p.epQ.enqueue(ep) } - ep.notifyProtocolGoroutine(notifyTickleWorker) ep.mu.Unlock() - continue - } - - if !ep.segmentQueue.empty() { - p.epQ.enqueue(ep) + } else { + ep.newSegmentWaker.Assert() } - - ep.mu.Unlock() } } } @@ -159,31 +140,36 @@ func (p *processor) handleSegments() { // hash of the endpoint id to ensure that delivery for the same endpoint happens // in-order. type dispatcher struct { - processors []*processor + processors []processor seed uint32 -} - -func newDispatcher(nProcessors int) *dispatcher { - processors := []*processor{} - for i := 0; i < nProcessors; i++ { - processors = append(processors, newProcessor(i)) - } - return &dispatcher{ - processors: processors, - seed: generateRandUint32(), + wg sync.WaitGroup +} + +func (d *dispatcher) init(nProcessors int) { + d.close() + d.wait() + d.processors = make([]processor, nProcessors) + d.seed = generateRandUint32() + for i := range d.processors { + p := &d.processors[i] + p.sleeper.AddWaker(&p.newEndpointWaker, newEndpointWaker) + p.sleeper.AddWaker(&p.closeWaker, closeWaker) + d.wg.Add(1) + // NB: sleeper-waker registration must happen synchronously to avoid races + // with `close`. It's possible to pull all this logic into `start`, but + // that results in a heap-allocated function literal. + go p.start(&d.wg) } } func (d *dispatcher) close() { - for _, p := range d.processors { - p.close() + for i := range d.processors { + d.processors[i].close() } } func (d *dispatcher) wait() { - for _, p := range d.processors { - p.wait() - } + d.wg.Wait() } func (d *dispatcher) queuePacket(r *stack.Route, stackEP stack.TransportEndpoint, id stack.TransportEndpointID, pkt *stack.PacketBuffer) { @@ -231,20 +217,18 @@ func generateRandUint32() uint32 { if _, err := rand.Read(b); err != nil { panic(err) } - return uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24 + return binary.LittleEndian.Uint32(b) } func (d *dispatcher) selectProcessor(id stack.TransportEndpointID) *processor { - payload := []byte{ - byte(id.LocalPort), - byte(id.LocalPort >> 8), - byte(id.RemotePort), - byte(id.RemotePort >> 8)} + var payload [4]byte + binary.LittleEndian.PutUint16(payload[0:], id.LocalPort) + binary.LittleEndian.PutUint16(payload[2:], id.RemotePort) h := jenkins.Sum32(d.seed) - h.Write(payload) + h.Write(payload[:]) h.Write([]byte(id.LocalAddress)) h.Write([]byte(id.RemoteAddress)) - return d.processors[h.Sum32()%uint32(len(d.processors))] + return &d.processors[h.Sum32()%uint32(len(d.processors))] } diff --git a/pkg/tcpip/transport/tcp/endpoint.go b/pkg/tcpip/transport/tcp/endpoint.go index 99a691815..bd3ec5a8d 100644 --- a/pkg/tcpip/transport/tcp/endpoint.go +++ b/pkg/tcpip/transport/tcp/endpoint.go @@ -1212,6 +1212,16 @@ func (e *endpoint) SetOwner(owner tcpip.PacketOwner) { // Read reads data from the endpoint. func (e *endpoint) Read(*tcpip.FullAddress) (buffer.View, tcpip.ControlMessages, *tcpip.Error) { e.LockUser() + defer e.UnlockUser() + + // When in SYN-SENT state, let the caller block on the receive. 
+ // An application can initiate a non-blocking connect and then block + // on a receive. It can expect to read any data after the handshake + // is complete. RFC793, section 3.9, p58. + if e.EndpointState() == StateSynSent { + return buffer.View{}, tcpip.ControlMessages{}, tcpip.ErrWouldBlock + } + // The endpoint can be read if it's connected, or if it's already closed // but has some pending unread data. Also note that a RST being received // would cause the state to become StateError so we should allow the @@ -1221,7 +1231,6 @@ func (e *endpoint) Read(*tcpip.FullAddress) (buffer.View, tcpip.ControlMessages, if s := e.EndpointState(); !s.connected() && s != StateClose && bufUsed == 0 { e.rcvListMu.Unlock() he := e.HardError - e.UnlockUser() if s == StateError { return buffer.View{}, tcpip.ControlMessages{}, he } @@ -1231,7 +1240,6 @@ func (e *endpoint) Read(*tcpip.FullAddress) (buffer.View, tcpip.ControlMessages, v, err := e.readLocked() e.rcvListMu.Unlock() - e.UnlockUser() if err == tcpip.ErrClosedForReceive { e.stats.ReadErrors.ReadClosed.Increment() diff --git a/pkg/tcpip/transport/tcp/protocol.go b/pkg/tcpip/transport/tcp/protocol.go index f2ae6ce50..b34e47bbd 100644 --- a/pkg/tcpip/transport/tcp/protocol.go +++ b/pkg/tcpip/transport/tcp/protocol.go @@ -174,7 +174,7 @@ type protocol struct { maxRetries uint32 synRcvdCount synRcvdCounter synRetries uint8 - dispatcher *dispatcher + dispatcher dispatcher } // Number returns the tcp protocol number. @@ -515,7 +515,7 @@ func (*protocol) Parse(pkt *stack.PacketBuffer) bool { // NewProtocol returns a TCP transport protocol. func NewProtocol() stack.TransportProtocol { - return &protocol{ + p := protocol{ sendBufferSize: SendBufferSizeOption{ Min: MinBufferSize, Default: DefaultSendBufferSize, @@ -531,10 +531,11 @@ func NewProtocol() stack.TransportProtocol { tcpLingerTimeout: DefaultTCPLingerTimeout, tcpTimeWaitTimeout: DefaultTCPTimeWaitTimeout, synRcvdCount: synRcvdCounter{threshold: SynRcvdCountThreshold}, - dispatcher: newDispatcher(runtime.GOMAXPROCS(0)), synRetries: DefaultSynRetries, minRTO: MinRTO, maxRTO: MaxRTO, maxRetries: MaxRetries, } + p.dispatcher.init(runtime.GOMAXPROCS(0)) + return &p } diff --git a/pkg/tcpip/transport/tcp/snd.go b/pkg/tcpip/transport/tcp/snd.go index acacb42e4..5862c32f2 100644 --- a/pkg/tcpip/transport/tcp/snd.go +++ b/pkg/tcpip/transport/tcp/snd.go @@ -833,25 +833,6 @@ func (s *sender) maybeSendSegment(seg *segment, limit int, end seqnum.Value) (se panic("Netstack queues FIN segments without data.") } - segEnd = seg.sequenceNumber.Add(seqnum.Size(seg.data.Size())) - // If the entire segment cannot be accomodated in the receiver - // advertized window, skip splitting and sending of the segment. - // ref: net/ipv4/tcp_output.c::tcp_snd_wnd_test() - // - // Linux checks this for all segment transmits not triggered - // by a probe timer. On this condition, it defers the segment - // split and transmit to a short probe timer. - // ref: include/net/tcp.h::tcp_check_probe_timer() - // ref: net/ipv4/tcp_output.c::tcp_write_wakeup() - // - // Instead of defining a new transmit timer, we attempt to split the - // segment right here if there are no pending segments. - // If there are pending segments, segment transmits are deferred - // to the retransmit timer handler. 
- if s.sndUna != s.sndNxt && !segEnd.LessThan(end) { - return false - } - if !seg.sequenceNumber.LessThan(end) { return false } @@ -861,14 +842,48 @@ func (s *sender) maybeSendSegment(seg *segment, limit int, end seqnum.Value) (se return false } - // The segment size limit is computed as a function of sender congestion - // window and MSS. When sender congestion window is > 1, this limit can - // be larger than MSS. Ensure that the currently available send space - // is not greater than minimum of this limit and MSS. + // If the whole segment or at least 1MSS sized segment cannot + // be accomodated in the receiver advertized window, skip + // splitting and sending of the segment. ref: + // net/ipv4/tcp_output.c::tcp_snd_wnd_test() + // + // Linux checks this for all segment transmits not triggered by + // a probe timer. On this condition, it defers the segment split + // and transmit to a short probe timer. + // + // ref: include/net/tcp.h::tcp_check_probe_timer() + // ref: net/ipv4/tcp_output.c::tcp_write_wakeup() + // + // Instead of defining a new transmit timer, we attempt to split + // the segment right here if there are no pending segments. If + // there are pending segments, segment transmits are deferred to + // the retransmit timer handler. + if s.sndUna != s.sndNxt { + switch { + case available >= seg.data.Size(): + // OK to send, the whole segments fits in the + // receiver's advertised window. + case available >= s.maxPayloadSize: + // OK to send, at least 1 MSS sized segment fits + // in the receiver's advertised window. + default: + return false + } + } + + // The segment size limit is computed as a function of sender + // congestion window and MSS. When sender congestion window is > + // 1, this limit can be larger than MSS. Ensure that the + // currently available send space is not greater than minimum of + // this limit and MSS. if available > limit { available = limit } - if available > s.maxPayloadSize { + + // If GSO is not in use then cap available to + // maxPayloadSize. When GSO is in use the gVisor GSO logic or + // the host GSO logic will cap the segment to the correct size. + if s.ep.gso == nil && available > s.maxPayloadSize { available = s.maxPayloadSize } diff --git a/test/packetimpact/runner/defs.bzl b/test/packetimpact/runner/defs.bzl index ea66b9756..77cdfea12 100644 --- a/test/packetimpact/runner/defs.bzl +++ b/test/packetimpact/runner/defs.bzl @@ -20,12 +20,12 @@ def _packetimpact_test_impl(ctx): ]) ctx.actions.write(bench, bench_content, is_executable = True) - transitive_files = depset() + transitive_files = [] if hasattr(ctx.attr._test_runner, "data_runfiles"): - transitive_files = depset(ctx.attr._test_runner.data_runfiles.files) + transitive_files.append(ctx.attr._test_runner.data_runfiles.files) runfiles = ctx.runfiles( files = [test_runner] + ctx.files.testbench_binary + ctx.files._posix_server_binary, - transitive_files = transitive_files, + transitive_files = depset(transitive = transitive_files), collect_default = True, collect_data = True, ) diff --git a/test/packetimpact/runner/packetimpact_test.go b/test/packetimpact/runner/packetimpact_test.go index 3cac5915f..c0a2620de 100644 --- a/test/packetimpact/runner/packetimpact_test.go +++ b/test/packetimpact/runner/packetimpact_test.go @@ -242,7 +242,13 @@ func TestOne(t *testing.T) { } // Kill so that it will flush output. - defer testbench.Exec(dockerutil.RunOpts{}, "killall", snifferArgs[0]) + defer func() { + // Wait 1 second before killing tcpdump to give it time to flush + // any packets. 
On linux tests killing it immediately can + // sometimes result in partial pcaps. + time.Sleep(1 * time.Second) + testbench.Exec(dockerutil.RunOpts{}, "killall", snifferArgs[0]) + }() if _, err := testbench.WaitForOutput(snifferRegex, 60*time.Second); err != nil { t.Fatalf("sniffer on %s never listened: %s", dut.Name, err) diff --git a/test/packetimpact/tests/BUILD b/test/packetimpact/tests/BUILD index 1c9f59df5..85749c559 100644 --- a/test/packetimpact/tests/BUILD +++ b/test/packetimpact/tests/BUILD @@ -183,8 +183,6 @@ packetimpact_go_test( packetimpact_go_test( name = "tcp_queue_receive_in_syn_sent", srcs = ["tcp_queue_receive_in_syn_sent_test.go"], - # TODO(b/157658105): Fix netstack then remove the line below. - expect_netstack_failure = True, deps = [ "//pkg/tcpip/header", "//test/packetimpact/testbench", @@ -213,16 +211,6 @@ packetimpact_go_test( ) packetimpact_go_test( - name = "tcp_splitseg_mss", - srcs = ["tcp_splitseg_mss_test.go"], - deps = [ - "//pkg/tcpip/header", - "//test/packetimpact/testbench", - "@org_golang_x_sys//unix:go_default_library", - ], -) - -packetimpact_go_test( name = "tcp_cork_mss", srcs = ["tcp_cork_mss_test.go"], deps = [ diff --git a/test/packetimpact/tests/tcp_queue_receive_in_syn_sent_test.go b/test/packetimpact/tests/tcp_queue_receive_in_syn_sent_test.go index b640d8673..8fbec893b 100644 --- a/test/packetimpact/tests/tcp_queue_receive_in_syn_sent_test.go +++ b/test/packetimpact/tests/tcp_queue_receive_in_syn_sent_test.go @@ -35,53 +35,98 @@ func init() { testbench.RegisterFlags(flag.CommandLine) } +// TestQueueReceiveInSynSent tests receive behavior when the TCP state +// is SYN-SENT. +// It tests for 2 variants where the receive is blocked and: +// (1) we complete handshake and send sample data. +// (2) we send a TCP RST. 
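// (While the endpoint is still in SYN-SENT, the non-blocking receive below is
// expected to fail with EWOULDBLOCK, and the blocking receive to be queued until
// the handshake completes or a RST arrives; see the corresponding change to
// endpoint.Read in pkg/tcpip/transport/tcp/endpoint.go above.)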
func TestQueueReceiveInSynSent(t *testing.T) { - dut := testbench.NewDUT(t) - defer dut.TearDown() + for _, tt := range []struct { + description string + reset bool + }{ + {description: "Send DATA", reset: false}, + {description: "Send RST", reset: true}, + } { + t.Run(tt.description, func(t *testing.T) { + dut := testbench.NewDUT(t) + defer dut.TearDown() - socket, remotePort := dut.CreateBoundSocket(unix.SOCK_STREAM, unix.IPPROTO_TCP, net.ParseIP(testbench.RemoteIPv4)) - conn := testbench.NewTCPIPv4(t, testbench.TCP{DstPort: &remotePort}, testbench.TCP{SrcPort: &remotePort}) - defer conn.Close() + socket, remotePort := dut.CreateBoundSocket(unix.SOCK_STREAM, unix.IPPROTO_TCP, net.ParseIP(testbench.RemoteIPv4)) + conn := testbench.NewTCPIPv4(t, testbench.TCP{DstPort: &remotePort}, testbench.TCP{SrcPort: &remotePort}) + defer conn.Close() - sampleData := []byte("Sample Data") + sampleData := []byte("Sample Data") - dut.SetNonBlocking(socket, true) - if _, err := dut.ConnectWithErrno(context.Background(), socket, conn.LocalAddr()); !errors.Is(err, syscall.EINPROGRESS) { - t.Fatalf("failed to bring DUT to SYN-SENT, got: %s, want EINPROGRESS", err) - } - if _, err := conn.Expect(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagSyn)}, time.Second); err != nil { - t.Fatalf("expected a SYN from DUT, but got none: %s", err) - } + dut.SetNonBlocking(socket, true) + if _, err := dut.ConnectWithErrno(context.Background(), socket, conn.LocalAddr()); !errors.Is(err, syscall.EINPROGRESS) { + t.Fatalf("failed to bring DUT to SYN-SENT, got: %s, want EINPROGRESS", err) + } + if _, err := conn.Expect(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagSyn)}, time.Second); err != nil { + t.Fatalf("expected a SYN from DUT, but got none: %s", err) + } - // Issue RECEIVE call in SYN-SENT, this should be queued for process until the connection - // is established. - dut.SetNonBlocking(socket, false) - var wg sync.WaitGroup - defer wg.Wait() - wg.Add(1) - go func() { - defer wg.Done() - ctx, cancel := context.WithTimeout(context.Background(), time.Second*3) - defer cancel() - n, buff, err := dut.RecvWithErrno(ctx, socket, int32(len(sampleData)), 0) - if n == -1 { - t.Fatalf("failed to recv on DUT: %s", err) - } - if got := buff[:n]; !bytes.Equal(got, sampleData) { - t.Fatalf("received data don't match, got:\n%s, want:\n%s", hex.Dump(got), hex.Dump(sampleData)) - } - }() - - // The following sleep is used to prevent the connection from being established while the - // RPC is in flight. - time.Sleep(time.Second) - - // Bring the connection to Established. - conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagSyn | header.TCPFlagAck)}) - if _, err := conn.Expect(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}, time.Second); err != nil { - t.Fatalf("expected an ACK from DUT, but got none: %s", err) - } + if _, _, err := dut.RecvWithErrno(context.Background(), socket, int32(len(sampleData)), 0); err != syscall.Errno(unix.EWOULDBLOCK) { + t.Fatalf("expected error %s, got %s", syscall.Errno(unix.EWOULDBLOCK), err) + } + + // Test blocking read. + dut.SetNonBlocking(socket, false) + + var wg sync.WaitGroup + defer wg.Wait() + wg.Add(1) + var block sync.WaitGroup + block.Add(1) + go func() { + defer wg.Done() + ctx, cancel := context.WithTimeout(context.Background(), time.Second*3) + defer cancel() - // Send sample data to DUT. 
- conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}, &testbench.Payload{Bytes: sampleData}) + block.Done() + // Issue RECEIVE call in SYN-SENT, this should be queued for + // process until the connection is established. + n, buff, err := dut.RecvWithErrno(ctx, socket, int32(len(sampleData)), 0) + if tt.reset { + if err != syscall.Errno(unix.ECONNREFUSED) { + t.Errorf("expected error %s, got %s", syscall.Errno(unix.ECONNREFUSED), err) + } + if n != -1 { + t.Errorf("expected return value %d, got %d", -1, n) + } + return + } + if n == -1 { + t.Errorf("failed to recv on DUT: %s", err) + } + if got := buff[:n]; !bytes.Equal(got, sampleData) { + t.Errorf("received data doesn't match, got:\n%s, want:\n%s", hex.Dump(got), hex.Dump(sampleData)) + } + }() + + // Wait for the goroutine to be scheduled and before it + // blocks on endpoint receive. + block.Wait() + // The following sleep is used to prevent the connection + // from being established before we are blocked on Recv. + time.Sleep(100 * time.Millisecond) + + if tt.reset { + conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagRst | header.TCPFlagAck)}) + return + } + + // Bring the connection to Established. + conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagSyn | header.TCPFlagAck)}) + if _, err := conn.Expect(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}, time.Second); err != nil { + t.Fatalf("expected an ACK from DUT, but got none: %s", err) + } + + // Send sample payload and expect an ACK. + conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}, &testbench.Payload{Bytes: sampleData}) + if _, err := conn.Expect(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}, time.Second); err != nil { + t.Fatalf("expected an ACK from DUT, but got none: %s", err) + } + }) + } } diff --git a/test/packetimpact/tests/tcp_splitseg_mss_test.go b/test/packetimpact/tests/tcp_splitseg_mss_test.go deleted file mode 100644 index 9350d0988..000000000 --- a/test/packetimpact/tests/tcp_splitseg_mss_test.go +++ /dev/null @@ -1,71 +0,0 @@ -// Copyright 2020 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package tcp_splitseg_mss_test - -import ( - "flag" - "testing" - "time" - - "golang.org/x/sys/unix" - "gvisor.dev/gvisor/pkg/tcpip/header" - "gvisor.dev/gvisor/test/packetimpact/testbench" -) - -func init() { - testbench.RegisterFlags(flag.CommandLine) -} - -// TestTCPSplitSegMSS lets the dut try to send segments larger than MSS. -// It tests if the transmitted segments are capped at MSS and are split. 
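Illustrative aside: a reduced sketch (all names invented) of the block/wg WaitGroup pair used above. block.Wait only proves the goroutine was scheduled, which is why the test still sleeps briefly before injecting the SYN-ACK or RST:

```go
package main

import (
	"fmt"
	"sync"
	"time"
)

func main() {
	var wg, block sync.WaitGroup
	wg.Add(1)
	block.Add(1)

	results := make(chan string, 1)
	go func() {
		defer wg.Done()
		block.Done()                       // Running, but not yet blocked.
		time.Sleep(500 * time.Millisecond) // Stand-in for the blocking recv.
		results <- "recv returned"
	}()

	block.Wait()                       // Goroutine has been scheduled.
	time.Sleep(100 * time.Millisecond) // Give it time to actually block.
	// At this point the real test injects the SYN-ACK (or RST) from the testbench.
	wg.Wait()
	fmt.Println(<-results)
}
```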
-func TestTCPSplitSegMSS(t *testing.T) { - dut := testbench.NewDUT(t) - defer dut.TearDown() - listenFD, remotePort := dut.CreateListener(unix.SOCK_STREAM, unix.IPPROTO_TCP, 1) - defer dut.Close(listenFD) - conn := testbench.NewTCPIPv4(t, testbench.TCP{DstPort: &remotePort}, testbench.TCP{SrcPort: &remotePort}) - defer conn.Close() - - const mss = uint32(header.TCPDefaultMSS) - options := make([]byte, header.TCPOptionMSSLength) - header.EncodeMSSOption(mss, options) - conn.ConnectWithOptions(options) - - acceptFD, _ := dut.Accept(listenFD) - defer dut.Close(acceptFD) - - // Let the dut send a segment larger than MSS. - largeData := make([]byte, mss+1) - for i := 0; i < 2; i++ { - dut.Send(acceptFD, largeData, 0) - if i == 0 { - // On Linux, the initial segment goes out beyond MSS and the segment - // split occurs on retransmission. Call ExpectData to wait to - // receive the split segment. - if _, err := conn.ExpectData(&testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}, &testbench.Payload{Bytes: largeData[:mss]}, time.Second); err != nil { - t.Fatalf("expected payload was not received: %s", err) - } - } else { - if _, err := conn.ExpectNextData(&testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}, &testbench.Payload{Bytes: largeData[:mss]}, time.Second); err != nil { - t.Fatalf("expected payload was not received: %s", err) - } - } - conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}) - if _, err := conn.ExpectNextData(&testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck | header.TCPFlagPsh)}, &testbench.Payload{Bytes: largeData[mss:]}, time.Second); err != nil { - t.Fatalf("expected payload was not received: %s", err) - } - conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}) - } -} diff --git a/test/packetimpact/tests/tcp_zero_window_probe_retransmit_test.go b/test/packetimpact/tests/tcp_zero_window_probe_retransmit_test.go index 5ab193181..8c89d57c9 100644 --- a/test/packetimpact/tests/tcp_zero_window_probe_retransmit_test.go +++ b/test/packetimpact/tests/tcp_zero_window_probe_retransmit_test.go @@ -88,8 +88,8 @@ func TestZeroWindowProbeRetransmit(t *testing.T) { continue } // Check if the probes came at exponentially increasing intervals. - if p := time.Since(start); p < current-startProbeDuration { - t.Fatalf("zero probe came sooner interval %d probe %d\n", p, i) + if got, want := time.Since(start), current-startProbeDuration; got < want { + t.Errorf("got zero probe %d after %s, want >= %s", i, got, want) } // Acknowledge the zero-window probes from the dut. 
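Illustrative aside: a toy test (invented durations) showing the two stylistic changes adopted in the zero-window-probe hunk above: the got/want comparison idiom, and t.Errorf instead of t.Fatalf so one late probe does not abort the checks for the remaining probes.

```go
package sketch

import (
	"testing"
	"time"
)

func TestProbeIntervalStyle(t *testing.T) {
	start := time.Now()
	expected := 10 * time.Millisecond // assumed per-probe lower bound
	for i := 0; i < 3; i++ {
		time.Sleep(expected)
		if got, want := time.Since(start), expected; got < want {
			// Errorf keeps the loop going so every probe gets validated.
			t.Errorf("got zero probe %d after %s, want >= %s", i, got, want)
		}
		expected *= 2 // the interval is expected to grow exponentially
	}
}
```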
conn.Send(testbench.TCP{AckNum: ackProbe, Flags: testbench.Uint8(header.TCPFlagAck), WindowSize: testbench.Uint16(0)}) diff --git a/test/syscalls/BUILD b/test/syscalls/BUILD index 36c178e4a..7c4cd8192 100644 --- a/test/syscalls/BUILD +++ b/test/syscalls/BUILD @@ -199,6 +199,7 @@ syscall_test( syscall_test( add_overlay = True, test = "//test/syscalls/linux:fallocate_test", + vfs2 = "True", ) syscall_test( @@ -288,6 +289,7 @@ syscall_test( size = "medium", add_overlay = True, test = "//test/syscalls/linux:ioctl_test", + vfs2 = "True", ) syscall_test( @@ -355,6 +357,7 @@ syscall_test( size = "medium", shard_count = 5, test = "//test/syscalls/linux:mmap_test", + vfs2 = "True", ) syscall_test( @@ -528,6 +531,7 @@ syscall_test( syscall_test( add_overlay = True, test = "//test/syscalls/linux:pwritev2_test", + vfs2 = "True", ) syscall_test( @@ -960,6 +964,7 @@ syscall_test( syscall_test( add_overlay = True, test = "//test/syscalls/linux:sync_file_range_test", + vfs2 = "True", ) syscall_test( diff --git a/test/syscalls/linux/BUILD b/test/syscalls/linux/BUILD index 8c7d54b21..9e097c888 100644 --- a/test/syscalls/linux/BUILD +++ b/test/syscalls/linux/BUILD @@ -748,9 +748,14 @@ cc_binary( linkstatic = 1, deps = [ ":file_base", + ":socket_test_util", "//test/util:cleanup", + "//test/util:eventfd_util", "//test/util:file_descriptor", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/time", gtest, + "//test/util:posix_error", "//test/util:temp_path", "//test/util:test_main", "//test/util:test_util", diff --git a/test/syscalls/linux/fallocate.cc b/test/syscalls/linux/fallocate.cc index 7819f4ac3..cabc2b751 100644 --- a/test/syscalls/linux/fallocate.cc +++ b/test/syscalls/linux/fallocate.cc @@ -15,16 +15,27 @@ #include <errno.h> #include <fcntl.h> #include <signal.h> +#include <sys/eventfd.h> #include <sys/resource.h> +#include <sys/signalfd.h> +#include <sys/socket.h> #include <sys/stat.h> +#include <sys/timerfd.h> #include <syscall.h> #include <time.h> #include <unistd.h> +#include <ctime> + #include "gtest/gtest.h" +#include "absl/strings/str_cat.h" +#include "absl/time/time.h" #include "test/syscalls/linux/file_base.h" +#include "test/syscalls/linux/socket_test_util.h" #include "test/util/cleanup.h" +#include "test/util/eventfd_util.h" #include "test/util/file_descriptor.h" +#include "test/util/posix_error.h" #include "test/util/temp_path.h" #include "test/util/test_util.h" @@ -70,6 +81,12 @@ TEST_F(AllocateTest, Fallocate) { ASSERT_THAT(fallocate(test_file_fd_.get(), 0, 39, 1), SyscallSucceeds()); ASSERT_THAT(fstat(test_file_fd_.get(), &buf), SyscallSucceeds()); EXPECT_EQ(buf.st_size, 40); + + // Given length 0 should fail with EINVAL. 
+ ASSERT_THAT(fallocate(test_file_fd_.get(), 0, 50, 0), + SyscallFailsWithErrno(EINVAL)); + ASSERT_THAT(fstat(test_file_fd_.get(), &buf), SyscallSucceeds()); + EXPECT_EQ(buf.st_size, 40); } TEST_F(AllocateTest, FallocateInvalid) { @@ -136,6 +153,34 @@ TEST_F(AllocateTest, FallocateRlimit) { ASSERT_THAT(sigprocmask(SIG_UNBLOCK, &new_mask, nullptr), SyscallSucceeds()); } +TEST_F(AllocateTest, FallocateOtherFDs) { + int fd; + ASSERT_THAT(fd = timerfd_create(CLOCK_MONOTONIC, 0), SyscallSucceeds()); + auto timer_fd = FileDescriptor(fd); + EXPECT_THAT(fallocate(timer_fd.get(), 0, 0, 10), + SyscallFailsWithErrno(ENODEV)); + + sigset_t mask; + sigemptyset(&mask); + ASSERT_THAT(fd = signalfd(-1, &mask, 0), SyscallSucceeds()); + auto sfd = FileDescriptor(fd); + EXPECT_THAT(fallocate(sfd.get(), 0, 0, 10), SyscallFailsWithErrno(ENODEV)); + + auto efd = + ASSERT_NO_ERRNO_AND_VALUE(NewEventFD(0, EFD_NONBLOCK | EFD_SEMAPHORE)); + EXPECT_THAT(fallocate(efd.get(), 0, 0, 10), SyscallFailsWithErrno(ENODEV)); + + auto sockfd = ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET, SOCK_DGRAM, 0)); + EXPECT_THAT(fallocate(sockfd.get(), 0, 0, 10), SyscallFailsWithErrno(ENODEV)); + + int socks[2]; + ASSERT_THAT(socketpair(AF_UNIX, SOCK_STREAM, PF_UNIX, socks), + SyscallSucceeds()); + auto sock0 = FileDescriptor(socks[0]); + auto sock1 = FileDescriptor(socks[1]); + EXPECT_THAT(fallocate(sock0.get(), 0, 0, 10), SyscallFailsWithErrno(ENODEV)); +} + } // namespace } // namespace testing } // namespace gvisor diff --git a/test/syscalls/linux/fcntl.cc b/test/syscalls/linux/fcntl.cc index 9130618fa..5467fa2c8 100644 --- a/test/syscalls/linux/fcntl.cc +++ b/test/syscalls/linux/fcntl.cc @@ -18,7 +18,10 @@ #include <syscall.h> #include <unistd.h> +#include <iostream> +#include <list> #include <string> +#include <vector> #include "gtest/gtest.h" #include "absl/base/macros.h" @@ -1028,6 +1031,30 @@ TEST(FcntlTest, SetOwnPgrp) { MaybeSave(); } +TEST(FcntlTest, SetOwnUnset) { + FileDescriptor s = ASSERT_NO_ERRNO_AND_VALUE( + Socket(AF_UNIX, SOCK_SEQPACKET | SOCK_NONBLOCK | SOCK_CLOEXEC, 0)); + + // Set and unset pid. + pid_t pid; + EXPECT_THAT(pid = getpid(), SyscallSucceeds()); + ASSERT_THAT(syscall(__NR_fcntl, s.get(), F_SETOWN, pid), SyscallSucceeds()); + ASSERT_THAT(syscall(__NR_fcntl, s.get(), F_SETOWN, 0), SyscallSucceeds()); + + EXPECT_THAT(syscall(__NR_fcntl, s.get(), F_GETOWN), + SyscallSucceedsWithValue(0)); + + // Set and unset pgid. + pid_t pgid; + EXPECT_THAT(pgid = getpgrp(), SyscallSucceeds()); + ASSERT_THAT(syscall(__NR_fcntl, s.get(), F_SETOWN, -pgid), SyscallSucceeds()); + ASSERT_THAT(syscall(__NR_fcntl, s.get(), F_SETOWN, 0), SyscallSucceeds()); + + EXPECT_THAT(syscall(__NR_fcntl, s.get(), F_GETOWN), + SyscallSucceedsWithValue(0)); + MaybeSave(); +} + // F_SETOWN flips the sign of negative values, an operation that is guarded // against overflow. TEST(FcntlTest, SetOwnOverflow) { @@ -1138,6 +1165,39 @@ TEST(FcntlTest, SetOwnExPgrp) { MaybeSave(); } +TEST(FcntlTest, SetOwnExUnset) { + SKIP_IF(IsRunningWithVFS1()); + + FileDescriptor s = ASSERT_NO_ERRNO_AND_VALUE( + Socket(AF_UNIX, SOCK_SEQPACKET | SOCK_NONBLOCK | SOCK_CLOEXEC, 0)); + + // Set and unset pid. 
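Illustrative aside: the F_SETOWN set-then-unset round trip exercised by the new C++ test, sketched with the golang.org/x/sys/unix wrappers (the socketpair and error handling here are illustrative, not part of the patch):

```go
package main

import (
	"fmt"
	"os"

	"golang.org/x/sys/unix"
)

// Set the owner to our pid, clear it with 0, then expect F_GETOWN to report 0.
func main() {
	fds, err := unix.Socketpair(unix.AF_UNIX, unix.SOCK_SEQPACKET, 0)
	if err != nil {
		panic(err)
	}
	defer unix.Close(fds[0])
	defer unix.Close(fds[1])

	fd := uintptr(fds[0])
	if _, err := unix.FcntlInt(fd, unix.F_SETOWN, os.Getpid()); err != nil {
		panic(err)
	}
	if _, err := unix.FcntlInt(fd, unix.F_SETOWN, 0); err != nil { // unset
		panic(err)
	}
	owner, err := unix.FcntlInt(fd, unix.F_GETOWN, 0)
	fmt.Println("owner after unset:", owner, err) // expect 0, nil
}
```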
+ f_owner_ex owner = {}; + owner.type = F_OWNER_PID; + EXPECT_THAT(owner.pid = getpid(), SyscallSucceeds()); + ASSERT_THAT(syscall(__NR_fcntl, s.get(), F_SETOWN_EX, &owner), + SyscallSucceeds()); + owner.pid = 0; + ASSERT_THAT(syscall(__NR_fcntl, s.get(), F_SETOWN_EX, &owner), + SyscallSucceeds()); + + EXPECT_THAT(syscall(__NR_fcntl, s.get(), F_GETOWN), + SyscallSucceedsWithValue(0)); + + // Set and unset pgid. + owner.type = F_OWNER_PGRP; + EXPECT_THAT(owner.pid = getpgrp(), SyscallSucceeds()); + ASSERT_THAT(syscall(__NR_fcntl, s.get(), F_SETOWN_EX, &owner), + SyscallSucceeds()); + owner.pid = 0; + ASSERT_THAT(syscall(__NR_fcntl, s.get(), F_SETOWN_EX, &owner), + SyscallSucceeds()); + + EXPECT_THAT(syscall(__NR_fcntl, s.get(), F_GETOWN), + SyscallSucceedsWithValue(0)); + MaybeSave(); +} + TEST(FcntlTest, GetOwnExTid) { FileDescriptor s = ASSERT_NO_ERRNO_AND_VALUE( Socket(AF_UNIX, SOCK_SEQPACKET | SOCK_NONBLOCK | SOCK_CLOEXEC, 0)); diff --git a/test/syscalls/linux/pwritev2.cc b/test/syscalls/linux/pwritev2.cc index 3fe5a600f..63b686c62 100644 --- a/test/syscalls/linux/pwritev2.cc +++ b/test/syscalls/linux/pwritev2.cc @@ -69,7 +69,7 @@ ssize_t pwritev2(unsigned long fd, const struct iovec* iov, } // This test is the base case where we call pwritev (no offset, no flags). -TEST(Writev2Test, TestBaseCall) { +TEST(Writev2Test, BaseCall) { SKIP_IF(pwritev2(-1, nullptr, 0, 0, 0) < 0 && errno == ENOSYS); const TempPath file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( @@ -97,7 +97,7 @@ TEST(Writev2Test, TestBaseCall) { } // This test is where we call pwritev2 with a positive offset and no flags. -TEST(Pwritev2Test, TestValidPositiveOffset) { +TEST(Pwritev2Test, ValidPositiveOffset) { SKIP_IF(pwritev2(-1, nullptr, 0, 0, 0) < 0 && errno == ENOSYS); std::string prefix(kBufSize, '0'); @@ -129,7 +129,7 @@ TEST(Pwritev2Test, TestValidPositiveOffset) { // This test is the base case where we call writev by using -1 as the offset. // The write should use the file offset, so the test increments the file offset // prior to call pwritev2. -TEST(Pwritev2Test, TestNegativeOneOffset) { +TEST(Pwritev2Test, NegativeOneOffset) { SKIP_IF(pwritev2(-1, nullptr, 0, 0, 0) < 0 && errno == ENOSYS); const std::string prefix = "00"; @@ -164,7 +164,7 @@ TEST(Pwritev2Test, TestNegativeOneOffset) { // pwritev2 requires if the RWF_HIPRI flag is passed, the fd must be opened with // O_DIRECT. This test implements a correct call with the RWF_HIPRI flag. 
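Illustrative aside: a minimal sketch of the pwritev2 base cases covered by the renamed tests, assuming the x/sys/unix Pwritev2 wrapper; offset -1 means "use and advance the file offset":

```go
package main

import (
	"fmt"
	"io"
	"os"

	"golang.org/x/sys/unix"
)

func main() {
	f, err := os.CreateTemp("", "pwritev2-sketch")
	if err != nil {
		panic(err)
	}
	defer os.Remove(f.Name())
	defer f.Close()

	iovs := [][]byte{[]byte("hello "), []byte("world")}
	n, err := unix.Pwritev2(int(f.Fd()), iovs, -1 /* use file offset */, 0)
	fmt.Println("wrote", n, "bytes, err:", err)

	// Because offset was -1, the shared file offset advanced past the write.
	off, _ := f.Seek(0, io.SeekCurrent)
	fmt.Println("file offset advanced to", off) // expect 11
}
```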
-TEST(Pwritev2Test, TestCallWithRWF_HIPRI) { +TEST(Pwritev2Test, CallWithRWF_HIPRI) { SKIP_IF(pwritev2(-1, nullptr, 0, 0, 0) < 0 && errno == ENOSYS); const TempPath file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( @@ -189,47 +189,8 @@ TEST(Pwritev2Test, TestCallWithRWF_HIPRI) { EXPECT_EQ(buf, content); } -// This test checks that pwritev2 can be called with valid flags -TEST(Pwritev2Test, TestCallWithValidFlags) { - SKIP_IF(pwritev2(-1, nullptr, 0, 0, 0) < 0 && errno == ENOSYS); - - const TempPath file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( - GetAbsoluteTestTmpdir(), "", TempPath::kDefaultFileMode)); - const FileDescriptor fd = - ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDWR)); - - std::vector<char> content(kBufSize, '0'); - struct iovec iov; - iov.iov_base = content.data(); - iov.iov_len = content.size(); - - EXPECT_THAT(pwritev2(fd.get(), &iov, /*iovcnt=*/1, - /*offset=*/0, /*flags=*/RWF_DSYNC), - SyscallSucceedsWithValue(kBufSize)); - - std::vector<char> buf(content.size()); - EXPECT_THAT(read(fd.get(), buf.data(), buf.size()), - SyscallSucceedsWithValue(buf.size())); - - EXPECT_EQ(buf, content); - - SetContent(content); - - EXPECT_THAT(pwritev2(fd.get(), &iov, /*iovcnt=*/1, - /*offset=*/0, /*flags=*/0x4), - SyscallSucceedsWithValue(kBufSize)); - - ASSERT_THAT(lseek(fd.get(), 0, SEEK_CUR), - SyscallSucceedsWithValue(content.size())); - - EXPECT_THAT(pread(fd.get(), buf.data(), buf.size(), /*offset=*/0), - SyscallSucceedsWithValue(buf.size())); - - EXPECT_EQ(buf, content); -} - // This test calls pwritev2 with a bad file descriptor. -TEST(Writev2Test, TestBadFile) { +TEST(Writev2Test, BadFile) { SKIP_IF(pwritev2(-1, nullptr, 0, 0, 0) < 0 && errno == ENOSYS); ASSERT_THAT(pwritev2(/*fd=*/-1, /*iov=*/nullptr, /*iovcnt=*/0, /*offset=*/0, /*flags=*/0), @@ -237,7 +198,7 @@ TEST(Writev2Test, TestBadFile) { } // This test calls pwrite2 with an invalid offset. -TEST(Pwritev2Test, TestInvalidOffset) { +TEST(Pwritev2Test, InvalidOffset) { SKIP_IF(pwritev2(-1, nullptr, 0, 0, 0) < 0 && errno == ENOSYS); const TempPath file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( @@ -255,7 +216,7 @@ TEST(Pwritev2Test, TestInvalidOffset) { SyscallFailsWithErrno(EINVAL)); } -TEST(Pwritev2Test, TestUnseekableFileValid) { +TEST(Pwritev2Test, UnseekableFileValid) { SKIP_IF(pwritev2(-1, nullptr, 0, 0, 0) < 0 && errno == ENOSYS); int pipe_fds[2]; @@ -285,7 +246,7 @@ TEST(Pwritev2Test, TestUnseekableFileValid) { // Calling pwritev2 with a non-negative offset calls pwritev. Calling pwritev // with an unseekable file is not allowed. A pipe is used for an unseekable // file. -TEST(Pwritev2Test, TestUnseekableFileInValid) { +TEST(Pwritev2Test, UnseekableFileInvalid) { SKIP_IF(pwritev2(-1, nullptr, 0, 0, 0) < 0 && errno == ENOSYS); int pipe_fds[2]; @@ -304,7 +265,7 @@ TEST(Pwritev2Test, TestUnseekableFileInValid) { EXPECT_THAT(close(pipe_fds[1]), SyscallSucceeds()); } -TEST(Pwritev2Test, TestReadOnlyFile) { +TEST(Pwritev2Test, ReadOnlyFile) { SKIP_IF(pwritev2(-1, nullptr, 0, 0, 0) < 0 && errno == ENOSYS); const TempPath file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( @@ -323,7 +284,7 @@ TEST(Pwritev2Test, TestReadOnlyFile) { } // This test calls pwritev2 with an invalid flag. 
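Illustrative aside: the error paths the renamed tests cover, sketched with the same assumed Pwritev2 wrapper; a bad fd yields EBADF, and a pipe (unseekable) rejects a non-negative offset with ESPIPE while accepting -1:

```go
package main

import (
	"fmt"

	"golang.org/x/sys/unix"
)

func main() {
	iovs := [][]byte{[]byte("data")}

	_, err := unix.Pwritev2(-1, iovs, 0, 0)
	fmt.Println("bad fd:", err) // expect EBADF

	var p [2]int
	if err := unix.Pipe(p[:]); err != nil {
		panic(err)
	}
	defer unix.Close(p[0])
	defer unix.Close(p[1])

	_, err = unix.Pwritev2(p[1], iovs, 0, 0)
	fmt.Println("pipe with offset 0:", err) // expect ESPIPE

	n, err := unix.Pwritev2(p[1], iovs, -1, 0)
	fmt.Println("pipe with offset -1:", n, err) // expect 4, nil
}
```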
-TEST(Pwritev2Test, TestInvalidFlag) { +TEST(Pwritev2Test, InvalidFlag) { SKIP_IF(pwritev2(-1, nullptr, 0, 0, 0) < 0 && errno == ENOSYS); const TempPath file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( diff --git a/test/syscalls/linux/socket_netlink_route.cc b/test/syscalls/linux/socket_netlink_route.cc index fbe61c5a0..e6647a1c3 100644 --- a/test/syscalls/linux/socket_netlink_route.cc +++ b/test/syscalls/linux/socket_netlink_route.cc @@ -595,7 +595,7 @@ TEST(NetlinkRouteTest, GetRouteRequest) { ASSERT_NO_ERRNO_AND_VALUE(NetlinkBoundSocket(NETLINK_ROUTE)); uint32_t port = ASSERT_NO_ERRNO_AND_VALUE(NetlinkPortID(fd.get())); - struct __attribute__((__packed__)) request { + struct request { struct nlmsghdr hdr; struct rtmsg rtm; struct nlattr nla; diff --git a/test/syscalls/linux/sticky.cc b/test/syscalls/linux/sticky.cc index 39f4fb801..4afed6d08 100644 --- a/test/syscalls/linux/sticky.cc +++ b/test/syscalls/linux/sticky.cc @@ -42,12 +42,15 @@ TEST(StickyTest, StickyBitPermDenied) { const TempPath parent = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); EXPECT_THAT(chmod(parent.path().c_str(), 0777 | S_ISVTX), SyscallSucceeds()); - const TempPath file = ASSERT_NO_ERRNO_AND_VALUE( - TempPath::CreateFileWith(parent.path(), "some content", 0755)); - const TempPath dir = - ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirWith(parent.path(), 0755)); - const TempPath link = ASSERT_NO_ERRNO_AND_VALUE( - TempPath::CreateSymlinkTo(parent.path(), file.path())); + + // After changing credentials below, we need to use an open fd to make + // modifications in the parent dir, because there is no guarantee that we will + // still have the ability to open it. + const FileDescriptor parent_fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(parent.path(), O_DIRECTORY)); + ASSERT_THAT(openat(parent_fd.get(), "file", O_CREAT), SyscallSucceeds()); + ASSERT_THAT(mkdirat(parent_fd.get(), "dir", 0777), SyscallSucceeds()); + ASSERT_THAT(symlinkat("xyz", parent_fd.get(), "link"), SyscallSucceeds()); // Drop privileges and change IDs only in child thread, or else this parent // thread won't be able to open some log files after the test ends. 
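Illustrative aside: a sketch of the dirfd-relative pattern the sticky-bit tests switch to, using the *at() wrappers from golang.org/x/sys/unix. Holding an O_DIRECTORY fd on the parent keeps later modifications working even if the task can no longer re-open the directory by path after changing credentials (paths and helpers here are invented):

```go
package main

import (
	"fmt"
	"os"

	"golang.org/x/sys/unix"
)

func main() {
	parent, err := os.MkdirTemp("", "sticky-sketch")
	if err != nil {
		panic(err)
	}
	defer os.RemoveAll(parent)

	dirfd, err := unix.Open(parent, unix.O_DIRECTORY, 0)
	if err != nil {
		panic(err)
	}
	defer unix.Close(dirfd)

	// Create everything relative to the held directory fd.
	fd, err := unix.Openat(dirfd, "file", unix.O_CREAT|unix.O_WRONLY, 0644)
	if err != nil {
		panic(err)
	}
	unix.Close(fd)
	must(unix.Mkdirat(dirfd, "dir", 0777))
	must(unix.Symlinkat("xyz", dirfd, "link"))

	// ... credentials could change here ...

	// Later modifications also go through the same fd.
	must(unix.Renameat(dirfd, "file", dirfd, "file2"))
	must(unix.Unlinkat(dirfd, "file2", 0))
	must(unix.Unlinkat(dirfd, "dir", unix.AT_REMOVEDIR))
	must(unix.Unlinkat(dirfd, "link", 0))
	fmt.Println("all *at() operations succeeded")
}

func must(err error) {
	if err != nil {
		panic(err)
	}
}
```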
@@ -65,12 +68,14 @@ TEST(StickyTest, StickyBitPermDenied) { syscall(SYS_setresuid, -1, absl::GetFlag(FLAGS_scratch_uid), -1), SyscallSucceeds()); - std::string new_path = NewTempAbsPath(); - EXPECT_THAT(rename(file.path().c_str(), new_path.c_str()), + EXPECT_THAT(renameat(parent_fd.get(), "file", parent_fd.get(), "file2"), + SyscallFailsWithErrno(EPERM)); + EXPECT_THAT(unlinkat(parent_fd.get(), "file", 0), + SyscallFailsWithErrno(EPERM)); + EXPECT_THAT(unlinkat(parent_fd.get(), "dir", AT_REMOVEDIR), + SyscallFailsWithErrno(EPERM)); + EXPECT_THAT(unlinkat(parent_fd.get(), "link", 0), SyscallFailsWithErrno(EPERM)); - EXPECT_THAT(unlink(file.path().c_str()), SyscallFailsWithErrno(EPERM)); - EXPECT_THAT(rmdir(dir.path().c_str()), SyscallFailsWithErrno(EPERM)); - EXPECT_THAT(unlink(link.path().c_str()), SyscallFailsWithErrno(EPERM)); }); } @@ -79,12 +84,15 @@ TEST(StickyTest, StickyBitSameUID) { const TempPath parent = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); EXPECT_THAT(chmod(parent.path().c_str(), 0777 | S_ISVTX), SyscallSucceeds()); - const TempPath file = ASSERT_NO_ERRNO_AND_VALUE( - TempPath::CreateFileWith(parent.path(), "some content", 0755)); - const TempPath dir = - ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirWith(parent.path(), 0755)); - const TempPath link = ASSERT_NO_ERRNO_AND_VALUE( - TempPath::CreateSymlinkTo(parent.path(), file.path())); + + // After changing credentials below, we need to use an open fd to make + // modifications in the parent dir, because there is no guarantee that we will + // still have the ability to open it. + const FileDescriptor parent_fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(parent.path(), O_DIRECTORY)); + ASSERT_THAT(openat(parent_fd.get(), "file", O_CREAT), SyscallSucceeds()); + ASSERT_THAT(mkdirat(parent_fd.get(), "dir", 0777), SyscallSucceeds()); + ASSERT_THAT(symlinkat("xyz", parent_fd.get(), "link"), SyscallSucceeds()); // Drop privileges and change IDs only in child thread, or else this parent // thread won't be able to open some log files after the test ends. @@ -100,12 +108,12 @@ TEST(StickyTest, StickyBitSameUID) { SyscallSucceeds()); // We still have the same EUID. - std::string new_path = NewTempAbsPath(); - EXPECT_THAT(rename(file.path().c_str(), new_path.c_str()), + EXPECT_THAT(renameat(parent_fd.get(), "file", parent_fd.get(), "file2"), + SyscallSucceeds()); + EXPECT_THAT(unlinkat(parent_fd.get(), "file2", 0), SyscallSucceeds()); + EXPECT_THAT(unlinkat(parent_fd.get(), "dir", AT_REMOVEDIR), SyscallSucceeds()); - EXPECT_THAT(unlink(new_path.c_str()), SyscallSucceeds()); - EXPECT_THAT(rmdir(dir.path().c_str()), SyscallSucceeds()); - EXPECT_THAT(unlink(link.path().c_str()), SyscallSucceeds()); + EXPECT_THAT(unlinkat(parent_fd.get(), "link", 0), SyscallSucceeds()); }); } @@ -114,12 +122,15 @@ TEST(StickyTest, StickyBitCapFOWNER) { const TempPath parent = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); EXPECT_THAT(chmod(parent.path().c_str(), 0777 | S_ISVTX), SyscallSucceeds()); - const TempPath file = ASSERT_NO_ERRNO_AND_VALUE( - TempPath::CreateFileWith(parent.path(), "some content", 0755)); - const TempPath dir = - ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirWith(parent.path(), 0755)); - const TempPath link = ASSERT_NO_ERRNO_AND_VALUE( - TempPath::CreateSymlinkTo(parent.path(), file.path())); + + // After changing credentials below, we need to use an open fd to make + // modifications in the parent dir, because there is no guarantee that we will + // still have the ability to open it. 
+ const FileDescriptor parent_fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(parent.path(), O_DIRECTORY)); + ASSERT_THAT(openat(parent_fd.get(), "file", O_CREAT), SyscallSucceeds()); + ASSERT_THAT(mkdirat(parent_fd.get(), "dir", 0777), SyscallSucceeds()); + ASSERT_THAT(symlinkat("xyz", parent_fd.get(), "link"), SyscallSucceeds()); // Drop privileges and change IDs only in child thread, or else this parent // thread won't be able to open some log files after the test ends. @@ -136,12 +147,12 @@ TEST(StickyTest, StickyBitCapFOWNER) { SyscallSucceeds()); EXPECT_NO_ERRNO(SetCapability(CAP_FOWNER, true)); - std::string new_path = NewTempAbsPath(); - EXPECT_THAT(rename(file.path().c_str(), new_path.c_str()), + EXPECT_THAT(renameat(parent_fd.get(), "file", parent_fd.get(), "file2"), + SyscallSucceeds()); + EXPECT_THAT(unlinkat(parent_fd.get(), "file2", 0), SyscallSucceeds()); + EXPECT_THAT(unlinkat(parent_fd.get(), "dir", AT_REMOVEDIR), SyscallSucceeds()); - EXPECT_THAT(unlink(new_path.c_str()), SyscallSucceeds()); - EXPECT_THAT(rmdir(dir.path().c_str()), SyscallSucceeds()); - EXPECT_THAT(unlink(link.path().c_str()), SyscallSucceeds()); + EXPECT_THAT(unlinkat(parent_fd.get(), "link", 0), SyscallSucceeds()); }); } } // namespace diff --git a/test/util/test_util.cc b/test/util/test_util.cc index b20758626..8a037f45f 100644 --- a/test/util/test_util.cc +++ b/test/util/test_util.cc @@ -40,15 +40,14 @@ namespace gvisor { namespace testing { -#define TEST_ON_GVISOR "TEST_ON_GVISOR" -#define GVISOR_NETWORK "GVISOR_NETWORK" -#define GVISOR_VFS "GVISOR_VFS" +constexpr char kGvisorNetwork[] = "GVISOR_NETWORK"; +constexpr char kGvisorVfs[] = "GVISOR_VFS"; bool IsRunningOnGvisor() { return GvisorPlatform() != Platform::kNative; } const std::string GvisorPlatform() { // Set by runner.go. - const char* env = getenv(TEST_ON_GVISOR); + const char* env = getenv(kTestOnGvisor); if (!env) { return Platform::kNative; } @@ -56,12 +55,12 @@ const std::string GvisorPlatform() { } bool IsRunningWithHostinet() { - const char* env = getenv(GVISOR_NETWORK); + const char* env = getenv(kGvisorNetwork); return env && strcmp(env, "host") == 0; } bool IsRunningWithVFS1() { - const char* env = getenv(GVISOR_VFS); + const char* env = getenv(kGvisorVfs); if (env == nullptr) { // If not set, it's running on Linux. return false; diff --git a/test/util/test_util.h b/test/util/test_util.h index e635827e6..109078fc7 100644 --- a/test/util/test_util.h +++ b/test/util/test_util.h @@ -195,6 +195,8 @@ namespace gvisor { namespace testing { +constexpr char kTestOnGvisor[] = "TEST_ON_GVISOR"; + // TestInit must be called prior to RUN_ALL_TESTS. // // This parses all arguments and adjusts argc and argv appropriately. 
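Illustrative aside: the environment contract the rewritten C++ helpers read. The concrete values the runner exports are not shown in this hunk, so the sketch below only dumps the raw variables rather than assuming them:

```go
package main

import (
	"fmt"
	"os"
)

// Print the variables named by kTestOnGvisor, kGvisorNetwork and kGvisorVfs,
// as seen from inside a test process.
func main() {
	for _, key := range []string{"TEST_ON_GVISOR", "GVISOR_NETWORK", "GVISOR_VFS"} {
		v, ok := os.LookupEnv(key)
		fmt.Printf("%-16s set=%-5t value=%q\n", key, ok, v)
	}
}
```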
@@ -215,6 +217,7 @@ namespace Platform {
 constexpr char kNative[] = "native";
 constexpr char kPtrace[] = "ptrace";
 constexpr char kKVM[] = "kvm";
+constexpr char kFuchsia[] = "fuchsia";
 } // namespace Platform
 
 bool IsRunningOnGvisor();
diff --git a/website/BUILD b/website/BUILD
index c97b2560b..4488cb543 100644
--- a/website/BUILD
+++ b/website/BUILD
@@ -138,6 +138,7 @@ docs(
         "//g3doc:community",
         "//g3doc:index",
         "//g3doc:roadmap",
+        "//g3doc:style",
         "//g3doc/architecture_guide:performance",
         "//g3doc/architecture_guide:platforms",
         "//g3doc/architecture_guide:resources",
diff --git a/website/_includes/footer-links.html b/website/_includes/footer-links.html
index 10c28ead4..2036dbaa9 100644
--- a/website/_includes/footer-links.html
+++ b/website/_includes/footer-links.html
@@ -15,7 +15,7 @@
     <ul class="list-unstyled">
       <li><a href="https://github.com/google/gvisor/issues">Issues</a></li>
       <li><a href="/docs">Documentation</a></li>
-      <li><a href="/docs/user_guide/FAQ">FAQ</a></li>
+      <li><a href="/docs/user_guide/faq">FAQ</a></li>
     </ul>
   </div>
   <div class="col-sm-3 col-md-2">
diff --git a/website/cmd/server/main.go b/website/cmd/server/main.go
index 7c8bc9bfa..c401b6abd 100644
--- a/website/cmd/server/main.go
+++ b/website/cmd/server/main.go
@@ -35,6 +35,10 @@ var redirects = map[string]string{
 	// For links.
 	"/faq": "/docs/user_guide/faq/",
 
+	// From 2020-05-12 to 2020-06-30, the FAQ URL was uppercase. Redirect that
+	// back to maintain any links.
+	"/docs/user_guide/FAQ/": "/docs/user_guide/faq/",
+
 	// Redirects to compatibility docs.
 	"/c": "/docs/user_guide/compatibility/",
 	"/c/linux/amd64": "/docs/user_guide/compatibility/linux/amd64/",
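Illustrative aside: how a redirect map like the one above is typically wired into net/http; the website server's actual handler code is not part of this diff, so the wiring below is an assumption for illustration only.

```go
package main

import "net/http"

// Register each entry of a redirect map as a permanent redirect.
func registerRedirects(mux *http.ServeMux, redirects map[string]string) {
	for from, to := range redirects {
		to := to // capture the per-iteration value for the closure
		mux.HandleFunc(from, func(w http.ResponseWriter, r *http.Request) {
			http.Redirect(w, r, to, http.StatusMovedPermanently)
		})
	}
}

func main() {
	mux := http.NewServeMux()
	registerRedirects(mux, map[string]string{
		// Old uppercase FAQ path kept working, matching the hunk above.
		"/docs/user_guide/FAQ/": "/docs/user_guide/faq/",
	})
	http.ListenAndServe(":8080", mux)
}
```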