78 files changed, 3993 insertions, 1332 deletions
diff --git a/pkg/abi/linux/file.go b/pkg/abi/linux/file.go
index 615e72646..7d742871a 100644
--- a/pkg/abi/linux/file.go
+++ b/pkg/abi/linux/file.go
@@ -178,6 +178,8 @@ const (
 
 // Values for preadv2/pwritev2.
 const (
+	// Note: gVisor does not implement the RWF_HIPRI feature, but the flag is
+	// accepted as a valid flag argument for preadv2/pwritev2.
 	RWF_HIPRI = 0x00000001
 	RWF_DSYNC = 0x00000002
 	RWF_SYNC  = 0x00000004
diff --git a/pkg/abi/linux/socket.go b/pkg/abi/linux/socket.go
index 6857a20a3..d5b731390 100644
--- a/pkg/abi/linux/socket.go
+++ b/pkg/abi/linux/socket.go
@@ -292,7 +292,10 @@ const SizeOfLinger = 8
 
 // TCPInfo is a collection of TCP statistics.
 //
-// From uapi/linux/tcp.h.
+// From uapi/linux/tcp.h. Newer versions of Linux continue to add new fields to
+// the end of this struct or within existing unusued space, so its size grows
+// over time. The current iteration is based on linux v4.17. New versions are
+// always backwards compatible.
 type TCPInfo struct {
 	State       uint8
 	CaState     uint8
diff --git a/pkg/refs/refcounter.go b/pkg/refs/refcounter.go
index 6ecbace9e..828e9b5c1 100644
--- a/pkg/refs/refcounter.go
+++ b/pkg/refs/refcounter.go
@@ -325,6 +325,8 @@ func (r *AtomicRefCount) finalize() {
 		msg := fmt.Sprintf("%sAtomicRefCount %p owned by %q garbage collected with ref count of %d (want 0)", note, r, r.name, n)
 		if len(r.stack) != 0 {
 			msg += ":\nCaller:\n" + formatStack(r.stack)
+		} else {
+			msg += " (enable trace logging to debug)"
 		}
 		log.Warningf(msg)
 	}
diff --git a/pkg/sentry/fs/fdpipe/pipe.go b/pkg/sentry/fs/fdpipe/pipe.go
index 5a0a67eab..669ffcb75 100644
--- a/pkg/sentry/fs/fdpipe/pipe.go
+++ b/pkg/sentry/fs/fdpipe/pipe.go
@@ -87,7 +87,7 @@ func (p *pipeOperations) init() error {
 		log.Warningf("pipe: cannot stat fd %d: %v", p.file.FD(), err)
 		return syscall.EINVAL
 	}
-	if s.Mode&syscall.S_IFIFO != syscall.S_IFIFO {
+	if (s.Mode & syscall.S_IFMT) != syscall.S_IFIFO {
 		log.Warningf("pipe: cannot load fd %d as pipe, file type: %o", p.file.FD(), s.Mode)
 		return syscall.EINVAL
 	}
diff --git a/pkg/sentry/fs/host/socket.go b/pkg/sentry/fs/host/socket.go
index 44c4ee5f2..2392787cb 100644
--- a/pkg/sentry/fs/host/socket.go
+++ b/pkg/sentry/fs/host/socket.go
@@ -65,7 +65,7 @@ type ConnectedEndpoint struct {
 	// GetSockOpt and message splitting/rejection in SendMsg, but do not
 	// prevent lots of small messages from filling the real send buffer
 	// size on the host.
-	sndbuf int `state:"nosave"`
+	sndbuf int64 `state:"nosave"`
 
 	// mu protects the fields below.
 	mu sync.RWMutex `state:"nosave"`
@@ -107,7 +107,7 @@ func (c *ConnectedEndpoint) init() *syserr.Error {
 	}
 
 	c.stype = linux.SockType(stype)
-	c.sndbuf = sndbuf
+	c.sndbuf = int64(sndbuf)
 
 	return nil
 }
@@ -202,7 +202,7 @@ func newSocket(ctx context.Context, orgfd int, saveable bool) (*fs.File, error)
 }
 
 // Send implements transport.ConnectedEndpoint.Send.
-func (c *ConnectedEndpoint) Send(data [][]byte, controlMessages transport.ControlMessages, from tcpip.FullAddress) (uintptr, bool, *syserr.Error) {
+func (c *ConnectedEndpoint) Send(data [][]byte, controlMessages transport.ControlMessages, from tcpip.FullAddress) (int64, bool, *syserr.Error) {
 	c.mu.RLock()
 	defer c.mu.RUnlock()
 
@@ -279,7 +279,7 @@ func (c *ConnectedEndpoint) EventUpdate() {
 }
 
 // Recv implements transport.Receiver.Recv.
-func (c *ConnectedEndpoint) Recv(data [][]byte, creds bool, numRights uintptr, peek bool) (uintptr, uintptr, transport.ControlMessages, bool, tcpip.FullAddress, bool, *syserr.Error) {
+func (c *ConnectedEndpoint) Recv(data [][]byte, creds bool, numRights int, peek bool) (int64, int64, transport.ControlMessages, bool, tcpip.FullAddress, bool, *syserr.Error) {
 	c.mu.RLock()
 	defer c.mu.RUnlock()
 
diff --git a/pkg/sentry/fs/host/socket_iovec.go b/pkg/sentry/fs/host/socket_iovec.go
index 05d7c79ad..af6955675 100644
--- a/pkg/sentry/fs/host/socket_iovec.go
+++ b/pkg/sentry/fs/host/socket_iovec.go
@@ -55,19 +55,19 @@ func copyFromMulti(dst []byte, src [][]byte) {
 //
 // If intermediate != nil, iovecs references intermediate rather than bufs and
 // the caller must copy to/from bufs as necessary.
-func buildIovec(bufs [][]byte, maxlen int, truncate bool) (length uintptr, iovecs []syscall.Iovec, intermediate []byte, err error) {
+func buildIovec(bufs [][]byte, maxlen int64, truncate bool) (length int64, iovecs []syscall.Iovec, intermediate []byte, err error) {
 	var iovsRequired int
 	for _, b := range bufs {
-		length += uintptr(len(b))
+		length += int64(len(b))
 		if len(b) > 0 {
 			iovsRequired++
 		}
 	}
 
 	stopLen := length
-	if length > uintptr(maxlen) {
+	if length > maxlen {
 		if truncate {
-			stopLen = uintptr(maxlen)
+			stopLen = maxlen
 			err = syserror.EAGAIN
 		} else {
 			return 0, nil, nil, syserror.EMSGSIZE
@@ -85,7 +85,7 @@ func buildIovec(bufs [][]byte, maxlen int, truncate bool) (length uintptr, iovec
 		}}, b, err
 	}
 
-	var total uintptr
+	var total int64
 	iovecs = make([]syscall.Iovec, 0, iovsRequired)
 	for i := range bufs {
 		l := len(bufs[i])
@@ -93,9 +93,9 @@ func buildIovec(bufs [][]byte, maxlen int, truncate bool) (length uintptr, iovec
 			continue
 		}
 
-		stop := l
-		if total+uintptr(stop) > stopLen {
-			stop = int(stopLen - total)
+		stop := int64(l)
+		if total+stop > stopLen {
+			stop = stopLen - total
 		}
 
 		iovecs = append(iovecs, syscall.Iovec{
@@ -103,7 +103,7 @@ func buildIovec(bufs [][]byte, maxlen int, truncate bool) (length uintptr, iovec
 			Len:  uint64(stop),
 		})
 
-		total += uintptr(stop)
+		total += stop
 		if total >= stopLen {
 			break
 		}
diff --git a/pkg/sentry/fs/host/socket_unsafe.go b/pkg/sentry/fs/host/socket_unsafe.go
index e57be0506..f3bbed7ea 100644
--- a/pkg/sentry/fs/host/socket_unsafe.go
+++ b/pkg/sentry/fs/host/socket_unsafe.go
@@ -23,7 +23,7 @@ import (
 //
 // If the total length of bufs is > maxlen, fdReadVec will do a partial read
 // and err will indicate why the message was truncated.
-func fdReadVec(fd int, bufs [][]byte, control []byte, peek bool, maxlen int) (readLen uintptr, msgLen uintptr, controlLen uint64, controlTrunc bool, err error) {
+func fdReadVec(fd int, bufs [][]byte, control []byte, peek bool, maxlen int64) (readLen int64, msgLen int64, controlLen uint64, controlTrunc bool, err error) {
 	flags := uintptr(syscall.MSG_DONTWAIT | syscall.MSG_TRUNC)
 	if peek {
 		flags |= syscall.MSG_PEEK
@@ -48,11 +48,12 @@ func fdReadVec(fd int, bufs [][]byte, control []byte, peek bool, maxlen int) (re
 		msg.Iovlen = uint64(len(iovecs))
 	}
 
-	n, _, e := syscall.RawSyscall(syscall.SYS_RECVMSG, uintptr(fd), uintptr(unsafe.Pointer(&msg)), flags)
+	rawN, _, e := syscall.RawSyscall(syscall.SYS_RECVMSG, uintptr(fd), uintptr(unsafe.Pointer(&msg)), flags)
 	if e != 0 {
 		// N.B. prioritize the syscall error over the buildIovec error.
 		return 0, 0, 0, false, e
 	}
+	n := int64(rawN)
 
 	// Copy data back to bufs.
 	if intermediate != nil {
@@ -72,7 +73,7 @@ func fdReadVec(fd int, bufs [][]byte, control []byte, peek bool, maxlen int) (re
 //
 // If the total length of bufs is > maxlen && truncate, fdWriteVec will do a
 // partial write and err will indicate why the message was truncated.
-func fdWriteVec(fd int, bufs [][]byte, maxlen int, truncate bool) (uintptr, uintptr, error) {
+func fdWriteVec(fd int, bufs [][]byte, maxlen int64, truncate bool) (int64, int64, error) {
 	length, iovecs, intermediate, err := buildIovec(bufs, maxlen, truncate)
 	if err != nil && len(iovecs) == 0 {
 		// No partial write to do, return error immediately.
@@ -96,5 +97,5 @@ func fdWriteVec(fd int, bufs [][]byte, maxlen int, truncate bool) (uintptr, uint
 		return 0, length, e
 	}
 
-	return n, length, err
+	return int64(n), length, err
 }
diff --git a/pkg/sentry/fsimpl/ext/file_description.go b/pkg/sentry/fsimpl/ext/file_description.go
index d244cf1e7..a0065343b 100644
--- a/pkg/sentry/fsimpl/ext/file_description.go
+++ b/pkg/sentry/fsimpl/ext/file_description.go
@@ -16,18 +16,16 @@ package ext
 
 import (
 	"gvisor.dev/gvisor/pkg/abi/linux"
-	"gvisor.dev/gvisor/pkg/sentry/arch"
 	"gvisor.dev/gvisor/pkg/sentry/context"
-	"gvisor.dev/gvisor/pkg/sentry/usermem"
 	"gvisor.dev/gvisor/pkg/sentry/vfs"
 	"gvisor.dev/gvisor/pkg/syserror"
-	"gvisor.dev/gvisor/pkg/waiter"
 )
 
 // fileDescription is embedded by ext implementations of
 // vfs.FileDescriptionImpl.
 type fileDescription struct {
 	vfsfd vfs.FileDescription
+	vfs.FileDescriptionDefaultImpl
 
 	// flags is the same as vfs.OpenOptions.Flags which are passed to
 	// vfs.FilesystemImpl.OpenAt.
@@ -82,29 +80,7 @@ func (fd *fileDescription) StatFS(ctx context.Context) (linux.Statfs, error) {
 	return stat, nil
 }
 
-// Readiness implements waiter.Waitable.Readiness analogously to
-// file_operations::poll == NULL in Linux.
-func (fd *fileDescription) Readiness(mask waiter.EventMask) waiter.EventMask {
-	// include/linux/poll.h:vfs_poll() => DEFAULT_POLLMASK
-	return waiter.EventIn | waiter.EventOut
-}
-
-// EventRegister implements waiter.Waitable.EventRegister analogously to
-// file_operations::poll == NULL in Linux.
-func (fd *fileDescription) EventRegister(e *waiter.Entry, mask waiter.EventMask) {}
-
-// EventUnregister implements waiter.Waitable.EventUnregister analogously to
-// file_operations::poll == NULL in Linux.
-func (fd *fileDescription) EventUnregister(e *waiter.Entry) {}
-
 // Sync implements vfs.FileDescriptionImpl.Sync.
 func (fd *fileDescription) Sync(ctx context.Context) error {
 	return nil
 }
-
-// Ioctl implements vfs.FileDescriptionImpl.Ioctl.
-func (fd *fileDescription) Ioctl(ctx context.Context, uio usermem.IO, args arch.SyscallArguments) (uintptr, error) {
-	// ioctl(2) specifies that ENOTTY must be returned if the file descriptor is
-	// not associated with a character special device (which is unimplemented).
-	return 0, syserror.ENOTTY
-}
diff --git a/pkg/sentry/fsimpl/memfs/memfs.go b/pkg/sentry/fsimpl/memfs/memfs.go
index 59612da14..45cd42b3e 100644
--- a/pkg/sentry/fsimpl/memfs/memfs.go
+++ b/pkg/sentry/fsimpl/memfs/memfs.go
@@ -258,6 +258,7 @@ func (i *inode) direntType() uint8 {
 // vfs.FileDescriptionImpl.
 type fileDescription struct {
 	vfsfd vfs.FileDescription
+	vfs.FileDescriptionDefaultImpl
 
 	flags uint32 // status flags; immutable
 }
diff --git a/pkg/sentry/fsimpl/memfs/regular_file.go b/pkg/sentry/fsimpl/memfs/regular_file.go
index 7a16d5719..55f869798 100644
--- a/pkg/sentry/fsimpl/memfs/regular_file.go
+++ b/pkg/sentry/fsimpl/memfs/regular_file.go
@@ -46,7 +46,6 @@ func (fs *filesystem) newRegularFile(creds *auth.Credentials, mode uint16) *inod
 
 type regularFileFD struct {
 	fileDescription
-	vfs.FileDescriptionDefaultImpl
 
 	// These are immutable.
 	readable bool
diff --git a/pkg/sentry/fsimpl/proc/BUILD b/pkg/sentry/fsimpl/proc/BUILD
new file mode 100644
index 000000000..3d8a4deaf
--- /dev/null
+++ b/pkg/sentry/fsimpl/proc/BUILD
@@ -0,0 +1,49 @@
+load("//tools/go_stateify:defs.bzl", "go_library", "go_test")
+
+package(licenses = ["notice"])
+
+go_library(
+    name = "proc",
+    srcs = [
+        "filesystems.go",
+        "loadavg.go",
+        "meminfo.go",
+        "mounts.go",
+        "net.go",
+        "proc.go",
+        "stat.go",
+        "sys.go",
+        "task.go",
+        "version.go",
+    ],
+    importpath = "gvisor.dev/gvisor/pkg/sentry/fsimpl/proc",
+    deps = [
+        "//pkg/abi/linux",
+        "//pkg/binary",
+        "//pkg/log",
+        "//pkg/sentry/context",
+        "//pkg/sentry/fs",
+        "//pkg/sentry/inet",
+        "//pkg/sentry/kernel",
+        "//pkg/sentry/limits",
+        "//pkg/sentry/mm",
+        "//pkg/sentry/socket",
+        "//pkg/sentry/socket/unix",
+        "//pkg/sentry/socket/unix/transport",
+        "//pkg/sentry/usage",
+        "//pkg/sentry/usermem",
+        "//pkg/sentry/vfs",
+    ],
+)
+
+go_test(
+    name = "proc_test",
+    size = "small",
+    srcs = ["net_test.go"],
+    embed = [":proc"],
+    deps = [
+        "//pkg/abi/linux",
+        "//pkg/sentry/context/contexttest",
+        "//pkg/sentry/inet",
+    ],
+)
diff --git a/pkg/sentry/fsimpl/proc/filesystems.go b/pkg/sentry/fsimpl/proc/filesystems.go
new file mode 100644
index 000000000..c36c4aff5
--- /dev/null
+++ b/pkg/sentry/fsimpl/proc/filesystems.go
@@ -0,0 +1,25 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package proc
+
+// filesystemsData implements vfs.DynamicBytesSource for /proc/filesystems.
+//
+// +stateify savable
+type filesystemsData struct{}
+
+// TODO(b/138862512): Implement vfs.DynamicBytesSource.Generate for
+// filesystemsData. We would need to retrive filesystem names from
+// vfs.VirtualFilesystem. Also needs vfs replacement for
+// fs.Filesystem.AllowUserList() and fs.FilesystemRequiresDev.
diff --git a/pkg/sentry/fsimpl/proc/loadavg.go b/pkg/sentry/fsimpl/proc/loadavg.go
new file mode 100644
index 000000000..9135afef1
--- /dev/null
+++ b/pkg/sentry/fsimpl/proc/loadavg.go
@@ -0,0 +1,40 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package proc
+
+import (
+	"bytes"
+	"fmt"
+
+	"gvisor.dev/gvisor/pkg/sentry/context"
+	"gvisor.dev/gvisor/pkg/sentry/vfs"
+)
+
+// loadavgData backs /proc/loadavg.
+//
+// +stateify savable
+type loadavgData struct{}
+
+var _ vfs.DynamicBytesSource = (*loadavgData)(nil)
+
+// Generate implements vfs.DynamicBytesSource.Generate.
+func (d *loadavgData) Generate(ctx context.Context, buf *bytes.Buffer) error {
+	// TODO(b/62345059): Include real data in fields.
+	// Column 1-3: CPU and IO utilization of the last 1, 5, and 10 minute periods.
+	// Column 4-5: currently running processes and the total number of processes.
+	// Column 6: the last process ID used.
+	fmt.Fprintf(buf, "%.2f %.2f %.2f %d/%d %d\n", 0.00, 0.00, 0.00, 0, 0, 0)
+	return nil
+}
diff --git a/pkg/sentry/fsimpl/proc/meminfo.go b/pkg/sentry/fsimpl/proc/meminfo.go
new file mode 100644
index 000000000..9a827cd66
--- /dev/null
+++ b/pkg/sentry/fsimpl/proc/meminfo.go
@@ -0,0 +1,77 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package proc
+
+import (
+	"bytes"
+	"fmt"
+
+	"gvisor.dev/gvisor/pkg/sentry/context"
+	"gvisor.dev/gvisor/pkg/sentry/kernel"
+	"gvisor.dev/gvisor/pkg/sentry/usage"
+	"gvisor.dev/gvisor/pkg/sentry/usermem"
+	"gvisor.dev/gvisor/pkg/sentry/vfs"
+)
+
+// meminfoData implements vfs.DynamicBytesSource for /proc/meminfo.
+//
+// +stateify savable
+type meminfoData struct {
+	// k is the owning Kernel.
+	k *kernel.Kernel
+}
+
+var _ vfs.DynamicBytesSource = (*meminfoData)(nil)
+
+// Generate implements vfs.DynamicBytesSource.Generate.
+func (d *meminfoData) Generate(ctx context.Context, buf *bytes.Buffer) error {
+	mf := d.k.MemoryFile()
+	mf.UpdateUsage()
+	snapshot, totalUsage := usage.MemoryAccounting.Copy()
+	totalSize := usage.TotalMemory(mf.TotalSize(), totalUsage)
+	anon := snapshot.Anonymous + snapshot.Tmpfs
+	file := snapshot.PageCache + snapshot.Mapped
+	// We don't actually have active/inactive LRUs, so just make up numbers.
+	activeFile := (file / 2) &^ (usermem.PageSize - 1)
+	inactiveFile := file - activeFile
+
+	fmt.Fprintf(buf, "MemTotal:       %8d kB\n", totalSize/1024)
+	memFree := (totalSize - totalUsage) / 1024
+	// We use MemFree as MemAvailable because we don't swap.
+	// TODO(rahat): When reclaim is implemented the value of MemAvailable
+	// should change.
+	fmt.Fprintf(buf, "MemFree:        %8d kB\n", memFree)
+	fmt.Fprintf(buf, "MemAvailable:   %8d kB\n", memFree)
+	fmt.Fprintf(buf, "Buffers:               0 kB\n") // memory usage by block devices
+	fmt.Fprintf(buf, "Cached:         %8d kB\n", (file+snapshot.Tmpfs)/1024)
+	// Emulate a system with no swap, which disables inactivation of anon pages.
+	fmt.Fprintf(buf, "SwapCache:             0 kB\n")
+	fmt.Fprintf(buf, "Active:         %8d kB\n", (anon+activeFile)/1024)
+	fmt.Fprintf(buf, "Inactive:       %8d kB\n", inactiveFile/1024)
+	fmt.Fprintf(buf, "Active(anon):   %8d kB\n", anon/1024)
+	fmt.Fprintf(buf, "Inactive(anon):        0 kB\n")
+	fmt.Fprintf(buf, "Active(file):   %8d kB\n", activeFile/1024)
+	fmt.Fprintf(buf, "Inactive(file): %8d kB\n", inactiveFile/1024)
+	fmt.Fprintf(buf, "Unevictable:           0 kB\n") // TODO(b/31823263)
+	fmt.Fprintf(buf, "Mlocked:               0 kB\n") // TODO(b/31823263)
+	fmt.Fprintf(buf, "SwapTotal:             0 kB\n")
+	fmt.Fprintf(buf, "SwapFree:              0 kB\n")
+	fmt.Fprintf(buf, "Dirty:                 0 kB\n")
+	fmt.Fprintf(buf, "Writeback:             0 kB\n")
+	fmt.Fprintf(buf, "AnonPages:      %8d kB\n", anon/1024)
+	fmt.Fprintf(buf, "Mapped:         %8d kB\n", file/1024) // doesn't count mapped tmpfs, which we don't know
+	fmt.Fprintf(buf, "Shmem:          %8d kB\n", snapshot.Tmpfs/1024)
+	return nil
+}
diff --git a/pkg/sentry/fsimpl/proc/mounts.go b/pkg/sentry/fsimpl/proc/mounts.go
new file mode 100644
index 000000000..e81b1e910
--- /dev/null
+++ b/pkg/sentry/fsimpl/proc/mounts.go
@@ -0,0 +1,33 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package proc
+
+import "gvisor.dev/gvisor/pkg/sentry/kernel"
+
+// TODO(b/138862512): Implement mountInfoFile and mountsFile.
+
+// mountInfoFile implements vfs.DynamicBytesSource for /proc/[pid]/mountinfo.
+//
+// +stateify savable
+type mountInfoFile struct {
+	t *kernel.Task
+}
+
+// mountsFile implements vfs.DynamicBytesSource for /proc/[pid]/mounts.
+//
+// +stateify savable
+type mountsFile struct {
+	t *kernel.Task
+}
diff --git a/pkg/sentry/fsimpl/proc/net.go b/pkg/sentry/fsimpl/proc/net.go
new file mode 100644
index 000000000..fd46eebf8
--- /dev/null
+++ b/pkg/sentry/fsimpl/proc/net.go
@@ -0,0 +1,338 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package proc
+
+import (
+	"bytes"
+	"fmt"
+
+	"gvisor.dev/gvisor/pkg/abi/linux"
+	"gvisor.dev/gvisor/pkg/binary"
+	"gvisor.dev/gvisor/pkg/log"
+	"gvisor.dev/gvisor/pkg/sentry/context"
+	"gvisor.dev/gvisor/pkg/sentry/fs"
+	"gvisor.dev/gvisor/pkg/sentry/inet"
+	"gvisor.dev/gvisor/pkg/sentry/kernel"
+	"gvisor.dev/gvisor/pkg/sentry/socket"
+	"gvisor.dev/gvisor/pkg/sentry/socket/unix"
+	"gvisor.dev/gvisor/pkg/sentry/socket/unix/transport"
+	"gvisor.dev/gvisor/pkg/sentry/vfs"
+)
+
+// ifinet6 implements vfs.DynamicBytesSource for /proc/net/if_inet6.
+//
+// +stateify savable
+type ifinet6 struct {
+	s inet.Stack
+}
+
+var _ vfs.DynamicBytesSource = (*ifinet6)(nil)
+
+func (n *ifinet6) contents() []string {
+	var lines []string
+	nics := n.s.Interfaces()
+	for id, naddrs := range n.s.InterfaceAddrs() {
+		nic, ok := nics[id]
+		if !ok {
+			// NIC was added after NICNames was called. We'll just
+			// ignore it.
+			continue
+		}
+
+		for _, a := range naddrs {
+			// IPv6 only.
+			if a.Family != linux.AF_INET6 {
+				continue
+			}
+
+			// Fields:
+			// IPv6 address displayed in 32 hexadecimal chars without colons
+			// Netlink device number (interface index) in hexadecimal (use nic id)
+			// Prefix length in hexadecimal
+			// Scope value (use 0)
+			// Interface flags
+			// Device name
+			lines = append(lines, fmt.Sprintf("%032x %02x %02x %02x %02x %8s\n", a.Addr, id, a.PrefixLen, 0, a.Flags, nic.Name))
+		}
+	}
+	return lines
+}
+
+// Generate implements vfs.DynamicBytesSource.Generate.
+func (n *ifinet6) Generate(ctx context.Context, buf *bytes.Buffer) error {
+	for _, l := range n.contents() {
+		buf.WriteString(l)
+	}
+	return nil
+}
+
+// netDev implements vfs.DynamicBytesSource for /proc/net/dev.
+//
+// +stateify savable
+type netDev struct {
+	s inet.Stack
+}
+
+var _ vfs.DynamicBytesSource = (*netDev)(nil)
+
+// Generate implements vfs.DynamicBytesSource.Generate.
+func (n *netDev) Generate(ctx context.Context, buf *bytes.Buffer) error {
+	interfaces := n.s.Interfaces()
+	buf.WriteString("Inter-|   Receive                                                |  Transmit\n")
+	buf.WriteString(" face |bytes    packets errs drop fifo frame compressed multicast|bytes    packets errs drop fifo colls carrier compressed\n")
+
+	for _, i := range interfaces {
+		// Implements the same format as
+		// net/core/net-procfs.c:dev_seq_printf_stats.
+		var stats inet.StatDev
+		if err := n.s.Statistics(&stats, i.Name); err != nil {
+			log.Warningf("Failed to retrieve interface statistics for %v: %v", i.Name, err)
+			continue
+		}
+		fmt.Fprintf(
+			buf,
+			"%6s: %7d %7d %4d %4d %4d %5d %10d %9d %8d %7d %4d %4d %4d %5d %7d %10d\n",
+			i.Name,
+			// Received
+			stats[0], // bytes
+			stats[1], // packets
+			stats[2], // errors
+			stats[3], // dropped
+			stats[4], // fifo
+			stats[5], // frame
+			stats[6], // compressed
+			stats[7], // multicast
+			// Transmitted
+			stats[8],  // bytes
+			stats[9],  // packets
+			stats[10], // errors
+			stats[11], // dropped
+			stats[12], // fifo
+			stats[13], // frame
+			stats[14], // compressed
+			stats[15], // multicast
+		)
+	}
+
+	return nil
+}
+
+// netUnix implements vfs.DynamicBytesSource for /proc/net/unix.
+//
+// +stateify savable
+type netUnix struct {
+	k *kernel.Kernel
+}
+
+var _ vfs.DynamicBytesSource = (*netUnix)(nil)
+
+// Generate implements vfs.DynamicBytesSource.Generate.
+func (n *netUnix) Generate(ctx context.Context, buf *bytes.Buffer) error {
+	buf.WriteString("Num       RefCount Protocol Flags    Type St Inode Path\n")
+	for _, se := range n.k.ListSockets() {
+		s := se.Sock.Get()
+		if s == nil {
+			log.Debugf("Couldn't resolve weakref %v in socket table, racing with destruction?", se.Sock)
+			continue
+		}
+		sfile := s.(*fs.File)
+		if family, _, _ := sfile.FileOperations.(socket.Socket).Type(); family != linux.AF_UNIX {
+			s.DecRef()
+			// Not a unix socket.
+			continue
+		}
+		sops := sfile.FileOperations.(*unix.SocketOperations)
+
+		addr, err := sops.Endpoint().GetLocalAddress()
+		if err != nil {
+			log.Warningf("Failed to retrieve socket name from %+v: %v", sfile, err)
+			addr.Addr = "<unknown>"
+		}
+
+		sockFlags := 0
+		if ce, ok := sops.Endpoint().(transport.ConnectingEndpoint); ok {
+			if ce.Listening() {
+				// For unix domain sockets, linux reports a single flag
+				// value if the socket is listening, of __SO_ACCEPTCON.
+				sockFlags = linux.SO_ACCEPTCON
+			}
+		}
+
+		// In the socket entry below, the value for the 'Num' field requires
+		// some consideration. Linux prints the address to the struct
+		// unix_sock representing a socket in the kernel, but may redact the
+		// value for unprivileged users depending on the kptr_restrict
+		// sysctl.
+		//
+		// One use for this field is to allow a privileged user to
+		// introspect into the kernel memory to determine information about
+		// a socket not available through procfs, such as the socket's peer.
+		//
+		// In gvisor, returning a pointer to our internal structures would
+		// be pointless, as it wouldn't match the memory layout for struct
+		// unix_sock, making introspection difficult. We could populate a
+		// struct unix_sock with the appropriate data, but even that
+		// requires consideration for which kernel version to emulate, as
+		// the definition of this struct changes over time.
+		//
+		// For now, we always redact this pointer.
+		fmt.Fprintf(buf, "%#016p: %08X %08X %08X %04X %02X %5d",
+			(*unix.SocketOperations)(nil), // Num, pointer to kernel socket struct.
+			sfile.ReadRefs()-1,            // RefCount, don't count our own ref.
+			0,                             // Protocol, always 0 for UDS.
+			sockFlags,                     // Flags.
+			sops.Endpoint().Type(),        // Type.
+			sops.State(),                  // State.
+			sfile.InodeID(),               // Inode.
+		)
+
+		// Path
+		if len(addr.Addr) != 0 {
+			if addr.Addr[0] == 0 {
+				// Abstract path.
+				fmt.Fprintf(buf, " @%s", string(addr.Addr[1:]))
+			} else {
+				fmt.Fprintf(buf, " %s", string(addr.Addr))
+			}
+		}
+		fmt.Fprintf(buf, "\n")
+
+		s.DecRef()
+	}
+	return nil
+}
+
+// netTCP implements vfs.DynamicBytesSource for /proc/net/tcp.
+//
+// +stateify savable
+type netTCP struct {
+	k *kernel.Kernel
+}
+
+var _ vfs.DynamicBytesSource = (*netTCP)(nil)
+
+func (n *netTCP) Generate(ctx context.Context, buf *bytes.Buffer) error {
+	t := kernel.TaskFromContext(ctx)
+	buf.WriteString("  sl  local_address rem_address   st tx_queue rx_queue tr tm->when retrnsmt   uid  timeout inode                                                     \n")
+	for _, se := range n.k.ListSockets() {
+		s := se.Sock.Get()
+		if s == nil {
+			log.Debugf("Couldn't resolve weakref %+v in socket table, racing with destruction?", se.Sock)
+			continue
+		}
+		sfile := s.(*fs.File)
+		sops, ok := sfile.FileOperations.(socket.Socket)
+		if !ok {
+			panic(fmt.Sprintf("Found non-socket file in socket table: %+v", sfile))
+		}
+		if family, stype, _ := sops.Type(); !(family == linux.AF_INET && stype == linux.SOCK_STREAM) {
+			s.DecRef()
+			// Not tcp4 sockets.
+			continue
+		}
+
+		// Linux's documentation for the fields below can be found at
+		// https://www.kernel.org/doc/Documentation/networking/proc_net_tcp.txt.
+		// For Linux's implementation, see net/ipv4/tcp_ipv4.c:get_tcp4_sock().
+		// Note that the header doesn't contain labels for all the fields.
+
+		// Field: sl; entry number.
+		fmt.Fprintf(buf, "%4d: ", se.ID)
+
+		portBuf := make([]byte, 2)
+
+		// Field: local_adddress.
+		var localAddr linux.SockAddrInet
+		if local, _, err := sops.GetSockName(t); err == nil {
+			localAddr = *local.(*linux.SockAddrInet)
+		}
+		binary.LittleEndian.PutUint16(portBuf, localAddr.Port)
+		fmt.Fprintf(buf, "%08X:%04X ",
+			binary.LittleEndian.Uint32(localAddr.Addr[:]),
+			portBuf)
+
+		// Field: rem_address.
+		var remoteAddr linux.SockAddrInet
+		if remote, _, err := sops.GetPeerName(t); err == nil {
+			remoteAddr = *remote.(*linux.SockAddrInet)
+		}
+		binary.LittleEndian.PutUint16(portBuf, remoteAddr.Port)
+		fmt.Fprintf(buf, "%08X:%04X ",
+			binary.LittleEndian.Uint32(remoteAddr.Addr[:]),
+			portBuf)
+
+		// Field: state; socket state.
+		fmt.Fprintf(buf, "%02X ", sops.State())
+
+		// Field: tx_queue, rx_queue; number of packets in the transmit and
+		// receive queue. Unimplemented.
+		fmt.Fprintf(buf, "%08X:%08X ", 0, 0)
+
+		// Field: tr, tm->when; timer active state and number of jiffies
+		// until timer expires. Unimplemented.
+		fmt.Fprintf(buf, "%02X:%08X ", 0, 0)
+
+		// Field: retrnsmt; number of unrecovered RTO timeouts.
+		// Unimplemented.
+		fmt.Fprintf(buf, "%08X ", 0)
+
+		// Field: uid.
+		uattr, err := sfile.Dirent.Inode.UnstableAttr(ctx)
+		if err != nil {
+			log.Warningf("Failed to retrieve unstable attr for socket file: %v", err)
+			fmt.Fprintf(buf, "%5d ", 0)
+		} else {
+			fmt.Fprintf(buf, "%5d ", uint32(uattr.Owner.UID.In(t.UserNamespace()).OrOverflow()))
+		}
+
+		// Field: timeout; number of unanswered 0-window probes.
+		// Unimplemented.
+		fmt.Fprintf(buf, "%8d ", 0)
+
+		// Field: inode.
+		fmt.Fprintf(buf, "%8d ", sfile.InodeID())
+
+		// Field: refcount. Don't count the ref we obtain while deferencing
+		// the weakref to this socket.
+		fmt.Fprintf(buf, "%d ", sfile.ReadRefs()-1)
+
+		// Field: Socket struct address. Redacted due to the same reason as
+		// the 'Num' field in /proc/net/unix, see netUnix.ReadSeqFileData.
+		fmt.Fprintf(buf, "%#016p ", (*socket.Socket)(nil))
+
+		// Field: retransmit timeout. Unimplemented.
+		fmt.Fprintf(buf, "%d ", 0)
+
+		// Field: predicted tick of soft clock (delayed ACK control data).
+		// Unimplemented.
+		fmt.Fprintf(buf, "%d ", 0)
+
+		// Field: (ack.quick<<1)|ack.pingpong, Unimplemented.
+		fmt.Fprintf(buf, "%d ", 0)
+
+		// Field: sending congestion window, Unimplemented.
+		fmt.Fprintf(buf, "%d ", 0)
+
+		// Field: Slow start size threshold, -1 if threshold >= 0xFFFF.
+		// Unimplemented, report as large threshold.
+		fmt.Fprintf(buf, "%d", -1)
+
+		fmt.Fprintf(buf, "\n")
+
+		s.DecRef()
+	}
+
+	return nil
+}
diff --git a/pkg/sentry/fsimpl/proc/net_test.go b/pkg/sentry/fsimpl/proc/net_test.go
new file mode 100644
index 000000000..20a77a8ca
--- /dev/null
+++ b/pkg/sentry/fsimpl/proc/net_test.go
@@ -0,0 +1,78 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package proc
+
+import (
+	"bytes"
+	"reflect"
+	"testing"
+
+	"gvisor.dev/gvisor/pkg/abi/linux"
+	"gvisor.dev/gvisor/pkg/sentry/context/contexttest"
+	"gvisor.dev/gvisor/pkg/sentry/inet"
+)
+
+func newIPv6TestStack() *inet.TestStack {
+	s := inet.NewTestStack()
+	s.SupportsIPv6Flag = true
+	return s
+}
+
+func TestIfinet6NoAddresses(t *testing.T) {
+	n := &ifinet6{s: newIPv6TestStack()}
+	var buf bytes.Buffer
+	n.Generate(contexttest.Context(t), &buf)
+	if buf.Len() > 0 {
+		t.Errorf("n.Generate() generated = %v, want = %v", buf.Bytes(), []byte{})
+	}
+}
+
+func TestIfinet6(t *testing.T) {
+	s := newIPv6TestStack()
+	s.InterfacesMap[1] = inet.Interface{Name: "eth0"}
+	s.InterfaceAddrsMap[1] = []inet.InterfaceAddr{
+		{
+			Family:    linux.AF_INET6,
+			PrefixLen: 128,
+			Addr:      []byte("\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"),
+		},
+	}
+	s.InterfacesMap[2] = inet.Interface{Name: "eth1"}
+	s.InterfaceAddrsMap[2] = []inet.InterfaceAddr{
+		{
+			Family:    linux.AF_INET6,
+			PrefixLen: 128,
+			Addr:      []byte("\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"),
+		},
+	}
+	want := map[string]struct{}{
+		"000102030405060708090a0b0c0d0e0f 01 80 00 00     eth0\n": {},
+		"101112131415161718191a1b1c1d1e1f 02 80 00 00     eth1\n": {},
+	}
+
+	n := &ifinet6{s: s}
+	contents := n.contents()
+	if len(contents) != len(want) {
+		t.Errorf("Got len(n.contents()) = %d, want = %d", len(contents), len(want))
+	}
+	got := map[string]struct{}{}
+	for _, l := range contents {
+		got[l] = struct{}{}
+	}
+
+	if !reflect.DeepEqual(got, want) {
+		t.Errorf("Got n.contents() = %v, want = %v", got, want)
+	}
+}
diff --git a/pkg/sentry/fsimpl/proc/proc.go b/pkg/sentry/fsimpl/proc/proc.go
new file mode 100644
index 000000000..31dec36de
--- /dev/null
+++ b/pkg/sentry/fsimpl/proc/proc.go
@@ -0,0 +1,16 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package proc implements a partial in-memory file system for procfs.
+package proc
diff --git a/pkg/sentry/fsimpl/proc/stat.go b/pkg/sentry/fsimpl/proc/stat.go
new file mode 100644
index 000000000..720db3828
--- /dev/null
+++ b/pkg/sentry/fsimpl/proc/stat.go
@@ -0,0 +1,127 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package proc
+
+import (
+	"bytes"
+	"fmt"
+
+	"gvisor.dev/gvisor/pkg/abi/linux"
+	"gvisor.dev/gvisor/pkg/sentry/context"
+	"gvisor.dev/gvisor/pkg/sentry/kernel"
+	"gvisor.dev/gvisor/pkg/sentry/vfs"
+)
+
+// cpuStats contains the breakdown of CPU time for /proc/stat.
+type cpuStats struct {
+	// user is time spent in userspace tasks with non-positive niceness.
+	user uint64
+
+	// nice is time spent in userspace tasks with positive niceness.
+	nice uint64
+
+	// system is time spent in non-interrupt kernel context.
+	system uint64
+
+	// idle is time spent idle.
+	idle uint64
+
+	// ioWait is time spent waiting for IO.
+	ioWait uint64
+
+	// irq is time spent in interrupt context.
+	irq uint64
+
+	// softirq is time spent in software interrupt context.
+	softirq uint64
+
+	// steal is involuntary wait time.
+	steal uint64
+
+	// guest is time spent in guests with non-positive niceness.
+	guest uint64
+
+	// guestNice is time spent in guests with positive niceness.
+	guestNice uint64
+}
+
+// String implements fmt.Stringer.
+func (c cpuStats) String() string {
+	return fmt.Sprintf("%d %d %d %d %d %d %d %d %d %d", c.user, c.nice, c.system, c.idle, c.ioWait, c.irq, c.softirq, c.steal, c.guest, c.guestNice)
+}
+
+// statData implements vfs.DynamicBytesSource for /proc/stat.
+//
+// +stateify savable
+type statData struct {
+	// k is the owning Kernel.
+	k *kernel.Kernel
+}
+
+var _ vfs.DynamicBytesSource = (*statData)(nil)
+
+// Generate implements vfs.DynamicBytesSource.Generate.
+func (s *statData) Generate(ctx context.Context, buf *bytes.Buffer) error {
+	// TODO(b/37226836): We currently export only zero CPU stats. We could
+	// at least provide some aggregate stats.
+	var cpu cpuStats
+	fmt.Fprintf(buf, "cpu  %s\n", cpu)
+
+	for c, max := uint(0), s.k.ApplicationCores(); c < max; c++ {
+		fmt.Fprintf(buf, "cpu%d %s\n", c, cpu)
+	}
+
+	// The total number of interrupts is dependent on the CPUs and PCI
+	// devices on the system. See arch_probe_nr_irqs.
+	//
+	// Since we don't report real interrupt stats, just choose an arbitrary
+	// value from a representative VM.
+	const numInterrupts = 256
+
+	// The Kernel doesn't handle real interrupts, so report all zeroes.
+	// TODO(b/37226836): We could count page faults as #PF.
+	fmt.Fprintf(buf, "intr 0") // total
+	for i := 0; i < numInterrupts; i++ {
+		fmt.Fprintf(buf, " 0")
+	}
+	fmt.Fprintf(buf, "\n")
+
+	// Total number of context switches.
+	// TODO(b/37226836): Count this.
+	fmt.Fprintf(buf, "ctxt 0\n")
+
+	// CLOCK_REALTIME timestamp from boot, in seconds.
+	fmt.Fprintf(buf, "btime %d\n", s.k.Timekeeper().BootTime().Seconds())
+
+	// Total number of clones.
+	// TODO(b/37226836): Count this.
+	fmt.Fprintf(buf, "processes 0\n")
+
+	// Number of runnable tasks.
+	// TODO(b/37226836): Count this.
+	fmt.Fprintf(buf, "procs_running 0\n")
+
+	// Number of tasks waiting on IO.
+	// TODO(b/37226836): Count this.
+	fmt.Fprintf(buf, "procs_blocked 0\n")
+
+	// Number of each softirq handled.
+	fmt.Fprintf(buf, "softirq 0") // total
+	for i := 0; i < linux.NumSoftIRQ; i++ {
+		fmt.Fprintf(buf, " 0")
+	}
+	fmt.Fprintf(buf, "\n")
+	return nil
+}
diff --git a/pkg/sentry/fsimpl/proc/sys.go b/pkg/sentry/fsimpl/proc/sys.go
new file mode 100644
index 000000000..b88256e12
--- /dev/null
+++ b/pkg/sentry/fsimpl/proc/sys.go
@@ -0,0 +1,51 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package proc
+
+import (
+	"bytes"
+	"fmt"
+
+	"gvisor.dev/gvisor/pkg/sentry/context"
+	"gvisor.dev/gvisor/pkg/sentry/kernel"
+	"gvisor.dev/gvisor/pkg/sentry/vfs"
+)
+
+// mmapMinAddrData implements vfs.DynamicBytesSource for
+// /proc/sys/vm/mmap_min_addr.
+//
+// +stateify savable
+type mmapMinAddrData struct {
+	k *kernel.Kernel
+}
+
+var _ vfs.DynamicBytesSource = (*mmapMinAddrData)(nil)
+
+// Generate implements vfs.DynamicBytesSource.Generate.
+func (d *mmapMinAddrData) Generate(ctx context.Context, buf *bytes.Buffer) error {
+	fmt.Fprintf(buf, "%d\n", d.k.Platform.MinUserAddress())
+	return nil
+}
+
+// +stateify savable
+type overcommitMemory struct{}
+
+var _ vfs.DynamicBytesSource = (*overcommitMemory)(nil)
+
+// Generate implements vfs.DynamicBytesSource.Generate.
+func (d *overcommitMemory) Generate(ctx context.Context, buf *bytes.Buffer) error {
+	fmt.Fprintf(buf, "0\n")
+	return nil
+}
diff --git a/pkg/sentry/fsimpl/proc/task.go b/pkg/sentry/fsimpl/proc/task.go
new file mode 100644
index 000000000..c46e05c3a
--- /dev/null
+++ b/pkg/sentry/fsimpl/proc/task.go
@@ -0,0 +1,261 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package proc
+
+import (
+	"bytes"
+	"fmt"
+
+	"gvisor.dev/gvisor/pkg/abi/linux"
+	"gvisor.dev/gvisor/pkg/sentry/context"
+	"gvisor.dev/gvisor/pkg/sentry/kernel"
+	"gvisor.dev/gvisor/pkg/sentry/limits"
+	"gvisor.dev/gvisor/pkg/sentry/mm"
+	"gvisor.dev/gvisor/pkg/sentry/usage"
+	"gvisor.dev/gvisor/pkg/sentry/usermem"
+	"gvisor.dev/gvisor/pkg/sentry/vfs"
+)
+
+// mapsCommon is embedded by mapsData and smapsData.
+type mapsCommon struct {
+	t *kernel.Task
+}
+
+// mm gets the kernel task's MemoryManager. No additional reference is taken on
+// mm here. This is safe because MemoryManager.destroy is required to leave the
+// MemoryManager in a state where it's still usable as a DynamicBytesSource.
+func (md *mapsCommon) mm() *mm.MemoryManager {
+	var tmm *mm.MemoryManager
+	md.t.WithMuLocked(func(t *kernel.Task) {
+		if mm := t.MemoryManager(); mm != nil {
+			tmm = mm
+		}
+	})
+	return tmm
+}
+
+// mapsData implements vfs.DynamicBytesSource for /proc/[pid]/maps.
+//
+// +stateify savable
+type mapsData struct {
+	mapsCommon
+}
+
+var _ vfs.DynamicBytesSource = (*mapsData)(nil)
+
+// Generate implements vfs.DynamicBytesSource.Generate.
+func (md *mapsData) Generate(ctx context.Context, buf *bytes.Buffer) error {
+	if mm := md.mm(); mm != nil {
+		mm.ReadMapsDataInto(ctx, buf)
+	}
+	return nil
+}
+
+// smapsData implements vfs.DynamicBytesSource for /proc/[pid]/smaps.
+//
+// +stateify savable
+type smapsData struct {
+	mapsCommon
+}
+
+var _ vfs.DynamicBytesSource = (*smapsData)(nil)
+
+// Generate implements vfs.DynamicBytesSource.Generate.
+func (sd *smapsData) Generate(ctx context.Context, buf *bytes.Buffer) error {
+	if mm := sd.mm(); mm != nil {
+		mm.ReadSmapsDataInto(ctx, buf)
+	}
+	return nil
+}
+
+// +stateify savable
+type taskStatData struct {
+	t *kernel.Task
+
+	// If tgstats is true, accumulate fault stats (not implemented) and CPU
+	// time across all tasks in t's thread group.
+	tgstats bool
+
+	// pidns is the PID namespace associated with the proc filesystem that
+	// includes the file using this statData.
+	pidns *kernel.PIDNamespace
+}
+
+var _ vfs.DynamicBytesSource = (*taskStatData)(nil)
+
+// Generate implements vfs.DynamicBytesSource.Generate.
+func (s *taskStatData) Generate(ctx context.Context, buf *bytes.Buffer) error {
+	fmt.Fprintf(buf, "%d ", s.pidns.IDOfTask(s.t))
+	fmt.Fprintf(buf, "(%s) ", s.t.Name())
+	fmt.Fprintf(buf, "%c ", s.t.StateStatus()[0])
+	ppid := kernel.ThreadID(0)
+	if parent := s.t.Parent(); parent != nil {
+		ppid = s.pidns.IDOfThreadGroup(parent.ThreadGroup())
+	}
+	fmt.Fprintf(buf, "%d ", ppid)
+	fmt.Fprintf(buf, "%d ", s.pidns.IDOfProcessGroup(s.t.ThreadGroup().ProcessGroup()))
+	fmt.Fprintf(buf, "%d ", s.pidns.IDOfSession(s.t.ThreadGroup().Session()))
+	fmt.Fprintf(buf, "0 0 " /* tty_nr tpgid */)
+	fmt.Fprintf(buf, "0 " /* flags */)
+	fmt.Fprintf(buf, "0 0 0 0 " /* minflt cminflt majflt cmajflt */)
+	var cputime usage.CPUStats
+	if s.tgstats {
+		cputime = s.t.ThreadGroup().CPUStats()
+	} else {
+		cputime = s.t.CPUStats()
+	}
+	fmt.Fprintf(buf, "%d %d ", linux.ClockTFromDuration(cputime.UserTime), linux.ClockTFromDuration(cputime.SysTime))
+	cputime = s.t.ThreadGroup().JoinedChildCPUStats()
+	fmt.Fprintf(buf, "%d %d ", linux.ClockTFromDuration(cputime.UserTime), linux.ClockTFromDuration(cputime.SysTime))
+	fmt.Fprintf(buf, "%d %d ", s.t.Priority(), s.t.Niceness())
+	fmt.Fprintf(buf, "%d ", s.t.ThreadGroup().Count())
+
+	// itrealvalue. Since kernel 2.6.17, this field is no longer
+	// maintained, and is hard coded as 0.
+	fmt.Fprintf(buf, "0 ")
+
+	// Start time is relative to boot time, expressed in clock ticks.
+	fmt.Fprintf(buf, "%d ", linux.ClockTFromDuration(s.t.StartTime().Sub(s.t.Kernel().Timekeeper().BootTime())))
+
+	var vss, rss uint64
+	s.t.WithMuLocked(func(t *kernel.Task) {
+		if mm := t.MemoryManager(); mm != nil {
+			vss = mm.VirtualMemorySize()
+			rss = mm.ResidentSetSize()
+		}
+	})
+	fmt.Fprintf(buf, "%d %d ", vss, rss/usermem.PageSize)
+
+	// rsslim.
+	fmt.Fprintf(buf, "%d ", s.t.ThreadGroup().Limits().Get(limits.Rss).Cur)
+
+	fmt.Fprintf(buf, "0 0 0 0 0 " /* startcode endcode startstack kstkesp kstkeip */)
+	fmt.Fprintf(buf, "0 0 0 0 0 " /* signal blocked sigignore sigcatch wchan */)
+	fmt.Fprintf(buf, "0 0 " /* nswap cnswap */)
+	terminationSignal := linux.Signal(0)
+	if s.t == s.t.ThreadGroup().Leader() {
+		terminationSignal = s.t.ThreadGroup().TerminationSignal()
+	}
+	fmt.Fprintf(buf, "%d ", terminationSignal)
+	fmt.Fprintf(buf, "0 0 0 " /* processor rt_priority policy */)
+	fmt.Fprintf(buf, "0 0 0 " /* delayacct_blkio_ticks guest_time cguest_time */)
+	fmt.Fprintf(buf, "0 0 0 0 0 0 0 " /* start_data end_data start_brk arg_start arg_end env_start env_end */)
+	fmt.Fprintf(buf, "0\n" /* exit_code */)
+
+	return nil
+}
+
+// statmData implements vfs.DynamicBytesSource for /proc/[pid]/statm.
+//
+// +stateify savable
+type statmData struct {
+	t *kernel.Task
+}
+
+var _ vfs.DynamicBytesSource = (*statmData)(nil)
+
+// Generate implements vfs.DynamicBytesSource.Generate.
+func (s *statmData) Generate(ctx context.Context, buf *bytes.Buffer) error {
+	var vss, rss uint64
+	s.t.WithMuLocked(func(t *kernel.Task) {
+		if mm := t.MemoryManager(); mm != nil {
+			vss = mm.VirtualMemorySize()
+			rss = mm.ResidentSetSize()
+		}
+	})
+
+	fmt.Fprintf(buf, "%d %d 0 0 0 0 0\n", vss/usermem.PageSize, rss/usermem.PageSize)
+	return nil
+}
+
+// statusData implements vfs.DynamicBytesSource for /proc/[pid]/status.
+//
+// +stateify savable
+type statusData struct {
+	t     *kernel.Task
+	pidns *kernel.PIDNamespace
+}
+
+var _ vfs.DynamicBytesSource = (*statusData)(nil)
+
+// Generate implements vfs.DynamicBytesSource.Generate.
+func (s *statusData) Generate(ctx context.Context, buf *bytes.Buffer) error {
+	fmt.Fprintf(buf, "Name:\t%s\n", s.t.Name())
+	fmt.Fprintf(buf, "State:\t%s\n", s.t.StateStatus())
+	fmt.Fprintf(buf, "Tgid:\t%d\n", s.pidns.IDOfThreadGroup(s.t.ThreadGroup()))
+	fmt.Fprintf(buf, "Pid:\t%d\n", s.pidns.IDOfTask(s.t))
+	ppid := kernel.ThreadID(0)
+	if parent := s.t.Parent(); parent != nil {
+		ppid = s.pidns.IDOfThreadGroup(parent.ThreadGroup())
+	}
+	fmt.Fprintf(buf, "PPid:\t%d\n", ppid)
+	tpid := kernel.ThreadID(0)
+	if tracer := s.t.Tracer(); tracer != nil {
+		tpid = s.pidns.IDOfTask(tracer)
+	}
+	fmt.Fprintf(buf, "TracerPid:\t%d\n", tpid)
+	var fds int
+	var vss, rss, data uint64
+	s.t.WithMuLocked(func(t *kernel.Task) {
+		if fdTable := t.FDTable(); fdTable != nil {
+			fds = fdTable.Size()
+		}
+		if mm := t.MemoryManager(); mm != nil {
+			vss = mm.VirtualMemorySize()
+			rss = mm.ResidentSetSize()
+			data = mm.VirtualDataSize()
+		}
+	})
+	fmt.Fprintf(buf, "FDSize:\t%d\n", fds)
+	fmt.Fprintf(buf, "VmSize:\t%d kB\n", vss>>10)
+	fmt.Fprintf(buf, "VmRSS:\t%d kB\n", rss>>10)
+	fmt.Fprintf(buf, "VmData:\t%d kB\n", data>>10)
+	fmt.Fprintf(buf, "Threads:\t%d\n", s.t.ThreadGroup().Count())
+	creds := s.t.Credentials()
+	fmt.Fprintf(buf, "CapInh:\t%016x\n", creds.InheritableCaps)
+	fmt.Fprintf(buf, "CapPrm:\t%016x\n", creds.PermittedCaps)
+	fmt.Fprintf(buf, "CapEff:\t%016x\n", creds.EffectiveCaps)
+	fmt.Fprintf(buf, "CapBnd:\t%016x\n", creds.BoundingCaps)
+	fmt.Fprintf(buf, "Seccomp:\t%d\n", s.t.SeccompMode())
+	return nil
+}
+
+// ioUsage is the /proc/<pid>/io and /proc/<pid>/task/<tid>/io data provider.
+type ioUsage interface {
+	// IOUsage returns the io usage data.
+	IOUsage() *usage.IO
+}
+
+// +stateify savable
+type ioData struct {
+	ioUsage
+}
+
+var _ vfs.DynamicBytesSource = (*ioData)(nil)
+
+// Generate implements vfs.DynamicBytesSource.Generate.
+func (i *ioData) Generate(ctx context.Context, buf *bytes.Buffer) error {
+	io := usage.IO{}
+	io.Accumulate(i.IOUsage())
+
+	fmt.Fprintf(buf, "char: %d\n", io.CharsRead)
+	fmt.Fprintf(buf, "wchar: %d\n", io.CharsWritten)
+	fmt.Fprintf(buf, "syscr: %d\n", io.ReadSyscalls)
+	fmt.Fprintf(buf, "syscw: %d\n", io.WriteSyscalls)
+	fmt.Fprintf(buf, "read_bytes: %d\n", io.BytesRead)
+	fmt.Fprintf(buf, "write_bytes: %d\n", io.BytesWritten)
+	fmt.Fprintf(buf, "cancelled_write_bytes: %d\n", io.BytesWriteCancelled)
+	return nil
+}
diff --git a/pkg/sentry/fsimpl/proc/version.go b/pkg/sentry/fsimpl/proc/version.go
new file mode 100644
index 000000000..e1643d4e0
--- /dev/null
+++ b/pkg/sentry/fsimpl/proc/version.go
@@ -0,0 +1,68 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package proc
+
+import (
+	"bytes"
+	"fmt"
+
+	"gvisor.dev/gvisor/pkg/sentry/context"
+	"gvisor.dev/gvisor/pkg/sentry/kernel"
+	"gvisor.dev/gvisor/pkg/sentry/vfs"
+)
+
+// versionData implements vfs.DynamicBytesSource for /proc/version.
+//
+// +stateify savable
+type versionData struct {
+	// k is the owning Kernel.
+	k *kernel.Kernel
+}
+
+var _ vfs.DynamicBytesSource = (*versionData)(nil)
+
+// Generate implements vfs.DynamicBytesSource.Generate.
+func (v *versionData) Generate(ctx context.Context, buf *bytes.Buffer) error {
+	init := v.k.GlobalInit()
+	if init == nil {
+		// Attempted to read before the init Task is created. This can
+		// only occur during startup, which should never need to read
+		// this file.
+		panic("Attempted to read version before initial Task is available")
+	}
+
+	// /proc/version takes the form:
+	//
+	// "SYSNAME version RELEASE (COMPILE_USER@COMPILE_HOST)
+	// (COMPILER_VERSION) VERSION"
+	//
+	// where:
+	// - SYSNAME, RELEASE, and VERSION are the same as returned by
+	// sys_utsname
+	// - COMPILE_USER is the user that build the kernel
+	// - COMPILE_HOST is the hostname of the machine on which the kernel
+	// was built
+	// - COMPILER_VERSION is the version reported by the building compiler
+	//
+	// Since we don't really want to expose build information to
+	// applications, those fields are omitted.
+	//
+	// FIXME(mpratt): Using Version from the init task SyscallTable
+	// disregards the different version a task may have (e.g., in a uts
+	// namespace).
+	ver := init.Leader().SyscallTable().Version
+	fmt.Fprintf(buf, "%s version %s %s\n", ver.Sysname, ver.Release, ver.Version)
+	return nil
+}
diff --git a/pkg/sentry/kernel/task_block.go b/pkg/sentry/kernel/task_block.go
index 2a2e6f662..dd69939f9 100644
--- a/pkg/sentry/kernel/task_block.go
+++ b/pkg/sentry/kernel/task_block.go
@@ -15,6 +15,7 @@
 package kernel
 
 import (
+	"runtime"
 	"time"
 
 	ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time"
@@ -121,6 +122,17 @@ func (t *Task) block(C <-chan struct{}, timerChan <-chan struct{}) error {
 	// Deactive our address space, we don't need it.
 	interrupt := t.SleepStart()
 
+	// If the request is not completed, but the timer has already expired,
+	// then ensure that we run through a scheduler cycle. This is because
+	// we may see applications relying on timer slack to yield the thread.
+	// For example, they may attempt to sleep for some number of nanoseconds,
+	// and expect that this will actually yield the CPU and sleep for at
+	// least microseconds, e.g.:
+	// https://github.com/LMAX-Exchange/disruptor/commit/6ca210f2bcd23f703c479804d583718e16f43c07
+	if len(timerChan) > 0 {
+		runtime.Gosched()
+	}
+
 	select {
 	case <-C:
 		t.SleepFinish(true)
diff --git a/pkg/sentry/mm/procfs.go b/pkg/sentry/mm/procfs.go
index a8819aa84..8c2246bb4 100644
--- a/pkg/sentry/mm/procfs.go
+++ b/pkg/sentry/mm/procfs.go
@@ -58,6 +58,34 @@ func (mm *MemoryManager) NeedsUpdate(generation int64) bool {
 	return true
 }
 
+// ReadMapsDataInto is called by fsimpl/proc.mapsData.Generate to
+// implement /proc/[pid]/maps.
+func (mm *MemoryManager) ReadMapsDataInto(ctx context.Context, buf *bytes.Buffer) {
+	mm.mappingMu.RLock()
+	defer mm.mappingMu.RUnlock()
+	var start usermem.Addr
+
+	for vseg := mm.vmas.LowerBoundSegment(start); vseg.Ok(); vseg = vseg.NextSegment() {
+		// FIXME(b/30793614): If we use a usermem.Addr for the handle, we get
+		// "panic: autosave error: type usermem.Addr is not registered".
+		mm.appendVMAMapsEntryLocked(ctx, vseg, buf)
+	}
+
+	// We always emulate vsyscall, so advertise it here. Everything about a
+	// vsyscall region is static, so just hard code the maps entry since we
+	// don't have a real vma backing it. The vsyscall region is at the end of
+	// the virtual address space so nothing should be mapped after it (if
+	// something is really mapped in the tiny ~10 MiB segment afterwards, we'll
+	// get the sorting on the maps file wrong at worst; but that's not possible
+	// on any current platform).
+	//
+	// Artifically adjust the seqfile handle so we only output vsyscall entry once.
+	if start != vsyscallEnd {
+		// FIXME(b/30793614): Can't get a pointer to constant vsyscallEnd.
+		buf.WriteString(vsyscallMapsEntry)
+	}
+}
+
 // ReadMapsSeqFileData is called by fs/proc.mapsData.ReadSeqFileData to
 // implement /proc/[pid]/maps.
 func (mm *MemoryManager) ReadMapsSeqFileData(ctx context.Context, handle seqfile.SeqHandle) ([]seqfile.SeqData, int64) {
@@ -151,6 +179,27 @@ func (mm *MemoryManager) appendVMAMapsEntryLocked(ctx context.Context, vseg vmaI
 	b.WriteString("\n")
 }
 
+// ReadSmapsDataInto is called by fsimpl/proc.smapsData.Generate to
+// implement /proc/[pid]/maps.
+func (mm *MemoryManager) ReadSmapsDataInto(ctx context.Context, buf *bytes.Buffer) {
+	mm.mappingMu.RLock()
+	defer mm.mappingMu.RUnlock()
+	var start usermem.Addr
+
+	for vseg := mm.vmas.LowerBoundSegment(start); vseg.Ok(); vseg = vseg.NextSegment() {
+		// FIXME(b/30793614): If we use a usermem.Addr for the handle, we get
+		// "panic: autosave error: type usermem.Addr is not registered".
+		mm.vmaSmapsEntryIntoLocked(ctx, vseg, buf)
+	}
+
+	// We always emulate vsyscall, so advertise it here. See
+	// ReadMapsSeqFileData for additional commentary.
+	if start != vsyscallEnd {
+		// FIXME(b/30793614): Can't get a pointer to constant vsyscallEnd.
+		buf.WriteString(vsyscallSmapsEntry)
+	}
+}
+
 // ReadSmapsSeqFileData is called by fs/proc.smapsData.ReadSeqFileData to
 // implement /proc/[pid]/smaps.
 func (mm *MemoryManager) ReadSmapsSeqFileData(ctx context.Context, handle seqfile.SeqHandle) ([]seqfile.SeqData, int64) {
@@ -190,7 +239,12 @@ func (mm *MemoryManager) ReadSmapsSeqFileData(ctx context.Context, handle seqfil
 // Preconditions: mm.mappingMu must be locked.
 func (mm *MemoryManager) vmaSmapsEntryLocked(ctx context.Context, vseg vmaIterator) []byte {
 	var b bytes.Buffer
-	mm.appendVMAMapsEntryLocked(ctx, vseg, &b)
+	mm.vmaSmapsEntryIntoLocked(ctx, vseg, &b)
+	return b.Bytes()
+}
+
+func (mm *MemoryManager) vmaSmapsEntryIntoLocked(ctx context.Context, vseg vmaIterator, b *bytes.Buffer) {
+	mm.appendVMAMapsEntryLocked(ctx, vseg, b)
 	vma := vseg.ValuePtr()
 
 	// We take mm.activeMu here in each call to vmaSmapsEntryLocked, instead of
@@ -211,40 +265,40 @@ func (mm *MemoryManager) vmaSmapsEntryLocked(ctx context.Context, vseg vmaIterat
 	}
 	mm.activeMu.RUnlock()
 
-	fmt.Fprintf(&b, "Size:           %8d kB\n", vseg.Range().Length()/1024)
-	fmt.Fprintf(&b, "Rss:            %8d kB\n", rss/1024)
+	fmt.Fprintf(b, "Size:           %8d kB\n", vseg.Range().Length()/1024)
+	fmt.Fprintf(b, "Rss:            %8d kB\n", rss/1024)
 	// Currently we report PSS = RSS, i.e. we pretend each page mapped by a pma
 	// is only mapped by that pma. This avoids having to query memmap.Mappables
 	// for reference count information on each page. As a corollary, all pages
 	// are accounted as "private" whether or not the vma is private; compare
 	// Linux's fs/proc/task_mmu.c:smaps_account().
-	fmt.Fprintf(&b, "Pss:            %8d kB\n", rss/1024)
-	fmt.Fprintf(&b, "Shared_Clean:   %8d kB\n", 0)
-	fmt.Fprintf(&b, "Shared_Dirty:   %8d kB\n", 0)
+	fmt.Fprintf(b, "Pss:            %8d kB\n", rss/1024)
+	fmt.Fprintf(b, "Shared_Clean:   %8d kB\n", 0)
+	fmt.Fprintf(b, "Shared_Dirty:   %8d kB\n", 0)
 	// Pretend that all pages are dirty if the vma is writable, and clean otherwise.
 	clean := rss
 	if vma.effectivePerms.Write {
 		clean = 0
 	}
-	fmt.Fprintf(&b, "Private_Clean:  %8d kB\n", clean/1024)
-	fmt.Fprintf(&b, "Private_Dirty:  %8d kB\n", (rss-clean)/1024)
+	fmt.Fprintf(b, "Private_Clean:  %8d kB\n", clean/1024)
+	fmt.Fprintf(b, "Private_Dirty:  %8d kB\n", (rss-clean)/1024)
 	// Pretend that all pages are "referenced" (recently touched).
-	fmt.Fprintf(&b, "Referenced:     %8d kB\n", rss/1024)
-	fmt.Fprintf(&b, "Anonymous:      %8d kB\n", anon/1024)
+	fmt.Fprintf(b, "Referenced:     %8d kB\n", rss/1024)
+	fmt.Fprintf(b, "Anonymous:      %8d kB\n", anon/1024)
 	// Hugepages (hugetlb and THP) are not implemented.
-	fmt.Fprintf(&b, "AnonHugePages:  %8d kB\n", 0)
-	fmt.Fprintf(&b, "Shared_Hugetlb: %8d kB\n", 0)
-	fmt.Fprintf(&b, "Private_Hugetlb: %7d kB\n", 0)
+	fmt.Fprintf(b, "AnonHugePages:  %8d kB\n", 0)
+	fmt.Fprintf(b, "Shared_Hugetlb: %8d kB\n", 0)
+	fmt.Fprintf(b, "Private_Hugetlb: %7d kB\n", 0)
 	// Swap is not implemented.
-	fmt.Fprintf(&b, "Swap:           %8d kB\n", 0)
-	fmt.Fprintf(&b, "SwapPss:        %8d kB\n", 0)
-	fmt.Fprintf(&b, "KernelPageSize: %8d kB\n", usermem.PageSize/1024)
-	fmt.Fprintf(&b, "MMUPageSize:    %8d kB\n", usermem.PageSize/1024)
+	fmt.Fprintf(b, "Swap:           %8d kB\n", 0)
+	fmt.Fprintf(b, "SwapPss:        %8d kB\n", 0)
+	fmt.Fprintf(b, "KernelPageSize: %8d kB\n", usermem.PageSize/1024)
+	fmt.Fprintf(b, "MMUPageSize:    %8d kB\n", usermem.PageSize/1024)
 	locked := rss
 	if vma.mlockMode == memmap.MLockNone {
 		locked = 0
 	}
-	fmt.Fprintf(&b, "Locked:         %8d kB\n", locked/1024)
+	fmt.Fprintf(b, "Locked:         %8d kB\n", locked/1024)
 
 	b.WriteString("VmFlags: ")
 	if vma.realPerms.Read {
@@ -284,6 +338,4 @@ func (mm *MemoryManager) vmaSmapsEntryLocked(ctx context.Context, vseg vmaIterat
 		b.WriteString("ac ")
 	}
 	b.WriteString("\n")
-
-	return b.Bytes()
 }
diff --git a/pkg/sentry/platform/ptrace/subprocess.go b/pkg/sentry/platform/ptrace/subprocess.go
index 79501682d..6bf7cd097 100644
--- a/pkg/sentry/platform/ptrace/subprocess.go
+++ b/pkg/sentry/platform/ptrace/subprocess.go
@@ -354,6 +354,9 @@ func (t *thread) wait(outcome waitOutcome) syscall.Signal {
 				continue // Spurious stop.
 			}
 			if stopSig == syscall.SIGTRAP {
+				if status.TrapCause() == syscall.PTRACE_EVENT_EXIT {
+					t.dumpAndPanic("wait failed: the process exited")
+				}
 				// Re-encode the trap cause the way it's expected.
 				return stopSig | syscall.Signal(status.TrapCause()<<8)
 			}
diff --git a/pkg/sentry/socket/epsocket/epsocket.go b/pkg/sentry/socket/epsocket/epsocket.go
index 8cb5c823f..635042263 100644
--- a/pkg/sentry/socket/epsocket/epsocket.go
+++ b/pkg/sentry/socket/epsocket/epsocket.go
@@ -291,18 +291,22 @@ func bytesToIPAddress(addr []byte) tcpip.Address {
 	return tcpip.Address(addr)
 }
 
-// GetAddress reads an sockaddr struct from the given address and converts it
-// to the FullAddress format. It supports AF_UNIX, AF_INET and AF_INET6
-// addresses.
-func GetAddress(sfamily int, addr []byte, strict bool) (tcpip.FullAddress, *syserr.Error) {
+// AddressAndFamily reads an sockaddr struct from the given address and
+// converts it to the FullAddress format. It supports AF_UNIX, AF_INET and
+// AF_INET6 addresses.
+//
+// strict indicates whether addresses with the AF_UNSPEC family are accepted of not.
+//
+// AddressAndFamily returns an address, its family.
+func AddressAndFamily(sfamily int, addr []byte, strict bool) (tcpip.FullAddress, uint16, *syserr.Error) {
 	// Make sure we have at least 2 bytes for the address family.
 	if len(addr) < 2 {
-		return tcpip.FullAddress{}, syserr.ErrInvalidArgument
+		return tcpip.FullAddress{}, 0, syserr.ErrInvalidArgument
 	}
 
 	family := usermem.ByteOrder.Uint16(addr)
 	if family != uint16(sfamily) && (!strict && family != linux.AF_UNSPEC) {
-		return tcpip.FullAddress{}, syserr.ErrAddressFamilyNotSupported
+		return tcpip.FullAddress{}, family, syserr.ErrAddressFamilyNotSupported
 	}
 
 	// Get the rest of the fields based on the address family.
@@ -310,7 +314,7 @@ func GetAddress(sfamily int, addr []byte, strict bool) (tcpip.FullAddress, *syse
 	case linux.AF_UNIX:
 		path := addr[2:]
 		if len(path) > linux.UnixPathMax {
-			return tcpip.FullAddress{}, syserr.ErrInvalidArgument
+			return tcpip.FullAddress{}, family, syserr.ErrInvalidArgument
 		}
 		// Drop the terminating NUL (if one exists) and everything after
 		// it for filesystem (non-abstract) addresses.
@@ -321,12 +325,12 @@ func GetAddress(sfamily int, addr []byte, strict bool) (tcpip.FullAddress, *syse
 		}
 		return tcpip.FullAddress{
 			Addr: tcpip.Address(path),
-		}, nil
+		}, family, nil
 
 	case linux.AF_INET:
 		var a linux.SockAddrInet
 		if len(addr) < sockAddrInetSize {
-			return tcpip.FullAddress{}, syserr.ErrInvalidArgument
+			return tcpip.FullAddress{}, family, syserr.ErrInvalidArgument
 		}
 		binary.Unmarshal(addr[:sockAddrInetSize], usermem.ByteOrder, &a)
 
@@ -334,12 +338,12 @@ func GetAddress(sfamily int, addr []byte, strict bool) (tcpip.FullAddress, *syse
 			Addr: bytesToIPAddress(a.Addr[:]),
 			Port: ntohs(a.Port),
 		}
-		return out, nil
+		return out, family, nil
 
 	case linux.AF_INET6:
 		var a linux.SockAddrInet6
 		if len(addr) < sockAddrInet6Size {
-			return tcpip.FullAddress{}, syserr.ErrInvalidArgument
+			return tcpip.FullAddress{}, family, syserr.ErrInvalidArgument
 		}
 		binary.Unmarshal(addr[:sockAddrInet6Size], usermem.ByteOrder, &a)
 
@@ -350,13 +354,13 @@ func GetAddress(sfamily int, addr []byte, strict bool) (tcpip.FullAddress, *syse
 		if isLinkLocal(out.Addr) {
 			out.NIC = tcpip.NICID(a.Scope_id)
 		}
-		return out, nil
+		return out, family, nil
 
 	case linux.AF_UNSPEC:
-		return tcpip.FullAddress{}, nil
+		return tcpip.FullAddress{}, family, nil
 
 	default:
-		return tcpip.FullAddress{}, syserr.ErrAddressFamilyNotSupported
+		return tcpip.FullAddress{}, 0, syserr.ErrAddressFamilyNotSupported
 	}
 }
 
@@ -429,6 +433,11 @@ func (i *ioSequencePayload) Size() int {
 	return int(i.src.NumBytes())
 }
 
+// DropFirst drops the first n bytes from underlying src.
+func (i *ioSequencePayload) DropFirst(n int) {
+	i.src = i.src.DropFirst(int(n))
+}
+
 // Write implements fs.FileOperations.Write.
 func (s *SocketOperations) Write(ctx context.Context, _ *fs.File, src usermem.IOSequence, _ int64) (int64, error) {
 	f := &ioSequencePayload{ctx: ctx, src: src}
@@ -477,11 +486,18 @@ func (s *SocketOperations) Readiness(mask waiter.EventMask) waiter.EventMask {
 // Connect implements the linux syscall connect(2) for sockets backed by
 // tpcip.Endpoint.
 func (s *SocketOperations) Connect(t *kernel.Task, sockaddr []byte, blocking bool) *syserr.Error {
-	addr, err := GetAddress(s.family, sockaddr, false /* strict */)
+	addr, family, err := AddressAndFamily(s.family, sockaddr, false /* strict */)
 	if err != nil {
 		return err
 	}
 
+	if family == linux.AF_UNSPEC {
+		err := s.Endpoint.Disconnect()
+		if err == tcpip.ErrNotSupported {
+			return syserr.ErrAddressFamilyNotSupported
+		}
+		return syserr.TranslateNetstackError(err)
+	}
 	// Always return right away in the non-blocking case.
 	if !blocking {
 		return syserr.TranslateNetstackError(s.Endpoint.Connect(addr))
@@ -510,7 +526,7 @@ func (s *SocketOperations) Connect(t *kernel.Task, sockaddr []byte, blocking boo
 // Bind implements the linux syscall bind(2) for sockets backed by
 // tcpip.Endpoint.
 func (s *SocketOperations) Bind(t *kernel.Task, sockaddr []byte) *syserr.Error {
-	addr, err := GetAddress(s.family, sockaddr, true /* strict */)
+	addr, _, err := AddressAndFamily(s.family, sockaddr, true /* strict */)
 	if err != nil {
 		return err
 	}
@@ -2018,7 +2034,7 @@ func (s *SocketOperations) SendMsg(t *kernel.Task, src usermem.IOSequence, to []
 
 	var addr *tcpip.FullAddress
 	if len(to) > 0 {
-		addrBuf, err := GetAddress(s.family, to, true /* strict */)
+		addrBuf, _, err := AddressAndFamily(s.family, to, true /* strict */)
 		if err != nil {
 			return 0, err
 		}
@@ -2026,28 +2042,22 @@ func (s *SocketOperations) SendMsg(t *kernel.Task, src usermem.IOSequence, to []
 		addr = &addrBuf
 	}
 
-	v := buffer.NewView(int(src.NumBytes()))
-
-	// Copy all the data into the buffer.
-	if _, err := src.CopyIn(t, v); err != nil {
-		return 0, syserr.FromError(err)
-	}
-
 	opts := tcpip.WriteOptions{
 		To:          addr,
 		More:        flags&linux.MSG_MORE != 0,
 		EndOfRecord: flags&linux.MSG_EOR != 0,
 	}
 
-	n, resCh, err := s.Endpoint.Write(tcpip.SlicePayload(v), opts)
+	v := &ioSequencePayload{t, src}
+	n, resCh, err := s.Endpoint.Write(v, opts)
 	if resCh != nil {
 		if err := t.Block(resCh); err != nil {
 			return 0, syserr.FromError(err)
 		}
-		n, _, err = s.Endpoint.Write(tcpip.SlicePayload(v), opts)
+		n, _, err = s.Endpoint.Write(v, opts)
 	}
 	dontWait := flags&linux.MSG_DONTWAIT != 0
-	if err == nil && (n >= uintptr(len(v)) || dontWait) {
+	if err == nil && (n >= int64(v.Size()) || dontWait) {
 		// Complete write.
 		return int(n), nil
 	}
@@ -2061,18 +2071,18 @@ func (s *SocketOperations) SendMsg(t *kernel.Task, src usermem.IOSequence, to []
 	s.EventRegister(&e, waiter.EventOut)
 	defer s.EventUnregister(&e)
 
-	v.TrimFront(int(n))
+	v.DropFirst(int(n))
 	total := n
 	for {
-		n, _, err = s.Endpoint.Write(tcpip.SlicePayload(v), opts)
-		v.TrimFront(int(n))
+		n, _, err = s.Endpoint.Write(v, opts)
+		v.DropFirst(int(n))
 		total += n
 
 		if err != nil && err != tcpip.ErrWouldBlock && total == 0 {
 			return 0, syserr.TranslateNetstackError(err)
 		}
 
-		if err == nil && len(v) == 0 || err != nil && err != tcpip.ErrWouldBlock {
+		if err == nil && v.Size() == 0 || err != nil && err != tcpip.ErrWouldBlock {
 			return int(total), nil
 		}
 
diff --git a/pkg/sentry/socket/epsocket/stack.go b/pkg/sentry/socket/epsocket/stack.go
index 8f1572bf4..7cf7ff735 100644
--- a/pkg/sentry/socket/epsocket/stack.go
+++ b/pkg/sentry/socket/epsocket/stack.go
@@ -20,7 +20,6 @@ import (
 	"gvisor.dev/gvisor/pkg/sentry/inet"
 	"gvisor.dev/gvisor/pkg/sentry/socket/netfilter"
 	"gvisor.dev/gvisor/pkg/syserr"
-	"gvisor.dev/gvisor/pkg/tcpip"
 	"gvisor.dev/gvisor/pkg/tcpip/header"
 	"gvisor.dev/gvisor/pkg/tcpip/iptables"
 	"gvisor.dev/gvisor/pkg/tcpip/network/ipv4"
@@ -154,7 +153,7 @@ func (s *Stack) RouteTable() []inet.Route {
 
 	for _, rt := range s.Stack.GetRouteTable() {
 		var family uint8
-		switch len(rt.Destination) {
+		switch len(rt.Destination.ID()) {
 		case header.IPv4AddressSize:
 			family = linux.AF_INET
 		case header.IPv6AddressSize:
@@ -164,14 +163,9 @@ func (s *Stack) RouteTable() []inet.Route {
 			continue
 		}
 
-		dstSubnet, err := tcpip.NewSubnet(rt.Destination, rt.Mask)
-		if err != nil {
-			log.Warningf("Invalid destination & mask in route: %s(%s): %v", rt.Destination, rt.Mask, err)
-			continue
-		}
 		routeTable = append(routeTable, inet.Route{
 			Family: family,
-			DstLen: uint8(dstSubnet.Prefix()), // The CIDR prefix for the destination.
+			DstLen: uint8(rt.Destination.Prefix()), // The CIDR prefix for the destination.
 
 			// Always return unspecified protocol since we have no notion of
 			// protocol for routes.
@@ -182,7 +176,7 @@ func (s *Stack) RouteTable() []inet.Route {
 			Scope: linux.RT_SCOPE_LINK,
 			Type:  linux.RTN_UNICAST,
 
-			DstAddr:         []byte(rt.Destination),
+			DstAddr:         []byte(rt.Destination.ID()),
 			OutputInterface: int32(rt.NIC),
 			GatewayAddr:     []byte(rt.Gateway),
 		})
@@ -198,8 +192,8 @@ func (s *Stack) IPTables() (iptables.IPTables, error) {
 
 // FillDefaultIPTables sets the stack's iptables to the default tables, which
 // allow and do not modify all traffic.
-func (s *Stack) FillDefaultIPTables() error {
-	return netfilter.FillDefaultIPTables(s.Stack)
+func (s *Stack) FillDefaultIPTables() {
+	netfilter.FillDefaultIPTables(s.Stack)
 }
 
 // Resume implements inet.Stack.Resume.
diff --git a/pkg/sentry/socket/hostinet/stack.go b/pkg/sentry/socket/hostinet/stack.go
index 1902fe155..3a4fdec47 100644
--- a/pkg/sentry/socket/hostinet/stack.go
+++ b/pkg/sentry/socket/hostinet/stack.go
@@ -203,8 +203,14 @@ func ExtractHostRoutes(routeMsgs []syscall.NetlinkMessage) ([]inet.Route, error)
 				inetRoute.DstAddr = attr.Value
 			case syscall.RTA_SRC:
 				inetRoute.SrcAddr = attr.Value
-			case syscall.RTA_OIF:
+			case syscall.RTA_GATEWAY:
 				inetRoute.GatewayAddr = attr.Value
+			case syscall.RTA_OIF:
+				expected := int(binary.Size(inetRoute.OutputInterface))
+				if len(attr.Value) != expected {
+					return nil, fmt.Errorf("RTM_GETROUTE returned RTM_NEWROUTE message with invalid attribute data length (%d bytes, expected %d bytes)", len(attr.Value), expected)
+				}
+				binary.Unmarshal(attr.Value, usermem.ByteOrder, &inetRoute.OutputInterface)
 			}
 		}
 
diff --git a/pkg/sentry/socket/netfilter/BUILD b/pkg/sentry/socket/netfilter/BUILD
index 3021f83e7..354a0d6ee 100644
--- a/pkg/sentry/socket/netfilter/BUILD
+++ b/pkg/sentry/socket/netfilter/BUILD
@@ -13,6 +13,7 @@ go_library(
     visibility = ["//pkg/sentry:internal"],
     deps = [
         "//pkg/abi/linux",
+        "//pkg/binary",
         "//pkg/sentry/kernel",
         "//pkg/sentry/usermem",
         "//pkg/syserr",
diff --git a/pkg/sentry/socket/netfilter/netfilter.go b/pkg/sentry/socket/netfilter/netfilter.go
index efdb42903..9f87c32f1 100644
--- a/pkg/sentry/socket/netfilter/netfilter.go
+++ b/pkg/sentry/socket/netfilter/netfilter.go
@@ -17,7 +17,10 @@
 package netfilter
 
 import (
+	"fmt"
+
 	"gvisor.dev/gvisor/pkg/abi/linux"
+	"gvisor.dev/gvisor/pkg/binary"
 	"gvisor.dev/gvisor/pkg/sentry/kernel"
 	"gvisor.dev/gvisor/pkg/sentry/usermem"
 	"gvisor.dev/gvisor/pkg/syserr"
@@ -26,21 +29,258 @@ import (
 	"gvisor.dev/gvisor/pkg/tcpip/stack"
 )
 
+// errorTargetName is used to mark targets as error targets. Error targets
+// shouldn't be reached - an error has occurred if we fall through to one.
+const errorTargetName = "ERROR"
+
+// metadata is opaque to netstack. It holds data that we need to translate
+// between Linux's and netstack's iptables representations.
+type metadata struct {
+	HookEntry  [linux.NF_INET_NUMHOOKS]uint32
+	Underflow  [linux.NF_INET_NUMHOOKS]uint32
+	NumEntries uint32
+	Size       uint32
+}
+
 // GetInfo returns information about iptables.
 func GetInfo(t *kernel.Task, ep tcpip.Endpoint, outPtr usermem.Addr) (linux.IPTGetinfo, *syserr.Error) {
-	// TODO(b/129292233): Implement.
-	return linux.IPTGetinfo{}, syserr.ErrInvalidArgument
+	// Read in the struct and table name.
+	var info linux.IPTGetinfo
+	if _, err := t.CopyIn(outPtr, &info); err != nil {
+		return linux.IPTGetinfo{}, syserr.FromError(err)
+	}
+
+	// Find the appropriate table.
+	table, err := findTable(ep, info.TableName())
+	if err != nil {
+		return linux.IPTGetinfo{}, err
+	}
+
+	// Get the hooks that apply to this table.
+	info.ValidHooks = table.ValidHooks()
+
+	// Grab the metadata struct, which is used to store information (e.g.
+	// the number of entries) that applies to the user's encoding of
+	// iptables, but not netstack's.
+	metadata := table.Metadata().(metadata)
+
+	// Set values from metadata.
+	info.HookEntry = metadata.HookEntry
+	info.Underflow = metadata.Underflow
+	info.NumEntries = metadata.NumEntries
+	info.Size = metadata.Size
+
+	return info, nil
 }
 
 // GetEntries returns netstack's iptables rules encoded for the iptables tool.
 func GetEntries(t *kernel.Task, ep tcpip.Endpoint, outPtr usermem.Addr, outLen int) (linux.KernelIPTGetEntries, *syserr.Error) {
-	// TODO(b/129292233): Implement.
-	return linux.KernelIPTGetEntries{}, syserr.ErrInvalidArgument
+	// Read in the struct and table name.
+	var userEntries linux.IPTGetEntries
+	if _, err := t.CopyIn(outPtr, &userEntries); err != nil {
+		return linux.KernelIPTGetEntries{}, syserr.FromError(err)
+	}
+
+	// Find the appropriate table.
+	table, err := findTable(ep, userEntries.TableName())
+	if err != nil {
+		return linux.KernelIPTGetEntries{}, err
+	}
+
+	// Convert netstack's iptables rules to something that the iptables
+	// tool can understand.
+	entries, _, err := convertNetstackToBinary(userEntries.TableName(), table)
+	if err != nil {
+		return linux.KernelIPTGetEntries{}, err
+	}
+	if binary.Size(entries) > uintptr(outLen) {
+		return linux.KernelIPTGetEntries{}, syserr.ErrInvalidArgument
+	}
+
+	return entries, nil
+}
+
+func findTable(ep tcpip.Endpoint, tableName string) (iptables.Table, *syserr.Error) {
+	ipt, err := ep.IPTables()
+	if err != nil {
+		return iptables.Table{}, syserr.FromError(err)
+	}
+	table, ok := ipt.Tables[tableName]
+	if !ok {
+		return iptables.Table{}, syserr.ErrInvalidArgument
+	}
+	return table, nil
 }
 
 // FillDefaultIPTables sets stack's IPTables to the default tables and
 // populates them with metadata.
-func FillDefaultIPTables(stack *stack.Stack) error {
-	stack.SetIPTables(iptables.DefaultTables())
-	return nil
+func FillDefaultIPTables(stack *stack.Stack) {
+	ipt := iptables.DefaultTables()
+
+	// In order to fill in the metadata, we have to translate ipt from its
+	// netstack format to Linux's giant-binary-blob format.
+	for name, table := range ipt.Tables {
+		_, metadata, err := convertNetstackToBinary(name, table)
+		if err != nil {
+			panic(fmt.Errorf("Unable to set default IP tables: %v", err))
+		}
+		table.SetMetadata(metadata)
+		ipt.Tables[name] = table
+	}
+
+	stack.SetIPTables(ipt)
+}
+
+// convertNetstackToBinary converts the iptables as stored in netstack to the
+// format expected by the iptables tool. Linux stores each table as a binary
+// blob that can only be traversed by parsing a bit, reading some offsets,
+// jumping to those offsets, parsing again, etc.
+func convertNetstackToBinary(name string, table iptables.Table) (linux.KernelIPTGetEntries, metadata, *syserr.Error) {
+	// Return values.
+	var entries linux.KernelIPTGetEntries
+	var meta metadata
+
+	// The table name has to fit in the struct.
+	if linux.XT_TABLE_MAXNAMELEN < len(name) {
+		return linux.KernelIPTGetEntries{}, metadata{}, syserr.ErrInvalidArgument
+	}
+	copy(entries.Name[:], name)
+
+	// Deal with the built in chains first (INPUT, OUTPUT, etc.). Each of
+	// these chains ends with an unconditional policy entry.
+	for hook := iptables.Prerouting; hook < iptables.NumHooks; hook++ {
+		chain, ok := table.BuiltinChains[hook]
+		if !ok {
+			// This table doesn't support this hook.
+			continue
+		}
+
+		// Sanity check.
+		if len(chain.Rules) < 1 {
+			return linux.KernelIPTGetEntries{}, metadata{}, syserr.ErrInvalidArgument
+		}
+
+		for ruleIdx, rule := range chain.Rules {
+			// If this is the first rule of a builtin chain, set
+			// the metadata hook entry point.
+			if ruleIdx == 0 {
+				meta.HookEntry[hook] = entries.Size
+			}
+
+			// Each rule corresponds to an entry.
+			entry := linux.KernelIPTEntry{
+				IPTEntry: linux.IPTEntry{
+					NextOffset:   linux.SizeOfIPTEntry,
+					TargetOffset: linux.SizeOfIPTEntry,
+				},
+			}
+
+			for _, matcher := range rule.Matchers {
+				// Serialize the matcher and add it to the
+				// entry.
+				serialized := marshalMatcher(matcher)
+				entry.Elems = append(entry.Elems, serialized...)
+				entry.NextOffset += uint16(len(serialized))
+				entry.TargetOffset += uint16(len(serialized))
+			}
+
+			// Serialize and append the target.
+			serialized := marshalTarget(rule.Target)
+			entry.Elems = append(entry.Elems, serialized...)
+			entry.NextOffset += uint16(len(serialized))
+
+			// The underflow rule is the last rule in the chain,
+			// and is an unconditional rule (i.e. it matches any
+			// packet). This is enforced when saving iptables.
+			if ruleIdx == len(chain.Rules)-1 {
+				meta.Underflow[hook] = entries.Size
+			}
+
+			entries.Size += uint32(entry.NextOffset)
+			entries.Entrytable = append(entries.Entrytable, entry)
+			meta.NumEntries++
+		}
+
+	}
+
+	// TODO(gvisor.dev/issue/170): Deal with the user chains here. Each of
+	// these starts with an error node holding the chain's name and ends
+	// with an unconditional return.
+
+	// Lastly, each table ends with an unconditional error target rule as
+	// its final entry.
+	errorEntry := linux.KernelIPTEntry{
+		IPTEntry: linux.IPTEntry{
+			NextOffset:   linux.SizeOfIPTEntry,
+			TargetOffset: linux.SizeOfIPTEntry,
+		},
+	}
+	var errorTarget linux.XTErrorTarget
+	errorTarget.Target.TargetSize = linux.SizeOfXTErrorTarget
+	copy(errorTarget.ErrorName[:], errorTargetName)
+	copy(errorTarget.Target.Name[:], errorTargetName)
+
+	// Serialize and add it to the list of entries.
+	errorTargetBuf := make([]byte, 0, linux.SizeOfXTErrorTarget)
+	serializedErrorTarget := binary.Marshal(errorTargetBuf, usermem.ByteOrder, errorTarget)
+	errorEntry.Elems = append(errorEntry.Elems, serializedErrorTarget...)
+	errorEntry.NextOffset += uint16(len(serializedErrorTarget))
+
+	entries.Size += uint32(errorEntry.NextOffset)
+	entries.Entrytable = append(entries.Entrytable, errorEntry)
+	meta.NumEntries++
+	meta.Size = entries.Size
+
+	return entries, meta, nil
+}
+
+func marshalMatcher(matcher iptables.Matcher) []byte {
+	switch matcher.(type) {
+	default:
+		// TODO(gvisor.dev/issue/170): We don't support any matchers yet, so
+		// any call to marshalMatcher will panic.
+		panic(fmt.Errorf("unknown matcher of type %T", matcher))
+	}
+}
+
+func marshalTarget(target iptables.Target) []byte {
+	switch target.(type) {
+	case iptables.UnconditionalAcceptTarget:
+		return marshalUnconditionalAcceptTarget()
+	default:
+		panic(fmt.Errorf("unknown target of type %T", target))
+	}
+}
+
+func marshalUnconditionalAcceptTarget() []byte {
+	// The target's name will be the empty string.
+	target := linux.XTStandardTarget{
+		Target: linux.XTEntryTarget{
+			TargetSize: linux.SizeOfXTStandardTarget,
+		},
+		Verdict: translateStandardVerdict(iptables.Accept),
+	}
+
+	ret := make([]byte, 0, linux.SizeOfXTStandardTarget)
+	return binary.Marshal(ret, usermem.ByteOrder, target)
+}
+
+// translateStandardVerdict translates verdicts the same way as the iptables
+// tool.
+func translateStandardVerdict(verdict iptables.Verdict) int32 {
+	switch verdict {
+	case iptables.Accept:
+		return -linux.NF_ACCEPT - 1
+	case iptables.Drop:
+		return -linux.NF_DROP - 1
+	case iptables.Queue:
+		return -linux.NF_QUEUE - 1
+	case iptables.Return:
+		return linux.NF_RETURN
+	case iptables.Jump:
+		// TODO(gvisor.dev/issue/170): Support Jump.
+		panic("Jump isn't supported yet")
+	default:
+		panic(fmt.Sprintf("unknown standard verdict: %d", verdict))
+	}
 }
diff --git a/pkg/sentry/socket/unix/io.go b/pkg/sentry/socket/unix/io.go
index 760c7beab..2ec1a662d 100644
--- a/pkg/sentry/socket/unix/io.go
+++ b/pkg/sentry/socket/unix/io.go
@@ -62,7 +62,7 @@ type EndpointReader struct {
 	Creds bool
 
 	// NumRights is the number of SCM_RIGHTS FDs requested.
-	NumRights uintptr
+	NumRights int
 
 	// Peek indicates that the data should not be consumed from the
 	// endpoint.
@@ -70,7 +70,7 @@ type EndpointReader struct {
 
 	// MsgSize is the size of the message that was read from. For stream
 	// sockets, it is the amount read.
-	MsgSize uintptr
+	MsgSize int64
 
 	// From, if not nil, will be set with the address read from.
 	From *tcpip.FullAddress
diff --git a/pkg/sentry/socket/unix/transport/connectioned.go b/pkg/sentry/socket/unix/transport/connectioned.go
index 73d2df15d..4bd15808a 100644
--- a/pkg/sentry/socket/unix/transport/connectioned.go
+++ b/pkg/sentry/socket/unix/transport/connectioned.go
@@ -436,7 +436,7 @@ func (e *connectionedEndpoint) Bind(addr tcpip.FullAddress, commit func() *syser
 
 // SendMsg writes data and a control message to the endpoint's peer.
 // This method does not block if the data cannot be written.
-func (e *connectionedEndpoint) SendMsg(ctx context.Context, data [][]byte, c ControlMessages, to BoundEndpoint) (uintptr, *syserr.Error) {
+func (e *connectionedEndpoint) SendMsg(ctx context.Context, data [][]byte, c ControlMessages, to BoundEndpoint) (int64, *syserr.Error) {
 	// Stream sockets do not support specifying the endpoint. Seqpacket
 	// sockets ignore the passed endpoint.
 	if e.stype == linux.SOCK_STREAM && to != nil {
diff --git a/pkg/sentry/socket/unix/transport/connectionless.go b/pkg/sentry/socket/unix/transport/connectionless.go
index c7f7c5b16..0322dec0b 100644
--- a/pkg/sentry/socket/unix/transport/connectionless.go
+++ b/pkg/sentry/socket/unix/transport/connectionless.go
@@ -99,7 +99,7 @@ func (e *connectionlessEndpoint) UnidirectionalConnect(ctx context.Context) (Con
 
 // SendMsg writes data and a control message to the specified endpoint.
 // This method does not block if the data cannot be written.
-func (e *connectionlessEndpoint) SendMsg(ctx context.Context, data [][]byte, c ControlMessages, to BoundEndpoint) (uintptr, *syserr.Error) {
+func (e *connectionlessEndpoint) SendMsg(ctx context.Context, data [][]byte, c ControlMessages, to BoundEndpoint) (int64, *syserr.Error) {
 	if to == nil {
 		return e.baseEndpoint.SendMsg(ctx, data, c, nil)
 	}
diff --git a/pkg/sentry/socket/unix/transport/unix.go b/pkg/sentry/socket/unix/transport/unix.go
index 7fb9cb1e0..2b0ad6395 100644
--- a/pkg/sentry/socket/unix/transport/unix.go
+++ b/pkg/sentry/socket/unix/transport/unix.go
@@ -121,13 +121,13 @@ type Endpoint interface {
 	// CMTruncated indicates that the numRights hint was used to receive fewer
 	// than the total available SCM_RIGHTS FDs. Additional truncation may be
 	// required by the caller.
-	RecvMsg(ctx context.Context, data [][]byte, creds bool, numRights uintptr, peek bool, addr *tcpip.FullAddress) (recvLen, msgLen uintptr, cm ControlMessages, CMTruncated bool, err *syserr.Error)
+	RecvMsg(ctx context.Context, data [][]byte, creds bool, numRights int, peek bool, addr *tcpip.FullAddress) (recvLen, msgLen int64, cm ControlMessages, CMTruncated bool, err *syserr.Error)
 
 	// SendMsg writes data and a control message to the endpoint's peer.
 	// This method does not block if the data cannot be written.
 	//
 	// SendMsg does not take ownership of any of its arguments on error.
-	SendMsg(context.Context, [][]byte, ControlMessages, BoundEndpoint) (uintptr, *syserr.Error)
+	SendMsg(context.Context, [][]byte, ControlMessages, BoundEndpoint) (int64, *syserr.Error)
 
 	// Connect connects this endpoint directly to another.
 	//
@@ -291,7 +291,7 @@ type Receiver interface {
 	// See Endpoint.RecvMsg for documentation on shared arguments.
 	//
 	// notify indicates if RecvNotify should be called.
-	Recv(data [][]byte, creds bool, numRights uintptr, peek bool) (recvLen, msgLen uintptr, cm ControlMessages, CMTruncated bool, source tcpip.FullAddress, notify bool, err *syserr.Error)
+	Recv(data [][]byte, creds bool, numRights int, peek bool) (recvLen, msgLen int64, cm ControlMessages, CMTruncated bool, source tcpip.FullAddress, notify bool, err *syserr.Error)
 
 	// RecvNotify notifies the Receiver of a successful Recv. This must not be
 	// called while holding any endpoint locks.
@@ -331,7 +331,7 @@ type queueReceiver struct {
 }
 
 // Recv implements Receiver.Recv.
-func (q *queueReceiver) Recv(data [][]byte, creds bool, numRights uintptr, peek bool) (uintptr, uintptr, ControlMessages, bool, tcpip.FullAddress, bool, *syserr.Error) {
+func (q *queueReceiver) Recv(data [][]byte, creds bool, numRights int, peek bool) (int64, int64, ControlMessages, bool, tcpip.FullAddress, bool, *syserr.Error) {
 	var m *message
 	var notify bool
 	var err *syserr.Error
@@ -344,13 +344,13 @@ func (q *queueReceiver) Recv(data [][]byte, creds bool, numRights uintptr, peek
 		return 0, 0, ControlMessages{}, false, tcpip.FullAddress{}, false, err
 	}
 	src := []byte(m.Data)
-	var copied uintptr
+	var copied int64
 	for i := 0; i < len(data) && len(src) > 0; i++ {
 		n := copy(data[i], src)
-		copied += uintptr(n)
+		copied += int64(n)
 		src = src[n:]
 	}
-	return copied, uintptr(len(m.Data)), m.Control, false, m.Address, notify, nil
+	return copied, int64(len(m.Data)), m.Control, false, m.Address, notify, nil
 }
 
 // RecvNotify implements Receiver.RecvNotify.
@@ -401,11 +401,11 @@ type streamQueueReceiver struct {
 	addr    tcpip.FullAddress
 }
 
-func vecCopy(data [][]byte, buf []byte) (uintptr, [][]byte, []byte) {
-	var copied uintptr
+func vecCopy(data [][]byte, buf []byte) (int64, [][]byte, []byte) {
+	var copied int64
 	for len(data) > 0 && len(buf) > 0 {
 		n := copy(data[0], buf)
-		copied += uintptr(n)
+		copied += int64(n)
 		buf = buf[n:]
 		data[0] = data[0][n:]
 		if len(data[0]) == 0 {
@@ -443,7 +443,7 @@ func (q *streamQueueReceiver) RecvMaxQueueSize() int64 {
 }
 
 // Recv implements Receiver.Recv.
-func (q *streamQueueReceiver) Recv(data [][]byte, wantCreds bool, numRights uintptr, peek bool) (uintptr, uintptr, ControlMessages, bool, tcpip.FullAddress, bool, *syserr.Error) {
+func (q *streamQueueReceiver) Recv(data [][]byte, wantCreds bool, numRights int, peek bool) (int64, int64, ControlMessages, bool, tcpip.FullAddress, bool, *syserr.Error) {
 	q.mu.Lock()
 	defer q.mu.Unlock()
 
@@ -464,7 +464,7 @@ func (q *streamQueueReceiver) Recv(data [][]byte, wantCreds bool, numRights uint
 		q.addr = m.Address
 	}
 
-	var copied uintptr
+	var copied int64
 	if peek {
 		// Don't consume control message if we are peeking.
 		c := q.control.Clone()
@@ -531,7 +531,7 @@ func (q *streamQueueReceiver) Recv(data [][]byte, wantCreds bool, numRights uint
 			break
 		}
 
-		var cpd uintptr
+		var cpd int64
 		cpd, data, q.buffer = vecCopy(data, q.buffer)
 		copied += cpd
 
@@ -569,7 +569,7 @@ type ConnectedEndpoint interface {
 	//
 	// syserr.ErrWouldBlock can be returned along with a partial write if
 	// the caller should block to send the rest of the data.
-	Send(data [][]byte, controlMessages ControlMessages, from tcpip.FullAddress) (n uintptr, notify bool, err *syserr.Error)
+	Send(data [][]byte, controlMessages ControlMessages, from tcpip.FullAddress) (n int64, notify bool, err *syserr.Error)
 
 	// SendNotify notifies the ConnectedEndpoint of a successful Send. This
 	// must not be called while holding any endpoint locks.
@@ -637,7 +637,7 @@ func (e *connectedEndpoint) GetLocalAddress() (tcpip.FullAddress, *tcpip.Error)
 }
 
 // Send implements ConnectedEndpoint.Send.
-func (e *connectedEndpoint) Send(data [][]byte, controlMessages ControlMessages, from tcpip.FullAddress) (uintptr, bool, *syserr.Error) {
+func (e *connectedEndpoint) Send(data [][]byte, controlMessages ControlMessages, from tcpip.FullAddress) (int64, bool, *syserr.Error) {
 	var l int64
 	for _, d := range data {
 		l += int64(len(d))
@@ -665,7 +665,7 @@ func (e *connectedEndpoint) Send(data [][]byte, controlMessages ControlMessages,
 	}
 
 	l, notify, err := e.writeQueue.Enqueue(&message{Data: buffer.View(v), Control: controlMessages, Address: from}, truncate)
-	return uintptr(l), notify, err
+	return int64(l), notify, err
 }
 
 // SendNotify implements ConnectedEndpoint.SendNotify.
@@ -781,7 +781,7 @@ func (e *baseEndpoint) Connected() bool {
 }
 
 // RecvMsg reads data and a control message from the endpoint.
-func (e *baseEndpoint) RecvMsg(ctx context.Context, data [][]byte, creds bool, numRights uintptr, peek bool, addr *tcpip.FullAddress) (uintptr, uintptr, ControlMessages, bool, *syserr.Error) {
+func (e *baseEndpoint) RecvMsg(ctx context.Context, data [][]byte, creds bool, numRights int, peek bool, addr *tcpip.FullAddress) (int64, int64, ControlMessages, bool, *syserr.Error) {
 	e.Lock()
 
 	if e.receiver == nil {
@@ -807,7 +807,7 @@ func (e *baseEndpoint) RecvMsg(ctx context.Context, data [][]byte, creds bool, n
 
 // SendMsg writes data and a control message to the endpoint's peer.
 // This method does not block if the data cannot be written.
-func (e *baseEndpoint) SendMsg(ctx context.Context, data [][]byte, c ControlMessages, to BoundEndpoint) (uintptr, *syserr.Error) {
+func (e *baseEndpoint) SendMsg(ctx context.Context, data [][]byte, c ControlMessages, to BoundEndpoint) (int64, *syserr.Error) {
 	e.Lock()
 	if !e.Connected() {
 		e.Unlock()
diff --git a/pkg/sentry/socket/unix/unix.go b/pkg/sentry/socket/unix/unix.go
index 9032b7580..0d0cb68df 100644
--- a/pkg/sentry/socket/unix/unix.go
+++ b/pkg/sentry/socket/unix/unix.go
@@ -116,7 +116,7 @@ func (s *SocketOperations) Endpoint() transport.Endpoint {
 
 // extractPath extracts and validates the address.
 func extractPath(sockaddr []byte) (string, *syserr.Error) {
-	addr, err := epsocket.GetAddress(linux.AF_UNIX, sockaddr, true /* strict */)
+	addr, _, err := epsocket.AddressAndFamily(linux.AF_UNIX, sockaddr, true /* strict */)
 	if err != nil {
 		return "", err
 	}
@@ -535,7 +535,7 @@ func (s *SocketOperations) RecvMsg(t *kernel.Task, dst usermem.IOSequence, flags
 		Ctx:       t,
 		Endpoint:  s.ep,
 		Creds:     wantCreds,
-		NumRights: uintptr(numRights),
+		NumRights: numRights,
 		Peek:      peek,
 	}
 	if senderRequested {
diff --git a/pkg/sentry/strace/socket.go b/pkg/sentry/strace/socket.go
index 386b40af7..f779186ad 100644
--- a/pkg/sentry/strace/socket.go
+++ b/pkg/sentry/strace/socket.go
@@ -332,7 +332,7 @@ func sockAddr(t *kernel.Task, addr usermem.Addr, length uint32) string {
 
 	switch family {
 	case linux.AF_INET, linux.AF_INET6, linux.AF_UNIX:
-		fa, err := epsocket.GetAddress(int(family), b, true /* strict */)
+		fa, _, err := epsocket.AddressAndFamily(int(family), b, true /* strict */)
 		if err != nil {
 			return fmt.Sprintf("%#x {Family: %s, error extracting address: %v}", addr, familyStr, err)
 		}
diff --git a/pkg/sentry/syscalls/linux/sys_read.go b/pkg/sentry/syscalls/linux/sys_read.go
index b2474e60d..3ab54271c 100644
--- a/pkg/sentry/syscalls/linux/sys_read.go
+++ b/pkg/sentry/syscalls/linux/sys_read.go
@@ -191,7 +191,6 @@ func Preadv(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal
 }
 
 // Preadv2 implements linux syscall preadv2(2).
-// TODO(b/120162627): Implement RWF_HIPRI functionality.
 func Preadv2(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
 	// While the syscall is
 	// preadv2(int fd, struct iovec* iov, int iov_cnt, off_t offset, int flags)
@@ -228,6 +227,8 @@ func Preadv2(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca
 	}
 
 	// Check flags field.
+	// Note: gVisor does not implement the RWF_HIPRI feature, but the flag is
+	// accepted as a valid flag argument for preadv2.
 	if flags&^linux.RWF_VALID != 0 {
 		return 0, nil, syserror.EOPNOTSUPP
 	}
diff --git a/pkg/sentry/syscalls/linux/sys_write.go b/pkg/sentry/syscalls/linux/sys_write.go
index 5278c96a6..27cd2c336 100644
--- a/pkg/sentry/syscalls/linux/sys_write.go
+++ b/pkg/sentry/syscalls/linux/sys_write.go
@@ -191,7 +191,6 @@ func Pwritev(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca
 }
 
 // Pwritev2 implements linux syscall pwritev2(2).
-// TODO(b/120162627): Implement RWF_HIPRI functionality.
 // TODO(b/120161091): Implement O_SYNC and D_SYNC functionality.
 func Pwritev2(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
 	// While the syscall is
@@ -227,6 +226,8 @@ func Pwritev2(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc
 		return 0, nil, syserror.ESPIPE
 	}
 
+	// Note: gVisor does not implement the RWF_HIPRI feature, but the flag is
+	// accepted as a valid flag argument for pwritev2.
 	if flags&^linux.RWF_VALID != 0 {
 		return uintptr(flags), nil, syserror.EOPNOTSUPP
 	}
diff --git a/pkg/sentry/vfs/BUILD b/pkg/sentry/vfs/BUILD
index 4de6c41cf..0f247bf77 100644
--- a/pkg/sentry/vfs/BUILD
+++ b/pkg/sentry/vfs/BUILD
@@ -18,6 +18,7 @@ go_library(
         "permissions.go",
         "resolving_path.go",
         "syscalls.go",
+        "testutil.go",
         "vfs.go",
     ],
     importpath = "gvisor.dev/gvisor/pkg/sentry/vfs",
@@ -40,7 +41,16 @@ go_test(
     name = "vfs_test",
     size = "small",
     srcs = [
+        "file_description_impl_util_test.go",
         "mount_test.go",
     ],
     embed = [":vfs"],
+    deps = [
+        "//pkg/abi/linux",
+        "//pkg/sentry/context",
+        "//pkg/sentry/context/contexttest",
+        "//pkg/sentry/kernel/auth",
+        "//pkg/sentry/usermem",
+        "//pkg/syserror",
+    ],
 )
diff --git a/pkg/sentry/vfs/file_description_impl_util.go b/pkg/sentry/vfs/file_description_impl_util.go
index 486893e70..ba230da72 100644
--- a/pkg/sentry/vfs/file_description_impl_util.go
+++ b/pkg/sentry/vfs/file_description_impl_util.go
@@ -15,6 +15,10 @@
 package vfs
 
 import (
+	"bytes"
+	"io"
+	"sync"
+
 	"gvisor.dev/gvisor/pkg/abi/linux"
 	"gvisor.dev/gvisor/pkg/sentry/arch"
 	"gvisor.dev/gvisor/pkg/sentry/context"
@@ -24,6 +28,16 @@ import (
 	"gvisor.dev/gvisor/pkg/waiter"
 )
 
+// The following design pattern is strongly recommended for filesystem
+// implementations to adapt:
+//   - Have a local fileDescription struct (containing FileDescription) which
+//     embeds FileDescriptionDefaultImpl and overrides the default methods
+//     which are common to all fd implementations for that for that filesystem
+//     like StatusFlags, SetStatusFlags, Stat, SetStat, StatFS, etc.
+//   - This should be embedded in all file description implementations as the
+//     first field by value.
+//   - Directory FDs would also embed DirectoryFileDescriptionDefaultImpl.
+
 // FileDescriptionDefaultImpl may be embedded by implementations of
 // FileDescriptionImpl to obtain implementations of many FileDescriptionImpl
 // methods with default behavior analogous to Linux's.
@@ -115,11 +129,8 @@ func (FileDescriptionDefaultImpl) Ioctl(ctx context.Context, uio usermem.IO, arg
 
 // DirectoryFileDescriptionDefaultImpl may be embedded by implementations of
 // FileDescriptionImpl that always represent directories to obtain
-// implementations of non-directory I/O methods that return EISDIR, and
-// implementations of other methods consistent with FileDescriptionDefaultImpl.
-type DirectoryFileDescriptionDefaultImpl struct {
-	FileDescriptionDefaultImpl
-}
+// implementations of non-directory I/O methods that return EISDIR.
+type DirectoryFileDescriptionDefaultImpl struct{}
 
 // PRead implements FileDescriptionImpl.PRead.
 func (DirectoryFileDescriptionDefaultImpl) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts ReadOptions) (int64, error) {
@@ -140,3 +151,104 @@ func (DirectoryFileDescriptionDefaultImpl) PWrite(ctx context.Context, src userm
 func (DirectoryFileDescriptionDefaultImpl) Write(ctx context.Context, src usermem.IOSequence, opts WriteOptions) (int64, error) {
 	return 0, syserror.EISDIR
 }
+
+// DynamicBytesFileDescriptionImpl may be embedded by implementations of
+// FileDescriptionImpl that represent read-only regular files whose contents
+// are backed by a bytes.Buffer that is regenerated when necessary, consistent
+// with Linux's fs/seq_file.c:single_open().
+//
+// DynamicBytesFileDescriptionImpl.SetDataSource() must be called before first
+// use.
+type DynamicBytesFileDescriptionImpl struct {
+	data     DynamicBytesSource // immutable
+	mu       sync.Mutex         // protects the following fields
+	buf      bytes.Buffer
+	off      int64
+	lastRead int64 // offset at which the last Read, PRead, or Seek ended
+}
+
+// DynamicBytesSource represents a data source for a
+// DynamicBytesFileDescriptionImpl.
+type DynamicBytesSource interface {
+	// Generate writes the file's contents to buf.
+	Generate(ctx context.Context, buf *bytes.Buffer) error
+}
+
+// SetDataSource must be called exactly once on fd before first use.
+func (fd *DynamicBytesFileDescriptionImpl) SetDataSource(data DynamicBytesSource) {
+	fd.data = data
+}
+
+// Preconditions: fd.mu must be locked.
+func (fd *DynamicBytesFileDescriptionImpl) preadLocked(ctx context.Context, dst usermem.IOSequence, offset int64, opts *ReadOptions) (int64, error) {
+	// Regenerate the buffer if it's empty, or before pread() at a new offset.
+	// Compare fs/seq_file.c:seq_read() => traverse().
+	switch {
+	case offset != fd.lastRead:
+		fd.buf.Reset()
+		fallthrough
+	case fd.buf.Len() == 0:
+		if err := fd.data.Generate(ctx, &fd.buf); err != nil {
+			fd.buf.Reset()
+			// fd.off is not updated in this case.
+			fd.lastRead = 0
+			return 0, err
+		}
+	}
+	bs := fd.buf.Bytes()
+	if offset >= int64(len(bs)) {
+		return 0, io.EOF
+	}
+	n, err := dst.CopyOut(ctx, bs[offset:])
+	fd.lastRead = offset + int64(n)
+	return int64(n), err
+}
+
+// PRead implements FileDescriptionImpl.PRead.
+func (fd *DynamicBytesFileDescriptionImpl) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts ReadOptions) (int64, error) {
+	fd.mu.Lock()
+	n, err := fd.preadLocked(ctx, dst, offset, &opts)
+	fd.mu.Unlock()
+	return n, err
+}
+
+// Read implements FileDescriptionImpl.Read.
+func (fd *DynamicBytesFileDescriptionImpl) Read(ctx context.Context, dst usermem.IOSequence, opts ReadOptions) (int64, error) {
+	fd.mu.Lock()
+	n, err := fd.preadLocked(ctx, dst, fd.off, &opts)
+	fd.off += n
+	fd.mu.Unlock()
+	return n, err
+}
+
+// Seek implements FileDescriptionImpl.Seek.
+func (fd *DynamicBytesFileDescriptionImpl) Seek(ctx context.Context, offset int64, whence int32) (int64, error) {
+	fd.mu.Lock()
+	defer fd.mu.Unlock()
+	switch whence {
+	case linux.SEEK_SET:
+		// Use offset as given.
+	case linux.SEEK_CUR:
+		offset += fd.off
+	default:
+		// fs/seq_file:seq_lseek() rejects SEEK_END etc.
+		return 0, syserror.EINVAL
+	}
+	if offset < 0 {
+		return 0, syserror.EINVAL
+	}
+	if offset != fd.lastRead {
+		// Regenerate the file's contents immediately. Compare
+		// fs/seq_file.c:seq_lseek() => traverse().
+		fd.buf.Reset()
+		if err := fd.data.Generate(ctx, &fd.buf); err != nil {
+			fd.buf.Reset()
+			fd.off = 0
+			fd.lastRead = 0
+			return 0, err
+		}
+		fd.lastRead = offset
+	}
+	fd.off = offset
+	return offset, nil
+}
diff --git a/pkg/sentry/vfs/file_description_impl_util_test.go b/pkg/sentry/vfs/file_description_impl_util_test.go
new file mode 100644
index 000000000..511b829fc
--- /dev/null
+++ b/pkg/sentry/vfs/file_description_impl_util_test.go
@@ -0,0 +1,141 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package vfs
+
+import (
+	"bytes"
+	"fmt"
+	"io"
+	"sync/atomic"
+	"testing"
+
+	"gvisor.dev/gvisor/pkg/abi/linux"
+	"gvisor.dev/gvisor/pkg/sentry/context"
+	"gvisor.dev/gvisor/pkg/sentry/context/contexttest"
+	"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+	"gvisor.dev/gvisor/pkg/sentry/usermem"
+	"gvisor.dev/gvisor/pkg/syserror"
+)
+
+// fileDescription is the common fd struct which a filesystem implementation
+// embeds in all of its file description implementations as required.
+type fileDescription struct {
+	vfsfd FileDescription
+	FileDescriptionDefaultImpl
+}
+
+// genCountFD is a read-only FileDescriptionImpl representing a regular file
+// that contains the number of times its DynamicBytesSource.Generate()
+// implementation has been called.
+type genCountFD struct {
+	fileDescription
+	DynamicBytesFileDescriptionImpl
+
+	count uint64 // accessed using atomic memory ops
+}
+
+func newGenCountFD(mnt *Mount, vfsd *Dentry) *FileDescription {
+	var fd genCountFD
+	fd.vfsfd.Init(&fd, mnt, vfsd)
+	fd.DynamicBytesFileDescriptionImpl.SetDataSource(&fd)
+	return &fd.vfsfd
+}
+
+// Release implements FileDescriptionImpl.Release.
+func (fd *genCountFD) Release() {
+}
+
+// StatusFlags implements FileDescriptionImpl.StatusFlags.
+func (fd *genCountFD) StatusFlags(ctx context.Context) (uint32, error) {
+	return 0, nil
+}
+
+// SetStatusFlags implements FileDescriptionImpl.SetStatusFlags.
+func (fd *genCountFD) SetStatusFlags(ctx context.Context, flags uint32) error {
+	return syserror.EPERM
+}
+
+// Stat implements FileDescriptionImpl.Stat.
+func (fd *genCountFD) Stat(ctx context.Context, opts StatOptions) (linux.Statx, error) {
+	// Note that Statx.Mask == 0 in the return value.
+	return linux.Statx{}, nil
+}
+
+// SetStat implements FileDescriptionImpl.SetStat.
+func (fd *genCountFD) SetStat(ctx context.Context, opts SetStatOptions) error {
+	return syserror.EPERM
+}
+
+// Generate implements DynamicBytesSource.Generate.
+func (fd *genCountFD) Generate(ctx context.Context, buf *bytes.Buffer) error {
+	fmt.Fprintf(buf, "%d", atomic.AddUint64(&fd.count, 1))
+	return nil
+}
+
+func TestGenCountFD(t *testing.T) {
+	ctx := contexttest.Context(t)
+	creds := auth.CredentialsFromContext(ctx)
+
+	vfsObj := New() // vfs.New()
+	vfsObj.MustRegisterFilesystemType("testfs", FDTestFilesystemType{})
+	mntns, err := vfsObj.NewMountNamespace(ctx, creds, "", "testfs", &NewFilesystemOptions{})
+	if err != nil {
+		t.Fatalf("failed to create testfs root mount: %v", err)
+	}
+	vd := mntns.Root()
+	defer vd.DecRef()
+
+	fd := newGenCountFD(vd.Mount(), vd.Dentry())
+	defer fd.DecRef()
+
+	// The first read causes Generate to be called to fill the FD's buffer.
+	buf := make([]byte, 2)
+	ioseq := usermem.BytesIOSequence(buf)
+	n, err := fd.Impl().Read(ctx, ioseq, ReadOptions{})
+	if n != 1 || (err != nil && err != io.EOF) {
+		t.Fatalf("first Read: got (%d, %v), wanted (1, nil or EOF)", n, err)
+	}
+	if want := byte('1'); buf[0] != want {
+		t.Errorf("first Read: got byte %c, wanted %c", buf[0], want)
+	}
+
+	// A second read without seeking is still at EOF.
+	n, err = fd.Impl().Read(ctx, ioseq, ReadOptions{})
+	if n != 0 || err != io.EOF {
+		t.Fatalf("second Read: got (%d, %v), wanted (0, EOF)", n, err)
+	}
+
+	// Seeking to the beginning of the file causes it to be regenerated.
+	n, err = fd.Impl().Seek(ctx, 0, linux.SEEK_SET)
+	if n != 0 || err != nil {
+		t.Fatalf("Seek: got (%d, %v), wanted (0, nil)", n, err)
+	}
+	n, err = fd.Impl().Read(ctx, ioseq, ReadOptions{})
+	if n != 1 || (err != nil && err != io.EOF) {
+		t.Fatalf("Read after Seek: got (%d, %v), wanted (1, nil or EOF)", n, err)
+	}
+	if want := byte('2'); buf[0] != want {
+		t.Errorf("Read after Seek: got byte %c, wanted %c", buf[0], want)
+	}
+
+	// PRead at the beginning of the file also causes it to be regenerated.
+	n, err = fd.Impl().PRead(ctx, ioseq, 0, ReadOptions{})
+	if n != 1 || (err != nil && err != io.EOF) {
+		t.Fatalf("PRead: got (%d, %v), wanted (1, nil or EOF)", n, err)
+	}
+	if want := byte('3'); buf[0] != want {
+		t.Errorf("PRead: got byte %c, wanted %c", buf[0], want)
+	}
+}
diff --git a/pkg/sentry/vfs/testutil.go b/pkg/sentry/vfs/testutil.go
new file mode 100644
index 000000000..70b192ece
--- /dev/null
+++ b/pkg/sentry/vfs/testutil.go
@@ -0,0 +1,139 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package vfs
+
+import (
+	"gvisor.dev/gvisor/pkg/abi/linux"
+	"gvisor.dev/gvisor/pkg/sentry/context"
+	"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+	"gvisor.dev/gvisor/pkg/syserror"
+)
+
+// FDTestFilesystemType is a test-only FilesystemType that produces Filesystems
+// for which all FilesystemImpl methods taking a path return EPERM. It is used
+// to produce Mounts and Dentries for testing of FileDescriptionImpls that do
+// not depend on their originating Filesystem.
+type FDTestFilesystemType struct{}
+
+// FDTestFilesystem is a test-only FilesystemImpl produced by
+// FDTestFilesystemType.
+type FDTestFilesystem struct {
+	vfsfs Filesystem
+}
+
+// NewFilesystem implements FilesystemType.NewFilesystem.
+func (fstype FDTestFilesystemType) NewFilesystem(ctx context.Context, creds *auth.Credentials, source string, opts NewFilesystemOptions) (*Filesystem, *Dentry, error) {
+	var fs FDTestFilesystem
+	fs.vfsfs.Init(&fs)
+	return &fs.vfsfs, fs.NewDentry(), nil
+}
+
+// Release implements FilesystemImpl.Release.
+func (fs *FDTestFilesystem) Release() {
+}
+
+// Sync implements FilesystemImpl.Sync.
+func (fs *FDTestFilesystem) Sync(ctx context.Context) error {
+	return nil
+}
+
+// GetDentryAt implements FilesystemImpl.GetDentryAt.
+func (fs *FDTestFilesystem) GetDentryAt(ctx context.Context, rp *ResolvingPath, opts GetDentryOptions) (*Dentry, error) {
+	return nil, syserror.EPERM
+}
+
+// LinkAt implements FilesystemImpl.LinkAt.
+func (fs *FDTestFilesystem) LinkAt(ctx context.Context, rp *ResolvingPath, vd VirtualDentry) error {
+	return syserror.EPERM
+}
+
+// MkdirAt implements FilesystemImpl.MkdirAt.
+func (fs *FDTestFilesystem) MkdirAt(ctx context.Context, rp *ResolvingPath, opts MkdirOptions) error {
+	return syserror.EPERM
+}
+
+// MknodAt implements FilesystemImpl.MknodAt.
+func (fs *FDTestFilesystem) MknodAt(ctx context.Context, rp *ResolvingPath, opts MknodOptions) error {
+	return syserror.EPERM
+}
+
+// OpenAt implements FilesystemImpl.OpenAt.
+func (fs *FDTestFilesystem) OpenAt(ctx context.Context, rp *ResolvingPath, opts OpenOptions) (*FileDescription, error) {
+	return nil, syserror.EPERM
+}
+
+// ReadlinkAt implements FilesystemImpl.ReadlinkAt.
+func (fs *FDTestFilesystem) ReadlinkAt(ctx context.Context, rp *ResolvingPath) (string, error) {
+	return "", syserror.EPERM
+}
+
+// RenameAt implements FilesystemImpl.RenameAt.
+func (fs *FDTestFilesystem) RenameAt(ctx context.Context, rp *ResolvingPath, vd VirtualDentry, opts RenameOptions) error {
+	return syserror.EPERM
+}
+
+// RmdirAt implements FilesystemImpl.RmdirAt.
+func (fs *FDTestFilesystem) RmdirAt(ctx context.Context, rp *ResolvingPath) error {
+	return syserror.EPERM
+}
+
+// SetStatAt implements FilesystemImpl.SetStatAt.
+func (fs *FDTestFilesystem) SetStatAt(ctx context.Context, rp *ResolvingPath, opts SetStatOptions) error {
+	return syserror.EPERM
+}
+
+// StatAt implements FilesystemImpl.StatAt.
+func (fs *FDTestFilesystem) StatAt(ctx context.Context, rp *ResolvingPath, opts StatOptions) (linux.Statx, error) {
+	return linux.Statx{}, syserror.EPERM
+}
+
+// StatFSAt implements FilesystemImpl.StatFSAt.
+func (fs *FDTestFilesystem) StatFSAt(ctx context.Context, rp *ResolvingPath) (linux.Statfs, error) {
+	return linux.Statfs{}, syserror.EPERM
+}
+
+// SymlinkAt implements FilesystemImpl.SymlinkAt.
+func (fs *FDTestFilesystem) SymlinkAt(ctx context.Context, rp *ResolvingPath, target string) error {
+	return syserror.EPERM
+}
+
+// UnlinkAt implements FilesystemImpl.UnlinkAt.
+func (fs *FDTestFilesystem) UnlinkAt(ctx context.Context, rp *ResolvingPath) error {
+	return syserror.EPERM
+}
+
+type fdTestDentry struct {
+	vfsd Dentry
+}
+
+// NewDentry returns a new Dentry.
+func (fs *FDTestFilesystem) NewDentry() *Dentry {
+	var d fdTestDentry
+	d.vfsd.Init(&d)
+	return &d.vfsd
+}
+
+// IncRef implements DentryImpl.IncRef.
+func (d *fdTestDentry) IncRef(vfsfs *Filesystem) {
+}
+
+// TryIncRef implements DentryImpl.TryIncRef.
+func (d *fdTestDentry) TryIncRef(vfsfs *Filesystem) bool {
+	return true
+}
+
+// DecRef implements DentryImpl.DecRef.
+func (d *fdTestDentry) DecRef(vfsfs *Filesystem) {
+}
diff --git a/pkg/tcpip/adapters/gonet/BUILD b/pkg/tcpip/adapters/gonet/BUILD
index c40924852..0d2637ee4 100644
--- a/pkg/tcpip/adapters/gonet/BUILD
+++ b/pkg/tcpip/adapters/gonet/BUILD
@@ -24,6 +24,7 @@ go_test(
     embed = [":gonet"],
     deps = [
         "//pkg/tcpip",
+        "//pkg/tcpip/header",
         "//pkg/tcpip/link/loopback",
         "//pkg/tcpip/network/ipv4",
         "//pkg/tcpip/network/ipv6",
diff --git a/pkg/tcpip/adapters/gonet/gonet.go b/pkg/tcpip/adapters/gonet/gonet.go
index 308f620e5..cd6ce930a 100644
--- a/pkg/tcpip/adapters/gonet/gonet.go
+++ b/pkg/tcpip/adapters/gonet/gonet.go
@@ -404,7 +404,7 @@ func (c *Conn) Write(b []byte) (int, error) {
 			}
 		}
 
-		var n uintptr
+		var n int64
 		var resCh <-chan struct{}
 		n, resCh, err = c.ep.Write(tcpip.SlicePayload(v), tcpip.WriteOptions{})
 		nbytes += int(n)
@@ -556,32 +556,50 @@ type PacketConn struct {
 	wq    *waiter.Queue
 }
 
-// NewPacketConn creates a new PacketConn.
-func NewPacketConn(s *stack.Stack, addr tcpip.FullAddress, network tcpip.NetworkProtocolNumber) (*PacketConn, error) {
-	// Create UDP endpoint and bind it.
+// DialUDP creates a new PacketConn.
+//
+// If laddr is nil, a local address is automatically chosen.
+//
+// If raddr is nil, the PacketConn is left unconnected.
+func DialUDP(s *stack.Stack, laddr, raddr *tcpip.FullAddress, network tcpip.NetworkProtocolNumber) (*PacketConn, error) {
 	var wq waiter.Queue
 	ep, err := s.NewEndpoint(udp.ProtocolNumber, network, &wq)
 	if err != nil {
 		return nil, errors.New(err.String())
 	}
 
-	if err := ep.Bind(addr); err != nil {
-		ep.Close()
-		return nil, &net.OpError{
-			Op:   "bind",
-			Net:  "udp",
-			Addr: fullToUDPAddr(addr),
-			Err:  errors.New(err.String()),
+	if laddr != nil {
+		if err := ep.Bind(*laddr); err != nil {
+			ep.Close()
+			return nil, &net.OpError{
+				Op:   "bind",
+				Net:  "udp",
+				Addr: fullToUDPAddr(*laddr),
+				Err:  errors.New(err.String()),
+			}
 		}
 	}
 
-	c := &PacketConn{
+	c := PacketConn{
 		stack: s,
 		ep:    ep,
 		wq:    &wq,
 	}
 	c.deadlineTimer.init()
-	return c, nil
+
+	if raddr != nil {
+		if err := c.ep.Connect(*raddr); err != nil {
+			c.ep.Close()
+			return nil, &net.OpError{
+				Op:   "connect",
+				Net:  "udp",
+				Addr: fullToUDPAddr(*raddr),
+				Err:  errors.New(err.String()),
+			}
+		}
+	}
+
+	return &c, nil
 }
 
 func (c *PacketConn) newOpError(op string, err error) *net.OpError {
diff --git a/pkg/tcpip/adapters/gonet/gonet_test.go b/pkg/tcpip/adapters/gonet/gonet_test.go
index 39efe44c7..672f026b2 100644
--- a/pkg/tcpip/adapters/gonet/gonet_test.go
+++ b/pkg/tcpip/adapters/gonet/gonet_test.go
@@ -26,6 +26,7 @@ import (
 
 	"golang.org/x/net/nettest"
 	"gvisor.dev/gvisor/pkg/tcpip"
+	"gvisor.dev/gvisor/pkg/tcpip/header"
 	"gvisor.dev/gvisor/pkg/tcpip/link/loopback"
 	"gvisor.dev/gvisor/pkg/tcpip/network/ipv4"
 	"gvisor.dev/gvisor/pkg/tcpip/network/ipv6"
@@ -69,17 +70,13 @@ func newLoopbackStack() (*stack.Stack, *tcpip.Error) {
 	s.SetRouteTable([]tcpip.Route{
 		// IPv4
 		{
-			Destination: tcpip.Address(strings.Repeat("\x00", 4)),
-			Mask:        tcpip.AddressMask(strings.Repeat("\x00", 4)),
-			Gateway:     "",
+			Destination: header.IPv4EmptySubnet,
 			NIC:         NICID,
 		},
 
 		// IPv6
 		{
-			Destination: tcpip.Address(strings.Repeat("\x00", 16)),
-			Mask:        tcpip.AddressMask(strings.Repeat("\x00", 16)),
-			Gateway:     "",
+			Destination: header.IPv6EmptySubnet,
 			NIC:         NICID,
 		},
 	})
@@ -371,9 +368,9 @@ func TestUDPForwarder(t *testing.T) {
 	})
 	s.SetTransportProtocolHandler(udp.ProtocolNumber, fwd.HandlePacket)
 
-	c2, err := NewPacketConn(s, addr2, ipv4.ProtocolNumber)
+	c2, err := DialUDP(s, &addr2, nil, ipv4.ProtocolNumber)
 	if err != nil {
-		t.Fatal("NewPacketConn(port 5):", err)
+		t.Fatal("DialUDP(bind port 5):", err)
 	}
 
 	sent := "abc123"
@@ -452,13 +449,13 @@ func TestPacketConnTransfer(t *testing.T) {
 	addr2 := tcpip.FullAddress{NICID, ip2, 11311}
 	s.AddAddress(NICID, ipv4.ProtocolNumber, ip2)
 
-	c1, err := NewPacketConn(s, addr1, ipv4.ProtocolNumber)
+	c1, err := DialUDP(s, &addr1, nil, ipv4.ProtocolNumber)
 	if err != nil {
-		t.Fatal("NewPacketConn(port 4):", err)
+		t.Fatal("DialUDP(bind port 4):", err)
 	}
-	c2, err := NewPacketConn(s, addr2, ipv4.ProtocolNumber)
+	c2, err := DialUDP(s, &addr2, nil, ipv4.ProtocolNumber)
 	if err != nil {
-		t.Fatal("NewPacketConn(port 5):", err)
+		t.Fatal("DialUDP(bind port 5):", err)
 	}
 
 	c1.SetDeadline(time.Now().Add(time.Second))
@@ -491,6 +488,50 @@ func TestPacketConnTransfer(t *testing.T) {
 	}
 }
 
+func TestConnectedPacketConnTransfer(t *testing.T) {
+	s, e := newLoopbackStack()
+	if e != nil {
+		t.Fatalf("newLoopbackStack() = %v", e)
+	}
+
+	ip := tcpip.Address(net.IPv4(169, 254, 10, 1).To4())
+	addr := tcpip.FullAddress{NICID, ip, 11211}
+	s.AddAddress(NICID, ipv4.ProtocolNumber, ip)
+
+	c1, err := DialUDP(s, &addr, nil, ipv4.ProtocolNumber)
+	if err != nil {
+		t.Fatal("DialUDP(bind port 4):", err)
+	}
+	c2, err := DialUDP(s, nil, &addr, ipv4.ProtocolNumber)
+	if err != nil {
+		t.Fatal("DialUDP(bind port 5):", err)
+	}
+
+	c1.SetDeadline(time.Now().Add(time.Second))
+	c2.SetDeadline(time.Now().Add(time.Second))
+
+	sent := "abc123"
+	if n, err := c2.Write([]byte(sent)); err != nil || n != len(sent) {
+		t.Errorf("got c2.Write(%q) = %d, %v, want = %d, %v", sent, n, err, len(sent), nil)
+	}
+	recv := make([]byte, len(sent))
+	n, err := c1.Read(recv)
+	if err != nil || n != len(recv) {
+		t.Errorf("got c1.Read() = %d, %v, want = %d, %v", n, err, len(recv), nil)
+	}
+
+	if recv := string(recv); recv != sent {
+		t.Errorf("got recv = %q, want = %q", recv, sent)
+	}
+
+	if err := c1.Close(); err != nil {
+		t.Error("c1.Close():", err)
+	}
+	if err := c2.Close(); err != nil {
+		t.Error("c2.Close():", err)
+	}
+}
+
 func makePipe() (c1, c2 net.Conn, stop func(), err error) {
 	s, e := newLoopbackStack()
 	if e != nil {
diff --git a/pkg/tcpip/header/ipv4.go b/pkg/tcpip/header/ipv4.go
index 94a3af289..17fc9c68e 100644
--- a/pkg/tcpip/header/ipv4.go
+++ b/pkg/tcpip/header/ipv4.go
@@ -111,6 +111,15 @@ const (
 	IPv4FlagDontFragment
 )
 
+// IPv4EmptySubnet is the empty IPv4 subnet.
+var IPv4EmptySubnet = func() tcpip.Subnet {
+	subnet, err := tcpip.NewSubnet(IPv4Any, tcpip.AddressMask(IPv4Any))
+	if err != nil {
+		panic(err)
+	}
+	return subnet
+}()
+
 // IPVersion returns the version of IP used in the given packet. It returns -1
 // if the packet is not large enough to contain the version field.
 func IPVersion(b []byte) int {
diff --git a/pkg/tcpip/header/ipv6.go b/pkg/tcpip/header/ipv6.go
index 95fe8bfc3..31be42ce0 100644
--- a/pkg/tcpip/header/ipv6.go
+++ b/pkg/tcpip/header/ipv6.go
@@ -82,6 +82,15 @@ const (
 	IPv6Any tcpip.Address = "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 )
 
+// IPv6EmptySubnet is the empty IPv6 subnet.
+var IPv6EmptySubnet = func() tcpip.Subnet {
+	subnet, err := tcpip.NewSubnet(IPv6Any, tcpip.AddressMask(IPv6Any))
+	if err != nil {
+		panic(err)
+	}
+	return subnet
+}()
+
 // PayloadLength returns the value of the "payload length" field of the ipv6
 // header.
 func (b IPv6) PayloadLength() uint16 {
diff --git a/pkg/tcpip/link/rawfile/BUILD b/pkg/tcpip/link/rawfile/BUILD
index 11ba31ca4..088eb8a21 100644
--- a/pkg/tcpip/link/rawfile/BUILD
+++ b/pkg/tcpip/link/rawfile/BUILD
@@ -7,8 +7,9 @@ go_library(
     srcs = [
         "blockingpoll_amd64.s",
         "blockingpoll_arm64.s",
+        "blockingpoll_noyield_unsafe.go",
         "blockingpoll_unsafe.go",
-        "blockingpoll_stub_unsafe.go",
+        "blockingpoll_yield_unsafe.go",
         "errors.go",
         "rawfile_unsafe.go",
     ],
diff --git a/pkg/tcpip/link/rawfile/blockingpoll_arm64.s b/pkg/tcpip/link/rawfile/blockingpoll_arm64.s
index 0bc873a01..b62888b93 100644
--- a/pkg/tcpip/link/rawfile/blockingpoll_arm64.s
+++ b/pkg/tcpip/link/rawfile/blockingpoll_arm64.s
@@ -1,4 +1,4 @@
-// Copyright 2019 The gVisor Authors.
+// Copyright 2018 The gVisor Authors.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
diff --git a/pkg/tcpip/link/rawfile/blockingpoll_stub_unsafe.go b/pkg/tcpip/link/rawfile/blockingpoll_noyield_unsafe.go
index 621ab8d29..621ab8d29 100644
--- a/pkg/tcpip/link/rawfile/blockingpoll_stub_unsafe.go
+++ b/pkg/tcpip/link/rawfile/blockingpoll_noyield_unsafe.go
diff --git a/pkg/tcpip/link/rawfile/blockingpoll_unsafe.go b/pkg/tcpip/link/rawfile/blockingpoll_unsafe.go
index eeca47d78..84dc0e918 100644
--- a/pkg/tcpip/link/rawfile/blockingpoll_unsafe.go
+++ b/pkg/tcpip/link/rawfile/blockingpoll_unsafe.go
@@ -12,49 +12,20 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-// +build linux,amd64 linux,arm64
-// +build go1.12
-// +build !go1.14
-
-// Check go:linkname function signatures when updating Go version.
+// +build linux,!amd64
 
 package rawfile
 
 import (
 	"syscall"
-	_ "unsafe" // for go:linkname
+	"unsafe"
 )
 
-//go:noescape
-func BlockingPoll(fds *PollEvent, nfds int, timeout *syscall.Timespec) (int, syscall.Errno)
-
-// Use go:linkname to call into the runtime. As of Go 1.12 this has to
-// be done from Go code so that we make an ABIInternal call to an
-// ABIInternal function; see https://golang.org/issue/27539.
-
-// We need to call both entersyscallblock and exitsyscall this way so
-// that the runtime's check on the stack pointer lines up.
-
-// Note that calling an unexported function in the runtime package is
-// unsafe and this hack is likely to break in future Go releases.
-
-//go:linkname entersyscallblock runtime.entersyscallblock
-func entersyscallblock()
-
-//go:linkname exitsyscall runtime.exitsyscall
-func exitsyscall()
-
-// These forwarding functions must be nosplit because 1) we must
-// disallow preemption between entersyscallblock and exitsyscall, and
-// 2) we have an untyped assembly frame on the stack which can not be
-// grown or moved.
-
-//go:nosplit
-func callEntersyscallblock() {
-	entersyscallblock()
-}
+// BlockingPoll is just a stub function that forwards to the ppoll() system call
+// on non-amd64 platforms.
+func BlockingPoll(fds *PollEvent, nfds int, timeout *syscall.Timespec) (int, syscall.Errno) {
+	n, _, e := syscall.Syscall6(syscall.SYS_PPOLL, uintptr(unsafe.Pointer(fds)),
+		uintptr(nfds), uintptr(unsafe.Pointer(timeout)), 0, 0, 0)
 
-//go:nosplit
-func callExitsyscall() {
-	exitsyscall()
+	return int(n), e
 }
diff --git a/pkg/tcpip/link/rawfile/blockingpoll_yield_unsafe.go b/pkg/tcpip/link/rawfile/blockingpoll_yield_unsafe.go
new file mode 100644
index 000000000..dda3b10a6
--- /dev/null
+++ b/pkg/tcpip/link/rawfile/blockingpoll_yield_unsafe.go
@@ -0,0 +1,66 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// +build linux,amd64 linux,arm64
+// +build go1.12
+// +build !go1.14
+
+// Check go:linkname function signatures when updating Go version.
+
+package rawfile
+
+import (
+	"syscall"
+	_ "unsafe" // for go:linkname
+)
+
+// BlockingPoll on amd64/arm64 makes the ppoll() syscall while calling the
+// version of entersyscall that relinquishes the P so that other Gs can
+// run. This is meant to be called in cases when the syscall is expected to
+// block. On non amd64/arm64 platforms it just forwards to the ppoll() system
+// call.
+//
+//go:noescape
+func BlockingPoll(fds *PollEvent, nfds int, timeout *syscall.Timespec) (int, syscall.Errno)
+
+// Use go:linkname to call into the runtime. As of Go 1.12 this has to
+// be done from Go code so that we make an ABIInternal call to an
+// ABIInternal function; see https://golang.org/issue/27539.
+
+// We need to call both entersyscallblock and exitsyscall this way so
+// that the runtime's check on the stack pointer lines up.
+
+// Note that calling an unexported function in the runtime package is
+// unsafe and this hack is likely to break in future Go releases.
+
+//go:linkname entersyscallblock runtime.entersyscallblock
+func entersyscallblock()
+
+//go:linkname exitsyscall runtime.exitsyscall
+func exitsyscall()
+
+// These forwarding functions must be nosplit because 1) we must
+// disallow preemption between entersyscallblock and exitsyscall, and
+// 2) we have an untyped assembly frame on the stack which can not be
+// grown or moved.
+
+//go:nosplit
+func callEntersyscallblock() {
+	entersyscallblock()
+}
+
+//go:nosplit
+func callExitsyscall() {
+	exitsyscall()
+}
diff --git a/pkg/tcpip/link/sniffer/sniffer.go b/pkg/tcpip/link/sniffer/sniffer.go
index fc584c6a4..36c8c46fc 100644
--- a/pkg/tcpip/link/sniffer/sniffer.go
+++ b/pkg/tcpip/link/sniffer/sniffer.go
@@ -360,10 +360,9 @@ func logPacket(prefix string, protocol tcpip.NetworkProtocolNumber, b buffer.Vie
 		if fragmentOffset == 0 && len(udp) >= header.UDPMinimumSize {
 			srcPort = udp.SourcePort()
 			dstPort = udp.DestinationPort()
+			details = fmt.Sprintf("xsum: 0x%x", udp.Checksum())
+			size -= header.UDPMinimumSize
 		}
-		size -= header.UDPMinimumSize
-
-		details = fmt.Sprintf("xsum: 0x%x", udp.Checksum())
 
 	case header.TCPProtocolNumber:
 		transName = "tcp"
diff --git a/pkg/tcpip/network/arp/arp_test.go b/pkg/tcpip/network/arp/arp_test.go
index e477046db..4c4b54469 100644
--- a/pkg/tcpip/network/arp/arp_test.go
+++ b/pkg/tcpip/network/arp/arp_test.go
@@ -66,9 +66,7 @@ func newTestContext(t *testing.T) *testContext {
 	}
 
 	s.SetRouteTable([]tcpip.Route{{
-		Destination: "\x00\x00\x00\x00",
-		Mask:        "\x00\x00\x00\x00",
-		Gateway:     "",
+		Destination: header.IPv4EmptySubnet,
 		NIC:         1,
 	}})
 
diff --git a/pkg/tcpip/network/ip_test.go b/pkg/tcpip/network/ip_test.go
index 55e9eec99..6bbfcd97f 100644
--- a/pkg/tcpip/network/ip_test.go
+++ b/pkg/tcpip/network/ip_test.go
@@ -173,8 +173,7 @@ func buildIPv4Route(local, remote tcpip.Address) (stack.Route, *tcpip.Error) {
 	s.CreateNIC(1, loopback.New())
 	s.AddAddress(1, ipv4.ProtocolNumber, local)
 	s.SetRouteTable([]tcpip.Route{{
-		Destination: ipv4SubnetAddr,
-		Mask:        ipv4SubnetMask,
+		Destination: header.IPv4EmptySubnet,
 		Gateway:     ipv4Gateway,
 		NIC:         1,
 	}})
@@ -187,8 +186,7 @@ func buildIPv6Route(local, remote tcpip.Address) (stack.Route, *tcpip.Error) {
 	s.CreateNIC(1, loopback.New())
 	s.AddAddress(1, ipv6.ProtocolNumber, local)
 	s.SetRouteTable([]tcpip.Route{{
-		Destination: ipv6SubnetAddr,
-		Mask:        ipv6SubnetMask,
+		Destination: header.IPv6EmptySubnet,
 		Gateway:     ipv6Gateway,
 		NIC:         1,
 	}})
diff --git a/pkg/tcpip/network/ipv4/ipv4_test.go b/pkg/tcpip/network/ipv4/ipv4_test.go
index 3207a3d46..1b5a55bea 100644
--- a/pkg/tcpip/network/ipv4/ipv4_test.go
+++ b/pkg/tcpip/network/ipv4/ipv4_test.go
@@ -52,9 +52,7 @@ func TestExcludeBroadcast(t *testing.T) {
 	}
 
 	s.SetRouteTable([]tcpip.Route{{
-		Destination: "\x00\x00\x00\x00",
-		Mask:        "\x00\x00\x00\x00",
-		Gateway:     "",
+		Destination: header.IPv4EmptySubnet,
 		NIC:         1,
 	}})
 
@@ -247,14 +245,22 @@ func buildContext(t *testing.T, packetCollectorErrors []*tcpip.Error, mtu uint32
 	_, linkEP := newErrorChannel(100 /* Enough for all tests. */, mtu, "", packetCollectorErrors)
 	linkEPId := stack.RegisterLinkEndpoint(linkEP)
 	s.CreateNIC(1, linkEPId)
-	s.AddAddress(1, ipv4.ProtocolNumber, "\x10\x00\x00\x01")
-	s.SetRouteTable([]tcpip.Route{{
-		Destination: "\x10\x00\x00\x02",
-		Mask:        "\xff\xff\xff\xff",
-		Gateway:     "",
-		NIC:         1,
-	}})
-	r, err := s.FindRoute(0, "\x10\x00\x00\x01", "\x10\x00\x00\x02", ipv4.ProtocolNumber, false /* multicastLoop */)
+	const (
+		src = "\x10\x00\x00\x01"
+		dst = "\x10\x00\x00\x02"
+	)
+	s.AddAddress(1, ipv4.ProtocolNumber, src)
+	{
+		subnet, err := tcpip.NewSubnet(dst, tcpip.AddressMask(header.IPv4Broadcast))
+		if err != nil {
+			t.Fatal(err)
+		}
+		s.SetRouteTable([]tcpip.Route{{
+			Destination: subnet,
+			NIC:         1,
+		}})
+	}
+	r, err := s.FindRoute(0, src, dst, ipv4.ProtocolNumber, false /* multicastLoop */)
 	if err != nil {
 		t.Fatalf("s.FindRoute got %v, want %v", err, nil)
 	}
diff --git a/pkg/tcpip/network/ipv6/icmp_test.go b/pkg/tcpip/network/ipv6/icmp_test.go
index 726362c87..d0dc72506 100644
--- a/pkg/tcpip/network/ipv6/icmp_test.go
+++ b/pkg/tcpip/network/ipv6/icmp_test.go
@@ -91,13 +91,18 @@ func TestICMPCounts(t *testing.T) {
 			t.Fatalf("AddAddress(_, %d, %s) = %s", ProtocolNumber, lladdr0, err)
 		}
 	}
-	s.SetRouteTable(
-		[]tcpip.Route{{
-			Destination: lladdr1,
-			Mask:        tcpip.AddressMask(strings.Repeat("\xff", 16)),
-			NIC:         1,
-		}},
-	)
+	{
+		subnet, err := tcpip.NewSubnet(lladdr1, tcpip.AddressMask(strings.Repeat("\xff", len(lladdr1))))
+		if err != nil {
+			t.Fatal(err)
+		}
+		s.SetRouteTable(
+			[]tcpip.Route{{
+				Destination: subnet,
+				NIC:         1,
+			}},
+		)
+	}
 
 	netProto := s.NetworkProtocolInstance(ProtocolNumber)
 	if netProto == nil {
@@ -237,17 +242,23 @@ func newTestContext(t *testing.T) *testContext {
 		t.Fatalf("AddAddress sn lladdr1: %v", err)
 	}
 
+	subnet0, err := tcpip.NewSubnet(lladdr1, tcpip.AddressMask(strings.Repeat("\xff", len(lladdr1))))
+	if err != nil {
+		t.Fatal(err)
+	}
 	c.s0.SetRouteTable(
 		[]tcpip.Route{{
-			Destination: lladdr1,
-			Mask:        tcpip.AddressMask(strings.Repeat("\xff", 16)),
+			Destination: subnet0,
 			NIC:         1,
 		}},
 	)
+	subnet1, err := tcpip.NewSubnet(lladdr0, tcpip.AddressMask(strings.Repeat("\xff", len(lladdr0))))
+	if err != nil {
+		t.Fatal(err)
+	}
 	c.s1.SetRouteTable(
 		[]tcpip.Route{{
-			Destination: lladdr0,
-			Mask:        tcpip.AddressMask(strings.Repeat("\xff", 16)),
+			Destination: subnet1,
 			NIC:         1,
 		}},
 	)
diff --git a/pkg/tcpip/sample/tun_tcp_connect/BUILD b/pkg/tcpip/sample/tun_tcp_connect/BUILD
index 996939581..a57752a7c 100644
--- a/pkg/tcpip/sample/tun_tcp_connect/BUILD
+++ b/pkg/tcpip/sample/tun_tcp_connect/BUILD
@@ -8,6 +8,7 @@ go_binary(
     deps = [
         "//pkg/tcpip",
         "//pkg/tcpip/buffer",
+        "//pkg/tcpip/header",
         "//pkg/tcpip/link/fdbased",
         "//pkg/tcpip/link/rawfile",
         "//pkg/tcpip/link/sniffer",
diff --git a/pkg/tcpip/sample/tun_tcp_connect/main.go b/pkg/tcpip/sample/tun_tcp_connect/main.go
index 3ac381631..e2021cd15 100644
--- a/pkg/tcpip/sample/tun_tcp_connect/main.go
+++ b/pkg/tcpip/sample/tun_tcp_connect/main.go
@@ -52,6 +52,7 @@ import (
 
 	"gvisor.dev/gvisor/pkg/tcpip"
 	"gvisor.dev/gvisor/pkg/tcpip/buffer"
+	"gvisor.dev/gvisor/pkg/tcpip/header"
 	"gvisor.dev/gvisor/pkg/tcpip/link/fdbased"
 	"gvisor.dev/gvisor/pkg/tcpip/link/rawfile"
 	"gvisor.dev/gvisor/pkg/tcpip/link/sniffer"
@@ -152,9 +153,7 @@ func main() {
 	// Add default route.
 	s.SetRouteTable([]tcpip.Route{
 		{
-			Destination: "\x00\x00\x00\x00",
-			Mask:        "\x00\x00\x00\x00",
-			Gateway:     "",
+			Destination: header.IPv4EmptySubnet,
 			NIC:         1,
 		},
 	})
diff --git a/pkg/tcpip/sample/tun_tcp_echo/main.go b/pkg/tcpip/sample/tun_tcp_echo/main.go
index da425394a..1716be285 100644
--- a/pkg/tcpip/sample/tun_tcp_echo/main.go
+++ b/pkg/tcpip/sample/tun_tcp_echo/main.go
@@ -149,12 +149,15 @@ func main() {
 		log.Fatal(err)
 	}
 
+	subnet, err := tcpip.NewSubnet(tcpip.Address(strings.Repeat("\x00", len(addr))), tcpip.AddressMask(strings.Repeat("\x00", len(addr))))
+	if err != nil {
+		log.Fatal(err)
+	}
+
 	// Add default route.
 	s.SetRouteTable([]tcpip.Route{
 		{
-			Destination: tcpip.Address(strings.Repeat("\x00", len(addr))),
-			Mask:        tcpip.AddressMask(strings.Repeat("\x00", len(addr))),
-			Gateway:     "",
+			Destination: subnet,
 			NIC:         1,
 		},
 	})
diff --git a/pkg/tcpip/stack/nic.go b/pkg/tcpip/stack/nic.go
index dc28dc970..04b63d783 100644
--- a/pkg/tcpip/stack/nic.go
+++ b/pkg/tcpip/stack/nic.go
@@ -139,7 +139,7 @@ func (n *NIC) getMainNICAddress(protocol tcpip.NetworkProtocolNumber) (tcpip.Add
 	if list, ok := n.primary[protocol]; ok {
 		for e := list.Front(); e != nil; e = e.Next() {
 			ref := e.(*referencedNetworkEndpoint)
-			if ref.holdsInsertRef && ref.tryIncRef() {
+			if ref.kind == permanent && ref.tryIncRef() {
 				r = ref
 				break
 			}
@@ -178,7 +178,7 @@ func (n *NIC) primaryEndpoint(protocol tcpip.NetworkProtocolNumber) *referencedN
 		case header.IPv4Broadcast, header.IPv4Any:
 			continue
 		}
-		if r.tryIncRef() {
+		if r.isValidForOutgoing() && r.tryIncRef() {
 			return r
 		}
 	}
@@ -186,46 +186,124 @@ func (n *NIC) primaryEndpoint(protocol tcpip.NetworkProtocolNumber) *referencedN
 	return nil
 }
 
+func (n *NIC) getRef(protocol tcpip.NetworkProtocolNumber, dst tcpip.Address) *referencedNetworkEndpoint {
+	return n.getRefOrCreateTemp(protocol, dst, CanBePrimaryEndpoint, n.promiscuous)
+}
+
 // findEndpoint finds the endpoint, if any, with the given address.
 func (n *NIC) findEndpoint(protocol tcpip.NetworkProtocolNumber, address tcpip.Address, peb PrimaryEndpointBehavior) *referencedNetworkEndpoint {
+	return n.getRefOrCreateTemp(protocol, address, peb, n.spoofing)
+}
+
+// getRefEpOrCreateTemp returns the referenced network endpoint for the given
+// protocol and address. If none exists a temporary one may be created if
+// we are in promiscuous mode or spoofing.
+func (n *NIC) getRefOrCreateTemp(protocol tcpip.NetworkProtocolNumber, address tcpip.Address, peb PrimaryEndpointBehavior, spoofingOrPromiscuous bool) *referencedNetworkEndpoint {
 	id := NetworkEndpointID{address}
 
 	n.mu.RLock()
-	ref := n.endpoints[id]
-	if ref != nil && !ref.tryIncRef() {
-		ref = nil
+
+	if ref, ok := n.endpoints[id]; ok {
+		// An endpoint with this id exists, check if it can be used and return it.
+		switch ref.kind {
+		case permanentExpired:
+			if !spoofingOrPromiscuous {
+				n.mu.RUnlock()
+				return nil
+			}
+			fallthrough
+		case temporary, permanent:
+			if ref.tryIncRef() {
+				n.mu.RUnlock()
+				return ref
+			}
+		}
 	}
-	spoofing := n.spoofing
+
+	// A usable reference was not found, create a temporary one if requested by
+	// the caller or if the address is found in the NIC's subnets.
+	createTempEP := spoofingOrPromiscuous
+	if !createTempEP {
+		for _, sn := range n.subnets {
+			if sn.Contains(address) {
+				createTempEP = true
+				break
+			}
+		}
+	}
+
 	n.mu.RUnlock()
 
-	if ref != nil || !spoofing {
-		return ref
+	if !createTempEP {
+		return nil
 	}
 
 	// Try again with the lock in exclusive mode. If we still can't get the
 	// endpoint, create a new "temporary" endpoint. It will only exist while
 	// there's a route through it.
 	n.mu.Lock()
-	ref = n.endpoints[id]
-	if ref == nil || !ref.tryIncRef() {
-		if netProto, ok := n.stack.networkProtocols[protocol]; ok {
-			ref, _ = n.addAddressLocked(tcpip.ProtocolAddress{
-				Protocol: protocol,
-				AddressWithPrefix: tcpip.AddressWithPrefix{
-					Address:   address,
-					PrefixLen: netProto.DefaultPrefixLen(),
-				},
-			}, peb, true)
-			if ref != nil {
-				ref.holdsInsertRef = false
-			}
+	if ref, ok := n.endpoints[id]; ok {
+		// No need to check the type as we are ok with expired endpoints at this
+		// point.
+		if ref.tryIncRef() {
+			n.mu.Unlock()
+			return ref
 		}
+		// tryIncRef failing means the endpoint is scheduled to be removed once the
+		// lock is released. Remove it here so we can create a new (temporary) one.
+		// The removal logic waiting for the lock handles this case.
+		n.removeEndpointLocked(ref)
 	}
+
+	// Add a new temporary endpoint.
+	netProto, ok := n.stack.networkProtocols[protocol]
+	if !ok {
+		n.mu.Unlock()
+		return nil
+	}
+	ref, _ := n.addAddressLocked(tcpip.ProtocolAddress{
+		Protocol: protocol,
+		AddressWithPrefix: tcpip.AddressWithPrefix{
+			Address:   address,
+			PrefixLen: netProto.DefaultPrefixLen(),
+		},
+	}, peb, temporary)
+
 	n.mu.Unlock()
 	return ref
 }
 
-func (n *NIC) addAddressLocked(protocolAddress tcpip.ProtocolAddress, peb PrimaryEndpointBehavior, replace bool) (*referencedNetworkEndpoint, *tcpip.Error) {
+func (n *NIC) addPermanentAddressLocked(protocolAddress tcpip.ProtocolAddress, peb PrimaryEndpointBehavior) (*referencedNetworkEndpoint, *tcpip.Error) {
+	id := NetworkEndpointID{protocolAddress.AddressWithPrefix.Address}
+	if ref, ok := n.endpoints[id]; ok {
+		switch ref.kind {
+		case permanent:
+			// The NIC already have a permanent endpoint with that address.
+			return nil, tcpip.ErrDuplicateAddress
+		case permanentExpired, temporary:
+			// Promote the endpoint to become permanent.
+			if ref.tryIncRef() {
+				ref.kind = permanent
+				return ref, nil
+			}
+			// tryIncRef failing means the endpoint is scheduled to be removed once
+			// the lock is released. Remove it here so we can create a new
+			// (permanent) one. The removal logic waiting for the lock handles this
+			// case.
+			n.removeEndpointLocked(ref)
+		}
+	}
+	return n.addAddressLocked(protocolAddress, peb, permanent)
+}
+
+func (n *NIC) addAddressLocked(protocolAddress tcpip.ProtocolAddress, peb PrimaryEndpointBehavior, kind networkEndpointKind) (*referencedNetworkEndpoint, *tcpip.Error) {
+	// Sanity check.
+	id := NetworkEndpointID{protocolAddress.AddressWithPrefix.Address}
+	if _, ok := n.endpoints[id]; ok {
+		// Endpoint already exists.
+		return nil, tcpip.ErrDuplicateAddress
+	}
+
 	netProto, ok := n.stack.networkProtocols[protocolAddress.Protocol]
 	if !ok {
 		return nil, tcpip.ErrUnknownProtocol
@@ -236,22 +314,12 @@ func (n *NIC) addAddressLocked(protocolAddress tcpip.ProtocolAddress, peb Primar
 	if err != nil {
 		return nil, err
 	}
-
-	id := *ep.ID()
-	if ref, ok := n.endpoints[id]; ok {
-		if !replace {
-			return nil, tcpip.ErrDuplicateAddress
-		}
-
-		n.removeEndpointLocked(ref)
-	}
-
 	ref := &referencedNetworkEndpoint{
-		refs:           1,
-		ep:             ep,
-		nic:            n,
-		protocol:       protocolAddress.Protocol,
-		holdsInsertRef: true,
+		refs:     1,
+		ep:       ep,
+		nic:      n,
+		protocol: protocolAddress.Protocol,
+		kind:     kind,
 	}
 
 	// Set up cache if link address resolution exists for this protocol.
@@ -284,7 +352,7 @@ func (n *NIC) addAddressLocked(protocolAddress tcpip.ProtocolAddress, peb Primar
 func (n *NIC) AddAddress(protocolAddress tcpip.ProtocolAddress, peb PrimaryEndpointBehavior) *tcpip.Error {
 	// Add the endpoint.
 	n.mu.Lock()
-	_, err := n.addAddressLocked(protocolAddress, peb, false)
+	_, err := n.addPermanentAddressLocked(protocolAddress, peb)
 	n.mu.Unlock()
 
 	return err
@@ -296,6 +364,12 @@ func (n *NIC) Addresses() []tcpip.ProtocolAddress {
 	defer n.mu.RUnlock()
 	addrs := make([]tcpip.ProtocolAddress, 0, len(n.endpoints))
 	for nid, ref := range n.endpoints {
+		// Don't include expired or tempory endpoints to avoid confusion and
+		// prevent the caller from using those.
+		switch ref.kind {
+		case permanentExpired, temporary:
+			continue
+		}
 		addrs = append(addrs, tcpip.ProtocolAddress{
 			Protocol: ref.protocol,
 			AddressWithPrefix: tcpip.AddressWithPrefix{
@@ -361,13 +435,16 @@ func (n *NIC) Subnets() []tcpip.Subnet {
 func (n *NIC) removeEndpointLocked(r *referencedNetworkEndpoint) {
 	id := *r.ep.ID()
 
-	// Nothing to do if the reference has already been replaced with a
-	// different one.
+	// Nothing to do if the reference has already been replaced with a different
+	// one. This happens in the case where 1) this endpoint's ref count hit zero
+	// and was waiting (on the lock) to be removed and 2) the same address was
+	// re-added in the meantime by removing this endpoint from the list and
+	// adding a new one.
 	if n.endpoints[id] != r {
 		return
 	}
 
-	if r.holdsInsertRef {
+	if r.kind == permanent {
 		panic("Reference count dropped to zero before being removed")
 	}
 
@@ -386,14 +463,13 @@ func (n *NIC) removeEndpoint(r *referencedNetworkEndpoint) {
 	n.mu.Unlock()
 }
 
-func (n *NIC) removeAddressLocked(addr tcpip.Address) *tcpip.Error {
+func (n *NIC) removePermanentAddressLocked(addr tcpip.Address) *tcpip.Error {
 	r := n.endpoints[NetworkEndpointID{addr}]
-	if r == nil || !r.holdsInsertRef {
+	if r == nil || r.kind != permanent {
 		return tcpip.ErrBadLocalAddress
 	}
 
-	r.holdsInsertRef = false
-
+	r.kind = permanentExpired
 	r.decRefLocked()
 
 	return nil
@@ -403,7 +479,7 @@ func (n *NIC) removeAddressLocked(addr tcpip.Address) *tcpip.Error {
 func (n *NIC) RemoveAddress(addr tcpip.Address) *tcpip.Error {
 	n.mu.Lock()
 	defer n.mu.Unlock()
-	return n.removeAddressLocked(addr)
+	return n.removePermanentAddressLocked(addr)
 }
 
 // joinGroup adds a new endpoint for the given multicast address, if none
@@ -419,13 +495,13 @@ func (n *NIC) joinGroup(protocol tcpip.NetworkProtocolNumber, addr tcpip.Address
 		if !ok {
 			return tcpip.ErrUnknownProtocol
 		}
-		if _, err := n.addAddressLocked(tcpip.ProtocolAddress{
+		if _, err := n.addPermanentAddressLocked(tcpip.ProtocolAddress{
 			Protocol: protocol,
 			AddressWithPrefix: tcpip.AddressWithPrefix{
 				Address:   addr,
 				PrefixLen: netProto.DefaultPrefixLen(),
 			},
-		}, NeverPrimaryEndpoint, false); err != nil {
+		}, NeverPrimaryEndpoint); err != nil {
 			return err
 		}
 	}
@@ -447,7 +523,7 @@ func (n *NIC) leaveGroup(addr tcpip.Address) *tcpip.Error {
 		return tcpip.ErrBadLocalAddress
 	case 1:
 		// This is the last one, clean up.
-		if err := n.removeAddressLocked(addr); err != nil {
+		if err := n.removePermanentAddressLocked(addr); err != nil {
 			return err
 		}
 	}
@@ -489,7 +565,7 @@ func (n *NIC) DeliverNetworkPacket(linkEP LinkEndpoint, remote, _ tcpip.LinkAddr
 		// n.endpoints is mutex protected so acquire lock.
 		n.mu.RLock()
 		for _, ref := range n.endpoints {
-			if ref.protocol == header.IPv4ProtocolNumber && ref.tryIncRef() {
+			if ref.isValidForIncoming() && ref.protocol == header.IPv4ProtocolNumber && ref.tryIncRef() {
 				r := makeRoute(protocol, dst, src, linkEP.LinkAddress(), ref, false /* handleLocal */, false /* multicastLoop */)
 				r.RemoteLinkAddress = remote
 				ref.ep.HandlePacket(&r, vv)
@@ -527,8 +603,9 @@ func (n *NIC) DeliverNetworkPacket(linkEP LinkEndpoint, remote, _ tcpip.LinkAddr
 		n := r.ref.nic
 		n.mu.RLock()
 		ref, ok := n.endpoints[NetworkEndpointID{dst}]
+		ok = ok && ref.isValidForOutgoing() && ref.tryIncRef()
 		n.mu.RUnlock()
-		if ok && ref.tryIncRef() {
+		if ok {
 			r.RemoteAddress = src
 			// TODO(b/123449044): Update the source NIC as well.
 			ref.ep.HandlePacket(&r, vv)
@@ -553,57 +630,6 @@ func (n *NIC) DeliverNetworkPacket(linkEP LinkEndpoint, remote, _ tcpip.LinkAddr
 	n.stack.stats.IP.InvalidAddressesReceived.Increment()
 }
 
-func (n *NIC) getRef(protocol tcpip.NetworkProtocolNumber, dst tcpip.Address) *referencedNetworkEndpoint {
-	id := NetworkEndpointID{dst}
-
-	n.mu.RLock()
-	if ref, ok := n.endpoints[id]; ok && ref.tryIncRef() {
-		n.mu.RUnlock()
-		return ref
-	}
-
-	promiscuous := n.promiscuous
-	// Check if the packet is for a subnet this NIC cares about.
-	if !promiscuous {
-		for _, sn := range n.subnets {
-			if sn.Contains(dst) {
-				promiscuous = true
-				break
-			}
-		}
-	}
-	n.mu.RUnlock()
-	if promiscuous {
-		// Try again with the lock in exclusive mode. If we still can't
-		// get the endpoint, create a new "temporary" one. It will only
-		// exist while there's a route through it.
-		n.mu.Lock()
-		if ref, ok := n.endpoints[id]; ok && ref.tryIncRef() {
-			n.mu.Unlock()
-			return ref
-		}
-		netProto, ok := n.stack.networkProtocols[protocol]
-		if !ok {
-			n.mu.Unlock()
-			return nil
-		}
-		ref, err := n.addAddressLocked(tcpip.ProtocolAddress{
-			Protocol: protocol,
-			AddressWithPrefix: tcpip.AddressWithPrefix{
-				Address:   dst,
-				PrefixLen: netProto.DefaultPrefixLen(),
-			},
-		}, CanBePrimaryEndpoint, true)
-		n.mu.Unlock()
-		if err == nil {
-			ref.holdsInsertRef = false
-			return ref
-		}
-	}
-
-	return nil
-}
-
 // DeliverTransportPacket delivers the packets to the appropriate transport
 // protocol endpoint.
 func (n *NIC) DeliverTransportPacket(r *Route, protocol tcpip.TransportProtocolNumber, netHeader buffer.View, vv buffer.VectorisedView) {
@@ -691,9 +717,33 @@ func (n *NIC) ID() tcpip.NICID {
 	return n.id
 }
 
+type networkEndpointKind int
+
+const (
+	// A permanent endpoint is created by adding a permanent address (vs. a
+	// temporary one) to the NIC. Its reference count is biased by 1 to avoid
+	// removal when no route holds a reference to it. It is removed by explicitly
+	// removing the permanent address from the NIC.
+	permanent networkEndpointKind = iota
+
+	// An expired permanent endoint is a permanent endoint that had its address
+	// removed from the NIC, and it is waiting to be removed once no more routes
+	// hold a reference to it. This is achieved by decreasing its reference count
+	// by 1. If its address is re-added before the endpoint is removed, its type
+	// changes back to permanent and its reference count increases by 1 again.
+	permanentExpired
+
+	// A temporary endpoint is created for spoofing outgoing packets, or when in
+	// promiscuous mode and accepting incoming packets that don't match any
+	// permanent endpoint. Its reference count is not biased by 1 and the
+	// endpoint is removed immediately when no more route holds a reference to
+	// it. A temporary endpoint can be promoted to permanent if its address
+	// is added permanently.
+	temporary
+)
+
 type referencedNetworkEndpoint struct {
 	ilist.Entry
-	refs     int32
 	ep       NetworkEndpoint
 	nic      *NIC
 	protocol tcpip.NetworkProtocolNumber
@@ -702,11 +752,25 @@ type referencedNetworkEndpoint struct {
 	// protocol. Set to nil otherwise.
 	linkCache LinkAddressCache
 
-	// holdsInsertRef is protected by the NIC's mutex. It indicates whether
-	// the reference count is biased by 1 due to the insertion of the
-	// endpoint. It is reset to false when RemoveAddress is called on the
-	// NIC.
-	holdsInsertRef bool
+	// refs is counting references held for this endpoint. When refs hits zero it
+	// triggers the automatic removal of the endpoint from the NIC.
+	refs int32
+
+	kind networkEndpointKind
+}
+
+// isValidForOutgoing returns true if the endpoint can be used to send out a
+// packet. It requires the endpoint to not be marked expired (i.e., its address
+// has been removed), or the NIC to be in spoofing mode.
+func (r *referencedNetworkEndpoint) isValidForOutgoing() bool {
+	return r.kind != permanentExpired || r.nic.spoofing
+}
+
+// isValidForIncoming returns true if the endpoint can accept an incoming
+// packet. It requires the endpoint to not be marked expired (i.e., its address
+// has been removed), or the NIC to be in promiscuous mode.
+func (r *referencedNetworkEndpoint) isValidForIncoming() bool {
+	return r.kind != permanentExpired || r.nic.promiscuous
 }
 
 // decRef decrements the ref count and cleans up the endpoint once it reaches
diff --git a/pkg/tcpip/stack/route.go b/pkg/tcpip/stack/route.go
index 391ab4344..e52cdd674 100644
--- a/pkg/tcpip/stack/route.go
+++ b/pkg/tcpip/stack/route.go
@@ -148,11 +148,15 @@ func (r *Route) RemoveWaker(waker *sleep.Waker) {
 // IsResolutionRequired returns true if Resolve() must be called to resolve
 // the link address before the this route can be written to.
 func (r *Route) IsResolutionRequired() bool {
-	return r.ref.linkCache != nil && r.RemoteLinkAddress == ""
+	return r.ref.isValidForOutgoing() && r.ref.linkCache != nil && r.RemoteLinkAddress == ""
 }
 
 // WritePacket writes the packet through the given route.
 func (r *Route) WritePacket(gso *GSO, hdr buffer.Prependable, payload buffer.VectorisedView, protocol tcpip.TransportProtocolNumber, ttl uint8) *tcpip.Error {
+	if !r.ref.isValidForOutgoing() {
+		return tcpip.ErrInvalidEndpointState
+	}
+
 	err := r.ref.ep.WritePacket(r, gso, hdr, payload, protocol, ttl, r.loop)
 	if err != nil {
 		r.Stats().IP.OutgoingPacketErrors.Increment()
@@ -166,6 +170,10 @@ func (r *Route) WritePacket(gso *GSO, hdr buffer.Prependable, payload buffer.Vec
 // WriteHeaderIncludedPacket writes a packet already containing a network
 // header through the given route.
 func (r *Route) WriteHeaderIncludedPacket(payload buffer.VectorisedView) *tcpip.Error {
+	if !r.ref.isValidForOutgoing() {
+		return tcpip.ErrInvalidEndpointState
+	}
+
 	if err := r.ref.ep.WriteHeaderIncludedPacket(r, payload, r.loop); err != nil {
 		r.Stats().IP.OutgoingPacketErrors.Increment()
 		return err
diff --git a/pkg/tcpip/stack/stack.go b/pkg/tcpip/stack/stack.go
index d45e547ee..d69162ba1 100644
--- a/pkg/tcpip/stack/stack.go
+++ b/pkg/tcpip/stack/stack.go
@@ -895,7 +895,7 @@ func (s *Stack) FindRoute(id tcpip.NICID, localAddr, remoteAddr tcpip.Address, n
 		}
 	} else {
 		for _, route := range s.routeTable {
-			if (id != 0 && id != route.NIC) || (len(remoteAddr) != 0 && !route.Match(remoteAddr)) {
+			if (id != 0 && id != route.NIC) || (len(remoteAddr) != 0 && !isBroadcast && !route.Destination.Contains(remoteAddr)) {
 				continue
 			}
 			if nic, ok := s.nics[route.NIC]; ok {
diff --git a/pkg/tcpip/stack/stack_test.go b/pkg/tcpip/stack/stack_test.go
index 1ab9c575b..4debd1eec 100644
--- a/pkg/tcpip/stack/stack_test.go
+++ b/pkg/tcpip/stack/stack_test.go
@@ -181,6 +181,10 @@ func (f *fakeNetworkProtocol) DefaultPrefixLen() int {
 	return fakeDefaultPrefixLen
 }
 
+func (f *fakeNetworkProtocol) PacketCount(intfAddr byte) int {
+	return f.packetCount[int(intfAddr)%len(f.packetCount)]
+}
+
 func (*fakeNetworkProtocol) ParseAddresses(v buffer.View) (src, dst tcpip.Address) {
 	return tcpip.Address(v[1:2]), tcpip.Address(v[0:1])
 }
@@ -289,16 +293,75 @@ func TestNetworkReceive(t *testing.T) {
 	}
 }
 
-func sendTo(t *testing.T, s *stack.Stack, addr tcpip.Address, payload buffer.View) {
+func sendTo(s *stack.Stack, addr tcpip.Address, payload buffer.View) *tcpip.Error {
 	r, err := s.FindRoute(0, "", addr, fakeNetNumber, false /* multicastLoop */)
 	if err != nil {
-		t.Fatal("FindRoute failed:", err)
+		return err
 	}
 	defer r.Release()
+	return send(r, payload)
+}
 
+func send(r stack.Route, payload buffer.View) *tcpip.Error {
 	hdr := buffer.NewPrependable(int(r.MaxHeaderLength()))
-	if err := r.WritePacket(nil /* gso */, hdr, payload.ToVectorisedView(), fakeTransNumber, 123); err != nil {
-		t.Error("WritePacket failed:", err)
+	return r.WritePacket(nil /* gso */, hdr, payload.ToVectorisedView(), fakeTransNumber, 123)
+}
+
+func testSendTo(t *testing.T, s *stack.Stack, addr tcpip.Address, linkEP *channel.Endpoint, payload buffer.View) {
+	t.Helper()
+	linkEP.Drain()
+	if err := sendTo(s, addr, payload); err != nil {
+		t.Error("sendTo failed:", err)
+	}
+	if got, want := linkEP.Drain(), 1; got != want {
+		t.Errorf("sendTo packet count: got = %d, want %d", got, want)
+	}
+}
+
+func testSend(t *testing.T, r stack.Route, linkEP *channel.Endpoint, payload buffer.View) {
+	t.Helper()
+	linkEP.Drain()
+	if err := send(r, payload); err != nil {
+		t.Error("send failed:", err)
+	}
+	if got, want := linkEP.Drain(), 1; got != want {
+		t.Errorf("send packet count: got = %d, want %d", got, want)
+	}
+}
+
+func testFailingSend(t *testing.T, r stack.Route, linkEP *channel.Endpoint, payload buffer.View, wantErr *tcpip.Error) {
+	t.Helper()
+	if gotErr := send(r, payload); gotErr != wantErr {
+		t.Errorf("send failed: got = %s, want = %s ", gotErr, wantErr)
+	}
+}
+
+func testFailingSendTo(t *testing.T, s *stack.Stack, addr tcpip.Address, linkEP *channel.Endpoint, payload buffer.View, wantErr *tcpip.Error) {
+	t.Helper()
+	if gotErr := sendTo(s, addr, payload); gotErr != wantErr {
+		t.Errorf("sendto failed: got = %s, want = %s ", gotErr, wantErr)
+	}
+}
+
+func testRecv(t *testing.T, fakeNet *fakeNetworkProtocol, localAddrByte byte, linkEP *channel.Endpoint, buf buffer.View) {
+	t.Helper()
+	// testRecvInternal injects one packet, and we expect to receive it.
+	want := fakeNet.PacketCount(localAddrByte) + 1
+	testRecvInternal(t, fakeNet, localAddrByte, linkEP, buf, want)
+}
+
+func testFailingRecv(t *testing.T, fakeNet *fakeNetworkProtocol, localAddrByte byte, linkEP *channel.Endpoint, buf buffer.View) {
+	t.Helper()
+	// testRecvInternal injects one packet, and we do NOT expect to receive it.
+	want := fakeNet.PacketCount(localAddrByte)
+	testRecvInternal(t, fakeNet, localAddrByte, linkEP, buf, want)
+}
+
+func testRecvInternal(t *testing.T, fakeNet *fakeNetworkProtocol, localAddrByte byte, linkEP *channel.Endpoint, buf buffer.View, want int) {
+	t.Helper()
+	linkEP.Inject(fakeNetNumber, buf.ToVectorisedView())
+	if got := fakeNet.PacketCount(localAddrByte); got != want {
+		t.Errorf("receive packet count: got = %d, want %d", got, want)
 	}
 }
 
@@ -312,17 +375,20 @@ func TestNetworkSend(t *testing.T) {
 		t.Fatal("NewNIC failed:", err)
 	}
 
-	s.SetRouteTable([]tcpip.Route{{"\x00", "\x00", "\x00", 1}})
+	{
+		subnet, err := tcpip.NewSubnet("\x00", "\x00")
+		if err != nil {
+			t.Fatal(err)
+		}
+		s.SetRouteTable([]tcpip.Route{{Destination: subnet, Gateway: "\x00", NIC: 1}})
+	}
 
 	if err := s.AddAddress(1, fakeNetNumber, "\x01"); err != nil {
 		t.Fatal("AddAddress failed:", err)
 	}
 
 	// Make sure that the link-layer endpoint received the outbound packet.
-	sendTo(t, s, "\x03", nil)
-	if c := linkEP.Drain(); c != 1 {
-		t.Errorf("packetCount = %d, want %d", c, 1)
-	}
+	testSendTo(t, s, "\x03", linkEP, nil)
 }
 
 func TestNetworkSendMultiRoute(t *testing.T) {
@@ -360,24 +426,26 @@ func TestNetworkSendMultiRoute(t *testing.T) {
 	// Set a route table that sends all packets with odd destination
 	// addresses through the first NIC, and all even destination address
 	// through the second one.
-	s.SetRouteTable([]tcpip.Route{
-		{"\x01", "\x01", "\x00", 1},
-		{"\x00", "\x01", "\x00", 2},
-	})
+	{
+		subnet0, err := tcpip.NewSubnet("\x00", "\x01")
+		if err != nil {
+			t.Fatal(err)
+		}
+		subnet1, err := tcpip.NewSubnet("\x01", "\x01")
+		if err != nil {
+			t.Fatal(err)
+		}
+		s.SetRouteTable([]tcpip.Route{
+			{Destination: subnet1, Gateway: "\x00", NIC: 1},
+			{Destination: subnet0, Gateway: "\x00", NIC: 2},
+		})
+	}
 
 	// Send a packet to an odd destination.
-	sendTo(t, s, "\x05", nil)
-
-	if c := linkEP1.Drain(); c != 1 {
-		t.Errorf("packetCount = %d, want %d", c, 1)
-	}
+	testSendTo(t, s, "\x05", linkEP1, nil)
 
 	// Send a packet to an even destination.
-	sendTo(t, s, "\x06", nil)
-
-	if c := linkEP2.Drain(); c != 1 {
-		t.Errorf("packetCount = %d, want %d", c, 1)
-	}
+	testSendTo(t, s, "\x06", linkEP2, nil)
 }
 
 func testRoute(t *testing.T, s *stack.Stack, nic tcpip.NICID, srcAddr, dstAddr, expectedSrcAddr tcpip.Address) {
@@ -439,10 +507,20 @@ func TestRoutes(t *testing.T) {
 	// Set a route table that sends all packets with odd destination
 	// addresses through the first NIC, and all even destination address
 	// through the second one.
-	s.SetRouteTable([]tcpip.Route{
-		{"\x01", "\x01", "\x00", 1},
-		{"\x00", "\x01", "\x00", 2},
-	})
+	{
+		subnet0, err := tcpip.NewSubnet("\x00", "\x01")
+		if err != nil {
+			t.Fatal(err)
+		}
+		subnet1, err := tcpip.NewSubnet("\x01", "\x01")
+		if err != nil {
+			t.Fatal(err)
+		}
+		s.SetRouteTable([]tcpip.Route{
+			{Destination: subnet1, Gateway: "\x00", NIC: 1},
+			{Destination: subnet0, Gateway: "\x00", NIC: 2},
+		})
+	}
 
 	// Test routes to odd address.
 	testRoute(t, s, 0, "", "\x05", "\x01")
@@ -472,6 +550,10 @@ func TestRoutes(t *testing.T) {
 }
 
 func TestAddressRemoval(t *testing.T) {
+	const localAddrByte byte = 0x01
+	localAddr := tcpip.Address([]byte{localAddrByte})
+	remoteAddr := tcpip.Address("\x02")
+
 	s := stack.New([]string{"fakeNet"}, nil, stack.Options{})
 
 	id, linkEP := channel.New(10, defaultMTU, "")
@@ -479,99 +561,285 @@ func TestAddressRemoval(t *testing.T) {
 		t.Fatal("CreateNIC failed:", err)
 	}
 
-	if err := s.AddAddress(1, fakeNetNumber, "\x01"); err != nil {
+	if err := s.AddAddress(1, fakeNetNumber, localAddr); err != nil {
 		t.Fatal("AddAddress failed:", err)
 	}
+	{
+		subnet, err := tcpip.NewSubnet("\x00", "\x00")
+		if err != nil {
+			t.Fatal(err)
+		}
+		s.SetRouteTable([]tcpip.Route{{Destination: subnet, Gateway: "\x00", NIC: 1}})
+	}
 
 	fakeNet := s.NetworkProtocolInstance(fakeNetNumber).(*fakeNetworkProtocol)
 
 	buf := buffer.NewView(30)
 
-	// Write a packet, and check that it gets delivered.
-	fakeNet.packetCount[1] = 0
-	buf[0] = 1
-	linkEP.Inject(fakeNetNumber, buf.ToVectorisedView())
-	if fakeNet.packetCount[1] != 1 {
-		t.Errorf("packetCount[1] = %d, want %d", fakeNet.packetCount[1], 1)
-	}
+	// Send and receive packets, and verify they are received.
+	buf[0] = localAddrByte
+	testRecv(t, fakeNet, localAddrByte, linkEP, buf)
+	testSendTo(t, s, remoteAddr, linkEP, nil)
 
-	// Remove the address, then check that packet doesn't get delivered
-	// anymore.
-	if err := s.RemoveAddress(1, "\x01"); err != nil {
+	// Remove the address, then check that send/receive doesn't work anymore.
+	if err := s.RemoveAddress(1, localAddr); err != nil {
 		t.Fatal("RemoveAddress failed:", err)
 	}
-
-	linkEP.Inject(fakeNetNumber, buf.ToVectorisedView())
-	if fakeNet.packetCount[1] != 1 {
-		t.Errorf("packetCount[1] = %d, want %d", fakeNet.packetCount[1], 1)
-	}
+	testFailingRecv(t, fakeNet, localAddrByte, linkEP, buf)
+	testFailingSendTo(t, s, remoteAddr, linkEP, nil, tcpip.ErrNoRoute)
 
 	// Check that removing the same address fails.
-	if err := s.RemoveAddress(1, "\x01"); err != tcpip.ErrBadLocalAddress {
+	if err := s.RemoveAddress(1, localAddr); err != tcpip.ErrBadLocalAddress {
 		t.Fatalf("RemoveAddress returned unexpected error, got = %v, want = %s", err, tcpip.ErrBadLocalAddress)
 	}
 }
 
-func TestDelayedRemovalDueToRoute(t *testing.T) {
+func TestAddressRemovalWithRouteHeld(t *testing.T) {
+	const localAddrByte byte = 0x01
+	localAddr := tcpip.Address([]byte{localAddrByte})
+	remoteAddr := tcpip.Address("\x02")
+
 	s := stack.New([]string{"fakeNet"}, nil, stack.Options{})
 
 	id, linkEP := channel.New(10, defaultMTU, "")
 	if err := s.CreateNIC(1, id); err != nil {
 		t.Fatal("CreateNIC failed:", err)
 	}
-
-	if err := s.AddAddress(1, fakeNetNumber, "\x01"); err != nil {
-		t.Fatal("AddAddress failed:", err)
-	}
-
-	s.SetRouteTable([]tcpip.Route{
-		{"\x00", "\x00", "\x00", 1},
-	})
-
 	fakeNet := s.NetworkProtocolInstance(fakeNetNumber).(*fakeNetworkProtocol)
-
 	buf := buffer.NewView(30)
 
-	// Write a packet, and check that it gets delivered.
-	fakeNet.packetCount[1] = 0
-	buf[0] = 1
-	linkEP.Inject(fakeNetNumber, buf.ToVectorisedView())
-	if fakeNet.packetCount[1] != 1 {
-		t.Errorf("packetCount[1] = %d, want %d", fakeNet.packetCount[1], 1)
+	if err := s.AddAddress(1, fakeNetNumber, localAddr); err != nil {
+		t.Fatal("AddAddress failed:", err)
+	}
+	{
+		subnet, err := tcpip.NewSubnet("\x00", "\x00")
+		if err != nil {
+			t.Fatal(err)
+		}
+		s.SetRouteTable([]tcpip.Route{{Destination: subnet, Gateway: "\x00", NIC: 1}})
 	}
 
-	// Get a route, check that packet is still deliverable.
-	r, err := s.FindRoute(0, "", "\x02", fakeNetNumber, false /* multicastLoop */)
+	r, err := s.FindRoute(0, "", remoteAddr, fakeNetNumber, false /* multicastLoop */)
 	if err != nil {
 		t.Fatal("FindRoute failed:", err)
 	}
 
-	linkEP.Inject(fakeNetNumber, buf.ToVectorisedView())
-	if fakeNet.packetCount[1] != 2 {
-		t.Errorf("packetCount[1] = %d, want %d", fakeNet.packetCount[1], 2)
-	}
+	// Send and receive packets, and verify they are received.
+	buf[0] = localAddrByte
+	testRecv(t, fakeNet, localAddrByte, linkEP, buf)
+	testSend(t, r, linkEP, nil)
+	testSendTo(t, s, remoteAddr, linkEP, nil)
 
-	// Remove the address, then check that packet is still deliverable
-	// because the route is keeping the address alive.
-	if err := s.RemoveAddress(1, "\x01"); err != nil {
+	// Remove the address, then check that send/receive doesn't work anymore.
+	if err := s.RemoveAddress(1, localAddr); err != nil {
 		t.Fatal("RemoveAddress failed:", err)
 	}
-
-	linkEP.Inject(fakeNetNumber, buf.ToVectorisedView())
-	if fakeNet.packetCount[1] != 3 {
-		t.Errorf("packetCount[1] = %d, want %d", fakeNet.packetCount[1], 3)
-	}
+	testFailingRecv(t, fakeNet, localAddrByte, linkEP, buf)
+	testFailingSend(t, r, linkEP, nil, tcpip.ErrInvalidEndpointState)
+	testFailingSendTo(t, s, remoteAddr, linkEP, nil, tcpip.ErrNoRoute)
 
 	// Check that removing the same address fails.
-	if err := s.RemoveAddress(1, "\x01"); err != tcpip.ErrBadLocalAddress {
+	if err := s.RemoveAddress(1, localAddr); err != tcpip.ErrBadLocalAddress {
 		t.Fatalf("RemoveAddress returned unexpected error, got = %v, want = %s", err, tcpip.ErrBadLocalAddress)
 	}
+}
 
-	// Release the route, then check that packet is not deliverable anymore.
-	r.Release()
-	linkEP.Inject(fakeNetNumber, buf.ToVectorisedView())
-	if fakeNet.packetCount[1] != 3 {
-		t.Errorf("packetCount[1] = %d, want %d", fakeNet.packetCount[1], 3)
+func verifyAddress(t *testing.T, s *stack.Stack, nicid tcpip.NICID, addr tcpip.Address) {
+	t.Helper()
+	info, ok := s.NICInfo()[nicid]
+	if !ok {
+		t.Fatalf("NICInfo() failed to find nicid=%d", nicid)
+	}
+	if len(addr) == 0 {
+		// No address given, verify that there is no address assigned to the NIC.
+		for _, a := range info.ProtocolAddresses {
+			if a.Protocol == fakeNetNumber && a.AddressWithPrefix != (tcpip.AddressWithPrefix{}) {
+				t.Errorf("verify no-address: got = %s, want = %s", a.AddressWithPrefix, (tcpip.AddressWithPrefix{}))
+			}
+		}
+		return
+	}
+	// Address given, verify the address is assigned to the NIC and no other
+	// address is.
+	found := false
+	for _, a := range info.ProtocolAddresses {
+		if a.Protocol == fakeNetNumber {
+			if a.AddressWithPrefix.Address == addr {
+				found = true
+			} else {
+				t.Errorf("verify address: got = %s, want = %s", a.AddressWithPrefix.Address, addr)
+			}
+		}
+	}
+	if !found {
+		t.Errorf("verify address: couldn't find %s on the NIC", addr)
+	}
+}
+
+func TestEndpointExpiration(t *testing.T) {
+	const (
+		localAddrByte byte          = 0x01
+		remoteAddr    tcpip.Address = "\x03"
+		noAddr        tcpip.Address = ""
+		nicid         tcpip.NICID   = 1
+	)
+	localAddr := tcpip.Address([]byte{localAddrByte})
+
+	for _, promiscuous := range []bool{true, false} {
+		for _, spoofing := range []bool{true, false} {
+			t.Run(fmt.Sprintf("promiscuous=%t spoofing=%t", promiscuous, spoofing), func(t *testing.T) {
+				s := stack.New([]string{"fakeNet"}, nil, stack.Options{})
+
+				id, linkEP := channel.New(10, defaultMTU, "")
+				if err := s.CreateNIC(nicid, id); err != nil {
+					t.Fatal("CreateNIC failed:", err)
+				}
+
+				{
+					subnet, err := tcpip.NewSubnet("\x00", "\x00")
+					if err != nil {
+						t.Fatal(err)
+					}
+					s.SetRouteTable([]tcpip.Route{{Destination: subnet, Gateway: "\x00", NIC: 1}})
+				}
+
+				fakeNet := s.NetworkProtocolInstance(fakeNetNumber).(*fakeNetworkProtocol)
+				buf := buffer.NewView(30)
+				buf[0] = localAddrByte
+
+				if promiscuous {
+					if err := s.SetPromiscuousMode(nicid, true); err != nil {
+						t.Fatal("SetPromiscuousMode failed:", err)
+					}
+				}
+
+				if spoofing {
+					if err := s.SetSpoofing(nicid, true); err != nil {
+						t.Fatal("SetSpoofing failed:", err)
+					}
+				}
+
+				// 1. No Address yet, send should only work for spoofing, receive for
+				// promiscuous mode.
+				//-----------------------
+				verifyAddress(t, s, nicid, noAddr)
+				if promiscuous {
+					testRecv(t, fakeNet, localAddrByte, linkEP, buf)
+				} else {
+					testFailingRecv(t, fakeNet, localAddrByte, linkEP, buf)
+				}
+				if spoofing {
+					// FIXME(b/139841518):Spoofing doesn't work if there is no primary address.
+					// testSendTo(t, s, remoteAddr, linkEP, nil)
+				} else {
+					testFailingSendTo(t, s, remoteAddr, linkEP, nil, tcpip.ErrNoRoute)
+				}
+
+				// 2. Add Address, everything should work.
+				//-----------------------
+				if err := s.AddAddress(nicid, fakeNetNumber, localAddr); err != nil {
+					t.Fatal("AddAddress failed:", err)
+				}
+				verifyAddress(t, s, nicid, localAddr)
+				testRecv(t, fakeNet, localAddrByte, linkEP, buf)
+				testSendTo(t, s, remoteAddr, linkEP, nil)
+
+				// 3. Remove the address, send should only work for spoofing, receive
+				// for promiscuous mode.
+				//-----------------------
+				if err := s.RemoveAddress(nicid, localAddr); err != nil {
+					t.Fatal("RemoveAddress failed:", err)
+				}
+				verifyAddress(t, s, nicid, noAddr)
+				if promiscuous {
+					testRecv(t, fakeNet, localAddrByte, linkEP, buf)
+				} else {
+					testFailingRecv(t, fakeNet, localAddrByte, linkEP, buf)
+				}
+				if spoofing {
+					// FIXME(b/139841518):Spoofing doesn't work if there is no primary address.
+					// testSendTo(t, s, remoteAddr, linkEP, nil)
+				} else {
+					testFailingSendTo(t, s, remoteAddr, linkEP, nil, tcpip.ErrNoRoute)
+				}
+
+				// 4. Add Address back, everything should work again.
+				//-----------------------
+				if err := s.AddAddress(nicid, fakeNetNumber, localAddr); err != nil {
+					t.Fatal("AddAddress failed:", err)
+				}
+				verifyAddress(t, s, nicid, localAddr)
+				testRecv(t, fakeNet, localAddrByte, linkEP, buf)
+				testSendTo(t, s, remoteAddr, linkEP, nil)
+
+				// 5. Take a reference to the endpoint by getting a route. Verify that
+				// we can still send/receive, including sending using the route.
+				//-----------------------
+				r, err := s.FindRoute(0, "", remoteAddr, fakeNetNumber, false /* multicastLoop */)
+				if err != nil {
+					t.Fatal("FindRoute failed:", err)
+				}
+				testRecv(t, fakeNet, localAddrByte, linkEP, buf)
+				testSendTo(t, s, remoteAddr, linkEP, nil)
+				testSend(t, r, linkEP, nil)
+
+				// 6. Remove the address. Send should only work for spoofing, receive
+				// for promiscuous mode.
+				//-----------------------
+				if err := s.RemoveAddress(nicid, localAddr); err != nil {
+					t.Fatal("RemoveAddress failed:", err)
+				}
+				verifyAddress(t, s, nicid, noAddr)
+				if promiscuous {
+					testRecv(t, fakeNet, localAddrByte, linkEP, buf)
+				} else {
+					testFailingRecv(t, fakeNet, localAddrByte, linkEP, buf)
+				}
+				if spoofing {
+					testSend(t, r, linkEP, nil)
+					testSendTo(t, s, remoteAddr, linkEP, nil)
+				} else {
+					testFailingSend(t, r, linkEP, nil, tcpip.ErrInvalidEndpointState)
+					testFailingSendTo(t, s, remoteAddr, linkEP, nil, tcpip.ErrNoRoute)
+				}
+
+				// 7. Add Address back, everything should work again.
+				//-----------------------
+				if err := s.AddAddress(nicid, fakeNetNumber, localAddr); err != nil {
+					t.Fatal("AddAddress failed:", err)
+				}
+				verifyAddress(t, s, nicid, localAddr)
+				testRecv(t, fakeNet, localAddrByte, linkEP, buf)
+				testSendTo(t, s, remoteAddr, linkEP, nil)
+				testSend(t, r, linkEP, nil)
+
+				// 8. Remove the route, sendTo/recv should still work.
+				//-----------------------
+				r.Release()
+				verifyAddress(t, s, nicid, localAddr)
+				testRecv(t, fakeNet, localAddrByte, linkEP, buf)
+				testSendTo(t, s, remoteAddr, linkEP, nil)
+
+				// 9. Remove the address. Send should only work for spoofing, receive
+				// for promiscuous mode.
+				//-----------------------
+				if err := s.RemoveAddress(nicid, localAddr); err != nil {
+					t.Fatal("RemoveAddress failed:", err)
+				}
+				verifyAddress(t, s, nicid, noAddr)
+				if promiscuous {
+					testRecv(t, fakeNet, localAddrByte, linkEP, buf)
+				} else {
+					testFailingRecv(t, fakeNet, localAddrByte, linkEP, buf)
+				}
+				if spoofing {
+					// FIXME(b/139841518):Spoofing doesn't work if there is no primary address.
+					// testSendTo(t, s, remoteAddr, linkEP, nil)
+				} else {
+					testFailingSendTo(t, s, remoteAddr, linkEP, nil, tcpip.ErrNoRoute)
+				}
+			})
+		}
 	}
 }
 
@@ -583,9 +851,13 @@ func TestPromiscuousMode(t *testing.T) {
 		t.Fatal("CreateNIC failed:", err)
 	}
 
-	s.SetRouteTable([]tcpip.Route{
-		{"\x00", "\x00", "\x00", 1},
-	})
+	{
+		subnet, err := tcpip.NewSubnet("\x00", "\x00")
+		if err != nil {
+			t.Fatal(err)
+		}
+		s.SetRouteTable([]tcpip.Route{{Destination: subnet, Gateway: "\x00", NIC: 1}})
+	}
 
 	fakeNet := s.NetworkProtocolInstance(fakeNetNumber).(*fakeNetworkProtocol)
 
@@ -593,22 +865,15 @@ func TestPromiscuousMode(t *testing.T) {
 
 	// Write a packet, and check that it doesn't get delivered as we don't
 	// have a matching endpoint.
-	fakeNet.packetCount[1] = 0
-	buf[0] = 1
-	linkEP.Inject(fakeNetNumber, buf.ToVectorisedView())
-	if fakeNet.packetCount[1] != 0 {
-		t.Errorf("packetCount[1] = %d, want %d", fakeNet.packetCount[1], 0)
-	}
+	const localAddrByte byte = 0x01
+	buf[0] = localAddrByte
+	testFailingRecv(t, fakeNet, localAddrByte, linkEP, buf)
 
 	// Set promiscuous mode, then check that packet is delivered.
 	if err := s.SetPromiscuousMode(1, true); err != nil {
 		t.Fatal("SetPromiscuousMode failed:", err)
 	}
-
-	linkEP.Inject(fakeNetNumber, buf.ToVectorisedView())
-	if fakeNet.packetCount[1] != 1 {
-		t.Errorf("packetCount[1] = %d, want %d", fakeNet.packetCount[1], 1)
-	}
+	testRecv(t, fakeNet, localAddrByte, linkEP, buf)
 
 	// Check that we can't get a route as there is no local address.
 	_, err := s.FindRoute(0, "", "\x02", fakeNetNumber, false /* multicastLoop */)
@@ -621,54 +886,120 @@ func TestPromiscuousMode(t *testing.T) {
 	if err := s.SetPromiscuousMode(1, false); err != nil {
 		t.Fatal("SetPromiscuousMode failed:", err)
 	}
+	testFailingRecv(t, fakeNet, localAddrByte, linkEP, buf)
+}
 
-	linkEP.Inject(fakeNetNumber, buf.ToVectorisedView())
-	if fakeNet.packetCount[1] != 1 {
-		t.Errorf("packetCount[1] = %d, want %d", fakeNet.packetCount[1], 1)
+func TestSpoofingWithAddress(t *testing.T) {
+	localAddr := tcpip.Address("\x01")
+	nonExistentLocalAddr := tcpip.Address("\x02")
+	dstAddr := tcpip.Address("\x03")
+
+	s := stack.New([]string{"fakeNet"}, nil, stack.Options{})
+
+	id, linkEP := channel.New(10, defaultMTU, "")
+	if err := s.CreateNIC(1, id); err != nil {
+		t.Fatal("CreateNIC failed:", err)
+	}
+
+	if err := s.AddAddress(1, fakeNetNumber, localAddr); err != nil {
+		t.Fatal("AddAddress failed:", err)
+	}
+
+	{
+		subnet, err := tcpip.NewSubnet("\x00", "\x00")
+		if err != nil {
+			t.Fatal(err)
+		}
+		s.SetRouteTable([]tcpip.Route{{Destination: subnet, Gateway: "\x00", NIC: 1}})
 	}
+
+	// With address spoofing disabled, FindRoute does not permit an address
+	// that was not added to the NIC to be used as the source.
+	r, err := s.FindRoute(0, nonExistentLocalAddr, dstAddr, fakeNetNumber, false /* multicastLoop */)
+	if err == nil {
+		t.Errorf("FindRoute succeeded with route %+v when it should have failed", r)
+	}
+
+	// With address spoofing enabled, FindRoute permits any address to be used
+	// as the source.
+	if err := s.SetSpoofing(1, true); err != nil {
+		t.Fatal("SetSpoofing failed:", err)
+	}
+	r, err = s.FindRoute(0, nonExistentLocalAddr, dstAddr, fakeNetNumber, false /* multicastLoop */)
+	if err != nil {
+		t.Fatal("FindRoute failed:", err)
+	}
+	if r.LocalAddress != nonExistentLocalAddr {
+		t.Errorf("Route has wrong local address: got %v, wanted %v", r.LocalAddress, nonExistentLocalAddr)
+	}
+	if r.RemoteAddress != dstAddr {
+		t.Errorf("Route has wrong remote address: got %v, wanted %v", r.RemoteAddress, dstAddr)
+	}
+	// Sending a packet works.
+	testSendTo(t, s, dstAddr, linkEP, nil)
+	testSend(t, r, linkEP, nil)
+
+	// FindRoute should also work with a local address that exists on the NIC.
+	r, err = s.FindRoute(0, localAddr, dstAddr, fakeNetNumber, false /* multicastLoop */)
+	if err != nil {
+		t.Fatal("FindRoute failed:", err)
+	}
+	if r.LocalAddress != localAddr {
+		t.Errorf("Route has wrong local address: got %v, wanted %v", r.LocalAddress, nonExistentLocalAddr)
+	}
+	if r.RemoteAddress != dstAddr {
+		t.Errorf("Route has wrong remote address: got %v, wanted %v", r.RemoteAddress, dstAddr)
+	}
+	// Sending a packet using the route works.
+	testSend(t, r, linkEP, nil)
 }
 
-func TestAddressSpoofing(t *testing.T) {
-	srcAddr := tcpip.Address("\x01")
+func TestSpoofingNoAddress(t *testing.T) {
+	nonExistentLocalAddr := tcpip.Address("\x01")
 	dstAddr := tcpip.Address("\x02")
 
 	s := stack.New([]string{"fakeNet"}, nil, stack.Options{})
 
-	id, _ := channel.New(10, defaultMTU, "")
+	id, linkEP := channel.New(10, defaultMTU, "")
 	if err := s.CreateNIC(1, id); err != nil {
 		t.Fatal("CreateNIC failed:", err)
 	}
 
-	if err := s.AddAddress(1, fakeNetNumber, dstAddr); err != nil {
-		t.Fatal("AddAddress failed:", err)
+	{
+		subnet, err := tcpip.NewSubnet("\x00", "\x00")
+		if err != nil {
+			t.Fatal(err)
+		}
+		s.SetRouteTable([]tcpip.Route{{Destination: subnet, Gateway: "\x00", NIC: 1}})
 	}
 
-	s.SetRouteTable([]tcpip.Route{
-		{"\x00", "\x00", "\x00", 1},
-	})
-
 	// With address spoofing disabled, FindRoute does not permit an address
 	// that was not added to the NIC to be used as the source.
-	r, err := s.FindRoute(0, srcAddr, dstAddr, fakeNetNumber, false /* multicastLoop */)
+	r, err := s.FindRoute(0, nonExistentLocalAddr, dstAddr, fakeNetNumber, false /* multicastLoop */)
 	if err == nil {
 		t.Errorf("FindRoute succeeded with route %+v when it should have failed", r)
 	}
+	// Sending a packet fails.
+	testFailingSendTo(t, s, dstAddr, linkEP, nil, tcpip.ErrNoRoute)
 
 	// With address spoofing enabled, FindRoute permits any address to be used
 	// as the source.
 	if err := s.SetSpoofing(1, true); err != nil {
 		t.Fatal("SetSpoofing failed:", err)
 	}
-	r, err = s.FindRoute(0, srcAddr, dstAddr, fakeNetNumber, false /* multicastLoop */)
+	r, err = s.FindRoute(0, nonExistentLocalAddr, dstAddr, fakeNetNumber, false /* multicastLoop */)
 	if err != nil {
 		t.Fatal("FindRoute failed:", err)
 	}
-	if r.LocalAddress != srcAddr {
-		t.Errorf("Route has wrong local address: got %v, wanted %v", r.LocalAddress, srcAddr)
+	if r.LocalAddress != nonExistentLocalAddr {
+		t.Errorf("Route has wrong local address: got %v, wanted %v", r.LocalAddress, nonExistentLocalAddr)
 	}
 	if r.RemoteAddress != dstAddr {
 		t.Errorf("Route has wrong remote address: got %v, wanted %v", r.RemoteAddress, dstAddr)
 	}
+	// Sending a packet works.
+	// FIXME(b/139841518):Spoofing doesn't work if there is no primary address.
+	// testSendTo(t, s, remoteAddr, linkEP, nil)
 }
 
 func TestBroadcastNeedsNoRoute(t *testing.T) {
@@ -806,16 +1137,20 @@ func TestSubnetAcceptsMatchingPacket(t *testing.T) {
 		t.Fatal("CreateNIC failed:", err)
 	}
 
-	s.SetRouteTable([]tcpip.Route{
-		{"\x00", "\x00", "\x00", 1},
-	})
+	{
+		subnet, err := tcpip.NewSubnet("\x00", "\x00")
+		if err != nil {
+			t.Fatal(err)
+		}
+		s.SetRouteTable([]tcpip.Route{{Destination: subnet, Gateway: "\x00", NIC: 1}})
+	}
 
 	fakeNet := s.NetworkProtocolInstance(fakeNetNumber).(*fakeNetworkProtocol)
 
 	buf := buffer.NewView(30)
 
-	buf[0] = 1
-	fakeNet.packetCount[1] = 0
+	const localAddrByte byte = 0x01
+	buf[0] = localAddrByte
 	subnet, err := tcpip.NewSubnet(tcpip.Address("\x00"), tcpip.AddressMask("\xF0"))
 	if err != nil {
 		t.Fatal("NewSubnet failed:", err)
@@ -824,9 +1159,52 @@ func TestSubnetAcceptsMatchingPacket(t *testing.T) {
 		t.Fatal("AddSubnet failed:", err)
 	}
 
-	linkEP.Inject(fakeNetNumber, buf.ToVectorisedView())
-	if fakeNet.packetCount[1] != 1 {
-		t.Errorf("packetCount[1] = %d, want %d", fakeNet.packetCount[1], 1)
+	testRecv(t, fakeNet, localAddrByte, linkEP, buf)
+}
+
+// Set the subnet, then check that CheckLocalAddress returns the correct NIC.
+func TestCheckLocalAddressForSubnet(t *testing.T) {
+	const nicID tcpip.NICID = 1
+	s := stack.New([]string{"fakeNet"}, nil, stack.Options{})
+
+	id, _ := channel.New(10, defaultMTU, "")
+	if err := s.CreateNIC(nicID, id); err != nil {
+		t.Fatal("CreateNIC failed:", err)
+	}
+
+	{
+		subnet, err := tcpip.NewSubnet("\x00", "\x00")
+		if err != nil {
+			t.Fatal(err)
+		}
+		s.SetRouteTable([]tcpip.Route{{Destination: subnet, Gateway: "\x00", NIC: nicID}})
+	}
+
+	subnet, err := tcpip.NewSubnet(tcpip.Address("\xa0"), tcpip.AddressMask("\xf0"))
+
+	if err != nil {
+		t.Fatal("NewSubnet failed:", err)
+	}
+	if err := s.AddSubnet(nicID, fakeNetNumber, subnet); err != nil {
+		t.Fatal("AddSubnet failed:", err)
+	}
+
+	// Loop over all subnet addresses and check them.
+	numOfAddresses := 1 << uint(8-subnet.Prefix())
+	if numOfAddresses < 1 || numOfAddresses > 255 {
+		t.Fatalf("got numOfAddresses = %d, want = [1 .. 255] (subnet=%s)", numOfAddresses, subnet)
+	}
+	addr := []byte(subnet.ID())
+	for i := 0; i < numOfAddresses; i++ {
+		if gotNicID := s.CheckLocalAddress(0, fakeNetNumber, tcpip.Address(addr)); gotNicID != nicID {
+			t.Errorf("got CheckLocalAddress(0, %d, %s) = %d, want = %d", fakeNetNumber, tcpip.Address(addr), gotNicID, nicID)
+		}
+		addr[0]++
+	}
+
+	// Trying the next address should fail since it is outside the subnet range.
+	if gotNicID := s.CheckLocalAddress(0, fakeNetNumber, tcpip.Address(addr)); gotNicID != 0 {
+		t.Errorf("got CheckLocalAddress(0, %d, %s) = %d, want = %d", fakeNetNumber, tcpip.Address(addr), gotNicID, 0)
 	}
 }
 
@@ -839,16 +1217,20 @@ func TestSubnetRejectsNonmatchingPacket(t *testing.T) {
 		t.Fatal("CreateNIC failed:", err)
 	}
 
-	s.SetRouteTable([]tcpip.Route{
-		{"\x00", "\x00", "\x00", 1},
-	})
+	{
+		subnet, err := tcpip.NewSubnet("\x00", "\x00")
+		if err != nil {
+			t.Fatal(err)
+		}
+		s.SetRouteTable([]tcpip.Route{{Destination: subnet, Gateway: "\x00", NIC: 1}})
+	}
 
 	fakeNet := s.NetworkProtocolInstance(fakeNetNumber).(*fakeNetworkProtocol)
 
 	buf := buffer.NewView(30)
 
-	buf[0] = 1
-	fakeNet.packetCount[1] = 0
+	const localAddrByte byte = 0x01
+	buf[0] = localAddrByte
 	subnet, err := tcpip.NewSubnet(tcpip.Address("\x10"), tcpip.AddressMask("\xF0"))
 	if err != nil {
 		t.Fatal("NewSubnet failed:", err)
@@ -856,10 +1238,7 @@ func TestSubnetRejectsNonmatchingPacket(t *testing.T) {
 	if err := s.AddSubnet(1, fakeNetNumber, subnet); err != nil {
 		t.Fatal("AddSubnet failed:", err)
 	}
-	linkEP.Inject(fakeNetNumber, buf.ToVectorisedView())
-	if fakeNet.packetCount[1] != 0 {
-		t.Errorf("packetCount[1] = %d, want %d", fakeNet.packetCount[1], 0)
-	}
+	testFailingRecv(t, fakeNet, localAddrByte, linkEP, buf)
 }
 
 func TestNetworkOptions(t *testing.T) {
@@ -1213,15 +1592,19 @@ func TestNICStats(t *testing.T) {
 	s := stack.New([]string{"fakeNet"}, nil, stack.Options{})
 	id1, linkEP1 := channel.New(10, defaultMTU, "")
 	if err := s.CreateNIC(1, id1); err != nil {
-		t.Fatal("CreateNIC failed:", err)
+		t.Fatal("CreateNIC failed: ", err)
 	}
 	if err := s.AddAddress(1, fakeNetNumber, "\x01"); err != nil {
 		t.Fatal("AddAddress failed:", err)
 	}
 	// Route all packets for address \x01 to NIC 1.
-	s.SetRouteTable([]tcpip.Route{
-		{"\x01", "\xff", "\x00", 1},
-	})
+	{
+		subnet, err := tcpip.NewSubnet("\x01", "\xff")
+		if err != nil {
+			t.Fatal(err)
+		}
+		s.SetRouteTable([]tcpip.Route{{Destination: subnet, Gateway: "\x00", NIC: 1}})
+	}
 
 	// Send a packet to address 1.
 	buf := buffer.NewView(30)
@@ -1236,7 +1619,9 @@ func TestNICStats(t *testing.T) {
 
 	payload := buffer.NewView(10)
 	// Write a packet out via the address for NIC 1
-	sendTo(t, s, "\x01", payload)
+	if err := sendTo(s, "\x01", payload); err != nil {
+		t.Fatal("sendTo failed: ", err)
+	}
 	want := uint64(linkEP1.Drain())
 	if got := s.NICInfo()[1].Stats.Tx.Packets.Value(); got != want {
 		t.Errorf("got Tx.Packets.Value() = %d, linkEP1.Drain() = %d", got, want)
@@ -1270,9 +1655,13 @@ func TestNICForwarding(t *testing.T) {
 	}
 
 	// Route all packets to address 3 to NIC 2.
-	s.SetRouteTable([]tcpip.Route{
-		{"\x03", "\xff", "\x00", 2},
-	})
+	{
+		subnet, err := tcpip.NewSubnet("\x03", "\xff")
+		if err != nil {
+			t.Fatal(err)
+		}
+		s.SetRouteTable([]tcpip.Route{{Destination: subnet, Gateway: "\x00", NIC: 2}})
+	}
 
 	// Send a packet to address 3.
 	buf := buffer.NewView(30)
diff --git a/pkg/tcpip/stack/transport_test.go b/pkg/tcpip/stack/transport_test.go
index eee3144cd..5335897f5 100644
--- a/pkg/tcpip/stack/transport_test.go
+++ b/pkg/tcpip/stack/transport_test.go
@@ -65,7 +65,7 @@ func (*fakeTransportEndpoint) Read(*tcpip.FullAddress) (buffer.View, tcpip.Contr
 	return buffer.View{}, tcpip.ControlMessages{}, nil
 }
 
-func (f *fakeTransportEndpoint) Write(p tcpip.Payload, opts tcpip.WriteOptions) (uintptr, <-chan struct{}, *tcpip.Error) {
+func (f *fakeTransportEndpoint) Write(p tcpip.Payload, opts tcpip.WriteOptions) (int64, <-chan struct{}, *tcpip.Error) {
 	if len(f.route.RemoteAddress) == 0 {
 		return 0, nil, tcpip.ErrNoRoute
 	}
@@ -79,10 +79,10 @@ func (f *fakeTransportEndpoint) Write(p tcpip.Payload, opts tcpip.WriteOptions)
 		return 0, nil, err
 	}
 
-	return uintptr(len(v)), nil, nil
+	return int64(len(v)), nil, nil
 }
 
-func (f *fakeTransportEndpoint) Peek([][]byte) (uintptr, tcpip.ControlMessages, *tcpip.Error) {
+func (f *fakeTransportEndpoint) Peek([][]byte) (int64, tcpip.ControlMessages, *tcpip.Error) {
 	return 0, tcpip.ControlMessages{}, nil
 }
 
@@ -105,6 +105,11 @@ func (*fakeTransportEndpoint) GetSockOpt(opt interface{}) *tcpip.Error {
 	return tcpip.ErrInvalidEndpointState
 }
 
+// Disconnect implements tcpip.Endpoint.Disconnect.
+func (*fakeTransportEndpoint) Disconnect() *tcpip.Error {
+	return tcpip.ErrNotSupported
+}
+
 func (f *fakeTransportEndpoint) Connect(addr tcpip.FullAddress) *tcpip.Error {
 	f.peerAddr = addr.Addr
 
@@ -279,7 +284,13 @@ func TestTransportReceive(t *testing.T) {
 		t.Fatalf("CreateNIC failed: %v", err)
 	}
 
-	s.SetRouteTable([]tcpip.Route{{"\x00", "\x00", "\x00", 1}})
+	{
+		subnet, err := tcpip.NewSubnet("\x00", "\x00")
+		if err != nil {
+			t.Fatal(err)
+		}
+		s.SetRouteTable([]tcpip.Route{{Destination: subnet, Gateway: "\x00", NIC: 1}})
+	}
 
 	if err := s.AddAddress(1, fakeNetNumber, "\x01"); err != nil {
 		t.Fatalf("AddAddress failed: %v", err)
@@ -335,7 +346,13 @@ func TestTransportControlReceive(t *testing.T) {
 		t.Fatalf("CreateNIC failed: %v", err)
 	}
 
-	s.SetRouteTable([]tcpip.Route{{"\x00", "\x00", "\x00", 1}})
+	{
+		subnet, err := tcpip.NewSubnet("\x00", "\x00")
+		if err != nil {
+			t.Fatal(err)
+		}
+		s.SetRouteTable([]tcpip.Route{{Destination: subnet, Gateway: "\x00", NIC: 1}})
+	}
 
 	if err := s.AddAddress(1, fakeNetNumber, "\x01"); err != nil {
 		t.Fatalf("AddAddress failed: %v", err)
@@ -401,7 +418,13 @@ func TestTransportSend(t *testing.T) {
 		t.Fatalf("AddAddress failed: %v", err)
 	}
 
-	s.SetRouteTable([]tcpip.Route{{"\x00", "\x00", "\x00", 1}})
+	{
+		subnet, err := tcpip.NewSubnet("\x00", "\x00")
+		if err != nil {
+			t.Fatal(err)
+		}
+		s.SetRouteTable([]tcpip.Route{{Destination: subnet, Gateway: "\x00", NIC: 1}})
+	}
 
 	// Create endpoint and bind it.
 	wq := waiter.Queue{}
@@ -492,10 +515,20 @@ func TestTransportForwarding(t *testing.T) {
 
 	// Route all packets to address 3 to NIC 2 and all packets to address
 	// 1 to NIC 1.
-	s.SetRouteTable([]tcpip.Route{
-		{"\x03", "\xff", "\x00", 2},
-		{"\x01", "\xff", "\x00", 1},
-	})
+	{
+		subnet0, err := tcpip.NewSubnet("\x03", "\xff")
+		if err != nil {
+			t.Fatal(err)
+		}
+		subnet1, err := tcpip.NewSubnet("\x01", "\xff")
+		if err != nil {
+			t.Fatal(err)
+		}
+		s.SetRouteTable([]tcpip.Route{
+			{Destination: subnet0, Gateway: "\x00", NIC: 2},
+			{Destination: subnet1, Gateway: "\x00", NIC: 1},
+		})
+	}
 
 	wq := waiter.Queue{}
 	ep, err := s.NewEndpoint(fakeTransNumber, fakeNetNumber, &wq)
diff --git a/pkg/tcpip/tcpip.go b/pkg/tcpip/tcpip.go
index 119712d2f..8f9b86cce 100644
--- a/pkg/tcpip/tcpip.go
+++ b/pkg/tcpip/tcpip.go
@@ -31,6 +31,7 @@ package tcpip
 import (
 	"errors"
 	"fmt"
+	"math/bits"
 	"reflect"
 	"strconv"
 	"strings"
@@ -145,8 +146,17 @@ type Address string
 type AddressMask string
 
 // String implements Stringer.
-func (a AddressMask) String() string {
-	return Address(a).String()
+func (m AddressMask) String() string {
+	return Address(m).String()
+}
+
+// Prefix returns the number of bits before the first host bit.
+func (m AddressMask) Prefix() int {
+	p := 0
+	for _, b := range []byte(m) {
+		p += bits.LeadingZeros8(^b)
+	}
+	return p
 }
 
 // Subnet is a subnet defined by its address and mask.
@@ -168,6 +178,11 @@ func NewSubnet(a Address, m AddressMask) (Subnet, error) {
 	return Subnet{a, m}, nil
 }
 
+// String implements Stringer.
+func (s Subnet) String() string {
+	return fmt.Sprintf("%s/%d", s.ID(), s.Prefix())
+}
+
 // Contains returns true iff the address is of the same length and matches the
 // subnet address and mask.
 func (s *Subnet) Contains(a Address) bool {
@@ -190,28 +205,13 @@ func (s *Subnet) ID() Address {
 // Bits returns the number of ones (network bits) and zeros (host bits) in the
 // subnet mask.
 func (s *Subnet) Bits() (ones int, zeros int) {
-	for _, b := range []byte(s.mask) {
-		for i := uint(0); i < 8; i++ {
-			if b&(1<<i) == 0 {
-				zeros++
-			} else {
-				ones++
-			}
-		}
-	}
-	return
+	ones = s.mask.Prefix()
+	return ones, len(s.mask)*8 - ones
 }
 
 // Prefix returns the number of bits before the first host bit.
 func (s *Subnet) Prefix() int {
-	for i, b := range []byte(s.mask) {
-		for j := 7; j >= 0; j-- {
-			if b&(1<<uint(j)) == 0 {
-				return i*8 + 7 - j
-			}
-		}
-	}
-	return len(s.mask) * 8
+	return s.mask.Prefix()
 }
 
 // Mask returns the subnet mask.
@@ -329,12 +329,12 @@ type Endpoint interface {
 	// ErrNoLinkAddress and a notification channel is returned for the caller to
 	// block. Channel is closed once address resolution is complete (success or
 	// not). The channel is only non-nil in this case.
-	Write(Payload, WriteOptions) (uintptr, <-chan struct{}, *Error)
+	Write(Payload, WriteOptions) (int64, <-chan struct{}, *Error)
 
 	// Peek reads data without consuming it from the endpoint.
 	//
 	// This method does not block if there is no data pending.
-	Peek([][]byte) (uintptr, ControlMessages, *Error)
+	Peek([][]byte) (int64, ControlMessages, *Error)
 
 	// Connect connects the endpoint to its peer. Specifying a NIC is
 	// optional.
@@ -353,6 +353,9 @@ type Endpoint interface {
 	// ErrAddressFamilyNotSupported must be returned.
 	Connect(address FullAddress) *Error
 
+	// Disconnect disconnects the endpoint from its peer.
+	Disconnect() *Error
+
 	// Shutdown closes the read and/or write end of the endpoint connection
 	// to its peer.
 	Shutdown(flags ShutdownFlags) *Error
@@ -567,13 +570,8 @@ type BroadcastOption int
 // gateway) sets of packets should be routed. A row is considered viable if the
 // masked target address matches the destination address in the row.
 type Route struct {
-	// Destination is the address that must be matched against the masked
-	// target address to check if this row is viable.
-	Destination Address
-
-	// Mask specifies which bits of the Destination and the target address
-	// must match for this row to be viable.
-	Mask AddressMask
+	// Destination must contain the target address for this row to be viable.
+	Destination Subnet
 
 	// Gateway is the gateway to be used if this row is viable.
 	Gateway Address
@@ -582,25 +580,15 @@ type Route struct {
 	NIC NICID
 }
 
-// Match determines if r is viable for the given destination address.
-func (r *Route) Match(addr Address) bool {
-	if len(addr) != len(r.Destination) {
-		return false
-	}
-
-	// Using header.Ipv4Broadcast would introduce an import cycle, so
-	// we'll use a literal instead.
-	if addr == "\xff\xff\xff\xff" {
-		return true
-	}
-
-	for i := 0; i < len(r.Destination); i++ {
-		if (addr[i] & r.Mask[i]) != r.Destination[i] {
-			return false
-		}
+// String implements the fmt.Stringer interface.
+func (r Route) String() string {
+	var out strings.Builder
+	fmt.Fprintf(&out, "%s", r.Destination)
+	if len(r.Gateway) > 0 {
+		fmt.Fprintf(&out, " via %s", r.Gateway)
 	}
-
-	return true
+	fmt.Fprintf(&out, " nic %d", r.NIC)
+	return out.String()
 }
 
 // LinkEndpointID represents a data link layer endpoint.
@@ -1072,6 +1060,11 @@ type AddressWithPrefix struct {
 	PrefixLen int
 }
 
+// String implements the fmt.Stringer interface.
+func (a AddressWithPrefix) String() string {
+	return fmt.Sprintf("%s/%d", a.Address, a.PrefixLen)
+}
+
 // ProtocolAddress is an address and the network protocol it is associated
 // with.
 type ProtocolAddress struct {
diff --git a/pkg/tcpip/tcpip_test.go b/pkg/tcpip/tcpip_test.go
index ebb1c1b56..fb3a0a5ee 100644
--- a/pkg/tcpip/tcpip_test.go
+++ b/pkg/tcpip/tcpip_test.go
@@ -60,12 +60,12 @@ func TestSubnetBits(t *testing.T) {
 	}{
 		{"\x00", 0, 8},
 		{"\x00\x00", 0, 16},
-		{"\x36", 4, 4},
-		{"\x5c", 4, 4},
-		{"\x5c\x5c", 8, 8},
-		{"\x5c\x36", 8, 8},
-		{"\x36\x5c", 8, 8},
-		{"\x36\x36", 8, 8},
+		{"\x36", 0, 8},
+		{"\x5c", 0, 8},
+		{"\x5c\x5c", 0, 16},
+		{"\x5c\x36", 0, 16},
+		{"\x36\x5c", 0, 16},
+		{"\x36\x36", 0, 16},
 		{"\xff", 8, 0},
 		{"\xff\xff", 16, 0},
 	}
@@ -122,26 +122,6 @@ func TestSubnetCreation(t *testing.T) {
 	}
 }
 
-func TestRouteMatch(t *testing.T) {
-	tests := []struct {
-		d    Address
-		m    AddressMask
-		a    Address
-		want bool
-	}{
-		{"\xc2\x80", "\xff\xf0", "\xc2\x80", true},
-		{"\xc2\x80", "\xff\xf0", "\xc2\x00", false},
-		{"\xc2\x00", "\xff\xf0", "\xc2\x00", true},
-		{"\xc2\x00", "\xff\xf0", "\xc2\x80", false},
-	}
-	for _, tt := range tests {
-		r := Route{Destination: tt.d, Mask: tt.m}
-		if got := r.Match(tt.a); got != tt.want {
-			t.Errorf("Route(%v).Match(%v) = %v, want %v", r, tt.a, got, tt.want)
-		}
-	}
-}
-
 func TestAddressString(t *testing.T) {
 	for _, want := range []string{
 		// Taken from stdlib.
diff --git a/pkg/tcpip/transport/icmp/endpoint.go b/pkg/tcpip/transport/icmp/endpoint.go
index 9a4306011..451d3880e 100644
--- a/pkg/tcpip/transport/icmp/endpoint.go
+++ b/pkg/tcpip/transport/icmp/endpoint.go
@@ -136,34 +136,6 @@ func (e *endpoint) IPTables() (iptables.IPTables, error) {
 	return e.stack.IPTables(), nil
 }
 
-// Resume implements tcpip.ResumableEndpoint.Resume.
-func (e *endpoint) Resume(s *stack.Stack) {
-	e.stack = s
-
-	if e.state != stateBound && e.state != stateConnected {
-		return
-	}
-
-	var err *tcpip.Error
-	if e.state == stateConnected {
-		e.route, err = e.stack.FindRoute(e.regNICID, e.bindAddr, e.id.RemoteAddress, e.netProto, false /* multicastLoop */)
-		if err != nil {
-			panic(*err)
-		}
-
-		e.id.LocalAddress = e.route.LocalAddress
-	} else if len(e.id.LocalAddress) != 0 { // stateBound
-		if e.stack.CheckLocalAddress(e.regNICID, e.netProto, e.id.LocalAddress) == 0 {
-			panic(tcpip.ErrBadLocalAddress)
-		}
-	}
-
-	e.id, err = e.registerWithStack(e.regNICID, []tcpip.NetworkProtocolNumber{e.netProto}, e.id)
-	if err != nil {
-		panic(*err)
-	}
-}
-
 // Read reads data from the endpoint. This method does not block if
 // there is no data pending.
 func (e *endpoint) Read(addr *tcpip.FullAddress) (buffer.View, tcpip.ControlMessages, *tcpip.Error) {
@@ -233,7 +205,7 @@ func (e *endpoint) prepareForWrite(to *tcpip.FullAddress) (retry bool, err *tcpi
 
 // Write writes data to the endpoint's peer. This method does not block
 // if the data cannot be written.
-func (e *endpoint) Write(p tcpip.Payload, opts tcpip.WriteOptions) (uintptr, <-chan struct{}, *tcpip.Error) {
+func (e *endpoint) Write(p tcpip.Payload, opts tcpip.WriteOptions) (int64, <-chan struct{}, *tcpip.Error) {
 	// MSG_MORE is unimplemented. (This also means that MSG_EOR is a no-op.)
 	if opts.More {
 		return 0, nil, tcpip.ErrInvalidOptionValue
@@ -335,11 +307,11 @@ func (e *endpoint) Write(p tcpip.Payload, opts tcpip.WriteOptions) (uintptr, <-c
 		return 0, nil, err
 	}
 
-	return uintptr(len(v)), nil, nil
+	return int64(len(v)), nil, nil
 }
 
 // Peek only returns data from a single datagram, so do nothing here.
-func (e *endpoint) Peek([][]byte) (uintptr, tcpip.ControlMessages, *tcpip.Error) {
+func (e *endpoint) Peek([][]byte) (int64, tcpip.ControlMessages, *tcpip.Error) {
 	return 0, tcpip.ControlMessages{}, nil
 }
 
@@ -456,16 +428,16 @@ func (e *endpoint) checkV4Mapped(addr *tcpip.FullAddress, allowMismatch bool) (t
 	return netProto, nil
 }
 
+// Disconnect implements tcpip.Endpoint.Disconnect.
+func (*endpoint) Disconnect() *tcpip.Error {
+	return tcpip.ErrNotSupported
+}
+
 // Connect connects the endpoint to its peer. Specifying a NIC is optional.
 func (e *endpoint) Connect(addr tcpip.FullAddress) *tcpip.Error {
 	e.mu.Lock()
 	defer e.mu.Unlock()
 
-	if addr.Addr == "" {
-		// AF_UNSPEC isn't supported.
-		return tcpip.ErrAddressFamilyNotSupported
-	}
-
 	nicid := addr.NIC
 	localPort := uint16(0)
 	switch e.state {
diff --git a/pkg/tcpip/transport/icmp/endpoint_state.go b/pkg/tcpip/transport/icmp/endpoint_state.go
index 43551d642..c587b96b6 100644
--- a/pkg/tcpip/transport/icmp/endpoint_state.go
+++ b/pkg/tcpip/transport/icmp/endpoint_state.go
@@ -15,6 +15,7 @@
 package icmp
 
 import (
+	"gvisor.dev/gvisor/pkg/tcpip"
 	"gvisor.dev/gvisor/pkg/tcpip/buffer"
 	"gvisor.dev/gvisor/pkg/tcpip/stack"
 )
@@ -64,3 +65,31 @@ func (e *endpoint) loadRcvBufSizeMax(max int) {
 func (e *endpoint) afterLoad() {
 	stack.StackFromEnv.RegisterRestoredEndpoint(e)
 }
+
+// Resume implements tcpip.ResumableEndpoint.Resume.
+func (e *endpoint) Resume(s *stack.Stack) {
+	e.stack = s
+
+	if e.state != stateBound && e.state != stateConnected {
+		return
+	}
+
+	var err *tcpip.Error
+	if e.state == stateConnected {
+		e.route, err = e.stack.FindRoute(e.regNICID, e.bindAddr, e.id.RemoteAddress, e.netProto, false /* multicastLoop */)
+		if err != nil {
+			panic(err)
+		}
+
+		e.id.LocalAddress = e.route.LocalAddress
+	} else if len(e.id.LocalAddress) != 0 { // stateBound
+		if e.stack.CheckLocalAddress(e.regNICID, e.netProto, e.id.LocalAddress) == 0 {
+			panic(tcpip.ErrBadLocalAddress)
+		}
+	}
+
+	e.id, err = e.registerWithStack(e.regNICID, []tcpip.NetworkProtocolNumber{e.netProto}, e.id)
+	if err != nil {
+		panic(err)
+	}
+}
diff --git a/pkg/tcpip/transport/raw/endpoint.go b/pkg/tcpip/transport/raw/endpoint.go
index eab3dcbd2..13e17e2a6 100644
--- a/pkg/tcpip/transport/raw/endpoint.go
+++ b/pkg/tcpip/transport/raw/endpoint.go
@@ -174,31 +174,6 @@ func (ep *endpoint) IPTables() (iptables.IPTables, error) {
 	return ep.stack.IPTables(), nil
 }
 
-// Resume implements tcpip.ResumableEndpoint.Resume.
-func (ep *endpoint) Resume(s *stack.Stack) {
-	ep.stack = s
-
-	// If the endpoint is connected, re-connect.
-	if ep.connected {
-		var err *tcpip.Error
-		ep.route, err = ep.stack.FindRoute(ep.registeredNIC, ep.boundAddr, ep.route.RemoteAddress, ep.netProto, false)
-		if err != nil {
-			panic(*err)
-		}
-	}
-
-	// If the endpoint is bound, re-bind.
-	if ep.bound {
-		if ep.stack.CheckLocalAddress(ep.registeredNIC, ep.netProto, ep.boundAddr) == 0 {
-			panic(tcpip.ErrBadLocalAddress)
-		}
-	}
-
-	if err := ep.stack.RegisterRawTransportEndpoint(ep.registeredNIC, ep.netProto, ep.transProto, ep); err != nil {
-		panic(*err)
-	}
-}
-
 // Read implements tcpip.Endpoint.Read.
 func (ep *endpoint) Read(addr *tcpip.FullAddress) (buffer.View, tcpip.ControlMessages, *tcpip.Error) {
 	if !ep.associated {
@@ -232,7 +207,7 @@ func (ep *endpoint) Read(addr *tcpip.FullAddress) (buffer.View, tcpip.ControlMes
 }
 
 // Write implements tcpip.Endpoint.Write.
-func (ep *endpoint) Write(payload tcpip.Payload, opts tcpip.WriteOptions) (uintptr, <-chan struct{}, *tcpip.Error) {
+func (ep *endpoint) Write(payload tcpip.Payload, opts tcpip.WriteOptions) (int64, <-chan struct{}, *tcpip.Error) {
 	// MSG_MORE is unimplemented. This also means that MSG_EOR is a no-op.
 	if opts.More {
 		return 0, nil, tcpip.ErrInvalidOptionValue
@@ -336,7 +311,7 @@ func (ep *endpoint) Write(payload tcpip.Payload, opts tcpip.WriteOptions) (uintp
 
 // finishWrite writes the payload to a route. It resolves the route if
 // necessary. It's really just a helper to make defer unnecessary in Write.
-func (ep *endpoint) finishWrite(payloadBytes []byte, route *stack.Route) (uintptr, <-chan struct{}, *tcpip.Error) {
+func (ep *endpoint) finishWrite(payloadBytes []byte, route *stack.Route) (int64, <-chan struct{}, *tcpip.Error) {
 	// We may need to resolve the route (match a link layer address to the
 	// network address). If that requires blocking (e.g. to use ARP),
 	// return a channel on which the caller can wait.
@@ -366,24 +341,24 @@ func (ep *endpoint) finishWrite(payloadBytes []byte, route *stack.Route) (uintpt
 		return 0, nil, tcpip.ErrUnknownProtocol
 	}
 
-	return uintptr(len(payloadBytes)), nil, nil
+	return int64(len(payloadBytes)), nil, nil
 }
 
 // Peek implements tcpip.Endpoint.Peek.
-func (ep *endpoint) Peek([][]byte) (uintptr, tcpip.ControlMessages, *tcpip.Error) {
+func (ep *endpoint) Peek([][]byte) (int64, tcpip.ControlMessages, *tcpip.Error) {
 	return 0, tcpip.ControlMessages{}, nil
 }
 
+// Disconnect implements tcpip.Endpoint.Disconnect.
+func (*endpoint) Disconnect() *tcpip.Error {
+	return tcpip.ErrNotSupported
+}
+
 // Connect implements tcpip.Endpoint.Connect.
 func (ep *endpoint) Connect(addr tcpip.FullAddress) *tcpip.Error {
 	ep.mu.Lock()
 	defer ep.mu.Unlock()
 
-	if addr.Addr == "" {
-		// AF_UNSPEC isn't supported.
-		return tcpip.ErrAddressFamilyNotSupported
-	}
-
 	if ep.closed {
 		return tcpip.ErrInvalidEndpointState
 	}
diff --git a/pkg/tcpip/transport/raw/endpoint_state.go b/pkg/tcpip/transport/raw/endpoint_state.go
index 44abddb2b..168953dec 100644
--- a/pkg/tcpip/transport/raw/endpoint_state.go
+++ b/pkg/tcpip/transport/raw/endpoint_state.go
@@ -15,6 +15,7 @@
 package raw
 
 import (
+	"gvisor.dev/gvisor/pkg/tcpip"
 	"gvisor.dev/gvisor/pkg/tcpip/buffer"
 	"gvisor.dev/gvisor/pkg/tcpip/stack"
 )
@@ -64,3 +65,28 @@ func (ep *endpoint) loadRcvBufSizeMax(max int) {
 func (ep *endpoint) afterLoad() {
 	stack.StackFromEnv.RegisterRestoredEndpoint(ep)
 }
+
+// Resume implements tcpip.ResumableEndpoint.Resume.
+func (ep *endpoint) Resume(s *stack.Stack) {
+	ep.stack = s
+
+	// If the endpoint is connected, re-connect.
+	if ep.connected {
+		var err *tcpip.Error
+		ep.route, err = ep.stack.FindRoute(ep.registeredNIC, ep.boundAddr, ep.route.RemoteAddress, ep.netProto, false)
+		if err != nil {
+			panic(err)
+		}
+	}
+
+	// If the endpoint is bound, re-bind.
+	if ep.bound {
+		if ep.stack.CheckLocalAddress(ep.registeredNIC, ep.netProto, ep.boundAddr) == 0 {
+			panic(tcpip.ErrBadLocalAddress)
+		}
+	}
+
+	if err := ep.stack.RegisterRawTransportEndpoint(ep.registeredNIC, ep.netProto, ep.transProto, ep); err != nil {
+		panic(err)
+	}
+}
diff --git a/pkg/tcpip/transport/tcp/endpoint.go b/pkg/tcpip/transport/tcp/endpoint.go
index e67169111..ac927569a 100644
--- a/pkg/tcpip/transport/tcp/endpoint.go
+++ b/pkg/tcpip/transport/tcp/endpoint.go
@@ -720,107 +720,6 @@ func (e *endpoint) IPTables() (iptables.IPTables, error) {
 	return e.stack.IPTables(), nil
 }
 
-// Resume implements tcpip.ResumableEndpoint.Resume.
-func (e *endpoint) Resume(s *stack.Stack) {
-	e.stack = s
-	e.segmentQueue.setLimit(MaxUnprocessedSegments)
-	e.workMu.Init()
-
-	state := e.state
-	switch state {
-	case StateInitial, StateBound, StateListen, StateConnecting, StateEstablished:
-		var ss SendBufferSizeOption
-		if err := e.stack.TransportProtocolOption(ProtocolNumber, &ss); err == nil {
-			if e.sndBufSize < ss.Min || e.sndBufSize > ss.Max {
-				panic(fmt.Sprintf("endpoint.sndBufSize %d is outside the min and max allowed [%d, %d]", e.sndBufSize, ss.Min, ss.Max))
-			}
-			if e.rcvBufSize < ss.Min || e.rcvBufSize > ss.Max {
-				panic(fmt.Sprintf("endpoint.rcvBufSize %d is outside the min and max allowed [%d, %d]", e.rcvBufSize, ss.Min, ss.Max))
-			}
-		}
-	}
-
-	bind := func() {
-		e.state = StateInitial
-		if len(e.bindAddress) == 0 {
-			e.bindAddress = e.id.LocalAddress
-		}
-		if err := e.Bind(tcpip.FullAddress{Addr: e.bindAddress, Port: e.id.LocalPort}); err != nil {
-			panic("endpoint binding failed: " + err.String())
-		}
-	}
-
-	switch state {
-	case StateEstablished, StateFinWait1, StateFinWait2, StateTimeWait, StateCloseWait, StateLastAck, StateClosing:
-		bind()
-		if len(e.connectingAddress) == 0 {
-			e.connectingAddress = e.id.RemoteAddress
-			// This endpoint is accepted by netstack but not yet by
-			// the app. If the endpoint is IPv6 but the remote
-			// address is IPv4, we need to connect as IPv6 so that
-			// dual-stack mode can be properly activated.
-			if e.netProto == header.IPv6ProtocolNumber && len(e.id.RemoteAddress) != header.IPv6AddressSize {
-				e.connectingAddress = "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\xff" + e.id.RemoteAddress
-			}
-		}
-		// Reset the scoreboard to reinitialize the sack information as
-		// we do not restore SACK information.
-		e.scoreboard.Reset()
-		if err := e.connect(tcpip.FullAddress{NIC: e.boundNICID, Addr: e.connectingAddress, Port: e.id.RemotePort}, false, e.workerRunning); err != tcpip.ErrConnectStarted {
-			panic("endpoint connecting failed: " + err.String())
-		}
-		connectedLoading.Done()
-	case StateListen:
-		tcpip.AsyncLoading.Add(1)
-		go func() {
-			connectedLoading.Wait()
-			bind()
-			backlog := cap(e.acceptedChan)
-			if err := e.Listen(backlog); err != nil {
-				panic("endpoint listening failed: " + err.String())
-			}
-			listenLoading.Done()
-			tcpip.AsyncLoading.Done()
-		}()
-	case StateConnecting, StateSynSent, StateSynRecv:
-		tcpip.AsyncLoading.Add(1)
-		go func() {
-			connectedLoading.Wait()
-			listenLoading.Wait()
-			bind()
-			if err := e.Connect(tcpip.FullAddress{NIC: e.boundNICID, Addr: e.connectingAddress, Port: e.id.RemotePort}); err != tcpip.ErrConnectStarted {
-				panic("endpoint connecting failed: " + err.String())
-			}
-			connectingLoading.Done()
-			tcpip.AsyncLoading.Done()
-		}()
-	case StateBound:
-		tcpip.AsyncLoading.Add(1)
-		go func() {
-			connectedLoading.Wait()
-			listenLoading.Wait()
-			connectingLoading.Wait()
-			bind()
-			tcpip.AsyncLoading.Done()
-		}()
-	case StateClose:
-		if e.isPortReserved {
-			tcpip.AsyncLoading.Add(1)
-			go func() {
-				connectedLoading.Wait()
-				listenLoading.Wait()
-				connectingLoading.Wait()
-				bind()
-				e.state = StateClose
-				tcpip.AsyncLoading.Done()
-			}()
-		}
-		fallthrough
-	case StateError:
-		tcpip.DeleteDanglingEndpoint(e)
-	}
-}
-
 // Read reads data from the endpoint.
 func (e *endpoint) Read(*tcpip.FullAddress) (buffer.View, tcpip.ControlMessages, *tcpip.Error) {
 	e.mu.RLock()
@@ -878,60 +777,95 @@ func (e *endpoint) readLocked() (buffer.View, *tcpip.Error) {
 	return v, nil
 }
 
-// Write writes data to the endpoint's peer.
-func (e *endpoint) Write(p tcpip.Payload, opts tcpip.WriteOptions) (uintptr, <-chan struct{}, *tcpip.Error) {
-	// Linux completely ignores any address passed to sendto(2) for TCP sockets
-	// (without the MSG_FASTOPEN flag). Corking is unimplemented, so opts.More
-	// and opts.EndOfRecord are also ignored.
-
-	e.mu.RLock()
-	defer e.mu.RUnlock()
-
+// isEndpointWritableLocked checks if a given endpoint is writable
+// and also returns the number of bytes that can be written at this
+// moment. If the endpoint is not writable then it returns an error
+// indicating the reason why it's not writable.
+// Caller must hold e.mu and e.sndBufMu
+func (e *endpoint) isEndpointWritableLocked() (int, *tcpip.Error) {
 	// The endpoint cannot be written to if it's not connected.
 	if !e.state.connected() {
 		switch e.state {
 		case StateError:
-			return 0, nil, e.hardError
+			return 0, e.hardError
 		default:
-			return 0, nil, tcpip.ErrClosedForSend
+			return 0, tcpip.ErrClosedForSend
 		}
 	}
 
-	// Nothing to do if the buffer is empty.
-	if p.Size() == 0 {
-		return 0, nil, nil
-	}
-
-	e.sndBufMu.Lock()
-
 	// Check if the connection has already been closed for sends.
 	if e.sndClosed {
-		e.sndBufMu.Unlock()
-		return 0, nil, tcpip.ErrClosedForSend
+		return 0, tcpip.ErrClosedForSend
 	}
 
-	// Check against the limit.
 	avail := e.sndBufSize - e.sndBufUsed
 	if avail <= 0 {
+		return 0, tcpip.ErrWouldBlock
+	}
+	return avail, nil
+}
+
+// Write writes data to the endpoint's peer.
+func (e *endpoint) Write(p tcpip.Payload, opts tcpip.WriteOptions) (int64, <-chan struct{}, *tcpip.Error) {
+	// Linux completely ignores any address passed to sendto(2) for TCP sockets
+	// (without the MSG_FASTOPEN flag). Corking is unimplemented, so opts.More
+	// and opts.EndOfRecord are also ignored.
+
+	e.mu.RLock()
+	e.sndBufMu.Lock()
+
+	avail, err := e.isEndpointWritableLocked()
+	if err != nil {
 		e.sndBufMu.Unlock()
-		return 0, nil, tcpip.ErrWouldBlock
+		e.mu.RUnlock()
+		return 0, nil, err
 	}
 
+	e.sndBufMu.Unlock()
+	e.mu.RUnlock()
+
+	// Nothing to do if the buffer is empty.
+	if p.Size() == 0 {
+		return 0, nil, nil
+	}
+
+	// Copy in memory without holding sndBufMu so that worker goroutine can
+	// make progress independent of this operation.
 	v, perr := p.Get(avail)
 	if perr != nil {
-		e.sndBufMu.Unlock()
 		return 0, nil, perr
 	}
 
-	l := len(v)
-	s := newSegmentFromView(&e.route, e.id, v)
+	e.mu.RLock()
+	e.sndBufMu.Lock()
+
+	// Because we released the lock before copying, check state again
+	// to make sure the endpoint is still in a valid state for a
+	// write.
+	avail, err = e.isEndpointWritableLocked()
+	if err != nil {
+		e.sndBufMu.Unlock()
+		e.mu.RUnlock()
+		return 0, nil, err
+	}
+
+	// Discard any excess data copied in due to avail being reduced due to a
+	// simultaneous write call to the socket.
+	if avail < len(v) {
+		v = v[:avail]
+	}
 
 	// Add data to the send queue.
+	l := len(v)
+	s := newSegmentFromView(&e.route, e.id, v)
 	e.sndBufUsed += l
 	e.sndBufInQueue += seqnum.Size(l)
 	e.sndQueue.PushBack(s)
 
 	e.sndBufMu.Unlock()
+	// Release the endpoint lock to prevent deadlocks due to lock
+	// order inversion when acquiring workMu.
+	e.mu.RUnlock()
 
 	if e.workMu.TryLock() {
 		// Do the work inline.
@@ -941,13 +875,13 @@ func (e *endpoint) Write(p tcpip.Payload, opts tcpip.WriteOptions) (uintptr, <-c
 		// Let the protocol goroutine do the work.
 		e.sndWaker.Assert()
 	}
-	return uintptr(l), nil, nil
+	return int64(l), nil, nil
 }
 
 // Peek reads data without consuming it from the endpoint.
 //
 // This method does not block if there is no data pending.
-func (e *endpoint) Peek(vec [][]byte) (uintptr, tcpip.ControlMessages, *tcpip.Error) {
+func (e *endpoint) Peek(vec [][]byte) (int64, tcpip.ControlMessages, *tcpip.Error) {
 	e.mu.RLock()
 	defer e.mu.RUnlock()
 
@@ -973,8 +907,7 @@ func (e *endpoint) Peek(vec [][]byte) (uintptr, tcpip.ControlMessages, *tcpip.Er
 	// Make a copy of vec so we can modify the slide headers.
 	vec = append([][]byte(nil), vec...)
 
-	var num uintptr
-
+	var num int64
 	for s := e.rcvList.Front(); s != nil; s = s.Next() {
 		views := s.data.Views()
 
@@ -993,7 +926,7 @@ func (e *endpoint) Peek(vec [][]byte) (uintptr, tcpip.ControlMessages, *tcpip.Er
 				n := copy(vec[0], v)
 				v = v[n:]
 				vec[0] = vec[0][n:]
-				num += uintptr(n)
+				num += int64(n)
 			}
 		}
 	}
@@ -1415,7 +1348,7 @@ func (e *endpoint) checkV4Mapped(addr *tcpip.FullAddress) (tcpip.NetworkProtocol
 
 		netProto = header.IPv4ProtocolNumber
 		addr.Addr = addr.Addr[header.IPv6AddressSize-header.IPv4AddressSize:]
-		if addr.Addr == "\x00\x00\x00\x00" {
+		if addr.Addr == header.IPv4Any {
 			addr.Addr = ""
 		}
 	}
@@ -1429,13 +1362,13 @@ func (e *endpoint) checkV4Mapped(addr *tcpip.FullAddress) (tcpip.NetworkProtocol
 	return netProto, nil
 }
 
+// Disconnect implements tcpip.Endpoint.Disconnect.
+func (*endpoint) Disconnect() *tcpip.Error {
+	return tcpip.ErrNotSupported
+}
+
 // Connect connects the endpoint to its peer.
 func (e *endpoint) Connect(addr tcpip.FullAddress) *tcpip.Error {
-	if addr.Addr == "" && addr.Port == 0 {
-		// AF_UNSPEC isn't supported.
-		return tcpip.ErrAddressFamilyNotSupported
-	}
-
 	return e.connect(addr, true, true)
 }
 
diff --git a/pkg/tcpip/transport/tcp/endpoint_state.go b/pkg/tcpip/transport/tcp/endpoint_state.go
index ef88dc618..831389ec7 100644
--- a/pkg/tcpip/transport/tcp/endpoint_state.go
+++ b/pkg/tcpip/transport/tcp/endpoint_state.go
@@ -20,6 +20,7 @@ import (
 	"time"
 
 	"gvisor.dev/gvisor/pkg/tcpip"
+	"gvisor.dev/gvisor/pkg/tcpip/header"
 	"gvisor.dev/gvisor/pkg/tcpip/stack"
 )
 
@@ -167,6 +168,107 @@ func (e *endpoint) afterLoad() {
 	stack.StackFromEnv.RegisterRestoredEndpoint(e)
 }
 
+// Resume implements tcpip.ResumableEndpoint.Resume.
+func (e *endpoint) Resume(s *stack.Stack) {
+	e.stack = s
+	e.segmentQueue.setLimit(MaxUnprocessedSegments)
+	e.workMu.Init()
+
+	state := e.state
+	switch state {
+	case StateInitial, StateBound, StateListen, StateConnecting, StateEstablished:
+		var ss SendBufferSizeOption
+		if err := e.stack.TransportProtocolOption(ProtocolNumber, &ss); err == nil {
+			if e.sndBufSize < ss.Min || e.sndBufSize > ss.Max {
+				panic(fmt.Sprintf("endpoint.sndBufSize %d is outside the min and max allowed [%d, %d]", e.sndBufSize, ss.Min, ss.Max))
+			}
+			if e.rcvBufSize < ss.Min || e.rcvBufSize > ss.Max {
+				panic(fmt.Sprintf("endpoint.rcvBufSize %d is outside the min and max allowed [%d, %d]", e.rcvBufSize, ss.Min, ss.Max))
+			}
+		}
+	}
+
+	bind := func() {
+		e.state = StateInitial
+		if len(e.bindAddress) == 0 {
+			e.bindAddress = e.id.LocalAddress
+		}
+		if err := e.Bind(tcpip.FullAddress{Addr: e.bindAddress, Port: e.id.LocalPort}); err != nil {
+			panic("endpoint binding failed: " + err.String())
+		}
+	}
+
+	switch state {
+	case StateEstablished, StateFinWait1, StateFinWait2, StateTimeWait, StateCloseWait, StateLastAck, StateClosing:
+		bind()
+		if len(e.connectingAddress) == 0 {
+			e.connectingAddress = e.id.RemoteAddress
+			// This endpoint is accepted by netstack but not yet by
+			// the app. If the endpoint is IPv6 but the remote
+			// address is IPv4, we need to connect as IPv6 so that
+			// dual-stack mode can be properly activated.
+			if e.netProto == header.IPv6ProtocolNumber && len(e.id.RemoteAddress) != header.IPv6AddressSize {
+				e.connectingAddress = "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\xff" + e.id.RemoteAddress
+			}
+		}
+		// Reset the scoreboard to reinitialize the sack information as
+		// we do not restore SACK information.
+		e.scoreboard.Reset()
+		if err := e.connect(tcpip.FullAddress{NIC: e.boundNICID, Addr: e.connectingAddress, Port: e.id.RemotePort}, false, e.workerRunning); err != tcpip.ErrConnectStarted {
+			panic("endpoint connecting failed: " + err.String())
+		}
+		connectedLoading.Done()
+	case StateListen:
+		tcpip.AsyncLoading.Add(1)
+		go func() {
+			connectedLoading.Wait()
+			bind()
+			backlog := cap(e.acceptedChan)
+			if err := e.Listen(backlog); err != nil {
+				panic("endpoint listening failed: " + err.String())
+			}
+			listenLoading.Done()
+			tcpip.AsyncLoading.Done()
+		}()
+	case StateConnecting, StateSynSent, StateSynRecv:
+		tcpip.AsyncLoading.Add(1)
+		go func() {
+			connectedLoading.Wait()
+			listenLoading.Wait()
+			bind()
+			if err := e.Connect(tcpip.FullAddress{NIC: e.boundNICID, Addr: e.connectingAddress, Port: e.id.RemotePort}); err != tcpip.ErrConnectStarted {
+				panic("endpoint connecting failed: " + err.String())
+			}
+			connectingLoading.Done()
+			tcpip.AsyncLoading.Done()
+		}()
+	case StateBound:
+		tcpip.AsyncLoading.Add(1)
+		go func() {
+			connectedLoading.Wait()
+			listenLoading.Wait()
+			connectingLoading.Wait()
+			bind()
+			tcpip.AsyncLoading.Done()
+		}()
+	case StateClose:
+		if e.isPortReserved {
+			tcpip.AsyncLoading.Add(1)
+			go func() {
+				connectedLoading.Wait()
+				listenLoading.Wait()
+				connectingLoading.Wait()
+				bind()
+				e.state = StateClose
+				tcpip.AsyncLoading.Done()
+			}()
+		}
+		fallthrough
+	case StateError:
+		tcpip.DeleteDanglingEndpoint(e)
+	}
+}
+
 // saveLastError is invoked by stateify.
 func (e *endpoint) saveLastError() string {
 	if e.lastError == nil {
diff --git a/pkg/tcpip/transport/tcp/tcp_test.go b/pkg/tcpip/transport/tcp/tcp_test.go
index 915a98047..f79b8ec5f 100644
--- a/pkg/tcpip/transport/tcp/tcp_test.go
+++ b/pkg/tcpip/transport/tcp/tcp_test.go
@@ -2874,15 +2874,11 @@ func makeStack() (*stack.Stack, *tcpip.Error) {
 
 	s.SetRouteTable([]tcpip.Route{
 		{
-			Destination: "\x00\x00\x00\x00",
-			Mask:        "\x00\x00\x00\x00",
-			Gateway:     "",
+			Destination: header.IPv4EmptySubnet,
 			NIC:         1,
 		},
 		{
-			Destination: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00",
-			Mask:        "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00",
-			Gateway:     "",
+			Destination: header.IPv6EmptySubnet,
 			NIC:         1,
 		},
 	})
diff --git a/pkg/tcpip/transport/tcp/testing/context/context.go b/pkg/tcpip/transport/tcp/testing/context/context.go
index bcc0f3e28..272481aa0 100644
--- a/pkg/tcpip/transport/tcp/testing/context/context.go
+++ b/pkg/tcpip/transport/tcp/testing/context/context.go
@@ -168,15 +168,11 @@ func New(t *testing.T, mtu uint32) *Context {
 
 	s.SetRouteTable([]tcpip.Route{
 		{
-			Destination: "\x00\x00\x00\x00",
-			Mask:        "\x00\x00\x00\x00",
-			Gateway:     "",
+			Destination: header.IPv4EmptySubnet,
 			NIC:         1,
 		},
 		{
-			Destination: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00",
-			Mask:        "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00",
-			Gateway:     "",
+			Destination: header.IPv6EmptySubnet,
 			NIC:         1,
 		},
 	})
diff --git a/pkg/tcpip/transport/udp/endpoint.go b/pkg/tcpip/transport/udp/endpoint.go
index 7c12a6092..ac5905772 100644
--- a/pkg/tcpip/transport/udp/endpoint.go
+++ b/pkg/tcpip/transport/udp/endpoint.go
@@ -178,53 +178,6 @@ func (e *endpoint) IPTables() (iptables.IPTables, error) {
 	return e.stack.IPTables(), nil
 }
 
-// Resume implements tcpip.ResumableEndpoint.Resume.
-func (e *endpoint) Resume(s *stack.Stack) {
-	e.stack = s
-
-	for _, m := range e.multicastMemberships {
-		if err := e.stack.JoinGroup(e.netProto, m.nicID, m.multicastAddr); err != nil {
-			panic(err)
-		}
-	}
-
-	if e.state != stateBound && e.state != stateConnected {
-		return
-	}
-
-	netProto := e.effectiveNetProtos[0]
-	// Connect() and bindLocked() both assert
-	//
-	//     netProto == header.IPv6ProtocolNumber
-	//
-	// before creating a multi-entry effectiveNetProtos.
-	if len(e.effectiveNetProtos) > 1 {
-		netProto = header.IPv6ProtocolNumber
-	}
-
-	var err *tcpip.Error
-	if e.state == stateConnected {
-		e.route, err = e.stack.FindRoute(e.regNICID, e.id.LocalAddress, e.id.RemoteAddress, netProto, e.multicastLoop)
-		if err != nil {
-			panic(*err)
-		}
-	} else if len(e.id.LocalAddress) != 0 { // stateBound
-		if e.stack.CheckLocalAddress(e.regNICID, netProto, e.id.LocalAddress) == 0 {
-			panic(tcpip.ErrBadLocalAddress)
-		}
-	}
-
-	// Our saved state had a port, but we don't actually have a
-	// reservation. We need to remove the port from our state, but still
-	// pass it to the reservation machinery.
-	id := e.id
-	e.id.LocalPort = 0
-	e.id, err = e.registerWithStack(e.regNICID, e.effectiveNetProtos, id)
-	if err != nil {
-		panic(*err)
-	}
-}
-
 // Read reads data from the endpoint. This method does not block if
 // there is no data pending.
 func (e *endpoint) Read(addr *tcpip.FullAddress) (buffer.View, tcpip.ControlMessages, *tcpip.Error) {
@@ -296,6 +249,11 @@ func (e *endpoint) prepareForWrite(to *tcpip.FullAddress) (retry bool, err *tcpi
 // specified address is a multicast address.
 func (e *endpoint) connectRoute(nicid tcpip.NICID, addr tcpip.FullAddress, netProto tcpip.NetworkProtocolNumber) (stack.Route, tcpip.NICID, *tcpip.Error) {
 	localAddr := e.id.LocalAddress
+	if isBroadcastOrMulticast(localAddr) {
+		// A packet can only originate from a unicast address (i.e., an interface).
+		localAddr = ""
+	}
+
 	if header.IsV4MulticastAddress(addr.Addr) || header.IsV6MulticastAddress(addr.Addr) {
 		if nicid == 0 {
 			nicid = e.multicastNICID
@@ -315,7 +273,7 @@ func (e *endpoint) connectRoute(nicid tcpip.NICID, addr tcpip.FullAddress, netPr
 
 // Write writes data to the endpoint's peer. This method does not block
 // if the data cannot be written.
-func (e *endpoint) Write(p tcpip.Payload, opts tcpip.WriteOptions) (uintptr, <-chan struct{}, *tcpip.Error) {
+func (e *endpoint) Write(p tcpip.Payload, opts tcpip.WriteOptions) (int64, <-chan struct{}, *tcpip.Error) {
 	// MSG_MORE is unimplemented. (This also means that MSG_EOR is a no-op.)
 	if opts.More {
 		return 0, nil, tcpip.ErrInvalidOptionValue
@@ -421,11 +379,11 @@ func (e *endpoint) Write(p tcpip.Payload, opts tcpip.WriteOptions) (uintptr, <-c
 	if err := sendUDP(route, buffer.View(v).ToVectorisedView(), e.id.LocalPort, dstPort, ttl); err != nil {
 		return 0, nil, err
 	}
-	return uintptr(len(v)), nil, nil
+	return int64(len(v)), nil, nil
 }
 
 // Peek only returns data from a single datagram, so do nothing here.
-func (e *endpoint) Peek([][]byte) (uintptr, tcpip.ControlMessages, *tcpip.Error) {
+func (e *endpoint) Peek([][]byte) (int64, tcpip.ControlMessages, *tcpip.Error) {
 	return 0, tcpip.ControlMessages{}, nil
 }
 
@@ -495,7 +453,12 @@ func (e *endpoint) SetSockOpt(opt interface{}) *tcpip.Error {
 		}
 
 		nicID := v.NIC
-		if v.InterfaceAddr == header.IPv4Any {
+
+		// The interface address is considered not-set if it is empty or contains
+		// all-zeros. The former represent the zero-value in golang, the latter the
+		// same in a setsockopt(IP_ADD_MEMBERSHIP, &ip_mreqn) syscall.
+		allZeros := header.IPv4Any
+		if len(v.InterfaceAddr) == 0 || v.InterfaceAddr == allZeros {
 			if nicID == 0 {
 				r, err := e.stack.FindRoute(0, "", v.MulticastAddr, header.IPv4ProtocolNumber, false /* multicastLoop */)
 				if err == nil {
@@ -739,7 +702,7 @@ func (e *endpoint) checkV4Mapped(addr *tcpip.FullAddress, allowMismatch bool) (t
 
 		netProto = header.IPv4ProtocolNumber
 		addr.Addr = addr.Addr[header.IPv6AddressSize-header.IPv4AddressSize:]
-		if addr.Addr == "\x00\x00\x00\x00" {
+		if addr.Addr == header.IPv4Any {
 			addr.Addr = ""
 		}
 
@@ -758,7 +721,8 @@ func (e *endpoint) checkV4Mapped(addr *tcpip.FullAddress, allowMismatch bool) (t
 	return netProto, nil
 }
 
-func (e *endpoint) disconnect() *tcpip.Error {
+// Disconnect implements tcpip.Endpoint.Disconnect.
+func (e *endpoint) Disconnect() *tcpip.Error {
 	e.mu.Lock()
 	defer e.mu.Unlock()
 
@@ -797,9 +761,6 @@ func (e *endpoint) Connect(addr tcpip.FullAddress) *tcpip.Error {
 	if err != nil {
 		return err
 	}
-	if addr.Addr == "" {
-		return e.disconnect()
-	}
 	if addr.Port == 0 {
 		// We don't support connecting to port zero.
 		return tcpip.ErrInvalidEndpointState
@@ -963,8 +924,8 @@ func (e *endpoint) bindLocked(addr tcpip.FullAddress) *tcpip.Error {
 	}
 
 	nicid := addr.NIC
-	if len(addr.Addr) != 0 {
-		// A local address was specified, verify that it's valid.
+	if len(addr.Addr) != 0 && !isBroadcastOrMulticast(addr.Addr) {
+		// A local unicast address was specified, verify that it's valid.
 		nicid = e.stack.CheckLocalAddress(addr.NIC, netProto, addr.Addr)
 		if nicid == 0 {
 			return tcpip.ErrBadLocalAddress
@@ -1113,3 +1074,7 @@ func (e *endpoint) State() uint32 {
 	// TODO(b/112063468): Translate internal state to values returned by Linux.
 	return 0
 }
+
+func isBroadcastOrMulticast(a tcpip.Address) bool {
+	return a == header.IPv4Broadcast || header.IsV4MulticastAddress(a) || header.IsV6MulticastAddress(a)
+}
diff --git a/pkg/tcpip/transport/udp/endpoint_state.go b/pkg/tcpip/transport/udp/endpoint_state.go
index 86db36260..5cbb56120 100644
--- a/pkg/tcpip/transport/udp/endpoint_state.go
+++ b/pkg/tcpip/transport/udp/endpoint_state.go
@@ -15,7 +15,9 @@
 package udp
 
 import (
+	"gvisor.dev/gvisor/pkg/tcpip"
 	"gvisor.dev/gvisor/pkg/tcpip/buffer"
+	"gvisor.dev/gvisor/pkg/tcpip/header"
 	"gvisor.dev/gvisor/pkg/tcpip/stack"
 )
 
@@ -64,3 +66,51 @@ func (e *endpoint) loadRcvBufSizeMax(max int) {
 func (e *endpoint) afterLoad() {
 	stack.StackFromEnv.RegisterRestoredEndpoint(e)
 }
+
+// Resume implements tcpip.ResumableEndpoint.Resume.
+func (e *endpoint) Resume(s *stack.Stack) {
+	e.stack = s
+
+	for _, m := range e.multicastMemberships {
+		if err := e.stack.JoinGroup(e.netProto, m.nicID, m.multicastAddr); err != nil {
+			panic(err)
+		}
+	}
+
+	if e.state != stateBound && e.state != stateConnected {
+		return
+	}
+
+	netProto := e.effectiveNetProtos[0]
+	// Connect() and bindLocked() both assert
+	//
+	//     netProto == header.IPv6ProtocolNumber
+	//
+	// before creating a multi-entry effectiveNetProtos.
+	if len(e.effectiveNetProtos) > 1 {
+		netProto = header.IPv6ProtocolNumber
+	}
+
+	var err *tcpip.Error
+	if e.state == stateConnected {
+		e.route, err = e.stack.FindRoute(e.regNICID, e.id.LocalAddress, e.id.RemoteAddress, netProto, e.multicastLoop)
+		if err != nil {
+			panic(err)
+		}
+	} else if len(e.id.LocalAddress) != 0 && !isBroadcastOrMulticast(e.id.LocalAddress) { // stateBound
+		// A local unicast address is specified, verify that it's valid.
+		if e.stack.CheckLocalAddress(e.regNICID, netProto, e.id.LocalAddress) == 0 {
+			panic(tcpip.ErrBadLocalAddress)
+		}
+	}
+
+	// Our saved state had a port, but we don't actually have a
+	// reservation. We need to remove the port from our state, but still
+	// pass it to the reservation machinery.
+	id := e.id
+	e.id.LocalPort = 0
+	e.id, err = e.registerWithStack(e.regNICID, e.effectiveNetProtos, id)
+	if err != nil {
+		panic(err)
+	}
+}
diff --git a/pkg/tcpip/transport/udp/udp_test.go b/pkg/tcpip/transport/udp/udp_test.go
index 56c285f88..9da6edce2 100644
--- a/pkg/tcpip/transport/udp/udp_test.go
+++ b/pkg/tcpip/transport/udp/udp_test.go
@@ -16,6 +16,7 @@ package udp_test
 
 import (
 	"bytes"
+	"fmt"
 	"math"
 	"math/rand"
 	"testing"
@@ -34,13 +35,19 @@ import (
 	"gvisor.dev/gvisor/pkg/waiter"
 )
 
+// Addresses and ports used for testing. It is recommended that tests stick to
+// using these addresses as it allows using the testFlow helper.
+// Naming rules: 'stack*'' denotes local addresses and ports, while 'test*'
+// represents the remote endpoint.
 const (
+	v4MappedAddrPrefix    = "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\xff"
 	stackV6Addr           = "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01"
 	testV6Addr            = "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02"
-	stackV4MappedAddr     = "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\xff" + stackAddr
-	testV4MappedAddr      = "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\xff" + testAddr
-	multicastV4MappedAddr = "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\xff" + multicastAddr
-	V4MappedWildcardAddr  = "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\xff\x00\x00\x00\x00"
+	stackV4MappedAddr     = v4MappedAddrPrefix + stackAddr
+	testV4MappedAddr      = v4MappedAddrPrefix + testAddr
+	multicastV4MappedAddr = v4MappedAddrPrefix + multicastAddr
+	broadcastV4MappedAddr = v4MappedAddrPrefix + broadcastAddr
+	v4MappedWildcardAddr  = v4MappedAddrPrefix + "\x00\x00\x00\x00"
 
 	stackAddr       = "\x0a\x00\x00\x01"
 	stackPort       = 1234
@@ -48,7 +55,7 @@ const (
 	testPort        = 4096
 	multicastAddr   = "\xe8\x2b\xd3\xea"
 	multicastV6Addr = "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
-	multicastPort   = 1234
+	broadcastAddr   = header.IPv4Broadcast
 
 	// defaultMTU is the MTU, in bytes, used throughout the tests, except
 	// where another value is explicitly used. It is chosen to match the MTU
@@ -56,6 +63,205 @@ const (
 	defaultMTU = 65536
 )
 
+// header4Tuple stores the 4-tuple {src-IP, src-port, dst-IP, dst-port} used in
+// a packet header. These values are used to populate a header or verify one.
+// Note that because they are used in packet headers, the addresses are never in
+// a V4-mapped format.
+type header4Tuple struct {
+	srcAddr tcpip.FullAddress
+	dstAddr tcpip.FullAddress
+}
+
+// testFlow implements a helper type used for sending and receiving test
+// packets. A given test flow value defines 1) the socket endpoint used for the
+// test and 2) the type of packet send or received on the endpoint. E.g., a
+// multicastV6Only flow is a V6 multicast packet passing through a V6-only
+// endpoint. The type provides helper methods to characterize the flow (e.g.,
+// isV4) as well as return a proper header4Tuple for it.
+type testFlow int
+
+const (
+	unicastV4       testFlow = iota // V4 unicast on a V4 socket
+	unicastV4in6                    // V4-mapped unicast on a V6-dual socket
+	unicastV6                       // V6 unicast on a V6 socket
+	unicastV6Only                   // V6 unicast on a V6-only socket
+	multicastV4                     // V4 multicast on a V4 socket
+	multicastV4in6                  // V4-mapped multicast on a V6-dual socket
+	multicastV6                     // V6 multicast on a V6 socket
+	multicastV6Only                 // V6 multicast on a V6-only socket
+	broadcast                       // V4 broadcast on a V4 socket
+	broadcastIn6                    // V4-mapped broadcast on a V6-dual socket
+)
+
+func (flow testFlow) String() string {
+	switch flow {
+	case unicastV4:
+		return "unicastV4"
+	case unicastV6:
+		return "unicastV6"
+	case unicastV6Only:
+		return "unicastV6Only"
+	case unicastV4in6:
+		return "unicastV4in6"
+	case multicastV4:
+		return "multicastV4"
+	case multicastV6:
+		return "multicastV6"
+	case multicastV6Only:
+		return "multicastV6Only"
+	case multicastV4in6:
+		return "multicastV4in6"
+	case broadcast:
+		return "broadcast"
+	case broadcastIn6:
+		return "broadcastIn6"
+	default:
+		return "unknown"
+	}
+}
+
+// packetDirection explains if a flow is incoming (read) or outgoing (write).
+type packetDirection int
+
+const (
+	incoming packetDirection = iota
+	outgoing
+)
+
+// header4Tuple returns the header4Tuple for the given flow and direction. Note
+// that the tuple contains no mapped addresses as those only exist at the socket
+// level but not at the packet header level.
+func (flow testFlow) header4Tuple(d packetDirection) header4Tuple {
+	var h header4Tuple
+	if flow.isV4() {
+		if d == outgoing {
+			h = header4Tuple{
+				srcAddr: tcpip.FullAddress{Addr: stackAddr, Port: stackPort},
+				dstAddr: tcpip.FullAddress{Addr: testAddr, Port: testPort},
+			}
+		} else {
+			h = header4Tuple{
+				srcAddr: tcpip.FullAddress{Addr: testAddr, Port: testPort},
+				dstAddr: tcpip.FullAddress{Addr: stackAddr, Port: stackPort},
+			}
+		}
+		if flow.isMulticast() {
+			h.dstAddr.Addr = multicastAddr
+		} else if flow.isBroadcast() {
+			h.dstAddr.Addr = broadcastAddr
+		}
+	} else { // IPv6
+		if d == outgoing {
+			h = header4Tuple{
+				srcAddr: tcpip.FullAddress{Addr: stackV6Addr, Port: stackPort},
+				dstAddr: tcpip.FullAddress{Addr: testV6Addr, Port: testPort},
+			}
+		} else {
+			h = header4Tuple{
+				srcAddr: tcpip.FullAddress{Addr: testV6Addr, Port: testPort},
+				dstAddr: tcpip.FullAddress{Addr: stackV6Addr, Port: stackPort},
+			}
+		}
+		if flow.isMulticast() {
+			h.dstAddr.Addr = multicastV6Addr
+		}
+	}
+	return h
+}
+
+func (flow testFlow) getMcastAddr() tcpip.Address {
+	if flow.isV4() {
+		return multicastAddr
+	}
+	return multicastV6Addr
+}
+
+// mapAddrIfApplicable converts the given V4 address into its V4-mapped version
+// if it is applicable to the flow.
+func (flow testFlow) mapAddrIfApplicable(v4Addr tcpip.Address) tcpip.Address {
+	if flow.isMapped() {
+		return v4MappedAddrPrefix + v4Addr
+	}
+	return v4Addr
+}
+
+// netProto returns the protocol number used for the network packet.
+func (flow testFlow) netProto() tcpip.NetworkProtocolNumber {
+	if flow.isV4() {
+		return ipv4.ProtocolNumber
+	}
+	return ipv6.ProtocolNumber
+}
+
+// sockProto returns the protocol number used when creating the socket
+// endpoint for this flow.
+func (flow testFlow) sockProto() tcpip.NetworkProtocolNumber {
+	switch flow {
+	case unicastV4in6, unicastV6, unicastV6Only, multicastV4in6, multicastV6, multicastV6Only, broadcastIn6:
+		return ipv6.ProtocolNumber
+	case unicastV4, multicastV4, broadcast:
+		return ipv4.ProtocolNumber
+	default:
+		panic(fmt.Sprintf("invalid testFlow given: %d", flow))
+	}
+}
+
+func (flow testFlow) checkerFn() func(*testing.T, []byte, ...checker.NetworkChecker) {
+	if flow.isV4() {
+		return checker.IPv4
+	}
+	return checker.IPv6
+}
+
+func (flow testFlow) isV6() bool { return !flow.isV4() }
+func (flow testFlow) isV4() bool {
+	return flow.sockProto() == ipv4.ProtocolNumber || flow.isMapped()
+}
+
+func (flow testFlow) isV6Only() bool {
+	switch flow {
+	case unicastV6Only, multicastV6Only:
+		return true
+	case unicastV4, unicastV4in6, unicastV6, multicastV4, multicastV4in6, multicastV6, broadcast, broadcastIn6:
+		return false
+	default:
+		panic(fmt.Sprintf("invalid testFlow given: %d", flow))
+	}
+}
+
+func (flow testFlow) isMulticast() bool {
+	switch flow {
+	case multicastV4, multicastV4in6, multicastV6, multicastV6Only:
+		return true
+	case unicastV4, unicastV4in6, unicastV6, unicastV6Only, broadcast, broadcastIn6:
+		return false
+	default:
+		panic(fmt.Sprintf("invalid testFlow given: %d", flow))
+	}
+}
+
+func (flow testFlow) isBroadcast() bool {
+	switch flow {
+	case broadcast, broadcastIn6:
+		return true
+	case unicastV4, unicastV4in6, unicastV6, unicastV6Only, multicastV4, multicastV4in6, multicastV6, multicastV6Only:
+		return false
+	default:
+		panic(fmt.Sprintf("invalid testFlow given: %d", flow))
+	}
+}
+
+func (flow testFlow) isMapped() bool {
+	switch flow {
+	case unicastV4in6, multicastV4in6, broadcastIn6:
+		return true
+	case unicastV4, unicastV6, unicastV6Only, multicastV4, multicastV6, multicastV6Only, broadcast:
+		return false
+	default:
+		panic(fmt.Sprintf("invalid testFlow given: %d", flow))
+	}
+}
+
 type testContext struct {
 	t      *testing.T
 	linkEP *channel.Endpoint
@@ -65,12 +271,9 @@ type testContext struct {
 	wq waiter.Queue
 }
 
-type headers struct {
-	srcPort uint16
-	dstPort uint16
-}
-
 func newDualTestContext(t *testing.T, mtu uint32) *testContext {
+	t.Helper()
+
 	s := stack.New([]string{ipv4.ProtocolName, ipv6.ProtocolName}, []string{udp.ProtocolName}, stack.Options{})
 
 	id, linkEP := channel.New(256, mtu, "")
@@ -91,15 +294,11 @@ func newDualTestContext(t *testing.T, mtu uint32) *testContext {
 
 	s.SetRouteTable([]tcpip.Route{
 		{
-			Destination: "\x00\x00\x00\x00",
-			Mask:        "\x00\x00\x00\x00",
-			Gateway:     "",
+			Destination: header.IPv4EmptySubnet,
 			NIC:         1,
 		},
 		{
-			Destination: "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00",
-			Mask:        "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00",
-			Gateway:     "",
+			Destination: header.IPv6EmptySubnet,
 			NIC:         1,
 		},
 	})
@@ -117,51 +316,54 @@ func (c *testContext) cleanup() {
 	}
 }
 
-func (c *testContext) createV6Endpoint(v6only bool) {
+func (c *testContext) createEndpoint(proto tcpip.NetworkProtocolNumber) {
+	c.t.Helper()
+
 	var err *tcpip.Error
-	c.ep, err = c.s.NewEndpoint(udp.ProtocolNumber, ipv6.ProtocolNumber, &c.wq)
+	c.ep, err = c.s.NewEndpoint(udp.ProtocolNumber, proto, &c.wq)
 	if err != nil {
-		c.t.Fatalf("NewEndpoint failed: %v", err)
+		c.t.Fatal("NewEndpoint failed: ", err)
 	}
+}
 
-	var v tcpip.V6OnlyOption
-	if v6only {
-		v = 1
-	}
-	if err := c.ep.SetSockOpt(v); err != nil {
-		c.t.Fatalf("SetSockOpt failed failed: %v", err)
+func (c *testContext) createEndpointForFlow(flow testFlow) {
+	c.t.Helper()
+
+	c.createEndpoint(flow.sockProto())
+	if flow.isV6Only() {
+		if err := c.ep.SetSockOpt(tcpip.V6OnlyOption(1)); err != nil {
+			c.t.Fatalf("SetSockOpt failed: %v", err)
+		}
+	} else if flow.isBroadcast() {
+		if err := c.ep.SetSockOpt(tcpip.BroadcastOption(1)); err != nil {
+			c.t.Fatal("SetSockOpt failed:", err)
+		}
 	}
 }
 
-func (c *testContext) getPacket(protocolNumber tcpip.NetworkProtocolNumber, multicast bool) []byte {
+// getPacketAndVerify reads a packet from the link endpoint and verifies the
+// header against expected values from the given test flow. In addition, it
+// calls any extra checker functions provided.
+func (c *testContext) getPacketAndVerify(flow testFlow, checkers ...checker.NetworkChecker) []byte {
+	c.t.Helper()
+
 	select {
 	case p := <-c.linkEP.C:
-		if p.Proto != protocolNumber {
-			c.t.Fatalf("Bad network protocol: got %v, wanted %v", p.Proto, protocolNumber)
+		if p.Proto != flow.netProto() {
+			c.t.Fatalf("Bad network protocol: got %v, wanted %v", p.Proto, flow.netProto())
 		}
 		b := make([]byte, len(p.Header)+len(p.Payload))
 		copy(b, p.Header)
 		copy(b[len(p.Header):], p.Payload)
 
-		var checkerFn func(*testing.T, []byte, ...checker.NetworkChecker)
-		var srcAddr, dstAddr tcpip.Address
-		switch protocolNumber {
-		case ipv4.ProtocolNumber:
-			checkerFn = checker.IPv4
-			srcAddr, dstAddr = stackAddr, testAddr
-			if multicast {
-				dstAddr = multicastAddr
-			}
-		case ipv6.ProtocolNumber:
-			checkerFn = checker.IPv6
-			srcAddr, dstAddr = stackV6Addr, testV6Addr
-			if multicast {
-				dstAddr = multicastV6Addr
-			}
-		default:
-			c.t.Fatalf("unknown protocol %d", protocolNumber)
-		}
-		checkerFn(c.t, b, checker.SrcAddr(srcAddr), checker.DstAddr(dstAddr))
+		h := flow.header4Tuple(outgoing)
+		checkers := append(
+			checkers,
+			checker.SrcAddr(h.srcAddr.Addr),
+			checker.DstAddr(h.dstAddr.Addr),
+			checker.UDP(checker.DstPort(h.dstAddr.Port)),
+		)
+		flow.checkerFn()(c.t, b, checkers...)
 		return b
 
 	case <-time.After(2 * time.Second):
@@ -171,7 +373,22 @@ func (c *testContext) getPacket(protocolNumber tcpip.NetworkProtocolNumber, mult
 	return nil
 }
 
-func (c *testContext) sendV6Packet(payload []byte, h *headers) {
+// injectPacket creates a packet of the given flow and with the given payload,
+// and injects it into the link endpoint.
+func (c *testContext) injectPacket(flow testFlow, payload []byte) {
+	c.t.Helper()
+
+	h := flow.header4Tuple(incoming)
+	if flow.isV4() {
+		c.injectV4Packet(payload, &h)
+	} else {
+		c.injectV6Packet(payload, &h)
+	}
+}
+
+// injectV6Packet creates a V6 test packet with the given payload and header
+// values, and injects it into the link endpoint.
+func (c *testContext) injectV6Packet(payload []byte, h *header4Tuple) {
 	// Allocate a buffer for data and headers.
 	buf := buffer.NewView(header.UDPMinimumSize + header.IPv6MinimumSize + len(payload))
 	copy(buf[len(buf)-len(payload):], payload)
@@ -182,20 +399,20 @@ func (c *testContext) sendV6Packet(payload []byte, h *headers) {
 		PayloadLength: uint16(header.UDPMinimumSize + len(payload)),
 		NextHeader:    uint8(udp.ProtocolNumber),
 		HopLimit:      65,
-		SrcAddr:       testV6Addr,
-		DstAddr:       stackV6Addr,
+		SrcAddr:       h.srcAddr.Addr,
+		DstAddr:       h.dstAddr.Addr,
 	})
 
 	// Initialize the UDP header.
 	u := header.UDP(buf[header.IPv6MinimumSize:])
 	u.Encode(&header.UDPFields{
-		SrcPort: h.srcPort,
-		DstPort: h.dstPort,
+		SrcPort: h.srcAddr.Port,
+		DstPort: h.dstAddr.Port,
 		Length:  uint16(header.UDPMinimumSize + len(payload)),
 	})
 
 	// Calculate the UDP pseudo-header checksum.
-	xsum := header.PseudoHeaderChecksum(udp.ProtocolNumber, testV6Addr, stackV6Addr, uint16(len(u)))
+	xsum := header.PseudoHeaderChecksum(udp.ProtocolNumber, h.srcAddr.Addr, h.dstAddr.Addr, uint16(len(u)))
 
 	// Calculate the UDP checksum and set it.
 	xsum = header.Checksum(payload, xsum)
@@ -205,7 +422,9 @@ func (c *testContext) sendV6Packet(payload []byte, h *headers) {
 	c.linkEP.Inject(ipv6.ProtocolNumber, buf.ToVectorisedView())
 }
 
-func (c *testContext) sendPacket(payload []byte, h *headers) {
+// injectV6Packet creates a V4 test packet with the given payload and header
+// values, and injects it into the link endpoint.
+func (c *testContext) injectV4Packet(payload []byte, h *header4Tuple) {
 	// Allocate a buffer for data and headers.
 	buf := buffer.NewView(header.UDPMinimumSize + header.IPv4MinimumSize + len(payload))
 	copy(buf[len(buf)-len(payload):], payload)
@@ -217,21 +436,21 @@ func (c *testContext) sendPacket(payload []byte, h *headers) {
 		TotalLength: uint16(len(buf)),
 		TTL:         65,
 		Protocol:    uint8(udp.ProtocolNumber),
-		SrcAddr:     testAddr,
-		DstAddr:     stackAddr,
+		SrcAddr:     h.srcAddr.Addr,
+		DstAddr:     h.dstAddr.Addr,
 	})
 	ip.SetChecksum(^ip.CalculateChecksum())
 
 	// Initialize the UDP header.
 	u := header.UDP(buf[header.IPv4MinimumSize:])
 	u.Encode(&header.UDPFields{
-		SrcPort: h.srcPort,
-		DstPort: h.dstPort,
+		SrcPort: h.srcAddr.Port,
+		DstPort: h.dstAddr.Port,
 		Length:  uint16(header.UDPMinimumSize + len(payload)),
 	})
 
 	// Calculate the UDP pseudo-header checksum.
-	xsum := header.PseudoHeaderChecksum(udp.ProtocolNumber, testAddr, stackAddr, uint16(len(u)))
+	xsum := header.PseudoHeaderChecksum(udp.ProtocolNumber, h.srcAddr.Addr, h.dstAddr.Addr, uint16(len(u)))
 
 	// Calculate the UDP checksum and set it.
 	xsum = header.Checksum(payload, xsum)
@@ -253,7 +472,7 @@ func TestBindPortReuse(t *testing.T) {
 	c := newDualTestContext(t, defaultMTU)
 	defer c.cleanup()
 
-	c.createV6Endpoint(false)
+	c.createEndpoint(ipv6.ProtocolNumber)
 
 	var eps [5]tcpip.Endpoint
 	reusePortOpt := tcpip.ReusePortOption(1)
@@ -296,9 +515,9 @@ func TestBindPortReuse(t *testing.T) {
 		// Send a packet.
 		port := uint16(i % nports)
 		payload := newPayload()
-		c.sendV6Packet(payload, &headers{
-			srcPort: testPort + port,
-			dstPort: stackPort,
+		c.injectV6Packet(payload, &header4Tuple{
+			srcAddr: tcpip.FullAddress{Addr: testV6Addr, Port: testPort + port},
+			dstAddr: tcpip.FullAddress{Addr: stackV6Addr, Port: stackPort},
 		})
 
 		var addr tcpip.FullAddress
@@ -333,13 +552,14 @@ func TestBindPortReuse(t *testing.T) {
 	}
 }
 
-func testV4Read(c *testContext) {
-	// Send a packet.
+// testRead sends a packet of the given test flow into the stack by injecting it
+// into the link endpoint. It then reads it from the UDP endpoint and verifies
+// its correctness.
+func testRead(c *testContext, flow testFlow) {
+	c.t.Helper()
+
 	payload := newPayload()
-	c.sendPacket(payload, &headers{
-		srcPort: testPort,
-		dstPort: stackPort,
-	})
+	c.injectPacket(flow, payload)
 
 	// Try to receive the data.
 	we, ch := waiter.NewChannelEntry(nil)
@@ -363,8 +583,9 @@ func testV4Read(c *testContext) {
 	}
 
 	// Check the peer address.
-	if addr.Addr != testAddr {
-		c.t.Fatalf("Unexpected remote address: got %v, want %v", addr.Addr, testAddr)
+	h := flow.header4Tuple(incoming)
+	if addr.Addr != h.srcAddr.Addr {
+		c.t.Fatalf("Unexpected remote address: got %v, want %v", addr.Addr, h.srcAddr)
 	}
 
 	// Check the payload.
@@ -377,7 +598,7 @@ func TestBindEphemeralPort(t *testing.T) {
 	c := newDualTestContext(t, defaultMTU)
 	defer c.cleanup()
 
-	c.createV6Endpoint(false)
+	c.createEndpoint(ipv6.ProtocolNumber)
 
 	if err := c.ep.Bind(tcpip.FullAddress{}); err != nil {
 		t.Fatalf("ep.Bind(...) failed: %v", err)
@@ -388,7 +609,7 @@ func TestBindReservedPort(t *testing.T) {
 	c := newDualTestContext(t, defaultMTU)
 	defer c.cleanup()
 
-	c.createV6Endpoint(false)
+	c.createEndpoint(ipv6.ProtocolNumber)
 
 	if err := c.ep.Connect(tcpip.FullAddress{Addr: testV6Addr, Port: testPort}); err != nil {
 		c.t.Fatalf("Connect failed: %v", err)
@@ -447,7 +668,7 @@ func TestV4ReadOnV6(t *testing.T) {
 	c := newDualTestContext(t, defaultMTU)
 	defer c.cleanup()
 
-	c.createV6Endpoint(false)
+	c.createEndpointForFlow(unicastV4in6)
 
 	// Bind to wildcard.
 	if err := c.ep.Bind(tcpip.FullAddress{Port: stackPort}); err != nil {
@@ -455,29 +676,29 @@ func TestV4ReadOnV6(t *testing.T) {
 	}
 
 	// Test acceptance.
-	testV4Read(c)
+	testRead(c, unicastV4in6)
 }
 
 func TestV4ReadOnBoundToV4MappedWildcard(t *testing.T) {
 	c := newDualTestContext(t, defaultMTU)
 	defer c.cleanup()
 
-	c.createV6Endpoint(false)
+	c.createEndpointForFlow(unicastV4in6)
 
 	// Bind to v4 mapped wildcard.
-	if err := c.ep.Bind(tcpip.FullAddress{Addr: V4MappedWildcardAddr, Port: stackPort}); err != nil {
+	if err := c.ep.Bind(tcpip.FullAddress{Addr: v4MappedWildcardAddr, Port: stackPort}); err != nil {
 		c.t.Fatalf("Bind failed: %v", err)
 	}
 
 	// Test acceptance.
-	testV4Read(c)
+	testRead(c, unicastV4in6)
 }
 
 func TestV4ReadOnBoundToV4Mapped(t *testing.T) {
 	c := newDualTestContext(t, defaultMTU)
 	defer c.cleanup()
 
-	c.createV6Endpoint(false)
+	c.createEndpointForFlow(unicastV4in6)
 
 	// Bind to local address.
 	if err := c.ep.Bind(tcpip.FullAddress{Addr: stackV4MappedAddr, Port: stackPort}); err != nil {
@@ -485,69 +706,29 @@ func TestV4ReadOnBoundToV4Mapped(t *testing.T) {
 	}
 
 	// Test acceptance.
-	testV4Read(c)
+	testRead(c, unicastV4in6)
 }
 
 func TestV6ReadOnV6(t *testing.T) {
 	c := newDualTestContext(t, defaultMTU)
 	defer c.cleanup()
 
-	c.createV6Endpoint(false)
+	c.createEndpointForFlow(unicastV6)
 
 	// Bind to wildcard.
 	if err := c.ep.Bind(tcpip.FullAddress{Port: stackPort}); err != nil {
 		c.t.Fatalf("Bind failed: %v", err)
 	}
 
-	// Send a packet.
-	payload := newPayload()
-	c.sendV6Packet(payload, &headers{
-		srcPort: testPort,
-		dstPort: stackPort,
-	})
-
-	// Try to receive the data.
-	we, ch := waiter.NewChannelEntry(nil)
-	c.wq.EventRegister(&we, waiter.EventIn)
-	defer c.wq.EventUnregister(&we)
-
-	var addr tcpip.FullAddress
-	v, _, err := c.ep.Read(&addr)
-	if err == tcpip.ErrWouldBlock {
-		// Wait for data to become available.
-		select {
-		case <-ch:
-			v, _, err = c.ep.Read(&addr)
-			if err != nil {
-				c.t.Fatalf("Read failed: %v", err)
-			}
-
-		case <-time.After(1 * time.Second):
-			c.t.Fatalf("Timed out waiting for data")
-		}
-	}
-
-	// Check the peer address.
-	if addr.Addr != testV6Addr {
-		c.t.Fatalf("Unexpected remote address: got %v, want %v", addr.Addr, testAddr)
-	}
-
-	// Check the payload.
-	if !bytes.Equal(payload, v) {
-		c.t.Fatalf("Bad payload: got %x, want %x", v, payload)
-	}
+	// Test acceptance.
+	testRead(c, unicastV6)
 }
 
 func TestV4ReadOnV4(t *testing.T) {
 	c := newDualTestContext(t, defaultMTU)
 	defer c.cleanup()
 
-	// Create v4 UDP endpoint.
-	var err *tcpip.Error
-	c.ep, err = c.s.NewEndpoint(udp.ProtocolNumber, ipv4.ProtocolNumber, &c.wq)
-	if err != nil {
-		c.t.Fatalf("NewEndpoint failed: %v", err)
-	}
+	c.createEndpointForFlow(unicastV4)
 
 	// Bind to wildcard.
 	if err := c.ep.Bind(tcpip.FullAddress{Port: stackPort}); err != nil {
@@ -555,62 +736,123 @@ func TestV4ReadOnV4(t *testing.T) {
 	}
 
 	// Test acceptance.
-	testV4Read(c)
+	testRead(c, unicastV4)
 }
 
-func testV4Write(c *testContext) uint16 {
-	// Write to V4 mapped address.
-	payload := buffer.View(newPayload())
-	n, _, err := c.ep.Write(tcpip.SlicePayload(payload), tcpip.WriteOptions{
-		To: &tcpip.FullAddress{Addr: testV4MappedAddr, Port: testPort},
-	})
-	if err != nil {
-		c.t.Fatalf("Write failed: %v", err)
+// TestReadOnBoundToMulticast checks that an endpoint can bind to a multicast
+// address and receive data sent to that address.
+func TestReadOnBoundToMulticast(t *testing.T) {
+	// FIXME(b/128189410): multicastV4in6 currently doesn't work as
+	// AddMembershipOption doesn't handle V4in6 addresses.
+	for _, flow := range []testFlow{multicastV4, multicastV6, multicastV6Only} {
+		t.Run(fmt.Sprintf("flow:%s", flow), func(t *testing.T) {
+			c := newDualTestContext(t, defaultMTU)
+			defer c.cleanup()
+
+			c.createEndpointForFlow(flow)
+
+			// Bind to multicast address.
+			mcastAddr := flow.mapAddrIfApplicable(flow.getMcastAddr())
+			if err := c.ep.Bind(tcpip.FullAddress{Addr: mcastAddr, Port: stackPort}); err != nil {
+				c.t.Fatal("Bind failed:", err)
+			}
+
+			// Join multicast group.
+			ifoptSet := tcpip.AddMembershipOption{NIC: 1, MulticastAddr: mcastAddr}
+			if err := c.ep.SetSockOpt(ifoptSet); err != nil {
+				c.t.Fatal("SetSockOpt failed:", err)
+			}
+
+			testRead(c, flow)
+		})
 	}
-	if n != uintptr(len(payload)) {
-		c.t.Fatalf("Bad number of bytes written: got %v, want %v", n, len(payload))
+}
+
+// TestV4ReadOnBoundToBroadcast checks that an endpoint can bind to a broadcast
+// address and receive broadcast data on it.
+func TestV4ReadOnBoundToBroadcast(t *testing.T) {
+	for _, flow := range []testFlow{broadcast, broadcastIn6} {
+		t.Run(fmt.Sprintf("flow:%s", flow), func(t *testing.T) {
+			c := newDualTestContext(t, defaultMTU)
+			defer c.cleanup()
+
+			c.createEndpointForFlow(flow)
+
+			// Bind to broadcast address.
+			bcastAddr := flow.mapAddrIfApplicable(broadcastAddr)
+			if err := c.ep.Bind(tcpip.FullAddress{Addr: bcastAddr, Port: stackPort}); err != nil {
+				c.t.Fatalf("Bind failed: %s", err)
+			}
+
+			// Test acceptance.
+			testRead(c, flow)
+		})
 	}
+}
 
-	// Check that we received the packet.
-	b := c.getPacket(ipv4.ProtocolNumber, false)
-	udp := header.UDP(header.IPv4(b).Payload())
-	checker.IPv4(c.t, b,
-		checker.UDP(
-			checker.DstPort(testPort),
-		),
-	)
+// testFailingWrite sends a packet of the given test flow into the UDP endpoint
+// and verifies it fails with the provided error code.
+func testFailingWrite(c *testContext, flow testFlow, wantErr *tcpip.Error) {
+	c.t.Helper()
 
-	// Check the payload.
-	if !bytes.Equal(payload, udp.Payload()) {
-		c.t.Fatalf("Bad payload: got %x, want %x", udp.Payload(), payload)
+	h := flow.header4Tuple(outgoing)
+	writeDstAddr := flow.mapAddrIfApplicable(h.dstAddr.Addr)
+
+	payload := buffer.View(newPayload())
+	_, _, gotErr := c.ep.Write(tcpip.SlicePayload(payload), tcpip.WriteOptions{
+		To: &tcpip.FullAddress{Addr: writeDstAddr, Port: h.dstAddr.Port},
+	})
+	if gotErr != wantErr {
+		c.t.Fatalf("Write returned unexpected error: got %v, want %v", gotErr, wantErr)
 	}
+}
 
-	return udp.SourcePort()
+// testWrite sends a packet of the given test flow from the UDP endpoint to the
+// flow's destination address:port. It then receives it from the link endpoint
+// and verifies its correctness including any additional checker functions
+// provided.
+func testWrite(c *testContext, flow testFlow, checkers ...checker.NetworkChecker) uint16 {
+	c.t.Helper()
+	return testWriteInternal(c, flow, true, checkers...)
 }
 
-func testV6Write(c *testContext) uint16 {
-	// Write to v6 address.
+// testWriteWithoutDestination sends a packet of the given test flow from the
+// UDP endpoint without giving a destination address:port. It then receives it
+// from the link endpoint and verifies its correctness including any additional
+// checker functions provided.
+func testWriteWithoutDestination(c *testContext, flow testFlow, checkers ...checker.NetworkChecker) uint16 {
+	c.t.Helper()
+	return testWriteInternal(c, flow, false, checkers...)
+}
+
+func testWriteInternal(c *testContext, flow testFlow, setDest bool, checkers ...checker.NetworkChecker) uint16 {
+	c.t.Helper()
+
+	writeOpts := tcpip.WriteOptions{}
+	if setDest {
+		h := flow.header4Tuple(outgoing)
+		writeDstAddr := flow.mapAddrIfApplicable(h.dstAddr.Addr)
+		writeOpts = tcpip.WriteOptions{
+			To: &tcpip.FullAddress{Addr: writeDstAddr, Port: h.dstAddr.Port},
+		}
+	}
 	payload := buffer.View(newPayload())
-	n, _, err := c.ep.Write(tcpip.SlicePayload(payload), tcpip.WriteOptions{
-		To: &tcpip.FullAddress{Addr: testV6Addr, Port: testPort},
-	})
+	n, _, err := c.ep.Write(tcpip.SlicePayload(payload), writeOpts)
 	if err != nil {
 		c.t.Fatalf("Write failed: %v", err)
 	}
-	if n != uintptr(len(payload)) {
+	if n != int64(len(payload)) {
 		c.t.Fatalf("Bad number of bytes written: got %v, want %v", n, len(payload))
 	}
 
-	// Check that we received the packet.
-	b := c.getPacket(ipv6.ProtocolNumber, false)
-	udp := header.UDP(header.IPv6(b).Payload())
-	checker.IPv6(c.t, b,
-		checker.UDP(
-			checker.DstPort(testPort),
-		),
-	)
-
-	// Check the payload.
+	// Received the packet and check the payload.
+	b := c.getPacketAndVerify(flow, checkers...)
+	var udp header.UDP
+	if flow.isV4() {
+		udp = header.UDP(header.IPv4(b).Payload())
+	} else {
+		udp = header.UDP(header.IPv6(b).Payload())
+	}
 	if !bytes.Equal(payload, udp.Payload()) {
 		c.t.Fatalf("Bad payload: got %x, want %x", udp.Payload(), payload)
 	}
@@ -619,8 +861,10 @@ func testV6Write(c *testContext) uint16 {
 }
 
 func testDualWrite(c *testContext) uint16 {
-	v4Port := testV4Write(c)
-	v6Port := testV6Write(c)
+	c.t.Helper()
+
+	v4Port := testWrite(c, unicastV4in6)
+	v6Port := testWrite(c, unicastV6)
 	if v4Port != v6Port {
 		c.t.Fatalf("expected v4 and v6 ports to be equal: got v4Port = %d, v6Port = %d", v4Port, v6Port)
 	}
@@ -632,7 +876,7 @@ func TestDualWriteUnbound(t *testing.T) {
 	c := newDualTestContext(t, defaultMTU)
 	defer c.cleanup()
 
-	c.createV6Endpoint(false)
+	c.createEndpoint(ipv6.ProtocolNumber)
 
 	testDualWrite(c)
 }
@@ -641,7 +885,7 @@ func TestDualWriteBoundToWildcard(t *testing.T) {
 	c := newDualTestContext(t, defaultMTU)
 	defer c.cleanup()
 
-	c.createV6Endpoint(false)
+	c.createEndpoint(ipv6.ProtocolNumber)
 
 	// Bind to wildcard.
 	if err := c.ep.Bind(tcpip.FullAddress{Port: stackPort}); err != nil {
@@ -658,69 +902,51 @@ func TestDualWriteConnectedToV6(t *testing.T) {
 	c := newDualTestContext(t, defaultMTU)
 	defer c.cleanup()
 
-	c.createV6Endpoint(false)
+	c.createEndpoint(ipv6.ProtocolNumber)
 
 	// Connect to v6 address.
 	if err := c.ep.Connect(tcpip.FullAddress{Addr: testV6Addr, Port: testPort}); err != nil {
 		c.t.Fatalf("Bind failed: %v", err)
 	}
 
-	testV6Write(c)
+	testWrite(c, unicastV6)
 
 	// Write to V4 mapped address.
-	payload := buffer.View(newPayload())
-	_, _, err := c.ep.Write(tcpip.SlicePayload(payload), tcpip.WriteOptions{
-		To: &tcpip.FullAddress{Addr: testV4MappedAddr, Port: testPort},
-	})
-	if err != tcpip.ErrNetworkUnreachable {
-		c.t.Fatalf("Write returned unexpected error: got %v, want %v", err, tcpip.ErrNetworkUnreachable)
-	}
+	testFailingWrite(c, unicastV4in6, tcpip.ErrNetworkUnreachable)
 }
 
 func TestDualWriteConnectedToV4Mapped(t *testing.T) {
 	c := newDualTestContext(t, defaultMTU)
 	defer c.cleanup()
 
-	c.createV6Endpoint(false)
+	c.createEndpoint(ipv6.ProtocolNumber)
 
 	// Connect to v4 mapped address.
 	if err := c.ep.Connect(tcpip.FullAddress{Addr: testV4MappedAddr, Port: testPort}); err != nil {
 		c.t.Fatalf("Bind failed: %v", err)
 	}
 
-	testV4Write(c)
+	testWrite(c, unicastV4in6)
 
 	// Write to v6 address.
-	payload := buffer.View(newPayload())
-	_, _, err := c.ep.Write(tcpip.SlicePayload(payload), tcpip.WriteOptions{
-		To: &tcpip.FullAddress{Addr: testV6Addr, Port: testPort},
-	})
-	if err != tcpip.ErrInvalidEndpointState {
-		c.t.Fatalf("Write returned unexpected error: got %v, want %v", err, tcpip.ErrInvalidEndpointState)
-	}
+	testFailingWrite(c, unicastV6, tcpip.ErrInvalidEndpointState)
 }
 
 func TestV4WriteOnV6Only(t *testing.T) {
 	c := newDualTestContext(t, defaultMTU)
 	defer c.cleanup()
 
-	c.createV6Endpoint(true)
+	c.createEndpointForFlow(unicastV6Only)
 
 	// Write to V4 mapped address.
-	payload := buffer.View(newPayload())
-	_, _, err := c.ep.Write(tcpip.SlicePayload(payload), tcpip.WriteOptions{
-		To: &tcpip.FullAddress{Addr: testV4MappedAddr, Port: testPort},
-	})
-	if err != tcpip.ErrNoRoute {
-		c.t.Fatalf("Write returned unexpected error: got %v, want %v", err, tcpip.ErrNoRoute)
-	}
+	testFailingWrite(c, unicastV4in6, tcpip.ErrNoRoute)
 }
 
 func TestV6WriteOnBoundToV4Mapped(t *testing.T) {
 	c := newDualTestContext(t, defaultMTU)
 	defer c.cleanup()
 
-	c.createV6Endpoint(false)
+	c.createEndpoint(ipv6.ProtocolNumber)
 
 	// Bind to v4 mapped address.
 	if err := c.ep.Bind(tcpip.FullAddress{Addr: stackV4MappedAddr, Port: stackPort}); err != nil {
@@ -728,84 +954,154 @@ func TestV6WriteOnBoundToV4Mapped(t *testing.T) {
 	}
 
 	// Write to v6 address.
-	payload := buffer.View(newPayload())
-	_, _, err := c.ep.Write(tcpip.SlicePayload(payload), tcpip.WriteOptions{
-		To: &tcpip.FullAddress{Addr: testV6Addr, Port: testPort},
-	})
-	if err != tcpip.ErrInvalidEndpointState {
-		c.t.Fatalf("Write returned unexpected error: got %v, want %v", err, tcpip.ErrInvalidEndpointState)
-	}
+	testFailingWrite(c, unicastV6, tcpip.ErrInvalidEndpointState)
 }
 
 func TestV6WriteOnConnected(t *testing.T) {
 	c := newDualTestContext(t, defaultMTU)
 	defer c.cleanup()
 
-	c.createV6Endpoint(false)
+	c.createEndpoint(ipv6.ProtocolNumber)
 
 	// Connect to v6 address.
 	if err := c.ep.Connect(tcpip.FullAddress{Addr: testV6Addr, Port: testPort}); err != nil {
 		c.t.Fatalf("Connect failed: %v", err)
 	}
 
-	// Write without destination.
-	payload := buffer.View(newPayload())
-	n, _, err := c.ep.Write(tcpip.SlicePayload(payload), tcpip.WriteOptions{})
-	if err != nil {
-		c.t.Fatalf("Write failed: %v", err)
-	}
-	if n != uintptr(len(payload)) {
-		c.t.Fatalf("Bad number of bytes written: got %v, want %v", n, len(payload))
-	}
-
-	// Check that we received the packet.
-	b := c.getPacket(ipv6.ProtocolNumber, false)
-	udp := header.UDP(header.IPv6(b).Payload())
-	checker.IPv6(c.t, b,
-		checker.UDP(
-			checker.DstPort(testPort),
-		),
-	)
-
-	// Check the payload.
-	if !bytes.Equal(payload, udp.Payload()) {
-		c.t.Fatalf("Bad payload: got %x, want %x", udp.Payload(), payload)
-	}
+	testWriteWithoutDestination(c, unicastV6)
 }
 
 func TestV4WriteOnConnected(t *testing.T) {
 	c := newDualTestContext(t, defaultMTU)
 	defer c.cleanup()
 
-	c.createV6Endpoint(false)
+	c.createEndpoint(ipv6.ProtocolNumber)
 
 	// Connect to v4 mapped address.
 	if err := c.ep.Connect(tcpip.FullAddress{Addr: testV4MappedAddr, Port: testPort}); err != nil {
 		c.t.Fatalf("Connect failed: %v", err)
 	}
 
-	// Write without destination.
-	payload := buffer.View(newPayload())
-	n, _, err := c.ep.Write(tcpip.SlicePayload(payload), tcpip.WriteOptions{})
-	if err != nil {
-		c.t.Fatalf("Write failed: %v", err)
+	testWriteWithoutDestination(c, unicastV4)
+}
+
+// TestWriteOnBoundToV4Multicast checks that we can send packets out of a socket
+// that is bound to a V4 multicast address.
+func TestWriteOnBoundToV4Multicast(t *testing.T) {
+	for _, flow := range []testFlow{unicastV4, multicastV4, broadcast} {
+		t.Run(fmt.Sprintf("%s", flow), func(t *testing.T) {
+			c := newDualTestContext(t, defaultMTU)
+			defer c.cleanup()
+
+			c.createEndpointForFlow(flow)
+
+			// Bind to V4 mcast address.
+			if err := c.ep.Bind(tcpip.FullAddress{Addr: multicastAddr, Port: stackPort}); err != nil {
+				c.t.Fatal("Bind failed:", err)
+			}
+
+			testWrite(c, flow)
+		})
 	}
-	if n != uintptr(len(payload)) {
-		c.t.Fatalf("Bad number of bytes written: got %v, want %v", n, len(payload))
+}
+
+// TestWriteOnBoundToV4MappedMulticast checks that we can send packets out of a
+// socket that is bound to a V4-mapped multicast address.
+func TestWriteOnBoundToV4MappedMulticast(t *testing.T) {
+	for _, flow := range []testFlow{unicastV4in6, multicastV4in6, broadcastIn6} {
+		t.Run(fmt.Sprintf("%s", flow), func(t *testing.T) {
+			c := newDualTestContext(t, defaultMTU)
+			defer c.cleanup()
+
+			c.createEndpointForFlow(flow)
+
+			// Bind to V4Mapped mcast address.
+			if err := c.ep.Bind(tcpip.FullAddress{Addr: multicastV4MappedAddr, Port: stackPort}); err != nil {
+				c.t.Fatalf("Bind failed: %s", err)
+			}
+
+			testWrite(c, flow)
+		})
 	}
+}
 
-	// Check that we received the packet.
-	b := c.getPacket(ipv4.ProtocolNumber, false)
-	udp := header.UDP(header.IPv4(b).Payload())
-	checker.IPv4(c.t, b,
-		checker.UDP(
-			checker.DstPort(testPort),
-		),
-	)
+// TestWriteOnBoundToV6Multicast checks that we can send packets out of a
+// socket that is bound to a V6 multicast address.
+func TestWriteOnBoundToV6Multicast(t *testing.T) {
+	for _, flow := range []testFlow{unicastV6, multicastV6} {
+		t.Run(fmt.Sprintf("%s", flow), func(t *testing.T) {
+			c := newDualTestContext(t, defaultMTU)
+			defer c.cleanup()
 
-	// Check the payload.
-	if !bytes.Equal(payload, udp.Payload()) {
-		c.t.Fatalf("Bad payload: got %x, want %x", udp.Payload(), payload)
+			c.createEndpointForFlow(flow)
+
+			// Bind to V6 mcast address.
+			if err := c.ep.Bind(tcpip.FullAddress{Addr: multicastV6Addr, Port: stackPort}); err != nil {
+				c.t.Fatalf("Bind failed: %s", err)
+			}
+
+			testWrite(c, flow)
+		})
+	}
+}
+
+// TestWriteOnBoundToV6Multicast checks that we can send packets out of a
+// V6-only socket that is bound to a V6 multicast address.
+func TestWriteOnBoundToV6OnlyMulticast(t *testing.T) {
+	for _, flow := range []testFlow{unicastV6Only, multicastV6Only} {
+		t.Run(fmt.Sprintf("%s", flow), func(t *testing.T) {
+			c := newDualTestContext(t, defaultMTU)
+			defer c.cleanup()
+
+			c.createEndpointForFlow(flow)
+
+			// Bind to V6 mcast address.
+			if err := c.ep.Bind(tcpip.FullAddress{Addr: multicastV6Addr, Port: stackPort}); err != nil {
+				c.t.Fatalf("Bind failed: %s", err)
+			}
+
+			testWrite(c, flow)
+		})
+	}
+}
+
+// TestWriteOnBoundToBroadcast checks that we can send packets out of a
+// socket that is bound to the broadcast address.
+func TestWriteOnBoundToBroadcast(t *testing.T) {
+	for _, flow := range []testFlow{unicastV4, multicastV4, broadcast} {
+		t.Run(fmt.Sprintf("%s", flow), func(t *testing.T) {
+			c := newDualTestContext(t, defaultMTU)
+			defer c.cleanup()
+
+			c.createEndpointForFlow(flow)
+
+			// Bind to V4 broadcast address.
+			if err := c.ep.Bind(tcpip.FullAddress{Addr: broadcastAddr, Port: stackPort}); err != nil {
+				c.t.Fatal("Bind failed:", err)
+			}
+
+			testWrite(c, flow)
+		})
+	}
+}
+
+// TestWriteOnBoundToV4MappedBroadcast checks that we can send packets out of a
+// socket that is bound to the V4-mapped broadcast address.
+func TestWriteOnBoundToV4MappedBroadcast(t *testing.T) {
+	for _, flow := range []testFlow{unicastV4in6, multicastV4in6, broadcastIn6} {
+		t.Run(fmt.Sprintf("%s", flow), func(t *testing.T) {
+			c := newDualTestContext(t, defaultMTU)
+			defer c.cleanup()
+
+			c.createEndpointForFlow(flow)
+
+			// Bind to V4Mapped mcast address.
+			if err := c.ep.Bind(tcpip.FullAddress{Addr: broadcastV4MappedAddr, Port: stackPort}); err != nil {
+				c.t.Fatalf("Bind failed: %s", err)
+			}
+
+			testWrite(c, flow)
+		})
 	}
 }
 
@@ -814,18 +1110,14 @@ func TestReadIncrementsPacketsReceived(t *testing.T) {
 	defer c.cleanup()
 
 	// Create IPv4 UDP endpoint
-	var err *tcpip.Error
-	c.ep, err = c.s.NewEndpoint(udp.ProtocolNumber, ipv4.ProtocolNumber, &c.wq)
-	if err != nil {
-		c.t.Fatalf("NewEndpoint failed: %v", err)
-	}
+	c.createEndpoint(ipv6.ProtocolNumber)
 
 	// Bind to wildcard.
 	if err := c.ep.Bind(tcpip.FullAddress{Port: stackPort}); err != nil {
 		c.t.Fatalf("Bind failed: %v", err)
 	}
 
-	testV4Read(c)
+	testRead(c, unicastV4)
 
 	var want uint64 = 1
 	if got := c.s.Stats().UDP.PacketsReceived.Value(); got != want {
@@ -837,7 +1129,7 @@ func TestWriteIncrementsPacketsSent(t *testing.T) {
 	c := newDualTestContext(t, defaultMTU)
 	defer c.cleanup()
 
-	c.createV6Endpoint(false)
+	c.createEndpoint(ipv6.ProtocolNumber)
 
 	testDualWrite(c)
 
@@ -847,244 +1139,102 @@ func TestWriteIncrementsPacketsSent(t *testing.T) {
 	}
 }
 
-func setSockOptVariants(t *testing.T, optFunc func(*testing.T, string, tcpip.NetworkProtocolNumber, string)) {
-	for _, name := range []string{"v4", "v6", "dual"} {
-		t.Run(name, func(t *testing.T) {
-			var networkProtocolNumber tcpip.NetworkProtocolNumber
-			switch name {
-			case "v4":
-				networkProtocolNumber = ipv4.ProtocolNumber
-			case "v6", "dual":
-				networkProtocolNumber = ipv6.ProtocolNumber
-			default:
-				t.Fatal("unknown test variant")
-			}
+func TestTTL(t *testing.T) {
+	for _, flow := range []testFlow{unicastV4, unicastV4in6, unicastV6, unicastV6Only, multicastV4, multicastV4in6, multicastV6, broadcast, broadcastIn6} {
+		t.Run(fmt.Sprintf("flow:%s", flow), func(t *testing.T) {
+			c := newDualTestContext(t, defaultMTU)
+			defer c.cleanup()
 
-			var variants []string
-			switch name {
-			case "v4":
-				variants = []string{"v4"}
-			case "v6":
-				variants = []string{"v6"}
-			case "dual":
-				variants = []string{"v6", "mapped"}
-			}
+			c.createEndpointForFlow(flow)
 
-			for _, variant := range variants {
-				t.Run(variant, func(t *testing.T) {
-					optFunc(t, name, networkProtocolNumber, variant)
-				})
+			const multicastTTL = 42
+			if err := c.ep.SetSockOpt(tcpip.MulticastTTLOption(multicastTTL)); err != nil {
+				c.t.Fatalf("SetSockOpt failed: %v", err)
 			}
-		})
-	}
-}
 
-func TestTTL(t *testing.T) {
-	payload := tcpip.SlicePayload(buffer.View(newPayload()))
-
-	setSockOptVariants(t, func(t *testing.T, name string, networkProtocolNumber tcpip.NetworkProtocolNumber, variant string) {
-		for _, typ := range []string{"unicast", "multicast"} {
-			t.Run(typ, func(t *testing.T) {
-				var addr tcpip.Address
-				var port uint16
-				switch typ {
-				case "unicast":
-					port = testPort
-					switch variant {
-					case "v4":
-						addr = testAddr
-					case "mapped":
-						addr = testV4MappedAddr
-					case "v6":
-						addr = testV6Addr
-					default:
-						t.Fatal("unknown test variant")
-					}
-				case "multicast":
-					port = multicastPort
-					switch variant {
-					case "v4":
-						addr = multicastAddr
-					case "mapped":
-						addr = multicastV4MappedAddr
-					case "v6":
-						addr = multicastV6Addr
-					default:
-						t.Fatal("unknown test variant")
-					}
-				default:
-					t.Fatal("unknown test variant")
+			var wantTTL uint8
+			if flow.isMulticast() {
+				wantTTL = multicastTTL
+			} else {
+				var p stack.NetworkProtocol
+				if flow.isV4() {
+					p = ipv4.NewProtocol()
+				} else {
+					p = ipv6.NewProtocol()
 				}
-
-				c := newDualTestContext(t, defaultMTU)
-				defer c.cleanup()
-
-				var err *tcpip.Error
-				c.ep, err = c.s.NewEndpoint(udp.ProtocolNumber, networkProtocolNumber, &c.wq)
+				ep, err := p.NewEndpoint(0, tcpip.AddressWithPrefix{}, nil, nil, nil)
 				if err != nil {
-					c.t.Fatalf("NewEndpoint failed: %v", err)
-				}
-
-				switch name {
-				case "v4":
-				case "v6":
-					if err := c.ep.SetSockOpt(tcpip.V6OnlyOption(1)); err != nil {
-						c.t.Fatalf("SetSockOpt failed: %v", err)
-					}
-				case "dual":
-					if err := c.ep.SetSockOpt(tcpip.V6OnlyOption(0)); err != nil {
-						c.t.Fatalf("SetSockOpt failed: %v", err)
-					}
-				default:
-					t.Fatal("unknown test variant")
-				}
-
-				const multicastTTL = 42
-				if err := c.ep.SetSockOpt(tcpip.MulticastTTLOption(multicastTTL)); err != nil {
-					c.t.Fatalf("SetSockOpt failed: %v", err)
+					t.Fatal(err)
 				}
+				wantTTL = ep.DefaultTTL()
+				ep.Close()
+			}
 
-				n, _, err := c.ep.Write(payload, tcpip.WriteOptions{To: &tcpip.FullAddress{Addr: addr, Port: port}})
-				if err != nil {
-					c.t.Fatalf("Write failed: %v", err)
-				}
-				if n != uintptr(len(payload)) {
-					c.t.Fatalf("got c.ep.Write(...) = %d, want = %d", n, len(payload))
-				}
+			testWrite(c, flow, checker.TTL(wantTTL))
+		})
+	}
+}
 
-				checkerFn := checker.IPv4
-				switch variant {
-				case "v4", "mapped":
-				case "v6":
-					checkerFn = checker.IPv6
-				default:
-					t.Fatal("unknown test variant")
-				}
-				var wantTTL uint8
-				var multicast bool
-				switch typ {
-				case "unicast":
-					multicast = false
-					switch variant {
-					case "v4", "mapped":
-						ep, err := ipv4.NewProtocol().NewEndpoint(0, tcpip.AddressWithPrefix{}, nil, nil, nil)
-						if err != nil {
-							t.Fatal(err)
-						}
-						wantTTL = ep.DefaultTTL()
-						ep.Close()
-					case "v6":
-						ep, err := ipv6.NewProtocol().NewEndpoint(0, tcpip.AddressWithPrefix{}, nil, nil, nil)
-						if err != nil {
-							t.Fatal(err)
-						}
-						wantTTL = ep.DefaultTTL()
-						ep.Close()
-					default:
-						t.Fatal("unknown test variant")
-					}
-				case "multicast":
-					wantTTL = multicastTTL
-					multicast = true
-				default:
-					t.Fatal("unknown test variant")
-				}
+func TestMulticastInterfaceOption(t *testing.T) {
+	for _, flow := range []testFlow{multicastV4, multicastV4in6, multicastV6, multicastV6Only} {
+		t.Run(fmt.Sprintf("flow:%s", flow), func(t *testing.T) {
+			for _, bindTyp := range []string{"bound", "unbound"} {
+				t.Run(bindTyp, func(t *testing.T) {
+					for _, optTyp := range []string{"use local-addr", "use NICID", "use local-addr and NIC"} {
+						t.Run(optTyp, func(t *testing.T) {
+							h := flow.header4Tuple(outgoing)
+							mcastAddr := h.dstAddr.Addr
+							localIfAddr := h.srcAddr.Addr
+
+							var ifoptSet tcpip.MulticastInterfaceOption
+							switch optTyp {
+							case "use local-addr":
+								ifoptSet.InterfaceAddr = localIfAddr
+							case "use NICID":
+								ifoptSet.NIC = 1
+							case "use local-addr and NIC":
+								ifoptSet.InterfaceAddr = localIfAddr
+								ifoptSet.NIC = 1
+							default:
+								t.Fatal("unknown test variant")
+							}
 
-				var networkProtocolNumber tcpip.NetworkProtocolNumber
-				switch variant {
-				case "v4", "mapped":
-					networkProtocolNumber = ipv4.ProtocolNumber
-				case "v6":
-					networkProtocolNumber = ipv6.ProtocolNumber
-				default:
-					t.Fatal("unknown test variant")
-				}
+							c := newDualTestContext(t, defaultMTU)
+							defer c.cleanup()
+
+							c.createEndpoint(flow.sockProto())
+
+							if bindTyp == "bound" {
+								// Bind the socket by connecting to the multicast address.
+								// This may have an influence on how the multicast interface
+								// is set.
+								addr := tcpip.FullAddress{
+									Addr: flow.mapAddrIfApplicable(mcastAddr),
+									Port: stackPort,
+								}
+								if err := c.ep.Connect(addr); err != nil {
+									c.t.Fatalf("Connect failed: %v", err)
+								}
+							}
 
-				b := c.getPacket(networkProtocolNumber, multicast)
-				checkerFn(c.t, b,
-					checker.TTL(wantTTL),
-					checker.UDP(
-						checker.DstPort(port),
-					),
-				)
-			})
-		}
-	})
-}
+							if err := c.ep.SetSockOpt(ifoptSet); err != nil {
+								c.t.Fatalf("SetSockOpt failed: %v", err)
+							}
 
-func TestMulticastInterfaceOption(t *testing.T) {
-	setSockOptVariants(t, func(t *testing.T, name string, networkProtocolNumber tcpip.NetworkProtocolNumber, variant string) {
-		for _, bindTyp := range []string{"bound", "unbound"} {
-			t.Run(bindTyp, func(t *testing.T) {
-				for _, optTyp := range []string{"use local-addr", "use NICID", "use local-addr and NIC"} {
-					t.Run(optTyp, func(t *testing.T) {
-						var mcastAddr, localIfAddr tcpip.Address
-						switch variant {
-						case "v4":
-							mcastAddr = multicastAddr
-							localIfAddr = stackAddr
-						case "mapped":
-							mcastAddr = multicastV4MappedAddr
-							localIfAddr = stackAddr
-						case "v6":
-							mcastAddr = multicastV6Addr
-							localIfAddr = stackV6Addr
-						default:
-							t.Fatal("unknown test variant")
-						}
-
-						var ifoptSet tcpip.MulticastInterfaceOption
-						switch optTyp {
-						case "use local-addr":
-							ifoptSet.InterfaceAddr = localIfAddr
-						case "use NICID":
-							ifoptSet.NIC = 1
-						case "use local-addr and NIC":
-							ifoptSet.InterfaceAddr = localIfAddr
-							ifoptSet.NIC = 1
-						default:
-							t.Fatal("unknown test variant")
-						}
-
-						c := newDualTestContext(t, defaultMTU)
-						defer c.cleanup()
-
-						var err *tcpip.Error
-						c.ep, err = c.s.NewEndpoint(udp.ProtocolNumber, networkProtocolNumber, &c.wq)
-						if err != nil {
-							c.t.Fatalf("NewEndpoint failed: %v", err)
-						}
-
-						if bindTyp == "bound" {
-							// Bind the socket by connecting to the multicast address.
-							// This may have an influence on how the multicast interface
-							// is set.
-							addr := tcpip.FullAddress{
-								Addr: mcastAddr,
-								Port: multicastPort,
+							// Verify multicast interface addr and NIC were set correctly.
+							// Note that NIC must be 1 since this is our outgoing interface.
+							ifoptWant := tcpip.MulticastInterfaceOption{NIC: 1, InterfaceAddr: ifoptSet.InterfaceAddr}
+							var ifoptGot tcpip.MulticastInterfaceOption
+							if err := c.ep.GetSockOpt(&ifoptGot); err != nil {
+								c.t.Fatalf("GetSockOpt failed: %v", err)
 							}
-							if err := c.ep.Connect(addr); err != nil {
-								c.t.Fatalf("Connect failed: %v", err)
+							if ifoptGot != ifoptWant {
+								c.t.Errorf("got GetSockOpt() = %#v, want = %#v", ifoptGot, ifoptWant)
 							}
-						}
-
-						if err := c.ep.SetSockOpt(ifoptSet); err != nil {
-							c.t.Fatalf("SetSockOpt failed: %v", err)
-						}
-
-						// Verify multicast interface addr and NIC were set correctly.
-						// Note that NIC must be 1 since this is our outgoing interface.
-						ifoptWant := tcpip.MulticastInterfaceOption{NIC: 1, InterfaceAddr: ifoptSet.InterfaceAddr}
-						var ifoptGot tcpip.MulticastInterfaceOption
-						if err := c.ep.GetSockOpt(&ifoptGot); err != nil {
-							c.t.Fatalf("GetSockOpt failed: %v", err)
-						}
-						if ifoptGot != ifoptWant {
-							c.t.Errorf("got GetSockOpt() = %#v, want = %#v", ifoptGot, ifoptWant)
-						}
-					})
-				}
-			})
-		}
-	})
+						})
+					}
+				})
+			}
+		})
+	}
 }