55 files changed, 1826 insertions, 335 deletions
diff --git a/images/benchmarks/sysbench/Dockerfile b/images/benchmarks/sysbench/Dockerfile
new file mode 100644
index 000000000..55e865f43
--- /dev/null
+++ b/images/benchmarks/sysbench/Dockerfile
@@ -0,0 +1,7 @@
+FROM ubuntu:18.04
+
+RUN set -x \
+        && apt-get update \
+        && apt-get install -y \
+            sysbench \
+        && rm -rf /var/lib/apt/lists/*
diff --git a/pkg/sentry/control/pprof.go b/pkg/sentry/control/pprof.go
index 663e51989..2bf3c45e1 100644
--- a/pkg/sentry/control/pprof.go
+++ b/pkg/sentry/control/pprof.go
@@ -49,6 +49,9 @@ type ProfileOpts struct {
 // - dump out the stack trace of current go routines.
 //   sentryctl -pid <pid> pprof-goroutine
 type Profile struct {
+	// Kernel is the kernel under profile. It's immutable.
+	Kernel *kernel.Kernel
+
 	// mu protects the fields below.
 	mu sync.Mutex
 
@@ -57,9 +60,6 @@ type Profile struct {
 
 	// traceFile is the current execution trace output file.
 	traceFile *fd.FD
-
-	// Kernel is the kernel under profile.
-	Kernel *kernel.Kernel
 }
 
 // StartCPUProfile is an RPC stub which starts recording the CPU profile in a
diff --git a/pkg/sentry/devices/tundev/tundev.go b/pkg/sentry/devices/tundev/tundev.go
index 852ec3c5c..a40625e19 100644
--- a/pkg/sentry/devices/tundev/tundev.go
+++ b/pkg/sentry/devices/tundev/tundev.go
@@ -160,8 +160,8 @@ func (fd *tunFD) EventUnregister(e *waiter.Entry) {
 	fd.device.EventUnregister(e)
 }
 
-// isNetTunSupported returns whether /dev/net/tun device is supported for s.
-func isNetTunSupported(s inet.Stack) bool {
+// IsNetTunSupported returns whether /dev/net/tun device is supported for s.
+func IsNetTunSupported(s inet.Stack) bool {
 	_, ok := s.(*netstack.Stack)
 	return ok
 }
diff --git a/pkg/sentry/fs/proc/sys_net.go b/pkg/sentry/fs/proc/sys_net.go
index 702fdd392..8615b60f0 100644
--- a/pkg/sentry/fs/proc/sys_net.go
+++ b/pkg/sentry/fs/proc/sys_net.go
@@ -272,6 +272,96 @@ func (f *tcpSackFile) Write(ctx context.Context, _ *fs.File, src usermem.IOSeque
 	return n, f.tcpSack.stack.SetTCPSACKEnabled(*f.tcpSack.enabled)
 }
 
+// +stateify savable
+type tcpRecovery struct {
+	fsutil.SimpleFileInode
+
+	stack    inet.Stack `state:"wait"`
+	recovery inet.TCPLossRecovery
+}
+
+func newTCPRecoveryInode(ctx context.Context, msrc *fs.MountSource, s inet.Stack) *fs.Inode {
+	ts := &tcpRecovery{
+		SimpleFileInode: *fsutil.NewSimpleFileInode(ctx, fs.RootOwner, fs.FilePermsFromMode(0644), linux.PROC_SUPER_MAGIC),
+		stack:           s,
+	}
+	sattr := fs.StableAttr{
+		DeviceID:  device.ProcDevice.DeviceID(),
+		InodeID:   device.ProcDevice.NextIno(),
+		BlockSize: usermem.PageSize,
+		Type:      fs.SpecialFile,
+	}
+	return fs.NewInode(ctx, ts, msrc, sattr)
+}
+
+// Truncate implements fs.InodeOperations.Truncate.
+func (*tcpRecovery) Truncate(context.Context, *fs.Inode, int64) error {
+	return nil
+}
+
+// GetFile implements fs.InodeOperations.GetFile.
+func (r *tcpRecovery) GetFile(ctx context.Context, dirent *fs.Dirent, flags fs.FileFlags) (*fs.File, error) {
+	flags.Pread = true
+	flags.Pwrite = true
+	return fs.NewFile(ctx, dirent, flags, &tcpRecoveryFile{
+		tcpRecovery: r,
+		stack:       r.stack,
+	}), nil
+}
+
+// +stateify savable
+type tcpRecoveryFile struct {
+	fsutil.FileGenericSeek          `state:"nosave"`
+	fsutil.FileNoIoctl              `state:"nosave"`
+	fsutil.FileNoMMap               `state:"nosave"`
+	fsutil.FileNoSplice             `state:"nosave"`
+	fsutil.FileNoopRelease          `state:"nosave"`
+	fsutil.FileNoopFlush            `state:"nosave"`
+	fsutil.FileNoopFsync            `state:"nosave"`
+	fsutil.FileNotDirReaddir        `state:"nosave"`
+	fsutil.FileUseInodeUnstableAttr `state:"nosave"`
+	waiter.AlwaysReady              `state:"nosave"`
+
+	tcpRecovery *tcpRecovery
+
+	stack inet.Stack `state:"wait"`
+}
+
+// Read implements fs.FileOperations.Read.
+func (f *tcpRecoveryFile) Read(ctx context.Context, _ *fs.File, dst usermem.IOSequence, offset int64) (int64, error) {
+	if offset != 0 {
+		return 0, io.EOF
+	}
+
+	recovery, err := f.stack.TCPRecovery()
+	if err != nil {
+		return 0, err
+	}
+	f.tcpRecovery.recovery = recovery
+	s := fmt.Sprintf("%d\n", f.tcpRecovery.recovery)
+	n, err := dst.CopyOut(ctx, []byte(s))
+	return int64(n), err
+}
+
+// Write implements fs.FileOperations.Write.
+func (f *tcpRecoveryFile) Write(ctx context.Context, _ *fs.File, src usermem.IOSequence, offset int64) (int64, error) {
+	if src.NumBytes() == 0 {
+		return 0, nil
+	}
+	src = src.TakeFirst(usermem.PageSize - 1)
+
+	var v int32
+	n, err := usermem.CopyInt32StringInVec(ctx, src.IO, src.Addrs, &v, src.Opts)
+	if err != nil {
+		return 0, err
+	}
+	f.tcpRecovery.recovery = inet.TCPLossRecovery(v)
+	if err := f.tcpRecovery.stack.SetTCPRecovery(f.tcpRecovery.recovery); err != nil {
+		return 0, err
+	}
+	return n, nil
+}
+
 func (p *proc) newSysNetCore(ctx context.Context, msrc *fs.MountSource, s inet.Stack) *fs.Inode {
 	// The following files are simple stubs until they are implemented in
 	// netstack, most of these files are configuration related. We use the
@@ -351,6 +441,11 @@ func (p *proc) newSysNetIPv4Dir(ctx context.Context, msrc *fs.MountSource, s ine
 		contents["tcp_wmem"] = newTCPMemInode(ctx, msrc, s, tcpWMem)
 	}
 
+	// Add tcp_recovery.
+	if _, err := s.TCPRecovery(); err == nil {
+		contents["tcp_recovery"] = newTCPRecoveryInode(ctx, msrc, s)
+	}
+
 	d := ramfs.NewDir(ctx, contents, fs.RootOwner, fs.FilePermsFromMode(0555))
 	return newProcInode(ctx, d, msrc, fs.SpecialDirectory, nil)
 }
diff --git a/pkg/sentry/fsimpl/gofer/regular_file.go b/pkg/sentry/fsimpl/gofer/regular_file.go
index 420e8efe2..db6bed4f6 100644
--- a/pkg/sentry/fsimpl/gofer/regular_file.go
+++ b/pkg/sentry/fsimpl/gofer/regular_file.go
@@ -184,6 +184,7 @@ func (fd *regularFileFD) pwrite(ctx context.Context, src usermem.IOSequence, off
 
 	d.metadataMu.Lock()
 	defer d.metadataMu.Unlock()
+
 	// Set offset to file size if the fd was opened with O_APPEND.
 	if fd.vfsfd.StatusFlags()&linux.O_APPEND != 0 {
 		// Holding d.metadataMu is sufficient for reading d.size.
@@ -194,70 +195,79 @@ func (fd *regularFileFD) pwrite(ctx context.Context, src usermem.IOSequence, off
 		return 0, offset, err
 	}
 	src = src.TakeFirst64(limit)
-	n, err := fd.pwriteLocked(ctx, src, offset, opts)
-	return n, offset + n, err
-}
 
-// Preconditions: fd.dentry().metatdataMu must be locked.
-func (fd *regularFileFD) pwriteLocked(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (int64, error) {
-	d := fd.dentry()
 	if d.fs.opts.interop != InteropModeShared {
 		// Compare Linux's mm/filemap.c:__generic_file_write_iter() =>
 		// file_update_time(). This is d.touchCMtime(), but without locking
 		// d.metadataMu (recursively).
 		d.touchCMtimeLocked()
 	}
-	if fd.vfsfd.StatusFlags()&linux.O_DIRECT != 0 {
-		// Write dirty cached pages that will be touched by the write back to
-		// the remote file.
-		if err := d.writeback(ctx, offset, src.NumBytes()); err != nil {
-			return 0, err
-		}
-		// Remove touched pages from the cache.
-		pgstart := usermem.PageRoundDown(uint64(offset))
-		pgend, ok := usermem.PageRoundUp(uint64(offset + src.NumBytes()))
-		if !ok {
-			return 0, syserror.EINVAL
-		}
-		mr := memmap.MappableRange{pgstart, pgend}
-		var freed []memmap.FileRange
-		d.dataMu.Lock()
-		cseg := d.cache.LowerBoundSegment(mr.Start)
-		for cseg.Ok() && cseg.Start() < mr.End {
-			cseg = d.cache.Isolate(cseg, mr)
-			freed = append(freed, memmap.FileRange{cseg.Value(), cseg.Value() + cseg.Range().Length()})
-			cseg = d.cache.Remove(cseg).NextSegment()
-		}
-		d.dataMu.Unlock()
-		// Invalidate mappings of removed pages.
-		d.mapsMu.Lock()
-		d.mappings.Invalidate(mr, memmap.InvalidateOpts{})
-		d.mapsMu.Unlock()
-		// Finally free pages removed from the cache.
-		mf := d.fs.mfp.MemoryFile()
-		for _, freedFR := range freed {
-			mf.DecRef(freedFR)
-		}
-	}
+
 	rw := getDentryReadWriter(ctx, d, offset)
+	defer putDentryReadWriter(rw)
+
 	if fd.vfsfd.StatusFlags()&linux.O_DIRECT != 0 {
+		if err := fd.writeCache(ctx, d, offset, src); err != nil {
+			return 0, offset, err
+		}
+
 		// Require the write to go to the remote file.
 		rw.direct = true
 	}
+
 	n, err := src.CopyInTo(ctx, rw)
-	putDentryReadWriter(rw)
-	if n != 0 && fd.vfsfd.StatusFlags()&(linux.O_DSYNC|linux.O_SYNC) != 0 {
-		// Write dirty cached pages touched by the write back to the remote
-		// file.
+	if err != nil {
+		return n, offset, err
+	}
+	if n > 0 && fd.vfsfd.StatusFlags()&(linux.O_DSYNC|linux.O_SYNC) != 0 {
+		// Write dirty cached pages touched by the write back to the remote file.
 		if err := d.writeback(ctx, offset, src.NumBytes()); err != nil {
-			return 0, err
+			return n, offset, err
 		}
 		// Request the remote filesystem to sync the remote file.
-		if err := d.handle.file.fsync(ctx); err != nil {
-			return 0, err
+		if err := d.handle.sync(ctx); err != nil {
+			return n, offset, err
 		}
 	}
-	return n, err
+	return n, offset + n, nil
+}
+
+func (fd *regularFileFD) writeCache(ctx context.Context, d *dentry, offset int64, src usermem.IOSequence) error {
+	// Write dirty cached pages that will be touched by the write back to
+	// the remote file.
+	if err := d.writeback(ctx, offset, src.NumBytes()); err != nil {
+		return err
+	}
+
+	// Remove touched pages from the cache.
+	pgstart := usermem.PageRoundDown(uint64(offset))
+	pgend, ok := usermem.PageRoundUp(uint64(offset + src.NumBytes()))
+	if !ok {
+		return syserror.EINVAL
+	}
+	mr := memmap.MappableRange{pgstart, pgend}
+	var freed []memmap.FileRange
+
+	d.dataMu.Lock()
+	cseg := d.cache.LowerBoundSegment(mr.Start)
+	for cseg.Ok() && cseg.Start() < mr.End {
+		cseg = d.cache.Isolate(cseg, mr)
+		freed = append(freed, memmap.FileRange{cseg.Value(), cseg.Value() + cseg.Range().Length()})
+		cseg = d.cache.Remove(cseg).NextSegment()
+	}
+	d.dataMu.Unlock()
+
+	// Invalidate mappings of removed pages.
+	d.mapsMu.Lock()
+	d.mappings.Invalidate(mr, memmap.InvalidateOpts{})
+	d.mapsMu.Unlock()
+
+	// Finally free pages removed from the cache.
+	mf := d.fs.mfp.MemoryFile()
+	for _, freedFR := range freed {
+		mf.DecRef(freedFR)
+	}
+	return nil
 }
 
 // Write implements vfs.FileDescriptionImpl.Write.
diff --git a/pkg/sentry/fsimpl/proc/tasks_sys.go b/pkg/sentry/fsimpl/proc/tasks_sys.go
index 6dac2afa4..b71778128 100644
--- a/pkg/sentry/fsimpl/proc/tasks_sys.go
+++ b/pkg/sentry/fsimpl/proc/tasks_sys.go
@@ -55,7 +55,8 @@ func (fs *filesystem) newSysNetDir(root *auth.Credentials, k *kernel.Kernel) *ke
 	if stack := k.RootNetworkNamespace().Stack(); stack != nil {
 		contents = map[string]*kernfs.Dentry{
 			"ipv4": kernfs.NewStaticDir(root, linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), 0555, map[string]*kernfs.Dentry{
-				"tcp_sack": fs.newDentry(root, fs.NextIno(), 0644, &tcpSackData{stack: stack}),
+				"tcp_recovery": fs.newDentry(root, fs.NextIno(), 0644, &tcpRecoveryData{stack: stack}),
+				"tcp_sack":     fs.newDentry(root, fs.NextIno(), 0644, &tcpSackData{stack: stack}),
 
 				// The following files are simple stubs until they are implemented in
 				// netstack, most of these files are configuration related. We use the
@@ -207,3 +208,49 @@ func (d *tcpSackData) Write(ctx context.Context, src usermem.IOSequence, offset
 	*d.enabled = v != 0
 	return n, d.stack.SetTCPSACKEnabled(*d.enabled)
 }
+
+// tcpRecoveryData implements vfs.WritableDynamicBytesSource for
+// /proc/sys/net/ipv4/tcp_recovery.
+//
+// +stateify savable
+type tcpRecoveryData struct {
+	kernfs.DynamicBytesFile
+
+	stack inet.Stack `state:"wait"`
+}
+
+var _ vfs.WritableDynamicBytesSource = (*tcpRecoveryData)(nil)
+
+// Generate implements vfs.DynamicBytesSource.
+func (d *tcpRecoveryData) Generate(ctx context.Context, buf *bytes.Buffer) error {
+	recovery, err := d.stack.TCPRecovery()
+	if err != nil {
+		return err
+	}
+
+	buf.WriteString(fmt.Sprintf("%d\n", recovery))
+	return nil
+}
+
+func (d *tcpRecoveryData) Write(ctx context.Context, src usermem.IOSequence, offset int64) (int64, error) {
+	if offset != 0 {
+		// No need to handle partial writes thus far.
+		return 0, syserror.EINVAL
+	}
+	if src.NumBytes() == 0 {
+		return 0, nil
+	}
+
+	// Limit the amount of memory allocated.
+	src = src.TakeFirst(usermem.PageSize - 1)
+
+	var v int32
+	n, err := usermem.CopyInt32StringInVec(ctx, src.IO, src.Addrs, &v, src.Opts)
+	if err != nil {
+		return 0, err
+	}
+	if err := d.stack.SetTCPRecovery(inet.TCPLossRecovery(v)); err != nil {
+		return 0, err
+	}
+	return n, nil
+}
diff --git a/pkg/sentry/fsimpl/tmpfs/filesystem.go b/pkg/sentry/fsimpl/tmpfs/filesystem.go
index fb77f95cc..065812065 100644
--- a/pkg/sentry/fsimpl/tmpfs/filesystem.go
+++ b/pkg/sentry/fsimpl/tmpfs/filesystem.go
@@ -566,7 +566,9 @@ func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldPa
 	if replaced != nil {
 		newParentDir.removeChildLocked(replaced)
 		if replaced.inode.isDir() {
-			newParentDir.inode.decLinksLocked(ctx) // from replaced's ".."
+			// Remove links for replaced/. and replaced/..
+			replaced.inode.decLinksLocked(ctx)
+			newParentDir.inode.decLinksLocked(ctx)
 		}
 		replaced.inode.decLinksLocked(ctx)
 	}
diff --git a/pkg/sentry/fsimpl/tmpfs/tmpfs.go b/pkg/sentry/fsimpl/tmpfs/tmpfs.go
index 68e615e8b..4681a2f52 100644
--- a/pkg/sentry/fsimpl/tmpfs/tmpfs.go
+++ b/pkg/sentry/fsimpl/tmpfs/tmpfs.go
@@ -558,6 +558,8 @@ func (i *inode) direntType() uint8 {
 		return linux.DT_LNK
 	case *socketFile:
 		return linux.DT_SOCK
+	case *namedPipe:
+		return linux.DT_FIFO
 	case *deviceFile:
 		switch impl.kind {
 		case vfs.BlockDevice:
diff --git a/pkg/sentry/inet/inet.go b/pkg/sentry/inet/inet.go
index 2916a0644..c0b4831d1 100644
--- a/pkg/sentry/inet/inet.go
+++ b/pkg/sentry/inet/inet.go
@@ -56,6 +56,12 @@ type Stack interface {
 	// settings.
 	SetTCPSACKEnabled(enabled bool) error
 
+	// TCPRecovery returns the TCP loss detection algorithm.
+	TCPRecovery() (TCPLossRecovery, error)
+
+	// SetTCPRecovery attempts to change TCP loss detection algorithm.
+	SetTCPRecovery(recovery TCPLossRecovery) error
+
 	// Statistics reports stack statistics.
 	Statistics(stat interface{}, arg string) error
 
@@ -189,3 +195,14 @@ type StatSNMPUDP [8]uint64
 
 // StatSNMPUDPLite describes UdpLite line of /proc/net/snmp.
 type StatSNMPUDPLite [8]uint64
+
+// TCPLossRecovery indicates TCP loss detection and recovery methods to use.
+type TCPLossRecovery int32
+
+// Loss recovery constants from include/net/tcp.h which are used to set
+// /proc/sys/net/ipv4/tcp_recovery.
+const (
+	TCP_RACK_LOSS_DETECTION TCPLossRecovery = 1 << iota
+	TCP_RACK_STATIC_REO_WND
+	TCP_RACK_NO_DUPTHRESH
+)
diff --git a/pkg/sentry/inet/test_stack.go b/pkg/sentry/inet/test_stack.go
index d8961fc94..9771f01fc 100644
--- a/pkg/sentry/inet/test_stack.go
+++ b/pkg/sentry/inet/test_stack.go
@@ -25,6 +25,7 @@ type TestStack struct {
 	TCPRecvBufSize    TCPBufferSize
 	TCPSendBufSize    TCPBufferSize
 	TCPSACKFlag       bool
+	Recovery          TCPLossRecovery
 }
 
 // NewTestStack returns a TestStack with no network interfaces. The value of
@@ -91,6 +92,17 @@ func (s *TestStack) SetTCPSACKEnabled(enabled bool) error {
 	return nil
 }
 
+// TCPRecovery implements Stack.TCPRecovery.
+func (s *TestStack) TCPRecovery() (TCPLossRecovery, error) {
+	return s.Recovery, nil
+}
+
+// SetTCPRecovery implements Stack.SetTCPRecovery.
+func (s *TestStack) SetTCPRecovery(recovery TCPLossRecovery) error {
+	s.Recovery = recovery
+	return nil
+}
+
 // Statistics implements inet.Stack.Statistics.
 func (s *TestStack) Statistics(stat interface{}, arg string) error {
 	return nil
diff --git a/pkg/sentry/socket/hostinet/stack.go b/pkg/sentry/socket/hostinet/stack.go
index a48082631..fda3dcb35 100644
--- a/pkg/sentry/socket/hostinet/stack.go
+++ b/pkg/sentry/socket/hostinet/stack.go
@@ -53,6 +53,7 @@ type Stack struct {
 	interfaceAddrs map[int32][]inet.InterfaceAddr
 	routes         []inet.Route
 	supportsIPv6   bool
+	tcpRecovery    inet.TCPLossRecovery
 	tcpRecvBufSize inet.TCPBufferSize
 	tcpSendBufSize inet.TCPBufferSize
 	tcpSACKEnabled bool
@@ -350,6 +351,16 @@ func (s *Stack) SetTCPSACKEnabled(enabled bool) error {
 	return syserror.EACCES
 }
 
+// TCPRecovery implements inet.Stack.TCPRecovery.
+func (s *Stack) TCPRecovery() (inet.TCPLossRecovery, error) {
+	return s.tcpRecovery, nil
+}
+
+// SetTCPRecovery implements inet.Stack.SetTCPRecovery.
+func (s *Stack) SetTCPRecovery(recovery inet.TCPLossRecovery) error {
+	return syserror.EACCES
+}
+
 // getLine reads one line from proc file, with specified prefix.
 // The last argument, withHeader, specifies if it contains line header.
 func getLine(f *os.File, prefix string, withHeader bool) string {
diff --git a/pkg/sentry/socket/netstack/stack.go b/pkg/sentry/socket/netstack/stack.go
index 67737ae87..f0fe18684 100644
--- a/pkg/sentry/socket/netstack/stack.go
+++ b/pkg/sentry/socket/netstack/stack.go
@@ -207,6 +207,20 @@ func (s *Stack) SetTCPSACKEnabled(enabled bool) error {
 	return syserr.TranslateNetstackError(s.Stack.SetTransportProtocolOption(tcp.ProtocolNumber, tcp.SACKEnabled(enabled))).ToError()
 }
 
+// TCPRecovery implements inet.Stack.TCPRecovery.
+func (s *Stack) TCPRecovery() (inet.TCPLossRecovery, error) {
+	var recovery tcp.Recovery
+	if err := s.Stack.TransportProtocolOption(tcp.ProtocolNumber, &recovery); err != nil {
+		return 0, syserr.TranslateNetstackError(err).ToError()
+	}
+	return inet.TCPLossRecovery(recovery), nil
+}
+
+// SetTCPRecovery implements inet.Stack.SetTCPRecovery.
+func (s *Stack) SetTCPRecovery(recovery inet.TCPLossRecovery) error {
+	return syserr.TranslateNetstackError(s.Stack.SetTransportProtocolOption(tcp.ProtocolNumber, tcp.Recovery(recovery))).ToError()
+}
+
 // Statistics implements inet.Stack.Statistics.
 func (s *Stack) Statistics(stat interface{}, arg string) error {
 	switch stats := stat.(type) {
diff --git a/pkg/sentry/syscalls/linux/vfs2/memfd.go b/pkg/sentry/syscalls/linux/vfs2/memfd.go
index 519583e4e..c4c0f9e0a 100644
--- a/pkg/sentry/syscalls/linux/vfs2/memfd.go
+++ b/pkg/sentry/syscalls/linux/vfs2/memfd.go
@@ -51,6 +51,7 @@ func MemfdCreate(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.S
 	if err != nil {
 		return 0, nil, err
 	}
+	defer file.DecRef(t)
 
 	fd, err := t.NewFDFromVFS2(0, file, kernel.FDFlags{
 		CloseOnExec: cloExec,
diff --git a/pkg/sentry/syscalls/linux/vfs2/splice.go b/pkg/sentry/syscalls/linux/vfs2/splice.go
index 16f59fce9..75bfa2c79 100644
--- a/pkg/sentry/syscalls/linux/vfs2/splice.go
+++ b/pkg/sentry/syscalls/linux/vfs2/splice.go
@@ -347,6 +347,11 @@ func Sendfile(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc
 			} else {
 				spliceN, err = inFile.Read(t, outPipeFD.IOSequence(count), vfs.ReadOptions{})
 			}
+			if spliceN == 0 && err == io.EOF {
+				// We reached the end of the file. Eat the error and exit the loop.
+				err = nil
+				break
+			}
 			n += spliceN
 			if err == syserror.ErrWouldBlock && !nonBlock {
 				err = dw.waitForBoth(t)
@@ -367,8 +372,7 @@ func Sendfile(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc
 				readN, err = inFile.Read(t, usermem.BytesIOSequence(buf), vfs.ReadOptions{})
 			}
 			if readN == 0 && err == io.EOF {
-				// We reached the end of the file. Eat the
-				// error and exit the loop.
+				// We reached the end of the file. Eat the error and exit the loop.
 				err = nil
 				break
 			}
diff --git a/pkg/tcpip/header/ipv4.go b/pkg/tcpip/header/ipv4.go
index 62ac932bb..d0d1efd0d 100644
--- a/pkg/tcpip/header/ipv4.go
+++ b/pkg/tcpip/header/ipv4.go
@@ -101,6 +101,11 @@ const (
 	// IPv4Version is the version of the ipv4 protocol.
 	IPv4Version = 4
 
+	// IPv4AllSystems is the all systems IPv4 multicast address as per
+	// IANA's IPv4 Multicast Address Space Registry. See
+	// https://www.iana.org/assignments/multicast-addresses/multicast-addresses.xhtml.
+	IPv4AllSystems tcpip.Address = "\xe0\x00\x00\x01"
+
 	// IPv4Broadcast is the broadcast address of the IPv4 procotol.
 	IPv4Broadcast tcpip.Address = "\xff\xff\xff\xff"
 
diff --git a/pkg/tcpip/network/ipv4/ipv4.go b/pkg/tcpip/network/ipv4/ipv4.go
index d5f5d38f7..6c4f0ae3e 100644
--- a/pkg/tcpip/network/ipv4/ipv4.go
+++ b/pkg/tcpip/network/ipv4/ipv4.go
@@ -52,27 +52,25 @@ const (
 )
 
 type endpoint struct {
-	nicID         tcpip.NICID
-	id            stack.NetworkEndpointID
-	prefixLen     int
-	linkEP        stack.LinkEndpoint
-	dispatcher    stack.TransportDispatcher
-	fragmentation *fragmentation.Fragmentation
-	protocol      *protocol
-	stack         *stack.Stack
+	nicID      tcpip.NICID
+	id         stack.NetworkEndpointID
+	prefixLen  int
+	linkEP     stack.LinkEndpoint
+	dispatcher stack.TransportDispatcher
+	protocol   *protocol
+	stack      *stack.Stack
 }
 
 // NewEndpoint creates a new ipv4 endpoint.
 func (p *protocol) NewEndpoint(nicID tcpip.NICID, addrWithPrefix tcpip.AddressWithPrefix, linkAddrCache stack.LinkAddressCache, dispatcher stack.TransportDispatcher, linkEP stack.LinkEndpoint, st *stack.Stack) (stack.NetworkEndpoint, *tcpip.Error) {
 	e := &endpoint{
-		nicID:         nicID,
-		id:            stack.NetworkEndpointID{LocalAddress: addrWithPrefix.Address},
-		prefixLen:     addrWithPrefix.PrefixLen,
-		linkEP:        linkEP,
-		dispatcher:    dispatcher,
-		fragmentation: fragmentation.NewFragmentation(fragmentblockSize, fragmentation.HighFragThreshold, fragmentation.LowFragThreshold, fragmentation.DefaultReassembleTimeout),
-		protocol:      p,
-		stack:         st,
+		nicID:      nicID,
+		id:         stack.NetworkEndpointID{LocalAddress: addrWithPrefix.Address},
+		prefixLen:  addrWithPrefix.PrefixLen,
+		linkEP:     linkEP,
+		dispatcher: dispatcher,
+		protocol:   p,
+		stack:      st,
 	}
 
 	return e, nil
@@ -442,7 +440,9 @@ func (e *endpoint) HandlePacket(r *stack.Route, pkt *stack.PacketBuffer) {
 		}
 		var ready bool
 		var err error
-		pkt.Data, ready, err = e.fragmentation.Process(
+		pkt.Data, ready, err = e.protocol.fragmentation.Process(
+			// As per RFC 791 section 2.3, the identification value is unique
+			// for a source-destination pair and protocol.
 			fragmentation.FragmentID{
 				Source:      h.SourceAddress(),
 				Destination: h.DestinationAddress(),
@@ -484,6 +484,8 @@ type protocol struct {
 	// uint8 portion of it is meaningful and it must be accessed
 	// atomically.
 	defaultTTL uint32
+
+	fragmentation *fragmentation.Fragmentation
 }
 
 // Number returns the ipv4 protocol number.
@@ -605,5 +607,10 @@ func NewProtocol() stack.NetworkProtocol {
 	}
 	hashIV := r[buckets]
 
-	return &protocol{ids: ids, hashIV: hashIV, defaultTTL: DefaultTTL}
+	return &protocol{
+		ids:           ids,
+		hashIV:        hashIV,
+		defaultTTL:    DefaultTTL,
+		fragmentation: fragmentation.NewFragmentation(fragmentblockSize, fragmentation.HighFragThreshold, fragmentation.LowFragThreshold, fragmentation.DefaultReassembleTimeout),
+	}
 }
diff --git a/pkg/tcpip/network/ipv6/ipv6.go b/pkg/tcpip/network/ipv6/ipv6.go
index a0a5c9c01..4a0b53c45 100644
--- a/pkg/tcpip/network/ipv6/ipv6.go
+++ b/pkg/tcpip/network/ipv6/ipv6.go
@@ -51,7 +51,6 @@ type endpoint struct {
 	linkEP        stack.LinkEndpoint
 	linkAddrCache stack.LinkAddressCache
 	dispatcher    stack.TransportDispatcher
-	fragmentation *fragmentation.Fragmentation
 	protocol      *protocol
 }
 
@@ -342,7 +341,7 @@ func (e *endpoint) HandlePacket(r *stack.Route, pkt *stack.PacketBuffer) {
 			var ready bool
 			// Note that pkt doesn't have its transport header set after reassembly,
 			// and won't until DeliverNetworkPacket sets it.
-			pkt.Data, ready, err = e.fragmentation.Process(
+			pkt.Data, ready, err = e.protocol.fragmentation.Process(
 				// IPv6 ignores the Protocol field since the ID only needs to be unique
 				// across source-destination pairs, as per RFC 8200 section 4.5.
 				fragmentation.FragmentID{
@@ -445,7 +444,8 @@ type protocol struct {
 	// defaultTTL is the current default TTL for the protocol. Only the
 	// uint8 portion of it is meaningful and it must be accessed
 	// atomically.
-	defaultTTL uint32
+	defaultTTL    uint32
+	fragmentation *fragmentation.Fragmentation
 }
 
 // Number returns the ipv6 protocol number.
@@ -478,7 +478,6 @@ func (p *protocol) NewEndpoint(nicID tcpip.NICID, addrWithPrefix tcpip.AddressWi
 		linkEP:        linkEP,
 		linkAddrCache: linkAddrCache,
 		dispatcher:    dispatcher,
-		fragmentation: fragmentation.NewFragmentation(header.IPv6FragmentExtHdrFragmentOffsetBytesPerUnit, fragmentation.HighFragThreshold, fragmentation.LowFragThreshold, fragmentation.DefaultReassembleTimeout),
 		protocol:      p,
 	}, nil
 }
@@ -606,5 +605,8 @@ func calculateMTU(mtu uint32) uint32 {
 
 // NewProtocol returns an IPv6 network protocol.
 func NewProtocol() stack.NetworkProtocol {
-	return &protocol{defaultTTL: DefaultTTL}
+	return &protocol{
+		defaultTTL:    DefaultTTL,
+		fragmentation: fragmentation.NewFragmentation(header.IPv6FragmentExtHdrFragmentOffsetBytesPerUnit, fragmentation.HighFragThreshold, fragmentation.LowFragThreshold, fragmentation.DefaultReassembleTimeout),
+	}
 }
diff --git a/pkg/tcpip/stack/ndp_test.go b/pkg/tcpip/stack/ndp_test.go
index 644ba7c33..5d286ccbc 100644
--- a/pkg/tcpip/stack/ndp_test.go
+++ b/pkg/tcpip/stack/ndp_test.go
@@ -1689,13 +1689,7 @@ func containsV6Addr(list []tcpip.ProtocolAddress, item tcpip.AddressWithPrefix)
 		AddressWithPrefix: item,
 	}
 
-	for _, i := range list {
-		if i == protocolAddress {
-			return true
-		}
-	}
-
-	return false
+	return containsAddr(list, protocolAddress)
 }
 
 // TestNoAutoGenAddr tests that SLAAC is not performed when configured not to.
diff --git a/pkg/tcpip/stack/nic.go b/pkg/tcpip/stack/nic.go
index f21066fce..eaaf756cd 100644
--- a/pkg/tcpip/stack/nic.go
+++ b/pkg/tcpip/stack/nic.go
@@ -217,6 +217,11 @@ func (n *NIC) disableLocked() *tcpip.Error {
 	}
 
 	if _, ok := n.stack.networkProtocols[header.IPv4ProtocolNumber]; ok {
+		// The NIC may have already left the multicast group.
+		if err := n.leaveGroupLocked(header.IPv4AllSystems, false /* force */); err != nil && err != tcpip.ErrBadLocalAddress {
+			return err
+		}
+
 		// The address may have already been removed.
 		if err := n.removePermanentAddressLocked(ipv4BroadcastAddr.AddressWithPrefix.Address); err != nil && err != tcpip.ErrBadLocalAddress {
 			return err
@@ -255,6 +260,13 @@ func (n *NIC) enable() *tcpip.Error {
 		if _, err := n.addAddressLocked(ipv4BroadcastAddr, NeverPrimaryEndpoint, permanent, static, false /* deprecated */); err != nil {
 			return err
 		}
+
+		// As per RFC 1122 section 3.3.7, all hosts should join the all-hosts
+		// multicast group. Note, the IANA calls the all-hosts multicast group the
+		// all-systems multicast group.
+		if err := n.joinGroupLocked(header.IPv4ProtocolNumber, header.IPv4AllSystems); err != nil {
+			return err
+		}
 	}
 
 	// Join the IPv6 All-Nodes Multicast group if the stack is configured to
@@ -609,6 +621,9 @@ func (n *NIC) findEndpoint(protocol tcpip.NetworkProtocolNumber, address tcpip.A
 // If none exists a temporary one may be created if we are in promiscuous mode
 // or spoofing. Promiscuous mode will only be checked if promiscuous is true.
 // Similarly, spoofing will only be checked if spoofing is true.
+//
+// If the address is the IPv4 broadcast address for an endpoint's network, that
+// endpoint will be returned.
 func (n *NIC) getRefOrCreateTemp(protocol tcpip.NetworkProtocolNumber, address tcpip.Address, peb PrimaryEndpointBehavior, tempRef getRefBehaviour) *referencedNetworkEndpoint {
 	n.mu.RLock()
 
@@ -633,6 +648,16 @@ func (n *NIC) getRefOrCreateTemp(protocol tcpip.NetworkProtocolNumber, address t
 		}
 	}
 
+	// Check if address is a broadcast address for the endpoint's network.
+	//
+	// Only IPv4 has a notion of broadcast addresses.
+	if protocol == header.IPv4ProtocolNumber {
+		if ref := n.getRefForBroadcastRLocked(address); ref != nil {
+			n.mu.RUnlock()
+			return ref
+		}
+	}
+
 	// A usable reference was not found, create a temporary one if requested by
 	// the caller or if the address is found in the NIC's subnets.
 	createTempEP := spoofingOrPromiscuous
@@ -670,8 +695,34 @@ func (n *NIC) getRefOrCreateTemp(protocol tcpip.NetworkProtocolNumber, address t
 	return ref
 }
 
+// getRefForBroadcastLocked returns an endpoint where address is the IPv4
+// broadcast address for the endpoint's network.
+//
+// n.mu MUST be read locked.
+func (n *NIC) getRefForBroadcastRLocked(address tcpip.Address) *referencedNetworkEndpoint {
+	for _, ref := range n.mu.endpoints {
+		// Only IPv4 has a notion of broadcast addresses.
+		if ref.protocol != header.IPv4ProtocolNumber {
+			continue
+		}
+
+		addr := ref.addrWithPrefix()
+		subnet := addr.Subnet()
+		if subnet.IsBroadcast(address) && ref.tryIncRef() {
+			return ref
+		}
+	}
+
+	return nil
+}
+
 /// getRefOrCreateTempLocked returns an existing endpoint for address or creates
 /// and returns a temporary endpoint.
+//
+// If the address is the IPv4 broadcast address for an endpoint's network, that
+// endpoint will be returned.
+//
+// n.mu must be write locked.
 func (n *NIC) getRefOrCreateTempLocked(protocol tcpip.NetworkProtocolNumber, address tcpip.Address, peb PrimaryEndpointBehavior) *referencedNetworkEndpoint {
 	if ref, ok := n.mu.endpoints[NetworkEndpointID{address}]; ok {
 		// No need to check the type as we are ok with expired endpoints at this
@@ -685,6 +736,15 @@ func (n *NIC) getRefOrCreateTempLocked(protocol tcpip.NetworkProtocolNumber, add
 		n.removeEndpointLocked(ref)
 	}
 
+	// Check if address is a broadcast address for an endpoint's network.
+	//
+	// Only IPv4 has a notion of broadcast addresses.
+	if protocol == header.IPv4ProtocolNumber {
+		if ref := n.getRefForBroadcastRLocked(address); ref != nil {
+			return ref
+		}
+	}
+
 	// Add a new temporary endpoint.
 	netProto, ok := n.stack.networkProtocols[protocol]
 	if !ok {
diff --git a/pkg/tcpip/stack/stack.go b/pkg/tcpip/stack/stack.go
index 3f07e4159..5b19c5d59 100644
--- a/pkg/tcpip/stack/stack.go
+++ b/pkg/tcpip/stack/stack.go
@@ -73,6 +73,16 @@ type TCPCubicState struct {
 	WEst                    float64
 }
 
+// TCPRACKState is used to hold a copy of the internal RACK state when the
+// TCPProbeFunc is invoked.
+type TCPRACKState struct {
+	XmitTime    time.Time
+	EndSequence seqnum.Value
+	FACK        seqnum.Value
+	RTT         time.Duration
+	Reord       bool
+}
+
 // TCPEndpointID is the unique 4 tuple that identifies a given endpoint.
 type TCPEndpointID struct {
 	// LocalPort is the local port associated with the endpoint.
@@ -212,6 +222,9 @@ type TCPSenderState struct {
 
 	// Cubic holds the state related to CUBIC congestion control.
 	Cubic TCPCubicState
+
+	// RACKState holds the state related to RACK loss detection algorithm.
+	RACKState TCPRACKState
 }
 
 // TCPSACKInfo holds TCP SACK related information for a given TCP endpoint.
@@ -1972,8 +1985,8 @@ func generateRandInt64() int64 {
 
 // FindNetworkEndpoint returns the network endpoint for the given address.
 func (s *Stack) FindNetworkEndpoint(netProto tcpip.NetworkProtocolNumber, address tcpip.Address) (NetworkEndpoint, *tcpip.Error) {
-	s.mu.Lock()
-	defer s.mu.Unlock()
+	s.mu.RLock()
+	defer s.mu.RUnlock()
 
 	for _, nic := range s.nics {
 		id := NetworkEndpointID{address}
@@ -1992,8 +2005,8 @@ func (s *Stack) FindNetworkEndpoint(netProto tcpip.NetworkProtocolNumber, addres
 
 // FindNICNameFromID returns the name of the nic for the given NICID.
 func (s *Stack) FindNICNameFromID(id tcpip.NICID) string {
-	s.mu.Lock()
-	defer s.mu.Unlock()
+	s.mu.RLock()
+	defer s.mu.RUnlock()
 
 	nic, ok := s.nics[id]
 	if !ok {
diff --git a/pkg/tcpip/stack/stack_test.go b/pkg/tcpip/stack/stack_test.go
index f22062889..0b6deda02 100644
--- a/pkg/tcpip/stack/stack_test.go
+++ b/pkg/tcpip/stack/stack_test.go
@@ -277,6 +277,17 @@ func (l *linkEPWithMockedAttach) isAttached() bool {
 	return l.attached
 }
 
+// Checks to see if list contains an address.
+func containsAddr(list []tcpip.ProtocolAddress, item tcpip.ProtocolAddress) bool {
+	for _, i := range list {
+		if i == item {
+			return true
+		}
+	}
+
+	return false
+}
+
 func TestNetworkReceive(t *testing.T) {
 	// Create a stack with the fake network protocol, one nic, and two
 	// addresses attached to it: 1 & 2.
@@ -1704,7 +1715,7 @@ func testNicForAddressRange(t *testing.T, nicID tcpip.NICID, s *stack.Stack, sub
 
 	// Trying the next address should always fail since it is outside the range.
 	if gotNicID := s.CheckLocalAddress(0, fakeNetNumber, tcpip.Address(addrBytes)); gotNicID != 0 {
-		t.Errorf("got CheckLocalAddress(0, %d, %s) = %d, want = %d", fakeNetNumber, tcpip.Address(addrBytes), gotNicID, 0)
+		t.Errorf("got CheckLocalAddress(0, %d, %s) = %d, want = 0", fakeNetNumber, tcpip.Address(addrBytes), gotNicID)
 	}
 }
 
@@ -3089,6 +3100,13 @@ func TestIPv6SourceAddressSelectionScopeAndSameAddress(t *testing.T) {
 
 func TestAddRemoveIPv4BroadcastAddressOnNICEnableDisable(t *testing.T) {
 	const nicID = 1
+	broadcastAddr := tcpip.ProtocolAddress{
+		Protocol: header.IPv4ProtocolNumber,
+		AddressWithPrefix: tcpip.AddressWithPrefix{
+			Address:   header.IPv4Broadcast,
+			PrefixLen: 32,
+		},
+	}
 
 	e := loopback.New()
 	s := stack.New(stack.Options{
@@ -3099,49 +3117,41 @@ func TestAddRemoveIPv4BroadcastAddressOnNICEnableDisable(t *testing.T) {
 		t.Fatalf("CreateNIC(%d, _, %+v) = %s", nicID, nicOpts, err)
 	}
 
-	allStackAddrs := s.AllAddresses()
-	allNICAddrs, ok := allStackAddrs[nicID]
-	if !ok {
-		t.Fatalf("entry for %d missing from allStackAddrs = %+v", nicID, allStackAddrs)
-	}
-	if l := len(allNICAddrs); l != 0 {
-		t.Fatalf("got len(allNICAddrs) = %d, want = 0", l)
+	{
+		allStackAddrs := s.AllAddresses()
+		if allNICAddrs, ok := allStackAddrs[nicID]; !ok {
+			t.Fatalf("entry for %d missing from allStackAddrs = %+v", nicID, allStackAddrs)
+		} else if containsAddr(allNICAddrs, broadcastAddr) {
+			t.Fatalf("got allNICAddrs = %+v, don't want = %+v", allNICAddrs, broadcastAddr)
+		}
 	}
 
 	// Enabling the NIC should add the IPv4 broadcast address.
 	if err := s.EnableNIC(nicID); err != nil {
 		t.Fatalf("s.EnableNIC(%d): %s", nicID, err)
 	}
-	allStackAddrs = s.AllAddresses()
-	allNICAddrs, ok = allStackAddrs[nicID]
-	if !ok {
-		t.Fatalf("entry for %d missing from allStackAddrs = %+v", nicID, allStackAddrs)
-	}
-	if l := len(allNICAddrs); l != 1 {
-		t.Fatalf("got len(allNICAddrs) = %d, want = 1", l)
-	}
-	want := tcpip.ProtocolAddress{
-		Protocol: header.IPv4ProtocolNumber,
-		AddressWithPrefix: tcpip.AddressWithPrefix{
-			Address:   header.IPv4Broadcast,
-			PrefixLen: 32,
-		},
-	}
-	if allNICAddrs[0] != want {
-		t.Fatalf("got allNICAddrs[0] = %+v, want = %+v", allNICAddrs[0], want)
+
+	{
+		allStackAddrs := s.AllAddresses()
+		if allNICAddrs, ok := allStackAddrs[nicID]; !ok {
+			t.Fatalf("entry for %d missing from allStackAddrs = %+v", nicID, allStackAddrs)
+		} else if !containsAddr(allNICAddrs, broadcastAddr) {
+			t.Fatalf("got allNICAddrs = %+v, want = %+v", allNICAddrs, broadcastAddr)
+		}
 	}
 
 	// Disabling the NIC should remove the IPv4 broadcast address.
 	if err := s.DisableNIC(nicID); err != nil {
 		t.Fatalf("s.DisableNIC(%d): %s", nicID, err)
 	}
-	allStackAddrs = s.AllAddresses()
-	allNICAddrs, ok = allStackAddrs[nicID]
-	if !ok {
-		t.Fatalf("entry for %d missing from allStackAddrs = %+v", nicID, allStackAddrs)
-	}
-	if l := len(allNICAddrs); l != 0 {
-		t.Fatalf("got len(allNICAddrs) = %d, want = 0", l)
+
+	{
+		allStackAddrs := s.AllAddresses()
+		if allNICAddrs, ok := allStackAddrs[nicID]; !ok {
+			t.Fatalf("entry for %d missing from allStackAddrs = %+v", nicID, allStackAddrs)
+		} else if containsAddr(allNICAddrs, broadcastAddr) {
+			t.Fatalf("got allNICAddrs = %+v, don't want = %+v", allNICAddrs, broadcastAddr)
+		}
 	}
 }
 
@@ -3189,50 +3199,93 @@ func TestLeaveIPv6SolicitedNodeAddrBeforeAddrRemoval(t *testing.T) {
 	}
 }
 
-func TestJoinLeaveAllNodesMulticastOnNICEnableDisable(t *testing.T) {
+func TestJoinLeaveMulticastOnNICEnableDisable(t *testing.T) {
 	const nicID = 1
 
-	e := loopback.New()
-	s := stack.New(stack.Options{
-		NetworkProtocols: []stack.NetworkProtocol{ipv6.NewProtocol()},
-	})
-	nicOpts := stack.NICOptions{Disabled: true}
-	if err := s.CreateNICWithOptions(nicID, e, nicOpts); err != nil {
-		t.Fatalf("CreateNIC(%d, _, %+v) = %s", nicID, nicOpts, err)
+	tests := []struct {
+		name  string
+		proto tcpip.NetworkProtocolNumber
+		addr  tcpip.Address
+	}{
+		{
+			name:  "IPv6 All-Nodes",
+			proto: header.IPv6ProtocolNumber,
+			addr:  header.IPv6AllNodesMulticastAddress,
+		},
+		{
+			name:  "IPv4 All-Systems",
+			proto: header.IPv4ProtocolNumber,
+			addr:  header.IPv4AllSystems,
+		},
 	}
 
-	// Should not be in the IPv6 all-nodes multicast group yet because the NIC has
-	// not been enabled yet.
-	isInGroup, err := s.IsInGroup(nicID, header.IPv6AllNodesMulticastAddress)
-	if err != nil {
-		t.Fatalf("IsInGroup(%d, %s): %s", nicID, header.IPv6AllNodesMulticastAddress, err)
-	}
-	if isInGroup {
-		t.Fatalf("got IsInGroup(%d, %s) = true, want = false", nicID, header.IPv6AllNodesMulticastAddress)
-	}
+	for _, test := range tests {
+		t.Run(test.name, func(t *testing.T) {
+			e := loopback.New()
+			s := stack.New(stack.Options{
+				NetworkProtocols: []stack.NetworkProtocol{ipv4.NewProtocol(), ipv6.NewProtocol()},
+			})
+			nicOpts := stack.NICOptions{Disabled: true}
+			if err := s.CreateNICWithOptions(nicID, e, nicOpts); err != nil {
+				t.Fatalf("CreateNIC(%d, _, %+v) = %s", nicID, nicOpts, err)
+			}
 
-	// The all-nodes multicast group should be joined when the NIC is enabled.
-	if err := s.EnableNIC(nicID); err != nil {
-		t.Fatalf("s.EnableNIC(%d): %s", nicID, err)
-	}
-	isInGroup, err = s.IsInGroup(nicID, header.IPv6AllNodesMulticastAddress)
-	if err != nil {
-		t.Fatalf("IsInGroup(%d, %s): %s", nicID, header.IPv6AllNodesMulticastAddress, err)
-	}
-	if !isInGroup {
-		t.Fatalf("got IsInGroup(%d, %s) = false, want = true", nicID, header.IPv6AllNodesMulticastAddress)
-	}
+			// Should not be in the multicast group yet because the NIC has not been
+			// enabled yet.
+			if isInGroup, err := s.IsInGroup(nicID, test.addr); err != nil {
+				t.Fatalf("IsInGroup(%d, %s): %s", nicID, test.addr, err)
+			} else if isInGroup {
+				t.Fatalf("got IsInGroup(%d, %s) = true, want = false", nicID, test.addr)
+			}
 
-	// The all-nodes multicast group should be left when the NIC is disabled.
-	if err := s.DisableNIC(nicID); err != nil {
-		t.Fatalf("s.DisableNIC(%d): %s", nicID, err)
-	}
-	isInGroup, err = s.IsInGroup(nicID, header.IPv6AllNodesMulticastAddress)
-	if err != nil {
-		t.Fatalf("IsInGroup(%d, %s): %s", nicID, header.IPv6AllNodesMulticastAddress, err)
-	}
-	if isInGroup {
-		t.Fatalf("got IsInGroup(%d, %s) = true, want = false", nicID, header.IPv6AllNodesMulticastAddress)
+			// The all-nodes multicast group should be joined when the NIC is enabled.
+			if err := s.EnableNIC(nicID); err != nil {
+				t.Fatalf("s.EnableNIC(%d): %s", nicID, err)
+			}
+
+			if isInGroup, err := s.IsInGroup(nicID, test.addr); err != nil {
+				t.Fatalf("IsInGroup(%d, %s): %s", nicID, test.addr, err)
+			} else if !isInGroup {
+				t.Fatalf("got IsInGroup(%d, %s) = false, want = true", nicID, test.addr)
+			}
+
+			// The multicast group should be left when the NIC is disabled.
+			if err := s.DisableNIC(nicID); err != nil {
+				t.Fatalf("s.DisableNIC(%d): %s", nicID, err)
+			}
+
+			if isInGroup, err := s.IsInGroup(nicID, test.addr); err != nil {
+				t.Fatalf("IsInGroup(%d, %s): %s", nicID, test.addr, err)
+			} else if isInGroup {
+				t.Fatalf("got IsInGroup(%d, %s) = true, want = false", nicID, test.addr)
+			}
+
+			// The all-nodes multicast group should be joined when the NIC is enabled.
+			if err := s.EnableNIC(nicID); err != nil {
+				t.Fatalf("s.EnableNIC(%d): %s", nicID, err)
+			}
+
+			if isInGroup, err := s.IsInGroup(nicID, test.addr); err != nil {
+				t.Fatalf("IsInGroup(%d, %s): %s", nicID, test.addr, err)
+			} else if !isInGroup {
+				t.Fatalf("got IsInGroup(%d, %s) = false, want = true", nicID, test.addr)
+			}
+
+			// Leaving the group before disabling the NIC should not cause an error.
+			if err := s.LeaveGroup(test.proto, nicID, test.addr); err != nil {
+				t.Fatalf("s.LeaveGroup(%d, %d, %s): %s", test.proto, nicID, test.addr, err)
+			}
+
+			if err := s.DisableNIC(nicID); err != nil {
+				t.Fatalf("s.DisableNIC(%d): %s", nicID, err)
+			}
+
+			if isInGroup, err := s.IsInGroup(nicID, test.addr); err != nil {
+				t.Fatalf("IsInGroup(%d, %s): %s", nicID, test.addr, err)
+			} else if isInGroup {
+				t.Fatalf("got IsInGroup(%d, %s) = true, want = false", nicID, test.addr)
+			}
+		})
 	}
 }
 
diff --git a/pkg/tcpip/tests/integration/BUILD b/pkg/tcpip/tests/integration/BUILD
new file mode 100644
index 000000000..7fff30462
--- /dev/null
+++ b/pkg/tcpip/tests/integration/BUILD
@@ -0,0 +1,21 @@
+load("//tools:defs.bzl", "go_test")
+
+package(licenses = ["notice"])
+
+go_test(
+    name = "integration_test",
+    size = "small",
+    srcs = ["multicast_broadcast_test.go"],
+    deps = [
+        "//pkg/tcpip",
+        "//pkg/tcpip/buffer",
+        "//pkg/tcpip/header",
+        "//pkg/tcpip/link/channel",
+        "//pkg/tcpip/network/ipv4",
+        "//pkg/tcpip/network/ipv6",
+        "//pkg/tcpip/stack",
+        "//pkg/tcpip/transport/udp",
+        "//pkg/waiter",
+        "@com_github_google_go_cmp//cmp:go_default_library",
+    ],
+)
diff --git a/pkg/tcpip/tests/integration/multicast_broadcast_test.go b/pkg/tcpip/tests/integration/multicast_broadcast_test.go
new file mode 100644
index 000000000..d9b2d147a
--- /dev/null
+++ b/pkg/tcpip/tests/integration/multicast_broadcast_test.go
@@ -0,0 +1,274 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package integration_test
+
+import (
+	"testing"
+
+	"github.com/google/go-cmp/cmp"
+	"gvisor.dev/gvisor/pkg/tcpip"
+	"gvisor.dev/gvisor/pkg/tcpip/buffer"
+	"gvisor.dev/gvisor/pkg/tcpip/header"
+	"gvisor.dev/gvisor/pkg/tcpip/link/channel"
+	"gvisor.dev/gvisor/pkg/tcpip/network/ipv4"
+	"gvisor.dev/gvisor/pkg/tcpip/network/ipv6"
+	"gvisor.dev/gvisor/pkg/tcpip/stack"
+	"gvisor.dev/gvisor/pkg/tcpip/transport/udp"
+	"gvisor.dev/gvisor/pkg/waiter"
+)
+
+const defaultMTU = 1280
+
+// TestIncomingMulticastAndBroadcast tests receiving a packet destined to some
+// multicast or broadcast address.
+func TestIncomingMulticastAndBroadcast(t *testing.T) {
+	const (
+		nicID      = 1
+		remotePort = 5555
+		localPort  = 80
+		ttl        = 255
+	)
+
+	data := []byte{1, 2, 3, 4}
+
+	// Local IPv4 subnet: 192.168.1.58/24
+	ipv4Addr := tcpip.AddressWithPrefix{
+		Address:   "\xc0\xa8\x01\x3a",
+		PrefixLen: 24,
+	}
+	ipv4Subnet := ipv4Addr.Subnet()
+	ipv4SubnetBcast := ipv4Subnet.Broadcast()
+
+	// Local IPv6 subnet: 200a::1/64
+	ipv6Addr := tcpip.AddressWithPrefix{
+		Address:   "\x20\x0a\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01",
+		PrefixLen: 64,
+	}
+	ipv6Subnet := ipv6Addr.Subnet()
+	ipv6SubnetBcast := ipv6Subnet.Broadcast()
+
+	// Remote addrs.
+	remoteIPv4Addr := tcpip.Address("\x64\x0a\x7b\x18")
+	remoteIPv6Addr := tcpip.Address("\x20\x0b\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02")
+
+	rxIPv4UDP := func(e *channel.Endpoint, dst tcpip.Address) {
+		payloadLen := header.UDPMinimumSize + len(data)
+		totalLen := header.IPv4MinimumSize + payloadLen
+		hdr := buffer.NewPrependable(totalLen)
+		u := header.UDP(hdr.Prepend(payloadLen))
+		u.Encode(&header.UDPFields{
+			SrcPort: remotePort,
+			DstPort: localPort,
+			Length:  uint16(payloadLen),
+		})
+		copy(u.Payload(), data)
+		sum := header.PseudoHeaderChecksum(udp.ProtocolNumber, remoteIPv4Addr, dst, uint16(payloadLen))
+		sum = header.Checksum(data, sum)
+		u.SetChecksum(^u.CalculateChecksum(sum))
+
+		ip := header.IPv4(hdr.Prepend(header.IPv4MinimumSize))
+		ip.Encode(&header.IPv4Fields{
+			IHL:         header.IPv4MinimumSize,
+			TotalLength: uint16(totalLen),
+			Protocol:    uint8(udp.ProtocolNumber),
+			TTL:         ttl,
+			SrcAddr:     remoteIPv4Addr,
+			DstAddr:     dst,
+		})
+
+		e.InjectInbound(header.IPv4ProtocolNumber, &stack.PacketBuffer{
+			Data: hdr.View().ToVectorisedView(),
+		})
+	}
+
+	rxIPv6UDP := func(e *channel.Endpoint, dst tcpip.Address) {
+		payloadLen := header.UDPMinimumSize + len(data)
+		hdr := buffer.NewPrependable(header.IPv6MinimumSize + payloadLen)
+		u := header.UDP(hdr.Prepend(payloadLen))
+		u.Encode(&header.UDPFields{
+			SrcPort: remotePort,
+			DstPort: localPort,
+			Length:  uint16(payloadLen),
+		})
+		copy(u.Payload(), data)
+		sum := header.PseudoHeaderChecksum(udp.ProtocolNumber, remoteIPv6Addr, dst, uint16(payloadLen))
+		sum = header.Checksum(data, sum)
+		u.SetChecksum(^u.CalculateChecksum(sum))
+
+		ip := header.IPv6(hdr.Prepend(header.IPv6MinimumSize))
+		ip.Encode(&header.IPv6Fields{
+			PayloadLength: uint16(payloadLen),
+			NextHeader:    uint8(udp.ProtocolNumber),
+			HopLimit:      ttl,
+			SrcAddr:       remoteIPv6Addr,
+			DstAddr:       dst,
+		})
+
+		e.InjectInbound(header.IPv6ProtocolNumber, &stack.PacketBuffer{
+			Data: hdr.View().ToVectorisedView(),
+		})
+	}
+
+	tests := []struct {
+		name     string
+		bindAddr tcpip.Address
+		dstAddr  tcpip.Address
+		expectRx bool
+	}{
+		{
+			name:     "IPv4 unicast binding to unicast",
+			bindAddr: ipv4Addr.Address,
+			dstAddr:  ipv4Addr.Address,
+			expectRx: true,
+		},
+		{
+			name:     "IPv4 unicast binding to broadcast",
+			bindAddr: header.IPv4Broadcast,
+			dstAddr:  ipv4Addr.Address,
+			expectRx: false,
+		},
+		{
+			name:     "IPv4 unicast binding to wildcard",
+			dstAddr:  ipv4Addr.Address,
+			expectRx: true,
+		},
+
+		{
+			name:     "IPv4 directed broadcast binding to subnet broadcast",
+			bindAddr: ipv4SubnetBcast,
+			dstAddr:  ipv4SubnetBcast,
+			expectRx: true,
+		},
+		{
+			name:     "IPv4 directed broadcast binding to broadcast",
+			bindAddr: header.IPv4Broadcast,
+			dstAddr:  ipv4SubnetBcast,
+			expectRx: false,
+		},
+		{
+			name:     "IPv4 directed broadcast binding to wildcard",
+			dstAddr:  ipv4SubnetBcast,
+			expectRx: true,
+		},
+
+		{
+			name:     "IPv4 broadcast binding to broadcast",
+			bindAddr: header.IPv4Broadcast,
+			dstAddr:  header.IPv4Broadcast,
+			expectRx: true,
+		},
+		{
+			name:     "IPv4 broadcast binding to subnet broadcast",
+			bindAddr: ipv4SubnetBcast,
+			dstAddr:  header.IPv4Broadcast,
+			expectRx: false,
+		},
+		{
+			name:     "IPv4 broadcast binding to wildcard",
+			dstAddr:  ipv4SubnetBcast,
+			expectRx: true,
+		},
+
+		{
+			name:     "IPv4 all-systems multicast binding to all-systems multicast",
+			bindAddr: header.IPv4AllSystems,
+			dstAddr:  header.IPv4AllSystems,
+			expectRx: true,
+		},
+		{
+			name:     "IPv4 all-systems multicast binding to wildcard",
+			dstAddr:  header.IPv4AllSystems,
+			expectRx: true,
+		},
+		{
+			name:     "IPv4 all-systems multicast binding to unicast",
+			bindAddr: ipv4Addr.Address,
+			dstAddr:  header.IPv4AllSystems,
+			expectRx: false,
+		},
+
+		// IPv6 has no notion of a broadcast.
+		{
+			name:     "IPv6 unicast binding to wildcard",
+			dstAddr:  ipv6Addr.Address,
+			expectRx: true,
+		},
+		{
+			name:     "IPv6 broadcast-like address binding to wildcard",
+			dstAddr:  ipv6SubnetBcast,
+			expectRx: false,
+		},
+	}
+
+	for _, test := range tests {
+		t.Run(test.name, func(t *testing.T) {
+			s := stack.New(stack.Options{
+				NetworkProtocols:   []stack.NetworkProtocol{ipv4.NewProtocol(), ipv6.NewProtocol()},
+				TransportProtocols: []stack.TransportProtocol{udp.NewProtocol()},
+			})
+			e := channel.New(0, defaultMTU, "")
+			if err := s.CreateNIC(nicID, e); err != nil {
+				t.Fatalf("CreateNIC(%d, _): %s", nicID, err)
+			}
+			ipv4ProtoAddr := tcpip.ProtocolAddress{Protocol: header.IPv4ProtocolNumber, AddressWithPrefix: ipv4Addr}
+			if err := s.AddProtocolAddress(nicID, ipv4ProtoAddr); err != nil {
+				t.Fatalf("AddProtocolAddress(%d, %+v): %s", nicID, ipv4ProtoAddr, err)
+			}
+			ipv6ProtoAddr := tcpip.ProtocolAddress{Protocol: header.IPv6ProtocolNumber, AddressWithPrefix: ipv6Addr}
+			if err := s.AddProtocolAddress(nicID, ipv6ProtoAddr); err != nil {
+				t.Fatalf("AddProtocolAddress(%d, %+v): %s", nicID, ipv6ProtoAddr, err)
+			}
+
+			var netproto tcpip.NetworkProtocolNumber
+			var rxUDP func(*channel.Endpoint, tcpip.Address)
+			switch l := len(test.dstAddr); l {
+			case header.IPv4AddressSize:
+				netproto = header.IPv4ProtocolNumber
+				rxUDP = rxIPv4UDP
+			case header.IPv6AddressSize:
+				netproto = header.IPv6ProtocolNumber
+				rxUDP = rxIPv6UDP
+			default:
+				t.Fatalf("got unexpected address length = %d bytes", l)
+			}
+
+			wq := waiter.Queue{}
+			ep, err := s.NewEndpoint(udp.ProtocolNumber, netproto, &wq)
+			if err != nil {
+				t.Fatalf("NewEndpoint(%d, %d, _): %s", udp.ProtocolNumber, netproto, err)
+			}
+			defer ep.Close()
+
+			bindAddr := tcpip.FullAddress{Addr: test.bindAddr, Port: localPort}
+			if err := ep.Bind(bindAddr); err != nil {
+				t.Fatalf("ep.Bind(%+v): %s", bindAddr, err)
+			}
+
+			rxUDP(e, test.dstAddr)
+			if gotPayload, _, err := ep.Read(nil); test.expectRx {
+				if err != nil {
+					t.Fatalf("Read(nil): %s", err)
+				}
+				if diff := cmp.Diff(buffer.View(data), gotPayload); diff != "" {
+					t.Errorf("got UDP payload mismatch (-want +got):\n%s", diff)
+				}
+			} else {
+				if err != tcpip.ErrWouldBlock {
+					t.Fatalf("got Read(nil) = (%x, _, %v), want = (_, _, %s)", gotPayload, err, tcpip.ErrWouldBlock)
+				}
+			}
+		})
+	}
+}
diff --git a/pkg/tcpip/transport/tcp/BUILD b/pkg/tcpip/transport/tcp/BUILD
index e860ee484..234fb95ce 100644
--- a/pkg/tcpip/transport/tcp/BUILD
+++ b/pkg/tcpip/transport/tcp/BUILD
@@ -40,6 +40,8 @@ go_library(
         "endpoint_state.go",
         "forwarder.go",
         "protocol.go",
+        "rack.go",
+        "rack_state.go",
         "rcv.go",
         "rcv_state.go",
         "reno.go",
@@ -83,6 +85,7 @@ go_test(
         "dual_stack_test.go",
         "sack_scoreboard_test.go",
         "tcp_noracedetector_test.go",
+        "tcp_rack_test.go",
         "tcp_sack_test.go",
         "tcp_test.go",
         "tcp_timestamp_test.go",
diff --git a/pkg/tcpip/transport/tcp/accept.go b/pkg/tcpip/transport/tcp/accept.go
index 6e00e5526..913ea6535 100644
--- a/pkg/tcpip/transport/tcp/accept.go
+++ b/pkg/tcpip/transport/tcp/accept.go
@@ -521,7 +521,7 @@ func (e *endpoint) handleListenSegment(ctx *listenContext, s *segment) {
 			synOpts := header.TCPSynOptions{
 				WS:    -1,
 				TS:    opts.TS,
-				TSVal: tcpTimeStamp(timeStampOffset()),
+				TSVal: tcpTimeStamp(time.Now(), timeStampOffset()),
 				TSEcr: opts.TSVal,
 				MSS:   mssForRoute(&s.route),
 			}
diff --git a/pkg/tcpip/transport/tcp/connect.go b/pkg/tcpip/transport/tcp/connect.go
index 6e5e55b6f..8dd759ba2 100644
--- a/pkg/tcpip/transport/tcp/connect.go
+++ b/pkg/tcpip/transport/tcp/connect.go
@@ -1166,13 +1166,18 @@ func (e *endpoint) handleSegments(fastPath bool) *tcpip.Error {
 	return nil
 }
 
-// handleSegment handles a given segment and notifies the worker goroutine if
-// if the connection should be terminated.
-func (e *endpoint) handleSegment(s *segment) (cont bool, err *tcpip.Error) {
-	// Invoke the tcp probe if installed.
+func (e *endpoint) probeSegment() {
 	if e.probe != nil {
 		e.probe(e.completeState())
 	}
+}
+
+// handleSegment handles a given segment and notifies the worker goroutine if
+// if the connection should be terminated.
+func (e *endpoint) handleSegment(s *segment) (cont bool, err *tcpip.Error) {
+	// Invoke the tcp probe if installed. The tcp probe function will update
+	// the TCPEndpointState after the segment is processed.
+	defer e.probeSegment()
 
 	if s.flagIsSet(header.TCPFlagRst) {
 		if ok, err := e.handleReset(s); !ok {
diff --git a/pkg/tcpip/transport/tcp/endpoint.go b/pkg/tcpip/transport/tcp/endpoint.go
index 682687ebe..39ea38fe6 100644
--- a/pkg/tcpip/transport/tcp/endpoint.go
+++ b/pkg/tcpip/transport/tcp/endpoint.go
@@ -2692,15 +2692,14 @@ func (e *endpoint) maybeEnableTimestamp(synOpts *header.TCPSynOptions) {
 // timestamp returns the timestamp value to be used in the TSVal field of the
 // timestamp option for outgoing TCP segments for a given endpoint.
 func (e *endpoint) timestamp() uint32 {
-	return tcpTimeStamp(e.tsOffset)
+	return tcpTimeStamp(time.Now(), e.tsOffset)
 }
 
 // tcpTimeStamp returns a timestamp offset by the provided offset. This is
 // not inlined above as it's used when SYN cookies are in use and endpoint
 // is not created at the time when the SYN cookie is sent.
-func tcpTimeStamp(offset uint32) uint32 {
-	now := time.Now()
-	return uint32(now.Unix()*1000+int64(now.Nanosecond()/1e6)) + offset
+func tcpTimeStamp(curTime time.Time, offset uint32) uint32 {
+	return uint32(curTime.Unix()*1000+int64(curTime.Nanosecond()/1e6)) + offset
 }
 
 // timeStampOffset returns a randomized timestamp offset to be used when sending
@@ -2843,6 +2842,14 @@ func (e *endpoint) completeState() stack.TCPEndpointState {
 			WEst:                    cubic.wEst,
 		}
 	}
+
+	rc := e.snd.rc
+	s.Sender.RACKState = stack.TCPRACKState{
+		XmitTime:    rc.xmitTime,
+		EndSequence: rc.endSequence,
+		FACK:        rc.fack,
+		RTT:         rc.rtt,
+	}
 	return s
 }
 
diff --git a/pkg/tcpip/transport/tcp/protocol.go b/pkg/tcpip/transport/tcp/protocol.go
index b34e47bbd..d9abb8d94 100644
--- a/pkg/tcpip/transport/tcp/protocol.go
+++ b/pkg/tcpip/transport/tcp/protocol.go
@@ -80,6 +80,25 @@ const (
 // enable/disable SACK support in TCP. See: https://tools.ietf.org/html/rfc2018.
 type SACKEnabled bool
 
+// Recovery is used by stack.(*Stack).TransportProtocolOption to
+// set loss detection algorithm in TCP.
+type Recovery int32
+
+const (
+	// RACKLossDetection indicates RACK is used for loss detection and
+	// recovery.
+	RACKLossDetection Recovery = 1 << iota
+
+	// RACKStaticReoWnd indicates the reordering window should not be
+	// adjusted when DSACK is received.
+	RACKStaticReoWnd
+
+	// RACKNoDupTh indicates RACK should not consider the classic three
+	// duplicate acknowledgements rule to mark the segments as lost. This
+	// is used when reordering is not detected.
+	RACKNoDupTh
+)
+
 // DelayEnabled is used by stack.(Stack*).TransportProtocolOption to
 // enable/disable Nagle's algorithm in TCP.
 type DelayEnabled bool
@@ -161,6 +180,7 @@ func (s *synRcvdCounter) Threshold() uint64 {
 type protocol struct {
 	mu                         sync.RWMutex
 	sackEnabled                bool
+	recovery                   Recovery
 	delayEnabled               bool
 	sendBufferSize             SendBufferSizeOption
 	recvBufferSize             ReceiveBufferSizeOption
@@ -280,6 +300,12 @@ func (p *protocol) SetOption(option interface{}) *tcpip.Error {
 		p.mu.Unlock()
 		return nil
 
+	case Recovery:
+		p.mu.Lock()
+		p.recovery = Recovery(v)
+		p.mu.Unlock()
+		return nil
+
 	case DelayEnabled:
 		p.mu.Lock()
 		p.delayEnabled = bool(v)
@@ -394,6 +420,12 @@ func (p *protocol) Option(option interface{}) *tcpip.Error {
 		p.mu.RUnlock()
 		return nil
 
+	case *Recovery:
+		p.mu.RLock()
+		*v = Recovery(p.recovery)
+		p.mu.RUnlock()
+		return nil
+
 	case *DelayEnabled:
 		p.mu.RLock()
 		*v = DelayEnabled(p.delayEnabled)
@@ -535,6 +567,7 @@ func NewProtocol() stack.TransportProtocol {
 		minRTO:                     MinRTO,
 		maxRTO:                     MaxRTO,
 		maxRetries:                 MaxRetries,
+		recovery:                   RACKLossDetection,
 	}
 	p.dispatcher.init(runtime.GOMAXPROCS(0))
 	return &p
diff --git a/pkg/tcpip/transport/tcp/rack.go b/pkg/tcpip/transport/tcp/rack.go
new file mode 100644
index 000000000..d969ca23a
--- /dev/null
+++ b/pkg/tcpip/transport/tcp/rack.go
@@ -0,0 +1,82 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package tcp
+
+import (
+	"time"
+
+	"gvisor.dev/gvisor/pkg/tcpip/seqnum"
+)
+
+// RACK is a loss detection algorithm used in TCP to detect packet loss and
+// reordering using transmission timestamp of the packets instead of packet or
+// sequence counts. To use RACK, SACK should be enabled on the connection.
+
+// rackControl stores the rack related fields.
+// See: https://tools.ietf.org/html/draft-ietf-tcpm-rack-08#section-6.1
+//
+// +stateify savable
+type rackControl struct {
+	// xmitTime is the latest transmission timestamp of rackControl.seg.
+	xmitTime time.Time `state:".(unixTime)"`
+
+	// endSequence is the ending TCP sequence number of rackControl.seg.
+	endSequence seqnum.Value
+
+	// fack is the highest selectively or cumulatively acknowledged
+	// sequence.
+	fack seqnum.Value
+
+	// rtt is the RTT of the most recently delivered packet on the
+	// connection (either cumulatively acknowledged or selectively
+	// acknowledged) that was not marked invalid as a possible spurious
+	// retransmission.
+	rtt time.Duration
+}
+
+// Update will update the RACK related fields when an ACK has been received.
+// See: https://tools.ietf.org/html/draft-ietf-tcpm-rack-08#section-7.2
+func (rc *rackControl) Update(seg *segment, ackSeg *segment, srtt time.Duration, offset uint32) {
+	rtt := time.Now().Sub(seg.xmitTime)
+
+	// If the ACK is for a retransmitted packet, do not update if it is a
+	// spurious inference which is determined by below checks:
+	// 1. When Timestamping option is available, if the TSVal is less than the
+	// transmit time of the most recent retransmitted packet.
+	// 2. When RTT calculated for the packet is less than the smoothed RTT
+	// for the connection.
+	// See: https://tools.ietf.org/html/draft-ietf-tcpm-rack-08#section-7.2
+	// step 2
+	if seg.xmitCount > 1 {
+		if ackSeg.parsedOptions.TS && ackSeg.parsedOptions.TSEcr != 0 {
+			if ackSeg.parsedOptions.TSEcr < tcpTimeStamp(seg.xmitTime, offset) {
+				return
+			}
+		}
+		if rtt < srtt {
+			return
+		}
+	}
+
+	rc.rtt = rtt
+	// Update rc.xmitTime and rc.endSequence to the transmit time and
+	// ending sequence number of the packet which has been acknowledged
+	// most recently.
+	endSeq := seg.sequenceNumber.Add(seqnum.Size(seg.data.Size()))
+	if rc.xmitTime.Before(seg.xmitTime) || (seg.xmitTime.Equal(rc.xmitTime) && rc.endSequence.LessThan(endSeq)) {
+		rc.xmitTime = seg.xmitTime
+		rc.endSequence = endSeq
+	}
+}
diff --git a/pkg/tcpip/transport/tcp/rack_state.go b/pkg/tcpip/transport/tcp/rack_state.go
new file mode 100644
index 000000000..c9dc7e773
--- /dev/null
+++ b/pkg/tcpip/transport/tcp/rack_state.go
@@ -0,0 +1,29 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package tcp
+
+import (
+	"time"
+)
+
+// saveXmitTime is invoked by stateify.
+func (rc *rackControl) saveXmitTime() unixTime {
+	return unixTime{rc.xmitTime.Unix(), rc.xmitTime.UnixNano()}
+}
+
+// loadXmitTime is invoked by stateify.
+func (rc *rackControl) loadXmitTime(unix unixTime) {
+	rc.xmitTime = time.Unix(unix.second, unix.nano)
+}
diff --git a/pkg/tcpip/transport/tcp/snd.go b/pkg/tcpip/transport/tcp/snd.go
index 5862c32f2..c55589c45 100644
--- a/pkg/tcpip/transport/tcp/snd.go
+++ b/pkg/tcpip/transport/tcp/snd.go
@@ -191,6 +191,10 @@ type sender struct {
 
 	// cc is the congestion control algorithm in use for this sender.
 	cc congestionControl
+
+	// rc has the fields needed for implementing RACK loss detection
+	// algorithm.
+	rc rackControl
 }
 
 // rtt is a synchronization wrapper used to appease stateify. See the comment
@@ -1272,21 +1276,21 @@ func (s *sender) checkDuplicateAck(seg *segment) (rtx bool) {
 
 // handleRcvdSegment is called when a segment is received; it is responsible for
 // updating the send-related state.
-func (s *sender) handleRcvdSegment(seg *segment) {
+func (s *sender) handleRcvdSegment(rcvdSeg *segment) {
 	// Check if we can extract an RTT measurement from this ack.
-	if !seg.parsedOptions.TS && s.rttMeasureSeqNum.LessThan(seg.ackNumber) {
+	if !rcvdSeg.parsedOptions.TS && s.rttMeasureSeqNum.LessThan(rcvdSeg.ackNumber) {
 		s.updateRTO(time.Now().Sub(s.rttMeasureTime))
 		s.rttMeasureSeqNum = s.sndNxt
 	}
 
 	// Update Timestamp if required. See RFC7323, section-4.3.
-	if s.ep.sendTSOk && seg.parsedOptions.TS {
-		s.ep.updateRecentTimestamp(seg.parsedOptions.TSVal, s.maxSentAck, seg.sequenceNumber)
+	if s.ep.sendTSOk && rcvdSeg.parsedOptions.TS {
+		s.ep.updateRecentTimestamp(rcvdSeg.parsedOptions.TSVal, s.maxSentAck, rcvdSeg.sequenceNumber)
 	}
 
 	// Insert SACKBlock information into our scoreboard.
 	if s.ep.sackPermitted {
-		for _, sb := range seg.parsedOptions.SACKBlocks {
+		for _, sb := range rcvdSeg.parsedOptions.SACKBlocks {
 			// Only insert the SACK block if the following holds
 			// true:
 			//  * SACK block acks data after the ack number in the
@@ -1299,27 +1303,27 @@ func (s *sender) handleRcvdSegment(seg *segment) {
 			// NOTE: This check specifically excludes DSACK blocks
 			// which have start/end before sndUna and are used to
 			// indicate spurious retransmissions.
-			if seg.ackNumber.LessThan(sb.Start) && s.sndUna.LessThan(sb.Start) && sb.End.LessThanEq(s.sndNxt) && !s.ep.scoreboard.IsSACKED(sb) {
+			if rcvdSeg.ackNumber.LessThan(sb.Start) && s.sndUna.LessThan(sb.Start) && sb.End.LessThanEq(s.sndNxt) && !s.ep.scoreboard.IsSACKED(sb) {
 				s.ep.scoreboard.Insert(sb)
-				seg.hasNewSACKInfo = true
+				rcvdSeg.hasNewSACKInfo = true
 			}
 		}
 		s.SetPipe()
 	}
 
 	// Count the duplicates and do the fast retransmit if needed.
-	rtx := s.checkDuplicateAck(seg)
+	rtx := s.checkDuplicateAck(rcvdSeg)
 
 	// Stash away the current window size.
-	s.sndWnd = seg.window
+	s.sndWnd = rcvdSeg.window
 
-	ack := seg.ackNumber
+	ack := rcvdSeg.ackNumber
 
 	// Disable zero window probing if remote advertizes a non-zero receive
 	// window. This can be with an ACK to the zero window probe (where the
 	// acknumber refers to the already acknowledged byte) OR to any previously
 	// unacknowledged segment.
-	if s.zeroWindowProbing && seg.window > 0 &&
+	if s.zeroWindowProbing && rcvdSeg.window > 0 &&
 		(ack == s.sndUna || (ack-1).InRange(s.sndUna, s.sndNxt)) {
 		s.disableZeroWindowProbing()
 	}
@@ -1344,10 +1348,10 @@ func (s *sender) handleRcvdSegment(seg *segment) {
 		//    averaged RTT measurement only if the segment acknowledges
 		//    some new data, i.e., only if it advances the left edge of
 		//    the send window.
-		if s.ep.sendTSOk && seg.parsedOptions.TSEcr != 0 {
+		if s.ep.sendTSOk && rcvdSeg.parsedOptions.TSEcr != 0 {
 			// TSVal/Ecr values sent by Netstack are at a millisecond
 			// granularity.
-			elapsed := time.Duration(s.ep.timestamp()-seg.parsedOptions.TSEcr) * time.Millisecond
+			elapsed := time.Duration(s.ep.timestamp()-rcvdSeg.parsedOptions.TSEcr) * time.Millisecond
 			s.updateRTO(elapsed)
 		}
 
@@ -1361,6 +1365,9 @@ func (s *sender) handleRcvdSegment(seg *segment) {
 
 		ackLeft := acked
 		originalOutstanding := s.outstanding
+		s.rtt.Lock()
+		srtt := s.rtt.srtt
+		s.rtt.Unlock()
 		for ackLeft > 0 {
 			// We use logicalLen here because we can have FIN
 			// segments (which are always at the end of list) that
@@ -1380,6 +1387,11 @@ func (s *sender) handleRcvdSegment(seg *segment) {
 				s.writeNext = seg.Next()
 			}
 
+			// Update the RACK fields if SACK is enabled.
+			if s.ep.sackPermitted {
+				s.rc.Update(seg, rcvdSeg, srtt, s.ep.tsOffset)
+			}
+
 			s.writeList.Remove(seg)
 
 			// if SACK is enabled then Only reduce outstanding if
@@ -1435,7 +1447,7 @@ func (s *sender) handleRcvdSegment(seg *segment) {
 	// that the window opened up, or the congestion window was inflated due
 	// to a duplicate ack during fast recovery. This will also re-enable
 	// the retransmit timer if needed.
-	if !s.ep.sackPermitted || s.fr.active || s.dupAckCount == 0 || seg.hasNewSACKInfo {
+	if !s.ep.sackPermitted || s.fr.active || s.dupAckCount == 0 || rcvdSeg.hasNewSACKInfo {
 		s.sendData()
 	}
 }
diff --git a/pkg/tcpip/transport/tcp/tcp_rack_test.go b/pkg/tcpip/transport/tcp/tcp_rack_test.go
new file mode 100644
index 000000000..e03f101e8
--- /dev/null
+++ b/pkg/tcpip/transport/tcp/tcp_rack_test.go
@@ -0,0 +1,74 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package tcp_test
+
+import (
+	"testing"
+	"time"
+
+	"gvisor.dev/gvisor/pkg/tcpip"
+	"gvisor.dev/gvisor/pkg/tcpip/buffer"
+	"gvisor.dev/gvisor/pkg/tcpip/header"
+	"gvisor.dev/gvisor/pkg/tcpip/stack"
+	"gvisor.dev/gvisor/pkg/tcpip/transport/tcp/testing/context"
+)
+
+// TestRACKUpdate tests the RACK related fields are updated when an ACK is
+// received on a SACK enabled connection.
+func TestRACKUpdate(t *testing.T) {
+	const maxPayload = 10
+	const tsOptionSize = 12
+	const maxTCPOptionSize = 40
+
+	c := context.New(t, uint32(header.TCPMinimumSize+header.IPv4MinimumSize+maxTCPOptionSize+maxPayload))
+	defer c.Cleanup()
+
+	var xmitTime time.Time
+	c.Stack().AddTCPProbe(func(state stack.TCPEndpointState) {
+		// Validate that the endpoint Sender.RACKState is what we expect.
+		if state.Sender.RACKState.XmitTime.Before(xmitTime) {
+			t.Fatalf("RACK transmit time failed to update when an ACK is received")
+		}
+
+		gotSeq := state.Sender.RACKState.EndSequence
+		wantSeq := state.Sender.SndNxt
+		if !gotSeq.LessThanEq(wantSeq) || gotSeq.LessThan(wantSeq) {
+			t.Fatalf("RACK sequence number failed to update, got: %v, but want: %v", gotSeq, wantSeq)
+		}
+
+		if state.Sender.RACKState.RTT == 0 {
+			t.Fatalf("RACK RTT failed to update when an ACK is received")
+		}
+	})
+	setStackSACKPermitted(t, c, true)
+	createConnectedWithSACKAndTS(c)
+
+	data := buffer.NewView(maxPayload)
+	for i := range data {
+		data[i] = byte(i)
+	}
+
+	// Write the data.
+	xmitTime = time.Now()
+	if _, _, err := c.EP.Write(tcpip.SlicePayload(data), tcpip.WriteOptions{}); err != nil {
+		t.Fatalf("Write failed: %s", err)
+	}
+
+	bytesRead := 0
+	c.ReceiveAndCheckPacketWithOptions(data, bytesRead, maxPayload, tsOptionSize)
+	bytesRead += maxPayload
+	c.SendAck(790, bytesRead)
+	time.Sleep(200 * time.Millisecond)
+}
diff --git a/pkg/test/dockerutil/profile.go b/pkg/test/dockerutil/profile.go
index 1fab33083..f0396ef24 100644
--- a/pkg/test/dockerutil/profile.go
+++ b/pkg/test/dockerutil/profile.go
@@ -49,17 +49,16 @@ type Profile interface {
 // should have --profile set as an option in /etc/docker/daemon.json in
 // order for profiling to work with Pprof.
 type Pprof struct {
-	BasePath         string // path to put profiles
-	BlockProfile     bool
-	CPUProfile       bool
-	GoRoutineProfile bool
-	HeapProfile      bool
-	MutexProfile     bool
-	Duration         time.Duration // duration to run profiler e.g. '10s' or '1m'.
-	shouldRun        bool
-	cmd              *exec.Cmd
-	stdout           io.ReadCloser
-	stderr           io.ReadCloser
+	BasePath     string // path to put profiles
+	BlockProfile bool
+	CPUProfile   bool
+	HeapProfile  bool
+	MutexProfile bool
+	Duration     time.Duration // duration to run profiler e.g. '10s' or '1m'.
+	shouldRun    bool
+	cmd          *exec.Cmd
+	stdout       io.ReadCloser
+	stderr       io.ReadCloser
 }
 
 // MakePprofFromFlags makes a Pprof profile from flags.
@@ -68,13 +67,12 @@ func MakePprofFromFlags(c *Container) *Pprof {
 		return nil
 	}
 	return &Pprof{
-		BasePath:         filepath.Join(*pprofBaseDir, c.runtime, c.Name),
-		BlockProfile:     *pprofBlock,
-		CPUProfile:       *pprofCPU,
-		GoRoutineProfile: *pprofGo,
-		HeapProfile:      *pprofHeap,
-		MutexProfile:     *pprofMutex,
-		Duration:         *duration,
+		BasePath:     filepath.Join(*pprofBaseDir, c.runtime, c.Name),
+		BlockProfile: *pprofBlock,
+		CPUProfile:   *pprofCPU,
+		HeapProfile:  *pprofHeap,
+		MutexProfile: *pprofMutex,
+		Duration:     *duration,
 	}
 }
 
@@ -138,9 +136,6 @@ func (p *Pprof) makeProfileArgs(c *Container) []string {
 	if p.CPUProfile {
 		ret = append(ret, fmt.Sprintf("--profile-cpu=%s", filepath.Join(p.BasePath, "cpu.pprof")))
 	}
-	if p.GoRoutineProfile {
-		ret = append(ret, fmt.Sprintf("--profile-goroutine=%s", filepath.Join(p.BasePath, "go.pprof")))
-	}
 	if p.HeapProfile {
 		ret = append(ret, fmt.Sprintf("--profile-heap=%s", filepath.Join(p.BasePath, "heap.pprof")))
 	}
diff --git a/pkg/test/dockerutil/profile_test.go b/pkg/test/dockerutil/profile_test.go
index b7b4d7618..8c4ffe483 100644
--- a/pkg/test/dockerutil/profile_test.go
+++ b/pkg/test/dockerutil/profile_test.go
@@ -51,13 +51,12 @@ func TestPprof(t *testing.T) {
 		{
 			name: "All",
 			pprof: Pprof{
-				BasePath:         basePath,
-				BlockProfile:     true,
-				CPUProfile:       true,
-				GoRoutineProfile: true,
-				HeapProfile:      true,
-				MutexProfile:     true,
-				Duration:         2 * time.Second,
+				BasePath:     basePath,
+				BlockProfile: true,
+				CPUProfile:   true,
+				HeapProfile:  true,
+				MutexProfile: true,
+				Duration:     2 * time.Second,
 			},
 			expectedFiles: []string{block, cpu, goprofle, heap, mutex},
 		},
diff --git a/runsc/boot/controller.go b/runsc/boot/controller.go
index 3e5e4c22f..626a3816e 100644
--- a/runsc/boot/controller.go
+++ b/runsc/boot/controller.go
@@ -101,14 +101,13 @@ const (
 
 // Profiling related commands (see pprof.go for more details).
 const (
-	StartCPUProfile  = "Profile.StartCPUProfile"
-	StopCPUProfile   = "Profile.StopCPUProfile"
-	HeapProfile      = "Profile.HeapProfile"
-	GoroutineProfile = "Profile.GoroutineProfile"
-	BlockProfile     = "Profile.BlockProfile"
-	MutexProfile     = "Profile.MutexProfile"
-	StartTrace       = "Profile.StartTrace"
-	StopTrace        = "Profile.StopTrace"
+	StartCPUProfile = "Profile.StartCPUProfile"
+	StopCPUProfile  = "Profile.StopCPUProfile"
+	HeapProfile     = "Profile.HeapProfile"
+	BlockProfile    = "Profile.BlockProfile"
+	MutexProfile    = "Profile.MutexProfile"
+	StartTrace      = "Profile.StartTrace"
+	StopTrace       = "Profile.StopTrace"
 )
 
 // Logging related commands (see logging.go for more details).
@@ -129,42 +128,52 @@ type controller struct {
 
 	// manager holds the containerManager methods.
 	manager *containerManager
+
+	// pprop holds the profile instance if enabled. It may be nil.
+	pprof *control.Profile
 }
 
 // newController creates a new controller. The caller must call
 // controller.srv.StartServing() to start the controller.
 func newController(fd int, l *Loader) (*controller, error) {
-	srv, err := server.CreateFromFD(fd)
+	ctrl := &controller{}
+	var err error
+	ctrl.srv, err = server.CreateFromFD(fd)
 	if err != nil {
 		return nil, err
 	}
 
-	manager := &containerManager{
+	ctrl.manager = &containerManager{
 		startChan:       make(chan struct{}),
 		startResultChan: make(chan error),
 		l:               l,
 	}
-	srv.Register(manager)
+	ctrl.srv.Register(ctrl.manager)
 
 	if eps, ok := l.k.RootNetworkNamespace().Stack().(*netstack.Stack); ok {
 		net := &Network{
 			Stack: eps.Stack,
 		}
-		srv.Register(net)
+		ctrl.srv.Register(net)
 	}
 
-	srv.Register(&debug{})
-	srv.Register(&control.Logging{})
+	ctrl.srv.Register(&debug{})
+	ctrl.srv.Register(&control.Logging{})
+
 	if l.root.conf.ProfileEnable {
-		srv.Register(&control.Profile{
-			Kernel: l.k,
-		})
+		ctrl.pprof = &control.Profile{Kernel: l.k}
+		ctrl.srv.Register(ctrl.pprof)
 	}
 
-	return &controller{
-		srv:     srv,
-		manager: manager,
-	}, nil
+	return ctrl, nil
+}
+
+func (c *controller) stop() {
+	if c.pprof != nil {
+		// These are noop if there is nothing being profiled.
+		_ = c.pprof.StopCPUProfile(nil, nil)
+		_ = c.pprof.StopTrace(nil, nil)
+	}
 }
 
 // containerManager manages sandbox containers.
diff --git a/runsc/boot/loader.go b/runsc/boot/loader.go
index e0d077f5a..533b9c5e7 100644
--- a/runsc/boot/loader.go
+++ b/runsc/boot/loader.go
@@ -16,12 +16,12 @@
 package boot
 
 import (
+	"errors"
 	"fmt"
 	mrand "math/rand"
 	"os"
 	"runtime"
 	"sync/atomic"
-	"syscall"
 	gtime "time"
 
 	specs "github.com/opencontainers/runtime-spec/specs-go"
@@ -187,7 +187,7 @@ type Args struct {
 }
 
 // make sure stdioFDs are always the same on initial start and on restore
-const startingStdioFD = 64
+const startingStdioFD = 256
 
 // New initializes a new kernel loader configured by spec.
 // New also handles setting up a kernel for restoring a container.
@@ -360,15 +360,20 @@ func New(args Args) (*Loader, error) {
 	var stdioFDs []int
 	newfd := startingStdioFD
 	for _, fd := range args.StdioFDs {
-		err := syscall.Dup3(fd, newfd, syscall.O_CLOEXEC)
+		// Check that newfd is unused to avoid clobbering over it.
+		if _, err := unix.FcntlInt(uintptr(newfd), unix.F_GETFD, 0); !errors.Is(err, unix.EBADF) {
+			if err != nil {
+				return nil, fmt.Errorf("error checking for FD (%d) conflict: %w", newfd, err)
+			}
+			return nil, fmt.Errorf("unable to remap stdios, FD %d is already in use", newfd)
+		}
+
+		err := unix.Dup3(fd, newfd, unix.O_CLOEXEC)
 		if err != nil {
 			return nil, fmt.Errorf("dup3 of stdioFDs failed: %v", err)
 		}
 		stdioFDs = append(stdioFDs, newfd)
-		err = syscall.Close(fd)
-		if err != nil {
-			return nil, fmt.Errorf("close original stdioFDs failed: %v", err)
-		}
+		_ = unix.Close(fd)
 		newfd++
 	}
 
@@ -458,6 +463,11 @@ func (l *Loader) Destroy() {
 		l.stopSignalForwarding()
 	}
 	l.watchdog.Stop()
+
+	for i, fd := range l.root.stdioFDs {
+		_ = unix.Close(fd)
+		l.root.stdioFDs[i] = -1
+	}
 }
 
 func createPlatform(conf *Config, deviceFile *os.File) (platform.Platform, error) {
@@ -591,11 +601,9 @@ func (l *Loader) run() error {
 	// during restore, we can release l.stdioFDs now. VFS2 takes ownership of the
 	// passed FDs, so only close for VFS1.
 	if !kernel.VFS2Enabled {
-		for _, fd := range l.root.stdioFDs {
-			err := syscall.Close(fd)
-			if err != nil {
-				return fmt.Errorf("close dup()ed stdioFDs: %v", err)
-			}
+		for i, fd := range l.root.stdioFDs {
+			_ = unix.Close(fd)
+			l.root.stdioFDs[i] = -1
 		}
 	}
 
@@ -686,7 +694,7 @@ func (l *Loader) startContainer(spec *specs.Spec, conf *Config, cid string, file
 
 	// Can't take ownership away from os.File. dup them to get a new FDs.
 	for _, f := range files[3:] {
-		fd, err := syscall.Dup(int(f.Fd()))
+		fd, err := unix.Dup(int(f.Fd()))
 		if err != nil {
 			return fmt.Errorf("failed to dup file: %v", err)
 		}
@@ -1000,6 +1008,9 @@ func (l *Loader) WaitExit() kernel.ExitStatus {
 	// Wait for container.
 	l.k.WaitExited()
 
+	// Cleanup
+	l.ctrl.stop()
+
 	return l.k.GlobalInit().ExitStatus()
 }
 
diff --git a/runsc/boot/vfs.go b/runsc/boot/vfs.go
index 252ca07e3..e7d6035bb 100644
--- a/runsc/boot/vfs.go
+++ b/runsc/boot/vfs.go
@@ -37,6 +37,7 @@ import (
 	"gvisor.dev/gvisor/pkg/sentry/fsimpl/proc"
 	"gvisor.dev/gvisor/pkg/sentry/fsimpl/sys"
 	"gvisor.dev/gvisor/pkg/sentry/fsimpl/tmpfs"
+	"gvisor.dev/gvisor/pkg/sentry/inet"
 	"gvisor.dev/gvisor/pkg/sentry/kernel"
 	"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
 	"gvisor.dev/gvisor/pkg/sentry/vfs"
@@ -89,6 +90,12 @@ func registerFilesystems(k *kernel.Kernel) error {
 	if err := ttydev.Register(vfsObj); err != nil {
 		return fmt.Errorf("registering ttydev: %w", err)
 	}
+	tunSupported := tundev.IsNetTunSupported(inet.StackFromContext(ctx))
+	if tunSupported {
+		if err := tundev.Register(vfsObj); err != nil {
+			return fmt.Errorf("registering tundev: %v", err)
+		}
+	}
 
 	if kernel.FUSEEnabled {
 		if err := fuse.Register(vfsObj); err != nil {
@@ -96,9 +103,6 @@ func registerFilesystems(k *kernel.Kernel) error {
 		}
 	}
 
-	if err := tundev.Register(vfsObj); err != nil {
-		return fmt.Errorf("registering tundev: %v", err)
-	}
 	a, err := devtmpfs.NewAccessor(ctx, vfsObj, creds, devtmpfs.Name)
 	if err != nil {
 		return fmt.Errorf("creating devtmpfs accessor: %w", err)
@@ -114,8 +118,10 @@ func registerFilesystems(k *kernel.Kernel) error {
 	if err := ttydev.CreateDevtmpfsFiles(ctx, a); err != nil {
 		return fmt.Errorf("creating ttydev devtmpfs files: %w", err)
 	}
-	if err := tundev.CreateDevtmpfsFiles(ctx, a); err != nil {
-		return fmt.Errorf("creating tundev devtmpfs files: %v", err)
+	if tunSupported {
+		if err := tundev.CreateDevtmpfsFiles(ctx, a); err != nil {
+			return fmt.Errorf("creating tundev devtmpfs files: %v", err)
+		}
 	}
 
 	if kernel.FUSEEnabled {
diff --git a/runsc/cmd/debug.go b/runsc/cmd/debug.go
index b5de2588b..742f8c344 100644
--- a/runsc/cmd/debug.go
+++ b/runsc/cmd/debug.go
@@ -32,20 +32,19 @@ import (
 
 // Debug implements subcommands.Command for the "debug" command.
 type Debug struct {
-	pid              int
-	stacks           bool
-	signal           int
-	profileHeap      string
-	profileCPU       string
-	profileGoroutine string
-	profileBlock     string
-	profileMutex     string
-	trace            string
-	strace           string
-	logLevel         string
-	logPackets       string
-	duration         time.Duration
-	ps               bool
+	pid          int
+	stacks       bool
+	signal       int
+	profileHeap  string
+	profileCPU   string
+	profileBlock string
+	profileMutex string
+	trace        string
+	strace       string
+	logLevel     string
+	logPackets   string
+	duration     time.Duration
+	ps           bool
 }
 
 // Name implements subcommands.Command.
@@ -69,7 +68,6 @@ func (d *Debug) SetFlags(f *flag.FlagSet) {
 	f.BoolVar(&d.stacks, "stacks", false, "if true, dumps all sandbox stacks to the log")
 	f.StringVar(&d.profileHeap, "profile-heap", "", "writes heap profile to the given file.")
 	f.StringVar(&d.profileCPU, "profile-cpu", "", "writes CPU profile to the given file.")
-	f.StringVar(&d.profileGoroutine, "profile-goroutine", "", "writes goroutine profile to the given file.")
 	f.StringVar(&d.profileBlock, "profile-block", "", "writes block profile to the given file.")
 	f.StringVar(&d.profileMutex, "profile-mutex", "", "writes mutex profile to the given file.")
 	f.DurationVar(&d.duration, "duration", time.Second, "amount of time to wait for CPU and trace profiles")
@@ -153,18 +151,6 @@ func (d *Debug) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
 		}
 		log.Infof("Heap profile written to %q", d.profileHeap)
 	}
-	if d.profileGoroutine != "" {
-		f, err := os.Create(d.profileGoroutine)
-		if err != nil {
-			return Errorf(err.Error())
-		}
-		defer f.Close()
-
-		if err := c.Sandbox.GoroutineProfile(f); err != nil {
-			return Errorf(err.Error())
-		}
-		log.Infof("Goroutine profile written to %q", d.profileGoroutine)
-	}
 	if d.profileBlock != "" {
 		f, err := os.Create(d.profileBlock)
 		if err != nil {
diff --git a/runsc/sandbox/sandbox.go b/runsc/sandbox/sandbox.go
index 2afcc27af..36bb0c9c9 100644
--- a/runsc/sandbox/sandbox.go
+++ b/runsc/sandbox/sandbox.go
@@ -1012,26 +1012,6 @@ func (s *Sandbox) StopCPUProfile() error {
 	return nil
 }
 
-// GoroutineProfile writes a goroutine profile to the given file.
-func (s *Sandbox) GoroutineProfile(f *os.File) error {
-	log.Debugf("Goroutine profile %q", s.ID)
-	conn, err := s.sandboxConnect()
-	if err != nil {
-		return err
-	}
-	defer conn.Close()
-
-	opts := control.ProfileOpts{
-		FilePayload: urpc.FilePayload{
-			Files: []*os.File{f},
-		},
-	}
-	if err := conn.Call(boot.GoroutineProfile, &opts, nil); err != nil {
-		return fmt.Errorf("getting sandbox %q goroutine profile: %v", s.ID, err)
-	}
-	return nil
-}
-
 // BlockProfile writes a block profile to the given file.
 func (s *Sandbox) BlockProfile(f *os.File) error {
 	log.Debugf("Block profile %q", s.ID)
diff --git a/test/benchmarks/base/BUILD b/test/benchmarks/base/BUILD
new file mode 100644
index 000000000..3cb07797d
--- /dev/null
+++ b/test/benchmarks/base/BUILD
@@ -0,0 +1,26 @@
+load("//tools:defs.bzl", "go_library", "go_test")
+
+package(licenses = ["notice"])
+
+go_library(
+    name = "base",
+    testonly = 1,
+    srcs = ["base.go"],
+    deps = ["//test/benchmarks/harness"],
+)
+
+go_test(
+    name = "base_test",
+    size = "small",
+    srcs = ["sysbench_test.go"],
+    library = ":base",
+    tags = [
+        # Requires docker and runsc to be configured before test runs.
+        "manual",
+        "local",
+    ],
+    deps = [
+        "//pkg/test/dockerutil",
+        "//test/benchmarks/tools",
+    ],
+)
diff --git a/test/benchmarks/base/base.go b/test/benchmarks/base/base.go
new file mode 100644
index 000000000..7eb44d0ab
--- /dev/null
+++ b/test/benchmarks/base/base.go
@@ -0,0 +1,31 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package base holds base performance benchmarks.
+package base
+
+import (
+	"os"
+	"testing"
+
+	"gvisor.dev/gvisor/test/benchmarks/harness"
+)
+
+var h harness.Harness
+
+// TestMain is the main method for package network.
+func TestMain(m *testing.M) {
+	h.Init()
+	os.Exit(m.Run())
+}
diff --git a/test/benchmarks/base/sysbench_test.go b/test/benchmarks/base/sysbench_test.go
new file mode 100644
index 000000000..7df73e38b
--- /dev/null
+++ b/test/benchmarks/base/sysbench_test.go
@@ -0,0 +1,89 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package base
+
+import (
+	"context"
+	"testing"
+
+	"gvisor.dev/gvisor/pkg/test/dockerutil"
+	"gvisor.dev/gvisor/test/benchmarks/tools"
+)
+
+type testCase struct {
+	name string
+	test tools.Sysbench
+}
+
+// BenchmarSysbench runs sysbench on the runtime.
+func BenchmarkSysbench(b *testing.B) {
+
+	testCases := []testCase{
+		testCase{
+			name: "CPU",
+			test: &tools.SysbenchCPU{
+				Base: tools.SysbenchBase{
+					Threads: 1,
+					Time:    5,
+				},
+				MaxPrime: 50000,
+			},
+		},
+		testCase{
+			name: "Memory",
+			test: &tools.SysbenchMemory{
+				Base: tools.SysbenchBase{
+					Threads: 1,
+				},
+				BlockSize: "1M",
+				TotalSize: "500G",
+			},
+		},
+		testCase{
+			name: "Mutex",
+			test: &tools.SysbenchMutex{
+				Base: tools.SysbenchBase{
+					Threads: 8,
+				},
+				Loops: 1,
+				Locks: 10000000,
+				Num:   4,
+			},
+		},
+	}
+
+	machine, err := h.GetMachine()
+	if err != nil {
+		b.Fatalf("failed to get machine: %v", err)
+	}
+	defer machine.CleanUp()
+
+	for _, tc := range testCases {
+		b.Run(tc.name, func(b *testing.B) {
+
+			ctx := context.Background()
+			sysbench := machine.GetContainer(ctx, b)
+			defer sysbench.CleanUp(ctx)
+
+			out, err := sysbench.Run(ctx, dockerutil.RunOpts{
+				Image: "benchmarks/sysbench",
+			}, tc.test.MakeCmd()...)
+			if err != nil {
+				b.Fatalf("failed to run sysbench: %v: logs:%s", err, out)
+			}
+			tc.test.Report(b, out)
+		})
+	}
+}
diff --git a/test/benchmarks/database/BUILD b/test/benchmarks/database/BUILD
index 572db665f..6139f6e8a 100644
--- a/test/benchmarks/database/BUILD
+++ b/test/benchmarks/database/BUILD
@@ -12,9 +12,7 @@ go_library(
 go_test(
     name = "database_test",
     size = "enormous",
-    srcs = [
-        "redis_test.go",
-    ],
+    srcs = ["redis_test.go"],
     library = ":database",
     tags = [
         # Requires docker and runsc to be configured before test runs.
diff --git a/test/benchmarks/tools/BUILD b/test/benchmarks/tools/BUILD
index 4358551bc..a6bd949e6 100644
--- a/test/benchmarks/tools/BUILD
+++ b/test/benchmarks/tools/BUILD
@@ -10,6 +10,7 @@ go_library(
         "hey.go",
         "iperf.go",
         "redis.go",
+        "sysbench.go",
         "tools.go",
     ],
     visibility = ["//:sandbox"],
@@ -24,6 +25,7 @@ go_test(
         "hey_test.go",
         "iperf_test.go",
         "redis_test.go",
+        "sysbench_test.go",
     ],
     library = ":tools",
 )
diff --git a/test/benchmarks/tools/redis.go b/test/benchmarks/tools/redis.go
index db32460ec..c899ae0d4 100644
--- a/test/benchmarks/tools/redis.go
+++ b/test/benchmarks/tools/redis.go
@@ -56,7 +56,6 @@ func (r *Redis) Report(b *testing.B, output string) {
 func (r *Redis) parseOperation(data string) (float64, error) {
 	re := regexp.MustCompile(fmt.Sprintf(`"%s( .*)?","(\d*\.\d*)"`, r.Operation))
 	match := re.FindStringSubmatch(data)
-	// If no match, simply don't add it to the result map.
 	if len(match) < 3 {
 		return 0.0, fmt.Errorf("could not find %s in %s", r.Operation, data)
 	}
diff --git a/test/benchmarks/tools/sysbench.go b/test/benchmarks/tools/sysbench.go
new file mode 100644
index 000000000..6b2f75ca2
--- /dev/null
+++ b/test/benchmarks/tools/sysbench.go
@@ -0,0 +1,245 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package tools
+
+import (
+	"fmt"
+	"regexp"
+	"strconv"
+	"strings"
+	"testing"
+)
+
+var warmup = "sysbench --threads=8 --memory-total-size=5G memory run > /dev/null &&"
+
+// Sysbench represents a 'sysbench' command.
+type Sysbench interface {
+	MakeCmd() []string // Makes a sysbench command.
+	flags() []string
+	Report(*testing.B, string) // Reports results contained in string.
+}
+
+// SysbenchBase is the top level struct for sysbench and holds top-level arguments
+// for sysbench. See: 'sysbench --help'
+type SysbenchBase struct {
+	Threads int // number of Threads for the test.
+	Time    int // time limit for test in seconds.
+}
+
+// baseFlags returns top level flags.
+func (s *SysbenchBase) baseFlags() []string {
+	var ret []string
+	if s.Threads > 0 {
+		ret = append(ret, fmt.Sprintf("--threads=%d", s.Threads))
+	}
+	if s.Time > 0 {
+		ret = append(ret, fmt.Sprintf("--time=%d", s.Time))
+	}
+	return ret
+}
+
+// SysbenchCPU is for 'sysbench [flags] cpu run' and holds CPU specific arguments.
+type SysbenchCPU struct {
+	Base     SysbenchBase
+	MaxPrime int // upper limit for primes generator [10000].
+}
+
+// MakeCmd makes commands for SysbenchCPU.
+func (s *SysbenchCPU) MakeCmd() []string {
+	cmd := []string{warmup, "sysbench"}
+	cmd = append(cmd, s.flags()...)
+	cmd = append(cmd, "cpu run")
+	return []string{"sh", "-c", strings.Join(cmd, " ")}
+}
+
+// flags makes flags for SysbenchCPU cmds.
+func (s *SysbenchCPU) flags() []string {
+	cmd := s.Base.baseFlags()
+	if s.MaxPrime > 0 {
+		return append(cmd, fmt.Sprintf("--cpu-max-prime=%d", s.MaxPrime))
+	}
+	return cmd
+}
+
+// Report reports the relevant metrics for SysbenchCPU.
+func (s *SysbenchCPU) Report(b *testing.B, output string) {
+	b.Helper()
+	result, err := s.parseEvents(output)
+	if err != nil {
+		b.Fatalf("parsing CPU events from %s failed: %v", output, err)
+	}
+	b.ReportMetric(result, "cpu_events_per_second")
+}
+
+var cpuEventsPerSecondRE = regexp.MustCompile(`events per second:\s*(\d*.?\d*)\n`)
+
+// parseEvents parses cpu events per second.
+func (s *SysbenchCPU) parseEvents(data string) (float64, error) {
+	match := cpuEventsPerSecondRE.FindStringSubmatch(data)
+	if len(match) < 2 {
+		return 0.0, fmt.Errorf("could not find events per second: %s", data)
+	}
+	return strconv.ParseFloat(match[1], 64)
+}
+
+// SysbenchMemory is for 'sysbench [FLAGS] memory run' and holds Memory specific arguments.
+type SysbenchMemory struct {
+	Base          SysbenchBase
+	BlockSize     string // size of test memory block [1K].
+	TotalSize     string // size of data to transfer [100G].
+	Scope         string // memory access scope {global, local} [global].
+	HugeTLB       bool   // allocate memory from HugeTLB [off].
+	OperationType string // type of memory ops {read, write, none} [write].
+	AccessMode    string // access mode {seq, rnd} [seq].
+}
+
+// MakeCmd makes commands for SysbenchMemory.
+func (s *SysbenchMemory) MakeCmd() []string {
+	cmd := []string{warmup, "sysbench"}
+	cmd = append(cmd, s.flags()...)
+	cmd = append(cmd, "memory run")
+	return []string{"sh", "-c", strings.Join(cmd, " ")}
+}
+
+// flags makes flags for SysbenchMemory cmds.
+func (s *SysbenchMemory) flags() []string {
+	cmd := s.Base.baseFlags()
+	if s.BlockSize != "" {
+		cmd = append(cmd, fmt.Sprintf("--memory-block-size=%s", s.BlockSize))
+	}
+	if s.TotalSize != "" {
+		cmd = append(cmd, fmt.Sprintf("--memory-total-size=%s", s.TotalSize))
+	}
+	if s.Scope != "" {
+		cmd = append(cmd, fmt.Sprintf("--memory-scope=%s", s.Scope))
+	}
+	if s.HugeTLB {
+		cmd = append(cmd, "--memory-hugetlb=on")
+	}
+	if s.OperationType != "" {
+		cmd = append(cmd, fmt.Sprintf("--memory-oper=%s", s.OperationType))
+	}
+	if s.AccessMode != "" {
+		cmd = append(cmd, fmt.Sprintf("--memory-access-mode=%s", s.AccessMode))
+	}
+	return cmd
+}
+
+// Report reports the relevant metrics for SysbenchMemory.
+func (s *SysbenchMemory) Report(b *testing.B, output string) {
+	b.Helper()
+	result, err := s.parseOperations(output)
+	if err != nil {
+		b.Fatalf("parsing result %s failed with err: %v", output, err)
+	}
+	b.ReportMetric(result, "operations_per_second")
+}
+
+var memoryOperationsRE = regexp.MustCompile(`Total\soperations:\s+\d*\s*\((\d*\.\d*)\sper\ssecond\)`)
+
+// parseOperations parses memory operations per second form sysbench memory ouput.
+func (s *SysbenchMemory) parseOperations(data string) (float64, error) {
+	match := memoryOperationsRE.FindStringSubmatch(data)
+	if len(match) < 2 {
+		return 0.0, fmt.Errorf("couldn't find memory operations per second: %s", data)
+	}
+	return strconv.ParseFloat(match[1], 64)
+}
+
+// SysbenchMutex is for 'sysbench [FLAGS] mutex run' and holds Mutex specific arguments.
+type SysbenchMutex struct {
+	Base  SysbenchBase
+	Num   int // total size of mutex array [4096].
+	Locks int // number of mutex locks per thread [50K].
+	Loops int // number of loops to do outside mutex lock [10K].
+}
+
+// MakeCmd makes commands for SysbenchMutex.
+func (s *SysbenchMutex) MakeCmd() []string {
+	cmd := []string{warmup, "sysbench"}
+	cmd = append(cmd, s.flags()...)
+	cmd = append(cmd, "mutex run")
+	return []string{"sh", "-c", strings.Join(cmd, " ")}
+}
+
+// flags makes flags for SysbenchMutex commands.
+func (s *SysbenchMutex) flags() []string {
+	var cmd []string
+	cmd = append(cmd, s.Base.baseFlags()...)
+	if s.Num > 0 {
+		cmd = append(cmd, fmt.Sprintf("--mutex-num=%d", s.Num))
+	}
+	if s.Locks > 0 {
+		cmd = append(cmd, fmt.Sprintf("--mutex-locks=%d", s.Locks))
+	}
+	if s.Loops > 0 {
+		cmd = append(cmd, fmt.Sprintf("--mutex-loops=%d", s.Loops))
+	}
+	return cmd
+}
+
+// Report parses and reports relevant sysbench mutex metrics.
+func (s *SysbenchMutex) Report(b *testing.B, output string) {
+	b.Helper()
+
+	result, err := s.parseExecutionTime(output)
+	if err != nil {
+		b.Fatalf("parsing result %s failed with err: %v", output, err)
+	}
+	b.ReportMetric(result, "average_execution_time_secs")
+
+	result, err = s.parseDeviation(output)
+	if err != nil {
+		b.Fatalf("parsing result %s failed with err: %v", output, err)
+	}
+	b.ReportMetric(result, "stdev_execution_time_secs")
+
+	result, err = s.parseLatency(output)
+	if err != nil {
+		b.Fatalf("parsing result %s failed with err: %v", output, err)
+	}
+	b.ReportMetric(result/1000, "average_latency_secs")
+}
+
+var executionTimeRE = regexp.MustCompile(`execution time \(avg/stddev\):\s*(\d*.?\d*)/(\d*.?\d*)`)
+
+// parseExecutionTime parses threads fairness average execution time from sysbench output.
+func (s *SysbenchMutex) parseExecutionTime(data string) (float64, error) {
+	match := executionTimeRE.FindStringSubmatch(data)
+	if len(match) < 2 {
+		return 0.0, fmt.Errorf("could not find execution time average: %s", data)
+	}
+	return strconv.ParseFloat(match[1], 64)
+}
+
+// parseDeviation parses threads fairness stddev time from sysbench output.
+func (s *SysbenchMutex) parseDeviation(data string) (float64, error) {
+	match := executionTimeRE.FindStringSubmatch(data)
+	if len(match) < 3 {
+		return 0.0, fmt.Errorf("could not find execution time deviation: %s", data)
+	}
+	return strconv.ParseFloat(match[2], 64)
+}
+
+var averageLatencyRE = regexp.MustCompile(`avg:[^\n^\d]*(\d*\.?\d*)`)
+
+// parseLatency parses latency from sysbench output.
+func (s *SysbenchMutex) parseLatency(data string) (float64, error) {
+	match := averageLatencyRE.FindStringSubmatch(data)
+	if len(match) < 2 {
+		return 0.0, fmt.Errorf("could not find average latency: %s", data)
+	}
+	return strconv.ParseFloat(match[1], 64)
+}
diff --git a/test/benchmarks/tools/sysbench_test.go b/test/benchmarks/tools/sysbench_test.go
new file mode 100644
index 000000000..850d1939e
--- /dev/null
+++ b/test/benchmarks/tools/sysbench_test.go
@@ -0,0 +1,169 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package tools
+
+import (
+	"testing"
+)
+
+// TestSysbenchCpu tests parses on sample 'sysbench cpu' output.
+func TestSysbenchCpu(t *testing.T) {
+	sampleData := `
+sysbench 1.0.11 (using system LuaJIT 2.1.0-beta3)
+
+Running the test with following options:
+Number of threads: 8
+Initializing random number generator from current time
+
+
+Prime numbers limit: 10000
+
+Initializing worker threads...
+
+Threads started!
+
+CPU speed:
+    events per second:  9093.38
+
+General statistics:
+    total time:                          10.0007s
+    total number of events:              90949
+
+Latency (ms):
+         min:                                  0.64
+         avg:                                  0.88
+         max:                                 24.65
+         95th percentile:                      1.55
+         sum:                              79936.91
+
+Threads fairness:
+    events (avg/stddev):           11368.6250/831.38
+    execution time (avg/stddev):   9.9921/0.01
+`
+	sysbench := SysbenchCPU{}
+	want := 9093.38
+	if got, err := sysbench.parseEvents(sampleData); err != nil {
+		t.Fatalf("parse cpu events failed: %v", err)
+	} else if want != got {
+		t.Fatalf("got: %f want: %f", got, want)
+	}
+}
+
+// TestSysbenchMemory tests parsers on sample 'sysbench memory' output.
+func TestSysbenchMemory(t *testing.T) {
+	sampleData := `
+sysbench 1.0.11 (using system LuaJIT 2.1.0-beta3)
+
+Running the test with following options:
+Number of threads: 8
+Initializing random number generator from current time
+
+
+Running memory speed test with the following options:
+  block size: 1KiB
+  total size: 102400MiB
+  operation: write
+  scope: global
+
+Initializing worker threads...
+
+Threads started!
+
+Total operations: 47999046 (9597428.64 per second)
+
+46874.07 MiB transferred (9372.49 MiB/sec)
+
+
+General statistics:
+    total time:                          5.0001s
+    total number of events:              47999046
+
+Latency (ms):
+         min:                                  0.00
+         avg:                                  0.00
+         max:                                  0.21
+         95th percentile:                      0.00
+         sum:                              33165.91
+
+Threads fairness:
+    events (avg/stddev):           5999880.7500/111242.52
+    execution time (avg/stddev):   4.1457/0.09
+`
+	sysbench := SysbenchMemory{}
+	want := 9597428.64
+	if got, err := sysbench.parseOperations(sampleData); err != nil {
+		t.Fatalf("parse memory ops failed: %v", err)
+	} else if want != got {
+		t.Fatalf("got: %f want: %f", got, want)
+	}
+}
+
+// TestSysbenchMutex tests parsers on sample 'sysbench mutex' output.
+func TestSysbenchMutex(t *testing.T) {
+	sampleData := `
+sysbench 1.0.11 (using system LuaJIT 2.1.0-beta3)
+
+The 'mutex' test requires a command argument. See 'sysbench mutex help'
+root@ec078132e294:/# sysbench mutex --threads=8 run
+sysbench 1.0.11 (using system LuaJIT 2.1.0-beta3)
+
+Running the test with following options:
+Number of threads: 8
+Initializing random number generator from current time
+
+
+Initializing worker threads...
+
+Threads started!
+
+
+General statistics:
+    total time:                          0.2320s
+    total number of events:              8
+
+Latency (ms):
+         min:                                152.35
+         avg:                                192.48
+         max:                                231.41
+         95th percentile:                    231.53
+         sum:                               1539.83
+
+Threads fairness:
+    events (avg/stddev):           1.0000/0.00
+    execution time (avg/stddev):   0.1925/0.04
+`
+
+	sysbench := SysbenchMutex{}
+	want := .1925
+	if got, err := sysbench.parseExecutionTime(sampleData); err != nil {
+		t.Fatalf("parse mutex time failed: %v", err)
+	} else if want != got {
+		t.Fatalf("got: %f want: %f", got, want)
+	}
+
+	want = 0.04
+	if got, err := sysbench.parseDeviation(sampleData); err != nil {
+		t.Fatalf("parse mutex deviation failed: %v", err)
+	} else if want != got {
+		t.Fatalf("got: %f want: %f", got, want)
+	}
+
+	want = 192.48
+	if got, err := sysbench.parseLatency(sampleData); err != nil {
+		t.Fatalf("parse mutex time failed: %v", err)
+	} else if want != got {
+		t.Fatalf("got: %f want: %f", got, want)
+	}
+}
diff --git a/test/fuse/linux/fuse_base.cc b/test/fuse/linux/fuse_base.cc
index 6c8432fd0..ce69276c9 100644
--- a/test/fuse/linux/fuse_base.cc
+++ b/test/fuse/linux/fuse_base.cc
@@ -100,8 +100,11 @@ PosixError FuseTest::ConsumeFuseInit() {
       .error = 0,
       .unique = 2,
   };
-  // Returns an empty init out payload since this is just a test.
-  struct fuse_init_out out_payload;
+  // Returns a fake fuse_init_out with 7.0 version to avoid ECONNREFUSED
+  // error in the initialization of FUSE connection.
+  struct fuse_init_out out_payload = {
+      .major = 7,
+  };
   iov_out[0].iov_len = sizeof(out_header);
   iov_out[0].iov_base = &out_header;
   iov_out[1].iov_len = sizeof(out_payload);
diff --git a/test/packetimpact/tests/BUILD b/test/packetimpact/tests/BUILD
index 0c2a05380..74658fea0 100644
--- a/test/packetimpact/tests/BUILD
+++ b/test/packetimpact/tests/BUILD
@@ -40,8 +40,6 @@ packetimpact_go_test(
 packetimpact_go_test(
     name = "udp_recv_mcast_bcast",
     srcs = ["udp_recv_mcast_bcast_test.go"],
-    # TODO(b/152813495): Fix netstack then remove the line below.
-    expect_netstack_failure = True,
     deps = [
         "//pkg/tcpip",
         "//pkg/tcpip/header",
diff --git a/test/runtimes/BUILD b/test/runtimes/BUILD
index 3be123d94..3d0f10855 100644
--- a/test/runtimes/BUILD
+++ b/test/runtimes/BUILD
@@ -6,7 +6,7 @@ runtime_test(
     name = "go1.12",
     exclude_file = "exclude_go1.12.csv",
     lang = "go",
-    shard_count = 5,
+    shard_count = 8,
 )
 
 runtime_test(
@@ -14,26 +14,26 @@ runtime_test(
     batch = 100,
     exclude_file = "exclude_java11.csv",
     lang = "java",
-    shard_count = 20,
+    shard_count = 16,
 )
 
 runtime_test(
     name = "nodejs12.4.0",
     exclude_file = "exclude_nodejs12.4.0.csv",
     lang = "nodejs",
-    shard_count = 10,
+    shard_count = 8,
 )
 
 runtime_test(
     name = "php7.3.6",
     exclude_file = "exclude_php7.3.6.csv",
     lang = "php",
-    shard_count = 5,
+    shard_count = 8,
 )
 
 runtime_test(
     name = "python3.7.3",
     exclude_file = "exclude_python3.7.3.csv",
     lang = "python",
-    shard_count = 5,
+    shard_count = 8,
 )
diff --git a/test/runtimes/exclude_java11.csv b/test/runtimes/exclude_java11.csv
index 4d62f7d3a..997a29cad 100644
--- a/test/runtimes/exclude_java11.csv
+++ b/test/runtimes/exclude_java11.csv
@@ -18,8 +18,16 @@ java/lang/ClassLoader/nativeLibrary/NativeLibraryTest.java,,Fails in Docker
 java/lang/module/ModuleDescriptorTest.java,,
 java/lang/String/nativeEncoding/StringPlatformChars.java,,
 java/net/CookieHandler/B6791927.java,,java.lang.RuntimeException: Expiration date shouldn't be 0
+java/net/ipv6tests/TcpTest.java,,java.net.ConnectException: Connection timed out (Connection timed out)
+java/net/ipv6tests/UdpTest.java,,Times out
+java/net/Inet6Address/B6558853.java,,Times out
+java/net/InetAddress/CheckJNI.java,,java.net.ConnectException: Connection timed out (Connection timed out)
 java/net/InterfaceAddress/NetworkPrefixLength.java,b/78507103,
+java/net/MulticastSocket/B6425815.java,,java.net.SocketException: Protocol not available (Error getting socket option)
+java/net/MulticastSocket/B6427403.java,,java.net.SocketException: Protocol not available
 java/net/MulticastSocket/MulticastTTL.java,,
+java/net/MulticastSocket/NetworkInterfaceEmptyGetInetAddressesTest.java,,java.net.SocketException: Protocol not available (Error getting socket option)
+java/net/MulticastSocket/NoLoopbackPackets.java,,java.net.SocketException: Protocol not available
 java/net/MulticastSocket/Promiscuous.java,,
 java/net/MulticastSocket/SetLoopbackMode.java,,
 java/net/MulticastSocket/SetTTLAndGetTTL.java,,
@@ -27,6 +35,7 @@ java/net/MulticastSocket/Test.java,,
 java/net/MulticastSocket/TestDefaults.java,,
 java/net/MulticastSocket/TimeToLive.java,,
 java/net/NetworkInterface/NetworkInterfaceStreamTest.java,,
+java/net/Socket/LinkLocal.java,,java.net.SocketTimeoutException: Receive timed out
 java/net/Socket/SetSoLinger.java,b/78527327,SO_LINGER is not yet supported
 java/net/Socket/UrgentDataTest.java,b/111515323,
 java/net/SocketOption/OptionsTest.java,,Fails in Docker
@@ -167,6 +176,10 @@ sun/management/jmxremote/bootstrap/RmiSslBootstrapTest.sh,,
 sun/management/jmxremote/startstop/JMXStartStopTest.java,,
 sun/management/jmxremote/startstop/JMXStatusPerfCountersTest.java,,
 sun/management/jmxremote/startstop/JMXStatusTest.java,,
+sun/management/jdp/JdpDefaultsTest.java,,
+sun/management/jdp/JdpJmxRemoteDynamicPortTest.java,,
+sun/management/jdp/JdpOffTest.java,,
+sun/management/jdp/JdpSpecificAddressTest.java,,
 sun/text/resources/LocaleDataTest.java,,
 sun/tools/jcmd/TestJcmdSanity.java,,
 sun/tools/jhsdb/AlternateHashingTest.java,,
diff --git a/test/runtimes/exclude_nodejs12.4.0.csv b/test/runtimes/exclude_nodejs12.4.0.csv
index 9aaf1e647..525117f5c 100644
--- a/test/runtimes/exclude_nodejs12.4.0.csv
+++ b/test/runtimes/exclude_nodejs12.4.0.csv
@@ -11,13 +11,17 @@ parallel/test-dgram-socket-buffer-size.js,b/68847921,
 parallel/test-dns-channel-timeout.js,b/161893056,
 parallel/test-fs-access.js,,
 parallel/test-fs-watchfile.js,,Flaky - File already exists error
+parallel/test-fs-write-stream.js,,Flaky
 parallel/test-fs-write-stream-throw-type-error.js,b/110226209,
 parallel/test-http-writable-true-after-close.js,,Flaky - Mismatched <anonymous> function calls. Expected exactly 1 actual 2
 parallel/test-os.js,b/63997097,
 parallel/test-process-uid-gid.js,,
 parallel/test-tls-cli-min-version-1.0.js,,Flaky - EADDRINUSE
+parallel/test-tls-cli-min-version-1.1.js,,Flaky - EADDRINUSE
+parallel/test-tls-cli-min-version-1.2.js,,Flaky - EADDRINUSE
 parallel/test-tls-cli-min-version-1.3.js,,Flaky - EADDRINUSE
-parallel/test-tls-cli-max-version-1.2,,Flaky - EADDRINUSE
+parallel/test-tls-cli-max-version-1.2.js,,Flaky - EADDRINUSE
+parallel/test-tls-cli-max-version-1.3.js,,Flaky - EADDRINUSE
 parallel/test-tls-min-max-version.js,,Flaky - EADDRINUSE
 pseudo-tty/test-assert-colors.js,b/162801321,
 pseudo-tty/test-assert-no-color.js,b/162801321,
diff --git a/test/runtimes/exclude_php7.3.6.csv b/test/runtimes/exclude_php7.3.6.csv
index 0bef786c0..e828f91dd 100644
--- a/test/runtimes/exclude_php7.3.6.csv
+++ b/test/runtimes/exclude_php7.3.6.csv
@@ -8,23 +8,23 @@ ext/mbstring/tests/bug77165.phpt,,
 ext/mbstring/tests/bug77454.phpt,,
 ext/mbstring/tests/mb_convert_encoding_leak.phpt,,
 ext/mbstring/tests/mb_strrpos_encoding_3rd_param.phpt,,
+ext/session/tests/session_module_name_variation4.phpt,,Flaky
 ext/session/tests/session_set_save_handler_class_018.phpt,,
 ext/session/tests/session_set_save_handler_iface_003.phpt,,
 ext/session/tests/session_set_save_handler_variation4.phpt,,
-ext/session/tests/session_set_save_handler_variation5.phpt,,
-ext/standard/tests/file/filetype_variation.phpt,,
-ext/standard/tests/file/fopen_variation19.phpt,,
+ext/standard/tests/file/fopen_variation19.phpt,b/162894964,
+ext/standard/tests/file/lstat_stat_variation14.phpt,,Flaky
 ext/standard/tests/file/php_fd_wrapper_01.phpt,,
 ext/standard/tests/file/php_fd_wrapper_02.phpt,,
 ext/standard/tests/file/php_fd_wrapper_03.phpt,,
 ext/standard/tests/file/php_fd_wrapper_04.phpt,,
-ext/standard/tests/file/realpath_bug77484.phpt,,
+ext/standard/tests/file/realpath_bug77484.phpt,b/162894969,
 ext/standard/tests/file/rename_variation.phpt,b/68717309,
-ext/standard/tests/file/symlink_link_linkinfo_is_link_variation4.phpt,,
-ext/standard/tests/file/symlink_link_linkinfo_is_link_variation8.phpt,,
+ext/standard/tests/file/symlink_link_linkinfo_is_link_variation4.phpt,b/162895341,
+ext/standard/tests/file/symlink_link_linkinfo_is_link_variation8.phpt,b/162896223,
 ext/standard/tests/general_functions/escapeshellarg_bug71270.phpt,,
 ext/standard/tests/general_functions/escapeshellcmd_bug71270.phpt,,
-ext/standard/tests/network/bug20134.phpt,,
+ext/standard/tests/streams/proc_open_bug69900.phpt,,Flaky
 ext/standard/tests/streams/stream_socket_sendto.phpt,,
 ext/standard/tests/strings/007.phpt,,
 sapi/cli/tests/upload_2G.phpt,,
@@ -34,4 +34,4 @@ tests/output/stream_isatty_in-out-err.phpt,,
 tests/output/stream_isatty_in-out.phpt,b/68720299,
 tests/output/stream_isatty_out-err.phpt,b/68720311,
 tests/output/stream_isatty_out.phpt,b/68720325,
-Zend/tests/concat_003.phpt,,
+Zend/tests/concat_003.phpt,b/162896021,
diff --git a/test/runtimes/exclude_python3.7.3.csv b/test/runtimes/exclude_python3.7.3.csv
index 2b9947212..8760f8951 100644
--- a/test/runtimes/exclude_python3.7.3.csv
+++ b/test/runtimes/exclude_python3.7.3.csv
@@ -1,27 +1,21 @@
 test name,bug id,comment
-test_asynchat,b/76031995,SO_REUSEADDR
 test_asyncio,,Fails on Docker.
-test_asyncore,b/76031995,SO_REUSEADDR
-test_epoll,,
-test_fcntl,,fcntl invalid argument -- artificial test to make sure something works in 64 bit mode.
-test_ftplib,,Fails in Docker
-test_httplib,b/76031995,SO_REUSEADDR
-test_imaplib,,
-test_logging,,
+test_asyncore,b/162973328,
+test_epoll,b/162983393,
+test_fcntl,b/162978767,fcntl invalid argument -- artificial test to make sure something works in 64 bit mode.
+test_httplib,b/163000009,OSError: [Errno 98] Address already in use
+test_imaplib,b/162979661,
+test_logging,b/162980079,
 test_multiprocessing_fork,,Flaky. Sometimes times out.
 test_multiprocessing_forkserver,,Flaky. Sometimes times out.
 test_multiprocessing_main_handling,,Flaky. Sometimes times out.
 test_multiprocessing_spawn,,Flaky. Sometimes times out.
-test_nntplib,b/76031995,tests should not set SO_REUSEADDR
-test_poplib,,Fails on Docker
 test_posix,b/76174079,posix.sched_get_priority_min not implemented + posix.sched_rr_get_interval not permitted
-test_pty,b/76157709,out of pty devices
-test_readline,b/76157709,out of pty devices
+test_pty,b/162979921,
+test_readline,b/162980389,TestReadline hangs forever
 test_resource,b/76174079,
 test_selectors,b/76116849,OSError not raised with epoll
-test_smtplib,b/76031995,SO_REUSEADDR and unclosed sockets
+test_smtplib,b/162980434,unclosed sockets
+test_signal,,Flaky - signal: alarm clock
 test_socket,b/75983380,
-test_ssl,b/76031995,SO_REUSEADDR
-test_subprocess,,
-test_support,b/76031995,SO_REUSEADDR
-test_telnetlib,b/76031995,SO_REUSEADDR
+test_subprocess,b/162980831,
diff --git a/test/syscalls/linux/proc_net.cc b/test/syscalls/linux/proc_net.cc
index 3377b65cf..4fab097f4 100644
--- a/test/syscalls/linux/proc_net.cc
+++ b/test/syscalls/linux/proc_net.cc
@@ -477,6 +477,44 @@ TEST(ProcNetSnmp, CheckSnmp) {
   EXPECT_EQ(value_count, 1);
 }
 
+TEST(ProcSysNetIpv4Recovery, Exists) {
+  EXPECT_THAT(open("/proc/sys/net/ipv4/tcp_recovery", O_RDONLY),
+              SyscallSucceeds());
+}
+
+TEST(ProcSysNetIpv4Recovery, CanReadAndWrite) {
+  // TODO(b/162988252): Enable save/restore for this test after the bug is
+  // fixed.
+  DisableSave ds;
+
+  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability((CAP_DAC_OVERRIDE))));
+
+  auto const fd = ASSERT_NO_ERRNO_AND_VALUE(
+      Open("/proc/sys/net/ipv4/tcp_recovery", O_RDWR));
+
+  char buf[10] = {'\0'};
+  char to_write = '2';
+
+  // Check initial value is set to 1.
+  EXPECT_THAT(PreadFd(fd.get(), &buf, sizeof(buf), 0),
+              SyscallSucceedsWithValue(sizeof(to_write) + 1));
+  EXPECT_EQ(strcmp(buf, "1\n"), 0);
+
+  // Set tcp_recovery to one of the allowed constants.
+  EXPECT_THAT(PwriteFd(fd.get(), &to_write, sizeof(to_write), 0),
+              SyscallSucceedsWithValue(sizeof(to_write)));
+  EXPECT_THAT(PreadFd(fd.get(), &buf, sizeof(buf), 0),
+              SyscallSucceedsWithValue(sizeof(to_write) + 1));
+  EXPECT_EQ(strcmp(buf, "2\n"), 0);
+
+  // Set tcp_recovery to any random value.
+  char kMessage[] = "100";
+  EXPECT_THAT(PwriteFd(fd.get(), kMessage, strlen(kMessage), 0),
+              SyscallSucceedsWithValue(strlen(kMessage)));
+  EXPECT_THAT(PreadFd(fd.get(), buf, sizeof(kMessage), 0),
+              SyscallSucceedsWithValue(sizeof(kMessage)));
+  EXPECT_EQ(strcmp(buf, "100\n"), 0);
+}
 }  // namespace
 }  // namespace testing
 }  // namespace gvisor