144 files changed, 18701 insertions, 6227 deletions
diff --git a/pkg/tcpip/BUILD b/pkg/tcpip/BUILD
index 454e07662..27f96a3ac 100644
--- a/pkg/tcpip/BUILD
+++ b/pkg/tcpip/BUILD
@@ -5,6 +5,7 @@ package(licenses = ["notice"])
 go_library(
     name = "tcpip",
     srcs = [
+        "socketops.go",
         "tcpip.go",
         "time_unsafe.go",
         "timer.go",
diff --git a/pkg/tcpip/adapters/gonet/gonet_test.go b/pkg/tcpip/adapters/gonet/gonet_test.go
index 12b061def..b196324c7 100644
--- a/pkg/tcpip/adapters/gonet/gonet_test.go
+++ b/pkg/tcpip/adapters/gonet/gonet_test.go
@@ -97,6 +97,9 @@ type testConnection struct {
 func connect(s *stack.Stack, addr tcpip.FullAddress) (*testConnection, *tcpip.Error) {
 	wq := &waiter.Queue{}
 	ep, err := s.NewEndpoint(tcp.ProtocolNumber, ipv4.ProtocolNumber, wq)
+	if err != nil {
+		return nil, err
+	}
 
 	entry, ch := waiter.NewChannelEntry(nil)
 	wq.EventRegister(&entry, waiter.EventOut)
@@ -145,7 +148,9 @@ func TestCloseReader(t *testing.T) {
 		defer close(done)
 		c, err := l.Accept()
 		if err != nil {
-			t.Fatalf("l.Accept() = %v", err)
+			t.Errorf("l.Accept() = %v", err)
+			// Cannot call Fatalf in goroutine. Just return from the goroutine.
+			return
 		}
 
 		// Give c.Read() a chance to block before closing the connection.
@@ -416,7 +421,9 @@ func TestDeadlineChange(t *testing.T) {
 		defer close(done)
 		c, err := l.Accept()
 		if err != nil {
-			t.Fatalf("l.Accept() = %v", err)
+			t.Errorf("l.Accept() = %v", err)
+			// Cannot call Fatalf in goroutine. Just return from the goroutine.
+			return
 		}
 
 		c.SetDeadline(time.Now().Add(time.Minute))
diff --git a/pkg/tcpip/checker/checker.go b/pkg/tcpip/checker/checker.go
index 6f81b0164..91971b687 100644
--- a/pkg/tcpip/checker/checker.go
+++ b/pkg/tcpip/checker/checker.go
@@ -20,6 +20,7 @@ import (
 	"encoding/binary"
 	"reflect"
 	"testing"
+	"time"
 
 	"github.com/google/go-cmp/cmp"
 	"gvisor.dev/gvisor/pkg/tcpip"
@@ -116,6 +117,10 @@ func TTL(ttl uint8) NetworkChecker {
 			v = ip.TTL()
 		case header.IPv6:
 			v = ip.HopLimit()
+		case *ipv6HeaderWithExtHdr:
+			v = ip.HopLimit()
+		default:
+			t.Fatalf("unrecognized header type %T for TTL evaluation", ip)
 		}
 		if v != ttl {
 			t.Fatalf("Bad TTL, got = %d, want = %d", v, ttl)
@@ -197,7 +202,7 @@ func IPPayload(payload []byte) NetworkChecker {
 }
 
 // IPv4Options returns a checker that checks the options in an IPv4 packet.
-func IPv4Options(want []byte) NetworkChecker {
+func IPv4Options(want header.IPv4Options) NetworkChecker {
 	return func(t *testing.T, h []header.Network) {
 		t.Helper()
 
@@ -216,6 +221,42 @@ func IPv4Options(want []byte) NetworkChecker {
 	}
 }
 
+// IPv4RouterAlert returns a checker that checks that the RouterAlert option is
+// set in an IPv4 packet.
+func IPv4RouterAlert() NetworkChecker {
+	return func(t *testing.T, h []header.Network) {
+		t.Helper()
+		ip, ok := h[0].(header.IPv4)
+		if !ok {
+			t.Fatalf("unexpected network header passed to checker, got = %T, want = header.IPv4", h[0])
+		}
+		iterator := ip.Options().MakeIterator()
+		for {
+			opt, done, err := iterator.Next()
+			if err != nil {
+				t.Fatalf("error acquiring next IPv4 option %s", err)
+			}
+			if done {
+				break
+			}
+			if opt.Type() != header.IPv4OptionRouterAlertType {
+				continue
+			}
+			want := [header.IPv4OptionRouterAlertLength]byte{
+				byte(header.IPv4OptionRouterAlertType),
+				header.IPv4OptionRouterAlertLength,
+				header.IPv4OptionRouterAlertValue,
+				header.IPv4OptionRouterAlertValue,
+			}
+			if diff := cmp.Diff(want[:], opt.Contents()); diff != "" {
+				t.Errorf("router alert option mismatch (-want +got):\n%s", diff)
+			}
+			return
+		}
+		t.Errorf("failed to find router alert option in %v", ip.Options())
+	}
+}
+
 // FragmentOffset creates a checker that checks the FragmentOffset field.
 func FragmentOffset(offset uint16) NetworkChecker {
 	return func(t *testing.T, h []header.Network) {
@@ -284,6 +325,19 @@ func ReceiveIPPacketInfo(want tcpip.IPPacketInfo) ControlMessagesChecker {
 	}
 }
 
+// ReceiveOriginalDstAddr creates a checker that checks the OriginalDstAddress
+// field in ControlMessages.
+func ReceiveOriginalDstAddr(want tcpip.FullAddress) ControlMessagesChecker {
+	return func(t *testing.T, cm tcpip.ControlMessages) {
+		t.Helper()
+		if !cm.HasOriginalDstAddress {
+			t.Errorf("got cm.HasOriginalDstAddress = %t, want = true", cm.HasOriginalDstAddress)
+		} else if diff := cmp.Diff(want, cm.OriginalDstAddress); diff != "" {
+			t.Errorf("OriginalDstAddress mismatch (-want +got):\n%s", diff)
+		}
+	}
+}
+
 // TOS creates a checker that checks the TOS field.
 func TOS(tos uint8, label uint32) NetworkChecker {
 	return func(t *testing.T, h []header.Network) {
@@ -859,6 +913,21 @@ func ICMPv4Seq(want uint16) TransportChecker {
 	}
 }
 
+// ICMPv4Pointer creates a checker that checks the ICMPv4 Param Problem pointer.
+func ICMPv4Pointer(want uint8) TransportChecker {
+	return func(t *testing.T, h header.Transport) {
+		t.Helper()
+
+		icmpv4, ok := h.(header.ICMPv4)
+		if !ok {
+			t.Fatalf("unexpected transport header passed to checker, got = %T, want = header.ICMPv4", h)
+		}
+		if got := icmpv4.Pointer(); got != want {
+			t.Fatalf("unexpected ICMP Param Problem pointer, got = %d, want = %d", got, want)
+		}
+	}
+}
+
 // ICMPv4Checksum creates a checker that checks the ICMPv4 Checksum.
 // This assumes that the payload exactly makes up the rest of the slice.
 func ICMPv4Checksum() TransportChecker {
@@ -889,6 +958,12 @@ func ICMPv4Payload(want []byte) TransportChecker {
 			t.Fatalf("unexpected transport header passed to checker, got = %T, want = header.ICMPv4", h)
 		}
 		payload := icmpv4.Payload()
+
+		// cmp.Diff does not consider nil slices equal to empty slices, but we do.
+		if len(want) == 0 && len(payload) == 0 {
+			return
+		}
+
 		if diff := cmp.Diff(want, payload); diff != "" {
 			t.Errorf("ICMP payload mismatch (-want +got):\n%s", diff)
 		}
@@ -953,6 +1028,112 @@ func ICMPv6Code(want header.ICMPv6Code) TransportChecker {
 	}
 }
 
+// ICMPv6TypeSpecific creates a checker that checks the ICMPv6 TypeSpecific
+// field.
+func ICMPv6TypeSpecific(want uint32) TransportChecker {
+	return func(t *testing.T, h header.Transport) {
+		t.Helper()
+
+		icmpv6, ok := h.(header.ICMPv6)
+		if !ok {
+			t.Fatalf("unexpected transport header passed to checker, got = %T, want = header.ICMPv6", h)
+		}
+		if got := icmpv6.TypeSpecific(); got != want {
+			t.Fatalf("unexpected ICMP TypeSpecific, got = %d, want = %d", got, want)
+		}
+	}
+}
+
+// ICMPv6Payload creates a checker that checks the payload in an ICMPv6 packet.
+func ICMPv6Payload(want []byte) TransportChecker {
+	return func(t *testing.T, h header.Transport) {
+		t.Helper()
+
+		icmpv6, ok := h.(header.ICMPv6)
+		if !ok {
+			t.Fatalf("unexpected transport header passed to checker, got = %T, want = header.ICMPv6", h)
+		}
+		payload := icmpv6.Payload()
+
+		// cmp.Diff does not consider nil slices equal to empty slices, but we do.
+		if len(want) == 0 && len(payload) == 0 {
+			return
+		}
+
+		if diff := cmp.Diff(want, payload); diff != "" {
+			t.Errorf("ICMP payload mismatch (-want +got):\n%s", diff)
+		}
+	}
+}
+
+// MLD creates a checker that checks that the packet contains a valid MLD
+// message of type msgType, with potentially additional checks specified by
+// checkers.
+//
+// Checkers may assume that they are passed a valid ICMPv6 containing a valid
+// MLD message as far as the size of the message (minSize) is concerned. The
+// values within the message are up to checkers to validate.
+func MLD(msgType header.ICMPv6Type, minSize int, checkers ...TransportChecker) NetworkChecker {
+	return func(t *testing.T, h []header.Network) {
+		t.Helper()
+
+		// Check normal ICMPv6 first.
+		ICMPv6(
+			ICMPv6Type(msgType),
+			ICMPv6Code(0))(t, h)
+
+		last := h[len(h)-1]
+
+		icmp := header.ICMPv6(last.Payload())
+		if got := len(icmp.MessageBody()); got < minSize {
+			t.Fatalf("ICMPv6 MLD (type = %d) payload size of %d is less than the minimum size of %d", msgType, got, minSize)
+		}
+
+		for _, f := range checkers {
+			f(t, icmp)
+		}
+		if t.Failed() { // a checker reported an error: stop the test now
+			t.FailNow()
+		}
+	}
+}
+
+// MLDMaxRespDelay returns a checker that compares the Maximum Response Delay
+// field of an MLD message against want.
+//
+// The returned TransportChecker expects h to be a header.ICMPv6 whose message
+// body is large enough to hold an MLD message; it does not validate the size.
+func MLDMaxRespDelay(want time.Duration) TransportChecker {
+	return func(t *testing.T, h header.Transport) {
+		t.Helper()
+
+		mld := header.MLD(h.(header.ICMPv6).MessageBody())
+		got := mld.MaximumResponseDelay()
+		if got == want {
+			return
+		}
+		t.Errorf("got %T.MaximumResponseDelay() = %s, want = %s", mld, got, want)
+	}
+}
+
+// MLDMulticastAddress returns a checker that compares the Multicast Address
+// field of an MLD message against want.
+//
+// The returned TransportChecker expects h to be a header.ICMPv6 whose message
+// body is large enough to hold an MLD message; it does not validate the size.
+func MLDMulticastAddress(want tcpip.Address) TransportChecker {
+	return func(t *testing.T, h header.Transport) {
+		t.Helper()
+
+		mld := header.MLD(h.(header.ICMPv6).MessageBody())
+		got := mld.MulticastAddress()
+		if got == want {
+			return
+		}
+		t.Errorf("got %T.MulticastAddress() = %s, want = %s", mld, got, want)
+	}
+}
+
 // NDP creates a checker that checks that the packet contains a valid NDP
 // message for type of ty, with potentially additional checks specified by
 // checkers.
@@ -972,7 +1153,7 @@ func NDP(msgType header.ICMPv6Type, minSize int, checkers ...TransportChecker) N
 		last := h[len(h)-1]
 
 		icmp := header.ICMPv6(last.Payload())
-		if got := len(icmp.NDPPayload()); got < minSize {
+		if got := len(icmp.MessageBody()); got < minSize {
 			t.Fatalf("ICMPv6 NDP (type = %d) payload size of %d is less than the minimum size of %d", msgType, got, minSize)
 		}
 
@@ -1006,7 +1187,7 @@ func NDPNSTargetAddress(want tcpip.Address) TransportChecker {
 		t.Helper()
 
 		icmp := h.(header.ICMPv6)
-		ns := header.NDPNeighborSolicit(icmp.NDPPayload())
+		ns := header.NDPNeighborSolicit(icmp.MessageBody())
 
 		if got := ns.TargetAddress(); got != want {
 			t.Errorf("got %T.TargetAddress() = %s, want = %s", ns, got, want)
@@ -1035,7 +1216,7 @@ func NDPNATargetAddress(want tcpip.Address) TransportChecker {
 		t.Helper()
 
 		icmp := h.(header.ICMPv6)
-		na := header.NDPNeighborAdvert(icmp.NDPPayload())
+		na := header.NDPNeighborAdvert(icmp.MessageBody())
 
 		if got := na.TargetAddress(); got != want {
 			t.Errorf("got %T.TargetAddress() = %s, want = %s", na, got, want)
@@ -1053,7 +1234,7 @@ func NDPNASolicitedFlag(want bool) TransportChecker {
 		t.Helper()
 
 		icmp := h.(header.ICMPv6)
-		na := header.NDPNeighborAdvert(icmp.NDPPayload())
+		na := header.NDPNeighborAdvert(icmp.MessageBody())
 
 		if got := na.SolicitedFlag(); got != want {
 			t.Errorf("got %T.SolicitedFlag = %t, want = %t", na, got, want)
@@ -1124,7 +1305,7 @@ func NDPNAOptions(opts []header.NDPOption) TransportChecker {
 		t.Helper()
 
 		icmp := h.(header.ICMPv6)
-		na := header.NDPNeighborAdvert(icmp.NDPPayload())
+		na := header.NDPNeighborAdvert(icmp.MessageBody())
 		ndpOptions(t, na.Options(), opts)
 	}
 }
@@ -1139,7 +1320,7 @@ func NDPNSOptions(opts []header.NDPOption) TransportChecker {
 		t.Helper()
 
 		icmp := h.(header.ICMPv6)
-		ns := header.NDPNeighborSolicit(icmp.NDPPayload())
+		ns := header.NDPNeighborSolicit(icmp.MessageBody())
 		ndpOptions(t, ns.Options(), opts)
 	}
 }
@@ -1164,7 +1345,261 @@ func NDPRSOptions(opts []header.NDPOption) TransportChecker {
 		t.Helper()
 
 		icmp := h.(header.ICMPv6)
-		rs := header.NDPRouterSolicit(icmp.NDPPayload())
+		rs := header.NDPRouterSolicit(icmp.MessageBody())
 		ndpOptions(t, rs.Options(), opts)
 	}
 }
+
+// IGMP checks that the packet carries an IGMP message of at least
+// header.IGMPMinimumSize bytes and then runs checkers on it.
+func IGMP(checkers ...TransportChecker) NetworkChecker {
+	return func(t *testing.T, h []header.Network) {
+		t.Helper()
+
+		last := h[len(h)-1]
+		if p := last.TransportProtocol(); p != header.IGMPProtocolNumber {
+			t.Fatalf("Bad protocol, got %d, want %d", p, header.IGMPProtocolNumber)
+		}
+		igmp := header.IGMP(last.Payload())
+		if got := len(igmp); got < header.IGMPMinimumSize {
+			t.Fatalf("IGMP payload size of %d is less than the minimum size of %d", got, header.IGMPMinimumSize)
+		}
+		for _, f := range checkers {
+			f(t, igmp)
+		}
+		if t.Failed() {
+			t.FailNow()
+		}
+	}
+}
+
+// IGMPType creates a checker that checks the IGMP Type field.
+func IGMPType(want header.IGMPType) TransportChecker {
+	return func(t *testing.T, h header.Transport) {
+		t.Helper()
+
+		igmp, ok := h.(header.IGMP)
+		if !ok {
+			t.Fatalf("got transport header = %T, want = header.IGMP", h)
+		}
+		if got := igmp.Type(); got != want {
+			t.Errorf("got igmp.Type() = %d, want = %d", got, want)
+		}
+	}
+}
+
+// IGMPMaxRespTime creates a checker that checks the IGMP Max Resp Time field.
+func IGMPMaxRespTime(want time.Duration) TransportChecker {
+	return func(t *testing.T, h header.Transport) {
+		t.Helper()
+
+		igmp, ok := h.(header.IGMP)
+		if !ok {
+			t.Fatalf("got transport header = %T, want = header.IGMP", h)
+		}
+		if got := igmp.MaxRespTime(); got != want {
+			t.Errorf("got igmp.MaxRespTime() = %s, want = %s", got, want)
+		}
+	}
+}
+
+// IGMPGroupAddress creates a checker that checks the IGMP Group Address field.
+func IGMPGroupAddress(want tcpip.Address) TransportChecker {
+	return func(t *testing.T, h header.Transport) {
+		t.Helper()
+
+		igmp, ok := h.(header.IGMP)
+		if !ok {
+			t.Fatalf("got transport header = %T, want = header.IGMP", h)
+		}
+		if got := igmp.GroupAddress(); got != want {
+			t.Errorf("got igmp.GroupAddress() = %s, want = %s", got, want)
+		}
+	}
+}
+
+// IPv6ExtHdrChecker is a function to check an extension header.
+type IPv6ExtHdrChecker func(*testing.T, header.IPv6PayloadHeader)
+
+// IPv6WithExtHdr is like IPv6 but allows IPv6 packets with extension headers.
+func IPv6WithExtHdr(t *testing.T, b []byte, checkers ...NetworkChecker) {
+	t.Helper()
+
+	ipv6 := header.IPv6(b)
+	if !ipv6.IsValid(len(b)) {
+		t.Error("not a valid IPv6 packet")
+		return
+	}
+
+	payloadIterator := header.MakeIPv6PayloadIterator(
+		header.IPv6ExtensionHeaderIdentifier(ipv6.NextHeader()),
+		buffer.View(ipv6.Payload()).ToVectorisedView(),
+	)
+
+	var rawPayloadHeader header.IPv6RawPayloadHeader
+	for {
+		h, done, err := payloadIterator.Next()
+		if err != nil {
+			t.Errorf("payloadIterator.Next(): %s", err)
+			return
+		}
+		// Running out of headers before a raw payload header is an error.
+		if done {
+			t.Errorf("got payloadIterator.Next() = (%T, %t, _), want = (_, false, _)", h, done)
+			return
+		}
+		if r, ok := h.(header.IPv6RawPayloadHeader); ok {
+			rawPayloadHeader = r
+			break
+		}
+	}
+
+	networkHeader := ipv6HeaderWithExtHdr{
+		IPv6:      ipv6,
+		transport: tcpip.TransportProtocolNumber(rawPayloadHeader.Identifier),
+		payload:   rawPayloadHeader.Buf.ToView(),
+	}
+
+	for _, checker := range checkers {
+		checker(t, []header.Network{&networkHeader})
+	}
+}
+
+// IPv6ExtHdr checks for the presence of extension headers.
+//
+// All the extension headers in headers will be checked exhaustively in the
+// order provided.
+func IPv6ExtHdr(headers ...IPv6ExtHdrChecker) NetworkChecker {
+	return func(t *testing.T, h []header.Network) {
+		t.Helper()
+
+		extHdrs, ok := h[0].(*ipv6HeaderWithExtHdr)
+		if !ok {
+			t.Errorf("got network header = %T, want = *ipv6HeaderWithExtHdr", h[0])
+			return
+		}
+
+		payloadIterator := header.MakeIPv6PayloadIterator(
+			header.IPv6ExtensionHeaderIdentifier(extHdrs.IPv6.NextHeader()),
+			buffer.View(extHdrs.IPv6.Payload()).ToVectorisedView(),
+		)
+
+		for _, check := range headers {
+			h, done, err := payloadIterator.Next()
+			if err != nil {
+				t.Errorf("payloadIterator.Next(): %s", err)
+				return
+			}
+			if done {
+				t.Errorf("got payloadIterator.Next() = (%T, %t, _), want = (_, false, _)", h, done)
+				return
+			}
+			check(t, h)
+		}
+		// Validate we consumed all headers.
+		//
+		// The next one over should be a raw payload, after which the iterator
+		// should terminate.
+		wantDone := false
+		for {
+			h, done, err := payloadIterator.Next()
+			if err != nil {
+				t.Errorf("payloadIterator.Next(): %s", err)
+				return
+			}
+			if done != wantDone {
+				t.Errorf("got payloadIterator.Next() = (%T, %t, _), want = (_, %t, _)", h, done, wantDone)
+				return
+			}
+			if done {
+				break
+			}
+			if _, ok := h.(header.IPv6RawPayloadHeader); !ok {
+				t.Errorf("got payloadIterator.Next() = (%T, _, _), want = (header.IPv6RawPayloadHeader, _, _)", h)
+				continue
+			}
+			wantDone = true
+		}
+	}
+}
+
+var _ header.Network = (*ipv6HeaderWithExtHdr)(nil)
+
+// ipv6HeaderWithExtHdr provides a header.Network implementation that takes
+// extension headers into consideration, which is not the case with vanilla
+// header.IPv6.
+type ipv6HeaderWithExtHdr struct {
+	header.IPv6
+	transport tcpip.TransportProtocolNumber
+	payload   []byte
+}
+
+// TransportProtocol implements header.Network.
+func (h *ipv6HeaderWithExtHdr) TransportProtocol() tcpip.TransportProtocolNumber {
+	return h.transport
+}
+
+// Payload implements header.Network.
+func (h *ipv6HeaderWithExtHdr) Payload() []byte {
+	return h.payload
+}
+
+// IPv6ExtHdrOptionChecker is a function to check an extension header option.
+type IPv6ExtHdrOptionChecker func(*testing.T, header.IPv6ExtHdrOption)
+
+// IPv6HopByHopExtensionHeader checks the extension header is a Hop by Hop
+// extension header and validates the contained options with checkers.
+//
+// checkers must exhaustively contain all the expected options, in order.
+func IPv6HopByHopExtensionHeader(checkers ...IPv6ExtHdrOptionChecker) IPv6ExtHdrChecker {
+	return func(t *testing.T, payloadHeader header.IPv6PayloadHeader) {
+		t.Helper()
+
+		hbh, ok := payloadHeader.(header.IPv6HopByHopOptionsExtHdr)
+		if !ok {
+			t.Errorf("unexpected IPv6 payload header, got = %T, want = header.IPv6HopByHopOptionsExtHdr", payloadHeader)
+			return
+		}
+		optionsIterator := hbh.Iter()
+		for _, f := range checkers {
+			opt, done, err := optionsIterator.Next()
+			if err != nil {
+				t.Errorf("optionsIterator.Next(): %s", err)
+				return
+			}
+			if done {
+				t.Errorf("got optionsIterator.Next() = (%T, %t, _), want = (_, false, _)", opt, done)
+				return
+			}
+			f(t, opt)
+		}
+		// Validate all options were consumed; report every unexpected extra
+		// option before stopping.
+		for {
+			opt, done, err := optionsIterator.Next()
+			if err != nil {
+				t.Errorf("optionsIterator.Next(): %s", err)
+				return
+			}
+			if done {
+				break
+			}
+			t.Errorf("got optionsIterator.Next() = (%T, %t, _), want = (_, true, _)", opt, done)
+		}
+	}
+}
+
+// IPv6RouterAlert validates that an extension header option is the RouterAlert
+// option and that its value equals want. Mismatches are reported with
+// t.Errorf.
+func IPv6RouterAlert(want header.IPv6RouterAlertValue) IPv6ExtHdrOptionChecker {
+	return func(t *testing.T, opt header.IPv6ExtHdrOption) {
+		t.Helper()
+
+		if routerAlert, ok := opt.(*header.IPv6RouterAlertOption); !ok {
+			t.Errorf("unexpected extension header option, got = %T, want = *header.IPv6RouterAlertOption", opt)
+		} else if routerAlert.Value != want {
+			t.Errorf("got routerAlert.Value = %d, want = %d", routerAlert.Value, want)
+		}
+	}
+}
diff --git a/pkg/tcpip/header/BUILD b/pkg/tcpip/header/BUILD
index d87797617..0bdc12d53 100644
--- a/pkg/tcpip/header/BUILD
+++ b/pkg/tcpip/header/BUILD
@@ -11,11 +11,13 @@ go_library(
         "gue.go",
         "icmpv4.go",
         "icmpv6.go",
+        "igmp.go",
         "interfaces.go",
         "ipv4.go",
         "ipv6.go",
         "ipv6_extension_headers.go",
         "ipv6_fragment.go",
+        "mld.go",
         "ndp_neighbor_advert.go",
         "ndp_neighbor_solicit.go",
         "ndp_options.go",
@@ -39,6 +41,8 @@ go_test(
     size = "small",
     srcs = [
         "checksum_test.go",
+        "igmp_test.go",
+        "ipv4_test.go",
         "ipv6_test.go",
         "ipversion_test.go",
         "tcp_test.go",
@@ -58,6 +62,7 @@ go_test(
     srcs = [
         "eth_test.go",
         "ipv6_extension_headers_test.go",
+        "mld_test.go",
         "ndp_test.go",
     ],
     library = ":header",
diff --git a/pkg/tcpip/header/icmpv4.go b/pkg/tcpip/header/icmpv4.go
index 504408878..2f13dea6a 100644
--- a/pkg/tcpip/header/icmpv4.go
+++ b/pkg/tcpip/header/icmpv4.go
@@ -99,7 +99,8 @@ const (
 
 // ICMP codes for ICMPv4 Time Exceeded messages as defined in RFC 792.
 const (
-	ICMPv4TTLExceeded ICMPv4Code = 0
+	ICMPv4TTLExceeded       ICMPv4Code = 0
+	ICMPv4ReassemblyTimeout ICMPv4Code = 1
 )
 
 // ICMP codes for ICMPv4 Destination Unreachable messages as defined in RFC 792.
@@ -126,6 +127,12 @@ func (b ICMPv4) Code() ICMPv4Code { return ICMPv4Code(b[1]) }
 // SetCode sets the ICMP code field.
 func (b ICMPv4) SetCode(c ICMPv4Code) { b[1] = byte(c) }
 
+// Pointer returns the pointer field in a Parameter Problem packet.
+func (b ICMPv4) Pointer() byte { return b[icmpv4PointerOffset] }
+
+// SetPointer sets the pointer field in a Parameter Problem packet.
+func (b ICMPv4) SetPointer(c byte) { b[icmpv4PointerOffset] = c }
+
 // Checksum is the ICMP checksum field.
 func (b ICMPv4) Checksum() uint16 {
 	return binary.BigEndian.Uint16(b[icmpv4ChecksumOffset:])
diff --git a/pkg/tcpip/header/icmpv6.go b/pkg/tcpip/header/icmpv6.go
index 4303fc5d5..2eef64b4d 100644
--- a/pkg/tcpip/header/icmpv6.go
+++ b/pkg/tcpip/header/icmpv6.go
@@ -115,6 +115,12 @@ const (
 	ICMPv6NeighborSolicit ICMPv6Type = 135
 	ICMPv6NeighborAdvert  ICMPv6Type = 136
 	ICMPv6RedirectMsg     ICMPv6Type = 137
+
+	// Multicast Listener Discovery (MLD) messages, see RFC 2710.
+
+	ICMPv6MulticastListenerQuery  ICMPv6Type = 130
+	ICMPv6MulticastListenerReport ICMPv6Type = 131
+	ICMPv6MulticastListenerDone   ICMPv6Type = 132
 )
 
 // IsErrorType returns true if the receiver is an ICMP error type.
@@ -245,10 +251,9 @@ func (b ICMPv6) SetSequence(sequence uint16) {
 	binary.BigEndian.PutUint16(b[icmpv6SequenceOffset:], sequence)
 }
 
-// NDPPayload returns the NDP payload buffer. That is, it returns the ICMPv6
-// packet's message body as defined by RFC 4443 section 2.1; the portion of the
-// ICMPv6 buffer after the first ICMPv6HeaderSize bytes.
-func (b ICMPv6) NDPPayload() []byte {
+// MessageBody returns the message body as defined by RFC 4443 section 2.1; the
+// portion of the ICMPv6 buffer after the first ICMPv6HeaderSize bytes.
+func (b ICMPv6) MessageBody() []byte {
 	return b[ICMPv6HeaderSize:]
 }
 
diff --git a/pkg/tcpip/header/igmp.go b/pkg/tcpip/header/igmp.go
new file mode 100644
index 000000000..5c5be1b9d
--- /dev/null
+++ b/pkg/tcpip/header/igmp.go
@@ -0,0 +1,181 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package header
+
+import (
+	"encoding/binary"
+	"fmt"
+	"time"
+
+	"gvisor.dev/gvisor/pkg/tcpip"
+)
+
+// IGMP represents an IGMP header stored in a byte array.
+type IGMP []byte
+
+// IGMP implements `Transport`.
+var _ Transport = (*IGMP)(nil)
+
+const (
+	// IGMPMinimumSize is the minimum size of a valid IGMP packet in bytes,
+	// as per RFC 2236, Section 2, Page 2.
+	IGMPMinimumSize = 8
+
+	// IGMPQueryMinimumSize is the minimum size of a valid Membership Query
+	// Message in bytes, as per RFC 2236, Section 2, Page 2.
+	IGMPQueryMinimumSize = 8
+
+	// IGMPReportMinimumSize is the minimum size of a valid Report Message in
+	// bytes, as per RFC 2236, Section 2, Page 2.
+	IGMPReportMinimumSize = 8
+
+	// IGMPLeaveMessageMinimumSize is the minimum size of a valid Leave Message
+	// in bytes, as per RFC 2236, Section 2, Page 2.
+	IGMPLeaveMessageMinimumSize = 8
+
+	// IGMPTTL is the TTL for all IGMP messages, as per RFC 2236, Section 3, Page
+	// 3.
+	IGMPTTL = 1
+
+	// igmpTypeOffset defines the offset of the type field in an IGMP message.
+	igmpTypeOffset = 0
+
+	// igmpMaxRespTimeOffset defines the offset of the MaxRespTime field in an
+	// IGMP message.
+	igmpMaxRespTimeOffset = 1
+
+	// igmpChecksumOffset defines the offset of the checksum field in an IGMP
+	// message.
+	igmpChecksumOffset = 2
+
+	// igmpGroupAddressOffset defines the offset of the Group Address field in an
+	// IGMP message.
+	igmpGroupAddressOffset = 4
+
+	// IGMPProtocolNumber is IGMP's transport protocol number.
+	IGMPProtocolNumber tcpip.TransportProtocolNumber = 2
+)
+
+// IGMPType is the IGMP type field as per RFC 2236.
+type IGMPType byte
+
+// Values for the IGMP Type described in RFC 2236 Section 2.1, Page 2.
+// Descriptions below come from there.
+const (
+	// IGMPMembershipQuery indicates that the message type is Membership Query.
+	// "There are two sub-types of Membership Query messages:
+	// - General Query, used to learn which groups have members on an
+	//   attached network.
+	// - Group-Specific Query, used to learn if a particular group
+	//   has any members on an attached network.
+	// These two messages are differentiated by the Group Address, as
+	// described in section 1.4 ."
+	IGMPMembershipQuery IGMPType = 0x11
+	// IGMPv1MembershipReport indicates that the message is a Membership Report
+	// generated by a host using the IGMPv1 protocol: "an additional type of
+	// message, for backwards-compatibility with IGMPv1"
+	IGMPv1MembershipReport IGMPType = 0x12
+	// IGMPv2MembershipReport indicates that the Message type is a Membership
+	// Report generated by a host using the IGMPv2 protocol.
+	IGMPv2MembershipReport IGMPType = 0x16
+	// IGMPLeaveGroup indicates that the message type is a Leave Group
+	// notification message.
+	IGMPLeaveGroup IGMPType = 0x17
+)
+
+// Type is the IGMP type field.
+func (b IGMP) Type() IGMPType { return IGMPType(b[igmpTypeOffset]) }
+
+// SetType sets the IGMP type field.
+func (b IGMP) SetType(t IGMPType) { b[igmpTypeOffset] = byte(t) }
+
+// MaxRespTime gets the MaxRespTimeField. This is meaningful only in Membership
+// Query messages, in other cases it is set to 0 by the sender and ignored by
+// the receiver.
+func (b IGMP) MaxRespTime() time.Duration {
+	// As per RFC 2236 section 2.2,
+	//
+	//  The Max Response Time field is meaningful only in Membership Query
+	//  messages, and specifies the maximum allowed time before sending a
+	//  responding report in units of 1/10 second.  In all other messages, it
+	//  is set to zero by the sender and ignored by receivers.
+	return DecisecondToDuration(b[igmpMaxRespTimeOffset])
+}
+
+// SetMaxRespTime sets the MaxRespTimeField to m, in units of 1/10 second.
+func (b IGMP) SetMaxRespTime(m byte) { b[igmpMaxRespTimeOffset] = m }
+
+// Checksum is the IGMP checksum field.
+func (b IGMP) Checksum() uint16 {
+	return binary.BigEndian.Uint16(b[igmpChecksumOffset:])
+}
+
+// SetChecksum sets the IGMP checksum field.
+func (b IGMP) SetChecksum(checksum uint16) {
+	binary.BigEndian.PutUint16(b[igmpChecksumOffset:], checksum)
+}
+
+// GroupAddress gets the Group Address field.
+func (b IGMP) GroupAddress() tcpip.Address {
+	return tcpip.Address(b[igmpGroupAddressOffset:][:IPv4AddressSize])
+}
+
+// SetGroupAddress sets the Group Address field. It panics unless exactly IPv4AddressSize bytes were copied.
+func (b IGMP) SetGroupAddress(address tcpip.Address) {
+	if n := copy(b[igmpGroupAddressOffset:], address); n != IPv4AddressSize {
+		panic(fmt.Sprintf("copied %d bytes, expected %d", n, IPv4AddressSize))
+	}
+}
+
+// SourcePort implements Transport.SourcePort. It always returns 0.
+func (IGMP) SourcePort() uint16 {
+	return 0
+}
+
+// DestinationPort implements Transport.DestinationPort. It always returns 0.
+func (IGMP) DestinationPort() uint16 {
+	return 0
+}
+
+// SetSourcePort implements Transport.SetSourcePort. It does nothing.
+func (IGMP) SetSourcePort(uint16) {
+}
+
+// SetDestinationPort implements Transport.SetDestinationPort. It does nothing.
+func (IGMP) SetDestinationPort(uint16) {
+}
+
+// Payload implements Transport.Payload. It always returns nil.
+func (IGMP) Payload() []byte {
+	return nil
+}
+
+// IGMPCalculateChecksum calculates the IGMP checksum over h. The checksum field
+// of h is zeroed during the calculation and restored before returning.
+func IGMPCalculateChecksum(h IGMP) uint16 {
+	// The header contains a checksum itself, set it aside to avoid checksumming
+	// the checksum and replace it afterwards.
+	existingXsum := h.Checksum()
+	h.SetChecksum(0)
+	xsum := ^Checksum(h, 0)
+	h.SetChecksum(existingXsum)
+	return xsum
+}
+
+// DecisecondToDuration converts a value representing deci-seconds to a
+// time.Duration.
+func DecisecondToDuration(ds uint8) time.Duration {
+	return time.Duration(ds) * time.Second / 10
+}
diff --git a/pkg/tcpip/header/igmp_test.go b/pkg/tcpip/header/igmp_test.go
new file mode 100644
index 000000000..b6126d29a
--- /dev/null
+++ b/pkg/tcpip/header/igmp_test.go
@@ -0,0 +1,110 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package header_test
+
+import (
+	"testing"
+	"time"
+
+	"gvisor.dev/gvisor/pkg/tcpip"
+	"gvisor.dev/gvisor/pkg/tcpip/header"
+)
+
+// TestIGMPHeader tests the getters and setters of header.IGMP.
+func TestIGMPHeader(t *testing.T) {
+	const maxRespTimeTenthSec = 0xF0
+	b := []byte{
+		0x11,                // IGMP Type, Membership Query
+		maxRespTimeTenthSec, // Maximum Response Time
+		0xC0, 0xC0,          // Checksum
+		0x01, 0x02, 0x03, 0x04, // Group Address
+	}
+
+	igmpHeader := header.IGMP(b)
+
+	if got, want := igmpHeader.Type(), header.IGMPMembershipQuery; got != want {
+		t.Errorf("got igmpHeader.Type() = %x, want = %x", got, want)
+	}
+
+	if got, want := igmpHeader.MaxRespTime(), header.DecisecondToDuration(maxRespTimeTenthSec); got != want {
+		t.Errorf("got igmpHeader.MaxRespTime() = %s, want = %s", got, want)
+	}
+
+	if got, want := igmpHeader.Checksum(), uint16(0xC0C0); got != want {
+		t.Errorf("got igmpHeader.Checksum() = %x, want = %x", got, want)
+	}
+
+	if got, want := igmpHeader.GroupAddress(), tcpip.Address("\x01\x02\x03\x04"); got != want {
+		t.Errorf("got igmpHeader.GroupAddress() = %s, want = %s", got, want)
+	}
+
+	igmpType := header.IGMPv2MembershipReport
+	igmpHeader.SetType(igmpType)
+	if got := igmpHeader.Type(); got != igmpType {
+		t.Errorf("got igmpHeader.Type() = %x, want = %x", got, igmpType)
+	}
+	if got := header.IGMPType(b[0]); got != igmpType {
+		t.Errorf("got IGMPtype in backing buffer = %x, want %x", got, igmpType)
+	}
+
+	respTime := byte(0x02)
+	igmpHeader.SetMaxRespTime(respTime)
+	if got, want := igmpHeader.MaxRespTime(), header.DecisecondToDuration(respTime); got != want {
+		t.Errorf("got igmpHeader.MaxRespTime() = %s, want = %s", got, want)
+	}
+
+	checksum := uint16(0x0102)
+	igmpHeader.SetChecksum(checksum)
+	if got := igmpHeader.Checksum(); got != checksum {
+		t.Errorf("got igmpHeader.Checksum() = %x, want = %x", got, checksum)
+	}
+
+	groupAddress := tcpip.Address("\x04\x03\x02\x01")
+	igmpHeader.SetGroupAddress(groupAddress)
+	if got := igmpHeader.GroupAddress(); got != groupAddress {
+		t.Errorf("got igmpHeader.GroupAddress() = %s, want = %s", got, groupAddress)
+	}
+}
+
+// TestIGMPChecksum ensures that the checksum calculator produces the expected
+// checksum.
+func TestIGMPChecksum(t *testing.T) {
+	b := []byte{
+		0x11,       // IGMP Type, Membership Query
+		0xF0,       // Maximum Response Time
+		0xC0, 0xC0, // Checksum
+		0x01, 0x02, 0x03, 0x04, // Group Address
+	}
+
+	igmpHeader := header.IGMP(b)
+
+	// Calculate the initial checksum after setting the checksum temporarily to 0
+	// to avoid checksumming the checksum.
+	initialChecksum := igmpHeader.Checksum()
+	igmpHeader.SetChecksum(0)
+	checksum := ^header.Checksum(b, 0)
+	igmpHeader.SetChecksum(initialChecksum)
+
+	if got := header.IGMPCalculateChecksum(igmpHeader); got != checksum {
+		t.Errorf("got IGMPCalculateChecksum = %x, want %x", got, checksum)
+	}
+}
+
+func TestDecisecondToDuration(t *testing.T) {
+	const valueInDeciseconds = 5
+	if got, want := header.DecisecondToDuration(valueInDeciseconds), valueInDeciseconds*time.Second/10; got != want {
+		t.Fatalf("got header.DecisecondToDuration(%d) = %s, want = %s", valueInDeciseconds, got, want)
+	}
+}
diff --git a/pkg/tcpip/header/ipv4.go b/pkg/tcpip/header/ipv4.go
index 4c6e4be64..e6103f4bc 100644
--- a/pkg/tcpip/header/ipv4.go
+++ b/pkg/tcpip/header/ipv4.go
@@ -16,6 +16,7 @@ package header
 
 import (
 	"encoding/binary"
+	"errors"
 	"fmt"
 
 	"gvisor.dev/gvisor/pkg/tcpip"
@@ -38,7 +39,6 @@ import (
 //   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 //   |                    Options                    |    Padding    |
 //   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-//
 const (
 	versIHL = 0
 	tos     = 1
@@ -56,12 +56,9 @@ const (
 )
 
 // IPv4Fields contains the fields of an IPv4 packet. It is used to describe the
-// fields of a packet that needs to be encoded.
+// fields of a packet that needs to be encoded. The IHL field is not here as
+// it is totally defined by the size of the options.
 type IPv4Fields struct {
-	// IHL is the "internet header length" field of an IPv4 packet. The value
-	// is in bytes.
-	IHL uint8
-
 	// TOS is the "type of service" field of an IPv4 packet.
 	TOS uint8
 
@@ -91,9 +88,22 @@ type IPv4Fields struct {
 
 	// DstAddr is the "destination ip address" of an IPv4 packet.
 	DstAddr tcpip.Address
+
+	// Options must be 40 bytes or less as they must fit along with the
+	// rest of the IPv4 header into the maximum size describable in the
+	// IHL field. RFC 791 section 3.1 says:
+	//    IHL:  4 bits
+	//
+	//    Internet Header Length is the length of the internet header in 32
+	//    bit words, and thus points to the beginning of the data.  Note that
+	//    the minimum value for a correct header is 5.
+	//
+	// That leaves ten 32 bit (4 byte) fields for options. An attempt to encode
+	// more will fail.
+	Options IPv4OptionsSerializer
 }
 
-// IPv4 represents an ipv4 header stored in a byte array.
+// IPv4 is an IPv4 header.
 // Most of the methods of IPv4 access to the underlying slice without
 // checking the boundaries and could panic because of 'index out of range'.
 // Always call IsValid() to validate an instance of IPv4 before using other
@@ -106,16 +116,19 @@ const (
 	IPv4MinimumSize = 20
 
 	// IPv4MaximumHeaderSize is the maximum size of an IPv4 header. Given
-	// that there are only 4 bits to represents the header length in 32-bit
-	// units, the header cannot exceed 15*4 = 60 bytes.
+	// that there are only 4 bits (max 0xF (15)) to represent the header length
+	// in 32-bit (4 byte) units, the header cannot exceed 15*4 = 60 bytes.
 	IPv4MaximumHeaderSize = 60
 
+	// IPv4MaximumOptionsSize is the largest size the IPv4 options can be.
+	IPv4MaximumOptionsSize = IPv4MaximumHeaderSize - IPv4MinimumSize
+
 	// IPv4MaximumPayloadSize is the maximum size of a valid IPv4 payload.
 	//
 	// Linux limits this to 65,515 octets (the max IP datagram size - the IPv4
 	// header size). But RFC 791 section 3.2 discusses the design of the IPv4
 	// fragment "allows 2**13 = 8192 fragments of 8 octets each for a total of
-	// 65,536 octets. Note that this is consistent with the the datagram total
+	// 65,536 octets. Note that this is consistent with the datagram total
 	// length field (of course, the header is counted in the total length and not
 	// in the fragments)."
 	IPv4MaximumPayloadSize = 65536
@@ -130,7 +143,7 @@ const (
 	// IPv4ProtocolNumber is IPv4's network protocol number.
 	IPv4ProtocolNumber tcpip.NetworkProtocolNumber = 0x0800
 
-	// IPv4Version is the version of the ipv4 protocol.
+	// IPv4Version is the version of the IPv4 protocol.
 	IPv4Version = 4
 
 	// IPv4AllSystems is the all systems IPv4 multicast address as per
@@ -144,10 +157,20 @@ const (
 	// IPv4Any is the non-routable IPv4 "any" meta address.
 	IPv4Any tcpip.Address = "\x00\x00\x00\x00"
 
+	// IPv4AllRoutersGroup is a multicast address for all routers.
+	IPv4AllRoutersGroup tcpip.Address = "\xe0\x00\x00\x02"
+
 	// IPv4MinimumProcessableDatagramSize is the minimum size of an IP
 	// packet that every IPv4 capable host must be able to
 	// process/reassemble.
 	IPv4MinimumProcessableDatagramSize = 576
+
+	// IPv4MinimumMTU is the minimum MTU required by IPv4, per RFC 791,
+	// section 3.2:
+	//   Every internet module must be able to forward a datagram of 68 octets
+	//   without further fragmentation.  This is because an internet header may be
+	//   up to 60 octets, and the minimum fragment is 8 octets.
+	IPv4MinimumMTU = 68
 )
 
 // Flags that may be set in an IPv4 packet.
@@ -191,14 +214,13 @@ func IPVersion(b []byte) int {
 //       Internet Header Length is the length of the internet header in 32
 //       bit words, and thus points to the beginning of the data.  Note that
 //       the minimum value for a correct header is 5.
-//
 const (
 	ipVersionShift = 4
 	ipIHLMask      = 0x0f
 	IPv4IHLStride  = 4
 )
 
-// HeaderLength returns the value of the "header length" field of the ipv4
+// HeaderLength returns the value of the "header length" field of the IPv4
 // header. The length returned is in bytes.
 func (b IPv4) HeaderLength() uint8 {
 	return (b[versIHL] & ipIHLMask) * IPv4IHLStride
@@ -212,17 +234,17 @@ func (b IPv4) SetHeaderLength(hdrLen uint8) {
 	b[versIHL] = (IPv4Version << ipVersionShift) | ((hdrLen / IPv4IHLStride) & ipIHLMask)
 }
 
-// ID returns the value of the identifier field of the ipv4 header.
+// ID returns the value of the identifier field of the IPv4 header.
 func (b IPv4) ID() uint16 {
 	return binary.BigEndian.Uint16(b[id:])
 }
 
-// Protocol returns the value of the protocol field of the ipv4 header.
+// Protocol returns the value of the protocol field of the IPv4 header.
 func (b IPv4) Protocol() uint8 {
 	return b[protocol]
 }
 
-// Flags returns the "flags" field of the ipv4 header.
+// Flags returns the "flags" field of the IPv4 header.
 func (b IPv4) Flags() uint8 {
 	return uint8(binary.BigEndian.Uint16(b[flagsFO:]) >> 13)
 }
@@ -232,41 +254,52 @@ func (b IPv4) More() bool {
 	return b.Flags()&IPv4FlagMoreFragments != 0
 }
 
-// TTL returns the "TTL" field of the ipv4 header.
+// TTL returns the "TTL" field of the IPv4 header.
 func (b IPv4) TTL() uint8 {
 	return b[ttl]
 }
 
-// FragmentOffset returns the "fragment offset" field of the ipv4 header.
+// FragmentOffset returns the "fragment offset" field of the IPv4 header.
 func (b IPv4) FragmentOffset() uint16 {
 	return binary.BigEndian.Uint16(b[flagsFO:]) << 3
 }
 
-// TotalLength returns the "total length" field of the ipv4 header.
+// TotalLength returns the "total length" field of the IPv4 header.
 func (b IPv4) TotalLength() uint16 {
 	return binary.BigEndian.Uint16(b[IPv4TotalLenOffset:])
 }
 
-// Checksum returns the checksum field of the ipv4 header.
+// Checksum returns the checksum field of the IPv4 header.
 func (b IPv4) Checksum() uint16 {
 	return binary.BigEndian.Uint16(b[checksum:])
 }
 
-// SourceAddress returns the "source address" field of the ipv4 header.
+// SourceAddress returns the "source address" field of the IPv4 header.
 func (b IPv4) SourceAddress() tcpip.Address {
 	return tcpip.Address(b[srcAddr : srcAddr+IPv4AddressSize])
 }
 
-// DestinationAddress returns the "destination address" field of the ipv4
+// DestinationAddress returns the "destination address" field of the IPv4
 // header.
 func (b IPv4) DestinationAddress() tcpip.Address {
 	return tcpip.Address(b[dstAddr : dstAddr+IPv4AddressSize])
 }
 
-// Options returns a a buffer holding the options.
-func (b IPv4) Options() []byte {
+// padIPv4OptionsLength returns the total length for IPv4 options of the given
+// length after applying padding according to RFC 791:
+//    The internet header padding is used to ensure that the internet
+//    header ends on a 32 bit boundary.
+func padIPv4OptionsLength(length uint8) uint8 {
+	return (length + IPv4IHLStride - 1) & ^uint8(IPv4IHLStride-1)
+}
+
+// IPv4Options is a buffer that holds all the raw IP options.
+type IPv4Options []byte
+
+// Options returns a buffer holding the options.
+func (b IPv4) Options() IPv4Options {
 	hdrLen := b.HeaderLength()
-	return b[options:hdrLen:hdrLen]
+	return IPv4Options(b[options:hdrLen:hdrLen])
 }
 
 // TransportProtocol implements Network.TransportProtocol.
@@ -279,17 +312,17 @@ func (b IPv4) Payload() []byte {
 	return b[b.HeaderLength():][:b.PayloadLength()]
 }
 
-// PayloadLength returns the length of the payload portion of the ipv4 packet.
+// PayloadLength returns the length of the payload portion of the IPv4 packet.
 func (b IPv4) PayloadLength() uint16 {
 	return b.TotalLength() - uint16(b.HeaderLength())
 }
 
-// TOS returns the "type of service" field of the ipv4 header.
+// TOS returns the "type of service" field of the IPv4 header.
 func (b IPv4) TOS() (uint8, uint32) {
 	return b[tos], 0
 }
 
-// SetTOS sets the "type of service" field of the ipv4 header.
+// SetTOS sets the "type of service" field of the IPv4 header.
 func (b IPv4) SetTOS(v uint8, _ uint32) {
 	b[tos] = v
 }
@@ -299,18 +332,18 @@ func (b IPv4) SetTTL(v byte) {
 	b[ttl] = v
 }
 
-// SetTotalLength sets the "total length" field of the ipv4 header.
+// SetTotalLength sets the "total length" field of the IPv4 header.
 func (b IPv4) SetTotalLength(totalLength uint16) {
 	binary.BigEndian.PutUint16(b[IPv4TotalLenOffset:], totalLength)
 }
 
-// SetChecksum sets the checksum field of the ipv4 header.
+// SetChecksum sets the checksum field of the IPv4 header.
 func (b IPv4) SetChecksum(v uint16) {
 	binary.BigEndian.PutUint16(b[checksum:], v)
 }
 
 // SetFlagsFragmentOffset sets the "flags" and "fragment offset" fields of the
-// ipv4 header.
+// IPv4 header.
 func (b IPv4) SetFlagsFragmentOffset(flags uint8, offset uint16) {
 	v := (uint16(flags) << 13) | (offset >> 3)
 	binary.BigEndian.PutUint16(b[flagsFO:], v)
@@ -321,25 +354,36 @@ func (b IPv4) SetID(v uint16) {
 	binary.BigEndian.PutUint16(b[id:], v)
 }
 
-// SetSourceAddress sets the "source address" field of the ipv4 header.
+// SetSourceAddress sets the "source address" field of the IPv4 header.
 func (b IPv4) SetSourceAddress(addr tcpip.Address) {
 	copy(b[srcAddr:srcAddr+IPv4AddressSize], addr)
 }
 
-// SetDestinationAddress sets the "destination address" field of the ipv4
+// SetDestinationAddress sets the "destination address" field of the IPv4
 // header.
 func (b IPv4) SetDestinationAddress(addr tcpip.Address) {
 	copy(b[dstAddr:dstAddr+IPv4AddressSize], addr)
 }
 
-// CalculateChecksum calculates the checksum of the ipv4 header.
+// CalculateChecksum calculates the checksum of the IPv4 header.
 func (b IPv4) CalculateChecksum() uint16 {
 	return Checksum(b[:b.HeaderLength()], 0)
 }
 
-// Encode encodes all the fields of the ipv4 header.
+// Encode encodes all the fields of the IPv4 header.
 func (b IPv4) Encode(i *IPv4Fields) {
-	b.SetHeaderLength(i.IHL)
+	// The size of the options defines the size of the whole header and thus the
+	// IHL field. Options are rare and this is a heavily used function so it is
+	// worth a bit of optimisation here to keep the serializer out of the fast
+	// path.
+	hdrLen := uint8(IPv4MinimumSize)
+	if len(i.Options) != 0 {
+		hdrLen += i.Options.Serialize(b[options:])
+	}
+	if hdrLen > IPv4MaximumHeaderSize {
+		panic(fmt.Sprintf("%d is larger than maximum IPv4 header size of %d", hdrLen, IPv4MaximumHeaderSize))
+	}
+	b.SetHeaderLength(hdrLen)
 	b[tos] = i.TOS
 	b.SetTotalLength(i.TotalLength)
 	binary.BigEndian.PutUint16(b[id:], i.ID)
@@ -351,7 +395,7 @@ func (b IPv4) Encode(i *IPv4Fields) {
 	copy(b[dstAddr:dstAddr+IPv4AddressSize], i.DstAddr)
 }
 
-// EncodePartial updates the total length and checksum fields of ipv4 header,
+// EncodePartial updates the total length and checksum fields of IPv4 header,
 // taking in the partial checksum, which is the checksum of the header without
 // the total length and checksum fields. It is useful in cases when similar
 // packets are produced.
@@ -398,3 +442,587 @@ func IsV4LoopbackAddress(addr tcpip.Address) bool {
 	}
 	return addr[0] == 0x7f
 }
+
+// ========================= Options ==========================
+
+// An IPv4OptionType can hold the values for the Type in an IPv4 option.
+type IPv4OptionType byte
+
+// These constants are needed to identify individual options in the option list.
+// While RFC 791 (page 31) says "Every internet module must be able to act on
+// every option." This has not generally been adhered to and some options have
+// very low rates of support. We do not support options other than those shown
+// below.
+
+const (
+	// IPv4OptionListEndType is the option type for the End Of Option List
+	// option. Anything following is ignored.
+	IPv4OptionListEndType IPv4OptionType = 0
+
+	// IPv4OptionNOPType is the No-Operation option. May appear between other
+	// options and may appear multiple times.
+	IPv4OptionNOPType IPv4OptionType = 1
+
+	// IPv4OptionRouterAlertType is the option type for the Router Alert option,
+	// defined in RFC 2113 Section 2.1.
+	IPv4OptionRouterAlertType IPv4OptionType = 20 | 0x80
+
+	// IPv4OptionRecordRouteType is used by each router on the path of the packet
+	// to record its path. It is carried over to an Echo Reply.
+	IPv4OptionRecordRouteType IPv4OptionType = 7
+
+	// IPv4OptionTimestampType is the option type for the Timestamp option.
+	IPv4OptionTimestampType IPv4OptionType = 68
+
+	// ipv4OptionTypeOffset is the offset in an option of its type field.
+	ipv4OptionTypeOffset = 0
+
+	// IPv4OptionLengthOffset is the offset in an option of its length field.
+	IPv4OptionLengthOffset = 1
+)
+
+// Potential errors when parsing generic IP options.
+var (
+	ErrIPv4OptZeroLength   = errors.New("zero length IP option")
+	ErrIPv4OptDuplicate    = errors.New("duplicate IP option")
+	ErrIPv4OptInvalid      = errors.New("invalid IP option")
+	ErrIPv4OptMalformed    = errors.New("malformed IP option")
+	ErrIPv4OptionTruncated = errors.New("truncated IP option")
+	ErrIPv4OptionAddress   = errors.New("bad IP option address")
+)
+
+// IPv4Option is an interface representing various option types.
+type IPv4Option interface {
+	// Type returns the type identifier of the option.
+	Type() IPv4OptionType
+
+	// Size returns the size of the option in bytes.
+	Size() uint8
+
+	// Contents returns a slice holding the contents of the option.
+	Contents() []byte
+}
+
+var _ IPv4Option = (*IPv4OptionGeneric)(nil)
+
+// IPv4OptionGeneric is an IPv4 Option of unknown type.
+type IPv4OptionGeneric []byte
+
+// Type implements IPv4Option.
+func (o *IPv4OptionGeneric) Type() IPv4OptionType {
+	return IPv4OptionType((*o)[ipv4OptionTypeOffset])
+}
+
+// Size implements IPv4Option.
+func (o *IPv4OptionGeneric) Size() uint8 { return uint8(len(*o)) }
+
+// Contents implements IPv4Option.
+func (o *IPv4OptionGeneric) Contents() []byte { return []byte(*o) }
+
+// IPv4OptionIterator is an iterator pointing to a specific IP option
+// at any point of time. It also holds information as to a new options buffer
+// that we are building up to hand back to the caller.
+type IPv4OptionIterator struct {
+	options IPv4Options
+	// ErrCursor is where we are while parsing options. It is exported as any
+	// resulting ICMP packet is supposed to have a pointer to the byte within
+	// the IP packet where the error was detected.
+	ErrCursor     uint8
+	nextErrCursor uint8
+	newOptions    [IPv4MaximumOptionsSize]byte
+	writePoint    int
+}
+
+// MakeIterator sets up and returns an iterator of options. It also sets up the
+// building of a new option set.
+func (o IPv4Options) MakeIterator() IPv4OptionIterator {
+	return IPv4OptionIterator{
+		options:       o,
+		nextErrCursor: IPv4MinimumSize,
+	}
+}
+
+// RemainingBuffer returns the remaining (unused) part of the new option buffer,
+// into which a new option may be written.
+func (i *IPv4OptionIterator) RemainingBuffer() IPv4Options {
+	return IPv4Options(i.newOptions[i.writePoint:])
+}
+
+// ConsumeBuffer marks a portion of the new buffer as used.
+func (i *IPv4OptionIterator) ConsumeBuffer(size int) {
+	i.writePoint += size
+}
+
+// PushNOPOrEnd puts one of the single byte options onto the new options.
+// Only values 0 or 1 (ListEnd or NOP) are valid input.
+func (i *IPv4OptionIterator) PushNOPOrEnd(val IPv4OptionType) {
+	if val > IPv4OptionNOPType {
+		panic(fmt.Sprintf("invalid option type %d pushed onto option build buffer", val))
+	}
+	i.newOptions[i.writePoint] = byte(val)
+	i.writePoint++
+}
+
+// Finalize returns the completed replacement options buffer padded
+// as needed.
+func (i *IPv4OptionIterator) Finalize() IPv4Options {
+	// RFC 791 page 31 says:
+	//     The options might not end on a 32-bit boundary.  The internet header
+	//     must be filled out with octets of zeros.  The first of these would
+	//     be interpreted as the end-of-options option, and the remainder as
+	//     internet header padding.
+	// Since the buffer is already zero filled we just need to step the write
+	// pointer up to the next multiple of 4.
+	options := IPv4Options(i.newOptions[:(i.writePoint+0x3) & ^0x3])
+	// Poison the write pointer.
+	i.writePoint = len(i.newOptions)
+	return options
+}
+
+// Next returns the next IP option in the buffer/list of IP options.
+// It returns
+// - The next option, or nil if there is an error or no options remain.
+// - A boolean which is true if parsing of all the options is complete.
+// - An error which is non-nil if an error condition was encountered.
+func (i *IPv4OptionIterator) Next() (IPv4Option, bool, error) {
+	// The options slice gets shorter as we process the options. When we have no
+	// bytes left we are done.
+	if len(i.options) == 0 {
+		return nil, true, nil
+	}
+
+	i.ErrCursor = i.nextErrCursor
+
+	optType := IPv4OptionType(i.options[ipv4OptionTypeOffset])
+
+	if optType == IPv4OptionNOPType || optType == IPv4OptionListEndType {
+		optionBody := i.options[:1]
+		i.options = i.options[1:]
+		i.nextErrCursor = i.ErrCursor + 1
+		retval := IPv4OptionGeneric(optionBody)
+		return &retval, false, nil
+	}
+
+	// There are no more single byte options defined.  All the rest have a length
+	// field so we need to sanity check it.
+	if len(i.options) == 1 {
+		return nil, true, ErrIPv4OptMalformed
+	}
+
+	optLen := i.options[IPv4OptionLengthOffset]
+
+	if optLen == 0 {
+		i.ErrCursor++
+		return nil, true, ErrIPv4OptZeroLength
+	}
+
+	if optLen == 1 {
+		i.ErrCursor++
+		return nil, true, ErrIPv4OptMalformed
+	}
+
+	if optLen > uint8(len(i.options)) {
+		i.ErrCursor++
+		return nil, true, ErrIPv4OptionTruncated
+	}
+
+	optionBody := i.options[:optLen]
+	i.nextErrCursor = i.ErrCursor + optLen
+	i.options = i.options[optLen:]
+
+	// Check the length of some option types that we know.
+	switch optType {
+	case IPv4OptionTimestampType:
+		if optLen < IPv4OptionTimestampHdrLength {
+			i.ErrCursor++
+			return nil, true, ErrIPv4OptMalformed
+		}
+		retval := IPv4OptionTimestamp(optionBody)
+		return &retval, false, nil
+
+	case IPv4OptionRecordRouteType:
+		if optLen < IPv4OptionRecordRouteHdrLength {
+			i.ErrCursor++
+			return nil, true, ErrIPv4OptMalformed
+		}
+		retval := IPv4OptionRecordRoute(optionBody)
+		return &retval, false, nil
+	}
+	retval := IPv4OptionGeneric(optionBody)
+	return &retval, false, nil
+}
+
+//
+// IP Timestamp option - RFC 791 page 22.
+// +--------+--------+--------+--------+
+// |01000100| length | pointer|oflw|flg|
+// +--------+--------+--------+--------+
+// |         internet address          |
+// +--------+--------+--------+--------+
+// |             timestamp             |
+// +--------+--------+--------+--------+
+// |                ...                |
+//
+// Type = 68
+//
+// The Option Length is the number of octets in the option counting
+// the type, length, pointer, and overflow/flag octets (maximum
+// length 40).
+//
+// The Pointer is the number of octets from the beginning of this
+// option to the end of timestamps plus one (i.e., it points to the
+// octet beginning the space for next timestamp).  The smallest
+// legal value is 5.  The timestamp area is full when the pointer
+// is greater than the length.
+//
+// The Overflow (oflw) [4 bits] is the number of IP modules that
+// cannot register timestamps due to lack of space.
+//
+// The Flag (flg) [4 bits] values are
+//
+//   0 -- time stamps only, stored in consecutive 32-bit words,
+//
+//   1 -- each timestamp is preceded with internet address of the
+//        registering entity,
+//
+//   3 -- the internet address fields are prespecified.  An IP
+//        module only registers its timestamp if it matches its own
+//        address with the next specified internet address.
+//
+// Timestamps are defined in RFC 791 page 22 as milliseconds since midnight UTC.
+//
+//        The Timestamp is a right-justified, 32-bit timestamp in
+//        milliseconds since midnight UT.  If the time is not available in
+//        milliseconds or cannot be provided with respect to midnight UT
+//        then any time may be inserted as a timestamp provided the high
+//        order bit of the timestamp field is set to one to indicate the
+//        use of a non-standard value.
+
+// IPv4OptTSFlags defines the values expected in the Timestamp
+// option Flags field.
+type IPv4OptTSFlags uint8
+
+//
+// Timestamp option specific related constants.
+const (
+	// IPv4OptionTimestampHdrLength is the length of the timestamp option header.
+	IPv4OptionTimestampHdrLength = 4
+
+	// IPv4OptionTimestampSize is the size of an IP timestamp.
+	IPv4OptionTimestampSize = 4
+
+	// IPv4OptionTimestampWithAddrSize is the size of an IP timestamp + Address.
+	IPv4OptionTimestampWithAddrSize = IPv4AddressSize + IPv4OptionTimestampSize
+
+	// IPv4OptionTimestampMaxSize is limited by space for options.
+	IPv4OptionTimestampMaxSize = IPv4MaximumOptionsSize
+
+	// IPv4OptionTimestampOnlyFlag is a flag indicating that only timestamp
+	// is present.
+	IPv4OptionTimestampOnlyFlag IPv4OptTSFlags = 0
+
+	// IPv4OptionTimestampWithIPFlag is a flag indicating that both timestamps and
+	// IP are present.
+	IPv4OptionTimestampWithIPFlag IPv4OptTSFlags = 1
+
+	// IPv4OptionTimestampWithPredefinedIPFlag is a flag indicating that
+	// predefined IP is present.
+	IPv4OptionTimestampWithPredefinedIPFlag IPv4OptTSFlags = 3
+)
+
+// ipv4TimestampTime provides the current time as specified in RFC 791.
+func ipv4TimestampTime(clock tcpip.Clock) uint32 {
+	const millisecondsPerDay = 24 * 3600 * 1000
+	const nanoPerMilli = 1000000
+	return uint32((clock.NowNanoseconds() / nanoPerMilli) % millisecondsPerDay)
+}
+
+// IP Timestamp option fields.
+const (
+	// IPv4OptTSPointerOffset is the offset of the Timestamp pointer field.
+	IPv4OptTSPointerOffset = 2
+
+	// IPv4OptTSOFLWAndFLGOffset is the offset of the combined Flag and Overflow
+	// fields, (each being 4 bits).
+	IPv4OptTSOFLWAndFLGOffset = 3
+	// These constants define the sub byte fields of the Flag and OverFlow field.
+	ipv4OptionTimestampOverflowshift      = 4
+	ipv4OptionTimestampFlagsMask     byte = 0x0f
+)
+
+var _ IPv4Option = (*IPv4OptionTimestamp)(nil)
+
+// IPv4OptionTimestamp is a Timestamp option from RFC 791.
+type IPv4OptionTimestamp []byte
+
+// Type implements IPv4Option.Type().
+func (ts *IPv4OptionTimestamp) Type() IPv4OptionType { return IPv4OptionTimestampType }
+
+// Size implements IPv4Option.
+func (ts *IPv4OptionTimestamp) Size() uint8 { return uint8(len(*ts)) }
+
+// Contents implements IPv4Option.
+func (ts *IPv4OptionTimestamp) Contents() []byte { return []byte(*ts) }
+
+// Pointer returns the pointer field in the IP Timestamp option.
+func (ts *IPv4OptionTimestamp) Pointer() uint8 {
+	return (*ts)[IPv4OptTSPointerOffset]
+}
+
+// Flags returns the flags field in the IP Timestamp option.
+func (ts *IPv4OptionTimestamp) Flags() IPv4OptTSFlags {
+	return IPv4OptTSFlags((*ts)[IPv4OptTSOFLWAndFLGOffset] & ipv4OptionTimestampFlagsMask)
+}
+
+// Overflow returns the Overflow field in the IP Timestamp option.
+func (ts *IPv4OptionTimestamp) Overflow() uint8 {
+	return (*ts)[IPv4OptTSOFLWAndFLGOffset] >> ipv4OptionTimestampOverflowshift
+}
+
+// IncOverflow increments the Overflow field in the IP Timestamp option. It
+// returns the incremented value. If the return value is 0 then the field
+// overflowed.
+func (ts *IPv4OptionTimestamp) IncOverflow() uint8 {
+	(*ts)[IPv4OptTSOFLWAndFLGOffset] += 1 << ipv4OptionTimestampOverflowshift
+	return ts.Overflow()
+}
+
+// UpdateTimestamp updates the fields of the next free timestamp slot.
+func (ts *IPv4OptionTimestamp) UpdateTimestamp(addr tcpip.Address, clock tcpip.Clock) {
+	slot := (*ts)[ts.Pointer()-1:]
+
+	switch ts.Flags() {
+	case IPv4OptionTimestampOnlyFlag:
+		binary.BigEndian.PutUint32(slot, ipv4TimestampTime(clock))
+		(*ts)[IPv4OptTSPointerOffset] += IPv4OptionTimestampSize
+	case IPv4OptionTimestampWithIPFlag:
+		if n := copy(slot, addr); n != IPv4AddressSize {
+			panic(fmt.Sprintf("copied %d bytes, expected %d bytes", n, IPv4AddressSize))
+		}
+		binary.BigEndian.PutUint32(slot[IPv4AddressSize:], ipv4TimestampTime(clock))
+		(*ts)[IPv4OptTSPointerOffset] += IPv4OptionTimestampWithAddrSize
+	case IPv4OptionTimestampWithPredefinedIPFlag:
+		if tcpip.Address(slot[:IPv4AddressSize]) == addr {
+			binary.BigEndian.PutUint32(slot[IPv4AddressSize:], ipv4TimestampTime(clock))
+			(*ts)[IPv4OptTSPointerOffset] += IPv4OptionTimestampWithAddrSize
+		}
+	}
+}
+
+// RecordRoute option specific related constants.
+//
+// from RFC 791 page 20:
+//   Record Route
+//
+//         +--------+--------+--------+---------//--------+
+//         |00000111| length | pointer|     route data    |
+//         +--------+--------+--------+---------//--------+
+//           Type=7
+//
+//         The record route option provides a means to record the route of
+//         an internet datagram.
+//
+//         The option begins with the option type code.  The second octet
+//         is the option length which includes the option type code and the
+//         length octet, the pointer octet, and length-3 octets of route
+//         data.  The third octet is the pointer into the route data
+//         indicating the octet which begins the next area to store a route
+//         address.  The pointer is relative to this option, and the
+//         smallest legal value for the pointer is 4.
+const (
+	// IPv4OptionRecordRouteHdrLength is the length of the Record Route option
+	// header.
+	IPv4OptionRecordRouteHdrLength = 3
+
+	// IPv4OptRRPointerOffset is the offset to the pointer field in an RR
+	// option, which points to the next free slot in the list of addresses.
+	IPv4OptRRPointerOffset = 2
+)
+
+var _ IPv4Option = (*IPv4OptionRecordRoute)(nil)
+
+// IPv4OptionRecordRoute is an IPv4 RecordRoute option defined by RFC 791.
+type IPv4OptionRecordRoute []byte
+
+// Pointer returns the pointer field in the IP RecordRoute option.
+func (rr *IPv4OptionRecordRoute) Pointer() uint8 {
+	return (*rr)[IPv4OptRRPointerOffset]
+}
+
+// StoreAddress stores the given IPv4 address into the next free slot.
+func (rr *IPv4OptionRecordRoute) StoreAddress(addr tcpip.Address) {
+	start := rr.Pointer() - 1 // A one based number.
+	// start and room checked by caller.
+	if n := copy((*rr)[start:], addr); n != IPv4AddressSize {
+		panic(fmt.Sprintf("copied %d bytes, expected %d bytes", n, IPv4AddressSize))
+	}
+	(*rr)[IPv4OptRRPointerOffset] += IPv4AddressSize
+}
+
+// Type implements IPv4Option.
+func (rr *IPv4OptionRecordRoute) Type() IPv4OptionType { return IPv4OptionRecordRouteType }
+
+// Size implements IPv4Option.
+func (rr *IPv4OptionRecordRoute) Size() uint8 { return uint8(len(*rr)) }
+
+// Contents implements IPv4Option.
+func (rr *IPv4OptionRecordRoute) Contents() []byte { return []byte(*rr) }
+
+// Router Alert option specific related constants.
+//
+// from RFC 2113 section 2.1:
+//
+//     +--------+--------+--------+--------+
+//     |10010100|00000100|  2 octet value  |
+//     +--------+--------+--------+--------+
+//
+//     Type:
+//     Copied flag:  1 (all fragments must carry the option)
+//     Option class: 0 (control)
+//     Option number: 20 (decimal)
+//
+//     Length: 4
+//
+//     Value:  A two octet code with the following values:
+//     0 - Router shall examine packet
+//     1-65535 - Reserved
+const (
+	// IPv4OptionRouterAlertLength is the length of a Router Alert option.
+	IPv4OptionRouterAlertLength = 4
+
+	// IPv4OptionRouterAlertValue is the only permissible value of the 16 bit
+	// payload of the router alert option.
+	IPv4OptionRouterAlertValue = 0
+
+	// iPv4OptionRouterAlertValueOffset is the offset for the value of a
+	// RouterAlert option.
+	iPv4OptionRouterAlertValueOffset = 2
+)
+
+// IPv4SerializableOption is an interface to represent serializable IPv4 option
+// types.
+type IPv4SerializableOption interface {
+	// optionType returns the type identifier of the option.
+	optionType() IPv4OptionType
+}
+
+// IPv4SerializableOptionPayload is an interface providing serialization of the
+// payload of an IPv4 option.
+type IPv4SerializableOptionPayload interface {
+	// length returns the size of the payload.
+	length() uint8
+
+	// serializeInto serializes the payload into the provided byte buffer.
+	//
+	// Note, the caller MUST provide a byte buffer with size of at least
+	// length(). Implementers of this function may assume that the byte buffer
+	// is of sufficient size. serializeInto MUST panic if the provided byte
+	// buffer is not of sufficient size.
+	//
+	// serializeInto will return the number of bytes that was used to
+	// serialize the receiver. Implementers must only use the number of
+	// bytes required to serialize the receiver. Callers MAY provide a
+	// larger buffer than required to serialize into.
+	serializeInto(buffer []byte) uint8
+}
+
+// IPv4OptionsSerializer is a serializer for IPv4 options.
+type IPv4OptionsSerializer []IPv4SerializableOption
+
+// Length returns the total number of bytes required to serialize the options.
+func (s IPv4OptionsSerializer) Length() uint8 {
+	var total uint8
+	for _, opt := range s {
+		total++
+		if withPayload, ok := opt.(IPv4SerializableOptionPayload); ok {
+			// Add 1 to reported length to account for the length byte.
+			total += 1 + withPayload.length()
+		}
+	}
+	return padIPv4OptionsLength(total)
+}
+
+// Serialize serializes the provided list of IPv4 options into b.
+//
+// Note, b must be of sufficient size to hold all the options in s. See
+// IPv4OptionsSerializer.Length for details on getting the total size
+// of a serialized IPv4OptionsSerializer.
+//
+// Serialize panics if b is not of sufficient size to hold all the options in s.
+func (s IPv4OptionsSerializer) Serialize(b []byte) uint8 {
+	var total uint8
+	for _, opt := range s {
+		ty := opt.optionType()
+		if withPayload, ok := opt.(IPv4SerializableOptionPayload); ok {
+			// Serialize first to reduce bounds checks.
+			l := 2 + withPayload.serializeInto(b[2:])
+			b[0] = byte(ty)
+			b[1] = l
+			b = b[l:]
+			total += l
+			continue
+		}
+		// Options without payload consist only of the type field.
+		//
+		// NB: Repeating code from the branch above is intentional to minimize
+		// bounds checks.
+		b[0] = byte(ty)
+		b = b[1:]
+		total++
+	}
+
+	// According to RFC 791:
+	//
+	//  The internet header padding is used to ensure that the internet
+	//  header ends on a 32 bit boundary. The padding is zero.
+	padded := padIPv4OptionsLength(total)
+	b = b[:padded-total]
+	for i := range b {
+		b[i] = 0
+	}
+	return padded
+}
+
+var _ IPv4SerializableOptionPayload = (*IPv4SerializableRouterAlertOption)(nil)
+var _ IPv4SerializableOption = (*IPv4SerializableRouterAlertOption)(nil)
+
+// IPv4SerializableRouterAlertOption provides serialization of the Router Alert
+// IPv4 option according to RFC 2113.
+type IPv4SerializableRouterAlertOption struct{}
+
+// optionType implements IPv4SerializableOption.
+func (*IPv4SerializableRouterAlertOption) optionType() IPv4OptionType {
+	return IPv4OptionRouterAlertType
+}
+
+// length implements IPv4SerializableOptionPayload.
+func (*IPv4SerializableRouterAlertOption) length() uint8 {
+	return IPv4OptionRouterAlertLength - iPv4OptionRouterAlertValueOffset
+}
+
+// serializeInto implements IPv4SerializableOptionPayload.
+func (o *IPv4SerializableRouterAlertOption) serializeInto(buffer []byte) uint8 {
+	binary.BigEndian.PutUint16(buffer, IPv4OptionRouterAlertValue)
+	return o.length()
+}
+
+var _ IPv4SerializableOption = (*IPv4SerializableNOPOption)(nil)
+
+// IPv4SerializableNOPOption provides serialization for the IPv4 no-op option.
+type IPv4SerializableNOPOption struct{}
+
+// optionType implements IPv4SerializableOption.
+func (*IPv4SerializableNOPOption) optionType() IPv4OptionType {
+	return IPv4OptionNOPType
+}
+
+var _ IPv4SerializableOption = (*IPv4SerializableListEndOption)(nil)
+
+// IPv4SerializableListEndOption provides serialization for the IPv4 List End
+// option.
+type IPv4SerializableListEndOption struct{}
+
+// optionType implements IPv4SerializableOption.
+func (*IPv4SerializableListEndOption) optionType() IPv4OptionType {
+	return IPv4OptionListEndType
+}
diff --git a/pkg/tcpip/header/ipv4_test.go b/pkg/tcpip/header/ipv4_test.go
new file mode 100644
index 000000000..6475cd694
--- /dev/null
+++ b/pkg/tcpip/header/ipv4_test.go
@@ -0,0 +1,179 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package header_test
+
+import (
+	"testing"
+
+	"github.com/google/go-cmp/cmp"
+	"gvisor.dev/gvisor/pkg/tcpip/buffer"
+	"gvisor.dev/gvisor/pkg/tcpip/header"
+)
+
+func TestIPv4OptionsSerializer(t *testing.T) {
+	optCases := []struct {
+		name   string
+		option []header.IPv4SerializableOption
+		expect []byte
+	}{
+		{
+			name: "NOP",
+			option: []header.IPv4SerializableOption{
+				&header.IPv4SerializableNOPOption{},
+			},
+			expect: []byte{1, 0, 0, 0},
+		},
+		{
+			name: "ListEnd",
+			option: []header.IPv4SerializableOption{
+				&header.IPv4SerializableListEndOption{},
+			},
+			expect: []byte{0, 0, 0, 0},
+		},
+		{
+			name: "RouterAlert",
+			option: []header.IPv4SerializableOption{
+				&header.IPv4SerializableRouterAlertOption{},
+			},
+			expect: []byte{148, 4, 0, 0},
+		}, {
+			name: "NOP and RouterAlert",
+			option: []header.IPv4SerializableOption{
+				&header.IPv4SerializableNOPOption{},
+				&header.IPv4SerializableRouterAlertOption{},
+			},
+			expect: []byte{1, 148, 4, 0, 0, 0, 0, 0},
+		},
+	}
+
+	for _, opt := range optCases {
+		t.Run(opt.name, func(t *testing.T) {
+			s := header.IPv4OptionsSerializer(opt.option)
+			l := s.Length()
+			if want := len(opt.expect); int(l) != want {
+				t.Fatalf("s.Length() = %d, want = %d", l, want)
+			}
+			b := make([]byte, l)
+			for i := range b {
+				// Fill the buffer with full bytes to ensure padding is being set
+				// correctly.
+				b[i] = 0xFF
+			}
+			if serializedLength := s.Serialize(b); serializedLength != l {
+				t.Fatalf("s.Serialize(_) = %d, want %d", serializedLength, l)
+			}
+			if diff := cmp.Diff(opt.expect, b); diff != "" {
+				t.Errorf("mismatched serialized option (-want +got):\n%s", diff)
+			}
+		})
+	}
+}
+
+// TestIPv4EncodeOptions checks that ipv4.Encode correctly fills out the requested
+// fields when options are supplied.
+func TestIPv4EncodeOptions(t *testing.T) {
+	tests := []struct {
+		name           string
+		numberOfNops   int
+		encodedOptions header.IPv4Options // reply should look like this
+		wantIHL        int
+	}{
+		{
+			name:    "valid no options",
+			wantIHL: header.IPv4MinimumSize,
+		},
+		{
+			name:           "one byte options",
+			numberOfNops:   1,
+			encodedOptions: header.IPv4Options{1, 0, 0, 0},
+			wantIHL:        header.IPv4MinimumSize + 4,
+		},
+		{
+			name:           "two byte options",
+			numberOfNops:   2,
+			encodedOptions: header.IPv4Options{1, 1, 0, 0},
+			wantIHL:        header.IPv4MinimumSize + 4,
+		},
+		{
+			name:           "three byte options",
+			numberOfNops:   3,
+			encodedOptions: header.IPv4Options{1, 1, 1, 0},
+			wantIHL:        header.IPv4MinimumSize + 4,
+		},
+		{
+			name:           "four byte options",
+			numberOfNops:   4,
+			encodedOptions: header.IPv4Options{1, 1, 1, 1},
+			wantIHL:        header.IPv4MinimumSize + 4,
+		},
+		{
+			name:           "five byte options",
+			numberOfNops:   5,
+			encodedOptions: header.IPv4Options{1, 1, 1, 1, 1, 0, 0, 0},
+			wantIHL:        header.IPv4MinimumSize + 8,
+		},
+		{
+			name:         "thirty nine byte options",
+			numberOfNops: 39,
+			encodedOptions: header.IPv4Options{
+				1, 1, 1, 1, 1, 1, 1, 1,
+				1, 1, 1, 1, 1, 1, 1, 1,
+				1, 1, 1, 1, 1, 1, 1, 1,
+				1, 1, 1, 1, 1, 1, 1, 1,
+				1, 1, 1, 1, 1, 1, 1, 0,
+			},
+			wantIHL: header.IPv4MinimumSize + 40,
+		},
+	}
+	for _, test := range tests {
+		t.Run(test.name, func(t *testing.T) {
+			serializeOpts := header.IPv4OptionsSerializer(make([]header.IPv4SerializableOption, test.numberOfNops))
+			for i := range serializeOpts {
+				serializeOpts[i] = &header.IPv4SerializableNOPOption{}
+			}
+			paddedOptionLength := serializeOpts.Length()
+			ipHeaderLength := int(header.IPv4MinimumSize + paddedOptionLength)
+			if ipHeaderLength > header.IPv4MaximumHeaderSize {
+				t.Fatalf("IP header length too large: got = %d, want <= %d ", ipHeaderLength, header.IPv4MaximumHeaderSize)
+			}
+			totalLen := uint16(ipHeaderLength)
+			hdr := buffer.NewPrependable(int(totalLen))
+			ip := header.IPv4(hdr.Prepend(ipHeaderLength))
+			// To check the padding works, poison the last byte of the options space.
+			if int(paddedOptionLength) != test.numberOfNops {
+				ip.SetHeaderLength(uint8(ipHeaderLength))
+				ip.Options()[paddedOptionLength-1] = 0xff
+				ip.SetHeaderLength(0)
+			}
+			ip.Encode(&header.IPv4Fields{
+				Options: serializeOpts,
+			})
+			options := ip.Options()
+			wantOptions := test.encodedOptions
+			if got, want := int(ip.HeaderLength()), test.wantIHL; got != want {
+				t.Errorf("got IHL of %d, want %d", got, want)
+			}
+
+			// cmp.Diff does not consider nil slices equal to empty slices, but we do.
+			if len(wantOptions) == 0 && len(options) == 0 {
+				return
+			}
+
+			if diff := cmp.Diff(wantOptions, options); diff != "" {
+				t.Errorf("options mismatch (-want +got):\n%s", diff)
+			}
+		})
+	}
+}
diff --git a/pkg/tcpip/header/ipv6.go b/pkg/tcpip/header/ipv6.go
index c5d8a3456..d522e5f10 100644
--- a/pkg/tcpip/header/ipv6.go
+++ b/pkg/tcpip/header/ipv6.go
@@ -48,13 +48,15 @@ type IPv6Fields struct {
 	// FlowLabel is the "flow label" field of an IPv6 packet.
 	FlowLabel uint32
 
-	// PayloadLength is the "payload length" field of an IPv6 packet.
+	// PayloadLength is the "payload length" field of an IPv6 packet, including
+	// the length of all extension headers.
 	PayloadLength uint16
 
-	// NextHeader is the "next header" field of an IPv6 packet.
-	NextHeader uint8
+	// TransportProtocol is the transport layer protocol number. Serialized in the
+	// last "next header" field of the IPv6 header + extension headers.
+	TransportProtocol tcpip.TransportProtocolNumber
 
-	// HopLimit is the "hop limit" field of an IPv6 packet.
+	// HopLimit is the "Hop Limit" field of an IPv6 packet.
 	HopLimit uint8
 
 	// SrcAddr is the "source ip address" of an IPv6 packet.
@@ -62,6 +64,9 @@ type IPv6Fields struct {
 
 	// DstAddr is the "destination ip address" of an IPv6 packet.
 	DstAddr tcpip.Address
+
+	// ExtensionHeaders are the extension headers following the IPv6 header.
+	ExtensionHeaders IPv6ExtHdrSerializer
 }
 
 // IPv6 represents an ipv6 header stored in a byte array.
@@ -101,8 +106,10 @@ const (
 	// The address is ff02::2.
 	IPv6AllRoutersMulticastAddress tcpip.Address = "\xff\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02"
 
-	// IPv6MinimumMTU is the minimum MTU required by IPv6, per RFC 2460,
-	// section 5.
+	// IPv6MinimumMTU is the minimum MTU required by IPv6, per RFC 8200,
+	// section 5:
+	//   IPv6 requires that every link in the Internet have an MTU of 1280 octets
+	//   or greater.  This is known as the IPv6 minimum link MTU.
 	IPv6MinimumMTU = 1280
 
 	// IPv6Loopback is the IPv6 Loopback address.
@@ -169,7 +176,7 @@ func (b IPv6) PayloadLength() uint16 {
 	return binary.BigEndian.Uint16(b[IPv6PayloadLenOffset:])
 }
 
-// HopLimit returns the value of the "hop limit" field of the ipv6 header.
+// HopLimit returns the value of the "Hop Limit" field of the ipv6 header.
 func (b IPv6) HopLimit() uint8 {
 	return b[hopLimit]
 }
@@ -234,6 +241,11 @@ func (b IPv6) SetDestinationAddress(addr tcpip.Address) {
 	copy(b[v6DstAddr:][:IPv6AddressSize], addr)
 }
 
+// SetHopLimit sets the value of the "Hop Limit" field.
+func (b IPv6) SetHopLimit(v uint8) {
+	b[hopLimit] = v
+}
+
 // SetNextHeader sets the value of the "next header" field of the ipv6 header.
 func (b IPv6) SetNextHeader(v uint8) {
 	b[IPv6NextHeaderOffset] = v
@@ -246,12 +258,14 @@ func (IPv6) SetChecksum(uint16) {
 
 // Encode encodes all the fields of the ipv6 header.
 func (b IPv6) Encode(i *IPv6Fields) {
+	extHdr := b[IPv6MinimumSize:]
 	b.SetTOS(i.TrafficClass, i.FlowLabel)
 	b.SetPayloadLength(i.PayloadLength)
-	b[IPv6NextHeaderOffset] = i.NextHeader
 	b[hopLimit] = i.HopLimit
 	b.SetSourceAddress(i.SrcAddr)
 	b.SetDestinationAddress(i.DstAddr)
+	nextHeader, _ := i.ExtensionHeaders.Serialize(i.TransportProtocol, extHdr)
+	b[IPv6NextHeaderOffset] = nextHeader
 }
 
 // IsValid performs basic validation on the packet.
@@ -373,6 +387,12 @@ func IsV6LinkLocalAddress(addr tcpip.Address) bool {
 	return addr[0] == 0xfe && (addr[1]&0xc0) == 0x80
 }
 
+// IsV6LoopbackAddress determines if the provided address is an IPv6 loopback
+// address.
+func IsV6LoopbackAddress(addr tcpip.Address) bool {
+	return addr == IPv6Loopback
+}
+
 // IsV6LinkLocalMulticastAddress determines if the provided address is an IPv6
 // link-local multicast address.
 func IsV6LinkLocalMulticastAddress(addr tcpip.Address) bool {
diff --git a/pkg/tcpip/header/ipv6_extension_headers.go b/pkg/tcpip/header/ipv6_extension_headers.go
index 583c2c5d3..1fbb2cc98 100644
--- a/pkg/tcpip/header/ipv6_extension_headers.go
+++ b/pkg/tcpip/header/ipv6_extension_headers.go
@@ -20,7 +20,9 @@ import (
 	"encoding/binary"
 	"fmt"
 	"io"
+	"math"
 
+	"gvisor.dev/gvisor/pkg/tcpip"
 	"gvisor.dev/gvisor/pkg/tcpip/buffer"
 )
 
@@ -47,6 +49,11 @@ const (
 	// IPv6NoNextHeaderIdentifier is the header identifier used to signify the end
 	// of an IPv6 payload, as per RFC 8200 section 4.7.
 	IPv6NoNextHeaderIdentifier IPv6ExtensionHeaderIdentifier = 59
+
+	// IPv6UnknownExtHdrIdentifier is reserved by IANA.
+	// https://www.iana.org/assignments/ipv6-parameters/ipv6-parameters.xhtml#extension-header
+	// "254	Use for experimentation and testing	[RFC3692][RFC4727]"
+	IPv6UnknownExtHdrIdentifier IPv6ExtensionHeaderIdentifier = 254
 )
 
 const (
@@ -70,8 +77,8 @@ const (
 	// Fragment Offset field within an IPv6FragmentExtHdr.
 	ipv6FragmentExtHdrFragmentOffsetOffset = 0
 
-	// ipv6FragmentExtHdrFragmentOffsetShift is the least significant bits to
-	// discard from the Fragment Offset.
+	// ipv6FragmentExtHdrFragmentOffsetShift is the bit offset of the Fragment
+	// Offset field within an IPv6FragmentExtHdr.
 	ipv6FragmentExtHdrFragmentOffsetShift = 3
 
 	// ipv6FragmentExtHdrFlagsIdx is the index to the flags field within an
@@ -109,6 +116,37 @@ const (
 	IPv6FragmentExtHdrFragmentOffsetBytesPerUnit = 8
 )
 
+// padIPv6OptionsLength returns the total length for IPv6 options of the given
+// length considering the 8-octet alignment as stated in RFC 8200 Section 4.2.
+func padIPv6OptionsLength(length int) int {
+	return (length + ipv6ExtHdrLenBytesPerUnit - 1) & ^(ipv6ExtHdrLenBytesPerUnit - 1)
+}
+
+// padIPv6Option fills b with the appropriate padding options depending on its
+// length.
+func padIPv6Option(b []byte) {
+	switch len(b) {
+	case 0: // No padding needed.
+	case 1: // Pad with Pad1.
+		b[ipv6ExtHdrOptionTypeOffset] = uint8(ipv6Pad1ExtHdrOptionIdentifier)
+	default: // Pad with PadN.
+		s := b[ipv6ExtHdrOptionPayloadOffset:]
+		for i := range s {
+			s[i] = 0
+		}
+		b[ipv6ExtHdrOptionTypeOffset] = uint8(ipv6PadNExtHdrOptionIdentifier)
+		b[ipv6ExtHdrOptionLengthOffset] = uint8(len(s))
+	}
+}
+
+// ipv6OptionsAlignmentPadding returns the number of padding bytes needed to
+// serialize an option at headerOffset with alignment requirements
+// [align]n + alignOffset.
+func ipv6OptionsAlignmentPadding(headerOffset int, align int, alignOffset int) int {
+	padLen := headerOffset - alignOffset
+	return ((padLen + align - 1) & ^(align - 1)) - padLen
+}
+
 // IPv6PayloadHeader is implemented by the various headers that can be found
 // in an IPv6 payload.
 //
@@ -201,29 +239,51 @@ type IPv6ExtHdrOption interface {
 	isIPv6ExtHdrOption()
 }
 
-// IPv6ExtHdrOptionIndentifier is an IPv6 extension header option identifier.
-type IPv6ExtHdrOptionIndentifier uint8
+// IPv6ExtHdrOptionIdentifier is an IPv6 extension header option identifier.
+type IPv6ExtHdrOptionIdentifier uint8
 
 const (
 	// ipv6Pad1ExtHdrOptionIdentifier is the identifier for a padding option that
 	// provides 1 byte padding, as outlined in RFC 8200 section 4.2.
-	ipv6Pad1ExtHdrOptionIdentifier IPv6ExtHdrOptionIndentifier = 0
+	ipv6Pad1ExtHdrOptionIdentifier IPv6ExtHdrOptionIdentifier = 0
 
 	// ipv6PadBExtHdrOptionIdentifier is the identifier for a padding option that
 	// provides variable length byte padding, as outlined in RFC 8200 section 4.2.
-	ipv6PadNExtHdrOptionIdentifier IPv6ExtHdrOptionIndentifier = 1
+	ipv6PadNExtHdrOptionIdentifier IPv6ExtHdrOptionIdentifier = 1
+
+	// ipv6RouterAlertHopByHopOptionIdentifier is the identifier for the Router
+	// Alert Hop by Hop option as defined in RFC 2711 section 2.1.
+	ipv6RouterAlertHopByHopOptionIdentifier IPv6ExtHdrOptionIdentifier = 5
+
+	// ipv6ExtHdrOptionTypeOffset is the option type offset in an extension header
+	// option as defined in RFC 8200 section 4.2.
+	ipv6ExtHdrOptionTypeOffset = 0
+
+	// ipv6ExtHdrOptionLengthOffset is the option length offset in an extension
+	// header option as defined in RFC 8200 section 4.2.
+	ipv6ExtHdrOptionLengthOffset = 1
+
+	// ipv6ExtHdrOptionPayloadOffset is the option payload offset in an extension
+	// header option as defined in RFC 8200 section 4.2.
+	ipv6ExtHdrOptionPayloadOffset = 2
 )
 
+// ipv6UnknownActionFromIdentifier maps an extension header option's
+// identifier's high bits to the action to take when the identifier is unknown.
+func ipv6UnknownActionFromIdentifier(id IPv6ExtHdrOptionIdentifier) IPv6OptionUnknownAction {
+	return IPv6OptionUnknownAction((id & ipv6UnknownExtHdrOptionActionMask) >> ipv6UnknownExtHdrOptionActionShift)
+}
+
 // IPv6UnknownExtHdrOption holds the identifier and data for an IPv6 extension
 // header option that is unknown by the parsing utilities.
 type IPv6UnknownExtHdrOption struct {
-	Identifier IPv6ExtHdrOptionIndentifier
+	Identifier IPv6ExtHdrOptionIdentifier
 	Data       []byte
 }
 
 // UnknownAction implements IPv6OptionUnknownAction.UnknownAction.
 func (o *IPv6UnknownExtHdrOption) UnknownAction() IPv6OptionUnknownAction {
-	return IPv6OptionUnknownAction((o.Identifier & ipv6UnknownExtHdrOptionActionMask) >> ipv6UnknownExtHdrOptionActionShift)
+	return ipv6UnknownActionFromIdentifier(o.Identifier)
 }
 
 // isIPv6ExtHdrOption implements IPv6ExtHdrOption.isIPv6ExtHdrOption.
@@ -246,7 +306,7 @@ func (i *IPv6OptionsExtHdrOptionsIterator) Next() (IPv6ExtHdrOption, bool, error
 			// options buffer has been exhausted and we are done iterating.
 			return nil, true, nil
 		}
-		id := IPv6ExtHdrOptionIndentifier(temp)
+		id := IPv6ExtHdrOptionIdentifier(temp)
 
 		// If the option identifier indicates the option is a Pad1 option, then we
 		// know the option does not have Length and Data fields. End processing of
@@ -289,6 +349,14 @@ func (i *IPv6OptionsExtHdrOptionsIterator) Next() (IPv6ExtHdrOption, bool, error
 				panic(fmt.Sprintf("error when skipping PadN (N = %d) option's data bytes: %s", length, err))
 			}
 			continue
+		case ipv6RouterAlertHopByHopOptionIdentifier:
+			var routerAlertValue [ipv6RouterAlertPayloadLength]byte
+			if n, err := i.reader.Read(routerAlertValue[:]); err != nil {
+				panic(fmt.Sprintf("error when reading RouterAlert option's data bytes: %s", err))
+			} else if n != ipv6RouterAlertPayloadLength {
+				return nil, true, fmt.Errorf("read %d bytes for RouterAlert option, expected %d", n, ipv6RouterAlertPayloadLength)
+			}
+			return &IPv6RouterAlertOption{Value: IPv6RouterAlertValue(binary.BigEndian.Uint16(routerAlertValue[:]))}, false, nil
 		default:
 			bytes := make([]byte, length)
 			if n, err := io.ReadFull(&i.reader, bytes); err != nil {
@@ -452,9 +520,11 @@ func (i *IPv6PayloadIterator) AsRawHeader(consume bool) IPv6RawPayloadHeader {
 		// Since we consume the iterator, we return the payload as is.
 		buf = i.payload
 
-		// Mark i as done.
+		// Mark i as done, but keep track of where we were for error reporting.
 		*i = IPv6PayloadIterator{
 			nextHdrIdentifier: IPv6NoNextHeaderIdentifier,
+			headerOffset:      i.headerOffset,
+			nextOffset:        i.nextOffset,
 		}
 	} else {
 		buf = i.payload.Clone(nil)
@@ -602,3 +672,248 @@ func (i *IPv6PayloadIterator) nextHeaderData(fragmentHdr bool, bytes []byte) (IP
 
 	return IPv6ExtensionHeaderIdentifier(nextHdrIdentifier), bytes, nil
 }
+
+// IPv6SerializableExtHdr provides serialization for IPv6 extension
+// headers.
+type IPv6SerializableExtHdr interface {
+	// identifier returns the assigned IPv6 header identifier for this extension
+	// header.
+	identifier() IPv6ExtensionHeaderIdentifier
+
+	// length returns the total serialized length in bytes of this extension
+	// header, including the common next header and length fields.
+	length() int
+
+	// serializeInto serializes the receiver into the provided byte
+	// buffer and with the provided nextHeader value.
+	//
+	// Note, the caller MUST provide a byte buffer with size of at least
+	// length. Implementers of this function may assume that the byte buffer
+	// is of sufficient size. serializeInto MAY panic if the provided byte
+	// buffer is not of sufficient size.
+	//
+	// serializeInto returns the number of bytes that was used to serialize the
+	// receiver. Implementers must only use the number of bytes required to
+	// serialize the receiver. Callers MAY provide a larger buffer than required
+	// to serialize into.
+	serializeInto(nextHeader uint8, b []byte) int
+}
+
+var _ IPv6SerializableExtHdr = (*IPv6SerializableHopByHopExtHdr)(nil)
+
+// IPv6SerializableHopByHopExtHdr implements serialization of the Hop by Hop
+// options extension header.
+type IPv6SerializableHopByHopExtHdr []IPv6SerializableHopByHopOption
+
+const (
+	// ipv6HopByHopExtHdrNextHeaderOffset is the offset of the next header field
+	// in a hop by hop extension header as defined in RFC 8200 section 4.3.
+	ipv6HopByHopExtHdrNextHeaderOffset = 0
+
+	// ipv6HopByHopExtHdrLengthOffset is the offset of the length field in a hop
+	// by hop extension header as defined in RFC 8200 section 4.3.
+	ipv6HopByHopExtHdrLengthOffset = 1
+
+	// ipv6HopByHopExtHdrOptionsOffset is the offset of the options in a hop by
+	// hop extension header as defined in RFC 8200 section 4.3.
+	ipv6HopByHopExtHdrOptionsOffset = 2
+
+	// ipv6HopByHopExtHdrUnaccountedLenWords is the implicit number of 8-octet
+	// words in a hop by hop extension header's length field, as stated in RFC
+	// 8200 section 4.3:
+	//   Length of the Hop-by-Hop Options header in 8-octet units,
+	//   not including the first 8 octets.
+	ipv6HopByHopExtHdrUnaccountedLenWords = 1
+)
+
+// identifier implements IPv6SerializableExtHdr.
+func (IPv6SerializableHopByHopExtHdr) identifier() IPv6ExtensionHeaderIdentifier {
+	return IPv6HopByHopOptionsExtHdrIdentifier
+}
+
+// length implements IPv6SerializableExtHdr.
+func (h IPv6SerializableHopByHopExtHdr) length() int {
+	total := ipv6HopByHopExtHdrOptionsOffset
+	for _, opt := range h {
+		align, alignOffset := opt.alignment()
+		total += ipv6OptionsAlignmentPadding(total, align, alignOffset)
+		total += ipv6ExtHdrOptionPayloadOffset + int(opt.length())
+	}
+	// total is measured from the header start, matching serializeInto; add trailing padding.
+	return padIPv6OptionsLength(total)
+}
+
+// serializeInto implements IPv6SerializableExtHdr.
+func (h IPv6SerializableHopByHopExtHdr) serializeInto(nextHeader uint8, b []byte) int {
+	optBuffer := b[ipv6HopByHopExtHdrOptionsOffset:]
+	totalLength := ipv6HopByHopExtHdrOptionsOffset
+	for _, opt := range h {
+		// Calculate alignment requirements and pad buffer if necessary.
+		align, alignOffset := opt.alignment()
+		padLen := ipv6OptionsAlignmentPadding(totalLength, align, alignOffset)
+		if padLen != 0 {
+			padIPv6Option(optBuffer[:padLen])
+			totalLength += padLen
+			optBuffer = optBuffer[padLen:]
+		}
+
+		l := opt.serializeInto(optBuffer[ipv6ExtHdrOptionPayloadOffset:])
+		optBuffer[ipv6ExtHdrOptionTypeOffset] = uint8(opt.identifier())
+		optBuffer[ipv6ExtHdrOptionLengthOffset] = l
+		l += ipv6ExtHdrOptionPayloadOffset
+		totalLength += int(l)
+		optBuffer = optBuffer[l:]
+	}
+	padded := padIPv6OptionsLength(totalLength)
+	if padded != totalLength {
+		padIPv6Option(optBuffer[:padded-totalLength])
+		totalLength = padded
+	}
+	wordsLen := totalLength/ipv6ExtHdrLenBytesPerUnit - ipv6HopByHopExtHdrUnaccountedLenWords
+	if wordsLen > math.MaxUint8 {
+		panic(fmt.Sprintf("IPv6 hop by hop options too large: %d+1 64-bit words", wordsLen))
+	}
+	b[ipv6HopByHopExtHdrNextHeaderOffset] = nextHeader
+	b[ipv6HopByHopExtHdrLengthOffset] = uint8(wordsLen)
+	return totalLength
+}
+
+// IPv6SerializableHopByHopOption provides serialization for hop by hop options.
+type IPv6SerializableHopByHopOption interface {
+	// identifier returns the option identifier of this Hop by Hop option.
+	identifier() IPv6ExtHdrOptionIdentifier
+
+	// length returns the *payload* size of the option (not considering the type
+	// and length fields).
+	length() uint8
+
+	// alignment returns the alignment requirements from this option.
+	//
+	// Alignment requirements take the form [align]n + offset as specified in
+	// RFC 8200 section 4.2. The alignment requirement is on the offset between
+	// the option type byte and the start of the hop by hop header.
+	//
+	// align must be a power of 2.
+	alignment() (align int, offset int)
+
+	// serializeInto serializes the receiver into the provided byte
+	// buffer.
+	//
+	// Note, the caller MUST provide a byte buffer with size of at least
+	// length. Implementers of this function may assume that the byte buffer
+	// is of sufficient size. serializeInto MAY panic if the provided byte
+	// buffer is not of sufficient size.
+	//
+	// serializeInto will return the number of bytes that was used to
+	// serialize the receiver. Implementers must only use the number of
+	// bytes required to serialize the receiver. Callers MAY provide a
+	// larger buffer than required to serialize into.
+	serializeInto([]byte) uint8
+}
+
+var _ IPv6SerializableHopByHopOption = (*IPv6RouterAlertOption)(nil)
+
+// IPv6RouterAlertOption is the IPv6 Router alert Hop by Hop option defined in
+// RFC 2711 section 2.1.
+type IPv6RouterAlertOption struct {
+	Value IPv6RouterAlertValue
+}
+
+// IPv6RouterAlertValue is the payload of an IPv6 Router Alert option.
+type IPv6RouterAlertValue uint16
+
+const (
+	// IPv6RouterAlertMLD indicates a datagram containing a Multicast Listener
+	// Discovery message as defined in RFC 2711 section 2.1.
+	IPv6RouterAlertMLD IPv6RouterAlertValue = 0
+	// IPv6RouterAlertRSVP indicates a datagram containing an RSVP message as
+	// defined in RFC 2711 section 2.1.
+	IPv6RouterAlertRSVP IPv6RouterAlertValue = 1
+	// IPv6RouterAlertActiveNetworks indicates a datagram containing an Active
+	// Networks message as defined in RFC 2711 section 2.1.
+	IPv6RouterAlertActiveNetworks IPv6RouterAlertValue = 2
+
+	// ipv6RouterAlertPayloadLength is the length of the Router Alert payload
+	// as defined in RFC 2711.
+	ipv6RouterAlertPayloadLength = 2
+
+	// ipv6RouterAlertAlignmentRequirement is the alignment requirement for the
+	// Router Alert option defined as 2n+0 in RFC 2711.
+	ipv6RouterAlertAlignmentRequirement = 2
+
+	// ipv6RouterAlertAlignmentOffsetRequirement is the alignment offset
+	// requirement for the Router Alert option defined as 2n+0 in RFC 2711 section
+	// 2.1.
+	ipv6RouterAlertAlignmentOffsetRequirement = 0
+)
+
+// UnknownAction implements IPv6ExtHdrOption.
+func (*IPv6RouterAlertOption) UnknownAction() IPv6OptionUnknownAction {
+	return ipv6UnknownActionFromIdentifier(ipv6RouterAlertHopByHopOptionIdentifier)
+}
+
+// isIPv6ExtHdrOption implements IPv6ExtHdrOption.
+func (*IPv6RouterAlertOption) isIPv6ExtHdrOption() {}
+
+// identifier implements IPv6SerializableHopByHopOption.
+func (*IPv6RouterAlertOption) identifier() IPv6ExtHdrOptionIdentifier {
+	return ipv6RouterAlertHopByHopOptionIdentifier
+}
+
+// length implements IPv6SerializableHopByHopOption.
+func (*IPv6RouterAlertOption) length() uint8 {
+	return ipv6RouterAlertPayloadLength
+}
+
+// alignment implements IPv6SerializableHopByHopOption.
+func (*IPv6RouterAlertOption) alignment() (int, int) {
+	// From RFC 2711 section 2.1:
+	//   Alignment requirement: 2n+0.
+	return ipv6RouterAlertAlignmentRequirement, ipv6RouterAlertAlignmentOffsetRequirement
+}
+
+// serializeInto implements IPv6SerializableHopByHopOption.
+func (o *IPv6RouterAlertOption) serializeInto(b []byte) uint8 {
+	binary.BigEndian.PutUint16(b, uint16(o.Value))
+	return ipv6RouterAlertPayloadLength
+}
+
+// IPv6ExtHdrSerializer provides serialization of IPv6 extension headers.
+type IPv6ExtHdrSerializer []IPv6SerializableExtHdr
+
+// Serialize serializes the provided list of IPv6 extension headers into b.
+//
+// Note, b must be of sufficient size to hold all the headers in s. See
+// IPv6ExtHdrSerializer.Length for details on getting the total size of a
+// serialized IPv6ExtHdrSerializer.
+//
+// Serialize may panic if b is not of sufficient size to hold all the headers
+// in s.
+//
+// Serialize takes the transportProtocol value to be used as the last extension
+// header's Next Header value and returns the header identifier of the first
+// serialized extension header and the total serialized length.
+func (s IPv6ExtHdrSerializer) Serialize(transportProtocol tcpip.TransportProtocolNumber, b []byte) (uint8, int) {
+	nextHeader := uint8(transportProtocol)
+	if len(s) == 0 {
+		return nextHeader, 0
+	}
+	var totalLength int
+	for i, h := range s[:len(s)-1] {
+		length := h.serializeInto(uint8(s[i+1].identifier()), b)
+		b = b[length:]
+		totalLength += length
+	}
+	totalLength += s[len(s)-1].serializeInto(nextHeader, b)
+	return uint8(s[0].identifier()), totalLength
+}
+
+// Length returns the total number of bytes required to serialize the extension
+// headers.
+func (s IPv6ExtHdrSerializer) Length() int {
+	var totalLength int
+	for _, h := range s {
+		totalLength += h.length()
+	}
+	return totalLength
+}
diff --git a/pkg/tcpip/header/ipv6_extension_headers_test.go b/pkg/tcpip/header/ipv6_extension_headers_test.go
index ab20c5f37..5d2296353 100644
--- a/pkg/tcpip/header/ipv6_extension_headers_test.go
+++ b/pkg/tcpip/header/ipv6_extension_headers_test.go
@@ -21,6 +21,7 @@ import (
 	"testing"
 
 	"github.com/google/go-cmp/cmp"
+	"gvisor.dev/gvisor/pkg/tcpip"
 	"gvisor.dev/gvisor/pkg/tcpip/buffer"
 )
 
@@ -59,7 +60,7 @@ func (a IPv6DestinationOptionsExtHdr) Equal(b IPv6DestinationOptionsExtHdr) bool
 func TestIPv6UnknownExtHdrOption(t *testing.T) {
 	tests := []struct {
 		name                  string
-		identifier            IPv6ExtHdrOptionIndentifier
+		identifier            IPv6ExtHdrOptionIdentifier
 		expectedUnknownAction IPv6OptionUnknownAction
 	}{
 		{
@@ -990,3 +991,331 @@ func TestIPv6ExtHdrIter(t *testing.T) {
 		})
 	}
 }
+
+var _ IPv6SerializableHopByHopOption = (*dummyHbHOptionSerializer)(nil)
+
+// dummyHbHOptionSerializer provides a generic implementation of
+// IPv6SerializableHopByHopOption for use in tests.
+type dummyHbHOptionSerializer struct {
+	id          IPv6ExtHdrOptionIdentifier
+	payload     []byte
+	align       int
+	alignOffset int
+}
+
+// identifier implements IPv6SerializableHopByHopOption.
+func (s *dummyHbHOptionSerializer) identifier() IPv6ExtHdrOptionIdentifier {
+	return s.id
+}
+
+// length implements IPv6SerializableHopByHopOption.
+func (s *dummyHbHOptionSerializer) length() uint8 {
+	return uint8(len(s.payload))
+}
+
+// alignment implements IPv6SerializableHopByHopOption.
+func (s *dummyHbHOptionSerializer) alignment() (int, int) {
+	align := 1
+	if s.align != 0 {
+		align = s.align
+	}
+	return align, s.alignOffset
+}
+
+// serializeInto implements IPv6SerializableHopByHopOption.
+func (s *dummyHbHOptionSerializer) serializeInto(b []byte) uint8 {
+	return uint8(copy(b, s.payload))
+}
+
+func TestIPv6HopByHopSerializer(t *testing.T) {
+	validateDummies := func(t *testing.T, serializable IPv6SerializableHopByHopOption, deserialized IPv6ExtHdrOption) {
+		t.Helper()
+		dummy, ok := serializable.(*dummyHbHOptionSerializer)
+		if !ok {
+			t.Fatalf("got serializable = %T, want = *dummyHbHOptionSerializer", serializable)
+		}
+		unknown, ok := deserialized.(*IPv6UnknownExtHdrOption)
+		if !ok {
+			t.Fatalf("got deserialized = %T, want = %T", deserialized, &IPv6UnknownExtHdrOption{})
+		}
+		if dummy.id != unknown.Identifier {
+			t.Errorf("got deserialized identifier = %d, want = %d", unknown.Identifier, dummy.id)
+		}
+		if diff := cmp.Diff(dummy.payload, unknown.Data); diff != "" {
+			t.Errorf("option payload deserialization mismatch (-want +got):\n%s", diff)
+		}
+	}
+	tests := []struct {
+		name       string
+		nextHeader uint8
+		options    []IPv6SerializableHopByHopOption
+		expect     []byte
+		validate   func(*testing.T, IPv6SerializableHopByHopOption, IPv6ExtHdrOption)
+	}{
+		{
+			name:       "single option",
+			nextHeader: 13,
+			options: []IPv6SerializableHopByHopOption{
+				&dummyHbHOptionSerializer{
+					id:      15,
+					payload: []byte{9, 8, 7, 6},
+				},
+			},
+			expect:   []byte{13, 0, 15, 4, 9, 8, 7, 6},
+			validate: validateDummies,
+		},
+		{
+			name:       "short option padN zero",
+			nextHeader: 88,
+			options: []IPv6SerializableHopByHopOption{
+				&dummyHbHOptionSerializer{
+					id:      22,
+					payload: []byte{4, 5},
+				},
+			},
+			expect:   []byte{88, 0, 22, 2, 4, 5, 1, 0},
+			validate: validateDummies,
+		},
+		{
+			name:       "short option pad1",
+			nextHeader: 11,
+			options: []IPv6SerializableHopByHopOption{
+				&dummyHbHOptionSerializer{
+					id:      33,
+					payload: []byte{1, 2, 3},
+				},
+			},
+			expect:   []byte{11, 0, 33, 3, 1, 2, 3, 0},
+			validate: validateDummies,
+		},
+		{
+			name:       "long option padN",
+			nextHeader: 55,
+			options: []IPv6SerializableHopByHopOption{
+				&dummyHbHOptionSerializer{
+					id:      77,
+					payload: []byte{1, 2, 3, 4, 5, 6, 7, 8},
+				},
+			},
+			expect:   []byte{55, 1, 77, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 0, 0},
+			validate: validateDummies,
+		},
+		{
+			name:       "two options",
+			nextHeader: 33,
+			options: []IPv6SerializableHopByHopOption{
+				&dummyHbHOptionSerializer{
+					id:      11,
+					payload: []byte{1, 2, 3},
+				},
+				&dummyHbHOptionSerializer{
+					id:      22,
+					payload: []byte{4, 5, 6},
+				},
+			},
+			expect:   []byte{33, 1, 11, 3, 1, 2, 3, 22, 3, 4, 5, 6, 1, 2, 0, 0},
+			validate: validateDummies,
+		},
+		{
+			name:       "two options align 2n",
+			nextHeader: 33,
+			options: []IPv6SerializableHopByHopOption{
+				&dummyHbHOptionSerializer{
+					id:      11,
+					payload: []byte{1, 2, 3},
+				},
+				&dummyHbHOptionSerializer{
+					id:      22,
+					payload: []byte{4, 5, 6},
+					align:   2,
+				},
+			},
+			expect:   []byte{33, 1, 11, 3, 1, 2, 3, 0, 22, 3, 4, 5, 6, 1, 1, 0},
+			validate: validateDummies,
+		},
+		{
+			name:       "two options align 8n+1",
+			nextHeader: 33,
+			options: []IPv6SerializableHopByHopOption{
+				&dummyHbHOptionSerializer{
+					id:      11,
+					payload: []byte{1, 2},
+				},
+				&dummyHbHOptionSerializer{
+					id:          22,
+					payload:     []byte{4, 5, 6},
+					align:       8,
+					alignOffset: 1,
+				},
+			},
+			expect:   []byte{33, 1, 11, 2, 1, 2, 1, 1, 0, 22, 3, 4, 5, 6, 1, 0},
+			validate: validateDummies,
+		},
+		{
+			name:       "no options",
+			nextHeader: 33,
+			options:    []IPv6SerializableHopByHopOption{},
+			expect:     []byte{33, 0, 1, 4, 0, 0, 0, 0},
+		},
+		{
+			name:       "Router Alert",
+			nextHeader: 33,
+			options:    []IPv6SerializableHopByHopOption{&IPv6RouterAlertOption{Value: IPv6RouterAlertMLD}},
+			expect:     []byte{33, 0, 5, 2, 0, 0, 1, 0},
+			validate: func(t *testing.T, _ IPv6SerializableHopByHopOption, deserialized IPv6ExtHdrOption) {
+				t.Helper()
+				routerAlert, ok := deserialized.(*IPv6RouterAlertOption)
+				if !ok {
+					t.Fatalf("got deserialized = %T, want = *IPv6RouterAlertOption", deserialized)
+				}
+				if routerAlert.Value != IPv6RouterAlertMLD {
+					t.Errorf("got routerAlert.Value = %d, want = %d", routerAlert.Value, IPv6RouterAlertMLD)
+				}
+			},
+		},
+	}
+	for _, test := range tests {
+		t.Run(test.name, func(t *testing.T) {
+			s := IPv6SerializableHopByHopExtHdr(test.options)
+			length := s.length()
+			if length != len(test.expect) {
+				t.Fatalf("got s.length() = %d, want = %d", length, len(test.expect))
+			}
+			b := make([]byte, length)
+			for i := range b {
+				// Fill the buffer with ones to ensure all padding is correctly set.
+				b[i] = 0xFF
+			}
+			if got := s.serializeInto(test.nextHeader, b); got != length {
+				t.Fatalf("got s.serializeInto(..) = %d, want = %d", got, length)
+			}
+			if diff := cmp.Diff(test.expect, b); diff != "" {
+				t.Fatalf("serialization mismatch (-want +got):\n%s", diff)
+			}
+
+			// Deserialize the options and verify them.
+			optLen := (b[ipv6HopByHopExtHdrLengthOffset] + ipv6HopByHopExtHdrUnaccountedLenWords) * ipv6ExtHdrLenBytesPerUnit
+			iter := ipv6OptionsExtHdr(b[ipv6HopByHopExtHdrOptionsOffset:optLen]).Iter()
+			for _, testOpt := range test.options {
+				opt, done, err := iter.Next()
+				if err != nil {
+					t.Fatalf("iter.Next(): %s", err)
+				}
+				if done {
+					t.Fatalf("got iter.Next() = (%T, %t, _), want = (_, false, _)", opt, done)
+				}
+				test.validate(t, testOpt, opt)
+			}
+			opt, done, err := iter.Next()
+			if err != nil {
+				t.Fatalf("iter.Next(): %s", err)
+			}
+			if !done {
+				t.Fatalf("got iter.Next() = (%T, %t, _), want = (_, true, _)", opt, done)
+			}
+		})
+	}
+}
+
+var _ IPv6SerializableExtHdr = (*dummyIPv6ExtHdrSerializer)(nil)
+
+// dummyIPv6ExtHdrSerializer provides a generic implementation of
+// IPv6SerializableExtHdr for use in tests.
+//
+// The dummy header always carries the nextHeader value in the first byte.
+type dummyIPv6ExtHdrSerializer struct {
+	id             IPv6ExtensionHeaderIdentifier
+	headerContents []byte
+}
+
+// identifier implements IPv6SerializableExtHdr.
+func (s *dummyIPv6ExtHdrSerializer) identifier() IPv6ExtensionHeaderIdentifier {
+	return s.id
+}
+
+// length implements IPv6SerializableExtHdr.
+func (s *dummyIPv6ExtHdrSerializer) length() int {
+	return len(s.headerContents) + 1
+}
+
+// serializeInto implements IPv6SerializableExtHdr.
+func (s *dummyIPv6ExtHdrSerializer) serializeInto(nextHeader uint8, b []byte) int {
+	b[0] = nextHeader
+	return copy(b[1:], s.headerContents) + 1
+}
+
+func TestIPv6ExtHdrSerializer(t *testing.T) {
+	tests := []struct {
+		name             string
+		headers          []IPv6SerializableExtHdr
+		nextHeader       tcpip.TransportProtocolNumber
+		expectSerialized []byte
+		expectNextHeader uint8
+	}{
+		{
+			name: "one header",
+			headers: []IPv6SerializableExtHdr{
+				&dummyIPv6ExtHdrSerializer{
+					id:             15,
+					headerContents: []byte{1, 2, 3, 4},
+				},
+			},
+			nextHeader:       TCPProtocolNumber,
+			expectSerialized: []byte{byte(TCPProtocolNumber), 1, 2, 3, 4},
+			expectNextHeader: 15,
+		},
+		{
+			name: "two headers",
+			headers: []IPv6SerializableExtHdr{
+				&dummyIPv6ExtHdrSerializer{
+					id:             22,
+					headerContents: []byte{1, 2, 3},
+				},
+				&dummyIPv6ExtHdrSerializer{
+					id:             23,
+					headerContents: []byte{4, 5, 6},
+				},
+			},
+			nextHeader: ICMPv6ProtocolNumber,
+			expectSerialized: []byte{
+				23, 1, 2, 3,
+				byte(ICMPv6ProtocolNumber), 4, 5, 6,
+			},
+			expectNextHeader: 22,
+		},
+		{
+			name:             "no headers",
+			headers:          []IPv6SerializableExtHdr{},
+			nextHeader:       UDPProtocolNumber,
+			expectSerialized: []byte{},
+			expectNextHeader: byte(UDPProtocolNumber),
+		},
+	}
+	for _, test := range tests {
+		t.Run(test.name, func(t *testing.T) {
+			s := IPv6ExtHdrSerializer(test.headers)
+			l := s.Length()
+			if got, want := l, len(test.expectSerialized); got != want {
+				t.Fatalf("got serialized length = %d, want = %d", got, want)
+			}
+			b := make([]byte, l)
+			for i := range b {
+				// Fill the buffer with garbage to make sure we're writing to all bytes.
+				b[i] = 0xFF
+			}
+			nextHeader, serializedLen := s.Serialize(test.nextHeader, b)
+			if serializedLen != len(test.expectSerialized) || nextHeader != test.expectNextHeader {
+				t.Errorf(
+					"got s.Serialize(..) = (%d, %d), want = (%d, %d)",
+					nextHeader,
+					serializedLen,
+					test.expectNextHeader,
+					len(test.expectSerialized),
+				)
+			}
+			if diff := cmp.Diff(test.expectSerialized, b); diff != "" {
+				t.Errorf("serialization mismatch (-want +got):\n%s", diff)
+			}
+		})
+	}
+}
diff --git a/pkg/tcpip/header/ipv6_fragment.go b/pkg/tcpip/header/ipv6_fragment.go
index 018555a26..9d09f32eb 100644
--- a/pkg/tcpip/header/ipv6_fragment.go
+++ b/pkg/tcpip/header/ipv6_fragment.go
@@ -27,12 +27,11 @@ const (
 	idV6        = 4
 )
 
-// IPv6FragmentFields contains the fields of an IPv6 fragment. It is used to describe the
-// fields of a packet that needs to be encoded.
-type IPv6FragmentFields struct {
-	// NextHeader is the "next header" field of an IPv6 fragment.
-	NextHeader uint8
+var _ IPv6SerializableExtHdr = (*IPv6SerializableFragmentExtHdr)(nil)
 
+// IPv6SerializableFragmentExtHdr is used to serialize an IPv6 fragment
+// extension header as defined in RFC 8200 section 4.5.
+type IPv6SerializableFragmentExtHdr struct {
 	// FragmentOffset is the "fragment offset" field of an IPv6 fragment.
 	FragmentOffset uint16
 
@@ -43,6 +42,29 @@ type IPv6FragmentFields struct {
 	Identification uint32
 }
 
+// identifier implements IPv6SerializableExtHdr.
+func (h *IPv6SerializableFragmentExtHdr) identifier() IPv6ExtensionHeaderIdentifier {
+	return IPv6FragmentHeader
+}
+
+// length implements IPv6SerializableExtHdr.
+func (h *IPv6SerializableFragmentExtHdr) length() int {
+	return IPv6FragmentHeaderSize
+}
+
+// serializeInto implements IPv6SerializableExtHdr.
+func (h *IPv6SerializableFragmentExtHdr) serializeInto(nextHeader uint8, b []byte) int {
+	// Prevent too many bounds checks.
+	_ = b[IPv6FragmentHeaderSize:]
+	binary.BigEndian.PutUint32(b[idV6:], h.Identification)
+	binary.BigEndian.PutUint16(b[fragOff:], h.FragmentOffset<<ipv6FragmentExtHdrFragmentOffsetShift)
+	b[nextHdrFrag] = nextHeader
+	if h.M {
+		b[more] |= ipv6FragmentExtHdrMFlagMask
+	}
+	return IPv6FragmentHeaderSize
+}
+
 // IPv6Fragment represents an ipv6 fragment header stored in a byte array.
 // Most of the methods of IPv6Fragment access to the underlying slice without
 // checking the boundaries and could panic because of 'index out of range'.
@@ -58,16 +80,6 @@ const (
 	IPv6FragmentHeaderSize = 8
 )
 
-// Encode encodes all the fields of the ipv6 fragment.
-func (b IPv6Fragment) Encode(i *IPv6FragmentFields) {
-	b[nextHdrFrag] = i.NextHeader
-	binary.BigEndian.PutUint16(b[fragOff:], i.FragmentOffset<<3)
-	if i.M {
-		b[more] |= 1
-	}
-	binary.BigEndian.PutUint32(b[idV6:], i.Identification)
-}
-
 // IsValid performs basic validation on the fragment header.
 func (b IPv6Fragment) IsValid() bool {
 	return len(b) >= IPv6FragmentHeaderSize
diff --git a/pkg/tcpip/header/ipversion_test.go b/pkg/tcpip/header/ipversion_test.go
index 17a49d4fa..b5540bf66 100644
--- a/pkg/tcpip/header/ipversion_test.go
+++ b/pkg/tcpip/header/ipversion_test.go
@@ -22,7 +22,7 @@ import (
 
 func TestIPv4(t *testing.T) {
 	b := header.IPv4(make([]byte, header.IPv4MinimumSize))
-	b.Encode(&header.IPv4Fields{IHL: header.IPv4MinimumSize})
+	b.Encode(&header.IPv4Fields{})
 
 	const want = header.IPv4Version
 	if v := header.IPVersion(b); v != want {
diff --git a/pkg/tcpip/header/mld.go b/pkg/tcpip/header/mld.go
new file mode 100644
index 000000000..ffe03c76a
--- /dev/null
+++ b/pkg/tcpip/header/mld.go
@@ -0,0 +1,103 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package header
+
+import (
+	"encoding/binary"
+	"fmt"
+	"time"
+
+	"gvisor.dev/gvisor/pkg/tcpip"
+)
+
+const (
+	// MLDMinimumSize is the minimum size for an MLD message.
+	MLDMinimumSize = 20
+
+	// MLDHopLimit is the Hop Limit for all IPv6 packets with an MLD message, as
+	// per RFC 2710 section 3.
+	MLDHopLimit = 1
+
+	// mldMaximumResponseDelayOffset is the offset to the Maximum Response Delay
+	// field within MLD.
+	mldMaximumResponseDelayOffset = 0
+
+	// mldMulticastAddressOffset is the offset to the Multicast Address field
+	// within MLD.
+	mldMulticastAddressOffset = 4
+)
+
+// MLD is a Multicast Listener Discovery message in an ICMPv6 packet.
+//
+// MLD will only contain the body of an ICMPv6 packet.
+//
+// As per RFC 2710 section 3, MLD messages have the following format (MLD only
+// holds the bytes after the first four bytes in the diagram below):
+//
+//    0                   1                   2                   3
+//    0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+//   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+//   |     Type      |     Code      |          Checksum             |
+//   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+//   |     Maximum Response Delay    |          Reserved             |
+//   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+//   |                                                               |
+//   +                                                               +
+//   |                                                               |
+//   +                       Multicast Address                       +
+//   |                                                               |
+//   +                                                               +
+//   |                                                               |
+//   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+type MLD []byte
+
+// MaximumResponseDelay returns the Maximum Response Delay.
+func (m MLD) MaximumResponseDelay() time.Duration {
+	// As per RFC 2710 section 3.4:
+	//
+	//   The Maximum Response Delay field is meaningful only in Query
+	//   messages, and specifies the maximum allowed delay before sending a
+	//   responding Report, in units of milliseconds. In all other messages,
+	//   it is set to zero by the sender and ignored by receivers.
+	return time.Duration(binary.BigEndian.Uint16(m[mldMaximumResponseDelayOffset:])) * time.Millisecond
+}
+
+// SetMaximumResponseDelay sets the Maximum Response Delay field.
+//
+// maxRespDelayMS is the value in milliseconds.
+func (m MLD) SetMaximumResponseDelay(maxRespDelayMS uint16) {
+	binary.BigEndian.PutUint16(m[mldMaximumResponseDelayOffset:], maxRespDelayMS)
+}
+
+// MulticastAddress returns the Multicast Address.
+func (m MLD) MulticastAddress() tcpip.Address {
+	// As per RFC 2710 section 3.5:
+	//
+	//   In a Query message, the Multicast Address field is set to zero when
+	//   sending a General Query, and set to a specific IPv6 multicast address
+	//   when sending a Multicast-Address-Specific Query.
+	//
+	//   In a Report or Done message, the Multicast Address field holds a
+	//   specific IPv6 multicast address to which the message sender is
+	//   listening or is ceasing to listen, respectively.
+	return tcpip.Address(m[mldMulticastAddressOffset:][:IPv6AddressSize])
+}
+
+// SetMulticastAddress sets the Multicast Address field.
+func (m MLD) SetMulticastAddress(multicastAddress tcpip.Address) {
+	if n := copy(m[mldMulticastAddressOffset:], multicastAddress); n != IPv6AddressSize {
+		panic(fmt.Sprintf("copied %d bytes, expected to copy %d bytes", n, IPv6AddressSize))
+	}
+}
diff --git a/pkg/tcpip/header/mld_test.go b/pkg/tcpip/header/mld_test.go
new file mode 100644
index 000000000..0cecf10d4
--- /dev/null
+++ b/pkg/tcpip/header/mld_test.go
@@ -0,0 +1,61 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package header
+
+import (
+	"encoding/binary"
+	"testing"
+	"time"
+
+	"gvisor.dev/gvisor/pkg/tcpip"
+)
+
+func TestMLD(t *testing.T) {
+	b := []byte{
+		// Maximum Response Delay
+		0, 0,
+
+		// Reserved
+		0, 0,
+
+		// MulticastAddress
+		1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6,
+	}
+
+	const maxRespDelay = 513
+	binary.BigEndian.PutUint16(b, maxRespDelay)
+
+	mld := MLD(b)
+
+	if got, want := mld.MaximumResponseDelay(), maxRespDelay*time.Millisecond; got != want {
+		t.Errorf("got mld.MaximumResponseDelay() = %s, want = %s", got, want)
+	}
+
+	const newMaxRespDelay = 1234
+	mld.SetMaximumResponseDelay(newMaxRespDelay)
+	if got, want := mld.MaximumResponseDelay(), newMaxRespDelay*time.Millisecond; got != want {
+		t.Errorf("got mld.MaximumResponseDelay() = %s, want = %s", got, want)
+	}
+
+	if got, want := mld.MulticastAddress(), tcpip.Address([]byte{1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6}); got != want {
+		t.Errorf("got mld.MulticastAddress() = %s, want = %s", got, want)
+	}
+
+	multicastAddress := tcpip.Address([]byte{15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0})
+	mld.SetMulticastAddress(multicastAddress)
+	if got := mld.MulticastAddress(); got != multicastAddress {
+		t.Errorf("got mld.MulticastAddress() = %s, want = %s", got, multicastAddress)
+	}
+}
diff --git a/pkg/tcpip/header/ndp_options.go b/pkg/tcpip/header/ndp_options.go
index 5d3975c56..554242f0c 100644
--- a/pkg/tcpip/header/ndp_options.go
+++ b/pkg/tcpip/header/ndp_options.go
@@ -298,7 +298,7 @@ func (b NDPOptions) Iter(check bool) (NDPOptionIterator, error) {
 	return it, nil
 }
 
-// Serialize serializes the provided list of NDP options into o.
+// Serialize serializes the provided list of NDP options into b.
 //
 // Note, b must be of sufficient size to hold all the options in s. See
 // NDPOptionsSerializer.Length for details on the getting the total size
diff --git a/pkg/tcpip/header/parse/parse.go b/pkg/tcpip/header/parse/parse.go
index 5ca75c834..2042f214a 100644
--- a/pkg/tcpip/header/parse/parse.go
+++ b/pkg/tcpip/header/parse/parse.go
@@ -109,6 +109,9 @@ traverseExtensions:
 				fragOffset = extHdr.FragmentOffset()
 				fragMore = extHdr.More()
 			}
+			rawPayload := it.AsRawHeader(true /* consume */)
+			extensionsSize = dataClone.Size() - rawPayload.Buf.Size()
+			break traverseExtensions
 
 		case header.IPv6RawPayloadHeader:
 			// We've found the payload after any extensions.
diff --git a/pkg/tcpip/header/udp.go b/pkg/tcpip/header/udp.go
index 98bdd29db..a6d4fcd59 100644
--- a/pkg/tcpip/header/udp.go
+++ b/pkg/tcpip/header/udp.go
@@ -36,10 +36,10 @@ const (
 // UDPFields contains the fields of a UDP packet. It is used to describe the
 // fields of a packet that needs to be encoded.
 type UDPFields struct {
-	// SrcPort is the "source port" field of a UDP packet.
+	// SrcPort is the "Source Port" field of a UDP packet.
 	SrcPort uint16
 
-	// DstPort is the "destination port" field of a UDP packet.
+	// DstPort is the "Destination Port" field of a UDP packet.
 	DstPort uint16
 
 	// Length is the "length" field of a UDP packet.
@@ -64,52 +64,57 @@ const (
 	UDPProtocolNumber tcpip.TransportProtocolNumber = 17
 )
 
-// SourcePort returns the "source port" field of the udp header.
+// SourcePort returns the "Source Port" field of the UDP header.
 func (b UDP) SourcePort() uint16 {
 	return binary.BigEndian.Uint16(b[udpSrcPort:])
 }
 
-// DestinationPort returns the "destination port" field of the udp header.
+// DestinationPort returns the "Destination Port" field of the UDP header.
 func (b UDP) DestinationPort() uint16 {
 	return binary.BigEndian.Uint16(b[udpDstPort:])
 }
 
-// Length returns the "length" field of the udp header.
+// Length returns the "Length" field of the UDP header.
 func (b UDP) Length() uint16 {
 	return binary.BigEndian.Uint16(b[udpLength:])
 }
 
 // Payload returns the data contained in the UDP datagram.
 func (b UDP) Payload() []byte {
-	return b[UDPMinimumSize:]
+	return b[:b.Length()][UDPMinimumSize:]
 }
 
-// Checksum returns the "checksum" field of the udp header.
+// Checksum returns the "Checksum" field of the UDP header.
 func (b UDP) Checksum() uint16 {
 	return binary.BigEndian.Uint16(b[udpChecksum:])
 }
 
-// SetSourcePort sets the "source port" field of the udp header.
+// SetSourcePort sets the "Source Port" field of the UDP header.
 func (b UDP) SetSourcePort(port uint16) {
 	binary.BigEndian.PutUint16(b[udpSrcPort:], port)
 }
 
-// SetDestinationPort sets the "destination port" field of the udp header.
+// SetDestinationPort sets the "Destination Port" field of the UDP header.
 func (b UDP) SetDestinationPort(port uint16) {
 	binary.BigEndian.PutUint16(b[udpDstPort:], port)
 }
 
-// SetChecksum sets the "checksum" field of the udp header.
+// SetChecksum sets the "Checksum" field of the UDP header.
 func (b UDP) SetChecksum(checksum uint16) {
 	binary.BigEndian.PutUint16(b[udpChecksum:], checksum)
 }
 
-// SetLength sets the "length" field of the udp header.
+// SetLength sets the "Length" field of the UDP header.
 func (b UDP) SetLength(length uint16) {
 	binary.BigEndian.PutUint16(b[udpLength:], length)
 }
 
-// CalculateChecksum calculates the checksum of the udp packet, given the
+// PayloadLength returns the length of the payload following the UDP header.
+func (b UDP) PayloadLength() uint16 {
+	return b.Length() - UDPMinimumSize
+}
+
+// CalculateChecksum calculates the checksum of the UDP packet, given the
 // checksum of the network-layer pseudo-header and the checksum of the payload.
 func (b UDP) CalculateChecksum(partialChecksum uint16) uint16 {
 	// Calculate the rest of the checksum.
diff --git a/pkg/tcpip/link/channel/BUILD b/pkg/tcpip/link/channel/BUILD
index 39ca774ef..973f06cbc 100644
--- a/pkg/tcpip/link/channel/BUILD
+++ b/pkg/tcpip/link/channel/BUILD
@@ -9,7 +9,6 @@ go_library(
     deps = [
         "//pkg/sync",
         "//pkg/tcpip",
-        "//pkg/tcpip/buffer",
         "//pkg/tcpip/header",
         "//pkg/tcpip/stack",
     ],
diff --git a/pkg/tcpip/link/channel/channel.go b/pkg/tcpip/link/channel/channel.go
index c95aef63c..0efbfb22b 100644
--- a/pkg/tcpip/link/channel/channel.go
+++ b/pkg/tcpip/link/channel/channel.go
@@ -22,7 +22,6 @@ import (
 
 	"gvisor.dev/gvisor/pkg/sync"
 	"gvisor.dev/gvisor/pkg/tcpip"
-	"gvisor.dev/gvisor/pkg/tcpip/buffer"
 	"gvisor.dev/gvisor/pkg/tcpip/header"
 	"gvisor.dev/gvisor/pkg/tcpip/stack"
 )
@@ -32,7 +31,7 @@ type PacketInfo struct {
 	Pkt   *stack.PacketBuffer
 	Proto tcpip.NetworkProtocolNumber
 	GSO   *stack.GSO
-	Route stack.Route
+	Route *stack.Route
 }
 
 // Notification is the interface for receiving notification from the packet
@@ -271,21 +270,6 @@ func (e *Endpoint) WritePackets(r *stack.Route, gso *stack.GSO, pkts stack.Packe
 	return n, nil
 }
 
-// WriteRawPacket implements stack.LinkEndpoint.WriteRawPacket.
-func (e *Endpoint) WriteRawPacket(vv buffer.VectorisedView) *tcpip.Error {
-	p := PacketInfo{
-		Pkt: stack.NewPacketBuffer(stack.PacketBufferOptions{
-			Data: vv,
-		}),
-		Proto: 0,
-		GSO:   nil,
-	}
-
-	e.q.Write(p)
-
-	return nil
-}
-
 // Wait implements stack.LinkEndpoint.Wait.
 func (*Endpoint) Wait() {}
 
diff --git a/pkg/tcpip/link/ethernet/ethernet.go b/pkg/tcpip/link/ethernet/ethernet.go
index 3eef7cd56..beefcd008 100644
--- a/pkg/tcpip/link/ethernet/ethernet.go
+++ b/pkg/tcpip/link/ethernet/ethernet.go
@@ -62,7 +62,7 @@ func (e *Endpoint) Capabilities() stack.LinkEndpointCapabilities {
 
 // WritePacket implements stack.LinkEndpoint.
 func (e *Endpoint) WritePacket(r *stack.Route, gso *stack.GSO, proto tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) *tcpip.Error {
-	e.AddHeader(e.Endpoint.LinkAddress(), r.RemoteLinkAddress, proto, pkt)
+	e.AddHeader(e.Endpoint.LinkAddress(), r.RemoteLinkAddress(), proto, pkt)
 	return e.Endpoint.WritePacket(r, gso, proto, pkt)
 }
 
@@ -71,7 +71,7 @@ func (e *Endpoint) WritePackets(r *stack.Route, gso *stack.GSO, pkts stack.Packe
 	linkAddr := e.Endpoint.LinkAddress()
 
 	for pkt := pkts.Front(); pkt != nil; pkt = pkt.Next() {
-		e.AddHeader(linkAddr, r.RemoteLinkAddress, proto, pkt)
+		e.AddHeader(linkAddr, r.RemoteLinkAddress(), proto, pkt)
 	}
 
 	return e.Endpoint.WritePackets(r, gso, pkts, proto)
diff --git a/pkg/tcpip/link/fdbased/endpoint.go b/pkg/tcpip/link/fdbased/endpoint.go
index 975309fc8..cb94cbea6 100644
--- a/pkg/tcpip/link/fdbased/endpoint.go
+++ b/pkg/tcpip/link/fdbased/endpoint.go
@@ -284,9 +284,12 @@ func createInboundDispatcher(e *endpoint, fd int, isSocket bool) (linkDispatcher
 		}
 		switch sa.(type) {
 		case *unix.SockaddrLinklayer:
-			// enable PACKET_FANOUT mode is the underlying socket is
-			// of type AF_PACKET.
-			const fanoutType = 0x8000 // PACKET_FANOUT_HASH | PACKET_FANOUT_FLAG_DEFRAG
+			// Enable PACKET_FANOUT mode if the underlying socket is of type
+			// AF_PACKET. We do not enable PACKET_FANOUT_FLAG_DEFRAG because with it
+			// the host reassembles fragmented packets on our behalf before delivering
+			// them, so gVisor never receives the fragments. This makes it hard to
+			// test the fragment reassembly code in Netstack.
+			const fanoutType = unix.PACKET_FANOUT_HASH
 			fanoutArg := fanoutID | fanoutType<<16
 			if err := syscall.SetsockoptInt(fd, syscall.SOL_PACKET, unix.PACKET_FANOUT, fanoutArg); err != nil {
 				return nil, fmt.Errorf("failed to enable PACKET_FANOUT option: %v", err)
@@ -410,7 +413,7 @@ func (e *endpoint) AddHeader(local, remote tcpip.LinkAddress, protocol tcpip.Net
 // currently writable, the packet is dropped.
 func (e *endpoint) WritePacket(r *stack.Route, gso *stack.GSO, protocol tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) *tcpip.Error {
 	if e.hdrSize > 0 {
-		e.AddHeader(r.LocalLinkAddress, r.RemoteLinkAddress, protocol, pkt)
+		e.AddHeader(r.LocalLinkAddress, r.RemoteLinkAddress(), protocol, pkt)
 	}
 
 	var builder iovec.Builder
@@ -453,7 +456,7 @@ func (e *endpoint) sendBatch(batchFD int, batch []*stack.PacketBuffer) (int, *tc
 	mmsgHdrs := make([]rawfile.MMsgHdr, 0, len(batch))
 	for _, pkt := range batch {
 		if e.hdrSize > 0 {
-			e.AddHeader(pkt.EgressRoute.LocalLinkAddress, pkt.EgressRoute.RemoteLinkAddress, pkt.NetworkProtocolNumber, pkt)
+			e.AddHeader(pkt.EgressRoute.LocalLinkAddress, pkt.EgressRoute.RemoteLinkAddress(), pkt.NetworkProtocolNumber, pkt)
 		}
 
 		var vnetHdrBuf []byte
@@ -558,11 +561,6 @@ func viewsEqual(vs1, vs2 []buffer.View) bool {
 	return len(vs1) == len(vs2) && (len(vs1) == 0 || &vs1[0] == &vs2[0])
 }
 
-// WriteRawPacket implements stack.LinkEndpoint.WriteRawPacket.
-func (e *endpoint) WriteRawPacket(vv buffer.VectorisedView) *tcpip.Error {
-	return rawfile.NonBlockingWrite(e.fds[0], vv.ToView())
-}
-
 // InjectOutobund implements stack.InjectableEndpoint.InjectOutbound.
 func (e *endpoint) InjectOutbound(dest tcpip.Address, packet []byte) *tcpip.Error {
 	return rawfile.NonBlockingWrite(e.fds[0], packet)
diff --git a/pkg/tcpip/link/fdbased/endpoint_test.go b/pkg/tcpip/link/fdbased/endpoint_test.go
index 709f829c8..ce4da7230 100644
--- a/pkg/tcpip/link/fdbased/endpoint_test.go
+++ b/pkg/tcpip/link/fdbased/endpoint_test.go
@@ -183,9 +183,8 @@ func testWritePacket(t *testing.T, plen int, eth bool, gsoMaxSize uint32, hash u
 	c := newContext(t, &Options{Address: laddr, MTU: mtu, EthernetHeader: eth, GSOMaxSize: gsoMaxSize})
 	defer c.cleanup()
 
-	r := &stack.Route{
-		RemoteLinkAddress: raddr,
-	}
+	var r stack.Route
+	r.ResolveWith(raddr)
 
 	// Build payload.
 	payload := buffer.NewView(plen)
@@ -220,7 +219,7 @@ func testWritePacket(t *testing.T, plen int, eth bool, gsoMaxSize uint32, hash u
 			L3HdrLen:   header.IPv4MaximumHeaderSize,
 		}
 	}
-	if err := c.ep.WritePacket(r, gso, proto, pkt); err != nil {
+	if err := c.ep.WritePacket(&r, gso, proto, pkt); err != nil {
 		t.Fatalf("WritePacket failed: %v", err)
 	}
 
@@ -325,9 +324,9 @@ func TestPreserveSrcAddress(t *testing.T) {
 
 	// Set LocalLinkAddress in route to the value of the bridged address.
 	r := &stack.Route{
-		RemoteLinkAddress: raddr,
-		LocalLinkAddress:  baddr,
+		LocalLinkAddress: baddr,
 	}
+	r.ResolveWith(raddr)
 
 	pkt := stack.NewPacketBuffer(stack.PacketBufferOptions{
 		// WritePacket panics given a prependable with anything less than
diff --git a/pkg/tcpip/link/loopback/loopback.go b/pkg/tcpip/link/loopback/loopback.go
index 38aa694e4..edca57e4e 100644
--- a/pkg/tcpip/link/loopback/loopback.go
+++ b/pkg/tcpip/link/loopback/loopback.go
@@ -96,23 +96,6 @@ func (e *endpoint) WritePackets(*stack.Route, *stack.GSO, stack.PacketBufferList
 	panic("not implemented")
 }
 
-// WriteRawPacket implements stack.LinkEndpoint.WriteRawPacket.
-func (e *endpoint) WriteRawPacket(vv buffer.VectorisedView) *tcpip.Error {
-	pkt := stack.NewPacketBuffer(stack.PacketBufferOptions{
-		Data: vv,
-	})
-	// There should be an ethernet header at the beginning of vv.
-	hdr, ok := pkt.LinkHeader().Consume(header.EthernetMinimumSize)
-	if !ok {
-		// Reject the packet if it's shorter than an ethernet header.
-		return tcpip.ErrBadAddress
-	}
-	linkHeader := header.Ethernet(hdr)
-	e.dispatcher.DeliverNetworkPacket("" /* remote */, "" /* local */, linkHeader.Type(), pkt)
-
-	return nil
-}
-
 // ARPHardwareType implements stack.LinkEndpoint.ARPHardwareType.
 func (*endpoint) ARPHardwareType() header.ARPHardwareType {
 	return header.ARPHardwareLoopback
diff --git a/pkg/tcpip/link/muxed/BUILD b/pkg/tcpip/link/muxed/BUILD
index e7493e5c5..cbda59775 100644
--- a/pkg/tcpip/link/muxed/BUILD
+++ b/pkg/tcpip/link/muxed/BUILD
@@ -8,7 +8,6 @@ go_library(
     visibility = ["//visibility:public"],
     deps = [
         "//pkg/tcpip",
-        "//pkg/tcpip/buffer",
         "//pkg/tcpip/header",
         "//pkg/tcpip/stack",
     ],
diff --git a/pkg/tcpip/link/muxed/injectable.go b/pkg/tcpip/link/muxed/injectable.go
index 56a611825..22e79ce3a 100644
--- a/pkg/tcpip/link/muxed/injectable.go
+++ b/pkg/tcpip/link/muxed/injectable.go
@@ -17,7 +17,6 @@ package muxed
 
 import (
 	"gvisor.dev/gvisor/pkg/tcpip"
-	"gvisor.dev/gvisor/pkg/tcpip/buffer"
 	"gvisor.dev/gvisor/pkg/tcpip/header"
 	"gvisor.dev/gvisor/pkg/tcpip/stack"
 )
@@ -106,13 +105,6 @@ func (m *InjectableEndpoint) WritePacket(r *stack.Route, gso *stack.GSO, protoco
 	return tcpip.ErrNoRoute
 }
 
-// WriteRawPacket implements stack.LinkEndpoint.WriteRawPacket.
-func (m *InjectableEndpoint) WriteRawPacket(buffer.VectorisedView) *tcpip.Error {
-	// WriteRawPacket doesn't get a route or network address, so there's
-	// nowhere to write this.
-	return tcpip.ErrNoRoute
-}
-
 // InjectOutbound writes outbound packets to the appropriate
 // LinkInjectableEndpoint based on the dest address.
 func (m *InjectableEndpoint) InjectOutbound(dest tcpip.Address, packet []byte) *tcpip.Error {
diff --git a/pkg/tcpip/link/nested/BUILD b/pkg/tcpip/link/nested/BUILD
index 2cdb23475..00b42b924 100644
--- a/pkg/tcpip/link/nested/BUILD
+++ b/pkg/tcpip/link/nested/BUILD
@@ -11,7 +11,6 @@ go_library(
     deps = [
         "//pkg/sync",
         "//pkg/tcpip",
-        "//pkg/tcpip/buffer",
         "//pkg/tcpip/header",
         "//pkg/tcpip/stack",
     ],
diff --git a/pkg/tcpip/link/nested/nested.go b/pkg/tcpip/link/nested/nested.go
index d40de54df..0ee54c3d5 100644
--- a/pkg/tcpip/link/nested/nested.go
+++ b/pkg/tcpip/link/nested/nested.go
@@ -19,7 +19,6 @@ package nested
 import (
 	"gvisor.dev/gvisor/pkg/sync"
 	"gvisor.dev/gvisor/pkg/tcpip"
-	"gvisor.dev/gvisor/pkg/tcpip/buffer"
 	"gvisor.dev/gvisor/pkg/tcpip/header"
 	"gvisor.dev/gvisor/pkg/tcpip/stack"
 )
@@ -123,11 +122,6 @@ func (e *Endpoint) WritePackets(r *stack.Route, gso *stack.GSO, pkts stack.Packe
 	return e.child.WritePackets(r, gso, pkts, protocol)
 }
 
-// WriteRawPacket implements stack.LinkEndpoint.
-func (e *Endpoint) WriteRawPacket(vv buffer.VectorisedView) *tcpip.Error {
-	return e.child.WriteRawPacket(vv)
-}
-
 // Wait implements stack.LinkEndpoint.
 func (e *Endpoint) Wait() {
 	e.child.Wait()
diff --git a/pkg/tcpip/link/packetsocket/endpoint.go b/pkg/tcpip/link/packetsocket/endpoint.go
index 3922c2a04..9a1b0c0c2 100644
--- a/pkg/tcpip/link/packetsocket/endpoint.go
+++ b/pkg/tcpip/link/packetsocket/endpoint.go
@@ -36,14 +36,14 @@ func New(lower stack.LinkEndpoint) stack.LinkEndpoint {
 
 // WritePacket implements stack.LinkEndpoint.WritePacket.
 func (e *endpoint) WritePacket(r *stack.Route, gso *stack.GSO, protocol tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) *tcpip.Error {
-	e.Endpoint.DeliverOutboundPacket(r.RemoteLinkAddress, r.LocalLinkAddress, protocol, pkt)
+	e.Endpoint.DeliverOutboundPacket(r.RemoteLinkAddress(), r.LocalLinkAddress, protocol, pkt)
 	return e.Endpoint.WritePacket(r, gso, protocol, pkt)
 }
 
 // WritePackets implements stack.LinkEndpoint.WritePackets.
 func (e *endpoint) WritePackets(r *stack.Route, gso *stack.GSO, pkts stack.PacketBufferList, proto tcpip.NetworkProtocolNumber) (int, *tcpip.Error) {
 	for pkt := pkts.Front(); pkt != nil; pkt = pkt.Next() {
-		e.Endpoint.DeliverOutboundPacket(pkt.EgressRoute.RemoteLinkAddress, pkt.EgressRoute.LocalLinkAddress, pkt.NetworkProtocolNumber, pkt)
+		e.Endpoint.DeliverOutboundPacket(pkt.EgressRoute.RemoteLinkAddress(), pkt.EgressRoute.LocalLinkAddress, pkt.NetworkProtocolNumber, pkt)
 	}
 
 	return e.Endpoint.WritePackets(r, gso, pkts, proto)
diff --git a/pkg/tcpip/link/pipe/pipe.go b/pkg/tcpip/link/pipe/pipe.go
index 523b0d24b..25c364391 100644
--- a/pkg/tcpip/link/pipe/pipe.go
+++ b/pkg/tcpip/link/pipe/pipe.go
@@ -55,7 +55,7 @@ func (e *Endpoint) WritePacket(r *stack.Route, _ *stack.GSO, proto tcpip.Network
 	// remote address from the perspective of the other end of the pipe
 	// (e.linked). Similarly, the remote address from the perspective of this
 	// endpoint is the local address on the other end.
-	e.linked.dispatcher.DeliverNetworkPacket(r.LocalLinkAddress /* remote */, r.RemoteLinkAddress /* local */, proto, stack.NewPacketBuffer(stack.PacketBufferOptions{
+	e.linked.dispatcher.DeliverNetworkPacket(r.LocalLinkAddress /* remote */, r.RemoteLinkAddress() /* local */, proto, stack.NewPacketBuffer(stack.PacketBufferOptions{
 		Data: buffer.NewVectorisedView(pkt.Size(), pkt.Views()),
 	}))
 
@@ -67,11 +67,6 @@ func (*Endpoint) WritePackets(*stack.Route, *stack.GSO, stack.PacketBufferList,
 	panic("not implemented")
 }
 
-// WriteRawPacket implements stack.LinkEndpoint.
-func (*Endpoint) WriteRawPacket(buffer.VectorisedView) *tcpip.Error {
-	panic("not implemented")
-}
-
 // Attach implements stack.LinkEndpoint.
 func (e *Endpoint) Attach(dispatcher stack.NetworkDispatcher) {
 	e.dispatcher = dispatcher
diff --git a/pkg/tcpip/link/qdisc/fifo/BUILD b/pkg/tcpip/link/qdisc/fifo/BUILD
index 1d0079bd6..5bea598eb 100644
--- a/pkg/tcpip/link/qdisc/fifo/BUILD
+++ b/pkg/tcpip/link/qdisc/fifo/BUILD
@@ -13,7 +13,6 @@ go_library(
         "//pkg/sleep",
         "//pkg/sync",
         "//pkg/tcpip",
-        "//pkg/tcpip/buffer",
         "//pkg/tcpip/header",
         "//pkg/tcpip/stack",
     ],
diff --git a/pkg/tcpip/link/qdisc/fifo/endpoint.go b/pkg/tcpip/link/qdisc/fifo/endpoint.go
index fc1e34fc7..27667f5f0 100644
--- a/pkg/tcpip/link/qdisc/fifo/endpoint.go
+++ b/pkg/tcpip/link/qdisc/fifo/endpoint.go
@@ -21,7 +21,6 @@ import (
 	"gvisor.dev/gvisor/pkg/sleep"
 	"gvisor.dev/gvisor/pkg/sync"
 	"gvisor.dev/gvisor/pkg/tcpip"
-	"gvisor.dev/gvisor/pkg/tcpip/buffer"
 	"gvisor.dev/gvisor/pkg/tcpip/header"
 	"gvisor.dev/gvisor/pkg/tcpip/stack"
 )
@@ -156,7 +155,7 @@ func (e *endpoint) WritePacket(r *stack.Route, gso *stack.GSO, protocol tcpip.Ne
-	// WritePacket caller's do not set the following fields in PacketBuffer
+	// WritePacket callers do not set the following fields in PacketBuffer
 	// so we populate them here.
 	newRoute := r.Clone()
-	pkt.EgressRoute = &newRoute
+	pkt.EgressRoute = newRoute
 	pkt.GSOOptions = gso
 	pkt.NetworkProtocolNumber = protocol
 	d := e.dispatchers[int(pkt.Hash)%len(e.dispatchers)]
@@ -183,7 +182,7 @@ func (e *endpoint) WritePackets(_ *stack.Route, _ *stack.GSO, pkts stack.PacketB
 		// the route here to ensure it doesn't get released while the
 		// packet is still in our queue.
 		newRoute := pkt.EgressRoute.Clone()
-		pkt.EgressRoute = &newRoute
+		pkt.EgressRoute = newRoute
 		if !d.q.enqueue(pkt) {
 			if enqueued > 0 {
 				d.newPacketWaker.Assert()
@@ -197,13 +196,6 @@ func (e *endpoint) WritePackets(_ *stack.Route, _ *stack.GSO, pkts stack.PacketB
 	return enqueued, nil
 }
 
-// WriteRawPacket implements stack.LinkEndpoint.WriteRawPacket.
-func (e *endpoint) WriteRawPacket(vv buffer.VectorisedView) *tcpip.Error {
-	// TODO(gvisor.dev/issue/3267): Queue these packets as well once
-	// WriteRawPacket takes PacketBuffer instead of VectorisedView.
-	return e.lower.WriteRawPacket(vv)
-}
-
 // Wait implements stack.LinkEndpoint.Wait.
 func (e *endpoint) Wait() {
 	e.lower.Wait()
diff --git a/pkg/tcpip/link/sharedmem/pipe/pipe_test.go b/pkg/tcpip/link/sharedmem/pipe/pipe_test.go
index dc239a0d0..2777f1411 100644
--- a/pkg/tcpip/link/sharedmem/pipe/pipe_test.go
+++ b/pkg/tcpip/link/sharedmem/pipe/pipe_test.go
@@ -470,6 +470,7 @@ func TestConcurrentReaderWriter(t *testing.T) {
 
 	const count = 1000000
 	var wg sync.WaitGroup
+	defer wg.Wait()
 	wg.Add(1)
 	go func() {
 		defer wg.Done()
@@ -489,30 +490,23 @@ func TestConcurrentReaderWriter(t *testing.T) {
 		}
 	}()
 
-	wg.Add(1)
-	go func() {
-		defer wg.Done()
-		runtime.Gosched()
-		for i := 0; i < count; i++ {
-			n := 1 + rr.Intn(80)
-			rb := rx.Pull()
-			for rb == nil {
-				rb = rx.Pull()
-			}
+	for i := 0; i < count; i++ {
+		n := 1 + rr.Intn(80)
+		rb := rx.Pull()
+		for rb == nil {
+			rb = rx.Pull()
+		}
 
-			if n != len(rb) {
-				t.Fatalf("Bad %v-th buffer length: got %v, want %v", i, len(rb), n)
-			}
+		if n != len(rb) {
+			t.Fatalf("Bad %v-th buffer length: got %v, want %v", i, len(rb), n)
+		}
 
-			for j := range rb {
-				if v := byte(rr.Intn(256)); v != rb[j] {
-					t.Fatalf("Bad %v-th read buffer at index %v: got %v, want %v", i, j, rb[j], v)
-				}
+		for j := range rb {
+			if v := byte(rr.Intn(256)); v != rb[j] {
+				t.Fatalf("Bad %v-th read buffer at index %v: got %v, want %v", i, j, rb[j], v)
 			}
-
-			rx.Flush()
 		}
-	}()
 
-	wg.Wait()
+		rx.Flush()
+	}
 }
diff --git a/pkg/tcpip/link/sharedmem/sharedmem.go b/pkg/tcpip/link/sharedmem/sharedmem.go
index 7fb8a6c49..5660418fa 100644
--- a/pkg/tcpip/link/sharedmem/sharedmem.go
+++ b/pkg/tcpip/link/sharedmem/sharedmem.go
@@ -204,7 +204,7 @@ func (e *endpoint) AddHeader(local, remote tcpip.LinkAddress, protocol tcpip.Net
 // WritePacket writes outbound packets to the file descriptor. If it is not
 // currently writable, the packet is dropped.
 func (e *endpoint) WritePacket(r *stack.Route, _ *stack.GSO, protocol tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) *tcpip.Error {
-	e.AddHeader(r.LocalLinkAddress, r.RemoteLinkAddress, protocol, pkt)
+	e.AddHeader(r.LocalLinkAddress, r.RemoteLinkAddress(), protocol, pkt)
 
 	views := pkt.Views()
 	// Transmit the packet.
@@ -224,21 +224,6 @@ func (e *endpoint) WritePackets(r *stack.Route, _ *stack.GSO, pkts stack.PacketB
 	panic("not implemented")
 }
 
-// WriteRawPacket implements stack.LinkEndpoint.WriteRawPacket.
-func (e *endpoint) WriteRawPacket(vv buffer.VectorisedView) *tcpip.Error {
-	views := vv.Views()
-	// Transmit the packet.
-	e.mu.Lock()
-	ok := e.tx.transmit(views...)
-	e.mu.Unlock()
-
-	if !ok {
-		return tcpip.ErrWouldBlock
-	}
-
-	return nil
-}
-
 // dispatchLoop reads packets from the rx queue in a loop and dispatches them
 // to the network stack.
 func (e *endpoint) dispatchLoop(d stack.NetworkDispatcher) {
diff --git a/pkg/tcpip/link/sharedmem/sharedmem_test.go b/pkg/tcpip/link/sharedmem/sharedmem_test.go
index 22d5c97f1..7131392cc 100644
--- a/pkg/tcpip/link/sharedmem/sharedmem_test.go
+++ b/pkg/tcpip/link/sharedmem/sharedmem_test.go
@@ -260,9 +260,8 @@ func TestSimpleSend(t *testing.T) {
 	defer c.cleanup()
 
 	// Prepare route.
-	r := stack.Route{
-		RemoteLinkAddress: remoteLinkAddr,
-	}
+	var r stack.Route
+	r.ResolveWith(remoteLinkAddr)
 
 	for iters := 1000; iters > 0; iters-- {
 		func() {
@@ -342,9 +341,9 @@ func TestPreserveSrcAddressInSend(t *testing.T) {
 	newLocalLinkAddress := tcpip.LinkAddress(strings.Repeat("0xFE", 6))
 	// Set both remote and local link address in route.
 	r := stack.Route{
-		RemoteLinkAddress: remoteLinkAddr,
-		LocalLinkAddress:  newLocalLinkAddress,
+		LocalLinkAddress: newLocalLinkAddress,
 	}
+	r.ResolveWith(remoteLinkAddr)
 
 	pkt := stack.NewPacketBuffer(stack.PacketBufferOptions{
 		// WritePacket panics given a prependable with anything less than
@@ -395,9 +394,8 @@ func TestFillTxQueue(t *testing.T) {
 	defer c.cleanup()
 
 	// Prepare to send a packet.
-	r := stack.Route{
-		RemoteLinkAddress: remoteLinkAddr,
-	}
+	var r stack.Route
+	r.ResolveWith(remoteLinkAddr)
 
 	buf := buffer.NewView(100)
 
@@ -444,9 +442,8 @@ func TestFillTxQueueAfterBadCompletion(t *testing.T) {
 	c.txq.rx.Flush()
 
 	// Prepare to send a packet.
-	r := stack.Route{
-		RemoteLinkAddress: remoteLinkAddr,
-	}
+	var r stack.Route
+	r.ResolveWith(remoteLinkAddr)
 
 	buf := buffer.NewView(100)
 
@@ -509,9 +506,8 @@ func TestFillTxMemory(t *testing.T) {
 	defer c.cleanup()
 
 	// Prepare to send a packet.
-	r := stack.Route{
-		RemoteLinkAddress: remoteLinkAddr,
-	}
+	var r stack.Route
+	r.ResolveWith(remoteLinkAddr)
 
 	buf := buffer.NewView(100)
 
@@ -557,9 +553,8 @@ func TestFillTxMemoryWithMultiBuffer(t *testing.T) {
 	defer c.cleanup()
 
 	// Prepare to send a packet.
-	r := stack.Route{
-		RemoteLinkAddress: remoteLinkAddr,
-	}
+	var r stack.Route
+	r.ResolveWith(remoteLinkAddr)
 
 	buf := buffer.NewView(100)
 
diff --git a/pkg/tcpip/link/sniffer/sniffer.go b/pkg/tcpip/link/sniffer/sniffer.go
index 560477926..8d9a91020 100644
--- a/pkg/tcpip/link/sniffer/sniffer.go
+++ b/pkg/tcpip/link/sniffer/sniffer.go
@@ -53,16 +53,35 @@ type endpoint struct {
 	nested.Endpoint
 	writer     io.Writer
 	maxPCAPLen uint32
+	logPrefix  string
 }
 
 var _ stack.GSOEndpoint = (*endpoint)(nil)
 var _ stack.LinkEndpoint = (*endpoint)(nil)
 var _ stack.NetworkDispatcher = (*endpoint)(nil)
 
+type direction int
+
+const (
+	directionSend = iota
+	directionRecv
+)
+
 // New creates a new sniffer link-layer endpoint. It wraps around another
-// endpoint and logs packets and they traverse the endpoint.
+// endpoint and logs packets as they traverse the endpoint.
 func New(lower stack.LinkEndpoint) stack.LinkEndpoint {
-	sniffer := &endpoint{}
+	return NewWithPrefix(lower, "")
+}
+
+// NewWithPrefix creates a new sniffer link-layer endpoint. It wraps around
+// another endpoint and logs packets prefixed with logPrefix as they traverse
+// the endpoint.
+//
+// logPrefix is prepended to the log line without any separators.
+// E.g. logPrefix = "NIC:en0/" will produce log lines like
+// "NIC:en0/send udp [...]".
+func NewWithPrefix(lower stack.LinkEndpoint, logPrefix string) stack.LinkEndpoint {
+	sniffer := &endpoint{logPrefix: logPrefix}
 	sniffer.Endpoint.Init(lower, sniffer)
 	return sniffer
 }
@@ -120,7 +139,7 @@ func NewWithWriter(lower stack.LinkEndpoint, writer io.Writer, snapLen uint32) (
 // called by the link-layer endpoint being wrapped when a packet arrives, and
 // logs the packet before forwarding to the actual dispatcher.
 func (e *endpoint) DeliverNetworkPacket(remote, local tcpip.LinkAddress, protocol tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) {
-	e.dumpPacket("recv", nil, protocol, pkt)
+	e.dumpPacket(directionRecv, nil, protocol, pkt)
 	e.Endpoint.DeliverNetworkPacket(remote, local, protocol, pkt)
 }
 
@@ -129,10 +148,10 @@ func (e *endpoint) DeliverOutboundPacket(remote, local tcpip.LinkAddress, protoc
 	e.Endpoint.DeliverOutboundPacket(remote, local, protocol, pkt)
 }
 
-func (e *endpoint) dumpPacket(prefix string, gso *stack.GSO, protocol tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) {
+func (e *endpoint) dumpPacket(dir direction, gso *stack.GSO, protocol tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) {
 	writer := e.writer
 	if writer == nil && atomic.LoadUint32(&LogPackets) == 1 {
-		logPacket(prefix, protocol, pkt, gso)
+		logPacket(e.logPrefix, dir, protocol, pkt, gso)
 	}
 	if writer != nil && atomic.LoadUint32(&LogPacketsToPCAP) == 1 {
 		totalLength := pkt.Size()
@@ -169,7 +188,7 @@ func (e *endpoint) dumpPacket(prefix string, gso *stack.GSO, protocol tcpip.Netw
 // higher-level protocols to write packets; it just logs the packet and
 // forwards the request to the lower endpoint.
 func (e *endpoint) WritePacket(r *stack.Route, gso *stack.GSO, protocol tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) *tcpip.Error {
-	e.dumpPacket("send", gso, protocol, pkt)
+	e.dumpPacket(directionSend, gso, protocol, pkt)
 	return e.Endpoint.WritePacket(r, gso, protocol, pkt)
 }
 
@@ -178,20 +197,12 @@ func (e *endpoint) WritePacket(r *stack.Route, gso *stack.GSO, protocol tcpip.Ne
 // forwards the request to the lower endpoint.
 func (e *endpoint) WritePackets(r *stack.Route, gso *stack.GSO, pkts stack.PacketBufferList, protocol tcpip.NetworkProtocolNumber) (int, *tcpip.Error) {
 	for pkt := pkts.Front(); pkt != nil; pkt = pkt.Next() {
-		e.dumpPacket("send", gso, protocol, pkt)
+		e.dumpPacket(directionSend, gso, protocol, pkt)
 	}
 	return e.Endpoint.WritePackets(r, gso, pkts, protocol)
 }
 
-// WriteRawPacket implements stack.LinkEndpoint.WriteRawPacket.
-func (e *endpoint) WriteRawPacket(vv buffer.VectorisedView) *tcpip.Error {
-	e.dumpPacket("send", nil, 0, stack.NewPacketBuffer(stack.PacketBufferOptions{
-		Data: vv,
-	}))
-	return e.Endpoint.WriteRawPacket(vv)
-}
-
-func logPacket(prefix string, protocol tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer, gso *stack.GSO) {
+func logPacket(prefix string, dir direction, protocol tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer, gso *stack.GSO) {
 	// Figure out the network layer info.
 	var transProto uint8
 	src := tcpip.Address("unknown")
@@ -201,11 +212,26 @@ func logPacket(prefix string, protocol tcpip.NetworkProtocolNumber, pkt *stack.P
 	var fragmentOffset uint16
 	var moreFragments bool
 
+	var directionPrefix string
+	switch dir {
+	case directionSend:
+		directionPrefix = "send"
+	case directionRecv:
+		directionPrefix = "recv"
+	default:
+		panic(fmt.Sprintf("unrecognized direction: %d", dir))
+	}
+
 	// Clone the packet buffer to not modify the original.
 	//
 	// We don't clone the original packet buffer so that the new packet buffer
 	// does not have any of its headers set.
-	pkt = stack.NewPacketBuffer(stack.PacketBufferOptions{Data: buffer.NewVectorisedView(pkt.Size(), pkt.Views())})
+	//
+	// We trim the link headers from the cloned buffer as the sniffer doesn't
+	// handle link headers.
+	vv := buffer.NewVectorisedView(pkt.Size(), pkt.Views())
+	vv.TrimFront(len(pkt.LinkHeader().View()))
+	pkt = stack.NewPacketBuffer(stack.PacketBufferOptions{Data: vv})
 	switch protocol {
 	case header.IPv4ProtocolNumber:
 		if ok := parse.IPv4(pkt); !ok {
@@ -243,15 +269,16 @@ func logPacket(prefix string, protocol tcpip.NetworkProtocolNumber, pkt *stack.P
 
 		arp := header.ARP(pkt.NetworkHeader().View())
 		log.Infof(
-			"%s arp %s (%s) -> %s (%s) valid:%t",
+			"%s%s arp %s (%s) -> %s (%s) valid:%t",
 			prefix,
+			directionPrefix,
 			tcpip.Address(arp.ProtocolAddressSender()), tcpip.LinkAddress(arp.HardwareAddressSender()),
 			tcpip.Address(arp.ProtocolAddressTarget()), tcpip.LinkAddress(arp.HardwareAddressTarget()),
 			arp.IsValid(),
 		)
 		return
 	default:
-		log.Infof("%s unknown network protocol", prefix)
+		log.Infof("%s%s unknown network protocol", prefix, directionPrefix)
 		return
 	}
 
@@ -295,7 +322,7 @@ func logPacket(prefix string, protocol tcpip.NetworkProtocolNumber, pkt *stack.P
 				icmpType = "info reply"
 			}
 		}
-		log.Infof("%s %s %s -> %s %s len:%d id:%04x code:%d", prefix, transName, src, dst, icmpType, size, id, icmp.Code())
+		log.Infof("%s%s %s %s -> %s %s len:%d id:%04x code:%d", prefix, directionPrefix, transName, src, dst, icmpType, size, id, icmp.Code())
 		return
 
 	case header.ICMPv6ProtocolNumber:
@@ -330,7 +357,7 @@ func logPacket(prefix string, protocol tcpip.NetworkProtocolNumber, pkt *stack.P
 		case header.ICMPv6RedirectMsg:
 			icmpType = "redirect message"
 		}
-		log.Infof("%s %s %s -> %s %s len:%d id:%04x code:%d", prefix, transName, src, dst, icmpType, size, id, icmp.Code())
+		log.Infof("%s%s %s %s -> %s %s len:%d id:%04x code:%d", prefix, directionPrefix, transName, src, dst, icmpType, size, id, icmp.Code())
 		return
 
 	case header.UDPProtocolNumber:
@@ -386,7 +413,7 @@ func logPacket(prefix string, protocol tcpip.NetworkProtocolNumber, pkt *stack.P
 		}
 
 	default:
-		log.Infof("%s %s -> %s unknown transport protocol: %d", prefix, src, dst, transProto)
+		log.Infof("%s%s %s -> %s unknown transport protocol: %d", prefix, directionPrefix, src, dst, transProto)
 		return
 	}
 
@@ -394,5 +421,5 @@ func logPacket(prefix string, protocol tcpip.NetworkProtocolNumber, pkt *stack.P
 		details += fmt.Sprintf(" gso: %+v", gso)
 	}
 
-	log.Infof("%s %s %s:%d -> %s:%d len:%d id:%04x %s", prefix, transName, src, srcPort, dst, dstPort, size, id, details)
+	log.Infof("%s%s %s %s:%d -> %s:%d len:%d id:%04x %s", prefix, directionPrefix, transName, src, srcPort, dst, dstPort, size, id, details)
 }
diff --git a/pkg/tcpip/link/tun/BUILD b/pkg/tcpip/link/tun/BUILD
index 0243424f6..86f14db76 100644
--- a/pkg/tcpip/link/tun/BUILD
+++ b/pkg/tcpip/link/tun/BUILD
@@ -8,7 +8,7 @@ go_template_instance(
     out = "tun_endpoint_refs.go",
     package = "tun",
     prefix = "tunEndpoint",
-    template = "//pkg/refs_vfs2:refs_template",
+    template = "//pkg/refsvfs2:refs_template",
     types = {
         "T": "tunEndpoint",
     },
@@ -28,6 +28,7 @@ go_library(
         "//pkg/context",
         "//pkg/log",
         "//pkg/refs",
+        "//pkg/refsvfs2",
         "//pkg/sync",
         "//pkg/syserror",
         "//pkg/tcpip",
diff --git a/pkg/tcpip/link/tun/device.go b/pkg/tcpip/link/tun/device.go
index f94491026..a364c5801 100644
--- a/pkg/tcpip/link/tun/device.go
+++ b/pkg/tcpip/link/tun/device.go
@@ -76,29 +76,13 @@ func (d *Device) Release(ctx context.Context) {
 	}
 }
 
-// NICID returns the NIC ID of the device.
-//
-// Must only be called after the device has been attached to an endpoint.
-func (d *Device) NICID() tcpip.NICID {
-	d.mu.RLock()
-	defer d.mu.RUnlock()
-
-	if d.endpoint == nil {
-		panic("called NICID on a device that has not been attached")
-	}
-
-	return d.endpoint.nicID
-}
-
 // SetIff services TUNSETIFF ioctl(2) request.
-//
-// Returns true if a new NIC was created; false if an existing one was attached.
-func (d *Device) SetIff(s *stack.Stack, name string, flags uint16) (bool, error) {
+func (d *Device) SetIff(s *stack.Stack, name string, flags uint16) error {
 	d.mu.Lock()
 	defer d.mu.Unlock()
 
 	if d.endpoint != nil {
-		return false, syserror.EINVAL
+		return syserror.EINVAL
 	}
 
 	// Input validations.
@@ -106,7 +90,7 @@ func (d *Device) SetIff(s *stack.Stack, name string, flags uint16) (bool, error)
 	isTap := flags&linux.IFF_TAP != 0
 	supportedFlags := uint16(linux.IFF_TUN | linux.IFF_TAP | linux.IFF_NO_PI)
 	if isTap && isTun || !isTap && !isTun || flags&^supportedFlags != 0 {
-		return false, syserror.EINVAL
+		return syserror.EINVAL
 	}
 
 	prefix := "tun"
@@ -119,18 +103,18 @@ func (d *Device) SetIff(s *stack.Stack, name string, flags uint16) (bool, error)
 		linkCaps |= stack.CapabilityResolutionRequired
 	}
 
-	endpoint, created, err := attachOrCreateNIC(s, name, prefix, linkCaps)
+	endpoint, err := attachOrCreateNIC(s, name, prefix, linkCaps)
 	if err != nil {
-		return false, syserror.EINVAL
+		return syserror.EINVAL
 	}
 
 	d.endpoint = endpoint
 	d.notifyHandle = d.endpoint.AddNotify(d)
 	d.flags = flags
-	return created, nil
+	return nil
 }
 
-func attachOrCreateNIC(s *stack.Stack, name, prefix string, linkCaps stack.LinkEndpointCapabilities) (*tunEndpoint, bool, error) {
+func attachOrCreateNIC(s *stack.Stack, name, prefix string, linkCaps stack.LinkEndpointCapabilities) (*tunEndpoint, error) {
 	for {
 		// 1. Try to attach to an existing NIC.
 		if name != "" {
@@ -138,19 +122,18 @@ func attachOrCreateNIC(s *stack.Stack, name, prefix string, linkCaps stack.LinkE
 				endpoint, ok := linkEP.(*tunEndpoint)
 				if !ok {
 					// Not a NIC created by tun device.
-					return nil, false, syserror.EOPNOTSUPP
+					return nil, syserror.EOPNOTSUPP
 				}
 				if !endpoint.TryIncRef() {
 					// Race detected: NIC got deleted in between.
 					continue
 				}
-				return endpoint, false, nil
+				return endpoint, nil
 			}
 		}
 
 		// 2. Creating a new NIC.
 		id := tcpip.NICID(s.UniqueID())
-		// TODO(gvisor.dev/1486): enable leak check for tunEndpoint.
 		endpoint := &tunEndpoint{
 			Endpoint: channel.New(defaultDevOutQueueLen, defaultDevMtu, ""),
 			stack:    s,
@@ -158,6 +141,7 @@ func attachOrCreateNIC(s *stack.Stack, name, prefix string, linkCaps stack.LinkE
 			name:     name,
 			isTap:    prefix == "tap",
 		}
+		endpoint.InitRefs()
 		endpoint.Endpoint.LinkEPCapabilities = linkCaps
 		if endpoint.name == "" {
 			endpoint.name = fmt.Sprintf("%s%d", prefix, id)
@@ -167,12 +151,12 @@ func attachOrCreateNIC(s *stack.Stack, name, prefix string, linkCaps stack.LinkE
 		})
 		switch err {
 		case nil:
-			return endpoint, true, nil
+			return endpoint, nil
 		case tcpip.ErrDuplicateNICID:
 			// Race detected: A NIC has been created in between.
 			continue
 		default:
-			return nil, false, syserror.EINVAL
+			return nil, syserror.EINVAL
 		}
 	}
 }
@@ -280,7 +264,7 @@ func (d *Device) encodePkt(info *channel.PacketInfo) (buffer.View, bool) {
 	// If the packet does not already have link layer header, and the route
 	// does not exist, we can't compute it. This is possibly a raw packet, tun
 	// device doesn't support this at the moment.
-	if info.Pkt.LinkHeader().View().IsEmpty() && info.Route.RemoteLinkAddress == "" {
+	if info.Pkt.LinkHeader().View().IsEmpty() && info.Route.RemoteLinkAddress() == "" {
 		return nil, false
 	}
 
@@ -288,7 +272,7 @@ func (d *Device) encodePkt(info *channel.PacketInfo) (buffer.View, bool) {
 	if d.hasFlags(linux.IFF_TAP) {
 		// Add ethernet header if not provided.
 		if info.Pkt.LinkHeader().View().IsEmpty() {
-			d.endpoint.AddHeader(info.Route.LocalLinkAddress, info.Route.RemoteLinkAddress, info.Proto, info.Pkt)
+			d.endpoint.AddHeader(info.Route.LocalLinkAddress, info.Route.RemoteLinkAddress(), info.Proto, info.Pkt)
 		}
 		vv.AppendView(info.Pkt.LinkHeader().View())
 	}
diff --git a/pkg/tcpip/link/waitable/BUILD b/pkg/tcpip/link/waitable/BUILD
index ee84c3d96..9b4602c1b 100644
--- a/pkg/tcpip/link/waitable/BUILD
+++ b/pkg/tcpip/link/waitable/BUILD
@@ -11,7 +11,6 @@ go_library(
     deps = [
         "//pkg/gate",
         "//pkg/tcpip",
-        "//pkg/tcpip/buffer",
         "//pkg/tcpip/header",
         "//pkg/tcpip/stack",
     ],
@@ -25,7 +24,6 @@ go_test(
     library = ":waitable",
     deps = [
         "//pkg/tcpip",
-        "//pkg/tcpip/buffer",
         "//pkg/tcpip/header",
         "//pkg/tcpip/stack",
     ],
diff --git a/pkg/tcpip/link/waitable/waitable.go b/pkg/tcpip/link/waitable/waitable.go
index b152a0f26..cf0077f43 100644
--- a/pkg/tcpip/link/waitable/waitable.go
+++ b/pkg/tcpip/link/waitable/waitable.go
@@ -24,7 +24,6 @@ package waitable
 import (
 	"gvisor.dev/gvisor/pkg/gate"
 	"gvisor.dev/gvisor/pkg/tcpip"
-	"gvisor.dev/gvisor/pkg/tcpip/buffer"
 	"gvisor.dev/gvisor/pkg/tcpip/header"
 	"gvisor.dev/gvisor/pkg/tcpip/stack"
 )
@@ -132,17 +131,6 @@ func (e *Endpoint) WritePackets(r *stack.Route, gso *stack.GSO, pkts stack.Packe
 	return n, err
 }
 
-// WriteRawPacket implements stack.LinkEndpoint.WriteRawPacket.
-func (e *Endpoint) WriteRawPacket(vv buffer.VectorisedView) *tcpip.Error {
-	if !e.writeGate.Enter() {
-		return nil
-	}
-
-	err := e.lower.WriteRawPacket(vv)
-	e.writeGate.Leave()
-	return err
-}
-
 // WaitWrite prevents new calls to WritePacket from reaching the lower endpoint,
 // and waits for inflight ones to finish before returning.
 func (e *Endpoint) WaitWrite() {
diff --git a/pkg/tcpip/link/waitable/waitable_test.go b/pkg/tcpip/link/waitable/waitable_test.go
index 94827fc56..cf7fb5126 100644
--- a/pkg/tcpip/link/waitable/waitable_test.go
+++ b/pkg/tcpip/link/waitable/waitable_test.go
@@ -18,7 +18,6 @@ import (
 	"testing"
 
 	"gvisor.dev/gvisor/pkg/tcpip"
-	"gvisor.dev/gvisor/pkg/tcpip/buffer"
 	"gvisor.dev/gvisor/pkg/tcpip/header"
 	"gvisor.dev/gvisor/pkg/tcpip/stack"
 )
@@ -81,11 +80,6 @@ func (e *countedEndpoint) WritePackets(r *stack.Route, _ *stack.GSO, pkts stack.
 	return pkts.Len(), nil
 }
 
-func (e *countedEndpoint) WriteRawPacket(buffer.VectorisedView) *tcpip.Error {
-	e.writeCount++
-	return nil
-}
-
 // ARPHardwareType implements stack.LinkEndpoint.ARPHardwareType.
 func (*countedEndpoint) ARPHardwareType() header.ARPHardwareType {
 	panic("unimplemented")
diff --git a/pkg/tcpip/network/BUILD b/pkg/tcpip/network/BUILD
index c118a2929..9ebf31b78 100644
--- a/pkg/tcpip/network/BUILD
+++ b/pkg/tcpip/network/BUILD
@@ -7,13 +7,16 @@ go_test(
     size = "small",
     srcs = [
         "ip_test.go",
+        "multicast_group_test.go",
     ],
     deps = [
         "//pkg/sync",
         "//pkg/tcpip",
         "//pkg/tcpip/buffer",
         "//pkg/tcpip/checker",
+        "//pkg/tcpip/faketime",
         "//pkg/tcpip/header",
+        "//pkg/tcpip/header/parse",
         "//pkg/tcpip/link/channel",
         "//pkg/tcpip/link/loopback",
         "//pkg/tcpip/network/ipv4",
diff --git a/pkg/tcpip/network/arp/BUILD b/pkg/tcpip/network/arp/BUILD
index b40dde96b..8a6bcfc2c 100644
--- a/pkg/tcpip/network/arp/BUILD
+++ b/pkg/tcpip/network/arp/BUILD
@@ -30,5 +30,6 @@ go_test(
         "//pkg/tcpip/stack",
         "//pkg/tcpip/transport/icmp",
         "@com_github_google_go_cmp//cmp:go_default_library",
+        "@com_github_google_go_cmp//cmp/cmpopts:go_default_library",
     ],
 )
diff --git a/pkg/tcpip/network/arp/arp.go b/pkg/tcpip/network/arp/arp.go
index 7df77c66e..3d5c0d270 100644
--- a/pkg/tcpip/network/arp/arp.go
+++ b/pkg/tcpip/network/arp/arp.go
@@ -18,6 +18,7 @@
 package arp
 
 import (
+	"fmt"
 	"sync/atomic"
 
 	"gvisor.dev/gvisor/pkg/tcpip"
@@ -30,17 +31,15 @@ import (
 const (
 	// ProtocolNumber is the ARP protocol number.
 	ProtocolNumber = header.ARPProtocolNumber
-
-	// ProtocolAddress is the address expected by the ARP endpoint.
-	ProtocolAddress = tcpip.Address("arp")
 )
 
-var _ stack.AddressableEndpoint = (*endpoint)(nil)
+// ARP endpoints need to implement stack.NetworkEndpoint because the stack
+// considers the layer above the link-layer a network layer; the only
+// facility provided by the stack to deliver packets to a layer above
+// the link-layer is via stack.NetworkEndpoint.HandlePacket.
 var _ stack.NetworkEndpoint = (*endpoint)(nil)
 
 type endpoint struct {
-	stack.AddressableEndpointState
-
 	protocol *protocol
 
 	// enabled is set to 1 when the NIC is enabled and 0 when it is disabled.
@@ -86,7 +85,7 @@ func (e *endpoint) Disable() {
 }
 
 // DefaultTTL is unused for ARP. It implements stack.NetworkEndpoint.
-func (e *endpoint) DefaultTTL() uint8 {
+func (*endpoint) DefaultTTL() uint8 {
 	return 0
 }
 
@@ -99,29 +98,27 @@ func (e *endpoint) MaxHeaderLength() uint16 {
 	return e.nic.MaxHeaderLength() + header.ARPSize
 }
 
-func (e *endpoint) Close() {
-	e.AddressableEndpointState.Cleanup()
-}
+func (*endpoint) Close() {}
 
-func (e *endpoint) WritePacket(*stack.Route, *stack.GSO, stack.NetworkHeaderParams, *stack.PacketBuffer) *tcpip.Error {
+func (*endpoint) WritePacket(*stack.Route, *stack.GSO, stack.NetworkHeaderParams, *stack.PacketBuffer) *tcpip.Error {
 	return tcpip.ErrNotSupported
 }
 
 // NetworkProtocolNumber implements stack.NetworkEndpoint.NetworkProtocolNumber.
-func (e *endpoint) NetworkProtocolNumber() tcpip.NetworkProtocolNumber {
+func (*endpoint) NetworkProtocolNumber() tcpip.NetworkProtocolNumber {
 	return ProtocolNumber
 }
 
 // WritePackets implements stack.NetworkEndpoint.WritePackets.
-func (e *endpoint) WritePackets(*stack.Route, *stack.GSO, stack.PacketBufferList, stack.NetworkHeaderParams) (int, *tcpip.Error) {
+func (*endpoint) WritePackets(*stack.Route, *stack.GSO, stack.PacketBufferList, stack.NetworkHeaderParams) (int, *tcpip.Error) {
 	return 0, tcpip.ErrNotSupported
 }
 
-func (e *endpoint) WriteHeaderIncludedPacket(r *stack.Route, pkt *stack.PacketBuffer) *tcpip.Error {
+func (*endpoint) WriteHeaderIncludedPacket(*stack.Route, *stack.PacketBuffer) *tcpip.Error {
 	return tcpip.ErrNotSupported
 }
 
-func (e *endpoint) HandlePacket(r *stack.Route, pkt *stack.PacketBuffer) {
+func (e *endpoint) HandlePacket(pkt *stack.PacketBuffer) {
 	if !e.isEnabled() {
 		return
 	}
@@ -144,34 +141,43 @@ func (e *endpoint) HandlePacket(r *stack.Route, pkt *stack.PacketBuffer) {
 			linkAddr := tcpip.LinkAddress(h.HardwareAddressSender())
 			e.linkAddrCache.AddLinkAddress(e.nic.ID(), addr, linkAddr)
 		} else {
-			if r.Stack().CheckLocalAddress(e.nic.ID(), header.IPv4ProtocolNumber, localAddr) == 0 {
+			if e.protocol.stack.CheckLocalAddress(e.nic.ID(), header.IPv4ProtocolNumber, localAddr) == 0 {
 				return // we have no useful answer, ignore the request
 			}
 
 			remoteAddr := tcpip.Address(h.ProtocolAddressSender())
 			remoteLinkAddr := tcpip.LinkAddress(h.HardwareAddressSender())
-			e.nud.HandleProbe(remoteAddr, localAddr, ProtocolNumber, remoteLinkAddr, e.protocol)
+			e.nud.HandleProbe(remoteAddr, ProtocolNumber, remoteLinkAddr, e.protocol)
 		}
 
-		// As per RFC 826, under Packet Reception:
-		//   Swap hardware and protocol fields, putting the local hardware and
-		//   protocol addresses in the sender fields.
-		//
-		//   Send the packet to the (new) target hardware address on the same
-		//   hardware on which the request was received.
-		origSender := h.HardwareAddressSender()
-		r.RemoteLinkAddress = tcpip.LinkAddress(origSender)
 		respPkt := stack.NewPacketBuffer(stack.PacketBufferOptions{
 			ReserveHeaderBytes: int(e.nic.MaxHeaderLength()) + header.ARPSize,
 		})
 		packet := header.ARP(respPkt.NetworkHeader().Push(header.ARPSize))
+		respPkt.NetworkProtocolNumber = ProtocolNumber
 		packet.SetIPv4OverEthernet()
 		packet.SetOp(header.ARPReply)
-		copy(packet.HardwareAddressSender(), r.LocalLinkAddress[:])
-		copy(packet.ProtocolAddressSender(), h.ProtocolAddressTarget())
-		copy(packet.HardwareAddressTarget(), origSender)
-		copy(packet.ProtocolAddressTarget(), h.ProtocolAddressSender())
-		_ = e.nic.WritePacket(r, nil /* gso */, ProtocolNumber, respPkt)
+		// TODO(gvisor.dev/issue/4582): check copied length once TAP devices have a
+		// link address.
+		_ = copy(packet.HardwareAddressSender(), e.nic.LinkAddress())
+		if n := copy(packet.ProtocolAddressSender(), h.ProtocolAddressTarget()); n != header.IPv4AddressSize {
+			panic(fmt.Sprintf("copied %d bytes, expected %d bytes", n, header.IPv4AddressSize))
+		}
+		origSender := h.HardwareAddressSender()
+		if n := copy(packet.HardwareAddressTarget(), origSender); n != header.EthernetAddressSize {
+			panic(fmt.Sprintf("copied %d bytes, expected %d bytes", n, header.EthernetAddressSize))
+		}
+		if n := copy(packet.ProtocolAddressTarget(), h.ProtocolAddressSender()); n != header.IPv4AddressSize {
+			panic(fmt.Sprintf("copied %d bytes, expected %d bytes", n, header.IPv4AddressSize))
+		}
+
+		// As per RFC 826, under Packet Reception:
+		//   Swap hardware and protocol fields, putting the local hardware and
+		//   protocol addresses in the sender fields.
+		//
+		//   Send the packet to the (new) target hardware address on the same
+		//   hardware on which the request was received.
+		_ = e.nic.WritePacketToRemote(tcpip.LinkAddress(origSender), nil /* gso */, ProtocolNumber, respPkt)
 
 	case header.ARPReply:
 		addr := tcpip.Address(h.ProtocolAddressSender())
@@ -199,15 +205,15 @@ func (e *endpoint) HandlePacket(r *stack.Route, pkt *stack.PacketBuffer) {
 
 // protocol implements stack.NetworkProtocol and stack.LinkAddressResolver.
 type protocol struct {
+	stack *stack.Stack
 }
 
 func (p *protocol) Number() tcpip.NetworkProtocolNumber { return ProtocolNumber }
 func (p *protocol) MinimumPacketSize() int              { return header.ARPSize }
 func (p *protocol) DefaultPrefixLen() int               { return 0 }
 
-func (*protocol) ParseAddresses(v buffer.View) (src, dst tcpip.Address) {
-	h := header.ARP(v)
-	return tcpip.Address(h.ProtocolAddressSender()), ProtocolAddress
+func (*protocol) ParseAddresses(buffer.View) (src, dst tcpip.Address) {
+	return "", ""
 }
 
 func (p *protocol) NewEndpoint(nic stack.NetworkInterface, linkAddrCache stack.LinkAddressCache, nud stack.NUDHandler, dispatcher stack.TransportDispatcher) stack.NetworkEndpoint {
@@ -217,7 +223,6 @@ func (p *protocol) NewEndpoint(nic stack.NetworkInterface, linkAddrCache stack.L
 		linkAddrCache: linkAddrCache,
 		nud:           nud,
 	}
-	e.AddressableEndpointState.Init(e)
 	return e
 }
 
@@ -227,26 +232,44 @@ func (*protocol) LinkAddressProtocol() tcpip.NetworkProtocolNumber {
 }
 
 // LinkAddressRequest implements stack.LinkAddressResolver.LinkAddressRequest.
-func (*protocol) LinkAddressRequest(addr, localAddr tcpip.Address, remoteLinkAddr tcpip.LinkAddress, linkEP stack.LinkEndpoint) *tcpip.Error {
-	r := &stack.Route{
-		NetProto:          ProtocolNumber,
-		RemoteLinkAddress: remoteLinkAddr,
+func (p *protocol) LinkAddressRequest(targetAddr, localAddr tcpip.Address, remoteLinkAddr tcpip.LinkAddress, nic stack.NetworkInterface) *tcpip.Error {
+	if len(remoteLinkAddr) == 0 {
+		remoteLinkAddr = header.EthernetBroadcastAddress
 	}
-	if len(r.RemoteLinkAddress) == 0 {
-		r.RemoteLinkAddress = header.EthernetBroadcastAddress
+
+	nicID := nic.ID()
+	if len(localAddr) == 0 {
+		addr, err := p.stack.GetMainNICAddress(nicID, header.IPv4ProtocolNumber)
+		if err != nil {
+			return err
+		}
+
+		if len(addr.Address) == 0 {
+			return tcpip.ErrNetworkUnreachable
+		}
+
+		localAddr = addr.Address
+	} else if p.stack.CheckLocalAddress(nicID, header.IPv4ProtocolNumber, localAddr) == 0 {
+		return tcpip.ErrBadLocalAddress
 	}
 
 	pkt := stack.NewPacketBuffer(stack.PacketBufferOptions{
-		ReserveHeaderBytes: int(linkEP.MaxHeaderLength()) + header.ARPSize,
+		ReserveHeaderBytes: int(nic.MaxHeaderLength()) + header.ARPSize,
 	})
 	h := header.ARP(pkt.NetworkHeader().Push(header.ARPSize))
+	pkt.NetworkProtocolNumber = ProtocolNumber
 	h.SetIPv4OverEthernet()
 	h.SetOp(header.ARPRequest)
-	copy(h.HardwareAddressSender(), linkEP.LinkAddress())
-	copy(h.ProtocolAddressSender(), localAddr)
-	copy(h.ProtocolAddressTarget(), addr)
-
-	return linkEP.WritePacket(r, nil /* gso */, ProtocolNumber, pkt)
+	// TODO(gvisor.dev/issue/4582): check copied length once TAP devices have a
+	// link address.
+	_ = copy(h.HardwareAddressSender(), nic.LinkAddress())
+	if n := copy(h.ProtocolAddressSender(), localAddr); n != header.IPv4AddressSize {
+		panic(fmt.Sprintf("copied %d bytes, expected %d bytes", n, header.IPv4AddressSize))
+	}
+	if n := copy(h.ProtocolAddressTarget(), targetAddr); n != header.IPv4AddressSize {
+		panic(fmt.Sprintf("copied %d bytes, expected %d bytes", n, header.IPv4AddressSize))
+	}
+	return nic.WritePacketToRemote(remoteLinkAddr, nil /* gso */, ProtocolNumber, pkt)
 }
 
 // ResolveStaticAddress implements stack.LinkAddressResolver.ResolveStaticAddress.
@@ -282,10 +305,6 @@ func (*protocol) Parse(pkt *stack.PacketBuffer) (proto tcpip.TransportProtocolNu
 }
 
 // NewProtocol returns an ARP network protocol.
-//
-// Note, to make sure that the ARP endpoint receives ARP packets, the "arp"
-// address must be added to every NIC that should respond to ARP requests. See
-// ProtocolAddress for more details.
-func NewProtocol(*stack.Stack) stack.NetworkProtocol {
-	return &protocol{}
+func NewProtocol(s *stack.Stack) stack.NetworkProtocol {
+	return &protocol{stack: s}
 }
diff --git a/pkg/tcpip/network/arp/arp_test.go b/pkg/tcpip/network/arp/arp_test.go
index 626af975a..0fb373612 100644
--- a/pkg/tcpip/network/arp/arp_test.go
+++ b/pkg/tcpip/network/arp/arp_test.go
@@ -22,6 +22,7 @@ import (
 	"time"
 
 	"github.com/google/go-cmp/cmp"
+	"github.com/google/go-cmp/cmp/cmpopts"
 	"gvisor.dev/gvisor/pkg/tcpip"
 	"gvisor.dev/gvisor/pkg/tcpip/buffer"
 	"gvisor.dev/gvisor/pkg/tcpip/header"
@@ -78,13 +79,11 @@ func (t eventType) String() string {
 type eventInfo struct {
 	eventType eventType
 	nicID     tcpip.NICID
-	addr      tcpip.Address
-	linkAddr  tcpip.LinkAddress
-	state     stack.NeighborState
+	entry     stack.NeighborEntry
 }
 
 func (e eventInfo) String() string {
-	return fmt.Sprintf("%s event for NIC #%d, addr=%q, linkAddr=%q, state=%q", e.eventType, e.nicID, e.addr, e.linkAddr, e.state)
+	return fmt.Sprintf("%s event for NIC #%d, %#v", e.eventType, e.nicID, e.entry)
 }
 
 // arpDispatcher implements NUDDispatcher to validate the dispatching of
@@ -96,35 +95,29 @@ type arpDispatcher struct {
 
 var _ stack.NUDDispatcher = (*arpDispatcher)(nil)
 
-func (d *arpDispatcher) OnNeighborAdded(nicID tcpip.NICID, addr tcpip.Address, linkAddr tcpip.LinkAddress, state stack.NeighborState, updatedAt time.Time) {
+func (d *arpDispatcher) OnNeighborAdded(nicID tcpip.NICID, entry stack.NeighborEntry) {
 	e := eventInfo{
 		eventType: entryAdded,
 		nicID:     nicID,
-		addr:      addr,
-		linkAddr:  linkAddr,
-		state:     state,
+		entry:     entry,
 	}
 	d.C <- e
 }
 
-func (d *arpDispatcher) OnNeighborChanged(nicID tcpip.NICID, addr tcpip.Address, linkAddr tcpip.LinkAddress, state stack.NeighborState, updatedAt time.Time) {
+func (d *arpDispatcher) OnNeighborChanged(nicID tcpip.NICID, entry stack.NeighborEntry) {
 	e := eventInfo{
 		eventType: entryChanged,
 		nicID:     nicID,
-		addr:      addr,
-		linkAddr:  linkAddr,
-		state:     state,
+		entry:     entry,
 	}
 	d.C <- e
 }
 
-func (d *arpDispatcher) OnNeighborRemoved(nicID tcpip.NICID, addr tcpip.Address, linkAddr tcpip.LinkAddress, state stack.NeighborState, updatedAt time.Time) {
+func (d *arpDispatcher) OnNeighborRemoved(nicID tcpip.NICID, entry stack.NeighborEntry) {
 	e := eventInfo{
 		eventType: entryRemoved,
 		nicID:     nicID,
-		addr:      addr,
-		linkAddr:  linkAddr,
-		state:     state,
+		entry:     entry,
 	}
 	d.C <- e
 }
@@ -132,7 +125,7 @@ func (d *arpDispatcher) OnNeighborRemoved(nicID tcpip.NICID, addr tcpip.Address,
 func (d *arpDispatcher) waitForEvent(ctx context.Context, want eventInfo) error {
 	select {
 	case got := <-d.C:
-		if diff := cmp.Diff(got, want, cmp.AllowUnexported(got)); diff != "" {
+		if diff := cmp.Diff(got, want, cmp.AllowUnexported(got), cmpopts.IgnoreFields(stack.NeighborEntry{}, "UpdatedAtNanos")); diff != "" {
 			return fmt.Errorf("got invalid event (-got +want):\n%s", diff)
 		}
 	case <-ctx.Done():
@@ -207,9 +200,6 @@ func newTestContext(t *testing.T, useNeighborCache bool) *testContext {
 			t.Fatalf("AddAddress for ipv4 failed: %v", err)
 		}
 	}
-	if err := s.AddAddress(nicID, arp.ProtocolNumber, arp.ProtocolAddress); err != nil {
-		t.Fatalf("AddAddress for arp failed: %v", err)
-	}
 
 	s.SetRouteTable([]tcpip.Route{{
 		Destination: header.IPv4EmptySubnet,
@@ -329,9 +319,9 @@ func TestDirectRequestWithNeighborCache(t *testing.T) {
 			copy(h.HardwareAddressSender(), test.senderLinkAddr)
 			copy(h.ProtocolAddressSender(), test.senderAddr)
 			copy(h.ProtocolAddressTarget(), test.targetAddr)
-			c.linkEP.InjectInbound(arp.ProtocolNumber, &stack.PacketBuffer{
+			c.linkEP.InjectInbound(arp.ProtocolNumber, stack.NewPacketBuffer(stack.PacketBufferOptions{
 				Data: v.ToVectorisedView(),
-			})
+			}))
 
 			if !test.isValid {
 				// No packets should be sent after receiving an invalid ARP request.
@@ -373,9 +363,11 @@ func TestDirectRequestWithNeighborCache(t *testing.T) {
 			wantEvent := eventInfo{
 				eventType: entryAdded,
 				nicID:     nicID,
-				addr:      test.senderAddr,
-				linkAddr:  tcpip.LinkAddress(test.senderLinkAddr),
-				state:     stack.Stale,
+				entry: stack.NeighborEntry{
+					Addr:     test.senderAddr,
+					LinkAddr: tcpip.LinkAddress(test.senderLinkAddr),
+					State:    stack.Stale,
+				},
 			}
 			if err := c.nudDisp.waitForEventWithTimeout(wantEvent, time.Second); err != nil {
 				t.Fatal(err)
@@ -404,9 +396,6 @@ func TestDirectRequestWithNeighborCache(t *testing.T) {
 			if got, want := neigh.LinkAddr, test.senderLinkAddr; got != want {
 				t.Errorf("got neighbor LinkAddr = %s, want = %s", got, want)
 			}
-			if got, want := neigh.LocalAddr, stackAddr; got != want {
-				t.Errorf("got neighbor LocalAddr = %s, want = %s", got, want)
-			}
 			if got, want := neigh.State, stack.Stale; got != want {
 				t.Errorf("got neighbor State = %s, want = %s", got, want)
 			}
@@ -423,43 +412,168 @@ func TestDirectRequestWithNeighborCache(t *testing.T) {
 	}
 }
 
+var _ stack.NetworkInterface = (*testInterface)(nil)
+
+type testInterface struct {
+	stack.LinkEndpoint
+
+	nicID tcpip.NICID
+}
+
+func (t *testInterface) ID() tcpip.NICID {
+	return t.nicID
+}
+
+func (*testInterface) IsLoopback() bool {
+	return false
+}
+
+func (*testInterface) Name() string {
+	return ""
+}
+
+func (*testInterface) Enabled() bool {
+	return true
+}
+
+func (*testInterface) Promiscuous() bool {
+	return false
+}
+
+func (t *testInterface) WritePacketToRemote(remoteLinkAddr tcpip.LinkAddress, gso *stack.GSO, protocol tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) *tcpip.Error {
+	r := stack.Route{
+		NetProto: protocol,
+	}
+	r.ResolveWith(remoteLinkAddr)
+	return t.LinkEndpoint.WritePacket(&r, gso, protocol, pkt)
+}
+
 func TestLinkAddressRequest(t *testing.T) {
+	const nicID = 1
+
+	testAddr := tcpip.Address([]byte{1, 2, 3, 4})
+
 	tests := []struct {
 		name           string
+		nicAddr        tcpip.Address
+		localAddr      tcpip.Address
 		remoteLinkAddr tcpip.LinkAddress
-		expectLinkAddr tcpip.LinkAddress
+
+		expectedErr            *tcpip.Error
+		expectedLocalAddr      tcpip.Address
+		expectedRemoteLinkAddr tcpip.LinkAddress
 	}{
 		{
-			name:           "Unicast",
+			name:                   "Unicast",
+			nicAddr:                stackAddr,
+			localAddr:              stackAddr,
+			remoteLinkAddr:         remoteLinkAddr,
+			expectedLocalAddr:      stackAddr,
+			expectedRemoteLinkAddr: remoteLinkAddr,
+		},
+		{
+			name:                   "Multicast",
+			nicAddr:                stackAddr,
+			localAddr:              stackAddr,
+			remoteLinkAddr:         "",
+			expectedLocalAddr:      stackAddr,
+			expectedRemoteLinkAddr: header.EthernetBroadcastAddress,
+		},
+		{
+			name:                   "Unicast with unspecified source",
+			nicAddr:                stackAddr,
+			remoteLinkAddr:         remoteLinkAddr,
+			expectedLocalAddr:      stackAddr,
+			expectedRemoteLinkAddr: remoteLinkAddr,
+		},
+		{
+			name:                   "Multicast with unspecified source",
+			nicAddr:                stackAddr,
+			remoteLinkAddr:         "",
+			expectedLocalAddr:      stackAddr,
+			expectedRemoteLinkAddr: header.EthernetBroadcastAddress,
+		},
+		{
+			name:           "Unicast with unassigned address",
+			localAddr:      testAddr,
+			remoteLinkAddr: remoteLinkAddr,
+			expectedErr:    tcpip.ErrBadLocalAddress,
+		},
+		{
+			name:           "Multicast with unassigned address",
+			localAddr:      testAddr,
+			remoteLinkAddr: "",
+			expectedErr:    tcpip.ErrBadLocalAddress,
+		},
+		{
+			name:           "Unicast with no local address available",
 			remoteLinkAddr: remoteLinkAddr,
-			expectLinkAddr: remoteLinkAddr,
+			expectedErr:    tcpip.ErrNetworkUnreachable,
 		},
 		{
-			name:           "Multicast",
+			name:           "Multicast with no local address available",
 			remoteLinkAddr: "",
-			expectLinkAddr: header.EthernetBroadcastAddress,
+			expectedErr:    tcpip.ErrNetworkUnreachable,
 		},
 	}
 
 	for _, test := range tests {
-		p := arp.NewProtocol(nil)
-		linkRes, ok := p.(stack.LinkAddressResolver)
-		if !ok {
-			t.Fatal("expected ARP protocol to implement stack.LinkAddressResolver")
-		}
+		t.Run(test.name, func(t *testing.T) {
+			s := stack.New(stack.Options{
+				NetworkProtocols: []stack.NetworkProtocolFactory{arp.NewProtocol, ipv4.NewProtocol},
+			})
+			p := s.NetworkProtocolInstance(arp.ProtocolNumber)
+			linkRes, ok := p.(stack.LinkAddressResolver)
+			if !ok {
+				t.Fatal("expected ARP protocol to implement stack.LinkAddressResolver")
+			}
 
-		linkEP := channel.New(defaultChannelSize, defaultMTU, stackLinkAddr)
-		if err := linkRes.LinkAddressRequest(stackAddr, remoteAddr, test.remoteLinkAddr, linkEP); err != nil {
-			t.Errorf("got p.LinkAddressRequest(%s, %s, %s, _) = %s", stackAddr, remoteAddr, test.remoteLinkAddr, err)
-		}
+			linkEP := channel.New(defaultChannelSize, defaultMTU, stackLinkAddr)
+			if err := s.CreateNIC(nicID, linkEP); err != nil {
+				t.Fatalf("s.CreateNIC(%d, _): %s", nicID, err)
+			}
 
-		pkt, ok := linkEP.Read()
-		if !ok {
-			t.Fatal("expected to send a link address request")
-		}
+			if len(test.nicAddr) != 0 {
+				if err := s.AddAddress(nicID, ipv4.ProtocolNumber, test.nicAddr); err != nil {
+					t.Fatalf("s.AddAddress(%d, %d, %s): %s", nicID, ipv4.ProtocolNumber, test.nicAddr, err)
+				}
+			}
 
-		if got, want := pkt.Route.RemoteLinkAddress, test.expectLinkAddr; got != want {
-			t.Errorf("got pkt.Route.RemoteLinkAddress = %s, want = %s", got, want)
-		}
+			// We pass a test network interface to LinkAddressRequest with the same
+			// NIC ID and link endpoint used by the NIC we created earlier so that we
+			// can mock a link address request and observe the packets sent to the
+			// link endpoint even though the stack uses the real NIC to validate the
+			// local address.
+			if err := linkRes.LinkAddressRequest(remoteAddr, test.localAddr, test.remoteLinkAddr, &testInterface{LinkEndpoint: linkEP, nicID: nicID}); err != test.expectedErr {
+				t.Fatalf("got p.LinkAddressRequest(%s, %s, %s, _) = %s, want = %s", remoteAddr, test.localAddr, test.remoteLinkAddr, err, test.expectedErr)
+			}
+
+			if test.expectedErr != nil {
+				return
+			}
+
+			pkt, ok := linkEP.Read()
+			if !ok {
+				t.Fatal("expected to send a link address request")
+			}
+
+			if got := pkt.Route.RemoteLinkAddress(); got != test.expectedRemoteLinkAddr {
+				t.Errorf("got pkt.Route.RemoteLinkAddress() = %s, want = %s", got, test.expectedRemoteLinkAddr)
+			}
+
+			rep := header.ARP(stack.PayloadSince(pkt.Pkt.NetworkHeader()))
+			if got := tcpip.LinkAddress(rep.HardwareAddressSender()); got != stackLinkAddr {
+				t.Errorf("got HardwareAddressSender = %s, want = %s", got, stackLinkAddr)
+			}
+			if got := tcpip.Address(rep.ProtocolAddressSender()); got != test.expectedLocalAddr {
+				t.Errorf("got ProtocolAddressSender = %s, want = %s", got, test.expectedLocalAddr)
+			}
+			if got, want := tcpip.LinkAddress(rep.HardwareAddressTarget()), tcpip.LinkAddress("\x00\x00\x00\x00\x00\x00"); got != want {
+				t.Errorf("got HardwareAddressTarget = %s, want = %s", got, want)
+			}
+			if got := tcpip.Address(rep.ProtocolAddressTarget()); got != remoteAddr {
+				t.Errorf("got ProtocolAddressTarget = %s, want = %s", got, remoteAddr)
+			}
+		})
 	}
 }
diff --git a/pkg/tcpip/network/fragmentation/BUILD b/pkg/tcpip/network/fragmentation/BUILD
index 47fb63290..429af69ee 100644
--- a/pkg/tcpip/network/fragmentation/BUILD
+++ b/pkg/tcpip/network/fragmentation/BUILD
@@ -18,7 +18,6 @@ go_template_instance(
 go_library(
     name = "fragmentation",
     srcs = [
-        "frag_heap.go",
         "fragmentation.go",
         "reassembler.go",
         "reassembler_list.go",
@@ -38,7 +37,6 @@ go_test(
     name = "fragmentation_test",
     size = "small",
     srcs = [
-        "frag_heap_test.go",
         "fragmentation_test.go",
         "reassembler_test.go",
     ],
@@ -47,6 +45,7 @@ go_test(
         "//pkg/tcpip/buffer",
         "//pkg/tcpip/faketime",
         "//pkg/tcpip/network/testutil",
+        "//pkg/tcpip/stack",
         "@com_github_google_go_cmp//cmp:go_default_library",
     ],
 )
diff --git a/pkg/tcpip/network/fragmentation/frag_heap.go b/pkg/tcpip/network/fragmentation/frag_heap.go
deleted file mode 100644
index 0b570d25a..000000000
--- a/pkg/tcpip/network/fragmentation/frag_heap.go
+++ /dev/null
@@ -1,77 +0,0 @@
-// Copyright 2018 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package fragmentation
-
-import (
-	"container/heap"
-	"fmt"
-
-	"gvisor.dev/gvisor/pkg/tcpip/buffer"
-)
-
-type fragment struct {
-	offset uint16
-	vv     buffer.VectorisedView
-}
-
-type fragHeap []fragment
-
-func (h *fragHeap) Len() int {
-	return len(*h)
-}
-
-func (h *fragHeap) Less(i, j int) bool {
-	return (*h)[i].offset < (*h)[j].offset
-}
-
-func (h *fragHeap) Swap(i, j int) {
-	(*h)[i], (*h)[j] = (*h)[j], (*h)[i]
-}
-
-func (h *fragHeap) Push(x interface{}) {
-	*h = append(*h, x.(fragment))
-}
-
-func (h *fragHeap) Pop() interface{} {
-	old := *h
-	n := len(old)
-	x := old[n-1]
-	*h = old[:n-1]
-	return x
-}
-
-// reassamble empties the heap and returns a VectorisedView
-// containing a reassambled version of the fragments inside the heap.
-func (h *fragHeap) reassemble() (buffer.VectorisedView, error) {
-	curr := heap.Pop(h).(fragment)
-	views := curr.vv.Views()
-	size := curr.vv.Size()
-
-	if curr.offset != 0 {
-		return buffer.VectorisedView{}, fmt.Errorf("offset of the first packet is != 0 (%d)", curr.offset)
-	}
-
-	for h.Len() > 0 {
-		curr := heap.Pop(h).(fragment)
-		if int(curr.offset) < size {
-			curr.vv.TrimFront(size - int(curr.offset))
-		} else if int(curr.offset) > size {
-			return buffer.VectorisedView{}, fmt.Errorf("packet has a hole, expected offset %d, got %d", size, curr.offset)
-		}
-		size += curr.vv.Size()
-		views = append(views, curr.vv.Views()...)
-	}
-	return buffer.NewVectorisedView(size, views), nil
-}
diff --git a/pkg/tcpip/network/fragmentation/frag_heap_test.go b/pkg/tcpip/network/fragmentation/frag_heap_test.go
deleted file mode 100644
index 9ececcb9f..000000000
--- a/pkg/tcpip/network/fragmentation/frag_heap_test.go
+++ /dev/null
@@ -1,126 +0,0 @@
-// Copyright 2018 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package fragmentation
-
-import (
-	"container/heap"
-	"reflect"
-	"testing"
-
-	"gvisor.dev/gvisor/pkg/tcpip/buffer"
-)
-
-var reassambleTestCases = []struct {
-	comment string
-	in      []fragment
-	want    buffer.VectorisedView
-}{
-	{
-		comment: "Non-overlapping in-order",
-		in: []fragment{
-			{offset: 0, vv: vv(1, "0")},
-			{offset: 1, vv: vv(1, "1")},
-		},
-		want: vv(2, "0", "1"),
-	},
-	{
-		comment: "Non-overlapping out-of-order",
-		in: []fragment{
-			{offset: 1, vv: vv(1, "1")},
-			{offset: 0, vv: vv(1, "0")},
-		},
-		want: vv(2, "0", "1"),
-	},
-	{
-		comment: "Duplicated packets",
-		in: []fragment{
-			{offset: 0, vv: vv(1, "0")},
-			{offset: 0, vv: vv(1, "0")},
-		},
-		want: vv(1, "0"),
-	},
-	{
-		comment: "Overlapping in-order",
-		in: []fragment{
-			{offset: 0, vv: vv(2, "01")},
-			{offset: 1, vv: vv(2, "12")},
-		},
-		want: vv(3, "01", "2"),
-	},
-	{
-		comment: "Overlapping out-of-order",
-		in: []fragment{
-			{offset: 1, vv: vv(2, "12")},
-			{offset: 0, vv: vv(2, "01")},
-		},
-		want: vv(3, "01", "2"),
-	},
-	{
-		comment: "Overlapping subset in-order",
-		in: []fragment{
-			{offset: 0, vv: vv(3, "012")},
-			{offset: 1, vv: vv(1, "1")},
-		},
-		want: vv(3, "012"),
-	},
-	{
-		comment: "Overlapping subset out-of-order",
-		in: []fragment{
-			{offset: 1, vv: vv(1, "1")},
-			{offset: 0, vv: vv(3, "012")},
-		},
-		want: vv(3, "012"),
-	},
-}
-
-func TestReassamble(t *testing.T) {
-	for _, c := range reassambleTestCases {
-		t.Run(c.comment, func(t *testing.T) {
-			h := make(fragHeap, 0, 8)
-			heap.Init(&h)
-			for _, f := range c.in {
-				heap.Push(&h, f)
-			}
-			got, err := h.reassemble()
-			if err != nil {
-				t.Fatal(err)
-			}
-			if !reflect.DeepEqual(got, c.want) {
-				t.Errorf("got reassemble(%+v) = %v, want = %v", c.in, got, c.want)
-			}
-		})
-	}
-}
-
-func TestReassambleFailsForNonZeroOffset(t *testing.T) {
-	h := make(fragHeap, 0, 8)
-	heap.Init(&h)
-	heap.Push(&h, fragment{offset: 1, vv: vv(1, "0")})
-	_, err := h.reassemble()
-	if err == nil {
-		t.Errorf("reassemble() did not fail when the first packet had offset != 0")
-	}
-}
-
-func TestReassambleFailsForHoles(t *testing.T) {
-	h := make(fragHeap, 0, 8)
-	heap.Init(&h)
-	heap.Push(&h, fragment{offset: 0, vv: vv(1, "0")})
-	heap.Push(&h, fragment{offset: 2, vv: vv(1, "1")})
-	_, err := h.reassemble()
-	if err == nil {
-		t.Errorf("reassemble() did not fail when there was a hole in the packet")
-	}
-}
diff --git a/pkg/tcpip/network/fragmentation/fragmentation.go b/pkg/tcpip/network/fragmentation/fragmentation.go
index ed502a473..1af87d713 100644
--- a/pkg/tcpip/network/fragmentation/fragmentation.go
+++ b/pkg/tcpip/network/fragmentation/fragmentation.go
@@ -46,9 +46,17 @@ const (
 )
 
 var (
-	// ErrInvalidArgs indicates to the caller that that an invalid argument was
+	// ErrInvalidArgs indicates to the caller that an invalid argument was
 	// provided.
 	ErrInvalidArgs = errors.New("invalid args")
+
+	// ErrFragmentOverlap indicates that, during reassembly, a fragment overlaps
+	// with another one.
+	ErrFragmentOverlap = errors.New("overlapping fragments")
+
+	// ErrFragmentConflict indicates that, during reassembly, some fragments are
+	// in conflict with one another.
+	ErrFragmentConflict = errors.New("conflicting fragments")
 )
 
 // FragmentID is the identifier for a fragment.
@@ -71,16 +79,25 @@ type FragmentID struct {
 // Fragmentation is the main structure that other modules
 // of the stack should use to implement IP Fragmentation.
 type Fragmentation struct {
-	mu           sync.Mutex
-	highLimit    int
-	lowLimit     int
-	reassemblers map[FragmentID]*reassembler
-	rList        reassemblerList
-	size         int
-	timeout      time.Duration
-	blockSize    uint16
-	clock        tcpip.Clock
-	releaseJob   *tcpip.Job
+	mu             sync.Mutex
+	highLimit      int
+	lowLimit       int
+	reassemblers   map[FragmentID]*reassembler
+	rList          reassemblerList
+	size           int
+	timeout        time.Duration
+	blockSize      uint16
+	clock          tcpip.Clock
+	releaseJob     *tcpip.Job
+	timeoutHandler TimeoutHandler
+}
+
+// TimeoutHandler is notified when a packet reassembly has timed out.
+type TimeoutHandler interface {
+	// OnReassemblyTimeout will be called with the first fragment (or nil, if the
+	// first fragment has not been received) of a packet whose reassembly has
+	// timed out.
+	OnReassemblyTimeout(pkt *stack.PacketBuffer)
 }
 
 // NewFragmentation creates a new Fragmentation.
@@ -97,7 +114,7 @@ type Fragmentation struct {
 // reassemblingTimeout specifies the maximum time allowed to reassemble a packet.
 // Fragments are lazily evicted only when a new a packet with an
 // already existing fragmentation-id arrives after the timeout.
-func NewFragmentation(blockSize uint16, highMemoryLimit, lowMemoryLimit int, reassemblingTimeout time.Duration, clock tcpip.Clock) *Fragmentation {
+func NewFragmentation(blockSize uint16, highMemoryLimit, lowMemoryLimit int, reassemblingTimeout time.Duration, clock tcpip.Clock, timeoutHandler TimeoutHandler) *Fragmentation {
 	if lowMemoryLimit >= highMemoryLimit {
 		lowMemoryLimit = highMemoryLimit
 	}
@@ -111,12 +128,13 @@ func NewFragmentation(blockSize uint16, highMemoryLimit, lowMemoryLimit int, rea
 	}
 
 	f := &Fragmentation{
-		reassemblers: make(map[FragmentID]*reassembler),
-		highLimit:    highMemoryLimit,
-		lowLimit:     lowMemoryLimit,
-		timeout:      reassemblingTimeout,
-		blockSize:    blockSize,
-		clock:        clock,
+		reassemblers:   make(map[FragmentID]*reassembler),
+		highLimit:      highMemoryLimit,
+		lowLimit:       lowMemoryLimit,
+		timeout:        reassemblingTimeout,
+		blockSize:      blockSize,
+		clock:          clock,
+		timeoutHandler: timeoutHandler,
 	}
 	f.releaseJob = tcpip.NewJob(f.clock, &f.mu, f.releaseReassemblersLocked)
 
@@ -137,7 +155,7 @@ func NewFragmentation(blockSize uint16, highMemoryLimit, lowMemoryLimit int, rea
 // to be given here outside of the FragmentID struct because IPv6 should not use
 // the protocol to identify a fragment.
 func (f *Fragmentation) Process(
-	id FragmentID, first, last uint16, more bool, proto uint8, vv buffer.VectorisedView) (
+	id FragmentID, first, last uint16, more bool, proto uint8, pkt *stack.PacketBuffer) (
 	buffer.VectorisedView, uint8, bool, error) {
 	if first > last {
 		return buffer.VectorisedView{}, 0, false, fmt.Errorf("first=%d is greater than last=%d: %w", first, last, ErrInvalidArgs)
@@ -152,10 +170,9 @@ func (f *Fragmentation) Process(
 		return buffer.VectorisedView{}, 0, false, fmt.Errorf("fragment size=%d bytes is not a multiple of block size=%d on non-final fragment: %w", fragmentSize, f.blockSize, ErrInvalidArgs)
 	}
 
-	if l := vv.Size(); l < int(fragmentSize) {
-		return buffer.VectorisedView{}, 0, false, fmt.Errorf("got fragment size=%d bytes less than the expected fragment size=%d bytes (first=%d last=%d): %w", l, fragmentSize, first, last, ErrInvalidArgs)
+	if l := pkt.Data.Size(); l != int(fragmentSize) {
+		return buffer.VectorisedView{}, 0, false, fmt.Errorf("got fragment size=%d bytes not equal to the expected fragment size=%d bytes (first=%d last=%d): %w", l, fragmentSize, first, last, ErrInvalidArgs)
 	}
-	vv.CapLength(int(fragmentSize))
 
 	f.mu.Lock()
 	r, ok := f.reassemblers[id]
@@ -173,19 +190,19 @@ func (f *Fragmentation) Process(
 	}
 	f.mu.Unlock()
 
-	res, firstFragmentProto, done, consumed, err := r.process(first, last, more, proto, vv)
+	res, firstFragmentProto, done, consumed, err := r.process(first, last, more, proto, pkt)
 	if err != nil {
 		// We probably got an invalid sequence of fragments. Just
 		// discard the reassembler and move on.
 		f.mu.Lock()
-		f.release(r)
+		f.release(r, false /* timedOut */)
 		f.mu.Unlock()
 		return buffer.VectorisedView{}, 0, false, fmt.Errorf("fragmentation processing error: %w", err)
 	}
 	f.mu.Lock()
 	f.size += consumed
 	if done {
-		f.release(r)
+		f.release(r, false /* timedOut */)
 	}
 	// Evict reassemblers if we are consuming more memory than highLimit until
 	// we reach lowLimit.
@@ -195,14 +212,14 @@ func (f *Fragmentation) Process(
 			if tail == nil {
 				break
 			}
-			f.release(tail)
+			f.release(tail, false /* timedOut */)
 		}
 	}
 	f.mu.Unlock()
 	return res, firstFragmentProto, done, nil
 }
 
-func (f *Fragmentation) release(r *reassembler) {
+func (f *Fragmentation) release(r *reassembler, timedOut bool) {
 	// Before releasing a fragment we need to check if r is already marked as done.
 	// Otherwise, we would delete it twice.
 	if r.checkDoneOrMark() {
@@ -216,6 +233,10 @@ func (f *Fragmentation) release(r *reassembler) {
 		log.Printf("memory counter < 0 (%d), this is an accounting bug that requires investigation", f.size)
 		f.size = 0
 	}
+
+	if h := f.timeoutHandler; timedOut && h != nil {
+		h.OnReassemblyTimeout(r.pkt)
+	}
 }
 
 // releaseReassemblersLocked releases already-expired reassemblers, then
@@ -238,31 +259,31 @@ func (f *Fragmentation) releaseReassemblersLocked() {
 			break
 		}
 		// If the oldest reassembler has already expired, release it.
-		f.release(r)
+		f.release(r, true /* timedOut */)
 	}
 }
 
 // PacketFragmenter is the book-keeping struct for packet fragmentation.
 type PacketFragmenter struct {
-	transportHeader buffer.View
-	data            buffer.VectorisedView
-	reserve         int
-	innerMTU        int
-	fragmentCount   int
-	currentFragment int
-	fragmentOffset  int
+	transportHeader    buffer.View
+	data               buffer.VectorisedView
+	reserve            int
+	fragmentPayloadLen int
+	fragmentCount      int
+	currentFragment    int
+	fragmentOffset     int
 }
 
 // MakePacketFragmenter prepares the struct needed for packet fragmentation.
 //
 // pkt is the packet to be fragmented.
 //
-// innerMTU is the maximum number of bytes of fragmentable data a fragment can
+// fragmentPayloadLen is the maximum number of bytes of fragmentable data a fragment can
 // have.
 //
 // reserve is the number of bytes that should be reserved for the headers in
 // each generated fragment.
-func MakePacketFragmenter(pkt *stack.PacketBuffer, innerMTU int, reserve int) PacketFragmenter {
+func MakePacketFragmenter(pkt *stack.PacketBuffer, fragmentPayloadLen uint32, reserve int) PacketFragmenter {
 	// As per RFC 8200 Section 4.5, some IPv6 extension headers should not be
 	// repeated in each fragment. However we do not currently support any header
 	// of that kind yet, so the following computation is valid for both IPv4 and
@@ -273,13 +294,13 @@ func MakePacketFragmenter(pkt *stack.PacketBuffer, innerMTU int, reserve int) Pa
 	var fragmentableData buffer.VectorisedView
 	fragmentableData.AppendView(pkt.TransportHeader().View())
 	fragmentableData.Append(pkt.Data)
-	fragmentCount := (fragmentableData.Size() + innerMTU - 1) / innerMTU
+	fragmentCount := (uint32(fragmentableData.Size()) + fragmentPayloadLen - 1) / fragmentPayloadLen
 
 	return PacketFragmenter{
-		data:          fragmentableData,
-		reserve:       reserve,
-		innerMTU:      innerMTU,
-		fragmentCount: fragmentCount,
+		data:               fragmentableData,
+		reserve:            reserve,
+		fragmentPayloadLen: int(fragmentPayloadLen),
+		fragmentCount:      int(fragmentCount),
 	}
 }
 
@@ -302,7 +323,7 @@ func (pf *PacketFragmenter) BuildNextFragment() (*stack.PacketBuffer, int, int,
 	})
 
 	// Copy data for the fragment.
-	copied := pf.data.ReadToVV(&fragPkt.Data, pf.innerMTU)
+	copied := pf.data.ReadToVV(&fragPkt.Data, pf.fragmentPayloadLen)
 
 	offset := pf.fragmentOffset
 	pf.fragmentOffset += copied
diff --git a/pkg/tcpip/network/fragmentation/fragmentation_test.go b/pkg/tcpip/network/fragmentation/fragmentation_test.go
index d3c7d7f92..3a79688a8 100644
--- a/pkg/tcpip/network/fragmentation/fragmentation_test.go
+++ b/pkg/tcpip/network/fragmentation/fragmentation_test.go
@@ -24,6 +24,7 @@ import (
 	"gvisor.dev/gvisor/pkg/tcpip/buffer"
 	"gvisor.dev/gvisor/pkg/tcpip/faketime"
 	"gvisor.dev/gvisor/pkg/tcpip/network/testutil"
+	"gvisor.dev/gvisor/pkg/tcpip/stack"
 )
 
 // reassembleTimeout is dummy timeout used for testing, where the clock never
@@ -40,13 +41,19 @@ func vv(size int, pieces ...string) buffer.VectorisedView {
 	return buffer.NewVectorisedView(size, views)
 }
 
+func pkt(size int, pieces ...string) *stack.PacketBuffer {
+	return stack.NewPacketBuffer(stack.PacketBufferOptions{
+		Data: vv(size, pieces...),
+	})
+}
+
 type processInput struct {
 	id    FragmentID
 	first uint16
 	last  uint16
 	more  bool
 	proto uint8
-	vv    buffer.VectorisedView
+	pkt   *stack.PacketBuffer
 }
 
 type processOutput struct {
@@ -63,8 +70,8 @@ var processTestCases = []struct {
 	{
 		comment: "One ID",
 		in: []processInput{
-			{id: FragmentID{ID: 0}, first: 0, last: 1, more: true, vv: vv(2, "01")},
-			{id: FragmentID{ID: 0}, first: 2, last: 3, more: false, vv: vv(2, "23")},
+			{id: FragmentID{ID: 0}, first: 0, last: 1, more: true, pkt: pkt(2, "01")},
+			{id: FragmentID{ID: 0}, first: 2, last: 3, more: false, pkt: pkt(2, "23")},
 		},
 		out: []processOutput{
 			{vv: buffer.VectorisedView{}, done: false},
@@ -74,8 +81,8 @@ var processTestCases = []struct {
 	{
 		comment: "Next Header protocol mismatch",
 		in: []processInput{
-			{id: FragmentID{ID: 0}, first: 0, last: 1, more: true, proto: 6, vv: vv(2, "01")},
-			{id: FragmentID{ID: 0}, first: 2, last: 3, more: false, proto: 17, vv: vv(2, "23")},
+			{id: FragmentID{ID: 0}, first: 0, last: 1, more: true, proto: 6, pkt: pkt(2, "01")},
+			{id: FragmentID{ID: 0}, first: 2, last: 3, more: false, proto: 17, pkt: pkt(2, "23")},
 		},
 		out: []processOutput{
 			{vv: buffer.VectorisedView{}, done: false},
@@ -85,10 +92,10 @@ var processTestCases = []struct {
 	{
 		comment: "Two IDs",
 		in: []processInput{
-			{id: FragmentID{ID: 0}, first: 0, last: 1, more: true, vv: vv(2, "01")},
-			{id: FragmentID{ID: 1}, first: 0, last: 1, more: true, vv: vv(2, "ab")},
-			{id: FragmentID{ID: 1}, first: 2, last: 3, more: false, vv: vv(2, "cd")},
-			{id: FragmentID{ID: 0}, first: 2, last: 3, more: false, vv: vv(2, "23")},
+			{id: FragmentID{ID: 0}, first: 0, last: 1, more: true, pkt: pkt(2, "01")},
+			{id: FragmentID{ID: 1}, first: 0, last: 1, more: true, pkt: pkt(2, "ab")},
+			{id: FragmentID{ID: 1}, first: 2, last: 3, more: false, pkt: pkt(2, "cd")},
+			{id: FragmentID{ID: 0}, first: 2, last: 3, more: false, pkt: pkt(2, "23")},
 		},
 		out: []processOutput{
 			{vv: buffer.VectorisedView{}, done: false},
@@ -102,17 +109,17 @@ var processTestCases = []struct {
 func TestFragmentationProcess(t *testing.T) {
 	for _, c := range processTestCases {
 		t.Run(c.comment, func(t *testing.T) {
-			f := NewFragmentation(minBlockSize, 1024, 512, reassembleTimeout, &faketime.NullClock{})
+			f := NewFragmentation(minBlockSize, 1024, 512, reassembleTimeout, &faketime.NullClock{}, nil)
 			firstFragmentProto := c.in[0].proto
 			for i, in := range c.in {
-				vv, proto, done, err := f.Process(in.id, in.first, in.last, in.more, in.proto, in.vv)
+				vv, proto, done, err := f.Process(in.id, in.first, in.last, in.more, in.proto, in.pkt)
 				if err != nil {
-					t.Fatalf("f.Process(%+v, %d, %d, %t, %d, %X) failed: %s",
-						in.id, in.first, in.last, in.more, in.proto, in.vv.ToView(), err)
+					t.Fatalf("f.Process(%+v, %d, %d, %t, %d, %#v) failed: %s",
+						in.id, in.first, in.last, in.more, in.proto, in.pkt, err)
 				}
 				if !reflect.DeepEqual(vv, c.out[i].vv) {
-					t.Errorf("got Process(%+v, %d, %d, %t, %d, %X) = (%X, _, _, _), want = (%X, _, _, _)",
-						in.id, in.first, in.last, in.more, in.proto, in.vv.ToView(), vv.ToView(), c.out[i].vv.ToView())
+					t.Errorf("got Process(%+v, %d, %d, %t, %d, %#v) = (%X, _, _, _), want = (%X, _, _, _)",
+						in.id, in.first, in.last, in.more, in.proto, in.pkt, vv.ToView(), c.out[i].vv.ToView())
 				}
 				if done != c.out[i].done {
 					t.Errorf("got Process(%+v, %d, %d, %t, %d, _) = (_, _, %t, _), want = (_, _, %t, _)",
@@ -236,11 +243,11 @@ func TestReassemblingTimeout(t *testing.T) {
 	for _, test := range tests {
 		t.Run(test.name, func(t *testing.T) {
 			clock := faketime.NewManualClock()
-			f := NewFragmentation(minBlockSize, HighFragThreshold, LowFragThreshold, reassemblyTimeout, clock)
+			f := NewFragmentation(minBlockSize, HighFragThreshold, LowFragThreshold, reassemblyTimeout, clock, nil)
 			for _, event := range test.events {
 				clock.Advance(event.clockAdvance)
 				if frag := event.fragment; frag != nil {
-					_, _, done, err := f.Process(FragmentID{}, frag.first, frag.last, frag.more, protocol, vv(len(frag.data), frag.data))
+					_, _, done, err := f.Process(FragmentID{}, frag.first, frag.last, frag.more, protocol, pkt(len(frag.data), frag.data))
 					if err != nil {
 						t.Fatalf("%s: f.Process failed: %s", event.name, err)
 					}
@@ -257,17 +264,17 @@ func TestReassemblingTimeout(t *testing.T) {
 }
 
 func TestMemoryLimits(t *testing.T) {
-	f := NewFragmentation(minBlockSize, 3, 1, reassembleTimeout, &faketime.NullClock{})
+	f := NewFragmentation(minBlockSize, 3, 1, reassembleTimeout, &faketime.NullClock{}, nil)
 	// Send first fragment with id = 0.
-	f.Process(FragmentID{ID: 0}, 0, 0, true, 0xFF, vv(1, "0"))
+	f.Process(FragmentID{ID: 0}, 0, 0, true, 0xFF, pkt(1, "0"))
 	// Send first fragment with id = 1.
-	f.Process(FragmentID{ID: 1}, 0, 0, true, 0xFF, vv(1, "1"))
+	f.Process(FragmentID{ID: 1}, 0, 0, true, 0xFF, pkt(1, "1"))
 	// Send first fragment with id = 2.
-	f.Process(FragmentID{ID: 2}, 0, 0, true, 0xFF, vv(1, "2"))
+	f.Process(FragmentID{ID: 2}, 0, 0, true, 0xFF, pkt(1, "2"))
 
 	// Send first fragment with id = 3. This should caused id = 0 and id = 1 to be
 	// evicted.
-	f.Process(FragmentID{ID: 3}, 0, 0, true, 0xFF, vv(1, "3"))
+	f.Process(FragmentID{ID: 3}, 0, 0, true, 0xFF, pkt(1, "3"))
 
 	if _, ok := f.reassemblers[FragmentID{ID: 0}]; ok {
 		t.Errorf("Memory limits are not respected: id=0 has not been evicted.")
@@ -281,11 +288,11 @@ func TestMemoryLimits(t *testing.T) {
 }
 
 func TestMemoryLimitsIgnoresDuplicates(t *testing.T) {
-	f := NewFragmentation(minBlockSize, 1, 0, reassembleTimeout, &faketime.NullClock{})
+	f := NewFragmentation(minBlockSize, 1, 0, reassembleTimeout, &faketime.NullClock{}, nil)
 	// Send first fragment with id = 0.
-	f.Process(FragmentID{}, 0, 0, true, 0xFF, vv(1, "0"))
+	f.Process(FragmentID{}, 0, 0, true, 0xFF, pkt(1, "0"))
 	// Send the same packet again.
-	f.Process(FragmentID{}, 0, 0, true, 0xFF, vv(1, "0"))
+	f.Process(FragmentID{}, 0, 0, true, 0xFF, pkt(1, "0"))
 
 	got := f.size
 	want := 1
@@ -327,6 +334,7 @@ func TestErrors(t *testing.T) {
 			last:      3,
 			more:      true,
 			data:      "012",
+			err:       ErrInvalidArgs,
 		},
 		{
 			name:      "exact block size with more and too little data",
@@ -376,8 +384,8 @@ func TestErrors(t *testing.T) {
 
 	for _, test := range tests {
 		t.Run(test.name, func(t *testing.T) {
-			f := NewFragmentation(test.blockSize, HighFragThreshold, LowFragThreshold, reassembleTimeout, &faketime.NullClock{})
-			_, _, done, err := f.Process(FragmentID{}, test.first, test.last, test.more, 0, vv(len(test.data), test.data))
+			f := NewFragmentation(test.blockSize, HighFragThreshold, LowFragThreshold, reassembleTimeout, &faketime.NullClock{}, nil)
+			_, _, done, err := f.Process(FragmentID{}, test.first, test.last, test.more, 0, pkt(len(test.data), test.data))
 			if !errors.Is(err, test.err) {
 				t.Errorf("got Process(_, %d, %d, %t, _, %q) = (_, _, _, %v), want = (_, _, _, %v)", test.first, test.last, test.more, test.data, err, test.err)
 			}
@@ -403,14 +411,14 @@ func TestPacketFragmenter(t *testing.T) {
 
 	tests := []struct {
 		name               string
-		innerMTU           int
+		fragmentPayloadLen uint32
 		transportHeaderLen int
 		payloadSize        int
 		wantFragments      []fragmentInfo
 	}{
 		{
 			name:               "Packet exactly fits in MTU",
-			innerMTU:           1280,
+			fragmentPayloadLen: 1280,
 			transportHeaderLen: 0,
 			payloadSize:        1280,
 			wantFragments: []fragmentInfo{
@@ -419,7 +427,7 @@ func TestPacketFragmenter(t *testing.T) {
 		},
 		{
 			name:               "Packet exactly does not fit in MTU",
-			innerMTU:           1000,
+			fragmentPayloadLen: 1000,
 			transportHeaderLen: 0,
 			payloadSize:        1001,
 			wantFragments: []fragmentInfo{
@@ -429,7 +437,7 @@ func TestPacketFragmenter(t *testing.T) {
 		},
 		{
 			name:               "Packet has a transport header",
-			innerMTU:           560,
+			fragmentPayloadLen: 560,
 			transportHeaderLen: 40,
 			payloadSize:        560,
 			wantFragments: []fragmentInfo{
@@ -439,7 +447,7 @@ func TestPacketFragmenter(t *testing.T) {
 		},
 		{
 			name:               "Packet has a huge transport header",
-			innerMTU:           500,
+			fragmentPayloadLen: 500,
 			transportHeaderLen: 1300,
 			payloadSize:        500,
 			wantFragments: []fragmentInfo{
@@ -458,7 +466,7 @@ func TestPacketFragmenter(t *testing.T) {
 			originalPayload.AppendView(pkt.TransportHeader().View())
 			originalPayload.Append(pkt.Data)
 			var reassembledPayload buffer.VectorisedView
-			pf := MakePacketFragmenter(pkt, test.innerMTU, reserve)
+			pf := MakePacketFragmenter(pkt, test.fragmentPayloadLen, reserve)
 			for i := 0; ; i++ {
 				fragPkt, offset, copied, more := pf.BuildNextFragment()
 				wantFragment := test.wantFragments[i]
@@ -474,8 +482,8 @@ func TestPacketFragmenter(t *testing.T) {
 				if more != wantFragment.more {
 					t.Errorf("(fragment #%d) got more = %t, want = %t", i, more, wantFragment.more)
 				}
-				if got := fragPkt.Size(); got > test.innerMTU {
-					t.Errorf("(fragment #%d) got fragPkt.Size() = %d, want <= %d", i, got, test.innerMTU)
+				if got := uint32(fragPkt.Size()); got > test.fragmentPayloadLen {
+					t.Errorf("(fragment #%d) got fragPkt.Size() = %d, want <= %d", i, got, test.fragmentPayloadLen)
 				}
 				if got := fragPkt.AvailableHeaderBytes(); got != reserve {
 					t.Errorf("(fragment #%d) got fragPkt.AvailableHeaderBytes() = %d, want = %d", i, got, reserve)
@@ -497,3 +505,126 @@ func TestPacketFragmenter(t *testing.T) {
 		})
 	}
 }
+
+type testTimeoutHandler struct {
+	pkt *stack.PacketBuffer
+}
+
+func (h *testTimeoutHandler) OnReassemblyTimeout(pkt *stack.PacketBuffer) {
+	h.pkt = pkt
+}
+
+func TestTimeoutHandler(t *testing.T) {
+	const (
+		proto = 99
+	)
+
+	pk1 := pkt(1, "1")
+	pk2 := pkt(1, "2")
+
+	type processParam struct {
+		first uint16
+		last  uint16
+		more  bool
+		pkt   *stack.PacketBuffer
+	}
+
+	tests := []struct {
+		name      string
+		params    []processParam
+		wantError bool
+		wantPkt   *stack.PacketBuffer
+	}{
+		{
+			name: "onTimeout runs",
+			params: []processParam{
+				{
+					first: 0,
+					last:  0,
+					more:  true,
+					pkt:   pk1,
+				},
+			},
+			wantError: false,
+			wantPkt:   pk1,
+		},
+		{
+			name: "no first fragment",
+			params: []processParam{
+				{
+					first: 1,
+					last:  1,
+					more:  true,
+					pkt:   pk1,
+				},
+			},
+			wantError: false,
+			wantPkt:   nil,
+		},
+		{
+			name: "second pkt is ignored",
+			params: []processParam{
+				{
+					first: 0,
+					last:  0,
+					more:  true,
+					pkt:   pk1,
+				},
+				{
+					first: 0,
+					last:  0,
+					more:  true,
+					pkt:   pk2,
+				},
+			},
+			wantError: false,
+			wantPkt:   pk1,
+		},
+		{
+			name: "invalid args - first is greater than last",
+			params: []processParam{
+				{
+					first: 1,
+					last:  0,
+					more:  true,
+					pkt:   pk1,
+				},
+			},
+			wantError: true,
+			wantPkt:   nil,
+		},
+	}
+
+	id := FragmentID{ID: 0}
+
+	for _, test := range tests {
+		t.Run(test.name, func(t *testing.T) {
+			handler := &testTimeoutHandler{pkt: nil}
+
+			f := NewFragmentation(minBlockSize, HighFragThreshold, LowFragThreshold, reassembleTimeout, &faketime.NullClock{}, handler)
+
+			for _, p := range test.params {
+				if _, _, _, err := f.Process(id, p.first, p.last, p.more, proto, p.pkt); err != nil && !test.wantError {
+					t.Errorf("f.Process error = %s", err)
+				}
+			}
+			if !test.wantError {
+				r, ok := f.reassemblers[id]
+				if !ok {
+					t.Fatal("Reassembler not found")
+				}
+				f.release(r, true)
+			}
+			switch {
+			case handler.pkt != nil && test.wantPkt == nil:
+				t.Errorf("got handler.pkt = not nil (pkt.Data = %x), want = nil", handler.pkt.Data.ToView())
+			case handler.pkt == nil && test.wantPkt != nil:
+				t.Errorf("got handler.pkt = nil, want = not nil (pkt.Data = %x)", test.wantPkt.Data.ToView())
+			case handler.pkt != nil && test.wantPkt != nil:
+				if diff := cmp.Diff(test.wantPkt.Data.ToView(), handler.pkt.Data.ToView()); diff != "" {
+					t.Errorf("pkt.Data mismatch (-want, +got):\n%s", diff)
+				}
+			}
+		})
+	}
+}
diff --git a/pkg/tcpip/network/fragmentation/reassembler.go b/pkg/tcpip/network/fragmentation/reassembler.go
index 9bb051a30..9b20bb1d8 100644
--- a/pkg/tcpip/network/fragmentation/reassembler.go
+++ b/pkg/tcpip/network/fragmentation/reassembler.go
@@ -15,19 +15,21 @@
 package fragmentation
 
 import (
-	"container/heap"
-	"fmt"
 	"math"
+	"sort"
 
 	"gvisor.dev/gvisor/pkg/sync"
 	"gvisor.dev/gvisor/pkg/tcpip"
 	"gvisor.dev/gvisor/pkg/tcpip/buffer"
+	"gvisor.dev/gvisor/pkg/tcpip/stack"
 )
 
 type hole struct {
-	first   uint16
-	last    uint16
-	deleted bool
+	first  uint16
+	last   uint16
+	filled bool
+	final  bool
+	data   buffer.View
 }
 
 type reassembler struct {
@@ -37,83 +39,139 @@ type reassembler struct {
 	proto        uint8
 	mu           sync.Mutex
 	holes        []hole
-	deleted      int
-	heap         fragHeap
+	filled       int
 	done         bool
 	creationTime int64
+	pkt          *stack.PacketBuffer
 }
 
 func newReassembler(id FragmentID, clock tcpip.Clock) *reassembler {
 	r := &reassembler{
 		id:           id,
-		holes:        make([]hole, 0, 16),
-		heap:         make(fragHeap, 0, 8),
 		creationTime: clock.NowMonotonic(),
 	}
 	r.holes = append(r.holes, hole{
-		first:   0,
-		last:    math.MaxUint16,
-		deleted: false})
+		first:  0,
+		last:   math.MaxUint16,
+		filled: false,
+		final:  true,
+	})
 	return r
 }
 
-// updateHoles updates the list of holes for an incoming fragment and
-// returns true iff the fragment filled at least part of an existing hole.
-func (r *reassembler) updateHoles(first, last uint16, more bool) bool {
-	used := false
-	for i := range r.holes {
-		if r.holes[i].deleted || first > r.holes[i].last || last < r.holes[i].first {
-			continue
-		}
-		used = true
-		r.deleted++
-		r.holes[i].deleted = true
-		if first > r.holes[i].first {
-			r.holes = append(r.holes, hole{r.holes[i].first, first - 1, false})
-		}
-		if last < r.holes[i].last && more {
-			r.holes = append(r.holes, hole{last + 1, r.holes[i].last, false})
-		}
-	}
-	return used
-}
-
-func (r *reassembler) process(first, last uint16, more bool, proto uint8, vv buffer.VectorisedView) (buffer.VectorisedView, uint8, bool, int, error) {
+func (r *reassembler) process(first, last uint16, more bool, proto uint8, pkt *stack.PacketBuffer) (buffer.VectorisedView, uint8, bool, int, error) {
 	r.mu.Lock()
 	defer r.mu.Unlock()
-	consumed := 0
 	if r.done {
 		// A concurrent goroutine might have already reassembled
 		// the packet and emptied the heap while this goroutine
 		// was waiting on the mutex. We don't have to do anything in this case.
-		return buffer.VectorisedView{}, 0, false, consumed, nil
-	}
-	// For IPv6, it is possible to have different Protocol values between
-	// fragments of a packet (because, unlike IPv4, the Protocol is not used to
-	// identify a fragment). In this case, only the Protocol of the first
-	// fragment must be used as per RFC 8200 Section 4.5.
-	//
-	// TODO(gvisor.dev/issue/3648): The entire first IP header should be recorded
-	// here (instead of just the protocol) because most IP options should be
-	// derived from the first fragment.
-	if first == 0 {
-		r.proto = proto
+		return buffer.VectorisedView{}, 0, false, 0, nil
 	}
-	if r.updateHoles(first, last, more) {
-		// We store the incoming packet only if it filled some holes.
-		heap.Push(&r.heap, fragment{offset: first, vv: vv.Clone(nil)})
-		consumed = vv.Size()
+
+	var holeFound bool
+	var consumed int
+	for i := range r.holes {
+		currentHole := &r.holes[i]
+
+		if last < currentHole.first || currentHole.last < first {
+			continue
+		}
+		// For IPv6, overlaps with an existing fragment are explicitly forbidden by
+		// RFC 8200 section 4.5:
+		//   If any of the fragments being reassembled overlap with any other
+		//   fragments being reassembled for the same packet, reassembly of that
+		//   packet must be abandoned and all the fragments that have been received
+		//   for that packet must be discarded, and no ICMP error messages should be
+		//   sent.
+		//
+		// It is not explicitly forbidden for IPv4, but to keep parity with Linux we
+		// disallow it as well:
+		// https://github.com/torvalds/linux/blob/38525c6/net/ipv4/inet_fragment.c#L349
+		if first < currentHole.first || currentHole.last < last {
+			// Incoming fragment only partially fits in the free hole.
+			return buffer.VectorisedView{}, 0, false, 0, ErrFragmentOverlap
+		}
+		if !more {
+			if !currentHole.final || currentHole.filled && currentHole.last != last {
+				// We have another final fragment, which does not perfectly overlap.
+				return buffer.VectorisedView{}, 0, false, 0, ErrFragmentConflict
+			}
+		}
+
+		holeFound = true
+		if currentHole.filled {
+			// Incoming fragment is a duplicate.
+			continue
+		}
+
+		// We are populating the current hole with the payload and creating a new
+		// hole for any unfilled ranges on either end.
+		if first > currentHole.first {
+			r.holes = append(r.holes, hole{
+				first:  currentHole.first,
+				last:   first - 1,
+				filled: false,
+				final:  false,
+			})
+		}
+		if last < currentHole.last && more {
+			r.holes = append(r.holes, hole{
+				first:  last + 1,
+				last:   currentHole.last,
+				filled: false,
+				final:  currentHole.final,
+			})
+			currentHole.final = false
+		}
+		v := pkt.Data.ToOwnedView()
+		consumed = v.Size()
 		r.size += consumed
+		// Update the current hole to precisely match the incoming fragment.
+		r.holes[i] = hole{
+			first:  first,
+			last:   last,
+			filled: true,
+			final:  currentHole.final,
+			data:   v,
+		}
+		r.filled++
+		// For IPv6, it is possible to have different Protocol values between
+		// fragments of a packet (because, unlike IPv4, the Protocol is not used to
+		// identify a fragment). In this case, only the Protocol of the first
+		// fragment must be used as per RFC 8200 Section 4.5.
+		//
+		// TODO(gvisor.dev/issue/3648): During reassembly of an IPv6 packet, IP
+		// options received in the first fragment should be used - and they should
+		// override options from following fragments.
+		if first == 0 {
+			r.pkt = pkt
+			r.proto = proto
+		}
+
+		break
+	}
+	if !holeFound {
+		// Incoming fragment is beyond end.
+		return buffer.VectorisedView{}, 0, false, 0, ErrFragmentConflict
 	}
-	// Check if all the holes have been deleted and we are ready to reassamble.
-	if r.deleted < len(r.holes) {
+
+	// Check if all the holes have been filled and we are ready to reassemble.
+	if r.filled < len(r.holes) {
 		return buffer.VectorisedView{}, 0, false, consumed, nil
 	}
-	res, err := r.heap.reassemble()
-	if err != nil {
-		return buffer.VectorisedView{}, 0, false, consumed, fmt.Errorf("fragment reassembly failed: %w", err)
+
+	sort.Slice(r.holes, func(i, j int) bool {
+		return r.holes[i].first < r.holes[j].first
+	})
+
+	var size int
+	views := make([]buffer.View, 0, len(r.holes))
+	for _, hole := range r.holes {
+		views = append(views, hole.data)
+		size += hole.data.Size()
 	}
-	return res, r.proto, true, consumed, nil
+	return buffer.NewVectorisedView(size, views), r.proto, true, consumed, nil
 }
 
 func (r *reassembler) checkDoneOrMark() bool {
diff --git a/pkg/tcpip/network/fragmentation/reassembler_test.go b/pkg/tcpip/network/fragmentation/reassembler_test.go
index a0a04a027..2ff03eeeb 100644
--- a/pkg/tcpip/network/fragmentation/reassembler_test.go
+++ b/pkg/tcpip/network/fragmentation/reassembler_test.go
@@ -16,92 +16,175 @@ package fragmentation
 
 import (
 	"math"
-	"reflect"
 	"testing"
 
+	"github.com/google/go-cmp/cmp"
+	"gvisor.dev/gvisor/pkg/tcpip/buffer"
 	"gvisor.dev/gvisor/pkg/tcpip/faketime"
+	"gvisor.dev/gvisor/pkg/tcpip/stack"
 )
 
-type updateHolesInput struct {
-	first uint16
-	last  uint16
-	more  bool
+type processParams struct {
+	first     uint16
+	last      uint16
+	more      bool
+	pkt       *stack.PacketBuffer
+	wantDone  bool
+	wantError error
 }
 
-var holesTestCases = []struct {
-	comment string
-	in      []updateHolesInput
-	want    []hole
-}{
-	{
-		comment: "No fragments. Expected holes: {[0 -> inf]}.",
-		in:      []updateHolesInput{},
-		want:    []hole{{first: 0, last: math.MaxUint16, deleted: false}},
-	},
-	{
-		comment: "One fragment at beginning. Expected holes: {[2, inf]}.",
-		in:      []updateHolesInput{{first: 0, last: 1, more: true}},
-		want: []hole{
-			{first: 0, last: math.MaxUint16, deleted: true},
-			{first: 2, last: math.MaxUint16, deleted: false},
+func TestReassemblerProcess(t *testing.T) {
+	const proto = 99
+
+	v := func(size int) buffer.View {
+		payload := buffer.NewView(size)
+		for i := 1; i < size; i++ {
+			payload[i] = uint8(i) * 3
+		}
+		return payload
+	}
+
+	pkt := func(size int) *stack.PacketBuffer {
+		return stack.NewPacketBuffer(stack.PacketBufferOptions{
+			Data: v(size).ToVectorisedView(),
+		})
+	}
+
+	var tests = []struct {
+		name   string
+		params []processParams
+		want   []hole
+	}{
+		{
+			name:   "No fragments",
+			params: nil,
+			want:   []hole{{first: 0, last: math.MaxUint16, filled: false, final: true}},
 		},
-	},
-	{
-		comment: "One fragment in the middle. Expected holes: {[0, 0], [3, inf]}.",
-		in:      []updateHolesInput{{first: 1, last: 2, more: true}},
-		want: []hole{
-			{first: 0, last: math.MaxUint16, deleted: true},
-			{first: 0, last: 0, deleted: false},
-			{first: 3, last: math.MaxUint16, deleted: false},
+		{
+			name:   "One fragment at beginning",
+			params: []processParams{{first: 0, last: 1, more: true, pkt: pkt(2), wantDone: false, wantError: nil}},
+			want: []hole{
+				{first: 0, last: 1, filled: true, final: false, data: v(2)},
+				{first: 2, last: math.MaxUint16, filled: false, final: true},
+			},
 		},
-	},
-	{
-		comment: "One fragment at the end. Expected holes: {[0, 0]}.",
-		in:      []updateHolesInput{{first: 1, last: 2, more: false}},
-		want: []hole{
-			{first: 0, last: math.MaxUint16, deleted: true},
-			{first: 0, last: 0, deleted: false},
+		{
+			name:   "One fragment in the middle",
+			params: []processParams{{first: 1, last: 2, more: true, pkt: pkt(2), wantDone: false, wantError: nil}},
+			want: []hole{
+				{first: 1, last: 2, filled: true, final: false, data: v(2)},
+				{first: 0, last: 0, filled: false, final: false},
+				{first: 3, last: math.MaxUint16, filled: false, final: true},
+			},
 		},
-	},
-	{
-		comment: "One fragment completing a packet. Expected holes: {}.",
-		in:      []updateHolesInput{{first: 0, last: 1, more: false}},
-		want: []hole{
-			{first: 0, last: math.MaxUint16, deleted: true},
+		{
+			name:   "One fragment at the end",
+			params: []processParams{{first: 1, last: 2, more: false, pkt: pkt(2), wantDone: false, wantError: nil}},
+			want: []hole{
+				{first: 1, last: 2, filled: true, final: true, data: v(2)},
+				{first: 0, last: 0, filled: false},
+			},
 		},
-	},
-	{
-		comment: "Two non-overlapping fragments completing a packet. Expected holes: {}.",
-		in: []updateHolesInput{
-			{first: 0, last: 1, more: true},
-			{first: 2, last: 3, more: false},
+		{
+			name:   "One fragment completing a packet",
+			params: []processParams{{first: 0, last: 1, more: false, pkt: pkt(2), wantDone: true, wantError: nil}},
+			want: []hole{
+				{first: 0, last: 1, filled: true, final: true, data: v(2)},
+			},
 		},
-		want: []hole{
-			{first: 0, last: math.MaxUint16, deleted: true},
-			{first: 2, last: math.MaxUint16, deleted: true},
+		{
+			name: "Two fragments completing a packet",
+			params: []processParams{
+				{first: 0, last: 1, more: true, pkt: pkt(2), wantDone: false, wantError: nil},
+				{first: 2, last: 3, more: false, pkt: pkt(2), wantDone: true, wantError: nil},
+			},
+			want: []hole{
+				{first: 0, last: 1, filled: true, final: false, data: v(2)},
+				{first: 2, last: 3, filled: true, final: true, data: v(2)},
+			},
 		},
-	},
-	{
-		comment: "Two overlapping fragments completing a packet. Expected holes: {}.",
-		in: []updateHolesInput{
-			{first: 0, last: 2, more: true},
-			{first: 2, last: 3, more: false},
+		{
+			name: "Two fragments completing a packet with a duplicate",
+			params: []processParams{
+				{first: 0, last: 1, more: true, pkt: pkt(2), wantDone: false, wantError: nil},
+				{first: 0, last: 1, more: true, pkt: pkt(2), wantDone: false, wantError: nil},
+				{first: 2, last: 3, more: false, pkt: pkt(2), wantDone: true, wantError: nil},
+			},
+			want: []hole{
+				{first: 0, last: 1, filled: true, final: false, data: v(2)},
+				{first: 2, last: 3, filled: true, final: true, data: v(2)},
+			},
 		},
-		want: []hole{
-			{first: 0, last: math.MaxUint16, deleted: true},
-			{first: 3, last: math.MaxUint16, deleted: true},
+		{
+			name: "Two fragments completing a packet with a partial duplicate",
+			params: []processParams{
+				{first: 0, last: 3, more: true, pkt: pkt(4), wantDone: false, wantError: nil},
+				{first: 1, last: 2, more: true, pkt: pkt(2), wantDone: false, wantError: nil},
+				{first: 4, last: 5, more: false, pkt: pkt(2), wantDone: true, wantError: nil},
+			},
+			want: []hole{
+				{first: 0, last: 3, filled: true, final: false, data: v(4)},
+				{first: 4, last: 5, filled: true, final: true, data: v(2)},
+			},
 		},
-	},
-}
+		{
+			name: "Two overlapping fragments",
+			params: []processParams{
+				{first: 0, last: 10, more: true, pkt: pkt(11), wantDone: false, wantError: nil},
+				{first: 5, last: 15, more: false, pkt: pkt(11), wantDone: false, wantError: ErrFragmentOverlap},
+			},
+			want: []hole{
+				{first: 0, last: 10, filled: true, final: false, data: v(11)},
+				{first: 11, last: math.MaxUint16, filled: false, final: true},
+			},
+		},
+		{
+			name: "Two final fragments with different ends",
+			params: []processParams{
+				{first: 10, last: 14, more: false, pkt: pkt(5), wantDone: false, wantError: nil},
+				{first: 0, last: 9, more: false, pkt: pkt(10), wantDone: false, wantError: ErrFragmentConflict},
+			},
+			want: []hole{
+				{first: 10, last: 14, filled: true, final: true, data: v(5)},
+				{first: 0, last: 9, filled: false, final: false},
+			},
+		},
+		{
+			name: "Two final fragments - duplicate",
+			params: []processParams{
+				{first: 5, last: 14, more: false, pkt: pkt(10), wantDone: false, wantError: nil},
+				{first: 10, last: 14, more: false, pkt: pkt(5), wantDone: false, wantError: nil},
+			},
+			want: []hole{
+				{first: 5, last: 14, filled: true, final: true, data: v(10)},
+				{first: 0, last: 4, filled: false, final: false},
+			},
+		},
+		{
+			name: "Two final fragments - duplicate, with different ends",
+			params: []processParams{
+				{first: 5, last: 14, more: false, pkt: pkt(10), wantDone: false, wantError: nil},
+				{first: 10, last: 13, more: false, pkt: pkt(4), wantDone: false, wantError: ErrFragmentConflict},
+			},
+			want: []hole{
+				{first: 5, last: 14, filled: true, final: true, data: v(10)},
+				{first: 0, last: 4, filled: false, final: false},
+			},
+		},
+	}
 
-func TestUpdateHoles(t *testing.T) {
-	for _, c := range holesTestCases {
-		r := newReassembler(FragmentID{}, &faketime.NullClock{})
-		for _, i := range c.in {
-			r.updateHoles(i.first, i.last, i.more)
-		}
-		if !reflect.DeepEqual(r.holes, c.want) {
-			t.Errorf("Test \"%s\" produced unexepetced holes. Got %v. Want %v", c.comment, r.holes, c.want)
-		}
+	for _, test := range tests {
+		t.Run(test.name, func(t *testing.T) {
+			r := newReassembler(FragmentID{}, &faketime.NullClock{})
+			for _, param := range test.params {
+				_, _, done, _, err := r.process(param.first, param.last, param.more, proto, param.pkt)
+				if done != param.wantDone || err != param.wantError {
+					t.Errorf("got r.process(%d, %d, %t, %d, _) = (_, _, %t, _, %v), want = (%t, %v)", param.first, param.last, param.more, proto, done, err, param.wantDone, param.wantError)
+				}
+			}
+			if diff := cmp.Diff(test.want, r.holes, cmp.AllowUnexported(hole{})); diff != "" {
+				t.Errorf("r.holes mismatch (-want +got):\n%s", diff)
+			}
+		})
 	}
 }
diff --git a/pkg/tcpip/network/ip/BUILD b/pkg/tcpip/network/ip/BUILD
new file mode 100644
index 000000000..ca1247c1e
--- /dev/null
+++ b/pkg/tcpip/network/ip/BUILD
@@ -0,0 +1,26 @@
+load("//tools:defs.bzl", "go_library", "go_test")
+
+package(licenses = ["notice"])
+
+go_library(
+    name = "ip",
+    srcs = ["generic_multicast_protocol.go"],
+    visibility = ["//visibility:public"],
+    deps = [
+        "//pkg/sync",
+        "//pkg/tcpip",
+    ],
+)
+
+go_test(
+    name = "ip_test",
+    size = "small",
+    srcs = ["generic_multicast_protocol_test.go"],
+    deps = [
+        ":ip",
+        "//pkg/sync",
+        "//pkg/tcpip",
+        "//pkg/tcpip/faketime",
+        "@com_github_google_go_cmp//cmp:go_default_library",
+    ],
+)
diff --git a/pkg/tcpip/network/ip/generic_multicast_protocol.go b/pkg/tcpip/network/ip/generic_multicast_protocol.go
new file mode 100644
index 000000000..f85c5ff9d
--- /dev/null
+++ b/pkg/tcpip/network/ip/generic_multicast_protocol.go
@@ -0,0 +1,671 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package ip holds IPv4/IPv6 common utilities.
+package ip
+
+import (
+	"fmt"
+	"math/rand"
+	"time"
+
+	"gvisor.dev/gvisor/pkg/sync"
+	"gvisor.dev/gvisor/pkg/tcpip"
+)
+
+// hostState is the state a host may be in for a multicast group.
+type hostState int
+
+// The states below are generic across IGMPv2 (RFC 2236 section 6) and MLDv1
+// (RFC 2710 section 5). Even though the states are generic across both IGMPv2
+// and MLDv1, IGMPv2 terminology will be used.
+//
+//                                  ______________receive query______________
+//                                 |                                         |
+//                                 |   _____send or receive report_____      |
+//                                 |  |                                |     |
+//                                 V  |                                V     |
+//  +-------+ +-----------+ +------------+ +-------------------+ +--------+  |
+//  | Non-M | | Pending-M | | Delaying-M | | Queued Delaying-M | | Idle-M | -
+//  +-------+ +-----------+ +------------+ +-------------------+ +--------+
+//    |          ^      |       ^      |          ^       |             ^
+//    |          |      |       |      |          |       |             |
+//     ----------        -------        ----------         -------------
+//   initialize new    send initial    fail to send       send or receive
+//  group membership     report       delayed report          report
+//
+// Not shown in the diagram above, but any state may transition into the
+// non-member state when a group is left.
+const (
+	// nonMember is the "'Non-Member' state, when the host does not belong to the
+	// group on the interface. This is the initial state for all memberships on
+	// all network interfaces; it requires no storage in the host."
+	//
+	// 'Non-Listener' is the MLDv1 term used to describe this state.
+	//
+	// This state is used to keep track of groups that have been joined locally,
+	// but without advertising the membership to the network.
+	nonMember hostState = iota
+
+	// pendingMember is a newly joined member that is waiting to successfully send
+	// the initial set of reports.
+	//
+	// This is not an RFC defined state; it is an implementation specific state to
+	// track that the initial report needs to be sent.
+	//
+	// MAY NOT transition to the idle member state from this state.
+	pendingMember
+
+	// delayingMember is the "'Delaying Member' state, when the host belongs to
+	// the group on the interface and has a report delay timer running for that
+	// membership."
+	//
+	// 'Delaying Listener' is the MLDv1 term used to describe this state.
+	delayingMember
+
+	// queuedDelayingMember is a delayingMember that failed to send a report after
+	// its delayed report timer fired. Hosts in this state are waiting to attempt
+	// retransmission of the delayed report.
+	//
+	// This is not an RFC defined state; it is an implementation specific state to
+	// track that the delayed report needs to be sent.
+	//
+	// May transition to idle member if a report is received for a group.
+	queuedDelayingMember
+
+	// idleMember is the "'Idle Member' state, when the host belongs to the group
+	// on the interface and does not have a report delay timer running for that
+	// membership."
+	//
+	// 'Idle Listener' is the MLDv1 term used to describe this state.
+	idleMember
+)
+
+func (s hostState) isDelayingMember() bool {
+	switch s {
+	case nonMember, pendingMember, idleMember:
+		return false
+	case delayingMember, queuedDelayingMember:
+		return true
+	default:
+		panic(fmt.Sprintf("unrecognized host state = %d", s))
+	}
+}
+
+// multicastGroupState holds the Generic Multicast Protocol state for a
+// multicast group.
+type multicastGroupState struct {
+	// joins is the number of times the group has been joined.
+	joins uint64
+
+	// state holds the host's state for the group.
+	state hostState
+
+	// lastToSendReport is true if we sent the last report for the group. It is
+	// used to track whether there are other hosts on the subnet that are also
+	// members of the group.
+	//
+	// Defined in RFC 2236 section 6 page 9 for IGMPv2 and RFC 2710 section 5 page
+	// 8 for MLDv1.
+	lastToSendReport bool
+
+	// delayedReportJob is used to delay sending membership reports, whether
+	// unsolicited or in response to a query, in order to reduce duplicate
+	// reports from multiple hosts on the interface.
+	//
+	// Must not be nil.
+	delayedReportJob *tcpip.Job
+}
+
+// GenericMulticastProtocolOptions holds options for the generic multicast
+// protocol.
+type GenericMulticastProtocolOptions struct {
+	// Enabled indicates whether the generic multicast protocol will be
+	// performed.
+	//
+	// When enabled, the protocol may transmit report and leave messages when
+	// joining and leaving multicast groups respectively, and handle incoming
+	// packets.
+	//
+	// When disabled, the protocol will still keep track of locally joined groups,
+	// it just won't transmit and handle packets, or update groups' state.
+	Enabled bool
+
+	// Rand is the source of random numbers used to pick delayed report times.
+	Rand *rand.Rand
+
+	// Clock is the clock used to create the delayed report timers/jobs.
+	Clock tcpip.Clock
+
+	// Protocol is the implementation of the variant of multicast group protocol
+	// in use.
+	Protocol MulticastGroupProtocol
+
+	// MaxUnsolicitedReportDelay is the maximum amount of time to wait between
+	// transmitting unsolicited reports.
+	//
+	// Unsolicited reports are transmitted when a group is newly joined.
+	MaxUnsolicitedReportDelay time.Duration
+
+	// AllNodesAddress is a multicast address that all nodes on a network should
+	// be a member of.
+	//
+	// This address will not have the generic multicast protocol performed on it;
+	// it will be left in the non member/listener state, and packets will never
+	// be sent for it.
+	AllNodesAddress tcpip.Address
+}
+
+// MulticastGroupProtocol is a multicast group protocol whose core state machine
+// can be represented by GenericMulticastProtocolState.
+type MulticastGroupProtocol interface {
+	// SendReport sends a multicast report for the specified group address.
+	//
+	// Returns sent = false if the caller should queue the report to be sent
+	// later. Note, sent = false does not mean that the receiver hit an error.
+	SendReport(groupAddress tcpip.Address) (sent bool, err *tcpip.Error)
+
+	// SendLeave sends a multicast leave for the specified group address.
+	SendLeave(groupAddress tcpip.Address) *tcpip.Error
+}
+
+// GenericMulticastProtocolState is the per interface generic multicast protocol
+// state.
+//
+// There is actually no protocol named "Generic Multicast Protocol". Instead,
+// the term is used to refer to a generic multicast protocol that applies to
+// both IPv4 and IPv6. Specifically, Generic Multicast Protocol is the core state
+// machine of IGMPv2 as defined by RFC 2236 and MLDv1 as defined by RFC 2710.
+//
+// Callers must synchronize accesses to the generic multicast protocol state;
+// GenericMulticastProtocolState obtains no locks in any of its methods. The
+// only exception to this is GenericMulticastProtocolState's timer/job callbacks
+// which will obtain the lock provided to the GenericMulticastProtocolState when
+// it is initialized.
+//
+// GenericMulticastProtocolState.Init MUST be called before calling any of
+// the methods on GenericMulticastProtocolState.
+type GenericMulticastProtocolState struct {
+	// Do not allow overwriting this state.
+	_ sync.NoCopy
+
+	opts GenericMulticastProtocolOptions
+
+	// memberships holds group addresses and their associated state.
+	memberships map[tcpip.Address]multicastGroupState
+
+	// protocolMU is the mutex used to protect the protocol.
+	protocolMU *sync.RWMutex
+}
+
+// Init initializes the Generic Multicast Protocol state.
+//
+// Must only be called once for the lifetime of g; Init will panic if it is
+// called twice.
+//
+// The GenericMulticastProtocolState will only grab the lock when timers/jobs
+// fire.
+//
+// Note: the methods on opts.Protocol will always be called while protocolMU is
+// held.
+func (g *GenericMulticastProtocolState) Init(protocolMU *sync.RWMutex, opts GenericMulticastProtocolOptions) {
+	if g.memberships != nil {
+		panic("attempted to initialize generic membership protocol state twice")
+	}
+
+	*g = GenericMulticastProtocolState{
+		opts:        opts,
+		memberships: make(map[tcpip.Address]multicastGroupState),
+		protocolMU:  protocolMU,
+	}
+}
+
+// MakeAllNonMemberLocked transitions all groups to the non-member state.
+//
+// The groups remain joined locally. Does nothing if the protocol is disabled.
+//
+// Precondition: g.protocolMU must be locked.
+func (g *GenericMulticastProtocolState) MakeAllNonMemberLocked() {
+	if !g.opts.Enabled {
+		return
+	}
+
+	for groupAddress, info := range g.memberships {
+		g.transitionToNonMemberLocked(groupAddress, &info)
+		g.memberships[groupAddress] = info
+	}
+}
+
+// InitializeGroupsLocked initializes each group, as if they were newly joined
+// but without affecting the groups' join count.
+//
+// Must only be called after calling MakeAllNonMemberLocked as a group should
+// not be initialized while it is not in the non-member state.
+//
+// Precondition: g.protocolMU must be locked.
+func (g *GenericMulticastProtocolState) InitializeGroupsLocked() {
+	if !g.opts.Enabled {
+		return
+	}
+
+	for groupAddress, info := range g.memberships {
+		g.initializeNewMemberLocked(groupAddress, &info)
+		g.memberships[groupAddress] = info
+	}
+}
+
+// SendQueuedReportsLocked attempts to send reports for groups that failed to
+// send reports during their last attempt.
+//
+// Precondition: g.protocolMU must be locked.
+func (g *GenericMulticastProtocolState) SendQueuedReportsLocked() {
+	for groupAddress, info := range g.memberships {
+		switch info.state {
+		case nonMember, delayingMember, idleMember:
+		case pendingMember:
+			// pendingMembers failed to send their initial unsolicited report so try
+			// to send the report and queue the extra unsolicited reports.
+			g.maybeSendInitialReportLocked(groupAddress, &info)
+		case queuedDelayingMember:
+			// queuedDelayingMembers failed to send their delayed reports so try to
+			// send the report and transition them to the idle state.
+			g.maybeSendDelayedReportLocked(groupAddress, &info)
+		default:
+			panic(fmt.Sprintf("unrecognized host state = %d", info.state))
+		}
+		g.memberships[groupAddress] = info
+	}
+}
+
+// JoinGroupLocked handles joining a new group.
+//
+// If dontInitialize is true, the group will not be initialized and will be
+// left in the non-member state - no packets will be sent for it until it is
+// initialized via InitializeGroupsLocked.
+//
+// Precondition: g.protocolMU must be locked.
+func (g *GenericMulticastProtocolState) JoinGroupLocked(groupAddress tcpip.Address, dontInitialize bool) {
+	if info, ok := g.memberships[groupAddress]; ok {
+		// The group has already been joined; only the join count changes.
+		info.joins++
+		g.memberships[groupAddress] = info
+		return
+	}
+
+	info := multicastGroupState{
+		// Since we just joined the group, its count is 1.
+		joins: 1,
+		// The state will be updated below, if required.
+		state:            nonMember,
+		lastToSendReport: false,
+		delayedReportJob: tcpip.NewJob(g.opts.Clock, g.protocolMU, func() {
+			info, ok := g.memberships[groupAddress]
+			if !ok {
+				panic(fmt.Sprintf("expected to find group state for group = %s", groupAddress))
+			}
+
+			g.maybeSendDelayedReportLocked(groupAddress, &info)
+			g.memberships[groupAddress] = info
+		}),
+	}
+
+	if !dontInitialize && g.opts.Enabled {
+		g.initializeNewMemberLocked(groupAddress, &info)
+	}
+
+	g.memberships[groupAddress] = info
+}
+
+// IsLocallyJoinedRLocked returns true if the group is locally joined.
+//
+// Precondition: g.protocolMU must be read locked.
+func (g *GenericMulticastProtocolState) IsLocallyJoinedRLocked(groupAddress tcpip.Address) bool {
+	_, ok := g.memberships[groupAddress]
+	return ok
+}
+
+// LeaveGroupLocked handles leaving the group.
+//
+// Returns false if the group is not currently joined.
+//
+// Precondition: g.protocolMU must be locked.
+func (g *GenericMulticastProtocolState) LeaveGroupLocked(groupAddress tcpip.Address) bool {
+	info, ok := g.memberships[groupAddress]
+	if !ok {
+		return false
+	}
+
+	if info.joins == 0 {
+		panic(fmt.Sprintf("tried to leave group %s with a join count of 0", groupAddress))
+	}
+	info.joins--
+	if info.joins != 0 {
+		// If we still have outstanding joins, then do nothing further.
+		g.memberships[groupAddress] = info
+		return true
+	}
+
+	g.transitionToNonMemberLocked(groupAddress, &info)
+	delete(g.memberships, groupAddress)
+	return true
+}
+
+// HandleQueryLocked handles a query message with the specified maximum response
+// time.
+//
+// If the group address is unspecified, then reports will be scheduled for all
+// joined groups.
+//
+// Report(s) will be scheduled to be sent after a random duration between 0 and
+// the maximum response time.
+//
+// Precondition: g.protocolMU must be locked.
+func (g *GenericMulticastProtocolState) HandleQueryLocked(groupAddress tcpip.Address, maxResponseTime time.Duration) {
+	if !g.opts.Enabled {
+		return
+	}
+
+	// As per RFC 2236 section 2.4 (for IGMPv2),
+	//
+	//   In a Membership Query message, the group address field is set to zero
+	//   when sending a General Query, and set to the group address being
+	//   queried when sending a Group-Specific Query.
+	//
+	// As per RFC 2710 section 3.6 (for MLDv1),
+	//
+	//   In a Query message, the Multicast Address field is set to zero when
+	//   sending a General Query, and set to a specific IPv6 multicast address
+	//   when sending a Multicast-Address-Specific Query.
+	if groupAddress.Unspecified() {
+		// This is a general query as the group address is unspecified.
+		for groupAddress, info := range g.memberships {
+			g.setDelayTimerForAddressRLocked(groupAddress, &info, maxResponseTime)
+			g.memberships[groupAddress] = info
+		}
+	} else if info, ok := g.memberships[groupAddress]; ok {
+		g.setDelayTimerForAddressRLocked(groupAddress, &info, maxResponseTime)
+		g.memberships[groupAddress] = info
+	}
+}
+
+// HandleReportLocked handles a report message.
+//
+// If the report is for a joined group, any active delayed report will be
+// cancelled and the host state for the group transitions to idle.
+//
+// Precondition: g.protocolMU must be locked.
+func (g *GenericMulticastProtocolState) HandleReportLocked(groupAddress tcpip.Address) {
+	if !g.opts.Enabled {
+		return
+	}
+
+	// As per RFC 2236 section 3 pages 3-4 (for IGMPv2),
+	//
+	//   If the host receives another host's Report (version 1 or 2) while it has
+	//   a timer running, it stops its timer for the specified group and does not
+	//   send a Report
+	//
+	// As per RFC 2710 section 4 page 6 (for MLDv1),
+	//
+	//   If a node receives another node's Report from an interface for a
+	//   multicast address while it has a timer running for that same address
+	//   on that interface, it stops its timer and does not send a Report for
+	//   that address, thus suppressing duplicate reports on the link.
+	if info, ok := g.memberships[groupAddress]; ok && info.state.isDelayingMember() {
+		info.delayedReportJob.Cancel()
+		info.lastToSendReport = false
+		info.state = idleMember
+		g.memberships[groupAddress] = info
+	}
+}
+
+// initializeNewMemberLocked initializes a new group membership.
+//
+// Precondition: g.protocolMU must be locked.
+func (g *GenericMulticastProtocolState) initializeNewMemberLocked(groupAddress tcpip.Address, info *multicastGroupState) {
+	if info.state != nonMember {
+		panic(fmt.Sprintf("host must be in non-member state to be initialized; group = %s, state = %d", groupAddress, info.state))
+	}
+
+	info.lastToSendReport = false
+
+	if groupAddress == g.opts.AllNodesAddress {
+		// As per RFC 2236 section 6 page 10 (for IGMPv2),
+		//
+		//   The all-systems group (address 224.0.0.1) is handled as a special
+		//   case. The host starts in Idle Member state for that group on every
+		//   interface, never transitions to another state, and never sends a
+		//   report for that group.
+		//
+		// As per RFC 2710 section 5 page 10 (for MLDv1),
+		//
+		//   The link-scope all-nodes address (FF02::1) is handled as a special
+		//   case. The node starts in Idle Listener state for that address on
+		//   every interface, never transitions to another state, and never sends
+		//   a Report or Done for that address.
+		info.state = idleMember
+		return
+	}
+
+	info.state = pendingMember
+	g.maybeSendInitialReportLocked(groupAddress, info)
+}
+
+// maybeSendInitialReportLocked attempts to start transmission of the initial
+// set of reports after newly joining a group.
+//
+// Host must be in pending member state.
+//
+// Precondition: g.protocolMU must be locked.
+func (g *GenericMulticastProtocolState) maybeSendInitialReportLocked(groupAddress tcpip.Address, info *multicastGroupState) {
+	if info.state != pendingMember {
+		panic(fmt.Sprintf("host must be in pending member state to send initial reports; group = %s, state = %d", groupAddress, info.state))
+	}
+
+	// As per RFC 2236 section 3 page 5 (for IGMPv2),
+	//
+	//   When a host joins a multicast group, it should immediately transmit an
+	//   unsolicited Version 2 Membership Report for that group ... it is
+	//   recommended that it be repeated ...
+	//
+	// As per RFC 2710 section 4 page 6 (for MLDv1),
+	//
+	//   When a node starts listening to a multicast address on an interface,
+	//   it should immediately transmit an unsolicited Report for that address
+	//   on that interface, in case it is the first listener on the link. To
+	//   cover the possibility of the initial Report being lost or damaged, it
+	//   is recommended that it be repeated once or twice after short delays
+	//   [Unsolicited Report Interval].
+	//
+	// TODO(gvisor.dev/issue/4901): Support a configurable number of initial
+	// unsolicited reports.
+	sent, err := g.opts.Protocol.SendReport(groupAddress)
+	if err == nil && sent {
+		info.lastToSendReport = true
+		g.setDelayTimerForAddressRLocked(groupAddress, info, g.opts.MaxUnsolicitedReportDelay)
+	}
+}
+
+// maybeSendDelayedReportLocked attempts to send the delayed report.
+//
+// Host must be in delaying or queued delaying member state.
+//
+// Precondition: g.protocolMU must be locked.
+func (g *GenericMulticastProtocolState) maybeSendDelayedReportLocked(groupAddress tcpip.Address, info *multicastGroupState) {
+	if !info.state.isDelayingMember() {
+		panic(fmt.Sprintf("host must be in delaying or queued delaying member state to send delayed reports; group = %s, state = %d", groupAddress, info.state))
+	}
+
+	sent, err := g.opts.Protocol.SendReport(groupAddress)
+	if err == nil && sent {
+		info.lastToSendReport = true
+		info.state = idleMember
+	} else {
+		info.state = queuedDelayingMember
+	}
+}
+
+// maybeSendLeave attempts to send a leave message if we sent the last report.
+func (g *GenericMulticastProtocolState) maybeSendLeave(groupAddress tcpip.Address, lastToSendReport bool) {
+	if !g.opts.Enabled || !lastToSendReport {
+		return
+	}
+
+	if groupAddress == g.opts.AllNodesAddress {
+		// As per RFC 2236 section 6 page 10 (for IGMPv2),
+		//
+		//   The all-systems group (address 224.0.0.1) is handled as a special
+		//   case. The host starts in Idle Member state for that group on every
+		//   interface, never transitions to another state, and never sends a
+		//   report for that group.
+		//
+		// As per RFC 2710 section 5 page 10 (for MLDv1),
+		//
+		//   The link-scope all-nodes address (FF02::1) is handled as a special
+		//   case. The node starts in Idle Listener state for that address on
+		//   every interface, never transitions to another state, and never sends
+		//   a Report or Done for that address.
+		return
+	}
+
+	// Okay to ignore the error here as if packet write failed, the multicast
+	// routers will eventually drop our membership anyways. If the interface is
+	// being disabled or removed, the generic multicast protocol's state should
+	// be cleared eventually.
+	//
+	// As per RFC 2236 section 3 page 5 (for IGMPv2),
+	//
+	//   When a router receives a Report, it adds the group being reported to
+	//   the list of multicast group memberships on the network on which it
+	//   received the Report and sets the timer for the membership to the
+	//   [Group Membership Interval]. Repeated Reports refresh the timer. If
+	//   no Reports are received for a particular group before this timer has
+	//   expired, the router assumes that the group has no local members and
+	//   that it need not forward remotely-originated multicasts for that
+	//   group onto the attached network.
+	//
+	// As per RFC 2710 section 4 page 5 (for MLDv1),
+	//
+	//   When a router receives a Report from a link, if the reported address
+	//   is not already present in the router's list of multicast address
+	//   having listeners on that link, the reported address is added to the
+	//   list, its timer is set to [Multicast Listener Interval], and its
+	//   appearance is made known to the router's multicast routing component.
+	//   If a Report is received for a multicast address that is already
+	//   present in the router's list, the timer for that address is reset to
+	//   [Multicast Listener Interval]. If an address's timer expires, it is
+	//   assumed that there are no longer any listeners for that address
+	//   present on the link, so it is deleted from the list and its
+	//   disappearance is made known to the multicast routing component.
+	//
+	// The requirement to send a leave message is also optional (it MAY be
+	// skipped):
+	//
+	// As per RFC 2236 section 6 page 8 (for IGMPv2),
+	//
+	//  "send leave" for the group on the interface. If the interface
+	//   state says the Querier is running IGMPv1, this action SHOULD be
+	//   skipped. If the flag saying we were the last host to report is
+	//   cleared, this action MAY be skipped. The Leave Message is sent to
+	//   the ALL-ROUTERS group (224.0.0.2).
+	//
+	// As per RFC 2710 section 5 page 8 (for MLDv1),
+	//
+	//   "send done" for the address on the interface. If the flag saying
+	//   we were the last node to report is cleared, this action MAY be
+	//   skipped. The Done message is sent to the link-scope all-routers
+	//   address (FF02::2).
+	_ = g.opts.Protocol.SendLeave(groupAddress)
+}
+
+// transitionToNonMemberLocked transitions the given multicast group to the
+// non-member/listener state.
+//
+// Precondition: g.protocolMU must be locked.
+func (g *GenericMulticastProtocolState) transitionToNonMemberLocked(groupAddress tcpip.Address, info *multicastGroupState) {
+	if info.state == nonMember {
+		return
+	}
+
+	info.delayedReportJob.Cancel()
+	g.maybeSendLeave(groupAddress, info.lastToSendReport)
+	info.lastToSendReport = false
+	info.state = nonMember
+}
+
+// setDelayTimerForAddressRLocked sets a timer to send a delayed report.
+//
+// Precondition: g.protocolMU MUST be read locked.
+func (g *GenericMulticastProtocolState) setDelayTimerForAddressRLocked(groupAddress tcpip.Address, info *multicastGroupState, maxResponseTime time.Duration) {
+	if info.state == nonMember {
+		return
+	}
+
+	if groupAddress == g.opts.AllNodesAddress {
+		// As per RFC 2236 section 6 page 10 (for IGMPv2),
+		//
+		//   The all-systems group (address 224.0.0.1) is handled as a special
+		//   case. The host starts in Idle Member state for that group on every
+		//   interface, never transitions to another state, and never sends a
+		//   report for that group.
+		//
+		// As per RFC 2710 section 5 page 10 (for MLDv1),
+		//
+		//   The link-scope all-nodes address (FF02::1) is handled as a special
+		//   case. The node starts in Idle Listener state for that address on
+		//   every interface, never transitions to another state, and never sends
+		//   a Report or Done for that address.
+		return
+	}
+
+	// As per RFC 2236 section 3 page 3 (for IGMPv2),
+	//
+	//   If a timer for the group is already running, it is reset to the random
+	//   value only if the requested Max Response Time is less than the remaining
+	//   value of the running timer.
+	//
+	// As per RFC 2710 section 4 page 5 (for MLDv1),
+	//
+	//   If a timer for any address is already running, it is reset to the new
+	//   random value only if the requested Maximum Response Delay is less than
+	//   the remaining value of the running timer.
+	if info.state == delayingMember {
+		// TODO: Reset the timer if time remaining is greater than maxResponseTime.
+		return
+	}
+
+	info.state = delayingMember
+	info.delayedReportJob.Cancel()
+	info.delayedReportJob.Schedule(g.calculateDelayTimerDuration(maxResponseTime))
+}
+
+// calculateDelayTimerDuration returns a random duration in [0, maxRespTime).
+func (g *GenericMulticastProtocolState) calculateDelayTimerDuration(maxRespTime time.Duration) time.Duration {
+	// As per RFC 2236 section 3 page 3 (for IGMPv2),
+	//
+	//   When a host receives a Group-Specific Query, it sets a delay timer to a
+	//   random value selected from the range (0, Max Response Time]...
+	//
+	// As per RFC 2710 section 4 page 6 (for MLDv1),
+	//
+	//   When a node receives a Multicast-Address-Specific Query, if it is
+	//   listening to the queried Multicast Address on the interface from
+	//   which the Query was received, it sets a delay timer for that address
+	//   to a random value selected from the range [0, Maximum Response Delay],
+	//   as above.
+	if maxRespTime <= 0 {
+		return 0 // Rand.Int63n panics if its argument is not positive.
+	}
+	return time.Duration(g.opts.Rand.Int63n(int64(maxRespTime)))
+}
diff --git a/pkg/tcpip/network/ip/generic_multicast_protocol_test.go b/pkg/tcpip/network/ip/generic_multicast_protocol_test.go
new file mode 100644
index 000000000..95040515c
--- /dev/null
+++ b/pkg/tcpip/network/ip/generic_multicast_protocol_test.go
@@ -0,0 +1,884 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package ip_test
+
+import (
+	"math/rand"
+	"testing"
+	"time"
+
+	"github.com/google/go-cmp/cmp"
+	"gvisor.dev/gvisor/pkg/sync"
+	"gvisor.dev/gvisor/pkg/tcpip"
+	"gvisor.dev/gvisor/pkg/tcpip/faketime"
+	"gvisor.dev/gvisor/pkg/tcpip/network/ip"
+)
+
+const (
+	addr1 = tcpip.Address("\x01")
+	addr2 = tcpip.Address("\x02")
+	addr3 = tcpip.Address("\x03")
+	addr4 = tcpip.Address("\x04")
+
+	maxUnsolicitedReportDelay = time.Second
+)
+
+var _ ip.MulticastGroupProtocol = (*mockMulticastGroupProtocol)(nil)
+
+type mockMulticastGroupProtocol struct {
+	t *testing.T
+
+	mu sync.RWMutex
+
+	// Must only be accessed with mu held.
+	sendReportGroupAddrCount map[tcpip.Address]int
+
+	// Must only be accessed with mu held.
+	sendLeaveGroupAddrCount map[tcpip.Address]int
+
+	// Must only be accessed with mu held.
+	makeQueuePackets bool
+}
+
+func (m *mockMulticastGroupProtocol) init() {
+	m.mu.Lock()
+	defer m.mu.Unlock()
+	m.initLocked()
+}
+
+func (m *mockMulticastGroupProtocol) initLocked() {
+	m.sendReportGroupAddrCount = make(map[tcpip.Address]int)
+	m.sendLeaveGroupAddrCount = make(map[tcpip.Address]int)
+}
+
+func (m *mockMulticastGroupProtocol) SendReport(groupAddress tcpip.Address) (bool, *tcpip.Error) {
+	if m.mu.TryLock() {
+		m.mu.Unlock()
+		m.t.Fatalf("got write lock, expected to not take the lock; generic multicast protocol must take the write lock before sending report for %s", groupAddress)
+	}
+	if m.mu.TryRLock() {
+		m.mu.RUnlock()
+		m.t.Fatalf("got read lock, expected to not take the lock; generic multicast protocol must take the write lock before sending report for %s", groupAddress)
+	}
+
+	m.sendReportGroupAddrCount[groupAddress]++
+	return !m.makeQueuePackets, nil
+}
+
+func (m *mockMulticastGroupProtocol) SendLeave(groupAddress tcpip.Address) *tcpip.Error {
+	if m.mu.TryLock() {
+		m.mu.Unlock()
+		m.t.Fatalf("got write lock, expected to not take the lock; generic multicast protocol must take the write lock before sending leave for %s", groupAddress)
+	}
+	if m.mu.TryRLock() {
+		m.mu.RUnlock()
+		m.t.Fatalf("got read lock, expected to not take the lock; generic multicast protocol must take the write lock before sending leave for %s", groupAddress)
+	}
+
+	m.sendLeaveGroupAddrCount[groupAddress]++
+	return nil
+}
+
+func (m *mockMulticastGroupProtocol) check(sendReportGroupAddresses []tcpip.Address, sendLeaveGroupAddresses []tcpip.Address) string {
+	m.mu.Lock()
+	defer m.mu.Unlock()
+
+	sendReportGroupAddrCount := make(map[tcpip.Address]int)
+	for _, a := range sendReportGroupAddresses {
+		sendReportGroupAddrCount[a] = 1
+	}
+
+	sendLeaveGroupAddrCount := make(map[tcpip.Address]int)
+	for _, a := range sendLeaveGroupAddresses {
+		sendLeaveGroupAddrCount[a] = 1
+	}
+
+	diff := cmp.Diff(
+		&mockMulticastGroupProtocol{
+			sendReportGroupAddrCount: sendReportGroupAddrCount,
+			sendLeaveGroupAddrCount:  sendLeaveGroupAddrCount,
+		},
+		m,
+		cmp.AllowUnexported(mockMulticastGroupProtocol{}),
+		// Ignore the mu, t and makeQueuePackets fields; only the counts are compared.
+		cmp.FilterPath(
+			func(p cmp.Path) bool {
+				return p.Last().String() == ".mu" || p.Last().String() == ".t" || p.Last().String() == ".makeQueuePackets"
+			},
+			cmp.Ignore(),
+		),
+	)
+	m.initLocked()
+	return diff
+}
+
+func TestJoinGroup(t *testing.T) {
+	tests := []struct {
+		name              string
+		addr              tcpip.Address
+		shouldSendReports bool
+	}{
+		{
+			name:              "Normal group",
+			addr:              addr1,
+			shouldSendReports: true,
+		},
+		{
+			name:              "All-nodes group",
+			addr:              addr2,
+			shouldSendReports: false,
+		},
+	}
+
+	for _, test := range tests {
+		t.Run(test.name, func(t *testing.T) {
+			var g ip.GenericMulticastProtocolState
+			mgp := mockMulticastGroupProtocol{t: t}
+			clock := faketime.NewManualClock()
+
+			mgp.init()
+			g.Init(&mgp.mu, ip.GenericMulticastProtocolOptions{
+				Enabled:                   true,
+				Rand:                      rand.New(rand.NewSource(0)),
+				Clock:                     clock,
+				Protocol:                  &mgp,
+				MaxUnsolicitedReportDelay: maxUnsolicitedReportDelay,
+				AllNodesAddress:           addr2,
+			})
+
+			// Joining a group should send a report immediately and another after
+			// a random interval between 0 and the maximum unsolicited report delay.
+			mgp.mu.Lock()
+			g.JoinGroupLocked(test.addr, false /* dontInitialize */)
+			mgp.mu.Unlock()
+			if test.shouldSendReports {
+				if diff := mgp.check([]tcpip.Address{test.addr} /* sendReportGroupAddresses */, nil /* sendLeaveGroupAddresses */); diff != "" {
+					t.Errorf("mockMulticastGroupProtocol mismatch (-want +got):\n%s", diff)
+				}
+
+				// Generic multicast protocol timers are expected to take the job mutex.
+				clock.Advance(maxUnsolicitedReportDelay)
+				if diff := mgp.check([]tcpip.Address{test.addr} /* sendReportGroupAddresses */, nil /* sendLeaveGroupAddresses */); diff != "" {
+					t.Errorf("mockMulticastGroupProtocol mismatch (-want +got):\n%s", diff)
+				}
+			}
+
+			// Should have no more messages to send.
+			clock.Advance(time.Hour)
+			if diff := mgp.check(nil /* sendReportGroupAddresses */, nil /* sendLeaveGroupAddresses */); diff != "" {
+				t.Errorf("mockMulticastGroupProtocol mismatch (-want +got):\n%s", diff)
+			}
+		})
+	}
+}
+
+func TestLeaveGroup(t *testing.T) {
+	tests := []struct {
+		name               string
+		addr               tcpip.Address
+		shouldSendMessages bool
+	}{
+		{
+			name:               "Normal group",
+			addr:               addr1,
+			shouldSendMessages: true,
+		},
+		{
+			name:               "All-nodes group",
+			addr:               addr2,
+			shouldSendMessages: false,
+		},
+	}
+
+	for _, test := range tests {
+		t.Run(test.name, func(t *testing.T) {
+			var g ip.GenericMulticastProtocolState
+			mgp := mockMulticastGroupProtocol{t: t}
+			clock := faketime.NewManualClock()
+
+			mgp.init()
+			g.Init(&mgp.mu, ip.GenericMulticastProtocolOptions{
+				Enabled:                   true,
+				Rand:                      rand.New(rand.NewSource(1)),
+				Clock:                     clock,
+				Protocol:                  &mgp,
+				MaxUnsolicitedReportDelay: maxUnsolicitedReportDelay,
+				AllNodesAddress:           addr2,
+			})
+
+			mgp.mu.Lock()
+			g.JoinGroupLocked(test.addr, false /* dontInitialize */)
+			mgp.mu.Unlock()
+			if test.shouldSendMessages {
+				if diff := mgp.check([]tcpip.Address{test.addr} /* sendReportGroupAddresses */, nil /* sendLeaveGroupAddresses */); diff != "" {
+					t.Fatalf("mockMulticastGroupProtocol mismatch (-want +got):\n%s", diff)
+				}
+			}
+
+			// Leaving a group should send a leave report immediately and cancel any
+			// delayed reports.
+			{
+				mgp.mu.Lock()
+				res := g.LeaveGroupLocked(test.addr)
+				mgp.mu.Unlock()
+				if !res {
+					t.Fatalf("got g.LeaveGroupLocked(%s) = false, want = true", test.addr)
+				}
+			}
+			if test.shouldSendMessages {
+				if diff := mgp.check(nil /* sendReportGroupAddresses */, []tcpip.Address{test.addr} /* sendLeaveGroupAddresses */); diff != "" {
+					t.Errorf("mockMulticastGroupProtocol mismatch (-want +got):\n%s", diff)
+				}
+			}
+
+			// Should have no more messages to send.
+			//
+			// Generic multicast protocol timers are expected to take the job mutex.
+			clock.Advance(time.Hour)
+			if diff := mgp.check(nil /* sendReportGroupAddresses */, nil /* sendLeaveGroupAddresses */); diff != "" {
+				t.Errorf("mockMulticastGroupProtocol mismatch (-want +got):\n%s", diff)
+			}
+		})
+	}
+}
+
+func TestHandleReport(t *testing.T) {
+	tests := []struct {
+		name             string
+		reportAddr       tcpip.Address
+		expectReportsFor []tcpip.Address
+	}{
+		{
+			name:             "Unpecified empty",
+			reportAddr:       "",
+			expectReportsFor: []tcpip.Address{addr1, addr2},
+		},
+		{
+			name:             "Unpecified any",
+			reportAddr:       "\x00",
+			expectReportsFor: []tcpip.Address{addr1, addr2},
+		},
+		{
+			name:             "Specified",
+			reportAddr:       addr1,
+			expectReportsFor: []tcpip.Address{addr2},
+		},
+		{
+			name:             "Specified all-nodes",
+			reportAddr:       addr3,
+			expectReportsFor: []tcpip.Address{addr1, addr2},
+		},
+		{
+			name:             "Specified other",
+			reportAddr:       addr4,
+			expectReportsFor: []tcpip.Address{addr1, addr2},
+		},
+	}
+
+	for _, test := range tests {
+		t.Run(test.name, func(t *testing.T) {
+			var g ip.GenericMulticastProtocolState
+			mgp := mockMulticastGroupProtocol{t: t}
+			clock := faketime.NewManualClock()
+
+			mgp.init()
+			g.Init(&mgp.mu, ip.GenericMulticastProtocolOptions{
+				Enabled:                   true,
+				Rand:                      rand.New(rand.NewSource(2)),
+				Clock:                     clock,
+				Protocol:                  &mgp,
+				MaxUnsolicitedReportDelay: maxUnsolicitedReportDelay,
+				AllNodesAddress:           addr3,
+			})
+
+			mgp.mu.Lock()
+			g.JoinGroupLocked(addr1, false /* dontInitialize */)
+			mgp.mu.Unlock()
+			if diff := mgp.check([]tcpip.Address{addr1} /* sendReportGroupAddresses */, nil /* sendLeaveGroupAddresses */); diff != "" {
+				t.Fatalf("mockMulticastGroupProtocol mismatch (-want +got):\n%s", diff)
+			}
+			mgp.mu.Lock()
+			g.JoinGroupLocked(addr2, false /* dontInitialize */)
+			mgp.mu.Unlock()
+			if diff := mgp.check([]tcpip.Address{addr2} /* sendReportGroupAddresses */, nil /* sendLeaveGroupAddresses */); diff != "" {
+				t.Fatalf("mockMulticastGroupProtocol mismatch (-want +got):\n%s", diff)
+			}
+			mgp.mu.Lock()
+			g.JoinGroupLocked(addr3, false /* dontInitialize */)
+			mgp.mu.Unlock()
+			if diff := mgp.check(nil /* sendReportGroupAddresses */, nil /* sendLeaveGroupAddresses */); diff != "" {
+				t.Fatalf("mockMulticastGroupProtocol mismatch (-want +got):\n%s", diff)
+			}
+
+			// Receiving a report for a group we have a timer scheduled for should
+			// cancel our delayed report timer for the group.
+			mgp.mu.Lock()
+			g.HandleReportLocked(test.reportAddr)
+			mgp.mu.Unlock()
+			if len(test.expectReportsFor) != 0 {
+				// Generic multicast protocol timers are expected to take the job mutex.
+				clock.Advance(maxUnsolicitedReportDelay)
+				if diff := mgp.check(test.expectReportsFor /* sendReportGroupAddresses */, nil /* sendLeaveGroupAddresses */); diff != "" {
+					t.Errorf("mockMulticastGroupProtocol mismatch (-want +got):\n%s", diff)
+				}
+			}
+
+			// Should have no more messages to send.
+			clock.Advance(time.Hour)
+			if diff := mgp.check(nil /* sendReportGroupAddresses */, nil /* sendLeaveGroupAddresses */); diff != "" {
+				t.Errorf("mockMulticastGroupProtocol mismatch (-want +got):\n%s", diff)
+			}
+		})
+	}
+}
+
+func TestHandleQuery(t *testing.T) {
+	tests := []struct {
+		name             string
+		queryAddr        tcpip.Address
+		maxDelay         time.Duration
+		expectReportsFor []tcpip.Address
+	}{
+		{
+			name:             "Unpecified empty",
+			queryAddr:        "",
+			maxDelay:         0,
+			expectReportsFor: []tcpip.Address{addr1, addr2},
+		},
+		{
+			name:             "Unpecified any",
+			queryAddr:        "\x00",
+			maxDelay:         1,
+			expectReportsFor: []tcpip.Address{addr1, addr2},
+		},
+		{
+			name:             "Specified",
+			queryAddr:        addr1,
+			maxDelay:         2,
+			expectReportsFor: []tcpip.Address{addr1},
+		},
+		{
+			name:             "Specified all-nodes",
+			queryAddr:        addr3,
+			maxDelay:         3,
+			expectReportsFor: nil,
+		},
+		{
+			name:             "Specified other",
+			queryAddr:        addr4,
+			maxDelay:         4,
+			expectReportsFor: nil,
+		},
+	}
+
+	for _, test := range tests {
+		t.Run(test.name, func(t *testing.T) {
+			var g ip.GenericMulticastProtocolState
+			mgp := mockMulticastGroupProtocol{t: t}
+			clock := faketime.NewManualClock()
+
+			mgp.init()
+			g.Init(&mgp.mu, ip.GenericMulticastProtocolOptions{
+				Enabled:                   true,
+				Rand:                      rand.New(rand.NewSource(3)),
+				Clock:                     clock,
+				Protocol:                  &mgp,
+				MaxUnsolicitedReportDelay: maxUnsolicitedReportDelay,
+				AllNodesAddress:           addr3,
+			})
+
+			mgp.mu.Lock()
+			g.JoinGroupLocked(addr1, false /* dontInitialize */)
+			mgp.mu.Unlock()
+			if diff := mgp.check([]tcpip.Address{addr1} /* sendReportGroupAddresses */, nil /* sendLeaveGroupAddresses */); diff != "" {
+				t.Fatalf("mockMulticastGroupProtocol mismatch (-want +got):\n%s", diff)
+			}
+			mgp.mu.Lock()
+			g.JoinGroupLocked(addr2, false /* dontInitialize */)
+			mgp.mu.Unlock()
+			if diff := mgp.check([]tcpip.Address{addr2} /* sendReportGroupAddresses */, nil /* sendLeaveGroupAddresses */); diff != "" {
+				t.Fatalf("mockMulticastGroupProtocol mismatch (-want +got):\n%s", diff)
+			}
+			mgp.mu.Lock()
+			g.JoinGroupLocked(addr3, false /* dontInitialize */)
+			mgp.mu.Unlock()
+			if diff := mgp.check(nil /* sendReportGroupAddresses */, nil /* sendLeaveGroupAddresses */); diff != "" {
+				t.Fatalf("mockMulticastGroupProtocol mismatch (-want +got):\n%s", diff)
+			}
+			// Generic multicast protocol timers are expected to take the job mutex.
+			clock.Advance(maxUnsolicitedReportDelay)
+			if diff := mgp.check([]tcpip.Address{addr1, addr2} /* sendReportGroupAddresses */, nil /* sendLeaveGroupAddresses */); diff != "" {
+				t.Fatalf("mockMulticastGroupProtocol mismatch (-want +got):\n%s", diff)
+			}
+
+			// Receiving a query should make us schedule a new delayed report if it
+			// is a query directed at us or a general query.
+			mgp.mu.Lock()
+			g.HandleQueryLocked(test.queryAddr, test.maxDelay)
+			mgp.mu.Unlock()
+			if len(test.expectReportsFor) != 0 {
+				clock.Advance(test.maxDelay)
+				if diff := mgp.check(test.expectReportsFor /* sendReportGroupAddresses */, nil /* sendLeaveGroupAddresses */); diff != "" {
+					t.Errorf("mockMulticastGroupProtocol mismatch (-want +got):\n%s", diff)
+				}
+			}
+
+			// Should have no more messages to send.
+			clock.Advance(time.Hour)
+			if diff := mgp.check(nil /* sendReportGroupAddresses */, nil /* sendLeaveGroupAddresses */); diff != "" {
+				t.Errorf("mockMulticastGroupProtocol mismatch (-want +got):\n%s", diff)
+			}
+		})
+	}
+}
+
+func TestJoinCount(t *testing.T) {
+	var g ip.GenericMulticastProtocolState
+	mgp := mockMulticastGroupProtocol{t: t}
+	clock := faketime.NewManualClock()
+
+	mgp.init()
+	g.Init(&mgp.mu, ip.GenericMulticastProtocolOptions{
+		Enabled:                   true,
+		Rand:                      rand.New(rand.NewSource(4)),
+		Clock:                     clock,
+		Protocol:                  &mgp,
+		MaxUnsolicitedReportDelay: time.Second,
+	})
+
+	// Set the join count to 2 for a group.
+	{
+		mgp.mu.Lock()
+		g.JoinGroupLocked(addr1, false /* dontInitialize */)
+		res := g.IsLocallyJoinedRLocked(addr1)
+		mgp.mu.Unlock()
+		if !res {
+			t.Fatalf("got g.IsLocallyJoinedRLocked(%s) = false, want = true", addr1)
+		}
+	}
+	// Only the first join should trigger a report to be sent.
+	if diff := mgp.check([]tcpip.Address{addr1} /* sendReportGroupAddresses */, nil /* sendLeaveGroupAddresses */); diff != "" {
+		t.Fatalf("mockMulticastGroupProtocol mismatch (-want +got):\n%s", diff)
+	}
+	{
+		mgp.mu.Lock()
+		g.JoinGroupLocked(addr1, false /* dontInitialize */)
+		res := g.IsLocallyJoinedRLocked(addr1)
+		mgp.mu.Unlock()
+		if !res {
+			t.Errorf("got g.IsLocallyJoinedRLocked(%s) = false, want = true", addr1)
+		}
+	}
+	if diff := mgp.check(nil /* sendReportGroupAddresses */, nil /* sendLeaveGroupAddresses */); diff != "" {
+		t.Errorf("mockMulticastGroupProtocol mismatch (-want +got):\n%s", diff)
+	}
+	if t.Failed() {
+		t.FailNow()
+	}
+
+	// Group should still be considered joined after leaving once.
+	{
+		mgp.mu.Lock()
+		leaveGroupRes := g.LeaveGroupLocked(addr1)
+		isLocallyJoined := g.IsLocallyJoinedRLocked(addr1)
+		mgp.mu.Unlock()
+		if !leaveGroupRes {
+			t.Errorf("got g.LeaveGroupLocked(%s) = false, want = true", addr1)
+		}
+		if !isLocallyJoined {
+			t.Errorf("got g.IsLocallyJoinedRLocked(%s) = false, want = true", addr1)
+		}
+	}
+	// A leave report should only be sent once the join count reaches 0.
+	if diff := mgp.check(nil /* sendReportGroupAddresses */, nil /* sendLeaveGroupAddresses */); diff != "" {
+		t.Errorf("mockMulticastGroupProtocol mismatch (-want +got):\n%s", diff)
+	}
+	if t.Failed() {
+		t.FailNow()
+	}
+
+	// Leaving once more should actually remove us from the group.
+	{
+		mgp.mu.Lock()
+		leaveGroupRes := g.LeaveGroupLocked(addr1)
+		isLocallyJoined := g.IsLocallyJoinedRLocked(addr1)
+		mgp.mu.Unlock()
+		if !leaveGroupRes {
+			t.Errorf("got g.LeaveGroupLocked(%s) = false, want = true", addr1)
+		}
+		if isLocallyJoined {
+			t.Errorf("got g.IsLocallyJoinedRLocked(%s) = true, want = false", addr1)
+		}
+	}
+	if diff := mgp.check(nil /* sendReportGroupAddresses */, []tcpip.Address{addr1} /* sendLeaveGroupAddresses */); diff != "" {
+		t.Errorf("mockMulticastGroupProtocol mismatch (-want +got):\n%s", diff)
+	}
+	if t.Failed() {
+		t.FailNow()
+	}
+
+	// Group should no longer be joined so we should not have anything to
+	// leave.
+	{
+		mgp.mu.Lock()
+		leaveGroupRes := g.LeaveGroupLocked(addr1)
+		isLocallyJoined := g.IsLocallyJoinedRLocked(addr1)
+		mgp.mu.Unlock()
+		if leaveGroupRes {
+			t.Errorf("got g.LeaveGroupLocked(%s) = true, want = false", addr1)
+		}
+		if isLocallyJoined {
+			t.Errorf("got g.IsLocallyJoinedRLocked(%s) = true, want = false", addr1)
+		}
+	}
+	if diff := mgp.check(nil /* sendReportGroupAddresses */, nil /* sendLeaveGroupAddresses */); diff != "" {
+		t.Errorf("mockMulticastGroupProtocol mismatch (-want +got):\n%s", diff)
+	}
+
+	// Should have no more messages to send.
+	//
+	// Generic multicast protocol timers are expected to take the job mutex.
+	clock.Advance(time.Hour)
+	if diff := mgp.check(nil /* sendReportGroupAddresses */, nil /* sendLeaveGroupAddresses */); diff != "" {
+		t.Errorf("mockMulticastGroupProtocol mismatch (-want +got):\n%s", diff)
+	}
+}
+
+func TestMakeAllNonMemberAndInitialize(t *testing.T) {
+	var g ip.GenericMulticastProtocolState
+	mgp := mockMulticastGroupProtocol{t: t}
+	clock := faketime.NewManualClock()
+
+	mgp.init()
+	g.Init(&mgp.mu, ip.GenericMulticastProtocolOptions{
+		Enabled:                   true,
+		Rand:                      rand.New(rand.NewSource(3)),
+		Clock:                     clock,
+		Protocol:                  &mgp,
+		MaxUnsolicitedReportDelay: maxUnsolicitedReportDelay,
+		AllNodesAddress:           addr3,
+	})
+
+	mgp.mu.Lock()
+	g.JoinGroupLocked(addr1, false /* dontInitialize */)
+	mgp.mu.Unlock()
+	if diff := mgp.check([]tcpip.Address{addr1} /* sendReportGroupAddresses */, nil /* sendLeaveGroupAddresses */); diff != "" {
+		t.Fatalf("mockMulticastGroupProtocol mismatch (-want +got):\n%s", diff)
+	}
+	mgp.mu.Lock()
+	g.JoinGroupLocked(addr2, false /* dontInitialize */)
+	mgp.mu.Unlock()
+	if diff := mgp.check([]tcpip.Address{addr2} /* sendReportGroupAddresses */, nil /* sendLeaveGroupAddresses */); diff != "" {
+		t.Fatalf("mockMulticastGroupProtocol mismatch (-want +got):\n%s", diff)
+	}
+	mgp.mu.Lock()
+	g.JoinGroupLocked(addr3, false /* dontInitialize */)
+	mgp.mu.Unlock()
+	if diff := mgp.check(nil /* sendReportGroupAddresses */, nil /* sendLeaveGroupAddresses */); diff != "" {
+		t.Fatalf("mockMulticastGroupProtocol mismatch (-want +got):\n%s", diff)
+	}
+
+	// Should send the leave reports for each but still consider them locally
+	// joined.
+	mgp.mu.Lock()
+	g.MakeAllNonMemberLocked()
+	mgp.mu.Unlock()
+	if diff := mgp.check(nil /* sendReportGroupAddresses */, []tcpip.Address{addr1, addr2} /* sendLeaveGroupAddresses */); diff != "" {
+		t.Errorf("mockMulticastGroupProtocol mismatch (-want +got):\n%s", diff)
+	}
+	// Generic multicast protocol timers are expected to take the job mutex.
+	clock.Advance(time.Hour)
+	if diff := mgp.check(nil /* sendReportGroupAddresses */, nil /* sendLeaveGroupAddresses */); diff != "" {
+		t.Errorf("mockMulticastGroupProtocol mismatch (-want +got):\n%s", diff)
+	}
+	for _, group := range []tcpip.Address{addr1, addr2, addr3} {
+		mgp.mu.RLock()
+		res := g.IsLocallyJoinedRLocked(group)
+		mgp.mu.RUnlock()
+		if !res {
+			t.Fatalf("got g.IsLocallyJoinedRLocked(%s) = false, want = true", group)
+		}
+	}
+
+	// Should send the initial set of unsolicited reports.
+	mgp.mu.Lock()
+	g.InitializeGroupsLocked()
+	mgp.mu.Unlock()
+	if diff := mgp.check([]tcpip.Address{addr1, addr2} /* sendReportGroupAddresses */, nil /* sendLeaveGroupAddresses */); diff != "" {
+		t.Errorf("mockMulticastGroupProtocol mismatch (-want +got):\n%s", diff)
+	}
+	clock.Advance(maxUnsolicitedReportDelay)
+	if diff := mgp.check([]tcpip.Address{addr1, addr2} /* sendReportGroupAddresses */, nil /* sendLeaveGroupAddresses */); diff != "" {
+		t.Errorf("mockMulticastGroupProtocol mismatch (-want +got):\n%s", diff)
+	}
+
+	// Should have no more messages to send.
+	clock.Advance(time.Hour)
+	if diff := mgp.check(nil /* sendReportGroupAddresses */, nil /* sendLeaveGroupAddresses */); diff != "" {
+		t.Errorf("mockMulticastGroupProtocol mismatch (-want +got):\n%s", diff)
+	}
+}
+
+// TestGroupStateNonMember tests that groups do not send packets when in the
+// non-member state, but are still considered locally joined.
+func TestGroupStateNonMember(t *testing.T) {
+	tests := []struct {
+		name           string
+		enabled        bool
+		dontInitialize bool
+	}{
+		{
+			name:           "Disabled",
+			enabled:        false,
+			dontInitialize: false,
+		},
+		{
+			name:           "Keep non-member",
+			enabled:        true,
+			dontInitialize: true,
+		},
+		{
+			name:           "disabled and Keep non-member",
+			enabled:        false,
+			dontInitialize: true,
+		},
+	}
+
+	for _, test := range tests {
+		t.Run(test.name, func(t *testing.T) {
+			var g ip.GenericMulticastProtocolState
+			mgp := mockMulticastGroupProtocol{t: t}
+			clock := faketime.NewManualClock()
+
+			mgp.init()
+			g.Init(&mgp.mu, ip.GenericMulticastProtocolOptions{
+				Enabled:                   test.enabled,
+				Rand:                      rand.New(rand.NewSource(3)),
+				Clock:                     clock,
+				Protocol:                  &mgp,
+				MaxUnsolicitedReportDelay: maxUnsolicitedReportDelay,
+			})
+
+			// Joining groups should not send any reports.
+			{
+				mgp.mu.Lock()
+				g.JoinGroupLocked(addr1, test.dontInitialize)
+				res := g.IsLocallyJoinedRLocked(addr1)
+				mgp.mu.Unlock()
+				if !res {
+					t.Fatalf("got g.IsLocallyJoinedRLocked(%s) = false, want = true", addr1)
+				}
+			}
+			if diff := mgp.check(nil /* sendReportGroupAddresses */, nil /* sendLeaveGroupAddresses */); diff != "" {
+				t.Fatalf("mockMulticastGroupProtocol mismatch (-want +got):\n%s", diff)
+			}
+			{
+				mgp.mu.Lock()
+				g.JoinGroupLocked(addr2, test.dontInitialize)
+				res := g.IsLocallyJoinedRLocked(addr2)
+				mgp.mu.Unlock()
+				if !res {
+					t.Fatalf("got g.IsLocallyJoinedRLocked(%s) = false, want = true", addr2)
+				}
+			}
+			if diff := mgp.check(nil /* sendReportGroupAddresses */, nil /* sendLeaveGroupAddresses */); diff != "" {
+				t.Fatalf("mockMulticastGroupProtocol mismatch (-want +got):\n%s", diff)
+			}
+
+			// Receiving a query should not send any reports.
+			mgp.mu.Lock()
+			g.HandleQueryLocked(addr1, time.Nanosecond)
+			mgp.mu.Unlock()
+			// Generic multicast protocol timers are expected to take the job mutex.
+			clock.Advance(time.Nanosecond)
+			if diff := mgp.check(nil /* sendReportGroupAddresses */, nil /* sendLeaveGroupAddresses */); diff != "" {
+				t.Fatalf("mockMulticastGroupProtocol mismatch (-want +got):\n%s", diff)
+			}
+
+			// Leaving groups should not send any leave messages.
+			{
+				mgp.mu.Lock()
+				addr2LeaveRes := g.LeaveGroupLocked(addr2)
+				addr1IsJoined := g.IsLocallyJoinedRLocked(addr1)
+				addr2IsJoined := g.IsLocallyJoinedRLocked(addr2)
+				mgp.mu.Unlock()
+				if !addr2LeaveRes {
+					t.Errorf("got g.LeaveGroupLocked(%s) = false, want = true", addr2)
+				}
+				if !addr1IsJoined {
+					t.Errorf("got g.IsLocallyJoinedRLocked(%s) = false, want = true", addr1)
+				}
+				if addr2IsJoined {
+					t.Errorf("got g.IsLocallyJoinedRLocked(%s) = true, want = false", addr2)
+				}
+			}
+			if diff := mgp.check(nil /* sendReportGroupAddresses */, nil /* sendLeaveGroupAddresses */); diff != "" {
+				t.Fatalf("mockMulticastGroupProtocol mismatch (-want +got):\n%s", diff)
+			}
+
+			clock.Advance(time.Hour)
+			if diff := mgp.check(nil /* sendReportGroupAddresses */, nil /* sendLeaveGroupAddresses */); diff != "" {
+				t.Fatalf("mockMulticastGroupProtocol mismatch (-want +got):\n%s", diff)
+			}
+		})
+	}
+}
+
+func TestQueuedPackets(t *testing.T) {
+	var g ip.GenericMulticastProtocolState
+	mgp := mockMulticastGroupProtocol{t: t}
+	mgp.init()
+	clock := faketime.NewManualClock()
+	g.Init(&mgp.mu, ip.GenericMulticastProtocolOptions{
+		Enabled:                   true,
+		Rand:                      rand.New(rand.NewSource(4)),
+		Clock:                     clock,
+		Protocol:                  &mgp,
+		MaxUnsolicitedReportDelay: maxUnsolicitedReportDelay,
+	})
+
+	// Joining should trigger a SendReport, but mgp should report that we did not
+	// send the packet.
+	mgp.mu.Lock()
+	mgp.makeQueuePackets = true
+	g.JoinGroupLocked(addr1, false /* dontInitialize */)
+	mgp.mu.Unlock()
+	if diff := mgp.check([]tcpip.Address{addr1} /* sendReportGroupAddresses */, nil /* sendLeaveGroupAddresses */); diff != "" {
+		t.Fatalf("mockMulticastGroupProtocol mismatch (-want +got):\n%s", diff)
+	}
+
+	// The delayed report timer should have been cancelled since we did not send
+	// the initial report earlier.
+	clock.Advance(time.Hour)
+	if diff := mgp.check(nil /* sendReportGroupAddresses */, nil /* sendLeaveGroupAddresses */); diff != "" {
+		t.Fatalf("mockMulticastGroupProtocol mismatch (-want +got):\n%s", diff)
+	}
+
+	// Mock being able to successfully send the report.
+	mgp.mu.Lock()
+	mgp.makeQueuePackets = false
+	g.SendQueuedReportsLocked()
+	mgp.mu.Unlock()
+	if diff := mgp.check([]tcpip.Address{addr1} /* sendReportGroupAddresses */, nil /* sendLeaveGroupAddresses */); diff != "" {
+		t.Errorf("mockMulticastGroupProtocol mismatch (-want +got):\n%s", diff)
+	}
+
+	// The delayed report (sent after the initial report) should now be sent.
+	clock.Advance(maxUnsolicitedReportDelay)
+	if diff := mgp.check([]tcpip.Address{addr1} /* sendReportGroupAddresses */, nil /* sendLeaveGroupAddresses */); diff != "" {
+		t.Errorf("mockMulticastGroupProtocol mismatch (-want +got):\n%s", diff)
+	}
+
+	// Should not have anything else to send (we should be idle).
+	mgp.mu.Lock()
+	g.SendQueuedReportsLocked()
+	mgp.mu.Unlock()
+	clock.Advance(time.Hour)
+	if diff := mgp.check(nil /* sendReportGroupAddresses */, nil /* sendLeaveGroupAddresses */); diff != "" {
+		t.Fatalf("mockMulticastGroupProtocol mismatch (-want +got):\n%s", diff)
+	}
+
+	// Receive a query but mock being unable to send reports again.
+	mgp.mu.Lock()
+	mgp.makeQueuePackets = true
+	g.HandleQueryLocked(addr1, time.Nanosecond)
+	mgp.mu.Unlock()
+	clock.Advance(time.Nanosecond)
+	if diff := mgp.check([]tcpip.Address{addr1} /* sendReportGroupAddresses */, nil /* sendLeaveGroupAddresses */); diff != "" {
+		t.Errorf("mockMulticastGroupProtocol mismatch (-want +got):\n%s", diff)
+	}
+
+	// Mock being able to send reports again - we should have a packet queued to
+	// send.
+	mgp.mu.Lock()
+	mgp.makeQueuePackets = false
+	g.SendQueuedReportsLocked()
+	mgp.mu.Unlock()
+	if diff := mgp.check([]tcpip.Address{addr1} /* sendReportGroupAddresses */, nil /* sendLeaveGroupAddresses */); diff != "" {
+		t.Errorf("mockMulticastGroupProtocol mismatch (-want +got):\n%s", diff)
+	}
+
+	// Should not have anything else to send.
+	mgp.mu.Lock()
+	g.SendQueuedReportsLocked()
+	mgp.mu.Unlock()
+	clock.Advance(time.Hour)
+	if diff := mgp.check(nil /* sendReportGroupAddresses */, nil /* sendLeaveGroupAddresses */); diff != "" {
+		t.Fatalf("mockMulticastGroupProtocol mismatch (-want +got):\n%s", diff)
+	}
+
+	// Receive a query again, but mock being unable to send reports.
+	mgp.mu.Lock()
+	mgp.makeQueuePackets = true
+	g.HandleQueryLocked(addr1, time.Nanosecond)
+	mgp.mu.Unlock()
+	clock.Advance(time.Nanosecond)
+	if diff := mgp.check([]tcpip.Address{addr1} /* sendReportGroupAddresses */, nil /* sendLeaveGroupAddresses */); diff != "" {
+		t.Errorf("mockMulticastGroupProtocol mismatch (-want +got):\n%s", diff)
+	}
+
+	// Receiving a report should transition us into the idle member state,
+	// even if we had a packet queued. We should no longer have any packets to
+	// send.
+	mgp.mu.Lock()
+	g.HandleReportLocked(addr1)
+	g.SendQueuedReportsLocked()
+	mgp.mu.Unlock()
+	clock.Advance(time.Hour)
+	if diff := mgp.check(nil /* sendReportGroupAddresses */, nil /* sendLeaveGroupAddresses */); diff != "" {
+		t.Fatalf("mockMulticastGroupProtocol mismatch (-want +got):\n%s", diff)
+	}
+
+	// When we fail to send the initial set of reports, incoming reports should
+	// not prevent a newly joined group's reports from being sent.
+	mgp.mu.Lock()
+	mgp.makeQueuePackets = true
+	g.JoinGroupLocked(addr2, false /* dontInitialize */)
+	mgp.mu.Unlock()
+	if diff := mgp.check([]tcpip.Address{addr2} /* sendReportGroupAddresses */, nil /* sendLeaveGroupAddresses */); diff != "" {
+		t.Fatalf("mockMulticastGroupProtocol mismatch (-want +got):\n%s", diff)
+	}
+	mgp.mu.Lock()
+	g.HandleReportLocked(addr2)
+	// Attempting to send queued reports while still unable to send reports should
+	// not change the host state.
+	g.SendQueuedReportsLocked()
+	mgp.mu.Unlock()
+	if diff := mgp.check([]tcpip.Address{addr2} /* sendReportGroupAddresses */, nil /* sendLeaveGroupAddresses */); diff != "" {
+		t.Fatalf("mockMulticastGroupProtocol mismatch (-want +got):\n%s", diff)
+	}
+	// Mock being able to successfully send the report.
+	mgp.mu.Lock()
+	mgp.makeQueuePackets = false
+	g.SendQueuedReportsLocked()
+	mgp.mu.Unlock()
+	if diff := mgp.check([]tcpip.Address{addr2} /* sendReportGroupAddresses */, nil /* sendLeaveGroupAddresses */); diff != "" {
+		t.Errorf("mockMulticastGroupProtocol mismatch (-want +got):\n%s", diff)
+	}
+	// The delayed report (sent after the initial report) should now be sent.
+	clock.Advance(maxUnsolicitedReportDelay)
+	if diff := mgp.check([]tcpip.Address{addr2} /* sendReportGroupAddresses */, nil /* sendLeaveGroupAddresses */); diff != "" {
+		t.Errorf("mockMulticastGroupProtocol mismatch (-want +got):\n%s", diff)
+	}
+
+	// Should not have anything else to send.
+	mgp.mu.Lock()
+	g.SendQueuedReportsLocked()
+	mgp.mu.Unlock()
+	clock.Advance(time.Hour)
+	if diff := mgp.check(nil /* sendReportGroupAddresses */, nil /* sendLeaveGroupAddresses */); diff != "" {
+		t.Fatalf("mockMulticastGroupProtocol mismatch (-want +got):\n%s", diff)
+	}
+}
diff --git a/pkg/tcpip/network/ip_test.go b/pkg/tcpip/network/ip_test.go
index f20b94d97..3005973d7 100644
--- a/pkg/tcpip/network/ip_test.go
+++ b/pkg/tcpip/network/ip_test.go
@@ -23,6 +23,7 @@ import (
 	"gvisor.dev/gvisor/pkg/tcpip/buffer"
 	"gvisor.dev/gvisor/pkg/tcpip/checker"
 	"gvisor.dev/gvisor/pkg/tcpip/header"
+	"gvisor.dev/gvisor/pkg/tcpip/header/parse"
 	"gvisor.dev/gvisor/pkg/tcpip/link/channel"
 	"gvisor.dev/gvisor/pkg/tcpip/link/loopback"
 	"gvisor.dev/gvisor/pkg/tcpip/network/ipv4"
@@ -34,16 +35,16 @@ import (
 )
 
 const (
-	localIPv4Addr  = "\x0a\x00\x00\x01"
-	remoteIPv4Addr = "\x0a\x00\x00\x02"
-	ipv4SubnetAddr = "\x0a\x00\x00\x00"
-	ipv4SubnetMask = "\xff\xff\xff\x00"
-	ipv4Gateway    = "\x0a\x00\x00\x03"
-	localIPv6Addr  = "\x0a\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01"
-	remoteIPv6Addr = "\x0a\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02"
-	ipv6SubnetAddr = "\x0a\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
-	ipv6SubnetMask = "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\x00"
-	ipv6Gateway    = "\x0a\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x03"
+	localIPv4Addr  = tcpip.Address("\x0a\x00\x00\x01")
+	remoteIPv4Addr = tcpip.Address("\x0a\x00\x00\x02")
+	ipv4SubnetAddr = tcpip.Address("\x0a\x00\x00\x00")
+	ipv4SubnetMask = tcpip.Address("\xff\xff\xff\x00")
+	ipv4Gateway    = tcpip.Address("\x0a\x00\x00\x03")
+	localIPv6Addr  = tcpip.Address("\x0a\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01")
+	remoteIPv6Addr = tcpip.Address("\x0a\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02")
+	ipv6SubnetAddr = tcpip.Address("\x0a\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00")
+	ipv6SubnetMask = tcpip.Address("\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\x00")
+	ipv6Gateway    = tcpip.Address("\x0a\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x03")
 	nicID          = 1
 )
 
@@ -110,8 +111,9 @@ func (t *testObject) checkValues(protocol tcpip.TransportProtocolNumber, vv buff
 // DeliverTransportPacket is called by network endpoints after parsing incoming
 // packets. This is used by the test object to verify that the results of the
 // parsing are expected.
-func (t *testObject) DeliverTransportPacket(r *stack.Route, protocol tcpip.TransportProtocolNumber, pkt *stack.PacketBuffer) stack.TransportPacketDisposition {
-	t.checkValues(protocol, pkt.Data, r.RemoteAddress, r.LocalAddress)
+func (t *testObject) DeliverTransportPacket(protocol tcpip.TransportProtocolNumber, pkt *stack.PacketBuffer) stack.TransportPacketDisposition {
+	netHdr := pkt.Network()
+	t.checkValues(protocol, pkt.Data, netHdr.SourceAddress(), netHdr.DestinationAddress())
 	t.dataCalls++
 	return stack.TransportPacketHandled
 }
@@ -191,10 +193,6 @@ func (*testObject) WritePackets(_ *stack.Route, _ *stack.GSO, pkt stack.PacketBu
 	panic("not implemented")
 }
 
-func (*testObject) WriteRawPacket(_ buffer.VectorisedView) *tcpip.Error {
-	return tcpip.ErrNotSupported
-}
-
 // ARPHardwareType implements stack.LinkEndpoint.ARPHardwareType.
 func (*testObject) ARPHardwareType() header.ARPHardwareType {
 	panic("not implemented")
@@ -205,7 +203,7 @@ func (*testObject) AddHeader(local, remote tcpip.LinkAddress, protocol tcpip.Net
 	panic("not implemented")
 }
 
-func buildIPv4Route(local, remote tcpip.Address) (stack.Route, *tcpip.Error) {
+func buildIPv4Route(local, remote tcpip.Address) (*stack.Route, *tcpip.Error) {
 	s := stack.New(stack.Options{
 		NetworkProtocols:   []stack.NetworkProtocolFactory{ipv4.NewProtocol},
 		TransportProtocols: []stack.TransportProtocolFactory{udp.NewProtocol, tcp.NewProtocol},
@@ -221,7 +219,7 @@ func buildIPv4Route(local, remote tcpip.Address) (stack.Route, *tcpip.Error) {
 	return s.FindRoute(nicID, local, remote, ipv4.ProtocolNumber, false /* multicastLoop */)
 }
 
-func buildIPv6Route(local, remote tcpip.Address) (stack.Route, *tcpip.Error) {
+func buildIPv6Route(local, remote tcpip.Address) (*stack.Route, *tcpip.Error) {
 	s := stack.New(stack.Options{
 		NetworkProtocols:   []stack.NetworkProtocolFactory{ipv6.NewProtocol},
 		TransportProtocols: []stack.TransportProtocolFactory{udp.NewProtocol, tcp.NewProtocol},
@@ -298,12 +296,20 @@ func (t *testInterface) Enabled() bool {
 	return !t.mu.disabled
 }
 
+func (*testInterface) Promiscuous() bool {
+	return false
+}
+
 func (t *testInterface) setEnabled(v bool) {
 	t.mu.Lock()
 	defer t.mu.Unlock()
 	t.mu.disabled = !v
 }
 
+func (*testInterface) WritePacketToRemote(tcpip.LinkAddress, *stack.GSO, tcpip.NetworkProtocolNumber, *stack.PacketBuffer) *tcpip.Error {
+	return tcpip.ErrNotSupported
+}
+
 func TestSourceAddressValidation(t *testing.T) {
 	rxIPv4ICMP := func(e *channel.Endpoint, src tcpip.Address) {
 		totalLen := header.IPv4MinimumSize + header.ICMPv4MinimumSize
@@ -315,7 +321,6 @@ func TestSourceAddressValidation(t *testing.T) {
 		pkt.SetChecksum(^header.Checksum(pkt, 0))
 		ip := header.IPv4(hdr.Prepend(header.IPv4MinimumSize))
 		ip.Encode(&header.IPv4Fields{
-			IHL:         header.IPv4MinimumSize,
 			TotalLength: uint16(totalLen),
 			Protocol:    uint8(icmp.ProtocolNumber4),
 			TTL:         ipv4.DefaultTTL,
@@ -339,11 +344,11 @@ func TestSourceAddressValidation(t *testing.T) {
 		pkt.SetChecksum(header.ICMPv6Checksum(pkt, src, localIPv6Addr, buffer.VectorisedView{}))
 		ip := header.IPv6(hdr.Prepend(header.IPv6MinimumSize))
 		ip.Encode(&header.IPv6Fields{
-			PayloadLength: header.ICMPv6MinimumSize,
-			NextHeader:    uint8(icmp.ProtocolNumber6),
-			HopLimit:      ipv6.DefaultTTL,
-			SrcAddr:       src,
-			DstAddr:       localIPv6Addr,
+			PayloadLength:     header.ICMPv6MinimumSize,
+			TransportProtocol: icmp.ProtocolNumber6,
+			HopLimit:          ipv6.DefaultTTL,
+			SrcAddr:           src,
+			DstAddr:           localIPv6Addr,
 		})
 		e.InjectInbound(header.IPv6ProtocolNumber, stack.NewPacketBuffer(stack.PacketBufferOptions{
 			Data: hdr.View().ToVectorisedView(),
@@ -545,7 +550,7 @@ func TestIPv4Send(t *testing.T) {
 	if err != nil {
 		t.Fatalf("could not find route: %v", err)
 	}
-	if err := ep.WritePacket(&r, nil /* gso */, stack.NetworkHeaderParams{
+	if err := ep.WritePacket(r, nil /* gso */, stack.NetworkHeaderParams{
 		Protocol: 123,
 		TTL:      123,
 		TOS:      stack.DefaultTOS,
@@ -554,59 +559,135 @@ func TestIPv4Send(t *testing.T) {
 	}
 }
 
-func TestIPv4Receive(t *testing.T) {
-	s := buildDummyStack(t)
-	proto := s.NetworkProtocolInstance(ipv4.ProtocolNumber)
-	nic := testInterface{
-		testObject: testObject{
-			t:  t,
-			v4: true,
+func TestReceive(t *testing.T) {
+	tests := []struct {
+		name         string
+		protoFactory stack.NetworkProtocolFactory
+		protoNum     tcpip.NetworkProtocolNumber
+		v4           bool
+		epAddr       tcpip.AddressWithPrefix
+		handlePacket func(*testing.T, stack.NetworkEndpoint, *testInterface)
+	}{
+		{
+			name:         "IPv4",
+			protoFactory: ipv4.NewProtocol,
+			protoNum:     ipv4.ProtocolNumber,
+			v4:           true,
+			epAddr:       localIPv4Addr.WithPrefix(),
+			handlePacket: func(t *testing.T, ep stack.NetworkEndpoint, nic *testInterface) {
+				const totalLen = header.IPv4MinimumSize + 30 /* payload length */
+
+				view := buffer.NewView(totalLen)
+				ip := header.IPv4(view)
+				ip.Encode(&header.IPv4Fields{
+					TotalLength: totalLen,
+					TTL:         ipv4.DefaultTTL,
+					Protocol:    10,
+					SrcAddr:     remoteIPv4Addr,
+					DstAddr:     localIPv4Addr,
+				})
+				ip.SetChecksum(^ip.CalculateChecksum())
+
+				// Fill the payload with non-zero bytes.
+				for i := header.IPv4MinimumSize; i < len(view); i++ {
+					view[i] = uint8(i)
+				}
+
+				// Give the packet to the IPv4 endpoint; the dispatcher will validate that it's ok.
+				nic.testObject.protocol = 10
+				nic.testObject.srcAddr = remoteIPv4Addr
+				nic.testObject.dstAddr = localIPv4Addr
+				nic.testObject.contents = view[header.IPv4MinimumSize:totalLen]
+
+				pkt := stack.NewPacketBuffer(stack.PacketBufferOptions{
+					Data: view.ToVectorisedView(),
+				})
+				if ok := parse.IPv4(pkt); !ok {
+					t.Fatalf("failed to parse packet: %x", pkt.Data.ToView())
+				}
+				ep.HandlePacket(pkt)
+			},
 		},
-	}
-	ep := proto.NewEndpoint(&nic, nil, nil, &nic.testObject)
-	defer ep.Close()
+		{
+			name:         "IPv6",
+			protoFactory: ipv6.NewProtocol,
+			protoNum:     ipv6.ProtocolNumber,
+			v4:           false,
+			epAddr:       localIPv6Addr.WithPrefix(),
+			handlePacket: func(t *testing.T, ep stack.NetworkEndpoint, nic *testInterface) {
+				const payloadLen = 30
+				view := buffer.NewView(header.IPv6MinimumSize + payloadLen)
+				ip := header.IPv6(view)
+				ip.Encode(&header.IPv6Fields{
+					PayloadLength:     payloadLen,
+					TransportProtocol: 10,
+					HopLimit:          ipv6.DefaultTTL,
+					SrcAddr:           remoteIPv6Addr,
+					DstAddr:           localIPv6Addr,
+				})
 
-	if err := ep.Enable(); err != nil {
-		t.Fatalf("ep.Enable(): %s", err)
-	}
+				// Fill the payload with non-zero bytes.
+				for i := header.IPv6MinimumSize; i < len(view); i++ {
+					view[i] = uint8(i)
+				}
 
-	totalLen := header.IPv4MinimumSize + 30
-	view := buffer.NewView(totalLen)
-	ip := header.IPv4(view)
-	ip.Encode(&header.IPv4Fields{
-		IHL:         header.IPv4MinimumSize,
-		TotalLength: uint16(totalLen),
-		TTL:         20,
-		Protocol:    10,
-		SrcAddr:     remoteIPv4Addr,
-		DstAddr:     localIPv4Addr,
-	})
-	ip.SetChecksum(^ip.CalculateChecksum())
+				// Give the packet to the IPv6 endpoint; the dispatcher will validate that it's ok.
+				nic.testObject.protocol = 10
+				nic.testObject.srcAddr = remoteIPv6Addr
+				nic.testObject.dstAddr = localIPv6Addr
+				nic.testObject.contents = view[header.IPv6MinimumSize:][:payloadLen]
 
-	// Make payload be non-zero.
-	for i := header.IPv4MinimumSize; i < totalLen; i++ {
-		view[i] = uint8(i)
+				pkt := stack.NewPacketBuffer(stack.PacketBufferOptions{
+					Data: view.ToVectorisedView(),
+				})
+				if _, _, _, _, ok := parse.IPv6(pkt); !ok {
+					t.Fatalf("failed to parse packet: %x", pkt.Data.ToView())
+				}
+				ep.HandlePacket(pkt)
+			},
+		},
 	}
 
-	// Give packet to ipv4 endpoint, dispatcher will validate that it's ok.
-	nic.testObject.protocol = 10
-	nic.testObject.srcAddr = remoteIPv4Addr
-	nic.testObject.dstAddr = localIPv4Addr
-	nic.testObject.contents = view[header.IPv4MinimumSize:totalLen]
+	for _, test := range tests {
+		t.Run(test.name, func(t *testing.T) {
+			s := stack.New(stack.Options{
+				NetworkProtocols: []stack.NetworkProtocolFactory{test.protoFactory},
+			})
+			nic := testInterface{
+				testObject: testObject{
+					t:  t,
+					v4: test.v4,
+				},
+			}
+			ep := s.NetworkProtocolInstance(test.protoNum).NewEndpoint(&nic, nil, nil, &nic.testObject)
+			defer ep.Close()
 
-	r, err := buildIPv4Route(localIPv4Addr, remoteIPv4Addr)
-	if err != nil {
-		t.Fatalf("could not find route: %v", err)
-	}
-	pkt := stack.NewPacketBuffer(stack.PacketBufferOptions{
-		Data: view.ToVectorisedView(),
-	})
-	if _, _, ok := proto.Parse(pkt); !ok {
-		t.Fatalf("failed to parse packet: %x", pkt.Data.ToView())
-	}
-	ep.HandlePacket(&r, pkt)
-	if nic.testObject.dataCalls != 1 {
-		t.Fatalf("Bad number of data calls: got %x, want 1", nic.testObject.dataCalls)
+			if err := ep.Enable(); err != nil {
+				t.Fatalf("ep.Enable(): %s", err)
+			}
+
+			addressableEndpoint, ok := ep.(stack.AddressableEndpoint)
+			if !ok {
+				t.Fatalf("expected network endpoint with number = %d to implement stack.AddressableEndpoint", test.protoNum)
+			}
+			if ep, err := addressableEndpoint.AddAndAcquirePermanentAddress(test.epAddr, stack.CanBePrimaryEndpoint, stack.AddressConfigStatic, false /* deprecated */); err != nil {
+				t.Fatalf("addressableEndpoint.AddAndAcquirePermanentAddress(%s, CanBePrimaryEndpoint, AddressConfigStatic, false): %s", test.epAddr, err)
+			} else {
+				ep.DecRef()
+			}
+
+			stat := s.Stats().IP.PacketsReceived
+			if got := stat.Value(); got != 0 {
+				t.Fatalf("got s.Stats().IP.PacketsReceived.Value() = %d, want = 0", got)
+			}
+			test.handlePacket(t, ep, &nic)
+			if nic.testObject.dataCalls != 1 {
+				t.Errorf("Bad number of data calls: got %x, want 1", nic.testObject.dataCalls)
+			}
+			if got := stat.Value(); got != 1 {
+				t.Errorf("got s.Stats().IP.PacketsReceived.Value() = %d, want = 1", got)
+			}
+		})
 	}
 }
 
@@ -630,10 +711,6 @@ func TestIPv4ReceiveControl(t *testing.T) {
 		{"Non-zero fragment offset", 0, 100, header.ICMPv4PortUnreachable, stack.ControlPortUnreachable, 0, 0},
 		{"Zero-length packet", 0, 0, header.ICMPv4PortUnreachable, stack.ControlPortUnreachable, 0, 2*header.IPv4MinimumSize + header.ICMPv4MinimumSize + 8},
 	}
-	r, err := buildIPv4Route(localIPv4Addr, "\x0a\x00\x00\xbb")
-	if err != nil {
-		t.Fatal(err)
-	}
 	for _, c := range cases {
 		t.Run(c.name, func(t *testing.T) {
 			s := buildDummyStack(t)
@@ -656,7 +733,6 @@ func TestIPv4ReceiveControl(t *testing.T) {
 			// Create the outer IPv4 header.
 			ip := header.IPv4(view)
 			ip.Encode(&header.IPv4Fields{
-				IHL:         header.IPv4MinimumSize,
 				TotalLength: uint16(len(view) - c.trunc),
 				TTL:         20,
 				Protocol:    uint8(header.ICMPv4ProtocolNumber),
@@ -675,7 +751,6 @@ func TestIPv4ReceiveControl(t *testing.T) {
 			// Create the inner IPv4 header.
 			ip = header.IPv4(view[header.IPv4MinimumSize+header.ICMPv4MinimumSize:])
 			ip.Encode(&header.IPv4Fields{
-				IHL:            header.IPv4MinimumSize,
 				TotalLength:    100,
 				TTL:            20,
 				Protocol:       10,
@@ -690,6 +765,10 @@ func TestIPv4ReceiveControl(t *testing.T) {
 				view[i] = uint8(i)
 			}
 
+			icmp.SetChecksum(0)
+			checksum := ^header.Checksum(icmp, 0 /* initial */)
+			icmp.SetChecksum(checksum)
+
 			// Give packet to IPv4 endpoint, dispatcher will validate that
 			// it's ok.
 			nic.testObject.protocol = 10
@@ -699,7 +778,19 @@ func TestIPv4ReceiveControl(t *testing.T) {
 			nic.testObject.typ = c.expectedTyp
 			nic.testObject.extra = c.expectedExtra
 
-			ep.HandlePacket(&r, truncatedPacket(view, c.trunc, header.IPv4MinimumSize))
+			addressableEndpoint, ok := ep.(stack.AddressableEndpoint)
+			if !ok {
+				t.Fatal("expected IPv4 network endpoint to implement stack.AddressableEndpoint")
+			}
+			addr := localIPv4Addr.WithPrefix()
+			if ep, err := addressableEndpoint.AddAndAcquirePermanentAddress(addr, stack.CanBePrimaryEndpoint, stack.AddressConfigStatic, false /* deprecated */); err != nil {
+				t.Fatalf("addressableEndpoint.AddAndAcquirePermanentAddress(%s, CanBePrimaryEndpoint, AddressConfigStatic, false): %s", addr, err)
+			} else {
+				ep.DecRef()
+			}
+
+			pkt := truncatedPacket(view, c.trunc, header.IPv4MinimumSize)
+			ep.HandlePacket(pkt)
 			if want := c.expectedCount; nic.testObject.controlCalls != want {
 				t.Fatalf("Bad number of control calls for %q case: got %v, want %v", c.name, nic.testObject.controlCalls, want)
 			}
@@ -708,7 +799,9 @@ func TestIPv4ReceiveControl(t *testing.T) {
 }
 
 func TestIPv4FragmentationReceive(t *testing.T) {
-	s := buildDummyStack(t)
+	s := stack.New(stack.Options{
+		NetworkProtocols: []stack.NetworkProtocolFactory{ipv4.NewProtocol},
+	})
 	proto := s.NetworkProtocolInstance(ipv4.ProtocolNumber)
 	nic := testInterface{
 		testObject: testObject{
@@ -728,7 +821,6 @@ func TestIPv4FragmentationReceive(t *testing.T) {
 	frag1 := buffer.NewView(totalLen)
 	ip1 := header.IPv4(frag1)
 	ip1.Encode(&header.IPv4Fields{
-		IHL:            header.IPv4MinimumSize,
 		TotalLength:    uint16(totalLen),
 		TTL:            20,
 		Protocol:       10,
@@ -747,7 +839,6 @@ func TestIPv4FragmentationReceive(t *testing.T) {
 	frag2 := buffer.NewView(totalLen)
 	ip2 := header.IPv4(frag2)
 	ip2.Encode(&header.IPv4Fields{
-		IHL:            header.IPv4MinimumSize,
 		TotalLength:    uint16(totalLen),
 		TTL:            20,
 		Protocol:       10,
@@ -768,11 +859,6 @@ func TestIPv4FragmentationReceive(t *testing.T) {
 	nic.testObject.dstAddr = localIPv4Addr
 	nic.testObject.contents = append(frag1[header.IPv4MinimumSize:totalLen], frag2[header.IPv4MinimumSize:totalLen]...)
 
-	r, err := buildIPv4Route(localIPv4Addr, remoteIPv4Addr)
-	if err != nil {
-		t.Fatalf("could not find route: %v", err)
-	}
-
 	// Send first segment.
 	pkt := stack.NewPacketBuffer(stack.PacketBufferOptions{
 		Data: frag1.ToVectorisedView(),
@@ -780,7 +866,19 @@ func TestIPv4FragmentationReceive(t *testing.T) {
 	if _, _, ok := proto.Parse(pkt); !ok {
 		t.Fatalf("failed to parse packet: %x", pkt.Data.ToView())
 	}
-	ep.HandlePacket(&r, pkt)
+
+	addressableEndpoint, ok := ep.(stack.AddressableEndpoint)
+	if !ok {
+		t.Fatal("expected IPv4 network endpoint to implement stack.AddressableEndpoint")
+	}
+	addr := localIPv4Addr.WithPrefix()
+	if ep, err := addressableEndpoint.AddAndAcquirePermanentAddress(addr, stack.CanBePrimaryEndpoint, stack.AddressConfigStatic, false /* deprecated */); err != nil {
+		t.Fatalf("addressableEndpoint.AddAndAcquirePermanentAddress(%s, CanBePrimaryEndpoint, AddressConfigStatic, false): %s", addr, err)
+	} else {
+		ep.DecRef()
+	}
+
+	ep.HandlePacket(pkt)
 	if nic.testObject.dataCalls != 0 {
 		t.Fatalf("Bad number of data calls: got %x, want 0", nic.testObject.dataCalls)
 	}
@@ -792,7 +890,7 @@ func TestIPv4FragmentationReceive(t *testing.T) {
 	if _, _, ok := proto.Parse(pkt); !ok {
 		t.Fatalf("failed to parse packet: %x", pkt.Data.ToView())
 	}
-	ep.HandlePacket(&r, pkt)
+	ep.HandlePacket(pkt)
 	if nic.testObject.dataCalls != 1 {
 		t.Fatalf("Bad number of data calls: got %x, want 1", nic.testObject.dataCalls)
 	}
@@ -835,7 +933,7 @@ func TestIPv6Send(t *testing.T) {
 	if err != nil {
 		t.Fatalf("could not find route: %v", err)
 	}
-	if err := ep.WritePacket(&r, nil /* gso */, stack.NetworkHeaderParams{
+	if err := ep.WritePacket(r, nil /* gso */, stack.NetworkHeaderParams{
 		Protocol: 123,
 		TTL:      123,
 		TOS:      stack.DefaultTOS,
@@ -844,60 +942,6 @@ func TestIPv6Send(t *testing.T) {
 	}
 }
 
-func TestIPv6Receive(t *testing.T) {
-	s := buildDummyStack(t)
-	proto := s.NetworkProtocolInstance(ipv6.ProtocolNumber)
-	nic := testInterface{
-		testObject: testObject{
-			t: t,
-		},
-	}
-	ep := proto.NewEndpoint(&nic, nil, nil, &nic.testObject)
-	defer ep.Close()
-
-	if err := ep.Enable(); err != nil {
-		t.Fatalf("ep.Enable(): %s", err)
-	}
-
-	totalLen := header.IPv6MinimumSize + 30
-	view := buffer.NewView(totalLen)
-	ip := header.IPv6(view)
-	ip.Encode(&header.IPv6Fields{
-		PayloadLength: uint16(totalLen - header.IPv6MinimumSize),
-		NextHeader:    10,
-		HopLimit:      20,
-		SrcAddr:       remoteIPv6Addr,
-		DstAddr:       localIPv6Addr,
-	})
-
-	// Make payload be non-zero.
-	for i := header.IPv6MinimumSize; i < totalLen; i++ {
-		view[i] = uint8(i)
-	}
-
-	// Give packet to ipv6 endpoint, dispatcher will validate that it's ok.
-	nic.testObject.protocol = 10
-	nic.testObject.srcAddr = remoteIPv6Addr
-	nic.testObject.dstAddr = localIPv6Addr
-	nic.testObject.contents = view[header.IPv6MinimumSize:totalLen]
-
-	r, err := buildIPv6Route(localIPv6Addr, remoteIPv6Addr)
-	if err != nil {
-		t.Fatalf("could not find route: %v", err)
-	}
-
-	pkt := stack.NewPacketBuffer(stack.PacketBufferOptions{
-		Data: view.ToVectorisedView(),
-	})
-	if _, _, ok := proto.Parse(pkt); !ok {
-		t.Fatalf("failed to parse packet: %x", pkt.Data.ToView())
-	}
-	ep.HandlePacket(&r, pkt)
-	if nic.testObject.dataCalls != 1 {
-		t.Fatalf("Bad number of data calls: got %x, want 1", nic.testObject.dataCalls)
-	}
-}
-
 func TestIPv6ReceiveControl(t *testing.T) {
 	newUint16 := func(v uint16) *uint16 { return &v }
 
@@ -924,13 +968,6 @@ func TestIPv6ReceiveControl(t *testing.T) {
 		{"Non-zero fragment offset", 0, newUint16(100), header.ICMPv6DstUnreachable, header.ICMPv6PortUnreachable, stack.ControlPortUnreachable, 0, 0},
 		{"Zero-length packet", 0, nil, header.ICMPv6DstUnreachable, header.ICMPv6PortUnreachable, stack.ControlPortUnreachable, 0, 2*header.IPv6MinimumSize + header.ICMPv6DstUnreachableMinimumSize + 8},
 	}
-	r, err := buildIPv6Route(
-		localIPv6Addr,
-		"\x0a\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xaa",
-	)
-	if err != nil {
-		t.Fatal(err)
-	}
 	for _, c := range cases {
 		t.Run(c.name, func(t *testing.T) {
 			s := buildDummyStack(t)
@@ -956,11 +993,11 @@ func TestIPv6ReceiveControl(t *testing.T) {
 			// Create the outer IPv6 header.
 			ip := header.IPv6(view)
 			ip.Encode(&header.IPv6Fields{
-				PayloadLength: uint16(len(view) - header.IPv6MinimumSize - c.trunc),
-				NextHeader:    uint8(header.ICMPv6ProtocolNumber),
-				HopLimit:      20,
-				SrcAddr:       outerSrcAddr,
-				DstAddr:       localIPv6Addr,
+				PayloadLength:     uint16(len(view) - header.IPv6MinimumSize - c.trunc),
+				TransportProtocol: header.ICMPv6ProtocolNumber,
+				HopLimit:          20,
+				SrcAddr:           outerSrcAddr,
+				DstAddr:           localIPv6Addr,
 			})
 
 			// Create the ICMP header.
@@ -970,28 +1007,27 @@ func TestIPv6ReceiveControl(t *testing.T) {
 			icmp.SetIdent(0xdead)
 			icmp.SetSequence(0xbeef)
 
-			// Create the inner IPv6 header.
-			ip = header.IPv6(view[header.IPv6MinimumSize+header.ICMPv6PayloadOffset:])
-			ip.Encode(&header.IPv6Fields{
-				PayloadLength: 100,
-				NextHeader:    10,
-				HopLimit:      20,
-				SrcAddr:       localIPv6Addr,
-				DstAddr:       remoteIPv6Addr,
-			})
-
+			var extHdrs header.IPv6ExtHdrSerializer
 			// Build the fragmentation header if needed.
 			if c.fragmentOffset != nil {
-				ip.SetNextHeader(header.IPv6FragmentHeader)
-				frag := header.IPv6Fragment(view[2*header.IPv6MinimumSize+header.ICMPv6MinimumSize:])
-				frag.Encode(&header.IPv6FragmentFields{
-					NextHeader:     10,
+				extHdrs = append(extHdrs, &header.IPv6SerializableFragmentExtHdr{
 					FragmentOffset: *c.fragmentOffset,
 					M:              true,
 					Identification: 0x12345678,
 				})
 			}
 
+			// Create the inner IPv6 header.
+			ip = header.IPv6(view[header.IPv6MinimumSize+header.ICMPv6PayloadOffset:])
+			ip.Encode(&header.IPv6Fields{
+				PayloadLength:     100,
+				TransportProtocol: 10,
+				HopLimit:          20,
+				SrcAddr:           localIPv6Addr,
+				DstAddr:           remoteIPv6Addr,
+				ExtensionHeaders:  extHdrs,
+			})
+
 			// Make payload be non-zero.
 			for i := dataOffset; i < len(view); i++ {
 				view[i] = uint8(i)
@@ -1009,7 +1045,18 @@ func TestIPv6ReceiveControl(t *testing.T) {
 			// Set ICMPv6 checksum.
 			icmp.SetChecksum(header.ICMPv6Checksum(icmp, outerSrcAddr, localIPv6Addr, buffer.VectorisedView{}))
 
-			ep.HandlePacket(&r, truncatedPacket(view, c.trunc, header.IPv6MinimumSize))
+			addressableEndpoint, ok := ep.(stack.AddressableEndpoint)
+			if !ok {
+				t.Fatal("expected IPv6 network endpoint to implement stack.AddressableEndpoint")
+			}
+			addr := localIPv6Addr.WithPrefix()
+			if ep, err := addressableEndpoint.AddAndAcquirePermanentAddress(addr, stack.CanBePrimaryEndpoint, stack.AddressConfigStatic, false /* deprecated */); err != nil {
+				t.Fatalf("addressableEndpoint.AddAndAcquirePermanentAddress(%s, CanBePrimaryEndpoint, AddressConfigStatic, false): %s", addr, err)
+			} else {
+				ep.DecRef()
+			}
+			pkt := truncatedPacket(view, c.trunc, header.IPv6MinimumSize)
+			ep.HandlePacket(pkt)
 			if want := c.expectedCount; nic.testObject.controlCalls != want {
 				t.Fatalf("Bad number of control calls for %q case: got %v, want %v", c.name, nic.testObject.controlCalls, want)
 			}
@@ -1035,15 +1082,25 @@ func TestWriteHeaderIncludedPacket(t *testing.T) {
 		nicID          = 1
 		transportProto = 5
 
-		dataLen    = 4
-		optionsLen = 4
+		dataLen = 4
 	)
 
 	dataBuf := [dataLen]byte{1, 2, 3, 4}
 	data := dataBuf[:]
 
-	ipv4OptionsBuf := [optionsLen]byte{0, 1, 0, 1}
-	ipv4Options := ipv4OptionsBuf[:]
+	ipv4Options := header.IPv4OptionsSerializer{
+		&header.IPv4SerializableListEndOption{},
+		&header.IPv4SerializableNOPOption{},
+		&header.IPv4SerializableListEndOption{},
+		&header.IPv4SerializableNOPOption{},
+	}
+
+	expectOptions := header.IPv4Options{
+		byte(header.IPv4OptionListEndType),
+		byte(header.IPv4OptionNOPType),
+		byte(header.IPv4OptionListEndType),
+		byte(header.IPv4OptionNOPType),
+	}
 
 	ipv6FragmentExtHdrBuf := [header.IPv6FragmentExtHdrLength]byte{transportProto, 0, 62, 4, 1, 2, 3, 4}
 	ipv6FragmentExtHdr := ipv6FragmentExtHdrBuf[:]
@@ -1063,7 +1120,7 @@ func TestWriteHeaderIncludedPacket(t *testing.T) {
 		protoNum     tcpip.NetworkProtocolNumber
 		nicAddr      tcpip.Address
 		remoteAddr   tcpip.Address
-		pktGen       func(*testing.T, tcpip.Address) buffer.View
+		pktGen       func(*testing.T, tcpip.Address) buffer.VectorisedView
 		checker      func(*testing.T, *stack.PacketBuffer, tcpip.Address)
 		expectedErr  *tcpip.Error
 	}{
@@ -1073,7 +1130,7 @@ func TestWriteHeaderIncludedPacket(t *testing.T) {
 			protoNum:     ipv4.ProtocolNumber,
 			nicAddr:      localIPv4Addr,
 			remoteAddr:   remoteIPv4Addr,
-			pktGen: func(t *testing.T, src tcpip.Address) buffer.View {
+			pktGen: func(t *testing.T, src tcpip.Address) buffer.VectorisedView {
 				totalLen := header.IPv4MinimumSize + len(data)
 				hdr := buffer.NewPrependable(totalLen)
 				if n := copy(hdr.Prepend(len(data)), data); n != len(data) {
@@ -1081,13 +1138,12 @@ func TestWriteHeaderIncludedPacket(t *testing.T) {
 				}
 				ip := header.IPv4(hdr.Prepend(header.IPv4MinimumSize))
 				ip.Encode(&header.IPv4Fields{
-					IHL:      header.IPv4MinimumSize,
 					Protocol: transportProto,
 					TTL:      ipv4.DefaultTTL,
 					SrcAddr:  src,
 					DstAddr:  header.IPv4Any,
 				})
-				return hdr.View()
+				return hdr.View().ToVectorisedView()
 			},
 			checker: func(t *testing.T, pkt *stack.PacketBuffer, src tcpip.Address) {
 				if src == header.IPv4Any {
@@ -1115,7 +1171,7 @@ func TestWriteHeaderIncludedPacket(t *testing.T) {
 			protoNum:     ipv4.ProtocolNumber,
 			nicAddr:      localIPv4Addr,
 			remoteAddr:   remoteIPv4Addr,
-			pktGen: func(t *testing.T, src tcpip.Address) buffer.View {
+			pktGen: func(t *testing.T, src tcpip.Address) buffer.VectorisedView {
 				totalLen := header.IPv4MinimumSize + len(data)
 				hdr := buffer.NewPrependable(totalLen)
 				if n := copy(hdr.Prepend(len(data)), data); n != len(data) {
@@ -1123,13 +1179,13 @@ func TestWriteHeaderIncludedPacket(t *testing.T) {
 				}
 				ip := header.IPv4(hdr.Prepend(header.IPv4MinimumSize))
 				ip.Encode(&header.IPv4Fields{
-					IHL:      header.IPv4MinimumSize - 1,
 					Protocol: transportProto,
 					TTL:      ipv4.DefaultTTL,
 					SrcAddr:  src,
 					DstAddr:  header.IPv4Any,
 				})
-				return hdr.View()
+				ip.SetHeaderLength(header.IPv4MinimumSize - 1)
+				return hdr.View().ToVectorisedView()
 			},
 			expectedErr: tcpip.ErrMalformedHeader,
 		},
@@ -1139,16 +1195,15 @@ func TestWriteHeaderIncludedPacket(t *testing.T) {
 			protoNum:     ipv4.ProtocolNumber,
 			nicAddr:      localIPv4Addr,
 			remoteAddr:   remoteIPv4Addr,
-			pktGen: func(t *testing.T, src tcpip.Address) buffer.View {
+			pktGen: func(t *testing.T, src tcpip.Address) buffer.VectorisedView {
 				ip := header.IPv4(make([]byte, header.IPv4MinimumSize))
 				ip.Encode(&header.IPv4Fields{
-					IHL:      header.IPv4MinimumSize,
 					Protocol: transportProto,
 					TTL:      ipv4.DefaultTTL,
 					SrcAddr:  src,
 					DstAddr:  header.IPv4Any,
 				})
-				return buffer.View(ip[:len(ip)-1])
+				return buffer.View(ip[:len(ip)-1]).ToVectorisedView()
 			},
 			expectedErr: tcpip.ErrMalformedHeader,
 		},
@@ -1158,16 +1213,15 @@ func TestWriteHeaderIncludedPacket(t *testing.T) {
 			protoNum:     ipv4.ProtocolNumber,
 			nicAddr:      localIPv4Addr,
 			remoteAddr:   remoteIPv4Addr,
-			pktGen: func(t *testing.T, src tcpip.Address) buffer.View {
+			pktGen: func(t *testing.T, src tcpip.Address) buffer.VectorisedView {
 				ip := header.IPv4(make([]byte, header.IPv4MinimumSize))
 				ip.Encode(&header.IPv4Fields{
-					IHL:      header.IPv4MinimumSize,
 					Protocol: transportProto,
 					TTL:      ipv4.DefaultTTL,
 					SrcAddr:  src,
 					DstAddr:  header.IPv4Any,
 				})
-				return buffer.View(ip)
+				return buffer.View(ip).ToVectorisedView()
 			},
 			checker: func(t *testing.T, pkt *stack.PacketBuffer, src tcpip.Address) {
 				if src == header.IPv4Any {
@@ -1195,8 +1249,8 @@ func TestWriteHeaderIncludedPacket(t *testing.T) {
 			protoNum:     ipv4.ProtocolNumber,
 			nicAddr:      localIPv4Addr,
 			remoteAddr:   remoteIPv4Addr,
-			pktGen: func(t *testing.T, src tcpip.Address) buffer.View {
-				ipHdrLen := header.IPv4MinimumSize + len(ipv4Options)
+			pktGen: func(t *testing.T, src tcpip.Address) buffer.VectorisedView {
+				ipHdrLen := int(header.IPv4MinimumSize + ipv4Options.Length())
 				totalLen := ipHdrLen + len(data)
 				hdr := buffer.NewPrependable(totalLen)
 				if n := copy(hdr.Prepend(len(data)), data); n != len(data) {
@@ -1204,16 +1258,54 @@ func TestWriteHeaderIncludedPacket(t *testing.T) {
 				}
 				ip := header.IPv4(hdr.Prepend(ipHdrLen))
 				ip.Encode(&header.IPv4Fields{
-					IHL:      uint8(ipHdrLen),
 					Protocol: transportProto,
 					TTL:      ipv4.DefaultTTL,
 					SrcAddr:  src,
 					DstAddr:  header.IPv4Any,
+					Options:  ipv4Options,
 				})
-				if n := copy(ip.Options(), ipv4Options); n != len(ipv4Options) {
-					t.Fatalf("copied %d bytes, expected %d bytes", n, len(ipv4Options))
+				return hdr.View().ToVectorisedView()
+			},
+			checker: func(t *testing.T, pkt *stack.PacketBuffer, src tcpip.Address) {
+				if src == header.IPv4Any {
+					src = localIPv4Addr
+				}
+
+				netHdr := pkt.NetworkHeader()
+
+				hdrLen := int(header.IPv4MinimumSize + ipv4Options.Length())
+				if len(netHdr.View()) != hdrLen {
+					t.Errorf("got len(netHdr.View()) = %d, want = %d", len(netHdr.View()), hdrLen)
 				}
-				return hdr.View()
+
+				checker.IPv4(t, stack.PayloadSince(netHdr),
+					checker.SrcAddr(src),
+					checker.DstAddr(remoteIPv4Addr),
+					checker.IPv4HeaderLength(hdrLen),
+					checker.IPFullLength(uint16(hdrLen+len(data))),
+					checker.IPv4Options(expectOptions),
+					checker.IPPayload(data),
+				)
+			},
+		},
+		{
+			name:         "IPv4 with options and data across views",
+			protoFactory: ipv4.NewProtocol,
+			protoNum:     ipv4.ProtocolNumber,
+			nicAddr:      localIPv4Addr,
+			remoteAddr:   remoteIPv4Addr,
+			pktGen: func(t *testing.T, src tcpip.Address) buffer.VectorisedView {
+				ip := header.IPv4(make([]byte, header.IPv4MinimumSize+ipv4Options.Length()))
+				ip.Encode(&header.IPv4Fields{
+					Protocol: transportProto,
+					TTL:      ipv4.DefaultTTL,
+					SrcAddr:  src,
+					DstAddr:  header.IPv4Any,
+					Options:  ipv4Options,
+				})
+				vv := buffer.View(ip).ToVectorisedView()
+				vv.AppendView(data)
+				return vv
 			},
 			checker: func(t *testing.T, pkt *stack.PacketBuffer, src tcpip.Address) {
 				if src == header.IPv4Any {
@@ -1222,7 +1314,7 @@ func TestWriteHeaderIncludedPacket(t *testing.T) {
 
 				netHdr := pkt.NetworkHeader()
 
-				hdrLen := header.IPv4MinimumSize + len(ipv4Options)
+				hdrLen := int(header.IPv4MinimumSize + ipv4Options.Length())
 				if len(netHdr.View()) != hdrLen {
 					t.Errorf("got len(netHdr.View()) = %d, want = %d", len(netHdr.View()), hdrLen)
 				}
@@ -1232,7 +1324,7 @@ func TestWriteHeaderIncludedPacket(t *testing.T) {
 					checker.DstAddr(remoteIPv4Addr),
 					checker.IPv4HeaderLength(hdrLen),
 					checker.IPFullLength(uint16(hdrLen+len(data))),
-					checker.IPv4Options(ipv4Options),
+					checker.IPv4Options(expectOptions),
 					checker.IPPayload(data),
 				)
 			},
@@ -1243,7 +1335,7 @@ func TestWriteHeaderIncludedPacket(t *testing.T) {
 			protoNum:     ipv6.ProtocolNumber,
 			nicAddr:      localIPv6Addr,
 			remoteAddr:   remoteIPv6Addr,
-			pktGen: func(t *testing.T, src tcpip.Address) buffer.View {
+			pktGen: func(t *testing.T, src tcpip.Address) buffer.VectorisedView {
 				totalLen := header.IPv6MinimumSize + len(data)
 				hdr := buffer.NewPrependable(totalLen)
 				if n := copy(hdr.Prepend(len(data)), data); n != len(data) {
@@ -1251,12 +1343,12 @@ func TestWriteHeaderIncludedPacket(t *testing.T) {
 				}
 				ip := header.IPv6(hdr.Prepend(header.IPv6MinimumSize))
 				ip.Encode(&header.IPv6Fields{
-					NextHeader: transportProto,
-					HopLimit:   ipv6.DefaultTTL,
-					SrcAddr:    src,
-					DstAddr:    header.IPv4Any,
+					TransportProtocol: transportProto,
+					HopLimit:          ipv6.DefaultTTL,
+					SrcAddr:           src,
+					DstAddr:           header.IPv4Any,
 				})
-				return hdr.View()
+				return hdr.View().ToVectorisedView()
 			},
 			checker: func(t *testing.T, pkt *stack.PacketBuffer, src tcpip.Address) {
 				if src == header.IPv6Any {
@@ -1283,7 +1375,7 @@ func TestWriteHeaderIncludedPacket(t *testing.T) {
 			protoNum:     ipv6.ProtocolNumber,
 			nicAddr:      localIPv6Addr,
 			remoteAddr:   remoteIPv6Addr,
-			pktGen: func(t *testing.T, src tcpip.Address) buffer.View {
+			pktGen: func(t *testing.T, src tcpip.Address) buffer.VectorisedView {
 				totalLen := header.IPv6MinimumSize + len(ipv6FragmentExtHdr) + len(data)
 				hdr := buffer.NewPrependable(totalLen)
 				if n := copy(hdr.Prepend(len(data)), data); n != len(data) {
@@ -1294,12 +1386,14 @@ func TestWriteHeaderIncludedPacket(t *testing.T) {
 				}
 				ip := header.IPv6(hdr.Prepend(header.IPv6MinimumSize))
 				ip.Encode(&header.IPv6Fields{
-					NextHeader: uint8(header.IPv6FragmentExtHdrIdentifier),
-					HopLimit:   ipv6.DefaultTTL,
-					SrcAddr:    src,
-					DstAddr:    header.IPv4Any,
+					// NB: we're lying about transport protocol here to verify the raw
+					// fragment header bytes.
+					TransportProtocol: tcpip.TransportProtocolNumber(header.IPv6FragmentExtHdrIdentifier),
+					HopLimit:          ipv6.DefaultTTL,
+					SrcAddr:           src,
+					DstAddr:           header.IPv4Any,
 				})
-				return hdr.View()
+				return hdr.View().ToVectorisedView()
 			},
 			checker: func(t *testing.T, pkt *stack.PacketBuffer, src tcpip.Address) {
 				if src == header.IPv6Any {
@@ -1326,15 +1420,15 @@ func TestWriteHeaderIncludedPacket(t *testing.T) {
 			protoNum:     ipv6.ProtocolNumber,
 			nicAddr:      localIPv6Addr,
 			remoteAddr:   remoteIPv6Addr,
-			pktGen: func(t *testing.T, src tcpip.Address) buffer.View {
+			pktGen: func(t *testing.T, src tcpip.Address) buffer.VectorisedView {
 				ip := header.IPv6(make([]byte, header.IPv6MinimumSize))
 				ip.Encode(&header.IPv6Fields{
-					NextHeader: transportProto,
-					HopLimit:   ipv6.DefaultTTL,
-					SrcAddr:    src,
-					DstAddr:    header.IPv4Any,
+					TransportProtocol: transportProto,
+					HopLimit:          ipv6.DefaultTTL,
+					SrcAddr:           src,
+					DstAddr:           header.IPv4Any,
 				})
-				return buffer.View(ip)
+				return buffer.View(ip).ToVectorisedView()
 			},
 			checker: func(t *testing.T, pkt *stack.PacketBuffer, src tcpip.Address) {
 				if src == header.IPv6Any {
@@ -1361,15 +1455,15 @@ func TestWriteHeaderIncludedPacket(t *testing.T) {
 			protoNum:     ipv6.ProtocolNumber,
 			nicAddr:      localIPv6Addr,
 			remoteAddr:   remoteIPv6Addr,
-			pktGen: func(t *testing.T, src tcpip.Address) buffer.View {
+			pktGen: func(t *testing.T, src tcpip.Address) buffer.VectorisedView {
 				ip := header.IPv6(make([]byte, header.IPv6MinimumSize))
 				ip.Encode(&header.IPv6Fields{
-					NextHeader: transportProto,
-					HopLimit:   ipv6.DefaultTTL,
-					SrcAddr:    src,
-					DstAddr:    header.IPv4Any,
+					TransportProtocol: transportProto,
+					HopLimit:          ipv6.DefaultTTL,
+					SrcAddr:           src,
+					DstAddr:           header.IPv4Any,
 				})
-				return buffer.View(ip[:len(ip)-1])
+				return buffer.View(ip[:len(ip)-1]).ToVectorisedView()
 			},
 			expectedErr: tcpip.ErrMalformedHeader,
 		},
@@ -1413,7 +1507,7 @@ func TestWriteHeaderIncludedPacket(t *testing.T) {
 					defer r.Release()
 
 					if err := r.WriteHeaderIncludedPacket(stack.NewPacketBuffer(stack.PacketBufferOptions{
-						Data: test.pktGen(t, subTest.srcAddr).ToVectorisedView(),
+						Data: test.pktGen(t, subTest.srcAddr),
 					})); err != test.expectedErr {
 						t.Fatalf("got r.WriteHeaderIncludedPacket(_) = %s, want = %s", err, test.expectedErr)
 					}
diff --git a/pkg/tcpip/network/ipv4/BUILD b/pkg/tcpip/network/ipv4/BUILD
index 7fc12e229..32f53f217 100644
--- a/pkg/tcpip/network/ipv4/BUILD
+++ b/pkg/tcpip/network/ipv4/BUILD
@@ -6,6 +6,7 @@ go_library(
     name = "ipv4",
     srcs = [
         "icmp.go",
+        "igmp.go",
         "ipv4.go",
     ],
     visibility = ["//visibility:public"],
@@ -17,6 +18,7 @@ go_library(
         "//pkg/tcpip/header/parse",
         "//pkg/tcpip/network/fragmentation",
         "//pkg/tcpip/network/hash",
+        "//pkg/tcpip/network/ip",
         "//pkg/tcpip/stack",
     ],
 )
@@ -24,11 +26,15 @@ go_library(
 go_test(
     name = "ipv4_test",
     size = "small",
-    srcs = ["ipv4_test.go"],
+    srcs = [
+        "igmp_test.go",
+        "ipv4_test.go",
+    ],
     deps = [
         "//pkg/tcpip",
         "//pkg/tcpip/buffer",
         "//pkg/tcpip/checker",
+        "//pkg/tcpip/faketime",
         "//pkg/tcpip/header",
         "//pkg/tcpip/link/channel",
         "//pkg/tcpip/link/sniffer",
diff --git a/pkg/tcpip/network/ipv4/icmp.go b/pkg/tcpip/network/ipv4/icmp.go
index 3407755ed..8e392f86c 100644
--- a/pkg/tcpip/network/ipv4/icmp.go
+++ b/pkg/tcpip/network/ipv4/icmp.go
@@ -15,6 +15,7 @@
 package ipv4
 
 import (
+	"errors"
 	"fmt"
 
 	"gvisor.dev/gvisor/pkg/tcpip"
@@ -23,10 +24,10 @@ import (
 	"gvisor.dev/gvisor/pkg/tcpip/stack"
 )
 
-// handleControl handles the case when an ICMP packet contains the headers of
-// the original packet that caused the ICMP one to be sent. This information is
-// used to find out which transport endpoint must be notified about the ICMP
-// packet.
+// handleControl handles the case when an ICMP error packet contains the headers
+// of the original packet that caused the ICMP one to be sent. This information
+// is used to find out which transport endpoint must be notified about the ICMP
+// packet. We only expect the payload, not the enclosing ICMP packet.
 func (e *endpoint) handleControl(typ stack.ControlType, extra uint32, pkt *stack.PacketBuffer) {
 	h, ok := pkt.Data.PullUp(header.IPv4MinimumSize)
 	if !ok {
@@ -41,8 +42,8 @@ func (e *endpoint) handleControl(typ stack.ControlType, extra uint32, pkt *stack
 	//
 	// Drop packet if it doesn't have the basic IPv4 header or if the
 	// original source address doesn't match an address we own.
-	src := hdr.SourceAddress()
-	if e.protocol.stack.CheckLocalAddress(e.nic.ID(), ProtocolNumber, src) == 0 {
+	srcAddr := hdr.SourceAddress()
+	if e.protocol.stack.CheckLocalAddress(e.nic.ID(), ProtocolNumber, srcAddr) == 0 {
 		return
 	}
 
@@ -57,12 +58,12 @@ func (e *endpoint) handleControl(typ stack.ControlType, extra uint32, pkt *stack
 	// Skip the ip header, then deliver control message.
 	pkt.Data.TrimFront(hlen)
 	p := hdr.TransportProtocol()
-	e.dispatcher.DeliverTransportControlPacket(src, hdr.DestinationAddress(), ProtocolNumber, p, typ, extra, pkt)
+	e.dispatcher.DeliverTransportControlPacket(srcAddr, hdr.DestinationAddress(), ProtocolNumber, p, typ, extra, pkt)
 }
 
-func (e *endpoint) handleICMP(r *stack.Route, pkt *stack.PacketBuffer) {
-	stats := r.Stats()
-	received := stats.ICMP.V4PacketsReceived
+func (e *endpoint) handleICMP(pkt *stack.PacketBuffer) {
+	stats := e.protocol.stack.Stats()
+	received := stats.ICMP.V4.PacketsReceived
 	// TODO(gvisor.dev/issue/170): ICMP packets don't have their
 	// TransportHeader fields set. See icmp/protocol.go:protocol.Parse for a
 	// full explanation.
@@ -73,20 +74,65 @@ func (e *endpoint) handleICMP(r *stack.Route, pkt *stack.PacketBuffer) {
 	}
 	h := header.ICMPv4(v)
 
+	// Only do in-stack processing if the checksum is correct.
+	if header.ChecksumVV(pkt.Data, 0 /* initial */) != 0xffff {
+		received.Invalid.Increment()
+		// It's possible that a raw socket expects to receive this regardless
+		// of checksum errors. If it's an echo request we know it's safe because
+		// we are the only handler; however, other types do not cope well with
+		// packets with checksum errors.
+		switch h.Type() {
+		case header.ICMPv4Echo:
+			e.dispatcher.DeliverTransportPacket(header.ICMPv4ProtocolNumber, pkt)
+		}
+		return
+	}
+
+	iph := header.IPv4(pkt.NetworkHeader().View())
+	var newOptions header.IPv4Options
+	if opts := iph.Options(); len(opts) != 0 {
+		// RFC 1122 section 3.2.2.6 (page 43) (and similar for other round trip
+		// type ICMP packets):
+		//    If a Record Route and/or Time Stamp option is received in an
+		//    ICMP Echo Request, this option (these options) SHOULD be
+		//    updated to include the current host and included in the IP
+		//    header of the Echo Reply message, without "truncation".
+		//    Thus, the recorded route will be for the entire round trip.
+		//
+		// So we need to let the option processor know how it should handle them.
+		var op optionsUsage
+		if h.Type() == header.ICMPv4Echo {
+			op = &optionUsageEcho{}
+		} else {
+			op = &optionUsageReceive{}
+		}
+		aux, tmp, err := e.processIPOptions(pkt, opts, op)
+		if err != nil {
+			switch {
+			case
+				errors.Is(err, header.ErrIPv4OptDuplicate),
+				errors.Is(err, errIPv4RecordRouteOptInvalidLength),
+				errors.Is(err, errIPv4RecordRouteOptInvalidPointer),
+				errors.Is(err, errIPv4TimestampOptInvalidLength),
+				errors.Is(err, errIPv4TimestampOptInvalidPointer),
+				errors.Is(err, errIPv4TimestampOptOverflow):
+				_ = e.protocol.returnError(&icmpReasonParamProblem{pointer: aux}, pkt)
+				stats.MalformedRcvdPackets.Increment()
+				stats.IP.MalformedPacketsReceived.Increment()
+			}
+			return
+		}
+		newOptions = tmp
+	}
+
 	// TODO(b/112892170): Meaningfully handle all ICMP types.
 	switch h.Type() {
 	case header.ICMPv4Echo:
 		received.Echo.Increment()
 
-		// Only send a reply if the checksum is valid.
-		headerChecksum := h.Checksum()
-		h.SetChecksum(0)
-		calculatedChecksum := ^header.ChecksumVV(pkt.Data, 0 /* initial */)
-		h.SetChecksum(headerChecksum)
-		if calculatedChecksum != headerChecksum {
-			// It's possible that a raw socket still expects to receive this.
-			e.dispatcher.DeliverTransportPacket(r, header.ICMPv4ProtocolNumber, pkt)
-			received.Invalid.Increment()
+		sent := stats.ICMP.V4.PacketsSent
+		if !e.protocol.stack.AllowICMPMessage() {
+			sent.RateLimited.Increment()
 			return
 		}
 
@@ -98,19 +144,27 @@ func (e *endpoint) handleICMP(r *stack.Route, pkt *stack.PacketBuffer) {
 		// waiting endpoints. Consider moving responsibility for doing the copy to
 		// DeliverTransportPacket so that is is only done when needed.
 		replyData := pkt.Data.ToOwnedView()
-		replyIPHdr := header.IPv4(append(buffer.View(nil), pkt.NetworkHeader().View()...))
+		ipHdr := header.IPv4(pkt.NetworkHeader().View())
+		localAddressBroadcast := pkt.NetworkPacketInfo.LocalAddressBroadcast
+
+		// It's possible that a raw socket expects to receive this.
+		e.dispatcher.DeliverTransportPacket(header.ICMPv4ProtocolNumber, pkt)
+		pkt = nil
 
-		e.dispatcher.DeliverTransportPacket(r, header.ICMPv4ProtocolNumber, pkt)
+		// Take the base of the incoming request IP header but replace the options.
+		replyHeaderLength := uint8(header.IPv4MinimumSize + len(newOptions))
+		replyIPHdr := header.IPv4(append(iph[:header.IPv4MinimumSize:header.IPv4MinimumSize], newOptions...))
+		replyIPHdr.SetHeaderLength(replyHeaderLength)
 
 		// As per RFC 1122 section 3.2.1.3, when a host sends any datagram, the IP
 		// source address MUST be one of its own IP addresses (but not a broadcast
 		// or multicast address).
-		localAddr := r.LocalAddress
-		if r.IsInboundBroadcast() || header.IsV4MulticastAddress(localAddr) {
+		localAddr := ipHdr.DestinationAddress()
+		if localAddressBroadcast || header.IsV4MulticastAddress(localAddr) {
 			localAddr = ""
 		}
 
-		r, err := r.Stack().FindRoute(e.nic.ID(), localAddr, r.RemoteAddress, ProtocolNumber, false /* multicastLoop */)
+		r, err := e.protocol.stack.FindRoute(e.nic.ID(), localAddr, ipHdr.SourceAddress(), ProtocolNumber, false /* multicastLoop */)
 		if err != nil {
 			// If we cannot find a route to the destination, silently drop the packet.
 			return
@@ -139,7 +193,8 @@ func (e *endpoint) handleICMP(r *stack.Route, pkt *stack.PacketBuffer) {
 		// The fields we need to alter.
 		//
 		// We need to produce the entire packet in the data segment in order to
-		// use WriteHeaderIncludedPacket().
+		// use WriteHeaderIncludedPacket(). WriteHeaderIncludedPacket sets the
+		// total length and the header checksum so we don't need to set those here.
 		replyIPHdr.SetSourceAddress(r.LocalAddress)
 		replyIPHdr.SetDestinationAddress(r.RemoteAddress)
 		replyIPHdr.SetTTL(r.DefaultTTL())
@@ -157,8 +212,6 @@ func (e *endpoint) handleICMP(r *stack.Route, pkt *stack.PacketBuffer) {
 		})
 		replyPkt.TransportProtocolNumber = header.ICMPv4ProtocolNumber
 
-		// The checksum will be calculated so we don't need to do it here.
-		sent := stats.ICMP.V4PacketsSent
 		if err := r.WriteHeaderIncludedPacket(replyPkt); err != nil {
 			sent.Dropped.Increment()
 			return
@@ -168,7 +221,7 @@ func (e *endpoint) handleICMP(r *stack.Route, pkt *stack.PacketBuffer) {
 	case header.ICMPv4EchoReply:
 		received.EchoReply.Increment()
 
-		e.dispatcher.DeliverTransportPacket(r, header.ICMPv4ProtocolNumber, pkt)
+		e.dispatcher.DeliverTransportPacket(header.ICMPv4ProtocolNumber, pkt)
 
 	case header.ICMPv4DstUnreachable:
 		received.DstUnreachable.Increment()
@@ -182,8 +235,11 @@ func (e *endpoint) handleICMP(r *stack.Route, pkt *stack.PacketBuffer) {
 			e.handleControl(stack.ControlPortUnreachable, 0, pkt)
 
 		case header.ICMPv4FragmentationNeeded:
-			mtu := uint32(h.MTU())
-			e.handleControl(stack.ControlPacketTooBig, calculateMTU(mtu), pkt)
+			networkMTU, err := calculateNetworkMTU(uint32(h.MTU()), header.IPv4MinimumSize)
+			if err != nil {
+				networkMTU = 0
+			}
+			e.handleControl(stack.ControlPacketTooBig, networkMTU, pkt)
 		}
 
 	case header.ICMPv4SrcQuench:
@@ -234,12 +290,38 @@ type icmpReasonProtoUnreachable struct{}
 
 func (*icmpReasonProtoUnreachable) isICMPReason() {}
 
+// icmpReasonTTLExceeded is an error where a packet's time to live exceeded in
+// transit to its final destination, as per RFC 792 page 6, Time Exceeded
+// Message.
+type icmpReasonTTLExceeded struct{}
+
+func (*icmpReasonTTLExceeded) isICMPReason() {}
+
+// icmpReasonReassemblyTimeout is an error where insufficient fragments are
+// received to complete reassembly of a packet within a configured time after
+// the reception of the first-arriving fragment of that packet.
+type icmpReasonReassemblyTimeout struct{}
+
+func (*icmpReasonReassemblyTimeout) isICMPReason() {}
+
+// icmpReasonParamProblem is an error to use to request a Parameter Problem
+// message to be sent.
+type icmpReasonParamProblem struct {
+	pointer byte
+}
+
+func (*icmpReasonParamProblem) isICMPReason() {}
+
 // returnError takes an error descriptor and generates the appropriate ICMP
 // error packet for IPv4 and sends it back to the remote device that sent
 // the problematic packet. It incorporates as much of that packet as
 // possible as well as any error metadata as is available. returnError
 // expects pkt to hold a valid IPv4 packet as per the wire format.
-func (p *protocol) returnError(r *stack.Route, reason icmpReason, pkt *stack.PacketBuffer) *tcpip.Error {
+func (p *protocol) returnError(reason icmpReason, pkt *stack.PacketBuffer) *tcpip.Error {
+	origIPHdr := header.IPv4(pkt.NetworkHeader().View())
+	origIPHdrSrc := origIPHdr.SourceAddress()
+	origIPHdrDst := origIPHdr.DestinationAddress()
+
 	// We check we are responding only when we are allowed to.
 	// See RFC 1812 section 4.3.2.7 (shown below).
 	//
@@ -263,35 +345,50 @@ func (p *protocol) returnError(r *stack.Route, reason icmpReason, pkt *stack.Pac
 	//
 	// TODO(gvisor.dev/issues/4058): Make sure we don't send ICMP errors in
 	// response to a non-initial fragment, but it currently can not happen.
-
-	if r.IsInboundBroadcast() || header.IsV4MulticastAddress(r.LocalAddress) || r.RemoteAddress == header.IPv4Any {
+	if pkt.NetworkPacketInfo.LocalAddressBroadcast || header.IsV4MulticastAddress(origIPHdrDst) || origIPHdrSrc == header.IPv4Any {
 		return nil
 	}
 
+	// If we hit a TTL Exceeded error, then we know we are operating as a router.
+	// As per RFC 792 page 6, Time Exceeded Message,
+	//
+	//   If the gateway processing a datagram finds the time to live field
+	//   is zero it must discard the datagram.  The gateway may also notify
+	//   the source host via the time exceeded message.
+	//
+	//   ...
+	//
+	//   Code 0 may be received from a gateway. ...
+	//
+	// Note, Code 0 is the TTL exceeded error.
+	//
+	// If we are operating as a router/gateway, don't use the packet's destination
+	// address as the response's source address as we should not own the
+	// destination address of a packet we are forwarding.
+	localAddr := origIPHdrDst
+	if _, ok := reason.(*icmpReasonTTLExceeded); ok {
+		localAddr = ""
+	}
 	// Even if we were able to receive a packet from some remote, we may not have
 	// a route to it - the remote may be blocked via routing rules. We must always
 	// consult our routing table and find a route to the remote before sending any
 	// packet.
-	route, err := p.stack.FindRoute(r.NICID(), r.LocalAddress, r.RemoteAddress, ProtocolNumber, false /* multicastLoop */)
+	route, err := p.stack.FindRoute(pkt.NICID, localAddr, origIPHdrSrc, ProtocolNumber, false /* multicastLoop */)
 	if err != nil {
 		return err
 	}
 	defer route.Release()
-	// From this point on, the incoming route should no longer be used; route
-	// must be used to send the ICMP error.
-	r = nil
 
-	sent := p.stack.Stats().ICMP.V4PacketsSent
+	sent := p.stack.Stats().ICMP.V4.PacketsSent
 	if !p.stack.AllowICMPMessage() {
 		sent.RateLimited.Increment()
 		return nil
 	}
 
-	networkHeader := pkt.NetworkHeader().View()
 	transportHeader := pkt.TransportHeader().View()
 
 	// Don't respond to icmp error packets.
-	if header.IPv4(networkHeader).Protocol() == uint8(header.ICMPv4ProtocolNumber) {
+	if origIPHdr.Protocol() == uint8(header.ICMPv4ProtocolNumber) {
 		// TODO(gvisor.dev/issue/3810):
 		// Unfortunately the current stack pretty much always has ICMPv4 headers
 		// in the Data section of the packet but there is no guarantee that is the
@@ -348,7 +445,7 @@ func (p *protocol) returnError(r *stack.Route, reason icmpReason, pkt *stack.Pac
 		return nil
 	}
 
-	payloadLen := networkHeader.Size() + transportHeader.Size() + pkt.Data.Size()
+	payloadLen := len(origIPHdr) + transportHeader.Size() + pkt.Data.Size()
 	if payloadLen > available {
 		payloadLen = available
 	}
@@ -360,7 +457,7 @@ func (p *protocol) returnError(r *stack.Route, reason icmpReason, pkt *stack.Pac
 	// view with the entire incoming IP packet reassembled and truncated as
 	// required. This is now the payload of the new ICMP packet and no longer
 	// considered a packet in its own right.
-	newHeader := append(buffer.View(nil), networkHeader...)
+	newHeader := append(buffer.View(nil), origIPHdr...)
 	newHeader = append(newHeader, transportHeader...)
 	payload := newHeader.ToVectorisedView()
 	payload.AppendView(pkt.Data.ToView())
@@ -374,17 +471,33 @@ func (p *protocol) returnError(r *stack.Route, reason icmpReason, pkt *stack.Pac
 	icmpPkt.TransportProtocolNumber = header.ICMPv4ProtocolNumber
 
 	icmpHdr := header.ICMPv4(icmpPkt.TransportHeader().Push(header.ICMPv4MinimumSize))
-	switch reason.(type) {
+	var counter *tcpip.StatCounter
+	switch reason := reason.(type) {
 	case *icmpReasonPortUnreachable:
+		icmpHdr.SetType(header.ICMPv4DstUnreachable)
 		icmpHdr.SetCode(header.ICMPv4PortUnreachable)
+		counter = sent.DstUnreachable
 	case *icmpReasonProtoUnreachable:
+		icmpHdr.SetType(header.ICMPv4DstUnreachable)
 		icmpHdr.SetCode(header.ICMPv4ProtoUnreachable)
+		counter = sent.DstUnreachable
+	case *icmpReasonTTLExceeded:
+		icmpHdr.SetType(header.ICMPv4TimeExceeded)
+		icmpHdr.SetCode(header.ICMPv4TTLExceeded)
+		counter = sent.TimeExceeded
+	case *icmpReasonReassemblyTimeout:
+		icmpHdr.SetType(header.ICMPv4TimeExceeded)
+		icmpHdr.SetCode(header.ICMPv4ReassemblyTimeout)
+		counter = sent.TimeExceeded
+	case *icmpReasonParamProblem:
+		icmpHdr.SetType(header.ICMPv4ParamProblem)
+		icmpHdr.SetCode(header.ICMPv4UnusedCode)
+		icmpHdr.SetPointer(reason.pointer)
+		counter = sent.ParamProblem
 	default:
 		panic(fmt.Sprintf("unsupported ICMP type %T", reason))
 	}
-	icmpHdr.SetType(header.ICMPv4DstUnreachable)
 	icmpHdr.SetChecksum(header.ICMPv4Checksum(icmpHdr, icmpPkt.Data))
-	counter := sent.DstUnreachable
 
 	if err := route.WritePacket(
 		nil, /* gso */
@@ -401,3 +514,18 @@ func (p *protocol) returnError(r *stack.Route, reason icmpReason, pkt *stack.Pac
 	counter.Increment()
 	return nil
 }
+
+// OnReassemblyTimeout implements fragmentation.TimeoutHandler.
+func (p *protocol) OnReassemblyTimeout(pkt *stack.PacketBuffer) {
+	// OnReassemblyTimeout sends a Time Exceeded Message, as per RFC 792:
+	//
+	//   If a host reassembling a fragmented datagram cannot complete the
+	//   reassembly due to missing fragments within its time limit it discards the
+	//   datagram, and it may send a time exceeded message.
+	//
+	//   If fragment zero is not available then no time exceeded need be sent at
+	//   all.
+	if pkt != nil {
+		p.returnError(&icmpReasonReassemblyTimeout{}, pkt)
+	}
+}
diff --git a/pkg/tcpip/network/ipv4/igmp.go b/pkg/tcpip/network/ipv4/igmp.go
new file mode 100644
index 000000000..fb7a9e68e
--- /dev/null
+++ b/pkg/tcpip/network/ipv4/igmp.go
@@ -0,0 +1,344 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package ipv4
+
+import (
+	"fmt"
+	"sync/atomic"
+	"time"
+
+	"gvisor.dev/gvisor/pkg/tcpip"
+	"gvisor.dev/gvisor/pkg/tcpip/buffer"
+	"gvisor.dev/gvisor/pkg/tcpip/header"
+	"gvisor.dev/gvisor/pkg/tcpip/network/ip"
+	"gvisor.dev/gvisor/pkg/tcpip/stack"
+)
+
+const (
+	// igmpV1PresentDefault is the initial state for igmpV1Present in the
+	// igmpState. As RFC 2236 Page 9 says, "No IGMPv1 Router Present ... is
+	// the initial state."
+	igmpV1PresentDefault = 0
+
+	// v1RouterPresentTimeout is from RFC 2236 Section 8.11, Page 18.
+	// See the note on igmpState.igmpV1Present for more detail.
+	v1RouterPresentTimeout = 400 * time.Second
+
+	// v1MaxRespTime from RFC 2236 Section 4, Page 5. "The IGMPv1 router
+	// will send General Queries with the Max Response Time set to 0. This MUST
+	// be interpreted as a value of 100 (10 seconds)."
+	//
+	// Note that the Max Response Time field is a value in units of deciseconds.
+	v1MaxRespTime = 10 * time.Second
+
+	// UnsolicitedReportIntervalMax is the maximum delay between sending
+	// unsolicited IGMP reports.
+	//
+	// Obtained from RFC 2236 Section 8.10, Page 19.
+	UnsolicitedReportIntervalMax = 10 * time.Second
+)
+
+// IGMPOptions holds options for IGMP.
+type IGMPOptions struct {
+	// Enabled indicates whether IGMP will be performed.
+	//
+	// When enabled, IGMP may transmit IGMP report and leave messages when
+	// joining and leaving multicast groups respectively, and handle incoming
+	// IGMP packets.
+	//
+	// This field is ignored and is always assumed to be false for interfaces
+	// without neighbouring nodes (e.g. loopback).
+	Enabled bool
+}
+
+var _ ip.MulticastGroupProtocol = (*igmpState)(nil)
+
+// igmpState is the per-interface IGMP state.
+//
+// igmpState.init() MUST be called after creating an IGMP state.
+type igmpState struct {
+	// The IPv4 endpoint this igmpState is for.
+	ep *endpoint
+
+	enabled bool
+
+	genericMulticastProtocol ip.GenericMulticastProtocolState
+
+	// igmpV1Present is for maintaining compatibility with IGMPv1 Routers, from
+	// RFC 2236 Section 4 Page 6: "The IGMPv1 router expects Version 1
+	// Membership Reports in response to its Queries, and will not pay
+	// attention to Version 2 Membership Reports.  Therefore, a state variable
+	// MUST be kept for each interface, describing whether the multicast
+	// Querier on that interface is running IGMPv1 or IGMPv2.  This variable
+	// MUST be based upon whether or not an IGMPv1 query was heard in the last
+	// [Version 1 Router Present Timeout] seconds".
+	//
+	// Must be accessed with atomic operations. Holds a value of 1 when true, 0
+	// when false.
+	igmpV1Present uint32
+
+	// igmpV1Job is scheduled when this interface receives an IGMPv1 style
+	// message, upon expiration the igmpV1Present flag is cleared.
+	// igmpV1Job may not be nil once igmpState is initialized.
+	igmpV1Job *tcpip.Job
+}
+
+// SendReport implements ip.MulticastGroupProtocol.
+//
+// Precondition: igmp.ep.mu must be read locked.
+func (igmp *igmpState) SendReport(groupAddress tcpip.Address) (bool, *tcpip.Error) {
+	igmpType := header.IGMPv2MembershipReport
+	if igmp.v1Present() {
+		igmpType = header.IGMPv1MembershipReport
+	}
+	return igmp.writePacket(groupAddress, groupAddress, igmpType)
+}
+
+// SendLeave implements ip.MulticastGroupProtocol.
+//
+// Precondition: igmp.ep.mu must be read locked.
+func (igmp *igmpState) SendLeave(groupAddress tcpip.Address) *tcpip.Error {
+	// As per RFC 2236 Section 6, Page 8: "If the interface state says the
+	// Querier is running IGMPv1, this action SHOULD be skipped. If the flag
+	// saying we were the last host to report is cleared, this action MAY be
+	// skipped."
+	if igmp.v1Present() {
+		return nil
+	}
+	_, err := igmp.writePacket(header.IPv4AllRoutersGroup, groupAddress, header.IGMPLeaveGroup)
+	return err
+}
+
+// init sets up an igmpState struct, and is required to be called before using
+// a new igmpState.
+//
+// Must only be called once for the lifetime of igmp.
+func (igmp *igmpState) init(ep *endpoint) {
+	igmp.ep = ep
+	// No need to perform IGMP on loopback interfaces since they don't have
+	// neighbouring nodes.
+	igmp.enabled = ep.protocol.options.IGMP.Enabled && !igmp.ep.nic.IsLoopback()
+	igmp.genericMulticastProtocol.Init(&ep.mu.RWMutex, ip.GenericMulticastProtocolOptions{
+		Enabled:                   igmp.enabled,
+		Rand:                      ep.protocol.stack.Rand(),
+		Clock:                     ep.protocol.stack.Clock(),
+		Protocol:                  igmp,
+		MaxUnsolicitedReportDelay: UnsolicitedReportIntervalMax,
+		AllNodesAddress:           header.IPv4AllSystems,
+	})
+	igmp.igmpV1Present = igmpV1PresentDefault
+	igmp.igmpV1Job = ep.protocol.stack.NewJob(&ep.mu, func() {
+		igmp.setV1Present(false)
+	})
+}
+
+// handleIGMP handles an IGMP packet.
+//
+// Precondition: igmp.ep.mu must be locked.
+func (igmp *igmpState) handleIGMP(pkt *stack.PacketBuffer) {
+	stats := igmp.ep.protocol.stack.Stats()
+	received := stats.IGMP.PacketsReceived
+	headerView, ok := pkt.Data.PullUp(header.IGMPMinimumSize)
+	if !ok {
+		received.Invalid.Increment()
+		return
+	}
+	h := header.IGMP(headerView)
+
+	// Temporarily reset the checksum field to 0 in order to calculate the proper
+	// checksum.
+	wantChecksum := h.Checksum()
+	h.SetChecksum(0)
+	gotChecksum := ^header.ChecksumVV(pkt.Data, 0 /* initial */)
+	h.SetChecksum(wantChecksum)
+
+	if gotChecksum != wantChecksum {
+		received.ChecksumErrors.Increment()
+		return
+	}
+
+	switch h.Type() {
+	case header.IGMPMembershipQuery:
+		received.MembershipQuery.Increment()
+		if len(headerView) < header.IGMPQueryMinimumSize {
+			received.Invalid.Increment()
+			return
+		}
+		igmp.handleMembershipQuery(h.GroupAddress(), h.MaxRespTime())
+	case header.IGMPv1MembershipReport:
+		received.V1MembershipReport.Increment()
+		if len(headerView) < header.IGMPReportMinimumSize {
+			received.Invalid.Increment()
+			return
+		}
+		igmp.handleMembershipReport(h.GroupAddress())
+	case header.IGMPv2MembershipReport:
+		received.V2MembershipReport.Increment()
+		if len(headerView) < header.IGMPReportMinimumSize {
+			received.Invalid.Increment()
+			return
+		}
+		igmp.handleMembershipReport(h.GroupAddress())
+	case header.IGMPLeaveGroup:
+		received.LeaveGroup.Increment()
+		// As per RFC 2236 Section 6, Page 7: "IGMP messages other than Query or
+		// Report, are ignored in all states"
+
+	default:
+		// As per RFC 2236 Section 2.1 Page 3: "Unrecognized message types should
+		// be silently ignored. New message types may be used by newer versions of
+		// IGMP, by multicast routing protocols, or other uses."
+		received.Unrecognized.Increment()
+	}
+}
+
+func (igmp *igmpState) v1Present() bool {
+	return atomic.LoadUint32(&igmp.igmpV1Present) == 1
+}
+
+func (igmp *igmpState) setV1Present(v bool) {
+	if v {
+		atomic.StoreUint32(&igmp.igmpV1Present, 1)
+	} else {
+		atomic.StoreUint32(&igmp.igmpV1Present, 0)
+	}
+}
+
+// handleMembershipQuery handles a membership query.
+//
+// Precondition: igmp.ep.mu must be locked.
+func (igmp *igmpState) handleMembershipQuery(groupAddress tcpip.Address, maxRespTime time.Duration) {
+	// As per RFC 2236 Section 6, Page 10: If the maximum response time is zero
+	// then change the state to note that an IGMPv1 router is present and
+	// schedule the query received Job.
+	if igmp.enabled && maxRespTime == 0 {
+		igmp.igmpV1Job.Cancel()
+		igmp.igmpV1Job.Schedule(v1RouterPresentTimeout)
+		igmp.setV1Present(true)
+		maxRespTime = v1MaxRespTime
+	}
+
+	igmp.genericMulticastProtocol.HandleQueryLocked(groupAddress, maxRespTime)
+}
+
+// handleMembershipReport handles a membership report.
+//
+// Precondition: igmp.ep.mu must be locked.
+func (igmp *igmpState) handleMembershipReport(groupAddress tcpip.Address) {
+	igmp.genericMulticastProtocol.HandleReportLocked(groupAddress)
+}
+
+// writePacket assembles and sends an IGMP packet.
+//
+// Precondition: igmp.ep.mu must be read locked.
+func (igmp *igmpState) writePacket(destAddress tcpip.Address, groupAddress tcpip.Address, igmpType header.IGMPType) (bool, *tcpip.Error) {
+	igmpData := header.IGMP(buffer.NewView(header.IGMPReportMinimumSize))
+	igmpData.SetType(igmpType)
+	igmpData.SetGroupAddress(groupAddress)
+	igmpData.SetChecksum(header.IGMPCalculateChecksum(igmpData))
+
+	pkt := stack.NewPacketBuffer(stack.PacketBufferOptions{
+		ReserveHeaderBytes: int(igmp.ep.MaxHeaderLength()),
+		Data:               buffer.View(igmpData).ToVectorisedView(),
+	})
+
+	addressEndpoint := igmp.ep.acquireOutgoingPrimaryAddressRLocked(destAddress, false /* allowExpired */)
+	if addressEndpoint == nil {
+		return false, nil
+	}
+	localAddr := addressEndpoint.AddressWithPrefix().Address
+	addressEndpoint.DecRef()
+	addressEndpoint = nil
+	igmp.ep.addIPHeader(localAddr, destAddress, pkt, stack.NetworkHeaderParams{
+		Protocol: header.IGMPProtocolNumber,
+		TTL:      header.IGMPTTL,
+		TOS:      stack.DefaultTOS,
+	}, header.IPv4OptionsSerializer{
+		&header.IPv4SerializableRouterAlertOption{},
+	})
+
+	sentStats := igmp.ep.protocol.stack.Stats().IGMP.PacketsSent
+	if err := igmp.ep.nic.WritePacketToRemote(header.EthernetAddressFromMulticastIPv4Address(destAddress), nil /* gso */, ProtocolNumber, pkt); err != nil {
+		sentStats.Dropped.Increment()
+		return false, err
+	}
+	switch igmpType {
+	case header.IGMPv1MembershipReport:
+		sentStats.V1MembershipReport.Increment()
+	case header.IGMPv2MembershipReport:
+		sentStats.V2MembershipReport.Increment()
+	case header.IGMPLeaveGroup:
+		sentStats.LeaveGroup.Increment()
+	default:
+		panic(fmt.Sprintf("unrecognized igmp type = %d", igmpType))
+	}
+	return true, nil
+}
+
+// joinGroup handles adding a new group to the membership map, setting up the
+// IGMP state for the group, and sending and scheduling the required
+// messages.
+//
+// joinGroup returns no value; the join itself is delegated to
+// genericMulticastProtocol.JoinGroupLocked.
+//
+// Precondition: igmp.ep.mu must be locked.
+func (igmp *igmpState) joinGroup(groupAddress tcpip.Address) {
+	igmp.genericMulticastProtocol.JoinGroupLocked(groupAddress, !igmp.ep.Enabled() /* dontInitialize */)
+}
+
+// isInGroup returns true if the specified group has been joined locally.
+//
+// Precondition: igmp.ep.mu must be read locked.
+func (igmp *igmpState) isInGroup(groupAddress tcpip.Address) bool {
+	return igmp.genericMulticastProtocol.IsLocallyJoinedRLocked(groupAddress)
+}
+
+// leaveGroup handles removing the group from the membership map, cancels any
+// delay timers associated with that group, and sends the Leave Group message
+// if required.
+//
+// Precondition: igmp.ep.mu must be locked.
+func (igmp *igmpState) leaveGroup(groupAddress tcpip.Address) *tcpip.Error {
+	// LeaveGroupLocked returns false only if the group was not joined.
+	if igmp.genericMulticastProtocol.LeaveGroupLocked(groupAddress) {
+		return nil
+	}
+
+	return tcpip.ErrBadLocalAddress
+}
+
+// softLeaveAll leaves all groups from the perspective of IGMP, but remains
+// joined locally.
+//
+// Precondition: igmp.ep.mu must be locked.
+func (igmp *igmpState) softLeaveAll() {
+	igmp.genericMulticastProtocol.MakeAllNonMemberLocked()
+}
+
+// initializeAll attempts to initialize the IGMP state for each group that has
+// been joined locally.
+//
+// Precondition: igmp.ep.mu must be locked.
+func (igmp *igmpState) initializeAll() {
+	igmp.genericMulticastProtocol.InitializeGroupsLocked()
+}
+
+// sendQueuedReports attempts to send any reports that are queued for sending.
+//
+// Precondition: igmp.ep.mu must be locked.
+func (igmp *igmpState) sendQueuedReports() {
+	igmp.genericMulticastProtocol.SendQueuedReportsLocked()
+}
diff --git a/pkg/tcpip/network/ipv4/igmp_test.go b/pkg/tcpip/network/ipv4/igmp_test.go
new file mode 100644
index 000000000..1ee573ac8
--- /dev/null
+++ b/pkg/tcpip/network/ipv4/igmp_test.go
@@ -0,0 +1,215 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package ipv4_test
+
+import (
+	"testing"
+	"time"
+
+	"gvisor.dev/gvisor/pkg/tcpip"
+	"gvisor.dev/gvisor/pkg/tcpip/buffer"
+	"gvisor.dev/gvisor/pkg/tcpip/checker"
+	"gvisor.dev/gvisor/pkg/tcpip/faketime"
+	"gvisor.dev/gvisor/pkg/tcpip/header"
+	"gvisor.dev/gvisor/pkg/tcpip/link/channel"
+	"gvisor.dev/gvisor/pkg/tcpip/network/ipv4"
+	"gvisor.dev/gvisor/pkg/tcpip/stack"
+)
+
+const (
+	linkAddr      = tcpip.LinkAddress("\x02\x02\x03\x04\x05\x06")
+	addr          = tcpip.Address("\x0a\x00\x00\x01")
+	multicastAddr = tcpip.Address("\xe0\x00\x00\x03")
+	nicID         = 1
+)
+
+// validateIgmpPacket checks that p is an IPv4 IGMP packet sent from addr to
+// remoteAddress with a TTL of 1, the Router Alert option and the passed IGMP
+// fields (maxRespTime is in deciseconds). Raises a t.Error on any mismatch.
+func validateIgmpPacket(t *testing.T, p channel.PacketInfo, remoteAddress tcpip.Address, igmpType header.IGMPType, maxRespTime byte, groupAddress tcpip.Address) {
+	t.Helper()
+
+	payload := header.IPv4(stack.PayloadSince(p.Pkt.NetworkHeader()))
+	checker.IPv4(t, payload,
+		checker.SrcAddr(addr),
+		checker.DstAddr(remoteAddress),
+		// TTL for an IGMP message must be 1 as per RFC 2236 section 2.
+		checker.TTL(1),
+		checker.IPv4RouterAlert(),
+		checker.IGMP(
+			checker.IGMPType(igmpType),
+			checker.IGMPMaxRespTime(header.DecisecondToDuration(maxRespTime)),
+			checker.IGMPGroupAddress(groupAddress),
+		),
+	)
+}
+
+func createStack(t *testing.T, igmpEnabled bool) (*channel.Endpoint, *stack.Stack, *faketime.ManualClock) {
+	t.Helper()
+
+	// Create an endpoint of queue size 1, since no more than 1 packet is ever
+	// queued in the tests in this file.
+	e := channel.New(1, 1280, linkAddr)
+	clock := faketime.NewManualClock()
+	s := stack.New(stack.Options{
+		NetworkProtocols: []stack.NetworkProtocolFactory{ipv4.NewProtocolWithOptions(ipv4.Options{
+			IGMP: ipv4.IGMPOptions{
+				Enabled: igmpEnabled,
+			},
+		})},
+		Clock: clock,
+	})
+	if err := s.CreateNIC(nicID, e); err != nil {
+		t.Fatalf("CreateNIC(%d, _) = %s", nicID, err)
+	}
+	return e, s, clock
+}
+
+func createAndInjectIGMPPacket(e *channel.Endpoint, igmpType header.IGMPType, maxRespTime byte, groupAddress tcpip.Address) {
+	buf := buffer.NewView(header.IPv4MinimumSize + header.IGMPQueryMinimumSize)
+
+	ip := header.IPv4(buf)
+	ip.Encode(&header.IPv4Fields{
+		TotalLength: uint16(len(buf)),
+		TTL:         1,
+		Protocol:    uint8(header.IGMPProtocolNumber),
+		SrcAddr:     header.IPv4Any,
+		DstAddr:     header.IPv4AllSystems,
+	})
+	ip.SetChecksum(^ip.CalculateChecksum())
+
+	igmp := header.IGMP(buf[header.IPv4MinimumSize:])
+	igmp.SetType(igmpType)
+	igmp.SetMaxRespTime(maxRespTime)
+	igmp.SetGroupAddress(groupAddress)
+	igmp.SetChecksum(header.IGMPCalculateChecksum(igmp))
+
+	e.InjectInbound(ipv4.ProtocolNumber, &stack.PacketBuffer{
+		Data: buf.ToVectorisedView(),
+	})
+}
+
+// TestIgmpV1Present tests the handling of the case where an IGMPv1 router is
+// present on the network. The IGMP stack will then send IGMPv1 Membership
+// reports for backwards compatibility.
+func TestIgmpV1Present(t *testing.T) {
+	e, s, clock := createStack(t, true)
+	if err := s.AddAddress(nicID, ipv4.ProtocolNumber, addr); err != nil {
+		t.Fatalf("AddAddress(%d, %d, %s): %s", nicID, ipv4.ProtocolNumber, addr, err)
+	}
+
+	if err := s.JoinGroup(ipv4.ProtocolNumber, nicID, multicastAddr); err != nil {
+		t.Fatalf("JoinGroup(ipv4, nic, %s) = %s", multicastAddr, err)
+	}
+
+	// This NIC will send an IGMPv2 report immediately, before this test can get
+	// the IGMPv1 General Membership Query in.
+	p, ok := e.Read()
+	if !ok {
+		t.Fatal("unable to Read IGMP packet, expected V2MembershipReport")
+	}
+	if got := s.Stats().IGMP.PacketsSent.V2MembershipReport.Value(); got != 1 {
+		t.Fatalf("got V2MembershipReport messages sent = %d, want = 1", got)
+	}
+	validateIgmpPacket(t, p, multicastAddr, header.IGMPv2MembershipReport, 0, multicastAddr)
+	if t.Failed() {
+		t.FailNow()
+	}
+
+	// Inject an IGMPv1 General Membership Query which is identical to a standard
+	// membership query except the Max Response Time is set to 0, which will tell
+	// the stack that this is a router using IGMPv1. Send it to the all systems
+	// group; the report is expected for multicastAddr, which was joined above.
+	createAndInjectIGMPPacket(e, header.IGMPMembershipQuery, 0, header.IPv4AllSystems)
+	if got := s.Stats().IGMP.PacketsReceived.MembershipQuery.Value(); got != 1 {
+		t.Fatalf("got Membership Queries received = %d, want = 1", got)
+	}
+
+	// Before advancing the clock, verify that this host has not sent a
+	// V1MembershipReport yet.
+	if got := s.Stats().IGMP.PacketsSent.V1MembershipReport.Value(); got != 0 {
+		t.Fatalf("got V1MembershipReport messages sent = %d, want = 0", got)
+	}
+
+	// Verify the solicited Membership Report is only sent after the clock is
+	// advanced. After an IGMPv1 query, the NIC should send an IGMPv1 report.
+	p, ok = e.Read()
+	if ok {
+		t.Fatalf("sent unexpected packet, expected V1MembershipReport only after advancing the clock = %+v", p.Pkt)
+	}
+	clock.Advance(ipv4.UnsolicitedReportIntervalMax)
+	p, ok = e.Read()
+	if !ok {
+		t.Fatal("unable to Read IGMP packet, expected V1MembershipReport")
+	}
+	if got := s.Stats().IGMP.PacketsSent.V1MembershipReport.Value(); got != 1 {
+		t.Fatalf("got V1MembershipReport messages sent = %d, want = 1", got)
+	}
+	validateIgmpPacket(t, p, multicastAddr, header.IGMPv1MembershipReport, 0, multicastAddr)
+}
+
+func TestSendQueuedIGMPReports(t *testing.T) {
+	e, s, clock := createStack(t, true)
+
+	// Joining a group without an assigned address should queue IGMP packets; none
+	// should be sent without an assigned address.
+	if err := s.JoinGroup(ipv4.ProtocolNumber, nicID, multicastAddr); err != nil {
+		t.Fatalf("JoinGroup(%d, %d, %s): %s", ipv4.ProtocolNumber, nicID, multicastAddr, err)
+	}
+	reportStat := s.Stats().IGMP.PacketsSent.V2MembershipReport
+	if got := reportStat.Value(); got != 0 {
+		t.Errorf("got reportStat.Value() = %d, want = 0", got)
+	}
+	clock.Advance(time.Hour)
+	if p, ok := e.Read(); ok {
+		t.Fatalf("got unexpected packet = %#v", p)
+	}
+
+	// The initial set of IGMP reports that were queued should be sent once an
+	// address is assigned.
+	if err := s.AddAddress(nicID, ipv4.ProtocolNumber, addr); err != nil {
+		t.Fatalf("AddAddress(%d, %d, %s): %s", nicID, ipv4.ProtocolNumber, addr, err)
+	}
+	if got := reportStat.Value(); got != 1 {
+		t.Errorf("got reportStat.Value() = %d, want = 1", got)
+	}
+	if p, ok := e.Read(); !ok {
+		t.Error("expected to send an IGMP membership report")
+	} else {
+		validateIgmpPacket(t, p, multicastAddr, header.IGMPv2MembershipReport, 0, multicastAddr)
+	}
+	if t.Failed() {
+		t.FailNow()
+	}
+	clock.Advance(ipv4.UnsolicitedReportIntervalMax)
+	if got := reportStat.Value(); got != 2 {
+		t.Errorf("got reportStat.Value() = %d, want = 2", got)
+	}
+	if p, ok := e.Read(); !ok {
+		t.Error("expected to send an IGMP membership report")
+	} else {
+		validateIgmpPacket(t, p, multicastAddr, header.IGMPv2MembershipReport, 0, multicastAddr)
+	}
+	if t.Failed() {
+		t.FailNow()
+	}
+
+	// Should have no more packets to send after the initial set of unsolicited
+	// reports.
+	clock.Advance(time.Hour)
+	if p, ok := e.Read(); ok {
+		t.Fatalf("got unexpected packet = %#v", p)
+	}
+}
diff --git a/pkg/tcpip/network/ipv4/ipv4.go b/pkg/tcpip/network/ipv4/ipv4.go
index e7c58ae0a..e9ff70d04 100644
--- a/pkg/tcpip/network/ipv4/ipv4.go
+++ b/pkg/tcpip/network/ipv4/ipv4.go
@@ -16,7 +16,9 @@
 package ipv4
 
 import (
+	"errors"
 	"fmt"
+	"math"
 	"sync/atomic"
 	"time"
 
@@ -31,6 +33,8 @@ import (
 )
 
 const (
+	// ReassembleTimeout is the time a packet stays in the reassembly
+	// system before being evicted.
 	// As per RFC 791 section 3.2:
 	//   The current recommendation for the initial timer setting is 15 seconds.
 	//   This may be changed as experience with this protocol accumulates.
@@ -38,7 +42,7 @@ const (
 	// Considering that it is an old recommendation, we use the same reassembly
 	// timeout that linux defines, which is 30 seconds:
 	// https://github.com/torvalds/linux/blob/47ec5303d73ea344e84f46660fff693c57641386/include/net/ip.h#L138
-	reassembleTimeout = 30 * time.Second
+	ReassembleTimeout = 30 * time.Second
 
 	// ProtocolNumber is the ipv4 protocol number.
 	ProtocolNumber = header.IPv4ProtocolNumber
@@ -79,6 +83,7 @@ type endpoint struct {
 		sync.RWMutex
 
 		addressableEndpointState stack.AddressableEndpointState
+		igmp                     igmpState
 	}
 }
 
@@ -89,7 +94,10 @@ func (p *protocol) NewEndpoint(nic stack.NetworkInterface, _ stack.LinkAddressCa
 		dispatcher: dispatcher,
 		protocol:   p,
 	}
+	e.mu.Lock()
 	e.mu.addressableEndpointState.Init(e)
+	e.mu.igmp.init(e)
+	e.mu.Unlock()
 	return e
 }
 
@@ -117,11 +125,22 @@ func (e *endpoint) Enable() *tcpip.Error {
 	// We have no need for the address endpoint.
 	ep.DecRef()
 
+	// Groups may have been joined while the endpoint was disabled, or the
+	// endpoint may have left groups from the perspective of IGMP when the
+	// endpoint was disabled. Either way, we need to let routers know to
+	// send us multicast traffic.
+	e.mu.igmp.initializeAll()
+
 	// As per RFC 1122 section 3.3.7, all hosts should join the all-hosts
 	// multicast group. Note, the IANA calls the all-hosts multicast group the
 	// all-systems multicast group.
-	_, err = e.mu.addressableEndpointState.JoinGroup(header.IPv4AllSystems)
-	return err
+	if err := e.joinGroupLocked(header.IPv4AllSystems); err != nil {
+		// joinGroupLocked only returns an error if the group address is not a valid
+		// IPv4 multicast address.
+		panic(fmt.Sprintf("e.joinGroupLocked(%s): %s", header.IPv4AllSystems, err))
+	}
+
+	return nil
 }
 
 // Enabled implements stack.NetworkEndpoint.
@@ -153,19 +172,27 @@ func (e *endpoint) Disable() {
 }
 
 func (e *endpoint) disableLocked() {
-	if !e.setEnabled(false) {
+	if !e.isEnabled() {
 		return
 	}
 
 	// The endpoint may have already left the multicast group.
-	if _, err := e.mu.addressableEndpointState.LeaveGroup(header.IPv4AllSystems); err != nil && err != tcpip.ErrBadLocalAddress {
+	if err := e.leaveGroupLocked(header.IPv4AllSystems); err != nil && err != tcpip.ErrBadLocalAddress {
 		panic(fmt.Sprintf("unexpected error when leaving group = %s: %s", header.IPv4AllSystems, err))
 	}
 
+	// Leave groups from the perspective of IGMP so that routers know that
+	// we are no longer interested in the group.
+	e.mu.igmp.softLeaveAll()
+
 	// The address may have already been removed.
 	if err := e.mu.addressableEndpointState.RemovePermanentAddress(ipv4BroadcastAddr.Address); err != nil && err != tcpip.ErrBadLocalAddress {
 		panic(fmt.Sprintf("unexpected error when removing address = %s: %s", ipv4BroadcastAddr.Address, err))
 	}
+
+	if !e.setEnabled(false) {
+		panic("should have only done work to disable the endpoint if it was enabled")
+	}
 }
 
 // DefaultTTL is the default time-to-live value for this endpoint.
@@ -176,7 +203,11 @@ func (e *endpoint) DefaultTTL() uint8 {
 // MTU implements stack.NetworkEndpoint.MTU. It returns the link-layer MTU minus
 // the network layer max header length.
 func (e *endpoint) MTU() uint32 {
-	return calculateMTU(e.nic.MTU())
+	networkMTU, err := calculateNetworkMTU(e.nic.MTU(), header.IPv4MinimumSize)
+	if err != nil {
+		return 0
+	}
+	return networkMTU
 }
 
 // MaxHeaderLength returns the maximum length needed by ipv4 headers (and
@@ -190,39 +221,48 @@ func (e *endpoint) NetworkProtocolNumber() tcpip.NetworkProtocolNumber {
 	return e.protocol.Number()
 }
 
-func (e *endpoint) addIPHeader(r *stack.Route, pkt *stack.PacketBuffer, params stack.NetworkHeaderParams) {
-	ip := header.IPv4(pkt.NetworkHeader().Push(header.IPv4MinimumSize))
+func (e *endpoint) addIPHeader(srcAddr, dstAddr tcpip.Address, pkt *stack.PacketBuffer, params stack.NetworkHeaderParams, options header.IPv4OptionsSerializer) {
+	hdrLen := header.IPv4MinimumSize
+	var optLen int
+	if options != nil {
+		optLen = int(options.Length())
+	}
+	hdrLen += optLen
+	if hdrLen > header.IPv4MaximumHeaderSize {
+		// Since we have no way to report an error we must either panic or create
+		// a packet which is different to what was requested. Choose panic as this
+		// would be a programming error that should be caught in testing.
+		panic(fmt.Sprintf("IPv4 Options %d bytes, Max %d", optLen, header.IPv4MaximumOptionsSize))
+	}
+	ip := header.IPv4(pkt.NetworkHeader().Push(hdrLen))
 	length := uint16(pkt.Size())
 	// RFC 6864 section 4.3 mandates uniqueness of ID values for non-atomic
 	// datagrams. Since the DF bit is never being set here, all datagrams
 	// are non-atomic and need an ID.
-	id := atomic.AddUint32(&e.protocol.ids[hashRoute(r, params.Protocol, e.protocol.hashIV)%buckets], 1)
+	id := atomic.AddUint32(&e.protocol.ids[hashRoute(srcAddr, dstAddr, params.Protocol, e.protocol.hashIV)%buckets], 1)
 	ip.Encode(&header.IPv4Fields{
-		IHL:         header.IPv4MinimumSize,
 		TotalLength: length,
 		ID:          uint16(id),
 		TTL:         params.TTL,
 		TOS:         params.TOS,
 		Protocol:    uint8(params.Protocol),
-		SrcAddr:     r.LocalAddress,
-		DstAddr:     r.RemoteAddress,
+		SrcAddr:     srcAddr,
+		DstAddr:     dstAddr,
+		Options:     options,
 	})
 	ip.SetChecksum(^ip.CalculateChecksum())
 	pkt.NetworkProtocolNumber = ProtocolNumber
 }
 
-func (e *endpoint) packetMustBeFragmented(pkt *stack.PacketBuffer, gso *stack.GSO) bool {
-	return (gso == nil || gso.Type == stack.GSONone) && pkt.Size() > int(e.nic.MTU())
-}
-
 // handleFragments fragments pkt and calls the handler function on each
 // fragment. It returns the number of fragments handled and the number of
 // fragments left to be processed. The IP header must already be present in the
-// original packet. The mtu is the maximum size of the packets.
-func (e *endpoint) handleFragments(r *stack.Route, gso *stack.GSO, mtu uint32, pkt *stack.PacketBuffer, handler func(*stack.PacketBuffer) *tcpip.Error) (int, int, *tcpip.Error) {
-	fragMTU := int(calculateFragmentInnerMTU(mtu, pkt))
+// original packet.
+func (e *endpoint) handleFragments(r *stack.Route, gso *stack.GSO, networkMTU uint32, pkt *stack.PacketBuffer, handler func(*stack.PacketBuffer) *tcpip.Error) (int, int, *tcpip.Error) {
+	// Round the MTU down to align to 8 bytes.
+	fragmentPayloadSize := networkMTU &^ 7
 	networkHeader := header.IPv4(pkt.NetworkHeader().View())
-	pf := fragmentation.MakePacketFragmenter(pkt, fragMTU, pkt.AvailableHeaderBytes()+len(networkHeader))
+	pf := fragmentation.MakePacketFragmenter(pkt, fragmentPayloadSize, pkt.AvailableHeaderBytes()+len(networkHeader))
 
 	var n int
 	for {
@@ -239,18 +279,14 @@ func (e *endpoint) handleFragments(r *stack.Route, gso *stack.GSO, mtu uint32, p
 
 // WritePacket writes a packet to the given destination address and protocol.
 func (e *endpoint) WritePacket(r *stack.Route, gso *stack.GSO, params stack.NetworkHeaderParams, pkt *stack.PacketBuffer) *tcpip.Error {
-	e.addIPHeader(r, pkt, params)
-	return e.writePacket(r, gso, pkt)
-}
+	e.addIPHeader(r.LocalAddress, r.RemoteAddress, pkt, params, nil /* options */)
 
-func (e *endpoint) writePacket(r *stack.Route, gso *stack.GSO, pkt *stack.PacketBuffer) *tcpip.Error {
 	// iptables filtering. All packets that reach here are locally
 	// generated.
 	nicName := e.protocol.stack.FindNICNameFromID(e.nic.ID())
-	ipt := e.protocol.stack.IPTables()
-	if ok := ipt.Check(stack.Output, pkt, gso, r, "", nicName); !ok {
+	if ok := e.protocol.stack.IPTables().Check(stack.Output, pkt, gso, r, "", nicName); !ok {
 		// iptables is telling us to drop the packet.
-		r.Stats().IP.IPTablesOutputDropped.Increment()
+		e.protocol.stack.Stats().IP.IPTablesOutputDropped.Increment()
 		return nil
 	}
 
@@ -265,23 +301,43 @@ func (e *endpoint) writePacket(r *stack.Route, gso *stack.GSO, pkt *stack.Packet
 		netHeader := header.IPv4(pkt.NetworkHeader().View())
 		ep, err := e.protocol.stack.FindNetworkEndpoint(ProtocolNumber, netHeader.DestinationAddress())
 		if err == nil {
-			route := r.ReverseRoute(netHeader.SourceAddress(), netHeader.DestinationAddress())
-			ep.HandlePacket(&route, pkt)
+			pkt := pkt.CloneToInbound()
+			if e.protocol.stack.ParsePacketBuffer(ProtocolNumber, pkt) == stack.ParsedOK {
+				// Since we rewrote the packet but it is being routed back to us, we can
+				// safely assume the checksum is valid.
+				pkt.RXTransportChecksumValidated = true
+				ep.(*endpoint).handlePacket(pkt)
+			}
 			return nil
 		}
 	}
 
+	return e.writePacket(r, gso, pkt, false /* headerIncluded */)
+}
+
+func (e *endpoint) writePacket(r *stack.Route, gso *stack.GSO, pkt *stack.PacketBuffer, headerIncluded bool) *tcpip.Error {
 	if r.Loop&stack.PacketLoop != 0 {
-		loopedR := r.MakeLoopedRoute()
-		e.HandlePacket(&loopedR, pkt)
-		loopedR.Release()
+		pkt := pkt.CloneToInbound()
+		if e.protocol.stack.ParsePacketBuffer(ProtocolNumber, pkt) == stack.ParsedOK {
+			// If the packet was generated by the stack (not a raw/packet endpoint
+			// where a packet may be written with the header included), then we can
+			// safely assume the checksum is valid.
+			pkt.RXTransportChecksumValidated = !headerIncluded
+			e.handlePacket(pkt)
+		}
 	}
 	if r.Loop&stack.PacketOut == 0 {
 		return nil
 	}
 
-	if e.packetMustBeFragmented(pkt, gso) {
-		sent, remain, err := e.handleFragments(r, gso, e.nic.MTU(), pkt, func(fragPkt *stack.PacketBuffer) *tcpip.Error {
+	networkMTU, err := calculateNetworkMTU(e.nic.MTU(), uint32(pkt.NetworkHeader().View().Size()))
+	if err != nil {
+		r.Stats().IP.OutgoingPacketErrors.Increment()
+		return err
+	}
+
+	if packetMustBeFragmented(pkt, networkMTU, gso) {
+		sent, remain, err := e.handleFragments(r, gso, networkMTU, pkt, func(fragPkt *stack.PacketBuffer) *tcpip.Error {
 			// TODO(gvisor.dev/issue/3884): Evaluate whether we want to send each
 			// fragment one by one using WritePacket() (current strategy) or if we
 			// want to create a PacketBufferList from the fragments and feed it to
@@ -292,6 +348,7 @@ func (e *endpoint) writePacket(r *stack.Route, gso *stack.GSO, pkt *stack.Packet
 		r.Stats().IP.OutgoingPacketErrors.IncrementBy(uint64(remain))
 		return err
 	}
+
 	if err := e.nic.WritePacket(r, gso, ProtocolNumber, pkt); err != nil {
 		r.Stats().IP.OutgoingPacketErrors.Increment()
 		return err
@@ -310,18 +367,24 @@ func (e *endpoint) WritePackets(r *stack.Route, gso *stack.GSO, pkts stack.Packe
 	}
 
 	for pkt := pkts.Front(); pkt != nil; pkt = pkt.Next() {
-		e.addIPHeader(r, pkt, params)
-		if e.packetMustBeFragmented(pkt, gso) {
+		e.addIPHeader(r.LocalAddress, r.RemoteAddress, pkt, params, nil /* options */)
+		networkMTU, err := calculateNetworkMTU(e.nic.MTU(), uint32(pkt.NetworkHeader().View().Size()))
+		if err != nil {
+			r.Stats().IP.OutgoingPacketErrors.IncrementBy(uint64(pkts.Len()))
+			return 0, err
+		}
+
+		if packetMustBeFragmented(pkt, networkMTU, gso) {
 			// Keep track of the packet that is about to be fragmented so it can be
 			// removed once the fragmentation is done.
 			originalPkt := pkt
-			if _, _, err := e.handleFragments(r, gso, e.nic.MTU(), pkt, func(fragPkt *stack.PacketBuffer) *tcpip.Error {
+			if _, _, err := e.handleFragments(r, gso, networkMTU, pkt, func(fragPkt *stack.PacketBuffer) *tcpip.Error {
 				// Modify the packet list in place with the new fragments.
 				pkts.InsertAfter(pkt, fragPkt)
 				pkt = fragPkt
 				return nil
 			}); err != nil {
-				panic(fmt.Sprintf("e.handleFragments(_, _, %d, _, _) = %s", e.nic.MTU(), err))
+				panic(fmt.Sprintf("e.handleFragments(_, _, %d, _, _) = %s", networkMTU, err))
 			}
 			// Remove the packet that was just fragmented and process the rest.
 			pkts.Remove(originalPkt)
@@ -331,8 +394,7 @@ func (e *endpoint) WritePackets(r *stack.Route, gso *stack.GSO, pkts stack.Packe
 	nicName := e.protocol.stack.FindNICNameFromID(e.nic.ID())
 	// iptables filtering. All packets that reach here are locally
 	// generated.
-	ipt := e.protocol.stack.IPTables()
-	dropped, natPkts := ipt.CheckPackets(stack.Output, pkts, gso, r, nicName)
+	dropped, natPkts := e.protocol.stack.IPTables().CheckPackets(stack.Output, pkts, gso, r, nicName)
 	if len(dropped) == 0 && len(natPkts) == 0 {
 		// Fast path: If no packets are to be dropped then we can just invoke the
 		// faster WritePackets API directly.
@@ -355,10 +417,13 @@ func (e *endpoint) WritePackets(r *stack.Route, gso *stack.GSO, pkts stack.Packe
 		if _, ok := natPkts[pkt]; ok {
 			netHeader := header.IPv4(pkt.NetworkHeader().View())
 			if ep, err := e.protocol.stack.FindNetworkEndpoint(ProtocolNumber, netHeader.DestinationAddress()); err == nil {
-				src := netHeader.SourceAddress()
-				dst := netHeader.DestinationAddress()
-				route := r.ReverseRoute(src, dst)
-				ep.HandlePacket(&route, pkt)
+				pkt := pkt.CloneToInbound()
+				if e.protocol.stack.ParsePacketBuffer(ProtocolNumber, pkt) == stack.ParsedOK {
+					// Since we rewrote the packet but it is being routed back to us, we
+					// can safely assume the checksum is valid.
+					pkt.RXTransportChecksumValidated = true
+					ep.(*endpoint).handlePacket(pkt)
+				}
 				n++
 				continue
 			}
@@ -385,6 +450,16 @@ func (e *endpoint) WriteHeaderIncludedPacket(r *stack.Route, pkt *stack.PacketBu
 	if !ok {
 		return tcpip.ErrMalformedHeader
 	}
+
+	hdrLen := header.IPv4(h).HeaderLength()
+	if hdrLen < header.IPv4MinimumSize {
+		return tcpip.ErrMalformedHeader
+	}
+
+	h, ok = pkt.Data.PullUp(int(hdrLen))
+	if !ok {
+		return tcpip.ErrMalformedHeader
+	}
 	ip := header.IPv4(h)
 
 	// Always set the total length.
@@ -406,7 +481,7 @@ func (e *endpoint) WriteHeaderIncludedPacket(r *stack.Route, pkt *stack.PacketBu
 		// non-atomic datagrams, so assign an ID to all such datagrams
 		// according to the definition given in RFC 6864 section 4.
 		if ip.Flags()&header.IPv4FlagDontFragment == 0 || ip.Flags()&header.IPv4FlagMoreFragments != 0 || ip.FragmentOffset() > 0 {
-			ip.SetID(uint16(atomic.AddUint32(&e.protocol.ids[hashRoute(r, 0 /* protocol */, e.protocol.hashIV)%buckets], 1)))
+			ip.SetID(uint16(atomic.AddUint32(&e.protocol.ids[hashRoute(r.LocalAddress, r.RemoteAddress, 0 /* protocol */, e.protocol.hashIV)%buckets], 1)))
 		}
 	}
 
@@ -424,19 +499,91 @@ func (e *endpoint) WriteHeaderIncludedPacket(r *stack.Route, pkt *stack.PacketBu
 		return tcpip.ErrMalformedHeader
 	}
 
-	return e.writePacket(r, nil /* gso */, pkt)
+	return e.writePacket(r, nil /* gso */, pkt, true /* headerIncluded */)
+}
+
+// forwardPacket attempts to forward a packet to its final destination.
+func (e *endpoint) forwardPacket(pkt *stack.PacketBuffer) *tcpip.Error {
+	h := header.IPv4(pkt.NetworkHeader().View())
+	ttl := h.TTL()
+	if ttl == 0 {
+		// As per RFC 792 page 6, Time Exceeded Message,
+		//
+		//  If the gateway processing a datagram finds the time to live field
+		//  is zero it must discard the datagram.  The gateway may also notify
+		//  the source host via the time exceeded message.
+		return e.protocol.returnError(&icmpReasonTTLExceeded{}, pkt)
+	}
+
+	dstAddr := h.DestinationAddress()
+
+	// Check if the destination is owned by the stack.
+	networkEndpoint, err := e.protocol.stack.FindNetworkEndpoint(ProtocolNumber, dstAddr)
+	if err == nil {
+		networkEndpoint.(*endpoint).handlePacket(pkt)
+		return nil
+	}
+	if err != tcpip.ErrBadAddress {
+		return err
+	}
+
+	r, err := e.protocol.stack.FindRoute(0, "", dstAddr, ProtocolNumber, false /* multicastLoop */)
+	if err != nil {
+		return err
+	}
+	defer r.Release()
+
+	// We need to do a deep copy of the IP packet because
+	// WriteHeaderIncludedPacket takes ownership of the packet buffer, but we do
+	// not own it.
+	newHdr := header.IPv4(stack.PayloadSince(pkt.NetworkHeader()))
+
+	// As per RFC 791 page 30, Time to Live,
+	//
+	//   This field must be decreased at each point that the internet header
+	//   is processed to reflect the time spent processing the datagram.
+	//   Even if no local information is available on the time actually
+	//   spent, the field must be decremented by 1.
+	newHdr.SetTTL(ttl - 1)
+
+	return r.WriteHeaderIncludedPacket(stack.NewPacketBuffer(stack.PacketBufferOptions{
+		ReserveHeaderBytes: int(r.MaxHeaderLength()),
+		Data:               buffer.View(newHdr).ToVectorisedView(),
+	}))
 }
 
 // HandlePacket is called by the link layer when new ipv4 packets arrive for
 // this endpoint.
-func (e *endpoint) HandlePacket(r *stack.Route, pkt *stack.PacketBuffer) {
+func (e *endpoint) HandlePacket(pkt *stack.PacketBuffer) {
+	stats := e.protocol.stack.Stats()
+	stats.IP.PacketsReceived.Increment()
+
 	if !e.isEnabled() {
+		stats.IP.DisabledPacketsReceived.Increment()
 		return
 	}
 
+	// Loopback traffic skips the prerouting chain.
+	if !e.nic.IsLoopback() {
+		if ok := e.protocol.stack.IPTables().Check(stack.Prerouting, pkt, nil, nil, e.MainAddress().Address, ""); !ok {
+			// iptables is telling us to drop the packet.
+			stats.IP.IPTablesPreroutingDropped.Increment()
+			return
+		}
+	}
+
+	e.handlePacket(pkt)
+}
+
+// handlePacket is like HandlePacket except it does not perform the prerouting
+// iptables hook.
+func (e *endpoint) handlePacket(pkt *stack.PacketBuffer) {
+	pkt.NICID = e.nic.ID()
+	stats := e.protocol.stack.Stats()
+
 	h := header.IPv4(pkt.NetworkHeader().View())
 	if !h.IsValid(pkt.Data.Size() + pkt.NetworkHeader().View().Size() + pkt.TransportHeader().View().Size()) {
-		r.Stats().IP.MalformedPacketsReceived.Increment()
+		stats.IP.MalformedPacketsReceived.Increment()
 		return
 	}
 
@@ -462,25 +609,52 @@ func (e *endpoint) HandlePacket(r *stack.Route, pkt *stack.PacketBuffer) {
 	//        is all 1 bits (-0 in 1's complement arithmetic), the check
 	//        succeeds.
 	if h.CalculateChecksum() != 0xffff {
-		r.Stats().IP.MalformedPacketsReceived.Increment()
+		stats.IP.MalformedPacketsReceived.Increment()
 		return
 	}
 
+	srcAddr := h.SourceAddress()
+	dstAddr := h.DestinationAddress()
+
 	// As per RFC 1122 section 3.2.1.3:
 	//   When a host sends any datagram, the IP source address MUST
 	//   be one of its own IP addresses (but not a broadcast or
 	//   multicast address).
-	if r.IsOutboundBroadcast() || header.IsV4MulticastAddress(r.RemoteAddress) {
-		r.Stats().IP.InvalidSourceAddressesReceived.Increment()
+	if srcAddr == header.IPv4Broadcast || header.IsV4MulticastAddress(srcAddr) {
+		stats.IP.InvalidSourceAddressesReceived.Increment()
+		return
+	}
+	// Make sure the source address is not a subnet-local broadcast address.
+	if addressEndpoint := e.AcquireAssignedAddress(srcAddr, false /* createTemp */, stack.NeverPrimaryEndpoint); addressEndpoint != nil {
+		subnet := addressEndpoint.Subnet()
+		addressEndpoint.DecRef()
+		if subnet.IsBroadcast(srcAddr) {
+			stats.IP.InvalidSourceAddressesReceived.Increment()
+			return
+		}
+	}
+
+	// The destination address should be an address we own or a group we joined
+	// for us to receive the packet. Otherwise, attempt to forward the packet.
+	if addressEndpoint := e.AcquireAssignedAddress(dstAddr, e.nic.Promiscuous(), stack.CanBePrimaryEndpoint); addressEndpoint != nil {
+		subnet := addressEndpoint.AddressWithPrefix().Subnet()
+		addressEndpoint.DecRef()
+		pkt.NetworkPacketInfo.LocalAddressBroadcast = subnet.IsBroadcast(dstAddr) || dstAddr == header.IPv4Broadcast
+	} else if !e.IsInGroup(dstAddr) {
+		if !e.protocol.Forwarding() {
+			stats.IP.InvalidDestinationAddressesReceived.Increment()
+			return
+		}
+
+		_ = e.forwardPacket(pkt)
 		return
 	}
 
 	// iptables filtering. All packets that reach here are intended for
 	// this machine and will not be forwarded.
-	ipt := e.protocol.stack.IPTables()
-	if ok := ipt.Check(stack.Input, pkt, nil, nil, "", ""); !ok {
+	if ok := e.protocol.stack.IPTables().Check(stack.Input, pkt, nil, nil, "", ""); !ok {
 		// iptables is telling us to drop the packet.
-		r.Stats().IP.IPTablesInputDropped.Increment()
+		stats.IP.IPTablesInputDropped.Increment()
 		return
 	}
 
@@ -488,8 +662,8 @@ func (e *endpoint) HandlePacket(r *stack.Route, pkt *stack.PacketBuffer) {
 		if pkt.Data.Size()+pkt.TransportHeader().View().Size() == 0 {
 			// Drop the packet as it's marked as a fragment but has
 			// no payload.
-			r.Stats().IP.MalformedPacketsReceived.Increment()
-			r.Stats().IP.MalformedFragmentsReceived.Increment()
+			stats.IP.MalformedPacketsReceived.Increment()
+			stats.IP.MalformedFragmentsReceived.Increment()
 			return
 		}
 		// The packet is a fragment, let's try to reassemble it.
@@ -502,14 +676,13 @@ func (e *endpoint) HandlePacket(r *stack.Route, pkt *stack.PacketBuffer) {
 		// size). Otherwise the packet would've been rejected as invalid before
 		// reaching here.
 		if int(start)+pkt.Data.Size() > header.IPv4MaximumPayloadSize {
-			r.Stats().IP.MalformedPacketsReceived.Increment()
-			r.Stats().IP.MalformedFragmentsReceived.Increment()
+			stats.IP.MalformedPacketsReceived.Increment()
+			stats.IP.MalformedFragmentsReceived.Increment()
 			return
 		}
-		var ready bool
-		var err error
+
 		proto := h.Protocol()
-		pkt.Data, _, ready, err = e.protocol.fragmentation.Process(
+		data, _, ready, err := e.protocol.fragmentation.Process(
 			// As per RFC 791 section 2.3, the identification value is unique
 			// for a source-destination pair and protocol.
 			fragmentation.FragmentID{
@@ -522,30 +695,63 @@ func (e *endpoint) HandlePacket(r *stack.Route, pkt *stack.PacketBuffer) {
 			start+uint16(pkt.Data.Size())-1,
 			h.More(),
 			proto,
-			pkt.Data,
+			pkt,
 		)
 		if err != nil {
-			r.Stats().IP.MalformedPacketsReceived.Increment()
-			r.Stats().IP.MalformedFragmentsReceived.Increment()
+			stats.IP.MalformedPacketsReceived.Increment()
+			stats.IP.MalformedFragmentsReceived.Increment()
 			return
 		}
 		if !ready {
 			return
 		}
+		pkt.Data = data
+
+		// The reassembler doesn't take care of fixing up the header, so we need
+		// to do it here.
+		h.SetTotalLength(uint16(pkt.Data.Size() + len((h))))
+		h.SetFlagsFragmentOffset(0, 0)
 	}
+	stats.IP.PacketsDelivered.Increment()
 
-	r.Stats().IP.PacketsDelivered.Increment()
 	p := h.TransportProtocol()
 	if p == header.ICMPv4ProtocolNumber {
 		// TODO(gvisor.dev/issues/3810): when we sort out ICMP and transport
 		// headers, the setting of the transport number here should be
 		// unnecessary and removed.
 		pkt.TransportProtocolNumber = p
-		e.handleICMP(r, pkt)
+		e.handleICMP(pkt)
+		return
+	}
+	if p == header.IGMPProtocolNumber {
+		e.mu.Lock()
+		e.mu.igmp.handleIGMP(pkt)
+		e.mu.Unlock()
 		return
 	}
+	if opts := h.Options(); len(opts) != 0 {
+		// TODO(gvisor.dev/issue/4586):
+		// When we add forwarding support we should use the verified options
+		// rather than just throwing them away.
+		aux, _, err := e.processIPOptions(pkt, opts, &optionUsageReceive{})
+		if err != nil {
+			switch {
+			case
+				errors.Is(err, header.ErrIPv4OptDuplicate),
+				errors.Is(err, errIPv4RecordRouteOptInvalidPointer),
+				errors.Is(err, errIPv4RecordRouteOptInvalidLength),
+				errors.Is(err, errIPv4TimestampOptInvalidLength),
+				errors.Is(err, errIPv4TimestampOptInvalidPointer),
+				errors.Is(err, errIPv4TimestampOptOverflow):
+				_ = e.protocol.returnError(&icmpReasonParamProblem{pointer: aux}, pkt)
+				stats.MalformedRcvdPackets.Increment()
+				stats.IP.MalformedPacketsReceived.Increment()
+			}
+			return
+		}
+	}
 
-	switch res := e.dispatcher.DeliverTransportPacket(r, p, pkt); res {
+	switch res := e.dispatcher.DeliverTransportPacket(p, pkt); res {
 	case stack.TransportPacketHandled:
 	case stack.TransportPacketDestinationPortUnreachable:
 		// As per RFC: 1122 Section 3.2.2.1 A host SHOULD generate Destination
@@ -553,13 +759,13 @@ func (e *endpoint) HandlePacket(r *stack.Route, pkt *stack.PacketBuffer) {
 		//     3 (Port Unreachable), when the designated transport protocol
 		//     (e.g., UDP) is unable to demultiplex the datagram but has no
 		//     protocol mechanism to inform the sender.
-		_ = e.protocol.returnError(r, &icmpReasonPortUnreachable{}, pkt)
+		_ = e.protocol.returnError(&icmpReasonPortUnreachable{}, pkt)
 	case stack.TransportPacketProtocolUnreachable:
 		// As per RFC: 1122 Section 3.2.2.1
 		//   A host SHOULD generate Destination Unreachable messages with code:
 		//     2 (Protocol Unreachable), when the designated transport protocol
 		//     is not supported
-		_ = e.protocol.returnError(r, &icmpReasonProtoUnreachable{}, pkt)
+		_ = e.protocol.returnError(&icmpReasonProtoUnreachable{}, pkt)
 	default:
 		panic(fmt.Sprintf("unrecognized result from DeliverTransportPacket = %d", res))
 	}
@@ -578,7 +784,12 @@ func (e *endpoint) Close() {
 func (e *endpoint) AddAndAcquirePermanentAddress(addr tcpip.AddressWithPrefix, peb stack.PrimaryEndpointBehavior, configType stack.AddressConfigType, deprecated bool) (stack.AddressEndpoint, *tcpip.Error) {
 	e.mu.Lock()
 	defer e.mu.Unlock()
-	return e.mu.addressableEndpointState.AddAndAcquirePermanentAddress(addr, peb, configType, deprecated)
+
+	ep, err := e.mu.addressableEndpointState.AddAndAcquirePermanentAddress(addr, peb, configType, deprecated)
+	if err == nil {
+		e.mu.igmp.sendQueuedReports()
+	}
+	return ep, err
 }
 
 // RemovePermanentAddress implements stack.AddressableEndpoint.
@@ -601,34 +812,26 @@ func (e *endpoint) AcquireAssignedAddress(localAddr tcpip.Address, allowTemp boo
 	defer e.mu.Unlock()
 
 	loopback := e.nic.IsLoopback()
-	addressEndpoint := e.mu.addressableEndpointState.ReadOnly().AddrOrMatching(localAddr, allowTemp, func(addressEndpoint stack.AddressEndpoint) bool {
-		subnet := addressEndpoint.AddressWithPrefix().Subnet()
+	return e.mu.addressableEndpointState.AcquireAssignedAddressOrMatching(localAddr, func(addressEndpoint stack.AddressEndpoint) bool {
+		subnet := addressEndpoint.Subnet()
 		// IPv4 has a notion of a subnet broadcast address and considers the
 		// loopback interface bound to an address's whole subnet (on linux).
 		return subnet.IsBroadcast(localAddr) || (loopback && subnet.Contains(localAddr))
-	})
-	if addressEndpoint != nil {
-		return addressEndpoint
-	}
-
-	if !allowTemp {
-		return nil
-	}
-
-	addr := localAddr.WithPrefix()
-	addressEndpoint, err := e.mu.addressableEndpointState.AddAndAcquireTemporaryAddress(addr, tempPEB)
-	if err != nil {
-		// AddAddress only returns an error if the address is already assigned,
-		// but we just checked above if the address exists so we expect no error.
-		panic(fmt.Sprintf("e.mu.addressableEndpointState.AddAndAcquireTemporaryAddress(%s, %d): %s", addr, tempPEB, err))
-	}
-	return addressEndpoint
+	}, allowTemp, tempPEB)
 }
 
 // AcquireOutgoingPrimaryAddress implements stack.AddressableEndpoint.
 func (e *endpoint) AcquireOutgoingPrimaryAddress(remoteAddr tcpip.Address, allowExpired bool) stack.AddressEndpoint {
 	e.mu.RLock()
 	defer e.mu.RUnlock()
+	return e.acquireOutgoingPrimaryAddressRLocked(remoteAddr, allowExpired)
+}
+
+// acquireOutgoingPrimaryAddressRLocked is like AcquireOutgoingPrimaryAddress
+// but with locking requirements.
+//
+// Precondition: e.mu must be read locked.
+func (e *endpoint) acquireOutgoingPrimaryAddressRLocked(remoteAddr tcpip.Address, allowExpired bool) stack.AddressEndpoint {
 	return e.mu.addressableEndpointState.AcquireOutgoingPrimaryAddress(remoteAddr, allowExpired)
 }
 
@@ -647,32 +850,48 @@ func (e *endpoint) PermanentAddresses() []tcpip.AddressWithPrefix {
 }
 
 // JoinGroup implements stack.GroupAddressableEndpoint.
-func (e *endpoint) JoinGroup(addr tcpip.Address) (bool, *tcpip.Error) {
+func (e *endpoint) JoinGroup(addr tcpip.Address) *tcpip.Error {
+	e.mu.Lock()
+	defer e.mu.Unlock()
+	return e.joinGroupLocked(addr)
+}
+
+// joinGroupLocked is like JoinGroup but with locking requirements.
+//
+// Precondition: e.mu must be locked.
+func (e *endpoint) joinGroupLocked(addr tcpip.Address) *tcpip.Error {
 	if !header.IsV4MulticastAddress(addr) {
-		return false, tcpip.ErrBadAddress
+		return tcpip.ErrBadAddress
 	}
 
-	e.mu.Lock()
-	defer e.mu.Unlock()
-	return e.mu.addressableEndpointState.JoinGroup(addr)
+	e.mu.igmp.joinGroup(addr)
+	return nil
 }
 
 // LeaveGroup implements stack.GroupAddressableEndpoint.
-func (e *endpoint) LeaveGroup(addr tcpip.Address) (bool, *tcpip.Error) {
+func (e *endpoint) LeaveGroup(addr tcpip.Address) *tcpip.Error {
 	e.mu.Lock()
 	defer e.mu.Unlock()
-	return e.mu.addressableEndpointState.LeaveGroup(addr)
+	return e.leaveGroupLocked(addr)
+}
+
+// leaveGroupLocked is like LeaveGroup but with locking requirements.
+//
+// Precondition: e.mu must be locked.
+func (e *endpoint) leaveGroupLocked(addr tcpip.Address) *tcpip.Error {
+	return e.mu.igmp.leaveGroup(addr)
 }
 
 // IsInGroup implements stack.GroupAddressableEndpoint.
 func (e *endpoint) IsInGroup(addr tcpip.Address) bool {
 	e.mu.RLock()
 	defer e.mu.RUnlock()
-	return e.mu.addressableEndpointState.IsInGroup(addr)
+	return e.mu.igmp.isInGroup(addr)
 }
 
 var _ stack.ForwardingNetworkProtocol = (*protocol)(nil)
 var _ stack.NetworkProtocol = (*protocol)(nil)
+var _ fragmentation.TimeoutHandler = (*protocol)(nil)
 
 type protocol struct {
 	stack *stack.Stack
@@ -693,6 +912,8 @@ type protocol struct {
 	hashIV uint32
 
 	fragmentation *fragmentation.Fragmentation
+
+	options Options
 }
 
 // Number returns the ipv4 protocol number.
@@ -778,26 +999,32 @@ func (p *protocol) SetForwarding(v bool) {
 	}
 }
 
-// calculateMTU calculates the network-layer payload MTU based on the link-layer
-// payload mtu.
-func calculateMTU(mtu uint32) uint32 {
-	if mtu > MaxTotalSize {
-		mtu = MaxTotalSize
+// calculateNetworkMTU calculates the network-layer payload MTU based on the
+// link-layer payload MTU and the network header size.
+func calculateNetworkMTU(linkMTU, networkHeaderSize uint32) (uint32, *tcpip.Error) {
+	if linkMTU < header.IPv4MinimumMTU {
+		return 0, tcpip.ErrInvalidEndpointState
 	}
-	return mtu - header.IPv4MinimumSize
-}
 
-// calculateFragmentInnerMTU calculates the maximum number of bytes of
-// fragmentable data a fragment can have, based on the link layer mtu and pkt's
-// network header size.
-func calculateFragmentInnerMTU(mtu uint32, pkt *stack.PacketBuffer) uint32 {
-	if mtu > MaxTotalSize {
-		mtu = MaxTotalSize
+	// As per RFC 791 section 3.1, an IPv4 header cannot exceed 60 bytes in
+	// length:
+	//   The maximal internet header is 60 octets, and a typical internet header
+	//   is 20 octets, allowing a margin for headers of higher level protocols.
+	if networkHeaderSize > header.IPv4MaximumHeaderSize {
+		return 0, tcpip.ErrMalformedHeader
 	}
-	mtu -= uint32(pkt.NetworkHeader().View().Size())
-	// Round the MTU down to align to 8 bytes.
-	mtu &^= 7
-	return mtu
+
+	networkMTU := linkMTU
+	if networkMTU > MaxTotalSize {
+		networkMTU = MaxTotalSize
+	}
+
+	return networkMTU - uint32(networkHeaderSize), nil
+}
+
+func packetMustBeFragmented(pkt *stack.PacketBuffer, networkMTU uint32, gso *stack.GSO) bool {
+	payload := pkt.TransportHeader().View().Size() + pkt.Data.Size()
+	return (gso == nil || gso.Type == stack.GSONone) && uint32(payload) > networkMTU
 }
 
 // addressToUint32 translates an IPv4 address into its little endian uint32
@@ -811,17 +1038,23 @@ func addressToUint32(addr tcpip.Address) uint32 {
 	return uint32(addr[0]) | uint32(addr[1])<<8 | uint32(addr[2])<<16 | uint32(addr[3])<<24
 }
 
-// hashRoute calculates a hash value for the given route. It uses the source &
-// destination address, the transport protocol number and a 32-bit number to
-// generate the hash.
-func hashRoute(r *stack.Route, protocol tcpip.TransportProtocolNumber, hashIV uint32) uint32 {
-	a := addressToUint32(r.LocalAddress)
-	b := addressToUint32(r.RemoteAddress)
+// hashRoute calculates a hash value for the given source/destination pair using
+// the addresses, transport protocol number and a 32-bit number to generate the
+// hash.
+func hashRoute(srcAddr, dstAddr tcpip.Address, protocol tcpip.TransportProtocolNumber, hashIV uint32) uint32 {
+	a := addressToUint32(srcAddr)
+	b := addressToUint32(dstAddr)
 	return hash.Hash3Words(a, b, uint32(protocol), hashIV)
 }
 
-// NewProtocol returns an IPv4 network protocol.
-func NewProtocol(s *stack.Stack) stack.NetworkProtocol {
+// Options holds options to configure a new protocol.
+type Options struct {
+	// IGMP holds options for IGMP.
+	IGMP IGMPOptions
+}
+
+// NewProtocolWithOptions returns a factory for IPv4 network protocols using opts.
+func NewProtocolWithOptions(opts Options) stack.NetworkProtocolFactory {
 	ids := make([]uint32, buckets)
 
 	// Randomly initialize hashIV and the ids.
@@ -831,21 +1064,31 @@ func NewProtocol(s *stack.Stack) stack.NetworkProtocol {
 	}
 	hashIV := r[buckets]
 
-	return &protocol{
-		stack:         s,
-		ids:           ids,
-		hashIV:        hashIV,
-		defaultTTL:    DefaultTTL,
-		fragmentation: fragmentation.NewFragmentation(fragmentblockSize, fragmentation.HighFragThreshold, fragmentation.LowFragThreshold, reassembleTimeout, s.Clock()),
+	return func(s *stack.Stack) stack.NetworkProtocol {
+		p := &protocol{
+			stack:      s,
+			ids:        ids,
+			hashIV:     hashIV,
+			defaultTTL: DefaultTTL,
+			options:    opts,
+		}
+		p.fragmentation = fragmentation.NewFragmentation(fragmentblockSize, fragmentation.HighFragThreshold, fragmentation.LowFragThreshold, ReassembleTimeout, s.Clock(), p)
+		return p
 	}
 }
 
+// NewProtocol is equivalent to NewProtocolWithOptions with an empty Options.
+func NewProtocol(s *stack.Stack) stack.NetworkProtocol {
+	return NewProtocolWithOptions(Options{})(s)
+}
+
 func buildNextFragment(pf *fragmentation.PacketFragmenter, originalIPHeader header.IPv4) (*stack.PacketBuffer, bool) {
 	fragPkt, offset, copied, more := pf.BuildNextFragment()
 	fragPkt.NetworkProtocolNumber = ProtocolNumber
 
 	originalIPHeaderLength := len(originalIPHeader)
 	nextFragIPHeader := header.IPv4(fragPkt.NetworkHeader().Push(originalIPHeaderLength))
+	fragPkt.NetworkProtocolNumber = ProtocolNumber
 
 	if copied := copy(nextFragIPHeader, originalIPHeader); copied != len(originalIPHeader) {
 		panic(fmt.Sprintf("wrong number of bytes copied into fragmentIPHeaders: got = %d, want = %d", copied, originalIPHeaderLength))
@@ -862,3 +1105,338 @@ func buildNextFragment(pf *fragmentation.PacketFragmenter, originalIPHeader head
 
 	return fragPkt, more
 }
+
+// optionAction describes possible actions that may be taken on an option
+// while processing it.
+type optionAction uint8
+
+const (
+	// optionRemove says that the option should not be in the output option set.
+	optionRemove optionAction = iota
+
+	// optionProcess says that the option should be fully processed.
+	optionProcess
+
+	// optionVerify says the option should be checked and passed unchanged.
+	optionVerify
+
+	// optionPass says to pass the output set without checking.
+	optionPass
+)
+
+// optionActions list what to do for each option in a given scenario.
+type optionActions struct {
+	// timestamp controls what to do with a Timestamp option.
+	timestamp optionAction
+
+	// recordRoute controls what to do with a Record Route option.
+	recordRoute optionAction
+
+	// unknown controls what to do with an unknown option.
+	unknown optionAction
+}
+
+// optionsUsage specifies the ways options may be operated upon for a given
+// scenario during packet processing.
+type optionsUsage interface {
+	actions() optionActions
+}
+
+// optionUsageReceive implements optionsUsage for received packets.
+type optionUsageReceive struct{}
+
+// actions implements optionsUsage.
+func (*optionUsageReceive) actions() optionActions {
+	return optionActions{
+		timestamp:   optionVerify,
+		recordRoute: optionVerify,
+		unknown:     optionPass,
+	}
+}
+
+// TODO(gvisor.dev/issue/4586): Add an entry here for forwarding when it
+// is enabled (Process, Process, Pass) and for fragmenting (Process, Process,
+// Pass for frag1, but Remove, Remove, Remove for all other frags).
+
+// optionUsageEcho implements optionsUsage for echo packet processing.
+type optionUsageEcho struct{}
+
+// actions implements optionsUsage.
+func (*optionUsageEcho) actions() optionActions {
+	return optionActions{
+		timestamp:   optionProcess,
+		recordRoute: optionProcess,
+		unknown:     optionRemove,
+	}
+}
+
+var (
+	errIPv4TimestampOptInvalidLength  = errors.New("invalid Timestamp length")
+	errIPv4TimestampOptInvalidPointer = errors.New("invalid Timestamp pointer")
+	errIPv4TimestampOptOverflow       = errors.New("overflow in Timestamp")
+	errIPv4TimestampOptInvalidFlags   = errors.New("invalid Timestamp flags")
+)
+
+// handleTimestamp does any required processing on a Timestamp option
+// in place.
+func handleTimestamp(tsOpt header.IPv4OptionTimestamp, localAddress tcpip.Address, clock tcpip.Clock, usage optionsUsage) (uint8, error) {
+	flags := tsOpt.Flags()
+	var entrySize uint8
+	switch flags {
+	case header.IPv4OptionTimestampOnlyFlag:
+		entrySize = header.IPv4OptionTimestampSize
+	case
+		header.IPv4OptionTimestampWithIPFlag,
+		header.IPv4OptionTimestampWithPredefinedIPFlag:
+		entrySize = header.IPv4OptionTimestampWithAddrSize
+	default:
+		return header.IPv4OptTSOFLWAndFLGOffset, errIPv4TimestampOptInvalidFlags
+	}
+
+	pointer := tsOpt.Pointer()
+	// RFC 791 page 22 states: "The smallest legal value is 5."
+	// Since the pointer is 1 based, and the header is 4 bytes long the
+	// pointer must point beyond the header therefore 4 or less is bad.
+	if pointer <= header.IPv4OptionTimestampHdrLength {
+		return header.IPv4OptTSPointerOffset, errIPv4TimestampOptInvalidPointer
+	}
+	// To simplify processing below, base further work on the array of timestamps
+	// beyond the header, rather than on the whole option. Also to aid
+	// calculations set 'nextSlot' to be 0 based as in the packet it is 1 based.
+	nextSlot := pointer - (header.IPv4OptionTimestampHdrLength + 1)
+	optLen := tsOpt.Size()
+	dataLength := optLen - header.IPv4OptionTimestampHdrLength
+
+	// In the section below, we verify the pointer, length and overflow counter
+	// fields of the option. The distinction is in which byte you return as being
+	// in error in the ICMP packet. Offsets 1 (length), 2 (pointer)
+	// or 3 (overflowed counter).
+	//
+	// The following RFC sections cover this section:
+	//
+	// RFC 791 (page 22):
+	//    If there is some room but not enough room for a full timestamp
+	//    to be inserted, or the overflow count itself overflows, the
+	//    original datagram is considered to be in error and is discarded.
+	//    In either case an ICMP parameter problem message may be sent to
+	//    the source host [3].
+	//
+	// You can get this situation in two ways. Firstly if the data area is not
+	// a multiple of the entry size or secondly, if the pointer is not at a
+	// multiple of the entry size. The wording of the RFC suggests that
+	// this is not an error until you actually run out of space.
+	if pointer > optLen {
+		// RFC 791 (page 22) says we should switch to using the overflow count.
+		//    If the timestamp data area is already full (the pointer exceeds
+		//    the length) the datagram is forwarded without inserting the
+		//    timestamp, but the overflow count is incremented by one.
+		if flags == header.IPv4OptionTimestampWithPredefinedIPFlag {
+			// By definition we have nothing to do.
+			return 0, nil
+		}
+
+		if tsOpt.IncOverflow() != 0 {
+			return 0, nil
+		}
+		// The overflow count is also full.
+		return header.IPv4OptTSOFLWAndFLGOffset, errIPv4TimestampOptOverflow
+	}
+	if nextSlot+entrySize > dataLength {
+		// The data area isn't full but there isn't room for a new entry.
+		// Either Length or Pointer could be bad.
+		if false {
+			// We must select Pointer for Linux compatibility, even if
+			// only the length is bad.
+			// The Linux code is at (in October 2020)
+			// https://github.com/torvalds/linux/blob/bbf5c979011a099af5dc76498918ed7df445635b/net/ipv4/ip_options.c#L367-L370
+			//		if (optptr[2]+3 > optlen) {
+			//			pp_ptr = optptr + 2;
+			//			goto error;
+			//		}
+			// which doesn't distinguish between which of optptr[2] or optlen
+			// is wrong, but just arbitrarily decides on optptr+2.
+			if dataLength%entrySize != 0 {
+				// The Data section size should be a multiple of the expected
+				// timestamp entry size.
+				return header.IPv4OptionLengthOffset, errIPv4TimestampOptInvalidLength
+			}
+			// If the size is OK, the pointer must be corrupted.
+		}
+		return header.IPv4OptTSPointerOffset, errIPv4TimestampOptInvalidPointer
+	}
+
+	if usage.actions().timestamp == optionProcess {
+		tsOpt.UpdateTimestamp(localAddress, clock)
+	}
+	return 0, nil
+}
+
+var (
+	errIPv4RecordRouteOptInvalidLength  = errors.New("invalid length in Record Route")
+	errIPv4RecordRouteOptInvalidPointer = errors.New("invalid pointer in Record Route")
+)
+
+// handleRecordRoute checks and processes a Record Route option. It is much
+// like the timestamp type 1 option, but without timestamps. The passed in
+// address is stored in the option in the correct spot if possible.
+func handleRecordRoute(rrOpt header.IPv4OptionRecordRoute, localAddress tcpip.Address, usage optionsUsage) (uint8, error) {
+	optlen := rrOpt.Size()
+
+	if optlen < header.IPv4AddressSize+header.IPv4OptionRecordRouteHdrLength {
+		return header.IPv4OptionLengthOffset, errIPv4RecordRouteOptInvalidLength
+	}
+
+	pointer := rrOpt.Pointer()
+	// RFC 791 page 20 states:
+	//      The pointer is relative to this option, and the
+	//      smallest legal value for the pointer is 4.
+	// Since the pointer is 1 based, and the header is 3 bytes long the
+	// pointer must point beyond the header therefore 3 or less is bad.
+	if pointer <= header.IPv4OptionRecordRouteHdrLength {
+		return header.IPv4OptRRPointerOffset, errIPv4RecordRouteOptInvalidPointer
+	}
+
+	// RFC 791 page 21 says
+	//       If the route data area is already full (the pointer exceeds the
+	//       length) the datagram is forwarded without inserting the address
+	//       into the recorded route. If there is some room but not enough
+	//       room for a full address to be inserted, the original datagram is
+	//       considered to be in error and is discarded.  In either case an
+	//       ICMP parameter problem message may be sent to the source
+	//       host.
+	// The use of the words "In either case" suggests that a 'full' RR option
+	// could generate an ICMP at every hop after it fills up. We chose to not
+	// do this (as do most implementations). It is probable that the inclusion
+	// of these words is a copy/paste error from the timestamp option where
+	// there are two failure reasons given.
+	if pointer > optlen {
+		return 0, nil
+	}
+
+	// The data area isn't full but there isn't room for a new entry.
+	// Either Length or Pointer could be bad. We must select Pointer for Linux
+	// compatibility, even if only the length is bad. NB. pointer is 1 based.
+	if pointer+header.IPv4AddressSize > optlen+1 {
+		if false {
+			// This is what we would do if we were not being Linux compatible.
+			// Check for bad pointer or length value. Must be a multiple of 4 after
+			// accounting for the 3 byte header and not within that header.
+			// RFC 791, page 20 says:
+			//       The pointer is relative to this option, and the
+			//       smallest legal value for the pointer is 4.
+			//
+			//       A recorded route is composed of a series of internet addresses.
+			//       Each internet address is 32 bits or 4 octets.
+			// Linux skips this test so we must too.  See Linux code at:
+			// https://github.com/torvalds/linux/blob/bbf5c979011a099af5dc76498918ed7df445635b/net/ipv4/ip_options.c#L338-L341
+			//    if (optptr[2]+3 > optlen) {
+			//      pp_ptr = optptr + 2;
+			//      goto error;
+			//    }
+			if (optlen-header.IPv4OptionRecordRouteHdrLength)%header.IPv4AddressSize != 0 {
+				// Length is bad, not on integral number of slots.
+				return header.IPv4OptionLengthOffset, errIPv4RecordRouteOptInvalidLength
+			}
+			// If not length, the fault must be with the pointer.
+		}
+		return header.IPv4OptRRPointerOffset, errIPv4RecordRouteOptInvalidPointer
+	}
+	if usage.actions().recordRoute == optionVerify {
+		return 0, nil
+	}
+	rrOpt.StoreAddress(localAddress)
+	return 0, nil
+}
+
+// processIPOptions parses the IPv4 options and produces a new set of options
+// suitable for use in the next step of packet processing as informed by usage.
+// The original will not be touched.
+//
+// Returns, in order:
+// - The location of an error if there was one (or 0 if no error).
+// - The replacement option set.
+// - If there is an error, information as to what it was.
+func (e *endpoint) processIPOptions(pkt *stack.PacketBuffer, orig header.IPv4Options, usage optionsUsage) (uint8, header.IPv4Options, error) {
+	stats := e.protocol.stack.Stats()
+	opts := header.IPv4Options(orig)
+	optIter := opts.MakeIterator()
+
+	// Each option other than NOP must only appear once (RFC 791 section 3.1,
+	// at the definition of every type). Keep track of each of the possible
+	// types in the 8 bit 'type' field.
+	var seenOptions [math.MaxUint8 + 1]bool
+
+	// TODO(gvisor.dev/issue/4586):
+	// This will need tweaking when we start really forwarding packets
+	// as we may need to get two addresses, for rx and tx interfaces.
+	// We will also have to take usage into account.
+	prefixedAddress, err := e.protocol.stack.GetMainNICAddress(e.nic.ID(), ProtocolNumber)
+	localAddress := prefixedAddress.Address
+	if err != nil {
+		h := header.IPv4(pkt.NetworkHeader().View())
+		dstAddr := h.DestinationAddress()
+		if pkt.NetworkPacketInfo.LocalAddressBroadcast || header.IsV4MulticastAddress(dstAddr) {
+			return 0 /* errCursor */, nil, header.ErrIPv4OptionAddress
+		}
+		localAddress = dstAddr
+	}
+
+	for {
+		option, done, err := optIter.Next()
+		if done || err != nil {
+			return optIter.ErrCursor, optIter.Finalize(), err
+		}
+		optType := option.Type()
+		if optType == header.IPv4OptionNOPType {
+			optIter.PushNOPOrEnd(optType)
+			continue
+		}
+		if optType == header.IPv4OptionListEndType {
+			optIter.PushNOPOrEnd(optType)
+			return 0 /* errCursor */, optIter.Finalize(), nil /* err */
+		}
+
+		// check for repeating options (multiple NOPs are OK)
+		if seenOptions[optType] {
+			return optIter.ErrCursor, nil, header.ErrIPv4OptDuplicate
+		}
+		seenOptions[optType] = true
+
+		optLen := int(option.Size())
+		switch option := option.(type) {
+		case *header.IPv4OptionTimestamp:
+			stats.IP.OptionTSReceived.Increment()
+			if usage.actions().timestamp != optionRemove {
+				clock := e.protocol.stack.Clock()
+				newBuffer := optIter.RemainingBuffer()[:len(*option)]
+				_ = copy(newBuffer, option.Contents())
+				offset, err := handleTimestamp(header.IPv4OptionTimestamp(newBuffer), localAddress, clock, usage)
+				if err != nil {
+					return optIter.ErrCursor + offset, nil, err
+				}
+				optIter.ConsumeBuffer(optLen)
+			}
+
+		case *header.IPv4OptionRecordRoute:
+			stats.IP.OptionRRReceived.Increment()
+			if usage.actions().recordRoute != optionRemove {
+				newBuffer := optIter.RemainingBuffer()[:len(*option)]
+				_ = copy(newBuffer, option.Contents())
+				offset, err := handleRecordRoute(header.IPv4OptionRecordRoute(newBuffer), localAddress, usage)
+				if err != nil {
+					return optIter.ErrCursor + offset, nil, err
+				}
+				optIter.ConsumeBuffer(optLen)
+			}
+
+		default:
+			stats.IP.OptionUnknownReceived.Increment()
+			if usage.actions().unknown == optionPass {
+				newBuffer := optIter.RemainingBuffer()[:optLen]
+				// Arguments already heavily checked; ignore result.
+				_ = copy(newBuffer, option.Contents())
+				optIter.ConsumeBuffer(optLen)
+			}
+		}
+	}
+}
diff --git a/pkg/tcpip/network/ipv4/ipv4_test.go b/pkg/tcpip/network/ipv4/ipv4_test.go
index fee11bb38..9e2d2cfd6 100644
--- a/pkg/tcpip/network/ipv4/ipv4_test.go
+++ b/pkg/tcpip/network/ipv4/ipv4_test.go
@@ -21,11 +21,13 @@ import (
 	"math"
 	"net"
 	"testing"
+	"time"
 
 	"github.com/google/go-cmp/cmp"
 	"gvisor.dev/gvisor/pkg/tcpip"
 	"gvisor.dev/gvisor/pkg/tcpip/buffer"
 	"gvisor.dev/gvisor/pkg/tcpip/checker"
+	"gvisor.dev/gvisor/pkg/tcpip/faketime"
 	"gvisor.dev/gvisor/pkg/tcpip/header"
 	"gvisor.dev/gvisor/pkg/tcpip/link/channel"
 	"gvisor.dev/gvisor/pkg/tcpip/link/sniffer"
@@ -39,7 +41,10 @@ import (
 	"gvisor.dev/gvisor/pkg/waiter"
 )
 
-const extraHeaderReserve = 50
+const (
+	extraHeaderReserve = 50
+	defaultMTU         = 65536
+)
 
 func TestExcludeBroadcast(t *testing.T) {
 	s := stack.New(stack.Options{
@@ -47,7 +52,6 @@ func TestExcludeBroadcast(t *testing.T) {
 		TransportProtocols: []stack.TransportProtocolFactory{udp.NewProtocol},
 	})
 
-	const defaultMTU = 65536
 	ep := stack.LinkEndpoint(channel.New(256, defaultMTU, ""))
 	if testing.Verbose() {
 		ep = sniffer.New(ep)
@@ -99,11 +103,167 @@ func TestExcludeBroadcast(t *testing.T) {
 	})
 }
 
+func TestForwarding(t *testing.T) {
+	const (
+		nicID1         = 1
+		nicID2         = 2
+		randomSequence = 123
+		randomIdent    = 42
+	)
+
+	ipv4Addr1 := tcpip.AddressWithPrefix{
+		Address:   tcpip.Address(net.ParseIP("10.0.0.1").To4()),
+		PrefixLen: 8,
+	}
+	ipv4Addr2 := tcpip.AddressWithPrefix{
+		Address:   tcpip.Address(net.ParseIP("11.0.0.1").To4()),
+		PrefixLen: 8,
+	}
+	remoteIPv4Addr1 := tcpip.Address(net.ParseIP("10.0.0.2").To4())
+	remoteIPv4Addr2 := tcpip.Address(net.ParseIP("11.0.0.2").To4())
+
+	tests := []struct {
+		name            string
+		TTL             uint8
+		expectErrorICMP bool
+	}{
+		{
+			name:            "TTL of zero",
+			TTL:             0,
+			expectErrorICMP: true,
+		},
+		{
+			name:            "TTL of one",
+			TTL:             1,
+			expectErrorICMP: false,
+		},
+		{
+			name:            "TTL of two",
+			TTL:             2,
+			expectErrorICMP: false,
+		},
+		{
+			name:            "Max TTL",
+			TTL:             math.MaxUint8,
+			expectErrorICMP: false,
+		},
+	}
+
+	for _, test := range tests {
+		t.Run(test.name, func(t *testing.T) {
+			s := stack.New(stack.Options{
+				NetworkProtocols:   []stack.NetworkProtocolFactory{ipv4.NewProtocol},
+				TransportProtocols: []stack.TransportProtocolFactory{icmp.NewProtocol4},
+			})
+			// We expect at most a single packet in response to our ICMP Echo Request.
+			e1 := channel.New(1, ipv4.MaxTotalSize, "")
+			if err := s.CreateNIC(nicID1, e1); err != nil {
+				t.Fatalf("CreateNIC(%d, _): %s", nicID1, err)
+			}
+			ipv4ProtoAddr1 := tcpip.ProtocolAddress{Protocol: header.IPv4ProtocolNumber, AddressWithPrefix: ipv4Addr1}
+			if err := s.AddProtocolAddress(nicID1, ipv4ProtoAddr1); err != nil {
+				t.Fatalf("AddProtocolAddress(%d, %#v): %s", nicID1, ipv4ProtoAddr1, err)
+			}
+
+			e2 := channel.New(1, ipv4.MaxTotalSize, "")
+			if err := s.CreateNIC(nicID2, e2); err != nil {
+				t.Fatalf("CreateNIC(%d, _): %s", nicID2, err)
+			}
+			ipv4ProtoAddr2 := tcpip.ProtocolAddress{Protocol: header.IPv4ProtocolNumber, AddressWithPrefix: ipv4Addr2}
+			if err := s.AddProtocolAddress(nicID2, ipv4ProtoAddr2); err != nil {
+				t.Fatalf("AddProtocolAddress(%d, %#v): %s", nicID2, ipv4ProtoAddr2, err)
+			}
+
+			s.SetRouteTable([]tcpip.Route{
+				{
+					Destination: ipv4Addr1.Subnet(),
+					NIC:         nicID1,
+				},
+				{
+					Destination: ipv4Addr2.Subnet(),
+					NIC:         nicID2,
+				},
+			})
+
+			if err := s.SetForwarding(header.IPv4ProtocolNumber, true); err != nil {
+				t.Fatalf("SetForwarding(%d, true): %s", header.IPv4ProtocolNumber, err)
+			}
+
+			totalLen := uint16(header.IPv4MinimumSize + header.ICMPv4MinimumSize)
+			hdr := buffer.NewPrependable(int(totalLen))
+			icmp := header.ICMPv4(hdr.Prepend(header.ICMPv4MinimumSize))
+			icmp.SetIdent(randomIdent)
+			icmp.SetSequence(randomSequence)
+			icmp.SetType(header.ICMPv4Echo)
+			icmp.SetCode(header.ICMPv4UnusedCode)
+			icmp.SetChecksum(0)
+			icmp.SetChecksum(^header.Checksum(icmp, 0))
+			ip := header.IPv4(hdr.Prepend(header.IPv4MinimumSize))
+			ip.Encode(&header.IPv4Fields{
+				TotalLength: totalLen,
+				Protocol:    uint8(header.ICMPv4ProtocolNumber),
+				TTL:         test.TTL,
+				SrcAddr:     remoteIPv4Addr1,
+				DstAddr:     remoteIPv4Addr2,
+			})
+			ip.SetChecksum(0)
+			ip.SetChecksum(^ip.CalculateChecksum())
+			requestPkt := stack.NewPacketBuffer(stack.PacketBufferOptions{
+				Data: hdr.View().ToVectorisedView(),
+			})
+			e1.InjectInbound(header.IPv4ProtocolNumber, requestPkt)
+
+			if test.expectErrorICMP {
+				reply, ok := e1.Read()
+				if !ok {
+					t.Fatal("expected ICMP TTL Exceeded packet through incoming NIC")
+				}
+
+				checker.IPv4(t, header.IPv4(stack.PayloadSince(reply.Pkt.NetworkHeader())),
+					checker.SrcAddr(ipv4Addr1.Address),
+					checker.DstAddr(remoteIPv4Addr1),
+					checker.TTL(ipv4.DefaultTTL),
+					checker.ICMPv4(
+						checker.ICMPv4Checksum(),
+						checker.ICMPv4Type(header.ICMPv4TimeExceeded),
+						checker.ICMPv4Code(header.ICMPv4TTLExceeded),
+						checker.ICMPv4Payload([]byte(hdr.View())),
+					),
+				)
+
+				if n := e2.Drain(); n != 0 {
+					t.Fatalf("got e2.Drain() = %d, want = 0", n)
+				}
+			} else {
+				reply, ok := e2.Read()
+				if !ok {
+					t.Fatal("expected ICMP Echo packet through outgoing NIC")
+				}
+
+				checker.IPv4(t, header.IPv4(stack.PayloadSince(reply.Pkt.NetworkHeader())),
+					checker.SrcAddr(remoteIPv4Addr1),
+					checker.DstAddr(remoteIPv4Addr2),
+					checker.TTL(test.TTL-1),
+					checker.ICMPv4(
+						checker.ICMPv4Checksum(),
+						checker.ICMPv4Type(header.ICMPv4Echo),
+						checker.ICMPv4Code(header.ICMPv4UnusedCode),
+						checker.ICMPv4Payload(nil),
+					),
+				)
+
+				if n := e1.Drain(); n != 0 {
+					t.Fatalf("got e1.Drain() = %d, want = 0", n)
+				}
+			}
+		})
+	}
+}
+
 // TestIPv4Sanity sends IP/ICMP packets with various problems to the stack and
 // checks the response.
 func TestIPv4Sanity(t *testing.T) {
 	const (
-		defaultMTU     = header.IPv6MinimumMTU
 		ttl            = 255
 		nicID          = 1
 		randomSequence = 123
@@ -118,27 +278,29 @@ func TestIPv4Sanity(t *testing.T) {
 	)
 
 	tests := []struct {
-		name              string
-		headerLength      uint8 // value of 0 means "use correct size"
-		badHeaderChecksum bool
-		maxTotalLength    uint16
-		transportProtocol uint8
-		TTL               uint8
-		shouldFail        bool
-		expectICMP        bool
-		ICMPType          header.ICMPv4Type
-		ICMPCode          header.ICMPv4Code
-		options           []byte
+		name                string
+		headerLength        uint8 // value of 0 means "use correct size"
+		badHeaderChecksum   bool
+		maxTotalLength      uint16
+		transportProtocol   uint8
+		TTL                 uint8
+		options             header.IPv4Options
+		replyOptions        header.IPv4Options // reply should look like this
+		shouldFail          bool
+		expectErrorICMP     bool
+		ICMPType            header.ICMPv4Type
+		ICMPCode            header.ICMPv4Code
+		paramProblemPointer uint8
 	}{
 		{
-			name:              "valid",
-			maxTotalLength:    defaultMTU,
+			name:              "valid no options",
+			maxTotalLength:    ipv4.MaxTotalSize,
 			transportProtocol: uint8(header.ICMPv4ProtocolNumber),
 			TTL:               ttl,
 		},
 		{
 			name:              "bad header checksum",
-			maxTotalLength:    defaultMTU,
+			maxTotalLength:    ipv4.MaxTotalSize,
 			transportProtocol: uint8(header.ICMPv4ProtocolNumber),
 			TTL:               ttl,
 			badHeaderChecksum: true,
@@ -157,47 +319,47 @@ func TestIPv4Sanity(t *testing.T) {
 		//      received with TTL less than 2.
 		{
 			name:              "zero TTL",
-			maxTotalLength:    defaultMTU,
+			maxTotalLength:    ipv4.MaxTotalSize,
 			transportProtocol: uint8(header.ICMPv4ProtocolNumber),
 			TTL:               0,
-			shouldFail:        false,
 		},
 		{
 			name:              "one TTL",
-			maxTotalLength:    defaultMTU,
+			maxTotalLength:    ipv4.MaxTotalSize,
 			transportProtocol: uint8(header.ICMPv4ProtocolNumber),
 			TTL:               1,
-			shouldFail:        false,
 		},
 		{
 			name:              "End options",
-			maxTotalLength:    defaultMTU,
+			maxTotalLength:    ipv4.MaxTotalSize,
 			transportProtocol: uint8(header.ICMPv4ProtocolNumber),
 			TTL:               ttl,
-			options:           []byte{0, 0, 0, 0},
+			options:           header.IPv4Options{0, 0, 0, 0},
+			replyOptions:      header.IPv4Options{0, 0, 0, 0},
 		},
 		{
 			name:              "NOP options",
-			maxTotalLength:    defaultMTU,
+			maxTotalLength:    ipv4.MaxTotalSize,
 			transportProtocol: uint8(header.ICMPv4ProtocolNumber),
 			TTL:               ttl,
-			options:           []byte{1, 1, 1, 1},
+			options:           header.IPv4Options{1, 1, 1, 1},
+			replyOptions:      header.IPv4Options{1, 1, 1, 1},
 		},
 		{
 			name:              "NOP and End options",
-			maxTotalLength:    defaultMTU,
+			maxTotalLength:    ipv4.MaxTotalSize,
 			transportProtocol: uint8(header.ICMPv4ProtocolNumber),
 			TTL:               ttl,
-			options:           []byte{1, 1, 0, 0},
+			options:           header.IPv4Options{1, 1, 0, 0},
+			replyOptions:      header.IPv4Options{1, 1, 0, 0},
 		},
 		{
 			name:              "bad header length",
 			headerLength:      header.IPv4MinimumSize - 1,
-			maxTotalLength:    defaultMTU,
+			maxTotalLength:    ipv4.MaxTotalSize,
 			transportProtocol: uint8(header.ICMPv4ProtocolNumber),
 			TTL:               ttl,
 			shouldFail:        true,
-			expectICMP:        false,
 		},
 		{
 			name:              "bad total length (0)",
@@ -205,7 +367,6 @@ func TestIPv4Sanity(t *testing.T) {
 			transportProtocol: uint8(header.ICMPv4ProtocolNumber),
 			TTL:               ttl,
 			shouldFail:        true,
-			expectICMP:        false,
 		},
 		{
 			name:              "bad total length (ip - 1)",
@@ -213,7 +374,6 @@ func TestIPv4Sanity(t *testing.T) {
 			transportProtocol: uint8(header.ICMPv4ProtocolNumber),
 			TTL:               ttl,
 			shouldFail:        true,
-			expectICMP:        false,
 		},
 		{
 			name:              "bad total length (ip + icmp - 1)",
@@ -221,28 +381,465 @@ func TestIPv4Sanity(t *testing.T) {
 			transportProtocol: uint8(header.ICMPv4ProtocolNumber),
 			TTL:               ttl,
 			shouldFail:        true,
-			expectICMP:        false,
 		},
 		{
 			name:              "bad protocol",
-			maxTotalLength:    defaultMTU,
+			maxTotalLength:    ipv4.MaxTotalSize,
 			transportProtocol: 99,
 			TTL:               ttl,
 			shouldFail:        true,
-			expectICMP:        true,
+			expectErrorICMP:   true,
 			ICMPType:          header.ICMPv4DstUnreachable,
 			ICMPCode:          header.ICMPv4ProtoUnreachable,
 		},
+		{
+			name:              "timestamp option overflow",
+			maxTotalLength:    ipv4.MaxTotalSize,
+			transportProtocol: uint8(header.ICMPv4ProtocolNumber),
+			TTL:               ttl,
+			options: header.IPv4Options{
+				68, 12, 13, 0x11,
+				192, 168, 1, 12,
+				1, 2, 3, 4,
+			},
+			replyOptions: header.IPv4Options{
+				68, 12, 13, 0x21,
+				192, 168, 1, 12,
+				1, 2, 3, 4,
+			},
+		},
+		{
+			name:              "timestamp option overflow full",
+			maxTotalLength:    ipv4.MaxTotalSize,
+			transportProtocol: uint8(header.ICMPv4ProtocolNumber),
+			TTL:               ttl,
+			options: header.IPv4Options{
+				68, 12, 13, 0xF1,
+				//            ^   Counter full (15/0xF)
+				192, 168, 1, 12,
+				1, 2, 3, 4,
+			},
+			shouldFail:          true,
+			expectErrorICMP:     true,
+			ICMPType:            header.ICMPv4ParamProblem,
+			ICMPCode:            header.ICMPv4UnusedCode,
+			paramProblemPointer: header.IPv4MinimumSize + 3,
+			replyOptions:        header.IPv4Options{},
+		},
+		{
+			name:              "unknown option",
+			maxTotalLength:    ipv4.MaxTotalSize,
+			transportProtocol: uint8(header.ICMPv4ProtocolNumber),
+			TTL:               ttl,
+			options:           header.IPv4Options{10, 4, 9, 0},
+			//                        ^^
+			// The unknown option should be stripped out of the reply.
+			replyOptions: header.IPv4Options{},
+		},
+		{
+			name:              "bad option - length 0",
+			maxTotalLength:    ipv4.MaxTotalSize,
+			transportProtocol: uint8(header.ICMPv4ProtocolNumber),
+			TTL:               ttl,
+			options: header.IPv4Options{
+				68, 0, 9, 0,
+				//  ^
+				1, 2, 3, 4,
+			},
+			shouldFail: true,
+		},
+		{
+			name:              "bad option - length big",
+			maxTotalLength:    ipv4.MaxTotalSize,
+			transportProtocol: uint8(header.ICMPv4ProtocolNumber),
+			TTL:               ttl,
+			options: header.IPv4Options{
+				68, 9, 9, 0,
+				//  ^
+				// There are only 8 bytes allocated to options so 9 bytes of timestamp
+				// space is not possible. (Second byte)
+				1, 2, 3, 4,
+			},
+			shouldFail: true,
+		},
+		{
+			// This tests for some Linux-compatible behaviour.
+			// The ICMP pointer returned is 22 for Linux but the
+			// error is actually in spot 21.
+			name:              "bad option - length bad",
+			maxTotalLength:    ipv4.MaxTotalSize,
+			transportProtocol: uint8(header.ICMPv4ProtocolNumber),
+			TTL:               ttl,
+			// Timestamps are in multiples of 4 or 8 but never 7.
+			// The option space should be padded out.
+			options: header.IPv4Options{
+				68, 7, 5, 0,
+				//  ^  ^ Linux points here which is wrong.
+				//  | Not a multiple of 4
+				1, 2, 3, 0,
+			},
+			shouldFail:          true,
+			expectErrorICMP:     true,
+			ICMPType:            header.ICMPv4ParamProblem,
+			ICMPCode:            header.ICMPv4UnusedCode,
+			paramProblemPointer: header.IPv4MinimumSize + 2,
+		},
+		{
+			name:              "multiple type 0 with room",
+			maxTotalLength:    ipv4.MaxTotalSize,
+			transportProtocol: uint8(header.ICMPv4ProtocolNumber),
+			TTL:               ttl,
+			options: header.IPv4Options{
+				68, 24, 21, 0x00,
+				1, 2, 3, 4,
+				5, 6, 7, 8,
+				9, 10, 11, 12,
+				13, 14, 15, 16,
+				0, 0, 0, 0,
+			},
+			replyOptions: header.IPv4Options{
+				68, 24, 25, 0x00,
+				1, 2, 3, 4,
+				5, 6, 7, 8,
+				9, 10, 11, 12,
+				13, 14, 15, 16,
+				0x00, 0xad, 0x1c, 0x40, // time we expect from fakeclock
+			},
+		},
+		{
+			// The timestamp area is full so add to the overflow count.
+			name:              "multiple type 1 timestamps",
+			maxTotalLength:    ipv4.MaxTotalSize,
+			transportProtocol: uint8(header.ICMPv4ProtocolNumber),
+			TTL:               ttl,
+			options: header.IPv4Options{
+				68, 20, 21, 0x11,
+				//            ^
+				192, 168, 1, 12,
+				1, 2, 3, 4,
+				192, 168, 1, 13,
+				5, 6, 7, 8,
+			},
+			// Overflow count is the top nibble of the 4th byte.
+			replyOptions: header.IPv4Options{
+				68, 20, 21, 0x21,
+				//            ^
+				192, 168, 1, 12,
+				1, 2, 3, 4,
+				192, 168, 1, 13,
+				5, 6, 7, 8,
+			},
+		},
+		{
+			name:              "multiple type 1 timestamps with room",
+			maxTotalLength:    ipv4.MaxTotalSize,
+			transportProtocol: uint8(header.ICMPv4ProtocolNumber),
+			TTL:               ttl,
+			options: header.IPv4Options{
+				68, 28, 21, 0x01,
+				192, 168, 1, 12,
+				1, 2, 3, 4,
+				192, 168, 1, 13,
+				5, 6, 7, 8,
+				0, 0, 0, 0,
+				0, 0, 0, 0,
+			},
+			replyOptions: header.IPv4Options{
+				68, 28, 29, 0x01,
+				192, 168, 1, 12,
+				1, 2, 3, 4,
+				192, 168, 1, 13,
+				5, 6, 7, 8,
+				192, 168, 1, 58, // New IP Address.
+				0x00, 0xad, 0x1c, 0x40, // time we expect from fakeclock
+			},
+		},
+		{
+			// Timestamp pointer uses one based counting so 0 is invalid.
+			name:              "timestamp pointer invalid",
+			maxTotalLength:    ipv4.MaxTotalSize,
+			transportProtocol: uint8(header.ICMPv4ProtocolNumber),
+			TTL:               ttl,
+			options: header.IPv4Options{
+				68, 8, 0, 0x00,
+				//      ^ 0 instead of 5 or more.
+				0, 0, 0, 0,
+			},
+			shouldFail:          true,
+			expectErrorICMP:     true,
+			ICMPType:            header.ICMPv4ParamProblem,
+			ICMPCode:            header.ICMPv4UnusedCode,
+			paramProblemPointer: header.IPv4MinimumSize + 2,
+		},
+		{
+			// Timestamp pointer cannot be less than 5. It must point past the header
+			// which is 4 bytes. (1 based counting)
+			name:              "timestamp pointer too small by 1",
+			maxTotalLength:    ipv4.MaxTotalSize,
+			transportProtocol: uint8(header.ICMPv4ProtocolNumber),
+			TTL:               ttl,
+			options: header.IPv4Options{
+				68, 8, header.IPv4OptionTimestampHdrLength, 0x00,
+				//          ^ header is 4 bytes, so 4 should fail.
+				0, 0, 0, 0,
+			},
+			shouldFail:          true,
+			expectErrorICMP:     true,
+			ICMPType:            header.ICMPv4ParamProblem,
+			ICMPCode:            header.ICMPv4UnusedCode,
+			paramProblemPointer: header.IPv4MinimumSize + 2,
+		},
+		{
+			name:              "valid timestamp pointer",
+			maxTotalLength:    ipv4.MaxTotalSize,
+			transportProtocol: uint8(header.ICMPv4ProtocolNumber),
+			TTL:               ttl,
+			options: header.IPv4Options{
+				68, 8, header.IPv4OptionTimestampHdrLength + 1, 0x00,
+				//          ^ header is 4 bytes, so 5 should succeed.
+				0, 0, 0, 0,
+			},
+			replyOptions: header.IPv4Options{
+				68, 8, 9, 0x00,
+				0x00, 0xad, 0x1c, 0x40, // time we expect from fakeclock
+			},
+		},
+		{
+			// Needs 8 bytes for a type 1 timestamp but there are only 4 free.
+			name:              "bad timer element alignment",
+			maxTotalLength:    ipv4.MaxTotalSize,
+			transportProtocol: uint8(header.ICMPv4ProtocolNumber),
+			TTL:               ttl,
+			options: header.IPv4Options{
+				68, 20, 17, 0x01,
+				//  ^^  ^^   20 byte area, next free spot at 17.
+				192, 168, 1, 12,
+				1, 2, 3, 4,
+				0, 0, 0, 0,
+				0, 0, 0, 0,
+			},
+			shouldFail:          true,
+			expectErrorICMP:     true,
+			ICMPType:            header.ICMPv4ParamProblem,
+			ICMPCode:            header.ICMPv4UnusedCode,
+			paramProblemPointer: header.IPv4MinimumSize + 2,
+		},
+		// End of option list with illegal option after it, which should be ignored.
+		{
+			name:              "end of options list",
+			maxTotalLength:    ipv4.MaxTotalSize,
+			transportProtocol: uint8(header.ICMPv4ProtocolNumber),
+			TTL:               ttl,
+			options: header.IPv4Options{
+				68, 12, 13, 0x11,
+				192, 168, 1, 12,
+				1, 2, 3, 4,
+				0, 10, 3, 99,
+			},
+			replyOptions: header.IPv4Options{
+				68, 12, 13, 0x21,
+				192, 168, 1, 12,
+				1, 2, 3, 4,
+				0, 0, 0, 0, // 3 bytes unknown option
+			}, //   ^  End of options hides following bytes.
+		},
+		{
+			// Timestamp with a size too small.
+			name:              "timestamp truncated",
+			maxTotalLength:    ipv4.MaxTotalSize,
+			transportProtocol: uint8(header.ICMPv4ProtocolNumber),
+			TTL:               ttl,
+			options:           header.IPv4Options{68, 1, 0, 0},
+			//                            ^ Smallest possible is 8.
+			shouldFail: true,
+		},
+		{
+			name:              "single record route with room",
+			maxTotalLength:    ipv4.MaxTotalSize,
+			transportProtocol: uint8(header.ICMPv4ProtocolNumber),
+			TTL:               ttl,
+			options: header.IPv4Options{
+				7, 7, 4, //  3 byte header
+				0, 0, 0, 0,
+				0,
+			},
+			replyOptions: header.IPv4Options{
+				7, 7, 8, // 3 byte header
+				192, 168, 1, 58, // New IP Address.
+				0, // padding to multiple of 4 bytes.
+			},
+		},
+		{
+			name:              "multiple record route with room",
+			maxTotalLength:    ipv4.MaxTotalSize,
+			transportProtocol: uint8(header.ICMPv4ProtocolNumber),
+			TTL:               ttl,
+			options: header.IPv4Options{
+				7, 23, 20, //  3 byte header
+				1, 2, 3, 4,
+				5, 6, 7, 8,
+				9, 10, 11, 12,
+				13, 14, 15, 16,
+				0, 0, 0, 0,
+				0,
+			},
+			replyOptions: header.IPv4Options{
+				7, 23, 24,
+				1, 2, 3, 4,
+				5, 6, 7, 8,
+				9, 10, 11, 12,
+				13, 14, 15, 16,
+				192, 168, 1, 58, // New IP Address.
+				0, // padding to multiple of 4 bytes.
+			},
+		},
+		{
+			name:              "single record route with no room",
+			maxTotalLength:    ipv4.MaxTotalSize,
+			transportProtocol: uint8(header.ICMPv4ProtocolNumber),
+			TTL:               ttl,
+			options: header.IPv4Options{
+				7, 7, 8, // 3 byte header
+				1, 2, 3, 4,
+				0,
+			},
+			replyOptions: header.IPv4Options{
+				7, 7, 8, // 3 byte header
+				1, 2, 3, 4,
+				0, // padding to multiple of 4 bytes.
+			},
+		},
+		{
+			// Unlike timestamp, this should just succeed.
+			name:              "multiple record route with no room",
+			maxTotalLength:    ipv4.MaxTotalSize,
+			transportProtocol: uint8(header.ICMPv4ProtocolNumber),
+			TTL:               ttl,
+			options: header.IPv4Options{
+				7, 23, 24, // 3 byte header
+				1, 2, 3, 4,
+				5, 6, 7, 8,
+				9, 10, 11, 12,
+				13, 14, 15, 16,
+				17, 18, 19, 20,
+				0,
+			},
+			replyOptions: header.IPv4Options{
+				7, 23, 24,
+				1, 2, 3, 4,
+				5, 6, 7, 8,
+				9, 10, 11, 12,
+				13, 14, 15, 16,
+				17, 18, 19, 20,
+				0, // padding to multiple of 4 bytes.
+			},
+		},
+		{
+			// Pointer uses one based counting so 0 is invalid.
+			name:              "record route pointer zero",
+			maxTotalLength:    ipv4.MaxTotalSize,
+			transportProtocol: uint8(header.ICMPv4ProtocolNumber),
+			TTL:               ttl,
+			options: header.IPv4Options{
+				7, 8, 0, // 3 byte header
+				0, 0, 0, 0,
+				0,
+			},
+			shouldFail:          true,
+			expectErrorICMP:     true,
+			ICMPType:            header.ICMPv4ParamProblem,
+			ICMPCode:            header.ICMPv4UnusedCode,
+			paramProblemPointer: header.IPv4MinimumSize + 2,
+		},
+		{
+			// Pointer must be 4 or more as it must point past the 3 byte header
+			// using 1 based counting. 3 should fail.
+			name:              "record route pointer too small by 1",
+			maxTotalLength:    ipv4.MaxTotalSize,
+			transportProtocol: uint8(header.ICMPv4ProtocolNumber),
+			TTL:               ttl,
+			options: header.IPv4Options{
+				7, 8, header.IPv4OptionRecordRouteHdrLength, // 3 byte header
+				0, 0, 0, 0,
+				0,
+			},
+			shouldFail:          true,
+			expectErrorICMP:     true,
+			ICMPType:            header.ICMPv4ParamProblem,
+			ICMPCode:            header.ICMPv4UnusedCode,
+			paramProblemPointer: header.IPv4MinimumSize + 2,
+		},
+		{
+			// Pointer must be 4 or more as it must point past the 3 byte header
+			// using 1 based counting. Check 4 passes. (Duplicates "single
+			// record route with room")
+			name:              "valid record route pointer",
+			maxTotalLength:    ipv4.MaxTotalSize,
+			transportProtocol: uint8(header.ICMPv4ProtocolNumber),
+			TTL:               ttl,
+			options: header.IPv4Options{
+				7, 7, header.IPv4OptionRecordRouteHdrLength + 1, // 3 byte header
+				0, 0, 0, 0,
+				0,
+			},
+			replyOptions: header.IPv4Options{
+				7, 7, 8, // 3 byte header
+				192, 168, 1, 58, // New IP Address.
+				0, // padding to multiple of 4 bytes.
+			},
+		},
+		{
+			// Confirm bug-for-bug compatibility with Linux.
+			// Linux returns slot 22 but the error is in slot 21.
+			name:              "multiple record route with not enough room",
+			maxTotalLength:    ipv4.MaxTotalSize,
+			transportProtocol: uint8(header.ICMPv4ProtocolNumber),
+			TTL:               ttl,
+			options: header.IPv4Options{
+				7, 8, 8, // 3 byte header
+				// ^  ^ Linux points here. We must too.
+				// | Not enough room. 1 byte free, need 4.
+				1, 2, 3, 4,
+				0,
+			},
+			shouldFail:          true,
+			expectErrorICMP:     true,
+			ICMPType:            header.ICMPv4ParamProblem,
+			ICMPCode:            header.ICMPv4UnusedCode,
+			paramProblemPointer: header.IPv4MinimumSize + 2,
+			replyOptions:        header.IPv4Options{},
+		},
+		{
+			name:              "duplicate record route",
+			maxTotalLength:    ipv4.MaxTotalSize,
+			transportProtocol: uint8(header.ICMPv4ProtocolNumber),
+			TTL:               ttl,
+			options: header.IPv4Options{
+				7, 7, 8, // 3 byte header
+				1, 2, 3, 4,
+				7, 7, 8, // 3 byte header
+				1, 2, 3, 4,
+				0, 0, // pad
+			},
+			shouldFail:          true,
+			expectErrorICMP:     true,
+			ICMPType:            header.ICMPv4ParamProblem,
+			ICMPCode:            header.ICMPv4UnusedCode,
+			paramProblemPointer: header.IPv4MinimumSize + 7,
+			replyOptions:        header.IPv4Options{},
+		},
 	}
 
 	for _, test := range tests {
 		t.Run(test.name, func(t *testing.T) {
+			clock := faketime.NewManualClock()
 			s := stack.New(stack.Options{
 				NetworkProtocols:   []stack.NetworkProtocolFactory{ipv4.NewProtocol},
 				TransportProtocols: []stack.TransportProtocolFactory{icmp.NewProtocol4},
+				Clock:              clock,
 			})
 			// We expect at most a single packet in response to our ICMP Echo Request.
-			e := channel.New(1, defaultMTU, "")
+			e := channel.New(1, ipv4.MaxTotalSize, "")
 			if err := s.CreateNIC(nicID, e); err != nil {
 				t.Fatalf("CreateNIC(%d, _): %s", nicID, err)
 			}
@@ -250,6 +847,9 @@ func TestIPv4Sanity(t *testing.T) {
 			if err := s.AddProtocolAddress(nicID, ipv4ProtoAddr); err != nil {
 				t.Fatalf("AddProtocolAddress(%d, %#v): %s", nicID, ipv4ProtoAddr, err)
 			}
+			// Advance the clock to a fixed time so reply timestamps are predictable:
+			// 0x10203040ms modulo one day is the 0x00ad1c40 expected by the tests.
+			clock.Advance(time.Millisecond * 0x10203040)
 
 			// Default routes for IPv4 so ICMP can find a route to the remote
 			// node when attempting to send the ICMP Echo Reply.
@@ -260,14 +860,12 @@ func TestIPv4Sanity(t *testing.T) {
 				},
 			})
 
-			// Round up the header size to the next multiple of 4 as RFC 791, page 11
-			// says: "Internet Header Length is the length of the internet header
-			// in 32 bit words..." and on page 23: "The internet header padding is
-			// used to ensure that the internet header ends on a 32 bit boundary."
-			ipHeaderLength := ((header.IPv4MinimumSize + len(test.options)) + header.IPv4IHLStride - 1) & ^(header.IPv4IHLStride - 1)
-
+			if len(test.options)%4 != 0 {
+				t.Fatalf("options must be aligned to 32 bits, invalid test options: %x (len=%d)", test.options, len(test.options))
+			}
+			ipHeaderLength := header.IPv4MinimumSize + len(test.options)
 			if ipHeaderLength > header.IPv4MaximumHeaderSize {
-				t.Fatalf("too many bytes in options: got = %d, want <= %d ", ipHeaderLength, header.IPv4MaximumHeaderSize)
+				t.Fatalf("IP header length too large: got = %d, want <= %d ", ipHeaderLength, header.IPv4MaximumHeaderSize)
 			}
 			totalLen := uint16(ipHeaderLength + header.ICMPv4MinimumSize)
 			hdr := buffer.NewPrependable(int(totalLen))
@@ -284,20 +882,26 @@ func TestIPv4Sanity(t *testing.T) {
 			if test.maxTotalLength < totalLen {
 				totalLen = test.maxTotalLength
 			}
+
 			ip.Encode(&header.IPv4Fields{
-				IHL:         uint8(ipHeaderLength),
 				TotalLength: totalLen,
 				Protocol:    test.transportProtocol,
 				TTL:         test.TTL,
 				SrcAddr:     remoteIPv4Addr,
 				DstAddr:     ipv4Addr.Address,
 			})
-			if n := copy(ip.Options(), test.options); n != len(test.options) {
-				t.Fatalf("options larger than available space: copied %d/%d bytes", n, len(test.options))
-			}
-			// Override the correct value if the test case specified one.
 			if test.headerLength != 0 {
 				ip.SetHeaderLength(test.headerLength)
+			} else {
+				// Set the calculated header length, since we may manually add options.
+				ip.SetHeaderLength(uint8(ipHeaderLength))
+			}
+			if len(test.options) != 0 {
+				// Copy options manually. We do not use Encode for options so we can
+				// verify malformed options with handcrafted payloads.
+				if got, want := copy(ip.Options(), test.options), len(test.options); got != want {
+					t.Fatalf("got copy(ip.Options(), test.options) = %d, want = %d", got, want)
+				}
 			}
 			ip.SetChecksum(0)
 			ipHeaderChecksum := ip.CalculateChecksum()
@@ -312,14 +916,20 @@ func TestIPv4Sanity(t *testing.T) {
 			reply, ok := e.Read()
 			if !ok {
 				if test.shouldFail {
-					if test.expectICMP {
-						t.Fatal("expected ICMP error response missing")
+					if test.expectErrorICMP {
+						t.Fatalf("ICMP error response (type %d, code %d) missing", test.ICMPType, test.ICMPCode)
 					}
 					return // Expected silent failure.
 				}
 				t.Fatal("expected ICMP echo reply missing")
 			}
 
+			// We didn't expect a packet. Register our surprise but carry on to
+			// provide more information about what we got.
+			if test.shouldFail && !test.expectErrorICMP {
+				t.Error("unexpected packet response")
+			}
+
 			// Check the route that brought the packet to us.
 			if reply.Route.LocalAddress != ipv4Addr.Address {
 				t.Errorf("got pkt.Route.LocalAddress = %s, want = %s", reply.Route.LocalAddress, ipv4Addr.Address)
@@ -328,57 +938,90 @@ func TestIPv4Sanity(t *testing.T) {
 				t.Errorf("got pkt.Route.RemoteAddress = %s, want = %s", reply.Route.RemoteAddress, remoteIPv4Addr)
 			}
 
-			// Make sure it's all in one buffer.
-			vv := buffer.NewVectorisedView(reply.Pkt.Size(), reply.Pkt.Views())
-			replyIPHeader := header.IPv4(vv.ToView())
+			// Make sure it's all in one buffer for checker.
+			replyIPHeader := header.IPv4(stack.PayloadSince(reply.Pkt.NetworkHeader()))
 
-			// At this stage we only know it's an IP header so verify that much.
+			// At this stage we only know it's probably an IP+ICMP header so verify
+			// that much.
 			checker.IPv4(t, replyIPHeader,
 				checker.SrcAddr(ipv4Addr.Address),
 				checker.DstAddr(remoteIPv4Addr),
+				checker.ICMPv4(
+					checker.ICMPv4Checksum(),
+				),
 			)
 
-			// All expected responses are ICMP packets.
-			if got, want := replyIPHeader.Protocol(), uint8(header.ICMPv4ProtocolNumber); got != want {
-				t.Fatalf("not ICMP response, got protocol %d, want = %d", got, want)
+			// Don't proceed any further if the checker found problems.
+			if t.Failed() {
+				t.FailNow()
 			}
-			replyICMPHeader := header.ICMPv4(replyIPHeader.Payload())
 
-			// Sanity check the response.
+			// OK it's ICMP. We can safely look at the type now.
+			replyICMPHeader := header.ICMPv4(replyIPHeader.Payload())
 			switch replyICMPHeader.Type() {
-			case header.ICMPv4DstUnreachable:
+			case header.ICMPv4ParamProblem:
+				if !test.shouldFail {
+					t.Fatalf("got Parameter Problem with pointer %d, wanted Echo Reply", replyICMPHeader.Pointer())
+				}
+				if !test.expectErrorICMP {
+					t.Fatalf("got Parameter Problem with pointer %d, wanted no response", replyICMPHeader.Pointer())
+				}
 				checker.IPv4(t, replyIPHeader,
 					checker.IPFullLength(uint16(header.IPv4MinimumSize+header.ICMPv4MinimumSize+requestPkt.Size())),
 					checker.IPv4HeaderLength(header.IPv4MinimumSize),
 					checker.ICMPv4(
+						checker.ICMPv4Type(test.ICMPType),
 						checker.ICMPv4Code(test.ICMPCode),
-						checker.ICMPv4Checksum(),
+						checker.ICMPv4Pointer(test.paramProblemPointer),
 						checker.ICMPv4Payload([]byte(hdr.View())),
 					),
 				)
-				if !test.shouldFail || !test.expectICMP {
-					t.Fatalf("unexpected packet rejection, got ICMP error packet type %d, code %d",
+				return
+			case header.ICMPv4DstUnreachable:
+				if !test.shouldFail {
+					t.Fatalf("got ICMP error packet type %d, code %d, wanted Echo Reply",
+						header.ICMPv4DstUnreachable, replyICMPHeader.Code())
+				}
+				if !test.expectErrorICMP {
+					t.Fatalf("got ICMP error packet type %d, code %d, wanted no response",
 						header.ICMPv4DstUnreachable, replyICMPHeader.Code())
 				}
+				checker.IPv4(t, replyIPHeader,
+					checker.IPFullLength(uint16(header.IPv4MinimumSize+header.ICMPv4MinimumSize+requestPkt.Size())),
+					checker.IPv4HeaderLength(header.IPv4MinimumSize),
+					checker.ICMPv4(
+						checker.ICMPv4Type(test.ICMPType),
+						checker.ICMPv4Code(test.ICMPCode),
+						checker.ICMPv4Payload([]byte(hdr.View())),
+					),
+				)
 				return
 			case header.ICMPv4EchoReply:
+				if test.shouldFail {
+					if !test.expectErrorICMP {
+						t.Error("got Echo Reply packet, want no response")
+					} else {
+						t.Errorf("got Echo Reply, want ICMP error type %d, code %d", test.ICMPType, test.ICMPCode)
+					}
+				}
+				// If the IP options change size then the packet will change size, so
+				// some IP header fields will need to be adjusted for the checks.
+				sizeChange := len(test.replyOptions) - len(test.options)
+
 				checker.IPv4(t, replyIPHeader,
-					checker.IPv4HeaderLength(ipHeaderLength),
-					checker.IPv4Options(test.options),
-					checker.IPFullLength(uint16(requestPkt.Size())),
+					checker.IPv4HeaderLength(ipHeaderLength+sizeChange),
+					checker.IPv4Options(test.replyOptions),
+					checker.IPFullLength(uint16(requestPkt.Size()+sizeChange)),
 					checker.ICMPv4(
+						checker.ICMPv4Checksum(),
 						checker.ICMPv4Code(header.ICMPv4UnusedCode),
 						checker.ICMPv4Seq(randomSequence),
 						checker.ICMPv4Ident(randomIdent),
-						checker.ICMPv4Checksum(),
 					),
 				)
-				if test.shouldFail {
-					t.Fatalf("unexpected Echo Reply packet\n")
-				}
 			default:
-				t.Fatalf("unexpected ICMP response, got type %d, want = %d or %d",
-					replyICMPHeader.Type(), header.ICMPv4EchoReply, header.ICMPv4DstUnreachable)
+				t.Fatalf("unexpected ICMP response, got type %d, want = %d, %d or %d",
+					replyICMPHeader.Type(), header.ICMPv4EchoReply, header.ICMPv4DstUnreachable, header.ICMPv4ParamProblem)
 			}
 		})
 	}
@@ -462,7 +1105,7 @@ var fragmentationTests = []struct {
 	wantFragments         []fragmentInfo
 }{
 	{
-		description:           "No Fragmentation",
+		description:           "No fragmentation",
 		mtu:                   1280,
 		gso:                   nil,
 		transportHeaderLength: 0,
@@ -483,6 +1126,30 @@ var fragmentationTests = []struct {
 		},
 	},
 	{
+		description:           "Fragmented with the minimum mtu",
+		mtu:                   header.IPv4MinimumMTU,
+		gso:                   nil,
+		transportHeaderLength: 0,
+		payloadSize:           100,
+		wantFragments: []fragmentInfo{
+			{offset: 0, payloadSize: 48, more: true},
+			{offset: 48, payloadSize: 48, more: true},
+			{offset: 96, payloadSize: 4, more: false},
+		},
+	},
+	{
+		description:           "Fragmented with mtu not a multiple of 8",
+		mtu:                   header.IPv4MinimumMTU + 1,
+		gso:                   nil,
+		transportHeaderLength: 0,
+		payloadSize:           100,
+		wantFragments: []fragmentInfo{
+			{offset: 0, payloadSize: 48, more: true},
+			{offset: 48, payloadSize: 48, more: true},
+			{offset: 96, payloadSize: 4, more: false},
+		},
+	},
+	{
 		description:           "No fragmentation with big header",
 		mtu:                   2000,
 		gso:                   nil,
@@ -647,43 +1314,50 @@ func TestFragmentationWritePackets(t *testing.T) {
 	}
 }
 
-// TestFragmentationErrors checks that errors are returned from write packet
+// TestFragmentationErrors checks that errors are returned from WritePacket
 // correctly.
 func TestFragmentationErrors(t *testing.T) {
 	const ttl = 42
 
-	expectedError := tcpip.ErrAborted
-	fragTests := []struct {
+	tests := []struct {
 		description           string
 		mtu                   uint32
 		transportHeaderLength int
 		payloadSize           int
 		allowPackets          int
-		fragmentCount         int
+		outgoingErrors        int
+		mockError             *tcpip.Error
+		wantError             *tcpip.Error
 	}{
 		{
 			description:           "No frag",
 			mtu:                   2000,
-			transportHeaderLength: 0,
 			payloadSize:           1000,
+			transportHeaderLength: 0,
 			allowPackets:          0,
-			fragmentCount:         1,
+			outgoingErrors:        1,
+			mockError:             tcpip.ErrAborted,
+			wantError:             tcpip.ErrAborted,
 		},
 		{
 			description:           "Error on first frag",
 			mtu:                   500,
-			transportHeaderLength: 0,
 			payloadSize:           1000,
+			transportHeaderLength: 0,
 			allowPackets:          0,
-			fragmentCount:         3,
+			outgoingErrors:        3,
+			mockError:             tcpip.ErrAborted,
+			wantError:             tcpip.ErrAborted,
 		},
 		{
 			description:           "Error on second frag",
 			mtu:                   500,
-			transportHeaderLength: 0,
 			payloadSize:           1000,
+			transportHeaderLength: 0,
 			allowPackets:          1,
-			fragmentCount:         3,
+			outgoingErrors:        2,
+			mockError:             tcpip.ErrAborted,
+			wantError:             tcpip.ErrAborted,
 		},
 		{
 			description:           "Error on first frag MTU smaller than header",
@@ -691,28 +1365,40 @@ func TestFragmentationErrors(t *testing.T) {
 			transportHeaderLength: 1000,
 			payloadSize:           500,
 			allowPackets:          0,
-			fragmentCount:         4,
+			outgoingErrors:        4,
+			mockError:             tcpip.ErrAborted,
+			wantError:             tcpip.ErrAborted,
+		},
+		{
+			description:           "Error when MTU is smaller than IPv4 minimum MTU",
+			mtu:                   header.IPv4MinimumMTU - 1,
+			transportHeaderLength: 0,
+			payloadSize:           500,
+			allowPackets:          0,
+			outgoingErrors:        1,
+			mockError:             nil,
+			wantError:             tcpip.ErrInvalidEndpointState,
 		},
 	}
 
-	for _, ft := range fragTests {
+	for _, ft := range tests {
 		t.Run(ft.description, func(t *testing.T) {
-			ep := testutil.NewMockLinkEndpoint(ft.mtu, expectedError, ft.allowPackets)
-			r := buildRoute(t, ep)
 			pkt := testutil.MakeRandPkt(ft.transportHeaderLength, extraHeaderReserve+header.IPv4MinimumSize, []int{ft.payloadSize}, header.IPv4ProtocolNumber)
+			ep := testutil.NewMockLinkEndpoint(ft.mtu, ft.mockError, ft.allowPackets)
+			r := buildRoute(t, ep)
 			err := r.WritePacket(&stack.GSO{}, stack.NetworkHeaderParams{
 				Protocol: tcp.ProtocolNumber,
 				TTL:      ttl,
 				TOS:      stack.DefaultTOS,
 			}, pkt)
-			if err != expectedError {
-				t.Errorf("got WritePacket(_, _, _) = %s, want = %s", err, expectedError)
+			if err != ft.wantError {
+				t.Errorf("got WritePacket(_, _, _) = %s, want = %s", err, ft.wantError)
 			}
-			if got, want := len(ep.WrittenPackets), int(r.Stats().IP.PacketsSent.Value()); err != nil && got != want {
-				t.Errorf("got len(ep.WrittenPackets) = %d, want = %d", got, want)
+			if got := int(r.Stats().IP.PacketsSent.Value()); got != ft.allowPackets {
+				t.Errorf("got r.Stats().IP.PacketsSent.Value() = %d, want = %d", got, ft.allowPackets)
 			}
-			if got, want := int(r.Stats().IP.OutgoingPacketErrors.Value()), ft.fragmentCount-ft.allowPackets; got != want {
-				t.Errorf("got r.Stats().IP.OutgoingPacketErrors.Value() = %d, want = %d", got, want)
+			if got := int(r.Stats().IP.OutgoingPacketErrors.Value()); got != ft.outgoingErrors {
+				t.Errorf("got r.Stats().IP.OutgoingPacketErrors.Value() = %d, want = %d", got, ft.outgoingErrors)
 			}
 		})
 	}
@@ -739,12 +1425,13 @@ func TestInvalidFragments(t *testing.T) {
 	}
 
 	type fragmentData struct {
-		ipv4fields   header.IPv4Fields
+		ipv4fields header.IPv4Fields
+		// 0 means insert the correct IHL. Non 0 means override the correct IHL.
+		overrideIHL  int // For 0 use 1 as it is an int and will be divided by 4.
 		payload      []byte
-		autoChecksum bool // if true, the Checksum field will be overwritten.
+		autoChecksum bool // If true, the Checksum field will be overwritten.
 	}
 
-	// These packets have both IHL and TotalLength set to 0.
 	tests := []struct {
 		name                   string
 		fragments              []fragmentData
@@ -756,7 +1443,6 @@ func TestInvalidFragments(t *testing.T) {
 			fragments: []fragmentData{
 				{
 					ipv4fields: header.IPv4Fields{
-						IHL:            0,
 						TOS:            tos,
 						TotalLength:    0,
 						ID:             ident,
@@ -767,6 +1453,7 @@ func TestInvalidFragments(t *testing.T) {
 						SrcAddr:        addr1,
 						DstAddr:        addr2,
 					},
+					overrideIHL:  1, // See note above.
 					payload:      payloadGen(12),
 					autoChecksum: true,
 				},
@@ -779,7 +1466,6 @@ func TestInvalidFragments(t *testing.T) {
 			fragments: []fragmentData{
 				{
 					ipv4fields: header.IPv4Fields{
-						IHL:            0,
 						TOS:            tos,
 						TotalLength:    0,
 						ID:             ident,
@@ -790,6 +1476,7 @@ func TestInvalidFragments(t *testing.T) {
 						SrcAddr:        addr1,
 						DstAddr:        addr2,
 					},
+					overrideIHL:  1, // See note above.
 					payload:      payloadGen(12),
 					autoChecksum: true,
 				},
@@ -804,7 +1491,6 @@ func TestInvalidFragments(t *testing.T) {
 			fragments: []fragmentData{
 				{
 					ipv4fields: header.IPv4Fields{
-						IHL:            header.IPv4MinimumSize,
 						TOS:            tos,
 						TotalLength:    header.IPv4MinimumSize + 17,
 						ID:             ident,
@@ -829,7 +1515,6 @@ func TestInvalidFragments(t *testing.T) {
 			fragments: []fragmentData{
 				{
 					ipv4fields: header.IPv4Fields{
-						IHL:            header.IPv4MinimumSize,
 						TOS:            tos,
 						TotalLength:    header.IPv4MinimumSize + 16,
 						ID:             ident,
@@ -852,7 +1537,6 @@ func TestInvalidFragments(t *testing.T) {
 			fragments: []fragmentData{
 				{
 					ipv4fields: header.IPv4Fields{
-						IHL:            header.IPv4MinimumSize - 12,
 						TOS:            tos,
 						TotalLength:    header.IPv4MinimumSize + 28,
 						ID:             ident,
@@ -864,11 +1548,11 @@ func TestInvalidFragments(t *testing.T) {
 						DstAddr:        addr2,
 					},
 					payload:      payloadGen(28),
+					overrideIHL:  header.IPv4MinimumSize - 12,
 					autoChecksum: true,
 				},
 				{
 					ipv4fields: header.IPv4Fields{
-						IHL:            header.IPv4MinimumSize - 12,
 						TOS:            tos,
 						TotalLength:    header.IPv4MinimumSize - 12,
 						ID:             ident,
@@ -880,6 +1564,7 @@ func TestInvalidFragments(t *testing.T) {
 						DstAddr:        addr2,
 					},
 					payload:      payloadGen(28),
+					overrideIHL:  header.IPv4MinimumSize - 12,
 					autoChecksum: true,
 				},
 			},
@@ -891,7 +1576,6 @@ func TestInvalidFragments(t *testing.T) {
 			fragments: []fragmentData{
 				{
 					ipv4fields: header.IPv4Fields{
-						IHL:            header.IPv4MinimumSize + 4,
 						TOS:            tos,
 						TotalLength:    header.IPv4MinimumSize + 28,
 						ID:             ident,
@@ -903,11 +1587,11 @@ func TestInvalidFragments(t *testing.T) {
 						DstAddr:        addr2,
 					},
 					payload:      payloadGen(28),
+					overrideIHL:  header.IPv4MinimumSize + 4,
 					autoChecksum: true,
 				},
 				{
 					ipv4fields: header.IPv4Fields{
-						IHL:            header.IPv4MinimumSize + 4,
 						TOS:            tos,
 						TotalLength:    header.IPv4MinimumSize + 4,
 						ID:             ident,
@@ -919,6 +1603,7 @@ func TestInvalidFragments(t *testing.T) {
 						DstAddr:        addr2,
 					},
 					payload:      payloadGen(28),
+					overrideIHL:  header.IPv4MinimumSize + 4,
 					autoChecksum: true,
 				},
 			},
@@ -930,7 +1615,6 @@ func TestInvalidFragments(t *testing.T) {
 			fragments: []fragmentData{
 				{
 					ipv4fields: header.IPv4Fields{
-						IHL:            header.IPv4MinimumSize,
 						TOS:            tos,
 						TotalLength:    header.IPv4MinimumSize + 8,
 						ID:             ident,
@@ -946,7 +1630,6 @@ func TestInvalidFragments(t *testing.T) {
 				},
 				{
 					ipv4fields: header.IPv4Fields{
-						IHL:            header.IPv4MinimumSize,
 						TOS:            tos,
 						TotalLength:    header.IPv4MinimumSize + 8,
 						ID:             ident,
@@ -962,7 +1645,6 @@ func TestInvalidFragments(t *testing.T) {
 				},
 				{
 					ipv4fields: header.IPv4Fields{
-						IHL:            header.IPv4MinimumSize,
 						TOS:            tos,
 						TotalLength:    header.IPv4MinimumSize + 8,
 						ID:             ident,
@@ -984,7 +1666,6 @@ func TestInvalidFragments(t *testing.T) {
 
 	for _, test := range tests {
 		t.Run(test.name, func(t *testing.T) {
-
 			s := stack.New(stack.Options{
 				NetworkProtocols: []stack.NetworkProtocolFactory{
 					ipv4.NewProtocol,
@@ -1004,6 +1685,11 @@ func TestInvalidFragments(t *testing.T) {
 
 				ip := header.IPv4(hdr.Prepend(pktSize))
 				ip.Encode(&f.ipv4fields)
+				// Encode sets the IHL correctly. If the test wants a different value
+				// then we need to overwrite the good value.
+				if f.overrideIHL != 0 {
+					ip.SetHeaderLength(uint8(f.overrideIHL))
+				}
 				copy(ip[header.IPv4MinimumSize:], f.payload)
 
 				if f.autoChecksum {
@@ -1027,6 +1713,251 @@ func TestInvalidFragments(t *testing.T) {
 	}
 }
 
+func TestFragmentReassemblyTimeout(t *testing.T) {
+	const (
+		nicID    = 1
+		linkAddr = tcpip.LinkAddress("\x0a\x0b\x0c\x0d\x0e\x0e")
+		addr1    = "\x0a\x00\x00\x01"
+		addr2    = "\x0a\x00\x00\x02"
+		tos      = 0
+		ident    = 1
+		ttl      = 48
+		protocol = 99
+		data     = "TEST_FRAGMENT_REASSEMBLY_TIMEOUT"
+	)
+
+	type fragmentData struct {
+		ipv4fields header.IPv4Fields
+		payload    []byte
+	}
+
+	tests := []struct {
+		name       string
+		fragments  []fragmentData
+		expectICMP bool
+	}{
+		{
+			name: "first fragment only",
+			fragments: []fragmentData{
+				{
+					ipv4fields: header.IPv4Fields{
+						TOS:            tos,
+						TotalLength:    header.IPv4MinimumSize + 16,
+						ID:             ident,
+						Flags:          header.IPv4FlagMoreFragments,
+						FragmentOffset: 0,
+						TTL:            ttl,
+						Protocol:       protocol,
+						SrcAddr:        addr1,
+						DstAddr:        addr2,
+					},
+					payload: []byte(data)[:16],
+				},
+			},
+			expectICMP: true,
+		},
+		{
+			name: "two first fragments",
+			fragments: []fragmentData{
+				{
+					ipv4fields: header.IPv4Fields{
+						TOS:            tos,
+						TotalLength:    header.IPv4MinimumSize + 16,
+						ID:             ident,
+						Flags:          header.IPv4FlagMoreFragments,
+						FragmentOffset: 0,
+						TTL:            ttl,
+						Protocol:       protocol,
+						SrcAddr:        addr1,
+						DstAddr:        addr2,
+					},
+					payload: []byte(data)[:16],
+				},
+				{
+					ipv4fields: header.IPv4Fields{
+						TOS:            tos,
+						TotalLength:    header.IPv4MinimumSize + 16,
+						ID:             ident,
+						Flags:          header.IPv4FlagMoreFragments,
+						FragmentOffset: 0,
+						TTL:            ttl,
+						Protocol:       protocol,
+						SrcAddr:        addr1,
+						DstAddr:        addr2,
+					},
+					payload: []byte(data)[:16],
+				},
+			},
+			expectICMP: true,
+		},
+		{
+			name: "second fragment only",
+			fragments: []fragmentData{
+				{
+					ipv4fields: header.IPv4Fields{
+						TOS:            tos,
+						TotalLength:    uint16(header.IPv4MinimumSize + len(data) - 16),
+						ID:             ident,
+						Flags:          0,
+						FragmentOffset: 8,
+						TTL:            ttl,
+						Protocol:       protocol,
+						SrcAddr:        addr1,
+						DstAddr:        addr2,
+					},
+					payload: []byte(data)[16:],
+				},
+			},
+			expectICMP: false,
+		},
+		{
+			name: "two fragments with a gap",
+			fragments: []fragmentData{
+				{
+					ipv4fields: header.IPv4Fields{
+						TOS:            tos,
+						TotalLength:    header.IPv4MinimumSize + 8,
+						ID:             ident,
+						Flags:          header.IPv4FlagMoreFragments,
+						FragmentOffset: 0,
+						TTL:            ttl,
+						Protocol:       protocol,
+						SrcAddr:        addr1,
+						DstAddr:        addr2,
+					},
+					payload: []byte(data)[:8],
+				},
+				{
+					ipv4fields: header.IPv4Fields{
+						TOS:            tos,
+						TotalLength:    uint16(header.IPv4MinimumSize + len(data) - 16),
+						ID:             ident,
+						Flags:          0,
+						FragmentOffset: 16,
+						TTL:            ttl,
+						Protocol:       protocol,
+						SrcAddr:        addr1,
+						DstAddr:        addr2,
+					},
+					payload: []byte(data)[16:],
+				},
+			},
+			expectICMP: true,
+		},
+		{
+			name: "two fragments with a gap in reverse order",
+			fragments: []fragmentData{
+				{
+					ipv4fields: header.IPv4Fields{
+						TOS:            tos,
+						TotalLength:    uint16(header.IPv4MinimumSize + len(data) - 16),
+						ID:             ident,
+						Flags:          0,
+						FragmentOffset: 16,
+						TTL:            ttl,
+						Protocol:       protocol,
+						SrcAddr:        addr1,
+						DstAddr:        addr2,
+					},
+					payload: []byte(data)[16:],
+				},
+				{
+					ipv4fields: header.IPv4Fields{
+						TOS:            tos,
+						TotalLength:    header.IPv4MinimumSize + 8,
+						ID:             ident,
+						Flags:          header.IPv4FlagMoreFragments,
+						FragmentOffset: 0,
+						TTL:            ttl,
+						Protocol:       protocol,
+						SrcAddr:        addr1,
+						DstAddr:        addr2,
+					},
+					payload: []byte(data)[:8],
+				},
+			},
+			expectICMP: true,
+		},
+	}
+
+	for _, test := range tests {
+		t.Run(test.name, func(t *testing.T) {
+			clock := faketime.NewManualClock()
+			s := stack.New(stack.Options{
+				NetworkProtocols: []stack.NetworkProtocolFactory{
+					ipv4.NewProtocol,
+				},
+				Clock: clock,
+			})
+			e := channel.New(1, 1500, linkAddr)
+			if err := s.CreateNIC(nicID, e); err != nil {
+				t.Fatalf("CreateNIC(%d, _) = %s", nicID, err)
+			}
+			if err := s.AddAddress(nicID, ipv4.ProtocolNumber, addr2); err != nil {
+				t.Fatalf("AddAddress(%d, %d, %s) = %s", nicID, header.IPv4ProtocolNumber, addr2, err)
+			}
+			s.SetRouteTable([]tcpip.Route{{
+				Destination: header.IPv4EmptySubnet,
+				NIC:         nicID,
+			}})
+
+			var firstFragmentSent buffer.View
+			for _, f := range test.fragments {
+				pktSize := header.IPv4MinimumSize
+				hdr := buffer.NewPrependable(pktSize)
+
+				ip := header.IPv4(hdr.Prepend(pktSize))
+				ip.Encode(&f.ipv4fields)
+
+				ip.SetChecksum(0)
+				ip.SetChecksum(^ip.CalculateChecksum())
+
+				vv := hdr.View().ToVectorisedView()
+				vv.AppendView(f.payload)
+
+				pkt := stack.NewPacketBuffer(stack.PacketBufferOptions{
+					Data: vv,
+				})
+
+				if firstFragmentSent == nil && ip.FragmentOffset() == 0 {
+					firstFragmentSent = stack.PayloadSince(pkt.NetworkHeader())
+				}
+
+				e.InjectInbound(header.IPv4ProtocolNumber, pkt)
+			}
+
+			clock.Advance(ipv4.ReassembleTimeout)
+
+			reply, ok := e.Read()
+			if !test.expectICMP {
+				if ok {
+					t.Fatalf("unexpected ICMP error message received: %#v", reply)
+				}
+				return
+			}
+			if !ok {
+				t.Fatal("expected ICMP error message missing")
+			}
+			if firstFragmentSent == nil {
+				t.Fatalf("unexpected ICMP error message received: %#v", reply)
+			}
+
+			checker.IPv4(t, stack.PayloadSince(reply.Pkt.NetworkHeader()),
+				checker.SrcAddr(addr2),
+				checker.DstAddr(addr1),
+				checker.IPFullLength(uint16(header.IPv4MinimumSize+header.ICMPv4MinimumSize+firstFragmentSent.Size())),
+				checker.IPv4HeaderLength(header.IPv4MinimumSize),
+				checker.ICMPv4(
+					checker.ICMPv4Type(header.ICMPv4TimeExceeded),
+					checker.ICMPv4Code(header.ICMPv4ReassemblyTimeout),
+					checker.ICMPv4Checksum(),
+					checker.ICMPv4Payload([]byte(firstFragmentSent)),
+				),
+			)
+		})
+	}
+}
+
 // TestReceiveFragments feeds fragments in through the incoming packet path to
 // test reassembly
 func TestReceiveFragments(t *testing.T) {
@@ -1392,6 +2323,28 @@ func TestReceiveFragments(t *testing.T) {
 			},
 			expectedPayloads: [][]byte{udpPayload4Addr1ToAddr2},
 		},
+		{
+			name: "Two fragments with MF flag reassembled into a maximum UDP packet",
+			fragments: []fragmentData{
+				{
+					srcAddr:        addr1,
+					dstAddr:        addr2,
+					id:             1,
+					flags:          header.IPv4FlagMoreFragments,
+					fragmentOffset: 0,
+					payload:        ipv4Payload4Addr1ToAddr2[:65512],
+				},
+				{
+					srcAddr:        addr1,
+					dstAddr:        addr2,
+					id:             1,
+					flags:          header.IPv4FlagMoreFragments,
+					fragmentOffset: 65512,
+					payload:        ipv4Payload4Addr1ToAddr2[65512:],
+				},
+			},
+			expectedPayloads: nil,
+		},
 	}
 
 	for _, test := range tests {
@@ -1432,7 +2385,6 @@ func TestReceiveFragments(t *testing.T) {
 				// Serialize IPv4 fixed header.
 				ip := header.IPv4(hdr.Prepend(header.IPv4MinimumSize))
 				ip.Encode(&header.IPv4Fields{
-					IHL:            header.IPv4MinimumSize,
 					TotalLength:    header.IPv4MinimumSize + uint16(len(frag.payload)),
 					ID:             frag.id,
 					Flags:          frag.flags,
@@ -1506,13 +2458,10 @@ func TestWriteStats(t *testing.T) {
 				// Install Output DROP rule.
 				t.Helper()
 				ipt := stk.IPTables()
-				filter, ok := ipt.GetTable(stack.FilterTable, false /* ipv6 */)
-				if !ok {
-					t.Fatalf("failed to find filter table")
-				}
+				filter := ipt.GetTable(stack.FilterID, false /* ipv6 */)
 				ruleIdx := filter.BuiltinChains[stack.Output]
 				filter.Rules[ruleIdx].Target = &stack.DropTarget{}
-				if err := ipt.ReplaceTable(stack.FilterTable, filter, false /* ipv6 */); err != nil {
+				if err := ipt.ReplaceTable(stack.FilterID, filter, false /* ipv6 */); err != nil {
 					t.Fatalf("failed to replace table: %s", err)
 				}
 			},
@@ -1527,17 +2476,14 @@ func TestWriteStats(t *testing.T) {
 				// of the 3 packets.
 				t.Helper()
 				ipt := stk.IPTables()
-				filter, ok := ipt.GetTable(stack.FilterTable, false /* ipv6 */)
-				if !ok {
-					t.Fatalf("failed to find filter table")
-				}
+				filter := ipt.GetTable(stack.FilterID, false /* ipv6 */)
 				// We'll match and DROP the last packet.
 				ruleIdx := filter.BuiltinChains[stack.Output]
 				filter.Rules[ruleIdx].Target = &stack.DropTarget{}
 				filter.Rules[ruleIdx].Matchers = []stack.Matcher{&limitedMatcher{nPackets - 1}}
 				// Make sure the next rule is ACCEPT.
 				filter.Rules[ruleIdx+1].Target = &stack.AcceptTarget{}
-				if err := ipt.ReplaceTable(stack.FilterTable, filter, false /* ipv6 */); err != nil {
+				if err := ipt.ReplaceTable(stack.FilterID, filter, false /* ipv6 */); err != nil {
 					t.Fatalf("failed to replace table: %s", err)
 				}
 			},
@@ -1577,7 +2523,7 @@ func TestWriteStats(t *testing.T) {
 		t.Run(writer.name, func(t *testing.T) {
 			for _, test := range tests {
 				t.Run(test.name, func(t *testing.T) {
-					ep := testutil.NewMockLinkEndpoint(header.IPv4MinimumSize+header.UDPMinimumSize, tcpip.ErrInvalidEndpointState, test.allowPackets)
+					ep := testutil.NewMockLinkEndpoint(header.IPv4MinimumMTU, tcpip.ErrInvalidEndpointState, test.allowPackets)
 					rt := buildRoute(t, ep)
 
 					var pkts stack.PacketBufferList
@@ -1592,7 +2538,7 @@ func TestWriteStats(t *testing.T) {
 
 					test.setup(t, rt.Stack())
 
-					nWritten, _ := writer.writePackets(&rt, pkts)
+					nWritten, _ := writer.writePackets(rt, pkts)
 
 					if got := int(rt.Stats().IP.PacketsSent.Value()); got != test.expectSent {
 						t.Errorf("sent %d packets, but expected to send %d", got, test.expectSent)
@@ -1609,7 +2555,7 @@ func TestWriteStats(t *testing.T) {
 	}
 }
 
-func buildRoute(t *testing.T, ep stack.LinkEndpoint) stack.Route {
+func buildRoute(t *testing.T, ep stack.LinkEndpoint) *stack.Route {
 	s := stack.New(stack.Options{
 		NetworkProtocols: []stack.NetworkProtocolFactory{ipv4.NewProtocol},
 	})
@@ -1704,7 +2650,6 @@ func TestPacketQueing(t *testing.T) {
 				u.SetChecksum(^u.CalculateChecksum(sum))
 				ip := header.IPv4(hdr.Prepend(header.IPv4MinimumSize))
 				ip.Encode(&header.IPv4Fields{
-					IHL:         header.IPv4MinimumSize,
 					TotalLength: header.IPv4MinimumSize + header.UDPMinimumSize,
 					TTL:         ipv4.DefaultTTL,
 					Protocol:    uint8(udp.ProtocolNumber),
@@ -1724,8 +2669,8 @@ func TestPacketQueing(t *testing.T) {
 				if p.Proto != header.IPv4ProtocolNumber {
 					t.Errorf("got p.Proto = %d, want = %d", p.Proto, header.IPv4ProtocolNumber)
 				}
-				if p.Route.RemoteLinkAddress != host2NICLinkAddr {
-					t.Errorf("got p.Route.RemoteLinkAddress = %s, want = %s", p.Route.RemoteLinkAddress, host2NICLinkAddr)
+				if got := p.Route.RemoteLinkAddress(); got != host2NICLinkAddr {
+					t.Errorf("got p.Route.RemoteLinkAddress() = %s, want = %s", got, host2NICLinkAddr)
 				}
 				checker.IPv4(t, stack.PayloadSince(p.Pkt.NetworkHeader()),
 					checker.SrcAddr(host1IPv4Addr.AddressWithPrefix.Address),
@@ -1748,7 +2693,6 @@ func TestPacketQueing(t *testing.T) {
 				pkt.SetChecksum(^header.Checksum(pkt, 0))
 				ip := header.IPv4(hdr.Prepend(header.IPv4MinimumSize))
 				ip.Encode(&header.IPv4Fields{
-					IHL:         header.IPv4MinimumSize,
 					TotalLength: uint16(totalLen),
 					Protocol:    uint8(icmp.ProtocolNumber4),
 					TTL:         ipv4.DefaultTTL,
@@ -1768,8 +2712,8 @@ func TestPacketQueing(t *testing.T) {
 				if p.Proto != header.IPv4ProtocolNumber {
 					t.Errorf("got p.Proto = %d, want = %d", p.Proto, header.IPv4ProtocolNumber)
 				}
-				if p.Route.RemoteLinkAddress != host2NICLinkAddr {
-					t.Errorf("got p.Route.RemoteLinkAddress = %s, want = %s", p.Route.RemoteLinkAddress, host2NICLinkAddr)
+				if got := p.Route.RemoteLinkAddress(); got != host2NICLinkAddr {
+					t.Errorf("got p.Route.RemoteLinkAddress() = %s, want = %s", got, host2NICLinkAddr)
 				}
 				checker.IPv4(t, stack.PayloadSince(p.Pkt.NetworkHeader()),
 					checker.SrcAddr(host1IPv4Addr.AddressWithPrefix.Address),
@@ -1783,7 +2727,7 @@ func TestPacketQueing(t *testing.T) {
 
 	for _, test := range tests {
 		t.Run(test.name, func(t *testing.T) {
-			e := channel.New(1, header.IPv6MinimumMTU, host1NICLinkAddr)
+			e := channel.New(1, defaultMTU, host1NICLinkAddr)
 			e.LinkEPCapabilities |= stack.CapabilityResolutionRequired
 			s := stack.New(stack.Options{
 				NetworkProtocols:   []stack.NetworkProtocolFactory{arp.NewProtocol, ipv4.NewProtocol},
@@ -1793,9 +2737,6 @@ func TestPacketQueing(t *testing.T) {
 			if err := s.CreateNIC(nicID, e); err != nil {
 				t.Fatalf("s.CreateNIC(%d, _): %s", nicID, err)
 			}
-			if err := s.AddAddress(nicID, arp.ProtocolNumber, arp.ProtocolAddress); err != nil {
-				t.Fatalf("s.AddAddress(%d, %d, %s): %s", nicID, arp.ProtocolNumber, arp.ProtocolAddress, err)
-			}
 			if err := s.AddProtocolAddress(nicID, host1IPv4Addr); err != nil {
 				t.Fatalf("s.AddProtocolAddress(%d, %#v): %s", nicID, host1IPv4Addr, err)
 			}
@@ -1820,8 +2761,8 @@ func TestPacketQueing(t *testing.T) {
 				if p.Proto != arp.ProtocolNumber {
 					t.Errorf("got p.Proto = %d, want = %d", p.Proto, arp.ProtocolNumber)
 				}
-				if p.Route.RemoteLinkAddress != header.EthernetBroadcastAddress {
-					t.Errorf("got p.Route.RemoteLinkAddress = %s, want = %s", p.Route.RemoteLinkAddress, header.EthernetBroadcastAddress)
+				if got := p.Route.RemoteLinkAddress(); got != header.EthernetBroadcastAddress {
+					t.Errorf("got p.Route.RemoteLinkAddress() = %s, want = %s", got, header.EthernetBroadcastAddress)
 				}
 				rep := header.ARP(p.Pkt.NetworkHeader().View())
 				if got := rep.Op(); got != header.ARPRequest {
diff --git a/pkg/tcpip/network/ipv6/BUILD b/pkg/tcpip/network/ipv6/BUILD
index a30437f02..afa45aefe 100644
--- a/pkg/tcpip/network/ipv6/BUILD
+++ b/pkg/tcpip/network/ipv6/BUILD
@@ -8,6 +8,7 @@ go_library(
         "dhcpv6configurationfromndpra_string.go",
         "icmp.go",
         "ipv6.go",
+        "mld.go",
         "ndp.go",
     ],
     visibility = ["//visibility:public"],
@@ -19,6 +20,7 @@ go_library(
         "//pkg/tcpip/header/parse",
         "//pkg/tcpip/network/fragmentation",
         "//pkg/tcpip/network/hash",
+        "//pkg/tcpip/network/ip",
         "//pkg/tcpip/stack",
     ],
 )
@@ -36,6 +38,7 @@ go_test(
         "//pkg/tcpip",
         "//pkg/tcpip/buffer",
         "//pkg/tcpip/checker",
+        "//pkg/tcpip/faketime",
         "//pkg/tcpip/header",
         "//pkg/tcpip/link/channel",
         "//pkg/tcpip/link/sniffer",
@@ -48,3 +51,19 @@ go_test(
         "@com_github_google_go_cmp//cmp:go_default_library",
     ],
 )
+
+go_test(
+    name = "ipv6_x_test",
+    size = "small",
+    srcs = ["mld_test.go"],
+    deps = [
+        ":ipv6",
+        "//pkg/tcpip",
+        "//pkg/tcpip/buffer",
+        "//pkg/tcpip/checker",
+        "//pkg/tcpip/faketime",
+        "//pkg/tcpip/header",
+        "//pkg/tcpip/link/channel",
+        "//pkg/tcpip/stack",
+    ],
+)
diff --git a/pkg/tcpip/network/ipv6/icmp.go b/pkg/tcpip/network/ipv6/icmp.go
index ead6bedcb..6ee162713 100644
--- a/pkg/tcpip/network/ipv6/icmp.go
+++ b/pkg/tcpip/network/ipv6/icmp.go
@@ -124,10 +124,10 @@ func getTargetLinkAddr(it header.NDPOptionIterator) (tcpip.LinkAddress, bool) {
 	})
 }
 
-func (e *endpoint) handleICMP(r *stack.Route, pkt *stack.PacketBuffer, hasFragmentHeader bool) {
-	stats := r.Stats().ICMP
-	sent := stats.V6PacketsSent
-	received := stats.V6PacketsReceived
+func (e *endpoint) handleICMP(pkt *stack.PacketBuffer, hasFragmentHeader bool) {
+	stats := e.protocol.stack.Stats().ICMP
+	sent := stats.V6.PacketsSent
+	received := stats.V6.PacketsReceived
 	// TODO(gvisor.dev/issue/170): ICMP packets don't have their
 	// TransportHeader fields set. See icmp/protocol.go:protocol.Parse for a
 	// full explanation.
@@ -138,13 +138,15 @@ func (e *endpoint) handleICMP(r *stack.Route, pkt *stack.PacketBuffer, hasFragme
 	}
 	h := header.ICMPv6(v)
 	iph := header.IPv6(pkt.NetworkHeader().View())
+	srcAddr := iph.SourceAddress()
+	dstAddr := iph.DestinationAddress()
 
 	// Validate ICMPv6 checksum before processing the packet.
 	//
 	// This copy is used as extra payload during the checksum calculation.
 	payload := pkt.Data.Clone(nil)
 	payload.TrimFront(len(h))
-	if got, want := h.Checksum(), header.ICMPv6Checksum(h, iph.SourceAddress(), iph.DestinationAddress(), payload); got != want {
+	if got, want := h.Checksum(), header.ICMPv6Checksum(h, srcAddr, dstAddr, payload); got != want {
 		received.Invalid.Increment()
 		return
 	}
@@ -161,7 +163,7 @@ func (e *endpoint) handleICMP(r *stack.Route, pkt *stack.PacketBuffer, hasFragme
 	}
 
 	// TODO(b/112892170): Meaningfully handle all ICMP types.
-	switch h.Type() {
+	switch icmpType := h.Type(); icmpType {
 	case header.ICMPv6PacketTooBig:
 		received.PacketTooBig.Increment()
 		hdr, ok := pkt.Data.PullUp(header.ICMPv6PacketTooBigMinimumSize)
@@ -170,8 +172,11 @@ func (e *endpoint) handleICMP(r *stack.Route, pkt *stack.PacketBuffer, hasFragme
 			return
 		}
 		pkt.Data.TrimFront(header.ICMPv6PacketTooBigMinimumSize)
-		mtu := header.ICMPv6(hdr).MTU()
-		e.handleControl(stack.ControlPacketTooBig, calculateMTU(mtu), pkt)
+		networkMTU, err := calculateNetworkMTU(header.ICMPv6(hdr).MTU(), header.IPv6MinimumSize)
+		if err != nil {
+			networkMTU = 0
+		}
+		e.handleControl(stack.ControlPacketTooBig, networkMTU, pkt)
 
 	case header.ICMPv6DstUnreachable:
 		received.DstUnreachable.Increment()
@@ -221,7 +226,7 @@ func (e *endpoint) handleICMP(r *stack.Route, pkt *stack.PacketBuffer, hasFragme
 			// we know we are also performing DAD on it). In this case we let the
 			// stack know so it can handle such a scenario and do nothing further with
 			// the NS.
-			if r.RemoteAddress == header.IPv6Any {
+			if srcAddr == header.IPv6Any {
 				// We would get an error if the address no longer exists or the address
 				// is no longer tentative (DAD resolved between the call to
 				// hasTentativeAddr and this point). Both of these are valid scenarios:
@@ -248,7 +253,7 @@ func (e *endpoint) handleICMP(r *stack.Route, pkt *stack.PacketBuffer, hasFragme
 		// section 5.4.3.
 
 		// Is the NS targeting us?
-		if r.Stack().CheckLocalAddress(e.nic.ID(), ProtocolNumber, targetAddr) == 0 {
+		if e.protocol.stack.CheckLocalAddress(e.nic.ID(), ProtocolNumber, targetAddr) == 0 {
 			return
 		}
 
@@ -274,9 +279,9 @@ func (e *endpoint) handleICMP(r *stack.Route, pkt *stack.PacketBuffer, hasFragme
 		// Otherwise, on link layers that have addresses this option MUST be
 		// included in multicast solicitations and SHOULD be included in unicast
 		// solicitations.
-		unspecifiedSource := r.RemoteAddress == header.IPv6Any
+		unspecifiedSource := srcAddr == header.IPv6Any
 		if len(sourceLinkAddr) == 0 {
-			if header.IsV6MulticastAddress(r.LocalAddress) && !unspecifiedSource {
+			if header.IsV6MulticastAddress(dstAddr) && !unspecifiedSource {
 				received.Invalid.Increment()
 				return
 			}
@@ -284,9 +289,9 @@ func (e *endpoint) handleICMP(r *stack.Route, pkt *stack.PacketBuffer, hasFragme
 			received.Invalid.Increment()
 			return
 		} else if e.nud != nil {
-			e.nud.HandleProbe(r.RemoteAddress, r.LocalAddress, header.IPv6ProtocolNumber, sourceLinkAddr, e.protocol)
+			e.nud.HandleProbe(srcAddr, header.IPv6ProtocolNumber, sourceLinkAddr, e.protocol)
 		} else {
-			e.linkAddrCache.AddLinkAddress(e.nic.ID(), r.RemoteAddress, sourceLinkAddr)
+			e.linkAddrCache.AddLinkAddress(e.nic.ID(), srcAddr, sourceLinkAddr)
 		}
 
 		// As per RFC 4861 section 7.1.1:
@@ -295,7 +300,7 @@ func (e *endpoint) handleICMP(r *stack.Route, pkt *stack.PacketBuffer, hasFragme
 		//    ...
 		//    - If the IP source address is the unspecified address, the IP
 		//      destination address is a solicited-node multicast address.
-		if unspecifiedSource && !header.IsSolicitedNodeAddr(r.LocalAddress) {
+		if unspecifiedSource && !header.IsSolicitedNodeAddr(dstAddr) {
 			received.Invalid.Increment()
 			return
 		}
@@ -305,7 +310,7 @@ func (e *endpoint) handleICMP(r *stack.Route, pkt *stack.PacketBuffer, hasFragme
 		//   If the source of the solicitation is the unspecified address, the node
 		//   MUST [...] and multicast the advertisement to the all-nodes address.
 		//
-		remoteAddr := r.RemoteAddress
+		remoteAddr := srcAddr
 		if unspecifiedSource {
 			remoteAddr = header.IPv6AllNodesMulticastAddress
 		}
@@ -353,7 +358,7 @@ func (e *endpoint) handleICMP(r *stack.Route, pkt *stack.PacketBuffer, hasFragme
 		pkt.TransportProtocolNumber = header.ICMPv6ProtocolNumber
 		packet := header.ICMPv6(pkt.TransportHeader().Push(neighborAdvertSize))
 		packet.SetType(header.ICMPv6NeighborAdvert)
-		na := header.NDPNeighborAdvert(packet.NDPPayload())
+		na := header.NDPNeighborAdvert(packet.MessageBody())
 
 		// As per RFC 4861 section 7.2.4:
 		//
@@ -462,12 +467,12 @@ func (e *endpoint) handleICMP(r *stack.Route, pkt *stack.PacketBuffer, hasFragme
 
 		// As per RFC 4291 section 2.7, multicast addresses must not be used as
 		// source addresses in IPv6 packets.
-		localAddr := r.LocalAddress
-		if header.IsV6MulticastAddress(r.LocalAddress) {
+		localAddr := dstAddr
+		if header.IsV6MulticastAddress(dstAddr) {
 			localAddr = ""
 		}
 
-		r, err := r.Stack().FindRoute(e.nic.ID(), localAddr, r.RemoteAddress, ProtocolNumber, false /* multicastLoop */)
+		r, err := e.protocol.stack.FindRoute(e.nic.ID(), localAddr, srcAddr, ProtocolNumber, false /* multicastLoop */)
 		if err != nil {
 			// If we cannot find a route to the destination, silently drop the packet.
 			return
@@ -483,7 +488,11 @@ func (e *endpoint) handleICMP(r *stack.Route, pkt *stack.PacketBuffer, hasFragme
 		copy(packet, icmpHdr)
 		packet.SetType(header.ICMPv6EchoReply)
 		packet.SetChecksum(header.ICMPv6Checksum(packet, r.LocalAddress, r.RemoteAddress, pkt.Data))
-		if err := r.WritePacket(nil /* gso */, stack.NetworkHeaderParams{Protocol: header.ICMPv6ProtocolNumber, TTL: r.DefaultTTL(), TOS: stack.DefaultTOS}, replyPkt); err != nil {
+		if err := r.WritePacket(nil /* gso */, stack.NetworkHeaderParams{
+			Protocol: header.ICMPv6ProtocolNumber,
+			TTL:      r.DefaultTTL(),
+			TOS:      stack.DefaultTOS,
+		}, replyPkt); err != nil {
 			sent.Dropped.Increment()
 			return
 		}
@@ -495,7 +504,7 @@ func (e *endpoint) handleICMP(r *stack.Route, pkt *stack.PacketBuffer, hasFragme
 			received.Invalid.Increment()
 			return
 		}
-		e.dispatcher.DeliverTransportPacket(r, header.ICMPv6ProtocolNumber, pkt)
+		e.dispatcher.DeliverTransportPacket(header.ICMPv6ProtocolNumber, pkt)
 
 	case header.ICMPv6TimeExceeded:
 		received.TimeExceeded.Increment()
@@ -516,7 +525,7 @@ func (e *endpoint) handleICMP(r *stack.Route, pkt *stack.PacketBuffer, hasFragme
 			return
 		}
 
-		stack := r.Stack()
+		stack := e.protocol.stack
 
 		// Is the networking stack operating as a router?
 		if !stack.Forwarding(ProtocolNumber) {
@@ -547,7 +556,7 @@ func (e *endpoint) handleICMP(r *stack.Route, pkt *stack.PacketBuffer, hasFragme
 			// As per RFC 4861 section 4.1, the Source Link-Layer Address Option MUST
 			// NOT be included when the source IP address is the unspecified address.
 			// Otherwise, it SHOULD be included on link layers that have addresses.
-			if r.RemoteAddress == header.IPv6Any {
+			if srcAddr == header.IPv6Any {
 				received.Invalid.Increment()
 				return
 			}
@@ -555,7 +564,7 @@ func (e *endpoint) handleICMP(r *stack.Route, pkt *stack.PacketBuffer, hasFragme
 			if e.nud != nil {
 				// A RS with a specified source IP address modifies the NUD state
 				// machine in the same way a reachability probe would.
-				e.nud.HandleProbe(r.RemoteAddress, r.LocalAddress, header.IPv6ProtocolNumber, sourceLinkAddr, e.protocol)
+				e.nud.HandleProbe(srcAddr, ProtocolNumber, sourceLinkAddr, e.protocol)
 			}
 		}
 
@@ -572,7 +581,7 @@ func (e *endpoint) handleICMP(r *stack.Route, pkt *stack.PacketBuffer, hasFragme
 			return
 		}
 
-		routerAddr := iph.SourceAddress()
+		routerAddr := srcAddr
 
 		// Is the IP Source Address a link-local address?
 		if !header.IsV6LinkLocalAddress(routerAddr) {
@@ -605,7 +614,7 @@ func (e *endpoint) handleICMP(r *stack.Route, pkt *stack.PacketBuffer, hasFragme
 		// If the RA has the source link layer option, update the link address
 		// cache with the link address for the advertised router.
 		if len(sourceLinkAddr) != 0 && e.nud != nil {
-			e.nud.HandleProbe(routerAddr, r.LocalAddress, header.IPv6ProtocolNumber, sourceLinkAddr, e.protocol)
+			e.nud.HandleProbe(routerAddr, ProtocolNumber, sourceLinkAddr, e.protocol)
 		}
 
 		e.mu.Lock()
@@ -635,8 +644,39 @@ func (e *endpoint) handleICMP(r *stack.Route, pkt *stack.PacketBuffer, hasFragme
 			return
 		}
 
+	case header.ICMPv6MulticastListenerQuery, header.ICMPv6MulticastListenerReport, header.ICMPv6MulticastListenerDone:
+		switch icmpType {
+		case header.ICMPv6MulticastListenerQuery:
+			received.MulticastListenerQuery.Increment()
+		case header.ICMPv6MulticastListenerReport:
+			received.MulticastListenerReport.Increment()
+		case header.ICMPv6MulticastListenerDone:
+			received.MulticastListenerDone.Increment()
+		default:
+			panic(fmt.Sprintf("unrecognized MLD message = %d", icmpType))
+		}
+
+		if pkt.Data.Size()-header.ICMPv6HeaderSize < header.MLDMinimumSize {
+			received.Invalid.Increment()
+			return
+		}
+
+		switch icmpType {
+		case header.ICMPv6MulticastListenerQuery:
+			e.mu.Lock()
+			e.mu.mld.handleMulticastListenerQuery(header.MLD(payload.ToView()))
+			e.mu.Unlock()
+		case header.ICMPv6MulticastListenerReport:
+			e.mu.Lock()
+			e.mu.mld.handleMulticastListenerReport(header.MLD(payload.ToView()))
+			e.mu.Unlock()
+		case header.ICMPv6MulticastListenerDone:
+		default:
+			panic(fmt.Sprintf("unrecognized MLD message = %d", icmpType))
+		}
+
 	default:
-		received.Invalid.Increment()
+		received.Unrecognized.Increment()
 	}
 }
 
@@ -648,52 +688,46 @@ func (*protocol) LinkAddressProtocol() tcpip.NetworkProtocolNumber {
 }
 
 // LinkAddressRequest implements stack.LinkAddressResolver.
-func (*protocol) LinkAddressRequest(addr, localAddr tcpip.Address, remoteLinkAddr tcpip.LinkAddress, linkEP stack.LinkEndpoint) *tcpip.Error {
-	// TODO(b/148672031): Use stack.FindRoute instead of manually creating the
-	// route here. Note, we would need the nicID to do this properly so the right
-	// NIC (associated to linkEP) is used to send the NDP NS message.
-	r := stack.Route{
-		LocalAddress:      localAddr,
-		RemoteAddress:     addr,
-		LocalLinkAddress:  linkEP.LinkAddress(),
-		RemoteLinkAddress: remoteLinkAddr,
+func (p *protocol) LinkAddressRequest(targetAddr, localAddr tcpip.Address, remoteLinkAddr tcpip.LinkAddress, nic stack.NetworkInterface) *tcpip.Error {
+	remoteAddr := targetAddr
+	if len(remoteLinkAddr) == 0 { // No link address known: solicit via the target's solicited-node multicast address.
+		remoteAddr = header.SolicitedNodeAddr(targetAddr)
+		remoteLinkAddr = header.EthernetAddressFromMulticastIPv6Address(remoteAddr)
 	}
 
-	// If a remote address is not already known, then send a multicast
-	// solicitation since multicast addresses have a static mapping to link
-	// addresses.
-	if len(r.RemoteLinkAddress) == 0 {
-		r.RemoteAddress = header.SolicitedNodeAddr(addr)
-		r.RemoteLinkAddress = header.EthernetAddressFromMulticastIPv6Address(r.RemoteAddress)
+	r, err := p.stack.FindRoute(nic.ID(), localAddr, remoteAddr, ProtocolNumber, false /* multicastLoop */)
+	if err != nil {
+		return err
 	}
+	defer r.Release()
+	r.ResolveWith(remoteLinkAddr)
 
 	optsSerializer := header.NDPOptionsSerializer{
-		header.NDPSourceLinkLayerAddressOption(linkEP.LinkAddress()),
+		header.NDPSourceLinkLayerAddressOption(nic.LinkAddress()),
 	}
 	neighborSolicitSize := header.ICMPv6NeighborSolicitMinimumSize + optsSerializer.Length()
 	pkt := stack.NewPacketBuffer(stack.PacketBufferOptions{
-		ReserveHeaderBytes: int(linkEP.MaxHeaderLength()) + header.IPv6MinimumSize + neighborSolicitSize,
+		ReserveHeaderBytes: int(r.MaxHeaderLength()) + neighborSolicitSize,
 	})
 	pkt.TransportProtocolNumber = header.ICMPv6ProtocolNumber
 	packet := header.ICMPv6(pkt.TransportHeader().Push(neighborSolicitSize))
 	packet.SetType(header.ICMPv6NeighborSolicit)
-	ns := header.NDPNeighborSolicit(packet.NDPPayload())
-	ns.SetTargetAddress(addr)
+	ns := header.NDPNeighborSolicit(packet.MessageBody())
+	ns.SetTargetAddress(targetAddr) // Always the unicast target, even when the packet itself goes to multicast.
 	ns.Options().Serialize(optsSerializer)
 	packet.SetChecksum(header.ICMPv6Checksum(packet, r.LocalAddress, r.RemoteAddress, buffer.VectorisedView{}))
 
-	length := uint16(pkt.Size())
-	ip := header.IPv6(pkt.NetworkHeader().Push(header.IPv6MinimumSize))
-	ip.Encode(&header.IPv6Fields{
-		PayloadLength: length,
-		NextHeader:    uint8(header.ICMPv6ProtocolNumber),
-		HopLimit:      header.NDPHopLimit,
-		SrcAddr:       r.LocalAddress,
-		DstAddr:       r.RemoteAddress,
-	})
+	stat := p.stack.Stats().ICMP.V6.PacketsSent
+	if err := r.WritePacket(nil /* gso */, stack.NetworkHeaderParams{
+		Protocol: header.ICMPv6ProtocolNumber,
+		TTL:      header.NDPHopLimit,
+	}, pkt); err != nil {
+		stat.Dropped.Increment()
+		return err
+	}
 
-	// TODO(stijlist): count this in ICMP stats.
-	return linkEP.WritePacket(&r, nil /* gso */, ProtocolNumber, pkt)
+	stat.NeighborSolicit.Increment() // Reached only when the write above succeeded.
+	return nil
 }
 
 // ResolveStaticAddress implements stack.LinkAddressResolver.
@@ -747,9 +781,26 @@ type icmpReasonPortUnreachable struct{}
 
 func (*icmpReasonPortUnreachable) isICMPReason() {}
 
+// icmpReasonHopLimitExceeded is an error where a packet's hop limit was
+// exceeded in transit to its final destination, as per RFC 4443 section 3.3.
+type icmpReasonHopLimitExceeded struct{}
+
+func (*icmpReasonHopLimitExceeded) isICMPReason() {}
+
+// icmpReasonReassemblyTimeout is an error where too few fragments of a packet
+// arrive to complete its reassembly within a configured time after the first
+// fragment arrives; returnError reports it as an ICMPv6 Time Exceeded message.
+type icmpReasonReassemblyTimeout struct{}
+
+func (*icmpReasonReassemblyTimeout) isICMPReason() {}
+
 // returnError takes an error descriptor and generates the appropriate ICMP
 // error packet for IPv6 and sends it.
-func (p *protocol) returnError(r *stack.Route, reason icmpReason, pkt *stack.PacketBuffer) *tcpip.Error {
+func (p *protocol) returnError(reason icmpReason, pkt *stack.PacketBuffer) *tcpip.Error {
+	origIPHdr := header.IPv6(pkt.NetworkHeader().View())
+	origIPHdrSrc := origIPHdr.SourceAddress()
+	origIPHdrDst := origIPHdr.DestinationAddress()
+
 	// Only send ICMP error if the address is not a multicast v6
 	// address and the source is not the unspecified address.
 	//
@@ -776,32 +827,49 @@ func (p *protocol) returnError(r *stack.Route, reason icmpReason, pkt *stack.Pac
 		allowResponseToMulticast = reason.respondToMulticast
 	}
 
-	if (!allowResponseToMulticast && header.IsV6MulticastAddress(r.LocalAddress)) || r.RemoteAddress == header.IPv6Any {
+	isOrigDstMulticast := header.IsV6MulticastAddress(origIPHdrDst)
+	if (!allowResponseToMulticast && isOrigDstMulticast) || origIPHdrSrc == header.IPv6Any {
 		return nil
 	}
 
+	// If we hit a Hop Limit Exceeded error, then we know we are operating as a
+	// router. As per RFC 4443 section 3.3:
+	//
+	//   If a router receives a packet with a Hop Limit of zero, or if a
+	//   router decrements a packet's Hop Limit to zero, it MUST discard the
+	//   packet and originate an ICMPv6 Time Exceeded message with Code 0 to
+	//   the source of the packet.  This indicates either a routing loop or
+	//   too small an initial Hop Limit value.
+	//
+	// If we are operating as a router, do not use the packet's destination
+	// address as the response's source address as we should not own the
+	// destination address of a packet we are forwarding.
+	//
+	// If the packet was originally destined to a multicast address, then do not
+	// use the packet's destination address as the source for the response ICMP
+	// packet as "multicast addresses must not be used as source addresses in IPv6
+	// packets", as per RFC 4291 section 2.7.
+	localAddr := origIPHdrDst
+	if _, ok := reason.(*icmpReasonHopLimitExceeded); ok || isOrigDstMulticast {
+		localAddr = ""
+	}
 	// Even if we were able to receive a packet from some remote, we may not have
 	// a route to it - the remote may be blocked via routing rules. We must always
 	// consult our routing table and find a route to the remote before sending any
 	// packet.
-	route, err := p.stack.FindRoute(r.NICID(), r.LocalAddress, r.RemoteAddress, ProtocolNumber, false /* multicastLoop */)
+	route, err := p.stack.FindRoute(pkt.NICID, localAddr, origIPHdrSrc, ProtocolNumber, false /* multicastLoop */)
 	if err != nil {
 		return err
 	}
 	defer route.Release()
-	// From this point on, the incoming route should no longer be used; route
-	// must be used to send the ICMP error.
-	r = nil
 
 	stats := p.stack.Stats().ICMP
-	sent := stats.V6PacketsSent
+	sent := stats.V6.PacketsSent
 	if !p.stack.AllowICMPMessage() {
 		sent.RateLimited.Increment()
 		return nil
 	}
 
-	network, transport := pkt.NetworkHeader().View(), pkt.TransportHeader().View()
-
 	if pkt.TransportProtocolNumber == header.ICMPv6ProtocolNumber {
 		// TODO(gvisor.dev/issues/3810): Sort this out when ICMP headers are stored.
 		// Unfortunately at this time ICMP Packets do not have a transport
@@ -819,6 +887,8 @@ func (p *protocol) returnError(r *stack.Route, reason icmpReason, pkt *stack.Pac
 		}
 	}
 
+	network, transport := pkt.NetworkHeader().View(), pkt.TransportHeader().View()
+
 	// As per RFC 4443 section 2.4
 	//
 	//    (c) Every ICMPv6 error message (type < 128) MUST include
@@ -839,7 +909,9 @@ func (p *protocol) returnError(r *stack.Route, reason icmpReason, pkt *stack.Pac
 	if payloadLen > available {
 		payloadLen = available
 	}
-	payload := buffer.NewVectorisedView(pkt.Size(), pkt.Views())
+	payload := network.ToVectorisedView()
+	payload.AppendView(transport)
+	payload.Append(pkt.Data)
 	payload.CapLength(payloadLen)
 
 	newPkt := stack.NewPacketBuffer(stack.PacketBufferOptions{
@@ -860,6 +932,14 @@ func (p *protocol) returnError(r *stack.Route, reason icmpReason, pkt *stack.Pac
 		icmpHdr.SetType(header.ICMPv6DstUnreachable)
 		icmpHdr.SetCode(header.ICMPv6PortUnreachable)
 		counter = sent.DstUnreachable
+	case *icmpReasonHopLimitExceeded:
+		icmpHdr.SetType(header.ICMPv6TimeExceeded)
+		icmpHdr.SetCode(header.ICMPv6HopLimitExceeded)
+		counter = sent.TimeExceeded
+	case *icmpReasonReassemblyTimeout:
+		icmpHdr.SetType(header.ICMPv6TimeExceeded)
+		icmpHdr.SetCode(header.ICMPv6ReassemblyTimeout)
+		counter = sent.TimeExceeded
 	default:
 		panic(fmt.Sprintf("unsupported ICMP type %T", reason))
 	}
@@ -879,3 +959,16 @@ func (p *protocol) returnError(r *stack.Route, reason icmpReason, pkt *stack.Pac
 	counter.Increment()
 	return nil
 }
+
+// OnReassemblyTimeout implements fragmentation.TimeoutHandler.
+//
+// As per RFC 2460 section 4.5, if the first fragment (i.e., the one with a
+// Fragment Offset of zero) has been received, an ICMP Time Exceeded -- Fragment
+// Reassembly Time Exceeded message should be sent to the source of that
+// fragment. A nil pkt is ignored.
+func (p *protocol) OnReassemblyTimeout(pkt *stack.PacketBuffer) {
+	if pkt != nil {
+		// This handler returns nothing, so a send failure is dropped on purpose.
+		_ = p.returnError(&icmpReasonReassemblyTimeout{}, pkt)
+	}
+}
diff --git a/pkg/tcpip/network/ipv6/icmp_test.go b/pkg/tcpip/network/ipv6/icmp_test.go
index 8dc33c560..02b18e9a5 100644
--- a/pkg/tcpip/network/ipv6/icmp_test.go
+++ b/pkg/tcpip/network/ipv6/icmp_test.go
@@ -51,6 +51,7 @@ const (
 var (
 	lladdr0 = header.LinkLocalAddr(linkAddr0)
 	lladdr1 = header.LinkLocalAddr(linkAddr1)
+	lladdr2 = header.LinkLocalAddr(linkAddr2)
 )
 
 type stubLinkEndpoint struct {
@@ -86,7 +87,7 @@ type stubDispatcher struct {
 	stack.TransportDispatcher
 }
 
-func (*stubDispatcher) DeliverTransportPacket(*stack.Route, tcpip.TransportProtocolNumber, *stack.PacketBuffer) stack.TransportPacketDisposition {
+func (*stubDispatcher) DeliverTransportPacket(tcpip.TransportProtocolNumber, *stack.PacketBuffer) stack.TransportPacketDisposition { // Stub: ignores its arguments and reports the packet as handled.
 	return stack.TransportPacketHandled
 }
 
@@ -108,31 +109,27 @@ type stubNUDHandler struct {
 
 var _ stack.NUDHandler = (*stubNUDHandler)(nil)
 
-func (s *stubNUDHandler) HandleProbe(remoteAddr, localAddr tcpip.Address, protocol tcpip.NetworkProtocolNumber, remoteLinkAddr tcpip.LinkAddress, linkRes stack.LinkAddressResolver) {
+func (s *stubNUDHandler) HandleProbe(tcpip.Address, tcpip.NetworkProtocolNumber, tcpip.LinkAddress, stack.LinkAddressResolver) { // Only counts the call; every argument is ignored.
 	s.probeCount++
 }
 
-func (s *stubNUDHandler) HandleConfirmation(addr tcpip.Address, linkAddr tcpip.LinkAddress, flags stack.ReachabilityConfirmationFlags) {
+func (s *stubNUDHandler) HandleConfirmation(tcpip.Address, tcpip.LinkAddress, stack.ReachabilityConfirmationFlags) { // Only counts the call; every argument is ignored.
 	s.confirmationCount++
 }
 
-func (*stubNUDHandler) HandleUpperLevelConfirmation(addr tcpip.Address) {
+func (*stubNUDHandler) HandleUpperLevelConfirmation(tcpip.Address) { // No-op in this stub.
 }
 
 var _ stack.NetworkInterface = (*testInterface)(nil)
 
 type testInterface struct {
-	stack.NetworkLinkEndpoint
-
-	linkAddr tcpip.LinkAddress
-}
+	stack.LinkEndpoint // Embedded; WritePacketToRemote writes packets through it.
 
-func (i *testInterface) LinkAddress() tcpip.LinkAddress {
-	return i.linkAddr
+	nicID tcpip.NICID // ID of the NIC this test interface stands in for.
 }
 
 func (*testInterface) ID() tcpip.NICID {
-	return 0
+	return nicID // NOTE(review): the receiver is unnamed, so this is a package-scope nicID, not the struct's nicID field; confirm that is intended.
 }
 
 func (*testInterface) IsLoopback() bool {
@@ -147,6 +144,18 @@ func (*testInterface) Enabled() bool {
 	return true
 }
 
+func (*testInterface) Promiscuous() bool {
+	return false // This test interface never reports promiscuous mode.
+}
+
+// WritePacketToRemote writes pkt to the embedded link endpoint over a bare route to remoteLinkAddr.
+func (t *testInterface) WritePacketToRemote(remoteLinkAddr tcpip.LinkAddress, gso *stack.GSO, protocol tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) *tcpip.Error {
+	var route stack.Route
+	route.NetProto = protocol
+	route.ResolveWith(remoteLinkAddr)
+	return t.LinkEndpoint.WritePacket(&route, gso, protocol, pkt)
+}
+
 func TestICMPCounts(t *testing.T) {
 	tests := []struct {
 		name             string
@@ -169,13 +178,8 @@ func TestICMPCounts(t *testing.T) {
 				TransportProtocols: []stack.TransportProtocolFactory{icmp.NewProtocol6},
 				UseNeighborCache:   test.useNeighborCache,
 			})
-			{
-				if err := s.CreateNIC(nicID, &stubLinkEndpoint{}); err != nil {
-					t.Fatalf("CreateNIC(_, _) = %s", err)
-				}
-				if err := s.AddAddress(nicID, ProtocolNumber, lladdr0); err != nil {
-					t.Fatalf("AddAddress(_, %d, %s) = %s", ProtocolNumber, lladdr0, err)
-				}
+			if err := s.CreateNIC(nicID, &stubLinkEndpoint{}); err != nil {
+				t.Fatalf("CreateNIC(_, _) = %s", err)
 			}
 			{
 				subnet, err := tcpip.NewSubnet(lladdr1, tcpip.AddressMask(strings.Repeat("\xff", len(lladdr1))))
@@ -201,11 +205,16 @@ func TestICMPCounts(t *testing.T) {
 				t.Fatalf("ep.Enable(): %s", err)
 			}
 
-			r, err := s.FindRoute(nicID, lladdr0, lladdr1, ProtocolNumber, false /* multicastLoop */)
-			if err != nil {
-				t.Fatalf("FindRoute(%d, %s, %s, _, false) = (_, %s), want = (_, nil)", nicID, lladdr0, lladdr1, err)
+			addressableEndpoint, ok := ep.(stack.AddressableEndpoint)
+			if !ok {
+				t.Fatalf("expected network endpoint to implement stack.AddressableEndpoint")
+			}
+			addr := lladdr0.WithPrefix()
+			if ep, err := addressableEndpoint.AddAndAcquirePermanentAddress(addr, stack.CanBePrimaryEndpoint, stack.AddressConfigStatic, false /* deprecated */); err != nil {
+				t.Fatalf("addressableEndpoint.AddAndAcquirePermanentAddress(%s, CanBePrimaryEndpoint, AddressConfigStatic, false): %s", addr, err)
+			} else {
+				ep.DecRef()
 			}
-			defer r.Release()
 
 			var tllData [header.NDPLinkLayerAddressSize]byte
 			header.NDPOptions(tllData[:]).Serialize(header.NDPOptionsSerializer{
@@ -262,6 +271,22 @@ func TestICMPCounts(t *testing.T) {
 					typ:  header.ICMPv6RedirectMsg,
 					size: header.ICMPv6MinimumSize,
 				},
+				{
+					typ:  header.ICMPv6MulticastListenerQuery,
+					size: header.MLDMinimumSize + header.ICMPv6HeaderSize,
+				},
+				{
+					typ:  header.ICMPv6MulticastListenerReport,
+					size: header.MLDMinimumSize + header.ICMPv6HeaderSize,
+				},
+				{
+					typ:  header.ICMPv6MulticastListenerDone,
+					size: header.MLDMinimumSize + header.ICMPv6HeaderSize,
+				},
+				{
+					typ:  255, /* Unrecognized */
+					size: 50,
+				},
 			}
 
 			handleIPv6Payload := func(icmp header.ICMPv6) {
@@ -271,20 +296,20 @@ func TestICMPCounts(t *testing.T) {
 				})
 				ip := header.IPv6(pkt.NetworkHeader().Push(header.IPv6MinimumSize))
 				ip.Encode(&header.IPv6Fields{
-					PayloadLength: uint16(len(icmp)),
-					NextHeader:    uint8(header.ICMPv6ProtocolNumber),
-					HopLimit:      header.NDPHopLimit,
-					SrcAddr:       r.LocalAddress,
-					DstAddr:       r.RemoteAddress,
+					PayloadLength:     uint16(len(icmp)),
+					TransportProtocol: header.ICMPv6ProtocolNumber,
+					HopLimit:          header.NDPHopLimit,
+					SrcAddr:           lladdr1,
+					DstAddr:           lladdr0,
 				})
-				ep.HandlePacket(&r, pkt)
+				ep.HandlePacket(pkt)
 			}
 
 			for _, typ := range types {
 				icmp := header.ICMPv6(buffer.NewView(typ.size + len(typ.extraData)))
 				copy(icmp[typ.size:], typ.extraData)
 				icmp.SetType(typ.typ)
-				icmp.SetChecksum(header.ICMPv6Checksum(icmp[:typ.size], r.LocalAddress, r.RemoteAddress, buffer.View(typ.extraData).ToVectorisedView()))
+				icmp.SetChecksum(header.ICMPv6Checksum(icmp[:typ.size], lladdr0, lladdr1, buffer.View(typ.extraData).ToVectorisedView()))
 				handleIPv6Payload(icmp)
 			}
 
@@ -292,7 +317,7 @@ func TestICMPCounts(t *testing.T) {
 			// Stats().ICMP.ICMPv6ReceivedPacketStats.Invalid is incremented.
 			handleIPv6Payload(header.ICMPv6(buffer.NewView(header.IPv6MinimumSize)))
 
-			icmpv6Stats := s.Stats().ICMP.V6PacketsReceived
+			icmpv6Stats := s.Stats().ICMP.V6.PacketsReceived
 			visitStats(reflect.ValueOf(&icmpv6Stats).Elem(), func(name string, s *tcpip.StatCounter) {
 				if got, want := s.Value(), uint64(1); got != want {
 					t.Errorf("got %s = %d, want = %d", name, got, want)
@@ -311,13 +336,8 @@ func TestICMPCountsWithNeighborCache(t *testing.T) {
 		TransportProtocols: []stack.TransportProtocolFactory{icmp.NewProtocol6},
 		UseNeighborCache:   true,
 	})
-	{
-		if err := s.CreateNIC(nicID, &stubLinkEndpoint{}); err != nil {
-			t.Fatalf("CreateNIC(_, _) = %s", err)
-		}
-		if err := s.AddAddress(nicID, ProtocolNumber, lladdr0); err != nil {
-			t.Fatalf("AddAddress(_, %d, %s) = %s", ProtocolNumber, lladdr0, err)
-		}
+	if err := s.CreateNIC(nicID, &stubLinkEndpoint{}); err != nil {
+		t.Fatalf("CreateNIC(_, _) = %s", err)
 	}
 	{
 		subnet, err := tcpip.NewSubnet(lladdr1, tcpip.AddressMask(strings.Repeat("\xff", len(lladdr1))))
@@ -343,11 +363,16 @@ func TestICMPCountsWithNeighborCache(t *testing.T) {
 		t.Fatalf("ep.Enable(): %s", err)
 	}
 
-	r, err := s.FindRoute(nicID, lladdr0, lladdr1, ProtocolNumber, false /* multicastLoop */)
-	if err != nil {
-		t.Fatalf("FindRoute(%d, %s, %s, _, false) = (_, %s), want = (_, nil)", nicID, lladdr0, lladdr1, err)
+	addressableEndpoint, ok := ep.(stack.AddressableEndpoint)
+	if !ok {
+		t.Fatalf("expected network endpoint to implement stack.AddressableEndpoint")
+	}
+	addr := lladdr0.WithPrefix()
+	if ep, err := addressableEndpoint.AddAndAcquirePermanentAddress(addr, stack.CanBePrimaryEndpoint, stack.AddressConfigStatic, false /* deprecated */); err != nil {
+		t.Fatalf("addressableEndpoint.AddAndAcquirePermanentAddress(%s, CanBePrimaryEndpoint, AddressConfigStatic, false): %s", addr, err)
+	} else {
+		ep.DecRef()
 	}
-	defer r.Release()
 
 	var tllData [header.NDPLinkLayerAddressSize]byte
 	header.NDPOptions(tllData[:]).Serialize(header.NDPOptionsSerializer{
@@ -404,6 +429,22 @@ func TestICMPCountsWithNeighborCache(t *testing.T) {
 			typ:  header.ICMPv6RedirectMsg,
 			size: header.ICMPv6MinimumSize,
 		},
+		{
+			typ:  header.ICMPv6MulticastListenerQuery,
+			size: header.MLDMinimumSize + header.ICMPv6HeaderSize,
+		},
+		{
+			typ:  header.ICMPv6MulticastListenerReport,
+			size: header.MLDMinimumSize + header.ICMPv6HeaderSize,
+		},
+		{
+			typ:  header.ICMPv6MulticastListenerDone,
+			size: header.MLDMinimumSize + header.ICMPv6HeaderSize,
+		},
+		{
+			typ:  255, /* Unrecognized */
+			size: 50,
+		},
 	}
 
 	handleIPv6Payload := func(icmp header.ICMPv6) {
@@ -413,20 +454,20 @@ func TestICMPCountsWithNeighborCache(t *testing.T) {
 		})
 		ip := header.IPv6(pkt.NetworkHeader().Push(header.IPv6MinimumSize))
 		ip.Encode(&header.IPv6Fields{
-			PayloadLength: uint16(len(icmp)),
-			NextHeader:    uint8(header.ICMPv6ProtocolNumber),
-			HopLimit:      header.NDPHopLimit,
-			SrcAddr:       r.LocalAddress,
-			DstAddr:       r.RemoteAddress,
+			PayloadLength:     uint16(len(icmp)),
+			TransportProtocol: header.ICMPv6ProtocolNumber,
+			HopLimit:          header.NDPHopLimit,
+			SrcAddr:           lladdr1,
+			DstAddr:           lladdr0,
 		})
-		ep.HandlePacket(&r, pkt)
+		ep.HandlePacket(pkt)
 	}
 
 	for _, typ := range types {
 		icmp := header.ICMPv6(buffer.NewView(typ.size + len(typ.extraData)))
 		copy(icmp[typ.size:], typ.extraData)
 		icmp.SetType(typ.typ)
-		icmp.SetChecksum(header.ICMPv6Checksum(icmp[:typ.size], r.LocalAddress, r.RemoteAddress, buffer.View(typ.extraData).ToVectorisedView()))
+		icmp.SetChecksum(header.ICMPv6Checksum(icmp[:typ.size], lladdr0, lladdr1, buffer.View(typ.extraData).ToVectorisedView()))
 		handleIPv6Payload(icmp)
 	}
 
@@ -434,7 +475,7 @@ func TestICMPCountsWithNeighborCache(t *testing.T) {
 	// Stats().ICMP.ICMPv6ReceivedPacketStats.Invalid is incremented.
 	handleIPv6Payload(header.ICMPv6(buffer.NewView(header.IPv6MinimumSize)))
 
-	icmpv6Stats := s.Stats().ICMP.V6PacketsReceived
+	icmpv6Stats := s.Stats().ICMP.V6.PacketsReceived
 	visitStats(reflect.ValueOf(&icmpv6Stats).Elem(), func(name string, s *tcpip.StatCounter) {
 		if got, want := s.Value(), uint64(1); got != want {
 			t.Errorf("got %s = %d, want = %d", name, got, want)
@@ -559,8 +600,8 @@ func routeICMPv6Packet(t *testing.T, args routeArgs, fn func(*testing.T, header.
 		return
 	}
 
-	if len(args.remoteLinkAddr) != 0 && args.remoteLinkAddr != pi.Route.RemoteLinkAddress {
-		t.Errorf("got remote link address = %s, want = %s", pi.Route.RemoteLinkAddress, args.remoteLinkAddr)
+	if got := pi.Route.RemoteLinkAddress(); len(args.remoteLinkAddr) != 0 && got != args.remoteLinkAddr {
+		t.Errorf("got remote link address = %s, want = %s", got, args.remoteLinkAddr)
 	}
 
 	// Pull the full payload since network header. Needed for header.IPv6 to
@@ -812,11 +853,11 @@ func TestICMPChecksumValidationSimple(t *testing.T) {
 							}
 							ip := header.IPv6(buffer.NewView(header.IPv6MinimumSize))
 							ip.Encode(&header.IPv6Fields{
-								PayloadLength: uint16(len(icmp)),
-								NextHeader:    uint8(header.ICMPv6ProtocolNumber),
-								HopLimit:      header.NDPHopLimit,
-								SrcAddr:       lladdr1,
-								DstAddr:       lladdr0,
+								PayloadLength:     uint16(len(icmp)),
+								TransportProtocol: header.ICMPv6ProtocolNumber,
+								HopLimit:          header.NDPHopLimit,
+								SrcAddr:           lladdr1,
+								DstAddr:           lladdr0,
 							})
 							pkt := stack.NewPacketBuffer(stack.PacketBufferOptions{
 								Data: buffer.NewVectorisedView(len(ip)+len(icmp), []buffer.View{buffer.View(ip), buffer.View(icmp)}),
@@ -824,7 +865,7 @@ func TestICMPChecksumValidationSimple(t *testing.T) {
 							e.InjectInbound(ProtocolNumber, pkt)
 						}
 
-						stats := s.Stats().ICMP.V6PacketsReceived
+						stats := s.Stats().ICMP.V6.PacketsReceived
 						invalid := stats.Invalid
 						routerOnly := stats.RouterOnlyPacketsDroppedByHost
 						typStat := typ.statCounter(stats)
@@ -889,11 +930,11 @@ func TestICMPChecksumValidationWithPayload(t *testing.T) {
 	errorICMPBody := func(view buffer.View) {
 		ip := header.IPv6(view)
 		ip.Encode(&header.IPv6Fields{
-			PayloadLength: simpleBodySize,
-			NextHeader:    10,
-			HopLimit:      20,
-			SrcAddr:       lladdr0,
-			DstAddr:       lladdr1,
+			PayloadLength:     simpleBodySize,
+			TransportProtocol: 10,
+			HopLimit:          20,
+			SrcAddr:           lladdr0,
+			DstAddr:           lladdr1,
 		})
 		simpleBody(view[header.IPv6MinimumSize:])
 	}
@@ -1007,11 +1048,11 @@ func TestICMPChecksumValidationWithPayload(t *testing.T) {
 
 				ip := header.IPv6(hdr.Prepend(header.IPv6MinimumSize))
 				ip.Encode(&header.IPv6Fields{
-					PayloadLength: uint16(icmpSize),
-					NextHeader:    uint8(header.ICMPv6ProtocolNumber),
-					HopLimit:      header.NDPHopLimit,
-					SrcAddr:       lladdr1,
-					DstAddr:       lladdr0,
+					PayloadLength:     uint16(icmpSize),
+					TransportProtocol: header.ICMPv6ProtocolNumber,
+					HopLimit:          header.NDPHopLimit,
+					SrcAddr:           lladdr1,
+					DstAddr:           lladdr0,
 				})
 				pkt := stack.NewPacketBuffer(stack.PacketBufferOptions{
 					Data: hdr.View().ToVectorisedView(),
@@ -1019,7 +1060,7 @@ func TestICMPChecksumValidationWithPayload(t *testing.T) {
 				e.InjectInbound(ProtocolNumber, pkt)
 			}
 
-			stats := s.Stats().ICMP.V6PacketsReceived
+			stats := s.Stats().ICMP.V6.PacketsReceived
 			invalid := stats.Invalid
 			typStat := typ.statCounter(stats)
 
@@ -1067,11 +1108,11 @@ func TestICMPChecksumValidationWithPayloadMultipleViews(t *testing.T) {
 	errorICMPBody := func(view buffer.View) {
 		ip := header.IPv6(view)
 		ip.Encode(&header.IPv6Fields{
-			PayloadLength: simpleBodySize,
-			NextHeader:    10,
-			HopLimit:      20,
-			SrcAddr:       lladdr0,
-			DstAddr:       lladdr1,
+			PayloadLength:     simpleBodySize,
+			TransportProtocol: 10,
+			HopLimit:          20,
+			SrcAddr:           lladdr0,
+			DstAddr:           lladdr1,
 		})
 		simpleBody(view[header.IPv6MinimumSize:])
 	}
@@ -1186,11 +1227,11 @@ func TestICMPChecksumValidationWithPayloadMultipleViews(t *testing.T) {
 
 				ip := header.IPv6(hdr.Prepend(header.IPv6MinimumSize))
 				ip.Encode(&header.IPv6Fields{
-					PayloadLength: uint16(size + payloadSize),
-					NextHeader:    uint8(header.ICMPv6ProtocolNumber),
-					HopLimit:      header.NDPHopLimit,
-					SrcAddr:       lladdr1,
-					DstAddr:       lladdr0,
+					PayloadLength:     uint16(size + payloadSize),
+					TransportProtocol: header.ICMPv6ProtocolNumber,
+					HopLimit:          header.NDPHopLimit,
+					SrcAddr:           lladdr1,
+					DstAddr:           lladdr0,
 				})
 				pkt := stack.NewPacketBuffer(stack.PacketBufferOptions{
 					Data: buffer.NewVectorisedView(header.IPv6MinimumSize+size+payloadSize, []buffer.View{hdr.View(), payload}),
@@ -1198,7 +1239,7 @@ func TestICMPChecksumValidationWithPayloadMultipleViews(t *testing.T) {
 				e.InjectInbound(ProtocolNumber, pkt)
 			}
 
-			stats := s.Stats().ICMP.V6PacketsReceived
+			stats := s.Stats().ICMP.V6.PacketsReceived
 			invalid := stats.Invalid
 			typStat := typ.statCounter(stats)
 
@@ -1235,26 +1276,72 @@ func TestICMPChecksumValidationWithPayloadMultipleViews(t *testing.T) {
 }
 
 func TestLinkAddressRequest(t *testing.T) {
+	const nicID = 1
+
 	snaddr := header.SolicitedNodeAddr(lladdr0)
 	mcaddr := header.EthernetAddressFromMulticastIPv6Address(snaddr)
 
 	tests := []struct {
-		name             string
-		remoteLinkAddr   tcpip.LinkAddress
-		expectedLinkAddr tcpip.LinkAddress
-		expectedAddr     tcpip.Address
+		name           string
+		nicAddr        tcpip.Address
+		localAddr      tcpip.Address
+		remoteLinkAddr tcpip.LinkAddress
+
+		expectedErr            *tcpip.Error
+		expectedRemoteAddr     tcpip.Address
+		expectedRemoteLinkAddr tcpip.LinkAddress
 	}{
 		{
-			name:             "Unicast",
-			remoteLinkAddr:   linkAddr1,
-			expectedLinkAddr: linkAddr1,
-			expectedAddr:     lladdr0,
+			name:                   "Unicast",
+			nicAddr:                lladdr1,
+			localAddr:              lladdr1,
+			remoteLinkAddr:         linkAddr1,
+			expectedRemoteAddr:     lladdr0,
+			expectedRemoteLinkAddr: linkAddr1,
+		},
+		{
+			name:                   "Multicast",
+			nicAddr:                lladdr1,
+			localAddr:              lladdr1,
+			remoteLinkAddr:         "",
+			expectedRemoteAddr:     snaddr,
+			expectedRemoteLinkAddr: mcaddr,
+		},
+		{
+			name:                   "Unicast with unspecified source",
+			nicAddr:                lladdr1,
+			remoteLinkAddr:         linkAddr1,
+			expectedRemoteAddr:     lladdr0,
+			expectedRemoteLinkAddr: linkAddr1,
 		},
 		{
-			name:             "Multicast",
-			remoteLinkAddr:   "",
-			expectedLinkAddr: mcaddr,
-			expectedAddr:     snaddr,
+			name:                   "Multicast with unspecified source",
+			nicAddr:                lladdr1,
+			remoteLinkAddr:         "",
+			expectedRemoteAddr:     snaddr,
+			expectedRemoteLinkAddr: mcaddr,
+		},
+		{
+			name:           "Unicast with unassigned address",
+			localAddr:      lladdr1,
+			remoteLinkAddr: linkAddr1,
+			expectedErr:    tcpip.ErrNetworkUnreachable,
+		},
+		{
+			name:           "Multicast with unassigned address",
+			localAddr:      lladdr1,
+			remoteLinkAddr: "",
+			expectedErr:    tcpip.ErrNetworkUnreachable,
+		},
+		{
+			name:           "Unicast with no local address available",
+			remoteLinkAddr: linkAddr1,
+			expectedErr:    tcpip.ErrNetworkUnreachable,
+		},
+		{
+			name:           "Multicast with no local address available",
+			remoteLinkAddr: "",
+			expectedErr:    tcpip.ErrNetworkUnreachable,
 		},
 	}
 
@@ -1269,26 +1356,43 @@ func TestLinkAddressRequest(t *testing.T) {
 		}
 
 		linkEP := channel.New(defaultChannelSize, defaultMTU, linkAddr0)
-		if err := linkRes.LinkAddressRequest(lladdr0, lladdr1, test.remoteLinkAddr, linkEP); err != nil {
-			t.Errorf("got p.LinkAddressRequest(%s, %s, %s, _) = %s", lladdr0, lladdr1, test.remoteLinkAddr, err)
+		if err := s.CreateNIC(nicID, linkEP); err != nil {
+			t.Fatalf("s.CreateNIC(%d, _): %s", nicID, err)
+		}
+		if len(test.nicAddr) != 0 {
+			if err := s.AddAddress(nicID, ProtocolNumber, test.nicAddr); err != nil {
+				t.Fatalf("s.AddAddress(%d, %d, %s): %s", nicID, ProtocolNumber, test.nicAddr, err)
+			}
+		}
+
+		// We pass a test network interface to LinkAddressRequest with the same NIC
+		// ID and link endpoint used by the NIC we created earlier so that we can
+		// mock a link address request and observe the packets sent to the link
+		// endpoint even though the stack uses the real NIC.
+		if err := linkRes.LinkAddressRequest(lladdr0, test.localAddr, test.remoteLinkAddr, &testInterface{LinkEndpoint: linkEP, nicID: nicID}); err != test.expectedErr {
+			t.Errorf("got p.LinkAddressRequest(%s, %s, %s, _) = %s, want = %s", lladdr0, test.localAddr, test.remoteLinkAddr, err, test.expectedErr)
+		}
+
+		if test.expectedErr != nil {
+			return
 		}
 
 		pkt, ok := linkEP.Read()
 		if !ok {
 			t.Fatal("expected to send a link address request")
 		}
-		if pkt.Route.RemoteLinkAddress != test.expectedLinkAddr {
-			t.Errorf("got pkt.Route.RemoteLinkAddress = %s, want = %s", pkt.Route.RemoteLinkAddress, test.expectedLinkAddr)
+		if got := pkt.Route.RemoteLinkAddress(); got != test.expectedRemoteLinkAddr {
+			t.Errorf("got pkt.Route.RemoteLinkAddress() = %s, want = %s", got, test.expectedRemoteLinkAddr)
 		}
-		if pkt.Route.RemoteAddress != test.expectedAddr {
-			t.Errorf("got pkt.Route.RemoteAddress = %s, want = %s", pkt.Route.RemoteAddress, test.expectedAddr)
+		if pkt.Route.RemoteAddress != test.expectedRemoteAddr {
+			t.Errorf("got pkt.Route.RemoteAddress = %s, want = %s", pkt.Route.RemoteAddress, test.expectedRemoteAddr)
 		}
 		if pkt.Route.LocalAddress != lladdr1 {
 			t.Errorf("got pkt.Route.LocalAddress = %s, want = %s", pkt.Route.LocalAddress, lladdr1)
 		}
 		checker.IPv6(t, stack.PayloadSince(pkt.Pkt.NetworkHeader()),
 			checker.SrcAddr(lladdr1),
-			checker.DstAddr(test.expectedAddr),
+			checker.DstAddr(test.expectedRemoteAddr),
 			checker.TTL(header.NDPHopLimit),
 			checker.NDPNS(
 				checker.NDPNSTargetAddress(lladdr0),
@@ -1341,11 +1445,11 @@ func TestPacketQueing(t *testing.T) {
 				payloadLength := hdr.UsedLength()
 				ip := header.IPv6(hdr.Prepend(header.IPv6MinimumSize))
 				ip.Encode(&header.IPv6Fields{
-					PayloadLength: uint16(payloadLength),
-					NextHeader:    uint8(udp.ProtocolNumber),
-					HopLimit:      DefaultTTL,
-					SrcAddr:       host2IPv6Addr.AddressWithPrefix.Address,
-					DstAddr:       host1IPv6Addr.AddressWithPrefix.Address,
+					PayloadLength:     uint16(payloadLength),
+					TransportProtocol: udp.ProtocolNumber,
+					HopLimit:          DefaultTTL,
+					SrcAddr:           host2IPv6Addr.AddressWithPrefix.Address,
+					DstAddr:           host1IPv6Addr.AddressWithPrefix.Address,
 				})
 				e.InjectInbound(ProtocolNumber, stack.NewPacketBuffer(stack.PacketBufferOptions{
 					Data: hdr.View().ToVectorisedView(),
@@ -1359,8 +1463,8 @@ func TestPacketQueing(t *testing.T) {
 				if p.Proto != ProtocolNumber {
 					t.Errorf("got p.Proto = %d, want = %d", p.Proto, ProtocolNumber)
 				}
-				if p.Route.RemoteLinkAddress != host2NICLinkAddr {
-					t.Errorf("got p.Route.RemoteLinkAddress = %s, want = %s", p.Route.RemoteLinkAddress, host2NICLinkAddr)
+				if got := p.Route.RemoteLinkAddress(); got != host2NICLinkAddr {
+					t.Errorf("got p.Route.RemoteLinkAddress() = %s, want = %s", got, host2NICLinkAddr)
 				}
 				checker.IPv6(t, stack.PayloadSince(p.Pkt.NetworkHeader()),
 					checker.SrcAddr(host1IPv6Addr.AddressWithPrefix.Address),
@@ -1383,11 +1487,11 @@ func TestPacketQueing(t *testing.T) {
 				pkt.SetChecksum(header.ICMPv6Checksum(pkt, host2IPv6Addr.AddressWithPrefix.Address, host1IPv6Addr.AddressWithPrefix.Address, buffer.VectorisedView{}))
 				ip := header.IPv6(hdr.Prepend(header.IPv6MinimumSize))
 				ip.Encode(&header.IPv6Fields{
-					PayloadLength: header.ICMPv6MinimumSize,
-					NextHeader:    uint8(icmp.ProtocolNumber6),
-					HopLimit:      DefaultTTL,
-					SrcAddr:       host2IPv6Addr.AddressWithPrefix.Address,
-					DstAddr:       host1IPv6Addr.AddressWithPrefix.Address,
+					PayloadLength:     header.ICMPv6MinimumSize,
+					TransportProtocol: icmp.ProtocolNumber6,
+					HopLimit:          DefaultTTL,
+					SrcAddr:           host2IPv6Addr.AddressWithPrefix.Address,
+					DstAddr:           host1IPv6Addr.AddressWithPrefix.Address,
 				})
 				e.InjectInbound(header.IPv6ProtocolNumber, stack.NewPacketBuffer(stack.PacketBufferOptions{
 					Data: hdr.View().ToVectorisedView(),
@@ -1401,8 +1505,8 @@ func TestPacketQueing(t *testing.T) {
 				if p.Proto != ProtocolNumber {
 					t.Errorf("got p.Proto = %d, want = %d", p.Proto, ProtocolNumber)
 				}
-				if p.Route.RemoteLinkAddress != host2NICLinkAddr {
-					t.Errorf("got p.Route.RemoteLinkAddress = %s, want = %s", p.Route.RemoteLinkAddress, host2NICLinkAddr)
+				if got := p.Route.RemoteLinkAddress(); got != host2NICLinkAddr {
+					t.Errorf("got p.Route.RemoteLinkAddress() = %s, want = %s", got, host2NICLinkAddr)
 				}
 				checker.IPv6(t, stack.PayloadSince(p.Pkt.NetworkHeader()),
 					checker.SrcAddr(host1IPv6Addr.AddressWithPrefix.Address),
@@ -1452,8 +1556,8 @@ func TestPacketQueing(t *testing.T) {
 					t.Errorf("got Proto = %d, want = %d", p.Proto, ProtocolNumber)
 				}
 				snmc := header.SolicitedNodeAddr(host2IPv6Addr.AddressWithPrefix.Address)
-				if want := header.EthernetAddressFromMulticastIPv6Address(snmc); p.Route.RemoteLinkAddress != want {
-					t.Errorf("got p.Route.RemoteLinkAddress = %s, want = %s", p.Route.RemoteLinkAddress, want)
+				if got, want := p.Route.RemoteLinkAddress(), header.EthernetAddressFromMulticastIPv6Address(snmc); got != want {
+					t.Errorf("got p.Route.RemoteLinkAddress() = %s, want = %s", got, want)
 				}
 				checker.IPv6(t, stack.PayloadSince(p.Pkt.NetworkHeader()),
 					checker.SrcAddr(host1IPv6Addr.AddressWithPrefix.Address),
@@ -1471,7 +1575,7 @@ func TestPacketQueing(t *testing.T) {
 				hdr := buffer.NewPrependable(header.IPv6MinimumSize + naSize)
 				pkt := header.ICMPv6(hdr.Prepend(naSize))
 				pkt.SetType(header.ICMPv6NeighborAdvert)
-				na := header.NDPNeighborAdvert(pkt.NDPPayload())
+				na := header.NDPNeighborAdvert(pkt.MessageBody())
 				na.SetSolicitedFlag(true)
 				na.SetOverrideFlag(true)
 				na.SetTargetAddress(host2IPv6Addr.AddressWithPrefix.Address)
@@ -1482,11 +1586,11 @@ func TestPacketQueing(t *testing.T) {
 				payloadLength := hdr.UsedLength()
 				ip := header.IPv6(hdr.Prepend(header.IPv6MinimumSize))
 				ip.Encode(&header.IPv6Fields{
-					PayloadLength: uint16(payloadLength),
-					NextHeader:    uint8(icmp.ProtocolNumber6),
-					HopLimit:      header.NDPHopLimit,
-					SrcAddr:       host2IPv6Addr.AddressWithPrefix.Address,
-					DstAddr:       host1IPv6Addr.AddressWithPrefix.Address,
+					PayloadLength:     uint16(payloadLength),
+					TransportProtocol: icmp.ProtocolNumber6,
+					HopLimit:          header.NDPHopLimit,
+					SrcAddr:           host2IPv6Addr.AddressWithPrefix.Address,
+					DstAddr:           host1IPv6Addr.AddressWithPrefix.Address,
 				})
 				e.InjectInbound(ProtocolNumber, stack.NewPacketBuffer(stack.PacketBufferOptions{
 					Data: hdr.View().ToVectorisedView(),
@@ -1520,7 +1624,7 @@ func TestCallsToNeighborCache(t *testing.T) {
 				nsSize := header.ICMPv6NeighborSolicitMinimumSize + header.NDPLinkLayerAddressSize
 				icmp := header.ICMPv6(buffer.NewView(nsSize))
 				icmp.SetType(header.ICMPv6NeighborSolicit)
-				ns := header.NDPNeighborSolicit(icmp.NDPPayload())
+				ns := header.NDPNeighborSolicit(icmp.MessageBody())
 				ns.SetTargetAddress(lladdr0)
 				return icmp
 			},
@@ -1540,7 +1644,7 @@ func TestCallsToNeighborCache(t *testing.T) {
 				nsSize := header.ICMPv6NeighborSolicitMinimumSize + header.NDPLinkLayerAddressSize
 				icmp := header.ICMPv6(buffer.NewView(nsSize))
 				icmp.SetType(header.ICMPv6NeighborSolicit)
-				ns := header.NDPNeighborSolicit(icmp.NDPPayload())
+				ns := header.NDPNeighborSolicit(icmp.MessageBody())
 				ns.SetTargetAddress(lladdr0)
 				ns.Options().Serialize(header.NDPOptionsSerializer{
 					header.NDPSourceLinkLayerAddressOption(linkAddr1),
@@ -1557,7 +1661,7 @@ func TestCallsToNeighborCache(t *testing.T) {
 				nsSize := header.ICMPv6NeighborSolicitMinimumSize + header.NDPLinkLayerAddressSize
 				icmp := header.ICMPv6(buffer.NewView(nsSize))
 				icmp.SetType(header.ICMPv6NeighborSolicit)
-				ns := header.NDPNeighborSolicit(icmp.NDPPayload())
+				ns := header.NDPNeighborSolicit(icmp.MessageBody())
 				ns.SetTargetAddress(lladdr0)
 				return icmp
 			},
@@ -1573,7 +1677,7 @@ func TestCallsToNeighborCache(t *testing.T) {
 				nsSize := header.ICMPv6NeighborSolicitMinimumSize + header.NDPLinkLayerAddressSize
 				icmp := header.ICMPv6(buffer.NewView(nsSize))
 				icmp.SetType(header.ICMPv6NeighborSolicit)
-				ns := header.NDPNeighborSolicit(icmp.NDPPayload())
+				ns := header.NDPNeighborSolicit(icmp.MessageBody())
 				ns.SetTargetAddress(lladdr0)
 				ns.Options().Serialize(header.NDPOptionsSerializer{
 					header.NDPSourceLinkLayerAddressOption(linkAddr1),
@@ -1590,7 +1694,7 @@ func TestCallsToNeighborCache(t *testing.T) {
 				naSize := header.ICMPv6NeighborAdvertMinimumSize
 				icmp := header.ICMPv6(buffer.NewView(naSize))
 				icmp.SetType(header.ICMPv6NeighborAdvert)
-				na := header.NDPNeighborAdvert(icmp.NDPPayload())
+				na := header.NDPNeighborAdvert(icmp.MessageBody())
 				na.SetSolicitedFlag(true)
 				na.SetOverrideFlag(false)
 				na.SetTargetAddress(lladdr1)
@@ -1611,7 +1715,7 @@ func TestCallsToNeighborCache(t *testing.T) {
 				naSize := header.ICMPv6NeighborAdvertMinimumSize + header.NDPLinkLayerAddressSize
 				icmp := header.ICMPv6(buffer.NewView(naSize))
 				icmp.SetType(header.ICMPv6NeighborAdvert)
-				na := header.NDPNeighborAdvert(icmp.NDPPayload())
+				na := header.NDPNeighborAdvert(icmp.MessageBody())
 				na.SetSolicitedFlag(true)
 				na.SetOverrideFlag(false)
 				na.SetTargetAddress(lladdr1)
@@ -1630,7 +1734,7 @@ func TestCallsToNeighborCache(t *testing.T) {
 				naSize := header.ICMPv6NeighborAdvertMinimumSize + header.NDPLinkLayerAddressSize
 				icmp := header.ICMPv6(buffer.NewView(naSize))
 				icmp.SetType(header.ICMPv6NeighborAdvert)
-				na := header.NDPNeighborAdvert(icmp.NDPPayload())
+				na := header.NDPNeighborAdvert(icmp.MessageBody())
 				na.SetSolicitedFlag(false)
 				na.SetOverrideFlag(false)
 				na.SetTargetAddress(lladdr1)
@@ -1650,7 +1754,7 @@ func TestCallsToNeighborCache(t *testing.T) {
 				naSize := header.ICMPv6NeighborAdvertMinimumSize + header.NDPLinkLayerAddressSize
 				icmp := header.ICMPv6(buffer.NewView(naSize))
 				icmp.SetType(header.ICMPv6NeighborAdvert)
-				na := header.NDPNeighborAdvert(icmp.NDPPayload())
+				na := header.NDPNeighborAdvert(icmp.MessageBody())
 				na.SetSolicitedFlag(false)
 				na.SetOverrideFlag(false)
 				na.SetTargetAddress(lladdr1)
@@ -1698,37 +1802,39 @@ func TestCallsToNeighborCache(t *testing.T) {
 				t.Fatalf("cannot find protocol instance for network protocol %d", ProtocolNumber)
 			}
 			nudHandler := &stubNUDHandler{}
-			ep := netProto.NewEndpoint(&testInterface{linkAddr: linkAddr0}, &stubLinkAddressCache{}, nudHandler, &stubDispatcher{})
+			ep := netProto.NewEndpoint(&testInterface{LinkEndpoint: channel.New(0, header.IPv6MinimumMTU, linkAddr0)}, &stubLinkAddressCache{}, nudHandler, &stubDispatcher{})
 			defer ep.Close()
 
 			if err := ep.Enable(); err != nil {
 				t.Fatalf("ep.Enable(): %s", err)
 			}
 
-			r, err := s.FindRoute(nicID, lladdr0, test.source, ProtocolNumber, false /* multicastLoop */)
-			if err != nil {
-				t.Fatalf("FindRoute(%d, %s, %s, _, false) = (_, %s), want = (_, nil)", nicID, lladdr0, lladdr1, err)
+			addressableEndpoint, ok := ep.(stack.AddressableEndpoint)
+			if !ok {
+				t.Fatalf("expected network endpoint to implement stack.AddressableEndpoint")
+			}
+			addr := lladdr0.WithPrefix()
+			if ep, err := addressableEndpoint.AddAndAcquirePermanentAddress(addr, stack.CanBePrimaryEndpoint, stack.AddressConfigStatic, false /* deprecated */); err != nil {
+				t.Fatalf("addressableEndpoint.AddAndAcquirePermanentAddress(%s, CanBePrimaryEndpoint, AddressConfigStatic, false): %s", addr, err)
+			} else {
+				ep.DecRef()
 			}
-			defer r.Release()
-
-			// TODO(gvisor.dev/issue/4517): Remove the need for this manual patch.
-			r.LocalAddress = test.destination
 
 			icmp := test.createPacket()
-			icmp.SetChecksum(header.ICMPv6Checksum(icmp, r.RemoteAddress, r.LocalAddress, buffer.VectorisedView{}))
+			icmp.SetChecksum(header.ICMPv6Checksum(icmp, test.source, test.destination, buffer.VectorisedView{}))
 			pkt := stack.NewPacketBuffer(stack.PacketBufferOptions{
 				ReserveHeaderBytes: header.IPv6MinimumSize,
 				Data:               buffer.View(icmp).ToVectorisedView(),
 			})
 			ip := header.IPv6(pkt.NetworkHeader().Push(header.IPv6MinimumSize))
 			ip.Encode(&header.IPv6Fields{
-				PayloadLength: uint16(len(icmp)),
-				NextHeader:    uint8(header.ICMPv6ProtocolNumber),
-				HopLimit:      header.NDPHopLimit,
-				SrcAddr:       r.RemoteAddress,
-				DstAddr:       r.LocalAddress,
+				PayloadLength:     uint16(len(icmp)),
+				TransportProtocol: header.ICMPv6ProtocolNumber,
+				HopLimit:          header.NDPHopLimit,
+				SrcAddr:           test.source,
+				DstAddr:           test.destination,
 			})
-			ep.HandlePacket(&r, pkt)
+			ep.HandlePacket(pkt)
 
 			// Confirm the endpoint calls the correct NUDHandler method.
 			if nudHandler.probeCount != test.wantProbeCount {
diff --git a/pkg/tcpip/network/ipv6/ipv6.go b/pkg/tcpip/network/ipv6/ipv6.go
index 9670696c7..a49b5ac77 100644
--- a/pkg/tcpip/network/ipv6/ipv6.go
+++ b/pkg/tcpip/network/ipv6/ipv6.go
@@ -19,6 +19,7 @@ import (
 	"encoding/binary"
 	"fmt"
 	"hash/fnv"
+	"math"
 	"sort"
 	"sync/atomic"
 	"time"
@@ -34,19 +35,21 @@ import (
 )
 
 const (
+	// ReassembleTimeout controls how long a fragment will be held.
 	// As per RFC 8200 section 4.5:
+	//
 	//   If insufficient fragments are received to complete reassembly of a packet
 	//   within 60 seconds of the reception of the first-arriving fragment of that
 	//   packet, reassembly of that packet must be abandoned.
 	//
 	// Linux also uses 60 seconds for reassembly timeout:
 	// https://github.com/torvalds/linux/blob/47ec5303d73ea344e84f46660fff693c57641386/include/net/ipv6.h#L456
-	reassembleTimeout = 60 * time.Second
+	ReassembleTimeout = 60 * time.Second
 
 	// ProtocolNumber is the ipv6 protocol number.
 	ProtocolNumber = header.IPv6ProtocolNumber
 
-	// maxTotalSize is maximum size that can be encoded in the 16-bit
+	// maxPayloadSize is the maximum size that can be encoded in the 16-bit
 	// PayloadLength field of the ipv6 header.
 	maxPayloadSize = 0xffff
 
@@ -83,6 +86,7 @@ type endpoint struct {
 
 		addressableEndpointState stack.AddressableEndpointState
 		ndp                      ndpState
+		mld                      mldState
 	}
 }
 
@@ -118,6 +122,45 @@ type OpaqueInterfaceIdentifierOptions struct {
 	SecretKey []byte
 }
 
+// onAddressAssignedLocked handles an address being assigned.
+//
+// Precondition: e.mu must be exclusively locked.
+func (e *endpoint) onAddressAssignedLocked(addr tcpip.Address) {
+	// As per RFC 2710 section 3,
+	//
+	//   All MLD messages described in this document are sent with a link-local
+	//   IPv6 Source Address, ...
+	//
+	// If we just completed DAD for a link-local address, then attempt to send any
+	// queued MLD reports. Note, we may have sent reports already for some of the
+	// groups before we had a valid link-local address to use as the source for
+	// the MLD messages, but that was only so that MLD snooping switches are aware
+	// of our membership to groups - routers would not have handled those reports.
+	//
+	// As per RFC 3590 section 4,
+	//
+	//   MLD Report and Done messages are sent with a link-local address as
+	//   the IPv6 source address, if a valid address is available on the
+	//   interface. If a valid link-local address is not available (e.g., one
+	//   has not been configured), the message is sent with the unspecified
+	//   address (::) as the IPv6 source address.
+	//
+	//   Once a valid link-local address is available, a node SHOULD generate
+	//   new MLD Report messages for all multicast addresses joined on the
+	//   interface.
+	//
+	//   Routers receiving an MLD Report or Done message with the unspecified
+	//   address as the IPv6 source address MUST silently discard the packet
+	//   without taking any action on the packets contents.
+	//
+	//   Snooping switches MUST manage multicast forwarding state based on MLD
+	//   Report and Done messages sent with the unspecified address as the
+	//   IPv6 source address.
+	if header.IsV6LinkLocalAddress(addr) {
+		e.mu.mld.sendQueuedReports()
+	}
+}
+
 // InvalidateDefaultRouter implements stack.NDPEndpoint.
 func (e *endpoint) InvalidateDefaultRouter(rtr tcpip.Address) {
 	e.mu.Lock()
@@ -166,7 +209,7 @@ func (e *endpoint) dupTentativeAddrDetected(addr tcpip.Address) *tcpip.Error {
 		return err
 	}
 
-	prefix := addressEndpoint.AddressWithPrefix().Subnet()
+	prefix := addressEndpoint.Subnet()
 
 	switch t := addressEndpoint.ConfigType(); t {
 	case stack.AddressConfigStatic:
@@ -224,6 +267,12 @@ func (e *endpoint) Enable() *tcpip.Error {
 		return nil
 	}
 
+	// Groups may have been joined when the endpoint was disabled, or the
+	// endpoint may have left groups from the perspective of MLD when the
+	// endpoint was disabled. Either way, we need to let routers know to
+	// send us multicast traffic.
+	e.mu.mld.initializeAll()
+
 	// Join the IPv6 All-Nodes Multicast group if the stack is configured to
 	// use IPv6. This is required to ensure that this node properly receives
 	// and responds to the various NDP messages that are destined to the
@@ -241,8 +290,10 @@ func (e *endpoint) Enable() *tcpip.Error {
 	// (NDP NS) messages may be sent to the All-Nodes multicast group if the
 	// source address of the NDP NS is the unspecified address, as per RFC 4861
 	// section 7.2.4.
-	if _, err := e.mu.addressableEndpointState.JoinGroup(header.IPv6AllNodesMulticastAddress); err != nil {
-		return err
+	if err := e.joinGroupLocked(header.IPv6AllNodesMulticastAddress); err != nil {
+		// joinGroupLocked only returns an error if the group address is not a valid
+		// IPv6 multicast address.
+		panic(fmt.Sprintf("e.joinGroupLocked(%s): %s", header.IPv6AllNodesMulticastAddress, err))
 	}
 
 	// Perform DAD on the all the unicast IPv6 endpoints that are in the permanent
@@ -251,7 +302,7 @@ func (e *endpoint) Enable() *tcpip.Error {
 	// Addresses may have aleady completed DAD but in the time since the endpoint
 	// was last enabled, other devices may have acquired the same addresses.
 	var err *tcpip.Error
-	e.mu.addressableEndpointState.ReadOnly().ForEach(func(addressEndpoint stack.AddressEndpoint) bool {
+	e.mu.addressableEndpointState.ForEachEndpoint(func(addressEndpoint stack.AddressEndpoint) bool {
 		addr := addressEndpoint.AddressWithPrefix().Address
 		if !header.IsV6UnicastAddress(addr) {
 			return true
@@ -273,7 +324,7 @@ func (e *endpoint) Enable() *tcpip.Error {
 	}
 
 	// Do not auto-generate an IPv6 link-local address for loopback devices.
-	if e.protocol.autoGenIPv6LinkLocal && !e.nic.IsLoopback() {
+	if e.protocol.options.AutoGenLinkLocal && !e.nic.IsLoopback() {
 		// The valid and preferred lifetime is infinite for the auto-generated
 		// link-local address.
 		e.mu.ndp.doSLAAC(header.IPv6LinkLocalPrefix.Subnet(), header.NDPInfiniteLifetime, header.NDPInfiniteLifetime)
@@ -322,7 +373,7 @@ func (e *endpoint) Disable() {
 }
 
 func (e *endpoint) disableLocked() {
-	if !e.setEnabled(false) {
+	if !e.Enabled() {
 		return
 	}
 
@@ -331,9 +382,17 @@ func (e *endpoint) disableLocked() {
 	e.stopDADForPermanentAddressesLocked()
 
 	// The endpoint may have already left the multicast group.
-	if _, err := e.mu.addressableEndpointState.LeaveGroup(header.IPv6AllNodesMulticastAddress); err != nil && err != tcpip.ErrBadLocalAddress {
+	if err := e.leaveGroupLocked(header.IPv6AllNodesMulticastAddress); err != nil && err != tcpip.ErrBadLocalAddress {
 		panic(fmt.Sprintf("unexpected error when leaving group = %s: %s", header.IPv6AllNodesMulticastAddress, err))
 	}
+
+	// Leave groups from the perspective of MLD so that routers know that
+	// we are no longer interested in the group.
+	e.mu.mld.softLeaveAll()
+
+	if !e.setEnabled(false) {
+		panic("should have only done work to disable the endpoint if it was enabled")
+	}
 }
 
 // stopDADForPermanentAddressesLocked stops DAD for all permaneent addresses.
@@ -341,7 +400,7 @@ func (e *endpoint) disableLocked() {
 // Precondition: e.mu must be write locked.
 func (e *endpoint) stopDADForPermanentAddressesLocked() {
 	// Stop DAD for all the tentative unicast addresses.
-	e.mu.addressableEndpointState.ReadOnly().ForEach(func(addressEndpoint stack.AddressEndpoint) bool {
+	e.mu.addressableEndpointState.ForEachEndpoint(func(addressEndpoint stack.AddressEndpoint) bool {
 		if addressEndpoint.GetKind() != stack.PermanentTentative {
 			return true
 		}
@@ -363,50 +422,75 @@ func (e *endpoint) DefaultTTL() uint8 {
 // MTU implements stack.NetworkEndpoint.MTU. It returns the link-layer MTU minus
 // the network layer max header length.
 func (e *endpoint) MTU() uint32 {
-	return calculateMTU(e.nic.MTU())
+	networkMTU, err := calculateNetworkMTU(e.nic.MTU(), header.IPv6MinimumSize)
+	if err != nil {
+		return 0
+	}
+	return networkMTU
 }
 
 // MaxHeaderLength returns the maximum length needed by ipv6 headers (and
 // underlying protocols).
 func (e *endpoint) MaxHeaderLength() uint16 {
+	// TODO(gvisor.dev/issue/5035): The maximum header length returned here does
+	// not account for IPv6 extension headers, so callers cannot learn how much
+	// space those headers require.
 	return e.nic.MaxHeaderLength() + header.IPv6MinimumSize
 }
 
-func (e *endpoint) addIPHeader(r *stack.Route, pkt *stack.PacketBuffer, params stack.NetworkHeaderParams) {
-	length := uint16(pkt.Size())
-	ip := header.IPv6(pkt.NetworkHeader().Push(header.IPv6MinimumSize))
+func (e *endpoint) addIPHeader(srcAddr, dstAddr tcpip.Address, pkt *stack.PacketBuffer, params stack.NetworkHeaderParams, extensionHeaders header.IPv6ExtHdrSerializer) {
+	extHdrsLen := extensionHeaders.Length()
+	length := pkt.Size() + extensionHeaders.Length()
+	if length > math.MaxUint16 {
+		panic(fmt.Sprintf("IPv6 payload too large: %d, must be <= %d", length, math.MaxUint16))
+	}
+	ip := header.IPv6(pkt.NetworkHeader().Push(header.IPv6MinimumSize + extHdrsLen))
 	ip.Encode(&header.IPv6Fields{
-		PayloadLength: length,
-		NextHeader:    uint8(params.Protocol),
-		HopLimit:      params.TTL,
-		TrafficClass:  params.TOS,
-		SrcAddr:       r.LocalAddress,
-		DstAddr:       r.RemoteAddress,
+		PayloadLength:     uint16(length),
+		TransportProtocol: params.Protocol,
+		HopLimit:          params.TTL,
+		TrafficClass:      params.TOS,
+		SrcAddr:           srcAddr,
+		DstAddr:           dstAddr,
+		ExtensionHeaders:  extensionHeaders,
 	})
 	pkt.NetworkProtocolNumber = ProtocolNumber
 }
 
-func (e *endpoint) packetMustBeFragmented(pkt *stack.PacketBuffer, gso *stack.GSO) bool {
-	return (gso == nil || gso.Type == stack.GSONone) && pkt.Size() > int(e.nic.MTU())
+func packetMustBeFragmented(pkt *stack.PacketBuffer, networkMTU uint32, gso *stack.GSO) bool {
+	payload := pkt.TransportHeader().View().Size() + pkt.Data.Size()
+	return (gso == nil || gso.Type == stack.GSONone) && uint32(payload) > networkMTU
 }
 
 // handleFragments fragments pkt and calls the handler function on each
 // fragment. It returns the number of fragments handled and the number of
 // fragments left to be processed. The IP header must already be present in the
-// original packet. The mtu is the maximum size of the packets. The transport
-// header protocol number is required to avoid parsing the IPv6 extension
-// headers.
-func (e *endpoint) handleFragments(r *stack.Route, gso *stack.GSO, mtu uint32, pkt *stack.PacketBuffer, transProto tcpip.TransportProtocolNumber, handler func(*stack.PacketBuffer) *tcpip.Error) (int, int, *tcpip.Error) {
-	fragMTU := int(calculateFragmentInnerMTU(mtu, pkt))
-	if fragMTU < pkt.TransportHeader().View().Size() {
+// original packet. The transport header protocol number is required to avoid
+// parsing the IPv6 extension headers.
+func (e *endpoint) handleFragments(r *stack.Route, gso *stack.GSO, networkMTU uint32, pkt *stack.PacketBuffer, transProto tcpip.TransportProtocolNumber, handler func(*stack.PacketBuffer) *tcpip.Error) (int, int, *tcpip.Error) {
+	networkHeader := header.IPv6(pkt.NetworkHeader().View())
+
+	// TODO(gvisor.dev/issue/3912): Once the Authentication or ESP Headers are
+	// supported for outbound packets, their length should not affect the fragment
+	// maximum payload length because they should only be transmitted once.
+	fragmentPayloadLen := (networkMTU - header.IPv6FragmentHeaderSize) &^ 7
+	if fragmentPayloadLen < header.IPv6FragmentExtHdrFragmentOffsetBytesPerUnit {
+		// We need at least 8 bytes of space left for the fragmentable part because
+		// the fragment payload must obviously be non-zero and must be a multiple
+		// of 8 as per RFC 8200 section 4.5:
+		//   Each complete fragment, except possibly the last ("rightmost") one, is
+		//   an integer multiple of 8 octets long.
+		return 0, 1, tcpip.ErrMessageTooLong
+	}
+
+	if fragmentPayloadLen < uint32(pkt.TransportHeader().View().Size()) {
 		// As per RFC 8200 Section 4.5, the Transport Header is expected to be small
 		// enough to fit in the first fragment.
 		return 0, 1, tcpip.ErrMessageTooLong
 	}
 
-	pf := fragmentation.MakePacketFragmenter(pkt, fragMTU, calculateFragmentReserve(pkt))
+	pf := fragmentation.MakePacketFragmenter(pkt, fragmentPayloadLen, calculateFragmentReserve(pkt))
 	id := atomic.AddUint32(&e.protocol.ids[hashRoute(r, e.protocol.hashIV)%buckets], 1)
-	networkHeader := header.IPv6(pkt.NetworkHeader().View())
 
 	var n int
 	for {
@@ -423,18 +507,14 @@ func (e *endpoint) handleFragments(r *stack.Route, gso *stack.GSO, mtu uint32, p
 
 // WritePacket writes a packet to the given destination address and protocol.
 func (e *endpoint) WritePacket(r *stack.Route, gso *stack.GSO, params stack.NetworkHeaderParams, pkt *stack.PacketBuffer) *tcpip.Error {
-	e.addIPHeader(r, pkt, params)
-	return e.writePacket(r, gso, pkt, params.Protocol)
-}
+	e.addIPHeader(r.LocalAddress, r.RemoteAddress, pkt, params, nil /* extensionHeaders */)
 
-func (e *endpoint) writePacket(r *stack.Route, gso *stack.GSO, pkt *stack.PacketBuffer, protocol tcpip.TransportProtocolNumber) *tcpip.Error {
 	// iptables filtering. All packets that reach here are locally
 	// generated.
 	nicName := e.protocol.stack.FindNICNameFromID(e.nic.ID())
-	ipt := e.protocol.stack.IPTables()
-	if ok := ipt.Check(stack.Output, pkt, gso, r, "", nicName); !ok {
+	if ok := e.protocol.stack.IPTables().Check(stack.Output, pkt, gso, r, "", nicName); !ok {
 		// iptables is telling us to drop the packet.
-		r.Stats().IP.IPTablesOutputDropped.Increment()
+		e.protocol.stack.Stats().IP.IPTablesOutputDropped.Increment()
 		return nil
 	}
 
@@ -448,28 +528,43 @@ func (e *endpoint) writePacket(r *stack.Route, gso *stack.GSO, pkt *stack.Packet
 	if pkt.NatDone {
 		netHeader := header.IPv6(pkt.NetworkHeader().View())
 		if ep, err := e.protocol.stack.FindNetworkEndpoint(ProtocolNumber, netHeader.DestinationAddress()); err == nil {
-			route := r.ReverseRoute(netHeader.SourceAddress(), netHeader.DestinationAddress())
-			ep.HandlePacket(&route, pkt)
+			pkt := pkt.CloneToInbound()
+			if e.protocol.stack.ParsePacketBuffer(ProtocolNumber, pkt) == stack.ParsedOK {
+				// Since we rewrote the packet but it is being routed back to us, we can
+				// safely assume the checksum is valid.
+				pkt.RXTransportChecksumValidated = true
+				ep.(*endpoint).handlePacket(pkt)
+			}
 			return nil
 		}
 	}
 
-	if r.Loop&stack.PacketLoop != 0 {
-		loopedR := r.MakeLoopedRoute()
-
-		e.HandlePacket(&loopedR, stack.NewPacketBuffer(stack.PacketBufferOptions{
-			// The inbound path expects an unparsed packet.
-			Data: buffer.NewVectorisedView(pkt.Size(), pkt.Views()),
-		}))
+	return e.writePacket(r, gso, pkt, params.Protocol, false /* headerIncluded */)
+}
 
-		loopedR.Release()
+func (e *endpoint) writePacket(r *stack.Route, gso *stack.GSO, pkt *stack.PacketBuffer, protocol tcpip.TransportProtocolNumber, headerIncluded bool) *tcpip.Error {
+	if r.Loop&stack.PacketLoop != 0 {
+		pkt := pkt.CloneToInbound()
+		if e.protocol.stack.ParsePacketBuffer(ProtocolNumber, pkt) == stack.ParsedOK {
+			// If the packet was generated by the stack (not a raw/packet endpoint
+			// where a packet may be written with the header included), then we can
+			// safely assume the checksum is valid.
+			pkt.RXTransportChecksumValidated = !headerIncluded
+			e.handlePacket(pkt)
+		}
 	}
 	if r.Loop&stack.PacketOut == 0 {
 		return nil
 	}
 
-	if e.packetMustBeFragmented(pkt, gso) {
-		sent, remain, err := e.handleFragments(r, gso, e.nic.MTU(), pkt, protocol, func(fragPkt *stack.PacketBuffer) *tcpip.Error {
+	networkMTU, err := calculateNetworkMTU(e.nic.MTU(), uint32(pkt.NetworkHeader().View().Size()))
+	if err != nil {
+		r.Stats().IP.OutgoingPacketErrors.Increment()
+		return err
+	}
+
+	if packetMustBeFragmented(pkt, networkMTU, gso) {
+		sent, remain, err := e.handleFragments(r, gso, networkMTU, pkt, protocol, func(fragPkt *stack.PacketBuffer) *tcpip.Error {
 			// TODO(gvisor.dev/issue/3884): Evaluate whether we want to send each
 			// fragment one by one using WritePacket() (current strategy) or if we
 			// want to create a PacketBufferList from the fragments and feed it to
@@ -499,13 +594,20 @@ func (e *endpoint) WritePackets(r *stack.Route, gso *stack.GSO, pkts stack.Packe
 		return pkts.Len(), nil
 	}
 
+	linkMTU := e.nic.MTU()
 	for pb := pkts.Front(); pb != nil; pb = pb.Next() {
-		e.addIPHeader(r, pb, params)
-		if e.packetMustBeFragmented(pb, gso) {
+		e.addIPHeader(r.LocalAddress, r.RemoteAddress, pb, params, nil /* extensionHeaders */)
+
+		networkMTU, err := calculateNetworkMTU(linkMTU, uint32(pb.NetworkHeader().View().Size()))
+		if err != nil {
+			r.Stats().IP.OutgoingPacketErrors.IncrementBy(uint64(pkts.Len()))
+			return 0, err
+		}
+		if packetMustBeFragmented(pb, networkMTU, gso) {
 			// Keep track of the packet that is about to be fragmented so it can be
 			// removed once the fragmentation is done.
 			originalPkt := pb
-			if _, _, err := e.handleFragments(r, gso, e.nic.MTU(), pb, params.Protocol, func(fragPkt *stack.PacketBuffer) *tcpip.Error {
+			if _, _, err := e.handleFragments(r, gso, networkMTU, pb, params.Protocol, func(fragPkt *stack.PacketBuffer) *tcpip.Error {
 				// Modify the packet list in place with the new fragments.
 				pkts.InsertAfter(pb, fragPkt)
 				pb = fragPkt
@@ -522,8 +624,7 @@ func (e *endpoint) WritePackets(r *stack.Route, gso *stack.GSO, pkts stack.Packe
 	// iptables filtering. All packets that reach here are locally
 	// generated.
 	nicName := e.protocol.stack.FindNICNameFromID(e.nic.ID())
-	ipt := e.protocol.stack.IPTables()
-	dropped, natPkts := ipt.CheckPackets(stack.Output, pkts, gso, r, nicName)
+	dropped, natPkts := e.protocol.stack.IPTables().CheckPackets(stack.Output, pkts, gso, r, nicName)
 	if len(dropped) == 0 && len(natPkts) == 0 {
 		// Fast path: If no packets are to be dropped then we can just invoke the
 		// faster WritePackets API directly.
@@ -546,10 +647,13 @@ func (e *endpoint) WritePackets(r *stack.Route, gso *stack.GSO, pkts stack.Packe
 		if _, ok := natPkts[pkt]; ok {
 			netHeader := header.IPv6(pkt.NetworkHeader().View())
 			if ep, err := e.protocol.stack.FindNetworkEndpoint(ProtocolNumber, netHeader.DestinationAddress()); err == nil {
-				src := netHeader.SourceAddress()
-				dst := netHeader.DestinationAddress()
-				route := r.ReverseRoute(src, dst)
-				ep.HandlePacket(&route, pkt)
+				pkt := pkt.CloneToInbound()
+				if e.protocol.stack.ParsePacketBuffer(ProtocolNumber, pkt) == stack.ParsedOK {
+					// Since we rewrote the packet but it is being routed back to us, we
+					// can safely assume the checksum is valid.
+					pkt.RXTransportChecksumValidated = true
+					ep.(*endpoint).handlePacket(pkt)
+				}
 				n++
 				continue
 			}
@@ -569,7 +673,7 @@ func (e *endpoint) WritePackets(r *stack.Route, gso *stack.GSO, pkts stack.Packe
 	return n + len(dropped), nil
 }
 
-// WriteHeaderIncludedPacker implements stack.NetworkEndpoint.
+// WriteHeaderIncludedPacket implements stack.NetworkEndpoint.
 func (e *endpoint) WriteHeaderIncludedPacket(r *stack.Route, pkt *stack.PacketBuffer) *tcpip.Error {
 	// The packet already has an IP header, but there are a few required checks.
 	h, ok := pkt.Data.PullUp(header.IPv6MinimumSize)
@@ -602,27 +706,115 @@ func (e *endpoint) WriteHeaderIncludedPacket(r *stack.Route, pkt *stack.PacketBu
 		return tcpip.ErrMalformedHeader
 	}
 
-	return e.writePacket(r, nil /* gso */, pkt, proto)
+	return e.writePacket(r, nil /* gso */, pkt, proto, true /* headerIncluded */)
+}
+
+// forwardPacket attempts to forward a packet to its final destination.
+func (e *endpoint) forwardPacket(pkt *stack.PacketBuffer) *tcpip.Error {
+	h := header.IPv6(pkt.NetworkHeader().View())
+	hopLimit := h.HopLimit()
+	if hopLimit <= 1 {
+		// As per RFC 4443 section 3.3,
+		//
+		//   If a router receives a packet with a Hop Limit of zero, or if a
+		//   router decrements a packet's Hop Limit to zero, it MUST discard the
+		//   packet and originate an ICMPv6 Time Exceeded message with Code 0 to
+		//   the source of the packet.  This indicates either a routing loop or
+		//   too small an initial Hop Limit value.
+		return e.protocol.returnError(&icmpReasonHopLimitExceeded{}, pkt)
+	}
+
+	dstAddr := h.DestinationAddress()
+
+	// Check if the destination is owned by the stack.
+	networkEndpoint, err := e.protocol.stack.FindNetworkEndpoint(ProtocolNumber, dstAddr)
+	if err == nil {
+		networkEndpoint.(*endpoint).handlePacket(pkt)
+		return nil
+	}
+	if err != tcpip.ErrBadAddress {
+		return err
+	}
+
+	r, err := e.protocol.stack.FindRoute(0, "", dstAddr, ProtocolNumber, false /* multicastLoop */)
+	if err != nil {
+		return err
+	}
+	defer r.Release()
+
+	// We need to do a deep copy of the IP packet because
+	// WriteHeaderIncludedPacket takes ownership of the packet buffer, but we do
+	// not own it.
+	newHdr := header.IPv6(stack.PayloadSince(pkt.NetworkHeader()))
+
+	// As per RFC 8200 section 3,
+	//
+	//   Hop Limit           8-bit unsigned integer. Decremented by 1 by
+	//                       each node that forwards the packet.
+	newHdr.SetHopLimit(hopLimit - 1)
+
+	return r.WriteHeaderIncludedPacket(stack.NewPacketBuffer(stack.PacketBufferOptions{
+		ReserveHeaderBytes: int(r.MaxHeaderLength()),
+		Data:               buffer.View(newHdr).ToVectorisedView(),
+	}))
 }
 
 // HandlePacket is called by the link layer when new ipv6 packets arrive for
 // this endpoint.
-func (e *endpoint) HandlePacket(r *stack.Route, pkt *stack.PacketBuffer) {
+func (e *endpoint) HandlePacket(pkt *stack.PacketBuffer) {
+	stats := e.protocol.stack.Stats()
+	stats.IP.PacketsReceived.Increment()
+
 	if !e.isEnabled() {
+		stats.IP.DisabledPacketsReceived.Increment()
 		return
 	}
 
+	// Loopback traffic skips the prerouting chain.
+	if !e.nic.IsLoopback() {
+		if ok := e.protocol.stack.IPTables().Check(stack.Prerouting, pkt, nil, nil, e.MainAddress().Address, ""); !ok {
+			// iptables is telling us to drop the packet.
+			stats.IP.IPTablesPreroutingDropped.Increment()
+			return
+		}
+	}
+
+	e.handlePacket(pkt)
+}
+
+// handlePacket is like HandlePacket except it does not perform the prerouting
+// iptables hook.
+func (e *endpoint) handlePacket(pkt *stack.PacketBuffer) {
+	pkt.NICID = e.nic.ID()
+	stats := e.protocol.stack.Stats()
+
 	h := header.IPv6(pkt.NetworkHeader().View())
 	if !h.IsValid(pkt.Data.Size() + pkt.NetworkHeader().View().Size() + pkt.TransportHeader().View().Size()) {
-		r.Stats().IP.MalformedPacketsReceived.Increment()
+		stats.IP.MalformedPacketsReceived.Increment()
 		return
 	}
+	srcAddr := h.SourceAddress()
+	dstAddr := h.DestinationAddress()
 
 	// As per RFC 4291 section 2.7:
 	//   Multicast addresses must not be used as source addresses in IPv6
 	//   packets or appear in any Routing header.
-	if header.IsV6MulticastAddress(r.RemoteAddress) {
-		r.Stats().IP.InvalidSourceAddressesReceived.Increment()
+	if header.IsV6MulticastAddress(srcAddr) {
+		stats.IP.InvalidSourceAddressesReceived.Increment()
+		return
+	}
+
+	// The destination address should be an address we own or a group we joined
+	// for us to receive the packet. Otherwise, attempt to forward the packet.
+	if addressEndpoint := e.AcquireAssignedAddress(dstAddr, e.nic.Promiscuous(), stack.CanBePrimaryEndpoint); addressEndpoint != nil {
+		addressEndpoint.DecRef()
+	} else if !e.IsInGroup(dstAddr) {
+		if !e.protocol.Forwarding() {
+			stats.IP.InvalidDestinationAddressesReceived.Increment()
+			return
+		}
+
+		_ = e.forwardPacket(pkt)
 		return
 	}
 
@@ -638,10 +830,9 @@ func (e *endpoint) HandlePacket(r *stack.Route, pkt *stack.PacketBuffer) {
 
 	// iptables filtering. All packets that reach here are intended for
 	// this machine and need not be forwarded.
-	ipt := e.protocol.stack.IPTables()
-	if ok := ipt.Check(stack.Input, pkt, nil, nil, "", ""); !ok {
+	if ok := e.protocol.stack.IPTables().Check(stack.Input, pkt, nil, nil, "", ""); !ok {
 		// iptables is telling us to drop the packet.
-		r.Stats().IP.IPTablesInputDropped.Increment()
+		stats.IP.IPTablesInputDropped.Increment()
 		return
 	}
 
@@ -651,7 +842,7 @@ func (e *endpoint) HandlePacket(r *stack.Route, pkt *stack.PacketBuffer) {
 		previousHeaderStart := it.HeaderOffset()
 		extHdr, done, err := it.Next()
 		if err != nil {
-			r.Stats().IP.MalformedPacketsReceived.Increment()
+			stats.IP.MalformedPacketsReceived.Increment()
 			return
 		}
 		if done {
@@ -663,7 +854,7 @@ func (e *endpoint) HandlePacket(r *stack.Route, pkt *stack.PacketBuffer) {
 			// As per RFC 8200 section 4.1, the Hop By Hop extension header is
 			// restricted to appear immediately after an IPv6 fixed header.
 			if previousHeaderStart != 0 {
-				_ = e.protocol.returnError(r, &icmpReasonParameterProblem{
+				_ = e.protocol.returnError(&icmpReasonParameterProblem{
 					code:    header.ICMPv6UnknownHeader,
 					pointer: previousHeaderStart,
 				}, pkt)
@@ -675,7 +866,7 @@ func (e *endpoint) HandlePacket(r *stack.Route, pkt *stack.PacketBuffer) {
 			for {
 				opt, done, err := optsIt.Next()
 				if err != nil {
-					r.Stats().IP.MalformedPacketsReceived.Increment()
+					stats.IP.MalformedPacketsReceived.Increment()
 					return
 				}
 				if done {
@@ -689,7 +880,7 @@ func (e *endpoint) HandlePacket(r *stack.Route, pkt *stack.PacketBuffer) {
 				case header.IPv6OptionUnknownActionDiscard:
 					return
 				case header.IPv6OptionUnknownActionDiscardSendICMPNoMulticastDest:
-					if header.IsV6MulticastAddress(r.LocalAddress) {
+					if header.IsV6MulticastAddress(dstAddr) {
 						return
 					}
 					fallthrough
@@ -702,7 +893,7 @@ func (e *endpoint) HandlePacket(r *stack.Route, pkt *stack.PacketBuffer) {
 					//    ICMP Parameter Problem, Code 2, message to the packet's
 					//    Source Address, pointing to the unrecognized Option Type.
 					//
-					_ = e.protocol.returnError(r, &icmpReasonParameterProblem{
+					_ = e.protocol.returnError(&icmpReasonParameterProblem{
 						code:               header.ICMPv6UnknownOption,
 						pointer:            it.ParseOffset() + optsIt.OptionOffset(),
 						respondToMulticast: true,
@@ -727,7 +918,7 @@ func (e *endpoint) HandlePacket(r *stack.Route, pkt *stack.PacketBuffer) {
 			// header, so we just make sure Segments Left is zero before processing
 			// the next extension header.
 			if extHdr.SegmentsLeft() != 0 {
-				_ = e.protocol.returnError(r, &icmpReasonParameterProblem{
+				_ = e.protocol.returnError(&icmpReasonParameterProblem{
 					code:    header.ICMPv6ErroneousHeader,
 					pointer: it.ParseOffset(),
 				}, pkt)
@@ -747,6 +938,8 @@ func (e *endpoint) HandlePacket(r *stack.Route, pkt *stack.PacketBuffer) {
 				continue
 			}
 
+			fragmentFieldOffset := it.ParseOffset()
+
 			// Don't consume the iterator if we have the first fragment because we
 			// will use it to validate that the first fragment holds the upper layer
 			// header.
@@ -762,8 +955,8 @@ func (e *endpoint) HandlePacket(r *stack.Route, pkt *stack.PacketBuffer) {
 				for {
 					it, done, err := it.Next()
 					if err != nil {
-						r.Stats().IP.MalformedPacketsReceived.Increment()
-						r.Stats().IP.MalformedFragmentsReceived.Increment()
+						stats.IP.MalformedPacketsReceived.Increment()
+						stats.IP.MalformedFragmentsReceived.Increment()
 						return
 					}
 					if done {
@@ -790,8 +983,8 @@ func (e *endpoint) HandlePacket(r *stack.Route, pkt *stack.PacketBuffer) {
 				switch lastHdr.(type) {
 				case header.IPv6RawPayloadHeader:
 				default:
-					r.Stats().IP.MalformedPacketsReceived.Increment()
-					r.Stats().IP.MalformedFragmentsReceived.Increment()
+					stats.IP.MalformedPacketsReceived.Increment()
+					stats.IP.MalformedFragmentsReceived.Increment()
 					return
 				}
 			}
@@ -799,19 +992,47 @@ func (e *endpoint) HandlePacket(r *stack.Route, pkt *stack.PacketBuffer) {
 			fragmentPayloadLen := rawPayload.Buf.Size()
 			if fragmentPayloadLen == 0 {
 				// Drop the packet as it's marked as a fragment but has no payload.
-				r.Stats().IP.MalformedPacketsReceived.Increment()
-				r.Stats().IP.MalformedFragmentsReceived.Increment()
+				stats.IP.MalformedPacketsReceived.Increment()
+				stats.IP.MalformedFragmentsReceived.Increment()
+				return
+			}
+
+			// As per RFC 2460 Section 4.5:
+			//
+			//    If the length of a fragment, as derived from the fragment packet's
+			//    Payload Length field, is not a multiple of 8 octets and the M flag
+			//    of that fragment is 1, then that fragment must be discarded and an
+			//    ICMP Parameter Problem, Code 0, message should be sent to the source
+			//    of the fragment, pointing to the Payload Length field of the
+			//    fragment packet.
+			if extHdr.More() && fragmentPayloadLen%header.IPv6FragmentExtHdrFragmentOffsetBytesPerUnit != 0 {
+				stats.IP.MalformedPacketsReceived.Increment()
+				stats.IP.MalformedFragmentsReceived.Increment()
+				_ = e.protocol.returnError(&icmpReasonParameterProblem{
+					code:    header.ICMPv6ErroneousHeader,
+					pointer: header.IPv6PayloadLenOffset,
+				}, pkt)
 				return
 			}
 
 			// The packet is a fragment, let's try to reassemble it.
 			start := extHdr.FragmentOffset() * header.IPv6FragmentExtHdrFragmentOffsetBytesPerUnit
 
-			// Drop the fragment if the size of the reassembled payload would exceed
-			// the maximum payload size.
+			// As per RFC 2460 Section 4.5:
+			//
+			//    If the length and offset of a fragment are such that the Payload
+			//    Length of the packet reassembled from that fragment would exceed
+			//    65,535 octets, then that fragment must be discarded and an ICMP
+			//    Parameter Problem, Code 0, message should be sent to the source of
+			//    the fragment, pointing to the Fragment Offset field of the fragment
+			//    packet.
 			if int(start)+fragmentPayloadLen > header.IPv6MaximumPayloadSize {
-				r.Stats().IP.MalformedPacketsReceived.Increment()
-				r.Stats().IP.MalformedFragmentsReceived.Increment()
+				stats.IP.MalformedPacketsReceived.Increment()
+				stats.IP.MalformedFragmentsReceived.Increment()
+				_ = e.protocol.returnError(&icmpReasonParameterProblem{
+					code:    header.ICMPv6ErroneousHeader,
+					pointer: fragmentFieldOffset,
+				}, pkt)
 				return
 			}
 
@@ -821,24 +1042,25 @@ func (e *endpoint) HandlePacket(r *stack.Route, pkt *stack.PacketBuffer) {
 				// IPv6 ignores the Protocol field since the ID only needs to be unique
 				// across source-destination pairs, as per RFC 8200 section 4.5.
 				fragmentation.FragmentID{
-					Source:      h.SourceAddress(),
-					Destination: h.DestinationAddress(),
+					Source:      srcAddr,
+					Destination: dstAddr,
 					ID:          extHdr.ID(),
 				},
 				start,
 				start+uint16(fragmentPayloadLen)-1,
 				extHdr.More(),
 				uint8(rawPayload.Identifier),
-				rawPayload.Buf,
+				pkt,
 			)
 			if err != nil {
-				r.Stats().IP.MalformedPacketsReceived.Increment()
-				r.Stats().IP.MalformedFragmentsReceived.Increment()
+				stats.IP.MalformedPacketsReceived.Increment()
+				stats.IP.MalformedFragmentsReceived.Increment()
 				return
 			}
-			pkt.Data = data
 
 			if ready {
+				pkt.Data = data
+
 				// We create a new iterator with the reassembled packet because we could
 				// have more extension headers in the reassembled payload, as per RFC
 				// 8200 section 4.5. We also use the NextHeader value from the first
@@ -852,7 +1074,7 @@ func (e *endpoint) HandlePacket(r *stack.Route, pkt *stack.PacketBuffer) {
 			for {
 				opt, done, err := optsIt.Next()
 				if err != nil {
-					r.Stats().IP.MalformedPacketsReceived.Increment()
+					stats.IP.MalformedPacketsReceived.Increment()
 					return
 				}
 				if done {
@@ -866,7 +1088,7 @@ func (e *endpoint) HandlePacket(r *stack.Route, pkt *stack.PacketBuffer) {
 				case header.IPv6OptionUnknownActionDiscard:
 					return
 				case header.IPv6OptionUnknownActionDiscardSendICMPNoMulticastDest:
-					if header.IsV6MulticastAddress(r.LocalAddress) {
+					if header.IsV6MulticastAddress(dstAddr) {
 						return
 					}
 					fallthrough
@@ -879,7 +1101,7 @@ func (e *endpoint) HandlePacket(r *stack.Route, pkt *stack.PacketBuffer) {
 					//    ICMP Parameter Problem, Code 2, message to the packet's
 					//    Source Address, pointing to the unrecognized Option Type.
 					//
-					_ = e.protocol.returnError(r, &icmpReasonParameterProblem{
+					_ = e.protocol.returnError(&icmpReasonParameterProblem{
 						code:               header.ICMPv6UnknownOption,
 						pointer:            it.ParseOffset() + optsIt.OptionOffset(),
 						respondToMulticast: true,
@@ -902,13 +1124,13 @@ func (e *endpoint) HandlePacket(r *stack.Route, pkt *stack.PacketBuffer) {
 			extHdr.Buf.TrimFront(pkt.TransportHeader().View().Size())
 			pkt.Data = extHdr.Buf
 
-			r.Stats().IP.PacketsDelivered.Increment()
+			stats.IP.PacketsDelivered.Increment()
 			if p := tcpip.TransportProtocolNumber(extHdr.Identifier); p == header.ICMPv6ProtocolNumber {
 				pkt.TransportProtocolNumber = p
-				e.handleICMP(r, pkt, hasFragmentHeader)
+				e.handleICMP(pkt, hasFragmentHeader)
 			} else {
-				r.Stats().IP.PacketsDelivered.Increment()
-				switch res := e.dispatcher.DeliverTransportPacket(r, p, pkt); res {
+				stats.IP.PacketsDelivered.Increment()
+				switch res := e.dispatcher.DeliverTransportPacket(p, pkt); res {
 				case stack.TransportPacketHandled:
 				case stack.TransportPacketDestinationPortUnreachable:
 					// As per RFC 4443 section 3.1:
@@ -916,7 +1138,7 @@ func (e *endpoint) HandlePacket(r *stack.Route, pkt *stack.PacketBuffer) {
 					//   message with Code 4 in response to a packet for which the
 					//   transport protocol (e.g., UDP) has no listener, if that transport
 					//   protocol has no alternative means to inform the sender.
-					_ = e.protocol.returnError(r, &icmpReasonPortUnreachable{}, pkt)
+					_ = e.protocol.returnError(&icmpReasonPortUnreachable{}, pkt)
 				case stack.TransportPacketProtocolUnreachable:
 					// As per RFC 8200 section 4. (page 7):
 					//   Extension headers are numbered from IANA IP Protocol Numbers
@@ -937,9 +1159,16 @@ func (e *endpoint) HandlePacket(r *stack.Route, pkt *stack.PacketBuffer) {
 					//
 					// Which when taken together indicate that an unknown protocol should
 					// be treated as an unrecognized next header value.
-					_ = e.protocol.returnError(r, &icmpReasonParameterProblem{
+					// The location of the Next Header field is in a different place in
+					// the initial IPv6 header than it is in the extension headers so
+					// treat it specially.
+					prevHdrIDOffset := uint32(header.IPv6NextHeaderOffset)
+					if previousHeaderStart != 0 {
+						prevHdrIDOffset = previousHeaderStart
+					}
+					_ = e.protocol.returnError(&icmpReasonParameterProblem{
 						code:    header.ICMPv6UnknownHeader,
-						pointer: it.ParseOffset(),
+						pointer: prevHdrIDOffset,
 					}, pkt)
 				default:
 					panic(fmt.Sprintf("unrecognized result from DeliverTransportPacket = %d", res))
@@ -947,12 +1176,11 @@ func (e *endpoint) HandlePacket(r *stack.Route, pkt *stack.PacketBuffer) {
 			}
 
 		default:
-			_ = e.protocol.returnError(r, &icmpReasonParameterProblem{
-				code:    header.ICMPv6UnknownHeader,
-				pointer: it.ParseOffset(),
-			}, pkt)
-			r.Stats().UnknownProtocolRcvdPackets.Increment()
-			return
+			// Since the iterator returns IPv6RawPayloadHeader for unknown Extension
+			// Header IDs this should never happen unless we missed a supported type
+			// here.
+			panic(fmt.Sprintf("unrecognized type from it.Next() = %T", extHdr))
+
 		}
 	}
 }
@@ -1000,11 +1228,6 @@ func (e *endpoint) addAndAcquirePermanentAddressLocked(addr tcpip.AddressWithPre
 		return addressEndpoint, nil
 	}
 
-	snmc := header.SolicitedNodeAddr(addr.Address)
-	if _, err := e.mu.addressableEndpointState.JoinGroup(snmc); err != nil {
-		return nil, err
-	}
-
 	addressEndpoint.SetKind(stack.PermanentTentative)
 
 	if e.Enabled() {
@@ -1013,6 +1236,13 @@ func (e *endpoint) addAndAcquirePermanentAddressLocked(addr tcpip.AddressWithPre
 		}
 	}
 
+	snmc := header.SolicitedNodeAddr(addr.Address)
+	if err := e.joinGroupLocked(snmc); err != nil {
+		// joinGroupLocked only returns an error if the group address is not a valid
+		// IPv6 multicast address.
+		panic(fmt.Sprintf("e.joinGroupLocked(%s): %s", snmc, err))
+	}
+
 	return addressEndpoint, nil
 }
 
@@ -1058,7 +1288,8 @@ func (e *endpoint) removePermanentEndpointLocked(addressEndpoint stack.AddressEn
 	}
 
 	snmc := header.SolicitedNodeAddr(addr.Address)
-	if _, err := e.mu.addressableEndpointState.LeaveGroup(snmc); err != nil && err != tcpip.ErrBadLocalAddress {
+	// The endpoint may have already left the multicast group.
+	if err := e.leaveGroupLocked(snmc); err != nil && err != tcpip.ErrBadLocalAddress {
 		return err
 	}
 
@@ -1081,7 +1312,7 @@ func (e *endpoint) hasPermanentAddressRLocked(addr tcpip.Address) bool {
 //
 // Precondition: e.mu must be read or write locked.
 func (e *endpoint) getAddressRLocked(localAddr tcpip.Address) stack.AddressEndpoint {
-	return e.mu.addressableEndpointState.ReadOnly().Lookup(localAddr)
+	return e.mu.addressableEndpointState.GetAddress(localAddr)
 }
 
 // MainAddress implements stack.AddressableEndpoint.
@@ -1113,6 +1344,26 @@ func (e *endpoint) AcquireOutgoingPrimaryAddress(remoteAddr tcpip.Address, allow
 	return e.acquireOutgoingPrimaryAddressRLocked(remoteAddr, allowExpired)
 }
 
+// getLinkLocalAddressRLocked returns a link-local address from the primary list
+// of addresses, if one is available.
+//
+// See stack.PrimaryEndpointBehavior for more details about the primary list.
+//
+// Precondition: e.mu must be read locked.
+func (e *endpoint) getLinkLocalAddressRLocked() tcpip.Address {
+	var linkLocalAddr tcpip.Address
+	e.mu.addressableEndpointState.ForEachPrimaryEndpoint(func(addressEndpoint stack.AddressEndpoint) bool {
+		if addressEndpoint.IsAssigned(false /* allowExpired */) {
+			if addr := addressEndpoint.AddressWithPrefix().Address; header.IsV6LinkLocalAddress(addr) {
+				linkLocalAddr = addr
+				return false
+			}
+		}
+		return true
+	})
+	return linkLocalAddr
+}
+
 // acquireOutgoingPrimaryAddressRLocked is like AcquireOutgoingPrimaryAddress
 // but with locking requirements.
 //
@@ -1132,10 +1383,10 @@ func (e *endpoint) acquireOutgoingPrimaryAddressRLocked(remoteAddr tcpip.Address
 	// Create a candidate set of available addresses we can potentially use as a
 	// source address.
 	var cs []addrCandidate
-	e.mu.addressableEndpointState.ReadOnly().ForEachPrimaryEndpoint(func(addressEndpoint stack.AddressEndpoint) {
+	e.mu.addressableEndpointState.ForEachPrimaryEndpoint(func(addressEndpoint stack.AddressEndpoint) bool {
 		// If r is not valid for outgoing connections, it is not a valid endpoint.
 		if !addressEndpoint.IsAssigned(allowExpired) {
-			return
+			return true
 		}
 
 		addr := addressEndpoint.AddressWithPrefix().Address
@@ -1151,6 +1402,8 @@ func (e *endpoint) acquireOutgoingPrimaryAddressRLocked(remoteAddr tcpip.Address
 			addressEndpoint: addressEndpoint,
 			scope:           scope,
 		})
+
+		return true
 	})
 
 	remoteScope, err := header.ScopeForIPv6Address(remoteAddr)
@@ -1223,35 +1476,52 @@ func (e *endpoint) PermanentAddresses() []tcpip.AddressWithPrefix {
 }
 
 // JoinGroup implements stack.GroupAddressableEndpoint.
-func (e *endpoint) JoinGroup(addr tcpip.Address) (bool, *tcpip.Error) {
+func (e *endpoint) JoinGroup(addr tcpip.Address) *tcpip.Error {
+	e.mu.Lock()
+	defer e.mu.Unlock()
+	return e.joinGroupLocked(addr)
+}
+
+// joinGroupLocked is like JoinGroup but with locking requirements.
+//
+// Precondition: e.mu must be locked.
+func (e *endpoint) joinGroupLocked(addr tcpip.Address) *tcpip.Error {
 	if !header.IsV6MulticastAddress(addr) {
-		return false, tcpip.ErrBadAddress
+		return tcpip.ErrBadAddress
 	}
 
-	e.mu.Lock()
-	defer e.mu.Unlock()
-	return e.mu.addressableEndpointState.JoinGroup(addr)
+	e.mu.mld.joinGroup(addr)
+	return nil
 }
 
 // LeaveGroup implements stack.GroupAddressableEndpoint.
-func (e *endpoint) LeaveGroup(addr tcpip.Address) (bool, *tcpip.Error) {
+func (e *endpoint) LeaveGroup(addr tcpip.Address) *tcpip.Error {
 	e.mu.Lock()
 	defer e.mu.Unlock()
-	return e.mu.addressableEndpointState.LeaveGroup(addr)
+	return e.leaveGroupLocked(addr)
+}
+
+// leaveGroupLocked is like LeaveGroup but with locking requirements.
+//
+// Precondition: e.mu must be locked.
+func (e *endpoint) leaveGroupLocked(addr tcpip.Address) *tcpip.Error {
+	return e.mu.mld.leaveGroup(addr)
 }
 
 // IsInGroup implements stack.GroupAddressableEndpoint.
 func (e *endpoint) IsInGroup(addr tcpip.Address) bool {
 	e.mu.RLock()
 	defer e.mu.RUnlock()
-	return e.mu.addressableEndpointState.IsInGroup(addr)
+	return e.mu.mld.isInGroup(addr)
 }
 
 var _ stack.ForwardingNetworkProtocol = (*protocol)(nil)
 var _ stack.NetworkProtocol = (*protocol)(nil)
+var _ fragmentation.TimeoutHandler = (*protocol)(nil)
 
 type protocol struct {
-	stack *stack.Stack
+	stack   *stack.Stack
+	options Options
 
 	mu struct {
 		sync.RWMutex
@@ -1275,26 +1545,6 @@ type protocol struct {
 	forwarding uint32
 
 	fragmentation *fragmentation.Fragmentation
-
-	// ndpDisp is the NDP event dispatcher that is used to send the netstack
-	// integrator NDP related events.
-	ndpDisp NDPDispatcher
-
-	// ndpConfigs is the default NDP configurations used by an IPv6 endpoint.
-	ndpConfigs NDPConfigurations
-
-	// opaqueIIDOpts hold the options for generating opaque interface identifiers
-	// (IIDs) as outlined by RFC 7217.
-	opaqueIIDOpts OpaqueInterfaceIdentifierOptions
-
-	// tempIIDSeed is used to seed the initial temporary interface identifier
-	// history value used to generate IIDs for temporary SLAAC addresses.
-	tempIIDSeed []byte
-
-	// autoGenIPv6LinkLocal determines whether or not the stack attempts to
-	// auto-generate an IPv6 link-local address for newly enabled non-loopback
-	// NICs. See the AutoGenIPv6LinkLocal field of Options for more details.
-	autoGenIPv6LinkLocal bool
 }
 
 // Number returns the ipv6 protocol number.
@@ -1327,16 +1577,11 @@ func (p *protocol) NewEndpoint(nic stack.NetworkInterface, linkAddrCache stack.L
 		dispatcher:    dispatcher,
 		protocol:      p,
 	}
+	e.mu.Lock()
 	e.mu.addressableEndpointState.Init(e)
-	e.mu.ndp = ndpState{
-		ep:             e,
-		configs:        p.ndpConfigs,
-		dad:            make(map[tcpip.Address]dadState),
-		defaultRouters: make(map[tcpip.Address]defaultRouterState),
-		onLinkPrefixes: make(map[tcpip.Subnet]onLinkPrefixState),
-		slaacPrefixes:  make(map[tcpip.Subnet]slaacPrefixState),
-	}
-	e.mu.ndp.initializeTempAddrState()
+	e.mu.ndp.init(e)
+	e.mu.mld.init(e)
+	e.mu.Unlock()
 
 	p.mu.Lock()
 	defer p.mu.Unlock()
@@ -1427,14 +1672,31 @@ func (p *protocol) SetForwarding(v bool) {
 	}
 }
 
-// calculateMTU calculates the network-layer payload MTU based on the link-layer
-// payload mtu.
-func calculateMTU(mtu uint32) uint32 {
-	mtu -= header.IPv6MinimumSize
-	if mtu <= maxPayloadSize {
-		return mtu
+// calculateNetworkMTU calculates the network-layer payload MTU based on the
+// link-layer payload MTU and the length of every IPv6 header.
+// Note that this is different than the Payload Length field of the IPv6 header,
+// which includes the length of the extension headers.
+func calculateNetworkMTU(linkMTU, networkHeadersLen uint32) (uint32, *tcpip.Error) {
+	if linkMTU < header.IPv6MinimumMTU {
+		return 0, tcpip.ErrInvalidEndpointState
+	}
+
+	// As per RFC 7112 section 5, we should discard packets if their IPv6 header
+	// chain is bigger than 1280 bytes (i.e., the minimum link MTU) since we do
+	// not support PMTU discovery:
+	//   Hosts that do not discover the Path MTU MUST limit the IPv6 Header Chain
+	//   length to 1280 bytes.  Limiting the IPv6 Header Chain length to 1280
+	//   bytes ensures that the header chain length does not exceed the IPv6
+	//   minimum MTU.
+	if networkHeadersLen > header.IPv6MinimumMTU {
+		return 0, tcpip.ErrMalformedHeader
+	}
+
+	networkMTU := linkMTU - uint32(networkHeadersLen)
+	if networkMTU > maxPayloadSize {
+		networkMTU = maxPayloadSize
 	}
-	return maxPayloadSize
+	return networkMTU, nil
 }
 
 // Options holds options to configure a new protocol.
@@ -1442,17 +1704,17 @@ type Options struct {
 	// NDPConfigs is the default NDP configurations used by interfaces.
 	NDPConfigs NDPConfigurations
 
-	// AutoGenIPv6LinkLocal determines whether or not the stack attempts to
-	// auto-generate an IPv6 link-local address for newly enabled non-loopback
+	// AutoGenLinkLocal determines whether or not the stack attempts to
+	// auto-generate a link-local address for newly enabled non-loopback
 	// NICs.
 	//
 	// Note, setting this to true does not mean that a link-local address is
 	// assigned right away, or at all. If Duplicate Address Detection is enabled,
 	// an address is only assigned if it successfully resolves. If it fails, no
-	// further attempts are made to auto-generate an IPv6 link-local adddress.
+	// further attempts are made to auto-generate a link-local address.
 	//
 	// The generated link-local address follows RFC 4291 Appendix A guidelines.
-	AutoGenIPv6LinkLocal bool
+	AutoGenLinkLocal bool
 
 	// NDPDisp is the NDP event dispatcher that an integrator can provide to
 	// receive NDP related events.
@@ -1476,6 +1738,9 @@ type Options struct {
 	// seed that is too small would reduce randomness and increase predictability,
 	// defeating the purpose of temporary SLAAC addresses.
 	TempIIDSeed []byte
+
+	// MLD holds options for MLD.
+	MLD MLDOptions
 }
 
 // NewProtocolWithOptions returns an IPv6 network protocol.
@@ -1487,17 +1752,13 @@ func NewProtocolWithOptions(opts Options) stack.NetworkProtocolFactory {
 
 	return func(s *stack.Stack) stack.NetworkProtocol {
 		p := &protocol{
-			stack:         s,
-			fragmentation: fragmentation.NewFragmentation(header.IPv6FragmentExtHdrFragmentOffsetBytesPerUnit, fragmentation.HighFragThreshold, fragmentation.LowFragThreshold, reassembleTimeout, s.Clock()),
-			ids:           ids,
-			hashIV:        hashIV,
-
-			ndpDisp:              opts.NDPDisp,
-			ndpConfigs:           opts.NDPConfigs,
-			opaqueIIDOpts:        opts.OpaqueIIDOpts,
-			tempIIDSeed:          opts.TempIIDSeed,
-			autoGenIPv6LinkLocal: opts.AutoGenIPv6LinkLocal,
+			stack:   s,
+			options: opts,
+
+			ids:    ids,
+			hashIV: hashIV,
 		}
+		p.fragmentation = fragmentation.NewFragmentation(header.IPv6FragmentExtHdrFragmentOffsetBytesPerUnit, fragmentation.HighFragThreshold, fragmentation.LowFragThreshold, ReassembleTimeout, s.Clock(), p)
 		p.mu.eps = make(map[*endpoint]struct{})
 		p.SetDefaultTTL(DefaultTTL)
 		return p
@@ -1509,23 +1770,6 @@ func NewProtocol(s *stack.Stack) stack.NetworkProtocol {
 	return NewProtocolWithOptions(Options{})(s)
 }
 
-// calculateFragmentInnerMTU calculates the maximum number of bytes of
-// fragmentable data a fragment can have, based on the link layer mtu and pkt's
-// network header size.
-func calculateFragmentInnerMTU(mtu uint32, pkt *stack.PacketBuffer) uint32 {
-	// TODO(gvisor.dev/issue/3912): Once the Authentication or ESP Headers are
-	// supported for outbound packets, their length should not affect the fragment
-	// MTU because they should only be transmitted once.
-	mtu -= uint32(pkt.NetworkHeader().View().Size())
-	mtu -= header.IPv6FragmentHeaderSize
-	// Round the MTU down to align to 8 bytes.
-	mtu &^= 7
-	if mtu <= maxPayloadSize {
-		return mtu
-	}
-	return maxPayloadSize
-}
-
 func calculateFragmentReserve(pkt *stack.PacketBuffer) int {
 	return pkt.AvailableHeaderBytes() + pkt.NetworkHeader().View().Size() + header.IPv6FragmentHeaderSize
 }
@@ -1558,23 +1802,25 @@ func buildNextFragment(pf *fragmentation.PacketFragmenter, originalIPHeaders hea
 	fragPkt.NetworkProtocolNumber = ProtocolNumber
 
 	originalIPHeadersLength := len(originalIPHeaders)
-	fragmentIPHeadersLength := originalIPHeadersLength + header.IPv6FragmentHeaderSize
+
+	s := header.IPv6ExtHdrSerializer{&header.IPv6SerializableFragmentExtHdr{
+		FragmentOffset: uint16(offset / header.IPv6FragmentExtHdrFragmentOffsetBytesPerUnit),
+		M:              more,
+		Identification: id,
+	}}
+
+	fragmentIPHeadersLength := originalIPHeadersLength + s.Length()
 	fragmentIPHeaders := header.IPv6(fragPkt.NetworkHeader().Push(fragmentIPHeadersLength))
 
 	// Copy the IPv6 header and any extension headers already populated.
 	if copied := copy(fragmentIPHeaders, originalIPHeaders); copied != originalIPHeadersLength {
 		panic(fmt.Sprintf("wrong number of bytes copied into fragmentIPHeaders: got %d, want %d", copied, originalIPHeadersLength))
 	}
-	fragmentIPHeaders.SetNextHeader(header.IPv6FragmentHeader)
-	fragmentIPHeaders.SetPayloadLength(uint16(copied + fragmentIPHeadersLength - header.IPv6MinimumSize))
 
-	fragmentHeader := header.IPv6Fragment(fragmentIPHeaders[originalIPHeadersLength:])
-	fragmentHeader.Encode(&header.IPv6FragmentFields{
-		M:              more,
-		FragmentOffset: uint16(offset / header.IPv6FragmentExtHdrFragmentOffsetBytesPerUnit),
-		Identification: id,
-		NextHeader:     uint8(transportProto),
-	})
+	nextHeader, _ := s.Serialize(transportProto, fragmentIPHeaders[originalIPHeadersLength:])
+
+	fragmentIPHeaders.SetNextHeader(nextHeader)
+	fragmentIPHeaders.SetPayloadLength(uint16(copied + fragmentIPHeadersLength - header.IPv6MinimumSize))
 
 	return fragPkt, more
 }
diff --git a/pkg/tcpip/network/ipv6/ipv6_test.go b/pkg/tcpip/network/ipv6/ipv6_test.go
index 297868f24..5f07d3af8 100644
--- a/pkg/tcpip/network/ipv6/ipv6_test.go
+++ b/pkg/tcpip/network/ipv6/ipv6_test.go
@@ -18,12 +18,14 @@ import (
 	"encoding/hex"
 	"fmt"
 	"math"
+	"net"
 	"testing"
 
 	"github.com/google/go-cmp/cmp"
 	"gvisor.dev/gvisor/pkg/tcpip"
 	"gvisor.dev/gvisor/pkg/tcpip/buffer"
 	"gvisor.dev/gvisor/pkg/tcpip/checker"
+	"gvisor.dev/gvisor/pkg/tcpip/faketime"
 	"gvisor.dev/gvisor/pkg/tcpip/header"
 	"gvisor.dev/gvisor/pkg/tcpip/link/channel"
 	"gvisor.dev/gvisor/pkg/tcpip/network/testutil"
@@ -49,6 +51,7 @@ const (
 	fragmentExtHdrID    = uint8(header.IPv6FragmentExtHdrIdentifier)
 	destinationExtHdrID = uint8(header.IPv6DestinationOptionsExtHdrIdentifier)
 	noNextHdrID         = uint8(header.IPv6NoNextHeaderIdentifier)
+	unknownHdrID        = uint8(header.IPv6UnknownExtHdrIdentifier)
 
 	extraHeaderReserve = 50
 )
@@ -66,18 +69,18 @@ func testReceiveICMP(t *testing.T, s *stack.Stack, e *channel.Endpoint, src, dst
 	payloadLength := hdr.UsedLength()
 	ip := header.IPv6(hdr.Prepend(header.IPv6MinimumSize))
 	ip.Encode(&header.IPv6Fields{
-		PayloadLength: uint16(payloadLength),
-		NextHeader:    uint8(header.ICMPv6ProtocolNumber),
-		HopLimit:      255,
-		SrcAddr:       src,
-		DstAddr:       dst,
+		PayloadLength:     uint16(payloadLength),
+		TransportProtocol: header.ICMPv6ProtocolNumber,
+		HopLimit:          255,
+		SrcAddr:           src,
+		DstAddr:           dst,
 	})
 
 	e.InjectInbound(ProtocolNumber, stack.NewPacketBuffer(stack.PacketBufferOptions{
 		Data: hdr.View().ToVectorisedView(),
 	}))
 
-	stats := s.Stats().ICMP.V6PacketsReceived
+	stats := s.Stats().ICMP.V6.PacketsReceived
 
 	if got := stats.NeighborAdvert.Value(); got != want {
 		t.Fatalf("got NeighborAdvert = %d, want = %d", got, want)
@@ -124,11 +127,11 @@ func testReceiveUDP(t *testing.T, s *stack.Stack, e *channel.Endpoint, src, dst
 	payloadLength := hdr.UsedLength()
 	ip := header.IPv6(hdr.Prepend(header.IPv6MinimumSize))
 	ip.Encode(&header.IPv6Fields{
-		PayloadLength: uint16(payloadLength),
-		NextHeader:    uint8(udp.ProtocolNumber),
-		HopLimit:      255,
-		SrcAddr:       src,
-		DstAddr:       dst,
+		PayloadLength:     uint16(payloadLength),
+		TransportProtocol: udp.ProtocolNumber,
+		HopLimit:          255,
+		SrcAddr:           src,
+		DstAddr:           dst,
 	})
 
 	e.InjectInbound(ProtocolNumber, stack.NewPacketBuffer(stack.PacketBufferOptions{
@@ -238,7 +241,7 @@ func TestReceiveOnAllNodesMulticastAddr(t *testing.T) {
 				NetworkProtocols:   []stack.NetworkProtocolFactory{NewProtocol},
 				TransportProtocols: []stack.TransportProtocolFactory{test.protocolFactory},
 			})
-			e := channel.New(10, 1280, linkAddr1)
+			e := channel.New(10, header.IPv6MinimumMTU, linkAddr1)
 			if err := s.CreateNIC(1, e); err != nil {
 				t.Fatalf("CreateNIC(_) = %s", err)
 			}
@@ -271,7 +274,7 @@ func TestReceiveOnSolicitedNodeAddr(t *testing.T) {
 				NetworkProtocols:   []stack.NetworkProtocolFactory{NewProtocol},
 				TransportProtocols: []stack.TransportProtocolFactory{test.protocolFactory},
 			})
-			e := channel.New(1, 1280, linkAddr1)
+			e := channel.New(1, header.IPv6MinimumMTU, linkAddr1)
 			if err := s.CreateNIC(nicID, e); err != nil {
 				t.Fatalf("CreateNIC(%d, _) = %s", nicID, err)
 			}
@@ -571,6 +574,33 @@ func TestReceiveIPv6ExtHdrs(t *testing.T) {
 			expectICMP:   false,
 		},
 		{
+			name: "unknown next header (first)",
+			extHdr: func(nextHdr uint8) ([]byte, uint8) {
+				return []byte{
+					nextHdr, 0, 63, 4, 1, 2, 3, 4,
+				}, unknownHdrID
+			},
+			shouldAccept: false,
+			expectICMP:   true,
+			ICMPType:     header.ICMPv6ParamProblem,
+			ICMPCode:     header.ICMPv6UnknownHeader,
+			pointer:      header.IPv6NextHeaderOffset,
+		},
+		{
+			name: "unknown next header (not first)",
+			extHdr: func(nextHdr uint8) ([]byte, uint8) {
+				return []byte{
+					unknownHdrID, 0,
+					63, 4, 1, 2, 3, 4,
+				}, hopByHopExtHdrID
+			},
+			shouldAccept: false,
+			expectICMP:   true,
+			ICMPType:     header.ICMPv6ParamProblem,
+			ICMPCode:     header.ICMPv6UnknownHeader,
+			pointer:      header.IPv6FixedHeaderSize,
+		},
+		{
 			name: "destination with unknown option skippable action",
 			extHdr: func(nextHdr uint8) ([]byte, uint8) {
 				return []byte{
@@ -753,11 +783,6 @@ func TestReceiveIPv6ExtHdrs(t *testing.T) {
 			pointer:      header.IPv6FixedHeaderSize,
 		},
 		{
-			name:         "No next header",
-			extHdr:       func(nextHdr uint8) ([]byte, uint8) { return []byte{}, noNextHdrID },
-			shouldAccept: false,
-		},
-		{
 			name: "hopbyhop (with skippable unknown) - routing - atomic fragment - destination (with skippable unknown)",
 			extHdr: func(nextHdr uint8) ([]byte, uint8) {
 				return []byte{
@@ -825,7 +850,7 @@ func TestReceiveIPv6ExtHdrs(t *testing.T) {
 				NetworkProtocols:   []stack.NetworkProtocolFactory{NewProtocol},
 				TransportProtocols: []stack.TransportProtocolFactory{udp.NewProtocol},
 			})
-			e := channel.New(1, 1280, linkAddr1)
+			e := channel.New(1, header.IPv6MinimumMTU, linkAddr1)
 			if err := s.CreateNIC(nicID, e); err != nil {
 				t.Fatalf("CreateNIC(%d, _) = %s", nicID, err)
 			}
@@ -871,7 +896,13 @@ func TestReceiveIPv6ExtHdrs(t *testing.T) {
 				Length:  uint16(udpLength),
 			})
 			copy(u.Payload(), udpPayload)
-			sum := header.PseudoHeaderChecksum(udp.ProtocolNumber, addr1, addr2, uint16(udpLength))
+
+			dstAddr := tcpip.Address(addr2)
+			if test.multicast {
+				dstAddr = header.IPv6AllNodesMulticastAddress
+			}
+
+			sum := header.PseudoHeaderChecksum(udp.ProtocolNumber, addr1, dstAddr, uint16(udpLength))
 			sum = header.Checksum(udpPayload, sum)
 			u.SetChecksum(^u.CalculateChecksum(sum))
 
@@ -882,16 +913,14 @@ func TestReceiveIPv6ExtHdrs(t *testing.T) {
 			// Serialize IPv6 fixed header.
 			payloadLength := hdr.UsedLength()
 			ip := header.IPv6(hdr.Prepend(header.IPv6MinimumSize))
-			dstAddr := tcpip.Address(addr2)
-			if test.multicast {
-				dstAddr = header.IPv6AllNodesMulticastAddress
-			}
 			ip.Encode(&header.IPv6Fields{
 				PayloadLength: uint16(payloadLength),
-				NextHeader:    ipv6NextHdr,
-				HopLimit:      255,
-				SrcAddr:       addr1,
-				DstAddr:       dstAddr,
+				// We're lying about transport protocol here to be able to generate
+				// raw extension headers from the test definitions.
+				TransportProtocol: tcpip.TransportProtocolNumber(ipv6NextHdr),
+				HopLimit:          255,
+				SrcAddr:           addr1,
+				DstAddr:           dstAddr,
 			})
 
 			e.InjectInbound(ProtocolNumber, stack.NewPacketBuffer(stack.PacketBufferOptions{
@@ -980,9 +1009,10 @@ func TestReceiveIPv6Fragments(t *testing.T) {
 		udpPayload2Length = 128
 		// Used to test cases where the fragment blocks are not a multiple of
 		// the fragment block size of 8 (RFC 8200 section 4.5).
-		udpPayload3Length = 127
-		udpPayload4Length = header.IPv6MaximumPayloadSize - header.UDPMinimumSize
-		fragmentExtHdrLen = 8
+		udpPayload3Length     = 127
+		udpPayload4Length     = header.IPv6MaximumPayloadSize - header.UDPMinimumSize
+		udpMaximumSizeMinus15 = header.UDPMaximumSize - 15
+		fragmentExtHdrLen     = 8
 		// Note, not all routing extension headers will be 8 bytes but this test
 		// uses 8 byte routing extension headers for most sub tests.
 		routingExtHdrLen = 8
@@ -1326,14 +1356,14 @@ func TestReceiveIPv6Fragments(t *testing.T) {
 					dstAddr: addr2,
 					nextHdr: fragmentExtHdrID,
 					data: buffer.NewVectorisedView(
-						fragmentExtHdrLen+65520,
+						fragmentExtHdrLen+udpMaximumSizeMinus15,
 						[]buffer.View{
 							// Fragment extension header.
 							//
 							// Fragment offset = 0, More = true, ID = 1
 							buffer.View([]byte{uint8(header.UDPProtocolNumber), 0, 0, 1, 0, 0, 0, 1}),
 
-							ipv6Payload4Addr1ToAddr2[:65520],
+							ipv6Payload4Addr1ToAddr2[:udpMaximumSizeMinus15],
 						},
 					),
 				},
@@ -1342,14 +1372,17 @@ func TestReceiveIPv6Fragments(t *testing.T) {
 					dstAddr: addr2,
 					nextHdr: fragmentExtHdrID,
 					data: buffer.NewVectorisedView(
-						fragmentExtHdrLen+len(ipv6Payload4Addr1ToAddr2)-65520,
+						fragmentExtHdrLen+len(ipv6Payload4Addr1ToAddr2)-udpMaximumSizeMinus15,
 						[]buffer.View{
 							// Fragment extension header.
 							//
-							// Fragment offset = 8190, More = false, ID = 1
-							buffer.View([]byte{uint8(header.UDPProtocolNumber), 0, 255, 240, 0, 0, 0, 1}),
+							// Fragment offset = udpMaximumSizeMinus15/8, More = false, ID = 1
+							buffer.View([]byte{uint8(header.UDPProtocolNumber), 0,
+								udpMaximumSizeMinus15 >> 8,
+								udpMaximumSizeMinus15 & 0xff,
+								0, 0, 0, 1}),
 
-							ipv6Payload4Addr1ToAddr2[65520:],
+							ipv6Payload4Addr1ToAddr2[udpMaximumSizeMinus15:],
 						},
 					),
 				},
@@ -1357,6 +1390,47 @@ func TestReceiveIPv6Fragments(t *testing.T) {
 			expectedPayloads: [][]byte{udpPayload4Addr1ToAddr2},
 		},
 		{
+			name: "Two fragments with MF flag reassembled into a maximum UDP packet",
+			fragments: []fragmentData{
+				{
+					srcAddr: addr1,
+					dstAddr: addr2,
+					nextHdr: fragmentExtHdrID,
+					data: buffer.NewVectorisedView(
+						fragmentExtHdrLen+udpMaximumSizeMinus15,
+						[]buffer.View{
+							// Fragment extension header.
+							//
+							// Fragment offset = 0, More = true, ID = 1
+							buffer.View([]byte{uint8(header.UDPProtocolNumber), 0, 0, 1, 0, 0, 0, 1}),
+
+							ipv6Payload4Addr1ToAddr2[:udpMaximumSizeMinus15],
+						},
+					),
+				},
+				{
+					srcAddr: addr1,
+					dstAddr: addr2,
+					nextHdr: fragmentExtHdrID,
+					data: buffer.NewVectorisedView(
+						fragmentExtHdrLen+len(ipv6Payload4Addr1ToAddr2)-udpMaximumSizeMinus15,
+						[]buffer.View{
+							// Fragment extension header.
+							//
+							// Fragment offset = udpMaximumSizeMinus15/8, More = true, ID = 1
+							buffer.View([]byte{uint8(header.UDPProtocolNumber), 0,
+								udpMaximumSizeMinus15 >> 8,
+								(udpMaximumSizeMinus15 & 0xff) + 1,
+								0, 0, 0, 1}),
+
+							ipv6Payload4Addr1ToAddr2[udpMaximumSizeMinus15:],
+						},
+					),
+				},
+			},
+			expectedPayloads: nil,
+		},
+		{
 			name: "Two fragments with per-fragment routing header with zero segments left",
 			fragments: []fragmentData{
 				{
@@ -1844,7 +1918,7 @@ func TestReceiveIPv6Fragments(t *testing.T) {
 				NetworkProtocols:   []stack.NetworkProtocolFactory{NewProtocol},
 				TransportProtocols: []stack.TransportProtocolFactory{udp.NewProtocol},
 			})
-			e := channel.New(0, 1280, linkAddr1)
+			e := channel.New(0, header.IPv6MinimumMTU, linkAddr1)
 			if err := s.CreateNIC(nicID, e); err != nil {
 				t.Fatalf("CreateNIC(%d, _) = %s", nicID, err)
 			}
@@ -1875,10 +1949,12 @@ func TestReceiveIPv6Fragments(t *testing.T) {
 				ip := header.IPv6(hdr.Prepend(header.IPv6MinimumSize))
 				ip.Encode(&header.IPv6Fields{
 					PayloadLength: uint16(f.data.Size()),
-					NextHeader:    f.nextHdr,
-					HopLimit:      255,
-					SrcAddr:       f.srcAddr,
-					DstAddr:       f.dstAddr,
+					// We're lying about transport protocol here so that we can generate
+					// raw extension headers for the tests.
+					TransportProtocol: tcpip.TransportProtocolNumber(f.nextHdr),
+					HopLimit:          255,
+					SrcAddr:           f.srcAddr,
+					DstAddr:           f.dstAddr,
 				})
 
 				vv := hdr.View().ToVectorisedView()
@@ -1912,16 +1988,19 @@ func TestReceiveIPv6Fragments(t *testing.T) {
 
 func TestInvalidIPv6Fragments(t *testing.T) {
 	const (
-		nicID             = 1
-		fragmentExtHdrLen = 8
+		addr1     = "\x0a\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01"
+		addr2     = "\x0a\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02"
+		linkAddr1 = tcpip.LinkAddress("\x0a\x0b\x0c\x0d\x0e\x0e")
+		nicID     = 1
+		hoplimit  = 255
+		ident     = 1
+		data      = "TEST_INVALID_IPV6_FRAGMENTS"
 	)
 
-	payloadGen := func(payloadLen int) []byte {
-		payload := make([]byte, payloadLen)
-		for i := 0; i < len(payload); i++ {
-			payload[i] = 0x30
-		}
-		return payload
+	type fragmentData struct {
+		ipv6Fields         header.IPv6Fields
+		ipv6FragmentFields header.IPv6SerializableFragmentExtHdr
+		payload            []byte
 	}
 
 	tests := []struct {
@@ -1929,31 +2008,62 @@ func TestInvalidIPv6Fragments(t *testing.T) {
 		fragments              []fragmentData
 		wantMalformedIPPackets uint64
 		wantMalformedFragments uint64
+		expectICMP             bool
+		expectICMPType         header.ICMPv6Type
+		expectICMPCode         header.ICMPv6Code
+		expectICMPTypeSpecific uint32
 	}{
 		{
+			name: "fragment size is not a multiple of 8 and the M flag is true",
+			fragments: []fragmentData{
+				{
+					ipv6Fields: header.IPv6Fields{
+						PayloadLength:     header.IPv6FragmentHeaderSize + 9,
+						TransportProtocol: header.UDPProtocolNumber,
+						HopLimit:          hoplimit,
+						SrcAddr:           addr1,
+						DstAddr:           addr2,
+					},
+					ipv6FragmentFields: header.IPv6SerializableFragmentExtHdr{
+						FragmentOffset: 0 >> 3,
+						M:              true,
+						Identification: ident,
+					},
+					payload: []byte(data)[:9],
+				},
+			},
+			wantMalformedIPPackets: 1,
+			wantMalformedFragments: 1,
+			expectICMP:             true,
+			expectICMPType:         header.ICMPv6ParamProblem,
+			expectICMPCode:         header.ICMPv6ErroneousHeader,
+			expectICMPTypeSpecific: header.IPv6PayloadLenOffset,
+		},
+		{
 			name: "fragments reassembled into a payload exceeding the max IPv6 payload size",
 			fragments: []fragmentData{
 				{
-					srcAddr: addr1,
-					dstAddr: addr2,
-					nextHdr: fragmentExtHdrID,
-					data: buffer.NewVectorisedView(
-						fragmentExtHdrLen+(header.IPv6MaximumPayloadSize+1)-16,
-						[]buffer.View{
-							// Fragment extension header.
-							// Fragment offset = 8190, More = false, ID = 1
-							buffer.View([]byte{uint8(header.UDPProtocolNumber), 0,
-								((header.IPv6MaximumPayloadSize + 1) - 16) >> 8,
-								((header.IPv6MaximumPayloadSize + 1) - 16) & math.MaxUint8,
-								0, 0, 0, 1}),
-							// Payload length = 16
-							payloadGen(16),
-						},
-					),
+					ipv6Fields: header.IPv6Fields{
+						PayloadLength:     header.IPv6FragmentHeaderSize + 16,
+						TransportProtocol: header.UDPProtocolNumber,
+						HopLimit:          hoplimit,
+						SrcAddr:           addr1,
+						DstAddr:           addr2,
+					},
+					ipv6FragmentFields: header.IPv6SerializableFragmentExtHdr{
+						FragmentOffset: ((header.IPv6MaximumPayloadSize + 1) - 16) >> 3,
+						M:              false,
+						Identification: ident,
+					},
+					payload: []byte(data)[:16],
 				},
 			},
 			wantMalformedIPPackets: 1,
 			wantMalformedFragments: 1,
+			expectICMP:             true,
+			expectICMPType:         header.ICMPv6ParamProblem,
+			expectICMPCode:         header.ICMPv6ErroneousHeader,
+			expectICMPTypeSpecific: header.IPv6MinimumSize + 2, /* offset for 'Fragment Offset' in the fragment header */
 		},
 	}
 
@@ -1964,33 +2074,39 @@ func TestInvalidIPv6Fragments(t *testing.T) {
 					NewProtocol,
 				},
 			})
-			e := channel.New(0, 1500, linkAddr1)
+			e := channel.New(1, 1500, linkAddr1)
 			if err := s.CreateNIC(nicID, e); err != nil {
 				t.Fatalf("CreateNIC(%d, _) = %s", nicID, err)
 			}
 			if err := s.AddAddress(nicID, ProtocolNumber, addr2); err != nil {
 				t.Fatalf("AddAddress(%d, %d, %s) = %s", nicID, ProtocolNumber, addr2, err)
 			}
+			s.SetRouteTable([]tcpip.Route{{
+				Destination: header.IPv6EmptySubnet,
+				NIC:         nicID,
+			}})
 
+			var expectICMPPayload buffer.View
 			for _, f := range test.fragments {
-				hdr := buffer.NewPrependable(header.IPv6MinimumSize)
+				hdr := buffer.NewPrependable(header.IPv6MinimumSize + header.IPv6FragmentHeaderSize)
 
-				// Serialize IPv6 fixed header.
-				ip := header.IPv6(hdr.Prepend(header.IPv6MinimumSize))
-				ip.Encode(&header.IPv6Fields{
-					PayloadLength: uint16(f.data.Size()),
-					NextHeader:    f.nextHdr,
-					HopLimit:      255,
-					SrcAddr:       f.srcAddr,
-					DstAddr:       f.dstAddr,
-				})
+				ip := header.IPv6(hdr.Prepend(header.IPv6MinimumSize + header.IPv6FragmentHeaderSize))
+				encodeArgs := f.ipv6Fields
+				encodeArgs.ExtensionHeaders = append(encodeArgs.ExtensionHeaders, &f.ipv6FragmentFields)
+				ip.Encode(&encodeArgs)
 
 				vv := hdr.View().ToVectorisedView()
-				vv.Append(f.data)
+				vv.AppendView(f.payload)
 
-				e.InjectInbound(ProtocolNumber, stack.NewPacketBuffer(stack.PacketBufferOptions{
+				pkt := stack.NewPacketBuffer(stack.PacketBufferOptions{
 					Data: vv,
-				}))
+				})
+
+				if test.expectICMP {
+					expectICMPPayload = stack.PayloadSince(pkt.NetworkHeader())
+				}
+
+				e.InjectInbound(ProtocolNumber, pkt)
 			}
 
 			if got, want := s.Stats().IP.MalformedPacketsReceived.Value(), test.wantMalformedIPPackets; got != want {
@@ -1999,6 +2115,280 @@ func TestInvalidIPv6Fragments(t *testing.T) {
 			if got, want := s.Stats().IP.MalformedFragmentsReceived.Value(), test.wantMalformedFragments; got != want {
 				t.Errorf("got Stats.IP.MalformedFragmentsReceived = %d, want = %d", got, want)
 			}
+
+			reply, ok := e.Read()
+			if !test.expectICMP {
+				if ok {
+					t.Fatalf("unexpected ICMP error message received: %#v", reply)
+				}
+				return
+			}
+			if !ok {
+				t.Fatal("expected ICMP error message missing")
+			}
+
+			checker.IPv6(t, stack.PayloadSince(reply.Pkt.NetworkHeader()),
+				checker.SrcAddr(addr2),
+				checker.DstAddr(addr1),
+				checker.IPFullLength(uint16(header.IPv6MinimumSize+header.ICMPv6MinimumSize+expectICMPPayload.Size())),
+				checker.ICMPv6(
+					checker.ICMPv6Type(test.expectICMPType),
+					checker.ICMPv6Code(test.expectICMPCode),
+					checker.ICMPv6TypeSpecific(test.expectICMPTypeSpecific),
+					checker.ICMPv6Payload([]byte(expectICMPPayload)),
+				),
+			)
+		})
+	}
+}
+
+func TestFragmentReassemblyTimeout(t *testing.T) {
+	const (
+		addr1     = "\x0a\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01"
+		addr2     = "\x0a\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02"
+		linkAddr1 = tcpip.LinkAddress("\x0a\x0b\x0c\x0d\x0e\x0e")
+		nicID     = 1
+		hoplimit  = 255
+		ident     = 1
+		data      = "TEST_FRAGMENT_REASSEMBLY_TIMEOUT"
+	)
+
+	type fragmentData struct {
+		ipv6Fields         header.IPv6Fields
+		ipv6FragmentFields header.IPv6SerializableFragmentExtHdr
+		payload            []byte
+	}
+
+	tests := []struct {
+		name       string
+		fragments  []fragmentData
+		expectICMP bool
+	}{
+		{
+			name: "first fragment only",
+			fragments: []fragmentData{
+				{
+					ipv6Fields: header.IPv6Fields{
+						PayloadLength:     header.IPv6FragmentHeaderSize + 16,
+						TransportProtocol: header.UDPProtocolNumber,
+						HopLimit:          hoplimit,
+						SrcAddr:           addr1,
+						DstAddr:           addr2,
+					},
+					ipv6FragmentFields: header.IPv6SerializableFragmentExtHdr{
+						FragmentOffset: 0,
+						M:              true,
+						Identification: ident,
+					},
+					payload: []byte(data)[:16],
+				},
+			},
+			expectICMP: true,
+		},
+		{
+			name: "two first fragments",
+			fragments: []fragmentData{
+				{
+					ipv6Fields: header.IPv6Fields{
+						PayloadLength:     header.IPv6FragmentHeaderSize + 16,
+						TransportProtocol: header.UDPProtocolNumber,
+						HopLimit:          hoplimit,
+						SrcAddr:           addr1,
+						DstAddr:           addr2,
+					},
+					ipv6FragmentFields: header.IPv6SerializableFragmentExtHdr{
+						FragmentOffset: 0,
+						M:              true,
+						Identification: ident,
+					},
+					payload: []byte(data)[:16],
+				},
+				{
+					ipv6Fields: header.IPv6Fields{
+						PayloadLength:     header.IPv6FragmentHeaderSize + 16,
+						TransportProtocol: header.UDPProtocolNumber,
+						HopLimit:          hoplimit,
+						SrcAddr:           addr1,
+						DstAddr:           addr2,
+					},
+					ipv6FragmentFields: header.IPv6SerializableFragmentExtHdr{
+						FragmentOffset: 0,
+						M:              true,
+						Identification: ident,
+					},
+					payload: []byte(data)[:16],
+				},
+			},
+			expectICMP: true,
+		},
+		{
+			name: "second fragment only",
+			fragments: []fragmentData{
+				{
+					ipv6Fields: header.IPv6Fields{
+						PayloadLength:     uint16(header.IPv6FragmentHeaderSize + len(data) - 16),
+						TransportProtocol: header.UDPProtocolNumber,
+						HopLimit:          hoplimit,
+						SrcAddr:           addr1,
+						DstAddr:           addr2,
+					},
+					ipv6FragmentFields: header.IPv6SerializableFragmentExtHdr{
+						FragmentOffset: 8,
+						M:              false,
+						Identification: ident,
+					},
+					payload: []byte(data)[16:],
+				},
+			},
+			expectICMP: false,
+		},
+		{
+			name: "two fragments with a gap",
+			fragments: []fragmentData{
+				{
+					ipv6Fields: header.IPv6Fields{
+						PayloadLength:     header.IPv6FragmentHeaderSize + 16,
+						TransportProtocol: header.UDPProtocolNumber,
+						HopLimit:          hoplimit,
+						SrcAddr:           addr1,
+						DstAddr:           addr2,
+					},
+					ipv6FragmentFields: header.IPv6SerializableFragmentExtHdr{
+						FragmentOffset: 0,
+						M:              true,
+						Identification: ident,
+					},
+					payload: []byte(data)[:16],
+				},
+				{
+					ipv6Fields: header.IPv6Fields{
+						PayloadLength:     uint16(header.IPv6FragmentHeaderSize + len(data) - 16),
+						TransportProtocol: header.UDPProtocolNumber,
+						HopLimit:          hoplimit,
+						SrcAddr:           addr1,
+						DstAddr:           addr2,
+					},
+					ipv6FragmentFields: header.IPv6SerializableFragmentExtHdr{
+						FragmentOffset: 8,
+						M:              false,
+						Identification: ident,
+					},
+					payload: []byte(data)[16:],
+				},
+			},
+			expectICMP: true,
+		},
+		{
+			name: "two fragments with a gap in reverse order",
+			fragments: []fragmentData{
+				{
+					ipv6Fields: header.IPv6Fields{
+						PayloadLength:     uint16(header.IPv6FragmentHeaderSize + len(data) - 16),
+						TransportProtocol: header.UDPProtocolNumber,
+						HopLimit:          hoplimit,
+						SrcAddr:           addr1,
+						DstAddr:           addr2,
+					},
+					ipv6FragmentFields: header.IPv6SerializableFragmentExtHdr{
+						FragmentOffset: 8,
+						M:              false,
+						Identification: ident,
+					},
+					payload: []byte(data)[16:],
+				},
+				{
+					ipv6Fields: header.IPv6Fields{
+						PayloadLength:     header.IPv6FragmentHeaderSize + 16,
+						TransportProtocol: header.UDPProtocolNumber,
+						HopLimit:          hoplimit,
+						SrcAddr:           addr1,
+						DstAddr:           addr2,
+					},
+					ipv6FragmentFields: header.IPv6SerializableFragmentExtHdr{
+						FragmentOffset: 0,
+						M:              true,
+						Identification: ident,
+					},
+					payload: []byte(data)[:16],
+				},
+			},
+			expectICMP: true,
+		},
+	}
+
+	for _, test := range tests {
+		t.Run(test.name, func(t *testing.T) {
+			clock := faketime.NewManualClock()
+			s := stack.New(stack.Options{
+				NetworkProtocols: []stack.NetworkProtocolFactory{
+					NewProtocol,
+				},
+				Clock: clock,
+			})
+
+			e := channel.New(1, 1500, linkAddr1)
+			if err := s.CreateNIC(nicID, e); err != nil {
+				t.Fatalf("CreateNIC(%d, _) = %s", nicID, err)
+			}
+			if err := s.AddAddress(nicID, ProtocolNumber, addr2); err != nil {
+				t.Fatalf("AddAddress(%d, %d, %s) = %s", nicID, header.IPv6ProtocolNumber, addr2, err)
+			}
+			s.SetRouteTable([]tcpip.Route{{
+				Destination: header.IPv6EmptySubnet,
+				NIC:         nicID,
+			}})
+
+			var firstFragmentSent buffer.View
+			for _, f := range test.fragments {
+				hdr := buffer.NewPrependable(header.IPv6MinimumSize + header.IPv6FragmentHeaderSize)
+
+				ip := header.IPv6(hdr.Prepend(header.IPv6MinimumSize + header.IPv6FragmentHeaderSize))
+				encodeArgs := f.ipv6Fields
+				encodeArgs.ExtensionHeaders = append(encodeArgs.ExtensionHeaders, &f.ipv6FragmentFields)
+				ip.Encode(&encodeArgs)
+
+				fragHDR := header.IPv6Fragment(hdr.View()[header.IPv6MinimumSize:])
+
+				vv := hdr.View().ToVectorisedView()
+				vv.AppendView(f.payload)
+
+				pkt := stack.NewPacketBuffer(stack.PacketBufferOptions{
+					Data: vv,
+				})
+
+				if firstFragmentSent == nil && fragHDR.FragmentOffset() == 0 {
+					firstFragmentSent = stack.PayloadSince(pkt.NetworkHeader())
+				}
+
+				e.InjectInbound(ProtocolNumber, pkt)
+			}
+
+			clock.Advance(ReassembleTimeout)
+
+			reply, ok := e.Read()
+			if !test.expectICMP {
+				if ok {
+					t.Fatalf("unexpected ICMP error message received: %#v", reply)
+				}
+				return
+			}
+			if !ok {
+				t.Fatal("expected ICMP error message missing")
+			}
+			if firstFragmentSent == nil {
+				t.Fatalf("unexpected ICMP error message received: %#v", reply)
+			}
+
+			checker.IPv6(t, stack.PayloadSince(reply.Pkt.NetworkHeader()),
+				checker.SrcAddr(addr2),
+				checker.DstAddr(addr1),
+				checker.IPFullLength(uint16(header.IPv6MinimumSize+header.ICMPv6MinimumSize+firstFragmentSent.Size())),
+				checker.ICMPv6(
+					checker.ICMPv6Type(header.ICMPv6TimeExceeded),
+					checker.ICMPv6Code(header.ICMPv6ReassemblyTimeout),
+					checker.ICMPv6Payload([]byte(firstFragmentSent)),
+				),
+			)
 		})
 	}
 }
@@ -2035,13 +2425,10 @@ func TestWriteStats(t *testing.T) {
 				// Install Output DROP rule.
 				t.Helper()
 				ipt := stk.IPTables()
-				filter, ok := ipt.GetTable(stack.FilterTable, true /* ipv6 */)
-				if !ok {
-					t.Fatalf("failed to find filter table")
-				}
+				filter := ipt.GetTable(stack.FilterID, true /* ipv6 */)
 				ruleIdx := filter.BuiltinChains[stack.Output]
 				filter.Rules[ruleIdx].Target = &stack.DropTarget{}
-				if err := ipt.ReplaceTable(stack.FilterTable, filter, true /* ipv6 */); err != nil {
+				if err := ipt.ReplaceTable(stack.FilterID, filter, true /* ipv6 */); err != nil {
 					t.Fatalf("failed to replace table: %v", err)
 				}
 			},
@@ -2056,17 +2443,14 @@ func TestWriteStats(t *testing.T) {
 				// of the 3 packets.
 				t.Helper()
 				ipt := stk.IPTables()
-				filter, ok := ipt.GetTable(stack.FilterTable, true /* ipv6 */)
-				if !ok {
-					t.Fatalf("failed to find filter table")
-				}
+				filter := ipt.GetTable(stack.FilterID, true /* ipv6 */)
 				// We'll match and DROP the last packet.
 				ruleIdx := filter.BuiltinChains[stack.Output]
 				filter.Rules[ruleIdx].Target = &stack.DropTarget{}
 				filter.Rules[ruleIdx].Matchers = []stack.Matcher{&limitedMatcher{nPackets - 1}}
 				// Make sure the next rule is ACCEPT.
 				filter.Rules[ruleIdx+1].Target = &stack.AcceptTarget{}
-				if err := ipt.ReplaceTable(stack.FilterTable, filter, true /* ipv6 */); err != nil {
+				if err := ipt.ReplaceTable(stack.FilterID, filter, true /* ipv6 */); err != nil {
 					t.Fatalf("failed to replace table: %v", err)
 				}
 			},
@@ -2119,7 +2503,7 @@ func TestWriteStats(t *testing.T) {
 
 					test.setup(t, rt.Stack())
 
-					nWritten, _ := writer.writePackets(&rt, pkts)
+					nWritten, _ := writer.writePackets(rt, pkts)
 
 					if got := int(rt.Stats().IP.PacketsSent.Value()); got != test.expectSent {
 						t.Errorf("sent %d packets, but expected to send %d", got, test.expectSent)
@@ -2136,7 +2520,7 @@ func TestWriteStats(t *testing.T) {
 	}
 }
 
-func buildRoute(t *testing.T, ep stack.LinkEndpoint) stack.Route {
+func buildRoute(t *testing.T, ep stack.LinkEndpoint) *stack.Route {
 	s := stack.New(stack.Options{
 		NetworkProtocols: []stack.NetworkProtocolFactory{NewProtocol},
 	})
@@ -2230,8 +2614,8 @@ var fragmentationTests = []struct {
 	wantFragments []fragmentInfo
 }{
 	{
-		description: "No Fragmentation",
-		mtu:         1280,
+		description: "No fragmentation",
+		mtu:         header.IPv6MinimumMTU,
 		gso:         nil,
 		transHdrLen: 0,
 		payloadSize: 1000,
@@ -2241,7 +2625,18 @@ var fragmentationTests = []struct {
 	},
 	{
 		description: "Fragmented",
-		mtu:         1280,
+		mtu:         header.IPv6MinimumMTU,
+		gso:         nil,
+		transHdrLen: 0,
+		payloadSize: 2000,
+		wantFragments: []fragmentInfo{
+			{offset: 0, payloadSize: 1240, more: true},
+			{offset: 154, payloadSize: 776, more: false},
+		},
+	},
+	{
+		description: "Fragmented with mtu not a multiple of 8",
+		mtu:         header.IPv6MinimumMTU + 1,
 		gso:         nil,
 		transHdrLen: 0,
 		payloadSize: 2000,
@@ -2262,7 +2657,7 @@ var fragmentationTests = []struct {
 	},
 	{
 		description: "Fragmented with gso none",
-		mtu:         1280,
+		mtu:         header.IPv6MinimumMTU,
 		gso:         &stack.GSO{Type: stack.GSONone},
 		transHdrLen: 0,
 		payloadSize: 1400,
@@ -2273,7 +2668,7 @@ var fragmentationTests = []struct {
 	},
 	{
 		description: "Fragmented with big header",
-		mtu:         1280,
+		mtu:         header.IPv6MinimumMTU,
 		gso:         nil,
 		transHdrLen: 100,
 		payloadSize: 1200,
@@ -2448,8 +2843,8 @@ func TestFragmentationErrors(t *testing.T) {
 			wantError:      tcpip.ErrAborted,
 		},
 		{
-			description:    "Error on packet with MTU smaller than transport header",
-			mtu:            1280,
+			description:    "Error when MTU is smaller than transport header",
+			mtu:            header.IPv6MinimumMTU,
 			transHdrLen:    1500,
 			payloadSize:    500,
 			allowPackets:   0,
@@ -2457,6 +2852,16 @@ func TestFragmentationErrors(t *testing.T) {
 			mockError:      nil,
 			wantError:      tcpip.ErrMessageTooLong,
 		},
+		{
+			description:    "Error when MTU is smaller than IPv6 minimum MTU",
+			mtu:            header.IPv6MinimumMTU - 1,
+			transHdrLen:    0,
+			payloadSize:    500,
+			allowPackets:   0,
+			outgoingErrors: 1,
+			mockError:      nil,
+			wantError:      tcpip.ErrInvalidEndpointState,
+		},
 	}
 
 	for _, ft := range tests {
@@ -2481,3 +2886,160 @@ func TestFragmentationErrors(t *testing.T) {
 		})
 	}
 }
+
+func TestForwarding(t *testing.T) {
+	const (
+		nicID1         = 1
+		nicID2         = 2
+		randomSequence = 123
+		randomIdent    = 42
+	)
+
+	ipv6Addr1 := tcpip.AddressWithPrefix{
+		Address:   tcpip.Address(net.ParseIP("10::1").To16()),
+		PrefixLen: 64,
+	}
+	ipv6Addr2 := tcpip.AddressWithPrefix{
+		Address:   tcpip.Address(net.ParseIP("11::1").To16()),
+		PrefixLen: 64,
+	}
+	remoteIPv6Addr1 := tcpip.Address(net.ParseIP("10::2").To16())
+	remoteIPv6Addr2 := tcpip.Address(net.ParseIP("11::2").To16())
+
+	tests := []struct {
+		name            string
+		TTL             uint8
+		expectErrorICMP bool
+	}{
+		{
+			name:            "TTL of zero",
+			TTL:             0,
+			expectErrorICMP: true,
+		},
+		{
+			name:            "TTL of one",
+			TTL:             1,
+			expectErrorICMP: true,
+		},
+		{
+			name:            "TTL of two",
+			TTL:             2,
+			expectErrorICMP: false,
+		},
+		{
+			name:            "TTL of three",
+			TTL:             3,
+			expectErrorICMP: false,
+		},
+		{
+			name:            "Max TTL",
+			TTL:             math.MaxUint8,
+			expectErrorICMP: false,
+		},
+	}
+
+	for _, test := range tests {
+		t.Run(test.name, func(t *testing.T) {
+			s := stack.New(stack.Options{
+				NetworkProtocols:   []stack.NetworkProtocolFactory{NewProtocol},
+				TransportProtocols: []stack.TransportProtocolFactory{icmp.NewProtocol6},
+			})
+			// We expect at most a single packet in response to our ICMP Echo Request.
+			e1 := channel.New(1, header.IPv6MinimumMTU, "")
+			if err := s.CreateNIC(nicID1, e1); err != nil {
+				t.Fatalf("CreateNIC(%d, _): %s", nicID1, err)
+			}
+			ipv6ProtoAddr1 := tcpip.ProtocolAddress{Protocol: ProtocolNumber, AddressWithPrefix: ipv6Addr1}
+			if err := s.AddProtocolAddress(nicID1, ipv6ProtoAddr1); err != nil {
+				t.Fatalf("AddProtocolAddress(%d, %#v): %s", nicID1, ipv6ProtoAddr1, err)
+			}
+
+			e2 := channel.New(1, header.IPv6MinimumMTU, "")
+			if err := s.CreateNIC(nicID2, e2); err != nil {
+				t.Fatalf("CreateNIC(%d, _): %s", nicID2, err)
+			}
+			ipv6ProtoAddr2 := tcpip.ProtocolAddress{Protocol: ProtocolNumber, AddressWithPrefix: ipv6Addr2}
+			if err := s.AddProtocolAddress(nicID2, ipv6ProtoAddr2); err != nil {
+				t.Fatalf("AddProtocolAddress(%d, %#v): %s", nicID2, ipv6ProtoAddr2, err)
+			}
+
+			s.SetRouteTable([]tcpip.Route{
+				{
+					Destination: ipv6Addr1.Subnet(),
+					NIC:         nicID1,
+				},
+				{
+					Destination: ipv6Addr2.Subnet(),
+					NIC:         nicID2,
+				},
+			})
+
+			if err := s.SetForwarding(ProtocolNumber, true); err != nil {
+				t.Fatalf("SetForwarding(%d, true): %s", ProtocolNumber, err)
+			}
+
+			hdr := buffer.NewPrependable(header.IPv6MinimumSize + header.ICMPv6MinimumSize)
+			icmp := header.ICMPv6(hdr.Prepend(header.ICMPv6MinimumSize))
+			icmp.SetIdent(randomIdent)
+			icmp.SetSequence(randomSequence)
+			icmp.SetType(header.ICMPv6EchoRequest)
+			icmp.SetCode(header.ICMPv6UnusedCode)
+			icmp.SetChecksum(0)
+			icmp.SetChecksum(header.ICMPv6Checksum(icmp, remoteIPv6Addr1, remoteIPv6Addr2, buffer.VectorisedView{}))
+			ip := header.IPv6(hdr.Prepend(header.IPv6MinimumSize))
+			ip.Encode(&header.IPv6Fields{
+				PayloadLength:     header.ICMPv6MinimumSize,
+				TransportProtocol: header.ICMPv6ProtocolNumber,
+				HopLimit:          test.TTL,
+				SrcAddr:           remoteIPv6Addr1,
+				DstAddr:           remoteIPv6Addr2,
+			})
+			requestPkt := stack.NewPacketBuffer(stack.PacketBufferOptions{
+				Data: hdr.View().ToVectorisedView(),
+			})
+			e1.InjectInbound(ProtocolNumber, requestPkt)
+
+			if test.expectErrorICMP {
+				reply, ok := e1.Read()
+				if !ok {
+					t.Fatal("expected ICMP Hop Limit Exceeded packet through incoming NIC")
+				}
+
+				checker.IPv6(t, header.IPv6(stack.PayloadSince(reply.Pkt.NetworkHeader())),
+					checker.SrcAddr(ipv6Addr1.Address),
+					checker.DstAddr(remoteIPv6Addr1),
+					checker.TTL(DefaultTTL),
+					checker.ICMPv6(
+						checker.ICMPv6Type(header.ICMPv6TimeExceeded),
+						checker.ICMPv6Code(header.ICMPv6HopLimitExceeded),
+						checker.ICMPv6Payload([]byte(hdr.View())),
+					),
+				)
+
+				if n := e2.Drain(); n != 0 {
+					t.Fatalf("got e2.Drain() = %d, want = 0", n)
+				}
+			} else {
+				reply, ok := e2.Read()
+				if !ok {
+					t.Fatal("expected ICMP Echo Request packet through outgoing NIC")
+				}
+
+				checker.IPv6(t, header.IPv6(stack.PayloadSince(reply.Pkt.NetworkHeader())),
+					checker.SrcAddr(remoteIPv6Addr1),
+					checker.DstAddr(remoteIPv6Addr2),
+					checker.TTL(test.TTL-1),
+					checker.ICMPv6(
+						checker.ICMPv6Type(header.ICMPv6EchoRequest),
+						checker.ICMPv6Code(header.ICMPv6UnusedCode),
+						checker.ICMPv6Payload(nil),
+					),
+				)
+
+				if n := e1.Drain(); n != 0 {
+					t.Fatalf("got e1.Drain() = %d, want = 0", n)
+				}
+			}
+		})
+	}
+}
diff --git a/pkg/tcpip/network/ipv6/mld.go b/pkg/tcpip/network/ipv6/mld.go
new file mode 100644
index 000000000..6f64b8462
--- /dev/null
+++ b/pkg/tcpip/network/ipv6/mld.go
@@ -0,0 +1,258 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package ipv6
+
+import (
+	"fmt"
+	"time"
+
+	"gvisor.dev/gvisor/pkg/tcpip"
+	"gvisor.dev/gvisor/pkg/tcpip/buffer"
+	"gvisor.dev/gvisor/pkg/tcpip/header"
+	"gvisor.dev/gvisor/pkg/tcpip/network/ip"
+	"gvisor.dev/gvisor/pkg/tcpip/stack"
+)
+
+const (
+	// UnsolicitedReportIntervalMax is the maximum delay between sending
+	// unsolicited MLD reports.
+	//
+	// Obtained from RFC 2710 Section 7.10.
+	UnsolicitedReportIntervalMax = 10 * time.Second
+)
+
+// MLDOptions holds options for MLD.
+type MLDOptions struct {
+	// Enabled indicates whether MLD will be performed.
+	//
+	// When enabled, MLD may transmit MLD report and done messages when
+	// joining and leaving multicast groups respectively, and handle incoming
+	// MLD packets.
+	//
+	// This field is ignored and is always assumed to be false for interfaces
+	// without neighbouring nodes (e.g. loopback).
+	Enabled bool
+}
+
+var _ ip.MulticastGroupProtocol = (*mldState)(nil)
+
+// mldState is the per-interface MLD state.
+//
+// mldState.init MUST be called to initialize the MLD state.
+type mldState struct {
+	// The IPv6 endpoint this mldState is for.
+	ep *endpoint
+
+	genericMulticastProtocol ip.GenericMulticastProtocolState
+}
+
+// SendReport implements ip.MulticastGroupProtocol.
+//
+// Precondition: mld.ep.mu must be read locked.
+func (mld *mldState) SendReport(groupAddress tcpip.Address) (bool, *tcpip.Error) {
+	return mld.writePacket(groupAddress, groupAddress, header.ICMPv6MulticastListenerReport)
+}
+
+// SendLeave implements ip.MulticastGroupProtocol.
+//
+// Precondition: mld.ep.mu must be read locked.
+func (mld *mldState) SendLeave(groupAddress tcpip.Address) *tcpip.Error {
+	_, err := mld.writePacket(header.IPv6AllRoutersMulticastAddress, groupAddress, header.ICMPv6MulticastListenerDone)
+	return err
+}
+
+// init binds mld to ep and initializes the generic multicast protocol state.
+// It is required to be called before using a new mldState.
+//
+// Must only be called once for the lifetime of mld.
+func (mld *mldState) init(ep *endpoint) {
+	mld.ep = ep
+	mld.genericMulticastProtocol.Init(&ep.mu.RWMutex, ip.GenericMulticastProtocolOptions{
+		// Loopback interfaces have no neighbouring nodes, so MLD stays disabled
+		// on them regardless of the configured option.
+		Enabled:                   ep.protocol.options.MLD.Enabled && !ep.nic.IsLoopback(),
+		Rand:                      ep.protocol.stack.Rand(),
+		Clock:                     ep.protocol.stack.Clock(),
+		Protocol:                  mld,
+		MaxUnsolicitedReportDelay: UnsolicitedReportIntervalMax,
+		AllNodesAddress:           header.IPv6AllNodesMulticastAddress,
+	})
+}
+
+// handleMulticastListenerQuery handles a query message.
+//
+// Precondition: mld.ep.mu must be locked.
+func (mld *mldState) handleMulticastListenerQuery(mldHdr header.MLD) {
+	mld.genericMulticastProtocol.HandleQueryLocked(mldHdr.MulticastAddress(), mldHdr.MaximumResponseDelay())
+}
+
+// handleMulticastListenerReport handles a report message.
+//
+// Precondition: mld.ep.mu must be locked.
+func (mld *mldState) handleMulticastListenerReport(mldHdr header.MLD) {
+	mld.genericMulticastProtocol.HandleReportLocked(mldHdr.MulticastAddress())
+}
+
+// joinGroup handles joining a new group and sending and scheduling the required
+// messages.
+//
+// The group is joined with dontInitialize set while the endpoint is disabled.
+//
+// Precondition: mld.ep.mu must be locked.
+func (mld *mldState) joinGroup(groupAddress tcpip.Address) {
+	mld.genericMulticastProtocol.JoinGroupLocked(groupAddress, !mld.ep.Enabled() /* dontInitialize */)
+}
+
+// isInGroup returns true if the specified group has been joined locally.
+//
+// Precondition: mld.ep.mu must be read locked.
+func (mld *mldState) isInGroup(groupAddress tcpip.Address) bool {
+	return mld.genericMulticastProtocol.IsLocallyJoinedRLocked(groupAddress)
+}
+
+// leaveGroup removes the group from the membership map, cancels any delay
+// timers associated with it, and sends the Done message, if required.
+// It returns tcpip.ErrBadLocalAddress if the group was not joined.
+//
+// Precondition: mld.ep.mu must be locked.
+func (mld *mldState) leaveGroup(groupAddress tcpip.Address) *tcpip.Error {
+	// LeaveGroupLocked reports false only if the group was not joined.
+	if !mld.genericMulticastProtocol.LeaveGroupLocked(groupAddress) {
+		return tcpip.ErrBadLocalAddress
+	}
+
+	return nil
+}
+
+// softLeaveAll leaves all groups from the perspective of MLD, but remains
+// joined locally.
+//
+// Precondition: mld.ep.mu must be locked.
+func (mld *mldState) softLeaveAll() {
+	mld.genericMulticastProtocol.MakeAllNonMemberLocked()
+}
+
+// initializeAll attempts to initialize the MLD state for each group that has
+// been joined locally.
+//
+// Precondition: mld.ep.mu must be locked.
+func (mld *mldState) initializeAll() {
+	mld.genericMulticastProtocol.InitializeGroupsLocked()
+}
+
+// sendQueuedReports attempts to send any reports that are queued for sending.
+//
+// Precondition: mld.ep.mu must be locked.
+func (mld *mldState) sendQueuedReports() {
+	mld.genericMulticastProtocol.SendQueuedReportsLocked()
+}
+
+// writePacket assembles and sends an MLD packet of mldType for groupAddress to
+// destAddress. Returns true only if sent with a non-unspecified source.
+// Precondition: mld.ep.mu must be read locked.
+func (mld *mldState) writePacket(destAddress, groupAddress tcpip.Address, mldType header.ICMPv6Type) (bool, *tcpip.Error) {
+	sentStats := mld.ep.protocol.stack.Stats().ICMP.V6.PacketsSent
+	var mldStat *tcpip.StatCounter
+	switch mldType {
+	case header.ICMPv6MulticastListenerReport:
+		mldStat = sentStats.MulticastListenerReport
+	case header.ICMPv6MulticastListenerDone:
+		mldStat = sentStats.MulticastListenerDone
+	default:
+		panic(fmt.Sprintf("unrecognized mld type = %d", mldType))
+	}
+
+	icmp := header.ICMPv6(buffer.NewView(header.ICMPv6HeaderSize + header.MLDMinimumSize))
+	icmp.SetType(mldType)
+	header.MLD(icmp.MessageBody()).SetMulticastAddress(groupAddress)
+	// As per RFC 2710 section 3,
+	//
+	//   All MLD messages described in this document are sent with a link-local
+	//   IPv6 Source Address, an IPv6 Hop Limit of 1, and an IPv6 Router Alert
+	//   option in a Hop-by-Hop Options header.
+	//
+	// However, this would cause problems with Duplicate Address Detection with
+	// the first address as MLD snooping switches may not send multicast traffic
+	// that DAD depends on to the node performing DAD without the MLD report, as
+	// documented in RFC 4862 section 5.4.2:
+	//
+	//   Note that when a node joins a multicast address, it typically sends a
+	//   Multicast Listener Discovery (MLD) report message [RFC2710] [RFC3810]
+	//   for the multicast address. In the case of Duplicate Address
+	//   Detection, the MLD report message is required in order to inform MLD-
+	//   snooping switches, rather than routers, to forward multicast packets.
+	//   In the above description, the delay for joining the multicast address
+	//   thus means delaying transmission of the corresponding MLD report
+	//   message. Since the MLD specifications do not request a random delay
+	//   to avoid race conditions, just delaying Neighbor Solicitation would
+	//   cause congestion by the MLD report messages. The congestion would
+	//   then prevent the MLD-snooping switches from working correctly and, as
+	//   a result, prevent Duplicate Address Detection from working. The
+	//   requirement to include the delay for the MLD report in this case
+	//   avoids this scenario. [RFC3590] also talks about some interaction
+	//   issues between Duplicate Address Detection and MLD, and specifies
+	//   which source address should be used for the MLD report in this case.
+	//
+	// As per RFC 3590 section 4, we should still send out MLD reports with an
+	// unspecified source address if we do not have an assigned link-local
+	// address to use as the source address to ensure DAD works as expected on
+	// networks with MLD snooping switches:
+	//
+	//   MLD Report and Done messages are sent with a link-local address as
+	//   the IPv6 source address, if a valid address is available on the
+	//   interface.  If a valid link-local address is not available (e.g., one
+	//   has not been configured), the message is sent with the unspecified
+	//   address (::) as the IPv6 source address.
+	//
+	//   Once a valid link-local address is available, a node SHOULD generate
+	//   new MLD Report messages for all multicast addresses joined on the
+	//   interface.
+	//
+	//   Routers receiving an MLD Report or Done message with the unspecified
+	//   address as the IPv6 source address MUST silently discard the packet
+	//   without taking any action on the packets contents.
+	//
+	//   Snooping switches MUST manage multicast forwarding state based on MLD
+	//   Report and Done messages sent with the unspecified address as the
+	//   IPv6 source address.
+	localAddress := mld.ep.getLinkLocalAddressRLocked()
+	if len(localAddress) == 0 {
+		localAddress = header.IPv6Any
+	}
+
+	icmp.SetChecksum(header.ICMPv6Checksum(icmp, localAddress, destAddress, buffer.VectorisedView{}))
+
+	extensionHeaders := header.IPv6ExtHdrSerializer{
+		header.IPv6SerializableHopByHopExtHdr{
+			&header.IPv6RouterAlertOption{Value: header.IPv6RouterAlertMLD},
+		},
+	}
+
+	pkt := stack.NewPacketBuffer(stack.PacketBufferOptions{
+		ReserveHeaderBytes: int(mld.ep.MaxHeaderLength()) + extensionHeaders.Length(),
+		Data:               buffer.View(icmp).ToVectorisedView(),
+	})
+
+	mld.ep.addIPHeader(localAddress, destAddress, pkt, stack.NetworkHeaderParams{
+		Protocol: header.ICMPv6ProtocolNumber,
+		TTL:      header.MLDHopLimit,
+	}, extensionHeaders)
+	if err := mld.ep.nic.WritePacketToRemote(header.EthernetAddressFromMulticastIPv6Address(destAddress), nil /* gso */, ProtocolNumber, pkt); err != nil {
+		sentStats.Dropped.Increment()
+		return false, err
+	}
+	mldStat.Increment()
+	return localAddress != header.IPv6Any, nil
+}
diff --git a/pkg/tcpip/network/ipv6/mld_test.go b/pkg/tcpip/network/ipv6/mld_test.go
new file mode 100644
index 000000000..e2778b656
--- /dev/null
+++ b/pkg/tcpip/network/ipv6/mld_test.go
@@ -0,0 +1,297 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package ipv6_test
+
+import (
+	"testing"
+	"time"
+
+	"gvisor.dev/gvisor/pkg/tcpip"
+	"gvisor.dev/gvisor/pkg/tcpip/buffer"
+	"gvisor.dev/gvisor/pkg/tcpip/checker"
+	"gvisor.dev/gvisor/pkg/tcpip/faketime"
+	"gvisor.dev/gvisor/pkg/tcpip/header"
+	"gvisor.dev/gvisor/pkg/tcpip/link/channel"
+	"gvisor.dev/gvisor/pkg/tcpip/network/ipv6"
+	"gvisor.dev/gvisor/pkg/tcpip/stack"
+)
+
+const (
+	linkLocalAddr       = "\xfe\x80\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01"
+	globalAddr          = "\x0a\x80\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01"
+	globalMulticastAddr = "\xff\x05\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02"
+)
+
+var (
+	linkLocalAddrSNMC = header.SolicitedNodeAddr(linkLocalAddr)
+	globalAddrSNMC    = header.SolicitedNodeAddr(globalAddr)
+)
+
+// validateMLDPacket checks p's IPv6 header, router alert option and MLD body.
+func validateMLDPacket(t *testing.T, p buffer.View, localAddress, remoteAddress tcpip.Address, mldType header.ICMPv6Type, groupAddress tcpip.Address) {
+	t.Helper()
+	checker.IPv6WithExtHdr(t, p,
+		checker.IPv6ExtHdr(
+			checker.IPv6HopByHopExtensionHeader(checker.IPv6RouterAlert(header.IPv6RouterAlertMLD)),
+		),
+		checker.SrcAddr(localAddress),
+		checker.DstAddr(remoteAddress),
+		// Hop Limit for an MLD message must be 1 as per RFC 2710 section 3.
+		checker.TTL(1),
+		checker.MLD(mldType, header.MLDMinimumSize,
+			checker.MLDMaxRespDelay(0),
+			checker.MLDMulticastAddress(groupAddress),
+		),
+	)
+}
+
+func TestIPv6JoinLeaveSolicitedNodeAddressPerformsMLD(t *testing.T) {
+	const nicID = 1
+
+	s := stack.New(stack.Options{
+		NetworkProtocols: []stack.NetworkProtocolFactory{ipv6.NewProtocolWithOptions(ipv6.Options{
+			MLD: ipv6.MLDOptions{
+				Enabled: true,
+			},
+		})},
+	})
+	e := channel.New(1, header.IPv6MinimumMTU, "")
+	if err := s.CreateNIC(nicID, e); err != nil {
+		t.Fatalf("CreateNIC(%d, _): %s", nicID, err)
+	}
+
+	// Adding an address makes the stack join that address's solicited-node
+	// multicast group. We expect an MLD report for the group, sent from the
+	// link-local address to the group itself.
+	if err := s.AddAddress(nicID, ipv6.ProtocolNumber, linkLocalAddr); err != nil {
+		t.Fatalf("AddAddress(%d, %d, %s) = %s", nicID, ipv6.ProtocolNumber, linkLocalAddr, err)
+	}
+	report, ok := e.Read()
+	if !ok {
+		t.Fatal("expected a report message to be sent")
+	}
+	validateMLDPacket(t, stack.PayloadSince(report.Pkt.NetworkHeader()), linkLocalAddr, linkLocalAddrSNMC, header.ICMPv6MulticastListenerReport, linkLocalAddrSNMC)
+
+	// Removing the address makes the stack leave the solicited-node group. We
+	// expect an MLD done message for the group, sent from the unspecified
+	// address to the all-routers multicast address.
+	if err := s.RemoveAddress(nicID, linkLocalAddr); err != nil {
+		t.Fatalf("RemoveAddress(%d, %s) = %s", nicID, linkLocalAddr, err)
+	}
+	done, ok := e.Read()
+	if !ok {
+		t.Fatal("expected a done message to be sent")
+	}
+	validateMLDPacket(t, stack.PayloadSince(done.Pkt.NetworkHeader()), header.IPv6Any, header.IPv6AllRoutersMulticastAddress, header.ICMPv6MulticastListenerDone, linkLocalAddrSNMC)
+}
+
+func TestSendQueuedMLDReports(t *testing.T) {
+	const (
+		nicID      = 1
+		maxReports = 2
+	)
+
+	tests := []struct {
+		name            string
+		dadTransmits    uint8
+		retransmitTimer time.Duration
+	}{
+		{
+			name:            "DAD Disabled",
+			dadTransmits:    0,
+			retransmitTimer: 0,
+		},
+		{
+			name:            "DAD Enabled",
+			dadTransmits:    1,
+			retransmitTimer: time.Second,
+		},
+	}
+
+	for _, test := range tests {
+		t.Run(test.name, func(t *testing.T) {
+			dadResolutionTime := test.retransmitTimer * time.Duration(test.dadTransmits)
+			clock := faketime.NewManualClock()
+			s := stack.New(stack.Options{
+				NetworkProtocols: []stack.NetworkProtocolFactory{ipv6.NewProtocolWithOptions(ipv6.Options{
+					NDPConfigs: ipv6.NDPConfigurations{
+						DupAddrDetectTransmits: test.dadTransmits,
+						RetransmitTimer:        test.retransmitTimer,
+					},
+					MLD: ipv6.MLDOptions{
+						Enabled: true,
+					},
+				})},
+				Clock: clock,
+			})
+
+			// Allow space for an extra packet so we can observe packets that were
+			// unexpectedly sent.
+			e := channel.New(maxReports+int(test.dadTransmits)+1 /* extra */, header.IPv6MinimumMTU, "")
+			if err := s.CreateNIC(nicID, e); err != nil {
+				t.Fatalf("CreateNIC(%d, _): %s", nicID, err)
+			}
+
+			resolveDAD := func(addr, snmc tcpip.Address) {
+				clock.Advance(dadResolutionTime)
+				if p, ok := e.Read(); !ok {
+					t.Fatal("expected DAD packet")
+				} else {
+					checker.IPv6(t, stack.PayloadSince(p.Pkt.NetworkHeader()),
+						checker.SrcAddr(header.IPv6Any),
+						checker.DstAddr(snmc),
+						checker.TTL(header.NDPHopLimit),
+						checker.NDPNS(
+							checker.NDPNSTargetAddress(addr),
+							checker.NDPNSOptions(nil),
+						))
+				}
+			}
+
+			var reportCounter uint64
+			reportStat := s.Stats().ICMP.V6.PacketsSent.MulticastListenerReport
+			if got := reportStat.Value(); got != reportCounter {
+				t.Errorf("got reportStat.Value() = %d, want = %d", got, reportCounter)
+			}
+			var doneCounter uint64
+			doneStat := s.Stats().ICMP.V6.PacketsSent.MulticastListenerDone
+			if got := doneStat.Value(); got != doneCounter {
+				t.Errorf("got doneStat.Value() = %d, want = %d", got, doneCounter)
+			}
+
+			// Joining a group without an assigned address should send an MLD report
+			// with the unspecified address.
+			if err := s.JoinGroup(ipv6.ProtocolNumber, nicID, globalMulticastAddr); err != nil {
+				t.Fatalf("JoinGroup(%d, %d, %s): %s", ipv6.ProtocolNumber, nicID, globalMulticastAddr, err)
+			}
+			reportCounter++
+			if got := reportStat.Value(); got != reportCounter {
+				t.Errorf("got reportStat.Value() = %d, want = %d", got, reportCounter)
+			}
+			if p, ok := e.Read(); !ok {
+				t.Errorf("expected MLD report for %s", globalMulticastAddr)
+			} else {
+				validateMLDPacket(t, stack.PayloadSince(p.Pkt.NetworkHeader()), header.IPv6Any, globalMulticastAddr, header.ICMPv6MulticastListenerReport, globalMulticastAddr)
+			}
+			clock.Advance(time.Hour)
+			if p, ok := e.Read(); ok {
+				t.Errorf("got unexpected packet = %#v", p)
+			}
+			if t.Failed() {
+				t.FailNow()
+			}
+
+			// Adding a global address should not send reports for the already joined
+			// group since we should only send queued reports when a link-local
+			// address is assigned.
+			//
+			// Note, we will still expect to send a report for the global address's
+			// solicited node address from the unspecified address as per RFC 3590
+			// section 4.
+			if err := s.AddAddressWithOptions(nicID, ipv6.ProtocolNumber, globalAddr, stack.FirstPrimaryEndpoint); err != nil {
+				t.Fatalf("AddAddressWithOptions(%d, %d, %s, %d): %s", nicID, ipv6.ProtocolNumber, globalAddr, stack.FirstPrimaryEndpoint, err)
+			}
+			reportCounter++
+			if got := reportStat.Value(); got != reportCounter {
+				t.Errorf("got reportStat.Value() = %d, want = %d", got, reportCounter)
+			}
+			if p, ok := e.Read(); !ok {
+				t.Errorf("expected MLD report for %s", globalAddrSNMC)
+			} else {
+				validateMLDPacket(t, stack.PayloadSince(p.Pkt.NetworkHeader()), header.IPv6Any, globalAddrSNMC, header.ICMPv6MulticastListenerReport, globalAddrSNMC)
+			}
+			if dadResolutionTime != 0 {
+				// Reports should not be sent when the address resolves.
+				resolveDAD(globalAddr, globalAddrSNMC)
+				if got := reportStat.Value(); got != reportCounter {
+					t.Errorf("got reportStat.Value() = %d, want = %d", got, reportCounter)
+				}
+			}
+			// Leave the group since we don't care about the global address's
+			// solicited node multicast group membership.
+			if err := s.LeaveGroup(ipv6.ProtocolNumber, nicID, globalAddrSNMC); err != nil {
+				t.Fatalf("LeaveGroup(%d, %d, %s): %s", ipv6.ProtocolNumber, nicID, globalAddrSNMC, err)
+			}
+			if got := doneStat.Value(); got != doneCounter {
+				t.Errorf("got doneStat.Value() = %d, want = %d", got, doneCounter)
+			}
+			if p, ok := e.Read(); ok {
+				t.Errorf("got unexpected packet = %#v", p)
+			}
+			if t.Failed() {
+				t.FailNow()
+			}
+
+			// Adding a link-local address should send a report for its solicited node
+			// address and globalMulticastAddr.
+			if err := s.AddAddressWithOptions(nicID, ipv6.ProtocolNumber, linkLocalAddr, stack.CanBePrimaryEndpoint); err != nil {
+				t.Fatalf("AddAddressWithOptions(%d, %d, %s, %d): %s", nicID, ipv6.ProtocolNumber, linkLocalAddr, stack.CanBePrimaryEndpoint, err)
+			}
+			if dadResolutionTime != 0 {
+				reportCounter++
+				if got := reportStat.Value(); got != reportCounter {
+					t.Errorf("got reportStat.Value() = %d, want = %d", got, reportCounter)
+				}
+				if p, ok := e.Read(); !ok {
+					t.Errorf("expected MLD report for %s", linkLocalAddrSNMC)
+				} else {
+					validateMLDPacket(t, stack.PayloadSince(p.Pkt.NetworkHeader()), header.IPv6Any, linkLocalAddrSNMC, header.ICMPv6MulticastListenerReport, linkLocalAddrSNMC)
+				}
+				resolveDAD(linkLocalAddr, linkLocalAddrSNMC)
+			}
+
+			// We expect two batches of reports to be sent (1 batch when the
+			// link-local address is assigned, and another after the maximum
+			// unsolicited report interval).
+			for i := 0; i < 2; i++ {
+				// We expect reports to be sent (one for globalMulticastAddr and another
+				// for linkLocalAddrSNMC).
+				reportCounter += maxReports
+				if got := reportStat.Value(); got != reportCounter {
+					t.Errorf("got reportStat.Value() = %d, want = %d", got, reportCounter)
+				}
+
+				addrs := map[tcpip.Address]bool{
+					globalMulticastAddr: false,
+					linkLocalAddrSNMC:   false,
+				}
+				for _ = range addrs {
+					p, ok := e.Read()
+					if !ok {
+						t.Fatalf("expected MLD report for %s and %s; addrs = %#v", globalMulticastAddr, linkLocalAddrSNMC, addrs)
+					}
+
+					addr := header.IPv6(stack.PayloadSince(p.Pkt.NetworkHeader())).DestinationAddress()
+					if seen, ok := addrs[addr]; !ok {
+						t.Fatalf("got unexpected packet destined to %s", addr)
+					} else if seen {
+						t.Fatalf("got another packet destined to %s", addr)
+					}
+
+					addrs[addr] = true
+					validateMLDPacket(t, stack.PayloadSince(p.Pkt.NetworkHeader()), linkLocalAddr, addr, header.ICMPv6MulticastListenerReport, addr)
+
+					clock.Advance(ipv6.UnsolicitedReportIntervalMax)
+				}
+			}
+
+			// Should not send any more reports.
+			clock.Advance(time.Hour)
+			if p, ok := e.Read(); ok {
+				t.Errorf("got unexpected packet = %#v", p)
+			}
+		})
+	}
+}
diff --git a/pkg/tcpip/network/ipv6/ndp.go b/pkg/tcpip/network/ipv6/ndp.go
index 40da011f8..d515eb622 100644
--- a/pkg/tcpip/network/ipv6/ndp.go
+++ b/pkg/tcpip/network/ipv6/ndp.go
@@ -20,6 +20,7 @@ import (
 	"math/rand"
 	"time"
 
+	"gvisor.dev/gvisor/pkg/sync"
 	"gvisor.dev/gvisor/pkg/tcpip"
 	"gvisor.dev/gvisor/pkg/tcpip/buffer"
 	"gvisor.dev/gvisor/pkg/tcpip/header"
@@ -459,6 +460,9 @@ func (c *NDPConfigurations) validate() {
 
 // ndpState is the per-interface NDP state.
 type ndpState struct {
+	// Do not allow overwriting this state.
+	_ sync.NoCopy
+
 	// The IPv6 endpoint this ndpState is for.
 	ep *endpoint
 
@@ -471,17 +475,8 @@ type ndpState struct {
 	// The default routers discovered through Router Advertisements.
 	defaultRouters map[tcpip.Address]defaultRouterState
 
-	rtrSolicit struct {
-		// The timer used to send the next router solicitation message.
-		timer tcpip.Timer
-
-		// Used to let the Router Solicitation timer know that it has been stopped.
-		//
-		// Must only be read from or written to while protected by the lock of
-		// the IPv6 endpoint this ndpState is associated with. MUST be set when the
-		// timer is set.
-		done *bool
-	}
+	// The job used to send the next router solicitation message.
+	rtrSolicitJob *tcpip.Job
 
 	// The on-link prefixes discovered through Router Advertisements' Prefix
 	// Information option.
@@ -507,7 +502,7 @@ type ndpState struct {
 // to the DAD goroutine that DAD should stop.
 type dadState struct {
 	// The DAD timer to send the next NS message, or resolve the address.
-	timer tcpip.Timer
+	job *tcpip.Job
 
 	// Used to let the DAD timer know that it has been stopped.
 	//
@@ -648,96 +643,73 @@ func (ndp *ndpState) startDuplicateAddressDetection(addr tcpip.Address, addressE
 
 		// Consider DAD to have resolved even if no DAD messages were actually
 		// transmitted.
-		if ndpDisp := ndp.ep.protocol.ndpDisp; ndpDisp != nil {
+		if ndpDisp := ndp.ep.protocol.options.NDPDisp; ndpDisp != nil {
 			ndpDisp.OnDuplicateAddressDetectionStatus(ndp.ep.nic.ID(), addr, true, nil)
 		}
 
+		ndp.ep.onAddressAssignedLocked(addr)
 		return nil
 	}
 
-	var done bool
-	var timer tcpip.Timer
-	// We initially start a timer to fire immediately because some of the DAD work
-	// cannot be done while holding the IPv6 endpoint's lock. This is effectively
-	// the same as starting a goroutine but we use a timer that fires immediately
-	// so we can reset it for the next DAD iteration.
-	timer = ndp.ep.protocol.stack.Clock().AfterFunc(0, func() {
-		ndp.ep.mu.Lock()
-		defer ndp.ep.mu.Unlock()
-
-		if done {
-			// If we reach this point, it means that the DAD timer fired after
-			// another goroutine already obtained the IPv6 endpoint lock and stopped
-			// DAD before this function obtained the NIC lock. Simply return here and
-			// do nothing further.
-			return
-		}
+	state := dadState{
+		job: ndp.ep.protocol.stack.NewJob(&ndp.ep.mu, func() {
+			state, ok := ndp.dad[addr]
+			if !ok {
+				panic(fmt.Sprintf("ndpdad: DAD timer fired but missing state for %s on NIC(%d)", addr, ndp.ep.nic.ID()))
+			}
 
-		if addressEndpoint.GetKind() != stack.PermanentTentative {
-			// The endpoint should still be marked as tentative since we are still
-			// performing DAD on it.
-			panic(fmt.Sprintf("ndpdad: addr %s is no longer tentative on NIC(%d)", addr, ndp.ep.nic.ID()))
-		}
+			if addressEndpoint.GetKind() != stack.PermanentTentative {
+				// The endpoint should still be marked as tentative since we are still
+				// performing DAD on it.
+				panic(fmt.Sprintf("ndpdad: addr %s is no longer tentative on NIC(%d)", addr, ndp.ep.nic.ID()))
+			}
 
-		dadDone := remaining == 0
-
-		var err *tcpip.Error
-		if !dadDone {
-			// Use the unspecified address as the source address when performing DAD.
-			addressEndpoint := ndp.ep.acquireAddressOrCreateTempLocked(header.IPv6Any, true /* createTemp */, stack.NeverPrimaryEndpoint)
-
-			// Do not hold the lock when sending packets which may be a long running
-			// task or may block link address resolution. We know this is safe
-			// because immediately after obtaining the lock again, we check if DAD
-			// has been stopped before doing any work with the IPv6 endpoint. Note,
-			// DAD would be stopped if the IPv6 endpoint was disabled or closed, or if
-			// the address was removed.
-			ndp.ep.mu.Unlock()
-			err = ndp.sendDADPacket(addr, addressEndpoint)
-			ndp.ep.mu.Lock()
-			addressEndpoint.DecRef()
-		}
+			dadDone := remaining == 0
 
-		if done {
-			// If we reach this point, it means that DAD was stopped after we released
-			// the IPv6 endpoint's read lock and before we obtained the write lock.
-			return
-		}
+			var err *tcpip.Error
+			if !dadDone {
+				err = ndp.sendDADPacket(addr, addressEndpoint)
+			}
 
-		if dadDone {
-			// DAD has resolved.
-			addressEndpoint.SetKind(stack.Permanent)
-		} else if err == nil {
-			// DAD is not done and we had no errors when sending the last NDP NS,
-			// schedule the next DAD timer.
-			remaining--
-			timer.Reset(ndp.configs.RetransmitTimer)
-			return
-		}
+			if dadDone {
+				// DAD has resolved.
+				addressEndpoint.SetKind(stack.Permanent)
+			} else if err == nil {
+				// DAD is not done and we had no errors when sending the last NDP NS,
+				// schedule the next DAD timer.
+				remaining--
+				state.job.Schedule(ndp.configs.RetransmitTimer)
+				return
+			}
 
-		// At this point we know that either DAD is done or we hit an error sending
-		// the last NDP NS. Either way, clean up addr's DAD state and let the
-		// integrator know DAD has completed.
-		delete(ndp.dad, addr)
+			// At this point we know that either DAD is done or we hit an error
+			// sending the last NDP NS. Either way, clean up addr's DAD state and let
+			// the integrator know DAD has completed.
+			delete(ndp.dad, addr)
 
-		if ndpDisp := ndp.ep.protocol.ndpDisp; ndpDisp != nil {
-			ndpDisp.OnDuplicateAddressDetectionStatus(ndp.ep.nic.ID(), addr, dadDone, err)
-		}
+			if ndpDisp := ndp.ep.protocol.options.NDPDisp; ndpDisp != nil {
+				ndpDisp.OnDuplicateAddressDetectionStatus(ndp.ep.nic.ID(), addr, dadDone, err)
+			}
 
-		// If DAD resolved for a stable SLAAC address, attempt generation of a
-		// temporary SLAAC address.
-		if dadDone && addressEndpoint.ConfigType() == stack.AddressConfigSlaac {
-			// Reset the generation attempts counter as we are starting the generation
-			// of a new address for the SLAAC prefix.
-			ndp.regenerateTempSLAACAddr(addressEndpoint.AddressWithPrefix().Subnet(), true /* resetGenAttempts */)
-		}
-	})
+			if dadDone {
+				if addressEndpoint.ConfigType() == stack.AddressConfigSlaac {
+					// Reset the generation attempts counter as we are starting the
+					// generation of a new address for the SLAAC prefix.
+					ndp.regenerateTempSLAACAddr(addressEndpoint.AddressWithPrefix().Subnet(), true /* resetGenAttempts */)
+				}
 
-	ndp.dad[addr] = dadState{
-		timer: timer,
-		done:  &done,
+				ndp.ep.onAddressAssignedLocked(addr)
+			}
+		}),
 	}
 
+	// We initially start a timer to fire immediately because some of the DAD work
+	// cannot be done while holding the IPv6 endpoint's lock. This is effectively
+	// the same as starting a goroutine but we use a timer that fires immediately
+	// so we can reset it for the next DAD iteration.
+	state.job.Schedule(0)
+	ndp.dad[addr] = state
+
 	return nil
 }
 
@@ -745,55 +717,31 @@ func (ndp *ndpState) startDuplicateAddressDetection(addr tcpip.Address, addressE
 // addr.
 //
 // addr must be a tentative IPv6 address on ndp's IPv6 endpoint.
-//
-// The IPv6 endpoint that ndp belongs to MUST NOT be locked.
 func (ndp *ndpState) sendDADPacket(addr tcpip.Address, addressEndpoint stack.AddressEndpoint) *tcpip.Error {
 	snmc := header.SolicitedNodeAddr(addr)
 
-	r, err := ndp.ep.protocol.stack.FindRoute(ndp.ep.nic.ID(), header.IPv6Any, snmc, ProtocolNumber, false /* multicastLoop */)
-	if err != nil {
-		return err
-	}
-	defer r.Release()
-
-	// Route should resolve immediately since snmc is a multicast address so a
-	// remote link address can be calculated without a resolution process.
-	if c, err := r.Resolve(nil); err != nil {
-		// Do not consider the NIC being unknown or disabled as a fatal error.
-		// Since this method is required to be called when the IPv6 endpoint is not
-		// locked, the NIC could have been disabled or removed by another goroutine.
-		if err == tcpip.ErrUnknownNICID || err != tcpip.ErrInvalidEndpointState {
-			return err
-		}
-
-		panic(fmt.Sprintf("ndp: error when resolving route to send NDP NS for DAD (%s -> %s on NIC(%d)): %s", header.IPv6Any, snmc, ndp.ep.nic.ID(), err))
-	} else if c != nil {
-		panic(fmt.Sprintf("ndp: route resolution not immediate for route to send NDP NS for DAD (%s -> %s on NIC(%d))", header.IPv6Any, snmc, ndp.ep.nic.ID()))
-	}
-
-	icmpData := header.ICMPv6(buffer.NewView(header.ICMPv6NeighborSolicitMinimumSize))
-	icmpData.SetType(header.ICMPv6NeighborSolicit)
-	ns := header.NDPNeighborSolicit(icmpData.NDPPayload())
+	icmp := header.ICMPv6(buffer.NewView(header.ICMPv6NeighborSolicitMinimumSize))
+	icmp.SetType(header.ICMPv6NeighborSolicit)
+	ns := header.NDPNeighborSolicit(icmp.MessageBody())
 	ns.SetTargetAddress(addr)
-	icmpData.SetChecksum(header.ICMPv6Checksum(icmpData, r.LocalAddress, r.RemoteAddress, buffer.VectorisedView{}))
+	icmp.SetChecksum(header.ICMPv6Checksum(icmp, header.IPv6Any, snmc, buffer.VectorisedView{}))
 
 	pkt := stack.NewPacketBuffer(stack.PacketBufferOptions{
-		ReserveHeaderBytes: int(r.MaxHeaderLength()),
-		Data:               buffer.View(icmpData).ToVectorisedView(),
+		ReserveHeaderBytes: int(ndp.ep.MaxHeaderLength()),
+		Data:               buffer.View(icmp).ToVectorisedView(),
 	})
 
-	sent := r.Stats().ICMP.V6PacketsSent
-	if err := r.WritePacket(nil,
-		stack.NetworkHeaderParams{
-			Protocol: header.ICMPv6ProtocolNumber,
-			TTL:      header.NDPHopLimit,
-		}, pkt,
-	); err != nil {
+	sent := ndp.ep.protocol.stack.Stats().ICMP.V6.PacketsSent
+	ndp.ep.addIPHeader(header.IPv6Any, snmc, pkt, stack.NetworkHeaderParams{
+		Protocol: header.ICMPv6ProtocolNumber,
+		TTL:      header.NDPHopLimit,
+	}, nil /* extensionHeaders */)
+
+	if err := ndp.ep.nic.WritePacketToRemote(header.EthernetAddressFromMulticastIPv6Address(snmc), nil /* gso */, ProtocolNumber, pkt); err != nil {
 		sent.Dropped.Increment()
 		return err
 	}
 	sent.NeighborSolicit.Increment()
-
 	return nil
 }
 
@@ -812,18 +760,11 @@ func (ndp *ndpState) stopDuplicateAddressDetection(addr tcpip.Address) {
 		return
 	}
 
-	if dad.timer != nil {
-		dad.timer.Stop()
-		dad.timer = nil
-
-		*dad.done = true
-		dad.done = nil
-	}
-
+	dad.job.Cancel()
 	delete(ndp.dad, addr)
 
 	// Let the integrator know DAD did not resolve.
-	if ndpDisp := ndp.ep.protocol.ndpDisp; ndpDisp != nil {
+	if ndpDisp := ndp.ep.protocol.options.NDPDisp; ndpDisp != nil {
 		ndpDisp.OnDuplicateAddressDetectionStatus(ndp.ep.nic.ID(), addr, false, nil)
 	}
 }
@@ -846,7 +787,7 @@ func (ndp *ndpState) handleRA(ip tcpip.Address, ra header.NDPRouterAdvert) {
 	// Only worry about the DHCPv6 configuration if we have an NDPDispatcher as we
 	// only inform the dispatcher on configuration changes. We do nothing else
 	// with the information.
-	if ndpDisp := ndp.ep.protocol.ndpDisp; ndpDisp != nil {
+	if ndpDisp := ndp.ep.protocol.options.NDPDisp; ndpDisp != nil {
 		var configuration DHCPv6ConfigurationFromNDPRA
 		switch {
 		case ra.ManagedAddrConfFlag():
@@ -903,20 +844,20 @@ func (ndp *ndpState) handleRA(ip tcpip.Address, ra header.NDPRouterAdvert) {
 	for opt, done, _ := it.Next(); !done; opt, done, _ = it.Next() {
 		switch opt := opt.(type) {
 		case header.NDPRecursiveDNSServer:
-			if ndp.ep.protocol.ndpDisp == nil {
+			if ndp.ep.protocol.options.NDPDisp == nil {
 				continue
 			}
 
 			addrs, _ := opt.Addresses()
-			ndp.ep.protocol.ndpDisp.OnRecursiveDNSServerOption(ndp.ep.nic.ID(), addrs, opt.Lifetime())
+			ndp.ep.protocol.options.NDPDisp.OnRecursiveDNSServerOption(ndp.ep.nic.ID(), addrs, opt.Lifetime())
 
 		case header.NDPDNSSearchList:
-			if ndp.ep.protocol.ndpDisp == nil {
+			if ndp.ep.protocol.options.NDPDisp == nil {
 				continue
 			}
 
 			domainNames, _ := opt.DomainNames()
-			ndp.ep.protocol.ndpDisp.OnDNSSearchListOption(ndp.ep.nic.ID(), domainNames, opt.Lifetime())
+			ndp.ep.protocol.options.NDPDisp.OnDNSSearchListOption(ndp.ep.nic.ID(), domainNames, opt.Lifetime())
 
 		case header.NDPPrefixInformation:
 			prefix := opt.Subnet()
@@ -964,7 +905,7 @@ func (ndp *ndpState) invalidateDefaultRouter(ip tcpip.Address) {
 	delete(ndp.defaultRouters, ip)
 
 	// Let the integrator know a discovered default router is invalidated.
-	if ndpDisp := ndp.ep.protocol.ndpDisp; ndpDisp != nil {
+	if ndpDisp := ndp.ep.protocol.options.NDPDisp; ndpDisp != nil {
 		ndpDisp.OnDefaultRouterInvalidated(ndp.ep.nic.ID(), ip)
 	}
 }
@@ -976,7 +917,7 @@ func (ndp *ndpState) invalidateDefaultRouter(ip tcpip.Address) {
 //
 // The IPv6 endpoint that ndp belongs to MUST be locked.
 func (ndp *ndpState) rememberDefaultRouter(ip tcpip.Address, rl time.Duration) {
-	ndpDisp := ndp.ep.protocol.ndpDisp
+	ndpDisp := ndp.ep.protocol.options.NDPDisp
 	if ndpDisp == nil {
 		return
 	}
@@ -1006,7 +947,7 @@ func (ndp *ndpState) rememberDefaultRouter(ip tcpip.Address, rl time.Duration) {
 //
 // The IPv6 endpoint that ndp belongs to MUST be locked.
 func (ndp *ndpState) rememberOnLinkPrefix(prefix tcpip.Subnet, l time.Duration) {
-	ndpDisp := ndp.ep.protocol.ndpDisp
+	ndpDisp := ndp.ep.protocol.options.NDPDisp
 	if ndpDisp == nil {
 		return
 	}
@@ -1047,7 +988,7 @@ func (ndp *ndpState) invalidateOnLinkPrefix(prefix tcpip.Subnet) {
 	delete(ndp.onLinkPrefixes, prefix)
 
 	// Let the integrator know a discovered on-link prefix is invalidated.
-	if ndpDisp := ndp.ep.protocol.ndpDisp; ndpDisp != nil {
+	if ndpDisp := ndp.ep.protocol.options.NDPDisp; ndpDisp != nil {
 		ndpDisp.OnOnLinkPrefixInvalidated(ndp.ep.nic.ID(), prefix)
 	}
 }
@@ -1225,7 +1166,7 @@ func (ndp *ndpState) doSLAAC(prefix tcpip.Subnet, pl, vl time.Duration) {
 // The IPv6 endpoint that ndp belongs to MUST be locked.
 func (ndp *ndpState) addAndAcquireSLAACAddr(addr tcpip.AddressWithPrefix, configType stack.AddressConfigType, deprecated bool) stack.AddressEndpoint {
 	// Inform the integrator that we have a new SLAAC address.
-	ndpDisp := ndp.ep.protocol.ndpDisp
+	ndpDisp := ndp.ep.protocol.options.NDPDisp
 	if ndpDisp == nil {
 		return nil
 	}
@@ -1272,7 +1213,7 @@ func (ndp *ndpState) generateSLAACAddr(prefix tcpip.Subnet, state *slaacPrefixSt
 		}
 
 		dadCounter := state.generationAttempts + state.stableAddr.localGenerationFailures
-		if oIID := ndp.ep.protocol.opaqueIIDOpts; oIID.NICNameFromID != nil {
+		if oIID := ndp.ep.protocol.options.OpaqueIIDOpts; oIID.NICNameFromID != nil {
 			addrBytes = header.AppendOpaqueInterfaceIdentifier(
 				addrBytes[:header.IIDOffsetInIPv6Address],
 				prefix,
@@ -1676,7 +1617,7 @@ func (ndp *ndpState) deprecateSLAACAddress(addressEndpoint stack.AddressEndpoint
 	}
 
 	addressEndpoint.SetDeprecated(true)
-	if ndpDisp := ndp.ep.protocol.ndpDisp; ndpDisp != nil {
+	if ndpDisp := ndp.ep.protocol.options.NDPDisp; ndpDisp != nil {
 		ndpDisp.OnAutoGenAddressDeprecated(ndp.ep.nic.ID(), addressEndpoint.AddressWithPrefix())
 	}
 }
@@ -1701,7 +1642,7 @@ func (ndp *ndpState) invalidateSLAACPrefix(prefix tcpip.Subnet, state slaacPrefi
 //
 // The IPv6 endpoint that ndp belongs to MUST be locked.
 func (ndp *ndpState) cleanupSLAACAddrResourcesAndNotify(addr tcpip.AddressWithPrefix, invalidatePrefix bool) {
-	if ndpDisp := ndp.ep.protocol.ndpDisp; ndpDisp != nil {
+	if ndpDisp := ndp.ep.protocol.options.NDPDisp; ndpDisp != nil {
 		ndpDisp.OnAutoGenAddressInvalidated(ndp.ep.nic.ID(), addr)
 	}
 
@@ -1761,7 +1702,7 @@ func (ndp *ndpState) invalidateTempSLAACAddr(tempAddrs map[tcpip.Address]tempSLA
 //
 // The IPv6 endpoint that ndp belongs to MUST be locked.
 func (ndp *ndpState) cleanupTempSLAACAddrResourcesAndNotify(addr tcpip.AddressWithPrefix, invalidateAddr bool) {
-	if ndpDisp := ndp.ep.protocol.ndpDisp; ndpDisp != nil {
+	if ndpDisp := ndp.ep.protocol.options.NDPDisp; ndpDisp != nil {
 		ndpDisp.OnAutoGenAddressInvalidated(ndp.ep.nic.ID(), addr)
 	}
 
@@ -1859,7 +1800,7 @@ func (ndp *ndpState) cleanupState(hostOnly bool) {
 //
 // The IPv6 endpoint that ndp belongs to MUST be locked.
 func (ndp *ndpState) startSolicitingRouters() {
-	if ndp.rtrSolicit.timer != nil {
+	if ndp.rtrSolicitJob != nil {
 		// We are already soliciting routers.
 		return
 	}
@@ -1876,56 +1817,14 @@ func (ndp *ndpState) startSolicitingRouters() {
 		delay = time.Duration(rand.Int63n(int64(ndp.configs.MaxRtrSolicitationDelay)))
 	}
 
-	var done bool
-	ndp.rtrSolicit.done = &done
-	ndp.rtrSolicit.timer = ndp.ep.protocol.stack.Clock().AfterFunc(delay, func() {
-		ndp.ep.mu.Lock()
-		if done {
-			// If we reach this point, it means that the RS timer fired after another
-			// goroutine already obtained the IPv6 endpoint lock and stopped
-			// solicitations. Simply return here and do nothing further.
-			ndp.ep.mu.Unlock()
-			return
-		}
-
+	ndp.rtrSolicitJob = ndp.ep.protocol.stack.NewJob(&ndp.ep.mu, func() {
 		// As per RFC 4861 section 4.1, the source of the RS is an address assigned
 		// to the sending interface, or the unspecified address if no address is
 		// assigned to the sending interface.
-		addressEndpoint := ndp.ep.acquireOutgoingPrimaryAddressRLocked(header.IPv6AllRoutersMulticastAddress, false)
-		if addressEndpoint == nil {
-			// Incase this ends up creating a new temporary address, we need to hold
-			// onto the endpoint until a route is obtained. If we decrement the
-			// reference count before obtaing a route, the address's resources would
-			// be released and attempting to obtain a route after would fail. Once a
-			// route is obtainted, it is safe to decrement the reference count since
-			// obtaining a route increments the address's reference count.
-			addressEndpoint = ndp.ep.acquireAddressOrCreateTempLocked(header.IPv6Any, true /* createTemp */, stack.NeverPrimaryEndpoint)
-		}
-		ndp.ep.mu.Unlock()
-
-		localAddr := addressEndpoint.AddressWithPrefix().Address
-		r, err := ndp.ep.protocol.stack.FindRoute(ndp.ep.nic.ID(), localAddr, header.IPv6AllRoutersMulticastAddress, ProtocolNumber, false /* multicastLoop */)
-		addressEndpoint.DecRef()
-		if err != nil {
-			return
-		}
-		defer r.Release()
-
-		// Route should resolve immediately since
-		// header.IPv6AllRoutersMulticastAddress is a multicast address so a
-		// remote link address can be calculated without a resolution process.
-		if c, err := r.Resolve(nil); err != nil {
-			// Do not consider the NIC being unknown or disabled as a fatal error.
-			// Since this method is required to be called when the IPv6 endpoint is
-			// not locked, the IPv6 endpoint could have been disabled or removed by
-			// another goroutine.
-			if err == tcpip.ErrUnknownNICID || err == tcpip.ErrInvalidEndpointState {
-				return
-			}
-
-			panic(fmt.Sprintf("ndp: error when resolving route to send NDP RS (%s -> %s on NIC(%d)): %s", header.IPv6Any, header.IPv6AllRoutersMulticastAddress, ndp.ep.nic.ID(), err))
-		} else if c != nil {
-			panic(fmt.Sprintf("ndp: route resolution not immediate for route to send NDP RS (%s -> %s on NIC(%d))", header.IPv6Any, header.IPv6AllRoutersMulticastAddress, ndp.ep.nic.ID()))
+		localAddr := header.IPv6Any
+		if addressEndpoint := ndp.ep.acquireOutgoingPrimaryAddressRLocked(header.IPv6AllRoutersMulticastAddress, false); addressEndpoint != nil {
+			localAddr = addressEndpoint.AddressWithPrefix().Address
+			addressEndpoint.DecRef()
 		}
 
 		// As per RFC 4861 section 4.1, an NDP RS SHOULD include the source
@@ -1936,30 +1835,31 @@ func (ndp *ndpState) startSolicitingRouters() {
 		// TODO(b/141011931): Validate a LinkEndpoint's link address (provided by
 		// LinkEndpoint.LinkAddress) before reaching this point.
 		var optsSerializer header.NDPOptionsSerializer
-		if localAddr != header.IPv6Any && header.IsValidUnicastEthernetAddress(r.LocalLinkAddress) {
+		linkAddress := ndp.ep.nic.LinkAddress()
+		if localAddr != header.IPv6Any && header.IsValidUnicastEthernetAddress(linkAddress) {
 			optsSerializer = header.NDPOptionsSerializer{
-				header.NDPSourceLinkLayerAddressOption(r.LocalLinkAddress),
+				header.NDPSourceLinkLayerAddressOption(linkAddress),
 			}
 		}
 		payloadSize := header.ICMPv6HeaderSize + header.NDPRSMinimumSize + int(optsSerializer.Length())
 		icmpData := header.ICMPv6(buffer.NewView(payloadSize))
 		icmpData.SetType(header.ICMPv6RouterSolicit)
-		rs := header.NDPRouterSolicit(icmpData.NDPPayload())
+		rs := header.NDPRouterSolicit(icmpData.MessageBody())
 		rs.Options().Serialize(optsSerializer)
-		icmpData.SetChecksum(header.ICMPv6Checksum(icmpData, r.LocalAddress, r.RemoteAddress, buffer.VectorisedView{}))
+		icmpData.SetChecksum(header.ICMPv6Checksum(icmpData, localAddr, header.IPv6AllRoutersMulticastAddress, buffer.VectorisedView{}))
 
 		pkt := stack.NewPacketBuffer(stack.PacketBufferOptions{
-			ReserveHeaderBytes: int(r.MaxHeaderLength()),
+			ReserveHeaderBytes: int(ndp.ep.MaxHeaderLength()),
 			Data:               buffer.View(icmpData).ToVectorisedView(),
 		})
 
-		sent := r.Stats().ICMP.V6PacketsSent
-		if err := r.WritePacket(nil,
-			stack.NetworkHeaderParams{
-				Protocol: header.ICMPv6ProtocolNumber,
-				TTL:      header.NDPHopLimit,
-			}, pkt,
-		); err != nil {
+		sent := ndp.ep.protocol.stack.Stats().ICMP.V6.PacketsSent
+		ndp.ep.addIPHeader(localAddr, header.IPv6AllRoutersMulticastAddress, pkt, stack.NetworkHeaderParams{
+			Protocol: header.ICMPv6ProtocolNumber,
+			TTL:      header.NDPHopLimit,
+		}, nil /* extensionHeaders */)
+
+		if err := ndp.ep.nic.WritePacketToRemote(header.EthernetAddressFromMulticastIPv6Address(header.IPv6AllRoutersMulticastAddress), nil /* gso */, ProtocolNumber, pkt); err != nil {
 			sent.Dropped.Increment()
 			log.Printf("startSolicitingRouters: error writing NDP router solicit message on NIC(%d); err = %s", ndp.ep.nic.ID(), err)
 			// Don't send any more messages if we had an error.
@@ -1969,21 +1869,12 @@ func (ndp *ndpState) startSolicitingRouters() {
 			remaining--
 		}
 
-		ndp.ep.mu.Lock()
-		if done || remaining == 0 {
-			ndp.rtrSolicit.timer = nil
-			ndp.rtrSolicit.done = nil
-		} else if ndp.rtrSolicit.timer != nil {
-			// Note, we need to explicitly check to make sure that
-			// the timer field is not nil because if it was nil but
-			// we still reached this point, then we know the IPv6 endpoint
-			// was requested to stop soliciting routers so we don't
-			// need to send the next Router Solicitation message.
-			ndp.rtrSolicit.timer.Reset(ndp.configs.RtrSolicitationInterval)
+		if remaining != 0 {
+			ndp.rtrSolicitJob.Schedule(ndp.configs.RtrSolicitationInterval)
 		}
-		ndp.ep.mu.Unlock()
 	})
 
+	ndp.rtrSolicitJob.Schedule(delay)
 }
 
 // stopSolicitingRouters stops soliciting routers. If routers are not currently
@@ -1991,22 +1882,28 @@ func (ndp *ndpState) startSolicitingRouters() {
 //
 // The IPv6 endpoint that ndp belongs to MUST be locked.
 func (ndp *ndpState) stopSolicitingRouters() {
-	if ndp.rtrSolicit.timer == nil {
+	if ndp.rtrSolicitJob == nil {
 		// Nothing to do.
 		return
 	}
 
-	*ndp.rtrSolicit.done = true
-	ndp.rtrSolicit.timer.Stop()
-	ndp.rtrSolicit.timer = nil
-	ndp.rtrSolicit.done = nil
+	ndp.rtrSolicitJob.Cancel()
+	ndp.rtrSolicitJob = nil
 }
 
-// initializeTempAddrState initializes state related to temporary SLAAC
-// addresses.
-func (ndp *ndpState) initializeTempAddrState() {
-	header.InitialTempIID(ndp.temporaryIIDHistory[:], ndp.ep.protocol.tempIIDSeed, ndp.ep.nic.ID())
+func (ndp *ndpState) init(ep *endpoint) {
+	if ndp.dad != nil {
+		panic("attempted to initialize NDP state twice")
+	}
+
+	ndp.ep = ep
+	ndp.configs = ep.protocol.options.NDPConfigs
+	ndp.dad = make(map[tcpip.Address]dadState)
+	ndp.defaultRouters = make(map[tcpip.Address]defaultRouterState)
+	ndp.onLinkPrefixes = make(map[tcpip.Subnet]onLinkPrefixState)
+	ndp.slaacPrefixes = make(map[tcpip.Subnet]slaacPrefixState)
 
+	header.InitialTempIID(ndp.temporaryIIDHistory[:], ndp.ep.protocol.options.TempIIDSeed, ndp.ep.nic.ID())
 	if MaxDesyncFactor != 0 {
 		ndp.temporaryAddressDesyncFactor = time.Duration(rand.Int63n(int64(MaxDesyncFactor)))
 	}
diff --git a/pkg/tcpip/network/ipv6/ndp_test.go b/pkg/tcpip/network/ipv6/ndp_test.go
index ac20f217e..05a0d95b2 100644
--- a/pkg/tcpip/network/ipv6/ndp_test.go
+++ b/pkg/tcpip/network/ipv6/ndp_test.go
@@ -45,10 +45,6 @@ func setupStackAndEndpoint(t *testing.T, llladdr, rlladdr tcpip.Address, useNeig
 	if err := s.CreateNIC(1, &stubLinkEndpoint{}); err != nil {
 		t.Fatalf("CreateNIC(_) = %s", err)
 	}
-	if err := s.AddAddress(1, ProtocolNumber, llladdr); err != nil {
-		t.Fatalf("AddAddress(_, %d, %s) = %s", ProtocolNumber, llladdr, err)
-	}
-
 	{
 		subnet, err := tcpip.NewSubnet(rlladdr, tcpip.AddressMask(strings.Repeat("\xff", len(rlladdr))))
 		if err != nil {
@@ -73,6 +69,17 @@ func setupStackAndEndpoint(t *testing.T, llladdr, rlladdr tcpip.Address, useNeig
 	}
 	t.Cleanup(ep.Close)
 
+	addressableEndpoint, ok := ep.(stack.AddressableEndpoint)
+	if !ok {
+		t.Fatalf("expected network endpoint to implement stack.AddressableEndpoint")
+	}
+	addr := llladdr.WithPrefix()
+	if addressEP, err := addressableEndpoint.AddAndAcquirePermanentAddress(addr, stack.CanBePrimaryEndpoint, stack.AddressConfigStatic, false /* deprecated */); err != nil {
+		t.Fatalf("addressableEndpoint.AddAndAcquirePermanentAddress(%s, CanBePrimaryEndpoint, AddressConfigStatic, false): %s", addr, err)
+	} else {
+		addressEP.DecRef()
+	}
+
 	return s, ep
 }
 
@@ -198,7 +205,7 @@ func TestNeighorSolicitationWithSourceLinkLayerOption(t *testing.T) {
 			hdr := buffer.NewPrependable(header.IPv6MinimumSize + ndpNSSize)
 			pkt := header.ICMPv6(hdr.Prepend(ndpNSSize))
 			pkt.SetType(header.ICMPv6NeighborSolicit)
-			ns := header.NDPNeighborSolicit(pkt.NDPPayload())
+			ns := header.NDPNeighborSolicit(pkt.MessageBody())
 			ns.SetTargetAddress(lladdr0)
 			opts := ns.Options()
 			copy(opts, test.optsBuf)
@@ -206,14 +213,14 @@ func TestNeighorSolicitationWithSourceLinkLayerOption(t *testing.T) {
 			payloadLength := hdr.UsedLength()
 			ip := header.IPv6(hdr.Prepend(header.IPv6MinimumSize))
 			ip.Encode(&header.IPv6Fields{
-				PayloadLength: uint16(payloadLength),
-				NextHeader:    uint8(header.ICMPv6ProtocolNumber),
-				HopLimit:      255,
-				SrcAddr:       lladdr1,
-				DstAddr:       lladdr0,
+				PayloadLength:     uint16(payloadLength),
+				TransportProtocol: header.ICMPv6ProtocolNumber,
+				HopLimit:          255,
+				SrcAddr:           lladdr1,
+				DstAddr:           lladdr0,
 			})
 
-			invalid := s.Stats().ICMP.V6PacketsReceived.Invalid
+			invalid := s.Stats().ICMP.V6.PacketsReceived.Invalid
 
 			// Invalid count should initially be 0.
 			if got := invalid.Value(); got != 0 {
@@ -304,7 +311,7 @@ func TestNeighorSolicitationWithSourceLinkLayerOptionUsingNeighborCache(t *testi
 			hdr := buffer.NewPrependable(header.IPv6MinimumSize + ndpNSSize)
 			pkt := header.ICMPv6(hdr.Prepend(ndpNSSize))
 			pkt.SetType(header.ICMPv6NeighborSolicit)
-			ns := header.NDPNeighborSolicit(pkt.NDPPayload())
+			ns := header.NDPNeighborSolicit(pkt.MessageBody())
 			ns.SetTargetAddress(lladdr0)
 			opts := ns.Options()
 			copy(opts, test.optsBuf)
@@ -312,23 +319,23 @@ func TestNeighorSolicitationWithSourceLinkLayerOptionUsingNeighborCache(t *testi
 			payloadLength := hdr.UsedLength()
 			ip := header.IPv6(hdr.Prepend(header.IPv6MinimumSize))
 			ip.Encode(&header.IPv6Fields{
-				PayloadLength: uint16(payloadLength),
-				NextHeader:    uint8(header.ICMPv6ProtocolNumber),
-				HopLimit:      255,
-				SrcAddr:       lladdr1,
-				DstAddr:       lladdr0,
+				PayloadLength:     uint16(payloadLength),
+				TransportProtocol: header.ICMPv6ProtocolNumber,
+				HopLimit:          255,
+				SrcAddr:           lladdr1,
+				DstAddr:           lladdr0,
 			})
 
-			invalid := s.Stats().ICMP.V6PacketsReceived.Invalid
+			invalid := s.Stats().ICMP.V6.PacketsReceived.Invalid
 
 			// Invalid count should initially be 0.
 			if got := invalid.Value(); got != 0 {
 				t.Fatalf("got invalid = %d, want = 0", got)
 			}
 
-			e.InjectInbound(ProtocolNumber, &stack.PacketBuffer{
+			e.InjectInbound(ProtocolNumber, stack.NewPacketBuffer(stack.PacketBufferOptions{
 				Data: hdr.View().ToVectorisedView(),
-			})
+			}))
 
 			neighbors, err := s.Neighbors(nicID)
 			if err != nil {
@@ -341,7 +348,7 @@ func TestNeighorSolicitationWithSourceLinkLayerOptionUsingNeighborCache(t *testi
 					if diff := cmp.Diff(existing, n); diff != "" {
 						t.Fatalf("s.Neighbors(%d) returned unexpected duplicate neighbor entry (-existing +got):\n%s", nicID, diff)
 					}
-					t.Fatalf("s.Neighbors(%d) returned unexpected duplicate neighbor entry: %s", nicID, existing)
+					t.Fatalf("s.Neighbors(%d) returned unexpected duplicate neighbor entry: %#v", nicID, existing)
 				}
 				neighborByAddr[n.Addr] = n
 			}
@@ -368,7 +375,7 @@ func TestNeighorSolicitationWithSourceLinkLayerOptionUsingNeighborCache(t *testi
 				}
 
 				if ok {
-					t.Fatalf("unexpectedly got neighbor entry: %s", neigh)
+					t.Fatalf("unexpectedly got neighbor entry: %#v", neigh)
 				}
 			}
 		})
@@ -573,11 +580,18 @@ func TestNeighorSolicitationResponse(t *testing.T) {
 						t.Fatalf("AddAddress(%d, %d, %s) = %s", nicID, ProtocolNumber, nicAddr, err)
 					}
 
+					s.SetRouteTable([]tcpip.Route{
+						{ // Element type is implied by the enclosing []tcpip.Route literal.
+							Destination: header.IPv6EmptySubnet,
+							NIC:         nicID, // Same NIC the test address was added to above.
+						},
+					})
+
 					ndpNSSize := header.ICMPv6NeighborSolicitMinimumSize + test.nsOpts.Length()
 					hdr := buffer.NewPrependable(header.IPv6MinimumSize + ndpNSSize)
 					pkt := header.ICMPv6(hdr.Prepend(ndpNSSize))
 					pkt.SetType(header.ICMPv6NeighborSolicit)
-					ns := header.NDPNeighborSolicit(pkt.NDPPayload())
+					ns := header.NDPNeighborSolicit(pkt.MessageBody())
 					ns.SetTargetAddress(nicAddr)
 					opts := ns.Options()
 					opts.Serialize(test.nsOpts)
@@ -585,14 +599,14 @@ func TestNeighorSolicitationResponse(t *testing.T) {
 					payloadLength := hdr.UsedLength()
 					ip := header.IPv6(hdr.Prepend(header.IPv6MinimumSize))
 					ip.Encode(&header.IPv6Fields{
-						PayloadLength: uint16(payloadLength),
-						NextHeader:    uint8(header.ICMPv6ProtocolNumber),
-						HopLimit:      255,
-						SrcAddr:       test.nsSrc,
-						DstAddr:       test.nsDst,
+						PayloadLength:     uint16(payloadLength),
+						TransportProtocol: header.ICMPv6ProtocolNumber,
+						HopLimit:          255,
+						SrcAddr:           test.nsSrc,
+						DstAddr:           test.nsDst,
 					})
 
-					invalid := s.Stats().ICMP.V6PacketsReceived.Invalid
+					invalid := s.Stats().ICMP.V6.PacketsReceived.Invalid
 
 					// Invalid count should initially be 0.
 					if got := invalid.Value(); got != 0 {
@@ -636,8 +650,8 @@ func TestNeighorSolicitationResponse(t *testing.T) {
 						if p.Route.RemoteAddress != respNSDst {
 							t.Errorf("got p.Route.RemoteAddress = %s, want = %s", p.Route.RemoteAddress, respNSDst)
 						}
-						if want := header.EthernetAddressFromMulticastIPv6Address(respNSDst); p.Route.RemoteLinkAddress != want {
-							t.Errorf("got p.Route.RemoteLinkAddress = %s, want = %s", p.Route.RemoteLinkAddress, want)
+						if got, want := p.Route.RemoteLinkAddress(), header.EthernetAddressFromMulticastIPv6Address(respNSDst); got != want {
+							t.Errorf("got p.Route.RemoteLinkAddress() = %s, want = %s", got, want)
 						}
 
 						checker.IPv6(t, stack.PayloadSince(p.Pkt.NetworkHeader()),
@@ -658,7 +672,7 @@ func TestNeighorSolicitationResponse(t *testing.T) {
 						hdr := buffer.NewPrependable(header.IPv6MinimumSize + ndpNASize)
 						pkt := header.ICMPv6(hdr.Prepend(ndpNASize))
 						pkt.SetType(header.ICMPv6NeighborAdvert)
-						na := header.NDPNeighborAdvert(pkt.NDPPayload())
+						na := header.NDPNeighborAdvert(pkt.MessageBody())
 						na.SetSolicitedFlag(true)
 						na.SetOverrideFlag(true)
 						na.SetTargetAddress(test.nsSrc)
@@ -667,11 +681,11 @@ func TestNeighorSolicitationResponse(t *testing.T) {
 						payloadLength := hdr.UsedLength()
 						ip := header.IPv6(hdr.Prepend(header.IPv6MinimumSize))
 						ip.Encode(&header.IPv6Fields{
-							PayloadLength: uint16(payloadLength),
-							NextHeader:    uint8(header.ICMPv6ProtocolNumber),
-							HopLimit:      header.NDPHopLimit,
-							SrcAddr:       test.nsSrc,
-							DstAddr:       nicAddr,
+							PayloadLength:     uint16(payloadLength),
+							TransportProtocol: header.ICMPv6ProtocolNumber,
+							HopLimit:          header.NDPHopLimit,
+							SrcAddr:           test.nsSrc,
+							DstAddr:           nicAddr,
 						})
 						e.InjectLinkAddr(ProtocolNumber, "", stack.NewPacketBuffer(stack.PacketBufferOptions{
 							Data: hdr.View().ToVectorisedView(),
@@ -692,8 +706,8 @@ func TestNeighorSolicitationResponse(t *testing.T) {
 					if p.Route.RemoteAddress != test.naDst {
 						t.Errorf("got p.Route.RemoteAddress = %s, want = %s", p.Route.RemoteAddress, test.naDst)
 					}
-					if p.Route.RemoteLinkAddress != test.naDstLinkAddr {
-						t.Errorf("got p.Route.RemoteLinkAddress = %s, want = %s", p.Route.RemoteLinkAddress, test.naDstLinkAddr)
+					if got := p.Route.RemoteLinkAddress(); got != test.naDstLinkAddr {
+						t.Errorf("got p.Route.RemoteLinkAddress() = %s, want = %s", got, test.naDstLinkAddr)
 					}
 
 					checker.IPv6(t, stack.PayloadSince(p.Pkt.NetworkHeader()),
@@ -763,7 +777,7 @@ func TestNeighorAdvertisementWithTargetLinkLayerOption(t *testing.T) {
 			hdr := buffer.NewPrependable(header.IPv6MinimumSize + ndpNASize)
 			pkt := header.ICMPv6(hdr.Prepend(ndpNASize))
 			pkt.SetType(header.ICMPv6NeighborAdvert)
-			ns := header.NDPNeighborAdvert(pkt.NDPPayload())
+			ns := header.NDPNeighborAdvert(pkt.MessageBody())
 			ns.SetTargetAddress(lladdr1)
 			opts := ns.Options()
 			copy(opts, test.optsBuf)
@@ -771,14 +785,14 @@ func TestNeighorAdvertisementWithTargetLinkLayerOption(t *testing.T) {
 			payloadLength := hdr.UsedLength()
 			ip := header.IPv6(hdr.Prepend(header.IPv6MinimumSize))
 			ip.Encode(&header.IPv6Fields{
-				PayloadLength: uint16(payloadLength),
-				NextHeader:    uint8(header.ICMPv6ProtocolNumber),
-				HopLimit:      255,
-				SrcAddr:       lladdr1,
-				DstAddr:       lladdr0,
+				PayloadLength:     uint16(payloadLength),
+				TransportProtocol: header.ICMPv6ProtocolNumber,
+				HopLimit:          255,
+				SrcAddr:           lladdr1,
+				DstAddr:           lladdr0,
 			})
 
-			invalid := s.Stats().ICMP.V6PacketsReceived.Invalid
+			invalid := s.Stats().ICMP.V6.PacketsReceived.Invalid
 
 			// Invalid count should initially be 0.
 			if got := invalid.Value(); got != 0 {
@@ -876,7 +890,7 @@ func TestNeighorAdvertisementWithTargetLinkLayerOptionUsingNeighborCache(t *test
 			hdr := buffer.NewPrependable(header.IPv6MinimumSize + ndpNASize)
 			pkt := header.ICMPv6(hdr.Prepend(ndpNASize))
 			pkt.SetType(header.ICMPv6NeighborAdvert)
-			ns := header.NDPNeighborAdvert(pkt.NDPPayload())
+			ns := header.NDPNeighborAdvert(pkt.MessageBody())
 			ns.SetTargetAddress(lladdr1)
 			opts := ns.Options()
 			copy(opts, test.optsBuf)
@@ -884,23 +898,23 @@ func TestNeighorAdvertisementWithTargetLinkLayerOptionUsingNeighborCache(t *test
 			payloadLength := hdr.UsedLength()
 			ip := header.IPv6(hdr.Prepend(header.IPv6MinimumSize))
 			ip.Encode(&header.IPv6Fields{
-				PayloadLength: uint16(payloadLength),
-				NextHeader:    uint8(header.ICMPv6ProtocolNumber),
-				HopLimit:      255,
-				SrcAddr:       lladdr1,
-				DstAddr:       lladdr0,
+				PayloadLength:     uint16(payloadLength),
+				TransportProtocol: header.ICMPv6ProtocolNumber,
+				HopLimit:          255,
+				SrcAddr:           lladdr1,
+				DstAddr:           lladdr0,
 			})
 
-			invalid := s.Stats().ICMP.V6PacketsReceived.Invalid
+			invalid := s.Stats().ICMP.V6.PacketsReceived.Invalid
 
 			// Invalid count should initially be 0.
 			if got := invalid.Value(); got != 0 {
 				t.Fatalf("got invalid = %d, want = 0", got)
 			}
 
-			e.InjectInbound(ProtocolNumber, &stack.PacketBuffer{
+			e.InjectInbound(ProtocolNumber, stack.NewPacketBuffer(stack.PacketBufferOptions{
 				Data: hdr.View().ToVectorisedView(),
-			})
+			}))
 
 			neighbors, err := s.Neighbors(nicID)
 			if err != nil {
@@ -913,13 +927,13 @@ func TestNeighorAdvertisementWithTargetLinkLayerOptionUsingNeighborCache(t *test
 					if diff := cmp.Diff(existing, n); diff != "" {
 						t.Fatalf("s.Neighbors(%d) returned unexpected duplicate neighbor entry (-existing +got):\n%s", nicID, diff)
 					}
-					t.Fatalf("s.Neighbors(%d) returned unexpected duplicate neighbor entry: %s", nicID, existing)
+					t.Fatalf("s.Neighbors(%d) returned unexpected duplicate neighbor entry: %#v", nicID, existing)
 				}
 				neighborByAddr[n.Addr] = n
 			}
 
 			if neigh, ok := neighborByAddr[lladdr1]; ok {
-				t.Fatalf("unexpectedly got neighbor entry: %s", neigh)
+				t.Fatalf("unexpectedly got neighbor entry: %#v", neigh)
 			}
 
 			if test.isValid {
@@ -954,46 +968,37 @@ func TestNDPValidation(t *testing.T) {
 
 	for _, stackTyp := range stacks {
 		t.Run(stackTyp.name, func(t *testing.T) {
-			setup := func(t *testing.T) (*stack.Stack, stack.NetworkEndpoint, stack.Route) {
+			setup := func(t *testing.T) (*stack.Stack, stack.NetworkEndpoint) {
 				t.Helper()
 
 				// Create a stack with the assigned link-local address lladdr0
 				// and an endpoint to lladdr1.
 				s, ep := setupStackAndEndpoint(t, lladdr0, lladdr1, stackTyp.useNeighborCache)
 
-				r, err := s.FindRoute(1, lladdr0, lladdr1, ProtocolNumber, false /* multicastLoop */)
-				if err != nil {
-					t.Fatalf("FindRoute(_) = _, %s, want = _, nil", err)
-				}
-
-				return s, ep, r
+				return s, ep
 			}
 
-			handleIPv6Payload := func(payload buffer.View, hopLimit uint8, atomicFragment bool, ep stack.NetworkEndpoint, r *stack.Route) {
-				nextHdr := uint8(header.ICMPv6ProtocolNumber)
-				var extensions buffer.View
+			handleIPv6Payload := func(payload buffer.View, hopLimit uint8, atomicFragment bool, ep stack.NetworkEndpoint) {
+				var extHdrs header.IPv6ExtHdrSerializer
 				if atomicFragment {
-					extensions = buffer.NewView(header.IPv6FragmentExtHdrLength)
-					extensions[0] = nextHdr
-					nextHdr = uint8(header.IPv6FragmentExtHdrIdentifier)
+					extHdrs = append(extHdrs, &header.IPv6SerializableFragmentExtHdr{})
 				}
+				extHdrsLen := extHdrs.Length()
 
 				pkt := stack.NewPacketBuffer(stack.PacketBufferOptions{
-					ReserveHeaderBytes: header.IPv6MinimumSize + len(extensions),
+					ReserveHeaderBytes: header.IPv6MinimumSize + extHdrsLen,
 					Data:               payload.ToVectorisedView(),
 				})
-				ip := header.IPv6(pkt.NetworkHeader().Push(header.IPv6MinimumSize + len(extensions)))
+				ip := header.IPv6(pkt.NetworkHeader().Push(header.IPv6MinimumSize + extHdrsLen))
 				ip.Encode(&header.IPv6Fields{
-					PayloadLength: uint16(len(payload) + len(extensions)),
-					NextHeader:    nextHdr,
-					HopLimit:      hopLimit,
-					SrcAddr:       r.LocalAddress,
-					DstAddr:       r.RemoteAddress,
+					PayloadLength:     uint16(len(payload) + extHdrsLen),
+					TransportProtocol: header.ICMPv6ProtocolNumber,
+					HopLimit:          hopLimit,
+					SrcAddr:           lladdr1,
+					DstAddr:           lladdr0,
+					ExtensionHeaders:  extHdrs,
 				})
-				if n := copy(ip[header.IPv6MinimumSize:], extensions); n != len(extensions) {
-					t.Fatalf("expected to write %d bytes of extensions, but wrote %d", len(extensions), n)
-				}
-				ep.HandlePacket(r, pkt)
+				ep.HandlePacket(pkt)
 			}
 
 			var tllData [header.NDPLinkLayerAddressSize]byte
@@ -1106,15 +1111,14 @@ func TestNDPValidation(t *testing.T) {
 					t.Run(name, func(t *testing.T) {
 						for _, test := range subTests {
 							t.Run(test.name, func(t *testing.T) {
-								s, ep, r := setup(t)
-								defer r.Release()
+								s, ep := setup(t)
 
 								if isRouter {
 									// Enabling forwarding makes the stack act as a router.
 									s.SetForwarding(ProtocolNumber, true)
 								}
 
-								stats := s.Stats().ICMP.V6PacketsReceived
+								stats := s.Stats().ICMP.V6.PacketsReceived
 								invalid := stats.Invalid
 								routerOnly := stats.RouterOnlyPacketsDroppedByHost
 								typStat := typ.statCounter(stats)
@@ -1123,7 +1127,7 @@ func TestNDPValidation(t *testing.T) {
 								copy(icmp[typ.size:], typ.extraData)
 								icmp.SetType(typ.typ)
 								icmp.SetCode(test.code)
-								icmp.SetChecksum(header.ICMPv6Checksum(icmp[:typ.size], r.LocalAddress, r.RemoteAddress, buffer.View(typ.extraData).ToVectorisedView()))
+								icmp.SetChecksum(header.ICMPv6Checksum(icmp[:typ.size], lladdr0, lladdr1, buffer.View(typ.extraData).ToVectorisedView()))
 
 								// Rx count of the NDP message should initially be 0.
 								if got := typStat.Value(); got != 0 {
@@ -1144,7 +1148,7 @@ func TestNDPValidation(t *testing.T) {
 									t.FailNow()
 								}
 
-								handleIPv6Payload(buffer.View(icmp), test.hopLimit, test.atomicFragment, ep, &r)
+								handleIPv6Payload(buffer.View(icmp), test.hopLimit, test.atomicFragment, ep)
 
 								// Rx count of the NDP packet should have increased.
 								if got := typStat.Value(); got != 1 {
@@ -1338,19 +1342,19 @@ func TestRouterAdvertValidation(t *testing.T) {
 					pkt := header.ICMPv6(hdr.Prepend(icmpSize))
 					pkt.SetType(header.ICMPv6RouterAdvert)
 					pkt.SetCode(test.code)
-					copy(pkt.NDPPayload(), test.ndpPayload)
+					copy(pkt.MessageBody(), test.ndpPayload)
 					payloadLength := hdr.UsedLength()
 					pkt.SetChecksum(header.ICMPv6Checksum(pkt, test.src, header.IPv6AllNodesMulticastAddress, buffer.VectorisedView{}))
 					ip := header.IPv6(hdr.Prepend(header.IPv6MinimumSize))
 					ip.Encode(&header.IPv6Fields{
-						PayloadLength: uint16(payloadLength),
-						NextHeader:    uint8(icmp.ProtocolNumber6),
-						HopLimit:      test.hopLimit,
-						SrcAddr:       test.src,
-						DstAddr:       header.IPv6AllNodesMulticastAddress,
+						PayloadLength:     uint16(payloadLength),
+						TransportProtocol: icmp.ProtocolNumber6,
+						HopLimit:          test.hopLimit,
+						SrcAddr:           test.src,
+						DstAddr:           header.IPv6AllNodesMulticastAddress,
 					})
 
-					stats := s.Stats().ICMP.V6PacketsReceived
+					stats := s.Stats().ICMP.V6.PacketsReceived
 					invalid := stats.Invalid
 					rxRA := stats.RouterAdvert
 
diff --git a/pkg/tcpip/network/multicast_group_test.go b/pkg/tcpip/network/multicast_group_test.go
new file mode 100644
index 000000000..05d98a0a5
--- /dev/null
+++ b/pkg/tcpip/network/multicast_group_test.go
@@ -0,0 +1,1261 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package ip_test
+
+import (
+	"fmt"
+	"strings"
+	"testing"
+	"time"
+
+	"gvisor.dev/gvisor/pkg/tcpip"
+	"gvisor.dev/gvisor/pkg/tcpip/buffer"
+	"gvisor.dev/gvisor/pkg/tcpip/checker"
+	"gvisor.dev/gvisor/pkg/tcpip/faketime"
+	"gvisor.dev/gvisor/pkg/tcpip/header"
+	"gvisor.dev/gvisor/pkg/tcpip/link/channel"
+	"gvisor.dev/gvisor/pkg/tcpip/link/loopback"
+	"gvisor.dev/gvisor/pkg/tcpip/network/ipv4"
+	"gvisor.dev/gvisor/pkg/tcpip/network/ipv6"
+	"gvisor.dev/gvisor/pkg/tcpip/stack"
+)
+
+const (
+	linkAddr = tcpip.LinkAddress("\x02\x02\x03\x04\x05\x06") // link address handed to channel.New in createStack
+
+	ipv4Addr = tcpip.Address("\x0a\x00\x00\x01")
+	ipv6Addr = tcpip.Address("\xfe\x80\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01")
+
+	ipv4MulticastAddr1 = tcpip.Address("\xe0\x00\x00\x03")
+	ipv4MulticastAddr2 = tcpip.Address("\xe0\x00\x00\x04")
+	ipv4MulticastAddr3 = tcpip.Address("\xe0\x00\x00\x05")
+	ipv6MulticastAddr1 = tcpip.Address("\xff\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x03")
+	ipv6MulticastAddr2 = tcpip.Address("\xff\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x04")
+	ipv6MulticastAddr3 = tcpip.Address("\xff\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05")
+
+	igmpMembershipQuery    = uint8(header.IGMPMembershipQuery)
+	igmpv1MembershipReport = uint8(header.IGMPv1MembershipReport)
+	igmpv2MembershipReport = uint8(header.IGMPv2MembershipReport)
+	igmpLeaveGroup         = uint8(header.IGMPLeaveGroup)
+	mldQuery               = uint8(header.ICMPv6MulticastListenerQuery)
+	mldReport              = uint8(header.ICMPv6MulticastListenerReport)
+	mldDone                = uint8(header.ICMPv6MulticastListenerDone)
+
+	maxUnsolicitedReports = 2 // unsolicited reports the tests expect after a join; also the first argument to channel.New
+)
+
+var (
+	// unsolicitedIGMPReportIntervalMaxTenthSec is ipv4.UnsolicitedReportIntervalMax
+	// (the longest wait before an unsolicited report after joining a multicast
+	// group) in deciseconds; initialization panics if the conversion is lossy.
+	unsolicitedIGMPReportIntervalMaxTenthSec = func() uint8 {
+		const decisecond = time.Second / 10
+		if ipv4.UnsolicitedReportIntervalMax%decisecond != 0 { // must be a whole number of deciseconds
+			panic(fmt.Sprintf("UnsolicitedReportIntervalMax of %d is a lossy conversion to deciseconds", ipv4.UnsolicitedReportIntervalMax))
+		}
+		return uint8(ipv4.UnsolicitedReportIntervalMax / decisecond)
+	}()
+
+	ipv6AddrSNMC = header.SolicitedNodeAddr(ipv6Addr) // solicited-node multicast address derived from ipv6Addr
+)
+
+// validateMLDPacket checks that p is an IPv6 MLD packet sent from ipv6Addr to
+// remoteAddress with the given type, max response delay (ms) and group address.
+func validateMLDPacket(t *testing.T, p channel.PacketInfo, remoteAddress tcpip.Address, mldType uint8, maxRespTime byte, groupAddress tcpip.Address) {
+	t.Helper()
+
+	payload := header.IPv6(stack.PayloadSince(p.Pkt.NetworkHeader()))
+	checker.IPv6WithExtHdr(t, payload,
+		checker.IPv6ExtHdr(
+			checker.IPv6HopByHopExtensionHeader(checker.IPv6RouterAlert(header.IPv6RouterAlertMLD)),
+		),
+		checker.SrcAddr(ipv6Addr),
+		checker.DstAddr(remoteAddress),
+		// Hop Limit for an MLD message must be 1 as per RFC 2710 section 3.
+		checker.TTL(1),
+		checker.MLD(header.ICMPv6Type(mldType), header.MLDMinimumSize,
+			checker.MLDMaxRespDelay(time.Duration(maxRespTime)*time.Millisecond),
+			checker.MLDMulticastAddress(groupAddress),
+		),
+	)
+}
+
+// validateIGMPPacket checks that p is an IPv4 IGMP packet sent from ipv4Addr to
+// remoteAddress with the given type, max response time (deciseconds) and group.
+func validateIGMPPacket(t *testing.T, p channel.PacketInfo, remoteAddress tcpip.Address, igmpType uint8, maxRespTime byte, groupAddress tcpip.Address) {
+	t.Helper()
+
+	payload := header.IPv4(stack.PayloadSince(p.Pkt.NetworkHeader()))
+	checker.IPv4(t, payload,
+		checker.SrcAddr(ipv4Addr),
+		checker.DstAddr(remoteAddress),
+		// TTL for an IGMP message must be 1 as per RFC 2236 section 2.
+		checker.TTL(1),
+		checker.IPv4RouterAlert(),
+		checker.IGMP(
+			checker.IGMPType(header.IGMPType(igmpType)),
+			checker.IGMPMaxRespTime(header.DecisecondToDuration(maxRespTime)),
+			checker.IGMPGroupAddress(groupAddress),
+		),
+	)
+}
+
+func createStack(t *testing.T, v4, mgpEnabled bool) (*channel.Endpoint, *stack.Stack, *faketime.ManualClock) {
+	t.Helper()
+
+	e := channel.New(maxUnsolicitedReports, header.IPv6MinimumMTU, linkAddr) // NOTE(review): first argument presumably sizes the outbound queue — confirm against channel.New
+	s, clock := createStackWithLinkEndpoint(t, v4, mgpEnabled, e) // the returned endpoint lets callers read/inject packets
+	return e, s, clock
+}
+
+func createStackWithLinkEndpoint(t *testing.T, v4, mgpEnabled bool, e stack.LinkEndpoint) (*stack.Stack, *faketime.ManualClock) {
+	t.Helper()
+
+	igmpEnabled := v4 && mgpEnabled // at most one of IGMP/MLD is enabled, selected by v4
+	mldEnabled := !v4 && mgpEnabled
+
+	clock := faketime.NewManualClock() // manual clock: time only moves when a test calls Advance
+	s := stack.New(stack.Options{
+		NetworkProtocols: []stack.NetworkProtocolFactory{
+			ipv4.NewProtocolWithOptions(ipv4.Options{
+				IGMP: ipv4.IGMPOptions{
+					Enabled: igmpEnabled,
+				},
+			}),
+			ipv6.NewProtocolWithOptions(ipv6.Options{
+				MLD: ipv6.MLDOptions{
+					Enabled: mldEnabled,
+				},
+			}),
+		},
+		Clock: clock,
+	})
+	if err := s.CreateNIC(nicID, e); err != nil {
+		t.Fatalf("CreateNIC(%d, _) = %s", nicID, err)
+	}
+	if err := s.AddAddress(nicID, ipv4.ProtocolNumber, ipv4Addr); err != nil { // both address families are configured regardless of v4
+		t.Fatalf("AddAddress(%d, %d, %s): %s", nicID, ipv4.ProtocolNumber, ipv4Addr, err)
+	}
+	if err := s.AddAddress(nicID, ipv6.ProtocolNumber, ipv6Addr); err != nil {
+		t.Fatalf("AddAddress(%d, %d, %s): %s", nicID, ipv6.ProtocolNumber, ipv6Addr, err)
+	}
+
+	return s, clock
+}
+
+// checkInitialIPv6Groups checks that the NIC reported joining the solicited
+// node multicast group of ipv6Addr when it was created.
+//
+// To not interfere with tests it then leaves that group (checking the done
+// message), so callers can assume no IPv6 groups are joined. It returns the
+// number of MLD report and done messages sent so far.
+func checkInitialIPv6Groups(t *testing.T, e *channel.Endpoint, s *stack.Stack, clock *faketime.ManualClock) (reportCounter uint64, leaveCounter uint64) {
+	t.Helper()
+
+	stats := s.Stats().ICMP.V6.PacketsSent
+
+	reportCounter++
+	if got := stats.MulticastListenerReport.Value(); got != reportCounter {
+		t.Errorf("got stats.MulticastListenerReport.Value() = %d, want = %d", got, reportCounter)
+	}
+	if p, ok := e.Read(); !ok {
+		t.Fatal("expected a report message to be sent")
+	} else {
+		validateMLDPacket(t, p, ipv6AddrSNMC, mldReport, 0, ipv6AddrSNMC)
+	}
+
+	// Leave the group to not affect the tests. This is fine since we are not
+	// testing DAD or the solicited node address specifically.
+	if err := s.LeaveGroup(ipv6.ProtocolNumber, nicID, ipv6AddrSNMC); err != nil {
+		t.Fatalf("LeaveGroup(%d, %d, %s): %s", ipv6.ProtocolNumber, nicID, ipv6AddrSNMC, err)
+	}
+	leaveCounter++
+	if got := stats.MulticastListenerDone.Value(); got != leaveCounter {
+		t.Errorf("got stats.MulticastListenerDone.Value() = %d, want = %d", got, leaveCounter)
+	}
+	if p, ok := e.Read(); !ok {
+		t.Fatal("expected a done message to be sent")
+	} else {
+		validateMLDPacket(t, p, header.IPv6AllRoutersMulticastAddress, mldDone, 0, ipv6AddrSNMC)
+	}
+
+	// Should not send any more packets.
+	clock.Advance(time.Hour)
+	if p, ok := e.Read(); ok {
+		t.Fatalf("sent unexpected packet = %#v", p)
+	}
+
+	return reportCounter, leaveCounter
+}
+
+// createAndInjectIGMPPacket creates and injects an IGMP packet with the
+// specified fields, sent from header.IPv4Any to header.IPv4AllSystems.
+//
+// Note, the router alert option is not included in this packet.
+//
+// TODO(b/162198658): set the router alert option.
+func createAndInjectIGMPPacket(e *channel.Endpoint, igmpType byte, maxRespTime byte, groupAddress tcpip.Address) {
+	buf := buffer.NewView(header.IPv4MinimumSize + header.IGMPQueryMinimumSize)
+
+	ip := header.IPv4(buf)
+	ip.Encode(&header.IPv4Fields{
+		TotalLength: uint16(len(buf)),
+		TTL:         header.IGMPTTL,
+		Protocol:    uint8(header.IGMPProtocolNumber),
+		SrcAddr:     header.IPv4Any,
+		DstAddr:     header.IPv4AllSystems,
+	})
+	ip.SetChecksum(^ip.CalculateChecksum())
+
+	igmp := header.IGMP(buf[header.IPv4MinimumSize:]) // IGMP message starts right after the minimum-size IPv4 header
+	igmp.SetType(header.IGMPType(igmpType))
+	igmp.SetMaxRespTime(maxRespTime)
+	igmp.SetGroupAddress(groupAddress)
+	igmp.SetChecksum(header.IGMPCalculateChecksum(igmp)) // computed last, once every other IGMP field is set
+
+	e.InjectInbound(ipv4.ProtocolNumber, &stack.PacketBuffer{
+		Data: buf.ToVectorisedView(),
+	})
+}
+
+// createAndInjectMLDPacket creates and injects an MLD packet with the
+// specified fields, sent from header.IPv6Any to the all-nodes multicast group.
+//
+// Note, the router alert option is not included in this packet.
+//
+// TODO(b/162198658): set the router alert option.
+func createAndInjectMLDPacket(e *channel.Endpoint, mldType uint8, maxRespDelay byte, groupAddress tcpip.Address) {
+	icmpSize := header.ICMPv6HeaderSize + header.MLDMinimumSize
+	buf := buffer.NewView(header.IPv6MinimumSize + icmpSize)
+
+	ip := header.IPv6(buf)
+	ip.Encode(&header.IPv6Fields{
+		PayloadLength:     uint16(icmpSize),
+		HopLimit:          header.MLDHopLimit,
+		TransportProtocol: header.ICMPv6ProtocolNumber,
+		SrcAddr:           header.IPv6Any, // must match the source used for the ICMPv6 checksum below
+		DstAddr:           header.IPv6AllNodesMulticastAddress,
+	})
+
+	icmp := header.ICMPv6(buf[header.IPv6MinimumSize:])
+	icmp.SetType(header.ICMPv6Type(mldType))
+	mld := header.MLD(icmp.MessageBody())
+	mld.SetMaximumResponseDelay(uint16(maxRespDelay))
+	mld.SetMulticastAddress(groupAddress)
+	icmp.SetChecksum(header.ICMPv6Checksum(icmp, header.IPv6Any, header.IPv6AllNodesMulticastAddress, buffer.VectorisedView{}))
+
+	e.InjectInbound(ipv6.ProtocolNumber, &stack.PacketBuffer{
+		Data: buf.ToVectorisedView(),
+	})
+}
+
+// TestMGPDisabled tests that no multicast group protocol messages are sent
+// when the protocol is disabled on the stack.
+func TestMGPDisabled(t *testing.T) {
+	tests := []struct {
+		name              string
+		protoNum          tcpip.NetworkProtocolNumber
+		multicastAddr     tcpip.Address
+		sentReportStat    func(*stack.Stack) *tcpip.StatCounter
+		receivedQueryStat func(*stack.Stack) *tcpip.StatCounter
+		rxQuery           func(*channel.Endpoint)
+	}{
+		{
+			name:          "IGMP",
+			protoNum:      ipv4.ProtocolNumber,
+			multicastAddr: ipv4MulticastAddr1,
+			sentReportStat: func(s *stack.Stack) *tcpip.StatCounter {
+				return s.Stats().IGMP.PacketsSent.V2MembershipReport
+			},
+			receivedQueryStat: func(s *stack.Stack) *tcpip.StatCounter {
+				return s.Stats().IGMP.PacketsReceived.MembershipQuery
+			},
+			rxQuery: func(e *channel.Endpoint) {
+				createAndInjectIGMPPacket(e, igmpMembershipQuery, unsolicitedIGMPReportIntervalMaxTenthSec, header.IPv4Any)
+			},
+		},
+		{
+			name:          "MLD",
+			protoNum:      ipv6.ProtocolNumber,
+			multicastAddr: ipv6MulticastAddr1,
+			sentReportStat: func(s *stack.Stack) *tcpip.StatCounter {
+				return s.Stats().ICMP.V6.PacketsSent.MulticastListenerReport
+			},
+			receivedQueryStat: func(s *stack.Stack) *tcpip.StatCounter {
+				return s.Stats().ICMP.V6.PacketsReceived.MulticastListenerQuery
+			},
+			rxQuery: func(e *channel.Endpoint) {
+				createAndInjectMLDPacket(e, mldQuery, 0, header.IPv6Any)
+			},
+		},
+	}
+
+	for _, test := range tests {
+		t.Run(test.name, func(t *testing.T) {
+			e, s, clock := createStack(t, test.protoNum == ipv4.ProtocolNumber /* v4 */, false /* mgpEnabled */)
+
+			// This NIC may join multicast groups when it is enabled but since MGP is
+			// disabled, no reports should be sent.
+			sentReportStat := test.sentReportStat(s)
+			if got := sentReportStat.Value(); got != 0 {
+				t.Fatalf("got sentReportStat.Value() = %d, want = 0", got)
+			}
+			clock.Advance(time.Hour)
+			if p, ok := e.Read(); ok {
+				t.Fatalf("sent unexpected packet, stack with disabled MGP sent packet = %#v", p.Pkt)
+			}
+
+			// Test joining a specific group explicitly and verify that no reports are
+			// sent.
+			if err := s.JoinGroup(test.protoNum, nicID, test.multicastAddr); err != nil {
+				t.Fatalf("JoinGroup(%d, %d, %s): %s", test.protoNum, nicID, test.multicastAddr, err)
+			}
+			if got := sentReportStat.Value(); got != 0 {
+				t.Fatalf("got sentReportStat.Value() = %d, want = 0", got)
+			}
+			clock.Advance(time.Hour)
+			if p, ok := e.Read(); ok {
+				t.Fatalf("sent unexpected packet, stack with disabled MGP sent packet = %#v", p.Pkt)
+			}
+
+			// Inject a general query message. This should only trigger a report to be
+			// sent if the MGP was enabled.
+			test.rxQuery(e)
+			if got := test.receivedQueryStat(s).Value(); got != 1 {
+				t.Fatalf("got receivedQueryStat(_).Value() = %d, want = 1", got)
+			}
+			clock.Advance(time.Hour)
+			if p, ok := e.Read(); ok {
+				t.Fatalf("sent unexpected packet, stack with disabled MGP sent packet = %#v", p.Pkt)
+			}
+		})
+	}
+}
+
+func TestMGPReceiveCounters(t *testing.T) {
+	tests := []struct {
+		name         string
+		headerType   uint8
+		maxRespTime  byte
+		groupAddress tcpip.Address
+		statCounter  func(*stack.Stack) *tcpip.StatCounter
+		rxMGPkt      func(*channel.Endpoint, byte, byte, tcpip.Address) // called as (endpoint, headerType, maxRespTime, groupAddress)
+	}{
+		{
+			name:         "IGMP Membership Query",
+			headerType:   igmpMembershipQuery,
+			maxRespTime:  unsolicitedIGMPReportIntervalMaxTenthSec,
+			groupAddress: header.IPv4Any,
+			statCounter: func(s *stack.Stack) *tcpip.StatCounter {
+				return s.Stats().IGMP.PacketsReceived.MembershipQuery
+			},
+			rxMGPkt: createAndInjectIGMPPacket,
+		},
+		{
+			name:         "IGMPv1 Membership Report",
+			headerType:   igmpv1MembershipReport,
+			maxRespTime:  0,
+			groupAddress: header.IPv4AllSystems,
+			statCounter: func(s *stack.Stack) *tcpip.StatCounter {
+				return s.Stats().IGMP.PacketsReceived.V1MembershipReport
+			},
+			rxMGPkt: createAndInjectIGMPPacket,
+		},
+		{
+			name:         "IGMPv2 Membership Report",
+			headerType:   igmpv2MembershipReport,
+			maxRespTime:  0,
+			groupAddress: header.IPv4AllSystems,
+			statCounter: func(s *stack.Stack) *tcpip.StatCounter {
+				return s.Stats().IGMP.PacketsReceived.V2MembershipReport
+			},
+			rxMGPkt: createAndInjectIGMPPacket,
+		},
+		{
+			name:         "IGMP Leave Group",
+			headerType:   igmpLeaveGroup,
+			maxRespTime:  0,
+			groupAddress: header.IPv4AllRoutersGroup,
+			statCounter: func(s *stack.Stack) *tcpip.StatCounter {
+				return s.Stats().IGMP.PacketsReceived.LeaveGroup
+			},
+			rxMGPkt: createAndInjectIGMPPacket,
+		},
+		{
+			name:         "MLD Query",
+			headerType:   mldQuery,
+			maxRespTime:  0,
+			groupAddress: header.IPv6Any,
+			statCounter: func(s *stack.Stack) *tcpip.StatCounter {
+				return s.Stats().ICMP.V6.PacketsReceived.MulticastListenerQuery
+			},
+			rxMGPkt: createAndInjectMLDPacket,
+		},
+		{
+			name:         "MLD Report",
+			headerType:   mldReport,
+			maxRespTime:  0,
+			groupAddress: header.IPv6Any,
+			statCounter: func(s *stack.Stack) *tcpip.StatCounter {
+				return s.Stats().ICMP.V6.PacketsReceived.MulticastListenerReport
+			},
+			rxMGPkt: createAndInjectMLDPacket,
+		},
+		{
+			name:         "MLD Done",
+			headerType:   mldDone,
+			maxRespTime:  0,
+			groupAddress: header.IPv6Any,
+			statCounter: func(s *stack.Stack) *tcpip.StatCounter {
+				return s.Stats().ICMP.V6.PacketsReceived.MulticastListenerDone
+			},
+			rxMGPkt: createAndInjectMLDPacket,
+		},
+	}
+
+	for _, test := range tests {
+		t.Run(test.name, func(t *testing.T) {
+			e, s, _ := createStack(t, len(test.groupAddress) == header.IPv4AddressSize /* v4 */, true /* mgpEnabled */) // the group address length selects the IP version
+
+			test.rxMGPkt(e, test.headerType, test.maxRespTime, test.groupAddress)
+			if got := test.statCounter(s).Value(); got != 1 { // exactly one packet was injected above
+				t.Fatalf("got %s received = %d, want = 1", test.name, got)
+			}
+		})
+	}
+}
+
+// TestMGPJoinGroup tests that when explicitly joining a multicast group, the
+// stack schedules and sends correct Membership Reports.
+func TestMGPJoinGroup(t *testing.T) {
+	tests := []struct {
+		name                        string
+		protoNum                    tcpip.NetworkProtocolNumber
+		multicastAddr               tcpip.Address
+		maxUnsolicitedResponseDelay time.Duration
+		sentReportStat              func(*stack.Stack) *tcpip.StatCounter
+		receivedQueryStat           func(*stack.Stack) *tcpip.StatCounter // NOTE(review): populated below but never read by this test
+		validateReport              func(*testing.T, channel.PacketInfo)
+		checkInitialGroups          func(*testing.T, *channel.Endpoint, *stack.Stack, *faketime.ManualClock) (uint64, uint64)
+	}{
+		{
+			name:                        "IGMP",
+			protoNum:                    ipv4.ProtocolNumber,
+			multicastAddr:               ipv4MulticastAddr1,
+			maxUnsolicitedResponseDelay: ipv4.UnsolicitedReportIntervalMax,
+			sentReportStat: func(s *stack.Stack) *tcpip.StatCounter {
+				return s.Stats().IGMP.PacketsSent.V2MembershipReport
+			},
+			receivedQueryStat: func(s *stack.Stack) *tcpip.StatCounter {
+				return s.Stats().IGMP.PacketsReceived.MembershipQuery
+			},
+			validateReport: func(t *testing.T, p channel.PacketInfo) {
+				t.Helper()
+
+				validateIGMPPacket(t, p, ipv4MulticastAddr1, igmpv2MembershipReport, 0, ipv4MulticastAddr1)
+			},
+		},
+		{
+			name:                        "MLD",
+			protoNum:                    ipv6.ProtocolNumber,
+			multicastAddr:               ipv6MulticastAddr1,
+			maxUnsolicitedResponseDelay: ipv6.UnsolicitedReportIntervalMax,
+			sentReportStat: func(s *stack.Stack) *tcpip.StatCounter {
+				return s.Stats().ICMP.V6.PacketsSent.MulticastListenerReport
+			},
+			receivedQueryStat: func(s *stack.Stack) *tcpip.StatCounter {
+				return s.Stats().ICMP.V6.PacketsReceived.MulticastListenerQuery
+			},
+			validateReport: func(t *testing.T, p channel.PacketInfo) {
+				t.Helper()
+
+				validateMLDPacket(t, p, ipv6MulticastAddr1, mldReport, 0, ipv6MulticastAddr1)
+			},
+			checkInitialGroups: checkInitialIPv6Groups,
+		},
+	}
+
+	for _, test := range tests {
+		t.Run(test.name, func(t *testing.T) {
+			e, s, clock := createStack(t, test.protoNum == ipv4.ProtocolNumber /* v4 */, true /* mgpEnabled */)
+
+			var reportCounter uint64
+			if test.checkInitialGroups != nil { // only the MLD case sets this; it seeds reportCounter with reports already sent
+				reportCounter, _ = test.checkInitialGroups(t, e, s, clock)
+			}
+
+			// Test joining a specific address explicitly and verify a Report is sent
+			// immediately.
+			if err := s.JoinGroup(test.protoNum, nicID, test.multicastAddr); err != nil {
+				t.Fatalf("JoinGroup(%d, %d, %s): %s", test.protoNum, nicID, test.multicastAddr, err)
+			}
+			reportCounter++
+			sentReportStat := test.sentReportStat(s)
+			if got := sentReportStat.Value(); got != reportCounter {
+				t.Errorf("got sentReportStat.Value() = %d, want = %d", got, reportCounter)
+			}
+			if p, ok := e.Read(); !ok {
+				t.Fatal("expected a report message to be sent")
+			} else {
+				test.validateReport(t, p)
+			}
+			if t.Failed() { // stop here if any of the non-fatal checks above failed
+				t.FailNow()
+			}
+
+			// Verify the second report is sent by the maximum unsolicited response
+			// interval.
+			p, ok := e.Read()
+			if ok {
+				t.Fatalf("sent unexpected packet, expected report only after advancing the clock = %#v", p.Pkt)
+			}
+			clock.Advance(test.maxUnsolicitedResponseDelay)
+			reportCounter++
+			if got := sentReportStat.Value(); got != reportCounter {
+				t.Errorf("got sentReportStat.Value() = %d, want = %d", got, reportCounter)
+			}
+			if p, ok := e.Read(); !ok {
+				t.Fatal("expected a report message to be sent")
+			} else {
+				test.validateReport(t, p)
+			}
+
+			// Should not send any more packets.
+			clock.Advance(time.Hour)
+			if p, ok := e.Read(); ok {
+				t.Fatalf("sent unexpected packet = %#v", p)
+			}
+		})
+	}
+}
+
+// TestMGPLeaveGroup tests that when leaving a previously joined multicast
+// group the stack sends a leave/done message.
+func TestMGPLeaveGroup(t *testing.T) {
+	tests := []struct {
+		name               string
+		protoNum           tcpip.NetworkProtocolNumber
+		multicastAddr      tcpip.Address
+		sentReportStat     func(*stack.Stack) *tcpip.StatCounter
+		sentLeaveStat      func(*stack.Stack) *tcpip.StatCounter
+		validateReport     func(*testing.T, channel.PacketInfo)
+		validateLeave      func(*testing.T, channel.PacketInfo)
+		checkInitialGroups func(*testing.T, *channel.Endpoint, *stack.Stack, *faketime.ManualClock) (uint64, uint64)
+	}{
+		{
+			name:          "IGMP",
+			protoNum:      ipv4.ProtocolNumber,
+			multicastAddr: ipv4MulticastAddr1,
+			sentReportStat: func(s *stack.Stack) *tcpip.StatCounter {
+				return s.Stats().IGMP.PacketsSent.V2MembershipReport
+			},
+			sentLeaveStat: func(s *stack.Stack) *tcpip.StatCounter {
+				return s.Stats().IGMP.PacketsSent.LeaveGroup
+			},
+			validateReport: func(t *testing.T, p channel.PacketInfo) {
+				t.Helper()
+
+				validateIGMPPacket(t, p, ipv4MulticastAddr1, igmpv2MembershipReport, 0, ipv4MulticastAddr1)
+			},
+			validateLeave: func(t *testing.T, p channel.PacketInfo) {
+				t.Helper()
+
+				validateIGMPPacket(t, p, header.IPv4AllRoutersGroup, igmpLeaveGroup, 0, ipv4MulticastAddr1)
+			},
+		},
+		{
+			name:          "MLD",
+			protoNum:      ipv6.ProtocolNumber,
+			multicastAddr: ipv6MulticastAddr1,
+			sentReportStat: func(s *stack.Stack) *tcpip.StatCounter {
+				return s.Stats().ICMP.V6.PacketsSent.MulticastListenerReport
+			},
+			sentLeaveStat: func(s *stack.Stack) *tcpip.StatCounter {
+				return s.Stats().ICMP.V6.PacketsSent.MulticastListenerDone
+			},
+			validateReport: func(t *testing.T, p channel.PacketInfo) {
+				t.Helper()
+
+				validateMLDPacket(t, p, ipv6MulticastAddr1, mldReport, 0, ipv6MulticastAddr1)
+			},
+			validateLeave: func(t *testing.T, p channel.PacketInfo) {
+				t.Helper()
+
+				validateMLDPacket(t, p, header.IPv6AllRoutersMulticastAddress, mldDone, 0, ipv6MulticastAddr1)
+			},
+			checkInitialGroups: checkInitialIPv6Groups,
+		},
+	}
+
+	for _, test := range tests {
+		t.Run(test.name, func(t *testing.T) {
+			e, s, clock := createStack(t, test.protoNum == ipv4.ProtocolNumber /* v4 */, true /* mgpEnabled */)
+
+			var reportCounter uint64
+			var leaveCounter uint64
+			if test.checkInitialGroups != nil { // seeds both counters with what was already sent
+				reportCounter, leaveCounter = test.checkInitialGroups(t, e, s, clock)
+			}
+
+			if err := s.JoinGroup(test.protoNum, nicID, test.multicastAddr); err != nil {
+				t.Fatalf("JoinGroup(%d, %d, %s): %s", test.protoNum, nicID, test.multicastAddr, err)
+			}
+			reportCounter++
+			if got := test.sentReportStat(s).Value(); got != reportCounter {
+				t.Errorf("got sentReportStat(_).Value() = %d, want = %d", got, reportCounter)
+			}
+			if p, ok := e.Read(); !ok {
+				t.Fatal("expected a report message to be sent")
+			} else {
+				test.validateReport(t, p)
+			}
+			if t.Failed() {
+				t.FailNow()
+			}
+
+			// Leaving the group should trigger a leave/done message to be sent.
+			if err := s.LeaveGroup(test.protoNum, nicID, test.multicastAddr); err != nil {
+				t.Fatalf("LeaveGroup(%d, %d, %s): %s", test.protoNum, nicID, test.multicastAddr, err)
+			}
+			leaveCounter++
+			if got := test.sentLeaveStat(s).Value(); got != leaveCounter {
+				t.Fatalf("got sentLeaveStat(_).Value() = %d, want = %d", got, leaveCounter)
+			}
+			if p, ok := e.Read(); !ok {
+				t.Fatal("expected a leave message to be sent")
+			} else {
+				test.validateLeave(t, p)
+			}
+
+			// Should not send any more packets.
+			clock.Advance(time.Hour)
+			if p, ok := e.Read(); ok {
+				t.Fatalf("sent unexpected packet = %#v", p)
+			}
+		})
+	}
+}
+
+// TestMGPQueryMessages tests that a report is sent in response to query
+// messages.
+func TestMGPQueryMessages(t *testing.T) {
+	tests := []struct {
+		name                        string
+		protoNum                    tcpip.NetworkProtocolNumber
+		multicastAddr               tcpip.Address
+		maxUnsolicitedResponseDelay time.Duration
+		sentReportStat              func(*stack.Stack) *tcpip.StatCounter
+		receivedQueryStat           func(*stack.Stack) *tcpip.StatCounter // NOTE(review): populated below but never read by this test
+		rxQuery                     func(*channel.Endpoint, uint8, tcpip.Address)
+		validateReport              func(*testing.T, channel.PacketInfo)
+		maxRespTimeToDuration       func(uint8) time.Duration
+		checkInitialGroups          func(*testing.T, *channel.Endpoint, *stack.Stack, *faketime.ManualClock) (uint64, uint64)
+	}{
+		{
+			name:                        "IGMP",
+			protoNum:                    ipv4.ProtocolNumber,
+			multicastAddr:               ipv4MulticastAddr1,
+			maxUnsolicitedResponseDelay: ipv4.UnsolicitedReportIntervalMax,
+			sentReportStat: func(s *stack.Stack) *tcpip.StatCounter {
+				return s.Stats().IGMP.PacketsSent.V2MembershipReport
+			},
+			receivedQueryStat: func(s *stack.Stack) *tcpip.StatCounter {
+				return s.Stats().IGMP.PacketsReceived.MembershipQuery
+			},
+			rxQuery: func(e *channel.Endpoint, maxRespTime uint8, groupAddress tcpip.Address) {
+				createAndInjectIGMPPacket(e, igmpMembershipQuery, maxRespTime, groupAddress)
+			},
+			validateReport: func(t *testing.T, p channel.PacketInfo) {
+				t.Helper()
+
+				validateIGMPPacket(t, p, ipv4MulticastAddr1, igmpv2MembershipReport, 0, ipv4MulticastAddr1)
+			},
+			maxRespTimeToDuration: header.DecisecondToDuration,
+		},
+		{
+			name:                        "MLD",
+			protoNum:                    ipv6.ProtocolNumber,
+			multicastAddr:               ipv6MulticastAddr1,
+			maxUnsolicitedResponseDelay: ipv6.UnsolicitedReportIntervalMax,
+			sentReportStat: func(s *stack.Stack) *tcpip.StatCounter {
+				return s.Stats().ICMP.V6.PacketsSent.MulticastListenerReport
+			},
+			receivedQueryStat: func(s *stack.Stack) *tcpip.StatCounter {
+				return s.Stats().ICMP.V6.PacketsReceived.MulticastListenerQuery
+			},
+			rxQuery: func(e *channel.Endpoint, maxRespTime uint8, groupAddress tcpip.Address) {
+				createAndInjectMLDPacket(e, mldQuery, maxRespTime, groupAddress)
+			},
+			validateReport: func(t *testing.T, p channel.PacketInfo) {
+				t.Helper()
+
+				validateMLDPacket(t, p, ipv6MulticastAddr1, mldReport, 0, ipv6MulticastAddr1)
+			},
+			maxRespTimeToDuration: func(d uint8) time.Duration {
+				return time.Duration(d) * time.Millisecond
+			},
+			checkInitialGroups: checkInitialIPv6Groups,
+		},
+	}
+
+	for _, test := range tests {
+		t.Run(test.name, func(t *testing.T) {
+			subTests := []struct {
+				name          string
+				multicastAddr tcpip.Address
+				expectReport  bool
+			}{
+				{
+					name:          "Unspecified",
+					multicastAddr: tcpip.Address(strings.Repeat("\x00", len(test.multicastAddr))),
+					expectReport:  true,
+				},
+				{
+					name:          "Specified",
+					multicastAddr: test.multicastAddr,
+					expectReport:  true,
+				},
+				{
+					name: "Specified other address",
+					multicastAddr: func() tcpip.Address {
+						addrBytes := []byte(test.multicastAddr)
+						addrBytes[len(addrBytes)-1]++
+						return tcpip.Address(addrBytes)
+					}(),
+					expectReport: false,
+				},
+			}
+
+			for _, subTest := range subTests {
+				t.Run(subTest.name, func(t *testing.T) {
+					e, s, clock := createStack(t, test.protoNum == ipv4.ProtocolNumber /* v4 */, true /* mgpEnabled */)
+
+					var reportCounter uint64
+					if test.checkInitialGroups != nil {
+						reportCounter, _ = test.checkInitialGroups(t, e, s, clock)
+					}
+
+					if err := s.JoinGroup(test.protoNum, nicID, test.multicastAddr); err != nil {
+						t.Fatalf("JoinGroup(%d, %d, %s): %s", test.protoNum, nicID, test.multicastAddr, err)
+					}
+					sentReportStat := test.sentReportStat(s)
+					for i := 0; i < maxUnsolicitedReports; i++ {
+						// Each iteration accounts for exactly one unsolicited report.
+						reportCounter++
+						if got := sentReportStat.Value(); got != reportCounter {
+							t.Errorf("(i=%d) got sentReportStat.Value() = %d, want = %d", i, got, reportCounter)
+						}
+						if p, ok := e.Read(); !ok {
+							t.Fatalf("expected %d-th report message to be sent", i)
+						} else {
+							test.validateReport(t, p)
+						}
+						clock.Advance(test.maxUnsolicitedResponseDelay)
+					}
+					if t.Failed() {
+						t.FailNow()
+					}
+
+					// Should not send any more packets until a query.
+					clock.Advance(time.Hour)
+					if p, ok := e.Read(); ok {
+						t.Fatalf("sent unexpected packet = %#v", p)
+					}
+
+					// Receive a query message which should trigger a report to be sent at
+					// some time before the maximum response time if the query is
+					// targeted at the host.
+					const maxRespTime = 100
+					test.rxQuery(e, maxRespTime, subTest.multicastAddr)
+					if p, ok := e.Read(); ok {
+						t.Fatalf("sent unexpected packet = %#v", p.Pkt)
+					}
+
+					if subTest.expectReport {
+						clock.Advance(test.maxRespTimeToDuration(maxRespTime))
+						reportCounter++
+						if got := sentReportStat.Value(); got != reportCounter {
+							t.Errorf("got sentReportStat.Value() = %d, want = %d", got, reportCounter)
+						}
+						if p, ok := e.Read(); !ok {
+							t.Fatal("expected a report message to be sent")
+						} else {
+							test.validateReport(t, p)
+						}
+					}
+
+					// Should not send any more packets.
+					clock.Advance(time.Hour)
+					if p, ok := e.Read(); ok {
+						t.Fatalf("sent unexpected packet = %#v", p)
+					}
+				})
+			}
+		})
+	}
+}
+
+// TestMGPReportMessages tests that no further reports or leave/done messages
+// are sent after receiving a report.
+func TestMGPReportMessages(t *testing.T) {
+	tests := []struct {
+		name                  string
+		protoNum              tcpip.NetworkProtocolNumber
+		multicastAddr         tcpip.Address
+		sentReportStat        func(*stack.Stack) *tcpip.StatCounter
+		sentLeaveStat         func(*stack.Stack) *tcpip.StatCounter
+		rxReport              func(*channel.Endpoint)
+		validateReport        func(*testing.T, channel.PacketInfo)
+		maxRespTimeToDuration func(uint8) time.Duration // NOTE(review): populated below but never read by this test
+		checkInitialGroups    func(*testing.T, *channel.Endpoint, *stack.Stack, *faketime.ManualClock) (uint64, uint64)
+	}{
+		{
+			name:          "IGMP",
+			protoNum:      ipv4.ProtocolNumber,
+			multicastAddr: ipv4MulticastAddr1,
+			sentReportStat: func(s *stack.Stack) *tcpip.StatCounter {
+				return s.Stats().IGMP.PacketsSent.V2MembershipReport
+			},
+			sentLeaveStat: func(s *stack.Stack) *tcpip.StatCounter {
+				return s.Stats().IGMP.PacketsSent.LeaveGroup
+			},
+			rxReport: func(e *channel.Endpoint) {
+				createAndInjectIGMPPacket(e, igmpv2MembershipReport, 0, ipv4MulticastAddr1)
+			},
+			validateReport: func(t *testing.T, p channel.PacketInfo) {
+				t.Helper()
+
+				validateIGMPPacket(t, p, ipv4MulticastAddr1, igmpv2MembershipReport, 0, ipv4MulticastAddr1)
+			},
+			maxRespTimeToDuration: header.DecisecondToDuration,
+		},
+		{
+			name:          "MLD",
+			protoNum:      ipv6.ProtocolNumber,
+			multicastAddr: ipv6MulticastAddr1,
+			sentReportStat: func(s *stack.Stack) *tcpip.StatCounter {
+				return s.Stats().ICMP.V6.PacketsSent.MulticastListenerReport
+			},
+			sentLeaveStat: func(s *stack.Stack) *tcpip.StatCounter {
+				return s.Stats().ICMP.V6.PacketsSent.MulticastListenerDone
+			},
+			rxReport: func(e *channel.Endpoint) {
+				createAndInjectMLDPacket(e, mldReport, 0, ipv6MulticastAddr1)
+			},
+			validateReport: func(t *testing.T, p channel.PacketInfo) {
+				t.Helper()
+
+				validateMLDPacket(t, p, ipv6MulticastAddr1, mldReport, 0, ipv6MulticastAddr1)
+			},
+			maxRespTimeToDuration: func(d uint8) time.Duration {
+				return time.Duration(d) * time.Millisecond
+			},
+			checkInitialGroups: checkInitialIPv6Groups,
+		},
+	}
+
+	for _, test := range tests {
+		t.Run(test.name, func(t *testing.T) {
+			e, s, clock := createStack(t, test.protoNum == ipv4.ProtocolNumber /* v4 */, true /* mgpEnabled */)
+
+			var reportCounter uint64
+			var leaveCounter uint64
+			if test.checkInitialGroups != nil { // seeds both counters with what was already sent
+				reportCounter, leaveCounter = test.checkInitialGroups(t, e, s, clock)
+			}
+
+			if err := s.JoinGroup(test.protoNum, nicID, test.multicastAddr); err != nil {
+				t.Fatalf("JoinGroup(%d, %d, %s): %s", test.protoNum, nicID, test.multicastAddr, err)
+			}
+			sentReportStat := test.sentReportStat(s)
+			reportCounter++
+			if got := sentReportStat.Value(); got != reportCounter {
+				t.Errorf("got sentReportStat.Value() = %d, want = %d", got, reportCounter)
+			}
+			if p, ok := e.Read(); !ok {
+				t.Fatal("expected a report message to be sent")
+			} else {
+				test.validateReport(t, p)
+			}
+			if t.Failed() {
+				t.FailNow()
+			}
+
+			// Receiving a report for a group we joined should cancel any further
+			// reports.
+			test.rxReport(e)
+			clock.Advance(time.Hour)
+			if got := sentReportStat.Value(); got != reportCounter {
+				t.Errorf("got sentReportStat.Value() = %d, want = %d", got, reportCounter)
+			}
+			if p, ok := e.Read(); ok {
+				t.Errorf("sent unexpected packet = %#v", p)
+			}
+			if t.Failed() {
+				t.FailNow()
+			}
+
+			// Leaving a group after getting a report should not send a leave/done
+			// message.
+			if err := s.LeaveGroup(test.protoNum, nicID, test.multicastAddr); err != nil {
+				t.Fatalf("LeaveGroup(%d, %d, %s): %s", test.protoNum, nicID, test.multicastAddr, err)
+			}
+			clock.Advance(time.Hour)
+			if got := test.sentLeaveStat(s).Value(); got != leaveCounter {
+				t.Fatalf("got sentLeaveStat(_).Value() = %d, want = %d", got, leaveCounter)
+			}
+
+			// Should not send any more packets.
+			clock.Advance(time.Hour)
+			if p, ok := e.Read(); ok {
+				t.Fatalf("sent unexpected packet = %#v", p)
+			}
+		})
+	}
+}
+
+func TestMGPWithNICLifecycle(t *testing.T) {
+	tests := []struct {
+		name                        string
+		protoNum                    tcpip.NetworkProtocolNumber
+		multicastAddrs              []tcpip.Address
+		finalMulticastAddr          tcpip.Address
+		maxUnsolicitedResponseDelay time.Duration
+		sentReportStat              func(*stack.Stack) *tcpip.StatCounter
+		sentLeaveStat               func(*stack.Stack) *tcpip.StatCounter
+		validateReport              func(*testing.T, channel.PacketInfo, tcpip.Address)
+		validateLeave               func(*testing.T, channel.PacketInfo, tcpip.Address)
+		getAndCheckGroupAddress     func(*testing.T, map[tcpip.Address]bool, channel.PacketInfo) tcpip.Address
+		checkInitialGroups          func(*testing.T, *channel.Endpoint, *stack.Stack, *faketime.ManualClock) (uint64, uint64)
+	}{
+		{
+			name:                        "IGMP",
+			protoNum:                    ipv4.ProtocolNumber,
+			multicastAddrs:              []tcpip.Address{ipv4MulticastAddr1, ipv4MulticastAddr2},
+			finalMulticastAddr:          ipv4MulticastAddr3,
+			maxUnsolicitedResponseDelay: ipv4.UnsolicitedReportIntervalMax,
+			sentReportStat: func(s *stack.Stack) *tcpip.StatCounter {
+				return s.Stats().IGMP.PacketsSent.V2MembershipReport
+			},
+			sentLeaveStat: func(s *stack.Stack) *tcpip.StatCounter {
+				return s.Stats().IGMP.PacketsSent.LeaveGroup
+			},
+			validateReport: func(t *testing.T, p channel.PacketInfo, addr tcpip.Address) {
+				t.Helper()
+
+				validateIGMPPacket(t, p, addr, igmpv2MembershipReport, 0, addr)
+			},
+			validateLeave: func(t *testing.T, p channel.PacketInfo, addr tcpip.Address) {
+				t.Helper()
+
+				validateIGMPPacket(t, p, header.IPv4AllRoutersGroup, igmpLeaveGroup, 0, addr)
+			},
+			getAndCheckGroupAddress: func(t *testing.T, seen map[tcpip.Address]bool, p channel.PacketInfo) tcpip.Address {
+				t.Helper()
+
+				ipv4 := header.IPv4(stack.PayloadSince(p.Pkt.NetworkHeader()))
+				if got := tcpip.TransportProtocolNumber(ipv4.Protocol()); got != header.IGMPProtocolNumber {
+					t.Fatalf("got ipv4.Protocol() = %d, want = %d", got, header.IGMPProtocolNumber)
+				}
+				addr := header.IGMP(ipv4.Payload()).GroupAddress()
+				s, ok := seen[addr]
+				if !ok {
+					t.Fatalf("unexpectedly got a packet for group %s", addr)
+				}
+				if s {
+					t.Fatalf("already saw packet for group %s", addr)
+				}
+				seen[addr] = true
+				return addr
+			},
+		},
+		{
+			name:                        "MLD",
+			protoNum:                    ipv6.ProtocolNumber,
+			multicastAddrs:              []tcpip.Address{ipv6MulticastAddr1, ipv6MulticastAddr2},
+			finalMulticastAddr:          ipv6MulticastAddr3,
+			maxUnsolicitedResponseDelay: ipv6.UnsolicitedReportIntervalMax,
+			sentReportStat: func(s *stack.Stack) *tcpip.StatCounter {
+				return s.Stats().ICMP.V6.PacketsSent.MulticastListenerReport
+			},
+			sentLeaveStat: func(s *stack.Stack) *tcpip.StatCounter {
+				return s.Stats().ICMP.V6.PacketsSent.MulticastListenerDone
+			},
+			validateReport: func(t *testing.T, p channel.PacketInfo, addr tcpip.Address) {
+				t.Helper()
+
+				validateMLDPacket(t, p, addr, mldReport, 0, addr)
+			},
+			validateLeave: func(t *testing.T, p channel.PacketInfo, addr tcpip.Address) {
+				t.Helper()
+
+				validateMLDPacket(t, p, header.IPv6AllRoutersMulticastAddress, mldDone, 0, addr)
+			},
+			getAndCheckGroupAddress: func(t *testing.T, seen map[tcpip.Address]bool, p channel.PacketInfo) tcpip.Address {
+				t.Helper()
+
+				ipv6 := header.IPv6(stack.PayloadSince(p.Pkt.NetworkHeader()))
+
+				ipv6HeaderIter := header.MakeIPv6PayloadIterator(
+					header.IPv6ExtensionHeaderIdentifier(ipv6.NextHeader()),
+					buffer.View(ipv6.Payload()).ToVectorisedView(),
+				)
+
+				var transport header.IPv6RawPayloadHeader
+				for {
+					h, done, err := ipv6HeaderIter.Next()
+					if err != nil {
+						t.Fatalf("ipv6HeaderIter.Next(): %s", err)
+					}
+					if done {
+						t.Fatalf("ipv6HeaderIter.Next() = (%T, %t, _), want = (_, false, _)", h, done)
+					}
+					if t, ok := h.(header.IPv6RawPayloadHeader); ok {
+						transport = t
+						break
+					}
+				}
+
+				if got := tcpip.TransportProtocolNumber(transport.Identifier); got != header.ICMPv6ProtocolNumber {
+					t.Fatalf("got ipv6.NextHeader() = %d, want = %d", got, header.ICMPv6ProtocolNumber)
+				}
+				icmpv6 := header.ICMPv6(transport.Buf.ToView())
+				if got := icmpv6.Type(); got != header.ICMPv6MulticastListenerReport && got != header.ICMPv6MulticastListenerDone {
+					t.Fatalf("got icmpv6.Type() = %d, want = %d or %d", got, header.ICMPv6MulticastListenerReport, header.ICMPv6MulticastListenerDone)
+				}
+				addr := header.MLD(icmpv6.MessageBody()).MulticastAddress()
+				s, ok := seen[addr]
+				if !ok {
+					t.Fatalf("unexpectedly got a packet for group %s", addr)
+				}
+				if s {
+					t.Fatalf("already saw packet for group %s", addr)
+				}
+				seen[addr] = true
+				return addr
+			},
+			checkInitialGroups: checkInitialIPv6Groups,
+		},
+	}
+
+	for _, test := range tests {
+		t.Run(test.name, func(t *testing.T) {
+			e, s, clock := createStack(t, test.protoNum == ipv4.ProtocolNumber /* v4 */, true /* mgpEnabled */)
+
+			var reportCounter uint64
+			var leaveCounter uint64
+			if test.checkInitialGroups != nil {
+				reportCounter, leaveCounter = test.checkInitialGroups(t, e, s, clock)
+			}
+
+			sentReportStat := test.sentReportStat(s)
+			for _, a := range test.multicastAddrs {
+				if err := s.JoinGroup(test.protoNum, nicID, a); err != nil {
+					t.Fatalf("JoinGroup(%d, %d, %s): %s", test.protoNum, nicID, a, err)
+				}
+				reportCounter++
+				if got := sentReportStat.Value(); got != reportCounter {
+					t.Errorf("got sentReportStat.Value() = %d, want = %d", got, reportCounter)
+				}
+				if p, ok := e.Read(); !ok {
+					t.Fatalf("expected a report message to be sent for %s", a)
+				} else {
+					test.validateReport(t, p, a)
+				}
+			}
+			if t.Failed() {
+				t.FailNow()
+			}
+
+			// Leave messages should be sent for the joined groups when the NIC is
+			// disabled.
+			if err := s.DisableNIC(nicID); err != nil {
+				t.Fatalf("DisableNIC(%d): %s", nicID, err)
+			}
+			sentLeaveStat := test.sentLeaveStat(s)
+			leaveCounter += uint64(len(test.multicastAddrs))
+			if got := sentLeaveStat.Value(); got != leaveCounter {
+				t.Errorf("got sentLeaveStat.Value() = %d, want = %d", got, leaveCounter)
+			}
+			{
+				seen := make(map[tcpip.Address]bool)
+				for _, a := range test.multicastAddrs {
+					seen[a] = false
+				}
+
+				for i, _ := range test.multicastAddrs {
+					p, ok := e.Read()
+					if !ok {
+						t.Fatalf("expected (%d-th) leave message to be sent", i)
+					}
+
+					test.validateLeave(t, p, test.getAndCheckGroupAddress(t, seen, p))
+				}
+			}
+			if t.Failed() {
+				t.FailNow()
+			}
+
+			// Reports should be sent for the joined groups when the NIC is enabled.
+			if err := s.EnableNIC(nicID); err != nil {
+				t.Fatalf("EnableNIC(%d): %s", nicID, err)
+			}
+			reportCounter += uint64(len(test.multicastAddrs))
+			if got := sentReportStat.Value(); got != reportCounter {
+				t.Errorf("got sentReportStat.Value() = %d, want = %d", got, reportCounter)
+			}
+			{
+				seen := make(map[tcpip.Address]bool)
+				for _, a := range test.multicastAddrs {
+					seen[a] = false
+				}
+
+				for i, _ := range test.multicastAddrs {
+					p, ok := e.Read()
+					if !ok {
+						t.Fatalf("expected (%d-th) report message to be sent", i)
+					}
+
+					test.validateReport(t, p, test.getAndCheckGroupAddress(t, seen, p))
+				}
+			}
+			if t.Failed() {
+				t.FailNow()
+			}
+
+			// Joining/leaving a group while disabled should not send any messages.
+			if err := s.DisableNIC(nicID); err != nil {
+				t.Fatalf("DisableNIC(%d): %s", nicID, err)
+			}
+			leaveCounter += uint64(len(test.multicastAddrs))
+			if got := sentLeaveStat.Value(); got != leaveCounter {
+				t.Errorf("got sentLeaveStat.Value() = %d, want = %d", got, leaveCounter)
+			}
+			for i, _ := range test.multicastAddrs {
+				if _, ok := e.Read(); !ok {
+					t.Fatalf("expected (%d-th) leave message to be sent", i)
+				}
+			}
+			for _, a := range test.multicastAddrs {
+				if err := s.LeaveGroup(test.protoNum, nicID, a); err != nil {
+					t.Fatalf("LeaveGroup(%d, nic, %s): %s", test.protoNum, a, err)
+				}
+				if got := sentLeaveStat.Value(); got != leaveCounter {
+					t.Errorf("got sentLeaveStat.Value() = %d, want = %d", got, leaveCounter)
+				}
+				if p, ok := e.Read(); ok {
+					t.Fatalf("leaving group %s on disabled NIC sent unexpected packet = %#v", a, p.Pkt)
+				}
+			}
+			if err := s.JoinGroup(test.protoNum, nicID, test.finalMulticastAddr); err != nil {
+				t.Fatalf("JoinGroup(%d, %d, %s): %s", test.protoNum, nicID, test.finalMulticastAddr, err)
+			}
+			if got := sentReportStat.Value(); got != reportCounter {
+				t.Errorf("got sentReportStat.Value() = %d, want = %d", got, reportCounter)
+			}
+			if p, ok := e.Read(); ok {
+				t.Fatalf("joining group %s on disabled NIC sent unexpected packet = %#v", test.finalMulticastAddr, p.Pkt)
+			}
+
+			// A report should only be sent for the group we last joined after
+			// enabling the NIC since the original groups were all left.
+			if err := s.EnableNIC(nicID); err != nil {
+				t.Fatalf("EnableNIC(%d): %s", nicID, err)
+			}
+			reportCounter++
+			if got := sentReportStat.Value(); got != reportCounter {
+				t.Errorf("got sentReportStat.Value() = %d, want = %d", got, reportCounter)
+			}
+			if p, ok := e.Read(); !ok {
+				t.Fatal("expected a report message to be sent")
+			} else {
+				test.validateReport(t, p, test.finalMulticastAddr)
+			}
+
+			clock.Advance(test.maxUnsolicitedResponseDelay)
+			reportCounter++
+			if got := sentReportStat.Value(); got != reportCounter {
+				t.Errorf("got sentReportStat.Value() = %d, want = %d", got, reportCounter)
+			}
+			if p, ok := e.Read(); !ok {
+				t.Fatal("expected a report message to be sent")
+			} else {
+				test.validateReport(t, p, test.finalMulticastAddr)
+			}
+
+			// Should not send any more packets.
+			clock.Advance(time.Hour)
+			if p, ok := e.Read(); ok {
+				t.Fatalf("sent unexpected packet = %#v", p)
+			}
+		})
+	}
+}
+
+// TestMGPDisabledOnLoopback tests that the multicast group protocol is not
+// performed on loopback interfaces since they have no neighbours.
+func TestMGPDisabledOnLoopback(t *testing.T) {
+	tests := []struct {
+		name           string
+		protoNum       tcpip.NetworkProtocolNumber
+		multicastAddr  tcpip.Address
+		sentReportStat func(*stack.Stack) *tcpip.StatCounter
+	}{
+		{
+			name:          "IGMP",
+			protoNum:      ipv4.ProtocolNumber,
+			multicastAddr: ipv4MulticastAddr1,
+			sentReportStat: func(s *stack.Stack) *tcpip.StatCounter {
+				return s.Stats().IGMP.PacketsSent.V2MembershipReport
+			},
+		},
+		{
+			name:          "MLD",
+			protoNum:      ipv6.ProtocolNumber,
+			multicastAddr: ipv6MulticastAddr1,
+			sentReportStat: func(s *stack.Stack) *tcpip.StatCounter {
+				return s.Stats().ICMP.V6.PacketsSent.MulticastListenerReport
+			},
+		},
+	}
+
+	for _, test := range tests {
+		t.Run(test.name, func(t *testing.T) {
+			s, clock := createStackWithLinkEndpoint(t, test.protoNum == ipv4.ProtocolNumber /* v4 */, true /* mgpEnabled */, loopback.New())
+
+			sentReportStat := test.sentReportStat(s)
+			if got := sentReportStat.Value(); got != 0 {
+				t.Fatalf("got sentReportStat.Value() = %d, want = 0", got)
+			}
+			clock.Advance(time.Hour)
+			if got := sentReportStat.Value(); got != 0 {
+				t.Fatalf("got sentReportStat.Value() = %d, want = 0", got)
+			}
+
+			// Test joining a specific group explicitly and verify that no reports are
+			// sent.
+			if err := s.JoinGroup(test.protoNum, nicID, test.multicastAddr); err != nil {
+				t.Fatalf("JoinGroup(%d, %d, %s): %s", test.protoNum, nicID, test.multicastAddr, err)
+			}
+			if got := sentReportStat.Value(); got != 0 {
+				t.Fatalf("got sentReportStat.Value() = %d, want = 0", got)
+			}
+			clock.Advance(time.Hour)
+			if got := sentReportStat.Value(); got != 0 {
+				t.Fatalf("got sentReportStat.Value() = %d, want = 0", got)
+			}
+		})
+	}
+}
diff --git a/pkg/tcpip/network/testutil/testutil.go b/pkg/tcpip/network/testutil/testutil.go
index 7cc52985e..5c3363759 100644
--- a/pkg/tcpip/network/testutil/testutil.go
+++ b/pkg/tcpip/network/testutil/testutil.go
@@ -85,21 +85,6 @@ func (ep *MockLinkEndpoint) WritePackets(r *stack.Route, gso *stack.GSO, pkts st
 	return n, nil
 }
 
-// WriteRawPacket implements LinkEndpoint.WriteRawPacket.
-func (ep *MockLinkEndpoint) WriteRawPacket(vv buffer.VectorisedView) *tcpip.Error {
-	if ep.allowPackets == 0 {
-		return ep.err
-	}
-	ep.allowPackets--
-
-	pkt := stack.NewPacketBuffer(stack.PacketBufferOptions{
-		Data: vv,
-	})
-	ep.WrittenPackets = append(ep.WrittenPackets, pkt)
-
-	return nil
-}
-
 // Attach implements LinkEndpoint.Attach.
 func (*MockLinkEndpoint) Attach(stack.NetworkDispatcher) {}
 
diff --git a/pkg/tcpip/sample/tun_tcp_echo/main.go b/pkg/tcpip/sample/tun_tcp_echo/main.go
index 8e0ee1cd7..1c2afd554 100644
--- a/pkg/tcpip/sample/tun_tcp_echo/main.go
+++ b/pkg/tcpip/sample/tun_tcp_echo/main.go
@@ -148,10 +148,6 @@ func main() {
 		log.Fatal(err)
 	}
 
-	if err := s.AddAddress(1, arp.ProtocolNumber, arp.ProtocolAddress); err != nil {
-		log.Fatal(err)
-	}
-
 	subnet, err := tcpip.NewSubnet(tcpip.Address(strings.Repeat("\x00", len(addr))), tcpip.AddressMask(strings.Repeat("\x00", len(addr))))
 	if err != nil {
 		log.Fatal(err)
diff --git a/pkg/tcpip/socketops.go b/pkg/tcpip/socketops.go
new file mode 100644
index 000000000..b60a5fd76
--- /dev/null
+++ b/pkg/tcpip/socketops.go
@@ -0,0 +1,364 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package tcpip
+
+import (
+	"sync/atomic"
+
+	"gvisor.dev/gvisor/pkg/sync"
+)
+
+// SocketOptionsHandler holds methods that help define endpoint specific
+// behavior for socket level socket options. These must be implemented by
+// endpoints to get notified when socket level options are set.
+type SocketOptionsHandler interface {
+	// OnReuseAddressSet is invoked when SO_REUSEADDR is set for an endpoint.
+	OnReuseAddressSet(v bool)
+
+	// OnReusePortSet is invoked when SO_REUSEPORT is set for an endpoint.
+	OnReusePortSet(v bool)
+
+	// OnKeepAliveSet is invoked when SO_KEEPALIVE is set for an endpoint.
+	OnKeepAliveSet(v bool)
+
+	// OnDelayOptionSet is invoked when TCP_NODELAY is set for an endpoint.
+	// Note that v will be the inverse of TCP_NODELAY option.
+	OnDelayOptionSet(v bool)
+
+	// OnCorkOptionSet is invoked when TCP_CORK is set for an endpoint.
+	OnCorkOptionSet(v bool)
+
+	// LastError is invoked when SO_ERROR is read for an endpoint.
+	LastError() *Error
+}
+
+// DefaultSocketOptionsHandler is an embeddable type that provides no-op
+// implementations of the SocketOptionsHandler methods.
+type DefaultSocketOptionsHandler struct{}
+
+var _ SocketOptionsHandler = (*DefaultSocketOptionsHandler)(nil)
+
+// OnReuseAddressSet implements SocketOptionsHandler.OnReuseAddressSet.
+func (*DefaultSocketOptionsHandler) OnReuseAddressSet(bool) {}
+
+// OnReusePortSet implements SocketOptionsHandler.OnReusePortSet.
+func (*DefaultSocketOptionsHandler) OnReusePortSet(bool) {}
+
+// OnKeepAliveSet implements SocketOptionsHandler.OnKeepAliveSet.
+func (*DefaultSocketOptionsHandler) OnKeepAliveSet(bool) {}
+
+// OnDelayOptionSet implements SocketOptionsHandler.OnDelayOptionSet.
+func (*DefaultSocketOptionsHandler) OnDelayOptionSet(bool) {}
+
+// OnCorkOptionSet implements SocketOptionsHandler.OnCorkOptionSet.
+func (*DefaultSocketOptionsHandler) OnCorkOptionSet(bool) {}
+
+// LastError implements SocketOptionsHandler.LastError.
+func (*DefaultSocketOptionsHandler) LastError() *Error {
+	return nil
+}
+
+// SocketOptions contains all the variables which store values for SOL_SOCKET,
+// SOL_IP, SOL_IPV6 and SOL_TCP level options.
+//
+// +stateify savable
+type SocketOptions struct {
+	handler SocketOptionsHandler
+
+	// These fields are accessed and modified using atomic operations.
+
+	// broadcastEnabled determines whether datagram sockets are allowed to
+	// send packets to a broadcast address.
+	broadcastEnabled uint32
+
+	// passCredEnabled determines whether SCM_CREDENTIALS socket control
+	// messages are enabled.
+	passCredEnabled uint32
+
+	// noChecksumEnabled determines whether UDP checksum is disabled while
+	// transmitting for this socket.
+	noChecksumEnabled uint32
+
+	// reuseAddressEnabled determines whether Bind() should allow reuse of
+	// local address.
+	reuseAddressEnabled uint32
+
+	// reusePortEnabled determines whether to permit multiple sockets to be
+	// bound to an identical socket address.
+	reusePortEnabled uint32
+
+	// keepAliveEnabled determines whether TCP keepalive is enabled for this
+	// socket.
+	keepAliveEnabled uint32
+
+	// multicastLoopEnabled determines whether multicast packets sent over a
+	// non-loopback interface will be looped back. Analogous to inet->mc_loop.
+	multicastLoopEnabled uint32
+
+	// receiveTOSEnabled is used to specify if the TOS ancillary message is
+	// passed with incoming packets.
+	receiveTOSEnabled uint32
+
+	// receiveTClassEnabled is used to specify if the IPV6_TCLASS ancillary
+	// message is passed with incoming packets.
+	receiveTClassEnabled uint32
+
+	// receivePacketInfoEnabled is used to specify if more information is
+	// provided with incoming packets such as interface index and address.
+	receivePacketInfoEnabled uint32
+
+	// hdrIncludedEnabled is used to indicate for a raw endpoint that all packets
+	// being written have an IP header and the endpoint should not attach an IP
+	// header.
+	hdrIncludedEnabled uint32
+
+	// v6OnlyEnabled is used to determine whether an IPv6 socket is to be
+	// restricted to sending and receiving IPv6 packets only.
+	v6OnlyEnabled uint32
+
+	// quickAckEnabled is used to represent the value of TCP_QUICKACK option.
+	// It currently does not have any effect on the TCP endpoint.
+	quickAckEnabled uint32
+
+	// delayOptionEnabled is used to specify if data should be sent out immediately
+	// by the transport protocol. For TCP, it determines if the Nagle algorithm
+	// is on or off.
+	delayOptionEnabled uint32
+
+	// corkOptionEnabled is used to specify if data should be held until segments
+	// are full by the TCP transport protocol.
+	corkOptionEnabled uint32
+
+	// receiveOriginalDstAddress is used to specify if the original destination of
+	// the incoming packet should be returned as an ancillary message.
+	receiveOriginalDstAddress uint32
+
+	// mu protects the access to the below fields.
+	mu sync.Mutex `state:"nosave"`
+
+	// linger determines the amount of time the socket should linger before
+	// close. We currently implement this option for TCP sockets only.
+	linger LingerOption
+}
+
+// InitHandler initializes the handler. This must be called before using the
+// socket options utility.
+func (so *SocketOptions) InitHandler(handler SocketOptionsHandler) {
+	so.handler = handler
+}
+
+func storeAtomicBool(addr *uint32, v bool) {
+	var val uint32
+	if v {
+		val = 1
+	}
+	atomic.StoreUint32(addr, val)
+}
+
+// GetBroadcast gets value for SO_BROADCAST option.
+func (so *SocketOptions) GetBroadcast() bool {
+	return atomic.LoadUint32(&so.broadcastEnabled) != 0
+}
+
+// SetBroadcast sets value for SO_BROADCAST option.
+func (so *SocketOptions) SetBroadcast(v bool) {
+	storeAtomicBool(&so.broadcastEnabled, v)
+}
+
+// GetPassCred gets value for SO_PASSCRED option.
+func (so *SocketOptions) GetPassCred() bool {
+	return atomic.LoadUint32(&so.passCredEnabled) != 0
+}
+
+// SetPassCred sets value for SO_PASSCRED option.
+func (so *SocketOptions) SetPassCred(v bool) {
+	storeAtomicBool(&so.passCredEnabled, v)
+}
+
+// GetNoChecksum gets value for SO_NO_CHECK option.
+func (so *SocketOptions) GetNoChecksum() bool {
+	return atomic.LoadUint32(&so.noChecksumEnabled) != 0
+}
+
+// SetNoChecksum sets value for SO_NO_CHECK option.
+func (so *SocketOptions) SetNoChecksum(v bool) {
+	storeAtomicBool(&so.noChecksumEnabled, v)
+}
+
+// GetReuseAddress gets value for SO_REUSEADDR option.
+func (so *SocketOptions) GetReuseAddress() bool {
+	return atomic.LoadUint32(&so.reuseAddressEnabled) != 0
+}
+
+// SetReuseAddress sets value for SO_REUSEADDR option.
+func (so *SocketOptions) SetReuseAddress(v bool) {
+	storeAtomicBool(&so.reuseAddressEnabled, v)
+	so.handler.OnReuseAddressSet(v)
+}
+
+// GetReusePort gets value for SO_REUSEPORT option.
+func (so *SocketOptions) GetReusePort() bool {
+	return atomic.LoadUint32(&so.reusePortEnabled) != 0
+}
+
+// SetReusePort sets value for SO_REUSEPORT option.
+func (so *SocketOptions) SetReusePort(v bool) {
+	storeAtomicBool(&so.reusePortEnabled, v)
+	so.handler.OnReusePortSet(v)
+}
+
+// GetKeepAlive gets value for SO_KEEPALIVE option.
+func (so *SocketOptions) GetKeepAlive() bool {
+	return atomic.LoadUint32(&so.keepAliveEnabled) != 0
+}
+
+// SetKeepAlive sets value for SO_KEEPALIVE option.
+func (so *SocketOptions) SetKeepAlive(v bool) {
+	storeAtomicBool(&so.keepAliveEnabled, v)
+	so.handler.OnKeepAliveSet(v)
+}
+
+// GetMulticastLoop gets value for IP_MULTICAST_LOOP option.
+func (so *SocketOptions) GetMulticastLoop() bool {
+	return atomic.LoadUint32(&so.multicastLoopEnabled) != 0
+}
+
+// SetMulticastLoop sets value for IP_MULTICAST_LOOP option.
+func (so *SocketOptions) SetMulticastLoop(v bool) {
+	storeAtomicBool(&so.multicastLoopEnabled, v)
+}
+
+// GetReceiveTOS gets value for IP_RECVTOS option.
+func (so *SocketOptions) GetReceiveTOS() bool {
+	return atomic.LoadUint32(&so.receiveTOSEnabled) != 0
+}
+
+// SetReceiveTOS sets value for IP_RECVTOS option.
+func (so *SocketOptions) SetReceiveTOS(v bool) {
+	storeAtomicBool(&so.receiveTOSEnabled, v)
+}
+
+// GetReceiveTClass gets value for IPV6_RECVTCLASS option.
+func (so *SocketOptions) GetReceiveTClass() bool {
+	return atomic.LoadUint32(&so.receiveTClassEnabled) != 0
+}
+
+// SetReceiveTClass sets value for IPV6_RECVTCLASS option.
+func (so *SocketOptions) SetReceiveTClass(v bool) {
+	storeAtomicBool(&so.receiveTClassEnabled, v)
+}
+
+// GetReceivePacketInfo gets value for IP_PKTINFO option.
+func (so *SocketOptions) GetReceivePacketInfo() bool {
+	return atomic.LoadUint32(&so.receivePacketInfoEnabled) != 0
+}
+
+// SetReceivePacketInfo sets value for IP_PKTINFO option.
+func (so *SocketOptions) SetReceivePacketInfo(v bool) {
+	storeAtomicBool(&so.receivePacketInfoEnabled, v)
+}
+
+// GetHeaderIncluded gets value for IP_HDRINCL option.
+func (so *SocketOptions) GetHeaderIncluded() bool {
+	return atomic.LoadUint32(&so.hdrIncludedEnabled) != 0
+}
+
+// SetHeaderIncluded sets value for IP_HDRINCL option.
+func (so *SocketOptions) SetHeaderIncluded(v bool) {
+	storeAtomicBool(&so.hdrIncludedEnabled, v)
+}
+
+// GetV6Only gets value for IPV6_V6ONLY option.
+func (so *SocketOptions) GetV6Only() bool {
+	return atomic.LoadUint32(&so.v6OnlyEnabled) != 0
+}
+
+// SetV6Only sets value for IPV6_V6ONLY option.
+//
+// Preconditions: the backing TCP or UDP endpoint must be in initial state.
+func (so *SocketOptions) SetV6Only(v bool) {
+	storeAtomicBool(&so.v6OnlyEnabled, v)
+}
+
+// GetQuickAck gets value for TCP_QUICKACK option.
+func (so *SocketOptions) GetQuickAck() bool {
+	return atomic.LoadUint32(&so.quickAckEnabled) != 0
+}
+
+// SetQuickAck sets value for TCP_QUICKACK option.
+func (so *SocketOptions) SetQuickAck(v bool) {
+	storeAtomicBool(&so.quickAckEnabled, v)
+}
+
+// GetDelayOption gets inverted value for TCP_NODELAY option.
+func (so *SocketOptions) GetDelayOption() bool {
+	return atomic.LoadUint32(&so.delayOptionEnabled) != 0
+}
+
+// SetDelayOption sets inverted value for TCP_NODELAY option.
+func (so *SocketOptions) SetDelayOption(v bool) {
+	storeAtomicBool(&so.delayOptionEnabled, v)
+	so.handler.OnDelayOptionSet(v)
+}
+
+// GetCorkOption gets value for TCP_CORK option.
+func (so *SocketOptions) GetCorkOption() bool {
+	return atomic.LoadUint32(&so.corkOptionEnabled) != 0
+}
+
+// SetCorkOption sets value for TCP_CORK option.
+func (so *SocketOptions) SetCorkOption(v bool) {
+	storeAtomicBool(&so.corkOptionEnabled, v)
+	so.handler.OnCorkOptionSet(v)
+}
+
+// GetReceiveOriginalDstAddress gets value for IP(V6)_RECVORIGDSTADDR option.
+func (so *SocketOptions) GetReceiveOriginalDstAddress() bool {
+	return atomic.LoadUint32(&so.receiveOriginalDstAddress) != 0
+}
+
+// SetReceiveOriginalDstAddress sets value for IP(V6)_RECVORIGDSTADDR option.
+func (so *SocketOptions) SetReceiveOriginalDstAddress(v bool) {
+	storeAtomicBool(&so.receiveOriginalDstAddress, v)
+}
+
+// GetLastError gets value for SO_ERROR option.
+func (so *SocketOptions) GetLastError() *Error {
+	return so.handler.LastError()
+}
+
+// GetOutOfBandInline gets value for SO_OOBINLINE option.
+func (*SocketOptions) GetOutOfBandInline() bool {
+	return true
+}
+
+// SetOutOfBandInline sets value for SO_OOBINLINE option. We currently do not
+// support disabling this option.
+func (*SocketOptions) SetOutOfBandInline(bool) {}
+
+// GetLinger gets value for SO_LINGER option.
+func (so *SocketOptions) GetLinger() LingerOption {
+	so.mu.Lock()
+	linger := so.linger
+	so.mu.Unlock()
+	return linger
+}
+
+// SetLinger sets value for SO_LINGER option.
+func (so *SocketOptions) SetLinger(linger LingerOption) {
+	so.mu.Lock()
+	so.linger = linger
+	so.mu.Unlock()
+}
diff --git a/pkg/tcpip/stack/BUILD b/pkg/tcpip/stack/BUILD
index d09ebe7fa..9cc6074da 100644
--- a/pkg/tcpip/stack/BUILD
+++ b/pkg/tcpip/stack/BUILD
@@ -1,4 +1,4 @@
-load("//tools:defs.bzl", "go_library", "go_test")
+load("//tools:defs.bzl", "go_library", "go_test", "most_shards")
 load("//tools/go_generics:defs.bzl", "go_template_instance")
 
 package(licenses = ["notice"])
@@ -112,7 +112,7 @@ go_test(
         "transport_demuxer_test.go",
         "transport_test.go",
     ],
-    shard_count = 20,
+    shard_count = most_shards,
     deps = [
         ":stack",
         "//pkg/rand",
@@ -120,6 +120,7 @@ go_test(
         "//pkg/tcpip",
         "//pkg/tcpip/buffer",
         "//pkg/tcpip/checker",
+        "//pkg/tcpip/faketime",
         "//pkg/tcpip/header",
         "//pkg/tcpip/link/channel",
         "//pkg/tcpip/link/loopback",
@@ -131,7 +132,6 @@ go_test(
         "//pkg/tcpip/transport/udp",
         "//pkg/waiter",
         "@com_github_google_go_cmp//cmp:go_default_library",
-        "@com_github_google_go_cmp//cmp/cmpopts:go_default_library",
     ],
 )
 
diff --git a/pkg/tcpip/stack/addressable_endpoint_state.go b/pkg/tcpip/stack/addressable_endpoint_state.go
index 4d3acab96..cd423bf71 100644
--- a/pkg/tcpip/stack/addressable_endpoint_state.go
+++ b/pkg/tcpip/stack/addressable_endpoint_state.go
@@ -21,7 +21,6 @@ import (
 	"gvisor.dev/gvisor/pkg/tcpip"
 )
 
-var _ GroupAddressableEndpoint = (*AddressableEndpointState)(nil)
 var _ AddressableEndpoint = (*AddressableEndpointState)(nil)
 
 // AddressableEndpointState is an implementation of an AddressableEndpoint.
@@ -37,10 +36,6 @@ type AddressableEndpointState struct {
 
 		endpoints map[tcpip.Address]*addressState
 		primary   []*addressState
-
-		// groups holds the mapping between group addresses and the number of times
-		// they have been joined.
-		groups map[tcpip.Address]uint32
 	}
 }
 
@@ -53,65 +48,33 @@ func (a *AddressableEndpointState) Init(networkEndpoint NetworkEndpoint) {
 	a.mu.Lock()
 	defer a.mu.Unlock()
 	a.mu.endpoints = make(map[tcpip.Address]*addressState)
-	a.mu.groups = make(map[tcpip.Address]uint32)
-}
-
-// ReadOnlyAddressableEndpointState provides read-only access to an
-// AddressableEndpointState.
-type ReadOnlyAddressableEndpointState struct {
-	inner *AddressableEndpointState
 }
 
-// AddrOrMatching returns an endpoint for the passed address that is consisdered
-// bound to the wrapped AddressableEndpointState.
+// GetAddress returns the AddressEndpoint for the passed address.
 //
-// If addr is an exact match with an existing address, that address is returned.
-// Otherwise, f is called with each address and the address that f returns true
-// for is returned.
+// GetAddress does not increment the address's reference count or check if the
+// address is considered bound to the endpoint.
 //
-// Returns nil of no address matches.
-func (m ReadOnlyAddressableEndpointState) AddrOrMatching(addr tcpip.Address, spoofingOrPrimiscuous bool, f func(AddressEndpoint) bool) AddressEndpoint {
-	m.inner.mu.RLock()
-	defer m.inner.mu.RUnlock()
-
-	if ep, ok := m.inner.mu.endpoints[addr]; ok {
-		if ep.IsAssigned(spoofingOrPrimiscuous) && ep.IncRef() {
-			return ep
-		}
-	}
-
-	for _, ep := range m.inner.mu.endpoints {
-		if ep.IsAssigned(spoofingOrPrimiscuous) && f(ep) && ep.IncRef() {
-			return ep
-		}
-	}
-
-	return nil
-}
-
-// Lookup returns the AddressEndpoint for the passed address.
-//
-// Returns nil if the passed address is not associated with the
-// AddressableEndpointState.
-func (m ReadOnlyAddressableEndpointState) Lookup(addr tcpip.Address) AddressEndpoint {
-	m.inner.mu.RLock()
-	defer m.inner.mu.RUnlock()
+// Returns nil if the passed address is not associated with the endpoint.
+func (a *AddressableEndpointState) GetAddress(addr tcpip.Address) AddressEndpoint {
+	a.mu.RLock()
+	defer a.mu.RUnlock()
 
-	ep, ok := m.inner.mu.endpoints[addr]
+	ep, ok := a.mu.endpoints[addr]
 	if !ok {
 		return nil
 	}
 	return ep
 }
 
-// ForEach calls f for each address pair.
+// ForEachEndpoint calls f for each address.
 //
-// If f returns false, f is no longer be called.
-func (m ReadOnlyAddressableEndpointState) ForEach(f func(AddressEndpoint) bool) {
-	m.inner.mu.RLock()
-	defer m.inner.mu.RUnlock()
+// Once f returns false, f will no longer be called.
+func (a *AddressableEndpointState) ForEachEndpoint(f func(AddressEndpoint) bool) {
+	a.mu.RLock()
+	defer a.mu.RUnlock()
 
-	for _, ep := range m.inner.mu.endpoints {
+	for _, ep := range a.mu.endpoints {
 		if !f(ep) {
 			return
 		}
@@ -120,18 +83,16 @@ func (m ReadOnlyAddressableEndpointState) ForEach(f func(AddressEndpoint) bool)
 
 // ForEachPrimaryEndpoint calls f for each primary address.
 //
-// If f returns false, f is no longer be called.
-func (m ReadOnlyAddressableEndpointState) ForEachPrimaryEndpoint(f func(AddressEndpoint)) {
-	m.inner.mu.RLock()
-	defer m.inner.mu.RUnlock()
-	for _, ep := range m.inner.mu.primary {
-		f(ep)
-	}
-}
+// Once f returns false, f will no longer be called.
+func (a *AddressableEndpointState) ForEachPrimaryEndpoint(f func(AddressEndpoint) bool) {
+	a.mu.RLock()
+	defer a.mu.RUnlock()
 
-// ReadOnly returns a readonly reference to a.
-func (a *AddressableEndpointState) ReadOnly() ReadOnlyAddressableEndpointState {
-	return ReadOnlyAddressableEndpointState{inner: a}
+	for _, ep := range a.mu.primary {
+		if !f(ep) {
+			return
+		}
+	}
 }
 
 func (a *AddressableEndpointState) releaseAddressState(addrState *addressState) {
@@ -272,6 +233,9 @@ func (a *AddressableEndpointState) addAndAcquireAddressLocked(addr tcpip.Address
 		addrState = &addressState{
 			addressableEndpointState: a,
 			addr:                     addr,
+			// Cache the subnet in addrState to avoid calls to addr.Subnet() as that
+			// results in allocations on every call.
+			subnet: addr.Subnet(),
 		}
 		a.mu.endpoints[addr.Address] = addrState
 		addrState.mu.Lock()
@@ -332,11 +296,6 @@ func (a *AddressableEndpointState) addAndAcquireAddressLocked(addr tcpip.Address
 func (a *AddressableEndpointState) RemovePermanentAddress(addr tcpip.Address) *tcpip.Error {
 	a.mu.Lock()
 	defer a.mu.Unlock()
-
-	if _, ok := a.mu.groups[addr]; ok {
-		panic(fmt.Sprintf("group address = %s must be removed with LeaveGroup", addr))
-	}
-
 	return a.removePermanentAddressLocked(addr)
 }
 
@@ -361,6 +320,8 @@ func (a *AddressableEndpointState) RemovePermanentEndpoint(ep AddressEndpoint) *
 		return tcpip.ErrInvalidEndpointState
 	}
 
+	a.mu.Lock()
+	defer a.mu.Unlock()
 	return a.removePermanentEndpointLocked(addrState)
 }
 
@@ -466,8 +427,19 @@ func (a *AddressableEndpointState) acquirePrimaryAddressRLocked(isValid func(*ad
 	return deprecatedEndpoint
 }
 
-// AcquireAssignedAddress implements AddressableEndpoint.
-func (a *AddressableEndpointState) AcquireAssignedAddress(localAddr tcpip.Address, allowTemp bool, tempPEB PrimaryEndpointBehavior) AddressEndpoint {
+// AcquireAssignedAddressOrMatching returns an address endpoint that is
+// considered assigned to the addressable endpoint.
+//
+// If the address is an exact match with an existing address, that address is
+// returned. Otherwise, if f is provided, f is called with each address and
+// the address that f returns true for is returned.
+//
+// If there is no matching address, a temporary address will be returned if
+// allowTemp is true.
+//
+// Regardless of how the address was obtained, it will be acquired before it is
+// returned.
+func (a *AddressableEndpointState) AcquireAssignedAddressOrMatching(localAddr tcpip.Address, f func(AddressEndpoint) bool, allowTemp bool, tempPEB PrimaryEndpointBehavior) AddressEndpoint {
 	a.mu.Lock()
 	defer a.mu.Unlock()
 
@@ -483,6 +455,14 @@ func (a *AddressableEndpointState) AcquireAssignedAddress(localAddr tcpip.Addres
 		return addrState
 	}
 
+	if f != nil {
+		for _, addrState := range a.mu.endpoints {
+			if addrState.IsAssigned(allowTemp) && f(addrState) && addrState.IncRef() {
+				return addrState
+			}
+		}
+	}
+
 	if !allowTemp {
 		return nil
 	}
@@ -515,6 +495,11 @@ func (a *AddressableEndpointState) AcquireAssignedAddress(localAddr tcpip.Addres
 	return ep
 }
 
+// AcquireAssignedAddress implements AddressableEndpoint.
+func (a *AddressableEndpointState) AcquireAssignedAddress(localAddr tcpip.Address, allowTemp bool, tempPEB PrimaryEndpointBehavior) AddressEndpoint {
+	return a.AcquireAssignedAddressOrMatching(localAddr, nil, allowTemp, tempPEB)
+}
+
 // AcquireOutgoingPrimaryAddress implements AddressableEndpoint.
 func (a *AddressableEndpointState) AcquireOutgoingPrimaryAddress(remoteAddr tcpip.Address, allowExpired bool) AddressEndpoint {
 	a.mu.RLock()
@@ -583,72 +568,11 @@ func (a *AddressableEndpointState) PermanentAddresses() []tcpip.AddressWithPrefi
 	return addrs
 }
 
-// JoinGroup implements GroupAddressableEndpoint.
-func (a *AddressableEndpointState) JoinGroup(group tcpip.Address) (bool, *tcpip.Error) {
-	a.mu.Lock()
-	defer a.mu.Unlock()
-
-	joins, ok := a.mu.groups[group]
-	if !ok {
-		ep, err := a.addAndAcquireAddressLocked(group.WithPrefix(), NeverPrimaryEndpoint, AddressConfigStatic, false /* deprecated */, true /* permanent */)
-		if err != nil {
-			return false, err
-		}
-		// We have no need for the address endpoint.
-		a.decAddressRefLocked(ep)
-	}
-
-	a.mu.groups[group] = joins + 1
-	return !ok, nil
-}
-
-// LeaveGroup implements GroupAddressableEndpoint.
-func (a *AddressableEndpointState) LeaveGroup(group tcpip.Address) (bool, *tcpip.Error) {
-	a.mu.Lock()
-	defer a.mu.Unlock()
-
-	joins, ok := a.mu.groups[group]
-	if !ok {
-		return false, tcpip.ErrBadLocalAddress
-	}
-
-	if joins == 1 {
-		a.removeGroupAddressLocked(group)
-		delete(a.mu.groups, group)
-		return true, nil
-	}
-
-	a.mu.groups[group] = joins - 1
-	return false, nil
-}
-
-// IsInGroup implements GroupAddressableEndpoint.
-func (a *AddressableEndpointState) IsInGroup(group tcpip.Address) bool {
-	a.mu.RLock()
-	defer a.mu.RUnlock()
-	_, ok := a.mu.groups[group]
-	return ok
-}
-
-func (a *AddressableEndpointState) removeGroupAddressLocked(group tcpip.Address) {
-	if err := a.removePermanentAddressLocked(group); err != nil {
-		// removePermanentEndpointLocked would only return an error if group is
-		// not bound to the addressable endpoint, but we know it MUST be assigned
-		// since we have group in our map of groups.
-		panic(fmt.Sprintf("error removing group address = %s: %s", group, err))
-	}
-}
-
 // Cleanup forcefully leaves all groups and removes all permanent addresses.
 func (a *AddressableEndpointState) Cleanup() {
 	a.mu.Lock()
 	defer a.mu.Unlock()
 
-	for group := range a.mu.groups {
-		a.removeGroupAddressLocked(group)
-	}
-	a.mu.groups = make(map[tcpip.Address]uint32)
-
 	for _, ep := range a.mu.endpoints {
 		// removePermanentEndpointLocked returns tcpip.ErrBadLocalAddress if ep is
 		// not a permanent address.
@@ -664,7 +588,7 @@ var _ AddressEndpoint = (*addressState)(nil)
 type addressState struct {
 	addressableEndpointState *AddressableEndpointState
 	addr                     tcpip.AddressWithPrefix
-
+	subnet                   tcpip.Subnet
 	// Lock ordering (from outer to inner lock ordering):
 	//
 	// AddressableEndpointState.mu
@@ -684,6 +608,11 @@ func (a *addressState) AddressWithPrefix() tcpip.AddressWithPrefix {
 	return a.addr
 }
 
+// Subnet implements AddressEndpoint.
+func (a *addressState) Subnet() tcpip.Subnet {
+	return a.subnet
+}
+
 // GetKind implements AddressEndpoint.
 func (a *addressState) GetKind() AddressKind {
 	a.mu.RLock()
diff --git a/pkg/tcpip/stack/addressable_endpoint_state_test.go b/pkg/tcpip/stack/addressable_endpoint_state_test.go
index 26787d0a3..140f146f6 100644
--- a/pkg/tcpip/stack/addressable_endpoint_state_test.go
+++ b/pkg/tcpip/stack/addressable_endpoint_state_test.go
@@ -53,25 +53,9 @@ func TestAddressableEndpointStateCleanup(t *testing.T) {
 		ep.DecRef()
 	}
 
-	group := tcpip.Address("\x02")
-	if added, err := s.JoinGroup(group); err != nil {
-		t.Fatalf("s.JoinGroup(%s): %s", group, err)
-	} else if !added {
-		t.Fatalf("got s.JoinGroup(%s) = false, want = true", group)
-	}
-	if !s.IsInGroup(group) {
-		t.Fatalf("got s.IsInGroup(%s) = false, want = true", group)
-	}
-
 	s.Cleanup()
-	{
-		ep := s.AcquireAssignedAddress(addr.Address, false /* allowTemp */, stack.NeverPrimaryEndpoint)
-		if ep != nil {
-			ep.DecRef()
-			t.Fatalf("got s.AcquireAssignedAddress(%s, false, NeverPrimaryEndpoint) = %s, want = nil", addr.Address, ep.AddressWithPrefix())
-		}
-	}
-	if s.IsInGroup(group) {
-		t.Fatalf("got s.IsInGroup(%s) = true, want = false", group)
+	if ep := s.AcquireAssignedAddress(addr.Address, false /* allowTemp */, stack.NeverPrimaryEndpoint); ep != nil {
+		ep.DecRef()
+		t.Fatalf("got s.AcquireAssignedAddress(%s, false, NeverPrimaryEndpoint) = %s, want = nil", addr.Address, ep.AddressWithPrefix())
 	}
 }
diff --git a/pkg/tcpip/stack/conntrack.go b/pkg/tcpip/stack/conntrack.go
index 0cd1da11f..9a17efcba 100644
--- a/pkg/tcpip/stack/conntrack.go
+++ b/pkg/tcpip/stack/conntrack.go
@@ -269,7 +269,7 @@ func (ct *ConnTrack) connForTID(tid tupleID) (*conn, direction) {
 	return nil, dirOriginal
 }
 
-func (ct *ConnTrack) insertRedirectConn(pkt *PacketBuffer, hook Hook, rt *RedirectTarget) *conn {
+func (ct *ConnTrack) insertRedirectConn(pkt *PacketBuffer, hook Hook, port uint16, address tcpip.Address) *conn {
 	tid, err := packetToTupleID(pkt)
 	if err != nil {
 		return nil
@@ -282,8 +282,8 @@ func (ct *ConnTrack) insertRedirectConn(pkt *PacketBuffer, hook Hook, rt *Redire
 	// rule. This tuple will be used to manipulate the packet in
 	// handlePacket.
 	replyTID := tid.reply()
-	replyTID.srcAddr = rt.Addr
-	replyTID.srcPort = rt.Port
+	replyTID.srcAddr = address
+	replyTID.srcPort = port
 	var manip manipType
 	switch hook {
 	case Prerouting:
@@ -401,12 +401,12 @@ func handlePacketOutput(pkt *PacketBuffer, conn *conn, gso *GSO, r *Route, dir d
 
 	// Calculate the TCP checksum and set it.
 	tcpHeader.SetChecksum(0)
-	length := uint16(pkt.Size()) - uint16(len(pkt.NetworkHeader().View()))
-	xsum := r.PseudoHeaderChecksum(header.TCPProtocolNumber, length)
+	length := uint16(len(tcpHeader) + pkt.Data.Size())
+	xsum := header.PseudoHeaderChecksum(header.TCPProtocolNumber, netHeader.SourceAddress(), netHeader.DestinationAddress(), length)
 	if gso != nil && gso.NeedsCsum {
 		tcpHeader.SetChecksum(xsum)
-	} else if r.Capabilities()&CapabilityTXChecksumOffload == 0 {
-		xsum = header.ChecksumVVWithOffset(pkt.Data, xsum, int(tcpHeader.DataOffset()), pkt.Data.Size())
+	} else if r.RequiresTXTransportChecksum() {
+		xsum = header.ChecksumVV(pkt.Data, xsum)
 		tcpHeader.SetChecksum(^tcpHeader.CalculateChecksum(xsum))
 	}
 
diff --git a/pkg/tcpip/stack/forwarding_test.go b/pkg/tcpip/stack/forwarding_test.go
index cf042309e..5ec9b3411 100644
--- a/pkg/tcpip/stack/forwarding_test.go
+++ b/pkg/tcpip/stack/forwarding_test.go
@@ -73,9 +73,31 @@ func (*fwdTestNetworkEndpoint) DefaultTTL() uint8 {
 	return 123
 }
 
-func (f *fwdTestNetworkEndpoint) HandlePacket(r *Route, pkt *PacketBuffer) {
-	// Dispatch the packet to the transport protocol.
-	f.dispatcher.DeliverTransportPacket(r, tcpip.TransportProtocolNumber(pkt.NetworkHeader().View()[protocolNumberOffset]), pkt)
+func (f *fwdTestNetworkEndpoint) HandlePacket(pkt *PacketBuffer) {
+	netHdr := pkt.NetworkHeader().View()
+	_, dst := f.proto.ParseAddresses(netHdr)
+
+	addressEndpoint := f.AcquireAssignedAddress(dst, f.nic.Promiscuous(), CanBePrimaryEndpoint)
+	if addressEndpoint != nil {
+		addressEndpoint.DecRef()
+		// Dispatch the packet to the transport protocol.
+		f.dispatcher.DeliverTransportPacket(tcpip.TransportProtocolNumber(netHdr[protocolNumberOffset]), pkt)
+		return
+	}
+
+	r, err := f.proto.stack.FindRoute(0, "", dst, fwdTestNetNumber, false /* multicastLoop */)
+	if err != nil {
+		return
+	}
+	defer r.Release()
+
+	vv := buffer.NewVectorisedView(pkt.Size(), pkt.Views())
+	pkt = NewPacketBuffer(PacketBufferOptions{
+		ReserveHeaderBytes: int(r.MaxHeaderLength()),
+		Data:               vv.ToView().ToVectorisedView(),
+	})
+	// TODO(b/143425874) Decrease the TTL field in forwarded packets.
+	_ = r.WriteHeaderIncludedPacket(pkt)
 }
 
 func (f *fwdTestNetworkEndpoint) MaxHeaderLength() uint16 {
@@ -106,8 +128,13 @@ func (f *fwdTestNetworkEndpoint) WritePackets(r *Route, gso *GSO, pkts PacketBuf
 	panic("not implemented")
 }
 
-func (*fwdTestNetworkEndpoint) WriteHeaderIncludedPacket(r *Route, pkt *PacketBuffer) *tcpip.Error {
-	return tcpip.ErrNotSupported
+func (f *fwdTestNetworkEndpoint) WriteHeaderIncludedPacket(r *Route, pkt *PacketBuffer) *tcpip.Error {
+	// The network header should not already be populated.
+	if _, ok := pkt.NetworkHeader().Consume(fwdTestNetHeaderLen); !ok {
+		return tcpip.ErrMalformedHeader
+	}
+
+	return f.nic.WritePacket(r, nil /* gso */, fwdTestNetNumber, pkt)
 }
 
 func (f *fwdTestNetworkEndpoint) Close() {
@@ -117,6 +144,8 @@ func (f *fwdTestNetworkEndpoint) Close() {
 // fwdTestNetworkProtocol is a network-layer protocol that implements Address
 // resolution.
 type fwdTestNetworkProtocol struct {
+	stack *Stack
+
 	addrCache              *linkAddrCache
 	neigh                  *neighborCache
 	addrResolveDelay       time.Duration
@@ -178,7 +207,7 @@ func (*fwdTestNetworkProtocol) Close() {}
 
 func (*fwdTestNetworkProtocol) Wait() {}
 
-func (f *fwdTestNetworkProtocol) LinkAddressRequest(addr, localAddr tcpip.Address, remoteLinkAddr tcpip.LinkAddress, linkEP LinkEndpoint) *tcpip.Error {
+func (f *fwdTestNetworkProtocol) LinkAddressRequest(addr, _ tcpip.Address, remoteLinkAddr tcpip.LinkAddress, _ NetworkInterface) *tcpip.Error {
 	if f.onLinkAddressResolved != nil {
 		time.AfterFunc(f.addrResolveDelay, func() {
 			f.onLinkAddressResolved(f.addrCache, f.neigh, addr, remoteLinkAddr)
@@ -280,7 +309,7 @@ func (e *fwdTestLinkEndpoint) LinkAddress() tcpip.LinkAddress {
 
 func (e fwdTestLinkEndpoint) WritePacket(r *Route, gso *GSO, protocol tcpip.NetworkProtocolNumber, pkt *PacketBuffer) *tcpip.Error {
 	p := fwdTestPacketInfo{
-		RemoteLinkAddress: r.RemoteLinkAddress,
+		RemoteLinkAddress: r.RemoteLinkAddress(),
 		LocalLinkAddress:  r.LocalLinkAddress,
 		Pkt:               pkt,
 	}
@@ -304,20 +333,6 @@ func (e *fwdTestLinkEndpoint) WritePackets(r *Route, gso *GSO, pkts PacketBuffer
 	return n, nil
 }
 
-// WriteRawPacket implements stack.LinkEndpoint.WriteRawPacket.
-func (e *fwdTestLinkEndpoint) WriteRawPacket(vv buffer.VectorisedView) *tcpip.Error {
-	p := fwdTestPacketInfo{
-		Pkt: NewPacketBuffer(PacketBufferOptions{Data: vv}),
-	}
-
-	select {
-	case e.C <- p:
-	default:
-	}
-
-	return nil
-}
-
 // Wait implements stack.LinkEndpoint.Wait.
 func (*fwdTestLinkEndpoint) Wait() {}
 
@@ -334,7 +349,10 @@ func (e *fwdTestLinkEndpoint) AddHeader(local, remote tcpip.LinkAddress, protoco
 func fwdTestNetFactory(t *testing.T, proto *fwdTestNetworkProtocol, useNeighborCache bool) (ep1, ep2 *fwdTestLinkEndpoint) {
 	// Create a stack with the network protocol and two NICs.
 	s := New(Options{
-		NetworkProtocols: []NetworkProtocolFactory{func(*Stack) NetworkProtocol { return proto }},
+		NetworkProtocols: []NetworkProtocolFactory{func(s *Stack) NetworkProtocol {
+			proto.stack = s
+			return proto
+		}},
 		UseNeighborCache: useNeighborCache,
 	})
 
diff --git a/pkg/tcpip/stack/iptables.go b/pkg/tcpip/stack/iptables.go
index 8d6d9a7f1..2d8c883cd 100644
--- a/pkg/tcpip/stack/iptables.go
+++ b/pkg/tcpip/stack/iptables.go
@@ -22,30 +22,17 @@ import (
 	"gvisor.dev/gvisor/pkg/tcpip/header"
 )
 
-// tableID is an index into IPTables.tables.
-type tableID int
+// TableID identifies a specific table.
+type TableID int
 
+// Each value identifies a specific table.
 const (
-	natID tableID = iota
-	mangleID
-	filterID
-	numTables
+	NATID TableID = iota
+	MangleID
+	FilterID
+	NumTables
 )
 
-// Table names.
-const (
-	NATTable    = "nat"
-	MangleTable = "mangle"
-	FilterTable = "filter"
-)
-
-// nameToID is immutable.
-var nameToID = map[string]tableID{
-	NATTable:    natID,
-	MangleTable: mangleID,
-	FilterTable: filterID,
-}
-
 // HookUnset indicates that there is no hook set for an entrypoint or
 // underflow.
 const HookUnset = -1
@@ -57,8 +44,8 @@ const reaperDelay = 5 * time.Second
 // all packets.
 func DefaultTables() *IPTables {
 	return &IPTables{
-		v4Tables: [numTables]Table{
-			natID: Table{
+		v4Tables: [NumTables]Table{
+			NATID: Table{
 				Rules: []Rule{
 					Rule{Target: &AcceptTarget{NetworkProtocol: header.IPv4ProtocolNumber}},
 					Rule{Target: &AcceptTarget{NetworkProtocol: header.IPv4ProtocolNumber}},
@@ -81,7 +68,7 @@ func DefaultTables() *IPTables {
 					Postrouting: 3,
 				},
 			},
-			mangleID: Table{
+			MangleID: Table{
 				Rules: []Rule{
 					Rule{Target: &AcceptTarget{NetworkProtocol: header.IPv4ProtocolNumber}},
 					Rule{Target: &AcceptTarget{NetworkProtocol: header.IPv4ProtocolNumber}},
@@ -99,7 +86,7 @@ func DefaultTables() *IPTables {
 					Postrouting: HookUnset,
 				},
 			},
-			filterID: Table{
+			FilterID: Table{
 				Rules: []Rule{
 					Rule{Target: &AcceptTarget{NetworkProtocol: header.IPv4ProtocolNumber}},
 					Rule{Target: &AcceptTarget{NetworkProtocol: header.IPv4ProtocolNumber}},
@@ -122,8 +109,8 @@ func DefaultTables() *IPTables {
 				},
 			},
 		},
-		v6Tables: [numTables]Table{
-			natID: Table{
+		v6Tables: [NumTables]Table{
+			NATID: Table{
 				Rules: []Rule{
 					Rule{Target: &AcceptTarget{NetworkProtocol: header.IPv6ProtocolNumber}},
 					Rule{Target: &AcceptTarget{NetworkProtocol: header.IPv6ProtocolNumber}},
@@ -146,7 +133,7 @@ func DefaultTables() *IPTables {
 					Postrouting: 3,
 				},
 			},
-			mangleID: Table{
+			MangleID: Table{
 				Rules: []Rule{
 					Rule{Target: &AcceptTarget{NetworkProtocol: header.IPv6ProtocolNumber}},
 					Rule{Target: &AcceptTarget{NetworkProtocol: header.IPv6ProtocolNumber}},
@@ -164,7 +151,7 @@ func DefaultTables() *IPTables {
 					Postrouting: HookUnset,
 				},
 			},
-			filterID: Table{
+			FilterID: Table{
 				Rules: []Rule{
 					Rule{Target: &AcceptTarget{NetworkProtocol: header.IPv6ProtocolNumber}},
 					Rule{Target: &AcceptTarget{NetworkProtocol: header.IPv6ProtocolNumber}},
@@ -187,10 +174,10 @@ func DefaultTables() *IPTables {
 				},
 			},
 		},
-		priorities: [NumHooks][]tableID{
-			Prerouting: []tableID{mangleID, natID},
-			Input:      []tableID{natID, filterID},
-			Output:     []tableID{mangleID, natID, filterID},
+		priorities: [NumHooks][]TableID{
+			Prerouting: []TableID{MangleID, NATID},
+			Input:      []TableID{NATID, FilterID},
+			Output:     []TableID{MangleID, NATID, FilterID},
 		},
 		connections: ConnTrack{
 			seed: generateRandUint32(),
@@ -229,26 +216,20 @@ func EmptyNATTable() Table {
 	}
 }
 
-// GetTable returns a table by name.
-func (it *IPTables) GetTable(name string, ipv6 bool) (Table, bool) {
-	id, ok := nameToID[name]
-	if !ok {
-		return Table{}, false
-	}
+// GetTable returns a table with the given id and IP version. It panics when an
+// invalid id is provided.
+func (it *IPTables) GetTable(id TableID, ipv6 bool) Table {
 	it.mu.RLock()
 	defer it.mu.RUnlock()
 	if ipv6 {
-		return it.v6Tables[id], true
+		return it.v6Tables[id]
 	}
-	return it.v4Tables[id], true
+	return it.v4Tables[id]
 }
 
-// ReplaceTable replaces or inserts table by name.
-func (it *IPTables) ReplaceTable(name string, table Table, ipv6 bool) *tcpip.Error {
-	id, ok := nameToID[name]
-	if !ok {
-		return tcpip.ErrInvalidOptionValue
-	}
+// ReplaceTable replaces or inserts table by id. It panics when an invalid id
+// is provided.
+func (it *IPTables) ReplaceTable(id TableID, table Table, ipv6 bool) *tcpip.Error {
 	it.mu.Lock()
 	defer it.mu.Unlock()
 	// If iptables is being enabled, initialize the conntrack table and
@@ -311,7 +292,7 @@ func (it *IPTables) Check(hook Hook, pkt *PacketBuffer, gso *GSO, r *Route, prer
 	for _, tableID := range priorities {
 		// If handlePacket already NATed the packet, we don't need to
 		// check the NAT table.
-		if tableID == natID && pkt.NatDone {
+		if tableID == NATID && pkt.NatDone {
 			continue
 		}
 		var table Table
diff --git a/pkg/tcpip/stack/iptables_targets.go b/pkg/tcpip/stack/iptables_targets.go
index 538c4625d..d63e9757c 100644
--- a/pkg/tcpip/stack/iptables_targets.go
+++ b/pkg/tcpip/stack/iptables_targets.go
@@ -15,6 +15,8 @@
 package stack
 
 import (
+	"fmt"
+
 	"gvisor.dev/gvisor/pkg/log"
 	"gvisor.dev/gvisor/pkg/tcpip"
 	"gvisor.dev/gvisor/pkg/tcpip/header"
@@ -26,13 +28,6 @@ type AcceptTarget struct {
 	NetworkProtocol tcpip.NetworkProtocolNumber
 }
 
-// ID implements Target.ID.
-func (at *AcceptTarget) ID() TargetID {
-	return TargetID{
-		NetworkProtocol: at.NetworkProtocol,
-	}
-}
-
 // Action implements Target.Action.
 func (*AcceptTarget) Action(*PacketBuffer, *ConnTrack, Hook, *GSO, *Route, tcpip.Address) (RuleVerdict, int) {
 	return RuleAccept, 0
@@ -44,22 +39,11 @@ type DropTarget struct {
 	NetworkProtocol tcpip.NetworkProtocolNumber
 }
 
-// ID implements Target.ID.
-func (dt *DropTarget) ID() TargetID {
-	return TargetID{
-		NetworkProtocol: dt.NetworkProtocol,
-	}
-}
-
 // Action implements Target.Action.
 func (*DropTarget) Action(*PacketBuffer, *ConnTrack, Hook, *GSO, *Route, tcpip.Address) (RuleVerdict, int) {
 	return RuleDrop, 0
 }
 
-// ErrorTargetName is used to mark targets as error targets. Error targets
-// shouldn't be reached - an error has occurred if we fall through to one.
-const ErrorTargetName = "ERROR"
-
 // ErrorTarget logs an error and drops the packet. It represents a target that
 // should be unreachable.
 type ErrorTarget struct {
@@ -67,14 +51,6 @@ type ErrorTarget struct {
 	NetworkProtocol tcpip.NetworkProtocolNumber
 }
 
-// ID implements Target.ID.
-func (et *ErrorTarget) ID() TargetID {
-	return TargetID{
-		Name:            ErrorTargetName,
-		NetworkProtocol: et.NetworkProtocol,
-	}
-}
-
 // Action implements Target.Action.
 func (*ErrorTarget) Action(*PacketBuffer, *ConnTrack, Hook, *GSO, *Route, tcpip.Address) (RuleVerdict, int) {
 	log.Debugf("ErrorTarget triggered.")
@@ -90,14 +66,6 @@ type UserChainTarget struct {
 	NetworkProtocol tcpip.NetworkProtocolNumber
 }
 
-// ID implements Target.ID.
-func (uc *UserChainTarget) ID() TargetID {
-	return TargetID{
-		Name:            ErrorTargetName,
-		NetworkProtocol: uc.NetworkProtocol,
-	}
-}
-
 // Action implements Target.Action.
 func (*UserChainTarget) Action(*PacketBuffer, *ConnTrack, Hook, *GSO, *Route, tcpip.Address) (RuleVerdict, int) {
 	panic("UserChainTarget should never be called.")
@@ -110,50 +78,39 @@ type ReturnTarget struct {
 	NetworkProtocol tcpip.NetworkProtocolNumber
 }
 
-// ID implements Target.ID.
-func (rt *ReturnTarget) ID() TargetID {
-	return TargetID{
-		NetworkProtocol: rt.NetworkProtocol,
-	}
-}
-
 // Action implements Target.Action.
 func (*ReturnTarget) Action(*PacketBuffer, *ConnTrack, Hook, *GSO, *Route, tcpip.Address) (RuleVerdict, int) {
 	return RuleReturn, 0
 }
 
-// RedirectTargetName is used to mark targets as redirect targets. Redirect
-// targets should be reached for only NAT and Mangle tables. These targets will
-// change the destination port/destination IP for packets.
-const RedirectTargetName = "REDIRECT"
-
-// RedirectTarget redirects the packet by modifying the destination port/IP.
+// RedirectTarget redirects the packet to this machine by modifying the
+// destination port/IP. Outgoing packets are redirected to the loopback device,
+// and incoming packets are redirected to the incoming interface (rather than
+// forwarded).
+//
 // TODO(gvisor.dev/issue/170): Other flags need to be added after we support
 // them.
 type RedirectTarget struct {
-	// Addr indicates address used to redirect.
-	Addr tcpip.Address
-
-	// Port indicates port used to redirect.
+	// Port indicates port used to redirect. It is immutable.
 	Port uint16
 
-	// NetworkProtocol is the network protocol the target is used with.
+	// NetworkProtocol is the network protocol the target is used with. It
+	// is immutable.
 	NetworkProtocol tcpip.NetworkProtocolNumber
 }
 
-// ID implements Target.ID.
-func (rt *RedirectTarget) ID() TargetID {
-	return TargetID{
-		Name:            RedirectTargetName,
-		NetworkProtocol: rt.NetworkProtocol,
-	}
-}
-
 // Action implements Target.Action.
 // TODO(gvisor.dev/issue/170): Parse headers without copying. The current
-// implementation only works for PREROUTING and calls pkt.Clone(), neither
+// implementation only works for Prerouting and calls pkt.Clone(), neither
 // of which should be the case.
 func (rt *RedirectTarget) Action(pkt *PacketBuffer, ct *ConnTrack, hook Hook, gso *GSO, r *Route, address tcpip.Address) (RuleVerdict, int) {
+	// Sanity check.
+	if rt.NetworkProtocol != pkt.NetworkProtocolNumber {
+		panic(fmt.Sprintf(
+			"RedirectTarget.Action with NetworkProtocol %d called on packet with NetworkProtocolNumber %d",
+			rt.NetworkProtocol, pkt.NetworkProtocolNumber))
+	}
+
 	// Packet is already manipulated.
 	if pkt.NatDone {
 		return RuleAccept, 0
@@ -164,17 +121,17 @@ func (rt *RedirectTarget) Action(pkt *PacketBuffer, ct *ConnTrack, hook Hook, gs
 		return RuleDrop, 0
 	}
 
-	// Change the address to localhost (127.0.0.1 or ::1) in Output and to
+	// Change the address to loopback (127.0.0.1 or ::1) in Output and to
 	// the primary address of the incoming interface in Prerouting.
 	switch hook {
 	case Output:
 		if pkt.NetworkProtocolNumber == header.IPv4ProtocolNumber {
-			rt.Addr = tcpip.Address([]byte{127, 0, 0, 1})
+			address = tcpip.Address([]byte{127, 0, 0, 1})
 		} else {
-			rt.Addr = header.IPv6Loopback
+			address = header.IPv6Loopback
 		}
 	case Prerouting:
-		rt.Addr = address
+		// No-op, as address is already set correctly.
 	default:
 		panic("redirect target is supported only on output and prerouting hooks")
 	}
@@ -189,21 +146,18 @@ func (rt *RedirectTarget) Action(pkt *PacketBuffer, ct *ConnTrack, hook Hook, gs
 		// Calculate UDP checksum and set it.
 		if hook == Output {
 			udpHeader.SetChecksum(0)
+			netHeader := pkt.Network()
+			netHeader.SetDestinationAddress(address)
 
 			// Only calculate the checksum if offloading isn't supported.
-			if r.Capabilities()&CapabilityTXChecksumOffload == 0 {
+			if r.RequiresTXTransportChecksum() {
 				length := uint16(pkt.Size()) - uint16(len(pkt.NetworkHeader().View()))
-				xsum := r.PseudoHeaderChecksum(protocol, length)
-				for _, v := range pkt.Data.Views() {
-					xsum = header.Checksum(v, xsum)
-				}
-				udpHeader.SetChecksum(0)
+				xsum := header.PseudoHeaderChecksum(protocol, netHeader.SourceAddress(), netHeader.DestinationAddress(), length)
+				xsum = header.ChecksumVV(pkt.Data, xsum)
 				udpHeader.SetChecksum(^udpHeader.CalculateChecksum(xsum))
 			}
 		}
 
-		pkt.Network().SetDestinationAddress(rt.Addr)
-
 		// After modification, IPv4 packets need a valid checksum.
 		if pkt.NetworkProtocolNumber == header.IPv4ProtocolNumber {
 			netHeader := header.IPv4(pkt.NetworkHeader().View())
@@ -219,7 +173,7 @@ func (rt *RedirectTarget) Action(pkt *PacketBuffer, ct *ConnTrack, hook Hook, gs
 		// Set up conection for matching NAT rule. Only the first
 		// packet of the connection comes here. Other packets will be
 		// manipulated in connection tracking.
-		if conn := ct.insertRedirectConn(pkt, hook, rt); conn != nil {
+		if conn := ct.insertRedirectConn(pkt, hook, rt.Port, address); conn != nil {
 			ct.handlePacket(pkt, hook, gso, r)
 		}
 	default:
diff --git a/pkg/tcpip/stack/iptables_types.go b/pkg/tcpip/stack/iptables_types.go
index 7b3f3e88b..4b86c1be9 100644
--- a/pkg/tcpip/stack/iptables_types.go
+++ b/pkg/tcpip/stack/iptables_types.go
@@ -37,7 +37,6 @@ import (
 // ----->[Prerouting]----->routing----->[Forward]---------[Postrouting]----->
 type Hook uint
 
-// These values correspond to values in include/uapi/linux/netfilter.h.
 const (
 	// Prerouting happens before a packet is routed to applications or to
 	// be forwarded.
@@ -86,8 +85,8 @@ type IPTables struct {
 	mu sync.RWMutex
 	// v4Tables and v6tables map tableIDs to tables. They hold builtin
 	// tables only, not user tables. mu must be locked for accessing.
-	v4Tables [numTables]Table
-	v6Tables [numTables]Table
+	v4Tables [NumTables]Table
+	v6Tables [NumTables]Table
 	// modified is whether tables have been modified at least once. It is
 	// used to elide the iptables performance overhead for workloads that
 	// don't utilize iptables.
@@ -96,7 +95,7 @@ type IPTables struct {
 	// priorities maps each hook to a list of table names. The order of the
 	// list is the order in which each table should be visited for that
 	// hook. It is immutable.
-	priorities [NumHooks][]tableID
+	priorities [NumHooks][]TableID
 
 	connections ConnTrack
 
@@ -104,6 +103,24 @@ type IPTables struct {
 	reaperDone chan struct{}
 }
 
+// VisitTargets traverses all the targets of all tables and replaces each with
+// transform(target).
+func (it *IPTables) VisitTargets(transform func(Target) Target) {
+	it.mu.Lock()
+	defer it.mu.Unlock()
+
+	for tid := range it.v4Tables {
+		for i, rule := range it.v4Tables[tid].Rules {
+			it.v4Tables[tid].Rules[i].Target = transform(rule.Target)
+		}
+	}
+	for tid := range it.v6Tables {
+		for i, rule := range it.v6Tables[tid].Rules {
+			it.v6Tables[tid].Rules[i].Target = transform(rule.Target)
+		}
+	}
+}
+
 // A Table defines a set of chains and hooks into the network stack.
 //
 // It is a list of Rules, entry points (BuiltinChains), and error handlers
@@ -169,7 +186,6 @@ type IPHeaderFilter struct {
 
 	// CheckProtocol determines whether the Protocol field should be
 	// checked during matching.
-	// TODO(gvisor.dev/issue/3549): Check this field during matching.
 	CheckProtocol bool
 
 	// Dst matches the destination IP address.
@@ -309,23 +325,8 @@ type Matcher interface {
 	Match(hook Hook, packet *PacketBuffer, interfaceName string) (matches bool, hotdrop bool)
 }
 
-// A TargetID uniquely identifies a target.
-type TargetID struct {
-	// Name is the target name as stored in the xt_entry_target struct.
-	Name string
-
-	// NetworkProtocol is the protocol to which the target applies.
-	NetworkProtocol tcpip.NetworkProtocolNumber
-
-	// Revision is the version of the target.
-	Revision uint8
-}
-
 // A Target is the interface for taking an action for a packet.
 type Target interface {
-	// ID uniquely identifies the Target.
-	ID() TargetID
-
 	// Action takes an action on the packet and returns a verdict on how
 	// traversal should (or should not) continue. If the return value is
 	// Jump, it also returns the index of the rule to jump to.
diff --git a/pkg/tcpip/stack/linkaddrcache.go b/pkg/tcpip/stack/linkaddrcache.go
index 6f73a0ce4..c9b13cd0e 100644
--- a/pkg/tcpip/stack/linkaddrcache.go
+++ b/pkg/tcpip/stack/linkaddrcache.go
@@ -180,7 +180,7 @@ func (c *linkAddrCache) getOrCreateEntryLocked(k tcpip.FullAddress) *linkAddrEnt
 }
 
 // get reports any known link address for k.
-func (c *linkAddrCache) get(k tcpip.FullAddress, linkRes LinkAddressResolver, localAddr tcpip.Address, linkEP LinkEndpoint, waker *sleep.Waker) (tcpip.LinkAddress, <-chan struct{}, *tcpip.Error) {
+func (c *linkAddrCache) get(k tcpip.FullAddress, linkRes LinkAddressResolver, localAddr tcpip.Address, nic NetworkInterface, waker *sleep.Waker) (tcpip.LinkAddress, <-chan struct{}, *tcpip.Error) {
 	if linkRes != nil {
 		if addr, ok := linkRes.ResolveStaticAddress(k.Addr); ok {
 			return addr, nil, nil
@@ -221,7 +221,7 @@ func (c *linkAddrCache) get(k tcpip.FullAddress, linkRes LinkAddressResolver, lo
 			}
 
 			entry.done = make(chan struct{})
-			go c.startAddressResolution(k, linkRes, localAddr, linkEP, entry.done) // S/R-SAFE: link non-savable; wakers dropped synchronously.
+			go c.startAddressResolution(k, linkRes, localAddr, nic, entry.done) // S/R-SAFE: link non-savable; wakers dropped synchronously.
 		}
 
 		return entry.linkAddr, entry.done, tcpip.ErrWouldBlock
@@ -240,11 +240,11 @@ func (c *linkAddrCache) removeWaker(k tcpip.FullAddress, waker *sleep.Waker) {
 	}
 }
 
-func (c *linkAddrCache) startAddressResolution(k tcpip.FullAddress, linkRes LinkAddressResolver, localAddr tcpip.Address, linkEP LinkEndpoint, done <-chan struct{}) {
+func (c *linkAddrCache) startAddressResolution(k tcpip.FullAddress, linkRes LinkAddressResolver, localAddr tcpip.Address, nic NetworkInterface, done <-chan struct{}) {
 	for i := 0; ; i++ {
 		// Send link request, then wait for the timeout limit and check
 		// whether the request succeeded.
-		linkRes.LinkAddressRequest(k.Addr, localAddr, "" /* linkAddr */, linkEP)
+		linkRes.LinkAddressRequest(k.Addr, localAddr, "" /* linkAddr */, nic)
 
 		select {
 		case now := <-time.After(c.resolutionTimeout):
diff --git a/pkg/tcpip/stack/linkaddrcache_test.go b/pkg/tcpip/stack/linkaddrcache_test.go
index 33806340e..d2e37f38d 100644
--- a/pkg/tcpip/stack/linkaddrcache_test.go
+++ b/pkg/tcpip/stack/linkaddrcache_test.go
@@ -49,8 +49,8 @@ type testLinkAddressResolver struct {
 	onLinkAddressRequest func()
 }
 
-func (r *testLinkAddressResolver) LinkAddressRequest(addr, _ tcpip.Address, _ tcpip.LinkAddress, _ LinkEndpoint) *tcpip.Error {
-	time.AfterFunc(r.delay, func() { r.fakeRequest(addr) })
+func (r *testLinkAddressResolver) LinkAddressRequest(targetAddr, _ tcpip.Address, _ tcpip.LinkAddress, _ NetworkInterface) *tcpip.Error {
+	time.AfterFunc(r.delay, func() { r.fakeRequest(targetAddr) })
 	if f := r.onLinkAddressRequest; f != nil {
 		f()
 	}
diff --git a/pkg/tcpip/stack/ndp_test.go b/pkg/tcpip/stack/ndp_test.go
index 73a01c2dd..03d7b4e0d 100644
--- a/pkg/tcpip/stack/ndp_test.go
+++ b/pkg/tcpip/stack/ndp_test.go
@@ -26,6 +26,7 @@ import (
 	"gvisor.dev/gvisor/pkg/tcpip"
 	"gvisor.dev/gvisor/pkg/tcpip/buffer"
 	"gvisor.dev/gvisor/pkg/tcpip/checker"
+	"gvisor.dev/gvisor/pkg/tcpip/faketime"
 	"gvisor.dev/gvisor/pkg/tcpip/header"
 	"gvisor.dev/gvisor/pkg/tcpip/link/channel"
 	"gvisor.dev/gvisor/pkg/tcpip/network/ipv6"
@@ -352,7 +353,7 @@ func TestDADDisabled(t *testing.T) {
 	}
 
 	// We should not have sent any NDP NS messages.
-	if got := s.Stats().ICMP.V6PacketsSent.NeighborSolicit.Value(); got != 0 {
+	if got := s.Stats().ICMP.V6.PacketsSent.NeighborSolicit.Value(); got != 0 {
 		t.Fatalf("got NeighborSolicit = %d, want = 0", got)
 	}
 }
@@ -465,14 +466,18 @@ func TestDADResolve(t *testing.T) {
 				if err != tcpip.ErrNoRoute {
 					t.Errorf("got FindRoute(%d, '', %s, %d, false) = (%+v, %v), want = (_, %s)", nicID, addr2, header.IPv6ProtocolNumber, r, err, tcpip.ErrNoRoute)
 				}
-				r.Release()
+				if r != nil {
+					r.Release()
+				}
 			}
 			{
 				r, err := s.FindRoute(nicID, addr1, addr2, header.IPv6ProtocolNumber, false)
 				if err != tcpip.ErrNoRoute {
 					t.Errorf("got FindRoute(%d, %s, %s, %d, false) = (%+v, %v), want = (_, %s)", nicID, addr1, addr2, header.IPv6ProtocolNumber, r, err, tcpip.ErrNoRoute)
 				}
-				r.Release()
+				if r != nil {
+					r.Release()
+				}
 			}
 
 			if t.Failed() {
@@ -510,7 +515,9 @@ func TestDADResolve(t *testing.T) {
 				} else if r.LocalAddress != addr1 {
 					t.Errorf("got r.LocalAddress = %s, want = %s", r.LocalAddress, addr1)
 				}
-				r.Release()
+				if r != nil {
+					r.Release()
+				}
 			}
 
 			if t.Failed() {
@@ -518,7 +525,7 @@ func TestDADResolve(t *testing.T) {
 			}
 
 			// Should not have sent any more NS messages.
-			if got := s.Stats().ICMP.V6PacketsSent.NeighborSolicit.Value(); got != uint64(test.dupAddrDetectTransmits) {
+			if got := s.Stats().ICMP.V6.PacketsSent.NeighborSolicit.Value(); got != uint64(test.dupAddrDetectTransmits) {
 				t.Fatalf("got NeighborSolicit = %d, want = %d", got, test.dupAddrDetectTransmits)
 			}
 
@@ -533,8 +540,8 @@ func TestDADResolve(t *testing.T) {
 
 				// Make sure the right remote link address is used.
 				snmc := header.SolicitedNodeAddr(addr1)
-				if want := header.EthernetAddressFromMulticastIPv6Address(snmc); p.Route.RemoteLinkAddress != want {
-					t.Errorf("got remote link address = %s, want = %s", p.Route.RemoteLinkAddress, want)
+				if got, want := p.Route.RemoteLinkAddress(), header.EthernetAddressFromMulticastIPv6Address(snmc); got != want {
+					t.Errorf("got remote link address = %s, want = %s", got, want)
 				}
 
 				// Check NDP NS packet.
@@ -563,18 +570,18 @@ func rxNDPSolicit(e *channel.Endpoint, tgt tcpip.Address) {
 	hdr := buffer.NewPrependable(header.IPv6MinimumSize + header.ICMPv6NeighborSolicitMinimumSize)
 	pkt := header.ICMPv6(hdr.Prepend(header.ICMPv6NeighborSolicitMinimumSize))
 	pkt.SetType(header.ICMPv6NeighborSolicit)
-	ns := header.NDPNeighborSolicit(pkt.NDPPayload())
+	ns := header.NDPNeighborSolicit(pkt.MessageBody())
 	ns.SetTargetAddress(tgt)
 	snmc := header.SolicitedNodeAddr(tgt)
 	pkt.SetChecksum(header.ICMPv6Checksum(pkt, header.IPv6Any, snmc, buffer.VectorisedView{}))
 	payloadLength := hdr.UsedLength()
 	ip := header.IPv6(hdr.Prepend(header.IPv6MinimumSize))
 	ip.Encode(&header.IPv6Fields{
-		PayloadLength: uint16(payloadLength),
-		NextHeader:    uint8(icmp.ProtocolNumber6),
-		HopLimit:      255,
-		SrcAddr:       header.IPv6Any,
-		DstAddr:       snmc,
+		PayloadLength:     uint16(payloadLength),
+		TransportProtocol: icmp.ProtocolNumber6,
+		HopLimit:          255,
+		SrcAddr:           header.IPv6Any,
+		DstAddr:           snmc,
 	})
 	e.InjectInbound(header.IPv6ProtocolNumber, stack.NewPacketBuffer(stack.PacketBufferOptions{Data: hdr.View().ToVectorisedView()}))
 }
@@ -605,7 +612,7 @@ func TestDADFail(t *testing.T) {
 				hdr := buffer.NewPrependable(header.IPv6MinimumSize + naSize)
 				pkt := header.ICMPv6(hdr.Prepend(naSize))
 				pkt.SetType(header.ICMPv6NeighborAdvert)
-				na := header.NDPNeighborAdvert(pkt.NDPPayload())
+				na := header.NDPNeighborAdvert(pkt.MessageBody())
 				na.SetSolicitedFlag(true)
 				na.SetOverrideFlag(true)
 				na.SetTargetAddress(tgt)
@@ -616,11 +623,11 @@ func TestDADFail(t *testing.T) {
 				payloadLength := hdr.UsedLength()
 				ip := header.IPv6(hdr.Prepend(header.IPv6MinimumSize))
 				ip.Encode(&header.IPv6Fields{
-					PayloadLength: uint16(payloadLength),
-					NextHeader:    uint8(icmp.ProtocolNumber6),
-					HopLimit:      255,
-					SrcAddr:       tgt,
-					DstAddr:       header.IPv6AllNodesMulticastAddress,
+					PayloadLength:     uint16(payloadLength),
+					TransportProtocol: icmp.ProtocolNumber6,
+					HopLimit:          255,
+					SrcAddr:           tgt,
+					DstAddr:           header.IPv6AllNodesMulticastAddress,
 				})
 				e.InjectInbound(header.IPv6ProtocolNumber, stack.NewPacketBuffer(stack.PacketBufferOptions{Data: hdr.View().ToVectorisedView()}))
 			},
@@ -666,7 +673,7 @@ func TestDADFail(t *testing.T) {
 			// Receive a packet to simulate an address conflict.
 			test.rxPkt(e, addr1)
 
-			stat := test.getStat(s.Stats().ICMP.V6PacketsReceived)
+			stat := test.getStat(s.Stats().ICMP.V6.PacketsReceived)
 			if got := stat.Value(); got != 1 {
 				t.Fatalf("got stat = %d, want = 1", got)
 			}
@@ -803,7 +810,7 @@ func TestDADStop(t *testing.T) {
 			}
 
 			// Should not have sent more than 1 NS message.
-			if got := s.Stats().ICMP.V6PacketsSent.NeighborSolicit.Value(); got > 1 {
+			if got := s.Stats().ICMP.V6.PacketsSent.NeighborSolicit.Value(); got > 1 {
 				t.Errorf("got NeighborSolicit = %d, want <= 1", got)
 			}
 		})
@@ -982,7 +989,7 @@ func raBufWithOptsAndDHCPv6(ip tcpip.Address, rl uint16, managedAddress, otherCo
 	pkt := header.ICMPv6(hdr.Prepend(icmpSize))
 	pkt.SetType(header.ICMPv6RouterAdvert)
 	pkt.SetCode(0)
-	raPayload := pkt.NDPPayload()
+	raPayload := pkt.MessageBody()
 	ra := header.NDPRouterAdvert(raPayload)
 	// Populate the Router Lifetime.
 	binary.BigEndian.PutUint16(raPayload[2:], rl)
@@ -1004,11 +1011,11 @@ func raBufWithOptsAndDHCPv6(ip tcpip.Address, rl uint16, managedAddress, otherCo
 	payloadLength := hdr.UsedLength()
 	iph := header.IPv6(hdr.Prepend(header.IPv6MinimumSize))
 	iph.Encode(&header.IPv6Fields{
-		PayloadLength: uint16(payloadLength),
-		NextHeader:    uint8(icmp.ProtocolNumber6),
-		HopLimit:      header.NDPHopLimit,
-		SrcAddr:       ip,
-		DstAddr:       header.IPv6AllNodesMulticastAddress,
+		PayloadLength:     uint16(payloadLength),
+		TransportProtocol: icmp.ProtocolNumber6,
+		HopLimit:          header.NDPHopLimit,
+		SrcAddr:           ip,
+		DstAddr:           header.IPv6AllNodesMulticastAddress,
 	})
 
 	return stack.NewPacketBuffer(stack.PacketBufferOptions{
@@ -2162,8 +2169,8 @@ func TestNoAutoGenTempAddrForLinkLocal(t *testing.T) {
 						NDPConfigs: ipv6.NDPConfigurations{
 							AutoGenTempGlobalAddresses: true,
 						},
-						NDPDisp:              &ndpDisp,
-						AutoGenIPv6LinkLocal: true,
+						NDPDisp:          &ndpDisp,
+						AutoGenLinkLocal: true,
 					})},
 				})
 
@@ -2843,9 +2850,7 @@ func addrForNewConnectionTo(t *testing.T, s *stack.Stack, addr tcpip.FullAddress
 		t.Fatalf("s.NewEndpoint(%d, %d, _): %s", header.UDPProtocolNumber, header.IPv6ProtocolNumber, err)
 	}
 	defer ep.Close()
-	if err := ep.SetSockOptBool(tcpip.V6OnlyOption, true); err != nil {
-		t.Fatalf("SetSockOpt(tcpip.V6OnlyOption, true): %s", err)
-	}
+	ep.SocketOptions().SetV6Only(true)
 	if err := ep.Connect(addr); err != nil {
 		t.Fatalf("ep.Connect(%+v): %s", addr, err)
 	}
@@ -2879,9 +2884,7 @@ func addrForNewConnectionWithAddr(t *testing.T, s *stack.Stack, addr tcpip.FullA
 		t.Fatalf("s.NewEndpoint(%d, %d, _): %s", header.UDPProtocolNumber, header.IPv6ProtocolNumber, err)
 	}
 	defer ep.Close()
-	if err := ep.SetSockOptBool(tcpip.V6OnlyOption, true); err != nil {
-		t.Fatalf("SetSockOpt(tcpip.V6OnlyOption, true): %s", err)
-	}
+	ep.SocketOptions().SetV6Only(true)
 	if err := ep.Bind(addr); err != nil {
 		t.Fatalf("ep.Bind(%+v): %s", addr, err)
 	}
@@ -3250,9 +3253,7 @@ func TestAutoGenAddrJobDeprecation(t *testing.T) {
 				t.Fatalf("s.NewEndpoint(%d, %d, _): %s", header.UDPProtocolNumber, header.IPv6ProtocolNumber, err)
 			}
 			defer ep.Close()
-			if err := ep.SetSockOptBool(tcpip.V6OnlyOption, true); err != nil {
-				t.Fatalf("SetSockOpt(tcpip.V6OnlyOption, true): %s", err)
-			}
+			ep.SocketOptions().SetV6Only(true)
 
 			if err := ep.Connect(dstAddr); err != tcpip.ErrNoRoute {
 				t.Errorf("got ep.Connect(%+v) = %s, want = %s", dstAddr, err, tcpip.ErrNoRoute)
@@ -4044,9 +4045,9 @@ func TestAutoGenAddrInResponseToDADConflicts(t *testing.T) {
 						ndpConfigs.AutoGenAddressConflictRetries = maxRetries
 						s := stack.New(stack.Options{
 							NetworkProtocols: []stack.NetworkProtocolFactory{ipv6.NewProtocolWithOptions(ipv6.Options{
-								AutoGenIPv6LinkLocal: addrType.autoGenLinkLocal,
-								NDPConfigs:           ndpConfigs,
-								NDPDisp:              &ndpDisp,
+								AutoGenLinkLocal: addrType.autoGenLinkLocal,
+								NDPConfigs:       ndpConfigs,
+								NDPDisp:          &ndpDisp,
 								OpaqueIIDOpts: ipv6.OpaqueInterfaceIdentifierOptions{
 									NICNameFromID: func(_ tcpip.NICID, nicName string) string {
 										return nicName
@@ -4179,9 +4180,9 @@ func TestAutoGenAddrWithEUI64IIDNoDADRetries(t *testing.T) {
 			e := channel.New(0, 1280, linkAddr1)
 			s := stack.New(stack.Options{
 				NetworkProtocols: []stack.NetworkProtocolFactory{ipv6.NewProtocolWithOptions(ipv6.Options{
-					AutoGenIPv6LinkLocal: addrType.autoGenLinkLocal,
-					NDPConfigs:           addrType.ndpConfigs,
-					NDPDisp:              &ndpDisp,
+					AutoGenLinkLocal: addrType.autoGenLinkLocal,
+					NDPConfigs:       addrType.ndpConfigs,
+					NDPDisp:          &ndpDisp,
 				})},
 			})
 			if err := s.CreateNIC(nicID, e); err != nil {
@@ -4708,7 +4709,7 @@ func TestCleanupNDPState(t *testing.T) {
 			}
 			s := stack.New(stack.Options{
 				NetworkProtocols: []stack.NetworkProtocolFactory{ipv6.NewProtocolWithOptions(ipv6.Options{
-					AutoGenIPv6LinkLocal: true,
+					AutoGenLinkLocal: true,
 					NDPConfigs: ipv6.NDPConfigurations{
 						HandleRAs:              true,
 						DiscoverDefaultRouters: true,
@@ -5174,113 +5175,99 @@ func TestRouterSolicitation(t *testing.T) {
 		},
 	}
 
-	// This Run will not return until the parallel tests finish.
-	//
-	// We need this because we need to do some teardown work after the
-	// parallel tests complete.
-	//
-	// See https://godoc.org/testing#hdr-Subtests_and_Sub_benchmarks for
-	// more details.
-	t.Run("group", func(t *testing.T) {
-		for _, test := range tests {
-			test := test
-
-			t.Run(test.name, func(t *testing.T) {
-				t.Parallel()
+	for _, test := range tests {
+		t.Run(test.name, func(t *testing.T) {
+			clock := faketime.NewManualClock()
+			e := channelLinkWithHeaderLength{
+				Endpoint:     channel.New(int(test.maxRtrSolicit), 1280, test.linkAddr),
+				headerLength: test.linkHeaderLen,
+			}
+			e.Endpoint.LinkEPCapabilities |= stack.CapabilityResolutionRequired
+			waitForPkt := func(timeout time.Duration) {
+				t.Helper()
 
-				e := channelLinkWithHeaderLength{
-					Endpoint:     channel.New(int(test.maxRtrSolicit), 1280, test.linkAddr),
-					headerLength: test.linkHeaderLen,
+				clock.Advance(timeout)
+				p, ok := e.Read()
+				if !ok {
+					t.Fatal("expected router solicitation packet")
 				}
-				e.Endpoint.LinkEPCapabilities |= stack.CapabilityResolutionRequired
-				waitForPkt := func(timeout time.Duration) {
-					t.Helper()
-					ctx, cancel := context.WithTimeout(context.Background(), timeout)
-					defer cancel()
-					p, ok := e.ReadContext(ctx)
-					if !ok {
-						t.Fatal("timed out waiting for packet")
-						return
-					}
 
-					if p.Proto != header.IPv6ProtocolNumber {
-						t.Fatalf("got Proto = %d, want = %d", p.Proto, header.IPv6ProtocolNumber)
-					}
+				if p.Proto != header.IPv6ProtocolNumber {
+					t.Fatalf("got Proto = %d, want = %d", p.Proto, header.IPv6ProtocolNumber)
+				}
 
-					// Make sure the right remote link address is used.
-					if want := header.EthernetAddressFromMulticastIPv6Address(header.IPv6AllRoutersMulticastAddress); p.Route.RemoteLinkAddress != want {
-						t.Errorf("got remote link address = %s, want = %s", p.Route.RemoteLinkAddress, want)
-					}
+				// Make sure the right remote link address is used.
+				if got, want := p.Route.RemoteLinkAddress(), header.EthernetAddressFromMulticastIPv6Address(header.IPv6AllRoutersMulticastAddress); got != want {
+					t.Errorf("got remote link address = %s, want = %s", got, want)
+				}
 
-					checker.IPv6(t, stack.PayloadSince(p.Pkt.NetworkHeader()),
-						checker.SrcAddr(test.expectedSrcAddr),
-						checker.DstAddr(header.IPv6AllRoutersMulticastAddress),
-						checker.TTL(header.NDPHopLimit),
-						checker.NDPRS(checker.NDPRSOptions(test.expectedNDPOpts)),
-					)
+				checker.IPv6(t, stack.PayloadSince(p.Pkt.NetworkHeader()),
+					checker.SrcAddr(test.expectedSrcAddr),
+					checker.DstAddr(header.IPv6AllRoutersMulticastAddress),
+					checker.TTL(header.NDPHopLimit),
+					checker.NDPRS(checker.NDPRSOptions(test.expectedNDPOpts)),
+				)
 
-					if l, want := p.Pkt.AvailableHeaderBytes(), int(test.linkHeaderLen); l != want {
-						t.Errorf("got p.Pkt.AvailableHeaderBytes() = %d; want = %d", l, want)
-					}
-				}
-				waitForNothing := func(timeout time.Duration) {
-					t.Helper()
-					ctx, cancel := context.WithTimeout(context.Background(), timeout)
-					defer cancel()
-					if _, ok := e.ReadContext(ctx); ok {
-						t.Fatal("unexpectedly got a packet")
-					}
-				}
-				s := stack.New(stack.Options{
-					NetworkProtocols: []stack.NetworkProtocolFactory{ipv6.NewProtocolWithOptions(ipv6.Options{
-						NDPConfigs: ipv6.NDPConfigurations{
-							MaxRtrSolicitations:     test.maxRtrSolicit,
-							RtrSolicitationInterval: test.rtrSolicitInt,
-							MaxRtrSolicitationDelay: test.maxRtrSolicitDelay,
-						},
-					})},
-				})
-				if err := s.CreateNIC(nicID, &e); err != nil {
-					t.Fatalf("CreateNIC(%d, _) = %s", nicID, err)
+				if l, want := p.Pkt.AvailableHeaderBytes(), int(test.linkHeaderLen); l != want {
+					t.Errorf("got p.Pkt.AvailableHeaderBytes() = %d; want = %d", l, want)
 				}
+			}
+			waitForNothing := func(timeout time.Duration) {
+				t.Helper()
 
-				if addr := test.nicAddr; addr != "" {
-					if err := s.AddAddress(nicID, header.IPv6ProtocolNumber, addr); err != nil {
-						t.Fatalf("AddAddress(%d, %d, %s) = %s", nicID, header.IPv6ProtocolNumber, addr, err)
-					}
+				clock.Advance(timeout)
+				if p, ok := e.Read(); ok {
+					t.Fatalf("unexpectedly got a packet = %#v", p)
 				}
+			}
+			s := stack.New(stack.Options{
+				NetworkProtocols: []stack.NetworkProtocolFactory{ipv6.NewProtocolWithOptions(ipv6.Options{
+					NDPConfigs: ipv6.NDPConfigurations{
+						MaxRtrSolicitations:     test.maxRtrSolicit,
+						RtrSolicitationInterval: test.rtrSolicitInt,
+						MaxRtrSolicitationDelay: test.maxRtrSolicitDelay,
+					},
+				})},
+				Clock: clock,
+			})
+			if err := s.CreateNIC(nicID, &e); err != nil {
+				t.Fatalf("CreateNIC(%d, _) = %s", nicID, err)
+			}
 
-				// Make sure each RS is sent at the right time.
-				remaining := test.maxRtrSolicit
-				if remaining > 0 {
-					waitForPkt(test.effectiveMaxRtrSolicitDelay + defaultAsyncPositiveEventTimeout)
-					remaining--
+			if addr := test.nicAddr; addr != "" {
+				if err := s.AddAddress(nicID, header.IPv6ProtocolNumber, addr); err != nil {
+					t.Fatalf("AddAddress(%d, %d, %s) = %s", nicID, header.IPv6ProtocolNumber, addr, err)
 				}
+			}
 
-				for ; remaining > 0; remaining-- {
-					if test.effectiveRtrSolicitInt > defaultAsyncPositiveEventTimeout {
-						waitForNothing(test.effectiveRtrSolicitInt - defaultAsyncNegativeEventTimeout)
-						waitForPkt(defaultAsyncPositiveEventTimeout)
-					} else {
-						waitForPkt(test.effectiveRtrSolicitInt + defaultAsyncPositiveEventTimeout)
-					}
-				}
+			// Make sure each RS is sent at the right time.
+			remaining := test.maxRtrSolicit
+			if remaining > 0 {
+				waitForPkt(test.effectiveMaxRtrSolicitDelay)
+				remaining--
+			}
 
-				// Make sure no more RS.
-				if test.effectiveRtrSolicitInt > test.effectiveMaxRtrSolicitDelay {
-					waitForNothing(test.effectiveRtrSolicitInt + defaultAsyncNegativeEventTimeout)
+			for ; remaining > 0; remaining-- {
+				if test.effectiveRtrSolicitInt > defaultAsyncPositiveEventTimeout {
+					waitForNothing(test.effectiveRtrSolicitInt - time.Nanosecond)
+					waitForPkt(time.Nanosecond)
 				} else {
-					waitForNothing(test.effectiveMaxRtrSolicitDelay + defaultAsyncNegativeEventTimeout)
+					waitForPkt(test.effectiveRtrSolicitInt)
 				}
+			}
 
-				// Make sure the counter got properly
-				// incremented.
-				if got, want := s.Stats().ICMP.V6PacketsSent.RouterSolicit.Value(), uint64(test.maxRtrSolicit); got != want {
-					t.Fatalf("got sent RouterSolicit = %d, want = %d", got, want)
-				}
-			})
-		}
-	})
+			// Make sure no more RS.
+			if test.effectiveRtrSolicitInt > test.effectiveMaxRtrSolicitDelay {
+				waitForNothing(test.effectiveRtrSolicitInt)
+			} else {
+				waitForNothing(test.effectiveMaxRtrSolicitDelay)
+			}
+
+			if got, want := s.Stats().ICMP.V6.PacketsSent.RouterSolicit.Value(), uint64(test.maxRtrSolicit); got != want {
+				t.Fatalf("got sent RouterSolicit = %d, want = %d", got, want)
+			}
+		})
+	}
 }
 
 func TestStopStartSolicitingRouters(t *testing.T) {
diff --git a/pkg/tcpip/stack/neighbor_cache.go b/pkg/tcpip/stack/neighbor_cache.go
index 4df288798..317f6871d 100644
--- a/pkg/tcpip/stack/neighbor_cache.go
+++ b/pkg/tcpip/stack/neighbor_cache.go
@@ -16,7 +16,6 @@ package stack
 
 import (
 	"fmt"
-	"time"
 
 	"gvisor.dev/gvisor/pkg/sleep"
 	"gvisor.dev/gvisor/pkg/sync"
@@ -25,9 +24,16 @@ import (
 
 const neighborCacheSize = 512 // max entries per interface
 
+// NeighborStats holds metrics for the neighbor table.
+type NeighborStats struct {
+	// FailedEntryLookups counts the number of lookups performed on an entry in
+	// Failed state.
+	FailedEntryLookups *tcpip.StatCounter
+}
+
 // neighborCache maps IP addresses to link addresses. It uses the Least
 // Recently Used (LRU) eviction strategy to implement a bounded cache for
-// dynmically acquired entries. It contains the state machine and configuration
+// dynamically acquired entries. It contains the state machine and configuration
 // for running Neighbor Unreachability Detection (NUD).
 //
 // There are two types of entries in the neighbor cache:
@@ -68,7 +74,7 @@ var _ NUDHandler = (*neighborCache)(nil)
 // reset to state incomplete, and returned. If no matching entry exists and the
 // cache is not full, a new entry with state incomplete is allocated and
 // returned.
-func (n *neighborCache) getOrCreateEntry(remoteAddr, localAddr tcpip.Address, linkRes LinkAddressResolver) *neighborEntry {
+func (n *neighborCache) getOrCreateEntry(remoteAddr tcpip.Address, linkRes LinkAddressResolver) *neighborEntry {
 	n.mu.Lock()
 	defer n.mu.Unlock()
 
@@ -84,7 +90,7 @@ func (n *neighborCache) getOrCreateEntry(remoteAddr, localAddr tcpip.Address, li
 
 	// The entry that needs to be created must be dynamic since all static
 	// entries are directly added to the cache via addStaticEntry.
-	entry := newNeighborEntry(n.nic, remoteAddr, localAddr, n.state, linkRes)
+	entry := newNeighborEntry(n.nic, remoteAddr, n.state, linkRes)
 	if n.dynamic.count == neighborCacheSize {
 		e := n.dynamic.lru.Back()
 		e.mu.Lock()
@@ -111,28 +117,31 @@ func (n *neighborCache) getOrCreateEntry(remoteAddr, localAddr tcpip.Address, li
 // provided, it will be notified when address resolution is complete (success
 // or not).
 //
+// If specified, the local address must be an address local to the interface the
+// neighbor cache belongs to. The local address is the source address of a
+// packet prompting NUD/link address resolution.
+//
 // If address resolution is required, ErrNoLinkAddress and a notification
 // channel is returned for the top level caller to block. Channel is closed
 // once address resolution is complete (success or not).
 func (n *neighborCache) entry(remoteAddr, localAddr tcpip.Address, linkRes LinkAddressResolver, w *sleep.Waker) (NeighborEntry, <-chan struct{}, *tcpip.Error) {
 	if linkAddr, ok := linkRes.ResolveStaticAddress(remoteAddr); ok {
 		e := NeighborEntry{
-			Addr:      remoteAddr,
-			LocalAddr: localAddr,
-			LinkAddr:  linkAddr,
-			State:     Static,
-			UpdatedAt: time.Now(),
+			Addr:           remoteAddr,
+			LinkAddr:       linkAddr,
+			State:          Static,
+			UpdatedAtNanos: 0,
 		}
 		return e, nil, nil
 	}
 
-	entry := n.getOrCreateEntry(remoteAddr, localAddr, linkRes)
+	entry := n.getOrCreateEntry(remoteAddr, linkRes)
 	entry.mu.Lock()
 	defer entry.mu.Unlock()
 
 	switch s := entry.neigh.State; s {
 	case Stale:
-		entry.handlePacketQueuedLocked()
+		entry.handlePacketQueuedLocked(localAddr)
 		fallthrough
 	case Reachable, Static, Delay, Probe:
 		// As per RFC 4861 section 7.3.3:
@@ -152,7 +161,7 @@ func (n *neighborCache) entry(remoteAddr, localAddr tcpip.Address, linkRes LinkA
 			entry.done = make(chan struct{})
 		}
 
-		entry.handlePacketQueuedLocked()
+		entry.handlePacketQueuedLocked(localAddr)
 		return entry.neigh, entry.done, tcpip.ErrWouldBlock
 	case Failed:
 		return entry.neigh, nil, tcpip.ErrNoLinkAddress
@@ -173,14 +182,15 @@ func (n *neighborCache) removeWaker(addr tcpip.Address, waker *sleep.Waker) {
 
 // entries returns all entries in the neighbor cache.
 func (n *neighborCache) entries() []NeighborEntry {
-	entries := make([]NeighborEntry, 0, len(n.cache))
 	n.mu.RLock()
+	defer n.mu.RUnlock()
+
+	entries := make([]NeighborEntry, 0, len(n.cache))
 	for _, entry := range n.cache {
 		entry.mu.RLock()
 		entries = append(entries, entry.neigh)
 		entry.mu.RUnlock()
 	}
-	n.mu.RUnlock()
 	return entries
 }
 
@@ -207,7 +217,7 @@ func (n *neighborCache) addStaticEntry(addr tcpip.Address, linkAddr tcpip.LinkAd
 		} else {
 			// Static entry found with the same address but different link address.
 			entry.neigh.LinkAddr = linkAddr
-			entry.dispatchChangeEventLocked(entry.neigh.State)
+			entry.dispatchChangeEventLocked()
 			entry.mu.Unlock()
 			return
 		}
@@ -220,11 +230,12 @@ func (n *neighborCache) addStaticEntry(addr tcpip.Address, linkAddr tcpip.LinkAd
 		entry.mu.Unlock()
 	}
 
-	entry := newStaticNeighborEntry(n.nic, addr, linkAddr, n.state)
-	n.cache[addr] = entry
+	n.cache[addr] = newStaticNeighborEntry(n.nic, addr, linkAddr, n.state)
 }
 
 // removeEntryLocked removes the specified entry from the neighbor cache.
+//
+// Prerequisite: n.mu and entry.mu MUST be locked.
 func (n *neighborCache) removeEntryLocked(entry *neighborEntry) {
 	if entry.neigh.State != Static {
 		n.dynamic.lru.Remove(entry)
@@ -292,8 +303,8 @@ func (n *neighborCache) setConfig(config NUDConfigurations) {
 // HandleProbe implements NUDHandler.HandleProbe by following the logic defined
 // in RFC 4861 section 7.2.3. Validation of the probe is expected to be handled
 // by the caller.
-func (n *neighborCache) HandleProbe(remoteAddr, localAddr tcpip.Address, protocol tcpip.NetworkProtocolNumber, remoteLinkAddr tcpip.LinkAddress, linkRes LinkAddressResolver) {
-	entry := n.getOrCreateEntry(remoteAddr, localAddr, linkRes)
+func (n *neighborCache) HandleProbe(remoteAddr tcpip.Address, protocol tcpip.NetworkProtocolNumber, remoteLinkAddr tcpip.LinkAddress, linkRes LinkAddressResolver) {
+	entry := n.getOrCreateEntry(remoteAddr, linkRes)
 	entry.mu.Lock()
 	entry.handleProbeLocked(remoteLinkAddr)
 	entry.mu.Unlock()
diff --git a/pkg/tcpip/stack/neighbor_cache_test.go b/pkg/tcpip/stack/neighbor_cache_test.go
index fcd54ed83..732a299f7 100644
--- a/pkg/tcpip/stack/neighbor_cache_test.go
+++ b/pkg/tcpip/stack/neighbor_cache_test.go
@@ -61,39 +61,39 @@ const (
 )
 
 // entryDiffOpts returns the options passed to cmp.Diff to compare neighbor
-// entries. The UpdatedAt field is ignored due to a lack of a deterministic
-// method to predict the time that an event will be dispatched.
+// entries. The UpdatedAtNanos field is ignored due to a lack of a
+// deterministic method to predict the time that an event will be dispatched.
 func entryDiffOpts() []cmp.Option {
 	return []cmp.Option{
-		cmpopts.IgnoreFields(NeighborEntry{}, "UpdatedAt"),
+		cmpopts.IgnoreFields(NeighborEntry{}, "UpdatedAtNanos"),
 	}
 }
 
 // entryDiffOptsWithSort is like entryDiffOpts but also includes an option to
 // sort slices of entries for cases where ordering must be ignored.
 func entryDiffOptsWithSort() []cmp.Option {
-	return []cmp.Option{
-		cmpopts.IgnoreFields(NeighborEntry{}, "UpdatedAt"),
-		cmpopts.SortSlices(func(a, b NeighborEntry) bool {
-			return strings.Compare(string(a.Addr), string(b.Addr)) < 0
-		}),
-	}
+	return append(entryDiffOpts(), cmpopts.SortSlices(func(a, b NeighborEntry) bool {
+		return strings.Compare(string(a.Addr), string(b.Addr)) < 0
+	}))
 }
 
 func newTestNeighborCache(nudDisp NUDDispatcher, config NUDConfigurations, clock tcpip.Clock) *neighborCache {
 	config.resetInvalidFields()
 	rng := rand.New(rand.NewSource(time.Now().UnixNano()))
-	return &neighborCache{
+	neigh := &neighborCache{
 		nic: &NIC{
 			stack: &Stack{
 				clock:   clock,
 				nudDisp: nudDisp,
 			},
-			id: 1,
+			id:    1,
+			stats: makeNICStats(),
 		},
 		state: NewNUDState(config, rng),
 		cache: make(map[tcpip.Address]*neighborEntry, neighborCacheSize),
 	}
+	neigh.nic.neigh = neigh
+	return neigh
 }
 
 // testEntryStore contains a set of IP to NeighborEntry mappings.
@@ -128,9 +128,8 @@ func newTestEntryStore() *testEntryStore {
 		linkAddr := toLinkAddress(i)
 
 		store.entriesMap[addr] = NeighborEntry{
-			Addr:      addr,
-			LocalAddr: testEntryLocalAddr,
-			LinkAddr:  linkAddr,
+			Addr:     addr,
+			LinkAddr: linkAddr,
 		}
 	}
 	return store
@@ -195,10 +194,10 @@ type testNeighborResolver struct {
 
 var _ LinkAddressResolver = (*testNeighborResolver)(nil)
 
-func (r *testNeighborResolver) LinkAddressRequest(addr, localAddr tcpip.Address, linkAddr tcpip.LinkAddress, linkEP LinkEndpoint) *tcpip.Error {
+func (r *testNeighborResolver) LinkAddressRequest(targetAddr, _ tcpip.Address, _ tcpip.LinkAddress, _ NetworkInterface) *tcpip.Error {
 	// Delay handling the request to emulate network latency.
 	r.clock.AfterFunc(r.delay, func() {
-		r.fakeRequest(addr)
+		r.fakeRequest(targetAddr)
 	})
 
 	// Execute post address resolution action, if available.
@@ -294,9 +293,8 @@ func TestNeighborCacheEntry(t *testing.T) {
 	if !ok {
 		t.Fatalf("store.entry(0) not found")
 	}
-	_, _, err := neigh.entry(entry.Addr, entry.LocalAddr, linkRes, nil)
-	if err != tcpip.ErrWouldBlock {
-		t.Errorf("got neigh.entry(%s, %s, _, nil) = %v, want = %s", entry.Addr, entry.LocalAddr, err, tcpip.ErrWouldBlock)
+	if _, _, err := neigh.entry(entry.Addr, "", linkRes, nil); err != tcpip.ErrWouldBlock {
+		t.Errorf("got neigh.entry(%s, '', _, nil) = %v, want = %s", entry.Addr, err, tcpip.ErrWouldBlock)
 	}
 
 	clock.Advance(typicalLatency)
@@ -305,15 +303,19 @@ func TestNeighborCacheEntry(t *testing.T) {
 		{
 			EventType: entryTestAdded,
 			NICID:     1,
-			Addr:      entry.Addr,
-			State:     Incomplete,
+			Entry: NeighborEntry{
+				Addr:  entry.Addr,
+				State: Incomplete,
+			},
 		},
 		{
 			EventType: entryTestChanged,
 			NICID:     1,
-			Addr:      entry.Addr,
-			LinkAddr:  entry.LinkAddr,
-			State:     Reachable,
+			Entry: NeighborEntry{
+				Addr:     entry.Addr,
+				LinkAddr: entry.LinkAddr,
+				State:    Reachable,
+			},
 		},
 	}
 	nudDisp.mu.Lock()
@@ -324,8 +326,8 @@ func TestNeighborCacheEntry(t *testing.T) {
 		t.Fatalf("nud dispatcher events mismatch (-got, +want):\n%s", diff)
 	}
 
-	if _, _, err := neigh.entry(entry.Addr, entry.LocalAddr, linkRes, nil); err != nil {
-		t.Fatalf("unexpected error from neigh.entry(%s, %s, _, nil): %s", entry.Addr, entry.LocalAddr, err)
+	if _, _, err := neigh.entry(entry.Addr, "", linkRes, nil); err != nil {
+		t.Fatalf("unexpected error from neigh.entry(%s, '', _, nil): %s", entry.Addr, err)
 	}
 
 	// No more events should have been dispatched.
@@ -354,9 +356,9 @@ func TestNeighborCacheRemoveEntry(t *testing.T) {
 	if !ok {
 		t.Fatalf("store.entry(0) not found")
 	}
-	_, _, err := neigh.entry(entry.Addr, entry.LocalAddr, linkRes, nil)
-	if err != tcpip.ErrWouldBlock {
-		t.Errorf("got neigh.entry(%s, %s, _, nil) = %v, want = %s", entry.Addr, entry.LocalAddr, err, tcpip.ErrWouldBlock)
+
+	if _, _, err := neigh.entry(entry.Addr, "", linkRes, nil); err != tcpip.ErrWouldBlock {
+		t.Errorf("got neigh.entry(%s, '', _, nil) = %v, want = %s", entry.Addr, err, tcpip.ErrWouldBlock)
 	}
 
 	clock.Advance(typicalLatency)
@@ -365,15 +367,19 @@ func TestNeighborCacheRemoveEntry(t *testing.T) {
 		{
 			EventType: entryTestAdded,
 			NICID:     1,
-			Addr:      entry.Addr,
-			State:     Incomplete,
+			Entry: NeighborEntry{
+				Addr:  entry.Addr,
+				State: Incomplete,
+			},
 		},
 		{
 			EventType: entryTestChanged,
 			NICID:     1,
-			Addr:      entry.Addr,
-			LinkAddr:  entry.LinkAddr,
-			State:     Reachable,
+			Entry: NeighborEntry{
+				Addr:     entry.Addr,
+				LinkAddr: entry.LinkAddr,
+				State:    Reachable,
+			},
 		},
 	}
 	nudDisp.mu.Lock()
@@ -391,9 +397,11 @@ func TestNeighborCacheRemoveEntry(t *testing.T) {
 			{
 				EventType: entryTestRemoved,
 				NICID:     1,
-				Addr:      entry.Addr,
-				LinkAddr:  entry.LinkAddr,
-				State:     Reachable,
+				Entry: NeighborEntry{
+					Addr:     entry.Addr,
+					LinkAddr: entry.LinkAddr,
+					State:    Reachable,
+				},
 			},
 		}
 		nudDisp.mu.Lock()
@@ -404,8 +412,8 @@ func TestNeighborCacheRemoveEntry(t *testing.T) {
 		}
 	}
 
-	if _, _, err := neigh.entry(entry.Addr, entry.LocalAddr, linkRes, nil); err != tcpip.ErrWouldBlock {
-		t.Errorf("got neigh.entry(%s, %s, _, nil) = %v, want = %s", entry.Addr, entry.LocalAddr, err, tcpip.ErrWouldBlock)
+	if _, _, err := neigh.entry(entry.Addr, "", linkRes, nil); err != tcpip.ErrWouldBlock {
+		t.Errorf("got neigh.entry(%s, '', _, nil) = %v, want = %s", entry.Addr, err, tcpip.ErrWouldBlock)
 	}
 }
 
@@ -452,8 +460,8 @@ func (c *testContext) overflowCache(opts overflowOptions) error {
 		if !ok {
 			return fmt.Errorf("c.store.entry(%d) not found", i)
 		}
-		if _, _, err := c.neigh.entry(entry.Addr, entry.LocalAddr, c.linkRes, nil); err != tcpip.ErrWouldBlock {
-			return fmt.Errorf("got c.neigh.entry(%s, %s, _, nil) = %v, want = %s", entry.Addr, entry.LocalAddr, err, tcpip.ErrWouldBlock)
+		if _, _, err := c.neigh.entry(entry.Addr, "", c.linkRes, nil); err != tcpip.ErrWouldBlock {
+			return fmt.Errorf("got c.neigh.entry(%s, '', _, nil) = %v, want = %s", entry.Addr, err, tcpip.ErrWouldBlock)
 		}
 		c.clock.Advance(c.neigh.config().RetransmitTimer)
 
@@ -470,23 +478,29 @@ func (c *testContext) overflowCache(opts overflowOptions) error {
 			wantEvents = append(wantEvents, testEntryEventInfo{
 				EventType: entryTestRemoved,
 				NICID:     1,
-				Addr:      removedEntry.Addr,
-				LinkAddr:  removedEntry.LinkAddr,
-				State:     Reachable,
+				Entry: NeighborEntry{
+					Addr:     removedEntry.Addr,
+					LinkAddr: removedEntry.LinkAddr,
+					State:    Reachable,
+				},
 			})
 		}
 
 		wantEvents = append(wantEvents, testEntryEventInfo{
 			EventType: entryTestAdded,
 			NICID:     1,
-			Addr:      entry.Addr,
-			State:     Incomplete,
+			Entry: NeighborEntry{
+				Addr:  entry.Addr,
+				State: Incomplete,
+			},
 		}, testEntryEventInfo{
 			EventType: entryTestChanged,
 			NICID:     1,
-			Addr:      entry.Addr,
-			LinkAddr:  entry.LinkAddr,
-			State:     Reachable,
+			Entry: NeighborEntry{
+				Addr:     entry.Addr,
+				LinkAddr: entry.LinkAddr,
+				State:    Reachable,
+			},
 		})
 
 		c.nudDisp.mu.Lock()
@@ -508,10 +522,9 @@ func (c *testContext) overflowCache(opts overflowOptions) error {
 			return fmt.Errorf("c.store.entry(%d) not found", i)
 		}
 		wantEntry := NeighborEntry{
-			Addr:      entry.Addr,
-			LocalAddr: entry.LocalAddr,
-			LinkAddr:  entry.LinkAddr,
-			State:     Reachable,
+			Addr:     entry.Addr,
+			LinkAddr: entry.LinkAddr,
+			State:    Reachable,
 		}
 		wantUnsortedEntries = append(wantUnsortedEntries, wantEntry)
 	}
@@ -564,24 +577,27 @@ func TestNeighborCacheRemoveEntryThenOverflow(t *testing.T) {
 	if !ok {
 		t.Fatalf("c.store.entry(0) not found")
 	}
-	_, _, err := c.neigh.entry(entry.Addr, entry.LocalAddr, c.linkRes, nil)
-	if err != tcpip.ErrWouldBlock {
-		t.Errorf("got c.neigh.entry(%s, %s, _, nil) = %v, want = %s", entry.Addr, entry.LocalAddr, err, tcpip.ErrWouldBlock)
+	if _, _, err := c.neigh.entry(entry.Addr, "", c.linkRes, nil); err != tcpip.ErrWouldBlock {
+		t.Errorf("got c.neigh.entry(%s, '', _, nil) = %v, want = %s", entry.Addr, err, tcpip.ErrWouldBlock)
 	}
 	c.clock.Advance(c.neigh.config().RetransmitTimer)
 	wantEvents := []testEntryEventInfo{
 		{
 			EventType: entryTestAdded,
 			NICID:     1,
-			Addr:      entry.Addr,
-			State:     Incomplete,
+			Entry: NeighborEntry{
+				Addr:  entry.Addr,
+				State: Incomplete,
+			},
 		},
 		{
 			EventType: entryTestChanged,
 			NICID:     1,
-			Addr:      entry.Addr,
-			LinkAddr:  entry.LinkAddr,
-			State:     Reachable,
+			Entry: NeighborEntry{
+				Addr:     entry.Addr,
+				LinkAddr: entry.LinkAddr,
+				State:    Reachable,
+			},
 		},
 	}
 	c.nudDisp.mu.Lock()
@@ -600,9 +616,11 @@ func TestNeighborCacheRemoveEntryThenOverflow(t *testing.T) {
 			{
 				EventType: entryTestRemoved,
 				NICID:     1,
-				Addr:      entry.Addr,
-				LinkAddr:  entry.LinkAddr,
-				State:     Reachable,
+				Entry: NeighborEntry{
+					Addr:     entry.Addr,
+					LinkAddr: entry.LinkAddr,
+					State:    Reachable,
+				},
 			},
 		}
 		c.nudDisp.mu.Lock()
@@ -640,9 +658,11 @@ func TestNeighborCacheDuplicateStaticEntryWithSameLinkAddress(t *testing.T) {
 		{
 			EventType: entryTestAdded,
 			NICID:     1,
-			Addr:      entry.Addr,
-			LinkAddr:  staticLinkAddr,
-			State:     Static,
+			Entry: NeighborEntry{
+				Addr:     entry.Addr,
+				LinkAddr: staticLinkAddr,
+				State:    Static,
+			},
 		},
 	}
 	c.nudDisp.mu.Lock()
@@ -682,9 +702,11 @@ func TestNeighborCacheDuplicateStaticEntryWithDifferentLinkAddress(t *testing.T)
 		{
 			EventType: entryTestAdded,
 			NICID:     1,
-			Addr:      entry.Addr,
-			LinkAddr:  staticLinkAddr,
-			State:     Static,
+			Entry: NeighborEntry{
+				Addr:     entry.Addr,
+				LinkAddr: staticLinkAddr,
+				State:    Static,
+			},
 		},
 	}
 	c.nudDisp.mu.Lock()
@@ -703,9 +725,11 @@ func TestNeighborCacheDuplicateStaticEntryWithDifferentLinkAddress(t *testing.T)
 			{
 				EventType: entryTestChanged,
 				NICID:     1,
-				Addr:      entry.Addr,
-				LinkAddr:  staticLinkAddr,
-				State:     Static,
+				Entry: NeighborEntry{
+					Addr:     entry.Addr,
+					LinkAddr: staticLinkAddr,
+					State:    Static,
+				},
 			},
 		}
 		c.nudDisp.mu.Lock()
@@ -740,9 +764,11 @@ func TestNeighborCacheRemoveStaticEntryThenOverflow(t *testing.T) {
 		{
 			EventType: entryTestAdded,
 			NICID:     1,
-			Addr:      entry.Addr,
-			LinkAddr:  staticLinkAddr,
-			State:     Static,
+			Entry: NeighborEntry{
+				Addr:     entry.Addr,
+				LinkAddr: staticLinkAddr,
+				State:    Static,
+			},
 		},
 	}
 	c.nudDisp.mu.Lock()
@@ -760,9 +786,11 @@ func TestNeighborCacheRemoveStaticEntryThenOverflow(t *testing.T) {
 			{
 				EventType: entryTestRemoved,
 				NICID:     1,
-				Addr:      entry.Addr,
-				LinkAddr:  staticLinkAddr,
-				State:     Static,
+				Entry: NeighborEntry{
+					Addr:     entry.Addr,
+					LinkAddr: staticLinkAddr,
+					State:    Static,
+				},
 			},
 		}
 		c.nudDisp.mu.Lock()
@@ -800,24 +828,27 @@ func TestNeighborCacheOverwriteWithStaticEntryThenOverflow(t *testing.T) {
 	if !ok {
 		t.Fatalf("c.store.entry(0) not found")
 	}
-	_, _, err := c.neigh.entry(entry.Addr, entry.LocalAddr, c.linkRes, nil)
-	if err != tcpip.ErrWouldBlock {
-		t.Errorf("got c.neigh.entry(%s, %s, _, nil) = %v, want = %s", entry.Addr, entry.LocalAddr, err, tcpip.ErrWouldBlock)
+	if _, _, err := c.neigh.entry(entry.Addr, "", c.linkRes, nil); err != tcpip.ErrWouldBlock {
+		t.Errorf("got c.neigh.entry(%s, '', _, nil) = %v, want = %s", entry.Addr, err, tcpip.ErrWouldBlock)
 	}
 	c.clock.Advance(typicalLatency)
 	wantEvents := []testEntryEventInfo{
 		{
 			EventType: entryTestAdded,
 			NICID:     1,
-			Addr:      entry.Addr,
-			State:     Incomplete,
+			Entry: NeighborEntry{
+				Addr:  entry.Addr,
+				State: Incomplete,
+			},
 		},
 		{
 			EventType: entryTestChanged,
 			NICID:     1,
-			Addr:      entry.Addr,
-			LinkAddr:  entry.LinkAddr,
-			State:     Reachable,
+			Entry: NeighborEntry{
+				Addr:     entry.Addr,
+				LinkAddr: entry.LinkAddr,
+				State:    Reachable,
+			},
 		},
 	}
 	c.nudDisp.mu.Lock()
@@ -836,16 +867,20 @@ func TestNeighborCacheOverwriteWithStaticEntryThenOverflow(t *testing.T) {
 			{
 				EventType: entryTestRemoved,
 				NICID:     1,
-				Addr:      entry.Addr,
-				LinkAddr:  entry.LinkAddr,
-				State:     Reachable,
+				Entry: NeighborEntry{
+					Addr:     entry.Addr,
+					LinkAddr: entry.LinkAddr,
+					State:    Reachable,
+				},
 			},
 			{
 				EventType: entryTestAdded,
 				NICID:     1,
-				Addr:      entry.Addr,
-				LinkAddr:  staticLinkAddr,
-				State:     Static,
+				Entry: NeighborEntry{
+					Addr:     entry.Addr,
+					LinkAddr: staticLinkAddr,
+					State:    Static,
+				},
 			},
 		}
 		c.nudDisp.mu.Lock()
@@ -861,10 +896,9 @@ func TestNeighborCacheOverwriteWithStaticEntryThenOverflow(t *testing.T) {
 		startAtEntryIndex: 1,
 		wantStaticEntries: []NeighborEntry{
 			{
-				Addr:      entry.Addr,
-				LocalAddr: "", // static entries don't need a local address
-				LinkAddr:  staticLinkAddr,
-				State:     Static,
+				Addr:     entry.Addr,
+				LinkAddr: staticLinkAddr,
+				State:    Static,
 			},
 		},
 	}
@@ -896,12 +930,12 @@ func TestNeighborCacheNotifiesWaker(t *testing.T) {
 	if !ok {
 		t.Fatalf("store.entry(0) not found")
 	}
-	_, doneCh, err := neigh.entry(entry.Addr, entry.LocalAddr, linkRes, &w)
+	_, doneCh, err := neigh.entry(entry.Addr, "", linkRes, &w)
 	if err != tcpip.ErrWouldBlock {
-		t.Fatalf("got neigh.entry(%s, %s, _, _ = %v, want = %s", entry.Addr, entry.LocalAddr, err, tcpip.ErrWouldBlock)
+		t.Fatalf("got neigh.entry(%s, '', _, _) = %v, want = %s", entry.Addr, err, tcpip.ErrWouldBlock)
 	}
 	if doneCh == nil {
-		t.Fatalf("expected done channel from neigh.entry(%s, %s, _, _)", entry.Addr, entry.LocalAddr)
+		t.Fatalf("expected done channel from neigh.entry(%s, '', _, _)", entry.Addr)
 	}
 	clock.Advance(typicalLatency)
 
@@ -913,7 +947,7 @@ func TestNeighborCacheNotifiesWaker(t *testing.T) {
 
 	id, ok := s.Fetch(false /* block */)
 	if !ok {
-		t.Errorf("expected waker to be notified after neigh.entry(%s, %s, _, _)", entry.Addr, entry.LocalAddr)
+		t.Errorf("expected waker to be notified after neigh.entry(%s, '', _, _)", entry.Addr)
 	}
 	if id != wakerID {
 		t.Errorf("got s.Fetch(false) = %d, want = %d", id, wakerID)
@@ -923,15 +957,19 @@ func TestNeighborCacheNotifiesWaker(t *testing.T) {
 		{
 			EventType: entryTestAdded,
 			NICID:     1,
-			Addr:      entry.Addr,
-			State:     Incomplete,
+			Entry: NeighborEntry{
+				Addr:  entry.Addr,
+				State: Incomplete,
+			},
 		},
 		{
 			EventType: entryTestChanged,
 			NICID:     1,
-			Addr:      entry.Addr,
-			LinkAddr:  entry.LinkAddr,
-			State:     Reachable,
+			Entry: NeighborEntry{
+				Addr:     entry.Addr,
+				LinkAddr: entry.LinkAddr,
+				State:    Reachable,
+			},
 		},
 	}
 	nudDisp.mu.Lock()
@@ -964,12 +1002,12 @@ func TestNeighborCacheRemoveWaker(t *testing.T) {
 	if !ok {
 		t.Fatalf("store.entry(0) not found")
 	}
-	_, doneCh, err := neigh.entry(entry.Addr, entry.LocalAddr, linkRes, &w)
+	_, doneCh, err := neigh.entry(entry.Addr, "", linkRes, &w)
 	if err != tcpip.ErrWouldBlock {
-		t.Fatalf("got neigh.entry(%s, %s, _, _) = %v, want = %s", entry.Addr, entry.LocalAddr, err, tcpip.ErrWouldBlock)
+		t.Fatalf("got neigh.entry(%s, '', _, _) = %v, want = %s", entry.Addr, err, tcpip.ErrWouldBlock)
 	}
 	if doneCh == nil {
-		t.Fatalf("expected done channel from neigh.entry(%s, %s, _, _)", entry.Addr, entry.LocalAddr)
+		t.Fatalf("expected done channel from neigh.entry(%s, '', _, _)", entry.Addr)
 	}
 
 	// Remove the waker before the neighbor cache has the opportunity to send a
@@ -991,15 +1029,19 @@ func TestNeighborCacheRemoveWaker(t *testing.T) {
 		{
 			EventType: entryTestAdded,
 			NICID:     1,
-			Addr:      entry.Addr,
-			State:     Incomplete,
+			Entry: NeighborEntry{
+				Addr:  entry.Addr,
+				State: Incomplete,
+			},
 		},
 		{
 			EventType: entryTestChanged,
 			NICID:     1,
-			Addr:      entry.Addr,
-			LinkAddr:  entry.LinkAddr,
-			State:     Reachable,
+			Entry: NeighborEntry{
+				Addr:     entry.Addr,
+				LinkAddr: entry.LinkAddr,
+				State:    Reachable,
+			},
 		},
 	}
 	nudDisp.mu.Lock()
@@ -1028,10 +1070,9 @@ func TestNeighborCacheAddStaticEntryThenOverflow(t *testing.T) {
 		t.Errorf("unexpected error from c.neigh.entry(%s, \"\", _, nil): %s", entry.Addr, err)
 	}
 	want := NeighborEntry{
-		Addr:      entry.Addr,
-		LocalAddr: "", // static entries don't need a local address
-		LinkAddr:  entry.LinkAddr,
-		State:     Static,
+		Addr:     entry.Addr,
+		LinkAddr: entry.LinkAddr,
+		State:    Static,
 	}
 	if diff := cmp.Diff(e, want, entryDiffOpts()...); diff != "" {
 		t.Errorf("c.neigh.entry(%s, \"\", _, nil) mismatch (-got, +want):\n%s", entry.Addr, diff)
@@ -1041,9 +1082,11 @@ func TestNeighborCacheAddStaticEntryThenOverflow(t *testing.T) {
 		{
 			EventType: entryTestAdded,
 			NICID:     1,
-			Addr:      entry.Addr,
-			LinkAddr:  entry.LinkAddr,
-			State:     Static,
+			Entry: NeighborEntry{
+				Addr:     entry.Addr,
+				LinkAddr: entry.LinkAddr,
+				State:    Static,
+			},
 		},
 	}
 	c.nudDisp.mu.Lock()
@@ -1058,10 +1101,9 @@ func TestNeighborCacheAddStaticEntryThenOverflow(t *testing.T) {
 		startAtEntryIndex: 1,
 		wantStaticEntries: []NeighborEntry{
 			{
-				Addr:      entry.Addr,
-				LocalAddr: "", // static entries don't need a local address
-				LinkAddr:  entry.LinkAddr,
-				State:     Static,
+				Addr:     entry.Addr,
+				LinkAddr: entry.LinkAddr,
+				State:    Static,
 			},
 		},
 	}
@@ -1089,9 +1131,8 @@ func TestNeighborCacheClear(t *testing.T) {
 	if !ok {
 		t.Fatalf("store.entry(0) not found")
 	}
-	_, _, err := neigh.entry(entry.Addr, entry.LocalAddr, linkRes, nil)
-	if err != tcpip.ErrWouldBlock {
-		t.Errorf("got neigh.entry(%s, %s, _, nil) = %v, want = %s", entry.Addr, entry.LocalAddr, err, tcpip.ErrWouldBlock)
+	if _, _, err := neigh.entry(entry.Addr, "", linkRes, nil); err != tcpip.ErrWouldBlock {
+		t.Errorf("got neigh.entry(%s, '', _, nil) = %v, want = %s", entry.Addr, err, tcpip.ErrWouldBlock)
 	}
 	clock.Advance(typicalLatency)
 
@@ -1099,15 +1140,19 @@ func TestNeighborCacheClear(t *testing.T) {
 		{
 			EventType: entryTestAdded,
 			NICID:     1,
-			Addr:      entry.Addr,
-			State:     Incomplete,
+			Entry: NeighborEntry{
+				Addr:  entry.Addr,
+				State: Incomplete,
+			},
 		},
 		{
 			EventType: entryTestChanged,
 			NICID:     1,
-			Addr:      entry.Addr,
-			LinkAddr:  entry.LinkAddr,
-			State:     Reachable,
+			Entry: NeighborEntry{
+				Addr:     entry.Addr,
+				LinkAddr: entry.LinkAddr,
+				State:    Reachable,
+			},
 		},
 	}
 	nudDisp.mu.Lock()
@@ -1126,9 +1171,11 @@ func TestNeighborCacheClear(t *testing.T) {
 			{
 				EventType: entryTestAdded,
 				NICID:     1,
-				Addr:      entryTestAddr1,
-				LinkAddr:  entryTestLinkAddr1,
-				State:     Static,
+				Entry: NeighborEntry{
+					Addr:     entryTestAddr1,
+					LinkAddr: entryTestLinkAddr1,
+					State:    Static,
+				},
 			},
 		}
 		nudDisp.mu.Lock()
@@ -1149,16 +1196,20 @@ func TestNeighborCacheClear(t *testing.T) {
 		{
 			EventType: entryTestRemoved,
 			NICID:     1,
-			Addr:      entry.Addr,
-			LinkAddr:  entry.LinkAddr,
-			State:     Reachable,
+			Entry: NeighborEntry{
+				Addr:     entry.Addr,
+				LinkAddr: entry.LinkAddr,
+				State:    Reachable,
+			},
 		},
 		{
 			EventType: entryTestRemoved,
 			NICID:     1,
-			Addr:      entryTestAddr1,
-			LinkAddr:  entryTestLinkAddr1,
-			State:     Static,
+			Entry: NeighborEntry{
+				Addr:     entryTestAddr1,
+				LinkAddr: entryTestLinkAddr1,
+				State:    Static,
+			},
 		},
 	}
 	nudDisp.mu.Lock()
@@ -1185,24 +1236,27 @@ func TestNeighborCacheClearThenOverflow(t *testing.T) {
 	if !ok {
 		t.Fatalf("c.store.entry(0) not found")
 	}
-	_, _, err := c.neigh.entry(entry.Addr, entry.LocalAddr, c.linkRes, nil)
-	if err != tcpip.ErrWouldBlock {
-		t.Errorf("got c.neigh.entry(%s, %s, _, nil) = %v, want = %s", entry.Addr, entry.LocalAddr, err, tcpip.ErrWouldBlock)
+	if _, _, err := c.neigh.entry(entry.Addr, "", c.linkRes, nil); err != tcpip.ErrWouldBlock {
+		t.Errorf("got c.neigh.entry(%s, '', _, nil) = %v, want = %s", entry.Addr, err, tcpip.ErrWouldBlock)
 	}
 	c.clock.Advance(typicalLatency)
 	wantEvents := []testEntryEventInfo{
 		{
 			EventType: entryTestAdded,
 			NICID:     1,
-			Addr:      entry.Addr,
-			State:     Incomplete,
+			Entry: NeighborEntry{
+				Addr:  entry.Addr,
+				State: Incomplete,
+			},
 		},
 		{
 			EventType: entryTestChanged,
 			NICID:     1,
-			Addr:      entry.Addr,
-			LinkAddr:  entry.LinkAddr,
-			State:     Reachable,
+			Entry: NeighborEntry{
+				Addr:     entry.Addr,
+				LinkAddr: entry.LinkAddr,
+				State:    Reachable,
+			},
 		},
 	}
 	c.nudDisp.mu.Lock()
@@ -1220,9 +1274,11 @@ func TestNeighborCacheClearThenOverflow(t *testing.T) {
 			{
 				EventType: entryTestRemoved,
 				NICID:     1,
-				Addr:      entry.Addr,
-				LinkAddr:  entry.LinkAddr,
-				State:     Reachable,
+				Entry: NeighborEntry{
+					Addr:     entry.Addr,
+					LinkAddr: entry.LinkAddr,
+					State:    Reachable,
+				},
 			},
 		}
 		c.nudDisp.mu.Lock()
@@ -1274,29 +1330,33 @@ func TestNeighborCacheKeepFrequentlyUsed(t *testing.T) {
 		if !ok {
 			t.Fatalf("store.entry(%d) not found", i)
 		}
-		_, doneCh, err := neigh.entry(entry.Addr, entry.LocalAddr, linkRes, nil)
+		_, doneCh, err := neigh.entry(entry.Addr, "", linkRes, nil)
 		if err != tcpip.ErrWouldBlock {
-			t.Errorf("got neigh.entry(%s, %s, _, nil) = %v, want = %s", entry.Addr, entry.LocalAddr, err, tcpip.ErrWouldBlock)
+			t.Errorf("got neigh.entry(%s, '', _, nil) = %v, want = %s", entry.Addr, err, tcpip.ErrWouldBlock)
 		}
 		clock.Advance(typicalLatency)
 		select {
 		case <-doneCh:
 		default:
-			t.Fatalf("expected notification from done channel returned by neigh.entry(%s, %s, _, nil)", entry.Addr, entry.LocalAddr)
+			t.Fatalf("expected notification from done channel returned by neigh.entry(%s, '', _, nil)", entry.Addr)
 		}
 		wantEvents := []testEntryEventInfo{
 			{
 				EventType: entryTestAdded,
 				NICID:     1,
-				Addr:      entry.Addr,
-				State:     Incomplete,
+				Entry: NeighborEntry{
+					Addr:  entry.Addr,
+					State: Incomplete,
+				},
 			},
 			{
 				EventType: entryTestChanged,
 				NICID:     1,
-				Addr:      entry.Addr,
-				LinkAddr:  entry.LinkAddr,
-				State:     Reachable,
+				Entry: NeighborEntry{
+					Addr:     entry.Addr,
+					LinkAddr: entry.LinkAddr,
+					State:    Reachable,
+				},
 			},
 		}
 		nudDisp.mu.Lock()
@@ -1312,9 +1372,8 @@ func TestNeighborCacheKeepFrequentlyUsed(t *testing.T) {
 	for i := neighborCacheSize; i < store.size(); i++ {
 		// Periodically refresh the frequently used entry
 		if i%(neighborCacheSize/2) == 0 {
-			_, _, err := neigh.entry(frequentlyUsedEntry.Addr, frequentlyUsedEntry.LocalAddr, linkRes, nil)
-			if err != nil {
-				t.Errorf("unexpected error from neigh.entry(%s, %s, _, nil): %s", frequentlyUsedEntry.Addr, frequentlyUsedEntry.LocalAddr, err)
+			if _, _, err := neigh.entry(frequentlyUsedEntry.Addr, "", linkRes, nil); err != nil {
+				t.Errorf("unexpected error from neigh.entry(%s, '', _, nil): %s", frequentlyUsedEntry.Addr, err)
 			}
 		}
 
@@ -1322,15 +1381,15 @@ func TestNeighborCacheKeepFrequentlyUsed(t *testing.T) {
 		if !ok {
 			t.Fatalf("store.entry(%d) not found", i)
 		}
-		_, doneCh, err := neigh.entry(entry.Addr, entry.LocalAddr, linkRes, nil)
+		_, doneCh, err := neigh.entry(entry.Addr, "", linkRes, nil)
 		if err != tcpip.ErrWouldBlock {
-			t.Errorf("got neigh.entry(%s, %s, _, nil) = %v, want = %s", entry.Addr, entry.LocalAddr, err, tcpip.ErrWouldBlock)
+			t.Errorf("got neigh.entry(%s, '', _, nil) = %v, want = %s", entry.Addr, err, tcpip.ErrWouldBlock)
 		}
 		clock.Advance(typicalLatency)
 		select {
 		case <-doneCh:
 		default:
-			t.Fatalf("expected notification from done channel returned by neigh.entry(%s, %s, _, nil)", entry.Addr, entry.LocalAddr)
+			t.Fatalf("expected notification from done channel returned by neigh.entry(%s, '', _, nil)", entry.Addr)
 		}
 
 		// An entry should have been removed, as per the LRU eviction strategy
@@ -1342,22 +1401,28 @@ func TestNeighborCacheKeepFrequentlyUsed(t *testing.T) {
 			{
 				EventType: entryTestRemoved,
 				NICID:     1,
-				Addr:      removedEntry.Addr,
-				LinkAddr:  removedEntry.LinkAddr,
-				State:     Reachable,
+				Entry: NeighborEntry{
+					Addr:     removedEntry.Addr,
+					LinkAddr: removedEntry.LinkAddr,
+					State:    Reachable,
+				},
 			},
 			{
 				EventType: entryTestAdded,
 				NICID:     1,
-				Addr:      entry.Addr,
-				State:     Incomplete,
+				Entry: NeighborEntry{
+					Addr:  entry.Addr,
+					State: Incomplete,
+				},
 			},
 			{
 				EventType: entryTestChanged,
 				NICID:     1,
-				Addr:      entry.Addr,
-				LinkAddr:  entry.LinkAddr,
-				State:     Reachable,
+				Entry: NeighborEntry{
+					Addr:     entry.Addr,
+					LinkAddr: entry.LinkAddr,
+					State:    Reachable,
+				},
 			},
 		}
 		nudDisp.mu.Lock()
@@ -1374,10 +1439,9 @@ func TestNeighborCacheKeepFrequentlyUsed(t *testing.T) {
 	// have to be sorted before comparison.
 	wantUnsortedEntries := []NeighborEntry{
 		{
-			Addr:      frequentlyUsedEntry.Addr,
-			LocalAddr: frequentlyUsedEntry.LocalAddr,
-			LinkAddr:  frequentlyUsedEntry.LinkAddr,
-			State:     Reachable,
+			Addr:     frequentlyUsedEntry.Addr,
+			LinkAddr: frequentlyUsedEntry.LinkAddr,
+			State:    Reachable,
 		},
 	}
 
@@ -1387,10 +1451,9 @@ func TestNeighborCacheKeepFrequentlyUsed(t *testing.T) {
 			t.Fatalf("store.entry(%d) not found", i)
 		}
 		wantEntry := NeighborEntry{
-			Addr:      entry.Addr,
-			LocalAddr: entry.LocalAddr,
-			LinkAddr:  entry.LinkAddr,
-			State:     Reachable,
+			Addr:     entry.Addr,
+			LinkAddr: entry.LinkAddr,
+			State:    Reachable,
 		}
 		wantUnsortedEntries = append(wantUnsortedEntries, wantEntry)
 	}
@@ -1430,9 +1493,8 @@ func TestNeighborCacheConcurrent(t *testing.T) {
 			wg.Add(1)
 			go func(entry NeighborEntry) {
 				defer wg.Done()
-				e, _, err := neigh.entry(entry.Addr, entry.LocalAddr, linkRes, nil)
-				if err != nil && err != tcpip.ErrWouldBlock {
-					t.Errorf("got neigh.entry(%s, %s, _, nil) = (%+v, _, %s), want (_, _, nil) or (_, _, %s)", entry.Addr, entry.LocalAddr, e, err, tcpip.ErrWouldBlock)
+				if e, _, err := neigh.entry(entry.Addr, "", linkRes, nil); err != nil && err != tcpip.ErrWouldBlock {
+					t.Errorf("got neigh.entry(%s, '', _, nil) = (%+v, _, %s), want (_, _, nil) or (_, _, %s)", entry.Addr, e, err, tcpip.ErrWouldBlock)
 				}
 			}(entry)
 		}
@@ -1456,10 +1518,9 @@ func TestNeighborCacheConcurrent(t *testing.T) {
 			t.Errorf("store.entry(%d) not found", i)
 		}
 		wantEntry := NeighborEntry{
-			Addr:      entry.Addr,
-			LocalAddr: entry.LocalAddr,
-			LinkAddr:  entry.LinkAddr,
-			State:     Reachable,
+			Addr:     entry.Addr,
+			LinkAddr: entry.LinkAddr,
+			State:    Reachable,
 		}
 		wantUnsortedEntries = append(wantUnsortedEntries, wantEntry)
 	}
@@ -1488,37 +1549,36 @@ func TestNeighborCacheReplace(t *testing.T) {
 	if !ok {
 		t.Fatalf("store.entry(0) not found")
 	}
-	_, doneCh, err := neigh.entry(entry.Addr, entry.LocalAddr, linkRes, nil)
+	_, doneCh, err := neigh.entry(entry.Addr, "", linkRes, nil)
 	if err != tcpip.ErrWouldBlock {
-		t.Fatalf("got neigh.entry(%s, %s, _, nil) = %v, want = %s", entry.Addr, entry.LocalAddr, err, tcpip.ErrWouldBlock)
+		t.Fatalf("got neigh.entry(%s, '', _, nil) = %v, want = %s", entry.Addr, err, tcpip.ErrWouldBlock)
 	}
 	clock.Advance(typicalLatency)
 	select {
 	case <-doneCh:
 	default:
-		t.Fatalf("expected notification from done channel returned by neigh.entry(%s, %s, _, nil)", entry.Addr, entry.LocalAddr)
+		t.Fatalf("expected notification from done channel returned by neigh.entry(%s, '', _, nil)", entry.Addr)
 	}
 
 	// Verify the entry exists
 	{
-		e, doneCh, err := neigh.entry(entry.Addr, entry.LocalAddr, linkRes, nil)
+		e, doneCh, err := neigh.entry(entry.Addr, "", linkRes, nil)
 		if err != nil {
-			t.Errorf("unexpected error from neigh.entry(%s, %s, _, nil): %s", entry.Addr, entry.LocalAddr, err)
+			t.Errorf("unexpected error from neigh.entry(%s, '', _, nil): %s", entry.Addr, err)
 		}
 		if doneCh != nil {
-			t.Errorf("unexpected done channel from neigh.entry(%s, %s, _, nil): %v", entry.Addr, entry.LocalAddr, doneCh)
+			t.Errorf("unexpected done channel from neigh.entry(%s, '', _, nil): %v", entry.Addr, doneCh)
 		}
 		if t.Failed() {
 			t.FailNow()
 		}
 		want := NeighborEntry{
-			Addr:      entry.Addr,
-			LocalAddr: entry.LocalAddr,
-			LinkAddr:  entry.LinkAddr,
-			State:     Reachable,
+			Addr:     entry.Addr,
+			LinkAddr: entry.LinkAddr,
+			State:    Reachable,
 		}
 		if diff := cmp.Diff(e, want, entryDiffOpts()...); diff != "" {
-			t.Errorf("neigh.entry(%s, %s, _, nil) mismatch (-got, +want):\n%s", entry.Addr, entry.LinkAddr, diff)
+			t.Errorf("neigh.entry(%s, '', _, nil) mismatch (-got, +want):\n%s", entry.Addr, diff)
 		}
 	}
 
@@ -1542,37 +1602,35 @@ func TestNeighborCacheReplace(t *testing.T) {
 	//
 	// Verify the entry's new link address and the new state.
 	{
-		e, _, err := neigh.entry(entry.Addr, entry.LocalAddr, linkRes, nil)
+		e, _, err := neigh.entry(entry.Addr, "", linkRes, nil)
 		if err != nil {
-			t.Fatalf("neigh.entry(%s, %s, _, nil): %s", entry.Addr, entry.LocalAddr, err)
+			t.Fatalf("neigh.entry(%s, '', _, nil): %s", entry.Addr, err)
 		}
 		want := NeighborEntry{
-			Addr:      entry.Addr,
-			LocalAddr: entry.LocalAddr,
-			LinkAddr:  updatedLinkAddr,
-			State:     Delay,
+			Addr:     entry.Addr,
+			LinkAddr: updatedLinkAddr,
+			State:    Delay,
 		}
 		if diff := cmp.Diff(e, want, entryDiffOpts()...); diff != "" {
-			t.Errorf("neigh.entry(%s, %s, _, nil) mismatch (-got, +want):\n%s", entry.Addr, entry.LocalAddr, diff)
+			t.Errorf("neigh.entry(%s, '', _, nil) mismatch (-got, +want):\n%s", entry.Addr, diff)
 		}
 		clock.Advance(config.DelayFirstProbeTime + typicalLatency)
 	}
 
 	// Verify that the neighbor is now reachable.
 	{
-		e, _, err := neigh.entry(entry.Addr, entry.LocalAddr, linkRes, nil)
+		e, _, err := neigh.entry(entry.Addr, "", linkRes, nil)
 		clock.Advance(typicalLatency)
 		if err != nil {
-			t.Errorf("unexpected error from neigh.entry(%s, %s, _, nil): %s", entry.Addr, entry.LocalAddr, err)
+			t.Errorf("unexpected error from neigh.entry(%s, '', _, nil): %s", entry.Addr, err)
 		}
 		want := NeighborEntry{
-			Addr:      entry.Addr,
-			LocalAddr: entry.LocalAddr,
-			LinkAddr:  updatedLinkAddr,
-			State:     Reachable,
+			Addr:     entry.Addr,
+			LinkAddr: updatedLinkAddr,
+			State:    Reachable,
 		}
 		if diff := cmp.Diff(e, want, entryDiffOpts()...); diff != "" {
-			t.Errorf("neigh.entry(%s, %s, _, nil) mismatch (-got, +want):\n%s", entry.Addr, entry.LocalAddr, diff)
+			t.Errorf("neigh.entry(%s, '', _, nil) mismatch (-got, +want):\n%s", entry.Addr, diff)
 		}
 	}
 }
@@ -1601,35 +1659,34 @@ func TestNeighborCacheResolutionFailed(t *testing.T) {
 	if !ok {
 		t.Fatalf("store.entry(0) not found")
 	}
-	if _, _, err := neigh.entry(entry.Addr, entry.LocalAddr, linkRes, nil); err != tcpip.ErrWouldBlock {
-		t.Fatalf("got neigh.entry(%s, %s, _, nil) = %v, want = %s", entry.Addr, entry.LocalAddr, err, tcpip.ErrWouldBlock)
+	if _, _, err := neigh.entry(entry.Addr, "", linkRes, nil); err != tcpip.ErrWouldBlock {
+		t.Fatalf("got neigh.entry(%s, '', _, nil) = %v, want = %s", entry.Addr, err, tcpip.ErrWouldBlock)
 	}
 	clock.Advance(typicalLatency)
-	got, _, err := neigh.entry(entry.Addr, entry.LocalAddr, linkRes, nil)
+	got, _, err := neigh.entry(entry.Addr, "", linkRes, nil)
 	if err != nil {
-		t.Fatalf("unexpected error from neigh.entry(%s, %s, _, nil): %s", entry.Addr, entry.LocalAddr, err)
+		t.Fatalf("unexpected error from neigh.entry(%s, '', _, nil): %s", entry.Addr, err)
 	}
 	want := NeighborEntry{
-		Addr:      entry.Addr,
-		LocalAddr: entry.LocalAddr,
-		LinkAddr:  entry.LinkAddr,
-		State:     Reachable,
+		Addr:     entry.Addr,
+		LinkAddr: entry.LinkAddr,
+		State:    Reachable,
 	}
 	if diff := cmp.Diff(got, want, entryDiffOpts()...); diff != "" {
-		t.Errorf("neigh.entry(%s, %s, _, nil) mismatch (-got, +want):\n%s", entry.Addr, entry.LocalAddr, diff)
+		t.Errorf("neigh.entry(%s, '', _, nil) mismatch (-got, +want):\n%s", entry.Addr, diff)
 	}
 
 	// Verify that address resolution for an unknown address returns ErrNoLinkAddress
 	before := atomic.LoadUint32(&requestCount)
 
 	entry.Addr += "2"
-	if _, _, err := neigh.entry(entry.Addr, entry.LocalAddr, linkRes, nil); err != tcpip.ErrWouldBlock {
-		t.Fatalf("got neigh.entry(%s, %s, _, nil) = %v, want = %s", entry.Addr, entry.LocalAddr, err, tcpip.ErrWouldBlock)
+	if _, _, err := neigh.entry(entry.Addr, "", linkRes, nil); err != tcpip.ErrWouldBlock {
+		t.Fatalf("got neigh.entry(%s, '', _, nil) = %v, want = %s", entry.Addr, err, tcpip.ErrWouldBlock)
 	}
 	waitFor := config.DelayFirstProbeTime + typicalLatency*time.Duration(config.MaxMulticastProbes)
 	clock.Advance(waitFor)
-	if _, _, err := neigh.entry(entry.Addr, entry.LocalAddr, linkRes, nil); err != tcpip.ErrNoLinkAddress {
-		t.Fatalf("got neigh.entry(%s, %s, _, nil) = %v, want = %s", entry.Addr, entry.LocalAddr, err, tcpip.ErrNoLinkAddress)
+	if _, _, err := neigh.entry(entry.Addr, "", linkRes, nil); err != tcpip.ErrNoLinkAddress {
+		t.Fatalf("got neigh.entry(%s, '', _, nil) = %v, want = %s", entry.Addr, err, tcpip.ErrNoLinkAddress)
 	}
 
 	maxAttempts := neigh.config().MaxUnicastProbes
@@ -1659,13 +1716,13 @@ func TestNeighborCacheResolutionTimeout(t *testing.T) {
 	if !ok {
 		t.Fatalf("store.entry(0) not found")
 	}
-	if _, _, err := neigh.entry(entry.Addr, entry.LocalAddr, linkRes, nil); err != tcpip.ErrWouldBlock {
-		t.Fatalf("got neigh.entry(%s, %s, _, nil) = %v, want = %s", entry.Addr, entry.LocalAddr, err, tcpip.ErrWouldBlock)
+	if _, _, err := neigh.entry(entry.Addr, "", linkRes, nil); err != tcpip.ErrWouldBlock {
+		t.Fatalf("got neigh.entry(%s, '', _, nil) = %v, want = %s", entry.Addr, err, tcpip.ErrWouldBlock)
 	}
 	waitFor := config.RetransmitTimer * time.Duration(config.MaxMulticastProbes)
 	clock.Advance(waitFor)
-	if _, _, err := neigh.entry(entry.Addr, entry.LocalAddr, linkRes, nil); err != tcpip.ErrNoLinkAddress {
-		t.Fatalf("got neigh.entry(%s, %s, _, nil) = %v, want = %s", entry.Addr, entry.LocalAddr, err, tcpip.ErrNoLinkAddress)
+	if _, _, err := neigh.entry(entry.Addr, "", linkRes, nil); err != tcpip.ErrNoLinkAddress {
+		t.Fatalf("got neigh.entry(%s, '', _, nil) = %v, want = %s", entry.Addr, err, tcpip.ErrNoLinkAddress)
 	}
 }
 
@@ -1683,18 +1740,17 @@ func TestNeighborCacheStaticResolution(t *testing.T) {
 		delay:   typicalLatency,
 	}
 
-	got, _, err := neigh.entry(testEntryBroadcastAddr, testEntryLocalAddr, linkRes, nil)
+	got, _, err := neigh.entry(testEntryBroadcastAddr, "", linkRes, nil)
 	if err != nil {
-		t.Fatalf("unexpected error from neigh.entry(%s, %s, _, nil): %s", testEntryBroadcastAddr, testEntryLocalAddr, err)
+		t.Fatalf("unexpected error from neigh.entry(%s, '', _, nil): %s", testEntryBroadcastAddr, err)
 	}
 	want := NeighborEntry{
-		Addr:      testEntryBroadcastAddr,
-		LocalAddr: testEntryLocalAddr,
-		LinkAddr:  testEntryBroadcastLinkAddr,
-		State:     Static,
+		Addr:     testEntryBroadcastAddr,
+		LinkAddr: testEntryBroadcastLinkAddr,
+		State:    Static,
 	}
 	if diff := cmp.Diff(got, want, entryDiffOpts()...); diff != "" {
-		t.Errorf("neigh.entry(%s, %s, _, nil) mismatch (-got, +want):\n%s", testEntryBroadcastAddr, testEntryLocalAddr, diff)
+		t.Errorf("neigh.entry(%s, '', _, nil) mismatch (-got, +want):\n%s", testEntryBroadcastAddr, diff)
 	}
 }
 
@@ -1719,9 +1775,9 @@ func BenchmarkCacheClear(b *testing.B) {
 			if !ok {
 				b.Fatalf("store.entry(%d) not found", i)
 			}
-			_, doneCh, err := neigh.entry(entry.Addr, entry.LocalAddr, linkRes, nil)
+			_, doneCh, err := neigh.entry(entry.Addr, "", linkRes, nil)
 			if err != tcpip.ErrWouldBlock {
-				b.Fatalf("got neigh.entry(%s, %s, _, nil) = %v, want = %s", entry.Addr, entry.LocalAddr, err, tcpip.ErrWouldBlock)
+				b.Fatalf("got neigh.entry(%s, '', _, nil) = %v, want = %s", entry.Addr, err, tcpip.ErrWouldBlock)
 			}
 			if doneCh != nil {
 				<-doneCh
diff --git a/pkg/tcpip/stack/neighbor_entry.go b/pkg/tcpip/stack/neighbor_entry.go
index be61a21af..32399b4f5 100644
--- a/pkg/tcpip/stack/neighbor_entry.go
+++ b/pkg/tcpip/stack/neighbor_entry.go
@@ -24,13 +24,18 @@ import (
 	"gvisor.dev/gvisor/pkg/tcpip/header"
 )
 
+const (
+	// immediateDuration is a duration of zero for scheduling work that needs to
+	// be done immediately but asynchronously to avoid deadlock.
+	immediateDuration time.Duration = 0
+)
+
 // NeighborEntry describes a neighboring device in the local network.
 type NeighborEntry struct {
-	Addr      tcpip.Address
-	LocalAddr tcpip.Address
-	LinkAddr  tcpip.LinkAddress
-	State     NeighborState
-	UpdatedAt time.Time
+	Addr           tcpip.Address
+	LinkAddr       tcpip.LinkAddress
+	State          NeighborState
+	UpdatedAtNanos int64
 }
 
 // NeighborState defines the state of a NeighborEntry within the Neighbor
@@ -106,35 +111,35 @@ type neighborEntry struct {
 // state, Unknown. Transition out of Unknown by calling either
 // `handlePacketQueuedLocked` or `handleProbeLocked` on the newly created
 // neighborEntry.
-func newNeighborEntry(nic *NIC, remoteAddr tcpip.Address, localAddr tcpip.Address, nudState *NUDState, linkRes LinkAddressResolver) *neighborEntry {
+func newNeighborEntry(nic *NIC, remoteAddr tcpip.Address, nudState *NUDState, linkRes LinkAddressResolver) *neighborEntry {
 	return &neighborEntry{
 		nic:      nic,
 		linkRes:  linkRes,
 		nudState: nudState,
 		neigh: NeighborEntry{
-			Addr:      remoteAddr,
-			LocalAddr: localAddr,
-			State:     Unknown,
+			Addr:  remoteAddr,
+			State: Unknown,
 		},
 	}
 }
 
-// newStaticNeighborEntry creates a neighbor cache entry starting at the Static
-// state. The entry can only transition out of Static by directly calling
-// `setStateLocked`.
+// newStaticNeighborEntry creates a neighbor cache entry starting at the
+// Static state. The entry can only transition out of Static by directly
+// calling `setStateLocked`.
 func newStaticNeighborEntry(nic *NIC, addr tcpip.Address, linkAddr tcpip.LinkAddress, state *NUDState) *neighborEntry {
+	entry := NeighborEntry{
+		Addr:           addr,
+		LinkAddr:       linkAddr,
+		State:          Static,
+		UpdatedAtNanos: nic.stack.clock.NowNanoseconds(),
+	}
 	if nic.stack.nudDisp != nil {
-		nic.stack.nudDisp.OnNeighborAdded(nic.id, addr, linkAddr, Static, time.Now())
+		nic.stack.nudDisp.OnNeighborAdded(nic.id, entry)
 	}
 	return &neighborEntry{
 		nic:      nic,
 		nudState: state,
-		neigh: NeighborEntry{
-			Addr:      addr,
-			LinkAddr:  linkAddr,
-			State:     Static,
-			UpdatedAt: time.Now(),
-		},
+		neigh:    entry,
 	}
 }
 
@@ -165,17 +170,17 @@ func (e *neighborEntry) notifyWakersLocked() {
 
 // dispatchAddEventLocked signals to stack's NUD Dispatcher that the entry has
 // been added.
-func (e *neighborEntry) dispatchAddEventLocked(nextState NeighborState) {
+func (e *neighborEntry) dispatchAddEventLocked() {
 	if nudDisp := e.nic.stack.nudDisp; nudDisp != nil {
-		nudDisp.OnNeighborAdded(e.nic.id, e.neigh.Addr, e.neigh.LinkAddr, nextState, time.Now())
+		nudDisp.OnNeighborAdded(e.nic.id, e.neigh)
 	}
 }
 
 // dispatchChangeEventLocked signals to stack's NUD Dispatcher that the entry
 // has changed state or link-layer address.
-func (e *neighborEntry) dispatchChangeEventLocked(nextState NeighborState) {
+func (e *neighborEntry) dispatchChangeEventLocked() {
 	if nudDisp := e.nic.stack.nudDisp; nudDisp != nil {
-		nudDisp.OnNeighborChanged(e.nic.id, e.neigh.Addr, e.neigh.LinkAddr, nextState, time.Now())
+		nudDisp.OnNeighborChanged(e.nic.id, e.neigh)
 	}
 }
 
@@ -183,7 +188,7 @@ func (e *neighborEntry) dispatchChangeEventLocked(nextState NeighborState) {
 // has been removed.
 func (e *neighborEntry) dispatchRemoveEventLocked() {
 	if nudDisp := e.nic.stack.nudDisp; nudDisp != nil {
-		nudDisp.OnNeighborRemoved(e.nic.id, e.neigh.Addr, e.neigh.LinkAddr, e.neigh.State, time.Now())
+		nudDisp.OnNeighborRemoved(e.nic.id, e.neigh)
 	}
 }
 
@@ -201,68 +206,24 @@ func (e *neighborEntry) setStateLocked(next NeighborState) {
 
 	prev := e.neigh.State
 	e.neigh.State = next
-	e.neigh.UpdatedAt = time.Now()
+	e.neigh.UpdatedAtNanos = e.nic.stack.clock.NowNanoseconds()
 	config := e.nudState.Config()
 
 	switch next {
 	case Incomplete:
-		var retryCounter uint32
-		var sendMulticastProbe func()
-
-		sendMulticastProbe = func() {
-			if retryCounter == config.MaxMulticastProbes {
-				// "If no Neighbor Advertisement is received after
-				// MAX_MULTICAST_SOLICIT solicitations, address resolution has failed.
-				// The sender MUST return ICMP destination unreachable indications with
-				// code 3 (Address Unreachable) for each packet queued awaiting address
-				// resolution." - RFC 4861 section 7.2.2
-				//
-				// There is no need to send an ICMP destination unreachable indication
-				// since the failure to resolve the address is expected to only occur
-				// on this node. Thus, redirecting traffic is currently not supported.
-				//
-				// "If the error occurs on a node other than the node originating the
-				// packet, an ICMP error message is generated. If the error occurs on
-				// the originating node, an implementation is not required to actually
-				// create and send an ICMP error packet to the source, as long as the
-				// upper-layer sender is notified through an appropriate mechanism
-				// (e.g. return value from a procedure call). Note, however, that an
-				// implementation may find it convenient in some cases to return errors
-				// to the sender by taking the offending packet, generating an ICMP
-				// error message, and then delivering it (locally) through the generic
-				// error-handling routines.' - RFC 4861 section 2.1
-				e.dispatchRemoveEventLocked()
-				e.setStateLocked(Failed)
-				return
-			}
-
-			if err := e.linkRes.LinkAddressRequest(e.neigh.Addr, e.neigh.LocalAddr, "", e.nic.LinkEndpoint); err != nil {
-				// There is no need to log the error here; the NUD implementation may
-				// assume a working link. A valid link should be the responsibility of
-				// the NIC/stack.LinkEndpoint.
-				e.dispatchRemoveEventLocked()
-				e.setStateLocked(Failed)
-				return
-			}
-
-			retryCounter++
-			e.job = e.nic.stack.newJob(&e.mu, sendMulticastProbe)
-			e.job.Schedule(config.RetransmitTimer)
-		}
-
-		sendMulticastProbe()
+		panic(fmt.Sprintf("should never transition to Incomplete with setStateLocked; neigh = %#v, prev state = %s", e.neigh, prev))
 
 	case Reachable:
 		e.job = e.nic.stack.newJob(&e.mu, func() {
-			e.dispatchChangeEventLocked(Stale)
 			e.setStateLocked(Stale)
+			e.dispatchChangeEventLocked()
 		})
 		e.job.Schedule(e.nudState.ReachableTime())
 
 	case Delay:
 		e.job = e.nic.stack.newJob(&e.mu, func() {
-			e.dispatchChangeEventLocked(Probe)
 			e.setStateLocked(Probe)
+			e.dispatchChangeEventLocked()
 		})
 		e.job.Schedule(config.DelayFirstProbeTime)
 
@@ -277,28 +238,27 @@ func (e *neighborEntry) setStateLocked(next NeighborState) {
 				return
 			}
 
-			if err := e.linkRes.LinkAddressRequest(e.neigh.Addr, e.neigh.LocalAddr, e.neigh.LinkAddr, e.nic.LinkEndpoint); err != nil {
+			if err := e.linkRes.LinkAddressRequest(e.neigh.Addr, "" /* localAddr */, e.neigh.LinkAddr, e.nic); err != nil {
 				e.dispatchRemoveEventLocked()
 				e.setStateLocked(Failed)
 				return
 			}
 
 			retryCounter++
-			if retryCounter == config.MaxUnicastProbes {
-				e.dispatchRemoveEventLocked()
-				e.setStateLocked(Failed)
-				return
-			}
-
 			e.job = e.nic.stack.newJob(&e.mu, sendUnicastProbe)
 			e.job.Schedule(config.RetransmitTimer)
 		}
 
-		sendUnicastProbe()
+		// Send a probe in another goroutine to free this thread of execution
+		// for finishing the state transition. This is necessary to avoid
+		// deadlock where sending and processing probes are done synchronously,
+		// such as loopback and integration tests.
+		e.job = e.nic.stack.newJob(&e.mu, sendUnicastProbe)
+		e.job.Schedule(immediateDuration)
 
 	case Failed:
 		e.notifyWakersLocked()
-		e.job = e.nic.stack.newJob(&e.mu, func() {
+		e.job = e.nic.stack.newJob(&doubleLock{first: &e.nic.neigh.mu, second: &e.mu}, func() {
 			e.nic.neigh.removeEntryLocked(e)
 		})
 		e.job.Schedule(config.UnreachableTime)
@@ -315,19 +275,82 @@ func (e *neighborEntry) setStateLocked(next NeighborState) {
 // being queued for outgoing transmission.
 //
 // Follows the logic defined in RFC 4861 section 7.3.3.
-func (e *neighborEntry) handlePacketQueuedLocked() {
+func (e *neighborEntry) handlePacketQueuedLocked(localAddr tcpip.Address) {
 	switch e.neigh.State {
 	case Unknown:
-		e.dispatchAddEventLocked(Incomplete)
-		e.setStateLocked(Incomplete)
+		e.neigh.State = Incomplete
+		e.neigh.UpdatedAtNanos = e.nic.stack.clock.NowNanoseconds()
+
+		e.dispatchAddEventLocked()
+
+		config := e.nudState.Config()
+
+		var retryCounter uint32
+		var sendMulticastProbe func()
+
+		sendMulticastProbe = func() {
+			if retryCounter == config.MaxMulticastProbes {
+				// "If no Neighbor Advertisement is received after
+				// MAX_MULTICAST_SOLICIT solicitations, address resolution has failed.
+				// The sender MUST return ICMP destination unreachable indications with
+				// code 3 (Address Unreachable) for each packet queued awaiting address
+				// resolution." - RFC 4861 section 7.2.2
+				//
+				// There is no need to send an ICMP destination unreachable indication
+				// since the failure to resolve the address is expected to only occur
+				// on this node. Thus, redirecting traffic is currently not supported.
+				//
+				// "If the error occurs on a node other than the node originating the
+				// packet, an ICMP error message is generated. If the error occurs on
+				// the originating node, an implementation is not required to actually
+				// create and send an ICMP error packet to the source, as long as the
+				// upper-layer sender is notified through an appropriate mechanism
+				// (e.g. return value from a procedure call). Note, however, that an
+				// implementation may find it convenient in some cases to return errors
+				// to the sender by taking the offending packet, generating an ICMP
+				// error message, and then delivering it (locally) through the generic
+				// error-handling routines." - RFC 4861 section 2.1
+				e.dispatchRemoveEventLocked()
+				e.setStateLocked(Failed)
+				return
+			}
+
+			// As per RFC 4861 section 7.2.2:
+			//
+			//  If the source address of the packet prompting the solicitation is the
+			//  same as one of the addresses assigned to the outgoing interface, that
+			//  address SHOULD be placed in the IP Source Address of the outgoing
+			//  solicitation.
+			//
+			if err := e.linkRes.LinkAddressRequest(e.neigh.Addr, localAddr, "", e.nic); err != nil {
+				// There is no need to log the error here; the NUD implementation may
+				// assume a working link. A valid link should be the responsibility of
+				// the NIC/stack.LinkEndpoint.
+				e.dispatchRemoveEventLocked()
+				e.setStateLocked(Failed)
+				return
+			}
+
+			retryCounter++
+			e.job = e.nic.stack.newJob(&e.mu, sendMulticastProbe)
+			e.job.Schedule(config.RetransmitTimer)
+		}
+
+		// Send a probe in another goroutine to free this thread of execution
+		// for finishing the state transition. This is necessary to avoid
+		// deadlock where sending and processing probes are done synchronously,
+		// such as loopback and integration tests.
+		e.job = e.nic.stack.newJob(&e.mu, sendMulticastProbe)
+		e.job.Schedule(immediateDuration)
 
 	case Stale:
-		e.dispatchChangeEventLocked(Delay)
 		e.setStateLocked(Delay)
+		e.dispatchChangeEventLocked()
 
-	case Incomplete, Reachable, Delay, Probe, Static, Failed:
+	case Incomplete, Reachable, Delay, Probe, Static:
 		// Do nothing
-
+	case Failed:
+		e.nic.stats.Neighbor.FailedEntryLookups.Increment()
 	default:
 		panic(fmt.Sprintf("Invalid cache entry state: %s", e.neigh.State))
 	}
@@ -345,21 +368,21 @@ func (e *neighborEntry) handleProbeLocked(remoteLinkAddr tcpip.LinkAddress) {
 	switch e.neigh.State {
 	case Unknown, Incomplete, Failed:
 		e.neigh.LinkAddr = remoteLinkAddr
-		e.dispatchAddEventLocked(Stale)
 		e.setStateLocked(Stale)
 		e.notifyWakersLocked()
+		e.dispatchAddEventLocked()
 
 	case Reachable, Delay, Probe:
 		if e.neigh.LinkAddr != remoteLinkAddr {
 			e.neigh.LinkAddr = remoteLinkAddr
-			e.dispatchChangeEventLocked(Stale)
 			e.setStateLocked(Stale)
+			e.dispatchChangeEventLocked()
 		}
 
 	case Stale:
 		if e.neigh.LinkAddr != remoteLinkAddr {
 			e.neigh.LinkAddr = remoteLinkAddr
-			e.dispatchChangeEventLocked(Stale)
+			e.dispatchChangeEventLocked()
 		}
 
 	case Static:
@@ -393,12 +416,11 @@ func (e *neighborEntry) handleConfirmationLocked(linkAddr tcpip.LinkAddress, fla
 
 		e.neigh.LinkAddr = linkAddr
 		if flags.Solicited {
-			e.dispatchChangeEventLocked(Reachable)
 			e.setStateLocked(Reachable)
 		} else {
-			e.dispatchChangeEventLocked(Stale)
 			e.setStateLocked(Stale)
 		}
+		e.dispatchChangeEventLocked()
 		e.isRouter = flags.IsRouter
 		e.notifyWakersLocked()
 
@@ -411,8 +433,8 @@ func (e *neighborEntry) handleConfirmationLocked(linkAddr tcpip.LinkAddress, fla
 		if isLinkAddrDifferent {
 			if !flags.Override {
 				if e.neigh.State == Reachable {
-					e.dispatchChangeEventLocked(Stale)
 					e.setStateLocked(Stale)
+					e.dispatchChangeEventLocked()
 				}
 				break
 			}
@@ -421,23 +443,24 @@ func (e *neighborEntry) handleConfirmationLocked(linkAddr tcpip.LinkAddress, fla
 
 			if !flags.Solicited {
 				if e.neigh.State != Stale {
-					e.dispatchChangeEventLocked(Stale)
 					e.setStateLocked(Stale)
+					e.dispatchChangeEventLocked()
 				} else {
 					// Notify the LinkAddr change, even though NUD state hasn't changed.
-					e.dispatchChangeEventLocked(e.neigh.State)
+					e.dispatchChangeEventLocked()
 				}
 				break
 			}
 		}
 
 		if flags.Solicited && (flags.Override || !isLinkAddrDifferent) {
-			if e.neigh.State != Reachable {
-				e.dispatchChangeEventLocked(Reachable)
-			}
+			wasReachable := e.neigh.State == Reachable
 			// Set state to Reachable again to refresh timers.
 			e.setStateLocked(Reachable)
 			e.notifyWakersLocked()
+			if !wasReachable {
+				e.dispatchChangeEventLocked()
+			}
 		}
 
 		if e.isRouter && !flags.IsRouter && header.IsV6UnicastAddress(e.neigh.Addr) {
@@ -475,11 +498,12 @@ func (e *neighborEntry) handleConfirmationLocked(linkAddr tcpip.LinkAddress, fla
 func (e *neighborEntry) handleUpperLevelConfirmationLocked() {
 	switch e.neigh.State {
 	case Reachable, Stale, Delay, Probe:
-		if e.neigh.State != Reachable {
-			e.dispatchChangeEventLocked(Reachable)
-			// Set state to Reachable again to refresh timers.
-		}
+		wasReachable := e.neigh.State == Reachable
+		// Set state to Reachable again to refresh timers.
 		e.setStateLocked(Reachable)
+		if !wasReachable {
+			e.dispatchChangeEventLocked()
+		}
 
 	case Unknown, Incomplete, Failed, Static:
 		// Do nothing
@@ -488,3 +512,23 @@ func (e *neighborEntry) handleUpperLevelConfirmationLocked() {
 		panic(fmt.Sprintf("Invalid cache entry state: %s", e.neigh.State))
 	}
 }
+
+// doubleLock combines two locks into one while maintaining lock ordering.
+//
+// TODO(gvisor.dev/issue/4796): Remove this once subsequent traffic to a Failed
+// neighbor is allowed.
+type doubleLock struct {
+	first, second sync.Locker
+}
+
+// Lock locks both locks in order: first then second.
+func (l *doubleLock) Lock() {
+	l.first.Lock()
+	l.second.Lock()
+}
+
+// Unlock unlocks both locks in reverse order: second then first.
+func (l *doubleLock) Unlock() {
+	l.second.Unlock()
+	l.first.Unlock()
+}
diff --git a/pkg/tcpip/stack/neighbor_entry_test.go b/pkg/tcpip/stack/neighbor_entry_test.go
index 3ee2a3b31..c497d3932 100644
--- a/pkg/tcpip/stack/neighbor_entry_test.go
+++ b/pkg/tcpip/stack/neighbor_entry_test.go
@@ -47,24 +47,27 @@ const (
 	entryTestNetDefaultMTU = 65536
 )
 
+// runImmediatelyScheduledJobs runs all jobs scheduled to run at the current
+// time.
+func runImmediatelyScheduledJobs(clock *faketime.ManualClock) {
+	clock.Advance(immediateDuration)
+}
+
 // eventDiffOpts are the options passed to cmp.Diff to compare entry events.
-// The UpdatedAt field is ignored due to a lack of a deterministic method to
-// predict the time that an event will be dispatched.
+// The UpdatedAtNanos field is ignored due to a lack of a deterministic method
+// to predict the time that an event will be dispatched.
 func eventDiffOpts() []cmp.Option {
 	return []cmp.Option{
-		cmpopts.IgnoreFields(testEntryEventInfo{}, "UpdatedAt"),
+		cmpopts.IgnoreFields(NeighborEntry{}, "UpdatedAtNanos"),
 	}
 }
 
 // eventDiffOptsWithSort is like eventDiffOpts but also includes an option to
 // sort slices of events for cases where ordering must be ignored.
 func eventDiffOptsWithSort() []cmp.Option {
-	return []cmp.Option{
-		cmpopts.IgnoreFields(testEntryEventInfo{}, "UpdatedAt"),
-		cmpopts.SortSlices(func(a, b testEntryEventInfo) bool {
-			return strings.Compare(string(a.Addr), string(b.Addr)) < 0
-		}),
-	}
+	return append(eventDiffOpts(), cmpopts.SortSlices(func(a, b testEntryEventInfo) bool {
+		return strings.Compare(string(a.Entry.Addr), string(b.Entry.Addr)) < 0
+	}))
 }
 
 // The following unit tests exercise every state transition and verify its
@@ -86,7 +89,7 @@ func eventDiffOptsWithSort() []cmp.Option {
 // | Stale      | Reachable  | Solicited confirmation w/o address         | Notify wakers   | Changed |
 // | Stale      | Stale      | Override confirmation                      | Update LinkAddr | Changed |
 // | Stale      | Stale      | Probe w/ different address                 | Update LinkAddr | Changed |
-// | Stale      | Delay      | Packet sent                                |                 | Changed |
+// | Stale      | Delay      | Packet queued                              |                 | Changed |
 // | Delay      | Reachable  | Upper-layer confirmation                   |                 | Changed |
 // | Delay      | Reachable  | Solicited override confirmation            | Update LinkAddr | Changed |
 // | Delay      | Reachable  | Solicited confirmation w/o address         | Notify wakers   | Changed |
@@ -98,6 +101,7 @@ func eventDiffOptsWithSort() []cmp.Option {
 // | Probe      | Stale      | Probe or confirmation w/ different address |                 | Changed |
 // | Probe      | Probe      | Retransmit timer expired                   | Send probe      | Changed |
 // | Probe      | Failed     | Max probes sent without reply              | Notify wakers   | Removed |
+// | Failed     | Failed     | Packet queued                              |                 |         |
 // | Failed     |            | Unreachability timer expired               | Delete entry    |         |
 
 type testEntryEventType uint8
@@ -125,14 +129,11 @@ func (t testEntryEventType) String() string {
 type testEntryEventInfo struct {
 	EventType testEntryEventType
 	NICID     tcpip.NICID
-	Addr      tcpip.Address
-	LinkAddr  tcpip.LinkAddress
-	State     NeighborState
-	UpdatedAt time.Time
+	Entry     NeighborEntry
 }
 
 func (e testEntryEventInfo) String() string {
-	return fmt.Sprintf("%s event for NIC #%d, addr=%q, linkAddr=%q, state=%q", e.EventType, e.NICID, e.Addr, e.LinkAddr, e.State)
+	return fmt.Sprintf("%s event for NIC #%d, %#v", e.EventType, e.NICID, e.Entry)
 }
 
 // testNUDDispatcher implements NUDDispatcher to validate the dispatching of
@@ -150,36 +151,27 @@ func (d *testNUDDispatcher) queueEvent(e testEntryEventInfo) {
 	d.events = append(d.events, e)
 }
 
-func (d *testNUDDispatcher) OnNeighborAdded(nicID tcpip.NICID, addr tcpip.Address, linkAddr tcpip.LinkAddress, state NeighborState, updatedAt time.Time) {
+func (d *testNUDDispatcher) OnNeighborAdded(nicID tcpip.NICID, entry NeighborEntry) {
 	d.queueEvent(testEntryEventInfo{
 		EventType: entryTestAdded,
 		NICID:     nicID,
-		Addr:      addr,
-		LinkAddr:  linkAddr,
-		State:     state,
-		UpdatedAt: updatedAt,
+		Entry:     entry,
 	})
 }
 
-func (d *testNUDDispatcher) OnNeighborChanged(nicID tcpip.NICID, addr tcpip.Address, linkAddr tcpip.LinkAddress, state NeighborState, updatedAt time.Time) {
+func (d *testNUDDispatcher) OnNeighborChanged(nicID tcpip.NICID, entry NeighborEntry) {
 	d.queueEvent(testEntryEventInfo{
 		EventType: entryTestChanged,
 		NICID:     nicID,
-		Addr:      addr,
-		LinkAddr:  linkAddr,
-		State:     state,
-		UpdatedAt: updatedAt,
+		Entry:     entry,
 	})
 }
 
-func (d *testNUDDispatcher) OnNeighborRemoved(nicID tcpip.NICID, addr tcpip.Address, linkAddr tcpip.LinkAddress, state NeighborState, updatedAt time.Time) {
+func (d *testNUDDispatcher) OnNeighborRemoved(nicID tcpip.NICID, entry NeighborEntry) {
 	d.queueEvent(testEntryEventInfo{
 		EventType: entryTestRemoved,
 		NICID:     nicID,
-		Addr:      addr,
-		LinkAddr:  linkAddr,
-		State:     state,
-		UpdatedAt: updatedAt,
+		Entry:     entry,
 	})
 }
 
@@ -202,9 +194,9 @@ func (p entryTestProbeInfo) String() string {
 
 // LinkAddressRequest sends a request for the LinkAddress of addr. Broadcasts
 // to the local network if linkAddr is the zero value.
-func (r *entryTestLinkResolver) LinkAddressRequest(addr, localAddr tcpip.Address, linkAddr tcpip.LinkAddress, linkEP LinkEndpoint) *tcpip.Error {
+func (r *entryTestLinkResolver) LinkAddressRequest(targetAddr, localAddr tcpip.Address, linkAddr tcpip.LinkAddress, _ NetworkInterface) *tcpip.Error {
 	p := entryTestProbeInfo{
-		RemoteAddress:     addr,
+		RemoteAddress:     targetAddr,
 		RemoteLinkAddress: linkAddr,
 		LocalAddress:      localAddr,
 	}
@@ -237,6 +229,7 @@ func entryTestSetup(c NUDConfigurations) (*neighborEntry, *testNUDDispatcher, *e
 			clock:   clock,
 			nudDisp: &disp,
 		},
+		stats: makeNICStats(),
 	}
 	nic.networkEndpoints = map[tcpip.NetworkProtocolNumber]NetworkEndpoint{
 		header.IPv6ProtocolNumber: (&testIPv6Protocol{}).NewEndpoint(&nic, nil, nil, nil),
@@ -245,7 +238,7 @@ func entryTestSetup(c NUDConfigurations) (*neighborEntry, *testNUDDispatcher, *e
 	rng := rand.New(rand.NewSource(time.Now().UnixNano()))
 	nudState := NewNUDState(c, rng)
 	linkRes := entryTestLinkResolver{}
-	entry := newNeighborEntry(&nic, entryTestAddr1 /* remoteAddr */, entryTestAddr2 /* localAddr */, nudState, &linkRes)
+	entry := newNeighborEntry(&nic, entryTestAddr1 /* remoteAddr */, nudState, &linkRes)
 
 	// Stub out the neighbor cache to verify deletion from the cache.
 	nic.neigh = &neighborCache{
@@ -323,15 +316,16 @@ func TestEntryUnknownToUnknownWhenConfirmationWithUnknownAddress(t *testing.T) {
 
 func TestEntryUnknownToIncomplete(t *testing.T) {
 	c := DefaultNUDConfigurations()
-	e, nudDisp, linkRes, _ := entryTestSetup(c)
+	e, nudDisp, linkRes, clock := entryTestSetup(c)
 
 	e.mu.Lock()
-	e.handlePacketQueuedLocked()
+	e.handlePacketQueuedLocked(entryTestAddr2)
 	if got, want := e.neigh.State, Incomplete; got != want {
 		t.Errorf("got e.neigh.State = %q, want = %q", got, want)
 	}
 	e.mu.Unlock()
 
+	runImmediatelyScheduledJobs(clock)
 	wantProbes := []entryTestProbeInfo{
 		{
 			RemoteAddress:     entryTestAddr1,
@@ -350,9 +344,11 @@ func TestEntryUnknownToIncomplete(t *testing.T) {
 		{
 			EventType: entryTestAdded,
 			NICID:     entryTestNICID,
-			Addr:      entryTestAddr1,
-			LinkAddr:  tcpip.LinkAddress(""),
-			State:     Incomplete,
+			Entry: NeighborEntry{
+				Addr:     entryTestAddr1,
+				LinkAddr: tcpip.LinkAddress(""),
+				State:    Incomplete,
+			},
 		},
 	}
 	{
@@ -367,7 +363,7 @@ func TestEntryUnknownToIncomplete(t *testing.T) {
 
 func TestEntryUnknownToStale(t *testing.T) {
 	c := DefaultNUDConfigurations()
-	e, nudDisp, linkRes, _ := entryTestSetup(c)
+	e, nudDisp, linkRes, clock := entryTestSetup(c)
 
 	e.mu.Lock()
 	e.handleProbeLocked(entryTestLinkAddr1)
@@ -377,6 +373,7 @@ func TestEntryUnknownToStale(t *testing.T) {
 	e.mu.Unlock()
 
 	// No probes should have been sent.
+	runImmediatelyScheduledJobs(clock)
 	linkRes.mu.Lock()
 	diff := cmp.Diff(linkRes.probes, []entryTestProbeInfo(nil))
 	linkRes.mu.Unlock()
@@ -388,9 +385,11 @@ func TestEntryUnknownToStale(t *testing.T) {
 		{
 			EventType: entryTestAdded,
 			NICID:     entryTestNICID,
-			Addr:      entryTestAddr1,
-			LinkAddr:  entryTestLinkAddr1,
-			State:     Stale,
+			Entry: NeighborEntry{
+				Addr:     entryTestAddr1,
+				LinkAddr: entryTestLinkAddr1,
+				State:    Stale,
+			},
 		},
 	}
 	nudDisp.mu.Lock()
@@ -406,11 +405,11 @@ func TestEntryIncompleteToIncompleteDoesNotChangeUpdatedAt(t *testing.T) {
 	e, nudDisp, linkRes, clock := entryTestSetup(c)
 
 	e.mu.Lock()
-	e.handlePacketQueuedLocked()
+	e.handlePacketQueuedLocked(entryTestAddr2)
 	if got, want := e.neigh.State, Incomplete; got != want {
 		t.Errorf("got e.neigh.State = %q, want = %q", got, want)
 	}
-	updatedAt := e.neigh.UpdatedAt
+	updatedAtNanos := e.neigh.UpdatedAtNanos
 	e.mu.Unlock()
 
 	clock.Advance(c.RetransmitTimer)
@@ -437,7 +436,7 @@ func TestEntryIncompleteToIncompleteDoesNotChangeUpdatedAt(t *testing.T) {
 	}
 
 	e.mu.Lock()
-	if got, want := e.neigh.UpdatedAt, updatedAt; got != want {
+	if got, want := e.neigh.UpdatedAtNanos, updatedAtNanos; got != want {
 		t.Errorf("got e.neigh.UpdatedAt = %q, want = %q", got, want)
 	}
 	e.mu.Unlock()
@@ -468,16 +467,20 @@ func TestEntryIncompleteToIncompleteDoesNotChangeUpdatedAt(t *testing.T) {
 		{
 			EventType: entryTestAdded,
 			NICID:     entryTestNICID,
-			Addr:      entryTestAddr1,
-			LinkAddr:  tcpip.LinkAddress(""),
-			State:     Incomplete,
+			Entry: NeighborEntry{
+				Addr:     entryTestAddr1,
+				LinkAddr: tcpip.LinkAddress(""),
+				State:    Incomplete,
+			},
 		},
 		{
 			EventType: entryTestRemoved,
 			NICID:     entryTestNICID,
-			Addr:      entryTestAddr1,
-			LinkAddr:  tcpip.LinkAddress(""),
-			State:     Incomplete,
+			Entry: NeighborEntry{
+				Addr:     entryTestAddr1,
+				LinkAddr: tcpip.LinkAddress(""),
+				State:    Incomplete,
+			},
 		},
 	}
 	nudDisp.mu.Lock()
@@ -487,7 +490,7 @@ func TestEntryIncompleteToIncompleteDoesNotChangeUpdatedAt(t *testing.T) {
 	nudDisp.mu.Unlock()
 
 	e.mu.Lock()
-	if got, notWant := e.neigh.UpdatedAt, updatedAt; got == notWant {
+	if got, notWant := e.neigh.UpdatedAtNanos, updatedAtNanos; got == notWant {
 		t.Errorf("expected e.neigh.UpdatedAt to change, got = %q", got)
 	}
 	e.mu.Unlock()
@@ -495,23 +498,16 @@ func TestEntryIncompleteToIncompleteDoesNotChangeUpdatedAt(t *testing.T) {
 
 func TestEntryIncompleteToReachable(t *testing.T) {
 	c := DefaultNUDConfigurations()
-	e, nudDisp, linkRes, _ := entryTestSetup(c)
+	e, nudDisp, linkRes, clock := entryTestSetup(c)
 
 	e.mu.Lock()
-	e.handlePacketQueuedLocked()
-	if got, want := e.neigh.State, Incomplete; got != want {
-		t.Errorf("got e.neigh.State = %q, want = %q", got, want)
-	}
-	e.handleConfirmationLocked(entryTestLinkAddr1, ReachabilityConfirmationFlags{
-		Solicited: true,
-		Override:  false,
-		IsRouter:  false,
-	})
-	if got, want := e.neigh.State, Reachable; got != want {
-		t.Errorf("got e.neigh.State = %q, want = %q", got, want)
+	e.handlePacketQueuedLocked(entryTestAddr2)
+	if e.neigh.State != Incomplete {
+		t.Errorf("got e.neigh.State = %q, want = %q", e.neigh.State, Incomplete)
 	}
 	e.mu.Unlock()
 
+	runImmediatelyScheduledJobs(clock)
 	wantProbes := []entryTestProbeInfo{
 		{
 			RemoteAddress:     entryTestAddr1,
@@ -526,20 +522,35 @@ func TestEntryIncompleteToReachable(t *testing.T) {
 		t.Fatalf("link address resolver probes mismatch (-got, +want):\n%s", diff)
 	}
 
+	e.mu.Lock()
+	e.handleConfirmationLocked(entryTestLinkAddr1, ReachabilityConfirmationFlags{
+		Solicited: true,
+		Override:  false,
+		IsRouter:  false,
+	})
+	if e.neigh.State != Reachable {
+		t.Errorf("got e.neigh.State = %q, want = %q", e.neigh.State, Reachable)
+	}
+	e.mu.Unlock()
+
 	wantEvents := []testEntryEventInfo{
 		{
 			EventType: entryTestAdded,
 			NICID:     entryTestNICID,
-			Addr:      entryTestAddr1,
-			LinkAddr:  tcpip.LinkAddress(""),
-			State:     Incomplete,
+			Entry: NeighborEntry{
+				Addr:     entryTestAddr1,
+				LinkAddr: tcpip.LinkAddress(""),
+				State:    Incomplete,
+			},
 		},
 		{
 			EventType: entryTestChanged,
 			NICID:     entryTestNICID,
-			Addr:      entryTestAddr1,
-			LinkAddr:  entryTestLinkAddr1,
-			State:     Reachable,
+			Entry: NeighborEntry{
+				Addr:     entryTestAddr1,
+				LinkAddr: entryTestLinkAddr1,
+				State:    Reachable,
+			},
 		},
 	}
 	nudDisp.mu.Lock()
@@ -555,7 +566,7 @@ func TestEntryIncompleteToReachable(t *testing.T) {
 // to Reachable.
 func TestEntryAddsAndClearsWakers(t *testing.T) {
 	c := DefaultNUDConfigurations()
-	e, nudDisp, linkRes, _ := entryTestSetup(c)
+	e, nudDisp, linkRes, clock := entryTestSetup(c)
 
 	w := sleep.Waker{}
 	s := sleep.Sleeper{}
@@ -563,7 +574,25 @@ func TestEntryAddsAndClearsWakers(t *testing.T) {
 	defer s.Done()
 
 	e.mu.Lock()
-	e.handlePacketQueuedLocked()
+	e.handlePacketQueuedLocked(entryTestAddr2)
+	e.mu.Unlock()
+
+	runImmediatelyScheduledJobs(clock)
+	wantProbes := []entryTestProbeInfo{
+		{
+			RemoteAddress:     entryTestAddr1,
+			RemoteLinkAddress: tcpip.LinkAddress(""),
+			LocalAddress:      entryTestAddr2,
+		},
+	}
+	linkRes.mu.Lock()
+	diff := cmp.Diff(linkRes.probes, wantProbes)
+	linkRes.mu.Unlock()
+	if diff != "" {
+		t.Fatalf("link address resolver probes mismatch (-got, +want):\n%s", diff)
+	}
+
+	e.mu.Lock()
 	if got := e.wakers; got != nil {
 		t.Errorf("got e.wakers = %v, want = nil", got)
 	}
@@ -587,34 +616,24 @@ func TestEntryAddsAndClearsWakers(t *testing.T) {
 	}
 	e.mu.Unlock()
 
-	wantProbes := []entryTestProbeInfo{
-		{
-			RemoteAddress:     entryTestAddr1,
-			RemoteLinkAddress: tcpip.LinkAddress(""),
-			LocalAddress:      entryTestAddr2,
-		},
-	}
-	linkRes.mu.Lock()
-	diff := cmp.Diff(linkRes.probes, wantProbes)
-	linkRes.mu.Unlock()
-	if diff != "" {
-		t.Fatalf("link address resolver probes mismatch (-got, +want):\n%s", diff)
-	}
-
 	wantEvents := []testEntryEventInfo{
 		{
 			EventType: entryTestAdded,
 			NICID:     entryTestNICID,
-			Addr:      entryTestAddr1,
-			LinkAddr:  tcpip.LinkAddress(""),
-			State:     Incomplete,
+			Entry: NeighborEntry{
+				Addr:     entryTestAddr1,
+				LinkAddr: tcpip.LinkAddress(""),
+				State:    Incomplete,
+			},
 		},
 		{
 			EventType: entryTestChanged,
 			NICID:     entryTestNICID,
-			Addr:      entryTestAddr1,
-			LinkAddr:  entryTestLinkAddr1,
-			State:     Reachable,
+			Entry: NeighborEntry{
+				Addr:     entryTestAddr1,
+				LinkAddr: entryTestLinkAddr1,
+				State:    Reachable,
+			},
 		},
 	}
 	nudDisp.mu.Lock()
@@ -626,26 +645,16 @@ func TestEntryAddsAndClearsWakers(t *testing.T) {
 
 func TestEntryIncompleteToReachableWithRouterFlag(t *testing.T) {
 	c := DefaultNUDConfigurations()
-	e, nudDisp, linkRes, _ := entryTestSetup(c)
+	e, nudDisp, linkRes, clock := entryTestSetup(c)
 
 	e.mu.Lock()
-	e.handlePacketQueuedLocked()
-	if got, want := e.neigh.State, Incomplete; got != want {
-		t.Errorf("got e.neigh.State = %q, want = %q", got, want)
-	}
-	e.handleConfirmationLocked(entryTestLinkAddr1, ReachabilityConfirmationFlags{
-		Solicited: true,
-		Override:  false,
-		IsRouter:  true,
-	})
-	if got, want := e.neigh.State, Reachable; got != want {
-		t.Errorf("got e.neigh.State = %q, want = %q", got, want)
-	}
-	if got, want := e.isRouter, true; got != want {
-		t.Errorf("got e.isRouter = %t, want = %t", got, want)
+	e.handlePacketQueuedLocked(entryTestAddr2)
+	if e.neigh.State != Incomplete {
+		t.Errorf("got e.neigh.State = %q, want = %q", e.neigh.State, Incomplete)
 	}
 	e.mu.Unlock()
 
+	runImmediatelyScheduledJobs(clock)
 	wantProbes := []entryTestProbeInfo{
 		{
 			RemoteAddress:     entryTestAddr1,
@@ -659,20 +668,38 @@ func TestEntryIncompleteToReachableWithRouterFlag(t *testing.T) {
 	}
 	linkRes.mu.Unlock()
 
+	e.mu.Lock()
+	e.handleConfirmationLocked(entryTestLinkAddr1, ReachabilityConfirmationFlags{
+		Solicited: true,
+		Override:  false,
+		IsRouter:  true,
+	})
+	if e.neigh.State != Reachable {
+		t.Errorf("got e.neigh.State = %q, want = %q", e.neigh.State, Reachable)
+	}
+	if !e.isRouter {
+		t.Errorf("got e.isRouter = %t, want = true", e.isRouter)
+	}
+	e.mu.Unlock()
+
 	wantEvents := []testEntryEventInfo{
 		{
 			EventType: entryTestAdded,
 			NICID:     entryTestNICID,
-			Addr:      entryTestAddr1,
-			LinkAddr:  tcpip.LinkAddress(""),
-			State:     Incomplete,
+			Entry: NeighborEntry{
+				Addr:     entryTestAddr1,
+				LinkAddr: tcpip.LinkAddress(""),
+				State:    Incomplete,
+			},
 		},
 		{
 			EventType: entryTestChanged,
 			NICID:     entryTestNICID,
-			Addr:      entryTestAddr1,
-			LinkAddr:  entryTestLinkAddr1,
-			State:     Reachable,
+			Entry: NeighborEntry{
+				Addr:     entryTestAddr1,
+				LinkAddr: entryTestLinkAddr1,
+				State:    Reachable,
+			},
 		},
 	}
 	nudDisp.mu.Lock()
@@ -684,23 +711,16 @@ func TestEntryIncompleteToReachableWithRouterFlag(t *testing.T) {
 
 func TestEntryIncompleteToStale(t *testing.T) {
 	c := DefaultNUDConfigurations()
-	e, nudDisp, linkRes, _ := entryTestSetup(c)
+	e, nudDisp, linkRes, clock := entryTestSetup(c)
 
 	e.mu.Lock()
-	e.handlePacketQueuedLocked()
-	if got, want := e.neigh.State, Incomplete; got != want {
-		t.Errorf("got e.neigh.State = %q, want = %q", got, want)
-	}
-	e.handleConfirmationLocked(entryTestLinkAddr1, ReachabilityConfirmationFlags{
-		Solicited: false,
-		Override:  false,
-		IsRouter:  false,
-	})
-	if got, want := e.neigh.State, Stale; got != want {
-		t.Errorf("got e.neigh.State = %q, want = %q", got, want)
+	e.handlePacketQueuedLocked(entryTestAddr2)
+	if e.neigh.State != Incomplete {
+		t.Errorf("got e.neigh.State = %q, want = %q", e.neigh.State, Incomplete)
 	}
 	e.mu.Unlock()
 
+	runImmediatelyScheduledJobs(clock)
 	wantProbes := []entryTestProbeInfo{
 		{
 			RemoteAddress:     entryTestAddr1,
@@ -715,20 +735,35 @@ func TestEntryIncompleteToStale(t *testing.T) {
 		t.Fatalf("link address resolver probes mismatch (-got, +want):\n%s", diff)
 	}
 
+	e.mu.Lock()
+	e.handleConfirmationLocked(entryTestLinkAddr1, ReachabilityConfirmationFlags{
+		Solicited: false,
+		Override:  false,
+		IsRouter:  false,
+	})
+	if e.neigh.State != Stale {
+		t.Errorf("got e.neigh.State = %q, want = %q", e.neigh.State, Stale)
+	}
+	e.mu.Unlock()
+
 	wantEvents := []testEntryEventInfo{
 		{
 			EventType: entryTestAdded,
 			NICID:     entryTestNICID,
-			Addr:      entryTestAddr1,
-			LinkAddr:  tcpip.LinkAddress(""),
-			State:     Incomplete,
+			Entry: NeighborEntry{
+				Addr:     entryTestAddr1,
+				LinkAddr: tcpip.LinkAddress(""),
+				State:    Incomplete,
+			},
 		},
 		{
 			EventType: entryTestChanged,
 			NICID:     entryTestNICID,
-			Addr:      entryTestAddr1,
-			LinkAddr:  entryTestLinkAddr1,
-			State:     Stale,
+			Entry: NeighborEntry{
+				Addr:     entryTestAddr1,
+				LinkAddr: entryTestLinkAddr1,
+				State:    Stale,
+			},
 		},
 	}
 	nudDisp.mu.Lock()
@@ -744,7 +779,7 @@ func TestEntryIncompleteToFailed(t *testing.T) {
 	e, nudDisp, linkRes, clock := entryTestSetup(c)
 
 	e.mu.Lock()
-	e.handlePacketQueuedLocked()
+	e.handlePacketQueuedLocked(entryTestAddr2)
 	if got, want := e.neigh.State, Incomplete; got != want {
 		t.Errorf("got e.neigh.State = %q, want = %q", got, want)
 	}
@@ -783,16 +818,20 @@ func TestEntryIncompleteToFailed(t *testing.T) {
 		{
 			EventType: entryTestAdded,
 			NICID:     entryTestNICID,
-			Addr:      entryTestAddr1,
-			LinkAddr:  tcpip.LinkAddress(""),
-			State:     Incomplete,
+			Entry: NeighborEntry{
+				Addr:     entryTestAddr1,
+				LinkAddr: tcpip.LinkAddress(""),
+				State:    Incomplete,
+			},
 		},
 		{
 			EventType: entryTestRemoved,
 			NICID:     entryTestNICID,
-			Addr:      entryTestAddr1,
-			LinkAddr:  tcpip.LinkAddress(""),
-			State:     Incomplete,
+			Entry: NeighborEntry{
+				Addr:     entryTestAddr1,
+				LinkAddr: tcpip.LinkAddress(""),
+				State:    Incomplete,
+			},
 		},
 	}
 	nudDisp.mu.Lock()
@@ -817,12 +856,30 @@ func (*testLocker) Unlock() {}
 
 func TestEntryStaysReachableWhenConfirmationWithRouterFlag(t *testing.T) {
 	c := DefaultNUDConfigurations()
-	e, nudDisp, linkRes, _ := entryTestSetup(c)
+	e, nudDisp, linkRes, clock := entryTestSetup(c)
 
 	ipv6EP := e.nic.networkEndpoints[header.IPv6ProtocolNumber].(*testIPv6Endpoint)
 
 	e.mu.Lock()
-	e.handlePacketQueuedLocked()
+	e.handlePacketQueuedLocked(entryTestAddr2)
+	e.mu.Unlock()
+
+	runImmediatelyScheduledJobs(clock)
+	wantProbes := []entryTestProbeInfo{
+		{
+			RemoteAddress:     entryTestAddr1,
+			RemoteLinkAddress: tcpip.LinkAddress(""),
+			LocalAddress:      entryTestAddr2,
+		},
+	}
+	linkRes.mu.Lock()
+	diff := cmp.Diff(linkRes.probes, wantProbes)
+	linkRes.mu.Unlock()
+	if diff != "" {
+		t.Fatalf("link address resolver probes mismatch (-got, +want):\n%s", diff)
+	}
+
+	e.mu.Lock()
 	e.handleConfirmationLocked(entryTestLinkAddr1, ReachabilityConfirmationFlags{
 		Solicited: true,
 		Override:  false,
@@ -848,34 +905,24 @@ func TestEntryStaysReachableWhenConfirmationWithRouterFlag(t *testing.T) {
 	}
 	e.mu.Unlock()
 
-	wantProbes := []entryTestProbeInfo{
-		{
-			RemoteAddress:     entryTestAddr1,
-			RemoteLinkAddress: tcpip.LinkAddress(""),
-			LocalAddress:      entryTestAddr2,
-		},
-	}
-	linkRes.mu.Lock()
-	diff := cmp.Diff(linkRes.probes, wantProbes)
-	linkRes.mu.Unlock()
-	if diff != "" {
-		t.Fatalf("link address resolver probes mismatch (-got, +want):\n%s", diff)
-	}
-
 	wantEvents := []testEntryEventInfo{
 		{
 			EventType: entryTestAdded,
 			NICID:     entryTestNICID,
-			Addr:      entryTestAddr1,
-			LinkAddr:  tcpip.LinkAddress(""),
-			State:     Incomplete,
+			Entry: NeighborEntry{
+				Addr:     entryTestAddr1,
+				LinkAddr: tcpip.LinkAddress(""),
+				State:    Incomplete,
+			},
 		},
 		{
 			EventType: entryTestChanged,
 			NICID:     entryTestNICID,
-			Addr:      entryTestAddr1,
-			LinkAddr:  entryTestLinkAddr1,
-			State:     Reachable,
+			Entry: NeighborEntry{
+				Addr:     entryTestAddr1,
+				LinkAddr: entryTestLinkAddr1,
+				State:    Reachable,
+			},
 		},
 	}
 	nudDisp.mu.Lock()
@@ -893,27 +940,13 @@ func TestEntryStaysReachableWhenConfirmationWithRouterFlag(t *testing.T) {
 
 func TestEntryStaysReachableWhenProbeWithSameAddress(t *testing.T) {
 	c := DefaultNUDConfigurations()
-	e, nudDisp, linkRes, _ := entryTestSetup(c)
+	e, nudDisp, linkRes, clock := entryTestSetup(c)
 
 	e.mu.Lock()
-	e.handlePacketQueuedLocked()
-	e.handleConfirmationLocked(entryTestLinkAddr1, ReachabilityConfirmationFlags{
-		Solicited: true,
-		Override:  false,
-		IsRouter:  false,
-	})
-	if got, want := e.neigh.State, Reachable; got != want {
-		t.Errorf("got e.neigh.State = %q, want = %q", got, want)
-	}
-	e.handleProbeLocked(entryTestLinkAddr1)
-	if got, want := e.neigh.State, Reachable; got != want {
-		t.Errorf("got e.neigh.State = %q, want = %q", got, want)
-	}
-	if got, want := e.neigh.LinkAddr, entryTestLinkAddr1; got != want {
-		t.Errorf("got e.neigh.LinkAddr = %q, want = %q", got, want)
-	}
+	e.handlePacketQueuedLocked(entryTestAddr2)
 	e.mu.Unlock()
 
+	runImmediatelyScheduledJobs(clock)
 	wantProbes := []entryTestProbeInfo{
 		{
 			RemoteAddress:     entryTestAddr1,
@@ -928,20 +961,42 @@ func TestEntryStaysReachableWhenProbeWithSameAddress(t *testing.T) {
 		t.Fatalf("link address resolver probes mismatch (-got, +want):\n%s", diff)
 	}
 
+	e.mu.Lock()
+	e.handleConfirmationLocked(entryTestLinkAddr1, ReachabilityConfirmationFlags{
+		Solicited: true,
+		Override:  false,
+		IsRouter:  false,
+	})
+	if e.neigh.State != Reachable {
+		t.Errorf("got e.neigh.State = %q, want = %q", e.neigh.State, Reachable)
+	}
+	e.handleProbeLocked(entryTestLinkAddr1)
+	if e.neigh.State != Reachable {
+		t.Errorf("got e.neigh.State = %q, want = %q", e.neigh.State, Reachable)
+	}
+	if e.neigh.LinkAddr != entryTestLinkAddr1 {
+		t.Errorf("got e.neigh.LinkAddr = %q, want = %q", e.neigh.LinkAddr, entryTestLinkAddr1)
+	}
+	e.mu.Unlock()
+
 	wantEvents := []testEntryEventInfo{
 		{
 			EventType: entryTestAdded,
 			NICID:     entryTestNICID,
-			Addr:      entryTestAddr1,
-			LinkAddr:  tcpip.LinkAddress(""),
-			State:     Incomplete,
+			Entry: NeighborEntry{
+				Addr:     entryTestAddr1,
+				LinkAddr: tcpip.LinkAddress(""),
+				State:    Incomplete,
+			},
 		},
 		{
 			EventType: entryTestChanged,
 			NICID:     entryTestNICID,
-			Addr:      entryTestAddr1,
-			LinkAddr:  entryTestLinkAddr1,
-			State:     Reachable,
+			Entry: NeighborEntry{
+				Addr:     entryTestAddr1,
+				LinkAddr: entryTestLinkAddr1,
+				State:    Reachable,
+			},
 		},
 	}
 	nudDisp.mu.Lock()
@@ -961,17 +1016,10 @@ func TestEntryReachableToStaleWhenTimeout(t *testing.T) {
 	e, nudDisp, linkRes, clock := entryTestSetup(c)
 
 	e.mu.Lock()
-	e.handlePacketQueuedLocked()
-	e.handleConfirmationLocked(entryTestLinkAddr1, ReachabilityConfirmationFlags{
-		Solicited: true,
-		Override:  false,
-		IsRouter:  false,
-	})
-	if got, want := e.neigh.State, Reachable; got != want {
-		t.Errorf("got e.neigh.State = %q, want = %q", got, want)
-	}
+	e.handlePacketQueuedLocked(entryTestAddr2)
 	e.mu.Unlock()
 
+	runImmediatelyScheduledJobs(clock)
 	wantProbes := []entryTestProbeInfo{
 		{
 			RemoteAddress:     entryTestAddr1,
@@ -986,29 +1034,46 @@ func TestEntryReachableToStaleWhenTimeout(t *testing.T) {
 		t.Fatalf("link address resolver probes mismatch (-got, +want):\n%s", diff)
 	}
 
+	e.mu.Lock()
+	e.handleConfirmationLocked(entryTestLinkAddr1, ReachabilityConfirmationFlags{
+		Solicited: true,
+		Override:  false,
+		IsRouter:  false,
+	})
+	if e.neigh.State != Reachable {
+		t.Errorf("got e.neigh.State = %q, want = %q", e.neigh.State, Reachable)
+	}
+	e.mu.Unlock()
+
 	clock.Advance(c.BaseReachableTime)
 
 	wantEvents := []testEntryEventInfo{
 		{
 			EventType: entryTestAdded,
 			NICID:     entryTestNICID,
-			Addr:      entryTestAddr1,
-			LinkAddr:  tcpip.LinkAddress(""),
-			State:     Incomplete,
+			Entry: NeighborEntry{
+				Addr:     entryTestAddr1,
+				LinkAddr: tcpip.LinkAddress(""),
+				State:    Incomplete,
+			},
 		},
 		{
 			EventType: entryTestChanged,
 			NICID:     entryTestNICID,
-			Addr:      entryTestAddr1,
-			LinkAddr:  entryTestLinkAddr1,
-			State:     Reachable,
+			Entry: NeighborEntry{
+				Addr:     entryTestAddr1,
+				LinkAddr: entryTestLinkAddr1,
+				State:    Reachable,
+			},
 		},
 		{
 			EventType: entryTestChanged,
 			NICID:     entryTestNICID,
-			Addr:      entryTestAddr1,
-			LinkAddr:  entryTestLinkAddr1,
-			State:     Stale,
+			Entry: NeighborEntry{
+				Addr:     entryTestAddr1,
+				LinkAddr: entryTestLinkAddr1,
+				State:    Stale,
+			},
 		},
 	}
 	nudDisp.mu.Lock()
@@ -1026,24 +1091,13 @@ func TestEntryReachableToStaleWhenTimeout(t *testing.T) {
 
 func TestEntryReachableToStaleWhenProbeWithDifferentAddress(t *testing.T) {
 	c := DefaultNUDConfigurations()
-	e, nudDisp, linkRes, _ := entryTestSetup(c)
+	e, nudDisp, linkRes, clock := entryTestSetup(c)
 
 	e.mu.Lock()
-	e.handlePacketQueuedLocked()
-	e.handleConfirmationLocked(entryTestLinkAddr1, ReachabilityConfirmationFlags{
-		Solicited: true,
-		Override:  false,
-		IsRouter:  false,
-	})
-	if got, want := e.neigh.State, Reachable; got != want {
-		t.Errorf("got e.neigh.State = %q, want = %q", got, want)
-	}
-	e.handleProbeLocked(entryTestLinkAddr2)
-	if got, want := e.neigh.State, Stale; got != want {
-		t.Errorf("got e.neigh.State = %q, want = %q", got, want)
-	}
+	e.handlePacketQueuedLocked(entryTestAddr2)
 	e.mu.Unlock()
 
+	runImmediatelyScheduledJobs(clock)
 	wantProbes := []entryTestProbeInfo{
 		{
 			RemoteAddress:     entryTestAddr1,
@@ -1058,27 +1112,48 @@ func TestEntryReachableToStaleWhenProbeWithDifferentAddress(t *testing.T) {
 		t.Fatalf("link address resolver probes mismatch (-got, +want):\n%s", diff)
 	}
 
+	e.mu.Lock()
+	e.handleConfirmationLocked(entryTestLinkAddr1, ReachabilityConfirmationFlags{
+		Solicited: true,
+		Override:  false,
+		IsRouter:  false,
+	})
+	if e.neigh.State != Reachable {
+		t.Errorf("got e.neigh.State = %q, want = %q", e.neigh.State, Reachable)
+	}
+	e.handleProbeLocked(entryTestLinkAddr2)
+	if e.neigh.State != Stale {
+		t.Errorf("got e.neigh.State = %q, want = %q", e.neigh.State, Stale)
+	}
+	e.mu.Unlock()
+
 	wantEvents := []testEntryEventInfo{
 		{
 			EventType: entryTestAdded,
 			NICID:     entryTestNICID,
-			Addr:      entryTestAddr1,
-			LinkAddr:  tcpip.LinkAddress(""),
-			State:     Incomplete,
+			Entry: NeighborEntry{
+				Addr:     entryTestAddr1,
+				LinkAddr: tcpip.LinkAddress(""),
+				State:    Incomplete,
+			},
 		},
 		{
 			EventType: entryTestChanged,
 			NICID:     entryTestNICID,
-			Addr:      entryTestAddr1,
-			LinkAddr:  entryTestLinkAddr1,
-			State:     Reachable,
+			Entry: NeighborEntry{
+				Addr:     entryTestAddr1,
+				LinkAddr: entryTestLinkAddr1,
+				State:    Reachable,
+			},
 		},
 		{
 			EventType: entryTestChanged,
 			NICID:     entryTestNICID,
-			Addr:      entryTestAddr1,
-			LinkAddr:  entryTestLinkAddr2,
-			State:     Stale,
+			Entry: NeighborEntry{
+				Addr:     entryTestAddr1,
+				LinkAddr: entryTestLinkAddr2,
+				State:    Stale,
+			},
 		},
 	}
 	nudDisp.mu.Lock()
@@ -1086,38 +1161,17 @@ func TestEntryReachableToStaleWhenProbeWithDifferentAddress(t *testing.T) {
 		t.Errorf("nud dispatcher events mismatch (-got, +want):\n%s", diff)
 	}
 	nudDisp.mu.Unlock()
-
-	e.mu.Lock()
-	if got, want := e.neigh.State, Stale; got != want {
-		t.Errorf("got e.neigh.State = %q, want = %q", got, want)
-	}
-	e.mu.Unlock()
 }
 
 func TestEntryReachableToStaleWhenConfirmationWithDifferentAddress(t *testing.T) {
 	c := DefaultNUDConfigurations()
-	e, nudDisp, linkRes, _ := entryTestSetup(c)
+	e, nudDisp, linkRes, clock := entryTestSetup(c)
 
 	e.mu.Lock()
-	e.handlePacketQueuedLocked()
-	e.handleConfirmationLocked(entryTestLinkAddr1, ReachabilityConfirmationFlags{
-		Solicited: true,
-		Override:  false,
-		IsRouter:  false,
-	})
-	if got, want := e.neigh.State, Reachable; got != want {
-		t.Errorf("got e.neigh.State = %q, want = %q", got, want)
-	}
-	e.handleConfirmationLocked(entryTestLinkAddr2, ReachabilityConfirmationFlags{
-		Solicited: false,
-		Override:  false,
-		IsRouter:  false,
-	})
-	if got, want := e.neigh.State, Stale; got != want {
-		t.Errorf("got e.neigh.State = %q, want = %q", got, want)
-	}
+	e.handlePacketQueuedLocked(entryTestAddr2)
 	e.mu.Unlock()
 
+	runImmediatelyScheduledJobs(clock)
 	wantProbes := []entryTestProbeInfo{
 		{
 			RemoteAddress:     entryTestAddr1,
@@ -1132,27 +1186,52 @@ func TestEntryReachableToStaleWhenConfirmationWithDifferentAddress(t *testing.T)
 		t.Fatalf("link address resolver probes mismatch (-got, +want):\n%s", diff)
 	}
 
+	e.mu.Lock()
+	e.handleConfirmationLocked(entryTestLinkAddr1, ReachabilityConfirmationFlags{
+		Solicited: true,
+		Override:  false,
+		IsRouter:  false,
+	})
+	if e.neigh.State != Reachable {
+		t.Errorf("got e.neigh.State = %q, want = %q", e.neigh.State, Reachable)
+	}
+	e.handleConfirmationLocked(entryTestLinkAddr2, ReachabilityConfirmationFlags{
+		Solicited: false,
+		Override:  false,
+		IsRouter:  false,
+	})
+	if e.neigh.State != Stale {
+		t.Errorf("got e.neigh.State = %q, want = %q", e.neigh.State, Stale)
+	}
+	e.mu.Unlock()
+
 	wantEvents := []testEntryEventInfo{
 		{
 			EventType: entryTestAdded,
 			NICID:     entryTestNICID,
-			Addr:      entryTestAddr1,
-			LinkAddr:  tcpip.LinkAddress(""),
-			State:     Incomplete,
+			Entry: NeighborEntry{
+				Addr:     entryTestAddr1,
+				LinkAddr: tcpip.LinkAddress(""),
+				State:    Incomplete,
+			},
 		},
 		{
 			EventType: entryTestChanged,
 			NICID:     entryTestNICID,
-			Addr:      entryTestAddr1,
-			LinkAddr:  entryTestLinkAddr1,
-			State:     Reachable,
+			Entry: NeighborEntry{
+				Addr:     entryTestAddr1,
+				LinkAddr: entryTestLinkAddr1,
+				State:    Reachable,
+			},
 		},
 		{
 			EventType: entryTestChanged,
 			NICID:     entryTestNICID,
-			Addr:      entryTestAddr1,
-			LinkAddr:  entryTestLinkAddr1,
-			State:     Stale,
+			Entry: NeighborEntry{
+				Addr:     entryTestAddr1,
+				LinkAddr: entryTestLinkAddr1,
+				State:    Stale,
+			},
 		},
 	}
 	nudDisp.mu.Lock()
@@ -1160,38 +1239,17 @@ func TestEntryReachableToStaleWhenConfirmationWithDifferentAddress(t *testing.T)
 		t.Errorf("nud dispatcher events mismatch (-got, +want):\n%s", diff)
 	}
 	nudDisp.mu.Unlock()
-
-	e.mu.Lock()
-	if got, want := e.neigh.State, Stale; got != want {
-		t.Errorf("got e.neigh.State = %q, want = %q", got, want)
-	}
-	e.mu.Unlock()
 }
 
 func TestEntryReachableToStaleWhenConfirmationWithDifferentAddressAndOverride(t *testing.T) {
 	c := DefaultNUDConfigurations()
-	e, nudDisp, linkRes, _ := entryTestSetup(c)
+	e, nudDisp, linkRes, clock := entryTestSetup(c)
 
 	e.mu.Lock()
-	e.handlePacketQueuedLocked()
-	e.handleConfirmationLocked(entryTestLinkAddr1, ReachabilityConfirmationFlags{
-		Solicited: true,
-		Override:  false,
-		IsRouter:  false,
-	})
-	if got, want := e.neigh.State, Reachable; got != want {
-		t.Errorf("got e.neigh.State = %q, want = %q", got, want)
-	}
-	e.handleConfirmationLocked(entryTestLinkAddr2, ReachabilityConfirmationFlags{
-		Solicited: false,
-		Override:  true,
-		IsRouter:  false,
-	})
-	if got, want := e.neigh.State, Stale; got != want {
-		t.Errorf("got e.neigh.State = %q, want = %q", got, want)
-	}
+	e.handlePacketQueuedLocked(entryTestAddr2)
 	e.mu.Unlock()
 
+	runImmediatelyScheduledJobs(clock)
 	wantProbes := []entryTestProbeInfo{
 		{
 			RemoteAddress:     entryTestAddr1,
@@ -1206,27 +1264,52 @@ func TestEntryReachableToStaleWhenConfirmationWithDifferentAddressAndOverride(t
 		t.Fatalf("link address resolver probes mismatch (-got, +want):\n%s", diff)
 	}
 
+	e.mu.Lock()
+	e.handleConfirmationLocked(entryTestLinkAddr1, ReachabilityConfirmationFlags{
+		Solicited: true,
+		Override:  false,
+		IsRouter:  false,
+	})
+	if e.neigh.State != Reachable {
+		t.Errorf("got e.neigh.State = %q, want = %q", e.neigh.State, Reachable)
+	}
+	e.handleConfirmationLocked(entryTestLinkAddr2, ReachabilityConfirmationFlags{
+		Solicited: false,
+		Override:  true,
+		IsRouter:  false,
+	})
+	if e.neigh.State != Stale {
+		t.Errorf("got e.neigh.State = %q, want = %q", e.neigh.State, Stale)
+	}
+	e.mu.Unlock()
+
 	wantEvents := []testEntryEventInfo{
 		{
 			EventType: entryTestAdded,
 			NICID:     entryTestNICID,
-			Addr:      entryTestAddr1,
-			LinkAddr:  tcpip.LinkAddress(""),
-			State:     Incomplete,
+			Entry: NeighborEntry{
+				Addr:     entryTestAddr1,
+				LinkAddr: tcpip.LinkAddress(""),
+				State:    Incomplete,
+			},
 		},
 		{
 			EventType: entryTestChanged,
 			NICID:     entryTestNICID,
-			Addr:      entryTestAddr1,
-			LinkAddr:  entryTestLinkAddr1,
-			State:     Reachable,
+			Entry: NeighborEntry{
+				Addr:     entryTestAddr1,
+				LinkAddr: entryTestLinkAddr1,
+				State:    Reachable,
+			},
 		},
 		{
 			EventType: entryTestChanged,
 			NICID:     entryTestNICID,
-			Addr:      entryTestAddr1,
-			LinkAddr:  entryTestLinkAddr2,
-			State:     Stale,
+			Entry: NeighborEntry{
+				Addr:     entryTestAddr1,
+				LinkAddr: entryTestLinkAddr2,
+				State:    Stale,
+			},
 		},
 	}
 	nudDisp.mu.Lock()
@@ -1234,37 +1317,17 @@ func TestEntryReachableToStaleWhenConfirmationWithDifferentAddressAndOverride(t
 		t.Errorf("nud dispatcher events mismatch (-got, +want):\n%s", diff)
 	}
 	nudDisp.mu.Unlock()
-
-	e.mu.Lock()
-	if got, want := e.neigh.State, Stale; got != want {
-		t.Errorf("got e.neigh.State = %q, want = %q", got, want)
-	}
-	e.mu.Unlock()
 }
 
 func TestEntryStaysStaleWhenProbeWithSameAddress(t *testing.T) {
 	c := DefaultNUDConfigurations()
-	e, nudDisp, linkRes, _ := entryTestSetup(c)
+	e, nudDisp, linkRes, clock := entryTestSetup(c)
 
 	e.mu.Lock()
-	e.handlePacketQueuedLocked()
-	e.handleConfirmationLocked(entryTestLinkAddr1, ReachabilityConfirmationFlags{
-		Solicited: false,
-		Override:  false,
-		IsRouter:  false,
-	})
-	if got, want := e.neigh.State, Stale; got != want {
-		t.Errorf("got e.neigh.State = %q, want = %q", got, want)
-	}
-	e.handleProbeLocked(entryTestLinkAddr1)
-	if got, want := e.neigh.State, Stale; got != want {
-		t.Errorf("got e.neigh.State = %q, want = %q", got, want)
-	}
-	if got, want := e.neigh.LinkAddr, entryTestLinkAddr1; got != want {
-		t.Errorf("got e.neigh.LinkAddr = %q, want = %q", got, want)
-	}
+	e.handlePacketQueuedLocked(entryTestAddr2)
 	e.mu.Unlock()
 
+	runImmediatelyScheduledJobs(clock)
 	wantProbes := []entryTestProbeInfo{
 		{
 			RemoteAddress:     entryTestAddr1,
@@ -1279,20 +1342,42 @@ func TestEntryStaysStaleWhenProbeWithSameAddress(t *testing.T) {
 		t.Fatalf("link address resolver probes mismatch (-got, +want):\n%s", diff)
 	}
 
+	e.mu.Lock()
+	e.handleConfirmationLocked(entryTestLinkAddr1, ReachabilityConfirmationFlags{
+		Solicited: false,
+		Override:  false,
+		IsRouter:  false,
+	})
+	if e.neigh.State != Stale {
+		t.Errorf("got e.neigh.State = %q, want = %q", e.neigh.State, Stale)
+	}
+	e.handleProbeLocked(entryTestLinkAddr1)
+	if e.neigh.State != Stale {
+		t.Errorf("got e.neigh.State = %q, want = %q", e.neigh.State, Stale)
+	}
+	if e.neigh.LinkAddr != entryTestLinkAddr1 {
+		t.Errorf("got e.neigh.LinkAddr = %q, want = %q", e.neigh.LinkAddr, entryTestLinkAddr1)
+	}
+	e.mu.Unlock()
+
 	wantEvents := []testEntryEventInfo{
 		{
 			EventType: entryTestAdded,
 			NICID:     entryTestNICID,
-			Addr:      entryTestAddr1,
-			LinkAddr:  tcpip.LinkAddress(""),
-			State:     Incomplete,
+			Entry: NeighborEntry{
+				Addr:     entryTestAddr1,
+				LinkAddr: tcpip.LinkAddress(""),
+				State:    Incomplete,
+			},
 		},
 		{
 			EventType: entryTestChanged,
 			NICID:     entryTestNICID,
-			Addr:      entryTestAddr1,
-			LinkAddr:  entryTestLinkAddr1,
-			State:     Stale,
+			Entry: NeighborEntry{
+				Addr:     entryTestAddr1,
+				LinkAddr: entryTestLinkAddr1,
+				State:    Stale,
+			},
 		},
 	}
 	nudDisp.mu.Lock()
@@ -1304,31 +1389,13 @@ func TestEntryStaysStaleWhenProbeWithSameAddress(t *testing.T) {
 
 func TestEntryStaleToReachableWhenSolicitedOverrideConfirmation(t *testing.T) {
 	c := DefaultNUDConfigurations()
-	e, nudDisp, linkRes, _ := entryTestSetup(c)
+	e, nudDisp, linkRes, clock := entryTestSetup(c)
 
 	e.mu.Lock()
-	e.handlePacketQueuedLocked()
-	e.handleConfirmationLocked(entryTestLinkAddr1, ReachabilityConfirmationFlags{
-		Solicited: false,
-		Override:  false,
-		IsRouter:  false,
-	})
-	if got, want := e.neigh.State, Stale; got != want {
-		t.Errorf("got e.neigh.State = %q, want = %q", got, want)
-	}
-	e.handleConfirmationLocked(entryTestLinkAddr2, ReachabilityConfirmationFlags{
-		Solicited: true,
-		Override:  true,
-		IsRouter:  false,
-	})
-	if got, want := e.neigh.State, Reachable; got != want {
-		t.Errorf("got e.neigh.State = %q, want = %q", got, want)
-	}
-	if got, want := e.neigh.LinkAddr, entryTestLinkAddr2; got != want {
-		t.Errorf("got e.neigh.LinkAddr = %q, want = %q", got, want)
-	}
+	e.handlePacketQueuedLocked(entryTestAddr2)
 	e.mu.Unlock()
 
+	runImmediatelyScheduledJobs(clock)
 	wantProbes := []entryTestProbeInfo{
 		{
 			RemoteAddress:     entryTestAddr1,
@@ -1343,27 +1410,55 @@ func TestEntryStaleToReachableWhenSolicitedOverrideConfirmation(t *testing.T) {
 		t.Fatalf("link address resolver probes mismatch (-got, +want):\n%s", diff)
 	}
 
+	e.mu.Lock()
+	e.handleConfirmationLocked(entryTestLinkAddr1, ReachabilityConfirmationFlags{
+		Solicited: false,
+		Override:  false,
+		IsRouter:  false,
+	})
+	if e.neigh.State != Stale {
+		t.Errorf("got e.neigh.State = %q, want = %q", e.neigh.State, Stale)
+	}
+	e.handleConfirmationLocked(entryTestLinkAddr2, ReachabilityConfirmationFlags{
+		Solicited: true,
+		Override:  true,
+		IsRouter:  false,
+	})
+	if e.neigh.State != Reachable {
+		t.Errorf("got e.neigh.State = %q, want = %q", e.neigh.State, Reachable)
+	}
+	if e.neigh.LinkAddr != entryTestLinkAddr2 {
+		t.Errorf("got e.neigh.LinkAddr = %q, want = %q", e.neigh.LinkAddr, entryTestLinkAddr2)
+	}
+	e.mu.Unlock()
+
 	wantEvents := []testEntryEventInfo{
 		{
 			EventType: entryTestAdded,
 			NICID:     entryTestNICID,
-			Addr:      entryTestAddr1,
-			LinkAddr:  tcpip.LinkAddress(""),
-			State:     Incomplete,
+			Entry: NeighborEntry{
+				Addr:     entryTestAddr1,
+				LinkAddr: tcpip.LinkAddress(""),
+				State:    Incomplete,
+			},
 		},
 		{
 			EventType: entryTestChanged,
 			NICID:     entryTestNICID,
-			Addr:      entryTestAddr1,
-			LinkAddr:  entryTestLinkAddr1,
-			State:     Stale,
+			Entry: NeighborEntry{
+				Addr:     entryTestAddr1,
+				LinkAddr: entryTestLinkAddr1,
+				State:    Stale,
+			},
 		},
 		{
 			EventType: entryTestChanged,
 			NICID:     entryTestNICID,
-			Addr:      entryTestAddr1,
-			LinkAddr:  entryTestLinkAddr2,
-			State:     Reachable,
+			Entry: NeighborEntry{
+				Addr:     entryTestAddr1,
+				LinkAddr: entryTestLinkAddr2,
+				State:    Reachable,
+			},
 		},
 	}
 	nudDisp.mu.Lock()
@@ -1375,10 +1470,28 @@ func TestEntryStaleToReachableWhenSolicitedOverrideConfirmation(t *testing.T) {
 
 func TestEntryStaleToReachableWhenSolicitedConfirmationWithoutAddress(t *testing.T) {
 	c := DefaultNUDConfigurations()
-	e, nudDisp, linkRes, _ := entryTestSetup(c)
+	e, nudDisp, linkRes, clock := entryTestSetup(c)
+
+	e.mu.Lock()
+	e.handlePacketQueuedLocked(entryTestAddr2)
+	e.mu.Unlock()
+
+	runImmediatelyScheduledJobs(clock)
+	wantProbes := []entryTestProbeInfo{
+		{
+			RemoteAddress:     entryTestAddr1,
+			RemoteLinkAddress: tcpip.LinkAddress(""),
+			LocalAddress:      entryTestAddr2,
+		},
+	}
+	linkRes.mu.Lock()
+	diff := cmp.Diff(linkRes.probes, wantProbes)
+	linkRes.mu.Unlock()
+	if diff != "" {
+		t.Fatalf("link address resolver probes mismatch (-got, +want):\n%s", diff)
+	}
 
 	e.mu.Lock()
-	e.handlePacketQueuedLocked()
 	e.handleConfirmationLocked(entryTestLinkAddr1, ReachabilityConfirmationFlags{
 		Solicited: false,
 		Override:  false,
@@ -1400,41 +1513,33 @@ func TestEntryStaleToReachableWhenSolicitedConfirmationWithoutAddress(t *testing
 	}
 	e.mu.Unlock()
 
-	wantProbes := []entryTestProbeInfo{
-		{
-			RemoteAddress:     entryTestAddr1,
-			RemoteLinkAddress: tcpip.LinkAddress(""),
-			LocalAddress:      entryTestAddr2,
-		},
-	}
-	linkRes.mu.Lock()
-	diff := cmp.Diff(linkRes.probes, wantProbes)
-	linkRes.mu.Unlock()
-	if diff != "" {
-		t.Fatalf("link address resolver probes mismatch (-got, +want):\n%s", diff)
-	}
-
 	wantEvents := []testEntryEventInfo{
 		{
 			EventType: entryTestAdded,
 			NICID:     entryTestNICID,
-			Addr:      entryTestAddr1,
-			LinkAddr:  tcpip.LinkAddress(""),
-			State:     Incomplete,
+			Entry: NeighborEntry{
+				Addr:     entryTestAddr1,
+				LinkAddr: tcpip.LinkAddress(""),
+				State:    Incomplete,
+			},
 		},
 		{
 			EventType: entryTestChanged,
 			NICID:     entryTestNICID,
-			Addr:      entryTestAddr1,
-			LinkAddr:  entryTestLinkAddr1,
-			State:     Stale,
+			Entry: NeighborEntry{
+				Addr:     entryTestAddr1,
+				LinkAddr: entryTestLinkAddr1,
+				State:    Stale,
+			},
 		},
 		{
 			EventType: entryTestChanged,
 			NICID:     entryTestNICID,
-			Addr:      entryTestAddr1,
-			LinkAddr:  entryTestLinkAddr1,
-			State:     Reachable,
+			Entry: NeighborEntry{
+				Addr:     entryTestAddr1,
+				LinkAddr: entryTestLinkAddr1,
+				State:    Reachable,
+			},
 		},
 	}
 	nudDisp.mu.Lock()
@@ -1446,31 +1551,13 @@ func TestEntryStaleToReachableWhenSolicitedConfirmationWithoutAddress(t *testing
 
 func TestEntryStaleToStaleWhenOverrideConfirmation(t *testing.T) {
 	c := DefaultNUDConfigurations()
-	e, nudDisp, linkRes, _ := entryTestSetup(c)
+	e, nudDisp, linkRes, clock := entryTestSetup(c)
 
 	e.mu.Lock()
-	e.handlePacketQueuedLocked()
-	e.handleConfirmationLocked(entryTestLinkAddr1, ReachabilityConfirmationFlags{
-		Solicited: false,
-		Override:  false,
-		IsRouter:  false,
-	})
-	if got, want := e.neigh.State, Stale; got != want {
-		t.Errorf("got e.neigh.State = %q, want = %q", got, want)
-	}
-	e.handleConfirmationLocked(entryTestLinkAddr2, ReachabilityConfirmationFlags{
-		Solicited: false,
-		Override:  true,
-		IsRouter:  false,
-	})
-	if got, want := e.neigh.State, Stale; got != want {
-		t.Errorf("got e.neigh.State = %q, want = %q", got, want)
-	}
-	if got, want := e.neigh.LinkAddr, entryTestLinkAddr2; got != want {
-		t.Errorf("got e.neigh.LinkAddr = %q, want = %q", got, want)
-	}
+	e.handlePacketQueuedLocked(entryTestAddr2)
 	e.mu.Unlock()
 
+	runImmediatelyScheduledJobs(clock)
 	wantProbes := []entryTestProbeInfo{
 		{
 			RemoteAddress:     entryTestAddr1,
@@ -1485,27 +1572,55 @@ func TestEntryStaleToStaleWhenOverrideConfirmation(t *testing.T) {
 		t.Fatalf("link address resolver probes mismatch (-got, +want):\n%s", diff)
 	}
 
+	e.mu.Lock()
+	e.handleConfirmationLocked(entryTestLinkAddr1, ReachabilityConfirmationFlags{
+		Solicited: false,
+		Override:  false,
+		IsRouter:  false,
+	})
+	if e.neigh.State != Stale {
+		t.Errorf("got e.neigh.State = %q, want = %q", e.neigh.State, Stale)
+	}
+	e.handleConfirmationLocked(entryTestLinkAddr2, ReachabilityConfirmationFlags{
+		Solicited: false,
+		Override:  true,
+		IsRouter:  false,
+	})
+	if e.neigh.State != Stale {
+		t.Errorf("got e.neigh.State = %q, want = %q", e.neigh.State, Stale)
+	}
+	if e.neigh.LinkAddr != entryTestLinkAddr2 {
+		t.Errorf("got e.neigh.LinkAddr = %q, want = %q", e.neigh.LinkAddr, entryTestLinkAddr2)
+	}
+	e.mu.Unlock()
+
 	wantEvents := []testEntryEventInfo{
 		{
 			EventType: entryTestAdded,
 			NICID:     entryTestNICID,
-			Addr:      entryTestAddr1,
-			LinkAddr:  tcpip.LinkAddress(""),
-			State:     Incomplete,
+			Entry: NeighborEntry{
+				Addr:     entryTestAddr1,
+				LinkAddr: tcpip.LinkAddress(""),
+				State:    Incomplete,
+			},
 		},
 		{
 			EventType: entryTestChanged,
 			NICID:     entryTestNICID,
-			Addr:      entryTestAddr1,
-			LinkAddr:  entryTestLinkAddr1,
-			State:     Stale,
+			Entry: NeighborEntry{
+				Addr:     entryTestAddr1,
+				LinkAddr: entryTestLinkAddr1,
+				State:    Stale,
+			},
 		},
 		{
 			EventType: entryTestChanged,
 			NICID:     entryTestNICID,
-			Addr:      entryTestAddr1,
-			LinkAddr:  entryTestLinkAddr2,
-			State:     Stale,
+			Entry: NeighborEntry{
+				Addr:     entryTestAddr1,
+				LinkAddr: entryTestLinkAddr2,
+				State:    Stale,
+			},
 		},
 	}
 	nudDisp.mu.Lock()
@@ -1517,27 +1632,13 @@ func TestEntryStaleToStaleWhenOverrideConfirmation(t *testing.T) {
 
 func TestEntryStaleToStaleWhenProbeUpdateAddress(t *testing.T) {
 	c := DefaultNUDConfigurations()
-	e, nudDisp, linkRes, _ := entryTestSetup(c)
+	e, nudDisp, linkRes, clock := entryTestSetup(c)
 
 	e.mu.Lock()
-	e.handlePacketQueuedLocked()
-	e.handleConfirmationLocked(entryTestLinkAddr1, ReachabilityConfirmationFlags{
-		Solicited: false,
-		Override:  false,
-		IsRouter:  false,
-	})
-	if got, want := e.neigh.State, Stale; got != want {
-		t.Errorf("got e.neigh.State = %q, want = %q", got, want)
-	}
-	e.handleProbeLocked(entryTestLinkAddr2)
-	if got, want := e.neigh.State, Stale; got != want {
-		t.Errorf("got e.neigh.State = %q, want = %q", got, want)
-	}
-	if got, want := e.neigh.LinkAddr, entryTestLinkAddr2; got != want {
-		t.Errorf("got e.neigh.LinkAddr = %q, want = %q", got, want)
-	}
+	e.handlePacketQueuedLocked(entryTestAddr2)
 	e.mu.Unlock()
 
+	runImmediatelyScheduledJobs(clock)
 	wantProbes := []entryTestProbeInfo{
 		{
 			RemoteAddress:     entryTestAddr1,
@@ -1552,27 +1653,51 @@ func TestEntryStaleToStaleWhenProbeUpdateAddress(t *testing.T) {
 		t.Fatalf("link address resolver probes mismatch (-got, +want):\n%s", diff)
 	}
 
+	e.mu.Lock()
+	e.handleConfirmationLocked(entryTestLinkAddr1, ReachabilityConfirmationFlags{
+		Solicited: false,
+		Override:  false,
+		IsRouter:  false,
+	})
+	if e.neigh.State != Stale {
+		t.Errorf("got e.neigh.State = %q, want = %q", e.neigh.State, Stale)
+	}
+	e.handleProbeLocked(entryTestLinkAddr2)
+	if e.neigh.State != Stale {
+		t.Errorf("got e.neigh.State = %q, want = %q", e.neigh.State, Stale)
+	}
+	if e.neigh.LinkAddr != entryTestLinkAddr2 {
+		t.Errorf("got e.neigh.LinkAddr = %q, want = %q", e.neigh.LinkAddr, entryTestLinkAddr2)
+	}
+	e.mu.Unlock()
+
 	wantEvents := []testEntryEventInfo{
 		{
 			EventType: entryTestAdded,
 			NICID:     entryTestNICID,
-			Addr:      entryTestAddr1,
-			LinkAddr:  tcpip.LinkAddress(""),
-			State:     Incomplete,
+			Entry: NeighborEntry{
+				Addr:     entryTestAddr1,
+				LinkAddr: tcpip.LinkAddress(""),
+				State:    Incomplete,
+			},
 		},
 		{
 			EventType: entryTestChanged,
 			NICID:     entryTestNICID,
-			Addr:      entryTestAddr1,
-			LinkAddr:  entryTestLinkAddr1,
-			State:     Stale,
+			Entry: NeighborEntry{
+				Addr:     entryTestAddr1,
+				LinkAddr: entryTestLinkAddr1,
+				State:    Stale,
+			},
 		},
 		{
 			EventType: entryTestChanged,
 			NICID:     entryTestNICID,
-			Addr:      entryTestAddr1,
-			LinkAddr:  entryTestLinkAddr2,
-			State:     Stale,
+			Entry: NeighborEntry{
+				Addr:     entryTestAddr1,
+				LinkAddr: entryTestLinkAddr2,
+				State:    Stale,
+			},
 		},
 	}
 	nudDisp.mu.Lock()
@@ -1584,24 +1709,13 @@ func TestEntryStaleToStaleWhenProbeUpdateAddress(t *testing.T) {
 
 func TestEntryStaleToDelay(t *testing.T) {
 	c := DefaultNUDConfigurations()
-	e, nudDisp, linkRes, _ := entryTestSetup(c)
+	e, nudDisp, linkRes, clock := entryTestSetup(c)
 
 	e.mu.Lock()
-	e.handlePacketQueuedLocked()
-	e.handleConfirmationLocked(entryTestLinkAddr1, ReachabilityConfirmationFlags{
-		Solicited: false,
-		Override:  false,
-		IsRouter:  false,
-	})
-	if got, want := e.neigh.State, Stale; got != want {
-		t.Errorf("got e.neigh.State = %q, want = %q", got, want)
-	}
-	e.handlePacketQueuedLocked()
-	if got, want := e.neigh.State, Delay; got != want {
-		t.Errorf("got e.neigh.State = %q, want = %q", got, want)
-	}
+	e.handlePacketQueuedLocked(entryTestAddr2)
 	e.mu.Unlock()
 
+	runImmediatelyScheduledJobs(clock)
 	wantProbes := []entryTestProbeInfo{
 		{
 			RemoteAddress:     entryTestAddr1,
@@ -1616,27 +1730,48 @@ func TestEntryStaleToDelay(t *testing.T) {
 		t.Fatalf("link address resolver probes mismatch (-got, +want):\n%s", diff)
 	}
 
+	e.mu.Lock()
+	e.handleConfirmationLocked(entryTestLinkAddr1, ReachabilityConfirmationFlags{
+		Solicited: false,
+		Override:  false,
+		IsRouter:  false,
+	})
+	if e.neigh.State != Stale {
+		t.Errorf("got e.neigh.State = %q, want = %q", e.neigh.State, Stale)
+	}
+	e.handlePacketQueuedLocked(entryTestAddr2)
+	if e.neigh.State != Delay {
+		t.Errorf("got e.neigh.State = %q, want = %q", e.neigh.State, Delay)
+	}
+	e.mu.Unlock()
+
 	wantEvents := []testEntryEventInfo{
 		{
 			EventType: entryTestAdded,
 			NICID:     entryTestNICID,
-			Addr:      entryTestAddr1,
-			LinkAddr:  tcpip.LinkAddress(""),
-			State:     Incomplete,
+			Entry: NeighborEntry{
+				Addr:     entryTestAddr1,
+				LinkAddr: tcpip.LinkAddress(""),
+				State:    Incomplete,
+			},
 		},
 		{
 			EventType: entryTestChanged,
 			NICID:     entryTestNICID,
-			Addr:      entryTestAddr1,
-			LinkAddr:  entryTestLinkAddr1,
-			State:     Stale,
+			Entry: NeighborEntry{
+				Addr:     entryTestAddr1,
+				LinkAddr: entryTestLinkAddr1,
+				State:    Stale,
+			},
 		},
 		{
 			EventType: entryTestChanged,
 			NICID:     entryTestNICID,
-			Addr:      entryTestAddr1,
-			LinkAddr:  entryTestLinkAddr1,
-			State:     Delay,
+			Entry: NeighborEntry{
+				Addr:     entryTestAddr1,
+				LinkAddr: entryTestLinkAddr1,
+				State:    Delay,
+			},
 		},
 	}
 	nudDisp.mu.Lock()
@@ -1656,22 +1791,10 @@ func TestEntryDelayToReachableWhenUpperLevelConfirmation(t *testing.T) {
 	e, nudDisp, linkRes, clock := entryTestSetup(c)
 
 	e.mu.Lock()
-	e.handlePacketQueuedLocked()
-	e.handleConfirmationLocked(entryTestLinkAddr1, ReachabilityConfirmationFlags{
-		Solicited: false,
-		Override:  false,
-		IsRouter:  false,
-	})
-	e.handlePacketQueuedLocked()
-	if got, want := e.neigh.State, Delay; got != want {
-		t.Errorf("got e.neigh.State = %q, want = %q", got, want)
-	}
-	e.handleUpperLevelConfirmationLocked()
-	if got, want := e.neigh.State, Reachable; got != want {
-		t.Errorf("got e.neigh.State = %q, want = %q", got, want)
-	}
+	e.handlePacketQueuedLocked(entryTestAddr2)
 	e.mu.Unlock()
 
+	runImmediatelyScheduledJobs(clock)
 	wantProbes := []entryTestProbeInfo{
 		{
 			RemoteAddress:     entryTestAddr1,
@@ -1686,43 +1809,68 @@ func TestEntryDelayToReachableWhenUpperLevelConfirmation(t *testing.T) {
 		t.Fatalf("link address resolver probes mismatch (-got, +want):\n%s", diff)
 	}
 
-	clock.Advance(c.BaseReachableTime)
+	e.mu.Lock()
+	e.handleConfirmationLocked(entryTestLinkAddr1, ReachabilityConfirmationFlags{
+		Solicited: false,
+		Override:  false,
+		IsRouter:  false,
+	})
+	e.handlePacketQueuedLocked(entryTestAddr2)
+	if e.neigh.State != Delay {
+		t.Errorf("got e.neigh.State = %q, want = %q", e.neigh.State, Delay)
+	}
+	e.handleUpperLevelConfirmationLocked()
+	if e.neigh.State != Reachable {
+		t.Errorf("got e.neigh.State = %q, want = %q", e.neigh.State, Reachable)
+	}
+	e.mu.Unlock()
 
+	clock.Advance(c.BaseReachableTime)
 	wantEvents := []testEntryEventInfo{
 		{
 			EventType: entryTestAdded,
 			NICID:     entryTestNICID,
-			Addr:      entryTestAddr1,
-			LinkAddr:  tcpip.LinkAddress(""),
-			State:     Incomplete,
+			Entry: NeighborEntry{
+				Addr:     entryTestAddr1,
+				LinkAddr: tcpip.LinkAddress(""),
+				State:    Incomplete,
+			},
 		},
 		{
 			EventType: entryTestChanged,
 			NICID:     entryTestNICID,
-			Addr:      entryTestAddr1,
-			LinkAddr:  entryTestLinkAddr1,
-			State:     Stale,
+			Entry: NeighborEntry{
+				Addr:     entryTestAddr1,
+				LinkAddr: entryTestLinkAddr1,
+				State:    Stale,
+			},
 		},
 		{
 			EventType: entryTestChanged,
 			NICID:     entryTestNICID,
-			Addr:      entryTestAddr1,
-			LinkAddr:  entryTestLinkAddr1,
-			State:     Delay,
+			Entry: NeighborEntry{
+				Addr:     entryTestAddr1,
+				LinkAddr: entryTestLinkAddr1,
+				State:    Delay,
+			},
 		},
 		{
 			EventType: entryTestChanged,
 			NICID:     entryTestNICID,
-			Addr:      entryTestAddr1,
-			LinkAddr:  entryTestLinkAddr1,
-			State:     Reachable,
+			Entry: NeighborEntry{
+				Addr:     entryTestAddr1,
+				LinkAddr: entryTestLinkAddr1,
+				State:    Reachable,
+			},
 		},
 		{
 			EventType: entryTestChanged,
 			NICID:     entryTestNICID,
-			Addr:      entryTestAddr1,
-			LinkAddr:  entryTestLinkAddr1,
-			State:     Stale,
+			Entry: NeighborEntry{
+				Addr:     entryTestAddr1,
+				LinkAddr: entryTestLinkAddr1,
+				State:    Stale,
+			},
 		},
 	}
 	nudDisp.mu.Lock()
@@ -1743,29 +1891,10 @@ func TestEntryDelayToReachableWhenSolicitedOverrideConfirmation(t *testing.T) {
 	e, nudDisp, linkRes, clock := entryTestSetup(c)
 
 	e.mu.Lock()
-	e.handlePacketQueuedLocked()
-	e.handleConfirmationLocked(entryTestLinkAddr1, ReachabilityConfirmationFlags{
-		Solicited: false,
-		Override:  false,
-		IsRouter:  false,
-	})
-	e.handlePacketQueuedLocked()
-	if got, want := e.neigh.State, Delay; got != want {
-		t.Errorf("got e.neigh.State = %q, want = %q", got, want)
-	}
-	e.handleConfirmationLocked(entryTestLinkAddr2, ReachabilityConfirmationFlags{
-		Solicited: true,
-		Override:  true,
-		IsRouter:  false,
-	})
-	if got, want := e.neigh.State, Reachable; got != want {
-		t.Errorf("got e.neigh.State = %q, want = %q", got, want)
-	}
-	if got, want := e.neigh.LinkAddr, entryTestLinkAddr2; got != want {
-		t.Errorf("got e.neigh.LinkAddr = %q, want = %q", got, want)
-	}
+	e.handlePacketQueuedLocked(entryTestAddr2)
 	e.mu.Unlock()
 
+	runImmediatelyScheduledJobs(clock)
 	wantProbes := []entryTestProbeInfo{
 		{
 			RemoteAddress:     entryTestAddr1,
@@ -1780,43 +1909,75 @@ func TestEntryDelayToReachableWhenSolicitedOverrideConfirmation(t *testing.T) {
 		t.Fatalf("link address resolver probes mismatch (-got, +want):\n%s", diff)
 	}
 
-	clock.Advance(c.BaseReachableTime)
+	e.mu.Lock()
+	e.handleConfirmationLocked(entryTestLinkAddr1, ReachabilityConfirmationFlags{
+		Solicited: false,
+		Override:  false,
+		IsRouter:  false,
+	})
+	e.handlePacketQueuedLocked(entryTestAddr2)
+	if e.neigh.State != Delay {
+		t.Errorf("got e.neigh.State = %q, want = %q", e.neigh.State, Delay)
+	}
+	e.handleConfirmationLocked(entryTestLinkAddr2, ReachabilityConfirmationFlags{
+		Solicited: true,
+		Override:  true,
+		IsRouter:  false,
+	})
+	if e.neigh.State != Reachable {
+		t.Errorf("got e.neigh.State = %q, want = %q", e.neigh.State, Reachable)
+	}
+	if e.neigh.LinkAddr != entryTestLinkAddr2 {
+		t.Errorf("got e.neigh.LinkAddr = %q, want = %q", e.neigh.LinkAddr, entryTestLinkAddr2)
+	}
+	e.mu.Unlock()
 
+	clock.Advance(c.BaseReachableTime)
 	wantEvents := []testEntryEventInfo{
 		{
 			EventType: entryTestAdded,
 			NICID:     entryTestNICID,
-			Addr:      entryTestAddr1,
-			LinkAddr:  tcpip.LinkAddress(""),
-			State:     Incomplete,
+			Entry: NeighborEntry{
+				Addr:     entryTestAddr1,
+				LinkAddr: tcpip.LinkAddress(""),
+				State:    Incomplete,
+			},
 		},
 		{
 			EventType: entryTestChanged,
 			NICID:     entryTestNICID,
-			Addr:      entryTestAddr1,
-			LinkAddr:  entryTestLinkAddr1,
-			State:     Stale,
+			Entry: NeighborEntry{
+				Addr:     entryTestAddr1,
+				LinkAddr: entryTestLinkAddr1,
+				State:    Stale,
+			},
 		},
 		{
 			EventType: entryTestChanged,
 			NICID:     entryTestNICID,
-			Addr:      entryTestAddr1,
-			LinkAddr:  entryTestLinkAddr1,
-			State:     Delay,
+			Entry: NeighborEntry{
+				Addr:     entryTestAddr1,
+				LinkAddr: entryTestLinkAddr1,
+				State:    Delay,
+			},
 		},
 		{
 			EventType: entryTestChanged,
 			NICID:     entryTestNICID,
-			Addr:      entryTestAddr1,
-			LinkAddr:  entryTestLinkAddr2,
-			State:     Reachable,
+			Entry: NeighborEntry{
+				Addr:     entryTestAddr1,
+				LinkAddr: entryTestLinkAddr2,
+				State:    Reachable,
+			},
 		},
 		{
 			EventType: entryTestChanged,
 			NICID:     entryTestNICID,
-			Addr:      entryTestAddr1,
-			LinkAddr:  entryTestLinkAddr2,
-			State:     Stale,
+			Entry: NeighborEntry{
+				Addr:     entryTestAddr1,
+				LinkAddr: entryTestLinkAddr2,
+				State:    Stale,
+			},
 		},
 	}
 	nudDisp.mu.Lock()
@@ -1837,13 +1998,31 @@ func TestEntryDelayToReachableWhenSolicitedConfirmationWithoutAddress(t *testing
 	e, nudDisp, linkRes, clock := entryTestSetup(c)
 
 	e.mu.Lock()
-	e.handlePacketQueuedLocked()
+	e.handlePacketQueuedLocked(entryTestAddr2)
+	e.mu.Unlock()
+
+	runImmediatelyScheduledJobs(clock)
+	wantProbes := []entryTestProbeInfo{
+		{
+			RemoteAddress:     entryTestAddr1,
+			RemoteLinkAddress: tcpip.LinkAddress(""),
+			LocalAddress:      entryTestAddr2,
+		},
+	}
+	linkRes.mu.Lock()
+	diff := cmp.Diff(linkRes.probes, wantProbes)
+	linkRes.mu.Unlock()
+	if diff != "" {
+		t.Fatalf("link address resolver probes mismatch (-got, +want):\n%s", diff)
+	}
+
+	e.mu.Lock()
 	e.handleConfirmationLocked(entryTestLinkAddr1, ReachabilityConfirmationFlags{
 		Solicited: false,
 		Override:  false,
 		IsRouter:  false,
 	})
-	e.handlePacketQueuedLocked()
+	e.handlePacketQueuedLocked(entryTestAddr2)
 	if e.neigh.State != Delay {
 		t.Errorf("got e.neigh.State = %q, want = %q", e.neigh.State, Delay)
 	}
@@ -1860,57 +2039,52 @@ func TestEntryDelayToReachableWhenSolicitedConfirmationWithoutAddress(t *testing
 	}
 	e.mu.Unlock()
 
-	wantProbes := []entryTestProbeInfo{
-		{
-			RemoteAddress:     entryTestAddr1,
-			RemoteLinkAddress: tcpip.LinkAddress(""),
-			LocalAddress:      entryTestAddr2,
-		},
-	}
-	linkRes.mu.Lock()
-	diff := cmp.Diff(linkRes.probes, wantProbes)
-	linkRes.mu.Unlock()
-	if diff != "" {
-		t.Fatalf("link address resolver probes mismatch (-got, +want):\n%s", diff)
-	}
-
 	clock.Advance(c.BaseReachableTime)
-
 	wantEvents := []testEntryEventInfo{
 		{
 			EventType: entryTestAdded,
 			NICID:     entryTestNICID,
-			Addr:      entryTestAddr1,
-			LinkAddr:  tcpip.LinkAddress(""),
-			State:     Incomplete,
+			Entry: NeighborEntry{
+				Addr:     entryTestAddr1,
+				LinkAddr: tcpip.LinkAddress(""),
+				State:    Incomplete,
+			},
 		},
 		{
 			EventType: entryTestChanged,
 			NICID:     entryTestNICID,
-			Addr:      entryTestAddr1,
-			LinkAddr:  entryTestLinkAddr1,
-			State:     Stale,
+			Entry: NeighborEntry{
+				Addr:     entryTestAddr1,
+				LinkAddr: entryTestLinkAddr1,
+				State:    Stale,
+			},
 		},
 		{
 			EventType: entryTestChanged,
 			NICID:     entryTestNICID,
-			Addr:      entryTestAddr1,
-			LinkAddr:  entryTestLinkAddr1,
-			State:     Delay,
+			Entry: NeighborEntry{
+				Addr:     entryTestAddr1,
+				LinkAddr: entryTestLinkAddr1,
+				State:    Delay,
+			},
 		},
 		{
 			EventType: entryTestChanged,
 			NICID:     entryTestNICID,
-			Addr:      entryTestAddr1,
-			LinkAddr:  entryTestLinkAddr1,
-			State:     Reachable,
+			Entry: NeighborEntry{
+				Addr:     entryTestAddr1,
+				LinkAddr: entryTestLinkAddr1,
+				State:    Reachable,
+			},
 		},
 		{
 			EventType: entryTestChanged,
 			NICID:     entryTestNICID,
-			Addr:      entryTestAddr1,
-			LinkAddr:  entryTestLinkAddr1,
-			State:     Stale,
+			Entry: NeighborEntry{
+				Addr:     entryTestAddr1,
+				LinkAddr: entryTestLinkAddr1,
+				State:    Stale,
+			},
 		},
 	}
 	nudDisp.mu.Lock()
@@ -1922,32 +2096,13 @@ func TestEntryDelayToReachableWhenSolicitedConfirmationWithoutAddress(t *testing
 
 func TestEntryStaysDelayWhenOverrideConfirmationWithSameAddress(t *testing.T) {
 	c := DefaultNUDConfigurations()
-	e, nudDisp, linkRes, _ := entryTestSetup(c)
+	e, nudDisp, linkRes, clock := entryTestSetup(c)
 
 	e.mu.Lock()
-	e.handlePacketQueuedLocked()
-	e.handleConfirmationLocked(entryTestLinkAddr1, ReachabilityConfirmationFlags{
-		Solicited: false,
-		Override:  false,
-		IsRouter:  false,
-	})
-	e.handlePacketQueuedLocked()
-	if got, want := e.neigh.State, Delay; got != want {
-		t.Errorf("got e.neigh.State = %q, want = %q", got, want)
-	}
-	e.handleConfirmationLocked(entryTestLinkAddr1, ReachabilityConfirmationFlags{
-		Solicited: false,
-		Override:  true,
-		IsRouter:  false,
-	})
-	if got, want := e.neigh.State, Delay; got != want {
-		t.Errorf("got e.neigh.State = %q, want = %q", got, want)
-	}
-	if got, want := e.neigh.LinkAddr, entryTestLinkAddr1; got != want {
-		t.Errorf("got e.neigh.LinkAddr = %q, want = %q", got, want)
-	}
+	e.handlePacketQueuedLocked(entryTestAddr2)
 	e.mu.Unlock()
 
+	runImmediatelyScheduledJobs(clock)
 	wantProbes := []entryTestProbeInfo{
 		{
 			RemoteAddress:     entryTestAddr1,
@@ -1962,27 +2117,56 @@ func TestEntryStaysDelayWhenOverrideConfirmationWithSameAddress(t *testing.T) {
 		t.Fatalf("link address resolver probes mismatch (-got, +want):\n%s", diff)
 	}
 
+	e.mu.Lock()
+	e.handleConfirmationLocked(entryTestLinkAddr1, ReachabilityConfirmationFlags{
+		Solicited: false,
+		Override:  false,
+		IsRouter:  false,
+	})
+	e.handlePacketQueuedLocked(entryTestAddr2)
+	if e.neigh.State != Delay {
+		t.Errorf("got e.neigh.State = %q, want = %q", e.neigh.State, Delay)
+	}
+	e.handleConfirmationLocked(entryTestLinkAddr1, ReachabilityConfirmationFlags{
+		Solicited: false,
+		Override:  true,
+		IsRouter:  false,
+	})
+	if e.neigh.State != Delay {
+		t.Errorf("got e.neigh.State = %q, want = %q", e.neigh.State, Delay)
+	}
+	if e.neigh.LinkAddr != entryTestLinkAddr1 {
+		t.Errorf("got e.neigh.LinkAddr = %q, want = %q", e.neigh.LinkAddr, entryTestLinkAddr1)
+	}
+	e.mu.Unlock()
+
 	wantEvents := []testEntryEventInfo{
 		{
 			EventType: entryTestAdded,
 			NICID:     entryTestNICID,
-			Addr:      entryTestAddr1,
-			LinkAddr:  tcpip.LinkAddress(""),
-			State:     Incomplete,
+			Entry: NeighborEntry{
+				Addr:     entryTestAddr1,
+				LinkAddr: tcpip.LinkAddress(""),
+				State:    Incomplete,
+			},
 		},
 		{
 			EventType: entryTestChanged,
 			NICID:     entryTestNICID,
-			Addr:      entryTestAddr1,
-			LinkAddr:  entryTestLinkAddr1,
-			State:     Stale,
+			Entry: NeighborEntry{
+				Addr:     entryTestAddr1,
+				LinkAddr: entryTestLinkAddr1,
+				State:    Stale,
+			},
 		},
 		{
 			EventType: entryTestChanged,
 			NICID:     entryTestNICID,
-			Addr:      entryTestAddr1,
-			LinkAddr:  entryTestLinkAddr1,
-			State:     Delay,
+			Entry: NeighborEntry{
+				Addr:     entryTestAddr1,
+				LinkAddr: entryTestLinkAddr1,
+				State:    Delay,
+			},
 		},
 	}
 	nudDisp.mu.Lock()
@@ -1994,25 +2178,13 @@ func TestEntryStaysDelayWhenOverrideConfirmationWithSameAddress(t *testing.T) {
 
 func TestEntryDelayToStaleWhenProbeWithDifferentAddress(t *testing.T) {
 	c := DefaultNUDConfigurations()
-	e, nudDisp, linkRes, _ := entryTestSetup(c)
+	e, nudDisp, linkRes, clock := entryTestSetup(c)
 
 	e.mu.Lock()
-	e.handlePacketQueuedLocked()
-	e.handleConfirmationLocked(entryTestLinkAddr1, ReachabilityConfirmationFlags{
-		Solicited: false,
-		Override:  false,
-		IsRouter:  false,
-	})
-	e.handlePacketQueuedLocked()
-	if got, want := e.neigh.State, Delay; got != want {
-		t.Errorf("got e.neigh.State = %q, want = %q", got, want)
-	}
-	e.handleProbeLocked(entryTestLinkAddr2)
-	if got, want := e.neigh.State, Stale; got != want {
-		t.Errorf("got e.neigh.State = %q, want = %q", got, want)
-	}
+	e.handlePacketQueuedLocked(entryTestAddr2)
 	e.mu.Unlock()
 
+	runImmediatelyScheduledJobs(clock)
 	wantProbes := []entryTestProbeInfo{
 		{
 			RemoteAddress:     entryTestAddr1,
@@ -2027,34 +2199,58 @@ func TestEntryDelayToStaleWhenProbeWithDifferentAddress(t *testing.T) {
 		t.Fatalf("link address resolver probes mismatch (-got, +want):\n%s", diff)
 	}
 
+	e.mu.Lock()
+	e.handleConfirmationLocked(entryTestLinkAddr1, ReachabilityConfirmationFlags{
+		Solicited: false,
+		Override:  false,
+		IsRouter:  false,
+	})
+	e.handlePacketQueuedLocked(entryTestAddr2)
+	if e.neigh.State != Delay {
+		t.Errorf("got e.neigh.State = %q, want = %q", e.neigh.State, Delay)
+	}
+	e.handleProbeLocked(entryTestLinkAddr2)
+	if e.neigh.State != Stale {
+		t.Errorf("got e.neigh.State = %q, want = %q", e.neigh.State, Stale)
+	}
+	e.mu.Unlock()
+
 	wantEvents := []testEntryEventInfo{
 		{
 			EventType: entryTestAdded,
 			NICID:     entryTestNICID,
-			Addr:      entryTestAddr1,
-			LinkAddr:  tcpip.LinkAddress(""),
-			State:     Incomplete,
+			Entry: NeighborEntry{
+				Addr:     entryTestAddr1,
+				LinkAddr: tcpip.LinkAddress(""),
+				State:    Incomplete,
+			},
 		},
 		{
 			EventType: entryTestChanged,
 			NICID:     entryTestNICID,
-			Addr:      entryTestAddr1,
-			LinkAddr:  entryTestLinkAddr1,
-			State:     Stale,
+			Entry: NeighborEntry{
+				Addr:     entryTestAddr1,
+				LinkAddr: entryTestLinkAddr1,
+				State:    Stale,
+			},
 		},
 		{
 			EventType: entryTestChanged,
 			NICID:     entryTestNICID,
-			Addr:      entryTestAddr1,
-			LinkAddr:  entryTestLinkAddr1,
-			State:     Delay,
+			Entry: NeighborEntry{
+				Addr:     entryTestAddr1,
+				LinkAddr: entryTestLinkAddr1,
+				State:    Delay,
+			},
 		},
 		{
 			EventType: entryTestChanged,
 			NICID:     entryTestNICID,
-			Addr:      entryTestAddr1,
-			LinkAddr:  entryTestLinkAddr2,
-			State:     Stale,
+			Entry: NeighborEntry{
+				Addr:     entryTestAddr1,
+				LinkAddr: entryTestLinkAddr2,
+				State:    Stale,
+			},
 		},
 	}
 	nudDisp.mu.Lock()
@@ -2066,29 +2262,13 @@ func TestEntryDelayToStaleWhenProbeWithDifferentAddress(t *testing.T) {
 
 func TestEntryDelayToStaleWhenConfirmationWithDifferentAddress(t *testing.T) {
 	c := DefaultNUDConfigurations()
-	e, nudDisp, linkRes, _ := entryTestSetup(c)
+	e, nudDisp, linkRes, clock := entryTestSetup(c)
 
 	e.mu.Lock()
-	e.handlePacketQueuedLocked()
-	e.handleConfirmationLocked(entryTestLinkAddr1, ReachabilityConfirmationFlags{
-		Solicited: false,
-		Override:  false,
-		IsRouter:  false,
-	})
-	e.handlePacketQueuedLocked()
-	if got, want := e.neigh.State, Delay; got != want {
-		t.Errorf("got e.neigh.State = %q, want = %q", got, want)
-	}
-	e.handleConfirmationLocked(entryTestLinkAddr2, ReachabilityConfirmationFlags{
-		Solicited: false,
-		Override:  true,
-		IsRouter:  false,
-	})
-	if got, want := e.neigh.State, Stale; got != want {
-		t.Errorf("got e.neigh.State = %q, want = %q", got, want)
-	}
+	e.handlePacketQueuedLocked(entryTestAddr2)
 	e.mu.Unlock()
 
+	runImmediatelyScheduledJobs(clock)
 	wantProbes := []entryTestProbeInfo{
 		{
 			RemoteAddress:     entryTestAddr1,
@@ -2103,34 +2283,62 @@ func TestEntryDelayToStaleWhenConfirmationWithDifferentAddress(t *testing.T) {
 		t.Fatalf("link address resolver probes mismatch (-got, +want):\n%s", diff)
 	}
 
+	e.mu.Lock()
+	e.handleConfirmationLocked(entryTestLinkAddr1, ReachabilityConfirmationFlags{
+		Solicited: false,
+		Override:  false,
+		IsRouter:  false,
+	})
+	e.handlePacketQueuedLocked(entryTestAddr2)
+	if e.neigh.State != Delay {
+		t.Errorf("got e.neigh.State = %q, want = %q", e.neigh.State, Delay)
+	}
+	e.handleConfirmationLocked(entryTestLinkAddr2, ReachabilityConfirmationFlags{
+		Solicited: false,
+		Override:  true,
+		IsRouter:  false,
+	})
+	if e.neigh.State != Stale {
+		t.Errorf("got e.neigh.State = %q, want = %q", e.neigh.State, Stale)
+	}
+	e.mu.Unlock()
+
 	wantEvents := []testEntryEventInfo{
 		{
 			EventType: entryTestAdded,
 			NICID:     entryTestNICID,
-			Addr:      entryTestAddr1,
-			LinkAddr:  tcpip.LinkAddress(""),
-			State:     Incomplete,
+			Entry: NeighborEntry{
+				Addr:     entryTestAddr1,
+				LinkAddr: tcpip.LinkAddress(""),
+				State:    Incomplete,
+			},
 		},
 		{
 			EventType: entryTestChanged,
 			NICID:     entryTestNICID,
-			Addr:      entryTestAddr1,
-			LinkAddr:  entryTestLinkAddr1,
-			State:     Stale,
+			Entry: NeighborEntry{
+				Addr:     entryTestAddr1,
+				LinkAddr: entryTestLinkAddr1,
+				State:    Stale,
+			},
 		},
 		{
 			EventType: entryTestChanged,
 			NICID:     entryTestNICID,
-			Addr:      entryTestAddr1,
-			LinkAddr:  entryTestLinkAddr1,
-			State:     Delay,
+			Entry: NeighborEntry{
+				Addr:     entryTestAddr1,
+				LinkAddr: entryTestLinkAddr1,
+				State:    Delay,
+			},
 		},
 		{
 			EventType: entryTestChanged,
 			NICID:     entryTestNICID,
-			Addr:      entryTestAddr1,
-			LinkAddr:  entryTestLinkAddr2,
-			State:     Stale,
+			Entry: NeighborEntry{
+				Addr:     entryTestAddr1,
+				LinkAddr: entryTestLinkAddr2,
+				State:    Stale,
+			},
 		},
 	}
 	nudDisp.mu.Lock()
@@ -2145,69 +2353,91 @@ func TestEntryDelayToProbe(t *testing.T) {
 	e, nudDisp, linkRes, clock := entryTestSetup(c)
 
 	e.mu.Lock()
-	e.handlePacketQueuedLocked()
+	e.handlePacketQueuedLocked(entryTestAddr2)
+	e.mu.Unlock()
+
+	runImmediatelyScheduledJobs(clock)
+	{
+		wantProbes := []entryTestProbeInfo{
+			{
+				RemoteAddress:     entryTestAddr1,
+				RemoteLinkAddress: tcpip.LinkAddress(""),
+				LocalAddress:      entryTestAddr2,
+			},
+		}
+		linkRes.mu.Lock()
+		diff := cmp.Diff(linkRes.probes, wantProbes)
+		linkRes.probes = nil
+		linkRes.mu.Unlock()
+		if diff != "" {
+			t.Fatalf("link address resolver probes mismatch (-got, +want):\n%s", diff)
+		}
+	}
+
+	e.mu.Lock()
 	e.handleConfirmationLocked(entryTestLinkAddr1, ReachabilityConfirmationFlags{
 		Solicited: false,
 		Override:  false,
 		IsRouter:  false,
 	})
-	e.handlePacketQueuedLocked()
+	e.handlePacketQueuedLocked(entryTestAddr2)
 	if got, want := e.neigh.State, Delay; got != want {
 		t.Errorf("got e.neigh.State = %q, want = %q", got, want)
 	}
 	e.mu.Unlock()
 
 	clock.Advance(c.DelayFirstProbeTime)
-
-	wantProbes := []entryTestProbeInfo{
-		// The first probe is caused by the Unknown-to-Incomplete transition.
-		{
-			RemoteAddress:     entryTestAddr1,
-			RemoteLinkAddress: tcpip.LinkAddress(""),
-			LocalAddress:      entryTestAddr2,
-		},
-		// The second probe is caused by the Delay-to-Probe transition.
-		{
-			RemoteAddress:     entryTestAddr1,
-			RemoteLinkAddress: entryTestLinkAddr1,
-			LocalAddress:      entryTestAddr2,
-		},
-	}
-	linkRes.mu.Lock()
-	diff := cmp.Diff(linkRes.probes, wantProbes)
-	linkRes.mu.Unlock()
-	if diff != "" {
-		t.Fatalf("link address resolver probes mismatch (-got, +want):\n%s", diff)
+	{
+		wantProbes := []entryTestProbeInfo{
+			{
+				RemoteAddress:     entryTestAddr1,
+				RemoteLinkAddress: entryTestLinkAddr1,
+			},
+		}
+		linkRes.mu.Lock()
+		diff := cmp.Diff(linkRes.probes, wantProbes)
+		linkRes.mu.Unlock()
+		if diff != "" {
+			t.Fatalf("link address resolver probes mismatch (-got, +want):\n%s", diff)
+		}
 	}
 
 	wantEvents := []testEntryEventInfo{
 		{
 			EventType: entryTestAdded,
 			NICID:     entryTestNICID,
-			Addr:      entryTestAddr1,
-			LinkAddr:  tcpip.LinkAddress(""),
-			State:     Incomplete,
+			Entry: NeighborEntry{
+				Addr:     entryTestAddr1,
+				LinkAddr: tcpip.LinkAddress(""),
+				State:    Incomplete,
+			},
 		},
 		{
 			EventType: entryTestChanged,
 			NICID:     entryTestNICID,
-			Addr:      entryTestAddr1,
-			LinkAddr:  entryTestLinkAddr1,
-			State:     Stale,
+			Entry: NeighborEntry{
+				Addr:     entryTestAddr1,
+				LinkAddr: entryTestLinkAddr1,
+				State:    Stale,
+			},
 		},
 		{
 			EventType: entryTestChanged,
 			NICID:     entryTestNICID,
-			Addr:      entryTestAddr1,
-			LinkAddr:  entryTestLinkAddr1,
-			State:     Delay,
+			Entry: NeighborEntry{
+				Addr:     entryTestAddr1,
+				LinkAddr: entryTestLinkAddr1,
+				State:    Delay,
+			},
 		},
 		{
 			EventType: entryTestChanged,
 			NICID:     entryTestNICID,
-			Addr:      entryTestAddr1,
-			LinkAddr:  entryTestLinkAddr1,
-			State:     Probe,
+			Entry: NeighborEntry{
+				Addr:     entryTestAddr1,
+				LinkAddr: entryTestLinkAddr1,
+				State:    Probe,
+			},
 		},
 	}
 	nudDisp.mu.Lock()
@@ -2228,36 +2458,50 @@ func TestEntryProbeToStaleWhenProbeWithDifferentAddress(t *testing.T) {
 	e, nudDisp, linkRes, clock := entryTestSetup(c)
 
 	e.mu.Lock()
-	e.handlePacketQueuedLocked()
+	e.handlePacketQueuedLocked(entryTestAddr2)
+	e.mu.Unlock()
+
+	runImmediatelyScheduledJobs(clock)
+	{
+		wantProbes := []entryTestProbeInfo{
+			{
+				RemoteAddress:     entryTestAddr1,
+				RemoteLinkAddress: tcpip.LinkAddress(""),
+				LocalAddress:      entryTestAddr2,
+			},
+		}
+		linkRes.mu.Lock()
+		diff := cmp.Diff(linkRes.probes, wantProbes)
+		linkRes.probes = nil
+		linkRes.mu.Unlock()
+		if diff != "" {
+			t.Fatalf("link address resolver probes mismatch (-got, +want):\n%s", diff)
+		}
+	}
+
+	e.mu.Lock()
 	e.handleConfirmationLocked(entryTestLinkAddr1, ReachabilityConfirmationFlags{
 		Solicited: false,
 		Override:  false,
 		IsRouter:  false,
 	})
-	e.handlePacketQueuedLocked()
+	e.handlePacketQueuedLocked(entryTestAddr2)
 	e.mu.Unlock()
 
 	clock.Advance(c.DelayFirstProbeTime)
-
-	wantProbes := []entryTestProbeInfo{
-		// The first probe is caused by the Unknown-to-Incomplete transition.
-		{
-			RemoteAddress:     entryTestAddr1,
-			RemoteLinkAddress: tcpip.LinkAddress(""),
-			LocalAddress:      entryTestAddr2,
-		},
-		// The second probe is caused by the Delay-to-Probe transition.
-		{
-			RemoteAddress:     entryTestAddr1,
-			RemoteLinkAddress: entryTestLinkAddr1,
-			LocalAddress:      entryTestAddr2,
-		},
-	}
-	linkRes.mu.Lock()
-	diff := cmp.Diff(linkRes.probes, wantProbes)
-	linkRes.mu.Unlock()
-	if diff != "" {
-		t.Fatalf("link address resolver probes mismatch (-got, +want):\n%s", diff)
+	{
+		wantProbes := []entryTestProbeInfo{
+			{
+				RemoteAddress:     entryTestAddr1,
+				RemoteLinkAddress: entryTestLinkAddr1,
+			},
+		}
+		linkRes.mu.Lock()
+		diff := cmp.Diff(linkRes.probes, wantProbes)
+		linkRes.mu.Unlock()
+		if diff != "" {
+			t.Fatalf("link address resolver probes mismatch (-got, +want):\n%s", diff)
+		}
 	}
 
 	e.mu.Lock()
@@ -2274,37 +2518,47 @@ func TestEntryProbeToStaleWhenProbeWithDifferentAddress(t *testing.T) {
 		{
 			EventType: entryTestAdded,
 			NICID:     entryTestNICID,
-			Addr:      entryTestAddr1,
-			LinkAddr:  tcpip.LinkAddress(""),
-			State:     Incomplete,
+			Entry: NeighborEntry{
+				Addr:     entryTestAddr1,
+				LinkAddr: tcpip.LinkAddress(""),
+				State:    Incomplete,
+			},
 		},
 		{
 			EventType: entryTestChanged,
 			NICID:     entryTestNICID,
-			Addr:      entryTestAddr1,
-			LinkAddr:  entryTestLinkAddr1,
-			State:     Stale,
+			Entry: NeighborEntry{
+				Addr:     entryTestAddr1,
+				LinkAddr: entryTestLinkAddr1,
+				State:    Stale,
+			},
 		},
 		{
 			EventType: entryTestChanged,
 			NICID:     entryTestNICID,
-			Addr:      entryTestAddr1,
-			LinkAddr:  entryTestLinkAddr1,
-			State:     Delay,
+			Entry: NeighborEntry{
+				Addr:     entryTestAddr1,
+				LinkAddr: entryTestLinkAddr1,
+				State:    Delay,
+			},
 		},
 		{
 			EventType: entryTestChanged,
 			NICID:     entryTestNICID,
-			Addr:      entryTestAddr1,
-			LinkAddr:  entryTestLinkAddr1,
-			State:     Probe,
+			Entry: NeighborEntry{
+				Addr:     entryTestAddr1,
+				LinkAddr: entryTestLinkAddr1,
+				State:    Probe,
+			},
 		},
 		{
 			EventType: entryTestChanged,
 			NICID:     entryTestNICID,
-			Addr:      entryTestAddr1,
-			LinkAddr:  entryTestLinkAddr2,
-			State:     Stale,
+			Entry: NeighborEntry{
+				Addr:     entryTestAddr1,
+				LinkAddr: entryTestLinkAddr2,
+				State:    Stale,
+			},
 		},
 	}
 	nudDisp.mu.Lock()
@@ -2312,12 +2566,6 @@ func TestEntryProbeToStaleWhenProbeWithDifferentAddress(t *testing.T) {
 		t.Errorf("nud dispatcher events mismatch (-got, +want):\n%s", diff)
 	}
 	nudDisp.mu.Unlock()
-
-	e.mu.Lock()
-	if got, want := e.neigh.State, Stale; got != want {
-		t.Errorf("got e.neigh.State = %q, want = %q", got, want)
-	}
-	e.mu.Unlock()
 }
 
 func TestEntryProbeToStaleWhenConfirmationWithDifferentAddress(t *testing.T) {
@@ -2325,36 +2573,50 @@ func TestEntryProbeToStaleWhenConfirmationWithDifferentAddress(t *testing.T) {
 	e, nudDisp, linkRes, clock := entryTestSetup(c)
 
 	e.mu.Lock()
-	e.handlePacketQueuedLocked()
+	e.handlePacketQueuedLocked(entryTestAddr2)
+	e.mu.Unlock()
+
+	runImmediatelyScheduledJobs(clock)
+	{
+		wantProbes := []entryTestProbeInfo{
+			{
+				RemoteAddress:     entryTestAddr1,
+				RemoteLinkAddress: tcpip.LinkAddress(""),
+				LocalAddress:      entryTestAddr2,
+			},
+		}
+		linkRes.mu.Lock()
+		diff := cmp.Diff(linkRes.probes, wantProbes)
+		linkRes.probes = nil
+		linkRes.mu.Unlock()
+		if diff != "" {
+			t.Fatalf("link address resolver probes mismatch (-got, +want):\n%s", diff)
+		}
+	}
+
+	e.mu.Lock()
 	e.handleConfirmationLocked(entryTestLinkAddr1, ReachabilityConfirmationFlags{
 		Solicited: false,
 		Override:  false,
 		IsRouter:  false,
 	})
-	e.handlePacketQueuedLocked()
+	e.handlePacketQueuedLocked(entryTestAddr2)
 	e.mu.Unlock()
 
 	clock.Advance(c.DelayFirstProbeTime)
-
-	wantProbes := []entryTestProbeInfo{
-		// The first probe is caused by the Unknown-to-Incomplete transition.
-		{
-			RemoteAddress:     entryTestAddr1,
-			RemoteLinkAddress: tcpip.LinkAddress(""),
-			LocalAddress:      entryTestAddr2,
-		},
-		// The second probe is caused by the Delay-to-Probe transition.
-		{
-			RemoteAddress:     entryTestAddr1,
-			RemoteLinkAddress: entryTestLinkAddr1,
-			LocalAddress:      entryTestAddr2,
-		},
-	}
-	linkRes.mu.Lock()
-	diff := cmp.Diff(linkRes.probes, wantProbes)
-	linkRes.mu.Unlock()
-	if diff != "" {
-		t.Fatalf("link address resolver probes mismatch (-got, +want):\n%s", diff)
+	{
+		wantProbes := []entryTestProbeInfo{
+			{
+				RemoteAddress:     entryTestAddr1,
+				RemoteLinkAddress: entryTestLinkAddr1,
+			},
+		}
+		linkRes.mu.Lock()
+		diff := cmp.Diff(linkRes.probes, wantProbes)
+		linkRes.mu.Unlock()
+		if diff != "" {
+			t.Fatalf("link address resolver probes mismatch (-got, +want):\n%s", diff)
+		}
 	}
 
 	e.mu.Lock()
@@ -2375,37 +2637,47 @@ func TestEntryProbeToStaleWhenConfirmationWithDifferentAddress(t *testing.T) {
 		{
 			EventType: entryTestAdded,
 			NICID:     entryTestNICID,
-			Addr:      entryTestAddr1,
-			LinkAddr:  tcpip.LinkAddress(""),
-			State:     Incomplete,
+			Entry: NeighborEntry{
+				Addr:     entryTestAddr1,
+				LinkAddr: tcpip.LinkAddress(""),
+				State:    Incomplete,
+			},
 		},
 		{
 			EventType: entryTestChanged,
 			NICID:     entryTestNICID,
-			Addr:      entryTestAddr1,
-			LinkAddr:  entryTestLinkAddr1,
-			State:     Stale,
+			Entry: NeighborEntry{
+				Addr:     entryTestAddr1,
+				LinkAddr: entryTestLinkAddr1,
+				State:    Stale,
+			},
 		},
 		{
 			EventType: entryTestChanged,
 			NICID:     entryTestNICID,
-			Addr:      entryTestAddr1,
-			LinkAddr:  entryTestLinkAddr1,
-			State:     Delay,
+			Entry: NeighborEntry{
+				Addr:     entryTestAddr1,
+				LinkAddr: entryTestLinkAddr1,
+				State:    Delay,
+			},
 		},
 		{
 			EventType: entryTestChanged,
 			NICID:     entryTestNICID,
-			Addr:      entryTestAddr1,
-			LinkAddr:  entryTestLinkAddr1,
-			State:     Probe,
+			Entry: NeighborEntry{
+				Addr:     entryTestAddr1,
+				LinkAddr: entryTestLinkAddr1,
+				State:    Probe,
+			},
 		},
 		{
 			EventType: entryTestChanged,
 			NICID:     entryTestNICID,
-			Addr:      entryTestAddr1,
-			LinkAddr:  entryTestLinkAddr2,
-			State:     Stale,
+			Entry: NeighborEntry{
+				Addr:     entryTestAddr1,
+				LinkAddr: entryTestLinkAddr2,
+				State:    Stale,
+			},
 		},
 	}
 	nudDisp.mu.Lock()
@@ -2413,12 +2685,6 @@ func TestEntryProbeToStaleWhenConfirmationWithDifferentAddress(t *testing.T) {
 		t.Errorf("nud dispatcher events mismatch (-got, +want):\n%s", diff)
 	}
 	nudDisp.mu.Unlock()
-
-	e.mu.Lock()
-	if got, want := e.neigh.State, Stale; got != want {
-		t.Errorf("got e.neigh.State = %q, want = %q", got, want)
-	}
-	e.mu.Unlock()
 }
 
 func TestEntryStaysProbeWhenOverrideConfirmationWithSameAddress(t *testing.T) {
@@ -2426,36 +2692,51 @@ func TestEntryStaysProbeWhenOverrideConfirmationWithSameAddress(t *testing.T) {
 	e, nudDisp, linkRes, clock := entryTestSetup(c)
 
 	e.mu.Lock()
-	e.handlePacketQueuedLocked()
+	e.handlePacketQueuedLocked(entryTestAddr2)
+	e.mu.Unlock()
+
+	runImmediatelyScheduledJobs(clock)
+	{
+		wantProbes := []entryTestProbeInfo{
+			{
+				RemoteAddress:     entryTestAddr1,
+				RemoteLinkAddress: tcpip.LinkAddress(""),
+				LocalAddress:      entryTestAddr2,
+			},
+		}
+		linkRes.mu.Lock()
+		diff := cmp.Diff(linkRes.probes, wantProbes)
+		linkRes.probes = nil
+		linkRes.mu.Unlock()
+		if diff != "" {
+			t.Fatalf("link address resolver probes mismatch (-got, +want):\n%s", diff)
+		}
+	}
+
+	e.mu.Lock()
 	e.handleConfirmationLocked(entryTestLinkAddr1, ReachabilityConfirmationFlags{
 		Solicited: false,
 		Override:  false,
 		IsRouter:  false,
 	})
-	e.handlePacketQueuedLocked()
+	e.handlePacketQueuedLocked(entryTestAddr2)
 	e.mu.Unlock()
 
 	clock.Advance(c.DelayFirstProbeTime)
-
-	wantProbes := []entryTestProbeInfo{
-		// The first probe is caused by the Unknown-to-Incomplete transition.
-		{
-			RemoteAddress:     entryTestAddr1,
-			RemoteLinkAddress: tcpip.LinkAddress(""),
-			LocalAddress:      entryTestAddr2,
-		},
-		// The second probe is caused by the Delay-to-Probe transition.
-		{
-			RemoteAddress:     entryTestAddr1,
-			RemoteLinkAddress: entryTestLinkAddr1,
-			LocalAddress:      entryTestAddr2,
-		},
-	}
-	linkRes.mu.Lock()
-	diff := cmp.Diff(linkRes.probes, wantProbes)
-	linkRes.mu.Unlock()
-	if diff != "" {
-		t.Fatalf("link address resolver probes mismatch (-got, +want):\n%s", diff)
+	{
+		wantProbes := []entryTestProbeInfo{
+			// The second probe is caused by the Delay-to-Probe transition.
+			{
+				RemoteAddress:     entryTestAddr1,
+				RemoteLinkAddress: entryTestLinkAddr1,
+			},
+		}
+		linkRes.mu.Lock()
+		diff := cmp.Diff(linkRes.probes, wantProbes)
+		linkRes.mu.Unlock()
+		if diff != "" {
+			t.Fatalf("link address resolver probes mismatch (-got, +want):\n%s", diff)
+		}
 	}
 
 	e.mu.Lock()
@@ -2479,30 +2760,38 @@ func TestEntryStaysProbeWhenOverrideConfirmationWithSameAddress(t *testing.T) {
 		{
 			EventType: entryTestAdded,
 			NICID:     entryTestNICID,
-			Addr:      entryTestAddr1,
-			LinkAddr:  tcpip.LinkAddress(""),
-			State:     Incomplete,
+			Entry: NeighborEntry{
+				Addr:     entryTestAddr1,
+				LinkAddr: tcpip.LinkAddress(""),
+				State:    Incomplete,
+			},
 		},
 		{
 			EventType: entryTestChanged,
 			NICID:     entryTestNICID,
-			Addr:      entryTestAddr1,
-			LinkAddr:  entryTestLinkAddr1,
-			State:     Stale,
+			Entry: NeighborEntry{
+				Addr:     entryTestAddr1,
+				LinkAddr: entryTestLinkAddr1,
+				State:    Stale,
+			},
 		},
 		{
 			EventType: entryTestChanged,
 			NICID:     entryTestNICID,
-			Addr:      entryTestAddr1,
-			LinkAddr:  entryTestLinkAddr1,
-			State:     Delay,
+			Entry: NeighborEntry{
+				Addr:     entryTestAddr1,
+				LinkAddr: entryTestLinkAddr1,
+				State:    Delay,
+			},
 		},
 		{
 			EventType: entryTestChanged,
 			NICID:     entryTestNICID,
-			Addr:      entryTestAddr1,
-			LinkAddr:  entryTestLinkAddr1,
-			State:     Probe,
+			Entry: NeighborEntry{
+				Addr:     entryTestAddr1,
+				LinkAddr: entryTestLinkAddr1,
+				State:    Probe,
+			},
 		},
 	}
 	nudDisp.mu.Lock()
@@ -2529,17 +2818,14 @@ func TestEntryUnknownToStaleToProbeToReachable(t *testing.T) {
 
 	e.mu.Lock()
 	e.handleProbeLocked(entryTestLinkAddr1)
-	e.handlePacketQueuedLocked()
+	e.handlePacketQueuedLocked(entryTestAddr2)
 	e.mu.Unlock()
 
 	clock.Advance(c.DelayFirstProbeTime)
-
 	wantProbes := []entryTestProbeInfo{
-		// Probe caused by the Delay-to-Probe transition
 		{
 			RemoteAddress:     entryTestAddr1,
 			RemoteLinkAddress: entryTestLinkAddr1,
-			LocalAddress:      entryTestAddr2,
 		},
 	}
 	linkRes.mu.Lock()
@@ -2567,42 +2853,51 @@ func TestEntryUnknownToStaleToProbeToReachable(t *testing.T) {
 	e.mu.Unlock()
 
 	clock.Advance(c.BaseReachableTime)
-
 	wantEvents := []testEntryEventInfo{
 		{
 			EventType: entryTestAdded,
 			NICID:     entryTestNICID,
-			Addr:      entryTestAddr1,
-			LinkAddr:  entryTestLinkAddr1,
-			State:     Stale,
+			Entry: NeighborEntry{
+				Addr:     entryTestAddr1,
+				LinkAddr: entryTestLinkAddr1,
+				State:    Stale,
+			},
 		},
 		{
 			EventType: entryTestChanged,
 			NICID:     entryTestNICID,
-			Addr:      entryTestAddr1,
-			LinkAddr:  entryTestLinkAddr1,
-			State:     Delay,
+			Entry: NeighborEntry{
+				Addr:     entryTestAddr1,
+				LinkAddr: entryTestLinkAddr1,
+				State:    Delay,
+			},
 		},
 		{
 			EventType: entryTestChanged,
 			NICID:     entryTestNICID,
-			Addr:      entryTestAddr1,
-			LinkAddr:  entryTestLinkAddr1,
-			State:     Probe,
+			Entry: NeighborEntry{
+				Addr:     entryTestAddr1,
+				LinkAddr: entryTestLinkAddr1,
+				State:    Probe,
+			},
 		},
 		{
 			EventType: entryTestChanged,
 			NICID:     entryTestNICID,
-			Addr:      entryTestAddr1,
-			LinkAddr:  entryTestLinkAddr2,
-			State:     Reachable,
+			Entry: NeighborEntry{
+				Addr:     entryTestAddr1,
+				LinkAddr: entryTestLinkAddr2,
+				State:    Reachable,
+			},
 		},
 		{
 			EventType: entryTestChanged,
 			NICID:     entryTestNICID,
-			Addr:      entryTestAddr1,
-			LinkAddr:  entryTestLinkAddr2,
-			State:     Stale,
+			Entry: NeighborEntry{
+				Addr:     entryTestAddr1,
+				LinkAddr: entryTestLinkAddr2,
+				State:    Stale,
+			},
 		},
 	}
 	nudDisp.mu.Lock()
@@ -2622,36 +2917,50 @@ func TestEntryProbeToReachableWhenSolicitedOverrideConfirmation(t *testing.T) {
 	e, nudDisp, linkRes, clock := entryTestSetup(c)
 
 	e.mu.Lock()
-	e.handlePacketQueuedLocked()
+	e.handlePacketQueuedLocked(entryTestAddr2)
+	e.mu.Unlock()
+
+	runImmediatelyScheduledJobs(clock)
+	{
+		wantProbes := []entryTestProbeInfo{
+			{
+				RemoteAddress:     entryTestAddr1,
+				RemoteLinkAddress: tcpip.LinkAddress(""),
+				LocalAddress:      entryTestAddr2,
+			},
+		}
+		linkRes.mu.Lock()
+		diff := cmp.Diff(linkRes.probes, wantProbes)
+		linkRes.probes = nil
+		linkRes.mu.Unlock()
+		if diff != "" {
+			t.Fatalf("link address resolver probes mismatch (-got, +want):\n%s", diff)
+		}
+	}
+
+	e.mu.Lock()
 	e.handleConfirmationLocked(entryTestLinkAddr1, ReachabilityConfirmationFlags{
 		Solicited: false,
 		Override:  false,
 		IsRouter:  false,
 	})
-	e.handlePacketQueuedLocked()
+	e.handlePacketQueuedLocked(entryTestAddr2)
 	e.mu.Unlock()
 
 	clock.Advance(c.DelayFirstProbeTime)
-
-	wantProbes := []entryTestProbeInfo{
-		// The first probe is caused by the Unknown-to-Incomplete transition.
-		{
-			RemoteAddress:     entryTestAddr1,
-			RemoteLinkAddress: tcpip.LinkAddress(""),
-			LocalAddress:      entryTestAddr2,
-		},
-		// The second probe is caused by the Delay-to-Probe transition.
-		{
-			RemoteAddress:     entryTestAddr1,
-			RemoteLinkAddress: entryTestLinkAddr1,
-			LocalAddress:      entryTestAddr2,
-		},
-	}
-	linkRes.mu.Lock()
-	diff := cmp.Diff(linkRes.probes, wantProbes)
-	linkRes.mu.Unlock()
-	if diff != "" {
-		t.Fatalf("link address resolver probes mismatch (-got, +want):\n%s", diff)
+	{
+		wantProbes := []entryTestProbeInfo{
+			{
+				RemoteAddress:     entryTestAddr1,
+				RemoteLinkAddress: entryTestLinkAddr1,
+			},
+		}
+		linkRes.mu.Lock()
+		diff := cmp.Diff(linkRes.probes, wantProbes)
+		linkRes.mu.Unlock()
+		if diff != "" {
+			t.Fatalf("link address resolver probes mismatch (-got, +want):\n%s", diff)
+		}
 	}
 
 	e.mu.Lock()
@@ -2672,49 +2981,60 @@ func TestEntryProbeToReachableWhenSolicitedOverrideConfirmation(t *testing.T) {
 	e.mu.Unlock()
 
 	clock.Advance(c.BaseReachableTime)
-
 	wantEvents := []testEntryEventInfo{
 		{
 			EventType: entryTestAdded,
 			NICID:     entryTestNICID,
-			Addr:      entryTestAddr1,
-			LinkAddr:  tcpip.LinkAddress(""),
-			State:     Incomplete,
+			Entry: NeighborEntry{
+				Addr:     entryTestAddr1,
+				LinkAddr: tcpip.LinkAddress(""),
+				State:    Incomplete,
+			},
 		},
 		{
 			EventType: entryTestChanged,
 			NICID:     entryTestNICID,
-			Addr:      entryTestAddr1,
-			LinkAddr:  entryTestLinkAddr1,
-			State:     Stale,
+			Entry: NeighborEntry{
+				Addr:     entryTestAddr1,
+				LinkAddr: entryTestLinkAddr1,
+				State:    Stale,
+			},
 		},
 		{
 			EventType: entryTestChanged,
 			NICID:     entryTestNICID,
-			Addr:      entryTestAddr1,
-			LinkAddr:  entryTestLinkAddr1,
-			State:     Delay,
+			Entry: NeighborEntry{
+				Addr:     entryTestAddr1,
+				LinkAddr: entryTestLinkAddr1,
+				State:    Delay,
+			},
 		},
 		{
 			EventType: entryTestChanged,
 			NICID:     entryTestNICID,
-			Addr:      entryTestAddr1,
-			LinkAddr:  entryTestLinkAddr1,
-			State:     Probe,
+			Entry: NeighborEntry{
+				Addr:     entryTestAddr1,
+				LinkAddr: entryTestLinkAddr1,
+				State:    Probe,
+			},
 		},
 		{
 			EventType: entryTestChanged,
 			NICID:     entryTestNICID,
-			Addr:      entryTestAddr1,
-			LinkAddr:  entryTestLinkAddr2,
-			State:     Reachable,
+			Entry: NeighborEntry{
+				Addr:     entryTestAddr1,
+				LinkAddr: entryTestLinkAddr2,
+				State:    Reachable,
+			},
 		},
 		{
 			EventType: entryTestChanged,
 			NICID:     entryTestNICID,
-			Addr:      entryTestAddr1,
-			LinkAddr:  entryTestLinkAddr2,
-			State:     Stale,
+			Entry: NeighborEntry{
+				Addr:     entryTestAddr1,
+				LinkAddr: entryTestLinkAddr2,
+				State:    Stale,
+			},
 		},
 	}
 	nudDisp.mu.Lock()
@@ -2734,36 +3054,50 @@ func TestEntryProbeToReachableWhenSolicitedConfirmationWithSameAddress(t *testin
 	e, nudDisp, linkRes, clock := entryTestSetup(c)
 
 	e.mu.Lock()
-	e.handlePacketQueuedLocked()
+	e.handlePacketQueuedLocked(entryTestAddr2)
+	e.mu.Unlock()
+
+	runImmediatelyScheduledJobs(clock)
+	{
+		wantProbes := []entryTestProbeInfo{
+			{
+				RemoteAddress:     entryTestAddr1,
+				RemoteLinkAddress: tcpip.LinkAddress(""),
+				LocalAddress:      entryTestAddr2,
+			},
+		}
+		linkRes.mu.Lock()
+		diff := cmp.Diff(linkRes.probes, wantProbes)
+		linkRes.probes = nil
+		linkRes.mu.Unlock()
+		if diff != "" {
+			t.Fatalf("link address resolver probes mismatch (-got, +want):\n%s", diff)
+		}
+	}
+
+	e.mu.Lock()
 	e.handleConfirmationLocked(entryTestLinkAddr1, ReachabilityConfirmationFlags{
 		Solicited: false,
 		Override:  false,
 		IsRouter:  false,
 	})
-	e.handlePacketQueuedLocked()
+	e.handlePacketQueuedLocked(entryTestAddr2)
 	e.mu.Unlock()
 
 	clock.Advance(c.DelayFirstProbeTime)
-
-	wantProbes := []entryTestProbeInfo{
-		// The first probe is caused by the Unknown-to-Incomplete transition.
-		{
-			RemoteAddress:     entryTestAddr1,
-			RemoteLinkAddress: tcpip.LinkAddress(""),
-			LocalAddress:      entryTestAddr2,
-		},
-		// The second probe is caused by the Delay-to-Probe transition.
-		{
-			RemoteAddress:     entryTestAddr1,
-			RemoteLinkAddress: entryTestLinkAddr1,
-			LocalAddress:      entryTestAddr2,
-		},
-	}
-	linkRes.mu.Lock()
-	diff := cmp.Diff(linkRes.probes, wantProbes)
-	linkRes.mu.Unlock()
-	if diff != "" {
-		t.Fatalf("link address resolver probes mismatch (-got, +want):\n%s", diff)
+	{
+		wantProbes := []entryTestProbeInfo{
+			{
+				RemoteAddress:     entryTestAddr1,
+				RemoteLinkAddress: entryTestLinkAddr1,
+			},
+		}
+		linkRes.mu.Lock()
+		diff := cmp.Diff(linkRes.probes, wantProbes)
+		linkRes.mu.Unlock()
+		if diff != "" {
+			t.Fatalf("link address resolver probes mismatch (-got, +want):\n%s", diff)
+		}
 	}
 
 	e.mu.Lock()
@@ -2781,49 +3115,60 @@ func TestEntryProbeToReachableWhenSolicitedConfirmationWithSameAddress(t *testin
 	e.mu.Unlock()
 
 	clock.Advance(c.BaseReachableTime)
-
 	wantEvents := []testEntryEventInfo{
 		{
 			EventType: entryTestAdded,
 			NICID:     entryTestNICID,
-			Addr:      entryTestAddr1,
-			LinkAddr:  tcpip.LinkAddress(""),
-			State:     Incomplete,
+			Entry: NeighborEntry{
+				Addr:     entryTestAddr1,
+				LinkAddr: tcpip.LinkAddress(""),
+				State:    Incomplete,
+			},
 		},
 		{
 			EventType: entryTestChanged,
 			NICID:     entryTestNICID,
-			Addr:      entryTestAddr1,
-			LinkAddr:  entryTestLinkAddr1,
-			State:     Stale,
+			Entry: NeighborEntry{
+				Addr:     entryTestAddr1,
+				LinkAddr: entryTestLinkAddr1,
+				State:    Stale,
+			},
 		},
 		{
 			EventType: entryTestChanged,
 			NICID:     entryTestNICID,
-			Addr:      entryTestAddr1,
-			LinkAddr:  entryTestLinkAddr1,
-			State:     Delay,
+			Entry: NeighborEntry{
+				Addr:     entryTestAddr1,
+				LinkAddr: entryTestLinkAddr1,
+				State:    Delay,
+			},
 		},
 		{
 			EventType: entryTestChanged,
 			NICID:     entryTestNICID,
-			Addr:      entryTestAddr1,
-			LinkAddr:  entryTestLinkAddr1,
-			State:     Probe,
+			Entry: NeighborEntry{
+				Addr:     entryTestAddr1,
+				LinkAddr: entryTestLinkAddr1,
+				State:    Probe,
+			},
 		},
 		{
 			EventType: entryTestChanged,
 			NICID:     entryTestNICID,
-			Addr:      entryTestAddr1,
-			LinkAddr:  entryTestLinkAddr1,
-			State:     Reachable,
+			Entry: NeighborEntry{
+				Addr:     entryTestAddr1,
+				LinkAddr: entryTestLinkAddr1,
+				State:    Reachable,
+			},
 		},
 		{
 			EventType: entryTestChanged,
 			NICID:     entryTestNICID,
-			Addr:      entryTestAddr1,
-			LinkAddr:  entryTestLinkAddr1,
-			State:     Stale,
+			Entry: NeighborEntry{
+				Addr:     entryTestAddr1,
+				LinkAddr: entryTestLinkAddr1,
+				State:    Stale,
+			},
 		},
 	}
 	nudDisp.mu.Lock()
@@ -2843,36 +3188,50 @@ func TestEntryProbeToReachableWhenSolicitedConfirmationWithoutAddress(t *testing
 	e, nudDisp, linkRes, clock := entryTestSetup(c)
 
 	e.mu.Lock()
-	e.handlePacketQueuedLocked()
+	e.handlePacketQueuedLocked(entryTestAddr2)
+	e.mu.Unlock()
+
+	runImmediatelyScheduledJobs(clock)
+	{
+		wantProbes := []entryTestProbeInfo{
+			{
+				RemoteAddress:     entryTestAddr1,
+				RemoteLinkAddress: tcpip.LinkAddress(""),
+				LocalAddress:      entryTestAddr2,
+			},
+		}
+		linkRes.mu.Lock()
+		diff := cmp.Diff(linkRes.probes, wantProbes)
+		linkRes.probes = nil
+		linkRes.mu.Unlock()
+		if diff != "" {
+			t.Fatalf("link address resolver probes mismatch (-got, +want):\n%s", diff)
+		}
+	}
+
+	e.mu.Lock()
 	e.handleConfirmationLocked(entryTestLinkAddr1, ReachabilityConfirmationFlags{
 		Solicited: false,
 		Override:  false,
 		IsRouter:  false,
 	})
-	e.handlePacketQueuedLocked()
+	e.handlePacketQueuedLocked(entryTestAddr2)
 	e.mu.Unlock()
 
 	clock.Advance(c.DelayFirstProbeTime)
-
-	wantProbes := []entryTestProbeInfo{
-		// The first probe is caused by the Unknown-to-Incomplete transition.
-		{
-			RemoteAddress:     entryTestAddr1,
-			RemoteLinkAddress: tcpip.LinkAddress(""),
-			LocalAddress:      entryTestAddr2,
-		},
-		// The second probe is caused by the Delay-to-Probe transition.
-		{
-			RemoteAddress:     entryTestAddr1,
-			RemoteLinkAddress: entryTestLinkAddr1,
-			LocalAddress:      entryTestAddr2,
-		},
-	}
-	linkRes.mu.Lock()
-	diff := cmp.Diff(linkRes.probes, wantProbes)
-	linkRes.mu.Unlock()
-	if diff != "" {
-		t.Fatalf("link address resolver probes mismatch (-got, +want):\n%s", diff)
+	{
+		wantProbes := []entryTestProbeInfo{
+			{
+				RemoteAddress:     entryTestAddr1,
+				RemoteLinkAddress: entryTestLinkAddr1,
+			},
+		}
+		linkRes.mu.Lock()
+		diff := cmp.Diff(linkRes.probes, wantProbes)
+		linkRes.mu.Unlock()
+		if diff != "" {
+			t.Fatalf("link address resolver probes mismatch (-got, +want):\n%s", diff)
+		}
 	}
 
 	e.mu.Lock()
@@ -2890,49 +3249,60 @@ func TestEntryProbeToReachableWhenSolicitedConfirmationWithoutAddress(t *testing
 	e.mu.Unlock()
 
 	clock.Advance(c.BaseReachableTime)
-
 	wantEvents := []testEntryEventInfo{
 		{
 			EventType: entryTestAdded,
 			NICID:     entryTestNICID,
-			Addr:      entryTestAddr1,
-			LinkAddr:  tcpip.LinkAddress(""),
-			State:     Incomplete,
+			Entry: NeighborEntry{
+				Addr:     entryTestAddr1,
+				LinkAddr: tcpip.LinkAddress(""),
+				State:    Incomplete,
+			},
 		},
 		{
 			EventType: entryTestChanged,
 			NICID:     entryTestNICID,
-			Addr:      entryTestAddr1,
-			LinkAddr:  entryTestLinkAddr1,
-			State:     Stale,
+			Entry: NeighborEntry{
+				Addr:     entryTestAddr1,
+				LinkAddr: entryTestLinkAddr1,
+				State:    Stale,
+			},
 		},
 		{
 			EventType: entryTestChanged,
 			NICID:     entryTestNICID,
-			Addr:      entryTestAddr1,
-			LinkAddr:  entryTestLinkAddr1,
-			State:     Delay,
+			Entry: NeighborEntry{
+				Addr:     entryTestAddr1,
+				LinkAddr: entryTestLinkAddr1,
+				State:    Delay,
+			},
 		},
 		{
 			EventType: entryTestChanged,
 			NICID:     entryTestNICID,
-			Addr:      entryTestAddr1,
-			LinkAddr:  entryTestLinkAddr1,
-			State:     Probe,
+			Entry: NeighborEntry{
+				Addr:     entryTestAddr1,
+				LinkAddr: entryTestLinkAddr1,
+				State:    Probe,
+			},
 		},
 		{
 			EventType: entryTestChanged,
 			NICID:     entryTestNICID,
-			Addr:      entryTestAddr1,
-			LinkAddr:  entryTestLinkAddr1,
-			State:     Reachable,
+			Entry: NeighborEntry{
+				Addr:     entryTestAddr1,
+				LinkAddr: entryTestLinkAddr1,
+				State:    Reachable,
+			},
 		},
 		{
 			EventType: entryTestChanged,
 			NICID:     entryTestNICID,
-			Addr:      entryTestAddr1,
-			LinkAddr:  entryTestLinkAddr1,
-			State:     Stale,
+			Entry: NeighborEntry{
+				Addr:     entryTestAddr1,
+				LinkAddr: entryTestLinkAddr1,
+				State:    Stale,
+			},
 		},
 	}
 	nudDisp.mu.Lock()
@@ -2946,87 +3316,238 @@ func TestEntryProbeToFailed(t *testing.T) {
 	c := DefaultNUDConfigurations()
 	c.MaxMulticastProbes = 3
 	c.MaxUnicastProbes = 3
+	c.DelayFirstProbeTime = c.RetransmitTimer
 	e, nudDisp, linkRes, clock := entryTestSetup(c)
 
 	e.mu.Lock()
-	e.handlePacketQueuedLocked()
+	e.handlePacketQueuedLocked(entryTestAddr2)
+	e.mu.Unlock()
+
+	runImmediatelyScheduledJobs(clock)
+	{
+		wantProbes := []entryTestProbeInfo{
+			{
+				RemoteAddress: entryTestAddr1,
+				LocalAddress:  entryTestAddr2,
+			},
+		}
+		linkRes.mu.Lock()
+		diff := cmp.Diff(linkRes.probes, wantProbes)
+		linkRes.probes = nil
+		linkRes.mu.Unlock()
+		if diff != "" {
+			t.Fatalf("link address resolver probes mismatch (-got, +want):\n%s", diff)
+		}
+	}
+
+	e.mu.Lock()
 	e.handleConfirmationLocked(entryTestLinkAddr1, ReachabilityConfirmationFlags{
 		Solicited: false,
 		Override:  false,
 		IsRouter:  false,
 	})
-	e.handlePacketQueuedLocked()
+	e.handlePacketQueuedLocked(entryTestAddr2)
 	e.mu.Unlock()
 
-	waitFor := c.DelayFirstProbeTime + c.RetransmitTimer*time.Duration(c.MaxUnicastProbes)
-	clock.Advance(waitFor)
+	// Observe each probe sent while in the Probe state.
+	for i := uint32(0); i < c.MaxUnicastProbes; i++ {
+		clock.Advance(c.RetransmitTimer)
+		wantProbes := []entryTestProbeInfo{
+			{
+				RemoteAddress:     entryTestAddr1,
+				RemoteLinkAddress: entryTestLinkAddr1,
+			},
+		}
+		linkRes.mu.Lock()
+		diff := cmp.Diff(linkRes.probes, wantProbes)
+		linkRes.probes = nil
+		linkRes.mu.Unlock()
+		if diff != "" {
+			t.Fatalf("link address resolver probe #%d mismatch (-got, +want):\n%s", i+1, diff)
+		}
 
-	wantProbes := []entryTestProbeInfo{
-		// The first probe is caused by the Unknown-to-Incomplete transition.
+		e.mu.Lock()
+		if e.neigh.State != Probe {
+			t.Errorf("got e.neigh.State = %q, want = %q", e.neigh.State, Probe)
+		}
+		e.mu.Unlock()
+	}
+
+	// Wait for the last probe to expire, causing a transition to Failed.
+	clock.Advance(c.RetransmitTimer)
+	e.mu.Lock()
+	if e.neigh.State != Failed {
+		t.Errorf("got e.neigh.State = %q, want = %q", e.neigh.State, Failed)
+	}
+	e.mu.Unlock()
+
+	wantEvents := []testEntryEventInfo{
 		{
-			RemoteAddress:     entryTestAddr1,
-			RemoteLinkAddress: tcpip.LinkAddress(""),
-			LocalAddress:      entryTestAddr2,
+			EventType: entryTestAdded,
+			NICID:     entryTestNICID,
+			Entry: NeighborEntry{
+				Addr:     entryTestAddr1,
+				LinkAddr: tcpip.LinkAddress(""),
+				State:    Incomplete,
+			},
 		},
-		// The next three probe are caused by the Delay-to-Probe transition.
 		{
-			RemoteAddress:     entryTestAddr1,
-			RemoteLinkAddress: entryTestLinkAddr1,
-			LocalAddress:      entryTestAddr2,
+			EventType: entryTestChanged,
+			NICID:     entryTestNICID,
+			Entry: NeighborEntry{
+				Addr:     entryTestAddr1,
+				LinkAddr: entryTestLinkAddr1,
+				State:    Stale,
+			},
 		},
 		{
-			RemoteAddress:     entryTestAddr1,
-			RemoteLinkAddress: entryTestLinkAddr1,
-			LocalAddress:      entryTestAddr2,
+			EventType: entryTestChanged,
+			NICID:     entryTestNICID,
+			Entry: NeighborEntry{
+				Addr:     entryTestAddr1,
+				LinkAddr: entryTestLinkAddr1,
+				State:    Delay,
+			},
 		},
 		{
-			RemoteAddress:     entryTestAddr1,
-			RemoteLinkAddress: entryTestLinkAddr1,
-			LocalAddress:      entryTestAddr2,
+			EventType: entryTestChanged,
+			NICID:     entryTestNICID,
+			Entry: NeighborEntry{
+				Addr:     entryTestAddr1,
+				LinkAddr: entryTestLinkAddr1,
+				State:    Probe,
+			},
+		},
+		{
+			EventType: entryTestRemoved,
+			NICID:     entryTestNICID,
+			Entry: NeighborEntry{
+				Addr:     entryTestAddr1,
+				LinkAddr: entryTestLinkAddr1,
+				State:    Probe,
+			},
 		},
 	}
-	linkRes.mu.Lock()
-	diff := cmp.Diff(linkRes.probes, wantProbes)
-	linkRes.mu.Unlock()
-	if diff != "" {
-		t.Fatalf("link address resolver probes mismatch (-got, +want):\n%s", diff)
+	nudDisp.mu.Lock()
+	if diff := cmp.Diff(nudDisp.events, wantEvents, eventDiffOpts()...); diff != "" {
+		t.Errorf("nud dispatcher events mismatch (-got, +want):\n%s", diff)
+	}
+	nudDisp.mu.Unlock()
+}
+
+func TestEntryFailedToFailed(t *testing.T) {
+	c := DefaultNUDConfigurations()
+	c.MaxMulticastProbes = 3
+	c.MaxUnicastProbes = 3
+	e, nudDisp, linkRes, clock := entryTestSetup(c)
+
+	// Verify the cache contains the entry.
+	if _, ok := e.nic.neigh.cache[entryTestAddr1]; !ok {
+		t.Errorf("expected entry %q to exist in the neighbor cache", entryTestAddr1)
+	}
+
+	// TODO(gvisor.dev/issue/4872): Use helper functions to start entry tests in
+	// their expected state.
+	e.mu.Lock()
+	e.handlePacketQueuedLocked(entryTestAddr2)
+	e.mu.Unlock()
+
+	runImmediatelyScheduledJobs(clock)
+	{
+		wantProbes := []entryTestProbeInfo{
+			{
+				RemoteAddress: entryTestAddr1,
+				LocalAddress:  entryTestAddr2,
+			},
+		}
+		linkRes.mu.Lock()
+		diff := cmp.Diff(linkRes.probes, wantProbes)
+		linkRes.probes = nil
+		linkRes.mu.Unlock()
+		if diff != "" {
+			t.Fatalf("link address resolver probes mismatch (-got, +want):\n%s", diff)
+		}
+	}
+
+	e.mu.Lock()
+	e.handleConfirmationLocked(entryTestLinkAddr1, ReachabilityConfirmationFlags{
+		Solicited: false,
+		Override:  false,
+		IsRouter:  false,
+	})
+	e.handlePacketQueuedLocked(entryTestAddr2)
+	e.mu.Unlock()
+
+	waitFor := c.DelayFirstProbeTime + c.RetransmitTimer*time.Duration(c.MaxUnicastProbes)
+	clock.Advance(waitFor)
+	{
+		wantProbes := []entryTestProbeInfo{
+			{
+				RemoteAddress:     entryTestAddr1,
+				RemoteLinkAddress: entryTestLinkAddr1,
+			},
+			{
+				RemoteAddress:     entryTestAddr1,
+				RemoteLinkAddress: entryTestLinkAddr1,
+			},
+			{
+				RemoteAddress:     entryTestAddr1,
+				RemoteLinkAddress: entryTestLinkAddr1,
+			},
+		}
+		linkRes.mu.Lock()
+		diff := cmp.Diff(linkRes.probes, wantProbes)
+		linkRes.mu.Unlock()
+		if diff != "" {
+			t.Fatalf("link address resolver probes mismatch (-got, +want):\n%s", diff)
+		}
 	}
 
 	wantEvents := []testEntryEventInfo{
 		{
 			EventType: entryTestAdded,
 			NICID:     entryTestNICID,
-			Addr:      entryTestAddr1,
-			LinkAddr:  tcpip.LinkAddress(""),
-			State:     Incomplete,
+			Entry: NeighborEntry{
+				Addr:     entryTestAddr1,
+				LinkAddr: tcpip.LinkAddress(""),
+				State:    Incomplete,
+			},
 		},
 		{
 			EventType: entryTestChanged,
 			NICID:     entryTestNICID,
-			Addr:      entryTestAddr1,
-			LinkAddr:  entryTestLinkAddr1,
-			State:     Stale,
+			Entry: NeighborEntry{
+				Addr:     entryTestAddr1,
+				LinkAddr: entryTestLinkAddr1,
+				State:    Stale,
+			},
 		},
 		{
 			EventType: entryTestChanged,
 			NICID:     entryTestNICID,
-			Addr:      entryTestAddr1,
-			LinkAddr:  entryTestLinkAddr1,
-			State:     Delay,
+			Entry: NeighborEntry{
+				Addr:     entryTestAddr1,
+				LinkAddr: entryTestLinkAddr1,
+				State:    Delay,
+			},
 		},
 		{
 			EventType: entryTestChanged,
 			NICID:     entryTestNICID,
-			Addr:      entryTestAddr1,
-			LinkAddr:  entryTestLinkAddr1,
-			State:     Probe,
+			Entry: NeighborEntry{
+				Addr:     entryTestAddr1,
+				LinkAddr: entryTestLinkAddr1,
+				State:    Probe,
+			},
 		},
 		{
 			EventType: entryTestRemoved,
 			NICID:     entryTestNICID,
-			Addr:      entryTestAddr1,
-			LinkAddr:  entryTestLinkAddr1,
-			State:     Probe,
+			Entry: NeighborEntry{
+				Addr:     entryTestAddr1,
+				LinkAddr: entryTestLinkAddr1,
+				State:    Probe,
+			},
 		},
 	}
 	nudDisp.mu.Lock()
@@ -3035,11 +3556,23 @@ func TestEntryProbeToFailed(t *testing.T) {
 	}
 	nudDisp.mu.Unlock()
 
-	e.mu.Lock()
-	if got, want := e.neigh.State, Failed; got != want {
-		t.Errorf("got e.neigh.State = %q, want = %q", got, want)
+	failedLookups := e.nic.stats.Neighbor.FailedEntryLookups
+	if got := failedLookups.Value(); got != 0 {
+		t.Errorf("got Neighbor.FailedEntryLookups = %d, want = 0", got)
 	}
+
+	e.mu.Lock()
+	// Verify queuing a packet to the entry immediately fails.
+	e.handlePacketQueuedLocked(entryTestAddr2)
+	state := e.neigh.State
 	e.mu.Unlock()
+	if state != Failed {
+		t.Errorf("got e.neigh.State = %q, want = %q", state, Failed)
+	}
+
+	if got := failedLookups.Value(); got != 1 {
+		t.Errorf("got Neighbor.FailedEntryLookups = %d, want = 1", got)
+	}
 }
 
 func TestEntryFailedGetsDeleted(t *testing.T) {
@@ -3054,84 +3587,106 @@ func TestEntryFailedGetsDeleted(t *testing.T) {
 	}
 
 	e.mu.Lock()
-	e.handlePacketQueuedLocked()
+	e.handlePacketQueuedLocked(entryTestAddr2)
+	e.mu.Unlock()
+
+	runImmediatelyScheduledJobs(clock)
+	{
+		wantProbes := []entryTestProbeInfo{
+			{
+				RemoteAddress: entryTestAddr1,
+				LocalAddress:  entryTestAddr2,
+			},
+		}
+		linkRes.mu.Lock()
+		diff := cmp.Diff(linkRes.probes, wantProbes)
+		linkRes.probes = nil
+		linkRes.mu.Unlock()
+		if diff != "" {
+			t.Fatalf("link address resolver probes mismatch (-got, +want):\n%s", diff)
+		}
+	}
+
+	e.mu.Lock()
 	e.handleConfirmationLocked(entryTestLinkAddr1, ReachabilityConfirmationFlags{
 		Solicited: false,
 		Override:  false,
 		IsRouter:  false,
 	})
-	e.handlePacketQueuedLocked()
+	e.handlePacketQueuedLocked(entryTestAddr2)
 	e.mu.Unlock()
 
 	waitFor := c.DelayFirstProbeTime + c.RetransmitTimer*time.Duration(c.MaxUnicastProbes) + c.UnreachableTime
 	clock.Advance(waitFor)
-
-	wantProbes := []entryTestProbeInfo{
-		// The first probe is caused by the Unknown-to-Incomplete transition.
-		{
-			RemoteAddress:     entryTestAddr1,
-			RemoteLinkAddress: tcpip.LinkAddress(""),
-			LocalAddress:      entryTestAddr2,
-		},
-		// The next three probe are caused by the Delay-to-Probe transition.
-		{
-			RemoteAddress:     entryTestAddr1,
-			RemoteLinkAddress: entryTestLinkAddr1,
-			LocalAddress:      entryTestAddr2,
-		},
-		{
-			RemoteAddress:     entryTestAddr1,
-			RemoteLinkAddress: entryTestLinkAddr1,
-			LocalAddress:      entryTestAddr2,
-		},
-		{
-			RemoteAddress:     entryTestAddr1,
-			RemoteLinkAddress: entryTestLinkAddr1,
-			LocalAddress:      entryTestAddr2,
-		},
-	}
-	linkRes.mu.Lock()
-	diff := cmp.Diff(linkRes.probes, wantProbes)
-	linkRes.mu.Unlock()
-	if diff != "" {
-		t.Fatalf("link address resolver probes mismatch (-got, +want):\n%s", diff)
+	{
+		wantProbes := []entryTestProbeInfo{
+			// The next three probes are sent in Probe.
+			{
+				RemoteAddress:     entryTestAddr1,
+				RemoteLinkAddress: entryTestLinkAddr1,
+			},
+			{
+				RemoteAddress:     entryTestAddr1,
+				RemoteLinkAddress: entryTestLinkAddr1,
+			},
+			{
+				RemoteAddress:     entryTestAddr1,
+				RemoteLinkAddress: entryTestLinkAddr1,
+			},
+		}
+		linkRes.mu.Lock()
+		diff := cmp.Diff(linkRes.probes, wantProbes)
+		linkRes.mu.Unlock()
+		if diff != "" {
+			t.Fatalf("link address resolver probes mismatch (-got, +want):\n%s", diff)
+		}
 	}
 
 	wantEvents := []testEntryEventInfo{
 		{
 			EventType: entryTestAdded,
 			NICID:     entryTestNICID,
-			Addr:      entryTestAddr1,
-			LinkAddr:  tcpip.LinkAddress(""),
-			State:     Incomplete,
+			Entry: NeighborEntry{
+				Addr:     entryTestAddr1,
+				LinkAddr: tcpip.LinkAddress(""),
+				State:    Incomplete,
+			},
 		},
 		{
 			EventType: entryTestChanged,
 			NICID:     entryTestNICID,
-			Addr:      entryTestAddr1,
-			LinkAddr:  entryTestLinkAddr1,
-			State:     Stale,
+			Entry: NeighborEntry{
+				Addr:     entryTestAddr1,
+				LinkAddr: entryTestLinkAddr1,
+				State:    Stale,
+			},
 		},
 		{
 			EventType: entryTestChanged,
 			NICID:     entryTestNICID,
-			Addr:      entryTestAddr1,
-			LinkAddr:  entryTestLinkAddr1,
-			State:     Delay,
+			Entry: NeighborEntry{
+				Addr:     entryTestAddr1,
+				LinkAddr: entryTestLinkAddr1,
+				State:    Delay,
+			},
 		},
 		{
 			EventType: entryTestChanged,
 			NICID:     entryTestNICID,
-			Addr:      entryTestAddr1,
-			LinkAddr:  entryTestLinkAddr1,
-			State:     Probe,
+			Entry: NeighborEntry{
+				Addr:     entryTestAddr1,
+				LinkAddr: entryTestLinkAddr1,
+				State:    Probe,
+			},
 		},
 		{
 			EventType: entryTestRemoved,
 			NICID:     entryTestNICID,
-			Addr:      entryTestAddr1,
-			LinkAddr:  entryTestLinkAddr1,
-			State:     Probe,
+			Entry: NeighborEntry{
+				Addr:     entryTestAddr1,
+				LinkAddr: entryTestLinkAddr1,
+				State:    Probe,
+			},
 		},
 	}
 	nudDisp.mu.Lock()
diff --git a/pkg/tcpip/stack/nic.go b/pkg/tcpip/stack/nic.go
index dcd4319bf..5d037a27e 100644
--- a/pkg/tcpip/stack/nic.go
+++ b/pkg/tcpip/stack/nic.go
@@ -54,18 +54,20 @@ type NIC struct {
 		sync.RWMutex
 		spoofing    bool
 		promiscuous bool
-		// packetEPs is protected by mu, but the contained PacketEndpoint
-		// values are not.
-		packetEPs map[tcpip.NetworkProtocolNumber][]PacketEndpoint
+		// packetEPs is protected by mu, but the contained packetEndpointList
+		// values are not.
+		packetEPs map[tcpip.NetworkProtocolNumber]*packetEndpointList
 	}
 }
 
-// NICStats includes transmitted and received stats.
+// NICStats holds statistics for a NIC.
 type NICStats struct {
 	Tx DirectionStats
 	Rx DirectionStats
 
 	DisabledRx DirectionStats
+
+	Neighbor NeighborStats
 }
 
 func makeNICStats() NICStats {
@@ -80,6 +82,39 @@ type DirectionStats struct {
 	Bytes   *tcpip.StatCounter
 }
 
+type packetEndpointList struct {
+	mu sync.RWMutex
+
+	// eps is protected by mu, but the contained PacketEndpoint values are not.
+	eps []PacketEndpoint
+}
+
+func (p *packetEndpointList) add(ep PacketEndpoint) {
+	p.mu.Lock()
+	defer p.mu.Unlock()
+	p.eps = append(p.eps, ep)
+}
+
+func (p *packetEndpointList) remove(ep PacketEndpoint) {
+	p.mu.Lock()
+	defer p.mu.Unlock()
+	for i, epOther := range p.eps {
+		if epOther == ep {
+			p.eps = append(p.eps[:i], p.eps[i+1:]...)
+			break
+		}
+	}
+}
+
+// forEach calls fn with each endpoint in p while holding the read lock on p.
+func (p *packetEndpointList) forEach(fn func(PacketEndpoint)) {
+	p.mu.RLock()
+	defer p.mu.RUnlock()
+	for _, ep := range p.eps {
+		fn(ep)
+	}
+}
+
 // newNIC returns a new NIC using the default NDP configurations from stack.
 func newNIC(stack *Stack, id tcpip.NICID, name string, ep LinkEndpoint, ctx NICContext) *NIC {
 	// TODO(b/141011931): Validate a LinkEndpoint (ep) is valid. For
@@ -100,7 +135,7 @@ func newNIC(stack *Stack, id tcpip.NICID, name string, ep LinkEndpoint, ctx NICC
 		stats:            makeNICStats(),
 		networkEndpoints: make(map[tcpip.NetworkProtocolNumber]NetworkEndpoint),
 	}
-	nic.mu.packetEPs = make(map[tcpip.NetworkProtocolNumber][]PacketEndpoint)
+	nic.mu.packetEPs = make(map[tcpip.NetworkProtocolNumber]*packetEndpointList)
 
 	// Check for Neighbor Unreachability Detection support.
 	var nud NUDHandler
@@ -123,11 +158,11 @@ func newNIC(stack *Stack, id tcpip.NICID, name string, ep LinkEndpoint, ctx NICC
 
 	// Register supported packet and network endpoint protocols.
 	for _, netProto := range header.Ethertypes {
-		nic.mu.packetEPs[netProto] = []PacketEndpoint{}
+		nic.mu.packetEPs[netProto] = new(packetEndpointList)
 	}
 	for _, netProto := range stack.networkProtocols {
 		netNum := netProto.Number()
-		nic.mu.packetEPs[netNum] = nil
+		nic.mu.packetEPs[netNum] = new(packetEndpointList)
 		nic.networkEndpoints[netNum] = netProto.NewEndpoint(nic, stack, nud, nic)
 	}
 
@@ -170,7 +205,7 @@ func (n *NIC) disable() {
 //
 // n MUST be locked.
 func (n *NIC) disableLocked() {
-	if !n.setEnabled(false) {
+	if !n.Enabled() {
 		return
 	}
 
@@ -182,6 +217,10 @@ func (n *NIC) disableLocked() {
 	for _, ep := range n.networkEndpoints {
 		ep.Disable()
 	}
+
+	if !n.setEnabled(false) {
+		panic("should have only done work to disable the NIC if it was enabled")
+	}
 }
 
 // enable enables n.
@@ -232,7 +271,8 @@ func (n *NIC) setPromiscuousMode(enable bool) {
 	n.mu.Unlock()
 }
 
-func (n *NIC) isPromiscuousMode() bool {
+// Promiscuous implements NetworkInterface.
+func (n *NIC) Promiscuous() bool {
 	n.mu.RLock()
 	rv := n.mu.promiscuous
 	n.mu.RUnlock()
@@ -264,7 +304,7 @@ func (n *NIC) WritePacket(r *Route, gso *GSO, protocol tcpip.NetworkProtocolNumb
 	if ch, err := r.Resolve(nil); err != nil {
 		if err == tcpip.ErrWouldBlock {
 			r := r.Clone()
-			n.stack.linkResQueue.enqueue(ch, &r, protocol, pkt)
+			n.stack.linkResQueue.enqueue(ch, r, protocol, pkt)
 			return nil
 		}
 		return err
@@ -273,6 +313,15 @@ func (n *NIC) WritePacket(r *Route, gso *GSO, protocol tcpip.NetworkProtocolNumb
 	return n.writePacket(r, gso, protocol, pkt)
 }
 
+// WritePacketToRemote implements NetworkInterface.
+func (n *NIC) WritePacketToRemote(remoteLinkAddr tcpip.LinkAddress, gso *GSO, protocol tcpip.NetworkProtocolNumber, pkt *PacketBuffer) *tcpip.Error {
+	r := Route{
+		NetProto: protocol,
+	}
+	r.ResolveWith(remoteLinkAddr)
+	return n.writePacket(&r, gso, protocol, pkt)
+}
+
 func (n *NIC) writePacket(r *Route, gso *GSO, protocol tcpip.NetworkProtocolNumber, pkt *PacketBuffer) *tcpip.Error {
 	// WritePacket takes ownership of pkt, calculate numBytes first.
 	numBytes := pkt.Size()
@@ -311,16 +360,21 @@ func (n *NIC) setSpoofing(enable bool) {
 // primaryAddress returns an address that can be used to communicate with
 // remoteAddr.
 func (n *NIC) primaryEndpoint(protocol tcpip.NetworkProtocolNumber, remoteAddr tcpip.Address) AssignableAddressEndpoint {
-	n.mu.RLock()
-	spoofing := n.mu.spoofing
-	n.mu.RUnlock()
-
 	ep, ok := n.networkEndpoints[protocol]
 	if !ok {
 		return nil
 	}
 
-	return ep.AcquireOutgoingPrimaryAddress(remoteAddr, spoofing)
+	addressableEndpoint, ok := ep.(AddressableEndpoint)
+	if !ok {
+		return nil
+	}
+
+	n.mu.RLock()
+	spoofing := n.mu.spoofing
+	n.mu.RUnlock()
+
+	return addressableEndpoint.AcquireOutgoingPrimaryAddress(remoteAddr, spoofing)
 }
 
 type getAddressBehaviour int
@@ -339,6 +393,16 @@ func (n *NIC) getAddress(protocol tcpip.NetworkProtocolNumber, dst tcpip.Address
 	return n.getAddressOrCreateTemp(protocol, dst, CanBePrimaryEndpoint, promiscuous)
 }
 
+func (n *NIC) hasAddress(protocol tcpip.NetworkProtocolNumber, addr tcpip.Address) bool {
+	ep := n.getAddressOrCreateTempInner(protocol, addr, false, NeverPrimaryEndpoint)
+	if ep != nil {
+		ep.DecRef()
+		return true
+	}
+
+	return false
+}
+
 // findEndpoint finds the endpoint, if any, with the given address.
 func (n *NIC) findEndpoint(protocol tcpip.NetworkProtocolNumber, address tcpip.Address, peb PrimaryEndpointBehavior) AssignableAddressEndpoint {
 	return n.getAddressOrCreateTemp(protocol, address, peb, spoofing)
@@ -369,11 +433,17 @@ func (n *NIC) getAddressOrCreateTemp(protocol tcpip.NetworkProtocolNumber, addre
 // getAddressOrCreateTempInner is like getAddressEpOrCreateTemp except a boolean
 // is passed to indicate whether or not we should generate temporary endpoints.
 func (n *NIC) getAddressOrCreateTempInner(protocol tcpip.NetworkProtocolNumber, address tcpip.Address, createTemp bool, peb PrimaryEndpointBehavior) AssignableAddressEndpoint {
-	if ep, ok := n.networkEndpoints[protocol]; ok {
-		return ep.AcquireAssignedAddress(address, createTemp, peb)
+	ep, ok := n.networkEndpoints[protocol]
+	if !ok {
+		return nil
 	}
 
-	return nil
+	addressableEndpoint, ok := ep.(AddressableEndpoint)
+	if !ok {
+		return nil
+	}
+
+	return addressableEndpoint.AcquireAssignedAddress(address, createTemp, peb)
 }
 
 // addAddress adds a new address to n, so that it starts accepting packets
@@ -384,7 +454,12 @@ func (n *NIC) addAddress(protocolAddress tcpip.ProtocolAddress, peb PrimaryEndpo
 		return tcpip.ErrUnknownProtocol
 	}
 
-	addressEndpoint, err := ep.AddAndAcquirePermanentAddress(protocolAddress.AddressWithPrefix, peb, AddressConfigStatic, false /* deprecated */)
+	addressableEndpoint, ok := ep.(AddressableEndpoint)
+	if !ok {
+		return tcpip.ErrNotSupported
+	}
+
+	addressEndpoint, err := addressableEndpoint.AddAndAcquirePermanentAddress(protocolAddress.AddressWithPrefix, peb, AddressConfigStatic, false /* deprecated */)
 	if err == nil {
 		// We have no need for the address endpoint.
 		addressEndpoint.DecRef()
@@ -397,7 +472,12 @@ func (n *NIC) addAddress(protocolAddress tcpip.ProtocolAddress, peb PrimaryEndpo
 func (n *NIC) allPermanentAddresses() []tcpip.ProtocolAddress {
 	var addrs []tcpip.ProtocolAddress
 	for p, ep := range n.networkEndpoints {
-		for _, a := range ep.PermanentAddresses() {
+		addressableEndpoint, ok := ep.(AddressableEndpoint)
+		if !ok {
+			continue
+		}
+
+		for _, a := range addressableEndpoint.PermanentAddresses() {
 			addrs = append(addrs, tcpip.ProtocolAddress{Protocol: p, AddressWithPrefix: a})
 		}
 	}
@@ -408,7 +488,12 @@ func (n *NIC) allPermanentAddresses() []tcpip.ProtocolAddress {
 func (n *NIC) primaryAddresses() []tcpip.ProtocolAddress {
 	var addrs []tcpip.ProtocolAddress
 	for p, ep := range n.networkEndpoints {
-		for _, a := range ep.PrimaryAddresses() {
+		addressableEndpoint, ok := ep.(AddressableEndpoint)
+		if !ok {
+			continue
+		}
+
+		for _, a := range addressableEndpoint.PrimaryAddresses() {
 			addrs = append(addrs, tcpip.ProtocolAddress{Protocol: p, AddressWithPrefix: a})
 		}
 	}
@@ -426,13 +511,23 @@ func (n *NIC) primaryAddress(proto tcpip.NetworkProtocolNumber) tcpip.AddressWit
 		return tcpip.AddressWithPrefix{}
 	}
 
-	return ep.MainAddress()
+	addressableEndpoint, ok := ep.(AddressableEndpoint)
+	if !ok {
+		return tcpip.AddressWithPrefix{}
+	}
+
+	return addressableEndpoint.MainAddress()
 }
 
 // removeAddress removes an address from n.
 func (n *NIC) removeAddress(addr tcpip.Address) *tcpip.Error {
 	for _, ep := range n.networkEndpoints {
-		if err := ep.RemovePermanentAddress(addr); err == tcpip.ErrBadLocalAddress {
+		addressableEndpoint, ok := ep.(AddressableEndpoint)
+		if !ok {
+			continue
+		}
+
+		if err := addressableEndpoint.RemovePermanentAddress(addr); err == tcpip.ErrBadLocalAddress {
 			continue
 		} else {
 			return err
@@ -505,8 +600,7 @@ func (n *NIC) joinGroup(protocol tcpip.NetworkProtocolNumber, addr tcpip.Address
 		return tcpip.ErrNotSupported
 	}
 
-	_, err := gep.JoinGroup(addr)
-	return err
+	return gep.JoinGroup(addr)
 }
 
 // leaveGroup decrements the count for the given multicast address, and when it
@@ -522,11 +616,7 @@ func (n *NIC) leaveGroup(protocol tcpip.NetworkProtocolNumber, addr tcpip.Addres
 		return tcpip.ErrNotSupported
 	}
 
-	if _, err := gep.LeaveGroup(addr); err != nil {
-		return err
-	}
-
-	return nil
+	return gep.LeaveGroup(addr)
 }
 
 // isInGroup returns true if n has joined the multicast group addr.
@@ -545,13 +635,6 @@ func (n *NIC) isInGroup(addr tcpip.Address) bool {
 	return false
 }
 
-func (n *NIC) handlePacket(protocol tcpip.NetworkProtocolNumber, dst, src tcpip.Address, remotelinkAddr tcpip.LinkAddress, addressEndpoint AssignableAddressEndpoint, pkt *PacketBuffer) {
-	r := makeRoute(protocol, dst, src, n, addressEndpoint, false /* handleLocal */, false /* multicastLoop */)
-	defer r.Release()
-	r.RemoteLinkAddress = remotelinkAddr
-	n.getNetworkEndpoint(protocol).HandlePacket(&r, pkt)
-}
-
 // DeliverNetworkPacket finds the appropriate network protocol endpoint and
 // hands the packet over for further processing. This function is called when
 // the NIC receives a packet from the link endpoint.
@@ -573,7 +656,7 @@ func (n *NIC) DeliverNetworkPacket(remote, local tcpip.LinkAddress, protocol tcp
 	n.stats.Rx.Packets.Increment()
 	n.stats.Rx.Bytes.IncrementBy(uint64(pkt.Data.Size()))
 
-	netProto, ok := n.stack.networkProtocols[protocol]
+	networkEndpoint, ok := n.networkEndpoints[protocol]
 	if !ok {
 		n.mu.RUnlock()
 		n.stack.stats.UnknownProtocolRcvdPackets.Increment()
@@ -585,23 +668,29 @@ func (n *NIC) DeliverNetworkPacket(remote, local tcpip.LinkAddress, protocol tcp
 	if local == "" {
 		local = n.LinkEndpoint.LinkAddress()
 	}
+	pkt.RXTransportChecksumValidated = n.LinkEndpoint.Capabilities()&CapabilityRXChecksumOffload != 0
 
 	// Are any packet type sockets listening for this network protocol?
-	packetEPs := n.mu.packetEPs[protocol]
-	// Add any other packet type sockets that may be listening for all protocols.
-	packetEPs = append(packetEPs, n.mu.packetEPs[header.EthernetProtocolAll]...)
+	protoEPs := n.mu.packetEPs[protocol]
+	// Other packet type sockets that are listening for all protocols.
+	anyEPs := n.mu.packetEPs[header.EthernetProtocolAll]
 	n.mu.RUnlock()
-	for _, ep := range packetEPs {
+
+	// Deliver to interested packet endpoints without holding NIC lock.
+	deliverPacketEPs := func(ep PacketEndpoint) {
 		p := pkt.Clone()
 		p.PktType = tcpip.PacketHost
 		ep.HandlePacket(n.id, local, protocol, p)
 	}
-
-	if netProto.Number() == header.IPv4ProtocolNumber || netProto.Number() == header.IPv6ProtocolNumber {
-		n.stack.stats.IP.PacketsReceived.Increment()
+	if protoEPs != nil {
+		protoEPs.forEach(deliverPacketEPs)
+	}
+	if anyEPs != nil {
+		anyEPs.forEach(deliverPacketEPs)
 	}
 
 	// Parse headers.
+	netProto := n.stack.NetworkProtocolInstance(protocol)
 	transProtoNum, hasTransportHdr, ok := netProto.Parse(pkt)
 	if !ok {
 		// The packet is too small to contain a network header.
@@ -616,9 +705,8 @@ func (n *NIC) DeliverNetworkPacket(remote, local tcpip.LinkAddress, protocol tcp
 		}
 	}
 
-	src, dst := netProto.ParseAddresses(pkt.NetworkHeader().View())
-
 	if n.stack.handleLocal && !n.IsLoopback() {
+		src, _ := netProto.ParseAddresses(pkt.NetworkHeader().View())
 		if r := n.getAddress(protocol, src); r != nil {
 			r.DecRef()
 
@@ -631,78 +719,7 @@ func (n *NIC) DeliverNetworkPacket(remote, local tcpip.LinkAddress, protocol tcp
 		}
 	}
 
-	// Loopback traffic skips the prerouting chain.
-	if !n.IsLoopback() {
-		// iptables filtering.
-		ipt := n.stack.IPTables()
-		address := n.primaryAddress(protocol)
-		if ok := ipt.Check(Prerouting, pkt, nil, nil, address.Address, ""); !ok {
-			// iptables is telling us to drop the packet.
-			n.stack.stats.IP.IPTablesPreroutingDropped.Increment()
-			return
-		}
-	}
-
-	if addressEndpoint := n.getAddress(protocol, dst); addressEndpoint != nil {
-		n.handlePacket(protocol, dst, src, remote, addressEndpoint, pkt)
-		return
-	}
-
-	// This NIC doesn't care about the packet. Find a NIC that cares about the
-	// packet and forward it to the NIC.
-	//
-	// TODO: Should we be forwarding the packet even if promiscuous?
-	if n.stack.Forwarding(protocol) {
-		r, err := n.stack.FindRoute(0, "", dst, protocol, false /* multicastLoop */)
-		if err != nil {
-			n.stack.stats.IP.InvalidDestinationAddressesReceived.Increment()
-			return
-		}
-
-		// Found a NIC.
-		n := r.nic
-		if addressEndpoint := n.getAddressOrCreateTempInner(protocol, dst, false, NeverPrimaryEndpoint); addressEndpoint != nil {
-			if n.isValidForOutgoing(addressEndpoint) {
-				r.LocalLinkAddress = n.LinkEndpoint.LinkAddress()
-				r.RemoteLinkAddress = remote
-				r.RemoteAddress = src
-				// TODO(b/123449044): Update the source NIC as well.
-				n.getNetworkEndpoint(protocol).HandlePacket(&r, pkt)
-				addressEndpoint.DecRef()
-				r.Release()
-				return
-			}
-
-			addressEndpoint.DecRef()
-		}
-
-		// n doesn't have a destination endpoint.
-		// Send the packet out of n.
-		// TODO(gvisor.dev/issue/1085): According to the RFC, we must decrease the TTL field for ipv4/ipv6.
-
-		// pkt may have set its header and may not have enough headroom for
-		// link-layer header for the other link to prepend. Here we create a new
-		// packet to forward.
-		fwdPkt := NewPacketBuffer(PacketBufferOptions{
-			ReserveHeaderBytes: int(n.LinkEndpoint.MaxHeaderLength()),
-			// We need to do a deep copy of the IP packet because WritePacket (and
-			// friends) take ownership of the packet buffer, but we do not own it.
-			Data: PayloadSince(pkt.NetworkHeader()).ToVectorisedView(),
-		})
-
-		// TODO(b/143425874) Decrease the TTL field in forwarded packets.
-		if err := n.WritePacket(&r, nil, protocol, fwdPkt); err != nil {
-			n.stack.stats.IP.InvalidDestinationAddressesReceived.Increment()
-		}
-
-		r.Release()
-		return
-	}
-
-	// If a packet socket handled the packet, don't treat it as invalid.
-	if len(packetEPs) == 0 {
-		n.stack.stats.IP.InvalidDestinationAddressesReceived.Increment()
-	}
+	networkEndpoint.HandlePacket(pkt)
 }
 
 // DeliverOutboundPacket implements NetworkDispatcher.DeliverOutboundPacket.
@@ -711,21 +728,22 @@ func (n *NIC) DeliverOutboundPacket(remote, local tcpip.LinkAddress, protocol tc
 	// We do not deliver to protocol specific packet endpoints as on Linux
 	// only ETH_P_ALL endpoints get outbound packets.
 	// Add any other packet sockets that maybe listening for all protocols.
-	packetEPs := n.mu.packetEPs[header.EthernetProtocolAll]
+	eps := n.mu.packetEPs[header.EthernetProtocolAll]
 	n.mu.RUnlock()
-	for _, ep := range packetEPs {
+
+	eps.forEach(func(ep PacketEndpoint) {
 		p := pkt.Clone()
 		p.PktType = tcpip.PacketOutgoing
 		// Add the link layer header as outgoing packets are intercepted
 		// before the link layer header is created.
 		n.LinkEndpoint.AddHeader(local, remote, protocol, p)
 		ep.HandlePacket(n.id, local, protocol, p)
-	}
+	})
 }
 
 // DeliverTransportPacket delivers the packets to the appropriate transport
 // protocol endpoint.
-func (n *NIC) DeliverTransportPacket(r *Route, protocol tcpip.TransportProtocolNumber, pkt *PacketBuffer) TransportPacketDisposition {
+func (n *NIC) DeliverTransportPacket(protocol tcpip.TransportProtocolNumber, pkt *PacketBuffer) TransportPacketDisposition {
 	state, ok := n.stack.transportProtocols[protocol]
 	if !ok {
 		n.stack.stats.UnknownProtocolRcvdPackets.Increment()
@@ -737,7 +755,7 @@ func (n *NIC) DeliverTransportPacket(r *Route, protocol tcpip.TransportProtocolN
 	// Raw socket packets are delivered based solely on the transport
 	// protocol number. We do not inspect the payload to ensure it's
 	// validly formed.
-	n.stack.demux.deliverRawPacket(r, protocol, pkt)
+	n.stack.demux.deliverRawPacket(protocol, pkt)
 
 	// TransportHeader is empty only when pkt is an ICMP packet or was reassembled
 	// from fragments.
@@ -766,14 +784,25 @@ func (n *NIC) DeliverTransportPacket(r *Route, protocol tcpip.TransportProtocolN
 		return TransportPacketHandled
 	}
 
-	id := TransportEndpointID{dstPort, r.LocalAddress, srcPort, r.RemoteAddress}
-	if n.stack.demux.deliverPacket(r, protocol, pkt, id) {
+	netProto, ok := n.stack.networkProtocols[pkt.NetworkProtocolNumber]
+	if !ok {
+		panic(fmt.Sprintf("expected network protocol = %d, have = %#v", pkt.NetworkProtocolNumber, n.stack.networkProtocolNumbers()))
+	}
+
+	src, dst := netProto.ParseAddresses(pkt.NetworkHeader().View())
+	id := TransportEndpointID{
+		LocalPort:     dstPort,
+		LocalAddress:  dst,
+		RemotePort:    srcPort,
+		RemoteAddress: src,
+	}
+	if n.stack.demux.deliverPacket(protocol, pkt, id) {
 		return TransportPacketHandled
 	}
 
 	// Try to deliver to per-stack default handler.
 	if state.defaultHandler != nil {
-		if state.defaultHandler(r, id, pkt) {
+		if state.defaultHandler(id, pkt) {
 			return TransportPacketHandled
 		}
 	}
@@ -781,7 +810,7 @@ func (n *NIC) DeliverTransportPacket(r *Route, protocol tcpip.TransportProtocolN
 	// We could not find an appropriate destination for this packet so
 	// give the protocol specific error handler a chance to handle it.
 	// If it doesn't handle it then we should do so.
-	switch res := transProto.HandleUnknownDestinationPacket(r, id, pkt); res {
+	switch res := transProto.HandleUnknownDestinationPacket(id, pkt); res {
 	case UnknownDestinationPacketMalformed:
 		n.stack.stats.MalformedRcvdPackets.Increment()
 		return TransportPacketHandled
@@ -862,7 +891,7 @@ func (n *NIC) registerPacketEndpoint(netProto tcpip.NetworkProtocolNumber, ep Pa
 	if !ok {
 		return tcpip.ErrNotSupported
 	}
-	n.mu.packetEPs[netProto] = append(eps, ep)
+	eps.add(ep)
 
 	return nil
 }
@@ -875,17 +904,11 @@ func (n *NIC) unregisterPacketEndpoint(netProto tcpip.NetworkProtocolNumber, ep
 	if !ok {
 		return
 	}
-
-	for i, epOther := range eps {
-		if epOther == ep {
-			n.mu.packetEPs[netProto] = append(eps[:i], eps[i+1:]...)
-			return
-		}
-	}
+	eps.remove(ep)
 }
 
 // isValidForOutgoing returns true if the endpoint can be used to send out a
-// packet. It requires the endpoint to not be marked expired (i.e., its address)
+// packet. It requires the endpoint to not be marked expired (i.e., its address
 // has been removed) unless the NIC is in spoofing mode, or temporary.
 func (n *NIC) isValidForOutgoing(ep AssignableAddressEndpoint) bool {
 	n.mu.RLock()
diff --git a/pkg/tcpip/stack/nic_test.go b/pkg/tcpip/stack/nic_test.go
index 97a96af62..5b5c58afb 100644
--- a/pkg/tcpip/stack/nic_test.go
+++ b/pkg/tcpip/stack/nic_test.go
@@ -83,8 +83,7 @@ func (*testIPv6Endpoint) WriteHeaderIncludedPacket(*Route, *PacketBuffer) *tcpip
 }
 
 // HandlePacket implements NetworkEndpoint.HandlePacket.
-func (*testIPv6Endpoint) HandlePacket(*Route, *PacketBuffer) {
-}
+func (*testIPv6Endpoint) HandlePacket(*PacketBuffer) {}
 
 // Close implements NetworkEndpoint.Close.
 func (e *testIPv6Endpoint) Close() {
@@ -169,7 +168,7 @@ func (*testIPv6Protocol) LinkAddressProtocol() tcpip.NetworkProtocolNumber {
 }
 
 // LinkAddressRequest implements LinkAddressResolver.
-func (*testIPv6Protocol) LinkAddressRequest(_, _ tcpip.Address, _ tcpip.LinkAddress, _ LinkEndpoint) *tcpip.Error {
+func (*testIPv6Protocol) LinkAddressRequest(_, _ tcpip.Address, _ tcpip.LinkAddress, _ NetworkInterface) *tcpip.Error {
 	return nil
 }
 
diff --git a/pkg/tcpip/stack/nud.go b/pkg/tcpip/stack/nud.go
index e1ec15487..ab629b3a4 100644
--- a/pkg/tcpip/stack/nud.go
+++ b/pkg/tcpip/stack/nud.go
@@ -129,7 +129,7 @@ type NUDDispatcher interface {
 	// the stack's operation.
 	//
 	// May be called concurrently.
-	OnNeighborAdded(nicID tcpip.NICID, ipAddr tcpip.Address, linkAddr tcpip.LinkAddress, state NeighborState, updatedAt time.Time)
+	OnNeighborAdded(tcpip.NICID, NeighborEntry)
 
 	// OnNeighborChanged will be called when an entry in a NIC's (with ID nicID)
 	// neighbor table changes state and/or link address.
@@ -138,7 +138,7 @@ type NUDDispatcher interface {
 	// the stack's operation.
 	//
 	// May be called concurrently.
-	OnNeighborChanged(nicID tcpip.NICID, ipAddr tcpip.Address, linkAddr tcpip.LinkAddress, state NeighborState, updatedAt time.Time)
+	OnNeighborChanged(tcpip.NICID, NeighborEntry)
 
 	// OnNeighborRemoved will be called when an entry is removed from a NIC's
 	// (with ID nicID) neighbor table.
@@ -147,7 +147,7 @@ type NUDDispatcher interface {
 	// the stack's operation.
 	//
 	// May be called concurrently.
-	OnNeighborRemoved(nicID tcpip.NICID, ipAddr tcpip.Address, linkAddr tcpip.LinkAddress, state NeighborState, updatedAt time.Time)
+	OnNeighborRemoved(tcpip.NICID, NeighborEntry)
 }
 
 // ReachabilityConfirmationFlags describes the flags used within a reachability
@@ -177,7 +177,7 @@ type NUDHandler interface {
 	// Neighbor Solicitation for ARP or NDP, respectively). Validation of the
 	// probe needs to be performed before calling this function since the
 	// Neighbor Cache doesn't have access to view the NIC's assigned addresses.
-	HandleProbe(remoteAddr, localAddr tcpip.Address, protocol tcpip.NetworkProtocolNumber, remoteLinkAddr tcpip.LinkAddress, linkRes LinkAddressResolver)
+	HandleProbe(remoteAddr tcpip.Address, protocol tcpip.NetworkProtocolNumber, remoteLinkAddr tcpip.LinkAddress, linkRes LinkAddressResolver)
 
 	// HandleConfirmation processes an incoming neighbor confirmation (e.g. ARP
 	// reply or Neighbor Advertisement for ARP or NDP, respectively).
diff --git a/pkg/tcpip/stack/packet_buffer.go b/pkg/tcpip/stack/packet_buffer.go
index 7f54a6de8..664cc6fa0 100644
--- a/pkg/tcpip/stack/packet_buffer.go
+++ b/pkg/tcpip/stack/packet_buffer.go
@@ -112,6 +112,16 @@ type PacketBuffer struct {
 	// PktType indicates the SockAddrLink.PacketType of the packet as defined in
 	// https://www.man7.org/linux/man-pages/man7/packet.7.html.
 	PktType tcpip.PacketType
+
+	// NICID is the ID of the interface the network packet was received at.
+	NICID tcpip.NICID
+
+	// RXTransportChecksumValidated indicates that transport checksum verification
+	// may be safely skipped.
+	RXTransportChecksumValidated bool
+
+	// NetworkPacketInfo holds an incoming packet's network-layer information.
+	NetworkPacketInfo NetworkPacketInfo
 }
 
 // NewPacketBuffer creates a new PacketBuffer with opts.
@@ -240,20 +250,33 @@ func (pk *PacketBuffer) consume(typ headerType, size int) (v buffer.View, consum
 // Clone should be called in such cases so that no modifications is done to
 // underlying packet payload.
 func (pk *PacketBuffer) Clone() *PacketBuffer {
-	newPk := &PacketBuffer{
-		PacketBufferEntry:       pk.PacketBufferEntry,
-		Data:                    pk.Data.Clone(nil),
-		headers:                 pk.headers,
-		header:                  pk.header,
-		Hash:                    pk.Hash,
-		Owner:                   pk.Owner,
-		EgressRoute:             pk.EgressRoute,
-		GSOOptions:              pk.GSOOptions,
-		NetworkProtocolNumber:   pk.NetworkProtocolNumber,
-		NatDone:                 pk.NatDone,
-		TransportProtocolNumber: pk.TransportProtocolNumber,
+	return &PacketBuffer{
+		PacketBufferEntry:            pk.PacketBufferEntry,
+		Data:                         pk.Data.Clone(nil),
+		headers:                      pk.headers,
+		header:                       pk.header,
+		Hash:                         pk.Hash,
+		Owner:                        pk.Owner,
+		GSOOptions:                   pk.GSOOptions,
+		NetworkProtocolNumber:        pk.NetworkProtocolNumber,
+		NatDone:                      pk.NatDone,
+		TransportProtocolNumber:      pk.TransportProtocolNumber,
+		PktType:                      pk.PktType,
+		NICID:                        pk.NICID,
+		RXTransportChecksumValidated: pk.RXTransportChecksumValidated,
+		NetworkPacketInfo:            pk.NetworkPacketInfo,
 	}
-	return newPk
+}
+
+// SourceLinkAddress returns the source link address of the packet.
+func (pk *PacketBuffer) SourceLinkAddress() tcpip.LinkAddress {
+	link := pk.LinkHeader().View()
+
+	if link.IsEmpty() {
+		return ""
+	}
+
+	return header.Ethernet(link).SourceAddress()
 }
 
 // Network returns the network header as a header.Network.
@@ -270,6 +293,17 @@ func (pk *PacketBuffer) Network() header.Network {
 	}
 }
 
+// CloneToInbound makes a shallow copy of the packet buffer to be used as an
+// inbound packet.
+//
+// See PacketBuffer.Data for details about how a packet buffer holds an inbound
+// packet.
+func (pk *PacketBuffer) CloneToInbound() *PacketBuffer {
+	return NewPacketBuffer(PacketBufferOptions{
+		Data: buffer.NewVectorisedView(pk.Size(), pk.Views()),
+	})
+}
+
 // headerInfo stores metadata about a header in a packet.
 type headerInfo struct {
 	// buf is the memorized slice for both prepended and consumed header.
diff --git a/pkg/tcpip/stack/pending_packets.go b/pkg/tcpip/stack/pending_packets.go
index f838eda8d..5d364a2b0 100644
--- a/pkg/tcpip/stack/pending_packets.go
+++ b/pkg/tcpip/stack/pending_packets.go
@@ -106,7 +106,7 @@ func (f *packetsPendingLinkResolution) enqueue(ch <-chan struct{}, r *Route, pro
 			} else if _, err := p.route.Resolve(nil); err != nil {
 				p.route.Stats().IP.OutgoingPacketErrors.Increment()
 			} else {
-				p.route.nic.writePacket(p.route, nil /* gso */, p.proto, p.pkt)
+				p.route.outgoingNIC.writePacket(p.route, nil /* gso */, p.proto, p.pkt)
 			}
 			p.route.Release()
 		}
diff --git a/pkg/tcpip/stack/registration.go b/pkg/tcpip/stack/registration.go
index defb9129b..b334e27c4 100644
--- a/pkg/tcpip/stack/registration.go
+++ b/pkg/tcpip/stack/registration.go
@@ -63,17 +63,24 @@ const (
 	ControlUnknown
 )
 
+// NetworkPacketInfo holds information about a network layer packet.
+type NetworkPacketInfo struct {
+	// LocalAddressBroadcast is true if the packet's local address is a broadcast
+	// address.
+	LocalAddressBroadcast bool
+}
+
 // TransportEndpoint is the interface that needs to be implemented by transport
 // protocol (e.g., tcp, udp) endpoints that can handle packets.
 type TransportEndpoint interface {
 	// UniqueID returns an unique ID for this transport endpoint.
 	UniqueID() uint64
 
-	// HandlePacket is called by the stack when new packets arrive to
-	// this transport endpoint. It sets pkt.TransportHeader.
+	// HandlePacket is called by the stack when new packets arrive to this
+	// transport endpoint. It sets the packet buffer's transport header.
 	//
-	// HandlePacket takes ownership of pkt.
-	HandlePacket(r *Route, id TransportEndpointID, pkt *PacketBuffer)
+	// HandlePacket takes ownership of the packet.
+	HandlePacket(TransportEndpointID, *PacketBuffer)
 
 	// HandleControlPacket is called by the stack when new control (e.g.
 	// ICMP) packets arrive to this transport endpoint.
@@ -105,8 +112,8 @@ type RawTransportEndpoint interface {
 	// this transport endpoint. The packet contains all data from the link
 	// layer up.
 	//
-	// HandlePacket takes ownership of pkt.
-	HandlePacket(r *Route, pkt *PacketBuffer)
+	// HandlePacket takes ownership of the packet.
+	HandlePacket(*PacketBuffer)
 }
 
 // PacketEndpoint is the interface that needs to be implemented by packet
@@ -127,7 +134,7 @@ type PacketEndpoint interface {
 	HandlePacket(nicID tcpip.NICID, addr tcpip.LinkAddress, netProto tcpip.NetworkProtocolNumber, pkt *PacketBuffer)
 }
 
-// UnknownDestinationPacketDisposition enumerates the possible return vaues from
+// UnknownDestinationPacketDisposition enumerates the possible return values from
 // HandleUnknownDestinationPacket().
 type UnknownDestinationPacketDisposition int
 
@@ -172,9 +179,9 @@ type TransportProtocol interface {
 	// protocol that don't match any existing endpoint. For example,
 	// it is targeted at a port that has no listeners.
 	//
-	// HandleUnknownDestinationPacket takes ownership of pkt if it handles
+	// HandleUnknownDestinationPacket takes ownership of the packet if it handles
 	// the issue.
-	HandleUnknownDestinationPacket(r *Route, id TransportEndpointID, pkt *PacketBuffer) UnknownDestinationPacketDisposition
+	HandleUnknownDestinationPacket(TransportEndpointID, *PacketBuffer) UnknownDestinationPacketDisposition
 
 	// SetOption allows enabling/disabling protocol specific features.
 	// SetOption returns an error if the option is not supported or the
@@ -227,8 +234,8 @@ type TransportDispatcher interface {
 	//
 	// pkt.NetworkHeader must be set before calling DeliverTransportPacket.
 	//
-	// DeliverTransportPacket takes ownership of pkt.
-	DeliverTransportPacket(r *Route, protocol tcpip.TransportProtocolNumber, pkt *PacketBuffer) TransportPacketDisposition
+	// DeliverTransportPacket takes ownership of the packet.
+	DeliverTransportPacket(tcpip.TransportProtocolNumber, *PacketBuffer) TransportPacketDisposition
 
 	// DeliverTransportControlPacket delivers control packets to the
 	// appropriate transport protocol endpoint.
@@ -270,15 +277,11 @@ type NetworkHeaderParams struct {
 // An endpoint is considered to support group addressing when one or more
 // endpoints may associate themselves with the same identifier (group address).
 type GroupAddressableEndpoint interface {
-	// JoinGroup joins the spcified group.
-	//
-	// Returns true if the group was newly joined.
-	JoinGroup(group tcpip.Address) (bool, *tcpip.Error)
+	// JoinGroup joins the specified group.
+	JoinGroup(group tcpip.Address) *tcpip.Error
 
 	// LeaveGroup attempts to leave the specified group.
-	//
-	// Returns tcpip.ErrBadLocalAddress if the endpoint has not joined the group.
-	LeaveGroup(group tcpip.Address) (bool, *tcpip.Error)
+	LeaveGroup(group tcpip.Address) *tcpip.Error
 
 	// IsInGroup returns true if the endpoint is a member of the specified group.
 	IsInGroup(group tcpip.Address) bool
@@ -329,6 +332,9 @@ type AssignableAddressEndpoint interface {
 	// AddressWithPrefix returns the endpoint's address.
 	AddressWithPrefix() tcpip.AddressWithPrefix
 
+	// Subnet returns the subnet of the endpoint's address.
+	Subnet() tcpip.Subnet
+
 	// IsAssigned returns whether or not the endpoint is considered bound
 	// to its NetworkEndpoint.
 	IsAssigned(allowExpired bool) bool
@@ -364,7 +370,7 @@ type AddressEndpoint interface {
 	SetDeprecated(bool)
 }
 
-// AddressKind is the kind of of an address.
+// AddressKind is the kind of an address.
 //
 // See the values of AddressKind for more details.
 type AddressKind int
@@ -490,13 +496,17 @@ type NetworkInterface interface {
 
 	// Enabled returns true if the interface is enabled.
 	Enabled() bool
+
+	// Promiscuous returns true if the interface is in promiscuous mode.
+	Promiscuous() bool
+
+	// WritePacketToRemote writes the packet to the given remote link address.
+	WritePacketToRemote(tcpip.LinkAddress, *GSO, tcpip.NetworkProtocolNumber, *PacketBuffer) *tcpip.Error
 }
 
 // NetworkEndpoint is the interface that needs to be implemented by endpoints
 // of network layer protocols (e.g., ipv4, ipv6).
 type NetworkEndpoint interface {
-	AddressableEndpoint
-
 	// Enable enables the endpoint.
 	//
 	// Must only be called when the stack is in a state that allows the endpoint
@@ -544,7 +554,7 @@ type NetworkEndpoint interface {
 	// this network endpoint. It sets pkt.NetworkHeader.
 	//
 	// HandlePacket takes ownership of pkt.
-	HandlePacket(r *Route, pkt *PacketBuffer)
+	HandlePacket(pkt *PacketBuffer)
 
 	// Close is called when the endpoint is reomved from a stack.
 	Close()
@@ -712,10 +722,6 @@ type LinkEndpoint interface {
 	// endpoint.
 	Capabilities() LinkEndpointCapabilities
 
-	// WriteRawPacket writes a packet directly to the link. The packet
-	// should already have an ethernet header. It takes ownership of vv.
-	WriteRawPacket(vv buffer.VectorisedView) *tcpip.Error
-
 	// Attach attaches the data link layer endpoint to the network-layer
 	// dispatcher of the stack.
 	//
@@ -764,13 +770,13 @@ type InjectableLinkEndpoint interface {
 // A LinkAddressResolver is an extension to a NetworkProtocol that
 // can resolve link addresses.
 type LinkAddressResolver interface {
-	// LinkAddressRequest sends a request for the LinkAddress of addr. Broadcasts
-	// the request on the local network if remoteLinkAddr is the zero value. The
-	// request is sent on linkEP with localAddr as the source.
+	// LinkAddressRequest sends a request for the link address of the target
+	// address. The request is broadcasted on the local network if a remote link
+	// address is not provided.
 	//
-	// A valid response will cause the discovery protocol's network
-	// endpoint to call AddLinkAddress.
-	LinkAddressRequest(addr, localAddr tcpip.Address, remoteLinkAddr tcpip.LinkAddress, linkEP LinkEndpoint) *tcpip.Error
+	// The request is sent from the passed network interface. If the interface
+	// local address is unspecified, any interface local address may be used.
+	LinkAddressRequest(targetAddr, localAddr tcpip.Address, remoteLinkAddr tcpip.LinkAddress, nic NetworkInterface) *tcpip.Error
 
 	// ResolveStaticAddress attempts to resolve address without sending
 	// requests. It either resolves the name immediately or returns the
diff --git a/pkg/tcpip/stack/route.go b/pkg/tcpip/stack/route.go
index b76e2d37b..de5fe6ffe 100644
--- a/pkg/tcpip/stack/route.go
+++ b/pkg/tcpip/stack/route.go
@@ -15,20 +15,25 @@
 package stack
 
 import (
+	"fmt"
+
 	"gvisor.dev/gvisor/pkg/sleep"
+	"gvisor.dev/gvisor/pkg/sync"
 	"gvisor.dev/gvisor/pkg/tcpip"
 	"gvisor.dev/gvisor/pkg/tcpip/header"
 )
 
 // Route represents a route through the networking stack to a given destination.
+//
+// It is safe to call Route's methods from multiple goroutines.
+//
+// The exported fields are immutable.
+//
+// TODO(gvisor.dev/issue/4902): Unexpose immutable fields.
 type Route struct {
 	// RemoteAddress is the final destination of the route.
 	RemoteAddress tcpip.Address
 
-	// RemoteLinkAddress is the link-layer (MAC) address of the
-	// final destination of the route.
-	RemoteLinkAddress tcpip.LinkAddress
-
 	// LocalAddress is the local address where the route starts.
 	LocalAddress tcpip.Address
 
@@ -45,11 +50,24 @@ type Route struct {
 	// Loop controls where WritePacket should send packets.
 	Loop PacketLooping
 
-	// nic is the NIC the route goes through.
-	nic *NIC
+	// localAddressNIC is the interface the address is associated with.
+	// TODO(gvisor.dev/issue/4548): Remove this field once we can query the
+	// address's assigned status without the NIC.
+	localAddressNIC *NIC
 
-	// addressEndpoint is the local address this route is associated with.
-	addressEndpoint AssignableAddressEndpoint
+	mu struct {
+		sync.RWMutex
+
+		// localAddressEndpoint is the local address this route is associated with.
+		localAddressEndpoint AssignableAddressEndpoint
+
+		// remoteLinkAddress is the link-layer (MAC) address of the next hop in the
+		// route.
+		remoteLinkAddress tcpip.LinkAddress
+	}
+
+	// outgoingNIC is the interface this route uses to write packets.
+	outgoingNIC *NIC
 
 	// linkCache is set if link address resolution is enabled for this protocol on
 	// the route's NIC.
@@ -60,51 +78,139 @@ type Route struct {
 	linkRes LinkAddressResolver
 }
 
+// constructAndValidateRoute validates and initializes a route. It takes
+// ownership of the provided local address.
+//
+// Returns nil if validation fails.
+func constructAndValidateRoute(netProto tcpip.NetworkProtocolNumber, addressEndpoint AssignableAddressEndpoint, localAddressNIC, outgoingNIC *NIC, gateway, localAddr, remoteAddr tcpip.Address, handleLocal, multicastLoop bool) *Route {
+	if len(localAddr) == 0 {
+		localAddr = addressEndpoint.AddressWithPrefix().Address
+	}
+
+	if localAddressNIC != outgoingNIC && header.IsV6LinkLocalAddress(localAddr) {
+		addressEndpoint.DecRef()
+		return nil
+	}
+
+	// If no remote address is provided, use the local address.
+	if len(remoteAddr) == 0 {
+		remoteAddr = localAddr
+	}
+
+	r := makeRoute(
+		netProto,
+		localAddr,
+		remoteAddr,
+		outgoingNIC,
+		localAddressNIC,
+		addressEndpoint,
+		handleLocal,
+		multicastLoop,
+	)
+
+	// If the route requires us to send a packet through some gateway, do not
+	// broadcast it.
+	if len(gateway) > 0 {
+		r.NextHop = gateway
+	} else if subnet := addressEndpoint.Subnet(); subnet.IsBroadcast(remoteAddr) {
+		r.ResolveWith(header.EthernetBroadcastAddress)
+	}
+
+	return r
+}
+
 // makeRoute initializes a new route. It takes ownership of the provided
 // AssignableAddressEndpoint.
-func makeRoute(netProto tcpip.NetworkProtocolNumber, localAddr, remoteAddr tcpip.Address, nic *NIC, addressEndpoint AssignableAddressEndpoint, handleLocal, multicastLoop bool) Route {
+func makeRoute(netProto tcpip.NetworkProtocolNumber, localAddr, remoteAddr tcpip.Address, outgoingNIC, localAddressNIC *NIC, localAddressEndpoint AssignableAddressEndpoint, handleLocal, multicastLoop bool) *Route {
+	if localAddressNIC.stack != outgoingNIC.stack {
+		panic(fmt.Sprintf("cannot create a route with NICs from different stacks"))
+	}
+
+	if len(localAddr) == 0 {
+		localAddr = localAddressEndpoint.AddressWithPrefix().Address
+	}
+
 	loop := PacketOut
-	if handleLocal && localAddr != "" && remoteAddr == localAddr {
-		loop = PacketLoop
-	} else if multicastLoop && (header.IsV4MulticastAddress(remoteAddr) || header.IsV6MulticastAddress(remoteAddr)) {
-		loop |= PacketLoop
-	} else if remoteAddr == header.IPv4Broadcast {
-		loop |= PacketLoop
+
+	// TODO(gvisor.dev/issue/4689): Loopback interface loops back packets at the
+	// link endpoint level. We can remove this check once loopback interfaces
+	// loop back packets at the network layer.
+	if !outgoingNIC.IsLoopback() {
+		if handleLocal && localAddr != "" && remoteAddr == localAddr {
+			loop = PacketLoop
+		} else if multicastLoop && (header.IsV4MulticastAddress(remoteAddr) || header.IsV6MulticastAddress(remoteAddr)) {
+			loop |= PacketLoop
+		} else if remoteAddr == header.IPv4Broadcast {
+			loop |= PacketLoop
+		} else if subnet := localAddressEndpoint.AddressWithPrefix().Subnet(); subnet.IsBroadcast(remoteAddr) {
+			loop |= PacketLoop
+		}
 	}
 
-	r := Route{
+	return makeRouteInner(netProto, localAddr, remoteAddr, outgoingNIC, localAddressNIC, localAddressEndpoint, loop)
+}
+
+func makeRouteInner(netProto tcpip.NetworkProtocolNumber, localAddr, remoteAddr tcpip.Address, outgoingNIC, localAddressNIC *NIC, localAddressEndpoint AssignableAddressEndpoint, loop PacketLooping) *Route {
+	r := &Route{
 		NetProto:         netProto,
 		LocalAddress:     localAddr,
-		LocalLinkAddress: nic.LinkEndpoint.LinkAddress(),
+		LocalLinkAddress: outgoingNIC.LinkEndpoint.LinkAddress(),
 		RemoteAddress:    remoteAddr,
-		addressEndpoint:  addressEndpoint,
-		nic:              nic,
+		localAddressNIC:  localAddressNIC,
+		outgoingNIC:      outgoingNIC,
 		Loop:             loop,
 	}
 
-	if r.nic.LinkEndpoint.Capabilities()&CapabilityResolutionRequired != 0 {
-		if linkRes, ok := r.nic.stack.linkAddrResolvers[r.NetProto]; ok {
+	r.mu.Lock()
+	r.mu.localAddressEndpoint = localAddressEndpoint
+	r.mu.Unlock()
+
+	if r.outgoingNIC.LinkEndpoint.Capabilities()&CapabilityResolutionRequired != 0 {
+		if linkRes, ok := r.outgoingNIC.stack.linkAddrResolvers[r.NetProto]; ok {
 			r.linkRes = linkRes
-			r.linkCache = r.nic.stack
+			r.linkCache = r.outgoingNIC.stack
 		}
 	}
 
 	return r
 }
 
+// makeLocalRoute initializes a new local route. It takes ownership of the
+// provided AssignableAddressEndpoint.
+//
+// A local route is a route to a destination that is local to the stack.
+func makeLocalRoute(netProto tcpip.NetworkProtocolNumber, localAddr, remoteAddr tcpip.Address, outgoingNIC, localAddressNIC *NIC, localAddressEndpoint AssignableAddressEndpoint) *Route {
+	loop := PacketLoop
+	// TODO(gvisor.dev/issue/4689): Loopback interface loops back packets at the
+	// link endpoint level. We can remove this check once loopback interfaces
+	// loop back packets at the network layer.
+	if outgoingNIC.IsLoopback() {
+		loop = PacketOut
+	}
+	return makeRouteInner(netProto, localAddr, remoteAddr, outgoingNIC, localAddressNIC, localAddressEndpoint, loop)
+}
+
+// RemoteLinkAddress returns the link-layer (MAC) address of the next hop in
+// the route.
+func (r *Route) RemoteLinkAddress() tcpip.LinkAddress {
+	r.mu.RLock()
+	defer r.mu.RUnlock()
+	return r.mu.remoteLinkAddress
+}
+
 // NICID returns the id of the NIC from which this route originates.
 func (r *Route) NICID() tcpip.NICID {
-	return r.nic.ID()
+	return r.outgoingNIC.ID()
 }
 
 // MaxHeaderLength forwards the call to the network endpoint's implementation.
 func (r *Route) MaxHeaderLength() uint16 {
-	return r.nic.getNetworkEndpoint(r.NetProto).MaxHeaderLength()
+	return r.outgoingNIC.getNetworkEndpoint(r.NetProto).MaxHeaderLength()
 }
 
 // Stats returns a mutable copy of current stats.
 func (r *Route) Stats() tcpip.Stats {
-	return r.nic.stack.Stats()
+	return r.outgoingNIC.stack.Stats()
 }
 
 // PseudoHeaderChecksum forwards the call to the network endpoint's
@@ -113,14 +219,38 @@ func (r *Route) PseudoHeaderChecksum(protocol tcpip.TransportProtocolNumber, tot
 	return header.PseudoHeaderChecksum(protocol, r.LocalAddress, r.RemoteAddress, totalLen)
 }
 
-// Capabilities returns the link-layer capabilities of the route.
-func (r *Route) Capabilities() LinkEndpointCapabilities {
-	return r.nic.LinkEndpoint.Capabilities()
+// RequiresTXTransportChecksum returns false if the route does not require
+// transport checksums to be populated.
+func (r *Route) RequiresTXTransportChecksum() bool {
+	if r.local() {
+		return false
+	}
+	return r.outgoingNIC.LinkEndpoint.Capabilities()&CapabilityTXChecksumOffload == 0
+}
+
+// HasSoftwareGSOCapability returns true if the route supports software GSO.
+func (r *Route) HasSoftwareGSOCapability() bool {
+	return r.outgoingNIC.LinkEndpoint.Capabilities()&CapabilitySoftwareGSO != 0
+}
+
+// HasHardwareGSOCapability returns true if the route supports hardware GSO.
+func (r *Route) HasHardwareGSOCapability() bool {
+	return r.outgoingNIC.LinkEndpoint.Capabilities()&CapabilityHardwareGSO != 0
+}
+
+// HasSaveRestoreCapability returns true if the route supports save/restore.
+func (r *Route) HasSaveRestoreCapability() bool {
+	return r.outgoingNIC.LinkEndpoint.Capabilities()&CapabilitySaveRestore != 0
+}
+
+// HasDisconncetOkCapability returns true if the route supports disconnecting.
+func (r *Route) HasDisconncetOkCapability() bool {
+	return r.outgoingNIC.LinkEndpoint.Capabilities()&CapabilityDisconnectOk != 0
 }
 
 // GSOMaxSize returns the maximum GSO packet size.
 func (r *Route) GSOMaxSize() uint32 {
-	if gso, ok := r.nic.LinkEndpoint.(GSOEndpoint); ok {
+	if gso, ok := r.outgoingNIC.LinkEndpoint.(GSOEndpoint); ok {
 		return gso.GSOMaxSize()
 	}
 	return 0
@@ -129,7 +259,9 @@ func (r *Route) GSOMaxSize() uint32 {
 // ResolveWith immediately resolves a route with the specified remote link
 // address.
 func (r *Route) ResolveWith(addr tcpip.LinkAddress) {
-	r.RemoteLinkAddress = addr
+	r.mu.Lock()
+	defer r.mu.Unlock()
+	r.mu.remoteLinkAddress = addr
 }
 
 // Resolve attempts to resolve the link address if necessary. Returns ErrWouldBlock in
@@ -142,7 +274,10 @@ func (r *Route) ResolveWith(addr tcpip.LinkAddress) {
 //
 // The NIC r uses must not be locked.
 func (r *Route) Resolve(waker *sleep.Waker) (<-chan struct{}, *tcpip.Error) {
-	if !r.IsResolutionRequired() {
+	r.mu.Lock()
+	defer r.mu.Unlock()
+
+	if !r.isResolutionRequiredRLocked() {
 		// Nothing to do if there is no cache (which does the resolution on cache miss) or
 		// link address is already known.
 		return nil, nil
@@ -152,26 +287,33 @@ func (r *Route) Resolve(waker *sleep.Waker) (<-chan struct{}, *tcpip.Error) {
 	if nextAddr == "" {
 		// Local link address is already known.
 		if r.RemoteAddress == r.LocalAddress {
-			r.RemoteLinkAddress = r.LocalLinkAddress
+			r.mu.remoteLinkAddress = r.LocalLinkAddress
 			return nil, nil
 		}
 		nextAddr = r.RemoteAddress
 	}
 
-	if neigh := r.nic.neigh; neigh != nil {
-		entry, ch, err := neigh.entry(nextAddr, r.LocalAddress, r.linkRes, waker)
+	// If specified, the local address used for link address resolution must be an
+	// address on the outgoing interface.
+	var linkAddressResolutionRequestLocalAddr tcpip.Address
+	if r.localAddressNIC == r.outgoingNIC {
+		linkAddressResolutionRequestLocalAddr = r.LocalAddress
+	}
+
+	if neigh := r.outgoingNIC.neigh; neigh != nil {
+		entry, ch, err := neigh.entry(nextAddr, linkAddressResolutionRequestLocalAddr, r.linkRes, waker)
 		if err != nil {
 			return ch, err
 		}
-		r.RemoteLinkAddress = entry.LinkAddr
+		r.mu.remoteLinkAddress = entry.LinkAddr
 		return nil, nil
 	}
 
-	linkAddr, ch, err := r.linkCache.GetLinkAddress(r.nic.ID(), nextAddr, r.LocalAddress, r.NetProto, waker)
+	linkAddr, ch, err := r.linkCache.GetLinkAddress(r.outgoingNIC.ID(), nextAddr, linkAddressResolutionRequestLocalAddr, r.NetProto, waker)
 	if err != nil {
 		return ch, err
 	}
-	r.RemoteLinkAddress = linkAddr
+	r.mu.remoteLinkAddress = linkAddr
 	return nil, nil
 }
 
@@ -182,100 +324,146 @@ func (r *Route) RemoveWaker(waker *sleep.Waker) {
 		nextAddr = r.RemoteAddress
 	}
 
-	if neigh := r.nic.neigh; neigh != nil {
+	if neigh := r.outgoingNIC.neigh; neigh != nil {
 		neigh.removeWaker(nextAddr, waker)
 		return
 	}
 
-	r.linkCache.RemoveWaker(r.nic.ID(), nextAddr, waker)
+	r.linkCache.RemoveWaker(r.outgoingNIC.ID(), nextAddr, waker)
+}
+
+// local returns true if the route is a local route.
+func (r *Route) local() bool {
+	return r.Loop == PacketLoop || r.outgoingNIC.IsLoopback()
 }
 
 // IsResolutionRequired returns true if Resolve() must be called to resolve
-// the link address before the this route can be written to.
+// the link address before the route can be written to.
 //
-// The NIC r uses must not be locked.
+// The NICs the route is associated with must not be locked.
 func (r *Route) IsResolutionRequired() bool {
-	if r.nic.neigh != nil {
-		return r.nic.isValidForOutgoing(r.addressEndpoint) && r.linkRes != nil && r.RemoteLinkAddress == ""
+	r.mu.RLock()
+	defer r.mu.RUnlock()
+	return r.isResolutionRequiredRLocked()
+}
+
+func (r *Route) isResolutionRequiredRLocked() bool {
+	if !r.isValidForOutgoingRLocked() || r.mu.remoteLinkAddress != "" || r.local() {
+		return false
 	}
-	return r.nic.isValidForOutgoing(r.addressEndpoint) && r.linkCache != nil && r.RemoteLinkAddress == ""
+
+	return (r.outgoingNIC.neigh != nil && r.linkRes != nil) || r.linkCache != nil
+}
+
+func (r *Route) isValidForOutgoing() bool {
+	r.mu.RLock()
+	defer r.mu.RUnlock()
+	return r.isValidForOutgoingRLocked()
+}
+
+func (r *Route) isValidForOutgoingRLocked() bool {
+	if !r.outgoingNIC.Enabled() {
+		return false
+	}
+
+	localAddressEndpoint := r.mu.localAddressEndpoint
+	if localAddressEndpoint == nil || !r.localAddressNIC.isValidForOutgoing(localAddressEndpoint) {
+		return false
+	}
+
+	// If the source NIC and outgoing NIC are different, make sure the stack has
+	// forwarding enabled, or the packet will be handled locally.
+	if r.outgoingNIC != r.localAddressNIC && !r.outgoingNIC.stack.Forwarding(r.NetProto) && (!r.outgoingNIC.stack.handleLocal || !r.outgoingNIC.hasAddress(r.NetProto, r.RemoteAddress)) {
+		return false
+	}
+
+	return true
 }
 
 // WritePacket writes the packet through the given route.
 func (r *Route) WritePacket(gso *GSO, params NetworkHeaderParams, pkt *PacketBuffer) *tcpip.Error {
-	if !r.nic.isValidForOutgoing(r.addressEndpoint) {
+	if !r.isValidForOutgoing() {
 		return tcpip.ErrInvalidEndpointState
 	}
 
-	return r.nic.getNetworkEndpoint(r.NetProto).WritePacket(r, gso, params, pkt)
+	return r.outgoingNIC.getNetworkEndpoint(r.NetProto).WritePacket(r, gso, params, pkt)
 }
 
 // WritePackets writes a list of n packets through the given route and returns
 // the number of packets written.
 func (r *Route) WritePackets(gso *GSO, pkts PacketBufferList, params NetworkHeaderParams) (int, *tcpip.Error) {
-	if !r.nic.isValidForOutgoing(r.addressEndpoint) {
+	if !r.isValidForOutgoing() {
 		return 0, tcpip.ErrInvalidEndpointState
 	}
 
-	return r.nic.getNetworkEndpoint(r.NetProto).WritePackets(r, gso, pkts, params)
+	return r.outgoingNIC.getNetworkEndpoint(r.NetProto).WritePackets(r, gso, pkts, params)
 }
 
 // WriteHeaderIncludedPacket writes a packet already containing a network
 // header through the given route.
 func (r *Route) WriteHeaderIncludedPacket(pkt *PacketBuffer) *tcpip.Error {
-	if !r.nic.isValidForOutgoing(r.addressEndpoint) {
+	if !r.isValidForOutgoing() {
 		return tcpip.ErrInvalidEndpointState
 	}
 
-	return r.nic.getNetworkEndpoint(r.NetProto).WriteHeaderIncludedPacket(r, pkt)
+	return r.outgoingNIC.getNetworkEndpoint(r.NetProto).WriteHeaderIncludedPacket(r, pkt)
 }
 
 // DefaultTTL returns the default TTL of the underlying network endpoint.
 func (r *Route) DefaultTTL() uint8 {
-	return r.nic.getNetworkEndpoint(r.NetProto).DefaultTTL()
+	return r.outgoingNIC.getNetworkEndpoint(r.NetProto).DefaultTTL()
 }
 
 // MTU returns the MTU of the underlying network endpoint.
 func (r *Route) MTU() uint32 {
-	return r.nic.getNetworkEndpoint(r.NetProto).MTU()
+	return r.outgoingNIC.getNetworkEndpoint(r.NetProto).MTU()
 }
 
 // Release frees all resources associated with the route.
 func (r *Route) Release() {
-	if r.addressEndpoint != nil {
-		r.addressEndpoint.DecRef()
-		r.addressEndpoint = nil
+	r.mu.Lock()
+	defer r.mu.Unlock()
+
+	if r.mu.localAddressEndpoint != nil {
+		r.mu.localAddressEndpoint.DecRef()
+		r.mu.localAddressEndpoint = nil
 	}
 }
 
 // Clone clones the route.
-func (r *Route) Clone() Route {
-	if r.addressEndpoint != nil {
-		_ = r.addressEndpoint.IncRef()
+func (r *Route) Clone() *Route {
+	r.mu.RLock()
+	defer r.mu.RUnlock()
+
+	newRoute := &Route{
+		RemoteAddress:    r.RemoteAddress,
+		LocalAddress:     r.LocalAddress,
+		LocalLinkAddress: r.LocalLinkAddress,
+		NextHop:          r.NextHop,
+		NetProto:         r.NetProto,
+		Loop:             r.Loop,
+		localAddressNIC:  r.localAddressNIC,
+		outgoingNIC:      r.outgoingNIC,
+		linkCache:        r.linkCache,
+		linkRes:          r.linkRes,
 	}
-	return *r
-}
 
-// MakeLoopedRoute duplicates the given route with special handling for routes
-// used for sending multicast or broadcast packets. In those cases the
-// multicast/broadcast address is the remote address when sending out, but for
-// incoming (looped) packets it becomes the local address. Similarly, the local
-// interface address that was the local address going out becomes the remote
-// address coming in. This is different to unicast routes where local and
-// remote addresses remain the same as they identify location (local vs remote)
-// not direction (source vs destination).
-func (r *Route) MakeLoopedRoute() Route {
-	l := r.Clone()
-	if r.RemoteAddress == header.IPv4Broadcast || header.IsV4MulticastAddress(r.RemoteAddress) || header.IsV6MulticastAddress(r.RemoteAddress) {
-		l.RemoteAddress, l.LocalAddress = l.LocalAddress, l.RemoteAddress
-		l.RemoteLinkAddress = l.LocalLinkAddress
+	newRoute.mu.Lock()
+	defer newRoute.mu.Unlock()
+	newRoute.mu.localAddressEndpoint = r.mu.localAddressEndpoint
+	if newRoute.mu.localAddressEndpoint != nil {
+		if !newRoute.mu.localAddressEndpoint.IncRef() {
+			panic(fmt.Sprintf("failed to increment reference count for local address endpoint = %s", newRoute.LocalAddress))
+		}
 	}
-	return l
+	newRoute.mu.remoteLinkAddress = r.mu.remoteLinkAddress
+
+	return newRoute
 }
 
 // Stack returns the instance of the Stack that owns this route.
 func (r *Route) Stack() *Stack {
-	return r.nic.stack
+	return r.outgoingNIC.stack
 }
 
 func (r *Route) isV4Broadcast(addr tcpip.Address) bool {
@@ -283,7 +471,14 @@ func (r *Route) isV4Broadcast(addr tcpip.Address) bool {
 		return true
 	}
 
-	subnet := r.addressEndpoint.AddressWithPrefix().Subnet()
+	r.mu.RLock()
+	localAddressEndpoint := r.mu.localAddressEndpoint
+	r.mu.RUnlock()
+	if localAddressEndpoint == nil {
+		return false
+	}
+
+	subnet := localAddressEndpoint.Subnet()
 	return subnet.IsBroadcast(addr)
 }
 
@@ -293,26 +488,3 @@ func (r *Route) IsOutboundBroadcast() bool {
 	// Only IPv4 has a notion of broadcast.
 	return r.isV4Broadcast(r.RemoteAddress)
 }
-
-// IsInboundBroadcast returns true if the route is for an inbound broadcast
-// packet.
-func (r *Route) IsInboundBroadcast() bool {
-	// Only IPv4 has a notion of broadcast.
-	return r.isV4Broadcast(r.LocalAddress)
-}
-
-// ReverseRoute returns new route with given source and destination address.
-func (r *Route) ReverseRoute(src tcpip.Address, dst tcpip.Address) Route {
-	return Route{
-		NetProto:          r.NetProto,
-		LocalAddress:      dst,
-		LocalLinkAddress:  r.RemoteLinkAddress,
-		RemoteAddress:     src,
-		RemoteLinkAddress: r.LocalLinkAddress,
-		Loop:              r.Loop,
-		addressEndpoint:   r.addressEndpoint,
-		nic:               r.nic,
-		linkCache:         r.linkCache,
-		linkRes:           r.linkRes,
-	}
-}
diff --git a/pkg/tcpip/stack/stack.go b/pkg/tcpip/stack/stack.go
index 3a07577c8..dc4f5b3e7 100644
--- a/pkg/tcpip/stack/stack.go
+++ b/pkg/tcpip/stack/stack.go
@@ -22,6 +22,7 @@ package stack
 import (
 	"bytes"
 	"encoding/binary"
+	"fmt"
 	mathrand "math/rand"
 	"sync/atomic"
 	"time"
@@ -52,7 +53,7 @@ const (
 
 type transportProtocolState struct {
 	proto          TransportProtocol
-	defaultHandler func(r *Route, id TransportEndpointID, pkt *PacketBuffer) bool
+	defaultHandler func(id TransportEndpointID, pkt *PacketBuffer) bool
 }
 
 // TCPProbeFunc is the expected function type for a TCP probe function to be
@@ -81,6 +82,7 @@ type TCPRACKState struct {
 	FACK        seqnum.Value
 	RTT         time.Duration
 	Reord       bool
+	DSACKSeen   bool
 }
 
 // TCPEndpointID is the unique 4 tuple that identifies a given endpoint.
@@ -518,6 +520,10 @@ type Options struct {
 	//
 	// RandSource must be thread-safe.
 	RandSource mathrand.Source
+
+	// IPTables are the initial iptables rules. If nil, iptables will allow
+	// all traffic.
+	IPTables *IPTables
 }
 
 // TransportEndpointInfo holds useful information about a transport endpoint
@@ -620,6 +626,10 @@ func New(opts Options) *Stack {
 		randSrc = &lockedRandomSource{src: mathrand.NewSource(generateRandInt64())}
 	}
 
+	if opts.IPTables == nil {
+		opts.IPTables = DefaultTables()
+	}
+
 	opts.NUDConfigs.resetInvalidFields()
 
 	s := &Stack{
@@ -633,7 +643,7 @@ func New(opts Options) *Stack {
 		clock:              clock,
 		stats:              opts.Stats.FillIn(),
 		handleLocal:        opts.HandleLocal,
-		tables:             DefaultTables(),
+		tables:             opts.IPTables,
 		icmpRateLimiter:    NewICMPRateLimiter(),
 		seed:               generateRandUint32(),
 		nudConfigs:         opts.NUDConfigs,
@@ -751,7 +761,7 @@ func (s *Stack) TransportProtocolOption(transport tcpip.TransportProtocolNumber,
 //
 // It must be called only during initialization of the stack. Changing it as the
 // stack is operating is not supported.
-func (s *Stack) SetTransportProtocolHandler(p tcpip.TransportProtocolNumber, h func(*Route, TransportEndpointID, *PacketBuffer) bool) {
+func (s *Stack) SetTransportProtocolHandler(p tcpip.TransportProtocolNumber, h func(TransportEndpointID, *PacketBuffer) bool) {
 	state := s.transportProtocols[p]
 	if state != nil {
 		state.defaultHandler = h
@@ -830,6 +840,20 @@ func (s *Stack) AddRoute(route tcpip.Route) {
 	s.routeTable = append(s.routeTable, route)
 }
 
+// RemoveRoutes removes matching routes from the route table.
+func (s *Stack) RemoveRoutes(match func(tcpip.Route) bool) {
+	s.mu.Lock()
+	defer s.mu.Unlock()
+
+	var filteredRoutes []tcpip.Route
+	for _, route := range s.routeTable {
+		if !match(route) {
+			filteredRoutes = append(filteredRoutes, route)
+		}
+	}
+	s.routeTable = filteredRoutes
+}
+
 // NewEndpoint creates a new transport layer endpoint of the given protocol.
 func (s *Stack) NewEndpoint(transport tcpip.TransportProtocolNumber, network tcpip.NetworkProtocolNumber, waiterQueue *waiter.Queue) (tcpip.Endpoint, *tcpip.Error) {
 	t, ok := s.transportProtocols[transport]
@@ -1057,7 +1081,7 @@ func (s *Stack) NICInfo() map[tcpip.NICID]NICInfo {
 		flags := NICStateFlags{
 			Up:          true, // Netstack interfaces are always up.
 			Running:     nic.Enabled(),
-			Promiscuous: nic.isPromiscuousMode(),
+			Promiscuous: nic.Promiscuous(),
 			Loopback:    nic.IsLoopback(),
 		}
 		nics[id] = NICInfo{
@@ -1094,6 +1118,16 @@ func (s *Stack) AddAddress(id tcpip.NICID, protocol tcpip.NetworkProtocolNumber,
 	return s.AddAddressWithOptions(id, protocol, addr, CanBePrimaryEndpoint)
 }
 
+// AddAddressWithPrefix is the same as AddAddress, but allows you to specify
+// the address prefix.
+func (s *Stack) AddAddressWithPrefix(id tcpip.NICID, protocol tcpip.NetworkProtocolNumber, addr tcpip.AddressWithPrefix) *tcpip.Error {
+	ap := tcpip.ProtocolAddress{
+		Protocol:          protocol,
+		AddressWithPrefix: addr,
+	}
+	return s.AddProtocolAddressWithOptions(id, ap, CanBePrimaryEndpoint)
+}
+
 // AddProtocolAddress adds a new network-layer protocol address to the
 // specified NIC.
 func (s *Stack) AddProtocolAddress(id tcpip.NICID, protocolAddress tcpip.ProtocolAddress) *tcpip.Error {
@@ -1180,54 +1214,225 @@ func (s *Stack) getAddressEP(nic *NIC, localAddr, remoteAddr tcpip.Address, netP
 	return nic.findEndpoint(netProto, localAddr, CanBePrimaryEndpoint)
 }
 
+// findLocalRouteFromNICRLocked is like findLocalRouteRLocked but finds a route
+// from the specified NIC.
+//
+// Precondition: s.mu must be read locked.
+func (s *Stack) findLocalRouteFromNICRLocked(localAddressNIC *NIC, localAddr, remoteAddr tcpip.Address, netProto tcpip.NetworkProtocolNumber) *Route {
+	localAddressEndpoint := localAddressNIC.getAddressOrCreateTempInner(netProto, localAddr, false /* createTemp */, NeverPrimaryEndpoint)
+	if localAddressEndpoint == nil {
+		return nil
+	}
+
+	var outgoingNIC *NIC
+	// Prefer a local route to the same interface as the local address.
+	if localAddressNIC.hasAddress(netProto, remoteAddr) {
+		outgoingNIC = localAddressNIC
+	}
+
+	// If the remote address isn't owned by the local address's NIC, check all
+	// NICs.
+	if outgoingNIC == nil {
+		for _, nic := range s.nics {
+			if nic.hasAddress(netProto, remoteAddr) {
+				outgoingNIC = nic
+				break
+			}
+		}
+	}
+
+	// If the remote address is not owned by the stack, we can't return a local
+	// route.
+	if outgoingNIC == nil {
+		localAddressEndpoint.DecRef()
+		return nil
+	}
+
+	r := makeLocalRoute(
+		netProto,
+		localAddr,
+		remoteAddr,
+		outgoingNIC,
+		localAddressNIC,
+		localAddressEndpoint,
+	)
+
+	if r.IsOutboundBroadcast() {
+		r.Release()
+		return nil
+	}
+
+	return r
+}
+
+// findLocalRouteRLocked returns a local route.
+//
+// A local route is a route to some remote address which the stack owns. That
+// is, a local route is a route where packets never have to leave the stack.
+//
+// Precondition: s.mu must be read locked.
+func (s *Stack) findLocalRouteRLocked(localAddressNICID tcpip.NICID, localAddr, remoteAddr tcpip.Address, netProto tcpip.NetworkProtocolNumber) *Route {
+	if len(localAddr) == 0 {
+		localAddr = remoteAddr
+	}
+
+	if localAddressNICID == 0 {
+		for _, localAddressNIC := range s.nics {
+			if r := s.findLocalRouteFromNICRLocked(localAddressNIC, localAddr, remoteAddr, netProto); r != nil {
+				return r
+			}
+		}
+
+		return nil
+	}
+
+	if localAddressNIC, ok := s.nics[localAddressNICID]; ok {
+		return s.findLocalRouteFromNICRLocked(localAddressNIC, localAddr, remoteAddr, netProto)
+	}
+
+	return nil
+}
+
 // FindRoute creates a route to the given destination address, leaving through
-// the given nic and local address (if provided).
-func (s *Stack) FindRoute(id tcpip.NICID, localAddr, remoteAddr tcpip.Address, netProto tcpip.NetworkProtocolNumber, multicastLoop bool) (Route, *tcpip.Error) {
+// the given NIC and local address (if provided).
+//
+// If a NIC is not specified, the returned route will leave through the same
+// NIC as the NIC that has the local address assigned when forwarding is
+// disabled. If forwarding is enabled and the NIC is unspecified, the route may
+// leave through any interface unless the route is link-local.
+//
+// If no local address is provided, the stack will select a local address. If no
+// remote address is provided, the stack will use a remote address equal to the
+// local address.
+func (s *Stack) FindRoute(id tcpip.NICID, localAddr, remoteAddr tcpip.Address, netProto tcpip.NetworkProtocolNumber, multicastLoop bool) (*Route, *tcpip.Error) {
 	s.mu.RLock()
 	defer s.mu.RUnlock()
 
+	isLinkLocal := header.IsV6LinkLocalAddress(remoteAddr) || header.IsV6LinkLocalMulticastAddress(remoteAddr)
 	isLocalBroadcast := remoteAddr == header.IPv4Broadcast
 	isMulticast := header.IsV4MulticastAddress(remoteAddr) || header.IsV6MulticastAddress(remoteAddr)
-	needRoute := !(isLocalBroadcast || isMulticast || header.IsV6LinkLocalAddress(remoteAddr))
+	isLoopback := header.IsV4LoopbackAddress(remoteAddr) || header.IsV6LoopbackAddress(remoteAddr)
+	needRoute := !(isLocalBroadcast || isMulticast || isLinkLocal || isLoopback)
+
+	if s.handleLocal && !isMulticast && !isLocalBroadcast {
+		if r := s.findLocalRouteRLocked(id, localAddr, remoteAddr, netProto); r != nil {
+			return r, nil
+		}
+	}
+
+	// If the interface is specified and we do not need a route, return a route
+	// through the interface if the interface is valid and enabled.
 	if id != 0 && !needRoute {
 		if nic, ok := s.nics[id]; ok && nic.Enabled() {
 			if addressEndpoint := s.getAddressEP(nic, localAddr, remoteAddr, netProto); addressEndpoint != nil {
-				return makeRoute(netProto, addressEndpoint.AddressWithPrefix().Address, remoteAddr, nic, addressEndpoint, s.handleLocal && !nic.IsLoopback(), multicastLoop && !nic.IsLoopback()), nil
+				return makeRoute(
+					netProto,
+					localAddr,
+					remoteAddr,
+					nic, /* outgoingNIC */
+					nic, /* localAddressNIC */
+					addressEndpoint,
+					s.handleLocal,
+					multicastLoop,
+				), nil
 			}
 		}
-	} else {
-		for _, route := range s.routeTable {
-			if (id != 0 && id != route.NIC) || (len(remoteAddr) != 0 && !route.Destination.Contains(remoteAddr)) {
-				continue
+
+		if isLoopback {
+			return nil, tcpip.ErrBadLocalAddress
+		}
+		return nil, tcpip.ErrNetworkUnreachable
+	}
+
+	canForward := s.Forwarding(netProto) && !header.IsV6LinkLocalAddress(localAddr) && !isLinkLocal
+
+	// Find a route to the remote with the route table.
+	var chosenRoute tcpip.Route
+	for _, route := range s.routeTable {
+		if len(remoteAddr) != 0 && !route.Destination.Contains(remoteAddr) {
+			continue
+		}
+
+		nic, ok := s.nics[route.NIC]
+		if !ok || !nic.Enabled() {
+			continue
+		}
+
+		if id == 0 || id == route.NIC {
+			if addressEndpoint := s.getAddressEP(nic, localAddr, remoteAddr, netProto); addressEndpoint != nil {
+				var gateway tcpip.Address
+				if needRoute {
+					gateway = route.Gateway
+				}
+				r := constructAndValidateRoute(netProto, addressEndpoint, nic /* localAddressNIC */, nic /* outgoingNIC */, gateway, localAddr, remoteAddr, s.handleLocal, multicastLoop)
+				if r == nil {
+					panic(fmt.Sprintf("non-forwarding route validation failed with route table entry = %#v, id = %d, localAddr = %s, remoteAddr = %s", route, id, localAddr, remoteAddr))
+				}
+				return r, nil
 			}
-			if nic, ok := s.nics[route.NIC]; ok && nic.Enabled() {
-				if addressEndpoint := s.getAddressEP(nic, localAddr, remoteAddr, netProto); addressEndpoint != nil {
-					if len(remoteAddr) == 0 {
-						// If no remote address was provided, then the route
-						// provided will refer to the link local address.
-						remoteAddr = addressEndpoint.AddressWithPrefix().Address
-					}
+		}
+
+		// If the stack has forwarding enabled and we haven't found a valid route to
+		// the remote address yet, keep track of the first valid route. We keep
+		// iterating because we prefer routes that let us use a local address that
+		// is assigned to the outgoing interface. There is no requirement to do this
+		// from any RFC but simply a choice made to better follow a strong host
+		// model which the netstack follows at the time of writing.
+		if canForward && chosenRoute == (tcpip.Route{}) {
+			chosenRoute = route
+		}
+	}
+
+	if chosenRoute != (tcpip.Route{}) {
+		// At this point we know the stack has forwarding enabled since chosenRoute is
+		// only set when forwarding is enabled.
+		nic, ok := s.nics[chosenRoute.NIC]
+		if !ok {
+			// If the route's NIC was invalid, we should not have chosen the route.
+			panic(fmt.Sprintf("chosen route must have a valid NIC with ID = %d", chosenRoute.NIC))
+		}
+
+		var gateway tcpip.Address
+		if needRoute {
+			gateway = chosenRoute.Gateway
+		}
 
-					r := makeRoute(netProto, addressEndpoint.AddressWithPrefix().Address, remoteAddr, nic, addressEndpoint, s.handleLocal && !nic.IsLoopback(), multicastLoop && !nic.IsLoopback())
-					if len(route.Gateway) > 0 {
-						if needRoute {
-							r.NextHop = route.Gateway
-						}
-					} else if subnet := addressEndpoint.AddressWithPrefix().Subnet(); subnet.IsBroadcast(remoteAddr) {
-						r.RemoteLinkAddress = header.EthernetBroadcastAddress
+		// Use the specified NIC to get the local address endpoint.
+		if id != 0 {
+			if aNIC, ok := s.nics[id]; ok {
+				if addressEndpoint := s.getAddressEP(aNIC, localAddr, remoteAddr, netProto); addressEndpoint != nil {
+					if r := constructAndValidateRoute(netProto, addressEndpoint, aNIC /* localAddressNIC */, nic /* outgoingNIC */, gateway, localAddr, remoteAddr, s.handleLocal, multicastLoop); r != nil {
+						return r, nil
 					}
+				}
+			}
+
+			return nil, tcpip.ErrNoRoute
+		}
+
+		if id == 0 {
+			// If an interface is not specified, try to find a NIC that holds the local
+			// address endpoint to construct a route.
+			for _, aNIC := range s.nics {
+				addressEndpoint := s.getAddressEP(aNIC, localAddr, remoteAddr, netProto)
+				if addressEndpoint == nil {
+					continue
+				}
 
+				if r := constructAndValidateRoute(netProto, addressEndpoint, aNIC /* localAddressNIC */, nic /* outgoingNIC */, gateway, localAddr, remoteAddr, s.handleLocal, multicastLoop); r != nil {
 					return r, nil
 				}
 			}
 		}
 	}
 
-	if !needRoute {
-		return Route{}, tcpip.ErrNetworkUnreachable
+	if needRoute {
+		return nil, tcpip.ErrNoRoute
 	}
-
-	return Route{}, tcpip.ErrNoRoute
+	if header.IsV6LoopbackAddress(remoteAddr) {
+		return nil, tcpip.ErrBadLocalAddress
+	}
+	return nil, tcpip.ErrNetworkUnreachable
 }
 
 // CheckNetworkProtocol checks if a given network protocol is enabled in the
@@ -1323,7 +1528,7 @@ func (s *Stack) GetLinkAddress(nicID tcpip.NICID, addr, localAddr tcpip.Address,
 
 	fullAddr := tcpip.FullAddress{NIC: nicID, Addr: addr}
 	linkRes := s.linkAddrResolvers[protocol]
-	return s.linkAddrCache.get(fullAddr, linkRes, localAddr, nic.LinkEndpoint, waker)
+	return s.linkAddrCache.get(fullAddr, linkRes, localAddr, nic, waker)
 }
 
 // Neighbors returns all IP to MAC address associations.
@@ -1443,8 +1648,8 @@ func (s *Stack) CompleteTransportEndpointCleanup(ep TransportEndpoint) {
 
 // FindTransportEndpoint finds an endpoint that most closely matches the provided
 // id. If no endpoint is found it returns nil.
-func (s *Stack) FindTransportEndpoint(netProto tcpip.NetworkProtocolNumber, transProto tcpip.TransportProtocolNumber, id TransportEndpointID, r *Route) TransportEndpoint {
-	return s.demux.findTransportEndpoint(netProto, transProto, id, r)
+func (s *Stack) FindTransportEndpoint(netProto tcpip.NetworkProtocolNumber, transProto tcpip.TransportProtocolNumber, id TransportEndpointID, nicID tcpip.NICID) TransportEndpoint {
+	return s.demux.findTransportEndpoint(netProto, transProto, id, nicID)
 }
 
 // RegisterRawTransportEndpoint registers the given endpoint with the stack
@@ -1615,49 +1820,20 @@ func (s *Stack) unregisterPacketEndpointLocked(nicID tcpip.NICID, netProto tcpip
 	nic.unregisterPacketEndpoint(netProto, ep)
 }
 
-// WritePacket writes data directly to the specified NIC. It adds an ethernet
-// header based on the arguments.
-func (s *Stack) WritePacket(nicID tcpip.NICID, dst tcpip.LinkAddress, netProto tcpip.NetworkProtocolNumber, payload buffer.VectorisedView) *tcpip.Error {
-	s.mu.Lock()
-	nic, ok := s.nics[nicID]
-	s.mu.Unlock()
-	if !ok {
-		return tcpip.ErrUnknownDevice
-	}
-
-	// Add our own fake ethernet header.
-	ethFields := header.EthernetFields{
-		SrcAddr: nic.LinkEndpoint.LinkAddress(),
-		DstAddr: dst,
-		Type:    netProto,
-	}
-	fakeHeader := make(header.Ethernet, header.EthernetMinimumSize)
-	fakeHeader.Encode(&ethFields)
-	vv := buffer.View(fakeHeader).ToVectorisedView()
-	vv.Append(payload)
-
-	if err := nic.LinkEndpoint.WriteRawPacket(vv); err != nil {
-		return err
-	}
-
-	return nil
-}
-
-// WriteRawPacket writes data directly to the specified NIC without adding any
-// headers.
-func (s *Stack) WriteRawPacket(nicID tcpip.NICID, payload buffer.VectorisedView) *tcpip.Error {
+// WritePacketToRemote wraps payload in a new PacketBuffer and writes it with
+// netProto to the remote link address on NIC nicID (ErrUnknownDevice if absent).
+func (s *Stack) WritePacketToRemote(nicID tcpip.NICID, remote tcpip.LinkAddress, netProto tcpip.NetworkProtocolNumber, payload buffer.VectorisedView) *tcpip.Error {
 	s.mu.Lock()
 	nic, ok := s.nics[nicID]
 	s.mu.Unlock()
 	if !ok {
 		return tcpip.ErrUnknownDevice
 	}
-
-	if err := nic.LinkEndpoint.WriteRawPacket(payload); err != nil {
-		return err
-	}
-
-	return nil
+	pkt := NewPacketBuffer(PacketBufferOptions{
+		ReserveHeaderBytes: int(nic.MaxHeaderLength()),
+		Data:               payload,
+	})
+	return nic.WritePacketToRemote(remote, nil, netProto, pkt)
 }
 
 // NetworkProtocolInstance returns the protocol instance in the stack for the
@@ -1717,7 +1893,6 @@ func (s *Stack) RemoveTCPProbe() {
 
 // JoinGroup joins the given multicast group on the given NIC.
 func (s *Stack) JoinGroup(protocol tcpip.NetworkProtocolNumber, nicID tcpip.NICID, multicastAddr tcpip.Address) *tcpip.Error {
-	// TODO: notify network of subscription via igmp protocol.
 	s.mu.RLock()
 	defer s.mu.RUnlock()
 
@@ -1896,3 +2071,111 @@ func (s *Stack) FindNICNameFromID(id tcpip.NICID) string {
 func (s *Stack) NewJob(l sync.Locker, f func()) *tcpip.Job {
 	return tcpip.NewJob(s.clock, l, f)
 }
+
+// ParseResult indicates the result of a parsing attempt.
+type ParseResult int
+
+const (
+	// ParsedOK indicates that a packet was successfully parsed.
+	ParsedOK ParseResult = iota
+
+	// UnknownNetworkProtocol indicates that the network protocol is unknown.
+	UnknownNetworkProtocol
+
+	// NetworkLayerParseError indicates that the network packet was not
+	// successfully parsed.
+	NetworkLayerParseError
+
+	// UnknownTransportProtocol indicates that the transport protocol is unknown.
+	UnknownTransportProtocol
+
+	// TransportLayerParseError indicates that the transport packet was not
+	// successfully parsed.
+	TransportLayerParseError
+)
+
+// ParsePacketBuffer parses pkt's network layer and, when a non-ICMP transport header is reported, its transport layer; the result names the step that failed, if any.
+func (s *Stack) ParsePacketBuffer(protocol tcpip.NetworkProtocolNumber, pkt *PacketBuffer) ParseResult {
+	netProto, ok := s.networkProtocols[protocol]
+	if !ok {
+		return UnknownNetworkProtocol
+	}
+
+	transProtoNum, hasTransportHdr, ok := netProto.Parse(pkt)
+	if !ok {
+		return NetworkLayerParseError
+	}
+	if !hasTransportHdr {
+		return ParsedOK // Nothing beyond the network layer to parse.
+	}
+
+	// TODO(gvisor.dev/issue/170): ICMP packets don't have their TransportHeader
+	// fields set yet, parse it here. See icmp/protocol.go:protocol.Parse for a
+	// full explanation.
+	if transProtoNum == header.ICMPv4ProtocolNumber || transProtoNum == header.ICMPv6ProtocolNumber {
+		return ParsedOK
+	}
+
+	pkt.TransportProtocolNumber = transProtoNum
+	// Parse the transport header with the protocol registered for transProtoNum.
+	state, ok := s.transportProtocols[transProtoNum]
+	if !ok {
+		return UnknownTransportProtocol
+	}
+
+	if !state.proto.Parse(pkt) {
+		return TransportLayerParseError
+	}
+
+	return ParsedOK
+}
+
+// networkProtocolNumbers returns the numbers of the network protocols the stack
+// is configured with. The order is unspecified because it follows map iteration.
+func (s *Stack) networkProtocolNumbers() []tcpip.NetworkProtocolNumber {
+	protos := make([]tcpip.NetworkProtocolNumber, 0, len(s.networkProtocols))
+	for p := range s.networkProtocols {
+		protos = append(protos, p)
+	}
+	return protos
+}
+
+func isSubnetBroadcastOnNIC(nic *NIC, protocol tcpip.NetworkProtocolNumber, addr tcpip.Address) bool {
+	addressEndpoint := nic.getAddressOrCreateTempInner(protocol, addr, false /* createTemp */, NeverPrimaryEndpoint)
+	if addressEndpoint == nil {
+		return false // No address endpoint for addr on this NIC.
+	}
+
+	subnet := addressEndpoint.Subnet()
+	addressEndpoint.DecRef() // Done with the endpoint; its subnet was read above.
+	return subnet.IsBroadcast(addr)
+}
+
+// IsSubnetBroadcast returns true if the provided address is a subnet-local
+// broadcast address on the specified NIC and protocol.
+//
+// Returns false if the NIC is unknown or if the protocol is unknown or does
+// not support addressing.
+//
+// If nicID is 0, all NICs are checked and a match on any of them returns true.
+func (s *Stack) IsSubnetBroadcast(nicID tcpip.NICID, protocol tcpip.NetworkProtocolNumber, addr tcpip.Address) bool {
+	s.mu.RLock()
+	defer s.mu.RUnlock()
+
+	if nicID != 0 {
+		nic, ok := s.nics[nicID]
+		if !ok {
+			return false
+		}
+
+		return isSubnetBroadcastOnNIC(nic, protocol, addr)
+	}
+
+	for _, nic := range s.nics {
+		if isSubnetBroadcastOnNIC(nic, protocol, addr) {
+			return true
+		}
+	}
+
+	return false
+}
diff --git a/pkg/tcpip/stack/stack_test.go b/pkg/tcpip/stack/stack_test.go
index e75f58c64..457990945 100644
--- a/pkg/tcpip/stack/stack_test.go
+++ b/pkg/tcpip/stack/stack_test.go
@@ -21,12 +21,12 @@ import (
 	"bytes"
 	"fmt"
 	"math"
+	"net"
 	"sort"
 	"testing"
 	"time"
 
 	"github.com/google/go-cmp/cmp"
-	"github.com/google/go-cmp/cmp/cmpopts"
 	"gvisor.dev/gvisor/pkg/rand"
 	"gvisor.dev/gvisor/pkg/sync"
 	"gvisor.dev/gvisor/pkg/tcpip"
@@ -108,12 +108,21 @@ func (*fakeNetworkEndpoint) DefaultTTL() uint8 {
 	return 123
 }
 
-func (f *fakeNetworkEndpoint) HandlePacket(r *stack.Route, pkt *stack.PacketBuffer) {
+func (f *fakeNetworkEndpoint) HandlePacket(pkt *stack.PacketBuffer) {
 	// Increment the received packet count in the protocol descriptor.
-	f.proto.packetCount[int(r.LocalAddress[0])%len(f.proto.packetCount)]++
+	netHdr := pkt.NetworkHeader().View()
+
+	dst := tcpip.Address(netHdr[dstAddrOffset:][:1])
+	addressEndpoint := f.AcquireAssignedAddress(dst, f.nic.Promiscuous(), stack.CanBePrimaryEndpoint)
+	if addressEndpoint == nil {
+		return
+	}
+	addressEndpoint.DecRef()
+
+	f.proto.packetCount[int(dst[0])%len(f.proto.packetCount)]++
 
 	// Handle control packets.
-	if pkt.NetworkHeader().View()[protocolNumberOffset] == uint8(fakeControlProtocol) {
+	if netHdr[protocolNumberOffset] == uint8(fakeControlProtocol) {
 		nb, ok := pkt.Data.PullUp(fakeNetHeaderLen)
 		if !ok {
 			return
@@ -129,7 +138,7 @@ func (f *fakeNetworkEndpoint) HandlePacket(r *stack.Route, pkt *stack.PacketBuff
 	}
 
 	// Dispatch the packet to the transport protocol.
-	f.dispatcher.DeliverTransportPacket(r, tcpip.TransportProtocolNumber(pkt.NetworkHeader().View()[protocolNumberOffset]), pkt)
+	f.dispatcher.DeliverTransportPacket(tcpip.TransportProtocolNumber(pkt.NetworkHeader().View()[protocolNumberOffset]), pkt)
 }
 
 func (f *fakeNetworkEndpoint) MaxHeaderLength() uint16 {
@@ -151,12 +160,13 @@ func (f *fakeNetworkEndpoint) WritePacket(r *stack.Route, gso *stack.GSO, params
 	// Add the protocol's header to the packet and send it to the link
 	// endpoint.
 	hdr := pkt.NetworkHeader().Push(fakeNetHeaderLen)
+	pkt.NetworkProtocolNumber = fakeNetNumber
 	hdr[dstAddrOffset] = r.RemoteAddress[0]
 	hdr[srcAddrOffset] = r.LocalAddress[0]
 	hdr[protocolNumberOffset] = byte(params.Protocol)
 
 	if r.Loop&stack.PacketLoop != 0 {
-		f.HandlePacket(r, pkt)
+		f.HandlePacket(pkt.Clone())
 	}
 	if r.Loop&stack.PacketOut == 0 {
 		return nil
@@ -254,6 +264,7 @@ func (*fakeNetworkProtocol) Parse(pkt *stack.PacketBuffer) (tcpip.TransportProto
 	if !ok {
 		return 0, false, false
 	}
+	pkt.NetworkProtocolNumber = fakeNetNumber
 	return tcpip.TransportProtocolNumber(hdr[protocolNumberOffset]), true, true
 }
 
@@ -395,7 +406,7 @@ func sendTo(s *stack.Stack, addr tcpip.Address, payload buffer.View) *tcpip.Erro
 	return send(r, payload)
 }
 
-func send(r stack.Route, payload buffer.View) *tcpip.Error {
+func send(r *stack.Route, payload buffer.View) *tcpip.Error {
 	return r.WritePacket(nil /* gso */, stack.NetworkHeaderParams{Protocol: fakeTransNumber, TTL: 123, TOS: stack.DefaultTOS}, stack.NewPacketBuffer(stack.PacketBufferOptions{
 		ReserveHeaderBytes: int(r.MaxHeaderLength()),
 		Data:               payload.ToVectorisedView(),
@@ -413,7 +424,7 @@ func testSendTo(t *testing.T, s *stack.Stack, addr tcpip.Address, ep *channel.En
 	}
 }
 
-func testSend(t *testing.T, r stack.Route, ep *channel.Endpoint, payload buffer.View) {
+func testSend(t *testing.T, r *stack.Route, ep *channel.Endpoint, payload buffer.View) {
 	t.Helper()
 	ep.Drain()
 	if err := send(r, payload); err != nil {
@@ -424,7 +435,7 @@ func testSend(t *testing.T, r stack.Route, ep *channel.Endpoint, payload buffer.
 	}
 }
 
-func testFailingSend(t *testing.T, r stack.Route, ep *channel.Endpoint, payload buffer.View, wantErr *tcpip.Error) {
+func testFailingSend(t *testing.T, r *stack.Route, ep *channel.Endpoint, payload buffer.View, wantErr *tcpip.Error) {
 	t.Helper()
 	if gotErr := send(r, payload); gotErr != wantErr {
 		t.Errorf("send failed: got = %s, want = %s ", gotErr, wantErr)
@@ -1334,6 +1345,106 @@ func TestPromiscuousMode(t *testing.T) {
 	testFailingRecv(t, fakeNet, localAddrByte, ep, buf)
 }
 
+// TestExternalSendWithHandleLocal tests that the stack creates a non-local
+// route when spoofing or promiscuous mode is enabled.
+//
+// This test makes sure that packets are transmitted from the stack.
+func TestExternalSendWithHandleLocal(t *testing.T) {
+	const (
+		unspecifiedNICID = 0
+		nicID            = 1
+
+		localAddr = tcpip.Address("\x01")
+		dstAddr   = tcpip.Address("\x03")
+	)
+
+	subnet, err := tcpip.NewSubnet("\x00", "\x00")
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	tests := []struct {
+		name           string
+		configureStack func(*testing.T, *stack.Stack)
+	}{
+		{
+			name:           "Default",
+			configureStack: func(*testing.T, *stack.Stack) {},
+		},
+		{
+			name: "Spoofing",
+			configureStack: func(t *testing.T, s *stack.Stack) {
+				if err := s.SetSpoofing(nicID, true); err != nil {
+					t.Fatalf("s.SetSpoofing(%d, true): %s", nicID, err)
+				}
+			},
+		},
+		{
+			name: "Promiscuous",
+			configureStack: func(t *testing.T, s *stack.Stack) {
+				if err := s.SetPromiscuousMode(nicID, true); err != nil {
+					t.Fatalf("s.SetPromiscuousMode(%d, true): %s", nicID, err)
+				}
+			},
+		},
+	}
+
+	for _, test := range tests {
+		t.Run(test.name, func(t *testing.T) {
+			for _, handleLocal := range []bool{true, false} {
+				t.Run(fmt.Sprintf("HandleLocal=%t", handleLocal), func(t *testing.T) {
+					s := stack.New(stack.Options{
+						NetworkProtocols: []stack.NetworkProtocolFactory{fakeNetFactory},
+						HandleLocal:      handleLocal,
+					})
+
+					ep := channel.New(1, defaultMTU, "")
+					if err := s.CreateNIC(nicID, ep); err != nil {
+						t.Fatalf("s.CreateNIC(%d, _): %s", nicID, err)
+					}
+					if err := s.AddAddress(nicID, fakeNetNumber, localAddr); err != nil {
+						t.Fatalf("s.AddAddress(%d, %d, %s): %s", nicID, fakeNetNumber, localAddr, err)
+					}
+
+					s.SetRouteTable([]tcpip.Route{{Destination: subnet, NIC: nicID}})
+
+					test.configureStack(t, s)
+
+					r, err := s.FindRoute(unspecifiedNICID, localAddr, dstAddr, fakeNetNumber, false /* multicastLoop */)
+					if err != nil {
+						t.Fatalf("s.FindRoute(%d, %s, %s, %d, false): %s", unspecifiedNICID, localAddr, dstAddr, fakeNetNumber, err)
+					}
+					defer r.Release()
+
+					if r.LocalAddress != localAddr {
+						t.Errorf("got r.LocalAddress = %s, want = %s", r.LocalAddress, localAddr)
+					}
+					if r.RemoteAddress != dstAddr {
+						t.Errorf("got r.RemoteAddress = %s, want = %s", r.RemoteAddress, dstAddr)
+					}
+
+					if n := ep.Drain(); n != 0 {
+						t.Fatalf("got ep.Drain() = %d, want = 0", n)
+					}
+					if err := r.WritePacket(nil /* gso */, stack.NetworkHeaderParams{
+						Protocol: fakeTransNumber,
+						TTL:      123,
+						TOS:      stack.DefaultTOS,
+					}, stack.NewPacketBuffer(stack.PacketBufferOptions{
+						ReserveHeaderBytes: int(r.MaxHeaderLength()),
+						Data:               buffer.NewView(10).ToVectorisedView(),
+					})); err != nil {
+						t.Fatalf("r.WritePacket(nil, _, _): %s", err)
+					}
+					if n := ep.Drain(); n != 1 {
+						t.Fatalf("got ep.Drain() = %d, want = 1", n)
+					}
+				})
+			}
+		})
+	}
+}
+
 func TestSpoofingWithAddress(t *testing.T) {
 	localAddr := tcpip.Address("\x01")
 	nonExistentLocalAddr := tcpip.Address("\x02")
@@ -1451,15 +1562,15 @@ func TestSpoofingNoAddress(t *testing.T) {
 	// testSendTo(t, s, remoteAddr, ep, nil)
 }
 
-func verifyRoute(gotRoute, wantRoute stack.Route) error {
+func verifyRoute(gotRoute, wantRoute *stack.Route) error {
 	if gotRoute.LocalAddress != wantRoute.LocalAddress {
 		return fmt.Errorf("bad local address: got %s, want = %s", gotRoute.LocalAddress, wantRoute.LocalAddress)
 	}
 	if gotRoute.RemoteAddress != wantRoute.RemoteAddress {
 		return fmt.Errorf("bad remote address: got %s, want = %s", gotRoute.RemoteAddress, wantRoute.RemoteAddress)
 	}
-	if gotRoute.RemoteLinkAddress != wantRoute.RemoteLinkAddress {
-		return fmt.Errorf("bad remote link address: got %s, want = %s", gotRoute.RemoteLinkAddress, wantRoute.RemoteLinkAddress)
+	if got, want := gotRoute.RemoteLinkAddress(), wantRoute.RemoteLinkAddress(); got != want {
+		return fmt.Errorf("bad remote link address: got %s, want = %s", got, want)
 	}
 	if gotRoute.NextHop != wantRoute.NextHop {
 		return fmt.Errorf("bad next-hop address: got %s, want = %s", gotRoute.NextHop, wantRoute.NextHop)
@@ -1491,7 +1602,7 @@ func TestOutgoingBroadcastWithEmptyRouteTable(t *testing.T) {
 	if err != nil {
 		t.Fatalf("FindRoute(1, %v, %v, %d) failed: %v", header.IPv4Any, header.IPv4Broadcast, fakeNetNumber, err)
 	}
-	if err := verifyRoute(r, stack.Route{LocalAddress: header.IPv4Any, RemoteAddress: header.IPv4Broadcast}); err != nil {
+	if err := verifyRoute(r, &stack.Route{LocalAddress: header.IPv4Any, RemoteAddress: header.IPv4Broadcast}); err != nil {
 		t.Errorf("FindRoute(1, %v, %v, %d) returned unexpected Route: %v", header.IPv4Any, header.IPv4Broadcast, fakeNetNumber, err)
 	}
 
@@ -1545,7 +1656,7 @@ func TestOutgoingBroadcastWithRouteTable(t *testing.T) {
 	if err != nil {
 		t.Fatalf("FindRoute(1, %v, %v, %d) failed: %v", nic1Addr.Address, header.IPv4Broadcast, fakeNetNumber, err)
 	}
-	if err := verifyRoute(r, stack.Route{LocalAddress: nic1Addr.Address, RemoteAddress: header.IPv4Broadcast}); err != nil {
+	if err := verifyRoute(r, &stack.Route{LocalAddress: nic1Addr.Address, RemoteAddress: header.IPv4Broadcast}); err != nil {
 		t.Errorf("FindRoute(1, %v, %v, %d) returned unexpected Route: %v", nic1Addr.Address, header.IPv4Broadcast, fakeNetNumber, err)
 	}
 
@@ -1555,7 +1666,7 @@ func TestOutgoingBroadcastWithRouteTable(t *testing.T) {
 	if err != nil {
 		t.Fatalf("FindRoute(0, \"\", %s, %d) failed: %s", header.IPv4Broadcast, fakeNetNumber, err)
 	}
-	if err := verifyRoute(r, stack.Route{LocalAddress: nic2Addr.Address, RemoteAddress: header.IPv4Broadcast}); err != nil {
+	if err := verifyRoute(r, &stack.Route{LocalAddress: nic2Addr.Address, RemoteAddress: header.IPv4Broadcast}); err != nil {
 		t.Errorf("FindRoute(0, \"\", %s, %d) returned unexpected Route: %s)", header.IPv4Broadcast, fakeNetNumber, err)
 	}
 
@@ -1571,7 +1682,7 @@ func TestOutgoingBroadcastWithRouteTable(t *testing.T) {
 	if err != nil {
 		t.Fatalf("FindRoute(0, \"\", %s, %d) failed: %s", header.IPv4Broadcast, fakeNetNumber, err)
 	}
-	if err := verifyRoute(r, stack.Route{LocalAddress: nic1Addr.Address, RemoteAddress: header.IPv4Broadcast}); err != nil {
+	if err := verifyRoute(r, &stack.Route{LocalAddress: nic1Addr.Address, RemoteAddress: header.IPv4Broadcast}); err != nil {
 		t.Errorf("FindRoute(0, \"\", %s, %d) returned unexpected Route: %s)", header.IPv4Broadcast, fakeNetNumber, err)
 	}
 }
@@ -2108,88 +2219,6 @@ func TestNICStats(t *testing.T) {
 	}
 }
 
-func TestNICForwarding(t *testing.T) {
-	const nicID1 = 1
-	const nicID2 = 2
-	const dstAddr = tcpip.Address("\x03")
-
-	tests := []struct {
-		name      string
-		headerLen uint16
-	}{
-		{
-			name: "Zero header length",
-		},
-		{
-			name:      "Non-zero header length",
-			headerLen: 16,
-		},
-	}
-
-	for _, test := range tests {
-		t.Run(test.name, func(t *testing.T) {
-			s := stack.New(stack.Options{
-				NetworkProtocols: []stack.NetworkProtocolFactory{fakeNetFactory},
-			})
-			s.SetForwarding(fakeNetNumber, true)
-
-			ep1 := channel.New(10, defaultMTU, "")
-			if err := s.CreateNIC(nicID1, ep1); err != nil {
-				t.Fatalf("CreateNIC(%d, _): %s", nicID1, err)
-			}
-			if err := s.AddAddress(nicID1, fakeNetNumber, "\x01"); err != nil {
-				t.Fatalf("AddAddress(%d, %d, 0x01): %s", nicID1, fakeNetNumber, err)
-			}
-
-			ep2 := channelLinkWithHeaderLength{
-				Endpoint:     channel.New(10, defaultMTU, ""),
-				headerLength: test.headerLen,
-			}
-			if err := s.CreateNIC(nicID2, &ep2); err != nil {
-				t.Fatalf("CreateNIC(%d, _): %s", nicID2, err)
-			}
-			if err := s.AddAddress(nicID2, fakeNetNumber, "\x02"); err != nil {
-				t.Fatalf("AddAddress(%d, %d, 0x02): %s", nicID2, fakeNetNumber, err)
-			}
-
-			// Route all packets to dstAddr to NIC 2.
-			{
-				subnet, err := tcpip.NewSubnet(dstAddr, "\xff")
-				if err != nil {
-					t.Fatal(err)
-				}
-				s.SetRouteTable([]tcpip.Route{{Destination: subnet, Gateway: "\x00", NIC: nicID2}})
-			}
-
-			// Send a packet to dstAddr.
-			buf := buffer.NewView(30)
-			buf[dstAddrOffset] = dstAddr[0]
-			ep1.InjectInbound(fakeNetNumber, stack.NewPacketBuffer(stack.PacketBufferOptions{
-				Data: buf.ToVectorisedView(),
-			}))
-
-			pkt, ok := ep2.Read()
-			if !ok {
-				t.Fatal("packet not forwarded")
-			}
-
-			// Test that the link's MaxHeaderLength is honoured.
-			if capacity, want := pkt.Pkt.AvailableHeaderBytes(), int(test.headerLen); capacity != want {
-				t.Errorf("got LinkHeader.AvailableLength() = %d, want = %d", capacity, want)
-			}
-
-			// Test that forwarding increments Tx stats correctly.
-			if got, want := s.NICInfo()[nicID2].Stats.Tx.Packets.Value(), uint64(1); got != want {
-				t.Errorf("got Tx.Packets.Value() = %d, want = %d", got, want)
-			}
-
-			if got, want := s.NICInfo()[nicID2].Stats.Tx.Bytes.Value(), uint64(len(buf)); got != want {
-				t.Errorf("got Tx.Bytes.Value() = %d, want = %d", got, want)
-			}
-		})
-	}
-}
-
 // TestNICContextPreservation tests that you can read out via stack.NICInfo the
 // Context data you pass via NICContext.Context in stack.CreateNICWithOptions.
 func TestNICContextPreservation(t *testing.T) {
@@ -2377,9 +2406,9 @@ func TestNICAutoGenLinkLocalAddr(t *testing.T) {
 			}
 			opts := stack.Options{
 				NetworkProtocols: []stack.NetworkProtocolFactory{ipv6.NewProtocolWithOptions(ipv6.Options{
-					AutoGenIPv6LinkLocal: test.autoGen,
-					NDPDisp:              &ndpDisp,
-					OpaqueIIDOpts:        test.iidOpts,
+					AutoGenLinkLocal: test.autoGen,
+					NDPDisp:          &ndpDisp,
+					OpaqueIIDOpts:    test.iidOpts,
 				})},
 			}
 
@@ -2472,8 +2501,8 @@ func TestNoLinkLocalAutoGenForLoopbackNIC(t *testing.T) {
 		t.Run(test.name, func(t *testing.T) {
 			opts := stack.Options{
 				NetworkProtocols: []stack.NetworkProtocolFactory{ipv6.NewProtocolWithOptions(ipv6.Options{
-					AutoGenIPv6LinkLocal: true,
-					OpaqueIIDOpts:        test.opaqueIIDOpts,
+					AutoGenLinkLocal: true,
+					OpaqueIIDOpts:    test.opaqueIIDOpts,
 				})},
 			}
 
@@ -2506,9 +2535,9 @@ func TestNICAutoGenAddrDoesDAD(t *testing.T) {
 	ndpConfigs := ipv6.DefaultNDPConfigurations()
 	opts := stack.Options{
 		NetworkProtocols: []stack.NetworkProtocolFactory{ipv6.NewProtocolWithOptions(ipv6.Options{
-			NDPConfigs:           ndpConfigs,
-			AutoGenIPv6LinkLocal: true,
-			NDPDisp:              &ndpDisp,
+			NDPConfigs:       ndpConfigs,
+			AutoGenLinkLocal: true,
+			NDPDisp:          &ndpDisp,
 		})},
 	}
 
@@ -3321,11 +3350,16 @@ func TestOutgoingSubnetBroadcast(t *testing.T) {
 	remNetSubnetBcast := remNetSubnet.Broadcast()
 
 	tests := []struct {
-		name          string
-		nicAddr       tcpip.ProtocolAddress
-		routes        []tcpip.Route
-		remoteAddr    tcpip.Address
-		expectedRoute stack.Route
+		name                      string
+		nicAddr                   tcpip.ProtocolAddress
+		routes                    []tcpip.Route
+		remoteAddr                tcpip.Address
+		expectedLocalAddress      tcpip.Address
+		expectedRemoteAddress     tcpip.Address
+		expectedRemoteLinkAddress tcpip.LinkAddress
+		expectedNextHop           tcpip.Address
+		expectedNetProto          tcpip.NetworkProtocolNumber
+		expectedLoop              stack.PacketLooping
 	}{
 		// Broadcast to a locally attached subnet populates the broadcast MAC.
 		{
@@ -3340,14 +3374,12 @@ func TestOutgoingSubnetBroadcast(t *testing.T) {
 					NIC:         nicID1,
 				},
 			},
-			remoteAddr: ipv4SubnetBcast,
-			expectedRoute: stack.Route{
-				LocalAddress:      ipv4Addr.Address,
-				RemoteAddress:     ipv4SubnetBcast,
-				RemoteLinkAddress: header.EthernetBroadcastAddress,
-				NetProto:          header.IPv4ProtocolNumber,
-				Loop:              stack.PacketOut,
-			},
+			remoteAddr:                ipv4SubnetBcast,
+			expectedLocalAddress:      ipv4Addr.Address,
+			expectedRemoteAddress:     ipv4SubnetBcast,
+			expectedRemoteLinkAddress: header.EthernetBroadcastAddress,
+			expectedNetProto:          header.IPv4ProtocolNumber,
+			expectedLoop:              stack.PacketOut | stack.PacketLoop,
 		},
 		// Broadcast to a locally attached /31 subnet does not populate the
 		// broadcast MAC.
@@ -3363,13 +3395,11 @@ func TestOutgoingSubnetBroadcast(t *testing.T) {
 					NIC:         nicID1,
 				},
 			},
-			remoteAddr: ipv4Subnet31Bcast,
-			expectedRoute: stack.Route{
-				LocalAddress:  ipv4AddrPrefix31.Address,
-				RemoteAddress: ipv4Subnet31Bcast,
-				NetProto:      header.IPv4ProtocolNumber,
-				Loop:          stack.PacketOut,
-			},
+			remoteAddr:            ipv4Subnet31Bcast,
+			expectedLocalAddress:  ipv4AddrPrefix31.Address,
+			expectedRemoteAddress: ipv4Subnet31Bcast,
+			expectedNetProto:      header.IPv4ProtocolNumber,
+			expectedLoop:          stack.PacketOut,
 		},
 		// Broadcast to a locally attached /32 subnet does not populate the
 		// broadcast MAC.
@@ -3385,13 +3415,11 @@ func TestOutgoingSubnetBroadcast(t *testing.T) {
 					NIC:         nicID1,
 				},
 			},
-			remoteAddr: ipv4Subnet32Bcast,
-			expectedRoute: stack.Route{
-				LocalAddress:  ipv4AddrPrefix32.Address,
-				RemoteAddress: ipv4Subnet32Bcast,
-				NetProto:      header.IPv4ProtocolNumber,
-				Loop:          stack.PacketOut,
-			},
+			remoteAddr:            ipv4Subnet32Bcast,
+			expectedLocalAddress:  ipv4AddrPrefix32.Address,
+			expectedRemoteAddress: ipv4Subnet32Bcast,
+			expectedNetProto:      header.IPv4ProtocolNumber,
+			expectedLoop:          stack.PacketOut,
 		},
 		// IPv6 has no notion of a broadcast.
 		{
@@ -3406,13 +3434,11 @@ func TestOutgoingSubnetBroadcast(t *testing.T) {
 					NIC:         nicID1,
 				},
 			},
-			remoteAddr: ipv6SubnetBcast,
-			expectedRoute: stack.Route{
-				LocalAddress:  ipv6Addr.Address,
-				RemoteAddress: ipv6SubnetBcast,
-				NetProto:      header.IPv6ProtocolNumber,
-				Loop:          stack.PacketOut,
-			},
+			remoteAddr:            ipv6SubnetBcast,
+			expectedLocalAddress:  ipv6Addr.Address,
+			expectedRemoteAddress: ipv6SubnetBcast,
+			expectedNetProto:      header.IPv6ProtocolNumber,
+			expectedLoop:          stack.PacketOut,
 		},
 		// Broadcast to a remote subnet in the route table is send to the next-hop
 		// gateway.
@@ -3429,14 +3455,12 @@ func TestOutgoingSubnetBroadcast(t *testing.T) {
 					NIC:         nicID1,
 				},
 			},
-			remoteAddr: remNetSubnetBcast,
-			expectedRoute: stack.Route{
-				LocalAddress:  ipv4Addr.Address,
-				RemoteAddress: remNetSubnetBcast,
-				NextHop:       ipv4Gateway,
-				NetProto:      header.IPv4ProtocolNumber,
-				Loop:          stack.PacketOut,
-			},
+			remoteAddr:            remNetSubnetBcast,
+			expectedLocalAddress:  ipv4Addr.Address,
+			expectedRemoteAddress: remNetSubnetBcast,
+			expectedNextHop:       ipv4Gateway,
+			expectedNetProto:      header.IPv4ProtocolNumber,
+			expectedLoop:          stack.PacketOut,
 		},
 		// Broadcast to an unknown subnet follows the default route. Note that this
 		// is essentially just routing an unknown destination IP, because w/o any
@@ -3454,14 +3478,12 @@ func TestOutgoingSubnetBroadcast(t *testing.T) {
 					NIC:         nicID1,
 				},
 			},
-			remoteAddr: remNetSubnetBcast,
-			expectedRoute: stack.Route{
-				LocalAddress:  ipv4Addr.Address,
-				RemoteAddress: remNetSubnetBcast,
-				NextHop:       ipv4Gateway,
-				NetProto:      header.IPv4ProtocolNumber,
-				Loop:          stack.PacketOut,
-			},
+			remoteAddr:            remNetSubnetBcast,
+			expectedLocalAddress:  ipv4Addr.Address,
+			expectedRemoteAddress: remNetSubnetBcast,
+			expectedNextHop:       ipv4Gateway,
+			expectedNetProto:      header.IPv4ProtocolNumber,
+			expectedLoop:          stack.PacketOut,
 		},
 	}
 
@@ -3490,10 +3512,27 @@ func TestOutgoingSubnetBroadcast(t *testing.T) {
 				t.Fatalf("got unexpected address length = %d bytes", l)
 			}
 
-			if r, err := s.FindRoute(unspecifiedNICID, "" /* localAddr */, test.remoteAddr, netProto, false /* multicastLoop */); err != nil {
+			r, err := s.FindRoute(unspecifiedNICID, "" /* localAddr */, test.remoteAddr, netProto, false /* multicastLoop */)
+			if err != nil {
 				t.Fatalf("FindRoute(%d, '', %s, %d): %s", unspecifiedNICID, test.remoteAddr, netProto, err)
-			} else if diff := cmp.Diff(r, test.expectedRoute, cmpopts.IgnoreUnexported(r)); diff != "" {
-				t.Errorf("route mismatch (-want +got):\n%s", diff)
+			}
+			if r.LocalAddress != test.expectedLocalAddress {
+				t.Errorf("got r.LocalAddress = %s, want = %s", r.LocalAddress, test.expectedLocalAddress)
+			}
+			if r.RemoteAddress != test.expectedRemoteAddress {
+				t.Errorf("got r.RemoteAddress = %s, want = %s", r.RemoteAddress, test.expectedRemoteAddress)
+			}
+			if got := r.RemoteLinkAddress(); got != test.expectedRemoteLinkAddress {
+				t.Errorf("got r.RemoteLinkAddress() = %s, want = %s", got, test.expectedRemoteLinkAddress)
+			}
+			if r.NextHop != test.expectedNextHop {
+				t.Errorf("got r.NextHop = %s, want = %s", r.NextHop, test.expectedNextHop)
+			}
+			if r.NetProto != test.expectedNetProto {
+				t.Errorf("got r.NetProto = %d, want = %d", r.NetProto, test.expectedNetProto)
+			}
+			if r.Loop != test.expectedLoop {
+				t.Errorf("got r.Loop = %x, want = %x", r.Loop, test.expectedLoop)
 			}
 		})
 	}
@@ -3672,3 +3711,515 @@ func TestGetMainNICAddressWhenNICDisabled(t *testing.T) {
 		t.Fatalf("got GetMainNICAddress(%d, %d) = %s, want = %s", nicID, fakeNetNumber, gotAddr, protocolAddress.AddressWithPrefix)
 	}
 }
+
+// TestAddRoute verifies that Stack.AddRoute appends a route after those already in the route table.
+func TestAddRoute(t *testing.T) {
+	const nicID = 1
+
+	s := stack.New(stack.Options{})
+
+	subnet1, err := tcpip.NewSubnet("\x00", "\x00")
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	subnet2, err := tcpip.NewSubnet("\x01", "\x01")
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	expected := []tcpip.Route{
+		{Destination: subnet1, Gateway: "\x00", NIC: nicID},
+		{Destination: subnet2, Gateway: "\x00", NIC: nicID},
+	}
+
+	// Seed the route table with only the first expected route.
+	s.SetRouteTable([]tcpip.Route{expected[0]})
+
+	// Add the second route; it must end up after the seeded one.
+	s.AddRoute(expected[1])
+
+	rt := s.GetRouteTable()
+	if got, want := len(rt), len(expected); got != want {
+		t.Fatalf("Unexpected route table length got = %d, want = %d", got, want)
+	}
+	for i, route := range rt {
+		if got, want := route, expected[i]; got != want {
+			t.Fatalf("Unexpected route got = %#v, want = %#v", got, want)
+		}
+	}
+}
+
+// TestRemoveRoutes verifies that Stack.RemoveRoutes drops every route matched by the predicate.
+func TestRemoveRoutes(t *testing.T) {
+	const nicID = 1
+
+	s := stack.New(stack.Options{})
+
+	addressToRemove := tcpip.Address("\x01")
+	subnet1, err := tcpip.NewSubnet(addressToRemove, "\x01")
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	subnet2, err := tcpip.NewSubnet(addressToRemove, "\x01")
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	subnet3, err := tcpip.NewSubnet("\x02", "\x02")
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	// Initialize the route table with three routes; the first two match the predicate below.
+	s.SetRouteTable([]tcpip.Route{
+		{Destination: subnet1, Gateway: "\x00", NIC: nicID},
+		{Destination: subnet2, Gateway: "\x00", NIC: nicID},
+		{Destination: subnet3, Gateway: "\x00", NIC: nicID},
+	})
+
+	// Remove routes with the specific address.
+	s.RemoveRoutes(func(r tcpip.Route) bool {
+		return r.Destination.ID() == addressToRemove
+	})
+
+	expected := []tcpip.Route{{Destination: subnet3, Gateway: "\x00", NIC: nicID}}
+	rt := s.GetRouteTable()
+	if got, want := len(rt), len(expected); got != want {
+		t.Fatalf("Unexpected route table length got = %d, want = %d", got, want)
+	}
+	for i, route := range rt {
+		if got, want := route, expected[i]; got != want {
+			t.Fatalf("Unexpected route got = %#v, want = %#v", got, want)
+		}
+	}
+}
+
+func TestFindRouteWithForwarding(t *testing.T) {
+	const (
+		nicID1 = 1
+		nicID2 = 2
+
+		nic1Addr   = tcpip.Address("\x01")
+		nic2Addr   = tcpip.Address("\x02")
+		remoteAddr = tcpip.Address("\x03")
+	)
+
+	type netCfg struct {
+		proto      tcpip.NetworkProtocolNumber
+		factory    stack.NetworkProtocolFactory
+		nic1Addr   tcpip.Address
+		nic2Addr   tcpip.Address
+		remoteAddr tcpip.Address
+	}
+
+	fakeNetCfg := netCfg{
+		proto:      fakeNetNumber,
+		factory:    fakeNetFactory,
+		nic1Addr:   nic1Addr,
+		nic2Addr:   nic2Addr,
+		remoteAddr: remoteAddr,
+	}
+
+	globalIPv6Addr1 := tcpip.Address(net.ParseIP("a::1").To16())
+	globalIPv6Addr2 := tcpip.Address(net.ParseIP("a::2").To16())
+
+	ipv6LinkLocalNIC1WithGlobalRemote := netCfg{
+		proto:      ipv6.ProtocolNumber,
+		factory:    ipv6.NewProtocol,
+		nic1Addr:   llAddr1,
+		nic2Addr:   globalIPv6Addr2,
+		remoteAddr: globalIPv6Addr1,
+	}
+	ipv6GlobalNIC1WithLinkLocalRemote := netCfg{
+		proto:      ipv6.ProtocolNumber,
+		factory:    ipv6.NewProtocol,
+		nic1Addr:   globalIPv6Addr1,
+		nic2Addr:   llAddr1,
+		remoteAddr: llAddr2,
+	}
+	ipv6GlobalNIC1WithLinkLocalMulticastRemote := netCfg{
+		proto:      ipv6.ProtocolNumber,
+		factory:    ipv6.NewProtocol,
+		nic1Addr:   globalIPv6Addr1,
+		nic2Addr:   globalIPv6Addr2,
+		remoteAddr: "\xff\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01",
+	}
+
+	tests := []struct {
+		name string
+
+		netCfg            netCfg
+		forwardingEnabled bool
+
+		addrNIC   tcpip.NICID
+		localAddr tcpip.Address
+
+		findRouteErr          *tcpip.Error
+		dependentOnForwarding bool
+	}{
+		{
+			name:                  "forwarding disabled and localAddr not on specified NIC but route from different NIC",
+			netCfg:                fakeNetCfg,
+			forwardingEnabled:     false,
+			addrNIC:               nicID1,
+			localAddr:             fakeNetCfg.nic2Addr,
+			findRouteErr:          tcpip.ErrNoRoute,
+			dependentOnForwarding: false,
+		},
+		{
+			name:                  "forwarding enabled and localAddr not on specified NIC but route from different NIC",
+			netCfg:                fakeNetCfg,
+			forwardingEnabled:     true,
+			addrNIC:               nicID1,
+			localAddr:             fakeNetCfg.nic2Addr,
+			findRouteErr:          tcpip.ErrNoRoute,
+			dependentOnForwarding: false,
+		},
+		{
+			name:                  "forwarding disabled and localAddr on specified NIC but route from different NIC",
+			netCfg:                fakeNetCfg,
+			forwardingEnabled:     false,
+			addrNIC:               nicID1,
+			localAddr:             fakeNetCfg.nic1Addr,
+			findRouteErr:          tcpip.ErrNoRoute,
+			dependentOnForwarding: false,
+		},
+		{
+			name:                  "forwarding enabled and localAddr on specified NIC but route from different NIC",
+			netCfg:                fakeNetCfg,
+			forwardingEnabled:     true,
+			addrNIC:               nicID1,
+			localAddr:             fakeNetCfg.nic1Addr,
+			findRouteErr:          nil,
+			dependentOnForwarding: true,
+		},
+		{
+			name:                  "forwarding disabled and localAddr on specified NIC and route from same NIC",
+			netCfg:                fakeNetCfg,
+			forwardingEnabled:     false,
+			addrNIC:               nicID2,
+			localAddr:             fakeNetCfg.nic2Addr,
+			findRouteErr:          nil,
+			dependentOnForwarding: false,
+		},
+		{
+			name:                  "forwarding enabled and localAddr on specified NIC and route from same NIC",
+			netCfg:                fakeNetCfg,
+			forwardingEnabled:     true,
+			addrNIC:               nicID2,
+			localAddr:             fakeNetCfg.nic2Addr,
+			findRouteErr:          nil,
+			dependentOnForwarding: false,
+		},
+		{
+			name:                  "forwarding disabled and localAddr not on specified NIC but route from same NIC",
+			netCfg:                fakeNetCfg,
+			forwardingEnabled:     false,
+			addrNIC:               nicID2,
+			localAddr:             fakeNetCfg.nic1Addr,
+			findRouteErr:          tcpip.ErrNoRoute,
+			dependentOnForwarding: false,
+		},
+		{
+			name:                  "forwarding enabled and localAddr not on specified NIC but route from same NIC",
+			netCfg:                fakeNetCfg,
+			forwardingEnabled:     true,
+			addrNIC:               nicID2,
+			localAddr:             fakeNetCfg.nic1Addr,
+			findRouteErr:          tcpip.ErrNoRoute,
+			dependentOnForwarding: false,
+		},
+		{
+			name:                  "forwarding disabled and localAddr on same NIC as route",
+			netCfg:                fakeNetCfg,
+			forwardingEnabled:     false,
+			localAddr:             fakeNetCfg.nic2Addr,
+			findRouteErr:          nil,
+			dependentOnForwarding: false,
+		},
+		{
+			name:                  "forwarding enabled and localAddr on same NIC as route",
+			netCfg:                fakeNetCfg,
+			forwardingEnabled:     true,
+			localAddr:             fakeNetCfg.nic2Addr,
+			findRouteErr:          nil,
+			dependentOnForwarding: false,
+		},
+		{
+			name:                  "forwarding disabled and localAddr on different NIC as route",
+			netCfg:                fakeNetCfg,
+			forwardingEnabled:     false,
+			localAddr:             fakeNetCfg.nic1Addr,
+			findRouteErr:          tcpip.ErrNoRoute,
+			dependentOnForwarding: false,
+		},
+		{
+			name:                  "forwarding enabled and localAddr on different NIC as route",
+			netCfg:                fakeNetCfg,
+			forwardingEnabled:     true,
+			localAddr:             fakeNetCfg.nic1Addr,
+			findRouteErr:          nil,
+			dependentOnForwarding: true,
+		},
+		{
+			name:                  "forwarding disabled and specified NIC only has link-local addr with route on different NIC",
+			netCfg:                ipv6LinkLocalNIC1WithGlobalRemote,
+			forwardingEnabled:     false,
+			addrNIC:               nicID1,
+			findRouteErr:          tcpip.ErrNoRoute,
+			dependentOnForwarding: false,
+		},
+		{
+			name:                  "forwarding enabled and specified NIC only has link-local addr with route on different NIC",
+			netCfg:                ipv6LinkLocalNIC1WithGlobalRemote,
+			forwardingEnabled:     true,
+			addrNIC:               nicID1,
+			findRouteErr:          tcpip.ErrNoRoute,
+			dependentOnForwarding: false,
+		},
+		{
+			name:                  "forwarding disabled and link-local local addr with route on different NIC",
+			netCfg:                ipv6LinkLocalNIC1WithGlobalRemote,
+			forwardingEnabled:     false,
+			localAddr:             ipv6LinkLocalNIC1WithGlobalRemote.nic1Addr,
+			findRouteErr:          tcpip.ErrNoRoute,
+			dependentOnForwarding: false,
+		},
+		{
+			name:                  "forwarding enabled and link-local local addr with route on different NIC",
+			netCfg:                ipv6LinkLocalNIC1WithGlobalRemote,
+			forwardingEnabled:     true,
+			localAddr:             ipv6LinkLocalNIC1WithGlobalRemote.nic1Addr,
+			findRouteErr:          tcpip.ErrNoRoute,
+			dependentOnForwarding: false,
+		},
+		{
+			name:                  "forwarding enabled and global local addr with route on same NIC",
+			netCfg:                ipv6LinkLocalNIC1WithGlobalRemote,
+			forwardingEnabled:     true,
+			localAddr:             ipv6LinkLocalNIC1WithGlobalRemote.nic2Addr,
+			findRouteErr:          nil,
+			dependentOnForwarding: false,
+		},
+		{
+			name:                  "forwarding disabled and link-local local addr with route on same NIC",
+			netCfg:                ipv6GlobalNIC1WithLinkLocalRemote,
+			forwardingEnabled:     false,
+			localAddr:             ipv6GlobalNIC1WithLinkLocalRemote.nic2Addr,
+			findRouteErr:          nil,
+			dependentOnForwarding: false,
+		},
+		{
+			name:                  "forwarding enabled and link-local local addr with route on same NIC",
+			netCfg:                ipv6GlobalNIC1WithLinkLocalRemote,
+			forwardingEnabled:     true,
+			localAddr:             ipv6GlobalNIC1WithLinkLocalRemote.nic2Addr,
+			findRouteErr:          nil,
+			dependentOnForwarding: false,
+		},
+		{
+			name:                  "forwarding disabled and global local addr with link-local remote on different NIC",
+			netCfg:                ipv6GlobalNIC1WithLinkLocalRemote,
+			forwardingEnabled:     false,
+			localAddr:             ipv6GlobalNIC1WithLinkLocalRemote.nic1Addr,
+			findRouteErr:          tcpip.ErrNetworkUnreachable,
+			dependentOnForwarding: false,
+		},
+		{
+			name:                  "forwarding enabled and global local addr with link-local remote on different NIC",
+			netCfg:                ipv6GlobalNIC1WithLinkLocalRemote,
+			forwardingEnabled:     true,
+			localAddr:             ipv6GlobalNIC1WithLinkLocalRemote.nic1Addr,
+			findRouteErr:          tcpip.ErrNetworkUnreachable,
+			dependentOnForwarding: false,
+		},
+		{
+			name:                  "forwarding disabled and global local addr with link-local multicast remote on different NIC",
+			netCfg:                ipv6GlobalNIC1WithLinkLocalMulticastRemote,
+			forwardingEnabled:     false,
+			localAddr:             ipv6GlobalNIC1WithLinkLocalMulticastRemote.nic1Addr,
+			findRouteErr:          tcpip.ErrNetworkUnreachable,
+			dependentOnForwarding: false,
+		},
+		{
+			name:                  "forwarding enabled and global local addr with link-local multicast remote on different NIC",
+			netCfg:                ipv6GlobalNIC1WithLinkLocalMulticastRemote,
+			forwardingEnabled:     true,
+			localAddr:             ipv6GlobalNIC1WithLinkLocalMulticastRemote.nic1Addr,
+			findRouteErr:          tcpip.ErrNetworkUnreachable,
+			dependentOnForwarding: false,
+		},
+		{
+			name:                  "forwarding disabled and global local addr with link-local multicast remote on same NIC",
+			netCfg:                ipv6GlobalNIC1WithLinkLocalMulticastRemote,
+			forwardingEnabled:     false,
+			localAddr:             ipv6GlobalNIC1WithLinkLocalMulticastRemote.nic2Addr,
+			findRouteErr:          nil,
+			dependentOnForwarding: false,
+		},
+		{
+			name:                  "forwarding enabled and global local addr with link-local multicast remote on same NIC",
+			netCfg:                ipv6GlobalNIC1WithLinkLocalMulticastRemote,
+			forwardingEnabled:     true,
+			localAddr:             ipv6GlobalNIC1WithLinkLocalMulticastRemote.nic2Addr,
+			findRouteErr:          nil,
+			dependentOnForwarding: false,
+		},
+	}
+
+	for _, test := range tests {
+		t.Run(test.name, func(t *testing.T) {
+			s := stack.New(stack.Options{
+				NetworkProtocols: []stack.NetworkProtocolFactory{test.netCfg.factory},
+			})
+
+			ep1 := channel.New(1, defaultMTU, "")
+			if err := s.CreateNIC(nicID1, ep1); err != nil {
+				t.Fatalf("CreateNIC(%d, _): %s:", nicID1, err)
+			}
+
+			ep2 := channel.New(1, defaultMTU, "")
+			if err := s.CreateNIC(nicID2, ep2); err != nil {
+				t.Fatalf("CreateNIC(%d, _): %s:", nicID2, err)
+			}
+
+			if err := s.AddAddress(nicID1, test.netCfg.proto, test.netCfg.nic1Addr); err != nil {
+				t.Fatalf("AddAddress(%d, %d, %s): %s", nicID1, test.netCfg.proto, test.netCfg.nic1Addr, err)
+			}
+
+			if err := s.AddAddress(nicID2, test.netCfg.proto, test.netCfg.nic2Addr); err != nil {
+				t.Fatalf("AddAddress(%d, %d, %s): %s", nicID2, test.netCfg.proto, test.netCfg.nic2Addr, err)
+			}
+
+			if err := s.SetForwarding(test.netCfg.proto, test.forwardingEnabled); err != nil {
+				t.Fatalf("SetForwarding(%d, %t): %s", test.netCfg.proto, test.forwardingEnabled, err)
+			}
+
+			s.SetRouteTable([]tcpip.Route{{Destination: test.netCfg.remoteAddr.WithPrefix().Subnet(), NIC: nicID2}})
+
+			r, err := s.FindRoute(test.addrNIC, test.localAddr, test.netCfg.remoteAddr, test.netCfg.proto, false /* multicastLoop */)
+			if r != nil {
+				defer r.Release()
+			}
+			if err != test.findRouteErr {
+				t.Fatalf("FindRoute(%d, %s, %s, %d, false) = %s, want = %s", test.addrNIC, test.localAddr, test.netCfg.remoteAddr, test.netCfg.proto, err, test.findRouteErr)
+			}
+
+			if test.findRouteErr != nil {
+				return
+			}
+
+			if r.LocalAddress != test.localAddr {
+				t.Errorf("got r.LocalAddress = %s, want = %s", r.LocalAddress, test.localAddr)
+			}
+			if r.RemoteAddress != test.netCfg.remoteAddr {
+				t.Errorf("got r.RemoteAddress = %s, want = %s", r.RemoteAddress, test.netCfg.remoteAddr)
+			}
+
+			if t.Failed() {
+				t.FailNow()
+			}
+
+			// Sending a packet should always go through NIC2 since we only install a
+			// route to test.netCfg.remoteAddr through NIC2.
+			data := buffer.View([]byte{1, 2, 3, 4})
+			if err := send(r, data); err != nil {
+				t.Fatalf("send(_, _): %s", err)
+			}
+			if n := ep1.Drain(); n != 0 {
+				t.Errorf("got %d unexpected packets from ep1", n)
+			}
+			pkt, ok := ep2.Read()
+			if !ok {
+				t.Fatal("packet not sent through ep2")
+			}
+			if pkt.Route.LocalAddress != test.localAddr {
+				t.Errorf("got pkt.Route.LocalAddress = %s, want = %s", pkt.Route.LocalAddress, test.localAddr)
+			}
+			if pkt.Route.RemoteAddress != test.netCfg.remoteAddr {
+				t.Errorf("got pkt.Route.RemoteAddress = %s, want = %s", pkt.Route.RemoteAddress, test.netCfg.remoteAddr)
+			}
+
+			if !test.forwardingEnabled || !test.dependentOnForwarding {
+				return
+			}
+
+			// Disabling forwarding when the route is dependent on forwarding being
+			// enabled should make the route invalid.
+			if err := s.SetForwarding(test.netCfg.proto, false); err != nil {
+				t.Fatalf("SetForwarding(%d, false): %s", test.netCfg.proto, err)
+			}
+			if err := send(r, data); err != tcpip.ErrInvalidEndpointState {
+				t.Fatalf("got send(_, _) = %s, want = %s", err, tcpip.ErrInvalidEndpointState)
+			}
+			if n := ep1.Drain(); n != 0 {
+				t.Errorf("got %d unexpected packets from ep1", n)
+			}
+			if n := ep2.Drain(); n != 0 {
+				t.Errorf("got %d unexpected packets from ep2", n)
+			}
+		})
+	}
+}
+
+func TestWritePacketToRemote(t *testing.T) {
+	const nicID = 1
+	const MTU = 1280
+	e := channel.New(1, MTU, linkAddr1)
+	s := stack.New(stack.Options{})
+	if err := s.CreateNIC(nicID, e); err != nil {
+		t.Fatalf("CreateNIC(%d, _) = %s", nicID, err)
+	}
+	if err := s.EnableNIC(nicID); err != nil {
+		t.Fatalf("EnableNIC(%d) = %s", nicID, err)
+	}
+	tests := []struct {
+		name     string
+		protocol tcpip.NetworkProtocolNumber
+		payload  []byte
+	}{
+		{
+			name:     "SuccessIPv4",
+			protocol: header.IPv4ProtocolNumber,
+			payload:  []byte{1, 2, 3, 4},
+		},
+		{
+			name:     "SuccessIPv6",
+			protocol: header.IPv6ProtocolNumber,
+			payload:  []byte{5, 6, 7, 8},
+		},
+	}
+	for _, test := range tests {
+		t.Run(test.name, func(t *testing.T) {
+			if err := s.WritePacketToRemote(nicID, linkAddr2, test.protocol, buffer.View(test.payload).ToVectorisedView()); err != nil {
+				t.Fatalf("s.WritePacketToRemote(_, _, _, _) = %s", err)
+			}
+
+			pkt, ok := e.Read()
+			if got, want := ok, true; got != want {
+				t.Fatalf("e.Read() = %t, want %t", got, want)
+			}
+			if got, want := pkt.Proto, test.protocol; got != want {
+				t.Fatalf("pkt.Proto = %d, want %d", got, want)
+			}
+			if got, want := pkt.Route.RemoteLinkAddress(), linkAddr2; got != want {
+				t.Fatalf("pkt.Route.RemoteLinkAddress() = %s, want %s", got, want)
+			}
+			if diff := cmp.Diff(buffer.View(test.payload), pkt.Pkt.Data.ToView()); diff != "" {
+				t.Errorf("pkt.Pkt.Data mismatch (-want +got):\n%s", diff)
+			}
+		})
+	}
+
+	t.Run("InvalidNICID", func(t *testing.T) {
+		if got, want := s.WritePacketToRemote(234, linkAddr2, header.IPv4ProtocolNumber, buffer.View([]byte{1}).ToVectorisedView()), tcpip.ErrUnknownDevice; got != want {
+			t.Fatalf("s.WritePacketToRemote(_, _, _, _) = %s, want = %s", got, want)
+		}
+		pkt, ok := e.Read()
+		if got, want := ok, false; got != want {
+			t.Fatalf("e.Read() = %t, %v; want %t", got, pkt, want)
+		}
+	})
+}
diff --git a/pkg/tcpip/stack/transport_demuxer.go b/pkg/tcpip/stack/transport_demuxer.go
index 35e5b1a2e..f183ec6e4 100644
--- a/pkg/tcpip/stack/transport_demuxer.go
+++ b/pkg/tcpip/stack/transport_demuxer.go
@@ -152,10 +152,10 @@ func (epsByNIC *endpointsByNIC) transportEndpoints() []TransportEndpoint {
 
 // HandlePacket is called by the stack when new packets arrive to this transport
 // endpoint.
-func (epsByNIC *endpointsByNIC) handlePacket(r *Route, id TransportEndpointID, pkt *PacketBuffer) {
+func (epsByNIC *endpointsByNIC) handlePacket(id TransportEndpointID, pkt *PacketBuffer) {
 	epsByNIC.mu.RLock()
 
-	mpep, ok := epsByNIC.endpoints[r.nic.ID()]
+	mpep, ok := epsByNIC.endpoints[pkt.NICID]
 	if !ok {
 		if mpep, ok = epsByNIC.endpoints[0]; !ok {
 			epsByNIC.mu.RUnlock() // Don't use defer for performance reasons.
@@ -165,20 +165,20 @@ func (epsByNIC *endpointsByNIC) handlePacket(r *Route, id TransportEndpointID, p
 
 	// If this is a broadcast or multicast datagram, deliver the datagram to all
 	// endpoints bound to the right device.
-	if isInboundMulticastOrBroadcast(r) {
-		mpep.handlePacketAll(r, id, pkt)
+	if isInboundMulticastOrBroadcast(pkt, id.LocalAddress) {
+		mpep.handlePacketAll(id, pkt)
 		epsByNIC.mu.RUnlock() // Don't use defer for performance reasons.
 		return
 	}
 	// multiPortEndpoints are guaranteed to have at least one element.
 	transEP := selectEndpoint(id, mpep, epsByNIC.seed)
 	if queuedProtocol, mustQueue := mpep.demux.queuedProtocols[protocolIDs{mpep.netProto, mpep.transProto}]; mustQueue {
-		queuedProtocol.QueuePacket(r, transEP, id, pkt)
+		queuedProtocol.QueuePacket(transEP, id, pkt)
 		epsByNIC.mu.RUnlock()
 		return
 	}
 
-	transEP.HandlePacket(r, id, pkt)
+	transEP.HandlePacket(id, pkt)
 	epsByNIC.mu.RUnlock() // Don't use defer for performance reasons.
 }
 
@@ -253,6 +253,8 @@ func (epsByNIC *endpointsByNIC) unregisterEndpoint(bindToDevice tcpip.NICID, t T
 // based on endpoints IDs. It should only be instantiated via
 // newTransportDemuxer.
 type transportDemuxer struct {
+	stack *Stack
+
 	// protocol is immutable.
 	protocol        map[protocolIDs]*transportEndpoints
 	queuedProtocols map[protocolIDs]queuedTransportProtocol
@@ -262,11 +264,12 @@ type transportDemuxer struct {
 // the dispatcher to delivery packets to the QueuePacket method instead of
 // calling HandlePacket directly on the endpoint.
 type queuedTransportProtocol interface {
-	QueuePacket(r *Route, ep TransportEndpoint, id TransportEndpointID, pkt *PacketBuffer)
+	QueuePacket(ep TransportEndpoint, id TransportEndpointID, pkt *PacketBuffer)
 }
 
 func newTransportDemuxer(stack *Stack) *transportDemuxer {
 	d := &transportDemuxer{
+		stack:           stack,
 		protocol:        make(map[protocolIDs]*transportEndpoints),
 		queuedProtocols: make(map[protocolIDs]queuedTransportProtocol),
 	}
@@ -377,22 +380,22 @@ func selectEndpoint(id TransportEndpointID, mpep *multiPortEndpoint, seed uint32
 	return mpep.endpoints[idx]
 }
 
-func (ep *multiPortEndpoint) handlePacketAll(r *Route, id TransportEndpointID, pkt *PacketBuffer) {
+func (ep *multiPortEndpoint) handlePacketAll(id TransportEndpointID, pkt *PacketBuffer) {
 	ep.mu.RLock()
 	queuedProtocol, mustQueue := ep.demux.queuedProtocols[protocolIDs{ep.netProto, ep.transProto}]
 	// HandlePacket takes ownership of pkt, so each endpoint needs
 	// its own copy except for the final one.
 	for _, endpoint := range ep.endpoints[:len(ep.endpoints)-1] {
 		if mustQueue {
-			queuedProtocol.QueuePacket(r, endpoint, id, pkt.Clone())
+			queuedProtocol.QueuePacket(endpoint, id, pkt.Clone())
 		} else {
-			endpoint.HandlePacket(r, id, pkt.Clone())
+			endpoint.HandlePacket(id, pkt.Clone())
 		}
 	}
 	if endpoint := ep.endpoints[len(ep.endpoints)-1]; mustQueue {
-		queuedProtocol.QueuePacket(r, endpoint, id, pkt)
+		queuedProtocol.QueuePacket(endpoint, id, pkt)
 	} else {
-		endpoint.HandlePacket(r, id, pkt)
+		endpoint.HandlePacket(id, pkt)
 	}
 	ep.mu.RUnlock() // Don't use defer for performance reasons.
 }
@@ -518,29 +521,29 @@ func (d *transportDemuxer) unregisterEndpoint(netProtos []tcpip.NetworkProtocolN
 // deliverPacket attempts to find one or more matching transport endpoints, and
 // then, if matches are found, delivers the packet to them. Returns true if
 // the packet no longer needs to be handled.
-func (d *transportDemuxer) deliverPacket(r *Route, protocol tcpip.TransportProtocolNumber, pkt *PacketBuffer, id TransportEndpointID) bool {
-	eps, ok := d.protocol[protocolIDs{r.NetProto, protocol}]
+func (d *transportDemuxer) deliverPacket(protocol tcpip.TransportProtocolNumber, pkt *PacketBuffer, id TransportEndpointID) bool {
+	eps, ok := d.protocol[protocolIDs{pkt.NetworkProtocolNumber, protocol}]
 	if !ok {
 		return false
 	}
 
 	// If the packet is a UDP broadcast or multicast, then find all matching
 	// transport endpoints.
-	if protocol == header.UDPProtocolNumber && isInboundMulticastOrBroadcast(r) {
+	if protocol == header.UDPProtocolNumber && isInboundMulticastOrBroadcast(pkt, id.LocalAddress) {
 		eps.mu.RLock()
 		destEPs := eps.findAllEndpointsLocked(id)
 		eps.mu.RUnlock()
 		// Fail if we didn't find at least one matching transport endpoint.
 		if len(destEPs) == 0 {
-			r.Stats().UDP.UnknownPortErrors.Increment()
+			d.stack.stats.UDP.UnknownPortErrors.Increment()
 			return false
 		}
 		// handlePacket takes ownership of pkt, so each endpoint needs its own
 		// copy except for the final one.
 		for _, ep := range destEPs[:len(destEPs)-1] {
-			ep.handlePacket(r, id, pkt.Clone())
+			ep.handlePacket(id, pkt.Clone())
 		}
-		destEPs[len(destEPs)-1].handlePacket(r, id, pkt)
+		destEPs[len(destEPs)-1].handlePacket(id, pkt)
 		return true
 	}
 
@@ -548,10 +551,10 @@ func (d *transportDemuxer) deliverPacket(r *Route, protocol tcpip.TransportProto
 	// destination address, then do nothing further and instruct the caller to do
 	// the same. The network layer handles address validation for specified source
 	// addresses.
-	if protocol == header.TCPProtocolNumber && (!isSpecified(r.LocalAddress) || !isSpecified(r.RemoteAddress) || isInboundMulticastOrBroadcast(r)) {
+	if protocol == header.TCPProtocolNumber && (!isSpecified(id.LocalAddress) || !isSpecified(id.RemoteAddress) || isInboundMulticastOrBroadcast(pkt, id.LocalAddress)) {
 		// TCP can only be used to communicate between a single source and a
-		// single destination; the addresses must be unicast.
-		r.Stats().TCP.InvalidSegmentsReceived.Increment()
+		// single destination; the addresses must be unicast.
+		d.stack.stats.TCP.InvalidSegmentsReceived.Increment()
 		return true
 	}
 
@@ -560,18 +563,18 @@ func (d *transportDemuxer) deliverPacket(r *Route, protocol tcpip.TransportProto
 	eps.mu.RUnlock()
 	if ep == nil {
 		if protocol == header.UDPProtocolNumber {
-			r.Stats().UDP.UnknownPortErrors.Increment()
+			d.stack.stats.UDP.UnknownPortErrors.Increment()
 		}
 		return false
 	}
-	ep.handlePacket(r, id, pkt)
+	ep.handlePacket(id, pkt)
 	return true
 }
 
 // deliverRawPacket attempts to deliver the given packet and returns whether it
 // was delivered successfully.
-func (d *transportDemuxer) deliverRawPacket(r *Route, protocol tcpip.TransportProtocolNumber, pkt *PacketBuffer) bool {
-	eps, ok := d.protocol[protocolIDs{r.NetProto, protocol}]
+func (d *transportDemuxer) deliverRawPacket(protocol tcpip.TransportProtocolNumber, pkt *PacketBuffer) bool {
+	eps, ok := d.protocol[protocolIDs{pkt.NetworkProtocolNumber, protocol}]
 	if !ok {
 		return false
 	}
@@ -584,7 +587,7 @@ func (d *transportDemuxer) deliverRawPacket(r *Route, protocol tcpip.TransportPr
 	for _, rawEP := range eps.rawEndpoints {
 		// Each endpoint gets its own copy of the packet for the sake
 		// of save/restore.
-		rawEP.HandlePacket(r, pkt)
+		rawEP.HandlePacket(pkt.Clone())
 		foundRaw = true
 	}
 	eps.mu.RUnlock()
@@ -612,7 +615,7 @@ func (d *transportDemuxer) deliverControlPacket(n *NIC, net tcpip.NetworkProtoco
 }
 
 // findTransportEndpoint find a single endpoint that most closely matches the provided id.
-func (d *transportDemuxer) findTransportEndpoint(netProto tcpip.NetworkProtocolNumber, transProto tcpip.TransportProtocolNumber, id TransportEndpointID, r *Route) TransportEndpoint {
+func (d *transportDemuxer) findTransportEndpoint(netProto tcpip.NetworkProtocolNumber, transProto tcpip.TransportProtocolNumber, id TransportEndpointID, nicID tcpip.NICID) TransportEndpoint {
 	eps, ok := d.protocol[protocolIDs{netProto, transProto}]
 	if !ok {
 		return nil
@@ -628,7 +631,7 @@ func (d *transportDemuxer) findTransportEndpoint(netProto tcpip.NetworkProtocolN
 	epsByNIC.mu.RLock()
 	eps.mu.RUnlock()
 
-	mpep, ok := epsByNIC.endpoints[r.nic.ID()]
+	mpep, ok := epsByNIC.endpoints[nicID]
 	if !ok {
 		if mpep, ok = epsByNIC.endpoints[0]; !ok {
 			epsByNIC.mu.RUnlock() // Don't use defer for performance reasons.
@@ -679,8 +682,8 @@ func (d *transportDemuxer) unregisterRawEndpoint(netProto tcpip.NetworkProtocolN
 	eps.mu.Unlock()
 }
 
-func isInboundMulticastOrBroadcast(r *Route) bool {
-	return r.IsInboundBroadcast() || header.IsV4MulticastAddress(r.LocalAddress) || header.IsV6MulticastAddress(r.LocalAddress)
+func isInboundMulticastOrBroadcast(pkt *PacketBuffer, localAddr tcpip.Address) bool {
+	return pkt.NetworkPacketInfo.LocalAddressBroadcast || header.IsV4MulticastAddress(localAddr) || header.IsV6MulticastAddress(localAddr)
 }
 
 func isSpecified(addr tcpip.Address) bool {
diff --git a/pkg/tcpip/stack/transport_demuxer_test.go b/pkg/tcpip/stack/transport_demuxer_test.go
index 698c8609e..a692af20b 100644
--- a/pkg/tcpip/stack/transport_demuxer_test.go
+++ b/pkg/tcpip/stack/transport_demuxer_test.go
@@ -102,7 +102,6 @@ func (c *testContext) sendV4Packet(payload []byte, h *headers, linkEpID tcpip.NI
 	// Initialize the IP header.
 	ip := header.IPv4(buf)
 	ip.Encode(&header.IPv4Fields{
-		IHL:         header.IPv4MinimumSize,
 		TOS:         0x80,
 		TotalLength: uint16(len(buf)),
 		TTL:         65,
@@ -142,11 +141,11 @@ func (c *testContext) sendV6Packet(payload []byte, h *headers, linkEpID tcpip.NI
 	// Initialize the IP header.
 	ip := header.IPv6(buf)
 	ip.Encode(&header.IPv6Fields{
-		PayloadLength: uint16(header.UDPMinimumSize + len(payload)),
-		NextHeader:    uint8(udp.ProtocolNumber),
-		HopLimit:      65,
-		SrcAddr:       testSrcAddrV6,
-		DstAddr:       testDstAddrV6,
+		PayloadLength:     uint16(header.UDPMinimumSize + len(payload)),
+		TransportProtocol: udp.ProtocolNumber,
+		HopLimit:          65,
+		SrcAddr:           testSrcAddrV6,
+		DstAddr:           testDstAddrV6,
 	})
 
 	// Initialize the UDP header.
@@ -308,9 +307,7 @@ func TestBindToDeviceDistribution(t *testing.T) {
 						}(ep)
 
 						defer ep.Close()
-						if err := ep.SetSockOptBool(tcpip.ReusePortOption, endpoint.reuse); err != nil {
-							t.Fatalf("SetSockOptBool(ReusePortOption, %t) on endpoint %d failed: %s", endpoint.reuse, i, err)
-						}
+						ep.SocketOptions().SetReusePort(endpoint.reuse)
 						bindToDeviceOption := tcpip.BindToDeviceOption(endpoint.bindToDevice)
 						if err := ep.SetSockOpt(&bindToDeviceOption); err != nil {
 							t.Fatalf("SetSockOpt(&%T(%d)) on endpoint %d failed: %s", bindToDeviceOption, bindToDeviceOption, i, err)
diff --git a/pkg/tcpip/stack/transport_test.go b/pkg/tcpip/stack/transport_test.go
index 62ab6d92f..66eb562ba 100644
--- a/pkg/tcpip/stack/transport_test.go
+++ b/pkg/tcpip/stack/transport_test.go
@@ -20,7 +20,6 @@ import (
 	"gvisor.dev/gvisor/pkg/tcpip"
 	"gvisor.dev/gvisor/pkg/tcpip/buffer"
 	"gvisor.dev/gvisor/pkg/tcpip/link/channel"
-	"gvisor.dev/gvisor/pkg/tcpip/link/loopback"
 	"gvisor.dev/gvisor/pkg/tcpip/ports"
 	"gvisor.dev/gvisor/pkg/tcpip/stack"
 	"gvisor.dev/gvisor/pkg/waiter"
@@ -28,7 +27,7 @@ import (
 
 const (
 	fakeTransNumber    tcpip.TransportProtocolNumber = 1
-	fakeTransHeaderLen                               = 3
+	fakeTransHeaderLen int                           = 3
 )
 
 // fakeTransportEndpoint is a transport-layer protocol endpoint. It counts
@@ -39,14 +38,18 @@ const (
 // use it.
 type fakeTransportEndpoint struct {
 	stack.TransportEndpointInfo
+	tcpip.DefaultSocketOptionsHandler
 
 	proto    *fakeTransportProtocol
 	peerAddr tcpip.Address
-	route    stack.Route
+	route    *stack.Route
 	uniqueID uint64
 
 	// acceptQueue is non-nil iff bound.
-	acceptQueue []fakeTransportEndpoint
+	acceptQueue []*fakeTransportEndpoint
+
+	// ops is used to set and get socket options.
+	ops tcpip.SocketOptions
 }
 
 func (f *fakeTransportEndpoint) Info() tcpip.EndpointInfo {
@@ -59,8 +62,14 @@ func (*fakeTransportEndpoint) Stats() tcpip.EndpointStats {
 
 func (*fakeTransportEndpoint) SetOwner(owner tcpip.PacketOwner) {}
 
+func (f *fakeTransportEndpoint) SocketOptions() *tcpip.SocketOptions {
+	return &f.ops
+}
+
 func newFakeTransportEndpoint(proto *fakeTransportProtocol, netProto tcpip.NetworkProtocolNumber, uniqueID uint64) tcpip.Endpoint {
-	return &fakeTransportEndpoint{TransportEndpointInfo: stack.TransportEndpointInfo{NetProto: netProto}, proto: proto, uniqueID: uniqueID}
+	ep := &fakeTransportEndpoint{TransportEndpointInfo: stack.TransportEndpointInfo{NetProto: netProto}, proto: proto, uniqueID: uniqueID}
+	ep.ops.InitHandler(ep)
+	return ep
 }
 
 func (f *fakeTransportEndpoint) Abort() {
@@ -100,8 +109,8 @@ func (f *fakeTransportEndpoint) Write(p tcpip.Payloader, opts tcpip.WriteOptions
 	return int64(len(v)), nil, nil
 }
 
-func (*fakeTransportEndpoint) Peek([][]byte) (int64, tcpip.ControlMessages, *tcpip.Error) {
-	return 0, tcpip.ControlMessages{}, nil
+func (*fakeTransportEndpoint) Peek([][]byte) (int64, *tcpip.Error) {
+	return 0, nil
 }
 
 // SetSockOpt sets a socket option. Currently not supported.
@@ -109,21 +118,11 @@ func (*fakeTransportEndpoint) SetSockOpt(tcpip.SettableSocketOption) *tcpip.Erro
 	return tcpip.ErrInvalidEndpointState
 }
 
-// SetSockOptBool sets a socket option. Currently not supported.
-func (*fakeTransportEndpoint) SetSockOptBool(tcpip.SockOptBool, bool) *tcpip.Error {
-	return tcpip.ErrInvalidEndpointState
-}
-
 // SetSockOptInt sets a socket option. Currently not supported.
 func (*fakeTransportEndpoint) SetSockOptInt(tcpip.SockOptInt, int) *tcpip.Error {
 	return tcpip.ErrInvalidEndpointState
 }
 
-// GetSockOptBool implements tcpip.Endpoint.GetSockOptBool.
-func (*fakeTransportEndpoint) GetSockOptBool(opt tcpip.SockOptBool) (bool, *tcpip.Error) {
-	return false, tcpip.ErrUnknownProtocolOption
-}
-
 // GetSockOptInt implements tcpip.Endpoint.GetSockOptInt.
 func (*fakeTransportEndpoint) GetSockOptInt(opt tcpip.SockOptInt) (int, *tcpip.Error) {
 	return -1, tcpip.ErrUnknownProtocolOption
@@ -186,7 +185,7 @@ func (f *fakeTransportEndpoint) Accept(*tcpip.FullAddress) (tcpip.Endpoint, *wai
 	}
 	a := f.acceptQueue[0]
 	f.acceptQueue = f.acceptQueue[1:]
-	return &a, nil, nil
+	return a, nil, nil
 }
 
 func (f *fakeTransportEndpoint) Bind(a tcpip.FullAddress) *tcpip.Error {
@@ -201,7 +200,7 @@ func (f *fakeTransportEndpoint) Bind(a tcpip.FullAddress) *tcpip.Error {
 	); err != nil {
 		return err
 	}
-	f.acceptQueue = []fakeTransportEndpoint{}
+	f.acceptQueue = []*fakeTransportEndpoint{}
 	return nil
 }
 
@@ -213,20 +212,31 @@ func (*fakeTransportEndpoint) GetRemoteAddress() (tcpip.FullAddress, *tcpip.Erro
 	return tcpip.FullAddress{}, nil
 }
 
-func (f *fakeTransportEndpoint) HandlePacket(r *stack.Route, id stack.TransportEndpointID, _ *stack.PacketBuffer) {
+func (f *fakeTransportEndpoint) HandlePacket(id stack.TransportEndpointID, pkt *stack.PacketBuffer) {
 	// Increment the number of received packets.
 	f.proto.packetCount++
-	if f.acceptQueue != nil {
-		f.acceptQueue = append(f.acceptQueue, fakeTransportEndpoint{
-			TransportEndpointInfo: stack.TransportEndpointInfo{
-				ID:       f.ID,
-				NetProto: f.NetProto,
-			},
-			proto:    f.proto,
-			peerAddr: r.RemoteAddress,
-			route:    r.Clone(),
-		})
+	if f.acceptQueue == nil {
+		return
+	}
+
+	netHdr := pkt.NetworkHeader().View()
+	route, err := f.proto.stack.FindRoute(pkt.NICID, tcpip.Address(netHdr[dstAddrOffset]), tcpip.Address(netHdr[srcAddrOffset]), pkt.NetworkProtocolNumber, false /* multicastLoop */)
+	if err != nil {
+		return
 	}
+	route.ResolveWith(pkt.SourceLinkAddress())
+
+	ep := &fakeTransportEndpoint{
+		TransportEndpointInfo: stack.TransportEndpointInfo{
+			ID:       f.ID,
+			NetProto: f.NetProto,
+		},
+		proto:    f.proto,
+		peerAddr: route.RemoteAddress,
+		route:    route,
+	}
+	ep.ops.InitHandler(ep)
+	f.acceptQueue = append(f.acceptQueue, ep)
 }
 
 func (f *fakeTransportEndpoint) HandleControlPacket(stack.TransportEndpointID, stack.ControlType, uint32, *stack.PacketBuffer) {
@@ -288,7 +298,7 @@ func (*fakeTransportProtocol) ParsePorts(buffer.View) (src, dst uint16, err *tcp
 	return 0, 0, nil
 }
 
-func (*fakeTransportProtocol) HandleUnknownDestinationPacket(*stack.Route, stack.TransportEndpointID, *stack.PacketBuffer) stack.UnknownDestinationPacketDisposition {
+func (*fakeTransportProtocol) HandleUnknownDestinationPacket(stack.TransportEndpointID, *stack.PacketBuffer) stack.UnknownDestinationPacketDisposition {
 	return stack.UnknownDestinationPacketHandled
 }
 
@@ -544,87 +554,3 @@ func TestTransportOptions(t *testing.T) {
 		t.Fatalf("got tcpip.TCPModerateReceiveBufferOption = false, want = true")
 	}
 }
-
-func TestTransportForwarding(t *testing.T) {
-	s := stack.New(stack.Options{
-		NetworkProtocols:   []stack.NetworkProtocolFactory{fakeNetFactory},
-		TransportProtocols: []stack.TransportProtocolFactory{fakeTransFactory},
-	})
-	s.SetForwarding(fakeNetNumber, true)
-
-	// TODO(b/123449044): Change this to a channel NIC.
-	ep1 := loopback.New()
-	if err := s.CreateNIC(1, ep1); err != nil {
-		t.Fatalf("CreateNIC #1 failed: %v", err)
-	}
-	if err := s.AddAddress(1, fakeNetNumber, "\x01"); err != nil {
-		t.Fatalf("AddAddress #1 failed: %v", err)
-	}
-
-	ep2 := channel.New(10, defaultMTU, "")
-	if err := s.CreateNIC(2, ep2); err != nil {
-		t.Fatalf("CreateNIC #2 failed: %v", err)
-	}
-	if err := s.AddAddress(2, fakeNetNumber, "\x02"); err != nil {
-		t.Fatalf("AddAddress #2 failed: %v", err)
-	}
-
-	// Route all packets to address 3 to NIC 2 and all packets to address
-	// 1 to NIC 1.
-	{
-		subnet0, err := tcpip.NewSubnet("\x03", "\xff")
-		if err != nil {
-			t.Fatal(err)
-		}
-		subnet1, err := tcpip.NewSubnet("\x01", "\xff")
-		if err != nil {
-			t.Fatal(err)
-		}
-		s.SetRouteTable([]tcpip.Route{
-			{Destination: subnet0, Gateway: "\x00", NIC: 2},
-			{Destination: subnet1, Gateway: "\x00", NIC: 1},
-		})
-	}
-
-	wq := waiter.Queue{}
-	ep, err := s.NewEndpoint(fakeTransNumber, fakeNetNumber, &wq)
-	if err != nil {
-		t.Fatalf("NewEndpoint failed: %v", err)
-	}
-
-	if err := ep.Bind(tcpip.FullAddress{Addr: "\x01", NIC: 1}); err != nil {
-		t.Fatalf("Bind failed: %v", err)
-	}
-
-	// Send a packet to address 1 from address 3.
-	req := buffer.NewView(30)
-	req[0] = 1
-	req[1] = 3
-	req[2] = byte(fakeTransNumber)
-	ep2.InjectInbound(fakeNetNumber, stack.NewPacketBuffer(stack.PacketBufferOptions{
-		Data: req.ToVectorisedView(),
-	}))
-
-	aep, _, err := ep.Accept(nil)
-	if err != nil || aep == nil {
-		t.Fatalf("Accept failed: %v, %v", aep, err)
-	}
-
-	resp := buffer.NewView(30)
-	if _, _, err := aep.Write(tcpip.SlicePayload(resp), tcpip.WriteOptions{}); err != nil {
-		t.Fatalf("Write failed: %v", err)
-	}
-
-	p, ok := ep2.Read()
-	if !ok {
-		t.Fatal("Response packet not forwarded")
-	}
-
-	nh := stack.PayloadSince(p.Pkt.NetworkHeader())
-	if dst := nh[0]; dst != 3 {
-		t.Errorf("Response packet has incorrect destination addresss: got = %d, want = 3", dst)
-	}
-	if src := nh[1]; src != 1 {
-		t.Errorf("Response packet has incorrect source addresss: got = %d, want = 3", src)
-	}
-}
diff --git a/pkg/tcpip/tcpip.go b/pkg/tcpip/tcpip.go
index d77848d61..45fa62720 100644
--- a/pkg/tcpip/tcpip.go
+++ b/pkg/tcpip/tcpip.go
@@ -49,8 +49,9 @@ const ipv4AddressSize = 4
 // Error represents an error in the netstack error space. Using a special type
 // ensures that errors outside of this space are not accidentally introduced.
 //
-// Note: to support save / restore, it is important that all tcpip errors have
-// distinct error messages.
+// All errors must have unique msg strings.
+//
+// +stateify savable
 type Error struct {
 	msg string
 
@@ -247,6 +248,16 @@ func (a Address) WithPrefix() AddressWithPrefix {
 	}
 }
 
+// Unspecified returns true if the address is unspecified.
+func (a Address) Unspecified() bool {
+	for _, b := range a {
+		if b != 0 {
+			return false
+		}
+	}
+	return true
+}
+
 // AddressMask is a bitmask for an address.
 type AddressMask string
 
@@ -356,10 +367,9 @@ func (s *Subnet) IsBroadcast(address Address) bool {
 	return s.Prefix() <= 30 && s.Broadcast() == address
 }
 
-// Equal returns true if s equals o.
-//
-// Needed to use cmp.Equal on Subnet as its fields are unexported.
+// Equal returns true if this Subnet is equal to the given Subnet.
 func (s Subnet) Equal(o Subnet) bool {
+	// If this changes, update Route.Equal accordingly.
 	return s == o
 }
 
@@ -482,6 +492,14 @@ type ControlMessages struct {
 
 	// PacketInfo holds interface and address data on an incoming packet.
 	PacketInfo IPPacketInfo
+
+	// HasOriginalDstAddress indicates whether OriginalDstAddress is
+	// set.
+	HasOriginalDstAddress bool
+
+	// OriginalDstAddress holds the original destination address
+	// and port of the incoming packet.
+	OriginalDstAddress FullAddress
 }
 
 // PacketOwner is used to get UID and GID of the packet.
@@ -536,7 +554,7 @@ type Endpoint interface {
 	// Peek reads data without consuming it from the endpoint.
 	//
 	// This method does not block if there is no data pending.
-	Peek([][]byte) (int64, ControlMessages, *Error)
+	Peek([][]byte) (int64, *Error)
 
 	// Connect connects the endpoint to its peer. Specifying a NIC is
 	// optional.
@@ -594,10 +612,6 @@ type Endpoint interface {
 	// SetSockOpt sets a socket option.
 	SetSockOpt(opt SettableSocketOption) *Error
 
-	// SetSockOptBool sets a socket option, for simple cases where a value
-	// has the bool type.
-	SetSockOptBool(opt SockOptBool, v bool) *Error
-
 	// SetSockOptInt sets a socket option, for simple cases where a value
 	// has the int type.
 	SetSockOptInt(opt SockOptInt, v int) *Error
@@ -605,10 +619,6 @@ type Endpoint interface {
 	// GetSockOpt gets a socket option.
 	GetSockOpt(opt GettableSocketOption) *Error
 
-	// GetSockOptBool gets a socket option for simple cases where a return
-	// value has the bool type.
-	GetSockOptBool(SockOptBool) (bool, *Error)
-
 	// GetSockOptInt gets a socket option for simple cases where a return
 	// value has the int type.
 	GetSockOptInt(SockOptInt) (int, *Error)
@@ -635,6 +645,10 @@ type Endpoint interface {
 
 	// LastError clears and returns the last error reported by the endpoint.
 	LastError() *Error
+
+	// SocketOptions returns the structure which contains all the socket
+	// level options.
+	SocketOptions() *SocketOptions
 }
 
 // LinkPacketInfo holds Link layer information for a received packet.
@@ -691,80 +705,6 @@ type WriteOptions struct {
 	Atomic bool
 }
 
-// SockOptBool represents socket options which values have the bool type.
-type SockOptBool int
-
-const (
-	// BroadcastOption is used by SetSockOptBool/GetSockOptBool to specify
-	// whether datagram sockets are allowed to send packets to a broadcast
-	// address.
-	BroadcastOption SockOptBool = iota
-
-	// CorkOption is used by SetSockOptBool/GetSockOptBool to specify if
-	// data should be held until segments are full by the TCP transport
-	// protocol.
-	CorkOption
-
-	// DelayOption is used by SetSockOptBool/GetSockOptBool to specify if
-	// data should be sent out immediately by the transport protocol. For
-	// TCP, it determines if the Nagle algorithm is on or off.
-	DelayOption
-
-	// KeepaliveEnabledOption is used by SetSockOptBool/GetSockOptBool to
-	// specify whether TCP keepalive is enabled for this socket.
-	KeepaliveEnabledOption
-
-	// MulticastLoopOption is used by SetSockOptBool/GetSockOptBool to
-	// specify whether multicast packets sent over a non-loopback interface
-	// will be looped back.
-	MulticastLoopOption
-
-	// NoChecksumOption is used by SetSockOptBool/GetSockOptBool to specify
-	// whether UDP checksum is disabled for this socket.
-	NoChecksumOption
-
-	// PasscredOption is used by SetSockOptBool/GetSockOptBool to specify
-	// whether SCM_CREDENTIALS socket control messages are enabled.
-	//
-	// Only supported on Unix sockets.
-	PasscredOption
-
-	// QuickAckOption is stubbed out in SetSockOptBool/GetSockOptBool.
-	QuickAckOption
-
-	// ReceiveTClassOption is used by SetSockOptBool/GetSockOptBool to
-	// specify if the IPV6_TCLASS ancillary message is passed with incoming
-	// packets.
-	ReceiveTClassOption
-
-	// ReceiveTOSOption is used by SetSockOptBool/GetSockOptBool to specify
-	// if the TOS ancillary message is passed with incoming packets.
-	ReceiveTOSOption
-
-	// ReceiveIPPacketInfoOption is used by SetSockOptBool/GetSockOptBool to
-	// specify if more inforamtion is provided with incoming packets such as
-	// interface index and address.
-	ReceiveIPPacketInfoOption
-
-	// ReuseAddressOption is used by SetSockOptBool/GetSockOptBool to
-	// specify whether Bind() should allow reuse of local address.
-	ReuseAddressOption
-
-	// ReusePortOption is used by SetSockOptBool/GetSockOptBool to permit
-	// multiple sockets to be bound to an identical socket address.
-	ReusePortOption
-
-	// V6OnlyOption is used by SetSockOptBool/GetSockOptBool to specify
-	// whether an IPv6 socket is to be restricted to sending and receiving
-	// IPv6 packets only.
-	V6OnlyOption
-
-	// IPHdrIncludedOption is used by SetSockOpt to indicate for a raw
-	// endpoint that all packets being written have an IP header and the
-	// endpoint should not attach an IP header.
-	IPHdrIncludedOption
-)
-
 // SockOptInt represents socket options which values have the int type.
 type SockOptInt int
 
@@ -1156,14 +1096,6 @@ type RemoveMembershipOption MembershipOption
 
 func (*RemoveMembershipOption) isSettableSocketOption() {}
 
-// OutOfBandInlineOption is used by SetSockOpt/GetSockOpt to specify whether
-// TCP out-of-band data is delivered along with the normal in-band data.
-type OutOfBandInlineOption int
-
-func (*OutOfBandInlineOption) isGettableSocketOption() {}
-
-func (*OutOfBandInlineOption) isSettableSocketOption() {}
-
 // SocketDetachFilterOption is used by SetSockOpt to detach a previously attached
 // classic BPF filter on a given endpoint.
 type SocketDetachFilterOption int
@@ -1213,10 +1145,6 @@ type LingerOption struct {
 	Timeout time.Duration
 }
 
-func (*LingerOption) isGettableSocketOption() {}
-
-func (*LingerOption) isSettableSocketOption() {}
-
 // IPPacketInfo is the message structure for IP_PKTINFO.
 //
 // +stateify savable
@@ -1256,6 +1184,12 @@ func (r Route) String() string {
 	return out.String()
 }
 
+// Equal returns true if the given Route is equal to this Route.
+func (r Route) Equal(to Route) bool {
+	// NOTE: This relies on Subnet.Equal being equivalent to r.Destination == to.Destination.
+	return r == to
+}
+
 // TransportProtocolNumber is the number of a transport protocol.
 type TransportProtocolNumber uint32
 
@@ -1381,6 +1315,18 @@ type ICMPv6PacketStats struct {
 	// RedirectMsg is the total number of ICMPv6 redirect message packets
 	// counted.
 	RedirectMsg *StatCounter
+
+	// MulticastListenerQuery is the total number of Multicast Listener Query
+	// messages counted.
+	MulticastListenerQuery *StatCounter
+
+	// MulticastListenerReport is the total number of Multicast Listener Report
+	// messages counted.
+	MulticastListenerReport *StatCounter
+
+	// MulticastListenerDone is the total number of Multicast Listener Done
+	// messages counted.
+	MulticastListenerDone *StatCounter
 }
 
 // ICMPv4SentPacketStats collects outbound ICMPv4-specific stats.
@@ -1422,6 +1368,10 @@ type ICMPv6SentPacketStats struct {
 type ICMPv6ReceivedPacketStats struct {
 	ICMPv6PacketStats
 
+	// Unrecognized is the total number of ICMPv6 packets received that the
+	// transport layer does not know how to parse.
+	Unrecognized *StatCounter
+
 	// Invalid is the total number of ICMPv6 packets received that the
 	// transport layer could not parse.
 	Invalid *StatCounter
@@ -1431,33 +1381,102 @@ type ICMPv6ReceivedPacketStats struct {
 	RouterOnlyPacketsDroppedByHost *StatCounter
 }
 
-// ICMPStats collects ICMP-specific stats (both v4 and v6).
-type ICMPStats struct {
+// ICMPv4Stats collects ICMPv4-specific stats.
+type ICMPv4Stats struct {
 	// ICMPv4SentPacketStats contains counts of sent packets by ICMPv4 packet type
 	// and a single count of packets which failed to write to the link
 	// layer.
-	V4PacketsSent ICMPv4SentPacketStats
+	PacketsSent ICMPv4SentPacketStats
 
 	// ICMPv4ReceivedPacketStats contains counts of received packets by ICMPv4
 	// packet type and a single count of invalid packets received.
-	V4PacketsReceived ICMPv4ReceivedPacketStats
+	PacketsReceived ICMPv4ReceivedPacketStats
+}
 
+// ICMPv6Stats collects ICMPv6-specific stats.
+type ICMPv6Stats struct {
 	// ICMPv6SentPacketStats contains counts of sent packets by ICMPv6 packet type
 	// and a single count of packets which failed to write to the link
 	// layer.
-	V6PacketsSent ICMPv6SentPacketStats
+	PacketsSent ICMPv6SentPacketStats
 
 	// ICMPv6ReceivedPacketStats contains counts of received packets by ICMPv6
 	// packet type and a single count of invalid packets received.
-	V6PacketsReceived ICMPv6ReceivedPacketStats
+	PacketsReceived ICMPv6ReceivedPacketStats
+}
+
+// ICMPStats collects ICMP-specific stats (both v4 and v6).
+type ICMPStats struct {
+	// V4 contains the ICMPv4-specific stats.
+	V4 ICMPv4Stats
+
+	// V6 contains the ICMPv6-specific stats.
+	V6 ICMPv6Stats
+}
+
+// IGMPPacketStats enumerates counts for all IGMP packet types.
+type IGMPPacketStats struct {
+	// MembershipQuery is the total number of Membership Query messages counted.
+	MembershipQuery *StatCounter
+
+	// V1MembershipReport is the total number of Version 1 Membership Report
+	// messages counted.
+	V1MembershipReport *StatCounter
+
+	// V2MembershipReport is the total number of Version 2 Membership Report
+	// messages counted.
+	V2MembershipReport *StatCounter
+
+	// LeaveGroup is the total number of Leave Group messages counted.
+	LeaveGroup *StatCounter
+}
+
+// IGMPSentPacketStats collects outbound IGMP-specific stats.
+type IGMPSentPacketStats struct {
+	IGMPPacketStats
+
+	// Dropped is the total number of IGMP packets dropped.
+	Dropped *StatCounter
+}
+
+// IGMPReceivedPacketStats collects inbound IGMP-specific stats.
+type IGMPReceivedPacketStats struct {
+	IGMPPacketStats
+
+	// Invalid is the total number of IGMP packets received that IGMP could not
+	// parse.
+	Invalid *StatCounter
+
+	// ChecksumErrors is the total number of IGMP packets dropped due to bad
+	// checksums.
+	ChecksumErrors *StatCounter
+
+	// Unrecognized is the total number of unrecognized messages counted; these
+	// are silently ignored for forward-compatibility.
+	Unrecognized *StatCounter
+}
+
+// IGMPStats collects IGMP-specific stats.
+type IGMPStats struct {
+	// IGMPSentPacketStats contains counts of sent packets by IGMP packet type
+	// and a single count of dropped packets.
+	PacketsSent IGMPSentPacketStats
+
+	// IGMPReceivedPacketStats contains counts of received packets by IGMP packet
+	// type and counts of invalid, bad-checksum and unrecognized packets received.
+	PacketsReceived IGMPReceivedPacketStats
 }
 
 // IPStats collects IP-specific stats (both v4 and v6).
 type IPStats struct {
 	// PacketsReceived is the total number of IP packets received from the
-	// link layer in nic.DeliverNetworkPacket.
+	// link layer.
 	PacketsReceived *StatCounter
 
+	// DisabledPacketsReceived is the total number of IP packets received from the
+	// link layer when the IP layer is disabled.
+	DisabledPacketsReceived *StatCounter
+
 	// InvalidDestinationAddressesReceived is the total number of IP packets
 	// received with an unknown or invalid destination address.
 	InvalidDestinationAddressesReceived *StatCounter
@@ -1496,6 +1515,15 @@ type IPStats struct {
 	// IPTablesOutputDropped is the total number of IP packets dropped in
 	// the Output chain.
 	IPTablesOutputDropped *StatCounter
+
+	// OptionTSReceived is the number of Timestamp options seen.
+	OptionTSReceived *StatCounter
+
+	// OptionRRReceived is the number of Record Route options seen.
+	OptionRRReceived *StatCounter
+
+	// OptionUnknownReceived is the number of unknown IP options seen.
+	OptionUnknownReceived *StatCounter
 }
 
 // TCPStats collects TCP-specific stats.
@@ -1644,6 +1672,9 @@ type Stats struct {
 	// ICMP breaks out ICMP-specific stats (both v4 and v6).
 	ICMP ICMPStats
 
+	// IGMP breaks out IGMP-specific stats.
+	IGMP IGMPStats
+
 	// IP breaks out IP-specific stats (both v4 and v6).
 	IP IPStats
 
diff --git a/pkg/tcpip/tcpip_test.go b/pkg/tcpip/tcpip_test.go
index 1c8e2bc34..c461da137 100644
--- a/pkg/tcpip/tcpip_test.go
+++ b/pkg/tcpip/tcpip_test.go
@@ -226,3 +226,47 @@ func TestAddressWithPrefixSubnet(t *testing.T) {
 		}
 	}
 }
+
+func TestAddressUnspecified(t *testing.T) {
+	tests := []struct {
+		addr        Address
+		unspecified bool
+	}{
+		{
+			addr:        "",
+			unspecified: true,
+		},
+		{
+			addr:        "\x00",
+			unspecified: true,
+		},
+		{
+			addr:        "\x01",
+			unspecified: false,
+		},
+		{
+			addr:        "\x00\x00",
+			unspecified: true,
+		},
+		{
+			addr:        "\x01\x00",
+			unspecified: false,
+		},
+		{
+			addr:        "\x00\x01",
+			unspecified: false,
+		},
+		{
+			addr:        "\x01\x01",
+			unspecified: false,
+		},
+	}
+
+	for _, test := range tests {
+		t.Run(fmt.Sprintf("addr=%s", test.addr), func(t *testing.T) {
+			if got := test.addr.Unspecified(); got != test.unspecified {
+				t.Fatalf("got addr.Unspecified() = %t, want = %t", got, test.unspecified)
+			}
+		})
+	}
+}
diff --git a/pkg/tcpip/tests/integration/BUILD b/pkg/tcpip/tests/integration/BUILD
index 34aab32d0..800025fb9 100644
--- a/pkg/tcpip/tests/integration/BUILD
+++ b/pkg/tcpip/tests/integration/BUILD
@@ -10,6 +10,7 @@ go_test(
         "link_resolution_test.go",
         "loopback_test.go",
         "multicast_broadcast_test.go",
+        "route_test.go",
     ],
     deps = [
         "//pkg/tcpip",
@@ -24,6 +25,7 @@ go_test(
         "//pkg/tcpip/network/ipv6",
         "//pkg/tcpip/stack",
         "//pkg/tcpip/transport/icmp",
+        "//pkg/tcpip/transport/tcp",
         "//pkg/tcpip/transport/udp",
         "//pkg/waiter",
         "@com_github_google_go_cmp//cmp:go_default_library",
diff --git a/pkg/tcpip/tests/integration/forward_test.go b/pkg/tcpip/tests/integration/forward_test.go
index 0dcef7b04..39343b966 100644
--- a/pkg/tcpip/tests/integration/forward_test.go
+++ b/pkg/tcpip/tests/integration/forward_test.go
@@ -33,11 +33,6 @@ import (
 
 func TestForwarding(t *testing.T) {
 	const (
-		host1NICLinkAddr   = tcpip.LinkAddress("\x02\x03\x03\x04\x05\x06")
-		routerNIC1LinkAddr = tcpip.LinkAddress("\x02\x03\x03\x04\x05\x07")
-		routerNIC2LinkAddr = tcpip.LinkAddress("\x02\x03\x03\x04\x05\x08")
-		host2NICLinkAddr   = tcpip.LinkAddress("\x02\x03\x03\x04\x05\x09")
-
 		host1NICID   = 1
 		routerNICID1 = 2
 		routerNICID2 = 3
@@ -166,6 +161,38 @@ func TestForwarding(t *testing.T) {
 				}
 			},
 		},
+		{
+			name: "IPv4 host2 server with routerNIC1 client",
+			epAndAddrs: func(t *testing.T, host1Stack, routerStack, host2Stack *stack.Stack) endpointAndAddresses {
+				ep1, ep1WECH := newEP(t, host2Stack, udp.ProtocolNumber, ipv4.ProtocolNumber)
+				ep2, ep2WECH := newEP(t, routerStack, udp.ProtocolNumber, ipv4.ProtocolNumber)
+				return endpointAndAddresses{
+					serverEP:         ep1,
+					serverAddr:       host2IPv4Addr.AddressWithPrefix.Address,
+					serverReadableCH: ep1WECH,
+
+					clientEP:         ep2,
+					clientAddr:       routerNIC1IPv4Addr.AddressWithPrefix.Address,
+					clientReadableCH: ep2WECH,
+				}
+			},
+		},
+		{
+			name: "IPv6 routerNIC2 server with host1 client",
+			epAndAddrs: func(t *testing.T, host1Stack, routerStack, host2Stack *stack.Stack) endpointAndAddresses {
+				ep1, ep1WECH := newEP(t, routerStack, udp.ProtocolNumber, ipv6.ProtocolNumber)
+				ep2, ep2WECH := newEP(t, host1Stack, udp.ProtocolNumber, ipv6.ProtocolNumber)
+				return endpointAndAddresses{
+					serverEP:         ep1,
+					serverAddr:       routerNIC2IPv6Addr.AddressWithPrefix.Address,
+					serverReadableCH: ep1WECH,
+
+					clientEP:         ep2,
+					clientAddr:       host1IPv6Addr.AddressWithPrefix.Address,
+					clientReadableCH: ep2WECH,
+				}
+			},
+		},
 	}
 
 	for _, test := range tests {
@@ -179,8 +206,8 @@ func TestForwarding(t *testing.T) {
 			routerStack := stack.New(stackOpts)
 			host2Stack := stack.New(stackOpts)
 
-			host1NIC, routerNIC1 := pipe.New(host1NICLinkAddr, routerNIC1LinkAddr)
-			routerNIC2, host2NIC := pipe.New(routerNIC2LinkAddr, host2NICLinkAddr)
+			host1NIC, routerNIC1 := pipe.New(linkAddr1, linkAddr2)
+			routerNIC2, host2NIC := pipe.New(linkAddr3, linkAddr4)
 
 			if err := host1Stack.CreateNIC(host1NICID, ethernet.New(host1NIC)); err != nil {
 				t.Fatalf("host1Stack.CreateNIC(%d, _): %s", host1NICID, err)
@@ -202,19 +229,6 @@ func TestForwarding(t *testing.T) {
 				t.Fatalf("routerStack.SetForwarding(%d): %s", ipv6.ProtocolNumber, err)
 			}
 
-			if err := host1Stack.AddAddress(host1NICID, arp.ProtocolNumber, arp.ProtocolAddress); err != nil {
-				t.Fatalf("host1Stack.AddAddress(%d, %d, %s): %s", host1NICID, arp.ProtocolNumber, arp.ProtocolAddress, err)
-			}
-			if err := routerStack.AddAddress(routerNICID1, arp.ProtocolNumber, arp.ProtocolAddress); err != nil {
-				t.Fatalf("routerStack.AddAddress(%d, %d, %s): %s", routerNICID1, arp.ProtocolNumber, arp.ProtocolAddress, err)
-			}
-			if err := routerStack.AddAddress(routerNICID2, arp.ProtocolNumber, arp.ProtocolAddress); err != nil {
-				t.Fatalf("routerStack.AddAddress(%d, %d, %s): %s", routerNICID2, arp.ProtocolNumber, arp.ProtocolAddress, err)
-			}
-			if err := host2Stack.AddAddress(host2NICID, arp.ProtocolNumber, arp.ProtocolAddress); err != nil {
-				t.Fatalf("host2Stack.AddAddress(%d, %d, %s): %s", host2NICID, arp.ProtocolNumber, arp.ProtocolAddress, err)
-			}
-
 			if err := host1Stack.AddProtocolAddress(host1NICID, host1IPv4Addr); err != nil {
 				t.Fatalf("host1Stack.AddProtocolAddress(%d, %#v): %s", host1NICID, host1IPv4Addr, err)
 			}
@@ -321,12 +335,8 @@ func TestForwarding(t *testing.T) {
 				if err == tcpip.ErrNoLinkAddress {
 					// Wait for link resolution to complete.
 					<-ch
-
 					n, _, err = ep.Write(dataPayload, wOpts)
-				} else if err != nil {
-					t.Fatalf("ep.Write(_, _): %s", err)
 				}
-
 				if err != nil {
 					t.Fatalf("ep.Write(_, _): %s", err)
 				}
@@ -343,7 +353,6 @@ func TestForwarding(t *testing.T) {
 
 				// Wait for the endpoint to be readable.
 				<-ch
-
 				var addr tcpip.FullAddress
 				v, _, err := ep.Read(&addr)
 				if err != nil {
diff --git a/pkg/tcpip/tests/integration/link_resolution_test.go b/pkg/tcpip/tests/integration/link_resolution_test.go
index 6ddcda70c..bf8a1241f 100644
--- a/pkg/tcpip/tests/integration/link_resolution_test.go
+++ b/pkg/tcpip/tests/integration/link_resolution_test.go
@@ -32,32 +32,36 @@ import (
 	"gvisor.dev/gvisor/pkg/waiter"
 )
 
-var (
-	host1NICLinkAddr = tcpip.LinkAddress("\x02\x03\x03\x04\x05\x06")
-	host2NICLinkAddr = tcpip.LinkAddress("\x02\x03\x03\x04\x05\x09")
+const (
+	linkAddr1 = tcpip.LinkAddress("\x02\x03\x03\x04\x05\x06")
+	linkAddr2 = tcpip.LinkAddress("\x02\x03\x03\x04\x05\x07")
+	linkAddr3 = tcpip.LinkAddress("\x02\x03\x03\x04\x05\x08")
+	linkAddr4 = tcpip.LinkAddress("\x02\x03\x03\x04\x05\x09")
+)
 
-	host1IPv4Addr = tcpip.ProtocolAddress{
+var (
+	ipv4Addr1 = tcpip.ProtocolAddress{
 		Protocol: ipv4.ProtocolNumber,
 		AddressWithPrefix: tcpip.AddressWithPrefix{
 			Address:   tcpip.Address(net.ParseIP("192.168.0.1").To4()),
 			PrefixLen: 24,
 		},
 	}
-	host2IPv4Addr = tcpip.ProtocolAddress{
+	ipv4Addr2 = tcpip.ProtocolAddress{
 		Protocol: ipv4.ProtocolNumber,
 		AddressWithPrefix: tcpip.AddressWithPrefix{
 			Address:   tcpip.Address(net.ParseIP("192.168.0.2").To4()),
 			PrefixLen: 8,
 		},
 	}
-	host1IPv6Addr = tcpip.ProtocolAddress{
+	ipv6Addr1 = tcpip.ProtocolAddress{
 		Protocol: ipv6.ProtocolNumber,
 		AddressWithPrefix: tcpip.AddressWithPrefix{
 			Address:   tcpip.Address(net.ParseIP("a::1").To16()),
 			PrefixLen: 64,
 		},
 	}
-	host2IPv6Addr = tcpip.ProtocolAddress{
+	ipv6Addr2 = tcpip.ProtocolAddress{
 		Protocol: ipv6.ProtocolNumber,
 		AddressWithPrefix: tcpip.AddressWithPrefix{
 			Address:   tcpip.Address(net.ParseIP("a::2").To16()),
@@ -89,7 +93,7 @@ func TestPing(t *testing.T) {
 			name:       "IPv4 Ping",
 			transProto: icmp.ProtocolNumber4,
 			netProto:   ipv4.ProtocolNumber,
-			remoteAddr: host2IPv4Addr.AddressWithPrefix.Address,
+			remoteAddr: ipv4Addr2.AddressWithPrefix.Address,
 			icmpBuf: func(t *testing.T) buffer.View {
 				data := [8]byte{1, 2, 3, 4, 5, 6, 7, 8}
 				hdr := header.ICMPv4(make([]byte, header.ICMPv4MinimumSize+len(data)))
@@ -104,7 +108,7 @@ func TestPing(t *testing.T) {
 			name:       "IPv6 Ping",
 			transProto: icmp.ProtocolNumber6,
 			netProto:   ipv6.ProtocolNumber,
-			remoteAddr: host2IPv6Addr.AddressWithPrefix.Address,
+			remoteAddr: ipv6Addr2.AddressWithPrefix.Address,
 			icmpBuf: func(t *testing.T) buffer.View {
 				data := [8]byte{1, 2, 3, 4, 5, 6, 7, 8}
 				hdr := header.ICMPv6(make([]byte, header.ICMPv6MinimumSize+len(data)))
@@ -127,7 +131,7 @@ func TestPing(t *testing.T) {
 			host1Stack := stack.New(stackOpts)
 			host2Stack := stack.New(stackOpts)
 
-			host1NIC, host2NIC := pipe.New(host1NICLinkAddr, host2NICLinkAddr)
+			host1NIC, host2NIC := pipe.New(linkAddr1, linkAddr2)
 
 			if err := host1Stack.CreateNIC(host1NICID, ethernet.New(host1NIC)); err != nil {
 				t.Fatalf("host1Stack.CreateNIC(%d, _): %s", host1NICID, err)
@@ -136,43 +140,36 @@ func TestPing(t *testing.T) {
 				t.Fatalf("host2Stack.CreateNIC(%d, _): %s", host2NICID, err)
 			}
 
-			if err := host1Stack.AddAddress(host1NICID, arp.ProtocolNumber, arp.ProtocolAddress); err != nil {
-				t.Fatalf("host1Stack.AddAddress(%d, %d, %s): %s", host1NICID, arp.ProtocolNumber, arp.ProtocolAddress, err)
-			}
-			if err := host2Stack.AddAddress(host2NICID, arp.ProtocolNumber, arp.ProtocolAddress); err != nil {
-				t.Fatalf("host2Stack.AddAddress(%d, %d, %s): %s", host2NICID, arp.ProtocolNumber, arp.ProtocolAddress, err)
-			}
-
-			if err := host1Stack.AddProtocolAddress(host1NICID, host1IPv4Addr); err != nil {
-				t.Fatalf("host1Stack.AddProtocolAddress(%d, %#v): %s", host1NICID, host1IPv4Addr, err)
+			if err := host1Stack.AddProtocolAddress(host1NICID, ipv4Addr1); err != nil {
+				t.Fatalf("host1Stack.AddProtocolAddress(%d, %#v): %s", host1NICID, ipv4Addr1, err)
 			}
-			if err := host2Stack.AddProtocolAddress(host2NICID, host2IPv4Addr); err != nil {
-				t.Fatalf("host2Stack.AddProtocolAddress(%d, %#v): %s", host2NICID, host2IPv4Addr, err)
+			if err := host2Stack.AddProtocolAddress(host2NICID, ipv4Addr2); err != nil {
+				t.Fatalf("host2Stack.AddProtocolAddress(%d, %#v): %s", host2NICID, ipv4Addr2, err)
 			}
-			if err := host1Stack.AddProtocolAddress(host1NICID, host1IPv6Addr); err != nil {
-				t.Fatalf("host1Stack.AddProtocolAddress(%d, %#v): %s", host1NICID, host1IPv6Addr, err)
+			if err := host1Stack.AddProtocolAddress(host1NICID, ipv6Addr1); err != nil {
+				t.Fatalf("host1Stack.AddProtocolAddress(%d, %#v): %s", host1NICID, ipv6Addr1, err)
 			}
-			if err := host2Stack.AddProtocolAddress(host2NICID, host2IPv6Addr); err != nil {
-				t.Fatalf("host2Stack.AddProtocolAddress(%d, %#v): %s", host2NICID, host2IPv6Addr, err)
+			if err := host2Stack.AddProtocolAddress(host2NICID, ipv6Addr2); err != nil {
+				t.Fatalf("host2Stack.AddProtocolAddress(%d, %#v): %s", host2NICID, ipv6Addr2, err)
 			}
 
 			host1Stack.SetRouteTable([]tcpip.Route{
 				tcpip.Route{
-					Destination: host1IPv4Addr.AddressWithPrefix.Subnet(),
+					Destination: ipv4Addr1.AddressWithPrefix.Subnet(),
 					NIC:         host1NICID,
 				},
 				tcpip.Route{
-					Destination: host1IPv6Addr.AddressWithPrefix.Subnet(),
+					Destination: ipv6Addr1.AddressWithPrefix.Subnet(),
 					NIC:         host1NICID,
 				},
 			})
 			host2Stack.SetRouteTable([]tcpip.Route{
 				tcpip.Route{
-					Destination: host2IPv4Addr.AddressWithPrefix.Subnet(),
+					Destination: ipv4Addr2.AddressWithPrefix.Subnet(),
 					NIC:         host2NICID,
 				},
 				tcpip.Route{
-					Destination: host2IPv6Addr.AddressWithPrefix.Subnet(),
+					Destination: ipv6Addr2.AddressWithPrefix.Subnet(),
 					NIC:         host2NICID,
 				},
 			})
diff --git a/pkg/tcpip/tests/integration/loopback_test.go b/pkg/tcpip/tests/integration/loopback_test.go
index e8caf09ba..baaa741cd 100644
--- a/pkg/tcpip/tests/integration/loopback_test.go
+++ b/pkg/tcpip/tests/integration/loopback_test.go
@@ -26,6 +26,7 @@ import (
 	"gvisor.dev/gvisor/pkg/tcpip/network/ipv4"
 	"gvisor.dev/gvisor/pkg/tcpip/network/ipv6"
 	"gvisor.dev/gvisor/pkg/tcpip/stack"
+	"gvisor.dev/gvisor/pkg/tcpip/transport/tcp"
 	"gvisor.dev/gvisor/pkg/tcpip/transport/udp"
 	"gvisor.dev/gvisor/pkg/waiter"
 )
@@ -70,8 +71,8 @@ func TestInitialLoopbackAddresses(t *testing.T) {
 
 	s := stack.New(stack.Options{
 		NetworkProtocols: []stack.NetworkProtocolFactory{ipv4.NewProtocol, ipv6.NewProtocolWithOptions(ipv6.Options{
-			NDPDisp:              &ndpDispatcher{},
-			AutoGenIPv6LinkLocal: true,
+			NDPDisp:          &ndpDispatcher{},
+			AutoGenLinkLocal: true,
 			OpaqueIIDOpts: ipv6.OpaqueInterfaceIdentifierOptions{
 				NICNameFromID: func(nicID tcpip.NICID, nicName string) string {
 					t.Fatalf("should not attempt to get name for NIC with ID = %d; nicName = %s", nicID, nicName)
@@ -93,9 +94,10 @@ func TestInitialLoopbackAddresses(t *testing.T) {
 	}
 }
 
-// TestLoopbackAcceptAllInSubnet tests that a loopback interface considers
-// itself bound to all addresses in the subnet of an assigned address.
-func TestLoopbackAcceptAllInSubnet(t *testing.T) {
+// TestLoopbackAcceptAllInSubnetUDP tests that a loopback interface considers
+// itself bound to all addresses in the subnet of an assigned address and UDP
+// traffic is sent/received correctly.
+func TestLoopbackAcceptAllInSubnetUDP(t *testing.T) {
 	const (
 		nicID     = 1
 		localPort = 80
@@ -107,7 +109,7 @@ func TestLoopbackAcceptAllInSubnet(t *testing.T) {
 		Protocol:          header.IPv4ProtocolNumber,
 		AddressWithPrefix: ipv4Addr,
 	}
-	ipv4Bytes := []byte(ipv4Addr.Address)
+	ipv4Bytes := []byte(ipv4ProtocolAddress.AddressWithPrefix.Address)
 	ipv4Bytes[len(ipv4Bytes)-1]++
 	otherIPv4Address := tcpip.Address(ipv4Bytes)
 
@@ -129,7 +131,7 @@ func TestLoopbackAcceptAllInSubnet(t *testing.T) {
 		{
 			name:       "IPv4 bind to wildcard and send to assigned address",
 			addAddress: ipv4ProtocolAddress,
-			dstAddr:    ipv4Addr.Address,
+			dstAddr:    ipv4ProtocolAddress.AddressWithPrefix.Address,
 			expectRx:   true,
 		},
 		{
@@ -148,7 +150,7 @@ func TestLoopbackAcceptAllInSubnet(t *testing.T) {
 			name:       "IPv4 bind to other subnet-local address and send to assigned address",
 			addAddress: ipv4ProtocolAddress,
 			bindAddr:   otherIPv4Address,
-			dstAddr:    ipv4Addr.Address,
+			dstAddr:    ipv4ProtocolAddress.AddressWithPrefix.Address,
 			expectRx:   false,
 		},
 		{
@@ -161,7 +163,7 @@ func TestLoopbackAcceptAllInSubnet(t *testing.T) {
 		{
 			name:       "IPv4 bind to assigned address and send to other subnet-local address",
 			addAddress: ipv4ProtocolAddress,
-			bindAddr:   ipv4Addr.Address,
+			bindAddr:   ipv4ProtocolAddress.AddressWithPrefix.Address,
 			dstAddr:    otherIPv4Address,
 			expectRx:   false,
 		},
@@ -204,7 +206,7 @@ func TestLoopbackAcceptAllInSubnet(t *testing.T) {
 				},
 			})
 
-			wq := waiter.Queue{}
+			var wq waiter.Queue
 			rep, err := s.NewEndpoint(udp.ProtocolNumber, test.addAddress.Protocol, &wq)
 			if err != nil {
 				t.Fatalf("NewEndpoint(%d, %d, _): %s", udp.ProtocolNumber, test.addAddress.Protocol, err)
@@ -236,13 +238,17 @@ func TestLoopbackAcceptAllInSubnet(t *testing.T) {
 				t.Fatalf("got sep.Write(_, _) = (%d, _, nil), want = (%d, _, nil)", n, want)
 			}
 
-			if gotPayload, _, err := rep.Read(nil); test.expectRx {
+			var addr tcpip.FullAddress
+			if gotPayload, _, err := rep.Read(&addr); test.expectRx {
 				if err != nil {
-					t.Fatalf("reep.Read(nil): %s", err)
+					t.Fatalf("rep.Read(_): %s", err)
 				}
 				if diff := cmp.Diff(buffer.View(data), gotPayload); diff != "" {
 					t.Errorf("got UDP payload mismatch (-want +got):\n%s", diff)
 				}
+				if addr.Addr != test.addAddress.AddressWithPrefix.Address {
+					t.Errorf("got addr.Addr = %s, want = %s", addr.Addr, test.addAddress.AddressWithPrefix.Address)
+				}
 			} else {
 				if err != tcpip.ErrWouldBlock {
 					t.Fatalf("got rep.Read(nil) = (%x, _, %s), want = (_, _, %s)", gotPayload, err, tcpip.ErrWouldBlock)
@@ -312,3 +318,168 @@ func TestLoopbackSubnetLifetimeBoundToAddr(t *testing.T) {
 		t.Fatalf("got r.WritePacket(nil, %#v, _) = %s, want = %s", params, err, tcpip.ErrInvalidEndpointState)
 	}
 }
+
+// TestLoopbackAcceptAllInSubnetTCP tests that a loopback interface considers
+// itself bound to all addresses in the subnet of an assigned address and TCP
+// traffic is sent/received correctly.
+func TestLoopbackAcceptAllInSubnetTCP(t *testing.T) {
+	const (
+		nicID     = 1
+		localPort = 80
+	)
+
+	ipv4ProtocolAddress := tcpip.ProtocolAddress{
+		Protocol:          header.IPv4ProtocolNumber,
+		AddressWithPrefix: ipv4Addr,
+	}
+	ipv4ProtocolAddress.AddressWithPrefix.PrefixLen = 8
+	ipv4Bytes := []byte(ipv4ProtocolAddress.AddressWithPrefix.Address)
+	ipv4Bytes[len(ipv4Bytes)-1]++
+	otherIPv4Address := tcpip.Address(ipv4Bytes)
+
+	ipv6ProtocolAddress := tcpip.ProtocolAddress{
+		Protocol:          header.IPv6ProtocolNumber,
+		AddressWithPrefix: ipv6Addr,
+	}
+	ipv6Bytes := []byte(ipv6Addr.Address)
+	ipv6Bytes[len(ipv6Bytes)-1]++
+	otherIPv6Address := tcpip.Address(ipv6Bytes)
+
+	tests := []struct {
+		name         string
+		addAddress   tcpip.ProtocolAddress
+		bindAddr     tcpip.Address
+		dstAddr      tcpip.Address
+		expectAccept bool
+	}{
+		{
+			name:         "IPv4 bind to wildcard and send to assigned address",
+			addAddress:   ipv4ProtocolAddress,
+			dstAddr:      ipv4ProtocolAddress.AddressWithPrefix.Address,
+			expectAccept: true,
+		},
+		{
+			name:         "IPv4 bind to wildcard and send to other subnet-local address",
+			addAddress:   ipv4ProtocolAddress,
+			dstAddr:      otherIPv4Address,
+			expectAccept: true,
+		},
+		{
+			name:         "IPv4 bind to wildcard send to other address",
+			addAddress:   ipv4ProtocolAddress,
+			dstAddr:      remoteIPv4Addr,
+			expectAccept: false,
+		},
+		{
+			name:         "IPv4 bind to other subnet-local address and send to assigned address",
+			addAddress:   ipv4ProtocolAddress,
+			bindAddr:     otherIPv4Address,
+			dstAddr:      ipv4ProtocolAddress.AddressWithPrefix.Address,
+			expectAccept: false,
+		},
+		{
+			name:         "IPv4 bind and send to other subnet-local address",
+			addAddress:   ipv4ProtocolAddress,
+			bindAddr:     otherIPv4Address,
+			dstAddr:      otherIPv4Address,
+			expectAccept: true,
+		},
+		{
+			name:         "IPv4 bind to assigned address and send to other subnet-local address",
+			addAddress:   ipv4ProtocolAddress,
+			bindAddr:     ipv4ProtocolAddress.AddressWithPrefix.Address,
+			dstAddr:      otherIPv4Address,
+			expectAccept: false,
+		},
+
+		{
+			name:         "IPv6 bind and send to assigned address",
+			addAddress:   ipv6ProtocolAddress,
+			bindAddr:     ipv6Addr.Address,
+			dstAddr:      ipv6Addr.Address,
+			expectAccept: true,
+		},
+		{
+			name:         "IPv6 bind to wildcard and send to other subnet-local address",
+			addAddress:   ipv6ProtocolAddress,
+			dstAddr:      otherIPv6Address,
+			expectAccept: false,
+		},
+	}
+
+	for _, test := range tests {
+		t.Run(test.name, func(t *testing.T) {
+			s := stack.New(stack.Options{
+				NetworkProtocols:   []stack.NetworkProtocolFactory{ipv4.NewProtocol, ipv6.NewProtocol},
+				TransportProtocols: []stack.TransportProtocolFactory{tcp.NewProtocol},
+			})
+			if err := s.CreateNIC(nicID, loopback.New()); err != nil {
+				t.Fatalf("CreateNIC(%d, _): %s", nicID, err)
+			}
+			if err := s.AddProtocolAddress(nicID, test.addAddress); err != nil {
+				t.Fatalf("AddProtocolAddress(%d, %#v): %s", nicID, test.addAddress, err)
+			}
+			s.SetRouteTable([]tcpip.Route{
+				tcpip.Route{
+					Destination: header.IPv4EmptySubnet,
+					NIC:         nicID,
+				},
+				tcpip.Route{
+					Destination: header.IPv6EmptySubnet,
+					NIC:         nicID,
+				},
+			})
+
+			var wq waiter.Queue
+			we, ch := waiter.NewChannelEntry(nil)
+			wq.EventRegister(&we, waiter.EventIn)
+			defer wq.EventUnregister(&we)
+			listeningEndpoint, err := s.NewEndpoint(tcp.ProtocolNumber, test.addAddress.Protocol, &wq)
+			if err != nil {
+				t.Fatalf("NewEndpoint(%d, %d, _): %s", tcp.ProtocolNumber, test.addAddress.Protocol, err)
+			}
+			defer listeningEndpoint.Close()
+
+			bindAddr := tcpip.FullAddress{Addr: test.bindAddr, Port: localPort}
+			if err := listeningEndpoint.Bind(bindAddr); err != nil {
+				t.Fatalf("listeningEndpoint.Bind(%#v): %s", bindAddr, err)
+			}
+
+			if err := listeningEndpoint.Listen(1); err != nil {
+				t.Fatalf("listeningEndpoint.Listen(1): %s", err)
+			}
+
+			connectingEndpoint, err := s.NewEndpoint(tcp.ProtocolNumber, test.addAddress.Protocol, &wq)
+			if err != nil {
+				t.Fatalf("s.NewEndpoint(%d, %d, _): %s", tcp.ProtocolNumber, test.addAddress.Protocol, err)
+			}
+			defer connectingEndpoint.Close()
+
+			connectAddr := tcpip.FullAddress{
+				Addr: test.dstAddr,
+				Port: localPort,
+			}
+			if err := connectingEndpoint.Connect(connectAddr); err != tcpip.ErrConnectStarted {
+				t.Fatalf("connectingEndpoint.Connect(%#v): %s", connectAddr, err)
+			}
+
+			if !test.expectAccept {
+				if _, _, err := listeningEndpoint.Accept(nil); err != tcpip.ErrWouldBlock {
+					t.Fatalf("got listeningEndpoint.Accept(nil) = %s, want = %s", err, tcpip.ErrWouldBlock)
+				}
+				return
+			}
+
+			// Wait for the listening endpoint to be "readable". That is, wait for a
+			// new connection.
+			<-ch
+			var addr tcpip.FullAddress
+			if _, _, err := listeningEndpoint.Accept(&addr); err != nil {
+				t.Fatalf("listeningEndpoint.Accept(_): %s", err)
+			}
+			if addr.Addr != test.addAddress.AddressWithPrefix.Address {
+				t.Errorf("got addr.Addr = %s, want = %s", addr.Addr, test.addAddress.AddressWithPrefix.Address)
+			}
+		})
+	}
+}
diff --git a/pkg/tcpip/tests/integration/multicast_broadcast_test.go b/pkg/tcpip/tests/integration/multicast_broadcast_test.go
index f1028823b..2e59f6a42 100644
--- a/pkg/tcpip/tests/integration/multicast_broadcast_test.go
+++ b/pkg/tcpip/tests/integration/multicast_broadcast_test.go
@@ -73,7 +73,6 @@ func TestPingMulticastBroadcast(t *testing.T) {
 		pkt.SetChecksum(^header.Checksum(pkt, 0))
 		ip := header.IPv4(hdr.Prepend(header.IPv4MinimumSize))
 		ip.Encode(&header.IPv4Fields{
-			IHL:         header.IPv4MinimumSize,
 			TotalLength: uint16(totalLen),
 			Protocol:    uint8(icmp.ProtocolNumber4),
 			TTL:         ttl,
@@ -97,11 +96,11 @@ func TestPingMulticastBroadcast(t *testing.T) {
 		pkt.SetChecksum(header.ICMPv6Checksum(pkt, remoteIPv6Addr, dst, buffer.VectorisedView{}))
 		ip := header.IPv6(hdr.Prepend(header.IPv6MinimumSize))
 		ip.Encode(&header.IPv6Fields{
-			PayloadLength: header.ICMPv6MinimumSize,
-			NextHeader:    uint8(icmp.ProtocolNumber6),
-			HopLimit:      ttl,
-			SrcAddr:       remoteIPv6Addr,
-			DstAddr:       dst,
+			PayloadLength:     header.ICMPv6MinimumSize,
+			TransportProtocol: icmp.ProtocolNumber6,
+			HopLimit:          ttl,
+			SrcAddr:           remoteIPv6Addr,
+			DstAddr:           dst,
 		})
 
 		e.InjectInbound(header.IPv6ProtocolNumber, stack.NewPacketBuffer(stack.PacketBufferOptions{
@@ -244,7 +243,6 @@ func TestIncomingMulticastAndBroadcast(t *testing.T) {
 
 		ip := header.IPv4(hdr.Prepend(header.IPv4MinimumSize))
 		ip.Encode(&header.IPv4Fields{
-			IHL:         header.IPv4MinimumSize,
 			TotalLength: uint16(totalLen),
 			Protocol:    uint8(udp.ProtocolNumber),
 			TTL:         ttl,
@@ -274,11 +272,11 @@ func TestIncomingMulticastAndBroadcast(t *testing.T) {
 
 		ip := header.IPv6(hdr.Prepend(header.IPv6MinimumSize))
 		ip.Encode(&header.IPv6Fields{
-			PayloadLength: uint16(payloadLen),
-			NextHeader:    uint8(udp.ProtocolNumber),
-			HopLimit:      ttl,
-			SrcAddr:       remoteIPv6Addr,
-			DstAddr:       dst,
+			PayloadLength:     uint16(payloadLen),
+			TransportProtocol: udp.ProtocolNumber,
+			HopLimit:          ttl,
+			SrcAddr:           remoteIPv6Addr,
+			DstAddr:           dst,
 		})
 
 		e.InjectInbound(header.IPv6ProtocolNumber, stack.NewPacketBuffer(stack.PacketBufferOptions{
@@ -409,7 +407,7 @@ func TestIncomingMulticastAndBroadcast(t *testing.T) {
 				t.Fatalf("got unexpected address length = %d bytes", l)
 			}
 
-			wq := waiter.Queue{}
+			var wq waiter.Queue
 			ep, err := s.NewEndpoint(udp.ProtocolNumber, netproto, &wq)
 			if err != nil {
 				t.Fatalf("NewEndpoint(%d, %d, _): %s", udp.ProtocolNumber, netproto, err)
@@ -447,8 +445,6 @@ func TestReuseAddrAndBroadcast(t *testing.T) {
 		loopbackBroadcast = tcpip.Address("\x7f\xff\xff\xff")
 	)
 
-	data := tcpip.SlicePayload([]byte{1, 2, 3, 4})
-
 	tests := []struct {
 		name          string
 		broadcastAddr tcpip.Address
@@ -492,29 +488,30 @@ func TestReuseAddrAndBroadcast(t *testing.T) {
 				},
 			})
 
+			type endpointAndWaiter struct {
+				ep tcpip.Endpoint
+				ch chan struct{}
+			}
+			var eps []endpointAndWaiter
 			// We create endpoints that bind to both the wildcard address and the
 			// broadcast address to make sure both of these types of "broadcast
 			// interested" endpoints receive broadcast packets.
-			wq := waiter.Queue{}
-			var eps []tcpip.Endpoint
 			for _, bindWildcard := range []bool{false, true} {
 				// Create multiple endpoints for each type of "broadcast interested"
 				// endpoint so we can test that all endpoints receive the broadcast
 				// packet.
 				for i := 0; i < 2; i++ {
+					var wq waiter.Queue
+					we, ch := waiter.NewChannelEntry(nil)
+					wq.EventRegister(&we, waiter.EventIn)
 					ep, err := s.NewEndpoint(udp.ProtocolNumber, ipv4.ProtocolNumber, &wq)
 					if err != nil {
 						t.Fatalf("(eps[%d]) NewEndpoint(%d, %d, _): %s", len(eps), udp.ProtocolNumber, ipv4.ProtocolNumber, err)
 					}
 					defer ep.Close()
 
-					if err := ep.SetSockOptBool(tcpip.ReuseAddressOption, true); err != nil {
-						t.Fatalf("eps[%d].SetSockOptBool(tcpip.ReuseAddressOption, true): %s", len(eps), err)
-					}
-
-					if err := ep.SetSockOptBool(tcpip.BroadcastOption, true); err != nil {
-						t.Fatalf("eps[%d].SetSockOptBool(tcpip.BroadcastOption, true): %s", len(eps), err)
-					}
+					ep.SocketOptions().SetReuseAddress(true)
+					ep.SocketOptions().SetBroadcast(true)
 
 					bindAddr := tcpip.FullAddress{Port: localPort}
 					if bindWildcard {
@@ -528,7 +525,7 @@ func TestReuseAddrAndBroadcast(t *testing.T) {
 						}
 					}
 
-					eps = append(eps, ep)
+					eps = append(eps, endpointAndWaiter{ep: ep, ch: ch})
 				}
 			}
 
@@ -539,14 +536,18 @@ func TestReuseAddrAndBroadcast(t *testing.T) {
 						Port: localPort,
 					},
 				}
-				if n, _, err := wep.Write(data, writeOpts); err != nil {
+				data := tcpip.SlicePayload([]byte{byte(i), 2, 3, 4})
+				if n, _, err := wep.ep.Write(data, writeOpts); err != nil {
 					t.Fatalf("eps[%d].Write(_, _): %s", i, err)
 				} else if want := int64(len(data)); n != want {
 					t.Fatalf("got eps[%d].Write(_, _) = (%d, nil, nil), want = (%d, nil, nil)", i, n, want)
 				}
 
 				for j, rep := range eps {
-					if gotPayload, _, err := rep.Read(nil); err != nil {
+					// Wait for the endpoint to become readable.
+					<-rep.ch
+
+					if gotPayload, _, err := rep.ep.Read(nil); err != nil {
 						t.Errorf("(eps[%d] write) eps[%d].Read(nil): %s", i, j, err)
 					} else if diff := cmp.Diff(buffer.View(data), gotPayload); diff != "" {
 						t.Errorf("(eps[%d] write) got UDP payload from eps[%d] mismatch (-want +got):\n%s", i, j, diff)
diff --git a/pkg/tcpip/tests/integration/route_test.go b/pkg/tcpip/tests/integration/route_test.go
new file mode 100644
index 000000000..02fc47015
--- /dev/null
+++ b/pkg/tcpip/tests/integration/route_test.go
@@ -0,0 +1,388 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package integration_test
+
+import (
+	"testing"
+
+	"github.com/google/go-cmp/cmp"
+	"gvisor.dev/gvisor/pkg/tcpip"
+	"gvisor.dev/gvisor/pkg/tcpip/buffer"
+	"gvisor.dev/gvisor/pkg/tcpip/header"
+	"gvisor.dev/gvisor/pkg/tcpip/link/channel"
+	"gvisor.dev/gvisor/pkg/tcpip/link/loopback"
+	"gvisor.dev/gvisor/pkg/tcpip/network/ipv4"
+	"gvisor.dev/gvisor/pkg/tcpip/network/ipv6"
+	"gvisor.dev/gvisor/pkg/tcpip/stack"
+	"gvisor.dev/gvisor/pkg/tcpip/transport/icmp"
+	"gvisor.dev/gvisor/pkg/tcpip/transport/udp"
+	"gvisor.dev/gvisor/pkg/waiter"
+)
+
+// TestLocalPing tests pinging a remote that is local to the stack.
+//
+// This tests that a local route is created and packets do not leave the stack.
+func TestLocalPing(t *testing.T) {
+	const (
+		nicID        = 1
+		ipv4Loopback = tcpip.Address("\x7f\x00\x00\x01")
+
+		// icmpDataOffset is the offset to the data in both ICMPv4 and ICMPv6 echo
+		// request/reply packets.
+		icmpDataOffset = 8
+	)
+
+	channelEP := func() stack.LinkEndpoint { return channel.New(1, header.IPv6MinimumMTU, "") }
+	channelEPCheck := func(t *testing.T, e stack.LinkEndpoint) {
+		channelEP := e.(*channel.Endpoint)
+		if n := channelEP.Drain(); n != 0 {
+			t.Fatalf("got channelEP.Drain() = %d, want = 0", n)
+		}
+	}
+
+	ipv4ICMPBuf := func(t *testing.T) buffer.View {
+		data := [8]byte{1, 2, 3, 4, 5, 6, 7, 8}
+		hdr := header.ICMPv4(make([]byte, header.ICMPv4MinimumSize+len(data)))
+		hdr.SetType(header.ICMPv4Echo)
+		if n := copy(hdr.Payload(), data[:]); n != len(data) {
+			t.Fatalf("copied %d bytes but expected to copy %d bytes", n, len(data))
+		}
+		return buffer.View(hdr)
+	}
+
+	ipv6ICMPBuf := func(t *testing.T) buffer.View {
+		data := [8]byte{1, 2, 3, 4, 5, 6, 7, 9}
+		hdr := header.ICMPv6(make([]byte, header.ICMPv6MinimumSize+len(data)))
+		hdr.SetType(header.ICMPv6EchoRequest)
+		if n := copy(hdr.Payload(), data[:]); n != len(data) {
+			t.Fatalf("copied %d bytes but expected to copy %d bytes", n, len(data))
+		}
+		return buffer.View(hdr)
+	}
+
+	tests := []struct {
+		name               string
+		transProto         tcpip.TransportProtocolNumber
+		netProto           tcpip.NetworkProtocolNumber
+		linkEndpoint       func() stack.LinkEndpoint
+		localAddr          tcpip.Address
+		icmpBuf            func(*testing.T) buffer.View
+		expectedConnectErr *tcpip.Error
+		checkLinkEndpoint  func(t *testing.T, e stack.LinkEndpoint)
+	}{
+		{
+			name:              "IPv4 loopback",
+			transProto:        icmp.ProtocolNumber4,
+			netProto:          ipv4.ProtocolNumber,
+			linkEndpoint:      loopback.New,
+			localAddr:         ipv4Loopback,
+			icmpBuf:           ipv4ICMPBuf,
+			checkLinkEndpoint: func(*testing.T, stack.LinkEndpoint) {},
+		},
+		{
+			name:              "IPv6 loopback",
+			transProto:        icmp.ProtocolNumber6,
+			netProto:          ipv6.ProtocolNumber,
+			linkEndpoint:      loopback.New,
+			localAddr:         header.IPv6Loopback,
+			icmpBuf:           ipv6ICMPBuf,
+			checkLinkEndpoint: func(*testing.T, stack.LinkEndpoint) {},
+		},
+		{
+			name:              "IPv4 non-loopback",
+			transProto:        icmp.ProtocolNumber4,
+			netProto:          ipv4.ProtocolNumber,
+			linkEndpoint:      channelEP,
+			localAddr:         ipv4Addr.Address,
+			icmpBuf:           ipv4ICMPBuf,
+			checkLinkEndpoint: channelEPCheck,
+		},
+		{
+			name:              "IPv6 non-loopback",
+			transProto:        icmp.ProtocolNumber6,
+			netProto:          ipv6.ProtocolNumber,
+			linkEndpoint:      channelEP,
+			localAddr:         ipv6Addr.Address,
+			icmpBuf:           ipv6ICMPBuf,
+			checkLinkEndpoint: channelEPCheck,
+		},
+		{
+			name:               "IPv4 loopback without local address",
+			transProto:         icmp.ProtocolNumber4,
+			netProto:           ipv4.ProtocolNumber,
+			linkEndpoint:       loopback.New,
+			icmpBuf:            ipv4ICMPBuf,
+			expectedConnectErr: tcpip.ErrNoRoute,
+			checkLinkEndpoint:  func(*testing.T, stack.LinkEndpoint) {},
+		},
+		{
+			name:               "IPv6 loopback without local address",
+			transProto:         icmp.ProtocolNumber6,
+			netProto:           ipv6.ProtocolNumber,
+			linkEndpoint:       loopback.New,
+			icmpBuf:            ipv6ICMPBuf,
+			expectedConnectErr: tcpip.ErrNoRoute,
+			checkLinkEndpoint:  func(*testing.T, stack.LinkEndpoint) {},
+		},
+		{
+			name:               "IPv4 non-loopback without local address",
+			transProto:         icmp.ProtocolNumber4,
+			netProto:           ipv4.ProtocolNumber,
+			linkEndpoint:       channelEP,
+			icmpBuf:            ipv4ICMPBuf,
+			expectedConnectErr: tcpip.ErrNoRoute,
+			checkLinkEndpoint:  channelEPCheck,
+		},
+		{
+			name:               "IPv6 non-loopback without local address",
+			transProto:         icmp.ProtocolNumber6,
+			netProto:           ipv6.ProtocolNumber,
+			linkEndpoint:       channelEP,
+			icmpBuf:            ipv6ICMPBuf,
+			expectedConnectErr: tcpip.ErrNoRoute,
+			checkLinkEndpoint:  channelEPCheck,
+		},
+	}
+
+	for _, test := range tests {
+		t.Run(test.name, func(t *testing.T) {
+			s := stack.New(stack.Options{
+				NetworkProtocols:   []stack.NetworkProtocolFactory{ipv4.NewProtocol, ipv6.NewProtocol},
+				TransportProtocols: []stack.TransportProtocolFactory{icmp.NewProtocol4, icmp.NewProtocol6},
+				HandleLocal:        true,
+			})
+			e := test.linkEndpoint()
+			if err := s.CreateNIC(nicID, e); err != nil {
+				t.Fatalf("s.CreateNIC(%d, _): %s", nicID, err)
+			}
+
+			if len(test.localAddr) != 0 {
+				if err := s.AddAddress(nicID, test.netProto, test.localAddr); err != nil {
+					t.Fatalf("s.AddAddress(%d, %d, %s): %s", nicID, test.netProto, test.localAddr, err)
+				}
+			}
+
+			var wq waiter.Queue
+			we, ch := waiter.NewChannelEntry(nil)
+			wq.EventRegister(&we, waiter.EventIn)
+			ep, err := s.NewEndpoint(test.transProto, test.netProto, &wq)
+			if err != nil {
+				t.Fatalf("s.NewEndpoint(%d, %d, _): %s", test.transProto, test.netProto, err)
+			}
+			defer ep.Close()
+
+			connAddr := tcpip.FullAddress{Addr: test.localAddr}
+			if err := ep.Connect(connAddr); err != test.expectedConnectErr {
+				t.Fatalf("got ep.Connect(%#v) = %s, want = %s", connAddr, err, test.expectedConnectErr)
+			}
+
+			if test.expectedConnectErr != nil {
+				return
+			}
+
+			payload := tcpip.SlicePayload(test.icmpBuf(t))
+			var wOpts tcpip.WriteOptions
+			if n, _, err := ep.Write(payload, wOpts); err != nil {
+				t.Fatalf("ep.Write(%#v, %#v): %s", payload, wOpts, err)
+			} else if n != int64(len(payload)) {
+				t.Fatalf("got ep.Write(%#v, %#v) = (%d, _, nil), want = (%d, _, nil)", payload, wOpts, n, len(payload))
+			}
+
+			// Wait for the endpoint to become readable.
+			<-ch
+
+			var addr tcpip.FullAddress
+			v, _, err := ep.Read(&addr)
+			if err != nil {
+				t.Fatalf("ep.Read(_): %s", err)
+			}
+			if diff := cmp.Diff(buffer.View(payload[icmpDataOffset:]), v[icmpDataOffset:]); diff != "" {
+				t.Errorf("received data mismatch (-want +got):\n%s", diff)
+			}
+			if addr.Addr != test.localAddr {
+				t.Errorf("got addr.Addr = %s, want = %s", addr.Addr, test.localAddr)
+			}
+
+			test.checkLinkEndpoint(t, e)
+		})
+	}
+}
+
+// TestLocalUDP tests sending UDP packets between two endpoints that are local
+// to the stack.
+//
+// This tests that packets never leave the stack and the addresses
+// used when sending a packet.
+func TestLocalUDP(t *testing.T) {
+	const (
+		nicID = 1
+	)
+
+	tests := []struct {
+		name             string
+		canBePrimaryAddr tcpip.ProtocolAddress
+		firstPrimaryAddr tcpip.ProtocolAddress
+	}{
+		{
+			name:             "IPv4",
+			canBePrimaryAddr: ipv4Addr1,
+			firstPrimaryAddr: ipv4Addr2,
+		},
+		{
+			name:             "IPv6",
+			canBePrimaryAddr: ipv6Addr1,
+			firstPrimaryAddr: ipv6Addr2,
+		},
+	}
+
+	subTests := []struct {
+		name             string
+		addAddress       bool
+		expectedWriteErr *tcpip.Error
+	}{
+		{
+			name:             "Unassigned local address",
+			addAddress:       false,
+			expectedWriteErr: tcpip.ErrNoRoute,
+		},
+		{
+			name:             "Assigned local address",
+			addAddress:       true,
+			expectedWriteErr: nil,
+		},
+	}
+
+	for _, test := range tests {
+		t.Run(test.name, func(t *testing.T) {
+			for _, subTest := range subTests {
+				t.Run(subTest.name, func(t *testing.T) {
+					stackOpts := stack.Options{
+						NetworkProtocols:   []stack.NetworkProtocolFactory{ipv4.NewProtocol, ipv6.NewProtocol},
+						TransportProtocols: []stack.TransportProtocolFactory{udp.NewProtocol},
+						HandleLocal:        true,
+					}
+
+					s := stack.New(stackOpts)
+					ep := channel.New(1, header.IPv6MinimumMTU, "")
+
+					if err := s.CreateNIC(nicID, ep); err != nil {
+						t.Fatalf("s.CreateNIC(%d, _): %s", nicID, err)
+					}
+
+					if subTest.addAddress {
+						if err := s.AddProtocolAddressWithOptions(nicID, test.canBePrimaryAddr, stack.CanBePrimaryEndpoint); err != nil {
+							t.Fatalf("s.AddProtocolAddressWithOptions(%d, %#v, %d): %s", nicID, test.canBePrimaryAddr, stack.CanBePrimaryEndpoint, err)
+						}
+						if err := s.AddProtocolAddressWithOptions(nicID, test.firstPrimaryAddr, stack.FirstPrimaryEndpoint); err != nil {
+							t.Fatalf("s.AddProtocolAddressWithOptions(%d, %#v, %d): %s", nicID, test.firstPrimaryAddr, stack.FirstPrimaryEndpoint, err)
+						}
+					}
+
+					var serverWQ waiter.Queue
+					serverWE, serverCH := waiter.NewChannelEntry(nil)
+					serverWQ.EventRegister(&serverWE, waiter.EventIn)
+					server, err := s.NewEndpoint(udp.ProtocolNumber, test.firstPrimaryAddr.Protocol, &serverWQ)
+					if err != nil {
+						t.Fatalf("s.NewEndpoint(%d, %d): %s", udp.ProtocolNumber, test.firstPrimaryAddr.Protocol, err)
+					}
+					defer server.Close()
+
+					bindAddr := tcpip.FullAddress{Port: 80}
+					if err := server.Bind(bindAddr); err != nil {
+						t.Fatalf("server.Bind(%#v): %s", bindAddr, err)
+					}
+
+					var clientWQ waiter.Queue
+					clientWE, clientCH := waiter.NewChannelEntry(nil)
+					clientWQ.EventRegister(&clientWE, waiter.EventIn)
+					client, err := s.NewEndpoint(udp.ProtocolNumber, test.firstPrimaryAddr.Protocol, &clientWQ)
+					if err != nil {
+						t.Fatalf("s.NewEndpoint(%d, %d): %s", udp.ProtocolNumber, test.firstPrimaryAddr.Protocol, err)
+					}
+					defer client.Close()
+
+					serverAddr := tcpip.FullAddress{
+						Addr: test.canBePrimaryAddr.AddressWithPrefix.Address,
+						Port: 80,
+					}
+
+					clientPayload := tcpip.SlicePayload([]byte{1, 2, 3, 4})
+					{
+						wOpts := tcpip.WriteOptions{
+							To: &serverAddr,
+						}
+						if n, _, err := client.Write(clientPayload, wOpts); err != subTest.expectedWriteErr {
+							t.Fatalf("got client.Write(%#v, %#v) = (%d, _, %s), want = (_, _, %s)", clientPayload, wOpts, n, err, subTest.expectedWriteErr)
+						} else if subTest.expectedWriteErr != nil {
+							// Nothing else to test if we expected not to be able to send the
+							// UDP packet.
+							return
+						} else if n != int64(len(clientPayload)) {
+							t.Fatalf("got client.Write(%#v, %#v) = (%d, _, nil), want = (%d, _, nil)", clientPayload, wOpts, n, len(clientPayload))
+						}
+					}
+
+					// Wait for the server endpoint to become readable.
+					<-serverCH
+
+					var clientAddr tcpip.FullAddress
+					if v, _, err := server.Read(&clientAddr); err != nil {
+						t.Fatalf("server.Read(_): %s", err)
+					} else {
+						if diff := cmp.Diff(buffer.View(clientPayload), v); diff != "" {
+							t.Errorf("server read clientPayload mismatch (-want +got):\n%s", diff)
+						}
+						if clientAddr.Addr != test.canBePrimaryAddr.AddressWithPrefix.Address {
+							t.Errorf("got clientAddr.Addr = %s, want = %s", clientAddr.Addr, test.canBePrimaryAddr.AddressWithPrefix.Address)
+						}
+						if t.Failed() {
+							t.FailNow()
+						}
+					}
+
+					serverPayload := tcpip.SlicePayload([]byte{1, 2, 3, 4})
+					{
+						wOpts := tcpip.WriteOptions{
+							To: &clientAddr,
+						}
+						if n, _, err := server.Write(serverPayload, wOpts); err != nil {
+							t.Fatalf("server.Write(%#v, %#v): %s", serverPayload, wOpts, err)
+						} else if n != int64(len(serverPayload)) {
+							t.Fatalf("got server.Write(%#v, %#v) = (%d, _, nil), want = (%d, _, nil)", serverPayload, wOpts, n, len(serverPayload))
+						}
+					}
+
+					// Wait for the client endpoint to become readable.
+					<-clientCH
+
+					var gotServerAddr tcpip.FullAddress
+					if v, _, err := client.Read(&gotServerAddr); err != nil {
+						t.Fatalf("client.Read(_): %s", err)
+					} else {
+						if diff := cmp.Diff(buffer.View(serverPayload), v); diff != "" {
+							t.Errorf("client read serverPayload mismatch (-want +got):\n%s", diff)
+						}
+						if gotServerAddr.Addr != serverAddr.Addr {
+							t.Errorf("got gotServerAddr.Addr = %s, want = %s", gotServerAddr.Addr, serverAddr.Addr)
+						}
+						if t.Failed() {
+							t.FailNow()
+						}
+					}
+				})
+			}
+		})
+	}
+}
diff --git a/pkg/tcpip/transport/icmp/endpoint.go b/pkg/tcpip/transport/icmp/endpoint.go
index 41eb0ca44..74fe19e98 100644
--- a/pkg/tcpip/transport/icmp/endpoint.go
+++ b/pkg/tcpip/transport/icmp/endpoint.go
@@ -49,6 +49,7 @@ const (
 // +stateify savable
 type endpoint struct {
 	stack.TransportEndpointInfo
+	tcpip.DefaultSocketOptionsHandler
 
 	// The following fields are initialized at creation time and are
 	// immutable.
@@ -71,18 +72,19 @@ type endpoint struct {
 	// shutdownFlags represent the current shutdown state of the endpoint.
 	shutdownFlags tcpip.ShutdownFlags
 	state         endpointState
-	route         stack.Route `state:"manual"`
+	route         *stack.Route `state:"manual"`
 	ttl           uint8
 	stats         tcpip.TransportEndpointStats `state:"nosave"`
-	// linger is used for SO_LINGER socket option.
-	linger tcpip.LingerOption
 
 	// owner is used to get uid and gid of the packet.
 	owner tcpip.PacketOwner
+
+	// ops is used to get socket level options.
+	ops tcpip.SocketOptions
 }
 
 func newEndpoint(s *stack.Stack, netProto tcpip.NetworkProtocolNumber, transProto tcpip.TransportProtocolNumber, waiterQueue *waiter.Queue) (tcpip.Endpoint, *tcpip.Error) {
-	return &endpoint{
+	ep := &endpoint{
 		stack: s,
 		TransportEndpointInfo: stack.TransportEndpointInfo{
 			NetProto:   netProto,
@@ -93,7 +95,9 @@ func newEndpoint(s *stack.Stack, netProto tcpip.NetworkProtocolNumber, transProt
 		sndBufSize:    32 * 1024,
 		state:         stateInitial,
 		uniqueID:      s.UniqueID(),
-	}, nil
+	}
+	ep.ops.InitHandler(ep)
+	return ep, nil
 }
 
 // UniqueID implements stack.TransportEndpoint.UniqueID.
@@ -126,7 +130,10 @@ func (e *endpoint) Close() {
 	}
 	e.rcvMu.Unlock()
 
-	e.route.Release()
+	if e.route != nil {
+		e.route.Release()
+		e.route = nil
+	}
 
 	// Update the state.
 	e.state = stateClosed
@@ -139,6 +146,7 @@ func (e *endpoint) Close() {
 // ModerateRecvBuf implements tcpip.Endpoint.ModerateRecvBuf.
 func (e *endpoint) ModerateRecvBuf(copied int) {}
 
+// SetOwner implements tcpip.Endpoint.SetOwner.
 func (e *endpoint) SetOwner(owner tcpip.PacketOwner) {
 	e.owner = owner
 }
@@ -264,26 +272,8 @@ func (e *endpoint) write(p tcpip.Payloader, opts tcpip.WriteOptions) (int64, <-c
 		}
 	}
 
-	var route *stack.Route
-	if to == nil {
-		route = &e.route
-
-		if route.IsResolutionRequired() {
-			// Promote lock to exclusive if using a shared route,
-			// given that it may need to change in Route.Resolve()
-			// call below.
-			e.mu.RUnlock()
-			defer e.mu.RLock()
-
-			e.mu.Lock()
-			defer e.mu.Unlock()
-
-			// Recheck state after lock was re-acquired.
-			if e.state != stateConnected {
-				return 0, nil, tcpip.ErrInvalidEndpointState
-			}
-		}
-	} else {
+	route := e.route
+	if to != nil {
 		// Reject destination address if it goes through a different
 		// NIC than the endpoint was bound to.
 		nicID := to.NIC
@@ -307,7 +297,7 @@ func (e *endpoint) write(p tcpip.Payloader, opts tcpip.WriteOptions) (int64, <-c
 		}
 		defer r.Release()
 
-		route = &r
+		route = r
 	}
 
 	if route.IsResolutionRequired() {
@@ -340,26 +330,12 @@ func (e *endpoint) write(p tcpip.Payloader, opts tcpip.WriteOptions) (int64, <-c
 }
 
 // Peek only returns data from a single datagram, so do nothing here.
-func (e *endpoint) Peek([][]byte) (int64, tcpip.ControlMessages, *tcpip.Error) {
-	return 0, tcpip.ControlMessages{}, nil
+func (e *endpoint) Peek([][]byte) (int64, *tcpip.Error) {
+	return 0, nil
 }
 
 // SetSockOpt sets a socket option.
 func (e *endpoint) SetSockOpt(opt tcpip.SettableSocketOption) *tcpip.Error {
-	switch v := opt.(type) {
-	case *tcpip.SocketDetachFilterOption:
-		return nil
-
-	case *tcpip.LingerOption:
-		e.mu.Lock()
-		e.linger = *v
-		e.mu.Unlock()
-	}
-	return nil
-}
-
-// SetSockOptBool sets a socket option. Currently not supported.
-func (e *endpoint) SetSockOptBool(opt tcpip.SockOptBool, v bool) *tcpip.Error {
 	return nil
 }
 
@@ -375,17 +351,6 @@ func (e *endpoint) SetSockOptInt(opt tcpip.SockOptInt, v int) *tcpip.Error {
 	return nil
 }
 
-// GetSockOptBool implements tcpip.Endpoint.GetSockOptBool.
-func (e *endpoint) GetSockOptBool(opt tcpip.SockOptBool) (bool, *tcpip.Error) {
-	switch opt {
-	case tcpip.KeepaliveEnabledOption:
-		return false, nil
-
-	default:
-		return false, tcpip.ErrUnknownProtocolOption
-	}
-}
-
 // GetSockOptInt implements tcpip.Endpoint.GetSockOptInt.
 func (e *endpoint) GetSockOptInt(opt tcpip.SockOptInt) (int, *tcpip.Error) {
 	switch opt {
@@ -423,16 +388,7 @@ func (e *endpoint) GetSockOptInt(opt tcpip.SockOptInt) (int, *tcpip.Error) {
 
 // GetSockOpt implements tcpip.Endpoint.GetSockOpt.
 func (e *endpoint) GetSockOpt(opt tcpip.GettableSocketOption) *tcpip.Error {
-	switch o := opt.(type) {
-	case *tcpip.LingerOption:
-		e.mu.Lock()
-		*o = e.linger
-		e.mu.Unlock()
-		return nil
-
-	default:
-		return tcpip.ErrUnknownProtocolOption
-	}
+	return tcpip.ErrUnknownProtocolOption
 }
 
 func send4(r *stack.Route, ident uint16, data buffer.View, ttl uint8, owner tcpip.PacketOwner) *tcpip.Error {
@@ -755,7 +711,7 @@ func (e *endpoint) Readiness(mask waiter.EventMask) waiter.EventMask {
 
 // HandlePacket is called by the stack when new packets arrive to this transport
 // endpoint.
-func (e *endpoint) HandlePacket(r *stack.Route, id stack.TransportEndpointID, pkt *stack.PacketBuffer) {
+func (e *endpoint) HandlePacket(id stack.TransportEndpointID, pkt *stack.PacketBuffer) {
 	// Only accept echo replies.
 	switch e.NetProto {
 	case header.IPv4ProtocolNumber:
@@ -800,7 +756,7 @@ func (e *endpoint) HandlePacket(r *stack.Route, id stack.TransportEndpointID, pk
 	// Push new packet into receive list and increment the buffer size.
 	packet := &icmpPacket{
 		senderAddress: tcpip.FullAddress{
-			NIC:  r.NICID(),
+			NIC:  pkt.NICID,
 			Addr: id.RemoteAddress,
 		},
 	}
@@ -853,3 +809,8 @@ func (*endpoint) Wait() {}
 func (*endpoint) LastError() *tcpip.Error {
 	return nil
 }
+
+// SocketOptions implements tcpip.Endpoint.SocketOptions.
+func (e *endpoint) SocketOptions() *tcpip.SocketOptions {
+	return &e.ops
+}
diff --git a/pkg/tcpip/transport/icmp/protocol.go b/pkg/tcpip/transport/icmp/protocol.go
index 87d510f96..3820e5dc7 100644
--- a/pkg/tcpip/transport/icmp/protocol.go
+++ b/pkg/tcpip/transport/icmp/protocol.go
@@ -101,7 +101,7 @@ func (p *protocol) ParsePorts(v buffer.View) (src, dst uint16, err *tcpip.Error)
 
 // HandleUnknownDestinationPacket handles packets targeted at this protocol but
 // that don't match any existing endpoint.
-func (*protocol) HandleUnknownDestinationPacket(*stack.Route, stack.TransportEndpointID, *stack.PacketBuffer) stack.UnknownDestinationPacketDisposition {
+func (*protocol) HandleUnknownDestinationPacket(stack.TransportEndpointID, *stack.PacketBuffer) stack.UnknownDestinationPacketDisposition {
 	return stack.UnknownDestinationPacketHandled
 }
 
diff --git a/pkg/tcpip/transport/packet/endpoint.go b/pkg/tcpip/transport/packet/endpoint.go
index 072601d2d..9faab4b9e 100644
--- a/pkg/tcpip/transport/packet/endpoint.go
+++ b/pkg/tcpip/transport/packet/endpoint.go
@@ -60,6 +60,8 @@ type packet struct {
 // +stateify savable
 type endpoint struct {
 	stack.TransportEndpointInfo
+	tcpip.DefaultSocketOptionsHandler
+
 	// The following fields are initialized at creation time and are
 	// immutable.
 	stack       *stack.Stack `state:"manual"`
@@ -83,12 +85,13 @@ type endpoint struct {
 	stats         tcpip.TransportEndpointStats `state:"nosave"`
 	bound         bool
 	boundNIC      tcpip.NICID
-	// linger is used for SO_LINGER socket option.
-	linger tcpip.LingerOption
 
 	// lastErrorMu protects lastError.
 	lastErrorMu sync.Mutex   `state:"nosave"`
 	lastError   *tcpip.Error `state:".(string)"`
+
+	// ops is used to get socket level options.
+	ops tcpip.SocketOptions
 }
 
 // NewEndpoint returns a new packet endpoint.
@@ -104,6 +107,7 @@ func NewEndpoint(s *stack.Stack, cooked bool, netProto tcpip.NetworkProtocolNumb
 		rcvBufSizeMax: 32 * 1024,
 		sndBufSize:    32 * 1024,
 	}
+	ep.ops.InitHandler(ep)
 
 	// Override with stack defaults.
 	var ss stack.SendBufferSizeOption
@@ -200,8 +204,8 @@ func (*endpoint) Write(p tcpip.Payloader, opts tcpip.WriteOptions) (int64, <-cha
 }
 
 // Peek implements tcpip.Endpoint.Peek.
-func (*endpoint) Peek([][]byte) (int64, tcpip.ControlMessages, *tcpip.Error) {
-	return 0, tcpip.ControlMessages{}, nil
+func (*endpoint) Peek([][]byte) (int64, *tcpip.Error) {
+	return 0, nil
 }
 
 // Disconnect implements tcpip.Endpoint.Disconnect. Packet sockets cannot be
@@ -300,26 +304,15 @@ func (ep *endpoint) Readiness(mask waiter.EventMask) waiter.EventMask {
 // used with SetSockOpt, and this function always returns
 // tcpip.ErrNotSupported.
 func (ep *endpoint) SetSockOpt(opt tcpip.SettableSocketOption) *tcpip.Error {
-	switch v := opt.(type) {
+	switch opt.(type) {
 	case *tcpip.SocketDetachFilterOption:
 		return nil
 
-	case *tcpip.LingerOption:
-		ep.mu.Lock()
-		ep.linger = *v
-		ep.mu.Unlock()
-		return nil
-
 	default:
 		return tcpip.ErrUnknownProtocolOption
 	}
 }
 
-// SetSockOptBool implements tcpip.Endpoint.SetSockOptBool.
-func (ep *endpoint) SetSockOptBool(opt tcpip.SockOptBool, v bool) *tcpip.Error {
-	return tcpip.ErrUnknownProtocolOption
-}
-
 // SetSockOptInt implements tcpip.Endpoint.SetSockOptInt.
 func (ep *endpoint) SetSockOptInt(opt tcpip.SockOptInt, v int) *tcpip.Error {
 	switch opt {
@@ -375,21 +368,7 @@ func (ep *endpoint) LastError() *tcpip.Error {
 
 // GetSockOpt implements tcpip.Endpoint.GetSockOpt.
 func (ep *endpoint) GetSockOpt(opt tcpip.GettableSocketOption) *tcpip.Error {
-	switch o := opt.(type) {
-	case *tcpip.LingerOption:
-		ep.mu.Lock()
-		*o = ep.linger
-		ep.mu.Unlock()
-		return nil
-
-	default:
-		return tcpip.ErrNotSupported
-	}
-}
-
-// GetSockOptBool implements tcpip.Endpoint.GetSockOptBool.
-func (*endpoint) GetSockOptBool(opt tcpip.SockOptBool) (bool, *tcpip.Error) {
-	return false, tcpip.ErrNotSupported
+	return tcpip.ErrNotSupported
 }
 
 // GetSockOptInt implements tcpip.Endpoint.GetSockOptInt.
@@ -543,4 +522,10 @@ func (ep *endpoint) Stats() tcpip.EndpointStats {
 	return &ep.stats
 }
 
+// SetOwner implements tcpip.Endpoint.SetOwner.
 func (ep *endpoint) SetOwner(owner tcpip.PacketOwner) {}
+
+// SocketOptions implements tcpip.Endpoint.SocketOptions.
+func (ep *endpoint) SocketOptions() *tcpip.SocketOptions {
+	return &ep.ops
+}
diff --git a/pkg/tcpip/transport/raw/endpoint.go b/pkg/tcpip/transport/raw/endpoint.go
index e37c00523..87c60bdab 100644
--- a/pkg/tcpip/transport/raw/endpoint.go
+++ b/pkg/tcpip/transport/raw/endpoint.go
@@ -58,12 +58,13 @@ type rawPacket struct {
 // +stateify savable
 type endpoint struct {
 	stack.TransportEndpointInfo
+	tcpip.DefaultSocketOptionsHandler
+
 	// The following fields are initialized at creation time and are
 	// immutable.
 	stack       *stack.Stack `state:"manual"`
 	waiterQueue *waiter.Queue
 	associated  bool
-	hdrIncluded bool
 
 	// The following fields are used to manage the receive queue and are
 	// protected by rcvMu.
@@ -82,13 +83,14 @@ type endpoint struct {
 	bound         bool
 	// route is the route to a remote network endpoint. It is set via
 	// Connect(), and is valid only when conneted is true.
-	route stack.Route                  `state:"manual"`
+	route *stack.Route                 `state:"manual"`
 	stats tcpip.TransportEndpointStats `state:"nosave"`
-	// linger is used for SO_LINGER socket option.
-	linger tcpip.LingerOption
 
 	// owner is used to get uid and gid of the packet.
 	owner tcpip.PacketOwner
+
+	// ops is used to get socket level options.
+	ops tcpip.SocketOptions
 }
 
 // NewEndpoint returns a raw  endpoint for the given protocols.
@@ -111,8 +113,9 @@ func newEndpoint(s *stack.Stack, netProto tcpip.NetworkProtocolNumber, transProt
 		rcvBufSizeMax: 32 * 1024,
 		sndBufSizeMax: 32 * 1024,
 		associated:    associated,
-		hdrIncluded:   !associated,
 	}
+	e.ops.InitHandler(e)
+	e.ops.SetHeaderIncluded(!associated)
 
 	// Override with stack defaults.
 	var ss stack.SendBufferSizeOption
@@ -167,9 +170,11 @@ func (e *endpoint) Close() {
 		e.rcvList.Remove(e.rcvList.Front())
 	}
 
-	if e.connected {
+	e.connected = false
+
+	if e.route != nil {
 		e.route.Release()
-		e.connected = false
+		e.route = nil
 	}
 
 	e.closed = true
@@ -220,6 +225,13 @@ func (e *endpoint) Write(p tcpip.Payloader, opts tcpip.WriteOptions) (int64, <-c
 		return 0, nil, tcpip.ErrInvalidOptionValue
 	}
 
+	if opts.To != nil {
+		// Raw sockets do not support sending to an IPv4 address on an IPv6 endpoint.
+		if e.TransportEndpointInfo.NetProto == header.IPv6ProtocolNumber && len(opts.To.Addr) != header.IPv6AddressSize {
+			return 0, nil, tcpip.ErrInvalidOptionValue
+		}
+	}
+
 	n, ch, err := e.write(p, opts)
 	switch err {
 	case nil:
@@ -263,7 +275,7 @@ func (e *endpoint) write(p tcpip.Payloader, opts tcpip.WriteOptions) (int64, <-c
 
 	// If this is an unassociated socket and callee provided a nonzero
 	// destination address, route using that address.
-	if e.hdrIncluded {
+	if e.ops.GetHeaderIncluded() {
 		ip := header.IPv4(payloadBytes)
 		if !ip.IsValid(len(payloadBytes)) {
 			e.mu.RUnlock()
@@ -293,7 +305,7 @@ func (e *endpoint) write(p tcpip.Payloader, opts tcpip.WriteOptions) (int64, <-c
 		}
 
 		if e.route.IsResolutionRequired() {
-			savedRoute := &e.route
+			savedRoute := e.route
 			// Promote lock to exclusive if using a shared route,
 			// given that it may need to change in finishWrite.
 			e.mu.RUnlock()
@@ -301,7 +313,7 @@ func (e *endpoint) write(p tcpip.Payloader, opts tcpip.WriteOptions) (int64, <-c
 
 			// Make sure that the route didn't change during the
 			// time we didn't hold the lock.
-			if !e.connected || savedRoute != &e.route {
+			if !e.connected || savedRoute != e.route {
 				e.mu.Unlock()
 				return 0, nil, tcpip.ErrInvalidEndpointState
 			}
@@ -311,7 +323,7 @@ func (e *endpoint) write(p tcpip.Payloader, opts tcpip.WriteOptions) (int64, <-c
 			return n, ch, err
 		}
 
-		n, ch, err := e.finishWrite(payloadBytes, &e.route)
+		n, ch, err := e.finishWrite(payloadBytes, e.route)
 		e.mu.RUnlock()
 		return n, ch, err
 	}
@@ -332,7 +344,7 @@ func (e *endpoint) write(p tcpip.Payloader, opts tcpip.WriteOptions) (int64, <-c
 		return 0, nil, err
 	}
 
-	n, ch, err := e.finishWrite(payloadBytes, &route)
+	n, ch, err := e.finishWrite(payloadBytes, route)
 	route.Release()
 	e.mu.RUnlock()
 	return n, ch, err
@@ -353,7 +365,7 @@ func (e *endpoint) finishWrite(payloadBytes []byte, route *stack.Route) (int64,
 		}
 	}
 
-	if e.hdrIncluded {
+	if e.ops.GetHeaderIncluded() {
 		pkt := stack.NewPacketBuffer(stack.PacketBufferOptions{
 			Data: buffer.View(payloadBytes).ToVectorisedView(),
 		})
@@ -379,8 +391,8 @@ func (e *endpoint) finishWrite(payloadBytes []byte, route *stack.Route) (int64,
 }
 
 // Peek implements tcpip.Endpoint.Peek.
-func (e *endpoint) Peek([][]byte) (int64, tcpip.ControlMessages, *tcpip.Error) {
-	return 0, tcpip.ControlMessages{}, nil
+func (e *endpoint) Peek([][]byte) (int64, *tcpip.Error) {
+	return 0, nil
 }
 
 // Disconnect implements tcpip.Endpoint.Disconnect.
@@ -390,6 +402,11 @@ func (*endpoint) Disconnect() *tcpip.Error {
 
 // Connect implements tcpip.Endpoint.Connect.
 func (e *endpoint) Connect(addr tcpip.FullAddress) *tcpip.Error {
+	// Raw sockets do not support connecting to an IPv4 address on an IPv6 endpoint.
+	if e.TransportEndpointInfo.NetProto == header.IPv6ProtocolNumber && len(addr.Addr) != header.IPv6AddressSize {
+		return tcpip.ErrAddressFamilyNotSupported
+	}
+
 	e.mu.Lock()
 	defer e.mu.Unlock()
 
@@ -513,33 +530,15 @@ func (e *endpoint) Readiness(mask waiter.EventMask) waiter.EventMask {
 
 // SetSockOpt implements tcpip.Endpoint.SetSockOpt.
 func (e *endpoint) SetSockOpt(opt tcpip.SettableSocketOption) *tcpip.Error {
-	switch v := opt.(type) {
+	switch opt.(type) {
 	case *tcpip.SocketDetachFilterOption:
 		return nil
 
-	case *tcpip.LingerOption:
-		e.mu.Lock()
-		e.linger = *v
-		e.mu.Unlock()
-		return nil
-
 	default:
 		return tcpip.ErrUnknownProtocolOption
 	}
 }
 
-// SetSockOptBool implements tcpip.Endpoint.SetSockOptBool.
-func (e *endpoint) SetSockOptBool(opt tcpip.SockOptBool, v bool) *tcpip.Error {
-	switch opt {
-	case tcpip.IPHdrIncludedOption:
-		e.mu.Lock()
-		e.hdrIncluded = v
-		e.mu.Unlock()
-		return nil
-	}
-	return tcpip.ErrUnknownProtocolOption
-}
-
 // SetSockOptInt implements tcpip.Endpoint.SetSockOptInt.
 func (e *endpoint) SetSockOptInt(opt tcpip.SockOptInt, v int) *tcpip.Error {
 	switch opt {
@@ -586,33 +585,7 @@ func (e *endpoint) SetSockOptInt(opt tcpip.SockOptInt, v int) *tcpip.Error {
 
 // GetSockOpt implements tcpip.Endpoint.GetSockOpt.
 func (e *endpoint) GetSockOpt(opt tcpip.GettableSocketOption) *tcpip.Error {
-	switch o := opt.(type) {
-	case *tcpip.LingerOption:
-		e.mu.Lock()
-		*o = e.linger
-		e.mu.Unlock()
-		return nil
-
-	default:
-		return tcpip.ErrUnknownProtocolOption
-	}
-}
-
-// GetSockOptBool implements tcpip.Endpoint.GetSockOptBool.
-func (e *endpoint) GetSockOptBool(opt tcpip.SockOptBool) (bool, *tcpip.Error) {
-	switch opt {
-	case tcpip.KeepaliveEnabledOption:
-		return false, nil
-
-	case tcpip.IPHdrIncludedOption:
-		e.mu.Lock()
-		v := e.hdrIncluded
-		e.mu.Unlock()
-		return v, nil
-
-	default:
-		return false, tcpip.ErrUnknownProtocolOption
-	}
+	return tcpip.ErrUnknownProtocolOption
 }
 
 // GetSockOptInt implements tcpip.Endpoint.GetSockOptInt.
@@ -646,7 +619,7 @@ func (e *endpoint) GetSockOptInt(opt tcpip.SockOptInt) (int, *tcpip.Error) {
 }
 
 // HandlePacket implements stack.RawTransportEndpoint.HandlePacket.
-func (e *endpoint) HandlePacket(route *stack.Route, pkt *stack.PacketBuffer) {
+func (e *endpoint) HandlePacket(pkt *stack.PacketBuffer) {
 	e.rcvMu.Lock()
 
 	// Drop the packet if our buffer is currently full or if this is an unassociated
@@ -671,14 +644,16 @@ func (e *endpoint) HandlePacket(route *stack.Route, pkt *stack.PacketBuffer) {
 		return
 	}
 
+	remoteAddr := pkt.Network().SourceAddress()
+
 	if e.bound {
 		// If bound to a NIC, only accept data for that NIC.
-		if e.BindNICID != 0 && e.BindNICID != route.NICID() {
+		if e.BindNICID != 0 && e.BindNICID != pkt.NICID {
 			e.rcvMu.Unlock()
 			return
 		}
 		// If bound to an address, only accept data for that address.
-		if e.BindAddr != "" && e.BindAddr != route.RemoteAddress {
+		if e.BindAddr != "" && e.BindAddr != remoteAddr {
 			e.rcvMu.Unlock()
 			return
 		}
@@ -686,7 +661,7 @@ func (e *endpoint) HandlePacket(route *stack.Route, pkt *stack.PacketBuffer) {
 
 	// If connected, only accept packets from the remote address we
 	// connected to.
-	if e.connected && e.route.RemoteAddress != route.RemoteAddress {
+	if e.connected && e.route.RemoteAddress != remoteAddr {
 		e.rcvMu.Unlock()
 		return
 	}
@@ -696,8 +671,8 @@ func (e *endpoint) HandlePacket(route *stack.Route, pkt *stack.PacketBuffer) {
 	// Push new packet into receive list and increment the buffer size.
 	packet := &rawPacket{
 		senderAddr: tcpip.FullAddress{
-			NIC:  route.NICID(),
-			Addr: route.RemoteAddress,
+			NIC:  pkt.NICID,
+			Addr: remoteAddr,
 		},
 	}
 
@@ -751,6 +726,12 @@ func (e *endpoint) Stats() tcpip.EndpointStats {
 // Wait implements stack.TransportEndpoint.Wait.
 func (*endpoint) Wait() {}
 
+// LastError implements tcpip.Endpoint.LastError.
 func (*endpoint) LastError() *tcpip.Error {
 	return nil
 }
+
+// SocketOptions implements tcpip.Endpoint.SocketOptions.
+func (e *endpoint) SocketOptions() *tcpip.SocketOptions {
+	return &e.ops
+}
diff --git a/pkg/tcpip/transport/raw/endpoint_state.go b/pkg/tcpip/transport/raw/endpoint_state.go
index 33bfb56cd..4a7e1c039 100644
--- a/pkg/tcpip/transport/raw/endpoint_state.go
+++ b/pkg/tcpip/transport/raw/endpoint_state.go
@@ -37,57 +37,63 @@ func (p *rawPacket) loadData(data buffer.VectorisedView) {
 }
 
 // beforeSave is invoked by stateify.
-func (ep *endpoint) beforeSave() {
+func (e *endpoint) beforeSave() {
 	// Stop incoming packets from being handled (and mutate endpoint state).
 	// The lock will be released after saveRcvBufSizeMax(), which would have
-	// saved ep.rcvBufSizeMax and set it to 0 to continue blocking incoming
+	// saved e.rcvBufSizeMax and set it to 0 to continue blocking incoming
 	// packets.
-	ep.rcvMu.Lock()
+	e.rcvMu.Lock()
 }
 
 // saveRcvBufSizeMax is invoked by stateify.
-func (ep *endpoint) saveRcvBufSizeMax() int {
-	max := ep.rcvBufSizeMax
+func (e *endpoint) saveRcvBufSizeMax() int {
+	max := e.rcvBufSizeMax
 	// Make sure no new packets will be handled regardless of the lock.
-	ep.rcvBufSizeMax = 0
+	e.rcvBufSizeMax = 0
 	// Release the lock acquired in beforeSave() so regular endpoint closing
 	// logic can proceed after save.
-	ep.rcvMu.Unlock()
+	e.rcvMu.Unlock()
 	return max
 }
 
 // loadRcvBufSizeMax is invoked by stateify.
-func (ep *endpoint) loadRcvBufSizeMax(max int) {
-	ep.rcvBufSizeMax = max
+func (e *endpoint) loadRcvBufSizeMax(max int) {
+	e.rcvBufSizeMax = max
 }
 
 // afterLoad is invoked by stateify.
-func (ep *endpoint) afterLoad() {
-	stack.StackFromEnv.RegisterRestoredEndpoint(ep)
+func (e *endpoint) afterLoad() {
+	stack.StackFromEnv.RegisterRestoredEndpoint(e)
 }
 
 // Resume implements tcpip.ResumableEndpoint.Resume.
-func (ep *endpoint) Resume(s *stack.Stack) {
-	ep.stack = s
+func (e *endpoint) Resume(s *stack.Stack) {
+	e.stack = s
 
 	// If the endpoint is connected, re-connect.
-	if ep.connected {
+	if e.connected {
 		var err *tcpip.Error
-		ep.route, err = ep.stack.FindRoute(ep.RegisterNICID, ep.BindAddr, ep.route.RemoteAddress, ep.NetProto, false)
+		// TODO(gvisor.dev/issue/4906): Properly restore the route with the right
+		// remote address. We used to pass e.route.RemoteAddress which was
+		// effectively the empty address but since moving e.route to hold a pointer
+		// to a route instead of the route by value, we pass the empty address
+		// directly. Obviously this was always wrong since we should provide the
+		// remote address we were connected to, to properly restore the route.
+		e.route, err = e.stack.FindRoute(e.RegisterNICID, e.BindAddr, "", e.NetProto, false)
 		if err != nil {
 			panic(err)
 		}
 	}
 
 	// If the endpoint is bound, re-bind.
-	if ep.bound {
-		if ep.stack.CheckLocalAddress(ep.RegisterNICID, ep.NetProto, ep.BindAddr) == 0 {
+	if e.bound {
+		if e.stack.CheckLocalAddress(e.RegisterNICID, e.NetProto, e.BindAddr) == 0 {
 			panic(tcpip.ErrBadLocalAddress)
 		}
 	}
 
-	if ep.associated {
-		if err := ep.stack.RegisterRawTransportEndpoint(ep.RegisterNICID, ep.NetProto, ep.TransProto, ep); err != nil {
+	if e.associated {
+		if err := e.stack.RegisterRawTransportEndpoint(e.RegisterNICID, e.NetProto, e.TransProto, e); err != nil {
 			panic(err)
 		}
 	}
diff --git a/pkg/tcpip/transport/tcp/BUILD b/pkg/tcpip/transport/tcp/BUILD
index 518449602..cf232b508 100644
--- a/pkg/tcpip/transport/tcp/BUILD
+++ b/pkg/tcpip/transport/tcp/BUILD
@@ -1,4 +1,4 @@
-load("//tools:defs.bzl", "go_library", "go_test")
+load("//tools:defs.bzl", "go_library", "go_test", "more_shards")
 load("//tools/go_generics:defs.bzl", "go_template_instance")
 
 package(licenses = ["notice"])
@@ -45,7 +45,9 @@ go_library(
         "rcv.go",
         "rcv_state.go",
         "reno.go",
+        "reno_recovery.go",
         "sack.go",
+        "sack_recovery.go",
         "sack_scoreboard.go",
         "segment.go",
         "segment_heap.go",
@@ -91,7 +93,7 @@ go_test(
         "tcp_test.go",
         "tcp_timestamp_test.go",
     ],
-    shard_count = 10,
+    shard_count = more_shards,
     deps = [
         ":tcp",
         "//pkg/rand",
diff --git a/pkg/tcpip/transport/tcp/accept.go b/pkg/tcpip/transport/tcp/accept.go
index b706438bd..3e1041cbe 100644
--- a/pkg/tcpip/transport/tcp/accept.go
+++ b/pkg/tcpip/transport/tcp/accept.go
@@ -199,18 +199,25 @@ func (l *listenContext) isCookieValid(id stack.TransportEndpointID, cookie seqnu
 
 // createConnectingEndpoint creates a new endpoint in a connecting state, with
 // the connection parameters given by the arguments.
-func (l *listenContext) createConnectingEndpoint(s *segment, iss seqnum.Value, irs seqnum.Value, rcvdSynOpts *header.TCPSynOptions, queue *waiter.Queue) *endpoint {
+func (l *listenContext) createConnectingEndpoint(s *segment, iss seqnum.Value, irs seqnum.Value, rcvdSynOpts *header.TCPSynOptions, queue *waiter.Queue) (*endpoint, *tcpip.Error) {
 	// Create a new endpoint.
 	netProto := l.netProto
 	if netProto == 0 {
-		netProto = s.route.NetProto
+		netProto = s.netProto
 	}
+
+	route, err := l.stack.FindRoute(s.nicID, s.dstAddr, s.srcAddr, s.netProto, false /* multicastLoop */)
+	if err != nil {
+		return nil, err
+	}
+	route.ResolveWith(s.remoteLinkAddr)
+
 	n := newEndpoint(l.stack, netProto, queue)
-	n.v6only = l.v6Only
+	n.ops.SetV6Only(l.v6Only)
 	n.ID = s.id
-	n.boundNICID = s.route.NICID()
-	n.route = s.route.Clone()
-	n.effectiveNetProtos = []tcpip.NetworkProtocolNumber{s.route.NetProto}
+	n.boundNICID = s.nicID
+	n.route = route
+	n.effectiveNetProtos = []tcpip.NetworkProtocolNumber{s.netProto}
 	n.rcvBufSize = int(l.rcvWnd)
 	n.amss = calculateAdvertisedMSS(n.userMSS, n.route)
 	n.setEndpointState(StateConnecting)
@@ -225,18 +232,25 @@ func (l *listenContext) createConnectingEndpoint(s *segment, iss seqnum.Value, i
 	// window to grow to a really large value.
 	n.rcvAutoParams.prevCopied = n.initialReceiveWindow()
 
-	return n
+	return n, nil
 }
 
-// createEndpointAndPerformHandshake creates a new endpoint in connected state
-// and then performs the TCP 3-way handshake.
+// startHandshake creates a new endpoint in connecting state and then sends
+// the SYN-ACK for the TCP 3-way handshake. It returns the state of the
+// handshake in progress, which includes the new endpoint in the SYN-RCVD
+// state.
 //
-// The new endpoint is returned with e.mu held.
-func (l *listenContext) createEndpointAndPerformHandshake(s *segment, opts *header.TCPSynOptions, queue *waiter.Queue, owner tcpip.PacketOwner) (*endpoint, *tcpip.Error) {
+// On success, a handshake h is returned with h.ep.mu held.
+//
+// Precondition: if l.listenEP != nil, l.listenEP.mu must be locked.
+func (l *listenContext) startHandshake(s *segment, opts *header.TCPSynOptions, queue *waiter.Queue, owner tcpip.PacketOwner) (*handshake, *tcpip.Error) {
 	// Create new endpoint.
 	irs := s.sequenceNumber
 	isn := generateSecureISN(s.id, l.stack.Seed())
-	ep := l.createConnectingEndpoint(s, isn, irs, opts, queue)
+	ep, err := l.createConnectingEndpoint(s, isn, irs, opts, queue)
+	if err != nil {
+		return nil, err
+	}
 
 	// Lock the endpoint before registering to ensure that no out of
 	// band changes are possible due to incoming packets etc till
@@ -247,10 +261,8 @@ func (l *listenContext) createEndpointAndPerformHandshake(s *segment, opts *head
 	// listenEP is nil when listenContext is used by tcp.Forwarder.
 	deferAccept := time.Duration(0)
 	if l.listenEP != nil {
-		l.listenEP.mu.Lock()
 		if l.listenEP.EndpointState() != StateListen {
 
-			l.listenEP.mu.Unlock()
 			// Ensure we release any registrations done by the newly
 			// created endpoint.
 			ep.mu.Unlock()
@@ -268,16 +280,12 @@ func (l *listenContext) createEndpointAndPerformHandshake(s *segment, opts *head
 			ep.mu.Unlock()
 			ep.Close()
 
-			if l.listenEP != nil {
-				l.removePendingEndpoint(ep)
-				l.listenEP.mu.Unlock()
-			}
+			l.removePendingEndpoint(ep)
 
 			return nil, tcpip.ErrConnectionAborted
 		}
 
 		deferAccept = l.listenEP.deferAccept
-		l.listenEP.mu.Unlock()
 	}
 
 	// Register new endpoint so that packets are routed to it.
@@ -296,28 +304,33 @@ func (l *listenContext) createEndpointAndPerformHandshake(s *segment, opts *head
 
 	ep.isRegistered = true
 
-	// Perform the 3-way handshake.
-	h := newPassiveHandshake(ep, seqnum.Size(ep.initialReceiveWindow()), isn, irs, opts, deferAccept)
-	if err := h.execute(); err != nil {
-		ep.mu.Unlock()
-		ep.Close()
-		ep.notifyAborted()
-
-		if l.listenEP != nil {
-			l.removePendingEndpoint(ep)
-		}
-
-		ep.drainClosingSegmentQueue()
-
+	// Initialize and start the handshake.
+	h := ep.newPassiveHandshake(isn, irs, opts, deferAccept)
+	if err := h.start(); err != nil {
+		l.cleanupFailedHandshake(h)
 		return nil, err
 	}
-	ep.isConnectNotified = true
+	return h, nil
+}
 
-	// Update the receive window scaling. We can't do it before the
-	// handshake because it's possible that the peer doesn't support window
-	// scaling.
-	ep.rcv.rcvWndScale = h.effectiveRcvWndScale()
+// performHandshake performs a TCP 3-way handshake. On success, the new
+// established endpoint is returned with e.mu held.
+//
+// Precondition: if l.listenEP != nil, l.listenEP.mu must be locked.
+func (l *listenContext) performHandshake(s *segment, opts *header.TCPSynOptions, queue *waiter.Queue, owner tcpip.PacketOwner) (*endpoint, *tcpip.Error) {
+	h, err := l.startHandshake(s, opts, queue, owner)
+	if err != nil {
+		return nil, err
+	}
+	ep := h.ep
 
+	if err := h.complete(); err != nil {
+		ep.stack.Stats().TCP.FailedConnectionAttempts.Increment()
+		ep.stats.FailedConnectionAttempts.Increment()
+		l.cleanupFailedHandshake(h)
+		return nil, err
+	}
+	l.cleanupCompletedHandshake(h)
 	return ep, nil
 }
 
@@ -344,6 +357,39 @@ func (l *listenContext) closeAllPendingEndpoints() {
 	l.pending.Wait()
 }
 
+// Precondition: h.ep.mu must be held.
+func (l *listenContext) cleanupFailedHandshake(h *handshake) {
+	e := h.ep
+	e.mu.Unlock()
+	e.Close()
+	e.notifyAborted()
+	if l.listenEP != nil {
+		l.removePendingEndpoint(e)
+	}
+	e.drainClosingSegmentQueue()
+	e.h = nil
+}
+
+// cleanupCompletedHandshake transfers any state from the completed handshake to
+// the new endpoint.
+//
+// Precondition: h.ep.mu must be held.
+func (l *listenContext) cleanupCompletedHandshake(h *handshake) {
+	e := h.ep
+	if l.listenEP != nil {
+		l.removePendingEndpoint(e)
+	}
+	e.isConnectNotified = true
+
+	// Update the receive window scaling. We can't do it before the
+	// handshake because it's possible that the peer doesn't support window
+	// scaling.
+	e.rcv.rcvWndScale = e.h.effectiveRcvWndScale()
+
+	// Clean up handshake state stored in the endpoint so that it can be GCed.
+	e.h = nil
+}
+
 // deliverAccepted delivers the newly-accepted endpoint to the listener. If the
 // endpoint has transitioned out of the listen state (acceptedChan is nil),
 // the new endpoint is closed instead.
@@ -423,26 +469,40 @@ func (e *endpoint) notifyAborted() {
 //
 // A limited number of these goroutines are allowed before TCP starts using SYN
 // cookies to accept connections.
-func (e *endpoint) handleSynSegment(ctx *listenContext, s *segment, opts *header.TCPSynOptions) {
-	defer ctx.synRcvdCount.dec()
-	defer func() {
-		e.mu.Lock()
-		e.decSynRcvdCount()
-		e.mu.Unlock()
-	}()
+//
+// Precondition: if ctx.listenEP != nil, ctx.listenEP.mu must be locked.
+func (e *endpoint) handleSynSegment(ctx *listenContext, s *segment, opts *header.TCPSynOptions) *tcpip.Error {
 	defer s.decRef()
 
-	n, err := ctx.createEndpointAndPerformHandshake(s, opts, &waiter.Queue{}, e.owner)
+	h, err := ctx.startHandshake(s, opts, &waiter.Queue{}, e.owner)
 	if err != nil {
 		e.stack.Stats().TCP.FailedConnectionAttempts.Increment()
 		e.stats.FailedConnectionAttempts.Increment()
-		return
+		e.synRcvdCount--
+		return err
 	}
-	ctx.removePendingEndpoint(n)
-	n.startAcceptedLoop()
-	e.stack.Stats().TCP.PassiveConnectionOpenings.Increment()
 
-	e.deliverAccepted(n)
+	go func() {
+		defer ctx.synRcvdCount.dec()
+		if err := h.complete(); err != nil {
+			e.stack.Stats().TCP.FailedConnectionAttempts.Increment()
+			e.stats.FailedConnectionAttempts.Increment()
+			ctx.cleanupFailedHandshake(h)
+			e.mu.Lock()
+			e.synRcvdCount--
+			e.mu.Unlock()
+			return
+		}
+		ctx.cleanupCompletedHandshake(h)
+		e.mu.Lock()
+		e.synRcvdCount--
+		e.mu.Unlock()
+		h.ep.startAcceptedLoop()
+		e.stack.Stats().TCP.PassiveConnectionOpenings.Increment()
+		e.deliverAccepted(h.ep)
+	}() // S/R-SAFE: synRcvdCount is the barrier.
+
+	return nil
 }
 
 func (e *endpoint) incSynRcvdCount() bool {
@@ -455,10 +515,6 @@ func (e *endpoint) incSynRcvdCount() bool {
 	return canInc
 }
 
-func (e *endpoint) decSynRcvdCount() {
-	e.synRcvdCount--
-}
-
 func (e *endpoint) acceptQueueIsFull() bool {
 	e.acceptMu.Lock()
 	full := len(e.acceptedChan)+e.synRcvdCount >= cap(e.acceptedChan)
@@ -468,7 +524,9 @@ func (e *endpoint) acceptQueueIsFull() bool {
 
 // handleListenSegment is called when a listening endpoint receives a segment
 // and needs to handle it.
-func (e *endpoint) handleListenSegment(ctx *listenContext, s *segment) {
+//
+// Precondition: if ctx.listenEP != nil, ctx.listenEP.mu must be locked.
+func (e *endpoint) handleListenSegment(ctx *listenContext, s *segment) *tcpip.Error {
 	e.rcvListMu.Lock()
 	rcvClosed := e.rcvClosed
 	e.rcvListMu.Unlock()
@@ -478,8 +536,7 @@ func (e *endpoint) handleListenSegment(ctx *listenContext, s *segment) {
 		// RFC 793 section 3.4 page 35 (figure 12) outlines that a RST
 		// must be sent in response to a SYN-ACK while in the listen
 		// state to prevent completing a handshake from an old SYN.
-		replyWithReset(s, e.sendTOS, e.ttl)
-		return
+		return replyWithReset(e.stack, s, e.sendTOS, e.ttl)
 	}
 
 	switch {
@@ -492,14 +549,14 @@ func (e *endpoint) handleListenSegment(ctx *listenContext, s *segment) {
 			//     backlog.
 			if !e.acceptQueueIsFull() && e.incSynRcvdCount() {
 				s.incRef()
-				go e.handleSynSegment(ctx, s, &opts) // S/R-SAFE: synRcvdCount is the barrier.
-				return
+				_ = e.handleSynSegment(ctx, s, &opts)
+				return nil
 			}
 			ctx.synRcvdCount.dec()
 			e.stack.Stats().TCP.ListenOverflowSynDrop.Increment()
 			e.stats.ReceiveErrors.ListenOverflowSynDrop.Increment()
 			e.stack.Stats().DroppedPackets.Increment()
-			return
+			return nil
 		} else {
 			// If cookies are in use but the endpoint accept queue
 			// is full then drop the syn.
@@ -507,10 +564,17 @@ func (e *endpoint) handleListenSegment(ctx *listenContext, s *segment) {
 				e.stack.Stats().TCP.ListenOverflowSynDrop.Increment()
 				e.stats.ReceiveErrors.ListenOverflowSynDrop.Increment()
 				e.stack.Stats().DroppedPackets.Increment()
-				return
+				return nil
 			}
 			cookie := ctx.createCookie(s.id, s.sequenceNumber, encodeMSS(opts.MSS))
 
+			route, err := e.stack.FindRoute(s.nicID, s.dstAddr, s.srcAddr, s.netProto, false /* multicastLoop */)
+			if err != nil {
+				return err
+			}
+			defer route.Release()
+			route.ResolveWith(s.remoteLinkAddr)
+
 			// Send SYN without window scaling because we currently
 			// don't encode this information in the cookie.
 			//
@@ -524,9 +588,9 @@ func (e *endpoint) handleListenSegment(ctx *listenContext, s *segment) {
 				TS:    opts.TS,
 				TSVal: tcpTimeStamp(time.Now(), timeStampOffset()),
 				TSEcr: opts.TSVal,
-				MSS:   calculateAdvertisedMSS(e.userMSS, s.route),
+				MSS:   calculateAdvertisedMSS(e.userMSS, route),
 			}
-			e.sendSynTCP(&s.route, tcpFields{
+			fields := tcpFields{
 				id:     s.id,
 				ttl:    e.ttl,
 				tos:    e.sendTOS,
@@ -534,8 +598,12 @@ func (e *endpoint) handleListenSegment(ctx *listenContext, s *segment) {
 				seq:    cookie,
 				ack:    s.sequenceNumber + 1,
 				rcvWnd: ctx.rcvWnd,
-			}, synOpts)
+			}
+			if err := e.sendSynTCP(route, fields, synOpts); err != nil {
+				return err
+			}
 			e.stack.Stats().TCP.ListenOverflowSynCookieSent.Increment()
+			return nil
 		}
 
 	case (s.flags & header.TCPFlagAck) != 0:
@@ -548,7 +616,7 @@ func (e *endpoint) handleListenSegment(ctx *listenContext, s *segment) {
 			e.stack.Stats().TCP.ListenOverflowAckDrop.Increment()
 			e.stats.ReceiveErrors.ListenOverflowAckDrop.Increment()
 			e.stack.Stats().DroppedPackets.Increment()
-			return
+			return nil
 		}
 
 		if !ctx.synRcvdCount.synCookiesInUse() {
@@ -567,8 +635,7 @@ func (e *endpoint) handleListenSegment(ctx *listenContext, s *segment) {
 			// The only time we should reach here when a connection
 			// was opened and closed really quickly and a delayed
 			// ACK was received from the sender.
-			replyWithReset(s, e.sendTOS, e.ttl)
-			return
+			return replyWithReset(e.stack, s, e.sendTOS, e.ttl)
 		}
 
 		iss := s.ackNumber - 1
@@ -588,7 +655,7 @@ func (e *endpoint) handleListenSegment(ctx *listenContext, s *segment) {
 		if !ok || int(data) >= len(mssTable) {
 			e.stack.Stats().TCP.ListenOverflowInvalidSynCookieRcvd.Increment()
 			e.stack.Stats().DroppedPackets.Increment()
-			return
+			return nil
 		}
 		e.stack.Stats().TCP.ListenOverflowSynCookieRcvd.Increment()
 		// Create newly accepted endpoint and deliver it.
@@ -609,7 +676,10 @@ func (e *endpoint) handleListenSegment(ctx *listenContext, s *segment) {
 			rcvdSynOptions.TSEcr = s.parsedOptions.TSEcr
 		}
 
-		n := ctx.createConnectingEndpoint(s, iss, irs, rcvdSynOptions, &waiter.Queue{})
+		n, err := ctx.createConnectingEndpoint(s, iss, irs, rcvdSynOptions, &waiter.Queue{})
+		if err != nil {
+			return err
+		}
 
 		n.mu.Lock()
 
@@ -623,7 +693,7 @@ func (e *endpoint) handleListenSegment(ctx *listenContext, s *segment) {
 
 			e.stack.Stats().TCP.FailedConnectionAttempts.Increment()
 			e.stats.FailedConnectionAttempts.Increment()
-			return
+			return nil
 		}
 
 		// Register new endpoint so that packets are routed to it.
@@ -633,7 +703,7 @@ func (e *endpoint) handleListenSegment(ctx *listenContext, s *segment) {
 
 			e.stack.Stats().TCP.FailedConnectionAttempts.Increment()
 			e.stats.FailedConnectionAttempts.Increment()
-			return
+			return err
 		}
 
 		n.isRegistered = true
@@ -671,14 +741,18 @@ func (e *endpoint) handleListenSegment(ctx *listenContext, s *segment) {
 		n.startAcceptedLoop()
 		e.stack.Stats().TCP.PassiveConnectionOpenings.Increment()
 		go e.deliverAccepted(n)
+		return nil
+
+	default:
+		return nil
 	}
 }
 
 // protocolListenLoop is the main loop of a listening TCP endpoint. It runs in
 // its own goroutine and is responsible for handling connection requests.
-func (e *endpoint) protocolListenLoop(rcvWnd seqnum.Size) *tcpip.Error {
+func (e *endpoint) protocolListenLoop(rcvWnd seqnum.Size) {
 	e.mu.Lock()
-	v6Only := e.v6only
+	v6Only := e.ops.GetV6Only()
 	ctx := newListenContext(e.stack, e, rcvWnd, v6Only, e.NetProto)
 
 	defer func() {
@@ -687,7 +761,7 @@ func (e *endpoint) protocolListenLoop(rcvWnd seqnum.Size) *tcpip.Error {
 		// to the endpoint.
 		e.setEndpointState(StateClose)
 
-		// close any endpoints in SYN-RCVD state.
+		// Close any endpoints in SYN-RCVD state.
 		ctx.closeAllPendingEndpoints()
 
 		// Do cleanup if needed.
@@ -715,12 +789,14 @@ func (e *endpoint) protocolListenLoop(rcvWnd seqnum.Size) *tcpip.Error {
 		case wakerForNotification:
 			n := e.fetchNotifications()
 			if n&notifyClose != 0 {
-				return nil
+				return
 			}
 			if n&notifyDrain != 0 {
 				for !e.segmentQueue.empty() {
 					s := e.segmentQueue.dequeue()
-					e.handleListenSegment(ctx, s)
+					// TODO(gvisor.dev/issue/4690): Better handle errors instead of
+					// silently dropping.
+					_ = e.handleListenSegment(ctx, s)
 					s.decRef()
 				}
 				close(e.drainDone)
@@ -739,7 +815,9 @@ func (e *endpoint) protocolListenLoop(rcvWnd seqnum.Size) *tcpip.Error {
 					break
 				}
 
-				e.handleListenSegment(ctx, s)
+				// TODO(gvisor.dev/issue/4690): Better handle errors instead of
+				// silently dropping.
+				_ = e.handleListenSegment(ctx, s)
 				s.decRef()
 			}
 
diff --git a/pkg/tcpip/transport/tcp/connect.go b/pkg/tcpip/transport/tcp/connect.go
index 0aaef495d..c944dccc0 100644
--- a/pkg/tcpip/transport/tcp/connect.go
+++ b/pkg/tcpip/transport/tcp/connect.go
@@ -16,6 +16,7 @@ package tcp
 
 import (
 	"encoding/binary"
+	"math"
 	"time"
 
 	"gvisor.dev/gvisor/pkg/rand"
@@ -102,21 +103,26 @@ type handshake struct {
 	// been received. This is required to stop retransmitting the
 	// original SYN-ACK when deferAccept is enabled.
 	acked bool
+
+	// sendSYNOpts holds the cached SYN options to be sent.
+	sendSYNOpts header.TCPSynOptions
 }
 
-func newHandshake(ep *endpoint, rcvWnd seqnum.Size) handshake {
-	h := handshake{
-		ep:          ep,
+func (e *endpoint) newHandshake() *handshake {
+	h := &handshake{
+		ep:          e,
 		active:      true,
-		rcvWnd:      rcvWnd,
-		rcvWndScale: ep.rcvWndScaleForHandshake(),
+		rcvWnd:      seqnum.Size(e.initialReceiveWindow()),
+		rcvWndScale: e.rcvWndScaleForHandshake(),
 	}
 	h.resetState()
+	// Store reference to handshake state in endpoint.
+	e.h = h
 	return h
 }
 
-func newPassiveHandshake(ep *endpoint, rcvWnd seqnum.Size, isn, irs seqnum.Value, opts *header.TCPSynOptions, deferAccept time.Duration) handshake {
-	h := newHandshake(ep, rcvWnd)
+func (e *endpoint) newPassiveHandshake(isn, irs seqnum.Value, opts *header.TCPSynOptions, deferAccept time.Duration) *handshake {
+	h := e.newHandshake()
 	h.resetToSynRcvd(isn, irs, opts, deferAccept)
 	return h
 }
@@ -128,7 +134,7 @@ func FindWndScale(wnd seqnum.Size) int {
 		return 0
 	}
 
-	max := seqnum.Size(0xffff)
+	max := seqnum.Size(math.MaxUint16)
 	s := 0
 	for wnd > max && s < header.MaxWndScale {
 		s++
@@ -293,9 +299,9 @@ func (h *handshake) synSentState(s *segment) *tcpip.Error {
 		MSS:           amss,
 	}
 	if ttl == 0 {
-		ttl = s.route.DefaultTTL()
+		ttl = h.ep.route.DefaultTTL()
 	}
-	h.ep.sendSynTCP(&s.route, tcpFields{
+	h.ep.sendSynTCP(h.ep.route, tcpFields{
 		id:     h.ep.ID,
 		ttl:    ttl,
 		tos:    h.ep.sendTOS,
@@ -356,7 +362,7 @@ func (h *handshake) synRcvdState(s *segment) *tcpip.Error {
 			SACKPermitted: h.ep.sackPermitted,
 			MSS:           h.ep.amss,
 		}
-		h.ep.sendSynTCP(&s.route, tcpFields{
+		h.ep.sendSynTCP(h.ep.route, tcpFields{
 			id:     h.ep.ID,
 			ttl:    h.ep.ttl,
 			tos:    h.ep.sendTOS,
@@ -491,17 +497,20 @@ func (h *handshake) resolveRoute() *tcpip.Error {
 				h.ep.mu.Lock()
 			}
 			if n&notifyError != 0 {
-				return h.ep.LastError()
+				return h.ep.lastErrorLocked()
 			}
 		}
 
 		// Wait for notification.
-		index, _ = s.Fetch(true)
+		h.ep.mu.Unlock()
+		index, _ = s.Fetch(true /* block */)
+		h.ep.mu.Lock()
 	}
 }
 
-// execute executes the TCP 3-way handshake.
-func (h *handshake) execute() *tcpip.Error {
+// start resolves the route if necessary and sends the first
+// SYN/SYN-ACK.
+func (h *handshake) start() *tcpip.Error {
 	if h.ep.route.IsResolutionRequired() {
 		if err := h.resolveRoute(); err != nil {
 			return err
@@ -509,19 +518,7 @@ func (h *handshake) execute() *tcpip.Error {
 	}
 
 	h.startTime = time.Now()
-	// Initialize the resend timer.
-	resendWaker := sleep.Waker{}
-	timeOut := time.Duration(time.Second)
-	rt := time.AfterFunc(timeOut, resendWaker.Assert)
-	defer rt.Stop()
-
-	// Set up the wakers.
-	s := sleep.Sleeper{}
-	s.AddWaker(&resendWaker, wakerForResend)
-	s.AddWaker(&h.ep.notificationWaker, wakerForNotification)
-	s.AddWaker(&h.ep.newSegmentWaker, wakerForNewSegment)
-	defer s.Done()
-
+	h.ep.amss = calculateAdvertisedMSS(h.ep.userMSS, h.ep.route)
 	var sackEnabled tcpip.TCPSACKEnabled
 	if err := h.ep.stack.TransportProtocolOption(ProtocolNumber, &sackEnabled); err != nil {
 		// If stack returned an error when checking for SACKEnabled
@@ -529,10 +526,6 @@ func (h *handshake) execute() *tcpip.Error {
 		sackEnabled = false
 	}
 
-	// Send the initial SYN segment and loop until the handshake is
-	// completed.
-	h.ep.amss = calculateAdvertisedMSS(h.ep.userMSS, h.ep.route)
-
 	synOpts := header.TCPSynOptions{
 		WS:            h.rcvWndScale,
 		TS:            true,
@@ -542,9 +535,8 @@ func (h *handshake) execute() *tcpip.Error {
 		MSS:           h.ep.amss,
 	}
 
-	// Execute is also called in a listen context so we want to make sure we
-	// only send the TS/SACK option when we received the TS/SACK in the
-	// initial SYN.
+	// start() is also called in a listen context so we want to make sure we only
+	// send the TS/SACK option when we received the TS/SACK in the initial SYN.
 	if h.state == handshakeSynRcvd {
 		synOpts.TS = h.ep.sendTSOk
 		synOpts.SACKPermitted = h.ep.sackPermitted && bool(sackEnabled)
@@ -555,7 +547,8 @@ func (h *handshake) execute() *tcpip.Error {
 		}
 	}
 
-	h.ep.sendSynTCP(&h.ep.route, tcpFields{
+	h.sendSYNOpts = synOpts
+	h.ep.sendSynTCP(h.ep.route, tcpFields{
 		id:     h.ep.ID,
 		ttl:    h.ep.ttl,
 		tos:    h.ep.sendTOS,
@@ -564,19 +557,37 @@ func (h *handshake) execute() *tcpip.Error {
 		ack:    h.ackNum,
 		rcvWnd: h.rcvWnd,
 	}, synOpts)
+	return nil
+}
+
+// complete completes the TCP 3-way handshake initiated by h.start().
+func (h *handshake) complete() *tcpip.Error {
+	// Set up the wakers.
+	s := sleep.Sleeper{}
+	resendWaker := sleep.Waker{}
+	s.AddWaker(&resendWaker, wakerForResend)
+	s.AddWaker(&h.ep.notificationWaker, wakerForNotification)
+	s.AddWaker(&h.ep.newSegmentWaker, wakerForNewSegment)
+	defer s.Done()
 
+	// Initialize the resend timer.
+	timer, err := newBackoffTimer(time.Second, MaxRTO, resendWaker.Assert)
+	if err != nil {
+		return err
+	}
+	defer timer.stop()
 	for h.state != handshakeCompleted {
+		// Unlock before blocking, and reacquire again afterwards (h.ep.mu is held
+		// throughout handshake processing).
 		h.ep.mu.Unlock()
-		index, _ := s.Fetch(true)
+		index, _ := s.Fetch(true /* block */)
 		h.ep.mu.Lock()
 		switch index {
 
 		case wakerForResend:
-			timeOut *= 2
-			if timeOut > MaxRTO {
-				return tcpip.ErrTimeout
+			if err := timer.reset(); err != nil {
+				return err
 			}
-			rt.Reset(timeOut)
 			// Resend the SYN/SYN-ACK only if the following conditions hold.
 			//  - It's an active handshake (deferAccept does not apply)
 			//  - It's a passive handshake and we have not yet got the final-ACK.
@@ -586,7 +597,7 @@ func (h *handshake) execute() *tcpip.Error {
 			// the connection with another ACK or data (as ACKs are never
 			// retransmitted on their own).
 			if h.active || !h.acked || h.deferAccept != 0 && time.Since(h.startTime) > h.deferAccept {
-				h.ep.sendSynTCP(&h.ep.route, tcpFields{
+				h.ep.sendSynTCP(h.ep.route, tcpFields{
 					id:     h.ep.ID,
 					ttl:    h.ep.ttl,
 					tos:    h.ep.sendTOS,
@@ -594,7 +605,7 @@ func (h *handshake) execute() *tcpip.Error {
 					seq:    h.iss,
 					ack:    h.ackNum,
 					rcvWnd: h.rcvWnd,
-				}, synOpts)
+				}, h.sendSYNOpts)
 			}
 
 		case wakerForNotification:
@@ -620,9 +631,8 @@ func (h *handshake) execute() *tcpip.Error {
 				h.ep.mu.Lock()
 			}
 			if n&notifyError != 0 {
-				return h.ep.LastError()
+				return h.ep.lastErrorLocked()
 			}
-
 		case wakerForNewSegment:
 			if err := h.processSegments(); err != nil {
 				return err
@@ -633,6 +643,34 @@ func (h *handshake) execute() *tcpip.Error {
 	return nil
 }
 
+type backoffTimer struct {
+	timeout    time.Duration // current delay; doubled by every reset()
+	maxTimeout time.Duration // ceiling above which ErrTimeout is returned
+	t          *time.Timer   // underlying timer that invokes the callback
+}
+
+func newBackoffTimer(timeout, maxTimeout time.Duration, f func()) (*backoffTimer, *tcpip.Error) {
+	if timeout > maxTimeout { // an initial delay past the ceiling is a timeout
+		return nil, tcpip.ErrTimeout
+	}
+	bt := &backoffTimer{timeout: timeout, maxTimeout: maxTimeout}
+	bt.t = time.AfterFunc(timeout, f) // f is invoked after the initial delay
+	return bt, nil
+}
+
+func (bt *backoffTimer) reset() *tcpip.Error {
+	bt.timeout *= 2 // exponential backoff
+	if bt.timeout > bt.maxTimeout { // use the configured ceiling, not MaxRTO
+		return tcpip.ErrTimeout
+	}
+	bt.t.Reset(bt.timeout)
+	return nil
+}
+
+func (bt *backoffTimer) stop() {
+	bt.t.Stop()
+}
+
 func parseSynSegmentOptions(s *segment) header.TCPSynOptions {
 	synOpts := header.ParseSynOptions(s.options, s.flagIsSet(header.TCPFlagAck))
 	if synOpts.TS {
@@ -767,7 +805,7 @@ func buildTCPHdr(r *stack.Route, tf tcpFields, pkt *stack.PacketBuffer, gso *sta
 		// TCP header, then the kernel calculate a checksum of the
 		// header and data and get the right sum of the TCP packet.
 		tcp.SetChecksum(xsum)
-	} else if r.Capabilities()&stack.CapabilityTXChecksumOffload == 0 {
+	} else if r.RequiresTXTransportChecksum() {
 		xsum = header.ChecksumVV(pkt.Data, xsum)
 		tcp.SetChecksum(^tcp.CalculateChecksum(xsum))
 	}
@@ -781,8 +819,8 @@ func sendTCPBatch(r *stack.Route, tf tcpFields, data buffer.VectorisedView, gso
 	data = data.Clone(nil)
 
 	optLen := len(tf.opts)
-	if tf.rcvWnd > 0xffff {
-		tf.rcvWnd = 0xffff
+	if tf.rcvWnd > math.MaxUint16 {
+		tf.rcvWnd = math.MaxUint16
 	}
 
 	mss := int(gso.MSS)
@@ -826,8 +864,8 @@ func sendTCPBatch(r *stack.Route, tf tcpFields, data buffer.VectorisedView, gso
 // network endpoint and under the provided identity.
 func sendTCP(r *stack.Route, tf tcpFields, data buffer.VectorisedView, gso *stack.GSO, owner tcpip.PacketOwner) *tcpip.Error {
 	optLen := len(tf.opts)
-	if tf.rcvWnd > 0xffff {
-		tf.rcvWnd = 0xffff
+	if tf.rcvWnd > math.MaxUint16 {
+		tf.rcvWnd = math.MaxUint16
 	}
 
 	if r.Loop&stack.PacketLoop == 0 && gso != nil && gso.Type == stack.GSOSW && int(gso.MSS) < data.Size() {
@@ -902,7 +940,7 @@ func (e *endpoint) sendRaw(data buffer.VectorisedView, flags byte, seq, ack seqn
 		sackBlocks = e.sack.Blocks[:e.sack.NumBlocks]
 	}
 	options := e.makeOptions(sackBlocks)
-	err := e.sendTCP(&e.route, tcpFields{
+	err := e.sendTCP(e.route, tcpFields{
 		id:     e.ID,
 		ttl:    e.ttl,
 		tos:    e.sendTOS,
@@ -963,7 +1001,7 @@ func (e *endpoint) resetConnectionLocked(err *tcpip.Error) {
 	// Only send a reset if the connection is being aborted for a reason
 	// other than receiving a reset.
 	e.setEndpointState(StateError)
-	e.HardError = err
+	e.hardError = err
 	if err != tcpip.ErrConnectionReset && err != tcpip.ErrTimeout {
 		// The exact sequence number to be used for the RST is the same as the
 		// one used by Linux. We need to handle the case of window being shrunk
@@ -1040,13 +1078,13 @@ func (e *endpoint) transitionToStateCloseLocked() {
 // only when the endpoint is in StateClose and we want to deliver the segment
 // to any other listening endpoint. We reply with RST if we cannot find one.
 func (e *endpoint) tryDeliverSegmentFromClosedEndpoint(s *segment) {
-	ep := e.stack.FindTransportEndpoint(e.NetProto, e.TransProto, e.ID, &s.route)
-	if ep == nil && e.NetProto == header.IPv6ProtocolNumber && e.EndpointInfo.TransportEndpointInfo.ID.LocalAddress.To4() != "" {
+	ep := e.stack.FindTransportEndpoint(e.NetProto, e.TransProto, e.ID, s.nicID)
+	if ep == nil && e.NetProto == header.IPv6ProtocolNumber && e.TransportEndpointInfo.ID.LocalAddress.To4() != "" {
 		// Dual-stack socket, try IPv4.
-		ep = e.stack.FindTransportEndpoint(header.IPv4ProtocolNumber, e.TransProto, e.ID, &s.route)
+		ep = e.stack.FindTransportEndpoint(header.IPv4ProtocolNumber, e.TransProto, e.ID, s.nicID)
 	}
 	if ep == nil {
-		replyWithReset(s, stack.DefaultTOS, s.route.DefaultTTL())
+		replyWithReset(e.stack, s, stack.DefaultTOS, 0 /* ttl */)
 		s.decRef()
 		return
 	}
@@ -1102,7 +1140,7 @@ func (e *endpoint) handleReset(s *segment) (ok bool, err *tcpip.Error) {
 		//  delete the TCB, and return.
 		case StateCloseWait:
 			e.transitionToStateCloseLocked()
-			e.HardError = tcpip.ErrAborted
+			e.hardError = tcpip.ErrAborted
 			e.notifyProtocolGoroutine(notifyTickleWorker)
 			return false, nil
 		default:
@@ -1247,7 +1285,7 @@ func (e *endpoint) keepaliveTimerExpired() *tcpip.Error {
 	userTimeout := e.userTimeout
 
 	e.keepalive.Lock()
-	if !e.keepalive.enabled || !e.keepalive.timer.checkExpiration() {
+	if !e.SocketOptions().GetKeepAlive() || !e.keepalive.timer.checkExpiration() {
 		e.keepalive.Unlock()
 		return nil
 	}
@@ -1284,7 +1322,7 @@ func (e *endpoint) resetKeepaliveTimer(receivedData bool) {
 	}
 	// Start the keepalive timer IFF it's enabled and there is no pending
 	// data to send.
-	if !e.keepalive.enabled || e.snd == nil || e.snd.sndUna != e.snd.sndNxt {
+	if !e.SocketOptions().GetKeepAlive() || e.snd == nil || e.snd.sndUna != e.snd.sndNxt {
 		e.keepalive.timer.disable()
 		e.keepalive.Unlock()
 		return
@@ -1314,7 +1352,6 @@ func (e *endpoint) protocolMainLoop(handshake bool, wakerInitDone chan<- struct{
 
 	epilogue := func() {
 		// e.mu is expected to be hold upon entering this section.
-
 		if e.snd != nil {
 			e.snd.resendTimer.cleanup()
 		}
@@ -1338,20 +1375,13 @@ func (e *endpoint) protocolMainLoop(handshake bool, wakerInitDone chan<- struct{
 	}
 
 	if handshake {
-		// This is an active connection, so we must initiate the 3-way
-		// handshake, and then inform potential waiters about its
-		// completion.
-		initialRcvWnd := e.initialReceiveWindow()
-		h := newHandshake(e, seqnum.Size(initialRcvWnd))
-		h.ep.setEndpointState(StateSynSent)
-
-		if err := h.execute(); err != nil {
+		if err := e.h.complete(); err != nil {
 			e.lastErrorMu.Lock()
 			e.lastError = err
 			e.lastErrorMu.Unlock()
 
 			e.setEndpointState(StateError)
-			e.HardError = err
+			e.hardError = err
 
 			e.workerCleanup = true
 			// Lock released below.
@@ -1360,13 +1390,12 @@ func (e *endpoint) protocolMainLoop(handshake bool, wakerInitDone chan<- struct{
 		}
 	}
 
-	e.keepalive.timer.init(&e.keepalive.waker)
-	defer e.keepalive.timer.cleanup()
-
 	drained := e.drainDone != nil
 	if drained {
 		close(e.drainDone)
+		e.mu.Unlock()
 		<-e.undrain
+		e.mu.Lock()
 	}
 
 	// Set up the functions that will be called when the main protocol loop
@@ -1445,7 +1474,6 @@ func (e *endpoint) protocolMainLoop(handshake bool, wakerInitDone chan<- struct{
 						// The socket has been closed and we are in FIN_WAIT2
 						// so start the FIN_WAIT2 timer.
 						closeTimer = time.AfterFunc(e.tcpLingerTimeout, closeWaker.Assert)
-						e.waiterQueue.Notify(waiter.EventHUp | waiter.EventErr | waiter.EventIn | waiter.EventOut)
 					}
 				}
 
@@ -1535,7 +1563,7 @@ loop:
 		}
 
 		e.mu.Unlock()
-		v, _ := s.Fetch(true)
+		v, _ := s.Fetch(true /* block */)
 		e.mu.Lock()
 
 		// We need to double check here because the notification may be
@@ -1608,7 +1636,7 @@ func (e *endpoint) handleTimeWaitSegments() (extendTimeWait bool, reuseTW func()
 		}
 		extTW, newSyn := e.rcv.handleTimeWaitSegment(s)
 		if newSyn {
-			info := e.EndpointInfo.TransportEndpointInfo
+			info := e.TransportEndpointInfo
 			newID := info.ID
 			newID.RemoteAddress = ""
 			newID.RemotePort = 0
@@ -1620,7 +1648,7 @@ func (e *endpoint) handleTimeWaitSegments() (extendTimeWait bool, reuseTW func()
 				netProtos = []tcpip.NetworkProtocolNumber{header.IPv4ProtocolNumber, header.IPv6ProtocolNumber}
 			}
 			for _, netProto := range netProtos {
-				if listenEP := e.stack.FindTransportEndpoint(netProto, info.TransProto, newID, &s.route); listenEP != nil {
+				if listenEP := e.stack.FindTransportEndpoint(netProto, info.TransProto, newID, s.nicID); listenEP != nil {
 					tcpEP := listenEP.(*endpoint)
 					if EndpointState(tcpEP.State()) == StateListen {
 						reuseTW = func() {
@@ -1683,7 +1711,7 @@ func (e *endpoint) doTimeWait() (twReuse func()) {
 
 	for {
 		e.mu.Unlock()
-		v, _ := s.Fetch(true)
+		v, _ := s.Fetch(true /* block */)
 		e.mu.Lock()
 		switch v {
 		case newSegment:
diff --git a/pkg/tcpip/transport/tcp/dispatcher.go b/pkg/tcpip/transport/tcp/dispatcher.go
index 98aecab9e..21162f01a 100644
--- a/pkg/tcpip/transport/tcp/dispatcher.go
+++ b/pkg/tcpip/transport/tcp/dispatcher.go
@@ -172,10 +172,11 @@ func (d *dispatcher) wait() {
 	d.wg.Wait()
 }
 
-func (d *dispatcher) queuePacket(r *stack.Route, stackEP stack.TransportEndpoint, id stack.TransportEndpointID, pkt *stack.PacketBuffer) {
+func (d *dispatcher) queuePacket(stackEP stack.TransportEndpoint, id stack.TransportEndpointID, pkt *stack.PacketBuffer) {
 	ep := stackEP.(*endpoint)
-	s := newSegment(r, id, pkt)
-	if !s.parse() {
+
+	s := newIncomingSegment(id, pkt)
+	if !s.parse(pkt.RXTransportChecksumValidated) {
 		ep.stack.Stats().MalformedRcvdPackets.Increment()
 		ep.stack.Stats().TCP.InvalidSegmentsReceived.Increment()
 		ep.stats.ReceiveErrors.MalformedPacketsReceived.Increment()
diff --git a/pkg/tcpip/transport/tcp/dual_stack_test.go b/pkg/tcpip/transport/tcp/dual_stack_test.go
index 560b4904c..1d1b01a6c 100644
--- a/pkg/tcpip/transport/tcp/dual_stack_test.go
+++ b/pkg/tcpip/transport/tcp/dual_stack_test.go
@@ -236,6 +236,25 @@ func TestV6ConnectWhenBoundToWildcard(t *testing.T) {
 	testV6Connect(t, c)
 }
 
+func TestStackV6OnlyConnectWhenBoundToWildcard(t *testing.T) {
+	c := context.NewWithOpts(t, context.Options{
+		EnableV6: true,
+		MTU:      defaultMTU,
+	})
+	defer c.Cleanup()
+
+	// Create a v6 endpoint but don't set the v6-only TCP option.
+	c.CreateV6Endpoint(false)
+
+	// Bind to wildcard.
+	if err := c.EP.Bind(tcpip.FullAddress{}); err != nil {
+		t.Fatalf("Bind failed: %v", err)
+	}
+
+	// Test the connection request.
+	testV6Connect(t, c)
+}
+
 func TestV6ConnectWhenBoundToLocalAddress(t *testing.T) {
 	c := context.New(t, defaultMTU)
 	defer c.Cleanup()
@@ -386,14 +405,6 @@ func testV4Accept(t *testing.T, c *context.Context) {
 		}
 	}
 
-	// Make sure we get the same error when calling the original ep and the
-	// new one. This validates that v4-mapped endpoints are still able to
-	// query the V6Only flag, whereas pure v4 endpoints are not.
-	_, expected := c.EP.GetSockOptBool(tcpip.V6OnlyOption)
-	if _, err := nep.GetSockOptBool(tcpip.V6OnlyOption); err != expected {
-		t.Fatalf("GetSockOpt returned unexpected value: got %v, want %v", err, expected)
-	}
-
 	// Check the peer address.
 	addr, err := nep.GetRemoteAddress()
 	if err != nil {
@@ -511,12 +522,12 @@ func TestV6AcceptOnV6(t *testing.T) {
 	c.WQ.EventRegister(&we, waiter.EventIn)
 	defer c.WQ.EventUnregister(&we)
 	var addr tcpip.FullAddress
-	nep, _, err := c.EP.Accept(&addr)
+	_, _, err := c.EP.Accept(&addr)
 	if err == tcpip.ErrWouldBlock {
 		// Wait for connection to be established.
 		select {
 		case <-ch:
-			nep, _, err = c.EP.Accept(&addr)
+			_, _, err = c.EP.Accept(&addr)
 			if err != nil {
 				t.Fatalf("Accept failed: %v", err)
 			}
@@ -529,12 +540,6 @@ func TestV6AcceptOnV6(t *testing.T) {
 	if addr.Addr != context.TestV6Addr {
 		t.Errorf("Unexpected remote address: got %s, want %s", addr.Addr, context.TestV6Addr)
 	}
-
-	// Make sure we can still query the v6 only status of the new endpoint,
-	// that is, that it is in fact a v6 socket.
-	if _, err := nep.GetSockOptBool(tcpip.V6OnlyOption); err != nil {
-		t.Errorf("GetSockOptBool(tcpip.V6OnlyOption) failed: %s", err)
-	}
 }
 
 func TestV4AcceptOnV4(t *testing.T) {
diff --git a/pkg/tcpip/transport/tcp/endpoint.go b/pkg/tcpip/transport/tcp/endpoint.go
index 3bcd3923a..7a37c10bb 100644
--- a/pkg/tcpip/transport/tcp/endpoint.go
+++ b/pkg/tcpip/transport/tcp/endpoint.go
@@ -310,16 +310,12 @@ type Stats struct {
 func (*Stats) IsEndpointStats() {}
 
 // EndpointInfo holds useful information about a transport endpoint which
-// can be queried by monitoring tools.
+// can be queried by monitoring tools. This exists to allow tcp-only state to
+// be exposed.
 //
 // +stateify savable
 type EndpointInfo struct {
 	stack.TransportEndpointInfo
-
-	// HardError is meaningful only when state is stateError. It stores the
-	// error to be returned when read/write syscalls are called and the
-	// endpoint is in this state. HardError is protected by endpoint mu.
-	HardError *tcpip.Error `state:".(string)"`
 }
 
 // IsEndpointInfo is an empty method to implement the tcpip.EndpointInfo
@@ -367,6 +363,7 @@ func (*EndpointInfo) IsEndpointInfo() {}
 // +stateify savable
 type endpoint struct {
 	EndpointInfo
+	tcpip.DefaultSocketOptionsHandler
 
 	// endpointEntry is used to queue endpoints for processing to the
 	// a given tcp processor goroutine.
@@ -386,6 +383,11 @@ type endpoint struct {
 	waiterQueue *waiter.Queue `state:"wait"`
 	uniqueID    uint64
 
+	// hardError is meaningful only when state is stateError. It stores the
+	// error to be returned when read/write syscalls are called and the
+	// endpoint is in this state. hardError is protected by endpoint mu.
+	hardError *tcpip.Error `state:".(string)"`
+
 	// lastError represents the last error that the endpoint reported;
 	// access to it is protected by the following mutex.
 	lastErrorMu sync.Mutex   `state:"nosave"`
@@ -421,7 +423,10 @@ type endpoint struct {
 
 	// mu protects all endpoint fields unless documented otherwise. mu must
 	// be acquired before interacting with the endpoint fields.
-	mu          sync.Mutex `state:"nosave"`
+	//
+	// During handshake, mu is locked by the protocol listen goroutine and
+	// released by the handshake completion goroutine.
+	mu          sync.CrossGoroutineMutex `state:"nosave"`
 	ownedByUser uint32
 
 	// state must be read/set using the EndpointState()/setEndpointState()
@@ -436,13 +441,14 @@ type endpoint struct {
 	isPortReserved    bool `state:"manual"`
 	isRegistered      bool `state:"manual"`
 	boundNICID        tcpip.NICID
-	route             stack.Route `state:"manual"`
+	route             *stack.Route `state:"manual"`
 	ttl               uint8
-	v6only            bool
 	isConnectNotified bool
-	// TCP should never broadcast but Linux nevertheless supports enabling/
-	// disabling SO_BROADCAST, albeit as a NOOP.
-	broadcast bool
+
+	// h stores a reference to the current handshake state if the endpoint is in
+	// the SYN-SENT or SYN-RECV states, in which case endpoint == endpoint.h.ep.
+	// nil otherwise.
+	h *handshake `state:"nosave"`
 
 	// portFlags stores the current values of port related flags.
 	portFlags ports.Flags
@@ -504,24 +510,9 @@ type endpoint struct {
 	// delay is a boolean (0 is false) and must be accessed atomically.
 	delay uint32
 
-	// cork holds back segments until full.
-	//
-	// cork is a boolean (0 is false) and must be accessed atomically.
-	cork uint32
-
 	// scoreboard holds TCP SACK Scoreboard information for this endpoint.
 	scoreboard *SACKScoreboard
 
-	// The options below aren't implemented, but we remember the user
-	// settings because applications expect to be able to set/query these
-	// options.
-
-	// slowAck holds the negated state of quick ack. It is stubbed out and
-	// does nothing.
-	//
-	// slowAck is a boolean (0 is false) and must be accessed atomically.
-	slowAck uint32
-
 	// segmentQueue is used to hand received segments to the protocol
 	// goroutine. Segments are queued as long as the queue is not full,
 	// and dropped when it is.
@@ -683,8 +674,8 @@ type endpoint struct {
 	// owner is used to get uid and gid of the packet.
 	owner tcpip.PacketOwner
 
-	// linger is used for SO_LINGER socket option.
-	linger tcpip.LingerOption
+	// ops is used to get socket level options.
+	ops tcpip.SocketOptions
 }
 
 // UniqueID implements stack.TransportEndpoint.UniqueID.
@@ -696,7 +687,7 @@ func (e *endpoint) UniqueID() uint64 {
 //
 // If userMSS is non-zero and is not greater than the maximum possible MSS for
 // r, it will be used; otherwise, the maximum possible MSS will be used.
-func calculateAdvertisedMSS(userMSS uint16, r stack.Route) uint16 {
+func calculateAdvertisedMSS(userMSS uint16, r *stack.Route) uint16 {
 	// The maximum possible MSS is dependent on the route.
 	// TODO(b/143359391): Respect TCP Min and Max size.
 	maxMSS := uint16(r.MTU() - header.TCPMinimumSize)
@@ -721,9 +712,9 @@ func (e *endpoint) LockUser() {
 	for {
 		// Try first if the sock is locked then check if it's owned
 		// by another user goroutine if not then we spin, otherwise
-		// we just goto sleep on the Lock() and wait.
+		// we just go to sleep on the Lock() and wait.
 		if !e.mu.TryLock() {
-			// If socket is owned by the user then just goto sleep
+			// If socket is owned by the user then just go to sleep
 			// as the lock could be held for a reasonably long time.
 			if atomic.LoadUint32(&e.ownedByUser) == 1 {
 				e.mu.Lock()
@@ -845,7 +836,6 @@ func (e *endpoint) recentTimestamp() uint32 {
 // +stateify savable
 type keepalive struct {
 	sync.Mutex `state:"nosave"`
-	enabled    bool
 	idle       time.Duration
 	interval   time.Duration
 	count      int
@@ -879,6 +869,9 @@ func newEndpoint(s *stack.Stack, netProto tcpip.NetworkProtocolNumber, waiterQue
 		windowClamp:   DefaultReceiveBufferSize,
 		maxSynRetries: DefaultSynRetries,
 	}
+	e.ops.InitHandler(e)
+	e.ops.SetMulticastLoop(true)
+	e.ops.SetQuickAck(true)
 
 	var ss tcpip.TCPSendBufferSizeRangeOption
 	if err := s.TransportProtocolOption(ProtocolNumber, &ss); err == nil {
@@ -902,7 +895,7 @@ func newEndpoint(s *stack.Stack, netProto tcpip.NetworkProtocolNumber, waiterQue
 
 	var de tcpip.TCPDelayEnabled
 	if err := s.TransportProtocolOption(ProtocolNumber, &de); err == nil && de {
-		e.SetSockOptBool(tcpip.DelayOption, true)
+		e.ops.SetDelayOption(true)
 	}
 
 	var tcpLT tcpip.TCPLingerTimeoutOption
@@ -922,6 +915,7 @@ func newEndpoint(s *stack.Stack, netProto tcpip.NetworkProtocolNumber, waiterQue
 	e.segmentQueue.ep = e
 	e.tsOffset = timeStampOffset()
 	e.acceptCond = sync.NewCond(&e.acceptMu)
+	e.keepalive.timer.init(&e.keepalive.waker)
 
 	return e
 }
@@ -1043,7 +1037,8 @@ func (e *endpoint) Close() {
 		return
 	}
 
-	if e.linger.Enabled && e.linger.Timeout == 0 {
+	linger := e.SocketOptions().GetLinger()
+	if linger.Enabled && linger.Timeout == 0 {
 		s := e.EndpointState()
 		isResetState := s == StateEstablished || s == StateCloseWait || s == StateFinWait1 || s == StateFinWait2 || s == StateSynRecv
 		if isResetState {
@@ -1069,9 +1064,7 @@ func (e *endpoint) Close() {
 	e.closeNoShutdownLocked()
 }
 
-// closeNoShutdown closes the endpoint without doing a full shutdown. This is
-// used when a connection needs to be aborted with a RST and we want to skip
-// a full 4 way TCP shutdown.
+// closeNoShutdownLocked closes the endpoint without doing a full shutdown.
 func (e *endpoint) closeNoShutdownLocked() {
 	// For listening sockets, we always release ports inline so that they
 	// are immediately available for reuse after Close() is called. If also
@@ -1098,6 +1091,7 @@ func (e *endpoint) closeNoShutdownLocked() {
 		return
 	}
 
+	eventMask := waiter.EventIn | waiter.EventOut
 	// Either perform the local cleanup or kick the worker to make sure it
 	// knows it needs to cleanup.
 	if e.workerRunning {
@@ -1109,8 +1103,12 @@ func (e *endpoint) closeNoShutdownLocked() {
 	} else {
 		e.transitionToStateCloseLocked()
 		// Notify that the endpoint is closed.
-		e.waiterQueue.Notify(waiter.EventHUp)
+		eventMask |= waiter.EventHUp
 	}
+
+	// The TCP closing state-machine would eventually notify EventHUp, but we
+	// notify EventIn|EventOut immediately to unblock any blocked waiters.
+	e.waiterQueue.Notify(eventMask)
 }
 
 // closePendingAcceptableConnections closes all connections that have completed
@@ -1143,6 +1141,7 @@ func (e *endpoint) cleanupLocked() {
 	// Close all endpoints that might have been accepted by TCP but not by
 	// the client.
 	e.closePendingAcceptableConnectionsLocked()
+	e.keepalive.timer.cleanup()
 
 	e.workerCleanup = false
 
@@ -1159,7 +1158,11 @@ func (e *endpoint) cleanupLocked() {
 	e.boundPortFlags = ports.Flags{}
 	e.boundDest = tcpip.FullAddress{}
 
-	e.route.Release()
+	if e.route != nil {
+		e.route.Release()
+		e.route = nil
+	}
+
 	e.stack.CompleteTransportEndpointCleanup(e)
 	tcpip.DeleteDanglingEndpoint(e)
 }
@@ -1269,11 +1272,20 @@ func (e *endpoint) ModerateRecvBuf(copied int) {
 	e.rcvListMu.Unlock()
 }
 
+// SetOwner implements tcpip.Endpoint.SetOwner.
 func (e *endpoint) SetOwner(owner tcpip.PacketOwner) {
 	e.owner = owner
 }
 
-func (e *endpoint) LastError() *tcpip.Error {
+// hardErrorLocked returns e.hardError and clears it. e.mu must be held.
+func (e *endpoint) hardErrorLocked() *tcpip.Error {
+	err := e.hardError
+	e.hardError = nil
+	return err
+}
+
+// Preconditions: e.mu must be held to call this function.
+func (e *endpoint) lastErrorLocked() *tcpip.Error {
 	e.lastErrorMu.Lock()
 	defer e.lastErrorMu.Unlock()
 	err := e.lastError
@@ -1281,6 +1293,16 @@ func (e *endpoint) LastError() *tcpip.Error {
 	return err
 }
 
+// LastError implements tcpip.Endpoint.LastError.
+func (e *endpoint) LastError() *tcpip.Error {
+	e.LockUser()
+	defer e.UnlockUser()
+	if err := e.hardErrorLocked(); err != nil { // a pending hard error is reported first
+		return err
+	}
+	return e.lastErrorLocked()
+}
+
 // Read reads data from the endpoint.
 func (e *endpoint) Read(*tcpip.FullAddress) (buffer.View, tcpip.ControlMessages, *tcpip.Error) {
 	e.LockUser()
@@ -1302,9 +1324,11 @@ func (e *endpoint) Read(*tcpip.FullAddress) (buffer.View, tcpip.ControlMessages,
 	bufUsed := e.rcvBufUsed
 	if s := e.EndpointState(); !s.connected() && s != StateClose && bufUsed == 0 {
 		e.rcvListMu.Unlock()
-		he := e.HardError
 		if s == StateError {
-			return buffer.View{}, tcpip.ControlMessages{}, he
+			if err := e.hardErrorLocked(); err != nil {
+				return buffer.View{}, tcpip.ControlMessages{}, err
+			}
+			return buffer.View{}, tcpip.ControlMessages{}, tcpip.ErrClosedForReceive
 		}
 		e.stats.ReadErrors.NotConnected.Increment()
 		return buffer.View{}, tcpip.ControlMessages{}, tcpip.ErrNotConnected
@@ -1360,9 +1384,13 @@ func (e *endpoint) readLocked() (buffer.View, *tcpip.Error) {
 // indicating the reason why it's not writable.
 // Caller must hold e.mu and e.sndBufMu
 func (e *endpoint) isEndpointWritableLocked() (int, *tcpip.Error) {
+	// The endpoint cannot be written to if it's not connected.
 	switch s := e.EndpointState(); {
 	case s == StateError:
-		return 0, e.HardError
+		if err := e.hardErrorLocked(); err != nil {
+			return 0, err
+		}
+		return 0, tcpip.ErrClosedForSend
 	case !s.connecting() && !s.connected():
 		return 0, tcpip.ErrClosedForSend
 	case s.connecting():
@@ -1425,7 +1453,7 @@ func (e *endpoint) Write(p tcpip.Payloader, opts tcpip.WriteOptions) (int64, <-c
 
 	queueAndSend := func() (int64, <-chan struct{}, *tcpip.Error) {
 		// Add data to the send queue.
-		s := newSegmentFromView(&e.route, e.ID, v)
+		s := newOutgoingSegment(e.ID, v)
 		e.sndBufUsed += len(v)
 		e.sndBufInQueue += seqnum.Size(len(v))
 		e.sndQueue.PushBack(s)
@@ -1468,7 +1496,7 @@ func (e *endpoint) Write(p tcpip.Payloader, opts tcpip.WriteOptions) (int64, <-c
 // Peek reads data without consuming it from the endpoint.
 //
 // This method does not block if there is no data pending.
-func (e *endpoint) Peek(vec [][]byte) (int64, tcpip.ControlMessages, *tcpip.Error) {
+func (e *endpoint) Peek(vec [][]byte) (int64, *tcpip.Error) {
 	e.LockUser()
 	defer e.UnlockUser()
 
@@ -1476,10 +1504,10 @@ func (e *endpoint) Peek(vec [][]byte) (int64, tcpip.ControlMessages, *tcpip.Erro
 	// but has some pending unread data.
 	if s := e.EndpointState(); !s.connected() && s != StateClose {
 		if s == StateError {
-			return 0, tcpip.ControlMessages{}, e.HardError
+			return 0, e.hardErrorLocked()
 		}
 		e.stats.ReadErrors.InvalidEndpointState.Increment()
-		return 0, tcpip.ControlMessages{}, tcpip.ErrInvalidEndpointState
+		return 0, tcpip.ErrInvalidEndpointState
 	}
 
 	e.rcvListMu.Lock()
@@ -1488,9 +1516,9 @@ func (e *endpoint) Peek(vec [][]byte) (int64, tcpip.ControlMessages, *tcpip.Erro
 	if e.rcvBufUsed == 0 {
 		if e.rcvClosed || !e.EndpointState().connected() {
 			e.stats.ReadErrors.ReadClosed.Increment()
-			return 0, tcpip.ControlMessages{}, tcpip.ErrClosedForReceive
+			return 0, tcpip.ErrClosedForReceive
 		}
-		return 0, tcpip.ControlMessages{}, tcpip.ErrWouldBlock
+		return 0, tcpip.ErrWouldBlock
 	}
 
 	// Make a copy of vec so we can modify the slide headers.
@@ -1505,7 +1533,7 @@ func (e *endpoint) Peek(vec [][]byte) (int64, tcpip.ControlMessages, *tcpip.Erro
 
 			for len(v) > 0 {
 				if len(vec) == 0 {
-					return num, tcpip.ControlMessages{}, nil
+					return num, nil
 				}
 				if len(vec[0]) == 0 {
 					vec = vec[1:]
@@ -1520,7 +1548,7 @@ func (e *endpoint) Peek(vec [][]byte) (int64, tcpip.ControlMessages, *tcpip.Erro
 		}
 	}
 
-	return num, tcpip.ControlMessages{}, nil
+	return num, nil
 }
 
 // selectWindowLocked returns the new window without checking for shrinking or scaling
@@ -1592,77 +1620,39 @@ func (e *endpoint) windowCrossedACKThresholdLocked(deltaBefore int) (crossed boo
 	return false, false
 }
 
-// SetSockOptBool sets a socket option.
-func (e *endpoint) SetSockOptBool(opt tcpip.SockOptBool, v bool) *tcpip.Error {
-	switch opt {
-
-	case tcpip.BroadcastOption:
-		e.LockUser()
-		e.broadcast = v
-		e.UnlockUser()
-
-	case tcpip.CorkOption:
-		e.LockUser()
-		if !v {
-			atomic.StoreUint32(&e.cork, 0)
-
-			// Handle the corked data.
-			e.sndWaker.Assert()
-		} else {
-			atomic.StoreUint32(&e.cork, 1)
-		}
-		e.UnlockUser()
-
-	case tcpip.DelayOption:
-		if v {
-			atomic.StoreUint32(&e.delay, 1)
-		} else {
-			atomic.StoreUint32(&e.delay, 0)
-
-			// Handle delayed data.
-			e.sndWaker.Assert()
-		}
-
-	case tcpip.KeepaliveEnabledOption:
-		e.keepalive.Lock()
-		e.keepalive.enabled = v
-		e.keepalive.Unlock()
-		e.notifyProtocolGoroutine(notifyKeepaliveChanged)
-
-	case tcpip.QuickAckOption:
-		o := uint32(1)
-		if v {
-			o = 0
-		}
-		atomic.StoreUint32(&e.slowAck, o)
-
-	case tcpip.ReuseAddressOption:
-		e.LockUser()
-		e.portFlags.TupleOnly = v
-		e.UnlockUser()
-
-	case tcpip.ReusePortOption:
-		e.LockUser()
-		e.portFlags.LoadBalanced = v
-		e.UnlockUser()
+// OnReuseAddressSet implements tcpip.SocketOptionsHandler.OnReuseAddressSet.
+func (e *endpoint) OnReuseAddressSet(v bool) {
+	e.LockUser()
+	e.portFlags.TupleOnly = v
+	e.UnlockUser()
+}
 
-	case tcpip.V6OnlyOption:
-		// We only recognize this option on v6 endpoints.
-		if e.NetProto != header.IPv6ProtocolNumber {
-			return tcpip.ErrInvalidEndpointState
-		}
+// OnReusePortSet implements tcpip.SocketOptionsHandler.OnReusePortSet.
+func (e *endpoint) OnReusePortSet(v bool) {
+	e.LockUser()
+	e.portFlags.LoadBalanced = v
+	e.UnlockUser()
+}
 
-		// We only allow this to be set when we're in the initial state.
-		if e.EndpointState() != StateInitial {
-			return tcpip.ErrInvalidEndpointState
-		}
+// OnKeepAliveSet implements tcpip.SocketOptionsHandler.OnKeepAliveSet.
+func (e *endpoint) OnKeepAliveSet(v bool) {
+	e.notifyProtocolGoroutine(notifyKeepaliveChanged)
+}
 
-		e.LockUser()
-		e.v6only = v
-		e.UnlockUser()
+// OnDelayOptionSet implements tcpip.SocketOptionsHandler.OnDelayOptionSet.
+func (e *endpoint) OnDelayOptionSet(v bool) {
+	if !v {
+		// Handle delayed data.
+		e.sndWaker.Assert()
 	}
+}
 
-	return nil
+// OnCorkOptionSet implements tcpip.SocketOptionsHandler.OnCorkOptionSet.
+func (e *endpoint) OnCorkOptionSet(v bool) {
+	if !v {
+		// Handle the corked data.
+		e.sndWaker.Assert()
+	}
 }
 
 // SetSockOptInt sets a socket option.
@@ -1846,9 +1836,6 @@ func (e *endpoint) SetSockOpt(opt tcpip.SettableSocketOption) *tcpip.Error {
 		e.keepalive.Unlock()
 		e.notifyProtocolGoroutine(notifyKeepaliveChanged)
 
-	case *tcpip.OutOfBandInlineOption:
-		// We don't currently support disabling this option.
-
 	case *tcpip.TCPUserTimeoutOption:
 		e.LockUser()
 		e.userTimeout = time.Duration(*v)
@@ -1917,11 +1904,6 @@ func (e *endpoint) SetSockOpt(opt tcpip.SettableSocketOption) *tcpip.Error {
 	case *tcpip.SocketDetachFilterOption:
 		return nil
 
-	case *tcpip.LingerOption:
-		e.LockUser()
-		e.linger = *v
-		e.UnlockUser()
-
 	default:
 		return nil
 	}
@@ -1944,66 +1926,6 @@ func (e *endpoint) readyReceiveSize() (int, *tcpip.Error) {
 	return e.rcvBufUsed, nil
 }
 
-// GetSockOptBool implements tcpip.Endpoint.GetSockOptBool.
-func (e *endpoint) GetSockOptBool(opt tcpip.SockOptBool) (bool, *tcpip.Error) {
-	switch opt {
-	case tcpip.BroadcastOption:
-		e.LockUser()
-		v := e.broadcast
-		e.UnlockUser()
-		return v, nil
-
-	case tcpip.CorkOption:
-		return atomic.LoadUint32(&e.cork) != 0, nil
-
-	case tcpip.DelayOption:
-		return atomic.LoadUint32(&e.delay) != 0, nil
-
-	case tcpip.KeepaliveEnabledOption:
-		e.keepalive.Lock()
-		v := e.keepalive.enabled
-		e.keepalive.Unlock()
-
-		return v, nil
-
-	case tcpip.QuickAckOption:
-		v := atomic.LoadUint32(&e.slowAck) == 0
-		return v, nil
-
-	case tcpip.ReuseAddressOption:
-		e.LockUser()
-		v := e.portFlags.TupleOnly
-		e.UnlockUser()
-
-		return v, nil
-
-	case tcpip.ReusePortOption:
-		e.LockUser()
-		v := e.portFlags.LoadBalanced
-		e.UnlockUser()
-
-		return v, nil
-
-	case tcpip.V6OnlyOption:
-		// We only recognize this option on v6 endpoints.
-		if e.NetProto != header.IPv6ProtocolNumber {
-			return false, tcpip.ErrUnknownProtocolOption
-		}
-
-		e.LockUser()
-		v := e.v6only
-		e.UnlockUser()
-
-		return v, nil
-
-	case tcpip.MulticastLoopOption:
-		return true, nil
-
-	default:
-		return false, tcpip.ErrUnknownProtocolOption
-	}
-}
-
 // GetSockOptInt implements tcpip.Endpoint.GetSockOptInt.
 func (e *endpoint) GetSockOptInt(opt tcpip.SockOptInt) (int, *tcpip.Error) {
 	switch opt {
@@ -2114,10 +2036,6 @@ func (e *endpoint) GetSockOpt(opt tcpip.GettableSocketOption) *tcpip.Error {
 		*o = tcpip.TCPUserTimeoutOption(e.userTimeout)
 		e.UnlockUser()
 
-	case *tcpip.OutOfBandInlineOption:
-		// We don't currently support disabling this option.
-		*o = 1
-
 	case *tcpip.CongestionControlOption:
 		e.LockUser()
 		*o = e.cc
@@ -2146,11 +2064,6 @@ func (e *endpoint) GetSockOpt(opt tcpip.GettableSocketOption) *tcpip.Error {
 			Port: port,
 		}
 
-	case *tcpip.LingerOption:
-		e.LockUser()
-		*o = e.linger
-		e.UnlockUser()
-
 	default:
 		return tcpip.ErrUnknownProtocolOption
 	}
@@ -2160,7 +2073,7 @@ func (e *endpoint) GetSockOpt(opt tcpip.GettableSocketOption) *tcpip.Error {
 // checkV4MappedLocked determines the effective network protocol and converts
 // addr to its canonical form.
 func (e *endpoint) checkV4MappedLocked(addr tcpip.FullAddress) (tcpip.FullAddress, tcpip.NetworkProtocolNumber, *tcpip.Error) {
-	unwrapped, netProto, err := e.TransportEndpointInfo.AddrNetProtoLocked(addr, e.v6only)
+	unwrapped, netProto, err := e.TransportEndpointInfo.AddrNetProtoLocked(addr, e.ops.GetV6Only())
 	if err != nil {
 		return tcpip.FullAddress{}, 0, err
 	}
@@ -2176,6 +2089,8 @@ func (*endpoint) Disconnect() *tcpip.Error {
 func (e *endpoint) Connect(addr tcpip.FullAddress) *tcpip.Error {
 	err := e.connect(addr, true, true)
 	if err != nil && !err.IgnoreStats() {
+		// Connect failed. Let's wake up any waiters.
+		e.waiterQueue.Notify(waiter.EventHUp | waiter.EventErr | waiter.EventIn | waiter.EventOut)
 		e.stack.Stats().TCP.FailedConnectionAttempts.Increment()
 		e.stats.FailedConnectionAttempts.Increment()
 	}
@@ -2235,7 +2150,10 @@ func (e *endpoint) connect(addr tcpip.FullAddress, handshake bool, run bool) *tc
 		return tcpip.ErrAlreadyConnecting
 
 	case StateError:
-		return e.HardError
+		if err := e.hardErrorLocked(); err != nil {
+			return err
+		}
+		return tcpip.ErrConnectionAborted
 
 	default:
 		return tcpip.ErrInvalidEndpointState
@@ -2310,7 +2228,7 @@ func (e *endpoint) connect(addr tcpip.FullAddress, handshake bool, run bool) *tc
 				// done yet) or the reservation was freed between the check above and
 				// the FindTransportEndpoint below. But rather than retry the same port
 				// we just skip it and move on.
-				transEP := e.stack.FindTransportEndpoint(netProto, ProtocolNumber, transEPID, &r)
+				transEP := e.stack.FindTransportEndpoint(netProto, ProtocolNumber, transEPID, r.NICID())
 				if transEP == nil {
 					// ReservePort failed but there is no registered endpoint with
 					// demuxer. Which indicates there is at least some endpoint that has
@@ -2379,7 +2297,6 @@ func (e *endpoint) connect(addr tcpip.FullAddress, handshake bool, run bool) *tc
 		for _, l := range []segmentList{e.segmentQueue.list, e.sndQueue, e.snd.writeList} {
 			for s := l.Front(); s != nil; s = s.Next() {
 				s.id = e.ID
-				s.route = r.Clone()
 				e.sndWaker.Assert()
 			}
 		}
@@ -2389,14 +2306,70 @@ func (e *endpoint) connect(addr tcpip.FullAddress, handshake bool, run bool) *tc
 	}
 
 	if run {
-		e.workerRunning = true
-		e.stack.Stats().TCP.ActiveConnectionOpenings.Increment()
-		go e.protocolMainLoop(handshake, nil) // S/R-SAFE: will be drained before save.
+		if err := e.startMainLoop(handshake); err != nil {
+			return err
+		}
 	}
 
 	return tcpip.ErrConnectStarted
 }
 
+// startMainLoop sends the initial SYN and starts the main loop for the
+// endpoint.
+func (e *endpoint) startMainLoop(handshake bool) *tcpip.Error {
+	preloop := func() *tcpip.Error {
+		if handshake {
+			h := e.newHandshake()
+			e.setEndpointState(StateSynSent)
+			if err := h.start(); err != nil {
+				e.lastErrorMu.Lock()
+				e.lastError = err
+				e.lastErrorMu.Unlock()
+
+				e.setEndpointState(StateError)
+				e.hardError = err
+
+				// Call cleanupLocked to free up any reservations.
+				e.cleanupLocked()
+				return err
+			}
+		}
+		e.stack.Stats().TCP.ActiveConnectionOpenings.Increment()
+		return nil
+	}
+
+	if e.route.IsResolutionRequired() {
+		// If the endpoint is closed between releasing e.mu and the goroutine below
+		// acquiring it, make sure that cleanup is deferred to the new goroutine.
+		e.workerRunning = true
+
+		// Sending the initial SYN may block due to route resolution; do it in a
+		// separate goroutine to avoid blocking the syscall goroutine.
+		go func() { // S/R-SAFE: will be drained before save.
+			e.mu.Lock()
+			if err := preloop(); err != nil {
+				e.workerRunning = false
+				e.mu.Unlock()
+				return
+			}
+			e.mu.Unlock()
+			_ = e.protocolMainLoop(handshake, nil)
+		}()
+		return nil
+	}
+
+	// No route resolution is required, so we can send the initial SYN here without
+	// blocking. This will hopefully reduce overall latency by overlapping time
+	// spent waiting for a SYN-ACK and time spent spinning up a new goroutine
+	// for the main loop.
+	if err := preloop(); err != nil {
+		return err
+	}
+	e.workerRunning = true
+	go e.protocolMainLoop(handshake, nil) // S/R-SAFE: will be drained before save.
+	return nil
+}
+
 // ConnectEndpoint is not supported.
 func (*endpoint) ConnectEndpoint(tcpip.Endpoint) *tcpip.Error {
 	return tcpip.ErrInvalidEndpointState
@@ -2445,7 +2418,7 @@ func (e *endpoint) shutdownLocked(flags tcpip.ShutdownFlags) *tcpip.Error {
 			}
 
 			// Queue fin segment.
-			s := newSegmentFromView(&e.route, e.ID, nil)
+			s := newOutgoingSegment(e.ID, nil)
 			e.sndQueue.PushBack(s)
 			e.sndBufInQueue++
 			// Mark endpoint as closed.
@@ -2627,14 +2600,16 @@ func (e *endpoint) bindLocked(addr tcpip.FullAddress) (err *tcpip.Error) {
 		return err
 	}
 
-	// Expand netProtos to include v4 and v6 if the caller is binding to a
-	// wildcard (empty) address, and this is an IPv6 endpoint with v6only
-	// set to false.
 	netProtos := []tcpip.NetworkProtocolNumber{netProto}
-	if netProto == header.IPv6ProtocolNumber && !e.v6only && addr.Addr == "" {
-		netProtos = []tcpip.NetworkProtocolNumber{
-			header.IPv6ProtocolNumber,
-			header.IPv4ProtocolNumber,
+
+	// Expand netProtos to include v4 and v6 under dual-stack if the caller is
+	// binding to a wildcard (empty) address, and this is an IPv6 endpoint with
+	// v6only set to false.
+	if netProto == header.IPv6ProtocolNumber {
+		stackHasV4 := e.stack.CheckNetworkProtocol(header.IPv4ProtocolNumber)
+		alsoBindToV4 := !e.ops.GetV6Only() && addr.Addr == "" && stackHasV4
+		if alsoBindToV4 {
+			netProtos = append(netProtos, header.IPv4ProtocolNumber)
 		}
 	}
 
@@ -2715,9 +2690,9 @@ func (e *endpoint) getRemoteAddress() tcpip.FullAddress {
 	}
 }
 
-func (e *endpoint) HandlePacket(r *stack.Route, id stack.TransportEndpointID, pkt *stack.PacketBuffer) {
+func (*endpoint) HandlePacket(stack.TransportEndpointID, *stack.PacketBuffer) {
 	// TCP HandlePacket is not required anymore as inbound packets first
-	// land at the Dispatcher which then can either delivery using the
+	// land at the Dispatcher which then can either deliver using the
 	// worker go routine or directly do the invoke the tcp processing inline
 	// based on the state of the endpoint.
 }
@@ -3051,6 +3026,7 @@ func (e *endpoint) completeState() stack.TCPEndpointState {
 		FACK:        rc.fack,
 		RTT:         rc.rtt,
 		Reord:       rc.reorderSeen,
+		DSACKSeen:   rc.dsackSeen,
 	}
 	return s
 }
@@ -3074,9 +3050,9 @@ func (e *endpoint) initHardwareGSO() {
 }
 
 func (e *endpoint) initGSO() {
-	if e.route.Capabilities()&stack.CapabilityHardwareGSO != 0 {
+	if e.route.HasHardwareGSOCapability() {
 		e.initHardwareGSO()
-	} else if e.route.Capabilities()&stack.CapabilitySoftwareGSO != 0 {
+	} else if e.route.HasSoftwareGSOCapability() {
 		e.gso = &stack.GSO{
 			MaxSize:   e.route.GSOMaxSize(),
 			Type:      stack.GSOSW,
@@ -3095,7 +3071,7 @@ func (e *endpoint) State() uint32 {
 func (e *endpoint) Info() tcpip.EndpointInfo {
 	e.LockUser()
 	// Make a copy of the endpoint info.
-	ret := e.EndpointInfo
+	ret := e.TransportEndpointInfo
 	e.UnlockUser()
 	return &ret
 }
@@ -3120,3 +3096,8 @@ func (e *endpoint) Wait() {
 		<-notifyCh
 	}
 }
+
+// SocketOptions implements tcpip.Endpoint.SocketOptions.
+func (e *endpoint) SocketOptions() *tcpip.SocketOptions {
+	return &e.ops
+}
diff --git a/pkg/tcpip/transport/tcp/endpoint_state.go b/pkg/tcpip/transport/tcp/endpoint_state.go
index b25431467..ba67176b5 100644
--- a/pkg/tcpip/transport/tcp/endpoint_state.go
+++ b/pkg/tcpip/transport/tcp/endpoint_state.go
@@ -53,8 +53,8 @@ func (e *endpoint) beforeSave() {
 	switch {
 	case epState == StateInitial || epState == StateBound:
 	case epState.connected() || epState.handshake():
-		if e.route.Capabilities()&stack.CapabilitySaveRestore == 0 {
-			if e.route.Capabilities()&stack.CapabilityDisconnectOk == 0 {
+		if !e.route.HasSaveRestoreCapability() {
+			if !e.route.HasDisconncetOkCapability() {
 				panic(tcpip.ErrSaveRejection{fmt.Errorf("endpoint cannot be saved in connected state: local %v:%d, remote %v:%d", e.ID.LocalAddress, e.ID.LocalPort, e.ID.RemoteAddress, e.ID.RemotePort)})
 			}
 			e.resetConnectionLocked(tcpip.ErrConnectionAborted)
@@ -172,6 +172,7 @@ func (e *endpoint) afterLoad() {
 	// Condition variables and mutexs are not S/R'ed so reinitialize
 	// acceptCond with e.acceptMu.
 	e.acceptCond = sync.NewCond(&e.acceptMu)
+	e.keepalive.timer.init(&e.keepalive.waker)
 	stack.StackFromEnv.RegisterRestoredEndpoint(e)
 }
 
@@ -320,21 +321,21 @@ func (e *endpoint) loadRecentTSTime(unix unixTime) {
 }
 
 // saveHardError is invoked by stateify.
-func (e *EndpointInfo) saveHardError() string {
-	if e.HardError == nil {
+func (e *endpoint) saveHardError() string {
+	if e.hardError == nil {
 		return ""
 	}
 
-	return e.HardError.String()
+	return e.hardError.String()
 }
 
 // loadHardError is invoked by stateify.
-func (e *EndpointInfo) loadHardError(s string) {
+func (e *endpoint) loadHardError(s string) {
 	if s == "" {
 		return
 	}
 
-	e.HardError = tcpip.StringToError(s)
+	e.hardError = tcpip.StringToError(s)
 }
 
 // saveMeasureTime is invoked by stateify.
diff --git a/pkg/tcpip/transport/tcp/forwarder.go b/pkg/tcpip/transport/tcp/forwarder.go
index 070b634b4..596178625 100644
--- a/pkg/tcpip/transport/tcp/forwarder.go
+++ b/pkg/tcpip/transport/tcp/forwarder.go
@@ -30,6 +30,8 @@ import (
 // The canonical way of using it is to pass the Forwarder.HandlePacket function
 // to stack.SetTransportProtocolHandler.
 type Forwarder struct {
+	stack *stack.Stack
+
 	maxInFlight int
 	handler     func(*ForwarderRequest)
 
@@ -48,6 +50,7 @@ func NewForwarder(s *stack.Stack, rcvWnd, maxInFlight int, handler func(*Forward
 		rcvWnd = DefaultReceiveBufferSize
 	}
 	return &Forwarder{
+		stack:       s,
 		maxInFlight: maxInFlight,
 		handler:     handler,
 		inFlight:    make(map[stack.TransportEndpointID]struct{}),
@@ -61,12 +64,12 @@ func NewForwarder(s *stack.Stack, rcvWnd, maxInFlight int, handler func(*Forward
 //
 // This function is expected to be passed as an argument to the
 // stack.SetTransportProtocolHandler function.
-func (f *Forwarder) HandlePacket(r *stack.Route, id stack.TransportEndpointID, pkt *stack.PacketBuffer) bool {
-	s := newSegment(r, id, pkt)
+func (f *Forwarder) HandlePacket(id stack.TransportEndpointID, pkt *stack.PacketBuffer) bool {
+	s := newIncomingSegment(id, pkt)
 	defer s.decRef()
 
 	// We only care about well-formed SYN packets.
-	if !s.parse() || !s.csumValid || s.flags != header.TCPFlagSyn {
+	if !s.parse(pkt.RXTransportChecksumValidated) || !s.csumValid || s.flags != header.TCPFlagSyn {
 		return false
 	}
 
@@ -128,9 +131,8 @@ func (r *ForwarderRequest) Complete(sendReset bool) {
 	delete(r.forwarder.inFlight, r.segment.id)
 	r.forwarder.mu.Unlock()
 
-	// If the caller requested, send a reset.
 	if sendReset {
-		replyWithReset(r.segment, stack.DefaultTOS, r.segment.route.DefaultTTL())
+		replyWithReset(r.forwarder.stack, r.segment, stack.DefaultTOS, 0 /* ttl */)
 	}
 
 	// Release all resources.
@@ -150,7 +152,7 @@ func (r *ForwarderRequest) CreateEndpoint(queue *waiter.Queue) (tcpip.Endpoint,
 	}
 
 	f := r.forwarder
-	ep, err := f.listen.createEndpointAndPerformHandshake(r.segment, &header.TCPSynOptions{
+	ep, err := f.listen.performHandshake(r.segment, &header.TCPSynOptions{
 		MSS:           r.synOptions.MSS,
 		WS:            r.synOptions.WS,
 		TS:            r.synOptions.TS,
diff --git a/pkg/tcpip/transport/tcp/protocol.go b/pkg/tcpip/transport/tcp/protocol.go
index 5bce73605..672159eed 100644
--- a/pkg/tcpip/transport/tcp/protocol.go
+++ b/pkg/tcpip/transport/tcp/protocol.go
@@ -187,8 +187,8 @@ func (*protocol) ParsePorts(v buffer.View) (src, dst uint16, err *tcpip.Error) {
 // to a specific processing queue. Each queue is serviced by its own processor
 // goroutine which is responsible for dequeuing and doing full TCP dispatch of
 // the packet.
-func (p *protocol) QueuePacket(r *stack.Route, ep stack.TransportEndpoint, id stack.TransportEndpointID, pkt *stack.PacketBuffer) {
-	p.dispatcher.queuePacket(r, ep, id, pkt)
+func (p *protocol) QueuePacket(ep stack.TransportEndpoint, id stack.TransportEndpointID, pkt *stack.PacketBuffer) {
+	p.dispatcher.queuePacket(ep, id, pkt)
 }
 
 // HandleUnknownDestinationPacket handles packets targeted at this protocol but
@@ -198,24 +198,32 @@ func (p *protocol) QueuePacket(r *stack.Route, ep stack.TransportEndpoint, id st
 // a reset is sent in response to any incoming segment except another reset. In
 // particular, SYNs addressed to a non-existent connection are rejected by this
 // means."
-
-func (*protocol) HandleUnknownDestinationPacket(r *stack.Route, id stack.TransportEndpointID, pkt *stack.PacketBuffer) stack.UnknownDestinationPacketDisposition {
-	s := newSegment(r, id, pkt)
+func (p *protocol) HandleUnknownDestinationPacket(id stack.TransportEndpointID, pkt *stack.PacketBuffer) stack.UnknownDestinationPacketDisposition {
+	s := newIncomingSegment(id, pkt)
 	defer s.decRef()
 
-	if !s.parse() || !s.csumValid {
+	if !s.parse(pkt.RXTransportChecksumValidated) || !s.csumValid {
 		return stack.UnknownDestinationPacketMalformed
 	}
 
 	if !s.flagIsSet(header.TCPFlagRst) {
-		replyWithReset(s, stack.DefaultTOS, s.route.DefaultTTL())
+		replyWithReset(p.stack, s, stack.DefaultTOS, 0)
 	}
 
 	return stack.UnknownDestinationPacketHandled
 }
 
 // replyWithReset replies to the given segment with a reset segment.
-func replyWithReset(s *segment, tos, ttl uint8) {
+//
+// If the passed TTL is 0, then the route's default TTL will be used.
+func replyWithReset(stack *stack.Stack, s *segment, tos, ttl uint8) *tcpip.Error {
+	route, err := stack.FindRoute(s.nicID, s.dstAddr, s.srcAddr, s.netProto, false /* multicastLoop */)
+	if err != nil {
+		return err
+	}
+	defer route.Release()
+	route.ResolveWith(s.remoteLinkAddr)
+
 	// Get the seqnum from the packet if the ack flag is set.
 	seq := seqnum.Value(0)
 	ack := seqnum.Value(0)
@@ -237,7 +245,12 @@ func replyWithReset(s *segment, tos, ttl uint8) {
 		flags |= header.TCPFlagAck
 		ack = s.sequenceNumber.Add(s.logicalLen())
 	}
-	sendTCP(&s.route, tcpFields{
+
+	if ttl == 0 {
+		ttl = route.DefaultTTL()
+	}
+
+	return sendTCP(route, tcpFields{
 		id:     s.id,
 		ttl:    ttl,
 		tos:    tos,
diff --git a/pkg/tcpip/transport/tcp/rack.go b/pkg/tcpip/transport/tcp/rack.go
index d312b1b8b..e0a50a919 100644
--- a/pkg/tcpip/transport/tcp/rack.go
+++ b/pkg/tcpip/transport/tcp/rack.go
@@ -29,12 +29,12 @@ import (
 //
 // +stateify savable
 type rackControl struct {
+	// dsackSeen indicates if the connection has seen a DSACK.
+	dsackSeen bool
+
 	// endSequence is the ending TCP sequence number of rackControl.seg.
 	endSequence seqnum.Value
 
-	// dsack indicates if the connection has seen a DSACK.
-	dsack bool
-
 	// fack is the highest selectively or cumulatively acknowledged
 	// sequence.
 	fack seqnum.Value
@@ -122,3 +122,8 @@ func (rc *rackControl) detectReorder(seg *segment) {
 		rc.reorderSeen = true
 	}
 }
+
+// setDSACKSeen updates rack control if duplicate SACK is seen by the connection.
+func (rc *rackControl) setDSACKSeen() {
+	rc.dsackSeen = true
+}
diff --git a/pkg/tcpip/transport/tcp/rcv.go b/pkg/tcpip/transport/tcp/rcv.go
index 8e0b7c843..f2b1b68da 100644
--- a/pkg/tcpip/transport/tcp/rcv.go
+++ b/pkg/tcpip/transport/tcp/rcv.go
@@ -16,6 +16,7 @@ package tcp
 
 import (
 	"container/heap"
+	"math"
 	"time"
 
 	"gvisor.dev/gvisor/pkg/tcpip"
@@ -48,6 +49,10 @@ type receiver struct {
 
 	rcvWndScale uint8
 
+	// prevBufUsed is the snapshot of endpoint rcvBufUsed taken when we
+	// advertise a receive window.
+	prevBufUsed int
+
 	closed bool
 
 	// pendingRcvdSegments is bounded by the receive buffer size of the
@@ -80,9 +85,9 @@ func (r *receiver) acceptable(segSeq seqnum.Value, segLen seqnum.Size) bool {
 	// outgoing packets, we should use what we have advertised for acceptability
 	// test.
 	scaledWindowSize := r.rcvWnd >> r.rcvWndScale
-	if scaledWindowSize > 0xffff {
+	if scaledWindowSize > math.MaxUint16 {
 		// This is what we actually put in the Window field.
-		scaledWindowSize = 0xffff
+		scaledWindowSize = math.MaxUint16
 	}
 	advertisedWindowSize := scaledWindowSize << r.rcvWndScale
 	return header.Acceptable(segSeq, segLen, r.rcvNxt, r.rcvNxt.Add(advertisedWindowSize))
@@ -106,6 +111,34 @@ func (r *receiver) currentWindow() (curWnd seqnum.Size) {
 func (r *receiver) getSendParams() (rcvNxt seqnum.Value, rcvWnd seqnum.Size) {
 	newWnd := r.ep.selectWindow()
 	curWnd := r.currentWindow()
+	unackLen := int(r.ep.snd.maxSentAck.Size(r.rcvNxt))
+	bufUsed := r.ep.receiveBufferUsed()
+
+	// Grow the right edge of the window only for payloads larger than the
+	// segment overhead OR if the application is actively consuming data.
+	//
+	// Avoiding growing the right edge otherwise addresses the situation below:
+	// An application has been slow in reading data and we have burst of
+	// incoming segments lengths < segment overhead. Here, our available free
+	// memory would reduce drastically when compared to the advertised receive
+	// window.
+	//
+	// For example: With incoming 512 bytes segments, segment overhead of
+	// 552 bytes (at the time of writing this comment), with receive window
+	// starting from 1MB and with rcvAdvWndScale being 1, buffer would reach 0
+	// when the curWnd is still 19436 bytes, because for every incoming segment
+	// newWnd would reduce by (552+512) >> rcvAdvWndScale (current value 1),
+	// while curWnd would reduce by 512 bytes.
+	// Such a situation causes us to keep tail dropping the incoming segments
+	// and never advertise zero receive window to the peer.
+	//
+	// Linux does a similar check for minimal sk_buff size (128):
+	// https://github.com/torvalds/linux/blob/d5beb3140f91b1c8a3d41b14d729aefa4dcc58bc/net/ipv4/tcp_input.c#L783
+	//
+	// Also, if the application is reading the data, we keep growing the right
+	// edge, as we are still advertising a window that we think can be serviced.
+	toGrow := unackLen >= SegSize || bufUsed <= r.prevBufUsed
+
 	// Update rcvAcc only if new window is > previously advertised window. We
 	// should never shrink the acceptable sequence space once it has been
 	// advertised the peer. If we shrink the acceptable sequence space then we
@@ -115,7 +148,7 @@ func (r *receiver) getSendParams() (rcvNxt seqnum.Value, rcvWnd seqnum.Size) {
 	// rcvWUP       rcvNxt         rcvAcc          new rcvAcc
 	//               <=====curWnd ===>
 	//               <========= newWnd > curWnd ========= >
-	if r.rcvNxt.Add(seqnum.Size(curWnd)).LessThan(r.rcvNxt.Add(seqnum.Size(newWnd))) {
+	if r.rcvNxt.Add(seqnum.Size(curWnd)).LessThan(r.rcvNxt.Add(seqnum.Size(newWnd))) && toGrow {
 		// If the new window moves the right edge, then update rcvAcc.
 		r.rcvAcc = r.rcvNxt.Add(seqnum.Size(newWnd))
 	} else {
@@ -130,11 +163,24 @@ func (r *receiver) getSendParams() (rcvNxt seqnum.Value, rcvWnd seqnum.Size) {
 	// receiver's estimated RTT.
 	r.rcvWnd = newWnd
 	r.rcvWUP = r.rcvNxt
+	r.prevBufUsed = bufUsed
 	scaledWnd := r.rcvWnd >> r.rcvWndScale
 	if scaledWnd == 0 {
 		// Increment a metric if we are advertising an actual zero window.
 		r.ep.stats.ReceiveErrors.ZeroRcvWindowState.Increment()
 	}
+
+	// If we started off with a window larger than what can be held in
+	// the 16-bit window field, we cap the value at the max value.
+	// While capping, we still do not want to grow the right edge when
+	// not applicable.
+	if scaledWnd > math.MaxUint16 {
+		if toGrow {
+			scaledWnd = seqnum.Size(math.MaxUint16)
+		} else {
+			scaledWnd = seqnum.Size(uint16(scaledWnd))
+		}
+	}
 	return r.rcvNxt, scaledWnd
 }
 
diff --git a/pkg/tcpip/transport/tcp/reno_recovery.go b/pkg/tcpip/transport/tcp/reno_recovery.go
new file mode 100644
index 000000000..2aa708e97
--- /dev/null
+++ b/pkg/tcpip/transport/tcp/reno_recovery.go
@@ -0,0 +1,67 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package tcp
+
+// renoRecovery stores the variables related to TCP Reno loss recovery
+// algorithm.
+//
+// +stateify savable
+type renoRecovery struct {
+	s *sender
+}
+
+func newRenoRecovery(s *sender) *renoRecovery {
+	return &renoRecovery{s: s}
+}
+
+func (rr *renoRecovery) DoRecovery(rcvdSeg *segment, fastRetransmit bool) {
+	ack := rcvdSeg.ackNumber
+	snd := rr.s
+
+	// We are in fast recovery mode. Ignore the ack if it's out of range.
+	if !ack.InRange(snd.sndUna, snd.sndNxt+1) {
+		return
+	}
+
+	// Don't count this as a duplicate if it is carrying data or
+	// updating the window.
+	if rcvdSeg.logicalLen() != 0 || snd.sndWnd != rcvdSeg.window {
+		return
+	}
+
+	// Inflate the congestion window if we're getting duplicate acks
+	// for the packet we retransmitted.
+	if !fastRetransmit && ack == snd.fr.first {
+		// We received a dup, inflate the congestion window by 1 packet
+		// if we're not at the max yet. Only inflate the window if
+		// regular FastRecovery is in use, RFC6675 does not require
+		// inflating cwnd on duplicate ACKs.
+		if snd.sndCwnd < snd.fr.maxCwnd {
+			snd.sndCwnd++
+		}
+		return
+	}
+
+	// A partial ack was received. Retransmit this packet and remember it
+	// so that we don't retransmit it again.
+	//
+	// We don't inflate the window because we're putting the same packet
+	// back onto the wire.
+	//
+	// N.B. The retransmit timer will be reset by the caller.
+	snd.fr.first = ack
+	snd.dupAckCount = 0
+	snd.resendSegment()
+}
diff --git a/pkg/tcpip/transport/tcp/sack_recovery.go b/pkg/tcpip/transport/tcp/sack_recovery.go
new file mode 100644
index 000000000..7e813fa96
--- /dev/null
+++ b/pkg/tcpip/transport/tcp/sack_recovery.go
@@ -0,0 +1,120 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package tcp
+
+import "gvisor.dev/gvisor/pkg/tcpip/seqnum"
+
+// sackRecovery stores the variables related to TCP SACK loss recovery
+// algorithm.
+//
+// +stateify savable
+type sackRecovery struct {
+	s *sender
+}
+
+func newSACKRecovery(s *sender) *sackRecovery {
+	return &sackRecovery{s: s}
+}
+
+// handleSACKRecovery implements the loss recovery phase as described in RFC6675
+// section 5, step C.
+func (sr *sackRecovery) handleSACKRecovery(limit int, end seqnum.Value) (dataSent bool) {
+	snd := sr.s
+	snd.SetPipe()
+
+	if smss := int(snd.ep.scoreboard.SMSS()); limit > smss {
+		// Cap the segment size limit to SMSS, as SACK recovery requires
+		// that all retransmissions or new segments sent during recovery
+		// be of size <= SMSS.
+		limit = smss
+	}
+
+	nextSegHint := snd.writeList.Front()
+	for snd.outstanding < snd.sndCwnd {
+		var nextSeg *segment
+		var rescueRtx bool
+		nextSeg, nextSegHint, rescueRtx = snd.NextSeg(nextSegHint)
+		if nextSeg == nil {
+			return dataSent
+		}
+		if !snd.isAssignedSequenceNumber(nextSeg) || snd.sndNxt.LessThanEq(nextSeg.sequenceNumber) {
+			// New data being sent.
+
+			// Step C.3 described below is handled by
+			// maybeSendSegment which increments sndNxt when
+			// a segment is transmitted.
+			//
+			// Step C.3 "If any of the data octets sent in
+			// (C.1) are above HighData, HighData must be
+			// updated to reflect the transmission of
+			// previously unsent data."
+			//
+			// We pass limit (already capped to SMSS above) because Step 2)
+			// requires that new data sent be of size SMSS or less.
+			if sent := snd.maybeSendSegment(nextSeg, limit, end); !sent {
+				return dataSent
+			}
+			dataSent = true
+			snd.outstanding++
+			snd.writeNext = nextSeg.Next()
+			continue
+		}
+
+		// Now handle the retransmission case where we matched either step 1, 3
+		// or 4 of the NextSeg algorithm.
+		// RFC 6675, Step C.4.
+		//
+		// "The estimate of the amount of data outstanding in the network
+		// must be updated by incrementing pipe by the number of octets
+		// transmitted in (C.1)."
+		snd.outstanding++
+		dataSent = true
+		snd.sendSegment(nextSeg)
+
+		segEnd := nextSeg.sequenceNumber.Add(nextSeg.logicalLen())
+		if rescueRtx {
+			// We do the last part of rule (4) of NextSeg here to update
+			// RescueRxt as until this point we don't know if we are going
+			// to use the rescue transmission.
+			snd.fr.rescueRxt = snd.fr.last
+		} else {
+			// RFC 6675, Step C.2
+			//
+			// "If any of the data octets sent in (C.1) are below
+			// HighData, HighRxt MUST be set to the highest sequence
+			// number of the retransmitted segment unless NextSeg ()
+			// rule (4) was invoked for this retransmission."
+			snd.fr.highRxt = segEnd - 1
+		}
+	}
+	return dataSent
+}
+
+func (sr *sackRecovery) DoRecovery(rcvdSeg *segment, fastRetransmit bool) {
+	snd := sr.s
+	if fastRetransmit {
+		snd.resendSegment()
+	}
+
+	// We are in fast recovery mode. Ignore the ack if it's out of range.
+	if ack := rcvdSeg.ackNumber; !ack.InRange(snd.sndUna, snd.sndNxt+1) {
+		return
+	}
+
+	// RFC 6675 recovery algorithm step C 1-5.
+	end := snd.sndUna.Add(snd.sndWnd)
+	dataSent := sr.handleSACKRecovery(snd.maxPayloadSize, end)
+	snd.postXmit(dataSent)
+}
diff --git a/pkg/tcpip/transport/tcp/sack_scoreboard.go b/pkg/tcpip/transport/tcp/sack_scoreboard.go
index 7ef2df377..833a7b470 100644
--- a/pkg/tcpip/transport/tcp/sack_scoreboard.go
+++ b/pkg/tcpip/transport/tcp/sack_scoreboard.go
@@ -164,7 +164,7 @@ func (s *SACKScoreboard) IsSACKED(r header.SACKBlock) bool {
 	return found
 }
 
-// Dump prints the state of the scoreboard structure.
+// String returns human-readable state of the scoreboard structure.
 func (s *SACKScoreboard) String() string {
 	var str strings.Builder
 	str.WriteString("SACKScoreboard: {")
diff --git a/pkg/tcpip/transport/tcp/segment.go b/pkg/tcpip/transport/tcp/segment.go
index 1f9c5cf50..5ef73ec74 100644
--- a/pkg/tcpip/transport/tcp/segment.go
+++ b/pkg/tcpip/transport/tcp/segment.go
@@ -19,6 +19,7 @@ import (
 	"sync/atomic"
 	"time"
 
+	"gvisor.dev/gvisor/pkg/tcpip"
 	"gvisor.dev/gvisor/pkg/tcpip/buffer"
 	"gvisor.dev/gvisor/pkg/tcpip/header"
 	"gvisor.dev/gvisor/pkg/tcpip/seqnum"
@@ -45,9 +46,18 @@ type segment struct {
 	ep     *endpoint
 	qFlags queueFlags
 	id     stack.TransportEndpointID `state:"manual"`
-	route  stack.Route               `state:"manual"`
-	data   buffer.VectorisedView     `state:".(buffer.VectorisedView)"`
-	hdr    header.TCP
+
+	// TODO(gvisor.dev/issue/4417): Hold a stack.PacketBuffer instead of
+	// individual members for link/network packet info.
+	srcAddr        tcpip.Address
+	dstAddr        tcpip.Address
+	netProto       tcpip.NetworkProtocolNumber
+	nicID          tcpip.NICID
+	remoteLinkAddr tcpip.LinkAddress
+
+	data buffer.VectorisedView `state:".(buffer.VectorisedView)"`
+
+	hdr header.TCP
 	// views is used as buffer for data when its length is large
 	// enough to store a VectorisedView.
 	views [8]buffer.View `state:"nosave"`
@@ -76,11 +86,16 @@ type segment struct {
 	acked bool
 }
 
-func newSegment(r *stack.Route, id stack.TransportEndpointID, pkt *stack.PacketBuffer) *segment {
+func newIncomingSegment(id stack.TransportEndpointID, pkt *stack.PacketBuffer) *segment {
+	netHdr := pkt.Network()
 	s := &segment{
-		refCnt: 1,
-		id:     id,
-		route:  r.Clone(),
+		refCnt:         1,
+		id:             id,
+		srcAddr:        netHdr.SourceAddress(),
+		dstAddr:        netHdr.DestinationAddress(),
+		netProto:       pkt.NetworkProtocolNumber,
+		nicID:          pkt.NICID,
+		remoteLinkAddr: pkt.SourceLinkAddress(),
 	}
 	s.data = pkt.Data.Clone(s.views[:])
 	s.hdr = header.TCP(pkt.TransportHeader().View())
@@ -88,11 +103,10 @@ func newSegment(r *stack.Route, id stack.TransportEndpointID, pkt *stack.PacketB
 	return s
 }
 
-func newSegmentFromView(r *stack.Route, id stack.TransportEndpointID, v buffer.View) *segment {
+func newOutgoingSegment(id stack.TransportEndpointID, v buffer.View) *segment {
 	s := &segment{
 		refCnt: 1,
 		id:     id,
-		route:  r.Clone(),
 	}
 	s.rcvdTime = time.Now()
 	if len(v) != 0 {
@@ -110,7 +124,9 @@ func (s *segment) clone() *segment {
 		ackNumber:      s.ackNumber,
 		flags:          s.flags,
 		window:         s.window,
-		route:          s.route.Clone(),
+		netProto:       s.netProto,
+		nicID:          s.nicID,
+		remoteLinkAddr: s.remoteLinkAddr,
 		viewToDeliver:  s.viewToDeliver,
 		rcvdTime:       s.rcvdTime,
 		xmitTime:       s.xmitTime,
@@ -160,7 +176,6 @@ func (s *segment) decRef() {
 				panic(fmt.Sprintf("unexpected queue flag %b set for segment", s.qFlags))
 			}
 		}
-		s.route.Release()
 	}
 }
 
@@ -189,7 +204,7 @@ func (s *segment) payloadSize() int {
 // segMemSize is the amount of memory used to hold the segment data and
 // the associated metadata.
 func (s *segment) segMemSize() int {
-	return segSize + s.data.Size()
+	return SegSize + s.data.Size()
 }
 
 // parse populates the sequence & ack numbers, flags, and window fields of the
@@ -198,10 +213,10 @@ func (s *segment) segMemSize() int {
 //
 // Returns boolean indicating if the parsing was successful.
 //
-// If checksum verification is not offloaded then parse also verifies the
+// If checksum verification may not be skipped, parse also verifies the
 // TCP checksum and stores the checksum and result of checksum verification in
 // the csum and csumValid fields of the segment.
-func (s *segment) parse() bool {
+func (s *segment) parse(skipChecksumValidation bool) bool {
 	// h is the header followed by the payload. We check that the offset to
 	// the data respects the following constraints:
 	// 1. That it's at least the minimum header size; if we don't do this
@@ -220,16 +235,14 @@ func (s *segment) parse() bool {
 	s.options = []byte(s.hdr[header.TCPMinimumSize:])
 	s.parsedOptions = header.ParseTCPOptions(s.options)
 
-	// Query the link capabilities to decide if checksum validation is
-	// required.
 	verifyChecksum := true
-	if s.route.Capabilities()&stack.CapabilityRXChecksumOffload != 0 {
+	if skipChecksumValidation {
 		s.csumValid = true
 		verifyChecksum = false
 	}
 	if verifyChecksum {
 		s.csum = s.hdr.Checksum()
-		xsum := s.route.PseudoHeaderChecksum(ProtocolNumber, uint16(s.data.Size()+len(s.hdr)))
+		xsum := header.PseudoHeaderChecksum(ProtocolNumber, s.srcAddr, s.dstAddr, uint16(s.data.Size()+len(s.hdr)))
 		xsum = s.hdr.CalculateChecksum(xsum)
 		xsum = header.ChecksumVV(s.data, xsum)
 		s.csumValid = xsum == 0xffff
diff --git a/pkg/tcpip/transport/tcp/segment_unsafe.go b/pkg/tcpip/transport/tcp/segment_unsafe.go
index 0ab7b8f56..392ff0859 100644
--- a/pkg/tcpip/transport/tcp/segment_unsafe.go
+++ b/pkg/tcpip/transport/tcp/segment_unsafe.go
@@ -19,5 +19,6 @@ import (
 )
 
 const (
-	segSize = int(unsafe.Sizeof(segment{}))
+	// SegSize is the minimal size of the segment overhead.
+	SegSize = int(unsafe.Sizeof(segment{}))
 )
diff --git a/pkg/tcpip/transport/tcp/snd.go b/pkg/tcpip/transport/tcp/snd.go
index 6fa8d63cd..baec762e1 100644
--- a/pkg/tcpip/transport/tcp/snd.go
+++ b/pkg/tcpip/transport/tcp/snd.go
@@ -18,7 +18,6 @@ import (
 	"fmt"
 	"math"
 	"sort"
-	"sync/atomic"
 	"time"
 
 	"gvisor.dev/gvisor/pkg/sleep"
@@ -92,6 +91,17 @@ type congestionControl interface {
 	PostRecovery()
 }
 
+// lossRecovery is an interface that must be implemented by any supported
+// loss recovery algorithm.
+type lossRecovery interface {
+	// DoRecovery is invoked when loss is detected and segments need
+	// to be retransmitted. The cumulative or selective ACK is passed along
+	// with a flag that indicates whether the connection entered fast
+	// retransmit with this ACK, in which case the first unacknowledged
+	// segment should be retransmitted.
+	DoRecovery(rcvdSeg *segment, fastRetransmit bool)
+}
+
 // sender holds the state necessary to send TCP segments.
 //
 // +stateify savable
@@ -108,6 +118,9 @@ type sender struct {
 	// fr holds state related to fast recovery.
 	fr fastRecovery
 
+	// lr is the loss recovery algorithm used by the sender.
+	lr lossRecovery
+
 	// sndCwnd is the congestion window, in packets.
 	sndCwnd int
 
@@ -276,6 +289,8 @@ func newSender(ep *endpoint, iss, irs seqnum.Value, sndWnd seqnum.Size, mss uint
 
 	s.cc = s.initCongestionControl(ep.cc)
 
+	s.lr = s.initLossRecovery()
+
 	// A negative sndWndScale means that no scaling is in use, otherwise we
 	// store the scaling value.
 	if sndWndScale > 0 {
@@ -330,6 +345,14 @@ func (s *sender) initCongestionControl(congestionControlName tcpip.CongestionCon
 	}
 }
 
+// initLossRecovery selects the loss recovery algorithm for the sender.
+func (s *sender) initLossRecovery() lossRecovery {
+	if s.ep.sackPermitted {
+		return newSACKRecovery(s)
+	}
+	return newRenoRecovery(s)
+}
+
 // updateMaxPayloadSize updates the maximum payload size based on the given
 // MTU. If this is in response to "packet too big" control packets (indicated
 // by the count argument), it also reduces the number of outstanding packets and
@@ -550,7 +573,7 @@ func (s *sender) retransmitTimerExpired() bool {
 		// We were attempting fast recovery but were not successful.
 		// Leave the state. We don't need to update ssthresh because it
 		// has already been updated when entered fast-recovery.
-		s.leaveFastRecovery()
+		s.leaveRecovery()
 	}
 
 	s.state = RTORecovery
@@ -789,7 +812,7 @@ func (s *sender) maybeSendSegment(seg *segment, limit int, end seqnum.Value) (se
 			}
 			if !nextTooBig && seg.data.Size() < available {
 				// Segment is not full.
-				if s.outstanding > 0 && atomic.LoadUint32(&s.ep.delay) != 0 {
+				if s.outstanding > 0 && s.ep.ops.GetDelayOption() {
 					// Nagle's algorithm. From Wikipedia:
 					//   Nagle's algorithm works by
 					//   combining a number of small
@@ -808,7 +831,7 @@ func (s *sender) maybeSendSegment(seg *segment, limit int, end seqnum.Value) (se
 				// send space and MSS.
 				// TODO(gvisor.dev/issue/2833): Drain the held segments after a
 				// timeout.
-				if seg.data.Size() < s.maxPayloadSize && atomic.LoadUint32(&s.ep.cork) != 0 {
+				if seg.data.Size() < s.maxPayloadSize && s.ep.ops.GetCorkOption() {
 					return false
 				}
 			}
@@ -913,79 +936,6 @@ func (s *sender) maybeSendSegment(seg *segment, limit int, end seqnum.Value) (se
 	return true
 }
 
-// handleSACKRecovery implements the loss recovery phase as described in RFC6675
-// section 5, step C.
-func (s *sender) handleSACKRecovery(limit int, end seqnum.Value) (dataSent bool) {
-	s.SetPipe()
-
-	if smss := int(s.ep.scoreboard.SMSS()); limit > smss {
-		// Cap segment size limit to s.smss as SACK recovery requires
-		// that all retransmissions or new segments send during recovery
-		// be of <= SMSS.
-		limit = smss
-	}
-
-	nextSegHint := s.writeList.Front()
-	for s.outstanding < s.sndCwnd {
-		var nextSeg *segment
-		var rescueRtx bool
-		nextSeg, nextSegHint, rescueRtx = s.NextSeg(nextSegHint)
-		if nextSeg == nil {
-			return dataSent
-		}
-		if !s.isAssignedSequenceNumber(nextSeg) || s.sndNxt.LessThanEq(nextSeg.sequenceNumber) {
-			// New data being sent.
-
-			// Step C.3 described below is handled by
-			// maybeSendSegment which increments sndNxt when
-			// a segment is transmitted.
-			//
-			// Step C.3 "If any of the data octets sent in
-			// (C.1) are above HighData, HighData must be
-			// updated to reflect the transmission of
-			// previously unsent data."
-			//
-			// We pass s.smss as the limit as the Step 2) requires that
-			// new data sent should be of size s.smss or less.
-			if sent := s.maybeSendSegment(nextSeg, limit, end); !sent {
-				return dataSent
-			}
-			dataSent = true
-			s.outstanding++
-			s.writeNext = nextSeg.Next()
-			continue
-		}
-
-		// Now handle the retransmission case where we matched either step 1,3 or 4
-		// of the NextSeg algorithm.
-		// RFC 6675, Step C.4.
-		//
-		// "The estimate of the amount of data outstanding in the network
-		// must be updated by incrementing pipe by the number of octets
-		// transmitted in (C.1)."
-		s.outstanding++
-		dataSent = true
-		s.sendSegment(nextSeg)
-
-		segEnd := nextSeg.sequenceNumber.Add(nextSeg.logicalLen())
-		if rescueRtx {
-			// We do the last part of rule (4) of NextSeg here to update
-			// RescueRxt as until this point we don't know if we are going
-			// to use the rescue transmission.
-			s.fr.rescueRxt = s.fr.last
-		} else {
-			// RFC 6675, Step C.2
-			//
-			// "If any of the data octets sent in (C.1) are below
-			// HighData, HighRxt MUST be set to the highest sequence
-			// number of the retransmitted segment unless NextSeg ()
-			// rule (4) was invoked for this retransmission."
-			s.fr.highRxt = segEnd - 1
-		}
-	}
-	return dataSent
-}
-
 func (s *sender) sendZeroWindowProbe() {
 	ack, win := s.ep.rcv.getSendParams()
 	s.unackZeroWindowProbes++
@@ -1014,6 +964,30 @@ func (s *sender) disableZeroWindowProbing() {
 	s.resendTimer.disable()
 }
 
+func (s *sender) postXmit(dataSent bool) {
+	if dataSent {
+		// We sent data, so we should stop the keepalive timer to ensure
+		// that no keepalives are sent while there is pending data.
+		s.ep.disableKeepaliveTimer()
+	}
+
+	// If the remote has advertised a zero receive window and we have
+	// data to be sent out, start zero window probing to query the
+	// remote for its receive window size.
+	if s.writeNext != nil && s.sndWnd == 0 {
+		s.enableZeroWindowProbing()
+	}
+
+	// Enable the timer if we have pending data and it's not enabled yet.
+	if !s.resendTimer.enabled() && s.sndUna != s.sndNxt {
+		s.resendTimer.enable(s.rto)
+	}
+	// If we have no more pending data, start the keepalive timer.
+	if s.sndUna == s.sndNxt {
+		s.ep.resetKeepaliveTimer(false)
+	}
+}
+
 // sendData sends new data segments. It is called when data becomes available or
 // when the send window opens up.
 func (s *sender) sendData() {
@@ -1034,55 +1008,29 @@ func (s *sender) sendData() {
 	}
 
 	var dataSent bool
-
-	// RFC 6675 recovery algorithm step C 1-5.
-	if s.fr.active && s.ep.sackPermitted {
-		dataSent = s.handleSACKRecovery(s.maxPayloadSize, end)
-	} else {
-		for seg := s.writeNext; seg != nil && s.outstanding < s.sndCwnd; seg = seg.Next() {
-			cwndLimit := (s.sndCwnd - s.outstanding) * s.maxPayloadSize
-			if cwndLimit < limit {
-				limit = cwndLimit
-			}
-			if s.isAssignedSequenceNumber(seg) && s.ep.sackPermitted && s.ep.scoreboard.IsSACKED(seg.sackBlock()) {
-				// Move writeNext along so that we don't try and scan data that
-				// has already been SACKED.
-				s.writeNext = seg.Next()
-				continue
-			}
-			if sent := s.maybeSendSegment(seg, limit, end); !sent {
-				break
-			}
-			dataSent = true
-			s.outstanding += s.pCount(seg)
+	for seg := s.writeNext; seg != nil && s.outstanding < s.sndCwnd; seg = seg.Next() {
+		cwndLimit := (s.sndCwnd - s.outstanding) * s.maxPayloadSize
+		if cwndLimit < limit {
+			limit = cwndLimit
+		}
+		if s.isAssignedSequenceNumber(seg) && s.ep.sackPermitted && s.ep.scoreboard.IsSACKED(seg.sackBlock()) {
+			// Move writeNext along so that we don't try and scan data that
+			// has already been SACKED.
 			s.writeNext = seg.Next()
+			continue
 		}
+		if sent := s.maybeSendSegment(seg, limit, end); !sent {
+			break
+		}
+		dataSent = true
+		s.outstanding += s.pCount(seg)
+		s.writeNext = seg.Next()
 	}
 
-	if dataSent {
-		// We sent data, so we should stop the keepalive timer to ensure
-		// that no keepalives are sent while there is pending data.
-		s.ep.disableKeepaliveTimer()
-	}
-
-	// If the sender has advertized zero receive window and we have
-	// data to be sent out, start zero window probing to query the
-	// the remote for it's receive window size.
-	if s.writeNext != nil && s.sndWnd == 0 {
-		s.enableZeroWindowProbing()
-	}
-
-	// Enable the timer if we have pending data and it's not enabled yet.
-	if !s.resendTimer.enabled() && s.sndUna != s.sndNxt {
-		s.resendTimer.enable(s.rto)
-	}
-	// If we have no more pending data, start the keepalive timer.
-	if s.sndUna == s.sndNxt {
-		s.ep.resetKeepaliveTimer(false)
-	}
+	s.postXmit(dataSent)
 }
 
-func (s *sender) enterFastRecovery() {
+func (s *sender) enterRecovery() {
 	s.fr.active = true
 	// Save state to reflect we're now in fast recovery.
 	//
@@ -1104,7 +1052,7 @@ func (s *sender) enterFastRecovery() {
 	s.ep.stack.Stats().TCP.FastRecovery.Increment()
 }
 
-func (s *sender) leaveFastRecovery() {
+func (s *sender) leaveRecovery() {
 	s.fr.active = false
 	s.fr.maxCwnd = 0
 	s.dupAckCount = 0
@@ -1115,57 +1063,6 @@ func (s *sender) leaveFastRecovery() {
 	s.cc.PostRecovery()
 }
 
-func (s *sender) handleFastRecovery(seg *segment) (rtx bool) {
-	ack := seg.ackNumber
-	// We are in fast recovery mode. Ignore the ack if it's out of
-	// range.
-	if !ack.InRange(s.sndUna, s.sndNxt+1) {
-		return false
-	}
-
-	// Leave fast recovery if it acknowledges all the data covered by
-	// this fast recovery session.
-	if s.fr.last.LessThan(ack) {
-		s.leaveFastRecovery()
-		return false
-	}
-
-	if s.ep.sackPermitted {
-		// When SACK is enabled we let retransmission be governed by
-		// the SACK logic.
-		return false
-	}
-
-	// Don't count this as a duplicate if it is carrying data or
-	// updating the window.
-	if seg.logicalLen() != 0 || s.sndWnd != seg.window {
-		return false
-	}
-
-	// Inflate the congestion window if we're getting duplicate acks
-	// for the packet we retransmitted.
-	if ack == s.fr.first {
-		// We received a dup, inflate the congestion window by 1 packet
-		// if we're not at the max yet. Only inflate the window if
-		// regular FastRecovery is in use, RFC6675 does not require
-		// inflating cwnd on duplicate ACKs.
-		if s.sndCwnd < s.fr.maxCwnd {
-			s.sndCwnd++
-		}
-		return false
-	}
-
-	// A partial ack was received. Retransmit this packet and
-	// remember it so that we don't retransmit it again. We don't
-	// inflate the window because we're putting the same packet back
-	// onto the wire.
-	//
-	// N.B. The retransmit timer will be reset by the caller.
-	s.fr.first = ack
-	s.dupAckCount = 0
-	return true
-}
-
 // isAssignedSequenceNumber relies on the fact that we only set flags once a
 // sequencenumber is assigned and that is only done right before we send the
 // segment. As a result any segment that has a non-zero flag has a valid
@@ -1228,14 +1125,11 @@ func (s *sender) SetPipe() {
 	s.outstanding = pipe
 }
 
-// checkDuplicateAck is called when an ack is received. It manages the state
-// related to duplicate acks and determines if a retransmit is needed according
-// to the rules in RFC 6582 (NewReno).
-func (s *sender) checkDuplicateAck(seg *segment) (rtx bool) {
+// detectLoss is called when an ack is received and returns whether a loss is
+// detected. It manages the state related to duplicate acks and determines if
+// a retransmit is needed according to the rules in RFC 6582 (NewReno).
+func (s *sender) detectLoss(seg *segment) (fastRetransmit bool) {
 	ack := seg.ackNumber
-	if s.fr.active {
-		return s.handleFastRecovery(seg)
-	}
 
 	// We're not in fast recovery yet. A segment is considered a duplicate
 	// only if it doesn't carry any data and doesn't update the send window,
@@ -1266,14 +1160,14 @@ func (s *sender) checkDuplicateAck(seg *segment) (rtx bool) {
 	// See: https://tools.ietf.org/html/rfc6582#section-3.2 Step 2
 	//
 	// We only do the check here, the incrementing of last to the highest
-	// sequence number transmitted till now is done when enterFastRecovery
+	// sequence number transmitted till now is done when enterRecovery
 	// is invoked.
 	if !s.fr.last.LessThan(seg.ackNumber) {
 		s.dupAckCount = 0
 		return false
 	}
 	s.cc.HandleNDupAcks()
-	s.enterFastRecovery()
+	s.enterRecovery()
 	s.dupAckCount = 0
 	return true
 }
@@ -1285,21 +1179,29 @@ func (s *sender) checkDuplicateAck(seg *segment) (rtx bool) {
 // See: https://tools.ietf.org/html/draft-ietf-tcpm-rack-08#section-7.2
 // steps 2 and 3.
 func (s *sender) walkSACK(rcvdSeg *segment) {
+	// Look for DSACK block.
+	idx := 0
+	n := len(rcvdSeg.parsedOptions.SACKBlocks)
+	if s.checkDSACK(rcvdSeg) {
+		s.rc.setDSACKSeen()
+		idx = 1
+		n--
+	}
+
+	if n == 0 {
+		return
+	}
+
 	// Sort the SACK blocks. The first block is the most recent unacked
 	// block. The following blocks can be in arbitrary order.
-	sackBlocks := make([]header.SACKBlock, len(rcvdSeg.parsedOptions.SACKBlocks))
-	copy(sackBlocks, rcvdSeg.parsedOptions.SACKBlocks)
+	sackBlocks := make([]header.SACKBlock, n)
+	copy(sackBlocks, rcvdSeg.parsedOptions.SACKBlocks[idx:])
 	sort.Slice(sackBlocks, func(i, j int) bool {
 		return sackBlocks[j].Start.LessThan(sackBlocks[i].Start)
 	})
 
 	seg := s.writeList.Front()
 	for _, sb := range sackBlocks {
-		// This check excludes DSACK blocks.
-		if sb.Start.LessThanEq(rcvdSeg.ackNumber) || sb.Start.LessThanEq(s.sndUna) || s.sndNxt.LessThan(sb.End) {
-			continue
-		}
-
 		for seg != nil && seg.sequenceNumber.LessThan(sb.End) && seg.xmitCount != 0 {
 			if sb.Start.LessThanEq(seg.sequenceNumber) && !seg.acked {
 				s.rc.update(seg, rcvdSeg, s.ep.tsOffset)
@@ -1311,6 +1213,50 @@ func (s *sender) walkSACK(rcvdSeg *segment) {
 	}
 }
 
+// checkDSACK reports whether the first SACK block of rcvdSeg is a DSACK block.
+func (s *sender) checkDSACK(rcvdSeg *segment) bool {
+	n := len(rcvdSeg.parsedOptions.SACKBlocks)
+	if n == 0 {
+		return false
+	}
+
+	sb := rcvdSeg.parsedOptions.SACKBlocks[0]
+	// Check if SACK block is invalid.
+	if sb.End.LessThan(sb.Start) {
+		return false
+	}
+
+	// See: https://tools.ietf.org/html/rfc2883#section-5 DSACK is sent in
+	// at most one SACK block. DSACK is detected in the below two cases:
+	// * If the SACK sequence space is less than this cumulative ACK, it is
+	//   an indication that the segment identified by the SACK block has
+	//   been received more than once by the receiver.
+	// * If the sequence space in the first SACK block is greater than the
+	//   cumulative ACK, then the sender next compares the sequence space
+	//   in the first SACK block with the sequence space in the second SACK
+	//   block, if there is one. This comparison can determine if the first
+	//   SACK block is reporting duplicate data that lies above the
+	//   cumulative ACK.
+	if sb.Start.LessThan(rcvdSeg.ackNumber) {
+		return true
+	}
+
+	if n > 1 {
+		sb1 := rcvdSeg.parsedOptions.SACKBlocks[1]
+		if sb1.End.LessThan(sb1.Start) {
+			return false
+		}
+
+		// If the first SACK block is fully covered by second SACK
+		// block, then the first block is a DSACK block.
+		if sb.End.LessThanEq(sb1.End) && sb1.Start.LessThanEq(sb.Start) {
+			return true
+		}
+	}
+
+	return false
+}
+
 // handleRcvdSegment is called when a segment is received; it is responsible for
 // updating the send-related state.
 func (s *sender) handleRcvdSegment(rcvdSeg *segment) {
@@ -1363,14 +1309,23 @@ func (s *sender) handleRcvdSegment(rcvdSeg *segment) {
 		s.SetPipe()
 	}
 
-	// Count the duplicates and do the fast retransmit if needed.
-	rtx := s.checkDuplicateAck(rcvdSeg)
+	ack := rcvdSeg.ackNumber
+	fastRetransmit := false
+	// Do not leave fast recovery if the ACK is out of range.
+	if s.fr.active {
+		// Leave fast recovery if it acknowledges all the data covered by
+		// this fast recovery session.
+		if ack.InRange(s.sndUna, s.sndNxt+1) && s.fr.last.LessThan(ack) {
+			s.leaveRecovery()
+		}
+	} else {
+		// Detect loss by counting the duplicates and enter recovery.
+		fastRetransmit = s.detectLoss(rcvdSeg)
+	}
 
 	// Stash away the current window size.
 	s.sndWnd = rcvdSeg.window
 
-	ack := rcvdSeg.ackNumber
-
 	// Disable zero window probing if remote advertizes a non-zero receive
 	// window. This can be with an ACK to the zero window probe (where the
 	// acknumber refers to the already acknowledged byte) OR to any previously
@@ -1487,19 +1442,24 @@ func (s *sender) handleRcvdSegment(rcvdSeg *segment) {
 			s.resendTimer.disable()
 		}
 	}
+
 	// Now that we've popped all acknowledged data from the retransmit
 	// queue, retransmit if needed.
-	if rtx {
-		s.resendSegment()
+	if s.fr.active {
+		s.lr.DoRecovery(rcvdSeg, fastRetransmit)
+		// When SACK is enabled, data sending is governed by the
+		// RFC 6675 Section 5 recovery steps A-C.
+		// See: https://tools.ietf.org/html/rfc6675#section-5.
+		if s.ep.sackPermitted {
+			return
+		}
 	}
 
 	// Send more data now that some of the pending data has been ack'd, or
 	// that the window opened up, or the congestion window was inflated due
 	// to a duplicate ack during fast recovery. This will also re-enable
 	// the retransmit timer if needed.
-	if !s.ep.sackPermitted || s.fr.active || s.dupAckCount == 0 || rcvdSeg.hasNewSACKInfo {
-		s.sendData()
-	}
+	s.sendData()
 }
 
 // sendSegment sends the specified segment.
diff --git a/pkg/tcpip/transport/tcp/tcp_rack_test.go b/pkg/tcpip/transport/tcp/tcp_rack_test.go
index d3f92b48c..9818ffa0f 100644
--- a/pkg/tcpip/transport/tcp/tcp_rack_test.go
+++ b/pkg/tcpip/transport/tcp/tcp_rack_test.go
@@ -30,15 +30,17 @@ const (
 	maxPayload       = 10
 	tsOptionSize     = 12
 	maxTCPOptionSize = 40
+	mtu              = header.TCPMinimumSize + header.IPv4MinimumSize + maxTCPOptionSize + maxPayload
 )
 
 // TestRACKUpdate tests the RACK related fields are updated when an ACK is
 // received on a SACK enabled connection.
 func TestRACKUpdate(t *testing.T) {
-	c := context.New(t, uint32(header.TCPMinimumSize+header.IPv4MinimumSize+maxTCPOptionSize+maxPayload))
+	c := context.New(t, uint32(mtu))
 	defer c.Cleanup()
 
 	var xmitTime time.Time
+	probeDone := make(chan struct{})
 	c.Stack().AddTCPProbe(func(state stack.TCPEndpointState) {
 		// Validate that the endpoint Sender.RACKState is what we expect.
 		if state.Sender.RACKState.XmitTime.Before(xmitTime) {
@@ -54,6 +56,7 @@ func TestRACKUpdate(t *testing.T) {
 		if state.Sender.RACKState.RTT == 0 {
 			t.Fatalf("RACK RTT failed to update when an ACK is received, got RACKState.RTT == 0 want != 0")
 		}
+		close(probeDone)
 	})
 	setStackSACKPermitted(t, c, true)
 	createConnectedWithSACKAndTS(c)
@@ -73,18 +76,20 @@ func TestRACKUpdate(t *testing.T) {
 	c.ReceiveAndCheckPacketWithOptions(data, bytesRead, maxPayload, tsOptionSize)
 	bytesRead += maxPayload
 	c.SendAck(seqnum.Value(context.TestInitialSequenceNumber).Add(1), bytesRead)
-	time.Sleep(200 * time.Millisecond)
+
+	// Wait for the probe function to finish processing the ACK before the
+	// test completes.
+	<-probeDone
 }
 
 // TestRACKDetectReorder tests that RACK detects packet reordering.
 func TestRACKDetectReorder(t *testing.T) {
-	c := context.New(t, uint32(header.TCPMinimumSize+header.IPv4MinimumSize+maxTCPOptionSize+maxPayload))
+	c := context.New(t, uint32(mtu))
 	defer c.Cleanup()
 
-	const ackNum = 2
-
 	var n int
-	ch := make(chan struct{})
+	const ackNumToVerify = 2
+	probeDone := make(chan struct{})
 	c.Stack().AddTCPProbe(func(state stack.TCPEndpointState) {
 		gotSeq := state.Sender.RACKState.FACK
 		wantSeq := state.Sender.SndNxt
@@ -95,7 +100,7 @@ func TestRACKDetectReorder(t *testing.T) {
 		}
 
 		n++
-		if n < ackNum {
+		if n < ackNumToVerify {
 			if state.Sender.RACKState.Reord {
 				t.Fatalf("RACK reorder detected when there is no reordering")
 			}
@@ -105,11 +110,11 @@ func TestRACKDetectReorder(t *testing.T) {
 		if state.Sender.RACKState.Reord == false {
 			t.Fatalf("RACK reorder detection failed")
 		}
-		close(ch)
+		close(probeDone)
 	})
 	setStackSACKPermitted(t, c, true)
 	createConnectedWithSACKAndTS(c)
-	data := buffer.NewView(ackNum * maxPayload)
+	data := buffer.NewView(ackNumToVerify * maxPayload)
 	for i := range data {
 		data[i] = byte(i)
 	}
@@ -120,7 +125,7 @@ func TestRACKDetectReorder(t *testing.T) {
 	}
 
 	bytesRead := 0
-	for i := 0; i < ackNum; i++ {
+	for i := 0; i < ackNumToVerify; i++ {
 		c.ReceiveAndCheckPacketWithOptions(data, bytesRead, maxPayload, tsOptionSize)
 		bytesRead += maxPayload
 	}
@@ -133,5 +138,393 @@ func TestRACKDetectReorder(t *testing.T) {
 
 	// Wait for the probe function to finish processing the ACK before the
 	// test completes.
-	<-ch
+	<-probeDone
+}
+
+func sendAndReceive(t *testing.T, c *context.Context, numPackets int) buffer.View {
+	setStackSACKPermitted(t, c, true)
+	createConnectedWithSACKAndTS(c)
+
+	data := buffer.NewView(numPackets * maxPayload)
+	for i := range data {
+		data[i] = byte(i)
+	}
+
+	// Write the data.
+	if _, _, err := c.EP.Write(tcpip.SlicePayload(data), tcpip.WriteOptions{}); err != nil {
+		t.Fatalf("Write failed: %s", err)
+	}
+
+	bytesRead := 0
+	for i := 0; i < numPackets; i++ {
+		c.ReceiveAndCheckPacketWithOptions(data, bytesRead, maxPayload, tsOptionSize)
+		bytesRead += maxPayload
+	}
+
+	return data
+}
+
+const (
+	validDSACKDetected   = 1
+	failedToDetectDSACK  = 2
+	invalidDSACKDetected = 3
+)
+
+func addDSACKSeenCheckerProbe(t *testing.T, c *context.Context, numACK int, probeDone chan int) {
+	var n int
+	c.Stack().AddTCPProbe(func(state stack.TCPEndpointState) {
+		// Validate that RACK detects DSACK.
+		n++
+		if n < numACK {
+			if state.Sender.RACKState.DSACKSeen {
+				probeDone <- invalidDSACKDetected
+			}
+			return
+		}
+
+		if !state.Sender.RACKState.DSACKSeen {
+			probeDone <- failedToDetectDSACK
+			return
+		}
+		probeDone <- validDSACKDetected
+	})
+}
+
+// TestRACKDetectDSACK tests that RACK detects DSACK with duplicate segments.
+// See: https://tools.ietf.org/html/rfc2883#section-4.1.1.
+func TestRACKDetectDSACK(t *testing.T) {
+	c := context.New(t, uint32(mtu))
+	defer c.Cleanup()
+
+	probeDone := make(chan int)
+	const ackNumToVerify = 2
+	addDSACKSeenCheckerProbe(t, c, ackNumToVerify, probeDone)
+
+	numPackets := 8
+	data := sendAndReceive(t, c, numPackets)
+
+	// Cumulative ACK for [1-5] packets.
+	seq := seqnum.Value(context.TestInitialSequenceNumber).Add(1)
+	bytesRead := 5 * maxPayload
+	c.SendAck(seq, bytesRead)
+
+	// Expect retransmission of #6 packet.
+	c.ReceiveAndCheckPacketWithOptions(data, bytesRead, maxPayload, tsOptionSize)
+
+	// Send DSACK block for #6 packet indicating both
+	// initial and retransmitted packet are received and
+	// packets [1-7] are received.
+	start := c.IRS.Add(seqnum.Size(bytesRead))
+	end := start.Add(maxPayload)
+	bytesRead += 2 * maxPayload
+	c.SendAckWithSACK(seq, bytesRead, []header.SACKBlock{{start, end}})
+
+	// Wait for the probe function to finish processing the
+	// ACK before the test completes.
+	err := <-probeDone
+	switch err {
+	case failedToDetectDSACK:
+		t.Fatalf("RACK DSACK detection failed")
+	case invalidDSACKDetected:
+		t.Fatalf("RACK DSACK detected when there is no duplicate SACK")
+	}
+}
+
+// TestRACKDetectDSACKWithOutOfOrder tests that RACK detects DSACK with out of
+// order segments.
+// See: https://tools.ietf.org/html/rfc2883#section-4.1.2.
+func TestRACKDetectDSACKWithOutOfOrder(t *testing.T) {
+	c := context.New(t, uint32(mtu))
+	defer c.Cleanup()
+
+	probeDone := make(chan int)
+	const ackNumToVerify = 2
+	addDSACKSeenCheckerProbe(t, c, ackNumToVerify, probeDone)
+
+	numPackets := 10
+	data := sendAndReceive(t, c, numPackets)
+
+	// Cumulative ACK for [1-5] packets.
+	seq := seqnum.Value(context.TestInitialSequenceNumber).Add(1)
+	bytesRead := 5 * maxPayload
+	c.SendAck(seq, bytesRead)
+
+	// Expect retransmission of #6 packet.
+	c.ReceiveAndCheckPacketWithOptions(data, bytesRead, maxPayload, tsOptionSize)
+
+	// Send DSACK block for #6 packet indicating both
+	// initial and retransmitted packet are received and
+	// packets [1-7] are received.
+	start := c.IRS.Add(seqnum.Size(bytesRead))
+	end := start.Add(maxPayload)
+	bytesRead += 2 * maxPayload
+	// Send the DSACK block for #6 together with a SACK block for the
+	// out of order #9 packet.
+	start1 := c.IRS.Add(seqnum.Size(bytesRead) + maxPayload)
+	end1 := start1.Add(maxPayload)
+	c.SendAckWithSACK(seq, bytesRead, []header.SACKBlock{{start, end}, {start1, end1}})
+
+	// Wait for the probe function to finish processing the
+	// ACK before the test completes.
+	err := <-probeDone
+	switch err {
+	case failedToDetectDSACK:
+		t.Fatalf("RACK DSACK detection failed")
+	case invalidDSACKDetected:
+		t.Fatalf("RACK DSACK detected when there is no duplicate SACK")
+	}
+}
+
+// TestRACKDetectDSACKWithOutOfOrderDup tests that DSACK is detected on a
+// duplicate of out of order packet.
+// See: https://tools.ietf.org/html/rfc2883#section-4.1.3
+func TestRACKDetectDSACKWithOutOfOrderDup(t *testing.T) {
+	c := context.New(t, uint32(mtu))
+	defer c.Cleanup()
+
+	probeDone := make(chan int)
+	const ackNumToVerify = 4
+	addDSACKSeenCheckerProbe(t, c, ackNumToVerify, probeDone)
+
+	numPackets := 10
+	sendAndReceive(t, c, numPackets)
+
+	// ACK [1-5] packets.
+	seq := seqnum.Value(context.TestInitialSequenceNumber).Add(1)
+	bytesRead := 5 * maxPayload
+	c.SendAck(seq, bytesRead)
+
+	// Send SACK indicating #6 packet is missing and received #7 packet.
+	offset := seqnum.Size(bytesRead + maxPayload)
+	start := c.IRS.Add(1 + offset)
+	end := start.Add(maxPayload)
+	c.SendAckWithSACK(seq, bytesRead, []header.SACKBlock{{start, end}})
+
+	// Send SACK with #6 packet is missing and received [7-8] packets.
+	end = start.Add(2 * maxPayload)
+	c.SendAckWithSACK(seq, bytesRead, []header.SACKBlock{{start, end}})
+
+	// Consider #8 packet is duplicated on the network and send DSACK.
+	dsackStart := c.IRS.Add(1 + offset + maxPayload)
+	dsackEnd := dsackStart.Add(maxPayload)
+	c.SendAckWithSACK(seq, bytesRead, []header.SACKBlock{{dsackStart, dsackEnd}, {start, end}})
+
+	// Wait for the probe function to finish processing the ACK before the
+	// test completes.
+	err := <-probeDone
+	switch err {
+	case failedToDetectDSACK:
+		t.Fatalf("RACK DSACK detection failed")
+	case invalidDSACKDetected:
+		t.Fatalf("RACK DSACK detected when there is no duplicate SACK")
+	}
+}
+
+// TestRACKDetectDSACKSingleDup tests DSACK for a single duplicate subsegment.
+// See: https://tools.ietf.org/html/rfc2883#section-4.2.1.
+func TestRACKDetectDSACKSingleDup(t *testing.T) {
+	c := context.New(t, uint32(mtu))
+	defer c.Cleanup()
+
+	probeDone := make(chan int)
+	const ackNumToVerify = 4
+	addDSACKSeenCheckerProbe(t, c, ackNumToVerify, probeDone)
+
+	numPackets := 4
+	data := sendAndReceive(t, c, numPackets)
+
+	// Send ACK for #1 packet.
+	bytesRead := maxPayload
+	seq := seqnum.Value(context.TestInitialSequenceNumber).Add(1)
+	c.SendAck(seq, bytesRead)
+
+	// Missing [2-3] packets and received #4 packet.
+	seq = seqnum.Value(context.TestInitialSequenceNumber).Add(1)
+	start := c.IRS.Add(1 + seqnum.Size(3*maxPayload))
+	end := start.Add(seqnum.Size(maxPayload))
+	c.SendAckWithSACK(seq, bytesRead, []header.SACKBlock{{start, end}})
+
+	// Expect retransmission of #2 packet.
+	c.ReceiveAndCheckPacketWithOptions(data, bytesRead, maxPayload, tsOptionSize)
+
+	// ACK for retransmitted #2 packet.
+	bytesRead += maxPayload
+	c.SendAckWithSACK(seq, bytesRead, []header.SACKBlock{{start, end}})
+
+	// Simulate receiving delayed subsegment of #2 packet and delayed #3 packet by
+	// sending DSACK block for the subsegment.
+	dsackStart := c.IRS.Add(1 + seqnum.Size(bytesRead))
+	dsackEnd := dsackStart.Add(seqnum.Size(maxPayload / 2))
+	c.SendAckWithSACK(seq, numPackets*maxPayload, []header.SACKBlock{{dsackStart, dsackEnd}})
+
+	// Wait for the probe function to finish processing the ACK before the
+	// test completes.
+	err := <-probeDone
+	switch err {
+	case failedToDetectDSACK:
+		t.Fatalf("RACK DSACK detection failed")
+	case invalidDSACKDetected:
+		t.Fatalf("RACK DSACK detected when there is no duplicate SACK")
+	}
+}
+
+// TestRACKDetectDSACKDupWithCumulativeACK tests DSACK for two non-contiguous
+// duplicate subsegments covered by the cumulative acknowledgement.
+// See: https://tools.ietf.org/html/rfc2883#section-4.2.2.
+func TestRACKDetectDSACKDupWithCumulativeACK(t *testing.T) {
+	c := context.New(t, uint32(mtu))
+	defer c.Cleanup()
+
+	probeDone := make(chan int)
+	const ackNumToVerify = 5
+	addDSACKSeenCheckerProbe(t, c, ackNumToVerify, probeDone)
+
+	numPackets := 6
+	data := sendAndReceive(t, c, numPackets)
+
+	// Send ACK for #1 packet.
+	bytesRead := maxPayload
+	seq := seqnum.Value(context.TestInitialSequenceNumber).Add(1)
+	c.SendAck(seq, bytesRead)
+
+	// Missing [2-5] packets and received #6 packet.
+	seq = seqnum.Value(context.TestInitialSequenceNumber).Add(1)
+	start := c.IRS.Add(1 + seqnum.Size(5*maxPayload))
+	end := start.Add(seqnum.Size(maxPayload))
+	c.SendAckWithSACK(seq, bytesRead, []header.SACKBlock{{start, end}})
+
+	// Expect retransmission of #2 packet.
+	c.ReceiveAndCheckPacketWithOptions(data, bytesRead, maxPayload, tsOptionSize)
+
+	// Received delayed #2 packet.
+	bytesRead += maxPayload
+	c.SendAckWithSACK(seq, bytesRead, []header.SACKBlock{{start, end}})
+
+	// Received delayed #4 packet.
+	start1 := c.IRS.Add(1 + seqnum.Size(3*maxPayload))
+	end1 := start1.Add(seqnum.Size(maxPayload))
+	c.SendAckWithSACK(seq, bytesRead, []header.SACKBlock{{start1, end1}, {start, end}})
+
+	// Simulate receiving retransmitted subsegment for #2 packet and delayed #3
+	// packet by sending DSACK block for #2 packet.
+	dsackStart := c.IRS.Add(1 + seqnum.Size(maxPayload))
+	dsackEnd := dsackStart.Add(seqnum.Size(maxPayload / 2))
+	c.SendAckWithSACK(seq, 4*maxPayload, []header.SACKBlock{{dsackStart, dsackEnd}, {start, end}})
+
+	// Wait for the probe function to finish processing the ACK before the
+	// test completes.
+	err := <-probeDone
+	switch err {
+	case failedToDetectDSACK:
+		t.Fatalf("RACK DSACK detection failed")
+	case invalidDSACKDetected:
+		t.Fatalf("RACK DSACK detected when there is no duplicate SACK")
+	}
+}
+
+// TestRACKDetectDSACKDup tests two non-contiguous duplicate subsegments not
+// covered by the cumulative acknowledgement.
+// See: https://tools.ietf.org/html/rfc2883#section-4.2.3.
+func TestRACKDetectDSACKDup(t *testing.T) {
+	c := context.New(t, uint32(mtu))
+	defer c.Cleanup()
+
+	probeDone := make(chan int)
+	const ackNumToVerify = 5
+	addDSACKSeenCheckerProbe(t, c, ackNumToVerify, probeDone)
+
+	numPackets := 7
+	data := sendAndReceive(t, c, numPackets)
+
+	// Send ACK for #1 packet.
+	bytesRead := maxPayload
+	seq := seqnum.Value(context.TestInitialSequenceNumber).Add(1)
+	c.SendAck(seq, bytesRead)
+
+	// Missing [2-6] packets and SACK #7 packet.
+	seq = seqnum.Value(context.TestInitialSequenceNumber).Add(1)
+	start := c.IRS.Add(1 + seqnum.Size(6*maxPayload))
+	end := start.Add(seqnum.Size(maxPayload))
+	c.SendAckWithSACK(seq, bytesRead, []header.SACKBlock{{start, end}})
+
+	// Received delayed #3 packet.
+	start1 := c.IRS.Add(1 + seqnum.Size(2*maxPayload))
+	end1 := start1.Add(seqnum.Size(maxPayload))
+	c.SendAckWithSACK(seq, bytesRead, []header.SACKBlock{{start1, end1}, {start, end}})
+
+	// Expect retransmission of #2 packet.
+	c.ReceiveAndCheckPacketWithOptions(data, bytesRead, maxPayload, tsOptionSize)
+
+	// Consider #2 packet has been dropped and SACK #4 packet.
+	start2 := c.IRS.Add(1 + seqnum.Size(3*maxPayload))
+	end2 := start2.Add(seqnum.Size(maxPayload))
+	c.SendAckWithSACK(seq, bytesRead, []header.SACKBlock{{start2, end2}, {start1, end1}, {start, end}})
+
+	// Simulate receiving retransmitted subsegment for #3 packet and delayed #5
+	// packet by sending DSACK block for the subsegment.
+	dsackStart := c.IRS.Add(1 + seqnum.Size(2*maxPayload))
+	dsackEnd := dsackStart.Add(seqnum.Size(maxPayload / 2))
+	end1 = end1.Add(seqnum.Size(2 * maxPayload))
+	c.SendAckWithSACK(seq, bytesRead, []header.SACKBlock{{dsackStart, dsackEnd}, {start1, end1}})
+
+	// Wait for the probe function to finish processing the ACK before the
+	// test completes.
+	err := <-probeDone
+	switch err {
+	case failedToDetectDSACK:
+		t.Fatalf("RACK DSACK detection failed")
+	case invalidDSACKDetected:
+		t.Fatalf("RACK DSACK detected when there is no duplicate SACK")
+	}
+}
+
+// TestRACKWithInvalidDSACKBlock tests that DSACK is not detected when DSACK
+// is not the first SACK block.
+func TestRACKWithInvalidDSACKBlock(t *testing.T) {
+	c := context.New(t, uint32(mtu))
+	defer c.Cleanup()
+
+	probeDone := make(chan struct{})
+	const ackNumToVerify = 2
+	var n int
+	c.Stack().AddTCPProbe(func(state stack.TCPEndpointState) {
+		// Validate that RACK does not detect DSACK when DSACK block is
+		// not the first SACK block.
+		n++
+		t.Helper()
+		if state.Sender.RACKState.DSACKSeen {
+			t.Fatalf("RACK DSACK detected when there is no duplicate SACK")
+		}
+
+		if n == ackNumToVerify {
+			close(probeDone)
+		}
+	})
+
+	numPackets := 10
+	data := sendAndReceive(t, c, numPackets)
+
+	// Cumulative ACK for [1-5] packets.
+	seq := seqnum.Value(context.TestInitialSequenceNumber).Add(1)
+	bytesRead := 5 * maxPayload
+	c.SendAck(seq, bytesRead)
+
+	// Expect retransmission of #6 packet.
+	c.ReceiveAndCheckPacketWithOptions(data, bytesRead, maxPayload, tsOptionSize)
+
+	// Send DSACK block for #6 packet indicating both
+	// initial and retransmitted packet are received and
+	// packets [1-7] are received.
+	start := c.IRS.Add(seqnum.Size(bytesRead))
+	end := start.Add(maxPayload)
+	bytesRead += 2 * maxPayload
+
+	// Send DSACK block as second block.
+	start1 := c.IRS.Add(seqnum.Size(bytesRead) + maxPayload)
+	end1 := start1.Add(maxPayload)
+	c.SendAckWithSACK(seq, bytesRead, []header.SACKBlock{{start1, end1}, {start, end}})
+
+	// Wait for the probe function to finish processing the
+	// ACK before the test completes.
+	<-probeDone
 }
diff --git a/pkg/tcpip/transport/tcp/tcp_test.go b/pkg/tcpip/transport/tcp/tcp_test.go
index a7149efd0..7581bdc97 100644
--- a/pkg/tcpip/transport/tcp/tcp_test.go
+++ b/pkg/tcpip/transport/tcp/tcp_test.go
@@ -63,7 +63,7 @@ func TestGiveUpConnect(t *testing.T) {
 
 	// Register for notification, then start connection attempt.
 	waitEntry, notifyCh := waiter.NewChannelEntry(nil)
-	wq.EventRegister(&waitEntry, waiter.EventOut)
+	wq.EventRegister(&waitEntry, waiter.EventHUp)
 	defer wq.EventUnregister(&waitEntry)
 
 	if err := ep.Connect(tcpip.FullAddress{Addr: context.TestAddr, Port: context.TestPort}); err != tcpip.ErrConnectStarted {
@@ -75,9 +75,6 @@ func TestGiveUpConnect(t *testing.T) {
 
 	// Wait for ep to become writable.
 	<-notifyCh
-	if err := ep.LastError(); err != tcpip.ErrAborted {
-		t.Fatalf("got ep.LastError() = %s, want = %s", err, tcpip.ErrAborted)
-	}
 
 	// Call Connect again to retreive the handshake failure status
 	// and stats updates.
@@ -267,7 +264,7 @@ func TestTCPResetsSentNoICMP(t *testing.T) {
 	}
 
 	// Read outgoing ICMP stats and check no ICMP DstUnreachable was recorded.
-	sent := stats.ICMP.V4PacketsSent
+	sent := stats.ICMP.V4.PacketsSent
 	if got, want := sent.DstUnreachable.Value(), uint64(0); got != want {
 		t.Errorf("got ICMP DstUnreachable.Value() = %d, want = %d", got, want)
 	}
@@ -1448,7 +1445,7 @@ func TestSynSent(t *testing.T) {
 
 			// Start connection attempt.
 			waitEntry, ch := waiter.NewChannelEntry(nil)
-			c.WQ.EventRegister(&waitEntry, waiter.EventOut)
+			c.WQ.EventRegister(&waitEntry, waiter.EventHUp)
 			defer c.WQ.EventUnregister(&waitEntry)
 
 			addr := tcpip.FullAddress{Addr: context.TestAddr, Port: context.TestPort}
@@ -2532,10 +2529,10 @@ func TestSegmentMerging(t *testing.T) {
 		{
 			"cork",
 			func(ep tcpip.Endpoint) {
-				ep.SetSockOptBool(tcpip.CorkOption, true)
+				ep.SocketOptions().SetCorkOption(true)
 			},
 			func(ep tcpip.Endpoint) {
-				ep.SetSockOptBool(tcpip.CorkOption, false)
+				ep.SocketOptions().SetCorkOption(false)
 			},
 		},
 	}
@@ -2627,7 +2624,7 @@ func TestDelay(t *testing.T) {
 
 	c.CreateConnected(789, 30000, -1 /* epRcvBuf */)
 
-	c.EP.SetSockOptBool(tcpip.DelayOption, true)
+	c.EP.SocketOptions().SetDelayOption(true)
 
 	var allData []byte
 	for i, data := range [][]byte{{0}, {1, 2, 3, 4}, {5, 6, 7}, {8, 9}, {10}, {11}} {
@@ -2675,7 +2672,7 @@ func TestUndelay(t *testing.T) {
 
 	c.CreateConnected(789, 30000, -1 /* epRcvBuf */)
 
-	c.EP.SetSockOptBool(tcpip.DelayOption, true)
+	c.EP.SocketOptions().SetDelayOption(true)
 
 	allData := [][]byte{{0}, {1, 2, 3}}
 	for i, data := range allData {
@@ -2708,7 +2705,7 @@ func TestUndelay(t *testing.T) {
 	// Check that we don't get the second packet yet.
 	c.CheckNoPacketTimeout("delayed second packet transmitted", 100*time.Millisecond)
 
-	c.EP.SetSockOptBool(tcpip.DelayOption, false)
+	c.EP.SocketOptions().SetDelayOption(false)
 
 	// Check that data is received.
 	second := c.GetPacket()
@@ -2745,8 +2742,8 @@ func TestMSSNotDelayed(t *testing.T) {
 		fn   func(tcpip.Endpoint)
 	}{
 		{"no-op", func(tcpip.Endpoint) {}},
-		{"delay", func(ep tcpip.Endpoint) { ep.SetSockOptBool(tcpip.DelayOption, true) }},
-		{"cork", func(ep tcpip.Endpoint) { ep.SetSockOptBool(tcpip.CorkOption, true) }},
+		{"delay", func(ep tcpip.Endpoint) { ep.SocketOptions().SetDelayOption(true) }},
+		{"cork", func(ep tcpip.Endpoint) { ep.SocketOptions().SetCorkOption(true) }},
 	}
 
 	for _, test := range tests {
@@ -3198,6 +3195,11 @@ loop:
 		case tcpip.ErrWouldBlock:
 			select {
 			case <-ch:
+				// Expect the state to be StateError and subsequent Reads to fail with HardError.
+				if _, _, err := c.EP.Read(nil); err != tcpip.ErrConnectionReset {
+					t.Fatalf("got c.EP.Read(nil) = %s, want = %s", err, tcpip.ErrConnectionReset)
+				}
+				break loop
 			case <-time.After(1 * time.Second):
 				t.Fatalf("Timed out waiting for reset to arrive")
 			}
@@ -3207,14 +3209,10 @@ loop:
 			t.Fatalf("got c.EP.Read(nil) = %s, want = %s", err, tcpip.ErrConnectionReset)
 		}
 	}
-	// Expect the state to be StateError and subsequent Reads to fail with HardError.
-	if _, _, err := c.EP.Read(nil); err != tcpip.ErrConnectionReset {
-		t.Fatalf("got c.EP.Read(nil) = %s, want = %s", err, tcpip.ErrConnectionReset)
-	}
+
 	if tcp.EndpointState(c.EP.State()) != tcp.StateError {
 		t.Fatalf("got EP state is not StateError")
 	}
-
 	if got := c.Stack().Stats().TCP.EstablishedResets.Value(); got != 1 {
 		t.Errorf("got stats.TCP.EstablishedResets.Value() = %d, want = 1", got)
 	}
@@ -4150,7 +4148,7 @@ func TestReadAfterClosedState(t *testing.T) {
 
 	// Check that peek works.
 	peekBuf := make([]byte, 10)
-	n, _, err := c.EP.Peek([][]byte{peekBuf})
+	n, err := c.EP.Peek([][]byte{peekBuf})
 	if err != nil {
 		t.Fatalf("Peek failed: %s", err)
 	}
@@ -4176,7 +4174,7 @@ func TestReadAfterClosedState(t *testing.T) {
 		t.Fatalf("got c.EP.Read(nil) = %s, want = %s", err, tcpip.ErrClosedForReceive)
 	}
 
-	if _, _, err := c.EP.Peek([][]byte{peekBuf}); err != tcpip.ErrClosedForReceive {
+	if _, err := c.EP.Peek([][]byte{peekBuf}); err != tcpip.ErrClosedForReceive {
 		t.Fatalf("got c.EP.Peek(...) = %s, want = %s", err, tcpip.ErrClosedForReceive)
 	}
 }
@@ -4193,9 +4191,7 @@ func TestReusePort(t *testing.T) {
 	if err != nil {
 		t.Fatalf("NewEndpoint failed; %s", err)
 	}
-	if err := c.EP.SetSockOptBool(tcpip.ReuseAddressOption, true); err != nil {
-		t.Fatalf("SetSockOptBool ReuseAddressOption failed: %s", err)
-	}
+	c.EP.SocketOptions().SetReuseAddress(true)
 	if err := c.EP.Bind(tcpip.FullAddress{Port: context.StackPort}); err != nil {
 		t.Fatalf("Bind failed: %s", err)
 	}
@@ -4205,9 +4201,7 @@ func TestReusePort(t *testing.T) {
 	if err != nil {
 		t.Fatalf("NewEndpoint failed; %s", err)
 	}
-	if err := c.EP.SetSockOptBool(tcpip.ReuseAddressOption, true); err != nil {
-		t.Fatalf("SetSockOptBool ReuseAddressOption failed: %s", err)
-	}
+	c.EP.SocketOptions().SetReuseAddress(true)
 	if err := c.EP.Bind(tcpip.FullAddress{Port: context.StackPort}); err != nil {
 		t.Fatalf("Bind failed: %s", err)
 	}
@@ -4218,9 +4212,7 @@ func TestReusePort(t *testing.T) {
 	if err != nil {
 		t.Fatalf("NewEndpoint failed; %s", err)
 	}
-	if err := c.EP.SetSockOptBool(tcpip.ReuseAddressOption, true); err != nil {
-		t.Fatalf("SetSockOptBool ReuseAddressOption failed: %s", err)
-	}
+	c.EP.SocketOptions().SetReuseAddress(true)
 	if err := c.EP.Bind(tcpip.FullAddress{Port: context.StackPort}); err != nil {
 		t.Fatalf("Bind failed: %s", err)
 	}
@@ -4233,9 +4225,7 @@ func TestReusePort(t *testing.T) {
 	if err != nil {
 		t.Fatalf("NewEndpoint failed; %s", err)
 	}
-	if err := c.EP.SetSockOptBool(tcpip.ReuseAddressOption, true); err != nil {
-		t.Fatalf("SetSockOptBool ReuseAddressOption failed: %s", err)
-	}
+	c.EP.SocketOptions().SetReuseAddress(true)
 	if err := c.EP.Bind(tcpip.FullAddress{Port: context.StackPort}); err != nil {
 		t.Fatalf("Bind failed: %s", err)
 	}
@@ -4246,9 +4236,7 @@ func TestReusePort(t *testing.T) {
 	if err != nil {
 		t.Fatalf("NewEndpoint failed; %s", err)
 	}
-	if err := c.EP.SetSockOptBool(tcpip.ReuseAddressOption, true); err != nil {
-		t.Fatalf("SetSockOptBool ReuseAddressOption failed: %s", err)
-	}
+	c.EP.SocketOptions().SetReuseAddress(true)
 	if err := c.EP.Bind(tcpip.FullAddress{Port: context.StackPort}); err != nil {
 		t.Fatalf("Bind failed: %s", err)
 	}
@@ -4261,9 +4249,7 @@ func TestReusePort(t *testing.T) {
 	if err != nil {
 		t.Fatalf("NewEndpoint failed; %s", err)
 	}
-	if err := c.EP.SetSockOptBool(tcpip.ReuseAddressOption, true); err != nil {
-		t.Fatalf("SetSockOptBool ReuseAddressOption failed: %s", err)
-	}
+	c.EP.SocketOptions().SetReuseAddress(true)
 	if err := c.EP.Bind(tcpip.FullAddress{Port: context.StackPort}); err != nil {
 		t.Fatalf("Bind failed: %s", err)
 	}
@@ -4656,13 +4642,9 @@ func TestConnectAvoidsBoundPorts(t *testing.T) {
 												switch network {
 												case "ipv4":
 												case "ipv6":
-													if err := ep.SetSockOptBool(tcpip.V6OnlyOption, true); err != nil {
-														t.Fatalf("SetSockOptBool(V6OnlyOption(true)) failed: %s", err)
-													}
+													ep.SocketOptions().SetV6Only(true)
 												case "dual":
-													if err := ep.SetSockOptBool(tcpip.V6OnlyOption, false); err != nil {
-														t.Fatalf("SetSockOptBool(V6OnlyOption(false)) failed: %s", err)
-													}
+													ep.SocketOptions().SetV6Only(false)
 												default:
 													t.Fatalf("unknown network: '%s'", network)
 												}
@@ -4998,9 +4980,7 @@ func TestKeepalive(t *testing.T) {
 	if err := c.EP.SetSockOptInt(tcpip.KeepaliveCountOption, 5); err != nil {
 		t.Fatalf("c.EP.SetSockOptInt(tcpip.KeepaliveCountOption, 5): %s", err)
 	}
-	if err := c.EP.SetSockOptBool(tcpip.KeepaliveEnabledOption, true); err != nil {
-		t.Fatalf("c.EP.SetSockOptBool(tcpip.KeepaliveEnabledOption, true): %s", err)
-	}
+	c.EP.SocketOptions().SetKeepAlive(true)
 
 	// 5 unacked keepalives are sent. ACK each one, and check that the
 	// connection stays alive after 5.
@@ -5131,6 +5111,7 @@ func TestKeepalive(t *testing.T) {
 }
 
 func executeHandshake(t *testing.T, c *context.Context, srcPort uint16, synCookieInUse bool) (irs, iss seqnum.Value) {
+	t.Helper()
 	// Send a SYN request.
 	irs = seqnum.Value(789)
 	c.SendPacket(nil, &context.Headers{
@@ -5175,6 +5156,7 @@ func executeHandshake(t *testing.T, c *context.Context, srcPort uint16, synCooki
 }
 
 func executeV6Handshake(t *testing.T, c *context.Context, srcPort uint16, synCookieInUse bool) (irs, iss seqnum.Value) {
+	t.Helper()
 	// Send a SYN request.
 	irs = seqnum.Value(789)
 	c.SendV6Packet(nil, &context.Headers{
@@ -5238,13 +5220,14 @@ func TestListenBacklogFull(t *testing.T) {
 
 	// Test acceptance.
 	// Start listening.
-	listenBacklog := 2
+	listenBacklog := 10
 	if err := c.EP.Listen(listenBacklog); err != nil {
 		t.Fatalf("Listen failed: %s", err)
 	}
 
-	for i := 0; i < listenBacklog; i++ {
-		executeHandshake(t, c, context.TestPort+uint16(i), false /*synCookieInUse */)
+	lastPortOffset := uint16(0)
+	for ; int(lastPortOffset) < listenBacklog; lastPortOffset++ {
+		executeHandshake(t, c, context.TestPort+lastPortOffset, false /*synCookieInUse */)
 	}
 
 	time.Sleep(50 * time.Millisecond)
@@ -5252,7 +5235,7 @@ func TestListenBacklogFull(t *testing.T) {
 	// Now execute send one more SYN. The stack should not respond as the backlog
 	// is full at this point.
 	c.SendPacket(nil, &context.Headers{
-		SrcPort: context.TestPort + 2,
+		SrcPort: context.TestPort + lastPortOffset,
 		DstPort: context.StackPort,
 		Flags:   header.TCPFlagSyn,
 		SeqNum:  seqnum.Value(789),
@@ -5293,7 +5276,7 @@ func TestListenBacklogFull(t *testing.T) {
 	}
 
 	// Now a new handshake must succeed.
-	executeHandshake(t, c, context.TestPort+2, false /*synCookieInUse */)
+	executeHandshake(t, c, context.TestPort+lastPortOffset, false /*synCookieInUse */)
 
 	newEP, _, err := c.EP.Accept(nil)
 	if err == tcpip.ErrWouldBlock {
@@ -5714,6 +5697,50 @@ func TestListenBacklogFullSynCookieInUse(t *testing.T) {
 	}
 }
 
+func TestSYNRetransmit(t *testing.T) {
+	c := context.New(t, defaultMTU)
+	defer c.Cleanup()
+
+	// Create TCP endpoint.
+	var err *tcpip.Error
+	c.EP, err = c.Stack().NewEndpoint(tcp.ProtocolNumber, ipv4.ProtocolNumber, &c.WQ)
+	if err != nil {
+		t.Fatalf("NewEndpoint failed: %s", err)
+	}
+
+	// Bind to wildcard.
+	if err := c.EP.Bind(tcpip.FullAddress{Port: context.StackPort}); err != nil {
+		t.Fatalf("Bind failed: %s", err)
+	}
+
+	// Start listening.
+	if err := c.EP.Listen(10); err != nil {
+		t.Fatalf("Listen failed: %s", err)
+	}
+
+	// Send the same SYN packet multiple times. We should still get a valid SYN-ACK
+	// reply.
+	irs := seqnum.Value(789)
+	for i := 0; i < 5; i++ {
+		c.SendPacket(nil, &context.Headers{
+			SrcPort: context.TestPort,
+			DstPort: context.StackPort,
+			Flags:   header.TCPFlagSyn,
+			SeqNum:  irs,
+			RcvWnd:  30000,
+		})
+	}
+
+	// Receive the SYN-ACK reply.
+	tcpCheckers := []checker.TransportChecker{
+		checker.SrcPort(context.StackPort),
+		checker.DstPort(context.TestPort),
+		checker.TCPFlags(header.TCPFlagAck | header.TCPFlagSyn),
+		checker.TCPAckNum(uint32(irs) + 1),
+	}
+	checker.IPv4(t, c.GetPacket(), checker.TCP(tcpCheckers...))
+}
+
 func TestSynRcvdBadSeqNumber(t *testing.T) {
 	c := context.New(t, defaultMTU)
 	defer c.Cleanup()
@@ -6071,10 +6098,13 @@ func TestReceiveBufferAutoTuningApplicationLimited(t *testing.T) {
 	// Introduce a 25ms latency by delaying the first byte.
 	latency := 25 * time.Millisecond
 	time.Sleep(latency)
-	rawEP.SendPacketWithTS([]byte{1}, tsVal)
+	// Send an initial payload with at least segment overhead size. The receive
+	// window would not grow for smaller segments.
+	rawEP.SendPacketWithTS(make([]byte, tcp.SegSize), tsVal)
 
 	pkt := rawEP.VerifyAndReturnACKWithTS(tsVal)
 	rcvWnd := header.TCP(header.IPv4(pkt).Payload()).WindowSize()
+
 	time.Sleep(25 * time.Millisecond)
 
 	// Allocate a large enough payload for the test.
@@ -6347,10 +6377,7 @@ func checkDelayOption(t *testing.T, c *context.Context, wantDelayEnabled tcpip.T
 	if err != nil {
 		t.Fatalf("NewEndPoint(tcp, ipv4, new(waiter.Queue)) failed: %s", err)
 	}
-	gotDelayOption, err := ep.GetSockOptBool(tcpip.DelayOption)
-	if err != nil {
-		t.Fatalf("ep.GetSockOptBool(tcpip.DelayOption) failed: %s", err)
-	}
+	gotDelayOption := ep.SocketOptions().GetDelayOption()
 	if gotDelayOption != wantDelayOption {
-		t.Errorf("ep.GetSockOptBool(tcpip.DelayOption) got: %t, want: %t", gotDelayOption, wantDelayOption)
+		t.Errorf("ep.SocketOptions().GetDelayOption() got: %t, want: %t", gotDelayOption, wantDelayOption)
 	}
@@ -6722,6 +6749,13 @@ func TestTCPTimeWaitNewSyn(t *testing.T) {
 
 	c.CheckNoPacketTimeout("unexpected packet received in response to SYN", 1*time.Second)
 
+	// Drain any older notifications from the notification channel before
+	// attempting the 2nd connection.
+	select {
+	case <-ch:
+	default:
+	}
+
 	// Send a SYN request w/ sequence number higher than
 	// the highest sequence number sent.
 	iss = seqnum.Value(792)
@@ -7196,9 +7230,7 @@ func TestKeepaliveWithUserTimeout(t *testing.T) {
 	if err := c.EP.SetSockOptInt(tcpip.KeepaliveCountOption, 10); err != nil {
 		t.Fatalf("c.EP.SetSockOptInt(tcpip.KeepaliveCountOption, 10): %s", err)
 	}
-	if err := c.EP.SetSockOptBool(tcpip.KeepaliveEnabledOption, true); err != nil {
-		t.Fatalf("c.EP.SetSockOptBool(tcpip.KeepaliveEnabledOption, true): %s", err)
-	}
+	c.EP.SocketOptions().SetKeepAlive(true)
 
 	// Set userTimeout to be the duration to be 1 keepalive
 	// probes. Which means that after the first probe is sent
diff --git a/pkg/tcpip/transport/tcp/testing/context/context.go b/pkg/tcpip/transport/tcp/testing/context/context.go
index 4d7847142..ee55f030c 100644
--- a/pkg/tcpip/transport/tcp/testing/context/context.go
+++ b/pkg/tcpip/transport/tcp/testing/context/context.go
@@ -112,6 +112,18 @@ type Headers struct {
 	TCPOpts []byte
 }
 
+// Options contains options for creating a new test context.
+type Options struct {
+	// EnableV4 indicates whether IPv4 should be enabled.
+	EnableV4 bool
+
+	// EnableV6 indicates whether IPv6 should be enabled.
+	EnableV6 bool
+
+	// MTU indicates the maximum transmission unit on the link layer.
+	MTU uint32
+}
+
 // Context provides an initialized Network stack and a link layer endpoint
 // for use in TCP tests.
 type Context struct {
@@ -154,10 +166,30 @@ type Context struct {
 // New allocates and initializes a test context containing a new
 // stack and a link-layer endpoint.
 func New(t *testing.T, mtu uint32) *Context {
-	s := stack.New(stack.Options{
-		NetworkProtocols:   []stack.NetworkProtocolFactory{ipv4.NewProtocol, ipv6.NewProtocol},
-		TransportProtocols: []stack.TransportProtocolFactory{tcp.NewProtocol},
+	return NewWithOpts(t, Options{
+		EnableV4: true,
+		EnableV6: true,
+		MTU:      mtu,
 	})
+}
+
+// NewWithOpts allocates and initializes a test context containing a new
+// stack and a link-layer endpoint with specific options.
+func NewWithOpts(t *testing.T, opts Options) *Context {
+	if opts.MTU == 0 {
+		panic("MTU must be greater than 0")
+	}
+
+	stackOpts := stack.Options{
+		TransportProtocols: []stack.TransportProtocolFactory{tcp.NewProtocol},
+	}
+	if opts.EnableV4 {
+		stackOpts.NetworkProtocols = append(stackOpts.NetworkProtocols, ipv4.NewProtocol)
+	}
+	if opts.EnableV6 {
+		stackOpts.NetworkProtocols = append(stackOpts.NetworkProtocols, ipv6.NewProtocol)
+	}
+	s := stack.New(stackOpts)
 
 	const sendBufferSize = 1 << 20 // 1 MiB
 	const recvBufferSize = 1 << 20 // 1 MiB
@@ -182,50 +214,55 @@ func New(t *testing.T, mtu uint32) *Context {
 
 	// Some of the congestion control tests send up to 640 packets, we so
 	// set the channel size to 1000.
-	ep := channel.New(1000, mtu, "")
+	ep := channel.New(1000, opts.MTU, "")
 	wep := stack.LinkEndpoint(ep)
 	if testing.Verbose() {
 		wep = sniffer.New(ep)
 	}
-	opts := stack.NICOptions{Name: "nic1"}
-	if err := s.CreateNICWithOptions(1, wep, opts); err != nil {
-		t.Fatalf("CreateNICWithOptions(_, _, %+v) failed: %v", opts, err)
+	nicOpts := stack.NICOptions{Name: "nic1"}
+	if err := s.CreateNICWithOptions(1, wep, nicOpts); err != nil {
+		t.Fatalf("CreateNICWithOptions(_, _, %+v) failed: %v", nicOpts, err)
 	}
-	wep2 := stack.LinkEndpoint(channel.New(1000, mtu, ""))
+	wep2 := stack.LinkEndpoint(channel.New(1000, opts.MTU, ""))
 	if testing.Verbose() {
-		wep2 = sniffer.New(channel.New(1000, mtu, ""))
+		wep2 = sniffer.New(channel.New(1000, opts.MTU, ""))
 	}
 	opts2 := stack.NICOptions{Name: "nic2"}
 	if err := s.CreateNICWithOptions(2, wep2, opts2); err != nil {
 		t.Fatalf("CreateNICWithOptions(_, _, %+v) failed: %v", opts2, err)
 	}
 
-	v4ProtocolAddr := tcpip.ProtocolAddress{
-		Protocol:          ipv4.ProtocolNumber,
-		AddressWithPrefix: StackAddrWithPrefix,
-	}
-	if err := s.AddProtocolAddress(1, v4ProtocolAddr); err != nil {
-		t.Fatalf("AddProtocolAddress(1, %#v): %s", v4ProtocolAddr, err)
-	}
-
-	v6ProtocolAddr := tcpip.ProtocolAddress{
-		Protocol:          ipv6.ProtocolNumber,
-		AddressWithPrefix: StackV6AddrWithPrefix,
-	}
-	if err := s.AddProtocolAddress(1, v6ProtocolAddr); err != nil {
-		t.Fatalf("AddProtocolAddress(1, %#v): %s", v6ProtocolAddr, err)
-	}
+	var routeTable []tcpip.Route
 
-	s.SetRouteTable([]tcpip.Route{
-		{
+	if opts.EnableV4 {
+		v4ProtocolAddr := tcpip.ProtocolAddress{
+			Protocol:          ipv4.ProtocolNumber,
+			AddressWithPrefix: StackAddrWithPrefix,
+		}
+		if err := s.AddProtocolAddress(1, v4ProtocolAddr); err != nil {
+			t.Fatalf("AddProtocolAddress(1, %#v): %s", v4ProtocolAddr, err)
+		}
+		routeTable = append(routeTable, tcpip.Route{
 			Destination: header.IPv4EmptySubnet,
 			NIC:         1,
-		},
-		{
+		})
+	}
+
+	if opts.EnableV6 {
+		v6ProtocolAddr := tcpip.ProtocolAddress{
+			Protocol:          ipv6.ProtocolNumber,
+			AddressWithPrefix: StackV6AddrWithPrefix,
+		}
+		if err := s.AddProtocolAddress(1, v6ProtocolAddr); err != nil {
+			t.Fatalf("AddProtocolAddress(1, %#v): %s", v6ProtocolAddr, err)
+		}
+		routeTable = append(routeTable, tcpip.Route{
 			Destination: header.IPv6EmptySubnet,
 			NIC:         1,
-		},
-	})
+		})
+	}
+
+	s.SetRouteTable(routeTable)
 
 	return &Context{
 		t:           t,
@@ -358,7 +395,6 @@ func (c *Context) SendICMPPacket(typ header.ICMPv4Type, code header.ICMPv4Code,
 
 	ip := header.IPv4(buf)
 	ip.Encode(&header.IPv4Fields{
-		IHL:         header.IPv4MinimumSize,
 		TotalLength: uint16(len(buf)),
 		TTL:         65,
 		Protocol:    uint8(header.ICMPv4ProtocolNumber),
@@ -373,6 +409,9 @@ func (c *Context) SendICMPPacket(typ header.ICMPv4Type, code header.ICMPv4Code,
 	const icmpv4VariableHeaderOffset = 4
 	copy(icmp[icmpv4VariableHeaderOffset:], p1)
 	copy(icmp[header.ICMPv4PayloadOffset:], p2)
+	icmp.SetChecksum(0)
+	checksum := ^header.Checksum(icmp, 0 /* initial */)
+	icmp.SetChecksum(checksum)
 
 	// Inject packet.
 	pkt := stack.NewPacketBuffer(stack.PacketBufferOptions{
@@ -397,7 +436,6 @@ func (c *Context) BuildSegmentWithAddrs(payload []byte, h *Headers, src, dst tcp
 	// Initialize the IP header.
 	ip := header.IPv4(buf)
 	ip.Encode(&header.IPv4Fields{
-		IHL:         header.IPv4MinimumSize,
 		TotalLength: uint16(len(buf)),
 		TTL:         65,
 		Protocol:    uint8(tcp.ProtocolNumber),
@@ -554,9 +592,7 @@ func (c *Context) CreateV6Endpoint(v6only bool) {
 		c.t.Fatalf("NewEndpoint failed: %v", err)
 	}
 
-	if err := c.EP.SetSockOptBool(tcpip.V6OnlyOption, v6only); err != nil {
-		c.t.Fatalf("SetSockOpt failed failed: %v", err)
-	}
+	c.EP.SocketOptions().SetV6Only(v6only)
 }
 
 // GetV6Packet reads a single packet from the link layer endpoint of the context
@@ -599,11 +635,11 @@ func (c *Context) SendV6PacketWithAddrs(payload []byte, h *Headers, src, dst tcp
 	// Initialize the IP header.
 	ip := header.IPv6(buf)
 	ip.Encode(&header.IPv6Fields{
-		PayloadLength: uint16(header.TCPMinimumSize + len(payload)),
-		NextHeader:    uint8(tcp.ProtocolNumber),
-		HopLimit:      65,
-		SrcAddr:       src,
-		DstAddr:       dst,
+		PayloadLength:     uint16(header.TCPMinimumSize + len(payload)),
+		TransportProtocol: tcp.ProtocolNumber,
+		HopLimit:          65,
+		SrcAddr:           src,
+		DstAddr:           dst,
 	})
 
 	// Initialize the TCP header.
diff --git a/pkg/tcpip/transport/tcp/timer.go b/pkg/tcpip/transport/tcp/timer.go
index 7981d469b..38a335840 100644
--- a/pkg/tcpip/transport/tcp/timer.go
+++ b/pkg/tcpip/transport/tcp/timer.go
@@ -84,6 +84,10 @@ func (t *timer) init(w *sleep.Waker) {
 
 // cleanup frees all resources associated with the timer.
 func (t *timer) cleanup() {
+	if t.timer == nil {
+		// No cleanup needed.
+		return
+	}
 	t.timer.Stop()
 	*t = timer{}
 }
diff --git a/pkg/tcpip/transport/udp/BUILD b/pkg/tcpip/transport/udp/BUILD
index c78549424..153e8c950 100644
--- a/pkg/tcpip/transport/udp/BUILD
+++ b/pkg/tcpip/transport/udp/BUILD
@@ -56,6 +56,8 @@ go_test(
         "//pkg/tcpip/network/ipv4",
         "//pkg/tcpip/network/ipv6",
         "//pkg/tcpip/stack",
+        "//pkg/tcpip/transport/icmp",
         "//pkg/waiter",
+        "@com_github_google_go_cmp//cmp:go_default_library",
     ],
 )
diff --git a/pkg/tcpip/transport/udp/endpoint.go b/pkg/tcpip/transport/udp/endpoint.go
index d57ed5d79..763d1d654 100644
--- a/pkg/tcpip/transport/udp/endpoint.go
+++ b/pkg/tcpip/transport/udp/endpoint.go
@@ -16,8 +16,8 @@ package udp
 
 import (
 	"fmt"
+	"sync/atomic"
 
-	"gvisor.dev/gvisor/pkg/sleep"
 	"gvisor.dev/gvisor/pkg/sync"
 	"gvisor.dev/gvisor/pkg/tcpip"
 	"gvisor.dev/gvisor/pkg/tcpip/buffer"
@@ -30,10 +30,11 @@ import (
 // +stateify savable
 type udpPacket struct {
 	udpPacketEntry
-	senderAddress tcpip.FullAddress
-	packetInfo    tcpip.IPPacketInfo
-	data          buffer.VectorisedView `state:".(buffer.VectorisedView)"`
-	timestamp     int64
+	senderAddress      tcpip.FullAddress
+	destinationAddress tcpip.FullAddress
+	packetInfo         tcpip.IPPacketInfo
+	data               buffer.VectorisedView `state:".(buffer.VectorisedView)"`
+	timestamp          int64
 	// tos stores either the receiveTOS or receiveTClass value.
 	tos uint8
 }
@@ -77,6 +78,7 @@ func (s EndpointState) String() string {
 // +stateify savable
 type endpoint struct {
 	stack.TransportEndpointInfo
+	tcpip.DefaultSocketOptionsHandler
 
 	// The following fields are initialized at creation time and do not
 	// change throughout the lifetime of the endpoint.
@@ -94,22 +96,20 @@ type endpoint struct {
 	rcvClosed     bool
 
 	// The following fields are protected by the mu mutex.
-	mu             sync.RWMutex `state:"nosave"`
-	sndBufSize     int
-	sndBufSizeMax  int
+	mu            sync.RWMutex `state:"nosave"`
+	sndBufSize    int
+	sndBufSizeMax int
+	// state must be read/set using the EndpointState()/setEndpointState()
+	// methods.
 	state          EndpointState
-	route          stack.Route `state:"manual"`
+	route          *stack.Route `state:"manual"`
 	dstPort        uint16
-	v6only         bool
 	ttl            uint8
 	multicastTTL   uint8
 	multicastAddr  tcpip.Address
 	multicastNICID tcpip.NICID
-	multicastLoop  bool
 	portFlags      ports.Flags
 	bindToDevice   tcpip.NICID
-	broadcast      bool
-	noChecksum     bool
 
 	lastErrorMu sync.Mutex   `state:"nosave"`
 	lastError   *tcpip.Error `state:".(string)"`
@@ -123,17 +123,6 @@ type endpoint struct {
 	// applied while sending packets. Defaults to 0 as on Linux.
 	sendTOS uint8
 
-	// receiveTOS determines if the incoming IPv4 TOS header field is passed
-	// as ancillary data to ControlMessages on Read.
-	receiveTOS bool
-
-	// receiveTClass determines if the incoming IPv6 TClass header field is
-	// passed as ancillary data to ControlMessages on Read.
-	receiveTClass bool
-
-	// receiveIPPacketInfo determines if the packet info is returned by Read.
-	receiveIPPacketInfo bool
-
 	// shutdownFlags represent the current shutdown state of the endpoint.
 	shutdownFlags tcpip.ShutdownFlags
 
@@ -155,8 +144,8 @@ type endpoint struct {
 	// owner is used to get uid and gid of the packet.
 	owner tcpip.PacketOwner
 
-	// linger is used for SO_LINGER socket option.
-	linger tcpip.LingerOption
+	// ops is used to get socket level options.
+	ops tcpip.SocketOptions
 }
 
 // +stateify savable
@@ -186,13 +175,14 @@ func newEndpoint(s *stack.Stack, netProto tcpip.NetworkProtocolNumber, waiterQue
 		//
 		// Linux defaults to TTL=1.
 		multicastTTL:         1,
-		multicastLoop:        true,
 		rcvBufSizeMax:        32 * 1024,
 		sndBufSizeMax:        32 * 1024,
 		multicastMemberships: make(map[multicastMembership]struct{}),
 		state:                StateInitial,
 		uniqueID:             s.UniqueID(),
 	}
+	e.ops.InitHandler(e)
+	e.ops.SetMulticastLoop(true)
 
 	// Override with stack defaults.
 	var ss stack.SendBufferSizeOption
@@ -208,6 +198,20 @@ func newEndpoint(s *stack.Stack, netProto tcpip.NetworkProtocolNumber, waiterQue
 	return e
 }
 
+// setEndpointState updates the state of the endpoint to state atomically. This
+// method is unexported as the only place we should update the state is in this
+// package but we allow the state to be read freely without holding e.mu.
+//
+// Precondition: e.mu must be held to call this method.
+func (e *endpoint) setEndpointState(state EndpointState) {
+	atomic.StoreUint32((*uint32)(&e.state), uint32(state))
+}
+
+// EndpointState returns the current state of the endpoint.
+func (e *endpoint) EndpointState() EndpointState {
+	return EndpointState(atomic.LoadUint32((*uint32)(&e.state)))
+}
+
 // UniqueID implements stack.TransportEndpoint.UniqueID.
 func (e *endpoint) UniqueID() uint64 {
 	return e.uniqueID
@@ -233,7 +237,7 @@ func (e *endpoint) Close() {
 	e.mu.Lock()
 	e.shutdownFlags = tcpip.ShutdownRead | tcpip.ShutdownWrite
 
-	switch e.state {
+	switch e.EndpointState() {
 	case StateBound, StateConnected:
 		e.stack.UnregisterTransportEndpoint(e.RegisterNICID, e.effectiveNetProtos, ProtocolNumber, e.ID, e, e.boundPortFlags, e.boundBindToDevice)
 		e.stack.ReleasePort(e.effectiveNetProtos, ProtocolNumber, e.ID.LocalAddress, e.ID.LocalPort, e.boundPortFlags, e.boundBindToDevice, tcpip.FullAddress{})
@@ -256,10 +260,13 @@ func (e *endpoint) Close() {
 	}
 	e.rcvMu.Unlock()
 
-	e.route.Release()
+	if e.route != nil {
+		e.route.Release()
+		e.route = nil
+	}
 
 	// Update the state.
-	e.state = StateClosed
+	e.setEndpointState(StateClosed)
 
 	e.mu.Unlock()
 
@@ -301,24 +308,23 @@ func (e *endpoint) Read(addr *tcpip.FullAddress) (buffer.View, tcpip.ControlMess
 		HasTimestamp: true,
 		Timestamp:    p.timestamp,
 	}
-	e.mu.RLock()
-	receiveTOS := e.receiveTOS
-	receiveTClass := e.receiveTClass
-	receiveIPPacketInfo := e.receiveIPPacketInfo
-	e.mu.RUnlock()
-	if receiveTOS {
+	if e.ops.GetReceiveTOS() {
 		cm.HasTOS = true
 		cm.TOS = p.tos
 	}
-	if receiveTClass {
+	if e.ops.GetReceiveTClass() {
 		cm.HasTClass = true
 		// Although TClass is an 8-bit value it's read in the CMsg as a uint32.
 		cm.TClass = uint32(p.tos)
 	}
-	if receiveIPPacketInfo {
+	if e.ops.GetReceivePacketInfo() {
 		cm.HasIPPacketInfo = true
 		cm.PacketInfo = p.packetInfo
 	}
+	if e.ops.GetReceiveOriginalDstAddress() {
+		cm.HasOriginalDstAddress = true
+		cm.OriginalDstAddress = p.destinationAddress
+	}
 	return p.data.ToView(), cm, nil
 }
 
@@ -328,7 +334,7 @@ func (e *endpoint) Read(addr *tcpip.FullAddress) (buffer.View, tcpip.ControlMess
 //
 // Returns true for retry if preparation should be retried.
 func (e *endpoint) prepareForWrite(to *tcpip.FullAddress) (retry bool, err *tcpip.Error) {
-	switch e.state {
+	switch e.EndpointState() {
 	case StateInitial:
 	case StateConnected:
 		return false, nil
@@ -350,7 +356,7 @@ func (e *endpoint) prepareForWrite(to *tcpip.FullAddress) (retry bool, err *tcpi
 
 	// The state changed when we released the shared locked and re-acquired
 	// it in exclusive mode. Try again.
-	if e.state != StateInitial {
+	if e.EndpointState() != StateInitial {
 		return true, nil
 	}
 
@@ -365,9 +371,9 @@ func (e *endpoint) prepareForWrite(to *tcpip.FullAddress) (retry bool, err *tcpi
 // connectRoute establishes a route to the specified interface or the
 // configured multicast interface if no interface is specified and the
 // specified address is a multicast address.
-func (e *endpoint) connectRoute(nicID tcpip.NICID, addr tcpip.FullAddress, netProto tcpip.NetworkProtocolNumber) (stack.Route, tcpip.NICID, *tcpip.Error) {
+func (e *endpoint) connectRoute(nicID tcpip.NICID, addr tcpip.FullAddress, netProto tcpip.NetworkProtocolNumber) (*stack.Route, tcpip.NICID, *tcpip.Error) {
 	localAddr := e.ID.LocalAddress
-	if isBroadcastOrMulticast(localAddr) {
+	if e.isBroadcastOrMulticast(nicID, netProto, localAddr) {
 		// A packet can only originate from a unicast address (i.e., an interface).
 		localAddr = ""
 	}
@@ -382,9 +388,9 @@ func (e *endpoint) connectRoute(nicID tcpip.NICID, addr tcpip.FullAddress, netPr
 	}
 
 	// Find a route to the desired destination.
-	r, err := e.stack.FindRoute(nicID, localAddr, addr.Addr, netProto, e.multicastLoop)
+	r, err := e.stack.FindRoute(nicID, localAddr, addr.Addr, netProto, e.ops.GetMulticastLoop())
 	if err != nil {
-		return stack.Route{}, 0, err
+		return nil, 0, err
 	}
 	return r, nicID, nil
 }
@@ -427,7 +433,13 @@ func (e *endpoint) write(p tcpip.Payloader, opts tcpip.WriteOptions) (int64, <-c
 	to := opts.To
 
 	e.mu.RLock()
-	defer e.mu.RUnlock()
+	lockReleased := false
+	defer func() {
+		if lockReleased {
+			return
+		}
+		e.mu.RUnlock()
+	}()
 
 	// If we've shutdown with SHUT_WR we are in an invalid state for sending.
 	if e.shutdownFlags&tcpip.ShutdownWrite != 0 {
@@ -446,36 +458,9 @@ func (e *endpoint) write(p tcpip.Payloader, opts tcpip.WriteOptions) (int64, <-c
 		}
 	}
 
-	var route *stack.Route
-	var resolve func(waker *sleep.Waker) (ch <-chan struct{}, err *tcpip.Error)
-	var dstPort uint16
-	if to == nil {
-		route = &e.route
-		dstPort = e.dstPort
-		resolve = func(waker *sleep.Waker) (ch <-chan struct{}, err *tcpip.Error) {
-			// Promote lock to exclusive if using a shared route, given that it may
-			// need to change in Route.Resolve() call below.
-			e.mu.RUnlock()
-			e.mu.Lock()
-
-			// Recheck state after lock was re-acquired.
-			if e.state != StateConnected {
-				err = tcpip.ErrInvalidEndpointState
-			}
-			if err == nil && route.IsResolutionRequired() {
-				ch, err = route.Resolve(waker)
-			}
-
-			e.mu.Unlock()
-			e.mu.RLock()
-
-			// Recheck state after lock was re-acquired.
-			if e.state != StateConnected {
-				err = tcpip.ErrInvalidEndpointState
-			}
-			return
-		}
-	} else {
+	route := e.route
+	dstPort := e.dstPort
+	if to != nil {
 		// Reject destination address if it goes through a different
 		// NIC than the endpoint was bound to.
 		nicID := to.NIC
@@ -487,6 +472,11 @@ func (e *endpoint) write(p tcpip.Payloader, opts tcpip.WriteOptions) (int64, <-c
 			nicID = e.BindNICID
 		}
 
+		if to.Port == 0 {
+			// Port 0 is an invalid port to send to.
+			return 0, nil, tcpip.ErrInvalidEndpointState
+		}
+
 		dst, netProto, err := e.checkV4MappedLocked(*to)
 		if err != nil {
 			return 0, nil, err
@@ -498,17 +488,16 @@ func (e *endpoint) write(p tcpip.Payloader, opts tcpip.WriteOptions) (int64, <-c
 		}
 		defer r.Release()
 
-		route = &r
+		route = r
 		dstPort = dst.Port
-		resolve = route.Resolve
 	}
 
-	if !e.broadcast && route.IsOutboundBroadcast() {
+	if !e.ops.GetBroadcast() && route.IsOutboundBroadcast() {
 		return 0, nil, tcpip.ErrBroadcastDisabled
 	}
 
 	if route.IsResolutionRequired() {
-		if ch, err := resolve(nil); err != nil {
+		if ch, err := route.Resolve(nil); err != nil {
 			if err == tcpip.ErrWouldBlock {
 				return 0, ch, tcpip.ErrNoLinkAddress
 			}
@@ -534,83 +523,46 @@ func (e *endpoint) write(p tcpip.Payloader, opts tcpip.WriteOptions) (int64, <-c
 		useDefaultTTL = false
 	}
 
-	if err := sendUDP(route, buffer.View(v).ToVectorisedView(), e.ID.LocalPort, dstPort, ttl, useDefaultTTL, e.sendTOS, e.owner, e.noChecksum); err != nil {
+	localPort := e.ID.LocalPort
+	sendTOS := e.sendTOS
+	owner := e.owner
+	noChecksum := e.SocketOptions().GetNoChecksum()
+	lockReleased = true
+	e.mu.RUnlock()
+
+	// Do not hold lock when sending as loopback is synchronous and if the UDP
+	// datagram ends up generating an ICMP response then it can result in a
+	// deadlock where the ICMP response handling ends up acquiring this endpoint's
+	// mutex using e.mu.RLock() in endpoint.HandleControlPacket which can cause a
+	// deadlock if another caller is trying to acquire e.mu in exclusive mode w/
+	// e.mu.Lock(): a pending e.mu.Lock() blocks any new read locks to ensure the
+	// exclusive lock can eventually be acquired.
+	//
+	// See: https://golang.org/pkg/sync/#RWMutex for details on why recursive read
+	// locking is prohibited.
+	if err := sendUDP(route, buffer.View(v).ToVectorisedView(), localPort, dstPort, ttl, useDefaultTTL, sendTOS, owner, noChecksum); err != nil {
 		return 0, nil, err
 	}
 	return int64(len(v)), nil, nil
 }
 
 // Peek only returns data from a single datagram, so do nothing here.
-func (e *endpoint) Peek([][]byte) (int64, tcpip.ControlMessages, *tcpip.Error) {
-	return 0, tcpip.ControlMessages{}, nil
+func (e *endpoint) Peek([][]byte) (int64, *tcpip.Error) {
+	return 0, nil
 }
 
-// SetSockOptBool implements tcpip.Endpoint.SetSockOptBool.
-func (e *endpoint) SetSockOptBool(opt tcpip.SockOptBool, v bool) *tcpip.Error {
-	switch opt {
-	case tcpip.BroadcastOption:
-		e.mu.Lock()
-		e.broadcast = v
-		e.mu.Unlock()
-
-	case tcpip.MulticastLoopOption:
-		e.mu.Lock()
-		e.multicastLoop = v
-		e.mu.Unlock()
-
-	case tcpip.NoChecksumOption:
-		e.mu.Lock()
-		e.noChecksum = v
-		e.mu.Unlock()
-
-	case tcpip.ReceiveTOSOption:
-		e.mu.Lock()
-		e.receiveTOS = v
-		e.mu.Unlock()
-
-	case tcpip.ReceiveTClassOption:
-		// We only support this option on v6 endpoints.
-		if e.NetProto != header.IPv6ProtocolNumber {
-			return tcpip.ErrNotSupported
-		}
-
-		e.mu.Lock()
-		e.receiveTClass = v
-		e.mu.Unlock()
-
-	case tcpip.ReceiveIPPacketInfoOption:
-		e.mu.Lock()
-		e.receiveIPPacketInfo = v
-		e.mu.Unlock()
-
-	case tcpip.ReuseAddressOption:
-		e.mu.Lock()
-		e.portFlags.MostRecent = v
-		e.mu.Unlock()
-
-	case tcpip.ReusePortOption:
-		e.mu.Lock()
-		e.portFlags.LoadBalanced = v
-		e.mu.Unlock()
-
-	case tcpip.V6OnlyOption:
-		// We only recognize this option on v6 endpoints.
-		if e.NetProto != header.IPv6ProtocolNumber {
-			return tcpip.ErrInvalidEndpointState
-		}
-
-		e.mu.Lock()
-		defer e.mu.Unlock()
-
-		// We only allow this to be set when we're in the initial state.
-		if e.state != StateInitial {
-			return tcpip.ErrInvalidEndpointState
-		}
-
-		e.v6only = v
-	}
+// OnReuseAddressSet implements tcpip.SocketOptionsHandler.OnReuseAddressSet.
+func (e *endpoint) OnReuseAddressSet(v bool) {
+	e.mu.Lock()
+	e.portFlags.MostRecent = v
+	e.mu.Unlock()
+}
 
-	return nil
+// OnReusePortSet implements tcpip.SocketOptionsHandler.OnReusePortSet.
+func (e *endpoint) OnReusePortSet(v bool) {
+	e.mu.Lock()
+	e.portFlags.LoadBalanced = v
+	e.mu.Unlock()
 }
 
 // SetSockOptInt implements tcpip.Endpoint.SetSockOptInt.
@@ -813,93 +765,10 @@ func (e *endpoint) SetSockOpt(opt tcpip.SettableSocketOption) *tcpip.Error {
 
 	case *tcpip.SocketDetachFilterOption:
 		return nil
-
-	case *tcpip.LingerOption:
-		e.mu.Lock()
-		e.linger = *v
-		e.mu.Unlock()
 	}
 	return nil
 }
 
-// GetSockOptBool implements tcpip.Endpoint.GetSockOptBool.
-func (e *endpoint) GetSockOptBool(opt tcpip.SockOptBool) (bool, *tcpip.Error) {
-	switch opt {
-	case tcpip.BroadcastOption:
-		e.mu.RLock()
-		v := e.broadcast
-		e.mu.RUnlock()
-		return v, nil
-
-	case tcpip.KeepaliveEnabledOption:
-		return false, nil
-
-	case tcpip.MulticastLoopOption:
-		e.mu.RLock()
-		v := e.multicastLoop
-		e.mu.RUnlock()
-		return v, nil
-
-	case tcpip.NoChecksumOption:
-		e.mu.RLock()
-		v := e.noChecksum
-		e.mu.RUnlock()
-		return v, nil
-
-	case tcpip.ReceiveTOSOption:
-		e.mu.RLock()
-		v := e.receiveTOS
-		e.mu.RUnlock()
-		return v, nil
-
-	case tcpip.ReceiveTClassOption:
-		// We only support this option on v6 endpoints.
-		if e.NetProto != header.IPv6ProtocolNumber {
-			return false, tcpip.ErrNotSupported
-		}
-
-		e.mu.RLock()
-		v := e.receiveTClass
-		e.mu.RUnlock()
-		return v, nil
-
-	case tcpip.ReceiveIPPacketInfoOption:
-		e.mu.RLock()
-		v := e.receiveIPPacketInfo
-		e.mu.RUnlock()
-		return v, nil
-
-	case tcpip.ReuseAddressOption:
-		e.mu.RLock()
-		v := e.portFlags.MostRecent
-		e.mu.RUnlock()
-
-		return v, nil
-
-	case tcpip.ReusePortOption:
-		e.mu.RLock()
-		v := e.portFlags.LoadBalanced
-		e.mu.RUnlock()
-
-		return v, nil
-
-	case tcpip.V6OnlyOption:
-		// We only recognize this option on v6 endpoints.
-		if e.NetProto != header.IPv6ProtocolNumber {
-			return false, tcpip.ErrUnknownProtocolOption
-		}
-
-		e.mu.RLock()
-		v := e.v6only
-		e.mu.RUnlock()
-
-		return v, nil
-
-	default:
-		return false, tcpip.ErrUnknownProtocolOption
-	}
-}
-
 // GetSockOptInt implements tcpip.Endpoint.GetSockOptInt.
 func (e *endpoint) GetSockOptInt(opt tcpip.SockOptInt) (int, *tcpip.Error) {
 	switch opt {
@@ -974,11 +843,6 @@ func (e *endpoint) GetSockOpt(opt tcpip.GettableSocketOption) *tcpip.Error {
 		*o = tcpip.BindToDeviceOption(e.bindToDevice)
 		e.mu.RUnlock()
 
-	case *tcpip.LingerOption:
-		e.mu.RLock()
-		*o = e.linger
-		e.mu.RUnlock()
-
 	default:
 		return tcpip.ErrUnknownProtocolOption
 	}
@@ -1009,7 +873,7 @@ func sendUDP(r *stack.Route, data buffer.VectorisedView, localPort, remotePort u
 	// On IPv4, UDP checksum is optional, and a zero value indicates the
 	// transmitter skipped the checksum generation (RFC768).
 	// On IPv6, UDP checksum is not optional (RFC2460 Section 8.1).
-	if r.Capabilities()&stack.CapabilityTXChecksumOffload == 0 &&
+	if r.RequiresTXTransportChecksum() &&
 		(!noChecksum || r.NetProto == header.IPv6ProtocolNumber) {
 		xsum := r.PseudoHeaderChecksum(ProtocolNumber, length)
 		for _, v := range data.Views() {
@@ -1038,7 +902,7 @@ func sendUDP(r *stack.Route, data buffer.VectorisedView, localPort, remotePort u
 // checkV4MappedLocked determines the effective network protocol and converts
 // addr to its canonical form.
 func (e *endpoint) checkV4MappedLocked(addr tcpip.FullAddress) (tcpip.FullAddress, tcpip.NetworkProtocolNumber, *tcpip.Error) {
-	unwrapped, netProto, err := e.TransportEndpointInfo.AddrNetProtoLocked(addr, e.v6only)
+	unwrapped, netProto, err := e.TransportEndpointInfo.AddrNetProtoLocked(addr, e.ops.GetV6Only())
 	if err != nil {
 		return tcpip.FullAddress{}, 0, err
 	}
@@ -1050,7 +914,7 @@ func (e *endpoint) Disconnect() *tcpip.Error {
 	e.mu.Lock()
 	defer e.mu.Unlock()
 
-	if e.state != StateConnected {
+	if e.EndpointState() != StateConnected {
 		return nil
 	}
 	var (
@@ -1073,7 +937,7 @@ func (e *endpoint) Disconnect() *tcpip.Error {
 		if err != nil {
 			return err
 		}
-		e.state = StateBound
+		e.setEndpointState(StateBound)
 		boundPortFlags = e.boundPortFlags
 	} else {
 		if e.ID.LocalPort != 0 {
@@ -1081,14 +945,14 @@ func (e *endpoint) Disconnect() *tcpip.Error {
 			e.stack.ReleasePort(e.effectiveNetProtos, ProtocolNumber, e.ID.LocalAddress, e.ID.LocalPort, boundPortFlags, e.boundBindToDevice, tcpip.FullAddress{})
 			e.boundPortFlags = ports.Flags{}
 		}
-		e.state = StateInitial
+		e.setEndpointState(StateInitial)
 	}
 
 	e.stack.UnregisterTransportEndpoint(e.RegisterNICID, e.effectiveNetProtos, ProtocolNumber, e.ID, e, boundPortFlags, e.boundBindToDevice)
 	e.ID = id
 	e.boundBindToDevice = btd
 	e.route.Release()
-	e.route = stack.Route{}
+	e.route = nil
 	e.dstPort = 0
 
 	return nil
@@ -1106,7 +970,7 @@ func (e *endpoint) Connect(addr tcpip.FullAddress) *tcpip.Error {
 
 	nicID := addr.NIC
 	var localPort uint16
-	switch e.state {
+	switch e.EndpointState() {
 	case StateInitial:
 	case StateBound, StateConnected:
 		localPort = e.ID.LocalPort
@@ -1141,7 +1005,7 @@ func (e *endpoint) Connect(addr tcpip.FullAddress) *tcpip.Error {
 		RemoteAddress: r.RemoteAddress,
 	}
 
-	if e.state == StateInitial {
+	if e.EndpointState() == StateInitial {
 		id.LocalAddress = r.LocalAddress
 	}
 
@@ -1149,7 +1013,7 @@ func (e *endpoint) Connect(addr tcpip.FullAddress) *tcpip.Error {
 	// packets on a different network protocol, so we register both even if
 	// v6only is set to false and this is an ipv6 endpoint.
 	netProtos := []tcpip.NetworkProtocolNumber{netProto}
-	if netProto == header.IPv6ProtocolNumber && !e.v6only {
+	if netProto == header.IPv6ProtocolNumber && !e.ops.GetV6Only() {
 		netProtos = []tcpip.NetworkProtocolNumber{
 			header.IPv4ProtocolNumber,
 			header.IPv6ProtocolNumber,
@@ -1175,7 +1039,7 @@ func (e *endpoint) Connect(addr tcpip.FullAddress) *tcpip.Error {
 	e.RegisterNICID = nicID
 	e.effectiveNetProtos = netProtos
 
-	e.state = StateConnected
+	e.setEndpointState(StateConnected)
 
 	e.rcvMu.Lock()
 	e.rcvReady = true
@@ -1197,7 +1061,7 @@ func (e *endpoint) Shutdown(flags tcpip.ShutdownFlags) *tcpip.Error {
 
 	// A socket in the bound state can still receive multicast messages,
 	// so we need to notify waiters on shutdown.
-	if e.state != StateBound && e.state != StateConnected {
+	if state := e.EndpointState(); state != StateBound && state != StateConnected {
 		return tcpip.ErrNotConnected
 	}
 
@@ -1248,7 +1112,7 @@ func (e *endpoint) registerWithStack(nicID tcpip.NICID, netProtos []tcpip.Networ
 func (e *endpoint) bindLocked(addr tcpip.FullAddress) *tcpip.Error {
 	// Don't allow binding once endpoint is not in the initial state
 	// anymore.
-	if e.state != StateInitial {
+	if e.EndpointState() != StateInitial {
 		return tcpip.ErrInvalidEndpointState
 	}
 
@@ -1261,7 +1125,7 @@ func (e *endpoint) bindLocked(addr tcpip.FullAddress) *tcpip.Error {
 	// wildcard (empty) address, and this is an IPv6 endpoint with v6only
 	// set to false.
 	netProtos := []tcpip.NetworkProtocolNumber{netProto}
-	if netProto == header.IPv6ProtocolNumber && !e.v6only && addr.Addr == "" {
+	if netProto == header.IPv6ProtocolNumber && !e.ops.GetV6Only() && addr.Addr == "" {
 		netProtos = []tcpip.NetworkProtocolNumber{
 			header.IPv6ProtocolNumber,
 			header.IPv4ProtocolNumber,
@@ -1269,7 +1133,7 @@ func (e *endpoint) bindLocked(addr tcpip.FullAddress) *tcpip.Error {
 	}
 
 	nicID := addr.NIC
-	if len(addr.Addr) != 0 && !isBroadcastOrMulticast(addr.Addr) {
+	if len(addr.Addr) != 0 && !e.isBroadcastOrMulticast(addr.NIC, netProto, addr.Addr) {
 		// A local unicast address was specified, verify that it's valid.
 		nicID = e.stack.CheckLocalAddress(addr.NIC, netProto, addr.Addr)
 		if nicID == 0 {
@@ -1292,7 +1156,7 @@ func (e *endpoint) bindLocked(addr tcpip.FullAddress) *tcpip.Error {
 	e.effectiveNetProtos = netProtos
 
 	// Mark endpoint as bound.
-	e.state = StateBound
+	e.setEndpointState(StateBound)
 
 	e.rcvMu.Lock()
 	e.rcvReady = true
@@ -1324,7 +1188,7 @@ func (e *endpoint) GetLocalAddress() (tcpip.FullAddress, *tcpip.Error) {
 	defer e.mu.RUnlock()
 
 	addr := e.ID.LocalAddress
-	if e.state == StateConnected {
+	if e.EndpointState() == StateConnected {
 		addr = e.route.LocalAddress
 	}
 
@@ -1340,7 +1204,7 @@ func (e *endpoint) GetRemoteAddress() (tcpip.FullAddress, *tcpip.Error) {
 	e.mu.RLock()
 	defer e.mu.RUnlock()
 
-	if e.state != StateConnected {
+	if e.EndpointState() != StateConnected {
 		return tcpip.FullAddress{}, tcpip.ErrNotConnected
 	}
 
@@ -1366,6 +1230,12 @@ func (e *endpoint) Readiness(mask waiter.EventMask) waiter.EventMask {
 		e.rcvMu.Unlock()
 	}
 
+	e.lastErrorMu.Lock()
+	hasError := e.lastError != nil
+	e.lastErrorMu.Unlock()
+	if hasError {
+		result |= waiter.EventErr
+	}
 	return result
 }
 
@@ -1373,10 +1243,11 @@ func (e *endpoint) Readiness(mask waiter.EventMask) waiter.EventMask {
 // On IPv4, UDP checksum is optional, and a zero value means the transmitter
 // omitted the checksum generation (RFC768).
 // On IPv6, UDP checksum is not optional (RFC2460 Section 8.1).
-func verifyChecksum(r *stack.Route, hdr header.UDP, pkt *stack.PacketBuffer) bool {
-	if r.Capabilities()&stack.CapabilityRXChecksumOffload == 0 &&
-		(hdr.Checksum() != 0 || r.NetProto == header.IPv6ProtocolNumber) {
-		xsum := r.PseudoHeaderChecksum(ProtocolNumber, hdr.Length())
+func verifyChecksum(hdr header.UDP, pkt *stack.PacketBuffer) bool {
+	if !pkt.RXTransportChecksumValidated &&
+		(hdr.Checksum() != 0 || pkt.NetworkProtocolNumber == header.IPv6ProtocolNumber) {
+		netHdr := pkt.Network()
+		xsum := header.PseudoHeaderChecksum(ProtocolNumber, netHdr.DestinationAddress(), netHdr.SourceAddress(), hdr.Length())
 		for _, v := range pkt.Data.Views() {
 			xsum = header.Checksum(v, xsum)
 		}
@@ -1387,8 +1258,7 @@ func verifyChecksum(r *stack.Route, hdr header.UDP, pkt *stack.PacketBuffer) boo
 
 // HandlePacket is called by the stack when new packets arrive to this transport
 // endpoint.
-func (e *endpoint) HandlePacket(r *stack.Route, id stack.TransportEndpointID, pkt *stack.PacketBuffer) {
-	// Get the header then trim it from the view.
+func (e *endpoint) HandlePacket(id stack.TransportEndpointID, pkt *stack.PacketBuffer) {
 	hdr := header.UDP(pkt.TransportHeader().View())
 	if int(hdr.Length()) > pkt.Data.Size()+header.UDPMinimumSize {
 		// Malformed packet.
@@ -1397,7 +1267,11 @@ func (e *endpoint) HandlePacket(r *stack.Route, id stack.TransportEndpointID, pk
 		return
 	}
 
-	if !verifyChecksum(r, hdr, pkt) {
+	// TODO(gvisor.dev/issue/5033): We should mirror the Network layer and cap
+	// packets at "Parse" instead of when handling a packet.
+	pkt.Data.CapLength(int(hdr.PayloadLength()))
+
+	if !verifyChecksum(hdr, pkt) {
 		// Checksum Error.
 		e.stack.Stats().UDP.ChecksumErrors.Increment()
 		e.stats.ReceiveErrors.ChecksumErrors.Increment()
@@ -1428,9 +1302,14 @@ func (e *endpoint) HandlePacket(r *stack.Route, id stack.TransportEndpointID, pk
 	// Push new packet into receive list and increment the buffer size.
 	packet := &udpPacket{
 		senderAddress: tcpip.FullAddress{
-			NIC:  r.NICID(),
+			NIC:  pkt.NICID,
 			Addr: id.RemoteAddress,
-			Port: header.UDP(hdr).SourcePort(),
+			Port: hdr.SourcePort(),
+		},
+		destinationAddress: tcpip.FullAddress{
+			NIC:  pkt.NICID,
+			Addr: id.LocalAddress,
+			Port: header.UDP(hdr).DestinationPort(),
 		},
 	}
 	packet.data = pkt.Data
@@ -1438,7 +1317,7 @@ func (e *endpoint) HandlePacket(r *stack.Route, id stack.TransportEndpointID, pk
 	e.rcvBufSize += pkt.Data.Size()
 
 	// Save any useful information from the network header to the packet.
-	switch r.NetProto {
+	switch pkt.NetworkProtocolNumber {
 	case header.IPv4ProtocolNumber:
 		packet.tos, _ = header.IPv4(pkt.NetworkHeader().View()).TOS()
 	case header.IPv6ProtocolNumber:
@@ -1448,9 +1327,10 @@ func (e *endpoint) HandlePacket(r *stack.Route, id stack.TransportEndpointID, pk
 	// TODO(gvisor.dev/issue/3556): r.LocalAddress may be a multicast or broadcast
 	// address. packetInfo.LocalAddr should hold a unicast address that can be
 	// used to respond to the incoming packet.
-	packet.packetInfo.LocalAddr = r.LocalAddress
-	packet.packetInfo.DestinationAddr = r.LocalAddress
-	packet.packetInfo.NIC = r.NICID()
+	localAddr := pkt.Network().DestinationAddress()
+	packet.packetInfo.LocalAddr = localAddr
+	packet.packetInfo.DestinationAddr = localAddr
+	packet.packetInfo.NIC = pkt.NICID
 	packet.timestamp = e.stack.Clock().NowNanoseconds()
 
 	e.rcvMu.Unlock()
@@ -1464,23 +1344,20 @@ func (e *endpoint) HandlePacket(r *stack.Route, id stack.TransportEndpointID, pk
 // HandleControlPacket implements stack.TransportEndpoint.HandleControlPacket.
 func (e *endpoint) HandleControlPacket(id stack.TransportEndpointID, typ stack.ControlType, extra uint32, pkt *stack.PacketBuffer) {
 	if typ == stack.ControlPortUnreachable {
-		e.mu.RLock()
-		defer e.mu.RUnlock()
-
-		if e.state == StateConnected {
+		if e.EndpointState() == StateConnected {
 			e.lastErrorMu.Lock()
-			defer e.lastErrorMu.Unlock()
-
 			e.lastError = tcpip.ErrConnectionRefused
+			e.lastErrorMu.Unlock()
+
+			e.waiterQueue.Notify(waiter.EventErr)
+			return
 		}
 	}
 }
 
 // State implements tcpip.Endpoint.State.
 func (e *endpoint) State() uint32 {
-	e.mu.Lock()
-	defer e.mu.Unlock()
-	return uint32(e.state)
+	return uint32(e.EndpointState())
 }
 
 // Info returns a copy of the endpoint info.
@@ -1500,10 +1377,16 @@ func (e *endpoint) Stats() tcpip.EndpointStats {
 // Wait implements tcpip.Endpoint.Wait.
 func (*endpoint) Wait() {}
 
-func isBroadcastOrMulticast(a tcpip.Address) bool {
-	return a == header.IPv4Broadcast || header.IsV4MulticastAddress(a) || header.IsV6MulticastAddress(a)
+func (e *endpoint) isBroadcastOrMulticast(nicID tcpip.NICID, netProto tcpip.NetworkProtocolNumber, addr tcpip.Address) bool {
+	return addr == header.IPv4Broadcast || header.IsV4MulticastAddress(addr) || header.IsV6MulticastAddress(addr) || e.stack.IsSubnetBroadcast(nicID, netProto, addr)
 }
 
+// SetOwner implements tcpip.Endpoint.SetOwner.
 func (e *endpoint) SetOwner(owner tcpip.PacketOwner) {
 	e.owner = owner
 }
+
+// SocketOptions implements tcpip.Endpoint.SocketOptions.
+func (e *endpoint) SocketOptions() *tcpip.SocketOptions {
+	return &e.ops
+}
diff --git a/pkg/tcpip/transport/udp/endpoint_state.go b/pkg/tcpip/transport/udp/endpoint_state.go
index 858c99a45..13b72dc88 100644
--- a/pkg/tcpip/transport/udp/endpoint_state.go
+++ b/pkg/tcpip/transport/udp/endpoint_state.go
@@ -98,7 +98,8 @@ func (e *endpoint) Resume(s *stack.Stack) {
 		}
 	}
 
-	if e.state != StateBound && e.state != StateConnected {
+	state := e.EndpointState()
+	if state != StateBound && state != StateConnected {
 		return
 	}
 
@@ -113,12 +114,12 @@ func (e *endpoint) Resume(s *stack.Stack) {
 	}
 
 	var err *tcpip.Error
-	if e.state == StateConnected {
-		e.route, err = e.stack.FindRoute(e.RegisterNICID, e.ID.LocalAddress, e.ID.RemoteAddress, netProto, e.multicastLoop)
+	if state == StateConnected {
+		e.route, err = e.stack.FindRoute(e.RegisterNICID, e.ID.LocalAddress, e.ID.RemoteAddress, netProto, e.ops.GetMulticastLoop())
 		if err != nil {
 			panic(err)
 		}
-	} else if len(e.ID.LocalAddress) != 0 && !isBroadcastOrMulticast(e.ID.LocalAddress) { // stateBound
+	} else if len(e.ID.LocalAddress) != 0 && !e.isBroadcastOrMulticast(e.RegisterNICID, netProto, e.ID.LocalAddress) { // stateBound
 		// A local unicast address is specified, verify that it's valid.
 		if e.stack.CheckLocalAddress(e.RegisterNICID, netProto, e.ID.LocalAddress) == 0 {
 			panic(tcpip.ErrBadLocalAddress)
diff --git a/pkg/tcpip/transport/udp/forwarder.go b/pkg/tcpip/transport/udp/forwarder.go
index 3ae6cc221..14e4648cd 100644
--- a/pkg/tcpip/transport/udp/forwarder.go
+++ b/pkg/tcpip/transport/udp/forwarder.go
@@ -43,10 +43,9 @@ func NewForwarder(s *stack.Stack, handler func(*ForwarderRequest)) *Forwarder {
 //
 // This function is expected to be passed as an argument to the
 // stack.SetTransportProtocolHandler function.
-func (f *Forwarder) HandlePacket(r *stack.Route, id stack.TransportEndpointID, pkt *stack.PacketBuffer) bool {
+func (f *Forwarder) HandlePacket(id stack.TransportEndpointID, pkt *stack.PacketBuffer) bool {
 	f.handler(&ForwarderRequest{
 		stack: f.stack,
-		route: r,
 		id:    id,
 		pkt:   pkt,
 	})
@@ -59,7 +58,6 @@ func (f *Forwarder) HandlePacket(r *stack.Route, id stack.TransportEndpointID, p
 // it via CreateEndpoint.
 type ForwarderRequest struct {
 	stack *stack.Stack
-	route *stack.Route
 	id    stack.TransportEndpointID
 	pkt   *stack.PacketBuffer
 }
@@ -72,17 +70,25 @@ func (r *ForwarderRequest) ID() stack.TransportEndpointID {
 
 // CreateEndpoint creates a connected UDP endpoint for the session request.
 func (r *ForwarderRequest) CreateEndpoint(queue *waiter.Queue) (tcpip.Endpoint, *tcpip.Error) {
-	ep := newEndpoint(r.stack, r.route.NetProto, queue)
-	if err := r.stack.RegisterTransportEndpoint(r.route.NICID(), []tcpip.NetworkProtocolNumber{r.route.NetProto}, ProtocolNumber, r.id, ep, ep.portFlags, ep.bindToDevice); err != nil {
+	netHdr := r.pkt.Network()
+	route, err := r.stack.FindRoute(r.pkt.NICID, netHdr.DestinationAddress(), netHdr.SourceAddress(), r.pkt.NetworkProtocolNumber, false /* multicastLoop */)
+	if err != nil {
+		return nil, err
+	}
+	route.ResolveWith(r.pkt.SourceLinkAddress())
+
+	ep := newEndpoint(r.stack, r.pkt.NetworkProtocolNumber, queue)
+	if err := r.stack.RegisterTransportEndpoint(r.pkt.NICID, []tcpip.NetworkProtocolNumber{r.pkt.NetworkProtocolNumber}, ProtocolNumber, r.id, ep, ep.portFlags, ep.bindToDevice); err != nil {
 		ep.Close()
+		route.Release()
 		return nil, err
 	}
 
 	ep.ID = r.id
-	ep.route = r.route.Clone()
+	ep.route = route
 	ep.dstPort = r.id.RemotePort
-	ep.effectiveNetProtos = []tcpip.NetworkProtocolNumber{r.route.NetProto}
-	ep.RegisterNICID = r.route.NICID()
+	ep.effectiveNetProtos = []tcpip.NetworkProtocolNumber{r.pkt.NetworkProtocolNumber}
+	ep.RegisterNICID = r.pkt.NICID
 	ep.boundPortFlags = ep.portFlags
 
 	ep.state = StateConnected
@@ -91,7 +97,7 @@ func (r *ForwarderRequest) CreateEndpoint(queue *waiter.Queue) (tcpip.Endpoint,
 	ep.rcvReady = true
 	ep.rcvMu.Unlock()
 
-	ep.HandlePacket(r.route, r.id, r.pkt)
+	ep.HandlePacket(r.id, r.pkt)
 
 	return ep, nil
 }
diff --git a/pkg/tcpip/transport/udp/protocol.go b/pkg/tcpip/transport/udp/protocol.go
index da5b1deb2..91420edd3 100644
--- a/pkg/tcpip/transport/udp/protocol.go
+++ b/pkg/tcpip/transport/udp/protocol.go
@@ -78,15 +78,15 @@ func (*protocol) ParsePorts(v buffer.View) (src, dst uint16, err *tcpip.Error) {
 
 // HandleUnknownDestinationPacket handles packets that are targeted at this
 // protocol but don't match any existing endpoint.
-func (p *protocol) HandleUnknownDestinationPacket(r *stack.Route, id stack.TransportEndpointID, pkt *stack.PacketBuffer) stack.UnknownDestinationPacketDisposition {
+func (p *protocol) HandleUnknownDestinationPacket(id stack.TransportEndpointID, pkt *stack.PacketBuffer) stack.UnknownDestinationPacketDisposition {
 	hdr := header.UDP(pkt.TransportHeader().View())
 	if int(hdr.Length()) > pkt.Data.Size()+header.UDPMinimumSize {
-		r.Stack().Stats().UDP.MalformedPacketsReceived.Increment()
+		p.stack.Stats().UDP.MalformedPacketsReceived.Increment()
 		return stack.UnknownDestinationPacketMalformed
 	}
 
-	if !verifyChecksum(r, hdr, pkt) {
-		r.Stack().Stats().UDP.ChecksumErrors.Increment()
+	if !verifyChecksum(hdr, pkt) {
+		p.stack.Stats().UDP.ChecksumErrors.Increment()
 		return stack.UnknownDestinationPacketMalformed
 	}
 
diff --git a/pkg/tcpip/transport/udp/udp_test.go b/pkg/tcpip/transport/udp/udp_test.go
index b4604ba35..08980c298 100644
--- a/pkg/tcpip/transport/udp/udp_test.go
+++ b/pkg/tcpip/transport/udp/udp_test.go
@@ -22,6 +22,7 @@ import (
 	"testing"
 	"time"
 
+	"github.com/google/go-cmp/cmp"
 	"gvisor.dev/gvisor/pkg/tcpip"
 	"gvisor.dev/gvisor/pkg/tcpip/buffer"
 	"gvisor.dev/gvisor/pkg/tcpip/checker"
@@ -32,6 +33,7 @@ import (
 	"gvisor.dev/gvisor/pkg/tcpip/network/ipv4"
 	"gvisor.dev/gvisor/pkg/tcpip/network/ipv6"
 	"gvisor.dev/gvisor/pkg/tcpip/stack"
+	"gvisor.dev/gvisor/pkg/tcpip/transport/icmp"
 	"gvisor.dev/gvisor/pkg/tcpip/transport/udp"
 	"gvisor.dev/gvisor/pkg/waiter"
 )
@@ -54,6 +56,7 @@ const (
 	stackPort       = 1234
 	testAddr        = "\x0a\x00\x00\x02"
 	testPort        = 4096
+	invalidPort     = 8192
 	multicastAddr   = "\xe8\x2b\xd3\xea"
 	multicastV6Addr = "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
 	broadcastAddr   = header.IPv4Broadcast
@@ -295,7 +298,8 @@ func newDualTestContext(t *testing.T, mtu uint32) *testContext {
 	t.Helper()
 	return newDualTestContextWithOptions(t, mtu, stack.Options{
 		NetworkProtocols:   []stack.NetworkProtocolFactory{ipv4.NewProtocol, ipv6.NewProtocol},
-		TransportProtocols: []stack.TransportProtocolFactory{udp.NewProtocol},
+		TransportProtocols: []stack.TransportProtocolFactory{udp.NewProtocol, icmp.NewProtocol6, icmp.NewProtocol4},
+		HandleLocal:        true,
 	})
 }
 
@@ -360,13 +364,9 @@ func (c *testContext) createEndpointForFlow(flow testFlow) {
 
 	c.createEndpoint(flow.sockProto())
 	if flow.isV6Only() {
-		if err := c.ep.SetSockOptBool(tcpip.V6OnlyOption, true); err != nil {
-			c.t.Fatalf("SetSockOptBool failed: %s", err)
-		}
+		c.ep.SocketOptions().SetV6Only(true)
 	} else if flow.isBroadcast() {
-		if err := c.ep.SetSockOptBool(tcpip.BroadcastOption, true); err != nil {
-			c.t.Fatalf("SetSockOptBool failed: %s", err)
-		}
+		c.ep.SocketOptions().SetBroadcast(true)
 	}
 }
 
@@ -453,12 +453,12 @@ func (c *testContext) buildV6Packet(payload []byte, h *header4Tuple) buffer.View
 	// Initialize the IP header.
 	ip := header.IPv6(buf)
 	ip.Encode(&header.IPv6Fields{
-		TrafficClass:  testTOS,
-		PayloadLength: uint16(header.UDPMinimumSize + len(payload)),
-		NextHeader:    uint8(udp.ProtocolNumber),
-		HopLimit:      65,
-		SrcAddr:       h.srcAddr.Addr,
-		DstAddr:       h.dstAddr.Addr,
+		TrafficClass:      testTOS,
+		PayloadLength:     uint16(header.UDPMinimumSize + len(payload)),
+		TransportProtocol: udp.ProtocolNumber,
+		HopLimit:          65,
+		SrcAddr:           h.srcAddr.Addr,
+		DstAddr:           h.dstAddr.Addr,
 	})
 
 	// Initialize the UDP header.
@@ -490,7 +490,6 @@ func (c *testContext) buildV4Packet(payload []byte, h *header4Tuple) buffer.View
 	// Initialize the IP header.
 	ip := header.IPv4(buf)
 	ip.Encode(&header.IPv4Fields{
-		IHL:         header.IPv4MinimumSize,
 		TOS:         testTOS,
 		TotalLength: uint16(len(buf)),
 		TTL:         65,
@@ -975,7 +974,7 @@ func testFailingWrite(c *testContext, flow testFlow, wantErr *tcpip.Error) {
 // provided.
 func testWrite(c *testContext, flow testFlow, checkers ...checker.NetworkChecker) uint16 {
 	c.t.Helper()
-	return testWriteInternal(c, flow, true, checkers...)
+	return testWriteAndVerifyInternal(c, flow, true, checkers...)
 }
 
 // testWriteWithoutDestination sends a packet of the given test flow from the
@@ -984,10 +983,10 @@ func testWrite(c *testContext, flow testFlow, checkers ...checker.NetworkChecker
 // checker functions provided.
 func testWriteWithoutDestination(c *testContext, flow testFlow, checkers ...checker.NetworkChecker) uint16 {
 	c.t.Helper()
-	return testWriteInternal(c, flow, false, checkers...)
+	return testWriteAndVerifyInternal(c, flow, false, checkers...)
 }
 
-func testWriteInternal(c *testContext, flow testFlow, setDest bool, checkers ...checker.NetworkChecker) uint16 {
+func testWriteNoVerify(c *testContext, flow testFlow, setDest bool) buffer.View {
 	c.t.Helper()
 	// Take a snapshot of the stats to validate them at the end of the test.
 	epstats := c.ep.Stats().(*tcpip.TransportEndpointStats).Clone()
@@ -1009,6 +1008,12 @@ func testWriteInternal(c *testContext, flow testFlow, setDest bool, checkers ...
 		c.t.Fatalf("Bad number of bytes written: got %v, want %v", n, len(payload))
 	}
 	c.checkEndpointWriteStats(1, epstats, err)
+	return payload
+}
+
+func testWriteAndVerifyInternal(c *testContext, flow testFlow, setDest bool, checkers ...checker.NetworkChecker) uint16 {
+	c.t.Helper()
+	payload := testWriteNoVerify(c, flow, setDest)
 	// Received the packet and check the payload.
 	b := c.getPacketAndVerify(flow, checkers...)
 	var udp header.UDP
@@ -1153,6 +1158,39 @@ func TestV4WriteOnConnected(t *testing.T) {
 	testWriteWithoutDestination(c, unicastV4)
 }
 
+// TestWriteOnConnectedInvalidPort checks that a write on an endpoint connected to
+// stackAddr:invalidPort succeeds and leaves ErrConnectionRefused in LastError.
+func TestWriteOnConnectedInvalidPort(t *testing.T) {
+	protocols := map[string]tcpip.NetworkProtocolNumber{
+		"ipv4": ipv4.ProtocolNumber,
+		"ipv6": ipv6.ProtocolNumber,
+	}
+	for name, pn := range protocols {
+		t.Run(name, func(t *testing.T) {
+			c := newDualTestContext(t, defaultMTU)
+			defer c.cleanup()
+
+			c.createEndpoint(pn)
+			dst := tcpip.FullAddress{Addr: stackAddr, Port: invalidPort}
+			if err := c.ep.Connect(dst); err != nil {
+				c.t.Fatalf("Connect failed: %s", err)
+			}
+			payload := buffer.View(newPayload())
+			n, _, err := c.ep.Write(tcpip.SlicePayload(payload), tcpip.WriteOptions{To: &dst})
+			if err != nil {
+				c.t.Fatalf("c.ep.Write(...) = %s, want nil", err)
+			}
+			if got, want := n, int64(len(payload)); got != want {
+				c.t.Fatalf("c.ep.Write(...) wrote %d bytes, want %d bytes", got, want)
+			}
+
+			if err := c.ep.LastError(); err != tcpip.ErrConnectionRefused {
+				c.t.Fatalf("expected c.ep.LastError() == ErrConnectionRefused, got: %+v", err)
+			}
+		})
+	}
+}
+
 // TestWriteOnBoundToV4Multicast checks that we can send packets out of a socket
 // that is bound to a V4 multicast address.
 func TestWriteOnBoundToV4Multicast(t *testing.T) {
@@ -1375,9 +1413,7 @@ func TestReadIPPacketInfo(t *testing.T) {
 				}
 			}
 
-			if err := c.ep.SetSockOptBool(tcpip.ReceiveIPPacketInfoOption, true); err != nil {
-				t.Fatalf("c.ep.SetSockOptBool(tcpip.ReceiveIPPacketInfoOption, true): %s", err)
-			}
+			c.ep.SocketOptions().SetReceivePacketInfo(true)
 
 			testRead(c, test.flow, checker.ReceiveIPPacketInfo(tcpip.IPPacketInfo{
 				NIC:             1,
@@ -1392,6 +1428,93 @@ func TestReadIPPacketInfo(t *testing.T) {
 	}
 }
 
+// TestReadRecvOriginalDstAddr checks that, once ReceiveOriginalDstAddress is
+// enabled, a read reports the expected original destination address per flow.
+func TestReadRecvOriginalDstAddr(t *testing.T) {
+	tests := []struct {
+		name                    string
+		proto                   tcpip.NetworkProtocolNumber
+		flow                    testFlow
+		expectedOriginalDstAddr tcpip.FullAddress
+	}{
+		{
+			name:                    "IPv4 unicast",
+			proto:                   header.IPv4ProtocolNumber,
+			flow:                    unicastV4,
+			expectedOriginalDstAddr: tcpip.FullAddress{NIC: 1, Addr: stackAddr, Port: stackPort},
+		},
+		{
+			name:  "IPv4 multicast",
+			proto: header.IPv4ProtocolNumber,
+			flow:  multicastV4,
+			// This should actually be a unicast address assigned to the interface.
+			//
+			// TODO(gvisor.dev/issue/3556): This check is validating incorrect
+			// behaviour. We still include the test so that once the bug is
+			// resolved, this test will start to fail and the individual tasked
+			// with fixing this bug knows to also fix this test :).
+			expectedOriginalDstAddr: tcpip.FullAddress{NIC: 1, Addr: multicastAddr, Port: stackPort},
+		},
+		{
+			name:  "IPv4 broadcast",
+			proto: header.IPv4ProtocolNumber,
+			flow:  broadcast,
+			// This should actually be a unicast address assigned to the interface.
+			//
+			// TODO(gvisor.dev/issue/3556): This check is validating incorrect
+			// behaviour. We still include the test so that once the bug is
+			// resolved, this test will start to fail and the individual tasked
+			// with fixing this bug knows to also fix this test :).
+			expectedOriginalDstAddr: tcpip.FullAddress{NIC: 1, Addr: broadcastAddr, Port: stackPort},
+		},
+		{
+			name:                    "IPv6 unicast",
+			proto:                   header.IPv6ProtocolNumber,
+			flow:                    unicastV6,
+			expectedOriginalDstAddr: tcpip.FullAddress{NIC: 1, Addr: stackV6Addr, Port: stackPort},
+		},
+		{
+			name:  "IPv6 multicast",
+			proto: header.IPv6ProtocolNumber,
+			flow:  multicastV6,
+			// This should actually be a unicast address assigned to the interface.
+			//
+			// TODO(gvisor.dev/issue/3556): This check is validating incorrect
+			// behaviour. We still include the test so that once the bug is
+			// resolved, this test will start to fail and the individual tasked
+			// with fixing this bug knows to also fix this test :).
+			expectedOriginalDstAddr: tcpip.FullAddress{NIC: 1, Addr: multicastV6Addr, Port: stackPort},
+		},
+	}
+
+	for _, test := range tests {
+		t.Run(test.name, func(t *testing.T) {
+			c := newDualTestContext(t, defaultMTU)
+			defer c.cleanup()
+
+			c.createEndpoint(test.proto)
+			bindAddr := tcpip.FullAddress{Port: stackPort}
+			if err := c.ep.Bind(bindAddr); err != nil {
+				t.Fatalf("Bind(%#v): %s", bindAddr, err)
+			}
+
+			if test.flow.isMulticast() {
+				ifoptSet := tcpip.AddMembershipOption{NIC: 1, MulticastAddr: test.flow.getMcastAddr()}
+				if err := c.ep.SetSockOpt(&ifoptSet); err != nil {
+					c.t.Fatalf("SetSockOpt(&%#v): %s", ifoptSet, err)
+				}
+			}
+
+			c.ep.SocketOptions().SetReceiveOriginalDstAddress(true)
+			testRead(c, test.flow, checker.ReceiveOriginalDstAddr(test.expectedOriginalDstAddr))
+
+			if got := c.s.Stats().UDP.PacketsReceived.Value(); got != 1 {
+				t.Fatalf("Read did not increment PacketsReceived: got = %d, want = 1", got)
+			}
+		})
+	}
+}
+
 func TestWriteIncrementsPacketsSent(t *testing.T) {
 	c := newDualTestContext(t, defaultMTU)
 	defer c.cleanup()
@@ -1415,16 +1538,12 @@ func TestNoChecksum(t *testing.T) {
 			c.createEndpointForFlow(flow)
 
 			// Disable the checksum generation.
-			if err := c.ep.SetSockOptBool(tcpip.NoChecksumOption, true); err != nil {
-				t.Fatalf("SetSockOptBool failed: %s", err)
-			}
+			c.ep.SocketOptions().SetNoChecksum(true)
 			// This option is effective on IPv4 only.
 			testWrite(c, flow, checker.UDP(checker.NoChecksum(flow.isV4())))
 
 			// Enable the checksum generation.
-			if err := c.ep.SetSockOptBool(tcpip.NoChecksumOption, false); err != nil {
-				t.Fatalf("SetSockOptBool failed: %s", err)
-			}
+			c.ep.SocketOptions().SetNoChecksum(false)
 			testWrite(c, flow, checker.UDP(checker.NoChecksum(false)))
 		})
 	}
@@ -1452,6 +1571,14 @@ func (*testInterface) Enabled() bool {
 	return true
 }
 
+func (*testInterface) Promiscuous() bool {
+	return false
+}
+
+func (*testInterface) WritePacketToRemote(tcpip.LinkAddress, *stack.GSO, tcpip.NetworkProtocolNumber, *stack.PacketBuffer) *tcpip.Error {
+	return tcpip.ErrNotSupported
+}
+
 func TestTTL(t *testing.T) {
 	for _, flow := range []testFlow{unicastV4, unicastV4in6, unicastV6, unicastV6Only, multicastV4, multicastV4in6, multicastV6, broadcast, broadcastIn6} {
 		t.Run(fmt.Sprintf("flow:%s", flow), func(t *testing.T) {
@@ -1586,13 +1713,15 @@ func TestSetTClass(t *testing.T) {
 }
 
 func TestReceiveTosTClass(t *testing.T) {
+	const RcvTOSOpt = "ReceiveTosOption"
+	const RcvTClassOpt = "ReceiveTClassOption"
+
 	testCases := []struct {
-		name             string
-		getReceiveOption tcpip.SockOptBool
-		tests            []testFlow
+		name  string
+		tests []testFlow
 	}{
-		{"ReceiveTosOption", tcpip.ReceiveTOSOption, []testFlow{unicastV4, broadcast}},
-		{"ReceiveTClassOption", tcpip.ReceiveTClassOption, []testFlow{unicastV4in6, unicastV6, unicastV6Only, broadcastIn6}},
+		{RcvTOSOpt, []testFlow{unicastV4, broadcast}},
+		{RcvTClassOpt, []testFlow{unicastV4in6, unicastV6, unicastV6Only, broadcastIn6}},
 	}
 	for _, testCase := range testCases {
 		for _, flow := range testCase.tests {
@@ -1601,29 +1730,32 @@ func TestReceiveTosTClass(t *testing.T) {
 				defer c.cleanup()
 
 				c.createEndpointForFlow(flow)
-				option := testCase.getReceiveOption
 				name := testCase.name
 
-				// Verify that setting and reading the option works.
-				v, err := c.ep.GetSockOptBool(option)
-				if err != nil {
-					c.t.Errorf("GetSockOptBool(%s) failed: %s", name, err)
+				var optionGetter func() bool
+				var optionSetter func(bool)
+				switch name {
+				case RcvTOSOpt:
+					optionGetter = c.ep.SocketOptions().GetReceiveTOS
+					optionSetter = c.ep.SocketOptions().SetReceiveTOS
+				case RcvTClassOpt:
+					optionGetter = c.ep.SocketOptions().GetReceiveTClass
+					optionSetter = c.ep.SocketOptions().SetReceiveTClass
+				default:
+					t.Fatalf("unkown test variant: %s", name)
 				}
+
+				// Verify that setting and reading the option works.
+				v := optionGetter()
 				// Test for expected default value.
 				if v != false {
 					c.t.Errorf("got GetSockOptBool(%s) = %t, want = %t", name, v, false)
 				}
 
 				want := true
-				if err := c.ep.SetSockOptBool(option, want); err != nil {
-					c.t.Fatalf("SetSockOptBool(%s, %t) failed: %s", name, want, err)
-				}
-
-				got, err := c.ep.GetSockOptBool(option)
-				if err != nil {
-					c.t.Errorf("GetSockOptBool(%s) failed: %s", name, err)
-				}
+				optionSetter(want)
 
+				got := optionGetter()
 				if got != want {
 					c.t.Errorf("got GetSockOptBool(%s) = %t, want = %t", name, got, want)
 				}
@@ -1633,10 +1765,10 @@ func TestReceiveTosTClass(t *testing.T) {
 				if err := c.ep.Bind(tcpip.FullAddress{Port: stackPort}); err != nil {
 					c.t.Fatalf("Bind failed: %s", err)
 				}
-				switch option {
-				case tcpip.ReceiveTClassOption:
+				switch name {
+				case RcvTClassOpt:
 					testRead(c, flow, checker.ReceiveTClass(testTOS))
-				case tcpip.ReceiveTOSOption:
+				case RcvTOSOpt:
 					testRead(c, flow, checker.ReceiveTOS(testTOS))
 				default:
 					t.Fatalf("unknown test variant: %s", name)
@@ -1783,28 +1915,31 @@ func TestV4UnknownDestination(t *testing.T) {
 			icmpPkt := header.ICMPv4(hdr.Payload())
 			payloadIPHeader := header.IPv4(icmpPkt.Payload())
 			incomingHeaderLength := header.IPv4MinimumSize + header.UDPMinimumSize
-			wantLen := len(payload)
+			wantPayloadLen := len(payload)
 			if tc.largePayload {
 				// To work out the data size we need to simulate what the sender would
 				// have done. The wanted size is the total available minus the sum of
 				// the headers in the UDP AND ICMP packets, given that we know the test
 				// had only a minimal IP header but the ICMP sender will have allowed
 				// for a maximally sized packet header.
-				wantLen = header.IPv4MinimumProcessableDatagramSize - header.IPv4MaximumHeaderSize - header.ICMPv4MinimumSize - incomingHeaderLength
-
+				wantPayloadLen = header.IPv4MinimumProcessableDatagramSize - header.IPv4MaximumHeaderSize - header.ICMPv4MinimumSize - incomingHeaderLength
 			}
 
 			// In the case of large payloads the IP packet may be truncated. Update
 			// the length field before retrieving the udp datagram payload.
 			// Add back the two headers within the payload.
-			payloadIPHeader.SetTotalLength(uint16(wantLen + incomingHeaderLength))
-
+			payloadIPHeader.SetTotalLength(uint16(wantPayloadLen + incomingHeaderLength))
 			origDgram := header.UDP(payloadIPHeader.Payload())
-			if got, want := len(origDgram.Payload()), wantLen; got != want {
-				t.Fatalf("unexpected payload length got: %d, want: %d", got, want)
+			wantDgramLen := wantPayloadLen + header.UDPMinimumSize
+
+			if got, want := len(origDgram), wantDgramLen; got != want {
+				t.Fatalf("got len(origDgram) = %d, want = %d", got, want)
 			}
-			if got, want := origDgram.Payload(), payload[:wantLen]; !bytes.Equal(got, want) {
-				t.Fatalf("unexpected payload got: %d, want: %d", got, want)
+			// Correct UDP length to access payload.
+			origDgram.SetLength(uint16(wantDgramLen))
+
+			if got, want := origDgram.Payload(), payload[:wantPayloadLen]; !bytes.Equal(got, want) {
+				t.Fatalf("got origDgram.Payload() = %x, want = %x", got, want)
 			}
 		})
 	}
@@ -1879,20 +2014,23 @@ func TestV6UnknownDestination(t *testing.T) {
 
 			icmpPkt := header.ICMPv6(hdr.Payload())
 			payloadIPHeader := header.IPv6(icmpPkt.Payload())
-			wantLen := len(payload)
+			wantPayloadLen := len(payload)
 			if tc.largePayload {
-				wantLen = header.IPv6MinimumMTU - header.IPv6MinimumSize*2 - header.ICMPv6MinimumSize - header.UDPMinimumSize
+				wantPayloadLen = header.IPv6MinimumMTU - header.IPv6MinimumSize*2 - header.ICMPv6MinimumSize - header.UDPMinimumSize
 			}
+			wantDgramLen := wantPayloadLen + header.UDPMinimumSize
 			// In case of large payloads the IP packet may be truncated. Update
 			// the length field before retrieving the udp datagram payload.
-			payloadIPHeader.SetPayloadLength(uint16(wantLen + header.UDPMinimumSize))
+			payloadIPHeader.SetPayloadLength(uint16(wantDgramLen))
 
 			origDgram := header.UDP(payloadIPHeader.Payload())
-			if got, want := len(origDgram.Payload()), wantLen; got != want {
-				t.Fatalf("unexpected payload length got: %d, want: %d", got, want)
+			if got, want := len(origDgram), wantPayloadLen+header.UDPMinimumSize; got != want {
+				t.Fatalf("got len(origDgram) = %d, want = %d", got, want)
 			}
-			if got, want := origDgram.Payload(), payload[:wantLen]; !bytes.Equal(got, want) {
-				t.Fatalf("unexpected payload got: %v, want: %v", got, want)
+			// Correct UDP length to access payload.
+			origDgram.SetLength(uint16(wantPayloadLen + header.UDPMinimumSize))
+			if diff := cmp.Diff(payload[:wantPayloadLen], origDgram.Payload()); diff != "" {
+				t.Fatalf("origDgram.Payload() mismatch (-want +got):\n%s", diff)
 			}
 		})
 	}
@@ -1951,12 +2089,12 @@ func TestShortHeader(t *testing.T) {
 	// Initialize the IP header.
 	ip := header.IPv6(buf)
 	ip.Encode(&header.IPv6Fields{
-		TrafficClass:  testTOS,
-		PayloadLength: uint16(udpSize),
-		NextHeader:    uint8(udp.ProtocolNumber),
-		HopLimit:      65,
-		SrcAddr:       h.srcAddr.Addr,
-		DstAddr:       h.dstAddr.Addr,
+		TrafficClass:      testTOS,
+		PayloadLength:     uint16(udpSize),
+		TransportProtocol: udp.ProtocolNumber,
+		HopLimit:          65,
+		SrcAddr:           h.srcAddr.Addr,
+		DstAddr:           h.dstAddr.Addr,
 	})
 
 	// Initialize the UDP header.
@@ -2391,17 +2529,13 @@ func TestOutgoingSubnetBroadcast(t *testing.T) {
 				t.Fatalf("got ep.Write(_, _) = (%d, _, %v), want = (_, _, %v)", n, err, expectedErrWithoutBcastOpt)
 			}
 
-			if err := ep.SetSockOptBool(tcpip.BroadcastOption, true); err != nil {
-				t.Fatalf("got SetSockOptBool(BroadcastOption, true): %s", err)
-			}
+			ep.SocketOptions().SetBroadcast(true)
 
 			if n, _, err := ep.Write(data, opts); err != nil {
 				t.Fatalf("got ep.Write(_, _) = (%d, _, %s), want = (_, _, nil)", n, err)
 			}
 
-			if err := ep.SetSockOptBool(tcpip.BroadcastOption, false); err != nil {
-				t.Fatalf("got SetSockOptBool(BroadcastOption, false): %s", err)
-			}
+			ep.SocketOptions().SetBroadcast(false)
 
 			if n, _, err := ep.Write(data, opts); err != expectedErrWithoutBcastOpt {
 				t.Fatalf("got ep.Write(_, _) = (%d, _, %v), want = (_, _, %v)", n, err, expectedErrWithoutBcastOpt)
@@ -2409,3 +2543,67 @@ func TestOutgoingSubnetBroadcast(t *testing.T) {
 		})
 	}
 }
+
+// TestReceiveShortLength checks that bytes trailing a UDP datagram, beyond what
+// the UDP length field accounts for, are not returned to the reader.
+func TestReceiveShortLength(t *testing.T) {
+	for _, flow := range []testFlow{unicastV4, unicastV6} {
+		t.Run(flow.String(), func(t *testing.T) {
+			c := newDualTestContext(t, defaultMTU)
+			defer c.cleanup()
+
+			c.createEndpointForFlow(flow)
+			// Bind to wildcard.
+			bindAddr := tcpip.FullAddress{Port: stackPort}
+			if err := c.ep.Bind(bindAddr); err != nil {
+				c.t.Fatalf("c.ep.Bind(%#v): %s", bindAddr, err)
+			}
+
+			payload := newPayload()
+			extraBytes := []byte{1, 2, 3, 4}
+			h := flow.header4Tuple(incoming)
+			var buf buffer.View
+			var proto tcpip.NetworkProtocolNumber
+
+			// Build packets with extra bytes not accounted for in the UDP length
+			// field.
+			var udpHdr header.UDP
+			if flow.isV4() {
+				buf = c.buildV4Packet(payload, &h)
+				buf = append(buf, extraBytes...)
+				ip := header.IPv4(buf)
+				ip.SetTotalLength(ip.TotalLength() + uint16(len(extraBytes)))
+				ip.SetChecksum(0)
+				ip.SetChecksum(^ip.CalculateChecksum())
+				proto = ipv4.ProtocolNumber
+				udpHdr = ip.Payload()
+			} else {
+				buf = c.buildV6Packet(payload, &h)
+				buf = append(buf, extraBytes...)
+				ip := header.IPv6(buf)
+				ip.SetPayloadLength(ip.PayloadLength() + uint16(len(extraBytes)))
+				proto = ipv6.ProtocolNumber
+				udpHdr = ip.Payload()
+			}
+
+			if diff := cmp.Diff(payload, udpHdr.Payload()); diff != "" {
+				t.Errorf("udpHdr.Payload() mismatch (-want +got):\n%s", diff)
+			}
+
+			c.linkEP.InjectInbound(proto, stack.NewPacketBuffer(stack.PacketBufferOptions{
+				Data: buf.ToVectorisedView(),
+			}))
+
+			// Try to receive the data.
+			v, _, err := c.ep.Read(nil)
+			if err != nil {
+				t.Fatalf("c.ep.Read(nil): %s", err)
+			}
+
+			// Check the payload is read back without extra bytes.
+			if diff := cmp.Diff(buffer.View(payload), v); diff != "" {
+				t.Errorf("c.ep.Read(nil) mismatch (-want +got):\n%s", diff)
+			}
+		})
+	}
+}