// Copyright 2018 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package stack

import (
	"fmt"
	"math/rand"
	"reflect"
	"sort"
	"sync/atomic"

	"gvisor.dev/gvisor/pkg/sleep"
	"gvisor.dev/gvisor/pkg/sync"
	"gvisor.dev/gvisor/pkg/tcpip"
	"gvisor.dev/gvisor/pkg/tcpip/buffer"
	"gvisor.dev/gvisor/pkg/tcpip/header"
)

// ipv4BroadcastAddr is the IPv4 broadcast address (header.IPv4Broadcast)
// expressed as a protocol address whose prefix covers every bit of the
// address.
//
// When the stack has IPv4 configured, it is added to a NIC when the NIC is
// enabled and removed again when the NIC is disabled.
var ipv4BroadcastAddr = tcpip.ProtocolAddress{
	Protocol: header.IPv4ProtocolNumber,
	AddressWithPrefix: tcpip.AddressWithPrefix{
		Address:   header.IPv4Broadcast,
		PrefixLen: 8 * header.IPv4AddressSize,
	},
}

// NIC represents a "network interface card" to which the networking stack is
// attached.
//
// The fields outside of mu are initialized by newNIC. The fields grouped under
// mu are the NIC's mutable state (enabled/spoofing/promiscuous flags, the
// address endpoints and primary endpoint lists, multicast join counts, packet
// endpoints and NDP state) and are accessed with the embedded RWMutex held.
type NIC struct {
	stack   *Stack
	id      tcpip.NICID
	name    string
	linkEP  LinkEndpoint
	context NICContext

	stats            NICStats
	neigh            *neighborCache
	networkEndpoints map[tcpip.NetworkProtocolNumber]NetworkEndpoint

	mu struct {
		sync.RWMutex
		enabled     bool
		spoofing    bool
		promiscuous bool
		primary     map[tcpip.NetworkProtocolNumber][]*referencedNetworkEndpoint
		endpoints   map[NetworkEndpointID]*referencedNetworkEndpoint
		mcastJoins  map[NetworkEndpointID]uint32
		// packetEPs is protected by mu, but the contained PacketEndpoint
		// values are not.
		packetEPs map[tcpip.NetworkProtocolNumber][]PacketEndpoint
		ndp       ndpState
	}
}

// NICStats includes transmitted and received stats.
//
// Use makeNICStats to obtain a value whose counters have been initialized.
type NICStats struct {
	// Tx and Rx hold the transmit and receive counters, respectively.
	Tx DirectionStats
	Rx DirectionStats

	// DisabledRx presumably counts traffic received while the NIC is disabled;
	// NOTE(review): confirm against the receive path, which is not visible here.
	DisabledRx DirectionStats
}

// makeNICStats returns a NICStats value after handing it to
// tcpip.InitStatCounters so that its counters are ready for use.
func makeNICStats() NICStats {
	stats := NICStats{}
	// InitStatCounters needs a settable reflect.Value, hence the pointer
	// round-trip through Elem.
	settable := reflect.ValueOf(&stats).Elem()
	tcpip.InitStatCounters(settable)
	return stats
}

// DirectionStats includes packet and byte counts.
//
// Both counters are pointers and are nil in the zero value of DirectionStats.
type DirectionStats struct {
	Packets *tcpip.StatCounter
	Bytes   *tcpip.StatCounter
}

// PrimaryEndpointBehavior is an enumeration of an endpoint's primacy behavior.
//
// The zero value is CanBePrimaryEndpoint.
type PrimaryEndpointBehavior int

const (
	// CanBePrimaryEndpoint indicates the endpoint can be used as a primary
	// endpoint for new connections with no local address. This is the
	// default when calling NIC.AddAddress.
	CanBePrimaryEndpoint PrimaryEndpointBehavior = iota

	// FirstPrimaryEndpoint indicates the endpoint should be the first
	// primary endpoint considered. If there are multiple endpoints with
	// this behavior, the most recently-added one will be first.
	FirstPrimaryEndpoint

	// NeverPrimaryEndpoint indicates the endpoint should never be a
	// primary endpoint.
	NeverPrimaryEndpoint
)

// newNIC returns a new NIC using the default NDP configurations from stack.
//
// A network endpoint is created for every network protocol registered with
// stack, and the NIC is attached to ep before it is returned. A neighbor cache
// is only created when ep requires link address resolution, stack has at least
// one link address resolver and stack is configured to use the neighbor cache;
// otherwise the NIC's neighbor cache is left nil.
func newNIC(stack *Stack, id tcpip.NICID, name string, ep LinkEndpoint, ctx NICContext) *NIC {
	// TODO(b/141011931): Validate a LinkEndpoint (ep) is valid. For
	// example, make sure that the link address it provides is a valid
	// unicast ethernet address.

	// TODO(b/143357959): RFC 8200 section 5 requires that IPv6 endpoints
	// observe an MTU of at least 1280 bytes. Ensure that this requirement
	// of IPv6 is supported on this endpoint's LinkEndpoint.

	nic := &NIC{
		stack:            stack,
		id:               id,
		name:             name,
		linkEP:           ep,
		context:          ctx,
		stats:            makeNICStats(),
		networkEndpoints: make(map[tcpip.NetworkProtocolNumber]NetworkEndpoint),
	}
	nic.mu.primary = make(map[tcpip.NetworkProtocolNumber][]*referencedNetworkEndpoint)
	nic.mu.endpoints = make(map[NetworkEndpointID]*referencedNetworkEndpoint)
	nic.mu.mcastJoins = make(map[NetworkEndpointID]uint32)
	nic.mu.packetEPs = make(map[tcpip.NetworkProtocolNumber][]PacketEndpoint)
	nic.mu.ndp = ndpState{
		nic:            nic,
		configs:        stack.ndpConfigs,
		dad:            make(map[tcpip.Address]dadState),
		defaultRouters: make(map[tcpip.Address]defaultRouterState),
		onLinkPrefixes: make(map[tcpip.Subnet]onLinkPrefixState),
		slaacPrefixes:  make(map[tcpip.Subnet]slaacPrefixState),
	}
	nic.mu.ndp.initializeTempAddrState()

	// Check for Neighbor Unreachability Detection support.
	var nud NUDHandler
	if ep.Capabilities()&CapabilityResolutionRequired != 0 && len(stack.linkAddrResolvers) != 0 && stack.useNeighborCache {
		// The random source is seeded from the stack's clock.
		rng := rand.New(rand.NewSource(stack.clock.NowNanoseconds()))
		nic.neigh = &neighborCache{
			nic:   nic,
			state: NewNUDState(stack.nudConfigs, rng),
			cache: make(map[tcpip.Address]*neighborEntry, neighborCacheSize),
		}

		// An interface value that holds a nil pointer but non-nil type is not the
		// same as the nil interface. Because of this, nud must only be assigned if
		// nic.neigh is non-nil since a nil reference to a neighborCache is not
		// valid.
		//
		// See https://golang.org/doc/faq#nil_error for more information.
		nud = nic.neigh
	}

	// Register supported packet and network endpoint protocols.
	for _, netProto := range header.Ethertypes {
		nic.mu.packetEPs[netProto] = []PacketEndpoint{}
	}
	for _, netProto := range stack.networkProtocols {
		netNum := netProto.Number()
		nic.mu.packetEPs[netNum] = nil
		nic.networkEndpoints[netNum] = netProto.NewEndpoint(id, stack, nud, nic, ep, stack)
	}

	// Attach last, once the NIC is fully constructed.
	nic.linkEP.Attach(nic)

	return nic
}

// enabled reports whether n is currently enabled.
//
// The flag is read with n.mu held for reading.
func (n *NIC) enabled() bool {
	n.mu.RLock()
	defer n.mu.RUnlock()
	return n.mu.enabled
}

// disable disables n, undoing the work done by enable.
//
// It is a no-op returning nil when n is already disabled.
func (n *NIC) disable() *tcpip.Error {
	// Cheap check under the read lock first so that the common "already
	// disabled" case never takes the write lock. disableLocked checks the flag
	// again once the write lock is held.
	if !n.enabled() {
		return nil
	}

	n.mu.Lock()
	result := n.disableLocked()
	n.mu.Unlock()
	return result
}

// disableLocked disables n, undoing the work done by enable.
//
// For IPv6 it stops router solicitation, cleans up NDP state, stops DAD for
// tentative unicast addresses and leaves the All-Nodes multicast group. For
// IPv4 it leaves the All-Systems multicast group and removes the broadcast
// address. n is only marked as disabled if none of those steps fail.
//
// n MUST be locked.
func (n *NIC) disableLocked() *tcpip.Error {
	if !n.mu.enabled {
		return nil
	}

	// TODO(gvisor.dev/issue/1491): Should Routes that are currently bound to n be
	// invalidated? Currently, Routes will continue to work when a NIC is enabled
	// again, and applications may not know that the underlying NIC was ever
	// disabled.

	// fatal reports whether err must abort the disable. ErrBadLocalAddress is
	// tolerated because the group may already have been left, or the address
	// may already have been removed.
	fatal := func(err *tcpip.Error) bool {
		return err != nil && err != tcpip.ErrBadLocalAddress
	}

	if _, hasIPv6 := n.stack.networkProtocols[header.IPv6ProtocolNumber]; hasIPv6 {
		n.mu.ndp.stopSolicitingRouters()
		n.mu.ndp.cleanupState(false /* hostOnly */)

		// Stop DAD for all the unicast IPv6 endpoints that are in the
		// permanentTentative state.
		for _, ref := range n.mu.endpoints {
			addr := ref.address()
			if ref.getKind() != permanentTentative || !header.IsV6UnicastAddress(addr) {
				continue
			}
			n.mu.ndp.stopDuplicateAddressDetection(addr)
		}

		err := n.leaveGroupLocked(header.IPv6AllNodesMulticastAddress, false /* force */)
		if fatal(err) {
			return err
		}
	}

	if _, hasIPv4 := n.stack.networkProtocols[header.IPv4ProtocolNumber]; hasIPv4 {
		err := n.leaveGroupLocked(header.IPv4AllSystems, false /* force */)
		if fatal(err) {
			return err
		}

		err = n.removePermanentAddressLocked(ipv4BroadcastAddr.AddressWithPrefix.Address)
		if fatal(err) {
			return err
		}
	}

	n.mu.enabled = false
	return nil
}

// enable enables n.
//
// If the stack has IPv4 enabled, enable will add the IPv4 broadcast address
// and join the IPv4 All-Systems multicast group.
//
// If the stack has IPv6 enabled, enable will join the IPv6 All-Nodes Multicast
// address (ff02::1), start DAD for permanent addresses, and start soliciting
// routers if the stack is not operating as a router. If the stack is also
// configured to auto-generate a link-local address, one will be generated.
//
// enable is a no-op returning nil when n is already enabled. Note that n is
// marked as enabled before any of the steps above run, so n remains marked as
// enabled even when enable returns an error.
func (n *NIC) enable() *tcpip.Error {
	// Check under the read lock first to avoid taking the write lock when n is
	// already enabled.
	n.mu.RLock()
	enabled := n.mu.enabled
	n.mu.RUnlock()
	if enabled {
		return nil
	}

	n.mu.Lock()
	defer n.mu.Unlock()

	// Check again now that the write lock is held; n may have been enabled
	// between releasing the read lock and acquiring the write lock.
	if n.mu.enabled {
		return nil
	}

	n.mu.enabled = true

	// Create an endpoint to receive broadcast packets on this interface.
	if _, ok := n.stack.networkProtocols[header.IPv4ProtocolNumber]; ok {
		if _, err := n.addAddressLocked(ipv4BroadcastAddr, NeverPrimaryEndpoint, permanent, static, false /* deprecated */); err != nil {
			return err
		}

		// As per RFC 1122 section 3.3.7, all hosts should join the all-hosts
		// multicast group. Note, the IANA calls the all-hosts multicast group the
		// all-systems multicast group.
		if err := n.joinGroupLocked(header.IPv4ProtocolNumber, header.IPv4AllSystems); err != nil {
			return err
		}
	}

	// Join the IPv6 All-Nodes Multicast group if the stack is configured to
	// use IPv6. This is required to ensure that this node properly receives
	// and responds to the various NDP messages that are destined to the
	// all-nodes multicast address. An example is the Neighbor Advertisement
	// when we perform Duplicate Address Detection, or Router Advertisement
	// when we do Router Discovery. See RFC 4862, section 5.4.2 and RFC 4861
	// section 4.2 for more information.
	//
	// Also auto-generate an IPv6 link-local address based on the NIC's
	// link address if it is configured to do so. Note, each interface is
	// required to have IPv6 link-local unicast address, as per RFC 4291
	// section 2.1.
	_, ok := n.stack.networkProtocols[header.IPv6ProtocolNumber]
	if !ok {
		return nil
	}

	// Join the All-Nodes multicast group before starting DAD as responses to DAD
	// (NDP NS) messages may be sent to the All-Nodes multicast group if the
	// source address of the NDP NS is the unspecified address, as per RFC 4861
	// section 7.2.4.
	if err := n.joinGroupLocked(header.IPv6ProtocolNumber, header.IPv6AllNodesMulticastAddress); err != nil {
		return err
	}

	// Perform DAD on all the unicast IPv6 endpoints that are in the permanent
	// or permanentTentative state.
	//
	// Addresses may have already completed DAD but in the time since the NIC was
	// last enabled, other devices may have acquired the same addresses.
	for _, r := range n.mu.endpoints {
		addr := r.address()
		if k := r.getKind(); (k != permanent && k != permanentTentative) || !header.IsV6UnicastAddress(addr) {
			continue
		}

		r.setKind(permanentTentative)
		if err := n.mu.ndp.startDuplicateAddressDetection(addr, r); err != nil {
			return err
		}
	}

	// Do not auto-generate an IPv6 link-local address for loopback devices.
	if n.stack.autoGenIPv6LinkLocal && !n.isLoopback() {
		// The valid and preferred lifetime is infinite for the auto-generated
		// link-local address.
		n.mu.ndp.doSLAAC(header.IPv6LinkLocalPrefix.Subnet(), header.NDPInfiniteLifetime, header.NDPInfiniteLifetime)
	}

	// If we are operating as a router, then do not solicit routers since we
	// won't process the RAs anyways.
	//
	// Routers do not process Router Advertisements (RA) the same way a host
	// does. That is, routers do not learn from RAs (e.g. on-link prefixes
	// and default routers). Therefore, soliciting RAs from other routers on
	// a link is unnecessary for routers.
	if !n.stack.forwarding {
		n.mu.ndp.startSolicitingRouters()
	}

	return nil
}

// remove detaches NIC from the link endpoint, and marks existing referenced
// network endpoints expired. This guarantees no packets between this NIC and
// the network stack.
//
// Teardown is best-effort: every step is attempted even if an earlier one
// fails, and the first error encountered (including one from disabling the
// NIC) is returned.
func (n *NIC) remove() *tcpip.Error {
	n.mu.Lock()
	defer n.mu.Unlock()

	// TODO(b/151378115): come up with a better way to pick an error than the
	// first one.
	//
	// A failure to disable the NIC must not stop the rest of the teardown, but
	// it must not be silently dropped either, so it is recorded as the first
	// error.
	err := n.disableLocked()

	// Forcefully leave multicast groups.
	for nid := range n.mu.mcastJoins {
		if tempErr := n.leaveGroupLocked(nid.LocalAddress, true /* force */); tempErr != nil && err == nil {
			err = tempErr
		}
	}

	// Remove permanent and permanentTentative addresses, so no packet goes out.
	for nid, ref := range n.mu.endpoints {
		switch ref.getKind() {
		case permanentTentative, permanent:
			if tempErr := n.removePermanentAddressLocked(nid.LocalAddress); tempErr != nil && err == nil {
				err = tempErr
			}
		}
	}

	// Release any resources the network endpoint may hold.
	for _, ep := range n.networkEndpoints {
		ep.Close()
	}

	// Detach from link endpoint, so no packet comes in.
	n.linkEP.Attach(nil)

	return err
}

// becomeIPv6Router transitions n into an IPv6 router.
//
// When transitioning into an IPv6 router, host-only state (NDP discovered
// routers, discovered on-link prefixes, and auto-generated addresses) will
// be cleaned up/invalidated and NDP router solicitations will be stopped.
//
// becomeIPv6Router acquires n.mu for writing.
func (n *NIC) becomeIPv6Router() {
	n.mu.Lock()
	defer n.mu.Unlock()

	// Only the host-only portion of the NDP state is cleaned up.
	n.mu.ndp.cleanupState(true /* hostOnly */)
	n.mu.ndp.stopSolicitingRouters()
}

// becomeIPv6Host transitions n into an IPv6 host.
//
// When transitioning into an IPv6 host, NDP router solicitations will be
// started.
//
// becomeIPv6Host acquires n.mu for writing.
func (n *NIC) becomeIPv6Host() {
	n.mu.Lock()
	defer n.mu.Unlock()

	n.mu.ndp.startSolicitingRouters()
}

// setPromiscuousMode turns promiscuous mode on when enable is true and off
// otherwise.
func (n *NIC) setPromiscuousMode(enable bool) {
	n.mu.Lock()
	defer n.mu.Unlock()
	n.mu.promiscuous = enable
}

// isPromiscuousMode reports whether promiscuous mode is turned on for n.
func (n *NIC) isPromiscuousMode() bool {
	n.mu.RLock()
	defer n.mu.RUnlock()
	return n.mu.promiscuous
}

// isLoopback reports whether n's link endpoint advertises the loopback
// capability.
func (n *NIC) isLoopback() bool {
	caps := n.linkEP.Capabilities()
	return caps&CapabilityLoopback != 0
}

// setSpoofing turns address spoofing on when enable is true and off
// otherwise.
func (n *NIC) setSpoofing(enable bool) {
	n.mu.Lock()
	defer n.mu.Unlock()
	n.mu.spoofing = enable
}

// primaryEndpoint returns the first usable non-deprecated primary endpoint for
// protocol, falling back to the first usable deprecated one. It returns nil if
// n has neither.
//
// The returned endpoint has had its reference count incremented.
//
// If an IPv6 primary endpoint is requested and remoteAddr is not empty, Source
// Address Selection (as defined by RFC 6724 section 5) is performed instead.
func (n *NIC) primaryEndpoint(protocol tcpip.NetworkProtocolNumber, remoteAddr tcpip.Address) *referencedNetworkEndpoint {
	if protocol == header.IPv6ProtocolNumber && len(remoteAddr) != 0 {
		return n.primaryIPv6Endpoint(remoteAddr)
	}

	n.mu.RLock()
	defer n.mu.RUnlock()

	// fallback is the first deprecated endpoint we managed to take a reference
	// on. It is only returned if no non-deprecated endpoint is found.
	var fallback *referencedNetworkEndpoint
	for _, candidate := range n.mu.primary[protocol] {
		if !candidate.isValidForOutgoingRLocked() {
			continue
		}

		if candidate.deprecated {
			// Remember only the first deprecated endpoint; a reference is held
			// on it until either it is returned or a better endpoint is found.
			if fallback == nil && candidate.tryIncRef() {
				fallback = candidate
			}
			continue
		}

		if !candidate.tryIncRef() {
			continue
		}

		// A non-deprecated endpoint wins outright. Drop the reference that was
		// taken on the deprecated fallback, if any, since it will not be
		// returned.
		if fallback != nil {
			fallback.decRefLocked()
		}
		return candidate
	}

	// No valid non-deprecated endpoint exists; fallback may still be nil if
	// there was no valid deprecated endpoint either.
	return fallback
}

// ipv6AddrCandidate is an IPv6 candidate for Source Address Selection (RFC
// 6724 section 5).
//
// ref is the candidate endpoint and scope is the scope of ref's address, as
// computed by header.ScopeForIPv6Address.
type ipv6AddrCandidate struct {
	ref   *referencedNetworkEndpoint
	scope header.IPv6AddressScope
}

// primaryIPv6Endpoint returns an IPv6 endpoint following Source Address
// Selection (RFC 6724 section 5).
//
// Note, only rules 1-3 and 7 are followed.
//
// remoteAddr must be a valid IPv6 address.
//
// primaryIPv6Endpoint acquires n.mu for reading and delegates to
// primaryIPv6EndpointRLocked.
func (n *NIC) primaryIPv6Endpoint(remoteAddr tcpip.Address) *referencedNetworkEndpoint {
	n.mu.RLock()
	ref := n.primaryIPv6EndpointRLocked(remoteAddr)
	n.mu.RUnlock()
	return ref
}

// primaryIPv6EndpointRLocked returns an IPv6 endpoint following Source Address
// Selection (RFC 6724 section 5), or nil if n has no usable IPv6 primary
// endpoint.
//
// Note, only rules 1-3 and 7 are followed. Candidates that are equally
// preferred under those rules are considered in the order they appear in n's
// primary endpoint list.
//
// The returned endpoint has had its reference count incremented.
//
// remoteAddr must be a valid IPv6 address; primaryIPv6EndpointRLocked panics
// otherwise.
//
// n.mu MUST be read locked.
func (n *NIC) primaryIPv6EndpointRLocked(remoteAddr tcpip.Address) *referencedNetworkEndpoint {
	primaryAddrs := n.mu.primary[header.IPv6ProtocolNumber]

	if len(primaryAddrs) == 0 {
		return nil
	}

	// Create a candidate set of available addresses we can potentially use as a
	// source address. Candidates are appended in primary endpoint list order.
	cs := make([]ipv6AddrCandidate, 0, len(primaryAddrs))
	for _, r := range primaryAddrs {
		// If r is not valid for outgoing connections, it is not a valid endpoint.
		if !r.isValidForOutgoingRLocked() {
			continue
		}

		addr := r.address()
		scope, err := header.ScopeForIPv6Address(addr)
		if err != nil {
			// Should never happen as we got r from the primary IPv6 endpoint list and
			// ScopeForIPv6Address only returns an error if addr is not an IPv6
			// address.
			panic(fmt.Sprintf("header.ScopeForIPv6Address(%s): %s", addr, err))
		}

		cs = append(cs, ipv6AddrCandidate{
			ref:   r,
			scope: scope,
		})
	}

	remoteScope, err := header.ScopeForIPv6Address(remoteAddr)
	if err != nil {
		// primaryIPv6Endpoint should never be called with an invalid IPv6 address.
		panic(fmt.Sprintf("header.ScopeForIPv6Address(%s): %s", remoteAddr, err))
	}

	// Sort the addresses as per RFC 6724 section 5 rules 1-3 and 7.
	//
	// A stable sort is used so that candidates which compare equal keep their
	// relative position from the primary endpoint list. The indices handed to
	// the comparator are positions in the partially sorted slice, not original
	// positions, so they must not be used as a tie-breaker.
	//
	// TODO(b/146021396): Implement rules 4-8 of RFC 6724 section 5.
	sort.SliceStable(cs, func(i, j int) bool {
		sa := cs[i]
		sb := cs[j]

		// Prefer same address as per RFC 6724 section 5 rule 1.
		if sa.ref.address() == remoteAddr {
			return true
		}
		if sb.ref.address() == remoteAddr {
			return false
		}

		// Prefer appropriate scope as per RFC 6724 section 5 rule 2.
		if sa.scope < sb.scope {
			return sa.scope >= remoteScope
		} else if sb.scope < sa.scope {
			return sb.scope < remoteScope
		}

		// Avoid deprecated addresses as per RFC 6724 section 5 rule 3.
		if saDep, sbDep := sa.ref.deprecated, sb.ref.deprecated; saDep != sbDep {
			// If sa is not deprecated, it is preferred over sb.
			return sbDep
		}

		// Prefer temporary addresses as per RFC 6724 section 5 rule 7.
		if saTemp, sbTemp := sa.ref.configType == slaacTemp, sb.ref.configType == slaacTemp; saTemp != sbTemp {
			return saTemp
		}

		// sa and sb are equally preferred. Neither sorts before the other, so
		// the stable sort leaves the one closest to the front of the primary
		// endpoint list first.
		return false
	})

	// Return the most preferred address that can have its reference count
	// incremented.
	for _, c := range cs {
		if r := c.ref; r.tryIncRef() {
			return r
		}
	}

	return nil
}

// hasPermanentAddrLocked reports whether addr is assigned to n as a permanent
// address. An address that is still tentative counts as permanent.
func (n *NIC) hasPermanentAddrLocked(addr tcpip.Address) bool {
	ep, found := n.mu.endpoints[NetworkEndpointID{LocalAddress: addr}]
	if !found {
		return false
	}

	switch ep.getKind() {
	case permanent, permanentTentative:
		return true
	default:
		return false
	}
}

// getRefBehaviour selects which of the NIC's flags (spoofing or promiscuous)
// getRefOrCreateTemp consults when deciding whether a temporary endpoint may
// be used or created.
type getRefBehaviour int

const (
	// spoofing indicates that the NIC's spoofing flag should be observed when
	// getting a NIC's referenced network endpoint.
	spoofing getRefBehaviour = iota

	// promiscuous indicates that the NIC's promiscuous flag should be observed
	// when getting a NIC's referenced network endpoint.
	promiscuous
)

// getRef returns the referenced network endpoint for protocol and dst, if any.
//
// It observes the NIC's promiscuous flag, so a temporary endpoint (created as
// CanBePrimaryEndpoint) may be returned when n is in promiscuous mode.
func (n *NIC) getRef(protocol tcpip.NetworkProtocolNumber, dst tcpip.Address) *referencedNetworkEndpoint {
	return n.getRefOrCreateTemp(protocol, dst, CanBePrimaryEndpoint, promiscuous)
}

// findEndpoint finds the endpoint, if any, with the given address.
//
// It observes the NIC's spoofing flag, so a temporary endpoint (created with
// the primary endpoint behavior peb) may be returned when spoofing is enabled
// on n.
func (n *NIC) findEndpoint(protocol tcpip.NetworkProtocolNumber, address tcpip.Address, peb PrimaryEndpointBehavior) *referencedNetworkEndpoint {
	return n.getRefOrCreateTemp(protocol, address, peb, spoofing)
}

// getRefOrCreateTemp returns the referenced network endpoint for the given
// protocol and address.
//
// If none exists a temporary one may be created if we are in promiscuous mode
// or spoofing. Promiscuous mode will only be checked if tempRef is promiscuous.
// Similarly, spoofing will only be checked if tempRef is spoofing. A temporary
// endpoint may also be created for an IPv4 address on a loopback NIC when the
// address falls within the subnet of one of the NIC's endpoints.
//
// If the address is the IPv4 broadcast address for an endpoint's network, that
// endpoint will be returned.
//
// nil is returned when no endpoint is found and no temporary endpoint may be
// created. A returned endpoint has had its reference count incremented.
func (n *NIC) getRefOrCreateTemp(protocol tcpip.NetworkProtocolNumber, address tcpip.Address, peb PrimaryEndpointBehavior, tempRef getRefBehaviour) *referencedNetworkEndpoint {
	// The lookup is first attempted under the read lock; every return path below
	// releases it explicitly.
	n.mu.RLock()

	var spoofingOrPromiscuous bool
	switch tempRef {
	case spoofing:
		spoofingOrPromiscuous = n.mu.spoofing
	case promiscuous:
		spoofingOrPromiscuous = n.mu.promiscuous
	}

	if ref, ok := n.mu.endpoints[NetworkEndpointID{address}]; ok {
		// An endpoint with this id exists, check if it can be used and return it.
		if !ref.isAssignedRLocked(spoofingOrPromiscuous) {
			n.mu.RUnlock()
			return nil
		}

		if ref.tryIncRef() {
			n.mu.RUnlock()
			return ref
		}
	}

	// Check if address is a broadcast address for the endpoint's network.
	//
	// Only IPv4 has a notion of broadcast addresses.
	if protocol == header.IPv4ProtocolNumber {
		if ref := n.getRefForBroadcastRLocked(address); ref != nil {
			n.mu.RUnlock()
			return ref
		}
	}

	// A usable reference was not found, create a temporary one if requested by
	// the caller or if the IPv4 address is found in the NIC's subnets and the NIC
	// is a loopback interface.
	createTempEP := spoofingOrPromiscuous
	if !createTempEP && n.isLoopback() && protocol == header.IPv4ProtocolNumber {
		for _, r := range n.mu.endpoints {
			addr := r.addrWithPrefix()
			subnet := addr.Subnet()
			if subnet.Contains(address) {
				createTempEP = true
				break
			}
		}
	}
	n.mu.RUnlock()

	if !createTempEP {
		return nil
	}

	// Try again with the lock in exclusive mode. If we still can't get the
	// endpoint, create a new "temporary" endpoint. It will only exist while
	// there's a route through it.
	//
	// The read lock was released above, so the endpoint set may have changed in
	// the meantime; getRefOrCreateTempLocked repeats the lookup for that reason.
	n.mu.Lock()
	ref := n.getRefOrCreateTempLocked(protocol, address, peb)
	n.mu.Unlock()
	return ref
}

// getRefForBroadcastRLocked returns an IPv4 endpoint for whose network address
// is the broadcast address, or nil if there is no such endpoint whose
// reference count could be incremented.
//
// The returned endpoint has had its reference count incremented.
//
// n.mu MUST be read locked.
func (n *NIC) getRefForBroadcastRLocked(address tcpip.Address) *referencedNetworkEndpoint {
	for _, ep := range n.mu.endpoints {
		// Only IPv4 has a notion of broadcast addresses.
		if ep.protocol == header.IPv4ProtocolNumber {
			prefixed := ep.addrWithPrefix()
			network := prefixed.Subnet()
			if network.IsBroadcast(address) && ep.tryIncRef() {
				return ep
			}
		}
	}

	return nil
}

// getRefOrCreateTempLocked returns an existing endpoint for address or creates
// and returns a temporary endpoint.
//
// If the address is the IPv4 broadcast address for an endpoint's network, that
// endpoint will be returned.
//
// nil is returned if protocol is not known to the stack or if the temporary
// endpoint could not be added.
//
// n.mu must be write locked.
func (n *NIC) getRefOrCreateTempLocked(protocol tcpip.NetworkProtocolNumber, address tcpip.Address, peb PrimaryEndpointBehavior) *referencedNetworkEndpoint {
	if existing, found := n.mu.endpoints[NetworkEndpointID{LocalAddress: address}]; found {
		// The endpoint's kind is deliberately not checked: expired endpoints are
		// acceptable at this point.
		if existing.tryIncRef() {
			return existing
		}
		// tryIncRef failing means the endpoint is scheduled to be removed once the
		// lock is released. Remove it here so we can create a new (temporary) one.
		// The removal logic waiting for the lock handles this case.
		n.removeEndpointLocked(existing)
	}

	// Check if address is a broadcast address for an endpoint's network.
	//
	// Only IPv4 has a notion of broadcast addresses.
	if protocol == header.IPv4ProtocolNumber {
		if bcast := n.getRefForBroadcastRLocked(address); bcast != nil {
			return bcast
		}
	}

	// Add a new temporary endpoint.
	netProto, known := n.stack.networkProtocols[protocol]
	if !known {
		return nil
	}
	tempAddr := tcpip.ProtocolAddress{
		Protocol: protocol,
		AddressWithPrefix: tcpip.AddressWithPrefix{
			Address:   address,
			PrefixLen: netProto.DefaultPrefixLen(),
		},
	}
	// The error is intentionally dropped: addAddressLocked returns a nil
	// endpoint whenever it fails, and nil is how failure is reported here.
	created, _ := n.addAddressLocked(tempAddr, peb, temporary, static, false)
	return created
}

// addAddressLocked adds a new protocolAddress to n.
//
// If n already has the address in a non-permanent state, and the kind given is
// permanent, that address will be promoted in place and its properties set to
// the properties provided. Otherwise, it returns tcpip.ErrDuplicateAddress.
//
// tcpip.ErrUnknownProtocol is returned if n has no network endpoint for
// protocolAddress.Protocol. The returned endpoint is nil whenever an error is
// returned.
//
// A permanent IPv6 unicast address is added as permanentTentative, the
// solicited-node multicast group for it is joined, and DAD is started for it
// if n is enabled.
//
// n MUST be locked.
func (n *NIC) addAddressLocked(protocolAddress tcpip.ProtocolAddress, peb PrimaryEndpointBehavior, kind networkEndpointKind, configType networkEndpointConfigType, deprecated bool) (*referencedNetworkEndpoint, *tcpip.Error) {
	// TODO(b/141022673): Validate IP addresses before adding them.

	// Check whether an endpoint with this address already exists.
	id := NetworkEndpointID{LocalAddress: protocolAddress.AddressWithPrefix.Address}
	if ref, ok := n.mu.endpoints[id]; ok {
		// Endpoint already exists. Only a request for a permanent address may
		// replace or promote it.
		if kind != permanent {
			return nil, tcpip.ErrDuplicateAddress
		}
		switch ref.getKind() {
		case permanentTentative, permanent:
			// The NIC already has a permanent endpoint with that address.
			return nil, tcpip.ErrDuplicateAddress
		case permanentExpired, temporary:
			// Promote the endpoint to become permanent and respect the new peb,
			// configType and deprecated status.
			if ref.tryIncRef() {
				// TODO(b/147748385): Perform Duplicate Address Detection when promoting
				// an IPv6 endpoint to permanent.
				ref.setKind(permanent)
				ref.deprecated = deprecated
				ref.configType = configType

				// If ref is already in the primary endpoint list, adjust its
				// position (or remove it) to match peb.
				refs := n.mu.primary[ref.protocol]
				for i, r := range refs {
					if r == ref {
						switch peb {
						case CanBePrimaryEndpoint:
							// Already a primary endpoint; keep its position.
							return ref, nil
						case FirstPrimaryEndpoint:
							if i == 0 {
								return ref, nil
							}
							// Remove ref from its current position; it is re-inserted
							// at the front by insertPrimaryEndpointLocked below.
							n.mu.primary[r.protocol] = append(refs[:i], refs[i+1:]...)
						case NeverPrimaryEndpoint:
							n.mu.primary[r.protocol] = append(refs[:i], refs[i+1:]...)
							return ref, nil
						}
					}
				}

				n.insertPrimaryEndpointLocked(ref, peb)

				return ref, nil
			}
			// tryIncRef failing means the endpoint is scheduled to be removed once
			// the lock is released. Remove it here so we can create a new
			// (permanent) one. The removal logic waiting for the lock handles this
			// case.
			n.removeEndpointLocked(ref)
		}
	}

	ep, ok := n.networkEndpoints[protocolAddress.Protocol]
	if !ok {
		return nil, tcpip.ErrUnknownProtocol
	}

	isIPv6Unicast := protocolAddress.Protocol == header.IPv6ProtocolNumber && header.IsV6UnicastAddress(protocolAddress.AddressWithPrefix.Address)

	// If the address is an IPv6 address and it is a permanent address,
	// mark it as tentative so it goes through the DAD process if the NIC is
	// enabled. If the NIC is not enabled, DAD will be started when the NIC is
	// enabled.
	if isIPv6Unicast && kind == permanent {
		kind = permanentTentative
	}

	// The new endpoint starts with a reference count of one.
	ref := &referencedNetworkEndpoint{
		refs:       1,
		addr:       protocolAddress.AddressWithPrefix,
		ep:         ep,
		nic:        n,
		protocol:   protocolAddress.Protocol,
		kind:       kind,
		configType: configType,
		deprecated: deprecated,
	}

	// Set up resolver if link address resolution exists for this protocol.
	if n.linkEP.Capabilities()&CapabilityResolutionRequired != 0 {
		if linkRes, ok := n.stack.linkAddrResolvers[protocolAddress.Protocol]; ok {
			ref.linkCache = n.stack
			ref.linkRes = linkRes
		}
	}

	// If we are adding an IPv6 unicast address, join the solicited-node
	// multicast address.
	if isIPv6Unicast {
		snmc := header.SolicitedNodeAddr(protocolAddress.AddressWithPrefix.Address)
		if err := n.joinGroupLocked(protocolAddress.Protocol, snmc); err != nil {
			return nil, err
		}
	}

	n.mu.endpoints[id] = ref

	n.insertPrimaryEndpointLocked(ref, peb)

	// If we are adding a tentative IPv6 address, start DAD if the NIC is enabled.
	if isIPv6Unicast && kind == permanentTentative && n.mu.enabled {
		if err := n.mu.ndp.startDuplicateAddressDetection(protocolAddress.AddressWithPrefix.Address, ref); err != nil {
			return nil, err
		}
	}

	return ref, nil
}

// AddAddress adds a new address to n, so that it starts accepting packets
// targeted at the given address (and network protocol).
//
// The address is added as a permanent, statically configured, non-deprecated
// address with the primary endpoint behavior peb. Any error from adding the
// address is returned.
func (n *NIC) AddAddress(protocolAddress tcpip.ProtocolAddress, peb PrimaryEndpointBehavior) *tcpip.Error {
	// Add the endpoint.
	n.mu.Lock()
	_, err := n.addAddressLocked(protocolAddress, peb, permanent, static, false /* deprecated */)
	n.mu.Unlock()

	return err
}

// AllAddresses returns all addresses (primary and non-primary) associated with
// this NIC.
//
// Expired and temporary endpoints are left out; tentative addresses are
// included. The returned slice is never nil.
func (n *NIC) AllAddresses() []tcpip.ProtocolAddress {
	n.mu.RLock()
	defer n.mu.RUnlock()

	result := make([]tcpip.ProtocolAddress, 0, len(n.mu.endpoints))
	for _, ep := range n.mu.endpoints {
		// Don't include expired or temporary endpoints to avoid confusion and
		// prevent the caller from using those.
		if kind := ep.getKind(); kind == permanentExpired || kind == temporary {
			continue
		}

		entry := tcpip.ProtocolAddress{
			Protocol:          ep.protocol,
			AddressWithPrefix: ep.addrWithPrefix(),
		}
		result = append(result, entry)
	}
	return result
}

// PrimaryAddresses returns the primary addresses associated with this NIC,
// across all network protocols.
//
// Tentative, expired and temporary endpoints are left out. The result is nil
// when there is nothing to report.
func (n *NIC) PrimaryAddresses() []tcpip.ProtocolAddress {
	n.mu.RLock()
	defer n.mu.RUnlock()

	var result []tcpip.ProtocolAddress
	for netProto, primaries := range n.mu.primary {
		for _, ep := range primaries {
			// Don't include tentative, expired or temporary endpoints to avoid
			// confusion and prevent the caller from using those.
			kind := ep.getKind()
			if kind == permanentTentative || kind == permanentExpired || kind == temporary {
				continue
			}

			entry := tcpip.ProtocolAddress{
				Protocol:          netProto,
				AddressWithPrefix: ep.addrWithPrefix(),
			}
			result = append(result, entry)
		}
	}
	return result
}

// primaryAddress returns the primary address associated with this NIC for
// proto.
//
// The first non-deprecated address is returned if there is one; otherwise the
// first deprecated address is returned. The zero AddressWithPrefix is returned
// when n has no suitable primary address for proto.
func (n *NIC) primaryAddress(proto tcpip.NetworkProtocolNumber) tcpip.AddressWithPrefix {
	n.mu.RLock()
	defer n.mu.RUnlock()

	// fallback is the first deprecated endpoint seen, used only if no
	// non-deprecated endpoint exists. Ranging over a protocol with no entry in
	// the map simply iterates zero times.
	var fallback *referencedNetworkEndpoint
	for _, ep := range n.mu.primary[proto] {
		// Don't include tentative, expired or temporary endpoints to avoid
		// confusion and prevent the caller from using those.
		kind := ep.getKind()
		if kind == permanentTentative || kind == permanentExpired || kind == temporary {
			continue
		}

		if !ep.deprecated {
			return ep.addrWithPrefix()
		}

		if fallback == nil {
			fallback = ep
		}
	}

	if fallback == nil {
		return tcpip.AddressWithPrefix{}
	}
	return fallback.addrWithPrefix()
}

// insertPrimaryEndpointLocked places r in n's primary endpoint list for r's
// protocol according to peb: at the back for CanBePrimaryEndpoint, at the
// front for FirstPrimaryEndpoint. For any other behavior (notably
// NeverPrimaryEndpoint) the list is left untouched.
//
// n MUST be locked.
func (n *NIC) insertPrimaryEndpointLocked(r *referencedNetworkEndpoint, peb PrimaryEndpointBehavior) {
	if peb != CanBePrimaryEndpoint && peb != FirstPrimaryEndpoint {
		return
	}

	proto := r.protocol
	existing := n.mu.primary[proto]
	if peb == FirstPrimaryEndpoint {
		n.mu.primary[proto] = append([]*referencedNetworkEndpoint{r}, existing...)
		return
	}
	n.mu.primary[proto] = append(existing, r)
}

// removeEndpointLocked removes r from n's endpoint map and from the primary
// endpoint list of r's protocol.
//
// It does nothing if the endpoint registered for r's address is no longer r.
// It panics if r is still a permanent endpoint.
//
// n MUST be locked.
func (n *NIC) removeEndpointLocked(r *referencedNetworkEndpoint) {
	key := NetworkEndpointID{LocalAddress: r.address()}

	// Nothing to do if the reference has already been replaced with a different
	// one. This happens in the case where 1) this endpoint's ref count hit zero
	// and was waiting (on the lock) to be removed and 2) the same address was
	// re-added in the meantime by removing this endpoint from the list and
	// adding a new one.
	if registered := n.mu.endpoints[key]; registered != r {
		return
	}

	if r.getKind() == permanent {
		panic("Reference count dropped to zero before being removed")
	}

	delete(n.mu.endpoints, key)

	primaries := n.mu.primary[r.protocol]
	for i := range primaries {
		if primaries[i] != r {
			continue
		}

		last := len(primaries) - 1
		n.mu.primary[r.protocol] = append(primaries[:i], primaries[i+1:]...)
		// The shift above leaves a stale pointer in the final slot of the
		// backing array; clear it so the slice does not keep it reachable.
		primaries[last] = nil
		break
	}
}

// removeEndpoint removes r from n.
//
// It acquires n.mu for writing and delegates to removeEndpointLocked.
func (n *NIC) removeEndpoint(r *referencedNetworkEndpoint) {
	n.mu.Lock()
	n.removeEndpointLocked(r)
	n.mu.Unlock()
}

// removePermanentAddressLocked removes the permanent (or permanent tentative)
// endpoint bound to addr from n.
//
// It returns tcpip.ErrBadLocalAddress if n has no endpoint for addr or if the
// endpoint is of any other kind. IPv6 endpoints are removed through
// removePermanentIPv6EndpointLocked with SLAAC invalidation allowed; endpoints
// of every other protocol are simply expired.
//
// n MUST be locked.
func (n *NIC) removePermanentAddressLocked(addr tcpip.Address) *tcpip.Error {
	ref, found := n.mu.endpoints[NetworkEndpointID{LocalAddress: addr}]
	if !found {
		return tcpip.ErrBadLocalAddress
	}

	switch ref.getKind() {
	case permanent, permanentTentative:
		// Removable kinds; carry on.
	default:
		return tcpip.ErrBadLocalAddress
	}

	if ref.protocol == header.IPv6ProtocolNumber {
		return n.removePermanentIPv6EndpointLocked(ref, true /* allowSLAACInvalidation */)
	}

	ref.expireLocked()
	return nil
}

// removePermanentIPv6EndpointLocked expires the IPv6 endpoint r.
//
// For a unicast address it first stops duplicate address detection, cleans up
// the SLAAC resources of SLAAC-generated addresses (allowSLAACInvalidation is
// forwarded to that cleanup), and after expiring r leaves the address's
// solicited-node multicast group. Any error from leaving the group other than
// tcpip.ErrBadLocalAddress is returned.
//
// n MUST be locked.
func (n *NIC) removePermanentIPv6EndpointLocked(r *referencedNetworkEndpoint, allowSLAACInvalidation bool) *tcpip.Error {
	addr := r.addrWithPrefix()

	if !header.IsV6UnicastAddress(addr.Address) {
		// Non-unicast addresses carry no DAD, SLAAC or solicited-node state.
		r.expireLocked()
		return nil
	}

	n.mu.ndp.stopDuplicateAddressDetection(addr.Address)

	// An address generated via SLAAC has SLAAC resources to clean up, and the
	// integrator has to be notified.
	if cfg := r.configType; cfg == slaac {
		n.mu.ndp.cleanupSLAACAddrResourcesAndNotify(addr, allowSLAACInvalidation)
	} else if cfg == slaacTemp {
		n.mu.ndp.cleanupTempSLAACAddrResourcesAndNotify(addr, allowSLAACInvalidation)
	}

	r.expireLocked()

	// At this point the endpoint is deleted.

	// A removed IPv6 unicast address must also leave its solicited-node
	// multicast group. tcpip.ErrBadLocalAddress is tolerated because the group
	// may already have been left by user action.
	snmc := header.SolicitedNodeAddr(addr.Address)
	err := n.leaveGroupLocked(snmc, false /* force */)
	if err == nil || err == tcpip.ErrBadLocalAddress {
		return nil
	}
	return err
}

// RemoveAddress removes an address from n.
//
// It holds n.mu for writing for the duration of the call and returns whatever
// removePermanentAddressLocked returns, which is tcpip.ErrBadLocalAddress when
// addr is not a permanent or permanent tentative address of n.
func (n *NIC) RemoveAddress(addr tcpip.Address) *tcpip.Error {
	n.mu.Lock()
	defer n.mu.Unlock()
	return n.removePermanentAddressLocked(addr)
}

// neighbors returns the entries of n's neighbor cache, or
// tcpip.ErrNotSupported if n has no neighbor cache.
func (n *NIC) neighbors() ([]NeighborEntry, *tcpip.Error) {
	cache := n.neigh
	if cache == nil {
		return nil, tcpip.ErrNotSupported
	}
	return cache.entries(), nil
}

// removeWaker asks n's neighbor cache to remove the waker w registered for
// addr. It is a no-op if n has no neighbor cache.
func (n *NIC) removeWaker(addr tcpip.Address, w *sleep.Waker) {
	if cache := n.neigh; cache != nil {
		cache.removeWaker(addr, w)
	}
}

// addStaticNeighbor adds a static entry mapping addr to linkAddress to n's
// neighbor cache. It returns tcpip.ErrNotSupported if n has no neighbor cache.
func (n *NIC) addStaticNeighbor(addr tcpip.Address, linkAddress tcpip.LinkAddress) *tcpip.Error {
	cache := n.neigh
	if cache == nil {
		return tcpip.ErrNotSupported
	}
	cache.addStaticEntry(addr, linkAddress)
	return nil
}

// removeNeighbor removes the entry for addr from n's neighbor cache. It
// returns tcpip.ErrNotSupported if n has no neighbor cache and
// tcpip.ErrBadAddress if the cache reports that no entry was removed.
func (n *NIC) removeNeighbor(addr tcpip.Address) *tcpip.Error {
	cache := n.neigh
	if cache == nil {
		return tcpip.ErrNotSupported
	}
	if removed := cache.removeEntry(addr); !removed {
		return tcpip.ErrBadAddress
	}
	return nil
}

// clearNeighbors clears n's neighbor cache. It returns tcpip.ErrNotSupported
// if n has no neighbor cache.
func (n *NIC) clearNeighbors() *tcpip.Error {
	cache := n.neigh
	if cache == nil {
		return tcpip.ErrNotSupported
	}
	cache.clear()
	return nil
}

// joinGroup adds a new endpoint for the given multicast address, if none
// exists yet. Otherwise it just increments its count.
//
// joinGroup holds n.mu for writing and delegates to joinGroupLocked, whose
// error (if any) is returned unchanged.
func (n *NIC) joinGroup(protocol tcpip.NetworkProtocolNumber, addr tcpip.Address) *tcpip.Error {
	n.mu.Lock()
	defer n.mu.Unlock()

	return n.joinGroupLocked(protocol, addr)
}

// joinGroupLocked registers one more join of the multicast group addr on n.
// The first join creates a permanent, static, never-primary endpoint for addr
// using the protocol's default prefix length; later joins only bump the join
// count.
//
// It returns tcpip.ErrUnknownProtocol if protocol is not registered with the
// stack, or the error from adding the address.
//
// n MUST be locked before joinGroupLocked is called.
func (n *NIC) joinGroupLocked(protocol tcpip.NetworkProtocolNumber, addr tcpip.Address) *tcpip.Error {
	// TODO(b/143102137): When implementing MLD, make sure MLD packets are
	// not sent unless a valid link-local address is available for use on n
	// as an MLD packet's source address must be a link-local address as
	// outlined in RFC 3810 section 5.

	id := NetworkEndpointID{addr}
	if count := n.mu.mcastJoins[id]; count != 0 {
		// Already a member; just account for the additional join.
		n.mu.mcastJoins[id] = count + 1
		return nil
	}

	netProto, ok := n.stack.networkProtocols[protocol]
	if !ok {
		return tcpip.ErrUnknownProtocol
	}

	groupAddr := tcpip.ProtocolAddress{
		Protocol: protocol,
		AddressWithPrefix: tcpip.AddressWithPrefix{
			Address:   addr,
			PrefixLen: netProto.DefaultPrefixLen(),
		},
	}
	if _, err := n.addAddressLocked(groupAddr, NeverPrimaryEndpoint, permanent, static, false /* deprecated */); err != nil {
		return err
	}

	n.mu.mcastJoins[id] = 1
	return nil
}

// leaveGroup decrements the count for the given multicast address, and when it
// reaches zero removes the endpoint for this address.
//
// leaveGroup holds n.mu for writing and delegates to leaveGroupLocked without
// forcing removal.
func (n *NIC) leaveGroup(addr tcpip.Address) *tcpip.Error {
	n.mu.Lock()
	defer n.mu.Unlock()

	return n.leaveGroupLocked(addr, false /* force */)
}

// leaveGroupLocked records one fewer join of the multicast group addr on n and
// removes the group's endpoint once no joins remain. It returns
// tcpip.ErrBadLocalAddress if n has no joins recorded for addr.
//
// If force is true, the count for the multicast address is ignored and the
// endpoint is removed immediately.
//
// n MUST be locked before leaveGroupLocked is called.
func (n *NIC) leaveGroupLocked(addr tcpip.Address, force bool) *tcpip.Error {
	id := NetworkEndpointID{addr}
	joins, joined := n.mu.mcastJoins[id]
	if !joined {
		// There are no joins with this address on this NIC.
		return tcpip.ErrBadLocalAddress
	}

	if remaining := joins - 1; !force && remaining != 0 {
		// Other joins are still outstanding; keep the endpoint around.
		n.mu.mcastJoins[id] = remaining
		return nil
	}

	// There are no outstanding joins or we are forced to leave, clean up.
	delete(n.mu.mcastJoins, id)
	return n.removePermanentAddressLocked(addr)
}

// isInGroup reports whether n currently has at least one join recorded for
// the multicast group addr.
func (n *NIC) isInGroup(addr tcpip.Address) bool {
	n.mu.RLock()
	defer n.mu.RUnlock()

	return n.mu.mcastJoins[NetworkEndpointID{addr}] != 0
}

// handlePacket builds a route for pkt from the given addresses, hands pkt to
// ref's network endpoint and then drops the caller's reference on ref.
func handlePacket(protocol tcpip.NetworkProtocolNumber, dst, src tcpip.Address, localLinkAddr, remotelinkAddr tcpip.LinkAddress, ref *referencedNetworkEndpoint, pkt *PacketBuffer) {
	route := makeRoute(protocol, dst, src, localLinkAddr, ref, false /* handleLocal */, false /* multicastLoop */)
	route.RemoteLinkAddress = remotelinkAddr

	endpoint := ref.ep
	endpoint.HandlePacket(&route, pkt)

	// The reference handed in by the caller is consumed here.
	ref.decRef()
}

// DeliverNetworkPacket finds the appropriate network protocol endpoint and
// hands the packet over for further processing. This function is called when
// the NIC receives a packet from the link endpoint.
//
// remote and local are the link addresses the packet was received with; an
// empty local is replaced by the link endpoint's own address when delivering
// to packet sockets. protocol selects the network protocol used to parse pkt.
//
// Processing order: per-NIC stats, packet sockets (each receives a clone of
// pkt), header parsing, source-address sanity check, IPv4 prerouting
// iptables, local delivery and finally forwarding if the stack has forwarding
// enabled.
func (n *NIC) DeliverNetworkPacket(remote, local tcpip.LinkAddress, protocol tcpip.NetworkProtocolNumber, pkt *PacketBuffer) {
	n.mu.RLock()
	enabled := n.mu.enabled
	// If the NIC is not yet enabled, don't receive any packets.
	if !enabled {
		n.mu.RUnlock()

		n.stats.DisabledRx.Packets.Increment()
		n.stats.DisabledRx.Bytes.IncrementBy(uint64(pkt.Data.Size()))
		return
	}

	n.stats.Rx.Packets.Increment()
	n.stats.Rx.Bytes.IncrementBy(uint64(pkt.Data.Size()))

	netProto, ok := n.stack.networkProtocols[protocol]
	if !ok {
		n.mu.RUnlock()
		n.stack.stats.UnknownProtocolRcvdPackets.Increment()
		return
	}

	// If no local link layer address is provided, assume it was sent
	// directly to this NIC.
	if local == "" {
		local = n.linkEP.LinkAddress()
	}

	// Snapshot the packet sockets listening for this network protocol followed
	// by those listening for all protocols.
	//
	// The snapshot must live in its own backing array: appending directly to
	// the slice stored in n.mu.packetEPs could write into that slice's spare
	// capacity while only the read lock is held, and the result is iterated
	// below after the lock has been released. append(s[:0:0], s...) always
	// copies into fresh storage.
	protoEPs := n.mu.packetEPs[protocol]
	packetEPs := append(protoEPs[:0:0], protoEPs...)
	packetEPs = append(packetEPs, n.mu.packetEPs[header.EthernetProtocolAll]...)
	n.mu.RUnlock()
	for _, ep := range packetEPs {
		p := pkt.Clone()
		p.PktType = tcpip.PacketHost
		ep.HandlePacket(n.id, local, protocol, p)
	}

	if netProto.Number() == header.IPv4ProtocolNumber || netProto.Number() == header.IPv6ProtocolNumber {
		n.stack.stats.IP.PacketsReceived.Increment()
	}

	// Parse headers.
	transProtoNum, hasTransportHdr, ok := netProto.Parse(pkt)
	if !ok {
		// The packet is too small to contain a network header.
		n.stack.stats.MalformedRcvdPackets.Increment()
		return
	}
	if hasTransportHdr {
		// Parse the transport header if present.
		if state, ok := n.stack.transportProtocols[transProtoNum]; ok {
			state.proto.Parse(pkt)
		}
	}

	src, dst := netProto.ParseAddresses(pkt.NetworkHeader().View())

	if n.stack.handleLocal && !n.isLoopback() && n.getRef(protocol, src) != nil {
		// The source address is one of our own, so we never should have gotten a
		// packet like this unless handleLocal is false. Loopback also calls this
		// function even though the packets didn't come from the physical interface
		// so don't drop those.
		n.stack.stats.IP.InvalidSourceAddressesReceived.Increment()
		return
	}

	// TODO(gvisor.dev/issue/170): Not supporting iptables for IPv6 yet.
	// Loopback traffic skips the prerouting chain.
	if protocol == header.IPv4ProtocolNumber && !n.isLoopback() {
		// iptables filtering.
		ipt := n.stack.IPTables()
		address := n.primaryAddress(protocol)
		if ok := ipt.Check(Prerouting, pkt, nil, nil, address.Address, ""); !ok {
			// iptables is telling us to drop the packet.
			return
		}
	}

	// Local delivery: handlePacket consumes the reference returned by getRef.
	if ref := n.getRef(protocol, dst); ref != nil {
		handlePacket(protocol, dst, src, n.linkEP.LinkAddress(), remote, ref, pkt)
		return
	}

	// This NIC doesn't care about the packet. Find a NIC that cares about the
	// packet and forward it to the NIC.
	//
	// TODO: Should we be forwarding the packet even if promiscuous?
	if n.stack.Forwarding() {
		r, err := n.stack.FindRoute(0, "", dst, protocol, false /* multicastLoop */)
		if err != nil {
			n.stack.stats.IP.InvalidDestinationAddressesReceived.Increment()
			return
		}

		// Found a NIC. Note that n is shadowed: from here on it refers to the
		// NIC the route goes out of, not the NIC the packet arrived on.
		n := r.ref.nic
		n.mu.RLock()
		ref, ok := n.mu.endpoints[NetworkEndpointID{dst}]
		ok = ok && ref.isValidForOutgoingRLocked() && ref.tryIncRef()
		n.mu.RUnlock()
		if ok {
			r.LocalLinkAddress = n.linkEP.LinkAddress()
			r.RemoteLinkAddress = remote
			r.RemoteAddress = src
			// TODO(b/123449044): Update the source NIC as well.
			ref.ep.HandlePacket(&r, pkt)
			ref.decRef()
			r.Release()
			return
		}

		// n doesn't have a destination endpoint.
		// Send the packet out of n.
		// TODO(b/128629022): move this logic to route.WritePacket.
		if ch, err := r.Resolve(nil); err != nil {
			if err == tcpip.ErrWouldBlock {
				n.stack.forwarder.enqueue(ch, n, &r, protocol, pkt)
				// forwarder will release route.
				return
			}
			n.stack.stats.IP.InvalidDestinationAddressesReceived.Increment()
			r.Release()
			return
		}

		// The link-address resolution finished immediately.
		n.forwardPacket(&r, protocol, pkt)
		r.Release()
		return
	}

	// If a packet socket handled the packet, don't treat it as invalid.
	if len(packetEPs) == 0 {
		n.stack.stats.IP.InvalidDestinationAddressesReceived.Increment()
	}
}

// DeliverOutboundPacket implements NetworkDispatcher.DeliverOutboundPacket.
//
// Every packet endpoint registered on n for header.EthernetProtocolAll
// receives its own clone of pkt, marked tcpip.PacketOutgoing and with the
// link-layer header for (local, remote, protocol) added.
func (n *NIC) DeliverOutboundPacket(remote, local tcpip.LinkAddress, protocol tcpip.NetworkProtocolNumber, pkt *PacketBuffer) {
	n.mu.RLock()
	// We do not deliver to protocol specific packet endpoints as on Linux
	// only ETH_P_ALL endpoints get outbound packets.
	//
	// Take a private copy of the endpoint list while the lock is held. The
	// list is iterated after the lock is released, so it must not share a
	// backing array with the slice stored in n.mu.packetEPs, which may be
	// modified once the lock is dropped. append(s[:0:0], s...) always copies
	// into fresh storage.
	allEPs := n.mu.packetEPs[header.EthernetProtocolAll]
	packetEPs := append(allEPs[:0:0], allEPs...)
	n.mu.RUnlock()
	for _, ep := range packetEPs {
		p := pkt.Clone()
		p.PktType = tcpip.PacketOutgoing
		// Add the link layer header as outgoing packets are intercepted
		// before the link layer header is created.
		n.linkEP.AddHeader(local, remote, protocol, p)
		ep.HandlePacket(n.id, local, protocol, p)
	}
}

// forwardPacket writes a copy of pkt out of n's link endpoint along route r.
// A write failure is counted in r's OutgoingPacketErrors stat; a successful
// write is counted in n's Tx stats.
func (n *NIC) forwardPacket(r *Route, protocol tcpip.NetworkProtocolNumber, pkt *PacketBuffer) {
	// TODO(b/143425874) Decrease the TTL field in forwarded packets.

	// pkt may already carry headers and may lack the headroom the outgoing
	// link needs to prepend its link-layer header, so the packet's views are
	// wrapped in a new buffer with that headroom reserved.
	opts := PacketBufferOptions{
		ReserveHeaderBytes: int(n.linkEP.MaxHeaderLength()),
		Data:               buffer.NewVectorisedView(pkt.Size(), pkt.Views()),
	}
	outPkt := NewPacketBuffer(opts)

	// WritePacket takes ownership of outPkt, so its size is captured first.
	sentBytes := uint64(outPkt.Size())

	err := n.linkEP.WritePacket(r, nil /* gso */, protocol, outPkt)
	if err != nil {
		r.Stats().IP.OutgoingPacketErrors.Increment()
		return
	}

	n.stats.Tx.Packets.Increment()
	n.stats.Tx.Bytes.IncrementBy(sentBytes)
}

// DeliverTransportPacket hands pkt to the transport layer for the given
// transport protocol.
//
// Raw sockets always get the packet first. After that the transport header is
// validated and the packet is offered, in order, to the demuxer, to the
// protocol's per-stack default handler (if one is set) and finally to the
// protocol's unknown-destination handler. Packets for an unregistered
// protocol bump UnknownProtocolRcvdPackets; malformed ones bump
// MalformedRcvdPackets.
func (n *NIC) DeliverTransportPacket(r *Route, protocol tcpip.TransportProtocolNumber, pkt *PacketBuffer) {
	stk := n.stack

	state, known := stk.transportProtocols[protocol]
	if !known {
		stk.stats.UnknownProtocolRcvdPackets.Increment()
		return
	}
	transProto := state.proto

	// Raw socket packets are delivered based solely on the transport
	// protocol number. We do not inspect the payload to ensure it's
	// validly formed.
	stk.demux.deliverRawPacket(r, protocol, pkt)

	// TransportHeader is empty only when pkt is an ICMP packet or was reassembled
	// from fragments.
	if pkt.TransportHeader().View().IsEmpty() {
		switch protocol {
		case header.ICMPv4ProtocolNumber, header.ICMPv6ProtocolNumber:
			// TODO(gvisor.dev/issue/170): ICMP packets don't have their
			// TransportHeader fields set yet, parse it here. See
			// icmp/protocol.go:protocol.Parse for a full explanation.
			//
			// ICMP packets may be longer, but until icmp.Parse is implemented,
			// here we parse it using the minimum size.
			if _, consumed := pkt.TransportHeader().Consume(transProto.MinimumPacketSize()); !consumed {
				stk.stats.MalformedRcvdPackets.Increment()
				return
			}
		default:
			// This is either a bad packet or was re-assembled from fragments.
			transProto.Parse(pkt)
		}
	}

	if pkt.TransportHeader().View().Size() < transProto.MinimumPacketSize() {
		stk.stats.MalformedRcvdPackets.Increment()
		return
	}

	srcPort, dstPort, err := transProto.ParsePorts(pkt.TransportHeader().View())
	if err != nil {
		stk.stats.MalformedRcvdPackets.Increment()
		return
	}

	id := TransportEndpointID{dstPort, r.LocalAddress, srcPort, r.RemoteAddress}
	if stk.demux.deliverPacket(r, protocol, pkt, id) {
		return
	}

	// Try to deliver to per-stack default handler.
	if state.defaultHandler != nil && state.defaultHandler(r, id, pkt) {
		return
	}

	// We could not find an appropriate destination for this packet, so
	// deliver it to the global handler.
	if handled := transProto.HandleUnknownDestinationPacket(r, id, pkt); !handled {
		stk.stats.MalformedRcvdPackets.Increment()
	}
}

// DeliverTransportControlPacket hands a control packet to the transport
// endpoint it concerns, as identified by the ports parsed from the first 8
// bytes of pkt's data together with local and remote. The packet is silently
// dropped if trans is not a registered transport protocol, if fewer than 8
// bytes are available, or if the ports cannot be parsed.
func (n *NIC) DeliverTransportControlPacket(local, remote tcpip.Address, net tcpip.NetworkProtocolNumber, trans tcpip.TransportProtocolNumber, typ ControlType, extra uint32, pkt *PacketBuffer) {
	state, known := n.stack.transportProtocols[trans]
	if !known {
		return
	}

	// ICMPv4 only guarantees that 8 bytes of the transport protocol will
	// be present in the payload. We know that the ports are within the
	// first 8 bytes for all known transport protocols.
	hdr, pulled := pkt.Data.PullUp(8)
	if !pulled {
		return
	}

	srcPort, dstPort, err := state.proto.ParsePorts(hdr)
	if err != nil {
		return
	}

	id := TransportEndpointID{srcPort, local, dstPort, remote}
	// Nothing further happens whether or not an endpoint took the packet, so
	// the delivery result is not inspected.
	n.stack.demux.deliverControlPacket(n, net, trans, typ, extra, pkt, id)
}

// ID returns the identifier (tcpip.NICID) of n.
func (n *NIC) ID() tcpip.NICID {
	return n.id
}

// Name returns the name stored for n.
func (n *NIC) Name() string {
	return n.name
}

// Stack returns the instance of the Stack that owns this NIC (n's stack
// field).
func (n *NIC) Stack() *Stack {
	return n.stack
}

// LinkEndpoint returns the link endpoint n sends packets through.
func (n *NIC) LinkEndpoint() LinkEndpoint {
	return n.linkEP
}

// isAddrTentative reports whether addr is assigned to n and is currently of
// kind permanentTentative.
//
// An address that is not associated with n at all is reported as not
// tentative.
func (n *NIC) isAddrTentative(addr tcpip.Address) bool {
	n.mu.RLock()
	defer n.mu.RUnlock()

	ref, found := n.mu.endpoints[NetworkEndpointID{addr}]
	return found && ref.getKind() == permanentTentative
}

// dupTentativeAddrDetected informs n that the tentative address addr was found
// to be a duplicate on the link.
//
// The tentative address is removed. If it was generated via SLAAC (regular or
// temporary), generation of a replacement address for the same prefix is
// requested.
//
// It returns tcpip.ErrBadAddress if addr is not associated with n,
// tcpip.ErrInvalidEndpointState if addr is not tentative, or the error from
// removing the endpoint.
func (n *NIC) dupTentativeAddrDetected(addr tcpip.Address) *tcpip.Error {
	n.mu.Lock()
	defer n.mu.Unlock()

	ref, found := n.mu.endpoints[NetworkEndpointID{addr}]
	switch {
	case !found:
		return tcpip.ErrBadAddress
	case ref.getKind() != permanentTentative:
		return tcpip.ErrInvalidEndpointState
	}

	// If the address is a SLAAC address, do not invalidate its SLAAC prefix as a
	// new address will be generated for it.
	if err := n.removePermanentIPv6EndpointLocked(ref, false /* allowSLAACInvalidation */); err != nil {
		return err
	}

	prefix := ref.addrWithPrefix().Subnet()

	if cfg := ref.configType; cfg == slaac {
		n.mu.ndp.regenerateSLAACAddr(prefix)
	} else if cfg == slaacTemp {
		// Do not reset the generation attempts counter for the prefix as the
		// temporary address is being regenerated in response to a DAD conflict.
		n.mu.ndp.regenerateTempSLAACAddr(prefix, false /* resetGenAttempts */)
	}

	return nil
}

// setNDPConfigs installs c as n's NDP configurations.
//
// c is validated before it is stored; invalid values in c are replaced with
// their defaults.
func (n *NIC) setNDPConfigs(c NDPConfigurations) {
	c.validate()

	n.mu.Lock()
	defer n.mu.Unlock()

	n.mu.ndp.configs = c
}

// NUDConfigs returns the NUD configurations held by n's neighbor cache, or
// tcpip.ErrNotSupported if n has no neighbor cache.
func (n *NIC) NUDConfigs() (NUDConfigurations, *tcpip.Error) {
	cache := n.neigh
	if cache == nil {
		return NUDConfigurations{}, tcpip.ErrNotSupported
	}
	return cache.config(), nil
}

// setNUDConfigs installs c as the NUD configurations of n's neighbor cache. It
// returns tcpip.ErrNotSupported if n has no neighbor cache.
//
// Invalid values in c are reset to their defaults before c is stored.
func (n *NIC) setNUDConfigs(c NUDConfigurations) *tcpip.Error {
	cache := n.neigh
	if cache == nil {
		return tcpip.ErrNotSupported
	}

	c.resetInvalidFields()
	cache.setConfig(c)
	return nil
}

// handleNDPRA handles an NDP Router Advertisement message that arrived on n.
//
// ip and ra are passed through to n's NDP state, which processes the
// advertisement while n.mu is held for writing.
func (n *NIC) handleNDPRA(ip tcpip.Address, ra header.NDPRouterAdvert) {
	n.mu.Lock()
	defer n.mu.Unlock()

	n.mu.ndp.handleRA(ip, ra)
}

// networkEndpointKind describes how a referencedNetworkEndpoint is bound to
// its NIC; see the constants below for the possible kinds. It is an int32 so
// that it can be read and written with the sync/atomic helpers.
type networkEndpointKind int32

const (
	// A permanentTentative endpoint is a permanent address that is not yet
	// considered to be fully bound to an interface in the traditional
	// sense. That is, the address is associated with a NIC, but packets
	// destined to the address MUST NOT be accepted and MUST be silently
	// dropped, and the address MUST NOT be used as a source address for
	// outgoing packets. For IPv6, addresses will be of this kind until
	// NDP's Duplicate Address Detection has resolved, or be deleted if
	// the process results in detecting a duplicate address.
	permanentTentative networkEndpointKind = iota

	// A permanent endpoint is created by adding a permanent address (vs. a
	// temporary one) to the NIC. Its reference count is biased by 1 to avoid
	// removal when no route holds a reference to it. It is removed by explicitly
	// removing the permanent address from the NIC.
	permanent

	// An expired permanent endpoint is a permanent endpoint that had its address
	// removed from the NIC, and it is waiting to be removed once no more routes
	// hold a reference to it. This is achieved by decreasing its reference count
	// by 1. If its address is re-added before the endpoint is removed, its type
	// changes back to permanent and its reference count increases by 1 again.
	permanentExpired

	// A temporary endpoint is created for spoofing outgoing packets, or when in
	// promiscuous mode and accepting incoming packets that don't match any
	// permanent endpoint. Its reference count is not biased by 1 and the
	// endpoint is removed immediately when no more route holds a reference to
	// it. A temporary endpoint can be promoted to permanent if its address
	// is added permanently.
	temporary
)

// registerPacketEndpoint adds ep to the packet endpoints registered on n for
// netProto. It returns tcpip.ErrNotSupported if n has no packet endpoint list
// for netProto.
func (n *NIC) registerPacketEndpoint(netProto tcpip.NetworkProtocolNumber, ep PacketEndpoint) *tcpip.Error {
	n.mu.Lock()
	defer n.mu.Unlock()

	if _, supported := n.mu.packetEPs[netProto]; !supported {
		return tcpip.ErrNotSupported
	}

	n.mu.packetEPs[netProto] = append(n.mu.packetEPs[netProto], ep)
	return nil
}

// unregisterPacketEndpoint removes ep from the packet endpoints registered on
// n for netProto. Only the first occurrence of ep is removed. It does nothing
// if n has no packet endpoint list for netProto or if ep is not in that list.
//
// The surviving endpoints are copied into a fresh slice instead of being
// compacted in place. A slice previously read from n.mu.packetEPs is
// therefore never modified by a removal, and the old backing array is not
// left holding a stale reference to the removed endpoint in its tail slot.
func (n *NIC) unregisterPacketEndpoint(netProto tcpip.NetworkProtocolNumber, ep PacketEndpoint) {
	n.mu.Lock()
	defer n.mu.Unlock()

	eps, ok := n.mu.packetEPs[netProto]
	if !ok {
		return
	}

	for i, epOther := range eps {
		if epOther != ep {
			continue
		}

		// The map entry is kept (possibly as an empty list) so that later
		// registrations for netProto are still accepted.
		remaining := make([]PacketEndpoint, 0, len(eps)-1)
		remaining = append(remaining, eps[:i]...)
		remaining = append(remaining, eps[i+1:]...)
		n.mu.packetEPs[netProto] = remaining
		return
	}
}

// networkEndpointConfigType identifies the method that was used to configure
// an endpoint; see the constants below.
type networkEndpointConfigType int32

const (
	// A statically configured endpoint is an address that was added by
	// some user-specified action (adding an explicit address, joining a
	// multicast group).
	static networkEndpointConfigType = iota

	// A SLAAC configured endpoint is an IPv6 endpoint that was added by
	// SLAAC as per RFC 4862 section 5.5.3.
	slaac

	// A temporary SLAAC configured endpoint is an IPv6 endpoint that was added by
	// SLAAC as per RFC 4941. Temporary SLAAC addresses are short-lived and are
	// not expected to be valid (or preferred) forever; hence the term temporary.
	slaacTemp
)

// referencedNetworkEndpoint is a reference-counted network endpoint bound to
// an address on a NIC.
type referencedNetworkEndpoint struct {
	// ep is the network endpoint that packets for this address are handed to.
	ep NetworkEndpoint
	// addr is the address (with prefix) this endpoint is bound to.
	addr tcpip.AddressWithPrefix
	// nic is the NIC this endpoint belongs to; its mutex guards the endpoint's
	// membership in the NIC's endpoint tables.
	nic *NIC
	// protocol is the network protocol of addr.
	protocol tcpip.NetworkProtocolNumber

	// linkCache is set if link address resolution is enabled for this
	// protocol. Set to nil otherwise.
	linkCache LinkAddressCache

	// linkRes is set if link address resolution is enabled for this protocol.
	// Set to nil otherwise.
	linkRes LinkAddressResolver

	// refs is counting references held for this endpoint. When refs hits zero it
	// triggers the automatic removal of the endpoint from the NIC.
	refs int32

	// kind must only be accessed using {get,set}Kind(), which access it
	// atomically.
	kind networkEndpointKind

	// configType is the method that was used to configure this endpoint.
	// This must never change except during endpoint creation and promotion to
	// permanent.
	configType networkEndpointConfigType

	// deprecated indicates whether or not the endpoint should be considered
	// deprecated. That is, when deprecated is true, other endpoints that are not
	// deprecated should be preferred.
	deprecated bool
}

// address returns the address r is bound to, without its prefix length.
func (r *referencedNetworkEndpoint) address() tcpip.Address {
	return r.addr.Address
}

// addrWithPrefix returns the address r is bound to together with its prefix
// length.
func (r *referencedNetworkEndpoint) addrWithPrefix() tcpip.AddressWithPrefix {
	return r.addr
}

// getKind atomically loads and returns r's kind.
func (r *referencedNetworkEndpoint) getKind() networkEndpointKind {
	raw := atomic.LoadInt32((*int32)(&r.kind))
	return networkEndpointKind(raw)
}

// setKind atomically stores kind as r's kind.
func (r *referencedNetworkEndpoint) setKind(kind networkEndpointKind) {
	slot := (*int32)(&r.kind)
	atomic.StoreInt32(slot, int32(kind))
}

// isValidForOutgoing returns true if the endpoint can be used to send out a
// packet. It requires the endpoint to not be marked expired (i.e., its address
// has been removed) unless the NIC is in spoofing mode, or temporary.
//
// It takes r.nic.mu for reading; callers that already hold the lock must use
// isValidForOutgoingRLocked instead.
func (r *referencedNetworkEndpoint) isValidForOutgoing() bool {
	r.nic.mu.RLock()
	defer r.nic.mu.RUnlock()

	return r.isValidForOutgoingRLocked()
}

// isValidForOutgoingRLocked reports whether r can be used to send out a
// packet: its NIC must be enabled and r must be considered assigned, with the
// NIC's spoofing setting deciding whether an expired endpoint still counts.
//
// r.nic.mu must be read locked.
func (r *referencedNetworkEndpoint) isValidForOutgoingRLocked() bool {
	nic := r.nic
	return nic.mu.enabled && r.isAssignedRLocked(nic.mu.spoofing)
}

// isAssignedRLocked reports whether r is considered to be assigned to the NIC.
// A tentative endpoint never is, an expired endpoint is only when
// spoofingOrPromiscuous is true, and every other kind always is.
//
// r.nic.mu must be read locked.
func (r *referencedNetworkEndpoint) isAssignedRLocked(spoofingOrPromiscuous bool) bool {
	kind := r.getKind()
	if kind == permanentTentative {
		return false
	}
	if kind == permanentExpired {
		return spoofingOrPromiscuous
	}
	return true
}

// expireLocked decrements the reference count and marks the permanent endpoint
// as expired.
//
// The kind is changed to permanentExpired before the reference is dropped, so
// if the count reaches zero the endpoint is removed from the NIC as an expired
// endpoint. Like decRefLocked, it assumes r.nic.mu is locked.
func (r *referencedNetworkEndpoint) expireLocked() {
	r.setKind(permanentExpired)
	r.decRefLocked()
}

// decRef drops one reference to r. When the last reference is dropped, r is
// removed from its NIC, which takes the NIC's lock.
func (r *referencedNetworkEndpoint) decRef() {
	if remaining := atomic.AddInt32(&r.refs, -1); remaining != 0 {
		return
	}
	r.nic.removeEndpoint(r)
}

// decRefLocked drops one reference to r and, when the last reference is
// dropped, removes r from its NIC. Unlike decRef it assumes that the NIC.mu
// mutex is already locked.
func (r *referencedNetworkEndpoint) decRefLocked() {
	if remaining := atomic.AddInt32(&r.refs, -1); remaining != 0 {
		return
	}
	r.nic.removeEndpointLocked(r)
}

// incRef increments the ref count. It must only be called when the caller is
// known to be holding a reference to the endpoint, otherwise tryIncRef should
// be used.
//
// The increment is unconditional: unlike tryIncRef it does not check whether
// the count has already reached zero.
func (r *referencedNetworkEndpoint) incRef() {
	atomic.AddInt32(&r.refs, 1)
}

// tryIncRef attempts to increment the ref count from n to n+1, but only if n is
// not zero. That is, it increments the count and returns true if the endpoint
// is still alive, and does nothing and returns false if it has already been
// cleaned up.
func (r *referencedNetworkEndpoint) tryIncRef() bool {
	// Retry the compare-and-swap until it succeeds or the count is seen at
	// zero; a failed swap only means the count changed concurrently.
	for cur := atomic.LoadInt32(&r.refs); cur != 0; cur = atomic.LoadInt32(&r.refs) {
		if atomic.CompareAndSwapInt32(&r.refs, cur, cur+1) {
			return true
		}
	}
	return false
}

// stack returns the Stack instance that owns the underlying endpoint, i.e. the
// stack of the NIC that r belongs to.
func (r *referencedNetworkEndpoint) stack() *Stack {
	return r.nic.stack
}