diff options
Diffstat (limited to 'pkg/tcpip/tcpip.go')
-rw-r--r-- | pkg/tcpip/tcpip.go | 1616 |
1 files changed, 1616 insertions, 0 deletions
diff --git a/pkg/tcpip/tcpip.go b/pkg/tcpip/tcpip.go new file mode 100644 index 000000000..25534a10d --- /dev/null +++ b/pkg/tcpip/tcpip.go @@ -0,0 +1,1616 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package tcpip provides the interfaces and related types that users of the +// tcpip stack will use in order to create endpoints used to send and receive +// data over the network stack. +// +// The starting point is the creation and configuration of a stack. A stack can +// be created by calling the New() function of the tcpip/stack/stack package; +// configuring a stack involves creating NICs (via calls to Stack.CreateNIC()), +// adding network addresses (via calls to Stack.AddAddress()), and +// setting a route table (via a call to Stack.SetRouteTable()). +// +// Once a stack is configured, endpoints can be created by calling +// Stack.NewEndpoint(). Such endpoints can be used to send/receive data, connect +// to peers, listen for connections, accept connections, etc., depending on the +// transport protocol selected. +package tcpip + +import ( + "errors" + "fmt" + "math/bits" + "reflect" + "strconv" + "strings" + "sync/atomic" + "time" + + "gvisor.dev/gvisor/pkg/sync" + "gvisor.dev/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/waiter" +) + +// Error represents an error in the netstack error space. Using a special type +// ensures that errors outside of this space are not accidentally introduced. 
//
// Note: to support save / restore, it is important that all tcpip errors have
// distinct error messages.
type Error struct {
	// msg is the human-readable message; StringToError uses it as the
	// lookup key, so it must be unique per error.
	msg string

	// ignoreStats is the value reported by IgnoreStats.
	ignoreStats bool
}

// String implements fmt.Stringer.String.
//
// It is safe to call on a nil *Error, in which case it returns "<nil>".
func (e *Error) String() string {
	if e == nil {
		return "<nil>"
	}
	return e.msg
}

// IgnoreStats reports whether this error is marked as one that the failure
// counts in tcpip.Stats structs should ignore.
func (e *Error) IgnoreStats() bool {
	return e.ignoreStats
}

// Errors that can be returned by the network stack.
var (
	ErrUnknownProtocol           = &Error{msg: "unknown protocol"}
	ErrUnknownNICID              = &Error{msg: "unknown nic id"}
	ErrUnknownDevice             = &Error{msg: "unknown device"}
	ErrUnknownProtocolOption     = &Error{msg: "unknown option for protocol"}
	ErrDuplicateNICID            = &Error{msg: "duplicate nic id"}
	ErrDuplicateAddress          = &Error{msg: "duplicate address"}
	ErrNoRoute                   = &Error{msg: "no route"}
	ErrBadLinkEndpoint           = &Error{msg: "bad link layer endpoint"}
	ErrAlreadyBound              = &Error{msg: "endpoint already bound", ignoreStats: true}
	ErrInvalidEndpointState      = &Error{msg: "endpoint is in invalid state"}
	ErrAlreadyConnecting         = &Error{msg: "endpoint is already connecting", ignoreStats: true}
	ErrAlreadyConnected          = &Error{msg: "endpoint is already connected", ignoreStats: true}
	ErrNoPortAvailable           = &Error{msg: "no ports are available"}
	ErrPortInUse                 = &Error{msg: "port is in use"}
	ErrBadLocalAddress           = &Error{msg: "bad local address"}
	ErrClosedForSend             = &Error{msg: "endpoint is closed for send"}
	ErrClosedForReceive          = &Error{msg: "endpoint is closed for receive"}
	ErrWouldBlock                = &Error{msg: "operation would block", ignoreStats: true}
	ErrConnectionRefused         = &Error{msg: "connection was refused"}
	ErrTimeout                   = &Error{msg: "operation timed out"}
	ErrAborted                   = &Error{msg: "operation aborted"}
	ErrConnectStarted            = &Error{msg: "connection attempt started", ignoreStats: true}
	ErrDestinationRequired       = &Error{msg: "destination address is required"}
	ErrNotSupported              = &Error{msg: "operation not supported"}
	ErrQueueSizeNotSupported     = &Error{msg: "queue size querying not supported"}
	ErrNotConnected              = &Error{msg: "endpoint not connected"}
	ErrConnectionReset           = &Error{msg: "connection reset by peer"}
	ErrConnectionAborted         = &Error{msg: "connection aborted"}
	ErrNoSuchFile                = &Error{msg: "no such file"}
	ErrInvalidOptionValue        = &Error{msg: "invalid option value specified"}
	ErrNoLinkAddress             = &Error{msg: "no remote link address"}
	ErrBadAddress                = &Error{msg: "bad address"}
	ErrNetworkUnreachable        = &Error{msg: "network is unreachable"}
	ErrMessageTooLong            = &Error{msg: "message too long"}
	ErrNoBufferSpace             = &Error{msg: "no buffer space available"}
	ErrBroadcastDisabled         = &Error{msg: "broadcast socket option disabled"}
	ErrNotPermitted              = &Error{msg: "operation not permitted"}
	ErrAddressFamilyNotSupported = &Error{msg: "address family not supported by protocol"}
)

// messageToError maps each error message to its *Error. It is built lazily,
// exactly once, by StringToError.
var messageToError map[string]*Error

// populate guards the one-time construction of messageToError.
var populate sync.Once

// StringToError converts an error message to the error.
//
// It panics if s is not the message of one of the errors declared above, or
// if two of those errors turn out to share a message.
func StringToError(s string) *Error {
	populate.Do(func() {
		// Not named "errors": that would shadow the imported package.
		all := []*Error{
			ErrUnknownProtocol,
			ErrUnknownNICID,
			ErrUnknownDevice,
			ErrUnknownProtocolOption,
			ErrDuplicateNICID,
			ErrDuplicateAddress,
			ErrNoRoute,
			ErrBadLinkEndpoint,
			ErrAlreadyBound,
			ErrInvalidEndpointState,
			ErrAlreadyConnecting,
			ErrAlreadyConnected,
			ErrNoPortAvailable,
			ErrPortInUse,
			ErrBadLocalAddress,
			ErrClosedForSend,
			ErrClosedForReceive,
			ErrWouldBlock,
			ErrConnectionRefused,
			ErrTimeout,
			ErrAborted,
			ErrConnectStarted,
			ErrDestinationRequired,
			ErrNotSupported,
			ErrQueueSizeNotSupported,
			ErrNotConnected,
			ErrConnectionReset,
			ErrConnectionAborted,
			ErrNoSuchFile,
			ErrInvalidOptionValue,
			ErrNoLinkAddress,
			ErrBadAddress,
			ErrNetworkUnreachable,
			ErrMessageTooLong,
			ErrNoBufferSpace,
			ErrBroadcastDisabled,
			ErrNotPermitted,
			ErrAddressFamilyNotSupported,
		}

		messageToError = make(map[string]*Error, len(all))
		for _, e := range all {
			msg := e.String()
			if _, dup := messageToError[msg]; dup {
				panic("tcpip errors with duplicated message: " + msg)
			}
			messageToError[msg] = e
		}
	})

	e, ok := messageToError[s]
	if !ok {
		panic("unknown error message: " + s)
	}

	return e
}

// Errors related to Subnet
var (
	errSubnetLengthMismatch = errors.New("subnet length of address and mask differ")
	errSubnetAddressMasked  = errors.New("subnet address has bits set outside the mask")
)

// ErrSaveRejection indicates a failed save due to unsupported networking state.
// This type of errors is only used for save logic.
type ErrSaveRejection struct {
	Err error
}

// Error returns a sensible description of the save rejection error.
//
// A nil Err is rendered as "<nil>" rather than causing a nil dereference.
func (e ErrSaveRejection) Error() string {
	const prefix = "save rejected due to unsupported networking state: "
	if e.Err == nil {
		return prefix + "<nil>"
	}
	return prefix + e.Err.Error()
}

// A Clock provides the current time.
//
// Times returned by a Clock should always be used for application-visible
// time.
Only monotonic times should be used for netstack internal timekeeping. +type Clock interface { + // NowNanoseconds returns the current real time as a number of + // nanoseconds since the Unix epoch. + NowNanoseconds() int64 + + // NowMonotonic returns a monotonic time value. + NowMonotonic() int64 +} + +// Address is a byte slice cast as a string that represents the address of a +// network node. Or, in the case of unix endpoints, it may represent a path. +type Address string + +// AddressMask is a bitmask for an address. +type AddressMask string + +// String implements Stringer. +func (m AddressMask) String() string { + return Address(m).String() +} + +// Prefix returns the number of bits before the first host bit. +func (m AddressMask) Prefix() int { + p := 0 + for _, b := range []byte(m) { + p += bits.LeadingZeros8(^b) + } + return p +} + +// Subnet is a subnet defined by its address and mask. +type Subnet struct { + address Address + mask AddressMask +} + +// NewSubnet creates a new Subnet, checking that the address and mask are the same length. +func NewSubnet(a Address, m AddressMask) (Subnet, error) { + if len(a) != len(m) { + return Subnet{}, errSubnetLengthMismatch + } + for i := 0; i < len(a); i++ { + if a[i]&^m[i] != 0 { + return Subnet{}, errSubnetAddressMasked + } + } + return Subnet{a, m}, nil +} + +// String implements Stringer. +func (s Subnet) String() string { + return fmt.Sprintf("%s/%d", s.ID(), s.Prefix()) +} + +// Contains returns true iff the address is of the same length and matches the +// subnet address and mask. +func (s *Subnet) Contains(a Address) bool { + if len(a) != len(s.address) { + return false + } + for i := 0; i < len(a); i++ { + if a[i]&s.mask[i] != s.address[i] { + return false + } + } + return true +} + +// ID returns the subnet ID. +func (s *Subnet) ID() Address { + return s.address +} + +// Bits returns the number of ones (network bits) and zeros (host bits) in the +// subnet mask. 
+func (s *Subnet) Bits() (ones int, zeros int) { + ones = s.mask.Prefix() + return ones, len(s.mask)*8 - ones +} + +// Prefix returns the number of bits before the first host bit. +func (s *Subnet) Prefix() int { + return s.mask.Prefix() +} + +// Mask returns the subnet mask. +func (s *Subnet) Mask() AddressMask { + return s.mask +} + +// Broadcast returns the subnet's broadcast address. +func (s *Subnet) Broadcast() Address { + addr := []byte(s.address) + for i := range addr { + addr[i] |= ^s.mask[i] + } + return Address(addr) +} + +// Equal returns true if s equals o. +// +// Needed to use cmp.Equal on Subnet as its fields are unexported. +func (s Subnet) Equal(o Subnet) bool { + return s == o +} + +// NICID is a number that uniquely identifies a NIC. +type NICID int32 + +// ShutdownFlags represents flags that can be passed to the Shutdown() method +// of the Endpoint interface. +type ShutdownFlags int + +// Values of the flags that can be passed to the Shutdown() method. They can +// be OR'ed together. +const ( + ShutdownRead ShutdownFlags = 1 << iota + ShutdownWrite +) + +// FullAddress represents a full transport node address, as required by the +// Connect() and Bind() methods. +// +// +stateify savable +type FullAddress struct { + // NIC is the ID of the NIC this address refers to. + // + // This may not be used by all endpoint types. + NIC NICID + + // Addr is the network or link layer address. + Addr Address + + // Port is the transport port. + // + // This may not be used by all endpoint types. + Port uint16 +} + +// Payloader is an interface that provides data. +// +// This interface allows the endpoint to request the amount of data it needs +// based on internal buffers without exposing them. +type Payloader interface { + // FullPayload returns all available bytes. + FullPayload() ([]byte, *Error) + + // Payload returns a slice containing at most size bytes. + Payload(size int) ([]byte, *Error) +} + +// SlicePayload implements Payloader for slices. 
+// +// This is typically used for tests. +type SlicePayload []byte + +// FullPayload implements Payloader.FullPayload. +func (s SlicePayload) FullPayload() ([]byte, *Error) { + return s, nil +} + +// Payload implements Payloader.Payload. +func (s SlicePayload) Payload(size int) ([]byte, *Error) { + if size > len(s) { + size = len(s) + } + return s[:size], nil +} + +// A ControlMessages contains socket control messages for IP sockets. +// +// +stateify savable +type ControlMessages struct { + // HasTimestamp indicates whether Timestamp is valid/set. + HasTimestamp bool + + // Timestamp is the time (in ns) that the last packet used to create + // the read data was received. + Timestamp int64 + + // HasInq indicates whether Inq is valid/set. + HasInq bool + + // Inq is the number of bytes ready to be received. + Inq int32 + + // HasTOS indicates whether Tos is valid/set. + HasTOS bool + + // TOS is the IPv4 type of service of the associated packet. + TOS uint8 + + // HasTClass indicates whether TClass is valid/set. + HasTClass bool + + // TClass is the IPv6 traffic class of the associated packet. + TClass uint32 + + // HasIPPacketInfo indicates whether PacketInfo is set. + HasIPPacketInfo bool + + // PacketInfo holds interface and address data on an incoming packet. + PacketInfo IPPacketInfo +} + +// PacketOwner is used to get UID and GID of the packet. +type PacketOwner interface { + // UID returns UID of the packet. + UID() uint32 + + // GID returns GID of the packet. + GID() uint32 +} + +// Endpoint is the interface implemented by transport protocols (e.g., tcp, udp) +// that exposes functionality like read, write, connect, etc. to users of the +// networking stack. +type Endpoint interface { + // Close puts the endpoint in a closed state and frees all resources + // associated with it. Close initiates the teardown process, the + // Endpoint may not be fully closed when Close returns. + Close() + + // Abort initiates an expedited endpoint teardown. 
As compared to + // Close, Abort prioritizes closing the Endpoint quickly over cleanly. + // Abort is best effort; implementing Abort with Close is acceptable. + Abort() + + // Read reads data from the endpoint and optionally returns the sender. + // + // This method does not block if there is no data pending. It will also + // either return an error or data, never both. + Read(*FullAddress) (buffer.View, ControlMessages, *Error) + + // Write writes data to the endpoint's peer. This method does not block if + // the data cannot be written. + // + // Unlike io.Writer.Write, Endpoint.Write transfers ownership of any bytes + // successfully written to the Endpoint. That is, if a call to + // Write(SlicePayload{data}) returns (n, err), it may retain data[:n], and + // the caller should not use data[:n] after Write returns. + // + // Note that unlike io.Writer.Write, it is not an error for Write to + // perform a partial write (if n > 0, no error may be returned). Only + // stream (TCP) Endpoints may return partial writes, and even then only + // in the case where writing additional data would block. Other Endpoints + // will either write the entire message or return an error. + // + // For UDP and Ping sockets if address resolution is required, + // ErrNoLinkAddress and a notification channel is returned for the caller to + // block. Channel is closed once address resolution is complete (success or + // not). The channel is only non-nil in this case. + Write(Payloader, WriteOptions) (int64, <-chan struct{}, *Error) + + // Peek reads data without consuming it from the endpoint. + // + // This method does not block if there is no data pending. + Peek([][]byte) (int64, ControlMessages, *Error) + + // Connect connects the endpoint to its peer. Specifying a NIC is + // optional. + // + // There are three classes of return values: + // nil -- the attempt to connect succeeded. 
+ // ErrConnectStarted/ErrAlreadyConnecting -- the connect attempt started + // but hasn't completed yet. In this case, the caller must call Connect + // or GetSockOpt(ErrorOption) when the endpoint becomes writable to + // get the actual result. The first call to Connect after the socket has + // connected returns nil. Calling connect again results in ErrAlreadyConnected. + // Anything else -- the attempt to connect failed. + // + // If address.Addr is empty, this means that Enpoint has to be + // disconnected if this is supported, otherwise + // ErrAddressFamilyNotSupported must be returned. + Connect(address FullAddress) *Error + + // Disconnect disconnects the endpoint from its peer. + Disconnect() *Error + + // Shutdown closes the read and/or write end of the endpoint connection + // to its peer. + Shutdown(flags ShutdownFlags) *Error + + // Listen puts the endpoint in "listen" mode, which allows it to accept + // new connections. + Listen(backlog int) *Error + + // Accept returns a new endpoint if a peer has established a connection + // to an endpoint previously set to listen mode. This method does not + // block if no new connections are available. + // + // The returned Queue is the wait queue for the newly created endpoint. + Accept() (Endpoint, *waiter.Queue, *Error) + + // Bind binds the endpoint to a specific local address and port. + // Specifying a NIC is optional. + Bind(address FullAddress) *Error + + // GetLocalAddress returns the address to which the endpoint is bound. + GetLocalAddress() (FullAddress, *Error) + + // GetRemoteAddress returns the address to which the endpoint is + // connected. + GetRemoteAddress() (FullAddress, *Error) + + // Readiness returns the current readiness of the endpoint. For example, + // if waiter.EventIn is set, the endpoint is immediately readable. + Readiness(mask waiter.EventMask) waiter.EventMask + + // SetSockOpt sets a socket option. opt should be one of the *Option types. 
+ SetSockOpt(opt interface{}) *Error + + // SetSockOptBool sets a socket option, for simple cases where a value + // has the bool type. + SetSockOptBool(opt SockOptBool, v bool) *Error + + // SetSockOptInt sets a socket option, for simple cases where a value + // has the int type. + SetSockOptInt(opt SockOptInt, v int) *Error + + // GetSockOpt gets a socket option. opt should be a pointer to one of the + // *Option types. + GetSockOpt(opt interface{}) *Error + + // GetSockOptBool gets a socket option for simple cases where a return + // value has the bool type. + GetSockOptBool(SockOptBool) (bool, *Error) + + // GetSockOptInt gets a socket option for simple cases where a return + // value has the int type. + GetSockOptInt(SockOptInt) (int, *Error) + + // State returns a socket's lifecycle state. The returned value is + // protocol-specific and is primarily used for diagnostics. + State() uint32 + + // ModerateRecvBuf should be called everytime data is copied to the user + // space. This allows for dynamic tuning of recv buffer space for a + // given socket. + // + // NOTE: This method is a no-op for sockets other than TCP. + ModerateRecvBuf(copied int) + + // Info returns a copy to the transport endpoint info. + Info() EndpointInfo + + // Stats returns a reference to the endpoint stats. + Stats() EndpointStats + + // SetOwner sets the task owner to the endpoint owner. + SetOwner(owner PacketOwner) +} + +// EndpointInfo is the interface implemented by each endpoint info struct. +type EndpointInfo interface { + // IsEndpointInfo is an empty method to implement the tcpip.EndpointInfo + // marker interface. + IsEndpointInfo() +} + +// EndpointStats is the interface implemented by each endpoint stats struct. +type EndpointStats interface { + // IsEndpointStats is an empty method to implement the tcpip.EndpointStats + // marker interface. + IsEndpointStats() +} + +// WriteOptions contains options for Endpoint.Write. 
+type WriteOptions struct { + // If To is not nil, write to the given address instead of the endpoint's + // peer. + To *FullAddress + + // More has the same semantics as Linux's MSG_MORE. + More bool + + // EndOfRecord has the same semantics as Linux's MSG_EOR. + EndOfRecord bool + + // Atomic means that all data fetched from Payloader must be written to the + // endpoint. If Atomic is false, then data fetched from the Payloader may be + // discarded if available endpoint buffer space is unsufficient. + Atomic bool +} + +// SockOptBool represents socket options which values have the bool type. +type SockOptBool int + +const ( + // BroadcastOption is used by SetSockOptBool/GetSockOptBool to specify + // whether datagram sockets are allowed to send packets to a broadcast + // address. + BroadcastOption SockOptBool = iota + + // CorkOption is used by SetSockOptBool/GetSockOptBool to specify if + // data should be held until segments are full by the TCP transport + // protocol. + CorkOption + + // DelayOption is used by SetSockOptBool/GetSockOptBool to specify if + // data should be sent out immediately by the transport protocol. For + // TCP, it determines if the Nagle algorithm is on or off. + DelayOption + + // KeepaliveEnabledOption is used by SetSockOptBool/GetSockOptBool to + // specify whether TCP keepalive is enabled for this socket. + KeepaliveEnabledOption + + // MulticastLoopOption is used by SetSockOptBool/GetSockOptBool to + // specify whether multicast packets sent over a non-loopback interface + // will be looped back. + MulticastLoopOption + + // NoChecksumOption is used by SetSockOptBool/GetSockOptBool to specify + // whether UDP checksum is disabled for this socket. + NoChecksumOption + + // PasscredOption is used by SetSockOptBool/GetSockOptBool to specify + // whether SCM_CREDENTIALS socket control messages are enabled. + // + // Only supported on Unix sockets. + PasscredOption + + // QuickAckOption is stubbed out in SetSockOptBool/GetSockOptBool. 
+ QuickAckOption + + // ReceiveTClassOption is used by SetSockOptBool/GetSockOptBool to + // specify if the IPV6_TCLASS ancillary message is passed with incoming + // packets. + ReceiveTClassOption + + // ReceiveTOSOption is used by SetSockOptBool/GetSockOptBool to specify + // if the TOS ancillary message is passed with incoming packets. + ReceiveTOSOption + + // ReceiveIPPacketInfoOption is used by SetSockOptBool/GetSockOptBool to + // specify if more inforamtion is provided with incoming packets such as + // interface index and address. + ReceiveIPPacketInfoOption + + // ReuseAddressOption is used by SetSockOptBool/GetSockOptBool to + // specify whether Bind() should allow reuse of local address. + ReuseAddressOption + + // ReusePortOption is used by SetSockOptBool/GetSockOptBool to permit + // multiple sockets to be bound to an identical socket address. + ReusePortOption + + // V6OnlyOption is used by SetSockOptBool/GetSockOptBool to specify + // whether an IPv6 socket is to be restricted to sending and receiving + // IPv6 packets only. + V6OnlyOption + + // IPHdrIncludedOption is used by SetSockOpt to indicate for a raw + // endpoint that all packets being written have an IP header and the + // endpoint should not attach an IP header. + IPHdrIncludedOption +) + +// SockOptInt represents socket options which values have the int type. +type SockOptInt int + +const ( + // KeepaliveCountOption is used by SetSockOptInt/GetSockOptInt to + // specify the number of un-ACKed TCP keepalives that will be sent + // before the connection is closed. + KeepaliveCountOption SockOptInt = iota + + // IPv4TOSOption is used by SetSockOptInt/GetSockOptInt to specify TOS + // for all subsequent outgoing IPv4 packets from the endpoint. + IPv4TOSOption + + // IPv6TrafficClassOption is used by SetSockOptInt/GetSockOptInt to + // specify TOS for all subsequent outgoing IPv6 packets from the + // endpoint. 
+ IPv6TrafficClassOption + + // MaxSegOption is used by SetSockOptInt/GetSockOptInt to set/get the + // current Maximum Segment Size(MSS) value as specified using the + // TCP_MAXSEG option. + MaxSegOption + + // MTUDiscoverOption is used to set/get the path MTU discovery setting. + // + // NOTE: Setting this option to any other value than PMTUDiscoveryDont + // is not supported and will fail as such, and getting this option will + // always return PMTUDiscoveryDont. + MTUDiscoverOption + + // MulticastTTLOption is used by SetSockOptInt/GetSockOptInt to control + // the default TTL value for multicast messages. The default is 1. + MulticastTTLOption + + // ReceiveQueueSizeOption is used in GetSockOptInt to specify that the + // number of unread bytes in the input buffer should be returned. + ReceiveQueueSizeOption + + // SendBufferSizeOption is used by SetSockOptInt/GetSockOptInt to + // specify the send buffer size option. + SendBufferSizeOption + + // ReceiveBufferSizeOption is used by SetSockOptInt/GetSockOptInt to + // specify the receive buffer size option. + ReceiveBufferSizeOption + + // SendQueueSizeOption is used in GetSockOptInt to specify that the + // number of unread bytes in the output buffer should be returned. + SendQueueSizeOption + + // TTLOption is used by SetSockOptInt/GetSockOptInt to control the + // default TTL/hop limit value for unicast messages. The default is + // protocol specific. + // + // A zero value indicates the default. + TTLOption + + // TCPSynCountOption is used by SetSockOptInt/GetSockOptInt to specify + // the number of SYN retransmits that TCP should send before aborting + // the attempt to connect. It cannot exceed 255. + // + // NOTE: This option is currently only stubbed out and is no-op. + TCPSynCountOption + + // TCPWindowClampOption is used by SetSockOptInt/GetSockOptInt to bound + // the size of the advertised window to this value. 
+ // + // NOTE: This option is currently only stubed out and is a no-op + TCPWindowClampOption +) + +const ( + // PMTUDiscoveryWant is a setting of the MTUDiscoverOption to use + // per-route settings. + PMTUDiscoveryWant int = iota + + // PMTUDiscoveryDont is a setting of the MTUDiscoverOption to disable + // path MTU discovery. + PMTUDiscoveryDont + + // PMTUDiscoveryDo is a setting of the MTUDiscoverOption to always do + // path MTU discovery. + PMTUDiscoveryDo + + // PMTUDiscoveryProbe is a setting of the MTUDiscoverOption to set DF + // but ignore path MTU. + PMTUDiscoveryProbe +) + +// ErrorOption is used in GetSockOpt to specify that the last error reported by +// the endpoint should be cleared and returned. +type ErrorOption struct{} + +// BindToDeviceOption is used by SetSockOpt/GetSockOpt to specify that sockets +// should bind only on a specific NIC. +type BindToDeviceOption NICID + +// TCPInfoOption is used by GetSockOpt to expose TCP statistics. +// +// TODO(b/64800844): Add and populate stat fields. +type TCPInfoOption struct { + RTT time.Duration + RTTVar time.Duration +} + +// KeepaliveIdleOption is used by SetSockOpt/GetSockOpt to specify the time a +// connection must remain idle before the first TCP keepalive packet is sent. +// Once this time is reached, KeepaliveIntervalOption is used instead. +type KeepaliveIdleOption time.Duration + +// KeepaliveIntervalOption is used by SetSockOpt/GetSockOpt to specify the +// interval between sending TCP keepalive packets. +type KeepaliveIntervalOption time.Duration + +// TCPUserTimeoutOption is used by SetSockOpt/GetSockOpt to specify a user +// specified timeout for a given TCP connection. +// See: RFC5482 for details. +type TCPUserTimeoutOption time.Duration + +// CongestionControlOption is used by SetSockOpt/GetSockOpt to set/get +// the current congestion control algorithm. 
+type CongestionControlOption string + +// AvailableCongestionControlOption is used to query the supported congestion +// control algorithms. +type AvailableCongestionControlOption string + +// buffer moderation. +type ModerateReceiveBufferOption bool + +// TCPLingerTimeoutOption is used by SetSockOpt/GetSockOpt to set/get the +// maximum duration for which a socket lingers in the TCP_FIN_WAIT_2 state +// before being marked closed. +type TCPLingerTimeoutOption time.Duration + +// TCPTimeWaitTimeoutOption is used by SetSockOpt/GetSockOpt to set/get the +// maximum duration for which a socket lingers in the TIME_WAIT state +// before being marked closed. +type TCPTimeWaitTimeoutOption time.Duration + +// TCPDeferAcceptOption is used by SetSockOpt/GetSockOpt to allow a +// accept to return a completed connection only when there is data to be +// read. This usually means the listening socket will drop the final ACK +// for a handshake till the specified timeout until a segment with data arrives. +type TCPDeferAcceptOption time.Duration + +// TCPMinRTOOption is use by SetSockOpt/GetSockOpt to allow overriding +// default MinRTO used by the Stack. +type TCPMinRTOOption time.Duration + +// TCPMaxRTOOption is use by SetSockOpt/GetSockOpt to allow overriding +// default MaxRTO used by the Stack. +type TCPMaxRTOOption time.Duration + +// TCPMaxRetriesOption is used by SetSockOpt/GetSockOpt to set/get the +// maximum number of retransmits after which we time out the connection. +type TCPMaxRetriesOption uint64 + +// TCPSynRcvdCountThresholdOption is used by SetSockOpt/GetSockOpt to specify +// the number of endpoints that can be in SYN-RCVD state before the stack +// switches to using SYN cookies. +type TCPSynRcvdCountThresholdOption uint64 + +// TCPSynRetriesOption is used by SetSockOpt/GetSockOpt to specify stack-wide +// default for number of times SYN is retransmitted before aborting a connect. 
+type TCPSynRetriesOption uint8 + +// MulticastInterfaceOption is used by SetSockOpt/GetSockOpt to specify a +// default interface for multicast. +type MulticastInterfaceOption struct { + NIC NICID + InterfaceAddr Address +} + +// MembershipOption is used by SetSockOpt/GetSockOpt as an argument to +// AddMembershipOption and RemoveMembershipOption. +type MembershipOption struct { + NIC NICID + InterfaceAddr Address + MulticastAddr Address +} + +// AddMembershipOption is used by SetSockOpt/GetSockOpt to join a multicast +// group identified by the given multicast address, on the interface matching +// the given interface address. +type AddMembershipOption MembershipOption + +// RemoveMembershipOption is used by SetSockOpt/GetSockOpt to leave a multicast +// group identified by the given multicast address, on the interface matching +// the given interface address. +type RemoveMembershipOption MembershipOption + +// OutOfBandInlineOption is used by SetSockOpt/GetSockOpt to specify whether +// TCP out-of-band data is delivered along with the normal in-band data. +type OutOfBandInlineOption int + +// DefaultTTLOption is used by stack.(*Stack).NetworkProtocolOption to specify +// a default TTL. +type DefaultTTLOption uint8 + +// +// IPPacketInfo is the message structure for IP_PKTINFO. +// +// +stateify savable +type IPPacketInfo struct { + // NIC is the ID of the NIC to be used. + NIC NICID + + // LocalAddr is the local address. + LocalAddr Address + + // DestinationAddr is the destination address. + DestinationAddr Address +} + +// Route is a row in the routing table. It specifies through which NIC (and +// gateway) sets of packets should be routed. A row is considered viable if the +// masked target address matches the destination address in the row. +type Route struct { + // Destination must contain the target address for this row to be viable. + Destination Subnet + + // Gateway is the gateway to be used if this row is viable. 
+ Gateway Address + + // NIC is the id of the nic to be used if this row is viable. + NIC NICID +} + +// String implements the fmt.Stringer interface. +func (r Route) String() string { + var out strings.Builder + fmt.Fprintf(&out, "%s", r.Destination) + if len(r.Gateway) > 0 { + fmt.Fprintf(&out, " via %s", r.Gateway) + } + fmt.Fprintf(&out, " nic %d", r.NIC) + return out.String() +} + +// TransportProtocolNumber is the number of a transport protocol. +type TransportProtocolNumber uint32 + +// NetworkProtocolNumber is the number of a network protocol. +type NetworkProtocolNumber uint32 + +// A StatCounter keeps track of a statistic. +type StatCounter struct { + count uint64 +} + +// Increment adds one to the counter. +func (s *StatCounter) Increment() { + s.IncrementBy(1) +} + +// Decrement minuses one to the counter. +func (s *StatCounter) Decrement() { + s.IncrementBy(^uint64(0)) +} + +// Value returns the current value of the counter. +func (s *StatCounter) Value() uint64 { + return atomic.LoadUint64(&s.count) +} + +// IncrementBy increments the counter by v. +func (s *StatCounter) IncrementBy(v uint64) { + atomic.AddUint64(&s.count, v) +} + +func (s *StatCounter) String() string { + return strconv.FormatUint(s.Value(), 10) +} + +// ICMPv4PacketStats enumerates counts for all ICMPv4 packet types. +type ICMPv4PacketStats struct { + // Echo is the total number of ICMPv4 echo packets counted. + Echo *StatCounter + + // EchoReply is the total number of ICMPv4 echo reply packets counted. + EchoReply *StatCounter + + // DstUnreachable is the total number of ICMPv4 destination unreachable + // packets counted. + DstUnreachable *StatCounter + + // SrcQuench is the total number of ICMPv4 source quench packets + // counted. + SrcQuench *StatCounter + + // Redirect is the total number of ICMPv4 redirect packets counted. + Redirect *StatCounter + + // TimeExceeded is the total number of ICMPv4 time exceeded packets + // counted. 
+ TimeExceeded *StatCounter + + // ParamProblem is the total number of ICMPv4 parameter problem packets + // counted. + ParamProblem *StatCounter + + // Timestamp is the total number of ICMPv4 timestamp packets counted. + Timestamp *StatCounter + + // TimestampReply is the total number of ICMPv4 timestamp reply packets + // counted. + TimestampReply *StatCounter + + // InfoRequest is the total number of ICMPv4 information request + // packets counted. + InfoRequest *StatCounter + + // InfoReply is the total number of ICMPv4 information reply packets + // counted. + InfoReply *StatCounter +} + +// ICMPv6PacketStats enumerates counts for all ICMPv6 packet types. +type ICMPv6PacketStats struct { + // EchoRequest is the total number of ICMPv6 echo request packets + // counted. + EchoRequest *StatCounter + + // EchoReply is the total number of ICMPv6 echo reply packets counted. + EchoReply *StatCounter + + // DstUnreachable is the total number of ICMPv6 destination unreachable + // packets counted. + DstUnreachable *StatCounter + + // PacketTooBig is the total number of ICMPv6 packet too big packets + // counted. + PacketTooBig *StatCounter + + // TimeExceeded is the total number of ICMPv6 time exceeded packets + // counted. + TimeExceeded *StatCounter + + // ParamProblem is the total number of ICMPv6 parameter problem packets + // counted. + ParamProblem *StatCounter + + // RouterSolicit is the total number of ICMPv6 router solicit packets + // counted. + RouterSolicit *StatCounter + + // RouterAdvert is the total number of ICMPv6 router advert packets + // counted. + RouterAdvert *StatCounter + + // NeighborSolicit is the total number of ICMPv6 neighbor solicit + // packets counted. + NeighborSolicit *StatCounter + + // NeighborAdvert is the total number of ICMPv6 neighbor advert packets + // counted. + NeighborAdvert *StatCounter + + // RedirectMsg is the total number of ICMPv6 redirect message packets + // counted. 
+ RedirectMsg *StatCounter +} + +// ICMPv4SentPacketStats collects outbound ICMPv4-specific stats. +type ICMPv4SentPacketStats struct { + ICMPv4PacketStats + + // Dropped is the total number of ICMPv4 packets dropped due to link + // layer errors. + Dropped *StatCounter + + // RateLimited is the total number of ICMPv6 packets dropped due to + // rate limit being exceeded. + RateLimited *StatCounter +} + +// ICMPv4ReceivedPacketStats collects inbound ICMPv4-specific stats. +type ICMPv4ReceivedPacketStats struct { + ICMPv4PacketStats + + // Invalid is the total number of ICMPv4 packets received that the + // transport layer could not parse. + Invalid *StatCounter +} + +// ICMPv6SentPacketStats collects outbound ICMPv6-specific stats. +type ICMPv6SentPacketStats struct { + ICMPv6PacketStats + + // Dropped is the total number of ICMPv6 packets dropped due to link + // layer errors. + Dropped *StatCounter + + // RateLimited is the total number of ICMPv6 packets dropped due to + // rate limit being exceeded. + RateLimited *StatCounter +} + +// ICMPv6ReceivedPacketStats collects inbound ICMPv6-specific stats. +type ICMPv6ReceivedPacketStats struct { + ICMPv6PacketStats + + // Invalid is the total number of ICMPv6 packets received that the + // transport layer could not parse. + Invalid *StatCounter +} + +// ICMPStats collects ICMP-specific stats (both v4 and v6). +type ICMPStats struct { + // ICMPv4SentPacketStats contains counts of sent packets by ICMPv4 packet type + // and a single count of packets which failed to write to the link + // layer. + V4PacketsSent ICMPv4SentPacketStats + + // ICMPv4ReceivedPacketStats contains counts of received packets by ICMPv4 + // packet type and a single count of invalid packets received. + V4PacketsReceived ICMPv4ReceivedPacketStats + + // ICMPv6SentPacketStats contains counts of sent packets by ICMPv6 packet type + // and a single count of packets which failed to write to the link + // layer. 
+ V6PacketsSent ICMPv6SentPacketStats + + // ICMPv6ReceivedPacketStats contains counts of received packets by ICMPv6 + // packet type and a single count of invalid packets received. + V6PacketsReceived ICMPv6ReceivedPacketStats +} + +// IPStats collects IP-specific stats (both v4 and v6). +type IPStats struct { + // PacketsReceived is the total number of IP packets received from the + // link layer in nic.DeliverNetworkPacket. + PacketsReceived *StatCounter + + // InvalidDestinationAddressesReceived is the total number of IP packets + // received with an unknown or invalid destination address. + InvalidDestinationAddressesReceived *StatCounter + + // InvalidSourceAddressesReceived is the total number of IP packets received + // with a source address that should never have been received on the wire. + InvalidSourceAddressesReceived *StatCounter + + // PacketsDelivered is the total number of incoming IP packets that + // are successfully delivered to the transport layer via HandlePacket. + PacketsDelivered *StatCounter + + // PacketsSent is the total number of IP packets sent via WritePacket. + PacketsSent *StatCounter + + // OutgoingPacketErrors is the total number of IP packets which failed + // to write to a link-layer endpoint. + OutgoingPacketErrors *StatCounter + + // MalformedPacketsReceived is the total number of IP Packets that were + // dropped due to the IP packet header failing validation checks. + MalformedPacketsReceived *StatCounter + + // MalformedFragmentsReceived is the total number of IP Fragments that were + // dropped due to the fragment failing validation checks. + MalformedFragmentsReceived *StatCounter +} + +// TCPStats collects TCP-specific stats. +type TCPStats struct { + // ActiveConnectionOpenings is the number of connections opened + // successfully via Connect. + ActiveConnectionOpenings *StatCounter + + // PassiveConnectionOpenings is the number of connections opened + // successfully via Listen. 
+ PassiveConnectionOpenings *StatCounter + + // CurrentEstablished is the number of TCP connections for which the + // current state is ESTABLISHED. + CurrentEstablished *StatCounter + + // CurrentConnected is the number of TCP connections that + // are in connected state. + CurrentConnected *StatCounter + + // EstablishedResets is the number of times TCP connections have made + // a direct transition to the CLOSED state from either the + // ESTABLISHED state or the CLOSE-WAIT state. + EstablishedResets *StatCounter + + // EstablishedClosed is the number of times established TCP connections + // made a transition to CLOSED state. + EstablishedClosed *StatCounter + + // EstablishedTimedout is the number of times an established connection + // was reset because of keep-alive time out. + EstablishedTimedout *StatCounter + + // ListenOverflowSynDrop is the number of times the listen queue overflowed + // and a SYN was dropped. + ListenOverflowSynDrop *StatCounter + + // ListenOverflowAckDrop is the number of times the final ACK + // in the handshake was dropped due to overflow. + ListenOverflowAckDrop *StatCounter + + // ListenOverflowCookieSent is the number of times a SYN cookie was sent. + ListenOverflowSynCookieSent *StatCounter + + // ListenOverflowSynCookieRcvd is the number of times a valid SYN + // cookie was received. + ListenOverflowSynCookieRcvd *StatCounter + + // ListenOverflowInvalidSynCookieRcvd is the number of times an invalid SYN cookie + // was received. + ListenOverflowInvalidSynCookieRcvd *StatCounter + + // FailedConnectionAttempts is the number of calls to Connect or Listen + // (active and passive openings, respectively) that end in an error. + FailedConnectionAttempts *StatCounter + + // ValidSegmentsReceived is the number of TCP segments received that + // the transport layer successfully parsed. 
+ ValidSegmentsReceived *StatCounter + + // InvalidSegmentsReceived is the number of TCP segments received that + // the transport layer could not parse. + InvalidSegmentsReceived *StatCounter + + // SegmentsSent is the number of TCP segments sent. + SegmentsSent *StatCounter + + // SegmentSendErrors is the number of TCP segments failed to be sent. + SegmentSendErrors *StatCounter + + // ResetsSent is the number of TCP resets sent. + ResetsSent *StatCounter + + // ResetsReceived is the number of TCP resets received. + ResetsReceived *StatCounter + + // Retransmits is the number of TCP segments retransmitted. + Retransmits *StatCounter + + // FastRecovery is the number of times Fast Recovery was used to + // recover from packet loss. + FastRecovery *StatCounter + + // SACKRecovery is the number of times SACK Recovery was used to + // recover from packet loss. + SACKRecovery *StatCounter + + // SlowStartRetransmits is the number of segments retransmitted in slow + // start. + SlowStartRetransmits *StatCounter + + // FastRetransmit is the number of segments retransmitted in fast + // recovery. + FastRetransmit *StatCounter + + // Timeouts is the number of times the RTO expired. + Timeouts *StatCounter + + // ChecksumErrors is the number of segments dropped due to bad checksums. + ChecksumErrors *StatCounter +} + +// UDPStats collects UDP-specific stats. +type UDPStats struct { + // PacketsReceived is the number of UDP datagrams received via + // HandlePacket. + PacketsReceived *StatCounter + + // UnknownPortErrors is the number of incoming UDP datagrams dropped + // because they did not have a known destination port. + UnknownPortErrors *StatCounter + + // ReceiveBufferErrors is the number of incoming UDP datagrams dropped + // due to the receiving buffer being in an invalid state. + ReceiveBufferErrors *StatCounter + + // MalformedPacketsReceived is the number of incoming UDP datagrams + // dropped due to the UDP header being in a malformed state. 
+ MalformedPacketsReceived *StatCounter + + // PacketsSent is the number of UDP datagrams sent via sendUDP. + PacketsSent *StatCounter + + // PacketSendErrors is the number of datagrams failed to be sent. + PacketSendErrors *StatCounter + + // ChecksumErrors is the number of datagrams dropped due to bad checksums. + ChecksumErrors *StatCounter +} + +// Stats holds statistics about the networking stack. +// +// All fields are optional. +type Stats struct { + // UnknownProtocolRcvdPackets is the number of packets received by the + // stack that were for an unknown or unsupported protocol. + UnknownProtocolRcvdPackets *StatCounter + + // MalformedRcvdPackets is the number of packets received by the stack + // that were deemed malformed. + MalformedRcvdPackets *StatCounter + + // DroppedPackets is the number of packets dropped due to full queues. + DroppedPackets *StatCounter + + // ICMP breaks out ICMP-specific stats (both v4 and v6). + ICMP ICMPStats + + // IP breaks out IP-specific stats (both v4 and v6). + IP IPStats + + // TCP breaks out TCP-specific stats. + TCP TCPStats + + // UDP breaks out UDP-specific stats. + UDP UDPStats +} + +// ReceiveErrors collects packet receive errors within transport endpoint. +type ReceiveErrors struct { + // ReceiveBufferOverflow is the number of received packets dropped + // due to the receive buffer being full. + ReceiveBufferOverflow StatCounter + + // MalformedPacketsReceived is the number of incoming packets + // dropped due to the packet header being in a malformed state. + MalformedPacketsReceived StatCounter + + // ClosedReceiver is the number of received packets dropped because + // of receiving endpoint state being closed. + ClosedReceiver StatCounter + + // ChecksumErrors is the number of packets dropped due to bad checksums. + ChecksumErrors StatCounter +} + +// SendErrors collects packet send errors within the transport layer for +// an endpoint. 
+type SendErrors struct { + // SendToNetworkFailed is the number of packets failed to be written to + // the network endpoint. + SendToNetworkFailed StatCounter + + // NoRoute is the number of times we failed to resolve IP route. + NoRoute StatCounter + + // NoLinkAddr is the number of times we failed to resolve ARP. + NoLinkAddr StatCounter +} + +// ReadErrors collects segment read errors from an endpoint read call. +type ReadErrors struct { + // ReadClosed is the number of received packet drops because the endpoint + // was shutdown for read. + ReadClosed StatCounter + + // InvalidEndpointState is the number of times we found the endpoint state + // to be unexpected. + InvalidEndpointState StatCounter + + // NotConnected is the number of times we tried to read but found that the + // endpoint was not connected. + NotConnected StatCounter +} + +// WriteErrors collects packet write errors from an endpoint write call. +type WriteErrors struct { + // WriteClosed is the number of packet drops because the endpoint + // was shutdown for write. + WriteClosed StatCounter + + // InvalidEndpointState is the number of times we found the endpoint state + // to be unexpected. + InvalidEndpointState StatCounter + + // InvalidArgs is the number of times invalid input arguments were + // provided for endpoint Write call. + InvalidArgs StatCounter +} + +// TransportEndpointStats collects statistics about the endpoint. +type TransportEndpointStats struct { + // PacketsReceived is the number of successful packet receives. + PacketsReceived StatCounter + + // PacketsSent is the number of successful packet sends. + PacketsSent StatCounter + + // ReceiveErrors collects packet receive errors within transport layer. + ReceiveErrors ReceiveErrors + + // ReadErrors collects packet read errors from an endpoint read call. + ReadErrors ReadErrors + + // SendErrors collects packet send errors within the transport layer. 
+ SendErrors SendErrors + + // WriteErrors collects packet write errors from an endpoint write call. + WriteErrors WriteErrors +} + +// IsEndpointStats is an empty method to implement the tcpip.EndpointStats +// marker interface. +func (*TransportEndpointStats) IsEndpointStats() {} + +// InitStatCounters initializes v's fields with nil StatCounter fields to new +// StatCounters. +func InitStatCounters(v reflect.Value) { + for i := 0; i < v.NumField(); i++ { + v := v.Field(i) + if s, ok := v.Addr().Interface().(**StatCounter); ok { + if *s == nil { + *s = new(StatCounter) + } + } else { + InitStatCounters(v) + } + } +} + +// FillIn returns a copy of s with nil fields initialized to new StatCounters. +func (s Stats) FillIn() Stats { + InitStatCounters(reflect.ValueOf(&s).Elem()) + return s +} + +// Clone returns a copy of the TransportEndpointStats by atomically reading +// each field. +func (src *TransportEndpointStats) Clone() TransportEndpointStats { + var dst TransportEndpointStats + clone(reflect.ValueOf(&dst).Elem(), reflect.ValueOf(src).Elem()) + return dst +} + +func clone(dst reflect.Value, src reflect.Value) { + for i := 0; i < dst.NumField(); i++ { + d := dst.Field(i) + s := src.Field(i) + if c, ok := s.Addr().Interface().(*StatCounter); ok { + d.Addr().Interface().(*StatCounter).IncrementBy(c.Value()) + } else { + clone(d, s) + } + } +} + +// String implements the fmt.Stringer interface. +func (a Address) String() string { + switch len(a) { + case 4: + return fmt.Sprintf("%d.%d.%d.%d", int(a[0]), int(a[1]), int(a[2]), int(a[3])) + case 16: + // Find the longest subsequence of hexadecimal zeros. 
+ start, end := -1, -1 + for i := 0; i < len(a); i += 2 { + j := i + for j < len(a) && a[j] == 0 && a[j+1] == 0 { + j += 2 + } + if j > i+2 && j-i > end-start { + start, end = i, j + } + } + + var b strings.Builder + for i := 0; i < len(a); i += 2 { + if i == start { + b.WriteString("::") + i = end + if end >= len(a) { + break + } + } else if i > 0 { + b.WriteByte(':') + } + v := uint16(a[i+0])<<8 | uint16(a[i+1]) + if v == 0 { + b.WriteByte('0') + } else { + const digits = "0123456789abcdef" + for i := uint(3); i < 4; i-- { + if v := v >> (i * 4); v != 0 { + b.WriteByte(digits[v&0xf]) + } + } + } + } + return b.String() + default: + return fmt.Sprintf("%x", []byte(a)) + } +} + +// To4 converts the IPv4 address to a 4-byte representation. +// If the address is not an IPv4 address, To4 returns "". +func (a Address) To4() Address { + const ( + ipv4len = 4 + ipv6len = 16 + ) + if len(a) == ipv4len { + return a + } + if len(a) == ipv6len && + isZeros(a[0:10]) && + a[10] == 0xff && + a[11] == 0xff { + return a[12:16] + } + return "" +} + +// isZeros reports whether a is all zeros. +func isZeros(a Address) bool { + for i := 0; i < len(a); i++ { + if a[i] != 0 { + return false + } + } + return true +} + +// LinkAddress is a byte slice cast as a string that represents a link address. +// It is typically a 6-byte MAC address. +type LinkAddress string + +// String implements the fmt.Stringer interface. +func (a LinkAddress) String() string { + switch len(a) { + case 6: + return fmt.Sprintf("%02x:%02x:%02x:%02x:%02x:%02x", a[0], a[1], a[2], a[3], a[4], a[5]) + default: + return fmt.Sprintf("%x", []byte(a)) + } +} + +// ParseMACAddress parses an IEEE 802 address. +// +// It must be in the format aa:bb:cc:dd:ee:ff or aa-bb-cc-dd-ee-ff. 
+func ParseMACAddress(s string) (LinkAddress, error) { + parts := strings.FieldsFunc(s, func(c rune) bool { + return c == ':' || c == '-' + }) + if len(parts) != 6 { + return "", fmt.Errorf("inconsistent parts: %s", s) + } + addr := make([]byte, 0, len(parts)) + for _, part := range parts { + u, err := strconv.ParseUint(part, 16, 8) + if err != nil { + return "", fmt.Errorf("invalid hex digits: %s", s) + } + addr = append(addr, byte(u)) + } + return LinkAddress(addr), nil +} + +// AddressWithPrefix is an address with its subnet prefix length. +type AddressWithPrefix struct { + // Address is a network address. + Address Address + + // PrefixLen is the subnet prefix length. + PrefixLen int +} + +// String implements the fmt.Stringer interface. +func (a AddressWithPrefix) String() string { + return fmt.Sprintf("%s/%d", a.Address, a.PrefixLen) +} + +// Subnet converts the address and prefix into a Subnet value and returns it. +func (a AddressWithPrefix) Subnet() Subnet { + addrLen := len(a.Address) + if a.PrefixLen <= 0 { + return Subnet{ + address: Address(strings.Repeat("\x00", addrLen)), + mask: AddressMask(strings.Repeat("\x00", addrLen)), + } + } + if a.PrefixLen >= addrLen*8 { + return Subnet{ + address: a.Address, + mask: AddressMask(strings.Repeat("\xff", addrLen)), + } + } + + sa := make([]byte, addrLen) + sm := make([]byte, addrLen) + n := uint(a.PrefixLen) + for i := 0; i < addrLen; i++ { + if n >= 8 { + sa[i] = a.Address[i] + sm[i] = 0xff + n -= 8 + continue + } + sm[i] = ^byte(0xff >> n) + sa[i] = a.Address[i] & sm[i] + n = 0 + } + + // For extra caution, call NewSubnet rather than directly creating the Subnet + // value. If that fails it indicates a serious bug in this code, so panic is + // in order. + s, err := NewSubnet(Address(sa), AddressMask(sm)) + if err != nil { + panic("invalid subnet: " + err.Error()) + } + return s +} + +// ProtocolAddress is an address and the network protocol it is associated +// with. 
+type ProtocolAddress struct { + // Protocol is the protocol of the address. + Protocol NetworkProtocolNumber + + // AddressWithPrefix is a network address with its subnet prefix length. + AddressWithPrefix AddressWithPrefix +} + +var ( + // danglingEndpointsMu protects access to danglingEndpoints. + danglingEndpointsMu sync.Mutex + + // danglingEndpoints tracks all dangling endpoints no longer owned by the app. + danglingEndpoints = make(map[Endpoint]struct{}) +) + +// GetDanglingEndpoints returns all dangling endpoints. +func GetDanglingEndpoints() []Endpoint { + danglingEndpointsMu.Lock() + es := make([]Endpoint, 0, len(danglingEndpoints)) + for e := range danglingEndpoints { + es = append(es, e) + } + danglingEndpointsMu.Unlock() + return es +} + +// AddDanglingEndpoint adds a dangling endpoint. +func AddDanglingEndpoint(e Endpoint) { + danglingEndpointsMu.Lock() + danglingEndpoints[e] = struct{}{} + danglingEndpointsMu.Unlock() +} + +// DeleteDanglingEndpoint removes a dangling endpoint. +func DeleteDanglingEndpoint(e Endpoint) { + danglingEndpointsMu.Lock() + delete(danglingEndpoints, e) + danglingEndpointsMu.Unlock() +} + +// AsyncLoading is the global barrier for asynchronous endpoint loading +// activities. +var AsyncLoading sync.WaitGroup |