diff options
Diffstat (limited to 'pkg/tcpip/stack/registration.go')
-rw-r--r-- | pkg/tcpip/stack/registration.go | 560 |
1 files changed, 560 insertions, 0 deletions
diff --git a/pkg/tcpip/stack/registration.go b/pkg/tcpip/stack/registration.go new file mode 100644 index 000000000..5cbc946b6 --- /dev/null +++ b/pkg/tcpip/stack/registration.go @@ -0,0 +1,560 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package stack + +import ( + "gvisor.dev/gvisor/pkg/sleep" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/waiter" +) + +// NetworkEndpointID is the identifier of a network layer protocol endpoint. +// Currently the local address is sufficient because all supported protocols +// (i.e., IPv4 and IPv6) have different sizes for their addresses. +type NetworkEndpointID struct { + LocalAddress tcpip.Address +} + +// TransportEndpointID is the identifier of a transport layer protocol endpoint. +// +// +stateify savable +type TransportEndpointID struct { + // LocalPort is the local port associated with the endpoint. + LocalPort uint16 + + // LocalAddress is the local [network layer] address associated with + // the endpoint. + LocalAddress tcpip.Address + + // RemotePort is the remote port associated with the endpoint. + RemotePort uint16 + + // RemoteAddress is the remote [network layer] address associated with + // the endpoint. + RemoteAddress tcpip.Address +} + +// ControlType is the type of network control message. +type ControlType int + +// The following are the allowed values for ControlType. 
+const ( + ControlPacketTooBig ControlType = iota + ControlPortUnreachable + ControlUnknown +) + +// TransportEndpoint is the interface that needs to be implemented by transport +// protocol (e.g., tcp, udp) endpoints that can handle packets. +type TransportEndpoint interface { + // UniqueID returns a unique ID for this transport endpoint. + UniqueID() uint64 + + // HandlePacket is called by the stack when new packets arrive to + // this transport endpoint. It sets pkt.TransportHeader. + // + // HandlePacket takes ownership of pkt. + HandlePacket(r *Route, id TransportEndpointID, pkt *PacketBuffer) + + // HandleControlPacket is called by the stack when new control (e.g. + // ICMP) packets arrive to this transport endpoint. + // HandleControlPacket takes ownership of pkt. + HandleControlPacket(id TransportEndpointID, typ ControlType, extra uint32, pkt *PacketBuffer) + + // Abort initiates an expedited endpoint teardown. It puts the endpoint + // in a closed state and frees all resources associated with it. This + // cleanup may happen asynchronously. Wait can be used to block on this + // asynchronous cleanup. + Abort() + + // Wait waits for any worker goroutines owned by the endpoint to stop. + // + // An endpoint can be requested to stop its worker goroutines by calling + // its Close method. + // + // Wait will not block if the endpoint hasn't started any goroutines + // yet, even if it might later. + Wait() +} + +// RawTransportEndpoint is the interface that needs to be implemented by raw +// transport protocol endpoints. RawTransportEndpoints receive the entire +// packet - including the network and transport headers - as delivered to +// netstack. +type RawTransportEndpoint interface { + // HandlePacket is called by the stack when new packets arrive to + // this transport endpoint. The packet contains all data from the link + // layer up. + // + // HandlePacket takes ownership of pkt. 
+ HandlePacket(r *Route, pkt *PacketBuffer) +} + +// PacketEndpoint is the interface that needs to be implemented by packet +// transport protocol endpoints. These endpoints receive link layer headers in +// addition to whatever they contain (usually network and transport layer +// headers and a payload). +type PacketEndpoint interface { + // HandlePacket is called by the stack when new packets arrive that + // match the endpoint. + // + // Implementers should treat packet as immutable and should copy it + // before modification. + // + // linkHeader may have a length of 0, in which case the PacketEndpoint + // should construct its own ethernet header for applications. + // + // HandlePacket takes ownership of pkt. + HandlePacket(nicID tcpip.NICID, addr tcpip.LinkAddress, netProto tcpip.NetworkProtocolNumber, pkt *PacketBuffer) +} + +// TransportProtocol is the interface that needs to be implemented by transport +// protocols (e.g., tcp, udp) that want to be part of the networking stack. +type TransportProtocol interface { + // Number returns the transport protocol number. + Number() tcpip.TransportProtocolNumber + + // NewEndpoint creates a new endpoint of the transport protocol. + NewEndpoint(stack *Stack, netProto tcpip.NetworkProtocolNumber, waitQueue *waiter.Queue) (tcpip.Endpoint, *tcpip.Error) + + // NewRawEndpoint creates a new raw endpoint of the transport protocol. + NewRawEndpoint(stack *Stack, netProto tcpip.NetworkProtocolNumber, waitQueue *waiter.Queue) (tcpip.Endpoint, *tcpip.Error) + + // MinimumPacketSize returns the minimum valid packet size of this + // transport protocol. The stack automatically drops any packets smaller + // than this targeted at this protocol. + MinimumPacketSize() int + + // ParsePorts returns the source and destination ports stored in a + // packet of this protocol. 
+ ParsePorts(v buffer.View) (src, dst uint16, err *tcpip.Error) + + // HandleUnknownDestinationPacket handles packets targeted at this + // protocol but that don't match any existing endpoint. For example, + // it is targeted at a port that has no listeners. + // + // The return value indicates whether the packet was well-formed (for + // stats purposes only). + // + // HandleUnknownDestinationPacket takes ownership of pkt. + HandleUnknownDestinationPacket(r *Route, id TransportEndpointID, pkt *PacketBuffer) bool + + // SetOption allows enabling/disabling protocol specific features. + // SetOption returns an error if the option is not supported or the + // provided option value is invalid. + SetOption(option interface{}) *tcpip.Error + + // Option allows retrieving protocol specific option values. + // Option returns an error if the option is not supported or the + // provided option value is invalid. + Option(option interface{}) *tcpip.Error + + // Close requests that any worker goroutines owned by the protocol + // stop. + Close() + + // Wait waits for any worker goroutines owned by the protocol to stop. + Wait() + + // Parse sets pkt.TransportHeader and trims pkt.Data appropriately. It does + // neither and returns false if pkt.Data is too small, i.e. pkt.Data.Size() < + // MinimumPacketSize() + Parse(pkt *PacketBuffer) (ok bool) +} + +// TransportDispatcher contains the methods used by the network stack to deliver +// packets to the appropriate transport endpoint after it has been handled by +// the network layer. +type TransportDispatcher interface { + // DeliverTransportPacket delivers packets to the appropriate + // transport protocol endpoint. + // + // pkt.NetworkHeader must be set before calling DeliverTransportPacket. + // + // DeliverTransportPacket takes ownership of pkt. 
+ DeliverTransportPacket(r *Route, protocol tcpip.TransportProtocolNumber, pkt *PacketBuffer) + + // DeliverTransportControlPacket delivers control packets to the + // appropriate transport protocol endpoint. + // + // pkt.NetworkHeader must be set before calling + // DeliverTransportControlPacket. + // + // DeliverTransportControlPacket takes ownership of pkt. + DeliverTransportControlPacket(local, remote tcpip.Address, net tcpip.NetworkProtocolNumber, trans tcpip.TransportProtocolNumber, typ ControlType, extra uint32, pkt *PacketBuffer) +} + +// PacketLooping specifies where an outbound packet should be sent. +type PacketLooping byte + +const ( + // PacketOut indicates that the packet should be passed to the link + // endpoint. + PacketOut PacketLooping = 1 << iota + + // PacketLoop indicates that the packet should be handled locally. + PacketLoop +) + +// NetworkHeaderParams are the header parameters given as input by the +// transport endpoint to the network. +type NetworkHeaderParams struct { + // Protocol refers to the transport protocol number. + Protocol tcpip.TransportProtocolNumber + + // TTL refers to Time To Live field of the IP-header. + TTL uint8 + + // TOS refers to TypeOfService or TrafficClass field of the IP-header. + TOS uint8 +} + +// NetworkEndpoint is the interface that needs to be implemented by endpoints +// of network layer protocols (e.g., ipv4, ipv6). +type NetworkEndpoint interface { + // DefaultTTL is the default time-to-live value (or hop limit, in ipv6) + // for this endpoint. + DefaultTTL() uint8 + + // MTU is the maximum transmission unit for this endpoint. This is + // generally calculated as the MTU of the underlying data link endpoint + // minus the network endpoint max header length. + MTU() uint32 + + // Capabilities returns the set of capabilities supported by the + // underlying link-layer endpoint. 
+ Capabilities() LinkEndpointCapabilities + + // MaxHeaderLength returns the maximum size the network (and lower + // level layers combined) headers can have. Higher levels use this + // information to reserve space in the front of the packets they're + // building. + MaxHeaderLength() uint16 + + // WritePacket writes a packet to the given destination address and + // protocol. It takes ownership of pkt. pkt.TransportHeader must have already + // been set. + WritePacket(r *Route, gso *GSO, params NetworkHeaderParams, pkt *PacketBuffer) *tcpip.Error + + // WritePackets writes packets to the given destination address and + // protocol. pkts must not be zero length. It takes ownership of pkts and + // underlying packets. + WritePackets(r *Route, gso *GSO, pkts PacketBufferList, params NetworkHeaderParams) (int, *tcpip.Error) + + // WriteHeaderIncludedPacket writes a packet that includes a network + // header to the given destination address. It takes ownership of pkt. + WriteHeaderIncludedPacket(r *Route, pkt *PacketBuffer) *tcpip.Error + + // ID returns the network protocol endpoint ID. + ID() *NetworkEndpointID + + // PrefixLen returns the network endpoint's subnet prefix length in bits. + PrefixLen() int + + // NICID returns the id of the NIC this endpoint belongs to. + NICID() tcpip.NICID + + // HandlePacket is called by the link layer when new packets arrive to + // this network endpoint. It sets pkt.NetworkHeader. + // + // HandlePacket takes ownership of pkt. + HandlePacket(r *Route, pkt *PacketBuffer) + + // Close is called when the endpoint is removed from a stack. + Close() + + // NetworkProtocolNumber returns the tcpip.NetworkProtocolNumber for + // this endpoint. + NetworkProtocolNumber() tcpip.NetworkProtocolNumber +} + +// NetworkProtocol is the interface that needs to be implemented by network +// protocols (e.g., ipv4, ipv6) that want to be part of the networking stack. +type NetworkProtocol interface { + // Number returns the network protocol number. 
+ Number() tcpip.NetworkProtocolNumber + + // MinimumPacketSize returns the minimum valid packet size of this + // network protocol. The stack automatically drops any packets smaller + // than this targeted at this protocol. + MinimumPacketSize() int + + // DefaultPrefixLen returns the protocol's default prefix length. + DefaultPrefixLen() int + + // ParseAddresses returns the source and destination addresses stored in a + // packet of this protocol. + ParseAddresses(v buffer.View) (src, dst tcpip.Address) + + // NewEndpoint creates a new endpoint of this protocol. + NewEndpoint(nicID tcpip.NICID, addrWithPrefix tcpip.AddressWithPrefix, linkAddrCache LinkAddressCache, dispatcher TransportDispatcher, sender LinkEndpoint, st *Stack) (NetworkEndpoint, *tcpip.Error) + + // SetOption allows enabling/disabling protocol specific features. + // SetOption returns an error if the option is not supported or the + // provided option value is invalid. + SetOption(option interface{}) *tcpip.Error + + // Option allows retrieving protocol specific option values. + // Option returns an error if the option is not supported or the + // provided option value is invalid. + Option(option interface{}) *tcpip.Error + + // Close requests that any worker goroutines owned by the protocol + // stop. + Close() + + // Wait waits for any worker goroutines owned by the protocol to stop. + Wait() + + // Parse sets pkt.NetworkHeader and trims pkt.Data appropriately. It + // returns: + // - The encapsulated protocol, if present. + // - Whether there is an encapsulated transport protocol payload (e.g. ARP + // does not encapsulate anything). + // - Whether pkt.Data was large enough to parse and set pkt.NetworkHeader. + Parse(pkt *PacketBuffer) (proto tcpip.TransportProtocolNumber, hasTransportHdr bool, ok bool) +} + +// NetworkDispatcher contains the methods used by the network stack to deliver +// packets to the appropriate network endpoint after it has been handled by +// the data link layer. 
+type NetworkDispatcher interface { + // DeliverNetworkPacket finds the appropriate network protocol endpoint + // and hands the packet over for further processing. + // + // pkt.LinkHeader may or may not be set before calling + // DeliverNetworkPacket. Some packets do not have link headers (e.g. + // packets sent via loopback), and won't have the field set. + // + // DeliverNetworkPacket takes ownership of pkt. + DeliverNetworkPacket(remote, local tcpip.LinkAddress, protocol tcpip.NetworkProtocolNumber, pkt *PacketBuffer) +} + +// LinkEndpointCapabilities is the type associated with the capabilities +// supported by a link-layer endpoint. It is a set of bitfields. +type LinkEndpointCapabilities uint + +// The following are the supported link endpoint capabilities. +const ( + CapabilityNone LinkEndpointCapabilities = 0 + // CapabilityTXChecksumOffload indicates that the link endpoint supports + // checksum computation for outgoing packets and the stack can skip + // computing checksums when sending packets. + CapabilityTXChecksumOffload LinkEndpointCapabilities = 1 << iota + // CapabilityRXChecksumOffload indicates that the link endpoint supports + // checksum verification on received packets and that it's safe for the + // stack to skip checksum verification. + CapabilityRXChecksumOffload + CapabilityResolutionRequired + CapabilitySaveRestore + CapabilityDisconnectOk + CapabilityLoopback + CapabilityHardwareGSO + + // CapabilitySoftwareGSO indicates the link endpoint supports sending + // multiple packets using a single call (LinkEndpoint.WritePackets). + CapabilitySoftwareGSO +) + +// LinkEndpoint is the interface implemented by data link layer protocols (e.g., +// ethernet, loopback, raw) and used by network layer protocols to send packets +// out through the implementer's data link endpoint. When a link header exists, +// it sets each PacketBuffer's LinkHeader field before passing it up the +// stack. 
+type LinkEndpoint interface { + // MTU is the maximum transmission unit for this endpoint. This is + // usually dictated by the backing physical network; when such a + // physical network doesn't exist, the limit is generally 64k, which + // includes the maximum size of an IP packet. + MTU() uint32 + + // Capabilities returns the set of capabilities supported by the + // endpoint. + Capabilities() LinkEndpointCapabilities + + // MaxHeaderLength returns the maximum size the data link (and + // lower level layers combined) headers can have. Higher levels use this + // information to reserve space in the front of the packets they're + // building. + MaxHeaderLength() uint16 + + // LinkAddress returns the link address (typically a MAC) of the + // link endpoint. + LinkAddress() tcpip.LinkAddress + + // WritePacket writes a packet with the given protocol through the + // given route. It takes ownership of pkt. pkt.NetworkHeader and + // pkt.TransportHeader must have already been set. + // + // To participate in transparent bridging, a LinkEndpoint implementation + // should call eth.Encode with header.EthernetFields.SrcAddr set to + // r.LocalLinkAddress if it is provided. + WritePacket(r *Route, gso *GSO, protocol tcpip.NetworkProtocolNumber, pkt *PacketBuffer) *tcpip.Error + + // WritePackets writes packets with the given protocol through the + // given route. pkts must not be zero length. It takes ownership of pkts and + // underlying packets. + // + // Right now, WritePackets is used only when the software segmentation + // offload is enabled. If it will be used for something else, it may + // require to change syscall filters. + WritePackets(r *Route, gso *GSO, pkts PacketBufferList, protocol tcpip.NetworkProtocolNumber) (int, *tcpip.Error) + + // WriteRawPacket writes a packet directly to the link. The packet + // should already have an ethernet header. It takes ownership of vv. 
+ WriteRawPacket(vv buffer.VectorisedView) *tcpip.Error + + // Attach attaches the data link layer endpoint to the network-layer + // dispatcher of the stack. + // + // Attach will be called with a nil dispatcher if the receiver's associated + // NIC is being removed. + Attach(dispatcher NetworkDispatcher) + + // IsAttached returns whether a NetworkDispatcher is attached to the + // endpoint. + IsAttached() bool + + // Wait waits for any worker goroutines owned by the endpoint to stop. + // + // For now, requesting that an endpoint's worker goroutine(s) stop is + // implementation specific. + // + // Wait will not block if the endpoint hasn't started any goroutines + // yet, even if it might later. + Wait() +} + +// InjectableLinkEndpoint is a LinkEndpoint where inbound packets are +// delivered via the Inject method. +type InjectableLinkEndpoint interface { + LinkEndpoint + + // InjectInbound injects an inbound packet. + InjectInbound(protocol tcpip.NetworkProtocolNumber, pkt *PacketBuffer) + + // InjectOutbound writes a fully formed outbound packet directly to the + // link. + // + // dest is used by endpoints with multiple raw destinations. + InjectOutbound(dest tcpip.Address, packet []byte) *tcpip.Error +} + +// A LinkAddressResolver is an extension to a NetworkProtocol that +// can resolve link addresses. +type LinkAddressResolver interface { + // LinkAddressRequest sends a request for the LinkAddress of addr. + // The request is sent on linkEP with localAddr as the source. + // + // A valid response will cause the discovery protocol's network + // endpoint to call AddLinkAddress. + LinkAddressRequest(addr, localAddr tcpip.Address, linkEP LinkEndpoint) *tcpip.Error + + // ResolveStaticAddress attempts to resolve address without sending + // requests. It either resolves the name immediately or returns the + // empty LinkAddress. + // + // It can be used to resolve broadcast addresses for example. 
+ ResolveStaticAddress(addr tcpip.Address) (tcpip.LinkAddress, bool) + + // LinkAddressProtocol returns the network protocol of the + // addresses this resolver can resolve. + LinkAddressProtocol() tcpip.NetworkProtocolNumber +} + +// A LinkAddressCache caches link addresses. +type LinkAddressCache interface { + // CheckLocalAddress determines if the given local address exists and + // reports the result as a NICID (TODO: document the not-found value). + CheckLocalAddress(nicID tcpip.NICID, protocol tcpip.NetworkProtocolNumber, addr tcpip.Address) tcpip.NICID + + // AddLinkAddress adds a link address to the cache. + AddLinkAddress(nicID tcpip.NICID, addr tcpip.Address, linkAddr tcpip.LinkAddress) + + // GetLinkAddress looks up the cache to translate address to link address (e.g. IP -> MAC). + // If the LinkEndpoint requests address resolution and there is a LinkAddressResolver + // registered with the network protocol, the cache attempts to resolve the address + // and returns ErrWouldBlock. Waker is notified when address resolution is + // complete (success or not). + // + // If address resolution is required, ErrNoLinkAddress and a notification channel is + // returned for the top level caller to block. Channel is closed once address resolution + // is complete (success or not). + GetLinkAddress(nicID tcpip.NICID, addr, localAddr tcpip.Address, protocol tcpip.NetworkProtocolNumber, w *sleep.Waker) (tcpip.LinkAddress, <-chan struct{}, *tcpip.Error) + + // RemoveWaker removes a waker that has been added in GetLinkAddress(). + RemoveWaker(nicID tcpip.NICID, addr tcpip.Address, waker *sleep.Waker) +} + +// RawFactory produces endpoints for writing various types of raw packets. +type RawFactory interface { + // NewUnassociatedEndpoint produces endpoints for writing packets not + // associated with a particular transport protocol. Such endpoints can + // be used to write arbitrary packets that include the network header. 
+ NewUnassociatedEndpoint(stack *Stack, netProto tcpip.NetworkProtocolNumber, transProto tcpip.TransportProtocolNumber, waiterQueue *waiter.Queue) (tcpip.Endpoint, *tcpip.Error) + + // NewPacketEndpoint produces endpoints for reading and writing packets + // that include network and (when cooked is false) link layer headers. + NewPacketEndpoint(stack *Stack, cooked bool, netProto tcpip.NetworkProtocolNumber, waiterQueue *waiter.Queue) (tcpip.Endpoint, *tcpip.Error) +} + +// GSOType is the type of GSO segments. +// +// +stateify savable +type GSOType int + +// Types of gso segments. +const ( + GSONone GSOType = iota + + // Hardware GSO types: + GSOTCPv4 + GSOTCPv6 + + // GSOSW is used for software GSO segments which have to be sent by + // endpoint.WritePackets. + GSOSW +) + +// GSO contains generic segmentation offload properties. +// +// +stateify savable +type GSO struct { + // Type is one of GSONone, GSOTCPv4, etc. + Type GSOType + // NeedsCsum is set if the checksum offload is enabled. + NeedsCsum bool + // CsumOffset is the offset after which to place the checksum. + CsumOffset uint16 + + // MSS is the maximum segment size. + MSS uint16 + // L3HdrLen is the L3 (IP) header length. + L3HdrLen uint16 + + // MaxSize is the maximum GSO packet size. + MaxSize uint32 +} + +// GSOEndpoint provides access to GSO properties. +type GSOEndpoint interface { + // GSOMaxSize returns the maximum GSO packet size. + GSOMaxSize() uint32 +} + +// SoftwareGSOMaxSize is a maximum allowed size of a software GSO segment. +// This isn't a hard limit, because it is never set into packet headers. +const SoftwareGSOMaxSize = (1 << 16) |