diff options
Diffstat (limited to 'pkg/tcpip/stack/registration.go')
-rw-r--r-- | pkg/tcpip/stack/registration.go | 441 |
1 files changed, 441 insertions, 0 deletions
diff --git a/pkg/tcpip/stack/registration.go b/pkg/tcpip/stack/registration.go new file mode 100644 index 000000000..c70533a35 --- /dev/null +++ b/pkg/tcpip/stack/registration.go @@ -0,0 +1,441 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package stack + +import ( + "sync" + + "gvisor.googlesource.com/gvisor/pkg/sleep" + "gvisor.googlesource.com/gvisor/pkg/tcpip" + "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer" + "gvisor.googlesource.com/gvisor/pkg/waiter" +) + +// NetworkEndpointID is the identifier of a network layer protocol endpoint. +// Currently the local address is sufficient because all supported protocols +// (i.e., IPv4 and IPv6) have different sizes for their addresses. +type NetworkEndpointID struct { + LocalAddress tcpip.Address +} + +// TransportEndpointID is the identifier of a transport layer protocol endpoint. +// +// +stateify savable +type TransportEndpointID struct { + // LocalPort is the local port associated with the endpoint. + LocalPort uint16 + + // LocalAddress is the local [network layer] address associated with + // the endpoint. + LocalAddress tcpip.Address + + // RemotePort is the remote port associated with the endpoint. + RemotePort uint16 + + // RemoteAddress it the remote [network layer] address associated with + // the endpoint. + RemoteAddress tcpip.Address +} + +// ControlType is the type of network control message. +type ControlType int + +// The following are the allowed values for ControlType values. +const ( + ControlPacketTooBig ControlType = iota + ControlPortUnreachable + ControlUnknown +) + +// TransportEndpoint is the interface that needs to be implemented by transport +// protocol (e.g., tcp, udp) endpoints that can handle packets. +type TransportEndpoint interface { + // HandlePacket is called by the stack when new packets arrive to + // this transport endpoint. + HandlePacket(r *Route, id TransportEndpointID, vv buffer.VectorisedView) + + // HandleControlPacket is called by the stack when new control (e.g., + // ICMP) packets arrive to this transport endpoint. + HandleControlPacket(id TransportEndpointID, typ ControlType, extra uint32, vv buffer.VectorisedView) +} + +// RawTransportEndpoint is the interface that needs to be implemented by raw +// transport protocol endpoints. RawTransportEndpoints receive the entire +// packet - including the link, network, and transport headers - as delivered +// to netstack. +type RawTransportEndpoint interface { + // HandlePacket is called by the stack when new packets arrive to + // this transport endpoint. The packet contains all data from the link + // layer up. + HandlePacket(r *Route, netHeader buffer.View, packet buffer.VectorisedView) +} + +// TransportProtocol is the interface that needs to be implemented by transport +// protocols (e.g., tcp, udp) that want to be part of the networking stack. +type TransportProtocol interface { + // Number returns the transport protocol number. + Number() tcpip.TransportProtocolNumber + + // NewEndpoint creates a new endpoint of the transport protocol. + NewEndpoint(stack *Stack, netProto tcpip.NetworkProtocolNumber, waitQueue *waiter.Queue) (tcpip.Endpoint, *tcpip.Error) + + // NewRawEndpoint creates a new raw endpoint of the transport protocol. + NewRawEndpoint(stack *Stack, netProto tcpip.NetworkProtocolNumber, waitQueue *waiter.Queue) (tcpip.Endpoint, *tcpip.Error) + + // MinimumPacketSize returns the minimum valid packet size of this + // transport protocol. The stack automatically drops any packets smaller + // than this targeted at this protocol. + MinimumPacketSize() int + + // ParsePorts returns the source and destination ports stored in a + // packet of this protocol. + ParsePorts(v buffer.View) (src, dst uint16, err *tcpip.Error) + + // HandleUnknownDestinationPacket handles packets targeted at this + // protocol but that don't match any existing endpoint. For example, + // it is targeted at a port that have no listeners. + // + // The return value indicates whether the packet was well-formed (for + // stats purposes only). + HandleUnknownDestinationPacket(r *Route, id TransportEndpointID, vv buffer.VectorisedView) bool + + // SetOption allows enabling/disabling protocol specific features. + // SetOption returns an error if the option is not supported or the + // provided option value is invalid. + SetOption(option interface{}) *tcpip.Error + + // Option allows retrieving protocol specific option values. + // Option returns an error if the option is not supported or the + // provided option value is invalid. + Option(option interface{}) *tcpip.Error +} + +// TransportDispatcher contains the methods used by the network stack to deliver +// packets to the appropriate transport endpoint after it has been handled by +// the network layer. +type TransportDispatcher interface { + // DeliverTransportPacket delivers packets to the appropriate + // transport protocol endpoint. It also returns the network layer + // header for the enpoint to inspect or pass up the stack. + DeliverTransportPacket(r *Route, protocol tcpip.TransportProtocolNumber, netHeader buffer.View, vv buffer.VectorisedView) + + // DeliverTransportControlPacket delivers control packets to the + // appropriate transport protocol endpoint. + DeliverTransportControlPacket(local, remote tcpip.Address, net tcpip.NetworkProtocolNumber, trans tcpip.TransportProtocolNumber, typ ControlType, extra uint32, vv buffer.VectorisedView) +} + +// PacketLooping specifies where an outbound packet should be sent. +type PacketLooping byte + +const ( + // PacketOut indicates that the packet should be passed to the link + // endpoint. + PacketOut PacketLooping = 1 << iota + + // PacketLoop indicates that the packet should be handled locally. + PacketLoop +) + +// NetworkEndpoint is the interface that needs to be implemented by endpoints +// of network layer protocols (e.g., ipv4, ipv6). +type NetworkEndpoint interface { + // DefaultTTL is the default time-to-live value (or hop limit, in ipv6) + // for this endpoint. + DefaultTTL() uint8 + + // MTU is the maximum transmission unit for this endpoint. This is + // generally calculated as the MTU of the underlying data link endpoint + // minus the network endpoint max header length. + MTU() uint32 + + // Capabilities returns the set of capabilities supported by the + // underlying link-layer endpoint. + Capabilities() LinkEndpointCapabilities + + // MaxHeaderLength returns the maximum size the network (and lower + // level layers combined) headers can have. Higher levels use this + // information to reserve space in the front of the packets they're + // building. + MaxHeaderLength() uint16 + + // WritePacket writes a packet to the given destination address and + // protocol. + WritePacket(r *Route, gso *GSO, hdr buffer.Prependable, payload buffer.VectorisedView, protocol tcpip.TransportProtocolNumber, ttl uint8, loop PacketLooping) *tcpip.Error + + // ID returns the network protocol endpoint ID. + ID() *NetworkEndpointID + + // NICID returns the id of the NIC this endpoint belongs to. + NICID() tcpip.NICID + + // HandlePacket is called by the link layer when new packets arrive to + // this network endpoint. + HandlePacket(r *Route, vv buffer.VectorisedView) + + // Close is called when the endpoint is reomved from a stack. + Close() +} + +// NetworkProtocol is the interface that needs to be implemented by network +// protocols (e.g., ipv4, ipv6) that want to be part of the networking stack. +type NetworkProtocol interface { + // Number returns the network protocol number. + Number() tcpip.NetworkProtocolNumber + + // MinimumPacketSize returns the minimum valid packet size of this + // network protocol. The stack automatically drops any packets smaller + // than this targeted at this protocol. + MinimumPacketSize() int + + // ParsePorts returns the source and destination addresses stored in a + // packet of this protocol. + ParseAddresses(v buffer.View) (src, dst tcpip.Address) + + // NewEndpoint creates a new endpoint of this protocol. + NewEndpoint(nicid tcpip.NICID, addr tcpip.Address, linkAddrCache LinkAddressCache, dispatcher TransportDispatcher, sender LinkEndpoint) (NetworkEndpoint, *tcpip.Error) + + // SetOption allows enabling/disabling protocol specific features. + // SetOption returns an error if the option is not supported or the + // provided option value is invalid. + SetOption(option interface{}) *tcpip.Error + + // Option allows retrieving protocol specific option values. + // Option returns an error if the option is not supported or the + // provided option value is invalid. + Option(option interface{}) *tcpip.Error +} + +// NetworkDispatcher contains the methods used by the network stack to deliver +// packets to the appropriate network endpoint after it has been handled by +// the data link layer. +type NetworkDispatcher interface { + // DeliverNetworkPacket finds the appropriate network protocol + // endpoint and hands the packet over for further processing. + DeliverNetworkPacket(linkEP LinkEndpoint, remote, local tcpip.LinkAddress, protocol tcpip.NetworkProtocolNumber, vv buffer.VectorisedView) +} + +// LinkEndpointCapabilities is the type associated with the capabilities +// supported by a link-layer endpoint. It is a set of bitfields. +type LinkEndpointCapabilities uint + +// The following are the supported link endpoint capabilities. +const ( + CapabilityNone LinkEndpointCapabilities = 0 + // CapabilityTXChecksumOffload indicates that the link endpoint supports + // checksum computation for outgoing packets and the stack can skip + // computing checksums when sending packets. + CapabilityTXChecksumOffload LinkEndpointCapabilities = 1 << iota + // CapabilityRXChecksumOffload indicates that the link endpoint supports + // checksum verification on received packets and that it's safe for the + // stack to skip checksum verification. + CapabilityRXChecksumOffload + CapabilityResolutionRequired + CapabilitySaveRestore + CapabilityDisconnectOk + CapabilityLoopback + CapabilityGSO +) + +// LinkEndpoint is the interface implemented by data link layer protocols (e.g., +// ethernet, loopback, raw) and used by network layer protocols to send packets +// out through the implementer's data link endpoint. +type LinkEndpoint interface { + // MTU is the maximum transmission unit for this endpoint. This is + // usually dictated by the backing physical network; when such a + // physical network doesn't exist, the limit is generally 64k, which + // includes the maximum size of an IP packet. + MTU() uint32 + + // Capabilities returns the set of capabilities supported by the + // endpoint. + Capabilities() LinkEndpointCapabilities + + // MaxHeaderLength returns the maximum size the data link (and + // lower level layers combined) headers can have. Higher levels use this + // information to reserve space in the front of the packets they're + // building. + MaxHeaderLength() uint16 + + // LinkAddress returns the link address (typically a MAC) of the + // link endpoint. + LinkAddress() tcpip.LinkAddress + + // WritePacket writes a packet with the given protocol through the given + // route. + // + // To participate in transparent bridging, a LinkEndpoint implementation + // should call eth.Encode with header.EthernetFields.SrcAddr set to + // r.LocalLinkAddress if it is provided. + WritePacket(r *Route, gso *GSO, hdr buffer.Prependable, payload buffer.VectorisedView, protocol tcpip.NetworkProtocolNumber) *tcpip.Error + + // Attach attaches the data link layer endpoint to the network-layer + // dispatcher of the stack. + Attach(dispatcher NetworkDispatcher) + + // IsAttached returns whether a NetworkDispatcher is attached to the + // endpoint. + IsAttached() bool +} + +// InjectableLinkEndpoint is a LinkEndpoint where inbound packets are +// delivered via the Inject method. +type InjectableLinkEndpoint interface { + LinkEndpoint + + // Inject injects an inbound packet. + Inject(protocol tcpip.NetworkProtocolNumber, vv buffer.VectorisedView) + + // WriteRawPacket writes a fully formed outbound packet directly to the link. + // + // dest is used by endpoints with multiple raw destinations. + WriteRawPacket(dest tcpip.Address, packet []byte) *tcpip.Error +} + +// A LinkAddressResolver is an extension to a NetworkProtocol that +// can resolve link addresses. +type LinkAddressResolver interface { + // LinkAddressRequest sends a request for the LinkAddress of addr. + // The request is sent on linkEP with localAddr as the source. + // + // A valid response will cause the discovery protocol's network + // endpoint to call AddLinkAddress. + LinkAddressRequest(addr, localAddr tcpip.Address, linkEP LinkEndpoint) *tcpip.Error + + // ResolveStaticAddress attempts to resolve address without sending + // requests. It either resolves the name immediately or returns the + // empty LinkAddress. + // + // It can be used to resolve broadcast addresses for example. + ResolveStaticAddress(addr tcpip.Address) (tcpip.LinkAddress, bool) + + // LinkAddressProtocol returns the network protocol of the + // addresses this this resolver can resolve. + LinkAddressProtocol() tcpip.NetworkProtocolNumber +} + +// A LinkAddressCache caches link addresses. +type LinkAddressCache interface { + // CheckLocalAddress determines if the given local address exists, and if it + // does not exist. + CheckLocalAddress(nicid tcpip.NICID, protocol tcpip.NetworkProtocolNumber, addr tcpip.Address) tcpip.NICID + + // AddLinkAddress adds a link address to the cache. + AddLinkAddress(nicid tcpip.NICID, addr tcpip.Address, linkAddr tcpip.LinkAddress) + + // GetLinkAddress looks up the cache to translate address to link address (e.g. IP -> MAC). + // If the LinkEndpoint requests address resolution and there is a LinkAddressResolver + // registered with the network protocol, the cache attempts to resolve the address + // and returns ErrWouldBlock. Waker is notified when address resolution is + // complete (success or not). + // + // If address resolution is required, ErrNoLinkAddress and a notification channel is + // returned for the top level caller to block. Channel is closed once address resolution + // is complete (success or not). + GetLinkAddress(nicid tcpip.NICID, addr, localAddr tcpip.Address, protocol tcpip.NetworkProtocolNumber, w *sleep.Waker) (tcpip.LinkAddress, <-chan struct{}, *tcpip.Error) + + // RemoveWaker removes a waker that has been added in GetLinkAddress(). + RemoveWaker(nicid tcpip.NICID, addr tcpip.Address, waker *sleep.Waker) +} + +// TransportProtocolFactory functions are used by the stack to instantiate +// transport protocols. +type TransportProtocolFactory func() TransportProtocol + +// NetworkProtocolFactory provides methods to be used by the stack to +// instantiate network protocols. +type NetworkProtocolFactory func() NetworkProtocol + +var ( + transportProtocols = make(map[string]TransportProtocolFactory) + networkProtocols = make(map[string]NetworkProtocolFactory) + + linkEPMu sync.RWMutex + nextLinkEndpointID tcpip.LinkEndpointID = 1 + linkEndpoints = make(map[tcpip.LinkEndpointID]LinkEndpoint) +) + +// RegisterTransportProtocolFactory registers a new transport protocol factory +// with the stack so that it becomes available to users of the stack. This +// function is intended to be called by init() functions of the protocols. +func RegisterTransportProtocolFactory(name string, p TransportProtocolFactory) { + transportProtocols[name] = p +} + +// RegisterNetworkProtocolFactory registers a new network protocol factory with +// the stack so that it becomes available to users of the stack. This function +// is intended to be called by init() functions of the protocols. +func RegisterNetworkProtocolFactory(name string, p NetworkProtocolFactory) { + networkProtocols[name] = p +} + +// RegisterLinkEndpoint register a link-layer protocol endpoint and returns an +// ID that can be used to refer to it. +func RegisterLinkEndpoint(linkEP LinkEndpoint) tcpip.LinkEndpointID { + linkEPMu.Lock() + defer linkEPMu.Unlock() + + v := nextLinkEndpointID + nextLinkEndpointID++ + + linkEndpoints[v] = linkEP + + return v +} + +// FindLinkEndpoint finds the link endpoint associated with the given ID. +func FindLinkEndpoint(id tcpip.LinkEndpointID) LinkEndpoint { + linkEPMu.RLock() + defer linkEPMu.RUnlock() + + return linkEndpoints[id] +} + +// GSOType is the type of GSO segments. +// +// +stateify savable +type GSOType int + +// Types of gso segments. +const ( + GSONone GSOType = iota + GSOTCPv4 + GSOTCPv6 +) + +// GSO contains generic segmentation offload properties. +// +// +stateify savable +type GSO struct { + // Type is one of GSONone, GSOTCPv4, etc. + Type GSOType + // NeedsCsum is set if the checksum offload is enabled. + NeedsCsum bool + // CsumOffset is offset after that to place checksum. + CsumOffset uint16 + + // Mss is maximum segment size. + MSS uint16 + // L3Len is L3 (IP) header length. + L3HdrLen uint16 + + // MaxSize is maximum GSO packet size. + MaxSize uint32 +} + +// GSOEndpoint provides access to GSO properties. +type GSOEndpoint interface { + // GSOMaxSize returns the maximum GSO packet size. + GSOMaxSize() uint32 +} |