diff options
Diffstat (limited to 'pkg/tcpip/tcpip.go')
-rw-r--r-- | pkg/tcpip/tcpip.go | 368 |
1 files changed, 315 insertions, 53 deletions
diff --git a/pkg/tcpip/tcpip.go b/pkg/tcpip/tcpip.go index 418e771d2..f62fd729f 100644 --- a/pkg/tcpip/tcpip.go +++ b/pkg/tcpip/tcpip.go @@ -57,6 +57,9 @@ type Error struct { // String implements fmt.Stringer.String. func (e *Error) String() string { + if e == nil { + return "<nil>" + } return e.msg } @@ -228,6 +231,13 @@ func (s *Subnet) Broadcast() Address { return Address(addr) } +// Equal returns true if s equals o. +// +// Needed to use cmp.Equal on Subnet as its fields are unexported. +func (s Subnet) Equal(o Subnet) bool { + return s == o +} + // NICID is a number that uniquely identifies a NIC. type NICID int32 @@ -252,7 +262,7 @@ type FullAddress struct { // This may not be used by all endpoint types. NIC NICID - // Addr is the network address. + // Addr is the network or link layer address. Addr Address // Port is the transport port. @@ -261,31 +271,34 @@ type FullAddress struct { Port uint16 } -// Payload provides an interface around data that is being sent to an endpoint. -// This allows the endpoint to request the amount of data it needs based on -// internal buffers without exposing them. 'p.Get(p.Size())' reads all the data. -type Payload interface { - // Get returns a slice containing exactly 'min(size, p.Size())' bytes. - Get(size int) ([]byte, *Error) - - // Size returns the payload size. - Size() int +// Payloader is an interface that provides data. +// +// This interface allows the endpoint to request the amount of data it needs +// based on internal buffers without exposing them. +type Payloader interface { + // FullPayload returns all available bytes. + FullPayload() ([]byte, *Error) + + // Payload returns a slice containing at most size bytes. + Payload(size int) ([]byte, *Error) } -// SlicePayload implements Payload on top of slices for convenience. +// SlicePayload implements Payloader for slices. +// +// This is typically used for tests. type SlicePayload []byte -// Get implements Payload. -func (s SlicePayload) Get(size int) ([]byte, *Error) { - if size > s.Size() { - size = s.Size() - } - return s[:size], nil +// FullPayload implements Payloader.FullPayload. +func (s SlicePayload) FullPayload() ([]byte, *Error) { + return s, nil } -// Size implements Payload. -func (s SlicePayload) Size() int { - return len(s) +// Payload implements Payloader.Payload. +func (s SlicePayload) Payload(size int) ([]byte, *Error) { + if size > len(s) { + size = len(s) + } + return s[:size], nil } // A ControlMessages contains socket control messages for IP sockets. @@ -295,7 +308,7 @@ type ControlMessages struct { // HasTimestamp indicates whether Timestamp is valid/set. HasTimestamp bool - // Timestamp is the time (in ns) that the last packed used to create + // Timestamp is the time (in ns) that the last packet used to create // the read data was received. Timestamp int64 @@ -304,6 +317,18 @@ type ControlMessages struct { // Inq is the number of bytes ready to be received. Inq int32 + + // HasTOS indicates whether Tos is valid/set. + HasTOS bool + + // TOS is the IPv4 type of service of the associated packet. + TOS int8 + + // HasTClass indicates whether Tclass is valid/set. + HasTClass bool + + // Tclass is the IPv6 traffic class of the associated packet. + TClass int32 } // Endpoint is the interface implemented by transport protocols (e.g., tcp, udp) @@ -338,7 +363,7 @@ type Endpoint interface { // ErrNoLinkAddress and a notification channel is returned for the caller to // block. Channel is closed once address resolution is complete (success or // not). The channel is only non-nil in this case. - Write(Payload, WriteOptions) (int64, <-chan struct{}, *Error) + Write(Payloader, WriteOptions) (int64, <-chan struct{}, *Error) // Peek reads data without consuming it from the endpoint. // @@ -398,6 +423,10 @@ type Endpoint interface { // SetSockOpt sets a socket option. opt should be one of the *Option types. SetSockOpt(opt interface{}) *Error + // SetSockOptInt sets a socket option, for simple cases where a value + // has the int type. + SetSockOptInt(opt SockOpt, v int) *Error + // GetSockOpt gets a socket option. opt should be a pointer to one of the // *Option types. GetSockOpt(opt interface{}) *Error @@ -419,6 +448,26 @@ type Endpoint interface { // IPTables returns the iptables for this endpoint's stack. IPTables() (iptables.IPTables, error) + + // Info returns a copy to the transport endpoint info. + Info() EndpointInfo + + // Stats returns a reference to the endpoint stats. + Stats() EndpointStats +} + +// EndpointInfo is the interface implemented by each endpoint info struct. +type EndpointInfo interface { + // IsEndpointInfo is an empty method to implement the tcpip.EndpointInfo + // marker interface. + IsEndpointInfo() +} + +// EndpointStats is the interface implemented by each endpoint stats struct. +type EndpointStats interface { + // IsEndpointStats is an empty method to implement the tcpip.EndpointStats + // marker interface. + IsEndpointStats() } // WriteOptions contains options for Endpoint.Write. @@ -432,16 +481,38 @@ type WriteOptions struct { // EndOfRecord has the same semantics as Linux's MSG_EOR. EndOfRecord bool + + // Atomic means that all data fetched from Payloader must be written to the + // endpoint. If Atomic is false, then data fetched from the Payloader may be + // discarded if available endpoint buffer space is unsufficient. + Atomic bool } // SockOpt represents socket options which values have the int type. type SockOpt int const ( - // ReceiveQueueSizeOption is used in GetSockOpt to specify that the number of - // unread bytes in the input buffer should be returned. + // ReceiveQueueSizeOption is used in GetSockOptInt to specify that the + // number of unread bytes in the input buffer should be returned. ReceiveQueueSizeOption SockOpt = iota + // SendBufferSizeOption is used by SetSockOptInt/GetSockOptInt to + // specify the send buffer size option. + SendBufferSizeOption + + // ReceiveBufferSizeOption is used by SetSockOptInt/GetSockOptInt to + // specify the receive buffer size option. + ReceiveBufferSizeOption + + // SendQueueSizeOption is used in GetSockOptInt to specify that the + // number of unread bytes in the output buffer should be returned. + SendQueueSizeOption + + // DelayOption is used by SetSockOpt/GetSockOpt to specify if data + // should be sent out immediately by the transport protocol. For TCP, + // it determines if the Nagle algorithm is on or off. + DelayOption + // TODO(b/137664753): convert all int socket options to be handled via // GetSockOptInt. ) @@ -450,27 +521,10 @@ const ( // the endpoint should be cleared and returned. type ErrorOption struct{} -// SendBufferSizeOption is used by SetSockOpt/GetSockOpt to specify the send -// buffer size option. -type SendBufferSizeOption int - -// ReceiveBufferSizeOption is used by SetSockOpt/GetSockOpt to specify the -// receive buffer size option. -type ReceiveBufferSizeOption int - -// SendQueueSizeOption is used in GetSockOpt to specify that the number of -// unread bytes in the output buffer should be returned. -type SendQueueSizeOption int - // V6OnlyOption is used by SetSockOpt/GetSockOpt to specify whether an IPv6 // socket is to be restricted to sending and receiving IPv6 packets only. type V6OnlyOption int -// DelayOption is used by SetSockOpt/GetSockOpt to specify if data should be -// sent out immediately by the transport protocol. For TCP, it determines if the -// Nagle algorithm is on or off. -type DelayOption int - // CorkOption is used by SetSockOpt/GetSockOpt to specify if data should be // held until segments are full by the TCP transport protocol. type CorkOption int @@ -483,6 +537,10 @@ type ReuseAddressOption int // to be bound to an identical socket address. type ReusePortOption int +// BindToDeviceOption is used by SetSockOpt/GetSockOpt to specify that sockets +// should bind only on a specific NIC. +type BindToDeviceOption string + // QuickAckOption is stubbed out in SetSockOpt/GetSockOpt. type QuickAckOption int @@ -518,6 +576,11 @@ type KeepaliveIntervalOption time.Duration // closed. type KeepaliveCountOption int +// TCPUserTimeoutOption is used by SetSockOpt/GetSockOpt to specify a user +// specified timeout for a given TCP connection. +// See: RFC5482 for details. +type TCPUserTimeoutOption time.Duration + // CongestionControlOption is used by SetSockOpt/GetSockOpt to set/get // the current congestion control algorithm. type CongestionControlOption string @@ -534,6 +597,22 @@ type ModerateReceiveBufferOption bool // Maximum Segment Size(MSS) value as specified using the TCP_MAXSEG option. type MaxSegOption int +// TTLOption is used by SetSockOpt/GetSockOpt to control the default TTL/hop +// limit value for unicast messages. The default is protocol specific. +// +// A zero value indicates the default. +type TTLOption uint8 + +// TCPLingerTimeoutOption is used by SetSockOpt/GetSockOpt to set/get the +// maximum duration for which a socket lingers in the TCP_FIN_WAIT_2 state +// before being marked closed. +type TCPLingerTimeoutOption time.Duration + +// TCPTimeWaitTimeoutOption is used by SetSockOpt/GetSockOpt to set/get the +// maximum duration for which a socket lingers in the TIME_WAIT state +// before being marked closed. +type TCPTimeWaitTimeoutOption time.Duration + // MulticastTTLOption is used by SetSockOpt/GetSockOpt to control the default // TTL value for multicast messages. The default is 1. type MulticastTTLOption uint8 @@ -575,6 +654,18 @@ type OutOfBandInlineOption int // datagram sockets are allowed to send packets to a broadcast address. type BroadcastOption int +// DefaultTTLOption is used by stack.(*Stack).NetworkProtocolOption to specify +// a default TTL. +type DefaultTTLOption uint8 + +// IPv4TOSOption is used by SetSockOpt/GetSockOpt to specify TOS +// for all subsequent outgoing IPv4 packets from the endpoint. +type IPv4TOSOption uint8 + +// IPv6TrafficClassOption is used by SetSockOpt/GetSockOpt to specify TOS +// for all subsequent outgoing IPv6 packets from the endpoint. +type IPv6TrafficClassOption uint8 + // Route is a row in the routing table. It specifies through which NIC (and // gateway) sets of packets should be routed. A row is considered viable if the // masked target address matches the destination address in the row. @@ -600,9 +691,6 @@ func (r Route) String() string { return out.String() } -// LinkEndpointID represents a data link layer endpoint. -type LinkEndpointID uint64 - // TransportProtocolNumber is the number of a transport protocol. type TransportProtocolNumber uint32 @@ -619,6 +707,11 @@ func (s *StatCounter) Increment() { s.IncrementBy(1) } +// Decrement minuses one to the counter. +func (s *StatCounter) Decrement() { + s.IncrementBy(^uint64(0)) +} + // Value returns the current value of the counter. func (s *StatCounter) Value() uint64 { return atomic.LoadUint64(&s.count) @@ -807,6 +900,14 @@ type IPStats struct { // OutgoingPacketErrors is the total number of IP packets which failed // to write to a link-layer endpoint. OutgoingPacketErrors *StatCounter + + // MalformedPacketsReceived is the total number of IP Packets that were + // dropped due to the IP packet header failing validation checks. + MalformedPacketsReceived *StatCounter + + // MalformedFragmentsReceived is the total number of IP Fragments that were + // dropped due to the fragment failing validation checks. + MalformedFragmentsReceived *StatCounter } // TCPStats collects TCP-specific stats. @@ -819,6 +920,23 @@ type TCPStats struct { // successfully via Listen. PassiveConnectionOpenings *StatCounter + // CurrentEstablished is the number of TCP connections for which the + // current state is either ESTABLISHED or CLOSE-WAIT. + CurrentEstablished *StatCounter + + // EstablishedResets is the number of times TCP connections have made + // a direct transition to the CLOSED state from either the + // ESTABLISHED state or the CLOSE-WAIT state. + EstablishedResets *StatCounter + + // EstablishedClosed is the number of times established TCP connections + // made a transition to CLOSED state. + EstablishedClosed *StatCounter + + // EstablishedTimedout is the number of times an established connection + // was reset because of keep-alive time out. + EstablishedTimedout *StatCounter + // ListenOverflowSynDrop is the number of times the listen queue overflowed // and a SYN was dropped. ListenOverflowSynDrop *StatCounter @@ -853,6 +971,9 @@ type TCPStats struct { // SegmentsSent is the number of TCP segments sent. SegmentsSent *StatCounter + // SegmentSendErrors is the number of TCP segments failed to be sent. + SegmentSendErrors *StatCounter + // ResetsSent is the number of TCP resets sent. ResetsSent *StatCounter @@ -905,6 +1026,9 @@ type UDPStats struct { // PacketsSent is the number of UDP datagrams sent via sendUDP. PacketsSent *StatCounter + + // PacketSendErrors is the number of datagrams failed to be sent. + PacketSendErrors *StatCounter } // Stats holds statistics about the networking stack. @@ -915,7 +1039,7 @@ type Stats struct { // stack that were for an unknown or unsupported protocol. UnknownProtocolRcvdPackets *StatCounter - // MalformedRcvPackets is the number of packets received by the stack + // MalformedRcvdPackets is the number of packets received by the stack // that were deemed malformed. MalformedRcvdPackets *StatCounter @@ -935,18 +1059,95 @@ type Stats struct { UDP UDPStats } +// ReceiveErrors collects packet receive errors within transport endpoint. +type ReceiveErrors struct { + // ReceiveBufferOverflow is the number of received packets dropped + // due to the receive buffer being full. + ReceiveBufferOverflow StatCounter + + // MalformedPacketsReceived is the number of incoming packets + // dropped due to the packet header being in a malformed state. + MalformedPacketsReceived StatCounter + + // ClosedReceiver is the number of received packets dropped because + // of receiving endpoint state being closed. + ClosedReceiver StatCounter +} + +// SendErrors collects packet send errors within the transport layer for +// an endpoint. +type SendErrors struct { + // SendToNetworkFailed is the number of packets failed to be written to + // the network endpoint. + SendToNetworkFailed StatCounter + + // NoRoute is the number of times we failed to resolve IP route. + NoRoute StatCounter + + // NoLinkAddr is the number of times we failed to resolve ARP. + NoLinkAddr StatCounter +} + +// ReadErrors collects segment read errors from an endpoint read call. +type ReadErrors struct { + // ReadClosed is the number of received packet drops because the endpoint + // was shutdown for read. + ReadClosed StatCounter + + // InvalidEndpointState is the number of times we found the endpoint state + // to be unexpected. + InvalidEndpointState StatCounter +} + +// WriteErrors collects packet write errors from an endpoint write call. +type WriteErrors struct { + // WriteClosed is the number of packet drops because the endpoint + // was shutdown for write. + WriteClosed StatCounter + + // InvalidEndpointState is the number of times we found the endpoint state + // to be unexpected. + InvalidEndpointState StatCounter + + // InvalidArgs is the number of times invalid input arguments were + // provided for endpoint Write call. + InvalidArgs StatCounter +} + +// TransportEndpointStats collects statistics about the endpoint. +type TransportEndpointStats struct { + // PacketsReceived is the number of successful packet receives. + PacketsReceived StatCounter + + // PacketsSent is the number of successful packet sends. + PacketsSent StatCounter + + // ReceiveErrors collects packet receive errors within transport layer. + ReceiveErrors ReceiveErrors + + // ReadErrors collects packet read errors from an endpoint read call. + ReadErrors ReadErrors + + // SendErrors collects packet send errors within the transport layer. + SendErrors SendErrors + + // WriteErrors collects packet write errors from an endpoint write call. + WriteErrors WriteErrors +} + +// IsEndpointStats is an empty method to implement the tcpip.EndpointStats +// marker interface. +func (*TransportEndpointStats) IsEndpointStats() {} + func fillIn(v reflect.Value) { for i := 0; i < v.NumField(); i++ { v := v.Field(i) - switch v.Kind() { - case reflect.Ptr: - if s := v.Addr().Interface().(**StatCounter); *s == nil { - *s = &StatCounter{} + if s, ok := v.Addr().Interface().(**StatCounter); ok { + if *s == nil { + *s = new(StatCounter) } - case reflect.Struct: + } else { fillIn(v) - default: - panic(fmt.Sprintf("unexpected type %s", v.Type())) } } } @@ -957,6 +1158,26 @@ func (s Stats) FillIn() Stats { return s } +// Clone returns a copy of the TransportEndpointStats by atomically reading +// each field. +func (src *TransportEndpointStats) Clone() TransportEndpointStats { + var dst TransportEndpointStats + clone(reflect.ValueOf(&dst).Elem(), reflect.ValueOf(src).Elem()) + return dst +} + +func clone(dst reflect.Value, src reflect.Value) { + for i := 0; i < dst.NumField(); i++ { + d := dst.Field(i) + s := src.Field(i) + if c, ok := s.Addr().Interface().(*StatCounter); ok { + d.Addr().Interface().(*StatCounter).IncrementBy(c.Value()) + } else { + clone(d, s) + } + } +} + // String implements the fmt.Stringer interface. func (a Address) String() string { switch len(a) { @@ -1082,6 +1303,47 @@ func (a AddressWithPrefix) String() string { return fmt.Sprintf("%s/%d", a.Address, a.PrefixLen) } +// Subnet converts the address and prefix into a Subnet value and returns it. +func (a AddressWithPrefix) Subnet() Subnet { + addrLen := len(a.Address) + if a.PrefixLen <= 0 { + return Subnet{ + address: Address(strings.Repeat("\x00", addrLen)), + mask: AddressMask(strings.Repeat("\x00", addrLen)), + } + } + if a.PrefixLen >= addrLen*8 { + return Subnet{ + address: a.Address, + mask: AddressMask(strings.Repeat("\xff", addrLen)), + } + } + + sa := make([]byte, addrLen) + sm := make([]byte, addrLen) + n := uint(a.PrefixLen) + for i := 0; i < addrLen; i++ { + if n >= 8 { + sa[i] = a.Address[i] + sm[i] = 0xff + n -= 8 + continue + } + sm[i] = ^byte(0xff >> n) + sa[i] = a.Address[i] & sm[i] + n = 0 + } + + // For extra caution, call NewSubnet rather than directly creating the Subnet + // value. If that fails it indicates a serious bug in this code, so panic is + // in order. + s, err := NewSubnet(Address(sa), AddressMask(sm)) + if err != nil { + panic("invalid subnet: " + err.Error()) + } + return s +} + // ProtocolAddress is an address and the network protocol it is associated // with. type ProtocolAddress struct { @@ -1102,8 +1364,8 @@ var ( // GetDanglingEndpoints returns all dangling endpoints. func GetDanglingEndpoints() []Endpoint { - es := make([]Endpoint, 0, len(danglingEndpoints)) danglingEndpointsMu.Lock() + es := make([]Endpoint, 0, len(danglingEndpoints)) for e := range danglingEndpoints { es = append(es, e) } |