summaryrefslogtreecommitdiffhomepage
path: root/pkg/tcpip/tcpip.go
diff options
context:
space:
mode:
Diffstat (limited to 'pkg/tcpip/tcpip.go')
-rw-r--r--pkg/tcpip/tcpip.go499
1 files changed, 499 insertions, 0 deletions
diff --git a/pkg/tcpip/tcpip.go b/pkg/tcpip/tcpip.go
new file mode 100644
index 000000000..f3a94f353
--- /dev/null
+++ b/pkg/tcpip/tcpip.go
@@ -0,0 +1,499 @@
+// Copyright 2016 The Netstack Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package tcpip provides the interfaces and related types that users of the
+// tcpip stack will use in order to create endpoints used to send and receive
+// data over the network stack.
+//
+// The starting point is the creation and configuration of a stack. A stack can
+// be created by calling the New() function of the tcpip/stack/stack package;
+// configuring a stack involves creating NICs (via calls to Stack.CreateNIC()),
+// adding network addresses (via calls to Stack.AddAddress()), and
+// setting a route table (via a call to Stack.SetRouteTable()).
+//
+// Once a stack is configured, endpoints can be created by calling
+// Stack.NewEndpoint(). Such endpoints can be used to send/receive data, connect
+// to peers, listen for connections, accept connections, etc., depending on the
+// transport protocol selected.
+package tcpip
+
+import (
+ "errors"
+ "fmt"
+ "strconv"
+ "strings"
+
+ "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer"
+ "gvisor.googlesource.com/gvisor/pkg/waiter"
+)
+
+// Error represents an error in the netstack error space. Using a special type
+// ensures that errors outside of this space are not accidentally introduced.
+type Error struct {
+ string
+}
+
+// String implements fmt.Stringer.String.
+func (e *Error) String() string {
+ return e.string
+}
+
+// Errors that can be returned by the network stack.
+var (
+ ErrUnknownProtocol = &Error{"unknown protocol"}
+ ErrUnknownNICID = &Error{"unknown nic id"}
+ ErrUnknownProtocolOption = &Error{"unknown option for protocol"}
+ ErrDuplicateNICID = &Error{"duplicate nic id"}
+ ErrDuplicateAddress = &Error{"duplicate address"}
+ ErrNoRoute = &Error{"no route"}
+ ErrBadLinkEndpoint = &Error{"bad link layer endpoint"}
+ ErrAlreadyBound = &Error{"endpoint already bound"}
+ ErrInvalidEndpointState = &Error{"endpoint is in invalid state"}
+ ErrAlreadyConnecting = &Error{"endpoint is already connecting"}
+ ErrAlreadyConnected = &Error{"endpoint is already connected"}
+ ErrNoPortAvailable = &Error{"no ports are available"}
+ ErrPortInUse = &Error{"port is in use"}
+ ErrBadLocalAddress = &Error{"bad local address"}
+ ErrClosedForSend = &Error{"endpoint is closed for send"}
+ ErrClosedForReceive = &Error{"endpoint is closed for receive"}
+ ErrWouldBlock = &Error{"operation would block"}
+ ErrConnectionRefused = &Error{"connection was refused"}
+ ErrTimeout = &Error{"operation timed out"}
+ ErrAborted = &Error{"operation aborted"}
+ ErrConnectStarted = &Error{"connection attempt started"}
+ ErrDestinationRequired = &Error{"destination address is required"}
+ ErrNotSupported = &Error{"operation not supported"}
+ ErrQueueSizeNotSupported = &Error{"queue size querying not supported"}
+ ErrNotConnected = &Error{"endpoint not connected"}
+ ErrConnectionReset = &Error{"connection reset by peer"}
+ ErrConnectionAborted = &Error{"connection aborted"}
+ ErrNoSuchFile = &Error{"no such file"}
+ ErrInvalidOptionValue = &Error{"invalid option value specified"}
+ ErrNoLinkAddress = &Error{"no remote link address"}
+ ErrBadAddress = &Error{"bad address"}
+)
+
+// Errors related to Subnet
+var (
+ errSubnetLengthMismatch = errors.New("subnet length of address and mask differ")
+ errSubnetAddressMasked = errors.New("subnet address has bits set outside the mask")
+)
+
+// Address is a byte slice cast as a string that represents the address of a
+// network node. Or, in the case of unix endpoints, it may represent a path.
+type Address string
+
+// AddressMask is a bitmask for an address.
+type AddressMask string
+
+// Subnet is a subnet defined by its address and mask.
+type Subnet struct {
+ address Address
+ mask AddressMask
+}
+
+// NewSubnet creates a new Subnet, checking that the address and mask are the same length.
+func NewSubnet(a Address, m AddressMask) (Subnet, error) {
+ if len(a) != len(m) {
+ return Subnet{}, errSubnetLengthMismatch
+ }
+ for i := 0; i < len(a); i++ {
+ if a[i]&^m[i] != 0 {
+ return Subnet{}, errSubnetAddressMasked
+ }
+ }
+ return Subnet{a, m}, nil
+}
+
+// Contains returns true iff the address is of the same length and matches the
+// subnet address and mask.
+func (s *Subnet) Contains(a Address) bool {
+ if len(a) != len(s.address) {
+ return false
+ }
+ for i := 0; i < len(a); i++ {
+ if a[i]&s.mask[i] != s.address[i] {
+ return false
+ }
+ }
+ return true
+}
+
+// ID returns the subnet ID.
+func (s *Subnet) ID() Address {
+ return s.address
+}
+
+// Bits returns the number of ones (network bits) and zeros (host bits) in the
+// subnet mask.
+func (s *Subnet) Bits() (ones int, zeros int) {
+ for _, b := range []byte(s.mask) {
+ for i := uint(0); i < 8; i++ {
+ if b&(1<<i) == 0 {
+ zeros++
+ } else {
+ ones++
+ }
+ }
+ }
+ return
+}
+
+// Prefix returns the number of bits before the first host bit.
+func (s *Subnet) Prefix() int {
+ for i, b := range []byte(s.mask) {
+ for j := 7; j >= 0; j-- {
+ if b&(1<<uint(j)) == 0 {
+ return i*8 + 7 - j
+ }
+ }
+ }
+ return len(s.mask) * 8
+}
+
+// NICID is a number that uniquely identifies a NIC.
+type NICID int32
+
+// ShutdownFlags represents flags that can be passed to the Shutdown() method
+// of the Endpoint interface.
+type ShutdownFlags int
+
+// Values of the flags that can be passed to the Shutdown() method. They can
+// be OR'ed together.
+const (
+ ShutdownRead ShutdownFlags = 1 << iota
+ ShutdownWrite
+)
+
+// FullAddress represents a full transport node address, as required by the
+// Connect() and Bind() methods.
+type FullAddress struct {
+ // NIC is the ID of the NIC this address refers to.
+ //
+ // This may not be used by all endpoint types.
+ NIC NICID
+
+ // Addr is the network address.
+ Addr Address
+
+ // Port is the transport port.
+ //
+ // This may not be used by all endpoint types.
+ Port uint16
+}
+
+// Payload provides an interface around data that is being sent to an endpoint.
+// This allows the endpoint to request the amount of data it needs based on
+// internal buffers without exposing them. 'p.Get(p.Size())' reads all the data.
+type Payload interface {
+ // Get returns a slice containing exactly 'min(size, p.Size())' bytes.
+ Get(size int) ([]byte, *Error)
+
+ // Size returns the payload size.
+ Size() int
+}
+
+// SlicePayload implements Payload on top of slices for convenience.
+type SlicePayload []byte
+
+// Get implements Payload.
+func (s SlicePayload) Get(size int) ([]byte, *Error) {
+ if size > s.Size() {
+ size = s.Size()
+ }
+ return s[:size], nil
+}
+
+// Size implements Payload.
+func (s SlicePayload) Size() int {
+ return len(s)
+}
+
+// Endpoint is the interface implemented by transport protocols (e.g., tcp, udp)
+// that exposes functionality like read, write, connect, etc. to users of the
+// networking stack.
+type Endpoint interface {
+ // Close puts the endpoint in a closed state and frees all resources
+ // associated with it.
+ Close()
+
+ // Read reads data from the endpoint and optionally returns the sender.
+ // This method does not block if there is no data pending.
+ // It will also either return an error or data, never both.
+ Read(*FullAddress) (buffer.View, *Error)
+
+ // Write writes data to the endpoint's peer. This method does not block if
+ // the data cannot be written.
+ //
+ // Unlike io.Writer.Write, Endpoint.Write transfers ownership of any bytes
+ // successfully written to the Endpoint. That is, if a call to
+ // Write(SlicePayload{data}) returns (n, err), it may retain data[:n], and
+ // the caller should not use data[:n] after Write returns.
+ //
+ // Note that unlike io.Writer.Write, it is not an error for Write to
+ // perform a partial write.
+ Write(Payload, WriteOptions) (uintptr, *Error)
+
+ // Peek reads data without consuming it from the endpoint.
+ //
+ // This method does not block if there is no data pending.
+ Peek([][]byte) (uintptr, *Error)
+
+ // Connect connects the endpoint to its peer. Specifying a NIC is
+ // optional.
+ //
+ // There are three classes of return values:
+ // nil -- the attempt to connect succeeded.
+ // ErrConnectStarted/ErrAlreadyConnecting -- the connect attempt started
+ // but hasn't completed yet. In this case, the caller must call Connect
+ // or GetSockOpt(ErrorOption) when the endpoint becomes writable to
+ // get the actual result. The first call to Connect after the socket has
+ // connected returns nil. Calling connect again results in ErrAlreadyConnected.
+ // Anything else -- the attempt to connect failed.
+ Connect(address FullAddress) *Error
+
+ // Shutdown closes the read and/or write end of the endpoint connection
+ // to its peer.
+ Shutdown(flags ShutdownFlags) *Error
+
+ // Listen puts the endpoint in "listen" mode, which allows it to accept
+ // new connections.
+ Listen(backlog int) *Error
+
+ // Accept returns a new endpoint if a peer has established a connection
+ // to an endpoint previously set to listen mode. This method does not
+ // block if no new connections are available.
+ //
+ // The returned Queue is the wait queue for the newly created endpoint.
+ Accept() (Endpoint, *waiter.Queue, *Error)
+
+ // Bind binds the endpoint to a specific local address and port.
+ // Specifying a NIC is optional.
+ //
+ // An optional commit function will be executed atomically with respect
+ // to binding the endpoint. If this returns an error, the bind will not
+ // occur and the error will be propagated back to the caller.
+ Bind(address FullAddress, commit func() *Error) *Error
+
+ // GetLocalAddress returns the address to which the endpoint is bound.
+ GetLocalAddress() (FullAddress, *Error)
+
+ // GetRemoteAddress returns the address to which the endpoint is
+ // connected.
+ GetRemoteAddress() (FullAddress, *Error)
+
+ // Readiness returns the current readiness of the endpoint. For example,
+ // if waiter.EventIn is set, the endpoint is immediately readable.
+ Readiness(mask waiter.EventMask) waiter.EventMask
+
+ // SetSockOpt sets a socket option. opt should be one of the *Option types.
+ SetSockOpt(opt interface{}) *Error
+
+ // GetSockOpt gets a socket option. opt should be a pointer to one of the
+ // *Option types.
+ GetSockOpt(opt interface{}) *Error
+}
+
+// WriteOptions contains options for Endpoint.Write.
+type WriteOptions struct {
+ // If To is not nil, write to the given address instead of the endpoint's
+ // peer.
+ To *FullAddress
+
+ // More has the same semantics as Linux's MSG_MORE.
+ More bool
+
+ // EndOfRecord has the same semantics as Linux's MSG_EOR.
+ EndOfRecord bool
+}
+
+// ErrorOption is used in GetSockOpt to specify that the last error reported by
+// the endpoint should be cleared and returned.
+type ErrorOption struct{}
+
+// SendBufferSizeOption is used by SetSockOpt/GetSockOpt to specify the send
+// buffer size option.
+type SendBufferSizeOption int
+
+// ReceiveBufferSizeOption is used by SetSockOpt/GetSockOpt to specify the
+// receive buffer size option.
+type ReceiveBufferSizeOption int
+
+// SendQueueSizeOption is used in GetSockOpt to specify that the number of
+// unread bytes in the output buffer should be returned.
+type SendQueueSizeOption int
+
+// ReceiveQueueSizeOption is used in GetSockOpt to specify that the number of
+// unread bytes in the input buffer should be returned.
+type ReceiveQueueSizeOption int
+
+// V6OnlyOption is used by SetSockOpt/GetSockOpt to specify whether an IPv6
+// socket is to be restricted to sending and receiving IPv6 packets only.
+type V6OnlyOption int
+
+// NoDelayOption is used by SetSockOpt/GetSockOpt to specify if data should be
+// sent out immediately by the transport protocol. For TCP, it determines if the
+// Nagle algorithm is on or off.
+type NoDelayOption int
+
+// ReuseAddressOption is used by SetSockOpt/GetSockOpt to specify whether Bind()
+// should allow reuse of local address.
+type ReuseAddressOption int
+
+// PasscredOption is used by SetSockOpt/GetSockOpt to specify whether
+// SCM_CREDENTIALS socket control messages are enabled.
+//
+// Only supported on Unix sockets.
+type PasscredOption int
+
+// TCPInfoOption is used by GetSockOpt to expose TCP statistics.
+//
+// TODO: Add and populate stat fields.
+type TCPInfoOption struct{}
+
+// Route is a row in the routing table. It specifies through which NIC (and
+// gateway) sets of packets should be routed. A row is considered viable if the
+// masked target address matches the destination adddress in the row.
+type Route struct {
+ // Destination is the address that must be matched against the masked
+ // target address to check if this row is viable.
+ Destination Address
+
+ // Mask specifies which bits of the Destination and the target address
+ // must match for this row to be viable.
+ Mask Address
+
+ // Gateway is the gateway to be used if this row is viable.
+ Gateway Address
+
+ // NIC is the id of the nic to be used if this row is viable.
+ NIC NICID
+}
+
+// Match determines if r is viable for the given destination address.
+func (r *Route) Match(addr Address) bool {
+ if len(addr) != len(r.Destination) {
+ return false
+ }
+
+ for i := 0; i < len(r.Destination); i++ {
+ if (addr[i] & r.Mask[i]) != r.Destination[i] {
+ return false
+ }
+ }
+
+ return true
+}
+
+// LinkEndpointID represents a data link layer endpoint.
+type LinkEndpointID uint64
+
+// TransportProtocolNumber is the number of a transport protocol.
+type TransportProtocolNumber uint32
+
+// NetworkProtocolNumber is the number of a network protocol.
+type NetworkProtocolNumber uint32
+
+// Stats holds statistics about the networking stack.
+type Stats struct {
+ // UnknownProtocolRcvdPackets is the number of packets received by the
+ // stack that were for an unknown or unsupported protocol.
+ UnknownProtocolRcvdPackets uint64
+
+ // UnknownNetworkEndpointRcvdPackets is the number of packets received
+ // by the stack that were for a supported network protocol, but whose
+ // destination address didn't having a matching endpoint.
+ UnknownNetworkEndpointRcvdPackets uint64
+
+ // MalformedRcvPackets is the number of packets received by the stack
+ // that were deemed malformed.
+ MalformedRcvdPackets uint64
+
+ // DroppedPackets is the number of packets dropped due to full queues.
+ DroppedPackets uint64
+}
+
+// String implements the fmt.Stringer interface.
+func (a Address) String() string {
+ switch len(a) {
+ case 4:
+ return fmt.Sprintf("%d.%d.%d.%d", int(a[0]), int(a[1]), int(a[2]), int(a[3]))
+ default:
+ return fmt.Sprintf("%x", []byte(a))
+ }
+}
+
+// To4 converts the IPv4 address to a 4-byte representation.
+// If the address is not an IPv4 address, To4 returns "".
+func (a Address) To4() Address {
+ const (
+ ipv4len = 4
+ ipv6len = 16
+ )
+ if len(a) == ipv4len {
+ return a
+ }
+ if len(a) == ipv6len &&
+ isZeros(a[0:10]) &&
+ a[10] == 0xff &&
+ a[11] == 0xff {
+ return a[12:16]
+ }
+ return ""
+}
+
+// isZeros reports whether a is all zeros.
+func isZeros(a Address) bool {
+ for i := 0; i < len(a); i++ {
+ if a[i] != 0 {
+ return false
+ }
+ }
+ return true
+}
+
+// LinkAddress is a byte slice cast as a string that represents a link address.
+// It is typically a 6-byte MAC address.
+type LinkAddress string
+
+// String implements the fmt.Stringer interface.
+func (a LinkAddress) String() string {
+ switch len(a) {
+ case 6:
+ return fmt.Sprintf("%02x:%02x:%02x:%02x:%02x:%02x", a[0], a[1], a[2], a[3], a[4], a[5])
+ default:
+ return fmt.Sprintf("%x", []byte(a))
+ }
+}
+
+// ParseMACAddress parses an IEEE 802 address.
+//
+// It must be in the format aa:bb:cc:dd:ee:ff or aa-bb-cc-dd-ee-ff.
+func ParseMACAddress(s string) (LinkAddress, error) {
+ parts := strings.FieldsFunc(s, func(c rune) bool {
+ return c == ':' || c == '-'
+ })
+ if len(parts) != 6 {
+ return "", fmt.Errorf("inconsistent parts: %s", s)
+ }
+ addr := make([]byte, 0, len(parts))
+ for _, part := range parts {
+ u, err := strconv.ParseUint(part, 16, 8)
+ if err != nil {
+ return "", fmt.Errorf("invalid hex digits: %s", s)
+ }
+ addr = append(addr, byte(u))
+ }
+ return LinkAddress(addr), nil
+}
+
+// ProtocolAddress is an address and the network protocol it is associated
+// with.
+type ProtocolAddress struct {
+ // Protocol is the protocol of the address.
+ Protocol NetworkProtocolNumber
+
+ // Address is a network address.
+ Address Address
+}