// Copyright 2018 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package linux

import (
	"gvisor.dev/gvisor/pkg/binary"
	"gvisor.dev/gvisor/pkg/marshal"
)

// Address families, from linux/socket.h.
//
// The values listed here are contiguous, running from AF_UNSPEC (0) through
// AF_VSOCK (40).
const (
	AF_UNSPEC     = 0
	AF_UNIX       = 1
	AF_INET       = 2
	AF_AX25       = 3
	AF_IPX        = 4
	AF_APPLETALK  = 5
	AF_NETROM     = 6
	AF_BRIDGE     = 7
	AF_ATMPVC     = 8
	AF_X25        = 9
	AF_INET6      = 10
	AF_ROSE       = 11
	AF_DECnet     = 12
	AF_NETBEUI    = 13
	AF_SECURITY   = 14
	AF_KEY        = 15
	AF_NETLINK    = 16
	AF_PACKET     = 17
	AF_ASH        = 18
	AF_ECONET     = 19
	AF_ATMSVC     = 20
	AF_RDS        = 21
	AF_SNA        = 22
	AF_IRDA       = 23
	AF_PPPOX      = 24
	AF_WANPIPE    = 25
	AF_LLC        = 26
	AF_IB         = 27
	AF_MPLS       = 28
	AF_CAN        = 29
	AF_TIPC       = 30
	AF_BLUETOOTH  = 31
	AF_IUCV       = 32
	AF_RXRPC      = 33
	AF_ISDN       = 34
	AF_PHONET     = 35
	AF_IEEE802154 = 36
	AF_CAIF       = 37
	AF_ALG        = 38
	AF_NFC        = 39
	AF_VSOCK      = 40
)

// sendmsg(2)/recvmsg(2) flags, from linux/socket.h.
//
// MSG_TRYHARD has the same value as MSG_DONTROUTE, and MSG_EOF is declared
// as an alias of MSG_FIN, so each pair cannot be told apart by value.
const (
	MSG_OOB              = 0x1
	MSG_PEEK             = 0x2
	MSG_DONTROUTE        = 0x4
	MSG_TRYHARD          = 0x4
	MSG_CTRUNC           = 0x8
	MSG_PROBE            = 0x10
	MSG_TRUNC            = 0x20
	MSG_DONTWAIT         = 0x40
	MSG_EOR              = 0x80
	MSG_WAITALL          = 0x100
	MSG_FIN              = 0x200
	MSG_EOF              = MSG_FIN
	MSG_SYN              = 0x400
	MSG_CONFIRM          = 0x800
	MSG_RST              = 0x1000
	MSG_ERRQUEUE         = 0x2000
	MSG_NOSIGNAL         = 0x4000
	MSG_MORE             = 0x8000
	MSG_WAITFORONE       = 0x10000
	MSG_SENDPAGE_NOTLAST = 0x20000
	MSG_ZEROCOPY         = 0x4000000
	MSG_FASTOPEN         = 0x20000000
	MSG_CMSG_CLOEXEC     = 0x40000000
)

// Set/get socket option levels, from socket.h.
const (
	SOL_IP      = 0
	SOL_SOCKET  = 1
	SOL_TCP     = 6
	SOL_UDP     = 17
	SOL_IPV6    = 41
	SOL_ICMPV6  = 58
	SOL_RAW     = 255
	SOL_PACKET  = 263
	SOL_NETLINK = 270
)

// A SockType is a type (as opposed to family) of sockets. These are enumerated
// below as SOCK_* constants.
type SockType int

// Socket types, from linux/net.h.
//
// Unlike the other constant groups in this file, these are typed as SockType.
const (
	SOCK_STREAM    SockType = 1
	SOCK_DGRAM     SockType = 2
	SOCK_RAW       SockType = 3
	SOCK_RDM       SockType = 4
	SOCK_SEQPACKET SockType = 5
	SOCK_DCCP      SockType = 6
	SOCK_PACKET    SockType = 10
)

// SOCK_TYPE_MASK covers all of the above socket types (the low four bits).
// The remaining bits are flags. From linux/net.h.
const SOCK_TYPE_MASK = 0xf

// socket(2)/socketpair(2)/accept4(2) flags, from linux/net.h.
//
// Each flag is defined as the corresponding O_* constant, so the two sets of
// values are always identical.
const (
	SOCK_CLOEXEC  = O_CLOEXEC
	SOCK_NONBLOCK = O_NONBLOCK
)

// shutdown(2) how commands, from <linux/net.h>.
const (
	SHUT_RD   = 0
	SHUT_WR   = 1
	SHUT_RDWR = 2
)

// Packet types, from <linux/if_packet.h>.
const (
	PACKET_HOST      = 0 // To us.
	PACKET_BROADCAST = 1 // To all.
	PACKET_MULTICAST = 2 // To group.
	PACKET_OTHERHOST = 3 // To someone else.
	PACKET_OUTGOING  = 4 // Outgoing of any type.
)

// Socket options from socket.h.
const (
	SO_DEBUG                 = 1
	SO_REUSEADDR             = 2
	SO_TYPE                  = 3
	SO_ERROR                 = 4
	SO_DONTROUTE             = 5
	SO_BROADCAST             = 6
	SO_SNDBUF                = 7
	SO_RCVBUF                = 8
	SO_KEEPALIVE             = 9
	SO_OOBINLINE             = 10
	SO_NO_CHECK              = 11
	SO_PRIORITY              = 12
	SO_LINGER                = 13
	SO_BSDCOMPAT             = 14
	SO_REUSEPORT             = 15
	SO_PASSCRED              = 16
	SO_PEERCRED              = 17
	SO_RCVLOWAT              = 18
	SO_SNDLOWAT              = 19
	SO_RCVTIMEO              = 20
	SO_SNDTIMEO              = 21
	SO_BINDTODEVICE          = 25
	SO_ATTACH_FILTER         = 26
	SO_DETACH_FILTER         = 27
	SO_GET_FILTER            = SO_ATTACH_FILTER // Alias: same value as SO_ATTACH_FILTER.
	SO_PEERNAME              = 28
	SO_TIMESTAMP             = 29
	SO_ACCEPTCONN            = 30
	SO_PEERSEC               = 31
	SO_SNDBUFFORCE           = 32
	SO_RCVBUFFORCE           = 33
	SO_PASSSEC               = 34
	SO_TIMESTAMPNS           = 35
	SO_MARK                  = 36
	SO_TIMESTAMPING          = 37
	SO_PROTOCOL              = 38
	SO_DOMAIN                = 39
	SO_RXQ_OVFL              = 40
	SO_WIFI_STATUS           = 41
	SO_PEEK_OFF              = 42
	SO_NOFCS                 = 43
	SO_LOCK_FILTER           = 44
	SO_SELECT_ERR_QUEUE      = 45
	SO_BUSY_POLL             = 46
	SO_MAX_PACING_RATE       = 47
	SO_BPF_EXTENSIONS        = 48
	SO_INCOMING_CPU          = 49
	SO_ATTACH_BPF            = 50
	SO_ATTACH_REUSEPORT_CBPF = 51
	SO_ATTACH_REUSEPORT_EBPF = 52
	SO_CNX_ADVICE            = 53
	SO_MEMINFO               = 55
	SO_INCOMING_NAPI_ID      = 56
	SO_COOKIE                = 57
	SO_PEERGROUPS            = 59
	SO_ZEROCOPY              = 60
	SO_TXTIME                = 61
)

// enum socket_state, from uapi/linux/net.h.
const (
	SS_FREE          = 0 // Not allocated.
	SS_UNCONNECTED   = 1 // Unconnected to any socket.
	SS_CONNECTING    = 2 // In process of connecting.
	SS_CONNECTED     = 3 // Connected to socket.
	SS_DISCONNECTING = 4 // In process of disconnecting.
)

// TCP protocol states, from include/net/tcp_states.h.
//
// The constants are typed uint32 and numbered with iota starting at 1, so
// TCP_ESTABLISHED is 1 and each following state is one greater than the
// previous one. The declaration order is therefore significant.
const (
	TCP_ESTABLISHED uint32 = iota + 1
	TCP_SYN_SENT
	TCP_SYN_RECV
	TCP_FIN_WAIT1
	TCP_FIN_WAIT2
	TCP_TIME_WAIT
	TCP_CLOSE
	TCP_CLOSE_WAIT
	TCP_LAST_ACK
	TCP_LISTEN
	TCP_CLOSING
	TCP_NEW_SYN_RECV
)

// SockAddrMax is the maximum size of a struct sockaddr, from
// uapi/linux/socket.h.
const SockAddrMax = 128

// InetAddr is struct in_addr, from uapi/linux/in.h.
//
// +marshal
type InetAddr [4]byte

// SockAddrInet is struct sockaddr_in, from uapi/linux/in.h.
//
// +marshal
type SockAddrInet struct {
	Family uint16
	Port   uint16
	Addr   InetAddr
	_      [8]uint8 // pad to sizeof(struct sockaddr).
}

// Inet6MulticastRequest is struct ipv6_mreq, from uapi/linux/in6.h.
type Inet6MulticastRequest struct {
	MulticastAddr  Inet6Addr
	InterfaceIndex int32
}

// InetMulticastRequest is struct ip_mreq, from uapi/linux/in.h.
type InetMulticastRequest struct {
	MulticastAddr InetAddr
	InterfaceAddr InetAddr
}

// InetMulticastRequestWithNIC is struct ip_mreqn, from uapi/linux/in.h.
//
// It embeds InetMulticastRequest and appends an interface index.
type InetMulticastRequestWithNIC struct {
	InetMulticastRequest
	InterfaceIndex int32
}

// Inet6Addr is struct in6_addr, from uapi/linux/in6.h.
//
// +marshal
type Inet6Addr [16]byte

// SockAddrInet6 is struct sockaddr_in6, from uapi/linux/in6.h.
//
// Note that Addr is declared as a plain [16]byte rather than as Inet6Addr.
//
// +marshal
type SockAddrInet6 struct {
	Family   uint16
	Port     uint16
	Flowinfo uint32
	Addr     [16]byte
	Scope_id uint32
}

// SockAddrLink is a struct sockaddr_ll, from uapi/linux/if_packet.h.
//
// +marshal
type SockAddrLink struct {
	Family          uint16
	Protocol        uint16
	InterfaceIndex  int32
	ARPHardwareType uint16
	PacketType      byte
	HardwareAddrLen byte
	HardwareAddr    [8]byte
}

// UnixPathMax is the maximum length of the path in an AF_UNIX socket.
//
// From uapi/linux/un.h.
const UnixPathMax = 108

// SockAddrUnix is struct sockaddr_un, from uapi/linux/un.h.
//
// +marshal
type SockAddrUnix struct {
	Family uint16
	Path   [UnixPathMax]int8
}

// SockAddr represents a union of valid socket address types. This is logically
// equivalent to struct sockaddr. SockAddr ensures that a well-defined set of
// types can be used as socket addresses.
type SockAddr interface {
	marshal.Marshallable

	// implementsSockAddr exists purely to allow a type to indicate that it
	// implements this interface. This method is a no-op and shouldn't be
	// called.
	implementsSockAddr()
}

// The implementsSockAddr methods below are intentionally empty; they only
// mark each concrete address type as belonging to the SockAddr set.
func (s *SockAddrInet) implementsSockAddr()    {}
func (s *SockAddrInet6) implementsSockAddr()   {}
func (s *SockAddrLink) implementsSockAddr()    {}
func (s *SockAddrUnix) implementsSockAddr()    {}
func (s *SockAddrNetlink) implementsSockAddr() {}

// Linger is struct linger, from include/linux/socket.h.
//
// +marshal
type Linger struct {
	OnOff  int32
	Linger int32
}

// SizeOfLinger is the binary size of a Linger struct (two int32 fields).
const SizeOfLinger = 8

// TCPInfo is a collection of TCP statistics.
//
// From uapi/linux/tcp.h. Newer versions of Linux continue to add new fields to
// the end of this struct or within existing unused space, so its size grows
// over time. The current iteration is based on linux v4.17. New versions are
// always backwards compatible.
//
// +marshal
type TCPInfo struct {
	// State is the state of the connection.
	State uint8

	// CaState is the congestion control state.
	CaState uint8

	// Retransmits is the number of retransmissions triggered by RTO.
	Retransmits uint8

	// Probes is the number of unanswered zero window probes.
	Probes uint8

	// Backoff indicates exponential backoff.
	Backoff uint8

	// Options indicates the options enabled for the connection.
	Options uint8

	// WindowScale is the combination of snd_wscale (first 4 bits) and
	// rcv_wscale (second 4 bits).
	WindowScale uint8

	// DeliveryRateAppLimited is a boolean and only the first bit is
	// meaningful.
	DeliveryRateAppLimited uint8

	// RTO is the retransmission timeout.
	RTO uint32

	// ATO is the acknowledgement timeout interval.
	ATO uint32

	// SndMss is the send maximum segment size.
	SndMss uint32

	// RcvMss is the receive maximum segment size.
	RcvMss uint32

	// Unacked is the number of packets sent but not acknowledged.
	Unacked uint32

	// Sacked is the number of packets which are selectively acknowledged.
	Sacked uint32

	// Lost is the number of packets marked as lost.
	Lost uint32

	// Retrans is the number of retransmitted packets.
	Retrans uint32

	// Fackets is not used and is always zero.
	Fackets uint32

	// Times.
	LastDataSent uint32
	LastAckSent  uint32
	LastDataRecv uint32
	LastAckRecv  uint32

	// Metrics.
	PMTU        uint32
	RcvSsthresh uint32
	RTT         uint32
	RTTVar      uint32
	SndSsthresh uint32
	SndCwnd     uint32
	Advmss      uint32
	Reordering  uint32

	// RcvRTT is the receiver round trip time.
	RcvRTT uint32

	// RcvSpace is the current buffer space available for receiving data.
	RcvSpace uint32

	// TotalRetrans is the total number of retransmits seen since the start
	// of the connection.
	TotalRetrans uint32

	// PacingRate is the pacing rate in bytes per second.
	PacingRate uint64

	// MaxPacingRate is the maximum pacing rate.
	MaxPacingRate uint64

	// BytesAcked is RFC4898 tcpEStatsAppHCThruOctetsAcked.
	BytesAcked uint64

	// BytesReceived is RFC4898 tcpEStatsAppHCThruOctetsReceived.
	BytesReceived uint64

	// SegsOut is RFC4898 tcpEStatsPerfSegsOut.
	SegsOut uint32

	// SegsIn is RFC4898 tcpEStatsPerfSegsIn.
	SegsIn uint32

	// NotSentBytes is the amount of bytes in the write queue that are not
	// yet sent.
	NotSentBytes uint32

	// MinRTT is the minimum round trip time seen in the connection.
	MinRTT uint32

	// DataSegsIn is RFC4898 tcpEStatsDataSegsIn.
	DataSegsIn uint32

	// DataSegsOut is RFC4898 tcpEStatsDataSegsOut.
	DataSegsOut uint32

	// DeliveryRate is the most recent delivery rate in bytes per second.
	DeliveryRate uint64

	// BusyTime is the time in microseconds busy sending data.
	BusyTime uint64

	// RwndLimited is the time in microseconds limited by receive window.
	RwndLimited uint64

	// SndBufLimited is the time in microseconds limited by send buffer.
	SndBufLimited uint64

	// Delivered is the total data packets delivered including retransmits.
	Delivered uint32

	// DeliveredCE is the total ECE marked data packets delivered including
	// retransmits.
	DeliveredCE uint32

	// BytesSent is RFC4898 tcpEStatsPerfHCDataOctetsOut.
	BytesSent uint64

	// BytesRetrans is RFC4898 tcpEStatsPerfOctetsRetrans.
	BytesRetrans uint64

	// DSACKDups is RFC4898 tcpEStatsStackDSACKDups.
	DSACKDups uint32

	// ReordSeen is the number of reordering events seen since the start of
	// the connection.
	ReordSeen uint32
}

// SizeOfTCPInfo is the binary size of a TCPInfo struct.
//
// It is a package-level variable, not a constant, because it is obtained by
// calling binary.Size on a zero-valued TCPInfo.
var SizeOfTCPInfo = int(binary.Size(TCPInfo{}))

// Control message types, from linux/socket.h.
const (
	SCM_CREDENTIALS = 0x2
	SCM_RIGHTS      = 0x1
)

// A ControlMessageHeader is the header for a socket control message.
//
// ControlMessageHeader represents struct cmsghdr from linux/socket.h.
type ControlMessageHeader struct {
	Length uint64
	Level  int32
	Type   int32
}

// SizeOfControlMessageHeader is the binary size of a ControlMessageHeader
// struct.
//
// It is a package-level variable, not a constant, because it is obtained by
// calling binary.Size on a zero-valued ControlMessageHeader.
var SizeOfControlMessageHeader = int(binary.Size(ControlMessageHeader{}))

// A ControlMessageCredentials is an SCM_CREDENTIALS socket control message.
//
// ControlMessageCredentials represents struct ucred from linux/socket.h.
//
// +marshal
type ControlMessageCredentials struct {
	PID int32
	UID uint32
	GID uint32
}

// A ControlMessageIPPacketInfo is an IP_PKTINFO socket control message.
//
// ControlMessageIPPacketInfo represents struct in_pktinfo from linux/in.h.
//
// +stateify savable
type ControlMessageIPPacketInfo struct {
	NIC             int32
	LocalAddr       InetAddr
	DestinationAddr InetAddr
}

// SizeOfControlMessageCredentials is the binary size of a
// ControlMessageCredentials struct.
//
// Like SizeOfControlMessageHeader, it is obtained by calling binary.Size on
// a zero value.
var SizeOfControlMessageCredentials = int(binary.Size(ControlMessageCredentials{}))

// A ControlMessageRights is an SCM_RIGHTS socket control message.
//
// It is a slice of int32 values.
type ControlMessageRights []int32

// SizeOfControlMessageRight is the size of a single element in
// ControlMessageRights, i.e. the size in bytes of one int32.
const SizeOfControlMessageRight = 4

// SizeOfControlMessageInq is the size of a TCP_INQ control message.
const SizeOfControlMessageInq = 4

// SizeOfControlMessageTOS is the size of an IP_TOS control message.
const SizeOfControlMessageTOS = 1

// SizeOfControlMessageTClass is the size of an IPV6_TCLASS control message.
const SizeOfControlMessageTClass = 4

// SizeOfControlMessageIPPacketInfo is the size of an IP_PKTINFO
// control message. It matches the three 4-byte fields of
// ControlMessageIPPacketInfo.
const SizeOfControlMessageIPPacketInfo = 12

// SCM_MAX_FD is the maximum number of FDs accepted in a single sendmsg call.
// From net/scm.h.
const SCM_MAX_FD = 253

// SO_ACCEPTCON is defined as __SO_ACCEPTCON in include/uapi/linux/net.h,
// which represents a listening socket state. Its value is the single bit
// 1 << 16.
//
// Note that this is distinct from SO_ACCEPTCONN (with two trailing Ns), which
// is a socket option for querying whether a socket is in a listening state.
const SO_ACCEPTCON = 1 << 16