summaryrefslogtreecommitdiffhomepage
path: root/pkg/sentry/socket/epsocket/epsocket.go
diff options
context:
space:
mode:
authorKevin Krakauer <krakauer@google.com>2019-10-07 13:39:18 -0700
committergVisor bot <gvisor-bot@google.com>2019-10-07 13:57:59 -0700
commit6a9823794975d2401ae1bda3937a63de959192ab (patch)
treec18b9c024fd3a7b07581c4e307410ba5d2b36041 /pkg/sentry/socket/epsocket/epsocket.go
parent8fce24d33a62d9a8fd121bc31046ee8b29443016 (diff)
Rename epsocket to netstack.
PiperOrigin-RevId: 273365058
Diffstat (limited to 'pkg/sentry/socket/epsocket/epsocket.go')
-rw-r--r--pkg/sentry/socket/epsocket/epsocket.go2637
1 files changed, 0 insertions, 2637 deletions
diff --git a/pkg/sentry/socket/epsocket/epsocket.go b/pkg/sentry/socket/epsocket/epsocket.go
deleted file mode 100644
index 5812085fa..000000000
--- a/pkg/sentry/socket/epsocket/epsocket.go
+++ /dev/null
@@ -1,2637 +0,0 @@
-// Copyright 2018 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// Package epsocket provides an implementation of the socket.Socket interface
-// that is backed by a tcpip.Endpoint.
-//
-// It does not depend on any particular endpoint implementation, and thus can
-// be used to expose certain endpoints to the sentry while leaving others out,
-// for example, TCP endpoints and Unix-domain endpoints.
-//
-// Lock ordering: netstack => mm: ioSequencePayload copies user memory inside
-// tcpip.Endpoint.Write(). Netstack is allowed to (and does) hold locks during
-// this operation.
-package epsocket
-
-import (
- "bytes"
- "io"
- "math"
- "reflect"
- "sync"
- "syscall"
- "time"
-
- "gvisor.dev/gvisor/pkg/abi/linux"
- "gvisor.dev/gvisor/pkg/binary"
- "gvisor.dev/gvisor/pkg/log"
- "gvisor.dev/gvisor/pkg/metric"
- "gvisor.dev/gvisor/pkg/sentry/arch"
- "gvisor.dev/gvisor/pkg/sentry/context"
- "gvisor.dev/gvisor/pkg/sentry/fs"
- "gvisor.dev/gvisor/pkg/sentry/fs/fsutil"
- "gvisor.dev/gvisor/pkg/sentry/inet"
- "gvisor.dev/gvisor/pkg/sentry/kernel"
- ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time"
- "gvisor.dev/gvisor/pkg/sentry/safemem"
- "gvisor.dev/gvisor/pkg/sentry/socket"
- "gvisor.dev/gvisor/pkg/sentry/socket/netfilter"
- "gvisor.dev/gvisor/pkg/sentry/unimpl"
- "gvisor.dev/gvisor/pkg/sentry/usermem"
- "gvisor.dev/gvisor/pkg/syserr"
- "gvisor.dev/gvisor/pkg/syserror"
- "gvisor.dev/gvisor/pkg/tcpip"
- "gvisor.dev/gvisor/pkg/tcpip/buffer"
- "gvisor.dev/gvisor/pkg/tcpip/stack"
- "gvisor.dev/gvisor/pkg/tcpip/transport/tcp"
- "gvisor.dev/gvisor/pkg/tcpip/transport/udp"
- "gvisor.dev/gvisor/pkg/waiter"
-)
-
-func mustCreateMetric(name, description string) *tcpip.StatCounter {
- var cm tcpip.StatCounter
- metric.MustRegisterCustomUint64Metric(name, false /* sync */, description, cm.Value)
- return &cm
-}
-
-// Metrics contains metrics exported by netstack.
-//
-// Every counter in the literal below is created by mustCreateMetric, which
-// registers it under the "/netstack/..." name given next to its description.
-var Metrics = tcpip.Stats{
- UnknownProtocolRcvdPackets: mustCreateMetric("/netstack/unknown_protocol_received_packets", "Number of packets received by netstack that were for an unknown or unsupported protocol."),
- MalformedRcvdPackets: mustCreateMetric("/netstack/malformed_received_packets", "Number of packets received by netstack that were deemed malformed."),
- DroppedPackets: mustCreateMetric("/netstack/dropped_packets", "Number of packets dropped by netstack due to full queues."),
- ICMP: tcpip.ICMPStats{
- V4PacketsSent: tcpip.ICMPv4SentPacketStats{
- ICMPv4PacketStats: tcpip.ICMPv4PacketStats{
- Echo: mustCreateMetric("/netstack/icmp/v4/packets_sent/echo", "Total number of ICMPv4 echo packets sent by netstack."),
- EchoReply: mustCreateMetric("/netstack/icmp/v4/packets_sent/echo_reply", "Total number of ICMPv4 echo reply packets sent by netstack."),
- DstUnreachable: mustCreateMetric("/netstack/icmp/v4/packets_sent/dst_unreachable", "Total number of ICMPv4 destination unreachable packets sent by netstack."),
- SrcQuench: mustCreateMetric("/netstack/icmp/v4/packets_sent/src_quench", "Total number of ICMPv4 source quench packets sent by netstack."),
- Redirect: mustCreateMetric("/netstack/icmp/v4/packets_sent/redirect", "Total number of ICMPv4 redirect packets sent by netstack."),
- TimeExceeded: mustCreateMetric("/netstack/icmp/v4/packets_sent/time_exceeded", "Total number of ICMPv4 time exceeded packets sent by netstack."),
- ParamProblem: mustCreateMetric("/netstack/icmp/v4/packets_sent/param_problem", "Total number of ICMPv4 parameter problem packets sent by netstack."),
- Timestamp: mustCreateMetric("/netstack/icmp/v4/packets_sent/timestamp", "Total number of ICMPv4 timestamp packets sent by netstack."),
- TimestampReply: mustCreateMetric("/netstack/icmp/v4/packets_sent/timestamp_reply", "Total number of ICMPv4 timestamp reply packets sent by netstack."),
- InfoRequest: mustCreateMetric("/netstack/icmp/v4/packets_sent/info_request", "Total number of ICMPv4 information request packets sent by netstack."),
- InfoReply: mustCreateMetric("/netstack/icmp/v4/packets_sent/info_reply", "Total number of ICMPv4 information reply packets sent by netstack."),
- },
- Dropped: mustCreateMetric("/netstack/icmp/v4/packets_sent/dropped", "Total number of ICMPv4 packets dropped by netstack due to link layer errors."),
- },
- V4PacketsReceived: tcpip.ICMPv4ReceivedPacketStats{
- ICMPv4PacketStats: tcpip.ICMPv4PacketStats{
- Echo: mustCreateMetric("/netstack/icmp/v4/packets_received/echo", "Total number of ICMPv4 echo packets received by netstack."),
- EchoReply: mustCreateMetric("/netstack/icmp/v4/packets_received/echo_reply", "Total number of ICMPv4 echo reply packets received by netstack."),
- DstUnreachable: mustCreateMetric("/netstack/icmp/v4/packets_received/dst_unreachable", "Total number of ICMPv4 destination unreachable packets received by netstack."),
- SrcQuench: mustCreateMetric("/netstack/icmp/v4/packets_received/src_quench", "Total number of ICMPv4 source quench packets received by netstack."),
- Redirect: mustCreateMetric("/netstack/icmp/v4/packets_received/redirect", "Total number of ICMPv4 redirect packets received by netstack."),
- TimeExceeded: mustCreateMetric("/netstack/icmp/v4/packets_received/time_exceeded", "Total number of ICMPv4 time exceeded packets received by netstack."),
- ParamProblem: mustCreateMetric("/netstack/icmp/v4/packets_received/param_problem", "Total number of ICMPv4 parameter problem packets received by netstack."),
- Timestamp: mustCreateMetric("/netstack/icmp/v4/packets_received/timestamp", "Total number of ICMPv4 timestamp packets received by netstack."),
- TimestampReply: mustCreateMetric("/netstack/icmp/v4/packets_received/timestamp_reply", "Total number of ICMPv4 timestamp reply packets received by netstack."),
- InfoRequest: mustCreateMetric("/netstack/icmp/v4/packets_received/info_request", "Total number of ICMPv4 information request packets received by netstack."),
- InfoReply: mustCreateMetric("/netstack/icmp/v4/packets_received/info_reply", "Total number of ICMPv4 information reply packets received by netstack."),
- },
- Invalid: mustCreateMetric("/netstack/icmp/v4/packets_received/invalid", "Total number of ICMPv4 packets received that the transport layer could not parse."),
- },
- V6PacketsSent: tcpip.ICMPv6SentPacketStats{
- ICMPv6PacketStats: tcpip.ICMPv6PacketStats{
- EchoRequest: mustCreateMetric("/netstack/icmp/v6/packets_sent/echo_request", "Total number of ICMPv6 echo request packets sent by netstack."),
- EchoReply: mustCreateMetric("/netstack/icmp/v6/packets_sent/echo_reply", "Total number of ICMPv6 echo reply packets sent by netstack."),
- DstUnreachable: mustCreateMetric("/netstack/icmp/v6/packets_sent/dst_unreachable", "Total number of ICMPv6 destination unreachable packets sent by netstack."),
- PacketTooBig: mustCreateMetric("/netstack/icmp/v6/packets_sent/packet_too_big", "Total number of ICMPv6 packet too big packets sent by netstack."),
- TimeExceeded: mustCreateMetric("/netstack/icmp/v6/packets_sent/time_exceeded", "Total number of ICMPv6 time exceeded packets sent by netstack."),
- ParamProblem: mustCreateMetric("/netstack/icmp/v6/packets_sent/param_problem", "Total number of ICMPv6 parameter problem packets sent by netstack."),
- RouterSolicit: mustCreateMetric("/netstack/icmp/v6/packets_sent/router_solicit", "Total number of ICMPv6 router solicit packets sent by netstack."),
- RouterAdvert: mustCreateMetric("/netstack/icmp/v6/packets_sent/router_advert", "Total number of ICMPv6 router advert packets sent by netstack."),
- NeighborSolicit: mustCreateMetric("/netstack/icmp/v6/packets_sent/neighbor_solicit", "Total number of ICMPv6 neighbor solicit packets sent by netstack."),
- NeighborAdvert: mustCreateMetric("/netstack/icmp/v6/packets_sent/neighbor_advert", "Total number of ICMPv6 neighbor advert packets sent by netstack."),
- RedirectMsg: mustCreateMetric("/netstack/icmp/v6/packets_sent/redirect_msg", "Total number of ICMPv6 redirect message packets sent by netstack."),
- },
- Dropped: mustCreateMetric("/netstack/icmp/v6/packets_sent/dropped", "Total number of ICMPv6 packets dropped by netstack due to link layer errors."),
- },
- V6PacketsReceived: tcpip.ICMPv6ReceivedPacketStats{
- ICMPv6PacketStats: tcpip.ICMPv6PacketStats{
- EchoRequest: mustCreateMetric("/netstack/icmp/v6/packets_received/echo_request", "Total number of ICMPv6 echo request packets received by netstack."),
- EchoReply: mustCreateMetric("/netstack/icmp/v6/packets_received/echo_reply", "Total number of ICMPv6 echo reply packets received by netstack."),
- DstUnreachable: mustCreateMetric("/netstack/icmp/v6/packets_received/dst_unreachable", "Total number of ICMPv6 destination unreachable packets received by netstack."),
- PacketTooBig: mustCreateMetric("/netstack/icmp/v6/packets_received/packet_too_big", "Total number of ICMPv6 packet too big packets received by netstack."),
- TimeExceeded: mustCreateMetric("/netstack/icmp/v6/packets_received/time_exceeded", "Total number of ICMPv6 time exceeded packets received by netstack."),
- ParamProblem: mustCreateMetric("/netstack/icmp/v6/packets_received/param_problem", "Total number of ICMPv6 parameter problem packets received by netstack."),
- RouterSolicit: mustCreateMetric("/netstack/icmp/v6/packets_received/router_solicit", "Total number of ICMPv6 router solicit packets received by netstack."),
- RouterAdvert: mustCreateMetric("/netstack/icmp/v6/packets_received/router_advert", "Total number of ICMPv6 router advert packets received by netstack."),
- NeighborSolicit: mustCreateMetric("/netstack/icmp/v6/packets_received/neighbor_solicit", "Total number of ICMPv6 neighbor solicit packets received by netstack."),
- NeighborAdvert: mustCreateMetric("/netstack/icmp/v6/packets_received/neighbor_advert", "Total number of ICMPv6 neighbor advert packets received by netstack."),
- RedirectMsg: mustCreateMetric("/netstack/icmp/v6/packets_received/redirect_msg", "Total number of ICMPv6 redirect message packets received by netstack."),
- },
- Invalid: mustCreateMetric("/netstack/icmp/v6/packets_received/invalid", "Total number of ICMPv6 packets received that the transport layer could not parse."),
- },
- },
- IP: tcpip.IPStats{
- PacketsReceived: mustCreateMetric("/netstack/ip/packets_received", "Total number of IP packets received from the link layer in nic.DeliverNetworkPacket."),
- InvalidAddressesReceived: mustCreateMetric("/netstack/ip/invalid_addresses_received", "Total number of IP packets received with an unknown or invalid destination address."),
- PacketsDelivered: mustCreateMetric("/netstack/ip/packets_delivered", "Total number of incoming IP packets that are successfully delivered to the transport layer via HandlePacket."),
- PacketsSent: mustCreateMetric("/netstack/ip/packets_sent", "Total number of IP packets sent via WritePacket."),
- OutgoingPacketErrors: mustCreateMetric("/netstack/ip/outgoing_packet_errors", "Total number of IP packets which failed to write to a link-layer endpoint."),
- },
- TCP: tcpip.TCPStats{
- ActiveConnectionOpenings: mustCreateMetric("/netstack/tcp/active_connection_openings", "Number of connections opened successfully via Connect."),
- PassiveConnectionOpenings: mustCreateMetric("/netstack/tcp/passive_connection_openings", "Number of connections opened successfully via Listen."),
- ListenOverflowSynDrop: mustCreateMetric("/netstack/tcp/listen_overflow_syn_drop", "Number of times the listen queue overflowed and a SYN was dropped."),
- ListenOverflowAckDrop: mustCreateMetric("/netstack/tcp/listen_overflow_ack_drop", "Number of times the listen queue overflowed and the final ACK in the handshake was dropped."),
- ListenOverflowSynCookieSent: mustCreateMetric("/netstack/tcp/listen_overflow_syn_cookie_sent", "Number of times a SYN cookie was sent."),
- ListenOverflowSynCookieRcvd: mustCreateMetric("/netstack/tcp/listen_overflow_syn_cookie_rcvd", "Number of times a SYN cookie was received."),
- ListenOverflowInvalidSynCookieRcvd: mustCreateMetric("/netstack/tcp/listen_overflow_invalid_syn_cookie_rcvd", "Number of times an invalid SYN cookie was received."),
- FailedConnectionAttempts: mustCreateMetric("/netstack/tcp/failed_connection_attempts", "Number of calls to Connect or Listen (active and passive openings, respectively) that end in an error."),
- ValidSegmentsReceived: mustCreateMetric("/netstack/tcp/valid_segments_received", "Number of TCP segments received that the transport layer successfully parsed."),
- InvalidSegmentsReceived: mustCreateMetric("/netstack/tcp/invalid_segments_received", "Number of TCP segments received that the transport layer could not parse."),
- SegmentsSent: mustCreateMetric("/netstack/tcp/segments_sent", "Number of TCP segments sent."),
- ResetsSent: mustCreateMetric("/netstack/tcp/resets_sent", "Number of TCP resets sent."),
- ResetsReceived: mustCreateMetric("/netstack/tcp/resets_received", "Number of TCP resets received."),
- Retransmits: mustCreateMetric("/netstack/tcp/retransmits", "Number of TCP segments retransmitted."),
- FastRecovery: mustCreateMetric("/netstack/tcp/fast_recovery", "Number of times fast recovery was used to recover from packet loss."),
- SACKRecovery: mustCreateMetric("/netstack/tcp/sack_recovery", "Number of times SACK recovery was used to recover from packet loss."),
- SlowStartRetransmits: mustCreateMetric("/netstack/tcp/slow_start_retransmits", "Number of segments retransmitted in slow start mode."),
- FastRetransmit: mustCreateMetric("/netstack/tcp/fast_retransmit", "Number of TCP segments which were fast retransmitted."),
- Timeouts: mustCreateMetric("/netstack/tcp/timeouts", "Number of times RTO expired."),
- ChecksumErrors: mustCreateMetric("/netstack/tcp/checksum_errors", "Number of segments dropped due to bad checksums."),
- },
- UDP: tcpip.UDPStats{
- PacketsReceived: mustCreateMetric("/netstack/udp/packets_received", "Number of UDP datagrams received via HandlePacket."),
- UnknownPortErrors: mustCreateMetric("/netstack/udp/unknown_port_errors", "Number of incoming UDP datagrams dropped because they did not have a known destination port."),
- ReceiveBufferErrors: mustCreateMetric("/netstack/udp/receive_buffer_errors", "Number of incoming UDP datagrams dropped due to the receiving buffer being in an invalid state."),
- MalformedPacketsReceived: mustCreateMetric("/netstack/udp/malformed_packets_received", "Number of incoming UDP datagrams dropped due to the UDP header being in a malformed state."),
- PacketsSent: mustCreateMetric("/netstack/udp/packets_sent", "Number of UDP datagrams sent via sendUDP."),
- },
-}
-
-// sizeOfInt32 is the size, in bytes, of an int32.
-const sizeOfInt32 int = 4
-
-// errStackType is the EINVAL error used when a stack is not an
-// epsocket.Stack.
-var errStackType = syserr.New("expected but did not receive an epsocket.Stack", linux.EINVAL)
-
-// ntohs converts a 16-bit number from network byte order to host byte order
-// by swapping its two bytes. It assumes that the host is little endian.
-func ntohs(v uint16) uint16 {
-	hi, lo := v>>8, v<<8
-	return lo | hi
-}
-
-// htons converts a 16-bit number from host byte order to network byte order.
-// It assumes that the host is little endian, so the conversion is the same
-// two-byte swap that ntohs performs.
-func htons(v uint16) uint16 {
-	return v>>8 | v<<8
-}
-
-// commonEndpoint represents the intersection of a tcpip.Endpoint and a
-// transport.Endpoint.
-type commonEndpoint interface {
- // GetLocalAddress implements tcpip.Endpoint.GetLocalAddress and
- // transport.Endpoint.GetLocalAddress.
- GetLocalAddress() (tcpip.FullAddress, *tcpip.Error)
-
- // GetRemoteAddress implements tcpip.Endpoint.GetRemoteAddress and
- // transport.Endpoint.GetRemoteAddress.
- GetRemoteAddress() (tcpip.FullAddress, *tcpip.Error)
-
- // Readiness implements tcpip.Endpoint.Readiness and
- // transport.Endpoint.Readiness.
- Readiness(mask waiter.EventMask) waiter.EventMask
-
- // SetSockOpt implements tcpip.Endpoint.SetSockOpt and
- // transport.Endpoint.SetSockOpt.
- SetSockOpt(interface{}) *tcpip.Error
-
- // SetSockOptInt implements tcpip.Endpoint.SetSockOptInt and
- // transport.Endpoint.SetSockOptInt.
- SetSockOptInt(opt tcpip.SockOpt, v int) *tcpip.Error
-
- // GetSockOpt implements tcpip.Endpoint.GetSockOpt and
- // transport.Endpoint.GetSockOpt.
- GetSockOpt(interface{}) *tcpip.Error
-
- // GetSockOptInt implements tcpip.Endpoint.GetSockOptInt and
- // transport.Endpoint.GetSockOptInt.
- GetSockOptInt(opt tcpip.SockOpt) (int, *tcpip.Error)
-}
-
-// SocketOperations encapsulates all the state needed to represent a network stack
-// endpoint in the kernel context.
-//
-// +stateify savable
-type SocketOperations struct {
- fsutil.FilePipeSeek `state:"nosave"`
- fsutil.FileNotDirReaddir `state:"nosave"`
- fsutil.FileNoopFlush `state:"nosave"`
- fsutil.FileNoFsync `state:"nosave"`
- fsutil.FileNoMMap `state:"nosave"`
- fsutil.FileUseInodeUnstableAttr `state:"nosave"`
- socket.SendReceiveTimeout
- *waiter.Queue
-
- // family, Endpoint, skType and protocol are populated by New from its
- // arguments.
- family int
- Endpoint tcpip.Endpoint
- skType linux.SockType
- protocol int
-
- // readMu protects access to the below fields.
- readMu sync.Mutex `state:"nosave"`
- // readView contains the remaining payload from the last packet.
- readView buffer.View
- // readCM holds control message information for the last packet read
- // from Endpoint.
- readCM tcpip.ControlMessages
- // sender is the address filled in by the last Endpoint.Read call made
- // from fetchReadView.
- sender tcpip.FullAddress
-
- // sockOptTimestamp corresponds to SO_TIMESTAMP. When true, timestamps
- // of returned messages can be returned via control messages. When
- // false, the same timestamp is instead stored and can be read via the
- // SIOCGSTAMP ioctl. It is protected by readMu. See socket(7).
- sockOptTimestamp bool
- // timestampValid indicates whether timestamp for SIOCGSTAMP has been
- // set. It is protected by readMu.
- timestampValid bool
- // timestampNS holds the timestamp to use with SIOCGSTAMP. It is only
- // valid when timestampValid is true. It is protected by readMu.
- timestampNS int64
-
- // sockOptInq corresponds to TCP_INQ. It is implemented on the epsocket
- // level, because it takes into account data from readView.
- sockOptInq bool
-}
-
-// New creates a new endpoint socket.
-//
-// For SOCK_STREAM sockets tcpip.DelayOption(1) is set on the endpoint first;
-// a failure to set it is translated and returned without creating a file.
-// The returned file is readable, writable and non-seekable, and is backed by
-// a SocketOperations built from the arguments.
-func New(t *kernel.Task, family int, skType linux.SockType, protocol int, queue *waiter.Queue, endpoint tcpip.Endpoint) (*fs.File, *syserr.Error) {
-	if skType == linux.SOCK_STREAM {
-		if terr := endpoint.SetSockOpt(tcpip.DelayOption(1)); terr != nil {
-			return nil, syserr.TranslateNetstackError(terr)
-		}
-	}
-
-	ops := &SocketOperations{
-		Queue:    queue,
-		family:   family,
-		Endpoint: endpoint,
-		skType:   skType,
-		protocol: protocol,
-	}
-	flags := fs.FileFlags{Read: true, Write: true, NonSeekable: true}
-
-	// The local dirent reference is dropped when New returns.
-	d := socket.NewDirent(t, epsocketDevice)
-	defer d.DecRef()
-	return fs.NewFile(t, d, flags, ops), nil
-}
-
-// sockAddrInetSize is the binary-encoded size of linux.SockAddrInet.
-var sockAddrInetSize = int(binary.Size(linux.SockAddrInet{}))
-// sockAddrInet6Size is the binary-encoded size of linux.SockAddrInet6.
-var sockAddrInet6Size = int(binary.Size(linux.SockAddrInet6{}))
-
-// bytesToIPAddress converts an IPv4 or IPv6 address from the user to the
-// netstack representation. An all-zero address of length 4 or 16 (the "any"
-// address) is mapped to the empty tcpip.Address; every other input is
-// converted as-is.
-func bytesToIPAddress(addr []byte) tcpip.Address {
-	if n := len(addr); n == 4 || n == 16 {
-		allZero := true
-		for _, b := range addr {
-			if b != 0 {
-				allZero = false
-				break
-			}
-		}
-		if allZero {
-			return ""
-		}
-	}
-	return tcpip.Address(addr)
-}
-
-// AddressAndFamily reads a sockaddr struct from the given address and
-// converts it to the FullAddress format. It supports AF_UNIX, AF_INET and
-// AF_INET6 addresses.
-//
-// sfamily is the address family of the socket the address is used with.
-//
-// strict indicates whether addresses with the AF_UNSPEC family are accepted
-// or not: when strict is true the family stored in addr must equal sfamily;
-// when strict is false AF_UNSPEC is accepted as well.
-//
-// AddressAndFamily returns the address and its family. It fails with
-// ErrInvalidArgument when addr is too short for its family (or, for AF_UNIX,
-// when the path exceeds linux.UnixPathMax), and with
-// ErrAddressFamilyNotSupported when the family is rejected or unknown.
-func AddressAndFamily(sfamily int, addr []byte, strict bool) (tcpip.FullAddress, uint16, *syserr.Error) {
-	// Make sure we have at least 2 bytes for the address family.
-	if len(addr) < 2 {
-		return tcpip.FullAddress{}, 0, syserr.ErrInvalidArgument
-	}
-
-	family := usermem.ByteOrder.Uint16(addr)
-	// A family that differs from the socket's is only tolerated when it is
-	// AF_UNSPEC and the caller is not strict. (The previous condition,
-	// "!strict && family != AF_UNSPEC", could never reject anything in
-	// strict mode.)
-	if family != uint16(sfamily) && (strict || family != linux.AF_UNSPEC) {
-		return tcpip.FullAddress{}, family, syserr.ErrAddressFamilyNotSupported
-	}
-
-	// Get the rest of the fields based on the address family.
-	switch family {
-	case linux.AF_UNIX:
-		path := addr[2:]
-		if len(path) > linux.UnixPathMax {
-			return tcpip.FullAddress{}, family, syserr.ErrInvalidArgument
-		}
-		// Drop the terminating NUL (if one exists) and everything after
-		// it for filesystem (non-abstract) addresses.
-		if len(path) > 0 && path[0] != 0 {
-			if n := bytes.IndexByte(path[1:], 0); n >= 0 {
-				path = path[:n+1]
-			}
-		}
-		return tcpip.FullAddress{
-			Addr: tcpip.Address(path),
-		}, family, nil
-
-	case linux.AF_INET:
-		var a linux.SockAddrInet
-		if len(addr) < sockAddrInetSize {
-			return tcpip.FullAddress{}, family, syserr.ErrInvalidArgument
-		}
-		binary.Unmarshal(addr[:sockAddrInetSize], usermem.ByteOrder, &a)
-
-		out := tcpip.FullAddress{
-			Addr: bytesToIPAddress(a.Addr[:]),
-			Port: ntohs(a.Port),
-		}
-		return out, family, nil
-
-	case linux.AF_INET6:
-		var a linux.SockAddrInet6
-		if len(addr) < sockAddrInet6Size {
-			return tcpip.FullAddress{}, family, syserr.ErrInvalidArgument
-		}
-		binary.Unmarshal(addr[:sockAddrInet6Size], usermem.ByteOrder, &a)
-
-		out := tcpip.FullAddress{
-			Addr: bytesToIPAddress(a.Addr[:]),
-			Port: ntohs(a.Port),
-		}
-		// The scope ID is only used as the NIC for link-local addresses.
-		if isLinkLocal(out.Addr) {
-			out.NIC = tcpip.NICID(a.Scope_id)
-		}
-		return out, family, nil
-
-	case linux.AF_UNSPEC:
-		return tcpip.FullAddress{}, family, nil
-
-	default:
-		return tcpip.FullAddress{}, 0, syserr.ErrAddressFamilyNotSupported
-	}
-}
-
-// isPacketBased reports whether the socket type is one of SOCK_DGRAM,
-// SOCK_SEQPACKET, SOCK_RDM or SOCK_RAW.
-func (s *SocketOperations) isPacketBased() bool {
-	switch s.skType {
-	case linux.SOCK_DGRAM, linux.SOCK_SEQPACKET, linux.SOCK_RDM, linux.SOCK_RAW:
-		return true
-	default:
-		return false
-	}
-}
-
-// fetchReadView refills the readView field of the socket from the endpoint
-// when it is currently empty; a non-empty view is left untouched. It assumes
-// that the socket is locked.
-//
-// On a failed endpoint read the view and sender stay cleared and the
-// translated error is returned.
-func (s *SocketOperations) fetchReadView() *syserr.Error {
-	if len(s.readView) != 0 {
-		return nil
-	}
-
-	s.readView, s.sender = nil, tcpip.FullAddress{}
-
-	view, cm, terr := s.Endpoint.Read(&s.sender)
-	if terr != nil {
-		return syserr.TranslateNetstackError(terr)
-	}
-
-	s.readView, s.readCM = view, cm
-	return nil
-}
-
-// Release implements fs.FileOperations.Release. It closes the underlying
-// endpoint.
-func (s *SocketOperations) Release() {
- s.Endpoint.Close()
-}
-
-// Read implements fs.FileOperations.Read.
-//
-// A zero-length destination returns (0, nil) without touching the socket.
-// Otherwise a single non-blocking read is attempted: a would-block result is
-// reported as syserror.ErrWouldBlock together with the byte count, any other
-// error is returned with a count of zero.
-func (s *SocketOperations) Read(ctx context.Context, _ *fs.File, dst usermem.IOSequence, _ int64) (int64, error) {
-	if dst.NumBytes() == 0 {
-		return 0, nil
-	}
-
-	n, _, _, _, _, err := s.nonBlockingRead(ctx, dst, false, false, false)
-	switch {
-	case err == syserr.ErrWouldBlock:
-		return int64(n), syserror.ErrWouldBlock
-	case err != nil:
-		return 0, err.ToError()
-	default:
-		return int64(n), nil
-	}
-}
-
-// WriteTo implements fs.FileOperations.WriteTo.
-func (s *SocketOperations) WriteTo(ctx context.Context, _ *fs.File, dst io.Writer, count int64, dup bool) (int64, error) {
- s.readMu.Lock()
-
- // Copy as much data as possible.
- done := int64(0)
- for count > 0 {
- // This may return a blocking error.
- if err := s.fetchReadView(); err != nil {
- s.readMu.Unlock()
- return done, err.ToError()
- }
-
- // Write to the underlying file.
- n, err := dst.Write(s.readView)
- done += int64(n)
- count -= int64(n)
- if dup {
- // That's all we support for dup. This is generally
- // supported by any Linux system calls, but the
- // expectation is that now a caller will call read to
- // actually remove these bytes from the socket.
- break
- }
-
- // Drop that part of the view.
- s.readView.TrimFront(n)
- if err != nil {
- s.readMu.Unlock()
- return done, err
- }
- }
-
- s.readMu.Unlock()
- return done, nil
-}
-
-// ioSequencePayload implements tcpip.Payloader.
-//
-// It copies user memory bytes on demand based on the requested size.
-type ioSequencePayload struct {
- // ctx is the context passed to src.CopyIn.
- ctx context.Context
- // src is the sequence the payload bytes are copied from.
- src usermem.IOSequence
-}
-
-// FullPayload implements tcpip.Payloader.FullPayload. It requests a payload
-// of src.NumBytes() bytes.
-func (i *ioSequencePayload) FullPayload() ([]byte, *tcpip.Error) {
- return i.Payload(int(i.src.NumBytes()))
-}
-
-// Payload implements tcpip.Payloader.Payload.
-//
-// The requested size is capped at the number of bytes in src. A view of that
-// size is allocated and filled with src.CopyIn; any copy error is reported
-// as tcpip.ErrBadAddress.
-func (i *ioSequencePayload) Payload(size int) ([]byte, *tcpip.Error) {
-	avail := int(i.src.NumBytes())
-	if size > avail {
-		size = avail
-	}
-
-	buf := buffer.NewView(size)
-	_, err := i.src.CopyIn(i.ctx, buf)
-	if err != nil {
-		return nil, tcpip.ErrBadAddress
-	}
-	return buf, nil
-}
-
-// DropFirst drops the first n bytes from the underlying src by replacing src
-// with the shortened sequence.
-func (i *ioSequencePayload) DropFirst(n int) {
-	i.src = i.src.DropFirst(n)
-}
-
-// Write implements fs.FileOperations.Write.
-//
-// src is handed to the endpoint through an ioSequencePayload. A write that
-// accepts fewer bytes than src holds returns the partial count together with
-// syserror.ErrWouldBlock.
-func (s *SocketOperations) Write(ctx context.Context, _ *fs.File, src usermem.IOSequence, _ int64) (int64, error) {
- f := &ioSequencePayload{ctx: ctx, src: src}
- n, resCh, err := s.Endpoint.Write(f, tcpip.WriteOptions{})
- if err == tcpip.ErrWouldBlock {
- return 0, syserror.ErrWouldBlock
- }
-
- // If the endpoint returned a channel, block the task on it and then
- // issue the write a second time.
- if resCh != nil {
- t := ctx.(*kernel.Task)
- if err := t.Block(resCh); err != nil {
- return 0, syserr.FromError(err).ToError()
- }
-
- n, _, err = s.Endpoint.Write(f, tcpip.WriteOptions{})
- }
-
- if err != nil {
- return 0, syserr.TranslateNetstackError(err).ToError()
- }
-
- // Short write: report what was written and ask the caller to retry.
- if int64(n) < src.NumBytes() {
- return int64(n), syserror.ErrWouldBlock
- }
-
- return int64(n), nil
-}
-
-// readerPayload implements tcpip.Payloader.
-//
-// It allocates a view and reads from a reader on-demand, based on available
-// capacity in the endpoint.
-type readerPayload struct {
- ctx context.Context
- // r is the reader the payload bytes are read from.
- r io.Reader
- // count is the number of bytes that may still be read from r; Payload
- // decrements it by the bytes it returns.
- count int64
- // err holds a non-blocking read error saved by Payload so that ReadFrom
- // can return it.
- err error
-}
-
-// FullPayload implements tcpip.Payloader.FullPayload. It requests a payload
-// of r.count bytes.
-func (r *readerPayload) FullPayload() ([]byte, *tcpip.Error) {
- return r.Payload(int(r.count))
-}
-
-// Payload implements tcpip.Payloader.Payload.
-//
-// At most min(size, r.count) bytes are read from the reader in one Read
-// call. If any bytes were read they are returned and r.count is reduced,
-// regardless of the accompanying error. Otherwise a would-block error maps
-// to tcpip.ErrWouldBlock, and every other outcome maps to
-// tcpip.ErrBadAddress with the reader's error (possibly nil) kept in r.err.
-func (r *readerPayload) Payload(size int) ([]byte, *tcpip.Error) {
-	if remaining := int(r.count); size > remaining {
-		size = remaining
-	}
-
-	buf := buffer.NewView(size)
-	n, err := r.r.Read(buf)
-	switch {
-	case n > 0:
-		// We ignore the error here. It may re-occur on subsequent
-		// reads, but for now we can enqueue some amount of data.
-		r.count -= int64(n)
-		return buf[:n], nil
-	case err == syserror.ErrWouldBlock:
-		return nil, tcpip.ErrWouldBlock
-	case err != nil:
-		r.err = err // Save for propagation.
-		return nil, tcpip.ErrBadAddress
-	default:
-		// There is no data and no error. Return an error, which will
-		// propagate r.err, which will be nil. This is the desired
-		// result: (0, nil).
-		return nil, tcpip.ErrBadAddress
-	}
-}
-
-// ReadFrom implements fs.FileOperations.ReadFrom.
-//
-// Up to count bytes are pulled from r through a readerPayload and written to
-// the endpoint with the Atomic write option set.
-//
-// NOTE(review): any endpoint error other than ErrWouldBlock is replaced by
-// f.err, which is only set by readerPayload.Payload. If the endpoint fails
-// for a reason unrelated to the reader, f.err is nil and the call returns
-// (n, nil) — confirm this is intended.
-func (s *SocketOperations) ReadFrom(ctx context.Context, _ *fs.File, r io.Reader, count int64) (int64, error) {
- f := &readerPayload{ctx: ctx, r: r, count: count}
- n, resCh, err := s.Endpoint.Write(f, tcpip.WriteOptions{
- // Reads may be destructive but should be very fast,
- // so we can't release the lock while copying data.
- Atomic: true,
- })
- if err == tcpip.ErrWouldBlock {
- return 0, syserror.ErrWouldBlock
- }
-
- // If the endpoint returned a channel, block the task on it and then
- // issue the write a second time.
- if resCh != nil {
- t := ctx.(*kernel.Task)
- if err := t.Block(resCh); err != nil {
- return 0, syserr.FromError(err).ToError()
- }
-
- n, _, err = s.Endpoint.Write(f, tcpip.WriteOptions{
- Atomic: true, // See above.
- })
- }
- if err == tcpip.ErrWouldBlock {
- return n, syserror.ErrWouldBlock
- } else if err != nil {
- return int64(n), f.err // Propagate error.
- }
-
- return int64(n), nil
-}
-
-// Readiness returns a mask of ready events for socket s.
-//
-// The endpoint's own readiness is used as the base. If the caller asked for
-// waiter.EventIn and the endpoint did not report it, EventIn is still set
-// when readView holds buffered data.
-func (s *SocketOperations) Readiness(mask waiter.EventMask) waiter.EventMask {
-	ready := s.Endpoint.Readiness(mask)
-
-	// Readability bits that were requested but not reported by the
-	// endpoint; only then is the cached view consulted.
-	missing := mask &^ ready & waiter.EventIn
-	if missing == 0 {
-		return ready
-	}
-
-	s.readMu.Lock()
-	buffered := len(s.readView) > 0
-	s.readMu.Unlock()
-	if buffered {
-		ready |= waiter.EventIn
-	}
-	return ready
-}
-
-// Connect implements the linux syscall connect(2) for sockets backed by
-// tcpip.Endpoint.
-//
-// An AF_UNSPEC address disconnects the endpoint. Otherwise the endpoint is
-// connected to the parsed address; when blocking is true and the connect is
-// still in progress, the task blocks until the endpoint signals
-// waiter.EventOut and Connect is called again to obtain the result.
-func (s *SocketOperations) Connect(t *kernel.Task, sockaddr []byte, blocking bool) *syserr.Error {
- addr, family, err := AddressAndFamily(s.family, sockaddr, false /* strict */)
- if err != nil {
- return err
- }
-
- if family == linux.AF_UNSPEC {
- err := s.Endpoint.Disconnect()
- // An endpoint without disconnect support is reported as an
- // unsupported address family.
- if err == tcpip.ErrNotSupported {
- return syserr.ErrAddressFamilyNotSupported
- }
- return syserr.TranslateNetstackError(err)
- }
- // Always return right away in the non-blocking case.
- if !blocking {
- return syserr.TranslateNetstackError(s.Endpoint.Connect(addr))
- }
-
- // Register for notification when the endpoint becomes writable, then
- // initiate the connection.
- e, ch := waiter.NewChannelEntry(nil)
- s.EventRegister(&e, waiter.EventOut)
- defer s.EventUnregister(&e)
-
- // Anything other than "started" or "already connecting" is a final
- // result (including success) and is returned immediately.
- if err := s.Endpoint.Connect(addr); err != tcpip.ErrConnectStarted && err != tcpip.ErrAlreadyConnecting {
- return syserr.TranslateNetstackError(err)
- }
-
- // It's pending, so we have to wait for a notification, and fetch the
- // result once the wait completes.
- if err := t.Block(ch); err != nil {
- return syserr.FromError(err)
- }
-
- // Call Connect() again after blocking to find connect's result.
- return syserr.TranslateNetstackError(s.Endpoint.Connect(addr))
-}
-
-// Bind implements the linux syscall bind(2) for sockets backed by
-// tcpip.Endpoint.
-//
-// The sockaddr is parsed in strict mode against the socket's family and the
-// resulting address is passed to the endpoint's Bind.
-func (s *SocketOperations) Bind(t *kernel.Task, sockaddr []byte) *syserr.Error {
-	bindAddr, _, parseErr := AddressAndFamily(s.family, sockaddr, true /* strict */)
-	if parseErr != nil {
-		return parseErr
-	}
-
-	// Issue the bind request to the endpoint.
-	terr := s.Endpoint.Bind(bindAddr)
-	return syserr.TranslateNetstackError(terr)
-}
-
-// Listen implements the linux syscall listen(2) for sockets backed by
-// tcpip.Endpoint. It forwards backlog to the endpoint and translates the
-// resulting error.
-func (s *SocketOperations) Listen(t *kernel.Task, backlog int) *syserr.Error {
- return syserr.TranslateNetstackError(s.Endpoint.Listen(backlog))
-}
-
-// blockingAccept implements a blocking version of accept(2), that is, if no
-// connections are ready to be accepted, it will block until one becomes ready.
-//
-// It returns the accepted endpoint and its wait queue, or the error that
-// ended the wait (an accept error other than ErrWouldBlock, or a failure of
-// t.Block).
-func (s *SocketOperations) blockingAccept(t *kernel.Task) (tcpip.Endpoint, *waiter.Queue, *syserr.Error) {
- // Register for notifications.
- e, ch := waiter.NewChannelEntry(nil)
- s.EventRegister(&e, waiter.EventIn)
- defer s.EventUnregister(&e)
-
- // Try to accept the connection again; if it fails, then wait until we
- // get a notification.
- for {
- // Any result other than ErrWouldBlock (success included) ends
- // the loop.
- if ep, wq, err := s.Endpoint.Accept(); err != tcpip.ErrWouldBlock {
- return ep, wq, syserr.TranslateNetstackError(err)
- }
-
- if err := t.Block(ch); err != nil {
- return nil, nil, syserr.FromError(err)
- }
- }
-}
-
-// Accept implements the linux syscall accept(2) for sockets backed by
-// tcpip.Endpoint.
-//
-// It returns the new file descriptor and, when peerRequested is true, the
-// peer address and its length. flags may carry SOCK_NONBLOCK and
-// SOCK_CLOEXEC, which are applied to the new file and descriptor
-// respectively. When blocking is true and no connection is pending, the task
-// blocks in blockingAccept.
-func (s *SocketOperations) Accept(t *kernel.Task, peerRequested bool, flags int, blocking bool) (int32, linux.SockAddr, uint32, *syserr.Error) {
- // Issue the accept request to get the new endpoint.
- ep, wq, terr := s.Endpoint.Accept()
- if terr != nil {
- // Only a would-block result on a blocking socket is retried.
- if terr != tcpip.ErrWouldBlock || !blocking {
- return 0, nil, 0, syserr.TranslateNetstackError(terr)
- }
-
- var err *syserr.Error
- ep, wq, err = s.blockingAccept(t)
- if err != nil {
- return 0, nil, 0, err
- }
- }
-
- // Wrap the accepted endpoint in a new socket file with the same
- // family, type and protocol as the listening socket.
- ns, err := New(t, s.family, s.skType, s.protocol, wq, ep)
- if err != nil {
- return 0, nil, 0, err
- }
- // Drop this function's reference to the new file on return.
- defer ns.DecRef()
-
- if flags&linux.SOCK_NONBLOCK != 0 {
- flags := ns.Flags()
- flags.NonBlocking = true
- ns.SetFlags(flags.Settable())
- }
-
- var addr linux.SockAddr
- var addrLen uint32
- if peerRequested {
- // Get address of the peer and write it to peer slice.
- var err *syserr.Error
- addr, addrLen, err = ns.FileOperations.(*SocketOperations).GetPeerName(t)
- if err != nil {
- return 0, nil, 0, err
- }
- }
-
- fd, e := t.NewFDFrom(0, ns, kernel.FDFlags{
- CloseOnExec: flags&linux.SOCK_CLOEXEC != 0,
- })
-
- // The socket is recorded with the kernel whether or not the descriptor
- // allocation above succeeded; e is reported to the caller below.
- t.Kernel().RecordSocket(ns)
-
- return fd, addr, addrLen, syserr.FromError(e)
-}
-
-// ConvertShutdown converts Linux shutdown flags into tcpip shutdown flags.
-func ConvertShutdown(how int) (tcpip.ShutdownFlags, *syserr.Error) {
- var f tcpip.ShutdownFlags
- switch how {
- case linux.SHUT_RD:
- f = tcpip.ShutdownRead
- case linux.SHUT_WR:
- f = tcpip.ShutdownWrite
- case linux.SHUT_RDWR:
- f = tcpip.ShutdownRead | tcpip.ShutdownWrite
- default:
- return 0, syserr.ErrInvalidArgument
- }
- return f, nil
-}
-
-// Shutdown implements the linux syscall shutdown(2) for sockets backed by
-// tcpip.Endpoint.
-//
-// how is converted with ConvertShutdown; a conversion failure is returned
-// unchanged, otherwise the endpoint's Shutdown result is translated.
-func (s *SocketOperations) Shutdown(t *kernel.Task, how int) *syserr.Error {
-	shutFlags, convErr := ConvertShutdown(how)
-	if convErr != nil {
-		return convErr
-	}
-
-	shutErr := s.Endpoint.Shutdown(shutFlags)
-	return syserr.TranslateNetstackError(shutErr)
-}
-
-// GetSockOpt implements the linux syscall getsockopt(2) for sockets backed by
-// tcpip.Endpoint.
-func (s *SocketOperations) GetSockOpt(t *kernel.Task, level, name int, outPtr usermem.Addr, outLen int) (interface{}, *syserr.Error) {
- // TODO(b/78348848): Unlike other socket options, SO_TIMESTAMP is
- // implemented specifically for epsocket.SocketOperations rather than
- // commonEndpoint. commonEndpoint should be extended to support socket
- // options where the implementation is not shared, as unix sockets need
- // their own support for SO_TIMESTAMP.
- if level == linux.SOL_SOCKET && name == linux.SO_TIMESTAMP {
- if outLen < sizeOfInt32 {
- return nil, syserr.ErrInvalidArgument
- }
- val := int32(0)
- s.readMu.Lock()
- defer s.readMu.Unlock()
- if s.sockOptTimestamp {
- val = 1
- }
- return val, nil
- }
- if level == linux.SOL_TCP && name == linux.TCP_INQ {
- if outLen < sizeOfInt32 {
- return nil, syserr.ErrInvalidArgument
- }
- val := int32(0)
- s.readMu.Lock()
- defer s.readMu.Unlock()
- if s.sockOptInq {
- val = 1
- }
- return val, nil
- }
-
- if s.skType == linux.SOCK_RAW && level == linux.IPPROTO_IP {
- switch name {
- case linux.IPT_SO_GET_INFO:
- if outLen < linux.SizeOfIPTGetinfo {
- return nil, syserr.ErrInvalidArgument
- }
-
- info, err := netfilter.GetInfo(t, s.Endpoint, outPtr)
- if err != nil {
- return nil, err
- }
- return info, nil
-
- case linux.IPT_SO_GET_ENTRIES:
- if outLen < linux.SizeOfIPTGetEntries {
- return nil, syserr.ErrInvalidArgument
- }
-
- entries, err := netfilter.GetEntries(t, s.Endpoint, outPtr, outLen)
- if err != nil {
- return nil, err
- }
- return entries, nil
-
- }
- }
-
- return GetSockOpt(t, s, s.Endpoint, s.family, s.skType, level, name, outLen)
-}
-
-// GetSockOpt can be used to implement the linux syscall getsockopt(2) for
-// sockets backed by a commonEndpoint.
-//
-// The request is routed by level to the SOL_SOCKET, SOL_TCP, SOL_IPV6 or
-// SOL_IP handler. SOL_UDP, SOL_ICMPV6, SOL_RAW and SOL_PACKET emit an
-// unimplemented event; those and any other level fail with
-// syserr.ErrProtocolNotAvailable.
-func GetSockOpt(t *kernel.Task, s socket.Socket, ep commonEndpoint, family int, skType linux.SockType, level, name, outLen int) (interface{}, *syserr.Error) {
-	switch level {
-	case linux.SOL_IP:
-		return getSockOptIP(t, ep, name, outLen)
-
-	case linux.SOL_IPV6:
-		return getSockOptIPv6(t, ep, name, outLen)
-
-	case linux.SOL_TCP:
-		return getSockOptTCP(t, ep, name, outLen)
-
-	case linux.SOL_SOCKET:
-		return getSockOptSocket(t, s, ep, family, skType, name, outLen)
-
-	case linux.SOL_UDP, linux.SOL_ICMPV6, linux.SOL_RAW, linux.SOL_PACKET:
-		// Recognized levels for which no option is implemented here.
-		t.Kernel().EmitUnimplementedEvent(t)
-		return nil, syserr.ErrProtocolNotAvailable
-
-	default:
-		return nil, syserr.ErrProtocolNotAvailable
-	}
-}
-
-// getSockOptSocket implements GetSockOpt when level is SOL_SOCKET.
-//
-// Most options reject an outLen smaller than the value they return with
-// syserr.ErrInvalidArgument before reading the value from ep or s. Names not
-// handled below are passed to socket.GetSockOptEmitUnimplementedEvent and
-// fail with syserr.ErrProtocolNotAvailable.
-func getSockOptSocket(t *kernel.Task, s socket.Socket, ep commonEndpoint, family int, skType linux.SockType, name, outLen int) (interface{}, *syserr.Error) {
- // TODO(b/124056281): Stop rejecting short optLen values in getsockopt.
- switch name {
- case linux.SO_ERROR:
- if outLen < sizeOfInt32 {
- return nil, syserr.ErrInvalidArgument
- }
-
- // Get the last error and convert it to its Linux error number; no
- // pending error is reported as 0.
- err := ep.GetSockOpt(tcpip.ErrorOption{})
- if err == nil {
- return int32(0), nil
- }
- return int32(syserr.TranslateNetstackError(err).ToLinux().Number()), nil
-
- case linux.SO_PEERCRED:
- // Only AF_UNIX sockets are accepted here.
- if family != linux.AF_UNIX || outLen < syscall.SizeofUcred {
- return nil, syserr.ErrInvalidArgument
- }
-
- // NOTE(review): the credentials returned are those of the calling
- // task t, not ones read from the peer endpoint; confirm intended.
- tcred := t.Credentials()
- return syscall.Ucred{
- Pid: int32(t.ThreadGroup().ID()),
- Uid: uint32(tcred.EffectiveKUID.In(tcred.UserNamespace).OrOverflow()),
- Gid: uint32(tcred.EffectiveKGID.In(tcred.UserNamespace).OrOverflow()),
- }, nil
-
- case linux.SO_PASSCRED:
- if outLen < sizeOfInt32 {
- return nil, syserr.ErrInvalidArgument
- }
-
- var v tcpip.PasscredOption
- if err := ep.GetSockOpt(&v); err != nil {
- return nil, syserr.TranslateNetstackError(err)
- }
-
- return int32(v), nil
-
- case linux.SO_SNDBUF:
- if outLen < sizeOfInt32 {
- return nil, syserr.ErrInvalidArgument
- }
-
- size, err := ep.GetSockOptInt(tcpip.SendBufferSizeOption)
- if err != nil {
- return nil, syserr.TranslateNetstackError(err)
- }
-
- // Clamp so the conversion to int32 below cannot overflow.
- if size > math.MaxInt32 {
- size = math.MaxInt32
- }
-
- return int32(size), nil
-
- case linux.SO_RCVBUF:
- if outLen < sizeOfInt32 {
- return nil, syserr.ErrInvalidArgument
- }
-
- size, err := ep.GetSockOptInt(tcpip.ReceiveBufferSizeOption)
- if err != nil {
- return nil, syserr.TranslateNetstackError(err)
- }
-
- // Clamp so the conversion to int32 below cannot overflow.
- if size > math.MaxInt32 {
- size = math.MaxInt32
- }
-
- return int32(size), nil
-
- case linux.SO_REUSEADDR:
- if outLen < sizeOfInt32 {
- return nil, syserr.ErrInvalidArgument
- }
-
- var v tcpip.ReuseAddressOption
- if err := ep.GetSockOpt(&v); err != nil {
- return nil, syserr.TranslateNetstackError(err)
- }
-
- return int32(v), nil
-
- case linux.SO_REUSEPORT:
- if outLen < sizeOfInt32 {
- return nil, syserr.ErrInvalidArgument
- }
-
- var v tcpip.ReusePortOption
- if err := ep.GetSockOpt(&v); err != nil {
- return nil, syserr.TranslateNetstackError(err)
- }
-
- return int32(v), nil
-
- case linux.SO_BINDTODEVICE:
- var v tcpip.BindToDeviceOption
- if err := ep.GetSockOpt(&v); err != nil {
- return nil, syserr.TranslateNetstackError(err)
- }
- // An empty device name is returned as an empty slice, whatever
- // outLen is.
- if len(v) == 0 {
- return []byte{}, nil
- }
- if outLen < linux.IFNAMSIZ {
- return nil, syserr.ErrInvalidArgument
- }
- // The name is returned with a trailing NUL byte appended.
- return append([]byte(v), 0), nil
-
- case linux.SO_BROADCAST:
- if outLen < sizeOfInt32 {
- return nil, syserr.ErrInvalidArgument
- }
-
- var v tcpip.BroadcastOption
- if err := ep.GetSockOpt(&v); err != nil {
- return nil, syserr.TranslateNetstackError(err)
- }
-
- return int32(v), nil
-
- case linux.SO_KEEPALIVE:
- if outLen < sizeOfInt32 {
- return nil, syserr.ErrInvalidArgument
- }
-
- var v tcpip.KeepaliveEnabledOption
- if err := ep.GetSockOpt(&v); err != nil {
- return nil, syserr.TranslateNetstackError(err)
- }
-
- return int32(v), nil
-
- case linux.SO_LINGER:
- if outLen < linux.SizeOfLinger {
- return nil, syserr.ErrInvalidArgument
- }
- // A zero-valued Linger is always reported; ep is not consulted.
- return linux.Linger{}, nil
-
- case linux.SO_SNDTIMEO:
- // TODO(igudger): Linux allows shorter lengths for partial results.
- if outLen < linux.SizeOfTimeval {
- return nil, syserr.ErrInvalidArgument
- }
-
- return linux.NsecToTimeval(s.SendTimeout()), nil
-
- case linux.SO_RCVTIMEO:
- // TODO(igudger): Linux allows shorter lengths for partial results.
- if outLen < linux.SizeOfTimeval {
- return nil, syserr.ErrInvalidArgument
- }
-
- return linux.NsecToTimeval(s.RecvTimeout()), nil
-
- case linux.SO_OOBINLINE:
- if outLen < sizeOfInt32 {
- return nil, syserr.ErrInvalidArgument
- }
-
- var v tcpip.OutOfBandInlineOption
- if err := ep.GetSockOpt(&v); err != nil {
- return nil, syserr.TranslateNetstackError(err)
- }
-
- return int32(v), nil
-
- default:
- socket.GetSockOptEmitUnimplementedEvent(t, name)
- }
- return nil, syserr.ErrProtocolNotAvailable
-}
-
-// getSockOptTCP implements GetSockOpt when level is SOL_TCP.
-//
-// Integer-valued options require outLen to be at least sizeOfInt32 and are
-// returned as int32. TCP_INFO and TCP_CONGESTION return byte slices that are
-// truncated to outLen. Names that are not implemented emit an unimplemented
-// event where applicable and fail with syserr.ErrProtocolNotAvailable.
-func getSockOptTCP(t *kernel.Task, ep commonEndpoint, name, outLen int) (interface{}, *syserr.Error) {
- switch name {
- case linux.TCP_NODELAY:
- if outLen < sizeOfInt32 {
- return nil, syserr.ErrInvalidArgument
- }
-
- var v tcpip.DelayOption
- if err := ep.GetSockOpt(&v); err != nil {
- return nil, syserr.TranslateNetstackError(err)
- }
-
- // tcpip.DelayOption is the inverse of TCP_NODELAY: a zero delay
- // option is reported as TCP_NODELAY being set.
- if v == 0 {
- return int32(1), nil
- }
- return int32(0), nil
-
- case linux.TCP_CORK:
- if outLen < sizeOfInt32 {
- return nil, syserr.ErrInvalidArgument
- }
-
- var v tcpip.CorkOption
- if err := ep.GetSockOpt(&v); err != nil {
- return nil, syserr.TranslateNetstackError(err)
- }
-
- return int32(v), nil
-
- case linux.TCP_QUICKACK:
- if outLen < sizeOfInt32 {
- return nil, syserr.ErrInvalidArgument
- }
-
- var v tcpip.QuickAckOption
- if err := ep.GetSockOpt(&v); err != nil {
- return nil, syserr.TranslateNetstackError(err)
- }
-
- return int32(v), nil
-
- case linux.TCP_MAXSEG:
- if outLen < sizeOfInt32 {
- return nil, syserr.ErrInvalidArgument
- }
-
- var v tcpip.MaxSegOption
- if err := ep.GetSockOpt(&v); err != nil {
- return nil, syserr.TranslateNetstackError(err)
- }
-
- return int32(v), nil
-
- case linux.TCP_KEEPIDLE:
- if outLen < sizeOfInt32 {
- return nil, syserr.ErrInvalidArgument
- }
-
- var v tcpip.KeepaliveIdleOption
- if err := ep.GetSockOpt(&v); err != nil {
- return nil, syserr.TranslateNetstackError(err)
- }
-
- // The option holds a duration; it is reported in whole seconds.
- return int32(time.Duration(v) / time.Second), nil
-
- case linux.TCP_KEEPINTVL:
- if outLen < sizeOfInt32 {
- return nil, syserr.ErrInvalidArgument
- }
-
- var v tcpip.KeepaliveIntervalOption
- if err := ep.GetSockOpt(&v); err != nil {
- return nil, syserr.TranslateNetstackError(err)
- }
-
- // The option holds a duration; it is reported in whole seconds.
- return int32(time.Duration(v) / time.Second), nil
-
- case linux.TCP_INFO:
- // v is fetched so that an endpoint error is surfaced; its contents
- // are not yet copied into the result (see the TODO below).
- var v tcpip.TCPInfoOption
- if err := ep.GetSockOpt(&v); err != nil {
- return nil, syserr.TranslateNetstackError(err)
- }
-
- // TODO(b/64800844): Translate fields once they are added to
- // tcpip.TCPInfoOption.
- info := linux.TCPInfo{}
-
- // Linux truncates the output binary to outLen.
- ib := binary.Marshal(nil, usermem.ByteOrder, &info)
- if len(ib) > outLen {
- ib = ib[:outLen]
- }
-
- return ib, nil
-
- case linux.TCP_CC_INFO,
- linux.TCP_NOTSENT_LOWAT,
- linux.TCP_ZEROCOPY_RECEIVE:
-
- // Recognized but not supported: emit an event, then fail with the
- // ErrProtocolNotAvailable return at the end of the function.
- t.Kernel().EmitUnimplementedEvent(t)
-
- case linux.TCP_CONGESTION:
- if outLen <= 0 {
- return nil, syserr.ErrInvalidArgument
- }
-
- var v tcpip.CongestionControlOption
- if err := ep.GetSockOpt(&v); err != nil {
- return nil, syserr.TranslateNetstackError(err)
- }
-
- // We match linux behaviour here where it returns the lower of
- // TCP_CA_NAME_MAX bytes or the value of the option length.
- //
- // This is Linux's net/tcp.h TCP_CA_NAME_MAX.
- const tcpCANameMax = 16
-
- toCopy := tcpCANameMax
- if outLen < tcpCANameMax {
- toCopy = outLen
- }
- // The buffer is zero-filled, so a name shorter than toCopy is padded
- // with NUL bytes and a longer one is truncated.
- b := make([]byte, toCopy)
- copy(b, v)
- return b, nil
-
- default:
- emitUnimplementedEventTCP(t, name)
- }
- return nil, syserr.ErrProtocolNotAvailable
-}
-
-// getSockOptIPv6 implements GetSockOpt when level is SOL_IPV6.
-//
-// Only IPV6_V6ONLY returns a value (as int32, requiring outLen of at least
-// sizeOfInt32). Every other name fails with syserr.ErrProtocolNotAvailable
-// after the relevant unimplemented-event helper has been called.
-func getSockOptIPv6(t *kernel.Task, ep commonEndpoint, name, outLen int) (interface{}, *syserr.Error) {
-	if name == linux.IPV6_V6ONLY {
-		if outLen < sizeOfInt32 {
-			return nil, syserr.ErrInvalidArgument
-		}
-
-		var v6Only tcpip.V6OnlyOption
-		if err := ep.GetSockOpt(&v6Only); err != nil {
-			return nil, syserr.TranslateNetstackError(err)
-		}
-		return int32(v6Only), nil
-	}
-
-	if name == linux.IPV6_PATHMTU {
-		t.Kernel().EmitUnimplementedEvent(t)
-	} else {
-		emitUnimplementedEventIPv6(t, name)
-	}
-	return nil, syserr.ErrProtocolNotAvailable
-}
-
-// getSockOptIP implements GetSockOpt when level is SOL_IP.
-//
-// IP_MULTICAST_TTL and IP_MULTICAST_LOOP are returned as int32;
-// IP_MULTICAST_IF is returned as the Addr field of a linux.SockAddrInet.
-// Other names are passed to emitUnimplementedEventIP and fail with
-// syserr.ErrProtocolNotAvailable.
-func getSockOptIP(t *kernel.Task, ep commonEndpoint, name, outLen int) (interface{}, *syserr.Error) {
-	switch name {
-	case linux.IP_MULTICAST_TTL:
-		if outLen < sizeOfInt32 {
-			return nil, syserr.ErrInvalidArgument
-		}
-
-		var ttl tcpip.MulticastTTLOption
-		if err := ep.GetSockOpt(&ttl); err != nil {
-			return nil, syserr.TranslateNetstackError(err)
-		}
-		return int32(ttl), nil
-
-	case linux.IP_MULTICAST_IF:
-		if outLen < len(linux.InetAddr{}) {
-			return nil, syserr.ErrInvalidArgument
-		}
-
-		var iface tcpip.MulticastInterfaceOption
-		if err := ep.GetSockOpt(&iface); err != nil {
-			return nil, syserr.TranslateNetstackError(err)
-		}
-
-		// Convert through an AF_INET sockaddr and keep only its address.
-		sockAddr, _ := ConvertAddress(linux.AF_INET, tcpip.FullAddress{Addr: iface.InterfaceAddr})
-		inet := sockAddr.(*linux.SockAddrInet)
-		return inet.Addr, nil
-
-	case linux.IP_MULTICAST_LOOP:
-		if outLen < sizeOfInt32 {
-			return nil, syserr.ErrInvalidArgument
-		}
-
-		var loop tcpip.MulticastLoopOption
-		if err := ep.GetSockOpt(&loop); err != nil {
-			return nil, syserr.TranslateNetstackError(err)
-		}
-
-		var enabled int32
-		if loop {
-			enabled = 1
-		}
-		return enabled, nil
-
-	default:
-		emitUnimplementedEventIP(t, name)
-	}
-	return nil, syserr.ErrProtocolNotAvailable
-}
-
-// SetSockOpt implements the linux syscall setsockopt(2) for sockets backed by
-// tcpip.Endpoint.
-//
-// SO_TIMESTAMP and TCP_INQ are stored on s itself under s.readMu: any
-// non-zero 32-bit value enables the option. Every other request is delegated
-// to the package-level SetSockOpt.
-//
-// TODO(b/78348848): Unlike other socket options, SO_TIMESTAMP is
-// implemented specifically for epsocket.SocketOperations rather than
-// commonEndpoint. commonEndpoint should be extended to support socket
-// options where the implementation is not shared, as unix sockets need
-// their own support for SO_TIMESTAMP.
-func (s *SocketOperations) SetSockOpt(t *kernel.Task, level int, name int, optVal []byte) *syserr.Error {
-	switch {
-	case level == linux.SOL_SOCKET && name == linux.SO_TIMESTAMP:
-		if len(optVal) < sizeOfInt32 {
-			return syserr.ErrInvalidArgument
-		}
-		s.readMu.Lock()
-		defer s.readMu.Unlock()
-		s.sockOptTimestamp = usermem.ByteOrder.Uint32(optVal) != 0
-		return nil
-
-	case level == linux.SOL_TCP && name == linux.TCP_INQ:
-		if len(optVal) < sizeOfInt32 {
-			return syserr.ErrInvalidArgument
-		}
-		s.readMu.Lock()
-		defer s.readMu.Unlock()
-		s.sockOptInq = usermem.ByteOrder.Uint32(optVal) != 0
-		return nil
-
-	default:
-		return SetSockOpt(t, s, s.Endpoint, level, name, optVal)
-	}
-}
-
-// SetSockOpt can be used to implement the linux syscall setsockopt(2) for
-// sockets backed by a commonEndpoint.
-//
-// The request is routed by level to the SOL_SOCKET, SOL_TCP, SOL_IPV6 or
-// SOL_IP handler. SOL_UDP, SOL_ICMPV6, SOL_RAW and SOL_PACKET emit an
-// unimplemented event; those and any other level are then handed to the
-// endpoint as an empty struct{}{} option.
-func SetSockOpt(t *kernel.Task, s socket.Socket, ep commonEndpoint, level int, name int, optVal []byte) *syserr.Error {
-	switch level {
-	case linux.SOL_IP:
-		return setSockOptIP(t, ep, name, optVal)
-
-	case linux.SOL_IPV6:
-		return setSockOptIPv6(t, ep, name, optVal)
-
-	case linux.SOL_TCP:
-		return setSockOptTCP(t, ep, name, optVal)
-
-	case linux.SOL_SOCKET:
-		return setSockOptSocket(t, s, ep, name, optVal)
-
-	case linux.SOL_UDP, linux.SOL_ICMPV6, linux.SOL_RAW, linux.SOL_PACKET:
-		// Recognized levels for which no option is implemented here.
-		t.Kernel().EmitUnimplementedEvent(t)
-	}
-
-	// Default to the old behavior; hand off to network stack.
-	fallbackErr := ep.SetSockOpt(struct{}{})
-	return syserr.TranslateNetstackError(fallbackErr)
-}
-
-// setSockOptSocket implements SetSockOpt when level is SOL_SOCKET.
-//
-// Integer options require at least sizeOfInt32 bytes in optVal and are
-// decoded with usermem.ByteOrder; timeout and linger options require a full
-// linux.Timeval or linux.Linger. Names not handled below are passed to
-// socket.SetSockOptEmitUnimplementedEvent and then handed to the endpoint as
-// an empty struct{}{} option.
-func setSockOptSocket(t *kernel.Task, s socket.Socket, ep commonEndpoint, name int, optVal []byte) *syserr.Error {
- switch name {
- case linux.SO_SNDBUF:
- if len(optVal) < sizeOfInt32 {
- return syserr.ErrInvalidArgument
- }
-
- v := usermem.ByteOrder.Uint32(optVal)
- return syserr.TranslateNetstackError(ep.SetSockOptInt(tcpip.SendBufferSizeOption, int(v)))
-
- case linux.SO_RCVBUF:
- if len(optVal) < sizeOfInt32 {
- return syserr.ErrInvalidArgument
- }
-
- v := usermem.ByteOrder.Uint32(optVal)
- return syserr.TranslateNetstackError(ep.SetSockOptInt(tcpip.ReceiveBufferSizeOption, int(v)))
-
- case linux.SO_REUSEADDR:
- if len(optVal) < sizeOfInt32 {
- return syserr.ErrInvalidArgument
- }
-
- v := usermem.ByteOrder.Uint32(optVal)
- return syserr.TranslateNetstackError(ep.SetSockOpt(tcpip.ReuseAddressOption(v)))
-
- case linux.SO_REUSEPORT:
- if len(optVal) < sizeOfInt32 {
- return syserr.ErrInvalidArgument
- }
-
- v := usermem.ByteOrder.Uint32(optVal)
- return syserr.TranslateNetstackError(ep.SetSockOpt(tcpip.ReusePortOption(v)))
-
- case linux.SO_BINDTODEVICE:
- // Use the bytes before the first NUL, or all of optVal when it
- // contains none.
- n := bytes.IndexByte(optVal, 0)
- if n == -1 {
- n = len(optVal)
- }
- return syserr.TranslateNetstackError(ep.SetSockOpt(tcpip.BindToDeviceOption(optVal[:n])))
-
- case linux.SO_BROADCAST:
- if len(optVal) < sizeOfInt32 {
- return syserr.ErrInvalidArgument
- }
-
- v := usermem.ByteOrder.Uint32(optVal)
- return syserr.TranslateNetstackError(ep.SetSockOpt(tcpip.BroadcastOption(v)))
-
- case linux.SO_PASSCRED:
- if len(optVal) < sizeOfInt32 {
- return syserr.ErrInvalidArgument
- }
-
- v := usermem.ByteOrder.Uint32(optVal)
- return syserr.TranslateNetstackError(ep.SetSockOpt(tcpip.PasscredOption(v)))
-
- case linux.SO_KEEPALIVE:
- if len(optVal) < sizeOfInt32 {
- return syserr.ErrInvalidArgument
- }
-
- v := usermem.ByteOrder.Uint32(optVal)
- return syserr.TranslateNetstackError(ep.SetSockOpt(tcpip.KeepaliveEnabledOption(v)))
-
- case linux.SO_SNDTIMEO:
- if len(optVal) < linux.SizeOfTimeval {
- return syserr.ErrInvalidArgument
- }
-
- var v linux.Timeval
- binary.Unmarshal(optVal[:linux.SizeOfTimeval], usermem.ByteOrder, &v)
- // The microsecond field must be in [0, one second).
- if v.Usec < 0 || v.Usec >= int64(time.Second/time.Microsecond) {
- return syserr.ErrDomain
- }
- s.SetSendTimeout(v.ToNsecCapped())
- return nil
-
- case linux.SO_RCVTIMEO:
- if len(optVal) < linux.SizeOfTimeval {
- return syserr.ErrInvalidArgument
- }
-
- var v linux.Timeval
- binary.Unmarshal(optVal[:linux.SizeOfTimeval], usermem.ByteOrder, &v)
- // The microsecond field must be in [0, one second).
- if v.Usec < 0 || v.Usec >= int64(time.Second/time.Microsecond) {
- return syserr.ErrDomain
- }
- s.SetRecvTimeout(v.ToNsecCapped())
- return nil
-
- case linux.SO_OOBINLINE:
- if len(optVal) < sizeOfInt32 {
- return syserr.ErrInvalidArgument
- }
-
- v := usermem.ByteOrder.Uint32(optVal)
-
- // Turning the option off is reported as unimplemented, but the value
- // is still passed to the endpoint below.
- if v == 0 {
- socket.SetSockOptEmitUnimplementedEvent(t, name)
- }
-
- return syserr.TranslateNetstackError(ep.SetSockOpt(tcpip.OutOfBandInlineOption(v)))
-
- case linux.SO_LINGER:
- if len(optVal) < linux.SizeOfLinger {
- return syserr.ErrInvalidArgument
- }
-
- var v linux.Linger
- binary.Unmarshal(optVal[:linux.SizeOfLinger], usermem.ByteOrder, &v)
-
- // A non-zero linger request is reported as unimplemented; the value
- // is not stored and the call still succeeds.
- if v != (linux.Linger{}) {
- socket.SetSockOptEmitUnimplementedEvent(t, name)
- }
-
- return nil
-
- default:
- socket.SetSockOptEmitUnimplementedEvent(t, name)
- }
-
- // Default to the old behavior; hand off to network stack.
- return syserr.TranslateNetstackError(ep.SetSockOpt(struct{}{}))
-}
-
-// setSockOptTCP implements SetSockOpt when level is SOL_TCP.
-//
-// Integer options require at least sizeOfInt32 bytes in optVal and are
-// decoded with usermem.ByteOrder. TCP_CONGESTION passes optVal through as
-// the option value. Names not handled below emit an unimplemented event
-// where applicable and are then handed to the endpoint as an empty
-// struct{}{} option.
-func setSockOptTCP(t *kernel.Task, ep commonEndpoint, name int, optVal []byte) *syserr.Error {
- switch name {
- case linux.TCP_NODELAY:
- if len(optVal) < sizeOfInt32 {
- return syserr.ErrInvalidArgument
- }
-
- v := usermem.ByteOrder.Uint32(optVal)
- // tcpip.DelayOption is the inverse of TCP_NODELAY: clearing
- // TCP_NODELAY sets the delay option to 1, setting it leaves it at 0.
- var o tcpip.DelayOption
- if v == 0 {
- o = 1
- }
- return syserr.TranslateNetstackError(ep.SetSockOpt(o))
-
- case linux.TCP_CORK:
- if len(optVal) < sizeOfInt32 {
- return syserr.ErrInvalidArgument
- }
-
- v := usermem.ByteOrder.Uint32(optVal)
- return syserr.TranslateNetstackError(ep.SetSockOpt(tcpip.CorkOption(v)))
-
- case linux.TCP_QUICKACK:
- if len(optVal) < sizeOfInt32 {
- return syserr.ErrInvalidArgument
- }
-
- v := usermem.ByteOrder.Uint32(optVal)
- return syserr.TranslateNetstackError(ep.SetSockOpt(tcpip.QuickAckOption(v)))
-
- case linux.TCP_MAXSEG:
- if len(optVal) < sizeOfInt32 {
- return syserr.ErrInvalidArgument
- }
-
- v := usermem.ByteOrder.Uint32(optVal)
- return syserr.TranslateNetstackError(ep.SetSockOpt(tcpip.MaxSegOption(v)))
-
- case linux.TCP_KEEPIDLE:
- if len(optVal) < sizeOfInt32 {
- return syserr.ErrInvalidArgument
- }
-
- v := usermem.ByteOrder.Uint32(optVal)
- // Accept only 1..MAX_TCP_KEEPIDLE; the value is in seconds.
- if v < 1 || v > linux.MAX_TCP_KEEPIDLE {
- return syserr.ErrInvalidArgument
- }
- return syserr.TranslateNetstackError(ep.SetSockOpt(tcpip.KeepaliveIdleOption(time.Second * time.Duration(v))))
-
- case linux.TCP_KEEPINTVL:
- if len(optVal) < sizeOfInt32 {
- return syserr.ErrInvalidArgument
- }
-
- v := usermem.ByteOrder.Uint32(optVal)
- // Accept only 1..MAX_TCP_KEEPINTVL; the value is in seconds.
- if v < 1 || v > linux.MAX_TCP_KEEPINTVL {
- return syserr.ErrInvalidArgument
- }
- return syserr.TranslateNetstackError(ep.SetSockOpt(tcpip.KeepaliveIntervalOption(time.Second * time.Duration(v))))
-
- case linux.TCP_CONGESTION:
- // The raw bytes of optVal are used as the algorithm name, with no
- // length check or NUL trimming here.
- v := tcpip.CongestionControlOption(optVal)
- if err := ep.SetSockOpt(v); err != nil {
- return syserr.TranslateNetstackError(err)
- }
- return nil
-
- case linux.TCP_REPAIR_OPTIONS:
- t.Kernel().EmitUnimplementedEvent(t)
-
- default:
- emitUnimplementedEventTCP(t, name)
- }
-
- // Default to the old behavior; hand off to network stack.
- return syserr.TranslateNetstackError(ep.SetSockOpt(struct{}{}))
-}
-
-// setSockOptIPv6 implements SetSockOpt when level is SOL_IPV6.
-//
-// Only IPV6_V6ONLY is applied to the endpoint as a real option. The other
-// listed names emit an unimplemented event, unlisted names are passed to
-// emitUnimplementedEventIPv6, and in both cases the endpoint is then given
-// an empty struct{}{} option.
-func setSockOptIPv6(t *kernel.Task, ep commonEndpoint, name int, optVal []byte) *syserr.Error {
- switch name {
- case linux.IPV6_V6ONLY:
- if len(optVal) < sizeOfInt32 {
- return syserr.ErrInvalidArgument
- }
-
- v := usermem.ByteOrder.Uint32(optVal)
- return syserr.TranslateNetstackError(ep.SetSockOpt(tcpip.V6OnlyOption(v)))
-
- case linux.IPV6_ADD_MEMBERSHIP,
- linux.IPV6_DROP_MEMBERSHIP,
- linux.IPV6_IPSEC_POLICY,
- linux.IPV6_JOIN_ANYCAST,
- linux.IPV6_LEAVE_ANYCAST,
- linux.IPV6_PKTINFO,
- linux.IPV6_ROUTER_ALERT,
- linux.IPV6_XFRM_POLICY,
- linux.MCAST_BLOCK_SOURCE,
- linux.MCAST_JOIN_GROUP,
- linux.MCAST_JOIN_SOURCE_GROUP,
- linux.MCAST_LEAVE_GROUP,
- linux.MCAST_LEAVE_SOURCE_GROUP,
- linux.MCAST_UNBLOCK_SOURCE:
-
- // Set-only names that are recognized but not implemented.
- t.Kernel().EmitUnimplementedEvent(t)
-
- default:
- emitUnimplementedEventIPv6(t, name)
- }
-
- // Default to the old behavior; hand off to network stack.
- return syserr.TranslateNetstackError(ep.SetSockOpt(struct{}{}))
-}
-
-// Encoded sizes of the two multicast request structures, computed once with
-// binary.Size. copyInMulticastRequest compares the length of the option
-// value against them to decide which structure was passed.
-var (
- inetMulticastRequestSize = int(binary.Size(linux.InetMulticastRequest{}))
- inetMulticastRequestWithNICSize = int(binary.Size(linux.InetMulticastRequestWithNIC{}))
-)
-
-// copyInMulticastRequest copies in a variable-size multicast request. The
-// kernel determines which structure was passed by its length. IP_MULTICAST_IF
-// supports ip_mreqn, ip_mreq and in_addr, while IP_ADD_MEMBERSHIP and
-// IP_DROP_MEMBERSHIP only support ip_mreqn and ip_mreq. To handle this,
-// allowAddr controls whether in_addr is accepted or rejected.
-//
-// The result is always a linux.InetMulticastRequestWithNIC; fields that the
-// shorter input forms do not carry are left at their zero value. An optVal
-// shorter than a linux.InetAddr yields syserr.ErrInvalidArgument.
-func copyInMulticastRequest(optVal []byte, allowAddr bool) (linux.InetMulticastRequestWithNIC, *syserr.Error) {
-	var req linux.InetMulticastRequestWithNIC
-
-	switch n := len(optVal); {
-	case n < len(linux.InetAddr{}):
-		// Too short for even a bare address.
-		return linux.InetMulticastRequestWithNIC{}, syserr.ErrInvalidArgument
-
-	case n < inetMulticastRequestSize:
-		// Bare in_addr: only the interface address is supplied.
-		if !allowAddr {
-			return linux.InetMulticastRequestWithNIC{}, syserr.ErrInvalidArgument
-		}
-		copy(req.InterfaceAddr[:], optVal)
-
-	case n >= inetMulticastRequestWithNICSize:
-		// Full ip_mreqn.
-		binary.Unmarshal(optVal[:inetMulticastRequestWithNICSize], usermem.ByteOrder, &req)
-
-	default:
-		// ip_mreq: fills the embedded request and leaves the rest zero.
-		binary.Unmarshal(optVal[:inetMulticastRequestSize], usermem.ByteOrder, &req.InetMulticastRequest)
-	}
-
-	return req, nil
-}
-
-// parseIntOrChar copies either a 32-bit int or an 8-bit uint out of buf.
-//
-// A buffer of at least sizeOfInt32 bytes is decoded as a 32-bit value using
-// usermem.ByteOrder; a shorter non-empty buffer contributes only its first
-// byte; an empty buffer is rejected with syserr.ErrInvalidArgument.
-//
-// net/ipv4/ip_sockglue.c:do_ip_setsockopt does this for its socket options.
-func parseIntOrChar(buf []byte) (int32, *syserr.Error) {
-	switch {
-	case len(buf) == 0:
-		return 0, syserr.ErrInvalidArgument
-	case len(buf) >= sizeOfInt32:
-		return int32(usermem.ByteOrder.Uint32(buf)), nil
-	default:
-		return int32(buf[0]), nil
-	}
-}
-
-// setSockOptIP implements SetSockOpt when level is SOL_IP.
-//
-// The multicast TTL, membership, interface and loop options are decoded from
-// optVal and applied to ep. MCAST_JOIN_GROUP fails with
-// syserr.ErrInvalidArgument. The remaining listed names emit an
-// unimplemented event, and they and any unlisted name are then handed to the
-// endpoint as an empty struct{}{} option.
-func setSockOptIP(t *kernel.Task, ep commonEndpoint, name int, optVal []byte) *syserr.Error {
- switch name {
- case linux.IP_MULTICAST_TTL:
- v, err := parseIntOrChar(optVal)
- if err != nil {
- return err
- }
-
- if v == -1 {
- // Linux translates -1 to 1.
- v = 1
- }
- // Any remaining value outside 0..255 is rejected.
- if v < 0 || v > 255 {
- return syserr.ErrInvalidArgument
- }
- return syserr.TranslateNetstackError(ep.SetSockOpt(tcpip.MulticastTTLOption(v)))
-
- case linux.IP_ADD_MEMBERSHIP:
- req, err := copyInMulticastRequest(optVal, false /* allowAddr */)
- if err != nil {
- return err
- }
-
- return syserr.TranslateNetstackError(ep.SetSockOpt(tcpip.AddMembershipOption{
- NIC: tcpip.NICID(req.InterfaceIndex),
- // TODO(igudger): Change AddMembership to use the standard
- // any address representation.
- InterfaceAddr: tcpip.Address(req.InterfaceAddr[:]),
- MulticastAddr: tcpip.Address(req.MulticastAddr[:]),
- }))
-
- case linux.IP_DROP_MEMBERSHIP:
- req, err := copyInMulticastRequest(optVal, false /* allowAddr */)
- if err != nil {
- return err
- }
-
- return syserr.TranslateNetstackError(ep.SetSockOpt(tcpip.RemoveMembershipOption{
- NIC: tcpip.NICID(req.InterfaceIndex),
- // TODO(igudger): Change DropMembership to use the standard
- // any address representation.
- InterfaceAddr: tcpip.Address(req.InterfaceAddr[:]),
- MulticastAddr: tcpip.Address(req.MulticastAddr[:]),
- }))
-
- case linux.IP_MULTICAST_IF:
- // Unlike the membership options, a bare in_addr is accepted here.
- req, err := copyInMulticastRequest(optVal, true /* allowAddr */)
- if err != nil {
- return err
- }
-
- return syserr.TranslateNetstackError(ep.SetSockOpt(tcpip.MulticastInterfaceOption{
- NIC: tcpip.NICID(req.InterfaceIndex),
- InterfaceAddr: bytesToIPAddress(req.InterfaceAddr[:]),
- }))
-
- case linux.IP_MULTICAST_LOOP:
- v, err := parseIntOrChar(optVal)
- if err != nil {
- return err
- }
-
- // Any non-zero value enables multicast loop.
- return syserr.TranslateNetstackError(ep.SetSockOpt(
- tcpip.MulticastLoopOption(v != 0),
- ))
-
- case linux.MCAST_JOIN_GROUP:
- // FIXME(b/124219304): Implement MCAST_JOIN_GROUP.
- t.Kernel().EmitUnimplementedEvent(t)
- return syserr.ErrInvalidArgument
-
- case linux.IP_ADD_SOURCE_MEMBERSHIP,
- linux.IP_BIND_ADDRESS_NO_PORT,
- linux.IP_BLOCK_SOURCE,
- linux.IP_CHECKSUM,
- linux.IP_DROP_SOURCE_MEMBERSHIP,
- linux.IP_FREEBIND,
- linux.IP_HDRINCL,
- linux.IP_IPSEC_POLICY,
- linux.IP_MINTTL,
- linux.IP_MSFILTER,
- linux.IP_MTU_DISCOVER,
- linux.IP_MULTICAST_ALL,
- linux.IP_NODEFRAG,
- linux.IP_OPTIONS,
- linux.IP_PASSSEC,
- linux.IP_PKTINFO,
- linux.IP_RECVERR,
- linux.IP_RECVFRAGSIZE,
- linux.IP_RECVOPTS,
- linux.IP_RECVORIGDSTADDR,
- linux.IP_RECVTOS,
- linux.IP_RECVTTL,
- linux.IP_RETOPTS,
- linux.IP_TOS,
- linux.IP_TRANSPARENT,
- linux.IP_TTL,
- linux.IP_UNBLOCK_SOURCE,
- linux.IP_UNICAST_IF,
- linux.IP_XFRM_POLICY,
- linux.MCAST_BLOCK_SOURCE,
- linux.MCAST_JOIN_SOURCE_GROUP,
- linux.MCAST_LEAVE_GROUP,
- linux.MCAST_LEAVE_SOURCE_GROUP,
- linux.MCAST_MSFILTER,
- linux.MCAST_UNBLOCK_SOURCE:
-
- // Recognized names that are not implemented.
- t.Kernel().EmitUnimplementedEvent(t)
- }
-
- // Default to the old behavior; hand off to network stack.
- return syserr.TranslateNetstackError(ep.SetSockOpt(struct{}{}))
-}
-
-// emitUnimplementedEventTCP emits an unimplemented event if name is one of
-// the SOL_TCP option names listed below, and does nothing for any other
-// name. The list holds names that are common to GetSockOpt and SetSockOpt
-// when level is SOL_TCP.
-func emitUnimplementedEventTCP(t *kernel.Task, name int) {
- switch name {
- case linux.TCP_CONGESTION,
- linux.TCP_CORK,
- linux.TCP_DEFER_ACCEPT,
- linux.TCP_FASTOPEN,
- linux.TCP_FASTOPEN_CONNECT,
- linux.TCP_FASTOPEN_KEY,
- linux.TCP_FASTOPEN_NO_COOKIE,
- linux.TCP_KEEPCNT,
- linux.TCP_KEEPIDLE,
- linux.TCP_KEEPINTVL,
- linux.TCP_LINGER2,
- linux.TCP_MAXSEG,
- linux.TCP_QUEUE_SEQ,
- linux.TCP_QUICKACK,
- linux.TCP_REPAIR,
- linux.TCP_REPAIR_QUEUE,
- linux.TCP_REPAIR_WINDOW,
- linux.TCP_SAVED_SYN,
- linux.TCP_SAVE_SYN,
- linux.TCP_SYNCNT,
- linux.TCP_THIN_DUPACK,
- linux.TCP_THIN_LINEAR_TIMEOUTS,
- linux.TCP_TIMESTAMP,
- linux.TCP_ULP,
- linux.TCP_USER_TIMEOUT,
- linux.TCP_WINDOW_CLAMP:
-
- t.Kernel().EmitUnimplementedEvent(t)
- }
-}
-
-// emitUnimplementedEventIPv6 emits an unimplemented event if name is one of
-// the SOL_IPV6 option names listed below, and does nothing for any other
-// name. The list holds names that are common to GetSockOpt and SetSockOpt
-// when level is SOL_IPV6.
-func emitUnimplementedEventIPv6(t *kernel.Task, name int) {
- switch name {
- case linux.IPV6_2292DSTOPTS,
- linux.IPV6_2292HOPLIMIT,
- linux.IPV6_2292HOPOPTS,
- linux.IPV6_2292PKTINFO,
- linux.IPV6_2292PKTOPTIONS,
- linux.IPV6_2292RTHDR,
- linux.IPV6_ADDR_PREFERENCES,
- linux.IPV6_AUTOFLOWLABEL,
- linux.IPV6_DONTFRAG,
- linux.IPV6_DSTOPTS,
- linux.IPV6_FLOWINFO,
- linux.IPV6_FLOWINFO_SEND,
- linux.IPV6_FLOWLABEL_MGR,
- linux.IPV6_FREEBIND,
- linux.IPV6_HOPOPTS,
- linux.IPV6_MINHOPCOUNT,
- linux.IPV6_MTU,
- linux.IPV6_MTU_DISCOVER,
- linux.IPV6_MULTICAST_ALL,
- linux.IPV6_MULTICAST_HOPS,
- linux.IPV6_MULTICAST_IF,
- linux.IPV6_MULTICAST_LOOP,
- linux.IPV6_RECVDSTOPTS,
- linux.IPV6_RECVERR,
- linux.IPV6_RECVFRAGSIZE,
- linux.IPV6_RECVHOPLIMIT,
- linux.IPV6_RECVHOPOPTS,
- linux.IPV6_RECVORIGDSTADDR,
- linux.IPV6_RECVPATHMTU,
- linux.IPV6_RECVPKTINFO,
- linux.IPV6_RECVRTHDR,
- linux.IPV6_RECVTCLASS,
- linux.IPV6_RTHDR,
- linux.IPV6_RTHDRDSTOPTS,
- linux.IPV6_TCLASS,
- linux.IPV6_TRANSPARENT,
- linux.IPV6_UNICAST_HOPS,
- linux.IPV6_UNICAST_IF,
- linux.MCAST_MSFILTER,
- linux.IPV6_ADDRFORM:
-
- t.Kernel().EmitUnimplementedEvent(t)
- }
-}
-
-// emitUnimplementedEventIP emits an unimplemented event if name is one of
-// the SOL_IP option names listed below, and does nothing for any other name.
-// The list holds names that are common to GetSockOpt and SetSockOpt when
-// level is SOL_IP.
-func emitUnimplementedEventIP(t *kernel.Task, name int) {
- switch name {
- case linux.IP_TOS,
- linux.IP_TTL,
- linux.IP_HDRINCL,
- linux.IP_OPTIONS,
- linux.IP_ROUTER_ALERT,
- linux.IP_RECVOPTS,
- linux.IP_RETOPTS,
- linux.IP_PKTINFO,
- linux.IP_PKTOPTIONS,
- linux.IP_MTU_DISCOVER,
- linux.IP_RECVERR,
- linux.IP_RECVTTL,
- linux.IP_RECVTOS,
- linux.IP_MTU,
- linux.IP_FREEBIND,
- linux.IP_IPSEC_POLICY,
- linux.IP_XFRM_POLICY,
- linux.IP_PASSSEC,
- linux.IP_TRANSPARENT,
- linux.IP_ORIGDSTADDR,
- linux.IP_MINTTL,
- linux.IP_NODEFRAG,
- linux.IP_CHECKSUM,
- linux.IP_BIND_ADDRESS_NO_PORT,
- linux.IP_RECVFRAGSIZE,
- linux.IP_MULTICAST_IF,
- linux.IP_MULTICAST_TTL,
- linux.IP_MULTICAST_LOOP,
- linux.IP_ADD_MEMBERSHIP,
- linux.IP_DROP_MEMBERSHIP,
- linux.IP_UNBLOCK_SOURCE,
- linux.IP_BLOCK_SOURCE,
- linux.IP_ADD_SOURCE_MEMBERSHIP,
- linux.IP_DROP_SOURCE_MEMBERSHIP,
- linux.IP_MSFILTER,
- linux.MCAST_JOIN_GROUP,
- linux.MCAST_BLOCK_SOURCE,
- linux.MCAST_UNBLOCK_SOURCE,
- linux.MCAST_LEAVE_GROUP,
- linux.MCAST_JOIN_SOURCE_GROUP,
- linux.MCAST_LEAVE_SOURCE_GROUP,
- linux.MCAST_MSFILTER,
- linux.IP_MULTICAST_ALL,
- linux.IP_UNICAST_IF:
-
- t.Kernel().EmitUnimplementedEvent(t)
- }
-}
-
-// isLinkLocal reports whether the given IPv6 address is link-local, that is,
-// whether it carries the fe80::/10 prefix. Addresses shorter than two bytes
-// are never link-local. The check is used to decide when the NICID is
-// relevant for a given IPv6 address.
-func isLinkLocal(addr tcpip.Address) bool {
-	if len(addr) < 2 {
-		return false
-	}
-	// First byte 0xfe, and the top two bits of the second byte equal to 10.
-	return addr[0] == 0xfe && addr[1]&0xc0 == 0x80
-}
-
-// ConvertAddress converts the given address to a native format.
-//
-// It returns the Linux sockaddr for family together with the address length
-// to report. AF_UNIX, AF_INET and AF_INET6 are supported; any other family
-// yields (nil, 0).
-func ConvertAddress(family int, addr tcpip.FullAddress) (linux.SockAddr, uint32) {
-	switch family {
-	case linux.AF_UNIX:
-		var unixAddr linux.SockAddrUnix
-		unixAddr.Family = linux.AF_UNIX
-		pathLen := len(addr.Addr)
-		for i := 0; i < pathLen; i++ {
-			unixAddr.Path[i] = int8(addr.Addr[i])
-		}
-
-		// Linux returns the used length of the address struct (including
-		// the null terminator) for filesystem paths. The Family field is 2
-		// bytes. The null terminator is left out when the path fills the
-		// whole Path array, and abstract and empty paths always report
-		// their exact length.
-		size := uint32(2 + pathLen)
-		if pathLen != 0 && unixAddr.Path[0] != 0 && pathLen != len(unixAddr.Path) {
-			size++
-		}
-		return &unixAddr, size
-
-	case linux.AF_INET:
-		inetAddr := linux.SockAddrInet{
-			Family: linux.AF_INET,
-			Port:   htons(addr.Port),
-		}
-		copy(inetAddr.Addr[:], addr.Addr)
-		return &inetAddr, uint32(binary.Size(inetAddr))
-
-	case linux.AF_INET6:
-		inet6Addr := linux.SockAddrInet6{
-			Family: linux.AF_INET6,
-			Port:   htons(addr.Port),
-		}
-		if len(addr.Addr) == 4 {
-			// Copy the address in v4-mapped format (::ffff:a.b.c.d).
-			inet6Addr.Addr[10], inet6Addr.Addr[11] = 0xff, 0xff
-			copy(inet6Addr.Addr[12:], addr.Addr)
-		} else {
-			copy(inet6Addr.Addr[:], addr.Addr)
-		}
-		// The scope ID is only filled in for link-local addresses.
-		if isLinkLocal(addr.Addr) {
-			inet6Addr.Scope_id = uint32(addr.NIC)
-		}
-		return &inet6Addr, uint32(binary.Size(inet6Addr))
-	}
-
-	return nil, 0
-}
-
-// GetSockName implements the linux syscall getsockname(2) for sockets backed by
-// tcpip.Endpoint. It reports the endpoint's local address converted to the
-// socket's address family.
-func (s *SocketOperations) GetSockName(t *kernel.Task) (linux.SockAddr, uint32, *syserr.Error) {
-	local, terr := s.Endpoint.GetLocalAddress()
-	if terr != nil {
-		return nil, 0, syserr.TranslateNetstackError(terr)
-	}
-
-	sockAddr, sockAddrLen := ConvertAddress(s.family, local)
-	return sockAddr, sockAddrLen, nil
-}
-
-// GetPeerName implements the linux syscall getpeername(2) for sockets backed by
-// tcpip.Endpoint. It reports the endpoint's remote address converted to the
-// socket's address family.
-func (s *SocketOperations) GetPeerName(t *kernel.Task) (linux.SockAddr, uint32, *syserr.Error) {
-	remote, terr := s.Endpoint.GetRemoteAddress()
-	if terr != nil {
-		return nil, 0, syserr.TranslateNetstackError(terr)
-	}
-
-	sockAddr, sockAddrLen := ConvertAddress(s.family, remote)
-	return sockAddr, sockAddrLen, nil
-}
-
-// coalescingRead is the fast path for the non-blocking, non-peek, stream-based
-// case. It keeps pulling views from the endpoint until dst is full or no more
-// data can be fetched, so that as many packets as possible are delivered in
-// one call. When discard is set the bytes are consumed without being copied
-// out to dst.
-//
-// If any bytes were consumed they are reported and the error (if any) is
-// dropped; otherwise the error that stopped the loop is returned.
-//
-// Precondition: s.readMu must be locked.
-func (s *SocketOperations) coalescingRead(ctx context.Context, dst usermem.IOSequence, discard bool) (int, *syserr.Error) {
-	var (
-		total   int
-		lastErr *syserr.Error
-	)
-
-	// Consume as many views as fit into the user-provided buffer.
-	for dst.NumBytes() != 0 {
-		if lastErr = s.fetchReadView(); lastErr != nil {
-			break
-		}
-
-		var (
-			done    int
-			copyErr error
-		)
-		if !discard {
-			done, copyErr = dst.CopyOut(ctx, s.readView)
-			// Set the control message, even if 0 bytes were read.
-			if copyErr == nil {
-				s.updateTimestamp()
-			}
-		} else {
-			// Nothing is copied; just account for what would have fit.
-			done = len(s.readView)
-			if int64(done) > dst.NumBytes() {
-				done = int(dst.NumBytes())
-			}
-		}
-
-		total += done
-		s.readView.TrimFront(done)
-		dst = dst.DropFirst(done)
-		if copyErr != nil {
-			lastErr = syserr.FromError(copyErr)
-			break
-		}
-	}
-
-	if total <= 0 {
-		return 0, lastErr
-	}
-
-	// Data was consumed, so it must be delivered even if an error followed.
-	s.Endpoint.ModerateRecvBuf(total)
-	return total, nil
-}
-
-// fillCmsgInq records in cmsg the number of bytes still available to read:
-// the unread remainder of s.readView plus the endpoint's receive queue size.
-// It does nothing when s.sockOptInq is unset or the queue size cannot be
-// queried.
-//
-// NOTE(review): this reads s.readView, which the read paths modify with
-// s.readMu held; callers presumably need to hold it too. Confirm.
-func (s *SocketOperations) fillCmsgInq(cmsg *socket.ControlMessages) {
-	if s.sockOptInq {
-		if queued, terr := s.Endpoint.GetSockOptInt(tcpip.ReceiveQueueSizeOption); terr == nil {
-			cmsg.IP.HasInq = true
-			cmsg.IP.Inq = int32(len(s.readView) + queued)
-		}
-	}
-}
-
-// nonBlockingRead issues a non-blocking read.
-//
-// It returns the byte count to report, the message flags (only MSG_TRUNC is
-// ever set here), the sender address and its length (packet-based sockets with
-// senderRequested only), the control messages and any error.
-//
-// Stream sockets that are not peeking take the coalescingRead fast path, where
-// trunc means "discard the data". Every other combination operates on the
-// current readView. s.readMu is held for the whole operation on both paths,
-// including while the control messages are assembled.
-//
-// TODO(b/78348848): Support timestamps for stream sockets.
-func (s *SocketOperations) nonBlockingRead(ctx context.Context, dst usermem.IOSequence, peek, trunc, senderRequested bool) (int, int, linux.SockAddr, uint32, socket.ControlMessages, *syserr.Error) {
-	isPacket := s.isPacketBased()
-
-	// Fast path for regular reads from stream (e.g., TCP) endpoints. Note
-	// that senderRequested is ignored for stream sockets.
-	if !peek && !isPacket {
-		// TCP sockets discard the data if MSG_TRUNC is set.
-		//
-		// This behavior is documented in man 7 tcp:
-		// Since version 2.4, Linux supports the use of MSG_TRUNC in the flags
-		// argument of recv(2) (and recvmsg(2)). This flag causes the received
-		// bytes of data to be discarded, rather than passed back in a
-		// caller-supplied buffer.
-		s.readMu.Lock()
-		n, err := s.coalescingRead(ctx, dst, trunc)
-		// Build the control messages before dropping readMu: controlMessages
-		// and fillCmsgInq read per-socket read state (readCM, readView) that
-		// the read paths only touch with readMu held, and the slow path below
-		// likewise builds them under the lock.
-		cmsg := s.controlMessages()
-		s.fillCmsgInq(&cmsg)
-		s.readMu.Unlock()
-		return n, 0, nil, 0, cmsg, err
-	}
-
-	s.readMu.Lock()
-	defer s.readMu.Unlock()
-
-	if err := s.fetchReadView(); err != nil {
-		return 0, 0, nil, 0, socket.ControlMessages{}, err
-	}
-
-	if !isPacket && peek && trunc {
-		// MSG_TRUNC with MSG_PEEK on a TCP socket returns the
-		// amount that could be read.
-		rql, err := s.Endpoint.GetSockOptInt(tcpip.ReceiveQueueSizeOption)
-		if err != nil {
-			return 0, 0, nil, 0, socket.ControlMessages{}, syserr.TranslateNetstackError(err)
-		}
-		available := len(s.readView) + int(rql)
-		bufLen := int(dst.NumBytes())
-		if available < bufLen {
-			return available, 0, nil, 0, socket.ControlMessages{}, nil
-		}
-		return bufLen, 0, nil, 0, socket.ControlMessages{}, nil
-	}
-
-	n, err := dst.CopyOut(ctx, s.readView)
-	// Set the control message, even if 0 bytes were read.
-	if err == nil {
-		s.updateTimestamp()
-	}
-	var addr linux.SockAddr
-	var addrLen uint32
-	if isPacket && senderRequested {
-		addr, addrLen = ConvertAddress(s.family, s.sender)
-	}
-
-	if peek {
-		if l := len(s.readView); trunc && l > n {
-			// isPacket must be true.
-			return l, linux.MSG_TRUNC, addr, addrLen, s.controlMessages(), syserr.FromError(err)
-		}
-
-		if isPacket || err != nil {
-			return n, 0, addr, addrLen, s.controlMessages(), syserr.FromError(err)
-		}
-
-		// We need to peek beyond the first message.
-		dst = dst.DropFirst(n)
-		num, err := dst.CopyOutFrom(ctx, safemem.FromVecReaderFunc{func(dsts [][]byte) (int64, error) {
-			n, _, err := s.Endpoint.Peek(dsts)
-			// TODO(b/78348848): Handle peek timestamp.
-			if err != nil {
-				return int64(n), syserr.TranslateNetstackError(err).ToError()
-			}
-			return int64(n), nil
-		}})
-		n += int(num)
-		if err == syserror.ErrWouldBlock && n > 0 {
-			// We got some data, so no need to return an error.
-			err = nil
-		}
-		return n, 0, nil, 0, s.controlMessages(), syserr.FromError(err)
-	}
-
-	// Consume what was delivered: a packet is dropped whole even if only
-	// part of it fit into dst, a stream view only loses the copied prefix.
-	var msgLen int
-	if isPacket {
-		msgLen = len(s.readView)
-		s.readView = nil
-	} else {
-		msgLen = int(n)
-		s.readView.TrimFront(int(n))
-	}
-
-	var flags int
-	if msgLen > int(n) {
-		flags |= linux.MSG_TRUNC
-	}
-
-	// With MSG_TRUNC the caller is told the full message length rather than
-	// the number of bytes actually copied.
-	if trunc {
-		n = msgLen
-	}
-
-	cmsg := s.controlMessages()
-	s.fillCmsgInq(&cmsg)
-	return n, flags, addr, addrLen, cmsg, syserr.FromError(err)
-}
-
-// controlMessages builds the control messages for the most recent read from
-// s.readCM. The timestamp is only flagged as present when the packet carried
-// one and s.sockOptTimestamp is set.
-func (s *SocketOperations) controlMessages() socket.ControlMessages {
-	ipCM := tcpip.ControlMessages{
-		HasTimestamp: s.readCM.HasTimestamp && s.sockOptTimestamp,
-		Timestamp:    s.readCM.Timestamp,
-	}
-	return socket.ControlMessages{IP: ipCM}
-}
-
-// updateTimestamp records the timestamp that SIOCGSTAMP reports. Call it after
-// packet data has been successfully written out to userspace.
-//
-// Precondition: s.readMu must be locked.
-func (s *SocketOperations) updateTimestamp() {
-	// The SIOCGSTAMP timestamp is only saved while SO_TIMESTAMP is disabled.
-	if s.sockOptTimestamp {
-		return
-	}
-	s.timestampNS = s.readCM.Timestamp
-	s.timestampValid = true
-}
-
-// RecvMsg implements the linux syscall recvmsg(2) for sockets backed by
-// tcpip.Endpoint. It tries a non-blocking read first and only blocks (until deadline when haveDeadline is set) while more data is required; controlDataLen is not referenced by this implementation.
-func (s *SocketOperations) RecvMsg(t *kernel.Task, dst usermem.IOSequence, flags int, haveDeadline bool, deadline ktime.Time, senderRequested bool, controlDataLen uint64) (n int, msgFlags int, senderAddr linux.SockAddr, senderAddrLen uint32, controlMessages socket.ControlMessages, err *syserr.Error) {
- trunc := flags&linux.MSG_TRUNC != 0
- peek := flags&linux.MSG_PEEK != 0
- dontWait := flags&linux.MSG_DONTWAIT != 0
- waitAll := flags&linux.MSG_WAITALL != 0
- if senderRequested && !s.isPacketBased() {
- // Stream sockets ignore the sender address.
- senderRequested = false
- }
- n, msgFlags, senderAddr, senderAddrLen, controlMessages, err = s.nonBlockingRead(t, dst, peek, trunc, senderRequested) // First attempt never blocks.
-
- if s.isPacketBased() && err == syserr.ErrClosedForReceive && flags&linux.MSG_DONTWAIT != 0 {
- // In this situation we should return EAGAIN.
- return 0, 0, nil, 0, socket.ControlMessages{}, syserr.ErrTryAgain
- }
-
- if err != nil && (err != syserr.ErrWouldBlock || dontWait) {
- // Read failed and we should not retry.
- return 0, 0, nil, 0, socket.ControlMessages{}, err
- }
-
- if err == nil && (dontWait || !waitAll || s.isPacketBased() || int64(n) >= dst.NumBytes()) {
- // We got all the data we need.
- return // Naked return: hands back the named results assigned above.
- }
-
- // Don't overwrite any data we received.
- dst = dst.DropFirst(n)
-
- // We'll have to block. Register for notifications and keep trying to
- // receive all the data.
- e, ch := waiter.NewChannelEntry(nil)
- s.EventRegister(&e, waiter.EventIn)
- defer s.EventUnregister(&e)
-
- for {
- var rn int
- rn, msgFlags, senderAddr, senderAddrLen, controlMessages, err = s.nonBlockingRead(t, dst, peek, trunc, senderRequested)
- n += rn // n accumulates across retries; rn is this iteration only.
- if err != nil && err != syserr.ErrWouldBlock {
- // Always stop on errors other than would block as we generally
- // won't be able to get any more data. Eat the error if we got
- // any data.
- if n > 0 {
- err = nil
- }
- return
- }
- if err == nil && (s.isPacketBased() || !waitAll || int64(rn) >= dst.NumBytes()) {
- // We got all the data we need.
- return
- }
- dst = dst.DropFirst(rn)
-
- if err := t.BlockWithDeadline(ch, haveDeadline, deadline); err != nil { // This err shadows the named result.
- if n > 0 {
- return n, msgFlags, senderAddr, senderAddrLen, controlMessages, nil // Partial data wins over the wait error.
- }
- if err == syserror.ETIMEDOUT {
- return 0, 0, nil, 0, socket.ControlMessages{}, syserr.ErrTryAgain // A timeout is reported as ErrTryAgain.
- }
- return 0, 0, nil, 0, socket.ControlMessages{}, syserr.FromError(err)
- }
- }
-}
-
-// SendMsg implements the linux syscall sendmsg(2) for sockets backed by
-// tcpip.Endpoint. It returns the number of bytes written; unless MSG_DONTWAIT is set it keeps writing (blocking until deadline when haveDeadline is set) until all of src has been accepted.
-func (s *SocketOperations) SendMsg(t *kernel.Task, src usermem.IOSequence, to []byte, flags int, haveDeadline bool, deadline ktime.Time, controlMessages socket.ControlMessages) (int, *syserr.Error) {
- // Reject Unix control messages.
- if !controlMessages.Unix.Empty() {
- return 0, syserr.ErrInvalidArgument
- }
-
- var addr *tcpip.FullAddress // Stays nil when no destination was supplied.
- if len(to) > 0 {
- addrBuf, _, err := AddressAndFamily(s.family, to, true /* strict */)
- if err != nil {
- return 0, err
- }
-
- addr = &addrBuf
- }
-
- opts := tcpip.WriteOptions{
- To: addr,
- More: flags&linux.MSG_MORE != 0,
- EndOfRecord: flags&linux.MSG_EOR != 0,
- }
-
- v := &ioSequencePayload{t, src}
- n, resCh, err := s.Endpoint.Write(v, opts)
- if resCh != nil { // The endpoint handed back a channel to wait on before the write is retried.
- if err := t.Block(resCh); err != nil {
- return 0, syserr.FromError(err)
- }
- n, _, err = s.Endpoint.Write(v, opts)
- }
- dontWait := flags&linux.MSG_DONTWAIT != 0
- if err == nil && (n >= v.src.NumBytes() || dontWait) {
- // Complete write.
- return int(n), nil
- }
- if err != nil && (err != tcpip.ErrWouldBlock || dontWait) {
- return int(n), syserr.TranslateNetstackError(err)
- }
-
- // We'll have to block. Register for notification and keep trying to
- // send all the data.
- e, ch := waiter.NewChannelEntry(nil)
- s.EventRegister(&e, waiter.EventOut)
- defer s.EventUnregister(&e)
-
- v.DropFirst(int(n)) // Skip what the first attempt already accepted.
- total := n
- for {
- n, _, err = s.Endpoint.Write(v, opts)
- v.DropFirst(int(n))
- total += n
-
- if err != nil && err != tcpip.ErrWouldBlock && total == 0 { // Hard error with nothing sent: report it.
- return 0, syserr.TranslateNetstackError(err)
- }
-
- if err == nil && v.src.NumBytes() == 0 || err != nil && err != tcpip.ErrWouldBlock { // Done, or hard error after partial progress.
- return int(total), nil
- }
-
- if err := t.BlockWithDeadline(ch, haveDeadline, deadline); err != nil {
- if err == syserror.ETIMEDOUT {
- return int(total), syserr.ErrTryAgain
- }
- // handleIOError will consume errors from t.Block if needed.
- return int(total), syserr.FromError(err)
- }
- }
-}
-
-// Ioctl implements fs.FileOperations.Ioctl.
-//
-// SIOCGSTAMP and TIOCINQ are answered here because they depend on state kept
-// by SocketOperations (the saved timestamp and the unread remainder of
-// readView). Every other request is forwarded to the package-level Ioctl.
-func (s *SocketOperations) Ioctl(ctx context.Context, _ *fs.File, io usermem.IO, args arch.SyscallArguments) (uintptr, error) {
-	// SIOCGSTAMP is implemented by epsocket rather than all commonEndpoint
-	// sockets.
-	// TODO(b/78348848): Add a commonEndpoint method to support SIOCGSTAMP.
-	switch args[1].Int() {
-	case syscall.SIOCGSTAMP:
-		s.readMu.Lock()
-		defer s.readMu.Unlock()
-		if !s.timestampValid {
-			return 0, syserror.ENOENT
-		}
-
-		tv := linux.NsecToTimeval(s.timestampNS)
-		_, err := usermem.CopyObjectOut(ctx, io, args[2].Pointer(), &tv, usermem.IOOpts{
-			AddressSpaceActive: true,
-		})
-		return 0, err
-
-	case linux.TIOCINQ:
-		v, terr := s.Endpoint.GetSockOptInt(tcpip.ReceiveQueueSizeOption)
-		if terr != nil {
-			return 0, syserr.TranslateNetstackError(terr).ToError()
-		}
-
-		// Add bytes removed from the endpoint but not yet sent to the caller.
-		// readView is modified by the read paths with readMu held, so take
-		// the lock for the read to avoid racing with a concurrent recv.
-		s.readMu.Lock()
-		v += len(s.readView)
-		s.readMu.Unlock()
-
-		// The result is copied out as an int32; saturate rather than wrap.
-		if v > math.MaxInt32 {
-			v = math.MaxInt32
-		}
-
-		// Copy result to user-space.
-		_, err := usermem.CopyObjectOut(ctx, io, args[2].Pointer(), int32(v), usermem.IOOpts{
-			AddressSpaceActive: true,
-		})
-		return 0, err
-	}
-
-	return Ioctl(ctx, s.Endpoint, io, args)
-}
-
-// Ioctl performs a socket ioctl on ep.
-//
-// The request number is taken from args[1] and the user pointer from args[2].
-// Interface queries (SIOCGIF*) are answered from the network stack found in
-// ctx, while TIOCINQ and TIOCOUTQ report the endpoint's receive and send queue
-// sizes. Unrecognized requests fail with ENOTTY.
-func Ioctl(ctx context.Context, ep commonEndpoint, io usermem.IO, args arch.SyscallArguments) (uintptr, error) {
-	// Every copy below targets the same user pointer in the active address
-	// space.
-	userPtr := args[2].Pointer()
-	copyOpts := usermem.IOOpts{AddressSpaceActive: true}
-
-	switch cmd := int(args[1].Int()); cmd {
-	case syscall.SIOCGIFFLAGS,
-		syscall.SIOCGIFADDR,
-		syscall.SIOCGIFBRDADDR,
-		syscall.SIOCGIFDSTADDR,
-		syscall.SIOCGIFHWADDR,
-		syscall.SIOCGIFINDEX,
-		syscall.SIOCGIFMAP,
-		syscall.SIOCGIFMETRIC,
-		syscall.SIOCGIFMTU,
-		syscall.SIOCGIFNAME,
-		syscall.SIOCGIFNETMASK,
-		syscall.SIOCGIFTXQLEN:
-
-		// Read the request in, let interfaceIoctl fill it, write it back.
-		var req linux.IFReq
-		if _, err := usermem.CopyObjectIn(ctx, io, userPtr, &req, copyOpts); err != nil {
-			return 0, err
-		}
-		if serr := interfaceIoctl(ctx, io, cmd, &req); serr != nil {
-			return 0, serr.ToError()
-		}
-		_, err := usermem.CopyObjectOut(ctx, io, userPtr, &req, copyOpts)
-		return 0, err
-
-	case syscall.SIOCGIFCONF:
-		// Return a list of interface addresses or the buffer size
-		// necessary to hold the list.
-		var conf linux.IFConf
-		if _, err := usermem.CopyObjectIn(ctx, io, userPtr, &conf, copyOpts); err != nil {
-			return 0, err
-		}
-		if err := ifconfIoctl(ctx, io, &conf); err != nil {
-			return 0, err
-		}
-		_, err := usermem.CopyObjectOut(ctx, io, userPtr, conf, copyOpts)
-		return 0, err
-
-	case linux.TIOCINQ:
-		queued, terr := ep.GetSockOptInt(tcpip.ReceiveQueueSizeOption)
-		if terr != nil {
-			return 0, syserr.TranslateNetstackError(terr).ToError()
-		}
-		// The result is copied out as an int32; saturate rather than wrap.
-		if queued > math.MaxInt32 {
-			queued = math.MaxInt32
-		}
-		_, err := usermem.CopyObjectOut(ctx, io, userPtr, int32(queued), copyOpts)
-		return 0, err
-
-	case linux.TIOCOUTQ:
-		queued, terr := ep.GetSockOptInt(tcpip.SendQueueSizeOption)
-		if terr != nil {
-			return 0, syserr.TranslateNetstackError(terr).ToError()
-		}
-		// The result is copied out as an int32; saturate rather than wrap.
-		if queued > math.MaxInt32 {
-			queued = math.MaxInt32
-		}
-		_, err := usermem.CopyObjectOut(ctx, io, userPtr, int32(queued), copyOpts)
-		return 0, err
-
-	case linux.SIOCGIFMEM, linux.SIOCGIFPFLAGS, linux.SIOCGMIIPHY, linux.SIOCGMIIREG:
-		// Known but unsupported: report it, then fail like any other
-		// unhandled request.
-		unimpl.EmitUnimplementedEvent(ctx)
-	}
-
-	return 0, syserror.ENOTTY
-}
-
-// interfaceIoctl answers the SIOCGIF* request arg by filling ifr in place from the inet.Stack found in ctx; it returns ErrNoDevice when the stack or the device is missing and ErrInvalidArgument for any other request.
-func interfaceIoctl(ctx context.Context, io usermem.IO, arg int, ifr *linux.IFReq) *syserr.Error {
- var (
- iface inet.Interface
- index int32
- found bool
- )
-
- // Find the relevant device.
- stack := inet.StackFromContext(ctx)
- if stack == nil {
- return syserr.ErrNoDevice
- }
-
- // SIOCGIFNAME uses ifr.ifr_ifindex rather than ifr.ifr_name to
- // identify a device.
- if arg == syscall.SIOCGIFNAME {
- // Gets the name of the interface given the interface index
- // stored in ifr_ifindex.
- index = int32(usermem.ByteOrder.Uint32(ifr.Data[:4]))
- if iface, ok := stack.Interfaces()[index]; ok { // This iface shadows the outer variable within the if.
- ifr.SetName(iface.Name)
- return nil
- }
- return syserr.ErrNoDevice
- }
-
- // Find the relevant device.
- for index, iface = range stack.Interfaces() { // Assigns the outer index/iface so they remain set after the loop.
- if iface.Name == ifr.Name() {
- found = true
- break
- }
- }
- if !found {
- return syserr.ErrNoDevice
- }
-
- switch arg {
- case syscall.SIOCGIFINDEX:
- // Copy out the index to the data.
- usermem.ByteOrder.PutUint32(ifr.Data[:], uint32(index))
-
- case syscall.SIOCGIFHWADDR:
- // Copy the hardware address out.
- ifr.Data[0] = 6 // IEEE802.2 arp type.
- ifr.Data[1] = 0
- n := copy(ifr.Data[2:], iface.Addr)
- for i := 2 + n; i < len(ifr.Data); i++ {
- ifr.Data[i] = 0 // Clear padding.
- }
- usermem.ByteOrder.PutUint16(ifr.Data[:2], uint16(n)) // NOTE(review): overwrites the ifr.Data[0:2] bytes set above with the copied length; confirm this is intended.
-
- case syscall.SIOCGIFFLAGS:
- f, err := interfaceStatusFlags(stack, iface.Name)
- if err != nil {
- return err
- }
- // Drop the flags that don't fit in the size that we need to return. This
- // matches Linux behavior.
- usermem.ByteOrder.PutUint16(ifr.Data[:2], uint16(f))
-
- case syscall.SIOCGIFADDR:
- // Copy the IPv4 address out.
- for _, addr := range stack.InterfaceAddrs()[index] {
- // This ioctl is only compatible with AF_INET addresses.
- if addr.Family != linux.AF_INET {
- continue
- }
- copy(ifr.Data[4:8], addr.Addr)
- break // Only the first AF_INET address is reported.
- }
-
- case syscall.SIOCGIFMETRIC:
- // Gets the metric of the device. As per netdevice(7), this
- // always just sets ifr_metric to 0.
- usermem.ByteOrder.PutUint32(ifr.Data[:4], 0)
-
- case syscall.SIOCGIFMTU:
- // Gets the MTU of the device.
- usermem.ByteOrder.PutUint32(ifr.Data[:4], iface.MTU)
-
- case syscall.SIOCGIFMAP:
- // Gets the hardware parameters of the device.
- // TODO(gvisor.dev/issue/505): Implement.
-
- case syscall.SIOCGIFTXQLEN:
- // Gets the transmit queue length of the device.
- // TODO(gvisor.dev/issue/505): Implement.
-
- case syscall.SIOCGIFDSTADDR:
- // Gets the destination address of a point-to-point device.
- // TODO(gvisor.dev/issue/505): Implement.
-
- case syscall.SIOCGIFBRDADDR:
- // Gets the broadcast address of a device.
- // TODO(gvisor.dev/issue/505): Implement.
-
- case syscall.SIOCGIFNETMASK:
- // Gets the network mask of a device.
- for _, addr := range stack.InterfaceAddrs()[index] {
- // This ioctl is only compatible with AF_INET addresses.
- if addr.Family != linux.AF_INET {
- continue
- }
- // Populate ifr.ifr_netmask (type sockaddr).
- usermem.ByteOrder.PutUint16(ifr.Data[0:2], uint16(linux.AF_INET))
- usermem.ByteOrder.PutUint16(ifr.Data[2:4], 0)
- var mask uint32 = 0xffffffff << (32 - addr.PrefixLen) // Sets the top PrefixLen bits.
- // Netmask is expected to be returned as a big endian
- // value.
- binary.BigEndian.PutUint32(ifr.Data[4:8], mask)
- break // Only the first AF_INET address is reported.
- }
-
- default:
- // Not a valid call.
- return syserr.ErrInvalidArgument
- }
-
- return nil
-}
-
-// ifconfIoctl populates a struct ifconf for the SIOCGIFCONF ioctl.
-func ifconfIoctl(ctx context.Context, io usermem.IO, ifc *linux.IFConf) error {
- // If Ptr is NULL, return the necessary buffer size via Len.
- // Otherwise, write up to Len bytes starting at Ptr containing ifreq
- // structs.
- stack := inet.StackFromContext(ctx)
- if stack == nil {
- return syserr.ErrNoDevice.ToError()
- }
-
- if ifc.Ptr == 0 {
- ifc.Len = int32(len(stack.Interfaces())) * int32(linux.SizeOfIFReq)
- return nil
- }
-
- max := ifc.Len
- ifc.Len = 0
- for key, ifaceAddrs := range stack.InterfaceAddrs() {
- iface := stack.Interfaces()[key]
- for _, ifaceAddr := range ifaceAddrs {
- // Don't write past the end of the buffer.
- if ifc.Len+int32(linux.SizeOfIFReq) > max {
- break
- }
- if ifaceAddr.Family != linux.AF_INET {
- continue
- }
-
- // Populate ifr.ifr_addr.
- ifr := linux.IFReq{}
- ifr.SetName(iface.Name)
- usermem.ByteOrder.PutUint16(ifr.Data[0:2], uint16(ifaceAddr.Family))
- usermem.ByteOrder.PutUint16(ifr.Data[2:4], 0)
- copy(ifr.Data[4:8], ifaceAddr.Addr[:4])
-
- // Copy the ifr to userspace.
- dst := uintptr(ifc.Ptr) + uintptr(ifc.Len)
- ifc.Len += int32(linux.SizeOfIFReq)
- if _, err := usermem.CopyObjectOut(ctx, io, usermem.Addr(dst), ifr, usermem.IOOpts{
- AddressSpaceActive: true,
- }); err != nil {
- return err
- }
- }
- }
- return nil
-}
-
-// interfaceStatusFlags looks up the NIC called name in the stack and returns
-// its status flags in Linux form. Flag values and meanings are described in
-// greater detail in netdevice(7) in the SIOCGIFFLAGS section.
-//
-// It fails with errStackType if stack is not this package's *Stack, and with
-// ErrNoDevice if no NIC has the given name.
-func interfaceStatusFlags(stack inet.Stack, name string) (uint32, *syserr.Error) {
-	// epsocket should only ever be passed an epsocket.Stack.
-	impl, isEpStack := stack.(*Stack)
-	if !isEpStack {
-		return 0, errStackType
-	}
-
-	// Find the NIC corresponding to this interface.
-	for _, nic := range impl.Stack.NICInfo() {
-		if nic.Name != name {
-			continue
-		}
-		return nicStateFlagsToLinux(nic.Flags), nil
-	}
-	return 0, syserr.ErrNoDevice
-}
-
-// nicStateFlagsToLinux converts netstack NIC state flags into the matching
-// Linux IFF_* bitmask. An interface that is up is reported as both IFF_UP and
-// IFF_LOWER_UP.
-func nicStateFlagsToLinux(f stack.NICStateFlags) uint32 {
-	mapping := []struct {
-		set  bool
-		bits uint32
-	}{
-		{f.Up, linux.IFF_UP | linux.IFF_LOWER_UP},
-		{f.Running, linux.IFF_RUNNING},
-		{f.Promiscuous, linux.IFF_PROMISC},
-		{f.Loopback, linux.IFF_LOOPBACK},
-	}
-
-	var flags uint32
-	for _, m := range mapping {
-		if m.set {
-			flags |= m.bits
-		}
-	}
-	return flags
-}
-
-// State implements socket.Socket.State. State translates the internal state
-// returned by netstack to values defined by Linux.
-func (s *SocketOperations) State() uint32 {
- if s.family != linux.AF_INET && s.family != linux.AF_INET6 {
- // States not implemented for this socket's family.
- return 0
- }
-
- switch {
- case s.skType == linux.SOCK_STREAM && s.protocol == 0 || s.protocol == syscall.IPPROTO_TCP:
- // TCP socket.
- switch tcp.EndpointState(s.Endpoint.State()) {
- case tcp.StateEstablished:
- return linux.TCP_ESTABLISHED
- case tcp.StateSynSent:
- return linux.TCP_SYN_SENT
- case tcp.StateSynRecv:
- return linux.TCP_SYN_RECV
- case tcp.StateFinWait1:
- return linux.TCP_FIN_WAIT1
- case tcp.StateFinWait2:
- return linux.TCP_FIN_WAIT2
- case tcp.StateTimeWait:
- return linux.TCP_TIME_WAIT
- case tcp.StateClose, tcp.StateInitial, tcp.StateBound, tcp.StateConnecting, tcp.StateError:
- return linux.TCP_CLOSE
- case tcp.StateCloseWait:
- return linux.TCP_CLOSE_WAIT
- case tcp.StateLastAck:
- return linux.TCP_LAST_ACK
- case tcp.StateListen:
- return linux.TCP_LISTEN
- case tcp.StateClosing:
- return linux.TCP_CLOSING
- default:
- // Internal or unknown state.
- return 0
- }
- case s.skType == linux.SOCK_DGRAM && s.protocol == 0 || s.protocol == syscall.IPPROTO_UDP:
- // UDP socket.
- switch udp.EndpointState(s.Endpoint.State()) {
- case udp.StateInitial, udp.StateBound, udp.StateClosed:
- return linux.TCP_CLOSE
- case udp.StateConnected:
- return linux.TCP_ESTABLISHED
- default:
- return 0
- }
- case s.skType == linux.SOCK_DGRAM && s.protocol == syscall.IPPROTO_ICMP || s.protocol == syscall.IPPROTO_ICMPV6:
- // TODO(b/112063468): Export states for ICMP sockets.
- case s.skType == linux.SOCK_RAW:
- // TODO(b/112063468): Export states for raw sockets.
- default:
- // Unknown transport protocol, how did we make this socket?
- log.Warningf("Unknown transport protocol for an existing socket: family=%v, type=%v, protocol=%v, internal type %v", s.family, s.skType, s.protocol, reflect.TypeOf(s.Endpoint).Elem())
- return 0
- }
-
- return 0
-}
-
-// Type implements socket.Socket.Type. It reports the address family, socket
-// type and protocol stored on the socket.
-func (s *SocketOperations) Type() (family int, skType linux.SockType, protocol int) {
-	family, skType, protocol = s.family, s.skType, s.protocol
-	return family, skType, protocol
-}