diff options
Diffstat (limited to 'pkg/tcpip/link/fdbased/endpoint.go')
-rw-r--r-- | pkg/tcpip/link/fdbased/endpoint.go | 239 |
1 files changed, 30 insertions, 209 deletions
diff --git a/pkg/tcpip/link/fdbased/endpoint.go b/pkg/tcpip/link/fdbased/endpoint.go index 4da376774..1f889c2a0 100644 --- a/pkg/tcpip/link/fdbased/endpoint.go +++ b/pkg/tcpip/link/fdbased/endpoint.go @@ -34,18 +34,11 @@ import ( "gvisor.googlesource.com/gvisor/pkg/tcpip/stack" ) -const ( - // MaxMsgsPerRecv is the maximum number of packets we want to retrieve - // in a single RecvMMsg call. - MaxMsgsPerRecv = 8 -) - -// BufConfig defines the shape of the vectorised view used to read packets from the NIC. -var BufConfig = []int{128, 256, 256, 512, 1024, 2048, 4096, 8192, 16384, 32768} - // linkDispatcher reads packets from the link FD and dispatches them to the // NetworkDispatcher. -type linkDispatcher func() (bool, *tcpip.Error) +type linkDispatcher interface { + dispatch() (bool, *tcpip.Error) +} // PacketDispatchMode are the various supported methods of receiving and // dispatching packets from the underlying FD. @@ -92,11 +85,6 @@ type endpoint struct { // its end of the communication pipe. closed func(*tcpip.Error) - views [][]buffer.View - iovecs [][]syscall.Iovec - // msgHdrs is only used by the RecvMMsg dispatcher. - msgHdrs []rawfile.MMsgHdr - inboundDispatcher linkDispatcher dispatcher stack.NetworkDispatcher @@ -104,14 +92,6 @@ type endpoint struct { // endpoint. packetDispatchMode PacketDispatchMode - // ringBuffer is only used when PacketMMap dispatcher is used and points - // to the start of the mmapped PACKET_RX_RING buffer. - ringBuffer []byte - - // ringOffset is the current offset into the ring buffer where the next - // inbound packet will be placed by the kernel. - ringOffset int - // gsoMaxSize is the maximum GSO packet size. It is zero if GSO is // disabled. gsoMaxSize uint32 @@ -174,11 +154,7 @@ func New(opts *Options) (tcpip.LinkEndpointID, error) { packetDispatchMode: opts.PacketDispatchMode, } - // For non-socket FDs we read one packet a time (e.g. TAP devices). - msgsPerRecv := 1 - e.inboundDispatcher = e.dispatch - - isSocket, err := isSocketFD(opts.FD) + isSocket, err := isSocketFD(e.fd) if err != nil { return 0, err } @@ -187,44 +163,41 @@ func New(opts *Options) (tcpip.LinkEndpointID, error) { e.caps |= stack.CapabilityGSO e.gsoMaxSize = opts.GSOMaxSize } + } + e.inboundDispatcher, err = createInboundDispatcher(e, isSocket) + if err != nil { + return 0, fmt.Errorf("createInboundDispatcher(...) = %v", err) + } + + return stack.RegisterLinkEndpoint(e), nil +} +func createInboundDispatcher(e *endpoint, isSocket bool) (linkDispatcher, error) { + // By default use the readv() dispatcher as it works with all kinds of + // FDs (tap/tun/unix domain sockets and af_packet). + inboundDispatcher, err := newReadVDispatcher(e.fd, e) + if err != nil { + return nil, fmt.Errorf("newReadVDispatcher(%d, %+v) = %v", e.fd, e, err) + } + + if isSocket { switch e.packetDispatchMode { case PacketMMap: - if err := e.setupPacketRXRing(); err != nil { - return 0, fmt.Errorf("e.setupPacketRXRing failed: %v", err) + inboundDispatcher, err = newPacketMMapDispatcher(e.fd, e) + if err != nil { + return nil, fmt.Errorf("newPacketMMapDispatcher(%d, %+v) = %v", e.fd, e, err) } - e.inboundDispatcher = e.packetMMapDispatch - return stack.RegisterLinkEndpoint(e), nil - case RecvMMsg: // If the provided FD is a socket then we optimize // packet reads by using recvmmsg() instead of read() to // read packets in a batch. - e.inboundDispatcher = e.recvMMsgDispatch - msgsPerRecv = MaxMsgsPerRecv + inboundDispatcher, err = newRecvMMsgDispatcher(e.fd, e) + if err != nil { + return nil, fmt.Errorf("newRecvMMsgDispatcher(%d, %+v) = %v", e.fd, e, err) + } } } - - e.views = make([][]buffer.View, msgsPerRecv) - for i := range e.views { - e.views[i] = make([]buffer.View, len(BufConfig)) - } - e.iovecs = make([][]syscall.Iovec, msgsPerRecv) - iovLen := len(BufConfig) - if e.Capabilities()&stack.CapabilityGSO != 0 { - // virtioNetHdr is prepended before each packet. - iovLen++ - } - for i := range e.iovecs { - e.iovecs[i] = make([]syscall.Iovec, iovLen) - } - e.msgHdrs = make([]rawfile.MMsgHdr, msgsPerRecv) - for i := range e.msgHdrs { - e.msgHdrs[i].Msg.Iov = &e.iovecs[i][0] - e.msgHdrs[i].Msg.Iovlen = uint64(iovLen) - } - - return stack.RegisterLinkEndpoint(e), nil + return inboundDispatcher, nil } func isSocketFD(fd int) (bool, error) { @@ -347,163 +320,11 @@ func (e *endpoint) WriteRawPacket(dest tcpip.Address, packet []byte) *tcpip.Erro return rawfile.NonBlockingWrite(e.fd, packet) } -func (e *endpoint) capViews(k, n int, buffers []int) int { - c := 0 - for i, s := range buffers { - c += s - if c >= n { - e.views[k][i].CapLength(s - (c - n)) - return i + 1 - } - } - return len(buffers) -} - -func (e *endpoint) allocateViews(bufConfig []int) { - for k := 0; k < len(e.views); k++ { - var vnetHdr [virtioNetHdrSize]byte - vnetHdrOff := 0 - if e.Capabilities()&stack.CapabilityGSO != 0 { - // The kernel adds virtioNetHdr before each packet, but - // we don't use it, so so we allocate a buffer for it, - // add it in iovecs but don't add it in a view. - e.iovecs[k][0] = syscall.Iovec{ - Base: &vnetHdr[0], - Len: uint64(virtioNetHdrSize), - } - vnetHdrOff++ - } - for i := 0; i < len(bufConfig); i++ { - if e.views[k][i] != nil { - break - } - b := buffer.NewView(bufConfig[i]) - e.views[k][i] = b - e.iovecs[k][i+vnetHdrOff] = syscall.Iovec{ - Base: &b[0], - Len: uint64(len(b)), - } - } - } -} - -// dispatch reads one packet from the file descriptor and dispatches it. -func (e *endpoint) dispatch() (bool, *tcpip.Error) { - e.allocateViews(BufConfig) - - n, err := rawfile.BlockingReadv(e.fd, e.iovecs[0]) - if err != nil { - return false, err - } - if e.Capabilities()&stack.CapabilityGSO != 0 { - // Skip virtioNetHdr which is added before each packet, it - // isn't used and it isn't in a view. - n -= virtioNetHdrSize - } - if n <= e.hdrSize { - return false, nil - } - - var ( - p tcpip.NetworkProtocolNumber - remote, local tcpip.LinkAddress - ) - if e.hdrSize > 0 { - eth := header.Ethernet(e.views[0][0]) - p = eth.Type() - remote = eth.SourceAddress() - local = eth.DestinationAddress() - } else { - // We don't get any indication of what the packet is, so try to guess - // if it's an IPv4 or IPv6 packet. - switch header.IPVersion(e.views[0][0]) { - case header.IPv4Version: - p = header.IPv4ProtocolNumber - case header.IPv6Version: - p = header.IPv6ProtocolNumber - default: - return true, nil - } - } - - used := e.capViews(0, n, BufConfig) - vv := buffer.NewVectorisedView(n, e.views[0][:used]) - vv.TrimFront(e.hdrSize) - - e.dispatcher.DeliverNetworkPacket(e, remote, local, p, vv) - - // Prepare e.views for another packet: release used views. - for i := 0; i < used; i++ { - e.views[0][i] = nil - } - - return true, nil -} - -// recvMMsgDispatch reads more than one packet at a time from the file -// descriptor and dispatches it. -func (e *endpoint) recvMMsgDispatch() (bool, *tcpip.Error) { - e.allocateViews(BufConfig) - - nMsgs, err := rawfile.BlockingRecvMMsg(e.fd, e.msgHdrs) - if err != nil { - return false, err - } - // Process each of received packets. - for k := 0; k < nMsgs; k++ { - n := int(e.msgHdrs[k].Len) - if e.Capabilities()&stack.CapabilityGSO != 0 { - n -= virtioNetHdrSize - } - if n <= e.hdrSize { - return false, nil - } - - var ( - p tcpip.NetworkProtocolNumber - remote, local tcpip.LinkAddress - ) - if e.hdrSize > 0 { - eth := header.Ethernet(e.views[k][0]) - p = eth.Type() - remote = eth.SourceAddress() - local = eth.DestinationAddress() - } else { - // We don't get any indication of what the packet is, so try to guess - // if it's an IPv4 or IPv6 packet. - switch header.IPVersion(e.views[k][0]) { - case header.IPv4Version: - p = header.IPv4ProtocolNumber - case header.IPv6Version: - p = header.IPv6ProtocolNumber - default: - return true, nil - } - } - - used := e.capViews(k, int(n), BufConfig) - vv := buffer.NewVectorisedView(int(n), e.views[k][:used]) - vv.TrimFront(e.hdrSize) - e.dispatcher.DeliverNetworkPacket(e, remote, local, p, vv) - - // Prepare e.views for another packet: release used views. - for i := 0; i < used; i++ { - e.views[k][i] = nil - } - } - - for k := 0; k < nMsgs; k++ { - e.msgHdrs[k].Len = 0 - } - - return true, nil -} - // dispatchLoop reads packets from the file descriptor in a loop and dispatches // them to the network stack. func (e *endpoint) dispatchLoop() *tcpip.Error { for { - cont, err := e.inboundDispatcher() + cont, err := e.inboundDispatcher.dispatch() if err != nil || !cont { if e.closed != nil { e.closed(err) |