summaryrefslogtreecommitdiffhomepage
path: root/pkg/tcpip/link/fdbased/mmap_amd64_unsafe.go
diff options
context:
space:
mode:
authorBhasker Hariharan <bhaskerh@google.com>2019-05-21 15:23:12 -0700
committerShentubot <shentubot@google.com>2019-05-21 15:24:25 -0700
commit2ac0aeeb42ef517743a18224f9f6945c1c77d732 (patch)
tree3cc0942ae3948485013bc30dbb1404305b501209 /pkg/tcpip/link/fdbased/mmap_amd64_unsafe.go
parent9cdae51feca5cee9faa198161b92a0aeece52d6c (diff)
Refactor fdbased endpoint dispatcher code.
This is in preparation to support an fdbased endpoint that can read/dispatch packets from multiple underlying fds. Updates #231 PiperOrigin-RevId: 249337074 Change-Id: Id7d375186cffcf55ae5e38986e7d605a96916d35
Diffstat (limited to 'pkg/tcpip/link/fdbased/mmap_amd64_unsafe.go')
-rw-r--r--pkg/tcpip/link/fdbased/mmap_amd64_unsafe.go174
1 files changed, 12 insertions, 162 deletions
diff --git a/pkg/tcpip/link/fdbased/mmap_amd64_unsafe.go b/pkg/tcpip/link/fdbased/mmap_amd64_unsafe.go
index 135da2498..47cb1d1cc 100644
--- a/pkg/tcpip/link/fdbased/mmap_amd64_unsafe.go
+++ b/pkg/tcpip/link/fdbased/mmap_amd64_unsafe.go
@@ -17,76 +17,17 @@
package fdbased
import (
- "encoding/binary"
"fmt"
"sync/atomic"
"syscall"
"unsafe"
"golang.org/x/sys/unix"
- "gvisor.googlesource.com/gvisor/pkg/tcpip"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/header"
- "gvisor.googlesource.com/gvisor/pkg/tcpip/link/rawfile"
)
-const (
- tPacketAlignment = uintptr(16)
- tpStatusKernel = 0
- tpStatusUser = 1
- tpStatusCopy = 2
- tpStatusLosing = 4
-)
-
-// We overallocate the frame size to accommodate space for the
-// TPacketHdr+RawSockAddrLinkLayer+MAC header and any padding.
-//
-// Memory allocated for the ring buffer: tpBlockSize * tpBlockNR = 2 MiB
-//
-// NOTE:
-// Frames need to be aligned at 16 byte boundaries.
-// BlockSize needs to be page aligned.
-//
-// For details see PACKET_MMAP setting constraints in
-// https://www.kernel.org/doc/Documentation/networking/packet_mmap.txt
-const (
- tpFrameSize = 65536 + 128
- tpBlockSize = tpFrameSize * 32
- tpBlockNR = 1
- tpFrameNR = (tpBlockSize * tpBlockNR) / tpFrameSize
-)
-
-// tPacketAlign aligns the pointer v at a tPacketAlignment boundary. Direct
-// translation of the TPACKET_ALIGN macro in <linux/if_packet.h>.
-func tPacketAlign(v uintptr) uintptr {
- return (v + tPacketAlignment - 1) & uintptr(^(tPacketAlignment - 1))
-}
-
// tPacketHdrlen is the TPACKET_HDRLEN variable defined in <linux/if_packet.h>.
var tPacketHdrlen = tPacketAlign(unsafe.Sizeof(tPacketHdr{}) + unsafe.Sizeof(syscall.RawSockaddrLinklayer{}))
-// tPacketReq is the tpacket_req structure as described in
-// https://www.kernel.org/doc/Documentation/networking/packet_mmap.txt
-type tPacketReq struct {
- tpBlockSize uint32
- tpBlockNR uint32
- tpFrameSize uint32
- tpFrameNR uint32
-}
-
-// tPacketHdr is tpacket_hdr structure as described in <linux/if_packet.h>
-type tPacketHdr []byte
-
-const (
- tpStatusOffset = 0
- tpLenOffset = 8
- tpSnapLenOffset = 12
- tpMacOffset = 16
- tpNetOffset = 18
- tpSecOffset = 20
- tpUSecOffset = 24
-)
-
// tpStatus returns the frame status field.
// The status is concurrently updated by the kernel as a result we must
// use atomic operations to prevent races.
@@ -105,38 +46,14 @@ func (t tPacketHdr) setTPStatus(status uint32) {
atomic.StoreUint32((*uint32)(statusPtr), status)
}
-func (t tPacketHdr) tpLen() uint32 {
- return binary.LittleEndian.Uint32(t[tpLenOffset:])
-}
-
-func (t tPacketHdr) tpSnapLen() uint32 {
- return binary.LittleEndian.Uint32(t[tpSnapLenOffset:])
-}
-
-func (t tPacketHdr) tpMac() uint16 {
- return binary.LittleEndian.Uint16(t[tpMacOffset:])
-}
-
-func (t tPacketHdr) tpNet() uint16 {
- return binary.LittleEndian.Uint16(t[tpNetOffset:])
-}
-
-func (t tPacketHdr) tpSec() uint32 {
- return binary.LittleEndian.Uint32(t[tpSecOffset:])
-}
-
-func (t tPacketHdr) tpUSec() uint32 {
- return binary.LittleEndian.Uint32(t[tpUSecOffset:])
-}
-
-func (t tPacketHdr) Payload() []byte {
- return t[uint32(t.tpMac()) : uint32(t.tpMac())+t.tpSnapLen()]
-}
-
-func (e *endpoint) setupPacketRXRing() error {
+func newPacketMMapDispatcher(fd int, e *endpoint) (linkDispatcher, error) {
+ d := &packetMMapDispatcher{
+ fd: fd,
+ e: e,
+ }
pageSize := unix.Getpagesize()
if tpBlockSize%pageSize != 0 {
- return fmt.Errorf("tpBlockSize: %d is not page aligned, pagesize: %d", tpBlockSize, pageSize)
+ return nil, fmt.Errorf("tpBlockSize: %d is not page aligned, pagesize: %d", tpBlockSize, pageSize)
}
tReq := tPacketReq{
tpBlockSize: uint32(tpBlockSize),
@@ -145,84 +62,17 @@ func (e *endpoint) setupPacketRXRing() error {
tpFrameNR: uint32(tpFrameNR),
}
// Setup PACKET_RX_RING.
- if err := setsockopt(e.fd, syscall.SOL_PACKET, syscall.PACKET_RX_RING, unsafe.Pointer(&tReq), unsafe.Sizeof(tReq)); err != nil {
- return fmt.Errorf("failed to enable PACKET_RX_RING: %v", err)
+ if err := setsockopt(d.fd, syscall.SOL_PACKET, syscall.PACKET_RX_RING, unsafe.Pointer(&tReq), unsafe.Sizeof(tReq)); err != nil {
+ return nil, fmt.Errorf("failed to enable PACKET_RX_RING: %v", err)
}
// Let's mmap the blocks.
sz := tpBlockSize * tpBlockNR
- buf, err := syscall.Mmap(e.fd, 0, sz, syscall.PROT_READ|syscall.PROT_WRITE, syscall.MAP_SHARED)
- if err != nil {
- return fmt.Errorf("syscall.Mmap(...,0, %v, ...) failed = %v", sz, err)
- }
- e.ringBuffer = buf
- return nil
-}
-
-func (e *endpoint) readMMappedPacket() ([]byte, *tcpip.Error) {
- hdr := (tPacketHdr)(e.ringBuffer[e.ringOffset*tpFrameSize:])
- for hdr.tpStatus()&tpStatusUser == 0 {
- event := rawfile.PollEvent{
- FD: int32(e.fd),
- Events: unix.POLLIN | unix.POLLERR,
- }
- _, errno := rawfile.BlockingPoll(&event, 1, -1)
- if errno != 0 {
- if errno == syscall.EINTR {
- continue
- }
- return nil, rawfile.TranslateErrno(errno)
- }
- if hdr.tpStatus()&tpStatusCopy != 0 {
- // This frame is truncated so skip it after flipping the
- // buffer to the kernel.
- hdr.setTPStatus(tpStatusKernel)
- e.ringOffset = (e.ringOffset + 1) % tpFrameNR
- hdr = (tPacketHdr)(e.ringBuffer[e.ringOffset*tpFrameSize:])
- continue
- }
- }
-
- // Copy out the packet from the mmapped frame to a locally owned buffer.
- pkt := make([]byte, hdr.tpSnapLen())
- copy(pkt, hdr.Payload())
- // Release packet to kernel.
- hdr.setTPStatus(tpStatusKernel)
- e.ringOffset = (e.ringOffset + 1) % tpFrameNR
- return pkt, nil
-}
-
-// packetMMapDispatch reads packets from an mmaped ring buffer and dispatches
-// them to the network stack.
-func (e *endpoint) packetMMapDispatch() (bool, *tcpip.Error) {
- pkt, err := e.readMMappedPacket()
+ buf, err := syscall.Mmap(d.fd, 0, sz, syscall.PROT_READ|syscall.PROT_WRITE, syscall.MAP_SHARED)
if err != nil {
- return false, err
+ return nil, fmt.Errorf("syscall.Mmap(...,0, %v, ...) failed = %v", sz, err)
}
- var (
- p tcpip.NetworkProtocolNumber
- remote, local tcpip.LinkAddress
- )
- if e.hdrSize > 0 {
- eth := header.Ethernet(pkt)
- p = eth.Type()
- remote = eth.SourceAddress()
- local = eth.DestinationAddress()
- } else {
- // We don't get any indication of what the packet is, so try to guess
- // if it's an IPv4 or IPv6 packet.
- switch header.IPVersion(pkt) {
- case header.IPv4Version:
- p = header.IPv4ProtocolNumber
- case header.IPv6Version:
- p = header.IPv6ProtocolNumber
- default:
- return true, nil
- }
- }
-
- pkt = pkt[e.hdrSize:]
- e.dispatcher.DeliverNetworkPacket(e, remote, local, p, buffer.NewVectorisedView(len(pkt), []buffer.View{buffer.View(pkt)}))
- return true, nil
+ d.ringBuffer = buf
+ return d, nil
}
func setsockopt(fd, level, name int, val unsafe.Pointer, vallen uintptr) error {