diff options
author | Bhasker Hariharan <bhaskerh@google.com> | 2019-04-23 17:46:13 -0700 |
---|---|---|
committer | Shentubot <shentubot@google.com> | 2019-04-23 17:47:56 -0700 |
commit | 56cadcac4e275e1f49dccf7735a79af469991860 (patch) | |
tree | 49aeac225ba697a929e656e3f3ba865d0be84501 /pkg | |
parent | 7c9c5fd36dbff21cf255aaf08278729b02613222 (diff) |
Fixes to PacketMMap dispatcher.
This CL fixes the following bugs:
- Uses atomic to set/read status instead of binary.LittleEndian.PutUint32 etc
which are not atomic.
- Increments ringOffsets for frames that are truncated (i.e status is
tpStatusCopy)
- Does not ignore frames with tpStatusLost bit set as they are valid frames and
only indicate that there some frames were lost before this one and metrics can
be retrieved with a getsockopt call.
- Adds checks to make sure blockSize is a multiple of page size. This is
required as the kernel allocates in pages per block and rejects sizes that are
not page aligned with an EINVAL.
Updates #210
PiperOrigin-RevId: 244959464
Change-Id: I5d61337b7e4c0f8a3063dcfc07791d4c4521ba1f
Diffstat (limited to 'pkg')
-rw-r--r-- | pkg/tcpip/link/fdbased/mmap_amd64_unsafe.go | 38 |
1 files changed, 30 insertions, 8 deletions
diff --git a/pkg/tcpip/link/fdbased/mmap_amd64_unsafe.go b/pkg/tcpip/link/fdbased/mmap_amd64_unsafe.go index e49cf9f61..e5ac7996d 100644 --- a/pkg/tcpip/link/fdbased/mmap_amd64_unsafe.go +++ b/pkg/tcpip/link/fdbased/mmap_amd64_unsafe.go @@ -19,6 +19,7 @@ package fdbased import ( "encoding/binary" "fmt" + "sync/atomic" "syscall" "unsafe" @@ -42,7 +43,12 @@ const ( // // Memory allocated for the ring buffer: tpBlockSize * tpBlockNR = 2 MiB // -// NOTE: Frames need to be aligned at 16 byte boundaries. +// NOTE: +// Frames need to be aligned at 16 byte boundaries. +// BlockSize needs to be page aligned. +// +// For details see PACKET_MMAP setting constraints in +// https://www.kernel.org/doc/Documentation/networking/packet_mmap.txt const ( tpFrameSize = 65536 + 128 tpBlockSize = tpFrameSize * 32 @@ -81,12 +87,22 @@ const ( tpUSecOffset = 24 ) +// tpStatus returns the frame status field. +// The status is concurrently updated by the kernel as a result we must +// use atomic operations to prevent races. func (t tPacketHdr) tpStatus() uint32 { - return binary.LittleEndian.Uint32(t[tpStatusOffset:]) + hdr := unsafe.Pointer(&t[0]) + statusPtr := unsafe.Pointer(uintptr(hdr) + uintptr(tpStatusOffset)) + return atomic.LoadUint32((*uint32)(statusPtr)) } +// setTPStatus set's the frame status to the provided status. +// The status is concurrently updated by the kernel as a result we must +// use atomic operations to prevent races. func (t tPacketHdr) setTPStatus(status uint32) { - binary.LittleEndian.PutUint32(t[tpStatusOffset:], status) + hdr := unsafe.Pointer(&t[0]) + statusPtr := unsafe.Pointer(uintptr(hdr) + uintptr(tpStatusOffset)) + atomic.StoreUint32((*uint32)(statusPtr), status) } func (t tPacketHdr) tpLen() uint32 { @@ -118,6 +134,10 @@ func (t tPacketHdr) Payload() []byte { } func (e *endpoint) setupPacketRXRing() error { + pageSize := unix.Getpagesize() + if tpBlockSize%pageSize != 0 { + return fmt.Errorf("tpBlockSize: %d is not page aligned, pagesize: %d", tpBlockSize, pageSize) + } tReq := tPacketReq{ tpBlockSize: uint32(tpBlockSize), tpBlockNR: uint32(tpBlockNR), @@ -139,8 +159,8 @@ func (e *endpoint) setupPacketRXRing() error { } func (e *endpoint) readMMappedPacket() ([]byte, *tcpip.Error) { - hdr := (tPacketHdr)(e.ringBuffer[0+e.ringOffset*tpFrameSize:]) - for (hdr.tpStatus() & tpStatusUser) == 0 { + hdr := (tPacketHdr)(e.ringBuffer[e.ringOffset*tpFrameSize:]) + for hdr.tpStatus()&tpStatusUser == 0 { event := rawfile.PollEvent{ FD: int32(e.fd), Events: unix.POLLIN | unix.POLLERR, @@ -153,9 +173,11 @@ func (e *endpoint) readMMappedPacket() ([]byte, *tcpip.Error) { return nil, rawfile.TranslateErrno(errno) } if hdr.tpStatus()&tpStatusCopy != 0 { - continue - } - if hdr.tpStatus()&tpStatusLosing != 0 { + // This frame is truncated so skip it after flipping the + // buffer to the kernel. + hdr.setTPStatus(tpStatusKernel) + e.ringOffset = (e.ringOffset + 1) % tpFrameNR + hdr = (tPacketHdr)(e.ringBuffer[e.ringOffset*tpFrameSize:]) continue } } |