summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorBhasker Hariharan <bhaskerh@google.com>2019-04-23 17:46:13 -0700
committerShentubot <shentubot@google.com>2019-04-23 17:47:56 -0700
commit56cadcac4e275e1f49dccf7735a79af469991860 (patch)
tree49aeac225ba697a929e656e3f3ba865d0be84501
parent7c9c5fd36dbff21cf255aaf08278729b02613222 (diff)
Fixes to PacketMMap dispatcher.
This CL fixes the following bugs: - Uses atomic to set/read status instead of binary.LittleEndian.PutUint32 etc which are not atomic. - Increments ringOffsets for frames that are truncated (i.e status is tpStatusCopy) - Does not ignore frames with tpStatusLost bit set as they are valid frames and only indicate that there some frames were lost before this one and metrics can be retrieved with a getsockopt call. - Adds checks to make sure blockSize is a multiple of page size. This is required as the kernel allocates in pages per block and rejects sizes that are not page aligned with an EINVAL. Updates #210 PiperOrigin-RevId: 244959464 Change-Id: I5d61337b7e4c0f8a3063dcfc07791d4c4521ba1f
-rw-r--r--pkg/tcpip/link/fdbased/mmap_amd64_unsafe.go38
1 files changed, 30 insertions, 8 deletions
diff --git a/pkg/tcpip/link/fdbased/mmap_amd64_unsafe.go b/pkg/tcpip/link/fdbased/mmap_amd64_unsafe.go
index e49cf9f61..e5ac7996d 100644
--- a/pkg/tcpip/link/fdbased/mmap_amd64_unsafe.go
+++ b/pkg/tcpip/link/fdbased/mmap_amd64_unsafe.go
@@ -19,6 +19,7 @@ package fdbased
import (
"encoding/binary"
"fmt"
+ "sync/atomic"
"syscall"
"unsafe"
@@ -42,7 +43,12 @@ const (
//
// Memory allocated for the ring buffer: tpBlockSize * tpBlockNR = 2 MiB
//
-// NOTE: Frames need to be aligned at 16 byte boundaries.
+// NOTE:
+// Frames need to be aligned at 16 byte boundaries.
+// BlockSize needs to be page aligned.
+//
+// For details see PACKET_MMAP setting constraints in
+// https://www.kernel.org/doc/Documentation/networking/packet_mmap.txt
const (
tpFrameSize = 65536 + 128
tpBlockSize = tpFrameSize * 32
@@ -81,12 +87,22 @@ const (
tpUSecOffset = 24
)
+// tpStatus returns the frame status field.
+// The status is concurrently updated by the kernel as a result we must
+// use atomic operations to prevent races.
func (t tPacketHdr) tpStatus() uint32 {
- return binary.LittleEndian.Uint32(t[tpStatusOffset:])
+ hdr := unsafe.Pointer(&t[0])
+ statusPtr := unsafe.Pointer(uintptr(hdr) + uintptr(tpStatusOffset))
+ return atomic.LoadUint32((*uint32)(statusPtr))
}
+// setTPStatus set's the frame status to the provided status.
+// The status is concurrently updated by the kernel as a result we must
+// use atomic operations to prevent races.
func (t tPacketHdr) setTPStatus(status uint32) {
- binary.LittleEndian.PutUint32(t[tpStatusOffset:], status)
+ hdr := unsafe.Pointer(&t[0])
+ statusPtr := unsafe.Pointer(uintptr(hdr) + uintptr(tpStatusOffset))
+ atomic.StoreUint32((*uint32)(statusPtr), status)
}
func (t tPacketHdr) tpLen() uint32 {
@@ -118,6 +134,10 @@ func (t tPacketHdr) Payload() []byte {
}
func (e *endpoint) setupPacketRXRing() error {
+ pageSize := unix.Getpagesize()
+ if tpBlockSize%pageSize != 0 {
+ return fmt.Errorf("tpBlockSize: %d is not page aligned, pagesize: %d", tpBlockSize, pageSize)
+ }
tReq := tPacketReq{
tpBlockSize: uint32(tpBlockSize),
tpBlockNR: uint32(tpBlockNR),
@@ -139,8 +159,8 @@ func (e *endpoint) setupPacketRXRing() error {
}
func (e *endpoint) readMMappedPacket() ([]byte, *tcpip.Error) {
- hdr := (tPacketHdr)(e.ringBuffer[0+e.ringOffset*tpFrameSize:])
- for (hdr.tpStatus() & tpStatusUser) == 0 {
+ hdr := (tPacketHdr)(e.ringBuffer[e.ringOffset*tpFrameSize:])
+ for hdr.tpStatus()&tpStatusUser == 0 {
event := rawfile.PollEvent{
FD: int32(e.fd),
Events: unix.POLLIN | unix.POLLERR,
@@ -153,9 +173,11 @@ func (e *endpoint) readMMappedPacket() ([]byte, *tcpip.Error) {
return nil, rawfile.TranslateErrno(errno)
}
if hdr.tpStatus()&tpStatusCopy != 0 {
- continue
- }
- if hdr.tpStatus()&tpStatusLosing != 0 {
+ // This frame is truncated so skip it after flipping the
+ // buffer to the kernel.
+ hdr.setTPStatus(tpStatusKernel)
+ e.ringOffset = (e.ringOffset + 1) % tpFrameNR
+ hdr = (tPacketHdr)(e.ringBuffer[e.ringOffset*tpFrameSize:])
continue
}
}