summary refs log tree commit diff homepage
path: root/pkg/tcpip/link
diff options
context:
space:
mode:
Diffstat (limited to 'pkg/tcpip/link')
-rw-r--r-- pkg/tcpip/link/fdbased/packet_dispatchers.go 256
1 files changed, 118 insertions, 138 deletions
diff --git a/pkg/tcpip/link/fdbased/packet_dispatchers.go b/pkg/tcpip/link/fdbased/packet_dispatchers.go
index 8c3ca86d6..edab110b5 100644
--- a/pkg/tcpip/link/fdbased/packet_dispatchers.go
+++ b/pkg/tcpip/link/fdbased/packet_dispatchers.go
@@ -29,92 +29,124 @@ import (
// BufConfig defines the shape of the vectorised view used to read packets from the NIC.
var BufConfig = []int{128, 256, 256, 512, 1024, 2048, 4096, 8192, 16384, 32768}
-// readVDispatcher uses readv() system call to read inbound packets and
-// dispatches them.
-type readVDispatcher struct {
- // fd is the file descriptor used to send and receive packets.
- fd int
-
- // e is the endpoint this dispatcher is attached to.
- e *endpoint
-
+type iovecBuffer struct {
// views are the actual buffers that hold the packet contents.
views []buffer.View
// iovecs are initialized with base pointers/len of the corresponding
- // entries in the views defined above, except when GSO is enabled then
- // the first iovec points to a buffer for the vnet header which is
- // stripped before the views are passed up the stack for further
+ // entries in the views defined above, except when GSO is enabled
+ // (skipsVnetHdr) then the first iovec points to a buffer for the vnet header
+ // which is stripped before the views are passed up the stack for further
// processing.
iovecs []syscall.Iovec
+
+ // sizes is an array of buffer sizes for the underlying views. sizes is
+ // immutable.
+ sizes []int
+
+ // skipsVnetHdr is true if virtioNetHdr is to be skipped.
+ skipsVnetHdr bool
}
-func newReadVDispatcher(fd int, e *endpoint) (linkDispatcher, error) {
- d := &readVDispatcher{fd: fd, e: e}
- d.views = make([]buffer.View, len(BufConfig))
- iovLen := len(BufConfig)
- if d.e.Capabilities()&stack.CapabilityHardwareGSO != 0 {
- iovLen++
+func newIovecBuffer(sizes []int, skipsVnetHdr bool) *iovecBuffer {
+ b := &iovecBuffer{
+ views: make([]buffer.View, len(sizes)),
+ sizes: sizes,
+ skipsVnetHdr: skipsVnetHdr,
}
- d.iovecs = make([]syscall.Iovec, iovLen)
- return d, nil
+ niov := len(b.views)
+ if b.skipsVnetHdr {
+ niov++
+ }
+ b.iovecs = make([]syscall.Iovec, niov)
+ return b
}
-func (d *readVDispatcher) allocateViews(bufConfig []int) {
- var vnetHdr [virtioNetHdrSize]byte
+func (b *iovecBuffer) nextIovecs() []syscall.Iovec {
vnetHdrOff := 0
- if d.e.Capabilities()&stack.CapabilityHardwareGSO != 0 {
+ if b.skipsVnetHdr {
+ var vnetHdr [virtioNetHdrSize]byte
// The kernel adds virtioNetHdr before each packet, but
// we don't use it, so we allocate a buffer for it,
// add it in iovecs but don't add it in a view.
- d.iovecs[0] = syscall.Iovec{
+ b.iovecs[0] = syscall.Iovec{
Base: &vnetHdr[0],
Len: uint64(virtioNetHdrSize),
}
vnetHdrOff++
}
- for i := 0; i < len(bufConfig); i++ {
- if d.views[i] != nil {
+ for i := range b.views {
+ if b.views[i] != nil {
break
}
- b := buffer.NewView(bufConfig[i])
- d.views[i] = b
- d.iovecs[i+vnetHdrOff] = syscall.Iovec{
- Base: &b[0],
- Len: uint64(len(b)),
+ v := buffer.NewView(b.sizes[i])
+ b.views[i] = v
+ b.iovecs[i+vnetHdrOff] = syscall.Iovec{
+ Base: &v[0],
+ Len: uint64(len(v)),
}
}
+ return b.iovecs
}
-func (d *readVDispatcher) capViews(n int, buffers []int) int {
+func (b *iovecBuffer) pullViews(n int) buffer.VectorisedView {
+ var views []buffer.View
c := 0
- for i, s := range buffers {
- c += s
+ if b.skipsVnetHdr {
+ c += virtioNetHdrSize
if c >= n {
- d.views[i].CapLength(s - (c - n))
- return i + 1
+ // Nothing in the packet.
+ return buffer.NewVectorisedView(0, nil)
+ }
+ }
+ for i, v := range b.views {
+ c += len(v)
+ if c >= n {
+ b.views[i].CapLength(len(v) - (c - n))
+ views = append([]buffer.View(nil), b.views[:i+1]...)
+ break
}
}
- return len(buffers)
+ // Remove the first len(views) used views from the state.
+ for i := range views {
+ b.views[i] = nil
+ }
+ if b.skipsVnetHdr {
+ // Exclude the size of the vnet header.
+ n -= virtioNetHdrSize
+ }
+ return buffer.NewVectorisedView(n, views)
+}
+
+// readVDispatcher uses readv() system call to read inbound packets and
+// dispatches them.
+type readVDispatcher struct {
+ // fd is the file descriptor used to send and receive packets.
+ fd int
+
+ // e is the endpoint this dispatcher is attached to.
+ e *endpoint
+
+ // buf is the iovec buffer that contains the packet contents.
+ buf *iovecBuffer
+}
+
+func newReadVDispatcher(fd int, e *endpoint) (linkDispatcher, error) {
+ d := &readVDispatcher{fd: fd, e: e}
+ skipsVnetHdr := d.e.Capabilities()&stack.CapabilityHardwareGSO != 0
+ d.buf = newIovecBuffer(BufConfig, skipsVnetHdr)
+ return d, nil
}
// dispatch reads one packet from the file descriptor and dispatches it.
func (d *readVDispatcher) dispatch() (bool, *tcpip.Error) {
- d.allocateViews(BufConfig)
-
- n, err := rawfile.BlockingReadv(d.fd, d.iovecs)
+ n, err := rawfile.BlockingReadv(d.fd, d.buf.nextIovecs())
if n == 0 || err != nil {
return false, err
}
- if d.e.Capabilities()&stack.CapabilityHardwareGSO != 0 {
- // Skip virtioNetHdr which is added before each packet, it
- // isn't used and it isn't in a view.
- n -= virtioNetHdrSize
- }
- used := d.capViews(n, BufConfig)
pkt := stack.NewPacketBuffer(stack.PacketBufferOptions{
- Data: buffer.NewVectorisedView(n, append([]buffer.View(nil), d.views[:used]...)),
+ Data: d.buf.pullViews(n),
})
var (
@@ -133,7 +165,12 @@ func (d *readVDispatcher) dispatch() (bool, *tcpip.Error) {
} else {
// We don't get any indication of what the packet is, so try to guess
// if it's an IPv4 or IPv6 packet.
- switch header.IPVersion(d.views[0]) {
+ // The IP version is in the first octet, so pull up just 1 byte.
+ h, ok := pkt.Data.PullUp(1)
+ if !ok {
+ return true, nil
+ }
+ switch header.IPVersion(h) {
case header.IPv4Version:
p = header.IPv4ProtocolNumber
case header.IPv6Version:
@@ -145,11 +182,6 @@ func (d *readVDispatcher) dispatch() (bool, *tcpip.Error) {
d.e.dispatcher.DeliverNetworkPacket(remote, local, p, pkt)
- // Prepare e.views for another packet: release used views.
- for i := 0; i < used; i++ {
- d.views[i] = nil
- }
-
return true, nil
}
@@ -162,15 +194,8 @@ type recvMMsgDispatcher struct {
// e is the endpoint this dispatcher is attached to.
e *endpoint
- // views is an array of array of buffers that contain packet contents.
- views [][]buffer.View
-
- // iovecs is an array of array of iovec records where each iovec base
- // pointer and length are initialzed to the corresponding view above,
- // except when GSO is enabled then the first iovec in each array of
- // iovecs points to a buffer for the vnet header which is stripped
- // before the views are passed up the stack for further processing.
- iovecs [][]syscall.Iovec
+ // bufs is an array of iovec buffers that contain packet contents.
+ bufs []*iovecBuffer
// msgHdrs is an array of MMsgHdr objects where each MMsgHdr is used to
// reference the array of iovecs held by the corresponding entry in bufs. This
@@ -187,74 +212,32 @@ const (
func newRecvMMsgDispatcher(fd int, e *endpoint) (linkDispatcher, error) {
d := &recvMMsgDispatcher{
- fd: fd,
- e: e,
- }
- d.views = make([][]buffer.View, MaxMsgsPerRecv)
- for i := range d.views {
- d.views[i] = make([]buffer.View, len(BufConfig))
- }
- d.iovecs = make([][]syscall.Iovec, MaxMsgsPerRecv)
- iovLen := len(BufConfig)
- if d.e.Capabilities()&stack.CapabilityHardwareGSO != 0 {
- // virtioNetHdr is prepended before each packet.
- iovLen++
+ fd: fd,
+ e: e,
+ bufs: make([]*iovecBuffer, MaxMsgsPerRecv),
+ msgHdrs: make([]rawfile.MMsgHdr, MaxMsgsPerRecv),
}
- for i := range d.iovecs {
- d.iovecs[i] = make([]syscall.Iovec, iovLen)
- }
- d.msgHdrs = make([]rawfile.MMsgHdr, MaxMsgsPerRecv)
- for i := range d.msgHdrs {
- d.msgHdrs[i].Msg.Iov = &d.iovecs[i][0]
- d.msgHdrs[i].Msg.Iovlen = uint64(iovLen)
+ skipsVnetHdr := d.e.Capabilities()&stack.CapabilityHardwareGSO != 0
+ for i := range d.bufs {
+ d.bufs[i] = newIovecBuffer(BufConfig, skipsVnetHdr)
}
return d, nil
}
-func (d *recvMMsgDispatcher) capViews(k, n int, buffers []int) int {
- c := 0
- for i, s := range buffers {
- c += s
- if c >= n {
- d.views[k][i].CapLength(s - (c - n))
- return i + 1
- }
- }
- return len(buffers)
-}
-
-func (d *recvMMsgDispatcher) allocateViews(bufConfig []int) {
- for k := 0; k < len(d.views); k++ {
- var vnetHdr [virtioNetHdrSize]byte
- vnetHdrOff := 0
- if d.e.Capabilities()&stack.CapabilityHardwareGSO != 0 {
- // The kernel adds virtioNetHdr before each packet, but
- // we don't use it, so so we allocate a buffer for it,
- // add it in iovecs but don't add it in a view.
- d.iovecs[k][0] = syscall.Iovec{
- Base: &vnetHdr[0],
- Len: uint64(virtioNetHdrSize),
- }
- vnetHdrOff++
- }
- for i := 0; i < len(bufConfig); i++ {
- if d.views[k][i] != nil {
- break
- }
- b := buffer.NewView(bufConfig[i])
- d.views[k][i] = b
- d.iovecs[k][i+vnetHdrOff] = syscall.Iovec{
- Base: &b[0],
- Len: uint64(len(b)),
- }
- }
- }
-}
-
// dispatch reads more than one packet at a time from the file
// descriptor and dispatches them.
func (d *recvMMsgDispatcher) dispatch() (bool, *tcpip.Error) {
- d.allocateViews(BufConfig)
+ // Fill message headers.
+ for k := range d.msgHdrs {
+ if d.msgHdrs[k].Msg.Iovlen > 0 {
+ break
+ }
+ iovecs := d.bufs[k].nextIovecs()
+ iovLen := len(iovecs)
+ d.msgHdrs[k].Len = 0
+ d.msgHdrs[k].Msg.Iov = &iovecs[0]
+ d.msgHdrs[k].Msg.Iovlen = uint64(iovLen)
+ }
nMsgs, err := rawfile.BlockingRecvMMsg(d.fd, d.msgHdrs)
if err != nil {
@@ -263,15 +246,14 @@ func (d *recvMMsgDispatcher) dispatch() (bool, *tcpip.Error) {
// Process each of received packets.
for k := 0; k < nMsgs; k++ {
n := int(d.msgHdrs[k].Len)
- if d.e.Capabilities()&stack.CapabilityHardwareGSO != 0 {
- n -= virtioNetHdrSize
- }
- used := d.capViews(k, int(n), BufConfig)
pkt := stack.NewPacketBuffer(stack.PacketBufferOptions{
- Data: buffer.NewVectorisedView(int(n), append([]buffer.View(nil), d.views[k][:used]...)),
+ Data: d.bufs[k].pullViews(n),
})
+ // Mark that this iovec has been processed.
+ d.msgHdrs[k].Msg.Iovlen = 0
+
var (
p tcpip.NetworkProtocolNumber
remote, local tcpip.LinkAddress
@@ -288,26 +270,24 @@ func (d *recvMMsgDispatcher) dispatch() (bool, *tcpip.Error) {
} else {
// We don't get any indication of what the packet is, so try to guess
// if it's an IPv4 or IPv6 packet.
- switch header.IPVersion(d.views[k][0]) {
+ // The IP version is in the first octet, so pull up just 1 byte.
+ h, ok := pkt.Data.PullUp(1)
+ if !ok {
+ // Skip this packet.
+ continue
+ }
+ switch header.IPVersion(h) {
case header.IPv4Version:
p = header.IPv4ProtocolNumber
case header.IPv6Version:
p = header.IPv6ProtocolNumber
default:
- return true, nil
+ // Skip this packet.
+ continue
}
}
d.e.dispatcher.DeliverNetworkPacket(remote, local, p, pkt)
-
- // Prepare e.views for another packet: release used views.
- for i := 0; i < used; i++ {
- d.views[k][i] = nil
- }
- }
-
- for k := 0; k < nMsgs; k++ {
- d.msgHdrs[k].Len = 0
}
return true, nil