summaryrefslogtreecommitdiffhomepage
path: root/pkg/tcpip/link/fdbased/packet_dispatchers.go
diff options
context:
space:
mode:
authorgVisor bot <gvisor-bot@google.com>2019-06-02 06:44:55 +0000
committergVisor bot <gvisor-bot@google.com>2019-06-02 06:44:55 +0000
commitceb0d792f328d1fc0692197d8856a43c3936a571 (patch)
tree83155f302eff44a78bcc30a3a08f4efe59a79379 /pkg/tcpip/link/fdbased/packet_dispatchers.go
parentdeb7ecf1e46862d54f4b102f2d163cfbcfc37f3b (diff)
parent216da0b733dbed9aad9b2ab92ac75bcb906fd7ee (diff)
Merge 216da0b7 (automated)
Diffstat (limited to 'pkg/tcpip/link/fdbased/packet_dispatchers.go')
-rw-r--r--pkg/tcpip/link/fdbased/packet_dispatchers.go309
1 files changed, 309 insertions, 0 deletions
diff --git a/pkg/tcpip/link/fdbased/packet_dispatchers.go b/pkg/tcpip/link/fdbased/packet_dispatchers.go
new file mode 100644
index 000000000..1ae0e3359
--- /dev/null
+++ b/pkg/tcpip/link/fdbased/packet_dispatchers.go
@@ -0,0 +1,309 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// +build linux
+
+package fdbased
+
+import (
+ "syscall"
+
+ "gvisor.googlesource.com/gvisor/pkg/tcpip"
+ "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer"
+ "gvisor.googlesource.com/gvisor/pkg/tcpip/header"
+ "gvisor.googlesource.com/gvisor/pkg/tcpip/link/rawfile"
+ "gvisor.googlesource.com/gvisor/pkg/tcpip/stack"
+)
+
+// BufConfig defines the shape of the vectorised view used to read packets from the NIC.
+var BufConfig = []int{128, 256, 256, 512, 1024, 2048, 4096, 8192, 16384, 32768}
+
+// readVDispatcher uses readv() system call to read inbound packets and
+// dispatches them.
+type readVDispatcher struct {
+ // fd is the file descriptor used to send and receive packets.
+ fd int
+
+ // e is the endpoint this dispatcher is attached to.
+ e *endpoint
+
+ // views are the actual buffers that hold the packet contents.
+ views []buffer.View
+
+ // iovecs are initialized with base pointers/len of the corresponding
+ // entries in the views defined above, except when GSO is enabled then
+ // the first iovec points to a buffer for the vnet header which is
+ // stripped before the views are passed up the stack for further
+ // processing.
+ iovecs []syscall.Iovec
+}
+
+func newReadVDispatcher(fd int, e *endpoint) (linkDispatcher, error) {
+ d := &readVDispatcher{fd: fd, e: e}
+ d.views = make([]buffer.View, len(BufConfig))
+ iovLen := len(BufConfig)
+ if d.e.Capabilities()&stack.CapabilityGSO != 0 {
+ iovLen++
+ }
+ d.iovecs = make([]syscall.Iovec, iovLen)
+ return d, nil
+}
+
+func (d *readVDispatcher) allocateViews(bufConfig []int) {
+ var vnetHdr [virtioNetHdrSize]byte
+ vnetHdrOff := 0
+ if d.e.Capabilities()&stack.CapabilityGSO != 0 {
+ // The kernel adds virtioNetHdr before each packet, but
+ // we don't use it, so so we allocate a buffer for it,
+ // add it in iovecs but don't add it in a view.
+ d.iovecs[0] = syscall.Iovec{
+ Base: &vnetHdr[0],
+ Len: uint64(virtioNetHdrSize),
+ }
+ vnetHdrOff++
+ }
+ for i := 0; i < len(bufConfig); i++ {
+ if d.views[i] != nil {
+ break
+ }
+ b := buffer.NewView(bufConfig[i])
+ d.views[i] = b
+ d.iovecs[i+vnetHdrOff] = syscall.Iovec{
+ Base: &b[0],
+ Len: uint64(len(b)),
+ }
+ }
+}
+
+func (d *readVDispatcher) capViews(n int, buffers []int) int {
+ c := 0
+ for i, s := range buffers {
+ c += s
+ if c >= n {
+ d.views[i].CapLength(s - (c - n))
+ return i + 1
+ }
+ }
+ return len(buffers)
+}
+
+// dispatch reads one packet from the file descriptor and dispatches it.
+func (d *readVDispatcher) dispatch() (bool, *tcpip.Error) {
+ d.allocateViews(BufConfig)
+
+ n, err := rawfile.BlockingReadv(d.fd, d.iovecs)
+ if err != nil {
+ return false, err
+ }
+ if d.e.Capabilities()&stack.CapabilityGSO != 0 {
+ // Skip virtioNetHdr which is added before each packet, it
+ // isn't used and it isn't in a view.
+ n -= virtioNetHdrSize
+ }
+ if n <= d.e.hdrSize {
+ return false, nil
+ }
+
+ var (
+ p tcpip.NetworkProtocolNumber
+ remote, local tcpip.LinkAddress
+ )
+ if d.e.hdrSize > 0 {
+ eth := header.Ethernet(d.views[0])
+ p = eth.Type()
+ remote = eth.SourceAddress()
+ local = eth.DestinationAddress()
+ } else {
+ // We don't get any indication of what the packet is, so try to guess
+ // if it's an IPv4 or IPv6 packet.
+ switch header.IPVersion(d.views[0]) {
+ case header.IPv4Version:
+ p = header.IPv4ProtocolNumber
+ case header.IPv6Version:
+ p = header.IPv6ProtocolNumber
+ default:
+ return true, nil
+ }
+ }
+
+ used := d.capViews(n, BufConfig)
+ vv := buffer.NewVectorisedView(n, d.views[:used])
+ vv.TrimFront(d.e.hdrSize)
+
+ d.e.dispatcher.DeliverNetworkPacket(d.e, remote, local, p, vv)
+
+ // Prepare e.views for another packet: release used views.
+ for i := 0; i < used; i++ {
+ d.views[i] = nil
+ }
+
+ return true, nil
+}
+
+// recvMMsgDispatcher uses the recvmmsg system call to read inbound packets and
+// dispatches them.
+type recvMMsgDispatcher struct {
+ // fd is the file descriptor used to send and receive packets.
+ fd int
+
+ // e is the endpoint this dispatcher is attached to.
+ e *endpoint
+
+ // views is an array of array of buffers that contain packet contents.
+ views [][]buffer.View
+
+ // iovecs is an array of array of iovec records where each iovec base
+ // pointer and length are initialzed to the corresponding view above,
+ // except when GSO is neabled then the first iovec in each array of
+ // iovecs points to a buffer for the vnet header which is stripped
+ // before the views are passed up the stack for further processing.
+ iovecs [][]syscall.Iovec
+
+ // msgHdrs is an array of MMsgHdr objects where each MMsghdr is used to
+ // reference an array of iovecs in the iovecs field defined above. This
+ // array is passed as the parameter to recvmmsg call to retrieve
+ // potentially more than 1 packet per syscall.
+ msgHdrs []rawfile.MMsgHdr
+}
+
+const (
+ // MaxMsgsPerRecv is the maximum number of packets we want to retrieve
+ // in a single RecvMMsg call.
+ MaxMsgsPerRecv = 8
+)
+
+func newRecvMMsgDispatcher(fd int, e *endpoint) (linkDispatcher, error) {
+ d := &recvMMsgDispatcher{
+ fd: fd,
+ e: e,
+ }
+ d.views = make([][]buffer.View, MaxMsgsPerRecv)
+ for i := range d.views {
+ d.views[i] = make([]buffer.View, len(BufConfig))
+ }
+ d.iovecs = make([][]syscall.Iovec, MaxMsgsPerRecv)
+ iovLen := len(BufConfig)
+ if d.e.Capabilities()&stack.CapabilityGSO != 0 {
+ // virtioNetHdr is prepended before each packet.
+ iovLen++
+ }
+ for i := range d.iovecs {
+ d.iovecs[i] = make([]syscall.Iovec, iovLen)
+ }
+ d.msgHdrs = make([]rawfile.MMsgHdr, MaxMsgsPerRecv)
+ for i := range d.msgHdrs {
+ d.msgHdrs[i].Msg.Iov = &d.iovecs[i][0]
+ d.msgHdrs[i].Msg.Iovlen = uint64(iovLen)
+ }
+ return d, nil
+}
+
+func (d *recvMMsgDispatcher) capViews(k, n int, buffers []int) int {
+ c := 0
+ for i, s := range buffers {
+ c += s
+ if c >= n {
+ d.views[k][i].CapLength(s - (c - n))
+ return i + 1
+ }
+ }
+ return len(buffers)
+}
+
+func (d *recvMMsgDispatcher) allocateViews(bufConfig []int) {
+ for k := 0; k < len(d.views); k++ {
+ var vnetHdr [virtioNetHdrSize]byte
+ vnetHdrOff := 0
+ if d.e.Capabilities()&stack.CapabilityGSO != 0 {
+ // The kernel adds virtioNetHdr before each packet, but
+ // we don't use it, so so we allocate a buffer for it,
+ // add it in iovecs but don't add it in a view.
+ d.iovecs[k][0] = syscall.Iovec{
+ Base: &vnetHdr[0],
+ Len: uint64(virtioNetHdrSize),
+ }
+ vnetHdrOff++
+ }
+ for i := 0; i < len(bufConfig); i++ {
+ if d.views[k][i] != nil {
+ break
+ }
+ b := buffer.NewView(bufConfig[i])
+ d.views[k][i] = b
+ d.iovecs[k][i+vnetHdrOff] = syscall.Iovec{
+ Base: &b[0],
+ Len: uint64(len(b)),
+ }
+ }
+ }
+}
+
+// recvMMsgDispatch reads more than one packet at a time from the file
+// descriptor and dispatches it.
+func (d *recvMMsgDispatcher) dispatch() (bool, *tcpip.Error) {
+ d.allocateViews(BufConfig)
+
+ nMsgs, err := rawfile.BlockingRecvMMsg(d.fd, d.msgHdrs)
+ if err != nil {
+ return false, err
+ }
+ // Process each of received packets.
+ for k := 0; k < nMsgs; k++ {
+ n := int(d.msgHdrs[k].Len)
+ if d.e.Capabilities()&stack.CapabilityGSO != 0 {
+ n -= virtioNetHdrSize
+ }
+ if n <= d.e.hdrSize {
+ return false, nil
+ }
+
+ var (
+ p tcpip.NetworkProtocolNumber
+ remote, local tcpip.LinkAddress
+ )
+ if d.e.hdrSize > 0 {
+ eth := header.Ethernet(d.views[k][0])
+ p = eth.Type()
+ remote = eth.SourceAddress()
+ local = eth.DestinationAddress()
+ } else {
+ // We don't get any indication of what the packet is, so try to guess
+ // if it's an IPv4 or IPv6 packet.
+ switch header.IPVersion(d.views[k][0]) {
+ case header.IPv4Version:
+ p = header.IPv4ProtocolNumber
+ case header.IPv6Version:
+ p = header.IPv6ProtocolNumber
+ default:
+ return true, nil
+ }
+ }
+
+ used := d.capViews(k, int(n), BufConfig)
+ vv := buffer.NewVectorisedView(int(n), d.views[k][:used])
+ vv.TrimFront(d.e.hdrSize)
+ d.e.dispatcher.DeliverNetworkPacket(d.e, remote, local, p, vv)
+
+ // Prepare e.views for another packet: release used views.
+ for i := 0; i < used; i++ {
+ d.views[k][i] = nil
+ }
+ }
+
+ for k := 0; k < nMsgs; k++ {
+ d.msgHdrs[k].Len = 0
+ }
+
+ return true, nil
+}