summaryrefslogtreecommitdiffhomepage
path: root/pkg/tcpip
diff options
context:
space:
mode:
Diffstat (limited to 'pkg/tcpip')
-rw-r--r--pkg/tcpip/link/fdbased/BUILD1
-rw-r--r--pkg/tcpip/link/fdbased/endpoint_test.go123
-rw-r--r--pkg/tcpip/link/fdbased/packet_dispatchers.go256
3 files changed, 188 insertions, 192 deletions
diff --git a/pkg/tcpip/link/fdbased/BUILD b/pkg/tcpip/link/fdbased/BUILD
index 10072eac1..ae1394ebf 100644
--- a/pkg/tcpip/link/fdbased/BUILD
+++ b/pkg/tcpip/link/fdbased/BUILD
@@ -35,7 +35,6 @@ go_test(
"//pkg/tcpip",
"//pkg/tcpip/buffer",
"//pkg/tcpip/header",
- "//pkg/tcpip/link/rawfile",
"//pkg/tcpip/stack",
"@com_github_google_go_cmp//cmp:go_default_library",
],
diff --git a/pkg/tcpip/link/fdbased/endpoint_test.go b/pkg/tcpip/link/fdbased/endpoint_test.go
index 90da22d34..e2985cb84 100644
--- a/pkg/tcpip/link/fdbased/endpoint_test.go
+++ b/pkg/tcpip/link/fdbased/endpoint_test.go
@@ -30,7 +30,6 @@ import (
"gvisor.dev/gvisor/pkg/tcpip"
"gvisor.dev/gvisor/pkg/tcpip/buffer"
"gvisor.dev/gvisor/pkg/tcpip/header"
- "gvisor.dev/gvisor/pkg/tcpip/link/rawfile"
"gvisor.dev/gvisor/pkg/tcpip/stack"
)
@@ -465,67 +464,85 @@ var capLengthTestCases = []struct {
config: []int{1, 2, 3},
n: 3,
wantUsed: 2,
- wantLengths: []int{1, 2, 3},
+ wantLengths: []int{1, 2},
},
}
-func TestReadVDispatcherCapLength(t *testing.T) {
+func TestIovecBuffer(t *testing.T) {
for _, c := range capLengthTestCases {
- // fd does not matter for this test.
- d := readVDispatcher{fd: -1, e: &endpoint{}}
- d.views = make([]buffer.View, len(c.config))
- d.iovecs = make([]syscall.Iovec, len(c.config))
- d.allocateViews(c.config)
-
- used := d.capViews(c.n, c.config)
- if used != c.wantUsed {
- t.Errorf("Test %q failed when calling capViews(%d, %v). Got %d. Want %d", c.comment, c.n, c.config, used, c.wantUsed)
- }
- lengths := make([]int, len(d.views))
- for i, v := range d.views {
- lengths[i] = len(v)
- }
- if !reflect.DeepEqual(lengths, c.wantLengths) {
- t.Errorf("Test %q failed when calling capViews(%d, %v). Got %v. Want %v", c.comment, c.n, c.config, lengths, c.wantLengths)
- }
- }
-}
+ t.Run(c.comment, func(t *testing.T) {
+ b := newIovecBuffer(c.config, false /* skipsVnetHdr */)
-func TestRecvMMsgDispatcherCapLength(t *testing.T) {
- for _, c := range capLengthTestCases {
- d := recvMMsgDispatcher{
- fd: -1, // fd does not matter for this test.
- e: &endpoint{},
- views: make([][]buffer.View, 1),
- iovecs: make([][]syscall.Iovec, 1),
- msgHdrs: make([]rawfile.MMsgHdr, 1),
- }
+ // Test initial allocation.
+ iovecs := b.nextIovecs()
+ if got, want := len(iovecs), len(c.config); got != want {
+ t.Fatalf("len(iovecs) = %d, want %d", got, want)
+ }
- for i := range d.views {
- d.views[i] = make([]buffer.View, len(c.config))
- }
- for i := range d.iovecs {
- d.iovecs[i] = make([]syscall.Iovec, len(c.config))
- }
- for k, msgHdr := range d.msgHdrs {
- msgHdr.Msg.Iov = &d.iovecs[k][0]
- msgHdr.Msg.Iovlen = uint64(len(c.config))
- }
+ // Make a copy as iovecs points to internal slice. We will need this state
+ // later.
+ oldIovecs := append([]syscall.Iovec(nil), iovecs...)
- d.allocateViews(c.config)
+ // Test the views that get pulled.
+ vv := b.pullViews(c.n)
+ var lengths []int
+ for _, v := range vv.Views() {
+ lengths = append(lengths, len(v))
+ }
+ if !reflect.DeepEqual(lengths, c.wantLengths) {
+ t.Errorf("Pulled view lengths = %v, want %v", lengths, c.wantLengths)
+ }
- used := d.capViews(0, c.n, c.config)
- if used != c.wantUsed {
- t.Errorf("Test %q failed when calling capViews(%d, %v). Got %d. Want %d", c.comment, c.n, c.config, used, c.wantUsed)
- }
- lengths := make([]int, len(d.views[0]))
- for i, v := range d.views[0] {
- lengths[i] = len(v)
- }
- if !reflect.DeepEqual(lengths, c.wantLengths) {
- t.Errorf("Test %q failed when calling capViews(%d, %v). Got %v. Want %v", c.comment, c.n, c.config, lengths, c.wantLengths)
- }
+ // Test that new views get reallocated.
+ for i, newIov := range b.nextIovecs() {
+ if i < c.wantUsed {
+ if newIov.Base == oldIovecs[i].Base {
+ t.Errorf("b.views[%d] should have been reallocated", i)
+ }
+ } else {
+ if newIov.Base != oldIovecs[i].Base {
+ t.Errorf("b.views[%d] should not have been reallocated", i)
+ }
+ }
+ }
+ })
+ }
+}
+func TestIovecBufferSkipVnetHdr(t *testing.T) {
+ for _, test := range []struct {
+ desc string
+ readN int
+ wantLen int
+ }{
+ {
+ desc: "nothing read",
+ readN: 0,
+ wantLen: 0,
+ },
+ {
+ desc: "smaller than vnet header",
+ readN: virtioNetHdrSize - 1,
+ wantLen: 0,
+ },
+ {
+ desc: "header skipped",
+ readN: virtioNetHdrSize + 100,
+ wantLen: 100,
+ },
+ } {
+ t.Run(test.desc, func(t *testing.T) {
+ b := newIovecBuffer([]int{10, 20, 50, 50}, true)
+ // Pretend a read happend.
+ b.nextIovecs()
+ vv := b.pullViews(test.readN)
+ if got, want := vv.Size(), test.wantLen; got != want {
+ t.Errorf("b.pullView(%d).Size() = %d; want %d", test.readN, got, want)
+ }
+ if got, want := len(vv.ToOwnedView()), test.wantLen; got != want {
+ t.Errorf("b.pullView(%d).ToOwnedView() has length %d; want %d", test.readN, got, want)
+ }
+ })
}
}
diff --git a/pkg/tcpip/link/fdbased/packet_dispatchers.go b/pkg/tcpip/link/fdbased/packet_dispatchers.go
index 8c3ca86d6..edab110b5 100644
--- a/pkg/tcpip/link/fdbased/packet_dispatchers.go
+++ b/pkg/tcpip/link/fdbased/packet_dispatchers.go
@@ -29,92 +29,124 @@ import (
// BufConfig defines the shape of the vectorised view used to read packets from the NIC.
var BufConfig = []int{128, 256, 256, 512, 1024, 2048, 4096, 8192, 16384, 32768}
-// readVDispatcher uses readv() system call to read inbound packets and
-// dispatches them.
-type readVDispatcher struct {
- // fd is the file descriptor used to send and receive packets.
- fd int
-
- // e is the endpoint this dispatcher is attached to.
- e *endpoint
-
+type iovecBuffer struct {
// views are the actual buffers that hold the packet contents.
views []buffer.View
// iovecs are initialized with base pointers/len of the corresponding
- // entries in the views defined above, except when GSO is enabled then
- // the first iovec points to a buffer for the vnet header which is
- // stripped before the views are passed up the stack for further
+ // entries in the views defined above, except when GSO is enabled
+ // (skipsVnetHdr) then the first iovec points to a buffer for the vnet header
+ // which is stripped before the views are passed up the stack for further
// processing.
iovecs []syscall.Iovec
+
+ // sizes is an array of buffer sizes for the underlying views. sizes is
+ // immutable.
+ sizes []int
+
+ // skipsVnetHdr is true if virtioNetHdr is to skipped.
+ skipsVnetHdr bool
}
-func newReadVDispatcher(fd int, e *endpoint) (linkDispatcher, error) {
- d := &readVDispatcher{fd: fd, e: e}
- d.views = make([]buffer.View, len(BufConfig))
- iovLen := len(BufConfig)
- if d.e.Capabilities()&stack.CapabilityHardwareGSO != 0 {
- iovLen++
+func newIovecBuffer(sizes []int, skipsVnetHdr bool) *iovecBuffer {
+ b := &iovecBuffer{
+ views: make([]buffer.View, len(sizes)),
+ sizes: sizes,
+ skipsVnetHdr: skipsVnetHdr,
}
- d.iovecs = make([]syscall.Iovec, iovLen)
- return d, nil
+ niov := len(b.views)
+ if b.skipsVnetHdr {
+ niov++
+ }
+ b.iovecs = make([]syscall.Iovec, niov)
+ return b
}
-func (d *readVDispatcher) allocateViews(bufConfig []int) {
- var vnetHdr [virtioNetHdrSize]byte
+func (b *iovecBuffer) nextIovecs() []syscall.Iovec {
vnetHdrOff := 0
- if d.e.Capabilities()&stack.CapabilityHardwareGSO != 0 {
+ if b.skipsVnetHdr {
+ var vnetHdr [virtioNetHdrSize]byte
// The kernel adds virtioNetHdr before each packet, but
// we don't use it, so so we allocate a buffer for it,
// add it in iovecs but don't add it in a view.
- d.iovecs[0] = syscall.Iovec{
+ b.iovecs[0] = syscall.Iovec{
Base: &vnetHdr[0],
Len: uint64(virtioNetHdrSize),
}
vnetHdrOff++
}
- for i := 0; i < len(bufConfig); i++ {
- if d.views[i] != nil {
+ for i := range b.views {
+ if b.views[i] != nil {
break
}
- b := buffer.NewView(bufConfig[i])
- d.views[i] = b
- d.iovecs[i+vnetHdrOff] = syscall.Iovec{
- Base: &b[0],
- Len: uint64(len(b)),
+ v := buffer.NewView(b.sizes[i])
+ b.views[i] = v
+ b.iovecs[i+vnetHdrOff] = syscall.Iovec{
+ Base: &v[0],
+ Len: uint64(len(v)),
}
}
+ return b.iovecs
}
-func (d *readVDispatcher) capViews(n int, buffers []int) int {
+func (b *iovecBuffer) pullViews(n int) buffer.VectorisedView {
+ var views []buffer.View
c := 0
- for i, s := range buffers {
- c += s
+ if b.skipsVnetHdr {
+ c += virtioNetHdrSize
if c >= n {
- d.views[i].CapLength(s - (c - n))
- return i + 1
+ // Nothing in the packet.
+ return buffer.NewVectorisedView(0, nil)
+ }
+ }
+ for i, v := range b.views {
+ c += len(v)
+ if c >= n {
+ b.views[i].CapLength(len(v) - (c - n))
+ views = append([]buffer.View(nil), b.views[:i+1]...)
+ break
}
}
- return len(buffers)
+ // Remove the first len(views) used views from the state.
+ for i := range views {
+ b.views[i] = nil
+ }
+ if b.skipsVnetHdr {
+ // Exclude the size of the vnet header.
+ n -= virtioNetHdrSize
+ }
+ return buffer.NewVectorisedView(n, views)
+}
+
+// readVDispatcher uses readv() system call to read inbound packets and
+// dispatches them.
+type readVDispatcher struct {
+ // fd is the file descriptor used to send and receive packets.
+ fd int
+
+ // e is the endpoint this dispatcher is attached to.
+ e *endpoint
+
+ // buf is the iovec buffer that contains the packet contents.
+ buf *iovecBuffer
+}
+
+func newReadVDispatcher(fd int, e *endpoint) (linkDispatcher, error) {
+ d := &readVDispatcher{fd: fd, e: e}
+ skipsVnetHdr := d.e.Capabilities()&stack.CapabilityHardwareGSO != 0
+ d.buf = newIovecBuffer(BufConfig, skipsVnetHdr)
+ return d, nil
}
// dispatch reads one packet from the file descriptor and dispatches it.
func (d *readVDispatcher) dispatch() (bool, *tcpip.Error) {
- d.allocateViews(BufConfig)
-
- n, err := rawfile.BlockingReadv(d.fd, d.iovecs)
+ n, err := rawfile.BlockingReadv(d.fd, d.buf.nextIovecs())
if n == 0 || err != nil {
return false, err
}
- if d.e.Capabilities()&stack.CapabilityHardwareGSO != 0 {
- // Skip virtioNetHdr which is added before each packet, it
- // isn't used and it isn't in a view.
- n -= virtioNetHdrSize
- }
- used := d.capViews(n, BufConfig)
pkt := stack.NewPacketBuffer(stack.PacketBufferOptions{
- Data: buffer.NewVectorisedView(n, append([]buffer.View(nil), d.views[:used]...)),
+ Data: d.buf.pullViews(n),
})
var (
@@ -133,7 +165,12 @@ func (d *readVDispatcher) dispatch() (bool, *tcpip.Error) {
} else {
// We don't get any indication of what the packet is, so try to guess
// if it's an IPv4 or IPv6 packet.
- switch header.IPVersion(d.views[0]) {
+ // IP version information is at the first octet, so pulling up 1 byte.
+ h, ok := pkt.Data.PullUp(1)
+ if !ok {
+ return true, nil
+ }
+ switch header.IPVersion(h) {
case header.IPv4Version:
p = header.IPv4ProtocolNumber
case header.IPv6Version:
@@ -145,11 +182,6 @@ func (d *readVDispatcher) dispatch() (bool, *tcpip.Error) {
d.e.dispatcher.DeliverNetworkPacket(remote, local, p, pkt)
- // Prepare e.views for another packet: release used views.
- for i := 0; i < used; i++ {
- d.views[i] = nil
- }
-
return true, nil
}
@@ -162,15 +194,8 @@ type recvMMsgDispatcher struct {
// e is the endpoint this dispatcher is attached to.
e *endpoint
- // views is an array of array of buffers that contain packet contents.
- views [][]buffer.View
-
- // iovecs is an array of array of iovec records where each iovec base
- // pointer and length are initialzed to the corresponding view above,
- // except when GSO is enabled then the first iovec in each array of
- // iovecs points to a buffer for the vnet header which is stripped
- // before the views are passed up the stack for further processing.
- iovecs [][]syscall.Iovec
+ // bufs is an array of iovec buffers that contain packet contents.
+ bufs []*iovecBuffer
// msgHdrs is an array of MMsgHdr objects where each MMsghdr is used to
// reference an array of iovecs in the iovecs field defined above. This
@@ -187,74 +212,32 @@ const (
func newRecvMMsgDispatcher(fd int, e *endpoint) (linkDispatcher, error) {
d := &recvMMsgDispatcher{
- fd: fd,
- e: e,
- }
- d.views = make([][]buffer.View, MaxMsgsPerRecv)
- for i := range d.views {
- d.views[i] = make([]buffer.View, len(BufConfig))
- }
- d.iovecs = make([][]syscall.Iovec, MaxMsgsPerRecv)
- iovLen := len(BufConfig)
- if d.e.Capabilities()&stack.CapabilityHardwareGSO != 0 {
- // virtioNetHdr is prepended before each packet.
- iovLen++
+ fd: fd,
+ e: e,
+ bufs: make([]*iovecBuffer, MaxMsgsPerRecv),
+ msgHdrs: make([]rawfile.MMsgHdr, MaxMsgsPerRecv),
}
- for i := range d.iovecs {
- d.iovecs[i] = make([]syscall.Iovec, iovLen)
- }
- d.msgHdrs = make([]rawfile.MMsgHdr, MaxMsgsPerRecv)
- for i := range d.msgHdrs {
- d.msgHdrs[i].Msg.Iov = &d.iovecs[i][0]
- d.msgHdrs[i].Msg.Iovlen = uint64(iovLen)
+ skipsVnetHdr := d.e.Capabilities()&stack.CapabilityHardwareGSO != 0
+ for i := range d.bufs {
+ d.bufs[i] = newIovecBuffer(BufConfig, skipsVnetHdr)
}
return d, nil
}
-func (d *recvMMsgDispatcher) capViews(k, n int, buffers []int) int {
- c := 0
- for i, s := range buffers {
- c += s
- if c >= n {
- d.views[k][i].CapLength(s - (c - n))
- return i + 1
- }
- }
- return len(buffers)
-}
-
-func (d *recvMMsgDispatcher) allocateViews(bufConfig []int) {
- for k := 0; k < len(d.views); k++ {
- var vnetHdr [virtioNetHdrSize]byte
- vnetHdrOff := 0
- if d.e.Capabilities()&stack.CapabilityHardwareGSO != 0 {
- // The kernel adds virtioNetHdr before each packet, but
- // we don't use it, so so we allocate a buffer for it,
- // add it in iovecs but don't add it in a view.
- d.iovecs[k][0] = syscall.Iovec{
- Base: &vnetHdr[0],
- Len: uint64(virtioNetHdrSize),
- }
- vnetHdrOff++
- }
- for i := 0; i < len(bufConfig); i++ {
- if d.views[k][i] != nil {
- break
- }
- b := buffer.NewView(bufConfig[i])
- d.views[k][i] = b
- d.iovecs[k][i+vnetHdrOff] = syscall.Iovec{
- Base: &b[0],
- Len: uint64(len(b)),
- }
- }
- }
-}
-
// recvMMsgDispatch reads more than one packet at a time from the file
// descriptor and dispatches it.
func (d *recvMMsgDispatcher) dispatch() (bool, *tcpip.Error) {
- d.allocateViews(BufConfig)
+ // Fill message headers.
+ for k := range d.msgHdrs {
+ if d.msgHdrs[k].Msg.Iovlen > 0 {
+ break
+ }
+ iovecs := d.bufs[k].nextIovecs()
+ iovLen := len(iovecs)
+ d.msgHdrs[k].Len = 0
+ d.msgHdrs[k].Msg.Iov = &iovecs[0]
+ d.msgHdrs[k].Msg.Iovlen = uint64(iovLen)
+ }
nMsgs, err := rawfile.BlockingRecvMMsg(d.fd, d.msgHdrs)
if err != nil {
@@ -263,15 +246,14 @@ func (d *recvMMsgDispatcher) dispatch() (bool, *tcpip.Error) {
// Process each of received packets.
for k := 0; k < nMsgs; k++ {
n := int(d.msgHdrs[k].Len)
- if d.e.Capabilities()&stack.CapabilityHardwareGSO != 0 {
- n -= virtioNetHdrSize
- }
- used := d.capViews(k, int(n), BufConfig)
pkt := stack.NewPacketBuffer(stack.PacketBufferOptions{
- Data: buffer.NewVectorisedView(int(n), append([]buffer.View(nil), d.views[k][:used]...)),
+ Data: d.bufs[k].pullViews(n),
})
+ // Mark that this iovec has been processed.
+ d.msgHdrs[k].Msg.Iovlen = 0
+
var (
p tcpip.NetworkProtocolNumber
remote, local tcpip.LinkAddress
@@ -288,26 +270,24 @@ func (d *recvMMsgDispatcher) dispatch() (bool, *tcpip.Error) {
} else {
// We don't get any indication of what the packet is, so try to guess
// if it's an IPv4 or IPv6 packet.
- switch header.IPVersion(d.views[k][0]) {
+ // IP version information is at the first octet, so pulling up 1 byte.
+ h, ok := pkt.Data.PullUp(1)
+ if !ok {
+ // Skip this packet.
+ continue
+ }
+ switch header.IPVersion(h) {
case header.IPv4Version:
p = header.IPv4ProtocolNumber
case header.IPv6Version:
p = header.IPv6ProtocolNumber
default:
- return true, nil
+ // Skip this packet.
+ continue
}
}
d.e.dispatcher.DeliverNetworkPacket(remote, local, p, pkt)
-
- // Prepare e.views for another packet: release used views.
- for i := 0; i < used; i++ {
- d.views[k][i] = nil
- }
- }
-
- for k := 0; k < nMsgs; k++ {
- d.msgHdrs[k].Len = 0
}
return true, nil