summaryrefslogtreecommitdiffhomepage
path: root/pkg/tcpip/link
diff options
context:
space:
mode:
authorAndrei Vagin <avagin@google.com>2019-03-28 11:02:23 -0700
committerShentubot <shentubot@google.com>2019-03-28 11:03:41 -0700
commitf4105ac21a9f11f5231681239ca92ac814b5149d (patch)
tree2ecd11f283e674bce8cd29b514ae0745fdd0fa83 /pkg/tcpip/link
parent9c188978870051f0b42ceb1a3f16320286936976 (diff)
netstack/fdbased: add generic segmentation offload (GSO) support
The linux packet socket can handle GSO packets, so we can segment packets to 64K instead of the MTU which is usually 1500. Here are numbers for the nginx-1m test: runsc: 579330.01 [Kbytes/sec] received runsc-gso: 1794121.66 [Kbytes/sec] received runc: 2122139.06 [Kbytes/sec] received and for tcp_benchmark: $ tcp_benchmark --duration 15 --ideal [ 4] 0.0-15.0 sec 86647 MBytes 48456 Mbits/sec $ tcp_benchmark --client --duration 15 --ideal [ 4] 0.0-15.0 sec 2173 MBytes 1214 Mbits/sec $ tcp_benchmark --client --duration 15 --ideal --gso 65536 [ 4] 0.0-15.0 sec 19357 MBytes 10825 Mbits/sec PiperOrigin-RevId: 240809103 Change-Id: I2637f104db28b5d4c64e1e766c610162a195775a
Diffstat (limited to 'pkg/tcpip/link')
-rw-r--r--pkg/tcpip/link/channel/channel.go2
-rw-r--r--pkg/tcpip/link/fdbased/BUILD1
-rw-r--r--pkg/tcpip/link/fdbased/endpoint.go104
-rw-r--r--pkg/tcpip/link/fdbased/endpoint_test.go160
-rw-r--r--pkg/tcpip/link/fdbased/endpoint_unsafe.go32
-rw-r--r--pkg/tcpip/link/loopback/loopback.go2
-rw-r--r--pkg/tcpip/link/muxed/injectable.go4
-rw-r--r--pkg/tcpip/link/muxed/injectable_test.go4
-rw-r--r--pkg/tcpip/link/rawfile/rawfile_unsafe.go15
-rw-r--r--pkg/tcpip/link/sharedmem/sharedmem.go2
-rw-r--r--pkg/tcpip/link/sharedmem/sharedmem_test.go24
-rw-r--r--pkg/tcpip/link/sniffer/sniffer.go12
-rw-r--r--pkg/tcpip/link/waitable/waitable.go4
-rw-r--r--pkg/tcpip/link/waitable/waitable_test.go8
14 files changed, 274 insertions, 100 deletions
diff --git a/pkg/tcpip/link/channel/channel.go b/pkg/tcpip/link/channel/channel.go
index 25cffa787..8c0d11288 100644
--- a/pkg/tcpip/link/channel/channel.go
+++ b/pkg/tcpip/link/channel/channel.go
@@ -109,7 +109,7 @@ func (e *Endpoint) LinkAddress() tcpip.LinkAddress {
}
// WritePacket stores outbound packets into the channel.
-func (e *Endpoint) WritePacket(_ *stack.Route, hdr buffer.Prependable, payload buffer.VectorisedView, protocol tcpip.NetworkProtocolNumber) *tcpip.Error {
+func (e *Endpoint) WritePacket(_ *stack.Route, _ *stack.GSO, hdr buffer.Prependable, payload buffer.VectorisedView, protocol tcpip.NetworkProtocolNumber) *tcpip.Error {
p := PacketInfo{
Header: hdr.View(),
Proto: protocol,
diff --git a/pkg/tcpip/link/fdbased/BUILD b/pkg/tcpip/link/fdbased/BUILD
index bcf9c023e..50ce91a4e 100644
--- a/pkg/tcpip/link/fdbased/BUILD
+++ b/pkg/tcpip/link/fdbased/BUILD
@@ -6,6 +6,7 @@ go_library(
name = "fdbased",
srcs = [
"endpoint.go",
+ "endpoint_unsafe.go",
"mmap.go",
"mmap_amd64_unsafe.go",
],
diff --git a/pkg/tcpip/link/fdbased/endpoint.go b/pkg/tcpip/link/fdbased/endpoint.go
index d726551b0..20e34c5ee 100644
--- a/pkg/tcpip/link/fdbased/endpoint.go
+++ b/pkg/tcpip/link/fdbased/endpoint.go
@@ -111,6 +111,10 @@ type endpoint struct {
// ringOffset is the current offset into the ring buffer where the next
// inbound packet will be placed by the kernel.
ringOffset int
+
+ // gsoMaxSize is the maximum GSO packet size. It is zero if GSO is
+ // disabled.
+ gsoMaxSize uint32
}
// Options specify the details about the fd-based endpoint to be created.
@@ -123,6 +127,7 @@ type Options struct {
Address tcpip.LinkAddress
SaveRestore bool
DisconnectOk bool
+ GSOMaxSize uint32
PacketDispatchMode PacketDispatchMode
}
@@ -165,6 +170,10 @@ func New(opts *Options) tcpip.LinkEndpointID {
packetDispatchMode: opts.PacketDispatchMode,
}
+ if opts.GSOMaxSize != 0 && isSocketFD(opts.FD) {
+ e.caps |= stack.CapabilityGSO
+ e.gsoMaxSize = opts.GSOMaxSize
+ }
if isSocketFD(opts.FD) && e.packetDispatchMode == PacketMMap {
if err := e.setupPacketRXRing(); err != nil {
// TODO: replace panic with an error return.
@@ -185,17 +194,22 @@ func New(opts *Options) tcpip.LinkEndpointID {
}
e.views = make([][]buffer.View, msgsPerRecv)
- for i, _ := range e.views {
+ for i := range e.views {
e.views[i] = make([]buffer.View, len(BufConfig))
}
e.iovecs = make([][]syscall.Iovec, msgsPerRecv)
- for i, _ := range e.iovecs {
- e.iovecs[i] = make([]syscall.Iovec, len(BufConfig))
+ iovLen := len(BufConfig)
+ if e.Capabilities()&stack.CapabilityGSO != 0 {
+ // virtioNetHdr is prepended before each packet.
+ iovLen++
+ }
+ for i := range e.iovecs {
+ e.iovecs[i] = make([]syscall.Iovec, iovLen)
}
e.msgHdrs = make([]rawfile.MMsgHdr, msgsPerRecv)
- for i, _ := range e.msgHdrs {
+ for i := range e.msgHdrs {
e.msgHdrs[i].Msg.Iov = &e.iovecs[i][0]
- e.msgHdrs[i].Msg.Iovlen = uint64(len(BufConfig))
+ e.msgHdrs[i].Msg.Iovlen = uint64(iovLen)
}
return stack.RegisterLinkEndpoint(e)
@@ -246,9 +260,27 @@ func (e *endpoint) LinkAddress() tcpip.LinkAddress {
return e.addr
}
+// virtioNetHdr is declared in linux/virtio_net.h.
+type virtioNetHdr struct {
+ flags uint8
+ gsoType uint8
+ hdrLen uint16
+ gsoSize uint16
+ csumStart uint16
+ csumOffset uint16
+}
+
+// These constants are declared in linux/virtio_net.h.
+const (
+ _VIRTIO_NET_HDR_F_NEEDS_CSUM = 1
+
+ _VIRTIO_NET_HDR_GSO_TCPV4 = 1
+ _VIRTIO_NET_HDR_GSO_TCPV6 = 4
+)
+
// WritePacket writes outbound packets to the file descriptor. If it is not
// currently writable, the packet is dropped.
-func (e *endpoint) WritePacket(r *stack.Route, hdr buffer.Prependable, payload buffer.VectorisedView, protocol tcpip.NetworkProtocolNumber) *tcpip.Error {
+func (e *endpoint) WritePacket(r *stack.Route, gso *stack.GSO, hdr buffer.Prependable, payload buffer.VectorisedView, protocol tcpip.NetworkProtocolNumber) *tcpip.Error {
if e.hdrSize > 0 {
// Add ethernet header if needed.
eth := header.Ethernet(hdr.Prepend(header.EthernetMinimumSize))
@@ -266,11 +298,37 @@ func (e *endpoint) WritePacket(r *stack.Route, hdr buffer.Prependable, payload b
eth.Encode(ethHdr)
}
+ if e.Capabilities()&stack.CapabilityGSO != 0 {
+ vnetHdr := virtioNetHdr{}
+ vnetHdrBuf := vnetHdrToByteSlice(&vnetHdr)
+ if gso != nil {
+ vnetHdr.hdrLen = uint16(hdr.UsedLength())
+ if gso.NeedsCsum {
+ vnetHdr.flags = _VIRTIO_NET_HDR_F_NEEDS_CSUM
+ vnetHdr.csumStart = header.EthernetMinimumSize + gso.L3HdrLen
+ vnetHdr.csumOffset = gso.CsumOffset
+ }
+ if gso.Type != stack.GSONone && uint16(payload.Size()) > gso.MSS {
+ switch gso.Type {
+ case stack.GSOTCPv4:
+ vnetHdr.gsoType = _VIRTIO_NET_HDR_GSO_TCPV4
+ case stack.GSOTCPv6:
+ vnetHdr.gsoType = _VIRTIO_NET_HDR_GSO_TCPV6
+ default:
+ panic(fmt.Sprintf("Unknown gso type: %v", gso.Type))
+ }
+ vnetHdr.gsoSize = gso.MSS
+ }
+ }
+
+ return rawfile.NonBlockingWrite3(e.fd, vnetHdrBuf, hdr.View(), payload.ToView())
+ }
+
if payload.Size() == 0 {
return rawfile.NonBlockingWrite(e.fd, hdr.View())
}
- return rawfile.NonBlockingWrite2(e.fd, hdr.View(), payload.ToView())
+ return rawfile.NonBlockingWrite3(e.fd, hdr.View(), payload.ToView(), nil)
}
// WriteRawPacket writes a raw packet directly to the file descriptor.
@@ -292,13 +350,25 @@ func (e *endpoint) capViews(k, n int, buffers []int) int {
func (e *endpoint) allocateViews(bufConfig []int) {
for k := 0; k < len(e.views); k++ {
+ var vnetHdr [virtioNetHdrSize]byte
+ vnetHdrOff := 0
+ if e.Capabilities()&stack.CapabilityGSO != 0 {
+ // The kernel adds virtioNetHdr before each packet, but
+ // we don't use it, so so we allocate a buffer for it,
+ // add it in iovecs but don't add it in a view.
+ e.iovecs[k][0] = syscall.Iovec{
+ Base: &vnetHdr[0],
+ Len: uint64(virtioNetHdrSize),
+ }
+ vnetHdrOff++
+ }
for i := 0; i < len(bufConfig); i++ {
if e.views[k][i] != nil {
break
}
b := buffer.NewView(bufConfig[i])
e.views[k][i] = b
- e.iovecs[k][i] = syscall.Iovec{
+ e.iovecs[k][i+vnetHdrOff] = syscall.Iovec{
Base: &b[0],
Len: uint64(len(b)),
}
@@ -314,7 +384,11 @@ func (e *endpoint) dispatch() (bool, *tcpip.Error) {
if err != nil {
return false, err
}
-
+ if e.Capabilities()&stack.CapabilityGSO != 0 {
+ // Skip virtioNetHdr which is added before each packet, it
+ // isn't used and it isn't in a view.
+ n -= virtioNetHdrSize
+ }
if n <= e.hdrSize {
return false, nil
}
@@ -366,8 +440,11 @@ func (e *endpoint) recvMMsgDispatch() (bool, *tcpip.Error) {
}
// Process each of received packets.
for k := 0; k < nMsgs; k++ {
- n := e.msgHdrs[k].Len
- if n <= uint32(e.hdrSize) {
+ n := int(e.msgHdrs[k].Len)
+ if e.Capabilities()&stack.CapabilityGSO != 0 {
+ n -= virtioNetHdrSize
+ }
+ if n <= e.hdrSize {
return false, nil
}
@@ -425,6 +502,11 @@ func (e *endpoint) dispatchLoop() *tcpip.Error {
}
}
+// GSOMaxSize returns the maximum GSO packet size.
+func (e *endpoint) GSOMaxSize() uint32 {
+ return e.gsoMaxSize
+}
+
// InjectableEndpoint is an injectable fd-based endpoint. The endpoint writes
// to the FD, but does not read from it. All reads come from injected packets.
type InjectableEndpoint struct {
diff --git a/pkg/tcpip/link/fdbased/endpoint_test.go b/pkg/tcpip/link/fdbased/endpoint_test.go
index 14abacdf2..ecc5b73f3 100644
--- a/pkg/tcpip/link/fdbased/endpoint_test.go
+++ b/pkg/tcpip/link/fdbased/endpoint_test.go
@@ -24,6 +24,7 @@ import (
"syscall"
"testing"
"time"
+ "unsafe"
"gvisor.googlesource.com/gvisor/pkg/tcpip"
"gvisor.googlesource.com/gvisor/pkg/tcpip/buffer"
@@ -33,10 +34,12 @@ import (
)
const (
- mtu = 1500
- laddr = tcpip.LinkAddress("\x11\x22\x33\x44\x55\x66")
- raddr = tcpip.LinkAddress("\x77\x88\x99\xaa\xbb\xcc")
- proto = 10
+ mtu = 1500
+ laddr = tcpip.LinkAddress("\x11\x22\x33\x44\x55\x66")
+ raddr = tcpip.LinkAddress("\x77\x88\x99\xaa\xbb\xcc")
+ proto = 10
+ csumOffset = 48
+ gsoMSS = 500
)
type packetInfo struct {
@@ -130,67 +133,108 @@ func TestAddress(t *testing.T) {
}
}
-func TestWritePacket(t *testing.T) {
- lengths := []int{0, 100, 1000}
- eths := []bool{true, false}
+func testWritePacket(t *testing.T, plen int, eth bool, gsoMaxSize uint32) {
+ c := newContext(t, &Options{Address: laddr, MTU: mtu, EthernetHeader: eth, GSOMaxSize: gsoMaxSize})
+ defer c.cleanup()
- for _, eth := range eths {
- for _, plen := range lengths {
- t.Run(fmt.Sprintf("Eth=%v,PayloadLen=%v", eth, plen), func(t *testing.T) {
- c := newContext(t, &Options{Address: laddr, MTU: mtu, EthernetHeader: eth})
- defer c.cleanup()
+ r := &stack.Route{
+ RemoteLinkAddress: raddr,
+ }
- r := &stack.Route{
- RemoteLinkAddress: raddr,
- }
+ // Build header.
+ hdr := buffer.NewPrependable(int(c.ep.MaxHeaderLength()) + 100)
+ b := hdr.Prepend(100)
+ for i := range b {
+ b[i] = uint8(rand.Intn(256))
+ }
- // Build header.
- hdr := buffer.NewPrependable(int(c.ep.MaxHeaderLength()) + 100)
- b := hdr.Prepend(100)
- for i := range b {
- b[i] = uint8(rand.Intn(256))
- }
+ // Build payload and write.
+ payload := make(buffer.View, plen)
+ for i := range payload {
+ payload[i] = uint8(rand.Intn(256))
+ }
+ want := append(hdr.View(), payload...)
+ var gso *stack.GSO
+ if gsoMaxSize != 0 {
+ gso = &stack.GSO{
+ Type: stack.GSOTCPv6,
+ NeedsCsum: true,
+ CsumOffset: csumOffset,
+ MSS: gsoMSS,
+ MaxSize: gsoMaxSize,
+ }
+ }
+ if err := c.ep.WritePacket(r, gso, hdr, payload.ToVectorisedView(), proto); err != nil {
+ t.Fatalf("WritePacket failed: %v", err)
+ }
- // Build payload and write.
- payload := make(buffer.View, plen)
- for i := range payload {
- payload[i] = uint8(rand.Intn(256))
- }
- want := append(hdr.View(), payload...)
- if err := c.ep.WritePacket(r, hdr, payload.ToVectorisedView(), proto); err != nil {
- t.Fatalf("WritePacket failed: %v", err)
- }
+ // Read from fd, then compare with what we wrote.
+ b = make([]byte, mtu)
+ n, err := syscall.Read(c.fds[0], b)
+ if err != nil {
+ t.Fatalf("Read failed: %v", err)
+ }
+ b = b[:n]
+ if gsoMaxSize != 0 {
+ vnetHdr := *(*virtioNetHdr)(unsafe.Pointer(&b[0]))
+ if vnetHdr.flags&_VIRTIO_NET_HDR_F_NEEDS_CSUM == 0 {
+ t.Fatalf("virtioNetHdr.flags %v doesn't contain %v", vnetHdr.flags, _VIRTIO_NET_HDR_F_NEEDS_CSUM)
+ }
+ csumStart := header.EthernetMinimumSize + gso.L3HdrLen
+ if vnetHdr.csumStart != csumStart {
+ t.Fatalf("vnetHdr.csumStart = %v, want %v", vnetHdr.csumStart, csumStart)
+ }
+ if vnetHdr.csumOffset != csumOffset {
+ t.Fatalf("vnetHdr.csumOffset = %v, want %v", vnetHdr.csumOffset, csumOffset)
+ }
+ gsoType := uint8(0)
+ if int(gso.MSS) < plen {
+ gsoType = _VIRTIO_NET_HDR_GSO_TCPV6
+ }
+ if vnetHdr.gsoType != gsoType {
+ t.Fatalf("vnetHdr.gsoType = %v, want %v", vnetHdr.gsoType, gsoType)
+ }
+ b = b[virtioNetHdrSize:]
+ }
+ if eth {
+ h := header.Ethernet(b)
+ b = b[header.EthernetMinimumSize:]
- // Read from fd, then compare with what we wrote.
- b = make([]byte, mtu)
- n, err := syscall.Read(c.fds[0], b)
- if err != nil {
- t.Fatalf("Read failed: %v", err)
- }
- b = b[:n]
- if eth {
- h := header.Ethernet(b)
- b = b[header.EthernetMinimumSize:]
+ if a := h.SourceAddress(); a != laddr {
+ t.Fatalf("SourceAddress() = %v, want %v", a, laddr)
+ }
- if a := h.SourceAddress(); a != laddr {
- t.Fatalf("SourceAddress() = %v, want %v", a, laddr)
- }
+ if a := h.DestinationAddress(); a != raddr {
+ t.Fatalf("DestinationAddress() = %v, want %v", a, raddr)
+ }
- if a := h.DestinationAddress(); a != raddr {
- t.Fatalf("DestinationAddress() = %v, want %v", a, raddr)
- }
+ if et := h.Type(); et != proto {
+ t.Fatalf("Type() = %v, want %v", et, proto)
+ }
+ }
+ if len(b) != len(want) {
+ t.Fatalf("Read returned %v bytes, want %v", len(b), len(want))
+ }
+ if !bytes.Equal(b, want) {
+ t.Fatalf("Read returned %x, want %x", b, want)
+ }
+}
- if et := h.Type(); et != proto {
- t.Fatalf("Type() = %v, want %v", et, proto)
- }
- }
- if len(b) != len(want) {
- t.Fatalf("Read returned %v bytes, want %v", len(b), len(want))
- }
- if !bytes.Equal(b, want) {
- t.Fatalf("Read returned %x, want %x", b, want)
- }
- })
+func TestWritePacket(t *testing.T) {
+ lengths := []int{0, 100, 1000}
+ eths := []bool{true, false}
+ gsos := []uint32{0, 32768}
+
+ for _, eth := range eths {
+ for _, plen := range lengths {
+ for _, gso := range gsos {
+ t.Run(
+ fmt.Sprintf("Eth=%v,PayloadLen=%v,GSOMaxSize=%v", eth, plen, gso),
+ func(t *testing.T) {
+ testWritePacket(t, plen, eth, gso)
+ },
+ )
+ }
}
}
}
@@ -210,7 +254,7 @@ func TestPreserveSrcAddress(t *testing.T) {
// WritePacket panics given a prependable with anything less than
// the minimum size of the ethernet header.
hdr := buffer.NewPrependable(header.EthernetMinimumSize)
- if err := c.ep.WritePacket(r, hdr, buffer.VectorisedView{}, proto); err != nil {
+ if err := c.ep.WritePacket(r, nil /* gso */, hdr, buffer.VectorisedView{}, proto); err != nil {
t.Fatalf("WritePacket failed: %v", err)
}
diff --git a/pkg/tcpip/link/fdbased/endpoint_unsafe.go b/pkg/tcpip/link/fdbased/endpoint_unsafe.go
new file mode 100644
index 000000000..36e7fe5a9
--- /dev/null
+++ b/pkg/tcpip/link/fdbased/endpoint_unsafe.go
@@ -0,0 +1,32 @@
+// Copyright 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// +build linux
+
+package fdbased
+
+import (
+ "reflect"
+ "unsafe"
+)
+
+const virtioNetHdrSize = int(unsafe.Sizeof(virtioNetHdr{}))
+
+func vnetHdrToByteSlice(hdr *virtioNetHdr) (slice []byte) {
+ sh := (*reflect.SliceHeader)(unsafe.Pointer(&slice))
+ sh.Data = uintptr(unsafe.Pointer(hdr))
+ sh.Len = virtioNetHdrSize
+ sh.Cap = virtioNetHdrSize
+ return
+}
diff --git a/pkg/tcpip/link/loopback/loopback.go b/pkg/tcpip/link/loopback/loopback.go
index fa54872da..d58c0f885 100644
--- a/pkg/tcpip/link/loopback/loopback.go
+++ b/pkg/tcpip/link/loopback/loopback.go
@@ -72,7 +72,7 @@ func (*endpoint) LinkAddress() tcpip.LinkAddress {
// WritePacket implements stack.LinkEndpoint.WritePacket. It delivers outbound
// packets to the network-layer dispatcher.
-func (e *endpoint) WritePacket(_ *stack.Route, hdr buffer.Prependable, payload buffer.VectorisedView, protocol tcpip.NetworkProtocolNumber) *tcpip.Error {
+func (e *endpoint) WritePacket(_ *stack.Route, _ *stack.GSO, hdr buffer.Prependable, payload buffer.VectorisedView, protocol tcpip.NetworkProtocolNumber) *tcpip.Error {
views := make([]buffer.View, 1, 1+len(payload.Views()))
views[0] = hdr.View()
views = append(views, payload.Views()...)
diff --git a/pkg/tcpip/link/muxed/injectable.go b/pkg/tcpip/link/muxed/injectable.go
index 29073afae..99edc232d 100644
--- a/pkg/tcpip/link/muxed/injectable.go
+++ b/pkg/tcpip/link/muxed/injectable.go
@@ -87,9 +87,9 @@ func (m *InjectableEndpoint) Inject(protocol tcpip.NetworkProtocolNumber, vv buf
// WritePacket writes outbound packets to the appropriate LinkInjectableEndpoint
// based on the RemoteAddress. HandleLocal only works if r.RemoteAddress has a
// route registered in this endpoint.
-func (m *InjectableEndpoint) WritePacket(r *stack.Route, hdr buffer.Prependable, payload buffer.VectorisedView, protocol tcpip.NetworkProtocolNumber) *tcpip.Error {
+func (m *InjectableEndpoint) WritePacket(r *stack.Route, _ *stack.GSO, hdr buffer.Prependable, payload buffer.VectorisedView, protocol tcpip.NetworkProtocolNumber) *tcpip.Error {
if endpoint, ok := m.routes[r.RemoteAddress]; ok {
- return endpoint.WritePacket(r, hdr, payload, protocol)
+ return endpoint.WritePacket(r, nil /* gso */, hdr, payload, protocol)
}
return tcpip.ErrNoRoute
}
diff --git a/pkg/tcpip/link/muxed/injectable_test.go b/pkg/tcpip/link/muxed/injectable_test.go
index d1d2875cc..7d25effad 100644
--- a/pkg/tcpip/link/muxed/injectable_test.go
+++ b/pkg/tcpip/link/muxed/injectable_test.go
@@ -50,7 +50,7 @@ func TestInjectableEndpointDispatch(t *testing.T) {
hdr.Prepend(1)[0] = 0xFA
packetRoute := stack.Route{RemoteAddress: dstIP}
- endpoint.WritePacket(&packetRoute, hdr,
+ endpoint.WritePacket(&packetRoute, nil /* gso */, hdr,
buffer.NewViewFromBytes([]byte{0xFB}).ToVectorisedView(), ipv4.ProtocolNumber)
buf := make([]byte, 6500)
@@ -68,7 +68,7 @@ func TestInjectableEndpointDispatchHdrOnly(t *testing.T) {
hdr := buffer.NewPrependable(1)
hdr.Prepend(1)[0] = 0xFA
packetRoute := stack.Route{RemoteAddress: dstIP}
- endpoint.WritePacket(&packetRoute, hdr,
+ endpoint.WritePacket(&packetRoute, nil /* gso */, hdr,
buffer.NewView(0).ToVectorisedView(), ipv4.ProtocolNumber)
buf := make([]byte, 6500)
bytesRead, err := sock.Read(buf)
diff --git a/pkg/tcpip/link/rawfile/rawfile_unsafe.go b/pkg/tcpip/link/rawfile/rawfile_unsafe.go
index 5d36ebe57..fe2779125 100644
--- a/pkg/tcpip/link/rawfile/rawfile_unsafe.go
+++ b/pkg/tcpip/link/rawfile/rawfile_unsafe.go
@@ -65,9 +65,9 @@ func NonBlockingWrite(fd int, buf []byte) *tcpip.Error {
return nil
}
-// NonBlockingWrite2 writes up to two byte slices to a file descriptor in a
+// NonBlockingWrite3 writes up to three byte slices to a file descriptor in a
// single syscall. It fails if partial data is written.
-func NonBlockingWrite2(fd int, b1, b2 []byte) *tcpip.Error {
+func NonBlockingWrite3(fd int, b1, b2, b3 []byte) *tcpip.Error {
// If the is no second buffer, issue a regular write.
if len(b2) == 0 {
return NonBlockingWrite(fd, b1)
@@ -75,7 +75,7 @@ func NonBlockingWrite2(fd int, b1, b2 []byte) *tcpip.Error {
// We have two buffers. Build the iovec that represents them and issue
// a writev syscall.
- iovec := [...]syscall.Iovec{
+ iovec := [3]syscall.Iovec{
{
Base: &b1[0],
Len: uint64(len(b1)),
@@ -85,8 +85,15 @@ func NonBlockingWrite2(fd int, b1, b2 []byte) *tcpip.Error {
Len: uint64(len(b2)),
},
}
+ iovecLen := uintptr(2)
- _, _, e := syscall.RawSyscall(syscall.SYS_WRITEV, uintptr(fd), uintptr(unsafe.Pointer(&iovec[0])), uintptr(len(iovec)))
+ if len(b3) > 0 {
+ iovecLen++
+ iovec[2].Base = &b3[0]
+ iovec[2].Len = uint64(len(b3))
+ }
+
+ _, _, e := syscall.RawSyscall(syscall.SYS_WRITEV, uintptr(fd), uintptr(unsafe.Pointer(&iovec[0])), iovecLen)
if e != 0 {
return TranslateErrno(e)
}
diff --git a/pkg/tcpip/link/sharedmem/sharedmem.go b/pkg/tcpip/link/sharedmem/sharedmem.go
index 27d7eb3b9..6e6aa5a13 100644
--- a/pkg/tcpip/link/sharedmem/sharedmem.go
+++ b/pkg/tcpip/link/sharedmem/sharedmem.go
@@ -184,7 +184,7 @@ func (e *endpoint) LinkAddress() tcpip.LinkAddress {
// WritePacket writes outbound packets to the file descriptor. If it is not
// currently writable, the packet is dropped.
-func (e *endpoint) WritePacket(r *stack.Route, hdr buffer.Prependable, payload buffer.VectorisedView, protocol tcpip.NetworkProtocolNumber) *tcpip.Error {
+func (e *endpoint) WritePacket(r *stack.Route, _ *stack.GSO, hdr buffer.Prependable, payload buffer.VectorisedView, protocol tcpip.NetworkProtocolNumber) *tcpip.Error {
// Add the ethernet header here.
eth := header.Ethernet(hdr.Prepend(header.EthernetMinimumSize))
ethHdr := &header.EthernetFields{
diff --git a/pkg/tcpip/link/sharedmem/sharedmem_test.go b/pkg/tcpip/link/sharedmem/sharedmem_test.go
index 4b8061b13..1f44e224c 100644
--- a/pkg/tcpip/link/sharedmem/sharedmem_test.go
+++ b/pkg/tcpip/link/sharedmem/sharedmem_test.go
@@ -272,7 +272,7 @@ func TestSimpleSend(t *testing.T) {
randomFill(buf)
proto := tcpip.NetworkProtocolNumber(rand.Intn(0x10000))
- if err := c.ep.WritePacket(&r, hdr, buf.ToVectorisedView(), proto); err != nil {
+ if err := c.ep.WritePacket(&r, nil /* gso */, hdr, buf.ToVectorisedView(), proto); err != nil {
t.Fatalf("WritePacket failed: %v", err)
}
@@ -341,7 +341,7 @@ func TestPreserveSrcAddressInSend(t *testing.T) {
hdr := buffer.NewPrependable(header.EthernetMinimumSize)
proto := tcpip.NetworkProtocolNumber(rand.Intn(0x10000))
- if err := c.ep.WritePacket(&r, hdr, buffer.VectorisedView{}, proto); err != nil {
+ if err := c.ep.WritePacket(&r, nil /* gso */, hdr, buffer.VectorisedView{}, proto); err != nil {
t.Fatalf("WritePacket failed: %v", err)
}
@@ -395,7 +395,7 @@ func TestFillTxQueue(t *testing.T) {
for i := queuePipeSize / 40; i > 0; i-- {
hdr := buffer.NewPrependable(int(c.ep.MaxHeaderLength()))
- if err := c.ep.WritePacket(&r, hdr, buf.ToVectorisedView(), header.IPv4ProtocolNumber); err != nil {
+ if err := c.ep.WritePacket(&r, nil /* gso */, hdr, buf.ToVectorisedView(), header.IPv4ProtocolNumber); err != nil {
t.Fatalf("WritePacket failed unexpectedly: %v", err)
}
@@ -410,7 +410,7 @@ func TestFillTxQueue(t *testing.T) {
// Next attempt to write must fail.
hdr := buffer.NewPrependable(int(c.ep.MaxHeaderLength()))
- if want, err := tcpip.ErrWouldBlock, c.ep.WritePacket(&r, hdr, buf.ToVectorisedView(), header.IPv4ProtocolNumber); err != want {
+ if want, err := tcpip.ErrWouldBlock, c.ep.WritePacket(&r, nil /* gso */, hdr, buf.ToVectorisedView(), header.IPv4ProtocolNumber); err != want {
t.Fatalf("WritePacket return unexpected result: got %v, want %v", err, want)
}
}
@@ -435,7 +435,7 @@ func TestFillTxQueueAfterBadCompletion(t *testing.T) {
// Send two packets so that the id slice has at least two slots.
for i := 2; i > 0; i-- {
hdr := buffer.NewPrependable(int(c.ep.MaxHeaderLength()))
- if err := c.ep.WritePacket(&r, hdr, buf.ToVectorisedView(), header.IPv4ProtocolNumber); err != nil {
+ if err := c.ep.WritePacket(&r, nil /* gso */, hdr, buf.ToVectorisedView(), header.IPv4ProtocolNumber); err != nil {
t.Fatalf("WritePacket failed unexpectedly: %v", err)
}
}
@@ -455,7 +455,7 @@ func TestFillTxQueueAfterBadCompletion(t *testing.T) {
ids := make(map[uint64]struct{})
for i := queuePipeSize / 40; i > 0; i-- {
hdr := buffer.NewPrependable(int(c.ep.MaxHeaderLength()))
- if err := c.ep.WritePacket(&r, hdr, buf.ToVectorisedView(), header.IPv4ProtocolNumber); err != nil {
+ if err := c.ep.WritePacket(&r, nil /* gso */, hdr, buf.ToVectorisedView(), header.IPv4ProtocolNumber); err != nil {
t.Fatalf("WritePacket failed unexpectedly: %v", err)
}
@@ -470,7 +470,7 @@ func TestFillTxQueueAfterBadCompletion(t *testing.T) {
// Next attempt to write must fail.
hdr := buffer.NewPrependable(int(c.ep.MaxHeaderLength()))
- if want, err := tcpip.ErrWouldBlock, c.ep.WritePacket(&r, hdr, buf.ToVectorisedView(), header.IPv4ProtocolNumber); err != want {
+ if want, err := tcpip.ErrWouldBlock, c.ep.WritePacket(&r, nil /* gso */, hdr, buf.ToVectorisedView(), header.IPv4ProtocolNumber); err != want {
t.Fatalf("WritePacket return unexpected result: got %v, want %v", err, want)
}
}
@@ -493,7 +493,7 @@ func TestFillTxMemory(t *testing.T) {
ids := make(map[uint64]struct{})
for i := queueDataSize / bufferSize; i > 0; i-- {
hdr := buffer.NewPrependable(int(c.ep.MaxHeaderLength()))
- if err := c.ep.WritePacket(&r, hdr, buf.ToVectorisedView(), header.IPv4ProtocolNumber); err != nil {
+ if err := c.ep.WritePacket(&r, nil /* gso */, hdr, buf.ToVectorisedView(), header.IPv4ProtocolNumber); err != nil {
t.Fatalf("WritePacket failed unexpectedly: %v", err)
}
@@ -509,7 +509,7 @@ func TestFillTxMemory(t *testing.T) {
// Next attempt to write must fail.
hdr := buffer.NewPrependable(int(c.ep.MaxHeaderLength()))
- err := c.ep.WritePacket(&r, hdr, buf.ToVectorisedView(), header.IPv4ProtocolNumber)
+ err := c.ep.WritePacket(&r, nil /* gso */, hdr, buf.ToVectorisedView(), header.IPv4ProtocolNumber)
if want := tcpip.ErrWouldBlock; err != want {
t.Fatalf("WritePacket return unexpected result: got %v, want %v", err, want)
}
@@ -534,7 +534,7 @@ func TestFillTxMemoryWithMultiBuffer(t *testing.T) {
// until there is only one buffer left.
for i := queueDataSize/bufferSize - 1; i > 0; i-- {
hdr := buffer.NewPrependable(int(c.ep.MaxHeaderLength()))
- if err := c.ep.WritePacket(&r, hdr, buf.ToVectorisedView(), header.IPv4ProtocolNumber); err != nil {
+ if err := c.ep.WritePacket(&r, nil /* gso */, hdr, buf.ToVectorisedView(), header.IPv4ProtocolNumber); err != nil {
t.Fatalf("WritePacket failed unexpectedly: %v", err)
}
@@ -547,7 +547,7 @@ func TestFillTxMemoryWithMultiBuffer(t *testing.T) {
{
hdr := buffer.NewPrependable(int(c.ep.MaxHeaderLength()))
uu := buffer.NewView(bufferSize).ToVectorisedView()
- if want, err := tcpip.ErrWouldBlock, c.ep.WritePacket(&r, hdr, uu, header.IPv4ProtocolNumber); err != want {
+ if want, err := tcpip.ErrWouldBlock, c.ep.WritePacket(&r, nil /* gso */, hdr, uu, header.IPv4ProtocolNumber); err != want {
t.Fatalf("WritePacket return unexpected result: got %v, want %v", err, want)
}
}
@@ -555,7 +555,7 @@ func TestFillTxMemoryWithMultiBuffer(t *testing.T) {
// Attempt to write the one-buffer packet again. It must succeed.
{
hdr := buffer.NewPrependable(int(c.ep.MaxHeaderLength()))
- if err := c.ep.WritePacket(&r, hdr, buf.ToVectorisedView(), header.IPv4ProtocolNumber); err != nil {
+ if err := c.ep.WritePacket(&r, nil /* gso */, hdr, buf.ToVectorisedView(), header.IPv4ProtocolNumber); err != nil {
t.Fatalf("WritePacket failed unexpectedly: %v", err)
}
}
diff --git a/pkg/tcpip/link/sniffer/sniffer.go b/pkg/tcpip/link/sniffer/sniffer.go
index 4768321d3..462a6e3a3 100644
--- a/pkg/tcpip/link/sniffer/sniffer.go
+++ b/pkg/tcpip/link/sniffer/sniffer.go
@@ -185,10 +185,18 @@ func (e *endpoint) LinkAddress() tcpip.LinkAddress {
return e.lower.LinkAddress()
}
+// GSOMaxSize returns the maximum GSO packet size.
+func (e *endpoint) GSOMaxSize() uint32 {
+ if gso, ok := e.lower.(stack.GSOEndpoint); ok {
+ return gso.GSOMaxSize()
+ }
+ return 0
+}
+
// WritePacket implements the stack.LinkEndpoint interface. It is called by
// higher-level protocols to write packets; it just logs the packet and forwards
// the request to the lower endpoint.
-func (e *endpoint) WritePacket(r *stack.Route, hdr buffer.Prependable, payload buffer.VectorisedView, protocol tcpip.NetworkProtocolNumber) *tcpip.Error {
+func (e *endpoint) WritePacket(r *stack.Route, gso *stack.GSO, hdr buffer.Prependable, payload buffer.VectorisedView, protocol tcpip.NetworkProtocolNumber) *tcpip.Error {
if atomic.LoadUint32(&LogPackets) == 1 && e.file == nil {
logPacket("send", protocol, hdr.View())
}
@@ -229,7 +237,7 @@ func (e *endpoint) WritePacket(r *stack.Route, hdr buffer.Prependable, payload b
panic(err)
}
}
- return e.lower.WritePacket(r, hdr, payload, protocol)
+ return e.lower.WritePacket(r, gso, hdr, payload, protocol)
}
func logPacket(prefix string, protocol tcpip.NetworkProtocolNumber, b buffer.View) {
diff --git a/pkg/tcpip/link/waitable/waitable.go b/pkg/tcpip/link/waitable/waitable.go
index 39217e49c..bd9f9845b 100644
--- a/pkg/tcpip/link/waitable/waitable.go
+++ b/pkg/tcpip/link/waitable/waitable.go
@@ -100,12 +100,12 @@ func (e *Endpoint) LinkAddress() tcpip.LinkAddress {
// WritePacket implements stack.LinkEndpoint.WritePacket. It is called by
// higher-level protocols to write packets. It only forwards packets to the
// lower endpoint if Wait or WaitWrite haven't been called.
-func (e *Endpoint) WritePacket(r *stack.Route, hdr buffer.Prependable, payload buffer.VectorisedView, protocol tcpip.NetworkProtocolNumber) *tcpip.Error {
+func (e *Endpoint) WritePacket(r *stack.Route, gso *stack.GSO, hdr buffer.Prependable, payload buffer.VectorisedView, protocol tcpip.NetworkProtocolNumber) *tcpip.Error {
if !e.writeGate.Enter() {
return nil
}
- err := e.lower.WritePacket(r, hdr, payload, protocol)
+ err := e.lower.WritePacket(r, gso, hdr, payload, protocol)
e.writeGate.Leave()
return err
}
diff --git a/pkg/tcpip/link/waitable/waitable_test.go b/pkg/tcpip/link/waitable/waitable_test.go
index 6c57e597a..a2df6be95 100644
--- a/pkg/tcpip/link/waitable/waitable_test.go
+++ b/pkg/tcpip/link/waitable/waitable_test.go
@@ -65,7 +65,7 @@ func (e *countedEndpoint) LinkAddress() tcpip.LinkAddress {
return e.linkAddr
}
-func (e *countedEndpoint) WritePacket(r *stack.Route, hdr buffer.Prependable, payload buffer.VectorisedView, protocol tcpip.NetworkProtocolNumber) *tcpip.Error {
+func (e *countedEndpoint) WritePacket(r *stack.Route, _ *stack.GSO, hdr buffer.Prependable, payload buffer.VectorisedView, protocol tcpip.NetworkProtocolNumber) *tcpip.Error {
e.writeCount++
return nil
}
@@ -75,21 +75,21 @@ func TestWaitWrite(t *testing.T) {
_, wep := New(stack.RegisterLinkEndpoint(ep))
// Write and check that it goes through.
- wep.WritePacket(nil, buffer.Prependable{}, buffer.VectorisedView{}, 0)
+ wep.WritePacket(nil, nil /* gso */, buffer.Prependable{}, buffer.VectorisedView{}, 0)
if want := 1; ep.writeCount != want {
t.Fatalf("Unexpected writeCount: got=%v, want=%v", ep.writeCount, want)
}
// Wait on dispatches, then try to write. It must go through.
wep.WaitDispatch()
- wep.WritePacket(nil, buffer.Prependable{}, buffer.VectorisedView{}, 0)
+ wep.WritePacket(nil, nil /* gso */, buffer.Prependable{}, buffer.VectorisedView{}, 0)
if want := 2; ep.writeCount != want {
t.Fatalf("Unexpected writeCount: got=%v, want=%v", ep.writeCount, want)
}
// Wait on writes, then try to write. It must not go through.
wep.WaitWrite()
- wep.WritePacket(nil, buffer.Prependable{}, buffer.VectorisedView{}, 0)
+ wep.WritePacket(nil, nil /* gso */, buffer.Prependable{}, buffer.VectorisedView{}, 0)
if want := 2; ep.writeCount != want {
t.Fatalf("Unexpected writeCount: got=%v, want=%v", ep.writeCount, want)
}