summaryrefslogtreecommitdiffhomepage
path: root/pkg/tcpip/link/fdbased
diff options
context:
space:
mode:
Diffstat (limited to 'pkg/tcpip/link/fdbased')
-rw-r--r--pkg/tcpip/link/fdbased/BUILD1
-rw-r--r--pkg/tcpip/link/fdbased/endpoint.go104
-rw-r--r--pkg/tcpip/link/fdbased/endpoint_test.go160
-rw-r--r--pkg/tcpip/link/fdbased/endpoint_unsafe.go32
4 files changed, 228 insertions, 69 deletions
diff --git a/pkg/tcpip/link/fdbased/BUILD b/pkg/tcpip/link/fdbased/BUILD
index bcf9c023e..50ce91a4e 100644
--- a/pkg/tcpip/link/fdbased/BUILD
+++ b/pkg/tcpip/link/fdbased/BUILD
@@ -6,6 +6,7 @@ go_library(
name = "fdbased",
srcs = [
"endpoint.go",
+ "endpoint_unsafe.go",
"mmap.go",
"mmap_amd64_unsafe.go",
],
diff --git a/pkg/tcpip/link/fdbased/endpoint.go b/pkg/tcpip/link/fdbased/endpoint.go
index d726551b0..20e34c5ee 100644
--- a/pkg/tcpip/link/fdbased/endpoint.go
+++ b/pkg/tcpip/link/fdbased/endpoint.go
@@ -111,6 +111,10 @@ type endpoint struct {
// ringOffset is the current offset into the ring buffer where the next
// inbound packet will be placed by the kernel.
ringOffset int
+
+ // gsoMaxSize is the maximum GSO packet size. It is zero if GSO is
+ // disabled.
+ gsoMaxSize uint32
}
// Options specify the details about the fd-based endpoint to be created.
@@ -123,6 +127,7 @@ type Options struct {
Address tcpip.LinkAddress
SaveRestore bool
DisconnectOk bool
+ GSOMaxSize uint32
PacketDispatchMode PacketDispatchMode
}
@@ -165,6 +170,10 @@ func New(opts *Options) tcpip.LinkEndpointID {
packetDispatchMode: opts.PacketDispatchMode,
}
+ if opts.GSOMaxSize != 0 && isSocketFD(opts.FD) {
+ e.caps |= stack.CapabilityGSO
+ e.gsoMaxSize = opts.GSOMaxSize
+ }
if isSocketFD(opts.FD) && e.packetDispatchMode == PacketMMap {
if err := e.setupPacketRXRing(); err != nil {
// TODO: replace panic with an error return.
@@ -185,17 +194,22 @@ func New(opts *Options) tcpip.LinkEndpointID {
}
e.views = make([][]buffer.View, msgsPerRecv)
- for i, _ := range e.views {
+ for i := range e.views {
e.views[i] = make([]buffer.View, len(BufConfig))
}
e.iovecs = make([][]syscall.Iovec, msgsPerRecv)
- for i, _ := range e.iovecs {
- e.iovecs[i] = make([]syscall.Iovec, len(BufConfig))
+ iovLen := len(BufConfig)
+ if e.Capabilities()&stack.CapabilityGSO != 0 {
+ // virtioNetHdr is prepended before each packet.
+ iovLen++
+ }
+ for i := range e.iovecs {
+ e.iovecs[i] = make([]syscall.Iovec, iovLen)
}
e.msgHdrs = make([]rawfile.MMsgHdr, msgsPerRecv)
- for i, _ := range e.msgHdrs {
+ for i := range e.msgHdrs {
e.msgHdrs[i].Msg.Iov = &e.iovecs[i][0]
- e.msgHdrs[i].Msg.Iovlen = uint64(len(BufConfig))
+ e.msgHdrs[i].Msg.Iovlen = uint64(iovLen)
}
return stack.RegisterLinkEndpoint(e)
@@ -246,9 +260,27 @@ func (e *endpoint) LinkAddress() tcpip.LinkAddress {
return e.addr
}
+// virtioNetHdr is declared in linux/virtio_net.h.
+type virtioNetHdr struct {
+ flags uint8
+ gsoType uint8
+ hdrLen uint16
+ gsoSize uint16
+ csumStart uint16
+ csumOffset uint16
+}
+
+// These constants are declared in linux/virtio_net.h.
+const (
+ _VIRTIO_NET_HDR_F_NEEDS_CSUM = 1
+
+ _VIRTIO_NET_HDR_GSO_TCPV4 = 1
+ _VIRTIO_NET_HDR_GSO_TCPV6 = 4
+)
+
// WritePacket writes outbound packets to the file descriptor. If it is not
// currently writable, the packet is dropped.
-func (e *endpoint) WritePacket(r *stack.Route, hdr buffer.Prependable, payload buffer.VectorisedView, protocol tcpip.NetworkProtocolNumber) *tcpip.Error {
+func (e *endpoint) WritePacket(r *stack.Route, gso *stack.GSO, hdr buffer.Prependable, payload buffer.VectorisedView, protocol tcpip.NetworkProtocolNumber) *tcpip.Error {
if e.hdrSize > 0 {
// Add ethernet header if needed.
eth := header.Ethernet(hdr.Prepend(header.EthernetMinimumSize))
@@ -266,11 +298,37 @@ func (e *endpoint) WritePacket(r *stack.Route, hdr buffer.Prependable, payload b
eth.Encode(ethHdr)
}
+ if e.Capabilities()&stack.CapabilityGSO != 0 {
+ vnetHdr := virtioNetHdr{}
+ vnetHdrBuf := vnetHdrToByteSlice(&vnetHdr)
+ if gso != nil {
+ vnetHdr.hdrLen = uint16(hdr.UsedLength())
+ if gso.NeedsCsum {
+ vnetHdr.flags = _VIRTIO_NET_HDR_F_NEEDS_CSUM
+ vnetHdr.csumStart = header.EthernetMinimumSize + gso.L3HdrLen
+ vnetHdr.csumOffset = gso.CsumOffset
+ }
+ if gso.Type != stack.GSONone && uint16(payload.Size()) > gso.MSS {
+ switch gso.Type {
+ case stack.GSOTCPv4:
+ vnetHdr.gsoType = _VIRTIO_NET_HDR_GSO_TCPV4
+ case stack.GSOTCPv6:
+ vnetHdr.gsoType = _VIRTIO_NET_HDR_GSO_TCPV6
+ default:
+ panic(fmt.Sprintf("Unknown gso type: %v", gso.Type))
+ }
+ vnetHdr.gsoSize = gso.MSS
+ }
+ }
+
+ return rawfile.NonBlockingWrite3(e.fd, vnetHdrBuf, hdr.View(), payload.ToView())
+ }
+
if payload.Size() == 0 {
return rawfile.NonBlockingWrite(e.fd, hdr.View())
}
- return rawfile.NonBlockingWrite2(e.fd, hdr.View(), payload.ToView())
+ return rawfile.NonBlockingWrite3(e.fd, hdr.View(), payload.ToView(), nil)
}
// WriteRawPacket writes a raw packet directly to the file descriptor.
@@ -292,13 +350,25 @@ func (e *endpoint) capViews(k, n int, buffers []int) int {
func (e *endpoint) allocateViews(bufConfig []int) {
for k := 0; k < len(e.views); k++ {
+ var vnetHdr [virtioNetHdrSize]byte
+ vnetHdrOff := 0
+ if e.Capabilities()&stack.CapabilityGSO != 0 {
+ // The kernel adds virtioNetHdr before each packet, but
+ // we don't use it, so so we allocate a buffer for it,
+ // add it in iovecs but don't add it in a view.
+ e.iovecs[k][0] = syscall.Iovec{
+ Base: &vnetHdr[0],
+ Len: uint64(virtioNetHdrSize),
+ }
+ vnetHdrOff++
+ }
for i := 0; i < len(bufConfig); i++ {
if e.views[k][i] != nil {
break
}
b := buffer.NewView(bufConfig[i])
e.views[k][i] = b
- e.iovecs[k][i] = syscall.Iovec{
+ e.iovecs[k][i+vnetHdrOff] = syscall.Iovec{
Base: &b[0],
Len: uint64(len(b)),
}
@@ -314,7 +384,11 @@ func (e *endpoint) dispatch() (bool, *tcpip.Error) {
if err != nil {
return false, err
}
-
+ if e.Capabilities()&stack.CapabilityGSO != 0 {
+ // Skip virtioNetHdr which is added before each packet, it
+ // isn't used and it isn't in a view.
+ n -= virtioNetHdrSize
+ }
if n <= e.hdrSize {
return false, nil
}
@@ -366,8 +440,11 @@ func (e *endpoint) recvMMsgDispatch() (bool, *tcpip.Error) {
}
// Process each of received packets.
for k := 0; k < nMsgs; k++ {
- n := e.msgHdrs[k].Len
- if n <= uint32(e.hdrSize) {
+ n := int(e.msgHdrs[k].Len)
+ if e.Capabilities()&stack.CapabilityGSO != 0 {
+ n -= virtioNetHdrSize
+ }
+ if n <= e.hdrSize {
return false, nil
}
@@ -425,6 +502,11 @@ func (e *endpoint) dispatchLoop() *tcpip.Error {
}
}
+// GSOMaxSize returns the maximum GSO packet size.
+func (e *endpoint) GSOMaxSize() uint32 {
+ return e.gsoMaxSize
+}
+
// InjectableEndpoint is an injectable fd-based endpoint. The endpoint writes
// to the FD, but does not read from it. All reads come from injected packets.
type InjectableEndpoint struct {
diff --git a/pkg/tcpip/link/fdbased/endpoint_test.go b/pkg/tcpip/link/fdbased/endpoint_test.go
index 14abacdf2..ecc5b73f3 100644
--- a/pkg/tcpip/link/fdbased/endpoint_test.go
+++ b/pkg/tcpip/link/fdbased/endpoint_test.go
@@ -24,6 +24,7 @@ import (
"syscall"
"testing"
"time"
+ "unsafe"
"gvisor.googlesource.com/gvisor/pkg/tcpip"
"gvisor.googlesource.com/gvisor/pkg/tcpip/buffer"
@@ -33,10 +34,12 @@ import (
)
const (
- mtu = 1500
- laddr = tcpip.LinkAddress("\x11\x22\x33\x44\x55\x66")
- raddr = tcpip.LinkAddress("\x77\x88\x99\xaa\xbb\xcc")
- proto = 10
+ mtu = 1500
+ laddr = tcpip.LinkAddress("\x11\x22\x33\x44\x55\x66")
+ raddr = tcpip.LinkAddress("\x77\x88\x99\xaa\xbb\xcc")
+ proto = 10
+ csumOffset = 48
+ gsoMSS = 500
)
type packetInfo struct {
@@ -130,67 +133,108 @@ func TestAddress(t *testing.T) {
}
}
-func TestWritePacket(t *testing.T) {
- lengths := []int{0, 100, 1000}
- eths := []bool{true, false}
+func testWritePacket(t *testing.T, plen int, eth bool, gsoMaxSize uint32) {
+ c := newContext(t, &Options{Address: laddr, MTU: mtu, EthernetHeader: eth, GSOMaxSize: gsoMaxSize})
+ defer c.cleanup()
- for _, eth := range eths {
- for _, plen := range lengths {
- t.Run(fmt.Sprintf("Eth=%v,PayloadLen=%v", eth, plen), func(t *testing.T) {
- c := newContext(t, &Options{Address: laddr, MTU: mtu, EthernetHeader: eth})
- defer c.cleanup()
+ r := &stack.Route{
+ RemoteLinkAddress: raddr,
+ }
- r := &stack.Route{
- RemoteLinkAddress: raddr,
- }
+ // Build header.
+ hdr := buffer.NewPrependable(int(c.ep.MaxHeaderLength()) + 100)
+ b := hdr.Prepend(100)
+ for i := range b {
+ b[i] = uint8(rand.Intn(256))
+ }
- // Build header.
- hdr := buffer.NewPrependable(int(c.ep.MaxHeaderLength()) + 100)
- b := hdr.Prepend(100)
- for i := range b {
- b[i] = uint8(rand.Intn(256))
- }
+ // Build payload and write.
+ payload := make(buffer.View, plen)
+ for i := range payload {
+ payload[i] = uint8(rand.Intn(256))
+ }
+ want := append(hdr.View(), payload...)
+ var gso *stack.GSO
+ if gsoMaxSize != 0 {
+ gso = &stack.GSO{
+ Type: stack.GSOTCPv6,
+ NeedsCsum: true,
+ CsumOffset: csumOffset,
+ MSS: gsoMSS,
+ MaxSize: gsoMaxSize,
+ }
+ }
+ if err := c.ep.WritePacket(r, gso, hdr, payload.ToVectorisedView(), proto); err != nil {
+ t.Fatalf("WritePacket failed: %v", err)
+ }
- // Build payload and write.
- payload := make(buffer.View, plen)
- for i := range payload {
- payload[i] = uint8(rand.Intn(256))
- }
- want := append(hdr.View(), payload...)
- if err := c.ep.WritePacket(r, hdr, payload.ToVectorisedView(), proto); err != nil {
- t.Fatalf("WritePacket failed: %v", err)
- }
+ // Read from fd, then compare with what we wrote.
+ b = make([]byte, mtu)
+ n, err := syscall.Read(c.fds[0], b)
+ if err != nil {
+ t.Fatalf("Read failed: %v", err)
+ }
+ b = b[:n]
+ if gsoMaxSize != 0 {
+ vnetHdr := *(*virtioNetHdr)(unsafe.Pointer(&b[0]))
+ if vnetHdr.flags&_VIRTIO_NET_HDR_F_NEEDS_CSUM == 0 {
+ t.Fatalf("virtioNetHdr.flags %v doesn't contain %v", vnetHdr.flags, _VIRTIO_NET_HDR_F_NEEDS_CSUM)
+ }
+ csumStart := header.EthernetMinimumSize + gso.L3HdrLen
+ if vnetHdr.csumStart != csumStart {
+ t.Fatalf("vnetHdr.csumStart = %v, want %v", vnetHdr.csumStart, csumStart)
+ }
+ if vnetHdr.csumOffset != csumOffset {
+ t.Fatalf("vnetHdr.csumOffset = %v, want %v", vnetHdr.csumOffset, csumOffset)
+ }
+ gsoType := uint8(0)
+ if int(gso.MSS) < plen {
+ gsoType = _VIRTIO_NET_HDR_GSO_TCPV6
+ }
+ if vnetHdr.gsoType != gsoType {
+ t.Fatalf("vnetHdr.gsoType = %v, want %v", vnetHdr.gsoType, gsoType)
+ }
+ b = b[virtioNetHdrSize:]
+ }
+ if eth {
+ h := header.Ethernet(b)
+ b = b[header.EthernetMinimumSize:]
- // Read from fd, then compare with what we wrote.
- b = make([]byte, mtu)
- n, err := syscall.Read(c.fds[0], b)
- if err != nil {
- t.Fatalf("Read failed: %v", err)
- }
- b = b[:n]
- if eth {
- h := header.Ethernet(b)
- b = b[header.EthernetMinimumSize:]
+ if a := h.SourceAddress(); a != laddr {
+ t.Fatalf("SourceAddress() = %v, want %v", a, laddr)
+ }
- if a := h.SourceAddress(); a != laddr {
- t.Fatalf("SourceAddress() = %v, want %v", a, laddr)
- }
+ if a := h.DestinationAddress(); a != raddr {
+ t.Fatalf("DestinationAddress() = %v, want %v", a, raddr)
+ }
- if a := h.DestinationAddress(); a != raddr {
- t.Fatalf("DestinationAddress() = %v, want %v", a, raddr)
- }
+ if et := h.Type(); et != proto {
+ t.Fatalf("Type() = %v, want %v", et, proto)
+ }
+ }
+ if len(b) != len(want) {
+ t.Fatalf("Read returned %v bytes, want %v", len(b), len(want))
+ }
+ if !bytes.Equal(b, want) {
+ t.Fatalf("Read returned %x, want %x", b, want)
+ }
+}
- if et := h.Type(); et != proto {
- t.Fatalf("Type() = %v, want %v", et, proto)
- }
- }
- if len(b) != len(want) {
- t.Fatalf("Read returned %v bytes, want %v", len(b), len(want))
- }
- if !bytes.Equal(b, want) {
- t.Fatalf("Read returned %x, want %x", b, want)
- }
- })
+func TestWritePacket(t *testing.T) {
+ lengths := []int{0, 100, 1000}
+ eths := []bool{true, false}
+ gsos := []uint32{0, 32768}
+
+ for _, eth := range eths {
+ for _, plen := range lengths {
+ for _, gso := range gsos {
+ t.Run(
+ fmt.Sprintf("Eth=%v,PayloadLen=%v,GSOMaxSize=%v", eth, plen, gso),
+ func(t *testing.T) {
+ testWritePacket(t, plen, eth, gso)
+ },
+ )
+ }
}
}
}
@@ -210,7 +254,7 @@ func TestPreserveSrcAddress(t *testing.T) {
// WritePacket panics given a prependable with anything less than
// the minimum size of the ethernet header.
hdr := buffer.NewPrependable(header.EthernetMinimumSize)
- if err := c.ep.WritePacket(r, hdr, buffer.VectorisedView{}, proto); err != nil {
+ if err := c.ep.WritePacket(r, nil /* gso */, hdr, buffer.VectorisedView{}, proto); err != nil {
t.Fatalf("WritePacket failed: %v", err)
}
diff --git a/pkg/tcpip/link/fdbased/endpoint_unsafe.go b/pkg/tcpip/link/fdbased/endpoint_unsafe.go
new file mode 100644
index 000000000..36e7fe5a9
--- /dev/null
+++ b/pkg/tcpip/link/fdbased/endpoint_unsafe.go
@@ -0,0 +1,32 @@
+// Copyright 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// +build linux
+
+package fdbased
+
+import (
+ "reflect"
+ "unsafe"
+)
+
+const virtioNetHdrSize = int(unsafe.Sizeof(virtioNetHdr{}))
+
+func vnetHdrToByteSlice(hdr *virtioNetHdr) (slice []byte) {
+ sh := (*reflect.SliceHeader)(unsafe.Pointer(&slice))
+ sh.Data = uintptr(unsafe.Pointer(hdr))
+ sh.Len = virtioNetHdrSize
+ sh.Cap = virtioNetHdrSize
+ return
+}