summaryrefslogtreecommitdiffhomepage
path: root/pkg/tcpip/link/fdbased
diff options
context:
space:
mode:
authorGoogler <noreply@google.com>2018-04-27 10:37:02 -0700
committerAdin Scannell <ascannell@google.com>2018-04-28 01:44:26 -0400
commitd02b74a5dcfed4bfc8f2f8e545bca4d2afabb296 (patch)
tree54f95eef73aee6bacbfc736fffc631be2605ed53 /pkg/tcpip/link/fdbased
parentf70210e742919f40aa2f0934a22f1c9ba6dada62 (diff)
Check in gVisor.
PiperOrigin-RevId: 194583126 Change-Id: Ica1d8821a90f74e7e745962d71801c598c652463
Diffstat (limited to 'pkg/tcpip/link/fdbased')
-rw-r--r--pkg/tcpip/link/fdbased/BUILD32
-rw-r--r--pkg/tcpip/link/fdbased/endpoint.go261
-rw-r--r--pkg/tcpip/link/fdbased/endpoint_test.go336
3 files changed, 629 insertions, 0 deletions
diff --git a/pkg/tcpip/link/fdbased/BUILD b/pkg/tcpip/link/fdbased/BUILD
new file mode 100644
index 000000000..b5ab1ea6a
--- /dev/null
+++ b/pkg/tcpip/link/fdbased/BUILD
@@ -0,0 +1,32 @@
+package(licenses = ["notice"]) # BSD
+
+load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test")
+
+go_library(
+ name = "fdbased",
+ srcs = ["endpoint.go"],
+ importpath = "gvisor.googlesource.com/gvisor/pkg/tcpip/link/fdbased",
+ visibility = [
+ "//visibility:public",
+ ],
+ deps = [
+ "//pkg/tcpip",
+ "//pkg/tcpip/buffer",
+ "//pkg/tcpip/header",
+ "//pkg/tcpip/link/rawfile",
+ "//pkg/tcpip/stack",
+ ],
+)
+
+go_test(
+ name = "fdbased_test",
+ size = "small",
+ srcs = ["endpoint_test.go"],
+ embed = [":fdbased"],
+ deps = [
+ "//pkg/tcpip",
+ "//pkg/tcpip/buffer",
+ "//pkg/tcpip/header",
+ "//pkg/tcpip/stack",
+ ],
+)
diff --git a/pkg/tcpip/link/fdbased/endpoint.go b/pkg/tcpip/link/fdbased/endpoint.go
new file mode 100644
index 000000000..da74cd644
--- /dev/null
+++ b/pkg/tcpip/link/fdbased/endpoint.go
@@ -0,0 +1,261 @@
+// Copyright 2016 The Netstack Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package fdbased provides the implemention of data-link layer endpoints
+// backed by boundary-preserving file descriptors (e.g., TUN devices,
+// seqpacket/datagram sockets).
+//
+// FD based endpoints can be used in the networking stack by calling New() to
+// create a new endpoint, and then passing it as an argument to
+// Stack.CreateNIC().
+package fdbased
+
+import (
+ "syscall"
+
+ "gvisor.googlesource.com/gvisor/pkg/tcpip"
+ "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer"
+ "gvisor.googlesource.com/gvisor/pkg/tcpip/header"
+ "gvisor.googlesource.com/gvisor/pkg/tcpip/link/rawfile"
+ "gvisor.googlesource.com/gvisor/pkg/tcpip/stack"
+)
+
+// BufConfig defines the shape of the vectorised view used to read packets from the NIC.
+var BufConfig = []int{128, 256, 256, 512, 1024, 2048, 4096, 8192, 16384, 32768}
+
+type endpoint struct {
+ // fd is the file descriptor used to send and receive packets.
+ fd int
+
+ // mtu (maximum transmission unit) is the maximum size of a packet.
+ mtu uint32
+
+ // hdrSize specifies the link-layer header size. If set to 0, no header
+ // is added/removed; otherwise an ethernet header is used.
+ hdrSize int
+
+ // addr is the address of the endpoint.
+ addr tcpip.LinkAddress
+
+ // caps holds the endpoint capabilities.
+ caps stack.LinkEndpointCapabilities
+
+ // closed is a function to be called when the FD's peer (if any) closes
+ // its end of the communication pipe.
+ closed func(*tcpip.Error)
+
+ vv *buffer.VectorisedView
+ iovecs []syscall.Iovec
+ views []buffer.View
+}
+
+// Options specify the details about the fd-based endpoint to be created.
+type Options struct {
+ FD int
+ MTU uint32
+ EthernetHeader bool
+ ChecksumOffload bool
+ ClosedFunc func(*tcpip.Error)
+ Address tcpip.LinkAddress
+}
+
+// New creates a new fd-based endpoint.
+//
+// Makes fd non-blocking, but does not take ownership of fd, which must remain
+// open for the lifetime of the returned endpoint.
+func New(opts *Options) tcpip.LinkEndpointID {
+ syscall.SetNonblock(opts.FD, true)
+
+ caps := stack.LinkEndpointCapabilities(0)
+ if opts.ChecksumOffload {
+ caps |= stack.CapabilityChecksumOffload
+ }
+
+ hdrSize := 0
+ if opts.EthernetHeader {
+ hdrSize = header.EthernetMinimumSize
+ caps |= stack.CapabilityResolutionRequired
+ }
+
+ e := &endpoint{
+ fd: opts.FD,
+ mtu: opts.MTU,
+ caps: caps,
+ closed: opts.ClosedFunc,
+ addr: opts.Address,
+ hdrSize: hdrSize,
+ views: make([]buffer.View, len(BufConfig)),
+ iovecs: make([]syscall.Iovec, len(BufConfig)),
+ }
+ vv := buffer.NewVectorisedView(0, e.views)
+ e.vv = &vv
+ return stack.RegisterLinkEndpoint(e)
+}
+
+// Attach launches the goroutine that reads packets from the file descriptor and
+// dispatches them via the provided dispatcher.
+func (e *endpoint) Attach(dispatcher stack.NetworkDispatcher) {
+ go e.dispatchLoop(dispatcher) // S/R-FIXME
+}
+
+// MTU implements stack.LinkEndpoint.MTU. It returns the value initialized
+// during construction.
+func (e *endpoint) MTU() uint32 {
+ return e.mtu
+}
+
+// Capabilities implements stack.LinkEndpoint.Capabilities.
+func (e *endpoint) Capabilities() stack.LinkEndpointCapabilities {
+ return e.caps
+}
+
+// MaxHeaderLength returns the maximum size of the link-layer header.
+func (e *endpoint) MaxHeaderLength() uint16 {
+ return uint16(e.hdrSize)
+}
+
+// LinkAddress returns the link address of this endpoint.
+func (e *endpoint) LinkAddress() tcpip.LinkAddress {
+ return e.addr
+}
+
+// WritePacket writes outbound packets to the file descriptor. If it is not
+// currently writable, the packet is dropped.
+func (e *endpoint) WritePacket(r *stack.Route, hdr *buffer.Prependable, payload buffer.View, protocol tcpip.NetworkProtocolNumber) *tcpip.Error {
+ if e.hdrSize > 0 {
+ // Add ethernet header if needed.
+ eth := header.Ethernet(hdr.Prepend(header.EthernetMinimumSize))
+ eth.Encode(&header.EthernetFields{
+ DstAddr: r.RemoteLinkAddress,
+ SrcAddr: e.addr,
+ Type: protocol,
+ })
+ }
+
+ if len(payload) == 0 {
+ return rawfile.NonBlockingWrite(e.fd, hdr.UsedBytes())
+
+ }
+
+ return rawfile.NonBlockingWrite2(e.fd, hdr.UsedBytes(), payload)
+}
+
+func (e *endpoint) capViews(n int, buffers []int) int {
+ c := 0
+ for i, s := range buffers {
+ c += s
+ if c >= n {
+ e.views[i].CapLength(s - (c - n))
+ return i + 1
+ }
+ }
+ return len(buffers)
+}
+
+func (e *endpoint) allocateViews(bufConfig []int) {
+ for i, v := range e.views {
+ if v != nil {
+ break
+ }
+ b := buffer.NewView(bufConfig[i])
+ e.views[i] = b
+ e.iovecs[i] = syscall.Iovec{
+ Base: &b[0],
+ Len: uint64(len(b)),
+ }
+ }
+}
+
+// dispatch reads one packet from the file descriptor and dispatches it.
+func (e *endpoint) dispatch(d stack.NetworkDispatcher, largeV buffer.View) (bool, *tcpip.Error) {
+ e.allocateViews(BufConfig)
+
+ n, err := rawfile.BlockingReadv(e.fd, e.iovecs)
+ if err != nil {
+ return false, err
+ }
+
+ if n <= e.hdrSize {
+ return false, nil
+ }
+
+ var p tcpip.NetworkProtocolNumber
+ var addr tcpip.LinkAddress
+ if e.hdrSize > 0 {
+ eth := header.Ethernet(e.views[0])
+ p = eth.Type()
+ addr = eth.SourceAddress()
+ } else {
+ // We don't get any indication of what the packet is, so try to guess
+ // if it's an IPv4 or IPv6 packet.
+ switch header.IPVersion(e.views[0]) {
+ case header.IPv4Version:
+ p = header.IPv4ProtocolNumber
+ case header.IPv6Version:
+ p = header.IPv6ProtocolNumber
+ default:
+ return true, nil
+ }
+ }
+
+ used := e.capViews(n, BufConfig)
+ e.vv.SetViews(e.views[:used])
+ e.vv.SetSize(n)
+ e.vv.TrimFront(e.hdrSize)
+
+ d.DeliverNetworkPacket(e, addr, p, e.vv)
+
+ // Prepare e.views for another packet: release used views.
+ for i := 0; i < used; i++ {
+ e.views[i] = nil
+ }
+
+ return true, nil
+}
+
+// dispatchLoop reads packets from the file descriptor in a loop and dispatches
+// them to the network stack.
+func (e *endpoint) dispatchLoop(d stack.NetworkDispatcher) *tcpip.Error {
+ v := buffer.NewView(header.MaxIPPacketSize)
+ for {
+ cont, err := e.dispatch(d, v)
+ if err != nil || !cont {
+ if e.closed != nil {
+ e.closed(err)
+ }
+ return err
+ }
+ }
+}
+
+// InjectableEndpoint is an injectable fd-based endpoint. The endpoint writes
+// to the FD, but does not read from it. All reads come from injected packets.
+type InjectableEndpoint struct {
+ endpoint
+
+ dispatcher stack.NetworkDispatcher
+}
+
+// Attach saves the stack network-layer dispatcher for use later when packets
+// are injected.
+func (e *InjectableEndpoint) Attach(dispatcher stack.NetworkDispatcher) {
+ e.dispatcher = dispatcher
+}
+
+// Inject injects an inbound packet.
+func (e *InjectableEndpoint) Inject(protocol tcpip.NetworkProtocolNumber, vv *buffer.VectorisedView) {
+ e.dispatcher.DeliverNetworkPacket(e, "", protocol, vv)
+}
+
+// NewInjectable creates a new fd-based InjectableEndpoint.
+func NewInjectable(fd int, mtu uint32) (tcpip.LinkEndpointID, *InjectableEndpoint) {
+ syscall.SetNonblock(fd, true)
+
+ e := &InjectableEndpoint{endpoint: endpoint{
+ fd: fd,
+ mtu: mtu,
+ }}
+
+ return stack.RegisterLinkEndpoint(e), e
+}
diff --git a/pkg/tcpip/link/fdbased/endpoint_test.go b/pkg/tcpip/link/fdbased/endpoint_test.go
new file mode 100644
index 000000000..f7bbb28e1
--- /dev/null
+++ b/pkg/tcpip/link/fdbased/endpoint_test.go
@@ -0,0 +1,336 @@
+// Copyright 2016 The Netstack Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package fdbased
+
+import (
+ "fmt"
+ "math/rand"
+ "reflect"
+ "syscall"
+ "testing"
+ "time"
+
+ "gvisor.googlesource.com/gvisor/pkg/tcpip"
+ "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer"
+ "gvisor.googlesource.com/gvisor/pkg/tcpip/header"
+ "gvisor.googlesource.com/gvisor/pkg/tcpip/stack"
+)
+
+type packetInfo struct {
+ raddr tcpip.LinkAddress
+ proto tcpip.NetworkProtocolNumber
+ contents buffer.View
+}
+
+type context struct {
+ t *testing.T
+ fds [2]int
+ ep stack.LinkEndpoint
+ ch chan packetInfo
+ done chan struct{}
+}
+
+func newContext(t *testing.T, opt *Options) *context {
+ fds, err := syscall.Socketpair(syscall.AF_UNIX, syscall.SOCK_SEQPACKET, 0)
+ if err != nil {
+ t.Fatalf("Socketpair failed: %v", err)
+ }
+
+ done := make(chan struct{}, 1)
+ opt.ClosedFunc = func(*tcpip.Error) {
+ done <- struct{}{}
+ }
+
+ opt.FD = fds[1]
+ ep := stack.FindLinkEndpoint(New(opt)).(*endpoint)
+
+ c := &context{
+ t: t,
+ fds: fds,
+ ep: ep,
+ ch: make(chan packetInfo, 100),
+ done: done,
+ }
+
+ ep.Attach(c)
+
+ return c
+}
+
+func (c *context) cleanup() {
+ syscall.Close(c.fds[0])
+ <-c.done
+ syscall.Close(c.fds[1])
+}
+
+func (c *context) DeliverNetworkPacket(linkEP stack.LinkEndpoint, remoteLinkAddr tcpip.LinkAddress, protocol tcpip.NetworkProtocolNumber, vv *buffer.VectorisedView) {
+ c.ch <- packetInfo{remoteLinkAddr, protocol, vv.ToView()}
+}
+
+func TestNoEthernetProperties(t *testing.T) {
+ const mtu = 1500
+ c := newContext(t, &Options{MTU: mtu})
+ defer c.cleanup()
+
+ if want, v := uint16(0), c.ep.MaxHeaderLength(); want != v {
+ t.Fatalf("MaxHeaderLength() = %v, want %v", v, want)
+ }
+
+ if want, v := uint32(mtu), c.ep.MTU(); want != v {
+ t.Fatalf("MTU() = %v, want %v", v, want)
+ }
+}
+
+func TestEthernetProperties(t *testing.T) {
+ const mtu = 1500
+ c := newContext(t, &Options{EthernetHeader: true, MTU: mtu})
+ defer c.cleanup()
+
+ if want, v := uint16(header.EthernetMinimumSize), c.ep.MaxHeaderLength(); want != v {
+ t.Fatalf("MaxHeaderLength() = %v, want %v", v, want)
+ }
+
+ if want, v := uint32(mtu), c.ep.MTU(); want != v {
+ t.Fatalf("MTU() = %v, want %v", v, want)
+ }
+}
+
+func TestAddress(t *testing.T) {
+ const mtu = 1500
+ addrs := []tcpip.LinkAddress{"", "abc", "def"}
+ for _, a := range addrs {
+ t.Run(fmt.Sprintf("Address: %q", a), func(t *testing.T) {
+ c := newContext(t, &Options{Address: a, MTU: mtu})
+ defer c.cleanup()
+
+ if want, v := a, c.ep.LinkAddress(); want != v {
+ t.Fatalf("LinkAddress() = %v, want %v", v, want)
+ }
+ })
+ }
+}
+
+func TestWritePacket(t *testing.T) {
+ const (
+ mtu = 1500
+ laddr = tcpip.LinkAddress("\x11\x22\x33\x44\x55\x66")
+ raddr = tcpip.LinkAddress("\x77\x88\x99\xaa\xbb\xcc")
+ proto = 10
+ )
+
+ lengths := []int{0, 100, 1000}
+ eths := []bool{true, false}
+
+ for _, eth := range eths {
+ for _, plen := range lengths {
+ t.Run(fmt.Sprintf("Eth=%v,PayloadLen=%v", eth, plen), func(t *testing.T) {
+ c := newContext(t, &Options{Address: laddr, MTU: mtu, EthernetHeader: eth})
+ defer c.cleanup()
+
+ r := &stack.Route{
+ RemoteLinkAddress: raddr,
+ }
+
+ // Build header.
+ hdr := buffer.NewPrependable(int(c.ep.MaxHeaderLength()) + 100)
+ b := hdr.Prepend(100)
+ for i := range b {
+ b[i] = uint8(rand.Intn(256))
+ }
+
+ // Buiild payload and write.
+ payload := make([]byte, plen)
+ for i := range payload {
+ payload[i] = uint8(rand.Intn(256))
+ }
+ want := append(hdr.UsedBytes(), payload...)
+ if err := c.ep.WritePacket(r, &hdr, payload, proto); err != nil {
+ t.Fatalf("WritePacket failed: %v", err)
+ }
+
+ // Read from fd, then compare with what we wrote.
+ b = make([]byte, mtu)
+ n, err := syscall.Read(c.fds[0], b)
+ if err != nil {
+ t.Fatalf("Read failed: %v", err)
+ }
+ b = b[:n]
+ if eth {
+ h := header.Ethernet(b)
+ b = b[header.EthernetMinimumSize:]
+
+ if a := h.SourceAddress(); a != laddr {
+ t.Fatalf("SourceAddress() = %v, want %v", a, laddr)
+ }
+
+ if a := h.DestinationAddress(); a != raddr {
+ t.Fatalf("DestinationAddress() = %v, want %v", a, raddr)
+ }
+
+ if et := h.Type(); et != proto {
+ t.Fatalf("Type() = %v, want %v", et, proto)
+ }
+ }
+ if len(b) != len(want) {
+ t.Fatalf("Read returned %v bytes, want %v", len(b), len(want))
+ }
+ if !reflect.DeepEqual(b, want) {
+ t.Fatalf("Read returned %x, want %x", b, want)
+ }
+ })
+ }
+ }
+}
+
+func TestDeliverPacket(t *testing.T) {
+ const (
+ mtu = 1500
+ laddr = tcpip.LinkAddress("\x11\x22\x33\x44\x55\x66")
+ raddr = tcpip.LinkAddress("\x77\x88\x99\xaa\xbb\xcc")
+ proto = 10
+ )
+
+ lengths := []int{100, 1000}
+ eths := []bool{true, false}
+
+ for _, eth := range eths {
+ for _, plen := range lengths {
+ t.Run(fmt.Sprintf("Eth=%v,PayloadLen=%v", eth, plen), func(t *testing.T) {
+ c := newContext(t, &Options{Address: laddr, MTU: mtu, EthernetHeader: eth})
+ defer c.cleanup()
+
+ // Build packet.
+ b := make([]byte, plen)
+ all := b
+ for i := range b {
+ b[i] = uint8(rand.Intn(256))
+ }
+
+ if !eth {
+ // So that it looks like an IPv4 packet.
+ b[0] = 0x40
+ } else {
+ hdr := make(header.Ethernet, header.EthernetMinimumSize)
+ hdr.Encode(&header.EthernetFields{
+ SrcAddr: raddr,
+ DstAddr: laddr,
+ Type: proto,
+ })
+ all = append(hdr, b...)
+ }
+
+ // Write packet via the file descriptor.
+ if _, err := syscall.Write(c.fds[0], all); err != nil {
+ t.Fatalf("Write failed: %v", err)
+ }
+
+ // Receive packet through the endpoint.
+ select {
+ case pi := <-c.ch:
+ want := packetInfo{
+ raddr: raddr,
+ proto: proto,
+ contents: b,
+ }
+ if !eth {
+ want.proto = header.IPv4ProtocolNumber
+ want.raddr = ""
+ }
+ if !reflect.DeepEqual(want, pi) {
+ t.Fatalf("Unexpected received packet: %+v, want %+v", pi, want)
+ }
+ case <-time.After(10 * time.Second):
+ t.Fatalf("Timed out waiting for packet")
+ }
+ })
+ }
+ }
+}
+
+func TestBufConfigMaxLength(t *testing.T) {
+ got := 0
+ for _, i := range BufConfig {
+ got += i
+ }
+ want := header.MaxIPPacketSize // maximum TCP packet size
+ if got < want {
+ t.Errorf("total buffer size is invalid: got %d, want >= %d", got, want)
+ }
+}
+
+func TestBufConfigFirst(t *testing.T) {
+ // The stack assumes that the TCP/IP header is enterily contained in the first view.
+ // Therefore, the first view needs to be large enough to contain the maximum TCP/IP
+ // header, which is 120 bytes (60 bytes for IP + 60 bytes for TCP).
+ want := 120
+ got := BufConfig[0]
+ if got < want {
+ t.Errorf("first view has an invalid size: got %d, want >= %d", got, want)
+ }
+}
+
+func build(bufConfig []int) *endpoint {
+ e := &endpoint{
+ views: make([]buffer.View, len(bufConfig)),
+ iovecs: make([]syscall.Iovec, len(bufConfig)),
+ }
+ e.allocateViews(bufConfig)
+ return e
+}
+
+var capLengthTestCases = []struct {
+ comment string
+ config []int
+ n int
+ wantUsed int
+ wantLengths []int
+}{
+ {
+ comment: "Single slice",
+ config: []int{2},
+ n: 1,
+ wantUsed: 1,
+ wantLengths: []int{1},
+ },
+ {
+ comment: "Multiple slices",
+ config: []int{1, 2},
+ n: 2,
+ wantUsed: 2,
+ wantLengths: []int{1, 1},
+ },
+ {
+ comment: "Entire buffer",
+ config: []int{1, 2},
+ n: 3,
+ wantUsed: 2,
+ wantLengths: []int{1, 2},
+ },
+ {
+ comment: "Entire buffer but not on the last slice",
+ config: []int{1, 2, 3},
+ n: 3,
+ wantUsed: 2,
+ wantLengths: []int{1, 2, 3},
+ },
+}
+
+func TestCapLength(t *testing.T) {
+ for _, c := range capLengthTestCases {
+ e := build(c.config)
+ used := e.capViews(c.n, c.config)
+ if used != c.wantUsed {
+ t.Errorf("Test \"%s\" failed when calling capViews(%d, %v). Got %d. Want %d", c.comment, c.n, c.config, used, c.wantUsed)
+ }
+ lengths := make([]int, len(e.views))
+ for i, v := range e.views {
+ lengths[i] = len(v)
+ }
+ if !reflect.DeepEqual(lengths, c.wantLengths) {
+ t.Errorf("Test \"%s\" failed when calling capViews(%d, %v). Got %v. Want %v", c.comment, c.n, c.config, lengths, c.wantLengths)
+ }
+
+ }
+}