diff options
Diffstat (limited to 'pkg/tcpip/link/fdbased')
-rw-r--r-- | pkg/tcpip/link/fdbased/BUILD | 32 | ||||
-rw-r--r-- | pkg/tcpip/link/fdbased/endpoint.go | 261 | ||||
-rw-r--r-- | pkg/tcpip/link/fdbased/endpoint_test.go | 336 |
3 files changed, 629 insertions, 0 deletions
diff --git a/pkg/tcpip/link/fdbased/BUILD b/pkg/tcpip/link/fdbased/BUILD new file mode 100644 index 000000000..b5ab1ea6a --- /dev/null +++ b/pkg/tcpip/link/fdbased/BUILD @@ -0,0 +1,32 @@ +package(licenses = ["notice"]) # BSD + +load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test") + +go_library( + name = "fdbased", + srcs = ["endpoint.go"], + importpath = "gvisor.googlesource.com/gvisor/pkg/tcpip/link/fdbased", + visibility = [ + "//visibility:public", + ], + deps = [ + "//pkg/tcpip", + "//pkg/tcpip/buffer", + "//pkg/tcpip/header", + "//pkg/tcpip/link/rawfile", + "//pkg/tcpip/stack", + ], +) + +go_test( + name = "fdbased_test", + size = "small", + srcs = ["endpoint_test.go"], + embed = [":fdbased"], + deps = [ + "//pkg/tcpip", + "//pkg/tcpip/buffer", + "//pkg/tcpip/header", + "//pkg/tcpip/stack", + ], +) diff --git a/pkg/tcpip/link/fdbased/endpoint.go b/pkg/tcpip/link/fdbased/endpoint.go new file mode 100644 index 000000000..da74cd644 --- /dev/null +++ b/pkg/tcpip/link/fdbased/endpoint.go @@ -0,0 +1,261 @@ +// Copyright 2016 The Netstack Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package fdbased provides the implementation of data-link layer endpoints +// backed by boundary-preserving file descriptors (e.g., TUN devices, +// seqpacket/datagram sockets). +// +// FD based endpoints can be used in the networking stack by calling New() to +// create a new endpoint, and then passing it as an argument to +// Stack.CreateNIC(). +package fdbased + +import ( + "syscall" + + "gvisor.googlesource.com/gvisor/pkg/tcpip" + "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer" + "gvisor.googlesource.com/gvisor/pkg/tcpip/header" + "gvisor.googlesource.com/gvisor/pkg/tcpip/link/rawfile" + "gvisor.googlesource.com/gvisor/pkg/tcpip/stack" +) + +// BufConfig defines the shape of the vectorised view used to read packets from the NIC. 
+var BufConfig = []int{128, 256, 256, 512, 1024, 2048, 4096, 8192, 16384, 32768} + +type endpoint struct { + // fd is the file descriptor used to send and receive packets. + fd int + + // mtu (maximum transmission unit) is the maximum size of a packet. + mtu uint32 + + // hdrSize specifies the link-layer header size. If set to 0, no header + // is added/removed; otherwise an ethernet header is used. + hdrSize int + + // addr is the address of the endpoint. + addr tcpip.LinkAddress + + // caps holds the endpoint capabilities. + caps stack.LinkEndpointCapabilities + + // closed is a function to be called when the FD's peer (if any) closes + // its end of the communication pipe. + closed func(*tcpip.Error) + + vv *buffer.VectorisedView + iovecs []syscall.Iovec + views []buffer.View +} + +// Options specify the details about the fd-based endpoint to be created. +type Options struct { + FD int + MTU uint32 + EthernetHeader bool + ChecksumOffload bool + ClosedFunc func(*tcpip.Error) + Address tcpip.LinkAddress +} + +// New creates a new fd-based endpoint. +// +// Makes fd non-blocking, but does not take ownership of fd, which must remain +// open for the lifetime of the returned endpoint. 
+func New(opts *Options) tcpip.LinkEndpointID { + syscall.SetNonblock(opts.FD, true) + + caps := stack.LinkEndpointCapabilities(0) + if opts.ChecksumOffload { + caps |= stack.CapabilityChecksumOffload + } + + hdrSize := 0 + if opts.EthernetHeader { + hdrSize = header.EthernetMinimumSize + caps |= stack.CapabilityResolutionRequired + } + + e := &endpoint{ + fd: opts.FD, + mtu: opts.MTU, + caps: caps, + closed: opts.ClosedFunc, + addr: opts.Address, + hdrSize: hdrSize, + views: make([]buffer.View, len(BufConfig)), + iovecs: make([]syscall.Iovec, len(BufConfig)), + } + vv := buffer.NewVectorisedView(0, e.views) + e.vv = &vv + return stack.RegisterLinkEndpoint(e) +} + +// Attach launches the goroutine that reads packets from the file descriptor and +// dispatches them via the provided dispatcher. +func (e *endpoint) Attach(dispatcher stack.NetworkDispatcher) { + go e.dispatchLoop(dispatcher) // S/R-FIXME +} + +// MTU implements stack.LinkEndpoint.MTU. It returns the value initialized +// during construction. +func (e *endpoint) MTU() uint32 { + return e.mtu +} + +// Capabilities implements stack.LinkEndpoint.Capabilities. +func (e *endpoint) Capabilities() stack.LinkEndpointCapabilities { + return e.caps +} + +// MaxHeaderLength returns the maximum size of the link-layer header. +func (e *endpoint) MaxHeaderLength() uint16 { + return uint16(e.hdrSize) +} + +// LinkAddress returns the link address of this endpoint. +func (e *endpoint) LinkAddress() tcpip.LinkAddress { + return e.addr +} + +// WritePacket writes outbound packets to the file descriptor. If it is not +// currently writable, the packet is dropped. +func (e *endpoint) WritePacket(r *stack.Route, hdr *buffer.Prependable, payload buffer.View, protocol tcpip.NetworkProtocolNumber) *tcpip.Error { + if e.hdrSize > 0 { + // Add ethernet header if needed. 
+ eth := header.Ethernet(hdr.Prepend(header.EthernetMinimumSize)) + eth.Encode(&header.EthernetFields{ + DstAddr: r.RemoteLinkAddress, + SrcAddr: e.addr, + Type: protocol, + }) + } + + if len(payload) == 0 { + return rawfile.NonBlockingWrite(e.fd, hdr.UsedBytes()) + + } + + return rawfile.NonBlockingWrite2(e.fd, hdr.UsedBytes(), payload) +} + +func (e *endpoint) capViews(n int, buffers []int) int { + c := 0 + for i, s := range buffers { + c += s + if c >= n { + e.views[i].CapLength(s - (c - n)) + return i + 1 + } + } + return len(buffers) +} + +func (e *endpoint) allocateViews(bufConfig []int) { + for i, v := range e.views { + if v != nil { + break + } + b := buffer.NewView(bufConfig[i]) + e.views[i] = b + e.iovecs[i] = syscall.Iovec{ + Base: &b[0], + Len: uint64(len(b)), + } + } +} + +// dispatch reads one packet from the file descriptor and dispatches it. +func (e *endpoint) dispatch(d stack.NetworkDispatcher, largeV buffer.View) (bool, *tcpip.Error) { + e.allocateViews(BufConfig) + + n, err := rawfile.BlockingReadv(e.fd, e.iovecs) + if err != nil { + return false, err + } + + if n <= e.hdrSize { + return false, nil + } + + var p tcpip.NetworkProtocolNumber + var addr tcpip.LinkAddress + if e.hdrSize > 0 { + eth := header.Ethernet(e.views[0]) + p = eth.Type() + addr = eth.SourceAddress() + } else { + // We don't get any indication of what the packet is, so try to guess + // if it's an IPv4 or IPv6 packet. + switch header.IPVersion(e.views[0]) { + case header.IPv4Version: + p = header.IPv4ProtocolNumber + case header.IPv6Version: + p = header.IPv6ProtocolNumber + default: + return true, nil + } + } + + used := e.capViews(n, BufConfig) + e.vv.SetViews(e.views[:used]) + e.vv.SetSize(n) + e.vv.TrimFront(e.hdrSize) + + d.DeliverNetworkPacket(e, addr, p, e.vv) + + // Prepare e.views for another packet: release used views. 
+ for i := 0; i < used; i++ { + e.views[i] = nil + } + + return true, nil +} + +// dispatchLoop reads packets from the file descriptor in a loop and dispatches +// them to the network stack. +func (e *endpoint) dispatchLoop(d stack.NetworkDispatcher) *tcpip.Error { + v := buffer.NewView(header.MaxIPPacketSize) + for { + cont, err := e.dispatch(d, v) + if err != nil || !cont { + if e.closed != nil { + e.closed(err) + } + return err + } + } +} + +// InjectableEndpoint is an injectable fd-based endpoint. The endpoint writes +// to the FD, but does not read from it. All reads come from injected packets. +type InjectableEndpoint struct { + endpoint + + dispatcher stack.NetworkDispatcher +} + +// Attach saves the stack network-layer dispatcher for use later when packets +// are injected. +func (e *InjectableEndpoint) Attach(dispatcher stack.NetworkDispatcher) { + e.dispatcher = dispatcher +} + +// Inject injects an inbound packet. +func (e *InjectableEndpoint) Inject(protocol tcpip.NetworkProtocolNumber, vv *buffer.VectorisedView) { + e.dispatcher.DeliverNetworkPacket(e, "", protocol, vv) +} + +// NewInjectable creates a new fd-based InjectableEndpoint. +func NewInjectable(fd int, mtu uint32) (tcpip.LinkEndpointID, *InjectableEndpoint) { + syscall.SetNonblock(fd, true) + + e := &InjectableEndpoint{endpoint: endpoint{ + fd: fd, + mtu: mtu, + }} + + return stack.RegisterLinkEndpoint(e), e +} diff --git a/pkg/tcpip/link/fdbased/endpoint_test.go b/pkg/tcpip/link/fdbased/endpoint_test.go new file mode 100644 index 000000000..f7bbb28e1 --- /dev/null +++ b/pkg/tcpip/link/fdbased/endpoint_test.go @@ -0,0 +1,336 @@ +// Copyright 2016 The Netstack Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package fdbased + +import ( + "fmt" + "math/rand" + "reflect" + "syscall" + "testing" + "time" + + "gvisor.googlesource.com/gvisor/pkg/tcpip" + "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer" + "gvisor.googlesource.com/gvisor/pkg/tcpip/header" + "gvisor.googlesource.com/gvisor/pkg/tcpip/stack" +) + +type packetInfo struct { + raddr tcpip.LinkAddress + proto tcpip.NetworkProtocolNumber + contents buffer.View +} + +type context struct { + t *testing.T + fds [2]int + ep stack.LinkEndpoint + ch chan packetInfo + done chan struct{} +} + +func newContext(t *testing.T, opt *Options) *context { + fds, err := syscall.Socketpair(syscall.AF_UNIX, syscall.SOCK_SEQPACKET, 0) + if err != nil { + t.Fatalf("Socketpair failed: %v", err) + } + + done := make(chan struct{}, 1) + opt.ClosedFunc = func(*tcpip.Error) { + done <- struct{}{} + } + + opt.FD = fds[1] + ep := stack.FindLinkEndpoint(New(opt)).(*endpoint) + + c := &context{ + t: t, + fds: fds, + ep: ep, + ch: make(chan packetInfo, 100), + done: done, + } + + ep.Attach(c) + + return c +} + +func (c *context) cleanup() { + syscall.Close(c.fds[0]) + <-c.done + syscall.Close(c.fds[1]) +} + +func (c *context) DeliverNetworkPacket(linkEP stack.LinkEndpoint, remoteLinkAddr tcpip.LinkAddress, protocol tcpip.NetworkProtocolNumber, vv *buffer.VectorisedView) { + c.ch <- packetInfo{remoteLinkAddr, protocol, vv.ToView()} +} + +func TestNoEthernetProperties(t *testing.T) { + const mtu = 1500 + c := newContext(t, &Options{MTU: mtu}) + defer c.cleanup() + + if want, v := uint16(0), c.ep.MaxHeaderLength(); want != v { + t.Fatalf("MaxHeaderLength() = %v, want %v", v, want) + } + + if want, v := uint32(mtu), c.ep.MTU(); want != v { + t.Fatalf("MTU() = %v, want %v", v, want) + } +} + +func TestEthernetProperties(t *testing.T) { + const mtu = 1500 + c := newContext(t, &Options{EthernetHeader: true, MTU: mtu}) + defer c.cleanup() + + if want, v := uint16(header.EthernetMinimumSize), c.ep.MaxHeaderLength(); want != v { + 
t.Fatalf("MaxHeaderLength() = %v, want %v", v, want) + } + + if want, v := uint32(mtu), c.ep.MTU(); want != v { + t.Fatalf("MTU() = %v, want %v", v, want) + } +} + +func TestAddress(t *testing.T) { + const mtu = 1500 + addrs := []tcpip.LinkAddress{"", "abc", "def"} + for _, a := range addrs { + t.Run(fmt.Sprintf("Address: %q", a), func(t *testing.T) { + c := newContext(t, &Options{Address: a, MTU: mtu}) + defer c.cleanup() + + if want, v := a, c.ep.LinkAddress(); want != v { + t.Fatalf("LinkAddress() = %v, want %v", v, want) + } + }) + } +} + +func TestWritePacket(t *testing.T) { + const ( + mtu = 1500 + laddr = tcpip.LinkAddress("\x11\x22\x33\x44\x55\x66") + raddr = tcpip.LinkAddress("\x77\x88\x99\xaa\xbb\xcc") + proto = 10 + ) + + lengths := []int{0, 100, 1000} + eths := []bool{true, false} + + for _, eth := range eths { + for _, plen := range lengths { + t.Run(fmt.Sprintf("Eth=%v,PayloadLen=%v", eth, plen), func(t *testing.T) { + c := newContext(t, &Options{Address: laddr, MTU: mtu, EthernetHeader: eth}) + defer c.cleanup() + + r := &stack.Route{ + RemoteLinkAddress: raddr, + } + + // Build header. + hdr := buffer.NewPrependable(int(c.ep.MaxHeaderLength()) + 100) + b := hdr.Prepend(100) + for i := range b { + b[i] = uint8(rand.Intn(256)) + } + + // Build payload and write. + payload := make([]byte, plen) + for i := range payload { + payload[i] = uint8(rand.Intn(256)) + } + want := append(hdr.UsedBytes(), payload...) + if err := c.ep.WritePacket(r, &hdr, payload, proto); err != nil { + t.Fatalf("WritePacket failed: %v", err) + } + + // Read from fd, then compare with what we wrote. 
+ b = make([]byte, mtu) + n, err := syscall.Read(c.fds[0], b) + if err != nil { + t.Fatalf("Read failed: %v", err) + } + b = b[:n] + if eth { + h := header.Ethernet(b) + b = b[header.EthernetMinimumSize:] + + if a := h.SourceAddress(); a != laddr { + t.Fatalf("SourceAddress() = %v, want %v", a, laddr) + } + + if a := h.DestinationAddress(); a != raddr { + t.Fatalf("DestinationAddress() = %v, want %v", a, raddr) + } + + if et := h.Type(); et != proto { + t.Fatalf("Type() = %v, want %v", et, proto) + } + } + if len(b) != len(want) { + t.Fatalf("Read returned %v bytes, want %v", len(b), len(want)) + } + if !reflect.DeepEqual(b, want) { + t.Fatalf("Read returned %x, want %x", b, want) + } + }) + } + } +} + +func TestDeliverPacket(t *testing.T) { + const ( + mtu = 1500 + laddr = tcpip.LinkAddress("\x11\x22\x33\x44\x55\x66") + raddr = tcpip.LinkAddress("\x77\x88\x99\xaa\xbb\xcc") + proto = 10 + ) + + lengths := []int{100, 1000} + eths := []bool{true, false} + + for _, eth := range eths { + for _, plen := range lengths { + t.Run(fmt.Sprintf("Eth=%v,PayloadLen=%v", eth, plen), func(t *testing.T) { + c := newContext(t, &Options{Address: laddr, MTU: mtu, EthernetHeader: eth}) + defer c.cleanup() + + // Build packet. + b := make([]byte, plen) + all := b + for i := range b { + b[i] = uint8(rand.Intn(256)) + } + + if !eth { + // So that it looks like an IPv4 packet. + b[0] = 0x40 + } else { + hdr := make(header.Ethernet, header.EthernetMinimumSize) + hdr.Encode(&header.EthernetFields{ + SrcAddr: raddr, + DstAddr: laddr, + Type: proto, + }) + all = append(hdr, b...) + } + + // Write packet via the file descriptor. + if _, err := syscall.Write(c.fds[0], all); err != nil { + t.Fatalf("Write failed: %v", err) + } + + // Receive packet through the endpoint. 
+ select { + case pi := <-c.ch: + want := packetInfo{ + raddr: raddr, + proto: proto, + contents: b, + } + if !eth { + want.proto = header.IPv4ProtocolNumber + want.raddr = "" + } + if !reflect.DeepEqual(want, pi) { + t.Fatalf("Unexpected received packet: %+v, want %+v", pi, want) + } + case <-time.After(10 * time.Second): + t.Fatalf("Timed out waiting for packet") + } + }) + } + } +} + +func TestBufConfigMaxLength(t *testing.T) { + got := 0 + for _, i := range BufConfig { + got += i + } + want := header.MaxIPPacketSize // maximum TCP packet size + if got < want { + t.Errorf("total buffer size is invalid: got %d, want >= %d", got, want) + } +} + +func TestBufConfigFirst(t *testing.T) { + // The stack assumes that the TCP/IP header is entirely contained in the first view. + // Therefore, the first view needs to be large enough to contain the maximum TCP/IP + // header, which is 120 bytes (60 bytes for IP + 60 bytes for TCP). + want := 120 + got := BufConfig[0] + if got < want { + t.Errorf("first view has an invalid size: got %d, want >= %d", got, want) + } +} + +func build(bufConfig []int) *endpoint { + e := &endpoint{ + views: make([]buffer.View, len(bufConfig)), + iovecs: make([]syscall.Iovec, len(bufConfig)), + } + e.allocateViews(bufConfig) + return e +} + +var capLengthTestCases = []struct { + comment string + config []int + n int + wantUsed int + wantLengths []int +}{ + { + comment: "Single slice", + config: []int{2}, + n: 1, + wantUsed: 1, + wantLengths: []int{1}, + }, + { + comment: "Multiple slices", + config: []int{1, 2}, + n: 2, + wantUsed: 2, + wantLengths: []int{1, 1}, + }, + { + comment: "Entire buffer", + config: []int{1, 2}, + n: 3, + wantUsed: 2, + wantLengths: []int{1, 2}, + }, + { + comment: "Entire buffer but not on the last slice", + config: []int{1, 2, 3}, + n: 3, + wantUsed: 2, + wantLengths: []int{1, 2, 3}, + }, +} + +func TestCapLength(t *testing.T) { + for _, c := range capLengthTestCases { + e := build(c.config) + used := e.capViews(c.n, 
c.config) + if used != c.wantUsed { + t.Errorf("Test \"%s\" failed when calling capViews(%d, %v). Got %d. Want %d", c.comment, c.n, c.config, used, c.wantUsed) + } + lengths := make([]int, len(e.views)) + for i, v := range e.views { + lengths[i] = len(v) + } + if !reflect.DeepEqual(lengths, c.wantLengths) { + t.Errorf("Test \"%s\" failed when calling capViews(%d, %v). Got %v. Want %v", c.comment, c.n, c.config, lengths, c.wantLengths) + } + + } +} |