summaryrefslogtreecommitdiffhomepage
path: root/pkg/tcpip/link
diff options
context:
space:
mode:
authorkevin.xu <cming.xu@gmail.com>2020-04-27 21:51:31 +0800
committerGitHub <noreply@github.com>2020-04-27 21:51:31 +0800
commite896ca54db67524afc20b644d43c72185e72dc0e (patch)
tree2a16f3a62a5cafd098f1f028c621f1b655589d69 /pkg/tcpip/link
parent1f19624fa127d7d59cabe29593cc80b7fe6c81f8 (diff)
parent3c67754663f424f2ebbc0ff2a4c80e30618d5355 (diff)
Merge pull request #1 from google/master
catch up
Diffstat (limited to 'pkg/tcpip/link')
-rw-r--r--pkg/tcpip/link/channel/BUILD6
-rw-r--r--pkg/tcpip/link/channel/channel.go234
-rw-r--r--pkg/tcpip/link/fdbased/BUILD11
-rw-r--r--pkg/tcpip/link/fdbased/endpoint.go198
-rw-r--r--pkg/tcpip/link/fdbased/endpoint_test.go111
-rw-r--r--pkg/tcpip/link/fdbased/endpoint_unsafe.go9
-rw-r--r--pkg/tcpip/link/fdbased/mmap.go6
-rw-r--r--pkg/tcpip/link/fdbased/packet_dispatchers.go18
-rw-r--r--pkg/tcpip/link/loopback/BUILD5
-rw-r--r--pkg/tcpip/link/loopback/loopback.go42
-rw-r--r--pkg/tcpip/link/muxed/BUILD10
-rw-r--r--pkg/tcpip/link/muxed/injectable.go14
-rw-r--r--pkg/tcpip/link/muxed/injectable_test.go12
-rw-r--r--pkg/tcpip/link/rawfile/BUILD14
-rw-r--r--pkg/tcpip/link/rawfile/blockingpoll_yield_unsafe.go2
-rw-r--r--pkg/tcpip/link/rawfile/rawfile_test.go46
-rw-r--r--pkg/tcpip/link/rawfile/rawfile_unsafe.go6
-rw-r--r--pkg/tcpip/link/sharedmem/BUILD12
-rw-r--r--pkg/tcpip/link/sharedmem/pipe/BUILD9
-rw-r--r--pkg/tcpip/link/sharedmem/pipe/pipe_test.go3
-rw-r--r--pkg/tcpip/link/sharedmem/queue/BUILD8
-rw-r--r--pkg/tcpip/link/sharedmem/sharedmem.go24
-rw-r--r--pkg/tcpip/link/sharedmem/sharedmem_test.go72
-rw-r--r--pkg/tcpip/link/sniffer/BUILD7
-rw-r--r--pkg/tcpip/link/sniffer/sniffer.go244
-rw-r--r--pkg/tcpip/link/tun/BUILD23
-rw-r--r--pkg/tcpip/link/tun/device.go358
-rw-r--r--pkg/tcpip/link/tun/protocol.go56
-rw-r--r--pkg/tcpip/link/waitable/BUILD10
-rw-r--r--pkg/tcpip/link/waitable/waitable.go18
-rw-r--r--pkg/tcpip/link/waitable/waitable_test.go24
31 files changed, 1143 insertions, 469 deletions
diff --git a/pkg/tcpip/link/channel/BUILD b/pkg/tcpip/link/channel/BUILD
index 97a794986..b8b93e78e 100644
--- a/pkg/tcpip/link/channel/BUILD
+++ b/pkg/tcpip/link/channel/BUILD
@@ -1,13 +1,13 @@
-load("//tools/go_stateify:defs.bzl", "go_library")
+load("//tools:defs.bzl", "go_library")
package(licenses = ["notice"])
go_library(
name = "channel",
srcs = ["channel.go"],
- importpath = "gvisor.dev/gvisor/pkg/tcpip/link/channel",
- visibility = ["//:sandbox"],
+ visibility = ["//visibility:public"],
deps = [
+ "//pkg/sync",
"//pkg/tcpip",
"//pkg/tcpip/buffer",
"//pkg/tcpip/stack",
diff --git a/pkg/tcpip/link/channel/channel.go b/pkg/tcpip/link/channel/channel.go
index 14f197a77..9bf67686d 100644
--- a/pkg/tcpip/link/channel/channel.go
+++ b/pkg/tcpip/link/channel/channel.go
@@ -18,6 +18,9 @@
package channel
import (
+ "context"
+
+ "gvisor.dev/gvisor/pkg/sync"
"gvisor.dev/gvisor/pkg/tcpip"
"gvisor.dev/gvisor/pkg/tcpip/buffer"
"gvisor.dev/gvisor/pkg/tcpip/stack"
@@ -25,54 +28,166 @@ import (
// PacketInfo holds all the information about an outbound packet.
type PacketInfo struct {
- Header buffer.View
- Payload buffer.View
- Proto tcpip.NetworkProtocolNumber
- GSO *stack.GSO
+ Pkt *stack.PacketBuffer
+ Proto tcpip.NetworkProtocolNumber
+ GSO *stack.GSO
+ Route stack.Route
+}
+
+// Notification is the interface for receiving notification from the packet
+// queue.
+type Notification interface {
+ // WriteNotify will be called when a write happens to the queue.
+ WriteNotify()
+}
+
+// NotificationHandle is an opaque handle to the registered notification target.
+// It can be used to unregister the notification when no longer interested.
+//
+// +stateify savable
+type NotificationHandle struct {
+ n Notification
+}
+
+type queue struct {
+ // c is the outbound packet channel.
+ c chan PacketInfo
+ // mu protects fields below.
+ mu sync.RWMutex
+ notify []*NotificationHandle
+}
+
+func (q *queue) Close() {
+ close(q.c)
+}
+
+func (q *queue) Read() (PacketInfo, bool) {
+ select {
+ case p := <-q.c:
+ return p, true
+ default:
+ return PacketInfo{}, false
+ }
+}
+
+func (q *queue) ReadContext(ctx context.Context) (PacketInfo, bool) {
+ select {
+ case pkt := <-q.c:
+ return pkt, true
+ case <-ctx.Done():
+ return PacketInfo{}, false
+ }
+}
+
+func (q *queue) Write(p PacketInfo) bool {
+ wrote := false
+ select {
+ case q.c <- p:
+ wrote = true
+ default:
+ }
+ q.mu.Lock()
+ notify := q.notify
+ q.mu.Unlock()
+
+ if wrote {
+ // Send notification outside of lock.
+ for _, h := range notify {
+ h.n.WriteNotify()
+ }
+ }
+ return wrote
+}
+
+func (q *queue) Num() int {
+ return len(q.c)
+}
+
+func (q *queue) AddNotify(notify Notification) *NotificationHandle {
+ q.mu.Lock()
+ defer q.mu.Unlock()
+ h := &NotificationHandle{n: notify}
+ q.notify = append(q.notify, h)
+ return h
+}
+
+func (q *queue) RemoveNotify(handle *NotificationHandle) {
+ q.mu.Lock()
+ defer q.mu.Unlock()
+ // Make a copy, since we reads the array outside of lock when notifying.
+ notify := make([]*NotificationHandle, 0, len(q.notify))
+ for _, h := range q.notify {
+ if h != handle {
+ notify = append(notify, h)
+ }
+ }
+ q.notify = notify
}
// Endpoint is link layer endpoint that stores outbound packets in a channel
// and allows injection of inbound packets.
type Endpoint struct {
- dispatcher stack.NetworkDispatcher
- mtu uint32
- linkAddr tcpip.LinkAddress
- GSO bool
+ dispatcher stack.NetworkDispatcher
+ mtu uint32
+ linkAddr tcpip.LinkAddress
+ LinkEPCapabilities stack.LinkEndpointCapabilities
- // C is where outbound packets are queued.
- C chan PacketInfo
+ // Outbound packet queue.
+ q *queue
}
// New creates a new channel endpoint.
func New(size int, mtu uint32, linkAddr tcpip.LinkAddress) *Endpoint {
return &Endpoint{
- C: make(chan PacketInfo, size),
+ q: &queue{
+ c: make(chan PacketInfo, size),
+ },
mtu: mtu,
linkAddr: linkAddr,
}
}
+// Close closes e. Further packet injections will panic. Reads continue to
+// succeed until all packets are read.
+func (e *Endpoint) Close() {
+ e.q.Close()
+}
+
+// Read does non-blocking read one packet from the outbound packet queue.
+func (e *Endpoint) Read() (PacketInfo, bool) {
+ return e.q.Read()
+}
+
+// ReadContext does blocking read for one packet from the outbound packet queue.
+// It can be cancelled by ctx, and in this case, it returns false.
+func (e *Endpoint) ReadContext(ctx context.Context) (PacketInfo, bool) {
+ return e.q.ReadContext(ctx)
+}
+
// Drain removes all outbound packets from the channel and counts them.
func (e *Endpoint) Drain() int {
c := 0
for {
- select {
- case <-e.C:
- c++
- default:
+ if _, ok := e.Read(); !ok {
return c
}
+ c++
}
}
-// Inject injects an inbound packet.
-func (e *Endpoint) Inject(protocol tcpip.NetworkProtocolNumber, vv buffer.VectorisedView) {
- e.InjectLinkAddr(protocol, "", vv)
+// NumQueued returns the number of packet queued for outbound.
+func (e *Endpoint) NumQueued() int {
+ return e.q.Num()
+}
+
+// InjectInbound injects an inbound packet.
+func (e *Endpoint) InjectInbound(protocol tcpip.NetworkProtocolNumber, pkt stack.PacketBuffer) {
+ e.InjectLinkAddr(protocol, "", pkt)
}
// InjectLinkAddr injects an inbound packet with a remote link address.
-func (e *Endpoint) InjectLinkAddr(protocol tcpip.NetworkProtocolNumber, remote tcpip.LinkAddress, vv buffer.VectorisedView) {
- e.dispatcher.DeliverNetworkPacket(e, remote, "" /* local */, protocol, vv.Clone(nil), nil /* linkHeader */)
+func (e *Endpoint) InjectLinkAddr(protocol tcpip.NetworkProtocolNumber, remote tcpip.LinkAddress, pkt stack.PacketBuffer) {
+ e.dispatcher.DeliverNetworkPacket(e, remote, "" /* local */, protocol, pkt)
}
// Attach saves the stack network-layer dispatcher for use later when packets
@@ -94,11 +209,7 @@ func (e *Endpoint) MTU() uint32 {
// Capabilities implements stack.LinkEndpoint.Capabilities.
func (e *Endpoint) Capabilities() stack.LinkEndpointCapabilities {
- caps := stack.LinkEndpointCapabilities(0)
- if e.GSO {
- caps |= stack.CapabilityHardwareGSO
- }
- return caps
+ return e.LinkEPCapabilities
}
// GSOMaxSize returns the maximum GSO packet size.
@@ -118,65 +229,70 @@ func (e *Endpoint) LinkAddress() tcpip.LinkAddress {
}
// WritePacket stores outbound packets into the channel.
-func (e *Endpoint) WritePacket(_ *stack.Route, gso *stack.GSO, hdr buffer.Prependable, payload buffer.VectorisedView, protocol tcpip.NetworkProtocolNumber) *tcpip.Error {
+func (e *Endpoint) WritePacket(r *stack.Route, gso *stack.GSO, protocol tcpip.NetworkProtocolNumber, pkt stack.PacketBuffer) *tcpip.Error {
+ // Clone r then release its resource so we only get the relevant fields from
+ // stack.Route without holding a reference to a NIC's endpoint.
+ route := r.Clone()
+ route.Release()
p := PacketInfo{
- Header: hdr.View(),
- Proto: protocol,
- Payload: payload.ToView(),
- GSO: gso,
+ Pkt: &pkt,
+ Proto: protocol,
+ GSO: gso,
+ Route: route,
}
- select {
- case e.C <- p:
- default:
- }
+ e.q.Write(p)
return nil
}
// WritePackets stores outbound packets into the channel.
-func (e *Endpoint) WritePackets(_ *stack.Route, gso *stack.GSO, hdrs []stack.PacketDescriptor, payload buffer.VectorisedView, protocol tcpip.NetworkProtocolNumber) (int, *tcpip.Error) {
- payloadView := payload.ToView()
+func (e *Endpoint) WritePackets(r *stack.Route, gso *stack.GSO, pkts stack.PacketBufferList, protocol tcpip.NetworkProtocolNumber) (int, *tcpip.Error) {
+ // Clone r then release its resource so we only get the relevant fields from
+ // stack.Route without holding a reference to a NIC's endpoint.
+ route := r.Clone()
+ route.Release()
n := 0
-packetLoop:
- for i := range hdrs {
- hdr := &hdrs[i].Hdr
- off := hdrs[i].Off
- size := hdrs[i].Size
+ for pkt := pkts.Front(); pkt != nil; pkt = pkt.Next() {
p := PacketInfo{
- Header: hdr.View(),
- Proto: protocol,
- Payload: buffer.NewViewFromBytes(payloadView[off : off+size]),
- GSO: gso,
+ Pkt: pkt,
+ Proto: protocol,
+ GSO: gso,
+ Route: route,
}
- select {
- case e.C <- p:
- n++
- default:
- break packetLoop
+ if !e.q.Write(p) {
+ break
}
+ n++
}
return n, nil
}
// WriteRawPacket implements stack.LinkEndpoint.WriteRawPacket.
-func (e *Endpoint) WriteRawPacket(packet buffer.VectorisedView) *tcpip.Error {
+func (e *Endpoint) WriteRawPacket(vv buffer.VectorisedView) *tcpip.Error {
p := PacketInfo{
- Header: packet.ToView(),
- Proto: 0,
- Payload: buffer.View{},
- GSO: nil,
+ Pkt: &stack.PacketBuffer{Data: vv},
+ Proto: 0,
+ GSO: nil,
}
- select {
- case e.C <- p:
- default:
- }
+ e.q.Write(p)
return nil
}
// Wait implements stack.LinkEndpoint.Wait.
func (*Endpoint) Wait() {}
+
+// AddNotify adds a notification target for receiving event about outgoing
+// packets.
+func (e *Endpoint) AddNotify(notify Notification) *NotificationHandle {
+ return e.q.AddNotify(notify)
+}
+
+// RemoveNotify removes handle from the list of notification targets.
+func (e *Endpoint) RemoveNotify(handle *NotificationHandle) {
+ e.q.RemoveNotify(handle)
+}
diff --git a/pkg/tcpip/link/fdbased/BUILD b/pkg/tcpip/link/fdbased/BUILD
index 8fa9e3984..abe725548 100644
--- a/pkg/tcpip/link/fdbased/BUILD
+++ b/pkg/tcpip/link/fdbased/BUILD
@@ -1,5 +1,4 @@
-load("//tools/go_stateify:defs.bzl", "go_library")
-load("@io_bazel_rules_go//go:def.bzl", "go_test")
+load("//tools:defs.bzl", "go_library", "go_test")
package(licenses = ["notice"])
@@ -13,11 +12,9 @@ go_library(
"mmap_unsafe.go",
"packet_dispatchers.go",
],
- importpath = "gvisor.dev/gvisor/pkg/tcpip/link/fdbased",
- visibility = [
- "//visibility:public",
- ],
+ visibility = ["//visibility:public"],
deps = [
+ "//pkg/sync",
"//pkg/tcpip",
"//pkg/tcpip/buffer",
"//pkg/tcpip/header",
@@ -31,7 +28,7 @@ go_test(
name = "fdbased_test",
size = "small",
srcs = ["endpoint_test.go"],
- embed = [":fdbased"],
+ library = ":fdbased",
deps = [
"//pkg/tcpip",
"//pkg/tcpip/buffer",
diff --git a/pkg/tcpip/link/fdbased/endpoint.go b/pkg/tcpip/link/fdbased/endpoint.go
index ae4858529..b857ce9d0 100644
--- a/pkg/tcpip/link/fdbased/endpoint.go
+++ b/pkg/tcpip/link/fdbased/endpoint.go
@@ -41,10 +41,10 @@ package fdbased
import (
"fmt"
- "sync"
"syscall"
"golang.org/x/sys/unix"
+ "gvisor.dev/gvisor/pkg/sync"
"gvisor.dev/gvisor/pkg/tcpip"
"gvisor.dev/gvisor/pkg/tcpip/buffer"
"gvisor.dev/gvisor/pkg/tcpip/header"
@@ -91,7 +91,7 @@ func (p PacketDispatchMode) String() string {
case PacketMMap:
return "PacketMMap"
default:
- return fmt.Sprintf("unknown packet dispatch mode %v", p)
+ return fmt.Sprintf("unknown packet dispatch mode '%d'", p)
}
}
@@ -386,10 +386,11 @@ const (
// WritePacket writes outbound packets to the file descriptor. If it is not
// currently writable, the packet is dropped.
-func (e *endpoint) WritePacket(r *stack.Route, gso *stack.GSO, hdr buffer.Prependable, payload buffer.VectorisedView, protocol tcpip.NetworkProtocolNumber) *tcpip.Error {
+func (e *endpoint) WritePacket(r *stack.Route, gso *stack.GSO, protocol tcpip.NetworkProtocolNumber, pkt stack.PacketBuffer) *tcpip.Error {
if e.hdrSize > 0 {
// Add ethernet header if needed.
- eth := header.Ethernet(hdr.Prepend(header.EthernetMinimumSize))
+ eth := header.Ethernet(pkt.Header.Prepend(header.EthernetMinimumSize))
+ pkt.LinkHeader = buffer.View(eth)
ethHdr := &header.EthernetFields{
DstAddr: r.RemoteLinkAddress,
Type: protocol,
@@ -404,17 +405,17 @@ func (e *endpoint) WritePacket(r *stack.Route, gso *stack.GSO, hdr buffer.Prepen
eth.Encode(ethHdr)
}
+ fd := e.fds[pkt.Hash%uint32(len(e.fds))]
if e.Capabilities()&stack.CapabilityHardwareGSO != 0 {
vnetHdr := virtioNetHdr{}
- vnetHdrBuf := vnetHdrToByteSlice(&vnetHdr)
if gso != nil {
- vnetHdr.hdrLen = uint16(hdr.UsedLength())
+ vnetHdr.hdrLen = uint16(pkt.Header.UsedLength())
if gso.NeedsCsum {
vnetHdr.flags = _VIRTIO_NET_HDR_F_NEEDS_CSUM
vnetHdr.csumStart = header.EthernetMinimumSize + gso.L3HdrLen
vnetHdr.csumOffset = gso.CsumOffset
}
- if gso.Type != stack.GSONone && uint16(payload.Size()) > gso.MSS {
+ if gso.Type != stack.GSONone && uint16(pkt.Data.Size()) > gso.MSS {
switch gso.Type {
case stack.GSOTCPv4:
vnetHdr.gsoType = _VIRTIO_NET_HDR_GSO_TCPV4
@@ -427,124 +428,120 @@ func (e *endpoint) WritePacket(r *stack.Route, gso *stack.GSO, hdr buffer.Prepen
}
}
- return rawfile.NonBlockingWrite3(e.fds[0], vnetHdrBuf, hdr.View(), payload.ToView())
+ vnetHdrBuf := vnetHdrToByteSlice(&vnetHdr)
+ return rawfile.NonBlockingWrite3(fd, vnetHdrBuf, pkt.Header.View(), pkt.Data.ToView())
}
- if payload.Size() == 0 {
- return rawfile.NonBlockingWrite(e.fds[0], hdr.View())
+ if pkt.Data.Size() == 0 {
+ return rawfile.NonBlockingWrite(fd, pkt.Header.View())
}
- return rawfile.NonBlockingWrite3(e.fds[0], hdr.View(), payload.ToView(), nil)
+ return rawfile.NonBlockingWrite3(fd, pkt.Header.View(), pkt.Data.ToView(), nil)
}
// WritePackets writes outbound packets to the file descriptor. If it is not
// currently writable, the packet is dropped.
-func (e *endpoint) WritePackets(r *stack.Route, gso *stack.GSO, hdrs []stack.PacketDescriptor, payload buffer.VectorisedView, protocol tcpip.NetworkProtocolNumber) (int, *tcpip.Error) {
- var ethHdrBuf []byte
- // hdr + data
- iovLen := 2
- if e.hdrSize > 0 {
- // Add ethernet header if needed.
- ethHdrBuf = make([]byte, header.EthernetMinimumSize)
- eth := header.Ethernet(ethHdrBuf)
- ethHdr := &header.EthernetFields{
- DstAddr: r.RemoteLinkAddress,
- Type: protocol,
- }
-
- // Preserve the src address if it's set in the route.
- if r.LocalLinkAddress != "" {
- ethHdr.SrcAddr = r.LocalLinkAddress
- } else {
- ethHdr.SrcAddr = e.addr
- }
- eth.Encode(ethHdr)
- iovLen++
- }
+//
+// NOTE: This API uses sendmmsg to batch packets. As a result the underlying FD
+// picked to write the packet out has to be the same for all packets in the
+// list. In other words all packets in the batch should belong to the same
+// flow.
+func (e *endpoint) WritePackets(r *stack.Route, gso *stack.GSO, pkts stack.PacketBufferList, protocol tcpip.NetworkProtocolNumber) (int, *tcpip.Error) {
+ n := pkts.Len()
- n := len(hdrs)
-
- views := payload.Views()
- /*
- * Each bondary in views can add one more iovec.
- *
- * payload | | | |
- * -----------------------------
- * packets | | | | | | |
- * -----------------------------
- * iovecs | | | | | | | | |
- */
- iovec := make([]syscall.Iovec, n*iovLen+len(views)-1)
mmsgHdrs := make([]rawfile.MMsgHdr, n)
+ i := 0
+ for pkt := pkts.Front(); pkt != nil; pkt = pkt.Next() {
+ var ethHdrBuf []byte
+ iovLen := 0
+ if e.hdrSize > 0 {
+ // Add ethernet header if needed.
+ ethHdrBuf = make([]byte, header.EthernetMinimumSize)
+ eth := header.Ethernet(ethHdrBuf)
+ ethHdr := &header.EthernetFields{
+ DstAddr: r.RemoteLinkAddress,
+ Type: protocol,
+ }
- iovecIdx := 0
- viewIdx := 0
- viewOff := 0
- off := 0
- nextOff := 0
- for i := range hdrs {
- prevIovecIdx := iovecIdx
- mmsgHdr := &mmsgHdrs[i]
- mmsgHdr.Msg.Iov = &iovec[iovecIdx]
- packetSize := hdrs[i].Size
- hdr := &hdrs[i].Hdr
-
- off = hdrs[i].Off
- if off != nextOff {
- // We stop in a different point last time.
- size := packetSize
- viewIdx = 0
- viewOff = 0
- for size > 0 {
- if size >= len(views[viewIdx]) {
- viewIdx++
- viewOff = 0
- size -= len(views[viewIdx])
- } else {
- viewOff = size
- size = 0
+ // Preserve the src address if it's set in the route.
+ if r.LocalLinkAddress != "" {
+ ethHdr.SrcAddr = r.LocalLinkAddress
+ } else {
+ ethHdr.SrcAddr = e.addr
+ }
+ eth.Encode(ethHdr)
+ iovLen++
+ }
+
+ var vnetHdrBuf []byte
+ vnetHdr := virtioNetHdr{}
+ if e.Capabilities()&stack.CapabilityHardwareGSO != 0 {
+ if gso != nil {
+ vnetHdr.hdrLen = uint16(pkt.Header.UsedLength())
+ if gso.NeedsCsum {
+ vnetHdr.flags = _VIRTIO_NET_HDR_F_NEEDS_CSUM
+ vnetHdr.csumStart = header.EthernetMinimumSize + gso.L3HdrLen
+ vnetHdr.csumOffset = gso.CsumOffset
+ }
+ if gso.Type != stack.GSONone && uint16(pkt.Data.Size()) > gso.MSS {
+ switch gso.Type {
+ case stack.GSOTCPv4:
+ vnetHdr.gsoType = _VIRTIO_NET_HDR_GSO_TCPV4
+ case stack.GSOTCPv6:
+ vnetHdr.gsoType = _VIRTIO_NET_HDR_GSO_TCPV6
+ default:
+ panic(fmt.Sprintf("Unknown gso type: %v", gso.Type))
+ }
+ vnetHdr.gsoSize = gso.MSS
}
}
+ vnetHdrBuf = vnetHdrToByteSlice(&vnetHdr)
+ iovLen++
}
- nextOff = off + packetSize
+ iovecs := make([]syscall.Iovec, iovLen+1+len(pkt.Data.Views()))
+ mmsgHdr := &mmsgHdrs[i]
+ mmsgHdr.Msg.Iov = &iovecs[0]
+ iovecIdx := 0
+ if vnetHdrBuf != nil {
+ v := &iovecs[iovecIdx]
+ v.Base = &vnetHdrBuf[0]
+ v.Len = uint64(len(vnetHdrBuf))
+ iovecIdx++
+ }
if ethHdrBuf != nil {
- v := &iovec[iovecIdx]
+ v := &iovecs[iovecIdx]
v.Base = &ethHdrBuf[0]
v.Len = uint64(len(ethHdrBuf))
iovecIdx++
}
-
- v := &iovec[iovecIdx]
+ pktSize := uint64(0)
+ // Encode L3 Header
+ v := &iovecs[iovecIdx]
+ hdr := &pkt.Header
hdrView := hdr.View()
v.Base = &hdrView[0]
v.Len = uint64(len(hdrView))
+ pktSize += v.Len
iovecIdx++
- for packetSize > 0 {
- vec := &iovec[iovecIdx]
+ // Now encode the Transport Payload.
+ pktViews := pkt.Data.Views()
+ for i := range pktViews {
+ vec := &iovecs[iovecIdx]
iovecIdx++
-
- v := views[viewIdx]
- vec.Base = &v[viewOff]
- s := len(v) - viewOff
- if s <= packetSize {
- viewIdx++
- viewOff = 0
- } else {
- s = packetSize
- viewOff += s
- }
- vec.Len = uint64(s)
- packetSize -= s
+ vec.Base = &pktViews[i][0]
+ vec.Len = uint64(len(pktViews[i]))
+ pktSize += vec.Len
}
-
- mmsgHdr.Msg.Iovlen = uint64(iovecIdx - prevIovecIdx)
+ mmsgHdr.Msg.Iovlen = uint64(iovecIdx)
+ i++
}
packets := 0
for packets < n {
- sent, err := rawfile.NonBlockingSendMMsg(e.fds[0], mmsgHdrs)
+ fd := e.fds[pkts.Front().Hash%uint32(len(e.fds))]
+ sent, err := rawfile.NonBlockingSendMMsg(fd, mmsgHdrs)
if err != nil {
return packets, err
}
@@ -554,9 +551,14 @@ func (e *endpoint) WritePackets(r *stack.Route, gso *stack.GSO, hdrs []stack.Pac
return packets, nil
}
+// viewsEqual tests whether v1 and v2 refer to the same backing bytes.
+func viewsEqual(vs1, vs2 []buffer.View) bool {
+ return len(vs1) == len(vs2) && (len(vs1) == 0 || &vs1[0] == &vs2[0])
+}
+
// WriteRawPacket implements stack.LinkEndpoint.WriteRawPacket.
-func (e *endpoint) WriteRawPacket(packet buffer.VectorisedView) *tcpip.Error {
- return rawfile.NonBlockingWrite(e.fds[0], packet.ToView())
+func (e *endpoint) WriteRawPacket(vv buffer.VectorisedView) *tcpip.Error {
+ return rawfile.NonBlockingWrite(e.fds[0], vv.ToView())
}
// InjectOutobund implements stack.InjectableEndpoint.InjectOutbound.
@@ -598,8 +600,8 @@ func (e *InjectableEndpoint) Attach(dispatcher stack.NetworkDispatcher) {
}
// InjectInbound injects an inbound packet.
-func (e *InjectableEndpoint) InjectInbound(protocol tcpip.NetworkProtocolNumber, vv buffer.VectorisedView) {
- e.dispatcher.DeliverNetworkPacket(e, "" /* remote */, "" /* local */, protocol, vv, nil /* linkHeader */)
+func (e *InjectableEndpoint) InjectInbound(protocol tcpip.NetworkProtocolNumber, pkt stack.PacketBuffer) {
+ e.dispatcher.DeliverNetworkPacket(e, "" /* remote */, "" /* local */, protocol, pkt)
}
// NewInjectable creates a new fd-based InjectableEndpoint.
diff --git a/pkg/tcpip/link/fdbased/endpoint_test.go b/pkg/tcpip/link/fdbased/endpoint_test.go
index 59378b96c..3bfb15a8e 100644
--- a/pkg/tcpip/link/fdbased/endpoint_test.go
+++ b/pkg/tcpip/link/fdbased/endpoint_test.go
@@ -43,43 +43,48 @@ const (
)
type packetInfo struct {
- raddr tcpip.LinkAddress
- proto tcpip.NetworkProtocolNumber
- contents buffer.View
- linkHeader buffer.View
+ raddr tcpip.LinkAddress
+ proto tcpip.NetworkProtocolNumber
+ contents stack.PacketBuffer
}
type context struct {
- t *testing.T
- fds [2]int
- ep stack.LinkEndpoint
- ch chan packetInfo
- done chan struct{}
+ t *testing.T
+ readFDs []int
+ writeFDs []int
+ ep stack.LinkEndpoint
+ ch chan packetInfo
+ done chan struct{}
}
func newContext(t *testing.T, opt *Options) *context {
- fds, err := syscall.Socketpair(syscall.AF_UNIX, syscall.SOCK_SEQPACKET, 0)
+ firstFDPair, err := syscall.Socketpair(syscall.AF_UNIX, syscall.SOCK_SEQPACKET, 0)
+ if err != nil {
+ t.Fatalf("Socketpair failed: %v", err)
+ }
+ secondFDPair, err := syscall.Socketpair(syscall.AF_UNIX, syscall.SOCK_SEQPACKET, 0)
if err != nil {
t.Fatalf("Socketpair failed: %v", err)
}
- done := make(chan struct{}, 1)
+ done := make(chan struct{}, 2)
opt.ClosedFunc = func(*tcpip.Error) {
done <- struct{}{}
}
- opt.FDs = []int{fds[1]}
+ opt.FDs = []int{firstFDPair[1], secondFDPair[1]}
ep, err := New(opt)
if err != nil {
t.Fatalf("Failed to create FD endpoint: %v", err)
}
c := &context{
- t: t,
- fds: fds,
- ep: ep,
- ch: make(chan packetInfo, 100),
- done: done,
+ t: t,
+ readFDs: []int{firstFDPair[0], secondFDPair[0]},
+ writeFDs: opt.FDs,
+ ep: ep,
+ ch: make(chan packetInfo, 100),
+ done: done,
}
ep.Attach(c)
@@ -88,13 +93,18 @@ func newContext(t *testing.T, opt *Options) *context {
}
func (c *context) cleanup() {
- syscall.Close(c.fds[0])
+ for _, fd := range c.readFDs {
+ syscall.Close(fd)
+ }
+ <-c.done
<-c.done
- syscall.Close(c.fds[1])
+ for _, fd := range c.writeFDs {
+ syscall.Close(fd)
+ }
}
-func (c *context) DeliverNetworkPacket(linkEP stack.LinkEndpoint, remote tcpip.LinkAddress, local tcpip.LinkAddress, protocol tcpip.NetworkProtocolNumber, vv buffer.VectorisedView, linkHeader buffer.View) {
- c.ch <- packetInfo{remote, protocol, vv.ToView(), linkHeader}
+func (c *context) DeliverNetworkPacket(linkEP stack.LinkEndpoint, remote tcpip.LinkAddress, local tcpip.LinkAddress, protocol tcpip.NetworkProtocolNumber, pkt stack.PacketBuffer) {
+ c.ch <- packetInfo{remote, protocol, pkt}
}
func TestNoEthernetProperties(t *testing.T) {
@@ -137,7 +147,7 @@ func TestAddress(t *testing.T) {
}
}
-func testWritePacket(t *testing.T, plen int, eth bool, gsoMaxSize uint32) {
+func testWritePacket(t *testing.T, plen int, eth bool, gsoMaxSize uint32, hash uint32) {
c := newContext(t, &Options{Address: laddr, MTU: mtu, EthernetHeader: eth, GSOMaxSize: gsoMaxSize})
defer c.cleanup()
@@ -169,13 +179,18 @@ func testWritePacket(t *testing.T, plen int, eth bool, gsoMaxSize uint32) {
L3HdrLen: header.IPv4MaximumHeaderSize,
}
}
- if err := c.ep.WritePacket(r, gso, hdr, payload.ToVectorisedView(), proto); err != nil {
+ if err := c.ep.WritePacket(r, gso, proto, stack.PacketBuffer{
+ Header: hdr,
+ Data: payload.ToVectorisedView(),
+ Hash: hash,
+ }); err != nil {
t.Fatalf("WritePacket failed: %v", err)
}
- // Read from fd, then compare with what we wrote.
+ // Read from the corresponding FD, then compare with what we wrote.
b = make([]byte, mtu)
- n, err := syscall.Read(c.fds[0], b)
+ fd := c.readFDs[hash%uint32(len(c.readFDs))]
+ n, err := syscall.Read(fd, b)
if err != nil {
t.Fatalf("Read failed: %v", err)
}
@@ -236,7 +251,7 @@ func TestWritePacket(t *testing.T) {
t.Run(
fmt.Sprintf("Eth=%v,PayloadLen=%v,GSOMaxSize=%v", eth, plen, gso),
func(t *testing.T) {
- testWritePacket(t, plen, eth, gso)
+ testWritePacket(t, plen, eth, gso, 0)
},
)
}
@@ -244,6 +259,27 @@ func TestWritePacket(t *testing.T) {
}
}
+func TestHashedWritePacket(t *testing.T) {
+ lengths := []int{0, 100, 1000}
+ eths := []bool{true, false}
+ gsos := []uint32{0, 32768}
+ hashes := []uint32{0, 1}
+ for _, eth := range eths {
+ for _, plen := range lengths {
+ for _, gso := range gsos {
+ for _, hash := range hashes {
+ t.Run(
+ fmt.Sprintf("Eth=%v,PayloadLen=%v,GSOMaxSize=%v,Hash=%d", eth, plen, gso, hash),
+ func(t *testing.T) {
+ testWritePacket(t, plen, eth, gso, hash)
+ },
+ )
+ }
+ }
+ }
+ }
+}
+
func TestPreserveSrcAddress(t *testing.T) {
baddr := tcpip.LinkAddress("\xcc\xbb\xaa\x77\x88\x99")
@@ -259,13 +295,16 @@ func TestPreserveSrcAddress(t *testing.T) {
// WritePacket panics given a prependable with anything less than
// the minimum size of the ethernet header.
hdr := buffer.NewPrependable(header.EthernetMinimumSize)
- if err := c.ep.WritePacket(r, nil /* gso */, hdr, buffer.VectorisedView{}, proto); err != nil {
+ if err := c.ep.WritePacket(r, nil /* gso */, proto, stack.PacketBuffer{
+ Header: hdr,
+ Data: buffer.VectorisedView{},
+ }); err != nil {
t.Fatalf("WritePacket failed: %v", err)
}
// Read from the FD, then compare with what we wrote.
b := make([]byte, mtu)
- n, err := syscall.Read(c.fds[0], b)
+ n, err := syscall.Read(c.readFDs[0], b)
if err != nil {
t.Fatalf("Read failed: %v", err)
}
@@ -309,7 +348,7 @@ func TestDeliverPacket(t *testing.T) {
}
// Write packet via the file descriptor.
- if _, err := syscall.Write(c.fds[0], all); err != nil {
+ if _, err := syscall.Write(c.readFDs[0], all); err != nil {
t.Fatalf("Write failed: %v", err)
}
@@ -317,15 +356,21 @@ func TestDeliverPacket(t *testing.T) {
select {
case pi := <-c.ch:
want := packetInfo{
- raddr: raddr,
- proto: proto,
- contents: b,
- linkHeader: buffer.View(hdr),
+ raddr: raddr,
+ proto: proto,
+ contents: stack.PacketBuffer{
+ Data: buffer.View(b).ToVectorisedView(),
+ LinkHeader: buffer.View(hdr),
+ },
}
if !eth {
want.proto = header.IPv4ProtocolNumber
want.raddr = ""
}
+ // want.contents.Data will be a single
+ // view, so make pi do the same for the
+ // DeepEqual check.
+ pi.contents.Data = pi.contents.Data.ToView().ToVectorisedView()
if !reflect.DeepEqual(want, pi) {
t.Fatalf("Unexpected received packet: %+v, want %+v", pi, want)
}
diff --git a/pkg/tcpip/link/fdbased/endpoint_unsafe.go b/pkg/tcpip/link/fdbased/endpoint_unsafe.go
index 97a477b61..d81858353 100644
--- a/pkg/tcpip/link/fdbased/endpoint_unsafe.go
+++ b/pkg/tcpip/link/fdbased/endpoint_unsafe.go
@@ -24,9 +24,10 @@ import (
const virtioNetHdrSize = int(unsafe.Sizeof(virtioNetHdr{}))
func vnetHdrToByteSlice(hdr *virtioNetHdr) (slice []byte) {
- sh := (*reflect.SliceHeader)(unsafe.Pointer(&slice))
- sh.Data = uintptr(unsafe.Pointer(hdr))
- sh.Len = virtioNetHdrSize
- sh.Cap = virtioNetHdrSize
+ *(*reflect.SliceHeader)(unsafe.Pointer(&slice)) = reflect.SliceHeader{
+ Data: uintptr((unsafe.Pointer(hdr))),
+ Len: virtioNetHdrSize,
+ Cap: virtioNetHdrSize,
+ }
return
}
diff --git a/pkg/tcpip/link/fdbased/mmap.go b/pkg/tcpip/link/fdbased/mmap.go
index 554d45715..fe2bf3b0b 100644
--- a/pkg/tcpip/link/fdbased/mmap.go
+++ b/pkg/tcpip/link/fdbased/mmap.go
@@ -25,6 +25,7 @@ import (
"gvisor.dev/gvisor/pkg/tcpip/buffer"
"gvisor.dev/gvisor/pkg/tcpip/header"
"gvisor.dev/gvisor/pkg/tcpip/link/rawfile"
+ "gvisor.dev/gvisor/pkg/tcpip/stack"
)
const (
@@ -190,6 +191,9 @@ func (d *packetMMapDispatcher) dispatch() (bool, *tcpip.Error) {
}
pkt = pkt[d.e.hdrSize:]
- d.e.dispatcher.DeliverNetworkPacket(d.e, remote, local, p, buffer.NewVectorisedView(len(pkt), []buffer.View{buffer.View(pkt)}), buffer.View(eth))
+ d.e.dispatcher.DeliverNetworkPacket(d.e, remote, local, p, stack.PacketBuffer{
+ Data: buffer.View(pkt).ToVectorisedView(),
+ LinkHeader: buffer.View(eth),
+ })
return true, nil
}
diff --git a/pkg/tcpip/link/fdbased/packet_dispatchers.go b/pkg/tcpip/link/fdbased/packet_dispatchers.go
index 12168a1dc..cb4cbea69 100644
--- a/pkg/tcpip/link/fdbased/packet_dispatchers.go
+++ b/pkg/tcpip/link/fdbased/packet_dispatchers.go
@@ -139,10 +139,13 @@ func (d *readVDispatcher) dispatch() (bool, *tcpip.Error) {
}
used := d.capViews(n, BufConfig)
- vv := buffer.NewVectorisedView(n, d.views[:used])
- vv.TrimFront(d.e.hdrSize)
+ pkt := stack.PacketBuffer{
+ Data: buffer.NewVectorisedView(n, append([]buffer.View(nil), d.views[:used]...)),
+ LinkHeader: buffer.View(eth),
+ }
+ pkt.Data.TrimFront(d.e.hdrSize)
- d.e.dispatcher.DeliverNetworkPacket(d.e, remote, local, p, vv, buffer.View(eth))
+ d.e.dispatcher.DeliverNetworkPacket(d.e, remote, local, p, pkt)
// Prepare e.views for another packet: release used views.
for i := 0; i < used; i++ {
@@ -293,9 +296,12 @@ func (d *recvMMsgDispatcher) dispatch() (bool, *tcpip.Error) {
}
used := d.capViews(k, int(n), BufConfig)
- vv := buffer.NewVectorisedView(int(n), d.views[k][:used])
- vv.TrimFront(d.e.hdrSize)
- d.e.dispatcher.DeliverNetworkPacket(d.e, remote, local, p, vv, buffer.View(eth))
+ pkt := stack.PacketBuffer{
+ Data: buffer.NewVectorisedView(int(n), append([]buffer.View(nil), d.views[k][:used]...)),
+ LinkHeader: buffer.View(eth),
+ }
+ pkt.Data.TrimFront(d.e.hdrSize)
+ d.e.dispatcher.DeliverNetworkPacket(d.e, remote, local, p, pkt)
// Prepare e.views for another packet: release used views.
for i := 0; i < used; i++ {
diff --git a/pkg/tcpip/link/loopback/BUILD b/pkg/tcpip/link/loopback/BUILD
index 23e4d1418..6bf3805b7 100644
--- a/pkg/tcpip/link/loopback/BUILD
+++ b/pkg/tcpip/link/loopback/BUILD
@@ -1,12 +1,11 @@
-load("//tools/go_stateify:defs.bzl", "go_library")
+load("//tools:defs.bzl", "go_library")
package(licenses = ["notice"])
go_library(
name = "loopback",
srcs = ["loopback.go"],
- importpath = "gvisor.dev/gvisor/pkg/tcpip/link/loopback",
- visibility = ["//:sandbox"],
+ visibility = ["//visibility:public"],
deps = [
"//pkg/tcpip",
"//pkg/tcpip/buffer",
diff --git a/pkg/tcpip/link/loopback/loopback.go b/pkg/tcpip/link/loopback/loopback.go
index a3b48fa73..073c84ef9 100644
--- a/pkg/tcpip/link/loopback/loopback.go
+++ b/pkg/tcpip/link/loopback/loopback.go
@@ -76,36 +76,40 @@ func (*endpoint) Wait() {}
// WritePacket implements stack.LinkEndpoint.WritePacket. It delivers outbound
// packets to the network-layer dispatcher.
-func (e *endpoint) WritePacket(_ *stack.Route, _ *stack.GSO, hdr buffer.Prependable, payload buffer.VectorisedView, protocol tcpip.NetworkProtocolNumber) *tcpip.Error {
- views := make([]buffer.View, 1, 1+len(payload.Views()))
- views[0] = hdr.View()
- views = append(views, payload.Views()...)
- vv := buffer.NewVectorisedView(len(views[0])+payload.Size(), views)
-
- // Because we're immediately turning around and writing the packet back to the
- // rx path, we intentionally don't preserve the remote and local link
- // addresses from the stack.Route we're passed.
- e.dispatcher.DeliverNetworkPacket(e, "" /* remote */, "" /* local */, protocol, vv, nil /* linkHeader */)
+func (e *endpoint) WritePacket(_ *stack.Route, _ *stack.GSO, protocol tcpip.NetworkProtocolNumber, pkt stack.PacketBuffer) *tcpip.Error {
+ views := make([]buffer.View, 1, 1+len(pkt.Data.Views()))
+ views[0] = pkt.Header.View()
+ views = append(views, pkt.Data.Views()...)
+
+ // Because we're immediately turning around and writing the packet back
+ // to the rx path, we intentionally don't preserve the remote and local
+ // link addresses from the stack.Route we're passed.
+ e.dispatcher.DeliverNetworkPacket(e, "" /* remote */, "" /* local */, protocol, stack.PacketBuffer{
+ Data: buffer.NewVectorisedView(len(views[0])+pkt.Data.Size(), views),
+ })
return nil
}
// WritePackets implements stack.LinkEndpoint.WritePackets.
-func (e *endpoint) WritePackets(_ *stack.Route, _ *stack.GSO, hdrs []stack.PacketDescriptor, payload buffer.VectorisedView, protocol tcpip.NetworkProtocolNumber) (int, *tcpip.Error) {
+func (e *endpoint) WritePackets(*stack.Route, *stack.GSO, stack.PacketBufferList, tcpip.NetworkProtocolNumber) (int, *tcpip.Error) {
panic("not implemented")
}
// WriteRawPacket implements stack.LinkEndpoint.WriteRawPacket.
-func (e *endpoint) WriteRawPacket(packet buffer.VectorisedView) *tcpip.Error {
- // Reject the packet if it's shorter than an ethernet header.
- if packet.Size() < header.EthernetMinimumSize {
+func (e *endpoint) WriteRawPacket(vv buffer.VectorisedView) *tcpip.Error {
+ // There should be an ethernet header at the beginning of vv.
+ hdr, ok := vv.PullUp(header.EthernetMinimumSize)
+ if !ok {
+ // Reject the packet if it's shorter than an ethernet header.
return tcpip.ErrBadAddress
}
-
- // There should be an ethernet header at the beginning of packet.
- linkHeader := header.Ethernet(packet.First()[:header.EthernetMinimumSize])
- packet.TrimFront(len(linkHeader))
- e.dispatcher.DeliverNetworkPacket(e, "" /* remote */, "" /* local */, linkHeader.Type(), packet, buffer.View(linkHeader))
+ linkHeader := header.Ethernet(hdr)
+ vv.TrimFront(len(linkHeader))
+ e.dispatcher.DeliverNetworkPacket(e, "" /* remote */, "" /* local */, linkHeader.Type(), stack.PacketBuffer{
+ Data: vv,
+ LinkHeader: buffer.View(linkHeader),
+ })
return nil
}
diff --git a/pkg/tcpip/link/muxed/BUILD b/pkg/tcpip/link/muxed/BUILD
index 1bab380b0..82b441b79 100644
--- a/pkg/tcpip/link/muxed/BUILD
+++ b/pkg/tcpip/link/muxed/BUILD
@@ -1,15 +1,11 @@
-load("//tools/go_stateify:defs.bzl", "go_library")
-load("@io_bazel_rules_go//go:def.bzl", "go_test")
+load("//tools:defs.bzl", "go_library", "go_test")
package(licenses = ["notice"])
go_library(
name = "muxed",
srcs = ["injectable.go"],
- importpath = "gvisor.dev/gvisor/pkg/tcpip/link/muxed",
- visibility = [
- "//visibility:public",
- ],
+ visibility = ["//visibility:public"],
deps = [
"//pkg/tcpip",
"//pkg/tcpip/buffer",
@@ -21,7 +17,7 @@ go_test(
name = "muxed_test",
size = "small",
srcs = ["injectable_test.go"],
- embed = [":muxed"],
+ library = ":muxed",
deps = [
"//pkg/tcpip",
"//pkg/tcpip/buffer",
diff --git a/pkg/tcpip/link/muxed/injectable.go b/pkg/tcpip/link/muxed/injectable.go
index 682b60291..a5478ce17 100644
--- a/pkg/tcpip/link/muxed/injectable.go
+++ b/pkg/tcpip/link/muxed/injectable.go
@@ -80,33 +80,33 @@ func (m *InjectableEndpoint) IsAttached() bool {
}
// InjectInbound implements stack.InjectableLinkEndpoint.
-func (m *InjectableEndpoint) InjectInbound(protocol tcpip.NetworkProtocolNumber, vv buffer.VectorisedView) {
- m.dispatcher.DeliverNetworkPacket(m, "" /* remote */, "" /* local */, protocol, vv, nil /* linkHeader */)
+func (m *InjectableEndpoint) InjectInbound(protocol tcpip.NetworkProtocolNumber, pkt stack.PacketBuffer) {
+ m.dispatcher.DeliverNetworkPacket(m, "" /* remote */, "" /* local */, protocol, pkt)
}
// WritePackets writes outbound packets to the appropriate
// LinkInjectableEndpoint based on the RemoteAddress. HandleLocal only works if
// r.RemoteAddress has a route registered in this endpoint.
-func (m *InjectableEndpoint) WritePackets(r *stack.Route, gso *stack.GSO, hdrs []stack.PacketDescriptor, payload buffer.VectorisedView, protocol tcpip.NetworkProtocolNumber) (int, *tcpip.Error) {
+func (m *InjectableEndpoint) WritePackets(r *stack.Route, gso *stack.GSO, pkts stack.PacketBufferList, protocol tcpip.NetworkProtocolNumber) (int, *tcpip.Error) {
endpoint, ok := m.routes[r.RemoteAddress]
if !ok {
return 0, tcpip.ErrNoRoute
}
- return endpoint.WritePackets(r, gso, hdrs, payload, protocol)
+ return endpoint.WritePackets(r, gso, pkts, protocol)
}
// WritePacket writes outbound packets to the appropriate LinkInjectableEndpoint
// based on the RemoteAddress. HandleLocal only works if r.RemoteAddress has a
// route registered in this endpoint.
-func (m *InjectableEndpoint) WritePacket(r *stack.Route, gso *stack.GSO, hdr buffer.Prependable, payload buffer.VectorisedView, protocol tcpip.NetworkProtocolNumber) *tcpip.Error {
+func (m *InjectableEndpoint) WritePacket(r *stack.Route, gso *stack.GSO, protocol tcpip.NetworkProtocolNumber, pkt stack.PacketBuffer) *tcpip.Error {
if endpoint, ok := m.routes[r.RemoteAddress]; ok {
- return endpoint.WritePacket(r, gso, hdr, payload, protocol)
+ return endpoint.WritePacket(r, gso, protocol, pkt)
}
return tcpip.ErrNoRoute
}
// WriteRawPacket implements stack.LinkEndpoint.WriteRawPacket.
-func (m *InjectableEndpoint) WriteRawPacket(packet buffer.VectorisedView) *tcpip.Error {
+func (m *InjectableEndpoint) WriteRawPacket(buffer.VectorisedView) *tcpip.Error {
// WriteRawPacket doesn't get a route or network address, so there's
// nowhere to write this.
return tcpip.ErrNoRoute
diff --git a/pkg/tcpip/link/muxed/injectable_test.go b/pkg/tcpip/link/muxed/injectable_test.go
index 9cd300af8..87c734c1f 100644
--- a/pkg/tcpip/link/muxed/injectable_test.go
+++ b/pkg/tcpip/link/muxed/injectable_test.go
@@ -50,8 +50,10 @@ func TestInjectableEndpointDispatch(t *testing.T) {
hdr.Prepend(1)[0] = 0xFA
packetRoute := stack.Route{RemoteAddress: dstIP}
- endpoint.WritePacket(&packetRoute, nil /* gso */, hdr,
- buffer.NewViewFromBytes([]byte{0xFB}).ToVectorisedView(), ipv4.ProtocolNumber)
+ endpoint.WritePacket(&packetRoute, nil /* gso */, ipv4.ProtocolNumber, stack.PacketBuffer{
+ Header: hdr,
+ Data: buffer.NewViewFromBytes([]byte{0xFB}).ToVectorisedView(),
+ })
buf := make([]byte, 6500)
bytesRead, err := sock.Read(buf)
@@ -68,8 +70,10 @@ func TestInjectableEndpointDispatchHdrOnly(t *testing.T) {
hdr := buffer.NewPrependable(1)
hdr.Prepend(1)[0] = 0xFA
packetRoute := stack.Route{RemoteAddress: dstIP}
- endpoint.WritePacket(&packetRoute, nil /* gso */, hdr,
- buffer.NewView(0).ToVectorisedView(), ipv4.ProtocolNumber)
+ endpoint.WritePacket(&packetRoute, nil /* gso */, ipv4.ProtocolNumber, stack.PacketBuffer{
+ Header: hdr,
+ Data: buffer.NewView(0).ToVectorisedView(),
+ })
buf := make([]byte, 6500)
bytesRead, err := sock.Read(buf)
if err != nil {
diff --git a/pkg/tcpip/link/rawfile/BUILD b/pkg/tcpip/link/rawfile/BUILD
index 05c7b8024..9cc08d0e2 100644
--- a/pkg/tcpip/link/rawfile/BUILD
+++ b/pkg/tcpip/link/rawfile/BUILD
@@ -1,4 +1,4 @@
-load("//tools/go_stateify:defs.bzl", "go_library")
+load("//tools:defs.bzl", "go_library", "go_test")
package(licenses = ["notice"])
@@ -12,12 +12,16 @@ go_library(
"errors.go",
"rawfile_unsafe.go",
],
- importpath = "gvisor.dev/gvisor/pkg/tcpip/link/rawfile",
- visibility = [
- "//visibility:public",
- ],
+ visibility = ["//visibility:public"],
deps = [
"//pkg/tcpip",
"@org_golang_x_sys//unix:go_default_library",
],
)
+
+go_test(
+ name = "rawfile_test",
+ size = "small",
+ srcs = ["rawfile_test.go"],
+ library = ":rawfile",
+)
diff --git a/pkg/tcpip/link/rawfile/blockingpoll_yield_unsafe.go b/pkg/tcpip/link/rawfile/blockingpoll_yield_unsafe.go
index dda3b10a6..0b5a6cf49 100644
--- a/pkg/tcpip/link/rawfile/blockingpoll_yield_unsafe.go
+++ b/pkg/tcpip/link/rawfile/blockingpoll_yield_unsafe.go
@@ -14,7 +14,7 @@
// +build linux,amd64 linux,arm64
// +build go1.12
-// +build !go1.14
+// +build !go1.15
// Check go:linkname function signatures when updating Go version.
diff --git a/pkg/tcpip/link/rawfile/rawfile_test.go b/pkg/tcpip/link/rawfile/rawfile_test.go
new file mode 100644
index 000000000..8f14ba761
--- /dev/null
+++ b/pkg/tcpip/link/rawfile/rawfile_test.go
@@ -0,0 +1,46 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// +build linux
+
+package rawfile
+
+import (
+ "syscall"
+ "testing"
+)
+
+func TestNonBlockingWrite3ZeroLength(t *testing.T) {
+ fd, err := syscall.Open("/dev/null", syscall.O_WRONLY, 0)
+ if err != nil {
+ t.Fatalf("failed to open /dev/null: %v", err)
+ }
+ defer syscall.Close(fd)
+
+ if err := NonBlockingWrite3(fd, []byte{}, []byte{0}, nil); err != nil {
+ t.Fatalf("failed to write: %v", err)
+ }
+}
+
+func TestNonBlockingWrite3Nil(t *testing.T) {
+ fd, err := syscall.Open("/dev/null", syscall.O_WRONLY, 0)
+ if err != nil {
+ t.Fatalf("failed to open /dev/null: %v", err)
+ }
+ defer syscall.Close(fd)
+
+ if err := NonBlockingWrite3(fd, nil, []byte{0}, nil); err != nil {
+ t.Fatalf("failed to write: %v", err)
+ }
+}
diff --git a/pkg/tcpip/link/rawfile/rawfile_unsafe.go b/pkg/tcpip/link/rawfile/rawfile_unsafe.go
index 44e25d475..92efd0bf8 100644
--- a/pkg/tcpip/link/rawfile/rawfile_unsafe.go
+++ b/pkg/tcpip/link/rawfile/rawfile_unsafe.go
@@ -76,9 +76,13 @@ func NonBlockingWrite3(fd int, b1, b2, b3 []byte) *tcpip.Error {
// We have two buffers. Build the iovec that represents them and issue
// a writev syscall.
+ var base *byte
+ if len(b1) > 0 {
+ base = &b1[0]
+ }
iovec := [3]syscall.Iovec{
{
- Base: &b1[0],
+ Base: base,
Len: uint64(len(b1)),
},
{
diff --git a/pkg/tcpip/link/sharedmem/BUILD b/pkg/tcpip/link/sharedmem/BUILD
index 0a5ea3dc4..13243ebbb 100644
--- a/pkg/tcpip/link/sharedmem/BUILD
+++ b/pkg/tcpip/link/sharedmem/BUILD
@@ -1,5 +1,4 @@
-load("//tools/go_stateify:defs.bzl", "go_library")
-load("@io_bazel_rules_go//go:def.bzl", "go_test")
+load("//tools:defs.bzl", "go_library", "go_test")
package(licenses = ["notice"])
@@ -11,12 +10,10 @@ go_library(
"sharedmem_unsafe.go",
"tx.go",
],
- importpath = "gvisor.dev/gvisor/pkg/tcpip/link/sharedmem",
- visibility = [
- "//:sandbox",
- ],
+ visibility = ["//visibility:public"],
deps = [
"//pkg/log",
+ "//pkg/sync",
"//pkg/tcpip",
"//pkg/tcpip/buffer",
"//pkg/tcpip/header",
@@ -31,8 +28,9 @@ go_test(
srcs = [
"sharedmem_test.go",
],
- embed = [":sharedmem"],
+ library = ":sharedmem",
deps = [
+ "//pkg/sync",
"//pkg/tcpip",
"//pkg/tcpip/buffer",
"//pkg/tcpip/header",
diff --git a/pkg/tcpip/link/sharedmem/pipe/BUILD b/pkg/tcpip/link/sharedmem/pipe/BUILD
index 330ed5e94..87020ec08 100644
--- a/pkg/tcpip/link/sharedmem/pipe/BUILD
+++ b/pkg/tcpip/link/sharedmem/pipe/BUILD
@@ -1,5 +1,4 @@
-load("//tools/go_stateify:defs.bzl", "go_library")
-load("@io_bazel_rules_go//go:def.bzl", "go_test")
+load("//tools:defs.bzl", "go_library", "go_test")
package(licenses = ["notice"])
@@ -11,8 +10,7 @@ go_library(
"rx.go",
"tx.go",
],
- importpath = "gvisor.dev/gvisor/pkg/tcpip/link/sharedmem/pipe",
- visibility = ["//:sandbox"],
+ visibility = ["//visibility:public"],
)
go_test(
@@ -20,5 +18,6 @@ go_test(
srcs = [
"pipe_test.go",
],
- embed = [":pipe"],
+ library = ":pipe",
+ deps = ["//pkg/sync"],
)
diff --git a/pkg/tcpip/link/sharedmem/pipe/pipe_test.go b/pkg/tcpip/link/sharedmem/pipe/pipe_test.go
index 59ef69a8b..dc239a0d0 100644
--- a/pkg/tcpip/link/sharedmem/pipe/pipe_test.go
+++ b/pkg/tcpip/link/sharedmem/pipe/pipe_test.go
@@ -18,8 +18,9 @@ import (
"math/rand"
"reflect"
"runtime"
- "sync"
"testing"
+
+ "gvisor.dev/gvisor/pkg/sync"
)
func TestSimpleReadWrite(t *testing.T) {
diff --git a/pkg/tcpip/link/sharedmem/queue/BUILD b/pkg/tcpip/link/sharedmem/queue/BUILD
index de1ce043d..3ba06af73 100644
--- a/pkg/tcpip/link/sharedmem/queue/BUILD
+++ b/pkg/tcpip/link/sharedmem/queue/BUILD
@@ -1,5 +1,4 @@
-load("//tools/go_stateify:defs.bzl", "go_library")
-load("@io_bazel_rules_go//go:def.bzl", "go_test")
+load("//tools:defs.bzl", "go_library", "go_test")
package(licenses = ["notice"])
@@ -9,8 +8,7 @@ go_library(
"rx.go",
"tx.go",
],
- importpath = "gvisor.dev/gvisor/pkg/tcpip/link/sharedmem/queue",
- visibility = ["//:sandbox"],
+ visibility = ["//visibility:public"],
deps = [
"//pkg/log",
"//pkg/tcpip/link/sharedmem/pipe",
@@ -22,7 +20,7 @@ go_test(
srcs = [
"queue_test.go",
],
- embed = [":queue"],
+ library = ":queue",
deps = [
"//pkg/tcpip/link/sharedmem/pipe",
],
diff --git a/pkg/tcpip/link/sharedmem/sharedmem.go b/pkg/tcpip/link/sharedmem/sharedmem.go
index 279e2b457..0796d717e 100644
--- a/pkg/tcpip/link/sharedmem/sharedmem.go
+++ b/pkg/tcpip/link/sharedmem/sharedmem.go
@@ -23,11 +23,11 @@
package sharedmem
import (
- "sync"
"sync/atomic"
"syscall"
"gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/sync"
"gvisor.dev/gvisor/pkg/tcpip"
"gvisor.dev/gvisor/pkg/tcpip/buffer"
"gvisor.dev/gvisor/pkg/tcpip/header"
@@ -185,9 +185,10 @@ func (e *endpoint) LinkAddress() tcpip.LinkAddress {
// WritePacket writes outbound packets to the file descriptor. If it is not
// currently writable, the packet is dropped.
-func (e *endpoint) WritePacket(r *stack.Route, _ *stack.GSO, hdr buffer.Prependable, payload buffer.VectorisedView, protocol tcpip.NetworkProtocolNumber) *tcpip.Error {
+func (e *endpoint) WritePacket(r *stack.Route, _ *stack.GSO, protocol tcpip.NetworkProtocolNumber, pkt stack.PacketBuffer) *tcpip.Error {
// Add the ethernet header here.
- eth := header.Ethernet(hdr.Prepend(header.EthernetMinimumSize))
+ eth := header.Ethernet(pkt.Header.Prepend(header.EthernetMinimumSize))
+ pkt.LinkHeader = buffer.View(eth)
ethHdr := &header.EthernetFields{
DstAddr: r.RemoteLinkAddress,
Type: protocol,
@@ -199,10 +200,10 @@ func (e *endpoint) WritePacket(r *stack.Route, _ *stack.GSO, hdr buffer.Prependa
}
eth.Encode(ethHdr)
- v := payload.ToView()
+ v := pkt.Data.ToView()
// Transmit the packet.
e.mu.Lock()
- ok := e.tx.transmit(hdr.View(), v)
+ ok := e.tx.transmit(pkt.Header.View(), v)
e.mu.Unlock()
if !ok {
@@ -213,13 +214,13 @@ func (e *endpoint) WritePacket(r *stack.Route, _ *stack.GSO, hdr buffer.Prependa
}
// WritePackets implements stack.LinkEndpoint.WritePackets.
-func (e *endpoint) WritePackets(r *stack.Route, _ *stack.GSO, hdrs []stack.PacketDescriptor, payload buffer.VectorisedView, protocol tcpip.NetworkProtocolNumber) (int, *tcpip.Error) {
+func (e *endpoint) WritePackets(r *stack.Route, _ *stack.GSO, pkts stack.PacketBufferList, protocol tcpip.NetworkProtocolNumber) (int, *tcpip.Error) {
panic("not implemented")
}
// WriteRawPacket implements stack.LinkEndpoint.WriteRawPacket.
-func (e *endpoint) WriteRawPacket(packet buffer.VectorisedView) *tcpip.Error {
- v := packet.ToView()
+func (e *endpoint) WriteRawPacket(vv buffer.VectorisedView) *tcpip.Error {
+ v := vv.ToView()
// Transmit the packet.
e.mu.Lock()
ok := e.tx.transmit(v, buffer.View{})
@@ -273,8 +274,11 @@ func (e *endpoint) dispatchLoop(d stack.NetworkDispatcher) {
}
// Send packet up the stack.
- eth := header.Ethernet(b)
- d.DeliverNetworkPacket(e, eth.SourceAddress(), eth.DestinationAddress(), eth.Type(), buffer.View(b[header.EthernetMinimumSize:]).ToVectorisedView(), buffer.View(eth))
+ eth := header.Ethernet(b[:header.EthernetMinimumSize])
+ d.DeliverNetworkPacket(e, eth.SourceAddress(), eth.DestinationAddress(), eth.Type(), stack.PacketBuffer{
+ Data: buffer.View(b[header.EthernetMinimumSize:]).ToVectorisedView(),
+ LinkHeader: buffer.View(eth),
+ })
}
// Clean state.
diff --git a/pkg/tcpip/link/sharedmem/sharedmem_test.go b/pkg/tcpip/link/sharedmem/sharedmem_test.go
index f3e9705c9..33f640b85 100644
--- a/pkg/tcpip/link/sharedmem/sharedmem_test.go
+++ b/pkg/tcpip/link/sharedmem/sharedmem_test.go
@@ -22,11 +22,11 @@ import (
"math/rand"
"os"
"strings"
- "sync"
"syscall"
"testing"
"time"
+ "gvisor.dev/gvisor/pkg/sync"
"gvisor.dev/gvisor/pkg/tcpip"
"gvisor.dev/gvisor/pkg/tcpip/buffer"
"gvisor.dev/gvisor/pkg/tcpip/header"
@@ -131,13 +131,12 @@ func newTestContext(t *testing.T, mtu, bufferSize uint32, addr tcpip.LinkAddress
return c
}
-func (c *testContext) DeliverNetworkPacket(_ stack.LinkEndpoint, remoteLinkAddr, localLinkAddr tcpip.LinkAddress, proto tcpip.NetworkProtocolNumber, vv buffer.VectorisedView, linkHeader buffer.View) {
+func (c *testContext) DeliverNetworkPacket(_ stack.LinkEndpoint, remoteLinkAddr, localLinkAddr tcpip.LinkAddress, proto tcpip.NetworkProtocolNumber, pkt stack.PacketBuffer) {
c.mu.Lock()
c.packets = append(c.packets, packetInfo{
- addr: remoteLinkAddr,
- proto: proto,
- vv: vv.Clone(nil),
- linkHeader: linkHeader,
+ addr: remoteLinkAddr,
+ proto: proto,
+ vv: pkt.Data.Clone(nil),
})
c.mu.Unlock()
@@ -274,7 +273,10 @@ func TestSimpleSend(t *testing.T) {
randomFill(buf)
proto := tcpip.NetworkProtocolNumber(rand.Intn(0x10000))
- if err := c.ep.WritePacket(&r, nil /* gso */, hdr, buf.ToVectorisedView(), proto); err != nil {
+ if err := c.ep.WritePacket(&r, nil /* gso */, proto, stack.PacketBuffer{
+ Header: hdr,
+ Data: buf.ToVectorisedView(),
+ }); err != nil {
t.Fatalf("WritePacket failed: %v", err)
}
@@ -343,7 +345,9 @@ func TestPreserveSrcAddressInSend(t *testing.T) {
hdr := buffer.NewPrependable(header.EthernetMinimumSize)
proto := tcpip.NetworkProtocolNumber(rand.Intn(0x10000))
- if err := c.ep.WritePacket(&r, nil /* gso */, hdr, buffer.VectorisedView{}, proto); err != nil {
+ if err := c.ep.WritePacket(&r, nil /* gso */, proto, stack.PacketBuffer{
+ Header: hdr,
+ }); err != nil {
t.Fatalf("WritePacket failed: %v", err)
}
@@ -397,7 +401,10 @@ func TestFillTxQueue(t *testing.T) {
for i := queuePipeSize / 40; i > 0; i-- {
hdr := buffer.NewPrependable(int(c.ep.MaxHeaderLength()))
- if err := c.ep.WritePacket(&r, nil /* gso */, hdr, buf.ToVectorisedView(), header.IPv4ProtocolNumber); err != nil {
+ if err := c.ep.WritePacket(&r, nil /* gso */, header.IPv4ProtocolNumber, stack.PacketBuffer{
+ Header: hdr,
+ Data: buf.ToVectorisedView(),
+ }); err != nil {
t.Fatalf("WritePacket failed unexpectedly: %v", err)
}
@@ -412,7 +419,10 @@ func TestFillTxQueue(t *testing.T) {
// Next attempt to write must fail.
hdr := buffer.NewPrependable(int(c.ep.MaxHeaderLength()))
- if want, err := tcpip.ErrWouldBlock, c.ep.WritePacket(&r, nil /* gso */, hdr, buf.ToVectorisedView(), header.IPv4ProtocolNumber); err != want {
+ if want, err := tcpip.ErrWouldBlock, c.ep.WritePacket(&r, nil /* gso */, header.IPv4ProtocolNumber, stack.PacketBuffer{
+ Header: hdr,
+ Data: buf.ToVectorisedView(),
+ }); err != want {
t.Fatalf("WritePacket return unexpected result: got %v, want %v", err, want)
}
}
@@ -437,7 +447,10 @@ func TestFillTxQueueAfterBadCompletion(t *testing.T) {
// Send two packets so that the id slice has at least two slots.
for i := 2; i > 0; i-- {
hdr := buffer.NewPrependable(int(c.ep.MaxHeaderLength()))
- if err := c.ep.WritePacket(&r, nil /* gso */, hdr, buf.ToVectorisedView(), header.IPv4ProtocolNumber); err != nil {
+ if err := c.ep.WritePacket(&r, nil /* gso */, header.IPv4ProtocolNumber, stack.PacketBuffer{
+ Header: hdr,
+ Data: buf.ToVectorisedView(),
+ }); err != nil {
t.Fatalf("WritePacket failed unexpectedly: %v", err)
}
}
@@ -457,7 +470,10 @@ func TestFillTxQueueAfterBadCompletion(t *testing.T) {
ids := make(map[uint64]struct{})
for i := queuePipeSize / 40; i > 0; i-- {
hdr := buffer.NewPrependable(int(c.ep.MaxHeaderLength()))
- if err := c.ep.WritePacket(&r, nil /* gso */, hdr, buf.ToVectorisedView(), header.IPv4ProtocolNumber); err != nil {
+ if err := c.ep.WritePacket(&r, nil /* gso */, header.IPv4ProtocolNumber, stack.PacketBuffer{
+ Header: hdr,
+ Data: buf.ToVectorisedView(),
+ }); err != nil {
t.Fatalf("WritePacket failed unexpectedly: %v", err)
}
@@ -472,7 +488,10 @@ func TestFillTxQueueAfterBadCompletion(t *testing.T) {
// Next attempt to write must fail.
hdr := buffer.NewPrependable(int(c.ep.MaxHeaderLength()))
- if want, err := tcpip.ErrWouldBlock, c.ep.WritePacket(&r, nil /* gso */, hdr, buf.ToVectorisedView(), header.IPv4ProtocolNumber); err != want {
+ if want, err := tcpip.ErrWouldBlock, c.ep.WritePacket(&r, nil /* gso */, header.IPv4ProtocolNumber, stack.PacketBuffer{
+ Header: hdr,
+ Data: buf.ToVectorisedView(),
+ }); err != want {
t.Fatalf("WritePacket return unexpected result: got %v, want %v", err, want)
}
}
@@ -495,7 +514,10 @@ func TestFillTxMemory(t *testing.T) {
ids := make(map[uint64]struct{})
for i := queueDataSize / bufferSize; i > 0; i-- {
hdr := buffer.NewPrependable(int(c.ep.MaxHeaderLength()))
- if err := c.ep.WritePacket(&r, nil /* gso */, hdr, buf.ToVectorisedView(), header.IPv4ProtocolNumber); err != nil {
+ if err := c.ep.WritePacket(&r, nil /* gso */, header.IPv4ProtocolNumber, stack.PacketBuffer{
+ Header: hdr,
+ Data: buf.ToVectorisedView(),
+ }); err != nil {
t.Fatalf("WritePacket failed unexpectedly: %v", err)
}
@@ -511,7 +533,10 @@ func TestFillTxMemory(t *testing.T) {
// Next attempt to write must fail.
hdr := buffer.NewPrependable(int(c.ep.MaxHeaderLength()))
- err := c.ep.WritePacket(&r, nil /* gso */, hdr, buf.ToVectorisedView(), header.IPv4ProtocolNumber)
+ err := c.ep.WritePacket(&r, nil /* gso */, header.IPv4ProtocolNumber, stack.PacketBuffer{
+ Header: hdr,
+ Data: buf.ToVectorisedView(),
+ })
if want := tcpip.ErrWouldBlock; err != want {
t.Fatalf("WritePacket return unexpected result: got %v, want %v", err, want)
}
@@ -536,7 +561,10 @@ func TestFillTxMemoryWithMultiBuffer(t *testing.T) {
// until there is only one buffer left.
for i := queueDataSize/bufferSize - 1; i > 0; i-- {
hdr := buffer.NewPrependable(int(c.ep.MaxHeaderLength()))
- if err := c.ep.WritePacket(&r, nil /* gso */, hdr, buf.ToVectorisedView(), header.IPv4ProtocolNumber); err != nil {
+ if err := c.ep.WritePacket(&r, nil /* gso */, header.IPv4ProtocolNumber, stack.PacketBuffer{
+ Header: hdr,
+ Data: buf.ToVectorisedView(),
+ }); err != nil {
t.Fatalf("WritePacket failed unexpectedly: %v", err)
}
@@ -549,7 +577,10 @@ func TestFillTxMemoryWithMultiBuffer(t *testing.T) {
{
hdr := buffer.NewPrependable(int(c.ep.MaxHeaderLength()))
uu := buffer.NewView(bufferSize).ToVectorisedView()
- if want, err := tcpip.ErrWouldBlock, c.ep.WritePacket(&r, nil /* gso */, hdr, uu, header.IPv4ProtocolNumber); err != want {
+ if want, err := tcpip.ErrWouldBlock, c.ep.WritePacket(&r, nil /* gso */, header.IPv4ProtocolNumber, stack.PacketBuffer{
+ Header: hdr,
+ Data: uu,
+ }); err != want {
t.Fatalf("WritePacket return unexpected result: got %v, want %v", err, want)
}
}
@@ -557,7 +588,10 @@ func TestFillTxMemoryWithMultiBuffer(t *testing.T) {
// Attempt to write the one-buffer packet again. It must succeed.
{
hdr := buffer.NewPrependable(int(c.ep.MaxHeaderLength()))
- if err := c.ep.WritePacket(&r, nil /* gso */, hdr, buf.ToVectorisedView(), header.IPv4ProtocolNumber); err != nil {
+ if err := c.ep.WritePacket(&r, nil /* gso */, header.IPv4ProtocolNumber, stack.PacketBuffer{
+ Header: hdr,
+ Data: buf.ToVectorisedView(),
+ }); err != nil {
t.Fatalf("WritePacket failed unexpectedly: %v", err)
}
}
@@ -640,7 +674,7 @@ func TestSimpleReceive(t *testing.T) {
// Wait for packet to be received, then check it.
c.waitForPackets(1, time.After(5*time.Second), "Timeout waiting for packet")
c.mu.Lock()
- rcvd := []byte(c.packets[0].vv.First())
+ rcvd := []byte(c.packets[0].vv.ToView())
c.packets = c.packets[:0]
c.mu.Unlock()
diff --git a/pkg/tcpip/link/sniffer/BUILD b/pkg/tcpip/link/sniffer/BUILD
index 1756114e6..230a8d53a 100644
--- a/pkg/tcpip/link/sniffer/BUILD
+++ b/pkg/tcpip/link/sniffer/BUILD
@@ -1,4 +1,4 @@
-load("//tools/go_stateify:defs.bzl", "go_library")
+load("//tools:defs.bzl", "go_library")
package(licenses = ["notice"])
@@ -8,10 +8,7 @@ go_library(
"pcap.go",
"sniffer.go",
],
- importpath = "gvisor.dev/gvisor/pkg/tcpip/link/sniffer",
- visibility = [
- "//visibility:public",
- ],
+ visibility = ["//visibility:public"],
deps = [
"//pkg/log",
"//pkg/tcpip",
diff --git a/pkg/tcpip/link/sniffer/sniffer.go b/pkg/tcpip/link/sniffer/sniffer.go
index 39757ea2a..0799c8f4d 100644
--- a/pkg/tcpip/link/sniffer/sniffer.go
+++ b/pkg/tcpip/link/sniffer/sniffer.go
@@ -21,11 +21,9 @@
package sniffer
import (
- "bytes"
"encoding/binary"
"fmt"
"io"
- "os"
"sync/atomic"
"time"
@@ -42,17 +40,24 @@ import (
// LogPackets must be accessed atomically.
var LogPackets uint32 = 1
-// LogPacketsToFile is a flag used to enable or disable logging packets to a
-// pcap file. Valid values are 0 or 1. A file must have been specified when the
+// LogPacketsToPCAP is a flag used to enable or disable logging packets to a
+// pcap writer. Valid values are 0 or 1. A writer must have been specified when the
// sniffer was created for this flag to have effect.
//
-// LogPacketsToFile must be accessed atomically.
-var LogPacketsToFile uint32 = 1
+// LogPacketsToPCAP must be accessed atomically.
+var LogPacketsToPCAP uint32 = 1
+
+var transportProtocolMinSizes map[tcpip.TransportProtocolNumber]int = map[tcpip.TransportProtocolNumber]int{
+ header.ICMPv4ProtocolNumber: header.IPv4MinimumSize,
+ header.ICMPv6ProtocolNumber: header.IPv6MinimumSize,
+ header.UDPProtocolNumber: header.UDPMinimumSize,
+ header.TCPProtocolNumber: header.TCPMinimumSize,
+}
type endpoint struct {
dispatcher stack.NetworkDispatcher
lower stack.LinkEndpoint
- file *os.File
+ writer io.Writer
maxPCAPLen uint32
}
@@ -92,23 +97,22 @@ func writePCAPHeader(w io.Writer, maxLen uint32) error {
})
}
-// NewWithFile creates a new sniffer link-layer endpoint. It wraps around
-// another endpoint and logs packets and they traverse the endpoint.
+// NewWithWriter creates a new sniffer link-layer endpoint. It wraps around
+// another endpoint and logs packets as they traverse the endpoint.
//
-// Packets can be logged to file in the pcap format. A sniffer created
-// with this function will not emit packets using the standard log
-// package.
+// Packets are logged to writer in the pcap format. A sniffer created with this
+// function will not emit packets using the standard log package.
//
// snapLen is the maximum amount of a packet to be saved. Packets with a length
-// less than or equal too snapLen will be saved in their entirety. Longer
+// less than or equal to snapLen will be saved in their entirety. Longer
// packets will be truncated to snapLen.
-func NewWithFile(lower stack.LinkEndpoint, file *os.File, snapLen uint32) (stack.LinkEndpoint, error) {
- if err := writePCAPHeader(file, snapLen); err != nil {
+func NewWithWriter(lower stack.LinkEndpoint, writer io.Writer, snapLen uint32) (stack.LinkEndpoint, error) {
+ if err := writePCAPHeader(writer, snapLen); err != nil {
return nil, err
}
return &endpoint{
lower: lower,
- file: file,
+ writer: writer,
maxPCAPLen: snapLen,
}, nil
}
@@ -116,38 +120,9 @@ func NewWithFile(lower stack.LinkEndpoint, file *os.File, snapLen uint32) (stack
// DeliverNetworkPacket implements the stack.NetworkDispatcher interface. It is
// called by the link-layer endpoint being wrapped when a packet arrives, and
// logs the packet before forwarding to the actual dispatcher.
-func (e *endpoint) DeliverNetworkPacket(linkEP stack.LinkEndpoint, remote, local tcpip.LinkAddress, protocol tcpip.NetworkProtocolNumber, vv buffer.VectorisedView, linkHeader buffer.View) {
- if atomic.LoadUint32(&LogPackets) == 1 && e.file == nil {
- logPacket("recv", protocol, vv.First(), nil)
- }
- if e.file != nil && atomic.LoadUint32(&LogPacketsToFile) == 1 {
- vs := vv.Views()
- length := vv.Size()
- if length > int(e.maxPCAPLen) {
- length = int(e.maxPCAPLen)
- }
-
- buf := bytes.NewBuffer(make([]byte, 0, pcapPacketHeaderLen+length))
- if err := binary.Write(buf, binary.BigEndian, newPCAPPacketHeader(uint32(length), uint32(vv.Size()))); err != nil {
- panic(err)
- }
- for _, v := range vs {
- if length == 0 {
- break
- }
- if len(v) > length {
- v = v[:length]
- }
- if _, err := buf.Write([]byte(v)); err != nil {
- panic(err)
- }
- length -= len(v)
- }
- if _, err := e.file.Write(buf.Bytes()); err != nil {
- panic(err)
- }
- }
- e.dispatcher.DeliverNetworkPacket(e, remote, local, protocol, vv, linkHeader)
+func (e *endpoint) DeliverNetworkPacket(linkEP stack.LinkEndpoint, remote, local tcpip.LinkAddress, protocol tcpip.NetworkProtocolNumber, pkt stack.PacketBuffer) {
+ e.dumpPacket("recv", nil, protocol, &pkt)
+ e.dispatcher.DeliverNetworkPacket(e, remote, local, protocol, pkt)
}
// Attach implements the stack.LinkEndpoint interface. It saves the dispatcher
@@ -193,31 +168,39 @@ func (e *endpoint) GSOMaxSize() uint32 {
return 0
}
-func (e *endpoint) dumpPacket(gso *stack.GSO, hdr buffer.Prependable, payload buffer.VectorisedView, protocol tcpip.NetworkProtocolNumber) {
- if atomic.LoadUint32(&LogPackets) == 1 && e.file == nil {
- logPacket("send", protocol, hdr.View(), gso)
+func (e *endpoint) dumpPacket(prefix string, gso *stack.GSO, protocol tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) {
+ writer := e.writer
+ if writer == nil && atomic.LoadUint32(&LogPackets) == 1 {
+ logPacket(prefix, protocol, pkt, gso)
}
- if e.file != nil && atomic.LoadUint32(&LogPacketsToFile) == 1 {
- hdrBuf := hdr.View()
- length := len(hdrBuf) + payload.Size()
- if length > int(e.maxPCAPLen) {
- length = int(e.maxPCAPLen)
+ if writer != nil && atomic.LoadUint32(&LogPacketsToPCAP) == 1 {
+ totalLength := pkt.Header.UsedLength() + pkt.Data.Size()
+ length := totalLength
+ if max := int(e.maxPCAPLen); length > max {
+ length = max
}
-
- buf := bytes.NewBuffer(make([]byte, 0, pcapPacketHeaderLen+length))
- if err := binary.Write(buf, binary.BigEndian, newPCAPPacketHeader(uint32(length), uint32(len(hdrBuf)+payload.Size()))); err != nil {
+ if err := binary.Write(writer, binary.BigEndian, newPCAPPacketHeader(uint32(length), uint32(totalLength))); err != nil {
panic(err)
}
- if len(hdrBuf) > length {
- hdrBuf = hdrBuf[:length]
- }
- if _, err := buf.Write(hdrBuf); err != nil {
- panic(err)
+ write := func(b []byte) {
+ if len(b) > length {
+ b = b[:length]
+ }
+ for len(b) != 0 {
+ n, err := writer.Write(b)
+ if err != nil {
+ panic(err)
+ }
+ b = b[n:]
+ length -= n
+ }
}
- length -= len(hdrBuf)
- logVectorisedView(payload, length, buf)
- if _, err := e.file.Write(buf.Bytes()); err != nil {
- panic(err)
+ write(pkt.Header.View())
+ for _, view := range pkt.Data.Views() {
+ if length == 0 {
+ break
+ }
+ write(view)
}
}
}
@@ -225,68 +208,33 @@ func (e *endpoint) dumpPacket(gso *stack.GSO, hdr buffer.Prependable, payload bu
// WritePacket implements the stack.LinkEndpoint interface. It is called by
// higher-level protocols to write packets; it just logs the packet and
// forwards the request to the lower endpoint.
-func (e *endpoint) WritePacket(r *stack.Route, gso *stack.GSO, hdr buffer.Prependable, payload buffer.VectorisedView, protocol tcpip.NetworkProtocolNumber) *tcpip.Error {
- e.dumpPacket(gso, hdr, payload, protocol)
- return e.lower.WritePacket(r, gso, hdr, payload, protocol)
+func (e *endpoint) WritePacket(r *stack.Route, gso *stack.GSO, protocol tcpip.NetworkProtocolNumber, pkt stack.PacketBuffer) *tcpip.Error {
+ e.dumpPacket("send", gso, protocol, &pkt)
+ return e.lower.WritePacket(r, gso, protocol, pkt)
}
// WritePackets implements the stack.LinkEndpoint interface. It is called by
// higher-level protocols to write packets; it just logs the packet and
// forwards the request to the lower endpoint.
-func (e *endpoint) WritePackets(r *stack.Route, gso *stack.GSO, hdrs []stack.PacketDescriptor, payload buffer.VectorisedView, protocol tcpip.NetworkProtocolNumber) (int, *tcpip.Error) {
- view := payload.ToView()
- for _, d := range hdrs {
- e.dumpPacket(gso, d.Hdr, buffer.NewVectorisedView(d.Size, []buffer.View{view[d.Off:][:d.Size]}), protocol)
+func (e *endpoint) WritePackets(r *stack.Route, gso *stack.GSO, pkts stack.PacketBufferList, protocol tcpip.NetworkProtocolNumber) (int, *tcpip.Error) {
+ for pkt := pkts.Front(); pkt != nil; pkt = pkt.Next() {
+ e.dumpPacket("send", gso, protocol, pkt)
}
- return e.lower.WritePackets(r, gso, hdrs, payload, protocol)
+ return e.lower.WritePackets(r, gso, pkts, protocol)
}
// WriteRawPacket implements stack.LinkEndpoint.WriteRawPacket.
-func (e *endpoint) WriteRawPacket(packet buffer.VectorisedView) *tcpip.Error {
- if atomic.LoadUint32(&LogPackets) == 1 && e.file == nil {
- logPacket("send", 0, buffer.View("[raw packet, no header available]"), nil /* gso */)
- }
- if e.file != nil && atomic.LoadUint32(&LogPacketsToFile) == 1 {
- length := packet.Size()
- if length > int(e.maxPCAPLen) {
- length = int(e.maxPCAPLen)
- }
-
- buf := bytes.NewBuffer(make([]byte, 0, pcapPacketHeaderLen+length))
- if err := binary.Write(buf, binary.BigEndian, newPCAPPacketHeader(uint32(length), uint32(packet.Size()))); err != nil {
- panic(err)
- }
- logVectorisedView(packet, length, buf)
- if _, err := e.file.Write(buf.Bytes()); err != nil {
- panic(err)
- }
- }
- return e.lower.WriteRawPacket(packet)
-}
-
-func logVectorisedView(vv buffer.VectorisedView, length int, buf *bytes.Buffer) {
- if length <= 0 {
- return
- }
- for _, v := range vv.Views() {
- if len(v) > length {
- v = v[:length]
- }
- n, err := buf.Write(v)
- if err != nil {
- panic(err)
- }
- length -= n
- if length == 0 {
- return
- }
- }
+func (e *endpoint) WriteRawPacket(vv buffer.VectorisedView) *tcpip.Error {
+ e.dumpPacket("send", nil, 0, &stack.PacketBuffer{
+ Data: vv,
+ })
+ return e.lower.WriteRawPacket(vv)
}
// Wait implements stack.LinkEndpoint.Wait.
-func (*endpoint) Wait() {}
+func (e *endpoint) Wait() { e.lower.Wait() }
-func logPacket(prefix string, protocol tcpip.NetworkProtocolNumber, b buffer.View, gso *stack.GSO) {
+func logPacket(prefix string, protocol tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer, gso *stack.GSO) {
// Figure out the network layer info.
var transProto uint8
src := tcpip.Address("unknown")
@@ -295,28 +243,49 @@ func logPacket(prefix string, protocol tcpip.NetworkProtocolNumber, b buffer.Vie
size := uint16(0)
var fragmentOffset uint16
var moreFragments bool
+
+ // Create a clone of pkt, including any headers if present. Avoid allocating
+ // backing memory for the clone.
+ views := [8]buffer.View{}
+ vv := buffer.NewVectorisedView(0, views[:0])
+ vv.AppendView(pkt.Header.View())
+ vv.Append(pkt.Data)
+
switch protocol {
case header.IPv4ProtocolNumber:
- ipv4 := header.IPv4(b)
+ hdr, ok := vv.PullUp(header.IPv4MinimumSize)
+ if !ok {
+ return
+ }
+ ipv4 := header.IPv4(hdr)
fragmentOffset = ipv4.FragmentOffset()
moreFragments = ipv4.Flags()&header.IPv4FlagMoreFragments == header.IPv4FlagMoreFragments
src = ipv4.SourceAddress()
dst = ipv4.DestinationAddress()
transProto = ipv4.Protocol()
size = ipv4.TotalLength() - uint16(ipv4.HeaderLength())
- b = b[ipv4.HeaderLength():]
+ vv.TrimFront(int(ipv4.HeaderLength()))
id = int(ipv4.ID())
case header.IPv6ProtocolNumber:
- ipv6 := header.IPv6(b)
+ hdr, ok := vv.PullUp(header.IPv6MinimumSize)
+ if !ok {
+ return
+ }
+ ipv6 := header.IPv6(hdr)
src = ipv6.SourceAddress()
dst = ipv6.DestinationAddress()
transProto = ipv6.NextHeader()
size = ipv6.PayloadLength()
- b = b[header.IPv6MinimumSize:]
+ vv.TrimFront(header.IPv6MinimumSize)
case header.ARPProtocolNumber:
- arp := header.ARP(b)
+ hdr, ok := vv.PullUp(header.ARPSize)
+ if !ok {
+ return
+ }
+ vv.TrimFront(header.ARPSize)
+ arp := header.ARP(hdr)
log.Infof(
"%s arp %v (%v) -> %v (%v) valid:%v",
prefix,
@@ -330,6 +299,13 @@ func logPacket(prefix string, protocol tcpip.NetworkProtocolNumber, b buffer.Vie
return
}
+ // We aren't guaranteed to have a transport header - it's possible for
+ // writes via raw endpoints to contain only network headers.
+ if minSize, ok := transportProtocolMinSizes[tcpip.TransportProtocolNumber(transProto)]; ok && vv.Size() < minSize {
+ log.Infof("%s %v -> %v transport protocol: %d, but no transport header found (possible raw packet)", prefix, src, dst, transProto)
+ return
+ }
+
// Figure out the transport layer info.
transName := "unknown"
srcPort := uint16(0)
@@ -338,7 +314,11 @@ func logPacket(prefix string, protocol tcpip.NetworkProtocolNumber, b buffer.Vie
switch tcpip.TransportProtocolNumber(transProto) {
case header.ICMPv4ProtocolNumber:
transName = "icmp"
- icmp := header.ICMPv4(b)
+ hdr, ok := vv.PullUp(header.ICMPv4MinimumSize)
+ if !ok {
+ break
+ }
+ icmp := header.ICMPv4(hdr)
icmpType := "unknown"
if fragmentOffset == 0 {
switch icmp.Type() {
@@ -371,7 +351,11 @@ func logPacket(prefix string, protocol tcpip.NetworkProtocolNumber, b buffer.Vie
case header.ICMPv6ProtocolNumber:
transName = "icmp"
- icmp := header.ICMPv6(b)
+ hdr, ok := vv.PullUp(header.ICMPv6MinimumSize)
+ if !ok {
+ break
+ }
+ icmp := header.ICMPv6(hdr)
icmpType := "unknown"
switch icmp.Type() {
case header.ICMPv6DstUnreachable:
@@ -402,7 +386,11 @@ func logPacket(prefix string, protocol tcpip.NetworkProtocolNumber, b buffer.Vie
case header.UDPProtocolNumber:
transName = "udp"
- udp := header.UDP(b)
+ hdr, ok := vv.PullUp(header.UDPMinimumSize)
+ if !ok {
+ break
+ }
+ udp := header.UDP(hdr)
if fragmentOffset == 0 && len(udp) >= header.UDPMinimumSize {
srcPort = udp.SourcePort()
dstPort = udp.DestinationPort()
@@ -412,7 +400,11 @@ func logPacket(prefix string, protocol tcpip.NetworkProtocolNumber, b buffer.Vie
case header.TCPProtocolNumber:
transName = "tcp"
- tcp := header.TCP(b)
+ hdr, ok := vv.PullUp(header.TCPMinimumSize)
+ if !ok {
+ break
+ }
+ tcp := header.TCP(hdr)
if fragmentOffset == 0 && len(tcp) >= header.TCPMinimumSize {
offset := int(tcp.DataOffset())
if offset < header.TCPMinimumSize {
diff --git a/pkg/tcpip/link/tun/BUILD b/pkg/tcpip/link/tun/BUILD
index 92dce8fac..e0db6cf54 100644
--- a/pkg/tcpip/link/tun/BUILD
+++ b/pkg/tcpip/link/tun/BUILD
@@ -1,12 +1,25 @@
-load("//tools/go_stateify:defs.bzl", "go_library")
+load("//tools:defs.bzl", "go_library")
package(licenses = ["notice"])
go_library(
name = "tun",
- srcs = ["tun_unsafe.go"],
- importpath = "gvisor.dev/gvisor/pkg/tcpip/link/tun",
- visibility = [
- "//visibility:public",
+ srcs = [
+ "device.go",
+ "protocol.go",
+ "tun_unsafe.go",
+ ],
+ visibility = ["//visibility:public"],
+ deps = [
+ "//pkg/abi/linux",
+ "//pkg/refs",
+ "//pkg/sync",
+ "//pkg/syserror",
+ "//pkg/tcpip",
+ "//pkg/tcpip/buffer",
+ "//pkg/tcpip/header",
+ "//pkg/tcpip/link/channel",
+ "//pkg/tcpip/stack",
+ "//pkg/waiter",
],
)
diff --git a/pkg/tcpip/link/tun/device.go b/pkg/tcpip/link/tun/device.go
new file mode 100644
index 000000000..617446ea2
--- /dev/null
+++ b/pkg/tcpip/link/tun/device.go
@@ -0,0 +1,358 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package tun
+
+import (
+ "fmt"
+
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/refs"
+ "gvisor.dev/gvisor/pkg/sync"
+ "gvisor.dev/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/tcpip"
+ "gvisor.dev/gvisor/pkg/tcpip/buffer"
+ "gvisor.dev/gvisor/pkg/tcpip/header"
+ "gvisor.dev/gvisor/pkg/tcpip/link/channel"
+ "gvisor.dev/gvisor/pkg/tcpip/stack"
+ "gvisor.dev/gvisor/pkg/waiter"
+)
+
+const (
+ // drivers/net/tun.c:tun_net_init()
+ defaultDevMtu = 1500
+
+ // Queue length for outbound packet, arriving at fd side for read. Overflow
+ // causes packet drops. gVisor implementation-specific.
+ defaultDevOutQueueLen = 1024
+)
+
+var zeroMAC [6]byte
+
+// Device is an opened /dev/net/tun device.
+//
+// +stateify savable
+type Device struct {
+ waiter.Queue
+
+ mu sync.RWMutex `state:"nosave"`
+ endpoint *tunEndpoint
+ notifyHandle *channel.NotificationHandle
+ flags uint16
+}
+
+// beforeSave is invoked by stateify.
+func (d *Device) beforeSave() {
+ d.mu.Lock()
+ defer d.mu.Unlock()
+ // TODO(b/110961832): Restore the device to stack. At this moment, the stack
+ // is not savable.
+ if d.endpoint != nil {
+ panic("/dev/net/tun does not support save/restore when a device is associated with it.")
+ }
+}
+
+// Release implements fs.FileOperations.Release.
+func (d *Device) Release() {
+ d.mu.Lock()
+ defer d.mu.Unlock()
+
+ // Decrease refcount if there is an endpoint associated with this file.
+ if d.endpoint != nil {
+ d.endpoint.RemoveNotify(d.notifyHandle)
+ d.endpoint.DecRef()
+ d.endpoint = nil
+ }
+}
+
+// SetIff services TUNSETIFF ioctl(2) request.
+func (d *Device) SetIff(s *stack.Stack, name string, flags uint16) error {
+ d.mu.Lock()
+ defer d.mu.Unlock()
+
+ if d.endpoint != nil {
+ return syserror.EINVAL
+ }
+
+ // Input validations.
+ isTun := flags&linux.IFF_TUN != 0
+ isTap := flags&linux.IFF_TAP != 0
+ supportedFlags := uint16(linux.IFF_TUN | linux.IFF_TAP | linux.IFF_NO_PI)
+ if isTap && isTun || !isTap && !isTun || flags&^supportedFlags != 0 {
+ return syserror.EINVAL
+ }
+
+ prefix := "tun"
+ if isTap {
+ prefix = "tap"
+ }
+
+ linkCaps := stack.CapabilityNone
+ if isTap {
+ linkCaps |= stack.CapabilityResolutionRequired
+ }
+
+ endpoint, err := attachOrCreateNIC(s, name, prefix, linkCaps)
+ if err != nil {
+ return syserror.EINVAL
+ }
+
+ d.endpoint = endpoint
+ d.notifyHandle = d.endpoint.AddNotify(d)
+ d.flags = flags
+ return nil
+}
+
+func attachOrCreateNIC(s *stack.Stack, name, prefix string, linkCaps stack.LinkEndpointCapabilities) (*tunEndpoint, error) {
+ for {
+ // 1. Try to attach to an existing NIC.
+ if name != "" {
+ if nic, found := s.GetNICByName(name); found {
+ endpoint, ok := nic.LinkEndpoint().(*tunEndpoint)
+ if !ok {
+ // Not a NIC created by tun device.
+ return nil, syserror.EOPNOTSUPP
+ }
+ if !endpoint.TryIncRef() {
+ // Race detected: NIC got deleted in between.
+ continue
+ }
+ return endpoint, nil
+ }
+ }
+
+ // 2. Creating a new NIC.
+ id := tcpip.NICID(s.UniqueID())
+ endpoint := &tunEndpoint{
+ Endpoint: channel.New(defaultDevOutQueueLen, defaultDevMtu, ""),
+ stack: s,
+ nicID: id,
+ name: name,
+ }
+ endpoint.Endpoint.LinkEPCapabilities = linkCaps
+ if endpoint.name == "" {
+ endpoint.name = fmt.Sprintf("%s%d", prefix, id)
+ }
+ err := s.CreateNICWithOptions(endpoint.nicID, endpoint, stack.NICOptions{
+ Name: endpoint.name,
+ })
+ switch err {
+ case nil:
+ return endpoint, nil
+ case tcpip.ErrDuplicateNICID:
+ // Race detected: A NIC has been created in between.
+ continue
+ default:
+ return nil, syserror.EINVAL
+ }
+ }
+}
+
+// Write inject one inbound packet to the network interface.
+func (d *Device) Write(data []byte) (int64, error) {
+ d.mu.RLock()
+ endpoint := d.endpoint
+ d.mu.RUnlock()
+ if endpoint == nil {
+ return 0, syserror.EBADFD
+ }
+ if !endpoint.IsAttached() {
+ return 0, syserror.EIO
+ }
+
+ dataLen := int64(len(data))
+
+ // Packet information.
+ var pktInfoHdr PacketInfoHeader
+ if !d.hasFlags(linux.IFF_NO_PI) {
+ if len(data) < PacketInfoHeaderSize {
+ // Ignore bad packet.
+ return dataLen, nil
+ }
+ pktInfoHdr = PacketInfoHeader(data[:PacketInfoHeaderSize])
+ data = data[PacketInfoHeaderSize:]
+ }
+
+ // Ethernet header (TAP only).
+ var ethHdr header.Ethernet
+ if d.hasFlags(linux.IFF_TAP) {
+ if len(data) < header.EthernetMinimumSize {
+ // Ignore bad packet.
+ return dataLen, nil
+ }
+ ethHdr = header.Ethernet(data[:header.EthernetMinimumSize])
+ data = data[header.EthernetMinimumSize:]
+ }
+
+ // Try to determine network protocol number, default zero.
+ var protocol tcpip.NetworkProtocolNumber
+ switch {
+ case pktInfoHdr != nil:
+ protocol = pktInfoHdr.Protocol()
+ case ethHdr != nil:
+ protocol = ethHdr.Type()
+ }
+
+ // Try to determine remote link address, default zero.
+ var remote tcpip.LinkAddress
+ switch {
+ case ethHdr != nil:
+ remote = ethHdr.SourceAddress()
+ default:
+ remote = tcpip.LinkAddress(zeroMAC[:])
+ }
+
+ pkt := stack.PacketBuffer{
+ Data: buffer.View(data).ToVectorisedView(),
+ }
+ if ethHdr != nil {
+ pkt.LinkHeader = buffer.View(ethHdr)
+ }
+ endpoint.InjectLinkAddr(protocol, remote, pkt)
+ return dataLen, nil
+}
+
+// Read reads one outgoing packet from the network interface.
+func (d *Device) Read() ([]byte, error) {
+ d.mu.RLock()
+ endpoint := d.endpoint
+ d.mu.RUnlock()
+ if endpoint == nil {
+ return nil, syserror.EBADFD
+ }
+
+ for {
+ info, ok := endpoint.Read()
+ if !ok {
+ return nil, syserror.ErrWouldBlock
+ }
+
+ v, ok := d.encodePkt(&info)
+ if !ok {
+ // Ignore unsupported packet.
+ continue
+ }
+ return v, nil
+ }
+}
+
+// encodePkt encodes packet for fd side.
+func (d *Device) encodePkt(info *channel.PacketInfo) (buffer.View, bool) {
+ var vv buffer.VectorisedView
+
+ // Packet information.
+ if !d.hasFlags(linux.IFF_NO_PI) {
+ hdr := make(PacketInfoHeader, PacketInfoHeaderSize)
+ hdr.Encode(&PacketInfoFields{
+ Protocol: info.Proto,
+ })
+ vv.AppendView(buffer.View(hdr))
+ }
+
+ // If the packet does not already have link layer header, and the route
+ // does not exist, we can't compute it. This is possibly a raw packet, tun
+ // device doesn't support this at the moment.
+ if info.Pkt.LinkHeader == nil && info.Route.RemoteLinkAddress == "" {
+ return nil, false
+ }
+
+ // Ethernet header (TAP only).
+ if d.hasFlags(linux.IFF_TAP) {
+ // Add ethernet header if not provided.
+ if info.Pkt.LinkHeader == nil {
+ hdr := &header.EthernetFields{
+ SrcAddr: info.Route.LocalLinkAddress,
+ DstAddr: info.Route.RemoteLinkAddress,
+ Type: info.Proto,
+ }
+ if hdr.SrcAddr == "" {
+ hdr.SrcAddr = d.endpoint.LinkAddress()
+ }
+
+ eth := make(header.Ethernet, header.EthernetMinimumSize)
+ eth.Encode(hdr)
+ vv.AppendView(buffer.View(eth))
+ } else {
+ vv.AppendView(info.Pkt.LinkHeader)
+ }
+ }
+
+ // Append upper headers.
+ vv.AppendView(buffer.View(info.Pkt.Header.View()[len(info.Pkt.LinkHeader):]))
+ // Append data payload.
+ vv.Append(info.Pkt.Data)
+
+ return vv.ToView(), true
+}
+
+// Name returns the name of the attached network interface. Empty string if
+// unattached.
+func (d *Device) Name() string {
+ d.mu.RLock()
+ defer d.mu.RUnlock()
+ if d.endpoint != nil {
+ return d.endpoint.name
+ }
+ return ""
+}
+
+// Flags returns the flags set for d. Zero value if unset.
+func (d *Device) Flags() uint16 {
+ d.mu.RLock()
+ defer d.mu.RUnlock()
+ return d.flags
+}
+
+func (d *Device) hasFlags(flags uint16) bool {
+ return d.flags&flags == flags
+}
+
+// Readiness implements watier.Waitable.Readiness.
+func (d *Device) Readiness(mask waiter.EventMask) waiter.EventMask {
+ if mask&waiter.EventIn != 0 {
+ d.mu.RLock()
+ endpoint := d.endpoint
+ d.mu.RUnlock()
+ if endpoint != nil && endpoint.NumQueued() == 0 {
+ mask &= ^waiter.EventIn
+ }
+ }
+ return mask & (waiter.EventIn | waiter.EventOut)
+}
+
+// WriteNotify implements channel.Notification.WriteNotify.
+func (d *Device) WriteNotify() {
+ d.Notify(waiter.EventIn)
+}
+
+// tunEndpoint is the link endpoint for the NIC created by the tun device.
+//
+// It is ref-counted as multiple opening files can attach to the same NIC.
+// The last owner is responsible for deleting the NIC.
+type tunEndpoint struct {
+ *channel.Endpoint
+
+ refs.AtomicRefCount
+
+ stack *stack.Stack
+ nicID tcpip.NICID
+ name string
+}
+
+// DecRef decrements refcount of e, removes NIC if refcount goes to 0.
+func (e *tunEndpoint) DecRef() {
+ e.DecRefWithDestructor(func() {
+ e.stack.RemoveNIC(e.nicID)
+ })
+}
diff --git a/pkg/tcpip/link/tun/protocol.go b/pkg/tcpip/link/tun/protocol.go
new file mode 100644
index 000000000..89d9d91a9
--- /dev/null
+++ b/pkg/tcpip/link/tun/protocol.go
@@ -0,0 +1,56 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package tun
+
+import (
+ "encoding/binary"
+
+ "gvisor.dev/gvisor/pkg/tcpip"
+)
+
+const (
+ // PacketInfoHeaderSize is the size of the packet information header.
+ PacketInfoHeaderSize = 4
+
+ offsetFlags = 0
+ offsetProtocol = 2
+)
+
+// PacketInfoFields contains fields sent through the wire if IFF_NO_PI flag is
+// not set.
+type PacketInfoFields struct {
+ Flags uint16
+ Protocol tcpip.NetworkProtocolNumber
+}
+
+// PacketInfoHeader is the wire representation of the packet information sent if
+// IFF_NO_PI flag is not set.
+type PacketInfoHeader []byte
+
+// Encode encodes f into h.
+func (h PacketInfoHeader) Encode(f *PacketInfoFields) {
+ binary.BigEndian.PutUint16(h[offsetFlags:][:2], f.Flags)
+ binary.BigEndian.PutUint16(h[offsetProtocol:][:2], uint16(f.Protocol))
+}
+
+// Flags returns the flag field in h.
+func (h PacketInfoHeader) Flags() uint16 {
+ return binary.BigEndian.Uint16(h[offsetFlags:])
+}
+
+// Protocol returns the protocol field in h.
+func (h PacketInfoHeader) Protocol() tcpip.NetworkProtocolNumber {
+ return tcpip.NetworkProtocolNumber(binary.BigEndian.Uint16(h[offsetProtocol:]))
+}
diff --git a/pkg/tcpip/link/waitable/BUILD b/pkg/tcpip/link/waitable/BUILD
index 0746dc8ec..0956d2c65 100644
--- a/pkg/tcpip/link/waitable/BUILD
+++ b/pkg/tcpip/link/waitable/BUILD
@@ -1,5 +1,4 @@
-load("//tools/go_stateify:defs.bzl", "go_library")
-load("@io_bazel_rules_go//go:def.bzl", "go_test")
+load("//tools:defs.bzl", "go_library", "go_test")
package(licenses = ["notice"])
@@ -8,10 +7,7 @@ go_library(
srcs = [
"waitable.go",
],
- importpath = "gvisor.dev/gvisor/pkg/tcpip/link/waitable",
- visibility = [
- "//visibility:public",
- ],
+ visibility = ["//visibility:public"],
deps = [
"//pkg/gate",
"//pkg/tcpip",
@@ -25,7 +21,7 @@ go_test(
srcs = [
"waitable_test.go",
],
- embed = [":waitable"],
+ library = ":waitable",
deps = [
"//pkg/tcpip",
"//pkg/tcpip/buffer",
diff --git a/pkg/tcpip/link/waitable/waitable.go b/pkg/tcpip/link/waitable/waitable.go
index a04fc1062..2b3741276 100644
--- a/pkg/tcpip/link/waitable/waitable.go
+++ b/pkg/tcpip/link/waitable/waitable.go
@@ -50,12 +50,12 @@ func New(lower stack.LinkEndpoint) *Endpoint {
// It is called by the link-layer endpoint being wrapped when a packet arrives,
// and only forwards to the actual dispatcher if Wait or WaitDispatch haven't
// been called.
-func (e *Endpoint) DeliverNetworkPacket(linkEP stack.LinkEndpoint, remote, local tcpip.LinkAddress, protocol tcpip.NetworkProtocolNumber, vv buffer.VectorisedView, linkHeader buffer.View) {
+func (e *Endpoint) DeliverNetworkPacket(linkEP stack.LinkEndpoint, remote, local tcpip.LinkAddress, protocol tcpip.NetworkProtocolNumber, pkt stack.PacketBuffer) {
if !e.dispatchGate.Enter() {
return
}
- e.dispatcher.DeliverNetworkPacket(e, remote, local, protocol, vv, linkHeader)
+ e.dispatcher.DeliverNetworkPacket(e, remote, local, protocol, pkt)
e.dispatchGate.Leave()
}
@@ -99,12 +99,12 @@ func (e *Endpoint) LinkAddress() tcpip.LinkAddress {
// WritePacket implements stack.LinkEndpoint.WritePacket. It is called by
// higher-level protocols to write packets. It only forwards packets to the
// lower endpoint if Wait or WaitWrite haven't been called.
-func (e *Endpoint) WritePacket(r *stack.Route, gso *stack.GSO, hdr buffer.Prependable, payload buffer.VectorisedView, protocol tcpip.NetworkProtocolNumber) *tcpip.Error {
+func (e *Endpoint) WritePacket(r *stack.Route, gso *stack.GSO, protocol tcpip.NetworkProtocolNumber, pkt stack.PacketBuffer) *tcpip.Error {
if !e.writeGate.Enter() {
return nil
}
- err := e.lower.WritePacket(r, gso, hdr, payload, protocol)
+ err := e.lower.WritePacket(r, gso, protocol, pkt)
e.writeGate.Leave()
return err
}
@@ -112,23 +112,23 @@ func (e *Endpoint) WritePacket(r *stack.Route, gso *stack.GSO, hdr buffer.Prepen
// WritePackets implements stack.LinkEndpoint.WritePackets. It is called by
// higher-level protocols to write packets. It only forwards packets to the
// lower endpoint if Wait or WaitWrite haven't been called.
-func (e *Endpoint) WritePackets(r *stack.Route, gso *stack.GSO, hdrs []stack.PacketDescriptor, payload buffer.VectorisedView, protocol tcpip.NetworkProtocolNumber) (int, *tcpip.Error) {
+func (e *Endpoint) WritePackets(r *stack.Route, gso *stack.GSO, pkts stack.PacketBufferList, protocol tcpip.NetworkProtocolNumber) (int, *tcpip.Error) {
if !e.writeGate.Enter() {
- return len(hdrs), nil
+ return pkts.Len(), nil
}
- n, err := e.lower.WritePackets(r, gso, hdrs, payload, protocol)
+ n, err := e.lower.WritePackets(r, gso, pkts, protocol)
e.writeGate.Leave()
return n, err
}
// WriteRawPacket implements stack.LinkEndpoint.WriteRawPacket.
-func (e *Endpoint) WriteRawPacket(packet buffer.VectorisedView) *tcpip.Error {
+func (e *Endpoint) WriteRawPacket(vv buffer.VectorisedView) *tcpip.Error {
if !e.writeGate.Enter() {
return nil
}
- err := e.lower.WriteRawPacket(packet)
+ err := e.lower.WriteRawPacket(vv)
e.writeGate.Leave()
return err
}
diff --git a/pkg/tcpip/link/waitable/waitable_test.go b/pkg/tcpip/link/waitable/waitable_test.go
index 5f0f8fa2d..54eb5322b 100644
--- a/pkg/tcpip/link/waitable/waitable_test.go
+++ b/pkg/tcpip/link/waitable/waitable_test.go
@@ -35,7 +35,7 @@ type countedEndpoint struct {
dispatcher stack.NetworkDispatcher
}
-func (e *countedEndpoint) DeliverNetworkPacket(linkEP stack.LinkEndpoint, remote, local tcpip.LinkAddress, protocol tcpip.NetworkProtocolNumber, vv buffer.VectorisedView, linkHeader buffer.View) {
+func (e *countedEndpoint) DeliverNetworkPacket(linkEP stack.LinkEndpoint, remote, local tcpip.LinkAddress, protocol tcpip.NetworkProtocolNumber, pkt stack.PacketBuffer) {
e.dispatchCount++
}
@@ -65,18 +65,18 @@ func (e *countedEndpoint) LinkAddress() tcpip.LinkAddress {
return e.linkAddr
}
-func (e *countedEndpoint) WritePacket(r *stack.Route, _ *stack.GSO, hdr buffer.Prependable, payload buffer.VectorisedView, protocol tcpip.NetworkProtocolNumber) *tcpip.Error {
+func (e *countedEndpoint) WritePacket(r *stack.Route, _ *stack.GSO, protocol tcpip.NetworkProtocolNumber, pkt stack.PacketBuffer) *tcpip.Error {
e.writeCount++
return nil
}
// WritePackets implements stack.LinkEndpoint.WritePackets.
-func (e *countedEndpoint) WritePackets(r *stack.Route, _ *stack.GSO, hdrs []stack.PacketDescriptor, payload buffer.VectorisedView, protocol tcpip.NetworkProtocolNumber) (int, *tcpip.Error) {
- e.writeCount += len(hdrs)
- return len(hdrs), nil
+func (e *countedEndpoint) WritePackets(r *stack.Route, _ *stack.GSO, pkts stack.PacketBufferList, protocol tcpip.NetworkProtocolNumber) (int, *tcpip.Error) {
+ e.writeCount += pkts.Len()
+ return pkts.Len(), nil
}
-func (e *countedEndpoint) WriteRawPacket(packet buffer.VectorisedView) *tcpip.Error {
+func (e *countedEndpoint) WriteRawPacket(buffer.VectorisedView) *tcpip.Error {
e.writeCount++
return nil
}
@@ -89,21 +89,21 @@ func TestWaitWrite(t *testing.T) {
wep := New(ep)
// Write and check that it goes through.
- wep.WritePacket(nil, nil /* gso */, buffer.Prependable{}, buffer.VectorisedView{}, 0)
+ wep.WritePacket(nil, nil /* gso */, 0, stack.PacketBuffer{})
if want := 1; ep.writeCount != want {
t.Fatalf("Unexpected writeCount: got=%v, want=%v", ep.writeCount, want)
}
// Wait on dispatches, then try to write. It must go through.
wep.WaitDispatch()
- wep.WritePacket(nil, nil /* gso */, buffer.Prependable{}, buffer.VectorisedView{}, 0)
+ wep.WritePacket(nil, nil /* gso */, 0, stack.PacketBuffer{})
if want := 2; ep.writeCount != want {
t.Fatalf("Unexpected writeCount: got=%v, want=%v", ep.writeCount, want)
}
// Wait on writes, then try to write. It must not go through.
wep.WaitWrite()
- wep.WritePacket(nil, nil /* gso */, buffer.Prependable{}, buffer.VectorisedView{}, 0)
+ wep.WritePacket(nil, nil /* gso */, 0, stack.PacketBuffer{})
if want := 2; ep.writeCount != want {
t.Fatalf("Unexpected writeCount: got=%v, want=%v", ep.writeCount, want)
}
@@ -120,21 +120,21 @@ func TestWaitDispatch(t *testing.T) {
}
// Dispatch and check that it goes through.
- ep.dispatcher.DeliverNetworkPacket(ep, "", "", 0, buffer.VectorisedView{}, buffer.View{})
+ ep.dispatcher.DeliverNetworkPacket(ep, "", "", 0, stack.PacketBuffer{})
if want := 1; ep.dispatchCount != want {
t.Fatalf("Unexpected dispatchCount: got=%v, want=%v", ep.dispatchCount, want)
}
// Wait on writes, then try to dispatch. It must go through.
wep.WaitWrite()
- ep.dispatcher.DeliverNetworkPacket(ep, "", "", 0, buffer.VectorisedView{}, buffer.View{})
+ ep.dispatcher.DeliverNetworkPacket(ep, "", "", 0, stack.PacketBuffer{})
if want := 2; ep.dispatchCount != want {
t.Fatalf("Unexpected dispatchCount: got=%v, want=%v", ep.dispatchCount, want)
}
// Wait on dispatches, then try to dispatch. It must not go through.
wep.WaitDispatch()
- ep.dispatcher.DeliverNetworkPacket(ep, "", "", 0, buffer.VectorisedView{}, buffer.View{})
+ ep.dispatcher.DeliverNetworkPacket(ep, "", "", 0, stack.PacketBuffer{})
if want := 2; ep.dispatchCount != want {
t.Fatalf("Unexpected dispatchCount: got=%v, want=%v", ep.dispatchCount, want)
}