diff options
author | gVisor bot <gvisor-bot@google.com> | 2019-06-02 06:44:55 +0000 |
---|---|---|
committer | gVisor bot <gvisor-bot@google.com> | 2019-06-02 06:44:55 +0000 |
commit | ceb0d792f328d1fc0692197d8856a43c3936a571 (patch) | |
tree | 83155f302eff44a78bcc30a3a08f4efe59a79379 /pkg/tcpip/link | |
parent | deb7ecf1e46862d54f4b102f2d163cfbcfc37f3b (diff) | |
parent | 216da0b733dbed9aad9b2ab92ac75bcb906fd7ee (diff) |
Merge 216da0b7 (automated)
Diffstat (limited to 'pkg/tcpip/link')
-rw-r--r-- | pkg/tcpip/link/fdbased/endpoint.go | 372 | ||||
-rw-r--r-- | pkg/tcpip/link/fdbased/endpoint_unsafe.go | 32 | ||||
-rwxr-xr-x | pkg/tcpip/link/fdbased/fdbased_state_autogen.go | 4 | ||||
-rw-r--r-- | pkg/tcpip/link/fdbased/mmap.go | 25 | ||||
-rw-r--r-- | pkg/tcpip/link/fdbased/mmap_amd64.go | 194 | ||||
-rw-r--r-- | pkg/tcpip/link/fdbased/mmap_amd64_unsafe.go | 84 | ||||
-rw-r--r-- | pkg/tcpip/link/fdbased/packet_dispatchers.go | 309 | ||||
-rw-r--r-- | pkg/tcpip/link/loopback/loopback.go | 87 | ||||
-rwxr-xr-x | pkg/tcpip/link/loopback/loopback_state_autogen.go | 4 | ||||
-rw-r--r-- | pkg/tcpip/link/rawfile/blockingpoll_amd64.s | 40 | ||||
-rw-r--r-- | pkg/tcpip/link/rawfile/blockingpoll_amd64_unsafe.go | 60 | ||||
-rw-r--r-- | pkg/tcpip/link/rawfile/blockingpoll_unsafe.go | 29 | ||||
-rw-r--r-- | pkg/tcpip/link/rawfile/errors.go | 70 | ||||
-rwxr-xr-x | pkg/tcpip/link/rawfile/rawfile_state_autogen.go | 4 | ||||
-rw-r--r-- | pkg/tcpip/link/rawfile/rawfile_unsafe.go | 182 | ||||
-rw-r--r-- | pkg/tcpip/link/sniffer/pcap.go | 66 | ||||
-rw-r--r-- | pkg/tcpip/link/sniffer/sniffer.go | 408 | ||||
-rwxr-xr-x | pkg/tcpip/link/sniffer/sniffer_state_autogen.go | 4 |
18 files changed, 1974 insertions, 0 deletions
diff --git a/pkg/tcpip/link/fdbased/endpoint.go b/pkg/tcpip/link/fdbased/endpoint.go new file mode 100644 index 000000000..1f889c2a0 --- /dev/null +++ b/pkg/tcpip/link/fdbased/endpoint.go @@ -0,0 +1,372 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// +build linux + +// Package fdbased provides the implemention of data-link layer endpoints +// backed by boundary-preserving file descriptors (e.g., TUN devices, +// seqpacket/datagram sockets). +// +// FD based endpoints can be used in the networking stack by calling New() to +// create a new endpoint, and then passing it as an argument to +// Stack.CreateNIC(). +package fdbased + +import ( + "fmt" + "syscall" + + "gvisor.googlesource.com/gvisor/pkg/tcpip" + "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer" + "gvisor.googlesource.com/gvisor/pkg/tcpip/header" + "gvisor.googlesource.com/gvisor/pkg/tcpip/link/rawfile" + "gvisor.googlesource.com/gvisor/pkg/tcpip/stack" +) + +// linkDispatcher reads packets from the link FD and dispatches them to the +// NetworkDispatcher. +type linkDispatcher interface { + dispatch() (bool, *tcpip.Error) +} + +// PacketDispatchMode are the various supported methods of receiving and +// dispatching packets from the underlying FD. +type PacketDispatchMode int + +const ( + // Readv is the default dispatch mode and is the least performant of the + // dispatch options but the one that is supported by all underlying FD + // types. + Readv PacketDispatchMode = iota + // RecvMMsg enables use of recvmmsg() syscall instead of readv() to + // read inbound packets. This reduces # of syscalls needed to process + // packets. + // + // NOTE: recvmmsg() is only supported for sockets, so if the underlying + // FD is not a socket then the code will still fall back to the readv() + // path. + RecvMMsg + // PacketMMap enables use of PACKET_RX_RING to receive packets from the + // NIC. PacketMMap requires that the underlying FD be an AF_PACKET. The + // primary use-case for this is runsc which uses an AF_PACKET FD to + // receive packets from the veth device. + PacketMMap +) + +type endpoint struct { + // fd is the file descriptor used to send and receive packets. + fd int + + // mtu (maximum transmission unit) is the maximum size of a packet. + mtu uint32 + + // hdrSize specifies the link-layer header size. If set to 0, no header + // is added/removed; otherwise an ethernet header is used. + hdrSize int + + // addr is the address of the endpoint. + addr tcpip.LinkAddress + + // caps holds the endpoint capabilities. + caps stack.LinkEndpointCapabilities + + // closed is a function to be called when the FD's peer (if any) closes + // its end of the communication pipe. + closed func(*tcpip.Error) + + inboundDispatcher linkDispatcher + dispatcher stack.NetworkDispatcher + + // packetDispatchMode controls the packet dispatcher used by this + // endpoint. + packetDispatchMode PacketDispatchMode + + // gsoMaxSize is the maximum GSO packet size. It is zero if GSO is + // disabled. + gsoMaxSize uint32 +} + +// Options specify the details about the fd-based endpoint to be created. +type Options struct { + FD int + MTU uint32 + EthernetHeader bool + ClosedFunc func(*tcpip.Error) + Address tcpip.LinkAddress + SaveRestore bool + DisconnectOk bool + GSOMaxSize uint32 + PacketDispatchMode PacketDispatchMode + TXChecksumOffload bool + RXChecksumOffload bool +} + +// New creates a new fd-based endpoint. +// +// Makes fd non-blocking, but does not take ownership of fd, which must remain +// open for the lifetime of the returned endpoint. +func New(opts *Options) (tcpip.LinkEndpointID, error) { + if err := syscall.SetNonblock(opts.FD, true); err != nil { + return 0, fmt.Errorf("syscall.SetNonblock(%v) failed: %v", opts.FD, err) + } + + caps := stack.LinkEndpointCapabilities(0) + if opts.RXChecksumOffload { + caps |= stack.CapabilityRXChecksumOffload + } + + if opts.TXChecksumOffload { + caps |= stack.CapabilityTXChecksumOffload + } + + hdrSize := 0 + if opts.EthernetHeader { + hdrSize = header.EthernetMinimumSize + caps |= stack.CapabilityResolutionRequired + } + + if opts.SaveRestore { + caps |= stack.CapabilitySaveRestore + } + + if opts.DisconnectOk { + caps |= stack.CapabilityDisconnectOk + } + + e := &endpoint{ + fd: opts.FD, + mtu: opts.MTU, + caps: caps, + closed: opts.ClosedFunc, + addr: opts.Address, + hdrSize: hdrSize, + packetDispatchMode: opts.PacketDispatchMode, + } + + isSocket, err := isSocketFD(e.fd) + if err != nil { + return 0, err + } + if isSocket { + if opts.GSOMaxSize != 0 { + e.caps |= stack.CapabilityGSO + e.gsoMaxSize = opts.GSOMaxSize + } + } + e.inboundDispatcher, err = createInboundDispatcher(e, isSocket) + if err != nil { + return 0, fmt.Errorf("createInboundDispatcher(...) = %v", err) + } + + return stack.RegisterLinkEndpoint(e), nil +} + +func createInboundDispatcher(e *endpoint, isSocket bool) (linkDispatcher, error) { + // By default use the readv() dispatcher as it works with all kinds of + // FDs (tap/tun/unix domain sockets and af_packet). + inboundDispatcher, err := newReadVDispatcher(e.fd, e) + if err != nil { + return nil, fmt.Errorf("newReadVDispatcher(%d, %+v) = %v", e.fd, e, err) + } + + if isSocket { + switch e.packetDispatchMode { + case PacketMMap: + inboundDispatcher, err = newPacketMMapDispatcher(e.fd, e) + if err != nil { + return nil, fmt.Errorf("newPacketMMapDispatcher(%d, %+v) = %v", e.fd, e, err) + } + case RecvMMsg: + // If the provided FD is a socket then we optimize + // packet reads by using recvmmsg() instead of read() to + // read packets in a batch. + inboundDispatcher, err = newRecvMMsgDispatcher(e.fd, e) + if err != nil { + return nil, fmt.Errorf("newRecvMMsgDispatcher(%d, %+v) = %v", e.fd, e, err) + } + } + } + return inboundDispatcher, nil +} + +func isSocketFD(fd int) (bool, error) { + var stat syscall.Stat_t + if err := syscall.Fstat(fd, &stat); err != nil { + return false, fmt.Errorf("syscall.Fstat(%v,...) failed: %v", fd, err) + } + return (stat.Mode & syscall.S_IFSOCK) == syscall.S_IFSOCK, nil +} + +// Attach launches the goroutine that reads packets from the file descriptor and +// dispatches them via the provided dispatcher. +func (e *endpoint) Attach(dispatcher stack.NetworkDispatcher) { + e.dispatcher = dispatcher + // Link endpoints are not savable. When transportation endpoints are + // saved, they stop sending outgoing packets and all incoming packets + // are rejected. + go e.dispatchLoop() // S/R-SAFE: See above. +} + +// IsAttached implements stack.LinkEndpoint.IsAttached. +func (e *endpoint) IsAttached() bool { + return e.dispatcher != nil +} + +// MTU implements stack.LinkEndpoint.MTU. It returns the value initialized +// during construction. +func (e *endpoint) MTU() uint32 { + return e.mtu +} + +// Capabilities implements stack.LinkEndpoint.Capabilities. +func (e *endpoint) Capabilities() stack.LinkEndpointCapabilities { + return e.caps +} + +// MaxHeaderLength returns the maximum size of the link-layer header. +func (e *endpoint) MaxHeaderLength() uint16 { + return uint16(e.hdrSize) +} + +// LinkAddress returns the link address of this endpoint. +func (e *endpoint) LinkAddress() tcpip.LinkAddress { + return e.addr +} + +// virtioNetHdr is declared in linux/virtio_net.h. +type virtioNetHdr struct { + flags uint8 + gsoType uint8 + hdrLen uint16 + gsoSize uint16 + csumStart uint16 + csumOffset uint16 +} + +// These constants are declared in linux/virtio_net.h. +const ( + _VIRTIO_NET_HDR_F_NEEDS_CSUM = 1 + + _VIRTIO_NET_HDR_GSO_TCPV4 = 1 + _VIRTIO_NET_HDR_GSO_TCPV6 = 4 +) + +// WritePacket writes outbound packets to the file descriptor. If it is not +// currently writable, the packet is dropped. +func (e *endpoint) WritePacket(r *stack.Route, gso *stack.GSO, hdr buffer.Prependable, payload buffer.VectorisedView, protocol tcpip.NetworkProtocolNumber) *tcpip.Error { + if e.hdrSize > 0 { + // Add ethernet header if needed. + eth := header.Ethernet(hdr.Prepend(header.EthernetMinimumSize)) + ethHdr := &header.EthernetFields{ + DstAddr: r.RemoteLinkAddress, + Type: protocol, + } + + // Preserve the src address if it's set in the route. + if r.LocalLinkAddress != "" { + ethHdr.SrcAddr = r.LocalLinkAddress + } else { + ethHdr.SrcAddr = e.addr + } + eth.Encode(ethHdr) + } + + if e.Capabilities()&stack.CapabilityGSO != 0 { + vnetHdr := virtioNetHdr{} + vnetHdrBuf := vnetHdrToByteSlice(&vnetHdr) + if gso != nil { + vnetHdr.hdrLen = uint16(hdr.UsedLength()) + if gso.NeedsCsum { + vnetHdr.flags = _VIRTIO_NET_HDR_F_NEEDS_CSUM + vnetHdr.csumStart = header.EthernetMinimumSize + gso.L3HdrLen + vnetHdr.csumOffset = gso.CsumOffset + } + if gso.Type != stack.GSONone && uint16(payload.Size()) > gso.MSS { + switch gso.Type { + case stack.GSOTCPv4: + vnetHdr.gsoType = _VIRTIO_NET_HDR_GSO_TCPV4 + case stack.GSOTCPv6: + vnetHdr.gsoType = _VIRTIO_NET_HDR_GSO_TCPV6 + default: + panic(fmt.Sprintf("Unknown gso type: %v", gso.Type)) + } + vnetHdr.gsoSize = gso.MSS + } + } + + return rawfile.NonBlockingWrite3(e.fd, vnetHdrBuf, hdr.View(), payload.ToView()) + } + + if payload.Size() == 0 { + return rawfile.NonBlockingWrite(e.fd, hdr.View()) + } + + return rawfile.NonBlockingWrite3(e.fd, hdr.View(), payload.ToView(), nil) +} + +// WriteRawPacket writes a raw packet directly to the file descriptor. +func (e *endpoint) WriteRawPacket(dest tcpip.Address, packet []byte) *tcpip.Error { + return rawfile.NonBlockingWrite(e.fd, packet) +} + +// dispatchLoop reads packets from the file descriptor in a loop and dispatches +// them to the network stack. +func (e *endpoint) dispatchLoop() *tcpip.Error { + for { + cont, err := e.inboundDispatcher.dispatch() + if err != nil || !cont { + if e.closed != nil { + e.closed(err) + } + return err + } + } +} + +// GSOMaxSize returns the maximum GSO packet size. +func (e *endpoint) GSOMaxSize() uint32 { + return e.gsoMaxSize +} + +// InjectableEndpoint is an injectable fd-based endpoint. The endpoint writes +// to the FD, but does not read from it. All reads come from injected packets. +type InjectableEndpoint struct { + endpoint + + dispatcher stack.NetworkDispatcher +} + +// Attach saves the stack network-layer dispatcher for use later when packets +// are injected. +func (e *InjectableEndpoint) Attach(dispatcher stack.NetworkDispatcher) { + e.dispatcher = dispatcher +} + +// Inject injects an inbound packet. +func (e *InjectableEndpoint) Inject(protocol tcpip.NetworkProtocolNumber, vv buffer.VectorisedView) { + e.dispatcher.DeliverNetworkPacket(e, "" /* remote */, "" /* local */, protocol, vv) +} + +// NewInjectable creates a new fd-based InjectableEndpoint. +func NewInjectable(fd int, mtu uint32, capabilities stack.LinkEndpointCapabilities) (tcpip.LinkEndpointID, *InjectableEndpoint) { + syscall.SetNonblock(fd, true) + + e := &InjectableEndpoint{endpoint: endpoint{ + fd: fd, + mtu: mtu, + caps: capabilities, + }} + + return stack.RegisterLinkEndpoint(e), e +} diff --git a/pkg/tcpip/link/fdbased/endpoint_unsafe.go b/pkg/tcpip/link/fdbased/endpoint_unsafe.go new file mode 100644 index 000000000..97a477b61 --- /dev/null +++ b/pkg/tcpip/link/fdbased/endpoint_unsafe.go @@ -0,0 +1,32 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// +build linux + +package fdbased + +import ( + "reflect" + "unsafe" +) + +const virtioNetHdrSize = int(unsafe.Sizeof(virtioNetHdr{})) + +func vnetHdrToByteSlice(hdr *virtioNetHdr) (slice []byte) { + sh := (*reflect.SliceHeader)(unsafe.Pointer(&slice)) + sh.Data = uintptr(unsafe.Pointer(hdr)) + sh.Len = virtioNetHdrSize + sh.Cap = virtioNetHdrSize + return +} diff --git a/pkg/tcpip/link/fdbased/fdbased_state_autogen.go b/pkg/tcpip/link/fdbased/fdbased_state_autogen.go new file mode 100755 index 000000000..0555db528 --- /dev/null +++ b/pkg/tcpip/link/fdbased/fdbased_state_autogen.go @@ -0,0 +1,4 @@ +// automatically generated by stateify. + +package fdbased + diff --git a/pkg/tcpip/link/fdbased/mmap.go b/pkg/tcpip/link/fdbased/mmap.go new file mode 100644 index 000000000..6b7f2a185 --- /dev/null +++ b/pkg/tcpip/link/fdbased/mmap.go @@ -0,0 +1,25 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// +build !linux !amd64 + +package fdbased + +import "gvisor.googlesource.com/gvisor/pkg/tcpip" + +// Stubbed out version for non-linux/non-amd64 platforms. + +func newPacketMMapDispatcher(fd int, e *endpoint) (linkDispatcher, *tcpip.Error) { + return nil, nil +} diff --git a/pkg/tcpip/link/fdbased/mmap_amd64.go b/pkg/tcpip/link/fdbased/mmap_amd64.go new file mode 100644 index 000000000..1c2d8c468 --- /dev/null +++ b/pkg/tcpip/link/fdbased/mmap_amd64.go @@ -0,0 +1,194 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// +build linux,amd64 + +package fdbased + +import ( + "encoding/binary" + "syscall" + + "golang.org/x/sys/unix" + "gvisor.googlesource.com/gvisor/pkg/tcpip" + "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer" + "gvisor.googlesource.com/gvisor/pkg/tcpip/header" + "gvisor.googlesource.com/gvisor/pkg/tcpip/link/rawfile" +) + +const ( + tPacketAlignment = uintptr(16) + tpStatusKernel = 0 + tpStatusUser = 1 + tpStatusCopy = 2 + tpStatusLosing = 4 +) + +// We overallocate the frame size to accommodate space for the +// TPacketHdr+RawSockAddrLinkLayer+MAC header and any padding. +// +// Memory allocated for the ring buffer: tpBlockSize * tpBlockNR = 2 MiB +// +// NOTE: +// Frames need to be aligned at 16 byte boundaries. +// BlockSize needs to be page aligned. +// +// For details see PACKET_MMAP setting constraints in +// https://www.kernel.org/doc/Documentation/networking/packet_mmap.txt +const ( + tpFrameSize = 65536 + 128 + tpBlockSize = tpFrameSize * 32 + tpBlockNR = 1 + tpFrameNR = (tpBlockSize * tpBlockNR) / tpFrameSize +) + +// tPacketAlign aligns the pointer v at a tPacketAlignment boundary. Direct +// translation of the TPACKET_ALIGN macro in <linux/if_packet.h>. +func tPacketAlign(v uintptr) uintptr { + return (v + tPacketAlignment - 1) & uintptr(^(tPacketAlignment - 1)) +} + +// tPacketReq is the tpacket_req structure as described in +// https://www.kernel.org/doc/Documentation/networking/packet_mmap.txt +type tPacketReq struct { + tpBlockSize uint32 + tpBlockNR uint32 + tpFrameSize uint32 + tpFrameNR uint32 +} + +// tPacketHdr is tpacket_hdr structure as described in <linux/if_packet.h> +type tPacketHdr []byte + +const ( + tpStatusOffset = 0 + tpLenOffset = 8 + tpSnapLenOffset = 12 + tpMacOffset = 16 + tpNetOffset = 18 + tpSecOffset = 20 + tpUSecOffset = 24 +) + +func (t tPacketHdr) tpLen() uint32 { + return binary.LittleEndian.Uint32(t[tpLenOffset:]) +} + +func (t tPacketHdr) tpSnapLen() uint32 { + return binary.LittleEndian.Uint32(t[tpSnapLenOffset:]) +} + +func (t tPacketHdr) tpMac() uint16 { + return binary.LittleEndian.Uint16(t[tpMacOffset:]) +} + +func (t tPacketHdr) tpNet() uint16 { + return binary.LittleEndian.Uint16(t[tpNetOffset:]) +} + +func (t tPacketHdr) tpSec() uint32 { + return binary.LittleEndian.Uint32(t[tpSecOffset:]) +} + +func (t tPacketHdr) tpUSec() uint32 { + return binary.LittleEndian.Uint32(t[tpUSecOffset:]) +} + +func (t tPacketHdr) Payload() []byte { + return t[uint32(t.tpMac()) : uint32(t.tpMac())+t.tpSnapLen()] +} + +// packetMMapDispatcher uses PACKET_RX_RING's to read/dispatch inbound packets. +// See: mmap_amd64_unsafe.go for implementation details. +type packetMMapDispatcher struct { + // fd is the file descriptor used to send and receive packets. + fd int + + // e is the endpoint this dispatcher is attached to. + e *endpoint + + // ringBuffer is only used when PacketMMap dispatcher is used and points + // to the start of the mmapped PACKET_RX_RING buffer. + ringBuffer []byte + + // ringOffset is the current offset into the ring buffer where the next + // inbound packet will be placed by the kernel. + ringOffset int +} + +func (d *packetMMapDispatcher) readMMappedPacket() ([]byte, *tcpip.Error) { + hdr := tPacketHdr(d.ringBuffer[d.ringOffset*tpFrameSize:]) + for hdr.tpStatus()&tpStatusUser == 0 { + event := rawfile.PollEvent{ + FD: int32(d.fd), + Events: unix.POLLIN | unix.POLLERR, + } + if _, errno := rawfile.BlockingPoll(&event, 1, -1); errno != 0 { + if errno == syscall.EINTR { + continue + } + return nil, rawfile.TranslateErrno(errno) + } + if hdr.tpStatus()&tpStatusCopy != 0 { + // This frame is truncated so skip it after flipping the + // buffer to the kernel. + hdr.setTPStatus(tpStatusKernel) + d.ringOffset = (d.ringOffset + 1) % tpFrameNR + hdr = (tPacketHdr)(d.ringBuffer[d.ringOffset*tpFrameSize:]) + continue + } + } + + // Copy out the packet from the mmapped frame to a locally owned buffer. + pkt := make([]byte, hdr.tpSnapLen()) + copy(pkt, hdr.Payload()) + // Release packet to kernel. + hdr.setTPStatus(tpStatusKernel) + d.ringOffset = (d.ringOffset + 1) % tpFrameNR + return pkt, nil +} + +// dispatch reads packets from an mmaped ring buffer and dispatches them to the +// network stack. +func (d *packetMMapDispatcher) dispatch() (bool, *tcpip.Error) { + pkt, err := d.readMMappedPacket() + if err != nil { + return false, err + } + var ( + p tcpip.NetworkProtocolNumber + remote, local tcpip.LinkAddress + ) + if d.e.hdrSize > 0 { + eth := header.Ethernet(pkt) + p = eth.Type() + remote = eth.SourceAddress() + local = eth.DestinationAddress() + } else { + // We don't get any indication of what the packet is, so try to guess + // if it's an IPv4 or IPv6 packet. + switch header.IPVersion(pkt) { + case header.IPv4Version: + p = header.IPv4ProtocolNumber + case header.IPv6Version: + p = header.IPv6ProtocolNumber + default: + return true, nil + } + } + + pkt = pkt[d.e.hdrSize:] + d.e.dispatcher.DeliverNetworkPacket(d.e, remote, local, p, buffer.NewVectorisedView(len(pkt), []buffer.View{buffer.View(pkt)})) + return true, nil +} diff --git a/pkg/tcpip/link/fdbased/mmap_amd64_unsafe.go b/pkg/tcpip/link/fdbased/mmap_amd64_unsafe.go new file mode 100644 index 000000000..47cb1d1cc --- /dev/null +++ b/pkg/tcpip/link/fdbased/mmap_amd64_unsafe.go @@ -0,0 +1,84 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// +build linux,amd64 + +package fdbased + +import ( + "fmt" + "sync/atomic" + "syscall" + "unsafe" + + "golang.org/x/sys/unix" +) + +// tPacketHdrlen is the TPACKET_HDRLEN variable defined in <linux/if_packet.h>. +var tPacketHdrlen = tPacketAlign(unsafe.Sizeof(tPacketHdr{}) + unsafe.Sizeof(syscall.RawSockaddrLinklayer{})) + +// tpStatus returns the frame status field. +// The status is concurrently updated by the kernel as a result we must +// use atomic operations to prevent races. +func (t tPacketHdr) tpStatus() uint32 { + hdr := unsafe.Pointer(&t[0]) + statusPtr := unsafe.Pointer(uintptr(hdr) + uintptr(tpStatusOffset)) + return atomic.LoadUint32((*uint32)(statusPtr)) +} + +// setTPStatus set's the frame status to the provided status. +// The status is concurrently updated by the kernel as a result we must +// use atomic operations to prevent races. +func (t tPacketHdr) setTPStatus(status uint32) { + hdr := unsafe.Pointer(&t[0]) + statusPtr := unsafe.Pointer(uintptr(hdr) + uintptr(tpStatusOffset)) + atomic.StoreUint32((*uint32)(statusPtr), status) +} + +func newPacketMMapDispatcher(fd int, e *endpoint) (linkDispatcher, error) { + d := &packetMMapDispatcher{ + fd: fd, + e: e, + } + pageSize := unix.Getpagesize() + if tpBlockSize%pageSize != 0 { + return nil, fmt.Errorf("tpBlockSize: %d is not page aligned, pagesize: %d", tpBlockSize, pageSize) + } + tReq := tPacketReq{ + tpBlockSize: uint32(tpBlockSize), + tpBlockNR: uint32(tpBlockNR), + tpFrameSize: uint32(tpFrameSize), + tpFrameNR: uint32(tpFrameNR), + } + // Setup PACKET_RX_RING. + if err := setsockopt(d.fd, syscall.SOL_PACKET, syscall.PACKET_RX_RING, unsafe.Pointer(&tReq), unsafe.Sizeof(tReq)); err != nil { + return nil, fmt.Errorf("failed to enable PACKET_RX_RING: %v", err) + } + // Let's mmap the blocks. + sz := tpBlockSize * tpBlockNR + buf, err := syscall.Mmap(d.fd, 0, sz, syscall.PROT_READ|syscall.PROT_WRITE, syscall.MAP_SHARED) + if err != nil { + return nil, fmt.Errorf("syscall.Mmap(...,0, %v, ...) failed = %v", sz, err) + } + d.ringBuffer = buf + return d, nil +} + +func setsockopt(fd, level, name int, val unsafe.Pointer, vallen uintptr) error { + if _, _, errno := syscall.Syscall6(syscall.SYS_SETSOCKOPT, uintptr(fd), uintptr(level), uintptr(name), uintptr(val), vallen, 0); errno != 0 { + return error(errno) + } + + return nil +} diff --git a/pkg/tcpip/link/fdbased/packet_dispatchers.go b/pkg/tcpip/link/fdbased/packet_dispatchers.go new file mode 100644 index 000000000..1ae0e3359 --- /dev/null +++ b/pkg/tcpip/link/fdbased/packet_dispatchers.go @@ -0,0 +1,309 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// +build linux + +package fdbased + +import ( + "syscall" + + "gvisor.googlesource.com/gvisor/pkg/tcpip" + "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer" + "gvisor.googlesource.com/gvisor/pkg/tcpip/header" + "gvisor.googlesource.com/gvisor/pkg/tcpip/link/rawfile" + "gvisor.googlesource.com/gvisor/pkg/tcpip/stack" +) + +// BufConfig defines the shape of the vectorised view used to read packets from the NIC. +var BufConfig = []int{128, 256, 256, 512, 1024, 2048, 4096, 8192, 16384, 32768} + +// readVDispatcher uses readv() system call to read inbound packets and +// dispatches them. +type readVDispatcher struct { + // fd is the file descriptor used to send and receive packets. + fd int + + // e is the endpoint this dispatcher is attached to. + e *endpoint + + // views are the actual buffers that hold the packet contents. + views []buffer.View + + // iovecs are initialized with base pointers/len of the corresponding + // entries in the views defined above, except when GSO is enabled then + // the first iovec points to a buffer for the vnet header which is + // stripped before the views are passed up the stack for further + // processing. + iovecs []syscall.Iovec +} + +func newReadVDispatcher(fd int, e *endpoint) (linkDispatcher, error) { + d := &readVDispatcher{fd: fd, e: e} + d.views = make([]buffer.View, len(BufConfig)) + iovLen := len(BufConfig) + if d.e.Capabilities()&stack.CapabilityGSO != 0 { + iovLen++ + } + d.iovecs = make([]syscall.Iovec, iovLen) + return d, nil +} + +func (d *readVDispatcher) allocateViews(bufConfig []int) { + var vnetHdr [virtioNetHdrSize]byte + vnetHdrOff := 0 + if d.e.Capabilities()&stack.CapabilityGSO != 0 { + // The kernel adds virtioNetHdr before each packet, but + // we don't use it, so so we allocate a buffer for it, + // add it in iovecs but don't add it in a view. + d.iovecs[0] = syscall.Iovec{ + Base: &vnetHdr[0], + Len: uint64(virtioNetHdrSize), + } + vnetHdrOff++ + } + for i := 0; i < len(bufConfig); i++ { + if d.views[i] != nil { + break + } + b := buffer.NewView(bufConfig[i]) + d.views[i] = b + d.iovecs[i+vnetHdrOff] = syscall.Iovec{ + Base: &b[0], + Len: uint64(len(b)), + } + } +} + +func (d *readVDispatcher) capViews(n int, buffers []int) int { + c := 0 + for i, s := range buffers { + c += s + if c >= n { + d.views[i].CapLength(s - (c - n)) + return i + 1 + } + } + return len(buffers) +} + +// dispatch reads one packet from the file descriptor and dispatches it. +func (d *readVDispatcher) dispatch() (bool, *tcpip.Error) { + d.allocateViews(BufConfig) + + n, err := rawfile.BlockingReadv(d.fd, d.iovecs) + if err != nil { + return false, err + } + if d.e.Capabilities()&stack.CapabilityGSO != 0 { + // Skip virtioNetHdr which is added before each packet, it + // isn't used and it isn't in a view. + n -= virtioNetHdrSize + } + if n <= d.e.hdrSize { + return false, nil + } + + var ( + p tcpip.NetworkProtocolNumber + remote, local tcpip.LinkAddress + ) + if d.e.hdrSize > 0 { + eth := header.Ethernet(d.views[0]) + p = eth.Type() + remote = eth.SourceAddress() + local = eth.DestinationAddress() + } else { + // We don't get any indication of what the packet is, so try to guess + // if it's an IPv4 or IPv6 packet. + switch header.IPVersion(d.views[0]) { + case header.IPv4Version: + p = header.IPv4ProtocolNumber + case header.IPv6Version: + p = header.IPv6ProtocolNumber + default: + return true, nil + } + } + + used := d.capViews(n, BufConfig) + vv := buffer.NewVectorisedView(n, d.views[:used]) + vv.TrimFront(d.e.hdrSize) + + d.e.dispatcher.DeliverNetworkPacket(d.e, remote, local, p, vv) + + // Prepare e.views for another packet: release used views. + for i := 0; i < used; i++ { + d.views[i] = nil + } + + return true, nil +} + +// recvMMsgDispatcher uses the recvmmsg system call to read inbound packets and +// dispatches them. +type recvMMsgDispatcher struct { + // fd is the file descriptor used to send and receive packets. + fd int + + // e is the endpoint this dispatcher is attached to. + e *endpoint + + // views is an array of array of buffers that contain packet contents. + views [][]buffer.View + + // iovecs is an array of array of iovec records where each iovec base + // pointer and length are initialzed to the corresponding view above, + // except when GSO is neabled then the first iovec in each array of + // iovecs points to a buffer for the vnet header which is stripped + // before the views are passed up the stack for further processing. + iovecs [][]syscall.Iovec + + // msgHdrs is an array of MMsgHdr objects where each MMsghdr is used to + // reference an array of iovecs in the iovecs field defined above. This + // array is passed as the parameter to recvmmsg call to retrieve + // potentially more than 1 packet per syscall. + msgHdrs []rawfile.MMsgHdr +} + +const ( + // MaxMsgsPerRecv is the maximum number of packets we want to retrieve + // in a single RecvMMsg call. + MaxMsgsPerRecv = 8 +) + +func newRecvMMsgDispatcher(fd int, e *endpoint) (linkDispatcher, error) { + d := &recvMMsgDispatcher{ + fd: fd, + e: e, + } + d.views = make([][]buffer.View, MaxMsgsPerRecv) + for i := range d.views { + d.views[i] = make([]buffer.View, len(BufConfig)) + } + d.iovecs = make([][]syscall.Iovec, MaxMsgsPerRecv) + iovLen := len(BufConfig) + if d.e.Capabilities()&stack.CapabilityGSO != 0 { + // virtioNetHdr is prepended before each packet. + iovLen++ + } + for i := range d.iovecs { + d.iovecs[i] = make([]syscall.Iovec, iovLen) + } + d.msgHdrs = make([]rawfile.MMsgHdr, MaxMsgsPerRecv) + for i := range d.msgHdrs { + d.msgHdrs[i].Msg.Iov = &d.iovecs[i][0] + d.msgHdrs[i].Msg.Iovlen = uint64(iovLen) + } + return d, nil +} + +func (d *recvMMsgDispatcher) capViews(k, n int, buffers []int) int { + c := 0 + for i, s := range buffers { + c += s + if c >= n { + d.views[k][i].CapLength(s - (c - n)) + return i + 1 + } + } + return len(buffers) +} + +func (d *recvMMsgDispatcher) allocateViews(bufConfig []int) { + for k := 0; k < len(d.views); k++ { + var vnetHdr [virtioNetHdrSize]byte + vnetHdrOff := 0 + if d.e.Capabilities()&stack.CapabilityGSO != 0 { + // The kernel adds virtioNetHdr before each packet, but + // we don't use it, so so we allocate a buffer for it, + // add it in iovecs but don't add it in a view. + d.iovecs[k][0] = syscall.Iovec{ + Base: &vnetHdr[0], + Len: uint64(virtioNetHdrSize), + } + vnetHdrOff++ + } + for i := 0; i < len(bufConfig); i++ { + if d.views[k][i] != nil { + break + } + b := buffer.NewView(bufConfig[i]) + d.views[k][i] = b + d.iovecs[k][i+vnetHdrOff] = syscall.Iovec{ + Base: &b[0], + Len: uint64(len(b)), + } + } + } +} + +// recvMMsgDispatch reads more than one packet at a time from the file +// descriptor and dispatches it. +func (d *recvMMsgDispatcher) dispatch() (bool, *tcpip.Error) { + d.allocateViews(BufConfig) + + nMsgs, err := rawfile.BlockingRecvMMsg(d.fd, d.msgHdrs) + if err != nil { + return false, err + } + // Process each of received packets. + for k := 0; k < nMsgs; k++ { + n := int(d.msgHdrs[k].Len) + if d.e.Capabilities()&stack.CapabilityGSO != 0 { + n -= virtioNetHdrSize + } + if n <= d.e.hdrSize { + return false, nil + } + + var ( + p tcpip.NetworkProtocolNumber + remote, local tcpip.LinkAddress + ) + if d.e.hdrSize > 0 { + eth := header.Ethernet(d.views[k][0]) + p = eth.Type() + remote = eth.SourceAddress() + local = eth.DestinationAddress() + } else { + // We don't get any indication of what the packet is, so try to guess + // if it's an IPv4 or IPv6 packet. + switch header.IPVersion(d.views[k][0]) { + case header.IPv4Version: + p = header.IPv4ProtocolNumber + case header.IPv6Version: + p = header.IPv6ProtocolNumber + default: + return true, nil + } + } + + used := d.capViews(k, int(n), BufConfig) + vv := buffer.NewVectorisedView(int(n), d.views[k][:used]) + vv.TrimFront(d.e.hdrSize) + d.e.dispatcher.DeliverNetworkPacket(d.e, remote, local, p, vv) + + // Prepare e.views for another packet: release used views. + for i := 0; i < used; i++ { + d.views[k][i] = nil + } + } + + for k := 0; k < nMsgs; k++ { + d.msgHdrs[k].Len = 0 + } + + return true, nil +} diff --git a/pkg/tcpip/link/loopback/loopback.go b/pkg/tcpip/link/loopback/loopback.go new file mode 100644 index 000000000..2c1148123 --- /dev/null +++ b/pkg/tcpip/link/loopback/loopback.go @@ -0,0 +1,87 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package loopback provides the implemention of loopback data-link layer +// endpoints. Such endpoints just turn outbound packets into inbound ones. +// +// Loopback endpoints can be used in the networking stack by calling New() to +// create a new endpoint, and then passing it as an argument to +// Stack.CreateNIC(). +package loopback + +import ( + "gvisor.googlesource.com/gvisor/pkg/tcpip" + "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer" + "gvisor.googlesource.com/gvisor/pkg/tcpip/stack" +) + +type endpoint struct { + dispatcher stack.NetworkDispatcher +} + +// New creates a new loopback endpoint. This link-layer endpoint just turns +// outbound packets into inbound packets. +func New() tcpip.LinkEndpointID { + return stack.RegisterLinkEndpoint(&endpoint{}) +} + +// Attach implements stack.LinkEndpoint.Attach. It just saves the stack network- +// layer dispatcher for later use when packets need to be dispatched. +func (e *endpoint) Attach(dispatcher stack.NetworkDispatcher) { + e.dispatcher = dispatcher +} + +// IsAttached implements stack.LinkEndpoint.IsAttached. +func (e *endpoint) IsAttached() bool { + return e.dispatcher != nil +} + +// MTU implements stack.LinkEndpoint.MTU. It returns a constant that matches the +// linux loopback interface. +func (*endpoint) MTU() uint32 { + return 65536 +} + +// Capabilities implements stack.LinkEndpoint.Capabilities. Loopback advertises +// itself as supporting checksum offload, but in reality it's just omitted. +func (*endpoint) Capabilities() stack.LinkEndpointCapabilities { + return stack.CapabilityRXChecksumOffload | stack.CapabilityTXChecksumOffload | stack.CapabilitySaveRestore | stack.CapabilityLoopback +} + +// MaxHeaderLength implements stack.LinkEndpoint.MaxHeaderLength. Given that the +// loopback interface doesn't have a header, it just returns 0. +func (*endpoint) MaxHeaderLength() uint16 { + return 0 +} + +// LinkAddress returns the link address of this endpoint. +func (*endpoint) LinkAddress() tcpip.LinkAddress { + return "" +} + +// WritePacket implements stack.LinkEndpoint.WritePacket. It delivers outbound +// packets to the network-layer dispatcher. +func (e *endpoint) WritePacket(_ *stack.Route, _ *stack.GSO, hdr buffer.Prependable, payload buffer.VectorisedView, protocol tcpip.NetworkProtocolNumber) *tcpip.Error { + views := make([]buffer.View, 1, 1+len(payload.Views())) + views[0] = hdr.View() + views = append(views, payload.Views()...) + vv := buffer.NewVectorisedView(len(views[0])+payload.Size(), views) + + // Because we're immediately turning around and writing the packet back to the + // rx path, we intentionally don't preserve the remote and local link + // addresses from the stack.Route we're passed. + e.dispatcher.DeliverNetworkPacket(e, "" /* remote */, "" /* local */, protocol, vv) + + return nil +} diff --git a/pkg/tcpip/link/loopback/loopback_state_autogen.go b/pkg/tcpip/link/loopback/loopback_state_autogen.go new file mode 100755 index 000000000..87ec8cfc7 --- /dev/null +++ b/pkg/tcpip/link/loopback/loopback_state_autogen.go @@ -0,0 +1,4 @@ +// automatically generated by stateify. + +package loopback + diff --git a/pkg/tcpip/link/rawfile/blockingpoll_amd64.s b/pkg/tcpip/link/rawfile/blockingpoll_amd64.s new file mode 100644 index 000000000..b54131573 --- /dev/null +++ b/pkg/tcpip/link/rawfile/blockingpoll_amd64.s @@ -0,0 +1,40 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "textflag.h" + +// BlockingPoll makes the poll() syscall while calling the version of +// entersyscall that relinquishes the P so that other Gs can run. This is meant +// to be called in cases when the syscall is expected to block. +// +// func BlockingPoll(fds *PollEvent, nfds int, timeout int64) (n int, err syscall.Errno) +TEXT ·BlockingPoll(SB),NOSPLIT,$0-40 + CALL ·callEntersyscallblock(SB) + MOVQ fds+0(FP), DI + MOVQ nfds+8(FP), SI + MOVQ timeout+16(FP), DX + MOVQ $0x7, AX // SYS_POLL + SYSCALL + CMPQ AX, $0xfffffffffffff001 + JLS ok + MOVQ $-1, n+24(FP) + NEGQ AX + MOVQ AX, err+32(FP) + CALL ·callExitsyscall(SB) + RET +ok: + MOVQ AX, n+24(FP) + MOVQ $0, err+32(FP) + CALL ·callExitsyscall(SB) + RET diff --git a/pkg/tcpip/link/rawfile/blockingpoll_amd64_unsafe.go b/pkg/tcpip/link/rawfile/blockingpoll_amd64_unsafe.go new file mode 100644 index 000000000..c87268610 --- /dev/null +++ b/pkg/tcpip/link/rawfile/blockingpoll_amd64_unsafe.go @@ -0,0 +1,60 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// +build linux,amd64 +// +build go1.12 +// +build !go1.14 + +// Check go:linkname function signatures when updating Go version. + +package rawfile + +import ( + "syscall" + _ "unsafe" // for go:linkname +) + +//go:noescape +func BlockingPoll(fds *PollEvent, nfds int, timeout int64) (int, syscall.Errno) + +// Use go:linkname to call into the runtime. As of Go 1.12 this has to +// be done from Go code so that we make an ABIInternal call to an +// ABIInternal function; see https://golang.org/issue/27539. + +// We need to call both entersyscallblock and exitsyscall this way so +// that the runtime's check on the stack pointer lines up. + +// Note that calling an unexported function in the runtime package is +// unsafe and this hack is likely to break in future Go releases. + +//go:linkname entersyscallblock runtime.entersyscallblock +func entersyscallblock() + +//go:linkname exitsyscall runtime.exitsyscall +func exitsyscall() + +// These forwarding functions must be nosplit because 1) we must +// disallow preemption between entersyscallblock and exitsyscall, and +// 2) we have an untyped assembly frame on the stack which can not be +// grown or moved. + +//go:nosplit +func callEntersyscallblock() { + entersyscallblock() +} + +//go:nosplit +func callExitsyscall() { + exitsyscall() +} diff --git a/pkg/tcpip/link/rawfile/blockingpoll_unsafe.go b/pkg/tcpip/link/rawfile/blockingpoll_unsafe.go new file mode 100644 index 000000000..4eab77c74 --- /dev/null +++ b/pkg/tcpip/link/rawfile/blockingpoll_unsafe.go @@ -0,0 +1,29 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// +build linux,!amd64 + +package rawfile + +import ( + "syscall" + "unsafe" +) + +// BlockingPoll is just a stub function that forwards to the poll() system call +// on non-amd64 platforms. +func BlockingPoll(fds *PollEvent, nfds int, timeout int64) (int, syscall.Errno) { + n, _, e := syscall.Syscall(syscall.SYS_POLL, uintptr(unsafe.Pointer(fds)), uintptr(nfds), uintptr(timeout)) + return int(n), e +} diff --git a/pkg/tcpip/link/rawfile/errors.go b/pkg/tcpip/link/rawfile/errors.go new file mode 100644 index 000000000..8bde41637 --- /dev/null +++ b/pkg/tcpip/link/rawfile/errors.go @@ -0,0 +1,70 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// +build linux + +package rawfile + +import ( + "fmt" + "syscall" + + "gvisor.googlesource.com/gvisor/pkg/tcpip" +) + +const maxErrno = 134 + +var translations [maxErrno]*tcpip.Error + +// TranslateErrno translate an errno from the syscall package into a +// *tcpip.Error. +// +// Valid, but unreconigized errnos will be translated to +// tcpip.ErrInvalidEndpointState (EINVAL). Panics on invalid errnos. +func TranslateErrno(e syscall.Errno) *tcpip.Error { + if err := translations[e]; err != nil { + return err + } + return tcpip.ErrInvalidEndpointState +} + +func addTranslation(host syscall.Errno, trans *tcpip.Error) { + if translations[host] != nil { + panic(fmt.Sprintf("duplicate translation for host errno %q (%d)", host.Error(), host)) + } + translations[host] = trans +} + +func init() { + addTranslation(syscall.EEXIST, tcpip.ErrDuplicateAddress) + addTranslation(syscall.ENETUNREACH, tcpip.ErrNoRoute) + addTranslation(syscall.EINVAL, tcpip.ErrInvalidEndpointState) + addTranslation(syscall.EALREADY, tcpip.ErrAlreadyConnecting) + addTranslation(syscall.EISCONN, tcpip.ErrAlreadyConnected) + addTranslation(syscall.EADDRINUSE, tcpip.ErrPortInUse) + addTranslation(syscall.EADDRNOTAVAIL, tcpip.ErrBadLocalAddress) + addTranslation(syscall.EPIPE, tcpip.ErrClosedForSend) + addTranslation(syscall.EWOULDBLOCK, tcpip.ErrWouldBlock) + addTranslation(syscall.ECONNREFUSED, tcpip.ErrConnectionRefused) + addTranslation(syscall.ETIMEDOUT, tcpip.ErrTimeout) + addTranslation(syscall.EINPROGRESS, tcpip.ErrConnectStarted) + addTranslation(syscall.EDESTADDRREQ, tcpip.ErrDestinationRequired) + addTranslation(syscall.ENOTSUP, tcpip.ErrNotSupported) + addTranslation(syscall.ENOTTY, tcpip.ErrQueueSizeNotSupported) + addTranslation(syscall.ENOTCONN, tcpip.ErrNotConnected) + addTranslation(syscall.ECONNRESET, tcpip.ErrConnectionReset) + addTranslation(syscall.ECONNABORTED, tcpip.ErrConnectionAborted) + addTranslation(syscall.EMSGSIZE, tcpip.ErrMessageTooLong) + addTranslation(syscall.ENOBUFS, tcpip.ErrNoBufferSpace) +} diff --git a/pkg/tcpip/link/rawfile/rawfile_state_autogen.go b/pkg/tcpip/link/rawfile/rawfile_state_autogen.go new file mode 100755 index 000000000..662c04444 --- /dev/null +++ b/pkg/tcpip/link/rawfile/rawfile_state_autogen.go @@ -0,0 +1,4 @@ +// automatically generated by stateify. + +package rawfile + diff --git a/pkg/tcpip/link/rawfile/rawfile_unsafe.go b/pkg/tcpip/link/rawfile/rawfile_unsafe.go new file mode 100644 index 000000000..86db7a487 --- /dev/null +++ b/pkg/tcpip/link/rawfile/rawfile_unsafe.go @@ -0,0 +1,182 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// +build linux + +// Package rawfile contains utilities for using the netstack with raw host +// files on Linux hosts. +package rawfile + +import ( + "syscall" + "unsafe" + + "gvisor.googlesource.com/gvisor/pkg/tcpip" +) + +// GetMTU determines the MTU of a network interface device. +func GetMTU(name string) (uint32, error) { + fd, err := syscall.Socket(syscall.AF_UNIX, syscall.SOCK_DGRAM, 0) + if err != nil { + return 0, err + } + + defer syscall.Close(fd) + + var ifreq struct { + name [16]byte + mtu int32 + _ [20]byte + } + + copy(ifreq.name[:], name) + _, _, errno := syscall.Syscall(syscall.SYS_IOCTL, uintptr(fd), syscall.SIOCGIFMTU, uintptr(unsafe.Pointer(&ifreq))) + if errno != 0 { + return 0, errno + } + + return uint32(ifreq.mtu), nil +} + +// NonBlockingWrite writes the given buffer to a file descriptor. It fails if +// partial data is written. +func NonBlockingWrite(fd int, buf []byte) *tcpip.Error { + var ptr unsafe.Pointer + if len(buf) > 0 { + ptr = unsafe.Pointer(&buf[0]) + } + + _, _, e := syscall.RawSyscall(syscall.SYS_WRITE, uintptr(fd), uintptr(ptr), uintptr(len(buf))) + if e != 0 { + return TranslateErrno(e) + } + + return nil +} + +// NonBlockingWrite3 writes up to three byte slices to a file descriptor in a +// single syscall. It fails if partial data is written. +func NonBlockingWrite3(fd int, b1, b2, b3 []byte) *tcpip.Error { + // If the is no second buffer, issue a regular write. + if len(b2) == 0 { + return NonBlockingWrite(fd, b1) + } + + // We have two buffers. Build the iovec that represents them and issue + // a writev syscall. + iovec := [3]syscall.Iovec{ + { + Base: &b1[0], + Len: uint64(len(b1)), + }, + { + Base: &b2[0], + Len: uint64(len(b2)), + }, + } + iovecLen := uintptr(2) + + if len(b3) > 0 { + iovecLen++ + iovec[2].Base = &b3[0] + iovec[2].Len = uint64(len(b3)) + } + + _, _, e := syscall.RawSyscall(syscall.SYS_WRITEV, uintptr(fd), uintptr(unsafe.Pointer(&iovec[0])), iovecLen) + if e != 0 { + return TranslateErrno(e) + } + + return nil +} + +// PollEvent represents the pollfd structure passed to a poll() system call. +type PollEvent struct { + FD int32 + Events int16 + Revents int16 +} + +// BlockingRead reads from a file descriptor that is set up as non-blocking. If +// no data is available, it will block in a poll() syscall until the file +// descirptor becomes readable. +func BlockingRead(fd int, b []byte) (int, *tcpip.Error) { + for { + n, _, e := syscall.RawSyscall(syscall.SYS_READ, uintptr(fd), uintptr(unsafe.Pointer(&b[0])), uintptr(len(b))) + if e == 0 { + return int(n), nil + } + + event := PollEvent{ + FD: int32(fd), + Events: 1, // POLLIN + } + + _, e = BlockingPoll(&event, 1, -1) + if e != 0 && e != syscall.EINTR { + return 0, TranslateErrno(e) + } + } +} + +// BlockingReadv reads from a file descriptor that is set up as non-blocking and +// stores the data in a list of iovecs buffers. If no data is available, it will +// block in a poll() syscall until the file descriptor becomes readable. +func BlockingReadv(fd int, iovecs []syscall.Iovec) (int, *tcpip.Error) { + for { + n, _, e := syscall.RawSyscall(syscall.SYS_READV, uintptr(fd), uintptr(unsafe.Pointer(&iovecs[0])), uintptr(len(iovecs))) + if e == 0 { + return int(n), nil + } + + event := PollEvent{ + FD: int32(fd), + Events: 1, // POLLIN + } + + _, e = BlockingPoll(&event, 1, -1) + if e != 0 && e != syscall.EINTR { + return 0, TranslateErrno(e) + } + } +} + +// MMsgHdr represents the mmsg_hdr structure required by recvmmsg() on linux. +type MMsgHdr struct { + Msg syscall.Msghdr + Len uint32 + _ [4]byte +} + +// BlockingRecvMMsg reads from a file descriptor that is set up as non-blocking +// and stores the received messages in a slice of MMsgHdr structures. If no data +// is available, it will block in a poll() syscall until the file descriptor +// becomes readable. +func BlockingRecvMMsg(fd int, msgHdrs []MMsgHdr) (int, *tcpip.Error) { + for { + n, _, e := syscall.RawSyscall6(syscall.SYS_RECVMMSG, uintptr(fd), uintptr(unsafe.Pointer(&msgHdrs[0])), uintptr(len(msgHdrs)), syscall.MSG_DONTWAIT, 0, 0) + if e == 0 { + return int(n), nil + } + + event := PollEvent{ + FD: int32(fd), + Events: 1, // POLLIN + } + + if _, e := BlockingPoll(&event, 1, -1); e != 0 && e != syscall.EINTR { + return 0, TranslateErrno(e) + } + } +} diff --git a/pkg/tcpip/link/sniffer/pcap.go b/pkg/tcpip/link/sniffer/pcap.go new file mode 100644 index 000000000..c16c19647 --- /dev/null +++ b/pkg/tcpip/link/sniffer/pcap.go @@ -0,0 +1,66 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package sniffer + +import "time" + +type pcapHeader struct { + // MagicNumber is the file magic number. + MagicNumber uint32 + + // VersionMajor is the major version number. + VersionMajor uint16 + + // VersionMinor is the minor version number. + VersionMinor uint16 + + // Thiszone is the GMT to local correction. + Thiszone int32 + + // Sigfigs is the accuracy of timestamps. + Sigfigs uint32 + + // Snaplen is the max length of captured packets, in octets. + Snaplen uint32 + + // Network is the data link type. + Network uint32 +} + +const pcapPacketHeaderLen = 16 + +type pcapPacketHeader struct { + // Seconds is the timestamp seconds. + Seconds uint32 + + // Microseconds is the timestamp microseconds. + Microseconds uint32 + + // IncludedLength is the number of octets of packet saved in file. + IncludedLength uint32 + + // OriginalLength is the actual length of packet. + OriginalLength uint32 +} + +func newPCAPPacketHeader(incLen, orgLen uint32) pcapPacketHeader { + now := time.Now() + return pcapPacketHeader{ + Seconds: uint32(now.Unix()), + Microseconds: uint32(now.Nanosecond() / 1000), + IncludedLength: incLen, + OriginalLength: orgLen, + } +} diff --git a/pkg/tcpip/link/sniffer/sniffer.go b/pkg/tcpip/link/sniffer/sniffer.go new file mode 100644 index 000000000..fccabd554 --- /dev/null +++ b/pkg/tcpip/link/sniffer/sniffer.go @@ -0,0 +1,408 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package sniffer provides the implementation of data-link layer endpoints that +// wrap another endpoint and logs inbound and outbound packets. +// +// Sniffer endpoints can be used in the networking stack by calling New(eID) to +// create a new endpoint, where eID is the ID of the endpoint being wrapped, +// and then passing it as an argument to Stack.CreateNIC(). +package sniffer + +import ( + "bytes" + "encoding/binary" + "fmt" + "io" + "os" + "sync/atomic" + "time" + + "gvisor.googlesource.com/gvisor/pkg/log" + "gvisor.googlesource.com/gvisor/pkg/tcpip" + "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer" + "gvisor.googlesource.com/gvisor/pkg/tcpip/header" + "gvisor.googlesource.com/gvisor/pkg/tcpip/stack" +) + +// LogPackets is a flag used to enable or disable packet logging via the log +// package. Valid values are 0 or 1. +// +// LogPackets must be accessed atomically. +var LogPackets uint32 = 1 + +// LogPacketsToFile is a flag used to enable or disable logging packets to a +// pcap file. Valid values are 0 or 1. A file must have been specified when the +// sniffer was created for this flag to have effect. +// +// LogPacketsToFile must be accessed atomically. +var LogPacketsToFile uint32 = 1 + +type endpoint struct { + dispatcher stack.NetworkDispatcher + lower stack.LinkEndpoint + file *os.File + maxPCAPLen uint32 +} + +// New creates a new sniffer link-layer endpoint. It wraps around another +// endpoint and logs packets and they traverse the endpoint. +func New(lower tcpip.LinkEndpointID) tcpip.LinkEndpointID { + return stack.RegisterLinkEndpoint(&endpoint{ + lower: stack.FindLinkEndpoint(lower), + }) +} + +func zoneOffset() (int32, error) { + loc, err := time.LoadLocation("Local") + if err != nil { + return 0, err + } + date := time.Date(0, 0, 0, 0, 0, 0, 0, loc) + _, offset := date.Zone() + return int32(offset), nil +} + +func writePCAPHeader(w io.Writer, maxLen uint32) error { + offset, err := zoneOffset() + if err != nil { + return err + } + return binary.Write(w, binary.BigEndian, pcapHeader{ + // From https://wiki.wireshark.org/Development/LibpcapFileFormat + MagicNumber: 0xa1b2c3d4, + + VersionMajor: 2, + VersionMinor: 4, + Thiszone: offset, + Sigfigs: 0, + Snaplen: maxLen, + Network: 101, // LINKTYPE_RAW + }) +} + +// NewWithFile creates a new sniffer link-layer endpoint. It wraps around +// another endpoint and logs packets and they traverse the endpoint. +// +// Packets can be logged to file in the pcap format. A sniffer created +// with this function will not emit packets using the standard log +// package. +// +// snapLen is the maximum amount of a packet to be saved. Packets with a length +// less than or equal too snapLen will be saved in their entirety. Longer +// packets will be truncated to snapLen. +func NewWithFile(lower tcpip.LinkEndpointID, file *os.File, snapLen uint32) (tcpip.LinkEndpointID, error) { + if err := writePCAPHeader(file, snapLen); err != nil { + return 0, err + } + return stack.RegisterLinkEndpoint(&endpoint{ + lower: stack.FindLinkEndpoint(lower), + file: file, + maxPCAPLen: snapLen, + }), nil +} + +// DeliverNetworkPacket implements the stack.NetworkDispatcher interface. It is +// called by the link-layer endpoint being wrapped when a packet arrives, and +// logs the packet before forwarding to the actual dispatcher. +func (e *endpoint) DeliverNetworkPacket(linkEP stack.LinkEndpoint, remote, local tcpip.LinkAddress, protocol tcpip.NetworkProtocolNumber, vv buffer.VectorisedView) { + if atomic.LoadUint32(&LogPackets) == 1 && e.file == nil { + logPacket("recv", protocol, vv.First()) + } + if e.file != nil && atomic.LoadUint32(&LogPacketsToFile) == 1 { + vs := vv.Views() + length := vv.Size() + if length > int(e.maxPCAPLen) { + length = int(e.maxPCAPLen) + } + + buf := bytes.NewBuffer(make([]byte, 0, pcapPacketHeaderLen+length)) + if err := binary.Write(buf, binary.BigEndian, newPCAPPacketHeader(uint32(length), uint32(vv.Size()))); err != nil { + panic(err) + } + for _, v := range vs { + if length == 0 { + break + } + if len(v) > length { + v = v[:length] + } + if _, err := buf.Write([]byte(v)); err != nil { + panic(err) + } + length -= len(v) + } + if _, err := e.file.Write(buf.Bytes()); err != nil { + panic(err) + } + } + e.dispatcher.DeliverNetworkPacket(e, remote, local, protocol, vv) +} + +// Attach implements the stack.LinkEndpoint interface. It saves the dispatcher +// and registers with the lower endpoint as its dispatcher so that "e" is called +// for inbound packets. +func (e *endpoint) Attach(dispatcher stack.NetworkDispatcher) { + e.dispatcher = dispatcher + e.lower.Attach(e) +} + +// IsAttached implements stack.LinkEndpoint.IsAttached. +func (e *endpoint) IsAttached() bool { + return e.dispatcher != nil +} + +// MTU implements stack.LinkEndpoint.MTU. It just forwards the request to the +// lower endpoint. +func (e *endpoint) MTU() uint32 { + return e.lower.MTU() +} + +// Capabilities implements stack.LinkEndpoint.Capabilities. It just forwards the +// request to the lower endpoint. +func (e *endpoint) Capabilities() stack.LinkEndpointCapabilities { + return e.lower.Capabilities() +} + +// MaxHeaderLength implements the stack.LinkEndpoint interface. It just forwards +// the request to the lower endpoint. +func (e *endpoint) MaxHeaderLength() uint16 { + return e.lower.MaxHeaderLength() +} + +func (e *endpoint) LinkAddress() tcpip.LinkAddress { + return e.lower.LinkAddress() +} + +// GSOMaxSize returns the maximum GSO packet size. +func (e *endpoint) GSOMaxSize() uint32 { + if gso, ok := e.lower.(stack.GSOEndpoint); ok { + return gso.GSOMaxSize() + } + return 0 +} + +// WritePacket implements the stack.LinkEndpoint interface. It is called by +// higher-level protocols to write packets; it just logs the packet and forwards +// the request to the lower endpoint. +func (e *endpoint) WritePacket(r *stack.Route, gso *stack.GSO, hdr buffer.Prependable, payload buffer.VectorisedView, protocol tcpip.NetworkProtocolNumber) *tcpip.Error { + if atomic.LoadUint32(&LogPackets) == 1 && e.file == nil { + logPacket("send", protocol, hdr.View()) + } + if e.file != nil && atomic.LoadUint32(&LogPacketsToFile) == 1 { + hdrBuf := hdr.View() + length := len(hdrBuf) + payload.Size() + if length > int(e.maxPCAPLen) { + length = int(e.maxPCAPLen) + } + + buf := bytes.NewBuffer(make([]byte, 0, pcapPacketHeaderLen+length)) + if err := binary.Write(buf, binary.BigEndian, newPCAPPacketHeader(uint32(length), uint32(len(hdrBuf)+payload.Size()))); err != nil { + panic(err) + } + if len(hdrBuf) > length { + hdrBuf = hdrBuf[:length] + } + if _, err := buf.Write(hdrBuf); err != nil { + panic(err) + } + length -= len(hdrBuf) + if length > 0 { + for _, v := range payload.Views() { + if len(v) > length { + v = v[:length] + } + n, err := buf.Write(v) + if err != nil { + panic(err) + } + length -= n + if length == 0 { + break + } + } + } + if _, err := e.file.Write(buf.Bytes()); err != nil { + panic(err) + } + } + return e.lower.WritePacket(r, gso, hdr, payload, protocol) +} + +func logPacket(prefix string, protocol tcpip.NetworkProtocolNumber, b buffer.View) { + // Figure out the network layer info. + var transProto uint8 + src := tcpip.Address("unknown") + dst := tcpip.Address("unknown") + id := 0 + size := uint16(0) + var fragmentOffset uint16 + var moreFragments bool + switch protocol { + case header.IPv4ProtocolNumber: + ipv4 := header.IPv4(b) + fragmentOffset = ipv4.FragmentOffset() + moreFragments = ipv4.Flags()&header.IPv4FlagMoreFragments == header.IPv4FlagMoreFragments + src = ipv4.SourceAddress() + dst = ipv4.DestinationAddress() + transProto = ipv4.Protocol() + size = ipv4.TotalLength() - uint16(ipv4.HeaderLength()) + b = b[ipv4.HeaderLength():] + id = int(ipv4.ID()) + + case header.IPv6ProtocolNumber: + ipv6 := header.IPv6(b) + src = ipv6.SourceAddress() + dst = ipv6.DestinationAddress() + transProto = ipv6.NextHeader() + size = ipv6.PayloadLength() + b = b[header.IPv6MinimumSize:] + + case header.ARPProtocolNumber: + arp := header.ARP(b) + log.Infof( + "%s arp %v (%v) -> %v (%v) valid:%v", + prefix, + tcpip.Address(arp.ProtocolAddressSender()), tcpip.LinkAddress(arp.HardwareAddressSender()), + tcpip.Address(arp.ProtocolAddressTarget()), tcpip.LinkAddress(arp.HardwareAddressTarget()), + arp.IsValid(), + ) + return + default: + log.Infof("%s unknown network protocol", prefix) + return + } + + // Figure out the transport layer info. + transName := "unknown" + srcPort := uint16(0) + dstPort := uint16(0) + details := "" + switch tcpip.TransportProtocolNumber(transProto) { + case header.ICMPv4ProtocolNumber: + transName = "icmp" + icmp := header.ICMPv4(b) + icmpType := "unknown" + if fragmentOffset == 0 { + switch icmp.Type() { + case header.ICMPv4EchoReply: + icmpType = "echo reply" + case header.ICMPv4DstUnreachable: + icmpType = "destination unreachable" + case header.ICMPv4SrcQuench: + icmpType = "source quench" + case header.ICMPv4Redirect: + icmpType = "redirect" + case header.ICMPv4Echo: + icmpType = "echo" + case header.ICMPv4TimeExceeded: + icmpType = "time exceeded" + case header.ICMPv4ParamProblem: + icmpType = "param problem" + case header.ICMPv4Timestamp: + icmpType = "timestamp" + case header.ICMPv4TimestampReply: + icmpType = "timestamp reply" + case header.ICMPv4InfoRequest: + icmpType = "info request" + case header.ICMPv4InfoReply: + icmpType = "info reply" + } + } + log.Infof("%s %s %v -> %v %s len:%d id:%04x code:%d", prefix, transName, src, dst, icmpType, size, id, icmp.Code()) + return + + case header.ICMPv6ProtocolNumber: + transName = "icmp" + icmp := header.ICMPv6(b) + icmpType := "unknown" + switch icmp.Type() { + case header.ICMPv6DstUnreachable: + icmpType = "destination unreachable" + case header.ICMPv6PacketTooBig: + icmpType = "packet too big" + case header.ICMPv6TimeExceeded: + icmpType = "time exceeded" + case header.ICMPv6ParamProblem: + icmpType = "param problem" + case header.ICMPv6EchoRequest: + icmpType = "echo request" + case header.ICMPv6EchoReply: + icmpType = "echo reply" + case header.ICMPv6RouterSolicit: + icmpType = "router solicit" + case header.ICMPv6RouterAdvert: + icmpType = "router advert" + case header.ICMPv6NeighborSolicit: + icmpType = "neighbor solicit" + case header.ICMPv6NeighborAdvert: + icmpType = "neighbor advert" + case header.ICMPv6RedirectMsg: + icmpType = "redirect message" + } + log.Infof("%s %s %v -> %v %s len:%d id:%04x code:%d", prefix, transName, src, dst, icmpType, size, id, icmp.Code()) + return + + case header.UDPProtocolNumber: + transName = "udp" + udp := header.UDP(b) + if fragmentOffset == 0 && len(udp) >= header.UDPMinimumSize { + srcPort = udp.SourcePort() + dstPort = udp.DestinationPort() + } + size -= header.UDPMinimumSize + + details = fmt.Sprintf("xsum: 0x%x", udp.Checksum()) + + case header.TCPProtocolNumber: + transName = "tcp" + tcp := header.TCP(b) + if fragmentOffset == 0 && len(tcp) >= header.TCPMinimumSize { + offset := int(tcp.DataOffset()) + if offset < header.TCPMinimumSize { + details += fmt.Sprintf("invalid packet: tcp data offset too small %d", offset) + break + } + if offset > len(tcp) && !moreFragments { + details += fmt.Sprintf("invalid packet: tcp data offset %d larger than packet buffer length %d", offset, len(tcp)) + break + } + + srcPort = tcp.SourcePort() + dstPort = tcp.DestinationPort() + size -= uint16(offset) + + // Initialize the TCP flags. + flags := tcp.Flags() + flagsStr := []byte("FSRPAU") + for i := range flagsStr { + if flags&(1<<uint(i)) == 0 { + flagsStr[i] = ' ' + } + } + details = fmt.Sprintf("flags:0x%02x (%v) seqnum: %v ack: %v win: %v xsum:0x%x", flags, string(flagsStr), tcp.SequenceNumber(), tcp.AckNumber(), tcp.WindowSize(), tcp.Checksum()) + if flags&header.TCPFlagSyn != 0 { + details += fmt.Sprintf(" options: %+v", header.ParseSynOptions(tcp.Options(), flags&header.TCPFlagAck != 0)) + } else { + details += fmt.Sprintf(" options: %+v", tcp.ParsedOptions()) + } + } + + default: + log.Infof("%s %v -> %v unknown transport protocol: %d", prefix, src, dst, transProto) + return + } + + log.Infof("%s %s %v:%v -> %v:%v len:%d id:%04x %s", prefix, transName, src, srcPort, dst, dstPort, size, id, details) +} diff --git a/pkg/tcpip/link/sniffer/sniffer_state_autogen.go b/pkg/tcpip/link/sniffer/sniffer_state_autogen.go new file mode 100755 index 000000000..cfd84a739 --- /dev/null +++ b/pkg/tcpip/link/sniffer/sniffer_state_autogen.go @@ -0,0 +1,4 @@ +// automatically generated by stateify. + +package sniffer + |