summaryrefslogtreecommitdiffhomepage
path: root/pkg/tcpip
diff options
context:
space:
mode:
Diffstat (limited to 'pkg/tcpip')
-rw-r--r--pkg/tcpip/buffer/view.go10
-rw-r--r--pkg/tcpip/link/channel/channel.go4
-rw-r--r--pkg/tcpip/link/fdbased/endpoint.go14
-rw-r--r--pkg/tcpip/link/fdbased/mmap.go16
-rw-r--r--pkg/tcpip/link/fdbased/packet_dispatchers.go45
-rw-r--r--pkg/tcpip/link/loopback/loopback.go21
-rw-r--r--pkg/tcpip/link/sharedmem/sharedmem.go25
-rw-r--r--pkg/tcpip/link/sharedmem/tx.go14
-rw-r--r--pkg/tcpip/link/sniffer/sniffer.go19
-rw-r--r--pkg/tcpip/link/tun/device.go23
-rw-r--r--pkg/tcpip/network/arp/arp.go26
-rw-r--r--pkg/tcpip/network/ipv4/icmp.go38
-rw-r--r--pkg/tcpip/network/ipv4/ipv4.go188
-rw-r--r--pkg/tcpip/network/ipv6/icmp.go55
-rw-r--r--pkg/tcpip/network/ipv6/ipv6.go47
-rw-r--r--pkg/tcpip/stack/conntrack.go31
-rw-r--r--pkg/tcpip/stack/headertype_string.go39
-rw-r--r--pkg/tcpip/stack/iptables.go2
-rw-r--r--pkg/tcpip/stack/iptables_targets.go9
-rw-r--r--pkg/tcpip/stack/ndp.go32
-rw-r--r--pkg/tcpip/stack/nic.go40
-rw-r--r--pkg/tcpip/stack/packet_buffer.go260
-rw-r--r--pkg/tcpip/stack/route.go5
-rw-r--r--pkg/tcpip/transport/icmp/endpoint.go39
-rw-r--r--pkg/tcpip/transport/packet/endpoint.go35
-rw-r--r--pkg/tcpip/transport/raw/endpoint.go30
-rw-r--r--pkg/tcpip/transport/tcp/connect.go30
-rw-r--r--pkg/tcpip/transport/tcp/protocol.go17
-rw-r--r--pkg/tcpip/transport/tcp/segment.go2
-rw-r--r--pkg/tcpip/transport/udp/endpoint.go26
-rw-r--r--pkg/tcpip/transport/udp/protocol.go57
31 files changed, 700 insertions, 499 deletions
diff --git a/pkg/tcpip/buffer/view.go b/pkg/tcpip/buffer/view.go
index 9a3c5d6c3..ea0c5413d 100644
--- a/pkg/tcpip/buffer/view.go
+++ b/pkg/tcpip/buffer/view.go
@@ -65,6 +65,16 @@ func (v View) ToVectorisedView() VectorisedView {
return NewVectorisedView(len(v), []View{v})
}
+// IsEmpty returns whether v is of length zero.
+func (v View) IsEmpty() bool {
+ return len(v) == 0
+}
+
+// Size returns the length of v.
+func (v View) Size() int {
+ return len(v)
+}
+
// VectorisedView is a vectorised version of View using non contiguous memory.
// It supports all the convenience methods supported by View.
//
diff --git a/pkg/tcpip/link/channel/channel.go b/pkg/tcpip/link/channel/channel.go
index e12a5929b..c95aef63c 100644
--- a/pkg/tcpip/link/channel/channel.go
+++ b/pkg/tcpip/link/channel/channel.go
@@ -274,7 +274,9 @@ func (e *Endpoint) WritePackets(r *stack.Route, gso *stack.GSO, pkts stack.Packe
// WriteRawPacket implements stack.LinkEndpoint.WriteRawPacket.
func (e *Endpoint) WriteRawPacket(vv buffer.VectorisedView) *tcpip.Error {
p := PacketInfo{
- Pkt: &stack.PacketBuffer{Data: vv},
+ Pkt: stack.NewPacketBuffer(stack.PacketBufferOptions{
+ Data: vv,
+ }),
Proto: 0,
GSO: nil,
}
diff --git a/pkg/tcpip/link/fdbased/endpoint.go b/pkg/tcpip/link/fdbased/endpoint.go
index c18bb91fb..975309fc8 100644
--- a/pkg/tcpip/link/fdbased/endpoint.go
+++ b/pkg/tcpip/link/fdbased/endpoint.go
@@ -390,8 +390,7 @@ const (
func (e *endpoint) AddHeader(local, remote tcpip.LinkAddress, protocol tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) {
if e.hdrSize > 0 {
// Add ethernet header if needed.
- eth := header.Ethernet(pkt.Header.Prepend(header.EthernetMinimumSize))
- pkt.LinkHeader = buffer.View(eth)
+ eth := header.Ethernet(pkt.LinkHeader().Push(header.EthernetMinimumSize))
ethHdr := &header.EthernetFields{
DstAddr: remote,
Type: protocol,
@@ -420,7 +419,7 @@ func (e *endpoint) WritePacket(r *stack.Route, gso *stack.GSO, protocol tcpip.Ne
if e.Capabilities()&stack.CapabilityHardwareGSO != 0 {
vnetHdr := virtioNetHdr{}
if gso != nil {
- vnetHdr.hdrLen = uint16(pkt.Header.UsedLength())
+ vnetHdr.hdrLen = uint16(pkt.HeaderSize())
if gso.NeedsCsum {
vnetHdr.flags = _VIRTIO_NET_HDR_F_NEEDS_CSUM
vnetHdr.csumStart = header.EthernetMinimumSize + gso.L3HdrLen
@@ -443,11 +442,9 @@ func (e *endpoint) WritePacket(r *stack.Route, gso *stack.GSO, protocol tcpip.Ne
builder.Add(vnetHdrBuf)
}
- builder.Add(pkt.Header.View())
- for _, v := range pkt.Data.Views() {
+ for _, v := range pkt.Views() {
builder.Add(v)
}
-
return rawfile.NonBlockingWriteIovec(fd, builder.Build())
}
@@ -463,7 +460,7 @@ func (e *endpoint) sendBatch(batchFD int, batch []*stack.PacketBuffer) (int, *tc
if e.Capabilities()&stack.CapabilityHardwareGSO != 0 {
vnetHdr := virtioNetHdr{}
if pkt.GSOOptions != nil {
- vnetHdr.hdrLen = uint16(pkt.Header.UsedLength())
+ vnetHdr.hdrLen = uint16(pkt.HeaderSize())
if pkt.GSOOptions.NeedsCsum {
vnetHdr.flags = _VIRTIO_NET_HDR_F_NEEDS_CSUM
vnetHdr.csumStart = header.EthernetMinimumSize + pkt.GSOOptions.L3HdrLen
@@ -486,8 +483,7 @@ func (e *endpoint) sendBatch(batchFD int, batch []*stack.PacketBuffer) (int, *tc
var builder iovec.Builder
builder.Add(vnetHdrBuf)
- builder.Add(pkt.Header.View())
- for _, v := range pkt.Data.Views() {
+ for _, v := range pkt.Views() {
builder.Add(v)
}
iovecs := builder.Build()
diff --git a/pkg/tcpip/link/fdbased/mmap.go b/pkg/tcpip/link/fdbased/mmap.go
index 2dfd29aa9..c475dda20 100644
--- a/pkg/tcpip/link/fdbased/mmap.go
+++ b/pkg/tcpip/link/fdbased/mmap.go
@@ -18,6 +18,7 @@ package fdbased
import (
"encoding/binary"
+ "fmt"
"syscall"
"golang.org/x/sys/unix"
@@ -170,10 +171,9 @@ func (d *packetMMapDispatcher) dispatch() (bool, *tcpip.Error) {
var (
p tcpip.NetworkProtocolNumber
remote, local tcpip.LinkAddress
- eth header.Ethernet
)
if d.e.hdrSize > 0 {
- eth = header.Ethernet(pkt)
+ eth := header.Ethernet(pkt)
p = eth.Type()
remote = eth.SourceAddress()
local = eth.DestinationAddress()
@@ -190,10 +190,14 @@ func (d *packetMMapDispatcher) dispatch() (bool, *tcpip.Error) {
}
}
- pkt = pkt[d.e.hdrSize:]
- d.e.dispatcher.DeliverNetworkPacket(remote, local, p, &stack.PacketBuffer{
- Data: buffer.View(pkt).ToVectorisedView(),
- LinkHeader: buffer.View(eth),
+ pbuf := stack.NewPacketBuffer(stack.PacketBufferOptions{
+ Data: buffer.View(pkt).ToVectorisedView(),
})
+ if d.e.hdrSize > 0 {
+ if _, ok := pbuf.LinkHeader().Consume(d.e.hdrSize); !ok {
+ panic(fmt.Sprintf("LinkHeader().Consume(%d) must succeed", d.e.hdrSize))
+ }
+ }
+ d.e.dispatcher.DeliverNetworkPacket(remote, local, p, pbuf)
return true, nil
}
diff --git a/pkg/tcpip/link/fdbased/packet_dispatchers.go b/pkg/tcpip/link/fdbased/packet_dispatchers.go
index d8f2504b3..8c3ca86d6 100644
--- a/pkg/tcpip/link/fdbased/packet_dispatchers.go
+++ b/pkg/tcpip/link/fdbased/packet_dispatchers.go
@@ -103,7 +103,7 @@ func (d *readVDispatcher) dispatch() (bool, *tcpip.Error) {
d.allocateViews(BufConfig)
n, err := rawfile.BlockingReadv(d.fd, d.iovecs)
- if err != nil {
+ if n == 0 || err != nil {
return false, err
}
if d.e.Capabilities()&stack.CapabilityHardwareGSO != 0 {
@@ -111,17 +111,22 @@ func (d *readVDispatcher) dispatch() (bool, *tcpip.Error) {
// isn't used and it isn't in a view.
n -= virtioNetHdrSize
}
- if n <= d.e.hdrSize {
- return false, nil
- }
+
+ used := d.capViews(n, BufConfig)
+ pkt := stack.NewPacketBuffer(stack.PacketBufferOptions{
+ Data: buffer.NewVectorisedView(n, append([]buffer.View(nil), d.views[:used]...)),
+ })
var (
p tcpip.NetworkProtocolNumber
remote, local tcpip.LinkAddress
- eth header.Ethernet
)
if d.e.hdrSize > 0 {
- eth = header.Ethernet(d.views[0][:header.EthernetMinimumSize])
+ hdr, ok := pkt.LinkHeader().Consume(d.e.hdrSize)
+ if !ok {
+ return false, nil
+ }
+ eth := header.Ethernet(hdr)
p = eth.Type()
remote = eth.SourceAddress()
local = eth.DestinationAddress()
@@ -138,13 +143,6 @@ func (d *readVDispatcher) dispatch() (bool, *tcpip.Error) {
}
}
- used := d.capViews(n, BufConfig)
- pkt := &stack.PacketBuffer{
- Data: buffer.NewVectorisedView(n, append([]buffer.View(nil), d.views[:used]...)),
- LinkHeader: buffer.View(eth),
- }
- pkt.Data.TrimFront(d.e.hdrSize)
-
d.e.dispatcher.DeliverNetworkPacket(remote, local, p, pkt)
// Prepare e.views for another packet: release used views.
@@ -268,17 +266,22 @@ func (d *recvMMsgDispatcher) dispatch() (bool, *tcpip.Error) {
if d.e.Capabilities()&stack.CapabilityHardwareGSO != 0 {
n -= virtioNetHdrSize
}
- if n <= d.e.hdrSize {
- return false, nil
- }
+
+ used := d.capViews(k, int(n), BufConfig)
+ pkt := stack.NewPacketBuffer(stack.PacketBufferOptions{
+ Data: buffer.NewVectorisedView(int(n), append([]buffer.View(nil), d.views[k][:used]...)),
+ })
var (
p tcpip.NetworkProtocolNumber
remote, local tcpip.LinkAddress
- eth header.Ethernet
)
if d.e.hdrSize > 0 {
- eth = header.Ethernet(d.views[k][0][:header.EthernetMinimumSize])
+ hdr, ok := pkt.LinkHeader().Consume(d.e.hdrSize)
+ if !ok {
+ return false, nil
+ }
+ eth := header.Ethernet(hdr)
p = eth.Type()
remote = eth.SourceAddress()
local = eth.DestinationAddress()
@@ -295,12 +298,6 @@ func (d *recvMMsgDispatcher) dispatch() (bool, *tcpip.Error) {
}
}
- used := d.capViews(k, int(n), BufConfig)
- pkt := &stack.PacketBuffer{
- Data: buffer.NewVectorisedView(int(n), append([]buffer.View(nil), d.views[k][:used]...)),
- LinkHeader: buffer.View(eth),
- }
- pkt.Data.TrimFront(d.e.hdrSize)
d.e.dispatcher.DeliverNetworkPacket(remote, local, p, pkt)
// Prepare e.views for another packet: release used views.
diff --git a/pkg/tcpip/link/loopback/loopback.go b/pkg/tcpip/link/loopback/loopback.go
index 781cdd317..38aa694e4 100644
--- a/pkg/tcpip/link/loopback/loopback.go
+++ b/pkg/tcpip/link/loopback/loopback.go
@@ -77,16 +77,16 @@ func (*endpoint) Wait() {}
// WritePacket implements stack.LinkEndpoint.WritePacket. It delivers outbound
// packets to the network-layer dispatcher.
func (e *endpoint) WritePacket(_ *stack.Route, _ *stack.GSO, protocol tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) *tcpip.Error {
- views := make([]buffer.View, 1, 1+len(pkt.Data.Views()))
- views[0] = pkt.Header.View()
- views = append(views, pkt.Data.Views()...)
+ // Construct data as the unparsed portion for the loopback packet.
+ data := buffer.NewVectorisedView(pkt.Size(), pkt.Views())
// Because we're immediately turning around and writing the packet back
// to the rx path, we intentionally don't preserve the remote and local
// link addresses from the stack.Route we're passed.
- e.dispatcher.DeliverNetworkPacket("" /* remote */, "" /* local */, protocol, &stack.PacketBuffer{
- Data: buffer.NewVectorisedView(len(views[0])+pkt.Data.Size(), views),
+ newPkt := stack.NewPacketBuffer(stack.PacketBufferOptions{
+ Data: data,
})
+ e.dispatcher.DeliverNetworkPacket("" /* remote */, "" /* local */, protocol, newPkt)
return nil
}
@@ -98,18 +98,17 @@ func (e *endpoint) WritePackets(*stack.Route, *stack.GSO, stack.PacketBufferList
// WriteRawPacket implements stack.LinkEndpoint.WriteRawPacket.
func (e *endpoint) WriteRawPacket(vv buffer.VectorisedView) *tcpip.Error {
+ pkt := stack.NewPacketBuffer(stack.PacketBufferOptions{
+ Data: vv,
+ })
// There should be an ethernet header at the beginning of vv.
- hdr, ok := vv.PullUp(header.EthernetMinimumSize)
+ hdr, ok := pkt.LinkHeader().Consume(header.EthernetMinimumSize)
if !ok {
// Reject the packet if it's shorter than an ethernet header.
return tcpip.ErrBadAddress
}
linkHeader := header.Ethernet(hdr)
- vv.TrimFront(len(linkHeader))
- e.dispatcher.DeliverNetworkPacket("" /* remote */, "" /* local */, linkHeader.Type(), &stack.PacketBuffer{
- Data: vv,
- LinkHeader: buffer.View(linkHeader),
- })
+ e.dispatcher.DeliverNetworkPacket("" /* remote */, "" /* local */, linkHeader.Type(), pkt)
return nil
}
diff --git a/pkg/tcpip/link/sharedmem/sharedmem.go b/pkg/tcpip/link/sharedmem/sharedmem.go
index 507c76b76..7fb8a6c49 100644
--- a/pkg/tcpip/link/sharedmem/sharedmem.go
+++ b/pkg/tcpip/link/sharedmem/sharedmem.go
@@ -186,8 +186,7 @@ func (e *endpoint) LinkAddress() tcpip.LinkAddress {
// AddHeader implements stack.LinkEndpoint.AddHeader.
func (e *endpoint) AddHeader(local, remote tcpip.LinkAddress, protocol tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) {
// Add ethernet header if needed.
- eth := header.Ethernet(pkt.Header.Prepend(header.EthernetMinimumSize))
- pkt.LinkHeader = buffer.View(eth)
+ eth := header.Ethernet(pkt.LinkHeader().Push(header.EthernetMinimumSize))
ethHdr := &header.EthernetFields{
DstAddr: remote,
Type: protocol,
@@ -207,10 +206,10 @@ func (e *endpoint) AddHeader(local, remote tcpip.LinkAddress, protocol tcpip.Net
func (e *endpoint) WritePacket(r *stack.Route, _ *stack.GSO, protocol tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) *tcpip.Error {
e.AddHeader(r.LocalLinkAddress, r.RemoteLinkAddress, protocol, pkt)
- v := pkt.Data.ToView()
+ views := pkt.Views()
// Transmit the packet.
e.mu.Lock()
- ok := e.tx.transmit(pkt.Header.View(), v)
+ ok := e.tx.transmit(views...)
e.mu.Unlock()
if !ok {
@@ -227,10 +226,10 @@ func (e *endpoint) WritePackets(r *stack.Route, _ *stack.GSO, pkts stack.PacketB
// WriteRawPacket implements stack.LinkEndpoint.WriteRawPacket.
func (e *endpoint) WriteRawPacket(vv buffer.VectorisedView) *tcpip.Error {
- v := vv.ToView()
+ views := vv.Views()
// Transmit the packet.
e.mu.Lock()
- ok := e.tx.transmit(v, buffer.View{})
+ ok := e.tx.transmit(views...)
e.mu.Unlock()
if !ok {
@@ -276,16 +275,18 @@ func (e *endpoint) dispatchLoop(d stack.NetworkDispatcher) {
rxb[i].Size = e.bufferSize
}
- if n < header.EthernetMinimumSize {
+ pkt := stack.NewPacketBuffer(stack.PacketBufferOptions{
+ Data: buffer.View(b).ToVectorisedView(),
+ })
+
+ hdr, ok := pkt.LinkHeader().Consume(header.EthernetMinimumSize)
+ if !ok {
continue
}
+ eth := header.Ethernet(hdr)
// Send packet up the stack.
- eth := header.Ethernet(b[:header.EthernetMinimumSize])
- d.DeliverNetworkPacket(eth.SourceAddress(), eth.DestinationAddress(), eth.Type(), &stack.PacketBuffer{
- Data: buffer.View(b[header.EthernetMinimumSize:]).ToVectorisedView(),
- LinkHeader: buffer.View(eth),
- })
+ d.DeliverNetworkPacket(eth.SourceAddress(), eth.DestinationAddress(), eth.Type(), pkt)
}
// Clean state.
diff --git a/pkg/tcpip/link/sharedmem/tx.go b/pkg/tcpip/link/sharedmem/tx.go
index 6b8d7859d..44f421c2d 100644
--- a/pkg/tcpip/link/sharedmem/tx.go
+++ b/pkg/tcpip/link/sharedmem/tx.go
@@ -18,6 +18,7 @@ import (
"math"
"syscall"
+ "gvisor.dev/gvisor/pkg/tcpip/buffer"
"gvisor.dev/gvisor/pkg/tcpip/link/sharedmem/queue"
)
@@ -76,9 +77,9 @@ func (t *tx) cleanup() {
syscall.Munmap(t.data)
}
-// transmit sends a packet made up of up to two buffers. Returns a boolean that
-// specifies whether the packet was successfully transmitted.
-func (t *tx) transmit(a, b []byte) bool {
+// transmit sends a packet made of bufs. Returns a boolean that specifies
+// whether the packet was successfully transmitted.
+func (t *tx) transmit(bufs ...buffer.View) bool {
// Pull completions from the tx queue and add their buffers back to the
// pool so that we can reuse them.
for {
@@ -93,7 +94,10 @@ func (t *tx) transmit(a, b []byte) bool {
}
bSize := t.bufs.entrySize
- total := uint32(len(a) + len(b))
+ total := uint32(0)
+ for _, data := range bufs {
+ total += uint32(len(data))
+ }
bufCount := (total + bSize - 1) / bSize
// Allocate enough buffers to hold all the data.
@@ -115,7 +119,7 @@ func (t *tx) transmit(a, b []byte) bool {
// Copy data into allocated buffers.
nBuf := buf
var dBuf []byte
- for _, data := range [][]byte{a, b} {
+ for _, data := range bufs {
for len(data) > 0 {
if len(dBuf) == 0 {
dBuf = t.data[nBuf.Offset:][:nBuf.Size]
diff --git a/pkg/tcpip/link/sniffer/sniffer.go b/pkg/tcpip/link/sniffer/sniffer.go
index 509076643..4fb127978 100644
--- a/pkg/tcpip/link/sniffer/sniffer.go
+++ b/pkg/tcpip/link/sniffer/sniffer.go
@@ -134,7 +134,7 @@ func (e *endpoint) dumpPacket(prefix string, gso *stack.GSO, protocol tcpip.Netw
logPacket(prefix, protocol, pkt, gso)
}
if writer != nil && atomic.LoadUint32(&LogPacketsToPCAP) == 1 {
- totalLength := pkt.Header.UsedLength() + pkt.Data.Size()
+ totalLength := pkt.Size()
length := totalLength
if max := int(e.maxPCAPLen); length > max {
length = max
@@ -155,12 +155,11 @@ func (e *endpoint) dumpPacket(prefix string, gso *stack.GSO, protocol tcpip.Netw
length -= n
}
}
- write(pkt.Header.View())
- for _, view := range pkt.Data.Views() {
+ for _, v := range pkt.Views() {
if length == 0 {
break
}
- write(view)
+ write(v)
}
}
}
@@ -185,9 +184,9 @@ func (e *endpoint) WritePackets(r *stack.Route, gso *stack.GSO, pkts stack.Packe
// WriteRawPacket implements stack.LinkEndpoint.WriteRawPacket.
func (e *endpoint) WriteRawPacket(vv buffer.VectorisedView) *tcpip.Error {
- e.dumpPacket("send", nil, 0, &stack.PacketBuffer{
+ e.dumpPacket("send", nil, 0, stack.NewPacketBuffer(stack.PacketBufferOptions{
Data: vv,
- })
+ }))
return e.Endpoint.WriteRawPacket(vv)
}
@@ -201,12 +200,8 @@ func logPacket(prefix string, protocol tcpip.NetworkProtocolNumber, pkt *stack.P
var fragmentOffset uint16
var moreFragments bool
- // Create a clone of pkt, including any headers if present. Avoid allocating
- // backing memory for the clone.
- views := [8]buffer.View{}
- vv := buffer.NewVectorisedView(0, views[:0])
- vv.AppendView(pkt.Header.View())
- vv.Append(pkt.Data)
+ // Examine the packet using a new VV. Backing storage must not be written.
+ vv := buffer.NewVectorisedView(pkt.Size(), pkt.Views())
switch protocol {
case header.IPv4ProtocolNumber:
diff --git a/pkg/tcpip/link/tun/device.go b/pkg/tcpip/link/tun/device.go
index 22b0a12bd..3b1510a33 100644
--- a/pkg/tcpip/link/tun/device.go
+++ b/pkg/tcpip/link/tun/device.go
@@ -215,12 +215,11 @@ func (d *Device) Write(data []byte) (int64, error) {
remote = tcpip.LinkAddress(zeroMAC[:])
}
- pkt := &stack.PacketBuffer{
- Data: buffer.View(data).ToVectorisedView(),
- }
- if ethHdr != nil {
- pkt.LinkHeader = buffer.View(ethHdr)
- }
+ pkt := stack.NewPacketBuffer(stack.PacketBufferOptions{
+ ReserveHeaderBytes: len(ethHdr),
+ Data: buffer.View(data).ToVectorisedView(),
+ })
+ copy(pkt.LinkHeader().Push(len(ethHdr)), ethHdr)
endpoint.InjectLinkAddr(protocol, remote, pkt)
return dataLen, nil
}
@@ -265,21 +264,22 @@ func (d *Device) encodePkt(info *channel.PacketInfo) (buffer.View, bool) {
// If the packet does not already have link layer header, and the route
// does not exist, we can't compute it. This is possibly a raw packet, tun
// device doesn't support this at the moment.
- if info.Pkt.LinkHeader == nil && info.Route.RemoteLinkAddress == "" {
+ if info.Pkt.LinkHeader().View().IsEmpty() && info.Route.RemoteLinkAddress == "" {
return nil, false
}
// Ethernet header (TAP only).
if d.hasFlags(linux.IFF_TAP) {
// Add ethernet header if not provided.
- if info.Pkt.LinkHeader == nil {
+ if info.Pkt.LinkHeader().View().IsEmpty() {
d.endpoint.AddHeader(info.Route.LocalLinkAddress, info.Route.RemoteLinkAddress, info.Proto, info.Pkt)
}
- vv.AppendView(info.Pkt.LinkHeader)
+ vv.AppendView(info.Pkt.LinkHeader().View())
}
// Append upper headers.
- vv.AppendView(buffer.View(info.Pkt.Header.View()[len(info.Pkt.LinkHeader):]))
+ vv.AppendView(info.Pkt.NetworkHeader().View())
+ vv.AppendView(info.Pkt.TransportHeader().View())
// Append data payload.
vv.Append(info.Pkt.Data)
@@ -361,8 +361,7 @@ func (e *tunEndpoint) AddHeader(local, remote tcpip.LinkAddress, protocol tcpip.
if !e.isTap {
return
}
- eth := header.Ethernet(pkt.Header.Prepend(header.EthernetMinimumSize))
- pkt.LinkHeader = buffer.View(eth)
+ eth := header.Ethernet(pkt.LinkHeader().Push(header.EthernetMinimumSize))
hdr := &header.EthernetFields{
SrcAddr: local,
DstAddr: remote,
diff --git a/pkg/tcpip/network/arp/arp.go b/pkg/tcpip/network/arp/arp.go
index 31a242482..1ad788a17 100644
--- a/pkg/tcpip/network/arp/arp.go
+++ b/pkg/tcpip/network/arp/arp.go
@@ -99,7 +99,7 @@ func (e *endpoint) WriteHeaderIncludedPacket(r *stack.Route, pkt *stack.PacketBu
}
func (e *endpoint) HandlePacket(r *stack.Route, pkt *stack.PacketBuffer) {
- h := header.ARP(pkt.NetworkHeader)
+ h := header.ARP(pkt.NetworkHeader().View())
if !h.IsValid() {
return
}
@@ -110,17 +110,17 @@ func (e *endpoint) HandlePacket(r *stack.Route, pkt *stack.PacketBuffer) {
if e.linkAddrCache.CheckLocalAddress(e.nicID, header.IPv4ProtocolNumber, localAddr) == 0 {
return // we have no useful answer, ignore the request
}
- hdr := buffer.NewPrependable(int(e.linkEP.MaxHeaderLength()) + header.ARPSize)
- packet := header.ARP(hdr.Prepend(header.ARPSize))
+ pkt := stack.NewPacketBuffer(stack.PacketBufferOptions{
+ ReserveHeaderBytes: int(e.linkEP.MaxHeaderLength()) + header.ARPSize,
+ })
+ packet := header.ARP(pkt.NetworkHeader().Push(header.ARPSize))
packet.SetIPv4OverEthernet()
packet.SetOp(header.ARPReply)
copy(packet.HardwareAddressSender(), r.LocalLinkAddress[:])
copy(packet.ProtocolAddressSender(), h.ProtocolAddressTarget())
copy(packet.HardwareAddressTarget(), h.HardwareAddressSender())
copy(packet.ProtocolAddressTarget(), h.ProtocolAddressSender())
- e.linkEP.WritePacket(r, nil /* gso */, ProtocolNumber, &stack.PacketBuffer{
- Header: hdr,
- })
+ _ = e.linkEP.WritePacket(r, nil /* gso */, ProtocolNumber, pkt)
fallthrough // also fill the cache from requests
case header.ARPReply:
addr := tcpip.Address(h.ProtocolAddressSender())
@@ -168,17 +168,17 @@ func (*protocol) LinkAddressRequest(addr, localAddr tcpip.Address, remoteLinkAdd
r.RemoteLinkAddress = header.EthernetBroadcastAddress
}
- hdr := buffer.NewPrependable(int(linkEP.MaxHeaderLength()) + header.ARPSize)
- h := header.ARP(hdr.Prepend(header.ARPSize))
+ pkt := stack.NewPacketBuffer(stack.PacketBufferOptions{
+ ReserveHeaderBytes: int(linkEP.MaxHeaderLength()) + header.ARPSize,
+ })
+ h := header.ARP(pkt.NetworkHeader().Push(header.ARPSize))
h.SetIPv4OverEthernet()
h.SetOp(header.ARPRequest)
copy(h.HardwareAddressSender(), linkEP.LinkAddress())
copy(h.ProtocolAddressSender(), localAddr)
copy(h.ProtocolAddressTarget(), addr)
- return linkEP.WritePacket(r, nil /* gso */, ProtocolNumber, &stack.PacketBuffer{
- Header: hdr,
- })
+ return linkEP.WritePacket(r, nil /* gso */, ProtocolNumber, pkt)
}
// ResolveStaticAddress implements stack.LinkAddressResolver.ResolveStaticAddress.
@@ -210,12 +210,10 @@ func (*protocol) Wait() {}
// Parse implements stack.NetworkProtocol.Parse.
func (*protocol) Parse(pkt *stack.PacketBuffer) (proto tcpip.TransportProtocolNumber, hasTransportHdr bool, ok bool) {
- hdr, ok := pkt.Data.PullUp(header.ARPSize)
+ _, ok = pkt.NetworkHeader().Consume(header.ARPSize)
if !ok {
return 0, false, false
}
- pkt.NetworkHeader = hdr
- pkt.Data.TrimFront(header.ARPSize)
return 0, false, true
}
diff --git a/pkg/tcpip/network/ipv4/icmp.go b/pkg/tcpip/network/ipv4/icmp.go
index 94803a359..067d770f3 100644
--- a/pkg/tcpip/network/ipv4/icmp.go
+++ b/pkg/tcpip/network/ipv4/icmp.go
@@ -89,12 +89,14 @@ func (e *endpoint) handleICMP(r *stack.Route, pkt *stack.PacketBuffer) {
return
}
+ // Make a copy of data before pkt gets sent to raw socket.
+ // DeliverTransportPacket will take ownership of pkt.
+ replyData := pkt.Data.Clone(nil)
+ replyData.TrimFront(header.ICMPv4MinimumSize)
+
// It's possible that a raw socket expects to receive this.
h.SetChecksum(wantChecksum)
- e.dispatcher.DeliverTransportPacket(r, header.ICMPv4ProtocolNumber, &stack.PacketBuffer{
- Data: pkt.Data.Clone(nil),
- NetworkHeader: append(buffer.View(nil), pkt.NetworkHeader...),
- })
+ e.dispatcher.DeliverTransportPacket(r, header.ICMPv4ProtocolNumber, pkt)
remoteLinkAddr := r.RemoteLinkAddress
@@ -116,24 +118,26 @@ func (e *endpoint) handleICMP(r *stack.Route, pkt *stack.PacketBuffer) {
// Use the remote link address from the incoming packet.
r.ResolveWith(remoteLinkAddr)
- vv := pkt.Data.Clone(nil)
- vv.TrimFront(header.ICMPv4MinimumSize)
- hdr := buffer.NewPrependable(int(r.MaxHeaderLength()) + header.ICMPv4MinimumSize)
- pkt := header.ICMPv4(hdr.Prepend(header.ICMPv4MinimumSize))
- copy(pkt, h)
- pkt.SetType(header.ICMPv4EchoReply)
- pkt.SetChecksum(0)
- pkt.SetChecksum(^header.Checksum(pkt, header.ChecksumVV(vv, 0)))
+ // Prepare a reply packet.
+ icmpHdr := make(header.ICMPv4, header.ICMPv4MinimumSize)
+ copy(icmpHdr, h)
+ icmpHdr.SetType(header.ICMPv4EchoReply)
+ icmpHdr.SetChecksum(0)
+ icmpHdr.SetChecksum(^header.Checksum(icmpHdr, header.ChecksumVV(replyData, 0)))
+ dataVV := buffer.View(icmpHdr).ToVectorisedView()
+ dataVV.Append(replyData)
+ replyPkt := stack.NewPacketBuffer(stack.PacketBufferOptions{
+ ReserveHeaderBytes: int(r.MaxHeaderLength()),
+ Data: dataVV,
+ })
+
+ // Send out the reply packet.
sent := stats.ICMP.V4PacketsSent
if err := r.WritePacket(nil /* gso */, stack.NetworkHeaderParams{
Protocol: header.ICMPv4ProtocolNumber,
TTL: r.DefaultTTL(),
TOS: stack.DefaultTOS,
- }, &stack.PacketBuffer{
- Header: hdr,
- Data: vv,
- TransportHeader: buffer.View(pkt),
- }); err != nil {
+ }, replyPkt); err != nil {
sent.Dropped.Increment()
return
}
diff --git a/pkg/tcpip/network/ipv4/ipv4.go b/pkg/tcpip/network/ipv4/ipv4.go
index 9ff27a363..3cd48ceb3 100644
--- a/pkg/tcpip/network/ipv4/ipv4.go
+++ b/pkg/tcpip/network/ipv4/ipv4.go
@@ -21,7 +21,6 @@
package ipv4
import (
- "fmt"
"sync/atomic"
"gvisor.dev/gvisor/pkg/tcpip"
@@ -127,14 +126,12 @@ func (e *endpoint) NetworkProtocolNumber() tcpip.NetworkProtocolNumber {
}
// writePacketFragments calls e.linkEP.WritePacket with each packet fragment to
-// write. It assumes that the IP header is entirely in pkt.Header but does not
-// assume that only the IP header is in pkt.Header. It assumes that the input
-// packet's stated length matches the length of the header+payload. mtu
-// includes the IP header and options. This does not support the DontFragment
-// IP flag.
+// write. It assumes that the IP header is already present in pkt.NetworkHeader.
+// pkt.TransportHeader may be set. mtu includes the IP header and options. This
+// does not support the DontFragment IP flag.
func (e *endpoint) writePacketFragments(r *stack.Route, gso *stack.GSO, mtu int, pkt *stack.PacketBuffer) *tcpip.Error {
// This packet is too big, it needs to be fragmented.
- ip := header.IPv4(pkt.Header.View())
+ ip := header.IPv4(pkt.NetworkHeader().View())
flags := ip.Flags()
// Update mtu to take into account the header, which will exist in all
@@ -148,88 +145,84 @@ func (e *endpoint) writePacketFragments(r *stack.Route, gso *stack.GSO, mtu int,
outerMTU := innerMTU + int(ip.HeaderLength())
offset := ip.FragmentOffset()
- originalAvailableLength := pkt.Header.AvailableLength()
+
+ // Keep the length reserved for link-layer, we need to create fragments with
+ // the same reserved length.
+ reservedForLink := pkt.AvailableHeaderBytes()
+
+ // Destroy the packet, pull all payloads out for fragmentation.
+ transHeader, data := pkt.TransportHeader().View(), pkt.Data
+
+ // Where possible, the first fragment that is sent has the same
+ // number of bytes reserved for header as the input packet. The link-layer
+ // endpoint may depend on this for looking at, eg, L4 headers.
+ transFitsFirst := len(transHeader) <= innerMTU
+
for i := 0; i < n; i++ {
- // Where possible, the first fragment that is sent has the same
- // pkt.Header.UsedLength() as the input packet. The link-layer
- // endpoint may depend on this for looking at, eg, L4 headers.
- h := ip
- if i > 0 {
- pkt.Header = buffer.NewPrependable(int(ip.HeaderLength()) + originalAvailableLength)
- h = header.IPv4(pkt.Header.Prepend(int(ip.HeaderLength())))
- copy(h, ip[:ip.HeaderLength()])
+ reserve := reservedForLink + int(ip.HeaderLength())
+ if i == 0 && transFitsFirst {
+ // Reserve for transport header if it's going to be put in the first
+ // fragment.
+ reserve += len(transHeader)
+ }
+ fragPkt := stack.NewPacketBuffer(stack.PacketBufferOptions{
+ ReserveHeaderBytes: reserve,
+ })
+ fragPkt.NetworkProtocolNumber = header.IPv4ProtocolNumber
+
+ // Copy data for the fragment.
+ avail := innerMTU
+
+ if n := len(transHeader); n > 0 {
+ if n > avail {
+ n = avail
+ }
+ if i == 0 && transFitsFirst {
+ copy(fragPkt.TransportHeader().Push(n), transHeader)
+ } else {
+ fragPkt.Data.AppendView(transHeader[:n:n])
+ }
+ transHeader = transHeader[n:]
+ avail -= n
}
+
+ if avail > 0 {
+ n := data.Size()
+ if n > avail {
+ n = avail
+ }
+ data.ReadToVV(&fragPkt.Data, n)
+ avail -= n
+ }
+
+ copied := uint16(innerMTU - avail)
+
+ // Set lengths in header and calculate checksum.
+ h := header.IPv4(fragPkt.NetworkHeader().Push(len(ip)))
+ copy(h, ip)
if i != n-1 {
h.SetTotalLength(uint16(outerMTU))
h.SetFlagsFragmentOffset(flags|header.IPv4FlagMoreFragments, offset)
} else {
- h.SetTotalLength(uint16(h.HeaderLength()) + uint16(pkt.Data.Size()))
+ h.SetTotalLength(uint16(h.HeaderLength()) + copied)
h.SetFlagsFragmentOffset(flags, offset)
}
h.SetChecksum(0)
h.SetChecksum(^h.CalculateChecksum())
- offset += uint16(innerMTU)
- if i > 0 {
- newPayload := pkt.Data.Clone(nil)
- newPayload.CapLength(innerMTU)
- if err := e.linkEP.WritePacket(r, gso, ProtocolNumber, &stack.PacketBuffer{
- Header: pkt.Header,
- Data: newPayload,
- NetworkHeader: buffer.View(h),
- NetworkProtocolNumber: header.IPv4ProtocolNumber,
- }); err != nil {
- return err
- }
- r.Stats().IP.PacketsSent.Increment()
- pkt.Data.TrimFront(newPayload.Size())
- continue
- }
- // Special handling for the first fragment because it comes
- // from the header.
- if outerMTU >= pkt.Header.UsedLength() {
- // This fragment can fit all of pkt.Header and possibly
- // some of pkt.Data, too.
- newPayload := pkt.Data.Clone(nil)
- newPayloadLength := outerMTU - pkt.Header.UsedLength()
- newPayload.CapLength(newPayloadLength)
- if err := e.linkEP.WritePacket(r, gso, ProtocolNumber, &stack.PacketBuffer{
- Header: pkt.Header,
- Data: newPayload,
- NetworkHeader: buffer.View(h),
- NetworkProtocolNumber: header.IPv4ProtocolNumber,
- }); err != nil {
- return err
- }
- r.Stats().IP.PacketsSent.Increment()
- pkt.Data.TrimFront(newPayloadLength)
- } else {
- // The fragment is too small to fit all of pkt.Header.
- startOfHdr := pkt.Header
- startOfHdr.TrimBack(pkt.Header.UsedLength() - outerMTU)
- emptyVV := buffer.NewVectorisedView(0, []buffer.View{})
- if err := e.linkEP.WritePacket(r, gso, ProtocolNumber, &stack.PacketBuffer{
- Header: startOfHdr,
- Data: emptyVV,
- NetworkHeader: buffer.View(h),
- NetworkProtocolNumber: header.IPv4ProtocolNumber,
- }); err != nil {
- return err
- }
- r.Stats().IP.PacketsSent.Increment()
- // Add the unused bytes of pkt.Header into the pkt.Data
- // that remains to be sent.
- restOfHdr := pkt.Header.View()[outerMTU:]
- tmp := buffer.NewVectorisedView(len(restOfHdr), []buffer.View{buffer.NewViewFromBytes(restOfHdr)})
- tmp.Append(pkt.Data)
- pkt.Data = tmp
+ offset += copied
+
+ // Send out the fragment.
+ if err := e.linkEP.WritePacket(r, gso, ProtocolNumber, fragPkt); err != nil {
+ return err
}
+ r.Stats().IP.PacketsSent.Increment()
}
return nil
}
-func (e *endpoint) addIPHeader(r *stack.Route, hdr *buffer.Prependable, payloadSize int, params stack.NetworkHeaderParams) header.IPv4 {
- ip := header.IPv4(hdr.Prepend(header.IPv4MinimumSize))
- length := uint16(hdr.UsedLength() + payloadSize)
+func (e *endpoint) addIPHeader(r *stack.Route, pkt *stack.PacketBuffer, params stack.NetworkHeaderParams) {
+ ip := header.IPv4(pkt.NetworkHeader().Push(header.IPv4MinimumSize))
+ length := uint16(pkt.Size())
// RFC 6864 section 4.3 mandates uniqueness of ID values for non-atomic
// datagrams. Since the DF bit is never being set here, all datagrams
// are non-atomic and need an ID.
@@ -245,14 +238,12 @@ func (e *endpoint) addIPHeader(r *stack.Route, hdr *buffer.Prependable, payloadS
DstAddr: r.RemoteAddress,
})
ip.SetChecksum(^ip.CalculateChecksum())
- return ip
+ pkt.NetworkProtocolNumber = header.IPv4ProtocolNumber
}
// WritePacket writes a packet to the given destination address and protocol.
func (e *endpoint) WritePacket(r *stack.Route, gso *stack.GSO, params stack.NetworkHeaderParams, pkt *stack.PacketBuffer) *tcpip.Error {
- ip := e.addIPHeader(r, &pkt.Header, pkt.Data.Size(), params)
- pkt.NetworkHeader = buffer.View(ip)
- pkt.NetworkProtocolNumber = header.IPv4ProtocolNumber
+ e.addIPHeader(r, pkt, params)
// iptables filtering. All packets that reach here are locally
// generated.
@@ -269,7 +260,7 @@ func (e *endpoint) WritePacket(r *stack.Route, gso *stack.GSO, params stack.Netw
// only NATted packets, but removing this check short circuits broadcasts
// before they are sent out to other hosts.
if pkt.NatDone {
- netHeader := header.IPv4(pkt.NetworkHeader)
+ netHeader := header.IPv4(pkt.NetworkHeader().View())
ep, err := e.stack.FindNetworkEndpoint(header.IPv4ProtocolNumber, netHeader.DestinationAddress())
if err == nil {
route := r.ReverseRoute(netHeader.SourceAddress(), netHeader.DestinationAddress())
@@ -286,7 +277,7 @@ func (e *endpoint) WritePacket(r *stack.Route, gso *stack.GSO, params stack.Netw
if r.Loop&stack.PacketOut == 0 {
return nil
}
- if pkt.Header.UsedLength()+pkt.Data.Size() > int(e.linkEP.MTU()) && (gso == nil || gso.Type == stack.GSONone) {
+ if pkt.Size() > int(e.linkEP.MTU()) && (gso == nil || gso.Type == stack.GSONone) {
return e.writePacketFragments(r, gso, int(e.linkEP.MTU()), pkt)
}
if err := e.linkEP.WritePacket(r, gso, ProtocolNumber, pkt); err != nil {
@@ -306,9 +297,7 @@ func (e *endpoint) WritePackets(r *stack.Route, gso *stack.GSO, pkts stack.Packe
}
for pkt := pkts.Front(); pkt != nil; {
- ip := e.addIPHeader(r, &pkt.Header, pkt.Data.Size(), params)
- pkt.NetworkHeader = buffer.View(ip)
- pkt.NetworkProtocolNumber = header.IPv4ProtocolNumber
+ e.addIPHeader(r, pkt, params)
pkt = pkt.Next()
}
@@ -333,7 +322,7 @@ func (e *endpoint) WritePackets(r *stack.Route, gso *stack.GSO, pkts stack.Packe
continue
}
if _, ok := natPkts[pkt]; ok {
- netHeader := header.IPv4(pkt.NetworkHeader)
+ netHeader := header.IPv4(pkt.NetworkHeader().View())
if ep, err := e.stack.FindNetworkEndpoint(header.IPv4ProtocolNumber, netHeader.DestinationAddress()); err == nil {
src := netHeader.SourceAddress()
dst := netHeader.DestinationAddress()
@@ -402,17 +391,14 @@ func (e *endpoint) WriteHeaderIncludedPacket(r *stack.Route, pkt *stack.PacketBu
r.Stats().IP.PacketsSent.Increment()
- ip = ip[:ip.HeaderLength()]
- pkt.Header = buffer.NewPrependableFromView(buffer.View(ip))
- pkt.Data.TrimFront(int(ip.HeaderLength()))
return e.linkEP.WritePacket(r, nil /* gso */, ProtocolNumber, pkt)
}
// HandlePacket is called by the link layer when new ipv4 packets arrive for
// this endpoint.
func (e *endpoint) HandlePacket(r *stack.Route, pkt *stack.PacketBuffer) {
- h := header.IPv4(pkt.NetworkHeader)
- if !h.IsValid(pkt.Data.Size() + len(pkt.NetworkHeader) + len(pkt.TransportHeader)) {
+ h := header.IPv4(pkt.NetworkHeader().View())
+ if !h.IsValid(pkt.Data.Size() + pkt.NetworkHeader().View().Size() + pkt.TransportHeader().View().Size()) {
r.Stats().IP.MalformedPacketsReceived.Increment()
return
}
@@ -426,7 +412,7 @@ func (e *endpoint) HandlePacket(r *stack.Route, pkt *stack.PacketBuffer) {
}
if h.More() || h.FragmentOffset() != 0 {
- if pkt.Data.Size()+len(pkt.TransportHeader) == 0 {
+ if pkt.Data.Size()+pkt.TransportHeader().View().Size() == 0 {
// Drop the packet as it's marked as a fragment but has
// no payload.
r.Stats().IP.MalformedPacketsReceived.Increment()
@@ -470,7 +456,6 @@ func (e *endpoint) HandlePacket(r *stack.Route, pkt *stack.PacketBuffer) {
}
p := h.TransportProtocol()
if p == header.ICMPv4ProtocolNumber {
- pkt.NetworkHeader.CapLength(int(h.HeaderLength()))
e.handleICMP(r, pkt)
return
}
@@ -560,14 +545,19 @@ func (*protocol) Parse(pkt *stack.PacketBuffer) (proto tcpip.TransportProtocolNu
}
ipHdr := header.IPv4(hdr)
- // If there are options, pull those into hdr as well.
- if headerLen := int(ipHdr.HeaderLength()); headerLen > header.IPv4MinimumSize && headerLen <= pkt.Data.Size() {
- hdr, ok = pkt.Data.PullUp(headerLen)
- if !ok {
- panic(fmt.Sprintf("There are only %d bytes in pkt.Data, but there should be at least %d", pkt.Data.Size(), headerLen))
- }
- ipHdr = header.IPv4(hdr)
+ // Header may have options, determine the true header length.
+ headerLen := int(ipHdr.HeaderLength())
+ if headerLen < header.IPv4MinimumSize {
+ // TODO(gvisor.dev/issue/2404): Per RFC 791, IHL needs to be at least 5 in
+ // order for the packet to be valid. Figure out if we want to reject this
+ // case.
+ headerLen = header.IPv4MinimumSize
+ }
+ hdr, ok = pkt.NetworkHeader().Consume(headerLen)
+ if !ok {
+ return 0, false, false
}
+ ipHdr = header.IPv4(hdr)
// If this is a fragment, don't bother parsing the transport header.
parseTransportHeader := true
@@ -576,8 +566,6 @@ func (*protocol) Parse(pkt *stack.PacketBuffer) (proto tcpip.TransportProtocolNu
}
pkt.NetworkProtocolNumber = header.IPv4ProtocolNumber
- pkt.NetworkHeader = hdr
- pkt.Data.TrimFront(len(hdr))
pkt.Data.CapLength(int(ipHdr.TotalLength()) - len(hdr))
return ipHdr.TransportProtocol(), parseTransportHeader, true
}
diff --git a/pkg/tcpip/network/ipv6/icmp.go b/pkg/tcpip/network/ipv6/icmp.go
index ded91d83a..39ae19295 100644
--- a/pkg/tcpip/network/ipv6/icmp.go
+++ b/pkg/tcpip/network/ipv6/icmp.go
@@ -83,7 +83,7 @@ func (e *endpoint) handleICMP(r *stack.Route, pkt *stack.PacketBuffer, hasFragme
return
}
h := header.ICMPv6(v)
- iph := header.IPv6(pkt.NetworkHeader)
+ iph := header.IPv6(pkt.NetworkHeader().View())
// Validate ICMPv6 checksum before processing the packet.
//
@@ -276,8 +276,10 @@ func (e *endpoint) handleICMP(r *stack.Route, pkt *stack.PacketBuffer, hasFragme
optsSerializer := header.NDPOptionsSerializer{
header.NDPTargetLinkLayerAddressOption(r.LocalLinkAddress),
}
- hdr := buffer.NewPrependable(int(r.MaxHeaderLength()) + header.ICMPv6NeighborAdvertMinimumSize + int(optsSerializer.Length()))
- packet := header.ICMPv6(hdr.Prepend(header.ICMPv6NeighborAdvertSize))
+ pkt := stack.NewPacketBuffer(stack.PacketBufferOptions{
+ ReserveHeaderBytes: int(r.MaxHeaderLength()) + header.ICMPv6NeighborAdvertMinimumSize + int(optsSerializer.Length()),
+ })
+ packet := header.ICMPv6(pkt.TransportHeader().Push(header.ICMPv6NeighborAdvertSize))
packet.SetType(header.ICMPv6NeighborAdvert)
na := header.NDPNeighborAdvert(packet.NDPPayload())
na.SetSolicitedFlag(solicited)
@@ -293,9 +295,7 @@ func (e *endpoint) handleICMP(r *stack.Route, pkt *stack.PacketBuffer, hasFragme
//
// The IP Hop Limit field has a value of 255, i.e., the packet
// could not possibly have been forwarded by a router.
- if err := r.WritePacket(nil /* gso */, stack.NetworkHeaderParams{Protocol: header.ICMPv6ProtocolNumber, TTL: header.NDPHopLimit, TOS: stack.DefaultTOS}, &stack.PacketBuffer{
- Header: hdr,
- }); err != nil {
+ if err := r.WritePacket(nil /* gso */, stack.NetworkHeaderParams{Protocol: header.ICMPv6ProtocolNumber, TTL: header.NDPHopLimit, TOS: stack.DefaultTOS}, pkt); err != nil {
sent.Dropped.Increment()
return
}
@@ -384,7 +384,7 @@ func (e *endpoint) handleICMP(r *stack.Route, pkt *stack.PacketBuffer, hasFragme
case header.ICMPv6EchoRequest:
received.EchoRequest.Increment()
- icmpHdr, ok := pkt.Data.PullUp(header.ICMPv6EchoMinimumSize)
+ icmpHdr, ok := pkt.TransportHeader().Consume(header.ICMPv6EchoMinimumSize)
if !ok {
received.Invalid.Increment()
return
@@ -409,16 +409,15 @@ func (e *endpoint) handleICMP(r *stack.Route, pkt *stack.PacketBuffer, hasFragme
// Use the link address from the source of the original packet.
r.ResolveWith(remoteLinkAddr)
- pkt.Data.TrimFront(header.ICMPv6EchoMinimumSize)
- hdr := buffer.NewPrependable(int(r.MaxHeaderLength()) + header.ICMPv6EchoMinimumSize)
- packet := header.ICMPv6(hdr.Prepend(header.ICMPv6EchoMinimumSize))
+ replyPkt := stack.NewPacketBuffer(stack.PacketBufferOptions{
+ ReserveHeaderBytes: int(r.MaxHeaderLength()) + header.ICMPv6EchoMinimumSize,
+ Data: pkt.Data,
+ })
+ packet := header.ICMPv6(replyPkt.TransportHeader().Push(header.ICMPv6EchoMinimumSize))
copy(packet, icmpHdr)
packet.SetType(header.ICMPv6EchoReply)
packet.SetChecksum(header.ICMPv6Checksum(packet, r.LocalAddress, r.RemoteAddress, pkt.Data))
- if err := r.WritePacket(nil /* gso */, stack.NetworkHeaderParams{Protocol: header.ICMPv6ProtocolNumber, TTL: r.DefaultTTL(), TOS: stack.DefaultTOS}, &stack.PacketBuffer{
- Header: hdr,
- Data: pkt.Data,
- }); err != nil {
+ if err := r.WritePacket(nil /* gso */, stack.NetworkHeaderParams{Protocol: header.ICMPv6ProtocolNumber, TTL: r.DefaultTTL(), TOS: stack.DefaultTOS}, replyPkt); err != nil {
sent.Dropped.Increment()
return
}
@@ -539,17 +538,19 @@ func (*protocol) LinkAddressRequest(addr, localAddr tcpip.Address, remoteLinkAdd
r.RemoteLinkAddress = header.EthernetAddressFromMulticastIPv6Address(snaddr)
}
- hdr := buffer.NewPrependable(int(linkEP.MaxHeaderLength()) + header.IPv6MinimumSize + header.ICMPv6NeighborAdvertSize)
- pkt := header.ICMPv6(hdr.Prepend(header.ICMPv6NeighborAdvertSize))
- pkt.SetType(header.ICMPv6NeighborSolicit)
- copy(pkt[icmpV6OptOffset-len(addr):], addr)
- pkt[icmpV6OptOffset] = ndpOptSrcLinkAddr
- pkt[icmpV6LengthOffset] = 1
- copy(pkt[icmpV6LengthOffset+1:], linkEP.LinkAddress())
- pkt.SetChecksum(header.ICMPv6Checksum(pkt, r.LocalAddress, r.RemoteAddress, buffer.VectorisedView{}))
-
- length := uint16(hdr.UsedLength())
- ip := header.IPv6(hdr.Prepend(header.IPv6MinimumSize))
+ pkt := stack.NewPacketBuffer(stack.PacketBufferOptions{
+ ReserveHeaderBytes: int(linkEP.MaxHeaderLength()) + header.IPv6MinimumSize + header.ICMPv6NeighborAdvertSize,
+ })
+ icmpHdr := header.ICMPv6(pkt.TransportHeader().Push(header.ICMPv6NeighborAdvertSize))
+ icmpHdr.SetType(header.ICMPv6NeighborSolicit)
+ copy(icmpHdr[icmpV6OptOffset-len(addr):], addr)
+ icmpHdr[icmpV6OptOffset] = ndpOptSrcLinkAddr
+ icmpHdr[icmpV6LengthOffset] = 1
+ copy(icmpHdr[icmpV6LengthOffset+1:], linkEP.LinkAddress())
+ icmpHdr.SetChecksum(header.ICMPv6Checksum(icmpHdr, r.LocalAddress, r.RemoteAddress, buffer.VectorisedView{}))
+
+ length := uint16(pkt.Size())
+ ip := header.IPv6(pkt.NetworkHeader().Push(header.IPv6MinimumSize))
ip.Encode(&header.IPv6Fields{
PayloadLength: length,
NextHeader: uint8(header.ICMPv6ProtocolNumber),
@@ -559,9 +560,7 @@ func (*protocol) LinkAddressRequest(addr, localAddr tcpip.Address, remoteLinkAdd
})
// TODO(stijlist): count this in ICMP stats.
- return linkEP.WritePacket(r, nil /* gso */, ProtocolNumber, &stack.PacketBuffer{
- Header: hdr,
- })
+ return linkEP.WritePacket(r, nil /* gso */, ProtocolNumber, pkt)
}
// ResolveStaticAddress implements stack.LinkAddressResolver.
diff --git a/pkg/tcpip/network/ipv6/ipv6.go b/pkg/tcpip/network/ipv6/ipv6.go
index d7d7fc611..0ade655b2 100644
--- a/pkg/tcpip/network/ipv6/ipv6.go
+++ b/pkg/tcpip/network/ipv6/ipv6.go
@@ -99,9 +99,9 @@ func (e *endpoint) GSOMaxSize() uint32 {
return 0
}
-func (e *endpoint) addIPHeader(r *stack.Route, hdr *buffer.Prependable, payloadSize int, params stack.NetworkHeaderParams) header.IPv6 {
- length := uint16(hdr.UsedLength() + payloadSize)
- ip := header.IPv6(hdr.Prepend(header.IPv6MinimumSize))
+func (e *endpoint) addIPHeader(r *stack.Route, pkt *stack.PacketBuffer, params stack.NetworkHeaderParams) {
+ length := uint16(pkt.Size())
+ ip := header.IPv6(pkt.NetworkHeader().Push(header.IPv6MinimumSize))
ip.Encode(&header.IPv6Fields{
PayloadLength: length,
NextHeader: uint8(params.Protocol),
@@ -110,26 +110,20 @@ func (e *endpoint) addIPHeader(r *stack.Route, hdr *buffer.Prependable, payloadS
SrcAddr: r.LocalAddress,
DstAddr: r.RemoteAddress,
})
- return ip
+ pkt.NetworkProtocolNumber = header.IPv6ProtocolNumber
}
// WritePacket writes a packet to the given destination address and protocol.
func (e *endpoint) WritePacket(r *stack.Route, gso *stack.GSO, params stack.NetworkHeaderParams, pkt *stack.PacketBuffer) *tcpip.Error {
- ip := e.addIPHeader(r, &pkt.Header, pkt.Data.Size(), params)
- pkt.NetworkHeader = buffer.View(ip)
- pkt.NetworkProtocolNumber = header.IPv6ProtocolNumber
+ e.addIPHeader(r, pkt, params)
if r.Loop&stack.PacketLoop != 0 {
- // The inbound path expects the network header to still be in
- // the PacketBuffer's Data field.
- views := make([]buffer.View, 1, 1+len(pkt.Data.Views()))
- views[0] = pkt.Header.View()
- views = append(views, pkt.Data.Views()...)
loopedR := r.MakeLoopedRoute()
- e.HandlePacket(&loopedR, &stack.PacketBuffer{
- Data: buffer.NewVectorisedView(len(views[0])+pkt.Data.Size(), views),
- })
+ e.HandlePacket(&loopedR, stack.NewPacketBuffer(stack.PacketBufferOptions{
+ // The inbound path expects an unparsed packet.
+ Data: buffer.NewVectorisedView(pkt.Size(), pkt.Views()),
+ }))
loopedR.Release()
}
@@ -151,9 +145,7 @@ func (e *endpoint) WritePackets(r *stack.Route, gso *stack.GSO, pkts stack.Packe
}
for pb := pkts.Front(); pb != nil; pb = pb.Next() {
- ip := e.addIPHeader(r, &pb.Header, pb.Data.Size(), params)
- pb.NetworkHeader = buffer.View(ip)
- pb.NetworkProtocolNumber = header.IPv6ProtocolNumber
+ e.addIPHeader(r, pb, params)
}
n, err := e.linkEP.WritePackets(r, gso, pkts, ProtocolNumber)
@@ -171,8 +163,8 @@ func (*endpoint) WriteHeaderIncludedPacket(r *stack.Route, pkt *stack.PacketBuff
// HandlePacket is called by the link layer when new ipv6 packets arrive for
// this endpoint.
func (e *endpoint) HandlePacket(r *stack.Route, pkt *stack.PacketBuffer) {
- h := header.IPv6(pkt.NetworkHeader)
- if !h.IsValid(pkt.Data.Size() + len(pkt.NetworkHeader) + len(pkt.TransportHeader)) {
+ h := header.IPv6(pkt.NetworkHeader().View())
+ if !h.IsValid(pkt.Data.Size() + pkt.NetworkHeader().View().Size() + pkt.TransportHeader().View().Size()) {
r.Stats().IP.MalformedPacketsReceived.Increment()
return
}
@@ -181,8 +173,8 @@ func (e *endpoint) HandlePacket(r *stack.Route, pkt *stack.PacketBuffer) {
// - Any IPv6 header bytes after the first 40 (i.e. extensions).
// - The transport header, if present.
// - Any other payload data.
- vv := pkt.NetworkHeader[header.IPv6MinimumSize:].ToVectorisedView()
- vv.AppendView(pkt.TransportHeader)
+ vv := pkt.NetworkHeader().View()[header.IPv6MinimumSize:].ToVectorisedView()
+ vv.AppendView(pkt.TransportHeader().View())
vv.Append(pkt.Data)
it := header.MakeIPv6PayloadIterator(header.IPv6ExtensionHeaderIdentifier(h.NextHeader()), vv)
hasFragmentHeader := false
@@ -410,7 +402,7 @@ func (e *endpoint) HandlePacket(r *stack.Route, pkt *stack.PacketBuffer) {
//
// For reassembled fragments, pkt.TransportHeader is unset, so this is a
// no-op and pkt.Data begins with the transport header.
- extHdr.Buf.TrimFront(len(pkt.TransportHeader))
+ extHdr.Buf.TrimFront(pkt.TransportHeader().View().Size())
pkt.Data = extHdr.Buf
if p := tcpip.TransportProtocolNumber(extHdr.Identifier); p == header.ICMPv6ProtocolNumber {
@@ -581,17 +573,14 @@ traverseExtensions:
}
}
- // Put the IPv6 header with extensions in pkt.NetworkHeader.
- hdr, ok = pkt.Data.PullUp(header.IPv6MinimumSize + extensionsSize)
+ // Put the IPv6 header with extensions in pkt.NetworkHeader().
+ hdr, ok = pkt.NetworkHeader().Consume(header.IPv6MinimumSize + extensionsSize)
if !ok {
panic(fmt.Sprintf("pkt.Data should have at least %d bytes, but only has %d.", header.IPv6MinimumSize+extensionsSize, pkt.Data.Size()))
}
ipHdr = header.IPv6(hdr)
-
- pkt.NetworkProtocolNumber = header.IPv6ProtocolNumber
- pkt.NetworkHeader = hdr
- pkt.Data.TrimFront(len(hdr))
pkt.Data.CapLength(int(ipHdr.PayloadLength()))
+ pkt.NetworkProtocolNumber = header.IPv6ProtocolNumber
return nextHdr, foundNext, true
}
diff --git a/pkg/tcpip/stack/conntrack.go b/pkg/tcpip/stack/conntrack.go
index 470c265aa..7dd344b4f 100644
--- a/pkg/tcpip/stack/conntrack.go
+++ b/pkg/tcpip/stack/conntrack.go
@@ -199,12 +199,12 @@ type bucket struct {
func packetToTupleID(pkt *PacketBuffer) (tupleID, *tcpip.Error) {
// TODO(gvisor.dev/issue/170): Need to support for other
// protocols as well.
- netHeader := header.IPv4(pkt.NetworkHeader)
- if netHeader == nil || netHeader.TransportProtocol() != header.TCPProtocolNumber {
+ netHeader := header.IPv4(pkt.NetworkHeader().View())
+ if len(netHeader) < header.IPv4MinimumSize || netHeader.TransportProtocol() != header.TCPProtocolNumber {
return tupleID{}, tcpip.ErrUnknownProtocol
}
- tcpHeader := header.TCP(pkt.TransportHeader)
- if tcpHeader == nil {
+ tcpHeader := header.TCP(pkt.TransportHeader().View())
+ if len(tcpHeader) < header.TCPMinimumSize {
return tupleID{}, tcpip.ErrUnknownProtocol
}
@@ -344,8 +344,8 @@ func handlePacketPrerouting(pkt *PacketBuffer, conn *conn, dir direction) {
return
}
- netHeader := header.IPv4(pkt.NetworkHeader)
- tcpHeader := header.TCP(pkt.TransportHeader)
+ netHeader := header.IPv4(pkt.NetworkHeader().View())
+ tcpHeader := header.TCP(pkt.TransportHeader().View())
// For prerouting redirection, packets going in the original direction
// have their destinations modified and replies have their sources
@@ -377,8 +377,8 @@ func handlePacketOutput(pkt *PacketBuffer, conn *conn, gso *GSO, r *Route, dir d
return
}
- netHeader := header.IPv4(pkt.NetworkHeader)
- tcpHeader := header.TCP(pkt.TransportHeader)
+ netHeader := header.IPv4(pkt.NetworkHeader().View())
+ tcpHeader := header.TCP(pkt.TransportHeader().View())
// For output redirection, packets going in the original direction
// have their destinations modified and replies have their sources
@@ -396,8 +396,7 @@ func handlePacketOutput(pkt *PacketBuffer, conn *conn, gso *GSO, r *Route, dir d
// Calculate the TCP checksum and set it.
tcpHeader.SetChecksum(0)
- hdr := &pkt.Header
- length := uint16(pkt.Data.Size()+hdr.UsedLength()) - uint16(netHeader.HeaderLength())
+ length := uint16(pkt.Size()) - uint16(netHeader.HeaderLength())
xsum := r.PseudoHeaderChecksum(header.TCPProtocolNumber, length)
if gso != nil && gso.NeedsCsum {
tcpHeader.SetChecksum(xsum)
@@ -423,7 +422,7 @@ func (ct *ConnTrack) handlePacket(pkt *PacketBuffer, hook Hook, gso *GSO, r *Rou
}
// TODO(gvisor.dev/issue/170): Support other transport protocols.
- if pkt.NetworkHeader == nil || header.IPv4(pkt.NetworkHeader).TransportProtocol() != header.TCPProtocolNumber {
+ if nh := pkt.NetworkHeader().View(); nh.IsEmpty() || header.IPv4(nh).TransportProtocol() != header.TCPProtocolNumber {
return false
}
@@ -433,8 +432,8 @@ func (ct *ConnTrack) handlePacket(pkt *PacketBuffer, hook Hook, gso *GSO, r *Rou
return true
}
- tcpHeader := header.TCP(pkt.TransportHeader)
- if tcpHeader == nil {
+ tcpHeader := header.TCP(pkt.TransportHeader().View())
+ if len(tcpHeader) < header.TCPMinimumSize {
return false
}
@@ -455,7 +454,7 @@ func (ct *ConnTrack) handlePacket(pkt *PacketBuffer, hook Hook, gso *GSO, r *Rou
// Mark the connection as having been used recently so it isn't reaped.
conn.lastUsed = time.Now()
// Update connection state.
- conn.updateLocked(header.TCP(pkt.TransportHeader), hook)
+ conn.updateLocked(header.TCP(pkt.TransportHeader().View()), hook)
return false
}
@@ -474,7 +473,7 @@ func (ct *ConnTrack) maybeInsertNoop(pkt *PacketBuffer, hook Hook) {
}
// We only track TCP connections.
- if pkt.NetworkHeader == nil || header.IPv4(pkt.NetworkHeader).TransportProtocol() != header.TCPProtocolNumber {
+ if nh := pkt.NetworkHeader().View(); nh.IsEmpty() || header.IPv4(nh).TransportProtocol() != header.TCPProtocolNumber {
return
}
@@ -486,7 +485,7 @@ func (ct *ConnTrack) maybeInsertNoop(pkt *PacketBuffer, hook Hook) {
return
}
conn := newConn(tid, tid.reply(), manipNone, hook)
- conn.updateLocked(header.TCP(pkt.TransportHeader), hook)
+ conn.updateLocked(header.TCP(pkt.TransportHeader().View()), hook)
ct.insertConn(conn)
}
diff --git a/pkg/tcpip/stack/headertype_string.go b/pkg/tcpip/stack/headertype_string.go
new file mode 100644
index 000000000..5efddfaaf
--- /dev/null
+++ b/pkg/tcpip/stack/headertype_string.go
@@ -0,0 +1,39 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at //
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Code generated by "stringer -type headerType ."; DO NOT EDIT.
+
+package stack
+
+import "strconv"
+
+func _() {
+ // An "invalid array index" compiler error signifies that the constant values have changed.
+ // Re-run the stringer command to generate them again.
+ var x [1]struct{}
+ _ = x[linkHeader-0]
+ _ = x[networkHeader-1]
+ _ = x[transportHeader-2]
+ _ = x[numHeaderType-3]
+}
+
+const _headerType_name = "linkHeadernetworkHeadertransportHeadernumHeaderType"
+
+var _headerType_index = [...]uint8{0, 10, 23, 38, 51}
+
+func (i headerType) String() string {
+ if i < 0 || i >= headerType(len(_headerType_index)-1) {
+ return "headerType(" + strconv.FormatInt(int64(i), 10) + ")"
+ }
+ return _headerType_name[_headerType_index[i]:_headerType_index[i+1]]
+}
diff --git a/pkg/tcpip/stack/iptables.go b/pkg/tcpip/stack/iptables.go
index 110ba073d..c37da814f 100644
--- a/pkg/tcpip/stack/iptables.go
+++ b/pkg/tcpip/stack/iptables.go
@@ -394,7 +394,7 @@ func (it *IPTables) checkRule(hook Hook, pkt *PacketBuffer, table Table, ruleIdx
rule := table.Rules[ruleIdx]
// Check whether the packet matches the IP header filter.
- if !rule.Filter.match(header.IPv4(pkt.NetworkHeader), hook, nicName) {
+ if !rule.Filter.match(header.IPv4(pkt.NetworkHeader().View()), hook, nicName) {
// Continue on to the next rule.
return RuleJump, ruleIdx + 1
}
diff --git a/pkg/tcpip/stack/iptables_targets.go b/pkg/tcpip/stack/iptables_targets.go
index dc88033c7..5f1b2af64 100644
--- a/pkg/tcpip/stack/iptables_targets.go
+++ b/pkg/tcpip/stack/iptables_targets.go
@@ -99,7 +99,7 @@ func (rt RedirectTarget) Action(pkt *PacketBuffer, ct *ConnTrack, hook Hook, gso
}
// Drop the packet if network and transport header are not set.
- if pkt.NetworkHeader == nil || pkt.TransportHeader == nil {
+ if pkt.NetworkHeader().View().IsEmpty() || pkt.TransportHeader().View().IsEmpty() {
return RuleDrop, 0
}
@@ -118,17 +118,16 @@ func (rt RedirectTarget) Action(pkt *PacketBuffer, ct *ConnTrack, hook Hook, gso
// TODO(gvisor.dev/issue/170): Check Flags in RedirectTarget if
// we need to change dest address (for OUTPUT chain) or ports.
- netHeader := header.IPv4(pkt.NetworkHeader)
+ netHeader := header.IPv4(pkt.NetworkHeader().View())
switch protocol := netHeader.TransportProtocol(); protocol {
case header.UDPProtocolNumber:
- udpHeader := header.UDP(pkt.TransportHeader)
+ udpHeader := header.UDP(pkt.TransportHeader().View())
udpHeader.SetDestinationPort(rt.MinPort)
// Calculate UDP checksum and set it.
if hook == Output {
udpHeader.SetChecksum(0)
- hdr := &pkt.Header
- length := uint16(pkt.Data.Size()+hdr.UsedLength()) - uint16(netHeader.HeaderLength())
+ length := uint16(pkt.Size()) - uint16(netHeader.HeaderLength())
// Only calculate the checksum if offloading isn't supported.
if r.Capabilities()&CapabilityTXChecksumOffload == 0 {
diff --git a/pkg/tcpip/stack/ndp.go b/pkg/tcpip/stack/ndp.go
index 5174e639c..93567806b 100644
--- a/pkg/tcpip/stack/ndp.go
+++ b/pkg/tcpip/stack/ndp.go
@@ -746,12 +746,16 @@ func (ndp *ndpState) sendDADPacket(addr tcpip.Address, ref *referencedNetworkEnd
panic(fmt.Sprintf("ndp: route resolution not immediate for route to send NDP NS for DAD (%s -> %s on NIC(%d))", header.IPv6Any, snmc, ndp.nic.ID()))
}
- hdr := buffer.NewPrependable(int(r.MaxHeaderLength()) + header.ICMPv6NeighborSolicitMinimumSize)
- pkt := header.ICMPv6(hdr.Prepend(header.ICMPv6NeighborSolicitMinimumSize))
- pkt.SetType(header.ICMPv6NeighborSolicit)
- ns := header.NDPNeighborSolicit(pkt.NDPPayload())
+ icmpData := header.ICMPv6(buffer.NewView(header.ICMPv6NeighborSolicitMinimumSize))
+ icmpData.SetType(header.ICMPv6NeighborSolicit)
+ ns := header.NDPNeighborSolicit(icmpData.NDPPayload())
ns.SetTargetAddress(addr)
- pkt.SetChecksum(header.ICMPv6Checksum(pkt, r.LocalAddress, r.RemoteAddress, buffer.VectorisedView{}))
+ icmpData.SetChecksum(header.ICMPv6Checksum(icmpData, r.LocalAddress, r.RemoteAddress, buffer.VectorisedView{}))
+
+ pkt := NewPacketBuffer(PacketBufferOptions{
+ ReserveHeaderBytes: int(r.MaxHeaderLength()),
+ Data: buffer.View(icmpData).ToVectorisedView(),
+ })
sent := r.Stats().ICMP.V6PacketsSent
if err := r.WritePacket(nil,
@@ -759,7 +763,7 @@ func (ndp *ndpState) sendDADPacket(addr tcpip.Address, ref *referencedNetworkEnd
Protocol: header.ICMPv6ProtocolNumber,
TTL: header.NDPHopLimit,
TOS: DefaultTOS,
- }, &PacketBuffer{Header: hdr},
+ }, pkt,
); err != nil {
sent.Dropped.Increment()
return err
@@ -1897,12 +1901,16 @@ func (ndp *ndpState) startSolicitingRouters() {
}
}
payloadSize := header.ICMPv6HeaderSize + header.NDPRSMinimumSize + int(optsSerializer.Length())
- hdr := buffer.NewPrependable(int(r.MaxHeaderLength()) + payloadSize)
- pkt := header.ICMPv6(hdr.Prepend(payloadSize))
- pkt.SetType(header.ICMPv6RouterSolicit)
- rs := header.NDPRouterSolicit(pkt.NDPPayload())
+ icmpData := header.ICMPv6(buffer.NewView(payloadSize))
+ icmpData.SetType(header.ICMPv6RouterSolicit)
+ rs := header.NDPRouterSolicit(icmpData.NDPPayload())
rs.Options().Serialize(optsSerializer)
- pkt.SetChecksum(header.ICMPv6Checksum(pkt, r.LocalAddress, r.RemoteAddress, buffer.VectorisedView{}))
+ icmpData.SetChecksum(header.ICMPv6Checksum(icmpData, r.LocalAddress, r.RemoteAddress, buffer.VectorisedView{}))
+
+ pkt := NewPacketBuffer(PacketBufferOptions{
+ ReserveHeaderBytes: int(r.MaxHeaderLength()),
+ Data: buffer.View(icmpData).ToVectorisedView(),
+ })
sent := r.Stats().ICMP.V6PacketsSent
if err := r.WritePacket(nil,
@@ -1910,7 +1918,7 @@ func (ndp *ndpState) startSolicitingRouters() {
Protocol: header.ICMPv6ProtocolNumber,
TTL: header.NDPHopLimit,
TOS: DefaultTOS,
- }, &PacketBuffer{Header: hdr},
+ }, pkt,
); err != nil {
sent.Dropped.Increment()
log.Printf("startSolicitingRouters: error writing NDP router solicit message on NIC(%d); err = %s", ndp.nic.ID(), err)
diff --git a/pkg/tcpip/stack/nic.go b/pkg/tcpip/stack/nic.go
index eaaf756cd..2315ea5b9 100644
--- a/pkg/tcpip/stack/nic.go
+++ b/pkg/tcpip/stack/nic.go
@@ -1299,7 +1299,7 @@ func (n *NIC) DeliverNetworkPacket(remote, local tcpip.LinkAddress, protocol tcp
}
}
- src, dst := netProto.ParseAddresses(pkt.NetworkHeader)
+ src, dst := netProto.ParseAddresses(pkt.NetworkHeader().View())
if n.stack.handleLocal && !n.isLoopback() && n.getRef(protocol, src) != nil {
// The source address is one of our own, so we never should have gotten a
@@ -1401,24 +1401,19 @@ func (n *NIC) DeliverOutboundPacket(remote, local tcpip.LinkAddress, protocol tc
func (n *NIC) forwardPacket(r *Route, protocol tcpip.NetworkProtocolNumber, pkt *PacketBuffer) {
// TODO(b/143425874) Decrease the TTL field in forwarded packets.
- // TODO(b/151227689): Avoid copying the packet when forwarding. We can do this
- // by having lower layers explicity write each header instead of just
- // pkt.Header.
- // pkt may have set its NetworkHeader and TransportHeader. If we're
- // forwarding, we'll have to copy them into pkt.Header.
- pkt.Header = buffer.NewPrependable(int(n.linkEP.MaxHeaderLength()) + len(pkt.NetworkHeader) + len(pkt.TransportHeader))
- if n := copy(pkt.Header.Prepend(len(pkt.TransportHeader)), pkt.TransportHeader); n != len(pkt.TransportHeader) {
- panic(fmt.Sprintf("copied %d bytes, expected %d", n, len(pkt.TransportHeader)))
- }
- if n := copy(pkt.Header.Prepend(len(pkt.NetworkHeader)), pkt.NetworkHeader); n != len(pkt.NetworkHeader) {
- panic(fmt.Sprintf("copied %d bytes, expected %d", n, len(pkt.NetworkHeader)))
- }
+ // pkt may have set its header and may not have enough headroom for link-layer
+ // header for the other link to prepend. Here we create a new packet to
+ // forward.
+ fwdPkt := NewPacketBuffer(PacketBufferOptions{
+ ReserveHeaderBytes: int(n.linkEP.MaxHeaderLength()),
+ Data: buffer.NewVectorisedView(pkt.Size(), pkt.Views()),
+ })
- // WritePacket takes ownership of pkt, calculate numBytes first.
- numBytes := pkt.Header.UsedLength() + pkt.Data.Size()
+ // WritePacket takes ownership of fwdPkt, calculate numBytes first.
+ numBytes := fwdPkt.Size()
- if err := n.linkEP.WritePacket(r, nil /* gso */, protocol, pkt); err != nil {
+ if err := n.linkEP.WritePacket(r, nil /* gso */, protocol, fwdPkt); err != nil {
r.Stats().IP.OutgoingPacketErrors.Increment()
return
}
@@ -1443,34 +1438,31 @@ func (n *NIC) DeliverTransportPacket(r *Route, protocol tcpip.TransportProtocolN
// validly formed.
n.stack.demux.deliverRawPacket(r, protocol, pkt)
- // TransportHeader is nil only when pkt is an ICMP packet or was reassembled
+ // TransportHeader is empty only when pkt is an ICMP packet or was reassembled
// from fragments.
- if pkt.TransportHeader == nil {
+ if pkt.TransportHeader().View().IsEmpty() {
// TODO(gvisor.dev/issue/170): ICMP packets don't have their TransportHeader
// fields set yet, parse it here. See icmp/protocol.go:protocol.Parse for a
// full explanation.
if protocol == header.ICMPv4ProtocolNumber || protocol == header.ICMPv6ProtocolNumber {
// ICMP packets may be longer, but until icmp.Parse is implemented, here
// we parse it using the minimum size.
- transHeader, ok := pkt.Data.PullUp(transProto.MinimumPacketSize())
- if !ok {
+ if _, ok := pkt.TransportHeader().Consume(transProto.MinimumPacketSize()); !ok {
n.stack.stats.MalformedRcvdPackets.Increment()
return
}
- pkt.TransportHeader = transHeader
- pkt.Data.TrimFront(len(pkt.TransportHeader))
} else {
// This is either a bad packet or was re-assembled from fragments.
transProto.Parse(pkt)
}
}
- if len(pkt.TransportHeader) < transProto.MinimumPacketSize() {
+ if pkt.TransportHeader().View().Size() < transProto.MinimumPacketSize() {
n.stack.stats.MalformedRcvdPackets.Increment()
return
}
- srcPort, dstPort, err := transProto.ParsePorts(pkt.TransportHeader)
+ srcPort, dstPort, err := transProto.ParsePorts(pkt.TransportHeader().View())
if err != nil {
n.stack.stats.MalformedRcvdPackets.Increment()
return
diff --git a/pkg/tcpip/stack/packet_buffer.go b/pkg/tcpip/stack/packet_buffer.go
index 9e871f968..17b8beebb 100644
--- a/pkg/tcpip/stack/packet_buffer.go
+++ b/pkg/tcpip/stack/packet_buffer.go
@@ -14,16 +14,43 @@
package stack
import (
+ "fmt"
+
"gvisor.dev/gvisor/pkg/sync"
"gvisor.dev/gvisor/pkg/tcpip"
"gvisor.dev/gvisor/pkg/tcpip/buffer"
)
+type headerType int
+
+const (
+ linkHeader headerType = iota
+ networkHeader
+ transportHeader
+ numHeaderType
+)
+
+// PacketBufferOptions specifies options for PacketBuffer creation.
+type PacketBufferOptions struct {
+ // ReserveHeaderBytes is the number of bytes to reserve for headers. Total
+ // number of bytes pushed onto the headers must not exceed this value.
+ ReserveHeaderBytes int
+
+ // Data is the initial unparsed data for the new packet. If set, it will be
+ // owned by the new packet.
+ Data buffer.VectorisedView
+}
+
// A PacketBuffer contains all the data of a network packet.
//
// As a PacketBuffer traverses up the stack, it may be necessary to pass it to
-// multiple endpoints. Clone() should be called in such cases so that
-// modifications to the Data field do not affect other copies.
+// multiple endpoints.
+//
+// The whole packet is expected to be a series of bytes in the following order:
+// LinkHeader, NetworkHeader, TransportHeader, and Data. Any of them can be
+// empty. Use of PacketBuffer in any other order is unsupported.
+//
+// PacketBuffer must be created with NewPacketBuffer.
type PacketBuffer struct {
_ sync.NoCopy
@@ -31,36 +58,27 @@ type PacketBuffer struct {
// PacketBuffers.
PacketBufferEntry
- // Data holds the payload of the packet. For inbound packets, it also
- // holds the headers, which are consumed as the packet moves up the
- // stack. Headers are guaranteed not to be split across views.
+ // Data holds the payload of the packet.
+ //
+ // For inbound packets, Data is initially the whole packet. Then gets moved to
+ // headers via PacketHeader.Consume, when the packet is being parsed.
//
- // The bytes backing Data are immutable, but Data itself may be trimmed
- // or otherwise modified.
+ // For outbound packets, Data is the innermost layer, defined by the protocol.
+ // Headers are pushed in front of it via PacketHeader.Push.
+ //
+ // The bytes backing Data are immutable, a.k.a. users shouldn't write to its
+ // backing storage.
Data buffer.VectorisedView
- // Header holds the headers of outbound packets. As a packet is passed
- // down the stack, each layer adds to Header. Note that forwarded
- // packets don't populate Headers on their way out -- their headers and
- // payload are never parsed out and remain in Data.
- //
- // TODO(gvisor.dev/issue/170): Forwarded packets don't currently
- // populate Header, but should. This will be doable once early parsing
- // (https://github.com/google/gvisor/pull/1995) is supported.
- Header buffer.Prependable
+ // headers stores metadata about each header.
+ headers [numHeaderType]headerInfo
- // These fields are used by both inbound and outbound packets. They
- // typically overlap with the Data and Header fields.
+ // header is the internal storage for outbound packets. Headers will be pushed
+ // (prepended) on this storage as the packet is being constructed.
//
- // The bytes backing these views are immutable. Each field may be nil
- // if either it has not been set yet or no such header exists (e.g.
- // packets sent via loopback may not have a link header).
- //
- // These fields may be Views into other slices (either Data or Header).
- // SR dosen't support this, so deep copies are necessary in some cases.
- LinkHeader buffer.View
- NetworkHeader buffer.View
- TransportHeader buffer.View
+ // TODO(gvisor.dev/issue/2404): Switch to an implementation that header and
+ // data are held in the same underlying buffer storage.
+ header buffer.Prependable
// NetworkProtocol is only valid when NetworkHeader is set.
// TODO(gvisor.dev/issue/3574): Remove the separately passed protocol
@@ -89,20 +107,137 @@ type PacketBuffer struct {
PktType tcpip.PacketType
}
-// Clone makes a copy of pk. It clones the Data field, which creates a new
-// VectorisedView but does not deep copy the underlying bytes.
-//
-// Clone also does not deep copy any of its other fields.
+// NewPacketBuffer creates a new PacketBuffer with opts.
+func NewPacketBuffer(opts PacketBufferOptions) *PacketBuffer {
+ pk := &PacketBuffer{
+ Data: opts.Data,
+ }
+ if opts.ReserveHeaderBytes != 0 {
+ pk.header = buffer.NewPrependable(opts.ReserveHeaderBytes)
+ }
+ return pk
+}
+
+// ReservedHeaderBytes returns the number of bytes initially reserved for
+// headers.
+func (pk *PacketBuffer) ReservedHeaderBytes() int {
+ return pk.header.UsedLength() + pk.header.AvailableLength()
+}
+
+// AvailableHeaderBytes returns the number of bytes currently available for
+// headers. This is relevant to PacketHeader.Push method only.
+func (pk *PacketBuffer) AvailableHeaderBytes() int {
+ return pk.header.AvailableLength()
+}
+
+// LinkHeader returns the handle to link-layer header.
+func (pk *PacketBuffer) LinkHeader() PacketHeader {
+ return PacketHeader{
+ pk: pk,
+ typ: linkHeader,
+ }
+}
+
+// NetworkHeader returns the handle to network-layer header.
+func (pk *PacketBuffer) NetworkHeader() PacketHeader {
+ return PacketHeader{
+ pk: pk,
+ typ: networkHeader,
+ }
+}
+
+// TransportHeader returns the handle to transport-layer header.
+func (pk *PacketBuffer) TransportHeader() PacketHeader {
+ return PacketHeader{
+ pk: pk,
+ typ: transportHeader,
+ }
+}
+
+// HeaderSize returns the total size of all headers in bytes.
+func (pk *PacketBuffer) HeaderSize() int {
+ // Note for inbound packets (Consume called), headers are not stored in
+ // pk.header. Thus, calculation of size of each header is needed.
+ var size int
+ for i := range pk.headers {
+ size += len(pk.headers[i].buf)
+ }
+ return size
+}
+
+// Size returns the size of packet in bytes.
+func (pk *PacketBuffer) Size() int {
+ return pk.HeaderSize() + pk.Data.Size()
+}
+
+// Views returns the underlying storage of the whole packet.
+func (pk *PacketBuffer) Views() []buffer.View {
+ // Optimization for outbound packets that headers are in pk.header.
+ useHeader := true
+ for i := range pk.headers {
+ if !canUseHeader(&pk.headers[i]) {
+ useHeader = false
+ break
+ }
+ }
+
+ dataViews := pk.Data.Views()
+
+ var vs []buffer.View
+ if useHeader {
+ vs = make([]buffer.View, 0, 1+len(dataViews))
+ vs = append(vs, pk.header.View())
+ } else {
+ vs = make([]buffer.View, 0, len(pk.headers)+len(dataViews))
+ for i := range pk.headers {
+ if v := pk.headers[i].buf; len(v) > 0 {
+ vs = append(vs, v)
+ }
+ }
+ }
+ return append(vs, dataViews...)
+}
+
+func canUseHeader(h *headerInfo) bool {
+ // h.offset will be negative if the header was pushed in to prependable
+ // portion, or doesn't matter when it's empty.
+ return len(h.buf) == 0 || h.offset < 0
+}
+
+func (pk *PacketBuffer) push(typ headerType, size int) buffer.View {
+ h := &pk.headers[typ]
+ if h.buf != nil {
+ panic(fmt.Sprintf("push must not be called twice: type %s", typ))
+ }
+ h.buf = buffer.View(pk.header.Prepend(size))
+ h.offset = -pk.header.UsedLength()
+ return h.buf
+}
+
+func (pk *PacketBuffer) consume(typ headerType, size int) (v buffer.View, consumed bool) {
+ h := &pk.headers[typ]
+ if h.buf != nil {
+ panic(fmt.Sprintf("consume must not be called twice: type %s", typ))
+ }
+ v, ok := pk.Data.PullUp(size)
+ if !ok {
+ return
+ }
+ pk.Data.TrimFront(size)
+ h.buf = v
+ return h.buf, true
+}
+
+// Clone makes a shallow copy of pk.
//
-// FIXME(b/153685824): Data gets copied but not other header references.
+// Clone should be called in such cases so that no modifications is done to
+// underlying packet payload.
func (pk *PacketBuffer) Clone() *PacketBuffer {
- return &PacketBuffer{
+ newPk := &PacketBuffer{
PacketBufferEntry: pk.PacketBufferEntry,
Data: pk.Data.Clone(nil),
- Header: pk.Header,
- LinkHeader: pk.LinkHeader,
- NetworkHeader: pk.NetworkHeader,
- TransportHeader: pk.TransportHeader,
+ headers: pk.headers,
+ header: pk.header,
Hash: pk.Hash,
Owner: pk.Owner,
EgressRoute: pk.EgressRoute,
@@ -110,4 +245,55 @@ func (pk *PacketBuffer) Clone() *PacketBuffer {
NetworkProtocolNumber: pk.NetworkProtocolNumber,
NatDone: pk.NatDone,
}
+ return newPk
+}
+
+// headerInfo stores metadata about a header in a packet.
+type headerInfo struct {
+ // buf is the memorized slice for both prepended and consumed header.
+ // When header is prepended, buf serves as memorized value, which is a slice
+ // of pk.header. When header is consumed, buf is the slice pulled out from
+ // pk.Data, which is the only place to hold this header.
+ buf buffer.View
+
+ // offset will be a negative number denoting the offset where this header is
+ // from the end of pk.header, if it is prepended. Otherwise, zero.
+ offset int
+}
+
+// PacketHeader is a handle object to a header in the underlying packet.
+type PacketHeader struct {
+ pk *PacketBuffer
+ typ headerType
+}
+
+// View returns the underlying storage of h.
+func (h PacketHeader) View() buffer.View {
+ return h.pk.headers[h.typ].buf
+}
+
+// Push pushes size bytes in the front of its residing packet, and returns the
+// backing storage. Callers may only call one of Push or Consume once on each
+// header in the lifetime of the underlying packet.
+func (h PacketHeader) Push(size int) buffer.View {
+ return h.pk.push(h.typ, size)
+}
+
+// Consume moves the first size bytes of the unparsed data portion in the packet
+// to h, and returns the backing storage. In the case of data is shorter than
+// size, consumed will be false, and the state of h will not be affected.
+// Callers may only call one of Push or Consume once on each header in the
+// lifetime of the underlying packet.
+func (h PacketHeader) Consume(size int) (v buffer.View, consumed bool) {
+ return h.pk.consume(h.typ, size)
+}
+
+// PayloadSince returns packet payload starting from and including a particular
+// header. This method isn't optimized and should be used in test only.
+func PayloadSince(h PacketHeader) buffer.View {
+ var v buffer.View
+ for _, hinfo := range h.pk.headers[h.typ:] {
+ v = append(v, hinfo.buf...)
+ }
+ return append(v, h.pk.Data.ToView()...)
}
diff --git a/pkg/tcpip/stack/route.go b/pkg/tcpip/stack/route.go
index 9ce0a2c22..e267bebb0 100644
--- a/pkg/tcpip/stack/route.go
+++ b/pkg/tcpip/stack/route.go
@@ -173,7 +173,7 @@ func (r *Route) WritePacket(gso *GSO, params NetworkHeaderParams, pkt *PacketBuf
}
// WritePacket takes ownership of pkt, calculate numBytes first.
- numBytes := pkt.Header.UsedLength() + pkt.Data.Size()
+ numBytes := pkt.Size()
err := r.ref.ep.WritePacket(r, gso, params, pkt)
if err != nil {
@@ -203,8 +203,7 @@ func (r *Route) WritePackets(gso *GSO, pkts PacketBufferList, params NetworkHead
writtenBytes := 0
for i, pb := 0, pkts.Front(); i < n && pb != nil; i, pb = i+1, pb.Next() {
- writtenBytes += pb.Header.UsedLength()
- writtenBytes += pb.Data.Size()
+ writtenBytes += pb.Size()
}
r.ref.nic.stats.Tx.Bytes.IncrementBy(uint64(writtenBytes))
diff --git a/pkg/tcpip/transport/icmp/endpoint.go b/pkg/tcpip/transport/icmp/endpoint.go
index 4612be4e7..bd6f49eb8 100644
--- a/pkg/tcpip/transport/icmp/endpoint.go
+++ b/pkg/tcpip/transport/icmp/endpoint.go
@@ -430,9 +430,12 @@ func send4(r *stack.Route, ident uint16, data buffer.View, ttl uint8, owner tcpi
return tcpip.ErrInvalidEndpointState
}
- hdr := buffer.NewPrependable(header.ICMPv4MinimumSize + int(r.MaxHeaderLength()))
+ pkt := stack.NewPacketBuffer(stack.PacketBufferOptions{
+ ReserveHeaderBytes: header.ICMPv4MinimumSize + int(r.MaxHeaderLength()),
+ })
+ pkt.Owner = owner
- icmpv4 := header.ICMPv4(hdr.Prepend(header.ICMPv4MinimumSize))
+ icmpv4 := header.ICMPv4(pkt.TransportHeader().Push(header.ICMPv4MinimumSize))
copy(icmpv4, data)
// Set the ident to the user-specified port. Sequence number should
// already be set by the user.
@@ -447,15 +450,12 @@ func send4(r *stack.Route, ident uint16, data buffer.View, ttl uint8, owner tcpi
icmpv4.SetChecksum(0)
icmpv4.SetChecksum(^header.Checksum(icmpv4, header.Checksum(data, 0)))
+ pkt.Data = data.ToVectorisedView()
+
if ttl == 0 {
ttl = r.DefaultTTL()
}
- return r.WritePacket(nil /* gso */, stack.NetworkHeaderParams{Protocol: header.ICMPv4ProtocolNumber, TTL: ttl, TOS: stack.DefaultTOS}, &stack.PacketBuffer{
- Header: hdr,
- Data: data.ToVectorisedView(),
- TransportHeader: buffer.View(icmpv4),
- Owner: owner,
- })
+ return r.WritePacket(nil /* gso */, stack.NetworkHeaderParams{Protocol: header.ICMPv4ProtocolNumber, TTL: ttl, TOS: stack.DefaultTOS}, pkt)
}
func send6(r *stack.Route, ident uint16, data buffer.View, ttl uint8) *tcpip.Error {
@@ -463,9 +463,11 @@ func send6(r *stack.Route, ident uint16, data buffer.View, ttl uint8) *tcpip.Err
return tcpip.ErrInvalidEndpointState
}
- hdr := buffer.NewPrependable(header.ICMPv6MinimumSize + int(r.MaxHeaderLength()))
+ pkt := stack.NewPacketBuffer(stack.PacketBufferOptions{
+ ReserveHeaderBytes: header.ICMPv6MinimumSize + int(r.MaxHeaderLength()),
+ })
- icmpv6 := header.ICMPv6(hdr.Prepend(header.ICMPv6MinimumSize))
+ icmpv6 := header.ICMPv6(pkt.TransportHeader().Push(header.ICMPv6MinimumSize))
copy(icmpv6, data)
// Set the ident. Sequence number is provided by the user.
icmpv6.SetIdent(ident)
@@ -477,15 +479,12 @@ func send6(r *stack.Route, ident uint16, data buffer.View, ttl uint8) *tcpip.Err
dataVV := data.ToVectorisedView()
icmpv6.SetChecksum(header.ICMPv6Checksum(icmpv6, r.LocalAddress, r.RemoteAddress, dataVV))
+ pkt.Data = dataVV
if ttl == 0 {
ttl = r.DefaultTTL()
}
- return r.WritePacket(nil /* gso */, stack.NetworkHeaderParams{Protocol: header.ICMPv6ProtocolNumber, TTL: ttl, TOS: stack.DefaultTOS}, &stack.PacketBuffer{
- Header: hdr,
- Data: dataVV,
- TransportHeader: buffer.View(icmpv6),
- })
+ return r.WritePacket(nil /* gso */, stack.NetworkHeaderParams{Protocol: header.ICMPv6ProtocolNumber, TTL: ttl, TOS: stack.DefaultTOS}, pkt)
}
// checkV4MappedLocked determines the effective network protocol and converts
@@ -748,14 +747,18 @@ func (e *endpoint) HandlePacket(r *stack.Route, id stack.TransportEndpointID, pk
// Only accept echo replies.
switch e.NetProto {
case header.IPv4ProtocolNumber:
- h := header.ICMPv4(pkt.TransportHeader)
+ h := header.ICMPv4(pkt.TransportHeader().View())
+ // TODO(b/129292233): Determine if len(h) check is still needed after early
+ // parsing.
if len(h) < header.ICMPv4MinimumSize || h.Type() != header.ICMPv4EchoReply {
e.stack.Stats().DroppedPackets.Increment()
e.stats.ReceiveErrors.MalformedPacketsReceived.Increment()
return
}
case header.IPv6ProtocolNumber:
- h := header.ICMPv6(pkt.TransportHeader)
+ h := header.ICMPv6(pkt.TransportHeader().View())
+ // TODO(b/129292233): Determine if len(h) check is still needed after early
+ // parsing.
if len(h) < header.ICMPv6MinimumSize || h.Type() != header.ICMPv6EchoReply {
e.stack.Stats().DroppedPackets.Increment()
e.stats.ReceiveErrors.MalformedPacketsReceived.Increment()
@@ -791,7 +794,7 @@ func (e *endpoint) HandlePacket(r *stack.Route, id stack.TransportEndpointID, pk
}
// ICMP socket's data includes ICMP header.
- packet.data = pkt.TransportHeader.ToVectorisedView()
+ packet.data = pkt.TransportHeader().View().ToVectorisedView()
packet.data.Append(pkt.Data)
e.rcvList.PushBack(packet)
diff --git a/pkg/tcpip/transport/packet/endpoint.go b/pkg/tcpip/transport/packet/endpoint.go
index df478115d..1b03ad6bb 100644
--- a/pkg/tcpip/transport/packet/endpoint.go
+++ b/pkg/tcpip/transport/packet/endpoint.go
@@ -433,9 +433,9 @@ func (ep *endpoint) HandlePacket(nicID tcpip.NICID, localAddr tcpip.LinkAddress,
// Push new packet into receive list and increment the buffer size.
var packet packet
// TODO(gvisor.dev/issue/173): Return network protocol.
- if len(pkt.LinkHeader) > 0 {
+ if !pkt.LinkHeader().View().IsEmpty() {
// Get info directly from the ethernet header.
- hdr := header.Ethernet(pkt.LinkHeader)
+ hdr := header.Ethernet(pkt.LinkHeader().View())
packet.senderAddr = tcpip.FullAddress{
NIC: nicID,
Addr: tcpip.Address(hdr.SourceAddress()),
@@ -458,9 +458,14 @@ func (ep *endpoint) HandlePacket(nicID tcpip.NICID, localAddr tcpip.LinkAddress,
case tcpip.PacketHost:
packet.data = pkt.Data
case tcpip.PacketOutgoing:
- // Strip Link Header from the Header.
- pkt.Header = buffer.NewPrependableFromView(pkt.Header.View()[len(pkt.LinkHeader):])
- combinedVV := pkt.Header.View().ToVectorisedView()
+ // Strip Link Header.
+ var combinedVV buffer.VectorisedView
+ if v := pkt.NetworkHeader().View(); !v.IsEmpty() {
+ combinedVV.AppendView(v)
+ }
+ if v := pkt.TransportHeader().View(); !v.IsEmpty() {
+ combinedVV.AppendView(v)
+ }
combinedVV.Append(pkt.Data)
packet.data = combinedVV
default:
@@ -471,9 +476,8 @@ func (ep *endpoint) HandlePacket(nicID tcpip.NICID, localAddr tcpip.LinkAddress,
// Raw packets need their ethernet headers prepended before
// queueing.
var linkHeader buffer.View
- var combinedVV buffer.VectorisedView
if pkt.PktType != tcpip.PacketOutgoing {
- if len(pkt.LinkHeader) == 0 {
+ if pkt.LinkHeader().View().IsEmpty() {
// We weren't provided with an actual ethernet header,
// so fake one.
ethFields := header.EthernetFields{
@@ -485,19 +489,14 @@ func (ep *endpoint) HandlePacket(nicID tcpip.NICID, localAddr tcpip.LinkAddress,
fakeHeader.Encode(&ethFields)
linkHeader = buffer.View(fakeHeader)
} else {
- linkHeader = append(buffer.View(nil), pkt.LinkHeader...)
+ linkHeader = append(buffer.View(nil), pkt.LinkHeader().View()...)
}
- combinedVV = linkHeader.ToVectorisedView()
- }
- if pkt.PktType == tcpip.PacketOutgoing {
- // For outgoing packets the Link, Network and Transport
- // headers are in the pkt.Header fields normally unless
- // a Raw socket is in use in which case pkt.Header could
- // be nil.
- combinedVV.AppendView(pkt.Header.View())
+ combinedVV := linkHeader.ToVectorisedView()
+ combinedVV.Append(pkt.Data)
+ packet.data = combinedVV
+ } else {
+ packet.data = buffer.NewVectorisedView(pkt.Size(), pkt.Views())
}
- combinedVV.Append(pkt.Data)
- packet.data = combinedVV
}
packet.timestampNS = ep.stack.Clock().NowNanoseconds()
diff --git a/pkg/tcpip/transport/raw/endpoint.go b/pkg/tcpip/transport/raw/endpoint.go
index f85a68554..edc2b5b61 100644
--- a/pkg/tcpip/transport/raw/endpoint.go
+++ b/pkg/tcpip/transport/raw/endpoint.go
@@ -352,18 +352,23 @@ func (e *endpoint) finishWrite(payloadBytes []byte, route *stack.Route) (int64,
}
if e.hdrIncluded {
- if err := route.WriteHeaderIncludedPacket(&stack.PacketBuffer{
+ pkt := stack.NewPacketBuffer(stack.PacketBufferOptions{
Data: buffer.View(payloadBytes).ToVectorisedView(),
- }); err != nil {
+ })
+ if err := route.WriteHeaderIncludedPacket(pkt); err != nil {
return 0, nil, err
}
} else {
- hdr := buffer.NewPrependable(len(payloadBytes) + int(route.MaxHeaderLength()))
- if err := route.WritePacket(nil /* gso */, stack.NetworkHeaderParams{Protocol: e.TransProto, TTL: route.DefaultTTL(), TOS: stack.DefaultTOS}, &stack.PacketBuffer{
- Header: hdr,
- Data: buffer.View(payloadBytes).ToVectorisedView(),
- Owner: e.owner,
- }); err != nil {
+ pkt := stack.NewPacketBuffer(stack.PacketBufferOptions{
+ ReserveHeaderBytes: int(route.MaxHeaderLength()),
+ Data: buffer.View(payloadBytes).ToVectorisedView(),
+ })
+ pkt.Owner = e.owner
+ if err := route.WritePacket(nil /* gso */, stack.NetworkHeaderParams{
+ Protocol: e.TransProto,
+ TTL: route.DefaultTTL(),
+ TOS: stack.DefaultTOS,
+ }, pkt); err != nil {
return 0, nil, err
}
}
@@ -691,12 +696,13 @@ func (e *endpoint) HandlePacket(route *stack.Route, pkt *stack.PacketBuffer) {
// slice. Save/restore doesn't support overlapping slices and will fail.
var combinedVV buffer.VectorisedView
if e.TransportEndpointInfo.NetProto == header.IPv4ProtocolNumber {
- headers := make(buffer.View, 0, len(pkt.NetworkHeader)+len(pkt.TransportHeader))
- headers = append(headers, pkt.NetworkHeader...)
- headers = append(headers, pkt.TransportHeader...)
+ network, transport := pkt.NetworkHeader().View(), pkt.TransportHeader().View()
+ headers := make(buffer.View, 0, len(network)+len(transport))
+ headers = append(headers, network...)
+ headers = append(headers, transport...)
combinedVV = headers.ToVectorisedView()
} else {
- combinedVV = append(buffer.View(nil), pkt.TransportHeader...).ToVectorisedView()
+ combinedVV = append(buffer.View(nil), pkt.TransportHeader().View()...).ToVectorisedView()
}
combinedVV.Append(pkt.Data)
packet.data = combinedVV
diff --git a/pkg/tcpip/transport/tcp/connect.go b/pkg/tcpip/transport/tcp/connect.go
index 46702906b..290172ac9 100644
--- a/pkg/tcpip/transport/tcp/connect.go
+++ b/pkg/tcpip/transport/tcp/connect.go
@@ -746,11 +746,7 @@ func (e *endpoint) sendTCP(r *stack.Route, tf tcpFields, data buffer.VectorisedV
func buildTCPHdr(r *stack.Route, tf tcpFields, pkt *stack.PacketBuffer, gso *stack.GSO) {
optLen := len(tf.opts)
- hdr := &pkt.Header
- packetSize := pkt.Data.Size()
- // Initialize the header.
- tcp := header.TCP(hdr.Prepend(header.TCPMinimumSize + optLen))
- pkt.TransportHeader = buffer.View(tcp)
+ tcp := header.TCP(pkt.TransportHeader().Push(header.TCPMinimumSize + optLen))
tcp.Encode(&header.TCPFields{
SrcPort: tf.id.LocalPort,
DstPort: tf.id.RemotePort,
@@ -762,8 +758,7 @@ func buildTCPHdr(r *stack.Route, tf tcpFields, pkt *stack.PacketBuffer, gso *sta
})
copy(tcp[header.TCPMinimumSize:], tf.opts)
- length := uint16(hdr.UsedLength() + packetSize)
- xsum := r.PseudoHeaderChecksum(ProtocolNumber, length)
+ xsum := r.PseudoHeaderChecksum(ProtocolNumber, uint16(pkt.Size()))
// Only calculate the checksum if offloading isn't supported.
if gso != nil && gso.NeedsCsum {
// This is called CHECKSUM_PARTIAL in the Linux kernel. We
@@ -801,17 +796,18 @@ func sendTCPBatch(r *stack.Route, tf tcpFields, data buffer.VectorisedView, gso
packetSize = size
}
size -= packetSize
- var pkt stack.PacketBuffer
- pkt.Header = buffer.NewPrependable(hdrSize)
+ pkt := stack.NewPacketBuffer(stack.PacketBufferOptions{
+ ReserveHeaderBytes: hdrSize,
+ })
pkt.Hash = tf.txHash
pkt.Owner = owner
pkt.EgressRoute = r
pkt.GSOOptions = gso
pkt.NetworkProtocolNumber = r.NetworkProtocolNumber()
data.ReadToVV(&pkt.Data, packetSize)
- buildTCPHdr(r, tf, &pkt, gso)
+ buildTCPHdr(r, tf, pkt, gso)
tf.seq = tf.seq.Add(seqnum.Size(packetSize))
- pkts.PushBack(&pkt)
+ pkts.PushBack(pkt)
}
if tf.ttl == 0 {
@@ -837,12 +833,12 @@ func sendTCP(r *stack.Route, tf tcpFields, data buffer.VectorisedView, gso *stac
return sendTCPBatch(r, tf, data, gso, owner)
}
- pkt := &stack.PacketBuffer{
- Header: buffer.NewPrependable(header.TCPMinimumSize + int(r.MaxHeaderLength()) + optLen),
- Data: data,
- Hash: tf.txHash,
- Owner: owner,
- }
+ pkt := stack.NewPacketBuffer(stack.PacketBufferOptions{
+ ReserveHeaderBytes: header.TCPMinimumSize + int(r.MaxHeaderLength()) + optLen,
+ Data: data,
+ })
+ pkt.Hash = tf.txHash
+ pkt.Owner = owner
buildTCPHdr(r, tf, pkt, gso)
if tf.ttl == 0 {
diff --git a/pkg/tcpip/transport/tcp/protocol.go b/pkg/tcpip/transport/tcp/protocol.go
index 49a673b42..c5afa2680 100644
--- a/pkg/tcpip/transport/tcp/protocol.go
+++ b/pkg/tcpip/transport/tcp/protocol.go
@@ -21,7 +21,6 @@
package tcp
import (
- "fmt"
"runtime"
"strings"
"time"
@@ -547,22 +546,22 @@ func (p *protocol) SynRcvdCounter() *synRcvdCounter {
// Parse implements stack.TransportProtocol.Parse.
func (*protocol) Parse(pkt *stack.PacketBuffer) bool {
- hdr, ok := pkt.Data.PullUp(header.TCPMinimumSize)
+ // TCP header is variable length, peek at it first.
+ hdrLen := header.TCPMinimumSize
+ hdr, ok := pkt.Data.PullUp(hdrLen)
if !ok {
return false
}
// If the header has options, pull those up as well.
if offset := int(header.TCP(hdr).DataOffset()); offset > header.TCPMinimumSize && offset <= pkt.Data.Size() {
- hdr, ok = pkt.Data.PullUp(offset)
- if !ok {
- panic(fmt.Sprintf("There should be at least %d bytes in pkt.Data.", offset))
- }
+ // TODO(gvisor.dev/issue/2404): Figure out whether to reject this kind of
+ // packets.
+ hdrLen = offset
}
- pkt.TransportHeader = hdr
- pkt.Data.TrimFront(len(hdr))
- return true
+ _, ok = pkt.TransportHeader().Consume(hdrLen)
+ return ok
}
// NewProtocol returns a TCP transport protocol.
diff --git a/pkg/tcpip/transport/tcp/segment.go b/pkg/tcpip/transport/tcp/segment.go
index bb60dc29d..94307d31a 100644
--- a/pkg/tcpip/transport/tcp/segment.go
+++ b/pkg/tcpip/transport/tcp/segment.go
@@ -68,7 +68,7 @@ func newSegment(r *stack.Route, id stack.TransportEndpointID, pkt *stack.PacketB
route: r.Clone(),
}
s.data = pkt.Data.Clone(s.views[:])
- s.hdr = header.TCP(pkt.TransportHeader)
+ s.hdr = header.TCP(pkt.TransportHeader().View())
s.rcvdTime = time.Now()
return s
}
diff --git a/pkg/tcpip/transport/udp/endpoint.go b/pkg/tcpip/transport/udp/endpoint.go
index 4a2b6c03a..73608783c 100644
--- a/pkg/tcpip/transport/udp/endpoint.go
+++ b/pkg/tcpip/transport/udp/endpoint.go
@@ -986,13 +986,16 @@ func (e *endpoint) GetSockOpt(opt interface{}) *tcpip.Error {
// sendUDP sends a UDP segment via the provided network endpoint and under the
// provided identity.
func sendUDP(r *stack.Route, data buffer.VectorisedView, localPort, remotePort uint16, ttl uint8, useDefaultTTL bool, tos uint8, owner tcpip.PacketOwner, noChecksum bool) *tcpip.Error {
- // Allocate a buffer for the UDP header.
- hdr := buffer.NewPrependable(header.UDPMinimumSize + int(r.MaxHeaderLength()))
+ pkt := stack.NewPacketBuffer(stack.PacketBufferOptions{
+ ReserveHeaderBytes: header.UDPMinimumSize + int(r.MaxHeaderLength()),
+ Data: data,
+ })
+ pkt.Owner = owner
- // Initialize the header.
- udp := header.UDP(hdr.Prepend(header.UDPMinimumSize))
+ // Initialize the UDP header.
+ udp := header.UDP(pkt.TransportHeader().Push(header.UDPMinimumSize))
- length := uint16(hdr.UsedLength() + data.Size())
+ length := uint16(pkt.Size())
udp.Encode(&header.UDPFields{
SrcPort: localPort,
DstPort: remotePort,
@@ -1019,12 +1022,7 @@ func sendUDP(r *stack.Route, data buffer.VectorisedView, localPort, remotePort u
Protocol: ProtocolNumber,
TTL: ttl,
TOS: tos,
- }, &stack.PacketBuffer{
- Header: hdr,
- Data: data,
- TransportHeader: buffer.View(udp),
- Owner: owner,
- }); err != nil {
+ }, pkt); err != nil {
r.Stats().UDP.PacketSendErrors.Increment()
return err
}
@@ -1372,7 +1370,7 @@ func (e *endpoint) Readiness(mask waiter.EventMask) waiter.EventMask {
// endpoint.
func (e *endpoint) HandlePacket(r *stack.Route, id stack.TransportEndpointID, pkt *stack.PacketBuffer) {
// Get the header then trim it from the view.
- hdr := header.UDP(pkt.TransportHeader)
+ hdr := header.UDP(pkt.TransportHeader().View())
if int(hdr.Length()) > pkt.Data.Size()+header.UDPMinimumSize {
// Malformed packet.
e.stack.Stats().UDP.MalformedPacketsReceived.Increment()
@@ -1443,9 +1441,9 @@ func (e *endpoint) HandlePacket(r *stack.Route, id stack.TransportEndpointID, pk
// Save any useful information from the network header to the packet.
switch r.NetProto {
case header.IPv4ProtocolNumber:
- packet.tos, _ = header.IPv4(pkt.NetworkHeader).TOS()
+ packet.tos, _ = header.IPv4(pkt.NetworkHeader().View()).TOS()
case header.IPv6ProtocolNumber:
- packet.tos, _ = header.IPv6(pkt.NetworkHeader).TOS()
+ packet.tos, _ = header.IPv6(pkt.NetworkHeader().View()).TOS()
}
// TODO(gvisor.dev/issue/3556): r.LocalAddress may be a multicast or broadcast
diff --git a/pkg/tcpip/transport/udp/protocol.go b/pkg/tcpip/transport/udp/protocol.go
index 0e7464e3a..63d4bed7c 100644
--- a/pkg/tcpip/transport/udp/protocol.go
+++ b/pkg/tcpip/transport/udp/protocol.go
@@ -82,7 +82,7 @@ func (*protocol) ParsePorts(v buffer.View) (src, dst uint16, err *tcpip.Error) {
// HandleUnknownDestinationPacket handles packets targeted at this protocol but
// that don't match any existing endpoint.
func (p *protocol) HandleUnknownDestinationPacket(r *stack.Route, id stack.TransportEndpointID, pkt *stack.PacketBuffer) bool {
- hdr := header.UDP(pkt.TransportHeader)
+ hdr := header.UDP(pkt.TransportHeader().View())
if int(hdr.Length()) > pkt.Data.Size()+header.UDPMinimumSize {
// Malformed packet.
r.Stack().Stats().UDP.MalformedPacketsReceived.Increment()
@@ -130,7 +130,7 @@ func (p *protocol) HandleUnknownDestinationPacket(r *stack.Route, id stack.Trans
}
headerLen := int(r.MaxHeaderLength()) + header.ICMPv4MinimumSize
available := int(mtu) - headerLen
- payloadLen := len(pkt.NetworkHeader) + len(pkt.TransportHeader) + pkt.Data.Size()
+ payloadLen := pkt.NetworkHeader().View().Size() + pkt.TransportHeader().View().Size() + pkt.Data.Size()
if payloadLen > available {
payloadLen = available
}
@@ -139,22 +139,21 @@ func (p *protocol) HandleUnknownDestinationPacket(r *stack.Route, id stack.Trans
// For example, a raw or packet socket may use what UDP
// considers an unreachable destination. Thus we deep copy pkt
// to prevent multiple ownership and SR errors.
- newHeader := append(buffer.View(nil), pkt.NetworkHeader...)
- newHeader = append(newHeader, pkt.TransportHeader...)
+ newHeader := append(buffer.View(nil), pkt.NetworkHeader().View()...)
+ newHeader = append(newHeader, pkt.TransportHeader().View()...)
payload := newHeader.ToVectorisedView()
payload.AppendView(pkt.Data.ToView())
payload.CapLength(payloadLen)
- hdr := buffer.NewPrependable(headerLen)
- pkt := header.ICMPv4(hdr.Prepend(header.ICMPv4MinimumSize))
- pkt.SetType(header.ICMPv4DstUnreachable)
- pkt.SetCode(header.ICMPv4PortUnreachable)
- pkt.SetChecksum(header.ICMPv4Checksum(pkt, payload))
- r.WritePacket(nil /* gso */, stack.NetworkHeaderParams{Protocol: header.ICMPv4ProtocolNumber, TTL: r.DefaultTTL(), TOS: stack.DefaultTOS}, &stack.PacketBuffer{
- Header: hdr,
- TransportHeader: buffer.View(pkt),
- Data: payload,
+ icmpPkt := stack.NewPacketBuffer(stack.PacketBufferOptions{
+ ReserveHeaderBytes: headerLen,
+ Data: payload,
})
+ icmpHdr := header.ICMPv4(icmpPkt.TransportHeader().Push(header.ICMPv4MinimumSize))
+ icmpHdr.SetType(header.ICMPv4DstUnreachable)
+ icmpHdr.SetCode(header.ICMPv4PortUnreachable)
+ icmpHdr.SetChecksum(header.ICMPv4Checksum(icmpHdr, icmpPkt.Data))
+ r.WritePacket(nil /* gso */, stack.NetworkHeaderParams{Protocol: header.ICMPv4ProtocolNumber, TTL: r.DefaultTTL(), TOS: stack.DefaultTOS}, icmpPkt)
case header.IPv6AddressSize:
if !r.Stack().AllowICMPMessage() {
@@ -175,24 +174,24 @@ func (p *protocol) HandleUnknownDestinationPacket(r *stack.Route, id stack.Trans
}
headerLen := int(r.MaxHeaderLength()) + header.ICMPv6DstUnreachableMinimumSize
available := int(mtu) - headerLen
- payloadLen := len(pkt.NetworkHeader) + len(pkt.TransportHeader) + pkt.Data.Size()
+ network, transport := pkt.NetworkHeader().View(), pkt.TransportHeader().View()
+ payloadLen := len(network) + len(transport) + pkt.Data.Size()
if payloadLen > available {
payloadLen = available
}
- payload := buffer.NewVectorisedView(len(pkt.NetworkHeader)+len(pkt.TransportHeader), []buffer.View{pkt.NetworkHeader, pkt.TransportHeader})
+ payload := buffer.NewVectorisedView(len(network)+len(transport), []buffer.View{network, transport})
payload.Append(pkt.Data)
payload.CapLength(payloadLen)
- hdr := buffer.NewPrependable(headerLen)
- pkt := header.ICMPv6(hdr.Prepend(header.ICMPv6DstUnreachableMinimumSize))
- pkt.SetType(header.ICMPv6DstUnreachable)
- pkt.SetCode(header.ICMPv6PortUnreachable)
- pkt.SetChecksum(header.ICMPv6Checksum(pkt, r.LocalAddress, r.RemoteAddress, payload))
- r.WritePacket(nil /* gso */, stack.NetworkHeaderParams{Protocol: header.ICMPv6ProtocolNumber, TTL: r.DefaultTTL(), TOS: stack.DefaultTOS}, &stack.PacketBuffer{
- Header: hdr,
- TransportHeader: buffer.View(pkt),
- Data: payload,
+ icmpPkt := stack.NewPacketBuffer(stack.PacketBufferOptions{
+ ReserveHeaderBytes: headerLen,
+ Data: payload,
})
+ icmpHdr := header.ICMPv6(icmpPkt.TransportHeader().Push(header.ICMPv6DstUnreachableMinimumSize))
+ icmpHdr.SetType(header.ICMPv6DstUnreachable)
+ icmpHdr.SetCode(header.ICMPv6PortUnreachable)
+ icmpHdr.SetChecksum(header.ICMPv6Checksum(icmpHdr, r.LocalAddress, r.RemoteAddress, icmpPkt.Data))
+ r.WritePacket(nil /* gso */, stack.NetworkHeaderParams{Protocol: header.ICMPv6ProtocolNumber, TTL: r.DefaultTTL(), TOS: stack.DefaultTOS}, icmpPkt)
}
return true
}
@@ -215,14 +214,8 @@ func (*protocol) Wait() {}
// Parse implements stack.TransportProtocol.Parse.
func (*protocol) Parse(pkt *stack.PacketBuffer) bool {
- h, ok := pkt.Data.PullUp(header.UDPMinimumSize)
- if !ok {
- // Packet is too small
- return false
- }
- pkt.TransportHeader = h
- pkt.Data.TrimFront(header.UDPMinimumSize)
- return true
+ _, ok := pkt.TransportHeader().Consume(header.UDPMinimumSize)
+ return ok
}
// NewProtocol returns a UDP transport protocol.