summaryrefslogtreecommitdiffhomepage
path: root/pkg
diff options
context:
space:
mode:
Diffstat (limited to 'pkg')
-rw-r--r--pkg/sentry/socket/socket.go1
-rw-r--r--pkg/tcpip/link/loopback/loopback.go30
-rw-r--r--pkg/tcpip/stack/nic.go54
-rw-r--r--pkg/tcpip/stack/stack.go66
-rw-r--r--pkg/tcpip/transport/icmp/endpoint.go1
-rw-r--r--pkg/tcpip/transport/packet/endpoint.go48
-rw-r--r--pkg/tcpip/transport/packet/endpoint_state.go6
-rw-r--r--pkg/tcpip/transport/raw/endpoint.go4
-rw-r--r--pkg/tcpip/transport/udp/endpoint.go1
-rw-r--r--pkg/tcpip/transport/udp/endpoint_state.go6
10 files changed, 162 insertions, 55 deletions
diff --git a/pkg/sentry/socket/socket.go b/pkg/sentry/socket/socket.go
index 658e90bb9..83b9d9389 100644
--- a/pkg/sentry/socket/socket.go
+++ b/pkg/sentry/socket/socket.go
@@ -750,6 +750,7 @@ func AddressAndFamily(addr []byte) (tcpip.FullAddress, uint16, *syserr.Error) {
return tcpip.FullAddress{
NIC: tcpip.NICID(a.InterfaceIndex),
Addr: tcpip.Address(a.HardwareAddr[:header.EthernetAddressSize]),
+ Port: Ntohs(a.Protocol),
}, family, nil
case linux.AF_UNSPEC:
diff --git a/pkg/tcpip/link/loopback/loopback.go b/pkg/tcpip/link/loopback/loopback.go
index d7bbfa639..ca1f9c08d 100644
--- a/pkg/tcpip/link/loopback/loopback.go
+++ b/pkg/tcpip/link/loopback/loopback.go
@@ -76,19 +76,8 @@ func (*endpoint) Wait() {}
// WritePacket implements stack.LinkEndpoint.WritePacket. It delivers outbound
// packets to the network-layer dispatcher.
-func (e *endpoint) WritePacket(_ stack.RouteInfo, protocol tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) tcpip.Error {
- // Construct data as the unparsed portion for the loopback packet.
- data := buffer.NewVectorisedView(pkt.Size(), pkt.Views())
-
- // Because we're immediately turning around and writing the packet back
- // to the rx path, we intentionally don't preserve the remote and local
- // link addresses from the stack.Route we're passed.
- newPkt := stack.NewPacketBuffer(stack.PacketBufferOptions{
- Data: data,
- })
- e.dispatcher.DeliverNetworkPacket("" /* remote */, "" /* local */, protocol, newPkt)
-
- return nil
+func (e *endpoint) WritePacket(_ stack.RouteInfo, _ tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) tcpip.Error {
+ return e.WriteRawPacket(pkt)
}
// WritePackets implements stack.LinkEndpoint.WritePackets.
@@ -105,4 +94,17 @@ func (e *endpoint) AddHeader(local, remote tcpip.LinkAddress, protocol tcpip.Net
}
// WriteRawPacket implements stack.LinkEndpoint.
-func (*endpoint) WriteRawPacket(*stack.PacketBuffer) tcpip.Error { return &tcpip.ErrNotSupported{} }
+func (e *endpoint) WriteRawPacket(pkt *stack.PacketBuffer) tcpip.Error {
+ // Construct data as the unparsed portion for the loopback packet.
+ data := buffer.NewVectorisedView(pkt.Size(), pkt.Views())
+
+ // Because we're immediately turning around and writing the packet back
+ // to the rx path, we intentionally don't preserve the remote and local
+ // link addresses from the stack.Route we're passed.
+ newPkt := stack.NewPacketBuffer(stack.PacketBufferOptions{
+ Data: data,
+ })
+ e.dispatcher.DeliverNetworkPacket("" /* remote */, "" /* local */, pkt.NetworkProtocolNumber, newPkt)
+
+ return nil
+}
diff --git a/pkg/tcpip/stack/nic.go b/pkg/tcpip/stack/nic.go
index b854d868c..ddc1ddab6 100644
--- a/pkg/tcpip/stack/nic.go
+++ b/pkg/tcpip/stack/nic.go
@@ -734,10 +734,29 @@ func (n *nic) DeliverNetworkPacket(remote, local tcpip.LinkAddress, protocol tcp
n.mu.RUnlock()
// Deliver to interested packet endpoints without holding NIC lock.
+ var packetEPPkt *PacketBuffer
deliverPacketEPs := func(ep PacketEndpoint) {
- p := pkt.Clone()
- p.PktType = tcpip.PacketHost
- ep.HandlePacket(n.id, local, protocol, p)
+ if packetEPPkt == nil {
+ // Packet endpoints hold the full packet.
+ //
+ // We perform a deep copy because higher-level endpoints may point to
+ // the middle of a view that is held by a packet endpoint. Save/Restore
+ // does not support overlapping slices and will panic in this case.
+ //
+ // TODO(https://gvisor.dev/issue/6517): Avoid this copy once S/R supports
+ // overlapping slices (e.g. by passing a shallow copy of pkt to the packet
+ // endpoint).
+ packetEPPkt = NewPacketBuffer(PacketBufferOptions{
+ Data: PayloadSince(pkt.LinkHeader()).ToVectorisedView(),
+ })
+ // If a link header was populated in the original packet buffer, then
+ // populate it in the packet buffer we provide to packet endpoints as
+ // packet endpoints inspect link headers.
+ packetEPPkt.LinkHeader().Consume(pkt.LinkHeader().View().Size())
+ packetEPPkt.PktType = tcpip.PacketHost
+ }
+
+ ep.HandlePacket(n.id, local, protocol, packetEPPkt.Clone())
}
if protoEPs != nil {
protoEPs.forEach(deliverPacketEPs)
@@ -758,13 +777,30 @@ func (n *nic) DeliverOutboundPacket(remote, local tcpip.LinkAddress, protocol tc
eps := n.mu.packetEPs[header.EthernetProtocolAll]
n.mu.RUnlock()
+ var packetEPPkt *PacketBuffer
eps.forEach(func(ep PacketEndpoint) {
- p := pkt.Clone()
- p.PktType = tcpip.PacketOutgoing
- // Add the link layer header as outgoing packets are intercepted
- // before the link layer header is created.
- n.LinkEndpoint.AddHeader(local, remote, protocol, p)
- ep.HandlePacket(n.id, local, protocol, p)
+ if packetEPPkt == nil {
+ // Packet endpoints hold the full packet.
+ //
+ // We perform a deep copy because higher-level endpoints may point to
+ // the middle of a view that is held by a packet endpoint. Save/Restore
+ // does not support overlapping slices and will panic in this case.
+ //
+ // TODO(https://gvisor.dev/issue/6517): Avoid this copy once S/R supports
+ // overlapping slices (e.g. by passing a shallow copy of pkt to the packet
+ // endpoint).
+ packetEPPkt = NewPacketBuffer(PacketBufferOptions{
+ ReserveHeaderBytes: pkt.AvailableHeaderBytes(),
+ Data: PayloadSince(pkt.NetworkHeader()).ToVectorisedView(),
+ })
+ // Add the link layer header as outgoing packets are intercepted before
+ // the link layer header is created and packet endpoints are interested
+ // in the link header.
+ n.LinkEndpoint.AddHeader(local, remote, protocol, packetEPPkt)
+ packetEPPkt.PktType = tcpip.PacketOutgoing
+ }
+
+ ep.HandlePacket(n.id, local, protocol, packetEPPkt.Clone())
})
}
diff --git a/pkg/tcpip/stack/stack.go b/pkg/tcpip/stack/stack.go
index cfa8a2e8f..cb741e540 100644
--- a/pkg/tcpip/stack/stack.go
+++ b/pkg/tcpip/stack/stack.go
@@ -72,7 +72,8 @@ type Stack struct {
// rawFactory creates raw endpoints. If nil, raw endpoints are
// disabled. It is set during Stack creation and is immutable.
- rawFactory RawFactory
+ rawFactory RawFactory
+ packetEndpointWriteSupported bool
demux *transportDemuxer
@@ -218,6 +219,10 @@ type Options struct {
// this is non-nil.
RawFactory RawFactory
+ // AllowPacketEndpointWrite determines if packet endpoints support write
+ // operations.
+ AllowPacketEndpointWrite bool
+
// RandSource is an optional source to use to generate random
// numbers. If omitted it defaults to a Source seeded by the data
// returned by the stack secure RNG.
@@ -359,23 +364,24 @@ func New(opts Options) *Stack {
opts.NUDConfigs.resetInvalidFields()
s := &Stack{
- transportProtocols: make(map[tcpip.TransportProtocolNumber]*transportProtocolState),
- networkProtocols: make(map[tcpip.NetworkProtocolNumber]NetworkProtocol),
- nics: make(map[tcpip.NICID]*nic),
- defaultForwardingEnabled: make(map[tcpip.NetworkProtocolNumber]struct{}),
- cleanupEndpoints: make(map[TransportEndpoint]struct{}),
- PortManager: ports.NewPortManager(),
- clock: clock,
- stats: opts.Stats.FillIn(),
- handleLocal: opts.HandleLocal,
- tables: opts.IPTables,
- icmpRateLimiter: NewICMPRateLimiter(),
- seed: seed,
- nudConfigs: opts.NUDConfigs,
- uniqueIDGenerator: opts.UniqueID,
- nudDisp: opts.NUDDisp,
- randomGenerator: randomGenerator,
- secureRNG: opts.SecureRNG,
+ transportProtocols: make(map[tcpip.TransportProtocolNumber]*transportProtocolState),
+ networkProtocols: make(map[tcpip.NetworkProtocolNumber]NetworkProtocol),
+ nics: make(map[tcpip.NICID]*nic),
+ packetEndpointWriteSupported: opts.AllowPacketEndpointWrite,
+ defaultForwardingEnabled: make(map[tcpip.NetworkProtocolNumber]struct{}),
+ cleanupEndpoints: make(map[TransportEndpoint]struct{}),
+ PortManager: ports.NewPortManager(),
+ clock: clock,
+ stats: opts.Stats.FillIn(),
+ handleLocal: opts.HandleLocal,
+ tables: opts.IPTables,
+ icmpRateLimiter: NewICMPRateLimiter(),
+ seed: seed,
+ nudConfigs: opts.NUDConfigs,
+ uniqueIDGenerator: opts.UniqueID,
+ nudDisp: opts.NUDDisp,
+ randomGenerator: randomGenerator,
+ secureRNG: opts.SecureRNG,
sendBufferSize: tcpip.SendBufferSizeOption{
Min: MinBufferSize,
Default: DefaultBufferSize,
@@ -1653,9 +1659,27 @@ func (s *Stack) WritePacketToRemote(nicID tcpip.NICID, remote tcpip.LinkAddress,
ReserveHeaderBytes: int(nic.MaxHeaderLength()),
Data: payload,
})
+ pkt.NetworkProtocolNumber = netProto
return nic.WritePacketToRemote(remote, netProto, pkt)
}
+// WriteRawPacket writes data directly to the specified NIC without adding any
+// headers.
+func (s *Stack) WriteRawPacket(nicID tcpip.NICID, proto tcpip.NetworkProtocolNumber, payload buffer.VectorisedView) tcpip.Error {
+ s.mu.RLock()
+ nic, ok := s.nics[nicID]
+ s.mu.RUnlock()
+ if !ok {
+ return &tcpip.ErrUnknownNICID{}
+ }
+
+ pkt := NewPacketBuffer(PacketBufferOptions{
+ Data: payload,
+ })
+ pkt.NetworkProtocolNumber = proto
+ return nic.WriteRawPacket(pkt)
+}
+
// NetworkProtocolInstance returns the protocol instance in the stack for the
// specified network protocol. This method is public for protocol implementers
// and tests to use.
@@ -1947,3 +1971,9 @@ func (s *Stack) IsSubnetBroadcast(nicID tcpip.NICID, protocol tcpip.NetworkProto
return false
}
+
+// PacketEndpointWriteSupported returns true iff packet endpoints support write
+// operations.
+func (s *Stack) PacketEndpointWriteSupported() bool {
+ return s.packetEndpointWriteSupported
+}
diff --git a/pkg/tcpip/transport/icmp/endpoint.go b/pkg/tcpip/transport/icmp/endpoint.go
index f9a15efb2..00497bf07 100644
--- a/pkg/tcpip/transport/icmp/endpoint.go
+++ b/pkg/tcpip/transport/icmp/endpoint.go
@@ -329,6 +329,7 @@ func (e *endpoint) write(p tcpip.Payloader, opts tcpip.WriteOptions) (int64, tcp
route = r
}
+ // TODO(https://gvisor.dev/issue/6538): Avoid this allocation.
v := make([]byte, p.Len())
if _, err := io.ReadFull(p, v); err != nil {
return 0, &tcpip.ErrBadBuffer{}
diff --git a/pkg/tcpip/transport/packet/endpoint.go b/pkg/tcpip/transport/packet/endpoint.go
index 8e7bb6c6e..89b4720aa 100644
--- a/pkg/tcpip/transport/packet/endpoint.go
+++ b/pkg/tcpip/transport/packet/endpoint.go
@@ -207,8 +207,52 @@ func (ep *endpoint) Read(dst io.Writer, opts tcpip.ReadOptions) (tcpip.ReadResul
return res, nil
}
-func (*endpoint) Write(tcpip.Payloader, tcpip.WriteOptions) (int64, tcpip.Error) {
- return 0, &tcpip.ErrInvalidOptionValue{}
+func (ep *endpoint) Write(p tcpip.Payloader, opts tcpip.WriteOptions) (int64, tcpip.Error) {
+ if !ep.stack.PacketEndpointWriteSupported() {
+ return 0, &tcpip.ErrNotSupported{}
+ }
+
+ ep.mu.Lock()
+ closed := ep.closed
+ nicID := ep.boundNIC
+ ep.mu.Unlock()
+ if closed {
+ return 0, &tcpip.ErrClosedForSend{}
+ }
+
+ var remote tcpip.LinkAddress
+ proto := ep.netProto
+ if to := opts.To; to != nil {
+ remote = tcpip.LinkAddress(to.Addr)
+
+ if n := to.NIC; n != 0 {
+ nicID = n
+ }
+
+ if p := to.Port; p != 0 {
+ proto = tcpip.NetworkProtocolNumber(p)
+ }
+ }
+
+ if nicID == 0 {
+ return 0, &tcpip.ErrInvalidOptionValue{}
+ }
+
+ // TODO(https://gvisor.dev/issue/6538): Avoid this allocation.
+ payloadBytes := make(buffer.View, p.Len())
+ if _, err := io.ReadFull(p, payloadBytes); err != nil {
+ return 0, &tcpip.ErrBadBuffer{}
+ }
+
+ if err := func() tcpip.Error {
+ if ep.cooked {
+ return ep.stack.WritePacketToRemote(nicID, remote, proto, payloadBytes.ToVectorisedView())
+ }
+ return ep.stack.WriteRawPacket(nicID, proto, payloadBytes.ToVectorisedView())
+ }(); err != nil {
+ return 0, err
+ }
+ return int64(len(payloadBytes)), nil
}
// Disconnect implements tcpip.Endpoint.Disconnect. Packet sockets cannot be
diff --git a/pkg/tcpip/transport/packet/endpoint_state.go b/pkg/tcpip/transport/packet/endpoint_state.go
index e729921db..5c688d286 100644
--- a/pkg/tcpip/transport/packet/endpoint_state.go
+++ b/pkg/tcpip/transport/packet/endpoint_state.go
@@ -34,17 +34,11 @@ func (p *packet) loadReceivedAt(nsec int64) {
// saveData saves packet.data field.
func (p *packet) saveData() buffer.VectorisedView {
- // We cannot save p.data directly as p.data.views may alias to p.views,
- // which is not allowed by state framework (in-struct pointer).
return p.data.Clone(nil)
}
// loadData loads packet.data field.
func (p *packet) loadData(data buffer.VectorisedView) {
- // NOTE: We cannot do the p.data = data.Clone(p.views[:]) optimization
- // here because data.views is not guaranteed to be loaded by now. Plus,
- // data.views will be allocated anyway so there really is little point
- // of utilizing p.views for data.views.
p.data = data
}
diff --git a/pkg/tcpip/transport/raw/endpoint.go b/pkg/tcpip/transport/raw/endpoint.go
index 55854ba59..3bf6c0a8f 100644
--- a/pkg/tcpip/transport/raw/endpoint.go
+++ b/pkg/tcpip/transport/raw/endpoint.go
@@ -281,6 +281,7 @@ func (e *endpoint) write(p tcpip.Payloader, opts tcpip.WriteOptions) (int64, tcp
return nil, nil, nil, &tcpip.ErrInvalidEndpointState{}
}
+ // TODO(https://gvisor.dev/issue/6538): Avoid this allocation.
payloadBytes := make([]byte, p.Len())
if _, err := io.ReadFull(p, payloadBytes); err != nil {
return nil, nil, nil, &tcpip.ErrBadBuffer{}
@@ -600,6 +601,9 @@ func (e *endpoint) HandlePacket(pkt *stack.PacketBuffer) {
// We copy headers' underlying bytes because pkt.*Header may point to
// the middle of a slice, and another struct may point to the "outer"
// slice. Save/restore doesn't support overlapping slices and will fail.
+ //
+ // TODO(https://gvisor.dev/issue/6517): Avoid the copy once S/R supports
+ // overlapping slices.
var combinedVV buffer.VectorisedView
if e.TransportEndpointInfo.NetProto == header.IPv4ProtocolNumber {
network, transport := pkt.NetworkHeader().View(), pkt.TransportHeader().View()
diff --git a/pkg/tcpip/transport/udp/endpoint.go b/pkg/tcpip/transport/udp/endpoint.go
index ac7ecb5f8..4b6bdc3be 100644
--- a/pkg/tcpip/transport/udp/endpoint.go
+++ b/pkg/tcpip/transport/udp/endpoint.go
@@ -366,6 +366,7 @@ func (e *endpoint) prepareForWrite(p tcpip.Payloader, opts tcpip.WriteOptions) (
return udpPacketInfo{}, err
}
+ // TODO(https://gvisor.dev/issue/6538): Avoid this allocation.
v := make([]byte, p.Len())
if _, err := io.ReadFull(p, v); err != nil {
ctx.Release()
diff --git a/pkg/tcpip/transport/udp/endpoint_state.go b/pkg/tcpip/transport/udp/endpoint_state.go
index 20c45ab87..2ff8b0482 100644
--- a/pkg/tcpip/transport/udp/endpoint_state.go
+++ b/pkg/tcpip/transport/udp/endpoint_state.go
@@ -36,17 +36,11 @@ func (p *udpPacket) loadReceivedAt(nsec int64) {
// saveData saves udpPacket.data field.
func (p *udpPacket) saveData() buffer.VectorisedView {
- // We cannot save p.data directly as p.data.views may alias to p.views,
- // which is not allowed by state framework (in-struct pointer).
return p.data.Clone(nil)
}
// loadData loads udpPacket.data field.
func (p *udpPacket) loadData(data buffer.VectorisedView) {
- // NOTE: We cannot do the p.data = data.Clone(p.views[:]) optimization
- // here because data.views is not guaranteed to be loaded by now. Plus,
- // data.views will be allocated anyway so there really is little point
- // of utilizing p.views for data.views.
p.data = data
}