summaryrefslogtreecommitdiffhomepage
path: root/pkg/tcpip
diff options
context:
space:
mode:
authorKevin Krakauer <krakauer@google.com>2019-07-12 18:08:03 -0700
committergVisor bot <gvisor-bot@google.com>2019-07-12 18:09:12 -0700
commit9b4d3280e172063a6563d9e72a75b500442ed9b9 (patch)
tree01e4fb8d506559a5ed5c6c3652265ec833230277 /pkg/tcpip
parent17bab652afebdc43d77969431b9147cca039f61e (diff)
Add IPPROTO_RAW, which allows raw sockets to write IP headers.
iptables also relies on IPPROTO_RAW in a way. It opens such a socket to manipulate the kernel's tables, but it doesn't actually use any of the functionality. Blegh. PiperOrigin-RevId: 257903078
Diffstat (limited to 'pkg/tcpip')
-rw-r--r--pkg/tcpip/network/arp/arp.go4
-rw-r--r--pkg/tcpip/network/ipv4/ipv4.go49
-rw-r--r--pkg/tcpip/network/ipv6/ipv6.go7
-rw-r--r--pkg/tcpip/stack/registration.go20
-rw-r--r--pkg/tcpip/stack/route.go12
-rw-r--r--pkg/tcpip/stack/stack.go10
-rw-r--r--pkg/tcpip/stack/stack_test.go4
-rw-r--r--pkg/tcpip/transport/raw/BUILD1
-rw-r--r--pkg/tcpip/transport/raw/endpoint.go100
-rw-r--r--pkg/tcpip/transport/raw/protocol.go32
10 files changed, 215 insertions, 24 deletions
diff --git a/pkg/tcpip/network/arp/arp.go b/pkg/tcpip/network/arp/arp.go
index ca3d6c0bf..cb35635fc 100644
--- a/pkg/tcpip/network/arp/arp.go
+++ b/pkg/tcpip/network/arp/arp.go
@@ -83,6 +83,10 @@ func (e *endpoint) WritePacket(*stack.Route, *stack.GSO, buffer.Prependable, buf
return tcpip.ErrNotSupported
}
+func (e *endpoint) WriteHeaderIncludedPacket(r *stack.Route, payload buffer.VectorisedView, loop stack.PacketLooping) *tcpip.Error {
+ return tcpip.ErrNotSupported
+}
+
func (e *endpoint) HandlePacket(r *stack.Route, vv buffer.VectorisedView) {
v := vv.First()
h := header.ARP(v)
diff --git a/pkg/tcpip/network/ipv4/ipv4.go b/pkg/tcpip/network/ipv4/ipv4.go
index 1e3a7425a..e44a73d96 100644
--- a/pkg/tcpip/network/ipv4/ipv4.go
+++ b/pkg/tcpip/network/ipv4/ipv4.go
@@ -232,6 +232,55 @@ func (e *endpoint) WritePacket(r *stack.Route, gso *stack.GSO, hdr buffer.Prepen
return nil
}
+// WriteHeaderIncludedPacket writes a packet already containing a network
+// header through the given route.
+func (e *endpoint) WriteHeaderIncludedPacket(r *stack.Route, payload buffer.VectorisedView, loop stack.PacketLooping) *tcpip.Error {
+ // The packet already has an IP header, but there are a few required
+ // checks.
+ ip := header.IPv4(payload.First())
+ if !ip.IsValid(payload.Size()) {
+ return tcpip.ErrInvalidOptionValue
+ }
+
+ // Always set the total length.
+ ip.SetTotalLength(uint16(payload.Size()))
+
+ // Set the source address when zero.
+ if ip.SourceAddress() == tcpip.Address(([]byte{0, 0, 0, 0})) {
+ ip.SetSourceAddress(r.LocalAddress)
+ }
+
+ // Set the destination. If the packet already included a destination,
+ // it will be part of the route.
+ ip.SetDestinationAddress(r.RemoteAddress)
+
+ // Set the packet ID when zero.
+ if ip.ID() == 0 {
+ id := uint32(0)
+ if payload.Size() > header.IPv4MaximumHeaderSize+8 {
+ // Packets of 68 bytes or less are required by RFC 791 to not be
+ // fragmented, so we only assign ids to larger packets.
+ id = atomic.AddUint32(&ids[hashRoute(r, 0 /* protocol */)%buckets], 1)
+ }
+ ip.SetID(uint16(id))
+ }
+
+ // Always set the checksum.
+ ip.SetChecksum(0)
+ ip.SetChecksum(^ip.CalculateChecksum())
+
+ if loop&stack.PacketLoop != 0 {
+ e.HandlePacket(r, payload)
+ }
+ if loop&stack.PacketOut == 0 {
+ return nil
+ }
+
+ hdr := buffer.NewPrependableFromView(payload.ToView())
+ r.Stats().IP.PacketsSent.Increment()
+ return e.linkEP.WritePacket(r, nil /* gso */, hdr, buffer.VectorisedView{}, ProtocolNumber)
+}
+
// HandlePacket is called by the link layer when new ipv4 packets arrive for
// this endpoint.
func (e *endpoint) HandlePacket(r *stack.Route, vv buffer.VectorisedView) {
diff --git a/pkg/tcpip/network/ipv6/ipv6.go b/pkg/tcpip/network/ipv6/ipv6.go
index 27367d6c5..e3e8739fd 100644
--- a/pkg/tcpip/network/ipv6/ipv6.go
+++ b/pkg/tcpip/network/ipv6/ipv6.go
@@ -120,6 +120,13 @@ func (e *endpoint) WritePacket(r *stack.Route, gso *stack.GSO, hdr buffer.Prepen
return e.linkEP.WritePacket(r, gso, hdr, payload, ProtocolNumber)
}
+// WriteHeaderIncludedPacket implements stack.NetworkEndpoint. It is not yet
+// supported by IPv6.
+func (*endpoint) WriteHeaderIncludedPacket(r *stack.Route, payload buffer.VectorisedView, loop stack.PacketLooping) *tcpip.Error {
+ // TODO(b/119580726): Support IPv6 header-included packets.
+ return tcpip.ErrNotSupported
+}
+
// HandlePacket is called by the link layer when new ipv6 packets arrive for
// this endpoint.
func (e *endpoint) HandlePacket(r *stack.Route, vv buffer.VectorisedView) {
diff --git a/pkg/tcpip/stack/registration.go b/pkg/tcpip/stack/registration.go
index 0ecaa0833..462265281 100644
--- a/pkg/tcpip/stack/registration.go
+++ b/pkg/tcpip/stack/registration.go
@@ -174,6 +174,10 @@ type NetworkEndpoint interface {
// protocol.
WritePacket(r *Route, gso *GSO, hdr buffer.Prependable, payload buffer.VectorisedView, protocol tcpip.TransportProtocolNumber, ttl uint8, loop PacketLooping) *tcpip.Error
+ // WriteHeaderIncludedPacket writes a packet that already includes a
+ // network header through the given route.
+ WriteHeaderIncludedPacket(r *Route, payload buffer.VectorisedView, loop PacketLooping) *tcpip.Error
+
// ID returns the network protocol endpoint ID.
ID() *NetworkEndpointID
@@ -357,10 +361,19 @@ type TransportProtocolFactory func() TransportProtocol
// instantiate network protocols.
type NetworkProtocolFactory func() NetworkProtocol
+// UnassociatedEndpointFactory produces endpoints for writing packets not
+// associated with a particular transport protocol. Such endpoints can be used
+// to write arbitrary packets that include the IP header.
+type UnassociatedEndpointFactory interface {
+ NewUnassociatedRawEndpoint(stack *Stack, netProto tcpip.NetworkProtocolNumber, transProto tcpip.TransportProtocolNumber, waiterQueue *waiter.Queue) (tcpip.Endpoint, *tcpip.Error)
+}
+
var (
transportProtocols = make(map[string]TransportProtocolFactory)
networkProtocols = make(map[string]NetworkProtocolFactory)
+ unassociatedFactory UnassociatedEndpointFactory
+
linkEPMu sync.RWMutex
nextLinkEndpointID tcpip.LinkEndpointID = 1
linkEndpoints = make(map[tcpip.LinkEndpointID]LinkEndpoint)
@@ -380,6 +393,13 @@ func RegisterNetworkProtocolFactory(name string, p NetworkProtocolFactory) {
networkProtocols[name] = p
}
+// RegisterUnassociatedFactory registers a factory to produce endpoints not
+// associated with any particular transport protocol. This function is intended
+// to be called by init() functions of the protocols.
+func RegisterUnassociatedFactory(f UnassociatedEndpointFactory) {
+ unassociatedFactory = f
+}
+
// RegisterLinkEndpoint register a link-layer protocol endpoint and returns an
// ID that can be used to refer to it.
func RegisterLinkEndpoint(linkEP LinkEndpoint) tcpip.LinkEndpointID {
diff --git a/pkg/tcpip/stack/route.go b/pkg/tcpip/stack/route.go
index 36d7b6ac7..391ab4344 100644
--- a/pkg/tcpip/stack/route.go
+++ b/pkg/tcpip/stack/route.go
@@ -163,6 +163,18 @@ func (r *Route) WritePacket(gso *GSO, hdr buffer.Prependable, payload buffer.Vec
return err
}
+// WriteHeaderIncludedPacket writes a packet already containing a network
+// header through the given route.
+func (r *Route) WriteHeaderIncludedPacket(payload buffer.VectorisedView) *tcpip.Error {
+ if err := r.ref.ep.WriteHeaderIncludedPacket(r, payload, r.loop); err != nil {
+ r.Stats().IP.OutgoingPacketErrors.Increment()
+ return err
+ }
+ r.ref.nic.stats.Tx.Packets.Increment()
+ r.ref.nic.stats.Tx.Bytes.IncrementBy(uint64(payload.Size()))
+ return nil
+}
+
// DefaultTTL returns the default TTL of the underlying network endpoint.
func (r *Route) DefaultTTL() uint8 {
return r.ref.ep.DefaultTTL()
diff --git a/pkg/tcpip/stack/stack.go b/pkg/tcpip/stack/stack.go
index 2d7f56ca9..3e8fb2a6c 100644
--- a/pkg/tcpip/stack/stack.go
+++ b/pkg/tcpip/stack/stack.go
@@ -340,6 +340,8 @@ type Stack struct {
networkProtocols map[tcpip.NetworkProtocolNumber]NetworkProtocol
linkAddrResolvers map[tcpip.NetworkProtocolNumber]LinkAddressResolver
+ unassociatedFactory UnassociatedEndpointFactory
+
demux *transportDemuxer
stats tcpip.Stats
@@ -442,6 +444,8 @@ func New(network []string, transport []string, opts Options) *Stack {
}
}
+ s.unassociatedFactory = unassociatedFactory
+
// Create the global transport demuxer.
s.demux = newTransportDemuxer(s)
@@ -574,11 +578,15 @@ func (s *Stack) NewEndpoint(transport tcpip.TransportProtocolNumber, network tcp
// NewRawEndpoint creates a new raw transport layer endpoint of the given
// protocol. Raw endpoints receive all traffic for a given protocol regardless
// of address.
-func (s *Stack) NewRawEndpoint(transport tcpip.TransportProtocolNumber, network tcpip.NetworkProtocolNumber, waiterQueue *waiter.Queue) (tcpip.Endpoint, *tcpip.Error) {
+func (s *Stack) NewRawEndpoint(transport tcpip.TransportProtocolNumber, network tcpip.NetworkProtocolNumber, waiterQueue *waiter.Queue, associated bool) (tcpip.Endpoint, *tcpip.Error) {
if !s.raw {
return nil, tcpip.ErrNotPermitted
}
+ if !associated {
+ return s.unassociatedFactory.NewUnassociatedRawEndpoint(s, network, transport, waiterQueue)
+ }
+
t, ok := s.transportProtocols[transport]
if !ok {
return nil, tcpip.ErrUnknownProtocol
diff --git a/pkg/tcpip/stack/stack_test.go b/pkg/tcpip/stack/stack_test.go
index 69884af03..959071dbe 100644
--- a/pkg/tcpip/stack/stack_test.go
+++ b/pkg/tcpip/stack/stack_test.go
@@ -137,6 +137,10 @@ func (f *fakeNetworkEndpoint) WritePacket(r *stack.Route, gso *stack.GSO, hdr bu
return f.linkEP.WritePacket(r, gso, hdr, payload, fakeNetNumber)
}
+func (*fakeNetworkEndpoint) WriteHeaderIncludedPacket(r *stack.Route, payload buffer.VectorisedView, loop stack.PacketLooping) *tcpip.Error {
+ return tcpip.ErrNotSupported
+}
+
func (*fakeNetworkEndpoint) Close() {}
type fakeNetGoodOption bool
diff --git a/pkg/tcpip/transport/raw/BUILD b/pkg/tcpip/transport/raw/BUILD
index 34a14bf7f..bc4b255b4 100644
--- a/pkg/tcpip/transport/raw/BUILD
+++ b/pkg/tcpip/transport/raw/BUILD
@@ -21,6 +21,7 @@ go_library(
"endpoint.go",
"endpoint_state.go",
"packet_list.go",
+ "protocol.go",
],
importpath = "gvisor.dev/gvisor/pkg/tcpip/transport/raw",
imports = ["gvisor.dev/gvisor/pkg/tcpip/buffer"],
diff --git a/pkg/tcpip/transport/raw/endpoint.go b/pkg/tcpip/transport/raw/endpoint.go
index 42aded77f..a29587658 100644
--- a/pkg/tcpip/transport/raw/endpoint.go
+++ b/pkg/tcpip/transport/raw/endpoint.go
@@ -67,6 +67,7 @@ type endpoint struct {
netProto tcpip.NetworkProtocolNumber
transProto tcpip.TransportProtocolNumber
waiterQueue *waiter.Queue
+ associated bool
// The following fields are used to manage the receive queue and are
// protected by rcvMu.
@@ -97,8 +98,12 @@ type endpoint struct {
}
// NewEndpoint returns a raw endpoint for the given protocols.
-// TODO(b/129292371): IP_HDRINCL, IPPROTO_RAW, and AF_PACKET.
+// TODO(b/129292371): IP_HDRINCL and AF_PACKET.
func NewEndpoint(stack *stack.Stack, netProto tcpip.NetworkProtocolNumber, transProto tcpip.TransportProtocolNumber, waiterQueue *waiter.Queue) (tcpip.Endpoint, *tcpip.Error) {
+ return newEndpoint(stack, netProto, transProto, waiterQueue, true /* associated */)
+}
+
+func newEndpoint(stack *stack.Stack, netProto tcpip.NetworkProtocolNumber, transProto tcpip.TransportProtocolNumber, waiterQueue *waiter.Queue, associated bool) (tcpip.Endpoint, *tcpip.Error) {
if netProto != header.IPv4ProtocolNumber {
return nil, tcpip.ErrUnknownProtocol
}
@@ -110,6 +115,16 @@ func NewEndpoint(stack *stack.Stack, netProto tcpip.NetworkProtocolNumber, trans
waiterQueue: waiterQueue,
rcvBufSizeMax: 32 * 1024,
sndBufSize: 32 * 1024,
+ associated: associated,
+ }
+
+ // Unassociated endpoints are write-only and users call Write() with IP
+ // headers included. Because they're write-only, we don't need to
+ // register with the stack.
+ if !associated {
+ ep.rcvBufSizeMax = 0
+ ep.waiterQueue = nil
+ return ep, nil
}
if err := ep.stack.RegisterRawTransportEndpoint(ep.registeredNIC, ep.netProto, ep.transProto, ep); err != nil {
@@ -124,7 +139,7 @@ func (ep *endpoint) Close() {
ep.mu.Lock()
defer ep.mu.Unlock()
- if ep.closed {
+ if ep.closed || !ep.associated {
return
}
@@ -142,8 +157,11 @@ func (ep *endpoint) Close() {
if ep.connected {
ep.route.Release()
+ ep.connected = false
}
+ ep.closed = true
+
ep.waiterQueue.Notify(waiter.EventHUp | waiter.EventErr | waiter.EventIn | waiter.EventOut)
}
@@ -152,6 +170,10 @@ func (ep *endpoint) ModerateRecvBuf(copied int) {}
// Read implements tcpip.Endpoint.Read.
func (ep *endpoint) Read(addr *tcpip.FullAddress) (buffer.View, tcpip.ControlMessages, *tcpip.Error) {
+ if !ep.associated {
+ return buffer.View{}, tcpip.ControlMessages{}, tcpip.ErrInvalidOptionValue
+ }
+
ep.rcvMu.Lock()
// If there's no data to read, return that read would block or that the
@@ -192,6 +214,33 @@ func (ep *endpoint) Write(payload tcpip.Payload, opts tcpip.WriteOptions) (uintp
return 0, nil, tcpip.ErrInvalidEndpointState
}
+ payloadBytes, err := payload.Get(payload.Size())
+ if err != nil {
+ ep.mu.RUnlock()
+ return 0, nil, err
+ }
+
+ // If this is an unassociated socket and the caller provided a nonzero
+ // destination address, route using that address.
+ if !ep.associated {
+ ip := header.IPv4(payloadBytes)
+ if !ip.IsValid(payload.Size()) {
+ ep.mu.RUnlock()
+ return 0, nil, tcpip.ErrInvalidOptionValue
+ }
+ dstAddr := ip.DestinationAddress()
+ // Use the address from the IP header as the destination, unless
+ // opts.To is already set (e.g. if sendto specifies a specific
+ // address).
+ if dstAddr != tcpip.Address([]byte{0, 0, 0, 0}) && opts.To == nil {
+ opts.To = &tcpip.FullAddress{
+ NIC: 0, // NIC is unset.
+ Addr: dstAddr, // The address from the payload.
+ Port: 0, // There are no ports here.
+ }
+ }
+ }
+
// Did the user caller provide a destination? If not, use the connected
// destination.
if opts.To == nil {
@@ -216,12 +265,12 @@ func (ep *endpoint) Write(payload tcpip.Payload, opts tcpip.WriteOptions) (uintp
return 0, nil, tcpip.ErrInvalidEndpointState
}
- n, ch, err := ep.finishWrite(payload, savedRoute)
+ n, ch, err := ep.finishWrite(payloadBytes, savedRoute)
ep.mu.Unlock()
return n, ch, err
}
- n, ch, err := ep.finishWrite(payload, &ep.route)
+ n, ch, err := ep.finishWrite(payloadBytes, &ep.route)
ep.mu.RUnlock()
return n, ch, err
}
@@ -248,7 +297,7 @@ func (ep *endpoint) Write(payload tcpip.Payload, opts tcpip.WriteOptions) (uintp
return 0, nil, err
}
- n, ch, err := ep.finishWrite(payload, &route)
+ n, ch, err := ep.finishWrite(payloadBytes, &route)
route.Release()
ep.mu.RUnlock()
return n, ch, err
@@ -256,7 +305,7 @@ func (ep *endpoint) Write(payload tcpip.Payload, opts tcpip.WriteOptions) (uintp
// finishWrite writes the payload to a route. It resolves the route if
// necessary. It's really just a helper to make defer unnecessary in Write.
-func (ep *endpoint) finishWrite(payload tcpip.Payload, route *stack.Route) (uintptr, <-chan struct{}, *tcpip.Error) {
+func (ep *endpoint) finishWrite(payloadBytes []byte, route *stack.Route) (uintptr, <-chan struct{}, *tcpip.Error) {
// We may need to resolve the route (match a link layer address to the
// network address). If that requires blocking (e.g. to use ARP),
// return a channel on which the caller can wait.
@@ -269,13 +318,14 @@ func (ep *endpoint) finishWrite(payload tcpip.Payload, route *stack.Route) (uint
}
}
- payloadBytes, err := payload.Get(payload.Size())
- if err != nil {
- return 0, nil, err
- }
-
switch ep.netProto {
case header.IPv4ProtocolNumber:
+ if !ep.associated {
+ if err := route.WriteHeaderIncludedPacket(buffer.View(payloadBytes).ToVectorisedView()); err != nil {
+ return 0, nil, err
+ }
+ break
+ }
hdr := buffer.NewPrependable(len(payloadBytes) + int(route.MaxHeaderLength()))
if err := route.WritePacket(nil /* gso */, hdr, buffer.View(payloadBytes).ToVectorisedView(), ep.transProto, route.DefaultTTL()); err != nil {
return 0, nil, err
@@ -335,15 +385,17 @@ func (ep *endpoint) Connect(addr tcpip.FullAddress) *tcpip.Error {
}
defer route.Release()
- // Re-register the endpoint with the appropriate NIC.
- if err := ep.stack.RegisterRawTransportEndpoint(addr.NIC, ep.netProto, ep.transProto, ep); err != nil {
- return err
+ if ep.associated {
+ // Re-register the endpoint with the appropriate NIC.
+ if err := ep.stack.RegisterRawTransportEndpoint(addr.NIC, ep.netProto, ep.transProto, ep); err != nil {
+ return err
+ }
+ ep.stack.UnregisterRawTransportEndpoint(ep.registeredNIC, ep.netProto, ep.transProto, ep)
+ ep.registeredNIC = nic
}
- ep.stack.UnregisterRawTransportEndpoint(ep.registeredNIC, ep.netProto, ep.transProto, ep)
- // Save the route and NIC we've connected via.
+ // Save the route we've connected via.
ep.route = route.Clone()
- ep.registeredNIC = nic
ep.connected = true
return nil
@@ -386,14 +438,16 @@ func (ep *endpoint) Bind(addr tcpip.FullAddress) *tcpip.Error {
return tcpip.ErrBadLocalAddress
}
- // Re-register the endpoint with the appropriate NIC.
- if err := ep.stack.RegisterRawTransportEndpoint(addr.NIC, ep.netProto, ep.transProto, ep); err != nil {
- return err
+ if ep.associated {
+ // Re-register the endpoint with the appropriate NIC.
+ if err := ep.stack.RegisterRawTransportEndpoint(addr.NIC, ep.netProto, ep.transProto, ep); err != nil {
+ return err
+ }
+ ep.stack.UnregisterRawTransportEndpoint(ep.registeredNIC, ep.netProto, ep.transProto, ep)
+ ep.registeredNIC = addr.NIC
+ ep.boundNIC = addr.NIC
}
- ep.stack.UnregisterRawTransportEndpoint(ep.registeredNIC, ep.netProto, ep.transProto, ep)
- ep.registeredNIC = addr.NIC
- ep.boundNIC = addr.NIC
ep.boundAddr = addr.Addr
ep.bound = true
diff --git a/pkg/tcpip/transport/raw/protocol.go b/pkg/tcpip/transport/raw/protocol.go
new file mode 100644
index 000000000..783c21e6b
--- /dev/null
+++ b/pkg/tcpip/transport/raw/protocol.go
@@ -0,0 +1,32 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package raw
+
+import (
+ "gvisor.dev/gvisor/pkg/tcpip"
+ "gvisor.dev/gvisor/pkg/tcpip/stack"
+ "gvisor.dev/gvisor/pkg/waiter"
+)
+
+type factory struct{}
+
+// NewUnassociatedRawEndpoint implements stack.UnassociatedEndpointFactory.
+func (factory) NewUnassociatedRawEndpoint(stack *stack.Stack, netProto tcpip.NetworkProtocolNumber, transProto tcpip.TransportProtocolNumber, waiterQueue *waiter.Queue) (tcpip.Endpoint, *tcpip.Error) {
+ return newEndpoint(stack, netProto, transProto, waiterQueue, false /* associated */)
+}
+
+func init() {
+ stack.RegisterUnassociatedFactory(factory{})
+}