diff options
author | gVisor bot <gvisor-bot@google.com> | 2019-06-02 06:44:55 +0000 |
---|---|---|
committer | gVisor bot <gvisor-bot@google.com> | 2019-06-02 06:44:55 +0000 |
commit | ceb0d792f328d1fc0692197d8856a43c3936a571 (patch) | |
tree | 83155f302eff44a78bcc30a3a08f4efe59a79379 /pkg/tcpip/network/ipv4 | |
parent | deb7ecf1e46862d54f4b102f2d163cfbcfc37f3b (diff) | |
parent | 216da0b733dbed9aad9b2ab92ac75bcb906fd7ee (diff) |
Merge 216da0b7 (automated)
Diffstat (limited to 'pkg/tcpip/network/ipv4')
-rw-r--r-- | pkg/tcpip/network/ipv4/icmp.go | 160 | ||||
-rw-r--r-- | pkg/tcpip/network/ipv4/ipv4.go | 344 | ||||
-rwxr-xr-x | pkg/tcpip/network/ipv4/ipv4_state_autogen.go | 4 |
3 files changed, 508 insertions, 0 deletions
diff --git a/pkg/tcpip/network/ipv4/icmp.go b/pkg/tcpip/network/ipv4/icmp.go new file mode 100644 index 000000000..770f56c3d --- /dev/null +++ b/pkg/tcpip/network/ipv4/icmp.go @@ -0,0 +1,160 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package ipv4 + +import ( + "encoding/binary" + + "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer" + "gvisor.googlesource.com/gvisor/pkg/tcpip/header" + "gvisor.googlesource.com/gvisor/pkg/tcpip/stack" +) + +// handleControl handles the case when an ICMP packet contains the headers of +// the original packet that caused the ICMP one to be sent. This information is +// used to find out which transport endpoint must be notified about the ICMP +// packet. +func (e *endpoint) handleControl(typ stack.ControlType, extra uint32, vv buffer.VectorisedView) { + h := header.IPv4(vv.First()) + + // We don't use IsValid() here because ICMP only requires that the IP + // header plus 8 bytes of the transport header be included. So it's + // likely that it is truncated, which would cause IsValid to return + // false. + // + // Drop packet if it doesn't have the basic IPv4 header or if the + // original source address doesn't match the endpoint's address. 
+ if len(h) < header.IPv4MinimumSize || h.SourceAddress() != e.id.LocalAddress { + return + } + + hlen := int(h.HeaderLength()) + if vv.Size() < hlen || h.FragmentOffset() != 0 { + // We won't be able to handle this if it doesn't contain the + // full IPv4 header, or if it's a fragment not at offset 0 + // (because it won't have the transport header). + return + } + + // Skip the ip header, then deliver control message. + vv.TrimFront(hlen) + p := h.TransportProtocol() + e.dispatcher.DeliverTransportControlPacket(e.id.LocalAddress, h.DestinationAddress(), ProtocolNumber, p, typ, extra, vv) +} + +func (e *endpoint) handleICMP(r *stack.Route, netHeader buffer.View, vv buffer.VectorisedView) { + stats := r.Stats() + received := stats.ICMP.V4PacketsReceived + v := vv.First() + if len(v) < header.ICMPv4MinimumSize { + received.Invalid.Increment() + return + } + h := header.ICMPv4(v) + + // TODO(b/112892170): Meaningfully handle all ICMP types. + switch h.Type() { + case header.ICMPv4Echo: + received.Echo.Increment() + if len(v) < header.ICMPv4EchoMinimumSize { + received.Invalid.Increment() + return + } + + // Only send a reply if the checksum is valid. + wantChecksum := h.Checksum() + // Reset the checksum field to 0 so we can calculate the proper + // checksum. We'll have to reset this before we hand the packet + // off. + h.SetChecksum(0) + gotChecksum := ^header.ChecksumVV(vv, 0 /* initial */) + if gotChecksum != wantChecksum { + // It's possible that a raw socket expects to receive this. + h.SetChecksum(wantChecksum) + e.dispatcher.DeliverTransportPacket(r, header.ICMPv4ProtocolNumber, netHeader, vv) + received.Invalid.Increment() + return + } + + // It's possible that a raw socket expects to receive this. 
+ h.SetChecksum(wantChecksum) + e.dispatcher.DeliverTransportPacket(r, header.ICMPv4ProtocolNumber, netHeader, vv) + + vv := vv.Clone(nil) + vv.TrimFront(header.ICMPv4EchoMinimumSize) + hdr := buffer.NewPrependable(int(r.MaxHeaderLength()) + header.ICMPv4EchoMinimumSize) + pkt := header.ICMPv4(hdr.Prepend(header.ICMPv4EchoMinimumSize)) + copy(pkt, h) + pkt.SetType(header.ICMPv4EchoReply) + pkt.SetChecksum(^header.Checksum(pkt, header.ChecksumVV(vv, 0))) + sent := stats.ICMP.V4PacketsSent + if err := r.WritePacket(nil /* gso */, hdr, vv, header.ICMPv4ProtocolNumber, r.DefaultTTL()); err != nil { + sent.Dropped.Increment() + return + } + sent.EchoReply.Increment() + + case header.ICMPv4EchoReply: + received.EchoReply.Increment() + if len(v) < header.ICMPv4EchoMinimumSize { + received.Invalid.Increment() + return + } + e.dispatcher.DeliverTransportPacket(r, header.ICMPv4ProtocolNumber, netHeader, vv) + + case header.ICMPv4DstUnreachable: + received.DstUnreachable.Increment() + if len(v) < header.ICMPv4DstUnreachableMinimumSize { + received.Invalid.Increment() + return + } + vv.TrimFront(header.ICMPv4DstUnreachableMinimumSize) + switch h.Code() { + case header.ICMPv4PortUnreachable: + e.handleControl(stack.ControlPortUnreachable, 0, vv) + + case header.ICMPv4FragmentationNeeded: + mtu := uint32(binary.BigEndian.Uint16(v[header.ICMPv4DstUnreachableMinimumSize-2:])) + e.handleControl(stack.ControlPacketTooBig, calculateMTU(mtu), vv) + } + + case header.ICMPv4SrcQuench: + received.SrcQuench.Increment() + + case header.ICMPv4Redirect: + received.Redirect.Increment() + + case header.ICMPv4TimeExceeded: + received.TimeExceeded.Increment() + + case header.ICMPv4ParamProblem: + received.ParamProblem.Increment() + + case header.ICMPv4Timestamp: + received.Timestamp.Increment() + + case header.ICMPv4TimestampReply: + received.TimestampReply.Increment() + + case header.ICMPv4InfoRequest: + received.InfoRequest.Increment() + + case header.ICMPv4InfoReply: 
+ received.InfoReply.Increment() + + default: + received.Invalid.Increment() + } +} diff --git a/pkg/tcpip/network/ipv4/ipv4.go b/pkg/tcpip/network/ipv4/ipv4.go new file mode 100644 index 000000000..da07a39e5 --- /dev/null +++ b/pkg/tcpip/network/ipv4/ipv4.go @@ -0,0 +1,344 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package ipv4 contains the implementation of the ipv4 network protocol. To use +// it in the networking stack, this package must be added to the project, and +// activated on the stack by passing ipv4.ProtocolName (or "ipv4") as one of the +// network protocols when calling stack.New(). Then endpoints can be created +// by passing ipv4.ProtocolNumber as the network protocol number when calling +// Stack.NewEndpoint(). +package ipv4 + +import ( + "sync/atomic" + + "gvisor.googlesource.com/gvisor/pkg/tcpip" + "gvisor.googlesource.com/gvisor/pkg/tcpip/buffer" + "gvisor.googlesource.com/gvisor/pkg/tcpip/header" + "gvisor.googlesource.com/gvisor/pkg/tcpip/network/fragmentation" + "gvisor.googlesource.com/gvisor/pkg/tcpip/network/hash" + "gvisor.googlesource.com/gvisor/pkg/tcpip/stack" +) + +const ( + // ProtocolName is the string representation of the ipv4 protocol name. + ProtocolName = "ipv4" + + // ProtocolNumber is the ipv4 protocol number. + ProtocolNumber = header.IPv4ProtocolNumber + + // MaxTotalSize is the maximum size that can be encoded in the 16-bit + // TotalLength field of the ipv4 header. 
+ MaxTotalSize = 0xffff + + // buckets is the number of identifier buckets. + buckets = 2048 +) + +type endpoint struct { + nicid tcpip.NICID + id stack.NetworkEndpointID + linkEP stack.LinkEndpoint + dispatcher stack.TransportDispatcher + fragmentation *fragmentation.Fragmentation +} + +// NewEndpoint creates a new ipv4 endpoint. +func (p *protocol) NewEndpoint(nicid tcpip.NICID, addr tcpip.Address, linkAddrCache stack.LinkAddressCache, dispatcher stack.TransportDispatcher, linkEP stack.LinkEndpoint) (stack.NetworkEndpoint, *tcpip.Error) { + e := &endpoint{ + nicid: nicid, + id: stack.NetworkEndpointID{LocalAddress: addr}, + linkEP: linkEP, + dispatcher: dispatcher, + fragmentation: fragmentation.NewFragmentation(fragmentation.HighFragThreshold, fragmentation.LowFragThreshold, fragmentation.DefaultReassembleTimeout), + } + + return e, nil +} + +// DefaultTTL is the default time-to-live value for this endpoint. +func (e *endpoint) DefaultTTL() uint8 { + return 255 +} + +// MTU implements stack.NetworkEndpoint.MTU. It returns the link-layer MTU, capped +// at MaxTotalSize, minus the minimum IPv4 header size. +func (e *endpoint) MTU() uint32 { + return calculateMTU(e.linkEP.MTU()) +} + +// Capabilities implements stack.NetworkEndpoint.Capabilities. +func (e *endpoint) Capabilities() stack.LinkEndpointCapabilities { + return e.linkEP.Capabilities() +} + +// NICID returns the ID of the NIC this endpoint belongs to. +func (e *endpoint) NICID() tcpip.NICID { + return e.nicid +} + +// ID returns the ipv4 endpoint ID. +func (e *endpoint) ID() *stack.NetworkEndpointID { + return &e.id +} + +// MaxHeaderLength returns the maximum length needed by ipv4 headers (and +// underlying protocols). +func (e *endpoint) MaxHeaderLength() uint16 { + return e.linkEP.MaxHeaderLength() + header.IPv4MinimumSize +} + +// GSOMaxSize returns the maximum GSO packet size. 
+func (e *endpoint) GSOMaxSize() uint32 { + if gso, ok := e.linkEP.(stack.GSOEndpoint); ok { + return gso.GSOMaxSize() + } + return 0 +} + +// writePacketFragments calls e.linkEP.WritePacket with each packet fragment to +// write. It assumes that the IP header is entirely in hdr but does not assume +// that only the IP header is in hdr. It assumes that the input packet's stated +// length matches the length of the hdr+payload. mtu includes the IP header and +// options. This does not support the DontFragment IP flag. +func (e *endpoint) writePacketFragments(r *stack.Route, gso *stack.GSO, hdr buffer.Prependable, payload buffer.VectorisedView, mtu int) *tcpip.Error { + // This packet is too big, it needs to be fragmented. + ip := header.IPv4(hdr.View()) + flags := ip.Flags() + + // Update mtu to take into account the header, which will exist in all + // fragments anyway. + innerMTU := mtu - int(ip.HeaderLength()) + + // Round the MTU down to align to 8 bytes. Then calculate the number of + // fragments. Calculate fragment sizes as in RFC791. + innerMTU &^= 7 + n := (int(ip.PayloadLength()) + innerMTU - 1) / innerMTU + + outerMTU := innerMTU + int(ip.HeaderLength()) + offset := ip.FragmentOffset() + originalAvailableLength := hdr.AvailableLength() + for i := 0; i < n; i++ { + // Where possible, the first fragment that is sent has the same + // hdr.UsedLength() as the input packet. The link-layer endpoint may depend + // on this for looking at, e.g., L4 headers. 
+ h := ip + if i > 0 { + hdr = buffer.NewPrependable(int(ip.HeaderLength()) + originalAvailableLength) + h = header.IPv4(hdr.Prepend(int(ip.HeaderLength()))) + copy(h, ip[:ip.HeaderLength()]) + } + if i != n-1 { + h.SetTotalLength(uint16(outerMTU)) + h.SetFlagsFragmentOffset(flags|header.IPv4FlagMoreFragments, offset) + } else { + h.SetTotalLength(uint16(h.HeaderLength()) + uint16(payload.Size())) + h.SetFlagsFragmentOffset(flags, offset) + } + h.SetChecksum(0) + h.SetChecksum(^h.CalculateChecksum()) + offset += uint16(innerMTU) + if i > 0 { + newPayload := payload.Clone([]buffer.View{}) + newPayload.CapLength(innerMTU) + if err := e.linkEP.WritePacket(r, gso, hdr, newPayload, ProtocolNumber); err != nil { + return err + } + r.Stats().IP.PacketsSent.Increment() + payload.TrimFront(newPayload.Size()) + continue + } + // Special handling for the first fragment because it comes from the hdr. + if outerMTU >= hdr.UsedLength() { + // This fragment can fit all of hdr and possibly some of payload, too. + newPayload := payload.Clone([]buffer.View{}) + newPayloadLength := outerMTU - hdr.UsedLength() + newPayload.CapLength(newPayloadLength) + if err := e.linkEP.WritePacket(r, gso, hdr, newPayload, ProtocolNumber); err != nil { + return err + } + r.Stats().IP.PacketsSent.Increment() + payload.TrimFront(newPayloadLength) + } else { + // The fragment is too small to fit all of hdr. + startOfHdr := hdr + startOfHdr.TrimBack(hdr.UsedLength() - outerMTU) + emptyVV := buffer.NewVectorisedView(0, []buffer.View{}) + if err := e.linkEP.WritePacket(r, gso, startOfHdr, emptyVV, ProtocolNumber); err != nil { + return err + } + r.Stats().IP.PacketsSent.Increment() + // Add the unused bytes of hdr into the payload that remains to be sent. 
+ restOfHdr := hdr.View()[outerMTU:] + tmp := buffer.NewVectorisedView(len(restOfHdr), []buffer.View{buffer.NewViewFromBytes(restOfHdr)}) + tmp.Append(payload) + payload = tmp + } + } + return nil +} + +// WritePacket writes a packet to the given destination address and protocol. +func (e *endpoint) WritePacket(r *stack.Route, gso *stack.GSO, hdr buffer.Prependable, payload buffer.VectorisedView, protocol tcpip.TransportProtocolNumber, ttl uint8, loop stack.PacketLooping) *tcpip.Error { + ip := header.IPv4(hdr.Prepend(header.IPv4MinimumSize)) + length := uint16(hdr.UsedLength() + payload.Size()) + id := uint32(0) + if length > header.IPv4MaximumHeaderSize+8 { + // Packets of 68 bytes or less are required by RFC 791 to not be + // fragmented, so we only assign ids to larger packets. + id = atomic.AddUint32(&ids[hashRoute(r, protocol)%buckets], 1) + } + ip.Encode(&header.IPv4Fields{ + IHL: header.IPv4MinimumSize, + TotalLength: length, + ID: uint16(id), + TTL: ttl, + Protocol: uint8(protocol), + SrcAddr: r.LocalAddress, + DstAddr: r.RemoteAddress, + }) + ip.SetChecksum(^ip.CalculateChecksum()) + + if loop&stack.PacketLoop != 0 { + views := make([]buffer.View, 1, 1+len(payload.Views())) + views[0] = hdr.View() + views = append(views, payload.Views()...) + vv := buffer.NewVectorisedView(len(views[0])+payload.Size(), views) + e.HandlePacket(r, vv) + } + if loop&stack.PacketOut == 0 { + return nil + } + if hdr.UsedLength()+payload.Size() > int(e.linkEP.MTU()) && (gso == nil || gso.Type == stack.GSONone) { + return e.writePacketFragments(r, gso, hdr, payload, int(e.linkEP.MTU())) + } + if err := e.linkEP.WritePacket(r, gso, hdr, payload, ProtocolNumber); err != nil { + return err + } + r.Stats().IP.PacketsSent.Increment() + return nil +} + +// HandlePacket is called by the link layer when new ipv4 packets arrive for +// this endpoint. 
+func (e *endpoint) HandlePacket(r *stack.Route, vv buffer.VectorisedView) { + headerView := vv.First() + h := header.IPv4(headerView) + if !h.IsValid(vv.Size()) { + return + } + + hlen := int(h.HeaderLength()) + tlen := int(h.TotalLength()) + vv.TrimFront(hlen) + vv.CapLength(tlen - hlen) + + more := (h.Flags() & header.IPv4FlagMoreFragments) != 0 + if more || h.FragmentOffset() != 0 { + // The packet is a fragment, let's try to reassemble it. + last := h.FragmentOffset() + uint16(vv.Size()) - 1 + var ready bool + vv, ready = e.fragmentation.Process(hash.IPv4FragmentHash(h), h.FragmentOffset(), last, more, vv) + if !ready { + return + } + } + p := h.TransportProtocol() + if p == header.ICMPv4ProtocolNumber { + headerView.CapLength(hlen) + e.handleICMP(r, headerView, vv) + return + } + r.Stats().IP.PacketsDelivered.Increment() + e.dispatcher.DeliverTransportPacket(r, p, headerView, vv) +} + +// Close cleans up resources associated with the endpoint. +func (e *endpoint) Close() {} + +type protocol struct{} + +// NewProtocol creates a new ipv4 protocol descriptor. This is exported +// only for tests that short-circuit the stack. Regular use of the protocol is +// done via the stack, which gets a protocol descriptor from the init() function +// below. +func NewProtocol() stack.NetworkProtocol { + return &protocol{} +} + +// Number returns the ipv4 protocol number. +func (p *protocol) Number() tcpip.NetworkProtocolNumber { + return ProtocolNumber +} + +// MinimumPacketSize returns the minimum valid ipv4 packet size. +func (p *protocol) MinimumPacketSize() int { + return header.IPv4MinimumSize +} + +// ParseAddresses implements NetworkProtocol.ParseAddresses. +func (*protocol) ParseAddresses(v buffer.View) (src, dst tcpip.Address) { + h := header.IPv4(v) + return h.SourceAddress(), h.DestinationAddress() +} + +// SetOption implements NetworkProtocol.SetOption. 
+func (p *protocol) SetOption(option interface{}) *tcpip.Error { + return tcpip.ErrUnknownProtocolOption +} + +// Option implements NetworkProtocol.Option. +func (p *protocol) Option(option interface{}) *tcpip.Error { + return tcpip.ErrUnknownProtocolOption +} + +// calculateMTU clamps the link-layer payload mtu to MaxTotalSize and returns it +// minus header.IPv4MinimumSize (unsigned, so a smaller mtu would wrap around). +func calculateMTU(mtu uint32) uint32 { + if mtu > MaxTotalSize { + mtu = MaxTotalSize + } + return mtu - header.IPv4MinimumSize +} + +// hashRoute calculates a hash value for the given route. It uses the source & +// destination address, the transport protocol number, and a random initial +// value (generated once on initialization) to generate the hash. +func hashRoute(r *stack.Route, protocol tcpip.TransportProtocolNumber) uint32 { + t := r.LocalAddress + a := uint32(t[0]) | uint32(t[1])<<8 | uint32(t[2])<<16 | uint32(t[3])<<24 + t = r.RemoteAddress + b := uint32(t[0]) | uint32(t[1])<<8 | uint32(t[2])<<16 | uint32(t[3])<<24 + return hash.Hash3Words(a, b, uint32(protocol), hashIV) +} + +var ( + ids []uint32 + hashIV uint32 +) + +func init() { + ids = make([]uint32, buckets) + + // Randomly initialize hashIV and the ids. + r := hash.RandN32(1 + buckets) + for i := range ids { + ids[i] = r[i] + } + hashIV = r[buckets] + + stack.RegisterNetworkProtocolFactory(ProtocolName, func() stack.NetworkProtocol { + return &protocol{} + }) +} diff --git a/pkg/tcpip/network/ipv4/ipv4_state_autogen.go b/pkg/tcpip/network/ipv4/ipv4_state_autogen.go new file mode 100755 index 000000000..6b2cc0142 --- /dev/null +++ b/pkg/tcpip/network/ipv4/ipv4_state_autogen.go @@ -0,0 +1,4 @@ +// automatically generated by stateify. + +package ipv4 + |