diff options
-rw-r--r-- | pkg/sentry/socket/epsocket/epsocket.go | 1 | ||||
-rw-r--r-- | pkg/tcpip/header/tcp.go | 68 | ||||
-rw-r--r-- | pkg/tcpip/link/fdbased/endpoint.go | 18 | ||||
-rw-r--r-- | pkg/tcpip/link/loopback/loopback.go | 2 | ||||
-rw-r--r-- | pkg/tcpip/link/muxed/injectable.go | 2 | ||||
-rw-r--r-- | pkg/tcpip/link/muxed/injectable_test.go | 4 | ||||
-rw-r--r-- | pkg/tcpip/stack/registration.go | 10 | ||||
-rw-r--r-- | pkg/tcpip/tcpip.go | 3 | ||||
-rw-r--r-- | pkg/tcpip/transport/tcp/connect.go | 2 | ||||
-rw-r--r-- | pkg/tcpip/transport/tcp/endpoint.go | 9 | ||||
-rw-r--r-- | pkg/tcpip/transport/tcp/forwarder.go | 2 | ||||
-rw-r--r-- | pkg/tcpip/transport/tcp/protocol.go | 2 | ||||
-rw-r--r-- | pkg/tcpip/transport/tcp/segment.go | 31 | ||||
-rw-r--r-- | pkg/tcpip/transport/tcp/tcp_test.go | 29 | ||||
-rw-r--r-- | pkg/tcpip/transport/udp/endpoint.go | 2 | ||||
-rw-r--r-- | runsc/boot/network.go | 1 |
16 files changed, 129 insertions, 57 deletions
diff --git a/pkg/sentry/socket/epsocket/epsocket.go b/pkg/sentry/socket/epsocket/epsocket.go index e170da169..5bcafad98 100644 --- a/pkg/sentry/socket/epsocket/epsocket.go +++ b/pkg/sentry/socket/epsocket/epsocket.go @@ -154,6 +154,7 @@ var Metrics = tcpip.Stats{ SlowStartRetransmits: mustCreateMetric("/netstack/tcp/slow_start_retransmits", "Number of segments retransmitted in slow start mode."), FastRetransmit: mustCreateMetric("/netstack/tcp/fast_retransmit", "Number of TCP segments which were fast retransmitted."), Timeouts: mustCreateMetric("/netstack/tcp/timeouts", "Number of times RTO expired."), + ChecksumErrors: mustCreateMetric("/netstack/tcp/checksum_errors", "Number of segments dropped due to bad checksums."), }, UDP: tcpip.UDPStats{ PacketsReceived: mustCreateMetric("/netstack/udp/packets_received", "Number of UDP datagrams received via HandlePacket."), diff --git a/pkg/tcpip/header/tcp.go b/pkg/tcpip/header/tcp.go index 6e3ee2e50..e656ebb15 100644 --- a/pkg/tcpip/header/tcp.go +++ b/pkg/tcpip/header/tcp.go @@ -22,16 +22,17 @@ import ( "gvisor.googlesource.com/gvisor/pkg/tcpip/seqnum" ) +// These constants are the offsets of the respective fields in the TCP header. const ( - srcPort = 0 - dstPort = 2 - seqNum = 4 - ackNum = 8 - dataOffset = 12 - tcpFlags = 13 - winSize = 14 - tcpChecksum = 16 - urgentPtr = 18 + TCPSrcPortOffset = 0 + TCPDstPortOffset = 2 + TCPSeqNumOffset = 4 + TCPAckNumOffset = 8 + TCPDataOffset = 12 + TCPFlagsOffset = 13 + TCPWinSizeOffset = 14 + TCPChecksumOffset = 16 + TCPUrgentPtrOffset = 18 ) const ( @@ -179,27 +180,27 @@ const ( // SourcePort returns the "source port" field of the tcp header. func (b TCP) SourcePort() uint16 { - return binary.BigEndian.Uint16(b[srcPort:]) + return binary.BigEndian.Uint16(b[TCPSrcPortOffset:]) } // DestinationPort returns the "destination port" field of the tcp header. func (b TCP) DestinationPort() uint16 { - return binary.BigEndian.Uint16(b[dstPort:]) + return binary.BigEndian.Uint16(b[TCPDstPortOffset:]) } // SequenceNumber returns the "sequence number" field of the tcp header. func (b TCP) SequenceNumber() uint32 { - return binary.BigEndian.Uint32(b[seqNum:]) + return binary.BigEndian.Uint32(b[TCPSeqNumOffset:]) } // AckNumber returns the "ack number" field of the tcp header. func (b TCP) AckNumber() uint32 { - return binary.BigEndian.Uint32(b[ackNum:]) + return binary.BigEndian.Uint32(b[TCPAckNumOffset:]) } // DataOffset returns the "data offset" field of the tcp header. func (b TCP) DataOffset() uint8 { - return (b[dataOffset] >> 4) * 4 + return (b[TCPDataOffset] >> 4) * 4 } // Payload returns the data in the tcp packet. @@ -209,32 +210,32 @@ func (b TCP) Payload() []byte { // Flags returns the flags field of the tcp header. func (b TCP) Flags() uint8 { - return b[tcpFlags] + return b[TCPFlagsOffset] } // WindowSize returns the "window size" field of the tcp header. func (b TCP) WindowSize() uint16 { - return binary.BigEndian.Uint16(b[winSize:]) + return binary.BigEndian.Uint16(b[TCPWinSizeOffset:]) } // Checksum returns the "checksum" field of the tcp header. func (b TCP) Checksum() uint16 { - return binary.BigEndian.Uint16(b[tcpChecksum:]) + return binary.BigEndian.Uint16(b[TCPChecksumOffset:]) } // SetSourcePort sets the "source port" field of the tcp header. func (b TCP) SetSourcePort(port uint16) { - binary.BigEndian.PutUint16(b[srcPort:], port) + binary.BigEndian.PutUint16(b[TCPSrcPortOffset:], port) } // SetDestinationPort sets the "destination port" field of the tcp header. func (b TCP) SetDestinationPort(port uint16) { - binary.BigEndian.PutUint16(b[dstPort:], port) + binary.BigEndian.PutUint16(b[TCPDstPortOffset:], port) } // SetChecksum sets the checksum field of the tcp header. func (b TCP) SetChecksum(checksum uint16) { - binary.BigEndian.PutUint16(b[tcpChecksum:], checksum) + binary.BigEndian.PutUint16(b[TCPChecksumOffset:], checksum) } // CalculateChecksum calculates the checksum of the tcp segment. @@ -258,20 +259,20 @@ func (b TCP) ParsedOptions() TCPOptions { } func (b TCP) encodeSubset(seq, ack uint32, flags uint8, rcvwnd uint16) { - binary.BigEndian.PutUint32(b[seqNum:], seq) - binary.BigEndian.PutUint32(b[ackNum:], ack) - b[tcpFlags] = flags - binary.BigEndian.PutUint16(b[winSize:], rcvwnd) + binary.BigEndian.PutUint32(b[TCPSeqNumOffset:], seq) + binary.BigEndian.PutUint32(b[TCPAckNumOffset:], ack) + b[TCPFlagsOffset] = flags + binary.BigEndian.PutUint16(b[TCPWinSizeOffset:], rcvwnd) } // Encode encodes all the fields of the tcp header. func (b TCP) Encode(t *TCPFields) { b.encodeSubset(t.SeqNum, t.AckNum, t.Flags, t.WindowSize) - binary.BigEndian.PutUint16(b[srcPort:], t.SrcPort) - binary.BigEndian.PutUint16(b[dstPort:], t.DstPort) - b[dataOffset] = (t.DataOffset / 4) << 4 - binary.BigEndian.PutUint16(b[tcpChecksum:], t.Checksum) - binary.BigEndian.PutUint16(b[urgentPtr:], t.UrgentPointer) + binary.BigEndian.PutUint16(b[TCPSrcPortOffset:], t.SrcPort) + binary.BigEndian.PutUint16(b[TCPDstPortOffset:], t.DstPort) + b[TCPDataOffset] = (t.DataOffset / 4) << 4 + binary.BigEndian.PutUint16(b[TCPChecksumOffset:], t.Checksum) + binary.BigEndian.PutUint16(b[TCPUrgentPtrOffset:], t.UrgentPointer) } // EncodePartial updates a subset of the fields of the tcp header. It is useful @@ -290,18 +291,13 @@ func (b TCP) EncodePartial(partialChecksum, length uint16, seqnum, acknum uint32 b.encodeSubset(seqnum, acknum, flags, rcvwnd) // Add the contributions of the passed-in fields to the checksum. - checksum = Checksum(b[seqNum:seqNum+8], checksum) - checksum = Checksum(b[winSize:winSize+2], checksum) + checksum = Checksum(b[TCPSeqNumOffset:TCPSeqNumOffset+8], checksum) + checksum = Checksum(b[TCPWinSizeOffset:TCPWinSizeOffset+2], checksum) // Encode the checksum. b.SetChecksum(^checksum) } -// TCPChecksumOffset returns offset of the checksum field. -func TCPChecksumOffset() uint16 { - return tcpChecksum -} - // ParseSynOptions parses the options received in a SYN segment and returns the // relevant ones. opts should point to the option part of the TCP Header. func ParseSynOptions(opts []byte, isAck bool) TCPSynOptions { diff --git a/pkg/tcpip/link/fdbased/endpoint.go b/pkg/tcpip/link/fdbased/endpoint.go index 20e34c5ee..84439a9ed 100644 --- a/pkg/tcpip/link/fdbased/endpoint.go +++ b/pkg/tcpip/link/fdbased/endpoint.go @@ -122,13 +122,14 @@ type Options struct { FD int MTU uint32 EthernetHeader bool - ChecksumOffload bool ClosedFunc func(*tcpip.Error) Address tcpip.LinkAddress SaveRestore bool DisconnectOk bool GSOMaxSize uint32 PacketDispatchMode PacketDispatchMode + TXChecksumOffload bool + RXChecksumOffload bool } // New creates a new fd-based endpoint. @@ -142,8 +143,12 @@ func New(opts *Options) tcpip.LinkEndpointID { } caps := stack.LinkEndpointCapabilities(0) - if opts.ChecksumOffload { - caps |= stack.CapabilityChecksumOffload + if opts.RXChecksumOffload { + caps |= stack.CapabilityRXChecksumOffload + } + + if opts.TXChecksumOffload { + caps |= stack.CapabilityTXChecksumOffload } hdrSize := 0 @@ -527,12 +532,13 @@ func (e *InjectableEndpoint) Inject(protocol tcpip.NetworkProtocolNumber, vv buf } // NewInjectable creates a new fd-based InjectableEndpoint. -func NewInjectable(fd int, mtu uint32) (tcpip.LinkEndpointID, *InjectableEndpoint) { +func NewInjectable(fd int, mtu uint32, capabilities stack.LinkEndpointCapabilities) (tcpip.LinkEndpointID, *InjectableEndpoint) { syscall.SetNonblock(fd, true) e := &InjectableEndpoint{endpoint: endpoint{ - fd: fd, - mtu: mtu, + fd: fd, + mtu: mtu, + caps: capabilities, }} return stack.RegisterLinkEndpoint(e), e diff --git a/pkg/tcpip/link/loopback/loopback.go b/pkg/tcpip/link/loopback/loopback.go index d58c0f885..2dc4bcfda 100644 --- a/pkg/tcpip/link/loopback/loopback.go +++ b/pkg/tcpip/link/loopback/loopback.go @@ -56,7 +56,7 @@ func (*endpoint) MTU() uint32 { // Capabilities implements stack.LinkEndpoint.Capabilities. Loopback advertises // itself as supporting checksum offload, but in reality it's just omitted. func (*endpoint) Capabilities() stack.LinkEndpointCapabilities { - return stack.CapabilityChecksumOffload | stack.CapabilitySaveRestore | stack.CapabilityLoopback + return stack.CapabilityRXChecksumOffload | stack.CapabilityTXChecksumOffload | stack.CapabilitySaveRestore | stack.CapabilityLoopback } // MaxHeaderLength implements stack.LinkEndpoint.MaxHeaderLength. Given that the diff --git a/pkg/tcpip/link/muxed/injectable.go b/pkg/tcpip/link/muxed/injectable.go index 99edc232d..b3e71c7fc 100644 --- a/pkg/tcpip/link/muxed/injectable.go +++ b/pkg/tcpip/link/muxed/injectable.go @@ -105,7 +105,7 @@ func (m *InjectableEndpoint) WriteRawPacket(dest tcpip.Address, packet []byte) * } // NewInjectableEndpoint creates a new multi-endpoint injectable endpoint. -func NewInjectableEndpoint(routes map[tcpip.Address]stack.InjectableLinkEndpoint, mtu uint32) (tcpip.LinkEndpointID, *InjectableEndpoint) { +func NewInjectableEndpoint(routes map[tcpip.Address]stack.InjectableLinkEndpoint) (tcpip.LinkEndpointID, *InjectableEndpoint) { e := &InjectableEndpoint{ routes: routes, } diff --git a/pkg/tcpip/link/muxed/injectable_test.go b/pkg/tcpip/link/muxed/injectable_test.go index 7d25effad..031449a05 100644 --- a/pkg/tcpip/link/muxed/injectable_test.go +++ b/pkg/tcpip/link/muxed/injectable_test.go @@ -87,8 +87,8 @@ func makeTestInjectableEndpoint(t *testing.T) (*InjectableEndpoint, *os.File, tc if err != nil { t.Fatal("Failed to create socket pair:", err) } - _, underlyingEndpoint := fdbased.NewInjectable(pair[1], 6500) + _, underlyingEndpoint := fdbased.NewInjectable(pair[1], 6500, stack.CapabilityNone) routes := map[tcpip.Address]stack.InjectableLinkEndpoint{dstIP: underlyingEndpoint} - _, endpoint := NewInjectableEndpoint(routes, 6500) + _, endpoint := NewInjectableEndpoint(routes) return endpoint, os.NewFile(uintptr(pair[0]), "test route end"), dstIP } diff --git a/pkg/tcpip/stack/registration.go b/pkg/tcpip/stack/registration.go index f3cc849ec..6e1660051 100644 --- a/pkg/tcpip/stack/registration.go +++ b/pkg/tcpip/stack/registration.go @@ -232,7 +232,15 @@ type LinkEndpointCapabilities uint // The following are the supported link endpoint capabilities. const ( - CapabilityChecksumOffload LinkEndpointCapabilities = 1 << iota + CapabilityNone LinkEndpointCapabilities = 0 + // CapabilityTXChecksumOffload indicates that the link endpoint supports + // checksum computation for outgoing packets and the stack can skip + // computing checksums when sending packets. + CapabilityTXChecksumOffload LinkEndpointCapabilities = 1 << iota + // CapabilityRXChecksumOffload indicates that the link endpoint supports + // checksum verification on received packets and that it's safe for the + // stack to skip checksum verification. + CapabilityRXChecksumOffload CapabilityResolutionRequired CapabilitySaveRestore CapabilityDisconnectOk diff --git a/pkg/tcpip/tcpip.go b/pkg/tcpip/tcpip.go index e9f73635f..e898dcbca 100644 --- a/pkg/tcpip/tcpip.go +++ b/pkg/tcpip/tcpip.go @@ -801,6 +801,9 @@ type TCPStats struct { // Timeouts is the number of times the RTO expired. Timeouts *StatCounter + + // ChecksumErrors is the number of segments dropped due to bad checksums. + ChecksumErrors *StatCounter } // UDPStats collects UDP-specific stats. diff --git a/pkg/tcpip/transport/tcp/connect.go b/pkg/tcpip/transport/tcp/connect.go index 056e0b09a..6c4a4d95e 100644 --- a/pkg/tcpip/transport/tcp/connect.go +++ b/pkg/tcpip/transport/tcp/connect.go @@ -595,7 +595,7 @@ func sendTCP(r *stack.Route, id stack.TransportEndpointID, data buffer.Vectorise // TCP header, then the kernel calculate a checksum of the // header and data and get the right sum of the TCP packet. tcp.SetChecksum(xsum) - } else if r.Capabilities()&stack.CapabilityChecksumOffload == 0 { + } else if r.Capabilities()&stack.CapabilityTXChecksumOffload == 0 { xsum = header.ChecksumVV(data, xsum) tcp.SetChecksum(^tcp.CalculateChecksum(xsum)) } diff --git a/pkg/tcpip/transport/tcp/endpoint.go b/pkg/tcpip/transport/tcp/endpoint.go index 41c87cc7e..b5d05af7d 100644 --- a/pkg/tcpip/transport/tcp/endpoint.go +++ b/pkg/tcpip/transport/tcp/endpoint.go @@ -1447,6 +1447,13 @@ func (e *endpoint) HandlePacket(r *stack.Route, id stack.TransportEndpointID, vv return } + if !s.csumValid { + e.stack.Stats().MalformedRcvdPackets.Increment() + e.stack.Stats().TCP.ChecksumErrors.Increment() + s.decRef() + return + } + e.stack.Stats().TCP.ValidSegmentsReceived.Increment() if (s.flags & header.TCPFlagRst) != 0 { e.stack.Stats().TCP.ResetsReceived.Increment() @@ -1721,7 +1728,7 @@ func (e *endpoint) initGSO() { panic(fmt.Sprintf("Unknown netProto: %v", e.netProto)) } gso.NeedsCsum = true - gso.CsumOffset = header.TCPChecksumOffset() + gso.CsumOffset = header.TCPChecksumOffset gso.MaxSize = e.route.GSOMaxSize() e.gso = gso } diff --git a/pkg/tcpip/transport/tcp/forwarder.go b/pkg/tcpip/transport/tcp/forwarder.go index 7a6589cfd..6a7efaf1d 100644 --- a/pkg/tcpip/transport/tcp/forwarder.go +++ b/pkg/tcpip/transport/tcp/forwarder.go @@ -68,7 +68,7 @@ func (f *Forwarder) HandlePacket(r *stack.Route, id stack.TransportEndpointID, n defer s.decRef() // We only care about well-formed SYN packets. - if !s.parse() || s.flags != header.TCPFlagSyn { + if !s.parse() || !s.csumValid || s.flags != header.TCPFlagSyn { return false } diff --git a/pkg/tcpip/transport/tcp/protocol.go b/pkg/tcpip/transport/tcp/protocol.go index 230668b5d..b5fb160bc 100644 --- a/pkg/tcpip/transport/tcp/protocol.go +++ b/pkg/tcpip/transport/tcp/protocol.go @@ -130,7 +130,7 @@ func (*protocol) HandleUnknownDestinationPacket(r *stack.Route, id stack.Transpo s := newSegment(r, id, vv) defer s.decRef() - if !s.parse() { + if !s.parse() || !s.csumValid { return false } diff --git a/pkg/tcpip/transport/tcp/segment.go b/pkg/tcpip/transport/tcp/segment.go index df8402bf9..c603fe713 100644 --- a/pkg/tcpip/transport/tcp/segment.go +++ b/pkg/tcpip/transport/tcp/segment.go @@ -45,6 +45,10 @@ type segment struct { ackNumber seqnum.Value flags uint8 window seqnum.Size + // csum is only populated for received segments. + csum uint16 + // csumValid is true if the csum in the received segment is valid. + csumValid bool // parsedOptions stores the parsed values from the options in the segment. parsedOptions header.TCPOptions @@ -124,7 +128,13 @@ func (s *segment) logicalLen() seqnum.Size { // parse populates the sequence & ack numbers, flags, and window fields of the // segment from the TCP header stored in the data. It then updates the view to -// skip the data. Returns boolean indicating if the parsing was successful. +// skip the header. +// +// Returns boolean indicating if the parsing was successful. +// +// If checksum verification is not offloaded then parse also verifies the +// TCP checksum and stores the checksum and result of checksum verification in +// the csum and csumValid fields of the segment. func (s *segment) parse() bool { h := header.TCP(s.data.First()) @@ -145,12 +155,27 @@ func (s *segment) parse() bool { s.options = []byte(h[header.TCPMinimumSize:offset]) s.parsedOptions = header.ParseTCPOptions(s.options) - s.data.TrimFront(offset) + + // Query the link capabilities to decide if checksum validation is + // required. + verifyChecksum := true + if s.route.Capabilities()&stack.CapabilityRXChecksumOffload != 0 { + s.csumValid = true + verifyChecksum = false + s.data.TrimFront(offset) + } + if verifyChecksum { + s.csum = h.Checksum() + xsum := s.route.PseudoHeaderChecksum(ProtocolNumber, uint16(s.data.Size())) + xsum = h.CalculateChecksum(xsum) + s.data.TrimFront(offset) + xsum = header.ChecksumVV(s.data, xsum) + s.csumValid = xsum == 0xffff + } s.sequenceNumber = seqnum.Value(h.SequenceNumber()) s.ackNumber = seqnum.Value(h.AckNumber()) s.flags = h.Flags() s.window = seqnum.Size(h.WindowSize()) - return true } diff --git a/pkg/tcpip/transport/tcp/tcp_test.go b/pkg/tcpip/transport/tcp/tcp_test.go index 7f2615ca9..af50ac8af 100644 --- a/pkg/tcpip/transport/tcp/tcp_test.go +++ b/pkg/tcpip/transport/tcp/tcp_test.go @@ -2963,8 +2963,7 @@ func TestReceivedInvalidSegmentCountIncrement(t *testing.T) { RcvWnd: 30000, }) tcpbuf := vv.First()[header.IPv4MinimumSize:] - // 12 is the TCP header data offset. - tcpbuf[12] = ((header.TCPMinimumSize - 1) / 4) << 4 + tcpbuf[header.TCPDataOffset] = ((header.TCPMinimumSize - 1) / 4) << 4 c.SendSegment(vv) @@ -2973,6 +2972,32 @@ func TestReceivedInvalidSegmentCountIncrement(t *testing.T) { } } +func TestReceivedIncorrectChecksumIncrement(t *testing.T) { + c := context.New(t, defaultMTU) + defer c.Cleanup() + c.CreateConnected(789, 30000, nil) + stats := c.Stack().Stats() + want := stats.TCP.ChecksumErrors.Value() + 1 + vv := c.BuildSegment([]byte{0x1, 0x2, 0x3}, &context.Headers{ + SrcPort: context.TestPort, + DstPort: c.Port, + Flags: header.TCPFlagAck, + SeqNum: seqnum.Value(790), + AckNum: c.IRS.Add(1), + RcvWnd: 30000, + }) + tcpbuf := vv.First()[header.IPv4MinimumSize:] + // Overwrite a byte in the payload which should cause checksum + // verification to fail. + tcpbuf[(tcpbuf[header.TCPDataOffset]>>4)*4] = 0x4 + + c.SendSegment(vv) + + if got := stats.TCP.ChecksumErrors.Value(); got != want { + t.Errorf("got stats.TCP.ChecksumErrors.Value() = %d, want = %d", got, want) + } +} + func TestReceivedSegmentQueuing(t *testing.T) { // This test sends 200 segments containing a few bytes each to an // endpoint and checks that they're all received and acknowledged by diff --git a/pkg/tcpip/transport/udp/endpoint.go b/pkg/tcpip/transport/udp/endpoint.go index 19e532180..1f9251de3 100644 --- a/pkg/tcpip/transport/udp/endpoint.go +++ b/pkg/tcpip/transport/udp/endpoint.go @@ -640,7 +640,7 @@ func sendUDP(r *stack.Route, data buffer.VectorisedView, localPort, remotePort u }) // Only calculate the checksum if offloading isn't supported. - if r.Capabilities()&stack.CapabilityChecksumOffload == 0 { + if r.Capabilities()&stack.CapabilityTXChecksumOffload == 0 { xsum := r.PseudoHeaderChecksum(ProtocolNumber, length) for _, v := range data.Views() { xsum = header.Checksum(v, xsum) diff --git a/runsc/boot/network.go b/runsc/boot/network.go index 77291415b..3915a021f 100644 --- a/runsc/boot/network.go +++ b/runsc/boot/network.go @@ -142,6 +142,7 @@ func (n *Network) CreateLinksAndRoutes(args *CreateLinksAndRoutesArgs, _ *struct Address: mac, PacketDispatchMode: fdbased.PacketMMap, GSOMaxSize: link.GSOMaxSize, + RXChecksumOffload: true, }) log.Infof("Enabling interface %q with id %d on addresses %+v (%v)", link.Name, nicID, link.Addresses, mac) |