diff options
-rw-r--r-- | pkg/tcpip/transport/tcp/accept.go | 16 | ||||
-rw-r--r-- | pkg/tcpip/transport/tcp/connect.go | 16 | ||||
-rw-r--r-- | pkg/tcpip/transport/tcp/endpoint.go | 34 | ||||
-rw-r--r-- | pkg/tcpip/transport/tcp/protocol.go | 23 | ||||
-rw-r--r-- | pkg/tcpip/transport/tcp/rack.go | 3 | ||||
-rw-r--r-- | pkg/tcpip/transport/tcp/snd.go | 5 | ||||
-rw-r--r-- | pkg/tcpip/transport/tcp/tcp_test.go | 6 |
7 files changed, 50 insertions, 53 deletions
diff --git a/pkg/tcpip/transport/tcp/accept.go b/pkg/tcpip/transport/tcp/accept.go index 6c6ec095e..f8269efa6 100644 --- a/pkg/tcpip/transport/tcp/accept.go +++ b/pkg/tcpip/transport/tcp/accept.go @@ -602,10 +602,19 @@ func (e *endpoint) handleListenSegment(ctx *listenContext, s *segment) tcpip.Err synOpts := header.TCPSynOptions{ WS: -1, TS: opts.TS, - TSVal: tcpTimeStamp(e.stack.Clock().NowMonotonic(), timeStampOffset(e.protocol.tsOffsetSecret, s.dstAddr, s.srcAddr)), TSEcr: opts.TSVal, MSS: calculateAdvertisedMSS(e.userMSS, route), } + if opts.TS { + // Create a barely-sufficient endpoint to calculate the TSVal. + pseudoEndpoint := endpoint{ + TCPEndpointStateInner: stack.TCPEndpointStateInner{ + TSOffset: e.protocol.tsOffset(s.dstAddr, s.srcAddr), + }, + stack: e.stack, + } + synOpts.TSVal = pseudoEndpoint.tsValNow() + } cookie := ctx.createCookie(s.id, s.sequenceNumber, encodeMSS(opts.MSS)) fields := tcpFields{ id: s.id, @@ -727,10 +736,7 @@ func (e *endpoint) handleListenSegment(ctx *listenContext, s *segment) tcpip.Err } n.isRegistered = true - - // Reset the tsOffset for the newly created endpoint to the one - // that we have used in SYN-ACK in order to calculate RTT. - n.TSOffset = timeStampOffset(e.protocol.tsOffsetSecret, s.dstAddr, s.srcAddr) + n.TSOffset = n.protocol.tsOffset(s.dstAddr, s.srcAddr) // Switch state to connected. n.isConnectNotified = true diff --git a/pkg/tcpip/transport/tcp/connect.go b/pkg/tcpip/transport/tcp/connect.go index ac9091d04..f331655fc 100644 --- a/pkg/tcpip/transport/tcp/connect.go +++ b/pkg/tcpip/transport/tcp/connect.go @@ -123,7 +123,7 @@ func (e *endpoint) newHandshake() *handshake { // Store reference to handshake state in endpoint. e.h = h // By the time handshake is created, e.ID is already initialized. - e.TSOffset = timeStampOffset(e.protocol.tsOffsetSecret, e.ID.LocalAddress, e.ID.RemoteAddress) + e.TSOffset = e.protocol.tsOffset(e.ID.LocalAddress, e.ID.RemoteAddress) return h } @@ -292,7 +292,7 @@ func (h *handshake) synSentState(s *segment) tcpip.Error { synOpts := header.TCPSynOptions{ WS: int(h.effectiveRcvWndScale()), TS: rcvSynOpts.TS, - TSVal: h.ep.timestamp(), + TSVal: h.ep.tsValNow(), TSEcr: h.ep.recentTimestamp(), // We only send SACKPermitted if the other side indicated it @@ -362,7 +362,7 @@ func (h *handshake) synRcvdState(s *segment) tcpip.Error { synOpts := header.TCPSynOptions{ WS: h.rcvWndScale, TS: h.ep.SendTSOk, - TSVal: h.ep.timestamp(), + TSVal: h.ep.tsValNow(), TSEcr: h.ep.recentTimestamp(), SACKPermitted: h.ep.SACKPermitted, MSS: h.ep.amss, @@ -490,7 +490,7 @@ func (h *handshake) start() { synOpts := header.TCPSynOptions{ WS: h.rcvWndScale, TS: true, - TSVal: h.ep.timestamp(), + TSVal: h.ep.tsValNow(), TSEcr: h.ep.recentTimestamp(), SACKPermitted: bool(sackEnabled), MSS: h.ep.amss, @@ -623,12 +623,14 @@ func (h *handshake) transitionToStateEstablishedLocked(s *segment) { // (indicated by a negative send window scale). h.ep.snd = newSender(h.ep, h.iss, h.ackNum-1, h.sndWnd, h.mss, h.sndWndScale) + now := h.ep.stack.Clock().NowMonotonic() + var rtt time.Duration if h.ep.SendTSOk && s.parsedOptions.TSEcr != 0 { - rtt = time.Duration(h.ep.timestamp()-s.parsedOptions.TSEcr) * time.Millisecond + rtt = h.ep.elapsed(now, s.parsedOptions.TSEcr) } if !h.sampleRTTWithTSOnly && rtt == 0 { - rtt = h.ep.stack.Clock().NowMonotonic().Sub(h.startTime) + rtt = now.Sub(h.startTime) } if rtt > 0 { @@ -919,7 +921,7 @@ func (e *endpoint) makeOptions(sackBlocks []header.SACKBlock) []byte { // Ref: https://tools.ietf.org/html/rfc7323#section-5.4. offset += header.EncodeNOP(options[offset:]) offset += header.EncodeNOP(options[offset:]) - offset += header.EncodeTSOption(e.timestamp(), e.recentTimestamp(), options[offset:]) + offset += header.EncodeTSOption(e.tsValNow(), e.recentTimestamp(), options[offset:]) } if e.SACKPermitted && len(sackBlocks) > 0 { offset += header.EncodeNOP(options[offset:]) diff --git a/pkg/tcpip/transport/tcp/endpoint.go b/pkg/tcpip/transport/tcp/endpoint.go index 75cd9e90c..0623ee8ed 100644 --- a/pkg/tcpip/transport/tcp/endpoint.go +++ b/pkg/tcpip/transport/tcp/endpoint.go @@ -2912,38 +2912,16 @@ func (e *endpoint) maybeEnableTimestamp(synOpts header.TCPSynOptions) { } } -// timestamp returns the timestamp value to be used in the TSVal field of the -// timestamp option for outgoing TCP segments for a given endpoint. -func (e *endpoint) timestamp() uint32 { - return tcpTimeStamp(e.stack.Clock().NowMonotonic(), e.TSOffset) +func (e *endpoint) tsVal(now tcpip.MonotonicTime) uint32 { + return uint32(now.Sub(tcpip.MonotonicTime{}).Milliseconds()) + e.TSOffset } -// tcpTimeStamp returns a timestamp offset by the provided offset. This is -// not inlined above as it's used when SYN cookies are in use and endpoint -// is not created at the time when the SYN cookie is sent. -func tcpTimeStamp(curTime tcpip.MonotonicTime, offset uint32) uint32 { - d := curTime.Sub(tcpip.MonotonicTime{}) - return uint32(d.Milliseconds()) + offset +func (e *endpoint) tsValNow() uint32 { + return e.tsVal(e.stack.Clock().NowMonotonic()) } -// timeStampOffset returns a randomized timestamp offset to be used when sending -// timestamp values in a timestamp option for a TCP segment. -func timeStampOffset(secret uint32, src, dst tcpip.Address) uint32 { - // Initialize a random tsOffset that will be added to the recentTS - // everytime the timestamp is sent when the Timestamp option is enabled. - // - // See https://tools.ietf.org/html/rfc7323#section-5.4 for details on - // why this is required. - // - // TODO(https://gvisor.dev/issues/6473): This is not really secure as - // it does not use the recommended algorithm linked above. - h := jenkins.Sum32(secret) - // Per hash.Hash.Writer: - // - // It never returns an error. - _, _ = h.Write([]byte(src)) - _, _ = h.Write([]byte(dst)) - return h.Sum32() +func (e *endpoint) elapsed(now tcpip.MonotonicTime, tsEcr uint32) time.Duration { + return time.Duration(e.tsVal(now)-tsEcr) * time.Millisecond } // maybeEnableSACKPermitted marks the SACKPermitted option enabled for this endpoint diff --git a/pkg/tcpip/transport/tcp/protocol.go b/pkg/tcpip/transport/tcp/protocol.go index 00a083dbe..b0ffd2429 100644 --- a/pkg/tcpip/transport/tcp/protocol.go +++ b/pkg/tcpip/transport/tcp/protocol.go @@ -23,6 +23,7 @@ import ( "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/tcpip" "gvisor.dev/gvisor/pkg/tcpip/buffer" + "gvisor.dev/gvisor/pkg/tcpip/hash/jenkins" "gvisor.dev/gvisor/pkg/tcpip/header" "gvisor.dev/gvisor/pkg/tcpip/header/parse" "gvisor.dev/gvisor/pkg/tcpip/seqnum" @@ -49,10 +50,6 @@ const ( // MaxBufferSize is the largest size a receive/send buffer can grow to. MaxBufferSize = 4 << 20 // 4MB - // MaxUnprocessedSegments is the maximum number of unprocessed segments - // that can be queued for a given endpoint. - MaxUnprocessedSegments = 300 - // DefaultTCPLingerTimeout is the amount of time that sockets linger in // FIN_WAIT_2 state before being marked closed. DefaultTCPLingerTimeout = 60 * time.Second @@ -161,6 +158,24 @@ func (p *protocol) HandleUnknownDestinationPacket(id stack.TransportEndpointID, return stack.UnknownDestinationPacketHandled } +func (p *protocol) tsOffset(src, dst tcpip.Address) uint32 { + // Initialize a random tsOffset that will be added to the recentTS + // everytime the timestamp is sent when the Timestamp option is enabled. + // + // See https://tools.ietf.org/html/rfc7323#section-5.4 for details on + // why this is required. + // + // TODO(https://gvisor.dev/issues/6473): This is not really secure as + // it does not use the recommended algorithm linked above. + h := jenkins.Sum32(p.tsOffsetSecret) + // Per hash.Hash.Writer: + // + // It never returns an error. + _, _ = h.Write([]byte(src)) + _, _ = h.Write([]byte(dst)) + return h.Sum32() +} + // replyWithReset replies to the given segment with a reset segment. // // If the passed TTL is 0, then the route's default TTL will be used. diff --git a/pkg/tcpip/transport/tcp/rack.go b/pkg/tcpip/transport/tcp/rack.go index 0da4eafaa..3b055c294 100644 --- a/pkg/tcpip/transport/tcp/rack.go +++ b/pkg/tcpip/transport/tcp/rack.go @@ -80,7 +80,6 @@ func (rc *rackControl) init(snd *sender, iss seqnum.Value) { // See: https://tools.ietf.org/html/draft-ietf-tcpm-rack-09#section-6.2 func (rc *rackControl) update(seg *segment, ackSeg *segment) { rtt := rc.snd.ep.stack.Clock().NowMonotonic().Sub(seg.xmitTime) - tsOffset := rc.snd.ep.TSOffset // If the ACK is for a retransmitted packet, do not update if it is a // spurious inference which is determined by below checks: @@ -92,7 +91,7 @@ func (rc *rackControl) update(seg *segment, ackSeg *segment) { // step 2 if seg.xmitCount > 1 { if ackSeg.parsedOptions.TS && ackSeg.parsedOptions.TSEcr != 0 { - if ackSeg.parsedOptions.TSEcr < tcpTimeStamp(seg.xmitTime, tsOffset) { + if ackSeg.parsedOptions.TSEcr < rc.snd.ep.tsVal(seg.xmitTime) { return } } diff --git a/pkg/tcpip/transport/tcp/snd.go b/pkg/tcpip/transport/tcp/snd.go index a1f1c4e59..2fabf1594 100644 --- a/pkg/tcpip/transport/tcp/snd.go +++ b/pkg/tcpip/transport/tcp/snd.go @@ -1345,10 +1345,7 @@ func (s *sender) handleRcvdSegment(rcvdSeg *segment) { // some new data, i.e., only if it advances the left edge of // the send window. if s.ep.SendTSOk && rcvdSeg.parsedOptions.TSEcr != 0 { - // TSVal/Ecr values sent by Netstack are at a millisecond - // granularity. - elapsed := time.Duration(s.ep.timestamp()-rcvdSeg.parsedOptions.TSEcr) * time.Millisecond - s.updateRTO(elapsed) + s.updateRTO(s.ep.elapsed(s.ep.stack.Clock().NowMonotonic(), rcvdSeg.parsedOptions.TSEcr)) } if s.shouldSchedulePTO() { diff --git a/pkg/tcpip/transport/tcp/tcp_test.go b/pkg/tcpip/transport/tcp/tcp_test.go index 36bc638a8..90b74a2a7 100644 --- a/pkg/tcpip/transport/tcp/tcp_test.go +++ b/pkg/tcpip/transport/tcp/tcp_test.go @@ -4959,7 +4959,7 @@ func TestConnectAvoidsBoundPorts(t *testing.T) { t.Fatalf("got s.SetPortRange(%d, %d) = %s, want = nil", start, end, err) } for i := start; i <= end; i++ { - if makeEP(exhaustedNetwork).Bind(tcpip.FullAddress{Addr: address(t, exhaustedAddressType, isAny), Port: uint16(i)}); err != nil { + if err := makeEP(exhaustedNetwork).Bind(tcpip.FullAddress{Addr: address(t, exhaustedAddressType, isAny), Port: uint16(i)}); err != nil { t.Fatalf("Bind(%d) failed: %s", i, err) } } @@ -8033,7 +8033,7 @@ func TestHandshakeRTT(t *testing.T) { if err := c.EP.GetSockOpt(&info); err != nil { t.Fatalf("c.EP.GetSockOpt(&%T) = %s", info, err) } - if got := time.Duration(info.RTT).Round(tt.wantRTT); got != tt.wantRTT { + if got := info.RTT.Round(tt.wantRTT); got != tt.wantRTT { t.Fatalf("got info.RTT=%s, expect %s", got, tt.wantRTT) } if info.RTTVar != 0 && tt.wantRTT == 0 { @@ -8209,7 +8209,7 @@ func TestSendBufferTuning(t *testing.T) { if err := c.EP.GetSockOpt(&info); err != nil { t.Fatalf("GetSockOpt failed: %v", err) } - outSz = (int64(info.SndCwnd) * packetOverheadFactor * (maxPayload)) + outSz = int64(info.SndCwnd) * packetOverheadFactor * maxPayload } if newSz := c.EP.SocketOptions().GetSendBufferSize(); newSz != outSz { |