From 79511e8a50facd509b8180d0160762b510dd6196 Mon Sep 17 00:00:00 2001 From: Rahat Mahmood Date: Thu, 1 Aug 2019 13:57:41 -0700 Subject: Implement getsockopt(TCP_INFO). Export some readily-available fields for TCP_INFO and stub out the rest. PiperOrigin-RevId: 261191548 --- pkg/tcpip/transport/tcp/endpoint.go | 56 +++++++++++++++++------------- pkg/tcpip/transport/tcp/rcv.go | 27 ++++++++------ pkg/tcpip/transport/tcp/sack_scoreboard.go | 4 +-- pkg/tcpip/transport/tcp/snd.go | 5 +++ 4 files changed, 55 insertions(+), 37 deletions(-) (limited to 'pkg/tcpip/transport') diff --git a/pkg/tcpip/transport/tcp/endpoint.go b/pkg/tcpip/transport/tcp/endpoint.go index cc49c8272..e94307bd5 100644 --- a/pkg/tcpip/transport/tcp/endpoint.go +++ b/pkg/tcpip/transport/tcp/endpoint.go @@ -108,6 +108,9 @@ func (s EndpointState) String() string { } } +// InfoOption is used by GetSockOpt to expose TCP endpoint state. +type InfoOption stack.TCPEndpointState + // Reasons for notifying the protocol goroutine. const ( notifyNonZeroReceiveWindow = 1 << iota @@ -202,12 +205,14 @@ type endpoint struct { // to indicate to users that no more data is coming. // // rcvListMu can be taken after the endpoint mu below. - rcvListMu sync.Mutex `state:"nosave"` - rcvList segmentList `state:"wait"` - rcvClosed bool - rcvBufSize int - rcvBufUsed int - rcvAutoParams rcvBufAutoTuneParams + rcvListMu sync.Mutex `state:"nosave"` + rcvList segmentList `state:"wait"` + rcvClosed bool + rcvBufSize int + rcvBufUsed int + rcvAutoParams rcvBufAutoTuneParams + rcvLastAckNanos int64 // timestamp + rcvLastDataNanos int64 // timestamp // zeroWindow indicates that the window was closed due to receive buffer // space being filled up. This is set by the worker goroutine before // moving a segment to the rcvList. 
This setting is cleared by the @@ -1198,17 +1203,10 @@ func (e *endpoint) GetSockOpt(opt interface{}) *tcpip.Error { } return nil - case *tcpip.TCPInfoOption: - *o = tcpip.TCPInfoOption{} - e.mu.RLock() - snd := e.snd - e.mu.RUnlock() - if snd != nil { - snd.rtt.Lock() - o.RTT = snd.rtt.srtt - o.RTTVar = snd.rtt.rttvar - snd.rtt.Unlock() - } + case *InfoOption: + e.workMu.Lock() + *o = InfoOption(e.completeState()) + e.workMu.Unlock() return nil case *tcpip.KeepaliveEnabledOption: @@ -1933,22 +1931,27 @@ func (e *endpoint) maxOptionSize() (size int) { } // completeState makes a full copy of the endpoint and returns it. This is used -// before invoking the probe. The state returned may not be fully consistent if -// there are intervening syscalls when the state is being copied. +// before invoking the probe and for getsockopt(TCP_INFO). The state returned +// may not be fully consistent if there are intervening syscalls when the state +// is being copied. func (e *endpoint) completeState() stack.TCPEndpointState { var s stack.TCPEndpointState s.SegTime = time.Now() - // Copy EndpointID. - e.mu.Lock() + e.mu.RLock() s.ID = stack.TCPEndpointID(e.id) - e.mu.Unlock() + s.ProtocolState = uint32(e.state) + s.AMSS = e.amss + s.RcvMSS = int(e.amss) - e.maxOptionSize() + e.mu.RUnlock() // Copy endpoint rcv state. 
e.rcvListMu.Lock() s.RcvBufSize = e.rcvBufSize s.RcvBufUsed = e.rcvBufUsed s.RcvClosed = e.rcvClosed + s.RcvLastAckNanos = e.rcvLastAckNanos + s.RcvLastDataNanos = e.rcvLastDataNanos s.RcvAutoParams.MeasureTime = e.rcvAutoParams.measureTime s.RcvAutoParams.CopiedBytes = e.rcvAutoParams.copied s.RcvAutoParams.PrevCopiedBytes = e.rcvAutoParams.prevCopied @@ -1956,6 +1959,7 @@ func (e *endpoint) completeState() stack.TCPEndpointState { s.RcvAutoParams.RTTMeasureSeqNumber = e.rcvAutoParams.rttMeasureSeqNumber s.RcvAutoParams.RTTMeasureTime = e.rcvAutoParams.rttMeasureTime s.RcvAutoParams.Disabled = e.rcvAutoParams.disabled + e.rcvListMu.Unlock() // Endpoint TCP Option state. @@ -1965,7 +1969,7 @@ func (e *endpoint) completeState() stack.TCPEndpointState { s.SACKPermitted = e.sackPermitted s.SACK.Blocks = make([]header.SACKBlock, e.sack.NumBlocks) copy(s.SACK.Blocks, e.sack.Blocks[:e.sack.NumBlocks]) - s.SACK.ReceivedBlocks, s.SACK.MaxSACKED = e.scoreboard.Copy() + s.SACK.ReceivedBlocks, s.SACK.Sacked, s.SACK.MaxSACKED = e.scoreboard.Copy() // Copy endpoint send state. e.sndBufMu.Lock() @@ -2009,12 +2013,14 @@ func (e *endpoint) completeState() stack.TCPEndpointState { RTTMeasureTime: e.snd.rttMeasureTime, Closed: e.snd.closed, RTO: e.snd.rto, + MSS: e.snd.mss, MaxPayloadSize: e.snd.maxPayloadSize, SndWndScale: e.snd.sndWndScale, MaxSentAck: e.snd.maxSentAck, } e.snd.rtt.Lock() s.Sender.SRTT = e.snd.rtt.srtt + s.Sender.RTTVar = e.snd.rtt.rttvar s.Sender.SRTTInited = e.snd.rtt.srttInited e.snd.rtt.Unlock() @@ -2059,8 +2065,8 @@ func (e *endpoint) initGSO() { // State implements tcpip.Endpoint.State. It exports the endpoint's protocol // state for diagnostics. 
func (e *endpoint) State() uint32 { - e.mu.Lock() - defer e.mu.Unlock() + e.mu.RLock() + defer e.mu.RUnlock() return uint32(e.state) } diff --git a/pkg/tcpip/transport/tcp/rcv.go b/pkg/tcpip/transport/tcp/rcv.go index e90f9a7d9..a8f490c4a 100644 --- a/pkg/tcpip/transport/tcp/rcv.go +++ b/pkg/tcpip/transport/tcp/rcv.go @@ -220,25 +220,24 @@ func (r *receiver) consumeSegment(s *segment, segSeq seqnum.Value, segLen seqnum return true } -// updateRTT updates the receiver RTT measurement based on the sequence number -// of the received segment. -func (r *receiver) updateRTT() { +// updateRTTLocked updates the receiver RTT measurement based on the sequence +// number of the received segment. +// +// Precondition: Caller must hold r.ep.rcvListMu. +func (r *receiver) updateRTTLocked() { // From: https://public.lanl.gov/radiant/pubs/drs/sc2001-poster.pdf // // A system that is only transmitting acknowledgements can still // estimate the round-trip time by observing the time between when a byte // is first acknowledged and the receipt of data that is at least one // window beyond the sequence number that was acknowledged. - r.ep.rcvListMu.Lock() if r.ep.rcvAutoParams.rttMeasureTime.IsZero() { // New measurement. r.ep.rcvAutoParams.rttMeasureTime = time.Now() r.ep.rcvAutoParams.rttMeasureSeqNumber = r.rcvNxt.Add(r.rcvWnd) - r.ep.rcvListMu.Unlock() return } if r.rcvNxt.LessThan(r.ep.rcvAutoParams.rttMeasureSeqNumber) { - r.ep.rcvListMu.Unlock() return } rtt := time.Since(r.ep.rcvAutoParams.rttMeasureTime) @@ -250,7 +249,6 @@ func (r *receiver) updateRTT() { } r.ep.rcvAutoParams.rttMeasureTime = time.Now() r.ep.rcvAutoParams.rttMeasureSeqNumber = r.rcvNxt.Add(r.rcvWnd) - r.ep.rcvListMu.Unlock() } // handleRcvdSegment handles TCP segments directed at the connection managed by @@ -291,11 +289,20 @@ func (r *receiver) handleRcvdSegment(s *segment) { return } - // Since we consumed a segment update the receiver's RTT estimate - // if required. 
+ r.ep.rcvListMu.Lock() + // FIXME(b/137581805): Using the runtime clock here is incorrect as it + // doesn't account for potentially virtualized time. + now := time.Now().UnixNano() + if s.flagIsSet(header.TCPFlagAck) { + r.ep.rcvLastAckNanos = now + } if segLen > 0 { - r.updateRTT() + // Since we consumed a segment update the receiver's RTT estimate if + // required. + r.ep.rcvLastDataNanos = now + r.updateRTTLocked() } + r.ep.rcvListMu.Unlock() // By consuming the current segment, we may have filled a gap in the // sequence number domain that allows pending segments to be consumed diff --git a/pkg/tcpip/transport/tcp/sack_scoreboard.go b/pkg/tcpip/transport/tcp/sack_scoreboard.go index 7ef2df377..02e52a63b 100644 --- a/pkg/tcpip/transport/tcp/sack_scoreboard.go +++ b/pkg/tcpip/transport/tcp/sack_scoreboard.go @@ -208,12 +208,12 @@ func (s *SACKScoreboard) Delete(seq seqnum.Value) { } // Copy provides a copy of the SACK scoreboard. -func (s *SACKScoreboard) Copy() (sackBlocks []header.SACKBlock, maxSACKED seqnum.Value) { +func (s *SACKScoreboard) Copy() (sackBlocks []header.SACKBlock, sacked seqnum.Size, maxSACKED seqnum.Value) { s.ranges.Ascend(func(i btree.Item) bool { sackBlocks = append(sackBlocks, i.(header.SACKBlock)) return true }) - return sackBlocks, s.maxSACKED + return sackBlocks, s.sacked, s.maxSACKED } // IsRangeLost implements the IsLost(SeqNum) operation defined in RFC 6675 diff --git a/pkg/tcpip/transport/tcp/snd.go b/pkg/tcpip/transport/tcp/snd.go index 0fee7ab72..daf28a49a 100644 --- a/pkg/tcpip/transport/tcp/snd.go +++ b/pkg/tcpip/transport/tcp/snd.go @@ -124,6 +124,10 @@ type sender struct { rtt rtt rto time.Duration + // mss is the largest segment that can be sent without fragmentation. + // Initialized when the sender is created, read-only afterwards. + mss int + // maxPayloadSize is the maximum size of the payload of a given segment. // It is initialized on demand. 
maxPayloadSize int @@ -201,6 +205,7 @@ func newSender(ep *endpoint, iss, irs seqnum.Value, sndWnd seqnum.Size, mss uint rto: 1 * time.Second, rttMeasureSeqNum: iss + 1, lastSendTime: time.Now(), + mss: int(mss), maxPayloadSize: maxPayloadSize, maxSentAck: irs + 1, fr: fastRecovery{ -- cgit v1.2.3