summaryrefslogtreecommitdiffhomepage
path: root/pkg/tcpip
diff options
context:
space:
mode:
authorRahat Mahmood <rahat@google.com>2019-08-02 12:51:23 -0700
committergVisor bot <gvisor-bot@google.com>2019-08-02 12:52:40 -0700
commit2906dffcdbd4398361d5d7f718faa4f24fdc3b8e (patch)
treea4579e1fca9e9dec457329c7b4f4b96b8206bda3 /pkg/tcpip
parentaaaefdf9cadf033fa281b612315c3227f5ab1c7a (diff)
Automated rollback of changelist 261191548
PiperOrigin-RevId: 261373749
Diffstat (limited to 'pkg/tcpip')
-rw-r--r--pkg/tcpip/stack/stack.go30
-rw-r--r--pkg/tcpip/tcpip.go8
-rw-r--r--pkg/tcpip/transport/tcp/endpoint.go56
-rw-r--r--pkg/tcpip/transport/tcp/rcv.go27
-rw-r--r--pkg/tcpip/transport/tcp/sack_scoreboard.go4
-rw-r--r--pkg/tcpip/transport/tcp/snd.go5
6 files changed, 45 insertions, 85 deletions
diff --git a/pkg/tcpip/stack/stack.go b/pkg/tcpip/stack/stack.go
index 7c31cf493..6156c3f46 100644
--- a/pkg/tcpip/stack/stack.go
+++ b/pkg/tcpip/stack/stack.go
@@ -208,10 +208,6 @@ type TCPSenderState struct {
// Cubic holds the state related to CUBIC congestion control.
Cubic TCPCubicState
-
- // MSS is the size of the largest segment that can be sent without
- // fragmentation.
- MSS int
}
// TCPSACKInfo holds TCP SACK related information for a given TCP endpoint.
@@ -224,9 +220,6 @@ type TCPSACKInfo struct {
// from the peer endpoint.
ReceivedBlocks []header.SACKBlock
- // Sacked is the current number of bytes held in the SACK scoreboard.
- Sacked seqnum.Size
-
// MaxSACKED is the highest sequence number that has been SACKED
// by the peer.
MaxSACKED seqnum.Value
@@ -276,14 +269,6 @@ type TCPEndpointState struct {
// ID is a copy of the TransportEndpointID for the endpoint.
ID TCPEndpointID
- // ProtocolState denotes the TCP state the endpoint is currently
- // in, encoded in a netstack-specific manner. Should be translated
- // to the Linux ABI before exposing to userspace.
- ProtocolState uint32
-
- // AMSS is the MSS advertised to the peer by this endpoint.
- AMSS uint16
-
// SegTime denotes the absolute time when this segment was received.
SegTime time.Time
@@ -301,18 +286,6 @@ type TCPEndpointState struct {
// RcvClosed if true, indicates the endpoint has been closed for reading.
RcvClosed bool
- // RcvLastAck is the time of receipt of the last packet with the
- // ACK flag set.
- RcvLastAckNanos int64
-
- // RcvLastData is the time of reciept of the last packet
- // containing data.
- RcvLastDataNanos int64
-
- // RcvMSS is the size of the largest segment the receiver is willing to
- // accept, not including TCP headers and options.
- RcvMSS int
-
// SendTSOk is used to indicate when the TS Option has been negotiated.
// When sendTSOk is true every non-RST segment should carry a TS as per
// RFC7323#section-1.1.
@@ -353,9 +326,6 @@ type TCPEndpointState struct {
// SndMTU is the smallest MTU seen in the control packets received.
SndMTU int
- // MaxOptionSize is the maximum size of TCP options.
- MaxOptionSize int
-
// Receiver holds variables related to the TCP receiver for the endpoint.
Receiver TCPReceiverState
diff --git a/pkg/tcpip/tcpip.go b/pkg/tcpip/tcpip.go
index 690c00edb..4208c0303 100644
--- a/pkg/tcpip/tcpip.go
+++ b/pkg/tcpip/tcpip.go
@@ -476,6 +476,14 @@ type QuickAckOption int
// Only supported on Unix sockets.
type PasscredOption int
+// TCPInfoOption is used by GetSockOpt to expose TCP statistics.
+//
+// TODO(b/64800844): Add and populate stat fields.
+type TCPInfoOption struct {
+ RTT time.Duration
+ RTTVar time.Duration
+}
+
// KeepaliveEnabledOption is used by SetSockOpt/GetSockOpt to specify whether
// TCP keepalive is enabled for this socket.
type KeepaliveEnabledOption int
diff --git a/pkg/tcpip/transport/tcp/endpoint.go b/pkg/tcpip/transport/tcp/endpoint.go
index e94307bd5..cc49c8272 100644
--- a/pkg/tcpip/transport/tcp/endpoint.go
+++ b/pkg/tcpip/transport/tcp/endpoint.go
@@ -108,9 +108,6 @@ func (s EndpointState) String() string {
}
}
-// InfoOption is used by GetSockOpt to expose TCP endpoint state.
-type InfoOption stack.TCPEndpointState
-
// Reasons for notifying the protocol goroutine.
const (
notifyNonZeroReceiveWindow = 1 << iota
@@ -205,14 +202,12 @@ type endpoint struct {
// to indicate to users that no more data is coming.
//
// rcvListMu can be taken after the endpoint mu below.
- rcvListMu sync.Mutex `state:"nosave"`
- rcvList segmentList `state:"wait"`
- rcvClosed bool
- rcvBufSize int
- rcvBufUsed int
- rcvAutoParams rcvBufAutoTuneParams
- rcvLastAckNanos int64 // timestamp
- rcvLastDataNanos int64 // timestamp
+ rcvListMu sync.Mutex `state:"nosave"`
+ rcvList segmentList `state:"wait"`
+ rcvClosed bool
+ rcvBufSize int
+ rcvBufUsed int
+ rcvAutoParams rcvBufAutoTuneParams
// zeroWindow indicates that the window was closed due to receive buffer
// space being filled up. This is set by the worker goroutine before
// moving a segment to the rcvList. This setting is cleared by the
@@ -1203,10 +1198,17 @@ func (e *endpoint) GetSockOpt(opt interface{}) *tcpip.Error {
}
return nil
- case *InfoOption:
- e.workMu.Lock()
- *o = InfoOption(e.completeState())
- e.workMu.Unlock()
+ case *tcpip.TCPInfoOption:
+ *o = tcpip.TCPInfoOption{}
+ e.mu.RLock()
+ snd := e.snd
+ e.mu.RUnlock()
+ if snd != nil {
+ snd.rtt.Lock()
+ o.RTT = snd.rtt.srtt
+ o.RTTVar = snd.rtt.rttvar
+ snd.rtt.Unlock()
+ }
return nil
case *tcpip.KeepaliveEnabledOption:
@@ -1931,27 +1933,22 @@ func (e *endpoint) maxOptionSize() (size int) {
}
// completeState makes a full copy of the endpoint and returns it. This is used
-// before invoking the probe and for getsockopt(TCP_INFO). The state returned
-// may not be fully consistent if there are intervening syscalls when the state
-// is being copied.
+// before invoking the probe. The state returned may not be fully consistent if
+// there are intervening syscalls when the state is being copied.
func (e *endpoint) completeState() stack.TCPEndpointState {
var s stack.TCPEndpointState
s.SegTime = time.Now()
- e.mu.RLock()
+ // Copy EndpointID.
+ e.mu.Lock()
s.ID = stack.TCPEndpointID(e.id)
- s.ProtocolState = uint32(e.state)
- s.AMSS = e.amss
- s.RcvMSS = int(e.amss) - e.maxOptionSize()
- e.mu.RUnlock()
+ e.mu.Unlock()
// Copy endpoint rcv state.
e.rcvListMu.Lock()
s.RcvBufSize = e.rcvBufSize
s.RcvBufUsed = e.rcvBufUsed
s.RcvClosed = e.rcvClosed
- s.RcvLastAckNanos = e.rcvLastAckNanos
- s.RcvLastDataNanos = e.rcvLastDataNanos
s.RcvAutoParams.MeasureTime = e.rcvAutoParams.measureTime
s.RcvAutoParams.CopiedBytes = e.rcvAutoParams.copied
s.RcvAutoParams.PrevCopiedBytes = e.rcvAutoParams.prevCopied
@@ -1959,7 +1956,6 @@ func (e *endpoint) completeState() stack.TCPEndpointState {
s.RcvAutoParams.RTTMeasureSeqNumber = e.rcvAutoParams.rttMeasureSeqNumber
s.RcvAutoParams.RTTMeasureTime = e.rcvAutoParams.rttMeasureTime
s.RcvAutoParams.Disabled = e.rcvAutoParams.disabled
-
e.rcvListMu.Unlock()
// Endpoint TCP Option state.
@@ -1969,7 +1965,7 @@ func (e *endpoint) completeState() stack.TCPEndpointState {
s.SACKPermitted = e.sackPermitted
s.SACK.Blocks = make([]header.SACKBlock, e.sack.NumBlocks)
copy(s.SACK.Blocks, e.sack.Blocks[:e.sack.NumBlocks])
- s.SACK.ReceivedBlocks, s.SACK.Sacked, s.SACK.MaxSACKED = e.scoreboard.Copy()
+ s.SACK.ReceivedBlocks, s.SACK.MaxSACKED = e.scoreboard.Copy()
// Copy endpoint send state.
e.sndBufMu.Lock()
@@ -2013,14 +2009,12 @@ func (e *endpoint) completeState() stack.TCPEndpointState {
RTTMeasureTime: e.snd.rttMeasureTime,
Closed: e.snd.closed,
RTO: e.snd.rto,
- MSS: e.snd.mss,
MaxPayloadSize: e.snd.maxPayloadSize,
SndWndScale: e.snd.sndWndScale,
MaxSentAck: e.snd.maxSentAck,
}
e.snd.rtt.Lock()
s.Sender.SRTT = e.snd.rtt.srtt
- s.Sender.RTTVar = e.snd.rtt.rttvar
s.Sender.SRTTInited = e.snd.rtt.srttInited
e.snd.rtt.Unlock()
@@ -2065,8 +2059,8 @@ func (e *endpoint) initGSO() {
// State implements tcpip.Endpoint.State. It exports the endpoint's protocol
// state for diagnostics.
func (e *endpoint) State() uint32 {
- e.mu.RLock()
- defer e.mu.RUnlock()
+ e.mu.Lock()
+ defer e.mu.Unlock()
return uint32(e.state)
}
diff --git a/pkg/tcpip/transport/tcp/rcv.go b/pkg/tcpip/transport/tcp/rcv.go
index a8f490c4a..e90f9a7d9 100644
--- a/pkg/tcpip/transport/tcp/rcv.go
+++ b/pkg/tcpip/transport/tcp/rcv.go
@@ -220,24 +220,25 @@ func (r *receiver) consumeSegment(s *segment, segSeq seqnum.Value, segLen seqnum
return true
}
-// updateRTTLocked updates the receiver RTT measurement based on the sequence
-// number of the received segment.
-//
-// Precondition: Caller must hold r.ep.rcvListMu.
-func (r *receiver) updateRTTLocked() {
+// updateRTT updates the receiver RTT measurement based on the sequence number
+// of the received segment.
+func (r *receiver) updateRTT() {
// From: https://public.lanl.gov/radiant/pubs/drs/sc2001-poster.pdf
//
// A system that is only transmitting acknowledgements can still
// estimate the round-trip time by observing the time between when a byte
// is first acknowledged and the receipt of data that is at least one
// window beyond the sequence number that was acknowledged.
+ r.ep.rcvListMu.Lock()
if r.ep.rcvAutoParams.rttMeasureTime.IsZero() {
// New measurement.
r.ep.rcvAutoParams.rttMeasureTime = time.Now()
r.ep.rcvAutoParams.rttMeasureSeqNumber = r.rcvNxt.Add(r.rcvWnd)
+ r.ep.rcvListMu.Unlock()
return
}
if r.rcvNxt.LessThan(r.ep.rcvAutoParams.rttMeasureSeqNumber) {
+ r.ep.rcvListMu.Unlock()
return
}
rtt := time.Since(r.ep.rcvAutoParams.rttMeasureTime)
@@ -249,6 +250,7 @@ func (r *receiver) updateRTTLocked() {
}
r.ep.rcvAutoParams.rttMeasureTime = time.Now()
r.ep.rcvAutoParams.rttMeasureSeqNumber = r.rcvNxt.Add(r.rcvWnd)
+ r.ep.rcvListMu.Unlock()
}
// handleRcvdSegment handles TCP segments directed at the connection managed by
@@ -289,20 +291,11 @@ func (r *receiver) handleRcvdSegment(s *segment) {
return
}
- r.ep.rcvListMu.Lock()
- // FIXME(b/137581805): Using the runtime clock here is incorrect as it
- // doesn't account for potentially virtualized time.
- now := time.Now().UnixNano()
- if s.flagIsSet(header.TCPFlagAck) {
- r.ep.rcvLastAckNanos = now
- }
+ // Since we consumed a segment update the receiver's RTT estimate
+ // if required.
if segLen > 0 {
- // Since we consumed a segment update the receiver's RTT estimate if
- // required.
- r.ep.rcvLastDataNanos = now
- r.updateRTTLocked()
+ r.updateRTT()
}
- r.ep.rcvListMu.Unlock()
// By consuming the current segment, we may have filled a gap in the
// sequence number domain that allows pending segments to be consumed
diff --git a/pkg/tcpip/transport/tcp/sack_scoreboard.go b/pkg/tcpip/transport/tcp/sack_scoreboard.go
index 02e52a63b..7ef2df377 100644
--- a/pkg/tcpip/transport/tcp/sack_scoreboard.go
+++ b/pkg/tcpip/transport/tcp/sack_scoreboard.go
@@ -208,12 +208,12 @@ func (s *SACKScoreboard) Delete(seq seqnum.Value) {
}
// Copy provides a copy of the SACK scoreboard.
-func (s *SACKScoreboard) Copy() (sackBlocks []header.SACKBlock, sacked seqnum.Size, maxSACKED seqnum.Value) {
+func (s *SACKScoreboard) Copy() (sackBlocks []header.SACKBlock, maxSACKED seqnum.Value) {
s.ranges.Ascend(func(i btree.Item) bool {
sackBlocks = append(sackBlocks, i.(header.SACKBlock))
return true
})
- return sackBlocks, s.sacked, s.maxSACKED
+ return sackBlocks, s.maxSACKED
}
// IsRangeLost implements the IsLost(SeqNum) operation defined in RFC 6675
diff --git a/pkg/tcpip/transport/tcp/snd.go b/pkg/tcpip/transport/tcp/snd.go
index daf28a49a..0fee7ab72 100644
--- a/pkg/tcpip/transport/tcp/snd.go
+++ b/pkg/tcpip/transport/tcp/snd.go
@@ -124,10 +124,6 @@ type sender struct {
rtt rtt
rto time.Duration
- // mss is the largest segment that can be sent without fragmentation.
- // Initialized when then sender is created, read-only afterwards.
- mss int
-
// maxPayloadSize is the maximum size of the payload of a given segment.
// It is initialized on demand.
maxPayloadSize int
@@ -205,7 +201,6 @@ func newSender(ep *endpoint, iss, irs seqnum.Value, sndWnd seqnum.Size, mss uint
rto: 1 * time.Second,
rttMeasureSeqNum: iss + 1,
lastSendTime: time.Now(),
- mss: int(mss),
maxPayloadSize: maxPayloadSize,
maxSentAck: irs + 1,
fr: fastRecovery{