diff options
author | Nick Brown <nickbrow@google.com> | 2021-04-19 16:41:56 -0700 |
---|---|---|
committer | gVisor bot <gvisor-bot@google.com> | 2021-04-19 16:43:30 -0700 |
commit | 7bfc76d946b6c3f02fc32831ddc282ac2816d5ed (patch) | |
tree | 606ee32bcf8b932d9675d18187cfeced3873105a /pkg/tcpip/stack | |
parent | 276ff149a4555b69c4c99fdcd4e1a22ccc8b9463 (diff) |
De-duplicate TCP state in TCPEndpointState vs tcp.endpoint
This change replaces individual private members in tcp.endpoint with a single
private TCPEndpointState member.
Some internal substructures within endpoint (receiver, sender) have been broken
into a public substructure (which is then copied into the TCPEndpointState
returned from completeState()) alongside other private fields.
Fixes #4466
PiperOrigin-RevId: 369329514
Diffstat (limited to 'pkg/tcpip/stack')
-rw-r--r-- | pkg/tcpip/stack/BUILD | 1 | ||||
-rw-r--r-- | pkg/tcpip/stack/stack.go | 301 | ||||
-rw-r--r-- | pkg/tcpip/stack/stack_global_state.go | 72 | ||||
-rw-r--r-- | pkg/tcpip/stack/tcp.go | 455 |
4 files changed, 528 insertions, 301 deletions
diff --git a/pkg/tcpip/stack/BUILD b/pkg/tcpip/stack/BUILD index d19b879dc..2bd6a67f5 100644 --- a/pkg/tcpip/stack/BUILD +++ b/pkg/tcpip/stack/BUILD @@ -67,6 +67,7 @@ go_library( "stack.go", "stack_global_state.go", "stack_options.go", + "tcp.go", "transport_demuxer.go", "tuple_list.go", ], diff --git a/pkg/tcpip/stack/stack.go b/pkg/tcpip/stack/stack.go index f23112410..8c8909acd 100644 --- a/pkg/tcpip/stack/stack.go +++ b/pkg/tcpip/stack/stack.go @@ -35,7 +35,6 @@ import ( "gvisor.dev/gvisor/pkg/tcpip/buffer" "gvisor.dev/gvisor/pkg/tcpip/header" "gvisor.dev/gvisor/pkg/tcpip/ports" - "gvisor.dev/gvisor/pkg/tcpip/seqnum" "gvisor.dev/gvisor/pkg/waiter" ) @@ -56,306 +55,6 @@ type transportProtocolState struct { defaultHandler func(id TransportEndpointID, pkt *PacketBuffer) bool } -// TCPProbeFunc is the expected function type for a TCP probe function to be -// passed to stack.AddTCPProbe. -type TCPProbeFunc func(s TCPEndpointState) - -// TCPCubicState is used to hold a copy of the internal cubic state when the -// TCPProbeFunc is invoked. -type TCPCubicState struct { - WLastMax float64 - WMax float64 - T time.Time - TimeSinceLastCongestion time.Duration - C float64 - K float64 - Beta float64 - WC float64 - WEst float64 -} - -// TCPRACKState is used to hold a copy of the internal RACK state when the -// TCPProbeFunc is invoked. -type TCPRACKState struct { - XmitTime time.Time - EndSequence seqnum.Value - FACK seqnum.Value - RTT time.Duration - Reord bool - DSACKSeen bool - ReoWnd time.Duration - ReoWndIncr uint8 - ReoWndPersist int8 - RTTSeq seqnum.Value -} - -// TCPEndpointID is the unique 4 tuple that identifies a given endpoint. -type TCPEndpointID struct { - // LocalPort is the local port associated with the endpoint. - LocalPort uint16 - - // LocalAddress is the local [network layer] address associated with - // the endpoint. - LocalAddress tcpip.Address - - // RemotePort is the remote port associated with the endpoint. - RemotePort uint16 - - // RemoteAddress it the remote [network layer] address associated with - // the endpoint. - RemoteAddress tcpip.Address -} - -// TCPFastRecoveryState holds a copy of the internal fast recovery state of a -// TCP endpoint. -type TCPFastRecoveryState struct { - // Active if true indicates the endpoint is in fast recovery. - Active bool - - // First is the first unacknowledged sequence number being recovered. - First seqnum.Value - - // Last is the 'recover' sequence number that indicates the point at - // which we should exit recovery barring any timeouts etc. - Last seqnum.Value - - // MaxCwnd is the maximum value we are permitted to grow the congestion - // window during recovery. This is set at the time we enter recovery. - MaxCwnd int - - // HighRxt is the highest sequence number which has been retransmitted - // during the current loss recovery phase. - // See: RFC 6675 Section 2 for details. - HighRxt seqnum.Value - - // RescueRxt is the highest sequence number which has been - // optimistically retransmitted to prevent stalling of the ACK clock - // when there is loss at the end of the window and no new data is - // available for transmission. - // See: RFC 6675 Section 2 for details. - RescueRxt seqnum.Value -} - -// TCPReceiverState holds a copy of the internal state of the receiver for -// a given TCP endpoint. -type TCPReceiverState struct { - // RcvNxt is the TCP variable RCV.NXT. - RcvNxt seqnum.Value - - // RcvAcc is the TCP variable RCV.ACC. - RcvAcc seqnum.Value - - // RcvWndScale is the window scaling to use for inbound segments. - RcvWndScale uint8 - - // PendingBufUsed is the number of bytes pending in the receive - // queue. - PendingBufUsed int -} - -// TCPSenderState holds a copy of the internal state of the sender for -// a given TCP Endpoint. -type TCPSenderState struct { - // LastSendTime is the time at which we sent the last segment. - LastSendTime time.Time - - // DupAckCount is the number of Duplicate ACK's received. - DupAckCount int - - // SndCwnd is the size of the sending congestion window in packets. - SndCwnd int - - // Ssthresh is the slow start threshold in packets. - Ssthresh int - - // SndCAAckCount is the number of packets consumed in congestion - // avoidance mode. - SndCAAckCount int - - // Outstanding is the number of packets in flight. - Outstanding int - - // SackedOut is the number of packets which have been selectively acked. - SackedOut int - - // SndWnd is the send window size in bytes. - SndWnd seqnum.Size - - // SndUna is the next unacknowledged sequence number. - SndUna seqnum.Value - - // SndNxt is the sequence number of the next segment to be sent. - SndNxt seqnum.Value - - // RTTMeasureSeqNum is the sequence number being used for the latest RTT - // measurement. - RTTMeasureSeqNum seqnum.Value - - // RTTMeasureTime is the time when the RTTMeasureSeqNum was sent. - RTTMeasureTime time.Time - - // Closed indicates that the caller has closed the endpoint for sending. - Closed bool - - // SRTT is the smoothed round-trip time as defined in section 2 of - // RFC 6298. - SRTT time.Duration - - // RTO is the retransmit timeout as defined in section of 2 of RFC 6298. - RTO time.Duration - - // RTTVar is the round-trip time variation as defined in section 2 of - // RFC 6298. - RTTVar time.Duration - - // SRTTInited if true indicates take a valid RTT measurement has been - // completed. - SRTTInited bool - - // MaxPayloadSize is the maximum size of the payload of a given segment. - // It is initialized on demand. - MaxPayloadSize int - - // SndWndScale is the number of bits to shift left when reading the send - // window size from a segment. - SndWndScale uint8 - - // MaxSentAck is the highest acknowledgement number sent till now. - MaxSentAck seqnum.Value - - // FastRecovery holds the fast recovery state for the endpoint. - FastRecovery TCPFastRecoveryState - - // Cubic holds the state related to CUBIC congestion control. - Cubic TCPCubicState - - // RACKState holds the state related to RACK loss detection algorithm. - RACKState TCPRACKState -} - -// TCPSACKInfo holds TCP SACK related information for a given TCP endpoint. -type TCPSACKInfo struct { - // Blocks is the list of SACK Blocks that identify the out of order segments - // held by a given TCP endpoint. - Blocks []header.SACKBlock - - // ReceivedBlocks are the SACK blocks received by this endpoint - // from the peer endpoint. - ReceivedBlocks []header.SACKBlock - - // MaxSACKED is the highest sequence number that has been SACKED - // by the peer. - MaxSACKED seqnum.Value -} - -// RcvBufAutoTuneParams holds state related to TCP receive buffer auto-tuning. -type RcvBufAutoTuneParams struct { - // MeasureTime is the time at which the current measurement - // was started. - MeasureTime time.Time - - // CopiedBytes is the number of bytes copied to user space since - // this measure began. - CopiedBytes int - - // PrevCopiedBytes is the number of bytes copied to userspace in - // the previous RTT period. - PrevCopiedBytes int - - // RcvBufSize is the auto tuned receive buffer size. - RcvBufSize int - - // RTT is the smoothed RTT as measured by observing the time between - // when a byte is first acknowledged and the receipt of data that is at - // least one window beyond the sequence number that was acknowledged. - RTT time.Duration - - // RTTVar is the "round-trip time variation" as defined in section 2 - // of RFC6298. - RTTVar time.Duration - - // RTTMeasureSeqNumber is the highest acceptable sequence number at the - // time this RTT measurement period began. - RTTMeasureSeqNumber seqnum.Value - - // RTTMeasureTime is the absolute time at which the current RTT - // measurement period began. - RTTMeasureTime time.Time - - // Disabled is true if an explicit receive buffer is set for the - // endpoint. - Disabled bool -} - -// TCPEndpointState is a copy of the internal state of a TCP endpoint. -type TCPEndpointState struct { - // ID is a copy of the TransportEndpointID for the endpoint. - ID TCPEndpointID - - // SegTime denotes the absolute time when this segment was received. - SegTime time.Time - - // RcvBufSize is the size of the receive socket buffer for the endpoint. - RcvBufSize int - - // RcvBufUsed is the amount of bytes actually held in the receive socket - // buffer for the endpoint. - RcvBufUsed int - - // RcvBufAutoTuneParams is used to hold state variables to compute - // the auto tuned receive buffer size. - RcvAutoParams RcvBufAutoTuneParams - - // RcvClosed if true, indicates the endpoint has been closed for reading. - RcvClosed bool - - // SendTSOk is used to indicate when the TS Option has been negotiated. - // When sendTSOk is true every non-RST segment should carry a TS as per - // RFC7323#section-1.1. - SendTSOk bool - - // RecentTS is the timestamp that should be sent in the TSEcr field of - // the timestamp for future segments sent by the endpoint. This field is - // updated if required when a new segment is received by this endpoint. - RecentTS uint32 - - // TSOffset is a randomized offset added to the value of the TSVal field - // in the timestamp option. - TSOffset uint32 - - // SACKPermitted is set to true if the peer sends the TCPSACKPermitted - // option in the SYN/SYN-ACK. - SACKPermitted bool - - // SACK holds TCP SACK related information for this endpoint. - SACK TCPSACKInfo - - // SndBufSize is the size of the socket send buffer. - SndBufSize int - - // SndBufUsed is the number of bytes held in the socket send buffer. - SndBufUsed int - - // SndClosed indicates that the endpoint has been closed for sends. - SndClosed bool - - // SndBufInQueue is the number of bytes in the send queue. - SndBufInQueue seqnum.Size - - // PacketTooBigCount is used to notify the main protocol routine how - // many times a "packet too big" control packet is received. - PacketTooBigCount int - - // SndMTU is the smallest MTU seen in the control packets received. - SndMTU int - - // Receiver holds variables related to the TCP receiver for the endpoint. - Receiver TCPReceiverState - - // Sender holds state related to the TCP Sender for the endpoint. - Sender TCPSenderState -} - // ResumableEndpoint is an endpoint that needs to be resumed after restore. type ResumableEndpoint interface { // Resume resumes an endpoint after restore. This can be used to restart diff --git a/pkg/tcpip/stack/stack_global_state.go b/pkg/tcpip/stack/stack_global_state.go index dfec4258a..33824afd0 100644 --- a/pkg/tcpip/stack/stack_global_state.go +++ b/pkg/tcpip/stack/stack_global_state.go @@ -14,6 +14,78 @@ package stack +import "time" + // StackFromEnv is the global stack created in restore run. // FIXME(b/36201077) var StackFromEnv *Stack + +// saveT is invoked by stateify. +func (t *TCPCubicState) saveT() unixTime { + return unixTime{t.T.Unix(), t.T.UnixNano()} +} + +// loadT is invoked by stateify. +func (t *TCPCubicState) loadT(unix unixTime) { + t.T = time.Unix(unix.second, unix.nano) +} + +// saveXmitTime is invoked by stateify. +func (t *TCPRACKState) saveXmitTime() unixTime { + return unixTime{t.XmitTime.Unix(), t.XmitTime.UnixNano()} +} + +// loadXmitTime is invoked by stateify. +func (t *TCPRACKState) loadXmitTime(unix unixTime) { + t.XmitTime = time.Unix(unix.second, unix.nano) +} + +// saveLastSendTime is invoked by stateify. +func (t *TCPSenderState) saveLastSendTime() unixTime { + return unixTime{t.LastSendTime.Unix(), t.LastSendTime.UnixNano()} +} + +// loadLastSendTime is invoked by stateify. +func (t *TCPSenderState) loadLastSendTime(unix unixTime) { + t.LastSendTime = time.Unix(unix.second, unix.nano) +} + +// saveRTTMeasureTime is invoked by stateify. +func (t *TCPSenderState) saveRTTMeasureTime() unixTime { + return unixTime{t.RTTMeasureTime.Unix(), t.RTTMeasureTime.UnixNano()} +} + +// loadRTTMeasureTime is invoked by stateify. +func (t *TCPSenderState) loadRTTMeasureTime(unix unixTime) { + t.RTTMeasureTime = time.Unix(unix.second, unix.nano) +} + +// saveMeasureTime is invoked by stateify. +func (r *RcvBufAutoTuneParams) saveMeasureTime() unixTime { + return unixTime{r.MeasureTime.Unix(), r.MeasureTime.UnixNano()} +} + +// loadMeasureTime is invoked by stateify. +func (r *RcvBufAutoTuneParams) loadMeasureTime(unix unixTime) { + r.MeasureTime = time.Unix(unix.second, unix.nano) +} + +// saveRTTMeasureTime is invoked by stateify. +func (r *RcvBufAutoTuneParams) saveRTTMeasureTime() unixTime { + return unixTime{r.RTTMeasureTime.Unix(), r.RTTMeasureTime.UnixNano()} +} + +// loadRTTMeasureTime is invoked by stateify. +func (r *RcvBufAutoTuneParams) loadRTTMeasureTime(unix unixTime) { + r.RTTMeasureTime = time.Unix(unix.second, unix.nano) +} + +// saveSegTime is invoked by stateify. +func (t *TCPEndpointState) saveSegTime() unixTime { + return unixTime{t.SegTime.Unix(), t.SegTime.UnixNano()} +} + +// loadSegTime is invoked by stateify. +func (t *TCPEndpointState) loadSegTime(unix unixTime) { + t.SegTime = time.Unix(unix.second, unix.nano) +} diff --git a/pkg/tcpip/stack/tcp.go b/pkg/tcpip/stack/tcp.go new file mode 100644 index 000000000..2e32b8a8e --- /dev/null +++ b/pkg/tcpip/stack/tcp.go @@ -0,0 +1,455 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package stack + +import ( + "time" + + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/pkg/tcpip/seqnum" +) + +// TCPProbeFunc is the expected function type for a TCP probe function to be +// passed to stack.AddTCPProbe. +type TCPProbeFunc func(s TCPEndpointState) + +// TCPCubicState is used to hold a copy of the internal cubic state when the +// TCPProbeFunc is invoked. +// +// +stateify savable +type TCPCubicState struct { + // WLastMax is the previous wMax value. + WLastMax float64 + + // WMax is the value of the congestion window at the time of the last + // congestion event. + WMax float64 + + // T is the time when the current congestion avoidance was entered. + T time.Time `state:".(unixTime)"` + + // TimeSinceLastCongestion denotes the time since the current + // congestion avoidance was entered. + TimeSinceLastCongestion time.Duration + + // C is the cubic constant as specified in RFC8312, page 11. + C float64 + + // K is the time period (in seconds) that the above function takes to + // increase the current window size to WMax if there are no further + // congestion events and is calculated using the following equation: + // + // K = cubic_root(WMax*(1-beta_cubic)/C) (Eq. 2, page 5) + K float64 + + // Beta is the CUBIC multiplication decrease factor. That is, when a + // congestion event is detected, CUBIC reduces its cwnd to + // WC(0)=WMax*beta_cubic. + Beta float64 + + // WC is window computed by CUBIC at time TimeSinceLastCongestion. It's + // calculated using the formula: + // + // WC(TimeSinceLastCongestion) = C*(t-K)^3 + WMax (Eq. 1) + WC float64 + + // WEst is the window computed by CUBIC at time + // TimeSinceLastCongestion+RTT i.e WC(TimeSinceLastCongestion+RTT). + WEst float64 +} + +// TCPRACKState is used to hold a copy of the internal RACK state when the +// TCPProbeFunc is invoked. +// +// +stateify savable +type TCPRACKState struct { + // XmitTime is the transmission timestamp of the most recent + // acknowledged segment. + XmitTime time.Time `state:".(unixTime)"` + + // EndSequence is the ending TCP sequence number of the most recent + // acknowledged segment. + EndSequence seqnum.Value + + // FACK is the highest selectively or cumulatively acknowledged + // sequence. + FACK seqnum.Value + + // RTT is the round trip time of the most recently delivered packet on + // the connection (either cumulatively acknowledged or selectively + // acknowledged) that was not marked invalid as a possible spurious + // retransmission. + RTT time.Duration + + // Reord is true iff reordering has been detected on this connection. + Reord bool + + // DSACKSeen is true iff the connection has seen a DSACK. + DSACKSeen bool + + // ReoWnd is the reordering window time used for recording packet + // transmission times. It is used to defer the moment at which RACK + // marks a packet lost. + ReoWnd time.Duration + + // ReoWndIncr is the multiplier applied to adjust reorder window. + ReoWndIncr uint8 + + // ReoWndPersist is the number of loss recoveries before resetting + // reorder window. + ReoWndPersist int8 + + // RTTSeq is the SND.NXT when RTT is updated. + RTTSeq seqnum.Value +} + +// TCPEndpointID is the unique 4 tuple that identifies a given endpoint. +// +// +stateify savable +type TCPEndpointID struct { + // LocalPort is the local port associated with the endpoint. + LocalPort uint16 + + // LocalAddress is the local [network layer] address associated with + // the endpoint. + LocalAddress tcpip.Address + + // RemotePort is the remote port associated with the endpoint. + RemotePort uint16 + + // RemoteAddress it the remote [network layer] address associated with + // the endpoint. + RemoteAddress tcpip.Address +} + +// TCPFastRecoveryState holds a copy of the internal fast recovery state of a +// TCP endpoint. +// +// +stateify savable +type TCPFastRecoveryState struct { + // Active if true indicates the endpoint is in fast recovery. The + // following fields are only meaningful when Active is true. + Active bool + + // First is the first unacknowledged sequence number being recovered. + First seqnum.Value + + // Last is the 'recover' sequence number that indicates the point at + // which we should exit recovery barring any timeouts etc. + Last seqnum.Value + + // MaxCwnd is the maximum value we are permitted to grow the congestion + // window during recovery. This is set at the time we enter recovery. + // It exists to avoid attacks where the receiver intentionally sends + // duplicate acks to artificially inflate the sender's cwnd. + MaxCwnd int + + // HighRxt is the highest sequence number which has been retransmitted + // during the current loss recovery phase. See: RFC 6675 Section 2 for + // details. + HighRxt seqnum.Value + + // RescueRxt is the highest sequence number which has been + // optimistically retransmitted to prevent stalling of the ACK clock + // when there is loss at the end of the window and no new data is + // available for transmission. See: RFC 6675 Section 2 for details. + RescueRxt seqnum.Value +} + +// TCPReceiverState holds a copy of the internal state of the receiver for a +// given TCP endpoint. +// +// +stateify savable +type TCPReceiverState struct { + // RcvNxt is the TCP variable RCV.NXT. + RcvNxt seqnum.Value + + // RcvAcc is one beyond the last acceptable sequence number. That is, + // the "largest" sequence value that the receiver has announced to its + // peer that it's willing to accept. This may be different than RcvNxt + // + (last advertised receive window) if the receive window is reduced; + // in that case we have to reduce the window as we receive more data + // instead of shrinking it. + RcvAcc seqnum.Value + + // RcvWndScale is the window scaling to use for inbound segments. + RcvWndScale uint8 + + // PendingBufUsed is the number of bytes pending in the receive queue. + PendingBufUsed int +} + +// TCPRTTState holds a copy of information about the endpoint's round trip +// time. +// +// +stateify savable +type TCPRTTState struct { + // SRTT is the smoothed round trip time defined in section 2 of RFC + // 6298. + SRTT time.Duration + + // RTTVar is the round-trip time variation as defined in section 2 of + // RFC 6298. + RTTVar time.Duration + + // SRTTInited if true indicates that a valid RTT measurement has been + // completed. + SRTTInited bool +} + +// TCPSenderState holds a copy of the internal state of the sender for a given +// TCP Endpoint. +// +// +stateify savable +type TCPSenderState struct { + // LastSendTime is the timestamp at which we sent the last segment. + LastSendTime time.Time `state:".(unixTime)"` + + // DupAckCount is the number of Duplicate ACKs received. It is used for + // fast retransmit. + DupAckCount int + + // SndCwnd is the size of the sending congestion window in packets. + SndCwnd int + + // Ssthresh is the threshold between slow start and congestion + // avoidance. + Ssthresh int + + // SndCAAckCount is the number of packets acknowledged during + // congestion avoidance. When enough packets have been ack'd (typically + // cwnd packets), the congestion window is incremented by one. + SndCAAckCount int + + // Outstanding is the number of packets that have been sent but not yet + // acknowledged. + Outstanding int + + // SackedOut is the number of packets which have been selectively + // acked. + SackedOut int + + // SndWnd is the send window size in bytes. + SndWnd seqnum.Size + + // SndUna is the next unacknowledged sequence number. + SndUna seqnum.Value + + // SndNxt is the sequence number of the next segment to be sent. + SndNxt seqnum.Value + + // RTTMeasureSeqNum is the sequence number being used for the latest + // RTT measurement. + RTTMeasureSeqNum seqnum.Value + + // RTTMeasureTime is the time when the RTTMeasureSeqNum was sent. + RTTMeasureTime time.Time `state:".(unixTime)"` + + // Closed indicates that the caller has closed the endpoint for + // sending. + Closed bool + + // RTO is the retransmit timeout as defined in section of 2 of RFC + // 6298. + RTO time.Duration + + // RTTState holds information about the endpoint's round trip time. + RTTState TCPRTTState + + // MaxPayloadSize is the maximum size of the payload of a given + // segment. It is initialized on demand. + MaxPayloadSize int + + // SndWndScale is the number of bits to shift left when reading the + // send window size from a segment. + SndWndScale uint8 + + // MaxSentAck is the highest acknowledgement number sent till now. + MaxSentAck seqnum.Value + + // FastRecovery holds the fast recovery state for the endpoint. + FastRecovery TCPFastRecoveryState + + // Cubic holds the state related to CUBIC congestion control. + Cubic TCPCubicState + + // RACKState holds the state related to RACK loss detection algorithm. + RACKState TCPRACKState +} + +// TCPSACKInfo holds TCP SACK related information for a given TCP endpoint. +// +// +stateify savable +type TCPSACKInfo struct { + // Blocks is the list of SACK Blocks that identify the out of order + // segments held by a given TCP endpoint. + Blocks []header.SACKBlock + + // ReceivedBlocks are the SACK blocks received by this endpoint from + // the peer endpoint. + ReceivedBlocks []header.SACKBlock + + // MaxSACKED is the highest sequence number that has been SACKED by the + // peer. + MaxSACKED seqnum.Value +} + +// RcvBufAutoTuneParams holds state related to TCP receive buffer auto-tuning. +// +// +stateify savable +type RcvBufAutoTuneParams struct { + // MeasureTime is the time at which the current measurement was + // started. + MeasureTime time.Time `state:".(unixTime)"` + + // CopiedBytes is the number of bytes copied to user space since this + // measure began. + CopiedBytes int + + // PrevCopiedBytes is the number of bytes copied to userspace in the + // previous RTT period. + PrevCopiedBytes int + + // RcvBufSize is the auto tuned receive buffer size. + RcvBufSize int + + // RTT is the smoothed RTT as measured by observing the time between + // when a byte is first acknowledged and the receipt of data that is at + // least one window beyond the sequence number that was acknowledged. + RTT time.Duration + + // RTTVar is the "round-trip time variation" as defined in section 2 of + // RFC6298. + RTTVar time.Duration + + // RTTMeasureSeqNumber is the highest acceptable sequence number at the + // time this RTT measurement period began. + RTTMeasureSeqNumber seqnum.Value + + // RTTMeasureTime is the absolute time at which the current RTT + // measurement period began. + RTTMeasureTime time.Time `state:".(unixTime)"` + + // Disabled is true if an explicit receive buffer is set for the + // endpoint. + Disabled bool +} + +// TCPRcvBufState contains information about the state of an endpoint's receive +// socket buffer. +// +// +stateify savable +type TCPRcvBufState struct { + // RcvBufSize is the size of the receive socket buffer for the + // endpoint. + RcvBufSize int + + // RcvBufUsed is the amount of bytes actually held in the receive + // socket buffer for the endpoint. + RcvBufUsed int + + // RcvBufAutoTuneParams is used to hold state variables to compute the + // auto tuned receive buffer size. + RcvAutoParams RcvBufAutoTuneParams + + // RcvClosed if true, indicates the endpoint has been closed for + // reading. + RcvClosed bool +} + +// TCPSndBufState contains information about the state of an endpoint's send +// socket buffer. +// +// +stateify savable +type TCPSndBufState struct { + // SndBufSize is the size of the socket send buffer. + SndBufSize int + + // SndBufUsed is the number of bytes held in the socket send buffer. + SndBufUsed int + + // SndClosed indicates that the endpoint has been closed for sends. + SndClosed bool + + // SndBufInQueue is the number of bytes in the send queue. + SndBufInQueue seqnum.Size + + // PacketTooBigCount is used to notify the main protocol routine how + // many times a "packet too big" control packet is received. + PacketTooBigCount int + + // SndMTU is the smallest MTU seen in the control packets received. + SndMTU int +} + +// TCPEndpointStateInner contains the members of TCPEndpointState used directly +// (that is, not within another containing struct) within the endpoint's +// internal implementation. +// +// +stateify savable +type TCPEndpointStateInner struct { + // TSOffset is a randomized offset added to the value of the TSVal + // field in the timestamp option. + TSOffset uint32 + + // SACKPermitted is set to true if the peer sends the TCPSACKPermitted + // option in the SYN/SYN-ACK. + SACKPermitted bool + + // SendTSOk is used to indicate when the TS Option has been negotiated. + // When sendTSOk is true every non-RST segment should carry a TS as per + // RFC7323#section-1.1. + SendTSOk bool + + // RecentTS is the timestamp that should be sent in the TSEcr field of + // the timestamp for future segments sent by the endpoint. This field + // is updated if required when a new segment is received by this + // endpoint. + RecentTS uint32 +} + +// TCPEndpointState is a copy of the internal state of a TCP endpoint. +// +// +stateify savable +type TCPEndpointState struct { + // TCPEndpointStateInner contains the members of TCPEndpointState used + // by the endpoint's internal implementation. + TCPEndpointStateInner + + // ID is a copy of the TransportEndpointID for the endpoint. + ID TCPEndpointID + + // SegTime denotes the absolute time when this segment was received. + SegTime time.Time `state:".(unixTime)"` + + // RcvBufState contains information about the state of the endpoint's + // receive socket buffer. + RcvBufState TCPRcvBufState + + // SndBufState contains information about the state of the endpoint's + // send socket buffer. + SndBufState TCPSndBufState + + // SACK holds TCP SACK related information for this endpoint. + SACK TCPSACKInfo + + // Receiver holds variables related to the TCP receiver for the + // endpoint. + Receiver TCPReceiverState + + // Sender holds state related to the TCP Sender for the endpoint. + Sender TCPSenderState +} |