summaryrefslogtreecommitdiffhomepage
path: root/pkg/tcpip/stack
diff options
context:
space:
mode:
Diffstat (limited to 'pkg/tcpip/stack')
-rw-r--r--pkg/tcpip/stack/stack.go301
-rw-r--r--pkg/tcpip/stack/stack_global_state.go72
-rw-r--r--pkg/tcpip/stack/stack_state_autogen.go579
-rw-r--r--pkg/tcpip/stack/tcp.go455
4 files changed, 1106 insertions, 301 deletions
diff --git a/pkg/tcpip/stack/stack.go b/pkg/tcpip/stack/stack.go
index f23112410..8c8909acd 100644
--- a/pkg/tcpip/stack/stack.go
+++ b/pkg/tcpip/stack/stack.go
@@ -35,7 +35,6 @@ import (
"gvisor.dev/gvisor/pkg/tcpip/buffer"
"gvisor.dev/gvisor/pkg/tcpip/header"
"gvisor.dev/gvisor/pkg/tcpip/ports"
- "gvisor.dev/gvisor/pkg/tcpip/seqnum"
"gvisor.dev/gvisor/pkg/waiter"
)
@@ -56,306 +55,6 @@ type transportProtocolState struct {
defaultHandler func(id TransportEndpointID, pkt *PacketBuffer) bool
}
-// TCPProbeFunc is the expected function type for a TCP probe function to be
-// passed to stack.AddTCPProbe.
-type TCPProbeFunc func(s TCPEndpointState)
-
-// TCPCubicState is used to hold a copy of the internal cubic state when the
-// TCPProbeFunc is invoked.
-type TCPCubicState struct {
- WLastMax float64
- WMax float64
- T time.Time
- TimeSinceLastCongestion time.Duration
- C float64
- K float64
- Beta float64
- WC float64
- WEst float64
-}
-
-// TCPRACKState is used to hold a copy of the internal RACK state when the
-// TCPProbeFunc is invoked.
-type TCPRACKState struct {
- XmitTime time.Time
- EndSequence seqnum.Value
- FACK seqnum.Value
- RTT time.Duration
- Reord bool
- DSACKSeen bool
- ReoWnd time.Duration
- ReoWndIncr uint8
- ReoWndPersist int8
- RTTSeq seqnum.Value
-}
-
-// TCPEndpointID is the unique 4 tuple that identifies a given endpoint.
-type TCPEndpointID struct {
- // LocalPort is the local port associated with the endpoint.
- LocalPort uint16
-
- // LocalAddress is the local [network layer] address associated with
- // the endpoint.
- LocalAddress tcpip.Address
-
- // RemotePort is the remote port associated with the endpoint.
- RemotePort uint16
-
- // RemoteAddress it the remote [network layer] address associated with
- // the endpoint.
- RemoteAddress tcpip.Address
-}
-
-// TCPFastRecoveryState holds a copy of the internal fast recovery state of a
-// TCP endpoint.
-type TCPFastRecoveryState struct {
- // Active if true indicates the endpoint is in fast recovery.
- Active bool
-
- // First is the first unacknowledged sequence number being recovered.
- First seqnum.Value
-
- // Last is the 'recover' sequence number that indicates the point at
- // which we should exit recovery barring any timeouts etc.
- Last seqnum.Value
-
- // MaxCwnd is the maximum value we are permitted to grow the congestion
- // window during recovery. This is set at the time we enter recovery.
- MaxCwnd int
-
- // HighRxt is the highest sequence number which has been retransmitted
- // during the current loss recovery phase.
- // See: RFC 6675 Section 2 for details.
- HighRxt seqnum.Value
-
- // RescueRxt is the highest sequence number which has been
- // optimistically retransmitted to prevent stalling of the ACK clock
- // when there is loss at the end of the window and no new data is
- // available for transmission.
- // See: RFC 6675 Section 2 for details.
- RescueRxt seqnum.Value
-}
-
-// TCPReceiverState holds a copy of the internal state of the receiver for
-// a given TCP endpoint.
-type TCPReceiverState struct {
- // RcvNxt is the TCP variable RCV.NXT.
- RcvNxt seqnum.Value
-
- // RcvAcc is the TCP variable RCV.ACC.
- RcvAcc seqnum.Value
-
- // RcvWndScale is the window scaling to use for inbound segments.
- RcvWndScale uint8
-
- // PendingBufUsed is the number of bytes pending in the receive
- // queue.
- PendingBufUsed int
-}
-
-// TCPSenderState holds a copy of the internal state of the sender for
-// a given TCP Endpoint.
-type TCPSenderState struct {
- // LastSendTime is the time at which we sent the last segment.
- LastSendTime time.Time
-
- // DupAckCount is the number of Duplicate ACK's received.
- DupAckCount int
-
- // SndCwnd is the size of the sending congestion window in packets.
- SndCwnd int
-
- // Ssthresh is the slow start threshold in packets.
- Ssthresh int
-
- // SndCAAckCount is the number of packets consumed in congestion
- // avoidance mode.
- SndCAAckCount int
-
- // Outstanding is the number of packets in flight.
- Outstanding int
-
- // SackedOut is the number of packets which have been selectively acked.
- SackedOut int
-
- // SndWnd is the send window size in bytes.
- SndWnd seqnum.Size
-
- // SndUna is the next unacknowledged sequence number.
- SndUna seqnum.Value
-
- // SndNxt is the sequence number of the next segment to be sent.
- SndNxt seqnum.Value
-
- // RTTMeasureSeqNum is the sequence number being used for the latest RTT
- // measurement.
- RTTMeasureSeqNum seqnum.Value
-
- // RTTMeasureTime is the time when the RTTMeasureSeqNum was sent.
- RTTMeasureTime time.Time
-
- // Closed indicates that the caller has closed the endpoint for sending.
- Closed bool
-
- // SRTT is the smoothed round-trip time as defined in section 2 of
- // RFC 6298.
- SRTT time.Duration
-
- // RTO is the retransmit timeout as defined in section of 2 of RFC 6298.
- RTO time.Duration
-
- // RTTVar is the round-trip time variation as defined in section 2 of
- // RFC 6298.
- RTTVar time.Duration
-
- // SRTTInited if true indicates take a valid RTT measurement has been
- // completed.
- SRTTInited bool
-
- // MaxPayloadSize is the maximum size of the payload of a given segment.
- // It is initialized on demand.
- MaxPayloadSize int
-
- // SndWndScale is the number of bits to shift left when reading the send
- // window size from a segment.
- SndWndScale uint8
-
- // MaxSentAck is the highest acknowledgement number sent till now.
- MaxSentAck seqnum.Value
-
- // FastRecovery holds the fast recovery state for the endpoint.
- FastRecovery TCPFastRecoveryState
-
- // Cubic holds the state related to CUBIC congestion control.
- Cubic TCPCubicState
-
- // RACKState holds the state related to RACK loss detection algorithm.
- RACKState TCPRACKState
-}
-
-// TCPSACKInfo holds TCP SACK related information for a given TCP endpoint.
-type TCPSACKInfo struct {
- // Blocks is the list of SACK Blocks that identify the out of order segments
- // held by a given TCP endpoint.
- Blocks []header.SACKBlock
-
- // ReceivedBlocks are the SACK blocks received by this endpoint
- // from the peer endpoint.
- ReceivedBlocks []header.SACKBlock
-
- // MaxSACKED is the highest sequence number that has been SACKED
- // by the peer.
- MaxSACKED seqnum.Value
-}
-
-// RcvBufAutoTuneParams holds state related to TCP receive buffer auto-tuning.
-type RcvBufAutoTuneParams struct {
- // MeasureTime is the time at which the current measurement
- // was started.
- MeasureTime time.Time
-
- // CopiedBytes is the number of bytes copied to user space since
- // this measure began.
- CopiedBytes int
-
- // PrevCopiedBytes is the number of bytes copied to userspace in
- // the previous RTT period.
- PrevCopiedBytes int
-
- // RcvBufSize is the auto tuned receive buffer size.
- RcvBufSize int
-
- // RTT is the smoothed RTT as measured by observing the time between
- // when a byte is first acknowledged and the receipt of data that is at
- // least one window beyond the sequence number that was acknowledged.
- RTT time.Duration
-
- // RTTVar is the "round-trip time variation" as defined in section 2
- // of RFC6298.
- RTTVar time.Duration
-
- // RTTMeasureSeqNumber is the highest acceptable sequence number at the
- // time this RTT measurement period began.
- RTTMeasureSeqNumber seqnum.Value
-
- // RTTMeasureTime is the absolute time at which the current RTT
- // measurement period began.
- RTTMeasureTime time.Time
-
- // Disabled is true if an explicit receive buffer is set for the
- // endpoint.
- Disabled bool
-}
-
-// TCPEndpointState is a copy of the internal state of a TCP endpoint.
-type TCPEndpointState struct {
- // ID is a copy of the TransportEndpointID for the endpoint.
- ID TCPEndpointID
-
- // SegTime denotes the absolute time when this segment was received.
- SegTime time.Time
-
- // RcvBufSize is the size of the receive socket buffer for the endpoint.
- RcvBufSize int
-
- // RcvBufUsed is the amount of bytes actually held in the receive socket
- // buffer for the endpoint.
- RcvBufUsed int
-
- // RcvBufAutoTuneParams is used to hold state variables to compute
- // the auto tuned receive buffer size.
- RcvAutoParams RcvBufAutoTuneParams
-
- // RcvClosed if true, indicates the endpoint has been closed for reading.
- RcvClosed bool
-
- // SendTSOk is used to indicate when the TS Option has been negotiated.
- // When sendTSOk is true every non-RST segment should carry a TS as per
- // RFC7323#section-1.1.
- SendTSOk bool
-
- // RecentTS is the timestamp that should be sent in the TSEcr field of
- // the timestamp for future segments sent by the endpoint. This field is
- // updated if required when a new segment is received by this endpoint.
- RecentTS uint32
-
- // TSOffset is a randomized offset added to the value of the TSVal field
- // in the timestamp option.
- TSOffset uint32
-
- // SACKPermitted is set to true if the peer sends the TCPSACKPermitted
- // option in the SYN/SYN-ACK.
- SACKPermitted bool
-
- // SACK holds TCP SACK related information for this endpoint.
- SACK TCPSACKInfo
-
- // SndBufSize is the size of the socket send buffer.
- SndBufSize int
-
- // SndBufUsed is the number of bytes held in the socket send buffer.
- SndBufUsed int
-
- // SndClosed indicates that the endpoint has been closed for sends.
- SndClosed bool
-
- // SndBufInQueue is the number of bytes in the send queue.
- SndBufInQueue seqnum.Size
-
- // PacketTooBigCount is used to notify the main protocol routine how
- // many times a "packet too big" control packet is received.
- PacketTooBigCount int
-
- // SndMTU is the smallest MTU seen in the control packets received.
- SndMTU int
-
- // Receiver holds variables related to the TCP receiver for the endpoint.
- Receiver TCPReceiverState
-
- // Sender holds state related to the TCP Sender for the endpoint.
- Sender TCPSenderState
-}
-
// ResumableEndpoint is an endpoint that needs to be resumed after restore.
type ResumableEndpoint interface {
// Resume resumes an endpoint after restore. This can be used to restart
diff --git a/pkg/tcpip/stack/stack_global_state.go b/pkg/tcpip/stack/stack_global_state.go
index dfec4258a..33824afd0 100644
--- a/pkg/tcpip/stack/stack_global_state.go
+++ b/pkg/tcpip/stack/stack_global_state.go
@@ -14,6 +14,78 @@
package stack
+import "time"
+
// StackFromEnv is the global stack created in restore run.
// FIXME(b/36201077)
var StackFromEnv *Stack
+
+// saveT is invoked by stateify.
+func (t *TCPCubicState) saveT() unixTime {
+ return unixTime{t.T.Unix(), t.T.UnixNano()}
+}
+
+// loadT is invoked by stateify.
+func (t *TCPCubicState) loadT(unix unixTime) {
+ t.T = time.Unix(unix.second, unix.nano)
+}
+
+// saveXmitTime is invoked by stateify.
+func (t *TCPRACKState) saveXmitTime() unixTime {
+ return unixTime{t.XmitTime.Unix(), t.XmitTime.UnixNano()}
+}
+
+// loadXmitTime is invoked by stateify.
+func (t *TCPRACKState) loadXmitTime(unix unixTime) {
+ t.XmitTime = time.Unix(unix.second, unix.nano)
+}
+
+// saveLastSendTime is invoked by stateify.
+func (t *TCPSenderState) saveLastSendTime() unixTime {
+ return unixTime{t.LastSendTime.Unix(), t.LastSendTime.UnixNano()}
+}
+
+// loadLastSendTime is invoked by stateify.
+func (t *TCPSenderState) loadLastSendTime(unix unixTime) {
+ t.LastSendTime = time.Unix(unix.second, unix.nano)
+}
+
+// saveRTTMeasureTime is invoked by stateify.
+func (t *TCPSenderState) saveRTTMeasureTime() unixTime {
+ return unixTime{t.RTTMeasureTime.Unix(), t.RTTMeasureTime.UnixNano()}
+}
+
+// loadRTTMeasureTime is invoked by stateify.
+func (t *TCPSenderState) loadRTTMeasureTime(unix unixTime) {
+ t.RTTMeasureTime = time.Unix(unix.second, unix.nano)
+}
+
+// saveMeasureTime is invoked by stateify.
+func (r *RcvBufAutoTuneParams) saveMeasureTime() unixTime {
+ return unixTime{r.MeasureTime.Unix(), r.MeasureTime.UnixNano()}
+}
+
+// loadMeasureTime is invoked by stateify.
+func (r *RcvBufAutoTuneParams) loadMeasureTime(unix unixTime) {
+ r.MeasureTime = time.Unix(unix.second, unix.nano)
+}
+
+// saveRTTMeasureTime is invoked by stateify.
+func (r *RcvBufAutoTuneParams) saveRTTMeasureTime() unixTime {
+ return unixTime{r.RTTMeasureTime.Unix(), r.RTTMeasureTime.UnixNano()}
+}
+
+// loadRTTMeasureTime is invoked by stateify.
+func (r *RcvBufAutoTuneParams) loadRTTMeasureTime(unix unixTime) {
+ r.RTTMeasureTime = time.Unix(unix.second, unix.nano)
+}
+
+// saveSegTime is invoked by stateify.
+func (t *TCPEndpointState) saveSegTime() unixTime {
+ return unixTime{t.SegTime.Unix(), t.SegTime.UnixNano()}
+}
+
+// loadSegTime is invoked by stateify.
+func (t *TCPEndpointState) loadSegTime(unix unixTime) {
+ t.SegTime = time.Unix(unix.second, unix.nano)
+}
diff --git a/pkg/tcpip/stack/stack_state_autogen.go b/pkg/tcpip/stack/stack_state_autogen.go
index 6b10c1e98..4c1133f2b 100644
--- a/pkg/tcpip/stack/stack_state_autogen.go
+++ b/pkg/tcpip/stack/stack_state_autogen.go
@@ -600,6 +600,572 @@ func (t *TransportEndpointInfo) StateLoad(stateSourceObject state.Source) {
stateSourceObject.Load(5, &t.RegisterNICID)
}
+func (t *TCPCubicState) StateTypeName() string {
+ return "pkg/tcpip/stack.TCPCubicState"
+}
+
+func (t *TCPCubicState) StateFields() []string {
+ return []string{
+ "WLastMax",
+ "WMax",
+ "T",
+ "TimeSinceLastCongestion",
+ "C",
+ "K",
+ "Beta",
+ "WC",
+ "WEst",
+ }
+}
+
+func (t *TCPCubicState) beforeSave() {}
+
+// +checklocksignore
+func (t *TCPCubicState) StateSave(stateSinkObject state.Sink) {
+ t.beforeSave()
+ var TValue unixTime = t.saveT()
+ stateSinkObject.SaveValue(2, TValue)
+ stateSinkObject.Save(0, &t.WLastMax)
+ stateSinkObject.Save(1, &t.WMax)
+ stateSinkObject.Save(3, &t.TimeSinceLastCongestion)
+ stateSinkObject.Save(4, &t.C)
+ stateSinkObject.Save(5, &t.K)
+ stateSinkObject.Save(6, &t.Beta)
+ stateSinkObject.Save(7, &t.WC)
+ stateSinkObject.Save(8, &t.WEst)
+}
+
+func (t *TCPCubicState) afterLoad() {}
+
+// +checklocksignore
+func (t *TCPCubicState) StateLoad(stateSourceObject state.Source) {
+ stateSourceObject.Load(0, &t.WLastMax)
+ stateSourceObject.Load(1, &t.WMax)
+ stateSourceObject.Load(3, &t.TimeSinceLastCongestion)
+ stateSourceObject.Load(4, &t.C)
+ stateSourceObject.Load(5, &t.K)
+ stateSourceObject.Load(6, &t.Beta)
+ stateSourceObject.Load(7, &t.WC)
+ stateSourceObject.Load(8, &t.WEst)
+ stateSourceObject.LoadValue(2, new(unixTime), func(y interface{}) { t.loadT(y.(unixTime)) })
+}
+
+func (t *TCPRACKState) StateTypeName() string {
+ return "pkg/tcpip/stack.TCPRACKState"
+}
+
+func (t *TCPRACKState) StateFields() []string {
+ return []string{
+ "XmitTime",
+ "EndSequence",
+ "FACK",
+ "RTT",
+ "Reord",
+ "DSACKSeen",
+ "ReoWnd",
+ "ReoWndIncr",
+ "ReoWndPersist",
+ "RTTSeq",
+ }
+}
+
+func (t *TCPRACKState) beforeSave() {}
+
+// +checklocksignore
+func (t *TCPRACKState) StateSave(stateSinkObject state.Sink) {
+ t.beforeSave()
+ var XmitTimeValue unixTime = t.saveXmitTime()
+ stateSinkObject.SaveValue(0, XmitTimeValue)
+ stateSinkObject.Save(1, &t.EndSequence)
+ stateSinkObject.Save(2, &t.FACK)
+ stateSinkObject.Save(3, &t.RTT)
+ stateSinkObject.Save(4, &t.Reord)
+ stateSinkObject.Save(5, &t.DSACKSeen)
+ stateSinkObject.Save(6, &t.ReoWnd)
+ stateSinkObject.Save(7, &t.ReoWndIncr)
+ stateSinkObject.Save(8, &t.ReoWndPersist)
+ stateSinkObject.Save(9, &t.RTTSeq)
+}
+
+func (t *TCPRACKState) afterLoad() {}
+
+// +checklocksignore
+func (t *TCPRACKState) StateLoad(stateSourceObject state.Source) {
+ stateSourceObject.Load(1, &t.EndSequence)
+ stateSourceObject.Load(2, &t.FACK)
+ stateSourceObject.Load(3, &t.RTT)
+ stateSourceObject.Load(4, &t.Reord)
+ stateSourceObject.Load(5, &t.DSACKSeen)
+ stateSourceObject.Load(6, &t.ReoWnd)
+ stateSourceObject.Load(7, &t.ReoWndIncr)
+ stateSourceObject.Load(8, &t.ReoWndPersist)
+ stateSourceObject.Load(9, &t.RTTSeq)
+ stateSourceObject.LoadValue(0, new(unixTime), func(y interface{}) { t.loadXmitTime(y.(unixTime)) })
+}
+
+func (t *TCPEndpointID) StateTypeName() string {
+ return "pkg/tcpip/stack.TCPEndpointID"
+}
+
+func (t *TCPEndpointID) StateFields() []string {
+ return []string{
+ "LocalPort",
+ "LocalAddress",
+ "RemotePort",
+ "RemoteAddress",
+ }
+}
+
+func (t *TCPEndpointID) beforeSave() {}
+
+// +checklocksignore
+func (t *TCPEndpointID) StateSave(stateSinkObject state.Sink) {
+ t.beforeSave()
+ stateSinkObject.Save(0, &t.LocalPort)
+ stateSinkObject.Save(1, &t.LocalAddress)
+ stateSinkObject.Save(2, &t.RemotePort)
+ stateSinkObject.Save(3, &t.RemoteAddress)
+}
+
+func (t *TCPEndpointID) afterLoad() {}
+
+// +checklocksignore
+func (t *TCPEndpointID) StateLoad(stateSourceObject state.Source) {
+ stateSourceObject.Load(0, &t.LocalPort)
+ stateSourceObject.Load(1, &t.LocalAddress)
+ stateSourceObject.Load(2, &t.RemotePort)
+ stateSourceObject.Load(3, &t.RemoteAddress)
+}
+
+func (t *TCPFastRecoveryState) StateTypeName() string {
+ return "pkg/tcpip/stack.TCPFastRecoveryState"
+}
+
+func (t *TCPFastRecoveryState) StateFields() []string {
+ return []string{
+ "Active",
+ "First",
+ "Last",
+ "MaxCwnd",
+ "HighRxt",
+ "RescueRxt",
+ }
+}
+
+func (t *TCPFastRecoveryState) beforeSave() {}
+
+// +checklocksignore
+func (t *TCPFastRecoveryState) StateSave(stateSinkObject state.Sink) {
+ t.beforeSave()
+ stateSinkObject.Save(0, &t.Active)
+ stateSinkObject.Save(1, &t.First)
+ stateSinkObject.Save(2, &t.Last)
+ stateSinkObject.Save(3, &t.MaxCwnd)
+ stateSinkObject.Save(4, &t.HighRxt)
+ stateSinkObject.Save(5, &t.RescueRxt)
+}
+
+func (t *TCPFastRecoveryState) afterLoad() {}
+
+// +checklocksignore
+func (t *TCPFastRecoveryState) StateLoad(stateSourceObject state.Source) {
+ stateSourceObject.Load(0, &t.Active)
+ stateSourceObject.Load(1, &t.First)
+ stateSourceObject.Load(2, &t.Last)
+ stateSourceObject.Load(3, &t.MaxCwnd)
+ stateSourceObject.Load(4, &t.HighRxt)
+ stateSourceObject.Load(5, &t.RescueRxt)
+}
+
+func (t *TCPReceiverState) StateTypeName() string {
+ return "pkg/tcpip/stack.TCPReceiverState"
+}
+
+func (t *TCPReceiverState) StateFields() []string {
+ return []string{
+ "RcvNxt",
+ "RcvAcc",
+ "RcvWndScale",
+ "PendingBufUsed",
+ }
+}
+
+func (t *TCPReceiverState) beforeSave() {}
+
+// +checklocksignore
+func (t *TCPReceiverState) StateSave(stateSinkObject state.Sink) {
+ t.beforeSave()
+ stateSinkObject.Save(0, &t.RcvNxt)
+ stateSinkObject.Save(1, &t.RcvAcc)
+ stateSinkObject.Save(2, &t.RcvWndScale)
+ stateSinkObject.Save(3, &t.PendingBufUsed)
+}
+
+func (t *TCPReceiverState) afterLoad() {}
+
+// +checklocksignore
+func (t *TCPReceiverState) StateLoad(stateSourceObject state.Source) {
+ stateSourceObject.Load(0, &t.RcvNxt)
+ stateSourceObject.Load(1, &t.RcvAcc)
+ stateSourceObject.Load(2, &t.RcvWndScale)
+ stateSourceObject.Load(3, &t.PendingBufUsed)
+}
+
+func (t *TCPRTTState) StateTypeName() string {
+ return "pkg/tcpip/stack.TCPRTTState"
+}
+
+func (t *TCPRTTState) StateFields() []string {
+ return []string{
+ "SRTT",
+ "RTTVar",
+ "SRTTInited",
+ }
+}
+
+func (t *TCPRTTState) beforeSave() {}
+
+// +checklocksignore
+func (t *TCPRTTState) StateSave(stateSinkObject state.Sink) {
+ t.beforeSave()
+ stateSinkObject.Save(0, &t.SRTT)
+ stateSinkObject.Save(1, &t.RTTVar)
+ stateSinkObject.Save(2, &t.SRTTInited)
+}
+
+func (t *TCPRTTState) afterLoad() {}
+
+// +checklocksignore
+func (t *TCPRTTState) StateLoad(stateSourceObject state.Source) {
+ stateSourceObject.Load(0, &t.SRTT)
+ stateSourceObject.Load(1, &t.RTTVar)
+ stateSourceObject.Load(2, &t.SRTTInited)
+}
+
+func (t *TCPSenderState) StateTypeName() string {
+ return "pkg/tcpip/stack.TCPSenderState"
+}
+
+func (t *TCPSenderState) StateFields() []string {
+ return []string{
+ "LastSendTime",
+ "DupAckCount",
+ "SndCwnd",
+ "Ssthresh",
+ "SndCAAckCount",
+ "Outstanding",
+ "SackedOut",
+ "SndWnd",
+ "SndUna",
+ "SndNxt",
+ "RTTMeasureSeqNum",
+ "RTTMeasureTime",
+ "Closed",
+ "RTO",
+ "RTTState",
+ "MaxPayloadSize",
+ "SndWndScale",
+ "MaxSentAck",
+ "FastRecovery",
+ "Cubic",
+ "RACKState",
+ }
+}
+
+func (t *TCPSenderState) beforeSave() {}
+
+// +checklocksignore
+func (t *TCPSenderState) StateSave(stateSinkObject state.Sink) {
+ t.beforeSave()
+ var LastSendTimeValue unixTime = t.saveLastSendTime()
+ stateSinkObject.SaveValue(0, LastSendTimeValue)
+ var RTTMeasureTimeValue unixTime = t.saveRTTMeasureTime()
+ stateSinkObject.SaveValue(11, RTTMeasureTimeValue)
+ stateSinkObject.Save(1, &t.DupAckCount)
+ stateSinkObject.Save(2, &t.SndCwnd)
+ stateSinkObject.Save(3, &t.Ssthresh)
+ stateSinkObject.Save(4, &t.SndCAAckCount)
+ stateSinkObject.Save(5, &t.Outstanding)
+ stateSinkObject.Save(6, &t.SackedOut)
+ stateSinkObject.Save(7, &t.SndWnd)
+ stateSinkObject.Save(8, &t.SndUna)
+ stateSinkObject.Save(9, &t.SndNxt)
+ stateSinkObject.Save(10, &t.RTTMeasureSeqNum)
+ stateSinkObject.Save(12, &t.Closed)
+ stateSinkObject.Save(13, &t.RTO)
+ stateSinkObject.Save(14, &t.RTTState)
+ stateSinkObject.Save(15, &t.MaxPayloadSize)
+ stateSinkObject.Save(16, &t.SndWndScale)
+ stateSinkObject.Save(17, &t.MaxSentAck)
+ stateSinkObject.Save(18, &t.FastRecovery)
+ stateSinkObject.Save(19, &t.Cubic)
+ stateSinkObject.Save(20, &t.RACKState)
+}
+
+func (t *TCPSenderState) afterLoad() {}
+
+// +checklocksignore
+func (t *TCPSenderState) StateLoad(stateSourceObject state.Source) {
+ stateSourceObject.Load(1, &t.DupAckCount)
+ stateSourceObject.Load(2, &t.SndCwnd)
+ stateSourceObject.Load(3, &t.Ssthresh)
+ stateSourceObject.Load(4, &t.SndCAAckCount)
+ stateSourceObject.Load(5, &t.Outstanding)
+ stateSourceObject.Load(6, &t.SackedOut)
+ stateSourceObject.Load(7, &t.SndWnd)
+ stateSourceObject.Load(8, &t.SndUna)
+ stateSourceObject.Load(9, &t.SndNxt)
+ stateSourceObject.Load(10, &t.RTTMeasureSeqNum)
+ stateSourceObject.Load(12, &t.Closed)
+ stateSourceObject.Load(13, &t.RTO)
+ stateSourceObject.Load(14, &t.RTTState)
+ stateSourceObject.Load(15, &t.MaxPayloadSize)
+ stateSourceObject.Load(16, &t.SndWndScale)
+ stateSourceObject.Load(17, &t.MaxSentAck)
+ stateSourceObject.Load(18, &t.FastRecovery)
+ stateSourceObject.Load(19, &t.Cubic)
+ stateSourceObject.Load(20, &t.RACKState)
+ stateSourceObject.LoadValue(0, new(unixTime), func(y interface{}) { t.loadLastSendTime(y.(unixTime)) })
+ stateSourceObject.LoadValue(11, new(unixTime), func(y interface{}) { t.loadRTTMeasureTime(y.(unixTime)) })
+}
+
+func (t *TCPSACKInfo) StateTypeName() string {
+ return "pkg/tcpip/stack.TCPSACKInfo"
+}
+
+func (t *TCPSACKInfo) StateFields() []string {
+ return []string{
+ "Blocks",
+ "ReceivedBlocks",
+ "MaxSACKED",
+ }
+}
+
+func (t *TCPSACKInfo) beforeSave() {}
+
+// +checklocksignore
+func (t *TCPSACKInfo) StateSave(stateSinkObject state.Sink) {
+ t.beforeSave()
+ stateSinkObject.Save(0, &t.Blocks)
+ stateSinkObject.Save(1, &t.ReceivedBlocks)
+ stateSinkObject.Save(2, &t.MaxSACKED)
+}
+
+func (t *TCPSACKInfo) afterLoad() {}
+
+// +checklocksignore
+func (t *TCPSACKInfo) StateLoad(stateSourceObject state.Source) {
+ stateSourceObject.Load(0, &t.Blocks)
+ stateSourceObject.Load(1, &t.ReceivedBlocks)
+ stateSourceObject.Load(2, &t.MaxSACKED)
+}
+
+func (r *RcvBufAutoTuneParams) StateTypeName() string {
+ return "pkg/tcpip/stack.RcvBufAutoTuneParams"
+}
+
+func (r *RcvBufAutoTuneParams) StateFields() []string {
+ return []string{
+ "MeasureTime",
+ "CopiedBytes",
+ "PrevCopiedBytes",
+ "RcvBufSize",
+ "RTT",
+ "RTTVar",
+ "RTTMeasureSeqNumber",
+ "RTTMeasureTime",
+ "Disabled",
+ }
+}
+
+func (r *RcvBufAutoTuneParams) beforeSave() {}
+
+// +checklocksignore
+func (r *RcvBufAutoTuneParams) StateSave(stateSinkObject state.Sink) {
+ r.beforeSave()
+ var MeasureTimeValue unixTime = r.saveMeasureTime()
+ stateSinkObject.SaveValue(0, MeasureTimeValue)
+ var RTTMeasureTimeValue unixTime = r.saveRTTMeasureTime()
+ stateSinkObject.SaveValue(7, RTTMeasureTimeValue)
+ stateSinkObject.Save(1, &r.CopiedBytes)
+ stateSinkObject.Save(2, &r.PrevCopiedBytes)
+ stateSinkObject.Save(3, &r.RcvBufSize)
+ stateSinkObject.Save(4, &r.RTT)
+ stateSinkObject.Save(5, &r.RTTVar)
+ stateSinkObject.Save(6, &r.RTTMeasureSeqNumber)
+ stateSinkObject.Save(8, &r.Disabled)
+}
+
+func (r *RcvBufAutoTuneParams) afterLoad() {}
+
+// +checklocksignore
+func (r *RcvBufAutoTuneParams) StateLoad(stateSourceObject state.Source) {
+ stateSourceObject.Load(1, &r.CopiedBytes)
+ stateSourceObject.Load(2, &r.PrevCopiedBytes)
+ stateSourceObject.Load(3, &r.RcvBufSize)
+ stateSourceObject.Load(4, &r.RTT)
+ stateSourceObject.Load(5, &r.RTTVar)
+ stateSourceObject.Load(6, &r.RTTMeasureSeqNumber)
+ stateSourceObject.Load(8, &r.Disabled)
+ stateSourceObject.LoadValue(0, new(unixTime), func(y interface{}) { r.loadMeasureTime(y.(unixTime)) })
+ stateSourceObject.LoadValue(7, new(unixTime), func(y interface{}) { r.loadRTTMeasureTime(y.(unixTime)) })
+}
+
+func (t *TCPRcvBufState) StateTypeName() string {
+ return "pkg/tcpip/stack.TCPRcvBufState"
+}
+
+func (t *TCPRcvBufState) StateFields() []string {
+ return []string{
+ "RcvBufSize",
+ "RcvBufUsed",
+ "RcvAutoParams",
+ "RcvClosed",
+ }
+}
+
+func (t *TCPRcvBufState) beforeSave() {}
+
+// +checklocksignore
+func (t *TCPRcvBufState) StateSave(stateSinkObject state.Sink) {
+ t.beforeSave()
+ stateSinkObject.Save(0, &t.RcvBufSize)
+ stateSinkObject.Save(1, &t.RcvBufUsed)
+ stateSinkObject.Save(2, &t.RcvAutoParams)
+ stateSinkObject.Save(3, &t.RcvClosed)
+}
+
+func (t *TCPRcvBufState) afterLoad() {}
+
+// +checklocksignore
+func (t *TCPRcvBufState) StateLoad(stateSourceObject state.Source) {
+ stateSourceObject.Load(0, &t.RcvBufSize)
+ stateSourceObject.Load(1, &t.RcvBufUsed)
+ stateSourceObject.Load(2, &t.RcvAutoParams)
+ stateSourceObject.Load(3, &t.RcvClosed)
+}
+
+func (t *TCPSndBufState) StateTypeName() string {
+ return "pkg/tcpip/stack.TCPSndBufState"
+}
+
+func (t *TCPSndBufState) StateFields() []string {
+ return []string{
+ "SndBufSize",
+ "SndBufUsed",
+ "SndClosed",
+ "SndBufInQueue",
+ "PacketTooBigCount",
+ "SndMTU",
+ }
+}
+
+func (t *TCPSndBufState) beforeSave() {}
+
+// +checklocksignore
+func (t *TCPSndBufState) StateSave(stateSinkObject state.Sink) {
+ t.beforeSave()
+ stateSinkObject.Save(0, &t.SndBufSize)
+ stateSinkObject.Save(1, &t.SndBufUsed)
+ stateSinkObject.Save(2, &t.SndClosed)
+ stateSinkObject.Save(3, &t.SndBufInQueue)
+ stateSinkObject.Save(4, &t.PacketTooBigCount)
+ stateSinkObject.Save(5, &t.SndMTU)
+}
+
+func (t *TCPSndBufState) afterLoad() {}
+
+// +checklocksignore
+func (t *TCPSndBufState) StateLoad(stateSourceObject state.Source) {
+ stateSourceObject.Load(0, &t.SndBufSize)
+ stateSourceObject.Load(1, &t.SndBufUsed)
+ stateSourceObject.Load(2, &t.SndClosed)
+ stateSourceObject.Load(3, &t.SndBufInQueue)
+ stateSourceObject.Load(4, &t.PacketTooBigCount)
+ stateSourceObject.Load(5, &t.SndMTU)
+}
+
+func (t *TCPEndpointStateInner) StateTypeName() string {
+ return "pkg/tcpip/stack.TCPEndpointStateInner"
+}
+
+func (t *TCPEndpointStateInner) StateFields() []string {
+ return []string{
+ "TSOffset",
+ "SACKPermitted",
+ "SendTSOk",
+ "RecentTS",
+ }
+}
+
+func (t *TCPEndpointStateInner) beforeSave() {}
+
+// +checklocksignore
+func (t *TCPEndpointStateInner) StateSave(stateSinkObject state.Sink) {
+ t.beforeSave()
+ stateSinkObject.Save(0, &t.TSOffset)
+ stateSinkObject.Save(1, &t.SACKPermitted)
+ stateSinkObject.Save(2, &t.SendTSOk)
+ stateSinkObject.Save(3, &t.RecentTS)
+}
+
+func (t *TCPEndpointStateInner) afterLoad() {}
+
+// +checklocksignore
+func (t *TCPEndpointStateInner) StateLoad(stateSourceObject state.Source) {
+ stateSourceObject.Load(0, &t.TSOffset)
+ stateSourceObject.Load(1, &t.SACKPermitted)
+ stateSourceObject.Load(2, &t.SendTSOk)
+ stateSourceObject.Load(3, &t.RecentTS)
+}
+
+func (t *TCPEndpointState) StateTypeName() string {
+ return "pkg/tcpip/stack.TCPEndpointState"
+}
+
+func (t *TCPEndpointState) StateFields() []string {
+ return []string{
+ "TCPEndpointStateInner",
+ "ID",
+ "SegTime",
+ "RcvBufState",
+ "SndBufState",
+ "SACK",
+ "Receiver",
+ "Sender",
+ }
+}
+
+func (t *TCPEndpointState) beforeSave() {}
+
+// +checklocksignore
+func (t *TCPEndpointState) StateSave(stateSinkObject state.Sink) {
+ t.beforeSave()
+ var SegTimeValue unixTime = t.saveSegTime()
+ stateSinkObject.SaveValue(2, SegTimeValue)
+ stateSinkObject.Save(0, &t.TCPEndpointStateInner)
+ stateSinkObject.Save(1, &t.ID)
+ stateSinkObject.Save(3, &t.RcvBufState)
+ stateSinkObject.Save(4, &t.SndBufState)
+ stateSinkObject.Save(5, &t.SACK)
+ stateSinkObject.Save(6, &t.Receiver)
+ stateSinkObject.Save(7, &t.Sender)
+}
+
+func (t *TCPEndpointState) afterLoad() {}
+
+// +checklocksignore
+func (t *TCPEndpointState) StateLoad(stateSourceObject state.Source) {
+ stateSourceObject.Load(0, &t.TCPEndpointStateInner)
+ stateSourceObject.Load(1, &t.ID)
+ stateSourceObject.Load(3, &t.RcvBufState)
+ stateSourceObject.Load(4, &t.SndBufState)
+ stateSourceObject.Load(5, &t.SACK)
+ stateSourceObject.Load(6, &t.Receiver)
+ stateSourceObject.Load(7, &t.Sender)
+ stateSourceObject.LoadValue(2, new(unixTime), func(y interface{}) { t.loadSegTime(y.(unixTime)) })
+}
+
func (ep *multiPortEndpoint) StateTypeName() string {
return "pkg/tcpip/stack.multiPortEndpoint"
}
@@ -712,6 +1278,19 @@ func init() {
state.Register((*GSOType)(nil))
state.Register((*GSO)(nil))
state.Register((*TransportEndpointInfo)(nil))
+ state.Register((*TCPCubicState)(nil))
+ state.Register((*TCPRACKState)(nil))
+ state.Register((*TCPEndpointID)(nil))
+ state.Register((*TCPFastRecoveryState)(nil))
+ state.Register((*TCPReceiverState)(nil))
+ state.Register((*TCPRTTState)(nil))
+ state.Register((*TCPSenderState)(nil))
+ state.Register((*TCPSACKInfo)(nil))
+ state.Register((*RcvBufAutoTuneParams)(nil))
+ state.Register((*TCPRcvBufState)(nil))
+ state.Register((*TCPSndBufState)(nil))
+ state.Register((*TCPEndpointStateInner)(nil))
+ state.Register((*TCPEndpointState)(nil))
state.Register((*multiPortEndpoint)(nil))
state.Register((*tupleList)(nil))
state.Register((*tupleEntry)(nil))
diff --git a/pkg/tcpip/stack/tcp.go b/pkg/tcpip/stack/tcp.go
new file mode 100644
index 000000000..2e32b8a8e
--- /dev/null
+++ b/pkg/tcpip/stack/tcp.go
@@ -0,0 +1,455 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package stack
+
+import (
+ "time"
+
+ "gvisor.dev/gvisor/pkg/tcpip"
+ "gvisor.dev/gvisor/pkg/tcpip/header"
+ "gvisor.dev/gvisor/pkg/tcpip/seqnum"
+)
+
+// TCPProbeFunc is the expected function type for a TCP probe function to be
+// passed to stack.AddTCPProbe.
+type TCPProbeFunc func(s TCPEndpointState)
+
+// TCPCubicState is used to hold a copy of the internal cubic state when the
+// TCPProbeFunc is invoked.
+//
+// +stateify savable
+type TCPCubicState struct {
+ // WLastMax is the previous wMax value.
+ WLastMax float64
+
+ // WMax is the value of the congestion window at the time of the last
+ // congestion event.
+ WMax float64
+
+ // T is the time when the current congestion avoidance was entered.
+ T time.Time `state:".(unixTime)"`
+
+ // TimeSinceLastCongestion denotes the time since the current
+ // congestion avoidance was entered.
+ TimeSinceLastCongestion time.Duration
+
+ // C is the cubic constant as specified in RFC8312, page 11.
+ C float64
+
+ // K is the time period (in seconds) that the above function takes to
+ // increase the current window size to WMax if there are no further
+ // congestion events and is calculated using the following equation:
+ //
+ // K = cubic_root(WMax*(1-beta_cubic)/C) (Eq. 2, page 5)
+ K float64
+
+ // Beta is the CUBIC multiplication decrease factor. That is, when a
+ // congestion event is detected, CUBIC reduces its cwnd to
+ // WC(0)=WMax*beta_cubic.
+ Beta float64
+
+ // WC is window computed by CUBIC at time TimeSinceLastCongestion. It's
+ // calculated using the formula:
+ //
+ // WC(TimeSinceLastCongestion) = C*(t-K)^3 + WMax (Eq. 1)
+ WC float64
+
+ // WEst is the window computed by CUBIC at time
+ // TimeSinceLastCongestion+RTT i.e WC(TimeSinceLastCongestion+RTT).
+ WEst float64
+}
+
+// TCPRACKState is used to hold a copy of the internal RACK state when the
+// TCPProbeFunc is invoked.
+//
+// +stateify savable
+type TCPRACKState struct {
+ // XmitTime is the transmission timestamp of the most recent
+ // acknowledged segment.
+ XmitTime time.Time `state:".(unixTime)"`
+
+ // EndSequence is the ending TCP sequence number of the most recent
+ // acknowledged segment.
+ EndSequence seqnum.Value
+
+ // FACK is the highest selectively or cumulatively acknowledged
+ // sequence.
+ FACK seqnum.Value
+
+ // RTT is the round trip time of the most recently delivered packet on
+ // the connection (either cumulatively acknowledged or selectively
+ // acknowledged) that was not marked invalid as a possible spurious
+ // retransmission.
+ RTT time.Duration
+
+ // Reord is true iff reordering has been detected on this connection.
+ Reord bool
+
+ // DSACKSeen is true iff the connection has seen a DSACK.
+ DSACKSeen bool
+
+ // ReoWnd is the reordering window time used for recording packet
+ // transmission times. It is used to defer the moment at which RACK
+ // marks a packet lost.
+ ReoWnd time.Duration
+
+ // ReoWndIncr is the multiplier applied to adjust reorder window.
+ ReoWndIncr uint8
+
+ // ReoWndPersist is the number of loss recoveries before resetting
+ // reorder window.
+ ReoWndPersist int8
+
+ // RTTSeq is the SND.NXT when RTT is updated.
+ RTTSeq seqnum.Value
+}
+
+// TCPEndpointID is the unique 4 tuple that identifies a given endpoint.
+//
+// +stateify savable
+type TCPEndpointID struct {
+ // LocalPort is the local port associated with the endpoint.
+ LocalPort uint16
+
+ // LocalAddress is the local [network layer] address associated with
+ // the endpoint.
+ LocalAddress tcpip.Address
+
+ // RemotePort is the remote port associated with the endpoint.
+ RemotePort uint16
+
+ // RemoteAddress it the remote [network layer] address associated with
+ // the endpoint.
+ RemoteAddress tcpip.Address
+}
+
+// TCPFastRecoveryState holds a copy of the internal fast recovery state of a
+// TCP endpoint.
+//
+// +stateify savable
+type TCPFastRecoveryState struct {
+ // Active if true indicates the endpoint is in fast recovery. The
+ // following fields are only meaningful when Active is true.
+ Active bool
+
+ // First is the first unacknowledged sequence number being recovered.
+ First seqnum.Value
+
+ // Last is the 'recover' sequence number that indicates the point at
+ // which we should exit recovery barring any timeouts etc.
+ Last seqnum.Value
+
+ // MaxCwnd is the maximum value we are permitted to grow the congestion
+ // window during recovery. This is set at the time we enter recovery.
+ // It exists to avoid attacks where the receiver intentionally sends
+ // duplicate acks to artificially inflate the sender's cwnd.
+ MaxCwnd int
+
+ // HighRxt is the highest sequence number which has been retransmitted
+ // during the current loss recovery phase. See: RFC 6675 Section 2 for
+ // details.
+ HighRxt seqnum.Value
+
+ // RescueRxt is the highest sequence number which has been
+ // optimistically retransmitted to prevent stalling of the ACK clock
+ // when there is loss at the end of the window and no new data is
+ // available for transmission. See: RFC 6675 Section 2 for details.
+ RescueRxt seqnum.Value
+}
+
+// TCPReceiverState holds a copy of the internal state of the receiver for a
+// given TCP endpoint.
+//
+// +stateify savable
+type TCPReceiverState struct {
+ // RcvNxt is the TCP variable RCV.NXT.
+ RcvNxt seqnum.Value
+
+ // RcvAcc is one beyond the last acceptable sequence number. That is,
+ // the "largest" sequence value that the receiver has announced to its
+ // peer that it's willing to accept. This may be different than RcvNxt
+ // + (last advertised receive window) if the receive window is reduced;
+ // in that case we have to reduce the window as we receive more data
+ // instead of shrinking it.
+ RcvAcc seqnum.Value
+
+ // RcvWndScale is the window scaling to use for inbound segments.
+ RcvWndScale uint8
+
+ // PendingBufUsed is the number of bytes pending in the receive queue.
+ PendingBufUsed int
+}
+
+// TCPRTTState holds a copy of information about the endpoint's round trip
+// time.
+//
+// +stateify savable
+type TCPRTTState struct {
+ // SRTT is the smoothed round trip time defined in section 2 of RFC
+ // 6298.
+ SRTT time.Duration
+
+ // RTTVar is the round-trip time variation as defined in section 2 of
+ // RFC 6298.
+ RTTVar time.Duration
+
+ // SRTTInited if true indicates that a valid RTT measurement has been
+ // completed.
+ SRTTInited bool
+}
+
+// TCPSenderState holds a copy of the internal state of the sender for a given
+// TCP Endpoint.
+//
+// +stateify savable
+type TCPSenderState struct {
+ // LastSendTime is the timestamp at which we sent the last segment.
+ LastSendTime time.Time `state:".(unixTime)"`
+
+ // DupAckCount is the number of Duplicate ACKs received. It is used for
+ // fast retransmit.
+ DupAckCount int
+
+ // SndCwnd is the size of the sending congestion window in packets.
+ SndCwnd int
+
+ // Ssthresh is the threshold between slow start and congestion
+ // avoidance.
+ Ssthresh int
+
+ // SndCAAckCount is the number of packets acknowledged during
+ // congestion avoidance. When enough packets have been ack'd (typically
+ // cwnd packets), the congestion window is incremented by one.
+ SndCAAckCount int
+
+ // Outstanding is the number of packets that have been sent but not yet
+ // acknowledged.
+ Outstanding int
+
+ // SackedOut is the number of packets which have been selectively
+ // acked.
+ SackedOut int
+
+ // SndWnd is the send window size in bytes.
+ SndWnd seqnum.Size
+
+ // SndUna is the next unacknowledged sequence number.
+ SndUna seqnum.Value
+
+ // SndNxt is the sequence number of the next segment to be sent.
+ SndNxt seqnum.Value
+
+ // RTTMeasureSeqNum is the sequence number being used for the latest
+ // RTT measurement.
+ RTTMeasureSeqNum seqnum.Value
+
+ // RTTMeasureTime is the time when the RTTMeasureSeqNum was sent.
+ RTTMeasureTime time.Time `state:".(unixTime)"`
+
+ // Closed indicates that the caller has closed the endpoint for
+ // sending.
+ Closed bool
+
+ // RTO is the retransmit timeout as defined in section of 2 of RFC
+ // 6298.
+ RTO time.Duration
+
+ // RTTState holds information about the endpoint's round trip time.
+ RTTState TCPRTTState
+
+ // MaxPayloadSize is the maximum size of the payload of a given
+ // segment. It is initialized on demand.
+ MaxPayloadSize int
+
+ // SndWndScale is the number of bits to shift left when reading the
+ // send window size from a segment.
+ SndWndScale uint8
+
+ // MaxSentAck is the highest acknowledgement number sent till now.
+ MaxSentAck seqnum.Value
+
+ // FastRecovery holds the fast recovery state for the endpoint.
+ FastRecovery TCPFastRecoveryState
+
+ // Cubic holds the state related to CUBIC congestion control.
+ Cubic TCPCubicState
+
+ // RACKState holds the state related to RACK loss detection algorithm.
+ RACKState TCPRACKState
+}
+
+// TCPSACKInfo holds TCP SACK related information for a given TCP endpoint.
+//
+// +stateify savable
+type TCPSACKInfo struct {
+ // Blocks is the list of SACK Blocks that identify the out of order
+ // segments held by a given TCP endpoint.
+ Blocks []header.SACKBlock
+
+ // ReceivedBlocks are the SACK blocks received by this endpoint from
+ // the peer endpoint.
+ ReceivedBlocks []header.SACKBlock
+
+ // MaxSACKED is the highest sequence number that has been SACKED by the
+ // peer.
+ MaxSACKED seqnum.Value
+}
+
+// RcvBufAutoTuneParams holds state related to TCP receive buffer auto-tuning.
+//
+// +stateify savable
+type RcvBufAutoTuneParams struct {
+ // MeasureTime is the time at which the current measurement was
+ // started.
+ MeasureTime time.Time `state:".(unixTime)"`
+
+ // CopiedBytes is the number of bytes copied to user space since this
+ // measure began.
+ CopiedBytes int
+
+ // PrevCopiedBytes is the number of bytes copied to userspace in the
+ // previous RTT period.
+ PrevCopiedBytes int
+
+ // RcvBufSize is the auto tuned receive buffer size.
+ RcvBufSize int
+
+ // RTT is the smoothed RTT as measured by observing the time between
+ // when a byte is first acknowledged and the receipt of data that is at
+ // least one window beyond the sequence number that was acknowledged.
+ RTT time.Duration
+
+ // RTTVar is the "round-trip time variation" as defined in section 2 of
+ // RFC6298.
+ RTTVar time.Duration
+
+ // RTTMeasureSeqNumber is the highest acceptable sequence number at the
+ // time this RTT measurement period began.
+ RTTMeasureSeqNumber seqnum.Value
+
+ // RTTMeasureTime is the absolute time at which the current RTT
+ // measurement period began.
+ RTTMeasureTime time.Time `state:".(unixTime)"`
+
+ // Disabled is true if an explicit receive buffer is set for the
+ // endpoint.
+ Disabled bool
+}
+
+// TCPRcvBufState contains information about the state of an endpoint's receive
+// socket buffer.
+//
+// +stateify savable
+type TCPRcvBufState struct {
+ // RcvBufSize is the size of the receive socket buffer for the
+ // endpoint.
+ RcvBufSize int
+
+ // RcvBufUsed is the amount of bytes actually held in the receive
+ // socket buffer for the endpoint.
+ RcvBufUsed int
+
+ // RcvBufAutoTuneParams is used to hold state variables to compute the
+ // auto tuned receive buffer size.
+ RcvAutoParams RcvBufAutoTuneParams
+
+ // RcvClosed if true, indicates the endpoint has been closed for
+ // reading.
+ RcvClosed bool
+}
+
+// TCPSndBufState contains information about the state of an endpoint's send
+// socket buffer.
+//
+// +stateify savable
+type TCPSndBufState struct {
+ // SndBufSize is the size of the socket send buffer.
+ SndBufSize int
+
+ // SndBufUsed is the number of bytes held in the socket send buffer.
+ SndBufUsed int
+
+ // SndClosed indicates that the endpoint has been closed for sends.
+ SndClosed bool
+
+ // SndBufInQueue is the number of bytes in the send queue.
+ SndBufInQueue seqnum.Size
+
+ // PacketTooBigCount is used to notify the main protocol routine how
+ // many times a "packet too big" control packet is received.
+ PacketTooBigCount int
+
+ // SndMTU is the smallest MTU seen in the control packets received.
+ SndMTU int
+}
+
+// TCPEndpointStateInner contains the members of TCPEndpointState used directly
+// (that is, not within another containing struct) within the endpoint's
+// internal implementation.
+//
+// +stateify savable
+type TCPEndpointStateInner struct {
+ // TSOffset is a randomized offset added to the value of the TSVal
+ // field in the timestamp option.
+ TSOffset uint32
+
+ // SACKPermitted is set to true if the peer sends the TCPSACKPermitted
+ // option in the SYN/SYN-ACK.
+ SACKPermitted bool
+
+ // SendTSOk is used to indicate when the TS Option has been negotiated.
+ // When sendTSOk is true every non-RST segment should carry a TS as per
+ // RFC7323#section-1.1.
+ SendTSOk bool
+
+ // RecentTS is the timestamp that should be sent in the TSEcr field of
+ // the timestamp for future segments sent by the endpoint. This field
+ // is updated if required when a new segment is received by this
+ // endpoint.
+ RecentTS uint32
+}
+
+// TCPEndpointState is a copy of the internal state of a TCP endpoint.
+//
+// +stateify savable
+type TCPEndpointState struct {
+ // TCPEndpointStateInner contains the members of TCPEndpointState used
+ // by the endpoint's internal implementation.
+ TCPEndpointStateInner
+
+ // ID is a copy of the TransportEndpointID for the endpoint.
+ ID TCPEndpointID
+
+ // SegTime denotes the absolute time when this segment was received.
+ SegTime time.Time `state:".(unixTime)"`
+
+ // RcvBufState contains information about the state of the endpoint's
+ // receive socket buffer.
+ RcvBufState TCPRcvBufState
+
+ // SndBufState contains information about the state of the endpoint's
+ // send socket buffer.
+ SndBufState TCPSndBufState
+
+ // SACK holds TCP SACK related information for this endpoint.
+ SACK TCPSACKInfo
+
+ // Receiver holds variables related to the TCP receiver for the
+ // endpoint.
+ Receiver TCPReceiverState
+
+ // Sender holds state related to the TCP Sender for the endpoint.
+ Sender TCPSenderState
+}