diff options
-rw-r--r-- | pkg/abi/linux/linux_abi_autogen_unsafe.go | 26 | ||||
-rw-r--r-- | pkg/abi/linux/socket.go | 12 | ||||
-rw-r--r-- | pkg/abi/linux/tcp.go | 9 | ||||
-rw-r--r-- | pkg/sentry/socket/netstack/netstack.go | 23 | ||||
-rw-r--r-- | pkg/tcpip/tcpip.go | 44 | ||||
-rw-r--r-- | pkg/tcpip/transport/tcp/endpoint.go | 34 | ||||
-rw-r--r-- | pkg/tcpip/transport/tcp/rack.go | 2 | ||||
-rw-r--r-- | pkg/tcpip/transport/tcp/snd.go | 36 |
8 files changed, 144 insertions, 42 deletions
diff --git a/pkg/abi/linux/linux_abi_autogen_unsafe.go b/pkg/abi/linux/linux_abi_autogen_unsafe.go index 4960295ae..19f823232 100644 --- a/pkg/abi/linux/linux_abi_autogen_unsafe.go +++ b/pkg/abi/linux/linux_abi_autogen_unsafe.go @@ -8951,7 +8951,7 @@ func (s *SockAddrUnix) WriteTo(writer io.Writer) (int64, error) { // SizeBytes implements marshal.Marshallable.SizeBytes. func (t *TCPInfo) SizeBytes() int { - return 192 + return 224 } // MarshalBytes implements marshal.Marshallable.MarshalBytes. @@ -9048,6 +9048,18 @@ func (t *TCPInfo) MarshalBytes(dst []byte) { dst = dst[8:] usermem.ByteOrder.PutUint64(dst[:8], uint64(t.SndBufLimited)) dst = dst[8:] + usermem.ByteOrder.PutUint32(dst[:4], uint32(t.Delievered)) + dst = dst[4:] + usermem.ByteOrder.PutUint32(dst[:4], uint32(t.DelieveredCe)) + dst = dst[4:] + usermem.ByteOrder.PutUint64(dst[:8], uint64(t.BytesSent)) + dst = dst[8:] + usermem.ByteOrder.PutUint64(dst[:8], uint64(t.BytesRetrans)) + dst = dst[8:] + usermem.ByteOrder.PutUint32(dst[:4], uint32(t.DSACKDups)) + dst = dst[4:] + usermem.ByteOrder.PutUint32(dst[:4], uint32(t.ReordSeen)) + dst = dst[4:] } // UnmarshalBytes implements marshal.Marshallable.UnmarshalBytes. @@ -9144,6 +9156,18 @@ func (t *TCPInfo) UnmarshalBytes(src []byte) { src = src[8:] t.SndBufLimited = uint64(usermem.ByteOrder.Uint64(src[:8])) src = src[8:] + t.Delievered = uint32(usermem.ByteOrder.Uint32(src[:4])) + src = src[4:] + t.DelieveredCe = uint32(usermem.ByteOrder.Uint32(src[:4])) + src = src[4:] + t.BytesSent = uint64(usermem.ByteOrder.Uint64(src[:8])) + src = src[8:] + t.BytesRetrans = uint64(usermem.ByteOrder.Uint64(src[:8])) + src = src[8:] + t.DSACKDups = uint32(usermem.ByteOrder.Uint32(src[:4])) + src = src[4:] + t.ReordSeen = uint32(usermem.ByteOrder.Uint32(src[:4])) + src = src[4:] } // Packed implements marshal.Marshallable.Packed. diff --git a/pkg/abi/linux/socket.go b/pkg/abi/linux/socket.go index 8591acbf2..cb33c37bd 100644 --- a/pkg/abi/linux/socket.go +++ b/pkg/abi/linux/socket.go @@ -416,6 +416,18 @@ type TCPInfo struct { RwndLimited uint64 // SndBufLimited is the time in microseconds limited by send buffer. SndBufLimited uint64 + + Delievered uint32 + DelieveredCe uint32 + + // BytesSent is RFC4898 tcpEStatsPerfHCDataOctetsOut. + BytesSent uint64 + // BytesRetrans is RFC4898 tcpEStatsPerfOctetsRetrans. + BytesRetrans uint64 + // DSACKDups is RFC4898 tcpEStatsStackDSACKDups. + DSACKDups uint32 + // ReordSeen is the number of reordering events seen. + ReordSeen uint32 } // SizeOfTCPInfo is the binary size of a TCPInfo struct. diff --git a/pkg/abi/linux/tcp.go b/pkg/abi/linux/tcp.go index 2a8d4708b..1a3c0916f 100644 --- a/pkg/abi/linux/tcp.go +++ b/pkg/abi/linux/tcp.go @@ -59,3 +59,12 @@ const ( MAX_TCP_KEEPINTVL = 32767 MAX_TCP_KEEPCNT = 127 ) + +// Congestion control states from include/uapi/linux/tcp.h. +const ( + TCP_CA_Open = 0 + TCP_CA_Disorder = 1 + TCP_CA_CWR = 2 + TCP_CA_Recovery = 3 + TCP_CA_Loss = 4 +) diff --git a/pkg/sentry/socket/netstack/netstack.go b/pkg/sentry/socket/netstack/netstack.go index 7065a0e46..3115a227d 100644 --- a/pkg/sentry/socket/netstack/netstack.go +++ b/pkg/sentry/socket/netstack/netstack.go @@ -1098,6 +1098,29 @@ func getSockOptTCP(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, name, // TODO(b/64800844): Translate fields once they are added to // tcpip.TCPInfoOption. info := linux.TCPInfo{} + switch v.CcState { + case tcpip.RTORecovery: + info.CaState = linux.TCP_CA_Loss + case tcpip.FastRecovery, tcpip.SACKRecovery: + info.CaState = linux.TCP_CA_Recovery + case tcpip.Disorder: + info.CaState = linux.TCP_CA_Disorder + case tcpip.Open: + info.CaState = linux.TCP_CA_Open + } + info.RTO = uint32(v.RTO / time.Microsecond) + info.RTT = uint32(v.RTT / time.Microsecond) + info.RTTVar = uint32(v.RTTVar / time.Microsecond) + info.SndSsthresh = v.SndSsthresh + info.SndCwnd = v.SndCwnd + + // In netstack reorderSeen is updated only when RACK is enabled. + // We only track whether the reordering is seen, which is + // different than Linux where reorderSeen is not specific to + // RACK and is incremented when a reordering event is seen. + if v.ReorderSeen { + info.ReordSeen = 1 + } // Linux truncates the output binary to outLen. buf := t.CopyScratchBuffer(info.SizeBytes()) diff --git a/pkg/tcpip/tcpip.go b/pkg/tcpip/tcpip.go index 812ee36ed..e70ae69ef 100644 --- a/pkg/tcpip/tcpip.go +++ b/pkg/tcpip/tcpip.go @@ -993,12 +993,54 @@ type SettableSocketOption interface { isSettableSocketOption() } +// CongestionControlState indicates the current congestion control state for +// TCP sender. +type CongestionControlState int + +const ( + // Open indicates that the sender is receiving acks in order and + // no loss or dupACK's etc have been detected. + Open CongestionControlState = iota + // RTORecovery indicates that an RTO has occurred and the sender + // has entered an RTO based recovery phase. + RTORecovery + // FastRecovery indicates that the sender has entered FastRecovery + // based on receiving nDupAck's. This state is entered only when + // SACK is not in use. + FastRecovery + // SACKRecovery indicates that the sender has entered SACK based + // recovery. + SACKRecovery + // Disorder indicates the sender either received some SACK blocks + // or dupACK's. + Disorder +) + // TCPInfoOption is used by GetSockOpt to expose TCP statistics. // // TODO(b/64800844): Add and populate stat fields. type TCPInfoOption struct { - RTT time.Duration + // RTT is the smoothed round trip time. + RTT time.Duration + + // RTTVar is the round trip time variation. RTTVar time.Duration + + // RTO is the retransmission timeout for the endpoint. + RTO time.Duration + + // CcState is the congestion control state. + CcState CongestionControlState + + // SndCwnd is the congestion window, in packets. + SndCwnd uint32 + + // SndSsthresh is the threshold between slow start and congestion + // avoidance. + SndSsthresh uint32 + + // ReorderSeen indicates if reordering is seen in the endpoint. + ReorderSeen bool } func (*TCPInfoOption) isGettableSocketOption() {} diff --git a/pkg/tcpip/transport/tcp/endpoint.go b/pkg/tcpip/transport/tcp/endpoint.go index b6bd6d455..bfa5b01fb 100644 --- a/pkg/tcpip/transport/tcp/endpoint.go +++ b/pkg/tcpip/transport/tcp/endpoint.go @@ -2011,20 +2011,34 @@ func (e *endpoint) GetSockOptInt(opt tcpip.SockOptInt) (int, *tcpip.Error) { } } +func (e *endpoint) getTCPInfo() tcpip.TCPInfoOption { + info := tcpip.TCPInfoOption{} + e.LockUser() + snd := e.snd + if snd != nil { + // We do not calculate RTT before sending the data packets. If + // the connection did not send and receive data, then RTT will + // be zero. + snd.rtt.Lock() + info.RTT = snd.rtt.srtt + info.RTTVar = snd.rtt.rttvar + snd.rtt.Unlock() + + info.RTO = snd.rto + info.CcState = snd.state + info.SndSsthresh = uint32(snd.sndSsthresh) + info.SndCwnd = uint32(snd.sndCwnd) + info.ReorderSeen = snd.rc.reorderSeen + } + e.UnlockUser() + return info +} + // GetSockOpt implements tcpip.Endpoint.GetSockOpt. func (e *endpoint) GetSockOpt(opt tcpip.GettableSocketOption) *tcpip.Error { switch o := opt.(type) { case *tcpip.TCPInfoOption: - *o = tcpip.TCPInfoOption{} - e.LockUser() - snd := e.snd - e.UnlockUser() - if snd != nil { - snd.rtt.Lock() - o.RTT = snd.rtt.srtt - o.RTTVar = snd.rtt.rttvar - snd.rtt.Unlock() - } + *o = e.getTCPInfo() case *tcpip.KeepaliveIdleOption: e.keepalive.Lock() diff --git a/pkg/tcpip/transport/tcp/rack.go b/pkg/tcpip/transport/tcp/rack.go index 307bacca5..fdb7e3dc6 100644 --- a/pkg/tcpip/transport/tcp/rack.go +++ b/pkg/tcpip/transport/tcp/rack.go @@ -162,7 +162,7 @@ func (s *sender) shouldSchedulePTO() bool { // The connection supports SACK. s.ep.sackPermitted && // The connection is not in loss recovery. - (s.state != RTORecovery && s.state != SACKRecovery) && + (s.state != tcpip.RTORecovery && s.state != tcpip.SACKRecovery) && // The connection has no SACKed sequences in the SACK scoreboard. s.ep.scoreboard.Sacked() == 0 } diff --git a/pkg/tcpip/transport/tcp/snd.go b/pkg/tcpip/transport/tcp/snd.go index 079d90848..28ef9f899 100644 --- a/pkg/tcpip/transport/tcp/snd.go +++ b/pkg/tcpip/transport/tcp/snd.go @@ -48,28 +48,6 @@ const ( MaxRetries = 15 ) -// ccState indicates the current congestion control state for this sender. -type ccState int - -const ( - // Open indicates that the sender is receiving acks in order and - // no loss or dupACK's etc have been detected. - Open ccState = iota - // RTORecovery indicates that an RTO has occurred and the sender - // has entered an RTO based recovery phase. - RTORecovery - // FastRecovery indicates that the sender has entered FastRecovery - // based on receiving nDupAck's. This state is entered only when - // SACK is not in use. - FastRecovery - // SACKRecovery indicates that the sender has entered SACK based - // recovery. - SACKRecovery - // Disorder indicates the sender either received some SACK blocks - // or dupACK's. - Disorder -) - // congestionControl is an interface that must be implemented by any supported // congestion control algorithm. type congestionControl interface { @@ -204,7 +182,7 @@ type sender struct { maxSentAck seqnum.Value // state is the current state of congestion control for this endpoint. - state ccState + state tcpip.CongestionControlState // cc is the congestion control algorithm in use for this sender. cc congestionControl @@ -593,7 +571,7 @@ func (s *sender) retransmitTimerExpired() bool { s.leaveRecovery() } - s.state = RTORecovery + s.state = tcpip.RTORecovery s.cc.HandleRTOExpired() // Mark the next segment to be sent as the first unacknowledged one and @@ -1018,7 +996,7 @@ func (s *sender) sendData() { // "A TCP SHOULD set cwnd to no more than RW before beginning // transmission if the TCP has not sent data in the interval exceeding // the retrasmission timeout." - if !s.fr.active && s.state != RTORecovery && time.Now().Sub(s.lastSendTime) > s.rto { + if !s.fr.active && s.state != tcpip.RTORecovery && time.Now().Sub(s.lastSendTime) > s.rto { if s.sndCwnd > InitialCwnd { s.sndCwnd = InitialCwnd } @@ -1062,14 +1040,14 @@ func (s *sender) enterRecovery() { s.fr.highRxt = s.sndUna s.fr.rescueRxt = s.sndUna if s.ep.sackPermitted { - s.state = SACKRecovery + s.state = tcpip.SACKRecovery s.ep.stack.Stats().TCP.SACKRecovery.Increment() // Set TLPRxtOut to false according to // https://tools.ietf.org/html/draft-ietf-tcpm-rack-08#section-7.6.1. s.rc.tlpRxtOut = false return } - s.state = FastRecovery + s.state = tcpip.FastRecovery s.ep.stack.Stats().TCP.FastRecovery.Increment() } @@ -1166,7 +1144,7 @@ func (s *sender) detectLoss(seg *segment) (fastRetransmit bool) { s.fr.highRxt = s.sndUna - 1 // Do run SetPipe() to calculate the outstanding segments. s.SetPipe() - s.state = Disorder + s.state = tcpip.Disorder return false } @@ -1464,7 +1442,7 @@ func (s *sender) handleRcvdSegment(rcvdSeg *segment) { if !s.fr.active { s.cc.Update(originalOutstanding - s.outstanding) if s.fr.last.LessThan(s.sndUna) { - s.state = Open + s.state = tcpip.Open } } |