summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
-rw-r--r--pkg/abi/linux/linux_abi_autogen_unsafe.go26
-rw-r--r--pkg/abi/linux/socket.go12
-rw-r--r--pkg/abi/linux/tcp.go9
-rw-r--r--pkg/sentry/socket/netstack/netstack.go23
-rw-r--r--pkg/tcpip/tcpip.go44
-rw-r--r--pkg/tcpip/transport/tcp/endpoint.go34
-rw-r--r--pkg/tcpip/transport/tcp/rack.go2
-rw-r--r--pkg/tcpip/transport/tcp/snd.go36
8 files changed, 144 insertions, 42 deletions
diff --git a/pkg/abi/linux/linux_abi_autogen_unsafe.go b/pkg/abi/linux/linux_abi_autogen_unsafe.go
index 4960295ae..19f823232 100644
--- a/pkg/abi/linux/linux_abi_autogen_unsafe.go
+++ b/pkg/abi/linux/linux_abi_autogen_unsafe.go
@@ -8951,7 +8951,7 @@ func (s *SockAddrUnix) WriteTo(writer io.Writer) (int64, error) {
// SizeBytes implements marshal.Marshallable.SizeBytes.
func (t *TCPInfo) SizeBytes() int {
- return 192
+ return 224
}
// MarshalBytes implements marshal.Marshallable.MarshalBytes.
@@ -9048,6 +9048,18 @@ func (t *TCPInfo) MarshalBytes(dst []byte) {
dst = dst[8:]
usermem.ByteOrder.PutUint64(dst[:8], uint64(t.SndBufLimited))
dst = dst[8:]
+ usermem.ByteOrder.PutUint32(dst[:4], uint32(t.Delievered))
+ dst = dst[4:]
+ usermem.ByteOrder.PutUint32(dst[:4], uint32(t.DelieveredCe))
+ dst = dst[4:]
+ usermem.ByteOrder.PutUint64(dst[:8], uint64(t.BytesSent))
+ dst = dst[8:]
+ usermem.ByteOrder.PutUint64(dst[:8], uint64(t.BytesRetrans))
+ dst = dst[8:]
+ usermem.ByteOrder.PutUint32(dst[:4], uint32(t.DSACKDups))
+ dst = dst[4:]
+ usermem.ByteOrder.PutUint32(dst[:4], uint32(t.ReordSeen))
+ dst = dst[4:]
}
// UnmarshalBytes implements marshal.Marshallable.UnmarshalBytes.
@@ -9144,6 +9156,18 @@ func (t *TCPInfo) UnmarshalBytes(src []byte) {
src = src[8:]
t.SndBufLimited = uint64(usermem.ByteOrder.Uint64(src[:8]))
src = src[8:]
+ t.Delievered = uint32(usermem.ByteOrder.Uint32(src[:4]))
+ src = src[4:]
+ t.DelieveredCe = uint32(usermem.ByteOrder.Uint32(src[:4]))
+ src = src[4:]
+ t.BytesSent = uint64(usermem.ByteOrder.Uint64(src[:8]))
+ src = src[8:]
+ t.BytesRetrans = uint64(usermem.ByteOrder.Uint64(src[:8]))
+ src = src[8:]
+ t.DSACKDups = uint32(usermem.ByteOrder.Uint32(src[:4]))
+ src = src[4:]
+ t.ReordSeen = uint32(usermem.ByteOrder.Uint32(src[:4]))
+ src = src[4:]
}
// Packed implements marshal.Marshallable.Packed.
diff --git a/pkg/abi/linux/socket.go b/pkg/abi/linux/socket.go
index 8591acbf2..cb33c37bd 100644
--- a/pkg/abi/linux/socket.go
+++ b/pkg/abi/linux/socket.go
@@ -416,6 +416,18 @@ type TCPInfo struct {
RwndLimited uint64
// SndBufLimited is the time in microseconds limited by send buffer.
SndBufLimited uint64
+
+ Delievered uint32
+ DelieveredCe uint32
+
+ // BytesSent is RFC4898 tcpEStatsPerfHCDataOctetsOut.
+ BytesSent uint64
+ // BytesRetrans is RFC4898 tcpEStatsPerfOctetsRetrans.
+ BytesRetrans uint64
+ // DSACKDups is RFC4898 tcpEStatsStackDSACKDups.
+ DSACKDups uint32
+ // ReordSeen is the number of reordering events seen.
+ ReordSeen uint32
}
// SizeOfTCPInfo is the binary size of a TCPInfo struct.
diff --git a/pkg/abi/linux/tcp.go b/pkg/abi/linux/tcp.go
index 2a8d4708b..1a3c0916f 100644
--- a/pkg/abi/linux/tcp.go
+++ b/pkg/abi/linux/tcp.go
@@ -59,3 +59,12 @@ const (
MAX_TCP_KEEPINTVL = 32767
MAX_TCP_KEEPCNT = 127
)
+
+// Congestion control states from include/uapi/linux/tcp.h.
+const (
+ TCP_CA_Open = 0
+ TCP_CA_Disorder = 1
+ TCP_CA_CWR = 2
+ TCP_CA_Recovery = 3
+ TCP_CA_Loss = 4
+)
diff --git a/pkg/sentry/socket/netstack/netstack.go b/pkg/sentry/socket/netstack/netstack.go
index 7065a0e46..3115a227d 100644
--- a/pkg/sentry/socket/netstack/netstack.go
+++ b/pkg/sentry/socket/netstack/netstack.go
@@ -1098,6 +1098,29 @@ func getSockOptTCP(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, name,
// TODO(b/64800844): Translate fields once they are added to
// tcpip.TCPInfoOption.
info := linux.TCPInfo{}
+ switch v.CcState {
+ case tcpip.RTORecovery:
+ info.CaState = linux.TCP_CA_Loss
+ case tcpip.FastRecovery, tcpip.SACKRecovery:
+ info.CaState = linux.TCP_CA_Recovery
+ case tcpip.Disorder:
+ info.CaState = linux.TCP_CA_Disorder
+ case tcpip.Open:
+ info.CaState = linux.TCP_CA_Open
+ }
+ info.RTO = uint32(v.RTO / time.Microsecond)
+ info.RTT = uint32(v.RTT / time.Microsecond)
+ info.RTTVar = uint32(v.RTTVar / time.Microsecond)
+ info.SndSsthresh = v.SndSsthresh
+ info.SndCwnd = v.SndCwnd
+
+ // In netstack reorderSeen is updated only when RACK is enabled.
+ // We only track whether the reordering is seen, which is
+ // different than Linux where reorderSeen is not specific to
+ // RACK and is incremented when a reordering event is seen.
+ if v.ReorderSeen {
+ info.ReordSeen = 1
+ }
// Linux truncates the output binary to outLen.
buf := t.CopyScratchBuffer(info.SizeBytes())
diff --git a/pkg/tcpip/tcpip.go b/pkg/tcpip/tcpip.go
index 812ee36ed..e70ae69ef 100644
--- a/pkg/tcpip/tcpip.go
+++ b/pkg/tcpip/tcpip.go
@@ -993,12 +993,54 @@ type SettableSocketOption interface {
isSettableSocketOption()
}
+// CongestionControlState indicates the current congestion control state for
+// TCP sender.
+type CongestionControlState int
+
+const (
+ // Open indicates that the sender is receiving acks in order and
+ // no loss or dupACK's etc have been detected.
+ Open CongestionControlState = iota
+ // RTORecovery indicates that an RTO has occurred and the sender
+ // has entered an RTO based recovery phase.
+ RTORecovery
+ // FastRecovery indicates that the sender has entered FastRecovery
+ // based on receiving nDupAck's. This state is entered only when
+ // SACK is not in use.
+ FastRecovery
+ // SACKRecovery indicates that the sender has entered SACK based
+ // recovery.
+ SACKRecovery
+ // Disorder indicates the sender either received some SACK blocks
+ // or dupACK's.
+ Disorder
+)
+
// TCPInfoOption is used by GetSockOpt to expose TCP statistics.
//
// TODO(b/64800844): Add and populate stat fields.
type TCPInfoOption struct {
- RTT time.Duration
+ // RTT is the smoothed round trip time.
+ RTT time.Duration
+
+ // RTTVar is the round trip time variation.
RTTVar time.Duration
+
+ // RTO is the retransmission timeout for the endpoint.
+ RTO time.Duration
+
+ // CcState is the congestion control state.
+ CcState CongestionControlState
+
+ // SndCwnd is the congestion window, in packets.
+ SndCwnd uint32
+
+ // SndSsthresh is the threshold between slow start and congestion
+ // avoidance.
+ SndSsthresh uint32
+
+ // ReorderSeen indicates if reordering is seen in the endpoint.
+ ReorderSeen bool
}
func (*TCPInfoOption) isGettableSocketOption() {}
diff --git a/pkg/tcpip/transport/tcp/endpoint.go b/pkg/tcpip/transport/tcp/endpoint.go
index b6bd6d455..bfa5b01fb 100644
--- a/pkg/tcpip/transport/tcp/endpoint.go
+++ b/pkg/tcpip/transport/tcp/endpoint.go
@@ -2011,20 +2011,34 @@ func (e *endpoint) GetSockOptInt(opt tcpip.SockOptInt) (int, *tcpip.Error) {
}
}
+func (e *endpoint) getTCPInfo() tcpip.TCPInfoOption {
+ info := tcpip.TCPInfoOption{}
+ e.LockUser()
+ snd := e.snd
+ if snd != nil {
+ // We do not calculate RTT before sending the data packets. If
+ // the connection did not send and receive data, then RTT will
+ // be zero.
+ snd.rtt.Lock()
+ info.RTT = snd.rtt.srtt
+ info.RTTVar = snd.rtt.rttvar
+ snd.rtt.Unlock()
+
+ info.RTO = snd.rto
+ info.CcState = snd.state
+ info.SndSsthresh = uint32(snd.sndSsthresh)
+ info.SndCwnd = uint32(snd.sndCwnd)
+ info.ReorderSeen = snd.rc.reorderSeen
+ }
+ e.UnlockUser()
+ return info
+}
+
// GetSockOpt implements tcpip.Endpoint.GetSockOpt.
func (e *endpoint) GetSockOpt(opt tcpip.GettableSocketOption) *tcpip.Error {
switch o := opt.(type) {
case *tcpip.TCPInfoOption:
- *o = tcpip.TCPInfoOption{}
- e.LockUser()
- snd := e.snd
- e.UnlockUser()
- if snd != nil {
- snd.rtt.Lock()
- o.RTT = snd.rtt.srtt
- o.RTTVar = snd.rtt.rttvar
- snd.rtt.Unlock()
- }
+ *o = e.getTCPInfo()
case *tcpip.KeepaliveIdleOption:
e.keepalive.Lock()
diff --git a/pkg/tcpip/transport/tcp/rack.go b/pkg/tcpip/transport/tcp/rack.go
index 307bacca5..fdb7e3dc6 100644
--- a/pkg/tcpip/transport/tcp/rack.go
+++ b/pkg/tcpip/transport/tcp/rack.go
@@ -162,7 +162,7 @@ func (s *sender) shouldSchedulePTO() bool {
// The connection supports SACK.
s.ep.sackPermitted &&
// The connection is not in loss recovery.
- (s.state != RTORecovery && s.state != SACKRecovery) &&
+ (s.state != tcpip.RTORecovery && s.state != tcpip.SACKRecovery) &&
// The connection has no SACKed sequences in the SACK scoreboard.
s.ep.scoreboard.Sacked() == 0
}
diff --git a/pkg/tcpip/transport/tcp/snd.go b/pkg/tcpip/transport/tcp/snd.go
index 079d90848..28ef9f899 100644
--- a/pkg/tcpip/transport/tcp/snd.go
+++ b/pkg/tcpip/transport/tcp/snd.go
@@ -48,28 +48,6 @@ const (
MaxRetries = 15
)
-// ccState indicates the current congestion control state for this sender.
-type ccState int
-
-const (
- // Open indicates that the sender is receiving acks in order and
- // no loss or dupACK's etc have been detected.
- Open ccState = iota
- // RTORecovery indicates that an RTO has occurred and the sender
- // has entered an RTO based recovery phase.
- RTORecovery
- // FastRecovery indicates that the sender has entered FastRecovery
- // based on receiving nDupAck's. This state is entered only when
- // SACK is not in use.
- FastRecovery
- // SACKRecovery indicates that the sender has entered SACK based
- // recovery.
- SACKRecovery
- // Disorder indicates the sender either received some SACK blocks
- // or dupACK's.
- Disorder
-)
-
// congestionControl is an interface that must be implemented by any supported
// congestion control algorithm.
type congestionControl interface {
@@ -204,7 +182,7 @@ type sender struct {
maxSentAck seqnum.Value
// state is the current state of congestion control for this endpoint.
- state ccState
+ state tcpip.CongestionControlState
// cc is the congestion control algorithm in use for this sender.
cc congestionControl
@@ -593,7 +571,7 @@ func (s *sender) retransmitTimerExpired() bool {
s.leaveRecovery()
}
- s.state = RTORecovery
+ s.state = tcpip.RTORecovery
s.cc.HandleRTOExpired()
// Mark the next segment to be sent as the first unacknowledged one and
@@ -1018,7 +996,7 @@ func (s *sender) sendData() {
// "A TCP SHOULD set cwnd to no more than RW before beginning
// transmission if the TCP has not sent data in the interval exceeding
// the retrasmission timeout."
- if !s.fr.active && s.state != RTORecovery && time.Now().Sub(s.lastSendTime) > s.rto {
+ if !s.fr.active && s.state != tcpip.RTORecovery && time.Now().Sub(s.lastSendTime) > s.rto {
if s.sndCwnd > InitialCwnd {
s.sndCwnd = InitialCwnd
}
@@ -1062,14 +1040,14 @@ func (s *sender) enterRecovery() {
s.fr.highRxt = s.sndUna
s.fr.rescueRxt = s.sndUna
if s.ep.sackPermitted {
- s.state = SACKRecovery
+ s.state = tcpip.SACKRecovery
s.ep.stack.Stats().TCP.SACKRecovery.Increment()
// Set TLPRxtOut to false according to
// https://tools.ietf.org/html/draft-ietf-tcpm-rack-08#section-7.6.1.
s.rc.tlpRxtOut = false
return
}
- s.state = FastRecovery
+ s.state = tcpip.FastRecovery
s.ep.stack.Stats().TCP.FastRecovery.Increment()
}
@@ -1166,7 +1144,7 @@ func (s *sender) detectLoss(seg *segment) (fastRetransmit bool) {
s.fr.highRxt = s.sndUna - 1
// Do run SetPipe() to calculate the outstanding segments.
s.SetPipe()
- s.state = Disorder
+ s.state = tcpip.Disorder
return false
}
@@ -1464,7 +1442,7 @@ func (s *sender) handleRcvdSegment(rcvdSeg *segment) {
if !s.fr.active {
s.cc.Update(originalOutstanding - s.outstanding)
if s.fr.last.LessThan(s.sndUna) {
- s.state = Open
+ s.state = tcpip.Open
}
}