summaryrefslogtreecommitdiffhomepage
path: root/pkg
diff options
context:
space:
mode:
Diffstat (limited to 'pkg')
-rw-r--r--pkg/tcpip/transport/tcp/rack.go45
-rw-r--r--pkg/tcpip/transport/tcp/snd.go59
2 files changed, 88 insertions, 16 deletions
diff --git a/pkg/tcpip/transport/tcp/rack.go b/pkg/tcpip/transport/tcp/rack.go
index b71e6b992..5a4ee70f5 100644
--- a/pkg/tcpip/transport/tcp/rack.go
+++ b/pkg/tcpip/transport/tcp/rack.go
@@ -67,6 +67,14 @@ type rackControl struct {
// probeTimer and probeWaker are used to schedule PTO for RACK TLP algorithm.
probeTimer timer `state:"nosave"`
probeWaker sleep.Waker `state:"nosave"`
+
+ // tlpRxtOut indicates whether there is an unacknowledged
+ // TLP retransmission.
+ tlpRxtOut bool
+
+ // tlpHighRxt the value of sender.sndNxt at the time of sending
+ // a TLP retransmission.
+ tlpHighRxt seqnum.Value
}
// init initializes RACK specific fields.
@@ -203,3 +211,40 @@ func (s *sender) probeTimerExpired() *tcpip.Error {
// Arm RTO timer only.
return nil
}
+
+// detectTLPRecovery detects if recovery was accomplished by the loss probes
+// and updates TLP state accordingly.
+// See https://tools.ietf.org/html/draft-ietf-tcpm-rack-08#section-7.6.3.
+func (s *sender) detectTLPRecovery(ack seqnum.Value, rcvdSeg *segment) {
+ if !(s.ep.sackPermitted && s.rc.tlpRxtOut) {
+ return
+ }
+
+ // Step 1.
+ if s.isDupAck(rcvdSeg) && ack == s.rc.tlpHighRxt {
+ var sbAboveTLPHighRxt bool
+ for _, sb := range rcvdSeg.parsedOptions.SACKBlocks {
+ if s.rc.tlpHighRxt.LessThan(sb.End) {
+ sbAboveTLPHighRxt = true
+ break
+ }
+ }
+ if !sbAboveTLPHighRxt {
+ // TLP episode is complete.
+ s.rc.tlpRxtOut = false
+ }
+ }
+
+ if s.rc.tlpRxtOut && s.rc.tlpHighRxt.LessThanEq(ack) {
+ // TLP episode is complete.
+ s.rc.tlpRxtOut = false
+ if !checkDSACK(rcvdSeg) {
+ // Step 2. Either the original packet or the retransmission (in the
+ // form of a probe) was lost. Invoke a congestion control response
+ // equivalent to fast recovery.
+ s.cc.HandleNDupAcks()
+ s.enterRecovery()
+ s.leaveRecovery()
+ }
+ }
+}
diff --git a/pkg/tcpip/transport/tcp/snd.go b/pkg/tcpip/transport/tcp/snd.go
index c0e9d98e3..079d90848 100644
--- a/pkg/tcpip/transport/tcp/snd.go
+++ b/pkg/tcpip/transport/tcp/snd.go
@@ -533,6 +533,10 @@ func (s *sender) retransmitTimerExpired() bool {
s.ep.stack.Stats().TCP.Timeouts.Increment()
s.ep.stats.SendErrors.Timeouts.Increment()
+ // Set TLPRxtOut to false according to
+ // https://tools.ietf.org/html/draft-ietf-tcpm-rack-08#section-7.6.1.
+ s.rc.tlpRxtOut = false
+
// Give up if we've waited more than a minute since the last resend or
// if a user time out is set and we have exceeded the user specified
// timeout since the first retransmission.
@@ -1060,6 +1064,9 @@ func (s *sender) enterRecovery() {
if s.ep.sackPermitted {
s.state = SACKRecovery
s.ep.stack.Stats().TCP.SACKRecovery.Increment()
+ // Set TLPRxtOut to false according to
+ // https://tools.ietf.org/html/draft-ietf-tcpm-rack-08#section-7.6.1.
+ s.rc.tlpRxtOut = false
return
}
s.state = FastRecovery
@@ -1143,19 +1150,11 @@ func (s *sender) SetPipe() {
// detected. It manages the state related to duplicate acks and determines if
// a retransmit is needed according to the rules in RFC 6582 (NewReno).
func (s *sender) detectLoss(seg *segment) (fastRetransmit bool) {
- ack := seg.ackNumber
-
- // We're not in fast recovery yet. A segment is considered a duplicate
- // only if it doesn't carry any data and doesn't update the send window,
- // because if it does, it wasn't sent in response to an out-of-order
- // segment. If SACK is enabled then we have an additional check to see
- // if the segment carries new SACK information. If it does then it is
- // considered a duplicate ACK as per RFC6675.
- if ack != s.sndUna || seg.logicalLen() != 0 || s.sndWnd != seg.window || ack == s.sndNxt {
- if !s.ep.sackPermitted || !seg.hasNewSACKInfo {
- s.dupAckCount = 0
- return false
- }
+ // We're not in fast recovery yet.
+
+ if !s.isDupAck(seg) {
+ s.dupAckCount = 0
+ return false
}
s.dupAckCount++
@@ -1186,6 +1185,31 @@ func (s *sender) detectLoss(seg *segment) (fastRetransmit bool) {
return true
}
+// isDupAck determines if seg is a duplicate ack as defined in
+// https://tools.ietf.org/html/rfc5681#section-2.
+func (s *sender) isDupAck(seg *segment) bool {
+ // A TCP that utilizes selective acknowledgments (SACKs) [RFC2018, RFC2883]
+ // can leverage the SACK information to determine when an incoming ACK is a
+ // "duplicate" (e.g., if the ACK contains previously unknown SACK
+ // information).
+ if s.ep.sackPermitted && !seg.hasNewSACKInfo {
+ return false
+ }
+
+ // (a) The receiver of the ACK has outstanding data.
+ return s.sndUna != s.sndNxt &&
+ // (b) The incoming acknowledgment carries no data.
+ seg.logicalLen() == 0 &&
+ // (c) The SYN and FIN bits are both off.
+ !seg.flagIsSet(header.TCPFlagFin) && !seg.flagIsSet(header.TCPFlagSyn) &&
+ // (d) the ACK number is equal to the greatest acknowledgment received on
+ // the given connection (TCP.UNA from RFC793).
+ seg.ackNumber == s.sndUna &&
+ // (e) the advertised window in the incoming acknowledgment equals the
+ // advertised window in the last incoming acknowledgment.
+ s.sndWnd == seg.window
+}
+
// Iterate the writeList and update RACK for each segment which is newly acked
// either cumulatively or selectively. Loop through the segments which are
// sacked, and update the RACK related variables and check for reordering.
@@ -1196,7 +1220,7 @@ func (s *sender) walkSACK(rcvdSeg *segment) {
// Look for DSACK block.
idx := 0
n := len(rcvdSeg.parsedOptions.SACKBlocks)
- if s.checkDSACK(rcvdSeg) {
+ if checkDSACK(rcvdSeg) {
s.rc.setDSACKSeen()
idx = 1
n--
@@ -1228,8 +1252,8 @@ func (s *sender) walkSACK(rcvdSeg *segment) {
}
}
-// checkDSACK checks if a DSACK is reported and updates it in RACK.
-func (s *sender) checkDSACK(rcvdSeg *segment) bool {
+// checkDSACK checks if a DSACK is reported.
+func checkDSACK(rcvdSeg *segment) bool {
n := len(rcvdSeg.parsedOptions.SACKBlocks)
if n == 0 {
return false
@@ -1338,6 +1362,9 @@ func (s *sender) handleRcvdSegment(rcvdSeg *segment) {
fastRetransmit = s.detectLoss(rcvdSeg)
}
+ // See if TLP based recovery was successful.
+ s.detectTLPRecovery(ack, rcvdSeg)
+
// Stash away the current window size.
s.sndWnd = rcvdSeg.window