diff options
-rw-r--r-- | pkg/tcpip/transport/tcp/rack.go | 45 | ||||
-rw-r--r-- | pkg/tcpip/transport/tcp/snd.go | 59 | ||||
-rw-r--r-- | pkg/tcpip/transport/tcp/tcp_state_autogen.go | 6 |
3 files changed, 94 insertions, 16 deletions
diff --git a/pkg/tcpip/transport/tcp/rack.go b/pkg/tcpip/transport/tcp/rack.go index b71e6b992..5a4ee70f5 100644 --- a/pkg/tcpip/transport/tcp/rack.go +++ b/pkg/tcpip/transport/tcp/rack.go @@ -67,6 +67,14 @@ type rackControl struct { // probeTimer and probeWaker are used to schedule PTO for RACK TLP algorithm. probeTimer timer `state:"nosave"` probeWaker sleep.Waker `state:"nosave"` + + // tlpRxtOut indicates whether there is an unacknowledged + // TLP retransmission. + tlpRxtOut bool + + // tlpHighRxt the value of sender.sndNxt at the time of sending + // a TLP retransmission. + tlpHighRxt seqnum.Value } // init initializes RACK specific fields. @@ -203,3 +211,40 @@ func (s *sender) probeTimerExpired() *tcpip.Error { // Arm RTO timer only. return nil } + +// detectTLPRecovery detects if recovery was accomplished by the loss probes +// and updates TLP state accordingly. +// See https://tools.ietf.org/html/draft-ietf-tcpm-rack-08#section-7.6.3. +func (s *sender) detectTLPRecovery(ack seqnum.Value, rcvdSeg *segment) { + if !(s.ep.sackPermitted && s.rc.tlpRxtOut) { + return + } + + // Step 1. + if s.isDupAck(rcvdSeg) && ack == s.rc.tlpHighRxt { + var sbAboveTLPHighRxt bool + for _, sb := range rcvdSeg.parsedOptions.SACKBlocks { + if s.rc.tlpHighRxt.LessThan(sb.End) { + sbAboveTLPHighRxt = true + break + } + } + if !sbAboveTLPHighRxt { + // TLP episode is complete. + s.rc.tlpRxtOut = false + } + } + + if s.rc.tlpRxtOut && s.rc.tlpHighRxt.LessThanEq(ack) { + // TLP episode is complete. + s.rc.tlpRxtOut = false + if !checkDSACK(rcvdSeg) { + // Step 2. Either the original packet or the retransmission (in the + // form of a probe) was lost. Invoke a congestion control response + // equivalent to fast recovery. + s.cc.HandleNDupAcks() + s.enterRecovery() + s.leaveRecovery() + } + } +} diff --git a/pkg/tcpip/transport/tcp/snd.go b/pkg/tcpip/transport/tcp/snd.go index c0e9d98e3..079d90848 100644 --- a/pkg/tcpip/transport/tcp/snd.go +++ b/pkg/tcpip/transport/tcp/snd.go @@ -533,6 +533,10 @@ func (s *sender) retransmitTimerExpired() bool { s.ep.stack.Stats().TCP.Timeouts.Increment() s.ep.stats.SendErrors.Timeouts.Increment() + // Set TLPRxtOut to false according to + // https://tools.ietf.org/html/draft-ietf-tcpm-rack-08#section-7.6.1. + s.rc.tlpRxtOut = false + // Give up if we've waited more than a minute since the last resend or // if a user time out is set and we have exceeded the user specified // timeout since the first retransmission. @@ -1060,6 +1064,9 @@ func (s *sender) enterRecovery() { if s.ep.sackPermitted { s.state = SACKRecovery s.ep.stack.Stats().TCP.SACKRecovery.Increment() + // Set TLPRxtOut to false according to + // https://tools.ietf.org/html/draft-ietf-tcpm-rack-08#section-7.6.1. + s.rc.tlpRxtOut = false return } s.state = FastRecovery @@ -1143,19 +1150,11 @@ func (s *sender) SetPipe() { // detected. It manages the state related to duplicate acks and determines if // a retransmit is needed according to the rules in RFC 6582 (NewReno). func (s *sender) detectLoss(seg *segment) (fastRetransmit bool) { - ack := seg.ackNumber - - // We're not in fast recovery yet. A segment is considered a duplicate - // only if it doesn't carry any data and doesn't update the send window, - // because if it does, it wasn't sent in response to an out-of-order - // segment. If SACK is enabled then we have an additional check to see - // if the segment carries new SACK information. If it does then it is - // considered a duplicate ACK as per RFC6675. - if ack != s.sndUna || seg.logicalLen() != 0 || s.sndWnd != seg.window || ack == s.sndNxt { - if !s.ep.sackPermitted || !seg.hasNewSACKInfo { - s.dupAckCount = 0 - return false - } + // We're not in fast recovery yet. + + if !s.isDupAck(seg) { + s.dupAckCount = 0 + return false } s.dupAckCount++ @@ -1186,6 +1185,31 @@ func (s *sender) detectLoss(seg *segment) (fastRetransmit bool) { return true } +// isDupAck determines if seg is a duplicate ack as defined in +// https://tools.ietf.org/html/rfc5681#section-2. +func (s *sender) isDupAck(seg *segment) bool { + // A TCP that utilizes selective acknowledgments (SACKs) [RFC2018, RFC2883] + // can leverage the SACK information to determine when an incoming ACK is a + // "duplicate" (e.g., if the ACK contains previously unknown SACK + // information). + if s.ep.sackPermitted && !seg.hasNewSACKInfo { + return false + } + + // (a) The receiver of the ACK has outstanding data. + return s.sndUna != s.sndNxt && + // (b) The incoming acknowledgment carries no data. + seg.logicalLen() == 0 && + // (c) The SYN and FIN bits are both off. + !seg.flagIsSet(header.TCPFlagFin) && !seg.flagIsSet(header.TCPFlagSyn) && + // (d) the ACK number is equal to the greatest acknowledgment received on + // the given connection (TCP.UNA from RFC793). + seg.ackNumber == s.sndUna && + // (e) the advertised window in the incoming acknowledgment equals the + // advertised window in the last incoming acknowledgment. + s.sndWnd == seg.window +} + // Iterate the writeList and update RACK for each segment which is newly acked // either cumulatively or selectively. Loop through the segments which are // sacked, and update the RACK related variables and check for reordering. @@ -1196,7 +1220,7 @@ func (s *sender) walkSACK(rcvdSeg *segment) { // Look for DSACK block. idx := 0 n := len(rcvdSeg.parsedOptions.SACKBlocks) - if s.checkDSACK(rcvdSeg) { + if checkDSACK(rcvdSeg) { s.rc.setDSACKSeen() idx = 1 n-- @@ -1228,8 +1252,8 @@ func (s *sender) walkSACK(rcvdSeg *segment) { } } -// checkDSACK checks if a DSACK is reported and updates it in RACK. -func (s *sender) checkDSACK(rcvdSeg *segment) bool { +// checkDSACK checks if a DSACK is reported. +func checkDSACK(rcvdSeg *segment) bool { n := len(rcvdSeg.parsedOptions.SACKBlocks) if n == 0 { return false @@ -1338,6 +1362,9 @@ func (s *sender) handleRcvdSegment(rcvdSeg *segment) { fastRetransmit = s.detectLoss(rcvdSeg) } + // See if TLP based recovery was successful. + s.detectTLPRecovery(ack, rcvdSeg) + // Stash away the current window size. s.sndWnd = rcvdSeg.window diff --git a/pkg/tcpip/transport/tcp/tcp_state_autogen.go b/pkg/tcpip/transport/tcp/tcp_state_autogen.go index 272ad67bd..251909dae 100644 --- a/pkg/tcpip/transport/tcp/tcp_state_autogen.go +++ b/pkg/tcpip/transport/tcp/tcp_state_autogen.go @@ -403,6 +403,8 @@ func (rc *rackControl) StateFields() []string { "rtt", "reorderSeen", "xmitTime", + "tlpRxtOut", + "tlpHighRxt", } } @@ -418,6 +420,8 @@ func (rc *rackControl) StateSave(stateSinkObject state.Sink) { stateSinkObject.Save(3, &rc.minRTT) stateSinkObject.Save(4, &rc.rtt) stateSinkObject.Save(5, &rc.reorderSeen) + stateSinkObject.Save(7, &rc.tlpRxtOut) + stateSinkObject.Save(8, &rc.tlpHighRxt) } func (rc *rackControl) StateLoad(stateSourceObject state.Source) { @@ -427,6 +431,8 @@ func (rc *rackControl) StateLoad(stateSourceObject state.Source) { stateSourceObject.Load(3, &rc.minRTT) stateSourceObject.Load(4, &rc.rtt) stateSourceObject.Load(5, &rc.reorderSeen) + stateSourceObject.Load(7, &rc.tlpRxtOut) + stateSourceObject.Load(8, &rc.tlpHighRxt) stateSourceObject.LoadValue(6, new(unixTime), func(y interface{}) { rc.loadXmitTime(y.(unixTime)) }) stateSourceObject.AfterLoad(rc.afterLoad) } |