summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
-rw-r--r--pkg/tcpip/transport/tcp/BUILD2
-rw-r--r--pkg/tcpip/transport/tcp/reno_recovery.go67
-rw-r--r--pkg/tcpip/transport/tcp/sack_recovery.go120
-rw-r--r--pkg/tcpip/transport/tcp/snd.go283
4 files changed, 187 insertions, 285 deletions
diff --git a/pkg/tcpip/transport/tcp/BUILD b/pkg/tcpip/transport/tcp/BUILD
index 3d8174a4f..518449602 100644
--- a/pkg/tcpip/transport/tcp/BUILD
+++ b/pkg/tcpip/transport/tcp/BUILD
@@ -45,9 +45,7 @@ go_library(
"rcv.go",
"rcv_state.go",
"reno.go",
- "reno_recovery.go",
"sack.go",
- "sack_recovery.go",
"sack_scoreboard.go",
"segment.go",
"segment_heap.go",
diff --git a/pkg/tcpip/transport/tcp/reno_recovery.go b/pkg/tcpip/transport/tcp/reno_recovery.go
deleted file mode 100644
index 2aa708e97..000000000
--- a/pkg/tcpip/transport/tcp/reno_recovery.go
+++ /dev/null
@@ -1,67 +0,0 @@
-// Copyright 2020 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package tcp
-
-// renoRecovery stores the variables related to TCP Reno loss recovery
-// algorithm.
-//
-// +stateify savable
-type renoRecovery struct {
- s *sender
-}
-
-func newRenoRecovery(s *sender) *renoRecovery {
- return &renoRecovery{s: s}
-}
-
-func (rr *renoRecovery) DoRecovery(rcvdSeg *segment, fastRetransmit bool) {
- ack := rcvdSeg.ackNumber
- snd := rr.s
-
- // We are in fast recovery mode. Ignore the ack if it's out of range.
- if !ack.InRange(snd.sndUna, snd.sndNxt+1) {
- return
- }
-
- // Don't count this as a duplicate if it is carrying data or
- // updating the window.
- if rcvdSeg.logicalLen() != 0 || snd.sndWnd != rcvdSeg.window {
- return
- }
-
- // Inflate the congestion window if we're getting duplicate acks
- // for the packet we retransmitted.
- if !fastRetransmit && ack == snd.fr.first {
- // We received a dup, inflate the congestion window by 1 packet
- // if we're not at the max yet. Only inflate the window if
- // regular FastRecovery is in use, RFC6675 does not require
- // inflating cwnd on duplicate ACKs.
- if snd.sndCwnd < snd.fr.maxCwnd {
- snd.sndCwnd++
- }
- return
- }
-
- // A partial ack was received. Retransmit this packet and remember it
- // so that we don't retransmit it again.
- //
- // We don't inflate the window because we're putting the same packet
- // back onto the wire.
- //
- // N.B. The retransmit timer will be reset by the caller.
- snd.fr.first = ack
- snd.dupAckCount = 0
- snd.resendSegment()
-}
diff --git a/pkg/tcpip/transport/tcp/sack_recovery.go b/pkg/tcpip/transport/tcp/sack_recovery.go
deleted file mode 100644
index 7e813fa96..000000000
--- a/pkg/tcpip/transport/tcp/sack_recovery.go
+++ /dev/null
@@ -1,120 +0,0 @@
-// Copyright 2020 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package tcp
-
-import "gvisor.dev/gvisor/pkg/tcpip/seqnum"
-
-// sackRecovery stores the variables related to TCP SACK loss recovery
-// algorithm.
-//
-// +stateify savable
-type sackRecovery struct {
- s *sender
-}
-
-func newSACKRecovery(s *sender) *sackRecovery {
- return &sackRecovery{s: s}
-}
-
-// handleSACKRecovery implements the loss recovery phase as described in RFC6675
-// section 5, step C.
-func (sr *sackRecovery) handleSACKRecovery(limit int, end seqnum.Value) (dataSent bool) {
- snd := sr.s
- snd.SetPipe()
-
- if smss := int(snd.ep.scoreboard.SMSS()); limit > smss {
- // Cap segment size limit to s.smss as SACK recovery requires
- // that all retransmissions or new segments send during recovery
- // be of <= SMSS.
- limit = smss
- }
-
- nextSegHint := snd.writeList.Front()
- for snd.outstanding < snd.sndCwnd {
- var nextSeg *segment
- var rescueRtx bool
- nextSeg, nextSegHint, rescueRtx = snd.NextSeg(nextSegHint)
- if nextSeg == nil {
- return dataSent
- }
- if !snd.isAssignedSequenceNumber(nextSeg) || snd.sndNxt.LessThanEq(nextSeg.sequenceNumber) {
- // New data being sent.
-
- // Step C.3 described below is handled by
- // maybeSendSegment which increments sndNxt when
- // a segment is transmitted.
- //
- // Step C.3 "If any of the data octets sent in
- // (C.1) are above HighData, HighData must be
- // updated to reflect the transmission of
- // previously unsent data."
- //
- // We pass s.smss as the limit as the Step 2) requires that
- // new data sent should be of size s.smss or less.
- if sent := snd.maybeSendSegment(nextSeg, limit, end); !sent {
- return dataSent
- }
- dataSent = true
- snd.outstanding++
- snd.writeNext = nextSeg.Next()
- continue
- }
-
- // Now handle the retransmission case where we matched either step 1,3 or 4
- // of the NextSeg algorithm.
- // RFC 6675, Step C.4.
- //
- // "The estimate of the amount of data outstanding in the network
- // must be updated by incrementing pipe by the number of octets
- // transmitted in (C.1)."
- snd.outstanding++
- dataSent = true
- snd.sendSegment(nextSeg)
-
- segEnd := nextSeg.sequenceNumber.Add(nextSeg.logicalLen())
- if rescueRtx {
- // We do the last part of rule (4) of NextSeg here to update
- // RescueRxt as until this point we don't know if we are going
- // to use the rescue transmission.
- snd.fr.rescueRxt = snd.fr.last
- } else {
- // RFC 6675, Step C.2
- //
- // "If any of the data octets sent in (C.1) are below
- // HighData, HighRxt MUST be set to the highest sequence
- // number of the retransmitted segment unless NextSeg ()
- // rule (4) was invoked for this retransmission."
- snd.fr.highRxt = segEnd - 1
- }
- }
- return dataSent
-}
-
-func (sr *sackRecovery) DoRecovery(rcvdSeg *segment, fastRetransmit bool) {
- snd := sr.s
- if fastRetransmit {
- snd.resendSegment()
- }
-
- // We are in fast recovery mode. Ignore the ack if it's out of range.
- if ack := rcvdSeg.ackNumber; !ack.InRange(snd.sndUna, snd.sndNxt+1) {
- return
- }
-
- // RFC 6675 recovery algorithm step C 1-5.
- end := snd.sndUna.Add(snd.sndWnd)
- dataSent := sr.handleSACKRecovery(snd.maxPayloadSize, end)
- snd.postXmit(dataSent)
-}
diff --git a/pkg/tcpip/transport/tcp/snd.go b/pkg/tcpip/transport/tcp/snd.go
index 201cf9aa9..0e0fdf14c 100644
--- a/pkg/tcpip/transport/tcp/snd.go
+++ b/pkg/tcpip/transport/tcp/snd.go
@@ -92,17 +92,6 @@ type congestionControl interface {
PostRecovery()
}
-// lossRecovery is an interface that must be implemented by any supported
-// loss recovery algorithm.
-type lossRecovery interface {
- // DoRecovery is invoked when loss is detected and segments need
- // to be retransmitted. The cumulative or selective ACK is passed along
- // with the flag which identifies whether the connection entered fast
- // retransmit with this ACK and to retransmit the first unacknowledged
- // segment.
- DoRecovery(rcvdSeg *segment, fastRetransmit bool)
-}
-
// sender holds the state necessary to send TCP segments.
//
// +stateify savable
@@ -119,9 +108,6 @@ type sender struct {
// fr holds state related to fast recovery.
fr fastRecovery
- // lr is the loss recovery algorithm used by the sender.
- lr lossRecovery
-
// sndCwnd is the congestion window, in packets.
sndCwnd int
@@ -290,8 +276,6 @@ func newSender(ep *endpoint, iss, irs seqnum.Value, sndWnd seqnum.Size, mss uint
s.cc = s.initCongestionControl(ep.cc)
- s.lr = s.initLossRecovery()
-
// A negative sndWndScale means that no scaling is in use, otherwise we
// store the scaling value.
if sndWndScale > 0 {
@@ -346,14 +330,6 @@ func (s *sender) initCongestionControl(congestionControlName tcpip.CongestionCon
}
}
-// initLossRecovery initiates the loss recovery algorithm for the sender.
-func (s *sender) initLossRecovery() lossRecovery {
- if s.ep.sackPermitted {
- return newSACKRecovery(s)
- }
- return newRenoRecovery(s)
-}
-
// updateMaxPayloadSize updates the maximum payload size based on the given
// MTU. If this is in response to "packet too big" control packets (indicated
// by the count argument), it also reduces the number of outstanding packets and
@@ -574,7 +550,7 @@ func (s *sender) retransmitTimerExpired() bool {
// We were attempting fast recovery but were not successful.
// Leave the state. We don't need to update ssthresh because it
// has already been updated when entered fast-recovery.
- s.leaveRecovery()
+ s.leaveFastRecovery()
}
s.state = RTORecovery
@@ -937,6 +913,79 @@ func (s *sender) maybeSendSegment(seg *segment, limit int, end seqnum.Value) (se
return true
}
+// handleSACKRecovery implements the loss recovery phase as described in RFC6675
+// section 5, step C.
+func (s *sender) handleSACKRecovery(limit int, end seqnum.Value) (dataSent bool) {
+ s.SetPipe()
+
+ if smss := int(s.ep.scoreboard.SMSS()); limit > smss {
+ // Cap segment size limit to s.smss as SACK recovery requires
+ // that all retransmissions or new segments send during recovery
+ // be of <= SMSS.
+ limit = smss
+ }
+
+ nextSegHint := s.writeList.Front()
+ for s.outstanding < s.sndCwnd {
+ var nextSeg *segment
+ var rescueRtx bool
+ nextSeg, nextSegHint, rescueRtx = s.NextSeg(nextSegHint)
+ if nextSeg == nil {
+ return dataSent
+ }
+ if !s.isAssignedSequenceNumber(nextSeg) || s.sndNxt.LessThanEq(nextSeg.sequenceNumber) {
+ // New data being sent.
+
+ // Step C.3 described below is handled by
+ // maybeSendSegment which increments sndNxt when
+ // a segment is transmitted.
+ //
+ // Step C.3 "If any of the data octets sent in
+ // (C.1) are above HighData, HighData must be
+ // updated to reflect the transmission of
+ // previously unsent data."
+ //
+ // We pass s.smss as the limit as the Step 2) requires that
+ // new data sent should be of size s.smss or less.
+ if sent := s.maybeSendSegment(nextSeg, limit, end); !sent {
+ return dataSent
+ }
+ dataSent = true
+ s.outstanding++
+ s.writeNext = nextSeg.Next()
+ continue
+ }
+
+ // Now handle the retransmission case where we matched either step 1,3 or 4
+ // of the NextSeg algorithm.
+ // RFC 6675, Step C.4.
+ //
+ // "The estimate of the amount of data outstanding in the network
+ // must be updated by incrementing pipe by the number of octets
+ // transmitted in (C.1)."
+ s.outstanding++
+ dataSent = true
+ s.sendSegment(nextSeg)
+
+ segEnd := nextSeg.sequenceNumber.Add(nextSeg.logicalLen())
+ if rescueRtx {
+ // We do the last part of rule (4) of NextSeg here to update
+ // RescueRxt as until this point we don't know if we are going
+ // to use the rescue transmission.
+ s.fr.rescueRxt = s.fr.last
+ } else {
+ // RFC 6675, Step C.2
+ //
+ // "If any of the data octets sent in (C.1) are below
+ // HighData, HighRxt MUST be set to the highest sequence
+ // number of the retransmitted segment unless NextSeg ()
+ // rule (4) was invoked for this retransmission."
+ s.fr.highRxt = segEnd - 1
+ }
+ }
+ return dataSent
+}
+
func (s *sender) sendZeroWindowProbe() {
ack, win := s.ep.rcv.getSendParams()
s.unackZeroWindowProbes++
@@ -965,30 +1014,6 @@ func (s *sender) disableZeroWindowProbing() {
s.resendTimer.disable()
}
-func (s *sender) postXmit(dataSent bool) {
- if dataSent {
- // We sent data, so we should stop the keepalive timer to ensure
- // that no keepalives are sent while there is pending data.
- s.ep.disableKeepaliveTimer()
- }
-
- // If the sender has advertized zero receive window and we have
- // data to be sent out, start zero window probing to query the
- // the remote for it's receive window size.
- if s.writeNext != nil && s.sndWnd == 0 {
- s.enableZeroWindowProbing()
- }
-
- // Enable the timer if we have pending data and it's not enabled yet.
- if !s.resendTimer.enabled() && s.sndUna != s.sndNxt {
- s.resendTimer.enable(s.rto)
- }
- // If we have no more pending data, start the keepalive timer.
- if s.sndUna == s.sndNxt {
- s.ep.resetKeepaliveTimer(false)
- }
-}
-
// sendData sends new data segments. It is called when data becomes available or
// when the send window opens up.
func (s *sender) sendData() {
@@ -1009,29 +1034,55 @@ func (s *sender) sendData() {
}
var dataSent bool
- for seg := s.writeNext; seg != nil && s.outstanding < s.sndCwnd; seg = seg.Next() {
- cwndLimit := (s.sndCwnd - s.outstanding) * s.maxPayloadSize
- if cwndLimit < limit {
- limit = cwndLimit
- }
- if s.isAssignedSequenceNumber(seg) && s.ep.sackPermitted && s.ep.scoreboard.IsSACKED(seg.sackBlock()) {
- // Move writeNext along so that we don't try and scan data that
- // has already been SACKED.
+
+ // RFC 6675 recovery algorithm step C 1-5.
+ if s.fr.active && s.ep.sackPermitted {
+ dataSent = s.handleSACKRecovery(s.maxPayloadSize, end)
+ } else {
+ for seg := s.writeNext; seg != nil && s.outstanding < s.sndCwnd; seg = seg.Next() {
+ cwndLimit := (s.sndCwnd - s.outstanding) * s.maxPayloadSize
+ if cwndLimit < limit {
+ limit = cwndLimit
+ }
+ if s.isAssignedSequenceNumber(seg) && s.ep.sackPermitted && s.ep.scoreboard.IsSACKED(seg.sackBlock()) {
+ // Move writeNext along so that we don't try and scan data that
+ // has already been SACKED.
+ s.writeNext = seg.Next()
+ continue
+ }
+ if sent := s.maybeSendSegment(seg, limit, end); !sent {
+ break
+ }
+ dataSent = true
+ s.outstanding += s.pCount(seg)
s.writeNext = seg.Next()
- continue
}
- if sent := s.maybeSendSegment(seg, limit, end); !sent {
- break
- }
- dataSent = true
- s.outstanding += s.pCount(seg)
- s.writeNext = seg.Next()
}
- s.postXmit(dataSent)
+ if dataSent {
+ // We sent data, so we should stop the keepalive timer to ensure
+ // that no keepalives are sent while there is pending data.
+ s.ep.disableKeepaliveTimer()
+ }
+
+ // If the sender has advertized zero receive window and we have
+ // data to be sent out, start zero window probing to query the
+ // the remote for it's receive window size.
+ if s.writeNext != nil && s.sndWnd == 0 {
+ s.enableZeroWindowProbing()
+ }
+
+ // Enable the timer if we have pending data and it's not enabled yet.
+ if !s.resendTimer.enabled() && s.sndUna != s.sndNxt {
+ s.resendTimer.enable(s.rto)
+ }
+ // If we have no more pending data, start the keepalive timer.
+ if s.sndUna == s.sndNxt {
+ s.ep.resetKeepaliveTimer(false)
+ }
}
-func (s *sender) enterRecovery() {
+func (s *sender) enterFastRecovery() {
s.fr.active = true
// Save state to reflect we're now in fast recovery.
//
@@ -1044,7 +1095,6 @@ func (s *sender) enterRecovery() {
s.fr.maxCwnd = s.sndCwnd + s.outstanding
s.fr.highRxt = s.sndUna
s.fr.rescueRxt = s.sndUna
-
if s.ep.sackPermitted {
s.state = SACKRecovery
s.ep.stack.Stats().TCP.SACKRecovery.Increment()
@@ -1054,7 +1104,7 @@ func (s *sender) enterRecovery() {
s.ep.stack.Stats().TCP.FastRecovery.Increment()
}
-func (s *sender) leaveRecovery() {
+func (s *sender) leaveFastRecovery() {
s.fr.active = false
s.fr.maxCwnd = 0
s.dupAckCount = 0
@@ -1065,6 +1115,57 @@ func (s *sender) leaveRecovery() {
s.cc.PostRecovery()
}
+func (s *sender) handleFastRecovery(seg *segment) (rtx bool) {
+ ack := seg.ackNumber
+ // We are in fast recovery mode. Ignore the ack if it's out of
+ // range.
+ if !ack.InRange(s.sndUna, s.sndNxt+1) {
+ return false
+ }
+
+ // Leave fast recovery if it acknowledges all the data covered by
+ // this fast recovery session.
+ if s.fr.last.LessThan(ack) {
+ s.leaveFastRecovery()
+ return false
+ }
+
+ if s.ep.sackPermitted {
+ // When SACK is enabled we let retransmission be governed by
+ // the SACK logic.
+ return false
+ }
+
+ // Don't count this as a duplicate if it is carrying data or
+ // updating the window.
+ if seg.logicalLen() != 0 || s.sndWnd != seg.window {
+ return false
+ }
+
+ // Inflate the congestion window if we're getting duplicate acks
+ // for the packet we retransmitted.
+ if ack == s.fr.first {
+ // We received a dup, inflate the congestion window by 1 packet
+ // if we're not at the max yet. Only inflate the window if
+ // regular FastRecovery is in use, RFC6675 does not require
+ // inflating cwnd on duplicate ACKs.
+ if s.sndCwnd < s.fr.maxCwnd {
+ s.sndCwnd++
+ }
+ return false
+ }
+
+ // A partial ack was received. Retransmit this packet and
+ // remember it so that we don't retransmit it again. We don't
+ // inflate the window because we're putting the same packet back
+ // onto the wire.
+ //
+ // N.B. The retransmit timer will be reset by the caller.
+ s.fr.first = ack
+ s.dupAckCount = 0
+ return true
+}
+
// isAssignedSequenceNumber relies on the fact that we only set flags once a
// sequencenumber is assigned and that is only done right before we send the
// segment. As a result any segment that has a non-zero flag has a valid
@@ -1127,11 +1228,14 @@ func (s *sender) SetPipe() {
s.outstanding = pipe
}
-// detectLoss is called when an ack is received and returns whether a loss is
-// detected. It manages the state related to duplicate acks and determines if
-// a retransmit is needed according to the rules in RFC 6582 (NewReno).
-func (s *sender) detectLoss(seg *segment) (fastRetransmit bool) {
+// checkDuplicateAck is called when an ack is received. It manages the state
+// related to duplicate acks and determines if a retransmit is needed according
+// to the rules in RFC 6582 (NewReno).
+func (s *sender) checkDuplicateAck(seg *segment) (rtx bool) {
ack := seg.ackNumber
+ if s.fr.active {
+ return s.handleFastRecovery(seg)
+ }
// We're not in fast recovery yet. A segment is considered a duplicate
// only if it doesn't carry any data and doesn't update the send window,
@@ -1162,16 +1266,15 @@ func (s *sender) detectLoss(seg *segment) (fastRetransmit bool) {
// See: https://tools.ietf.org/html/rfc6582#section-3.2 Step 2
//
// We only do the check here, the incrementing of last to the highest
- // sequence number transmitted till now is done when enterRecovery
+ // sequence number transmitted till now is done when enterFastRecovery
// is invoked.
if !s.fr.last.LessThan(seg.ackNumber) {
s.dupAckCount = 0
return false
}
s.cc.HandleNDupAcks()
- s.enterRecovery()
+ s.enterFastRecovery()
s.dupAckCount = 0
-
return true
}
@@ -1312,21 +1415,14 @@ func (s *sender) handleRcvdSegment(rcvdSeg *segment) {
s.SetPipe()
}
- ack := rcvdSeg.ackNumber
- if s.fr.active {
- // Leave fast recovery if it acknowledges all the data covered by
- // this fast recovery session.
- if s.fr.last.LessThan(ack) {
- s.leaveRecovery()
- }
- }
-
- // Detect loss by counting the duplicates and enter recovery.
- fastRetransmit := s.detectLoss(rcvdSeg)
+ // Count the duplicates and do the fast retransmit if needed.
+ rtx := s.checkDuplicateAck(rcvdSeg)
// Stash away the current window size.
s.sndWnd = rcvdSeg.window
+ ack := rcvdSeg.ackNumber
+
// Disable zero window probing if remote advertizes a non-zero receive
// window. This can be with an ACK to the zero window probe (where the
// acknumber refers to the already acknowledged byte) OR to any previously
@@ -1443,24 +1539,19 @@ func (s *sender) handleRcvdSegment(rcvdSeg *segment) {
s.resendTimer.disable()
}
}
-
// Now that we've popped all acknowledged data from the retransmit
// queue, retransmit if needed.
- if s.fr.active {
- s.lr.DoRecovery(rcvdSeg, fastRetransmit)
- // When SACK is enabled data sending is governed by steps in
- // RFC 6675 Section 5 recovery steps A-C.
- // See: https://tools.ietf.org/html/rfc6675#section-5.
- if s.ep.sackPermitted {
- return
- }
+ if rtx {
+ s.resendSegment()
}
// Send more data now that some of the pending data has been ack'd, or
// that the window opened up, or the congestion window was inflated due
// to a duplicate ack during fast recovery. This will also re-enable
// the retransmit timer if needed.
- s.sendData()
+ if !s.ep.sackPermitted || s.fr.active || s.dupAckCount == 0 || rcvdSeg.hasNewSACKInfo {
+ s.sendData()
+ }
}
// sendSegment sends the specified segment.