summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
-rw-r--r--pkg/tcpip/stack/stack.go13
-rw-r--r--pkg/tcpip/stack/stack_options.go25
-rw-r--r--pkg/tcpip/transport/tcp/connect.go7
-rw-r--r--pkg/tcpip/transport/tcp/endpoint.go20
-rw-r--r--pkg/tcpip/transport/tcp/endpoint_state.go10
-rw-r--r--pkg/tcpip/transport/tcp/rcv.go4
-rw-r--r--pkg/tcpip/transport/tcp/snd.go10
-rw-r--r--pkg/tcpip/transport/tcp/tcp_state_autogen.go4
8 files changed, 88 insertions, 5 deletions
diff --git a/pkg/tcpip/stack/stack.go b/pkg/tcpip/stack/stack.go
index 57ad412a1..a51d758d0 100644
--- a/pkg/tcpip/stack/stack.go
+++ b/pkg/tcpip/stack/stack.go
@@ -458,6 +458,18 @@ type Stack struct {
// receiveBufferSize holds the min/default/max receive buffer sizes for
// endpoints other than TCP.
receiveBufferSize ReceiveBufferSizeOption
+
+ // tcpInvalidRateLimit is the maximal rate for sending duplicate
+ // acknowledgements in response to incoming TCP packets that are for an existing
+ // connection but that are invalid due to any of the following reasons:
+ //
+ // a) out-of-window sequence number.
+ // b) out-of-window acknowledgement number.
+ // c) PAWS check failure (when implemented).
+ //
+ // This is required to prevent potential ACK loops.
+ // Setting this to 0 will disable all rate limiting.
+ tcpInvalidRateLimit time.Duration
}
// UniqueID is an abstract generator of unique identifiers.
@@ -668,6 +680,7 @@ func New(opts Options) *Stack {
Default: DefaultBufferSize,
Max: DefaultMaxBufferSize,
},
+ tcpInvalidRateLimit: defaultTCPInvalidRateLimit,
}
// Add specified network protocols.
diff --git a/pkg/tcpip/stack/stack_options.go b/pkg/tcpip/stack/stack_options.go
index 8d9b20b7e..3066f4ffd 100644
--- a/pkg/tcpip/stack/stack_options.go
+++ b/pkg/tcpip/stack/stack_options.go
@@ -15,6 +15,8 @@
package stack
import (
+ "time"
+
"gvisor.dev/gvisor/pkg/tcpip"
)
@@ -29,6 +31,10 @@ const (
// DefaultMaxBufferSize is the default maximum permitted size of a
// send/receive buffer.
DefaultMaxBufferSize = 4 << 20 // 4 MiB
+
+ // defaultTCPInvalidRateLimit is the default value for
+ // stack.TCPInvalidRateLimit.
+ defaultTCPInvalidRateLimit = 500 * time.Millisecond
)
// ReceiveBufferSizeOption is used by stack.(Stack*).Option/SetOption to
@@ -39,6 +45,10 @@ type ReceiveBufferSizeOption struct {
Max int
}
+// TCPInvalidRateLimitOption is used by stack.(Stack*).Option/SetOption to get/set
+// stack.tcpInvalidRateLimit.
+type TCPInvalidRateLimitOption time.Duration
+
// SetOption allows setting stack wide options.
func (s *Stack) SetOption(option interface{}) tcpip.Error {
switch v := option.(type) {
@@ -74,6 +84,15 @@ func (s *Stack) SetOption(option interface{}) tcpip.Error {
s.mu.Unlock()
return nil
+ case TCPInvalidRateLimitOption:
+ if v < 0 {
+ return &tcpip.ErrInvalidOptionValue{}
+ }
+ s.mu.Lock()
+ s.tcpInvalidRateLimit = time.Duration(v)
+ s.mu.Unlock()
+ return nil
+
default:
return &tcpip.ErrUnknownProtocolOption{}
}
@@ -94,6 +113,12 @@ func (s *Stack) Option(option interface{}) tcpip.Error {
s.mu.RUnlock()
return nil
+ case *TCPInvalidRateLimitOption:
+ s.mu.RLock()
+ *v = TCPInvalidRateLimitOption(s.tcpInvalidRateLimit)
+ s.mu.RUnlock()
+ return nil
+
default:
return &tcpip.ErrUnknownProtocolOption{}
}
diff --git a/pkg/tcpip/transport/tcp/connect.go b/pkg/tcpip/transport/tcp/connect.go
index 4695b66d6..34a631b53 100644
--- a/pkg/tcpip/transport/tcp/connect.go
+++ b/pkg/tcpip/transport/tcp/connect.go
@@ -333,7 +333,9 @@ func (h *handshake) synRcvdState(s *segment) tcpip.Error {
// number and "After sending the acknowledgment, drop the unacceptable
// segment and return."
if !s.sequenceNumber.InWindow(h.ackNum, h.rcvWnd) {
- h.ep.sendRaw(buffer.VectorisedView{}, header.TCPFlagAck, h.iss+1, h.ackNum, h.rcvWnd)
+ if h.ep.allowOutOfWindowAck() {
+ h.ep.sendRaw(buffer.VectorisedView{}, header.TCPFlagAck, h.iss+1, h.ackNum, h.rcvWnd)
+ }
return nil
}
@@ -1185,8 +1187,7 @@ func (e *endpoint) handleSegment(s *segment) (cont bool, err tcpip.Error) {
// endpoint MUST terminate its connection. The local TCP endpoint
// should then rely on SYN retransmission from the remote end to
// re-establish the connection.
-
- e.snd.sendAck()
+ e.snd.maybeSendOutOfWindowAck(s)
} else if s.flagIsSet(header.TCPFlagAck) {
// Patch the window size in the segment according to the
// send window scale.
diff --git a/pkg/tcpip/transport/tcp/endpoint.go b/pkg/tcpip/transport/tcp/endpoint.go
index e645aa194..4e5a6089f 100644
--- a/pkg/tcpip/transport/tcp/endpoint.go
+++ b/pkg/tcpip/transport/tcp/endpoint.go
@@ -688,6 +688,10 @@ type endpoint struct {
// ops is used to get socket level options.
ops tcpip.SocketOptions
+
+ // lastOutOfWindowAckTime is the time at which the an ACK was sent in response
+ // to an out of window segment being received by this endpoint.
+ lastOutOfWindowAckTime time.Time `state:".(unixTime)"`
}
// UniqueID implements stack.TransportEndpoint.UniqueID.
@@ -3125,3 +3129,19 @@ func GetTCPSendBufferLimits(s tcpip.StackHandler) tcpip.SendBufferSizeOption {
Max: ss.Max,
}
}
+
+// allowOutOfWindowAck returns true if an out-of-window ACK can be sent now.
+func (e *endpoint) allowOutOfWindowAck() bool {
+ var limit stack.TCPInvalidRateLimitOption
+ if err := e.stack.Option(&limit); err != nil {
+ panic(fmt.Sprintf("e.stack.Option(%+v) failed with error: %s", limit, err))
+ }
+
+ now := time.Now()
+ if now.Sub(e.lastOutOfWindowAckTime) < time.Duration(limit) {
+ return false
+ }
+
+ e.lastOutOfWindowAckTime = now
+ return true
+}
diff --git a/pkg/tcpip/transport/tcp/endpoint_state.go b/pkg/tcpip/transport/tcp/endpoint_state.go
index c21dbc682..e4368026f 100644
--- a/pkg/tcpip/transport/tcp/endpoint_state.go
+++ b/pkg/tcpip/transport/tcp/endpoint_state.go
@@ -308,6 +308,16 @@ func (e *endpoint) loadRecentTSTime(unix unixTime) {
e.recentTSTime = time.Unix(unix.second, unix.nano)
}
+// saveLastOutOfWindowAckTime is invoked by stateify.
+func (e *endpoint) saveLastOutOfWindowAckTime() unixTime {
+ return unixTime{e.lastOutOfWindowAckTime.Unix(), e.lastOutOfWindowAckTime.UnixNano()}
+}
+
+// loadLastOutOfWindowAckTime is invoked by stateify.
+func (e *endpoint) loadLastOutOfWindowAckTime(unix unixTime) {
+ e.lastOutOfWindowAckTime = time.Unix(unix.second, unix.nano)
+}
+
// saveMeasureTime is invoked by stateify.
func (r *rcvBufAutoTuneParams) saveMeasureTime() unixTime {
return unixTime{r.measureTime.Unix(), r.measureTime.UnixNano()}
diff --git a/pkg/tcpip/transport/tcp/rcv.go b/pkg/tcpip/transport/tcp/rcv.go
index 7a7c402c4..a5c82b8fa 100644
--- a/pkg/tcpip/transport/tcp/rcv.go
+++ b/pkg/tcpip/transport/tcp/rcv.go
@@ -385,7 +385,7 @@ func (r *receiver) handleRcvdSegmentClosing(s *segment, state EndpointState, clo
// fails, we ignore the packet:
// https://github.com/torvalds/linux/blob/v5.8/net/ipv4/tcp_input.c#L5591
if r.ep.snd.sndNxt.LessThan(s.ackNumber) {
- r.ep.snd.sendAck()
+ r.ep.snd.maybeSendOutOfWindowAck(s)
return true, nil
}
@@ -454,7 +454,7 @@ func (r *receiver) handleRcvdSegment(s *segment) (drop bool, err tcpip.Error) {
// send an ACK and stop further processing of the segment.
// This is according to RFC 793, page 68.
if !r.acceptable(segSeq, segLen) {
- r.ep.snd.sendAck()
+ r.ep.snd.maybeSendOutOfWindowAck(s)
return true, nil
}
diff --git a/pkg/tcpip/transport/tcp/snd.go b/pkg/tcpip/transport/tcp/snd.go
index dfc8fd248..538edd6cf 100644
--- a/pkg/tcpip/transport/tcp/snd.go
+++ b/pkg/tcpip/transport/tcp/snd.go
@@ -1548,3 +1548,13 @@ func (s *sender) sendSegmentFromView(data buffer.VectorisedView, flags byte, seq
return s.ep.sendRaw(data, flags, seq, rcvNxt, rcvWnd)
}
+
+// maybeSendOutOfWindowAck sends an ACK if we are not being rate limited
+// currently.
+func (s *sender) maybeSendOutOfWindowAck(seg *segment) {
+ // Data packets are unlikely to be part of an ACK loop. So always send
+ // an ACK for a packet w/ data.
+ if seg.payloadSize() > 0 || s.ep.allowOutOfWindowAck() {
+ s.sendAck()
+ }
+}
diff --git a/pkg/tcpip/transport/tcp/tcp_state_autogen.go b/pkg/tcpip/transport/tcp/tcp_state_autogen.go
index cb49e63fe..f19781bfe 100644
--- a/pkg/tcpip/transport/tcp/tcp_state_autogen.go
+++ b/pkg/tcpip/transport/tcp/tcp_state_autogen.go
@@ -217,6 +217,7 @@ func (e *endpoint) StateFields() []string {
"txHash",
"owner",
"ops",
+ "lastOutOfWindowAckTime",
}
}
@@ -228,6 +229,8 @@ func (e *endpoint) StateSave(stateSinkObject state.Sink) {
stateSinkObject.SaveValue(26, recentTSTimeValue)
var acceptedChanValue []*endpoint = e.saveAcceptedChan()
stateSinkObject.SaveValue(49, acceptedChanValue)
+ var lastOutOfWindowAckTimeValue unixTime = e.saveLastOutOfWindowAckTime()
+ stateSinkObject.SaveValue(61, lastOutOfWindowAckTimeValue)
stateSinkObject.Save(0, &e.EndpointInfo)
stateSinkObject.Save(1, &e.DefaultSocketOptionsHandler)
stateSinkObject.Save(2, &e.waiterQueue)
@@ -350,6 +353,7 @@ func (e *endpoint) StateLoad(stateSourceObject state.Source) {
stateSourceObject.LoadValue(13, new(EndpointState), func(y interface{}) { e.loadState(y.(EndpointState)) })
stateSourceObject.LoadValue(26, new(unixTime), func(y interface{}) { e.loadRecentTSTime(y.(unixTime)) })
stateSourceObject.LoadValue(49, new([]*endpoint), func(y interface{}) { e.loadAcceptedChan(y.([]*endpoint)) })
+ stateSourceObject.LoadValue(61, new(unixTime), func(y interface{}) { e.loadLastOutOfWindowAckTime(y.(unixTime)) })
stateSourceObject.AfterLoad(e.afterLoad)
}