summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorMarek Majkowski <marek@cloudflare.com>2020-01-08 12:56:39 +0000
committerMarek Majkowski <marek@cloudflare.com>2020-01-08 12:56:39 +0000
commitc276e4740f225068dc394a500dfdddd64af1d18a (patch)
tree9947882f18ab1187df8646ba0b49b862b1996c97
parent08a97a6d193f46cd547fadae3bb4125cc788543b (diff)
Fix #1522 - implement silly window sydrome protection on rx side
Before, each of small read()'s that raises window either from zero or above threshold of aMSS, would generate an ACK. In a classic silly-window-syndrome scenario, we can imagine a pessimistic case when small read()'s generate a stream of ACKs. This PR fixes that, essentially treating window size < aMSS as zero. We send ACK exactly in a moment when window increases to >= aMSS or half of receive buffer size (whichever smaller).
-rw-r--r--pkg/tcpip/transport/tcp/endpoint.go73
-rw-r--r--pkg/tcpip/transport/tcp/tcp_test.go14
2 files changed, 51 insertions, 36 deletions
diff --git a/pkg/tcpip/transport/tcp/endpoint.go b/pkg/tcpip/transport/tcp/endpoint.go
index 5d42f8045..8ff125855 100644
--- a/pkg/tcpip/transport/tcp/endpoint.go
+++ b/pkg/tcpip/transport/tcp/endpoint.go
@@ -956,20 +956,11 @@ func (e *endpoint) readLocked() (buffer.View, *tcpip.Error) {
e.rcvBufUsed -= len(v)
- avail := e.receiveBufferAvailableLocked()
-
- // If the window was small before, lower than MSS, send immediate
- // ack. Without this, the sender might be stuck waiting for the
- // window to grow, while we think the window is non-zero so we
- // don't need to send acks. To avoid silly window syndrome, send
- // ack only when the window grows above one MSS.
- crossedMSS := avail-len(v) < int(e.amss) && avail >= int(e.amss)
-
- // If the window was zero before this read and if the read freed up
- // enough buffer space for the scaled window to be non-zero then notify
- // the protocol goroutine to send a window update.
- if (e.zeroWindow && !e.zeroReceiveWindow(e.rcv.rcvWndScale)) || crossedMSS {
- e.zeroWindow = false
+ // If the window was small before this read and if the read
+ // freed up enough buffer space, to either fit an aMSS or half
+ // a receive buffer (whichever smaller), then notify the
+ // protocol goroutine to send a window update.
+ if e.windowCrossedACKThreshold(len(v)) == 1 {
e.notifyProtocolGoroutine(notifyNonZeroReceiveWindow)
}
@@ -1143,13 +1134,35 @@ func (e *endpoint) Peek(vec [][]byte) (int64, tcpip.ControlMessages, *tcpip.Erro
return num, tcpip.ControlMessages{}, nil
}
-// zeroReceiveWindow checks if the receive window to be announced now would be
-// zero, based on the amount of available buffer and the receive window scaling.
+// windowCrossedACKThreshold checks if the receive window to be announced now would be
+// under aMSS or under half receive buffer, whichever smaller. This is useful as
+// a receive side silly window syndrome prevention mechanism. If window grows
+// to reasonable value, we should send ACK to the sender to inform the rx space is now
+// large. We also want ensure a series of small read()'s won't trigger a flood of
+// spurious tiny ACK's.
//
-// It must be called with rcvListMu held.
-func (e *endpoint) zeroReceiveWindow(scale uint8) bool {
- avail := e.receiveBufferAvailableLocked()
- return (avail >> scale) == 0
+// For large receive buffers, the threshold is aMSS - once reader reads more than aMSS
+// we'll send ACK. For tiny receive buffers, the threshold is half of receive buffer size.
+// This is chosen arbitrairly.
+func (e *endpoint) windowCrossedACKThreshold(deltaBefore int) int {
+ newAvail := e.receiveBufferAvailableLocked()
+ oldAvail := newAvail - deltaBefore
+ if oldAvail < 0 {
+ oldAvail = 0
+ }
+
+ threshold := int(e.amss)
+ if threshold > e.rcvBufSize/2 {
+ threshold = e.rcvBufSize / 2
+ }
+
+ switch {
+ case oldAvail < threshold && newAvail >= threshold:
+ return 1
+ case oldAvail >= threshold && newAvail < threshold:
+ return -1
+ }
+ return 0
}
// SetSockOptInt sets a socket option.
@@ -1194,11 +1207,11 @@ func (e *endpoint) SetSockOptInt(opt tcpip.SockOpt, v int) *tcpip.Error {
e.rcvAutoParams.disabled = true
- // Immediatelly send ACK in two cases: when the buffer
- // grows so that it leaves zero-window state, when the
- // buffer grows from small < MSS to >= MSS.
- if (e.zeroWindow && !e.zeroReceiveWindow(scale)) || (availBefore < int(e.amss) && availAfter >= int(e.amss)) {
- e.zeroWindow = false
+ // Immediatelly send an ACK to uncork the sender silly
+ // window syndrome prevetion, when our available space
+ // grows above aMSS or half receive buffer, whichever
+ // smaller.
+ if e.windowCrossedACKThreshold(availAfter-availBefore) == 1 {
mask |= notifyNonZeroReceiveWindow
}
e.rcvListMu.Unlock()
@@ -2239,13 +2252,11 @@ func (e *endpoint) readyToRead(s *segment) {
if s != nil {
s.incRef()
e.rcvBufUsed += s.data.Size()
- // Check if the receive window is now closed. If so make sure
- // we set the zero window before we deliver the segment to ensure
- // that a subsequent read of the segment will correctly trigger
- // a non-zero notification.
- if e.zeroReceiveWindow(e.rcv.rcvWndScale) {
+ // Increase counter if the receive window falls down
+ // below MSS or half receive buffer size, whichever
+ // smaller.
+ if e.windowCrossedACKThreshold(-s.data.Size()) == -1 {
e.stats.ReceiveErrors.ZeroRcvWindowState.Increment()
- e.zeroWindow = true
}
e.rcvList.PushBack(s)
} else {
diff --git a/pkg/tcpip/transport/tcp/tcp_test.go b/pkg/tcpip/transport/tcp/tcp_test.go
index a05365c6a..4c2e458e3 100644
--- a/pkg/tcpip/transport/tcp/tcp_test.go
+++ b/pkg/tcpip/transport/tcp/tcp_test.go
@@ -2091,10 +2091,14 @@ func TestZeroScaledWindowReceive(t *testing.T) {
)
}
- // Read some data. An ack should be sent in response to that.
- v, _, err := c.EP.Read(nil)
- if err != nil {
- t.Fatalf("Read failed: %v", err)
+ // Read at least 1MSS of data. An ack should be sent in response to that.
+ sz := 0
+ for sz < defaultMTU {
+ v, _, err := c.EP.Read(nil)
+ if err != nil {
+ t.Fatalf("Read failed: %v", err)
+ }
+ sz += len(v)
}
checker.IPv4(t, c.GetPacket(),
@@ -2103,7 +2107,7 @@ func TestZeroScaledWindowReceive(t *testing.T) {
checker.DstPort(context.TestPort),
checker.SeqNum(uint32(c.IRS)+1),
checker.AckNum(uint32(790+sent)),
- checker.Window(uint16(len(v)>>ws)),
+ checker.Window(uint16(sz>>ws)),
checker.TCPFlags(header.TCPFlagAck),
),
)