diff options
-rw-r--r-- | pkg/tcpip/transport/tcp/endpoint.go | 73 | ||||
-rw-r--r-- | pkg/tcpip/transport/tcp/tcp_test.go | 14 |
2 files changed, 51 insertions, 36 deletions
diff --git a/pkg/tcpip/transport/tcp/endpoint.go b/pkg/tcpip/transport/tcp/endpoint.go index 5d42f8045..8ff125855 100644 --- a/pkg/tcpip/transport/tcp/endpoint.go +++ b/pkg/tcpip/transport/tcp/endpoint.go @@ -956,20 +956,11 @@ func (e *endpoint) readLocked() (buffer.View, *tcpip.Error) { e.rcvBufUsed -= len(v) - avail := e.receiveBufferAvailableLocked() - - // If the window was small before, lower than MSS, send immediate - // ack. Without this, the sender might be stuck waiting for the - // window to grow, while we think the window is non-zero so we - // don't need to send acks. To avoid silly window syndrome, send - // ack only when the window grows above one MSS. - crossedMSS := avail-len(v) < int(e.amss) && avail >= int(e.amss) - - // If the window was zero before this read and if the read freed up - // enough buffer space for the scaled window to be non-zero then notify - // the protocol goroutine to send a window update. - if (e.zeroWindow && !e.zeroReceiveWindow(e.rcv.rcvWndScale)) || crossedMSS { - e.zeroWindow = false + // If the window was small before this read and if the read + // freed up enough buffer space, to either fit an aMSS or half + // a receive buffer (whichever smaller), then notify the + // protocol goroutine to send a window update. + if e.windowCrossedACKThreshold(len(v)) == 1 { e.notifyProtocolGoroutine(notifyNonZeroReceiveWindow) } @@ -1143,13 +1134,35 @@ func (e *endpoint) Peek(vec [][]byte) (int64, tcpip.ControlMessages, *tcpip.Erro return num, tcpip.ControlMessages{}, nil } -// zeroReceiveWindow checks if the receive window to be announced now would be -// zero, based on the amount of available buffer and the receive window scaling. +// windowCrossedACKThreshold checks if the receive window to be announced now would be +// under aMSS or under half receive buffer, whichever smaller. This is useful as +// a receive side silly window syndrome prevention mechanism. If window grows +// to reasonable value, we should send ACK to the sender to inform the rx space is now +// large. We also want ensure a series of small read()'s won't trigger a flood of +// spurious tiny ACK's. // -// It must be called with rcvListMu held. -func (e *endpoint) zeroReceiveWindow(scale uint8) bool { - avail := e.receiveBufferAvailableLocked() - return (avail >> scale) == 0 +// For large receive buffers, the threshold is aMSS - once reader reads more than aMSS +// we'll send ACK. For tiny receive buffers, the threshold is half of receive buffer size. +// This is chosen arbitrairly. +func (e *endpoint) windowCrossedACKThreshold(deltaBefore int) int { + newAvail := e.receiveBufferAvailableLocked() + oldAvail := newAvail - deltaBefore + if oldAvail < 0 { + oldAvail = 0 + } + + threshold := int(e.amss) + if threshold > e.rcvBufSize/2 { + threshold = e.rcvBufSize / 2 + } + + switch { + case oldAvail < threshold && newAvail >= threshold: + return 1 + case oldAvail >= threshold && newAvail < threshold: + return -1 + } + return 0 } // SetSockOptInt sets a socket option. @@ -1194,11 +1207,11 @@ func (e *endpoint) SetSockOptInt(opt tcpip.SockOpt, v int) *tcpip.Error { e.rcvAutoParams.disabled = true - // Immediatelly send ACK in two cases: when the buffer - // grows so that it leaves zero-window state, when the - // buffer grows from small < MSS to >= MSS. - if (e.zeroWindow && !e.zeroReceiveWindow(scale)) || (availBefore < int(e.amss) && availAfter >= int(e.amss)) { - e.zeroWindow = false + // Immediatelly send an ACK to uncork the sender silly + // window syndrome prevetion, when our available space + // grows above aMSS or half receive buffer, whichever + // smaller. + if e.windowCrossedACKThreshold(availAfter-availBefore) == 1 { mask |= notifyNonZeroReceiveWindow } e.rcvListMu.Unlock() @@ -2239,13 +2252,11 @@ func (e *endpoint) readyToRead(s *segment) { if s != nil { s.incRef() e.rcvBufUsed += s.data.Size() - // Check if the receive window is now closed. If so make sure - // we set the zero window before we deliver the segment to ensure - // that a subsequent read of the segment will correctly trigger - // a non-zero notification. - if e.zeroReceiveWindow(e.rcv.rcvWndScale) { + // Increase counter if the receive window falls down + // below MSS or half receive buffer size, whichever + // smaller. + if e.windowCrossedACKThreshold(-s.data.Size()) == -1 { e.stats.ReceiveErrors.ZeroRcvWindowState.Increment() - e.zeroWindow = true } e.rcvList.PushBack(s) } else { diff --git a/pkg/tcpip/transport/tcp/tcp_test.go b/pkg/tcpip/transport/tcp/tcp_test.go index a05365c6a..4c2e458e3 100644 --- a/pkg/tcpip/transport/tcp/tcp_test.go +++ b/pkg/tcpip/transport/tcp/tcp_test.go @@ -2091,10 +2091,14 @@ func TestZeroScaledWindowReceive(t *testing.T) { ) } - // Read some data. An ack should be sent in response to that. - v, _, err := c.EP.Read(nil) - if err != nil { - t.Fatalf("Read failed: %v", err) + // Read at least 1MSS of data. An ack should be sent in response to that. + sz := 0 + for sz < defaultMTU { + v, _, err := c.EP.Read(nil) + if err != nil { + t.Fatalf("Read failed: %v", err) + } + sz += len(v) } checker.IPv4(t, c.GetPacket(), @@ -2103,7 +2107,7 @@ func TestZeroScaledWindowReceive(t *testing.T) { checker.DstPort(context.TestPort), checker.SeqNum(uint32(c.IRS)+1), checker.AckNum(uint32(790+sent)), - checker.Window(uint16(len(v)>>ws)), + checker.Window(uint16(sz>>ws)), checker.TCPFlags(header.TCPFlagAck), ), ) |