summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
-rw-r--r--benchmarks/tcp/tcp_proxy.go8
-rw-r--r--pkg/sentry/socket/netstack/stack.go12
-rw-r--r--pkg/tcpip/tcpip.go26
-rw-r--r--pkg/tcpip/transport/raw/endpoint.go72
-rw-r--r--pkg/tcpip/transport/tcp/connect.go2
-rw-r--r--pkg/tcpip/transport/tcp/endpoint.go16
-rw-r--r--pkg/tcpip/transport/tcp/endpoint_state.go2
-rw-r--r--pkg/tcpip/transport/tcp/protocol.go59
-rw-r--r--pkg/tcpip/transport/tcp/tcp_sack_test.go4
-rw-r--r--pkg/tcpip/transport/tcp/tcp_test.go26
-rw-r--r--pkg/tcpip/transport/tcp/testing/context/context.go10
-rw-r--r--pkg/tcpip/transport/udp/endpoint.go57
-rw-r--r--pkg/tcpip/transport/udp/protocol.go70
-rw-r--r--runsc/boot/loader.go6
-rw-r--r--test/syscalls/linux/raw_socket_ipv4.cc379
-rw-r--r--test/syscalls/linux/socket_ipv4_udp_unbound.cc216
-rw-r--r--test/syscalls/linux/udp_socket_test_cases.cc115
17 files changed, 984 insertions, 96 deletions
diff --git a/benchmarks/tcp/tcp_proxy.go b/benchmarks/tcp/tcp_proxy.go
index f5aa0b515..b3a4dbea3 100644
--- a/benchmarks/tcp/tcp_proxy.go
+++ b/benchmarks/tcp/tcp_proxy.go
@@ -228,19 +228,19 @@ func newNetstackImpl(mode string) (impl, error) {
})
// Set protocol options.
- if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, tcp.SACKEnabled(*sack)); err != nil {
- return nil, fmt.Errorf("SetTransportProtocolOption for SACKEnabled failed: %v", err)
+ if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, tcpip.StackSACKEnabled(*sack)); err != nil {
+ return nil, fmt.Errorf("SetTransportProtocolOption for SACKEnabled failed: %s", err)
}
// Enable Receive Buffer Auto-Tuning.
if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, tcpip.ModerateReceiveBufferOption(*moderateRecvBuf)); err != nil {
- return nil, fmt.Errorf("SetTransportProtocolOption failed: %v", err)
+ return nil, fmt.Errorf("SetTransportProtocolOption failed: %s", err)
}
// Set Congestion Control to cubic if requested.
if *cubic {
if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, tcpip.CongestionControlOption("cubic")); err != nil {
- return nil, fmt.Errorf("SetTransportProtocolOption for CongestionControlOption(cubic) failed: %v", err)
+ return nil, fmt.Errorf("SetTransportProtocolOption for CongestionControlOption(cubic) failed: %s", err)
}
}
diff --git a/pkg/sentry/socket/netstack/stack.go b/pkg/sentry/socket/netstack/stack.go
index 9b44c2b89..f97f9b6f3 100644
--- a/pkg/sentry/socket/netstack/stack.go
+++ b/pkg/sentry/socket/netstack/stack.go
@@ -144,7 +144,7 @@ func (s *Stack) AddInterfaceAddr(idx int32, addr inet.InterfaceAddr) error {
// TCPReceiveBufferSize implements inet.Stack.TCPReceiveBufferSize.
func (s *Stack) TCPReceiveBufferSize() (inet.TCPBufferSize, error) {
- var rs tcp.ReceiveBufferSizeOption
+ var rs tcpip.StackReceiveBufferSizeOption
err := s.Stack.TransportProtocolOption(tcp.ProtocolNumber, &rs)
return inet.TCPBufferSize{
Min: rs.Min,
@@ -155,7 +155,7 @@ func (s *Stack) TCPReceiveBufferSize() (inet.TCPBufferSize, error) {
// SetTCPReceiveBufferSize implements inet.Stack.SetTCPReceiveBufferSize.
func (s *Stack) SetTCPReceiveBufferSize(size inet.TCPBufferSize) error {
- rs := tcp.ReceiveBufferSizeOption{
+ rs := tcpip.StackReceiveBufferSizeOption{
Min: size.Min,
Default: size.Default,
Max: size.Max,
@@ -165,7 +165,7 @@ func (s *Stack) SetTCPReceiveBufferSize(size inet.TCPBufferSize) error {
// TCPSendBufferSize implements inet.Stack.TCPSendBufferSize.
func (s *Stack) TCPSendBufferSize() (inet.TCPBufferSize, error) {
- var ss tcp.SendBufferSizeOption
+ var ss tcpip.StackSendBufferSizeOption
err := s.Stack.TransportProtocolOption(tcp.ProtocolNumber, &ss)
return inet.TCPBufferSize{
Min: ss.Min,
@@ -176,7 +176,7 @@ func (s *Stack) TCPSendBufferSize() (inet.TCPBufferSize, error) {
// SetTCPSendBufferSize implements inet.Stack.SetTCPSendBufferSize.
func (s *Stack) SetTCPSendBufferSize(size inet.TCPBufferSize) error {
- ss := tcp.SendBufferSizeOption{
+ ss := tcpip.StackSendBufferSizeOption{
Min: size.Min,
Default: size.Default,
Max: size.Max,
@@ -186,14 +186,14 @@ func (s *Stack) SetTCPSendBufferSize(size inet.TCPBufferSize) error {
// TCPSACKEnabled implements inet.Stack.TCPSACKEnabled.
func (s *Stack) TCPSACKEnabled() (bool, error) {
- var sack tcp.SACKEnabled
+ var sack tcpip.StackSACKEnabled
err := s.Stack.TransportProtocolOption(tcp.ProtocolNumber, &sack)
return bool(sack), syserr.TranslateNetstackError(err).ToError()
}
// SetTCPSACKEnabled implements inet.Stack.SetTCPSACKEnabled.
func (s *Stack) SetTCPSACKEnabled(enabled bool) error {
- return syserr.TranslateNetstackError(s.Stack.SetTransportProtocolOption(tcp.ProtocolNumber, tcp.SACKEnabled(enabled))).ToError()
+ return syserr.TranslateNetstackError(s.Stack.SetTransportProtocolOption(tcp.ProtocolNumber, tcpip.StackSACKEnabled(enabled))).ToError()
}
// Statistics implements inet.Stack.Statistics.
diff --git a/pkg/tcpip/tcpip.go b/pkg/tcpip/tcpip.go
index b7b227328..3ad130b23 100644
--- a/pkg/tcpip/tcpip.go
+++ b/pkg/tcpip/tcpip.go
@@ -813,6 +813,32 @@ type OutOfBandInlineOption int
// a default TTL.
type DefaultTTLOption uint8
+// StackSACKEnabled is used by stack.(*Stack).TransportProtocolOption to
+// enable/disable SACK support in TCP. See: https://tools.ietf.org/html/rfc2018.
+type StackSACKEnabled bool
+
+// StackDelayEnabled is used by stack.(Stack*).TransportProtocolOption to
+// enable/disable Nagle's algorithm in TCP.
+type StackDelayEnabled bool
+
+// StackSendBufferSizeOption is used by stack.(Stack*).TransportProtocolOption
+// to get/set the default, min and max send buffer sizes.
+type StackSendBufferSizeOption struct {
+ Min int
+ Default int
+ Max int
+}
+
+// StackReceiveBufferSizeOption is used by
+// stack.(Stack*).TransportProtocolOption to get/set the default, min and max
+// receive buffer sizes.
+type StackReceiveBufferSizeOption struct {
+ Min int
+ Default int
+ Max int
+}
+
+//
// IPPacketInfo is the message struture for IP_PKTINFO.
//
// +stateify savable
diff --git a/pkg/tcpip/transport/raw/endpoint.go b/pkg/tcpip/transport/raw/endpoint.go
index a406d815e..6a7977259 100644
--- a/pkg/tcpip/transport/raw/endpoint.go
+++ b/pkg/tcpip/transport/raw/endpoint.go
@@ -26,6 +26,8 @@
package raw
import (
+ "fmt"
+
"gvisor.dev/gvisor/pkg/sync"
"gvisor.dev/gvisor/pkg/tcpip"
"gvisor.dev/gvisor/pkg/tcpip/buffer"
@@ -66,16 +68,17 @@ type endpoint struct {
// protected by rcvMu.
rcvMu sync.Mutex `state:"nosave"`
rcvList rawPacketList
- rcvBufSizeMax int `state:".(int)"`
rcvBufSize int
+ rcvBufSizeMax int `state:".(int)"`
rcvClosed bool
// The following fields are protected by mu.
- mu sync.RWMutex `state:"nosave"`
- sndBufSize int
- closed bool
- connected bool
- bound bool
+ mu sync.RWMutex `state:"nosave"`
+ sndBufSize int
+ sndBufSizeMax int
+ closed bool
+ connected bool
+ bound bool
// route is the route to a remote network endpoint. It is set via
// Connect(), and is valid only when conneted is true.
route stack.Route `state:"manual"`
@@ -103,10 +106,21 @@ func newEndpoint(s *stack.Stack, netProto tcpip.NetworkProtocolNumber, transProt
},
waiterQueue: waiterQueue,
rcvBufSizeMax: 32 * 1024,
- sndBufSize: 32 * 1024,
+ sndBufSizeMax: 32 * 1024,
associated: associated,
}
+ // Override with stack defaults.
+ var ss tcpip.StackSendBufferSizeOption
+ if err := s.TransportProtocolOption(transProto, &ss); err == nil {
+ e.sndBufSizeMax = ss.Default
+ }
+
+ var rs tcpip.StackReceiveBufferSizeOption
+ if err := s.TransportProtocolOption(transProto, &rs); err == nil {
+ e.rcvBufSizeMax = rs.Default
+ }
+
// Unassociated endpoints are write-only and users call Write() with IP
// headers included. Because they're write-only, We don't need to
// register with the stack.
@@ -523,7 +537,46 @@ func (e *endpoint) SetSockOptBool(opt tcpip.SockOptBool, v bool) *tcpip.Error {
// SetSockOptInt implements tcpip.Endpoint.SetSockOptInt.
func (e *endpoint) SetSockOptInt(opt tcpip.SockOptInt, v int) *tcpip.Error {
- return tcpip.ErrUnknownProtocolOption
+ switch opt {
+ case tcpip.SendBufferSizeOption:
+ // Make sure the send buffer size is within the min and max
+ // allowed.
+ var ss tcpip.StackSendBufferSizeOption
+ if err := e.stack.TransportProtocolOption(e.TransProto, &ss); err != nil {
+ panic(fmt.Sprintf("s.TransportProtocolOption(%d, %+v) = %s", e.TransProto, ss, err))
+ }
+ if v > ss.Max {
+ v = ss.Max
+ }
+ if v < ss.Min {
+ v = ss.Min
+ }
+ e.mu.Lock()
+ e.sndBufSizeMax = v
+ e.mu.Unlock()
+ return nil
+
+ case tcpip.ReceiveBufferSizeOption:
+ // Make sure the receive buffer size is within the min and max
+ // allowed.
+ var rs tcpip.StackReceiveBufferSizeOption
+ if err := e.stack.TransportProtocolOption(e.TransProto, &rs); err != nil {
+ panic(fmt.Sprintf("s.TransportProtocolOption(%d, %+v) = %s", e.TransProto, rs, err))
+ }
+ if v > rs.Max {
+ v = rs.Max
+ }
+ if v < rs.Min {
+ v = rs.Min
+ }
+ e.rcvMu.Lock()
+ e.rcvBufSizeMax = v
+ e.rcvMu.Unlock()
+ return nil
+
+ default:
+ return tcpip.ErrUnknownProtocolOption
+ }
}
// GetSockOpt implements tcpip.Endpoint.GetSockOpt.
@@ -563,7 +616,7 @@ func (e *endpoint) GetSockOptInt(opt tcpip.SockOptInt) (int, *tcpip.Error) {
case tcpip.SendBufferSizeOption:
e.mu.Lock()
- v := e.sndBufSize
+ v := e.sndBufSizeMax
e.mu.Unlock()
return v, nil
@@ -636,7 +689,6 @@ func (e *endpoint) HandlePacket(route *stack.Route, pkt *stack.PacketBuffer) {
e.rcvList.PushBack(packet)
e.rcvBufSize += packet.data.Size()
-
e.rcvMu.Unlock()
e.stats.PacketsReceived.Increment()
// Notify waiters that there's data to be read.
diff --git a/pkg/tcpip/transport/tcp/connect.go b/pkg/tcpip/transport/tcp/connect.go
index 9d4dce826..377643b82 100644
--- a/pkg/tcpip/transport/tcp/connect.go
+++ b/pkg/tcpip/transport/tcp/connect.go
@@ -521,7 +521,7 @@ func (h *handshake) execute() *tcpip.Error {
s.AddWaker(&h.ep.newSegmentWaker, wakerForNewSegment)
defer s.Done()
- var sackEnabled SACKEnabled
+ var sackEnabled tcpip.StackSACKEnabled
if err := h.ep.stack.TransportProtocolOption(ProtocolNumber, &sackEnabled); err != nil {
// If stack returned an error when checking for SACKEnabled
// status then just default to switching off SACK negotiation.
diff --git a/pkg/tcpip/transport/tcp/endpoint.go b/pkg/tcpip/transport/tcp/endpoint.go
index f225b00e7..10df2bcd5 100644
--- a/pkg/tcpip/transport/tcp/endpoint.go
+++ b/pkg/tcpip/transport/tcp/endpoint.go
@@ -851,12 +851,12 @@ func newEndpoint(s *stack.Stack, netProto tcpip.NetworkProtocolNumber, waiterQue
maxSynRetries: DefaultSynRetries,
}
- var ss SendBufferSizeOption
+ var ss tcpip.StackSendBufferSizeOption
if err := s.TransportProtocolOption(ProtocolNumber, &ss); err == nil {
e.sndBufSize = ss.Default
}
- var rs ReceiveBufferSizeOption
+ var rs tcpip.StackReceiveBufferSizeOption
if err := s.TransportProtocolOption(ProtocolNumber, &rs); err == nil {
e.rcvBufSize = rs.Default
}
@@ -871,7 +871,7 @@ func newEndpoint(s *stack.Stack, netProto tcpip.NetworkProtocolNumber, waiterQue
e.rcvAutoParams.disabled = !bool(mrb)
}
- var de DelayEnabled
+ var de tcpip.StackDelayEnabled
if err := s.TransportProtocolOption(ProtocolNumber, &de); err == nil && de {
e.SetSockOptBool(tcpip.DelayOption, true)
}
@@ -1588,7 +1588,7 @@ func (e *endpoint) SetSockOptInt(opt tcpip.SockOptInt, v int) *tcpip.Error {
case tcpip.ReceiveBufferSizeOption:
// Make sure the receive buffer size is within the min and max
// allowed.
- var rs ReceiveBufferSizeOption
+ var rs tcpip.StackReceiveBufferSizeOption
if err := e.stack.TransportProtocolOption(ProtocolNumber, &rs); err == nil {
if v < rs.Min {
v = rs.Min
@@ -1638,7 +1638,7 @@ func (e *endpoint) SetSockOptInt(opt tcpip.SockOptInt, v int) *tcpip.Error {
case tcpip.SendBufferSizeOption:
// Make sure the send buffer size is within the min and max
// allowed.
- var ss SendBufferSizeOption
+ var ss tcpip.StackSendBufferSizeOption
if err := e.stack.TransportProtocolOption(ProtocolNumber, &ss); err == nil {
if v < ss.Min {
v = ss.Min
@@ -1678,7 +1678,7 @@ func (e *endpoint) SetSockOptInt(opt tcpip.SockOptInt, v int) *tcpip.Error {
return tcpip.ErrInvalidOptionValue
}
}
- var rs ReceiveBufferSizeOption
+ var rs tcpip.StackReceiveBufferSizeOption
if err := e.stack.TransportProtocolOption(ProtocolNumber, &rs); err == nil {
if v < rs.Min/2 {
v = rs.Min / 2
@@ -2609,7 +2609,7 @@ func (e *endpoint) receiveBufferSize() int {
}
func (e *endpoint) maxReceiveBufferSize() int {
- var rs ReceiveBufferSizeOption
+ var rs tcpip.StackReceiveBufferSizeOption
if err := e.stack.TransportProtocolOption(ProtocolNumber, &rs); err != nil {
// As a fallback return the hardcoded max buffer size.
return MaxBufferSize
@@ -2690,7 +2690,7 @@ func timeStampOffset() uint32 {
// if the SYN options indicate that the SACK option was negotiated and the TCP
// stack is configured to enable TCP SACK option.
func (e *endpoint) maybeEnableSACKPermitted(synOpts *header.TCPSynOptions) {
- var v SACKEnabled
+ var v tcpip.StackSACKEnabled
if err := e.stack.TransportProtocolOption(ProtocolNumber, &v); err != nil {
// Stack doesn't support SACK. So just return.
return
diff --git a/pkg/tcpip/transport/tcp/endpoint_state.go b/pkg/tcpip/transport/tcp/endpoint_state.go
index cbb779666..0bebec2d1 100644
--- a/pkg/tcpip/transport/tcp/endpoint_state.go
+++ b/pkg/tcpip/transport/tcp/endpoint_state.go
@@ -186,7 +186,7 @@ func (e *endpoint) Resume(s *stack.Stack) {
epState := e.origEndpointState
switch epState {
case StateInitial, StateBound, StateListen, StateConnecting, StateEstablished:
- var ss SendBufferSizeOption
+ var ss tcpip.StackSendBufferSizeOption
if err := e.stack.TransportProtocolOption(ProtocolNumber, &ss); err == nil {
if e.sndBufSize < ss.Min || e.sndBufSize > ss.Max {
panic(fmt.Sprintf("endpoint.sndBufSize %d is outside the min and max allowed [%d, %d]", e.sndBufSize, ss.Min, ss.Max))
diff --git a/pkg/tcpip/transport/tcp/protocol.go b/pkg/tcpip/transport/tcp/protocol.go
index 73b8a6782..3cff55afa 100644
--- a/pkg/tcpip/transport/tcp/protocol.go
+++ b/pkg/tcpip/transport/tcp/protocol.go
@@ -71,29 +71,6 @@ const (
DefaultSynRetries = 6
)
-// SACKEnabled option can be used to enable SACK support in the TCP
-// protocol. See: https://tools.ietf.org/html/rfc2018.
-type SACKEnabled bool
-
-// DelayEnabled option can be used to enable Nagle's algorithm in the TCP protocol.
-type DelayEnabled bool
-
-// SendBufferSizeOption allows the default, min and max send buffer sizes for
-// TCP endpoints to be queried or configured.
-type SendBufferSizeOption struct {
- Min int
- Default int
- Max int
-}
-
-// ReceiveBufferSizeOption allows the default, min and max receive buffer size
-// for TCP endpoints to be queried or configured.
-type ReceiveBufferSizeOption struct {
- Min int
- Default int
- Max int
-}
-
const (
ccReno = "reno"
ccCubic = "cubic"
@@ -160,8 +137,8 @@ type protocol struct {
mu sync.RWMutex
sackEnabled bool
delayEnabled bool
- sendBufferSize SendBufferSizeOption
- recvBufferSize ReceiveBufferSizeOption
+ sendBufferSize tcpip.StackSendBufferSizeOption
+ recvBufferSize tcpip.StackReceiveBufferSizeOption
congestionControl string
availableCongestionControl []string
moderateReceiveBuffer bool
@@ -272,19 +249,19 @@ func replyWithReset(s *segment, tos, ttl uint8) {
// SetOption implements stack.TransportProtocol.SetOption.
func (p *protocol) SetOption(option interface{}) *tcpip.Error {
switch v := option.(type) {
- case SACKEnabled:
+ case tcpip.StackSACKEnabled:
p.mu.Lock()
p.sackEnabled = bool(v)
p.mu.Unlock()
return nil
- case DelayEnabled:
+ case tcpip.StackDelayEnabled:
p.mu.Lock()
p.delayEnabled = bool(v)
p.mu.Unlock()
return nil
- case SendBufferSizeOption:
+ case tcpip.StackSendBufferSizeOption:
if v.Min <= 0 || v.Default < v.Min || v.Default > v.Max {
return tcpip.ErrInvalidOptionValue
}
@@ -293,7 +270,7 @@ func (p *protocol) SetOption(option interface{}) *tcpip.Error {
p.mu.Unlock()
return nil
- case ReceiveBufferSizeOption:
+ case tcpip.StackReceiveBufferSizeOption:
if v.Min <= 0 || v.Default < v.Min || v.Default > v.Max {
return tcpip.ErrInvalidOptionValue
}
@@ -386,25 +363,25 @@ func (p *protocol) SetOption(option interface{}) *tcpip.Error {
// Option implements stack.TransportProtocol.Option.
func (p *protocol) Option(option interface{}) *tcpip.Error {
switch v := option.(type) {
- case *SACKEnabled:
+ case *tcpip.StackSACKEnabled:
p.mu.RLock()
- *v = SACKEnabled(p.sackEnabled)
+ *v = tcpip.StackSACKEnabled(p.sackEnabled)
p.mu.RUnlock()
return nil
- case *DelayEnabled:
+ case *tcpip.StackDelayEnabled:
p.mu.RLock()
- *v = DelayEnabled(p.delayEnabled)
+ *v = tcpip.StackDelayEnabled(p.delayEnabled)
p.mu.RUnlock()
return nil
- case *SendBufferSizeOption:
+ case *tcpip.StackSendBufferSizeOption:
p.mu.RLock()
*v = p.sendBufferSize
p.mu.RUnlock()
return nil
- case *ReceiveBufferSizeOption:
+ case *tcpip.StackReceiveBufferSizeOption:
p.mu.RLock()
*v = p.recvBufferSize
p.mu.RUnlock()
@@ -514,8 +491,16 @@ func (*protocol) Parse(pkt *stack.PacketBuffer) bool {
// NewProtocol returns a TCP transport protocol.
func NewProtocol() stack.TransportProtocol {
return &protocol{
- sendBufferSize: SendBufferSizeOption{MinBufferSize, DefaultSendBufferSize, MaxBufferSize},
- recvBufferSize: ReceiveBufferSizeOption{MinBufferSize, DefaultReceiveBufferSize, MaxBufferSize},
+ sendBufferSize: tcpip.StackSendBufferSizeOption{
+ Min: MinBufferSize,
+ Default: DefaultSendBufferSize,
+ Max: MaxBufferSize,
+ },
+ recvBufferSize: tcpip.StackReceiveBufferSizeOption{
+ Min: MinBufferSize,
+ Default: DefaultReceiveBufferSize,
+ Max: MaxBufferSize,
+ },
congestionControl: ccReno,
availableCongestionControl: []string{ccReno, ccCubic},
tcpLingerTimeout: DefaultTCPLingerTimeout,
diff --git a/pkg/tcpip/transport/tcp/tcp_sack_test.go b/pkg/tcpip/transport/tcp/tcp_sack_test.go
index fcc165f17..812e503bc 100644
--- a/pkg/tcpip/transport/tcp/tcp_sack_test.go
+++ b/pkg/tcpip/transport/tcp/tcp_sack_test.go
@@ -46,8 +46,8 @@ func createConnectedWithSACKAndTS(c *context.Context) *context.RawEndpoint {
func setStackSACKPermitted(t *testing.T, c *context.Context, enable bool) {
t.Helper()
- if err := c.Stack().SetTransportProtocolOption(tcp.ProtocolNumber, tcp.SACKEnabled(enable)); err != nil {
- t.Fatalf("c.s.SetTransportProtocolOption(tcp.ProtocolNumber, SACKEnabled(%v) = %v", enable, err)
+ if err := c.Stack().SetTransportProtocolOption(tcp.ProtocolNumber, tcpip.StackSACKEnabled(enable)); err != nil {
+ t.Fatalf("c.s.SetTransportProtocolOption(tcp.ProtocolNumber, StackSACKEnabled(%t) = %s", enable, err)
}
}
diff --git a/pkg/tcpip/transport/tcp/tcp_test.go b/pkg/tcpip/transport/tcp/tcp_test.go
index 0668cedc9..aca6a7951 100644
--- a/pkg/tcpip/transport/tcp/tcp_test.go
+++ b/pkg/tcpip/transport/tcp/tcp_test.go
@@ -3987,7 +3987,10 @@ func TestDefaultBufferSizes(t *testing.T) {
checkRecvBufferSize(t, ep, tcp.DefaultReceiveBufferSize)
// Change the default send buffer size.
- if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, tcp.SendBufferSizeOption{1, tcp.DefaultSendBufferSize * 2, tcp.DefaultSendBufferSize * 20}); err != nil {
+ if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, tcpip.StackSendBufferSizeOption{
+ Min: 1,
+ Default: tcp.DefaultSendBufferSize * 2,
+ Max: tcp.DefaultSendBufferSize * 20}); err != nil {
t.Fatalf("SetTransportProtocolOption failed: %s", err)
}
@@ -4001,8 +4004,11 @@ func TestDefaultBufferSizes(t *testing.T) {
checkRecvBufferSize(t, ep, tcp.DefaultReceiveBufferSize)
// Change the default receive buffer size.
- if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, tcp.ReceiveBufferSizeOption{1, tcp.DefaultReceiveBufferSize * 3, tcp.DefaultReceiveBufferSize * 30}); err != nil {
- t.Fatalf("SetTransportProtocolOption failed: %s", err)
+ if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, tcpip.StackReceiveBufferSizeOption{
+ Min: 1,
+ Default: tcp.DefaultReceiveBufferSize * 3,
+ Max: tcp.DefaultReceiveBufferSize * 30}); err != nil {
+ t.Fatalf("SetTransportProtocolOption failed: %v", err)
}
ep.Close()
@@ -4029,11 +4035,11 @@ func TestMinMaxBufferSizes(t *testing.T) {
defer ep.Close()
// Change the min/max values for send/receive
- if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, tcp.ReceiveBufferSizeOption{200, tcp.DefaultReceiveBufferSize * 2, tcp.DefaultReceiveBufferSize * 20}); err != nil {
+ if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, tcpip.StackReceiveBufferSizeOption{Min: 200, Default: tcp.DefaultReceiveBufferSize * 2, Max: tcp.DefaultReceiveBufferSize * 20}); err != nil {
t.Fatalf("SetTransportProtocolOption failed: %s", err)
}
- if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, tcp.SendBufferSizeOption{300, tcp.DefaultSendBufferSize * 3, tcp.DefaultSendBufferSize * 30}); err != nil {
+ if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, tcpip.StackSendBufferSizeOption{Min: 300, Default: tcp.DefaultSendBufferSize * 3, Max: tcp.DefaultSendBufferSize * 30}); err != nil {
t.Fatalf("SetTransportProtocolOption failed: %s", err)
}
@@ -5672,7 +5678,7 @@ func TestReceiveBufferAutoTuningApplicationLimited(t *testing.T) {
// the segment queue holding unprocessed packets is limited to 500.
const receiveBufferSize = 80 << 10 // 80KB.
const maxReceiveBufferSize = receiveBufferSize * 10
- if err := stk.SetTransportProtocolOption(tcp.ProtocolNumber, tcp.ReceiveBufferSizeOption{1, receiveBufferSize, maxReceiveBufferSize}); err != nil {
+ if err := stk.SetTransportProtocolOption(tcp.ProtocolNumber, tcpip.StackReceiveBufferSizeOption{Min: 1, Default: receiveBufferSize, Max: maxReceiveBufferSize}); err != nil {
t.Fatalf("SetTransportProtocolOption failed: %s", err)
}
@@ -5793,7 +5799,7 @@ func TestReceiveBufferAutoTuning(t *testing.T) {
// the segment queue holding unprocessed packets is limited to 300.
const receiveBufferSize = 80 << 10 // 80KB.
const maxReceiveBufferSize = receiveBufferSize * 10
- if err := stk.SetTransportProtocolOption(tcp.ProtocolNumber, tcp.ReceiveBufferSizeOption{1, receiveBufferSize, maxReceiveBufferSize}); err != nil {
+ if err := stk.SetTransportProtocolOption(tcp.ProtocolNumber, tcpip.StackReceiveBufferSizeOption{Min: 1, Default: receiveBufferSize, Max: maxReceiveBufferSize}); err != nil {
t.Fatalf("SetTransportProtocolOption failed: %s", err)
}
@@ -5935,7 +5941,7 @@ func TestDelayEnabled(t *testing.T) {
checkDelayOption(t, c, false, false) // Delay is disabled by default.
for _, v := range []struct {
- delayEnabled tcp.DelayEnabled
+ delayEnabled tcpip.StackDelayEnabled
wantDelayOption bool
}{
{delayEnabled: false, wantDelayOption: false},
@@ -5950,10 +5956,10 @@ func TestDelayEnabled(t *testing.T) {
}
}
-func checkDelayOption(t *testing.T, c *context.Context, wantDelayEnabled tcp.DelayEnabled, wantDelayOption bool) {
+func checkDelayOption(t *testing.T, c *context.Context, wantDelayEnabled tcpip.StackDelayEnabled, wantDelayOption bool) {
t.Helper()
- var gotDelayEnabled tcp.DelayEnabled
+ var gotDelayEnabled tcpip.StackDelayEnabled
if err := c.Stack().TransportProtocolOption(tcp.ProtocolNumber, &gotDelayEnabled); err != nil {
t.Fatalf("TransportProtocolOption(tcp, &gotDelayEnabled) failed: %s", err)
}
diff --git a/pkg/tcpip/transport/tcp/testing/context/context.go b/pkg/tcpip/transport/tcp/testing/context/context.go
index 9721f6caf..9e262c272 100644
--- a/pkg/tcpip/transport/tcp/testing/context/context.go
+++ b/pkg/tcpip/transport/tcp/testing/context/context.go
@@ -144,12 +144,12 @@ func New(t *testing.T, mtu uint32) *Context {
})
// Allow minimum send/receive buffer sizes to be 1 during tests.
- if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, tcp.SendBufferSizeOption{1, tcp.DefaultSendBufferSize, 10 * tcp.DefaultSendBufferSize}); err != nil {
- t.Fatalf("SetTransportProtocolOption failed: %v", err)
+ if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, tcpip.StackSendBufferSizeOption{Min: 1, Default: tcp.DefaultSendBufferSize, Max: 10 * tcp.DefaultSendBufferSize}); err != nil {
+ t.Fatalf("SetTransportProtocolOption failed: %s", err)
}
- if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, tcp.ReceiveBufferSizeOption{1, tcp.DefaultReceiveBufferSize, 10 * tcp.DefaultReceiveBufferSize}); err != nil {
- t.Fatalf("SetTransportProtocolOption failed: %v", err)
+ if err := s.SetTransportProtocolOption(tcp.ProtocolNumber, tcpip.StackReceiveBufferSizeOption{Min: 1, Default: tcp.DefaultReceiveBufferSize, Max: 10 * tcp.DefaultReceiveBufferSize}); err != nil {
+ t.Fatalf("SetTransportProtocolOption failed: %s", err)
}
// Increase minimum RTO in tests to avoid test flakes due to early
@@ -1091,7 +1091,7 @@ func (c *Context) PassiveConnectWithOptions(maxPayload, wndScale int, synOptions
// SACKEnabled returns true if the TCP Protocol option SACKEnabled is set to true
// for the Stack in the context.
func (c *Context) SACKEnabled() bool {
- var v tcp.SACKEnabled
+ var v tcpip.StackSACKEnabled
if err := c.Stack().TransportProtocolOption(tcp.ProtocolNumber, &v); err != nil {
// Stack doesn't support SACK. So just return.
return false
diff --git a/pkg/tcpip/transport/udp/endpoint.go b/pkg/tcpip/transport/udp/endpoint.go
index df5efbf6a..f51988047 100644
--- a/pkg/tcpip/transport/udp/endpoint.go
+++ b/pkg/tcpip/transport/udp/endpoint.go
@@ -15,6 +15,8 @@
package udp
import (
+ "fmt"
+
"gvisor.dev/gvisor/pkg/sleep"
"gvisor.dev/gvisor/pkg/sync"
"gvisor.dev/gvisor/pkg/tcpip"
@@ -94,6 +96,7 @@ type endpoint struct {
// The following fields are protected by the mu mutex.
mu sync.RWMutex `state:"nosave"`
sndBufSize int
+ sndBufSizeMax int
state EndpointState
route stack.Route `state:"manual"`
dstPort uint16
@@ -159,7 +162,7 @@ type multicastMembership struct {
}
func newEndpoint(s *stack.Stack, netProto tcpip.NetworkProtocolNumber, waiterQueue *waiter.Queue) *endpoint {
- return &endpoint{
+ e := &endpoint{
stack: s,
TransportEndpointInfo: stack.TransportEndpointInfo{
NetProto: netProto,
@@ -181,10 +184,23 @@ func newEndpoint(s *stack.Stack, netProto tcpip.NetworkProtocolNumber, waiterQue
multicastTTL: 1,
multicastLoop: true,
rcvBufSizeMax: 32 * 1024,
- sndBufSize: 32 * 1024,
+ sndBufSizeMax: 32 * 1024,
state: StateInitial,
uniqueID: s.UniqueID(),
}
+
+ // Override with stack defaults.
+ var ss tcpip.StackSendBufferSizeOption
+ if err := s.TransportProtocolOption(ProtocolNumber, &ss); err == nil {
+ e.sndBufSizeMax = ss.Default
+ }
+
+ var rs tcpip.StackReceiveBufferSizeOption
+ if err := s.TransportProtocolOption(ProtocolNumber, &rs); err == nil {
+ e.rcvBufSizeMax = rs.Default
+ }
+
+ return e
}
// UniqueID implements stack.TransportEndpoint.UniqueID.
@@ -611,8 +627,43 @@ func (e *endpoint) SetSockOptInt(opt tcpip.SockOptInt, v int) *tcpip.Error {
e.mu.Unlock()
case tcpip.ReceiveBufferSizeOption:
+ // Make sure the receive buffer size is within the min and max
+ // allowed.
+ var rs tcpip.StackReceiveBufferSizeOption
+ if err := e.stack.TransportProtocolOption(ProtocolNumber, &rs); err != nil {
+ panic(fmt.Sprintf("e.stack.TransportProtocolOption(%d, %+v) = %s", ProtocolNumber, rs, err))
+ }
+
+ if v < rs.Min {
+ v = rs.Min
+ }
+ if v > rs.Max {
+ v = rs.Max
+ }
+
+ e.mu.Lock()
+ e.rcvBufSizeMax = v
+ e.mu.Unlock()
+ return nil
case tcpip.SendBufferSizeOption:
+ // Make sure the send buffer size is within the min and max
+ // allowed.
+ var ss tcpip.StackSendBufferSizeOption
+ if err := e.stack.TransportProtocolOption(ProtocolNumber, &ss); err != nil {
+ panic(fmt.Sprintf("e.stack.TransportProtocolOption(%d, %+v) = %s", ProtocolNumber, ss, err))
+ }
+
+ if v < ss.Min {
+ v = ss.Min
+ }
+ if v > ss.Max {
+ v = ss.Max
+ }
+ e.mu.Lock()
+ e.sndBufSizeMax = v
+ e.mu.Unlock()
+ return nil
}
return nil
@@ -861,7 +912,7 @@ func (e *endpoint) GetSockOptInt(opt tcpip.SockOptInt) (int, *tcpip.Error) {
case tcpip.SendBufferSizeOption:
e.mu.Lock()
- v := e.sndBufSize
+ v := e.sndBufSizeMax
e.mu.Unlock()
return v, nil
diff --git a/pkg/tcpip/transport/udp/protocol.go b/pkg/tcpip/transport/udp/protocol.go
index 4218e7d03..fc93f93c0 100644
--- a/pkg/tcpip/transport/udp/protocol.go
+++ b/pkg/tcpip/transport/udp/protocol.go
@@ -21,6 +21,7 @@
package udp
import (
+ "gvisor.dev/gvisor/pkg/sync"
"gvisor.dev/gvisor/pkg/tcpip"
"gvisor.dev/gvisor/pkg/tcpip/buffer"
"gvisor.dev/gvisor/pkg/tcpip/header"
@@ -32,9 +33,27 @@ import (
const (
// ProtocolNumber is the udp protocol number.
ProtocolNumber = header.UDPProtocolNumber
+
+ // MinBufferSize is the smallest size of a receive or send buffer.
+ MinBufferSize = 4 << 10 // 4KiB bytes.
+
+ // DefaultSendBufferSize is the default size of the send buffer for
+ // an endpoint.
+ DefaultSendBufferSize = 32 << 10 // 32KiB
+
+ // DefaultReceiveBufferSize is the default size of the receive buffer
+ // for an endpoint.
+ DefaultReceiveBufferSize = 32 << 10 // 32KiB
+
+ // MaxBufferSize is the largest size a receive/send buffer can grow to.
+ MaxBufferSize = 4 << 20 // 4MiB
)
-type protocol struct{}
+type protocol struct {
+ mu sync.RWMutex
+ sendBufferSize tcpip.StackSendBufferSizeOption
+ recvBufferSize tcpip.StackReceiveBufferSizeOption
+}
// Number returns the udp protocol number.
func (*protocol) Number() tcpip.TransportProtocolNumber {
@@ -183,13 +202,49 @@ func (p *protocol) HandleUnknownDestinationPacket(r *stack.Route, id stack.Trans
}
// SetOption implements stack.TransportProtocol.SetOption.
-func (*protocol) SetOption(option interface{}) *tcpip.Error {
- return tcpip.ErrUnknownProtocolOption
+func (p *protocol) SetOption(option interface{}) *tcpip.Error {
+ switch v := option.(type) {
+ case tcpip.StackSendBufferSizeOption:
+ if v.Min <= 0 || v.Default < v.Min || v.Default > v.Max {
+ return tcpip.ErrInvalidOptionValue
+ }
+ p.mu.Lock()
+ p.sendBufferSize = v
+ p.mu.Unlock()
+ return nil
+
+ case tcpip.StackReceiveBufferSizeOption:
+ if v.Min <= 0 || v.Default < v.Min || v.Default > v.Max {
+ return tcpip.ErrInvalidOptionValue
+ }
+ p.mu.Lock()
+ p.recvBufferSize = v
+ p.mu.Unlock()
+ return nil
+
+ default:
+ return tcpip.ErrUnknownProtocolOption
+ }
}
// Option implements stack.TransportProtocol.Option.
-func (*protocol) Option(option interface{}) *tcpip.Error {
- return tcpip.ErrUnknownProtocolOption
+func (p *protocol) Option(option interface{}) *tcpip.Error {
+ switch v := option.(type) {
+ case *tcpip.StackSendBufferSizeOption:
+ p.mu.RLock()
+ *v = p.sendBufferSize
+ p.mu.RUnlock()
+ return nil
+
+ case *tcpip.StackReceiveBufferSizeOption:
+ p.mu.RLock()
+ *v = p.recvBufferSize
+ p.mu.RUnlock()
+ return nil
+
+ default:
+ return tcpip.ErrUnknownProtocolOption
+ }
}
// Close implements stack.TransportProtocol.Close.
@@ -212,5 +267,8 @@ func (*protocol) Parse(pkt *stack.PacketBuffer) bool {
// NewProtocol returns a UDP transport protocol.
func NewProtocol() stack.TransportProtocol {
- return &protocol{}
+ return &protocol{
+ sendBufferSize: tcpip.StackSendBufferSizeOption{Min: MinBufferSize, Default: DefaultSendBufferSize, Max: MaxBufferSize},
+ recvBufferSize: tcpip.StackReceiveBufferSizeOption{Min: MinBufferSize, Default: DefaultReceiveBufferSize, Max: MaxBufferSize},
+ }
}
diff --git a/runsc/boot/loader.go b/runsc/boot/loader.go
index b05a8bd45..c6efcdc83 100644
--- a/runsc/boot/loader.go
+++ b/runsc/boot/loader.go
@@ -1058,8 +1058,8 @@ func newEmptySandboxNetworkStack(clock tcpip.Clock, uniqueID stack.UniqueID) (in
})}
// Enable SACK Recovery.
- if err := s.Stack.SetTransportProtocolOption(tcp.ProtocolNumber, tcp.SACKEnabled(true)); err != nil {
- return nil, fmt.Errorf("failed to enable SACK: %v", err)
+ if err := s.Stack.SetTransportProtocolOption(tcp.ProtocolNumber, tcpip.StackSACKEnabled(true)); err != nil {
+ return nil, fmt.Errorf("failed to enable SACK: %s", err)
}
// Set default TTLs as required by socket/netstack.
@@ -1068,7 +1068,7 @@ func newEmptySandboxNetworkStack(clock tcpip.Clock, uniqueID stack.UniqueID) (in
// Enable Receive Buffer Auto-Tuning.
if err := s.Stack.SetTransportProtocolOption(tcp.ProtocolNumber, tcpip.ModerateReceiveBufferOption(true)); err != nil {
- return nil, fmt.Errorf("SetTransportProtocolOption failed: %v", err)
+ return nil, fmt.Errorf("SetTransportProtocolOption failed: %s", err)
}
s.FillIPTablesMetadata()
diff --git a/test/syscalls/linux/raw_socket_ipv4.cc b/test/syscalls/linux/raw_socket_ipv4.cc
index cde2f07c9..0116c3e94 100644
--- a/test/syscalls/linux/raw_socket_ipv4.cc
+++ b/test/syscalls/linux/raw_socket_ipv4.cc
@@ -357,10 +357,389 @@ TEST_P(RawSocketTest, BindConnectSendAndReceive) {
EXPECT_EQ(memcmp(recv_buf + sizeof(struct iphdr), kBuf, sizeof(kBuf)), 0);
}
+// Check that setting SO_RCVBUF below min is clamped to the minimum
+// receive buffer size.
+TEST_P(RawSocketTest, SetSocketRecvBufBelowMin) {
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
+
+ // Discover minimum receive buf size by trying to set it to zero.
+ // See:
+ // https://github.com/torvalds/linux/blob/a5dc8300df75e8b8384b4c82225f1e4a0b4d9b55/net/core/sock.c#L820
+ constexpr int kRcvBufSz = 0;
+ ASSERT_THAT(
+ setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &kRcvBufSz, sizeof(kRcvBufSz)),
+ SyscallSucceeds());
+
+ int min = 0;
+ socklen_t min_len = sizeof(min);
+ ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &min, &min_len),
+ SyscallSucceeds());
+
+ // Linux doubles the value so let's use a value that when doubled will still
+ // be smaller than min.
+ int below_min = min / 2 - 1;
+ ASSERT_THAT(
+ setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &below_min, sizeof(below_min)),
+ SyscallSucceeds());
+
+ int val = 0;
+ socklen_t val_len = sizeof(val);
+ ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &val, &val_len),
+ SyscallSucceeds());
+
+ ASSERT_EQ(min, val);
+}
+
+// Check that setting SO_RCVBUF above max is clamped to the maximum
+// receive buffer size.
+TEST_P(RawSocketTest, SetSocketRecvBufAboveMax) {
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
+
+ // Discover max buf size by trying to set the largest possible buffer size.
+ constexpr int kRcvBufSz = 0xffffffff;
+ ASSERT_THAT(
+ setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &kRcvBufSz, sizeof(kRcvBufSz)),
+ SyscallSucceeds());
+
+ int max = 0;
+ socklen_t max_len = sizeof(max);
+ ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &max, &max_len),
+ SyscallSucceeds());
+
+ int above_max = max + 1;
+ ASSERT_THAT(
+ setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &above_max, sizeof(above_max)),
+ SyscallSucceeds());
+
+ int val = 0;
+ socklen_t val_len = sizeof(val);
+ ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &val, &val_len),
+ SyscallSucceeds());
+ ASSERT_EQ(max, val);
+}
+
+// Check that setting SO_RCVBUF min <= kRcvBufSz <= max is honored.
+TEST_P(RawSocketTest, SetSocketRecvBuf) {
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
+
+ int max = 0;
+ int min = 0;
+ {
+ // Discover max buf size by trying to set a really large buffer size.
+ constexpr int kRcvBufSz = 0xffffffff;
+ ASSERT_THAT(
+ setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &kRcvBufSz, sizeof(kRcvBufSz)),
+ SyscallSucceeds());
+
+ max = 0;
+ socklen_t max_len = sizeof(max);
+ ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &max, &max_len),
+ SyscallSucceeds());
+ }
+
+ {
+ // Discover minimum buffer size by trying to set a zero size receive buffer
+ // size.
+ // See:
+ // https://github.com/torvalds/linux/blob/a5dc8300df75e8b8384b4c82225f1e4a0b4d9b55/net/core/sock.c#L820
+ constexpr int kRcvBufSz = 0;
+ ASSERT_THAT(
+ setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &kRcvBufSz, sizeof(kRcvBufSz)),
+ SyscallSucceeds());
+
+ socklen_t min_len = sizeof(min);
+ ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &min, &min_len),
+ SyscallSucceeds());
+ }
+
+ int quarter_sz = min + (max - min) / 4;
+ ASSERT_THAT(
+ setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &quarter_sz, sizeof(quarter_sz)),
+ SyscallSucceeds());
+
+ int val = 0;
+ socklen_t val_len = sizeof(val);
+ ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &val, &val_len),
+ SyscallSucceeds());
+
+ // Linux doubles the value set by SO_SNDBUF/SO_RCVBUF.
+ // TODO(gvisor.dev/issue/2926): Remove when Netstack matches linux behavior.
+ if (!IsRunningOnGvisor()) {
+ quarter_sz *= 2;
+ }
+ ASSERT_EQ(quarter_sz, val);
+}
+
+// Check that setting SO_SNDBUF below min is clamped to the minimum
+// receive buffer size.
+TEST_P(RawSocketTest, SetSocketSendBufBelowMin) {
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
+
+ // Discover minimum buffer size by trying to set it to zero.
+ constexpr int kSndBufSz = 0;
+ ASSERT_THAT(
+ setsockopt(s_, SOL_SOCKET, SO_SNDBUF, &kSndBufSz, sizeof(kSndBufSz)),
+ SyscallSucceeds());
+
+ int min = 0;
+ socklen_t min_len = sizeof(min);
+ ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_SNDBUF, &min, &min_len),
+ SyscallSucceeds());
+
+ // Linux doubles the value so let's use a value that when doubled will still
+ // be smaller than min.
+ int below_min = min / 2 - 1;
+ ASSERT_THAT(
+ setsockopt(s_, SOL_SOCKET, SO_SNDBUF, &below_min, sizeof(below_min)),
+ SyscallSucceeds());
+
+ int val = 0;
+ socklen_t val_len = sizeof(val);
+ ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_SNDBUF, &val, &val_len),
+ SyscallSucceeds());
+
+ ASSERT_EQ(min, val);
+}
+
+// Check that setting SO_SNDBUF above max is clamped to the maximum
+// send buffer size.
+TEST_P(RawSocketTest, SetSocketSendBufAboveMax) {
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
+
+ // Discover maximum buffer size by trying to set it to a large value.
+ constexpr int kSndBufSz = 0xffffffff;
+ ASSERT_THAT(
+ setsockopt(s_, SOL_SOCKET, SO_SNDBUF, &kSndBufSz, sizeof(kSndBufSz)),
+ SyscallSucceeds());
+
+ int max = 0;
+ socklen_t max_len = sizeof(max);
+ ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_SNDBUF, &max, &max_len),
+ SyscallSucceeds());
+
+ int above_max = max + 1;
+ ASSERT_THAT(
+ setsockopt(s_, SOL_SOCKET, SO_SNDBUF, &above_max, sizeof(above_max)),
+ SyscallSucceeds());
+
+ int val = 0;
+ socklen_t val_len = sizeof(val);
+ ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_SNDBUF, &val, &val_len),
+ SyscallSucceeds());
+ ASSERT_EQ(max, val);
+}
+
+// Check that setting SO_SNDBUF min <= kSndBufSz <= max is honored.
+TEST_P(RawSocketTest, SetSocketSendBuf) {
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
+
+ int max = 0;
+ int min = 0;
+ {
+ // Discover maximum buffer size by trying to set it to a large value.
+ constexpr int kSndBufSz = 0xffffffff;
+ ASSERT_THAT(
+ setsockopt(s_, SOL_SOCKET, SO_SNDBUF, &kSndBufSz, sizeof(kSndBufSz)),
+ SyscallSucceeds());
+
+ max = 0;
+ socklen_t max_len = sizeof(max);
+ ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_SNDBUF, &max, &max_len),
+ SyscallSucceeds());
+ }
+
+ {
+ // Discover minimum buffer size by trying to set it to zero.
+ constexpr int kSndBufSz = 0;
+ ASSERT_THAT(
+ setsockopt(s_, SOL_SOCKET, SO_SNDBUF, &kSndBufSz, sizeof(kSndBufSz)),
+ SyscallSucceeds());
+
+ socklen_t min_len = sizeof(min);
+ ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_SNDBUF, &min, &min_len),
+ SyscallSucceeds());
+ }
+
+ int quarter_sz = min + (max - min) / 4;
+ ASSERT_THAT(
+ setsockopt(s_, SOL_SOCKET, SO_SNDBUF, &quarter_sz, sizeof(quarter_sz)),
+ SyscallSucceeds());
+
+ int val = 0;
+ socklen_t val_len = sizeof(val);
+ ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_SNDBUF, &val, &val_len),
+ SyscallSucceeds());
+
+ // Linux doubles the value set by SO_SNDBUF/SO_RCVBUF.
+ // TODO(gvisor.dev/issue/2926): Remove the gvisor special casing when Netstack
+ // matches linux behavior.
+ if (!IsRunningOnGvisor()) {
+ quarter_sz *= 2;
+ }
+
+ ASSERT_EQ(quarter_sz, val);
+}
+
void RawSocketTest::SendBuf(const char* buf, int buf_len) {
ASSERT_NO_FATAL_FAILURE(SendBufTo(s_, addr_, buf, buf_len));
}
+// Test that receive buffer limits are not enforced when the recv buffer is
+// empty.
+TEST_P(RawSocketTest, RecvBufLimitsEmptyRecvBuffer) {
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
+
+ ASSERT_THAT(
+ bind(s_, reinterpret_cast<struct sockaddr*>(&addr_), sizeof(addr_)),
+ SyscallSucceeds());
+ ASSERT_THAT(
+ connect(s_, reinterpret_cast<struct sockaddr*>(&addr_), sizeof(addr_)),
+ SyscallSucceeds());
+
+ int min = 0;
+ {
+ // Discover minimum buffer size by trying to set it to zero.
+ constexpr int kRcvBufSz = 0;
+ ASSERT_THAT(
+ setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &kRcvBufSz, sizeof(kRcvBufSz)),
+ SyscallSucceeds());
+
+ socklen_t min_len = sizeof(min);
+ ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &min, &min_len),
+ SyscallSucceeds());
+ }
+
+ {
+ // Send data of size min and verify that it's received.
+ std::vector<char> buf(min);
+ RandomizeBuffer(buf.data(), buf.size());
+ ASSERT_NO_FATAL_FAILURE(SendBuf(buf.data(), buf.size()));
+
+ // Receive the packet and make sure it's identical.
+ std::vector<char> recv_buf(buf.size() + sizeof(struct iphdr));
+ ASSERT_NO_FATAL_FAILURE(ReceiveBuf(recv_buf.data(), recv_buf.size()));
+ EXPECT_EQ(
+ memcmp(recv_buf.data() + sizeof(struct iphdr), buf.data(), buf.size()),
+ 0);
+ }
+
+ {
+ // Send data of size min + 1 and verify that its received. Both linux and
+ // Netstack accept a dgram that exceeds rcvBuf limits if the receive buffer
+ // is currently empty.
+ std::vector<char> buf(min + 1);
+ RandomizeBuffer(buf.data(), buf.size());
+ ASSERT_NO_FATAL_FAILURE(SendBuf(buf.data(), buf.size()));
+ // Receive the packet and make sure it's identical.
+ std::vector<char> recv_buf(buf.size() + sizeof(struct iphdr));
+ ASSERT_NO_FATAL_FAILURE(ReceiveBuf(recv_buf.data(), recv_buf.size()));
+ EXPECT_EQ(
+ memcmp(recv_buf.data() + sizeof(struct iphdr), buf.data(), buf.size()),
+ 0);
+ }
+}
+
+TEST_P(RawSocketTest, RecvBufLimits) {
+ // TCP stack generates RSTs for unknown endpoints and it complicates the test
+ // as we have to deal with the RST packets as well. For testing the raw socket
+ // endpoints buffer limit enforcement we can just test for UDP.
+ //
+ // We don't use SKIP_IF here because root_test_runner explicitly fails if a
+ // test is skipped.
+ if (Protocol() == IPPROTO_TCP) {
+ return;
+ }
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
+
+ ASSERT_THAT(
+ bind(s_, reinterpret_cast<struct sockaddr*>(&addr_), sizeof(addr_)),
+ SyscallSucceeds());
+ ASSERT_THAT(
+ connect(s_, reinterpret_cast<struct sockaddr*>(&addr_), sizeof(addr_)),
+ SyscallSucceeds());
+
+ int min = 0;
+ {
+ // Discover minimum buffer size by trying to set it to zero.
+ constexpr int kRcvBufSz = 0;
+ ASSERT_THAT(
+ setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &kRcvBufSz, sizeof(kRcvBufSz)),
+ SyscallSucceeds());
+
+ socklen_t min_len = sizeof(min);
+ ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &min, &min_len),
+ SyscallSucceeds());
+ }
+
+ // Now set the limit to min * 2.
+ int new_rcv_buf_sz = min * 4;
+ if (!IsRunningOnGvisor()) {
+ // Linux doubles the value specified so just set to min.
+ new_rcv_buf_sz = min * 2;
+ }
+
+ ASSERT_THAT(setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &new_rcv_buf_sz,
+ sizeof(new_rcv_buf_sz)),
+ SyscallSucceeds());
+ int rcv_buf_sz = 0;
+ {
+ socklen_t rcv_buf_len = sizeof(rcv_buf_sz);
+ ASSERT_THAT(
+ getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &rcv_buf_sz, &rcv_buf_len),
+ SyscallSucceeds());
+ }
+
+ // Set a receive timeout so that we don't block forever on reads if the test
+ // fails.
+ struct timeval tv {
+ .tv_sec = 1, .tv_usec = 0,
+ };
+ ASSERT_THAT(setsockopt(s_, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)),
+ SyscallSucceeds());
+
+ {
+ std::vector<char> buf(min);
+ RandomizeBuffer(buf.data(), buf.size());
+
+ ASSERT_NO_FATAL_FAILURE(SendBuf(buf.data(), buf.size()));
+ ASSERT_NO_FATAL_FAILURE(SendBuf(buf.data(), buf.size()));
+ ASSERT_NO_FATAL_FAILURE(SendBuf(buf.data(), buf.size()));
+ ASSERT_NO_FATAL_FAILURE(SendBuf(buf.data(), buf.size()));
+ int sent = 4;
+ if (IsRunningOnGvisor()) {
+ // Linux seems to drop the 4th packet even though technically it should
+ // fit in the receive buffer.
+ ASSERT_NO_FATAL_FAILURE(SendBuf(buf.data(), buf.size()));
+ sent++;
+ }
+
+ // Verify that the expected number of packets are available to be read.
+ for (int i = 0; i < sent - 1; i++) {
+ // Receive the packet and make sure it's identical.
+ std::vector<char> recv_buf(buf.size() + sizeof(struct iphdr));
+ ASSERT_NO_FATAL_FAILURE(ReceiveBuf(recv_buf.data(), recv_buf.size()));
+ EXPECT_EQ(memcmp(recv_buf.data() + sizeof(struct iphdr), buf.data(),
+ buf.size()),
+ 0);
+ }
+
+ // Assert that the last packet is dropped because the receive buffer should
+ // be full after the first four packets.
+ std::vector<char> recv_buf(buf.size() + sizeof(struct iphdr));
+ struct iovec iov = {};
+ iov.iov_base = static_cast<void*>(const_cast<char*>(recv_buf.data()));
+ iov.iov_len = buf.size();
+ struct msghdr msg = {};
+ msg.msg_iov = &iov;
+ msg.msg_iovlen = 1;
+ msg.msg_control = NULL;
+ msg.msg_controllen = 0;
+ msg.msg_flags = 0;
+ ASSERT_THAT(RetryEINTR(recvmsg)(s_, &msg, MSG_DONTWAIT),
+ SyscallFailsWithErrno(EAGAIN));
+ }
+}
+
void RawSocketTest::SendBufTo(int sock, const struct sockaddr_in& addr,
const char* buf, int buf_len) {
// It's safe to use const_cast here because sendmsg won't modify the iovec or
diff --git a/test/syscalls/linux/socket_ipv4_udp_unbound.cc b/test/syscalls/linux/socket_ipv4_udp_unbound.cc
index 4db9553e1..de0f5f01b 100644
--- a/test/syscalls/linux/socket_ipv4_udp_unbound.cc
+++ b/test/syscalls/linux/socket_ipv4_udp_unbound.cc
@@ -18,6 +18,7 @@
#include <net/if.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
+#include <sys/types.h>
#include <sys/un.h>
#include <cstdio>
@@ -2236,5 +2237,220 @@ TEST_P(IPv4UDPUnboundSocketTest, SetAndReceiveIPPKTINFO) {
EXPECT_EQ(received_pktinfo.ipi_spec_dst.s_addr, htonl(INADDR_LOOPBACK));
EXPECT_EQ(received_pktinfo.ipi_addr.s_addr, htonl(INADDR_LOOPBACK));
}
+
+// Check that setting SO_RCVBUF below min is clamped to the minimum
+// receive buffer size.
+TEST_P(IPv4UDPUnboundSocketTest, SetSocketRecvBufBelowMin) {
+ auto s = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+
+ // Discover minimum buffer size by setting it to zero.
+ constexpr int kRcvBufSz = 0;
+ ASSERT_THAT(setsockopt(s->get(), SOL_SOCKET, SO_RCVBUF, &kRcvBufSz,
+ sizeof(kRcvBufSz)),
+ SyscallSucceeds());
+
+ int min = 0;
+ socklen_t min_len = sizeof(min);
+ ASSERT_THAT(getsockopt(s->get(), SOL_SOCKET, SO_RCVBUF, &min, &min_len),
+ SyscallSucceeds());
+
+ // Linux doubles the value so let's use a value that when doubled will still
+ // be smaller than min.
+ int below_min = min / 2 - 1;
+ ASSERT_THAT(setsockopt(s->get(), SOL_SOCKET, SO_RCVBUF, &below_min,
+ sizeof(below_min)),
+ SyscallSucceeds());
+
+ int val = 0;
+ socklen_t val_len = sizeof(val);
+ ASSERT_THAT(getsockopt(s->get(), SOL_SOCKET, SO_RCVBUF, &val, &val_len),
+ SyscallSucceeds());
+
+ ASSERT_EQ(min, val);
+}
+
+// Check that setting SO_RCVBUF above max is clamped to the maximum
+// receive buffer size.
+TEST_P(IPv4UDPUnboundSocketTest, SetSocketRecvBufAboveMax) {
+ auto s = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+
+ // Discover maxmimum buffer size by setting to a really large value.
+ constexpr int kRcvBufSz = 0xffffffff;
+ ASSERT_THAT(setsockopt(s->get(), SOL_SOCKET, SO_RCVBUF, &kRcvBufSz,
+ sizeof(kRcvBufSz)),
+ SyscallSucceeds());
+
+ int max = 0;
+ socklen_t max_len = sizeof(max);
+ ASSERT_THAT(getsockopt(s->get(), SOL_SOCKET, SO_RCVBUF, &max, &max_len),
+ SyscallSucceeds());
+
+ int above_max = max + 1;
+ ASSERT_THAT(setsockopt(s->get(), SOL_SOCKET, SO_RCVBUF, &above_max,
+ sizeof(above_max)),
+ SyscallSucceeds());
+
+ int val = 0;
+ socklen_t val_len = sizeof(val);
+ ASSERT_THAT(getsockopt(s->get(), SOL_SOCKET, SO_RCVBUF, &val, &val_len),
+ SyscallSucceeds());
+ ASSERT_EQ(max, val);
+}
+
+// Check that setting SO_RCVBUF min <= rcvBufSz <= max is honored.
+TEST_P(IPv4UDPUnboundSocketTest, SetSocketRecvBuf) {
+ auto s = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+
+ int max = 0;
+ int min = 0;
+ {
+ // Discover maxmimum buffer size by setting to a really large value.
+ constexpr int kRcvBufSz = 0xffffffff;
+ ASSERT_THAT(setsockopt(s->get(), SOL_SOCKET, SO_RCVBUF, &kRcvBufSz,
+ sizeof(kRcvBufSz)),
+ SyscallSucceeds());
+
+ max = 0;
+ socklen_t max_len = sizeof(max);
+ ASSERT_THAT(getsockopt(s->get(), SOL_SOCKET, SO_RCVBUF, &max, &max_len),
+ SyscallSucceeds());
+ }
+
+ {
+ // Discover minimum buffer size by setting it to zero.
+ constexpr int kRcvBufSz = 0;
+ ASSERT_THAT(setsockopt(s->get(), SOL_SOCKET, SO_RCVBUF, &kRcvBufSz,
+ sizeof(kRcvBufSz)),
+ SyscallSucceeds());
+
+ socklen_t min_len = sizeof(min);
+ ASSERT_THAT(getsockopt(s->get(), SOL_SOCKET, SO_RCVBUF, &min, &min_len),
+ SyscallSucceeds());
+ }
+
+ int quarter_sz = min + (max - min) / 4;
+ ASSERT_THAT(setsockopt(s->get(), SOL_SOCKET, SO_RCVBUF, &quarter_sz,
+ sizeof(quarter_sz)),
+ SyscallSucceeds());
+
+ int val = 0;
+ socklen_t val_len = sizeof(val);
+ ASSERT_THAT(getsockopt(s->get(), SOL_SOCKET, SO_RCVBUF, &val, &val_len),
+ SyscallSucceeds());
+
+ // Linux doubles the value set by SO_SNDBUF/SO_RCVBUF.
+ if (!IsRunningOnGvisor()) {
+ quarter_sz *= 2;
+ }
+ ASSERT_EQ(quarter_sz, val);
+}
+
+// Check that setting SO_SNDBUF below min is clamped to the minimum
+// send buffer size.
+TEST_P(IPv4UDPUnboundSocketTest, SetSocketSendBufBelowMin) {
+ auto s = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+
+ // Discover minimum buffer size by setting it to zero.
+ constexpr int kSndBufSz = 0;
+ ASSERT_THAT(setsockopt(s->get(), SOL_SOCKET, SO_SNDBUF, &kSndBufSz,
+ sizeof(kSndBufSz)),
+ SyscallSucceeds());
+
+ int min = 0;
+ socklen_t min_len = sizeof(min);
+ ASSERT_THAT(getsockopt(s->get(), SOL_SOCKET, SO_SNDBUF, &min, &min_len),
+ SyscallSucceeds());
+
+ // Linux doubles the value so let's use a value that when doubled will still
+ // be smaller than min.
+ int below_min = min / 2 - 1;
+ ASSERT_THAT(setsockopt(s->get(), SOL_SOCKET, SO_SNDBUF, &below_min,
+ sizeof(below_min)),
+ SyscallSucceeds());
+
+ int val = 0;
+ socklen_t val_len = sizeof(val);
+ ASSERT_THAT(getsockopt(s->get(), SOL_SOCKET, SO_SNDBUF, &val, &val_len),
+ SyscallSucceeds());
+
+ ASSERT_EQ(min, val);
+}
+
+// Check that setting SO_SNDBUF above max is clamped to the maximum
+// send buffer size.
+TEST_P(IPv4UDPUnboundSocketTest, SetSocketSendBufAboveMax) {
+ auto s = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+
+ // Discover maxmimum buffer size by setting to a really large value.
+ constexpr int kSndBufSz = 0xffffffff;
+ ASSERT_THAT(setsockopt(s->get(), SOL_SOCKET, SO_SNDBUF, &kSndBufSz,
+ sizeof(kSndBufSz)),
+ SyscallSucceeds());
+
+ int max = 0;
+ socklen_t max_len = sizeof(max);
+ ASSERT_THAT(getsockopt(s->get(), SOL_SOCKET, SO_SNDBUF, &max, &max_len),
+ SyscallSucceeds());
+
+ int above_max = max + 1;
+ ASSERT_THAT(setsockopt(s->get(), SOL_SOCKET, SO_SNDBUF, &above_max,
+ sizeof(above_max)),
+ SyscallSucceeds());
+
+ int val = 0;
+ socklen_t val_len = sizeof(val);
+ ASSERT_THAT(getsockopt(s->get(), SOL_SOCKET, SO_SNDBUF, &val, &val_len),
+ SyscallSucceeds());
+ ASSERT_EQ(max, val);
+}
+
+// Check that setting SO_SNDBUF min <= kSndBufSz <= max is honored.
+TEST_P(IPv4UDPUnboundSocketTest, SetSocketSendBuf) {
+ auto s = ASSERT_NO_ERRNO_AND_VALUE(NewSocket());
+
+ int max = 0;
+ int min = 0;
+ {
+ // Discover maxmimum buffer size by setting to a really large value.
+ constexpr int kSndBufSz = 0xffffffff;
+ ASSERT_THAT(setsockopt(s->get(), SOL_SOCKET, SO_SNDBUF, &kSndBufSz,
+ sizeof(kSndBufSz)),
+ SyscallSucceeds());
+
+ max = 0;
+ socklen_t max_len = sizeof(max);
+ ASSERT_THAT(getsockopt(s->get(), SOL_SOCKET, SO_SNDBUF, &max, &max_len),
+ SyscallSucceeds());
+ }
+
+ {
+ // Discover minimum buffer size by setting it to zero.
+ constexpr int kSndBufSz = 0;
+ ASSERT_THAT(setsockopt(s->get(), SOL_SOCKET, SO_SNDBUF, &kSndBufSz,
+ sizeof(kSndBufSz)),
+ SyscallSucceeds());
+
+ socklen_t min_len = sizeof(min);
+ ASSERT_THAT(getsockopt(s->get(), SOL_SOCKET, SO_SNDBUF, &min, &min_len),
+ SyscallSucceeds());
+ }
+
+ int quarter_sz = min + (max - min) / 4;
+ ASSERT_THAT(setsockopt(s->get(), SOL_SOCKET, SO_SNDBUF, &quarter_sz,
+ sizeof(quarter_sz)),
+ SyscallSucceeds());
+
+ int val = 0;
+ socklen_t val_len = sizeof(val);
+ ASSERT_THAT(getsockopt(s->get(), SOL_SOCKET, SO_SNDBUF, &val, &val_len),
+ SyscallSucceeds());
+
+ // Linux doubles the value set by SO_SNDBUF/SO_RCVBUF.
+ if (!IsRunningOnGvisor()) {
+ quarter_sz *= 2;
+ }
+
+ ASSERT_EQ(quarter_sz, val);
+}
} // namespace testing
} // namespace gvisor
diff --git a/test/syscalls/linux/udp_socket_test_cases.cc b/test/syscalls/linux/udp_socket_test_cases.cc
index 42521efef..cc1db3de8 100644
--- a/test/syscalls/linux/udp_socket_test_cases.cc
+++ b/test/syscalls/linux/udp_socket_test_cases.cc
@@ -1543,5 +1543,120 @@ TEST_P(UdpSocketTest, SendAndReceiveTOS) {
memcpy(&received_tos, CMSG_DATA(cmsg), sizeof(received_tos));
EXPECT_EQ(received_tos, sent_tos);
}
+
+TEST_P(UdpSocketTest, RecvBufLimitsEmptyRcvBuf) {
+ // Discover minimum buffer size by setting it to zero.
+ constexpr int kRcvBufSz = 0;
+ ASSERT_THAT(
+ setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &kRcvBufSz, sizeof(kRcvBufSz)),
+ SyscallSucceeds());
+
+ int min = 0;
+ socklen_t min_len = sizeof(min);
+ ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &min, &min_len),
+ SyscallSucceeds());
+
+ // Bind s_ to loopback.
+ ASSERT_THAT(bind(s_, addr_[0], addrlen_), SyscallSucceeds());
+
+ {
+ // Send data of size min and verify that it's received.
+ std::vector<char> buf(min);
+ RandomizeBuffer(buf.data(), buf.size());
+ ASSERT_THAT(sendto(t_, buf.data(), buf.size(), 0, addr_[0], addrlen_),
+ SyscallSucceedsWithValue(buf.size()));
+ std::vector<char> received(buf.size());
+ EXPECT_THAT(recv(s_, received.data(), received.size(), MSG_DONTWAIT),
+ SyscallSucceedsWithValue(received.size()));
+ }
+
+ {
+ // Send data of size min + 1 and verify that its received. Both linux and
+ // Netstack accept a dgram that exceeds rcvBuf limits if the receive buffer
+ // is currently empty.
+ std::vector<char> buf(min + 1);
+ RandomizeBuffer(buf.data(), buf.size());
+ ASSERT_THAT(sendto(t_, buf.data(), buf.size(), 0, addr_[0], addrlen_),
+ SyscallSucceedsWithValue(buf.size()));
+
+ std::vector<char> received(buf.size());
+ EXPECT_THAT(recv(s_, received.data(), received.size(), MSG_DONTWAIT),
+ SyscallSucceedsWithValue(received.size()));
+ }
+}
+
+// Test that receive buffer limits are enforced.
+TEST_P(UdpSocketTest, RecvBufLimits) {
+ // Bind s_ to loopback.
+ ASSERT_THAT(bind(s_, addr_[0], addrlen_), SyscallSucceeds());
+
+ int min = 0;
+ {
+ // Discover minimum buffer size by trying to set it to zero.
+ constexpr int kRcvBufSz = 0;
+ ASSERT_THAT(
+ setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &kRcvBufSz, sizeof(kRcvBufSz)),
+ SyscallSucceeds());
+
+ socklen_t min_len = sizeof(min);
+ ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &min, &min_len),
+ SyscallSucceeds());
+ }
+
+ // Now set the limit to min * 4.
+ int new_rcv_buf_sz = min * 4;
+ if (!IsRunningOnGvisor() || IsRunningWithHostinet()) {
+ // Linux doubles the value specified so just set to min * 2.
+ new_rcv_buf_sz = min * 2;
+ }
+
+ ASSERT_THAT(setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &new_rcv_buf_sz,
+ sizeof(new_rcv_buf_sz)),
+ SyscallSucceeds());
+ int rcv_buf_sz = 0;
+ {
+ socklen_t rcv_buf_len = sizeof(rcv_buf_sz);
+ ASSERT_THAT(
+ getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &rcv_buf_sz, &rcv_buf_len),
+ SyscallSucceeds());
+ }
+
+ {
+ std::vector<char> buf(min);
+ RandomizeBuffer(buf.data(), buf.size());
+
+ ASSERT_THAT(sendto(t_, buf.data(), buf.size(), 0, addr_[0], addrlen_),
+ SyscallSucceedsWithValue(buf.size()));
+ ASSERT_THAT(sendto(t_, buf.data(), buf.size(), 0, addr_[0], addrlen_),
+ SyscallSucceedsWithValue(buf.size()));
+ ASSERT_THAT(sendto(t_, buf.data(), buf.size(), 0, addr_[0], addrlen_),
+ SyscallSucceedsWithValue(buf.size()));
+ ASSERT_THAT(sendto(t_, buf.data(), buf.size(), 0, addr_[0], addrlen_),
+ SyscallSucceedsWithValue(buf.size()));
+ int sent = 4;
+ if (IsRunningOnGvisor() && !IsRunningWithHostinet()) {
+ // Linux seems to drop the 4th packet even though technically it should
+ // fit in the receive buffer.
+ ASSERT_THAT(sendto(t_, buf.data(), buf.size(), 0, addr_[0], addrlen_),
+ SyscallSucceedsWithValue(buf.size()));
+ sent++;
+ }
+
+ for (int i = 0; i < sent - 1; i++) {
+ // Receive the data.
+ std::vector<char> received(buf.size());
+ EXPECT_THAT(recv(s_, received.data(), received.size(), MSG_DONTWAIT),
+ SyscallSucceedsWithValue(received.size()));
+ EXPECT_EQ(memcmp(buf.data(), received.data(), buf.size()), 0);
+ }
+
+ // The last receive should fail with EAGAIN as the last packet should have
+ // been dropped due to lack of space in the receive buffer.
+ std::vector<char> received(buf.size());
+ EXPECT_THAT(recv(s_, received.data(), received.size(), MSG_DONTWAIT),
+ SyscallFailsWithErrno(EAGAIN));
+ }
+}
+
} // namespace testing
} // namespace gvisor