diff options
-rw-r--r-- | pkg/sentry/socket/epsocket/epsocket.go | 20 | ||||
-rw-r--r-- | pkg/tcpip/header/ipv4.go | 4 | ||||
-rw-r--r-- | pkg/tcpip/header/tcp.go | 17 | ||||
-rw-r--r-- | pkg/tcpip/tcpip.go | 4 | ||||
-rw-r--r-- | pkg/tcpip/transport/tcp/endpoint.go | 26 | ||||
-rw-r--r-- | test/syscalls/linux/tcp_socket.cc | 55 |
6 files changed, 123 insertions, 3 deletions
diff --git a/pkg/sentry/socket/epsocket/epsocket.go b/pkg/sentry/socket/epsocket/epsocket.go index 9d1bcfd41..69eff7373 100644 --- a/pkg/sentry/socket/epsocket/epsocket.go +++ b/pkg/sentry/socket/epsocket/epsocket.go @@ -867,6 +867,18 @@ func getSockOptTCP(t *kernel.Task, ep commonEndpoint, name, outLen int) (interfa return int32(v), nil + case linux.TCP_MAXSEG: + if outLen < sizeOfInt32 { + return nil, syserr.ErrInvalidArgument + } + + var v tcpip.MaxSegOption + if err := ep.GetSockOpt(&v); err != nil { + return nil, syserr.TranslateNetstackError(err) + } + + return int32(v), nil + case linux.TCP_KEEPIDLE: if outLen < sizeOfInt32 { return nil, syserr.ErrInvalidArgument @@ -1219,6 +1231,14 @@ func setSockOptTCP(t *kernel.Task, ep commonEndpoint, name int, optVal []byte) * v := usermem.ByteOrder.Uint32(optVal) return syserr.TranslateNetstackError(ep.SetSockOpt(tcpip.QuickAckOption(v))) + case linux.TCP_MAXSEG: + if len(optVal) < sizeOfInt32 { + return syserr.ErrInvalidArgument + } + + v := usermem.ByteOrder.Uint32(optVal) + return syserr.TranslateNetstackError(ep.SetSockOpt(tcpip.MaxSegOption(v))) + case linux.TCP_KEEPIDLE: if len(optVal) < sizeOfInt32 { return syserr.ErrInvalidArgument diff --git a/pkg/tcpip/header/ipv4.go b/pkg/tcpip/header/ipv4.go index 7da4c4845..7b8034de4 100644 --- a/pkg/tcpip/header/ipv4.go +++ b/pkg/tcpip/header/ipv4.go @@ -85,6 +85,10 @@ const ( // units, the header cannot exceed 15*4 = 60 bytes. IPv4MaximumHeaderSize = 60 + // MinIPFragmentPayloadSize is the minimum number of payload bytes that + // the first fragment must carry when an IPv4 packet is fragmented. + MinIPFragmentPayloadSize = 8 + // IPv4AddressSize is the size, in bytes, of an IPv4 address. IPv4AddressSize = 4 diff --git a/pkg/tcpip/header/tcp.go b/pkg/tcpip/header/tcp.go index 1141443bb..82cfe785c 100644 --- a/pkg/tcpip/header/tcp.go +++ b/pkg/tcpip/header/tcp.go @@ -176,6 +176,21 @@ const ( // TCPProtocolNumber is TCP's transport protocol number. TCPProtocolNumber tcpip.TransportProtocolNumber = 6 + + // TCPMinimumMSS is the minimum acceptable value for MSS. This is the + // same as the value TCP_MIN_MSS defined net/tcp.h. + TCPMinimumMSS = IPv4MaximumHeaderSize + TCPHeaderMaximumSize + MinIPFragmentPayloadSize - IPv4MinimumSize - TCPMinimumSize + + // TCPMaximumMSS is the maximum acceptable value for MSS. + TCPMaximumMSS = 0xffff + + // TCPDefaultMSS is the MSS value that should be used if an MSS option + // is not received from the peer. It's also the value returned by + // TCP_MAXSEG option for a socket in an unconnected state. + // + // Per RFC 1122, page 85: "If an MSS option is not received at + // connection setup, TCP MUST assume a default send MSS of 536." + TCPDefaultMSS = 536 ) // SourcePort returns the "source port" field of the tcp header. @@ -306,7 +321,7 @@ func ParseSynOptions(opts []byte, isAck bool) TCPSynOptions { synOpts := TCPSynOptions{ // Per RFC 1122, page 85: "If an MSS option is not received at // connection setup, TCP MUST assume a default send MSS of 536." - MSS: 536, + MSS: TCPDefaultMSS, // If no window scale option is specified, WS in options is // returned as -1; this is because the absence of the option // indicates that the we cannot use window scaling on the diff --git a/pkg/tcpip/tcpip.go b/pkg/tcpip/tcpip.go index c61f96fb0..c4076666a 100644 --- a/pkg/tcpip/tcpip.go +++ b/pkg/tcpip/tcpip.go @@ -496,6 +496,10 @@ type AvailableCongestionControlOption string // buffer moderation. type ModerateReceiveBufferOption bool +// MaxSegOption is used by SetSockOpt/GetSockOpt to set/get the current +// Maximum Segment Size(MSS) value as specified using the TCP_MAXSEG option. +type MaxSegOption int + // MulticastTTLOption is used by SetSockOpt/GetSockOpt to control the default // TTL value for multicast messages. The default is 1. type MulticastTTLOption uint8 diff --git a/pkg/tcpip/transport/tcp/endpoint.go b/pkg/tcpip/transport/tcp/endpoint.go index cb40fea94..beb90afb5 100644 --- a/pkg/tcpip/transport/tcp/endpoint.go +++ b/pkg/tcpip/transport/tcp/endpoint.go @@ -117,6 +117,7 @@ const ( notifyDrain notifyReset notifyKeepaliveChanged + notifyMSSChanged ) // SACKInfo holds TCP SACK related information for a given endpoint. @@ -218,8 +219,6 @@ type endpoint struct { mu sync.RWMutex `state:"nosave"` id stack.TransportEndpointID - // state endpointState `state:".(endpointState)"` - // pState ProtocolState state EndpointState `state:".(EndpointState)"` isPortReserved bool `state:"manual"` @@ -313,6 +312,10 @@ type endpoint struct { // in SYN-RCVD state. synRcvdCount int + // userMSS if non-zero is the MSS value explicitly set by the user + // for this endpoint using the TCP_MAXSEG setsockopt. + userMSS int + // The following fields are used to manage the send buffer. When // segments are ready to be sent, they are added to sndQueue and the // protocol goroutine is signaled via sndWaker. @@ -917,6 +920,17 @@ func (e *endpoint) SetSockOpt(opt interface{}) *tcpip.Error { } return nil + case tcpip.MaxSegOption: + userMSS := v + if userMSS < header.TCPMinimumMSS || userMSS > header.TCPMaximumMSS { + return tcpip.ErrInvalidOptionValue + } + e.mu.Lock() + e.userMSS = int(userMSS) + e.mu.Unlock() + e.notifyProtocolGoroutine(notifyMSSChanged) + return nil + case tcpip.ReceiveBufferSizeOption: // Make sure the receive buffer size is within the min and max // allowed. @@ -1096,6 +1110,14 @@ func (e *endpoint) GetSockOpt(opt interface{}) *tcpip.Error { e.lastErrorMu.Unlock() return err + case *tcpip.MaxSegOption: + // This is just stubbed out. Linux never returns the user_mss + // value as it either returns the defaultMSS or returns the + // actual current MSS. Netstack just returns the defaultMSS + // always for now. + *o = header.TCPDefaultMSS + return nil + case *tcpip.SendBufferSizeOption: e.sndBufMu.Lock() *o = tcpip.SendBufferSizeOption(e.sndBufSize) diff --git a/test/syscalls/linux/tcp_socket.cc b/test/syscalls/linux/tcp_socket.cc index 4597e91e3..8d77431f2 100644 --- a/test/syscalls/linux/tcp_socket.cc +++ b/test/syscalls/linux/tcp_socket.cc @@ -890,6 +890,61 @@ TEST_P(SimpleTcpSocketTest, SetCongestionControlFailsForUnsupported) { EXPECT_EQ(0, memcmp(got_cc, old_cc, sizeof(kTcpCaNameMax))); } +TEST_P(SimpleTcpSocketTest, MaxSegDefault) { + FileDescriptor s = + ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP)); + + constexpr int kDefaultMSS = 536; + int tcp_max_seg; + socklen_t optlen = sizeof(tcp_max_seg); + ASSERT_THAT( + getsockopt(s.get(), IPPROTO_TCP, TCP_MAXSEG, &tcp_max_seg, &optlen), + SyscallSucceedsWithValue(0)); + + EXPECT_EQ(kDefaultMSS, tcp_max_seg); + EXPECT_EQ(sizeof(tcp_max_seg), optlen); +} + +TEST_P(SimpleTcpSocketTest, SetMaxSeg) { + FileDescriptor s = + ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP)); + + constexpr int kDefaultMSS = 536; + constexpr int kTCPMaxSeg = 1024; + ASSERT_THAT(setsockopt(s.get(), IPPROTO_TCP, TCP_MAXSEG, &kTCPMaxSeg, + sizeof(kTCPMaxSeg)), + SyscallSucceedsWithValue(0)); + + // Linux actually never returns the user_mss value. It will always return the + // default MSS value defined above for an unconnected socket and always return + // the actual current MSS for a connected one. + int optval; + socklen_t optlen = sizeof(optval); + ASSERT_THAT(getsockopt(s.get(), IPPROTO_TCP, TCP_MAXSEG, &optval, &optlen), + SyscallSucceedsWithValue(0)); + + EXPECT_EQ(kDefaultMSS, optval); + EXPECT_EQ(sizeof(optval), optlen); +} + +TEST_P(SimpleTcpSocketTest, SetMaxSegFailsForInvalidMSSValues) { + FileDescriptor s = + ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP)); + + { + constexpr int tcp_max_seg = 10; + ASSERT_THAT(setsockopt(s.get(), IPPROTO_TCP, TCP_MAXSEG, &tcp_max_seg, + sizeof(tcp_max_seg)), + SyscallFailsWithErrno(EINVAL)); + } + { + constexpr int tcp_max_seg = 75000; + ASSERT_THAT(setsockopt(s.get(), IPPROTO_TCP, TCP_MAXSEG, &tcp_max_seg, + sizeof(tcp_max_seg)), + SyscallFailsWithErrno(EINVAL)); + } +} + INSTANTIATE_TEST_SUITE_P(AllInetTests, SimpleTcpSocketTest, ::testing::Values(AF_INET, AF_INET6)); |