summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
-rw-r--r--pkg/sentry/socket/netstack/netstack.go39
-rw-r--r--pkg/tcpip/tcpip.go17
-rw-r--r--pkg/tcpip/transport/tcp/endpoint.go64
-rw-r--r--pkg/tcpip/transport/tcp/protocol.go21
-rw-r--r--test/syscalls/linux/socket_ip_tcp_generic.cc45
-rw-r--r--test/syscalls/linux/tcp_socket.cc185
6 files changed, 367 insertions, 4 deletions
diff --git a/pkg/sentry/socket/netstack/netstack.go b/pkg/sentry/socket/netstack/netstack.go
index 9d032f052..9dea2b5ff 100644
--- a/pkg/sentry/socket/netstack/netstack.go
+++ b/pkg/sentry/socket/netstack/netstack.go
@@ -1321,6 +1321,29 @@ func getSockOptTCP(t *kernel.Task, ep commonEndpoint, name, outLen int) (interfa
return int32(time.Duration(v) / time.Second), nil
+ case linux.TCP_SYNCNT:
+ if outLen < sizeOfInt32 {
+ return nil, syserr.ErrInvalidArgument
+ }
+
+ v, err := ep.GetSockOptInt(tcpip.TCPSynCountOption)
+ if err != nil {
+ return nil, syserr.TranslateNetstackError(err)
+ }
+
+ return int32(v), nil
+
+ case linux.TCP_WINDOW_CLAMP:
+ if outLen < sizeOfInt32 {
+ return nil, syserr.ErrInvalidArgument
+ }
+
+ v, err := ep.GetSockOptInt(tcpip.TCPWindowClampOption)
+ if err != nil {
+ return nil, syserr.TranslateNetstackError(err)
+ }
+
+ return int32(v), nil
default:
emitUnimplementedEventTCP(t, name)
}
@@ -1790,6 +1813,22 @@ func setSockOptTCP(t *kernel.Task, ep commonEndpoint, name int, optVal []byte) *
}
return syserr.TranslateNetstackError(ep.SetSockOpt(tcpip.TCPDeferAcceptOption(time.Second * time.Duration(v))))
+ case linux.TCP_SYNCNT:
+ if len(optVal) < sizeOfInt32 {
+ return syserr.ErrInvalidArgument
+ }
+ v := usermem.ByteOrder.Uint32(optVal)
+
+ return syserr.TranslateNetstackError(ep.SetSockOptInt(tcpip.TCPSynCountOption, int(v)))
+
+ case linux.TCP_WINDOW_CLAMP:
+ if len(optVal) < sizeOfInt32 {
+ return syserr.ErrInvalidArgument
+ }
+ v := usermem.ByteOrder.Uint32(optVal)
+
+ return syserr.TranslateNetstackError(ep.SetSockOptInt(tcpip.TCPWindowClampOption, int(v)))
+
case linux.TCP_REPAIR_OPTIONS:
t.Kernel().EmitUnimplementedEvent(t)
diff --git a/pkg/tcpip/tcpip.go b/pkg/tcpip/tcpip.go
index 1ca4088c9..6270146fa 100644
--- a/pkg/tcpip/tcpip.go
+++ b/pkg/tcpip/tcpip.go
@@ -622,6 +622,19 @@ const (
//
// A zero value indicates the default.
TTLOption
+
+ // TCPSynCountOption is used by SetSockOpt/GetSockOpt to specify the number of
+ // SYN retransmits that TCP should send before aborting the attempt to
+ // connect. It cannot exceed 255.
+ //
+ // NOTE: This option is currently only stubbed out and is no-op.
+ TCPSynCountOption
+
+ // TCPWindowClampOption is used by SetSockOpt/GetSockOpt to bound the size
+ // of the advertised window to this value.
+ //
+ // NOTE: This option is currently only stubed out and is a no-op
+ TCPWindowClampOption
)
// ErrorOption is used in GetSockOpt to specify that the last error reported by
@@ -690,6 +703,10 @@ type TCPMinRTOOption time.Duration
// switches to using SYN cookies.
type TCPSynRcvdCountThresholdOption uint64
+// TCPSynRetriesOption is used by SetSockOpt/GetSockOpt to specify stack-wide
+// default for number of times SYN is retransmitted before aborting a connect.
+type TCPSynRetriesOption uint8
+
// MulticastInterfaceOption is used by SetSockOpt/GetSockOpt to specify a
// default interface for multicast.
type MulticastInterfaceOption struct {
diff --git a/pkg/tcpip/transport/tcp/endpoint.go b/pkg/tcpip/transport/tcp/endpoint.go
index 07d3e64c8..71735029e 100644
--- a/pkg/tcpip/transport/tcp/endpoint.go
+++ b/pkg/tcpip/transport/tcp/endpoint.go
@@ -470,6 +470,17 @@ type endpoint struct {
// for this endpoint using the TCP_MAXSEG setsockopt.
userMSS uint16
+ // maxSynRetries is the maximum number of SYN retransmits that TCP should
+ // send before aborting the attempt to connect. It cannot exceed 255.
+ //
+ // NOTE: This is currently a no-op and does not change the SYN
+ // retransmissions.
+ maxSynRetries uint8
+
+ // windowClamp is used to bound the size of the advertised window to
+ // this value.
+ windowClamp uint32
+
// The following fields are used to manage the send buffer. When
// segments are ready to be sent, they are added to sndQueue and the
// protocol goroutine is signaled via sndWaker.
@@ -795,8 +806,10 @@ func newEndpoint(s *stack.Stack, netProto tcpip.NetworkProtocolNumber, waiterQue
interval: 75 * time.Second,
count: 9,
},
- uniqueID: s.UniqueID(),
- txHash: s.Rand().Uint32(),
+ uniqueID: s.UniqueID(),
+ txHash: s.Rand().Uint32(),
+ windowClamp: DefaultReceiveBufferSize,
+ maxSynRetries: DefaultSynRetries,
}
var ss SendBufferSizeOption
@@ -829,6 +842,11 @@ func newEndpoint(s *stack.Stack, netProto tcpip.NetworkProtocolNumber, waiterQue
e.tcpLingerTimeout = time.Duration(tcpLT)
}
+ var synRetries tcpip.TCPSynRetriesOption
+ if err := s.TransportProtocolOption(ProtocolNumber, &synRetries); err == nil {
+ e.maxSynRetries = uint8(synRetries)
+ }
+
if p := s.GetTCPProbe(); p != nil {
e.probe = p
}
@@ -1603,6 +1621,36 @@ func (e *endpoint) SetSockOptInt(opt tcpip.SockOptInt, v int) *tcpip.Error {
e.ttl = uint8(v)
e.UnlockUser()
+ case tcpip.TCPSynCountOption:
+ if v < 1 || v > 255 {
+ return tcpip.ErrInvalidOptionValue
+ }
+ e.LockUser()
+ e.maxSynRetries = uint8(v)
+ e.UnlockUser()
+
+ case tcpip.TCPWindowClampOption:
+ if v == 0 {
+ e.LockUser()
+ switch e.EndpointState() {
+ case StateClose, StateInitial:
+ e.windowClamp = 0
+ e.UnlockUser()
+ return nil
+ default:
+ e.UnlockUser()
+ return tcpip.ErrInvalidOptionValue
+ }
+ }
+ var rs ReceiveBufferSizeOption
+ if err := e.stack.TransportProtocolOption(ProtocolNumber, &rs); err == nil {
+ if v < rs.Min/2 {
+ v = rs.Min / 2
+ }
+ }
+ e.LockUser()
+ e.windowClamp = uint32(v)
+ e.UnlockUser()
}
return nil
}
@@ -1826,6 +1874,18 @@ func (e *endpoint) GetSockOptInt(opt tcpip.SockOptInt) (int, *tcpip.Error) {
e.UnlockUser()
return v, nil
+ case tcpip.TCPSynCountOption:
+ e.LockUser()
+ v := int(e.maxSynRetries)
+ e.UnlockUser()
+ return v, nil
+
+ case tcpip.TCPWindowClampOption:
+ e.LockUser()
+ v := int(e.windowClamp)
+ e.UnlockUser()
+ return v, nil
+
default:
return -1, tcpip.ErrUnknownProtocolOption
}
diff --git a/pkg/tcpip/transport/tcp/protocol.go b/pkg/tcpip/transport/tcp/protocol.go
index cfd9a4e8e..1e64283b5 100644
--- a/pkg/tcpip/transport/tcp/protocol.go
+++ b/pkg/tcpip/transport/tcp/protocol.go
@@ -64,6 +64,10 @@ const (
// DefaultTCPTimeWaitTimeout is the amount of time that sockets linger
// in TIME_WAIT state before being marked closed.
DefaultTCPTimeWaitTimeout = 60 * time.Second
+
+ // DefaultSynRetries is the default value for the number of SYN retransmits
+ // before a connect is aborted.
+ DefaultSynRetries = 6
)
// SACKEnabled option can be used to enable SACK support in the TCP
@@ -164,6 +168,7 @@ type protocol struct {
tcpTimeWaitTimeout time.Duration
minRTO time.Duration
synRcvdCount synRcvdCounter
+ synRetries uint8
dispatcher *dispatcher
}
@@ -346,6 +351,15 @@ func (p *protocol) SetOption(option interface{}) *tcpip.Error {
p.mu.Unlock()
return nil
+ case tcpip.TCPSynRetriesOption:
+ if v < 1 || v > 255 {
+ return tcpip.ErrInvalidOptionValue
+ }
+ p.mu.Lock()
+ p.synRetries = uint8(v)
+ p.mu.Unlock()
+ return nil
+
default:
return tcpip.ErrUnknownProtocolOption
}
@@ -420,6 +434,12 @@ func (p *protocol) Option(option interface{}) *tcpip.Error {
p.mu.RUnlock()
return nil
+ case *tcpip.TCPSynRetriesOption:
+ p.mu.RLock()
+ *v = tcpip.TCPSynRetriesOption(p.synRetries)
+ p.mu.RUnlock()
+ return nil
+
default:
return tcpip.ErrUnknownProtocolOption
}
@@ -452,6 +472,7 @@ func NewProtocol() stack.TransportProtocol {
tcpTimeWaitTimeout: DefaultTCPTimeWaitTimeout,
synRcvdCount: synRcvdCounter{threshold: SynRcvdCountThreshold},
dispatcher: newDispatcher(runtime.GOMAXPROCS(0)),
+ synRetries: DefaultSynRetries,
minRTO: MinRTO,
}
}
diff --git a/test/syscalls/linux/socket_ip_tcp_generic.cc b/test/syscalls/linux/socket_ip_tcp_generic.cc
index 27779e47c..fa81845fd 100644
--- a/test/syscalls/linux/socket_ip_tcp_generic.cc
+++ b/test/syscalls/linux/socket_ip_tcp_generic.cc
@@ -876,6 +876,51 @@ TEST_P(TCPSocketPairTest, SetTCPUserTimeoutAboveZero) {
EXPECT_EQ(get, kAbove);
}
+TEST_P(TCPSocketPairTest, SetTCPWindowClampBelowMinRcvBufConnectedSocket) {
+ auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair());
+ // Discover minimum receive buf by setting a really low value
+ // for the receive buffer.
+ constexpr int kZero = 0;
+ EXPECT_THAT(setsockopt(sockets->first_fd(), SOL_SOCKET, SO_RCVBUF, &kZero,
+ sizeof(kZero)),
+ SyscallSucceeds());
+
+ // Now retrieve the minimum value for SO_RCVBUF as the set above should
+ // have caused SO_RCVBUF for the socket to be set to the minimum.
+ int get = -1;
+ socklen_t get_len = sizeof(get);
+ ASSERT_THAT(
+ getsockopt(sockets->first_fd(), SOL_SOCKET, SO_RCVBUF, &get, &get_len),
+ SyscallSucceedsWithValue(0));
+ EXPECT_EQ(get_len, sizeof(get));
+ int min_so_rcvbuf = get;
+
+ {
+ // Setting TCP_WINDOW_CLAMP to zero for a connected socket is not permitted.
+ constexpr int kZero = 0;
+ EXPECT_THAT(setsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_WINDOW_CLAMP,
+ &kZero, sizeof(kZero)),
+ SyscallFailsWithErrno(EINVAL));
+
+ // Non-zero clamp values below MIN_SO_RCVBUF/2 should result in the clamp
+ // being set to MIN_SO_RCVBUF/2.
+ int below_half_min_so_rcvbuf = min_so_rcvbuf / 2 - 1;
+ EXPECT_THAT(
+ setsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_WINDOW_CLAMP,
+ &below_half_min_so_rcvbuf, sizeof(below_half_min_so_rcvbuf)),
+ SyscallSucceeds());
+
+ int get = -1;
+ socklen_t get_len = sizeof(get);
+
+ ASSERT_THAT(getsockopt(sockets->first_fd(), IPPROTO_TCP, TCP_WINDOW_CLAMP,
+ &get, &get_len),
+ SyscallSucceedsWithValue(0));
+ EXPECT_EQ(get_len, sizeof(get));
+ EXPECT_EQ(min_so_rcvbuf / 2, get);
+ }
+}
+
TEST_P(TCPSocketPairTest, TCPResetDuringClose_NoRandomSave) {
DisableSave ds; // Too many syscalls.
constexpr int kThreadCount = 1000;
diff --git a/test/syscalls/linux/tcp_socket.cc b/test/syscalls/linux/tcp_socket.cc
index d9c1ac0e1..a4d2953e1 100644
--- a/test/syscalls/linux/tcp_socket.cc
+++ b/test/syscalls/linux/tcp_socket.cc
@@ -1313,7 +1313,7 @@ TEST_P(SimpleTcpSocketTest, SetTCPDeferAcceptNeg) {
int get = -1;
socklen_t get_len = sizeof(get);
ASSERT_THAT(
- getsockopt(s.get(), IPPROTO_TCP, TCP_USER_TIMEOUT, &get, &get_len),
+ getsockopt(s.get(), IPPROTO_TCP, TCP_DEFER_ACCEPT, &get, &get_len),
SyscallSucceedsWithValue(0));
EXPECT_EQ(get_len, sizeof(get));
EXPECT_EQ(get, 0);
@@ -1326,7 +1326,7 @@ TEST_P(SimpleTcpSocketTest, GetTCPDeferAcceptDefault) {
int get = -1;
socklen_t get_len = sizeof(get);
ASSERT_THAT(
- getsockopt(s.get(), IPPROTO_TCP, TCP_USER_TIMEOUT, &get, &get_len),
+ getsockopt(s.get(), IPPROTO_TCP, TCP_DEFER_ACCEPT, &get, &get_len),
SyscallSucceedsWithValue(0));
EXPECT_EQ(get_len, sizeof(get));
EXPECT_EQ(get, 0);
@@ -1378,6 +1378,187 @@ TEST_P(SimpleTcpSocketTest, TCPConnectSoRcvBufRace) {
SyscallSucceedsWithValue(0));
}
+TEST_P(SimpleTcpSocketTest, SetTCPSynCntLessThanOne) {
+ FileDescriptor s =
+ ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP));
+
+ int get = -1;
+ socklen_t get_len = sizeof(get);
+ ASSERT_THAT(getsockopt(s.get(), IPPROTO_TCP, TCP_SYNCNT, &get, &get_len),
+ SyscallSucceedsWithValue(0));
+ EXPECT_EQ(get_len, sizeof(get));
+ int default_syn_cnt = get;
+
+ {
+ // TCP_SYNCNT less than 1 should be rejected with an EINVAL.
+ constexpr int kZero = 0;
+ EXPECT_THAT(
+ setsockopt(s.get(), IPPROTO_TCP, TCP_SYNCNT, &kZero, sizeof(kZero)),
+ SyscallFailsWithErrno(EINVAL));
+
+ // TCP_SYNCNT less than 1 should be rejected with an EINVAL.
+ constexpr int kNeg = -1;
+ EXPECT_THAT(
+ setsockopt(s.get(), IPPROTO_TCP, TCP_SYNCNT, &kNeg, sizeof(kNeg)),
+ SyscallFailsWithErrno(EINVAL));
+
+ int get = -1;
+ socklen_t get_len = sizeof(get);
+
+ ASSERT_THAT(getsockopt(s.get(), IPPROTO_TCP, TCP_SYNCNT, &get, &get_len),
+ SyscallSucceedsWithValue(0));
+ EXPECT_EQ(get_len, sizeof(get));
+ EXPECT_EQ(default_syn_cnt, get);
+ }
+}
+
+TEST_P(SimpleTcpSocketTest, GetTCPSynCntDefault) {
+ FileDescriptor s =
+ ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP));
+
+ int get = -1;
+ socklen_t get_len = sizeof(get);
+ constexpr int kDefaultSynCnt = 6;
+
+ ASSERT_THAT(getsockopt(s.get(), IPPROTO_TCP, TCP_SYNCNT, &get, &get_len),
+ SyscallSucceedsWithValue(0));
+ EXPECT_EQ(get_len, sizeof(get));
+ EXPECT_EQ(get, kDefaultSynCnt);
+}
+
+TEST_P(SimpleTcpSocketTest, SetTCPSynCntGreaterThanOne) {
+ FileDescriptor s =
+ ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP));
+ constexpr int kTCPSynCnt = 20;
+ ASSERT_THAT(setsockopt(s.get(), IPPROTO_TCP, TCP_SYNCNT, &kTCPSynCnt,
+ sizeof(kTCPSynCnt)),
+ SyscallSucceeds());
+
+ int get = -1;
+ socklen_t get_len = sizeof(get);
+ ASSERT_THAT(getsockopt(s.get(), IPPROTO_TCP, TCP_SYNCNT, &get, &get_len),
+ SyscallSucceeds());
+ EXPECT_EQ(get_len, sizeof(get));
+ EXPECT_EQ(get, kTCPSynCnt);
+}
+
+TEST_P(SimpleTcpSocketTest, SetTCPSynCntAboveMax) {
+ FileDescriptor s =
+ ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP));
+ int get = -1;
+ socklen_t get_len = sizeof(get);
+ ASSERT_THAT(getsockopt(s.get(), IPPROTO_TCP, TCP_SYNCNT, &get, &get_len),
+ SyscallSucceedsWithValue(0));
+ EXPECT_EQ(get_len, sizeof(get));
+ int default_syn_cnt = get;
+ {
+ constexpr int kTCPSynCnt = 256;
+ ASSERT_THAT(setsockopt(s.get(), IPPROTO_TCP, TCP_SYNCNT, &kTCPSynCnt,
+ sizeof(kTCPSynCnt)),
+ SyscallFailsWithErrno(EINVAL));
+
+ int get = -1;
+ socklen_t get_len = sizeof(get);
+ ASSERT_THAT(getsockopt(s.get(), IPPROTO_TCP, TCP_SYNCNT, &get, &get_len),
+ SyscallSucceeds());
+ EXPECT_EQ(get_len, sizeof(get));
+ EXPECT_EQ(get, default_syn_cnt);
+ }
+}
+
+TEST_P(SimpleTcpSocketTest, SetTCPWindowClampBelowMinRcvBuf) {
+ FileDescriptor s =
+ ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP));
+
+ // Discover minimum receive buf by setting a really low value
+ // for the receive buffer.
+ constexpr int kZero = 0;
+ EXPECT_THAT(setsockopt(s.get(), SOL_SOCKET, SO_RCVBUF, &kZero, sizeof(kZero)),
+ SyscallSucceeds());
+
+ // Now retrieve the minimum value for SO_RCVBUF as the set above should
+ // have caused SO_RCVBUF for the socket to be set to the minimum.
+ int get = -1;
+ socklen_t get_len = sizeof(get);
+ ASSERT_THAT(getsockopt(s.get(), SOL_SOCKET, SO_RCVBUF, &get, &get_len),
+ SyscallSucceedsWithValue(0));
+ EXPECT_EQ(get_len, sizeof(get));
+ int min_so_rcvbuf = get;
+
+ {
+ // TCP_WINDOW_CLAMP less than min_so_rcvbuf/2 should be set to
+ // min_so_rcvbuf/2.
+ int below_half_min_rcvbuf = min_so_rcvbuf / 2 - 1;
+ EXPECT_THAT(
+ setsockopt(s.get(), IPPROTO_TCP, TCP_WINDOW_CLAMP,
+ &below_half_min_rcvbuf, sizeof(below_half_min_rcvbuf)),
+ SyscallSucceeds());
+
+ int get = -1;
+ socklen_t get_len = sizeof(get);
+
+ ASSERT_THAT(
+ getsockopt(s.get(), IPPROTO_TCP, TCP_WINDOW_CLAMP, &get, &get_len),
+ SyscallSucceedsWithValue(0));
+ EXPECT_EQ(get_len, sizeof(get));
+ EXPECT_EQ(min_so_rcvbuf / 2, get);
+ }
+}
+
+TEST_P(SimpleTcpSocketTest, SetTCPWindowClampZeroClosedSocket) {
+ FileDescriptor s =
+ ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP));
+ constexpr int kZero = 0;
+ ASSERT_THAT(
+ setsockopt(s.get(), IPPROTO_TCP, TCP_WINDOW_CLAMP, &kZero, sizeof(kZero)),
+ SyscallSucceeds());
+
+ int get = -1;
+ socklen_t get_len = sizeof(get);
+ ASSERT_THAT(
+ getsockopt(s.get(), IPPROTO_TCP, TCP_WINDOW_CLAMP, &get, &get_len),
+ SyscallSucceeds());
+ EXPECT_EQ(get_len, sizeof(get));
+ EXPECT_EQ(get, kZero);
+}
+
+TEST_P(SimpleTcpSocketTest, SetTCPWindowClampAboveHalfMinRcvBuf) {
+ FileDescriptor s =
+ ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP));
+
+ // Discover minimum receive buf by setting a really low value
+ // for the receive buffer.
+ constexpr int kZero = 0;
+ EXPECT_THAT(setsockopt(s.get(), SOL_SOCKET, SO_RCVBUF, &kZero, sizeof(kZero)),
+ SyscallSucceeds());
+
+ // Now retrieve the minimum value for SO_RCVBUF as the set above should
+ // have caused SO_RCVBUF for the socket to be set to the minimum.
+ int get = -1;
+ socklen_t get_len = sizeof(get);
+ ASSERT_THAT(getsockopt(s.get(), SOL_SOCKET, SO_RCVBUF, &get, &get_len),
+ SyscallSucceedsWithValue(0));
+ EXPECT_EQ(get_len, sizeof(get));
+ int min_so_rcvbuf = get;
+
+ {
+ int above_half_min_rcv_buf = min_so_rcvbuf / 2 + 1;
+ EXPECT_THAT(
+ setsockopt(s.get(), IPPROTO_TCP, TCP_WINDOW_CLAMP,
+ &above_half_min_rcv_buf, sizeof(above_half_min_rcv_buf)),
+ SyscallSucceeds());
+
+ int get = -1;
+ socklen_t get_len = sizeof(get);
+
+ ASSERT_THAT(
+ getsockopt(s.get(), IPPROTO_TCP, TCP_WINDOW_CLAMP, &get, &get_len),
+ SyscallSucceedsWithValue(0));
+ EXPECT_EQ(get_len, sizeof(get));
+ EXPECT_EQ(above_half_min_rcv_buf, get);
+ }
+}
+
INSTANTIATE_TEST_SUITE_P(AllInetTests, SimpleTcpSocketTest,
::testing::Values(AF_INET, AF_INET6));