summaryrefslogtreecommitdiffhomepage
path: root/test/syscalls
diff options
context:
space:
mode:
authorBhasker Hariharan <bhaskerh@google.com>2019-08-14 14:33:11 -0700
committergVisor bot <gvisor-bot@google.com>2019-08-14 14:34:27 -0700
commit570fb1db6b4e01be37386a379fea4d63e5a3cdc2 (patch)
tree44cd127ffaa546b3f0c9bc1b642fe8980e69bbd9 /test/syscalls
parentcee044c2ab009c9faae154e1751eef93430fc141 (diff)
Improve SendMsg performance.
SendMsg before this change would copy all the data over into a new slice even if the underlying socket could only accept a small amount of data. This is really inefficient with non-blocking sockets and under high throughput where large writes could get ErrWouldBlock or if there was say a timeout associated with the sendmsg() syscall. With this change we delay copying bytes in till they are needed and only copy what can be potentially sent/held in the socket buffer. Reducing the need to repeatedly copy data over. Also a minor fix to change state FIN-WAIT-1 when shutdown(..., SHUT_WR) is called instead of when we transmit the actual FIN. Otherwise the socket could remain in CONNECTED state even though the user has called shutdown() on the socket. Updates #627 PiperOrigin-RevId: 263430505
Diffstat (limited to 'test/syscalls')
-rw-r--r--test/syscalls/linux/BUILD4
-rw-r--r--test/syscalls/linux/partial_bad_buffer.cc110
2 files changed, 114 insertions, 0 deletions
diff --git a/test/syscalls/linux/BUILD b/test/syscalls/linux/BUILD
index 16666e772..d28ce4ba1 100644
--- a/test/syscalls/linux/BUILD
+++ b/test/syscalls/linux/BUILD
@@ -1252,10 +1252,14 @@ cc_binary(
srcs = ["partial_bad_buffer.cc"],
linkstatic = 1,
deps = [
+ "//test/syscalls/linux:socket_test_util",
+ "//test/util:file_descriptor",
"//test/util:fs_util",
+ "//test/util:posix_error",
"//test/util:temp_path",
"//test/util:test_main",
"//test/util:test_util",
+ "@com_google_absl//absl/time",
"@com_google_googletest//:gtest",
],
)
diff --git a/test/syscalls/linux/partial_bad_buffer.cc b/test/syscalls/linux/partial_bad_buffer.cc
index 83b1ad4e4..33822ee57 100644
--- a/test/syscalls/linux/partial_bad_buffer.cc
+++ b/test/syscalls/linux/partial_bad_buffer.cc
@@ -14,13 +14,20 @@
#include <errno.h>
#include <fcntl.h>
+#include <netinet/in.h>
+#include <netinet/tcp.h>
#include <sys/mman.h>
+#include <sys/socket.h>
#include <sys/syscall.h>
#include <sys/uio.h>
#include <unistd.h>
#include "gtest/gtest.h"
+#include "absl/time/clock.h"
+#include "test/syscalls/linux/socket_test_util.h"
+#include "test/util/file_descriptor.h"
#include "test/util/fs_util.h"
+#include "test/util/posix_error.h"
#include "test/util/temp_path.h"
#include "test/util/test_util.h"
@@ -299,6 +306,109 @@ TEST_F(PartialBadBufferTest, WriteEfaultIsntPartial) {
EXPECT_STREQ(buf, kMessage);
}
+PosixErrorOr<sockaddr_storage> InetLoopbackAddr(int family) {
+ struct sockaddr_storage addr;
+ memset(&addr, 0, sizeof(addr));
+ addr.ss_family = family;
+ switch (family) {
+ case AF_INET:
+ reinterpret_cast<struct sockaddr_in*>(&addr)->sin_addr.s_addr =
+ htonl(INADDR_LOOPBACK);
+ break;
+ case AF_INET6:
+ reinterpret_cast<struct sockaddr_in6*>(&addr)->sin6_addr =
+ in6addr_loopback;
+ break;
+ default:
+ return PosixError(EINVAL,
+ absl::StrCat("unknown socket family: ", family));
+ }
+ return addr;
+}
+
+// SendMsgTCP verifies that calling sendmsg with a bad address returns an
+// EFAULT. It also verifies that passing a buffer which is made up of 2
+// pages one valid and one guard page succeeds as long as the write is
+// for exactly the size of 1 page.
+TEST_F(PartialBadBufferTest, SendMsgTCP) {
+ auto listen_socket =
+ ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET, SOCK_STREAM, IPPROTO_TCP));
+
+ // Initialize address to the loopback one.
+ sockaddr_storage addr = ASSERT_NO_ERRNO_AND_VALUE(InetLoopbackAddr(AF_INET));
+ socklen_t addrlen = sizeof(addr);
+
+ // Bind to some port then start listening.
+ ASSERT_THAT(bind(listen_socket.get(),
+ reinterpret_cast<struct sockaddr*>(&addr), addrlen),
+ SyscallSucceeds());
+
+ ASSERT_THAT(listen(listen_socket.get(), SOMAXCONN), SyscallSucceeds());
+
+ // Get the address we're listening on, then connect to it. We need to do this
+ // because we're allowing the stack to pick a port for us.
+ ASSERT_THAT(getsockname(listen_socket.get(),
+ reinterpret_cast<struct sockaddr*>(&addr), &addrlen),
+ SyscallSucceeds());
+
+ auto send_socket =
+ ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET, SOCK_STREAM, IPPROTO_TCP));
+
+ ASSERT_THAT(
+ RetryEINTR(connect)(send_socket.get(),
+ reinterpret_cast<struct sockaddr*>(&addr), addrlen),
+ SyscallSucceeds());
+
+ // Accept the connection.
+ auto recv_socket =
+ ASSERT_NO_ERRNO_AND_VALUE(Accept(listen_socket.get(), nullptr, nullptr));
+
+ // TODO(gvisor.dev/issue/674): Update this once Netstack matches linux
+ // behaviour on a setsockopt of SO_RCVBUF/SO_SNDBUF.
+ //
+ // Set SO_SNDBUF for socket to exactly kPageSize+1.
+ //
+ // gVisor does not double the value passed in SO_SNDBUF like linux does so we
+ // just increase it by 1 byte here for gVisor so that we can test writing 1
+ // byte past the valid page and check that it triggers an EFAULT
+ // correctly. Otherwise in gVisor the sendmsg call will just return with no
+ // error with kPageSize bytes written successfully.
+ const uint32_t buf_size = kPageSize + 1;
+ ASSERT_THAT(setsockopt(send_socket.get(), SOL_SOCKET, SO_SNDBUF, &buf_size,
+ sizeof(buf_size)),
+ SyscallSucceedsWithValue(0));
+
+ struct msghdr hdr = {};
+ struct iovec iov = {};
+ iov.iov_base = bad_buffer_;
+ iov.iov_len = kPageSize;
+ hdr.msg_iov = &iov;
+ hdr.msg_iovlen = 1;
+
+ ASSERT_THAT(RetryEINTR(sendmsg)(send_socket.get(), &hdr, 0),
+ SyscallFailsWithErrno(EFAULT));
+
+ // Now assert that writing kPageSize from addr_ succeeds.
+ iov.iov_base = addr_;
+ ASSERT_THAT(RetryEINTR(sendmsg)(send_socket.get(), &hdr, 0),
+ SyscallSucceedsWithValue(kPageSize));
+ // Read all the data out so that we drain the socket SND_BUF on the sender.
+ std::vector<char> buffer(kPageSize);
+ ASSERT_THAT(RetryEINTR(read)(recv_socket.get(), buffer.data(), kPageSize),
+ SyscallSucceedsWithValue(kPageSize));
+
+ // Sleep for a shortwhile to ensure that we have time to process the
+ // ACKs. This is not strictly required unless running under gotsan which is a
+ // lot slower and can result in the next write to write only 1 byte instead of
+ // our intended kPageSize + 1.
+ absl::SleepFor(absl::Milliseconds(50));
+
+ // Now assert that writing > kPageSize results in EFAULT.
+ iov.iov_len = kPageSize + 1;
+ ASSERT_THAT(RetryEINTR(sendmsg)(send_socket.get(), &hdr, 0),
+ SyscallFailsWithErrno(EFAULT));
+}
+
} // namespace
} // namespace testing