diff options
-rw-r--r-- | pkg/sentry/socket/netstack/netstack.go | 15 | ||||
-rw-r--r-- | pkg/tcpip/ports/ports.go | 19 | ||||
-rw-r--r-- | pkg/tcpip/ports/ports_test.go | 2 | ||||
-rw-r--r-- | pkg/tcpip/transport/tcp/endpoint.go | 60 | ||||
-rw-r--r-- | pkg/tcpip/transport/udp/endpoint.go | 2 | ||||
-rw-r--r-- | test/syscalls/linux/socket_inet_loopback.cc | 38 | ||||
-rw-r--r-- | test/syscalls/linux/socket_inet_loopback_nogotsan.cc | 65 | ||||
-rw-r--r-- | test/syscalls/linux/socket_ipv4_udp_unbound.cc | 25 |
8 files changed, 189 insertions, 37 deletions
diff --git a/pkg/sentry/socket/netstack/netstack.go b/pkg/sentry/socket/netstack/netstack.go index 0e5913b60..4d0e33696 100644 --- a/pkg/sentry/socket/netstack/netstack.go +++ b/pkg/sentry/socket/netstack/netstack.go @@ -803,7 +803,20 @@ func (s *socketOpsCommon) Bind(t *kernel.Task, sockaddr []byte) *syserr.Error { } // Issue the bind request to the endpoint. - return syserr.TranslateNetstackError(s.Endpoint.Bind(addr)) + err := s.Endpoint.Bind(addr) + if err == tcpip.ErrNoPortAvailable { + // Bind always returns EADDRINUSE irrespective of if the specified port was + // already bound or if an ephemeral port was requested but none were + // available. + // + // tcpip.ErrNoPortAvailable is mapped to EAGAIN in syserr package because + // UDP connect returns EAGAIN on ephemeral port exhaustion. + // + // TCP connect returns EADDRNOTAVAIL on ephemeral port exhaustion. + err = tcpip.ErrPortInUse + } + + return syserr.TranslateNetstackError(err) } // Listen implements the linux syscall listen(2) for sockets backed by diff --git a/pkg/tcpip/ports/ports.go b/pkg/tcpip/ports/ports.go index f6d592eb5..d87193650 100644 --- a/pkg/tcpip/ports/ports.go +++ b/pkg/tcpip/ports/ports.go @@ -400,7 +400,11 @@ func (s *PortManager) isPortAvailableLocked(networks []tcpip.NetworkProtocolNumb // reserved by another endpoint. If port is zero, ReservePort will search for // an unreserved ephemeral port and reserve it, returning its value in the // "port" return value. -func (s *PortManager) ReservePort(networks []tcpip.NetworkProtocolNumber, transport tcpip.TransportProtocolNumber, addr tcpip.Address, port uint16, flags Flags, bindToDevice tcpip.NICID, dest tcpip.FullAddress) (reservedPort uint16, err *tcpip.Error) { +// +// An optional testPort closure can be passed in which if provided will be used +// to test if the picked port can be used. The function should return true if +// the port is safe to use, false otherwise. +func (s *PortManager) ReservePort(networks []tcpip.NetworkProtocolNumber, transport tcpip.TransportProtocolNumber, addr tcpip.Address, port uint16, flags Flags, bindToDevice tcpip.NICID, dest tcpip.FullAddress, testPort func(port uint16) bool) (reservedPort uint16, err *tcpip.Error) { s.mu.Lock() defer s.mu.Unlock() @@ -412,12 +416,23 @@ func (s *PortManager) ReservePort(networks []tcpip.NetworkProtocolNumber, transp if !s.reserveSpecificPort(networks, transport, addr, port, flags, bindToDevice, dst) { return 0, tcpip.ErrPortInUse } + if testPort != nil && !testPort(port) { + s.releasePortLocked(networks, transport, addr, port, flags.Bits(), bindToDevice, dst) + return 0, tcpip.ErrPortInUse + } return port, nil } // A port wasn't specified, so try to find one. return s.PickEphemeralPort(func(p uint16) (bool, *tcpip.Error) { - return s.reserveSpecificPort(networks, transport, addr, p, flags, bindToDevice, dst), nil + if !s.reserveSpecificPort(networks, transport, addr, p, flags, bindToDevice, dst) { + return false, nil + } + if testPort != nil && !testPort(p) { + s.releasePortLocked(networks, transport, addr, p, flags.Bits(), bindToDevice, dst) + return false, nil + } + return true, nil }) } diff --git a/pkg/tcpip/ports/ports_test.go b/pkg/tcpip/ports/ports_test.go index 58db5868c..4bc949fd8 100644 --- a/pkg/tcpip/ports/ports_test.go +++ b/pkg/tcpip/ports/ports_test.go @@ -332,7 +332,7 @@ func TestPortReservation(t *testing.T) { pm.ReleasePort(net, fakeTransNumber, test.ip, test.port, test.flags, test.device, test.dest) continue } - gotPort, err := pm.ReservePort(net, fakeTransNumber, test.ip, test.port, test.flags, test.device, test.dest) + gotPort, err := pm.ReservePort(net, fakeTransNumber, test.ip, test.port, test.flags, test.device, test.dest, nil /* testPort */) if err != test.want { t.Fatalf("ReservePort(.., .., %s, %d, %+v, %d, %v) = %v, want %v", test.ip, test.port, test.flags, test.device, test.dest, err, test.want) } diff --git a/pkg/tcpip/transport/tcp/endpoint.go b/pkg/tcpip/transport/tcp/endpoint.go index 21a4b6e2f..9df22ac84 100644 --- a/pkg/tcpip/transport/tcp/endpoint.go +++ b/pkg/tcpip/transport/tcp/endpoint.go @@ -2169,7 +2169,7 @@ func (e *endpoint) connect(addr tcpip.FullAddress, handshake bool, run bool) *tc if sameAddr && p == e.ID.RemotePort { return false, nil } - if _, err := e.stack.ReservePort(netProtos, ProtocolNumber, e.ID.LocalAddress, p, e.portFlags, e.bindToDevice, addr); err != nil { + if _, err := e.stack.ReservePort(netProtos, ProtocolNumber, e.ID.LocalAddress, p, e.portFlags, e.bindToDevice, addr, nil /* testPort */); err != nil { if err != tcpip.ErrPortInUse || !reuse { return false, nil } @@ -2207,7 +2207,7 @@ func (e *endpoint) connect(addr tcpip.FullAddress, handshake bool, run bool) *tc tcpEP.notifyProtocolGoroutine(notifyAbort) tcpEP.UnlockUser() // Now try and Reserve again if it fails then we skip. - if _, err := e.stack.ReservePort(netProtos, ProtocolNumber, e.ID.LocalAddress, p, e.portFlags, e.bindToDevice, addr); err != nil { + if _, err := e.stack.ReservePort(netProtos, ProtocolNumber, e.ID.LocalAddress, p, e.portFlags, e.bindToDevice, addr, nil /* testPort */); err != nil { return false, nil } } @@ -2505,47 +2505,45 @@ func (e *endpoint) bindLocked(addr tcpip.FullAddress) (err *tcpip.Error) { } } - port, err := e.stack.ReservePort(netProtos, ProtocolNumber, addr.Addr, addr.Port, e.portFlags, e.bindToDevice, tcpip.FullAddress{}) - if err != nil { - return err - } - - e.boundBindToDevice = e.bindToDevice - e.boundPortFlags = e.portFlags - e.isPortReserved = true - e.effectiveNetProtos = netProtos - e.ID.LocalPort = port - - // Any failures beyond this point must remove the port registration. - defer func(portFlags ports.Flags, bindToDevice tcpip.NICID) { - if err != nil { - e.stack.ReleasePort(netProtos, ProtocolNumber, addr.Addr, port, portFlags, bindToDevice, tcpip.FullAddress{}) - e.isPortReserved = false - e.effectiveNetProtos = nil - e.ID.LocalPort = 0 - e.ID.LocalAddress = "" - e.boundNICID = 0 - e.boundBindToDevice = 0 - e.boundPortFlags = ports.Flags{} - } - }(e.boundPortFlags, e.boundBindToDevice) - + var nic tcpip.NICID // If an address is specified, we must ensure that it's one of our // local addresses. if len(addr.Addr) != 0 { - nic := e.stack.CheckLocalAddress(addr.NIC, netProto, addr.Addr) + nic = e.stack.CheckLocalAddress(addr.NIC, netProto, addr.Addr) if nic == 0 { return tcpip.ErrBadLocalAddress } - - e.boundNICID = nic e.ID.LocalAddress = addr.Addr } - if err := e.stack.CheckRegisterTransportEndpoint(e.boundNICID, e.effectiveNetProtos, ProtocolNumber, e.ID, e.boundPortFlags, e.boundBindToDevice); err != nil { + port, err := e.stack.ReservePort(netProtos, ProtocolNumber, addr.Addr, addr.Port, e.portFlags, e.bindToDevice, tcpip.FullAddress{}, func(p uint16) bool { + id := e.ID + id.LocalPort = p + // CheckRegisterTransportEndpoint should only return an error if there is a + // listening endpoint bound with the same id and portFlags and bindToDevice + // options. + // + // NOTE: Only listening and connected endpoint register with + // demuxer. Further connected endpoints always have a remote + // address/port. Hence this will only return an error if there is a matching + // listening endpoint. + if err := e.stack.CheckRegisterTransportEndpoint(nic, netProtos, ProtocolNumber, id, e.portFlags, e.bindToDevice); err != nil { + return false + } + return true + }) + if err != nil { return err } + e.boundBindToDevice = e.bindToDevice + e.boundPortFlags = e.portFlags + // TODO(gvisor.dev/issue/3691): Add test to verify boundNICID is correct. + e.boundNICID = nic + e.isPortReserved = true + e.effectiveNetProtos = netProtos + e.ID.LocalPort = port + // Mark endpoint as bound. e.setEndpointState(StateBound) diff --git a/pkg/tcpip/transport/udp/endpoint.go b/pkg/tcpip/transport/udp/endpoint.go index 73608783c..c33434b75 100644 --- a/pkg/tcpip/transport/udp/endpoint.go +++ b/pkg/tcpip/transport/udp/endpoint.go @@ -1226,7 +1226,7 @@ func (*endpoint) Accept() (tcpip.Endpoint, *waiter.Queue, *tcpip.Error) { func (e *endpoint) registerWithStack(nicID tcpip.NICID, netProtos []tcpip.NetworkProtocolNumber, id stack.TransportEndpointID) (stack.TransportEndpointID, tcpip.NICID, *tcpip.Error) { if e.ID.LocalPort == 0 { - port, err := e.stack.ReservePort(netProtos, ProtocolNumber, id.LocalAddress, id.LocalPort, e.portFlags, e.bindToDevice, tcpip.FullAddress{}) + port, err := e.stack.ReservePort(netProtos, ProtocolNumber, id.LocalAddress, id.LocalPort, e.portFlags, e.bindToDevice, tcpip.FullAddress{}, nil /* testPort */) if err != nil { return id, e.bindToDevice, err } diff --git a/test/syscalls/linux/socket_inet_loopback.cc b/test/syscalls/linux/socket_inet_loopback.cc index c3b42682f..a62a10088 100644 --- a/test/syscalls/linux/socket_inet_loopback.cc +++ b/test/syscalls/linux/socket_inet_loopback.cc @@ -2573,6 +2573,44 @@ TEST_P(SocketMultiProtocolInetLoopbackTest, V4EphemeralPortReservedReuseAddr) { SyscallSucceeds()); } +TEST_P(SocketMultiProtocolInetLoopbackTest, + MultipleBindsAllowedNoListeningReuseAddr) { + const auto& param = GetParam(); + // UDP sockets are allowed to bind/listen on the port w/ SO_REUSEADDR, for TCP + // this is only permitted if there is no other listening socket. + SKIP_IF(param.type != SOCK_STREAM); + // Bind the v4 loopback on a v4 socket. + const TestAddress& test_addr = V4Loopback(); + sockaddr_storage bound_addr = test_addr.addr; + FileDescriptor bound_fd = + ASSERT_NO_ERRNO_AND_VALUE(Socket(test_addr.family(), param.type, 0)); + + ASSERT_THAT(setsockopt(bound_fd.get(), SOL_SOCKET, SO_REUSEADDR, &kSockOptOn, + sizeof(kSockOptOn)), + SyscallSucceeds()); + ASSERT_THAT(bind(bound_fd.get(), reinterpret_cast<sockaddr*>(&bound_addr), + test_addr.addr_len), + SyscallSucceeds()); + // Get the port that we bound. + socklen_t bound_addr_len = test_addr.addr_len; + ASSERT_THAT( + getsockname(bound_fd.get(), reinterpret_cast<sockaddr*>(&bound_addr), + &bound_addr_len), + SyscallSucceeds()); + + // Now create a socket and bind it to the same port, this should + // succeed since there is no listening socket for the same port. + FileDescriptor second_fd = + ASSERT_NO_ERRNO_AND_VALUE(Socket(test_addr.family(), param.type, 0)); + + ASSERT_THAT(setsockopt(second_fd.get(), SOL_SOCKET, SO_REUSEADDR, &kSockOptOn, + sizeof(kSockOptOn)), + SyscallSucceeds()); + ASSERT_THAT(bind(second_fd.get(), reinterpret_cast<sockaddr*>(&bound_addr), + test_addr.addr_len), + SyscallSucceeds()); +} + TEST_P(SocketMultiProtocolInetLoopbackTest, PortReuseTwoSockets) { auto const& param = GetParam(); TestAddress const& test_addr = V4Loopback(); diff --git a/test/syscalls/linux/socket_inet_loopback_nogotsan.cc b/test/syscalls/linux/socket_inet_loopback_nogotsan.cc index 791e2bd51..1a0b53394 100644 --- a/test/syscalls/linux/socket_inet_loopback_nogotsan.cc +++ b/test/syscalls/linux/socket_inet_loopback_nogotsan.cc @@ -168,6 +168,71 @@ INSTANTIATE_TEST_SUITE_P( TestParam{V6Loopback(), V6Loopback()}), DescribeTestParam); +struct ProtocolTestParam { + std::string description; + int type; +}; + +std::string DescribeProtocolTestParam( + ::testing::TestParamInfo<ProtocolTestParam> const& info) { + return info.param.description; +} + +using SocketMultiProtocolInetLoopbackTest = + ::testing::TestWithParam<ProtocolTestParam>; + +TEST_P(SocketMultiProtocolInetLoopbackTest, + BindAvoidsListeningPortsReuseAddr_NoRandomSave) { + const auto& param = GetParam(); + // UDP sockets are allowed to bind/listen on the port w/ SO_REUSEADDR, for TCP + // this is only permitted if there is no other listening socket. + SKIP_IF(param.type != SOCK_STREAM); + + DisableSave ds; // Too many syscalls. + + // A map of port to file descriptor binding the port. + std::map<uint16_t, FileDescriptor> listen_sockets; + + // Exhaust all ephemeral ports. + while (true) { + // Bind the v4 loopback on a v4 socket. + TestAddress const& test_addr = V4Loopback(); + sockaddr_storage bound_addr = test_addr.addr; + FileDescriptor bound_fd = + ASSERT_NO_ERRNO_AND_VALUE(Socket(test_addr.family(), param.type, 0)); + + ASSERT_THAT(setsockopt(bound_fd.get(), SOL_SOCKET, SO_REUSEADDR, + &kSockOptOn, sizeof(kSockOptOn)), + SyscallSucceeds()); + + int ret = bind(bound_fd.get(), reinterpret_cast<sockaddr*>(&bound_addr), + test_addr.addr_len); + if (ret != 0) { + ASSERT_EQ(errno, EADDRINUSE); + break; + } + // Get the port that we bound. + socklen_t bound_addr_len = test_addr.addr_len; + ASSERT_THAT( + getsockname(bound_fd.get(), reinterpret_cast<sockaddr*>(&bound_addr), + &bound_addr_len), + SyscallSucceeds()); + uint16_t port = reinterpret_cast<sockaddr_in*>(&bound_addr)->sin_port; + + // Newly bound port should not already be in use by a listening socket. + ASSERT_EQ(listen_sockets.find(port), listen_sockets.end()); + auto fd = bound_fd.get(); + listen_sockets.insert(std::make_pair(port, std::move(bound_fd))); + ASSERT_THAT(listen(fd, SOMAXCONN), SyscallSucceeds()); + } +} + +INSTANTIATE_TEST_SUITE_P( + AllFamilies, SocketMultiProtocolInetLoopbackTest, + ::testing::Values(ProtocolTestParam{"TCP", SOCK_STREAM}, + ProtocolTestParam{"UDP", SOCK_DGRAM}), + DescribeProtocolTestParam); + } // namespace } // namespace testing diff --git a/test/syscalls/linux/socket_ipv4_udp_unbound.cc b/test/syscalls/linux/socket_ipv4_udp_unbound.cc index bc005e2bb..cdc9c2266 100644 --- a/test/syscalls/linux/socket_ipv4_udp_unbound.cc +++ b/test/syscalls/linux/socket_ipv4_udp_unbound.cc @@ -2121,7 +2121,7 @@ TEST_P(IPv4UDPUnboundSocketTest, ReuseAddrReusePortDistribution) { SyscallSucceedsWithValue(kMessageSize)); } -// Check that connect returns EADDRNOTAVAIL when out of local ephemeral ports. +// Check that connect returns EAGAIN when out of local ephemeral ports. // We disable S/R because this test creates a large number of sockets. TEST_P(IPv4UDPUnboundSocketTest, UDPConnectPortExhaustion_NoRandomSave) { auto receiver1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); @@ -2154,6 +2154,29 @@ TEST_P(IPv4UDPUnboundSocketTest, UDPConnectPortExhaustion_NoRandomSave) { } } +// Check that bind returns EADDRINUSE when out of local ephemeral ports. +// We disable S/R because this test creates a large number of sockets. +TEST_P(IPv4UDPUnboundSocketTest, UDPBindPortExhaustion_NoRandomSave) { + auto receiver1 = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + constexpr int kClients = 65536; + auto addr = V4Loopback(); + // Disable cooperative S/R as we are making too many syscalls. + DisableSave ds; + std::vector<std::unique_ptr<FileDescriptor>> sockets; + for (int i = 0; i < kClients; i++) { + auto s = ASSERT_NO_ERRNO_AND_VALUE(NewSocket()); + + int ret = + bind(s->get(), reinterpret_cast<sockaddr*>(&addr.addr), addr.addr_len); + if (ret == 0) { + sockets.push_back(std::move(s)); + continue; + } + ASSERT_THAT(ret, SyscallFailsWithErrno(EADDRINUSE)); + break; + } +} + // Test that socket will receive packet info control message. TEST_P(IPv4UDPUnboundSocketTest, SetAndReceiveIPPKTINFO) { // TODO(gvisor.dev/issue/1202): ioctl() is not supported by hostinet. |