summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorJamie Liu <jamieliu@google.com>2020-09-03 15:21:10 -0700
committergVisor bot <gvisor-bot@google.com>2020-09-03 15:24:48 -0700
commit76e51c8b9add2e8dd7538a9ccd126409c31f7cc0 (patch)
tree3b9ec84401f7bf227b9327546803d953ee22d6fb
parent30c20df76f969699619bc0819b6e54c9d6bcfb5a (diff)
Use atomic.Value for Stack.tcpProbeFunc.
b/166980357#comment56 shows: - 837 goroutines blocked in: gvisor/pkg/sync/sync.(*RWMutex).Lock gvisor/pkg/tcpip/stack/stack.(*Stack).StartTransportEndpointCleanup gvisor/pkg/tcpip/transport/tcp/tcp.(*endpoint).cleanupLocked gvisor/pkg/tcpip/transport/tcp/tcp.(*endpoint).completeWorkerLocked gvisor/pkg/tcpip/transport/tcp/tcp.(*endpoint).protocolMainLoop.func1 gvisor/pkg/tcpip/transport/tcp/tcp.(*endpoint).protocolMainLoop - 695 goroutines blocked in: gvisor/pkg/sync/sync.(*RWMutex).Lock gvisor/pkg/tcpip/stack/stack.(*Stack).CompleteTransportEndpointCleanup gvisor/pkg/tcpip/transport/tcp/tcp.(*endpoint).cleanupLocked gvisor/pkg/tcpip/transport/tcp/tcp.(*endpoint).completeWorkerLocked gvisor/pkg/tcpip/transport/tcp/tcp.(*endpoint).protocolMainLoop.func1 gvisor/pkg/tcpip/transport/tcp/tcp.(*endpoint).protocolMainLoop - 3882 goroutines blocked in: gvisor/pkg/sync/sync.(*RWMutex).Lock gvisor/pkg/tcpip/stack/stack.(*Stack).GetTCPProbe gvisor/pkg/tcpip/transport/tcp/tcp.newEndpoint gvisor/pkg/tcpip/transport/tcp/tcp.(*protocol).NewEndpoint gvisor/pkg/tcpip/stack/stack.(*Stack).NewEndpoint All of these are contending on Stack.mu. Stack.StartTransportEndpointCleanup() and Stack.CompleteTransportEndpointCleanup() insert/delete TransportEndpoints in a map (Stack.cleanupEndpoints), and the former also does endpoint unregistration while holding Stack.mu, so it's not immediately clear how feasible it is to replace the map with a mutex-less implementation or how much doing so would help. However, Stack.GetTCPProbe() just reads a function object (Stack.tcpProbeFunc) that is almost always nil (as far as I can tell, Stack.AddTCPProbe() is only called in tests), and it's called for every new TCP endpoint. So converting it to an atomic.Value should significantly reduce contention on Stack.mu, improving TCP endpoint creation latency and allowing TCP endpoint cleanup to proceed. PiperOrigin-RevId: 330004140
-rw-r--r--pkg/tcpip/stack/stack.go20
1 files changed, 9 insertions, 11 deletions
diff --git a/pkg/tcpip/stack/stack.go b/pkg/tcpip/stack/stack.go
index c86ee1c13..66ce10357 100644
--- a/pkg/tcpip/stack/stack.go
+++ b/pkg/tcpip/stack/stack.go
@@ -429,7 +429,7 @@ type Stack struct {
// If not nil, then any new endpoints will have this probe function
// invoked everytime they receive a TCP segment.
- tcpProbeFunc TCPProbeFunc
+ tcpProbeFunc atomic.Value // TCPProbeFunc
// clock is used to generate user-visible times.
clock tcpip.Clock
@@ -1795,18 +1795,17 @@ func (s *Stack) TransportProtocolInstance(num tcpip.TransportProtocolNumber) Tra
// guarantee provided on which probe will be invoked. Ideally this should only
// be called once per stack.
func (s *Stack) AddTCPProbe(probe TCPProbeFunc) {
- s.mu.Lock()
- s.tcpProbeFunc = probe
- s.mu.Unlock()
+ s.tcpProbeFunc.Store(probe)
}
// GetTCPProbe returns the TCPProbeFunc if installed with AddTCPProbe, nil
// otherwise.
func (s *Stack) GetTCPProbe() TCPProbeFunc {
- s.mu.Lock()
- p := s.tcpProbeFunc
- s.mu.Unlock()
- return p
+ p := s.tcpProbeFunc.Load()
+ if p == nil {
+ return nil
+ }
+ return p.(TCPProbeFunc)
}
// RemoveTCPProbe removes an installed TCP probe.
@@ -1815,9 +1814,8 @@ func (s *Stack) GetTCPProbe() TCPProbeFunc {
// have a probe attached. Endpoints already created will continue to invoke
// TCP probe.
func (s *Stack) RemoveTCPProbe() {
- s.mu.Lock()
- s.tcpProbeFunc = nil
- s.mu.Unlock()
+ // This must be TCPProbeFunc(nil) because atomic.Value.Store(nil) panics.
+ s.tcpProbeFunc.Store(TCPProbeFunc(nil))
}
// JoinGroup joins the given multicast group on the given NIC.