From 13a98df49ea1b36cd21c528293b626a6a3639f0b Mon Sep 17 00:00:00 2001 From: Rahat Mahmood Date: Thu, 8 Aug 2019 12:32:00 -0700 Subject: netstack: Don't start endpoint goroutines too soon on restore. Endpoint protocol goroutines were previously started as part of loading the endpoint. This is potentially too soon, as resources used by these goroutines may not have been loaded. Protocol goroutines may perform meaningful work as soon as they're started (ex: incoming connect) which can cause them to indirectly access resources that haven't been loaded yet. This CL defers resuming all protocol goroutines until the end of restore. PiperOrigin-RevId: 262409429 --- pkg/tcpip/stack/stack.go | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) (limited to 'pkg/tcpip/stack/stack.go') diff --git a/pkg/tcpip/stack/stack.go b/pkg/tcpip/stack/stack.go index 78beb0dae..d45e547ee 100644 --- a/pkg/tcpip/stack/stack.go +++ b/pkg/tcpip/stack/stack.go @@ -334,6 +334,15 @@ type TCPEndpointState struct { Sender TCPSenderState } +// ResumableEndpoint is an endpoint that needs to be resumed after restore. +type ResumableEndpoint interface { + // Resume resumes an endpoint after restore. This can be used to restart + // background workers such as protocol goroutines. This must be called after + // all indirect dependencies of the endpoint have been restored, which + // generally implies at the end of the restore process. + Resume(*Stack) +} + // Stack is a networking stack, with all supported protocols, NICs, and route // table. type Stack struct { @@ -376,6 +385,10 @@ type Stack struct { // tables are the iptables packet filtering and manipulation rules. tables iptables.IPTables + + // resumableEndpoints is a list of endpoints that need to be resumed if the + // stack is being restored. + resumableEndpoints []ResumableEndpoint } // Options contains optional Stack configuration. 
@@ -1091,6 +1104,28 @@ func (s *Stack) UnregisterRawTransportEndpoint(nicID tcpip.NICID, netProto tcpip } } +// RegisterRestoredEndpoint records e as an endpoint that has been restored on +// this stack. +func (s *Stack) RegisterRestoredEndpoint(e ResumableEndpoint) { + s.mu.Lock() + s.resumableEndpoints = append(s.resumableEndpoints, e) + s.mu.Unlock() +} + +// Resume restarts the stack after a restore. This must be called after the +// entire system has been restored. +func (s *Stack) Resume() { + // ResumableEndpoint.Resume() may call other methods on s, so we can't hold + // s.mu while resuming the endpoints. + s.mu.Lock() + eps := s.resumableEndpoints + s.resumableEndpoints = nil + s.mu.Unlock() + for _, e := range eps { + e.Resume(s) + } +} + // NetworkProtocolInstance returns the protocol instance in the stack for the // specified network protocol. This method is public for protocol implementers // and tests to use. -- cgit v1.2.3