From d703340bc04a4269f420fdf24d946abcbc6a620b Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Tue, 22 Jun 2021 10:47:37 -0700 Subject: runsc: don't kill sandbox, let it stop properly The typical sequence of calls to start a container looks like this ct, err := container.New(conf, containerArgs) defer ct.Destroy() ct.Start(conf) ws, err := ct.Wait() For the root container, ct.Destroy() kills the sandbox process. This doesn't look like a right wait to stop it. For example, all ongoing rpc calls are aborted in this case. If everything is going alright, we can just wait and it will exit itself. Reported-by: syzbot+084fca334720887441e7@syzkaller.appspotmail.com Signed-off-by: Andrei Vagin --- pkg/control/server/server.go | 5 +++-- pkg/urpc/urpc.go | 43 +++++++++++++++++++++++++++---------------- runsc/boot/controller.go | 5 +++-- runsc/boot/loader.go | 7 ++++++- runsc/sandbox/sandbox.go | 6 ++++++ 5 files changed, 45 insertions(+), 21 deletions(-) diff --git a/pkg/control/server/server.go b/pkg/control/server/server.go index 629dae8f4..eca06791c 100644 --- a/pkg/control/server/server.go +++ b/pkg/control/server/server.go @@ -21,6 +21,7 @@ implementations of the control interface. package server import ( + "context" "os" "gvisor.dev/gvisor/pkg/log" @@ -65,13 +66,13 @@ func (s *Server) Wait() { // Stop stops the server. Note that this function should only be called once // and the server should not be used afterwards. -func (s *Server) Stop() { +func (s *Server) Stop(ctx context.Context) { s.socket.Close() s.Wait() // This will cause existing clients to be terminated safely. If the // registered handlers have a Stop callback, it will be called. - s.server.Stop() + s.server.Stop(ctx) } // StartServing starts listening for connect and spawns the main service diff --git a/pkg/urpc/urpc.go b/pkg/urpc/urpc.go index 0e9a829f6..7872d6fa1 100644 --- a/pkg/urpc/urpc.go +++ b/pkg/urpc/urpc.go @@ -20,6 +20,7 @@ package urpc import ( "bytes" + "context" "encoding/json" "errors" "fmt" @@ -458,29 +459,39 @@ func (s *Server) StartHandling(client *unet.Socket) { // No new requests should be initiated after calling Stop. Existing clients // will be closed after completing any pending RPCs. This method will block // until all clients have disconnected. -func (s *Server) Stop() { - // Wait for all outstanding requests. - defer s.wg.Wait() +func (s *Server) Stop(ctx context.Context) { + done := make(chan bool) // Call any Stop callbacks. for _, stopper := range s.stoppers { stopper.Stop() } + go func() { + select { + case <-done: + return + case <-ctx.Done(): + } - // Close all known clients. - s.mu.Lock() - defer s.mu.Unlock() - for client, state := range s.clients { - switch state { - case idle: - // Close connection now. - client.Close() - s.clients[client] = closed - case processing: - // Request close when done. - s.clients[client] = closeRequested + // Close all known clients. + s.mu.Lock() + defer s.mu.Unlock() + for client, state := range s.clients { + switch state { + case idle: + // Close connection now. + client.Close() + s.clients[client] = closed + case processing: + // Request close when done. + s.clients[client] = closeRequested + } } - } + }() + // Wait for all outstanding requests. + s.wg.Wait() + done <- true + } // Client is a urpc client. diff --git a/runsc/boot/controller.go b/runsc/boot/controller.go index d52cf5a00..b605aa40a 100644 --- a/runsc/boot/controller.go +++ b/runsc/boot/controller.go @@ -15,6 +15,7 @@ package boot import ( + "context" "errors" "fmt" "os" @@ -165,8 +166,8 @@ func newController(fd int, l *Loader) (*controller, error) { return ctrl, nil } -func (c *controller) stop() { - c.srv.Stop() +func (c *controller) stop(ctx context.Context) { + c.srv.Stop(ctx) } // containerManager manages sandbox containers. diff --git a/runsc/boot/loader.go b/runsc/boot/loader.go index 8d71d7447..d8282d1d1 100644 --- a/runsc/boot/loader.go +++ b/runsc/boot/loader.go @@ -16,6 +16,7 @@ package boot import ( + gcontext "context" "errors" "fmt" mrand "math/rand" @@ -468,6 +469,8 @@ func createProcessArgs(id string, spec *specs.Spec, creds *auth.Credentials, k * return procArgs, nil } +const destroyTimeout = 15 * gtime.Second + // Destroy cleans up all resources used by the loader. // // Note that this will block until all open control server connections have @@ -482,7 +485,9 @@ func (l *Loader) Destroy() { // Stop the control server. This will indirectly stop any // long-running control operations that are in flight, e.g. // profiling operations. - l.ctrl.stop() + ctx, cancel := gcontext.WithTimeout(context.Background(), destroyTimeout) + defer cancel() + l.ctrl.stop(ctx) // Release all kernel resources. This is only safe after we can no longer // save/restore. diff --git a/runsc/sandbox/sandbox.go b/runsc/sandbox/sandbox.go index 29e202b7d..f14cc7229 100644 --- a/runsc/sandbox/sandbox.go +++ b/runsc/sandbox/sandbox.go @@ -797,7 +797,13 @@ func (s *Sandbox) Wait(cid string) (unix.WaitStatus, error) { // Try the Wait RPC to the sandbox. var ws unix.WaitStatus err = conn.Call(boot.ContainerWait, &cid, &ws) + conn.Close() if err == nil { + if s.IsRootContainer(cid) { + if err := s.waitForStopped(); err != nil { + return unix.WaitStatus(0), err + } + } // It worked! return ws, nil } -- cgit v1.2.3