summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorFabricio Voznika <fvoznika@google.com>2021-06-21 17:16:19 -0700
committergVisor bot <gvisor-bot@google.com>2021-06-21 17:19:05 -0700
commit1e472a85729b723c0d737d5e1c68c875a158d6a6 (patch)
tree829ebda17a831383dacedbd0ff679a738aa4ebe5
parent298cf30627afad13c245232e8e9f270b257e736f (diff)
Improve stopped container handling
Getting state of a stopped container would fail and could lead containerd to not detecting that the container had actually stopped. Now stopped and deleted containers return `stopped` state. Also makes other messages more consistent when container is stopped. Some where still sending messages to runsc and failing in different ways. Now they go through `initState` state machine like the other messages. There are a few changes to improve debugability with it as well. Fixes #5861 PiperOrigin-RevId: 380698513
-rw-r--r--pkg/shim/BUILD1
-rw-r--r--pkg/shim/debug.go48
-rw-r--r--pkg/shim/proc/deleted_state.go30
-rw-r--r--pkg/shim/proc/exec.go17
-rw-r--r--pkg/shim/proc/exec_state.go72
-rw-r--r--pkg/shim/proc/init.go84
-rw-r--r--pkg/shim/proc/init_state.go174
-rw-r--r--pkg/shim/proc/proc.go18
-rw-r--r--pkg/shim/runsc/runsc.go87
-rw-r--r--pkg/shim/service.go45
10 files changed, 344 insertions, 232 deletions
diff --git a/pkg/shim/BUILD b/pkg/shim/BUILD
index fd6127b97..367765209 100644
--- a/pkg/shim/BUILD
+++ b/pkg/shim/BUILD
@@ -6,6 +6,7 @@ go_library(
name = "shim",
srcs = [
"api.go",
+ "debug.go",
"epoll.go",
"options.go",
"service.go",
diff --git a/pkg/shim/debug.go b/pkg/shim/debug.go
new file mode 100644
index 000000000..49f01990e
--- /dev/null
+++ b/pkg/shim/debug.go
@@ -0,0 +1,48 @@
+// Copyright 2021 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package shim
+
+import (
+ "os"
+ "os/signal"
+ "runtime"
+ "sync"
+ "syscall"
+
+ "github.com/containerd/containerd/log"
+)
+
+var once sync.Once
+
+func setDebugSigHandler() {
+ once.Do(func() {
+ dumpCh := make(chan os.Signal, 1)
+ signal.Notify(dumpCh, syscall.SIGUSR2)
+ go func() {
+ buf := make([]byte, 10240)
+ for range dumpCh {
+ for {
+ n := runtime.Stack(buf, true)
+ if n >= len(buf) {
+ buf = make([]byte, 2*len(buf))
+ continue
+ }
+ log.L.Debugf("User requested stack trace:\n%s", buf[:n])
+ }
+ }
+ }()
+ log.L.Debugf("For full process dump run: kill -%d %d", syscall.SIGUSR2, os.Getpid())
+ })
+}
diff --git a/pkg/shim/proc/deleted_state.go b/pkg/shim/proc/deleted_state.go
index d9b970c4d..b0bbe4d7e 100644
--- a/pkg/shim/proc/deleted_state.go
+++ b/pkg/shim/proc/deleted_state.go
@@ -22,28 +22,38 @@ import (
"github.com/containerd/console"
"github.com/containerd/containerd/errdefs"
"github.com/containerd/containerd/pkg/process"
+ runc "github.com/containerd/go-runc"
)
type deletedState struct{}
-func (*deletedState) Resize(ws console.WinSize) error {
- return fmt.Errorf("cannot resize a deleted process.ss")
+func (*deletedState) Resize(console.WinSize) error {
+ return fmt.Errorf("cannot resize a deleted container/process")
}
-func (*deletedState) Start(ctx context.Context) error {
- return fmt.Errorf("cannot start a deleted process.ss")
+func (*deletedState) Start(context.Context) error {
+ return fmt.Errorf("cannot start a deleted container/process")
}
-func (*deletedState) Delete(ctx context.Context) error {
- return fmt.Errorf("cannot delete a deleted process.ss: %w", errdefs.ErrNotFound)
+func (*deletedState) Delete(context.Context) error {
+ return fmt.Errorf("cannot delete a deleted container/process: %w", errdefs.ErrNotFound)
}
-func (*deletedState) Kill(ctx context.Context, sig uint32, all bool) error {
- return fmt.Errorf("cannot kill a deleted process.ss: %w", errdefs.ErrNotFound)
+func (*deletedState) Kill(_ context.Context, signal uint32, _ bool) error {
+ return handleStoppedKill(signal)
}
-func (*deletedState) SetExited(status int) {}
+func (*deletedState) SetExited(int) {}
-func (*deletedState) Exec(ctx context.Context, path string, r *ExecConfig) (process.Process, error) {
+func (*deletedState) Exec(context.Context, string, *ExecConfig) (process.Process, error) {
return nil, fmt.Errorf("cannot exec in a deleted state")
}
+
+func (s *deletedState) State(context.Context) (string, error) {
+ // There is no "deleted" state, closest one is stopped.
+ return "stopped", nil
+}
+
+func (s *deletedState) Stats(context.Context, string) (*runc.Stats, error) {
+ return nil, fmt.Errorf("cannot stat a stopped container/process")
+}
diff --git a/pkg/shim/proc/exec.go b/pkg/shim/proc/exec.go
index e7968d9d5..14df3a778 100644
--- a/pkg/shim/proc/exec.go
+++ b/pkg/shim/proc/exec.go
@@ -145,16 +145,13 @@ func (e *execProcess) Kill(ctx context.Context, sig uint32, _ bool) error {
func (e *execProcess) kill(ctx context.Context, sig uint32, _ bool) error {
internalPid := e.internalPid
- if internalPid != 0 {
- if err := e.parent.runtime.Kill(ctx, e.parent.id, int(sig), &runsc.KillOpts{
- Pid: internalPid,
- }); err != nil {
- // If this returns error, consider the process has
- // already stopped.
- //
- // TODO: Fix after signal handling is fixed.
- return fmt.Errorf("%s: %w", err.Error(), errdefs.ErrNotFound)
- }
+ if internalPid == 0 {
+ return nil
+ }
+
+ opts := runsc.KillOpts{Pid: internalPid}
+ if err := e.parent.runtime.Kill(ctx, e.parent.id, int(sig), &opts); err != nil {
+ return fmt.Errorf("%s: %w", err.Error(), errdefs.ErrNotFound)
}
return nil
}
diff --git a/pkg/shim/proc/exec_state.go b/pkg/shim/proc/exec_state.go
index 4dcda8b44..04a5d19b4 100644
--- a/pkg/shim/proc/exec_state.go
+++ b/pkg/shim/proc/exec_state.go
@@ -34,18 +34,21 @@ type execCreatedState struct {
p *execProcess
}
-func (s *execCreatedState) transition(name string) error {
- switch name {
- case "running":
+func (s *execCreatedState) name() string {
+ return "created"
+}
+
+func (s *execCreatedState) transition(transition stateTransition) {
+ switch transition {
+ case running:
s.p.execState = &execRunningState{p: s.p}
- case "stopped":
+ case stopped:
s.p.execState = &execStoppedState{p: s.p}
- case "deleted":
+ case deleted:
s.p.execState = &deletedState{}
default:
- return fmt.Errorf("invalid state transition %q to %q", stateName(s), name)
+ panic(fmt.Sprintf("invalid state transition %q to %q", s.name(), transition))
}
- return nil
}
func (s *execCreatedState) Resize(ws console.WinSize) error {
@@ -56,14 +59,16 @@ func (s *execCreatedState) Start(ctx context.Context) error {
if err := s.p.start(ctx); err != nil {
return err
}
- return s.transition("running")
+ s.transition(running)
+ return nil
}
func (s *execCreatedState) Delete(ctx context.Context) error {
if err := s.p.delete(ctx); err != nil {
return err
}
- return s.transition("deleted")
+ s.transition(deleted)
+ return nil
}
func (s *execCreatedState) Kill(ctx context.Context, sig uint32, all bool) error {
@@ -72,35 +77,35 @@ func (s *execCreatedState) Kill(ctx context.Context, sig uint32, all bool) error
func (s *execCreatedState) SetExited(status int) {
s.p.setExited(status)
-
- if err := s.transition("stopped"); err != nil {
- panic(err)
- }
+ s.transition(stopped)
}
type execRunningState struct {
p *execProcess
}
-func (s *execRunningState) transition(name string) error {
- switch name {
- case "stopped":
+func (s *execRunningState) name() string {
+ return "running"
+}
+
+func (s *execRunningState) transition(transition stateTransition) {
+ switch transition {
+ case stopped:
s.p.execState = &execStoppedState{p: s.p}
default:
- return fmt.Errorf("invalid state transition %q to %q", stateName(s), name)
+ panic(fmt.Sprintf("invalid state transition %q to %q", s.name(), transition))
}
- return nil
}
func (s *execRunningState) Resize(ws console.WinSize) error {
return s.p.resize(ws)
}
-func (s *execRunningState) Start(ctx context.Context) error {
+func (s *execRunningState) Start(context.Context) error {
return fmt.Errorf("cannot start a running process")
}
-func (s *execRunningState) Delete(ctx context.Context) error {
+func (s *execRunningState) Delete(context.Context) error {
return fmt.Errorf("cannot delete a running process")
}
@@ -110,31 +115,31 @@ func (s *execRunningState) Kill(ctx context.Context, sig uint32, all bool) error
func (s *execRunningState) SetExited(status int) {
s.p.setExited(status)
-
- if err := s.transition("stopped"); err != nil {
- panic(err)
- }
+ s.transition(stopped)
}
type execStoppedState struct {
p *execProcess
}
-func (s *execStoppedState) transition(name string) error {
- switch name {
- case "deleted":
+func (s *execStoppedState) name() string {
+ return "stopped"
+}
+
+func (s *execStoppedState) transition(transition stateTransition) {
+ switch transition {
+ case deleted:
s.p.execState = &deletedState{}
default:
- return fmt.Errorf("invalid state transition %q to %q", stateName(s), name)
+ panic(fmt.Sprintf("invalid state transition %q to %q", s.name(), transition))
}
- return nil
}
-func (s *execStoppedState) Resize(ws console.WinSize) error {
+func (s *execStoppedState) Resize(console.WinSize) error {
return fmt.Errorf("cannot resize a stopped container")
}
-func (s *execStoppedState) Start(ctx context.Context) error {
+func (s *execStoppedState) Start(context.Context) error {
return fmt.Errorf("cannot start a stopped process")
}
@@ -142,13 +147,14 @@ func (s *execStoppedState) Delete(ctx context.Context) error {
if err := s.p.delete(ctx); err != nil {
return err
}
- return s.transition("deleted")
+ s.transition(deleted)
+ return nil
}
func (s *execStoppedState) Kill(ctx context.Context, sig uint32, all bool) error {
return s.p.kill(ctx, sig, all)
}
-func (s *execStoppedState) SetExited(status int) {
+func (s *execStoppedState) SetExited(int) {
// no op
}
diff --git a/pkg/shim/proc/init.go b/pkg/shim/proc/init.go
index 664465e0d..6bf090813 100644
--- a/pkg/shim/proc/init.go
+++ b/pkg/shim/proc/init.go
@@ -39,6 +39,8 @@ import (
"gvisor.dev/gvisor/pkg/shim/runsc"
)
+const statusStopped = "stopped"
+
// Init represents an initial process for a container.
type Init struct {
wg sync.WaitGroup
@@ -201,10 +203,15 @@ func (p *Init) ExitedAt() time.Time {
func (p *Init) Status(ctx context.Context) (string, error) {
p.mu.Lock()
defer p.mu.Unlock()
+
+ return p.initState.State(ctx)
+}
+
+func (p *Init) state(ctx context.Context) (string, error) {
c, err := p.runtime.State(ctx, p.id)
if err != nil {
if strings.Contains(err.Error(), "does not exist") {
- return "stopped", nil
+ return statusStopped, nil
}
return "", p.runtimeError(err, "OCI runtime state failed")
}
@@ -231,10 +238,7 @@ func (p *Init) start(ctx context.Context) error {
status, err := p.runtime.Wait(context.Background(), p.id)
if err != nil {
log.G(ctx).WithError(err).Errorf("Failed to wait for container %q", p.id)
- // TODO(random-liu): Handle runsc kill error.
- if err := p.killAll(ctx); err != nil {
- log.G(ctx).WithError(err).Errorf("Failed to kill container %q", p.id)
- }
+ p.killAllLocked(ctx)
status = internalErrorCode
}
ExitCh <- Exit{
@@ -255,6 +259,12 @@ func (p *Init) SetExited(status int) {
}
func (p *Init) setExited(status int) {
+ if !p.exited.IsZero() {
+ log.L.Debugf("Status already set to %d, ignoring status: %d", p.status, status)
+ return
+ }
+
+ log.L.Debugf("Setting status: %d", status)
p.exited = time.Now()
p.status = status
p.Platform.ShutdownConsole(context.Background(), p.console)
@@ -270,15 +280,16 @@ func (p *Init) Delete(ctx context.Context) error {
}
func (p *Init) delete(ctx context.Context) error {
- p.killAll(ctx)
+ p.killAllLocked(ctx)
p.wg.Wait()
+
err := p.runtime.Delete(ctx, p.id, nil)
- // ignore errors if a runtime has already deleted the process
- // but we still hold metadata and pipes
- //
- // this is common during a checkpoint, runc will delete the container state
- // after a checkpoint and the container will no longer exist within runc
if err != nil {
+ // ignore errors if a runtime has already deleted the process
+ // but we still hold metadata and pipes
+ //
+ // this is common during a checkpoint, runc will delete the container state
+ // after a checkpoint and the container will no longer exist within runc
if strings.Contains(err.Error(), "does not exist") {
err = nil
} else {
@@ -326,29 +337,24 @@ func (p *Init) Kill(ctx context.Context, signal uint32, all bool) error {
return p.initState.Kill(ctx, signal, all)
}
-func (p *Init) kill(context context.Context, signal uint32, all bool) error {
+func (p *Init) kill(ctx context.Context, signal uint32, all bool) error {
var (
killErr error
backoff = 100 * time.Millisecond
)
- timeout := 1 * time.Second
- for start := time.Now(); time.Now().Sub(start) < timeout; {
- c, err := p.runtime.State(context, p.id)
+ const timeout = time.Second
+ for start := time.Now(); time.Since(start) < timeout; {
+ state, err := p.initState.State(ctx)
if err != nil {
- if strings.Contains(err.Error(), "does not exist") {
- return fmt.Errorf("no such process: %w", errdefs.ErrNotFound)
- }
return p.runtimeError(err, "OCI runtime state failed")
}
// For runsc, signal only works when container is running state.
// If the container is not in running state, directly return
// "no such process"
- if p.convertStatus(c.Status) == "stopped" {
+ if state == statusStopped {
return fmt.Errorf("no such process: %w", errdefs.ErrNotFound)
}
- killErr = p.runtime.Kill(context, p.id, int(signal), &runsc.KillOpts{
- All: all,
- })
+ killErr = p.runtime.Kill(ctx, p.id, int(signal), &runsc.KillOpts{All: all})
if killErr == nil {
return nil
}
@@ -358,22 +364,18 @@ func (p *Init) kill(context context.Context, signal uint32, all bool) error {
return p.runtimeError(killErr, "kill timeout")
}
-// KillAll kills all processes belonging to the init process.
-func (p *Init) KillAll(context context.Context) error {
+// KillAll kills all processes belonging to the init process. If
+// `runsc kill --all` returns error, assume the container has already stopped.
+func (p *Init) KillAll(context context.Context) {
p.mu.Lock()
defer p.mu.Unlock()
- return p.killAll(context)
+ p.killAllLocked(context)
}
-func (p *Init) killAll(context context.Context) error {
- p.runtime.Kill(context, p.id, int(unix.SIGKILL), &runsc.KillOpts{
- All: true,
- })
- // Ignore error handling for `runsc kill --all` for now.
- // * If it doesn't return error, it is good;
- // * If it returns error, consider the container has already stopped.
- // TODO: Fix `runsc kill --all` error handling.
- return nil
+func (p *Init) killAllLocked(context context.Context) {
+ if err := p.runtime.Kill(context, p.id, int(unix.SIGKILL), &runsc.KillOpts{All: true}); err != nil {
+ log.L.Warningf("Ignoring error killing container %q: %v", p.id, err)
+ }
}
// Stdin returns the stdin of the process.
@@ -396,7 +398,6 @@ func (p *Init) Exec(ctx context.Context, path string, r *ExecConfig) (process.Pr
// exec returns a new exec'd process.
func (p *Init) exec(path string, r *ExecConfig) (process.Process, error) {
- // process exec request
var spec specs.Process
if err := json.Unmarshal(r.Spec.Value, &spec); err != nil {
return nil, err
@@ -420,6 +421,17 @@ func (p *Init) exec(path string, r *ExecConfig) (process.Process, error) {
return e, nil
}
+func (p *Init) Stats(ctx context.Context, id string) (*runc.Stats, error) {
+ p.mu.Lock()
+ defer p.mu.Unlock()
+
+ return p.initState.Stats(ctx, id)
+}
+
+func (p *Init) stats(ctx context.Context, id string) (*runc.Stats, error) {
+ return p.Runtime().Stats(ctx, id)
+}
+
// Stdio returns the stdio of the process.
func (p *Init) Stdio() stdio.Stdio {
return p.stdio
@@ -444,7 +456,7 @@ func (p *Init) runtimeError(rErr error, msg string) error {
func (p *Init) convertStatus(status string) string {
if status == "created" && !p.Sandbox && p.status == internalErrorCode {
// Treat start failure state for non-root container as stopped.
- return "stopped"
+ return statusStopped
}
return status
}
diff --git a/pkg/shim/proc/init_state.go b/pkg/shim/proc/init_state.go
index 0065fc385..d65020e76 100644
--- a/pkg/shim/proc/init_state.go
+++ b/pkg/shim/proc/init_state.go
@@ -19,16 +19,39 @@ import (
"context"
"fmt"
- "github.com/containerd/console"
"github.com/containerd/containerd/errdefs"
"github.com/containerd/containerd/pkg/process"
+ runc "github.com/containerd/go-runc"
+ "golang.org/x/sys/unix"
)
+type stateTransition int
+
+const (
+ running stateTransition = iota
+ stopped
+ deleted
+)
+
+func (s stateTransition) String() string {
+ switch s {
+ case running:
+ return "running"
+ case stopped:
+ return "stopped"
+ case deleted:
+ return "deleted"
+ default:
+ panic(fmt.Sprintf("unknown state: %d", s))
+ }
+}
+
type initState interface {
- Resize(console.WinSize) error
Start(context.Context) error
Delete(context.Context) error
Exec(context.Context, string, *ExecConfig) (process.Process, error)
+ State(ctx context.Context) (string, error)
+ Stats(context.Context, string) (*runc.Stats, error)
Kill(context.Context, uint32, bool) error
SetExited(int)
}
@@ -37,22 +60,21 @@ type createdState struct {
p *Init
}
-func (s *createdState) transition(name string) error {
- switch name {
- case "running":
+func (s *createdState) name() string {
+ return "created"
+}
+
+func (s *createdState) transition(transition stateTransition) {
+ switch transition {
+ case running:
s.p.initState = &runningState{p: s.p}
- case "stopped":
- s.p.initState = &stoppedState{p: s.p}
- case "deleted":
+ case stopped:
+ s.p.initState = &stoppedState{process: s.p}
+ case deleted:
s.p.initState = &deletedState{}
default:
- return fmt.Errorf("invalid state transition %q to %q", stateName(s), name)
+ panic(fmt.Sprintf("invalid state transition %q to %q", s.name(), transition))
}
- return nil
-}
-
-func (s *createdState) Resize(ws console.WinSize) error {
- return s.p.resize(ws)
}
func (s *createdState) Start(ctx context.Context) error {
@@ -66,20 +88,20 @@ func (s *createdState) Start(ctx context.Context) error {
if !s.p.Sandbox {
s.p.io.Close()
s.p.setExited(internalErrorCode)
- if err := s.transition("stopped"); err != nil {
- panic(err)
- }
+ s.transition(stopped)
}
return err
}
- return s.transition("running")
+ s.transition(running)
+ return nil
}
func (s *createdState) Delete(ctx context.Context) error {
if err := s.p.delete(ctx); err != nil {
return err
}
- return s.transition("deleted")
+ s.transition(deleted)
+ return nil
}
func (s *createdState) Kill(ctx context.Context, sig uint32, all bool) error {
@@ -88,40 +110,48 @@ func (s *createdState) Kill(ctx context.Context, sig uint32, all bool) error {
func (s *createdState) SetExited(status int) {
s.p.setExited(status)
-
- if err := s.transition("stopped"); err != nil {
- panic(err)
- }
+ s.transition(stopped)
}
func (s *createdState) Exec(ctx context.Context, path string, r *ExecConfig) (process.Process, error) {
return s.p.exec(path, r)
}
+func (s *createdState) State(ctx context.Context) (string, error) {
+ state, err := s.p.state(ctx)
+ if err == nil && state == statusStopped {
+ s.transition(stopped)
+ }
+ return state, err
+}
+
+func (s *createdState) Stats(ctx context.Context, id string) (*runc.Stats, error) {
+ return s.p.stats(ctx, id)
+}
+
type runningState struct {
p *Init
}
-func (s *runningState) transition(name string) error {
- switch name {
- case "stopped":
- s.p.initState = &stoppedState{p: s.p}
- default:
- return fmt.Errorf("invalid state transition %q to %q", stateName(s), name)
- }
- return nil
+func (s *runningState) name() string {
+ return "running"
}
-func (s *runningState) Resize(ws console.WinSize) error {
- return s.p.resize(ws)
+func (s *runningState) transition(transition stateTransition) {
+ switch transition {
+ case stopped:
+ s.p.initState = &stoppedState{process: s.p}
+ default:
+ panic(fmt.Sprintf("invalid state transition %q to %q", s.name(), transition))
+ }
}
func (s *runningState) Start(ctx context.Context) error {
- return fmt.Errorf("cannot start a running process.ss")
+ return fmt.Errorf("cannot start a running container")
}
func (s *runningState) Delete(ctx context.Context) error {
- return fmt.Errorf("cannot delete a running process.ss")
+ return fmt.Errorf("cannot delete a running container")
}
func (s *runningState) Kill(ctx context.Context, sig uint32, all bool) error {
@@ -130,53 +160,81 @@ func (s *runningState) Kill(ctx context.Context, sig uint32, all bool) error {
func (s *runningState) SetExited(status int) {
s.p.setExited(status)
+ s.transition(stopped)
+}
+
+func (s *runningState) Exec(_ context.Context, path string, r *ExecConfig) (process.Process, error) {
+ return s.p.exec(path, r)
+}
- if err := s.transition("stopped"); err != nil {
- panic(err)
+func (s *runningState) State(ctx context.Context) (string, error) {
+ state, err := s.p.state(ctx)
+ if err == nil && state == "stopped" {
+ s.transition(stopped)
}
+ return state, err
}
-func (s *runningState) Exec(ctx context.Context, path string, r *ExecConfig) (process.Process, error) {
- return s.p.exec(path, r)
+func (s *runningState) Stats(ctx context.Context, id string) (*runc.Stats, error) {
+ return s.p.stats(ctx, id)
}
type stoppedState struct {
- p *Init
+ process *Init
}
-func (s *stoppedState) transition(name string) error {
- switch name {
- case "deleted":
- s.p.initState = &deletedState{}
- default:
- return fmt.Errorf("invalid state transition %q to %q", stateName(s), name)
- }
- return nil
+func (s *stoppedState) name() string {
+ return "stopped"
}
-func (s *stoppedState) Resize(ws console.WinSize) error {
- return fmt.Errorf("cannot resize a stopped container")
+func (s *stoppedState) transition(transition stateTransition) {
+ switch transition {
+ case deleted:
+ s.process.initState = &deletedState{}
+ default:
+ panic(fmt.Sprintf("invalid state transition %q to %q", s.name(), transition))
+ }
}
-func (s *stoppedState) Start(ctx context.Context) error {
- return fmt.Errorf("cannot start a stopped process.ss")
+func (s *stoppedState) Start(context.Context) error {
+ return fmt.Errorf("cannot start a stopped container")
}
func (s *stoppedState) Delete(ctx context.Context) error {
- if err := s.p.delete(ctx); err != nil {
+ if err := s.process.delete(ctx); err != nil {
return err
}
- return s.transition("deleted")
+ s.transition(deleted)
+ return nil
}
-func (s *stoppedState) Kill(ctx context.Context, sig uint32, all bool) error {
- return errdefs.ToGRPCf(errdefs.ErrNotFound, "process.ss %s not found", s.p.id)
+func (s *stoppedState) Kill(_ context.Context, signal uint32, _ bool) error {
+ return handleStoppedKill(signal)
}
func (s *stoppedState) SetExited(status int) {
- // no op
+ s.process.setExited(status)
}
-func (s *stoppedState) Exec(ctx context.Context, path string, r *ExecConfig) (process.Process, error) {
+func (s *stoppedState) Exec(context.Context, string, *ExecConfig) (process.Process, error) {
return nil, fmt.Errorf("cannot exec in a stopped state")
}
+
+func (s *stoppedState) State(context.Context) (string, error) {
+ return "stopped", nil
+}
+
+func (s *stoppedState) Stats(context.Context, string) (*runc.Stats, error) {
+ return nil, fmt.Errorf("cannot stat a stopped container")
+}
+
+func handleStoppedKill(signal uint32) error {
+ switch unix.Signal(signal) {
+ case unix.SIGTERM, unix.SIGKILL:
+ // Container is already stopped, so everything inside the container has
+ // already been killed.
+ return nil
+ default:
+ return errdefs.ToGRPCf(errdefs.ErrNotFound, "process not found")
+ }
+}
diff --git a/pkg/shim/proc/proc.go b/pkg/shim/proc/proc.go
index edba3fca5..89ad3f505 100644
--- a/pkg/shim/proc/proc.go
+++ b/pkg/shim/proc/proc.go
@@ -17,23 +17,5 @@
// the sandbox process running the container.
package proc
-import (
- "fmt"
-)
-
// RunscRoot is the path to the root runsc state directory.
const RunscRoot = "/run/containerd/runsc"
-
-func stateName(v interface{}) string {
- switch v.(type) {
- case *runningState, *execRunningState:
- return "running"
- case *createdState, *execCreatedState:
- return "created"
- case *deletedState:
- return "deleted"
- case *stoppedState:
- return "stopped"
- }
- panic(fmt.Errorf("invalid state %v", v))
-}
diff --git a/pkg/shim/runsc/runsc.go b/pkg/shim/runsc/runsc.go
index ff0521d73..888cb0bcb 100644
--- a/pkg/shim/runsc/runsc.go
+++ b/pkg/shim/runsc/runsc.go
@@ -17,6 +17,7 @@
package runsc
import (
+ "bytes"
"context"
"encoding/json"
"fmt"
@@ -73,9 +74,9 @@ type Runsc struct {
// List returns all containers created inside the provided runsc root directory.
func (r *Runsc) List(context context.Context) ([]*runc.Container, error) {
- data, err := cmdOutput(r.command(context, "list", "--format=json"), false)
+ data, stderr, err := cmdOutput(r.command(context, "list", "--format=json"), false)
if err != nil {
- return nil, err
+ return nil, fmt.Errorf("%w: %s", err, stderr)
}
var out []*runc.Container
if err := json.Unmarshal(data, &out); err != nil {
@@ -86,9 +87,9 @@ func (r *Runsc) List(context context.Context) ([]*runc.Container, error) {
// State returns the state for the container provided by id.
func (r *Runsc) State(context context.Context, id string) (*runc.Container, error) {
- data, err := cmdOutput(r.command(context, "state", id), true)
+ data, stderr, err := cmdOutput(r.command(context, "state", id), false)
if err != nil {
- return nil, fmt.Errorf("%s: %s", err, data)
+ return nil, fmt.Errorf("%w: %s", err, stderr)
}
var c runc.Container
if err := json.Unmarshal(data, &c); err != nil {
@@ -142,9 +143,9 @@ func (r *Runsc) Create(context context.Context, id, bundle string, opts *CreateO
}
if cmd.Stdout == nil && cmd.Stderr == nil {
- data, err := cmdOutput(cmd, true)
+ out, _, err := cmdOutput(cmd, true)
if err != nil {
- return fmt.Errorf("%s: %s", err, data)
+ return fmt.Errorf("%w: %s", err, out)
}
return nil
}
@@ -168,15 +169,15 @@ func (r *Runsc) Create(context context.Context, id, bundle string, opts *CreateO
}
func (r *Runsc) Pause(context context.Context, id string) error {
- if _, err := cmdOutput(r.command(context, "pause", id), true); err != nil {
- return fmt.Errorf("unable to pause: %w", err)
+ if out, _, err := cmdOutput(r.command(context, "pause", id), true); err != nil {
+ return fmt.Errorf("unable to pause: %w: %s", err, out)
}
return nil
}
func (r *Runsc) Resume(context context.Context, id string) error {
- if _, err := cmdOutput(r.command(context, "resume", id), true); err != nil {
- return fmt.Errorf("unable to resume: %w", err)
+ if out, _, err := cmdOutput(r.command(context, "resume", id), true); err != nil {
+ return fmt.Errorf("unable to resume: %w: %s", err, out)
}
return nil
}
@@ -189,9 +190,9 @@ func (r *Runsc) Start(context context.Context, id string, cio runc.IO) error {
}
if cmd.Stdout == nil && cmd.Stderr == nil {
- data, err := cmdOutput(cmd, true)
+ out, _, err := cmdOutput(cmd, true)
if err != nil {
- return fmt.Errorf("%s: %s", err, data)
+ return fmt.Errorf("%w: %s", err, out)
}
return nil
}
@@ -221,12 +222,10 @@ type waitResult struct {
}
// Wait will wait for a running container, and return its exit status.
-//
-// TODO(random-liu): Add exec process support.
func (r *Runsc) Wait(context context.Context, id string) (int, error) {
- data, err := cmdOutput(r.command(context, "wait", id), true)
+ data, stderr, err := cmdOutput(r.command(context, "wait", id), false)
if err != nil {
- return 0, fmt.Errorf("%s: %s", err, data)
+ return 0, fmt.Errorf("%w: %s", err, stderr)
}
var res waitResult
if err := json.Unmarshal(data, &res); err != nil {
@@ -294,9 +293,9 @@ func (r *Runsc) Exec(context context.Context, id string, spec specs.Process, opt
opts.Set(cmd)
}
if cmd.Stdout == nil && cmd.Stderr == nil {
- data, err := cmdOutput(cmd, true)
+ out, _, err := cmdOutput(cmd, true)
if err != nil {
- return fmt.Errorf("%s: %s", err, data)
+ return fmt.Errorf("%w: %s", err, out)
}
return nil
}
@@ -391,20 +390,12 @@ func (r *Runsc) Kill(context context.Context, id string, sig int, opts *KillOpts
// Stats return the stats for a container like cpu, memory, and I/O.
func (r *Runsc) Stats(context context.Context, id string) (*runc.Stats, error) {
cmd := r.command(context, "events", "--stats", id)
- rd, err := cmd.StdoutPipe()
- if err != nil {
- return nil, err
- }
- ec, err := Monitor.Start(cmd)
+ data, stderr, err := cmdOutput(cmd, false)
if err != nil {
- return nil, err
+ return nil, fmt.Errorf("%w: %s", err, stderr)
}
- defer func() {
- rd.Close()
- Monitor.Wait(cmd, ec)
- }()
var e runc.Event
- if err := json.NewDecoder(rd).Decode(&e); err != nil {
+ if err := json.Unmarshal(data, &e); err != nil {
log.L.Debugf("Parsing events error: %v", err)
return nil, err
}
@@ -459,9 +450,9 @@ func (r *Runsc) Events(context context.Context, id string, interval time.Duratio
// Ps lists all the processes inside the container returning their pids.
func (r *Runsc) Ps(context context.Context, id string) ([]int, error) {
- data, err := cmdOutput(r.command(context, "ps", "--format", "json", id), true)
+ data, stderr, err := cmdOutput(r.command(context, "ps", "--format", "json", id), false)
if err != nil {
- return nil, fmt.Errorf("%s: %s", err, data)
+ return nil, fmt.Errorf("%w: %s", err, stderr)
}
var pids []int
if err := json.Unmarshal(data, &pids); err != nil {
@@ -472,9 +463,9 @@ func (r *Runsc) Ps(context context.Context, id string) ([]int, error) {
// Top lists all the processes inside the container returning the full ps data.
func (r *Runsc) Top(context context.Context, id string) (*runc.TopResults, error) {
- data, err := cmdOutput(r.command(context, "ps", "--format", "table", id), true)
+ data, stderr, err := cmdOutput(r.command(context, "ps", "--format", "table", id), false)
if err != nil {
- return nil, fmt.Errorf("%s: %s", err, data)
+ return nil, fmt.Errorf("%w: %s", err, stderr)
}
topResults, err := runc.ParsePSOutput(data)
@@ -517,9 +508,9 @@ func (r *Runsc) runOrError(cmd *exec.Cmd) error {
}
return err
}
- data, err := cmdOutput(cmd, true)
+ out, _, err := cmdOutput(cmd, true)
if err != nil {
- return fmt.Errorf("%s: %s", err, data)
+ return fmt.Errorf("%w: %s", err, out)
}
return nil
}
@@ -540,23 +531,29 @@ func (r *Runsc) command(context context.Context, args ...string) *exec.Cmd {
return cmd
}
-func cmdOutput(cmd *exec.Cmd, combined bool) ([]byte, error) {
- b := getBuf()
- defer putBuf(b)
+func cmdOutput(cmd *exec.Cmd, combined bool) ([]byte, []byte, error) {
+ stdout := getBuf()
+ defer putBuf(stdout)
+ cmd.Stdout = stdout
+ cmd.Stderr = stdout
- cmd.Stdout = b
- if combined {
- cmd.Stderr = b
+ var stderr *bytes.Buffer
+ if !combined {
+ stderr = getBuf()
+ defer putBuf(stderr)
+ cmd.Stderr = stderr
}
ec, err := Monitor.Start(cmd)
if err != nil {
- return nil, err
+ return nil, nil, err
}
status, err := Monitor.Wait(cmd, ec)
if err == nil && status != 0 {
- err = fmt.Errorf("%s did not terminate sucessfully", cmd.Args[0])
+ err = fmt.Errorf("%q did not terminate sucessfully", cmd.Args[0])
}
-
- return b.Bytes(), err
+ if stderr == nil {
+ return stdout.Bytes(), nil, err
+ }
+ return stdout.Bytes(), stderr.Bytes(), err
}
diff --git a/pkg/shim/service.go b/pkg/shim/service.go
index 1f9adcb65..ea9a1ae10 100644
--- a/pkg/shim/service.go
+++ b/pkg/shim/service.go
@@ -81,8 +81,6 @@ const (
// New returns a new shim service that can be used via GRPC.
func New(ctx context.Context, id string, publisher shim.Publisher, cancel func()) (shim.Shim, error) {
- log.L.Debugf("service.New, id: %s", id)
-
var opts shim.Opts
if ctxOpts := ctx.Value(shim.OptsKey{}); ctxOpts != nil {
opts = ctxOpts.(shim.Opts)
@@ -304,8 +302,6 @@ func (s *service) Cleanup(ctx context.Context) (*taskAPI.DeleteResponse, error)
// Create creates a new initial process and container with the underlying OCI
// runtime.
func (s *service) Create(ctx context.Context, r *taskAPI.CreateTaskRequest) (*taskAPI.CreateTaskResponse, error) {
- log.L.Debugf("Create, id: %s, bundle: %q", r.ID, r.Bundle)
-
s.mu.Lock()
defer s.mu.Unlock()
@@ -396,6 +392,9 @@ func (s *service) Create(ctx context.Context, r *taskAPI.CreateTaskRequest) (*ta
log.L.Debugf("stdout: %s", r.Stdout)
log.L.Debugf("stderr: %s", r.Stderr)
log.L.Debugf("***************************")
+ if log.L.Logger.IsLevelEnabled(logrus.DebugLevel) {
+ setDebugSigHandler()
+ }
}
// Save state before any action is taken to ensure Cleanup() will have all
@@ -506,9 +505,6 @@ func (s *service) Delete(ctx context.Context, r *taskAPI.DeleteRequest) (*taskAP
if err != nil {
return nil, err
}
- if p == nil {
- return nil, errdefs.ToGRPCf(errdefs.ErrFailedPrecondition, "container must be created")
- }
if err := p.Delete(ctx); err != nil {
return nil, err
}
@@ -580,10 +576,12 @@ func (s *service) State(ctx context.Context, r *taskAPI.StateRequest) (*taskAPI.
p, err := s.getProcess(r.ExecID)
if err != nil {
+ log.L.Debugf("State failed to find process: %v", err)
return nil, err
}
st, err := p.Status(ctx)
if err != nil {
+ log.L.Debugf("State failed: %v", err)
return nil, err
}
status := task.StatusUnknown
@@ -596,7 +594,7 @@ func (s *service) State(ctx context.Context, r *taskAPI.StateRequest) (*taskAPI.
status = task.StatusStopped
}
sio := p.Stdio()
- return &taskAPI.StateResponse{
+ res := &taskAPI.StateResponse{
ID: p.ID(),
Bundle: s.bundle,
Pid: uint32(p.Pid()),
@@ -607,7 +605,9 @@ func (s *service) State(ctx context.Context, r *taskAPI.StateRequest) (*taskAPI.
Terminal: sio.Terminal,
ExitStatus: uint32(p.ExitStatus()),
ExitedAt: p.ExitedAt(),
- }, nil
+ }
+ log.L.Debugf("State succeeded, response: %+v", res)
+ return res, nil
}
// Pause the container.
@@ -646,12 +646,11 @@ func (s *service) Kill(ctx context.Context, r *taskAPI.KillRequest) (*types.Empt
if err != nil {
return nil, err
}
- if p == nil {
- return nil, errdefs.ToGRPCf(errdefs.ErrFailedPrecondition, "container must be created")
- }
if err := p.Kill(ctx, r.Signal, r.All); err != nil {
+ log.L.Debugf("Kill failed: %v", err)
return nil, errdefs.ToGRPC(err)
}
+ log.L.Debugf("Kill succeeded")
return empty, nil
}
@@ -740,7 +739,7 @@ func (s *service) Stats(ctx context.Context, r *taskAPI.StatsRequest) (*taskAPI.
log.L.Debugf("Stats error, id: %s: container not created", r.ID)
return nil, errdefs.ToGRPCf(errdefs.ErrFailedPrecondition, "container must be created")
}
- stats, err := s.task.Runtime().Stats(ctx, s.id)
+ stats, err := s.task.Stats(ctx, s.id)
if err != nil {
log.L.Debugf("Stats error, id: %s: %v", r.ID, err)
return nil, err
@@ -821,17 +820,17 @@ func (s *service) Wait(ctx context.Context, r *taskAPI.WaitRequest) (*taskAPI.Wa
p, err := s.getProcess(r.ExecID)
if err != nil {
+ log.L.Debugf("Wait failed to find process: %v", err)
return nil, err
}
- if p == nil {
- return nil, errdefs.ToGRPCf(errdefs.ErrFailedPrecondition, "container must be created")
- }
p.Wait()
- return &taskAPI.WaitResponse{
+ res := &taskAPI.WaitResponse{
ExitStatus: uint32(p.ExitStatus()),
ExitedAt: p.ExitedAt(),
- }, nil
+ }
+ log.L.Debugf("Wait succeeded, response: %+v", res)
+ return res, nil
}
func (s *service) processExits(ctx context.Context) {
@@ -848,10 +847,7 @@ func (s *service) checkProcesses(ctx context.Context, e proc.Exit) {
if ip, ok := p.(*proc.Init); ok {
// Ensure all children are killed.
log.L.Debugf("Container init process exited, killing all container processes")
- if err := ip.KillAll(ctx); err != nil {
- log.G(ctx).WithError(err).WithField("id", ip.ID()).
- Error("failed to kill init's children")
- }
+ ip.KillAll(ctx)
}
p.SetExited(e.Status)
s.events <- &events.TaskExit{
@@ -909,9 +905,14 @@ func (s *service) forward(ctx context.Context, publisher shim.Publisher) {
func (s *service) getProcess(execID string) (process.Process, error) {
s.mu.Lock()
defer s.mu.Unlock()
+
if execID == "" {
+ if s.task == nil {
+ return nil, errdefs.ToGRPCf(errdefs.ErrFailedPrecondition, "container must be created")
+ }
return s.task, nil
}
+
p := s.processes[execID]
if p == nil {
return nil, errdefs.ToGRPCf(errdefs.ErrNotFound, "process does not exist %s", execID)