diff options
Diffstat (limited to 'pkg/shim')
-rw-r--r-- | pkg/shim/BUILD | 1 | ||||
-rw-r--r-- | pkg/shim/debug.go | 48 | ||||
-rw-r--r-- | pkg/shim/proc/BUILD | 2 | ||||
-rw-r--r-- | pkg/shim/proc/deleted_state.go | 30 | ||||
-rw-r--r-- | pkg/shim/proc/exec.go | 106 | ||||
-rw-r--r-- | pkg/shim/proc/exec_state.go | 84 | ||||
-rw-r--r-- | pkg/shim/proc/init.go | 84 | ||||
-rw-r--r-- | pkg/shim/proc/init_state.go | 175 | ||||
-rw-r--r-- | pkg/shim/proc/proc.go | 18 | ||||
-rw-r--r-- | pkg/shim/runsc/runsc.go | 87 | ||||
-rw-r--r-- | pkg/shim/service.go | 73 | ||||
-rw-r--r-- | pkg/shim/utils/BUILD | 14 | ||||
-rw-r--r-- | pkg/shim/utils/errors.go | 74 | ||||
-rw-r--r-- | pkg/shim/utils/errors_test.go | 50 |
14 files changed, 557 insertions, 289 deletions
diff --git a/pkg/shim/BUILD b/pkg/shim/BUILD index fd6127b97..367765209 100644 --- a/pkg/shim/BUILD +++ b/pkg/shim/BUILD @@ -6,6 +6,7 @@ go_library( name = "shim", srcs = [ "api.go", + "debug.go", "epoll.go", "options.go", "service.go", diff --git a/pkg/shim/debug.go b/pkg/shim/debug.go new file mode 100644 index 000000000..49f01990e --- /dev/null +++ b/pkg/shim/debug.go @@ -0,0 +1,48 @@ +// Copyright 2021 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package shim + +import ( + "os" + "os/signal" + "runtime" + "sync" + "syscall" + + "github.com/containerd/containerd/log" +) + +var once sync.Once + +func setDebugSigHandler() { + once.Do(func() { + dumpCh := make(chan os.Signal, 1) + signal.Notify(dumpCh, syscall.SIGUSR2) + go func() { + buf := make([]byte, 10240) + for range dumpCh { + for { + n := runtime.Stack(buf, true) + if n >= len(buf) { + buf = make([]byte, 2*len(buf)) + continue + } + log.L.Debugf("User requested stack trace:\n%s", buf[:n]) + } + } + }() + log.L.Debugf("For full process dump run: kill -%d %d", syscall.SIGUSR2, os.Getpid()) + }) +} diff --git a/pkg/shim/proc/BUILD b/pkg/shim/proc/BUILD index 544bdc170..c8527a6d9 100644 --- a/pkg/shim/proc/BUILD +++ b/pkg/shim/proc/BUILD @@ -20,7 +20,9 @@ go_library( "//shim:__subpackages__", ], deps = [ + "//pkg/cleanup", "//pkg/shim/runsc", + "//pkg/shim/utils", "@com_github_containerd_console//:go_default_library", "@com_github_containerd_containerd//errdefs:go_default_library", "@com_github_containerd_containerd//log:go_default_library", diff --git a/pkg/shim/proc/deleted_state.go b/pkg/shim/proc/deleted_state.go index d9b970c4d..b0bbe4d7e 100644 --- a/pkg/shim/proc/deleted_state.go +++ b/pkg/shim/proc/deleted_state.go @@ -22,28 +22,38 @@ import ( "github.com/containerd/console" "github.com/containerd/containerd/errdefs" "github.com/containerd/containerd/pkg/process" + runc "github.com/containerd/go-runc" ) type deletedState struct{} -func (*deletedState) Resize(ws console.WinSize) error { - return fmt.Errorf("cannot resize a deleted process.ss") +func (*deletedState) Resize(console.WinSize) error { + return fmt.Errorf("cannot resize a deleted container/process") } -func (*deletedState) Start(ctx context.Context) error { - return fmt.Errorf("cannot start a deleted process.ss") +func (*deletedState) Start(context.Context) error { + return fmt.Errorf("cannot start a deleted container/process") } -func (*deletedState) Delete(ctx context.Context) error { - return fmt.Errorf("cannot delete a deleted process.ss: %w", errdefs.ErrNotFound) +func (*deletedState) Delete(context.Context) error { + return fmt.Errorf("cannot delete a deleted container/process: %w", errdefs.ErrNotFound) } -func (*deletedState) Kill(ctx context.Context, sig uint32, all bool) error { - return fmt.Errorf("cannot kill a deleted process.ss: %w", errdefs.ErrNotFound) +func (*deletedState) Kill(_ context.Context, signal uint32, _ bool) error { + return handleStoppedKill(signal) } -func (*deletedState) SetExited(status int) {} +func (*deletedState) SetExited(int) {} -func (*deletedState) Exec(ctx context.Context, path string, r *ExecConfig) (process.Process, error) { +func (*deletedState) Exec(context.Context, string, *ExecConfig) (process.Process, error) { return nil, fmt.Errorf("cannot exec in a deleted state") } + +func (s *deletedState) State(context.Context) (string, error) { + // There is no "deleted" state, closest one is stopped. + return "stopped", nil +} + +func (s *deletedState) Stats(context.Context, string) (*runc.Stats, error) { + return nil, fmt.Errorf("cannot stat a stopped container/process") +} diff --git a/pkg/shim/proc/exec.go b/pkg/shim/proc/exec.go index e7968d9d5..e0f2ae6fa 100644 --- a/pkg/shim/proc/exec.go +++ b/pkg/shim/proc/exec.go @@ -26,11 +26,13 @@ import ( "github.com/containerd/console" "github.com/containerd/containerd/errdefs" + "github.com/containerd/containerd/log" "github.com/containerd/containerd/pkg/stdio" "github.com/containerd/fifo" runc "github.com/containerd/go-runc" specs "github.com/opencontainers/runtime-spec/specs-go" "golang.org/x/sys/unix" + "gvisor.dev/gvisor/pkg/cleanup" "gvisor.dev/gvisor/pkg/shim/runsc" ) @@ -92,6 +94,12 @@ func (e *execProcess) SetExited(status int) { } func (e *execProcess) setExited(status int) { + if !e.exited.IsZero() { + log.L.Debugf("Exec: status already set to %d, ignoring status: %d", e.status, status) + return + } + + log.L.Debugf("Exec: setting status: %d", status) e.status = status e.exited = time.Now() e.parent.Platform.ShutdownConsole(context.Background(), e.console) @@ -105,7 +113,7 @@ func (e *execProcess) Delete(ctx context.Context) error { return e.execState.Delete(ctx) } -func (e *execProcess) delete(ctx context.Context) error { +func (e *execProcess) delete() error { e.wg.Wait() if e.io != nil { for _, c := range e.closers { @@ -113,12 +121,6 @@ func (e *execProcess) delete(ctx context.Context) error { } e.io.Close() } - pidfile := filepath.Join(e.path, fmt.Sprintf("%s.pid", e.id)) - // silently ignore error - os.Remove(pidfile) - internalPidfile := filepath.Join(e.path, fmt.Sprintf("%s-internal.pid", e.id)) - // silently ignore error - os.Remove(internalPidfile) return nil } @@ -145,16 +147,13 @@ func (e *execProcess) Kill(ctx context.Context, sig uint32, _ bool) error { func (e *execProcess) kill(ctx context.Context, sig uint32, _ bool) error { internalPid := e.internalPid - if internalPid != 0 { - if err := e.parent.runtime.Kill(ctx, e.parent.id, int(sig), &runsc.KillOpts{ - Pid: internalPid, - }); err != nil { - // If this returns error, consider the process has - // already stopped. - // - // TODO: Fix after signal handling is fixed. - return fmt.Errorf("%s: %w", err.Error(), errdefs.ErrNotFound) - } + if internalPid == 0 { + return nil + } + + opts := runsc.KillOpts{Pid: internalPid} + if err := e.parent.runtime.Kill(ctx, e.parent.id, int(sig), &opts); err != nil { + return fmt.Errorf("%s: %w", err.Error(), errdefs.ErrNotFound) } return nil } @@ -174,42 +173,53 @@ func (e *execProcess) Start(ctx context.Context) error { return e.execState.Start(ctx) } -func (e *execProcess) start(ctx context.Context) (err error) { - var ( - socket *runc.Socket - pidfile = filepath.Join(e.path, fmt.Sprintf("%s.pid", e.id)) - internalPidfile = filepath.Join(e.path, fmt.Sprintf("%s-internal.pid", e.id)) - ) - if e.stdio.Terminal { - if socket, err = runc.NewTempConsoleSocket(); err != nil { +func (e *execProcess) start(ctx context.Context) error { + var socket *runc.Socket + + switch { + case e.stdio.Terminal: + s, err := runc.NewTempConsoleSocket() + if err != nil { return fmt.Errorf("failed to create runc console socket: %w", err) } - defer socket.Close() - } else if e.stdio.IsNull() { - if e.io, err = runc.NewNullIO(); err != nil { + defer s.Close() + socket = s + + case e.stdio.IsNull(): + io, err := runc.NewNullIO() + if err != nil { return fmt.Errorf("creating new NULL IO: %w", err) } - } else { - if e.io, err = runc.NewPipeIO(e.parent.IoUID, e.parent.IoGID, withConditionalIO(e.stdio)); err != nil { + e.io = io + + default: + io, err := runc.NewPipeIO(e.parent.IoUID, e.parent.IoGID, withConditionalIO(e.stdio)) + if err != nil { return fmt.Errorf("failed to create runc io pipes: %w", err) } + e.io = io } + opts := &runsc.ExecOpts{ - PidFile: pidfile, - InternalPidFile: internalPidfile, + PidFile: filepath.Join(e.path, fmt.Sprintf("%s.pid", e.id)), + InternalPidFile: filepath.Join(e.path, fmt.Sprintf("%s-internal.pid", e.id)), IO: e.io, Detach: true, } + defer func() { + _ = os.Remove(opts.PidFile) + _ = os.Remove(opts.InternalPidFile) + }() if socket != nil { opts.ConsoleSocket = socket } + eventCh := e.parent.Monitor.Subscribe() - defer func() { - // Unsubscribe if an error is returned. - if err != nil { - e.parent.Monitor.Unsubscribe(eventCh) - } - }() + cu := cleanup.Make(func() { + e.parent.Monitor.Unsubscribe(eventCh) + }) + defer cu.Clean() + if err := e.parent.runtime.Exec(ctx, e.parent.id, e.spec, opts); err != nil { close(e.waitBlock) return e.parent.runtimeError(err, "OCI runtime exec failed") @@ -237,6 +247,7 @@ func (e *execProcess) start(ctx context.Context) (err error) { return fmt.Errorf("failed to start io pipe copy: %w", err) } } + pid, err := runc.ReadPidFile(opts.PidFile) if err != nil { return fmt.Errorf("failed to retrieve OCI runtime exec pid: %w", err) @@ -247,6 +258,7 @@ func (e *execProcess) start(ctx context.Context) (err error) { return fmt.Errorf("failed to retrieve OCI runtime exec internal pid: %w", err) } e.internalPid = internalPid + go func() { defer e.parent.Monitor.Unsubscribe(eventCh) for event := range eventCh { @@ -260,21 +272,25 @@ func (e *execProcess) start(ctx context.Context) (err error) { } } }() + + cu.Release() // cancel cleanup on success. return nil } -func (e *execProcess) Status(ctx context.Context) (string, error) { +func (e *execProcess) Status(context.Context) (string, error) { e.mu.Lock() defer e.mu.Unlock() // if we don't have a pid then the exec process has just been created if e.pid == 0 { return "created", nil } - // if we have a pid and it can be signaled, the process is running - // TODO(random-liu): Use `runsc kill --pid`. - if err := unix.Kill(e.pid, 0); err == nil { - return "running", nil + // This checks that `runsc exec` process is still running. This process has + // the same lifetime as the process executing inside the container. So instead + // of calling `runsc kill --pid`, just do a quick check that `runsc exec` is + // still running. + if err := unix.Kill(e.pid, 0); err != nil { + // Can't signal the process, it must have exited. + return "stopped", nil } - // else if we have a pid but it can nolonger be signaled, it has stopped - return "stopped", nil + return "running", nil } diff --git a/pkg/shim/proc/exec_state.go b/pkg/shim/proc/exec_state.go index 4dcda8b44..8d8ecf541 100644 --- a/pkg/shim/proc/exec_state.go +++ b/pkg/shim/proc/exec_state.go @@ -34,18 +34,21 @@ type execCreatedState struct { p *execProcess } -func (s *execCreatedState) transition(name string) error { - switch name { - case "running": +func (s *execCreatedState) name() string { + return "created" +} + +func (s *execCreatedState) transition(transition stateTransition) { + switch transition { + case running: s.p.execState = &execRunningState{p: s.p} - case "stopped": + case stopped: s.p.execState = &execStoppedState{p: s.p} - case "deleted": + case deleted: s.p.execState = &deletedState{} default: - return fmt.Errorf("invalid state transition %q to %q", stateName(s), name) + panic(fmt.Sprintf("invalid state transition %q to %q", s.name(), transition)) } - return nil } func (s *execCreatedState) Resize(ws console.WinSize) error { @@ -56,14 +59,16 @@ func (s *execCreatedState) Start(ctx context.Context) error { if err := s.p.start(ctx); err != nil { return err } - return s.transition("running") + s.transition(running) + return nil } -func (s *execCreatedState) Delete(ctx context.Context) error { - if err := s.p.delete(ctx); err != nil { +func (s *execCreatedState) Delete(context.Context) error { + if err := s.p.delete(); err != nil { return err } - return s.transition("deleted") + s.transition(deleted) + return nil } func (s *execCreatedState) Kill(ctx context.Context, sig uint32, all bool) error { @@ -72,35 +77,35 @@ func (s *execCreatedState) Kill(ctx context.Context, sig uint32, all bool) error func (s *execCreatedState) SetExited(status int) { s.p.setExited(status) - - if err := s.transition("stopped"); err != nil { - panic(err) - } + s.transition(stopped) } type execRunningState struct { p *execProcess } -func (s *execRunningState) transition(name string) error { - switch name { - case "stopped": +func (s *execRunningState) name() string { + return "running" +} + +func (s *execRunningState) transition(transition stateTransition) { + switch transition { + case stopped: s.p.execState = &execStoppedState{p: s.p} default: - return fmt.Errorf("invalid state transition %q to %q", stateName(s), name) + panic(fmt.Sprintf("invalid state transition %q to %q", s.name(), transition)) } - return nil } func (s *execRunningState) Resize(ws console.WinSize) error { return s.p.resize(ws) } -func (s *execRunningState) Start(ctx context.Context) error { +func (s *execRunningState) Start(context.Context) error { return fmt.Errorf("cannot start a running process") } -func (s *execRunningState) Delete(ctx context.Context) error { +func (s *execRunningState) Delete(context.Context) error { return fmt.Errorf("cannot delete a running process") } @@ -110,45 +115,46 @@ func (s *execRunningState) Kill(ctx context.Context, sig uint32, all bool) error func (s *execRunningState) SetExited(status int) { s.p.setExited(status) - - if err := s.transition("stopped"); err != nil { - panic(err) - } + s.transition(stopped) } type execStoppedState struct { p *execProcess } -func (s *execStoppedState) transition(name string) error { - switch name { - case "deleted": +func (s *execStoppedState) name() string { + return "stopped" +} + +func (s *execStoppedState) transition(transition stateTransition) { + switch transition { + case deleted: s.p.execState = &deletedState{} default: - return fmt.Errorf("invalid state transition %q to %q", stateName(s), name) + panic(fmt.Sprintf("invalid state transition %q to %q", s.name(), transition)) } - return nil } -func (s *execStoppedState) Resize(ws console.WinSize) error { +func (s *execStoppedState) Resize(console.WinSize) error { return fmt.Errorf("cannot resize a stopped container") } -func (s *execStoppedState) Start(ctx context.Context) error { +func (s *execStoppedState) Start(context.Context) error { return fmt.Errorf("cannot start a stopped process") } -func (s *execStoppedState) Delete(ctx context.Context) error { - if err := s.p.delete(ctx); err != nil { +func (s *execStoppedState) Delete(context.Context) error { + if err := s.p.delete(); err != nil { return err } - return s.transition("deleted") + s.transition(deleted) + return nil } -func (s *execStoppedState) Kill(ctx context.Context, sig uint32, all bool) error { - return s.p.kill(ctx, sig, all) +func (s *execStoppedState) Kill(_ context.Context, sig uint32, _ bool) error { + return handleStoppedKill(sig) } -func (s *execStoppedState) SetExited(status int) { +func (s *execStoppedState) SetExited(int) { // no op } diff --git a/pkg/shim/proc/init.go b/pkg/shim/proc/init.go index 664465e0d..6bf090813 100644 --- a/pkg/shim/proc/init.go +++ b/pkg/shim/proc/init.go @@ -39,6 +39,8 @@ import ( "gvisor.dev/gvisor/pkg/shim/runsc" ) +const statusStopped = "stopped" + // Init represents an initial process for a container. type Init struct { wg sync.WaitGroup @@ -201,10 +203,15 @@ func (p *Init) ExitedAt() time.Time { func (p *Init) Status(ctx context.Context) (string, error) { p.mu.Lock() defer p.mu.Unlock() + + return p.initState.State(ctx) +} + +func (p *Init) state(ctx context.Context) (string, error) { c, err := p.runtime.State(ctx, p.id) if err != nil { if strings.Contains(err.Error(), "does not exist") { - return "stopped", nil + return statusStopped, nil } return "", p.runtimeError(err, "OCI runtime state failed") } @@ -231,10 +238,7 @@ func (p *Init) start(ctx context.Context) error { status, err := p.runtime.Wait(context.Background(), p.id) if err != nil { log.G(ctx).WithError(err).Errorf("Failed to wait for container %q", p.id) - // TODO(random-liu): Handle runsc kill error. - if err := p.killAll(ctx); err != nil { - log.G(ctx).WithError(err).Errorf("Failed to kill container %q", p.id) - } + p.killAllLocked(ctx) status = internalErrorCode } ExitCh <- Exit{ @@ -255,6 +259,12 @@ func (p *Init) SetExited(status int) { } func (p *Init) setExited(status int) { + if !p.exited.IsZero() { + log.L.Debugf("Status already set to %d, ignoring status: %d", p.status, status) + return + } + + log.L.Debugf("Setting status: %d", status) p.exited = time.Now() p.status = status p.Platform.ShutdownConsole(context.Background(), p.console) @@ -270,15 +280,16 @@ func (p *Init) Delete(ctx context.Context) error { } func (p *Init) delete(ctx context.Context) error { - p.killAll(ctx) + p.killAllLocked(ctx) p.wg.Wait() + err := p.runtime.Delete(ctx, p.id, nil) - // ignore errors if a runtime has already deleted the process - // but we still hold metadata and pipes - // - // this is common during a checkpoint, runc will delete the container state - // after a checkpoint and the container will no longer exist within runc if err != nil { + // ignore errors if a runtime has already deleted the process + // but we still hold metadata and pipes + // + // this is common during a checkpoint, runc will delete the container state + // after a checkpoint and the container will no longer exist within runc if strings.Contains(err.Error(), "does not exist") { err = nil } else { @@ -326,29 +337,24 @@ func (p *Init) Kill(ctx context.Context, signal uint32, all bool) error { return p.initState.Kill(ctx, signal, all) } -func (p *Init) kill(context context.Context, signal uint32, all bool) error { +func (p *Init) kill(ctx context.Context, signal uint32, all bool) error { var ( killErr error backoff = 100 * time.Millisecond ) - timeout := 1 * time.Second - for start := time.Now(); time.Now().Sub(start) < timeout; { - c, err := p.runtime.State(context, p.id) + const timeout = time.Second + for start := time.Now(); time.Since(start) < timeout; { + state, err := p.initState.State(ctx) if err != nil { - if strings.Contains(err.Error(), "does not exist") { - return fmt.Errorf("no such process: %w", errdefs.ErrNotFound) - } return p.runtimeError(err, "OCI runtime state failed") } // For runsc, signal only works when container is running state. // If the container is not in running state, directly return // "no such process" - if p.convertStatus(c.Status) == "stopped" { + if state == statusStopped { return fmt.Errorf("no such process: %w", errdefs.ErrNotFound) } - killErr = p.runtime.Kill(context, p.id, int(signal), &runsc.KillOpts{ - All: all, - }) + killErr = p.runtime.Kill(ctx, p.id, int(signal), &runsc.KillOpts{All: all}) if killErr == nil { return nil } @@ -358,22 +364,18 @@ func (p *Init) kill(context context.Context, signal uint32, all bool) error { return p.runtimeError(killErr, "kill timeout") } -// KillAll kills all processes belonging to the init process. -func (p *Init) KillAll(context context.Context) error { +// KillAll kills all processes belonging to the init process. If +// `runsc kill --all` returns error, assume the container has already stopped. +func (p *Init) KillAll(context context.Context) { p.mu.Lock() defer p.mu.Unlock() - return p.killAll(context) + p.killAllLocked(context) } -func (p *Init) killAll(context context.Context) error { - p.runtime.Kill(context, p.id, int(unix.SIGKILL), &runsc.KillOpts{ - All: true, - }) - // Ignore error handling for `runsc kill --all` for now. - // * If it doesn't return error, it is good; - // * If it returns error, consider the container has already stopped. - // TODO: Fix `runsc kill --all` error handling. - return nil +func (p *Init) killAllLocked(context context.Context) { + if err := p.runtime.Kill(context, p.id, int(unix.SIGKILL), &runsc.KillOpts{All: true}); err != nil { + log.L.Warningf("Ignoring error killing container %q: %v", p.id, err) + } } // Stdin returns the stdin of the process. @@ -396,7 +398,6 @@ func (p *Init) Exec(ctx context.Context, path string, r *ExecConfig) (process.Pr // exec returns a new exec'd process. func (p *Init) exec(path string, r *ExecConfig) (process.Process, error) { - // process exec request var spec specs.Process if err := json.Unmarshal(r.Spec.Value, &spec); err != nil { return nil, err @@ -420,6 +421,17 @@ func (p *Init) exec(path string, r *ExecConfig) (process.Process, error) { return e, nil } +func (p *Init) Stats(ctx context.Context, id string) (*runc.Stats, error) { + p.mu.Lock() + defer p.mu.Unlock() + + return p.initState.Stats(ctx, id) +} + +func (p *Init) stats(ctx context.Context, id string) (*runc.Stats, error) { + return p.Runtime().Stats(ctx, id) +} + // Stdio returns the stdio of the process. func (p *Init) Stdio() stdio.Stdio { return p.stdio @@ -444,7 +456,7 @@ func (p *Init) runtimeError(rErr error, msg string) error { func (p *Init) convertStatus(status string) string { if status == "created" && !p.Sandbox && p.status == internalErrorCode { // Treat start failure state for non-root container as stopped. - return "stopped" + return statusStopped } return status } diff --git a/pkg/shim/proc/init_state.go b/pkg/shim/proc/init_state.go index 0065fc385..5347ddefe 100644 --- a/pkg/shim/proc/init_state.go +++ b/pkg/shim/proc/init_state.go @@ -19,16 +19,40 @@ import ( "context" "fmt" - "github.com/containerd/console" "github.com/containerd/containerd/errdefs" "github.com/containerd/containerd/pkg/process" + runc "github.com/containerd/go-runc" + "golang.org/x/sys/unix" + "gvisor.dev/gvisor/pkg/shim/utils" ) +type stateTransition int + +const ( + running stateTransition = iota + stopped + deleted +) + +func (s stateTransition) String() string { + switch s { + case running: + return "running" + case stopped: + return "stopped" + case deleted: + return "deleted" + default: + panic(fmt.Sprintf("unknown state: %d", s)) + } +} + type initState interface { - Resize(console.WinSize) error Start(context.Context) error Delete(context.Context) error Exec(context.Context, string, *ExecConfig) (process.Process, error) + State(ctx context.Context) (string, error) + Stats(context.Context, string) (*runc.Stats, error) Kill(context.Context, uint32, bool) error SetExited(int) } @@ -37,22 +61,21 @@ type createdState struct { p *Init } -func (s *createdState) transition(name string) error { - switch name { - case "running": +func (s *createdState) name() string { + return "created" +} + +func (s *createdState) transition(transition stateTransition) { + switch transition { + case running: s.p.initState = &runningState{p: s.p} - case "stopped": - s.p.initState = &stoppedState{p: s.p} - case "deleted": + case stopped: + s.p.initState = &stoppedState{process: s.p} + case deleted: s.p.initState = &deletedState{} default: - return fmt.Errorf("invalid state transition %q to %q", stateName(s), name) + panic(fmt.Sprintf("invalid state transition %q to %q", s.name(), transition)) } - return nil -} - -func (s *createdState) Resize(ws console.WinSize) error { - return s.p.resize(ws) } func (s *createdState) Start(ctx context.Context) error { @@ -66,20 +89,20 @@ func (s *createdState) Start(ctx context.Context) error { if !s.p.Sandbox { s.p.io.Close() s.p.setExited(internalErrorCode) - if err := s.transition("stopped"); err != nil { - panic(err) - } + s.transition(stopped) } return err } - return s.transition("running") + s.transition(running) + return nil } func (s *createdState) Delete(ctx context.Context) error { if err := s.p.delete(ctx); err != nil { return err } - return s.transition("deleted") + s.transition(deleted) + return nil } func (s *createdState) Kill(ctx context.Context, sig uint32, all bool) error { @@ -88,40 +111,48 @@ func (s *createdState) Kill(ctx context.Context, sig uint32, all bool) error { func (s *createdState) SetExited(status int) { s.p.setExited(status) - - if err := s.transition("stopped"); err != nil { - panic(err) - } + s.transition(stopped) } func (s *createdState) Exec(ctx context.Context, path string, r *ExecConfig) (process.Process, error) { return s.p.exec(path, r) } +func (s *createdState) State(ctx context.Context) (string, error) { + state, err := s.p.state(ctx) + if err == nil && state == statusStopped { + s.transition(stopped) + } + return state, err +} + +func (s *createdState) Stats(ctx context.Context, id string) (*runc.Stats, error) { + return s.p.stats(ctx, id) +} + type runningState struct { p *Init } -func (s *runningState) transition(name string) error { - switch name { - case "stopped": - s.p.initState = &stoppedState{p: s.p} - default: - return fmt.Errorf("invalid state transition %q to %q", stateName(s), name) - } - return nil +func (s *runningState) name() string { + return "running" } -func (s *runningState) Resize(ws console.WinSize) error { - return s.p.resize(ws) +func (s *runningState) transition(transition stateTransition) { + switch transition { + case stopped: + s.p.initState = &stoppedState{process: s.p} + default: + panic(fmt.Sprintf("invalid state transition %q to %q", s.name(), transition)) + } } func (s *runningState) Start(ctx context.Context) error { - return fmt.Errorf("cannot start a running process.ss") + return fmt.Errorf("cannot start a running container") } func (s *runningState) Delete(ctx context.Context) error { - return fmt.Errorf("cannot delete a running process.ss") + return fmt.Errorf("cannot delete a running container") } func (s *runningState) Kill(ctx context.Context, sig uint32, all bool) error { @@ -130,53 +161,81 @@ func (s *runningState) Kill(ctx context.Context, sig uint32, all bool) error { func (s *runningState) SetExited(status int) { s.p.setExited(status) + s.transition(stopped) +} + +func (s *runningState) Exec(_ context.Context, path string, r *ExecConfig) (process.Process, error) { + return s.p.exec(path, r) +} - if err := s.transition("stopped"); err != nil { - panic(err) +func (s *runningState) State(ctx context.Context) (string, error) { + state, err := s.p.state(ctx) + if err == nil && state == "stopped" { + s.transition(stopped) } + return state, err } -func (s *runningState) Exec(ctx context.Context, path string, r *ExecConfig) (process.Process, error) { - return s.p.exec(path, r) +func (s *runningState) Stats(ctx context.Context, id string) (*runc.Stats, error) { + return s.p.stats(ctx, id) } type stoppedState struct { - p *Init + process *Init } -func (s *stoppedState) transition(name string) error { - switch name { - case "deleted": - s.p.initState = &deletedState{} - default: - return fmt.Errorf("invalid state transition %q to %q", stateName(s), name) - } - return nil +func (s *stoppedState) name() string { + return "stopped" } -func (s *stoppedState) Resize(ws console.WinSize) error { - return fmt.Errorf("cannot resize a stopped container") +func (s *stoppedState) transition(transition stateTransition) { + switch transition { + case deleted: + s.process.initState = &deletedState{} + default: + panic(fmt.Sprintf("invalid state transition %q to %q", s.name(), transition)) + } } -func (s *stoppedState) Start(ctx context.Context) error { - return fmt.Errorf("cannot start a stopped process.ss") +func (s *stoppedState) Start(context.Context) error { + return fmt.Errorf("cannot start a stopped container") } func (s *stoppedState) Delete(ctx context.Context) error { - if err := s.p.delete(ctx); err != nil { + if err := s.process.delete(ctx); err != nil { return err } - return s.transition("deleted") + s.transition(deleted) + return nil } -func (s *stoppedState) Kill(ctx context.Context, sig uint32, all bool) error { - return errdefs.ToGRPCf(errdefs.ErrNotFound, "process.ss %s not found", s.p.id) +func (s *stoppedState) Kill(_ context.Context, signal uint32, _ bool) error { + return handleStoppedKill(signal) } func (s *stoppedState) SetExited(status int) { - // no op + s.process.setExited(status) } -func (s *stoppedState) Exec(ctx context.Context, path string, r *ExecConfig) (process.Process, error) { +func (s *stoppedState) Exec(context.Context, string, *ExecConfig) (process.Process, error) { return nil, fmt.Errorf("cannot exec in a stopped state") } + +func (s *stoppedState) State(context.Context) (string, error) { + return "stopped", nil +} + +func (s *stoppedState) Stats(context.Context, string) (*runc.Stats, error) { + return nil, fmt.Errorf("cannot stat a stopped container") +} + +func handleStoppedKill(signal uint32) error { + switch unix.Signal(signal) { + case unix.SIGTERM, unix.SIGKILL: + // Container is already stopped, so everything inside the container has + // already been killed. + return nil + default: + return utils.ErrToGRPCf(errdefs.ErrNotFound, "process not found") + } +} diff --git a/pkg/shim/proc/proc.go b/pkg/shim/proc/proc.go index edba3fca5..89ad3f505 100644 --- a/pkg/shim/proc/proc.go +++ b/pkg/shim/proc/proc.go @@ -17,23 +17,5 @@ // the sandbox process running the container. package proc -import ( - "fmt" -) - // RunscRoot is the path to the root runsc state directory. const RunscRoot = "/run/containerd/runsc" - -func stateName(v interface{}) string { - switch v.(type) { - case *runningState, *execRunningState: - return "running" - case *createdState, *execCreatedState: - return "created" - case *deletedState: - return "deleted" - case *stoppedState: - return "stopped" - } - panic(fmt.Errorf("invalid state %v", v)) -} diff --git a/pkg/shim/runsc/runsc.go b/pkg/shim/runsc/runsc.go index ff0521d73..888cb0bcb 100644 --- a/pkg/shim/runsc/runsc.go +++ b/pkg/shim/runsc/runsc.go @@ -17,6 +17,7 @@ package runsc import ( + "bytes" "context" "encoding/json" "fmt" @@ -73,9 +74,9 @@ type Runsc struct { // List returns all containers created inside the provided runsc root directory. func (r *Runsc) List(context context.Context) ([]*runc.Container, error) { - data, err := cmdOutput(r.command(context, "list", "--format=json"), false) + data, stderr, err := cmdOutput(r.command(context, "list", "--format=json"), false) if err != nil { - return nil, err + return nil, fmt.Errorf("%w: %s", err, stderr) } var out []*runc.Container if err := json.Unmarshal(data, &out); err != nil { @@ -86,9 +87,9 @@ func (r *Runsc) List(context context.Context) ([]*runc.Container, error) { // State returns the state for the container provided by id. func (r *Runsc) State(context context.Context, id string) (*runc.Container, error) { - data, err := cmdOutput(r.command(context, "state", id), true) + data, stderr, err := cmdOutput(r.command(context, "state", id), false) if err != nil { - return nil, fmt.Errorf("%s: %s", err, data) + return nil, fmt.Errorf("%w: %s", err, stderr) } var c runc.Container if err := json.Unmarshal(data, &c); err != nil { @@ -142,9 +143,9 @@ func (r *Runsc) Create(context context.Context, id, bundle string, opts *CreateO } if cmd.Stdout == nil && cmd.Stderr == nil { - data, err := cmdOutput(cmd, true) + out, _, err := cmdOutput(cmd, true) if err != nil { - return fmt.Errorf("%s: %s", err, data) + return fmt.Errorf("%w: %s", err, out) } return nil } @@ -168,15 +169,15 @@ func (r *Runsc) Create(context context.Context, id, bundle string, opts *CreateO } func (r *Runsc) Pause(context context.Context, id string) error { - if _, err := cmdOutput(r.command(context, "pause", id), true); err != nil { - return fmt.Errorf("unable to pause: %w", err) + if out, _, err := cmdOutput(r.command(context, "pause", id), true); err != nil { + return fmt.Errorf("unable to pause: %w: %s", err, out) } return nil } func (r *Runsc) Resume(context context.Context, id string) error { - if _, err := cmdOutput(r.command(context, "resume", id), true); err != nil { - return fmt.Errorf("unable to resume: %w", err) + if out, _, err := cmdOutput(r.command(context, "resume", id), true); err != nil { + return fmt.Errorf("unable to resume: %w: %s", err, out) } return nil } @@ -189,9 +190,9 @@ func (r *Runsc) Start(context context.Context, id string, cio runc.IO) error { } if cmd.Stdout == nil && cmd.Stderr == nil { - data, err := cmdOutput(cmd, true) + out, _, err := cmdOutput(cmd, true) if err != nil { - return fmt.Errorf("%s: %s", err, data) + return fmt.Errorf("%w: %s", err, out) } return nil } @@ -221,12 +222,10 @@ type waitResult struct { } // Wait will wait for a running container, and return its exit status. -// -// TODO(random-liu): Add exec process support. func (r *Runsc) Wait(context context.Context, id string) (int, error) { - data, err := cmdOutput(r.command(context, "wait", id), true) + data, stderr, err := cmdOutput(r.command(context, "wait", id), false) if err != nil { - return 0, fmt.Errorf("%s: %s", err, data) + return 0, fmt.Errorf("%w: %s", err, stderr) } var res waitResult if err := json.Unmarshal(data, &res); err != nil { @@ -294,9 +293,9 @@ func (r *Runsc) Exec(context context.Context, id string, spec specs.Process, opt opts.Set(cmd) } if cmd.Stdout == nil && cmd.Stderr == nil { - data, err := cmdOutput(cmd, true) + out, _, err := cmdOutput(cmd, true) if err != nil { - return fmt.Errorf("%s: %s", err, data) + return fmt.Errorf("%w: %s", err, out) } return nil } @@ -391,20 +390,12 @@ func (r *Runsc) Kill(context context.Context, id string, sig int, opts *KillOpts // Stats return the stats for a container like cpu, memory, and I/O. func (r *Runsc) Stats(context context.Context, id string) (*runc.Stats, error) { cmd := r.command(context, "events", "--stats", id) - rd, err := cmd.StdoutPipe() - if err != nil { - return nil, err - } - ec, err := Monitor.Start(cmd) + data, stderr, err := cmdOutput(cmd, false) if err != nil { - return nil, err + return nil, fmt.Errorf("%w: %s", err, stderr) } - defer func() { - rd.Close() - Monitor.Wait(cmd, ec) - }() var e runc.Event - if err := json.NewDecoder(rd).Decode(&e); err != nil { + if err := json.Unmarshal(data, &e); err != nil { log.L.Debugf("Parsing events error: %v", err) return nil, err } @@ -459,9 +450,9 @@ func (r *Runsc) Events(context context.Context, id string, interval time.Duratio // Ps lists all the processes inside the container returning their pids. func (r *Runsc) Ps(context context.Context, id string) ([]int, error) { - data, err := cmdOutput(r.command(context, "ps", "--format", "json", id), true) + data, stderr, err := cmdOutput(r.command(context, "ps", "--format", "json", id), false) if err != nil { - return nil, fmt.Errorf("%s: %s", err, data) + return nil, fmt.Errorf("%w: %s", err, stderr) } var pids []int if err := json.Unmarshal(data, &pids); err != nil { @@ -472,9 +463,9 @@ func (r *Runsc) Ps(context context.Context, id string) ([]int, error) { // Top lists all the processes inside the container returning the full ps data. func (r *Runsc) Top(context context.Context, id string) (*runc.TopResults, error) { - data, err := cmdOutput(r.command(context, "ps", "--format", "table", id), true) + data, stderr, err := cmdOutput(r.command(context, "ps", "--format", "table", id), false) if err != nil { - return nil, fmt.Errorf("%s: %s", err, data) + return nil, fmt.Errorf("%w: %s", err, stderr) } topResults, err := runc.ParsePSOutput(data) @@ -517,9 +508,9 @@ func (r *Runsc) runOrError(cmd *exec.Cmd) error { } return err } - data, err := cmdOutput(cmd, true) + out, _, err := cmdOutput(cmd, true) if err != nil { - return fmt.Errorf("%s: %s", err, data) + return fmt.Errorf("%w: %s", err, out) } return nil } @@ -540,23 +531,29 @@ func (r *Runsc) command(context context.Context, args ...string) *exec.Cmd { return cmd } -func cmdOutput(cmd *exec.Cmd, combined bool) ([]byte, error) { - b := getBuf() - defer putBuf(b) +func cmdOutput(cmd *exec.Cmd, combined bool) ([]byte, []byte, error) { + stdout := getBuf() + defer putBuf(stdout) + cmd.Stdout = stdout + cmd.Stderr = stdout - cmd.Stdout = b - if combined { - cmd.Stderr = b + var stderr *bytes.Buffer + if !combined { + stderr = getBuf() + defer putBuf(stderr) + cmd.Stderr = stderr } ec, err := Monitor.Start(cmd) if err != nil { - return nil, err + return nil, nil, err } status, err := Monitor.Wait(cmd, ec) if err == nil && status != 0 { - err = fmt.Errorf("%s did not terminate sucessfully", cmd.Args[0]) + err = fmt.Errorf("%q did not terminate sucessfully", cmd.Args[0]) } - - return b.Bytes(), err + if stderr == nil { + return stdout.Bytes(), nil, err + } + return stdout.Bytes(), stderr.Bytes(), err } diff --git a/pkg/shim/service.go b/pkg/shim/service.go index 1f9adcb65..24e3b7a82 100644 --- a/pkg/shim/service.go +++ b/pkg/shim/service.go @@ -81,8 +81,6 @@ const ( // New returns a new shim service that can be used via GRPC. func New(ctx context.Context, id string, publisher shim.Publisher, cancel func()) (shim.Shim, error) { - log.L.Debugf("service.New, id: %s", id) - var opts shim.Opts if ctxOpts := ctx.Value(shim.OptsKey{}); ctxOpts != nil { opts = ctxOpts.(shim.Opts) @@ -304,8 +302,6 @@ func (s *service) Cleanup(ctx context.Context) (*taskAPI.DeleteResponse, error) // Create creates a new initial process and container with the underlying OCI // runtime. func (s *service) Create(ctx context.Context, r *taskAPI.CreateTaskRequest) (*taskAPI.CreateTaskResponse, error) { - log.L.Debugf("Create, id: %s, bundle: %q", r.ID, r.Bundle) - s.mu.Lock() defer s.mu.Unlock() @@ -396,6 +392,9 @@ func (s *service) Create(ctx context.Context, r *taskAPI.CreateTaskRequest) (*ta log.L.Debugf("stdout: %s", r.Stdout) log.L.Debugf("stderr: %s", r.Stderr) log.L.Debugf("***************************") + if log.L.Logger.IsLevelEnabled(logrus.DebugLevel) { + setDebugSigHandler() + } } // Save state before any action is taken to ensure Cleanup() will have all @@ -453,10 +452,10 @@ func (s *service) Create(ctx context.Context, r *taskAPI.CreateTaskRequest) (*ta } process, err := newInit(r.Bundle, filepath.Join(r.Bundle, "work"), ns, s.platform, config, &s.opts, st.Rootfs) if err != nil { - return nil, errdefs.ToGRPC(err) + return nil, utils.ErrToGRPC(err) } if err := process.Create(ctx, config); err != nil { - return nil, errdefs.ToGRPC(err) + return nil, utils.ErrToGRPC(err) } // Set up OOM notification on the sandbox's cgroup. This is done on @@ -506,9 +505,6 @@ func (s *service) Delete(ctx context.Context, r *taskAPI.DeleteRequest) (*taskAP if err != nil { return nil, err } - if p == nil { - return nil, errdefs.ToGRPCf(errdefs.ErrFailedPrecondition, "container must be created") - } if err := p.Delete(ctx); err != nil { return nil, err } @@ -534,10 +530,10 @@ func (s *service) Exec(ctx context.Context, r *taskAPI.ExecProcessRequest) (*typ p := s.processes[r.ExecID] s.mu.Unlock() if p != nil { - return nil, errdefs.ToGRPCf(errdefs.ErrAlreadyExists, "id %s", r.ExecID) + return nil, utils.ErrToGRPCf(errdefs.ErrAlreadyExists, "id %s", r.ExecID) } if s.task == nil { - return nil, errdefs.ToGRPCf(errdefs.ErrFailedPrecondition, "container must be created") + return nil, utils.ErrToGRPCf(errdefs.ErrFailedPrecondition, "container must be created") } process, err := s.task.Exec(ctx, s.bundle, &proc.ExecConfig{ ID: r.ExecID, @@ -548,7 +544,7 @@ func (s *service) Exec(ctx context.Context, r *taskAPI.ExecProcessRequest) (*typ Spec: r.Spec, }) if err != nil { - return nil, errdefs.ToGRPC(err) + return nil, utils.ErrToGRPC(err) } s.mu.Lock() s.processes[r.ExecID] = process @@ -569,7 +565,7 @@ func (s *service) ResizePty(ctx context.Context, r *taskAPI.ResizePtyRequest) (* Height: uint16(r.Height), } if err := p.Resize(ws); err != nil { - return nil, errdefs.ToGRPC(err) + return nil, utils.ErrToGRPC(err) } return empty, nil } @@ -580,10 +576,12 @@ func (s *service) State(ctx context.Context, r *taskAPI.StateRequest) (*taskAPI. p, err := s.getProcess(r.ExecID) if err != nil { + log.L.Debugf("State failed to find process: %v", err) return nil, err } st, err := p.Status(ctx) if err != nil { + log.L.Debugf("State failed: %v", err) return nil, err } status := task.StatusUnknown @@ -596,7 +594,7 @@ func (s *service) State(ctx context.Context, r *taskAPI.StateRequest) (*taskAPI. status = task.StatusStopped } sio := p.Stdio() - return &taskAPI.StateResponse{ + res := &taskAPI.StateResponse{ ID: p.ID(), Bundle: s.bundle, Pid: uint32(p.Pid()), @@ -607,7 +605,9 @@ func (s *service) State(ctx context.Context, r *taskAPI.StateRequest) (*taskAPI. Terminal: sio.Terminal, ExitStatus: uint32(p.ExitStatus()), ExitedAt: p.ExitedAt(), - }, nil + } + log.L.Debugf("State succeeded, response: %+v", res) + return res, nil } // Pause the container. @@ -615,7 +615,7 @@ func (s *service) Pause(ctx context.Context, r *taskAPI.PauseRequest) (*types.Em log.L.Debugf("Pause, id: %s", r.ID) if s.task == nil { log.L.Debugf("Pause error, id: %s: container not created", r.ID) - return nil, errdefs.ToGRPCf(errdefs.ErrFailedPrecondition, "container must be created") + return nil, utils.ErrToGRPCf(errdefs.ErrFailedPrecondition, "container must be created") } err := s.task.Runtime().Pause(ctx, r.ID) if err != nil { @@ -629,7 +629,7 @@ func (s *service) Resume(ctx context.Context, r *taskAPI.ResumeRequest) (*types. log.L.Debugf("Resume, id: %s", r.ID) if s.task == nil { log.L.Debugf("Resume error, id: %s: container not created", r.ID) - return nil, errdefs.ToGRPCf(errdefs.ErrFailedPrecondition, "container must be created") + return nil, utils.ErrToGRPCf(errdefs.ErrFailedPrecondition, "container must be created") } err := s.task.Runtime().Resume(ctx, r.ID) if err != nil { @@ -646,12 +646,11 @@ func (s *service) Kill(ctx context.Context, r *taskAPI.KillRequest) (*types.Empt if err != nil { return nil, err } - if p == nil { - return nil, errdefs.ToGRPCf(errdefs.ErrFailedPrecondition, "container must be created") - } if err := p.Kill(ctx, r.Signal, r.All); err != nil { - return nil, errdefs.ToGRPC(err) + log.L.Debugf("Kill failed: %v", err) + return nil, utils.ErrToGRPC(err) } + log.L.Debugf("Kill succeeded") return empty, nil } @@ -661,7 +660,7 @@ func (s *service) Pids(ctx context.Context, r *taskAPI.PidsRequest) (*taskAPI.Pi pids, err := s.getContainerPids(ctx, r.ID) if err != nil { - return nil, errdefs.ToGRPC(err) + return nil, utils.ErrToGRPC(err) } var processes []*task.ProcessInfo for _, pid := range pids { @@ -707,7 +706,7 @@ func (s *service) CloseIO(ctx context.Context, r *taskAPI.CloseIORequest) (*type // Checkpoint checkpoints the container. func (s *service) Checkpoint(ctx context.Context, r *taskAPI.CheckpointTaskRequest) (*types.Empty, error) { log.L.Debugf("Checkpoint, id: %s", r.ID) - return empty, errdefs.ToGRPC(errdefs.ErrNotImplemented) + return empty, utils.ErrToGRPC(errdefs.ErrNotImplemented) } // Connect returns shim information such as the shim's pid. @@ -738,9 +737,9 @@ func (s *service) Stats(ctx context.Context, r *taskAPI.StatsRequest) (*taskAPI. log.L.Debugf("Stats, id: %s", r.ID) if s.task == nil { log.L.Debugf("Stats error, id: %s: container not created", r.ID) - return nil, errdefs.ToGRPCf(errdefs.ErrFailedPrecondition, "container must be created") + return nil, utils.ErrToGRPCf(errdefs.ErrFailedPrecondition, "container must be created") } - stats, err := s.task.Runtime().Stats(ctx, s.id) + stats, err := s.task.Stats(ctx, s.id) if err != nil { log.L.Debugf("Stats error, id: %s: %v", r.ID, err) return nil, err @@ -812,7 +811,7 @@ func (s *service) Stats(ctx context.Context, r *taskAPI.StatsRequest) (*taskAPI. // Update updates a running container. func (s *service) Update(ctx context.Context, r *taskAPI.UpdateTaskRequest) (*types.Empty, error) { - return empty, errdefs.ToGRPC(errdefs.ErrNotImplemented) + return empty, utils.ErrToGRPC(errdefs.ErrNotImplemented) } // Wait waits for a process to exit. @@ -821,17 +820,17 @@ func (s *service) Wait(ctx context.Context, r *taskAPI.WaitRequest) (*taskAPI.Wa p, err := s.getProcess(r.ExecID) if err != nil { + log.L.Debugf("Wait failed to find process: %v", err) return nil, err } - if p == nil { - return nil, errdefs.ToGRPCf(errdefs.ErrFailedPrecondition, "container must be created") - } p.Wait() - return &taskAPI.WaitResponse{ + res := &taskAPI.WaitResponse{ ExitStatus: uint32(p.ExitStatus()), ExitedAt: p.ExitedAt(), - }, nil + } + log.L.Debugf("Wait succeeded, response: %+v", res) + return res, nil } func (s *service) processExits(ctx context.Context) { @@ -848,10 +847,7 @@ func (s *service) checkProcesses(ctx context.Context, e proc.Exit) { if ip, ok := p.(*proc.Init); ok { // Ensure all children are killed. log.L.Debugf("Container init process exited, killing all container processes") - if err := ip.KillAll(ctx); err != nil { - log.G(ctx).WithError(err).WithField("id", ip.ID()). - Error("failed to kill init's children") - } + ip.KillAll(ctx) } p.SetExited(e.Status) s.events <- &events.TaskExit{ @@ -909,12 +905,17 @@ func (s *service) forward(ctx context.Context, publisher shim.Publisher) { func (s *service) getProcess(execID string) (process.Process, error) { s.mu.Lock() defer s.mu.Unlock() + if execID == "" { + if s.task == nil { + return nil, utils.ErrToGRPCf(errdefs.ErrFailedPrecondition, "container must be created") + } return s.task, nil } + p := s.processes[execID] if p == nil { - return nil, errdefs.ToGRPCf(errdefs.ErrNotFound, "process does not exist %s", execID) + return nil, utils.ErrToGRPCf(errdefs.ErrNotFound, "process does not exist %s", execID) } return p, nil } diff --git a/pkg/shim/utils/BUILD b/pkg/shim/utils/BUILD index 54a0aabb7..2eb82f63c 100644 --- a/pkg/shim/utils/BUILD +++ b/pkg/shim/utils/BUILD @@ -6,6 +6,7 @@ go_library( name = "utils", srcs = [ "annotations.go", + "errors.go", "utils.go", "volumes.go", ], @@ -14,14 +15,23 @@ go_library( "//shim:__subpackages__", ], deps = [ + "@com_github_containerd_containerd//errdefs:go_default_library", "@com_github_opencontainers_runtime_spec//specs-go:go_default_library", + "@org_golang_google_grpc//codes:go_default_library", + "@org_golang_google_grpc//status:go_default_library", ], ) go_test( name = "utils_test", size = "small", - srcs = ["volumes_test.go"], + srcs = [ + "errors_test.go", + "volumes_test.go", + ], library = ":utils", - deps = ["@com_github_opencontainers_runtime_spec//specs-go:go_default_library"], + deps = [ + "@com_github_containerd_containerd//errdefs:go_default_library", + "@com_github_opencontainers_runtime_spec//specs-go:go_default_library", + ], ) diff --git a/pkg/shim/utils/errors.go b/pkg/shim/utils/errors.go new file mode 100644 index 000000000..971d68c36 --- /dev/null +++ b/pkg/shim/utils/errors.go @@ -0,0 +1,74 @@ +// Copyright 2021 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package utils + +import ( + "context" + "errors" + "fmt" + + "github.com/containerd/containerd/errdefs" + "google.golang.org/grpc/codes" + "google.golang.org/grpc/status" +) + +// ErrToGRPC wraps containerd's ToGRPC error mapper which depends on +// github.com/pkg/errors to work correctly. Once we upgrade to containerd v1.4, +// this function can go away and we can use errdefs.ToGRPC directly instead. +// +// TODO(gvisor.dev/issue/6232): Remove after upgrading to containerd v1.4 +func ErrToGRPC(err error) error { + return errToGRPCMsg(err, err.Error()) +} + +// ErrToGRPCf maps the error to grpc error codes, assembling the formatting +// string and combining it with the target error string. +// +// TODO(gvisor.dev/issue/6232): Remove after upgrading to containerd v1.4 +func ErrToGRPCf(err error, format string, args ...interface{}) error { + formatted := fmt.Sprintf(format, args...) + msg := fmt.Sprintf("%s: %s", formatted, err.Error()) + return errToGRPCMsg(err, msg) +} + +func errToGRPCMsg(err error, msg string) error { + if err == nil { + return nil + } + if _, ok := status.FromError(err); ok { + return err + } + + switch { + case errors.Is(err, errdefs.ErrInvalidArgument): + return status.Errorf(codes.InvalidArgument, msg) + case errors.Is(err, errdefs.ErrNotFound): + return status.Errorf(codes.NotFound, msg) + case errors.Is(err, errdefs.ErrAlreadyExists): + return status.Errorf(codes.AlreadyExists, msg) + case errors.Is(err, errdefs.ErrFailedPrecondition): + return status.Errorf(codes.FailedPrecondition, msg) + case errors.Is(err, errdefs.ErrUnavailable): + return status.Errorf(codes.Unavailable, msg) + case errors.Is(err, errdefs.ErrNotImplemented): + return status.Errorf(codes.Unimplemented, msg) + case errors.Is(err, context.Canceled): + return status.Errorf(codes.Canceled, msg) + case errors.Is(err, context.DeadlineExceeded): + return status.Errorf(codes.DeadlineExceeded, msg) + } + + return errdefs.ToGRPC(err) +} diff --git a/pkg/shim/utils/errors_test.go b/pkg/shim/utils/errors_test.go new file mode 100644 index 000000000..0a8fe34c8 --- /dev/null +++ b/pkg/shim/utils/errors_test.go @@ -0,0 +1,50 @@ +// Copyright 2021 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package utils + +import ( + "fmt" + "testing" + + "github.com/containerd/containerd/errdefs" +) + +func TestGRPCRoundTripsErrors(t *testing.T) { + for _, tc := range []struct { + name string + err error + test func(err error) bool + }{ + { + name: "passthrough", + err: errdefs.ErrNotFound, + test: errdefs.IsNotFound, + }, + { + name: "wrapped", + err: fmt.Errorf("oh no: %w", errdefs.ErrNotFound), + test: errdefs.IsNotFound, + }, + } { + t.Run(tc.name, func(t *testing.T) { + if err := errdefs.FromGRPC(ErrToGRPC(tc.err)); !tc.test(err) { + t.Errorf("errToGRPC got %+v", err) + } + if err := errdefs.FromGRPC(ErrToGRPCf(tc.err, "testing %s", "123")); !tc.test(err) { + t.Errorf("errToGRPCf got %+v", err) + } + }) + } +} |