From 8b0f0b4d11e0938eec8da411323b2ce35976ab56 Mon Sep 17 00:00:00 2001 From: Fabricio Voznika Date: Tue, 12 Jan 2021 17:50:33 -0800 Subject: Delete shim v1 gvisor-containerd-shim is not compatible with containerd 1.1 or earlier. Starting from containerd 1.2, shim v2 is the preferred interface. PiperOrigin-RevId: 351485556 --- pkg/shim/BUILD | 47 ++ pkg/shim/api.go | 23 + pkg/shim/epoll.go | 129 +++ pkg/shim/options.go | 50 ++ pkg/shim/proc/BUILD | 36 + pkg/shim/proc/deleted_state.go | 49 ++ pkg/shim/proc/exec.go | 281 +++++++ pkg/shim/proc/exec_state.go | 154 ++++ pkg/shim/proc/init.go | 458 +++++++++++ pkg/shim/proc/init_state.go | 182 +++++ pkg/shim/proc/io.go | 162 ++++ pkg/shim/proc/proc.go | 39 + pkg/shim/proc/types.go | 68 ++ pkg/shim/proc/utils.go | 72 ++ pkg/shim/runtimeoptions/BUILD | 32 + pkg/shim/runtimeoptions/runtimeoptions.go | 17 + pkg/shim/runtimeoptions/runtimeoptions.proto | 25 + pkg/shim/runtimeoptions/runtimeoptions_cri.go | 383 +++++++++ pkg/shim/runtimeoptions/runtimeoptions_test.go | 57 ++ pkg/shim/service.go | 953 ++++++++++++++++++++++ pkg/shim/service_linux.go | 108 +++ pkg/shim/state.go | 48 ++ pkg/shim/utils/BUILD | 27 + pkg/shim/utils/annotations.go | 25 + pkg/shim/utils/utils.go | 57 ++ pkg/shim/utils/volumes.go | 155 ++++ pkg/shim/utils/volumes_test.go | 308 +++++++ pkg/shim/v1/proc/BUILD | 36 - pkg/shim/v1/proc/deleted_state.go | 49 -- pkg/shim/v1/proc/exec.go | 281 ------- pkg/shim/v1/proc/exec_state.go | 154 ---- pkg/shim/v1/proc/init.go | 460 ----------- pkg/shim/v1/proc/init_state.go | 182 ----- pkg/shim/v1/proc/io.go | 162 ---- pkg/shim/v1/proc/process.go | 38 - pkg/shim/v1/proc/types.go | 68 -- pkg/shim/v1/proc/utils.go | 72 -- pkg/shim/v1/shim/BUILD | 41 - pkg/shim/v1/shim/api.go | 41 - pkg/shim/v1/shim/platform.go | 106 --- pkg/shim/v1/shim/service.go | 572 ------------- pkg/shim/v1/shim/shim.go | 17 - pkg/shim/v1/utils/BUILD | 27 - pkg/shim/v1/utils/annotations.go | 25 - pkg/shim/v1/utils/utils.go | 57 -- pkg/shim/v1/utils/volumes.go | 155 ---- pkg/shim/v1/utils/volumes_test.go | 308 ------- pkg/shim/v2/BUILD | 47 -- pkg/shim/v2/api.go | 23 - pkg/shim/v2/epoll.go | 129 --- pkg/shim/v2/options.go | 50 -- pkg/shim/v2/runtimeoptions/BUILD | 32 - pkg/shim/v2/runtimeoptions/runtimeoptions.go | 17 - pkg/shim/v2/runtimeoptions/runtimeoptions.proto | 25 - pkg/shim/v2/runtimeoptions/runtimeoptions_cri.go | 383 --------- pkg/shim/v2/runtimeoptions/runtimeoptions_test.go | 57 -- pkg/shim/v2/service.go | 953 ---------------------- pkg/shim/v2/service_linux.go | 108 --- pkg/shim/v2/state.go | 48 -- pkg/test/criutil/criutil.go | 4 +- 60 files changed, 3946 insertions(+), 4726 deletions(-) create mode 100644 pkg/shim/BUILD create mode 100644 pkg/shim/api.go create mode 100644 pkg/shim/epoll.go create mode 100644 pkg/shim/options.go create mode 100644 pkg/shim/proc/BUILD create mode 100644 pkg/shim/proc/deleted_state.go create mode 100644 pkg/shim/proc/exec.go create mode 100644 pkg/shim/proc/exec_state.go create mode 100644 pkg/shim/proc/init.go create mode 100644 pkg/shim/proc/init_state.go create mode 100644 pkg/shim/proc/io.go create mode 100644 pkg/shim/proc/proc.go create mode 100644 pkg/shim/proc/types.go create mode 100644 pkg/shim/proc/utils.go create mode 100644 pkg/shim/runtimeoptions/BUILD create mode 100644 pkg/shim/runtimeoptions/runtimeoptions.go create mode 100644 pkg/shim/runtimeoptions/runtimeoptions.proto create mode 100644 pkg/shim/runtimeoptions/runtimeoptions_cri.go create mode 100644 pkg/shim/runtimeoptions/runtimeoptions_test.go create mode 100644 pkg/shim/service.go create mode 100644 pkg/shim/service_linux.go create mode 100644 pkg/shim/state.go create mode 100644 pkg/shim/utils/BUILD create mode 100644 pkg/shim/utils/annotations.go create mode 100644 pkg/shim/utils/utils.go create mode 100644 pkg/shim/utils/volumes.go create mode 100644 pkg/shim/utils/volumes_test.go delete mode 100644 pkg/shim/v1/proc/BUILD delete mode 100644 pkg/shim/v1/proc/deleted_state.go delete mode 100644 pkg/shim/v1/proc/exec.go delete mode 100644 pkg/shim/v1/proc/exec_state.go delete mode 100644 pkg/shim/v1/proc/init.go delete mode 100644 pkg/shim/v1/proc/init_state.go delete mode 100644 pkg/shim/v1/proc/io.go delete mode 100644 pkg/shim/v1/proc/process.go delete mode 100644 pkg/shim/v1/proc/types.go delete mode 100644 pkg/shim/v1/proc/utils.go delete mode 100644 pkg/shim/v1/shim/BUILD delete mode 100644 pkg/shim/v1/shim/api.go delete mode 100644 pkg/shim/v1/shim/platform.go delete mode 100644 pkg/shim/v1/shim/service.go delete mode 100644 pkg/shim/v1/shim/shim.go delete mode 100644 pkg/shim/v1/utils/BUILD delete mode 100644 pkg/shim/v1/utils/annotations.go delete mode 100644 pkg/shim/v1/utils/utils.go delete mode 100644 pkg/shim/v1/utils/volumes.go delete mode 100644 pkg/shim/v1/utils/volumes_test.go delete mode 100644 pkg/shim/v2/BUILD delete mode 100644 pkg/shim/v2/api.go delete mode 100644 pkg/shim/v2/epoll.go delete mode 100644 pkg/shim/v2/options.go delete mode 100644 pkg/shim/v2/runtimeoptions/BUILD delete mode 100644 pkg/shim/v2/runtimeoptions/runtimeoptions.go delete mode 100644 pkg/shim/v2/runtimeoptions/runtimeoptions.proto delete mode 100644 pkg/shim/v2/runtimeoptions/runtimeoptions_cri.go delete mode 100644 pkg/shim/v2/runtimeoptions/runtimeoptions_test.go delete mode 100644 pkg/shim/v2/service.go delete mode 100644 pkg/shim/v2/service_linux.go delete mode 100644 pkg/shim/v2/state.go (limited to 'pkg') diff --git a/pkg/shim/BUILD b/pkg/shim/BUILD new file mode 100644 index 000000000..4f7c02f5d --- /dev/null +++ b/pkg/shim/BUILD @@ -0,0 +1,47 @@ +load("//tools:defs.bzl", "go_library") + +package(licenses = ["notice"]) + +go_library( + name = "shim", + srcs = [ + "api.go", + "epoll.go", + "options.go", + "service.go", + "service_linux.go", + "state.go", + ], + visibility = ["//shim:__subpackages__"], + deps = [ + "//pkg/cleanup", + "//pkg/shim/proc", + "//pkg/shim/runsc", + "//pkg/shim/runtimeoptions", + "//pkg/shim/utils", + "//runsc/specutils", + "@com_github_burntsushi_toml//:go_default_library", + "@com_github_containerd_cgroups//:go_default_library", + "@com_github_containerd_cgroups//stats/v1:go_default_library", + "@com_github_containerd_console//:go_default_library", + "@com_github_containerd_containerd//api/events:go_default_library", + "@com_github_containerd_containerd//api/types/task:go_default_library", + "@com_github_containerd_containerd//errdefs:go_default_library", + "@com_github_containerd_containerd//events:go_default_library", + "@com_github_containerd_containerd//log:go_default_library", + "@com_github_containerd_containerd//mount:go_default_library", + "@com_github_containerd_containerd//namespaces:go_default_library", + "@com_github_containerd_containerd//pkg/process:go_default_library", + "@com_github_containerd_containerd//pkg/stdio:go_default_library", + "@com_github_containerd_containerd//runtime:go_default_library", + "@com_github_containerd_containerd//runtime/linux/runctypes:go_default_library", + "@com_github_containerd_containerd//runtime/v2/shim:go_default_library", + "@com_github_containerd_containerd//runtime/v2/task:go_default_library", + "@com_github_containerd_containerd//sys/reaper:go_default_library", + "@com_github_containerd_fifo//:go_default_library", + "@com_github_containerd_typeurl//:go_default_library", + "@com_github_gogo_protobuf//types:go_default_library", + "@com_github_sirupsen_logrus//:go_default_library", + "@org_golang_x_sys//unix:go_default_library", + ], +) diff --git a/pkg/shim/api.go b/pkg/shim/api.go new file mode 100644 index 000000000..6d1741f0c --- /dev/null +++ b/pkg/shim/api.go @@ -0,0 +1,23 @@ +// Copyright 2018 The containerd Authors. +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package shim + +import ( + "github.com/containerd/containerd/api/events" +) + +// TaskOOM is an alias for events.TaskOOM. +type TaskOOM = events.TaskOOM diff --git a/pkg/shim/epoll.go b/pkg/shim/epoll.go new file mode 100644 index 000000000..737d2b781 --- /dev/null +++ b/pkg/shim/epoll.go @@ -0,0 +1,129 @@ +// Copyright 2018 The containerd Authors. +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// +build linux + +package shim + +import ( + "context" + "fmt" + "sync" + + "github.com/containerd/cgroups" + "github.com/containerd/containerd/events" + "github.com/containerd/containerd/runtime" + "golang.org/x/sys/unix" +) + +func newOOMEpoller(publisher events.Publisher) (*epoller, error) { + fd, err := unix.EpollCreate1(unix.EPOLL_CLOEXEC) + if err != nil { + return nil, err + } + return &epoller{ + fd: fd, + publisher: publisher, + set: make(map[uintptr]*item), + }, nil +} + +type epoller struct { + mu sync.Mutex + + fd int + publisher events.Publisher + set map[uintptr]*item +} + +type item struct { + id string + cg cgroups.Cgroup +} + +func (e *epoller) Close() error { + return unix.Close(e.fd) +} + +func (e *epoller) run(ctx context.Context) { + var events [128]unix.EpollEvent + for { + select { + case <-ctx.Done(): + e.Close() + return + default: + n, err := unix.EpollWait(e.fd, events[:], -1) + if err != nil { + if err == unix.EINTR || err == unix.EAGAIN { + continue + } + // Should not happen. + panic(fmt.Errorf("cgroups: epoll wait: %w", err)) + } + for i := 0; i < n; i++ { + e.process(ctx, uintptr(events[i].Fd)) + } + } + } +} + +func (e *epoller) add(id string, cg cgroups.Cgroup) error { + e.mu.Lock() + defer e.mu.Unlock() + fd, err := cg.OOMEventFD() + if err != nil { + return err + } + e.set[fd] = &item{ + id: id, + cg: cg, + } + event := unix.EpollEvent{ + Fd: int32(fd), + Events: unix.EPOLLHUP | unix.EPOLLIN | unix.EPOLLERR, + } + return unix.EpollCtl(e.fd, unix.EPOLL_CTL_ADD, int(fd), &event) +} + +func (e *epoller) process(ctx context.Context, fd uintptr) { + flush(fd) + e.mu.Lock() + i, ok := e.set[fd] + if !ok { + e.mu.Unlock() + return + } + e.mu.Unlock() + if i.cg.State() == cgroups.Deleted { + e.mu.Lock() + delete(e.set, fd) + e.mu.Unlock() + unix.Close(int(fd)) + return + } + if err := e.publisher.Publish(ctx, runtime.TaskOOMEventTopic, &TaskOOM{ + ContainerID: i.id, + }); err != nil { + // Should not happen. + panic(fmt.Errorf("publish OOM event: %w", err)) + } +} + +func flush(fd uintptr) error { + var buf [8]byte + _, err := unix.Read(int(fd), buf[:]) + return err +} diff --git a/pkg/shim/options.go b/pkg/shim/options.go new file mode 100644 index 000000000..e40a1a07d --- /dev/null +++ b/pkg/shim/options.go @@ -0,0 +1,50 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package shim + +const optionsType = "io.containerd.runsc.v1.options" + +// options is runtime options for io.containerd.runsc.v1. +type options struct { + // ShimCgroup is the cgroup the shim should be in. + ShimCgroup string `toml:"shim_cgroup" json:"shimCgroup"` + + // IoUID is the I/O's pipes uid. + IoUID uint32 `toml:"io_uid" json:"ioUid"` + + // IoGID is the I/O's pipes gid. + IoGID uint32 `toml:"io_gid" json:"ioGid"` + + // BinaryName is the binary name of the runsc binary. + BinaryName string `toml:"binary_name" json:"binaryName"` + + // Root is the runsc root directory. + Root string `toml:"root" json:"root"` + + // LogLevel sets the logging level. Some of the possible values are: debug, + // info, warning. + // + // This configuration only applies when the shim is running as a service. + LogLevel string `toml:"log_level" json:"logLevel"` + + // LogPath is the path to log directory. %ID% tags inside the string are + // replaced with the container ID. + // + // This configuration only applies when the shim is running as a service. + LogPath string `toml:"log_path" json:"logPath"` + + // RunscConfig is a key/value map of all runsc flags. + RunscConfig map[string]string `toml:"runsc_config" json:"runscConfig"` +} diff --git a/pkg/shim/proc/BUILD b/pkg/shim/proc/BUILD new file mode 100644 index 000000000..544bdc170 --- /dev/null +++ b/pkg/shim/proc/BUILD @@ -0,0 +1,36 @@ +load("//tools:defs.bzl", "go_library") + +package(licenses = ["notice"]) + +go_library( + name = "proc", + srcs = [ + "deleted_state.go", + "exec.go", + "exec_state.go", + "init.go", + "init_state.go", + "io.go", + "proc.go", + "types.go", + "utils.go", + ], + visibility = [ + "//pkg/shim:__subpackages__", + "//shim:__subpackages__", + ], + deps = [ + "//pkg/shim/runsc", + "@com_github_containerd_console//:go_default_library", + "@com_github_containerd_containerd//errdefs:go_default_library", + "@com_github_containerd_containerd//log:go_default_library", + "@com_github_containerd_containerd//mount:go_default_library", + "@com_github_containerd_containerd//pkg/process:go_default_library", + "@com_github_containerd_containerd//pkg/stdio:go_default_library", + "@com_github_containerd_fifo//:go_default_library", + "@com_github_containerd_go_runc//:go_default_library", + "@com_github_gogo_protobuf//types:go_default_library", + "@com_github_opencontainers_runtime_spec//specs-go:go_default_library", + "@org_golang_x_sys//unix:go_default_library", + ], +) diff --git a/pkg/shim/proc/deleted_state.go b/pkg/shim/proc/deleted_state.go new file mode 100644 index 000000000..d9b970c4d --- /dev/null +++ b/pkg/shim/proc/deleted_state.go @@ -0,0 +1,49 @@ +// Copyright 2018 The containerd Authors. +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package proc + +import ( + "context" + "fmt" + + "github.com/containerd/console" + "github.com/containerd/containerd/errdefs" + "github.com/containerd/containerd/pkg/process" +) + +type deletedState struct{} + +func (*deletedState) Resize(ws console.WinSize) error { + return fmt.Errorf("cannot resize a deleted process.ss") +} + +func (*deletedState) Start(ctx context.Context) error { + return fmt.Errorf("cannot start a deleted process.ss") +} + +func (*deletedState) Delete(ctx context.Context) error { + return fmt.Errorf("cannot delete a deleted process.ss: %w", errdefs.ErrNotFound) +} + +func (*deletedState) Kill(ctx context.Context, sig uint32, all bool) error { + return fmt.Errorf("cannot kill a deleted process.ss: %w", errdefs.ErrNotFound) +} + +func (*deletedState) SetExited(status int) {} + +func (*deletedState) Exec(ctx context.Context, path string, r *ExecConfig) (process.Process, error) { + return nil, fmt.Errorf("cannot exec in a deleted state") +} diff --git a/pkg/shim/proc/exec.go b/pkg/shim/proc/exec.go new file mode 100644 index 000000000..1d1d90488 --- /dev/null +++ b/pkg/shim/proc/exec.go @@ -0,0 +1,281 @@ +// Copyright 2018 The containerd Authors. +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package proc + +import ( + "context" + "fmt" + "io" + "os" + "path/filepath" + "sync" + "syscall" + "time" + + "github.com/containerd/console" + "github.com/containerd/containerd/errdefs" + "github.com/containerd/containerd/pkg/stdio" + "github.com/containerd/fifo" + runc "github.com/containerd/go-runc" + specs "github.com/opencontainers/runtime-spec/specs-go" + "golang.org/x/sys/unix" + + "gvisor.dev/gvisor/pkg/shim/runsc" +) + +type execProcess struct { + wg sync.WaitGroup + + execState execState + + mu sync.Mutex + id string + console console.Console + io runc.IO + status int + exited time.Time + pid int + internalPid int + closers []io.Closer + stdin io.Closer + stdio stdio.Stdio + path string + spec specs.Process + + parent *Init + waitBlock chan struct{} +} + +func (e *execProcess) Wait() { + <-e.waitBlock +} + +func (e *execProcess) ID() string { + return e.id +} + +func (e *execProcess) Pid() int { + e.mu.Lock() + defer e.mu.Unlock() + return e.pid +} + +func (e *execProcess) ExitStatus() int { + e.mu.Lock() + defer e.mu.Unlock() + return e.status +} + +func (e *execProcess) ExitedAt() time.Time { + e.mu.Lock() + defer e.mu.Unlock() + return e.exited +} + +func (e *execProcess) SetExited(status int) { + e.mu.Lock() + defer e.mu.Unlock() + + e.execState.SetExited(status) +} + +func (e *execProcess) setExited(status int) { + e.status = status + e.exited = time.Now() + e.parent.Platform.ShutdownConsole(context.Background(), e.console) + close(e.waitBlock) +} + +func (e *execProcess) Delete(ctx context.Context) error { + e.mu.Lock() + defer e.mu.Unlock() + + return e.execState.Delete(ctx) +} + +func (e *execProcess) delete(ctx context.Context) error { + e.wg.Wait() + if e.io != nil { + for _, c := range e.closers { + c.Close() + } + e.io.Close() + } + pidfile := filepath.Join(e.path, fmt.Sprintf("%s.pid", e.id)) + // silently ignore error + os.Remove(pidfile) + internalPidfile := filepath.Join(e.path, fmt.Sprintf("%s-internal.pid", e.id)) + // silently ignore error + os.Remove(internalPidfile) + return nil +} + +func (e *execProcess) Resize(ws console.WinSize) error { + e.mu.Lock() + defer e.mu.Unlock() + + return e.execState.Resize(ws) +} + +func (e *execProcess) resize(ws console.WinSize) error { + if e.console == nil { + return nil + } + return e.console.Resize(ws) +} + +func (e *execProcess) Kill(ctx context.Context, sig uint32, _ bool) error { + e.mu.Lock() + defer e.mu.Unlock() + + return e.execState.Kill(ctx, sig, false) +} + +func (e *execProcess) kill(ctx context.Context, sig uint32, _ bool) error { + internalPid := e.internalPid + if internalPid != 0 { + if err := e.parent.runtime.Kill(ctx, e.parent.id, int(sig), &runsc.KillOpts{ + Pid: internalPid, + }); err != nil { + // If this returns error, consider the process has + // already stopped. + // + // TODO: Fix after signal handling is fixed. + return fmt.Errorf("%s: %w", err.Error(), errdefs.ErrNotFound) + } + } + return nil +} + +func (e *execProcess) Stdin() io.Closer { + return e.stdin +} + +func (e *execProcess) Stdio() stdio.Stdio { + return e.stdio +} + +func (e *execProcess) Start(ctx context.Context) error { + e.mu.Lock() + defer e.mu.Unlock() + + return e.execState.Start(ctx) +} + +func (e *execProcess) start(ctx context.Context) (err error) { + var ( + socket *runc.Socket + pidfile = filepath.Join(e.path, fmt.Sprintf("%s.pid", e.id)) + internalPidfile = filepath.Join(e.path, fmt.Sprintf("%s-internal.pid", e.id)) + ) + if e.stdio.Terminal { + if socket, err = runc.NewTempConsoleSocket(); err != nil { + return fmt.Errorf("failed to create runc console socket: %w", err) + } + defer socket.Close() + } else if e.stdio.IsNull() { + if e.io, err = runc.NewNullIO(); err != nil { + return fmt.Errorf("creating new NULL IO: %w", err) + } + } else { + if e.io, err = runc.NewPipeIO(e.parent.IoUID, e.parent.IoGID, withConditionalIO(e.stdio)); err != nil { + return fmt.Errorf("failed to create runc io pipes: %w", err) + } + } + opts := &runsc.ExecOpts{ + PidFile: pidfile, + InternalPidFile: internalPidfile, + IO: e.io, + Detach: true, + } + if socket != nil { + opts.ConsoleSocket = socket + } + eventCh := e.parent.Monitor.Subscribe() + defer func() { + // Unsubscribe if an error is returned. + if err != nil { + e.parent.Monitor.Unsubscribe(eventCh) + } + }() + if err := e.parent.runtime.Exec(ctx, e.parent.id, e.spec, opts); err != nil { + close(e.waitBlock) + return e.parent.runtimeError(err, "OCI runtime exec failed") + } + if e.stdio.Stdin != "" { + sc, err := fifo.OpenFifo(context.Background(), e.stdio.Stdin, syscall.O_WRONLY|syscall.O_NONBLOCK, 0) + if err != nil { + return fmt.Errorf("failed to open stdin fifo %s: %w", e.stdio.Stdin, err) + } + e.closers = append(e.closers, sc) + e.stdin = sc + } + ctx, cancel := context.WithTimeout(ctx, 30*time.Second) + defer cancel() + if socket != nil { + console, err := socket.ReceiveMaster() + if err != nil { + return fmt.Errorf("failed to retrieve console master: %w", err) + } + if e.console, err = e.parent.Platform.CopyConsole(ctx, console, e.stdio.Stdin, e.stdio.Stdout, e.stdio.Stderr, &e.wg); err != nil { + return fmt.Errorf("failed to start console copy: %w", err) + } + } else if !e.stdio.IsNull() { + if err := copyPipes(ctx, e.io, e.stdio.Stdin, e.stdio.Stdout, e.stdio.Stderr, &e.wg); err != nil { + return fmt.Errorf("failed to start io pipe copy: %w", err) + } + } + pid, err := runc.ReadPidFile(opts.PidFile) + if err != nil { + return fmt.Errorf("failed to retrieve OCI runtime exec pid: %w", err) + } + e.pid = pid + internalPid, err := runc.ReadPidFile(opts.InternalPidFile) + if err != nil { + return fmt.Errorf("failed to retrieve OCI runtime exec internal pid: %w", err) + } + e.internalPid = internalPid + go func() { + defer e.parent.Monitor.Unsubscribe(eventCh) + for event := range eventCh { + if event.Pid == e.pid { + ExitCh <- Exit{ + Timestamp: event.Timestamp, + ID: e.id, + Status: event.Status, + } + break + } + } + }() + return nil +} + +func (e *execProcess) Status(ctx context.Context) (string, error) { + e.mu.Lock() + defer e.mu.Unlock() + // if we don't have a pid then the exec process has just been created + if e.pid == 0 { + return "created", nil + } + // if we have a pid and it can be signaled, the process is running + // TODO(random-liu): Use `runsc kill --pid`. + if err := unix.Kill(e.pid, 0); err == nil { + return "running", nil + } + // else if we have a pid but it can nolonger be signaled, it has stopped + return "stopped", nil +} diff --git a/pkg/shim/proc/exec_state.go b/pkg/shim/proc/exec_state.go new file mode 100644 index 000000000..4dcda8b44 --- /dev/null +++ b/pkg/shim/proc/exec_state.go @@ -0,0 +1,154 @@ +// Copyright 2018 The containerd Authors. +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package proc + +import ( + "context" + "fmt" + + "github.com/containerd/console" +) + +type execState interface { + Resize(console.WinSize) error + Start(context.Context) error + Delete(context.Context) error + Kill(context.Context, uint32, bool) error + SetExited(int) +} + +type execCreatedState struct { + p *execProcess +} + +func (s *execCreatedState) transition(name string) error { + switch name { + case "running": + s.p.execState = &execRunningState{p: s.p} + case "stopped": + s.p.execState = &execStoppedState{p: s.p} + case "deleted": + s.p.execState = &deletedState{} + default: + return fmt.Errorf("invalid state transition %q to %q", stateName(s), name) + } + return nil +} + +func (s *execCreatedState) Resize(ws console.WinSize) error { + return s.p.resize(ws) +} + +func (s *execCreatedState) Start(ctx context.Context) error { + if err := s.p.start(ctx); err != nil { + return err + } + return s.transition("running") +} + +func (s *execCreatedState) Delete(ctx context.Context) error { + if err := s.p.delete(ctx); err != nil { + return err + } + return s.transition("deleted") +} + +func (s *execCreatedState) Kill(ctx context.Context, sig uint32, all bool) error { + return s.p.kill(ctx, sig, all) +} + +func (s *execCreatedState) SetExited(status int) { + s.p.setExited(status) + + if err := s.transition("stopped"); err != nil { + panic(err) + } +} + +type execRunningState struct { + p *execProcess +} + +func (s *execRunningState) transition(name string) error { + switch name { + case "stopped": + s.p.execState = &execStoppedState{p: s.p} + default: + return fmt.Errorf("invalid state transition %q to %q", stateName(s), name) + } + return nil +} + +func (s *execRunningState) Resize(ws console.WinSize) error { + return s.p.resize(ws) +} + +func (s *execRunningState) Start(ctx context.Context) error { + return fmt.Errorf("cannot start a running process") +} + +func (s *execRunningState) Delete(ctx context.Context) error { + return fmt.Errorf("cannot delete a running process") +} + +func (s *execRunningState) Kill(ctx context.Context, sig uint32, all bool) error { + return s.p.kill(ctx, sig, all) +} + +func (s *execRunningState) SetExited(status int) { + s.p.setExited(status) + + if err := s.transition("stopped"); err != nil { + panic(err) + } +} + +type execStoppedState struct { + p *execProcess +} + +func (s *execStoppedState) transition(name string) error { + switch name { + case "deleted": + s.p.execState = &deletedState{} + default: + return fmt.Errorf("invalid state transition %q to %q", stateName(s), name) + } + return nil +} + +func (s *execStoppedState) Resize(ws console.WinSize) error { + return fmt.Errorf("cannot resize a stopped container") +} + +func (s *execStoppedState) Start(ctx context.Context) error { + return fmt.Errorf("cannot start a stopped process") +} + +func (s *execStoppedState) Delete(ctx context.Context) error { + if err := s.p.delete(ctx); err != nil { + return err + } + return s.transition("deleted") +} + +func (s *execStoppedState) Kill(ctx context.Context, sig uint32, all bool) error { + return s.p.kill(ctx, sig, all) +} + +func (s *execStoppedState) SetExited(status int) { + // no op +} diff --git a/pkg/shim/proc/init.go b/pkg/shim/proc/init.go new file mode 100644 index 000000000..cacaade88 --- /dev/null +++ b/pkg/shim/proc/init.go @@ -0,0 +1,458 @@ +// Copyright 2018 The containerd Authors. +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package proc + +import ( + "context" + "encoding/json" + "fmt" + "io" + "path/filepath" + "strings" + "sync" + "syscall" + "time" + + "github.com/containerd/console" + "github.com/containerd/containerd/errdefs" + "github.com/containerd/containerd/log" + "github.com/containerd/containerd/mount" + "github.com/containerd/containerd/pkg/process" + "github.com/containerd/containerd/pkg/stdio" + "github.com/containerd/fifo" + runc "github.com/containerd/go-runc" + specs "github.com/opencontainers/runtime-spec/specs-go" + + "gvisor.dev/gvisor/pkg/shim/runsc" +) + +// Init represents an initial process for a container. +type Init struct { + wg sync.WaitGroup + initState initState + + // mu is used to ensure that `Start()` and `Exited()` calls return in + // the right order when invoked in separate go routines. This is the + // case within the shim implementation as it makes use of the reaper + // interface. + mu sync.Mutex + + waitBlock chan struct{} + + WorkDir string + + id string + Bundle string + console console.Console + Platform stdio.Platform + io runc.IO + runtime *runsc.Runsc + status int + exited time.Time + pid int + closers []io.Closer + stdin io.Closer + stdio stdio.Stdio + Rootfs string + IoUID int + IoGID int + Sandbox bool + UserLog string + Monitor ProcessMonitor +} + +// NewRunsc returns a new runsc instance for a process. +func NewRunsc(root, path, namespace, runtime string, config map[string]string) *runsc.Runsc { + if root == "" { + root = RunscRoot + } + return &runsc.Runsc{ + Command: runtime, + PdeathSignal: syscall.SIGKILL, + Log: filepath.Join(path, "log.json"), + LogFormat: runc.JSON, + Root: filepath.Join(root, namespace), + Config: config, + } +} + +// New returns a new init process. +func New(id string, runtime *runsc.Runsc, stdio stdio.Stdio) *Init { + p := &Init{ + id: id, + runtime: runtime, + stdio: stdio, + status: 0, + waitBlock: make(chan struct{}), + } + p.initState = &createdState{p: p} + return p +} + +// Create the process with the provided config. +func (p *Init) Create(ctx context.Context, r *CreateConfig) (err error) { + var socket *runc.Socket + if r.Terminal { + if socket, err = runc.NewTempConsoleSocket(); err != nil { + return fmt.Errorf("failed to create OCI runtime console socket: %w", err) + } + defer socket.Close() + } else if hasNoIO(r) { + if p.io, err = runc.NewNullIO(); err != nil { + return fmt.Errorf("creating new NULL IO: %w", err) + } + } else { + if p.io, err = runc.NewPipeIO(p.IoUID, p.IoGID, withConditionalIO(p.stdio)); err != nil { + return fmt.Errorf("failed to create OCI runtime io pipes: %w", err) + } + } + // pidFile is the file that will contain the sandbox pid. + pidFile := filepath.Join(p.Bundle, "init.pid") + opts := &runsc.CreateOpts{ + PidFile: pidFile, + } + if socket != nil { + opts.ConsoleSocket = socket + } + if p.Sandbox { + opts.IO = p.io + // UserLog is only useful for sandbox. + opts.UserLog = p.UserLog + } + if err := p.runtime.Create(ctx, r.ID, r.Bundle, opts); err != nil { + return p.runtimeError(err, "OCI runtime create failed") + } + if r.Stdin != "" { + sc, err := fifo.OpenFifo(context.Background(), r.Stdin, syscall.O_WRONLY|syscall.O_NONBLOCK, 0) + if err != nil { + return fmt.Errorf("failed to open stdin fifo %s: %w", r.Stdin, err) + } + p.stdin = sc + p.closers = append(p.closers, sc) + } + ctx, cancel := context.WithTimeout(ctx, 30*time.Second) + defer cancel() + if socket != nil { + console, err := socket.ReceiveMaster() + if err != nil { + return fmt.Errorf("failed to retrieve console master: %w", err) + } + console, err = p.Platform.CopyConsole(ctx, console, r.Stdin, r.Stdout, r.Stderr, &p.wg) + if err != nil { + return fmt.Errorf("failed to start console copy: %w", err) + } + p.console = console + } else if !hasNoIO(r) { + if err := copyPipes(ctx, p.io, r.Stdin, r.Stdout, r.Stderr, &p.wg); err != nil { + return fmt.Errorf("failed to start io pipe copy: %w", err) + } + } + pid, err := runc.ReadPidFile(pidFile) + if err != nil { + return fmt.Errorf("failed to retrieve OCI runtime container pid: %w", err) + } + p.pid = pid + return nil +} + +// Wait waits for the process to exit. +func (p *Init) Wait() { + <-p.waitBlock +} + +// ID returns the ID of the process. +func (p *Init) ID() string { + return p.id +} + +// Pid returns the PID of the process. +func (p *Init) Pid() int { + return p.pid +} + +// ExitStatus returns the exit status of the process. +func (p *Init) ExitStatus() int { + p.mu.Lock() + defer p.mu.Unlock() + return p.status +} + +// ExitedAt returns the time when the process exited. +func (p *Init) ExitedAt() time.Time { + p.mu.Lock() + defer p.mu.Unlock() + return p.exited +} + +// Status returns the status of the process. +func (p *Init) Status(ctx context.Context) (string, error) { + p.mu.Lock() + defer p.mu.Unlock() + c, err := p.runtime.State(ctx, p.id) + if err != nil { + if strings.Contains(err.Error(), "does not exist") { + return "stopped", nil + } + return "", p.runtimeError(err, "OCI runtime state failed") + } + return p.convertStatus(c.Status), nil +} + +// Start starts the init process. +func (p *Init) Start(ctx context.Context) error { + p.mu.Lock() + defer p.mu.Unlock() + + return p.initState.Start(ctx) +} + +func (p *Init) start(ctx context.Context) error { + var cio runc.IO + if !p.Sandbox { + cio = p.io + } + if err := p.runtime.Start(ctx, p.id, cio); err != nil { + return p.runtimeError(err, "OCI runtime start failed") + } + go func() { + status, err := p.runtime.Wait(context.Background(), p.id) + if err != nil { + log.G(ctx).WithError(err).Errorf("Failed to wait for container %q", p.id) + // TODO(random-liu): Handle runsc kill error. + if err := p.killAll(ctx); err != nil { + log.G(ctx).WithError(err).Errorf("Failed to kill container %q", p.id) + } + status = internalErrorCode + } + ExitCh <- Exit{ + Timestamp: time.Now(), + ID: p.id, + Status: status, + } + }() + return nil +} + +// SetExited set the exit stauts of the init process. +func (p *Init) SetExited(status int) { + p.mu.Lock() + defer p.mu.Unlock() + + p.initState.SetExited(status) +} + +func (p *Init) setExited(status int) { + p.exited = time.Now() + p.status = status + p.Platform.ShutdownConsole(context.Background(), p.console) + close(p.waitBlock) +} + +// Delete deletes the init process. +func (p *Init) Delete(ctx context.Context) error { + p.mu.Lock() + defer p.mu.Unlock() + + return p.initState.Delete(ctx) +} + +func (p *Init) delete(ctx context.Context) error { + p.killAll(ctx) + p.wg.Wait() + err := p.runtime.Delete(ctx, p.id, nil) + // ignore errors if a runtime has already deleted the process + // but we still hold metadata and pipes + // + // this is common during a checkpoint, runc will delete the container state + // after a checkpoint and the container will no longer exist within runc + if err != nil { + if strings.Contains(err.Error(), "does not exist") { + err = nil + } else { + err = p.runtimeError(err, "failed to delete task") + } + } + if p.io != nil { + for _, c := range p.closers { + c.Close() + } + p.io.Close() + } + if err2 := mount.UnmountAll(p.Rootfs, 0); err2 != nil { + log.G(ctx).WithError(err2).Warn("failed to cleanup rootfs mount") + if err == nil { + err = fmt.Errorf("failed rootfs umount: %w", err2) + } + } + return err +} + +// Resize resizes the init processes console. +func (p *Init) Resize(ws console.WinSize) error { + p.mu.Lock() + defer p.mu.Unlock() + + if p.console == nil { + return nil + } + return p.console.Resize(ws) +} + +func (p *Init) resize(ws console.WinSize) error { + if p.console == nil { + return nil + } + return p.console.Resize(ws) +} + +// Kill kills the init process. +func (p *Init) Kill(ctx context.Context, signal uint32, all bool) error { + p.mu.Lock() + defer p.mu.Unlock() + + return p.initState.Kill(ctx, signal, all) +} + +func (p *Init) kill(context context.Context, signal uint32, all bool) error { + var ( + killErr error + backoff = 100 * time.Millisecond + ) + timeout := 1 * time.Second + for start := time.Now(); time.Now().Sub(start) < timeout; { + c, err := p.runtime.State(context, p.id) + if err != nil { + if strings.Contains(err.Error(), "does not exist") { + return fmt.Errorf("no such process: %w", errdefs.ErrNotFound) + } + return p.runtimeError(err, "OCI runtime state failed") + } + // For runsc, signal only works when container is running state. + // If the container is not in running state, directly return + // "no such process" + if p.convertStatus(c.Status) == "stopped" { + return fmt.Errorf("no such process: %w", errdefs.ErrNotFound) + } + killErr = p.runtime.Kill(context, p.id, int(signal), &runsc.KillOpts{ + All: all, + }) + if killErr == nil { + return nil + } + time.Sleep(backoff) + backoff *= 2 + } + return p.runtimeError(killErr, "kill timeout") +} + +// KillAll kills all processes belonging to the init process. +func (p *Init) KillAll(context context.Context) error { + p.mu.Lock() + defer p.mu.Unlock() + return p.killAll(context) +} + +func (p *Init) killAll(context context.Context) error { + p.runtime.Kill(context, p.id, int(syscall.SIGKILL), &runsc.KillOpts{ + All: true, + }) + // Ignore error handling for `runsc kill --all` for now. + // * If it doesn't return error, it is good; + // * If it returns error, consider the container has already stopped. + // TODO: Fix `runsc kill --all` error handling. + return nil +} + +// Stdin returns the stdin of the process. +func (p *Init) Stdin() io.Closer { + return p.stdin +} + +// Runtime returns the OCI runtime configured for the init process. +func (p *Init) Runtime() *runsc.Runsc { + return p.runtime +} + +// Exec returns a new child process. +func (p *Init) Exec(ctx context.Context, path string, r *ExecConfig) (process.Process, error) { + p.mu.Lock() + defer p.mu.Unlock() + + return p.initState.Exec(ctx, path, r) +} + +// exec returns a new exec'd process. +func (p *Init) exec(path string, r *ExecConfig) (process.Process, error) { + // process exec request + var spec specs.Process + if err := json.Unmarshal(r.Spec.Value, &spec); err != nil { + return nil, err + } + spec.Terminal = r.Terminal + + e := &execProcess{ + id: r.ID, + path: path, + parent: p, + spec: spec, + stdio: stdio.Stdio{ + Stdin: r.Stdin, + Stdout: r.Stdout, + Stderr: r.Stderr, + Terminal: r.Terminal, + }, + waitBlock: make(chan struct{}), + } + e.execState = &execCreatedState{p: e} + return e, nil +} + +// Stdio returns the stdio of the process. +func (p *Init) Stdio() stdio.Stdio { + return p.stdio +} + +func (p *Init) runtimeError(rErr error, msg string) error { + if rErr == nil { + return nil + } + + rMsg, err := getLastRuntimeError(p.runtime) + switch { + case err != nil: + return fmt.Errorf("%s: %w (unable to retrieve OCI runtime error: %v)", msg, rErr, err) + case rMsg == "": + return fmt.Errorf("%s: %w", msg, rErr) + default: + return fmt.Errorf("%s: %s", msg, rMsg) + } +} + +func (p *Init) convertStatus(status string) string { + if status == "created" && !p.Sandbox && p.status == internalErrorCode { + // Treat start failure state for non-root container as stopped. + return "stopped" + } + return status +} + +func withConditionalIO(c stdio.Stdio) runc.IOOpt { + return func(o *runc.IOOption) { + o.OpenStdin = c.Stdin != "" + o.OpenStdout = c.Stdout != "" + o.OpenStderr = c.Stderr != "" + } +} diff --git a/pkg/shim/proc/init_state.go b/pkg/shim/proc/init_state.go new file mode 100644 index 000000000..0065fc385 --- /dev/null +++ b/pkg/shim/proc/init_state.go @@ -0,0 +1,182 @@ +// Copyright 2018 The containerd Authors. +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package proc + +import ( + "context" + "fmt" + + "github.com/containerd/console" + "github.com/containerd/containerd/errdefs" + "github.com/containerd/containerd/pkg/process" +) + +type initState interface { + Resize(console.WinSize) error + Start(context.Context) error + Delete(context.Context) error + Exec(context.Context, string, *ExecConfig) (process.Process, error) + Kill(context.Context, uint32, bool) error + SetExited(int) +} + +type createdState struct { + p *Init +} + +func (s *createdState) transition(name string) error { + switch name { + case "running": + s.p.initState = &runningState{p: s.p} + case "stopped": + s.p.initState = &stoppedState{p: s.p} + case "deleted": + s.p.initState = &deletedState{} + default: + return fmt.Errorf("invalid state transition %q to %q", stateName(s), name) + } + return nil +} + +func (s *createdState) Resize(ws console.WinSize) error { + return s.p.resize(ws) +} + +func (s *createdState) Start(ctx context.Context) error { + if err := s.p.start(ctx); err != nil { + // Containerd doesn't allow deleting container in created state. + // However, for gvisor, a non-root container in created state can + // only go to running state. If the container can't be started, + // it can only stay in created state, and never be deleted. + // To work around that, we treat non-root container in start failure + // state as stopped. + if !s.p.Sandbox { + s.p.io.Close() + s.p.setExited(internalErrorCode) + if err := s.transition("stopped"); err != nil { + panic(err) + } + } + return err + } + return s.transition("running") +} + +func (s *createdState) Delete(ctx context.Context) error { + if err := s.p.delete(ctx); err != nil { + return err + } + return s.transition("deleted") +} + +func (s *createdState) Kill(ctx context.Context, sig uint32, all bool) error { + return s.p.kill(ctx, sig, all) +} + +func (s *createdState) SetExited(status int) { + s.p.setExited(status) + + if err := s.transition("stopped"); err != nil { + panic(err) + } +} + +func (s *createdState) Exec(ctx context.Context, path string, r *ExecConfig) (process.Process, error) { + return s.p.exec(path, r) +} + +type runningState struct { + p *Init +} + +func (s *runningState) transition(name string) error { + switch name { + case "stopped": + s.p.initState = &stoppedState{p: s.p} + default: + return fmt.Errorf("invalid state transition %q to %q", stateName(s), name) + } + return nil +} + +func (s *runningState) Resize(ws console.WinSize) error { + return s.p.resize(ws) +} + +func (s *runningState) Start(ctx context.Context) error { + return fmt.Errorf("cannot start a running process.ss") +} + +func (s *runningState) Delete(ctx context.Context) error { + return fmt.Errorf("cannot delete a running process.ss") +} + +func (s *runningState) Kill(ctx context.Context, sig uint32, all bool) error { + return s.p.kill(ctx, sig, all) +} + +func (s *runningState) SetExited(status int) { + s.p.setExited(status) + + if err := s.transition("stopped"); err != nil { + panic(err) + } +} + +func (s *runningState) Exec(ctx context.Context, path string, r *ExecConfig) (process.Process, error) { + return s.p.exec(path, r) +} + +type stoppedState struct { + p *Init +} + +func (s *stoppedState) transition(name string) error { + switch name { + case "deleted": + s.p.initState = &deletedState{} + default: + return fmt.Errorf("invalid state transition %q to %q", stateName(s), name) + } + return nil +} + +func (s *stoppedState) Resize(ws console.WinSize) error { + return fmt.Errorf("cannot resize a stopped container") +} + +func (s *stoppedState) Start(ctx context.Context) error { + return fmt.Errorf("cannot start a stopped process.ss") +} + +func (s *stoppedState) Delete(ctx context.Context) error { + if err := s.p.delete(ctx); err != nil { + return err + } + return s.transition("deleted") +} + +func (s *stoppedState) Kill(ctx context.Context, sig uint32, all bool) error { + return errdefs.ToGRPCf(errdefs.ErrNotFound, "process.ss %s not found", s.p.id) +} + +func (s *stoppedState) SetExited(status int) { + // no op +} + +func (s *stoppedState) Exec(ctx context.Context, path string, r *ExecConfig) (process.Process, error) { + return nil, fmt.Errorf("cannot exec in a stopped state") +} diff --git a/pkg/shim/proc/io.go b/pkg/shim/proc/io.go new file mode 100644 index 000000000..34d825fb7 --- /dev/null +++ b/pkg/shim/proc/io.go @@ -0,0 +1,162 @@ +// Copyright 2018 The containerd Authors. +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package proc + +import ( + "context" + "fmt" + "io" + "os" + "sync" + "sync/atomic" + "syscall" + + "github.com/containerd/containerd/log" + "github.com/containerd/fifo" + runc "github.com/containerd/go-runc" +) + +// TODO(random-liu): This file can be a util. + +var bufPool = sync.Pool{ + New: func() interface{} { + buffer := make([]byte, 32<<10) + return &buffer + }, +} + +func copyPipes(ctx context.Context, rio runc.IO, stdin, stdout, stderr string, wg *sync.WaitGroup) error { + var sameFile *countingWriteCloser + for _, i := range []struct { + name string + dest func(wc io.WriteCloser, rc io.Closer) + }{ + { + name: stdout, + dest: func(wc io.WriteCloser, rc io.Closer) { + wg.Add(1) + go func() { + p := bufPool.Get().(*[]byte) + defer bufPool.Put(p) + if _, err := io.CopyBuffer(wc, rio.Stdout(), *p); err != nil { + log.G(ctx).Warn("error copying stdout") + } + wg.Done() + wc.Close() + if rc != nil { + rc.Close() + } + }() + }, + }, { + name: stderr, + dest: func(wc io.WriteCloser, rc io.Closer) { + wg.Add(1) + go func() { + p := bufPool.Get().(*[]byte) + defer bufPool.Put(p) + if _, err := io.CopyBuffer(wc, rio.Stderr(), *p); err != nil { + log.G(ctx).Warn("error copying stderr") + } + wg.Done() + wc.Close() + if rc != nil { + rc.Close() + } + }() + }, + }, + } { + ok, err := isFifo(i.name) + if err != nil { + return err + } + var ( + fw io.WriteCloser + fr io.Closer + ) + if ok { + if fw, err = fifo.OpenFifo(ctx, i.name, syscall.O_WRONLY, 0); err != nil { + return fmt.Errorf("gvisor-containerd-shim: opening %s failed: %s", i.name, err) + } + if fr, err = fifo.OpenFifo(ctx, i.name, syscall.O_RDONLY, 0); err != nil { + return fmt.Errorf("gvisor-containerd-shim: opening %s failed: %s", i.name, err) + } + } else { + if sameFile != nil { + sameFile.count++ + i.dest(sameFile, nil) + continue + } + if fw, err = os.OpenFile(i.name, syscall.O_WRONLY|syscall.O_APPEND, 0); err != nil { + return fmt.Errorf("gvisor-containerd-shim: opening %s failed: %s", i.name, err) + } + if stdout == stderr { + sameFile = &countingWriteCloser{ + WriteCloser: fw, + count: 1, + } + } + } + i.dest(fw, fr) + } + if stdin == "" { + return nil + } + f, err := fifo.OpenFifo(context.Background(), stdin, syscall.O_RDONLY|syscall.O_NONBLOCK, 0) + if err != nil { + return fmt.Errorf("gvisor-containerd-shim: opening %s failed: %s", stdin, err) + } + go func() { + p := bufPool.Get().(*[]byte) + defer bufPool.Put(p) + + io.CopyBuffer(rio.Stdin(), f, *p) + rio.Stdin().Close() + f.Close() + }() + return nil +} + +// countingWriteCloser masks io.Closer() until close has been invoked a certain number of times. +type countingWriteCloser struct { + io.WriteCloser + count int64 +} + +func (c *countingWriteCloser) Close() error { + if atomic.AddInt64(&c.count, -1) > 0 { + return nil + } + return c.WriteCloser.Close() +} + +// isFifo checks if a file is a fifo. +// +// If the file does not exist then it returns false. +func isFifo(path string) (bool, error) { + stat, err := os.Stat(path) + if err != nil { + if os.IsNotExist(err) { + return false, nil + } + return false, err + } + if stat.Mode()&os.ModeNamedPipe == os.ModeNamedPipe { + return true, nil + } + return false, nil +} diff --git a/pkg/shim/proc/proc.go b/pkg/shim/proc/proc.go new file mode 100644 index 000000000..edba3fca5 --- /dev/null +++ b/pkg/shim/proc/proc.go @@ -0,0 +1,39 @@ +// Copyright 2018 The containerd Authors. +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package proc is responsible to manage the communication between the shim and +// the sandbox process running the container. +package proc + +import ( + "fmt" +) + +// RunscRoot is the path to the root runsc state directory. +const RunscRoot = "/run/containerd/runsc" + +func stateName(v interface{}) string { + switch v.(type) { + case *runningState, *execRunningState: + return "running" + case *createdState, *execCreatedState: + return "created" + case *deletedState: + return "deleted" + case *stoppedState: + return "stopped" + } + panic(fmt.Errorf("invalid state %v", v)) +} diff --git a/pkg/shim/proc/types.go b/pkg/shim/proc/types.go new file mode 100644 index 000000000..fc182cf5e --- /dev/null +++ b/pkg/shim/proc/types.go @@ -0,0 +1,68 @@ +// Copyright 2018 The containerd Authors. +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package proc + +import ( + "time" + + runc "github.com/containerd/go-runc" + "github.com/gogo/protobuf/types" +) + +// Mount holds filesystem mount configuration. +type Mount struct { + Type string + Source string + Target string + Options []string +} + +// CreateConfig hold task creation configuration. +type CreateConfig struct { + ID string + Bundle string + Runtime string + Rootfs []Mount + Terminal bool + Stdin string + Stdout string + Stderr string +} + +// ExecConfig holds exec creation configuration. +type ExecConfig struct { + ID string + Terminal bool + Stdin string + Stdout string + Stderr string + Spec *types.Any +} + +// Exit is the type of exit events. +type Exit struct { + Timestamp time.Time + ID string + Status int +} + +// ProcessMonitor monitors process exit changes. +type ProcessMonitor interface { + // Subscribe to process exit changes + Subscribe() chan runc.Exit + // Unsubscribe to process exit changes + Unsubscribe(c chan runc.Exit) +} diff --git a/pkg/shim/proc/utils.go b/pkg/shim/proc/utils.go new file mode 100644 index 000000000..7c2c409af --- /dev/null +++ b/pkg/shim/proc/utils.go @@ -0,0 +1,72 @@ +// Copyright 2018 The containerd Authors. +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package proc + +import ( + "encoding/json" + "io" + "os" + "strings" + "time" + + "gvisor.dev/gvisor/pkg/shim/runsc" +) + +const ( + internalErrorCode = 128 + bufferSize = 32 +) + +// ExitCh is the exit events channel for containers and exec processes +// inside the sandbox. +var ExitCh = make(chan Exit, bufferSize) + +// TODO(mlaventure): move to runc package? +func getLastRuntimeError(r *runsc.Runsc) (string, error) { + if r.Log == "" { + return "", nil + } + + f, err := os.OpenFile(r.Log, os.O_RDONLY, 0400) + if err != nil { + return "", err + } + + var ( + errMsg string + log struct { + Level string + Msg string + Time time.Time + } + ) + + dec := json.NewDecoder(f) + for err = nil; err == nil; { + if err = dec.Decode(&log); err != nil && err != io.EOF { + return "", err + } + if log.Level == "error" { + errMsg = strings.TrimSpace(log.Msg) + } + } + + return errMsg, nil +} + +func hasNoIO(r *CreateConfig) bool { + return r.Stdin == "" && r.Stdout == "" && r.Stderr == "" +} diff --git a/pkg/shim/runtimeoptions/BUILD b/pkg/shim/runtimeoptions/BUILD new file mode 100644 index 000000000..029be7c09 --- /dev/null +++ b/pkg/shim/runtimeoptions/BUILD @@ -0,0 +1,32 @@ +load("//tools:defs.bzl", "go_library", "go_test", "proto_library") + +package(licenses = ["notice"]) + +proto_library( + name = "api", + srcs = [ + "runtimeoptions.proto", + ], +) + +go_library( + name = "runtimeoptions", + srcs = [ + "runtimeoptions.go", + "runtimeoptions_cri.go", + ], + visibility = ["//pkg/shim:__pkg__"], + deps = ["@com_github_gogo_protobuf//proto:go_default_library"], +) + +go_test( + name = "runtimeoptions_test", + size = "small", + srcs = ["runtimeoptions_test.go"], + library = ":runtimeoptions", + deps = [ + "@com_github_containerd_containerd//runtime/v1/shim/v1:go_default_library", + "@com_github_containerd_typeurl//:go_default_library", + "@com_github_gogo_protobuf//proto:go_default_library", + ], +) diff --git a/pkg/shim/runtimeoptions/runtimeoptions.go b/pkg/shim/runtimeoptions/runtimeoptions.go new file mode 100644 index 000000000..072dd87f0 --- /dev/null +++ b/pkg/shim/runtimeoptions/runtimeoptions.go @@ -0,0 +1,17 @@ +// Copyright 2018 The containerd Authors. +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package runtimeoptions contains the runtimeoptions proto. +package runtimeoptions diff --git a/pkg/shim/runtimeoptions/runtimeoptions.proto b/pkg/shim/runtimeoptions/runtimeoptions.proto new file mode 100644 index 000000000..057032e34 --- /dev/null +++ b/pkg/shim/runtimeoptions/runtimeoptions.proto @@ -0,0 +1,25 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +syntax = "proto3"; + +package cri.runtimeoptions.v1; + +// This is a version of the runtimeoptions CRI API that is vendored. +// +// Importing the full CRI package is a nightmare. +message Options { + string type_url = 1; + string config_path = 2; +} diff --git a/pkg/shim/runtimeoptions/runtimeoptions_cri.go b/pkg/shim/runtimeoptions/runtimeoptions_cri.go new file mode 100644 index 000000000..e6102b4cf --- /dev/null +++ b/pkg/shim/runtimeoptions/runtimeoptions_cri.go @@ -0,0 +1,383 @@ +// Copyright 2018 The containerd Authors. +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package runtimeoptions + +import ( + "fmt" + "io" + "reflect" + "strings" + + proto "github.com/gogo/protobuf/proto" +) + +// This is a compile-time assertion to ensure that this generated file +// is compatible with the proto package it is being compiled against. +// A compilation error at this line likely means your copy of the +// proto package needs to be updated. +const _ = proto.GoGoProtoPackageIsVersion2 // please upgrade the proto package + +type Options struct { + // TypeUrl specifies the type of the content inside the config file. + TypeUrl string `protobuf:"bytes,1,opt,name=type_url,json=typeUrl,proto3" json:"type_url,omitempty"` + // ConfigPath specifies the filesystem location of the config file + // used by the runtime. + ConfigPath string `protobuf:"bytes,2,opt,name=config_path,json=configPath,proto3" json:"config_path,omitempty"` +} + +func (m *Options) Reset() { *m = Options{} } +func (*Options) ProtoMessage() {} +func (*Options) Descriptor() ([]byte, []int) { return fileDescriptorApi, []int{0} } + +func (m *Options) GetTypeUrl() string { + if m != nil { + return m.TypeUrl + } + return "" +} + +func (m *Options) GetConfigPath() string { + if m != nil { + return m.ConfigPath + } + return "" +} + +func init() { + proto.RegisterType((*Options)(nil), "cri.runtimeoptions.v1.Options") +} + +func (m *Options) Marshal() (dAtA []byte, err error) { + size := m.Size() + dAtA = make([]byte, size) + n, err := m.MarshalTo(dAtA) + if err != nil { + return nil, err + } + return dAtA[:n], nil +} + +func (m *Options) MarshalTo(dAtA []byte) (int, error) { + var i int + _ = i + var l int + _ = l + if len(m.TypeUrl) > 0 { + dAtA[i] = 0xa + i++ + i = encodeVarintApi(dAtA, i, uint64(len(m.TypeUrl))) + i += copy(dAtA[i:], m.TypeUrl) + } + if len(m.ConfigPath) > 0 { + dAtA[i] = 0x12 + i++ + i = encodeVarintApi(dAtA, i, uint64(len(m.ConfigPath))) + i += copy(dAtA[i:], m.ConfigPath) + } + return i, nil +} + +func encodeVarintApi(dAtA []byte, offset int, v uint64) int { + for v >= 1<<7 { + dAtA[offset] = uint8(v&0x7f | 0x80) + v >>= 7 + offset++ + } + dAtA[offset] = uint8(v) + return offset + 1 +} + +func (m *Options) Size() (n int) { + var l int + _ = l + l = len(m.TypeUrl) + if l > 0 { + n += 1 + l + sovApi(uint64(l)) + } + l = len(m.ConfigPath) + if l > 0 { + n += 1 + l + sovApi(uint64(l)) + } + return n +} + +func sovApi(x uint64) (n int) { + for { + n++ + x >>= 7 + if x == 0 { + break + } + } + return n +} + +func sozApi(x uint64) (n int) { + return sovApi(uint64((x << 1) ^ uint64((int64(x) >> 63)))) +} + +func (this *Options) String() string { + if this == nil { + return "nil" + } + s := strings.Join([]string{`&Options{`, + `TypeUrl:` + fmt.Sprintf("%v", this.TypeUrl) + `,`, + `ConfigPath:` + fmt.Sprintf("%v", this.ConfigPath) + `,`, + `}`, + }, "") + return s +} + +func valueToStringApi(v interface{}) string { + rv := reflect.ValueOf(v) + if rv.IsNil() { + return "nil" + } + pv := reflect.Indirect(rv).Interface() + return fmt.Sprintf("*%v", pv) +} + +func (m *Options) Unmarshal(dAtA []byte) error { + l := len(dAtA) + iNdEx := 0 + for iNdEx < l { + preIndex := iNdEx + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowApi + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + fieldNum := int32(wire >> 3) + wireType := int(wire & 0x7) + if wireType == 4 { + return fmt.Errorf("proto: Options: wiretype end group for non-group") + } + if fieldNum <= 0 { + return fmt.Errorf("proto: Options: illegal tag %d (wire type %d)", fieldNum, wire) + } + switch fieldNum { + case 1: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field TypeUrl", wireType) + } + var stringLen uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowApi + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + stringLen |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + intStringLen := int(stringLen) + if intStringLen < 0 { + return ErrInvalidLengthApi + } + postIndex := iNdEx + intStringLen + if postIndex > l { + return io.ErrUnexpectedEOF + } + m.TypeUrl = string(dAtA[iNdEx:postIndex]) + iNdEx = postIndex + case 2: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field ConfigPath", wireType) + } + var stringLen uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowApi + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + stringLen |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + intStringLen := int(stringLen) + if intStringLen < 0 { + return ErrInvalidLengthApi + } + postIndex := iNdEx + intStringLen + if postIndex > l { + return io.ErrUnexpectedEOF + } + m.ConfigPath = string(dAtA[iNdEx:postIndex]) + iNdEx = postIndex + default: + iNdEx = preIndex + skippy, err := skipApi(dAtA[iNdEx:]) + if err != nil { + return err + } + if skippy < 0 { + return ErrInvalidLengthApi + } + if (iNdEx + skippy) > l { + return io.ErrUnexpectedEOF + } + iNdEx += skippy + } + } + + if iNdEx > l { + return io.ErrUnexpectedEOF + } + return nil +} + +func skipApi(dAtA []byte) (n int, err error) { + l := len(dAtA) + iNdEx := 0 + for iNdEx < l { + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return 0, ErrIntOverflowApi + } + if iNdEx >= l { + return 0, io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + wireType := int(wire & 0x7) + switch wireType { + case 0: + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return 0, ErrIntOverflowApi + } + if iNdEx >= l { + return 0, io.ErrUnexpectedEOF + } + iNdEx++ + if dAtA[iNdEx-1] < 0x80 { + break + } + } + return iNdEx, nil + case 1: + iNdEx += 8 + return iNdEx, nil + case 2: + var length int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return 0, ErrIntOverflowApi + } + if iNdEx >= l { + return 0, io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + length |= (int(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + iNdEx += length + if length < 0 { + return 0, ErrInvalidLengthApi + } + return iNdEx, nil + case 3: + for { + var innerWire uint64 + var start int = iNdEx + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return 0, ErrIntOverflowApi + } + if iNdEx >= l { + return 0, io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + innerWire |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + innerWireType := int(innerWire & 0x7) + if innerWireType == 4 { + break + } + next, err := skipApi(dAtA[start:]) + if err != nil { + return 0, err + } + iNdEx = start + next + } + return iNdEx, nil + case 4: + return iNdEx, nil + case 5: + iNdEx += 4 + return iNdEx, nil + default: + return 0, fmt.Errorf("proto: illegal wireType %d", wireType) + } + } + panic("unreachable") +} + +var ( + ErrInvalidLengthApi = fmt.Errorf("proto: negative length found during unmarshaling") + ErrIntOverflowApi = fmt.Errorf("proto: integer overflow") +) + +func init() { proto.RegisterFile("api.proto", fileDescriptorApi) } + +var fileDescriptorApi = []byte{ + // 183 bytes of a gzipped FileDescriptorProto + 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0xe2, 0xe2, 0x4c, 0x2c, 0xc8, 0xd4, + 0x2b, 0x28, 0xca, 0x2f, 0xc9, 0x17, 0x12, 0x4d, 0x2e, 0xca, 0xd4, 0x2b, 0x2a, 0xcd, 0x2b, 0xc9, + 0xcc, 0x4d, 0xcd, 0x2f, 0x28, 0xc9, 0xcc, 0xcf, 0x2b, 0xd6, 0x2b, 0x33, 0x94, 0xd2, 0x4d, 0xcf, + 0x2c, 0xc9, 0x28, 0x4d, 0xd2, 0x4b, 0xce, 0xcf, 0xd5, 0x4f, 0xcf, 0x4f, 0xcf, 0xd7, 0x07, 0xab, + 0x4e, 0x2a, 0x4d, 0x03, 0xf3, 0xc0, 0x1c, 0x30, 0x0b, 0x62, 0x8a, 0x92, 0x2b, 0x17, 0xbb, 0x3f, + 0x44, 0xb3, 0x90, 0x24, 0x17, 0x47, 0x49, 0x65, 0x41, 0x6a, 0x7c, 0x69, 0x51, 0x8e, 0x04, 0xa3, + 0x02, 0xa3, 0x06, 0x67, 0x10, 0x3b, 0x88, 0x1f, 0x5a, 0x94, 0x23, 0x24, 0xcf, 0xc5, 0x9d, 0x9c, + 0x9f, 0x97, 0x96, 0x99, 0x1e, 0x5f, 0x90, 0x58, 0x92, 0x21, 0xc1, 0x04, 0x96, 0xe5, 0x82, 0x08, + 0x05, 0x24, 0x96, 0x64, 0x38, 0xc9, 0x9c, 0x78, 0x28, 0xc7, 0x78, 0xe3, 0xa1, 0x1c, 0x43, 0xc3, + 0x23, 0x39, 0xc6, 0x13, 0x8f, 0xe4, 0x18, 0x2f, 0x3c, 0x92, 0x63, 0x7c, 0xf0, 0x48, 0x8e, 0x71, + 0xc2, 0x63, 0x39, 0x86, 0x24, 0x36, 0xb0, 0x5d, 0xc6, 0x80, 0x00, 0x00, 0x00, 0xff, 0xff, 0x07, + 0x00, 0xf2, 0x18, 0xbe, 0x00, 0x00, 0x00, +} diff --git a/pkg/shim/runtimeoptions/runtimeoptions_test.go b/pkg/shim/runtimeoptions/runtimeoptions_test.go new file mode 100644 index 000000000..c59a2400e --- /dev/null +++ b/pkg/shim/runtimeoptions/runtimeoptions_test.go @@ -0,0 +1,57 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package runtimeoptions + +import ( + "bytes" + "testing" + + shim "github.com/containerd/containerd/runtime/v1/shim/v1" + "github.com/containerd/typeurl" + "github.com/gogo/protobuf/proto" +) + +func TestCreateTaskRequest(t *testing.T) { + // Serialize the top-level message. + const encodedText = `options: < + type_url: "cri.runtimeoptions.v1.Options" + value: "\n\010type_url\022\013config_path" +>` + got := &shim.CreateTaskRequest{} // Should have raw options. + if err := proto.UnmarshalText(encodedText, got); err != nil { + t.Fatalf("unable to unmarshal text: %v", err) + } + var textBuffer bytes.Buffer + if err := proto.MarshalText(&textBuffer, got); err != nil { + t.Errorf("unable to marshal text: %v", err) + } + t.Logf("got: %s", string(textBuffer.Bytes())) + + // Check the options. + wantOptions := &Options{} + wantOptions.TypeUrl = "type_url" + wantOptions.ConfigPath = "config_path" + gotMessage, err := typeurl.UnmarshalAny(got.Options) + if err != nil { + t.Fatalf("unable to unmarshal any: %v", err) + } + gotOptions, ok := gotMessage.(*Options) + if !ok { + t.Fatalf("got %v, want %v", gotMessage, wantOptions) + } + if !proto.Equal(gotOptions, wantOptions) { + t.Fatalf("got %v, want %v", gotOptions, wantOptions) + } +} diff --git a/pkg/shim/service.go b/pkg/shim/service.go new file mode 100644 index 000000000..9aba26ac7 --- /dev/null +++ b/pkg/shim/service.go @@ -0,0 +1,953 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package shim implements Containerd Shim v2 interface. +package shim + +import ( + "context" + "fmt" + "io" + "os" + "os/exec" + "path/filepath" + "sync" + "syscall" + "time" + + "github.com/BurntSushi/toml" + "github.com/containerd/cgroups" + cgroupsstats "github.com/containerd/cgroups/stats/v1" + "github.com/containerd/console" + "github.com/containerd/containerd/api/events" + "github.com/containerd/containerd/api/types/task" + "github.com/containerd/containerd/errdefs" + "github.com/containerd/containerd/log" + "github.com/containerd/containerd/mount" + "github.com/containerd/containerd/namespaces" + "github.com/containerd/containerd/pkg/process" + "github.com/containerd/containerd/pkg/stdio" + "github.com/containerd/containerd/runtime" + "github.com/containerd/containerd/runtime/linux/runctypes" + "github.com/containerd/containerd/runtime/v2/shim" + taskAPI "github.com/containerd/containerd/runtime/v2/task" + "github.com/containerd/containerd/sys/reaper" + "github.com/containerd/typeurl" + "github.com/gogo/protobuf/types" + "github.com/sirupsen/logrus" + "golang.org/x/sys/unix" + "gvisor.dev/gvisor/pkg/cleanup" + + "gvisor.dev/gvisor/pkg/shim/proc" + "gvisor.dev/gvisor/pkg/shim/runsc" + "gvisor.dev/gvisor/pkg/shim/runtimeoptions" + "gvisor.dev/gvisor/pkg/shim/utils" + "gvisor.dev/gvisor/runsc/specutils" +) + +var ( + empty = &types.Empty{} + bufPool = sync.Pool{ + New: func() interface{} { + buffer := make([]byte, 32<<10) + return &buffer + }, + } +) + +var _ = (taskAPI.TaskService)(&service{}) + +const ( + // configFile is the default config file name. For containerd 1.2, + // we assume that a config.toml should exist in the runtime root. + configFile = "config.toml" + + // shimAddressPath is the relative path to a file that contains the address + // to the shim UDS. See service.shimAddress. + shimAddressPath = "address" +) + +// New returns a new shim service that can be used via GRPC. +func New(ctx context.Context, id string, publisher shim.Publisher, cancel func()) (shim.Shim, error) { + log.L.Debugf("service.New, id: %s", id) + + var opts shim.Opts + if ctxOpts := ctx.Value(shim.OptsKey{}); ctxOpts != nil { + opts = ctxOpts.(shim.Opts) + } + + ep, err := newOOMEpoller(publisher) + if err != nil { + return nil, err + } + go ep.run(ctx) + s := &service{ + id: id, + processes: make(map[string]process.Process), + events: make(chan interface{}, 128), + ec: proc.ExitCh, + oomPoller: ep, + cancel: cancel, + genericOptions: opts, + } + go s.processExits(ctx) + runsc.Monitor = &runsc.LogMonitor{Next: reaper.Default} + if err := s.initPlatform(); err != nil { + cancel() + return nil, fmt.Errorf("failed to initialized platform behavior: %w", err) + } + go s.forward(ctx, publisher) + + if address, err := shim.ReadAddress(shimAddressPath); err == nil { + s.shimAddress = address + } + + return s, nil +} + +// service is the shim implementation of a remote shim over GRPC. It runs in 2 +// different modes: +// 1. Service: process runs for the life time of the container and receives +// calls described in shimapi.TaskService interface. +// 2. Tool: process is short lived and runs only to perform the requested +// operations and then exits. It implements the direct functions in +// shim.Shim interface. +// +// When the service is running, it saves a json file with state information so +// that commands sent to the tool can load the state and perform the operation. +type service struct { + mu sync.Mutex + + // id is the container ID. + id string + + // bundle is a path provided by the caller on container creation. Store + // because it's needed in commands that don't receive bundle in the request. + bundle string + + // task is the main process that is running the container. + task *proc.Init + + // processes maps ExecId to processes running through exec. + processes map[string]process.Process + + events chan interface{} + + // platform handles operations related to the console. + platform stdio.Platform + + // genericOptions are options that come from the shim interface and are common + // to all shims. + genericOptions shim.Opts + + // opts are configuration options specific for this shim. + opts options + + // ex gets notified whenever the container init process or an exec'd process + // exits from inside the sandbox. + ec chan proc.Exit + + // oomPoller monitors the sandbox's cgroup for OOM notifications. + oomPoller *epoller + + // cancel is a function that needs to be called before the shim stops. The + // function is provided by the caller to New(). + cancel func() + + // shimAddress is the location of the UDS used to communicate to containerd. + shimAddress string +} + +func (s *service) newCommand(ctx context.Context, containerdBinary, containerdAddress string) (*exec.Cmd, error) { + ns, err := namespaces.NamespaceRequired(ctx) + if err != nil { + return nil, err + } + self, err := os.Executable() + if err != nil { + return nil, err + } + cwd, err := os.Getwd() + if err != nil { + return nil, err + } + args := []string{ + "-namespace", ns, + "-address", containerdAddress, + "-publish-binary", containerdBinary, + } + if s.genericOptions.Debug { + args = append(args, "-debug") + } + cmd := exec.Command(self, args...) + cmd.Dir = cwd + cmd.Env = append(os.Environ(), "GOMAXPROCS=2") + cmd.SysProcAttr = &syscall.SysProcAttr{ + Setpgid: true, + } + return cmd, nil +} + +func (s *service) StartShim(ctx context.Context, id, containerdBinary, containerdAddress, containerdTTRPCAddress string) (string, error) { + log.L.Debugf("StartShim, id: %s, binary: %q, address: %q", id, containerdBinary, containerdAddress) + + cmd, err := s.newCommand(ctx, containerdBinary, containerdAddress) + if err != nil { + return "", err + } + address, err := shim.SocketAddress(ctx, containerdAddress, id) + if err != nil { + return "", err + } + socket, err := shim.NewSocket(address) + if err != nil { + // The only time where this would happen is if there is a bug and the socket + // was not cleaned up in the cleanup method of the shim or we are using the + // grouping functionality where the new process should be run with the same + // shim as an existing container. + if !shim.SocketEaddrinuse(err) { + return "", fmt.Errorf("create new shim socket: %w", err) + } + if shim.CanConnect(address) { + if err := shim.WriteAddress(shimAddressPath, address); err != nil { + return "", fmt.Errorf("write existing socket for shim: %w", err) + } + return address, nil + } + if err := shim.RemoveSocket(address); err != nil { + return "", fmt.Errorf("remove pre-existing socket: %w", err) + } + if socket, err = shim.NewSocket(address); err != nil { + return "", fmt.Errorf("try create new shim socket 2x: %w", err) + } + } + cu := cleanup.Make(func() { + socket.Close() + _ = shim.RemoveSocket(address) + }) + defer cu.Clean() + + f, err := socket.File() + if err != nil { + return "", err + } + + cmd.ExtraFiles = append(cmd.ExtraFiles, f) + + log.L.Debugf("Executing: %q %s", cmd.Path, cmd.Args) + if err := cmd.Start(); err != nil { + f.Close() + return "", err + } + cu.Add(func() { cmd.Process.Kill() }) + + // make sure to wait after start + go cmd.Wait() + if err := shim.WritePidFile("shim.pid", cmd.Process.Pid); err != nil { + return "", err + } + if err := shim.WriteAddress(shimAddressPath, address); err != nil { + return "", err + } + if err := shim.SetScore(cmd.Process.Pid); err != nil { + return "", fmt.Errorf("failed to set OOM Score on shim: %w", err) + } + cu.Release() + return address, nil +} + +// Cleanup is called from another process (need to reload state) to stop the +// container and undo all operations done in Create(). +func (s *service) Cleanup(ctx context.Context) (*taskAPI.DeleteResponse, error) { + log.L.Debugf("Cleanup") + + path, err := os.Getwd() + if err != nil { + return nil, err + } + ns, err := namespaces.NamespaceRequired(ctx) + if err != nil { + return nil, err + } + var st state + if err := st.load(path); err != nil { + return nil, err + } + r := proc.NewRunsc(s.opts.Root, path, ns, st.Options.BinaryName, nil) + + if err := r.Delete(ctx, s.id, &runsc.DeleteOpts{ + Force: true, + }); err != nil { + log.L.Infof("failed to remove runc container: %v", err) + } + if err := mount.UnmountAll(st.Rootfs, 0); err != nil { + log.L.Infof("failed to cleanup rootfs mount: %v", err) + } + return &taskAPI.DeleteResponse{ + ExitedAt: time.Now(), + ExitStatus: 128 + uint32(unix.SIGKILL), + }, nil +} + +// Create creates a new initial process and container with the underlying OCI +// runtime. +func (s *service) Create(ctx context.Context, r *taskAPI.CreateTaskRequest) (*taskAPI.CreateTaskResponse, error) { + log.L.Debugf("Create, id: %s, bundle: %q", r.ID, r.Bundle) + + s.mu.Lock() + defer s.mu.Unlock() + + // Save the main task id and bundle to the shim for additional requests. + s.id = r.ID + s.bundle = r.Bundle + + ns, err := namespaces.NamespaceRequired(ctx) + if err != nil { + return nil, fmt.Errorf("create namespace: %w", err) + } + + // Read from root for now. + if r.Options != nil { + v, err := typeurl.UnmarshalAny(r.Options) + if err != nil { + return nil, err + } + var path string + switch o := v.(type) { + case *runctypes.CreateOptions: // containerd 1.2.x + s.opts.IoUID = o.IoUid + s.opts.IoGID = o.IoGid + s.opts.ShimCgroup = o.ShimCgroup + case *runctypes.RuncOptions: // containerd 1.2.x + root := proc.RunscRoot + if o.RuntimeRoot != "" { + root = o.RuntimeRoot + } + + s.opts.BinaryName = o.Runtime + + path = filepath.Join(root, configFile) + if _, err := os.Stat(path); err != nil { + if !os.IsNotExist(err) { + return nil, fmt.Errorf("stat config file %q: %w", path, err) + } + // A config file in runtime root is not required. + path = "" + } + case *runtimeoptions.Options: // containerd 1.3.x+ + if o.ConfigPath == "" { + break + } + if o.TypeUrl != optionsType { + return nil, fmt.Errorf("unsupported option type %q", o.TypeUrl) + } + path = o.ConfigPath + default: + return nil, fmt.Errorf("unsupported option type %q", r.Options.TypeUrl) + } + if path != "" { + if _, err = toml.DecodeFile(path, &s.opts); err != nil { + return nil, fmt.Errorf("decode config file %q: %w", path, err) + } + } + } + + if len(s.opts.LogLevel) != 0 { + lvl, err := logrus.ParseLevel(s.opts.LogLevel) + if err != nil { + return nil, err + } + logrus.SetLevel(lvl) + } + if len(s.opts.LogPath) != 0 { + logPath := runsc.FormatShimLogPath(s.opts.LogPath, s.id) + if err := os.MkdirAll(filepath.Dir(logPath), 0777); err != nil { + return nil, fmt.Errorf("failed to create log dir: %w", err) + } + logFile, err := os.Create(logPath) + if err != nil { + return nil, fmt.Errorf("failed to create log file: %w", err) + } + log.L.Debugf("Starting mirror log at %q", logPath) + std := logrus.StandardLogger() + std.SetOutput(io.MultiWriter(std.Out, logFile)) + + log.L.Debugf("Create shim") + log.L.Debugf("***************************") + log.L.Debugf("Args: %s", os.Args) + log.L.Debugf("PID: %d", os.Getpid()) + log.L.Debugf("ID: %s", s.id) + log.L.Debugf("Options: %+v", s.opts) + log.L.Debugf("Bundle: %s", r.Bundle) + log.L.Debugf("Terminal: %t", r.Terminal) + log.L.Debugf("stdin: %s", r.Stdin) + log.L.Debugf("stdout: %s", r.Stdout) + log.L.Debugf("stderr: %s", r.Stderr) + log.L.Debugf("***************************") + } + + // Save state before any action is taken to ensure Cleanup() will have all + // the information it needs to undo the operations. + st := state{ + Rootfs: filepath.Join(r.Bundle, "rootfs"), + Options: s.opts, + } + if err := st.save(r.Bundle); err != nil { + return nil, err + } + + if err := os.Mkdir(st.Rootfs, 0711); err != nil && !os.IsExist(err) { + return nil, err + } + + // Convert from types.Mount to proc.Mount. + var mounts []proc.Mount + for _, m := range r.Rootfs { + mounts = append(mounts, proc.Mount{ + Type: m.Type, + Source: m.Source, + Target: m.Target, + Options: m.Options, + }) + } + + // Cleans up all mounts in case of failure. + cu := cleanup.Make(func() { + if err := mount.UnmountAll(st.Rootfs, 0); err != nil { + log.L.Infof("failed to cleanup rootfs mount: %v", err) + } + }) + defer cu.Clean() + for _, rm := range mounts { + m := &mount.Mount{ + Type: rm.Type, + Source: rm.Source, + Options: rm.Options, + } + if err := m.Mount(st.Rootfs); err != nil { + return nil, fmt.Errorf("failed to mount rootfs component %v: %w", m, err) + } + } + + config := &proc.CreateConfig{ + ID: r.ID, + Bundle: r.Bundle, + Runtime: s.opts.BinaryName, + Rootfs: mounts, + Terminal: r.Terminal, + Stdin: r.Stdin, + Stdout: r.Stdout, + Stderr: r.Stderr, + } + process, err := newInit(r.Bundle, filepath.Join(r.Bundle, "work"), ns, s.platform, config, &s.opts, st.Rootfs) + if err != nil { + return nil, errdefs.ToGRPC(err) + } + if err := process.Create(ctx, config); err != nil { + return nil, errdefs.ToGRPC(err) + } + + // Set up OOM notification on the sandbox's cgroup. This is done on + // sandbox create since the sandbox process will be created here. + pid := process.Pid() + if pid > 0 { + cg, err := cgroups.Load(cgroups.V1, cgroups.PidPath(pid)) + if err != nil { + return nil, fmt.Errorf("loading cgroup for %d: %w", pid, err) + } + if err := s.oomPoller.add(s.id, cg); err != nil { + return nil, fmt.Errorf("add cg to OOM monitor: %w", err) + } + } + + // Success + cu.Release() + s.task = process + return &taskAPI.CreateTaskResponse{ + Pid: uint32(process.Pid()), + }, nil +} + +// Start starts a process. +func (s *service) Start(ctx context.Context, r *taskAPI.StartRequest) (*taskAPI.StartResponse, error) { + log.L.Debugf("Start, id: %s, execID: %s", r.ID, r.ExecID) + + p, err := s.getProcess(r.ExecID) + if err != nil { + return nil, err + } + if err := p.Start(ctx); err != nil { + return nil, err + } + // TODO: Set the cgroup and oom notifications on restore. + // https://github.com/google/gvisor-containerd-shim/issues/58 + return &taskAPI.StartResponse{ + Pid: uint32(p.Pid()), + }, nil +} + +// Delete deletes the initial process and container. +func (s *service) Delete(ctx context.Context, r *taskAPI.DeleteRequest) (*taskAPI.DeleteResponse, error) { + log.L.Debugf("Delete, id: %s, execID: %s", r.ID, r.ExecID) + + p, err := s.getProcess(r.ExecID) + if err != nil { + return nil, err + } + if p == nil { + return nil, errdefs.ToGRPCf(errdefs.ErrFailedPrecondition, "container must be created") + } + if err := p.Delete(ctx); err != nil { + return nil, err + } + if len(r.ExecID) != 0 { + s.mu.Lock() + delete(s.processes, r.ExecID) + s.mu.Unlock() + } else if s.platform != nil { + s.platform.Close() + } + return &taskAPI.DeleteResponse{ + ExitStatus: uint32(p.ExitStatus()), + ExitedAt: p.ExitedAt(), + Pid: uint32(p.Pid()), + }, nil +} + +// Exec spawns an additional process inside the container. +func (s *service) Exec(ctx context.Context, r *taskAPI.ExecProcessRequest) (*types.Empty, error) { + log.L.Debugf("Exec, id: %s, execID: %s", r.ID, r.ExecID) + + s.mu.Lock() + p := s.processes[r.ExecID] + s.mu.Unlock() + if p != nil { + return nil, errdefs.ToGRPCf(errdefs.ErrAlreadyExists, "id %s", r.ExecID) + } + if s.task == nil { + return nil, errdefs.ToGRPCf(errdefs.ErrFailedPrecondition, "container must be created") + } + process, err := s.task.Exec(ctx, s.bundle, &proc.ExecConfig{ + ID: r.ExecID, + Terminal: r.Terminal, + Stdin: r.Stdin, + Stdout: r.Stdout, + Stderr: r.Stderr, + Spec: r.Spec, + }) + if err != nil { + return nil, errdefs.ToGRPC(err) + } + s.mu.Lock() + s.processes[r.ExecID] = process + s.mu.Unlock() + return empty, nil +} + +// ResizePty resizes the terminal of a process. +func (s *service) ResizePty(ctx context.Context, r *taskAPI.ResizePtyRequest) (*types.Empty, error) { + log.L.Debugf("ResizePty, id: %s, execID: %s, dimension: %dx%d", r.ID, r.ExecID, r.Height, r.Width) + + p, err := s.getProcess(r.ExecID) + if err != nil { + return nil, err + } + ws := console.WinSize{ + Width: uint16(r.Width), + Height: uint16(r.Height), + } + if err := p.Resize(ws); err != nil { + return nil, errdefs.ToGRPC(err) + } + return empty, nil +} + +// State returns runtime state information for a process. +func (s *service) State(ctx context.Context, r *taskAPI.StateRequest) (*taskAPI.StateResponse, error) { + log.L.Debugf("State, id: %s, execID: %s", r.ID, r.ExecID) + + p, err := s.getProcess(r.ExecID) + if err != nil { + return nil, err + } + st, err := p.Status(ctx) + if err != nil { + return nil, err + } + status := task.StatusUnknown + switch st { + case "created": + status = task.StatusCreated + case "running": + status = task.StatusRunning + case "stopped": + status = task.StatusStopped + } + sio := p.Stdio() + return &taskAPI.StateResponse{ + ID: p.ID(), + Bundle: s.bundle, + Pid: uint32(p.Pid()), + Status: status, + Stdin: sio.Stdin, + Stdout: sio.Stdout, + Stderr: sio.Stderr, + Terminal: sio.Terminal, + ExitStatus: uint32(p.ExitStatus()), + ExitedAt: p.ExitedAt(), + }, nil +} + +// Pause the container. +func (s *service) Pause(ctx context.Context, r *taskAPI.PauseRequest) (*types.Empty, error) { + log.L.Debugf("Pause, id: %s", r.ID) + return empty, errdefs.ToGRPC(errdefs.ErrNotImplemented) +} + +// Resume the container. +func (s *service) Resume(ctx context.Context, r *taskAPI.ResumeRequest) (*types.Empty, error) { + log.L.Debugf("Resume, id: %s", r.ID) + return empty, errdefs.ToGRPC(errdefs.ErrNotImplemented) +} + +// Kill a process with the provided signal. +func (s *service) Kill(ctx context.Context, r *taskAPI.KillRequest) (*types.Empty, error) { + log.L.Debugf("Kill, id: %s, execID: %s, signal: %d, all: %t", r.ID, r.ExecID, r.Signal, r.All) + + p, err := s.getProcess(r.ExecID) + if err != nil { + return nil, err + } + if p == nil { + return nil, errdefs.ToGRPCf(errdefs.ErrFailedPrecondition, "container must be created") + } + if err := p.Kill(ctx, r.Signal, r.All); err != nil { + return nil, errdefs.ToGRPC(err) + } + return empty, nil +} + +// Pids returns all pids inside the container. +func (s *service) Pids(ctx context.Context, r *taskAPI.PidsRequest) (*taskAPI.PidsResponse, error) { + log.L.Debugf("Pids, id: %s", r.ID) + + pids, err := s.getContainerPids(ctx, r.ID) + if err != nil { + return nil, errdefs.ToGRPC(err) + } + var processes []*task.ProcessInfo + for _, pid := range pids { + pInfo := task.ProcessInfo{ + Pid: pid, + } + for _, p := range s.processes { + if p.Pid() == int(pid) { + d := &runctypes.ProcessDetails{ + ExecID: p.ID(), + } + a, err := typeurl.MarshalAny(d) + if err != nil { + return nil, fmt.Errorf("failed to marshal process %d info: %w", pid, err) + } + pInfo.Info = a + break + } + } + processes = append(processes, &pInfo) + } + return &taskAPI.PidsResponse{ + Processes: processes, + }, nil +} + +// CloseIO closes the I/O context of a process. +func (s *service) CloseIO(ctx context.Context, r *taskAPI.CloseIORequest) (*types.Empty, error) { + log.L.Debugf("CloseIO, id: %s, execID: %s, stdin: %t", r.ID, r.ExecID, r.Stdin) + + p, err := s.getProcess(r.ExecID) + if err != nil { + return nil, err + } + if stdin := p.Stdin(); stdin != nil { + if err := stdin.Close(); err != nil { + return nil, fmt.Errorf("close stdin: %w", err) + } + } + return empty, nil +} + +// Checkpoint checkpoints the container. +func (s *service) Checkpoint(ctx context.Context, r *taskAPI.CheckpointTaskRequest) (*types.Empty, error) { + log.L.Debugf("Checkpoint, id: %s", r.ID) + return empty, errdefs.ToGRPC(errdefs.ErrNotImplemented) +} + +// Connect returns shim information such as the shim's pid. +func (s *service) Connect(ctx context.Context, r *taskAPI.ConnectRequest) (*taskAPI.ConnectResponse, error) { + log.L.Debugf("Connect, id: %s", r.ID) + + var pid int + if s.task != nil { + pid = s.task.Pid() + } + return &taskAPI.ConnectResponse{ + ShimPid: uint32(os.Getpid()), + TaskPid: uint32(pid), + }, nil +} + +func (s *service) Shutdown(ctx context.Context, r *taskAPI.ShutdownRequest) (*types.Empty, error) { + log.L.Debugf("Shutdown, id: %s", r.ID) + s.cancel() + if s.shimAddress != "" { + _ = shim.RemoveSocket(s.shimAddress) + } + os.Exit(0) + panic("Should not get here") +} + +func (s *service) Stats(ctx context.Context, r *taskAPI.StatsRequest) (*taskAPI.StatsResponse, error) { + log.L.Debugf("Stats, id: %s", r.ID) + if s.task == nil { + log.L.Debugf("Stats error, id: %s: container not created", r.ID) + return nil, errdefs.ToGRPCf(errdefs.ErrFailedPrecondition, "container must be created") + } + stats, err := s.task.Runtime().Stats(ctx, s.id) + if err != nil { + log.L.Debugf("Stats error, id: %s: %v", r.ID, err) + return nil, err + } + + // gvisor currently (as of 2020-03-03) only returns the total memory + // usage and current PID value[0]. However, we copy the common fields here + // so that future updates will propagate correct information. We're + // using the cgroups.Metrics structure so we're returning the same type + // as runc. + // + // [0]: https://github.com/google/gvisor/blob/277a0d5a1fbe8272d4729c01ee4c6e374d047ebc/runsc/boot/events.go#L61-L81 + metrics := &cgroupsstats.Metrics{ + CPU: &cgroupsstats.CPUStat{ + Usage: &cgroupsstats.CPUUsage{ + Total: stats.Cpu.Usage.Total, + Kernel: stats.Cpu.Usage.Kernel, + User: stats.Cpu.Usage.User, + PerCPU: stats.Cpu.Usage.Percpu, + }, + Throttling: &cgroupsstats.Throttle{ + Periods: stats.Cpu.Throttling.Periods, + ThrottledPeriods: stats.Cpu.Throttling.ThrottledPeriods, + ThrottledTime: stats.Cpu.Throttling.ThrottledTime, + }, + }, + Memory: &cgroupsstats.MemoryStat{ + Cache: stats.Memory.Cache, + Usage: &cgroupsstats.MemoryEntry{ + Limit: stats.Memory.Usage.Limit, + Usage: stats.Memory.Usage.Usage, + Max: stats.Memory.Usage.Max, + Failcnt: stats.Memory.Usage.Failcnt, + }, + Swap: &cgroupsstats.MemoryEntry{ + Limit: stats.Memory.Swap.Limit, + Usage: stats.Memory.Swap.Usage, + Max: stats.Memory.Swap.Max, + Failcnt: stats.Memory.Swap.Failcnt, + }, + Kernel: &cgroupsstats.MemoryEntry{ + Limit: stats.Memory.Kernel.Limit, + Usage: stats.Memory.Kernel.Usage, + Max: stats.Memory.Kernel.Max, + Failcnt: stats.Memory.Kernel.Failcnt, + }, + KernelTCP: &cgroupsstats.MemoryEntry{ + Limit: stats.Memory.KernelTCP.Limit, + Usage: stats.Memory.KernelTCP.Usage, + Max: stats.Memory.KernelTCP.Max, + Failcnt: stats.Memory.KernelTCP.Failcnt, + }, + }, + Pids: &cgroupsstats.PidsStat{ + Current: stats.Pids.Current, + Limit: stats.Pids.Limit, + }, + } + data, err := typeurl.MarshalAny(metrics) + if err != nil { + log.L.Debugf("Stats error, id: %s: %v", r.ID, err) + return nil, err + } + log.L.Debugf("Stats success, id: %s: %+v", r.ID, data) + return &taskAPI.StatsResponse{ + Stats: data, + }, nil +} + +// Update updates a running container. +func (s *service) Update(ctx context.Context, r *taskAPI.UpdateTaskRequest) (*types.Empty, error) { + return empty, errdefs.ToGRPC(errdefs.ErrNotImplemented) +} + +// Wait waits for a process to exit. +func (s *service) Wait(ctx context.Context, r *taskAPI.WaitRequest) (*taskAPI.WaitResponse, error) { + log.L.Debugf("Wait, id: %s, execID: %s", r.ID, r.ExecID) + + p, err := s.getProcess(r.ExecID) + if err != nil { + return nil, err + } + if p == nil { + return nil, errdefs.ToGRPCf(errdefs.ErrFailedPrecondition, "container must be created") + } + p.Wait() + + return &taskAPI.WaitResponse{ + ExitStatus: uint32(p.ExitStatus()), + ExitedAt: p.ExitedAt(), + }, nil +} + +func (s *service) processExits(ctx context.Context) { + for e := range s.ec { + s.checkProcesses(ctx, e) + } +} + +func (s *service) checkProcesses(ctx context.Context, e proc.Exit) { + // TODO(random-liu): Add `shouldKillAll` logic if container pid + // namespace is supported. + for _, p := range s.allProcesses() { + if p.ID() == e.ID { + if ip, ok := p.(*proc.Init); ok { + // Ensure all children are killed. + log.L.Debugf("Container init process exited, killing all container processes") + if err := ip.KillAll(ctx); err != nil { + log.G(ctx).WithError(err).WithField("id", ip.ID()). + Error("failed to kill init's children") + } + } + p.SetExited(e.Status) + s.events <- &events.TaskExit{ + ContainerID: s.id, + ID: p.ID(), + Pid: uint32(p.Pid()), + ExitStatus: uint32(e.Status), + ExitedAt: p.ExitedAt(), + } + return + } + } +} + +func (s *service) allProcesses() (o []process.Process) { + s.mu.Lock() + defer s.mu.Unlock() + for _, p := range s.processes { + o = append(o, p) + } + if s.task != nil { + o = append(o, s.task) + } + return o +} + +func (s *service) getContainerPids(ctx context.Context, id string) ([]uint32, error) { + s.mu.Lock() + p := s.task + s.mu.Unlock() + if p == nil { + return nil, fmt.Errorf("container must be created: %w", errdefs.ErrFailedPrecondition) + } + ps, err := p.Runtime().Ps(ctx, id) + if err != nil { + return nil, err + } + pids := make([]uint32, 0, len(ps)) + for _, pid := range ps { + pids = append(pids, uint32(pid)) + } + return pids, nil +} + +func (s *service) forward(ctx context.Context, publisher shim.Publisher) { + for e := range s.events { + err := publisher.Publish(ctx, getTopic(e), e) + if err != nil { + // Should not happen. + panic(fmt.Errorf("post event: %w", err)) + } + } +} + +func (s *service) getProcess(execID string) (process.Process, error) { + s.mu.Lock() + defer s.mu.Unlock() + if execID == "" { + return s.task, nil + } + p := s.processes[execID] + if p == nil { + return nil, errdefs.ToGRPCf(errdefs.ErrNotFound, "process does not exist %s", execID) + } + return p, nil +} + +func getTopic(e interface{}) string { + switch e.(type) { + case *events.TaskCreate: + return runtime.TaskCreateEventTopic + case *events.TaskStart: + return runtime.TaskStartEventTopic + case *events.TaskOOM: + return runtime.TaskOOMEventTopic + case *events.TaskExit: + return runtime.TaskExitEventTopic + case *events.TaskDelete: + return runtime.TaskDeleteEventTopic + case *events.TaskExecAdded: + return runtime.TaskExecAddedEventTopic + case *events.TaskExecStarted: + return runtime.TaskExecStartedEventTopic + default: + log.L.Infof("no topic for type %#v", e) + } + return runtime.TaskUnknownTopic +} + +func newInit(path, workDir, namespace string, platform stdio.Platform, r *proc.CreateConfig, options *options, rootfs string) (*proc.Init, error) { + spec, err := utils.ReadSpec(r.Bundle) + if err != nil { + return nil, fmt.Errorf("read oci spec: %w", err) + } + if err := utils.UpdateVolumeAnnotations(r.Bundle, spec); err != nil { + return nil, fmt.Errorf("update volume annotations: %w", err) + } + runsc.FormatRunscLogPath(r.ID, options.RunscConfig) + runtime := proc.NewRunsc(options.Root, path, namespace, options.BinaryName, options.RunscConfig) + p := proc.New(r.ID, runtime, stdio.Stdio{ + Stdin: r.Stdin, + Stdout: r.Stdout, + Stderr: r.Stderr, + Terminal: r.Terminal, + }) + p.Bundle = r.Bundle + p.Platform = platform + p.Rootfs = rootfs + p.WorkDir = workDir + p.IoUID = int(options.IoUID) + p.IoGID = int(options.IoGID) + p.Sandbox = specutils.SpecContainerType(spec) == specutils.ContainerTypeSandbox + p.UserLog = utils.UserLogPath(spec) + p.Monitor = reaper.Default + return p, nil +} diff --git a/pkg/shim/service_linux.go b/pkg/shim/service_linux.go new file mode 100644 index 000000000..11622ed60 --- /dev/null +++ b/pkg/shim/service_linux.go @@ -0,0 +1,108 @@ +// Copyright 2018 The containerd Authors. +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// +build linux + +package shim + +import ( + "context" + "fmt" + "io" + "sync" + "syscall" + + "github.com/containerd/console" + "github.com/containerd/fifo" +) + +type linuxPlatform struct { + epoller *console.Epoller +} + +func (p *linuxPlatform) CopyConsole(ctx context.Context, console console.Console, stdin, stdout, stderr string, wg *sync.WaitGroup) (console.Console, error) { + if p.epoller == nil { + return nil, fmt.Errorf("uninitialized epoller") + } + + epollConsole, err := p.epoller.Add(console) + if err != nil { + return nil, err + } + + if stdin != "" { + in, err := fifo.OpenFifo(context.Background(), stdin, syscall.O_RDONLY|syscall.O_NONBLOCK, 0) + if err != nil { + return nil, err + } + go func() { + p := bufPool.Get().(*[]byte) + defer bufPool.Put(p) + io.CopyBuffer(epollConsole, in, *p) + }() + } + + outw, err := fifo.OpenFifo(ctx, stdout, syscall.O_WRONLY, 0) + if err != nil { + return nil, err + } + outr, err := fifo.OpenFifo(ctx, stdout, syscall.O_RDONLY, 0) + if err != nil { + return nil, err + } + wg.Add(1) + go func() { + p := bufPool.Get().(*[]byte) + defer bufPool.Put(p) + io.CopyBuffer(outw, epollConsole, *p) + epollConsole.Close() + outr.Close() + outw.Close() + wg.Done() + }() + return epollConsole, nil +} + +func (p *linuxPlatform) ShutdownConsole(ctx context.Context, cons console.Console) error { + if p.epoller == nil { + return fmt.Errorf("uninitialized epoller") + } + epollConsole, ok := cons.(*console.EpollConsole) + if !ok { + return fmt.Errorf("expected EpollConsole, got %#v", cons) + } + return epollConsole.Shutdown(p.epoller.CloseConsole) +} + +func (p *linuxPlatform) Close() error { + return p.epoller.Close() +} + +// initialize a single epoll fd to manage our consoles. `initPlatform` should +// only be called once. +func (s *service) initPlatform() error { + if s.platform != nil { + return nil + } + epoller, err := console.NewEpoller() + if err != nil { + return fmt.Errorf("failed to initialize epoller: %w", err) + } + s.platform = &linuxPlatform{ + epoller: epoller, + } + go epoller.Wait() + return nil +} diff --git a/pkg/shim/state.go b/pkg/shim/state.go new file mode 100644 index 000000000..5e9e92ec3 --- /dev/null +++ b/pkg/shim/state.go @@ -0,0 +1,48 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package shim + +import ( + "encoding/json" + "io/ioutil" + "path/filepath" +) + +const filename = "state.json" + +// state holds information needed between shim invocations. +type state struct { + // Rootfs is the full path to the location rootfs was mounted. + Rootfs string `json:"rootfs"` + + // Options is the configuration loaded from config.toml. + Options options `json:"options"` +} + +func (s state) load(path string) error { + data, err := ioutil.ReadFile(filepath.Join(path, filename)) + if err != nil { + return err + } + return json.Unmarshal(data, &s) +} + +func (s state) save(path string) error { + data, err := json.Marshal(&s) + if err != nil { + return err + } + return ioutil.WriteFile(filepath.Join(path, filename), data, 0644) +} diff --git a/pkg/shim/utils/BUILD b/pkg/shim/utils/BUILD new file mode 100644 index 000000000..54a0aabb7 --- /dev/null +++ b/pkg/shim/utils/BUILD @@ -0,0 +1,27 @@ +load("//tools:defs.bzl", "go_library", "go_test") + +package(licenses = ["notice"]) + +go_library( + name = "utils", + srcs = [ + "annotations.go", + "utils.go", + "volumes.go", + ], + visibility = [ + "//pkg/shim:__subpackages__", + "//shim:__subpackages__", + ], + deps = [ + "@com_github_opencontainers_runtime_spec//specs-go:go_default_library", + ], +) + +go_test( + name = "utils_test", + size = "small", + srcs = ["volumes_test.go"], + library = ":utils", + deps = ["@com_github_opencontainers_runtime_spec//specs-go:go_default_library"], +) diff --git a/pkg/shim/utils/annotations.go b/pkg/shim/utils/annotations.go new file mode 100644 index 000000000..1e9d3f365 --- /dev/null +++ b/pkg/shim/utils/annotations.go @@ -0,0 +1,25 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package utils + +// Annotations from the CRI annotations package. +// +// These are vendor due to import conflicts. +const ( + sandboxLogDirAnnotation = "io.kubernetes.cri.sandbox-log-directory" + containerTypeAnnotation = "io.kubernetes.cri.container-type" + containerTypeSandbox = "sandbox" + containerTypeContainer = "container" +) diff --git a/pkg/shim/utils/utils.go b/pkg/shim/utils/utils.go new file mode 100644 index 000000000..7b1cd983e --- /dev/null +++ b/pkg/shim/utils/utils.go @@ -0,0 +1,57 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package utils container miscellaneous utility function used by the shim. +package utils + +import ( + "encoding/json" + "io/ioutil" + "os" + "path/filepath" + + specs "github.com/opencontainers/runtime-spec/specs-go" +) + +// ReadSpec reads OCI spec from the bundle directory. +func ReadSpec(bundle string) (*specs.Spec, error) { + f, err := os.Open(filepath.Join(bundle, "config.json")) + if err != nil { + return nil, err + } + b, err := ioutil.ReadAll(f) + if err != nil { + return nil, err + } + var spec specs.Spec + if err := json.Unmarshal(b, &spec); err != nil { + return nil, err + } + return &spec, nil +} + +// IsSandbox checks whether a container is a sandbox container. +func IsSandbox(spec *specs.Spec) bool { + t, ok := spec.Annotations[containerTypeAnnotation] + return !ok || t == containerTypeSandbox +} + +// UserLogPath gets user log path from OCI annotation. +func UserLogPath(spec *specs.Spec) string { + sandboxLogDir := spec.Annotations[sandboxLogDirAnnotation] + if sandboxLogDir == "" { + return "" + } + return filepath.Join(sandboxLogDir, "gvisor.log") +} diff --git a/pkg/shim/utils/volumes.go b/pkg/shim/utils/volumes.go new file mode 100644 index 000000000..52a428179 --- /dev/null +++ b/pkg/shim/utils/volumes.go @@ -0,0 +1,155 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package utils + +import ( + "encoding/json" + "fmt" + "io/ioutil" + "path/filepath" + "strings" + + specs "github.com/opencontainers/runtime-spec/specs-go" +) + +const volumeKeyPrefix = "dev.gvisor.spec.mount." + +var kubeletPodsDir = "/var/lib/kubelet/pods" + +// volumeName gets volume name from volume annotation key, example: +// dev.gvisor.spec.mount.NAME.share +func volumeName(k string) string { + return strings.SplitN(strings.TrimPrefix(k, volumeKeyPrefix), ".", 2)[0] +} + +// volumeFieldName gets volume field name from volume annotation key, example: +// `type` is the field of dev.gvisor.spec.mount.NAME.type +func volumeFieldName(k string) string { + parts := strings.Split(strings.TrimPrefix(k, volumeKeyPrefix), ".") + return parts[len(parts)-1] +} + +// podUID gets pod UID from the pod log path. +func podUID(s *specs.Spec) (string, error) { + sandboxLogDir := s.Annotations[sandboxLogDirAnnotation] + if sandboxLogDir == "" { + return "", fmt.Errorf("no sandbox log path annotation") + } + fields := strings.Split(filepath.Base(sandboxLogDir), "_") + switch len(fields) { + case 1: // This is the old CRI logging path. + return fields[0], nil + case 3: // This is the new CRI logging path. + return fields[2], nil + } + return "", fmt.Errorf("unexpected sandbox log path %q", sandboxLogDir) +} + +// isVolumeKey checks whether an annotation key is for volume. +func isVolumeKey(k string) bool { + return strings.HasPrefix(k, volumeKeyPrefix) +} + +// volumeSourceKey constructs the annotation key for volume source. +func volumeSourceKey(volume string) string { + return volumeKeyPrefix + volume + ".source" +} + +// volumePath searches the volume path in the kubelet pod directory. +func volumePath(volume, uid string) (string, error) { + // TODO: Support subpath when gvisor supports pod volume bind mount. + volumeSearchPath := fmt.Sprintf("%s/%s/volumes/*/%s", kubeletPodsDir, uid, volume) + dirs, err := filepath.Glob(volumeSearchPath) + if err != nil { + return "", err + } + if len(dirs) != 1 { + return "", fmt.Errorf("unexpected matched volume list %v", dirs) + } + return dirs[0], nil +} + +// isVolumePath checks whether a string is the volume path. +func isVolumePath(volume, path string) (bool, error) { + // TODO: Support subpath when gvisor supports pod volume bind mount. + volumeSearchPath := fmt.Sprintf("%s/*/volumes/*/%s", kubeletPodsDir, volume) + return filepath.Match(volumeSearchPath, path) +} + +// UpdateVolumeAnnotations add necessary OCI annotations for gvisor +// volume optimization. +func UpdateVolumeAnnotations(bundle string, s *specs.Spec) error { + var ( + uid string + err error + ) + if IsSandbox(s) { + uid, err = podUID(s) + if err != nil { + // Skip if we can't get pod UID, because this doesn't work + // for containerd 1.1. + return nil + } + } + var updated bool + for k, v := range s.Annotations { + if !isVolumeKey(k) { + continue + } + if volumeFieldName(k) != "type" { + continue + } + volume := volumeName(k) + if uid != "" { + // This is a sandbox. + path, err := volumePath(volume, uid) + if err != nil { + return fmt.Errorf("get volume path for %q: %w", volume, err) + } + s.Annotations[volumeSourceKey(volume)] = path + updated = true + } else { + // This is a container. + for i := range s.Mounts { + // An error is returned for sandbox if source + // annotation is not successfully applied, so + // it is guaranteed that the source annotation + // for sandbox has already been successfully + // applied at this point. + // + // The volume name is unique inside a pod, so + // matching without podUID is fine here. + // + // TODO: Pass podUID down to shim for containers to do + // more accurate matching. + if yes, _ := isVolumePath(volume, s.Mounts[i].Source); yes { + // gVisor requires the container mount type to match + // sandbox mount type. + s.Mounts[i].Type = v + updated = true + } + } + } + } + if !updated { + return nil + } + // Update bundle. + b, err := json.Marshal(s) + if err != nil { + return err + } + return ioutil.WriteFile(filepath.Join(bundle, "config.json"), b, 0666) +} diff --git a/pkg/shim/utils/volumes_test.go b/pkg/shim/utils/volumes_test.go new file mode 100644 index 000000000..3e02c6151 --- /dev/null +++ b/pkg/shim/utils/volumes_test.go @@ -0,0 +1,308 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package utils + +import ( + "encoding/json" + "fmt" + "io/ioutil" + "os" + "path/filepath" + "reflect" + "testing" + + specs "github.com/opencontainers/runtime-spec/specs-go" +) + +func TestUpdateVolumeAnnotations(t *testing.T) { + dir, err := ioutil.TempDir("", "test-update-volume-annotations") + if err != nil { + t.Fatalf("create tempdir: %v", err) + } + defer os.RemoveAll(dir) + kubeletPodsDir = dir + + const ( + testPodUID = "testuid" + testVolumeName = "testvolume" + testLogDirPath = "/var/log/pods/testns_testname_" + testPodUID + testLegacyLogDirPath = "/var/log/pods/" + testPodUID + ) + testVolumePath := fmt.Sprintf("%s/%s/volumes/kubernetes.io~empty-dir/%s", dir, testPodUID, testVolumeName) + + if err := os.MkdirAll(testVolumePath, 0755); err != nil { + t.Fatalf("Create test volume: %v", err) + } + + for _, test := range []struct { + desc string + spec *specs.Spec + expected *specs.Spec + expectErr bool + expectUpdate bool + }{ + { + desc: "volume annotations for sandbox", + spec: &specs.Spec{ + Annotations: map[string]string{ + sandboxLogDirAnnotation: testLogDirPath, + containerTypeAnnotation: containerTypeSandbox, + "dev.gvisor.spec.mount." + testVolumeName + ".share": "pod", + "dev.gvisor.spec.mount." + testVolumeName + ".type": "tmpfs", + "dev.gvisor.spec.mount." + testVolumeName + ".options": "ro", + }, + }, + expected: &specs.Spec{ + Annotations: map[string]string{ + sandboxLogDirAnnotation: testLogDirPath, + containerTypeAnnotation: containerTypeSandbox, + "dev.gvisor.spec.mount." + testVolumeName + ".share": "pod", + "dev.gvisor.spec.mount." + testVolumeName + ".type": "tmpfs", + "dev.gvisor.spec.mount." + testVolumeName + ".options": "ro", + "dev.gvisor.spec.mount." + testVolumeName + ".source": testVolumePath, + }, + }, + expectUpdate: true, + }, + { + desc: "volume annotations for sandbox with legacy log path", + spec: &specs.Spec{ + Annotations: map[string]string{ + sandboxLogDirAnnotation: testLegacyLogDirPath, + containerTypeAnnotation: containerTypeSandbox, + "dev.gvisor.spec.mount." + testVolumeName + ".share": "pod", + "dev.gvisor.spec.mount." + testVolumeName + ".type": "tmpfs", + "dev.gvisor.spec.mount." + testVolumeName + ".options": "ro", + }, + }, + expected: &specs.Spec{ + Annotations: map[string]string{ + sandboxLogDirAnnotation: testLegacyLogDirPath, + containerTypeAnnotation: containerTypeSandbox, + "dev.gvisor.spec.mount." + testVolumeName + ".share": "pod", + "dev.gvisor.spec.mount." + testVolumeName + ".type": "tmpfs", + "dev.gvisor.spec.mount." + testVolumeName + ".options": "ro", + "dev.gvisor.spec.mount." + testVolumeName + ".source": testVolumePath, + }, + }, + expectUpdate: true, + }, + { + desc: "tmpfs: volume annotations for container", + spec: &specs.Spec{ + Mounts: []specs.Mount{ + { + Destination: "/test", + Type: "bind", + Source: testVolumePath, + Options: []string{"ro"}, + }, + { + Destination: "/random", + Type: "bind", + Source: "/random", + Options: []string{"ro"}, + }, + }, + Annotations: map[string]string{ + containerTypeAnnotation: containerTypeContainer, + "dev.gvisor.spec.mount." + testVolumeName + ".share": "pod", + "dev.gvisor.spec.mount." + testVolumeName + ".type": "tmpfs", + "dev.gvisor.spec.mount." + testVolumeName + ".options": "ro", + }, + }, + expected: &specs.Spec{ + Mounts: []specs.Mount{ + { + Destination: "/test", + Type: "tmpfs", + Source: testVolumePath, + Options: []string{"ro"}, + }, + { + Destination: "/random", + Type: "bind", + Source: "/random", + Options: []string{"ro"}, + }, + }, + Annotations: map[string]string{ + containerTypeAnnotation: containerTypeContainer, + "dev.gvisor.spec.mount." + testVolumeName + ".share": "pod", + "dev.gvisor.spec.mount." + testVolumeName + ".type": "tmpfs", + "dev.gvisor.spec.mount." + testVolumeName + ".options": "ro", + }, + }, + expectUpdate: true, + }, + { + desc: "bind: volume annotations for container", + spec: &specs.Spec{ + Mounts: []specs.Mount{ + { + Destination: "/test", + Type: "bind", + Source: testVolumePath, + Options: []string{"ro"}, + }, + }, + Annotations: map[string]string{ + containerTypeAnnotation: containerTypeContainer, + "dev.gvisor.spec.mount." + testVolumeName + ".share": "container", + "dev.gvisor.spec.mount." + testVolumeName + ".type": "bind", + "dev.gvisor.spec.mount." + testVolumeName + ".options": "ro", + }, + }, + expected: &specs.Spec{ + Mounts: []specs.Mount{ + { + Destination: "/test", + Type: "bind", + Source: testVolumePath, + Options: []string{"ro"}, + }, + }, + Annotations: map[string]string{ + containerTypeAnnotation: containerTypeContainer, + "dev.gvisor.spec.mount." + testVolumeName + ".share": "container", + "dev.gvisor.spec.mount." + testVolumeName + ".type": "bind", + "dev.gvisor.spec.mount." + testVolumeName + ".options": "ro", + }, + }, + expectUpdate: true, + }, + { + desc: "should not return error without pod log directory", + spec: &specs.Spec{ + Annotations: map[string]string{ + containerTypeAnnotation: containerTypeSandbox, + "dev.gvisor.spec.mount." + testVolumeName + ".share": "pod", + "dev.gvisor.spec.mount." + testVolumeName + ".type": "tmpfs", + "dev.gvisor.spec.mount." + testVolumeName + ".options": "ro", + }, + }, + expected: &specs.Spec{ + Annotations: map[string]string{ + containerTypeAnnotation: containerTypeSandbox, + "dev.gvisor.spec.mount." + testVolumeName + ".share": "pod", + "dev.gvisor.spec.mount." + testVolumeName + ".type": "tmpfs", + "dev.gvisor.spec.mount." + testVolumeName + ".options": "ro", + }, + }, + }, + { + desc: "should return error if volume path does not exist", + spec: &specs.Spec{ + Annotations: map[string]string{ + sandboxLogDirAnnotation: testLogDirPath, + containerTypeAnnotation: containerTypeSandbox, + "dev.gvisor.spec.mount.notexist.share": "pod", + "dev.gvisor.spec.mount.notexist.type": "tmpfs", + "dev.gvisor.spec.mount.notexist.options": "ro", + }, + }, + expectErr: true, + }, + { + desc: "no volume annotations for sandbox", + spec: &specs.Spec{ + Annotations: map[string]string{ + sandboxLogDirAnnotation: testLogDirPath, + containerTypeAnnotation: containerTypeSandbox, + }, + }, + expected: &specs.Spec{ + Annotations: map[string]string{ + sandboxLogDirAnnotation: testLogDirPath, + containerTypeAnnotation: containerTypeSandbox, + }, + }, + }, + { + desc: "no volume annotations for container", + spec: &specs.Spec{ + Mounts: []specs.Mount{ + { + Destination: "/test", + Type: "bind", + Source: "/test", + Options: []string{"ro"}, + }, + { + Destination: "/random", + Type: "bind", + Source: "/random", + Options: []string{"ro"}, + }, + }, + Annotations: map[string]string{ + containerTypeAnnotation: containerTypeContainer, + }, + }, + expected: &specs.Spec{ + Mounts: []specs.Mount{ + { + Destination: "/test", + Type: "bind", + Source: "/test", + Options: []string{"ro"}, + }, + { + Destination: "/random", + Type: "bind", + Source: "/random", + Options: []string{"ro"}, + }, + }, + Annotations: map[string]string{ + containerTypeAnnotation: containerTypeContainer, + }, + }, + }, + } { + t.Run(test.desc, func(t *testing.T) { + bundle, err := ioutil.TempDir(dir, "test-bundle") + if err != nil { + t.Fatalf("Create test bundle: %v", err) + } + err = UpdateVolumeAnnotations(bundle, test.spec) + if test.expectErr { + if err == nil { + t.Fatal("Expected error, but got nil") + } + return + } + if err != nil { + t.Fatalf("Unexpected error: %v", err) + } + if !reflect.DeepEqual(test.expected, test.spec) { + t.Fatalf("Expected %+v, got %+v", test.expected, test.spec) + } + if test.expectUpdate { + b, err := ioutil.ReadFile(filepath.Join(bundle, "config.json")) + if err != nil { + t.Fatalf("Read spec from bundle: %v", err) + } + var spec specs.Spec + if err := json.Unmarshal(b, &spec); err != nil { + t.Fatalf("Unmarshal spec: %v", err) + } + if !reflect.DeepEqual(test.expected, &spec) { + t.Fatalf("Expected %+v, got %+v", test.expected, &spec) + } + } + }) + } +} diff --git a/pkg/shim/v1/proc/BUILD b/pkg/shim/v1/proc/BUILD deleted file mode 100644 index 4377306af..000000000 --- a/pkg/shim/v1/proc/BUILD +++ /dev/null @@ -1,36 +0,0 @@ -load("//tools:defs.bzl", "go_library") - -package(licenses = ["notice"]) - -go_library( - name = "proc", - srcs = [ - "deleted_state.go", - "exec.go", - "exec_state.go", - "init.go", - "init_state.go", - "io.go", - "process.go", - "types.go", - "utils.go", - ], - visibility = [ - "//pkg/shim:__subpackages__", - "//shim:__subpackages__", - ], - deps = [ - "//pkg/shim/runsc", - "@com_github_containerd_console//:go_default_library", - "@com_github_containerd_containerd//errdefs:go_default_library", - "@com_github_containerd_containerd//log:go_default_library", - "@com_github_containerd_containerd//mount:go_default_library", - "@com_github_containerd_containerd//pkg/process:go_default_library", - "@com_github_containerd_containerd//pkg/stdio:go_default_library", - "@com_github_containerd_fifo//:go_default_library", - "@com_github_containerd_go_runc//:go_default_library", - "@com_github_gogo_protobuf//types:go_default_library", - "@com_github_opencontainers_runtime_spec//specs-go:go_default_library", - "@org_golang_x_sys//unix:go_default_library", - ], -) diff --git a/pkg/shim/v1/proc/deleted_state.go b/pkg/shim/v1/proc/deleted_state.go deleted file mode 100644 index d9b970c4d..000000000 --- a/pkg/shim/v1/proc/deleted_state.go +++ /dev/null @@ -1,49 +0,0 @@ -// Copyright 2018 The containerd Authors. -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package proc - -import ( - "context" - "fmt" - - "github.com/containerd/console" - "github.com/containerd/containerd/errdefs" - "github.com/containerd/containerd/pkg/process" -) - -type deletedState struct{} - -func (*deletedState) Resize(ws console.WinSize) error { - return fmt.Errorf("cannot resize a deleted process.ss") -} - -func (*deletedState) Start(ctx context.Context) error { - return fmt.Errorf("cannot start a deleted process.ss") -} - -func (*deletedState) Delete(ctx context.Context) error { - return fmt.Errorf("cannot delete a deleted process.ss: %w", errdefs.ErrNotFound) -} - -func (*deletedState) Kill(ctx context.Context, sig uint32, all bool) error { - return fmt.Errorf("cannot kill a deleted process.ss: %w", errdefs.ErrNotFound) -} - -func (*deletedState) SetExited(status int) {} - -func (*deletedState) Exec(ctx context.Context, path string, r *ExecConfig) (process.Process, error) { - return nil, fmt.Errorf("cannot exec in a deleted state") -} diff --git a/pkg/shim/v1/proc/exec.go b/pkg/shim/v1/proc/exec.go deleted file mode 100644 index 1d1d90488..000000000 --- a/pkg/shim/v1/proc/exec.go +++ /dev/null @@ -1,281 +0,0 @@ -// Copyright 2018 The containerd Authors. -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package proc - -import ( - "context" - "fmt" - "io" - "os" - "path/filepath" - "sync" - "syscall" - "time" - - "github.com/containerd/console" - "github.com/containerd/containerd/errdefs" - "github.com/containerd/containerd/pkg/stdio" - "github.com/containerd/fifo" - runc "github.com/containerd/go-runc" - specs "github.com/opencontainers/runtime-spec/specs-go" - "golang.org/x/sys/unix" - - "gvisor.dev/gvisor/pkg/shim/runsc" -) - -type execProcess struct { - wg sync.WaitGroup - - execState execState - - mu sync.Mutex - id string - console console.Console - io runc.IO - status int - exited time.Time - pid int - internalPid int - closers []io.Closer - stdin io.Closer - stdio stdio.Stdio - path string - spec specs.Process - - parent *Init - waitBlock chan struct{} -} - -func (e *execProcess) Wait() { - <-e.waitBlock -} - -func (e *execProcess) ID() string { - return e.id -} - -func (e *execProcess) Pid() int { - e.mu.Lock() - defer e.mu.Unlock() - return e.pid -} - -func (e *execProcess) ExitStatus() int { - e.mu.Lock() - defer e.mu.Unlock() - return e.status -} - -func (e *execProcess) ExitedAt() time.Time { - e.mu.Lock() - defer e.mu.Unlock() - return e.exited -} - -func (e *execProcess) SetExited(status int) { - e.mu.Lock() - defer e.mu.Unlock() - - e.execState.SetExited(status) -} - -func (e *execProcess) setExited(status int) { - e.status = status - e.exited = time.Now() - e.parent.Platform.ShutdownConsole(context.Background(), e.console) - close(e.waitBlock) -} - -func (e *execProcess) Delete(ctx context.Context) error { - e.mu.Lock() - defer e.mu.Unlock() - - return e.execState.Delete(ctx) -} - -func (e *execProcess) delete(ctx context.Context) error { - e.wg.Wait() - if e.io != nil { - for _, c := range e.closers { - c.Close() - } - e.io.Close() - } - pidfile := filepath.Join(e.path, fmt.Sprintf("%s.pid", e.id)) - // silently ignore error - os.Remove(pidfile) - internalPidfile := filepath.Join(e.path, fmt.Sprintf("%s-internal.pid", e.id)) - // silently ignore error - os.Remove(internalPidfile) - return nil -} - -func (e *execProcess) Resize(ws console.WinSize) error { - e.mu.Lock() - defer e.mu.Unlock() - - return e.execState.Resize(ws) -} - -func (e *execProcess) resize(ws console.WinSize) error { - if e.console == nil { - return nil - } - return e.console.Resize(ws) -} - -func (e *execProcess) Kill(ctx context.Context, sig uint32, _ bool) error { - e.mu.Lock() - defer e.mu.Unlock() - - return e.execState.Kill(ctx, sig, false) -} - -func (e *execProcess) kill(ctx context.Context, sig uint32, _ bool) error { - internalPid := e.internalPid - if internalPid != 0 { - if err := e.parent.runtime.Kill(ctx, e.parent.id, int(sig), &runsc.KillOpts{ - Pid: internalPid, - }); err != nil { - // If this returns error, consider the process has - // already stopped. - // - // TODO: Fix after signal handling is fixed. - return fmt.Errorf("%s: %w", err.Error(), errdefs.ErrNotFound) - } - } - return nil -} - -func (e *execProcess) Stdin() io.Closer { - return e.stdin -} - -func (e *execProcess) Stdio() stdio.Stdio { - return e.stdio -} - -func (e *execProcess) Start(ctx context.Context) error { - e.mu.Lock() - defer e.mu.Unlock() - - return e.execState.Start(ctx) -} - -func (e *execProcess) start(ctx context.Context) (err error) { - var ( - socket *runc.Socket - pidfile = filepath.Join(e.path, fmt.Sprintf("%s.pid", e.id)) - internalPidfile = filepath.Join(e.path, fmt.Sprintf("%s-internal.pid", e.id)) - ) - if e.stdio.Terminal { - if socket, err = runc.NewTempConsoleSocket(); err != nil { - return fmt.Errorf("failed to create runc console socket: %w", err) - } - defer socket.Close() - } else if e.stdio.IsNull() { - if e.io, err = runc.NewNullIO(); err != nil { - return fmt.Errorf("creating new NULL IO: %w", err) - } - } else { - if e.io, err = runc.NewPipeIO(e.parent.IoUID, e.parent.IoGID, withConditionalIO(e.stdio)); err != nil { - return fmt.Errorf("failed to create runc io pipes: %w", err) - } - } - opts := &runsc.ExecOpts{ - PidFile: pidfile, - InternalPidFile: internalPidfile, - IO: e.io, - Detach: true, - } - if socket != nil { - opts.ConsoleSocket = socket - } - eventCh := e.parent.Monitor.Subscribe() - defer func() { - // Unsubscribe if an error is returned. - if err != nil { - e.parent.Monitor.Unsubscribe(eventCh) - } - }() - if err := e.parent.runtime.Exec(ctx, e.parent.id, e.spec, opts); err != nil { - close(e.waitBlock) - return e.parent.runtimeError(err, "OCI runtime exec failed") - } - if e.stdio.Stdin != "" { - sc, err := fifo.OpenFifo(context.Background(), e.stdio.Stdin, syscall.O_WRONLY|syscall.O_NONBLOCK, 0) - if err != nil { - return fmt.Errorf("failed to open stdin fifo %s: %w", e.stdio.Stdin, err) - } - e.closers = append(e.closers, sc) - e.stdin = sc - } - ctx, cancel := context.WithTimeout(ctx, 30*time.Second) - defer cancel() - if socket != nil { - console, err := socket.ReceiveMaster() - if err != nil { - return fmt.Errorf("failed to retrieve console master: %w", err) - } - if e.console, err = e.parent.Platform.CopyConsole(ctx, console, e.stdio.Stdin, e.stdio.Stdout, e.stdio.Stderr, &e.wg); err != nil { - return fmt.Errorf("failed to start console copy: %w", err) - } - } else if !e.stdio.IsNull() { - if err := copyPipes(ctx, e.io, e.stdio.Stdin, e.stdio.Stdout, e.stdio.Stderr, &e.wg); err != nil { - return fmt.Errorf("failed to start io pipe copy: %w", err) - } - } - pid, err := runc.ReadPidFile(opts.PidFile) - if err != nil { - return fmt.Errorf("failed to retrieve OCI runtime exec pid: %w", err) - } - e.pid = pid - internalPid, err := runc.ReadPidFile(opts.InternalPidFile) - if err != nil { - return fmt.Errorf("failed to retrieve OCI runtime exec internal pid: %w", err) - } - e.internalPid = internalPid - go func() { - defer e.parent.Monitor.Unsubscribe(eventCh) - for event := range eventCh { - if event.Pid == e.pid { - ExitCh <- Exit{ - Timestamp: event.Timestamp, - ID: e.id, - Status: event.Status, - } - break - } - } - }() - return nil -} - -func (e *execProcess) Status(ctx context.Context) (string, error) { - e.mu.Lock() - defer e.mu.Unlock() - // if we don't have a pid then the exec process has just been created - if e.pid == 0 { - return "created", nil - } - // if we have a pid and it can be signaled, the process is running - // TODO(random-liu): Use `runsc kill --pid`. - if err := unix.Kill(e.pid, 0); err == nil { - return "running", nil - } - // else if we have a pid but it can nolonger be signaled, it has stopped - return "stopped", nil -} diff --git a/pkg/shim/v1/proc/exec_state.go b/pkg/shim/v1/proc/exec_state.go deleted file mode 100644 index 4dcda8b44..000000000 --- a/pkg/shim/v1/proc/exec_state.go +++ /dev/null @@ -1,154 +0,0 @@ -// Copyright 2018 The containerd Authors. -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package proc - -import ( - "context" - "fmt" - - "github.com/containerd/console" -) - -type execState interface { - Resize(console.WinSize) error - Start(context.Context) error - Delete(context.Context) error - Kill(context.Context, uint32, bool) error - SetExited(int) -} - -type execCreatedState struct { - p *execProcess -} - -func (s *execCreatedState) transition(name string) error { - switch name { - case "running": - s.p.execState = &execRunningState{p: s.p} - case "stopped": - s.p.execState = &execStoppedState{p: s.p} - case "deleted": - s.p.execState = &deletedState{} - default: - return fmt.Errorf("invalid state transition %q to %q", stateName(s), name) - } - return nil -} - -func (s *execCreatedState) Resize(ws console.WinSize) error { - return s.p.resize(ws) -} - -func (s *execCreatedState) Start(ctx context.Context) error { - if err := s.p.start(ctx); err != nil { - return err - } - return s.transition("running") -} - -func (s *execCreatedState) Delete(ctx context.Context) error { - if err := s.p.delete(ctx); err != nil { - return err - } - return s.transition("deleted") -} - -func (s *execCreatedState) Kill(ctx context.Context, sig uint32, all bool) error { - return s.p.kill(ctx, sig, all) -} - -func (s *execCreatedState) SetExited(status int) { - s.p.setExited(status) - - if err := s.transition("stopped"); err != nil { - panic(err) - } -} - -type execRunningState struct { - p *execProcess -} - -func (s *execRunningState) transition(name string) error { - switch name { - case "stopped": - s.p.execState = &execStoppedState{p: s.p} - default: - return fmt.Errorf("invalid state transition %q to %q", stateName(s), name) - } - return nil -} - -func (s *execRunningState) Resize(ws console.WinSize) error { - return s.p.resize(ws) -} - -func (s *execRunningState) Start(ctx context.Context) error { - return fmt.Errorf("cannot start a running process") -} - -func (s *execRunningState) Delete(ctx context.Context) error { - return fmt.Errorf("cannot delete a running process") -} - -func (s *execRunningState) Kill(ctx context.Context, sig uint32, all bool) error { - return s.p.kill(ctx, sig, all) -} - -func (s *execRunningState) SetExited(status int) { - s.p.setExited(status) - - if err := s.transition("stopped"); err != nil { - panic(err) - } -} - -type execStoppedState struct { - p *execProcess -} - -func (s *execStoppedState) transition(name string) error { - switch name { - case "deleted": - s.p.execState = &deletedState{} - default: - return fmt.Errorf("invalid state transition %q to %q", stateName(s), name) - } - return nil -} - -func (s *execStoppedState) Resize(ws console.WinSize) error { - return fmt.Errorf("cannot resize a stopped container") -} - -func (s *execStoppedState) Start(ctx context.Context) error { - return fmt.Errorf("cannot start a stopped process") -} - -func (s *execStoppedState) Delete(ctx context.Context) error { - if err := s.p.delete(ctx); err != nil { - return err - } - return s.transition("deleted") -} - -func (s *execStoppedState) Kill(ctx context.Context, sig uint32, all bool) error { - return s.p.kill(ctx, sig, all) -} - -func (s *execStoppedState) SetExited(status int) { - // no op -} diff --git a/pkg/shim/v1/proc/init.go b/pkg/shim/v1/proc/init.go deleted file mode 100644 index 9fd7d978c..000000000 --- a/pkg/shim/v1/proc/init.go +++ /dev/null @@ -1,460 +0,0 @@ -// Copyright 2018 The containerd Authors. -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package proc - -import ( - "context" - "encoding/json" - "fmt" - "io" - "path/filepath" - "strings" - "sync" - "syscall" - "time" - - "github.com/containerd/console" - "github.com/containerd/containerd/errdefs" - "github.com/containerd/containerd/log" - "github.com/containerd/containerd/mount" - "github.com/containerd/containerd/pkg/process" - "github.com/containerd/containerd/pkg/stdio" - "github.com/containerd/fifo" - runc "github.com/containerd/go-runc" - specs "github.com/opencontainers/runtime-spec/specs-go" - - "gvisor.dev/gvisor/pkg/shim/runsc" -) - -// InitPidFile name of the file that contains the init pid. -const InitPidFile = "init.pid" - -// Init represents an initial process for a container. -type Init struct { - wg sync.WaitGroup - initState initState - - // mu is used to ensure that `Start()` and `Exited()` calls return in - // the right order when invoked in separate go routines. This is the - // case within the shim implementation as it makes use of the reaper - // interface. - mu sync.Mutex - - waitBlock chan struct{} - - WorkDir string - - id string - Bundle string - console console.Console - Platform stdio.Platform - io runc.IO - runtime *runsc.Runsc - status int - exited time.Time - pid int - closers []io.Closer - stdin io.Closer - stdio stdio.Stdio - Rootfs string - IoUID int - IoGID int - Sandbox bool - UserLog string - Monitor ProcessMonitor -} - -// NewRunsc returns a new runsc instance for a process. -func NewRunsc(root, path, namespace, runtime string, config map[string]string) *runsc.Runsc { - if root == "" { - root = RunscRoot - } - return &runsc.Runsc{ - Command: runtime, - PdeathSignal: syscall.SIGKILL, - Log: filepath.Join(path, "log.json"), - LogFormat: runc.JSON, - Root: filepath.Join(root, namespace), - Config: config, - } -} - -// New returns a new init process. -func New(id string, runtime *runsc.Runsc, stdio stdio.Stdio) *Init { - p := &Init{ - id: id, - runtime: runtime, - stdio: stdio, - status: 0, - waitBlock: make(chan struct{}), - } - p.initState = &createdState{p: p} - return p -} - -// Create the process with the provided config. -func (p *Init) Create(ctx context.Context, r *CreateConfig) (err error) { - var socket *runc.Socket - if r.Terminal { - if socket, err = runc.NewTempConsoleSocket(); err != nil { - return fmt.Errorf("failed to create OCI runtime console socket: %w", err) - } - defer socket.Close() - } else if hasNoIO(r) { - if p.io, err = runc.NewNullIO(); err != nil { - return fmt.Errorf("creating new NULL IO: %w", err) - } - } else { - if p.io, err = runc.NewPipeIO(p.IoUID, p.IoGID, withConditionalIO(p.stdio)); err != nil { - return fmt.Errorf("failed to create OCI runtime io pipes: %w", err) - } - } - pidFile := filepath.Join(p.Bundle, InitPidFile) - opts := &runsc.CreateOpts{ - PidFile: pidFile, - } - if socket != nil { - opts.ConsoleSocket = socket - } - if p.Sandbox { - opts.IO = p.io - // UserLog is only useful for sandbox. - opts.UserLog = p.UserLog - } - if err := p.runtime.Create(ctx, r.ID, r.Bundle, opts); err != nil { - return p.runtimeError(err, "OCI runtime create failed") - } - if r.Stdin != "" { - sc, err := fifo.OpenFifo(context.Background(), r.Stdin, syscall.O_WRONLY|syscall.O_NONBLOCK, 0) - if err != nil { - return fmt.Errorf("failed to open stdin fifo %s: %w", r.Stdin, err) - } - p.stdin = sc - p.closers = append(p.closers, sc) - } - ctx, cancel := context.WithTimeout(ctx, 30*time.Second) - defer cancel() - if socket != nil { - console, err := socket.ReceiveMaster() - if err != nil { - return fmt.Errorf("failed to retrieve console master: %w", err) - } - console, err = p.Platform.CopyConsole(ctx, console, r.Stdin, r.Stdout, r.Stderr, &p.wg) - if err != nil { - return fmt.Errorf("failed to start console copy: %w", err) - } - p.console = console - } else if !hasNoIO(r) { - if err := copyPipes(ctx, p.io, r.Stdin, r.Stdout, r.Stderr, &p.wg); err != nil { - return fmt.Errorf("failed to start io pipe copy: %w", err) - } - } - pid, err := runc.ReadPidFile(pidFile) - if err != nil { - return fmt.Errorf("failed to retrieve OCI runtime container pid: %w", err) - } - p.pid = pid - return nil -} - -// Wait waits for the process to exit. -func (p *Init) Wait() { - <-p.waitBlock -} - -// ID returns the ID of the process. -func (p *Init) ID() string { - return p.id -} - -// Pid returns the PID of the process. -func (p *Init) Pid() int { - return p.pid -} - -// ExitStatus returns the exit status of the process. -func (p *Init) ExitStatus() int { - p.mu.Lock() - defer p.mu.Unlock() - return p.status -} - -// ExitedAt returns the time when the process exited. -func (p *Init) ExitedAt() time.Time { - p.mu.Lock() - defer p.mu.Unlock() - return p.exited -} - -// Status returns the status of the process. -func (p *Init) Status(ctx context.Context) (string, error) { - p.mu.Lock() - defer p.mu.Unlock() - c, err := p.runtime.State(ctx, p.id) - if err != nil { - if strings.Contains(err.Error(), "does not exist") { - return "stopped", nil - } - return "", p.runtimeError(err, "OCI runtime state failed") - } - return p.convertStatus(c.Status), nil -} - -// Start starts the init process. -func (p *Init) Start(ctx context.Context) error { - p.mu.Lock() - defer p.mu.Unlock() - - return p.initState.Start(ctx) -} - -func (p *Init) start(ctx context.Context) error { - var cio runc.IO - if !p.Sandbox { - cio = p.io - } - if err := p.runtime.Start(ctx, p.id, cio); err != nil { - return p.runtimeError(err, "OCI runtime start failed") - } - go func() { - status, err := p.runtime.Wait(context.Background(), p.id) - if err != nil { - log.G(ctx).WithError(err).Errorf("Failed to wait for container %q", p.id) - // TODO(random-liu): Handle runsc kill error. - if err := p.killAll(ctx); err != nil { - log.G(ctx).WithError(err).Errorf("Failed to kill container %q", p.id) - } - status = internalErrorCode - } - ExitCh <- Exit{ - Timestamp: time.Now(), - ID: p.id, - Status: status, - } - }() - return nil -} - -// SetExited set the exit stauts of the init process. -func (p *Init) SetExited(status int) { - p.mu.Lock() - defer p.mu.Unlock() - - p.initState.SetExited(status) -} - -func (p *Init) setExited(status int) { - p.exited = time.Now() - p.status = status - p.Platform.ShutdownConsole(context.Background(), p.console) - close(p.waitBlock) -} - -// Delete deletes the init process. -func (p *Init) Delete(ctx context.Context) error { - p.mu.Lock() - defer p.mu.Unlock() - - return p.initState.Delete(ctx) -} - -func (p *Init) delete(ctx context.Context) error { - p.killAll(ctx) - p.wg.Wait() - err := p.runtime.Delete(ctx, p.id, nil) - // ignore errors if a runtime has already deleted the process - // but we still hold metadata and pipes - // - // this is common during a checkpoint, runc will delete the container state - // after a checkpoint and the container will no longer exist within runc - if err != nil { - if strings.Contains(err.Error(), "does not exist") { - err = nil - } else { - err = p.runtimeError(err, "failed to delete task") - } - } - if p.io != nil { - for _, c := range p.closers { - c.Close() - } - p.io.Close() - } - if err2 := mount.UnmountAll(p.Rootfs, 0); err2 != nil { - log.G(ctx).WithError(err2).Warn("failed to cleanup rootfs mount") - if err == nil { - err = fmt.Errorf("failed rootfs umount: %w", err2) - } - } - return err -} - -// Resize resizes the init processes console. -func (p *Init) Resize(ws console.WinSize) error { - p.mu.Lock() - defer p.mu.Unlock() - - if p.console == nil { - return nil - } - return p.console.Resize(ws) -} - -func (p *Init) resize(ws console.WinSize) error { - if p.console == nil { - return nil - } - return p.console.Resize(ws) -} - -// Kill kills the init process. -func (p *Init) Kill(ctx context.Context, signal uint32, all bool) error { - p.mu.Lock() - defer p.mu.Unlock() - - return p.initState.Kill(ctx, signal, all) -} - -func (p *Init) kill(context context.Context, signal uint32, all bool) error { - var ( - killErr error - backoff = 100 * time.Millisecond - ) - timeout := 1 * time.Second - for start := time.Now(); time.Now().Sub(start) < timeout; { - c, err := p.runtime.State(context, p.id) - if err != nil { - if strings.Contains(err.Error(), "does not exist") { - return fmt.Errorf("no such process: %w", errdefs.ErrNotFound) - } - return p.runtimeError(err, "OCI runtime state failed") - } - // For runsc, signal only works when container is running state. - // If the container is not in running state, directly return - // "no such process" - if p.convertStatus(c.Status) == "stopped" { - return fmt.Errorf("no such process: %w", errdefs.ErrNotFound) - } - killErr = p.runtime.Kill(context, p.id, int(signal), &runsc.KillOpts{ - All: all, - }) - if killErr == nil { - return nil - } - time.Sleep(backoff) - backoff *= 2 - } - return p.runtimeError(killErr, "kill timeout") -} - -// KillAll kills all processes belonging to the init process. -func (p *Init) KillAll(context context.Context) error { - p.mu.Lock() - defer p.mu.Unlock() - return p.killAll(context) -} - -func (p *Init) killAll(context context.Context) error { - p.runtime.Kill(context, p.id, int(syscall.SIGKILL), &runsc.KillOpts{ - All: true, - }) - // Ignore error handling for `runsc kill --all` for now. - // * If it doesn't return error, it is good; - // * If it returns error, consider the container has already stopped. - // TODO: Fix `runsc kill --all` error handling. - return nil -} - -// Stdin returns the stdin of the process. -func (p *Init) Stdin() io.Closer { - return p.stdin -} - -// Runtime returns the OCI runtime configured for the init process. -func (p *Init) Runtime() *runsc.Runsc { - return p.runtime -} - -// Exec returns a new child process. -func (p *Init) Exec(ctx context.Context, path string, r *ExecConfig) (process.Process, error) { - p.mu.Lock() - defer p.mu.Unlock() - - return p.initState.Exec(ctx, path, r) -} - -// exec returns a new exec'd process. -func (p *Init) exec(path string, r *ExecConfig) (process.Process, error) { - // process exec request - var spec specs.Process - if err := json.Unmarshal(r.Spec.Value, &spec); err != nil { - return nil, err - } - spec.Terminal = r.Terminal - - e := &execProcess{ - id: r.ID, - path: path, - parent: p, - spec: spec, - stdio: stdio.Stdio{ - Stdin: r.Stdin, - Stdout: r.Stdout, - Stderr: r.Stderr, - Terminal: r.Terminal, - }, - waitBlock: make(chan struct{}), - } - e.execState = &execCreatedState{p: e} - return e, nil -} - -// Stdio returns the stdio of the process. -func (p *Init) Stdio() stdio.Stdio { - return p.stdio -} - -func (p *Init) runtimeError(rErr error, msg string) error { - if rErr == nil { - return nil - } - - rMsg, err := getLastRuntimeError(p.runtime) - switch { - case err != nil: - return fmt.Errorf("%s: %w (unable to retrieve OCI runtime error: %v)", msg, rErr, err) - case rMsg == "": - return fmt.Errorf("%s: %w", msg, rErr) - default: - return fmt.Errorf("%s: %s", msg, rMsg) - } -} - -func (p *Init) convertStatus(status string) string { - if status == "created" && !p.Sandbox && p.status == internalErrorCode { - // Treat start failure state for non-root container as stopped. - return "stopped" - } - return status -} - -func withConditionalIO(c stdio.Stdio) runc.IOOpt { - return func(o *runc.IOOption) { - o.OpenStdin = c.Stdin != "" - o.OpenStdout = c.Stdout != "" - o.OpenStderr = c.Stderr != "" - } -} diff --git a/pkg/shim/v1/proc/init_state.go b/pkg/shim/v1/proc/init_state.go deleted file mode 100644 index 0065fc385..000000000 --- a/pkg/shim/v1/proc/init_state.go +++ /dev/null @@ -1,182 +0,0 @@ -// Copyright 2018 The containerd Authors. -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package proc - -import ( - "context" - "fmt" - - "github.com/containerd/console" - "github.com/containerd/containerd/errdefs" - "github.com/containerd/containerd/pkg/process" -) - -type initState interface { - Resize(console.WinSize) error - Start(context.Context) error - Delete(context.Context) error - Exec(context.Context, string, *ExecConfig) (process.Process, error) - Kill(context.Context, uint32, bool) error - SetExited(int) -} - -type createdState struct { - p *Init -} - -func (s *createdState) transition(name string) error { - switch name { - case "running": - s.p.initState = &runningState{p: s.p} - case "stopped": - s.p.initState = &stoppedState{p: s.p} - case "deleted": - s.p.initState = &deletedState{} - default: - return fmt.Errorf("invalid state transition %q to %q", stateName(s), name) - } - return nil -} - -func (s *createdState) Resize(ws console.WinSize) error { - return s.p.resize(ws) -} - -func (s *createdState) Start(ctx context.Context) error { - if err := s.p.start(ctx); err != nil { - // Containerd doesn't allow deleting container in created state. - // However, for gvisor, a non-root container in created state can - // only go to running state. If the container can't be started, - // it can only stay in created state, and never be deleted. - // To work around that, we treat non-root container in start failure - // state as stopped. - if !s.p.Sandbox { - s.p.io.Close() - s.p.setExited(internalErrorCode) - if err := s.transition("stopped"); err != nil { - panic(err) - } - } - return err - } - return s.transition("running") -} - -func (s *createdState) Delete(ctx context.Context) error { - if err := s.p.delete(ctx); err != nil { - return err - } - return s.transition("deleted") -} - -func (s *createdState) Kill(ctx context.Context, sig uint32, all bool) error { - return s.p.kill(ctx, sig, all) -} - -func (s *createdState) SetExited(status int) { - s.p.setExited(status) - - if err := s.transition("stopped"); err != nil { - panic(err) - } -} - -func (s *createdState) Exec(ctx context.Context, path string, r *ExecConfig) (process.Process, error) { - return s.p.exec(path, r) -} - -type runningState struct { - p *Init -} - -func (s *runningState) transition(name string) error { - switch name { - case "stopped": - s.p.initState = &stoppedState{p: s.p} - default: - return fmt.Errorf("invalid state transition %q to %q", stateName(s), name) - } - return nil -} - -func (s *runningState) Resize(ws console.WinSize) error { - return s.p.resize(ws) -} - -func (s *runningState) Start(ctx context.Context) error { - return fmt.Errorf("cannot start a running process.ss") -} - -func (s *runningState) Delete(ctx context.Context) error { - return fmt.Errorf("cannot delete a running process.ss") -} - -func (s *runningState) Kill(ctx context.Context, sig uint32, all bool) error { - return s.p.kill(ctx, sig, all) -} - -func (s *runningState) SetExited(status int) { - s.p.setExited(status) - - if err := s.transition("stopped"); err != nil { - panic(err) - } -} - -func (s *runningState) Exec(ctx context.Context, path string, r *ExecConfig) (process.Process, error) { - return s.p.exec(path, r) -} - -type stoppedState struct { - p *Init -} - -func (s *stoppedState) transition(name string) error { - switch name { - case "deleted": - s.p.initState = &deletedState{} - default: - return fmt.Errorf("invalid state transition %q to %q", stateName(s), name) - } - return nil -} - -func (s *stoppedState) Resize(ws console.WinSize) error { - return fmt.Errorf("cannot resize a stopped container") -} - -func (s *stoppedState) Start(ctx context.Context) error { - return fmt.Errorf("cannot start a stopped process.ss") -} - -func (s *stoppedState) Delete(ctx context.Context) error { - if err := s.p.delete(ctx); err != nil { - return err - } - return s.transition("deleted") -} - -func (s *stoppedState) Kill(ctx context.Context, sig uint32, all bool) error { - return errdefs.ToGRPCf(errdefs.ErrNotFound, "process.ss %s not found", s.p.id) -} - -func (s *stoppedState) SetExited(status int) { - // no op -} - -func (s *stoppedState) Exec(ctx context.Context, path string, r *ExecConfig) (process.Process, error) { - return nil, fmt.Errorf("cannot exec in a stopped state") -} diff --git a/pkg/shim/v1/proc/io.go b/pkg/shim/v1/proc/io.go deleted file mode 100644 index 34d825fb7..000000000 --- a/pkg/shim/v1/proc/io.go +++ /dev/null @@ -1,162 +0,0 @@ -// Copyright 2018 The containerd Authors. -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package proc - -import ( - "context" - "fmt" - "io" - "os" - "sync" - "sync/atomic" - "syscall" - - "github.com/containerd/containerd/log" - "github.com/containerd/fifo" - runc "github.com/containerd/go-runc" -) - -// TODO(random-liu): This file can be a util. - -var bufPool = sync.Pool{ - New: func() interface{} { - buffer := make([]byte, 32<<10) - return &buffer - }, -} - -func copyPipes(ctx context.Context, rio runc.IO, stdin, stdout, stderr string, wg *sync.WaitGroup) error { - var sameFile *countingWriteCloser - for _, i := range []struct { - name string - dest func(wc io.WriteCloser, rc io.Closer) - }{ - { - name: stdout, - dest: func(wc io.WriteCloser, rc io.Closer) { - wg.Add(1) - go func() { - p := bufPool.Get().(*[]byte) - defer bufPool.Put(p) - if _, err := io.CopyBuffer(wc, rio.Stdout(), *p); err != nil { - log.G(ctx).Warn("error copying stdout") - } - wg.Done() - wc.Close() - if rc != nil { - rc.Close() - } - }() - }, - }, { - name: stderr, - dest: func(wc io.WriteCloser, rc io.Closer) { - wg.Add(1) - go func() { - p := bufPool.Get().(*[]byte) - defer bufPool.Put(p) - if _, err := io.CopyBuffer(wc, rio.Stderr(), *p); err != nil { - log.G(ctx).Warn("error copying stderr") - } - wg.Done() - wc.Close() - if rc != nil { - rc.Close() - } - }() - }, - }, - } { - ok, err := isFifo(i.name) - if err != nil { - return err - } - var ( - fw io.WriteCloser - fr io.Closer - ) - if ok { - if fw, err = fifo.OpenFifo(ctx, i.name, syscall.O_WRONLY, 0); err != nil { - return fmt.Errorf("gvisor-containerd-shim: opening %s failed: %s", i.name, err) - } - if fr, err = fifo.OpenFifo(ctx, i.name, syscall.O_RDONLY, 0); err != nil { - return fmt.Errorf("gvisor-containerd-shim: opening %s failed: %s", i.name, err) - } - } else { - if sameFile != nil { - sameFile.count++ - i.dest(sameFile, nil) - continue - } - if fw, err = os.OpenFile(i.name, syscall.O_WRONLY|syscall.O_APPEND, 0); err != nil { - return fmt.Errorf("gvisor-containerd-shim: opening %s failed: %s", i.name, err) - } - if stdout == stderr { - sameFile = &countingWriteCloser{ - WriteCloser: fw, - count: 1, - } - } - } - i.dest(fw, fr) - } - if stdin == "" { - return nil - } - f, err := fifo.OpenFifo(context.Background(), stdin, syscall.O_RDONLY|syscall.O_NONBLOCK, 0) - if err != nil { - return fmt.Errorf("gvisor-containerd-shim: opening %s failed: %s", stdin, err) - } - go func() { - p := bufPool.Get().(*[]byte) - defer bufPool.Put(p) - - io.CopyBuffer(rio.Stdin(), f, *p) - rio.Stdin().Close() - f.Close() - }() - return nil -} - -// countingWriteCloser masks io.Closer() until close has been invoked a certain number of times. -type countingWriteCloser struct { - io.WriteCloser - count int64 -} - -func (c *countingWriteCloser) Close() error { - if atomic.AddInt64(&c.count, -1) > 0 { - return nil - } - return c.WriteCloser.Close() -} - -// isFifo checks if a file is a fifo. -// -// If the file does not exist then it returns false. -func isFifo(path string) (bool, error) { - stat, err := os.Stat(path) - if err != nil { - if os.IsNotExist(err) { - return false, nil - } - return false, err - } - if stat.Mode()&os.ModeNamedPipe == os.ModeNamedPipe { - return true, nil - } - return false, nil -} diff --git a/pkg/shim/v1/proc/process.go b/pkg/shim/v1/proc/process.go deleted file mode 100644 index e8315326d..000000000 --- a/pkg/shim/v1/proc/process.go +++ /dev/null @@ -1,38 +0,0 @@ -// Copyright 2018 The containerd Authors. -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Package proc contains process-related utilities. -package proc - -import ( - "fmt" -) - -// RunscRoot is the path to the root runsc state directory. -const RunscRoot = "/run/containerd/runsc" - -func stateName(v interface{}) string { - switch v.(type) { - case *runningState, *execRunningState: - return "running" - case *createdState, *execCreatedState: - return "created" - case *deletedState: - return "deleted" - case *stoppedState: - return "stopped" - } - panic(fmt.Errorf("invalid state %v", v)) -} diff --git a/pkg/shim/v1/proc/types.go b/pkg/shim/v1/proc/types.go deleted file mode 100644 index fc182cf5e..000000000 --- a/pkg/shim/v1/proc/types.go +++ /dev/null @@ -1,68 +0,0 @@ -// Copyright 2018 The containerd Authors. -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package proc - -import ( - "time" - - runc "github.com/containerd/go-runc" - "github.com/gogo/protobuf/types" -) - -// Mount holds filesystem mount configuration. -type Mount struct { - Type string - Source string - Target string - Options []string -} - -// CreateConfig hold task creation configuration. -type CreateConfig struct { - ID string - Bundle string - Runtime string - Rootfs []Mount - Terminal bool - Stdin string - Stdout string - Stderr string -} - -// ExecConfig holds exec creation configuration. -type ExecConfig struct { - ID string - Terminal bool - Stdin string - Stdout string - Stderr string - Spec *types.Any -} - -// Exit is the type of exit events. -type Exit struct { - Timestamp time.Time - ID string - Status int -} - -// ProcessMonitor monitors process exit changes. -type ProcessMonitor interface { - // Subscribe to process exit changes - Subscribe() chan runc.Exit - // Unsubscribe to process exit changes - Unsubscribe(c chan runc.Exit) -} diff --git a/pkg/shim/v1/proc/utils.go b/pkg/shim/v1/proc/utils.go deleted file mode 100644 index 7c2c409af..000000000 --- a/pkg/shim/v1/proc/utils.go +++ /dev/null @@ -1,72 +0,0 @@ -// Copyright 2018 The containerd Authors. -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package proc - -import ( - "encoding/json" - "io" - "os" - "strings" - "time" - - "gvisor.dev/gvisor/pkg/shim/runsc" -) - -const ( - internalErrorCode = 128 - bufferSize = 32 -) - -// ExitCh is the exit events channel for containers and exec processes -// inside the sandbox. -var ExitCh = make(chan Exit, bufferSize) - -// TODO(mlaventure): move to runc package? -func getLastRuntimeError(r *runsc.Runsc) (string, error) { - if r.Log == "" { - return "", nil - } - - f, err := os.OpenFile(r.Log, os.O_RDONLY, 0400) - if err != nil { - return "", err - } - - var ( - errMsg string - log struct { - Level string - Msg string - Time time.Time - } - ) - - dec := json.NewDecoder(f) - for err = nil; err == nil; { - if err = dec.Decode(&log); err != nil && err != io.EOF { - return "", err - } - if log.Level == "error" { - errMsg = strings.TrimSpace(log.Msg) - } - } - - return errMsg, nil -} - -func hasNoIO(r *CreateConfig) bool { - return r.Stdin == "" && r.Stdout == "" && r.Stderr == "" -} diff --git a/pkg/shim/v1/shim/BUILD b/pkg/shim/v1/shim/BUILD deleted file mode 100644 index e5b6bf186..000000000 --- a/pkg/shim/v1/shim/BUILD +++ /dev/null @@ -1,41 +0,0 @@ -load("//tools:defs.bzl", "go_library") - -package(licenses = ["notice"]) - -go_library( - name = "shim", - srcs = [ - "api.go", - "platform.go", - "service.go", - "shim.go", - ], - visibility = [ - "//pkg/shim:__subpackages__", - "//shim:__subpackages__", - ], - deps = [ - "//pkg/shim/runsc", - "//pkg/shim/v1/proc", - "//pkg/shim/v1/utils", - "@com_github_containerd_console//:go_default_library", - "@com_github_containerd_containerd//api/events:go_default_library", - "@com_github_containerd_containerd//api/types/task:go_default_library", - "@com_github_containerd_containerd//errdefs:go_default_library", - "@com_github_containerd_containerd//events:go_default_library", - "@com_github_containerd_containerd//log:go_default_library", - "@com_github_containerd_containerd//mount:go_default_library", - "@com_github_containerd_containerd//namespaces:go_default_library", - "@com_github_containerd_containerd//pkg/process:go_default_library", - "@com_github_containerd_containerd//pkg/stdio:go_default_library", - "@com_github_containerd_containerd//runtime:go_default_library", - "@com_github_containerd_containerd//runtime/linux/runctypes:go_default_library", - "@com_github_containerd_containerd//runtime/v1/shim/v1:go_default_library", - "@com_github_containerd_containerd//sys/reaper:go_default_library", - "@com_github_containerd_fifo//:go_default_library", - "@com_github_containerd_typeurl//:go_default_library", - "@com_github_gogo_protobuf//types:go_default_library", - "@org_golang_google_grpc//codes:go_default_library", - "@org_golang_google_grpc//status:go_default_library", - ], -) diff --git a/pkg/shim/v1/shim/api.go b/pkg/shim/v1/shim/api.go deleted file mode 100644 index 8200eb012..000000000 --- a/pkg/shim/v1/shim/api.go +++ /dev/null @@ -1,41 +0,0 @@ -// Copyright 2018 The containerd Authors. -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package shim - -import ( - "github.com/containerd/containerd/api/events" -) - -// TaskCreate is an alias for events.TaskCreate. -type TaskCreate = events.TaskCreate - -// TaskStart is an alias for events.TaskStart. -type TaskStart = events.TaskStart - -// TaskOOM is an alias for events.TaskOOM. -type TaskOOM = events.TaskOOM - -// TaskExit is an alias for events.TaskExit. -type TaskExit = events.TaskExit - -// TaskDelete is an alias for events.TaskDelete. -type TaskDelete = events.TaskDelete - -// TaskExecAdded is an alias for events.TaskExecAdded. -type TaskExecAdded = events.TaskExecAdded - -// TaskExecStarted is an alias for events.TaskExecStarted. -type TaskExecStarted = events.TaskExecStarted diff --git a/pkg/shim/v1/shim/platform.go b/pkg/shim/v1/shim/platform.go deleted file mode 100644 index f590f80ef..000000000 --- a/pkg/shim/v1/shim/platform.go +++ /dev/null @@ -1,106 +0,0 @@ -// Copyright 2018 The containerd Authors. -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package shim - -import ( - "context" - "fmt" - "io" - "sync" - "syscall" - - "github.com/containerd/console" - "github.com/containerd/fifo" -) - -type linuxPlatform struct { - epoller *console.Epoller -} - -func (p *linuxPlatform) CopyConsole(ctx context.Context, console console.Console, stdin, stdout, stderr string, wg *sync.WaitGroup) (console.Console, error) { - if p.epoller == nil { - return nil, fmt.Errorf("uninitialized epoller") - } - - epollConsole, err := p.epoller.Add(console) - if err != nil { - return nil, err - } - - if stdin != "" { - in, err := fifo.OpenFifo(ctx, stdin, syscall.O_RDONLY, 0) - if err != nil { - return nil, err - } - go func() { - p := bufPool.Get().(*[]byte) - defer bufPool.Put(p) - io.CopyBuffer(epollConsole, in, *p) - }() - } - - outw, err := fifo.OpenFifo(ctx, stdout, syscall.O_WRONLY, 0) - if err != nil { - return nil, err - } - outr, err := fifo.OpenFifo(ctx, stdout, syscall.O_RDONLY, 0) - if err != nil { - return nil, err - } - wg.Add(1) - go func() { - p := bufPool.Get().(*[]byte) - defer bufPool.Put(p) - io.CopyBuffer(outw, epollConsole, *p) - epollConsole.Close() - outr.Close() - outw.Close() - wg.Done() - }() - return epollConsole, nil -} - -func (p *linuxPlatform) ShutdownConsole(ctx context.Context, cons console.Console) error { - if p.epoller == nil { - return fmt.Errorf("uninitialized epoller") - } - epollConsole, ok := cons.(*console.EpollConsole) - if !ok { - return fmt.Errorf("expected EpollConsole, got %#v", cons) - } - return epollConsole.Shutdown(p.epoller.CloseConsole) -} - -func (p *linuxPlatform) Close() error { - return p.epoller.Close() -} - -// initialize a single epoll fd to manage our consoles. `initPlatform` should -// only be called once. -func (s *Service) initPlatform() error { - if s.platform != nil { - return nil - } - epoller, err := console.NewEpoller() - if err != nil { - return fmt.Errorf("failed to initialize epoller: %w", err) - } - s.platform = &linuxPlatform{ - epoller: epoller, - } - go epoller.Wait() - return nil -} diff --git a/pkg/shim/v1/shim/service.go b/pkg/shim/v1/shim/service.go deleted file mode 100644 index 80aa59b33..000000000 --- a/pkg/shim/v1/shim/service.go +++ /dev/null @@ -1,572 +0,0 @@ -// Copyright 2018 The containerd Authors. -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package shim - -import ( - "context" - "fmt" - "os" - "path/filepath" - "sync" - - "github.com/containerd/console" - "github.com/containerd/containerd/api/types/task" - "github.com/containerd/containerd/errdefs" - "github.com/containerd/containerd/events" - "github.com/containerd/containerd/log" - "github.com/containerd/containerd/mount" - "github.com/containerd/containerd/namespaces" - "github.com/containerd/containerd/pkg/process" - "github.com/containerd/containerd/pkg/stdio" - "github.com/containerd/containerd/runtime" - "github.com/containerd/containerd/runtime/linux/runctypes" - shim "github.com/containerd/containerd/runtime/v1/shim/v1" - "github.com/containerd/containerd/sys/reaper" - "github.com/containerd/typeurl" - "github.com/gogo/protobuf/types" - "google.golang.org/grpc/codes" - "google.golang.org/grpc/status" - - "gvisor.dev/gvisor/pkg/shim/runsc" - "gvisor.dev/gvisor/pkg/shim/v1/proc" - "gvisor.dev/gvisor/pkg/shim/v1/utils" -) - -var ( - empty = &types.Empty{} - bufPool = sync.Pool{ - New: func() interface{} { - buffer := make([]byte, 32<<10) - return &buffer - }, - } -) - -// Config contains shim specific configuration. -type Config struct { - Path string - Namespace string - WorkDir string - RuntimeRoot string - RunscConfig map[string]string -} - -// NewService returns a new shim service that can be used via GRPC. -func NewService(config Config, publisher events.Publisher) (*Service, error) { - if config.Namespace == "" { - return nil, fmt.Errorf("shim namespace cannot be empty") - } - ctx := namespaces.WithNamespace(context.Background(), config.Namespace) - s := &Service{ - config: config, - context: ctx, - processes: make(map[string]process.Process), - events: make(chan interface{}, 128), - ec: proc.ExitCh, - } - go s.processExits() - if err := s.initPlatform(); err != nil { - return nil, fmt.Errorf("failed to initialized platform behavior: %w", err) - } - go s.forward(publisher) - return s, nil -} - -// Service is the shim implementation of a remote shim over GRPC. -type Service struct { - mu sync.Mutex - - config Config - context context.Context - processes map[string]process.Process - events chan interface{} - platform stdio.Platform - ec chan proc.Exit - - // Filled by Create() - id string - bundle string -} - -// Create creates a new initial process and container with the underlying OCI runtime. -func (s *Service) Create(ctx context.Context, r *shim.CreateTaskRequest) (_ *shim.CreateTaskResponse, err error) { - s.mu.Lock() - defer s.mu.Unlock() - - var mounts []proc.Mount - for _, m := range r.Rootfs { - mounts = append(mounts, proc.Mount{ - Type: m.Type, - Source: m.Source, - Target: m.Target, - Options: m.Options, - }) - } - - rootfs := filepath.Join(r.Bundle, "rootfs") - if err := os.Mkdir(rootfs, 0711); err != nil && !os.IsExist(err) { - return nil, err - } - - config := &proc.CreateConfig{ - ID: r.ID, - Bundle: r.Bundle, - Runtime: r.Runtime, - Rootfs: mounts, - Terminal: r.Terminal, - Stdin: r.Stdin, - Stdout: r.Stdout, - Stderr: r.Stderr, - } - defer func() { - if err != nil { - if err2 := mount.UnmountAll(rootfs, 0); err2 != nil { - log.G(ctx).WithError(err2).Warn("Failed to cleanup rootfs mount") - } - } - }() - for _, rm := range mounts { - m := &mount.Mount{ - Type: rm.Type, - Source: rm.Source, - Options: rm.Options, - } - if err := m.Mount(rootfs); err != nil { - return nil, fmt.Errorf("failed to mount rootfs component %v: %w", m, err) - } - } - process, err := newInit( - s.config.Path, - s.config.WorkDir, - s.config.RuntimeRoot, - s.config.Namespace, - s.config.RunscConfig, - s.platform, - config, - r.Options, - ) - if err := process.Create(ctx, config); err != nil { - return nil, errdefs.ToGRPC(err) - } - // Save the main task id and bundle to the shim for additional - // requests. - s.id = r.ID - s.bundle = r.Bundle - pid := process.Pid() - s.processes[r.ID] = process - return &shim.CreateTaskResponse{ - Pid: uint32(pid), - }, nil -} - -// Start starts a process. -func (s *Service) Start(ctx context.Context, r *shim.StartRequest) (*shim.StartResponse, error) { - p, err := s.getExecProcess(r.ID) - if err != nil { - return nil, err - } - if err := p.Start(ctx); err != nil { - return nil, err - } - return &shim.StartResponse{ - ID: p.ID(), - Pid: uint32(p.Pid()), - }, nil -} - -// Delete deletes the initial process and container. -func (s *Service) Delete(ctx context.Context, r *types.Empty) (*shim.DeleteResponse, error) { - p, err := s.getInitProcess() - if err != nil { - return nil, err - } - if err := p.Delete(ctx); err != nil { - return nil, err - } - s.mu.Lock() - delete(s.processes, s.id) - s.mu.Unlock() - s.platform.Close() - return &shim.DeleteResponse{ - ExitStatus: uint32(p.ExitStatus()), - ExitedAt: p.ExitedAt(), - Pid: uint32(p.Pid()), - }, nil -} - -// DeleteProcess deletes an exec'd process. -func (s *Service) DeleteProcess(ctx context.Context, r *shim.DeleteProcessRequest) (*shim.DeleteResponse, error) { - if r.ID == s.id { - return nil, status.Errorf(codes.InvalidArgument, "cannot delete init process with DeleteProcess") - } - p, err := s.getExecProcess(r.ID) - if err != nil { - return nil, err - } - if err := p.Delete(ctx); err != nil { - return nil, err - } - s.mu.Lock() - delete(s.processes, r.ID) - s.mu.Unlock() - return &shim.DeleteResponse{ - ExitStatus: uint32(p.ExitStatus()), - ExitedAt: p.ExitedAt(), - Pid: uint32(p.Pid()), - }, nil -} - -// Exec spawns an additional process inside the container. -func (s *Service) Exec(ctx context.Context, r *shim.ExecProcessRequest) (*types.Empty, error) { - s.mu.Lock() - - if p := s.processes[r.ID]; p != nil { - s.mu.Unlock() - return nil, errdefs.ToGRPCf(errdefs.ErrAlreadyExists, "id %s", r.ID) - } - - p := s.processes[s.id] - s.mu.Unlock() - if p == nil { - return nil, errdefs.ToGRPCf(errdefs.ErrFailedPrecondition, "container must be created") - } - - process, err := p.(*proc.Init).Exec(ctx, s.config.Path, &proc.ExecConfig{ - ID: r.ID, - Terminal: r.Terminal, - Stdin: r.Stdin, - Stdout: r.Stdout, - Stderr: r.Stderr, - Spec: r.Spec, - }) - if err != nil { - return nil, errdefs.ToGRPC(err) - } - s.mu.Lock() - s.processes[r.ID] = process - s.mu.Unlock() - return empty, nil -} - -// ResizePty resises the terminal of a process. -func (s *Service) ResizePty(ctx context.Context, r *shim.ResizePtyRequest) (*types.Empty, error) { - if r.ID == "" { - return nil, errdefs.ToGRPCf(errdefs.ErrInvalidArgument, "id not provided") - } - ws := console.WinSize{ - Width: uint16(r.Width), - Height: uint16(r.Height), - } - p, err := s.getExecProcess(r.ID) - if err != nil { - return nil, err - } - if err := p.Resize(ws); err != nil { - return nil, errdefs.ToGRPC(err) - } - return empty, nil -} - -// State returns runtime state information for a process. -func (s *Service) State(ctx context.Context, r *shim.StateRequest) (*shim.StateResponse, error) { - p, err := s.getExecProcess(r.ID) - if err != nil { - return nil, err - } - st, err := p.Status(ctx) - if err != nil { - return nil, err - } - status := task.StatusUnknown - switch st { - case "created": - status = task.StatusCreated - case "running": - status = task.StatusRunning - case "stopped": - status = task.StatusStopped - } - sio := p.Stdio() - return &shim.StateResponse{ - ID: p.ID(), - Bundle: s.bundle, - Pid: uint32(p.Pid()), - Status: status, - Stdin: sio.Stdin, - Stdout: sio.Stdout, - Stderr: sio.Stderr, - Terminal: sio.Terminal, - ExitStatus: uint32(p.ExitStatus()), - ExitedAt: p.ExitedAt(), - }, nil -} - -// Pause pauses the container. -func (s *Service) Pause(ctx context.Context, r *types.Empty) (*types.Empty, error) { - return empty, errdefs.ToGRPC(errdefs.ErrNotImplemented) -} - -// Resume resumes the container. -func (s *Service) Resume(ctx context.Context, r *types.Empty) (*types.Empty, error) { - return empty, errdefs.ToGRPC(errdefs.ErrNotImplemented) -} - -// Kill kills a process with the provided signal. -func (s *Service) Kill(ctx context.Context, r *shim.KillRequest) (*types.Empty, error) { - if r.ID == "" { - p, err := s.getInitProcess() - if err != nil { - return nil, err - } - if err := p.Kill(ctx, r.Signal, r.All); err != nil { - return nil, errdefs.ToGRPC(err) - } - return empty, nil - } - - p, err := s.getExecProcess(r.ID) - if err != nil { - return nil, err - } - if err := p.Kill(ctx, r.Signal, r.All); err != nil { - return nil, errdefs.ToGRPC(err) - } - return empty, nil -} - -// ListPids returns all pids inside the container. -func (s *Service) ListPids(ctx context.Context, r *shim.ListPidsRequest) (*shim.ListPidsResponse, error) { - pids, err := s.getContainerPids(ctx, r.ID) - if err != nil { - return nil, errdefs.ToGRPC(err) - } - var processes []*task.ProcessInfo - for _, pid := range pids { - pInfo := task.ProcessInfo{ - Pid: pid, - } - for _, p := range s.processes { - if p.Pid() == int(pid) { - d := &runctypes.ProcessDetails{ - ExecID: p.ID(), - } - a, err := typeurl.MarshalAny(d) - if err != nil { - return nil, fmt.Errorf("failed to marshal process %d info: %w", pid, err) - } - pInfo.Info = a - break - } - } - processes = append(processes, &pInfo) - } - return &shim.ListPidsResponse{ - Processes: processes, - }, nil -} - -// CloseIO closes the I/O context of a process. -func (s *Service) CloseIO(ctx context.Context, r *shim.CloseIORequest) (*types.Empty, error) { - p, err := s.getExecProcess(r.ID) - if err != nil { - return nil, err - } - if stdin := p.Stdin(); stdin != nil { - if err := stdin.Close(); err != nil { - return nil, fmt.Errorf("close stdin: %w", err) - } - } - return empty, nil -} - -// Checkpoint checkpoints the container. -func (s *Service) Checkpoint(ctx context.Context, r *shim.CheckpointTaskRequest) (*types.Empty, error) { - return empty, errdefs.ToGRPC(errdefs.ErrNotImplemented) -} - -// ShimInfo returns shim information such as the shim's pid. -func (s *Service) ShimInfo(ctx context.Context, r *types.Empty) (*shim.ShimInfoResponse, error) { - return &shim.ShimInfoResponse{ - ShimPid: uint32(os.Getpid()), - }, nil -} - -// Update updates a running container. -func (s *Service) Update(ctx context.Context, r *shim.UpdateTaskRequest) (*types.Empty, error) { - return empty, errdefs.ToGRPC(errdefs.ErrNotImplemented) -} - -// Wait waits for a process to exit. -func (s *Service) Wait(ctx context.Context, r *shim.WaitRequest) (*shim.WaitResponse, error) { - p, err := s.getExecProcess(r.ID) - if err != nil { - return nil, err - } - p.Wait() - - return &shim.WaitResponse{ - ExitStatus: uint32(p.ExitStatus()), - ExitedAt: p.ExitedAt(), - }, nil -} - -func (s *Service) processExits() { - for e := range s.ec { - s.checkProcesses(e) - } -} - -func (s *Service) allProcesses() []process.Process { - s.mu.Lock() - defer s.mu.Unlock() - - res := make([]process.Process, 0, len(s.processes)) - for _, p := range s.processes { - res = append(res, p) - } - return res -} - -func (s *Service) checkProcesses(e proc.Exit) { - for _, p := range s.allProcesses() { - if p.ID() == e.ID { - if ip, ok := p.(*proc.Init); ok { - // Ensure all children are killed. - if err := ip.KillAll(s.context); err != nil { - log.G(s.context).WithError(err).WithField("id", ip.ID()). - Error("failed to kill init's children") - } - } - p.SetExited(e.Status) - s.events <- &TaskExit{ - ContainerID: s.id, - ID: p.ID(), - Pid: uint32(p.Pid()), - ExitStatus: uint32(e.Status), - ExitedAt: p.ExitedAt(), - } - return - } - } -} - -func (s *Service) getContainerPids(ctx context.Context, id string) ([]uint32, error) { - p, err := s.getInitProcess() - if err != nil { - return nil, err - } - - ps, err := p.(*proc.Init).Runtime().Ps(ctx, id) - if err != nil { - return nil, err - } - pids := make([]uint32, 0, len(ps)) - for _, pid := range ps { - pids = append(pids, uint32(pid)) - } - return pids, nil -} - -func (s *Service) forward(publisher events.Publisher) { - for e := range s.events { - if err := publisher.Publish(s.context, getTopic(s.context, e), e); err != nil { - log.G(s.context).WithError(err).Error("post event") - } - } -} - -// getInitProcess returns the init process. -func (s *Service) getInitProcess() (process.Process, error) { - s.mu.Lock() - defer s.mu.Unlock() - p := s.processes[s.id] - if p == nil { - return nil, errdefs.ToGRPCf(errdefs.ErrFailedPrecondition, "container must be created") - } - return p, nil -} - -// getExecProcess returns the given exec process. -func (s *Service) getExecProcess(id string) (process.Process, error) { - s.mu.Lock() - defer s.mu.Unlock() - p := s.processes[id] - if p == nil { - return nil, errdefs.ToGRPCf(errdefs.ErrNotFound, "process %s does not exist", id) - } - return p, nil -} - -func getTopic(ctx context.Context, e interface{}) string { - switch e.(type) { - case *TaskCreate: - return runtime.TaskCreateEventTopic - case *TaskStart: - return runtime.TaskStartEventTopic - case *TaskOOM: - return runtime.TaskOOMEventTopic - case *TaskExit: - return runtime.TaskExitEventTopic - case *TaskDelete: - return runtime.TaskDeleteEventTopic - case *TaskExecAdded: - return runtime.TaskExecAddedEventTopic - case *TaskExecStarted: - return runtime.TaskExecStartedEventTopic - default: - log.L.Printf("no topic for type %#v", e) - } - return runtime.TaskUnknownTopic -} - -func newInit(path, workDir, runtimeRoot, namespace string, config map[string]string, platform stdio.Platform, r *proc.CreateConfig, options *types.Any) (*proc.Init, error) { - var opts runctypes.CreateOptions - if options != nil { - v, err := typeurl.UnmarshalAny(options) - if err != nil { - return nil, err - } - opts = *v.(*runctypes.CreateOptions) - } - - spec, err := utils.ReadSpec(r.Bundle) - if err != nil { - return nil, fmt.Errorf("read oci spec: %w", err) - } - if err := utils.UpdateVolumeAnnotations(r.Bundle, spec); err != nil { - return nil, fmt.Errorf("update volume annotations: %w", err) - } - - runsc.FormatRunscLogPath(r.ID, config) - rootfs := filepath.Join(path, "rootfs") - runtime := proc.NewRunsc(runtimeRoot, path, namespace, r.Runtime, config) - p := proc.New(r.ID, runtime, stdio.Stdio{ - Stdin: r.Stdin, - Stdout: r.Stdout, - Stderr: r.Stderr, - Terminal: r.Terminal, - }) - p.Bundle = r.Bundle - p.Platform = platform - p.Rootfs = rootfs - p.WorkDir = workDir - p.IoUID = int(opts.IoUid) - p.IoGID = int(opts.IoGid) - p.Sandbox = utils.IsSandbox(spec) - p.UserLog = utils.UserLogPath(spec) - p.Monitor = reaper.Default - return p, nil -} diff --git a/pkg/shim/v1/shim/shim.go b/pkg/shim/v1/shim/shim.go deleted file mode 100644 index 1855a8769..000000000 --- a/pkg/shim/v1/shim/shim.go +++ /dev/null @@ -1,17 +0,0 @@ -// Copyright 2018 The containerd Authors. -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Package shim contains the core containerd shim implementation. -package shim diff --git a/pkg/shim/v1/utils/BUILD b/pkg/shim/v1/utils/BUILD deleted file mode 100644 index 54a0aabb7..000000000 --- a/pkg/shim/v1/utils/BUILD +++ /dev/null @@ -1,27 +0,0 @@ -load("//tools:defs.bzl", "go_library", "go_test") - -package(licenses = ["notice"]) - -go_library( - name = "utils", - srcs = [ - "annotations.go", - "utils.go", - "volumes.go", - ], - visibility = [ - "//pkg/shim:__subpackages__", - "//shim:__subpackages__", - ], - deps = [ - "@com_github_opencontainers_runtime_spec//specs-go:go_default_library", - ], -) - -go_test( - name = "utils_test", - size = "small", - srcs = ["volumes_test.go"], - library = ":utils", - deps = ["@com_github_opencontainers_runtime_spec//specs-go:go_default_library"], -) diff --git a/pkg/shim/v1/utils/annotations.go b/pkg/shim/v1/utils/annotations.go deleted file mode 100644 index 1e9d3f365..000000000 --- a/pkg/shim/v1/utils/annotations.go +++ /dev/null @@ -1,25 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package utils - -// Annotations from the CRI annotations package. -// -// These are vendor due to import conflicts. -const ( - sandboxLogDirAnnotation = "io.kubernetes.cri.sandbox-log-directory" - containerTypeAnnotation = "io.kubernetes.cri.container-type" - containerTypeSandbox = "sandbox" - containerTypeContainer = "container" -) diff --git a/pkg/shim/v1/utils/utils.go b/pkg/shim/v1/utils/utils.go deleted file mode 100644 index 21e75d16d..000000000 --- a/pkg/shim/v1/utils/utils.go +++ /dev/null @@ -1,57 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Package utils contains utility functions. -package utils - -import ( - "encoding/json" - "io/ioutil" - "os" - "path/filepath" - - specs "github.com/opencontainers/runtime-spec/specs-go" -) - -// ReadSpec reads OCI spec from the bundle directory. -func ReadSpec(bundle string) (*specs.Spec, error) { - f, err := os.Open(filepath.Join(bundle, "config.json")) - if err != nil { - return nil, err - } - b, err := ioutil.ReadAll(f) - if err != nil { - return nil, err - } - var spec specs.Spec - if err := json.Unmarshal(b, &spec); err != nil { - return nil, err - } - return &spec, nil -} - -// IsSandbox checks whether a container is a sandbox container. -func IsSandbox(spec *specs.Spec) bool { - t, ok := spec.Annotations[containerTypeAnnotation] - return !ok || t == containerTypeSandbox -} - -// UserLogPath gets user log path from OCI annotation. -func UserLogPath(spec *specs.Spec) string { - sandboxLogDir := spec.Annotations[sandboxLogDirAnnotation] - if sandboxLogDir == "" { - return "" - } - return filepath.Join(sandboxLogDir, "gvisor.log") -} diff --git a/pkg/shim/v1/utils/volumes.go b/pkg/shim/v1/utils/volumes.go deleted file mode 100644 index 52a428179..000000000 --- a/pkg/shim/v1/utils/volumes.go +++ /dev/null @@ -1,155 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package utils - -import ( - "encoding/json" - "fmt" - "io/ioutil" - "path/filepath" - "strings" - - specs "github.com/opencontainers/runtime-spec/specs-go" -) - -const volumeKeyPrefix = "dev.gvisor.spec.mount." - -var kubeletPodsDir = "/var/lib/kubelet/pods" - -// volumeName gets volume name from volume annotation key, example: -// dev.gvisor.spec.mount.NAME.share -func volumeName(k string) string { - return strings.SplitN(strings.TrimPrefix(k, volumeKeyPrefix), ".", 2)[0] -} - -// volumeFieldName gets volume field name from volume annotation key, example: -// `type` is the field of dev.gvisor.spec.mount.NAME.type -func volumeFieldName(k string) string { - parts := strings.Split(strings.TrimPrefix(k, volumeKeyPrefix), ".") - return parts[len(parts)-1] -} - -// podUID gets pod UID from the pod log path. -func podUID(s *specs.Spec) (string, error) { - sandboxLogDir := s.Annotations[sandboxLogDirAnnotation] - if sandboxLogDir == "" { - return "", fmt.Errorf("no sandbox log path annotation") - } - fields := strings.Split(filepath.Base(sandboxLogDir), "_") - switch len(fields) { - case 1: // This is the old CRI logging path. - return fields[0], nil - case 3: // This is the new CRI logging path. - return fields[2], nil - } - return "", fmt.Errorf("unexpected sandbox log path %q", sandboxLogDir) -} - -// isVolumeKey checks whether an annotation key is for volume. -func isVolumeKey(k string) bool { - return strings.HasPrefix(k, volumeKeyPrefix) -} - -// volumeSourceKey constructs the annotation key for volume source. -func volumeSourceKey(volume string) string { - return volumeKeyPrefix + volume + ".source" -} - -// volumePath searches the volume path in the kubelet pod directory. -func volumePath(volume, uid string) (string, error) { - // TODO: Support subpath when gvisor supports pod volume bind mount. - volumeSearchPath := fmt.Sprintf("%s/%s/volumes/*/%s", kubeletPodsDir, uid, volume) - dirs, err := filepath.Glob(volumeSearchPath) - if err != nil { - return "", err - } - if len(dirs) != 1 { - return "", fmt.Errorf("unexpected matched volume list %v", dirs) - } - return dirs[0], nil -} - -// isVolumePath checks whether a string is the volume path. -func isVolumePath(volume, path string) (bool, error) { - // TODO: Support subpath when gvisor supports pod volume bind mount. - volumeSearchPath := fmt.Sprintf("%s/*/volumes/*/%s", kubeletPodsDir, volume) - return filepath.Match(volumeSearchPath, path) -} - -// UpdateVolumeAnnotations add necessary OCI annotations for gvisor -// volume optimization. -func UpdateVolumeAnnotations(bundle string, s *specs.Spec) error { - var ( - uid string - err error - ) - if IsSandbox(s) { - uid, err = podUID(s) - if err != nil { - // Skip if we can't get pod UID, because this doesn't work - // for containerd 1.1. - return nil - } - } - var updated bool - for k, v := range s.Annotations { - if !isVolumeKey(k) { - continue - } - if volumeFieldName(k) != "type" { - continue - } - volume := volumeName(k) - if uid != "" { - // This is a sandbox. - path, err := volumePath(volume, uid) - if err != nil { - return fmt.Errorf("get volume path for %q: %w", volume, err) - } - s.Annotations[volumeSourceKey(volume)] = path - updated = true - } else { - // This is a container. - for i := range s.Mounts { - // An error is returned for sandbox if source - // annotation is not successfully applied, so - // it is guaranteed that the source annotation - // for sandbox has already been successfully - // applied at this point. - // - // The volume name is unique inside a pod, so - // matching without podUID is fine here. - // - // TODO: Pass podUID down to shim for containers to do - // more accurate matching. - if yes, _ := isVolumePath(volume, s.Mounts[i].Source); yes { - // gVisor requires the container mount type to match - // sandbox mount type. - s.Mounts[i].Type = v - updated = true - } - } - } - } - if !updated { - return nil - } - // Update bundle. - b, err := json.Marshal(s) - if err != nil { - return err - } - return ioutil.WriteFile(filepath.Join(bundle, "config.json"), b, 0666) -} diff --git a/pkg/shim/v1/utils/volumes_test.go b/pkg/shim/v1/utils/volumes_test.go deleted file mode 100644 index 3e02c6151..000000000 --- a/pkg/shim/v1/utils/volumes_test.go +++ /dev/null @@ -1,308 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package utils - -import ( - "encoding/json" - "fmt" - "io/ioutil" - "os" - "path/filepath" - "reflect" - "testing" - - specs "github.com/opencontainers/runtime-spec/specs-go" -) - -func TestUpdateVolumeAnnotations(t *testing.T) { - dir, err := ioutil.TempDir("", "test-update-volume-annotations") - if err != nil { - t.Fatalf("create tempdir: %v", err) - } - defer os.RemoveAll(dir) - kubeletPodsDir = dir - - const ( - testPodUID = "testuid" - testVolumeName = "testvolume" - testLogDirPath = "/var/log/pods/testns_testname_" + testPodUID - testLegacyLogDirPath = "/var/log/pods/" + testPodUID - ) - testVolumePath := fmt.Sprintf("%s/%s/volumes/kubernetes.io~empty-dir/%s", dir, testPodUID, testVolumeName) - - if err := os.MkdirAll(testVolumePath, 0755); err != nil { - t.Fatalf("Create test volume: %v", err) - } - - for _, test := range []struct { - desc string - spec *specs.Spec - expected *specs.Spec - expectErr bool - expectUpdate bool - }{ - { - desc: "volume annotations for sandbox", - spec: &specs.Spec{ - Annotations: map[string]string{ - sandboxLogDirAnnotation: testLogDirPath, - containerTypeAnnotation: containerTypeSandbox, - "dev.gvisor.spec.mount." + testVolumeName + ".share": "pod", - "dev.gvisor.spec.mount." + testVolumeName + ".type": "tmpfs", - "dev.gvisor.spec.mount." + testVolumeName + ".options": "ro", - }, - }, - expected: &specs.Spec{ - Annotations: map[string]string{ - sandboxLogDirAnnotation: testLogDirPath, - containerTypeAnnotation: containerTypeSandbox, - "dev.gvisor.spec.mount." + testVolumeName + ".share": "pod", - "dev.gvisor.spec.mount." + testVolumeName + ".type": "tmpfs", - "dev.gvisor.spec.mount." + testVolumeName + ".options": "ro", - "dev.gvisor.spec.mount." + testVolumeName + ".source": testVolumePath, - }, - }, - expectUpdate: true, - }, - { - desc: "volume annotations for sandbox with legacy log path", - spec: &specs.Spec{ - Annotations: map[string]string{ - sandboxLogDirAnnotation: testLegacyLogDirPath, - containerTypeAnnotation: containerTypeSandbox, - "dev.gvisor.spec.mount." + testVolumeName + ".share": "pod", - "dev.gvisor.spec.mount." + testVolumeName + ".type": "tmpfs", - "dev.gvisor.spec.mount." + testVolumeName + ".options": "ro", - }, - }, - expected: &specs.Spec{ - Annotations: map[string]string{ - sandboxLogDirAnnotation: testLegacyLogDirPath, - containerTypeAnnotation: containerTypeSandbox, - "dev.gvisor.spec.mount." + testVolumeName + ".share": "pod", - "dev.gvisor.spec.mount." + testVolumeName + ".type": "tmpfs", - "dev.gvisor.spec.mount." + testVolumeName + ".options": "ro", - "dev.gvisor.spec.mount." + testVolumeName + ".source": testVolumePath, - }, - }, - expectUpdate: true, - }, - { - desc: "tmpfs: volume annotations for container", - spec: &specs.Spec{ - Mounts: []specs.Mount{ - { - Destination: "/test", - Type: "bind", - Source: testVolumePath, - Options: []string{"ro"}, - }, - { - Destination: "/random", - Type: "bind", - Source: "/random", - Options: []string{"ro"}, - }, - }, - Annotations: map[string]string{ - containerTypeAnnotation: containerTypeContainer, - "dev.gvisor.spec.mount." + testVolumeName + ".share": "pod", - "dev.gvisor.spec.mount." + testVolumeName + ".type": "tmpfs", - "dev.gvisor.spec.mount." + testVolumeName + ".options": "ro", - }, - }, - expected: &specs.Spec{ - Mounts: []specs.Mount{ - { - Destination: "/test", - Type: "tmpfs", - Source: testVolumePath, - Options: []string{"ro"}, - }, - { - Destination: "/random", - Type: "bind", - Source: "/random", - Options: []string{"ro"}, - }, - }, - Annotations: map[string]string{ - containerTypeAnnotation: containerTypeContainer, - "dev.gvisor.spec.mount." + testVolumeName + ".share": "pod", - "dev.gvisor.spec.mount." + testVolumeName + ".type": "tmpfs", - "dev.gvisor.spec.mount." + testVolumeName + ".options": "ro", - }, - }, - expectUpdate: true, - }, - { - desc: "bind: volume annotations for container", - spec: &specs.Spec{ - Mounts: []specs.Mount{ - { - Destination: "/test", - Type: "bind", - Source: testVolumePath, - Options: []string{"ro"}, - }, - }, - Annotations: map[string]string{ - containerTypeAnnotation: containerTypeContainer, - "dev.gvisor.spec.mount." + testVolumeName + ".share": "container", - "dev.gvisor.spec.mount." + testVolumeName + ".type": "bind", - "dev.gvisor.spec.mount." + testVolumeName + ".options": "ro", - }, - }, - expected: &specs.Spec{ - Mounts: []specs.Mount{ - { - Destination: "/test", - Type: "bind", - Source: testVolumePath, - Options: []string{"ro"}, - }, - }, - Annotations: map[string]string{ - containerTypeAnnotation: containerTypeContainer, - "dev.gvisor.spec.mount." + testVolumeName + ".share": "container", - "dev.gvisor.spec.mount." + testVolumeName + ".type": "bind", - "dev.gvisor.spec.mount." + testVolumeName + ".options": "ro", - }, - }, - expectUpdate: true, - }, - { - desc: "should not return error without pod log directory", - spec: &specs.Spec{ - Annotations: map[string]string{ - containerTypeAnnotation: containerTypeSandbox, - "dev.gvisor.spec.mount." + testVolumeName + ".share": "pod", - "dev.gvisor.spec.mount." + testVolumeName + ".type": "tmpfs", - "dev.gvisor.spec.mount." + testVolumeName + ".options": "ro", - }, - }, - expected: &specs.Spec{ - Annotations: map[string]string{ - containerTypeAnnotation: containerTypeSandbox, - "dev.gvisor.spec.mount." + testVolumeName + ".share": "pod", - "dev.gvisor.spec.mount." + testVolumeName + ".type": "tmpfs", - "dev.gvisor.spec.mount." + testVolumeName + ".options": "ro", - }, - }, - }, - { - desc: "should return error if volume path does not exist", - spec: &specs.Spec{ - Annotations: map[string]string{ - sandboxLogDirAnnotation: testLogDirPath, - containerTypeAnnotation: containerTypeSandbox, - "dev.gvisor.spec.mount.notexist.share": "pod", - "dev.gvisor.spec.mount.notexist.type": "tmpfs", - "dev.gvisor.spec.mount.notexist.options": "ro", - }, - }, - expectErr: true, - }, - { - desc: "no volume annotations for sandbox", - spec: &specs.Spec{ - Annotations: map[string]string{ - sandboxLogDirAnnotation: testLogDirPath, - containerTypeAnnotation: containerTypeSandbox, - }, - }, - expected: &specs.Spec{ - Annotations: map[string]string{ - sandboxLogDirAnnotation: testLogDirPath, - containerTypeAnnotation: containerTypeSandbox, - }, - }, - }, - { - desc: "no volume annotations for container", - spec: &specs.Spec{ - Mounts: []specs.Mount{ - { - Destination: "/test", - Type: "bind", - Source: "/test", - Options: []string{"ro"}, - }, - { - Destination: "/random", - Type: "bind", - Source: "/random", - Options: []string{"ro"}, - }, - }, - Annotations: map[string]string{ - containerTypeAnnotation: containerTypeContainer, - }, - }, - expected: &specs.Spec{ - Mounts: []specs.Mount{ - { - Destination: "/test", - Type: "bind", - Source: "/test", - Options: []string{"ro"}, - }, - { - Destination: "/random", - Type: "bind", - Source: "/random", - Options: []string{"ro"}, - }, - }, - Annotations: map[string]string{ - containerTypeAnnotation: containerTypeContainer, - }, - }, - }, - } { - t.Run(test.desc, func(t *testing.T) { - bundle, err := ioutil.TempDir(dir, "test-bundle") - if err != nil { - t.Fatalf("Create test bundle: %v", err) - } - err = UpdateVolumeAnnotations(bundle, test.spec) - if test.expectErr { - if err == nil { - t.Fatal("Expected error, but got nil") - } - return - } - if err != nil { - t.Fatalf("Unexpected error: %v", err) - } - if !reflect.DeepEqual(test.expected, test.spec) { - t.Fatalf("Expected %+v, got %+v", test.expected, test.spec) - } - if test.expectUpdate { - b, err := ioutil.ReadFile(filepath.Join(bundle, "config.json")) - if err != nil { - t.Fatalf("Read spec from bundle: %v", err) - } - var spec specs.Spec - if err := json.Unmarshal(b, &spec); err != nil { - t.Fatalf("Unmarshal spec: %v", err) - } - if !reflect.DeepEqual(test.expected, &spec) { - t.Fatalf("Expected %+v, got %+v", test.expected, &spec) - } - } - }) - } -} diff --git a/pkg/shim/v2/BUILD b/pkg/shim/v2/BUILD deleted file mode 100644 index b0e8daa51..000000000 --- a/pkg/shim/v2/BUILD +++ /dev/null @@ -1,47 +0,0 @@ -load("//tools:defs.bzl", "go_library") - -package(licenses = ["notice"]) - -go_library( - name = "v2", - srcs = [ - "api.go", - "epoll.go", - "options.go", - "service.go", - "service_linux.go", - "state.go", - ], - visibility = ["//shim:__subpackages__"], - deps = [ - "//pkg/cleanup", - "//pkg/shim/runsc", - "//pkg/shim/v1/proc", - "//pkg/shim/v1/utils", - "//pkg/shim/v2/runtimeoptions", - "//runsc/specutils", - "@com_github_burntsushi_toml//:go_default_library", - "@com_github_containerd_cgroups//:go_default_library", - "@com_github_containerd_cgroups//stats/v1:go_default_library", - "@com_github_containerd_console//:go_default_library", - "@com_github_containerd_containerd//api/events:go_default_library", - "@com_github_containerd_containerd//api/types/task:go_default_library", - "@com_github_containerd_containerd//errdefs:go_default_library", - "@com_github_containerd_containerd//events:go_default_library", - "@com_github_containerd_containerd//log:go_default_library", - "@com_github_containerd_containerd//mount:go_default_library", - "@com_github_containerd_containerd//namespaces:go_default_library", - "@com_github_containerd_containerd//pkg/process:go_default_library", - "@com_github_containerd_containerd//pkg/stdio:go_default_library", - "@com_github_containerd_containerd//runtime:go_default_library", - "@com_github_containerd_containerd//runtime/linux/runctypes:go_default_library", - "@com_github_containerd_containerd//runtime/v2/shim:go_default_library", - "@com_github_containerd_containerd//runtime/v2/task:go_default_library", - "@com_github_containerd_containerd//sys/reaper:go_default_library", - "@com_github_containerd_fifo//:go_default_library", - "@com_github_containerd_typeurl//:go_default_library", - "@com_github_gogo_protobuf//types:go_default_library", - "@com_github_sirupsen_logrus//:go_default_library", - "@org_golang_x_sys//unix:go_default_library", - ], -) diff --git a/pkg/shim/v2/api.go b/pkg/shim/v2/api.go deleted file mode 100644 index 5a60a04db..000000000 --- a/pkg/shim/v2/api.go +++ /dev/null @@ -1,23 +0,0 @@ -// Copyright 2018 The containerd Authors. -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package v2 - -import ( - "github.com/containerd/containerd/api/events" -) - -// TaskOOM is an alias for events.TaskOOM. -type TaskOOM = events.TaskOOM diff --git a/pkg/shim/v2/epoll.go b/pkg/shim/v2/epoll.go deleted file mode 100644 index 41232cca8..000000000 --- a/pkg/shim/v2/epoll.go +++ /dev/null @@ -1,129 +0,0 @@ -// Copyright 2018 The containerd Authors. -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// +build linux - -package v2 - -import ( - "context" - "fmt" - "sync" - - "github.com/containerd/cgroups" - "github.com/containerd/containerd/events" - "github.com/containerd/containerd/runtime" - "golang.org/x/sys/unix" -) - -func newOOMEpoller(publisher events.Publisher) (*epoller, error) { - fd, err := unix.EpollCreate1(unix.EPOLL_CLOEXEC) - if err != nil { - return nil, err - } - return &epoller{ - fd: fd, - publisher: publisher, - set: make(map[uintptr]*item), - }, nil -} - -type epoller struct { - mu sync.Mutex - - fd int - publisher events.Publisher - set map[uintptr]*item -} - -type item struct { - id string - cg cgroups.Cgroup -} - -func (e *epoller) Close() error { - return unix.Close(e.fd) -} - -func (e *epoller) run(ctx context.Context) { - var events [128]unix.EpollEvent - for { - select { - case <-ctx.Done(): - e.Close() - return - default: - n, err := unix.EpollWait(e.fd, events[:], -1) - if err != nil { - if err == unix.EINTR || err == unix.EAGAIN { - continue - } - // Should not happen. - panic(fmt.Errorf("cgroups: epoll wait: %w", err)) - } - for i := 0; i < n; i++ { - e.process(ctx, uintptr(events[i].Fd)) - } - } - } -} - -func (e *epoller) add(id string, cg cgroups.Cgroup) error { - e.mu.Lock() - defer e.mu.Unlock() - fd, err := cg.OOMEventFD() - if err != nil { - return err - } - e.set[fd] = &item{ - id: id, - cg: cg, - } - event := unix.EpollEvent{ - Fd: int32(fd), - Events: unix.EPOLLHUP | unix.EPOLLIN | unix.EPOLLERR, - } - return unix.EpollCtl(e.fd, unix.EPOLL_CTL_ADD, int(fd), &event) -} - -func (e *epoller) process(ctx context.Context, fd uintptr) { - flush(fd) - e.mu.Lock() - i, ok := e.set[fd] - if !ok { - e.mu.Unlock() - return - } - e.mu.Unlock() - if i.cg.State() == cgroups.Deleted { - e.mu.Lock() - delete(e.set, fd) - e.mu.Unlock() - unix.Close(int(fd)) - return - } - if err := e.publisher.Publish(ctx, runtime.TaskOOMEventTopic, &TaskOOM{ - ContainerID: i.id, - }); err != nil { - // Should not happen. - panic(fmt.Errorf("publish OOM event: %w", err)) - } -} - -func flush(fd uintptr) error { - var buf [8]byte - _, err := unix.Read(int(fd), buf[:]) - return err -} diff --git a/pkg/shim/v2/options.go b/pkg/shim/v2/options.go deleted file mode 100644 index 9db33fd1f..000000000 --- a/pkg/shim/v2/options.go +++ /dev/null @@ -1,50 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package v2 - -const optionsType = "io.containerd.runsc.v1.options" - -// options is runtime options for io.containerd.runsc.v1. -type options struct { - // ShimCgroup is the cgroup the shim should be in. - ShimCgroup string `toml:"shim_cgroup" json:"shimCgroup"` - - // IoUID is the I/O's pipes uid. - IoUID uint32 `toml:"io_uid" json:"ioUid"` - - // IoGID is the I/O's pipes gid. - IoGID uint32 `toml:"io_gid" json:"ioGid"` - - // BinaryName is the binary name of the runsc binary. - BinaryName string `toml:"binary_name" json:"binaryName"` - - // Root is the runsc root directory. - Root string `toml:"root" json:"root"` - - // LogLevel sets the logging level. Some of the possible values are: debug, - // info, warning. - // - // This configuration only applies when the shim is running as a service. - LogLevel string `toml:"log_level" json:"logLevel"` - - // LogPath is the path to log directory. %ID% tags inside the string are - // replaced with the container ID. - // - // This configuration only applies when the shim is running as a service. - LogPath string `toml:"log_path" json:"logPath"` - - // RunscConfig is a key/value map of all runsc flags. - RunscConfig map[string]string `toml:"runsc_config" json:"runscConfig"` -} diff --git a/pkg/shim/v2/runtimeoptions/BUILD b/pkg/shim/v2/runtimeoptions/BUILD deleted file mode 100644 index abb8c3be3..000000000 --- a/pkg/shim/v2/runtimeoptions/BUILD +++ /dev/null @@ -1,32 +0,0 @@ -load("//tools:defs.bzl", "go_library", "go_test", "proto_library") - -package(licenses = ["notice"]) - -proto_library( - name = "api", - srcs = [ - "runtimeoptions.proto", - ], -) - -go_library( - name = "runtimeoptions", - srcs = [ - "runtimeoptions.go", - "runtimeoptions_cri.go", - ], - visibility = ["//pkg/shim/v2:__pkg__"], - deps = ["@com_github_gogo_protobuf//proto:go_default_library"], -) - -go_test( - name = "runtimeoptions_test", - size = "small", - srcs = ["runtimeoptions_test.go"], - library = ":runtimeoptions", - deps = [ - "@com_github_containerd_containerd//runtime/v1/shim/v1:go_default_library", - "@com_github_containerd_typeurl//:go_default_library", - "@com_github_gogo_protobuf//proto:go_default_library", - ], -) diff --git a/pkg/shim/v2/runtimeoptions/runtimeoptions.go b/pkg/shim/v2/runtimeoptions/runtimeoptions.go deleted file mode 100644 index 072dd87f0..000000000 --- a/pkg/shim/v2/runtimeoptions/runtimeoptions.go +++ /dev/null @@ -1,17 +0,0 @@ -// Copyright 2018 The containerd Authors. -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Package runtimeoptions contains the runtimeoptions proto. -package runtimeoptions diff --git a/pkg/shim/v2/runtimeoptions/runtimeoptions.proto b/pkg/shim/v2/runtimeoptions/runtimeoptions.proto deleted file mode 100644 index 057032e34..000000000 --- a/pkg/shim/v2/runtimeoptions/runtimeoptions.proto +++ /dev/null @@ -1,25 +0,0 @@ -// Copyright 2020 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -syntax = "proto3"; - -package cri.runtimeoptions.v1; - -// This is a version of the runtimeoptions CRI API that is vendored. -// -// Importing the full CRI package is a nightmare. -message Options { - string type_url = 1; - string config_path = 2; -} diff --git a/pkg/shim/v2/runtimeoptions/runtimeoptions_cri.go b/pkg/shim/v2/runtimeoptions/runtimeoptions_cri.go deleted file mode 100644 index e6102b4cf..000000000 --- a/pkg/shim/v2/runtimeoptions/runtimeoptions_cri.go +++ /dev/null @@ -1,383 +0,0 @@ -// Copyright 2018 The containerd Authors. -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package runtimeoptions - -import ( - "fmt" - "io" - "reflect" - "strings" - - proto "github.com/gogo/protobuf/proto" -) - -// This is a compile-time assertion to ensure that this generated file -// is compatible with the proto package it is being compiled against. -// A compilation error at this line likely means your copy of the -// proto package needs to be updated. -const _ = proto.GoGoProtoPackageIsVersion2 // please upgrade the proto package - -type Options struct { - // TypeUrl specifies the type of the content inside the config file. - TypeUrl string `protobuf:"bytes,1,opt,name=type_url,json=typeUrl,proto3" json:"type_url,omitempty"` - // ConfigPath specifies the filesystem location of the config file - // used by the runtime. - ConfigPath string `protobuf:"bytes,2,opt,name=config_path,json=configPath,proto3" json:"config_path,omitempty"` -} - -func (m *Options) Reset() { *m = Options{} } -func (*Options) ProtoMessage() {} -func (*Options) Descriptor() ([]byte, []int) { return fileDescriptorApi, []int{0} } - -func (m *Options) GetTypeUrl() string { - if m != nil { - return m.TypeUrl - } - return "" -} - -func (m *Options) GetConfigPath() string { - if m != nil { - return m.ConfigPath - } - return "" -} - -func init() { - proto.RegisterType((*Options)(nil), "cri.runtimeoptions.v1.Options") -} - -func (m *Options) Marshal() (dAtA []byte, err error) { - size := m.Size() - dAtA = make([]byte, size) - n, err := m.MarshalTo(dAtA) - if err != nil { - return nil, err - } - return dAtA[:n], nil -} - -func (m *Options) MarshalTo(dAtA []byte) (int, error) { - var i int - _ = i - var l int - _ = l - if len(m.TypeUrl) > 0 { - dAtA[i] = 0xa - i++ - i = encodeVarintApi(dAtA, i, uint64(len(m.TypeUrl))) - i += copy(dAtA[i:], m.TypeUrl) - } - if len(m.ConfigPath) > 0 { - dAtA[i] = 0x12 - i++ - i = encodeVarintApi(dAtA, i, uint64(len(m.ConfigPath))) - i += copy(dAtA[i:], m.ConfigPath) - } - return i, nil -} - -func encodeVarintApi(dAtA []byte, offset int, v uint64) int { - for v >= 1<<7 { - dAtA[offset] = uint8(v&0x7f | 0x80) - v >>= 7 - offset++ - } - dAtA[offset] = uint8(v) - return offset + 1 -} - -func (m *Options) Size() (n int) { - var l int - _ = l - l = len(m.TypeUrl) - if l > 0 { - n += 1 + l + sovApi(uint64(l)) - } - l = len(m.ConfigPath) - if l > 0 { - n += 1 + l + sovApi(uint64(l)) - } - return n -} - -func sovApi(x uint64) (n int) { - for { - n++ - x >>= 7 - if x == 0 { - break - } - } - return n -} - -func sozApi(x uint64) (n int) { - return sovApi(uint64((x << 1) ^ uint64((int64(x) >> 63)))) -} - -func (this *Options) String() string { - if this == nil { - return "nil" - } - s := strings.Join([]string{`&Options{`, - `TypeUrl:` + fmt.Sprintf("%v", this.TypeUrl) + `,`, - `ConfigPath:` + fmt.Sprintf("%v", this.ConfigPath) + `,`, - `}`, - }, "") - return s -} - -func valueToStringApi(v interface{}) string { - rv := reflect.ValueOf(v) - if rv.IsNil() { - return "nil" - } - pv := reflect.Indirect(rv).Interface() - return fmt.Sprintf("*%v", pv) -} - -func (m *Options) Unmarshal(dAtA []byte) error { - l := len(dAtA) - iNdEx := 0 - for iNdEx < l { - preIndex := iNdEx - var wire uint64 - for shift := uint(0); ; shift += 7 { - if shift >= 64 { - return ErrIntOverflowApi - } - if iNdEx >= l { - return io.ErrUnexpectedEOF - } - b := dAtA[iNdEx] - iNdEx++ - wire |= (uint64(b) & 0x7F) << shift - if b < 0x80 { - break - } - } - fieldNum := int32(wire >> 3) - wireType := int(wire & 0x7) - if wireType == 4 { - return fmt.Errorf("proto: Options: wiretype end group for non-group") - } - if fieldNum <= 0 { - return fmt.Errorf("proto: Options: illegal tag %d (wire type %d)", fieldNum, wire) - } - switch fieldNum { - case 1: - if wireType != 2 { - return fmt.Errorf("proto: wrong wireType = %d for field TypeUrl", wireType) - } - var stringLen uint64 - for shift := uint(0); ; shift += 7 { - if shift >= 64 { - return ErrIntOverflowApi - } - if iNdEx >= l { - return io.ErrUnexpectedEOF - } - b := dAtA[iNdEx] - iNdEx++ - stringLen |= (uint64(b) & 0x7F) << shift - if b < 0x80 { - break - } - } - intStringLen := int(stringLen) - if intStringLen < 0 { - return ErrInvalidLengthApi - } - postIndex := iNdEx + intStringLen - if postIndex > l { - return io.ErrUnexpectedEOF - } - m.TypeUrl = string(dAtA[iNdEx:postIndex]) - iNdEx = postIndex - case 2: - if wireType != 2 { - return fmt.Errorf("proto: wrong wireType = %d for field ConfigPath", wireType) - } - var stringLen uint64 - for shift := uint(0); ; shift += 7 { - if shift >= 64 { - return ErrIntOverflowApi - } - if iNdEx >= l { - return io.ErrUnexpectedEOF - } - b := dAtA[iNdEx] - iNdEx++ - stringLen |= (uint64(b) & 0x7F) << shift - if b < 0x80 { - break - } - } - intStringLen := int(stringLen) - if intStringLen < 0 { - return ErrInvalidLengthApi - } - postIndex := iNdEx + intStringLen - if postIndex > l { - return io.ErrUnexpectedEOF - } - m.ConfigPath = string(dAtA[iNdEx:postIndex]) - iNdEx = postIndex - default: - iNdEx = preIndex - skippy, err := skipApi(dAtA[iNdEx:]) - if err != nil { - return err - } - if skippy < 0 { - return ErrInvalidLengthApi - } - if (iNdEx + skippy) > l { - return io.ErrUnexpectedEOF - } - iNdEx += skippy - } - } - - if iNdEx > l { - return io.ErrUnexpectedEOF - } - return nil -} - -func skipApi(dAtA []byte) (n int, err error) { - l := len(dAtA) - iNdEx := 0 - for iNdEx < l { - var wire uint64 - for shift := uint(0); ; shift += 7 { - if shift >= 64 { - return 0, ErrIntOverflowApi - } - if iNdEx >= l { - return 0, io.ErrUnexpectedEOF - } - b := dAtA[iNdEx] - iNdEx++ - wire |= (uint64(b) & 0x7F) << shift - if b < 0x80 { - break - } - } - wireType := int(wire & 0x7) - switch wireType { - case 0: - for shift := uint(0); ; shift += 7 { - if shift >= 64 { - return 0, ErrIntOverflowApi - } - if iNdEx >= l { - return 0, io.ErrUnexpectedEOF - } - iNdEx++ - if dAtA[iNdEx-1] < 0x80 { - break - } - } - return iNdEx, nil - case 1: - iNdEx += 8 - return iNdEx, nil - case 2: - var length int - for shift := uint(0); ; shift += 7 { - if shift >= 64 { - return 0, ErrIntOverflowApi - } - if iNdEx >= l { - return 0, io.ErrUnexpectedEOF - } - b := dAtA[iNdEx] - iNdEx++ - length |= (int(b) & 0x7F) << shift - if b < 0x80 { - break - } - } - iNdEx += length - if length < 0 { - return 0, ErrInvalidLengthApi - } - return iNdEx, nil - case 3: - for { - var innerWire uint64 - var start int = iNdEx - for shift := uint(0); ; shift += 7 { - if shift >= 64 { - return 0, ErrIntOverflowApi - } - if iNdEx >= l { - return 0, io.ErrUnexpectedEOF - } - b := dAtA[iNdEx] - iNdEx++ - innerWire |= (uint64(b) & 0x7F) << shift - if b < 0x80 { - break - } - } - innerWireType := int(innerWire & 0x7) - if innerWireType == 4 { - break - } - next, err := skipApi(dAtA[start:]) - if err != nil { - return 0, err - } - iNdEx = start + next - } - return iNdEx, nil - case 4: - return iNdEx, nil - case 5: - iNdEx += 4 - return iNdEx, nil - default: - return 0, fmt.Errorf("proto: illegal wireType %d", wireType) - } - } - panic("unreachable") -} - -var ( - ErrInvalidLengthApi = fmt.Errorf("proto: negative length found during unmarshaling") - ErrIntOverflowApi = fmt.Errorf("proto: integer overflow") -) - -func init() { proto.RegisterFile("api.proto", fileDescriptorApi) } - -var fileDescriptorApi = []byte{ - // 183 bytes of a gzipped FileDescriptorProto - 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0xe2, 0xe2, 0x4c, 0x2c, 0xc8, 0xd4, - 0x2b, 0x28, 0xca, 0x2f, 0xc9, 0x17, 0x12, 0x4d, 0x2e, 0xca, 0xd4, 0x2b, 0x2a, 0xcd, 0x2b, 0xc9, - 0xcc, 0x4d, 0xcd, 0x2f, 0x28, 0xc9, 0xcc, 0xcf, 0x2b, 0xd6, 0x2b, 0x33, 0x94, 0xd2, 0x4d, 0xcf, - 0x2c, 0xc9, 0x28, 0x4d, 0xd2, 0x4b, 0xce, 0xcf, 0xd5, 0x4f, 0xcf, 0x4f, 0xcf, 0xd7, 0x07, 0xab, - 0x4e, 0x2a, 0x4d, 0x03, 0xf3, 0xc0, 0x1c, 0x30, 0x0b, 0x62, 0x8a, 0x92, 0x2b, 0x17, 0xbb, 0x3f, - 0x44, 0xb3, 0x90, 0x24, 0x17, 0x47, 0x49, 0x65, 0x41, 0x6a, 0x7c, 0x69, 0x51, 0x8e, 0x04, 0xa3, - 0x02, 0xa3, 0x06, 0x67, 0x10, 0x3b, 0x88, 0x1f, 0x5a, 0x94, 0x23, 0x24, 0xcf, 0xc5, 0x9d, 0x9c, - 0x9f, 0x97, 0x96, 0x99, 0x1e, 0x5f, 0x90, 0x58, 0x92, 0x21, 0xc1, 0x04, 0x96, 0xe5, 0x82, 0x08, - 0x05, 0x24, 0x96, 0x64, 0x38, 0xc9, 0x9c, 0x78, 0x28, 0xc7, 0x78, 0xe3, 0xa1, 0x1c, 0x43, 0xc3, - 0x23, 0x39, 0xc6, 0x13, 0x8f, 0xe4, 0x18, 0x2f, 0x3c, 0x92, 0x63, 0x7c, 0xf0, 0x48, 0x8e, 0x71, - 0xc2, 0x63, 0x39, 0x86, 0x24, 0x36, 0xb0, 0x5d, 0xc6, 0x80, 0x00, 0x00, 0x00, 0xff, 0xff, 0x07, - 0x00, 0xf2, 0x18, 0xbe, 0x00, 0x00, 0x00, -} diff --git a/pkg/shim/v2/runtimeoptions/runtimeoptions_test.go b/pkg/shim/v2/runtimeoptions/runtimeoptions_test.go deleted file mode 100644 index c59a2400e..000000000 --- a/pkg/shim/v2/runtimeoptions/runtimeoptions_test.go +++ /dev/null @@ -1,57 +0,0 @@ -// Copyright 2020 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package runtimeoptions - -import ( - "bytes" - "testing" - - shim "github.com/containerd/containerd/runtime/v1/shim/v1" - "github.com/containerd/typeurl" - "github.com/gogo/protobuf/proto" -) - -func TestCreateTaskRequest(t *testing.T) { - // Serialize the top-level message. - const encodedText = `options: < - type_url: "cri.runtimeoptions.v1.Options" - value: "\n\010type_url\022\013config_path" ->` - got := &shim.CreateTaskRequest{} // Should have raw options. - if err := proto.UnmarshalText(encodedText, got); err != nil { - t.Fatalf("unable to unmarshal text: %v", err) - } - var textBuffer bytes.Buffer - if err := proto.MarshalText(&textBuffer, got); err != nil { - t.Errorf("unable to marshal text: %v", err) - } - t.Logf("got: %s", string(textBuffer.Bytes())) - - // Check the options. - wantOptions := &Options{} - wantOptions.TypeUrl = "type_url" - wantOptions.ConfigPath = "config_path" - gotMessage, err := typeurl.UnmarshalAny(got.Options) - if err != nil { - t.Fatalf("unable to unmarshal any: %v", err) - } - gotOptions, ok := gotMessage.(*Options) - if !ok { - t.Fatalf("got %v, want %v", gotMessage, wantOptions) - } - if !proto.Equal(gotOptions, wantOptions) { - t.Fatalf("got %v, want %v", gotOptions, wantOptions) - } -} diff --git a/pkg/shim/v2/service.go b/pkg/shim/v2/service.go deleted file mode 100644 index 6aaf5fab8..000000000 --- a/pkg/shim/v2/service.go +++ /dev/null @@ -1,953 +0,0 @@ -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Package v2 implements Containerd Shim v2 interface. -package v2 - -import ( - "context" - "fmt" - "io" - "os" - "os/exec" - "path/filepath" - "sync" - "syscall" - "time" - - "github.com/BurntSushi/toml" - "github.com/containerd/cgroups" - cgroupsstats "github.com/containerd/cgroups/stats/v1" - "github.com/containerd/console" - "github.com/containerd/containerd/api/events" - "github.com/containerd/containerd/api/types/task" - "github.com/containerd/containerd/errdefs" - "github.com/containerd/containerd/log" - "github.com/containerd/containerd/mount" - "github.com/containerd/containerd/namespaces" - "github.com/containerd/containerd/pkg/process" - "github.com/containerd/containerd/pkg/stdio" - "github.com/containerd/containerd/runtime" - "github.com/containerd/containerd/runtime/linux/runctypes" - "github.com/containerd/containerd/runtime/v2/shim" - taskAPI "github.com/containerd/containerd/runtime/v2/task" - "github.com/containerd/containerd/sys/reaper" - "github.com/containerd/typeurl" - "github.com/gogo/protobuf/types" - "github.com/sirupsen/logrus" - "golang.org/x/sys/unix" - "gvisor.dev/gvisor/pkg/cleanup" - - "gvisor.dev/gvisor/pkg/shim/runsc" - "gvisor.dev/gvisor/pkg/shim/v1/proc" - "gvisor.dev/gvisor/pkg/shim/v1/utils" - "gvisor.dev/gvisor/pkg/shim/v2/runtimeoptions" - "gvisor.dev/gvisor/runsc/specutils" -) - -var ( - empty = &types.Empty{} - bufPool = sync.Pool{ - New: func() interface{} { - buffer := make([]byte, 32<<10) - return &buffer - }, - } -) - -var _ = (taskAPI.TaskService)(&service{}) - -const ( - // configFile is the default config file name. For containerd 1.2, - // we assume that a config.toml should exist in the runtime root. - configFile = "config.toml" - - // shimAddressPath is the relative path to a file that contains the address - // to the shim UDS. See service.shimAddress. - shimAddressPath = "address" -) - -// New returns a new shim service that can be used via GRPC. -func New(ctx context.Context, id string, publisher shim.Publisher, cancel func()) (shim.Shim, error) { - log.L.Debugf("service.New, id: %s", id) - - var opts shim.Opts - if ctxOpts := ctx.Value(shim.OptsKey{}); ctxOpts != nil { - opts = ctxOpts.(shim.Opts) - } - - ep, err := newOOMEpoller(publisher) - if err != nil { - return nil, err - } - go ep.run(ctx) - s := &service{ - id: id, - processes: make(map[string]process.Process), - events: make(chan interface{}, 128), - ec: proc.ExitCh, - oomPoller: ep, - cancel: cancel, - genericOptions: opts, - } - go s.processExits(ctx) - runsc.Monitor = &runsc.LogMonitor{Next: reaper.Default} - if err := s.initPlatform(); err != nil { - cancel() - return nil, fmt.Errorf("failed to initialized platform behavior: %w", err) - } - go s.forward(ctx, publisher) - - if address, err := shim.ReadAddress(shimAddressPath); err == nil { - s.shimAddress = address - } - - return s, nil -} - -// service is the shim implementation of a remote shim over GRPC. It runs in 2 -// different modes: -// 1. Service: process runs for the life time of the container and receives -// calls described in shimapi.TaskService interface. -// 2. Tool: process is short lived and runs only to perform the requested -// operations and then exits. It implements the direct functions in -// shim.Shim interface. -// -// When the service is running, it saves a json file with state information so -// that commands sent to the tool can load the state and perform the operation. -type service struct { - mu sync.Mutex - - // id is the container ID. - id string - - // bundle is a path provided by the caller on container creation. Store - // because it's needed in commands that don't receive bundle in the request. - bundle string - - // task is the main process that is running the container. - task *proc.Init - - // processes maps ExecId to processes running through exec. - processes map[string]process.Process - - events chan interface{} - - // platform handles operations related to the console. - platform stdio.Platform - - // genericOptions are options that come from the shim interface and are common - // to all shims. - genericOptions shim.Opts - - // opts are configuration options specific for this shim. - opts options - - // ex gets notified whenever the container init process or an exec'd process - // exits from inside the sandbox. - ec chan proc.Exit - - // oomPoller monitors the sandbox's cgroup for OOM notifications. - oomPoller *epoller - - // cancel is a function that needs to be called before the shim stops. The - // function is provided by the caller to New(). - cancel func() - - // shimAddress is the location of the UDS used to communicate to containerd. - shimAddress string -} - -func (s *service) newCommand(ctx context.Context, containerdBinary, containerdAddress string) (*exec.Cmd, error) { - ns, err := namespaces.NamespaceRequired(ctx) - if err != nil { - return nil, err - } - self, err := os.Executable() - if err != nil { - return nil, err - } - cwd, err := os.Getwd() - if err != nil { - return nil, err - } - args := []string{ - "-namespace", ns, - "-address", containerdAddress, - "-publish-binary", containerdBinary, - } - if s.genericOptions.Debug { - args = append(args, "-debug") - } - cmd := exec.Command(self, args...) - cmd.Dir = cwd - cmd.Env = append(os.Environ(), "GOMAXPROCS=2") - cmd.SysProcAttr = &syscall.SysProcAttr{ - Setpgid: true, - } - return cmd, nil -} - -func (s *service) StartShim(ctx context.Context, id, containerdBinary, containerdAddress, containerdTTRPCAddress string) (string, error) { - log.L.Debugf("StartShim, id: %s, binary: %q, address: %q", id, containerdBinary, containerdAddress) - - cmd, err := s.newCommand(ctx, containerdBinary, containerdAddress) - if err != nil { - return "", err - } - address, err := shim.SocketAddress(ctx, containerdAddress, id) - if err != nil { - return "", err - } - socket, err := shim.NewSocket(address) - if err != nil { - // The only time where this would happen is if there is a bug and the socket - // was not cleaned up in the cleanup method of the shim or we are using the - // grouping functionality where the new process should be run with the same - // shim as an existing container. - if !shim.SocketEaddrinuse(err) { - return "", fmt.Errorf("create new shim socket: %w", err) - } - if shim.CanConnect(address) { - if err := shim.WriteAddress(shimAddressPath, address); err != nil { - return "", fmt.Errorf("write existing socket for shim: %w", err) - } - return address, nil - } - if err := shim.RemoveSocket(address); err != nil { - return "", fmt.Errorf("remove pre-existing socket: %w", err) - } - if socket, err = shim.NewSocket(address); err != nil { - return "", fmt.Errorf("try create new shim socket 2x: %w", err) - } - } - cu := cleanup.Make(func() { - socket.Close() - _ = shim.RemoveSocket(address) - }) - defer cu.Clean() - - f, err := socket.File() - if err != nil { - return "", err - } - - cmd.ExtraFiles = append(cmd.ExtraFiles, f) - - log.L.Debugf("Executing: %q %s", cmd.Path, cmd.Args) - if err := cmd.Start(); err != nil { - f.Close() - return "", err - } - cu.Add(func() { cmd.Process.Kill() }) - - // make sure to wait after start - go cmd.Wait() - if err := shim.WritePidFile("shim.pid", cmd.Process.Pid); err != nil { - return "", err - } - if err := shim.WriteAddress(shimAddressPath, address); err != nil { - return "", err - } - if err := shim.SetScore(cmd.Process.Pid); err != nil { - return "", fmt.Errorf("failed to set OOM Score on shim: %w", err) - } - cu.Release() - return address, nil -} - -// Cleanup is called from another process (need to reload state) to stop the -// container and undo all operations done in Create(). -func (s *service) Cleanup(ctx context.Context) (*taskAPI.DeleteResponse, error) { - log.L.Debugf("Cleanup") - - path, err := os.Getwd() - if err != nil { - return nil, err - } - ns, err := namespaces.NamespaceRequired(ctx) - if err != nil { - return nil, err - } - var st state - if err := st.load(path); err != nil { - return nil, err - } - r := proc.NewRunsc(s.opts.Root, path, ns, st.Options.BinaryName, nil) - - if err := r.Delete(ctx, s.id, &runsc.DeleteOpts{ - Force: true, - }); err != nil { - log.L.Infof("failed to remove runc container: %v", err) - } - if err := mount.UnmountAll(st.Rootfs, 0); err != nil { - log.L.Infof("failed to cleanup rootfs mount: %v", err) - } - return &taskAPI.DeleteResponse{ - ExitedAt: time.Now(), - ExitStatus: 128 + uint32(unix.SIGKILL), - }, nil -} - -// Create creates a new initial process and container with the underlying OCI -// runtime. -func (s *service) Create(ctx context.Context, r *taskAPI.CreateTaskRequest) (*taskAPI.CreateTaskResponse, error) { - log.L.Debugf("Create, id: %s, bundle: %q", r.ID, r.Bundle) - - s.mu.Lock() - defer s.mu.Unlock() - - // Save the main task id and bundle to the shim for additional requests. - s.id = r.ID - s.bundle = r.Bundle - - ns, err := namespaces.NamespaceRequired(ctx) - if err != nil { - return nil, fmt.Errorf("create namespace: %w", err) - } - - // Read from root for now. - if r.Options != nil { - v, err := typeurl.UnmarshalAny(r.Options) - if err != nil { - return nil, err - } - var path string - switch o := v.(type) { - case *runctypes.CreateOptions: // containerd 1.2.x - s.opts.IoUID = o.IoUid - s.opts.IoGID = o.IoGid - s.opts.ShimCgroup = o.ShimCgroup - case *runctypes.RuncOptions: // containerd 1.2.x - root := proc.RunscRoot - if o.RuntimeRoot != "" { - root = o.RuntimeRoot - } - - s.opts.BinaryName = o.Runtime - - path = filepath.Join(root, configFile) - if _, err := os.Stat(path); err != nil { - if !os.IsNotExist(err) { - return nil, fmt.Errorf("stat config file %q: %w", path, err) - } - // A config file in runtime root is not required. - path = "" - } - case *runtimeoptions.Options: // containerd 1.3.x+ - if o.ConfigPath == "" { - break - } - if o.TypeUrl != optionsType { - return nil, fmt.Errorf("unsupported option type %q", o.TypeUrl) - } - path = o.ConfigPath - default: - return nil, fmt.Errorf("unsupported option type %q", r.Options.TypeUrl) - } - if path != "" { - if _, err = toml.DecodeFile(path, &s.opts); err != nil { - return nil, fmt.Errorf("decode config file %q: %w", path, err) - } - } - } - - if len(s.opts.LogLevel) != 0 { - lvl, err := logrus.ParseLevel(s.opts.LogLevel) - if err != nil { - return nil, err - } - logrus.SetLevel(lvl) - } - if len(s.opts.LogPath) != 0 { - logPath := runsc.FormatShimLogPath(s.opts.LogPath, s.id) - if err := os.MkdirAll(filepath.Dir(logPath), 0777); err != nil { - return nil, fmt.Errorf("failed to create log dir: %w", err) - } - logFile, err := os.Create(logPath) - if err != nil { - return nil, fmt.Errorf("failed to create log file: %w", err) - } - log.L.Debugf("Starting mirror log at %q", logPath) - std := logrus.StandardLogger() - std.SetOutput(io.MultiWriter(std.Out, logFile)) - - log.L.Debugf("Create shim") - log.L.Debugf("***************************") - log.L.Debugf("Args: %s", os.Args) - log.L.Debugf("PID: %d", os.Getpid()) - log.L.Debugf("ID: %s", s.id) - log.L.Debugf("Options: %+v", s.opts) - log.L.Debugf("Bundle: %s", r.Bundle) - log.L.Debugf("Terminal: %t", r.Terminal) - log.L.Debugf("stdin: %s", r.Stdin) - log.L.Debugf("stdout: %s", r.Stdout) - log.L.Debugf("stderr: %s", r.Stderr) - log.L.Debugf("***************************") - } - - // Save state before any action is taken to ensure Cleanup() will have all - // the information it needs to undo the operations. - st := state{ - Rootfs: filepath.Join(r.Bundle, "rootfs"), - Options: s.opts, - } - if err := st.save(r.Bundle); err != nil { - return nil, err - } - - if err := os.Mkdir(st.Rootfs, 0711); err != nil && !os.IsExist(err) { - return nil, err - } - - // Convert from types.Mount to proc.Mount. - var mounts []proc.Mount - for _, m := range r.Rootfs { - mounts = append(mounts, proc.Mount{ - Type: m.Type, - Source: m.Source, - Target: m.Target, - Options: m.Options, - }) - } - - // Cleans up all mounts in case of failure. - cu := cleanup.Make(func() { - if err := mount.UnmountAll(st.Rootfs, 0); err != nil { - log.L.Infof("failed to cleanup rootfs mount: %v", err) - } - }) - defer cu.Clean() - for _, rm := range mounts { - m := &mount.Mount{ - Type: rm.Type, - Source: rm.Source, - Options: rm.Options, - } - if err := m.Mount(st.Rootfs); err != nil { - return nil, fmt.Errorf("failed to mount rootfs component %v: %w", m, err) - } - } - - config := &proc.CreateConfig{ - ID: r.ID, - Bundle: r.Bundle, - Runtime: s.opts.BinaryName, - Rootfs: mounts, - Terminal: r.Terminal, - Stdin: r.Stdin, - Stdout: r.Stdout, - Stderr: r.Stderr, - } - process, err := newInit(r.Bundle, filepath.Join(r.Bundle, "work"), ns, s.platform, config, &s.opts, st.Rootfs) - if err != nil { - return nil, errdefs.ToGRPC(err) - } - if err := process.Create(ctx, config); err != nil { - return nil, errdefs.ToGRPC(err) - } - - // Set up OOM notification on the sandbox's cgroup. This is done on - // sandbox create since the sandbox process will be created here. - pid := process.Pid() - if pid > 0 { - cg, err := cgroups.Load(cgroups.V1, cgroups.PidPath(pid)) - if err != nil { - return nil, fmt.Errorf("loading cgroup for %d: %w", pid, err) - } - if err := s.oomPoller.add(s.id, cg); err != nil { - return nil, fmt.Errorf("add cg to OOM monitor: %w", err) - } - } - - // Success - cu.Release() - s.task = process - return &taskAPI.CreateTaskResponse{ - Pid: uint32(process.Pid()), - }, nil -} - -// Start starts a process. -func (s *service) Start(ctx context.Context, r *taskAPI.StartRequest) (*taskAPI.StartResponse, error) { - log.L.Debugf("Start, id: %s, execID: %s", r.ID, r.ExecID) - - p, err := s.getProcess(r.ExecID) - if err != nil { - return nil, err - } - if err := p.Start(ctx); err != nil { - return nil, err - } - // TODO: Set the cgroup and oom notifications on restore. - // https://github.com/google/gvisor-containerd-shim/issues/58 - return &taskAPI.StartResponse{ - Pid: uint32(p.Pid()), - }, nil -} - -// Delete deletes the initial process and container. -func (s *service) Delete(ctx context.Context, r *taskAPI.DeleteRequest) (*taskAPI.DeleteResponse, error) { - log.L.Debugf("Delete, id: %s, execID: %s", r.ID, r.ExecID) - - p, err := s.getProcess(r.ExecID) - if err != nil { - return nil, err - } - if p == nil { - return nil, errdefs.ToGRPCf(errdefs.ErrFailedPrecondition, "container must be created") - } - if err := p.Delete(ctx); err != nil { - return nil, err - } - if len(r.ExecID) != 0 { - s.mu.Lock() - delete(s.processes, r.ExecID) - s.mu.Unlock() - } else if s.platform != nil { - s.platform.Close() - } - return &taskAPI.DeleteResponse{ - ExitStatus: uint32(p.ExitStatus()), - ExitedAt: p.ExitedAt(), - Pid: uint32(p.Pid()), - }, nil -} - -// Exec spawns an additional process inside the container. -func (s *service) Exec(ctx context.Context, r *taskAPI.ExecProcessRequest) (*types.Empty, error) { - log.L.Debugf("Exec, id: %s, execID: %s", r.ID, r.ExecID) - - s.mu.Lock() - p := s.processes[r.ExecID] - s.mu.Unlock() - if p != nil { - return nil, errdefs.ToGRPCf(errdefs.ErrAlreadyExists, "id %s", r.ExecID) - } - if s.task == nil { - return nil, errdefs.ToGRPCf(errdefs.ErrFailedPrecondition, "container must be created") - } - process, err := s.task.Exec(ctx, s.bundle, &proc.ExecConfig{ - ID: r.ExecID, - Terminal: r.Terminal, - Stdin: r.Stdin, - Stdout: r.Stdout, - Stderr: r.Stderr, - Spec: r.Spec, - }) - if err != nil { - return nil, errdefs.ToGRPC(err) - } - s.mu.Lock() - s.processes[r.ExecID] = process - s.mu.Unlock() - return empty, nil -} - -// ResizePty resizes the terminal of a process. -func (s *service) ResizePty(ctx context.Context, r *taskAPI.ResizePtyRequest) (*types.Empty, error) { - log.L.Debugf("ResizePty, id: %s, execID: %s, dimension: %dx%d", r.ID, r.ExecID, r.Height, r.Width) - - p, err := s.getProcess(r.ExecID) - if err != nil { - return nil, err - } - ws := console.WinSize{ - Width: uint16(r.Width), - Height: uint16(r.Height), - } - if err := p.Resize(ws); err != nil { - return nil, errdefs.ToGRPC(err) - } - return empty, nil -} - -// State returns runtime state information for a process. -func (s *service) State(ctx context.Context, r *taskAPI.StateRequest) (*taskAPI.StateResponse, error) { - log.L.Debugf("State, id: %s, execID: %s", r.ID, r.ExecID) - - p, err := s.getProcess(r.ExecID) - if err != nil { - return nil, err - } - st, err := p.Status(ctx) - if err != nil { - return nil, err - } - status := task.StatusUnknown - switch st { - case "created": - status = task.StatusCreated - case "running": - status = task.StatusRunning - case "stopped": - status = task.StatusStopped - } - sio := p.Stdio() - return &taskAPI.StateResponse{ - ID: p.ID(), - Bundle: s.bundle, - Pid: uint32(p.Pid()), - Status: status, - Stdin: sio.Stdin, - Stdout: sio.Stdout, - Stderr: sio.Stderr, - Terminal: sio.Terminal, - ExitStatus: uint32(p.ExitStatus()), - ExitedAt: p.ExitedAt(), - }, nil -} - -// Pause the container. -func (s *service) Pause(ctx context.Context, r *taskAPI.PauseRequest) (*types.Empty, error) { - log.L.Debugf("Pause, id: %s", r.ID) - return empty, errdefs.ToGRPC(errdefs.ErrNotImplemented) -} - -// Resume the container. -func (s *service) Resume(ctx context.Context, r *taskAPI.ResumeRequest) (*types.Empty, error) { - log.L.Debugf("Resume, id: %s", r.ID) - return empty, errdefs.ToGRPC(errdefs.ErrNotImplemented) -} - -// Kill a process with the provided signal. -func (s *service) Kill(ctx context.Context, r *taskAPI.KillRequest) (*types.Empty, error) { - log.L.Debugf("Kill, id: %s, execID: %s, signal: %d, all: %t", r.ID, r.ExecID, r.Signal, r.All) - - p, err := s.getProcess(r.ExecID) - if err != nil { - return nil, err - } - if p == nil { - return nil, errdefs.ToGRPCf(errdefs.ErrFailedPrecondition, "container must be created") - } - if err := p.Kill(ctx, r.Signal, r.All); err != nil { - return nil, errdefs.ToGRPC(err) - } - return empty, nil -} - -// Pids returns all pids inside the container. -func (s *service) Pids(ctx context.Context, r *taskAPI.PidsRequest) (*taskAPI.PidsResponse, error) { - log.L.Debugf("Pids, id: %s", r.ID) - - pids, err := s.getContainerPids(ctx, r.ID) - if err != nil { - return nil, errdefs.ToGRPC(err) - } - var processes []*task.ProcessInfo - for _, pid := range pids { - pInfo := task.ProcessInfo{ - Pid: pid, - } - for _, p := range s.processes { - if p.Pid() == int(pid) { - d := &runctypes.ProcessDetails{ - ExecID: p.ID(), - } - a, err := typeurl.MarshalAny(d) - if err != nil { - return nil, fmt.Errorf("failed to marshal process %d info: %w", pid, err) - } - pInfo.Info = a - break - } - } - processes = append(processes, &pInfo) - } - return &taskAPI.PidsResponse{ - Processes: processes, - }, nil -} - -// CloseIO closes the I/O context of a process. -func (s *service) CloseIO(ctx context.Context, r *taskAPI.CloseIORequest) (*types.Empty, error) { - log.L.Debugf("CloseIO, id: %s, execID: %s, stdin: %t", r.ID, r.ExecID, r.Stdin) - - p, err := s.getProcess(r.ExecID) - if err != nil { - return nil, err - } - if stdin := p.Stdin(); stdin != nil { - if err := stdin.Close(); err != nil { - return nil, fmt.Errorf("close stdin: %w", err) - } - } - return empty, nil -} - -// Checkpoint checkpoints the container. -func (s *service) Checkpoint(ctx context.Context, r *taskAPI.CheckpointTaskRequest) (*types.Empty, error) { - log.L.Debugf("Checkpoint, id: %s", r.ID) - return empty, errdefs.ToGRPC(errdefs.ErrNotImplemented) -} - -// Connect returns shim information such as the shim's pid. -func (s *service) Connect(ctx context.Context, r *taskAPI.ConnectRequest) (*taskAPI.ConnectResponse, error) { - log.L.Debugf("Connect, id: %s", r.ID) - - var pid int - if s.task != nil { - pid = s.task.Pid() - } - return &taskAPI.ConnectResponse{ - ShimPid: uint32(os.Getpid()), - TaskPid: uint32(pid), - }, nil -} - -func (s *service) Shutdown(ctx context.Context, r *taskAPI.ShutdownRequest) (*types.Empty, error) { - log.L.Debugf("Shutdown, id: %s", r.ID) - s.cancel() - if s.shimAddress != "" { - _ = shim.RemoveSocket(s.shimAddress) - } - os.Exit(0) - panic("Should not get here") -} - -func (s *service) Stats(ctx context.Context, r *taskAPI.StatsRequest) (*taskAPI.StatsResponse, error) { - log.L.Debugf("Stats, id: %s", r.ID) - if s.task == nil { - log.L.Debugf("Stats error, id: %s: container not created", r.ID) - return nil, errdefs.ToGRPCf(errdefs.ErrFailedPrecondition, "container must be created") - } - stats, err := s.task.Runtime().Stats(ctx, s.id) - if err != nil { - log.L.Debugf("Stats error, id: %s: %v", r.ID, err) - return nil, err - } - - // gvisor currently (as of 2020-03-03) only returns the total memory - // usage and current PID value[0]. However, we copy the common fields here - // so that future updates will propagate correct information. We're - // using the cgroups.Metrics structure so we're returning the same type - // as runc. - // - // [0]: https://github.com/google/gvisor/blob/277a0d5a1fbe8272d4729c01ee4c6e374d047ebc/runsc/boot/events.go#L61-L81 - metrics := &cgroupsstats.Metrics{ - CPU: &cgroupsstats.CPUStat{ - Usage: &cgroupsstats.CPUUsage{ - Total: stats.Cpu.Usage.Total, - Kernel: stats.Cpu.Usage.Kernel, - User: stats.Cpu.Usage.User, - PerCPU: stats.Cpu.Usage.Percpu, - }, - Throttling: &cgroupsstats.Throttle{ - Periods: stats.Cpu.Throttling.Periods, - ThrottledPeriods: stats.Cpu.Throttling.ThrottledPeriods, - ThrottledTime: stats.Cpu.Throttling.ThrottledTime, - }, - }, - Memory: &cgroupsstats.MemoryStat{ - Cache: stats.Memory.Cache, - Usage: &cgroupsstats.MemoryEntry{ - Limit: stats.Memory.Usage.Limit, - Usage: stats.Memory.Usage.Usage, - Max: stats.Memory.Usage.Max, - Failcnt: stats.Memory.Usage.Failcnt, - }, - Swap: &cgroupsstats.MemoryEntry{ - Limit: stats.Memory.Swap.Limit, - Usage: stats.Memory.Swap.Usage, - Max: stats.Memory.Swap.Max, - Failcnt: stats.Memory.Swap.Failcnt, - }, - Kernel: &cgroupsstats.MemoryEntry{ - Limit: stats.Memory.Kernel.Limit, - Usage: stats.Memory.Kernel.Usage, - Max: stats.Memory.Kernel.Max, - Failcnt: stats.Memory.Kernel.Failcnt, - }, - KernelTCP: &cgroupsstats.MemoryEntry{ - Limit: stats.Memory.KernelTCP.Limit, - Usage: stats.Memory.KernelTCP.Usage, - Max: stats.Memory.KernelTCP.Max, - Failcnt: stats.Memory.KernelTCP.Failcnt, - }, - }, - Pids: &cgroupsstats.PidsStat{ - Current: stats.Pids.Current, - Limit: stats.Pids.Limit, - }, - } - data, err := typeurl.MarshalAny(metrics) - if err != nil { - log.L.Debugf("Stats error, id: %s: %v", r.ID, err) - return nil, err - } - log.L.Debugf("Stats success, id: %s: %+v", r.ID, data) - return &taskAPI.StatsResponse{ - Stats: data, - }, nil -} - -// Update updates a running container. -func (s *service) Update(ctx context.Context, r *taskAPI.UpdateTaskRequest) (*types.Empty, error) { - return empty, errdefs.ToGRPC(errdefs.ErrNotImplemented) -} - -// Wait waits for a process to exit. -func (s *service) Wait(ctx context.Context, r *taskAPI.WaitRequest) (*taskAPI.WaitResponse, error) { - log.L.Debugf("Wait, id: %s, execID: %s", r.ID, r.ExecID) - - p, err := s.getProcess(r.ExecID) - if err != nil { - return nil, err - } - if p == nil { - return nil, errdefs.ToGRPCf(errdefs.ErrFailedPrecondition, "container must be created") - } - p.Wait() - - return &taskAPI.WaitResponse{ - ExitStatus: uint32(p.ExitStatus()), - ExitedAt: p.ExitedAt(), - }, nil -} - -func (s *service) processExits(ctx context.Context) { - for e := range s.ec { - s.checkProcesses(ctx, e) - } -} - -func (s *service) checkProcesses(ctx context.Context, e proc.Exit) { - // TODO(random-liu): Add `shouldKillAll` logic if container pid - // namespace is supported. - for _, p := range s.allProcesses() { - if p.ID() == e.ID { - if ip, ok := p.(*proc.Init); ok { - // Ensure all children are killed. - log.L.Debugf("Container init process exited, killing all container processes") - if err := ip.KillAll(ctx); err != nil { - log.G(ctx).WithError(err).WithField("id", ip.ID()). - Error("failed to kill init's children") - } - } - p.SetExited(e.Status) - s.events <- &events.TaskExit{ - ContainerID: s.id, - ID: p.ID(), - Pid: uint32(p.Pid()), - ExitStatus: uint32(e.Status), - ExitedAt: p.ExitedAt(), - } - return - } - } -} - -func (s *service) allProcesses() (o []process.Process) { - s.mu.Lock() - defer s.mu.Unlock() - for _, p := range s.processes { - o = append(o, p) - } - if s.task != nil { - o = append(o, s.task) - } - return o -} - -func (s *service) getContainerPids(ctx context.Context, id string) ([]uint32, error) { - s.mu.Lock() - p := s.task - s.mu.Unlock() - if p == nil { - return nil, fmt.Errorf("container must be created: %w", errdefs.ErrFailedPrecondition) - } - ps, err := p.Runtime().Ps(ctx, id) - if err != nil { - return nil, err - } - pids := make([]uint32, 0, len(ps)) - for _, pid := range ps { - pids = append(pids, uint32(pid)) - } - return pids, nil -} - -func (s *service) forward(ctx context.Context, publisher shim.Publisher) { - for e := range s.events { - err := publisher.Publish(ctx, getTopic(e), e) - if err != nil { - // Should not happen. - panic(fmt.Errorf("post event: %w", err)) - } - } -} - -func (s *service) getProcess(execID string) (process.Process, error) { - s.mu.Lock() - defer s.mu.Unlock() - if execID == "" { - return s.task, nil - } - p := s.processes[execID] - if p == nil { - return nil, errdefs.ToGRPCf(errdefs.ErrNotFound, "process does not exist %s", execID) - } - return p, nil -} - -func getTopic(e interface{}) string { - switch e.(type) { - case *events.TaskCreate: - return runtime.TaskCreateEventTopic - case *events.TaskStart: - return runtime.TaskStartEventTopic - case *events.TaskOOM: - return runtime.TaskOOMEventTopic - case *events.TaskExit: - return runtime.TaskExitEventTopic - case *events.TaskDelete: - return runtime.TaskDeleteEventTopic - case *events.TaskExecAdded: - return runtime.TaskExecAddedEventTopic - case *events.TaskExecStarted: - return runtime.TaskExecStartedEventTopic - default: - log.L.Infof("no topic for type %#v", e) - } - return runtime.TaskUnknownTopic -} - -func newInit(path, workDir, namespace string, platform stdio.Platform, r *proc.CreateConfig, options *options, rootfs string) (*proc.Init, error) { - spec, err := utils.ReadSpec(r.Bundle) - if err != nil { - return nil, fmt.Errorf("read oci spec: %w", err) - } - if err := utils.UpdateVolumeAnnotations(r.Bundle, spec); err != nil { - return nil, fmt.Errorf("update volume annotations: %w", err) - } - runsc.FormatRunscLogPath(r.ID, options.RunscConfig) - runtime := proc.NewRunsc(options.Root, path, namespace, options.BinaryName, options.RunscConfig) - p := proc.New(r.ID, runtime, stdio.Stdio{ - Stdin: r.Stdin, - Stdout: r.Stdout, - Stderr: r.Stderr, - Terminal: r.Terminal, - }) - p.Bundle = r.Bundle - p.Platform = platform - p.Rootfs = rootfs - p.WorkDir = workDir - p.IoUID = int(options.IoUID) - p.IoGID = int(options.IoGID) - p.Sandbox = specutils.SpecContainerType(spec) == specutils.ContainerTypeSandbox - p.UserLog = utils.UserLogPath(spec) - p.Monitor = reaper.Default - return p, nil -} diff --git a/pkg/shim/v2/service_linux.go b/pkg/shim/v2/service_linux.go deleted file mode 100644 index 1800ab90b..000000000 --- a/pkg/shim/v2/service_linux.go +++ /dev/null @@ -1,108 +0,0 @@ -// Copyright 2018 The containerd Authors. -// Copyright 2018 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// +build linux - -package v2 - -import ( - "context" - "fmt" - "io" - "sync" - "syscall" - - "github.com/containerd/console" - "github.com/containerd/fifo" -) - -type linuxPlatform struct { - epoller *console.Epoller -} - -func (p *linuxPlatform) CopyConsole(ctx context.Context, console console.Console, stdin, stdout, stderr string, wg *sync.WaitGroup) (console.Console, error) { - if p.epoller == nil { - return nil, fmt.Errorf("uninitialized epoller") - } - - epollConsole, err := p.epoller.Add(console) - if err != nil { - return nil, err - } - - if stdin != "" { - in, err := fifo.OpenFifo(context.Background(), stdin, syscall.O_RDONLY|syscall.O_NONBLOCK, 0) - if err != nil { - return nil, err - } - go func() { - p := bufPool.Get().(*[]byte) - defer bufPool.Put(p) - io.CopyBuffer(epollConsole, in, *p) - }() - } - - outw, err := fifo.OpenFifo(ctx, stdout, syscall.O_WRONLY, 0) - if err != nil { - return nil, err - } - outr, err := fifo.OpenFifo(ctx, stdout, syscall.O_RDONLY, 0) - if err != nil { - return nil, err - } - wg.Add(1) - go func() { - p := bufPool.Get().(*[]byte) - defer bufPool.Put(p) - io.CopyBuffer(outw, epollConsole, *p) - epollConsole.Close() - outr.Close() - outw.Close() - wg.Done() - }() - return epollConsole, nil -} - -func (p *linuxPlatform) ShutdownConsole(ctx context.Context, cons console.Console) error { - if p.epoller == nil { - return fmt.Errorf("uninitialized epoller") - } - epollConsole, ok := cons.(*console.EpollConsole) - if !ok { - return fmt.Errorf("expected EpollConsole, got %#v", cons) - } - return epollConsole.Shutdown(p.epoller.CloseConsole) -} - -func (p *linuxPlatform) Close() error { - return p.epoller.Close() -} - -// initialize a single epoll fd to manage our consoles. `initPlatform` should -// only be called once. -func (s *service) initPlatform() error { - if s.platform != nil { - return nil - } - epoller, err := console.NewEpoller() - if err != nil { - return fmt.Errorf("failed to initialize epoller: %w", err) - } - s.platform = &linuxPlatform{ - epoller: epoller, - } - go epoller.Wait() - return nil -} diff --git a/pkg/shim/v2/state.go b/pkg/shim/v2/state.go deleted file mode 100644 index 1f4be33d3..000000000 --- a/pkg/shim/v2/state.go +++ /dev/null @@ -1,48 +0,0 @@ -// Copyright 2020 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package v2 - -import ( - "encoding/json" - "io/ioutil" - "path/filepath" -) - -const filename = "state.json" - -// state holds information needed between shim invocations. -type state struct { - // Rootfs is the full path to the location rootfs was mounted. - Rootfs string `json:"rootfs"` - - // Options is the configuration loaded from config.toml. - Options options `json:"options"` -} - -func (s state) load(path string) error { - data, err := ioutil.ReadFile(filepath.Join(path, filename)) - if err != nil { - return err - } - return json.Unmarshal(data, &s) -} - -func (s state) save(path string) error { - data, err := json.Marshal(&s) - if err != nil { - return err - } - return ioutil.WriteFile(filepath.Join(path, filename), data, 0644) -} diff --git a/pkg/test/criutil/criutil.go b/pkg/test/criutil/criutil.go index e41769017..3b41a2824 100644 --- a/pkg/test/criutil/criutil.go +++ b/pkg/test/criutil/criutil.go @@ -36,7 +36,6 @@ import ( type Crictl struct { logger testutil.Logger endpoint string - runpArgs []string cleanup []func() } @@ -72,14 +71,13 @@ func ResolvePath(executable string) string { // NewCrictl returns a Crictl configured with a timeout and an endpoint over // which it will talk to containerd. -func NewCrictl(logger testutil.Logger, endpoint string, runpArgs []string) *Crictl { +func NewCrictl(logger testutil.Logger, endpoint string) *Crictl { // Attempt to find the executable, but don't bother propagating the // error at this point. The first command executed will return with a // binary not found error. return &Crictl{ logger: logger, endpoint: endpoint, - runpArgs: runpArgs, } } -- cgit v1.2.3