diff options
author | Adin Scannell <ascannell@google.com> | 2020-07-09 16:59:42 -0700 |
---|---|---|
committer | Adin Scannell <ascannell@google.com> | 2020-07-09 16:59:42 -0700 |
commit | 5471dbe2f36976eb61f8c8fe8eaafd5da0569779 (patch) | |
tree | 08fa0f9691189012085c5a515dd81dea46a105e2 /shim/v2/main.go | |
parent | 5946f111827fa4e342a2e6e9c043c198d2e5cb03 (diff) | |
parent | 7f8172edf583e0d26bee5e06578a442c7507ba6f (diff) |
Merge gvisor-containerd-shim
Diffstat (limited to 'shim/v2/main.go')
-rw-r--r-- | shim/v2/main.go | 318 |
1 files changed, 318 insertions, 0 deletions
diff --git a/shim/v2/main.go b/shim/v2/main.go new file mode 100644 index 000000000..500bdddee --- /dev/null +++ b/shim/v2/main.go @@ -0,0 +1,318 @@ +// Copyright 2018 The containerd Authors. +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package main + +import ( + "bytes" + "context" + "flag" + "fmt" + "net" + "os" + "os/exec" + "os/signal" + "path/filepath" + "runtime" + "runtime/debug" + "strings" + "sync" + "syscall" + "time" + + "github.com/containerd/containerd/events" + "github.com/containerd/containerd/namespaces" + "github.com/containerd/containerd/runtime/v1/linux/proc" + containerdshim "github.com/containerd/containerd/runtime/v1/shim" + shimapi "github.com/containerd/containerd/runtime/v1/shim/v1" + "github.com/containerd/ttrpc" + "github.com/containerd/typeurl" + ptypes "github.com/gogo/protobuf/types" + "github.com/opencontainers/runc/libcontainer/system" + "github.com/pkg/errors" + "github.com/sirupsen/logrus" + "golang.org/x/sys/unix" + + runsc "github.com/google/gvisor-containerd-shim/pkg/go-runsc" + "github.com/google/gvisor-containerd-shim/pkg/v1/shim" +) + +var ( + debugFlag bool + namespaceFlag string + socketFlag string + addressFlag string + workdirFlag string + runtimeRootFlag string + containerdBinaryFlag string + shimConfigFlag string +) + +// ShimConfigPath is the default shim config file path. +const ShimConfigPath = "/etc/containerd/gvisor-containerd-shim.toml" + +func init() { + flag.BoolVar(&debugFlag, "debug", false, "enable debug output in logs") + flag.StringVar(&namespaceFlag, "namespace", "", "namespace that owns the shim") + flag.StringVar(&socketFlag, "socket", "", "abstract socket path to serve") + flag.StringVar(&addressFlag, "address", "", "grpc address back to main containerd") + flag.StringVar(&workdirFlag, "workdir", "", "path used to storge large temporary data") + // Containerd default to runc, unless another runtime is explicitly specified. + // We keep the same default to make the default behavior consistent. + flag.StringVar(&runtimeRootFlag, "runtime-root", proc.RuncRoot, "root directory for the runtime") + // currently, the `containerd publish` utility is embedded in the daemon binary. + // The daemon invokes `containerd-shim -containerd-binary ...` with its own os.Executable() path. + flag.StringVar(&containerdBinaryFlag, "containerd-binary", "containerd", "path to containerd binary (used for `containerd publish`)") + flag.StringVar(&shimConfigFlag, "config", ShimConfigPath, "path to the shim configuration file") + flag.Parse() +} + +func main() { + // This is a hack. Exec current process to run standard containerd-shim + // if runtime root is not `runsc`. We don't need this for shim v2 api. + if filepath.Base(runtimeRootFlag) != "runsc" { + if err := executeRuncShim(); err != nil { + fmt.Fprintf(os.Stderr, "gvisor-containerd-shim: %s\n", err) + os.Exit(1) + } + } + + debug.SetGCPercent(40) + go func() { + for range time.Tick(30 * time.Second) { + debug.FreeOSMemory() + } + }() + + if debugFlag { + logrus.SetLevel(logrus.DebugLevel) + } + + if os.Getenv("GOMAXPROCS") == "" { + // If GOMAXPROCS hasn't been set, we default to a value of 2 to reduce + // the number of Go stacks present in the shim. + runtime.GOMAXPROCS(2) + } + + // Run regular shim if needed. + if err := executeShim(); err != nil { + fmt.Fprintf(os.Stderr, "gvisor-containerd-shim: %s\n", err) + os.Exit(1) + } +} + +// executeRuncShim execs current process to a containerd-shim process and +// retains all flags and envs. +func executeRuncShim() error { + c, err := loadConfig(shimConfigFlag) + if err != nil && !os.IsNotExist(err) { + return errors.Wrap(err, "failed to load shim config") + } + shimPath := c.RuncShim + if shimPath == "" { + shimPath, err = exec.LookPath("containerd-shim") + if err != nil { + return errors.Wrapf(err, "lookup containerd-shim") + } + } + + args := append([]string{shimPath}, os.Args[1:]...) + if err := syscall.Exec(shimPath, args, os.Environ()); err != nil { + return errors.Wrapf(err, "exec containerd-shim %q", shimPath) + } + return nil +} + +func executeShim() error { + // start handling signals as soon as possible so that things are properly reaped + // or if runtime exits before we hit the handler + signals, err := setupSignals() + if err != nil { + return err + } + dump := make(chan os.Signal, 32) + signal.Notify(dump, syscall.SIGUSR1) + + path, err := os.Getwd() + if err != nil { + return err + } + server, err := ttrpc.NewServer(ttrpc.WithServerHandshaker(ttrpc.UnixSocketRequireSameUser())) + if err != nil { + return errors.Wrap(err, "failed creating server") + } + c, err := loadConfig(shimConfigFlag) + if err != nil && !os.IsNotExist(err) { + return errors.Wrap(err, "failed to load shim config") + } + sv, err := shim.NewService( + shim.Config{ + Path: path, + Namespace: namespaceFlag, + WorkDir: workdirFlag, + RuntimeRoot: runtimeRootFlag, + RunscConfig: c.RunscConfig, + }, + &remoteEventsPublisher{address: addressFlag}, + ) + if err != nil { + return err + } + logrus.Debug("registering ttrpc server") + shimapi.RegisterShimService(server, sv) + + socket := socketFlag + if err := serve(server, socket); err != nil { + return err + } + logger := logrus.WithFields(logrus.Fields{ + "pid": os.Getpid(), + "path": path, + "namespace": namespaceFlag, + }) + go func() { + for range dump { + dumpStacks(logger) + } + }() + return handleSignals(logger, signals, server, sv) +} + +// serve serves the ttrpc API over a unix socket at the provided path +// this function does not block +func serve(server *ttrpc.Server, path string) error { + var ( + l net.Listener + err error + ) + if path == "" { + l, err = net.FileListener(os.NewFile(3, "socket")) + path = "[inherited from parent]" + } else { + if len(path) > 106 { + return errors.Errorf("%q: unix socket path too long (> 106)", path) + } + l, err = net.Listen("unix", "\x00"+path) + } + if err != nil { + return err + } + logrus.WithField("socket", path).Debug("serving api on unix socket") + go func() { + defer l.Close() + if err := server.Serve(context.Background(), l); err != nil && + !strings.Contains(err.Error(), "use of closed network connection") { + logrus.WithError(err).Fatal("gvisor-containerd-shim: ttrpc server failure") + } + }() + return nil +} + +// setupSignals creates a new signal handler for all signals and sets the shim as a +// sub-reaper so that the container processes are reparented +func setupSignals() (chan os.Signal, error) { + signals := make(chan os.Signal, 32) + signal.Notify(signals, unix.SIGTERM, unix.SIGINT, unix.SIGCHLD, unix.SIGPIPE) + // make sure runc is setup to use the monitor + // for waiting on processes + // TODO(random-liu): Move shim/reaper.go to a separate package. + runsc.Monitor = containerdshim.Default + // set the shim as the subreaper for all orphaned processes created by the container + if err := system.SetSubreaper(1); err != nil { + return nil, err + } + return signals, nil +} + +func handleSignals(logger *logrus.Entry, signals chan os.Signal, server *ttrpc.Server, sv *shim.Service) error { + var ( + termOnce sync.Once + done = make(chan struct{}) + ) + + for { + select { + case <-done: + return nil + case s := <-signals: + switch s { + case unix.SIGCHLD: + if err := containerdshim.Reap(); err != nil { + logger.WithError(err).Error("reap exit status") + } + case unix.SIGTERM, unix.SIGINT: + go termOnce.Do(func() { + ctx := context.TODO() + if err := server.Shutdown(ctx); err != nil { + logger.WithError(err).Error("failed to shutdown server") + } + // Ensure our child is dead if any + sv.Kill(ctx, &shimapi.KillRequest{ + Signal: uint32(syscall.SIGKILL), + All: true, + }) + sv.Delete(context.Background(), &ptypes.Empty{}) + close(done) + }) + case unix.SIGPIPE: + } + } + } +} + +func dumpStacks(logger *logrus.Entry) { + var ( + buf []byte + stackSize int + ) + bufferLen := 16384 + for stackSize == len(buf) { + buf = make([]byte, bufferLen) + stackSize = runtime.Stack(buf, true) + bufferLen *= 2 + } + buf = buf[:stackSize] + logger.Infof("=== BEGIN goroutine stack dump ===\n%s\n=== END goroutine stack dump ===", buf) +} + +type remoteEventsPublisher struct { + address string +} + +func (l *remoteEventsPublisher) Publish(ctx context.Context, topic string, event events.Event) error { + ns, _ := namespaces.Namespace(ctx) + encoded, err := typeurl.MarshalAny(event) + if err != nil { + return err + } + data, err := encoded.Marshal() + if err != nil { + return err + } + cmd := exec.CommandContext(ctx, containerdBinaryFlag, "--address", l.address, "publish", "--topic", topic, "--namespace", ns) + cmd.Stdin = bytes.NewReader(data) + c, err := containerdshim.Default.Start(cmd) + if err != nil { + return err + } + status, err := containerdshim.Default.Wait(cmd, c) + if err != nil { + return err + } + if status != 0 { + return errors.New("failed to publish event") + } + return nil +} |