/* Copyright The containerd Authors. Copyright 2018 Google LLC Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at https://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ package main import ( "bytes" "context" "flag" "fmt" "net" "os" "os/exec" "os/signal" "path/filepath" "runtime" "runtime/debug" "strings" "sync" "syscall" "time" "github.com/containerd/containerd/events" "github.com/containerd/containerd/namespaces" "github.com/containerd/containerd/runtime/v1/linux/proc" containerdshim "github.com/containerd/containerd/runtime/v1/shim" shimapi "github.com/containerd/containerd/runtime/v1/shim/v1" "github.com/containerd/ttrpc" "github.com/containerd/typeurl" ptypes "github.com/gogo/protobuf/types" "github.com/opencontainers/runc/libcontainer/system" "github.com/pkg/errors" "github.com/sirupsen/logrus" "golang.org/x/sys/unix" runsc "github.com/google/gvisor-containerd-shim/pkg/go-runsc" "github.com/google/gvisor-containerd-shim/pkg/v1/shim" ) var ( debugFlag bool namespaceFlag string socketFlag string addressFlag string workdirFlag string runtimeRootFlag string containerdBinaryFlag string shimConfigFlag string ) // ShimConfigPath is the default shim config file path. const ShimConfigPath = "/etc/containerd/gvisor-containerd-shim.toml" func init() { flag.BoolVar(&debugFlag, "debug", false, "enable debug output in logs") flag.StringVar(&namespaceFlag, "namespace", "", "namespace that owns the shim") flag.StringVar(&socketFlag, "socket", "", "abstract socket path to serve") flag.StringVar(&addressFlag, "address", "", "grpc address back to main containerd") flag.StringVar(&workdirFlag, "workdir", "", "path used to storge large temporary data") // Containerd default to runc, unless another runtime is explicitly specified. // We keep the same default to make the default behavior consistent. flag.StringVar(&runtimeRootFlag, "runtime-root", proc.RuncRoot, "root directory for the runtime") // currently, the `containerd publish` utility is embedded in the daemon binary. // The daemon invokes `containerd-shim -containerd-binary ...` with its own os.Executable() path. flag.StringVar(&containerdBinaryFlag, "containerd-binary", "containerd", "path to containerd binary (used for `containerd publish`)") flag.StringVar(&shimConfigFlag, "config", ShimConfigPath, "path to the shim configuration file") flag.Parse() } func main() { // This is a hack. Exec current process to run standard containerd-shim // if runtime root is not `runsc`. We don't need this for shim v2 api. if filepath.Base(runtimeRootFlag) != "runsc" { if err := executeRuncShim(); err != nil { fmt.Fprintf(os.Stderr, "gvisor-containerd-shim: %s\n", err) os.Exit(1) } } debug.SetGCPercent(40) go func() { for range time.Tick(30 * time.Second) { debug.FreeOSMemory() } }() if debugFlag { logrus.SetLevel(logrus.DebugLevel) } if os.Getenv("GOMAXPROCS") == "" { // If GOMAXPROCS hasn't been set, we default to a value of 2 to reduce // the number of Go stacks present in the shim. runtime.GOMAXPROCS(2) } // Run regular shim if needed. if err := executeShim(); err != nil { fmt.Fprintf(os.Stderr, "gvisor-containerd-shim: %s\n", err) os.Exit(1) } } // executeRuncShim execs current process to a containerd-shim process and // retains all flags and envs. func executeRuncShim() error { c, err := loadConfig(shimConfigFlag) if err != nil && !os.IsNotExist(err) { return errors.Wrap(err, "failed to load shim config") } shimPath := c.RuncShim if shimPath == "" { shimPath, err = exec.LookPath("containerd-shim") if err != nil { return errors.Wrapf(err, "lookup containerd-shim") } } args := append([]string{shimPath}, os.Args[1:]...) if err := syscall.Exec(shimPath, args, os.Environ()); err != nil { return errors.Wrapf(err, "exec containerd-shim %q", shimPath) } return nil } func executeShim() error { // start handling signals as soon as possible so that things are properly reaped // or if runtime exits before we hit the handler signals, err := setupSignals() if err != nil { return err } dump := make(chan os.Signal, 32) signal.Notify(dump, syscall.SIGUSR1) path, err := os.Getwd() if err != nil { return err } server, err := ttrpc.NewServer(ttrpc.WithServerHandshaker(ttrpc.UnixSocketRequireSameUser())) if err != nil { return errors.Wrap(err, "failed creating server") } c, err := loadConfig(shimConfigFlag) if err != nil && !os.IsNotExist(err) { return errors.Wrap(err, "failed to load shim config") } sv, err := shim.NewService( shim.Config{ Path: path, Namespace: namespaceFlag, WorkDir: workdirFlag, RuntimeRoot: runtimeRootFlag, RunscConfig: c.RunscConfig, }, &remoteEventsPublisher{address: addressFlag}, ) if err != nil { return err } logrus.Debug("registering ttrpc server") shimapi.RegisterShimService(server, sv) socket := socketFlag if err := serve(server, socket); err != nil { return err } logger := logrus.WithFields(logrus.Fields{ "pid": os.Getpid(), "path": path, "namespace": namespaceFlag, }) go func() { for range dump { dumpStacks(logger) } }() return handleSignals(logger, signals, server, sv) } // serve serves the ttrpc API over a unix socket at the provided path // this function does not block func serve(server *ttrpc.Server, path string) error { var ( l net.Listener err error ) if path == "" { l, err = net.FileListener(os.NewFile(3, "socket")) path = "[inherited from parent]" } else { if len(path) > 106 { return errors.Errorf("%q: unix socket path too long (> 106)", path) } l, err = net.Listen("unix", "\x00"+path) } if err != nil { return err } logrus.WithField("socket", path).Debug("serving api on unix socket") go func() { defer l.Close() if err := server.Serve(context.Background(), l); err != nil && !strings.Contains(err.Error(), "use of closed network connection") { logrus.WithError(err).Fatal("gvisor-containerd-shim: ttrpc server failure") } }() return nil } // setupSignals creates a new signal handler for all signals and sets the shim as a // sub-reaper so that the container processes are reparented func setupSignals() (chan os.Signal, error) { signals := make(chan os.Signal, 32) signal.Notify(signals, unix.SIGTERM, unix.SIGINT, unix.SIGCHLD, unix.SIGPIPE) // make sure runc is setup to use the monitor // for waiting on processes // TODO(random-liu): Move shim/reaper.go to a separate package. runsc.Monitor = containerdshim.Default // set the shim as the subreaper for all orphaned processes created by the container if err := system.SetSubreaper(1); err != nil { return nil, err } return signals, nil } func handleSignals(logger *logrus.Entry, signals chan os.Signal, server *ttrpc.Server, sv *shim.Service) error { var ( termOnce sync.Once done = make(chan struct{}) ) for { select { case <-done: return nil case s := <-signals: switch s { case unix.SIGCHLD: if err := containerdshim.Reap(); err != nil { logger.WithError(err).Error("reap exit status") } case unix.SIGTERM, unix.SIGINT: go termOnce.Do(func() { ctx := context.TODO() if err := server.Shutdown(ctx); err != nil { logger.WithError(err).Error("failed to shutdown server") } // Ensure our child is dead if any sv.Kill(ctx, &shimapi.KillRequest{ Signal: uint32(syscall.SIGKILL), All: true, }) sv.Delete(context.Background(), &ptypes.Empty{}) close(done) }) case unix.SIGPIPE: } } } } func dumpStacks(logger *logrus.Entry) { var ( buf []byte stackSize int ) bufferLen := 16384 for stackSize == len(buf) { buf = make([]byte, bufferLen) stackSize = runtime.Stack(buf, true) bufferLen *= 2 } buf = buf[:stackSize] logger.Infof("=== BEGIN goroutine stack dump ===\n%s\n=== END goroutine stack dump ===", buf) } type remoteEventsPublisher struct { address string } func (l *remoteEventsPublisher) Publish(ctx context.Context, topic string, event events.Event) error { ns, _ := namespaces.Namespace(ctx) encoded, err := typeurl.MarshalAny(event) if err != nil { return err } data, err := encoded.Marshal() if err != nil { return err } cmd := exec.CommandContext(ctx, containerdBinaryFlag, "--address", l.address, "publish", "--topic", topic, "--namespace", ns) cmd.Stdin = bytes.NewReader(data) c, err := containerdshim.Default.Start(cmd) if err != nil { return err } status, err := containerdshim.Default.Wait(cmd, c) if err != nil { return err } if status != 0 { return errors.New("failed to publish event") } return nil }